1 ## oils_failures_allowed: 1
2
3 #### /^.$/
4 shopt -s ysh:all
5 var pat = ''
6
7 setvar pat = /^.$/
8 echo pat=$pat
9
10 setvar pat = /%start dot %end/
11 echo pat=$pat
12
13 if ('' ~ pat) { # ERE syntax
14 echo yes
15 } else {
16 echo no
17 }
18 # $pat is same as pat
19 if ('f' ~ pat) { # ERE syntax
20 echo yes
21 } else {
22 echo no
23 }
24
25 ## STDOUT:
26 pat=^.$
27 pat=^.$
28 no
29 yes
30 ## END
31
32
33 #### /.+/
34 shopt -s ysh:all
35
36 var pat = /.+/
37 echo $pat
38
39 var s = 'foo'
40 if (s ~ pat) { # ERE syntax
41 echo yes
42 }
43 var empty = ''
44 if (empty ~ pat) { echo yes } else { echo no }
45 ## STDOUT:
46 .+
47 yes
48 no
49 ## END
50
51 #### Repeat {1,3} etc.
52 var pat = null
53
54 setvar pat = /d{2}/
55 echo $pat
56 setvar pat = /d{1,3}/
57 echo $pat
58 setvar pat = /d{1,}/
59 echo $pat
60 setvar pat = /d{,3}/
61 echo $pat
62
63
64 ## STDOUT:
65 [[:digit:]]{2}
66 [[:digit:]]{1,3}
67 [[:digit:]]{1,}
68 [[:digit:]]{,3}
69 ## END
70
71
72 #### d+ digit+ !d+ !digit+
73 shopt -s ysh:all
74
75 var pat = ''
76
77 setvar pat = /d+/
78 echo $pat
79 if ('42' ~ pat) { echo yes }
80
81 var empty = ''
82 if (empty ~ pat) { echo yes } else { echo no }
83
84 setvar pat = /digit+/
85 echo $pat
86 setvar pat = /!d+/
87 echo $pat
88 setvar pat = /!digit+/
89 echo $pat
90
91
92 ## STDOUT:
93 [[:digit:]]+
94 yes
95 no
96 [[:digit:]]+
97 [^[:digit:]]+
98 [^[:digit:]]+
99 ## END
100
101 #### Alternation and sequence
102 var pat = ''
103 setvar pat = /s d+ | w*/
104 echo $pat
105 setvar pat = /s d+ or w*/
106 echo $pat
107 ## STDOUT:
108 [[:space:]][[:digit:]]+|[[:alpha:][:digit:]_]*
109 [[:space:]][[:digit:]]+|[[:alpha:][:digit:]_]*
110 ## END
111
112 #### Char Class Ranges
113 shopt -s ysh:all
114
115 var pat = ''
116 setvar pat = /[0-9 a-f]+/
117 echo $pat
118 # This is equivalent
119 setvar pat = /['0' - '9' 'a' - 'f']+/
120 echo $pat
121
122 if ('0123' ~ pat) { echo yes } else { echo no }
123 if ('zzz' ~ pat) { echo yes } else { echo no }
124 if ('' ~ pat) { echo yes } else { echo no }
125 ## STDOUT:
126 [0-9a-f]+
127 [0-9a-f]+
128 yes
129 no
130 no
131 ## END
132
133 #### Char Class Set
134 shopt -s ysh:all
135 var pat = ''
136
137 # This is NOT allowed
138 # setvar pat = /[a b c]+/
139
140 setvar pat = /['abc']+/
141 echo $pat
142
143 if ('cbcb' ~ pat) { echo yes } else { echo no }
144 if ('0123' ~ pat) { echo yes } else { echo no }
145 if ('' ~ pat) { echo yes } else { echo no }
146 ## STDOUT:
147 [abc]+
148 yes
149 no
150 no
151 ## END
152
153 #### Range with escaped characters
154 shopt -s ysh:all
155
156 var pat = null
157
158 setvar pat = / [ \x00 - \x0f ] /
159 echo $pat | od -A n -t x1
160
161 ## STDOUT:
162 5b 00 2d 0f 5d 0a
163 ## END
164
165
166 #### Group ()
167 shopt -s ysh:all
168 var pat = ''
169
170 setvar pat = /(%start s or d d)/
171 echo $pat
172
173 if (' foo' ~ pat) { echo yes } else { echo no }
174 if ('-00-' ~ pat) { echo yes } else { echo no }
175 if ('foo' ~ pat) { echo yes } else { echo no }
176
177 ## STDOUT:
178 (^[[:space:]]|[[:digit:]][[:digit:]])
179 yes
180 yes
181 no
182 ## END
183
184 #### Capture is acceptable as a group
185 shopt -s ysh:all
186 var pat = /<capture %start s | d d>/
187 echo $pat
188 ## STDOUT:
189 (^[[:space:]]|[[:digit:]][[:digit:]])
190 ## END
191
192 #### literal ''
193 shopt -s ysh:all
194 var pat = ''
195
196 setvar pat = /'abc' 'def'/
197 echo $pat
198
199 #setvar pat = /'abc' '^ + * ?'/
200 #echo $pat
201
202 if ('abcde' ~ pat) { echo yes } else { echo no }
203 if ('abcdef' ~ pat) { echo yes } else { echo no }
204
205 ## STDOUT:
206 abcdef
207 no
208 yes
209 ## END
210
211 #### Single quotes and splicing (do what "foo $x ${x}" used to)
212 shopt -s ysh:all
213 var pat = ''
214
215 var x = 'x'
216 var y = 'y'
217 setvar pat = / @x @x 'abc' @x @y /
218 echo $pat
219
220 if ('xxabcx' ~ pat) { echo yes } else { echo no }
221 if ('xxabcxyf' ~ pat) { echo yes } else { echo no }
222
223 ## STDOUT:
224 xxabcxy
225 no
226 yes
227 ## END
228
229 #### @splice
230 shopt -s ysh:all
231 var d = /d+/;
232 var ip = / @d '.' @d '.' @d '.' @d /
233 echo $ip
234 if ('0.0.0.0' ~ ip) { echo yes } else { echo no }
235 if ('0.0.0' ~ ip) { echo yes } else { echo no }
236 ## STDOUT:
237 [[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+
238 yes
239 no
240 ## END
241
242 #### splice with capital letters
243 shopt -s ysh:all
244 var D = /d+/;
245 var ip = / D '.' D '.' D '.' D /
246 echo $ip
247 if ('0.0.0.0' ~ ip) { echo yes } else { echo no }
248 if ('0.0.0' ~ ip) { echo yes } else { echo no }
249 ## STDOUT:
250 [[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+
251 yes
252 no
253 ## END
254
255 #### Matching escaped tab character
256 shopt -s ysh:all
257
258 var lines = :| $'aa\tbb' $'cc\tdd' |
259
260 var pat = / ('a' [\t] 'b') /
261 write pat=$pat
262 write @lines | egrep $pat
263
264 ## stdout-json: "pat=(a[\t]b)\naa\tbb\n"
265
266 #### Match unicode char
267 shopt -s ysh:all
268 var pat = / 'a' dot 'b' /
269
270 if ('axb' ~ pat ) { echo yes } else { echo no }
271
272 # mu character
273 if ($'a\xce\xbcb' ~ pat ) { echo yes } else { echo no }
274
275 if ('aZZb' ~ pat ) { echo yes } else { echo no }
276 ## STDOUT:
277 yes
278 yes
279 no
280 ## END
281
282 #### Match non-ASCII byte denoted using $'\xff' (TODO: LANG=C)
283
284 # NOTE: This pattern doesn't work with en_US.UTF-8. I think the user should
285 # set LANG=C or shopt --unset libc_utf8.
286
287 shopt -s ysh:all
288 var pat = /[ $'\xff' ]/;
289
290 echo $pat | od -A n -t x1
291 if ($'\xff' ~ pat) { echo yes } else { echo no }
292 if ($'\xfe' ~ pat) { echo yes } else { echo no }
293
294 ## STDOUT:
295 5b ff 5d 0a
296 yes
297 no
298 ## END
299
300 #### Match non-ASCII byte denoted using \xff
301 shopt -s ysh:all
302 var pat = /[ \xff ]/;
303
304 # Show what it translates to
305 echo $pat | od -A n -t x1
306
307 # TODO: This might require LANG=C to work
308 #if ($'\xff' ~ pat) { echo yes } else { echo no }
309 #if ($'\xfe' ~ pat) { echo yes } else { echo no }
310
311 ## STDOUT:
312 5b ff 5d 0a
313 ## END
314
315 #### ERE can express Unicode escapes that are in the ASCII range
316 shopt -s ysh:all
317 var pat = /[ \u{7f} ]/;
318
319 echo $pat | od -A n -t x1
320 if ($'\x7f' ~ pat) { echo yes } else { echo no }
321 if ($'\x7e' ~ pat) { echo yes } else { echo no }
322
323 var pat2 = /[ \u{7f} ]/;
324 var pat3 = /[ \u{0007f} ]/;
325 test "$pat2" = "$pat3" && echo 'equal'
326
327 var range = / [ \u{70} - \u{7f} ] /
328 if ($'\x70' ~ range) { echo yes } else { echo no }
329 if ($'\x69' ~ range) { echo yes } else { echo no }
330
331 ## STDOUT:
332 5b 7f 5d 0a
333 yes
334 no
335 equal
336 yes
337 no
338 ## END
339
340 #### ERE can't express higher Unicode escapes
341 shopt -s ysh:all
342 var pat2 = /[ \u{00} - \u{ff} ]/;
343
344 # This causes an error: the case expects status 1 and empty stdout (see below)
345 echo $pat2
346
347 # This just prints the value
348 = pat2
349
350 var pat1 = /[ \u{ff} ]/;
351
352 echo $pat1 | od -A n -t x1
353 if ($'\x7f' ~ pat1) { echo yes } else { echo no }
354 if ($'\x7e' ~ pat1) { echo yes } else { echo no }
355
356 ## status: 1
357 ## stdout-json: ""
358
359 #### non-ASCII bytes must be singleton terms, e.g. '\x7f\xff' is disallowed
360 var bytes = $'\x7f\xff'
361 var pat = / [ @bytes ] /
362 echo $pat
363 ## status: 1
364 ## stdout-json: ""
365
366 #### Matching escaped tab character
367 shopt -s ysh:all
368
369 # BUG: need C strings in array literal (NOTE(review): the literal below already uses $'...' strings, and this case repeats the earlier case with the same title - confirm whether it is still needed)
370 var lines = :| $'aa\tbb' $'cc\tdd' |
371
372 var pat = / ('a' [\t] 'b') /
373 write pat=$pat
374 write @lines | egrep $pat
375
376 ## stdout-json: "pat=(a[\t]b)\naa\tbb\n"
377
378 #### Repeated String Literal With Single Char
379 shopt -s ysh:all
380
381 var literal = 'f'
382 var pat = null
383
384 setvar pat = / %start @literal+ %end /
385 echo $pat
386 setvar pat = / %start (@literal)+ %end /
387 echo $pat
388
389 if ('fff' ~ pat) { echo yes }
390 if ('foo' !~ pat) { echo no }
391
392 ## STDOUT:
393 ^f+$
394 ^(f)+$
395 yes
396 no
397 ## END
398
399 #### Error when unparenthesized string of more than one character is repeated
400 shopt -s ysh:all
401
402 var literal = 'foo'
403 var pat = null
404
405 setvar pat = / %start @literal+ %end /
406 echo $pat
407 setvar pat = / %start (@literal)+ %end /
408 echo $pat
409
410 if ('foofoo' ~ pat) { echo yes }
411 if ('foof' !~ pat) { echo no }
412
413 ## status: 1
414 ## stdout-json: ""
415
416 #### Instead of $'foo\\bar' use 'foo' \\ 'bar'
417 shopt -s ysh:all
418 var pat = /'foo' \\ 'bar'/
419 echo $pat
420
421 if (r'foo\bar' ~ pat) { echo yes }
422 if (r'foo.bar' !~ pat) { echo no }
423 ## STDOUT:
424 foo\\bar
425 yes
426 no
427 ## END
428
429 #### Negation of Character Class ![a-z]
430 shopt -s ysh:all
431
432 var pat = / ![ a-z ] /
433 echo $pat
434
435 if ('0' ~ pat) { echo yes }
436 if ('a' !~ pat) { echo no }
437
438 ## STDOUT:
439 [^a-z]
440 yes
441 no
442 ## END
443
444 #### Posix and Perl class in class literals
445 shopt -s ysh:all
446
447 var pat = null
448
449 setvar pat = / [ space 'z' ] /
450 echo $pat
451 #setvar pat = / [ ~space 'z' ] /
452 #echo $pat
453
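454 # PROBLEM: can't negate individual POSIX classes inside a bracket expression.
455 # They would have to be Perl classes like \D or \S, which POSIX ERE lacks.
456 # [^[:space:]z] negates the whole set, not just [:space:]!
457 # [^[:space:]] only works because the class is the sole member.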
458
459 setvar pat = / [ digit 'z' ] /
460 echo $pat
461 #setvar pat = / [ ~digit 'z' ] /
462 #echo $pat
463
464 ## STDOUT:
465 [[:space:]z]
466 [[:digit:]z]
467 ## END
468
469 #### [!d] can't be negated because it's a literal character
470 setvar pat = / [ !d 'z' ] /
471 echo $pat
472 ## status: 2
473 ## stdout-json: ""
474
475 #### [!digit] can't be negated in POSIX ERE (but yes in Perl)
476 var pat = null
477 setvar pat = / [ !digit 'z' ] /
478 echo $pat
479 ## status: 1
480 ## stdout-json: ""
481
482 #### Operator chars in char classes (bash-like)
483
484 pat='[-]'
485 [[ '-' =~ $pat ]] && echo hyphen
486 [[ '\' =~ $pat ]] && echo FAIL
487
488 pat='[\]'
489 [[ '\' =~ $pat ]] && echo backslash
490 [[ '-' =~ $pat ]] && echo FAIL
491
492 pat='[]]'
493 [[ ']' =~ $pat ]] && echo 'right bracket'
494 [[ '[' =~ $pat ]] && echo FAIL
495
496 pat='[[]'
497 [[ '[' =~ $pat ]] && echo 'left bracket'
498 [[ ']' =~ $pat ]] && echo FAIL
499
500 pat='[.]'
501 [[ '.' =~ $pat ]] && echo period
502 [[ '\' =~ $pat ]] && echo FAIL
503
504 pat='[\^]'
505 [[ '^' =~ $pat ]] && echo caret
506 [[ '\' =~ $pat ]] && echo 'no way to have [^]'
507
508 ## STDOUT:
509 hyphen
510 backslash
511 right bracket
512 left bracket
513 period
514 caret
515 no way to have [^]
516 ## END
517
518 #### Operator chars in char classes (eggex)
519 shopt --set ysh:upgrade
520
521 var pat = / ['-'] /
522 #echo PAT=$pat
523 if ('-' ~ pat) { echo hyphen }
524 if ($'\\' ~ pat) { echo FAIL }
525
526 var pat = / [ \\ ] /
527 [[ '\' =~ $pat ]] && echo backslash
528 [[ '-' =~ $pat ]] && echo FAIL
529
530 var pat = / [ ']' ] /
531 [[ ']' =~ $pat ]] && echo 'right bracket'
532 [[ '[' =~ $pat ]] && echo FAIL
533
534 var pat = / [ '[' ] /
535 [[ '[' =~ $pat ]] && echo 'left bracket'
536 [[ ']' =~ $pat ]] && echo FAIL
537
538 var pat = / [ '.' ] /
539 [[ '.' =~ $pat ]] && echo period
540 [[ '\' =~ $pat ]] && echo FAIL
541
542 var pat = / [ \\ '^' ] /
543 [[ '^' =~ $pat ]] && echo caret
544 [[ '\' =~ $pat ]] && echo 'no way to have [^]'
545
546
547 ## STDOUT:
548 hyphen
549 backslash
550 right bracket
551 left bracket
552 period
553 caret
554 no way to have [^]
555 ## END
556
557 #### Matching ] and \ and ' and " in character classes
558 shopt -s ysh:all
559
560 # BUG: need C strings in array literal
561 var lines = :|
562 'backslash \'
563 'rbracket ]'
564 'lbracket ['
565 "sq '"
566 'dq ""'
567 |
568
569 # POSIX bracket-expression rule (not only a GNU quirk): a literal ] has to come first!
570 # []abc] works. But [abc\]] does NOT work, because \ is not an escape inside [ ].
571
572 var pat = / [ ']' \\ \' \" ] /
573 write pat=$pat
574 write @lines | egrep $pat
575
576 ## STDOUT:
577 pat=[]'"\\]
578 backslash \
579 rbracket ]
580 sq '
581 dq ""
582 ## END
583
584 #### Matching literal hyphen in character classes
585 shopt -s ysh:all
586
587 var literal = '-'
588 var pat = / [ 'a' 'b' @literal ] /
589 write pat=$pat
590 write 'c-d' 'ab' 'cd' | grep $pat
591 ## STDOUT:
592 pat=[ab-]
593 c-d
594 ab
595 ## END
596
597 #### Char class special: ^ - ] \
598
599 # See demo/ere-char-class-literals.sh
600 #
601 # \ is special because of gawk
602
603 shopt -s ysh:upgrade
604
605
606 # Note: single caret disallowed
607 var caret = / ['^' 'x'] /
608 echo caret=$caret
609
610 var caret2 = / [ \x5e 'x'] /
611 echo caret2=$caret2
612
613 var caret3 = / [ \u{5e} 'x'] /
614 echo caret3=$caret3
615
616 if ('x' ~ caret3) {
617 echo 'match x'
618 }
619 if ('^' ~ caret3) {
620 echo 'match ^'
621 }
622
623 echo ---
624
625 var hyphen = / ['a' '-' 'b'] /
626 echo hyphen=$hyphen
627
628 var hyphen2 = / ['a' \x2d 'b' ] /
629 echo hyphen2=$hyphen2
630
631 if ('-' ~ hyphen2) {
632 echo 'match -'
633 }
634
635 if ('a' ~ hyphen2) {
636 echo 'match a'
637 }
638
639 if ('c' ~ hyphen2) {
640 echo 'match c'
641 }
642
643 echo ---
644
645 var rbracket = / [ '[' ']' ] /
646 echo rbracket=$rbracket
647
648 var rbracket2 = / [ \x5b \x5d ] /
649 echo rbracket2=$rbracket2
650
651 if ('[' ~ rbracket2) {
652 echo 'match ['
653 }
654
655 if (']' ~ rbracket2) {
656 echo 'match ]'
657 }
658
659 echo ---
660
661 var backslash = / [ 'x' \\ 'n' ] /
662 echo backslash=$backslash
663
664 var backslash2 = / [ 'x' \x5c 'n' ] /
665 echo backslash2=$backslash2
666
667 var backslash3 = / [ 'x' $'\\' 'n' ] /
668 echo backslash3=$backslash3
669
670 if ('x' ~ backslash3) {
671 echo 'match x'
672 }
673
674 if ('n' ~ backslash3) {
675 echo 'match n'
676 }
677
678 if ($'\\' ~ backslash3) {
679 echo 'match backslash'
680 }
681
682 if ($'\n' ~ backslash3) {
683 echo 'match nnewline'
684 }
685
686
687 ## STDOUT:
688 caret=[x^]
689 caret2=[x^]
690 caret3=[x^]
691 match x
692 match ^
693 ---
694 hyphen=[ab-]
695 hyphen2=[ab-]
696 match -
697 match a
698 ---
699 rbracket=[][]
700 rbracket2=[][]
701 match [
702 match ]
703 ---
704 backslash=[xn\\]
705 backslash2=[xn\\]
706 backslash3=[xn\\]
707 match x
708 match n
709 match backslash
710 ## END
711