1 ## oils_failures_allowed: 0
2
#### s ~ regex and s !~ regex
shopt --set ysh:upgrade

# A plain string on the right-hand side of ~ / !~ is an ERE.
var subject = 'foo'
if (subject ~ '.([[:alpha:]]+)') {
  echo matches
  argv.py $[_group(0)] $[_group(1)]
}

# Per the expected output, the groups captured above are still readable here.
if (subject !~ '[[:digit:]]+') {
  echo "does not match"
  argv.py $[_group(0)] $[_group(1)]
}

# This ~ does not match, so nothing is echoed ...
if (subject ~ '[[:digit:]]+') {
  echo "matches"
}

# ... and afterwards asking for a group is expected to fail with status 3,
# both for the whole match and for the first capture.
try {
  var whole = _group(0)
}
if (_status === 3) {
  echo 'got expected status 3'
}

try {
  var first = _group(1)
}
if (_status === 3) {
  echo 'got expected status 3'
}

## STDOUT:
matches
['foo', 'oo']
does not match
['foo', 'oo']
got expected status 3
got expected status 3
## END
43
#### Invalid regex has libc error message

shopt --set ysh:upgrade

# The error message itself is hard to assert on: there is no obvious way to
# capture YSH's own stderr from within YSH.  Only the exit status and the
# empty stdout are checked.  An idea that was tried:
#
#fopen 2>err.txt {
#  if ('abc' ~ '+') {
#    echo 'bad'
#  }
#}

# '+' is the invalid regex under test
if ('abc' ~ '+') {
  echo 'bad'
}

## status: 2
## STDOUT:
## END
62
#### Eggex flags to ignore case are respected
shopt --set ysh:upgrade

# 'i' follows Python's spelling of the flag
var abc = / 'abc' ; i /
# Splicing into a pattern that spells the flag as reg_icase is allowed
var abcdef = / @abc 'def' ; reg_icase /

# Lower case and upper case subjects both match
if ('-abcdef-' ~ abcdef) {
  echo 'yes'
}

if ('-ABCDEF-' ~ abcdef) {
  echo 'yes'
}

# The trailing 'F' is missing, so this must not match
if ('ABCDE' ~ abcdef) {
  echo 'BUG'
}

## STDOUT:
yes
yes
## END
86
#### Eggex flags to treat newlines as special are respected
shopt --set ysh:upgrade

var digitsThenNewline = u'abc123\n'
var newlineThenDigits = u'abc\n123'
var onlyNewline = u'\n'

# Default flags: %end / %start are not expected to match next to the newline
if (digitsThenNewline ~ / digit %end /) {
  echo 'BUG'
}
if (newlineThenDigits ~ / %start digit /) {
  echo 'BUG'
}

# With reg_newline they are
if (digitsThenNewline ~ / digit %end ; reg_newline /) {
  echo 'yes'
}
if (newlineThenDigits ~ / %start digit ; reg_newline /) {
  echo 'yes'
}

# Default flags: dot and a negated class both match a newline
if (onlyNewline ~ / . /) {
  echo 'yes'
}
if (onlyNewline ~ / !digit /) {
  echo 'yes'
}

# With reg_newline neither is expected to match it
if (onlyNewline ~ / . ; reg_newline /) {
  echo 'BUG'
}
if (onlyNewline ~ / !digit ; reg_newline /) {
  echo 'BUG'
}

## STDOUT:
yes
yes
yes
yes
## END
124
#### Positional captures with _group
shopt --set ysh:all

var stamp = 'zz 2020-08-20'

# Shell-style match first: results are visible through BASH_REMATCH
if [[ $stamp =~ ([[:digit:]]+)-([[:digit:]]+) ]] {
  argv.py "${BASH_REMATCH[@]}"
}

# This assignment has no visible effect: the special name BASH_REMATCH
# shadows the variable.  That is considered acceptable.
setvar BASH_REMATCH = :| reset |

# Eggex match: the same groups show up in BASH_REMATCH and via _group()
if (stamp ~ /<capture d+> '-' <capture d+>/) {
  argv.py "${BASH_REMATCH[@]}"
  argv.py $[_group(0)] $[_group(1)] $[_group(2)]

  # _start() and _end() are not checked here; the '_start() and _end()' case
  # in this file exercises them.
}
## STDOUT:
['2020-08', '2020', '08']
['2020-08', '2020', '08']
['2020-08', '2020', '08']
## END
149
#### _group() returns null when group doesn't match
shopt --set ysh:upgrade

# Two alternatives, each with its own capture; only one can participate.
var aOrB = / <capture 'a'> | <capture 'b'> /
if ('b' ~ aOrB) {
  # group 1 ('a') did not take part in the match, group 2 ('b') did
  echo "$[_group(1)] $[_group(2)]"
}
## STDOUT:
null b
## END
160
#### _start() and _end()
shopt --set ysh:upgrade

var text = 'foo123bar'

# Whole match only
if (text ~ /digit+/) {
  echo start=$[_start(0)] end=$[_end(0)]
}
echo ---

# Two adjacent captures, both participating
if (text ~ / <capture [a-z]+> <capture digit+> /) {
  echo start=$[_start(1)] end=$[_end(1)]
  echo start=$[_start(2)] end=$[_end(2)]
}
echo ---

# Alternation: the second capture does not participate, which is reported
# as -1 for both positions (see expected output)
if (text ~ / <capture [a-z]+> | <capture digit+> /) {
  echo start=$[_start(1)] end=$[_end(1)]
  echo start=$[_start(2)] end=$[_end(2)]
}

## STDOUT:
start=3 end=6
---
start=0 end=3
start=3 end=6
---
start=0 end=3
start=-1 end=-1
## END
190
#### Str->search() method returns value.Match object

var text = '= Hi5- Bye6-'

# letters, then digits, then a dash; case-insensitive
var word = / <capture [a-z]+ > <capture d+> '-' ; i /

var first = text => search(word)
echo "g0 $[first => start(0)] $[first => end(0)] $[first => group(0)]"
echo "g1 $[first => start(1)] $[first => end(1)] $[first => group(1)]"
echo "g2 $[first => start(2)] $[first => end(2)] $[first => group(2)]"

echo ---

# Continue searching from where the first match ended
var resume = first => end(0)
var second = text => search(word, pos=resume)
echo "g0 $[second => start(0)] $[second => end(0)] $[second => group(0)]"
echo "g1 $[second => start(1)] $[second => end(1)] $[second => group(1)]"
echo "g2 $[second => start(2)] $[second => end(2)] $[second => group(2)]"

## STDOUT:
g0 2 6 Hi5-
g1 2 4 Hi
g2 4 5 5
---
g0 7 12 Bye6-
g1 7 10 Bye
g2 10 11 6
## END
217
#### Str->search() only matches %start ^ when pos == 0

shopt --set ysh:upgrade

var anchored = / %start <capture d+> '-' /
var free = / <capture d+> '-' /

var text = '12-34-'

# Repeatedly search, resuming at the end of the previous match.  The anchored
# pattern is expected to be found once, the free one twice.
for regex in ([anchored, free]) {
  echo "pat=$regex"

  var offset = 0
  while (true) {
    var found = text => search(regex, pos=offset)
    if (not found) {
      break
    }
    echo $[found => group(0)]
    setvar offset = found => end(0)
  }

}

## STDOUT:
pat=^([[:digit:]]+)-
12-
pat=([[:digit:]]+)-
12-
34-
## END
249
250
#### search() and leftMatch() accept ERE string

var text = '= hi5- bye6-'

# The pattern is a plain ERE string rather than an eggex
var ere = '([[:alpha:]]+)([[:digit:]]+)-'

var searched = text => search(ere)
echo "g0 $[searched => start(0)] $[searched => end(0)] $[searched => group(0)]"
echo "g1 $[searched => start(1)] $[searched => end(1)] $[searched => group(1)]"
echo "g2 $[searched => start(2)] $[searched => end(2)] $[searched => group(2)]"
echo ---

# Slice off the leading '= ' so the pattern matches at the left edge
var lefted = text[2:] => leftMatch(ere)
echo "g0 $[lefted => start(0)] $[lefted => end(0)] $[lefted => group(0)]"
echo "g1 $[lefted => start(1)] $[lefted => end(1)] $[lefted => group(1)]"
echo "g2 $[lefted => start(2)] $[lefted => end(2)] $[lefted => group(2)]"

## STDOUT:
g0 2 6 hi5-
g1 2 4 hi
g2 4 5 5
---
g0 0 4 hi5-
g1 0 2 hi
g2 2 3 5
## END
275
#### Str->leftMatch() can implement lexer pattern

shopt --set ysh:upgrade

# One capture per token kind: digits | lower-case letters | whitespace
var lexer = / <capture d+> | <capture [a-z]+> | <capture s+> /
#echo $lexer

# Print the position before every attempt, then the three groups of each
# token matched at that position.  Stops at the first position where the
# lexer does not match.
proc show-tokens (text) {
  var offset = 0

  while (true) {
    echo "pos=$offset"

    var tok = text => leftMatch(lexer, pos=offset)
    if (not tok) {
      break
    }
    # TODO: add groups()
    #var groups = [tok => group(1), tok => group(2), tok => group(3)]
    #json write --pretty=F (groups)
    echo "$[tok => group(1)]/$[tok => group(2)]/$[tok => group(3)]/"

    echo

    setvar offset = tok => end(0)
  }
}

show-tokens 'ab 12'

echo '==='

# '+' is not covered by any alternative, so lexing stops there
show-tokens 'ab+12'

## STDOUT:
pos=0
null/ab/null/

pos=2
null/null/ /

pos=3
12/null/null/

pos=5
===
pos=0
null/ab/null/

pos=2
## END
328
#### Named captures with m => group()
shopt --set ysh:all

var stamp = 'zz 2020-08-20'
var yearMonth = /<capture d+ as year> '-' <capture d+ as month>/

# Names are accepted by group(), start() and end() on the Match object
var found = stamp => search(yearMonth)
argv.py $[found => group('year')] $[found => group('month')]
echo $[found => start('year')] $[found => end('year')]
echo $[found => start('month')] $[found => end('month')]

# An unknown name is expected to end the script with status 3, so the
# echo below must never run.
argv.py $[found => group('oops')]
echo 'error'

## status: 3
## STDOUT:
['2020', '08']
3 7
8 10
## END
349
#### Named captures with _group() _start() _end()
shopt --set ysh:all

var stamp = 'zz 2020-08-20'

# Names are accepted by the global _group(), _start() and _end() functions
if (stamp ~ /<capture d+ as year> '-' <capture d+ as month>/) {
  argv.py $[_group('year')] $[_group('month')]
  echo $[_start('year')] $[_end('year')]
  echo $[_start('month')] $[_end('month')]
}

# An unknown name is expected to end the script with status 3
argv.py $[_group('oops')]

## status: 3
## STDOUT:
['2020', '08']
3 7
8 10
## END
369
#### Named Capture Decays Without Name
shopt --set ysh:all

# When printed, the named capture becomes an ordinary ( ) group
var monthPat = /<capture d+ as month>/
echo $monthPat

if ('123' ~ monthPat) {
  echo yes
}

## STDOUT:
([[:digit:]]+)
yes
## END
383
#### Nested Named Capture Uses ( ordering

shopt --set ysh:upgrade

var Date = /<capture d+ as year> '-' <capture d+ as month>/
var Time = /<capture d+ as hour> ':' <capture d+ as minute> (':' <capture d+ as secs>)? /

# Both sub-patterns are spliced inside further captures, so their named
# groups are nested.
var whenPat = / 'when: ' (<capture Date> | <capture Time as two>) /
#echo $whenPat

# Dump positional groups 0-2, then every named group, of a Match
proc show-groups (; found) {
  echo 0 $[found => group(0)]
  echo 1 $[found => group(1)]  # everything except the 'when: ' prefix
  echo 2 $[found => group(2)]
  echo
  echo $[found => group('two')]
  echo $[found => group('year')] $[found => group('month')]
  echo $[found => group('hour')] $[found => group('minute')] $[found => group('secs')]
}

# Date alternative
var dateMatch = 'when: 2023-10' => leftMatch(whenPat)

show-groups (dateMatch)

# Time alternative, without seconds
var shortTime = 'when: 23:30' => leftMatch(whenPat)

echo ---
show-groups (shortTime)

# Time alternative, with seconds
var longTime = 'when: 23:30:59' => leftMatch(whenPat)

echo ---
show-groups (longTime)

## STDOUT:
0 when: 2023-10
1 2023-10
2 2023-10

null
2023 10
null null null
---
0 when: 23:30
1 23:30
2 null

23:30
null null
23 30 null
---
0 when: 23:30:59
1 23:30:59
2 null

23:30:59
null null
23 30 59
## END
443
#### Capture with Type Conversion Func
shopt --set ysh:upgrade

var text = 'hi 42-3.14'
# Each capture names a conversion function after the colon
var numbers = / <capture d+: int> '-' <capture d+ '.' d+ : float> /

# Conversion applies to the global _group() ...
if (text ~ numbers) {
  var asInt = _group(1)
  var asFloat = _group(2)
  echo $[type(asInt)] $[type(asFloat)]
}

# ... and to group() on a Match object
var found = text => search(numbers)
if (found) {
  echo $[found => group(1) => type()] $[found => group(2) => type()]
}

## STDOUT:
Int Float
Int Float
## END
465
466
#### Named Capture with Type Conversion Func
shopt --set ysh:upgrade

# User-defined conversion: parse as float and flip the sign
func floatNegate(num) {
  return (-float(num))
}

var text = 'hi 42-3.14'
var numbers = / <capture d+ as left: int> '-' <capture d+ '.' d+ as right: floatNegate> /

# Named groups via the global _group()
if (text ~ numbers) {
  var leftVal = _group('left')
  var rightVal = _group('right')
  echo $rightVal
  echo $[type(leftVal)] $[type(rightVal)]
}

# Named groups via a Match object
var found = text => search(numbers)
if (found) {
  echo $[found => group('right')]
  echo $[found => group('left') => type()] $[found => group('right') => type()]
}

## STDOUT:
-3.14
Int Float
-3.14
Int Float
## END
496
#### Can't splice eggex with different flags
shopt --set ysh:upgrade

var insensitive = / 'abc' ; i /
# Same flag under its other spelling: this is allowed
var sameFlags = / @insensitive 'def' ; reg_icase /

# No flags on the outer pattern: the case is expected to fail with status 1
# and print nothing on stdout
var noFlags = / @insensitive 'def' /
= noFlags

## status: 1
## STDOUT:
## END
509
#### Eggex with translation preference has arbitrary flags
shopt --set ysh:upgrade

# A speculative corner of the language.
# TODO: could introspection be provided so users can translate it themselves?

var pcreDigits = / d+ ; ignorecase ; PCRE /

# As a test, the pattern is still matched with ERE here
if ('ab 12' ~ pcreDigits) {
  echo yes
}

## STDOUT:
yes
## END
526
527
#### Invalid sh operation on eggex
var digits = / d+ /
# sh-style indexed append on an eggex value; expected to fail with status 1.
# (Plain indexed assignment is left disabled: digits[invalid]=1)
digits[invalid]+=1
## status: 1
## stdout-json: ""
534
#### Long Python Example

# Grammar being transcribed, from
# https://docs.python.org/3/reference/lexical_analysis.html#integer-literals

# integer ::= decinteger | bininteger | octinteger | hexinteger
# decinteger ::= nonzerodigit (["_"] digit)* | "0"+ (["_"] "0")*
# bininteger ::= "0" ("b" | "B") (["_"] bindigit)+
# octinteger ::= "0" ("o" | "O") (["_"] octdigit)+
# hexinteger ::= "0" ("x" | "X") (["_"] hexdigit)+
# nonzerodigit ::= "1"..."9"
# digit ::= "0"..."9"
# bindigit ::= "0" | "1"
# octdigit ::= "0"..."7"
# hexdigit ::= digit | "a"..."f" | "A"..."F"

shopt --set ysh:all

# Digit classes
const DecDigit = / [0-9] /
const BinDigit = / [0-1] /
const OctDigit = / [0-7] /
const HexDigit = / [0-9 a-f A-F] /  # note: Digit is not spliced into the character class

# One pattern per literal form, built from the digit classes
const DecInt = / [1-9] ('_'? DecDigit)* | '0'+ ('_'? '0')* /
const BinInt = / '0' [b B] ('_'? BinDigit)+ /
const OctInt = / '0' [o O] ('_'? OctDigit)+ /
const HexInt = / '0' [x X] ('_'? HexDigit)+ /

# Anchored at both ends so the whole subject must be a literal
const Integer = / %start (DecInt | BinInt | OctInt | HexInt) %end /

#echo $Integer

# Each pair below is one accepted and one rejected subject
if ('123' ~ Integer) {
  echo 'Y'
}
if ('zzz' !~ Integer) {
  echo 'N'
}

if ('123_000' ~ Integer) {
  echo 'Y decimal'
}
if ('000_123' !~ Integer) {
  echo 'N decimal'
}

if ('0b100' ~ Integer) {
  echo 'Y binary'
}
if ('0b102' !~ Integer) {
  echo 'N binary'
}

if ('0o755' ~ Integer) {
  echo 'Y octal'
}
if ('0o778' !~ Integer) {
  echo 'N octal'
}

if ('0xFF' ~ Integer) {
  echo 'Y hex'
}
if ('0xFG' !~ Integer) {
  echo 'N hex'
}

## STDOUT:
Y
N
Y decimal
N decimal
Y binary
N binary
Y octal
N octal
Y hex
N hex
## END
593
#### Regex in a loop (bug regression)

shopt --set ysh:all

# Evaluate an eggex literal on every iteration of a while loop.  Neither item
# matches, so the loop must print both items and never reach 'exit'.
var items = [ 1, 2 ]
var idx = 0
while (idx < len(items)) {
  var current = items[idx]
  write $[items[idx]]
  if (str(current) ~ / s* 'imports' s* '=' s* .* /) {
    exit
  }
  setvar idx += 1
}

## STDOUT:
1
2
## END
613
614
#### Regex in a loop depending on var

shopt --set ysh:all

# The eggex splices the loop variable, so it must be re-evaluated with the
# current value on each iteration.
var words = ['foo', 'bar']
for word in (words) {
  write "line $word"

  # Variants left disabled:
  #   = / $word /
  #   if (word ~ / $word /) {

  if ("x$word" ~ / dot @word /) {
    write "matched $word"
  }
}

## STDOUT:
line foo
matched foo
line bar
matched bar
## END
637
638
#### Regex with [ (bug regression)
shopt --set ysh:all

# A literal '[' as a quoted string ...
if ('[' ~ / '[' /) {
  echo 'sq'
}

# ... and as a member of a character class
if ('[' ~ / [ '[' ] /) {
  echo 'char class'
}

# The pattern from the user report; "a" must simply not match
if ("a" ~ / s* 'imports' s* '=' s* '[' /) {
  echo "yes"
}

## STDOUT:
sq
char class
## END
659
#### Str => replace(Str, Str)
shopt --set ysh:all

var subject = 'abca'

# Number of occurrences of the substring: two, one, none
write $[subject => replace('a', 'A')]
write $[subject => replace('b', 'B')]
write $[subject => replace('x', 'y')]

# Empty replacement deletes the substring
write $[subject => replace('abc', '')]
# Empty substring: the replacement appears before every character and at the end
write $[subject => replace('', 'new')]
## STDOUT:
AbcA
aBca
abca
a
newanewbnewcnewanew
## END
677
#### Str => replace(Eggex, Str)
shopt --set ysh:all

# Replacement shorter than the matched run of dashes
var subject = 'mangled----kebab--case'
write $[subject => replace(/ '-'+ /, '-')]

# Replacement longer than the match
setvar subject = 'smaller-to-bigger'
write $[subject => replace(/ '-'+ /, '---')]
## STDOUT:
mangled-kebab-case
smaller---to---bigger
## END
690
#### Str => replace(Eggex, Expr)
shopt --set ysh:all

var subject = 'name: Bob'
var namePat = / 'name: ' <capture dot+> /

# Inside the ^"..." replacement, $1 is the first capture and $0 the whole match
write $[subject => replace(namePat, ^"Hello $1")]
write $[subject => replace(namePat, ^"Hello $1 (extracted from '$0')")]
## STDOUT:
Hello Bob
Hello Bob (extracted from 'name: Bob')
## END
701
#### Str => replace(*, Expr), $0
shopt --set ysh:all

# Functionality: $0 in a ^"..." replacement is the matched text, for both a
# Str pattern and an Eggex pattern
var subject = 'class Foo: # this class is called Foo'
write $[subject => replace("Foo", ^"$0Bar")]
write $[subject => replace(/ 'Foo' /, ^"$0Bar")]

# Edge cases: "$0" evaluated outside the lazy replacement keeps whatever value
# it has there.  Each comparison below is expected to print true.
var outerZero = "$0"
func zeroFromFunc() { return ("$0") }
write $["foo" => replace("o", "$0") === "f$outerZero$outerZero"]
write $["foo" => replace("o", ^[zeroFromFunc()]) === "f$outerZero$outerZero"]
write $[zeroFromFunc() === "$outerZero"]
## STDOUT:
class FooBar: # this class is called FooBar
class FooBar: # this class is called FooBar
true
true
true
## END
723
#### Str => replace(Eggex, Expr), scopes
shopt --set ysh:all

var subject = '123'

# Variables from the enclosing scope are visible in the replacement
var outer = 'surprise!'
write $[subject => replace(/ <capture d+> /, ^"Hello $1 ($outer)")]

# A capture named like an existing variable takes precedence in the replacement
var globalName = '456'
write $[subject => replace(/ <capture d+ as globalName> /, ^"Hello $globalName")]

# A differently named capture leaves the existing variable readable
write $[subject => replace(/ <capture d+ as localName> /, ^"Hello $localName, $globalName")]
## STDOUT:
Hello 123 (surprise!)
Hello 123
Hello 123, 456
## END
741
#### Str => replace(Eggex, *, count)
shopt --set ysh:all

var mystr = '1abc2abc3abc'

# For every count, exercise all four pattern/replacement combinations:
#   Str + Str, Str + Expr, Eggex + Str, Eggex + Expr
# (The last line used to repeat Eggex + Str, leaving Eggex + Expr untested.)
#
# Per the expected output: negative counts replace every occurrence, 0
# replaces nothing, and 1, 2, 3 replace that many occurrences.
for count in (-2..4) {
  write $[mystr => replace('abc', "-", count=count)]
  write $[mystr => replace('abc', ^"-", count=count)]
  write $[mystr => replace(/ [a-z]+ /, "-", count=count)]
  write $[mystr => replace(/ [a-z]+ /, ^"-", count=count)]
}
## STDOUT:
1-2-3-
1-2-3-
1-2-3-
1-2-3-
1-2-3-
1-2-3-
1-2-3-
1-2-3-
1abc2abc3abc
1abc2abc3abc
1abc2abc3abc
1abc2abc3abc
1-2abc3abc
1-2abc3abc
1-2abc3abc
1-2abc3abc
1-2-3abc
1-2-3abc
1-2-3abc
1-2-3abc
1-2-3-
1-2-3-
1-2-3-
1-2-3-
## END
779
#### Str => replace(Str, Str), empty new/old strings
var subject = 'abca'

# Empty replacement
write $[subject => replace('abc', '')]

# Empty substring, with: no count, a count other than -1, and a count
# larger than the number of possible insertion points
write $[subject => replace('', 'new')]
write $[subject => replace('', 'new', count=1)]
write $[subject => replace('', 'new', count=10)]
## STDOUT:
a
newanewbnewcnewanew
newabca
newanewbnewcnewanew
## END
792
#### Str => replace(Eggex, Lazy), convert_func
shopt --set ysh:all

var subject = '123'

# The named capture is converted by int(), so arithmetic works on it
write $[subject => replace(/ <capture d+ as n : int> /, ^"$[n + 1]")]

# Converted values are stringified automatically when substituted
write $[subject => replace(/ <capture d+ as n : int> /, ^"$1")]

# Conversion function whose result is a Dict
func not_str(raw) {
  return ({ "value": raw })
}

# Stringifying $1 should fail here
try {
  call subject => replace(/ <capture d+ : not_str> /, ^"$1")
}
write status=$_status
## STDOUT:
124
123
status=3
## END
815
#### Str => replace(Eggex, *), eflags
shopt --set ysh:all

# Two lines of dash-separated digits
var subject = $'1-2-3\n4-5'

# Unanchored: every run of digits is bracketed
write $[subject => replace(/ d+ /, ^"[$0]")]

# Anchored, default flags: only the very first number is bracketed
write $[subject => replace(/ ^ d+ /, ^"[$0]")]

# Anchored with reg_newline: the first number of each line is bracketed
write $[subject => replace(/ ^ d+ ; reg_newline /, ^"[$0]")]
## STDOUT:
[1]-[2]-[3]
[4]-[5]
[1]-2-3
4-5
[1]-2-3
[4]-5
## END