1 #!/bin/bash
2 #
3 # Only bash and zsh seem to implement [[ foo =~ '' ]]
4 #
5 # ^(a b)$ is a regex that should match 'a b' in a group.
6 #
7 # Not sure what bash is doing here... I think I have to just be empirical.
8 # Might need "compat" switch for parsing the regex. It should be an opaque
9 # string like zsh, not sure why it isn't.
10 #
11 # I think this is just papering over bugs...
12 # https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs
13 #
14 # Storing the regular expression in a shell variable is often a useful way to
15 # avoid problems with quoting characters that are special to the shell. It is
16 # sometimes difficult to specify a regular expression literally without using
17 # quotes, or to keep track of the quoting used by regular expressions while
18 # paying attention to the shell’s quote removal. Using a shell variable to
19 # store the pattern decreases these problems. For example, the following is
20 # equivalent to the above:
21 #
22 # pattern='[[:space:]]*(a)?b'
23 # [[ $line =~ $pattern ]]
24 #
25 # If you want to match a character that’s special to the regular expression
26 # grammar, it has to be quoted to remove its special meaning. This means that in
27 # the pattern ‘xxx.txt’, the ‘.’ matches any character in the string (its usual
28 # regular expression meaning), but in the pattern ‘"xxx.txt"’ it can only match a
29 # literal ‘.’. Shell programmers should take special care with backslashes, since
30 # backslashes are used both by the shell and regular expressions to remove the
31 # special meaning from the following character. The following two sets of
32 # commands are not equivalent: [the manual's example commands are omitted here]
33 #
34 # From bash code: ( | ) are treated specially. Normally they must be quoted, but
35 # they can be UNQUOTED in BASH_REGEX state. In fact they must not be quoted there!
36
37 ### Match is unanchored at both ends
# The pattern 'a' only occurs in the middle of 'bar'. The expected "true"
# shows that =~ succeeds without the pattern covering the whole string.
38 [[ 'bar' =~ a ]] && echo true
39 # stdout: true
40
41 ### Failed match
# 'X' does not occur in 'bar', so [[ fails and && skips the echo.
# Expected result is therefore empty stdout and exit status 1.
42 [[ 'bar' =~ X ]] && echo true
43 # status: 1
44 # stdout-json: ""
45
46 ### Regex quoted with \ -- preferred in bash
# Only the space is backslash-escaped. The anchors and the parens are left
# unquoted, so the whole string 'a b' is expected to match as a group.
47 [[ 'a b' =~ ^(a\ b)$ ]] && echo true
48 # stdout: true
49
50 ### Regex quoted with single quotes
51 # bash doesn't like the quotes. The default expectation below is a match
# (stdout "true", status 0), while the "OK bash" lines record that bash
# prints nothing and returns 1 when the whole pattern is single-quoted.
# The manual text quoted at the top of this file says that quoting a
# pattern removes the special meaning of its regex characters.
52 [[ 'a b' =~ '^(a b)$' ]] && echo true
53 # stdout: true
54 # status: 0
55 # OK bash stdout-json: ""
56 # OK bash status: 1
57
58 ### Regex quoted with double quotes
59 # bash doesn't like the quotes. The default expectation below is a match
# (stdout "true", status 0), while the "OK bash" lines record that bash
# prints nothing and returns 1 when the whole pattern is double-quoted.
60 [[ 'a b' =~ "^(a b)$" ]] && echo true
61 # stdout: true
62 # status: 0
63 # OK bash stdout-json: ""
64 # OK bash status: 1
65
66 ### Fix single quotes by storing in variable
# The pattern is assigned with single quotes and then expanded UNQUOTED on
# the right-hand side of =~, so it is used as a regex and the match is
# expected to succeed.
67 pat='^(a b)$'
68 [[ 'a b' =~ $pat ]] && echo true
69 # stdout: true
70
71 ### Fix double quotes by storing in variable
# The pattern is assigned with double quotes and then expanded UNQUOTED on
# the right-hand side of =~, so it is used as a regex and the match is
# expected to succeed.
72 pat="^(a b)$"
73 [[ 'a b' =~ $pat ]] && echo true
74 # stdout: true
75
76 ### Double quoting pat variable -- again bash doesn't like it.
# pat holds the regex, but here it is expanded inside double quotes on the
# right-hand side of =~. The default expectation is still a match, while
# the "OK bash" lines record that bash prints nothing and returns 1.
77 pat="^(a b)$"
78 [[ 'a b' =~ "$pat" ]] && echo true
79 # stdout: true
80 # status: 0
81 # OK bash stdout-json: ""
82 # OK bash status: 1
83
84 ### Regex with == and not =~ is parse error, different lexer mode required
85 # Both shells give a syntax error here (presumably bash and zsh, the two
# shells named at the top of this file). This is lame.
# The right-hand side is the same text that is accepted after =~, but with
# == the unquoted ( ) are expected to be rejected. Expected status is 2,
# with status 1 recorded for zsh.
86 [[ '^(a b)$' == ^(a\ b)$ ]] && echo true
87 # status: 2
88 # OK zsh status: 1
89
90 ### Omitting ( )
# Same == comparison, but with the parens removed from both sides. The left
# operand literally contains ^ and $, and the comparison is expected to
# succeed, so those characters are compared as plain text here.
91 [[ '^a b$' == ^a\ b$ ]] && echo true
92 # stdout: true
93
94 ### Malformed regex
95 # The parens are in the wrong order, so the pattern is not a valid regex.
96 # Open question -- are the shells trying to PARSE the regex themselves, or
# do they feed the buffer directly to regcomp()?
# Expected status is 2, with status 1 recorded for zsh.
97 [[ 'a b' =~ ^)a\ b($ ]] && echo true
98 # status: 2
99 # OK zsh status: 1
100
101 ### Regex with char class
102 # The bracket expression [a b] contains an unquoted space. For some reason
# this doesn't seem to work unless the pattern is wrapped in parens
# (cause not understood yet).
103 [[ 'ba ba ' =~ ([a b]+) ]] && echo true
104 # stdout: true
105
106 ### Operators lose meaning in () in regex state (BASH_REGEX_CHARS)
# < and > appear unquoted, with a space between them, inside the parens of
# the pattern. The default expectation is that they match the literal
# string '< >'. The "N-I zsh" lines record that zsh prints nothing and
# returns 1 for this case.
107 [[ '< >' =~ (< >) ]] && echo true
108 # stdout: true
109 # N-I zsh stdout-json: ""
110 # N-I zsh status: 1
111
112 ### Regex with |
# The | is written unquoted and is expected to act as regex alternation, so
# 'bar' matches the second alternative. The "N-I zsh" lines record that zsh
# prints nothing and returns 1 for this case.
113 [[ 'bar' =~ foo|bar ]] && echo true
114 # stdout: true
115 # N-I zsh stdout-json: ""
116 # N-I zsh status: 1