1 #
2 # Only bash and zsh seem to implement [[ foo =~ '' ]]
3 #
4 # ^(a b)$ is a regex that should match 'a b' in a group.
5 #
6 # Not sure what bash is doing here... I think I just have to be empirical.
7 # We might need a "compat" switch for parsing the regex. It should be an opaque
8 # string like in zsh; not sure why it isn't.
9 #
10 # I think this is just papering over bugs...
11 # https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs
12 # Excerpt from the bash manual (URL above):
13 # Storing the regular expression in a shell variable is often a useful way to
14 # avoid problems with quoting characters that are special to the shell. It is
15 # sometimes difficult to specify a regular expression literally without using
16 # quotes, or to keep track of the quoting used by regular expressions while
17 # paying attention to the shell’s quote removal. Using a shell variable to
18 # store the pattern decreases these problems. For example, the following is
19 # equivalent to the above:
20 #
21 # pattern='[[:space:]]*(a)?b'
22 # [[ $line =~ $pattern ]]
23 #
24 # If you want to match a character that’s special to the regular expression
25 # grammar, it has to be quoted to remove its special meaning. This means that in
26 # the pattern ‘xxx.txt’, the ‘.’ matches any character in the string (its usual
27 # regular expression meaning), but in the pattern ‘"xxx.txt"’ it can only match a
28 # literal ‘.’. Shell programmers should take special care with backslashes, since
29 # backslashes are used both by the shell and regular expressions to remove the
30 # special meaning from the following character. The following two sets of
31 # commands are not equivalent: [the manual's example is not reproduced here]
32 #
33 # From the bash source code: ( | ) are treated specially. Normally they must be
34 # quoted, but they can be UNQUOTED in BASH_REGEX state. In fact they can't be quoted!
35
36 #### BASH_REMATCH
37 [[ foo123 =~ ([a-z]+)([0-9]+) ]]
38 echo status=$?
39 argv.py "${BASH_REMATCH[@]}" # whole match first, then one element per capture group
40
41 [[ failed =~ ([a-z]+)([0-9]+) ]]
42 echo status=$?
43 argv.py "${BASH_REMATCH[@]}" # expected: [] -- empty after the failed match, not the stale foo123 groups
44
45 ## STDOUT:
46 status=0
47 ['foo123', 'foo', '123']
48 status=1
49 []
50 ## END
51 ## N-I zsh STDOUT:
52 status=0
53 ['']
54 status=1
55 ['']
56 ## END
57
58 #### Match is unanchored at both ends
59 [[ 'bar' =~ a ]] && echo true
60 ## stdout: true
61
62 #### Failed match
63 [[ 'bar' =~ X ]] && echo true
64 ## status: 1
65 ## stdout-json: ""
66
67 #### Regex quoted with \ -- preferred in bash
68 [[ 'a b' =~ ^(a\ b)$ ]] && echo true
69 ## stdout: true
70
71 #### Regex quoted with single quotes
72 # bash matches quoted pattern text literally, so ^ ( ) $ lose their regex meaning and this fails; zsh still matches.
73 [[ 'a b' =~ '^(a b)$' ]] && echo true
74 ## stdout-json: ""
75 ## status: 1
76 ## OK zsh stdout: true
77 ## OK zsh status: 0
78
79 #### Regex quoted with double quotes
80 # As with single quotes: bash matches the double-quoted text literally, so this fails; zsh still matches.
81 [[ 'a b' =~ "^(a b)$" ]] && echo true
82 ## stdout-json: ""
83 ## status: 1
84 ## OK zsh stdout: true
85 ## OK zsh status: 0
86
87 #### Fix single quotes by storing in variable
88 pat='^(a b)$'
89 [[ 'a b' =~ $pat ]] && echo true
90 ## stdout: true
91
92 #### Fix double quotes by storing in variable
93 pat="^(a b)$"
94 [[ 'a b' =~ $pat ]] && echo true # $pat is expanded unquoted, so its value is used as a regex
95 ## stdout: true
96
97 #### Double quoting pat variable -- again bash doesn't like it.
98 pat="^(a b)$"
99 [[ 'a b' =~ "$pat" ]] && echo true
100 ## stdout-json: ""
101 ## status: 1
102 ## OK zsh stdout: true
103 ## OK zsh status: 0
104
105 #### Mixing quoted and unquoted parts
106 [[ 'a b' =~ 'a 'b ]] && echo true
107 [[ "a b" =~ "a "'b' ]] && echo true
108 ## STDOUT:
109 true
110 true
111 ## END
112
113 #### Regex with == and not =~ is parse error, different lexer mode required
114 # They both give a syntax error. This is lame.
115 [[ '^(a b)$' == ^(a\ b)$ ]] && echo true
116 ## status: 2
117 ## OK zsh status: 1
118
119 #### Omitting ( )
120 [[ '^a b$' == ^a\ b$ ]] && echo true
121 ## stdout: true
122
123 #### Malformed regex
124 # Are they trying to PARSE the regex? Do they feed the buffer directly to
125 # regcomp()?
126 [[ 'a b' =~ ^)a\ b($ ]] && echo true
127 ## stdout-json: ""
128 ## status: 2
129 ## OK zsh status: 1
130
131 #### Regex with char class containing space
132 # For some reason it doesn't work without parens?
133 [[ 'ba ba ' =~ ([a b]+) ]] && echo true
134 ## stdout: true
135
136 #### Operators and space lose meaning inside ()
137 [[ '< >' =~ (< >) ]] && echo true
138 ## stdout: true
139 ## N-I zsh stdout-json: ""
140 ## N-I zsh status: 1
141
142 #### Regex with |
143 [[ 'bar' =~ foo|bar ]] && echo true
144 ## stdout: true
145 ## N-I zsh stdout-json: ""
146 ## N-I zsh status: 1
147
148 #### Regex to match literal brackets []
149
150 # bash-completion relies on this, so we're making it match bash.
151 # zsh understandably differs.
152 [[ '[]' =~ \[\] ]] && echo true
153
154 # Another way to write this.
155 pat='\[\]'
156 [[ '[]' =~ $pat ]] && echo true
157 ## STDOUT:
158 true
159 true
160 ## END
161 ## OK zsh STDOUT:
162 true
163 ## END
164
165 #### Regex to match literals . ^ $ etc.
166 [[ 'x' =~ \. ]] || echo false
167 [[ '.' =~ \. ]] && echo true
168
169 [[ 'xx' =~ \^\$ ]] || echo false
170 [[ '^$' =~ \^\$ ]] && echo true
171
172 [[ 'xxx' =~ \+\*\? ]] || echo false
173 [[ '*+?' =~ \*\+\? ]] && echo true
174
175 [[ 'xx' =~ \{\} ]] || echo false
176 [[ '{}' =~ \{\} ]] && echo true
177 ## STDOUT:
178 false
179 true
180 false
181 true
182 false
183 true
184 false
185 true
186 ## END
187 ## BUG zsh STDOUT:
188 true
189 false
190 false
191 false
192 ## END
193 ## BUG zsh status: 1
194
195 #### Unquoted { is a regex parse error
196 [[ { =~ { ]] && echo true
197 echo status=$?
198 ## stdout-json: ""
199 ## status: 2
200 ## BUG bash stdout-json: "status=2\n"
201 ## BUG bash status: 0
202 ## BUG zsh stdout-json: "status=1\n"
203 ## BUG zsh status: 0
204
205 #### Fatal error inside [[ =~ ]]
206
207 # zsh and osh are stricter than bash. bash treats [[ like a command.
208
209 [[ a =~ $(( 1 / 0 )) ]]
210 echo status=$?
211 ## stdout-json: ""
212 ## status: 1
213 ## BUG bash stdout: status=1
214 ## BUG bash status: 0
215
216 #### Quoted { and +
217 [[ { =~ "{" ]] && echo 'yes {'
218 [[ + =~ "+" ]] && echo 'yes +'
219 [[ * =~ "*" ]] && echo 'yes *'
220 [[ ? =~ "?" ]] && echo 'yes ?'
221 [[ ^ =~ "^" ]] && echo 'yes ^'
222 [[ $ =~ "$" ]] && echo 'yes $'
223 [[ '(' =~ '(' ]] && echo 'yes ('
224 [[ ')' =~ ')' ]] && echo 'yes )'
225 [[ '|' =~ '|' ]] && echo 'yes |'
226 [[ '\' =~ '\' ]] && echo 'yes \'
227 echo ---
228
229 [[ . =~ "." ]] && echo 'yes .'
230 [[ z =~ "." ]] || echo 'no .' # quoted "." is a literal dot here, so z is expected not to match
231 echo ---
232
233 # This rule is weird, but all shells agree: a quoted "a-z" or 'a-z' inside [ ]
234 # still acts as the range a-z. One might expect the quoted - to be literal, yet '-' fails to match.
235 [[ a =~ ["a-z"] ]]; echo "a $?"
236 [[ - =~ ["a-z"] ]]; echo "- $?"
237 [[ b =~ ['a-z'] ]]; echo "b $?"
238 [[ z =~ ['a-z'] ]]; echo "z $?"
239
240 echo status=$? # always 0 here: $? is the exit status of the preceding echo, not of a match
241 ## STDOUT:
242 yes {
243 yes +
244 yes *
245 yes ?
246 yes ^
247 yes $
248 yes (
249 yes )
250 yes |
251 yes \
252 ---
253 yes .
254 no .
255 ---
256 a 0
257 - 1
258 b 0
259 z 0
260 status=0
261 ## END
262 ## N-I zsh STDOUT:
263 yes ^
264 yes $
265 yes )
266 yes |
267 ---
268 yes .
269 ---
270 a 0
271 - 1
272 b 0
273 z 0
274 status=0
275 ## END
276
277 #### Escaped {
278 # from bash-completion
279 [[ '$PA' =~ ^(\$\{?)([A-Za-z0-9_]*)$ ]] && argv.py "${BASH_REMATCH[@]}"
280 ## STDOUT:
281 ['$PA', '$', 'PA']
282 ## END
283 ## BUG zsh stdout-json: ""
284 ## BUG zsh status: 1
285
286 #### Escaped { stored in variable first
287 # from bash-completion
288 pat='^(\$\{?)([A-Za-z0-9_]*)$'
289 [[ '$PA' =~ $pat ]] && argv.py "${BASH_REMATCH[@]}"
290 ## STDOUT:
291 ['$PA', '$', 'PA']
292 ## END
293 ## BUG zsh STDOUT:
294 ['']
295 ## END
296
297 #### regex with ?
298 [[ 'c' =~ c? ]] && echo true
299 [[ '' =~ c? ]] && echo true
300 ## STDOUT:
301 true
302 true
303 ## END
304
305 #### regex with unprintable characters
306 # Uses the bytes 0x01 and 0x02; a NUL byte can't be used.
307
308 # $'...' puts the literal bytes 0x01 and 0x02 inside the bracket expression.
309 pat=$'^[\x01\x02]+$'
310
311 [[ $'\x01\x02\x01' =~ $pat ]]; echo status=$?
312 [[ $'a\x01' =~ $pat ]]; echo status=$?
313
314 # NOTE: There doesn't appear to be any way to write these bytes as escapes inside the regex itself!
315 pat2='^[\x01\x02]+$' # plain single quotes keep \x01 as backslash text; pat2 is assigned but never tested
316
317 ## STDOUT:
318 status=0
319 status=1
320 ## END
321
322 #### pattern $f(x) -- regression
323 f=fff
324 [[ fffx =~ $f(x) ]]
325 echo status=$?
326 [[ ffx =~ $f(x) ]]
327 echo status=$?
328 ## STDOUT:
329 status=0
330 status=1
331 ## END
332
333 #### pattern a=(1)
334 [[ a=x =~ a=(x) ]]
335 echo status=$?
336 [[ =x =~ a=(x) ]]
337 echo status=$?
338 ## STDOUT:
339 status=0
340 status=1
341 ## END
342 ## BUG zsh status: 1
343 ## BUG zsh STDOUT:
344 status=0
345 ## END
346
347 #### pattern @f(x)
348 shopt -s parse_at
349 [[ @fx =~ @f(x) ]]
350 echo status=$?
351 [[ fx =~ @f(x) ]]
352 echo status=$?
353 ## STDOUT:
354 status=0
355 status=1
356 ## END