#
# Only bash and zsh seem to implement [[ foo =~ '' ]]
#
# ^(a b)$ is a regex that should match 'a b' in a group.
#
# Not sure what bash is doing here... I think I have to just be empirical.
# Might need "compat" switch for parsing the regex. It should be an opaque
# string like zsh, not sure why it isn't.
#
# I think this is just papering over bugs...
# https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs
#
# Storing the regular expression in a shell variable is often a useful way to
# avoid problems with quoting characters that are special to the shell. It is
# sometimes difficult to specify a regular expression literally without using
# quotes, or to keep track of the quoting used by regular expressions while
# paying attention to the shell’s quote removal. Using a shell variable to
# store the pattern decreases these problems. For example, the following is
# equivalent to the above:
#
# pattern='[[:space:]]*(a)?b'
# [[ $line =~ $pattern ]]
#
# If you want to match a character that’s special to the regular expression
# grammar, it has to be quoted to remove its special meaning. This means that in
# the pattern ‘xxx.txt’, the ‘.’ matches any character in the string (its usual
# regular expression meaning), but in the pattern ‘"xxx.txt"’ it can only match a
# literal ‘.’. Shell programmers should take special care with backslashes, since
# backslashes are used both by the shell and regular expressions to remove the
# special meaning from the following character. The following two sets of
# commands are not equivalent:
#
# pattern='\.'
#
# [[ . =~ $pattern ]]
# [[ . =~ \. ]]
#
# [[ . =~ "$pattern" ]]
# [[ . =~ '\.' ]]
#
# The first two matches succeed; the second two do not, because quoting makes
# the backslash part of the text to be matched.
#
# From bash code: ( | ) are treated special. Normally they must be quoted, but
# they can be UNQUOTED in BASH_REGEX state. In fact they can't be quoted!

#### BASH_REMATCH
# After a successful match, element 0 of BASH_REMATCH is the whole match and
# the following elements are the parenthesized groups.  The zsh expectation
# below (marked N-I) is a single empty argument instead.
[[ foo123 =~ ([a-z]+)([0-9]+) ]]
argv.py "${BASH_REMATCH[@]}"
## STDOUT:
['foo123', 'foo', '123']
## END
## N-I zsh STDOUT:
['']
## END

#### Match is unanchored at both ends
# 'a' sits in the middle of 'bar', so this only succeeds when the regex is
# not implicitly anchored at either end.
[[ 'bar' =~ a ]] && echo true
## stdout: true

#### Failed match
# 'X' does not occur in 'bar': [[ returns 1, so the echo must not run and
# stdout stays empty.
[[ 'bar' =~ X ]] && echo true
## status: 1
## stdout-json: ""

#### Regex quoted with \ -- preferred in bash
# Only the space is escaped; the parentheses and anchors stay unquoted so they
# keep their regex meaning.
[[ 'a b' =~ ^(a\ b)$ ]] && echo true
## stdout: true

#### Regex quoted with single quotes
# bash doesn't like the quotes
# (the expectation below is no output and status 1; zsh is allowed to match)
[[ 'a b' =~ '^(a b)$' ]] && echo true
## stdout-json: ""
## status: 1
## OK zsh stdout: true
## OK zsh status: 0

#### Regex quoted with double quotes
# bash doesn't like the quotes
# (the expectation below is no output and status 1; zsh is allowed to match)
[[ 'a b' =~ "^(a b)$" ]] && echo true
## stdout-json: ""
## status: 1
## OK zsh stdout: true
## OK zsh status: 0

#### Fix single quotes by storing in variable
# The quotes are consumed by the assignment; the unquoted $pat on the
# right-hand side is then used as a regex.
pat='^(a b)$'
[[ 'a b' =~ $pat ]] && echo true
## stdout: true

#### Fix double quotes by storing in variable
# Same as the single-quote case, but the pattern is assigned with double
# quotes.  The unquoted $pat on the right-hand side is used as a regex.
pat="^(a b)$"
[[ 'a b' =~ $pat ]] && echo true
## stdout: true

#### Double quoting pat variable -- again bash doesn't like it.
# Quoting the expansion itself is what changes the result here: the same
# pattern is used, but the expectation is no output and status 1 (zsh is
# allowed to match).
pat="^(a b)$"
[[ 'a b' =~ "$pat" ]] && echo true
## stdout-json: ""
## status: 1
## OK zsh stdout: true
## OK zsh status: 0

#### Mixing quoted and unquoted parts
# Quoted and unquoted pieces are concatenated into one pattern; both lines
# are expected to match 'a b'.
[[ 'a b' =~ 'a 'b ]] && echo true
[[ "a b" =~ "a "'b' ]] && echo true
## STDOUT:
true
true
## END

101 |
#### Regex with == and not =~ is parse error, different lexer mode required |
102 |
# They both give a syntax error. This is lame. |
# Unquoted ( ) are only accepted in the regex position after =~.  After ==
# the same text is not read in that mode, so the line is rejected as a
# syntax error (status 2 below; status 1 is accepted for zsh).
103 |
[[ '^(a b)$' == ^(a\ b)$ ]] && echo true |
104 |
## status: 2 |
105 |
## OK zsh status: 1 |
106 |
|
#### Omitting ( )
# With == the right-hand side is not a regex: ^ and $ are ordinary characters,
# so the subject has to contain them literally to compare equal.
[[ '^a b$' == ^a\ b$ ]] && echo true
## stdout: true

111 |
#### Malformed regex |
112 |
# Are they trying to PARSE the regex? Do they feed the buffer directly to |
113 |
# regcomp()? |
# The parentheses are deliberately reversed: ")a b(".  The expectation below
# is empty stdout with status 2 (status 1 is accepted for zsh).
114 |
[[ 'a b' =~ ^)a\ b($ ]] && echo true |
115 |
## stdout-json: "" |
116 |
## status: 2 |
117 |
## OK zsh status: 1 |
118 |
|
#### Regex with char class containing space
# For some reason it doesn't work without parens?
# Inside the group the space is part of the bracket expression [a b].
[[ 'ba ba ' =~ ([a b]+) ]] && echo true
## stdout: true

#### Operators and space lose meaning inside ()
# Inside an unquoted regex group, < > and the space are taken as pattern
# characters rather than redirections or word separators.
[[ '< >' =~ (< >) ]] && echo true
## stdout: true
## N-I zsh stdout-json: ""
## N-I zsh status: 1

#### Regex with |
# An unquoted | on the right-hand side of =~ is regex alternation, not a
# pipeline.
[[ 'bar' =~ foo|bar ]] && echo true
## stdout: true
## N-I zsh stdout-json: ""
## N-I zsh status: 1

#### Regex to match literal brackets []

# bash-completion relies on this, so we're making it match bash.
# zsh understandably differs.
[[ '[]' =~ \[\] ]] && echo true

# Another way to write this.
pat='\[\]'
[[ '[]' =~ $pat ]] && echo true
## STDOUT:
true
true
## END
## OK zsh STDOUT:
true
## END

153 |
#### Regex to match literals . ^ $ etc. |
154 |
[[ 'x' =~ \. ]] || echo false |
155 |
[[ '.' =~ \. ]] && echo true |
156 |
|
157 |
[[ 'xx' =~ \^\$ ]] || echo false |
158 |
[[ '^$' =~ \^\$ ]] && echo true |
159 |
|
160 |
[[ 'xxx' =~ \+\*\? ]] || echo false |
161 |
[[ '*+?' =~ \*\+\? ]] && echo true |
162 |
|
163 |
[[ 'xx' =~ \{\} ]] || echo false |
164 |
[[ '{}' =~ \{\} ]] && echo true |
165 |
## STDOUT: |
166 |
false |
167 |
true |
168 |
false |
169 |
true |
170 |
false |
171 |
true |
172 |
false |
173 |
true |
174 |
## END |
175 |
## BUG zsh STDOUT: |
176 |
true |
177 |
false |
178 |
false |
179 |
false |
180 |
## END |
181 |
## BUG zsh status: 1 |
182 |
|
#### Unquoted { is a regex parse error
# The default expectation is a fatal error (status 2, no output).  bash and
# zsh are recorded below as BUG: they keep running and print the status of
# the failed [[ instead.
[[ { =~ { ]] && echo true
echo status=$?
## stdout-json: ""
## status: 2
## BUG bash stdout-json: "status=2\n"
## BUG bash status: 0
## BUG zsh stdout-json: "status=1\n"
## BUG zsh status: 0

#### Fatal error inside [[ =~ ]]

# zsh and osh are stricter than bash. bash treats [[ like a command.
# The division by zero happens while the right-hand side is expanded; bash is
# recorded below as continuing to the echo with status=1.

[[ a =~ $(( 1 / 0 )) ]]
echo status=$?
## stdout-json: ""
## status: 1
## BUG bash stdout: status=1
## BUG bash status: 0

#### Quoted { and +
# A quoted regex metacharacter on the right-hand side is expected to match
# itself; the "." pair shows that a quoted dot no longer matches "z".
[[ { =~ "{" ]] && echo 'yes {'
[[ + =~ "+" ]] && echo 'yes +'
[[ * =~ "*" ]] && echo 'yes *'
[[ ? =~ "?" ]] && echo 'yes ?'
[[ ^ =~ "^" ]] && echo 'yes ^'
[[ $ =~ "$" ]] && echo 'yes $'
[[ '(' =~ '(' ]] && echo 'yes ('
[[ ')' =~ ')' ]] && echo 'yes )'
[[ '|' =~ '|' ]] && echo 'yes |'
[[ '\' =~ '\' ]] && echo 'yes \'
echo ---

[[ . =~ "." ]] && echo 'yes .'
[[ z =~ "." ]] || echo 'no .'
echo ---

# This rule is weird but all shells agree. I would expect that the - gets
# escaped? It's an operator? but it behaves like a-z.
[[ a =~ ["a-z"] ]]; echo "a $?"
[[ - =~ ["a-z"] ]]; echo "- $?"
[[ b =~ ['a-z'] ]]; echo "b $?"
[[ z =~ ['a-z'] ]]; echo "z $?"

# Reports the status of the preceding echo, so this is always status=0.
echo status=$?
## STDOUT:
yes {
yes +
yes *
yes ?
yes ^
yes $
yes (
yes )
yes |
yes \
---
yes .
no .
---
a 0
- 1
b 0
z 0
status=0
## END
## N-I zsh STDOUT:
yes ^
yes $
yes )
yes |
---
yes .
---
a 0
- 1
b 0
z 0
status=0
## END

#### Escaped {
# from bash-completion
# Group 1 is a literal $ with an optional literal {; group 2 is the name.
[[ '$PA' =~ ^(\$\{?)([A-Za-z0-9_]*)$ ]] && argv.py "${BASH_REMATCH[@]}"
## STDOUT:
['$PA', '$', 'PA']
## END
## BUG zsh stdout-json: ""
## BUG zsh status: 1

#### Escaped { stored in variable first
# from bash-completion
# Same pattern as the inline "Escaped {" case, but taken from an unquoted
# variable expansion.
pat='^(\$\{?)([A-Za-z0-9_]*)$'
[[ '$PA' =~ $pat ]] && argv.py "${BASH_REMATCH[@]}"
## STDOUT:
['$PA', '$', 'PA']
## END
## BUG zsh STDOUT:
['']
## END

#### regex with ?
# c? matches zero or one 'c', so both the one-character and the empty subject
# are expected to match.
[[ 'c' =~ c? ]] && echo true
[[ '' =~ c? ]] && echo true
## STDOUT:
true
true
## END

#### regex with unprintable characters
# can't have nul byte

# This pattern has literal characters
# ($'...' turns \x01 and \x02 into the raw bytes before the regex is used)
pat=$'^[\x01\x02]+$'

[[ $'\x01\x02\x01' =~ $pat ]]; echo status=$?
[[ $'a\x01' =~ $pat ]]; echo status=$?

# NOTE: There doesn't appear to be any way to escape these!
# pat2 keeps the backslash sequences as text; nothing in this case uses it.
pat2='^[\x01\x02]+$'

## STDOUT:
status=0
status=1
## END

#### pattern $f(x) -- regression
# $f is expanded and immediately followed by a regex group, so the pattern
# is "fff(x)": it matches "fffx" but not "ffx".
f=fff
[[ fffx =~ $f(x) ]]
echo status=$?
[[ ffx =~ $f(x) ]]
echo status=$?
## STDOUT:
status=0
status=1
## END

321 |
#### pattern a=(1) |
322 |
[[ a=x =~ a=(x) ]] |
323 |
echo status=$? |
324 |
[[ =x =~ a=(x) ]] |
325 |
echo status=$? |
326 |
## STDOUT: |
327 |
status=0 |
328 |
status=1 |
329 |
## END |
330 |
## BUG zsh status: 1 |
331 |
## BUG zsh STDOUT: |
332 |
status=0 |
333 |
## END |
334 |
|
#### pattern @f(x)
# parse_at is switched on first; the @f(x) on the right-hand side is still
# expected to be read as the regex "@f(x)", matching "@fx" but not "fx".
shopt -s parse_at
[[ @fx =~ @f(x) ]]
echo status=$?
[[ fx =~ @f(x) ]]
echo status=$?
## STDOUT:
status=0
status=1
## END