1 |
#!/usr/bin/env bash |
2 |
# |
3 |
# Only bash and zsh seem to implement [[ foo =~ '' ]] |
4 |
# |
5 |
# ^(a b)$ is a regex that should match 'a b' in a group. |
6 |
# |
7 |
# Not sure what bash is doing here... I think I have to just be empirical. |
8 |
# Might need "compat" switch for parsing the regex. It should be an opaque |
9 |
# string like zsh, not sure why it isn't. |
10 |
# |
11 |
# I think this is just papering over bugs... |
12 |
# https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs |
13 |
# |
14 |
# Storing the regular expression in a shell variable is often a useful way to |
15 |
# avoid problems with quoting characters that are special to the shell. It is |
16 |
# sometimes difficult to specify a regular expression literally without using |
17 |
# quotes, or to keep track of the quoting used by regular expressions while |
18 |
# paying attention to the shell’s quote removal. Using a shell variable to |
19 |
# store the pattern decreases these problems. For example, the following is |
20 |
# equivalent to the above: |
21 |
# |
22 |
# pattern='[[:space:]]*(a)?b' |
23 |
# [[ $line =~ $pattern ]] |
24 |
# |
25 |
# If you want to match a character that’s special to the regular expression |
26 |
# grammar, it has to be quoted to remove its special meaning. This means that in |
27 |
# the pattern ‘xxx.txt’, the ‘.’ matches any character in the string (its usual |
28 |
# regular expression meaning), but in the pattern ‘"xxx.txt"’ it can only match a |
29 |
# literal ‘.’. Shell programmers should take special care with backslashes, since |
30 |
# backslashes are used both by the shell and regular expressions to remove the |
31 |
# special meaning from the following character. The following two sets of |
32 |
# commands are not equivalent: |
33 |
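# |
# pattern='\.' |
# |
# [[ . =~ $pattern ]] |
# [[ . =~ \. ]] |
# |
# [[ . =~ "$pattern" ]] |
# [[ . =~ '\.' ]] |
# |
# The first two matches will succeed, but the second two will not, because in |
# the second two the backslash will be part of the pattern to be matched. |
# |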
34 |
# From bash code: ( | ) are treated specially. Normally they must be quoted, but |
35 |
# they can be UNQUOTED in BASH_REGEX state. In fact they can't be quoted! |
36 |
|
37 |
#### BASH_REMATCH |
# After a successful match, BASH_REMATCH holds the whole match followed by one |
# entry per ( ) group, as the expected output of this case shows. |
38 |
[[ foo123 =~ ([a-z]+)([0-9]+) ]] |
39 |
argv.py "${BASH_REMATCH[@]}" |
40 |
## STDOUT: |
41 |
['foo123', 'foo', '123'] |
42 |
## END |
43 |
## N-I zsh STDOUT: |
44 |
[''] |
45 |
## END |
46 |
|
47 |
#### Match is unanchored at both ends |
# 'a' sits in the middle of 'bar'; with no ^ or $ the regex still matches. |
48 |
[[ 'bar' =~ a ]] && echo true |
49 |
## stdout: true |
50 |
|
51 |
#### Failed match |
# 'bar' contains no X, so [[ fails, echo is skipped, and the status is 1. |
52 |
[[ 'bar' =~ X ]] && echo true |
53 |
## status: 1 |
54 |
## stdout-json: "" |
55 |
|
56 |
#### Regex quoted with \ -- preferred in bash |
# Only the space is backslash-escaped; ^ ( ) $ are left unquoted. |
57 |
[[ 'a b' =~ ^(a\ b)$ ]] && echo true |
58 |
## stdout: true |
59 |
|
60 |
#### Regex quoted with single quotes |
61 |
# bash doesn't like the quotes: with the whole pattern single-quoted, the |
# expected result is no match (empty stdout, status 1). zsh matches. |
62 |
[[ 'a b' =~ '^(a b)$' ]] && echo true |
63 |
## stdout-json: "" |
64 |
## status: 1 |
65 |
## OK zsh stdout: true |
66 |
## OK zsh status: 0 |
67 |
|
68 |
#### Regex quoted with double quotes |
69 |
# bash doesn't like the quotes: with the whole pattern double-quoted, the |
# expected result is no match (empty stdout, status 1). zsh matches. |
70 |
[[ 'a b' =~ "^(a b)$" ]] && echo true |
71 |
## stdout-json: "" |
72 |
## status: 1 |
73 |
## OK zsh stdout: true |
74 |
## OK zsh status: 0 |
75 |
|
76 |
#### Fix single quotes by storing in variable |
# The single-quoted pattern is assigned to a variable first; expanding that |
# variable unquoted on the right of =~ is expected to match. |
77 |
pat='^(a b)$' |
78 |
[[ 'a b' =~ $pat ]] && echo true |
79 |
## stdout: true |
80 |
|
81 |
#### Fix double quotes by storing in variable |
82 |
# Here the pattern is assigned with double quotes; the unquoted expansion on |
# the right of =~ is still expected to match. |
pat="^(a b)$" |
83 |
[[ 'a b' =~ $pat ]] && echo true |
84 |
## stdout: true |
85 |
|
86 |
#### Double quoting pat variable -- again bash doesn't like it. |
# The only difference from an unquoted $pat is the double quotes around the |
# expansion; the expected result flips to no match (status 1). zsh matches. |
87 |
pat="^(a b)$" |
88 |
[[ 'a b' =~ "$pat" ]] && echo true |
89 |
## stdout-json: "" |
90 |
## status: 1 |
91 |
## OK zsh stdout: true |
92 |
## OK zsh status: 0 |
93 |
|
94 |
#### Mixing quoted and unquoted parts |
# Adjacent pieces are joined into one pattern: first a single-quoted piece |
# plus a bare word, then a double-quoted piece plus a single-quoted piece. |
# Both are expected to match 'a b'. |
95 |
[[ 'a b' =~ 'a 'b ]] && echo true |
96 |
[[ "a b" =~ "a "'b' ]] && echo true |
97 |
## STDOUT: |
98 |
true |
99 |
true |
100 |
## END |
101 |
|
102 |
#### Regex with == and not =~ is parse error, different lexer mode required |
103 |
# They both give a syntax error. This is lame. |
# The unquoted ( ) that are accepted after =~ are rejected after ==. |
# Expected status is 2 (1 for zsh). |
104 |
[[ '^(a b)$' == ^(a\ b)$ ]] && echo true |
105 |
## status: 2 |
106 |
## OK zsh status: 1 |
107 |
|
108 |
#### Omitting ( ) |
# With the parentheses removed, the right-hand side of == parses, and it is |
# expected to compare equal to the literal string '^a b$'. |
109 |
[[ '^a b$' == ^a\ b$ ]] && echo true |
110 |
## stdout: true |
111 |
|
112 |
#### Malformed regex |
113 |
# Are they trying to PARSE the regex? Do they feed the buffer directly to |
114 |
# regcomp()? |
# The parentheses are swapped here: ')' comes before '('. |
# Expected status is 2 (1 for zsh), with nothing printed. |
115 |
[[ 'a b' =~ ^)a\ b($ ]] && echo true |
116 |
## status: 2 |
117 |
## OK zsh status: 1 |
118 |
|
119 |
#### Regex with char class containing space |
120 |
# For some reason it doesn't work without parens? |
# NOTE(review): presumably the unquoted space inside [a b] would otherwise |
# end the word; wrapping the class in ( ) keeps it in one regex -- confirm. |
121 |
[[ 'ba ba ' =~ ([a b]+) ]] && echo true |
122 |
## stdout: true |
123 |
|
124 |
#### Operators and space lose meaning inside () |
# Inside ( ) the characters '<', '>' and the space are expected to be taken |
# as part of the regex rather than as shell syntax. zsh: no match, status 1. |
125 |
[[ '< >' =~ (< >) ]] && echo true |
126 |
## stdout: true |
127 |
## N-I zsh stdout-json: "" |
128 |
## N-I zsh status: 1 |
129 |
|
130 |
#### Regex with | |
# The unquoted | is expected to act as regex alternation (foo or bar), not |
# as a pipeline operator. zsh: no match, status 1. |
131 |
[[ 'bar' =~ foo|bar ]] && echo true |
132 |
## stdout: true |
133 |
## N-I zsh stdout-json: "" |
134 |
## N-I zsh status: 1 |
135 |
|
136 |
#### Regex to match literal brackets [] |
# Two spellings of the same regex: backslash-escaped brackets written inline, |
# and the same text stored in a variable. Both are expected to print true; |
# the zsh expectation has only one 'true'. |
137 |
|
138 |
# bash-completion relies on this, so we're making it match bash. |
139 |
# zsh understandably differs. |
140 |
[[ '[]' =~ \[\] ]] && echo true |
141 |
|
142 |
# Another way to write this. |
143 |
pat='\[\]' |
144 |
[[ '[]' =~ $pat ]] && echo true |
145 |
## STDOUT: |
146 |
true |
147 |
true |
148 |
## END |
149 |
## OK zsh STDOUT: |
150 |
true |
151 |
## END |
152 |
|
153 |
#### Regex to match literals . ^ $ etc. |
# Each pair is a negative check (prints false when the escaped regex does not |
# match plain letters) followed by a positive check (prints true when it |
# matches the literal metacharacters). Note that in the + * ? pair the two |
# lines escape the characters in a different order (+*? versus *+?). |
154 |
[[ 'x' =~ \. ]] || echo false |
155 |
[[ '.' =~ \. ]] && echo true |
156 |
|
157 |
[[ 'xx' =~ \^\$ ]] || echo false |
158 |
[[ '^$' =~ \^\$ ]] && echo true |
159 |
|
160 |
[[ 'xxx' =~ \+\*\? ]] || echo false |
161 |
[[ '*+?' =~ \*\+\? ]] && echo true |
162 |
|
163 |
[[ 'xx' =~ \{\} ]] || echo false |
164 |
[[ '{}' =~ \{\} ]] && echo true |
165 |
## STDOUT: |
166 |
false |
167 |
true |
168 |
false |
169 |
true |
170 |
false |
171 |
true |
172 |
false |
173 |
true |
174 |
## END |
175 |
## BUG zsh STDOUT: |
176 |
true |
177 |
false |
178 |
false |
179 |
false |
180 |
## END |
181 |
## BUG zsh status: 1 |
182 |
|
183 |
#### Unquoted { is a regex parse error |
# Expected: the snippet stops with status 2 before 'echo status=$?' prints |
# anything. bash and zsh instead keep running, print the status of [[ |
# (2 and 1 respectively) and exit 0; both are recorded as BUG. |
184 |
[[ { =~ { ]] && echo true |
185 |
echo status=$? |
186 |
## stdout-json: "" |
187 |
## status: 2 |
188 |
## BUG bash stdout-json: "status=2\n" |
189 |
## BUG bash status: 0 |
190 |
## BUG zsh stdout-json: "status=1\n" |
191 |
## BUG zsh status: 0 |
192 |
|
193 |
#### Fatal error inside [[ =~ ]] |
# The regex operand is an arithmetic expansion that divides by zero. |
# Expected: the script aborts with status 1 and prints nothing. bash carries |
# on to the echo, prints status=1 and exits 0 (recorded as BUG). |
194 |
|
195 |
# zsh and osh are stricter than bash. bash treats [[ like a command. |
196 |
|
197 |
[[ a =~ $(( 1 / 0 )) ]] |
198 |
echo status=$? |
199 |
## stdout-json: "" |
200 |
## status: 1 |
201 |
## BUG bash stdout: status=1 |
202 |
## BUG bash status: 0 |
203 |
|
204 |
#### Quoted { |
# With the brace double-quoted, the match against a literal '{' is expected |
# to succeed. The zsh expectation shows only status=1. |
205 |
[[ { =~ "{" ]] && echo true |
206 |
echo status=$? |
207 |
## STDOUT: |
208 |
true |
209 |
status=0 |
210 |
## END |
211 |
## N-I zsh STDOUT: |
212 |
status=1 |
213 |
## END |
214 |
|
215 |
#### Escaped { |
216 |
# from bash-completion |
# \$ and \{ are backslash-escaped and \{ is followed by ?; for the input |
# '$PA' the expected groups are '$' and 'PA'. zsh prints nothing (BUG). |
217 |
[[ '$PA' =~ ^(\$\{?)([A-Za-z0-9_]*)$ ]] && argv.py "${BASH_REMATCH[@]}" |
218 |
## STDOUT: |
219 |
['$PA', '$', 'PA'] |
220 |
## END |
221 |
## BUG zsh stdout-json: "" |
222 |
## BUG zsh status: 1 |
223 |
|
224 |
#### Escaped { stored in variable first |
225 |
# from bash-completion |
# The regex text is single-quoted into pat and expanded unquoted; the same |
# three-element result is expected. The zsh expectation is [''] (BUG). |
226 |
pat='^(\$\{?)([A-Za-z0-9_]*)$' |
227 |
[[ '$PA' =~ $pat ]] && argv.py "${BASH_REMATCH[@]}" |
228 |
## STDOUT: |
229 |
['$PA', '$', 'PA'] |
230 |
## END |
231 |
## BUG zsh STDOUT: |
232 |
[''] |
233 |
## END |
234 |
|
235 |
#### regex with ? |
# c? is expected to match both the string 'c' and the empty string. |
236 |
[[ 'c' =~ c? ]] && echo true |
237 |
[[ '' =~ c? ]] && echo true |
238 |
## STDOUT: |
239 |
true |
240 |
true |
241 |
## END |