1 #!/usr/bin/env python2
2 """
3 lexer_def_test.py: Tests for lexer_def.py
4 """
5 from __future__ import print_function
6
7 import re
8 import unittest
9
10 from _devbuild.gen.id_kind_asdl import Id, Id_str, Kind
11 from _devbuild.gen.types_asdl import lex_mode_e
12 from core.test_lib import Tok
13 from mycpp.mylib import log
14 from core import test_lib
15 from frontend import lexer_def
16 from frontend import consts
17 from frontend import match
18
19 _ = log
20
21
def _InitLexer(s):
    """Build a Lexer over the string s, backed by a throwaway test arena."""
    test_arena = test_lib.MakeArena('<lex_test.py>')
    # InitLexer returns (line_lexer, lexer); only the Lexer is needed here.
    return test_lib.InitLexer(s, test_arena)[1]
26
27
class AsdlTest(unittest.TestCase):
    """Smoke test for the generated ASDL enum types."""

    def testLexMode(self):
        # Just confirm the enum value is importable and printable.
        print(lex_mode_e.DQ)
31
32
33 CMD = """\
34 ls /
35 ls /home/
36 """
37
38
class LexerTest(unittest.TestCase):
    """Tests for the main Lexer, driven through the lex_mode_e states.

    Each test feeds a small string through _InitLexer() and checks the token
    stream produced by repeated Read() calls.
    """

    def assertTokensEqual(self, left, right):
        """Assert two tokens compare equal, with a readable failure message."""
        self.assertTrue(test_lib.TokensEqual(left, right),
                        'Expected %r, got %r' % (left, right))

    def testRead(self):
        """Lex the two-line CMD snippet and verify the entire token stream."""
        lexer = _InitLexer(CMD)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.Lit_Chars, 'ls'), t)
        t = lexer.Read(lex_mode_e.ShCommand)

        self.assertTokensEqual(Tok(Id.WS_Space, None), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.Lit_Chars, '/'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.Op_Newline, None), t)

        # Line two
        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.Lit_Chars, 'ls'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.WS_Space, None), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.Lit_Chars, '/home/'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.Op_Newline, None), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.Eof_Real, ''), t)

        # Another EOF gives EOF
        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.Eof_Real, ''), t)

    def testMode_VSub_ArgUnquoted(self):
        # NOTE(review): the assertions below were never enabled; this only
        # checks that lexing "'hi'" in VSub_ArgUnquoted mode runs and prints
        # a token.  (The previous comment here was a stray copy-paste.)
        lexer = _InitLexer("'hi'")
        t = lexer.Read(lex_mode_e.VSub_ArgUnquoted)
        #self.assertTokensEqual(Tok(Id.Eof_Real, ''), t)
        #t = l.Read(lex_mode_e.VSub_ArgUnquoted)
        print(t)

    def testMode_ExtGlob(self):
        """Extended glob operators such as @(foo|bar), *( and ?(."""
        lexer = _InitLexer('@(foo|bar)')

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.ExtGlob_At, '@('), t)

        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(Tok(Id.Lit_Chars, 'foo'), t)

        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(Tok(Id.Op_Pipe, None), t)

        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(Tok(Id.Lit_Chars, 'bar'), t)

        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(Tok(Id.Op_RParen, None), t)

        # Individual cases

        lexer = _InitLexer('@(')
        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(Tok(Id.ExtGlob_At, '@('), t)

        lexer = _InitLexer('*(')
        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(Tok(Id.ExtGlob_Star, '*('), t)

        lexer = _InitLexer('?(')
        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(Tok(Id.ExtGlob_QMark, '?('), t)

        # A bare '$' in ExtGlob mode is just a literal character.
        lexer = _InitLexer('$')
        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(Tok(Id.Lit_Other, '$'), t)

    def testMode_BashRegex(self):
        """[[ x =~ (foo|bar) ]] style regex tokens."""
        lexer = _InitLexer('(foo|bar)')

        t = lexer.Read(lex_mode_e.BashRegex)
        self.assertTokensEqual(Tok(Id.Lit_Other, '('), t)

        t = lexer.Read(lex_mode_e.BashRegex)
        self.assertTokensEqual(Tok(Id.Lit_Chars, 'foo'), t)

        t = lexer.Read(lex_mode_e.BashRegex)
        self.assertTokensEqual(Tok(Id.Lit_Other, '|'), t)

    def testMode_DBracket(self):
        """[[ ... ]] mode: unary operators like -z get their own token Id."""
        lex = _InitLexer('-z foo')
        t = lex.Read(lex_mode_e.DBracket)
        self.assertTokensEqual(Tok(Id.BoolUnary_z, '-z'), t)
        # The token Id should map back to the BoolUnary kind.
        self.assertEqual(Kind.BoolUnary, consts.GetKind(t.id))

    def testMode_DollarSq(self):
        """$'...' C-style strings: literal runs vs. escape sequences."""
        lexer = _InitLexer(r'foo bar\n \x00 \000 \u0065')

        t = lexer.Read(lex_mode_e.SQ_C)
        print(t)
        self.assertTokensEqual(Tok(Id.Char_Literals, 'foo bar'), t)

        t = lexer.Read(lex_mode_e.SQ_C)
        print(t)
        self.assertTokensEqual(Tok(Id.Char_OneChar, r'\n'), t)

    def testMode_Backtick(self):
        """Backtick command substitution mode; prints tokens, no assertions."""
        CASES = [
            r'echo \" \\ hi`',
            r'`',
            r'',
        ]

        for case in CASES:
            print()
            print('--- %s ---' % case)
            print()

            lexer = _InitLexer(case)

            while True:
                t = lexer.Read(lex_mode_e.Backtick)
                print(t)
                if t.id == Id.Eof_Real:
                    break

    def testMode_Printf(self):
        """printf format strings: outer mode, then the % directive mode."""
        CASES = [
            r'hello %s\n',
            r'%% percent %%\377',
        ]

        for case in CASES:
            print()
            print('--- %s ---' % case)
            print()

            lexer = _InitLexer(case)

            while True:
                t = lexer.Read(lex_mode_e.PrintfOuter)
                print(t)
                if t.id == Id.Eof_Real:
                    break

        # Now test the Printf_Percent mode
        CASES = [r'-3.3f', r'03d']

        for case in CASES:
            print()
            print('--- %s ---' % case)
            print()

            lexer = _InitLexer(case)

            while True:
                t = lexer.Read(lex_mode_e.PrintfPercent)
                print(t)
                if t.id == Id.Eof_Real:
                    break

    def testMode_Expr(self):
        """Oil expression mode; prints tokens, no assertions."""
        CASES = [
            r'@[ ]',
        ]

        for case in CASES:
            print()
            print('--- %s ---' % case)
            print()

            lexer = _InitLexer(case)

            while True:
                t = lexer.Read(lex_mode_e.Expr)
                print(t)
                if t.id == Id.Eof_Real:
                    break

    def testLookPastSpace(self):
        # I think this is the usage pattern we care about.  Peek and Next() past
        # the function; then Peek() the next token.  Then Lookahead in that state.
        lexer = _InitLexer('fun()')

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.Lit_Chars, 'fun'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.Op_LParen, None), t)

        self.assertEqual(Id.Op_RParen,
                         lexer.LookPastSpace(lex_mode_e.ShCommand))

        # Same, but with a space before the paren: lookahead skips WS_Space.
        lexer = _InitLexer('fun ()')

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.Lit_Chars, 'fun'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.WS_Space, None), t)

        self.assertEqual(Id.Op_LParen,
                         lexer.LookPastSpace(lex_mode_e.ShCommand))

    def testPushHint(self):
        # Extglob use case: a parser hint retokenizes ')' as Right_ExtGlob.
        lexer = _InitLexer('@()')
        lexer.PushHint(Id.Op_RParen, Id.Right_ExtGlob)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.ExtGlob_At, '@('), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.Right_ExtGlob, None), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.Eof_Real, ''), t)

    def testEmitCompDummy(self):
        """EmitCompDummy() makes the lexer emit Lit_CompDummy just before EOF."""
        lexer = _InitLexer('echo ')
        lexer.EmitCompDummy()

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.Lit_Chars, 'echo'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.WS_Space, None), t)

        # Right before EOF
        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.Lit_CompDummy, ''), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.Eof_Real, ''), t)
280
281
class LineLexerTest(unittest.TestCase):
    """Tests for the lower-level LineLexer, which lexes a single line."""

    def setUp(self):
        self.arena = test_lib.MakeArena('<lex_test.py>')

    def assertTokensEqual(self, left, right):
        """Compare token id and value, showing readable Id names on failure."""
        self.assertEqual(
            left.id, right.id,
            '%s != %s' % (Id_str(left.id), Id_str(right.id)))
        self.assertEqual(left.tval, right.tval)

    def testReadOuter(self):
        line_lexer = test_lib.InitLineLexer('\n', self.arena)
        tok = line_lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(Tok(Id.Op_Newline, None), tok)

    def testRead_VSub_ArgUnquoted(self):
        line_lexer = test_lib.InitLineLexer("'hi'", self.arena)
        tok = line_lexer.Read(lex_mode_e.VSub_ArgUnquoted)
        self.assertEqual(Id.Left_SingleQuote, tok.id)

    def testLookPastSpace(self):
        # Lines always end with '\n'.  LookPastSpace() peeks at the next
        # non-space token without consuming it -- no lookahead, just the
        # cursor position.
        cases = [
            # (line, first word to consume or None, expected lookahead Id)
            ('', None, Id.Unknown_Tok),
            ('foo', 'foo', Id.Unknown_Tok),
            ('foo bar', 'foo', Id.Lit_Chars),
            ('fun(', 'fun', Id.Op_LParen),
            ('fun (', 'fun', Id.Op_LParen),
        ]
        for line, first_word, expected_id in cases:
            line_lexer = test_lib.InitLineLexer(line, self.arena)
            if first_word is not None:
                self.assertTokensEqual(Tok(Id.Lit_Chars, first_word),
                                       line_lexer.Read(lex_mode_e.ShCommand))
            self.assertEqual(expected_id,
                             line_lexer.LookPastSpace(lex_mode_e.ShCommand))
329
330
class RegexTest(unittest.TestCase):
    """Sanity checks for regex patterns used by the lexer definitions."""

    def testNul(self):
        # A character class containing NUL matches '\0' and nothing else.
        nul_pat = re.compile(r'[\0]')
        self.assertFalse(nul_pat.match('x'))
        self.assertTrue(nul_pat.match('\0'))

        # The last ECHO_E_DEF entry is the catch-all; it should match any
        # ordinary character but NOT the NUL byte.
        _, pat_str, _ = lexer_def.ECHO_E_DEF[-1]
        print('P %r' % pat_str)
        catch_all_pat = re.compile(pat_str)
        self.assertTrue(catch_all_pat.match('x'))
        self.assertFalse(catch_all_pat.match('\0'))
342
343
class OtherLexerTest(unittest.TestCase):
    """Tests for the small special-purpose lexers in frontend/match.py."""

    def testEchoLexer(self):
        """echo -e escape sequences lex without raising."""
        CASES = [
            r'newline \n NUL \0 octal \0377 hex \x00',
            r'unicode \u0065 \U00000065',
            r'\d \e \f \g',
        ]
        for s in CASES:
            lex = match.EchoLexer(s)
            print(lex.Tokens())

    def testPS1Lexer(self):
        """PS1 prompt strings lex without raising."""
        print(list(match.Ps1Tokens(r'foo')))
        print(list(match.Ps1Tokens(r'\h \w \$')))

    def testHistoryLexer(self):
        """History expansion: where the ! operator is and isn't recognized."""
        print(list(match.HistoryTokens(r'echo hi')))

        print(list(match.HistoryTokens(r'echo !! !* !^ !$')))

        # No history operator with \ escape.
        # NOTE: assertNotIn/assertIn replace the long-deprecated assert_
        # alias (removed in Python 3.12) and give better failure messages.
        tokens = list(match.HistoryTokens(r'echo \!!'))
        print(tokens)
        self.assertNotIn(Id.History_Op, [tok_type for tok_type, _ in tokens])

        print(list(match.HistoryTokens(r'echo !3...')))
        print(list(match.HistoryTokens(r'echo !-5...')))
        print(list(match.HistoryTokens(r'echo !x/foo.py bar')))

        print('---')

        # No history operator in single quotes
        tokens = list(match.HistoryTokens(r"echo '!!' $'!!' "))
        print(tokens)
        self.assertNotIn(Id.History_Op, [tok_type for tok_type, _ in tokens])

        # No history operator in incomplete single quotes
        tokens = list(match.HistoryTokens(r"echo '!! "))
        print(tokens)
        self.assertNotIn(Id.History_Op, [tok_type for tok_type, _ in tokens])

        # Quoted single quote, and then a History operator
        tokens = list(match.HistoryTokens(r"echo \' !! "))
        print(tokens)
        # YES operator
        self.assertIn(Id.History_Op, [tok_type for tok_type, _ in tokens])

    def testHistoryDoesNotConflict(self):
        # https://github.com/oilshell/oil/issues/264
        #
        # Bash has a bunch of hacks to suppress the conflict between ! for history
        # and:
        #
        # 1. [!abc] globbing
        # 2. ${!foo} indirect expansion
        # 3. $!x -- the PID
        # 4. !(foo|bar) -- extended glob
        #
        # I guess [[ a != b ]] doesn't match the pattern in bash.

        three_other = [Id.History_Other, Id.History_Other, Id.History_Other]
        two_other = [Id.History_Other, Id.History_Other]
        CASES = [
            (r'[!abc]', three_other),
            (r'${!indirect}', three_other),
            (r'$!x', three_other),  # didn't need a special case
            (r'!(foo|bar)', two_other),  # didn't need a special case
        ]

        for s, expected_types in CASES:
            tokens = list(match.HistoryTokens(s))
            print(tokens)
            actual_types = [id_ for id_, _ in tokens]

            self.assertNotIn(Id.History_Search, actual_types, str(tokens))

            self.assertEqual(expected_types, actual_types)

    def testBraceRangeLexer(self):
        """Brace range expressions like {a..z} lex without raising."""
        CASES = [
            'a..z',
            '100..300',
            '-300..-100..1',
            '1.3',  # invalid
            'aa',
        ]
        for s in CASES:
            lex = match.BraceRangeLexer(s)
            print(lex.Tokens())
433
434
if __name__ == '__main__':
    # Discover and run every TestCase class defined in this module.
    unittest.main()