#!/usr/bin/env python2
# Copyright 2016 Andy Chu. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
"""
id_kind_def.py - Id and Kind definitions, used for Token, Word, Nodes, etc.

NOTE: If this changes, the lexer may need to be recompiled with
build/codegen.sh lexer.
"""
from __future__ import print_function

from _devbuild.gen.types_asdl import (bool_arg_type_e, bool_arg_type_t)
#from mycpp.mylib import log

from typing import List, Tuple, Dict, Optional, TYPE_CHECKING
if TYPE_CHECKING:  # avoid circular build deps
    from _devbuild.gen.id_kind_asdl import Id_t, Kind_t

class IdSpec(object):
    """Identifiers that form the "spine" of the shell program
    representation."""

    def __init__(self, kind_lookup, bool_ops):
        # type: (Dict[int, int], Dict[int, bool_arg_type_t]) -> None
        self.id_str2int = {}  # type: Dict[str, int]
        self.kind_str2int = {}  # type: Dict[str, int]

        self.kind_lookup = kind_lookup  # Id int -> Kind int
        self.kind_name_list = []  # type: List[str]
        self.kind_sizes = []  # type: List[int]  # optional stats

        self.lexer_pairs = {}  # type: Dict[int, List[Tuple[bool, str, int]]]
        self.bool_ops = bool_ops  # type: Dict[int, bool_arg_type_t]

        # Incremented on each method call
        # IMPORTANT: 1-based indices match what asdl/gen_python.py does!!!
        self.id_index = 1
        self.kind_index = 1

    def LexerPairs(self, kind):
        # type: (Kind_t) -> List[Tuple[bool, str, Id_t]]
        result = []
        for is_regex, pat, id_ in self.lexer_pairs[kind]:
            result.append((is_regex, pat, id_))
        return result

    def _AddId(self, id_name, kind=None):
        # type: (str, Optional[int]) -> int
        """
        Args:
          id_name: e.g. BoolBinary_Equal
          kind: override auto-assignment.  For AddBoolBinaryForBuiltin.
        """
        t = self.id_index

        self.id_str2int[id_name] = t

        if kind is None:
            kind = self.kind_index
        self.kind_lookup[t] = kind

        self.id_index += 1  # mutate last
        return t  # the index we used

    def _AddKind(self, kind_name):
        # type: (str) -> None
        self.kind_str2int[kind_name] = self.kind_index
        #log('%s = %d', kind_name, self.kind_index)
        self.kind_index += 1
        self.kind_name_list.append(kind_name)

    def AddKind(self, kind_name, tokens):
        # type: (str, List[str]) -> None
        assert isinstance(tokens, list), tokens

        for name in tokens:
            id_name = '%s_%s' % (kind_name, name)
            self._AddId(id_name)

        # Must be after adding Id
        self._AddKind(kind_name)
        self.kind_sizes.append(len(tokens))  # debug info

    def AddKindPairs(self, kind_name, pairs):
        # type: (str, List[Tuple[str, str]]) -> None
        assert isinstance(pairs, list), pairs

        lexer_pairs = []
        for name, char_pat in pairs:
            id_name = '%s_%s' % (kind_name, name)
            id_int = self._AddId(id_name)
            # After _AddId
            lexer_pairs.append((False, char_pat, id_int))  # Constant

        self.lexer_pairs[self.kind_index] = lexer_pairs

        # Must be after adding Id
        self._AddKind(kind_name)
        self.kind_sizes.append(len(pairs))  # debug info

    def AddBoolKind(
            self,
            kind_name,  # type: str
            arg_type_pairs,  # type: List[Tuple[bool_arg_type_t, List[Tuple[str, str]]]]
    ):
        # type: (...) -> None
        """
        Args:
          kind_name: string
          arg_type_pairs: list of (bool_arg_type_e, list of (name, char_pat)) pairs
        """
        lexer_pairs = []
        num_tokens = 0
        for arg_type, pairs in arg_type_pairs:
            #print(arg_type, pairs)

            for name, char_pat in pairs:
                # BoolUnary_f, BoolBinary_eq, BoolBinary_NEqual
                id_name = '%s_%s' % (kind_name, name)
                id_int = self._AddId(id_name)
                self.AddBoolOp(id_int, arg_type)  # register type
                lexer_pairs.append((False, char_pat, id_int))  # constant

            num_tokens += len(pairs)

        self.lexer_pairs[self.kind_index] = lexer_pairs

        # Must do this after _AddId()
        self._AddKind(kind_name)
        self.kind_sizes.append(num_tokens)  # debug info

    def AddBoolBinaryForBuiltin(self, id_name, kind):
        # type: (str, int) -> int
        """For [ = ] [ == ] and [ != ].

        These operators are NOT added to the lexer.  They are "lexed" as
        word.String.
        """
        id_name = 'BoolBinary_%s' % id_name
        id_int = self._AddId(id_name, kind=kind)
        self.AddBoolOp(id_int, bool_arg_type_e.Str)
        return id_int

    def AddBoolOp(self, id_int, arg_type):
        # type: (int, bool_arg_type_t) -> None
        """Associate an ID integer with a bool_arg_type_e."""
        self.bool_ops[id_int] = arg_type

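
# A minimal usage sketch (illustrative only; not executed here):
#
#   spec = IdSpec({}, {})
#   AddKinds(spec)
#   AddBoolKinds(spec)
#   spec.id_str2int['Word_Compound']  # == 1; Id and Kind indices are 1-based
#   spec.kind_str2int['Word']         # == 1, in registration order
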
def AddKinds(spec):
    # type: (IdSpec) -> None

    # A compound word, in arith context, boolean context, or command context.
    # A['foo'] A["foo"] A[$foo] A["$foo"] A[${foo}] A["${foo}"]
    spec.AddKind('Word', ['Compound'])

    # Token IDs in Kind.Arith are first to make the TDOP precedence table small.
    #
    # NOTE: Could share Op_Pipe, Op_Amp, Op_DAmp, Op_Semi, Op_LParen, etc.
    # Actually all of Arith could be folded into Op, because we are using
    # WordParser._ReadArithWord vs. WordParser._ReadWord.
    spec.AddKindPairs(
        'Arith',
        [
            ('Semi', ';'),  # ternary for loop only
            ('Comma', ','),  # function call and C comma operator
            ('Plus', '+'),
            ('Minus', '-'),
            ('Star', '*'),
            ('Slash', '/'),
            ('Percent', '%'),
            ('DPlus', '++'),
            ('DMinus', '--'),
            ('DStar', '**'),
            ('LParen', '('),
            ('RParen', ')'),  # grouping and function call extension
            ('LBracket', '['),
            ('RBracket', ']'),  # array and assoc array subscript
            ('RBrace', '}'),  # for end of var sub

            # Logical Ops
            ('QMark', '?'),
            ('Colon', ':'),  # Ternary Op: a < b ? 0 : 1
            ('LessEqual', '<='),
            ('Less', '<'),
            ('GreatEqual', '>='),
            ('Great', '>'),
            ('DEqual', '=='),
            ('NEqual', '!='),
            # note: these 3 are not in YSH Expr.  (Could be used in find dialect.)
            ('DAmp', '&&'),
            ('DPipe', '||'),
            ('Bang', '!'),

            # Bitwise ops
            ('DGreat', '>>'),
            ('DLess', '<<'),
            # YSH: ^ is exponent
            ('Amp', '&'),
            ('Pipe', '|'),
            ('Caret', '^'),
            ('Tilde', '~'),

            # 11 mutating operators: = += -= etc.
            ('Equal', '='),
            ('PlusEqual', '+='),
            ('MinusEqual', '-='),
            ('StarEqual', '*='),
            ('SlashEqual', '/='),
            ('PercentEqual', '%='),
            ('DGreatEqual', '>>='),
            ('DLessEqual', '<<='),
            ('AmpEqual', '&='),
            ('PipeEqual', '|='),
            ('CaretEqual', '^=')
        ])
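    # The AddKindPairs() call above defines Id.Arith_Semi .. Id.Arith_CaretEqual
    # and records each (pattern, Id) pair under Kind.Arith for the lexer
    # (see IdSpec.AddKindPairs).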

    spec.AddKind('Eof', ['Real', 'RParen', 'Backtick'])

    spec.AddKind('Undefined', ['Tok'])  # for initial state

    # The Unknown kind is used when we lex something, but it's invalid.
    # Examples:
    #   ${^}
    #   $'\z'  Such bad codes are accepted when parse_backslash is on
    #          (default in OSH), so we have to lex them.
    #   (x == y) should use === or ~==
    spec.AddKind('Unknown', ['Tok', 'Backslash', 'DEqual'])

    spec.AddKind('Eol', ['Tok'])  # no more tokens on line (\0)

    spec.AddKind('Ignored', ['LineCont', 'Space', 'Comment'])

    # Id.WS_Space is for lex_mode_e.ShCommand; Id.Ignored_Space is for
    # lex_mode_e.Arith
    spec.AddKind('WS', ['Space'])

    spec.AddKind(
        'Lit',
        [
            'Chars',
            'VarLike',
            'ArrayLhsOpen',
            'ArrayLhsClose',
            'Splice',  # @func(a, b)
            'AtLBracket',  # @[split(x)]
            'AtLBraceDot',  # @{.myproc arg1} should be builtin_sub
            'Other',
            'EscapedChar',
            'RegexMeta',
            'LBracket',
            'RBracket',  # for assoc array literals, static globs
            'Star',
            'QMark',
            # Either brace expansion or keyword for { and }
            'LBrace',
            'RBrace',
            'Comma',
            'Equals',  # For = f()
            'Underscore',  # For _ f()
            'Dollar',  # detecting 'echo $'
            'DRightBracket',  # the ]] that matches [[, NOT a keyword
            'TildeLike',  # tilde expansion
            'Pound',  # for comment or VarOp state
            'TPound',  # for doc comments like ###
            'TDot',  # for multiline commands ...
            'Slash',
            'Percent',  # / # % for patsub, NOT unary op
            'Colon',  # x=foo:~:~root needs tilde expansion
            'Digits',  # for lex_mode_e.Arith
            'At',  # for ${a[@]} in lex_mode_e.Arith, and detecting @[]
            'ArithVarLike',  # for $((var+1)).  Distinct from Lit_VarLike 'var='
            'BadBackslash',  # for "\z", not Id.Unknown_Backslash because it's a
                             # syntax error in YSH, but NOT OSH
            'CompDummy',  # A fake Lit_* token to get partial words during
                          # completion
        ])

    # For recognizing \` and \" and \\ within backticks.  There's an extra layer
    # of backslash quoting.
    spec.AddKind('Backtick', ['Right', 'Quoted', 'DoubleQuote', 'Other'])

    spec.AddKind('History', ['Op', 'Num', 'Search', 'Other'])

    spec.AddKind(
        'Op',
        [
            'Newline',  # mostly equivalent to SEMI
            'Amp',  # &
            'Pipe',  # |
            'PipeAmp',  # |& -- bash extension for stderr
            'DAmp',  # &&
            'DPipe',  # ||
            'Semi',  # ;
            'DSemi',  # ;; for case
            'LParen',  # For subshell.  Not Kind.Left because it's NOT a WordPart.
            'RParen',  # Default, will be translated to Id.Right_*
            'DLeftParen',
            'DRightParen',

            # for [[ ]] language
            'Less',  # <
            'Great',  # >
            'Bang',  # !

            # YSH [] {}
            'LBracket',
            'RBracket',
            'LBrace',
            'RBrace',
        ])

    # YSH expressions use Kind.Expr and Kind.Arith (further below)
    spec.AddKind(
        'Expr',
        [
            'Reserved',  # <- means nothing but it's reserved now
            'Symbol',  # %foo
            'Name',
            'DecInt',
            'BinInt',
            'OctInt',
            'HexInt',
            'Float',
            'Bang',  # eggex !digit, ![a-z]
            'Dot',
            'DColon',
            'RArrow',
            'RDArrow',
            'DSlash',  # integer division
            'TEqual',
            'NotDEqual',
            'TildeDEqual',  # === !== ~==
            'At',
            'DoubleAt',  # splice operators
            'Ellipsis',  # for varargs
            'Dollar',  # legacy regex
            'NotTilde',  # !~
            'DTilde',
            'NotDTilde',  # ~~ !~~
            'CastedDummy',  # Used for @() $() (words in lex_mode_e.ShCommand)
                            # and ${} '' "" (and all other strings)

            # Constants
            'Null',
            'True',
            'False',

            # Keywords are resolved after lexing, but otherwise behave like tokens.
            'And',
            'Or',
            'Not',

            # List comprehensions
            'For',
            'Is',
            'In',
            'If',
            'Else',
            'Func',  # For function literals
            'As',
            'Virtual',
            'Override',
            'Abstract',

            # Tea-specific
            'While',
            'Break',
            'Continue',
            'Return'
        ])

    # For C-escaped strings.
    spec.AddKind(
        'Char',
        [
            'OneChar',
            'Stop',
            'Hex',
            # Two variants of Octal: \377, and \0377.
            'Octal3',
            'Octal4',
            'Unicode4',
            'Unicode8',  # legacy
            'UBraced',
            'Pound',  # YSH
            'Literals',
        ])

    # Regular expression primitives.
    spec.AddKind(
        'Re',
        [
            'Start',  # ^ or %start
            'End',  # $ or %end
            'Dot',  # . or dot
            # Future: %boundary generates \b in Python/Perl, etc.
        ])

    spec.AddKind(
        'Redir',
        [
            'Less',  # < stdin
            'Great',  # > stdout
            'DLess',  # << here doc redirect
            'TLess',  # <<< bash only here string
            'DGreat',  # >> append stdout
            'GreatAnd',  # >& descriptor redirect
            'LessAnd',  # <& descriptor redirect
            'DLessDash',  # <<- here doc redirect for tabs?
            'LessGreat',  # <>
            'Clobber',  # >| POSIX?
            'AndGreat',  # bash &> stdout/stderr to file
            'AndDGreat',  # bash &>> stdout/stderr append to file

            #'GreatPlus',  # >+ is append in YSH
            #'DGreatPlus',  # >>+ is append to string in YSH
        ])

    # NOTE: This is for left/right WORDS only.  (( is not a word so it doesn't
    # get that.
    spec.AddKind(
        'Left',
        [
            'DoubleQuote',
            'SingleQuote',  # ''
            'RSingleQuote',  # r''
            'DollarSingleQuote',  # $'' for \n escapes

            # Multiline versions
            'TDoubleQuote',  # """ """
            'TSingleQuote',  # ''' '''
            'RTSingleQuote',  # r''' '''
            'DollarTSingleQuote',  # $''' '''
            'Backtick',  # `
            'DollarParen',  # $(
            'DollarBrace',  # ${
            'DollarDParen',  # $((
            'DollarBracket',  # $[ - synonym for $(( in bash and zsh
            'DollarDoubleQuote',  # $" for bash localized strings
            'ProcSubIn',  # <( )
            'ProcSubOut',  # >( )
            'AtParen',  # @( for split command sub
            'CaretParen',  # ^( for Block literal in expression mode
            'CaretBracket',  # ^[ for Expr literal
            'CaretBrace',  # ^{ for Arglist
            'ColonPipe',  # :| for word arrays
            'PercentParen',  # legacy %( for word arrays
        ])

    spec.AddKind(
        'Right',
        [
            'DoubleQuote',
            'SingleQuote',
            'Backtick',  # `
            'DollarBrace',  # }
            'DollarDParen',  # )) -- really the second one is a PushHint()
            # ArithSub2 is just Id.Arith_RBracket
            'DollarDoubleQuote',  # "
            'DollarSingleQuote',  # '

            # Disambiguated right parens
            'Subshell',  # )
            'ShFunction',  # )
            'CasePat',  # )
            'ShArrayLiteral',  # )
            'ExtGlob',  # )
            'BlockLiteral',  # } that matches &{ echo hi }
        ])

    spec.AddKind('ExtGlob', ['Comma', 'At', 'Star', 'Plus', 'QMark', 'Bang'])

    # First position of var sub ${
    # Id.VOp2_Pound -- however you can't tell the difference at first!  It could
    # be an op or a name.  So it makes sense to base it on the state.
    # Id.VOp2_At
    # But then you have AS_STAR, or Id.Arith_Star maybe

    spec.AddKind(
        'VSub',
        [
            'DollarName',  # $foo
            'Name',  # 'foo' in ${foo}
            'Number',  # $0 .. $9
            'Bang',  # $!
            'At',  # $@ or [@] for array subscripting
            'Pound',  # $# or ${#var} for length
            'Dollar',  # $$
            'Star',  # $*
            'Hyphen',  # $-
            'QMark',  # $?
            'Dot',  # ${.myproc builtin sub}
        ])

    spec.AddKindPairs('VTest', [
        ('ColonHyphen', ':-'),
        ('Hyphen', '-'),
        ('ColonEquals', ':='),
        ('Equals', '='),
        ('ColonQMark', ':?'),
        ('QMark', '?'),
        ('ColonPlus', ':+'),
        ('Plus', '+'),
    ])

    # Statically parse @P, so @x etc. is an error.
    spec.AddKindPairs(
        'VOp0',
        [
            ('Q', '@Q'),  # ${x@Q} for quoting
            ('E', '@E'),
            ('P', '@P'),  # ${PS1@P} for prompt eval
            ('A', '@A'),
            ('a', '@a'),
        ])

    # String removal ops
    spec.AddKindPairs(
        'VOp1',
        [
            ('Percent', '%'),
            ('DPercent', '%%'),
            ('Pound', '#'),
            ('DPound', '##'),
            # Case ops, in bash.  At least parse them.  Execution might require
            # unicode stuff.
            ('Caret', '^'),
            ('DCaret', '^^'),
            ('Comma', ','),
            ('DComma', ',,'),
        ])

    spec.AddKindPairs(
        'VOpOil',
        [
            ('Pipe', '|'),  # ${x|html}
            ('Space', ' '),  # ${x %.3f}
        ])

    # Not in POSIX, but in Bash
    spec.AddKindPairs(
        'VOp2',
        [
            ('Slash', '/'),  # / for replacement
            ('Colon', ':'),  # : for slicing
            ('LBracket', '['),  # [ for indexing
            ('RBracket', ']'),  # ] for indexing
        ])

    # Can only occur after ${!prefix@}
    spec.AddKindPairs('VOp3', [
        ('At', '@'),
        ('Star', '*'),
    ])

    # This kind is for Node types that are NOT tokens.
    spec.AddKind(
        'Node',
        [
            # Arithmetic nodes
            'PostDPlus',
            'PostDMinus',  # Postfix inc/dec.
            # Prefix inc/dec use Arith_DPlus/Arith_DMinus.
            'UnaryPlus',
            'UnaryMinus',  # +1 and -1, to distinguish from infix.
            # Actually we don't need this because they
            # will be under Expr1/Plus vs Expr2/Plus.
            'NotIn',
            'IsNot',  # For YSH comparisons
        ])

    # NOTE: Not doing AddKindPairs() here because oil will have a different set
    # of keywords.  It will probably have for/in/while/until/case/if/else/elif,
    # and then func/proc.
    spec.AddKind(
        'KW',
        [
            'DLeftBracket',
            'Bang',
            'For',
            'While',
            'Until',
            'Do',
            'Done',
            'In',
            'Case',
            'Esac',
            'If',
            'Fi',
            'Then',
            'Else',
            'Elif',
            'Function',
            'Time',

            # YSH keywords.
            'Const',
            'Var',
            'SetVar',
            'SetRef',
            'SetGlobal',
            # later: Auto
            'Proc',
            'Func',
            'Data',
            'Enum',
            'Class',
            'Import',

            # 'Match', 'With',  # matching
            # not sure: yield
            # mycpp
            # 'Switch',
            # - 'init' (constructor) and maybe 'call'
            # try except (no finally?)

            # builtins, NOT keywords: use, fork, wait, etc.
            # Things that don't affect parsing shouldn't be keywords.
        ])

    # Unlike bash, we parse control flow statically.  They're not
    # dynamically-resolved builtins.
    spec.AddKind('ControlFlow', ['Break', 'Continue', 'Return', 'Exit'])

    # Special Kind for lookahead in the lexer.  It's never seen by anything else.
    spec.AddKind('LookAhead', ['FuncParens'])

    # For parsing globs and converting them to regexes.
    spec.AddKind('Glob', [
        'LBracket',
        'RBracket',
        'Star',
        'QMark',
        'Bang',
        'Caret',
        'EscapedChar',
        'BadBackslash',
        'CleanLiterals',
        'OtherLiteral',
    ])

    # For C-escaped strings.
    spec.AddKind(
        'Format',
        [
            'EscapedPercent',
            'Percent',  # starts another lexer mode
            'Flag',
            'Num',
            'Dot',
            'Type',
            'Star',
            'Time',
            'Zero',
        ])

    # For parsing prompt strings like PS1.
    spec.AddKind('PS', [
        'Subst',
        'Octal3',
        'LBrace',
        'RBrace',
        'Literals',
        'BadBackslash',
    ])

    spec.AddKind('Range', ['Int', 'Char', 'Dots', 'Other'])

    # Note: not used now
    spec.AddKind(
        'QSN',
        [
            # LiteralBytes is a string, optimized for the common case
            'LiteralBytes',
            # A byte that we'll look at individually, e.g. \r \n, or just a low
            # control code like \x01
            'SpecialByte',
            # UTF-8 sequences:
            'Begin2',
            'Begin3',
            'Begin4',
            'Cont',
        ])


# Shared between [[ and test/[.
_UNARY_STR_CHARS = 'zn'  # -z -n
_UNARY_OTHER_CHARS = 'otvR'  # -o is overloaded
_UNARY_PATH_CHARS = 'abcdefghkLprsSuwxOGN'  # -a is overloaded

_BINARY_PATH = ['ef', 'nt', 'ot']
_BINARY_INT = ['eq', 'ne', 'gt', 'ge', 'lt', 'le']

def _Dash(strs):
    # type: (List[str]) -> List[Tuple[str, str]]
    # Gives a pair of (token name, string to match)
    return [(s, '-' + s) for s in strs]
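
# Illustration (not executed):  _Dash(list('zn')) == [('z', '-z'), ('n', '-n')],
# which is how AddBoolKinds() below turns the character lists above into
# Id.BoolUnary_z, Id.BoolUnary_n, etc. with their '-z', '-n' lexer patterns.
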
def AddBoolKinds(spec):
    # type: (IdSpec) -> None
    spec.AddBoolKind('BoolUnary', [
        (bool_arg_type_e.Str, _Dash(list(_UNARY_STR_CHARS))),
        (bool_arg_type_e.Other, _Dash(list(_UNARY_OTHER_CHARS))),
        (bool_arg_type_e.Path, _Dash(list(_UNARY_PATH_CHARS))),
    ])

    spec.AddBoolKind('BoolBinary', [
        (bool_arg_type_e.Str, [
            ('GlobEqual', '='),
            ('GlobDEqual', '=='),
            ('GlobNEqual', '!='),
            ('EqualTilde', '=~'),
        ]),
        (bool_arg_type_e.Path, _Dash(_BINARY_PATH)),
        (bool_arg_type_e.Int, _Dash(_BINARY_INT)),
    ])

    Id = spec.id_str2int
    # logical, arity, arg_type
    spec.AddBoolOp(Id['Op_DAmp'], bool_arg_type_e.Undefined)
    spec.AddBoolOp(Id['Op_DPipe'], bool_arg_type_e.Undefined)
    spec.AddBoolOp(Id['KW_Bang'], bool_arg_type_e.Undefined)

    spec.AddBoolOp(Id['Op_Less'], bool_arg_type_e.Str)
    spec.AddBoolOp(Id['Op_Great'], bool_arg_type_e.Str)

def SetupTestBuiltin(
        id_spec,  # type: IdSpec
        unary_lookup,  # type: Dict[str, int]
        binary_lookup,  # type: Dict[str, int]
        other_lookup,  # type: Dict[str, int]
):
    # type: (...) -> None
    """Setup tokens for test/[.

    Similar to AddBoolKinds above.  Differences:
    - =~ doesn't exist
    - && -> -a, || -> -o
    - ( ) -> Op_LParen (they don't appear above)
    """
    Id = id_spec.id_str2int
    Kind = id_spec.kind_str2int

    for letter in _UNARY_STR_CHARS + _UNARY_OTHER_CHARS + _UNARY_PATH_CHARS:
        id_name = 'BoolUnary_%s' % letter
        unary_lookup['-' + letter] = Id[id_name]

    for s in _BINARY_PATH + _BINARY_INT:
        id_name = 'BoolBinary_%s' % s
        binary_lookup['-' + s] = Id[id_name]

    # Like the [[ definition above, but without globbing and without =~ .

    for id_name, token_str in [('Equal', '='), ('DEqual', '=='),
                               ('NEqual', '!=')]:
        id_int = id_spec.AddBoolBinaryForBuiltin(id_name, Kind['BoolBinary'])

        binary_lookup[token_str] = id_int

    # Some of these names don't quite match, but it keeps the BoolParser simple.
    binary_lookup['<'] = Id['Op_Less']
    binary_lookup['>'] = Id['Op_Great']

    # NOTE: -a and -o overloaded as unary prefix operators BoolUnary_a and
    # BoolUnary_o.  The parser rather than the tokenizer handles this.
    other_lookup['!'] = Id['KW_Bang']  # like [[ !
    other_lookup['('] = Id['Op_LParen']
    other_lookup[')'] = Id['Op_RParen']

    other_lookup[']'] = Id['Arith_RBracket']  # For closing ]
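

# A small smoke-test sketch (assumes the generated _devbuild.gen modules are
# importable, e.g. inside the Oils dev build; not part of the public API):
if __name__ == '__main__':
    _spec = IdSpec({}, {})
    AddKinds(_spec)
    AddBoolKinds(_spec)
    # id_index and kind_index are 1-based and point past the last assigned index.
    print('%d Ids in %d Kinds' % (_spec.id_index - 1, _spec.kind_index - 1))
    print('largest Kind has %d Ids' % max(_spec.kind_sizes))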
|