1 """
2 tdop.py - Library for expression parsing.
3 """
4
5 from _devbuild.gen.id_kind_asdl import Id, Id_t
6 from _devbuild.gen.syntax_asdl import (loc, arith_expr, arith_expr_e,
7 arith_expr_t, word_t, CompoundWord,
8 SimpleVarSub)
9 from _devbuild.gen.types_asdl import lex_mode_e
10 from core.error import p_die
11 from core import ui
12 from frontend import lexer
13 from mycpp import mylib
14 from mycpp.mylib import tagswitch
15 from osh import word_
16
17 from typing import (Callable, List, Dict, Tuple, Any, cast, TYPE_CHECKING)
18
19 if TYPE_CHECKING: # break circular dep
20 from osh.word_parse import WordParser
21 from core import optview
22 LeftFunc = Callable[['TdopParser', word_t, arith_expr_t, int], arith_expr_t]
23 NullFunc = Callable[['TdopParser', word_t, int], arith_expr_t]
24
25
26 def IsIndexable(node):
27 # type: (arith_expr_t) -> bool
28 """In POSIX shell arith, a[1] is allowed but a[1][1] isn't.
29
30 We also allow $a[i] and foo$x[i] (formerly parse_dynamic_arith)
31 """
32 with tagswitch(node) as case:
33 if case(arith_expr_e.VarSub, arith_expr_e.Word):
34 return True
35 return False
36
37
38 def CheckLhsExpr(node, blame_word):
39 # type: (arith_expr_t, word_t) -> None
40 """Determine if a node is a valid L-value by whitelisting tags.
41
42 Valid:
43 x = y
44 a[1] = y
45 Invalid:
46 a[0][0] = y
47 """
48 UP_node = node
49 if node.tag() == arith_expr_e.Binary:
50 node = cast(arith_expr.Binary, UP_node)
51 if node.op_id == Id.Arith_LBracket and IsIndexable(node.left):
52 return
53 # But a[0][0] = 1 is NOT valid.
54
55 if IsIndexable(node):
56 return
57
58 p_die("Left-hand side of this assignment is invalid", loc.Word(blame_word))
59
60
61 #
62 # Null Denotation
63 #
64
65
66 def NullError(p, t, bp):
67 # type: (TdopParser, word_t, int) -> arith_expr_t
68 # TODO: I need position information
69 p_die("Token can't be used in prefix position", loc.Word(t))
70 return None # never reached
71
72
73 def NullConstant(p, w, bp):
74 # type: (TdopParser, word_t, int) -> arith_expr_t
75 name_tok = word_.LooksLikeArithVar(w)
76 if name_tok:
77 return SimpleVarSub(name_tok, lexer.TokenVal(name_tok))
78
79 # Id.Word_Compound in the spec ensures this cast is valid
80 return cast(CompoundWord, w)
81
82
83 def NullParen(p, t, bp):
84 # type: (TdopParser, word_t, int) -> arith_expr_t
85 """Arithmetic grouping."""
86 r = p.ParseUntil(bp)
87 p.Eat(Id.Arith_RParen)
88 return r
89
90
91 def NullPrefixOp(p, w, bp):
92 # type: (TdopParser, word_t, int) -> arith_expr_t
93 """Prefix operator.
94
95 Low precedence: return, raise, etc.
96 return x+y is return (x+y), not (return x) + y
97
98 High precedence: logical negation, bitwise complement, etc.
99 !x && y is (!x) && y, not !(x && y)
100 """
101 right = p.ParseUntil(bp)
102 return arith_expr.Unary(word_.ArithId(w), right)
103
104
105 #
106 # Left Denotation
107 #
108
109
110 def LeftError(p, t, left, rbp):
111 # type: (TdopParser, word_t, arith_expr_t, int) -> arith_expr_t
112 # Hm is this not called because of binding power?
113 p_die("Token can't be used in infix position", loc.Word(t))
114 return None # never reached
115
116
117 def LeftBinaryOp(p, w, left, rbp):
118 # type: (TdopParser, word_t, arith_expr_t, int) -> arith_expr_t
119 """Normal binary operator like 1+2 or 2*3, etc."""
120 # TODO: w should be a Token, and we should extract the token from it.
121 return arith_expr.Binary(word_.ArithId(w), left, p.ParseUntil(rbp))
122
123
124 def LeftAssign(p, w, left, rbp):
125 # type: (TdopParser, word_t, arith_expr_t, int) -> arith_expr_t
126 """Normal binary operator like 1+2 or 2*3, etc."""
127 # x += 1, or a[i] += 1
128
129 CheckLhsExpr(left, w)
130 return arith_expr.BinaryAssign(word_.ArithId(w), left, p.ParseUntil(rbp))
131
132
133 #
134 # Parser definition
135 # TODO: To be consistent, move this to osh/tdop_def.py.
136 #
137
138 if mylib.PYTHON:
139
140 def _ModuleAndFuncName(f):
141 # type: (Any) -> Tuple[str, str]
142 namespace = f.__module__.split('.')[-1]
143 return namespace, f.__name__
144
145 def _CppFuncName(f):
146 # type: (Any) -> str
147 return '%s::%s' % _ModuleAndFuncName(f)
148
149 class LeftInfo(object):
150 """Row for operator.
151
152 In C++ this should be a big array.
153 """
154
155 def __init__(self, led=None, lbp=0, rbp=0):
156 # type: (LeftFunc, int, int) -> None
157 self.led = led or LeftError
158 self.lbp = lbp
159 self.rbp = rbp
160
161 def __str__(self):
162 # type: () -> str
163 """Used by C++ code generation."""
164 return '{ %s, %d, %d },' % (_CppFuncName(
165 self.led), self.lbp, self.rbp)
166
167 def ModuleAndFuncName(self):
168 # type: () -> Tuple[str, str]
169 """Used by C++ code generation."""
170 return _ModuleAndFuncName(self.led)
171
172 class NullInfo(object):
173 """Row for operator.
174
175 In C++ this should be a big array.
176 """
177
178 def __init__(self, nud=None, bp=0):
179 # type: (NullFunc, int) -> None
180 self.nud = nud or LeftError
181 self.bp = bp
182
183 def __str__(self):
184 # type: () -> str
185 """Used by C++ code generation."""
186 return '{ %s, %d },' % (_CppFuncName(self.nud), self.bp)
187
188 def ModuleAndFuncName(self):
189 # type: () -> Tuple[str, str]
190 """Used by C++ code generation."""
191 return _ModuleAndFuncName(self.nud)
192
193 class ParserSpec(object):
194 """Specification for a TDOP parser.
195
196 This can be compiled to a table in C++.
197 """
198
199 def __init__(self):
200 # type: () -> None
201 self.nud_lookup = {} # type: Dict[Id_t, NullInfo]
202 self.led_lookup = {} # type: Dict[Id_t, LeftInfo]
203
204 def Null(self, bp, nud, tokens):
205 # type: (int, NullFunc, List[Id_t]) -> None
206 """Register a token that doesn't take anything on the left.
207
208 Examples: constant, prefix operator, error.
209 """
210 for token in tokens:
211 self.nud_lookup[token] = NullInfo(nud=nud, bp=bp)
212 if token not in self.led_lookup:
213 self.led_lookup[token] = LeftInfo() # error
214
215 def _RegisterLed(self, lbp, rbp, led, tokens):
216 # type: (int, int, LeftFunc, List[Id_t]) -> None
217 for token in tokens:
218 if token not in self.nud_lookup:
219 self.nud_lookup[token] = NullInfo(NullError)
220 self.led_lookup[token] = LeftInfo(lbp=lbp, rbp=rbp, led=led)
221
222 def Left(self, bp, led, tokens):
223 # type: (int, LeftFunc, List[Id_t]) -> None
224 """Register a token that takes an expression on the left."""
225 self._RegisterLed(bp, bp, led, tokens)
226
227 def LeftRightAssoc(self, bp, led, tokens):
228 # type: (int, LeftFunc, List[Id_t]) -> None
229 """Register a right associative operator."""
230 self._RegisterLed(bp, bp - 1, led, tokens)
231
232 def LookupNud(self, token):
233 # type: (Id_t) -> NullInfo
234
235 # As long as the table is complete, this shouldn't fail
236 return self.nud_lookup[token]
237
238 def LookupLed(self, token):
239 # type: (Id_t) -> LeftInfo
240 """Get a left_info for the token."""
241
242 # As long as the table is complete, this shouldn't fail
243 return self.led_lookup[token]
244
245
246 class TdopParser(object):
247 def __init__(self, spec, w_parser, parse_opts):
248 # type: (ParserSpec, WordParser, optview.Parse) -> None
249 self.spec = spec
250 self.w_parser = w_parser
251 self.parse_opts = parse_opts
252
253 # NOTE: Next() overwrites this state, so we don't need a Reset() method in
254 # between reuses of this TdopParser instance.
255 self.cur_word = None # type: word_t # current token
256 self.op_id = Id.Undefined_Tok
257
258 def CurrentId(self):
259 # type: () -> Id_t
260 """Glue used by the WordParser to check for extra tokens."""
261 return word_.CommandId(self.cur_word)
262
263 def AtToken(self, token_type):
264 # type: (Id_t) -> bool
265 return self.op_id == token_type
266
267 def Eat(self, token_type):
268 # type: (Id_t) -> None
269 """Assert that we're at the current token and advance."""
270 if not self.AtToken(token_type):
271 p_die(
272 'Parser expected %s, got %s' %
273 (ui.PrettyId(token_type), ui.PrettyId(self.op_id)),
274 loc.Word(self.cur_word))
275 self.Next()
276
277 def Next(self):
278 # type: () -> bool
279 self.cur_word = self.w_parser.ReadWord(lex_mode_e.Arith)
280 self.op_id = word_.ArithId(self.cur_word)
281 return True
282
283 def ParseUntil(self, rbp):
284 # type: (int) -> arith_expr_t
285 """Parse to the right, eating tokens until we encounter a token with
286 binding power LESS THAN OR EQUAL TO rbp."""
287 # TODO: use Kind.Eof
288 if self.op_id in (Id.Eof_Real, Id.Eof_RParen, Id.Eof_Backtick):
289 p_die('Unexpected end of input', loc.Word(self.cur_word))
290
291 t = self.cur_word
292 null_info = self.spec.LookupNud(self.op_id)
293
294 self.Next() # skip over the token, e.g. ! ~ + -
295 node = null_info.nud(self, t, null_info.bp)
296
297 while True:
298 t = self.cur_word
299 left_info = self.spec.LookupLed(self.op_id)
300
301 # Examples:
302 # If we see 1*2+ , rbp = 27 and lbp = 25, so stop.
303 # If we see 1+2+ , rbp = 25 and lbp = 25, so stop.
304 # If we see 1**2**, rbp = 26 and lbp = 27, so keep going.
305 if rbp >= left_info.lbp:
306 break
307 self.Next() # skip over the token, e.g. / *
308
309 node = left_info.led(self, t, node, left_info.rbp)
310
311 return node
312
313 def Parse(self):
314 # type: () -> arith_expr_t
315
316 self.Next() # may raise ParseError
317
318 if not self.parse_opts.parse_sh_arith():
319 # Affects:
320 # echo $(( x ))
321 # ${a[i]} which should be $[a[i]] -- could have better error
322 #
323 # Note: sh_expr_eval.UnsafeArith has a dynamic e_die() check
324 #
325 # Doesn't affect:
326 # printf -v x # unsafe_arith.ParseLValue
327 # unset x # unsafe_arith.ParseLValue
328 # ${!ref} # unsafe_arith.ParseVarRef
329 # declare -n # not fully implemented yet
330 #
331 # a[i+1]= # parse_sh_assign
332 #
333 # (( a = 1 )) # parse_dparen
334 # for (( i = 0; ... # parse_dparen
335
336 p_die("POSIX shell arithmetic isn't allowed (parse_sh_arith)",
337 loc.Word(self.cur_word))
338
339 return self.ParseUntil(0)