# OILS/frontend/syntax.asdl
# Data types for the Oils AST, aka "Lossless Syntax Tree".
#
# Invariant: the source text can be reconstructed byte-for-byte from this
# tree.  The test/arena.sh file partially verifies this.
#
# Exceptions:
# - <<- here docs with leading tabs, since we don't want those for
#   conversion.  We don't want files with mixed tabs and spaces.
# - Distinguishing between function styles wasn't necessary:
#   - foo() { } vs function foo { }  # ksh style

# We usually try to preserve the physical order of the source in the ASDL
# fields.  One exception is the order of redirects:
#
#   echo >out.txt hi
#   # versus
#   echo hi >out.txt

# Unrepresented:
# - let arithmetic (rarely used)
# - coprocesses  # one with arg and one without
# - select block
# - case fallthrough ;& and ;;&

# Possible refactorings:
#
#   printf_part = Literal %Token | ...
#
#   # %CompoundWord as first class variant:
#   bool_expr = WordTest %CompoundWord | ...
#
#   # Can DoubleQuoted have a subset of parts compared with CompoundWord?
#   string_part = ...  # subset of word_part
#
# - Distinguish word_t with BracedTree vs. those without?  seq_word_t?
# - Remove command.NoOp ?

module syntax
{
  # More efficient than the List[bool] pattern we've been using
  BoolParamBox = (bool b)
  IntParamBox = (int i)

  # core/main_loop.py
  parse_result = EmptyLine | Eof | Node(command cmd)

  # 'source' represents the location of a line / token.
  source =
    Interactive
  | Headless
  | Unused(str comment)  # completion and history never show parse errors?
  | CFlag
  | Stdin(str comment)

    # TODO: if it's not the main script, it's sourced, and you could provide
    # a chain of locations back to the sourced script!
    # MainFile(str path) or SourcedFile(str path, loc location)
  | MainFile(str path)
  | SourcedFile(str path, loc location)

    # code parsed from a word
    # used for 'eval', 'trap', 'printf', 'complete -W', etc.
  | ArgvWord(str what, loc location)

    # code parsed from the value of a variable
    # used for $PS1 $PROMPT_COMMAND
  | Variable(str var_name, loc location)

    # Point to the original variable reference
  | VarRef(Token orig_tok)

    # alias expansion (location of first word)
  | Alias(str argv0, loc argv0_loc)

    # 2 kinds of reparsing: backticks, and x+1 in a[x+1]=y
    # TODO: use this for eval_unsafe_arith instead of Variable
  | Reparsed(str what, Token left_token, Token right_token)

    # For --location-str
  | Synthetic(str s)

  SourceLine = (int line_num, str content, source src)

  # Token TODO:
  # - remove .tval field.  If necessary, the string value should be manually
  #   computed and attached to specific LST nodes.
  # - maybe get rid of span_id, and re-compute length on demand too
  Token = (id id, int col, int length, int span_id, SourceLine? line, str tval)

  # Slight ASDL bug: CompoundWord has to be defined before using it as a shared
  # variant.  The _product_counter algorithm should be moved into a separate
  # tag-assigning pass, and shared between gen_python.py and gen_cpp.py.
  CompoundWord = (List[word_part] parts)

  # Source location for errors
  loc =
    Missing  # equivalent of runtime.NO_SPID
  | Token %Token
    # Very common case: argv arrays need original location
  | ArgWord %CompoundWord
  | WordPart(word_part p)
  | Word(word w)
  | Arith(arith_expr a)
    # e.g. for errexit blaming
  | Command(command c)

  debug_frame =
    Main(str dollar0)
    # call_loc => BASH_LINENO
    # call_loc may be None with new --source flag?
  | Source(Token? call_tok, str source_name)
    # def_tok => BASH_SOURCE
    # call_loc may be None if invoked via RunFuncForCompletion?
  | Call(Token? call_tok, Token def_tok, str func_name)

  #
  # Shell language
  #

  bracket_op =
    WholeArray(id op_id)  # * or @
  | ArrayIndex(arith_expr expr)

  suffix_op =
    Nullary %Token  # ${x@Q} or ${!prefix@} (which also has prefix_op)
  | Unary(Token op, rhs_word arg_word)  # e.g. ${v:-default}
    # TODO: Implement YSH ${x|html} and ${x %.3f}
  | Static(Token tok, str arg)
  | PatSub(CompoundWord pat, rhs_word replace, id replace_mode, Token slash_tok)
    # begin is optional with ${array::1}
  | Slice(arith_expr? begin, arith_expr? length)

  BracedVarSub = (
      Token left,        # in dynamic ParseVarRef, same as name_tok
      Token token,       # location for the name
      str var_name,      # the name
      Token? prefix_op,  # prefix # or ! operators
      bracket_op? bracket_op,
      suffix_op? suffix_op,
      Token right        # in dynamic ParseVarRef, same as name_tok
  )

  # Variants:
  # - Look at left token ID for $'' c'' vs r'' '' e.g. Id.Left_DollarSingleQuote
  # - And """ and ''' e.g. Id.Left_TDoubleQuote
  DoubleQuoted = (Token left, List[word_part] parts, Token right)
  SingleQuoted = (Token left, List[Token] tokens, Token right)

  SimpleVarSub = (Token left, str var_name)

  CommandSub = (Token left_token, command child, Token right)

  # - can contain word.BracedTree
  # - no 'Token right' for now, doesn't appear to be used
  ShArrayLiteral = (Token left, List[word] words, Token right)

  # Unevaluated, typed arguments for func and proc.
  # Note that ...arg is expr.Spread.
  ArgList = (
      Token left, List[expr] pos_args, List[NamedArg] named_args, Token right
  )

  AssocPair = (CompoundWord key, CompoundWord value)

  word_part =
    ShArrayLiteral %ShArrayLiteral
  | BashAssocLiteral(Token left, List[AssocPair] pairs, Token right)
  | Literal %Token
    # escaped case is separate so the evaluator doesn't have to check token ID
  | EscapedLiteral(Token token, str ch)
  | SingleQuoted %SingleQuoted
  | DoubleQuoted %DoubleQuoted
  | SimpleVarSub %SimpleVarSub
  | BracedVarSub %BracedVarSub
    # For command sub and process sub: $(...)  <(...)  >(...)
  | CommandSub %CommandSub
    # ~ or ~bob
  | TildeSub(Token token, str? user_name)
  | ArithSub(Token left, arith_expr anode, Token right)
    # {a,b,c}
  | BracedTuple(List[CompoundWord] words)
    # {1..10} or {-5..10..2} or {01..10} (leading zeros matter)
    # {a..f} or {a..f..2} or {a..f..-2}
    # the whole range is one Token,
  | BracedRange(Token blame_tok, id kind, str start, str end, int step)
    # note: optional int may need special handling in ASDL
    # extended globs are parsed statically, unlike globs
  | ExtGlob(Token op, List[CompoundWord] arms, Token right)

    # YSH word_part extensions

    # @myarray
  | Splice(Token blame_tok, str var_name)
    # $[d.key], etc.
  | ExprSub(Token left, expr child, Token right)

  # Use cases for Empty: RHS of 'x=', the argument in "${x:-}".
  # The latter is semantically necessary.  (See osh/word_parse.py).
  # At runtime: RHS of 'declare x='.
  rhs_word = Empty | Compound %CompoundWord

  word =
    # Returns from WordParser, but not generally stored in LST
    Operator %Token
    # A Compound word can contain any word_part except the Braced*Part.
    # We could model this with another variant type but it incurs runtime
    # overhead and seems like overkill.  Note that DoubleQuoted can't
    # contain a SingleQuoted, etc. either.
  | Compound %CompoundWord
    # For word sequences command.Simple, ShArrayLiteral, for_iter.Words
    # Could be its own type
  | BracedTree(List[word_part] parts)
    # For dynamic parsing of test aka [ - the string is already evaluated.
  | String(id id, str s, CompoundWord? blame_loc)

  # Note: the name 'foo' is derived from token value 'foo=' or 'foo+='
  sh_lhs_expr =
    Name(Token left, str name)
  | IndexedName(Token left, str name, arith_expr index)
  | UnparsedIndex(Token left, str name, str index)  # for translation

  arith_expr =
    VarSub %SimpleVarSub  # e.g. $(( x ))
  | Word %CompoundWord    # e.g. $(( 123'456'$y ))

  | UnaryAssign(id op_id, arith_expr child)
  | BinaryAssign(id op_id, arith_expr left, arith_expr right)

  | Unary(id op_id, arith_expr child)
    # TODO: op should be token, e.g. for divide by zero
  | Binary(id op_id, arith_expr left, arith_expr right)
  | TernaryOp(arith_expr cond, arith_expr true_expr, arith_expr false_expr)

  bool_expr =
    WordTest(word w)  # e.g. [[ myword ]]
  | Binary(id op_id, word left, word right)
  | Unary(id op_id, word child)
  | LogicalNot(bool_expr child)
  | LogicalAnd(bool_expr left, bool_expr right)
  | LogicalOr(bool_expr left, bool_expr right)

  redir_loc =
    Fd(int fd) | VarName(str name)

  redir_param =
    Word %CompoundWord
  | HereDoc(word here_begin,      # e.g. EOF or 'EOF'
            Token? here_end_tok,  # Token consisting of the whole line
                                  # It's always filled in AFTER creation, but
                                  # temporarily so optional
            List[word_part] stdin_parts  # one for each line
           )

  Redir = (Token op, redir_loc loc, redir_param arg)

  assign_op = Equal | PlusEqual
  AssignPair = (Token left, sh_lhs_expr lhs, assign_op op, rhs_word rhs)
  EnvPair = (Token left, str name, rhs_word val)

  condition =
    Shell(List[command] commands)  # if false; true; then echo hi; fi
  | YshExpr(expr e)                # if (x > 0) { echo hi }
    # TODO: add more specific blame location

  # Each arm tests one word against multiple words
  # shell:  *.cc|*.h) echo C++ ;;
  # YSH:    *.cc|*.h { echo C++ }
  #
  # Three location tokens:
  # 1. left   - shell has ( or *.cc, ysh has *.cc
  # 2. middle - shell has ), ysh has {
  # 3. right  - shell has optional ;;, ysh has required }
  #
  # For YSH typed case, left can be ( and /
  # And case_pat may contain more details
  CaseArm = (
      Token left, pat pattern, Token middle, List[command] action,
      Token? right
  )

  # The argument to match against in a case command
  # In YSH-style case commands we match against an `expr`, but in sh-style case
  # commands we match against a word.
  case_arg =
    Word(word w)
  | YshExpr(expr e)

  pat =
    Else
  | Words(List[word] words)
  | YshExprs(List[expr] exprs)
  | Eggex(re eggex)

  # Each if arm starts with either an "if" or "elif" keyword
  # In YSH, the then keyword is not used (replaced by braces {})
  IfArm = (
      Token keyword, condition cond, Token? then_kw, List[command] action,
      List[int] spids)

  for_iter =
    Args                          # for x; do echo $x; done  # implicit "$@"
  | Words(List[word] words)       # for x in 'foo' *.py { echo $x }
    # like ShArrayLiteral, but no location for %(
  | YshExpr(expr e, Token blame)  # for x in (mylist) { echo $x }

  BraceGroup = (
      Token left, Token? doc_token, List[command] children,
      List[Redir] redirects, Token right
  )

  # Retain references to lines
  BlockArg = (BraceGroup brace_group, List[SourceLine] lines)

  command =
    NoOp
  | Simple(Token? blame_tok,  # TODO: make required (BracedTuple?)
           List[EnvPair] more_env,
           List[word] words, List[Redir] redirects,
           ArgList? typed_args, BlockArg? block,
           # do_fork is semantic, not syntactic
           bool do_fork)
    # This doesn't technically belong in the LST, but it's convenient for
    # execution
  | ExpandedAlias(command child, List[Redir] redirects, List[EnvPair] more_env)
  | Sentence(command child, Token terminator)
    # Represents "bare assignment"
    # Token left is redundant with pairs[0].left
  | ShAssignment(Token left, List[AssignPair] pairs, List[Redir] redirects)
  | Retval(Token keyword, expr val)
  | ControlFlow(Token keyword, word? arg_word)
    # ops are |  |&
  | Pipeline(Token? negated, List[command] children, List[Token] ops)
    # ops are &&  ||
  | AndOr(List[command] children, List[Token] ops)
    # Part of for, while, until (but not if, case, ShFunction).  No redirects.
  | DoGroup(Token left, List[command] children, Token right)
    # A brace group is a compound command, with redirects.
  | BraceGroup %BraceGroup
    # Contains a single child, like CommandSub
  | Subshell(Token left, command child, Token right, List[Redir] redirects)
  | DParen(Token left, arith_expr child, Token right, List[Redir] redirects)
  | DBracket(Token left, bool_expr expr, Token right, List[Redir] redirects)
    # up to 3 iterations variables
  | ForEach(Token keyword, List[str] iter_names, for_iter iterable,
            Token? semi_tok, command body, List[Redir] redirects)
    # C-style for loop.  Any of the 3 expressions can be omitted.
    # Note: body is required, but only optional here because of initialization
    # order.
  | ForExpr(Token keyword, arith_expr? init, arith_expr? cond,
            arith_expr? update, command? body, List[Redir] redirects)
  | WhileUntil(Token keyword, condition cond, command body, List[Redir] redirects)
  | If(Token if_kw, List[IfArm] arms, Token? else_kw, List[command] else_action,
       Token? fi_kw, List[Redir] redirects)
  | Case(Token case_kw, case_arg to_match, Token arms_start, List[CaseArm] arms,
         Token arms_end, List[Redir] redirects)
    # The keyword is optional in the case of bash-style functions
    # (ie. "foo() { ... }") which do not have one.
  | ShFunction(Token? keyword, Token name_tok, str name, command body)
  | TimeBlock(Token keyword, command pipeline)
    # Some nodes optimize it out as List[command], but we use CommandList for
    # 1. the top level
    # 2. ls ; ls & ls  (same line)
    # 3. CommandSub   # single child that's a CommandList
    # 4. Subshell     # single child that's a CommandList
  | CommandList(List[command] children)

    # YSH command constructs

    # For 'x = myexpr'.  There's no type and no comma allowed.
  | BareDecl(Token lhs, expr rhs)
    # var, const
  | VarDecl(Token? keyword, List[NameType] lhs, expr rhs)
    # setvar, maybe 'auto' later
  | PlaceMutation(Token keyword, List[place_expr] lhs, Token op, expr rhs)
    # = keyword
  | Expr(Token keyword, expr e)
  | Proc(Token keyword, Token name, proc_sig sig, command body)
  | Func(Token keyword, Token name,
         List[Param] pos_params, RestParam? rest_of_pos,
         List[Param] named_params, RestParam? rest_of_named,
         command body)

    # Tea

  | TeaFunc(Token name,
            List[Param] pos_params, RestParam? pos_splat,
            List[Param] named_params, RestParam? named_splat,
            List[TypeExpr] return_types, command body)
  | Data(Token name, List[Param] params)
  | Enum(Token name, List[Variant] variants)
  | Class(Token name, Token? extends, List[class_item] items)
  | Import(SingleQuoted path, Token? alias, List[ImportName] names)
  | For(List[NameType] targets, expr iterable, command body)
  | While(expr test, command body)
  | Break | Continue
  | Return(expr? value)

  # Binary(x expr, y expr) or Nullary %Token
  # In the first case we have a tag, and an anonymous type.
  variant_type =
    Anon(List[Param] params)
  | Ref(Token type_name)

  # tags and types are separate in our model of algebraic data types
  Variant = (Token tag_name, variant_type? typ)

  class_item =
    Data(Token keyword, List[NameType] fields)
  | Method()

  ImportName = (Token name, Token? alias)

  Param = (Token blame_tok, str name, TypeExpr? type, expr? default_val)
  RestParam = (Token blame_tok, str name)

  # 'open' is for proc p { }; closed is for proc p () { }
  proc_sig =
    Open
  | Closed(List[Param] word_params, RestParam? rest_of_words,
           List[Param] pos_params, RestParam? rest_of_pos,
           List[Param] named_params, RestParam? rest_of_named,
           RestParam? block_param)

  #
  # Glob representation, for converting ${x//} to extended regexes.
  #

  # Example: *.[ch] is:
  #   GlobOp(<Glob_Star '*'>),
  #   GlobLit(Glob_OtherLiteral, '.'),
  #   CharClass(False, ['ch'])  # from Glob_CleanLiterals token

  glob_part =
    Literal(id id, str s)
  | Operator(id op_id)  # * or ?
  | CharClass(bool negated, List[str] strs)

  # Char classes are opaque for now.  If we ever need them:
  # - Collating symbols are [. .]
  # - Equivalence classes are [=

  printf_part =
    Literal(Token token)
    # flags are 0 hyphen space + #
    # type is 's' for %s, etc.
  | Percent(List[Token] flags, Token? width, Token? precision, Token type)

  #
  # YSH Language
  #
  # Copied and modified from Python-3.7/Parser/Python.asdl !

  expr_context = Load | Store | Del | AugLoad | AugStore | Param

  # Type expressions:  Int  List[Int]  Dict[Str, Any]
  # Do we have Func[Int, Int => Int] ?  I guess we can parse that into this
  # system.
  TypeExpr = (Token tok, str name, List[TypeExpr] params)

  # LHS binding in loops, list comprehensions, and var/const
  NameType = (Token name, TypeExpr? typ)

  # TODO: Inline this into GenExp and ListComp?  Just use a flag there?
  Comprehension = (List[NameType] lhs, expr iter, expr? cond)

  # Named arguments supplied to call.  Token is null for f(; ...named).
  NamedArg = (Token? name, expr value)

  # Subscripts are lists of expressions
  # a[:i, n]  (we don't have matrices, but we have data frames)
  Subscript = (expr obj, expr index)

  # Attributes are obj.attr, d->key, name::scope,
  Attribute = (expr obj, Token op, Token attr, expr_context ctx)

  # Places that can be mutated.
  place_expr =
    Var(Token name)  # TODO: add str var_name
  | Subscript %Subscript
  | Attribute %Attribute

  expr =
    # a variable name to evaluate
    Var(Token name)  # TODO: add str var_name
    # For null, Bool, Int, Float
    # Python uses Num(object n), which doesn't respect our "LST" invariant.
  | Const(Token c)
    # @(one 'two' "$three")
  | ShArrayLiteral %ShArrayLiteral
    # @[a b c] @[1 2 3] @[(1+1) (2+2)]
  | RegexLiteral(Token left, re regex, List[Token] flags, Token? trans_pref)

  | SimpleVarSub %SimpleVarSub
  | BracedVarSub %BracedVarSub
  | CommandSub %CommandSub
  | SingleQuoted %SingleQuoted
  | DoubleQuoted %DoubleQuoted

  | BlockArg %BlockArg

  | Lambda(List[NameType] params, expr body)

  | Unary(Token op, expr child)
  | Binary(Token op, expr left, expr right)
    # x < 4 < 3 and (x < 4) < 3
  | Compare(expr left, List[Token] ops, List[expr] comparators)
  | FuncCall(expr func, ArgList args)

    # TODO: Need a representation for method call.  We don't just want
    # Attribute() and then Call()

  | IfExp(expr test, expr body, expr orelse)
  | Tuple(List[expr] elts, expr_context ctx)

  | List(List[expr] elts, expr_context ctx)
  | Dict(List[expr] keys, List[expr] values)
    # For the values in {n1, n2}
  | Implicit

  | ListComp(expr elt, List[Comprehension] generators)
    # not implemented
  | DictComp(expr key, expr value, List[Comprehension] generators)
  | GeneratorExp(expr elt, List[Comprehension] generators)

    # Ranges are written 1:2, with first class expression syntax.  There is no
    # step as in Python.  Use range(0, 10, step=2) for that.
  | Range(expr lower, expr upper)

    # Slices occur within [] only.  Unlike ranges, the start/end can be
    # implicit.  Like ranges, denote a step with slice(0, 10, step=2).
    #   a[3:]   a[:i]
  | Slice(expr? lower, expr? upper)

  | Subscript %Subscript
  | Attribute %Attribute

    # Ellipsis is like 'Starred' within Python, which are valid on the LHS in
    # Python for unpacking, and # within list literals for splicing.
    # (Starred is NOT used for {k:v, **a}.  That used a blank "keys"
    # attribute.)

    # I think we can use { **pairs } like Python
  | Spread(expr child, expr_context ctx)

  #
  # Regex Language (Eggex)
  #

  # e.g. alnum digit
  PosixClass = (Token? negated, str name)
  # e.g. d w s
  PerlClass = (Token? negated, str name)

  # Note: .NET has && in character classes, making it a recursive language

  class_literal_term =
    PosixClass %PosixClass
  | PerlClass %PerlClass
    # [a-z]  ~[a-z]  TODO: Doesn't respect LST invariant

  | Range(Token start, Token end)
  | CharLiteral(Token tok)

  | SingleQuoted %SingleQuoted
    # @chars
  | Splice(Token name, str var_name)

  # Char Sets and Ranges both use Char Codes
  # with u_braced == true : \u{ff}
  # with u_braced == false: \xff \\ 'a' a '0' 0
  # ERE doesn't make a distinction, but compiling to Python/PCRE can use it
  CharCode = (int i, bool u_braced, Token blame_tok)

  # evaluated version of class_literal_term (could be in runtime.asdl)
  char_class_term =
    PosixClass %PosixClass
  | PerlClass %PerlClass

  | Range(CharCode start, CharCode end)

    # For [ \x00 \\ ]
  | CharCode %CharCode

  # NOTE: modifier is unused now, can represent L or P
  re_repeat =
    Op(Token op)
  | Num(Token times)
    # dot{1,2}
  | Range(Token? lower, Token? upper)
    # Haven't implemented the modifier, e.g. x{+ P}
    # | Num(Token times, id modifier)
    # | Range(Token? lower, Token? upper, id modifier)

  re =
    # e.g.  .  ^  $  %begin  \u123
    Token %Token
  | PosixClass %PosixClass
  | PerlClass %PerlClass
    # syntax [ $x \n ]
  | CharClassLiteral(bool negated, List[class_literal_term] terms)
    # evaluated [ 'abc' \n ]
  | CharClass(bool negated, List[char_class_term] terms)

    # @D
  | Splice(Token name, str var_name)

  | SingleQuoted %SingleQuoted

    # Compound:
  | Repeat(re child, re_repeat op)
  | Seq(List[re] children)
  | Alt(List[re] children)

  | Group(re child)
    # TODO: <d+ : month Int> needs Token? type field
  | Capture(re child, Token? var_name)
  | Backtracking(bool negated, Token name, re child)

  # These nodes are never parsed; they're part of execution.
  # Right now we do them in _EvalRegex, though many transformations could be
  # done as constant evaluation.

  | Primitive(id id)  # . dot etc.
    # String substitutions are evaluated into literals
  | LiteralChars(str s, Token blame_tok)
}