OILS / osh / cmd_parse.py
1 |
# Copyright 2016 Andy Chu. All rights reserved.
|
2 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
3 |
# you may not use this file except in compliance with the License.
|
4 |
# You may obtain a copy of the License at
|
5 |
#
|
6 |
# http://www.apache.org/licenses/LICENSE-2.0
|
7 |
"""
|
8 |
cmd_parse.py - Parse high level shell commands.
|
9 |
"""
|
10 |
from __future__ import print_function
|
11 |
|
12 |
from _devbuild.gen import grammar_nt
|
13 |
from _devbuild.gen.id_kind_asdl import Id, Id_t, Kind, Kind_str
|
14 |
from _devbuild.gen.types_asdl import lex_mode_e, cmd_mode_e, cmd_mode_t
|
15 |
from _devbuild.gen.syntax_asdl import (
|
16 |
loc,
|
17 |
SourceLine,
|
18 |
source,
|
19 |
parse_result,
|
20 |
parse_result_t,
|
21 |
command,
|
22 |
command_t,
|
23 |
condition,
|
24 |
condition_t,
|
25 |
for_iter,
|
26 |
ArgList,
|
27 |
BraceGroup,
|
28 |
BlockArg,
|
29 |
CaseArm,
|
30 |
case_arg,
|
31 |
IfArm,
|
32 |
pat,
|
33 |
pat_t,
|
34 |
Redir,
|
35 |
redir_param,
|
36 |
redir_loc,
|
37 |
redir_loc_t,
|
38 |
word_e,
|
39 |
word_t,
|
40 |
CompoundWord,
|
41 |
Token,
|
42 |
word_part_e,
|
43 |
word_part_t,
|
44 |
rhs_word,
|
45 |
rhs_word_t,
|
46 |
sh_lhs_expr,
|
47 |
sh_lhs_expr_t,
|
48 |
AssignPair,
|
49 |
EnvPair,
|
50 |
assign_op_e,
|
51 |
NameType,
|
52 |
proc_sig,
|
53 |
proc_sig_e,
|
54 |
)
|
55 |
from core import alloc
|
56 |
from core import error
|
57 |
from core.error import p_die
|
58 |
from core import ui
|
59 |
from frontend import consts
|
60 |
from frontend import lexer
|
61 |
from frontend import location
|
62 |
from frontend import match
|
63 |
from frontend import reader
|
64 |
from mycpp.mylib import log
|
65 |
from osh import braces
|
66 |
from osh import bool_parse
|
67 |
from osh import word_
|
68 |
|
69 |
from typing import Optional, List, Dict, Any, Tuple, cast, TYPE_CHECKING
|
70 |
if TYPE_CHECKING:
|
71 |
from core.alloc import Arena
|
72 |
from core import optview
|
73 |
from frontend.lexer import Lexer
|
74 |
from frontend.parse_lib import ParseContext, AliasesInFlight
|
75 |
from frontend.reader import _Reader
|
76 |
from osh.word_parse import WordParser
|
77 |
|
78 |
_ = Kind_str  # for debug prints

# Character codes compared against lexer.ByteLookBack() in _ScanSimpleCommand
# (integers rather than one-char strings, which is mycpp-friendly).
TAB_CH = 9  # ord('\t')
SPACE_CH = 32  # ord(' ')
|
82 |
|
83 |
|
84 |
def _ReadHereLines(
        line_reader,  # type: _Reader
        h,  # type: Redir
        delimiter,  # type: str
):
    # type: (...) -> Tuple[List[Tuple[SourceLine, int]], Tuple[SourceLine, int]]
    """Collect the raw body lines of a here doc, up to its terminator.

    We read all the lines at once, instead of parsing line-by-line, because
    the body can contain multi-line constructs:

      cat <<EOF
      1 $(echo 2
      echo 3) 4
      EOF

    Returns:
      (body lines, terminator line), where each entry pairs a SourceLine with
      the offset of its first significant char (leading tabs are skipped for
      the <<- operator).
    """
    here_lines = []  # type: List[Tuple[SourceLine, int]]
    last_line = None  # type: Tuple[SourceLine, int]
    strip_leading_tabs = (h.op.id == Id.Redir_DLessDash)

    while True:
        src_line, unused_offset = line_reader.GetLine()

        if src_line is None:  # EOF
            # An unterminated here doc is just a warning in bash.  We make it
            # fatal because we want to be strict, and because it causes
            # problems reporting other errors.
            # Attribute it to the << in <<EOF for now.
            p_die("Couldn't find terminator for here doc that starts here",
                  h.op)

        assert len(src_line.content) != 0  # None should be the empty line

        line = src_line.content

        # For <<-, strip ALL leading tabs -- not spaces, and not just the
        # first tab.
        start_offset = 0
        if strip_leading_tabs:
            num_chars = len(line)
            j = 0  # used after loop exit
            while j < num_chars and line[j] == '\t':
                j += 1
            start_offset = j

        if line[start_offset:].rstrip() == delimiter:
            last_line = (src_line, start_offset)
            break

        here_lines.append((src_line, start_offset))

    return here_lines, last_line
|
134 |
|
135 |
|
136 |
def _MakeLiteralHereLines(
        here_lines,  # type: List[Tuple[SourceLine, int]]
        arena,  # type: Arena
):
    # type: (...) -> List[word_part_t]  # less precise because List is invariant type
    """Create a Lit_Chars token (word part) for each raw here-doc line."""
    parts = []  # type: List[word_part_t]
    for src_line, start_offset in here_lines:
        tok = arena.NewToken(Id.Lit_Chars, start_offset,
                             len(src_line.content), src_line,
                             src_line.content[start_offset:])
        parts.append(cast(word_part_t, tok))
    return parts
|
149 |
|
150 |
|
151 |
def _ParseHereDocBody(parse_ctx, r, line_reader, arena):
    # type: (ParseContext, Redir, _Reader, Arena) -> None
    """Fill in attributes of a pending here doc node.

    Reads the body lines from line_reader, then either stores them as literal
    parts (quoted delimiter) or re-parses them for expansion (unquoted
    delimiter), mutating r.arg in place.
    """
    h = cast(redir_param.HereDoc, r.arg)
    # "If any character in word is quoted, the delimiter shall be formed by
    # performing quote removal on word, and the here-document lines shall not
    # be expanded.  Otherwise, the delimiter shall be the word itself."
    # NOTE: \EOF counts, or even E\OF
    ok, delimiter, delim_quoted = word_.StaticEval(h.here_begin)
    if not ok:
        p_die('Invalid here doc delimiter', loc.Word(h.here_begin))

    here_lines, last_line = _ReadHereLines(line_reader, r, delimiter)

    if delim_quoted:  # << 'EOF'
        # Literal for each line: no $var or $(command sub) expansion.
        h.stdin_parts = _MakeLiteralHereLines(here_lines, arena)
    else:
        # Re-parse the body lines so substitutions are recognized.
        line_reader = reader.VirtualLineReader(here_lines, arena)
        w_parser = parse_ctx.MakeWordParserForHereDoc(line_reader)
        w_parser.ReadHereDocBody(h.stdin_parts)  # fills this in

    end_line, end_pos = last_line

    # Create a Token with the end terminator.  Maintains the invariant that
    # the tokens "add up".
    h.here_end_tok = arena.NewToken(Id.Undefined_Tok, end_pos,
                                    len(end_line.content), end_line, '')
|
179 |
|
180 |
|
181 |
def _MakeAssignPair(parse_ctx, preparsed, arena):
    # type: (ParseContext, PreParsedItem, Arena) -> AssignPair
    """Create an AssignPair from a 4-tuple produced by DetectShAssignment.

    Handles three LHS shapes: a plain name (s=1), an array index left
    unparsed in one-pass-parse mode, and an array index parsed eagerly
    with a fresh arith parser (a[x++]=1).
    """

    left_token, close_token, part_offset, w = preparsed

    if left_token.id == Id.Lit_VarLike:  # s=1
        if lexer.IsPlusEquals(left_token):
            # Slice off the trailing '+=' to get the variable name.
            var_name = lexer.TokenSliceRight(left_token, -2)
            op = assign_op_e.PlusEqual
        else:
            # Slice off the trailing '=' to get the variable name.
            var_name = lexer.TokenSliceRight(left_token, -1)
            op = assign_op_e.Equal

        tmp = sh_lhs_expr.Name(left_token, var_name)

        lhs = cast(sh_lhs_expr_t, tmp)

    elif left_token.id == Id.Lit_ArrayLhsOpen and parse_ctx.one_pass_parse:
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        assert left_token.line == close_token.line, \
            '%s and %s not on same line' % (left_token, close_token)

        # Keep the index expression as an unparsed string; it will be parsed
        # later (one-pass-parse mode).
        left_pos = left_token.col + left_token.length
        index_str = left_token.line.content[left_pos:close_token.col]
        lhs = sh_lhs_expr.UnparsedIndex(left_token, var_name, index_str)

    elif left_token.id == Id.Lit_ArrayLhsOpen:  # a[x++]=1
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        span1 = left_token
        span2 = close_token
        # Similar to SnipCodeString / SnipCodeBlock
        if span1.line == span2.line:
            # extract what's between brackets
            code_str = span1.line.content[span1.col + span1.length:span2.col]
        else:
            raise NotImplementedError('%s != %s' % (span1.line, span2.line))
        a_parser = parse_ctx.MakeArithParser(code_str)

        # a[i+1]= is a place
        src = source.Reparsed('array place', left_token, close_token)
        with alloc.ctx_SourceCode(arena, src):
            index_node = a_parser.Parse()  # may raise error.Parse

        tmp3 = sh_lhs_expr.IndexedName(left_token, var_name, index_node)

        lhs = cast(sh_lhs_expr_t, tmp3)

    else:
        raise AssertionError()

    # TODO: Should we also create a rhs_expr.ArrayLiteral here?
    n = len(w.parts)
    if part_offset == n:
        # Nothing after the '=': empty RHS, e.g. s=
        rhs = rhs_word.Empty  # type: rhs_word_t
    else:
        # tmp2 is for intersection of C++/MyPy type systems
        tmp2 = CompoundWord(w.parts[part_offset:])
        word_.TildeDetectAssign(tmp2)
        rhs = tmp2

    return AssignPair(left_token, lhs, op, rhs)
|
253 |
|
254 |
|
255 |
def _AppendMoreEnv(preparsed_list, more_env):
    # type: (PreParsedList, List[EnvPair]) -> None
    """Helper to modify a SimpleCommand node.

    Args:
      preparsed_list: a list of 4-tuples from DetectShAssignment
      more_env: output list; an EnvPair is appended for each binding
    """
    for left_token, _, part_offset, w in preparsed_list:
        # a[x]=1 is never valid as an environment binding
        if left_token.id != Id.Lit_VarLike:
            p_die("Environment binding shouldn't look like an array assignment",
                  left_token)

        if lexer.IsPlusEquals(left_token):
            p_die('Expected = in environment binding, got +=', left_token)

        var_name = lexer.TokenSliceRight(left_token, -1)
        if part_offset == len(w.parts):
            # Nothing after '=': FOO= cmd
            val = rhs_word.Empty  # type: rhs_word_t
        else:
            val = CompoundWord(w.parts[part_offset:])

        more_env.append(EnvPair(left_token, var_name, val))
|
280 |
|
281 |
|
282 |
if TYPE_CHECKING:
    # The 4-tuple produced by word_.DetectShAssignment:
    # (left name token, closing ']' token or None, index into w.parts where
    # the RHS begins, the whole word)
    PreParsedItem = Tuple[Token, Optional[Token], int, CompoundWord]
    PreParsedList = List[PreParsedItem]
|
285 |
|
286 |
|
287 |
def _SplitSimpleCommandPrefix(words):
    # type: (List[CompoundWord]) -> Tuple[PreParsedList, List[CompoundWord]]
    """Second pass of SimpleCommand parsing: look for assignment words.

    Returns the leading run of assignment words (pre-parsed) and the
    remaining words, in order.
    """
    preparsed_list = []  # type: PreParsedList
    suffix_words = []  # type: List[CompoundWord]

    in_prefix = True
    for w in words:
        if in_prefix:
            left_token, close_token, part_offset = word_.DetectShAssignment(w)
            if left_token:
                preparsed_list.append((left_token, close_token, part_offset, w))
                continue
            # The first non-assignment word ends the prefix for good.
            in_prefix = False
        suffix_words.append(w)

    return preparsed_list, suffix_words
|
307 |
|
308 |
|
309 |
def _MakeSimpleCommand(
        preparsed_list,  # type: PreParsedList
        suffix_words,  # type: List[CompoundWord]
        redirects,  # type: List[Redir]
        typed_args,  # type: Optional[ArgList]
        block,  # type: Optional[BlockArg]
):
    # type: (...) -> command.Simple
    """Create a command.Simple node from the pieces of a simple command."""

    # FOO=(1 2 3) ls is not allowed.
    for _, _, _, w in preparsed_list:
        if word_.HasArrayPart(w):
            p_die("Environment bindings can't contain array literals",
                  loc.Word(w))

    # NOTE: It would be possible to add this check back.  But it already
    # happens at runtime in EvalWordSequence2.
    # echo FOO=(1 2 3) is not allowed (but we should NOT fail on echo FOO[x]=1).
    if 0:
        for w in suffix_words:
            if word_.HasArrayPart(w):
                p_die("Commands can't contain array literals", loc.Word(w))

    assert len(suffix_words) != 0
    # Blame the first part of the first word, BEFORE brace/tilde detection
    # rewrites the word structure:
    # {a,b,c}   # Use { before brace detection
    # ~/bin/ls  # Use ~ before tilde detection
    part0 = suffix_words[0].parts[0]
    blame_tok = location.LeftTokenForWordPart(part0)

    # NOTE: We only do brace DETECTION here, not brace EXPANSION.  Therefore
    # we can't implement bash's behavior of having say {~bob,~jane}/src work,
    # because we only have a BracedTree.
    # This is documented in spec/brace-expansion.
    # NOTE: Technically we could do expansion outside of 'oshc translate',
    # but it doesn't seem worth it.
    words2 = braces.BraceDetectAll(suffix_words)
    words3 = word_.TildeDetectAll(words2)

    more_env = []  # type: List[EnvPair]
    _AppendMoreEnv(preparsed_list, more_env)

    # do_fork by default
    return command.Simple(blame_tok, more_env, words3, redirects, typed_args,
                          block, True)
|
354 |
|
355 |
|
356 |
class VarChecker(object):
    """Statically check for proc and variable usage errors.

    Maintains a stack of scopes; each proc/function pushes a scope that maps
    declared names to the keyword that declared them.
    """

    def __init__(self):
        # type: () -> None
        """
        Args:
          oil_proc: Whether to disallow nested proc/function declarations
        """
        # self.tokens for location info: 'proc' or another token
        self.tokens = []  # type: List[Token]
        # Parallel stack: names declared in each scope, mapped to the
        # declaring keyword (Id.KW_Const, Id.KW_Var, ...).
        self.names = []  # type: List[Dict[str, Id_t]]

    def Push(self, blame_tok):
        # type: (Token) -> None
        """Push a new scope for a proc or shell function.

        Bash allows this, but it's confusing because it's the same as two
        functions at the top level.

        f() {
          g() {
            echo 'top level function defined in another one'
          }
        }

        YSH disallows nested procs.
        """
        if len(self.tokens) != 0:
            # Nesting is an error if either the enclosing or the new
            # definition is a proc (shell functions may still nest).
            if self.tokens[0].id == Id.KW_Proc or blame_tok.id == Id.KW_Proc:
                p_die("procs and shell functions can't be nested", blame_tok)

        self.tokens.append(blame_tok)
        entry = {}  # type: Dict[str, Id_t]
        self.names.append(entry)

    def Pop(self):
        # type: () -> None
        # Leave the current proc/function scope.
        self.names.pop()
        self.tokens.pop()

    def Check(self, keyword_id, name_tok):
        # type: (Id_t, Token) -> None
        """Check for errors in declaration and mutation errors.

        var x, const x:
          x already declared
        setvar x:
          x is not declared
          x is constant
        setglobal x:
          No errors are possible; we would need all these many conditions to
          statically know the names:
          - no 'source'
          - shopt -u copy_env.
          - AND use lib has to be static
        setref x:
          Should only mutate out params

        Also should p(:out) declare 'out' as well as '__out'?  Then you can't
        have local variables with the same name.
        """
        # Don't check the global level!  Semantics are different here!
        if len(self.names) == 0:
            return

        top = self.names[-1]
        name = name_tok.tval
        if keyword_id in (Id.KW_Const, Id.KW_Var):
            if name in top:
                p_die('%r was already declared' % name, name_tok)
            else:
                top[name] = keyword_id

        if keyword_id == Id.KW_SetVar:
            if name not in top:
                p_die("%r hasn't been declared" % name, name_tok)

            if name in top and top[name] == Id.KW_Const:
                p_die("Can't modify constant %r" % name, name_tok)

    # TODO: setref should only mutate out params.
436 |
|
437 |
|
438 |
class ctx_VarChecker(object):
    """Context manager: push a VarChecker scope on entry, pop it on exit."""

    def __init__(self, var_checker, blame_tok):
        # type: (VarChecker, Token) -> None
        # Push eagerly, in the constructor, so errors are raised before the
        # 'with' body runs.
        var_checker.Push(blame_tok)
        self.var_checker = var_checker

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, typ, value, traceback):
        # type: (Any, Any, Any) -> None
        self.var_checker.Pop()
|
452 |
|
453 |
|
454 |
class ctx_CmdMode(object):
    """Context manager: temporarily set the parser's cmd_mode, restoring the
    previous mode on exit."""

    def __init__(self, cmd_parse, new_cmd_mode):
        # type: (CommandParser, cmd_mode_t) -> None
        # Save the old mode before switching.
        self.prev_cmd_mode = cmd_parse.cmd_mode
        self.cmd_parse = cmd_parse
        cmd_parse.cmd_mode = new_cmd_mode

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, typ, value, traceback):
        # type: (Any, Any, Any) -> None
        self.cmd_parse.cmd_mode = self.prev_cmd_mode
|
469 |
|
470 |
|
471 |
|
472 |
# Keywords that can only appear INSIDE a compound command (they close or
# continue one): seeing any of these ends the current command list.
SECONDARY_KEYWORDS = [
    Id.KW_Do, Id.KW_Done, Id.KW_Then, Id.KW_Fi, Id.KW_Elif, Id.KW_Else,
    Id.KW_Esac
]
|
476 |
|
477 |
|
478 |
class CommandParser(object):
    """Recursive descent parser derived from the POSIX shell grammar.

    This is a BNF grammar:
    https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_10

    - Augmented with both bash/OSH and YSH constructs.

    - We use regex-like iteration rather than recursive references
      ?  means optional (0 or 1)
      *  means 0 or more
      +  means 1 or more

    - Keywords are spelled in Caps:
      If  Elif  Case

    - Operator tokens are quoted:
      '('  '|'

      or can be spelled directly if it matters:

      Op_LParen  Op_Pipe

    - Non-terminals are snake_case:
      brace_group  subshell

    Methods in this class should ROUGHLY CORRESPOND to grammar productions,
    and the production should be in the method docstrings, e.g.

    def ParseSubshell():
      "
      subshell : '(' compound_list ')'

      Looking at Op_LParen  # Comment to say how this method is called
      "

    The grammar may be factored to make parsing easier.
    """
|
516 |
|
517 |
    def __init__(self,
                 parse_ctx,
                 parse_opts,
                 w_parser,
                 lexer,
                 line_reader,
                 eof_id=Id.Eof_Real):
        # type: (ParseContext, optview.Parse, WordParser, Lexer, _Reader, Id_t) -> None
        """
        Args:
          parse_ctx: shared parsing state (aliases, sub-parser factories)
          parse_opts: parse-time options (parse_brace, parse_at, ...)
          w_parser: word parser this command parser drives
          lexer: for pushing hints and lookahead to (
          line_reader: source of lines; also used for here doc bodies
          eof_id: token Id that terminates parsing (e.g. Eof_Real)
        """
        self.parse_ctx = parse_ctx
        self.aliases = parse_ctx.aliases  # aliases to expand at parse time

        self.parse_opts = parse_opts
        self.w_parser = w_parser  # type: WordParser  # for normal parsing
        self.lexer = lexer  # for pushing hints, lookahead to (
        self.line_reader = line_reader  # for here docs
        self.eof_id = eof_id

        self.arena = line_reader.arena  # for adding here doc and alias spans
        self.aliases_in_flight = []  # type: AliasesInFlight

        # A hacky boolean to remove 'if cd / {' ambiguity.
        self.allow_block = True

        # Stack of booleans for nested Attr and SHELL nodes.
        # Attr nodes allow bare assignment x = 42, but not shell x=42.
        # SHELL nodes are the inverse.  'var x = 42' is preferred in shell
        # nodes, but x=42 is still allowed.
        #
        # Note: this stack could be optimized by turning it into an integer
        # and binary encoding.
        self.hay_attrs_stack = []  # type: List[bool]

        # Note: VarChecker is instantiated with each CommandParser, which
        # means that two 'proc foo' -- inside a command sub and outside --
        # don't conflict, because they use different CommandParser instances.
        # I think this is OK but you can imagine different behaviors.
        self.var_checker = VarChecker()

        self.cmd_mode = cmd_mode_e.Shell  # type: cmd_mode_t

        self.Reset()
|
558 |
|
559 |
    # Init_() function for "keyword arg"
    def Init_AliasesInFlight(self, aliases_in_flight):
        # type: (AliasesInFlight) -> None
        # Share the parent parser's in-flight alias list, so recursive alias
        # expansion can detect cycles across CommandParser instances.
        self.aliases_in_flight = aliases_in_flight
|
563 |
|
564 |
    def Reset(self):
        # type: () -> None
        """Reset our own internal state.

        Called by the interactive loop.
        """
        # Cursor state set by _GetWord()
        self.next_lex_mode = lex_mode_e.ShCommand
        self.cur_word = None  # type: word_t  # current word
        self.c_kind = Kind.Undefined
        self.c_id = Id.Undefined_Tok

        # Here docs whose bodies haven't been read yet; filled in by
        # _GetWord() when it sees a newline.
        self.pending_here_docs = [
        ]  # type: List[Redir]  # should have HereLiteral arg
|
578 |
|
579 |
    def ResetInputObjects(self):
        # type: () -> None
        """Reset the internal state of our inputs.

        Called by the interactive loop.
        """
        self.w_parser.Reset()
        self.lexer.ResetInputObjects()
        self.line_reader.Reset()
|
588 |
|
589 |
    def _SetNext(self):
        # type: () -> None
        """Call this when you no longer need the current token.

        This method is lazy.  A subsequent call to _GetWord() will
        actually read the next Token.
        """
        # Setting the mode to anything other than Undefined marks the cursor
        # as "consumed"; _GetWord() checks for this.
        self.next_lex_mode = lex_mode_e.ShCommand
|
597 |
|
598 |
    def _GetWord(self):
        # type: () -> None
        """Call this when you need to make a decision based on Id or Kind.

        If there was an "unfulfilled" call to _SetNext(), it reads a word and
        sets self.c_id and self.c_kind.

        Otherwise it does nothing (idempotent until the next _SetNext()).
        """
        if self.next_lex_mode != lex_mode_e.Undefined:
            w = self.w_parser.ReadWord(self.next_lex_mode)
            #log("w %s", w)

            # Here docs only happen in command mode, so other kinds of
            # newlines don't count.
            if w.tag() == word_e.Operator:
                tok = cast(Token, w)
                if tok.id == Id.Op_Newline:
                    # A newline flushes all pending here doc bodies, in the
                    # order their operators appeared.
                    for h in self.pending_here_docs:
                        _ParseHereDocBody(self.parse_ctx, h, self.line_reader,
                                          self.arena)
                    del self.pending_here_docs[:]  # No .clear() until Python 3.3.

            self.cur_word = w

            self.c_kind = word_.CommandKind(self.cur_word)
            self.c_id = word_.CommandId(self.cur_word)
            # Mark the cursor as fresh until the next _SetNext().
            self.next_lex_mode = lex_mode_e.Undefined
|
626 |
|
627 |
    def _Eat(self, c_id, msg=None):
        # type: (Id_t, Optional[str]) -> word_t
        """Consume a word of a type, maybe showing a custom error message.

        Args:
          c_id: the Id we expected
          msg: improved error message

        Returns:
          The consumed word.

        Raises:
          error.Parse (via p_die) if the current word has a different Id.
        """
        self._GetWord()
        if self.c_id != c_id:
            if msg is None:
                msg = 'Expected word type %s, got %s' % (ui.PrettyId(c_id),
                                                         ui.PrettyId(self.c_id))
            p_die(msg, loc.Word(self.cur_word))

        skipped = self.cur_word
        self._SetNext()
        return skipped
|
645 |
|
646 |
def _NewlineOk(self):
|
647 |
# type: () -> None
|
648 |
"""Check for optional newline and consume it."""
|
649 |
self._GetWord()
|
650 |
if self.c_id == Id.Op_Newline:
|
651 |
self._SetNext()
|
652 |
|
653 |
def _AtSecondaryKeyword(self):
|
654 |
# type: () -> bool
|
655 |
self._GetWord()
|
656 |
if self.c_id in SECONDARY_KEYWORDS:
|
657 |
return True
|
658 |
return False
|
659 |
|
660 |
    def ParseRedirect(self):
        # type: () -> Redir
        """Parse a single redirect at the cursor: >out, 3>out, {fd}>out,
        <<EOF, etc.

        For here docs, the returned node's body is NOT filled in yet; it is
        appended to pending_here_docs and completed at the next newline.
        """
        self._GetWord()
        assert self.c_kind == Kind.Redir, self.cur_word
        op_tok = cast(Token, self.cur_word)  # for MyPy

        # Note: the lexer could take distinguish between
        #  >out
        #  3>out
        #  {fd}>out
        #
        # which would make the code below faster.  But small string
        # optimization would also speed it up, since redirects are small.

        op_val = lexer.TokenVal(op_tok)
        if op_val[0] == '{':
            # {fd}>out -- redirect to a named variable
            pos = op_val.find('}')
            assert pos != -1  # lexer ensures this
            where = redir_loc.VarName(op_val[1:pos])  # type: redir_loc_t

        elif op_val[0].isdigit():
            # Explicit descriptor: 3>out or 10>out (at most 2 digits)
            pos = 1
            if op_val[1].isdigit():
                pos = 2
            where = redir_loc.Fd(int(op_val[:pos]))

        else:
            # No prefix: use the operator's default fd (0 for <, 1 for >, ...)
            where = redir_loc.Fd(consts.RedirDefaultFd(op_tok.id))

        self._SetNext()

        self._GetWord()
        # Other redirect
        if self.c_kind != Kind.Word:
            p_die('Invalid token after redirect operator',
                  loc.Word(self.cur_word))

        # Here doc
        if op_tok.id in (Id.Redir_DLess, Id.Redir_DLessDash):
            arg = redir_param.HereDoc.CreateNull()
            arg.here_begin = self.cur_word
            arg.stdin_parts = []

            r = Redir(op_tok, where, arg)

            self.pending_here_docs.append(r)  # will be filled on next newline.

            self._SetNext()
            return r

        arg_word = self.cur_word
        tilde = word_.TildeDetect(arg_word)
        if tilde:
            arg_word = tilde
        self._SetNext()

        # We should never get Empty, Token, etc.
        assert arg_word.tag() == word_e.Compound, arg_word
        return Redir(op_tok, where, cast(CompoundWord, arg_word))
|
719 |
|
720 |
def _ParseRedirectList(self):
|
721 |
# type: () -> List[Redir]
|
722 |
"""Try parsing any redirects at the cursor.
|
723 |
|
724 |
This is used for blocks only, not commands.
|
725 |
"""
|
726 |
redirects = [] # type: List[Redir]
|
727 |
while True:
|
728 |
# This prediction needs to ONLY accept redirect operators. Should we
|
729 |
# make them a separate Kind?
|
730 |
self._GetWord()
|
731 |
if self.c_kind != Kind.Redir:
|
732 |
break
|
733 |
|
734 |
node = self.ParseRedirect()
|
735 |
redirects.append(node)
|
736 |
self._SetNext()
|
737 |
|
738 |
return redirects
|
739 |
|
740 |
    def _ScanSimpleCommand(self):
        # type: () -> Tuple[List[Redir], List[CompoundWord], Optional[ArgList], Optional[BlockArg]]
        """First pass: Split into redirects and words.

        Also handles YSH extensions when enabled: a trailing { ... } block
        (parse_brace) and typed proc-call args after '(' -- both of which
        terminate or extend the simple command.
        """
        redirects = []  # type: List[Redir]
        words = []  # type: List[CompoundWord]
        typed_args = None  # type: Optional[ArgList]
        block = None  # type: Optional[BlockArg]

        first_word_caps = False  # does first word look like Caps, but not CAPS

        i = 0
        while True:
            self._GetWord()
            if self.c_kind == Kind.Redir:
                node = self.ParseRedirect()
                redirects.append(node)

            elif self.c_kind == Kind.Word:
                if self.parse_opts.parse_brace():
                    # Treat { and } more like operators
                    if self.c_id == Id.Lit_LBrace:
                        if self.allow_block:  # Disabled for if/while condition, etc.

                            # allow x = 42
                            self.hay_attrs_stack.append(first_word_caps)
                            brace_group = self.ParseBraceGroup()

                            # So we can get the source code back later
                            lines = self.arena.SaveLinesAndDiscard(
                                brace_group.left, brace_group.right)
                            block = BlockArg(brace_group, lines)

                            self.hay_attrs_stack.pop()

                            if 0:
                                print('--')
                                block.PrettyPrint()
                                print('\n--')
                        # A block always ends the simple command.
                        break
                    elif self.c_id == Id.Lit_RBrace:
                        # Another thing: { echo hi }
                        # We're DONE!!!
                        break

                w = cast(CompoundWord, self.cur_word)  # Kind.Word ensures this
                words.append(w)
                if i == 0:
                    ok, word_str, quoted = word_.StaticEval(w)
                    # Foo { a = 1 } is OK, but not foo { a = 1 } or
                    # FOO { a = 1 }
                    if (ok and len(word_str) and word_str[0].isupper() and
                            not word_str.isupper()):
                        first_word_caps = True
                    #log('W %s', word_str)

            elif self.c_id == Id.Op_LParen:
                # 1. Check that there's a preceding space
                prev_byte = self.lexer.ByteLookBack()
                if prev_byte not in (SPACE_CH, TAB_CH):
                    if self.parse_opts.parse_at():
                        p_die('Space required before (',
                              loc.Word(self.cur_word))
                    else:
                        # inline func call like @sorted(x) is invalid in OSH,
                        # but the solution isn't a space
                        p_die(
                            'Unexpected left paren (might need a space before it)',
                            loc.Word(self.cur_word))

                # 2. Check that it's not ().  We disallow this because it's a
                # no-op and there could be confusion with shell func defs.
                # For some reason we need to call lexer.LookPastSpace, not
                # w_parser.LookPastSpace.  I think this is because we're at (,
                # which is an operator token.  All the other cases are like
                # 'x=', which is PART of a word, and we don't know if it will
                # end.
                next_id = self.lexer.LookPastSpace(lex_mode_e.ShCommand)
                if next_id == Id.Op_RParen:
                    p_die('Empty arg list not allowed', loc.Word(self.cur_word))

                typed_args = self.w_parser.ParseProcCallArgs()

            else:
                break

            self._SetNext()
            i += 1
        return redirects, words, typed_args, block
|
826 |
|
827 |
    def _MaybeExpandAliases(self, words):
        # type: (List[CompoundWord]) -> Optional[command_t]
        """Try to expand aliases.

        Args:
          words: A list of Compound

        Returns:
          A new LST node, or None.

        Our implementation of alias has two design choices:
        - Where to insert it in parsing.  We do it at the end of
          ParseSimpleCommand.
        - What grammar rule to parse the expanded alias buffer with.  In our
          case it's ParseCommand().

        This doesn't quite match what other shells do, but I can't figure out
        a better place.

        Most test cases pass, except for ones like:

        alias LBRACE='{'
        LBRACE echo one; echo two; }

        alias MULTILINE='echo 1
        echo 2
        echo 3'
        MULTILINE

        NOTE: dash handles aliases in a totally different way.  It has a
        global variable checkkwd in parser.c.  It assigns it all over the
        grammar, like this:

        checkkwd = CHKNL | CHKKWD | CHKALIAS;

        The readtoken() function checks (checkkwd & CHKALIAS) and then calls
        lookupalias().  This seems to provide a consistent behavior among
        shells, but it's less modular and testable.

        Bash also uses a global 'parser_state & PST_ALEXPNEXT'.

        Returns:
          A command node if any aliases were expanded, or None otherwise.
        """
        # Start a new list if there aren't any.  This will be passed
        # recursively through CommandParser instances.
        aliases_in_flight = (self.aliases_in_flight
                             if len(self.aliases_in_flight) else [])

        # for error message
        first_word_str = None  # type: Optional[str]
        argv0_loc = loc.Word(words[0])

        expanded = []  # type: List[str]
        i = 0
        n = len(words)

        while i < n:
            w = words[i]

            # Only a statically-evaluable, unquoted word can be an alias name.
            ok, word_str, quoted = word_.StaticEval(w)
            if not ok or quoted:
                break

            alias_exp = self.aliases.get(word_str)
            if alias_exp is None:
                break

            # Prevent infinite loops.  This is subtle: we want to prevent
            # infinite expansion of alias echo='echo x'.  But we don't want
            # to prevent expansion of the second word in 'echo echo', so we
            # add 'i' to "aliases_in_flight".
            if (word_str, i) in aliases_in_flight:
                break

            if i == 0:
                first_word_str = word_str  # for error message

            #log('%r -> %r', word_str, alias_exp)
            aliases_in_flight.append((word_str, i))
            expanded.append(alias_exp)
            i += 1

            if not alias_exp.endswith(' '):
                # alias e='echo [ ' is the same expansion as
                # alias e='echo ['
                # The trailing space indicates whether we should continue to
                # expand aliases; it's not part of it.
                expanded.append(' ')
                break  # No more expansions

        if len(expanded) == 0:  # No expansions; caller does parsing.
            return None

        # We are expanding an alias, so copy the rest of the words and
        # re-parse.
        if i < n:
            left_tok = location.LeftTokenForWord(words[i])
            right_tok = location.RightTokenForWord(words[-1])

            # OLD CONSTRAINT
            #assert left_tok.line_id == right_tok.line_id

            words_str = self.arena.SnipCodeString(left_tok, right_tok)
            expanded.append(words_str)

        code_str = ''.join(expanded)

        # TODO:
        # Aliases break static parsing (like backticks), so use our own
        # Arena.  This matters for Hay, which calls SaveLinesAndDiscard().
        # arena = alloc.Arena()
        arena = self.arena

        line_reader = reader.StringLineReader(code_str, arena)
        cp = self.parse_ctx.MakeOshParser(line_reader)
        cp.Init_AliasesInFlight(aliases_in_flight)

        # break circular dep
        from frontend import parse_lib

        # The interaction between COMPLETION and ALIASES requires special
        # care.  See docstring of BeginAliasExpansion() in parse_lib.py.
        src = source.Alias(first_word_str, argv0_loc)
        with alloc.ctx_SourceCode(arena, src):
            with parse_lib.ctx_Alias(self.parse_ctx.trail):
                try:
                    # _ParseCommandTerm() handles multiline commands, compound
                    # commands, etc. as opposed to ParseLogicalLine()
                    node = cp._ParseCommandTerm()
                except error.Parse as e:
                    # Failure to parse alias expansion is a fatal error.
                    # We don't need more handling here.
                    raise

        if 0:
            log('AFTER expansion:')
            node.PrettyPrint()

        return node
|
965 |
|
966 |
    def ParseSimpleCommand(self):
        # type: () -> command_t
        """Fixed transcription of the POSIX grammar (TODO: port to
        grammar/Shell.g)

        io_file : '<' filename
                | LESSAND filename
                  ...

        io_here : DLESS here_end
                | DLESSDASH here_end

        redirect : IO_NUMBER (io_redirect | io_here)

        prefix_part : ASSIGNMENT_WORD | redirect
        cmd_part : WORD | redirect

        assign_kw : Declare | Export | Local | Readonly

        # Without any words it is parsed as a command, not an assignment
        assign_listing : assign_kw

        # Now we have something to do (might be changing assignment flags too)
        # NOTE: any prefixes should be a warning, but they are allowed in shell.
        assignment : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+

        # an external command, a function call, or a builtin -- a "word_command"
        word_command : prefix_part* cmd_part+

        simple_command : assign_listing
                       | assignment
                       | proc_command

        Simple imperative algorithm:

        1) Read a list of words and redirects. Append them to separate lists.
        2) Look for the first non-assignment word. If it's declare, etc., then
        keep parsing words AND assign words. Otherwise, just parse words.
        3) If there are no non-assignment words, then it's a global assignment.

        { redirects, global assignments } OR
        { redirects, prefix_bindings, words } OR
        { redirects, ERROR_prefix_bindings, keyword, assignments, words }

        THEN CHECK that prefix bindings don't have any array literal parts!
        global assignment and keyword assignments can have the of course.
        well actually EXPORT shouldn't have them either -- WARNING

        3 cases we want to warn: prefix_bindings for assignment, and array literal
        in prefix bindings, or export

        A command can be an assignment word, word, or redirect on its own.

            ls
            >out.txt

            >out.txt FOO=bar   # this touches the file

        Or any sequence:
            ls foo bar
            <in.txt ls foo bar >out.txt
            <in.txt ls >out.txt foo bar

        Or add one or more environment bindings:
            VAR=val env
            >out.txt VAR=val env

        here_end vs filename is a matter of whether we test that it's quoted. e.g.
        <<EOF vs <<'EOF'.
        """
        redirects, words, typed_args, block = self._ScanSimpleCommand()

        # Where to point the error arrow if typed args/blocks show up somewhere
        # they aren't allowed (redirect-only commands, assignments, control flow).
        typed_loc = None  # type: Optional[Token]
        if block:
            typed_loc = block.brace_group.left
        if typed_args:
            typed_loc = typed_args.left  # preferred over block location

        if len(words) == 0:  # e.g. >out.txt # redirect without words
            assert len(redirects) != 0
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # A bare redirect becomes a Simple command with no words.
            simple = command.Simple.CreateNull()
            simple.blame_tok = redirects[0].op
            simple.more_env = []
            simple.words = []
            simple.redirects = redirects
            return simple

        # Disallow =a because it's confusing
        part0 = words[0].parts[0]
        if part0.tag() == word_part_e.Literal:
            tok = cast(Token, part0)
            if tok.id == Id.Lit_Equals:
                p_die(
                    "=word isn't allowed. Hint: either quote it or add a space after =\n"
                    "to pretty print an expression", tok)

        # Split FOO=bar prefix bindings from the rest of the words.
        preparsed_list, suffix_words = _SplitSimpleCommandPrefix(words)
        if len(preparsed_list):
            left_token, _, _, _ = preparsed_list[0]

            # Disallow X=Y inside proc and func
            # and inside Hay Attr blocks
            # But allow X=Y at the top level
            # for interactive use foo=bar
            # for global constants GLOBAL=~/src
            # because YSH assignment doesn't have tilde sub
            if len(suffix_words) == 0:
                if self.cmd_mode != cmd_mode_e.Shell or (
                        len(self.hay_attrs_stack) and self.hay_attrs_stack[-1]):
                    p_die('Use var/setvar to assign in YSH', left_token)

        # Set a reference to words and redirects for completion. We want to
        # inspect this state after a failed parse.
        self.parse_ctx.trail.SetLatestWords(suffix_words, redirects)

        if len(suffix_words) == 0:
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # ShAssignment: No suffix words like ONE=1 a[x]=1 TWO=2
            pairs = []  # type: List[AssignPair]
            for preparsed in preparsed_list:
                pairs.append(
                    _MakeAssignPair(self.parse_ctx, preparsed, self.arena))

            left_tok = location.LeftTokenForCompoundWord(words[0])
            return command.ShAssignment(left_tok, pairs, redirects)

        kind, kw_token = word_.IsControlFlow(suffix_words[0])

        if kind == Kind.ControlFlow:
            if kw_token.id == Id.ControlFlow_Return:
                # return x - inside procs and shell functions
                # return (x) - inside funcs
                if typed_args is None:
                    if self.cmd_mode not in (cmd_mode_e.Shell, cmd_mode_e.Proc):
                        p_die('Shell-style returns not allowed here', kw_token)
                else:
                    if self.cmd_mode != cmd_mode_e.Func:
                        p_die('Typed return is only allowed inside func',
                              typed_loc)
                    if len(typed_args.pos_args) != 1:
                        p_die("Typed return expects one argument", typed_loc)
                    if len(typed_args.named_args) != 0:
                        p_die("Typed return doesn't take named arguments",
                              typed_loc)
                    return command.Retval(kw_token, typed_args.pos_args[0])

            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)
            if not self.parse_opts.parse_ignored() and len(redirects):
                p_die("Control flow shouldn't have redirects", kw_token)

            if len(preparsed_list):  # FOO=bar local spam=eggs not allowed
                # TODO: Change location as above
                left_token, _, _, _ = preparsed_list[0]
                p_die("Control flow shouldn't have environment bindings",
                      left_token)

            # Attach the token for errors. (ShAssignment may not need it.)
            if len(suffix_words) == 1:
                arg_word = None  # type: Optional[word_t]
            elif len(suffix_words) == 2:
                arg_word = suffix_words[1]
            else:
                p_die('Unexpected argument to %r' % lexer.TokenVal(kw_token),
                      loc.Word(suffix_words[2]))

            return command.ControlFlow(kw_token, arg_word)

        # Alias expansion only understands words, not typed args ( ) or block { }
        if not typed_args and not block and self.parse_opts.expand_aliases():
            # If any expansions were detected, then parse again.
            expanded_node = self._MaybeExpandAliases(suffix_words)
            if expanded_node:
                # Attach env bindings and redirects to the expanded node.
                more_env = []  # type: List[EnvPair]
                _AppendMoreEnv(preparsed_list, more_env)
                exp = command.ExpandedAlias(expanded_node, redirects, more_env)
                return exp

        # TODO: check that we don't have env1=x x[1]=y env2=z here.

        # FOO=bar printenv.py FOO
        node = _MakeSimpleCommand(preparsed_list, suffix_words, redirects,
                                  typed_args, block)
        return node
    def ParseBraceGroup(self):
        # type: () -> BraceGroup
        """
        Original:
          brace_group : LBrace command_list RBrace ;

        YSH:
          brace_group : LBrace (Op_Newline IgnoredComment?)? command_list RBrace ;

        The doc comment can only occur if there's a newline.
        """
        ate = self._Eat(Id.Lit_LBrace)
        left = word_.BraceToken(ate)

        doc_token = None  # type: Token
        self._GetWord()
        if self.c_id == Id.Op_Newline:
            self._SetNext()
            # Emit comment tokens only inside this context, so we can capture a
            # doc comment on the line right after '{'.
            with word_.ctx_EmitDocToken(self.w_parser):
                self._GetWord()

        if self.c_id == Id.Ignored_Comment:
            doc_token = cast(Token, self.cur_word)
            self._SetNext()

        c_list = self._ParseCommandList()

        ate = self._Eat(Id.Lit_RBrace)
        right = word_.BraceToken(ate)

        # Note(andychu): Related ASDL bug #1216. Choosing the Python [] behavior
        # would allow us to revert this back to None, which was changed in
        # https://github.com/oilshell/oil/pull/1211. Choosing the C++ nullptr
        # behavior saves allocations, but is less type safe.
        return BraceGroup(left, doc_token, c_list.children, [],
                          right)  # no redirects yet
def ParseDoGroup(self):
|
1195 |
# type: () -> command.DoGroup
|
1196 |
"""Used by ForEach, ForExpr, While, Until. Should this be a Do node?
|
1197 |
|
1198 |
do_group : Do command_list Done ; /* Apply rule 6 */
|
1199 |
"""
|
1200 |
ate = self._Eat(Id.KW_Do)
|
1201 |
do_kw = word_.AsKeywordToken(ate)
|
1202 |
|
1203 |
c_list = self._ParseCommandList() # could be anything
|
1204 |
|
1205 |
ate = self._Eat(Id.KW_Done)
|
1206 |
done_kw = word_.AsKeywordToken(ate)
|
1207 |
|
1208 |
return command.DoGroup(do_kw, c_list.children, done_kw)
|
1209 |
|
1210 |
    def ParseForWords(self):
        # type: () -> Tuple[List[CompoundWord], Optional[Token]]
        """
        for_words : WORD* for_sep
                  ;
        for_sep   : ';' newline_ok
                  | NEWLINES
                  ;
        """
        words = []  # type: List[CompoundWord]
        # The span_id of any semi-colon, so we can remove it.
        semi_tok = None  # type: Optional[Token]

        while True:
            self._GetWord()
            if self.c_id == Id.Op_Semi:
                tok = cast(Token, self.cur_word)
                semi_tok = tok
                self._SetNext()
                self._NewlineOk()
                break
            elif self.c_id == Id.Op_Newline:
                self._SetNext()
                break
            elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
                # YSH form: 'for x in a b {' -- '{' ends the word list without
                # a separator.
                break

            if self.cur_word.tag() != word_e.Compound:
                # TODO: Can we also show a pointer to the 'for' keyword?
                p_die('Invalid word in for loop', loc.Word(self.cur_word))

            w2 = cast(CompoundWord, self.cur_word)
            words.append(w2)
            self._SetNext()
        return words, semi_tok
    def _ParseForExprLoop(self, for_kw):
        # type: (Token) -> command.ForExpr
        """
        Shell:
          for '((' init ';' cond ';' update '))' for_sep? do_group

        YSH:
          for '((' init ';' cond ';' update '))' for_sep? brace_group
        """
        # The word parser reads the whole (( ... )) arithmetic header.
        node = self.w_parser.ReadForExpression()
        node.keyword = for_kw

        self._SetNext()

        # Optional separator between '))' and the body.
        self._GetWord()
        if self.c_id == Id.Op_Semi:
            self._SetNext()
            self._NewlineOk()
        elif self.c_id == Id.Op_Newline:
            self._SetNext()
        elif self.c_id == Id.KW_Do:  # missing semicolon/newline allowed
            pass
        elif self.c_id == Id.Lit_LBrace:  # does NOT require parse_brace
            pass
        else:
            p_die('Invalid word after for expression', loc.Word(self.cur_word))

        if self.c_id == Id.Lit_LBrace:
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()
        return node
    def _ParseForEachLoop(self, for_kw):
        # type: (Token) -> command.ForEach
        """Parse 'for NAME...' loops: POSIX 'for x in ...; do', and YSH
        'for x in (expr) {' / 'for x, y in ... {' variants."""
        node = command.ForEach.CreateNull(alloc_lists=True)
        node.keyword = for_kw

        num_iter_names = 0
        while True:
            w = self.cur_word

            # Hack that makes the language more familiar:
            # - 'x, y' is accepted, but not 'x,y' or 'x ,y'
            # - 'x y' is also accepted but not idiomatic.
            UP_w = w
            if w.tag() == word_e.Compound:
                w = cast(CompoundWord, UP_w)
                if word_.LiteralId(w.parts[-1]) == Id.Lit_Comma:
                    w.parts.pop()

            ok, iter_name, quoted = word_.StaticEval(w)
            if not ok or quoted:  # error: for $x
                p_die('Expected loop variable (a constant word)', loc.Word(w))

            if not match.IsValidVarName(iter_name):  # error: for -
                # TODO: consider commas?
                if ',' in iter_name:
                    p_die('Loop variables look like x, y (fix spaces)',
                          loc.Word(w))
                p_die('Invalid loop variable name %r' % iter_name, loc.Word(w))

            node.iter_names.append(iter_name)
            num_iter_names += 1
            self._SetNext()

            self._GetWord()
            # 'in' or 'do' or ';' or Op_Newline marks the end of variable names
            # Subtlety: 'var' is KW_Var and is a valid loop name
            if self.c_id in (Id.KW_In, Id.KW_Do) or self.c_kind == Kind.Op:
                break

            if num_iter_names == 3:
                p_die('Unexpected word after 3 loop variables',
                      loc.Word(self.cur_word))

        self._NewlineOk()

        self._GetWord()
        if self.c_id == Id.KW_In:
            # Ideally we would want ( not 'in'. But we still have to fix the bug
            # where we require a SPACE between in and (
            # for x in(y) # should be accepted, but isn't

            expr_blame = word_.AsKeywordToken(self.cur_word)

            self._SetNext()  # skip in
            if self.w_parser.LookPastSpace() == Id.Op_LParen:
                # YSH: iterate over an expression, e.g. for x in (mylist) {
                enode, last_token = self.parse_ctx.ParseYshExpr(
                    self.lexer, grammar_nt.oil_expr)
                node.iterable = for_iter.YshExpr(enode, expr_blame)

                # For simplicity, we don't accept for x in (obj); do ...
                self._GetWord()
                if self.c_id != Id.Lit_LBrace:
                    p_die('Expected { after iterable expression',
                          loc.Word(self.cur_word))
            else:
                # POSIX: iterate over a word list.
                semi_tok = None  # type: Optional[Token]
                iter_words, semi_tok = self.ParseForWords()
                node.semi_tok = semi_tok

                if not self.parse_opts.parse_bare_word() and len(
                        iter_words) == 1:
                    ok, s, quoted = word_.StaticEval(iter_words[0])
                    if ok and match.IsValidVarName(s) and not quoted:
                        p_die(
                            'Surround this word with either parens or quotes (parse_bare_word)',
                            loc.Word(iter_words[0]))

                words2 = braces.BraceDetectAll(iter_words)
                words3 = word_.TildeDetectAll(words2)
                node.iterable = for_iter.Words(words3)

                # Now that we know there are words, do an extra check
                if num_iter_names > 2:
                    p_die('Expected at most 2 loop variables', for_kw)

        elif self.c_id == Id.KW_Do:
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            # do not advance

        elif self.c_id == Id.Op_Semi:  # for x; do
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            self._SetNext()

        else:  # for foo BAD
            p_die('Unexpected word after for loop variable',
                  loc.Word(self.cur_word))

        self._GetWord()
        if self.c_id == Id.Lit_LBrace:  # parse_opts.parse_brace() must be on
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()

        return node
def ParseFor(self):
|
1385 |
# type: () -> command_t
|
1386 |
"""
|
1387 |
TODO: Update the grammar
|
1388 |
|
1389 |
for_clause : For for_name newline_ok (in for_words? for_sep)? do_group ;
|
1390 |
| For '((' ... TODO
|
1391 |
"""
|
1392 |
ate = self._Eat(Id.KW_For)
|
1393 |
for_kw = word_.AsKeywordToken(ate)
|
1394 |
|
1395 |
self._GetWord()
|
1396 |
if self.c_id == Id.Op_DLeftParen:
|
1397 |
if not self.parse_opts.parse_dparen():
|
1398 |
p_die("Bash for loops aren't allowed (parse_dparen)",
|
1399 |
loc.Word(self.cur_word))
|
1400 |
|
1401 |
# for (( i = 0; i < 10; i++)
|
1402 |
n1 = self._ParseForExprLoop(for_kw)
|
1403 |
n1.redirects = self._ParseRedirectList()
|
1404 |
return n1
|
1405 |
else:
|
1406 |
# for x in a b; do echo hi; done
|
1407 |
n2 = self._ParseForEachLoop(for_kw)
|
1408 |
n2.redirects = self._ParseRedirectList()
|
1409 |
return n2
|
1410 |
|
1411 |
def _ParseConditionList(self):
|
1412 |
# type: () -> condition_t
|
1413 |
"""
|
1414 |
condition_list: command_list
|
1415 |
|
1416 |
This is a helper to parse a condition list for if commands and while/until
|
1417 |
loops. It will throw a parse error if there are no conditions in the list.
|
1418 |
"""
|
1419 |
self.allow_block = False
|
1420 |
commands = self._ParseCommandList()
|
1421 |
self.allow_block = True
|
1422 |
|
1423 |
if len(commands.children) == 0:
|
1424 |
p_die("Expected a condition", loc.Word(self.cur_word))
|
1425 |
|
1426 |
return condition.Shell(commands.children)
|
1427 |
|
1428 |
    def ParseWhileUntil(self, keyword):
        # type: (Token) -> command.WhileUntil
        """
        while_clause : While command_list do_group ;
        until_clause : Until command_list do_group ;
        """
        self._SetNext()  # skip keyword

        if self.parse_opts.parse_paren() and self.w_parser.LookPastSpace(
        ) == Id.Op_LParen:
            # YSH: while (x < 3) { ... }
            enode, _ = self.parse_ctx.ParseYshExpr(self.lexer,
                                                   grammar_nt.oil_expr)
            cond = condition.YshExpr(enode)  # type: condition_t
        else:
            cond = self._ParseConditionList()

        # NOTE: The LSTs will be different for OSH and YSH, but the execution
        # should be unchanged. To be sure we should desugar.
        self._GetWord()
        if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            # while test -f foo {
            body_node = self.ParseBraceGroup()  # type: command_t
        else:
            body_node = self.ParseDoGroup()

        # no redirects yet
        return command.WhileUntil(keyword, cond, body_node, None)
    def ParseCaseArm(self):
        # type: () -> CaseArm
        """
        case_item: '('? pattern ('|' pattern)* ')'
                   newline_ok command_term? trailer? ;

        Looking at '(' or pattern
        """
        # The ')' closing the pattern list is not a subshell close here.
        self.lexer.PushHint(Id.Op_RParen, Id.Right_CasePat)

        left_tok = location.LeftTokenForWord(self.cur_word)  # ( or pat

        if self.c_id == Id.Op_LParen:  # Optional (
            self._SetNext()

        pat_words = []  # type: List[word_t]
        while True:
            self._GetWord()
            if self.c_kind != Kind.Word:
                p_die('Expected case pattern', loc.Word(self.cur_word))
            pat_words.append(self.cur_word)
            self._SetNext()

            self._GetWord()
            if self.c_id == Id.Op_Pipe:
                self._SetNext()
            else:
                break

        ate = self._Eat(Id.Right_CasePat)
        middle_tok = word_.AsOperatorToken(ate)

        self._NewlineOk()

        self._GetWord()
        if self.c_id not in (Id.Op_DSemi, Id.KW_Esac):
            c_list = self._ParseCommandTerm()
            action_children = c_list.children
        else:
            # Empty arm body, e.g. 'pat) ;;'
            action_children = []

        dsemi_tok = None  # type: Token
        self._GetWord()
        if self.c_id == Id.KW_Esac:  # missing last ;;
            pass
        elif self.c_id == Id.Op_DSemi:
            dsemi_tok = word_.AsOperatorToken(self.cur_word)
            self._SetNext()
        else:
            # Happens on EOF
            p_die('Expected ;; or esac', loc.Word(self.cur_word))

        self._NewlineOk()

        return CaseArm(left_tok, pat.Words(pat_words), middle_tok,
                       action_children, dsemi_tok)
    def ParseYshCaseArm(self, discriminant):
        # type: (Id_t) -> CaseArm
        """
        case_item : pattern newline_ok brace_group newline_ok
        pattern   : pat_words
                  | pat_exprs
                  | pat_eggex
                  | pat_else
        pat_words : pat_word (newline_ok '|' newline_ok pat_word)*
        pat_exprs : pat_expr (newline_ok '|' newline_ok pat_expr)*
        pat_word  : WORD
        pat_eggex : '/' oil_eggex '/'
        pat_expr  : '(' oil_expr ')'
        pat_else  : '(' Id.KW_Else ')'

        Looking at: 'pattern'

        Note that the trailing `newline_ok` in `case_item` is handled by
        `ParseYshCase`. We do this because parsing that `newline_ok` returns
        the next "discriminant" for the next token, so it makes more sense to
        handle it there.
        """
        left_tok = None  # type: Token
        pattern = None  # type: pat_t

        if discriminant in (Id.Op_LParen, Id.Arith_Slash):
            # pat_exprs, pat_else or pat_eggex
            pattern, left_tok = self.w_parser.ParseYshCasePattern()
        else:
            # pat_words
            pat_words = []  # type: List[word_t]
            while True:
                self._GetWord()
                if self.c_kind != Kind.Word:
                    p_die('Expected case pattern', loc.Word(self.cur_word))
                pat_words.append(self.cur_word)
                self._SetNext()

                if not left_tok:
                    left_tok = location.LeftTokenForWord(self.cur_word)

                self._NewlineOk()

                self._GetWord()
                if self.c_id == Id.Op_Pipe:
                    self._SetNext()
                    self._NewlineOk()
                else:
                    break
            pattern = pat.Words(pat_words)

        self._NewlineOk()
        action = self.ParseBraceGroup()

        # The left token of the action is our "middle" token
        return CaseArm(left_tok, pattern, action.left, action.children,
                       action.right)
    def ParseYshCase(self, case_kw):
        # type: (Token) -> command.Case
        """
        ysh_case : Case '(' expr ')' LBrace newline_ok ysh_case_arm* RBrace ;

        Looking at: token after 'case'
        """
        enode, _ = self.parse_ctx.ParseYshExpr(self.lexer, grammar_nt.oil_expr)
        to_match = case_arg.YshExpr(enode)

        ate = self._Eat(Id.Lit_LBrace)
        arms_start = word_.BraceToken(ate)

        discriminant = self.w_parser.NewlineOkForYshCase()

        # Note: for now, zero arms are accepted, just like POSIX case $x in esac
        arms = []  # type: List[CaseArm]
        while discriminant != Id.Op_RBrace:
            arm = self.ParseYshCaseArm(discriminant)
            arms.append(arm)

            discriminant = self.w_parser.NewlineOkForYshCase()

        # NewlineOkForYshCase leaves the lexer in lex_mode_e.Expr. So the '}'
        # token is read as an Id.Op_RBrace, but we need to store this as a
        # Id.Lit_RBrace.
        ate = self._Eat(Id.Op_RBrace)
        arms_end = word_.AsOperatorToken(ate)
        arms_end.id = Id.Lit_RBrace

        return command.Case(case_kw, to_match, arms_start, arms, arms_end, None)
    def ParseOldCase(self, case_kw):
        # type: (Token) -> command.Case
        """
        case_clause : Case WORD newline_ok In newline_ok case_arm* Esac ;

        -> Looking at WORD

        FYI original POSIX case list, which takes pains for DSEMI

        case_list: case_item (DSEMI newline_ok case_item)* DSEMI? newline_ok;
        """
        self._GetWord()
        w = self.cur_word
        if not self.parse_opts.parse_bare_word():
            ok, s, quoted = word_.StaticEval(w)
            if ok and not quoted:
                p_die(
                    "This is a constant string. You may want a variable like $x (parse_bare_word)",
                    loc.Word(w))

        if w.tag() != word_e.Compound:
            p_die("Expected a word to match against", loc.Word(w))

        to_match = case_arg.Word(w)
        self._SetNext()  # past WORD

        self._NewlineOk()

        ate = self._Eat(Id.KW_In)
        arms_start = word_.AsKeywordToken(ate)

        self._NewlineOk()

        arms = []  # type: List[CaseArm]
        while True:
            self._GetWord()
            if self.c_id == Id.KW_Esac:  # this is Kind.Word
                break
            # case arm should begin with a pattern word or (
            if self.c_kind != Kind.Word and self.c_id != Id.Op_LParen:
                break

            arm = self.ParseCaseArm()
            arms.append(arm)

        ate = self._Eat(Id.KW_Esac)
        arms_end = word_.AsKeywordToken(ate)

        # no redirects yet
        return command.Case(case_kw, to_match, arms_start, arms, arms_end, None)
def ParseCase(self):
|
1655 |
# type: () -> command.Case
|
1656 |
"""
|
1657 |
case_clause : old_case # from POSIX
|
1658 |
| ysh_case
|
1659 |
;
|
1660 |
|
1661 |
Looking at 'Case'
|
1662 |
"""
|
1663 |
case_kw = word_.AsKeywordToken(self.cur_word)
|
1664 |
self._SetNext() # past 'case'
|
1665 |
|
1666 |
if self.w_parser.LookPastSpace() == Id.Op_LParen:
|
1667 |
return self.ParseYshCase(case_kw)
|
1668 |
else:
|
1669 |
return self.ParseOldCase(case_kw)
|
1670 |
|
1671 |
    def _ParseYshElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse the elif/else tail of a YSH-style if, appending to if_node.

        if test -f foo {
            echo foo
        } elif test -f bar; test -f spam {
          # ^ we parsed up to here
            echo bar
        } else {
            echo none
        }
        """
        arms = if_node.arms

        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # skip elif
            if (self.parse_opts.parse_paren() and
                    self.w_parser.LookPastSpace() == Id.Op_LParen):
                # elif (x > 0) { ... }
                enode, _ = self.parse_ctx.ParseYshExpr(self.lexer,
                                                       grammar_nt.oil_expr)
                cond = condition.YshExpr(enode)  # type: condition_t
            else:
                # Shell-style condition; block args are ambiguous here.
                self.allow_block = False
                commands = self._ParseCommandList()
                self.allow_block = True
                cond = condition.Shell(commands.children)

            body = self.ParseBraceGroup()
            self._GetWord()

            arm = IfArm(elif_kw, cond, None, body.children, [elif_kw.span_id])
            arms.append(arm)

        self._GetWord()
        if self.c_id == Id.KW_Else:
            self._SetNext()
            body = self.ParseBraceGroup()
            if_node.else_action = body.children
    def _ParseYshIf(self, if_kw, cond):
        # type: (Token, condition_t) -> command.If
        """Parse the brace-group body and elif/else tail of a YSH if.

        if test -f foo {
          # ^ we parsed up to here
            echo foo
        } elif test -f bar; test -f spam {
            echo bar
        } else {
            echo none
        }

        NOTE: If you do something like if test -n foo{, the parser keeps going,
        and the error is confusing because it doesn't point to the right place.

        I think we might need strict_brace so that foo{ is disallowed. It has to
        be foo\{ or foo{a,b}. Or just turn that on with parse_brace? After you
        form ANY CompoundWord, make sure it's balanced for Lit_LBrace and
        Lit_RBrace? Maybe this is pre-parsing step in the WordParser?
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_node.if_kw = if_kw

        body1 = self.ParseBraceGroup()
        # Every arm has 1 spid, unlike shell-style
        # TODO: We could get the spids from the brace group.
        arm = IfArm(if_kw, cond, None, body1.children, [if_kw.span_id])

        if_node.arms.append(arm)

        self._GetWord()
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseYshElifElse(if_node)
        # the whole if node has the 'else' spid, unlike shell-style there's no 'fi'
        # spid because that's in the BraceGroup.
        return if_node
    def _ParseElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse the shell-style elif/else tail, appending arms to if_node.

        else_part: (Elif command_list Then command_list)* Else command_list ;
        """
        arms = if_node.arms

        self._GetWord()
        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # past 'elif'

            cond = self._ParseConditionList()

            ate = self._Eat(Id.KW_Then)
            then_kw = word_.AsKeywordToken(ate)

            body = self._ParseCommandList()
            arm = IfArm(elif_kw, cond, then_kw, body.children,
                        [elif_kw.span_id, then_kw.span_id])

            arms.append(arm)

            self._GetWord()
        if self.c_id == Id.KW_Else:
            else_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # past 'else'
            body = self._ParseCommandList()
            if_node.else_action = body.children
        else:
            else_kw = None

        if_node.else_kw = else_kw
    def ParseIf(self):
        # type: () -> command.If
        """
        if_clause : If command_list Then command_list else_part? Fi ;

        open  : '{' | Then
        close : '}' | Fi

        ysh_if : If ( command_list | '(' expr ')' )
                 open command_list else_part? close;

        There are 2 conditionals here: parse_paren, then parse_brace
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_kw = word_.AsKeywordToken(self.cur_word)
        if_node.if_kw = if_kw
        self._SetNext()  # past 'if'

        if self.parse_opts.parse_paren() and self.w_parser.LookPastSpace(
        ) == Id.Op_LParen:
            # if (x + 1)
            enode, _ = self.parse_ctx.ParseYshExpr(self.lexer,
                                                   grammar_nt.oil_expr)
            cond = condition.YshExpr(enode)  # type: condition_t
        else:
            # if echo 1; echo 2; then
            # Remove ambiguity with if cd / {
            cond = self._ParseConditionList()

        self._GetWord()
        if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            # YSH body; hands off to the brace-group form of if.
            return self._ParseYshIf(if_kw, cond)

        ate = self._Eat(Id.KW_Then)
        then_kw = word_.AsKeywordToken(ate)

        body = self._ParseCommandList()

        # First arm
        arm = IfArm(if_kw, cond, then_kw, body.children,
                    [if_kw.span_id, then_kw.span_id])
        if_node.arms.append(arm)

        # 2nd to Nth arm
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseElifElse(if_node)

        ate = self._Eat(Id.KW_Fi)
        if_node.fi_kw = word_.AsKeywordToken(ate)

        return if_node
def ParseTime(self):
|
1829 |
# type: () -> command_t
|
1830 |
"""Time [-p] pipeline.
|
1831 |
|
1832 |
According to bash help.
|
1833 |
"""
|
1834 |
time_kw = word_.AsKeywordToken(self.cur_word)
|
1835 |
self._SetNext() # skip time
|
1836 |
pipeline = self.ParsePipeline()
|
1837 |
return command.TimeBlock(time_kw, pipeline)
|
1838 |
|
1839 |
def ParseCompoundCommand(self):
    # type: () -> command_t
    """Dispatch on the current word to one of the compound-command parsers.

    Refactoring: we put io_redirect* here instead of in function_body and
    command.

    compound_command : brace_group io_redirect*
                     | subshell io_redirect*
                     | for_clause io_redirect*
                     | while_clause io_redirect*
                     | until_clause io_redirect*
                     | if_clause io_redirect*
                     | case_clause io_redirect*

                     # bash extensions
                     | time_clause
                     | [[ BoolExpr ]]
                     | (( ArithExpr ))

    Raises:
      error via p_die() when no compound command starts here.
    """
    self._GetWord()
    if self.c_id == Id.Lit_LBrace:
        n1 = self.ParseBraceGroup()
        # Trailing redirects apply to the whole group.
        n1.redirects = self._ParseRedirectList()
        return n1
    if self.c_id == Id.Op_LParen:
        n2 = self.ParseSubshell()
        n2.redirects = self._ParseRedirectList()
        return n2

    if self.c_id == Id.KW_For:
        # Note: Redirects parsed in this call. POSIX for and bash for (( have
        # redirects, but YSH for doesn't.
        return self.ParseFor()
    if self.c_id in (Id.KW_While, Id.KW_Until):
        keyword = word_.AsKeywordToken(self.cur_word)
        n3 = self.ParseWhileUntil(keyword)
        n3.redirects = self._ParseRedirectList()
        return n3

    if self.c_id == Id.KW_If:
        n4 = self.ParseIf()
        n4.redirects = self._ParseRedirectList()
        return n4
    if self.c_id == Id.KW_Case:
        n5 = self.ParseCase()
        n5.redirects = self._ParseRedirectList()
        return n5

    if self.c_id == Id.KW_DLeftBracket:
        n6 = self.ParseDBracket()
        n6.redirects = self._ParseRedirectList()
        return n6
    if self.c_id == Id.Op_DLeftParen:
        # '((' is only arithmetic when the option allows it; otherwise the
        # user probably meant nested subshells, so hint at that.
        if not self.parse_opts.parse_dparen():
            p_die('You may want a space between parens (parse_dparen)',
                  loc.Word(self.cur_word))
        n7 = self.ParseDParen()
        n7.redirects = self._ParseRedirectList()
        return n7

    # bash extensions: no redirects
    if self.c_id == Id.KW_Time:
        return self.ParseTime()

    # Happens in function body, e.g. myfunc() oops
    p_die('Unexpected word while parsing compound command',
          loc.Word(self.cur_word))
    assert False  # for MyPy
|
1907 |
|
1908 |
def ParseFunctionDef(self):
    # type: () -> command.ShFunction
    """Parse a POSIX-style shell function definition, e.g. f() { echo; }.

    function_header : fname '(' ')'
    function_def    : function_header newline_ok function_body ;

    Precondition: Looking at the function name.

    NOTE: There is an ambiguity with:

    function foo ( echo hi ) and
    function foo () ( echo hi )

    Bash only accepts the latter, though it doesn't really follow a grammar.
    """
    word0 = cast(CompoundWord, self.cur_word)  # caller ensures validity
    name = word_.ShFunctionName(word0)
    if len(name) == 0:  # example: foo$x is invalid
        p_die('Invalid function name', loc.Word(word0))

    part0 = word0.parts[0]
    # If we got a non-empty string from ShFunctionName, this should be true.
    assert part0.tag() == word_part_e.Literal
    blame_tok = cast(Token, part0)  # for ctx_VarChecker

    self._SetNext()  # move past function name

    # Must be true because of lookahead
    self._GetWord()
    assert self.c_id == Id.Op_LParen, self.cur_word

    # Retranslate ')' so it closes the function header, not a subshell.
    self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
    self._SetNext()

    self._GetWord()
    if self.c_id == Id.Right_ShFunction:
        # 'f ()' implies a function definition, since invoking it with no args
        # would just be 'f'
        self._SetNext()

        self._NewlineOk()

        func = command.ShFunction.CreateNull()
        func.name = name
        with ctx_VarChecker(self.var_checker, blame_tok):
            func.body = self.ParseCompoundCommand()

        func.name_tok = location.LeftTokenForCompoundWord(word0)
        return func
    else:
        p_die('Expected ) in function definition', loc.Word(self.cur_word))
        return None  # unreachable; p_die() raises (kept for MyPy)
|
1960 |
|
1961 |
def ParseKshFunctionDef(self):
    # type: () -> command.ShFunction
    """Parse a ksh/bash-style function definition with the 'function' keyword.

    ksh_function_def : 'function' fname ( '(' ')' )? newline_ok function_body
    """
    keyword_tok = word_.AsKeywordToken(self.cur_word)

    self._SetNext()  # skip past 'function'
    self._GetWord()

    cur_word = cast(CompoundWord, self.cur_word)  # caller ensures validity
    name = word_.ShFunctionName(cur_word)
    if len(name) == 0:  # example: foo$x is invalid
        p_die('Invalid KSH-style function name', loc.Word(cur_word))

    name_word = self.cur_word
    self._SetNext()  # skip past the function name

    # The '( )' pair is optional in this style.
    self._GetWord()
    if self.c_id == Id.Op_LParen:
        self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
        self._SetNext()
        self._Eat(Id.Right_ShFunction)

    self._NewlineOk()

    func = command.ShFunction.CreateNull()
    func.name = name
    with ctx_VarChecker(self.var_checker, keyword_tok):
        func.body = self.ParseCompoundCommand()

    func.keyword = keyword_tok
    func.name_tok = location.LeftTokenForWord(name_word)
    return func
|
1995 |
|
1996 |
def ParseYshProc(self):
    # type: () -> command.Proc
    """Parse a YSH 'proc NAME (params) { ... }' definition.

    Precondition: looking at the 'proc' keyword.
    """
    node = command.Proc.CreateNull(alloc_lists=True)

    keyword_tok = word_.AsKeywordToken(self.cur_word)
    node.keyword = keyword_tok

    with ctx_VarChecker(self.var_checker, keyword_tok):
        # The body is parsed in Proc cmd_mode (affects e.g. return checks).
        with ctx_CmdMode(self, cmd_mode_e.Proc):
            self.w_parser.ParseProc(node)
            if node.sig.tag() == proc_sig_e.Closed:  # Register params
                sig = cast(proc_sig.Closed, node.sig)

                # Treat params as variables.
                for param in sig.word_params:
                    # TODO: Check() should not look at tval
                    name_tok = param.blame_tok
                    self.var_checker.Check(Id.KW_Var, name_tok)
                if sig.rest_of_words:
                    name_tok = sig.rest_of_words.blame_tok
                    self.var_checker.Check(Id.KW_Var, name_tok)
                # We COULD register __out here but it would require a different API.
                #if param.prefix and param.prefix.id == Id.Arith_Colon:
                #  self.var_checker.Check(Id.KW_Var, '__' + param.name)

            self._SetNext()
            node.body = self.ParseBraceGroup()
            # No redirects for YSH procs (only at call site)

    return node
|
2026 |
|
2027 |
def ParseYshFunc(self):
    # type: () -> command.Func
    """Parse a YSH 'func' definition.

    ysh_func: KW_Func Expr_Name '(' [func_params] [';' func_params] ')' brace_group

    Looking at KW_Func
    """
    node = command.Func.CreateNull(alloc_lists=True)

    keyword_tok = word_.AsKeywordToken(self.cur_word)
    node.keyword = keyword_tok

    with ctx_VarChecker(self.var_checker, keyword_tok):
        self.parse_ctx.ParseFunc(self.lexer, node)

        # Register the declared parameters as variables.
        for param in node.pos_params:
            name_tok = param.blame_tok
            self.var_checker.Check(Id.KW_Var, name_tok)
        if node.rest_of_pos:
            name_tok = node.rest_of_pos.blame_tok
            self.var_checker.Check(Id.KW_Var, name_tok)

        self._SetNext()
        # The body is parsed in Func cmd_mode.
        with ctx_CmdMode(self, cmd_mode_e.Func):
            node.body = self.ParseBraceGroup()

    return node
|
2054 |
|
2055 |
def ParseCoproc(self):
    # type: () -> command_t
    """Placeholder for parsing the 'coproc' construct -- not implemented.

    TODO: command.Coproc?
    """
    raise NotImplementedError()
|
2061 |
|
2062 |
def ParseSubshell(self):
    # type: () -> command.Subshell
    """Parse a subshell: '(' compound_list ')'.

    Precondition: looking at Op_LParen.
    """
    left_tok = word_.AsOperatorToken(self.cur_word)
    self._SetNext()  # move past '('

    # Ensure that something like $( (cd / && pwd) ) works.  If ')' is already
    # on the translation stack, we want to delay it.
    self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)

    body = self._ParseCommandList()
    # Unwrap a single-child CommandList to simplify the AST.
    if len(body.children) == 1:
        node = body.children[0]
    else:
        node = body

    right_tok = word_.AsOperatorToken(self._Eat(Id.Right_Subshell))

    return command.Subshell(left_tok, node, right_tok, None)  # no redirects yet
|
2087 |
|
2088 |
def ParseDBracket(self):
    # type: () -> command.DBracket
    """Hand the underlying word parser off to the boolean expression parser.

    Parses bash-style [[ ... ]].
    """
    left_tok = word_.AsKeywordToken(self.cur_word)
    # TODO: Test interactive. Without closing ]], you should get > prompt
    # (PS2)

    self._SetNext()  # move past '[['
    expr_parser = bool_parse.BoolParser(self.w_parser)
    expr, right_tok = expr_parser.Parse()  # may raise
    return command.DBracket(left_tok, expr, right_tok, None)  # no redirects yet
|
2100 |
|
2101 |
def ParseDParen(self):
    # type: () -> command.DParen
    """Parse bash-style arithmetic: (( ... )).

    Precondition: looking at Op_DLeftParen.
    """
    left_tok = word_.AsOperatorToken(self.cur_word)

    self._SetNext()  # move past '(('
    arith_node, right_tok = self.w_parser.ReadDParen()
    assert arith_node is not None

    return command.DParen(left_tok, arith_node, right_tok, None)  # no redirects yet
|
2110 |
|
2111 |
def ParseCommand(self):
    # type: () -> command_t
    """Top-level dispatch for a single command.

    command : simple_command
            | compound_command   # OSH edit: io_redirect* folded in
            | function_def
            | ksh_function_def

            # YSH extensions
            | proc NAME ...
            | const ...
            | var ...
            | setglobal ...
            | setref ...
            | setvar ...
            | _ EXPR
            | = EXPR
            ;

    Note: the reason const / var are not part of compound_command is because
    they can't be alone in a shell function body.

    Example:
    This is valid shell   f() if true; then echo hi; fi
    This is invalid       f() var x = 1
    """
    # do/done/then etc. never BEGIN a command.
    if self._AtSecondaryKeyword():
        p_die('Unexpected word when parsing command',
              loc.Word(self.cur_word))

    # YSH Extensions

    if self.c_id == Id.KW_Proc:  # proc p { ... }
        # proc is hidden because of the 'local reasoning' principle
        # Code inside procs should be YSH, full stop. That means oil:upgrade is
        # on.
        if self.parse_opts.parse_proc():
            return self.ParseYshProc()

        # Otherwise silently pass. This is to support scripts like:
        # $ bash -c 'proc() { echo p; }; proc'

    if self.c_id == Id.KW_Func:  # func f(x) { ... }
        if self.parse_opts.parse_func() and not self.parse_opts.parse_tea():
            return self.ParseYshFunc()

        # Otherwise silently pass, like for the procs.

    if self.c_id in (Id.KW_Var, Id.KW_Const):  # var x = 1
        keyword_id = self.c_id
        kw_token = word_.LiteralToken(self.cur_word)
        self._SetNext()
        n8 = self.w_parser.ParseVarDecl(kw_token)
        # Statically register every declared name.
        for lhs in n8.lhs:
            self.var_checker.Check(keyword_id, lhs.name)
        return n8

    if self.c_id in (Id.KW_SetVar, Id.KW_SetRef, Id.KW_SetGlobal):
        kw_token = word_.LiteralToken(self.cur_word)
        self._SetNext()
        n9 = self.w_parser.ParsePlaceMutation(kw_token, self.var_checker)
        return n9

    if self.c_id in (Id.Lit_Underscore, Id.Lit_Equals):  # = 42 + 1
        keyword = word_.LiteralToken(self.cur_word)
        assert keyword is not None
        self._SetNext()
        enode = self.w_parser.ParseCommandExpr()
        return command.Expr(keyword, enode)

    if self.c_id == Id.KW_Function:
        return self.ParseKshFunctionDef()

    # Top-level keywords to hide: func, data, enum, class/mod. Not sure about
    # 'use'.
    if self.parse_opts.parse_tea():
        if self.c_id == Id.KW_Func:
            out0 = command.TeaFunc.CreateNull(alloc_lists=True)
            self.parse_ctx.ParseTeaFunc(self.lexer, out0)
            self._SetNext()
            return out0
        if self.c_id == Id.KW_Data:
            out1 = command.Data.CreateNull(alloc_lists=True)
            self.parse_ctx.ParseDataType(self.lexer, out1)
            self._SetNext()
            return out1
        if self.c_id == Id.KW_Enum:
            out2 = command.Enum.CreateNull(alloc_lists=True)
            self.parse_ctx.ParseEnum(self.lexer, out2)
            self._SetNext()
            return out2
        if self.c_id == Id.KW_Class:
            out3 = command.Class.CreateNull(alloc_lists=True)
            self.parse_ctx.ParseClass(self.lexer, out3)
            self._SetNext()
            return out3
        if self.c_id == Id.KW_Import:
            # Needs last_token because it ends with an optional thing?
            out4 = command.Import.CreateNull(alloc_lists=True)
            self.w_parser.ParseImport(out4)
            self._SetNext()
            return out4

    if self.c_id in (Id.KW_DLeftBracket, Id.Op_DLeftParen, Id.Op_LParen,
                     Id.Lit_LBrace, Id.KW_For, Id.KW_While, Id.KW_Until,
                     Id.KW_If, Id.KW_Case, Id.KW_Time):
        return self.ParseCompoundCommand()

    # Syntax error for '}' starting a line, which all shells disallow.
    if self.c_id == Id.Lit_RBrace:
        p_die('Unexpected right brace', loc.Word(self.cur_word))

    if self.c_kind == Kind.Redir:  # Leading redirect
        return self.ParseSimpleCommand()

    if self.c_kind == Kind.Word:
        cur_word = cast(CompoundWord, self.cur_word)  # ensured by Kind.Word

        # NOTE: At the top level, only Token and Compound are possible.
        # Can this be modelled better in the type system, removing asserts?
        #
        # TODO: This can be a proc INVOCATION!  (Doesn't even need parse_paren)
        # Problem: We have to distinguish f( ) { echo ; } and myproc (x, y)
        # That requires 2 tokens of lookahead, which we don't have
        #
        # Or maybe we don't just have ParseSimpleCommand -- we will have
        # ParseYshCommand or something

        if (self.w_parser.LookAheadFuncParens() and
                not word_.IsVarLike(cur_word)):
            return self.ParseFunctionDef()  # f() { echo; }  # function

        # Parse x = 1+2*3 when inside HayNode { } blocks
        parts = cur_word.parts
        if self.parse_opts.parse_equals() and len(parts) == 1:
            part0 = parts[0]
            if part0.tag() == word_part_e.Literal:
                tok = cast(Token, part0)
                if (match.IsValidVarName(tok.tval) and
                        self.w_parser.LookPastSpace() == Id.Lit_Equals):
                    assert tok.id == Id.Lit_Chars, tok

                    # Only allowed when the enclosing hay node collects attrs.
                    if len(self.hay_attrs_stack) and self.hay_attrs_stack[-1]:
                        # Note: no static var_checker.Check() for bare assignment
                        enode = self.w_parser.ParseBareDecl()
                        self._SetNext()  # Somehow this is necessary
                        # TODO: Use BareDecl here. Well, do that when we
                        # treat it as const or lazy.
                        return command.VarDecl(None, [NameType(tok, None)],
                                               enode)
                    else:
                        self._SetNext()
                        self._GetWord()
                        p_die(
                            'Unexpected = (Hint: use var/setvar, or quote it)',
                            loc.Word(self.cur_word))

        # echo foo
        # f=(a b c)  # array
        # array[1+2]+=1
        return self.ParseSimpleCommand()

    if self.c_kind == Kind.Eof:
        p_die("Unexpected EOF while parsing command",
              loc.Word(self.cur_word))

    # NOTE: This only happens in batch mode in the second turn of the loop!
    # e.g. )
    p_die("Invalid word while parsing command", loc.Word(self.cur_word))

    assert False  # for MyPy
|
2282 |
|
2283 |
def ParsePipeline(self):
    # type: () -> command_t
    """Parse a (possibly negated) pipeline.

    pipeline : Bang? command ( '|' newline_ok command )* ;
    """
    negated = None  # type: Optional[Token]

    self._GetWord()
    if self.c_id == Id.KW_Bang:
        negated = word_.AsKeywordToken(self.cur_word)
        self._SetNext()

    child = self.ParseCommand()
    assert child is not None

    children = [child]

    self._GetWord()
    if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
        if negated is not None:
            # '! cmd' with no pipe still needs a Pipeline node for negation.
            node = command.Pipeline(negated, children, [])
            return node
        else:
            return child  # no pipeline

    # | or |&
    ops = []  # type: List[Token]
    while True:
        op = word_.AsOperatorToken(self.cur_word)
        ops.append(op)

        self._SetNext()  # skip past Id.Op_Pipe or Id.Op_PipeAmp
        self._NewlineOk()

        child = self.ParseCommand()
        children.append(child)

        self._GetWord()
        if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
            break

    return command.Pipeline(negated, children, ops)
|
2325 |
|
2326 |
def ParseAndOr(self):
    # type: () -> command_t
    """Parse an and_or, entering multiline mode on a leading '...'."""
    self._GetWord()

    is_multiline = False
    if self.c_id == Id.Word_Compound:
        tok = word_.LiteralToken(self.cur_word)
        is_multiline = tok is not None and tok.id == Id.Lit_TDot

    if is_multiline:
        # We got '...', so parse the rest in multiline mode.
        self._SetNext()
        with word_.ctx_Multiline(self.w_parser):
            return self._ParseAndOr()

    # Parse in normal mode, not multiline.
    return self._ParseAndOr()
|
2339 |
|
2340 |
def _ParseAndOr(self):
    # type: () -> command_t
    """Parse a chain of pipelines joined by && / ||.

    and_or : and_or ( AND_IF | OR_IF ) newline_ok pipeline
           | pipeline

    Note that it is left recursive and left associative. We parse it
    iteratively with a token of lookahead.
    """
    child = self.ParsePipeline()
    assert child is not None

    self._GetWord()
    if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
        return child  # plain pipeline; no AndOr wrapper

    ops = []  # type: List[Token]
    children = [child]

    while True:
        ops.append(word_.AsOperatorToken(self.cur_word))

        self._SetNext()  # skip past || &&
        self._NewlineOk()

        child = self.ParsePipeline()
        children.append(child)

        self._GetWord()
        if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
            break

    return command.AndOr(children, ops)
|
2373 |
|
2374 |
# NOTE: _ParseCommandLine and _ParseCommandTerm are similar, but different.
|
2375 |
|
2376 |
# At the top level, we execute after every line, e.g. to
|
2377 |
# - process alias (a form of dynamic parsing)
|
2378 |
# - process 'exit', because invalid syntax might appear after it
|
2379 |
|
2380 |
# On the other hand, for a while loop body, we parse the whole thing at once,
|
2381 |
# and then execute it. We don't want to parse it over and over again!
|
2382 |
|
2383 |
# COMPARE
|
2384 |
# command_line : and_or (sync_op and_or)* trailer? ; # TOP LEVEL
|
2385 |
# command_term : and_or (trailer and_or)* ; # CHILDREN
|
2386 |
|
2387 |
def _ParseCommandLine(self):
    # type: () -> command_t
    """Parse one top-level line of commands.

    command_line : and_or (sync_op and_or)* trailer? ;
    trailer      : sync_op newline_ok
                 | NEWLINES;
    sync_op      : '&' | ';';

    NOTE: This rule causes LL(k > 1) behavior.  We would have to peek to see
    if there is another command word after the sync op.

    But it's easier to express imperatively.  Do the following in a loop:
    1. ParseAndOr
    2. Peek.
       a. If there's a newline, then return.  (We're only parsing a single
          line.)
       b. If there's a sync_op, process it.  Then look for a newline and
          return.  Otherwise, parse another AndOr.
    """
    # This END_LIST is slightly different than END_LIST in _ParseCommandTerm.
    # I don't think we should add anything else here; otherwise it will be
    # ignored at the end of ParseInteractiveLine(), e.g. leading to bug #301.
    END_LIST = [Id.Op_Newline, Id.Eof_Real]

    children = []  # type: List[command_t]
    done = False
    while not done:
        child = self.ParseAndOr()

        self._GetWord()
        if self.c_id in (Id.Op_Semi, Id.Op_Amp):
            # Wrap the command with its terminator (';' or '&').
            tok = cast(Token, self.cur_word)  # for MyPy
            child = command.Sentence(child, tok)
            self._SetNext()

            self._GetWord()
            if self.c_id in END_LIST:
                done = True

        elif self.c_id in END_LIST:
            done = True

        else:
            # e.g. echo a(b)
            p_die('Invalid word while parsing command line',
                  loc.Word(self.cur_word))

        children.append(child)

    # Simplify the AST.
    if len(children) > 1:
        return command.CommandList(children)
    else:
        return children[0]
|
2441 |
|
2442 |
def _ParseCommandTerm(self):
    # type: () -> command.CommandList
    """Parse the interior of a block until an end token.

    command_term : and_or (trailer and_or)* ;
    trailer      : sync_op newline_ok
                 | NEWLINES;
    sync_op      : '&' | ';';

    This is handled in imperative style, like _ParseCommandLine.
    Called by _ParseCommandList for all blocks, and also for ParseCaseArm,
    which is slightly different.  (HOW?  Is it the DSEMI?)

    Returns:
      syntax_asdl.command
    """
    # Token types that will end the command term.
    END_LIST = [self.eof_id, Id.Right_Subshell, Id.Lit_RBrace, Id.Op_DSemi]

    # NOTE: This is similar to _ParseCommandLine.
    #
    # - Why aren't we doing END_LIST in _ParseCommandLine?
    #   - Because you will never be inside $() at the top level.
    #   - We also know it will end in a newline.  It can't end in "fi"!
    #     - example: if true; then { echo hi; } fi

    children = []  # type: List[command_t]
    done = False
    while not done:
        # Most keywords are valid "first words".  But do/done/then do not
        # BEGIN commands, so they are not valid.
        if self._AtSecondaryKeyword():
            break

        child = self.ParseAndOr()

        self._GetWord()
        if self.c_id == Id.Op_Newline:
            self._SetNext()

            self._GetWord()
            if self.c_id in END_LIST:
                done = True

        elif self.c_id in (Id.Op_Semi, Id.Op_Amp):
            tok = cast(Token, self.cur_word)  # for MyPy
            child = command.Sentence(child, tok)
            self._SetNext()

            self._GetWord()
            if self.c_id == Id.Op_Newline:
                self._SetNext()  # skip over newline

                # Test if we should keep going.  There might be another
                # command after the semi and newline.
                self._GetWord()
                if self.c_id in END_LIST:  # \n EOF
                    done = True

            elif self.c_id in END_LIST:  # ; EOF
                done = True

        elif self.c_id in END_LIST:  # EOF
            done = True

        # For  if test -f foo; test -f bar {
        elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            done = True

        elif self.c_kind != Kind.Word:
            # e.g. f() { echo (( x )) ; }
            # but can't fail on 'fi fi', see osh/cmd_parse_test.py

            #log("Invalid %s", self.cur_word)
            p_die("Invalid word while parsing command list",
                  loc.Word(self.cur_word))

        children.append(child)

    return command.CommandList(children)
|
2521 |
|
2522 |
def _ParseCommandList(self):
    # type: () -> command.CommandList
    """Parse 'command_list : newline_ok command_term trailer? ;'.

    This one is called by all the compound commands.  It's basically a
    command block.

    NOTE: Rather than translating the CFG directly, the code follows a
    style more like (and_or trailer)+, which makes capture easier.
    """
    self._NewlineOk()  # leading newlines are allowed
    node = self._ParseCommandTerm()
    return node
|
2536 |
|
2537 |
def ParseLogicalLine(self):
    # type: () -> command_t
    """Parse a single logical line for main_loop.

    A wrapper around _ParseCommandLine().  Similar but not identical to
    _ParseCommandList() and ParseCommandSub().

    Returns:
      None at the real EOF; the main loop then checks for here docs.
    Raises:
      ParseError
    """
    self._NewlineOk()
    self._GetWord()
    if self.c_id == Id.Eof_Real:
        return None  # main loop checks for here docs
    return self._ParseCommandLine()
|
2553 |
|
2554 |
def ParseInteractiveLine(self):
    # type: () -> parse_result_t
    """Parse a single line for the interactive main_loop.

    Different from ParseLogicalLine because newlines are handled
    differently.

    Raises:
      ParseError
    """
    self._GetWord()
    if self.c_id == Id.Op_Newline:
        return parse_result.EmptyLine
    if self.c_id == Id.Eof_Real:
        return parse_result.Eof

    return parse_result.Node(self._ParseCommandLine())
|
2571 |
|
2572 |
def ParseCommandSub(self):
    # type: () -> command_t
    """Parse the body of $(echo hi) and `echo hi` for word_parse.py.

    Bodies may span multiple lines, like:
      echo $( echo one
              echo two )
    """
    self._NewlineOk()

    self._GetWord()
    if self.c_kind == Kind.Eof:  # empty body, e.g. $()
        return command.NoOp

    node = self._ParseCommandTerm()
    # Unwrap a single-child CommandList to simplify the AST.
    if len(node.children) == 1:
        return node.children[0]
    return node
|
2590 |
|
2591 |
def CheckForPendingHereDocs(self):
    # type: () -> None
    """Die if a here doc was opened but its body never terminated.

    NOTE: This happens when there is no newline at the end of a file, like
    osh -c 'cat <<EOF'
    """
    if len(self.pending_here_docs):
        node = self.pending_here_docs[0]  # Just show the first one?
        h = cast(redir_param.HereDoc, node.arg)
        p_die('Unterminated here doc began here', loc.Word(h.here_begin))