1 # Copyright 2016 Andy Chu. All rights reserved.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at
5 #
6 # http://www.apache.org/licenses/LICENSE-2.0
7 """
8 cmd_parse.py - Parse high level shell commands.
9 """
10 from __future__ import print_function
11
12 from _devbuild.gen import grammar_nt
13 from _devbuild.gen.id_kind_asdl import Id, Id_t, Kind, Kind_str
14 from _devbuild.gen.types_asdl import lex_mode_e, cmd_mode_e, cmd_mode_t
15 from _devbuild.gen.syntax_asdl import (
16 loc,
17 SourceLine,
18 source,
19 parse_result,
20 parse_result_t,
21 command,
22 command_t,
23 condition,
24 condition_t,
25 for_iter,
26 ArgList,
27 BraceGroup,
28 BlockArg,
29 CaseArm,
30 case_arg,
31 IfArm,
32 pat,
33 pat_t,
34 Redir,
35 redir_param,
36 redir_loc,
37 redir_loc_t,
38 word_e,
39 word_t,
40 CompoundWord,
41 Token,
42 word_part_e,
43 word_part_t,
44 rhs_word,
45 rhs_word_t,
46 sh_lhs_expr,
47 sh_lhs_expr_t,
48 AssignPair,
49 EnvPair,
50 assign_op_e,
51 NameType,
52 proc_sig,
53 proc_sig_e,
54 )
55 from core import alloc
56 from core import error
57 from core.error import p_die
58 from core import ui
59 from frontend import consts
60 from frontend import lexer
61 from frontend import location
62 from frontend import match
63 from frontend import reader
64 from mycpp.mylib import log
65 from osh import braces
66 from osh import bool_parse
67 from osh import word_
68
69 from typing import Optional, List, Dict, Any, Tuple, cast, TYPE_CHECKING
70 if TYPE_CHECKING:
71 from core.alloc import Arena
72 from core import optview
73 from frontend.lexer import Lexer
74 from frontend.parse_lib import ParseContext, AliasesInFlight
75 from frontend.reader import _Reader
76 from osh.word_parse import WordParser
77
_ = Kind_str  # for debug prints

# Byte values of whitespace chars, used for checks like "space before '('".
TAB_CH = 9  # ord('\t')
SPACE_CH = 32  # ord(' ')
82
83
def _ReadHereLines(
        line_reader,  # type: _Reader
        h,  # type: Redir
        delimiter,  # type: str
):
    # type: (...) -> Tuple[List[Tuple[SourceLine, int]], Tuple[SourceLine, int]]
    """Collect the body lines of a here doc, up to the delimiter line.

    All lines are read eagerly, rather than parsed one at a time, because of
    cases like this:
      cat <<EOF
      1 $(echo 2
      echo 3) 4
      EOF

    Returns:
      (body lines, terminator line), each entry a (SourceLine, start offset)
      pair.  The offset skips leading tabs when the operator is <<-.
    """
    body = []  # type: List[Tuple[SourceLine, int]]
    found_delim = None  # type: Tuple[SourceLine, int]
    strip_tabs = (h.op.id == Id.Redir_DLessDash)

    while True:
        src_line, unused_offset = line_reader.GetLine()

        if src_line is None:  # EOF
            # Bash merely warns about an unterminated here doc.  We make it
            # fatal because we want to be strict, and because it causes
            # problems reporting other errors.
            # Attribute it to the << in <<EOF for now.
            p_die("Couldn't find terminator for here doc that starts here",
                  h.op)

        assert len(src_line.content) != 0  # None should be the empty line

        line = src_line.content

        # For <<-, skip over ALL leading tabs -- not spaces, and not just the
        # first tab.
        offset = 0
        if strip_tabs:
            while offset < len(line) and line[offset] == '\t':
                offset += 1

        if line[offset:].rstrip() == delimiter:
            found_delim = (src_line, offset)
            break

        body.append((src_line, offset))

    return body, found_delim
134
135
def _MakeLiteralHereLines(
        here_lines,  # type: List[Tuple[SourceLine, int]]
        arena,  # type: Arena
):
    # type: (...) -> List[word_part_t]  # less precise because List is invariant type
    """Turn each here doc line into a single Lit_Chars token / word part.

    Used for quoted delimiters (<< 'EOF'), where no expansion happens.
    """
    parts = []  # type: List[word_part_t]
    for src_line, start_offset in here_lines:
        tok = arena.NewToken(Id.Lit_Chars, start_offset,
                             len(src_line.content), src_line,
                             src_line.content[start_offset:])
        parts.append(cast(word_part_t, tok))
    return parts
149
150
def _ParseHereDocBody(parse_ctx, r, line_reader, arena):
    # type: (ParseContext, Redir, _Reader, Arena) -> None
    """Fill in attributes of a pending here doc node.

    Reads the here doc body from line_reader and stores the parts in r.arg
    (a redir_param.HereDoc), along with a token for the terminator line.
    """
    h = cast(redir_param.HereDoc, r.arg)
    # "If any character in word is quoted, the delimiter shall be formed by
    # performing quote removal on word, and the here-document lines shall not
    # be expanded. Otherwise, the delimiter shall be the word itself."
    # NOTE: \EOF counts, or even E\OF
    ok, delimiter, delim_quoted = word_.StaticEval(h.here_begin)
    if not ok:
        p_die('Invalid here doc delimiter', loc.Word(h.here_begin))

    here_lines, last_line = _ReadHereLines(line_reader, r, delimiter)

    if delim_quoted:  # << 'EOF'
        # Literal for each line: no $var or $(command sub) expansion.
        h.stdin_parts = _MakeLiteralHereLines(here_lines, arena)
    else:
        # Unquoted delimiter: re-parse the body lines for substitutions.
        line_reader = reader.VirtualLineReader(here_lines, arena)
        w_parser = parse_ctx.MakeWordParserForHereDoc(line_reader)
        w_parser.ReadHereDocBody(h.stdin_parts)  # fills this in

    end_line, end_pos = last_line

    # Create a Token with the end terminator.  Maintains the invariant that the
    # tokens "add up".
    h.here_end_tok = arena.NewToken(Id.Undefined_Tok, end_pos,
                                    len(end_line.content), end_line, '')
179
180
def _MakeAssignPair(parse_ctx, preparsed, arena):
    # type: (ParseContext, PreParsedItem, Arena) -> AssignPair
    """Create an AssignPair from a 4-tuple from DetectShAssignment."""

    left_token, close_token, part_offset, w = preparsed

    if left_token.id == Id.Lit_VarLike:  # s=1
        if lexer.IsPlusEquals(left_token):
            # 'name+=' -- slice off the trailing '+='
            var_name = lexer.TokenSliceRight(left_token, -2)
            op = assign_op_e.PlusEqual
        else:
            # 'name=' -- slice off the trailing '='
            var_name = lexer.TokenSliceRight(left_token, -1)
            op = assign_op_e.Equal

        tmp = sh_lhs_expr.Name(left_token, var_name)

        lhs = cast(sh_lhs_expr_t, tmp)

    elif left_token.id == Id.Lit_ArrayLhsOpen and parse_ctx.one_pass_parse:
        # a[x]=1 in one-pass mode: keep the index expression as an unparsed
        # string instead of building an arith AST now.
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        assert left_token.line == close_token.line, \
            '%s and %s not on same line' % (left_token, close_token)

        # Slice the line to get the text between 'a[' and ']='
        left_pos = left_token.col + left_token.length
        index_str = left_token.line.content[left_pos:close_token.col]
        lhs = sh_lhs_expr.UnparsedIndex(left_token, var_name, index_str)

    elif left_token.id == Id.Lit_ArrayLhsOpen:  # a[x++]=1
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        span1 = left_token
        span2 = close_token
        # Similar to SnipCodeString / SnipCodeBlock
        if span1.line == span2.line:
            # extract what's between brackets
            code_str = span1.line.content[span1.col + span1.length:span2.col]
        else:
            raise NotImplementedError('%s != %s' % (span1.line, span2.line))
        a_parser = parse_ctx.MakeArithParser(code_str)

        # a[i+1]= is a place
        src = source.Reparsed('array place', left_token, close_token)
        with alloc.ctx_SourceCode(arena, src):
            index_node = a_parser.Parse()  # may raise error.Parse

        tmp3 = sh_lhs_expr.IndexedName(left_token, var_name, index_node)

        lhs = cast(sh_lhs_expr_t, tmp3)

    else:
        # DetectShAssignment should only produce the token Ids handled above
        raise AssertionError()

    # TODO: Should we also create a rhs_expr.ArrayLiteral here?
    n = len(w.parts)
    if part_offset == n:
        # Nothing after the '=', e.g. 'x='
        rhs = rhs_word.Empty  # type: rhs_word_t
    else:
        # tmp2 is for intersection of C++/MyPy type systems
        tmp2 = CompoundWord(w.parts[part_offset:])
        word_.TildeDetectAssign(tmp2)
        rhs = tmp2

    return AssignPair(left_token, lhs, op, rhs)
253
254
def _AppendMoreEnv(preparsed_list, more_env):
    # type: (PreParsedList, List[EnvPair]) -> None
    """Helper to modify a SimpleCommand node.

    Args:
      preparsed_list: a list of 4-tuples from DetectShAssignment
      more_env: a list to append EnvPair instances to
    """
    for left_token, _, part_offset, w in preparsed_list:
        # a[x]=1 style bindings are never valid as env prefixes
        if left_token.id != Id.Lit_VarLike:
            p_die("Environment binding shouldn't look like an array assignment",
                  left_token)

        # += is mutation, which doesn't make sense for an env binding
        if lexer.IsPlusEquals(left_token):
            p_die('Expected = in environment binding, got +=', left_token)

        name = lexer.TokenSliceRight(left_token, -1)  # strip trailing '='
        if part_offset == len(w.parts):
            rhs = rhs_word.Empty  # type: rhs_word_t
        else:
            rhs = CompoundWord(w.parts[part_offset:])

        more_env.append(EnvPair(left_token, name, rhs))
280
281
if TYPE_CHECKING:
    # The 3 items returned by word_.DetectShAssignment, plus the word itself
    # (see _SplitSimpleCommandPrefix): left token, closing token for a[i]=
    # forms, and the index of the first RHS part within the word.
    PreParsedItem = Tuple[Token, Optional[Token], int, CompoundWord]
    PreParsedList = List[PreParsedItem]
285
286
def _SplitSimpleCommandPrefix(words):
    # type: (List[CompoundWord]) -> Tuple[PreParsedList, List[CompoundWord]]
    """Second pass of SimpleCommand parsing: look for assignment words."""
    assignments = []  # type: PreParsedList
    rest = []  # type: List[CompoundWord]

    saw_command_word = False
    for w in words:
        if not saw_command_word:
            left_token, close_token, part_offset = word_.DetectShAssignment(w)
            if left_token:
                assignments.append((left_token, close_token, part_offset, w))
                continue
            # The first non-assignment word ends the prefix.
            saw_command_word = True
        rest.append(w)

    return assignments, rest
307
308
def _MakeSimpleCommand(
        preparsed_list,  # type: PreParsedList
        suffix_words,  # type: List[CompoundWord]
        redirects,  # type: List[Redir]
        typed_args,  # type: Optional[ArgList]
        block,  # type: Optional[BlockArg]
):
    # type: (...) -> command.Simple
    """Create a command.Simple node.

    Args:
      preparsed_list: env bindings like FOO=bar preceding the command word
      suffix_words: the command word and its arguments; must be non-empty
      typed_args: typed args after '(', if any
      block: trailing { ... } block arg, if any
    """

    # FOO=(1 2 3) ls is not allowed.
    for _, _, _, w in preparsed_list:
        if word_.HasArrayPart(w):
            p_die("Environment bindings can't contain array literals",
                  loc.Word(w))

    # NOTE: It would be possible to add this check back. But it already happens
    # at runtime in EvalWordSequence2.
    # echo FOO=(1 2 3) is not allowed (but we should NOT fail on echo FOO[x]=1).
    if 0:
        for w in suffix_words:
            if word_.HasArrayPart(w):
                p_die("Commands can't contain array literals", loc.Word(w))

    assert len(suffix_words) != 0
    # {a,b,c}   # Use { before brace detection
    # ~/bin/ls  # Use ~ before tilde detection
    part0 = suffix_words[0].parts[0]
    blame_tok = location.LeftTokenForWordPart(part0)

    # NOTE: We only do brace DETECTION here, not brace EXPANSION. Therefore we
    # can't implement bash's behavior of having say {~bob,~jane}/src work,
    # because we only have a BracedTree.
    # This is documented in spec/brace-expansion.
    # NOTE: Technically we could do expansion outside of 'oshc translate', but
    # it doesn't seem worth it.
    words2 = braces.BraceDetectAll(suffix_words)
    words3 = word_.TildeDetectAll(words2)

    more_env = []  # type: List[EnvPair]
    _AppendMoreEnv(preparsed_list, more_env)

    # do_fork by default
    return command.Simple(blame_tok, more_env, words3, redirects, typed_args,
                          block, True)
354
355
class VarChecker(object):
    """Statically check for proc and variable usage errors."""

    def __init__(self):
        # type: () -> None
        # Parallel stacks, one entry per proc/function scope:
        # - self.tokens: blame token ('proc' or another token), for location
        #   info in error messages
        # - self.names: names declared in that scope, mapped to the declaring
        #   keyword Id
        self.tokens = []  # type: List[Token]
        self.names = []  # type: List[Dict[str, Id_t]]

    def Push(self, blame_tok):
        # type: (Token) -> None
        """Enter a new proc / shell function scope.

        Bash allows nesting, but it's confusing because it's the same as two
        functions at the top level:

        f() {
          g() {
            echo 'top level function defined in another one'
          }
        }

        YSH disallows nested procs.
        """
        if len(self.tokens):
            involves_proc = (self.tokens[0].id == Id.KW_Proc or
                             blame_tok.id == Id.KW_Proc)
            if involves_proc:
                p_die("procs and shell functions can't be nested", blame_tok)

        self.tokens.append(blame_tok)
        scope = {}  # type: Dict[str, Id_t]
        self.names.append(scope)

    def Pop(self):
        # type: () -> None
        """Leave the current proc/function scope."""
        self.names.pop()
        self.tokens.pop()

    def Check(self, keyword_id, name_tok):
        # type: (Id_t, Token) -> None
        """Check for errors in declaration and mutation errors.

        var x, const x:
          x already declared
        setvar x:
          x is not declared
          x is constant
        setglobal x:
          No errors are possible; we would need all these many conditions to
          statically know the names:
          - no 'source'
          - shopt -u copy_env.
          - AND use lib has to be static
        setref x:
          Should only mutate out params

        Also should p(:out) declare 'out' as well as '__out'? Then you can't
        have local variables with the same name.
        """
        # Don't check the global level! Semantics are different here!
        if len(self.names) == 0:
            return

        scope = self.names[-1]
        name = name_tok.tval

        if keyword_id in (Id.KW_Const, Id.KW_Var):
            if name in scope:
                p_die('%r was already declared' % name, name_tok)
            else:
                scope[name] = keyword_id

        if keyword_id == Id.KW_SetVar:
            if name not in scope:
                p_die("%r hasn't been declared" % name, name_tok)
            elif scope[name] == Id.KW_Const:
                p_die("Can't modify constant %r" % name, name_tok)

        # TODO: setref should only mutate out params.
436
437
class ctx_VarChecker(object):
    """Context manager that opens a VarChecker scope for a proc/function.

    The scope is pushed immediately on construction and popped on exit.
    """

    def __init__(self, var_checker, blame_tok):
        # type: (VarChecker, Token) -> None
        var_checker.Push(blame_tok)
        self.var_checker = var_checker

    def __enter__(self):
        # type: () -> None
        return None

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.var_checker.Pop()
452
453
class ctx_CmdMode(object):
    """Context manager that temporarily switches the parser's cmd_mode.

    The mode is switched immediately on construction and restored on exit.
    """

    def __init__(self, cmd_parse, new_cmd_mode):
        # type: (CommandParser, cmd_mode_t) -> None
        self.cmd_parse = cmd_parse
        self.prev_cmd_mode = cmd_parse.cmd_mode
        cmd_parse.cmd_mode = new_cmd_mode

    def __enter__(self):
        # type: () -> None
        return None

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.cmd_parse.cmd_mode = self.prev_cmd_mode
469
470
471
# Keywords that appear in the middle or at the end of compound commands
# (do/done, then/fi/elif/else, esac).  A word matching one of these can't
# begin a new command; see _AtSecondaryKeyword.
SECONDARY_KEYWORDS = [
    Id.KW_Do, Id.KW_Done, Id.KW_Then, Id.KW_Fi, Id.KW_Elif, Id.KW_Else,
    Id.KW_Esac
]
476
477
478 class CommandParser(object):
479 """Recursive descent parser derived from POSIX shell grammar.
480
481 This is a BNF grammar:
482 https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_10
483
484 - Augmented with both bash/OSH and YSH constructs.
485
486 - We use regex-like iteration rather than recursive references
487 ? means optional (0 or 1)
488 * means 0 or more
489 + means 1 or more
490
491 - Keywords are spelled in Caps:
492 If Elif Case
493
494 - Operator tokens are quoted:
495 '(' '|'
496
497 or can be spelled directly if it matters:
498
499 Op_LParen Op_Pipe
500
501 - Non-terminals are snake_case:
502 brace_group subshell
503
504 Methods in this class should ROUGHLY CORRESPOND to grammar productions, and
505 the production should be in the method docstrings, e.g.
506
507 def ParseSubshell():
508 "
509 subshell : '(' compound_list ')'
510
511 Looking at Op_LParen # Comment to say how this method is called
512 "
513
514 The grammar may be factored to make parsing easier.
515 """
516
    def __init__(self,
                 parse_ctx,
                 parse_opts,
                 w_parser,
                 lexer,
                 line_reader,
                 eof_id=Id.Eof_Real):
        # type: (ParseContext, optview.Parse, WordParser, Lexer, _Reader, Id_t) -> None
        """
        Args:
          parse_ctx: shared parse-time state, including aliases
          parse_opts: options like parse_brace, parse_at
          w_parser: the word parser this command parser drives
          lexer: for pushing hints and lookahead to (
          line_reader: for reading here doc bodies
          eof_id: Id that terminates parsing, Eof_Real by default
        """
        self.parse_ctx = parse_ctx
        self.aliases = parse_ctx.aliases  # aliases to expand at parse time

        self.parse_opts = parse_opts
        self.w_parser = w_parser  # type: WordParser  # for normal parsing
        self.lexer = lexer  # for pushing hints, lookahead to (
        self.line_reader = line_reader  # for here docs
        self.eof_id = eof_id

        self.arena = line_reader.arena  # for adding here doc and alias spans
        self.aliases_in_flight = []  # type: AliasesInFlight

        # A hacky boolean to remove 'if cd / {' ambiguity.
        self.allow_block = True

        # Stack of booleans for nested Attr and SHELL nodes.
        # Attr nodes allow bare assignment x = 42, but not shell x=42.
        # SHELL nodes are the inverse.  'var x = 42' is preferred in shell
        # nodes, but x42 is still allowed.
        #
        # Note: this stack could be optimized by turning it into an integer and
        # binary encoding.
        self.hay_attrs_stack = []  # type: List[bool]

        # Note: VarChecker is instantiated with each CommandParser, which means
        # that two 'proc foo' -- inside a command sub and outside -- don't
        # conflict, because they use different CommandParser instances.  I
        # think this OK but you can imagine different behaviors.
        self.var_checker = VarChecker()

        self.cmd_mode = cmd_mode_e.Shell  # type: cmd_mode_t

        self.Reset()
558
    # Init_() function for "keyword arg"
    def Init_AliasesInFlight(self, aliases_in_flight):
        # type: (AliasesInFlight) -> None
        """Share the in-flight alias list with a parent parser.

        Called by _MaybeExpandAliases on the child parser it creates, so that
        recursive alias expansion can be detected across parser instances.
        """
        self.aliases_in_flight = aliases_in_flight
563
    def Reset(self):
        # type: () -> None
        """Reset our own internal state.

        Called by the interactive loop.
        """
        # Cursor state set by _GetWord()
        self.next_lex_mode = lex_mode_e.ShCommand
        self.cur_word = None  # type: word_t  # current word
        self.c_kind = Kind.Undefined
        self.c_id = Id.Undefined_Tok

        # Queued by ParseRedirect, drained by _GetWord at the next newline.
        self.pending_here_docs = [
        ]  # type: List[Redir]  # should have HereLiteral arg
578
    def ResetInputObjects(self):
        # type: () -> None
        """Reset the internal state of our inputs.

        Called by the interactive loop.  Delegates to the word parser, the
        lexer, and the line reader.
        """
        self.w_parser.Reset()
        self.lexer.ResetInputObjects()
        self.line_reader.Reset()
588
    def _SetNext(self):
        # type: () -> None
        """Call this when you no longer need the current token.

        This method is lazy.  A subsequent call to _GetWord() will
        actually read the next Token.
        """
        # Any mode other than Undefined marks the cursor as "needs advancing";
        # _GetWord() checks for that.
        self.next_lex_mode = lex_mode_e.ShCommand
597
    def _GetWord(self):
        # type: () -> None
        """Call this when you need to make a decision based on Id or Kind.

        If there was an "unfulfilled" call to _SetNext(), it reads a word and
        sets self.c_id and self.c_kind.

        Otherwise it does nothing.
        """
        # Undefined means the cursor is already "fulfilled"; see _SetNext().
        if self.next_lex_mode != lex_mode_e.Undefined:
            w = self.w_parser.ReadWord(self.next_lex_mode)
            #log("w %s", w)

            # Here docs only happen in command mode, so other kinds of newlines
            # don't count.
            if w.tag() == word_e.Operator:
                tok = cast(Token, w)
                if tok.id == Id.Op_Newline:
                    # A newline is where pending here doc bodies begin; they
                    # were queued earlier by ParseRedirect.
                    for h in self.pending_here_docs:
                        _ParseHereDocBody(self.parse_ctx, h, self.line_reader,
                                          self.arena)
                    del self.pending_here_docs[:]  # No .clear() until Python 3.3.

            self.cur_word = w

            self.c_kind = word_.CommandKind(self.cur_word)
            self.c_id = word_.CommandId(self.cur_word)
            # Mark the cursor fulfilled until the next _SetNext().
            self.next_lex_mode = lex_mode_e.Undefined
626
627 def _Eat(self, c_id, msg=None):
628 # type: (Id_t, Optional[str]) -> word_t
629 """Consume a word of a type, maybe showing a custom error message.
630
631 Args:
632 c_id: the Id we expected
633 msg: improved error message
634 """
635 self._GetWord()
636 if self.c_id != c_id:
637 if msg is None:
638 msg = 'Expected word type %s, got %s' % (ui.PrettyId(c_id),
639 ui.PrettyId(self.c_id))
640 p_die(msg, loc.Word(self.cur_word))
641
642 skipped = self.cur_word
643 self._SetNext()
644 return skipped
645
    def _NewlineOk(self):
        # type: () -> None
        """Check for optional newline and consume it.

        Used where the grammar allows (but doesn't require) a line break.
        """
        self._GetWord()
        if self.c_id == Id.Op_Newline:
            self._SetNext()
652
653 def _AtSecondaryKeyword(self):
654 # type: () -> bool
655 self._GetWord()
656 if self.c_id in SECONDARY_KEYWORDS:
657 return True
658 return False
659
    def ParseRedirect(self):
        # type: () -> Redir
        """Parse one redirect: operator, optional fd/var target, and operand.

        Precondition: the cursor is at a Kind.Redir token.  For here docs
        (<< and <<-), the returned node has a placeholder HereDoc arg that is
        filled in later by _ParseHereDocBody, at the next newline.
        """
        self._GetWord()
        assert self.c_kind == Kind.Redir, self.cur_word
        op_tok = cast(Token, self.cur_word)  # for MyPy

        # Note: the lexer could distinguish between
        #   >out
        #   3>out
        #   {fd}>out
        #
        # which would make the code below faster.  But small string
        # optimization would also speed it up, since redirects are small.

        op_val = lexer.TokenVal(op_tok)
        if op_val[0] == '{':
            # {fd}>out -- named descriptor variable
            pos = op_val.find('}')
            assert pos != -1  # lexer ensures this
            where = redir_loc.VarName(op_val[1:pos])  # type: redir_loc_t

        elif op_val[0].isdigit():
            # 3>out or 10>out -- explicit descriptor number.
            # NOTE(review): assumes at most 2 leading digits -- presumably
            # guaranteed by the lexer; confirm.
            pos = 1
            if op_val[1].isdigit():
                pos = 2
            where = redir_loc.Fd(int(op_val[:pos]))

        else:
            # Bare operator like >out: use its default descriptor (0 or 1).
            where = redir_loc.Fd(consts.RedirDefaultFd(op_tok.id))

        self._SetNext()

        self._GetWord()
        # Other redirect
        if self.c_kind != Kind.Word:
            p_die('Invalid token after redirect operator',
                  loc.Word(self.cur_word))

        # Here doc
        if op_tok.id in (Id.Redir_DLess, Id.Redir_DLessDash):
            arg = redir_param.HereDoc.CreateNull()
            arg.here_begin = self.cur_word
            arg.stdin_parts = []

            r = Redir(op_tok, where, arg)

            self.pending_here_docs.append(r)  # will be filled on next newline.

            self._SetNext()
            return r

        arg_word = self.cur_word
        tilde = word_.TildeDetect(arg_word)
        if tilde:
            arg_word = tilde
        self._SetNext()

        # We should never get Empty, Token, etc.
        assert arg_word.tag() == word_e.Compound, arg_word
        return Redir(op_tok, where, cast(CompoundWord, arg_word))
719
720 def _ParseRedirectList(self):
721 # type: () -> List[Redir]
722 """Try parsing any redirects at the cursor.
723
724 This is used for blocks only, not commands.
725 """
726 redirects = [] # type: List[Redir]
727 while True:
728 # This prediction needs to ONLY accept redirect operators. Should we
729 # make them a separate Kind?
730 self._GetWord()
731 if self.c_kind != Kind.Redir:
732 break
733
734 node = self.ParseRedirect()
735 redirects.append(node)
736 self._SetNext()
737
738 return redirects
739
    def _ScanSimpleCommand(self):
        # type: () -> Tuple[List[Redir], List[CompoundWord], Optional[ArgList], Optional[BlockArg]]
        """First pass: Split into redirects and words.

        Also recognizes, when the corresponding parse options are on:
        - a trailing { ... } block arg (parse_brace)
        - typed proc call args after '('
        """
        redirects = []  # type: List[Redir]
        words = []  # type: List[CompoundWord]
        typed_args = None  # type: Optional[ArgList]
        block = None  # type: Optional[BlockArg]

        first_word_caps = False  # does first word look like Caps, but not CAPS

        i = 0
        while True:
            self._GetWord()
            if self.c_kind == Kind.Redir:
                node = self.ParseRedirect()
                redirects.append(node)

            elif self.c_kind == Kind.Word:
                if self.parse_opts.parse_brace():
                    # Treat { and } more like operators
                    if self.c_id == Id.Lit_LBrace:
                        if self.allow_block:  # Disabled for if/while condition, etc.

                            # allow x = 42
                            self.hay_attrs_stack.append(first_word_caps)
                            brace_group = self.ParseBraceGroup()

                            # So we can get the source code back later
                            lines = self.arena.SaveLinesAndDiscard(
                                brace_group.left, brace_group.right)
                            block = BlockArg(brace_group, lines)

                            self.hay_attrs_stack.pop()

                        if 0:
                            print('--')
                            block.PrettyPrint()
                            print('\n--')
                        # Either way, '{' ends the simple command.  When
                        # allow_block is off, the word is left for the caller.
                        break
                    elif self.c_id == Id.Lit_RBrace:
                        # Another thing: { echo hi }
                        # We're DONE!!!
                        break

                w = cast(CompoundWord, self.cur_word)  # Kind.Word ensures this
                words.append(w)
                if i == 0:
                    ok, word_str, quoted = word_.StaticEval(w)
                    # Foo { a = 1 } is OK, but not foo { a = 1 } or FOO { a = 1 }
                    if (ok and len(word_str) and word_str[0].isupper() and
                            not word_str.isupper()):
                        first_word_caps = True
                    #log('W %s', word_str)

            elif self.c_id == Id.Op_LParen:
                # 1. Check that there's a preceding space
                prev_byte = self.lexer.ByteLookBack()
                if prev_byte not in (SPACE_CH, TAB_CH):
                    if self.parse_opts.parse_at():
                        p_die('Space required before (',
                              loc.Word(self.cur_word))
                    else:
                        # inline func call like @sorted(x) is invalid in OSH,
                        # but the solution isn't a space
                        p_die(
                            'Unexpected left paren (might need a space before it)',
                            loc.Word(self.cur_word))

                # 2. Check that it's not ().  We disallow this because it's a
                # no-op and there could be confusion with shell func defs.
                # For some reason we need to call lexer.LookPastSpace, not
                # w_parser.LookPastSpace.  I think this is because we're at (,
                # which is an operator token.  All the other cases are like
                # 'x=', which is PART of a word, and we don't know if it will
                # end.
                next_id = self.lexer.LookPastSpace(lex_mode_e.ShCommand)
                if next_id == Id.Op_RParen:
                    p_die('Empty arg list not allowed', loc.Word(self.cur_word))

                typed_args = self.w_parser.ParseProcCallArgs()

            else:
                break

            self._SetNext()
            i += 1
        return redirects, words, typed_args, block
826
    def _MaybeExpandAliases(self, words):
        # type: (List[CompoundWord]) -> Optional[command_t]
        """Try to expand aliases.

        Args:
          words: A list of Compound

        Returns:
          A command node if any aliases were expanded, or None otherwise.

        Our implementation of alias has two design choices:
        - Where to insert it in parsing.  We do it at the end of
          ParseSimpleCommand.
        - What grammar rule to parse the expanded alias buffer with.  In our
          case it's ParseCommand().

        This doesn't quite match what other shells do, but I can't figure out a
        better place.

        Most test cases pass, except for ones like:

        alias LBRACE='{'
        LBRACE echo one; echo two; }

        alias MULTILINE='echo 1
        echo 2
        echo 3'
        MULTILINE

        NOTE: dash handles aliases in a totally different way.  It has a global
        variable checkkwd in parser.c.  It assigns it all over the grammar,
        like this:

        checkkwd = CHKNL | CHKKWD | CHKALIAS;

        The readtoken() function checks (checkkwd & CHKALIAS) and then calls
        lookupalias().  This seems to provide a consistent behavior among
        shells, but it's less modular and testable.

        Bash also uses a global 'parser_state & PST_ALEXPNEXT'.
        """
        # Start a new list if there aren't any.  This will be passed
        # recursively through CommandParser instances.
        aliases_in_flight = (self.aliases_in_flight
                             if len(self.aliases_in_flight) else [])

        # for error message
        first_word_str = None  # type: Optional[str]
        argv0_loc = loc.Word(words[0])

        expanded = []  # type: List[str]
        i = 0
        n = len(words)

        while i < n:
            w = words[i]

            # Only a statically-evaluable, unquoted word can name an alias.
            ok, word_str, quoted = word_.StaticEval(w)
            if not ok or quoted:
                break

            alias_exp = self.aliases.get(word_str)
            if alias_exp is None:
                break

            # Prevent infinite loops.  This is subtle: we want to prevent
            # infinite expansion of alias echo='echo x'.  But we don't want to
            # prevent expansion of the second word in 'echo echo', so we add
            # 'i' to "aliases_in_flight".
            if (word_str, i) in aliases_in_flight:
                break

            if i == 0:
                first_word_str = word_str  # for error message

            #log('%r -> %r', word_str, alias_exp)
            aliases_in_flight.append((word_str, i))
            expanded.append(alias_exp)
            i += 1

            if not alias_exp.endswith(' '):
                # alias e='echo [ ' is the same expansion as
                # alias e='echo ['
                # The trailing space indicates whether we should continue to
                # expand aliases; it's not part of it.
                expanded.append(' ')
                break  # No more expansions

        if len(expanded) == 0:  # No expansions; caller does parsing.
            return None

        # We are expanding an alias, so copy the rest of the words and
        # re-parse.
        if i < n:
            left_tok = location.LeftTokenForWord(words[i])
            right_tok = location.RightTokenForWord(words[-1])

            # OLD CONSTRAINT
            #assert left_tok.line_id == right_tok.line_id

            words_str = self.arena.SnipCodeString(left_tok, right_tok)
            expanded.append(words_str)

        code_str = ''.join(expanded)

        # TODO:
        # Aliases break static parsing (like backticks), so use our own Arena.
        # This matters for Hay, which calls SaveLinesAndDiscard().
        # arena = alloc.Arena()
        arena = self.arena

        line_reader = reader.StringLineReader(code_str, arena)
        cp = self.parse_ctx.MakeOshParser(line_reader)
        cp.Init_AliasesInFlight(aliases_in_flight)

        # break circular dep
        from frontend import parse_lib

        # The interaction between COMPLETION and ALIASES requires special care.
        # See docstring of BeginAliasExpansion() in parse_lib.py.
        src = source.Alias(first_word_str, argv0_loc)
        with alloc.ctx_SourceCode(arena, src):
            with parse_lib.ctx_Alias(self.parse_ctx.trail):
                try:
                    # _ParseCommandTerm() handles multiline commands, compound
                    # commands, etc. as opposed to ParseLogicalLine()
                    node = cp._ParseCommandTerm()
                except error.Parse as e:
                    # Failure to parse alias expansion is a fatal error.
                    # We don't need more handling here.
                    raise

        if 0:
            log('AFTER expansion:')
            node.PrettyPrint()

        return node
965
966 def ParseSimpleCommand(self):
967 # type: () -> command_t
968 """Fixed transcription of the POSIX grammar (TODO: port to
969 grammar/Shell.g)
970
971 io_file : '<' filename
972 | LESSAND filename
973 ...
974
975 io_here : DLESS here_end
976 | DLESSDASH here_end
977
978 redirect : IO_NUMBER (io_redirect | io_here)
979
980 prefix_part : ASSIGNMENT_WORD | redirect
981 cmd_part : WORD | redirect
982
983 assign_kw : Declare | Export | Local | Readonly
984
985 # Without any words it is parsed as a command, not an assignment
986 assign_listing : assign_kw
987
988 # Now we have something to do (might be changing assignment flags too)
989 # NOTE: any prefixes should be a warning, but they are allowed in shell.
990 assignment : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+
991
992 # an external command, a function call, or a builtin -- a "word_command"
993 word_command : prefix_part* cmd_part+
994
995 simple_command : assign_listing
996 | assignment
997 | proc_command
998
999 Simple imperative algorithm:
1000
1001 1) Read a list of words and redirects. Append them to separate lists.
1002 2) Look for the first non-assignment word. If it's declare, etc., then
1003 keep parsing words AND assign words. Otherwise, just parse words.
1004 3) If there are no non-assignment words, then it's a global assignment.
1005
1006 { redirects, global assignments } OR
1007 { redirects, prefix_bindings, words } OR
1008 { redirects, ERROR_prefix_bindings, keyword, assignments, words }
1009
1010 THEN CHECK that prefix bindings don't have any array literal parts!
1011 global assignment and keyword assignments can have the of course.
1012 well actually EXPORT shouldn't have them either -- WARNING
1013
1014 3 cases we want to warn: prefix_bindings for assignment, and array literal
1015 in prefix bindings, or export
1016
1017 A command can be an assignment word, word, or redirect on its own.
1018
1019 ls
1020 >out.txt
1021
1022 >out.txt FOO=bar # this touches the file
1023
1024 Or any sequence:
1025 ls foo bar
1026 <in.txt ls foo bar >out.txt
1027 <in.txt ls >out.txt foo bar
1028
1029 Or add one or more environment bindings:
1030 VAR=val env
1031 >out.txt VAR=val env
1032
1033 here_end vs filename is a matter of whether we test that it's quoted. e.g.
1034 <<EOF vs <<'EOF'.
1035 """
1036 redirects, words, typed_args, block = self._ScanSimpleCommand()
1037
1038 typed_loc = None # type: Optional[Token]
1039 if block:
1040 typed_loc = block.brace_group.left
1041 if typed_args:
1042 typed_loc = typed_args.left # preferred over block location
1043
1044 if len(words) == 0: # e.g. >out.txt # redirect without words
1045 assert len(redirects) != 0
1046 if typed_loc is not None:
1047 p_die("Unexpected typed args", typed_loc)
1048
1049 simple = command.Simple.CreateNull()
1050 simple.blame_tok = redirects[0].op
1051 simple.more_env = []
1052 simple.words = []
1053 simple.redirects = redirects
1054 return simple
1055
1056 # Disallow =a because it's confusing
1057 part0 = words[0].parts[0]
1058 if part0.tag() == word_part_e.Literal:
1059 tok = cast(Token, part0)
1060 if tok.id == Id.Lit_Equals:
1061 p_die(
1062 "=word isn't allowed. Hint: either quote it or add a space after =\n"
1063 "to pretty print an expression", tok)
1064
1065 preparsed_list, suffix_words = _SplitSimpleCommandPrefix(words)
1066 if len(preparsed_list):
1067 left_token, _, _, _ = preparsed_list[0]
1068
1069 # Disallow X=Y inside proc and func
1070 # and inside Hay Attr blocks
1071 # But allow X=Y at the top level
1072 # for interactive use foo=bar
1073 # for global constants GLOBAL=~/src
1074 # because YSH assignment doesn't have tilde sub
1075 if len(suffix_words) == 0:
1076 if self.cmd_mode != cmd_mode_e.Shell or (
1077 len(self.hay_attrs_stack) and self.hay_attrs_stack[-1]):
1078 p_die('Use var/setvar to assign in YSH', left_token)
1079
1080 # Set a reference to words and redirects for completion. We want to
1081 # inspect this state after a failed parse.
1082 self.parse_ctx.trail.SetLatestWords(suffix_words, redirects)
1083
1084 if len(suffix_words) == 0:
1085 if typed_loc is not None:
1086 p_die("Unexpected typed args", typed_loc)
1087
1088 # ShAssignment: No suffix words like ONE=1 a[x]=1 TWO=2
1089 pairs = [] # type: List[AssignPair]
1090 for preparsed in preparsed_list:
1091 pairs.append(
1092 _MakeAssignPair(self.parse_ctx, preparsed, self.arena))
1093
1094 left_tok = location.LeftTokenForCompoundWord(words[0])
1095 return command.ShAssignment(left_tok, pairs, redirects)
1096
1097 kind, kw_token = word_.IsControlFlow(suffix_words[0])
1098
1099 if kind == Kind.ControlFlow:
1100 if kw_token.id == Id.ControlFlow_Return:
1101 # return x - inside procs and shell functions
1102 # return (x) - inside funcs
1103 if typed_args is None:
1104 if self.cmd_mode not in (cmd_mode_e.Shell, cmd_mode_e.Proc):
1105 p_die('Shell-style returns not allowed here', kw_token)
1106 else:
1107 if self.cmd_mode != cmd_mode_e.Func:
1108 p_die('Typed return is only allowed inside func',
1109 typed_loc)
1110 if len(typed_args.pos_args) != 1:
1111 p_die("Typed return expects one argument", typed_loc)
1112 if len(typed_args.named_args) != 0:
1113 p_die("Typed return doesn't take named arguments",
1114 typed_loc)
1115 return command.Retval(kw_token, typed_args.pos_args[0])
1116
1117 if typed_loc is not None:
1118 p_die("Unexpected typed args", typed_loc)
1119 if not self.parse_opts.parse_ignored() and len(redirects):
1120 p_die("Control flow shouldn't have redirects", kw_token)
1121
1122 if len(preparsed_list): # FOO=bar local spam=eggs not allowed
1123 # TODO: Change location as above
1124 left_token, _, _, _ = preparsed_list[0]
1125 p_die("Control flow shouldn't have environment bindings",
1126 left_token)
1127
1128 # Attach the token for errors. (ShAssignment may not need it.)
1129 if len(suffix_words) == 1:
1130 arg_word = None # type: Optional[word_t]
1131 elif len(suffix_words) == 2:
1132 arg_word = suffix_words[1]
1133 else:
1134 p_die('Unexpected argument to %r' % lexer.TokenVal(kw_token),
1135 loc.Word(suffix_words[2]))
1136
1137 return command.ControlFlow(kw_token, arg_word)
1138
1139 # Alias expansion only understands words, not typed args ( ) or block { }
1140 if not typed_args and not block and self.parse_opts.expand_aliases():
1141 # If any expansions were detected, then parse again.
1142 expanded_node = self._MaybeExpandAliases(suffix_words)
1143 if expanded_node:
1144 # Attach env bindings and redirects to the expanded node.
1145 more_env = [] # type: List[EnvPair]
1146 _AppendMoreEnv(preparsed_list, more_env)
1147 exp = command.ExpandedAlias(expanded_node, redirects, more_env)
1148 return exp
1149
1150 # TODO: check that we don't have env1=x x[1]=y env2=z here.
1151
1152 # FOO=bar printenv.py FOO
1153 node = _MakeSimpleCommand(preparsed_list, suffix_words, redirects,
1154 typed_args, block)
1155 return node
1156
    def ParseBraceGroup(self):
        # type: () -> BraceGroup
        """Parse { ... }, optionally capturing a YSH doc comment.

        Original:
          brace_group : LBrace command_list RBrace ;

        YSH:
          brace_group : LBrace (Op_Newline IgnoredComment?)? command_list RBrace ;

        The doc comment can only occur if there's a newline.
        """
        ate = self._Eat(Id.Lit_LBrace)
        left = word_.BraceToken(ate)

        # Doc comment token (e.g. '### summary' right after the newline);
        # stays None when absent.
        doc_token = None  # type: Token
        self._GetWord()
        if self.c_id == Id.Op_Newline:
            self._SetNext()
            # Temporarily make the word parser emit Ignored_Comment tokens so
            # a doc comment on the next line can be captured.
            with word_.ctx_EmitDocToken(self.w_parser):
                self._GetWord()

        if self.c_id == Id.Ignored_Comment:
            doc_token = cast(Token, self.cur_word)
            self._SetNext()

        c_list = self._ParseCommandList()

        ate = self._Eat(Id.Lit_RBrace)
        right = word_.BraceToken(ate)

        # Note(andychu): Related ASDL bug #1216. Choosing the Python [] behavior
        # would allow us to revert this back to None, which was changed in
        # https://github.com/oilshell/oil/pull/1211. Choosing the C++ nullptr
        # behavior saves allocations, but is less type safe.
        return BraceGroup(left, doc_token, c_list.children, [],
                          right)  # no redirects yet
1193
1194 def ParseDoGroup(self):
1195 # type: () -> command.DoGroup
1196 """Used by ForEach, ForExpr, While, Until. Should this be a Do node?
1197
1198 do_group : Do command_list Done ; /* Apply rule 6 */
1199 """
1200 ate = self._Eat(Id.KW_Do)
1201 do_kw = word_.AsKeywordToken(ate)
1202
1203 c_list = self._ParseCommandList() # could be anything
1204
1205 ate = self._Eat(Id.KW_Done)
1206 done_kw = word_.AsKeywordToken(ate)
1207
1208 return command.DoGroup(do_kw, c_list.children, done_kw)
1209
    def ParseForWords(self):
        # type: () -> Tuple[List[CompoundWord], Optional[Token]]
        """Read the word list of a for loop, up to its separator.

        for_words : WORD* for_sep
                  ;
        for_sep   : ';' newline_ok
                  | NEWLINES
                  ;

        Returns the words and the ';' token (if any) so the caller can
        record or remove it.
        """
        words = []  # type: List[CompoundWord]
        # The span_id of any semi-colon, so we can remove it.
        semi_tok = None  # type: Optional[Token]

        while True:
            self._GetWord()
            if self.c_id == Id.Op_Semi:
                tok = cast(Token, self.cur_word)
                semi_tok = tok
                self._SetNext()
                self._NewlineOk()
                break
            elif self.c_id == Id.Op_Newline:
                self._SetNext()
                break
            # YSH: '{' ends the word list when parse_brace is on, e.g.
            # 'for x in a b {'
            elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
                break

            if self.cur_word.tag() != word_e.Compound:
                # TODO: Can we also show a pointer to the 'for' keyword?
                p_die('Invalid word in for loop', loc.Word(self.cur_word))

            w2 = cast(CompoundWord, self.cur_word)
            words.append(w2)
            self._SetNext()
        return words, semi_tok
1245
    def _ParseForExprLoop(self, for_kw):
        # type: (Token) -> command.ForExpr
        """Parse the rest of a C-style for loop; '((' was already detected.

        Shell:
          for '((' init ';' cond ';' update '))' for_sep? do_group

        YSH:
          for '((' init ';' cond ';' update '))' for_sep? brace_group
        """
        node = self.w_parser.ReadForExpression()
        node.keyword = for_kw

        self._SetNext()

        # Accept an optional separator between '))' and the body.
        self._GetWord()
        if self.c_id == Id.Op_Semi:
            self._SetNext()
            self._NewlineOk()
        elif self.c_id == Id.Op_Newline:
            self._SetNext()
        elif self.c_id == Id.KW_Do:  # missing semicolon/newline allowed
            pass
        elif self.c_id == Id.Lit_LBrace:  # does NOT require parse_brace
            pass
        else:
            p_die('Invalid word after for expression', loc.Word(self.cur_word))

        # YSH brace body or shell do...done body.
        if self.c_id == Id.Lit_LBrace:
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()
        return node
1278
    def _ParseForEachLoop(self, for_kw):
        # type: (Token) -> command.ForEach
        """Parse 'for NAME... [in ...]' with a do-group or brace-group body.

        Accepts up to 3 loop variables for the YSH expression form, and at
        most 2 when iterating over words (checked after the iterable is
        known).
        """
        node = command.ForEach.CreateNull(alloc_lists=True)
        node.keyword = for_kw

        num_iter_names = 0
        while True:
            w = self.cur_word

            # Hack that makes the language more familiar:
            # - 'x, y' is accepted, but not 'x,y' or 'x ,y'
            # - 'x y' is also accepted but not idiomatic.
            UP_w = w
            if w.tag() == word_e.Compound:
                w = cast(CompoundWord, UP_w)
                # Strip a trailing comma token from 'x,' so the name
                # validates below.
                if word_.LiteralId(w.parts[-1]) == Id.Lit_Comma:
                    w.parts.pop()

            ok, iter_name, quoted = word_.StaticEval(w)
            if not ok or quoted:  # error: for $x
                p_die('Expected loop variable (a constant word)', loc.Word(w))

            if not match.IsValidVarName(iter_name):  # error: for -
                # TODO: consider commas?
                if ',' in iter_name:
                    p_die('Loop variables look like x, y (fix spaces)',
                          loc.Word(w))
                p_die('Invalid loop variable name %r' % iter_name, loc.Word(w))

            node.iter_names.append(iter_name)
            num_iter_names += 1
            self._SetNext()

            self._GetWord()
            # 'in' or 'do' or ';' or Op_Newline marks the end of variable names
            # Subtlety: 'var' is KW_Var and is a valid loop name
            if self.c_id in (Id.KW_In, Id.KW_Do) or self.c_kind == Kind.Op:
                break

            if num_iter_names == 3:
                p_die('Unexpected word after 3 loop variables',
                      loc.Word(self.cur_word))

        self._NewlineOk()

        self._GetWord()
        if self.c_id == Id.KW_In:
            # Ideally we would want ( not 'in'. But we still have to fix the bug
            # where we require a SPACE between in and (
            # for x in(y)  # should be accepted, but isn't

            expr_blame = word_.AsKeywordToken(self.cur_word)

            self._SetNext()  # skip in
            if self.w_parser.LookPastSpace() == Id.Op_LParen:
                # YSH expression iterable: for x in (mylist) { ... }
                enode, last_token = self.parse_ctx.ParseYshExpr(
                    self.lexer, grammar_nt.oil_expr)
                node.iterable = for_iter.YshExpr(enode, expr_blame)

                # For simplicity, we don't accept for x in (obj); do ...
                self._GetWord()
                if self.c_id != Id.Lit_LBrace:
                    p_die('Expected { after iterable expression',
                          loc.Word(self.cur_word))
            else:
                # Word iterable: for x in a b c; do ...
                semi_tok = None  # type: Optional[Token]
                iter_words, semi_tok = self.ParseForWords()
                node.semi_tok = semi_tok

                if not self.parse_opts.parse_bare_word() and len(
                        iter_words) == 1:
                    ok, s, quoted = word_.StaticEval(iter_words[0])
                    if ok and match.IsValidVarName(s) and not quoted:
                        p_die(
                            'Surround this word with either parens or quotes (parse_bare_word)',
                            loc.Word(iter_words[0]))

                # Expand braces and tildes statically in the word list.
                words2 = braces.BraceDetectAll(iter_words)
                words3 = word_.TildeDetectAll(words2)
                node.iterable = for_iter.Words(words3)

                # Now that we know there are words, do an extra check
                if num_iter_names > 2:
                    p_die('Expected at most 2 loop variables', for_kw)

        elif self.c_id == Id.KW_Do:
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            # do not advance

        elif self.c_id == Id.Op_Semi:  # for x; do
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            self._SetNext()

        else:  # for foo BAD
            p_die('Unexpected word after for loop variable',
                  loc.Word(self.cur_word))

        self._GetWord()
        if self.c_id == Id.Lit_LBrace:  # parse_opts.parse_brace() must be on
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()

        return node
1383
1384 def ParseFor(self):
1385 # type: () -> command_t
1386 """
1387 TODO: Update the grammar
1388
1389 for_clause : For for_name newline_ok (in for_words? for_sep)? do_group ;
1390 | For '((' ... TODO
1391 """
1392 ate = self._Eat(Id.KW_For)
1393 for_kw = word_.AsKeywordToken(ate)
1394
1395 self._GetWord()
1396 if self.c_id == Id.Op_DLeftParen:
1397 if not self.parse_opts.parse_dparen():
1398 p_die("Bash for loops aren't allowed (parse_dparen)",
1399 loc.Word(self.cur_word))
1400
1401 # for (( i = 0; i < 10; i++)
1402 n1 = self._ParseForExprLoop(for_kw)
1403 n1.redirects = self._ParseRedirectList()
1404 return n1
1405 else:
1406 # for x in a b; do echo hi; done
1407 n2 = self._ParseForEachLoop(for_kw)
1408 n2.redirects = self._ParseRedirectList()
1409 return n2
1410
1411 def _ParseConditionList(self):
1412 # type: () -> condition_t
1413 """
1414 condition_list: command_list
1415
1416 This is a helper to parse a condition list for if commands and while/until
1417 loops. It will throw a parse error if there are no conditions in the list.
1418 """
1419 self.allow_block = False
1420 commands = self._ParseCommandList()
1421 self.allow_block = True
1422
1423 if len(commands.children) == 0:
1424 p_die("Expected a condition", loc.Word(self.cur_word))
1425
1426 return condition.Shell(commands.children)
1427
1428 def ParseWhileUntil(self, keyword):
1429 # type: (Token) -> command.WhileUntil
1430 """
1431 while_clause : While command_list do_group ;
1432 until_clause : Until command_list do_group ;
1433 """
1434 self._SetNext() # skip keyword
1435
1436 if self.parse_opts.parse_paren() and self.w_parser.LookPastSpace(
1437 ) == Id.Op_LParen:
1438 enode, _ = self.parse_ctx.ParseYshExpr(self.lexer,
1439 grammar_nt.oil_expr)
1440 cond = condition.YshExpr(enode) # type: condition_t
1441 else:
1442 cond = self._ParseConditionList()
1443
1444 # NOTE: The LSTs will be different for OSH and YSH, but the execution
1445 # should be unchanged. To be sure we should desugar.
1446 self._GetWord()
1447 if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
1448 # while test -f foo {
1449 body_node = self.ParseBraceGroup() # type: command_t
1450 else:
1451 body_node = self.ParseDoGroup()
1452
1453 # no redirects yet
1454 return command.WhileUntil(keyword, cond, body_node, None)
1455
    def ParseCaseArm(self):
        # type: () -> CaseArm
        """Parse one arm of a POSIX case statement.

        case_item: '('? pattern ('|' pattern)* ')'
                   newline_ok command_term? trailer? ;

        Looking at '(' or pattern
        """
        # The ')' that closes the pattern list must lex as Right_CasePat,
        # not as an ordinary Op_RParen.
        self.lexer.PushHint(Id.Op_RParen, Id.Right_CasePat)

        left_tok = location.LeftTokenForWord(self.cur_word)  # ( or pat

        if self.c_id == Id.Op_LParen:  # Optional (
            self._SetNext()

        pat_words = []  # type: List[word_t]
        while True:
            self._GetWord()
            if self.c_kind != Kind.Word:
                p_die('Expected case pattern', loc.Word(self.cur_word))
            pat_words.append(self.cur_word)
            self._SetNext()

            # Alternative patterns are separated by |
            self._GetWord()
            if self.c_id == Id.Op_Pipe:
                self._SetNext()
            else:
                break

        ate = self._Eat(Id.Right_CasePat)
        middle_tok = word_.AsOperatorToken(ate)

        self._NewlineOk()

        # The action is optional; ;; or esac may follow the pattern directly.
        self._GetWord()
        if self.c_id not in (Id.Op_DSemi, Id.KW_Esac):
            c_list = self._ParseCommandTerm()
            action_children = c_list.children
        else:
            action_children = []

        dsemi_tok = None  # type: Token
        self._GetWord()
        if self.c_id == Id.KW_Esac:  # missing last ;;
            pass
        elif self.c_id == Id.Op_DSemi:
            dsemi_tok = word_.AsOperatorToken(self.cur_word)
            self._SetNext()
        else:
            # Happens on EOF
            p_die('Expected ;; or esac', loc.Word(self.cur_word))

        self._NewlineOk()

        return CaseArm(left_tok, pat.Words(pat_words), middle_tok,
                       action_children, dsemi_tok)
1512
    def ParseYshCaseArm(self, discriminant):
        # type: (Id_t) -> CaseArm
        """Parse one arm of a YSH case.

        case_item   : pattern newline_ok brace_group newline_ok
        pattern     : pat_words
                    | pat_exprs
                    | pat_eggex
                    | pat_else
        pat_words   : pat_word (newline_ok '|' newline_ok pat_word)*
        pat_exprs   : pat_expr (newline_ok '|' newline_ok pat_expr)*
        pat_word    : WORD
        pat_eggex   : '/' oil_eggex '/'
        pat_expr    : '(' oil_expr ')'
        pat_else    : '(' Id.KW_Else ')'

        Looking at: 'pattern'

        Note that the trailing `newline_ok` in `case_item` is handled by
        `ParseYshCase`. We do this because parsing that `newline_ok` returns
        the next "discriminant" for the next token, so it makes more sense to
        handle it there.
        """
        left_tok = None  # type: Token
        pattern = None  # type: pat_t

        if discriminant in (Id.Op_LParen, Id.Arith_Slash):
            # pat_exprs, pat_else or pat_eggex
            pattern, left_tok = self.w_parser.ParseYshCasePattern()
        else:
            # pat_words
            pat_words = []  # type: List[word_t]
            while True:
                self._GetWord()
                if self.c_kind != Kind.Word:
                    p_die('Expected case pattern', loc.Word(self.cur_word))
                pat_words.append(self.cur_word)
                self._SetNext()

                # NOTE(review): cur_word has already advanced past the pattern
                # word at this point, so left_tok points at the token AFTER the
                # first pattern -- confirm this is the intended blame location.
                if not left_tok:
                    left_tok = location.LeftTokenForWord(self.cur_word)

                self._NewlineOk()

                # Alternative patterns are separated by |
                self._GetWord()
                if self.c_id == Id.Op_Pipe:
                    self._SetNext()
                    self._NewlineOk()
                else:
                    break
            pattern = pat.Words(pat_words)

        self._NewlineOk()
        action = self.ParseBraceGroup()

        # The left token of the action is our "middle" token
        return CaseArm(left_tok, pattern, action.left, action.children,
                       action.right)
1570
1571 def ParseYshCase(self, case_kw):
1572 # type: (Token) -> command.Case
1573 """
1574 ysh_case : Case '(' expr ')' LBrace newline_ok ysh_case_arm* RBrace ;
1575
1576 Looking at: token after 'case'
1577 """
1578 enode, _ = self.parse_ctx.ParseYshExpr(self.lexer, grammar_nt.oil_expr)
1579 to_match = case_arg.YshExpr(enode)
1580
1581 ate = self._Eat(Id.Lit_LBrace)
1582 arms_start = word_.BraceToken(ate)
1583
1584 discriminant = self.w_parser.NewlineOkForYshCase()
1585
1586 # Note: for now, zero arms are accepted, just like POSIX case $x in esac
1587 arms = [] # type: List[CaseArm]
1588 while discriminant != Id.Op_RBrace:
1589 arm = self.ParseYshCaseArm(discriminant)
1590 arms.append(arm)
1591
1592 discriminant = self.w_parser.NewlineOkForYshCase()
1593
1594 # NewlineOkForYshCase leaves the lexer in lex_mode_e.Expr. So the '}'
1595 # token is read as an Id.Op_RBrace, but we need to store this as a
1596 # Id.Lit_RBrace.
1597 ate = self._Eat(Id.Op_RBrace)
1598 arms_end = word_.AsOperatorToken(ate)
1599 arms_end.id = Id.Lit_RBrace
1600
1601 return command.Case(case_kw, to_match, arms_start, arms, arms_end, None)
1602
    def ParseOldCase(self, case_kw):
        # type: (Token) -> command.Case
        """Parse a POSIX-style case WORD in ... esac.

        case_clause : Case WORD newline_ok In newline_ok case_arm* Esac ;

        -> Looking at WORD

        FYI original POSIX case list, which takes pains for DSEMI

        case_list: case_item (DSEMI newline_ok case_item)* DSEMI? newline_ok;
        """
        self._GetWord()
        w = self.cur_word
        # YSH: a bare constant word after 'case' is almost always a mistake.
        if not self.parse_opts.parse_bare_word():
            ok, s, quoted = word_.StaticEval(w)
            if ok and not quoted:
                p_die(
                    "This is a constant string. You may want a variable like $x (parse_bare_word)",
                    loc.Word(w))

        if w.tag() != word_e.Compound:
            p_die("Expected a word to match against", loc.Word(w))

        to_match = case_arg.Word(w)
        self._SetNext()  # past WORD

        self._NewlineOk()

        ate = self._Eat(Id.KW_In)
        arms_start = word_.AsKeywordToken(ate)

        self._NewlineOk()

        arms = []  # type: List[CaseArm]
        while True:
            self._GetWord()
            if self.c_id == Id.KW_Esac:  # this is Kind.Word
                break
            # case arm should begin with a pattern word or (
            if self.c_kind != Kind.Word and self.c_id != Id.Op_LParen:
                break

            arm = self.ParseCaseArm()
            arms.append(arm)

        ate = self._Eat(Id.KW_Esac)
        arms_end = word_.AsKeywordToken(ate)

        # no redirects yet
        return command.Case(case_kw, to_match, arms_start, arms, arms_end, None)
1653
1654 def ParseCase(self):
1655 # type: () -> command.Case
1656 """
1657 case_clause : old_case # from POSIX
1658 | ysh_case
1659 ;
1660
1661 Looking at 'Case'
1662 """
1663 case_kw = word_.AsKeywordToken(self.cur_word)
1664 self._SetNext() # past 'case'
1665
1666 if self.w_parser.LookPastSpace() == Id.Op_LParen:
1667 return self.ParseYshCase(case_kw)
1668 else:
1669 return self.ParseOldCase(case_kw)
1670
    def _ParseYshElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse the 'elif' arms and optional 'else' of a YSH if, in place.

        if test -f foo {
          echo foo
        } elif test -f bar; test -f spam {
          # ^ we parsed up to here
          echo bar
        } else {
          echo none
        }
        """
        arms = if_node.arms

        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # skip elif
            # elif (expr) { ... } -- YSH expression condition
            if (self.parse_opts.parse_paren() and
                    self.w_parser.LookPastSpace() == Id.Op_LParen):
                enode, _ = self.parse_ctx.ParseYshExpr(self.lexer,
                                                       grammar_nt.oil_expr)
                cond = condition.YshExpr(enode)  # type: condition_t
            else:
                # Shell command-list condition; block args are ambiguous here.
                self.allow_block = False
                commands = self._ParseCommandList()
                self.allow_block = True
                cond = condition.Shell(commands.children)

            body = self.ParseBraceGroup()
            self._GetWord()

            arm = IfArm(elif_kw, cond, None, body.children, [elif_kw.span_id])
            arms.append(arm)

        self._GetWord()
        if self.c_id == Id.KW_Else:
            self._SetNext()
            body = self.ParseBraceGroup()
            if_node.else_action = body.children
1705
    def _ParseYshIf(self, if_kw, cond):
        # type: (Token, condition_t) -> command.If
        """Parse the brace-group body and elif/else of a YSH if.

        if test -f foo {
          # ^ we parsed up to here
          echo foo
        } elif test -f bar; test -f spam {
          echo bar
        } else {
          echo none
        }

        NOTE: If you do something like if test -n foo{, the parser keeps going, and
        the error is confusing because it doesn't point to the right place.

        I think we might need strict_brace so that foo{ is disallowed. It has to
        be foo\{ or foo{a,b}. Or just turn that on with parse_brace? After you
        form ANY CompoundWord, make sure it's balanced for Lit_LBrace and
        Lit_RBrace? Maybe this is pre-parsing step in the WordParser?
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_node.if_kw = if_kw

        body1 = self.ParseBraceGroup()
        # Every arm has 1 spid, unlike shell-style
        # TODO: We could get the spids from the brace group.
        arm = IfArm(if_kw, cond, None, body1.children, [if_kw.span_id])

        if_node.arms.append(arm)

        self._GetWord()
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseYshElifElse(if_node)
        # the whole if node has the 'else' spid, unlike shell-style there's no 'fi'
        # spid because that's in the BraceGroup.
        return if_node
1741
    def _ParseElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse shell-style elif/else arms, mutating if_node in place.

        else_part: (Elif command_list Then command_list)* Else command_list ;
        """
        arms = if_node.arms

        self._GetWord()
        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # past 'elif'

            cond = self._ParseConditionList()

            ate = self._Eat(Id.KW_Then)
            then_kw = word_.AsKeywordToken(ate)

            body = self._ParseCommandList()
            arm = IfArm(elif_kw, cond, then_kw, body.children,
                        [elif_kw.span_id, then_kw.span_id])

            arms.append(arm)

            # Refresh c_id for the loop condition.
            self._GetWord()
        if self.c_id == Id.KW_Else:
            else_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # past 'else'
            body = self._ParseCommandList()
            if_node.else_action = body.children
        else:
            else_kw = None

        if_node.else_kw = else_kw
1775
    def ParseIf(self):
        # type: () -> command.If
        """Parse an if command, dispatching to the YSH form when appropriate.

        if_clause : If command_list Then command_list else_part? Fi ;

        open      : '{' | Then
        close     : '}' | Fi

        ysh_if    : If ( command_list | '(' expr ')' )
                    open command_list else_part? close;

        There are 2 conditionals here: parse_paren, then parse_brace
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_kw = word_.AsKeywordToken(self.cur_word)
        if_node.if_kw = if_kw
        self._SetNext()  # past 'if'

        if self.parse_opts.parse_paren() and self.w_parser.LookPastSpace(
        ) == Id.Op_LParen:
            # if (x + 1)
            enode, _ = self.parse_ctx.ParseYshExpr(self.lexer,
                                                   grammar_nt.oil_expr)
            cond = condition.YshExpr(enode)  # type: condition_t
        else:
            # if echo 1; echo 2; then
            # Remove ambiguity with if cd / {
            cond = self._ParseConditionList()

        # A '{' body means the YSH form; delegate and return early.
        self._GetWord()
        if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            return self._ParseYshIf(if_kw, cond)

        ate = self._Eat(Id.KW_Then)
        then_kw = word_.AsKeywordToken(ate)

        body = self._ParseCommandList()

        # First arm
        arm = IfArm(if_kw, cond, then_kw, body.children,
                    [if_kw.span_id, then_kw.span_id])
        if_node.arms.append(arm)

        # 2nd to Nth arm
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseElifElse(if_node)

        ate = self._Eat(Id.KW_Fi)
        if_node.fi_kw = word_.AsKeywordToken(ate)

        return if_node
1827
1828 def ParseTime(self):
1829 # type: () -> command_t
1830 """Time [-p] pipeline.
1831
1832 According to bash help.
1833 """
1834 time_kw = word_.AsKeywordToken(self.cur_word)
1835 self._SetNext() # skip time
1836 pipeline = self.ParsePipeline()
1837 return command.TimeBlock(time_kw, pipeline)
1838
1839 def ParseCompoundCommand(self):
1840 # type: () -> command_t
1841 """
1842 Refactoring: we put io_redirect* here instead of in function_body and
1843 command.
1844
1845 compound_command : brace_group io_redirect*
1846 | subshell io_redirect*
1847 | for_clause io_redirect*
1848 | while_clause io_redirect*
1849 | until_clause io_redirect*
1850 | if_clause io_redirect*
1851 | case_clause io_redirect*
1852
1853 # bash extensions
1854 | time_clause
1855 | [[ BoolExpr ]]
1856 | (( ArithExpr ))
1857 """
1858 self._GetWord()
1859 if self.c_id == Id.Lit_LBrace:
1860 n1 = self.ParseBraceGroup()
1861 n1.redirects = self._ParseRedirectList()
1862 return n1
1863 if self.c_id == Id.Op_LParen:
1864 n2 = self.ParseSubshell()
1865 n2.redirects = self._ParseRedirectList()
1866 return n2
1867
1868 if self.c_id == Id.KW_For:
1869 # Note: Redirects parsed in this call. POSIX for and bash for (( have
1870 # redirects, but YSH for doesn't.
1871 return self.ParseFor()
1872 if self.c_id in (Id.KW_While, Id.KW_Until):
1873 keyword = word_.AsKeywordToken(self.cur_word)
1874 n3 = self.ParseWhileUntil(keyword)
1875 n3.redirects = self._ParseRedirectList()
1876 return n3
1877
1878 if self.c_id == Id.KW_If:
1879 n4 = self.ParseIf()
1880 n4.redirects = self._ParseRedirectList()
1881 return n4
1882 if self.c_id == Id.KW_Case:
1883 n5 = self.ParseCase()
1884 n5.redirects = self._ParseRedirectList()
1885 return n5
1886
1887 if self.c_id == Id.KW_DLeftBracket:
1888 n6 = self.ParseDBracket()
1889 n6.redirects = self._ParseRedirectList()
1890 return n6
1891 if self.c_id == Id.Op_DLeftParen:
1892 if not self.parse_opts.parse_dparen():
1893 p_die('You may want a space between parens (parse_dparen)',
1894 loc.Word(self.cur_word))
1895 n7 = self.ParseDParen()
1896 n7.redirects = self._ParseRedirectList()
1897 return n7
1898
1899 # bash extensions: no redirects
1900 if self.c_id == Id.KW_Time:
1901 return self.ParseTime()
1902
1903 # Happens in function body, e.g. myfunc() oops
1904 p_die('Unexpected word while parsing compound command',
1905 loc.Word(self.cur_word))
1906 assert False # for MyPy
1907
    def ParseFunctionDef(self):
        # type: () -> command.ShFunction
        """Parse a POSIX-style shell function definition.

        function_header : fname '(' ')'
        function_def    : function_header newline_ok function_body ;

        Precondition: Looking at the function name.

        NOTE: There is an ambiguity with:

        function foo ( echo hi ) and
        function foo () ( echo hi )

        Bash only accepts the latter, though it doesn't really follow a grammar.
        """
        word0 = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(word0)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid function name', loc.Word(word0))

        part0 = word0.parts[0]
        # If we got a non-empty string from ShFunctionName, this should be true.
        assert part0.tag() == word_part_e.Literal
        blame_tok = cast(Token, part0)  # for ctx_VarChecker

        self._SetNext()  # move past function name

        # Must be true because of lookahead
        self._GetWord()
        assert self.c_id == Id.Op_LParen, self.cur_word

        self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
        self._SetNext()

        self._GetWord()
        if self.c_id == Id.Right_ShFunction:
            # 'f ()' implies a function definition, since invoking it with no args
            # would just be 'f'
            self._SetNext()

            self._NewlineOk()

            func = command.ShFunction.CreateNull()
            func.name = name
            with ctx_VarChecker(self.var_checker, blame_tok):
                func.body = self.ParseCompoundCommand()

            func.name_tok = location.LeftTokenForCompoundWord(word0)
            return func
        else:
            p_die('Expected ) in function definition', loc.Word(self.cur_word))
            # Unreachable: p_die raises; the return satisfies MyPy.
            return None
1960
    def ParseKshFunctionDef(self):
        # type: () -> command.ShFunction
        """Parse a ksh-style 'function NAME' definition.

        ksh_function_def : 'function' fname ( '(' ')' )? newline_ok function_body
        """
        keyword_tok = word_.AsKeywordToken(self.cur_word)

        self._SetNext()  # skip past 'function'
        self._GetWord()

        cur_word = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(cur_word)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid KSH-style function name', loc.Word(cur_word))

        name_word = self.cur_word
        self._SetNext()  # skip past the function name

        # The '()' is optional in the ksh form.
        self._GetWord()
        if self.c_id == Id.Op_LParen:
            self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
            self._SetNext()
            self._Eat(Id.Right_ShFunction)

        self._NewlineOk()

        func = command.ShFunction.CreateNull()
        func.name = name
        with ctx_VarChecker(self.var_checker, keyword_tok):
            func.body = self.ParseCompoundCommand()

        func.keyword = keyword_tok
        func.name_tok = location.LeftTokenForWord(name_word)
        return func
1995
    def ParseYshProc(self):
        # type: () -> command.Proc
        """Parse a YSH 'proc' definition; looking at the proc keyword."""
        node = command.Proc.CreateNull(alloc_lists=True)

        keyword_tok = word_.AsKeywordToken(self.cur_word)
        node.keyword = keyword_tok

        with ctx_VarChecker(self.var_checker, keyword_tok):
            with ctx_CmdMode(self, cmd_mode_e.Proc):
                self.w_parser.ParseProc(node)
                if node.sig.tag() == proc_sig_e.Closed:  # Register params
                    sig = cast(proc_sig.Closed, node.sig)

                    # Treat params as variables.
                    for param in sig.word_params:
                        # TODO: Check() should not look at tval
                        name_tok = param.blame_tok
                        self.var_checker.Check(Id.KW_Var, name_tok)
                    if sig.rest_of_words:
                        name_tok = sig.rest_of_words.blame_tok
                        self.var_checker.Check(Id.KW_Var, name_tok)
                    # We COULD register __out here but it would require a different API.
                    #if param.prefix and param.prefix.id == Id.Arith_Colon:
                    #    self.var_checker.Check(Id.KW_Var, '__' + param.name)

                self._SetNext()
                node.body = self.ParseBraceGroup()
                # No redirects for YSH procs (only at call site)

        return node
2026
    def ParseYshFunc(self):
        # type: () -> command.Func
        """Parse a YSH 'func' definition.

        ysh_func: KW_Func Expr_Name '(' [func_params] [';' func_params] ')' brace_group

        Looking at KW_Func
        """
        node = command.Func.CreateNull(alloc_lists=True)

        keyword_tok = word_.AsKeywordToken(self.cur_word)
        node.keyword = keyword_tok

        with ctx_VarChecker(self.var_checker, keyword_tok):
            self.parse_ctx.ParseFunc(self.lexer, node)

            # Register positional params as defined variables, so the body
            # can't shadow or redefine them with 'var'.
            for param in node.pos_params:
                name_tok = param.blame_tok
                self.var_checker.Check(Id.KW_Var, name_tok)
            if node.rest_of_pos:
                name_tok = node.rest_of_pos.blame_tok
                self.var_checker.Check(Id.KW_Var, name_tok)

            self._SetNext()
            # The body is parsed in Func mode, e.g. so 'return (x)' is allowed.
            with ctx_CmdMode(self, cmd_mode_e.Func):
                node.body = self.ParseBraceGroup()

        return node
2054
    def ParseCoproc(self):
        # type: () -> command_t
        """Parse a bash 'coproc' command -- not implemented yet.

        TODO: command.Coproc?
        """
        raise NotImplementedError()
2061
2062 def ParseSubshell(self):
2063 # type: () -> command.Subshell
2064 """
2065 subshell : '(' compound_list ')'
2066
2067 Looking at Op_LParen
2068 """
2069 left = word_.AsOperatorToken(self.cur_word)
2070 self._SetNext() # skip past (
2071
2072 # Ensure that something $( (cd / && pwd) ) works. If ) is already on the
2073 # translation stack, we want to delay it.
2074
2075 self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
2076
2077 c_list = self._ParseCommandList()
2078 if len(c_list.children) == 1:
2079 child = c_list.children[0]
2080 else:
2081 child = c_list
2082
2083 ate = self._Eat(Id.Right_Subshell)
2084 right = word_.AsOperatorToken(ate)
2085
2086 return command.Subshell(left, child, right, None) # no redirects yet
2087
2088 def ParseDBracket(self):
2089 # type: () -> command.DBracket
2090 """Pass the underlying word parser off to the boolean expression
2091 parser."""
2092 left = word_.AsKeywordToken(self.cur_word)
2093 # TODO: Test interactive. Without closing ]], you should get > prompt
2094 # (PS2)
2095
2096 self._SetNext() # skip [[
2097 b_parser = bool_parse.BoolParser(self.w_parser)
2098 bnode, right = b_parser.Parse() # May raise
2099 return command.DBracket(left, bnode, right, None) # no redirects yet
2100
2101 def ParseDParen(self):
2102 # type: () -> command.DParen
2103 left = word_.AsOperatorToken(self.cur_word)
2104
2105 self._SetNext() # skip ((
2106 anode, right = self.w_parser.ReadDParen()
2107 assert anode is not None
2108
2109 return command.DParen(left, anode, right, None) # no redirects yet
2110
    def ParseCommand(self):
        # type: () -> command_t
        """Top-level dispatch for a single command.

        command          : simple_command
                         | compound_command   # OSH edit: io_redirect* folded in
                         | function_def
                         | ksh_function_def

                           # YSH extensions
                         | proc NAME ...
                         | const ...
                         | var ...
                         | setglobal ...
                         | setref ...
                         | setvar ...
                         | _ EXPR
                         | = EXPR
                         ;

        Note: the reason const / var are not part of compound_command is because
        they can't be alone in a shell function body.

        Example:
        This is valid shell   f() if true; then echo hi; fi
        This is invalid       f() var x = 1

        Raises:
          ParseError (via p_die) on unexpected words / EOF.
        """
        # do/done/then/etc. cannot BEGIN a command.
        if self._AtSecondaryKeyword():
            p_die('Unexpected word when parsing command',
                  loc.Word(self.cur_word))

        # YSH Extensions

        if self.c_id == Id.KW_Proc:  # proc p { ... }
            # proc is hidden because of the 'local reasoning' principle
            # Code inside procs should be YSH, full stop.  That means oil:upgrade is
            # on.
            if self.parse_opts.parse_proc():
                return self.ParseYshProc()

            # Otherwise silently pass.  This is to support scripts like:
            #   $ bash -c 'proc() { echo p; }; proc'

        if self.c_id == Id.KW_Func:  # func f(x) { ... }
            if self.parse_opts.parse_func() and not self.parse_opts.parse_tea():
                return self.ParseYshFunc()

            # Otherwise silently pass, like for the procs.
            # (With parse_tea, 'func' is handled in the parse_tea block below.)

        if self.c_id in (Id.KW_Var, Id.KW_Const):  # var x = 1
            keyword_id = self.c_id  # remember var vs. const for the var checker
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n8 = self.w_parser.ParseVarDecl(kw_token)
            # Register each declared name so later references can be checked.
            for lhs in n8.lhs:
                self.var_checker.Check(keyword_id, lhs.name)
            return n8

        if self.c_id in (Id.KW_SetVar, Id.KW_SetRef, Id.KW_SetGlobal):
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n9 = self.w_parser.ParsePlaceMutation(kw_token, self.var_checker)
            return n9

        if self.c_id in (Id.Lit_Underscore, Id.Lit_Equals):  # = 42 + 1
            keyword = word_.LiteralToken(self.cur_word)
            assert keyword is not None
            self._SetNext()
            enode = self.w_parser.ParseCommandExpr()
            return command.Expr(keyword, enode)

        if self.c_id == Id.KW_Function:
            return self.ParseKshFunctionDef()

        # Top-level keywords to hide: func, data, enum, class/mod. Not sure about
        # 'use'.
        if self.parse_opts.parse_tea():
            if self.c_id == Id.KW_Func:
                out0 = command.TeaFunc.CreateNull(alloc_lists=True)
                self.parse_ctx.ParseTeaFunc(self.lexer, out0)
                self._SetNext()
                return out0
            if self.c_id == Id.KW_Data:
                out1 = command.Data.CreateNull(alloc_lists=True)
                self.parse_ctx.ParseDataType(self.lexer, out1)
                self._SetNext()
                return out1
            if self.c_id == Id.KW_Enum:
                out2 = command.Enum.CreateNull(alloc_lists=True)
                self.parse_ctx.ParseEnum(self.lexer, out2)
                self._SetNext()
                return out2
            if self.c_id == Id.KW_Class:
                out3 = command.Class.CreateNull(alloc_lists=True)
                self.parse_ctx.ParseClass(self.lexer, out3)
                self._SetNext()
                return out3
            if self.c_id == Id.KW_Import:
                # Needs last_token because it ends with an optional thing?
                out4 = command.Import.CreateNull(alloc_lists=True)
                self.w_parser.ParseImport(out4)
                self._SetNext()
                return out4

        if self.c_id in (Id.KW_DLeftBracket, Id.Op_DLeftParen, Id.Op_LParen,
                         Id.Lit_LBrace, Id.KW_For, Id.KW_While, Id.KW_Until,
                         Id.KW_If, Id.KW_Case, Id.KW_Time):
            return self.ParseCompoundCommand()

        # Syntax error for '}' starting a line, which all shells disallow.
        if self.c_id == Id.Lit_RBrace:
            p_die('Unexpected right brace', loc.Word(self.cur_word))

        if self.c_kind == Kind.Redir:  # Leading redirect
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Word:
            cur_word = cast(CompoundWord, self.cur_word)  # ensured by Kind.Word

            # NOTE: At the top level, only Token and Compound are possible.
            # Can this be modelled better in the type system, removing asserts?
            #
            # TODO: This can be a proc INVOCATION!  (Doesn't even need parse_paren)
            # Problem: We have to distinguish f( ) { echo ; } and myproc (x, y)
            # That requires 2 tokens of lookahead, which we don't have
            #
            # Or maybe we don't just have ParseSimpleCommand -- we will have
            # ParseYshCommand or something

            if (self.w_parser.LookAheadFuncParens() and
                    not word_.IsVarLike(cur_word)):
                return self.ParseFunctionDef()  # f() { echo; }  # function

            # Parse x = 1+2*3 when inside HayNode { } blocks
            parts = cur_word.parts
            if self.parse_opts.parse_equals() and len(parts) == 1:
                part0 = parts[0]
                if part0.tag() == word_part_e.Literal:
                    tok = cast(Token, part0)
                    # Bare assignment only when the word is a valid var name
                    # followed (after spaces) by '='.
                    if (match.IsValidVarName(tok.tval) and
                            self.w_parser.LookPastSpace() == Id.Lit_Equals):
                        assert tok.id == Id.Lit_Chars, tok

                        if len(self.hay_attrs_stack) and self.hay_attrs_stack[-1]:
                            # Note: no static var_checker.Check() for bare assignment
                            enode = self.w_parser.ParseBareDecl()
                            self._SetNext()  # Somehow this is necessary
                            # TODO: Use BareDecl here. Well, do that when we
                            # treat it as const or lazy.
                            return command.VarDecl(None, [NameType(tok, None)],
                                                   enode)
                        else:
                            # x=1 outside a Hay attribute block is an error.
                            self._SetNext()
                            self._GetWord()
                            p_die(
                                'Unexpected = (Hint: use var/setvar, or quote it)',
                                loc.Word(self.cur_word))

            # echo foo
            # f=(a b c)  # array
            # array[1+2]+=1
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Eof:
            p_die("Unexpected EOF while parsing command",
                  loc.Word(self.cur_word))

        # NOTE: This only happens in batch mode in the second turn of the loop!
        # e.g. )
        p_die("Invalid word while parsing command", loc.Word(self.cur_word))

        assert False  # for MyPy  (p_die above always raises)
2282
2283 def ParsePipeline(self):
2284 # type: () -> command_t
2285 """
2286 pipeline : Bang? command ( '|' newline_ok command )* ;
2287 """
2288 negated = None # type: Optional[Token]
2289
2290 self._GetWord()
2291 if self.c_id == Id.KW_Bang:
2292 negated = word_.AsKeywordToken(self.cur_word)
2293 self._SetNext()
2294
2295 child = self.ParseCommand()
2296 assert child is not None
2297
2298 children = [child]
2299
2300 self._GetWord()
2301 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2302 if negated is not None:
2303 node = command.Pipeline(negated, children, [])
2304 return node
2305 else:
2306 return child # no pipeline
2307
2308 # | or |&
2309 ops = [] # type: List[Token]
2310 while True:
2311 op = word_.AsOperatorToken(self.cur_word)
2312 ops.append(op)
2313
2314 self._SetNext() # skip past Id.Op_Pipe or Id.Op_PipeAmp
2315 self._NewlineOk()
2316
2317 child = self.ParseCommand()
2318 children.append(child)
2319
2320 self._GetWord()
2321 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2322 break
2323
2324 return command.Pipeline(negated, children, ops)
2325
2326 def ParseAndOr(self):
2327 # type: () -> command_t
2328 self._GetWord()
2329 if self.c_id == Id.Word_Compound:
2330 first_word_tok = word_.LiteralToken(self.cur_word)
2331 if first_word_tok is not None and first_word_tok.id == Id.Lit_TDot:
2332 # We got '...', so parse in multiline mode
2333 self._SetNext()
2334 with word_.ctx_Multiline(self.w_parser):
2335 return self._ParseAndOr()
2336
2337 # Parse in normal mode, not multiline
2338 return self._ParseAndOr()
2339
2340 def _ParseAndOr(self):
2341 # type: () -> command_t
2342 """
2343 and_or : and_or ( AND_IF | OR_IF ) newline_ok pipeline
2344 | pipeline
2345
2346 Note that it is left recursive and left associative. We parse it
2347 iteratively with a token of lookahead.
2348 """
2349 child = self.ParsePipeline()
2350 assert child is not None
2351
2352 self._GetWord()
2353 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2354 return child
2355
2356 ops = [] # type: List[Token]
2357 children = [child]
2358
2359 while True:
2360 ops.append(word_.AsOperatorToken(self.cur_word))
2361
2362 self._SetNext() # skip past || &&
2363 self._NewlineOk()
2364
2365 child = self.ParsePipeline()
2366 children.append(child)
2367
2368 self._GetWord()
2369 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2370 break
2371
2372 return command.AndOr(children, ops)
2373
2374 # NOTE: _ParseCommandLine and _ParseCommandTerm are similar, but different.
2375
2376 # At the top level, we execute after every line, e.g. to
2377 # - process alias (a form of dynamic parsing)
2378 # - process 'exit', because invalid syntax might appear after it
2379
2380 # On the other hand, for a while loop body, we parse the whole thing at once,
2381 # and then execute it. We don't want to parse it over and over again!
2382
2383 # COMPARE
2384 # command_line : and_or (sync_op and_or)* trailer? ; # TOP LEVEL
2385 # command_term : and_or (trailer and_or)* ; # CHILDREN
2386
    def _ParseCommandLine(self):
        # type: () -> command_t
        """Parse one logical line for the top level (batch or interactive).

        command_line     : and_or (sync_op and_or)* trailer? ;
        trailer          : sync_op newline_ok
                         | NEWLINES;
        sync_op          : '&' | ';';

        NOTE: This rule causes LL(k > 1) behavior.  We would have to peek to see if
        there is another command word after the sync op.

        But it's easier to express imperatively.  Do the following in a loop:
        1. ParseAndOr
        2. Peek.
           a. If there's a newline, then return.  (We're only parsing a single
              line.)
           b. If there's a sync_op, process it.  Then look for a newline and
              return.  Otherwise, parse another AndOr.
        """
        # This END_LIST is slightly different than END_LIST in _ParseCommandTerm.
        # I don't think we should add anything else here; otherwise it will be
        # ignored at the end of ParseInteractiveLine(), e.g. leading to bug #301.
        END_LIST = [Id.Op_Newline, Id.Eof_Real]

        children = []  # type: List[command_t]
        done = False
        while not done:
            child = self.ParseAndOr()

            self._GetWord()
            if self.c_id in (Id.Op_Semi, Id.Op_Amp):
                # Wrap the command in a Sentence to record the ';' or '&'.
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                # After the sync op, a newline or EOF ends the line; anything
                # else is another and_or on the same line.
                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in END_LIST:
                done = True

            else:
                # e.g. echo a(b)
                p_die('Invalid word while parsing command line',
                      loc.Word(self.cur_word))

            children.append(child)

        # Simplify the AST: a single child doesn't need a CommandList wrapper.
        if len(children) > 1:
            return command.CommandList(children)
        else:
            return children[0]
2441
    def _ParseCommandTerm(self):
        # type: () -> command.CommandList
        """Parse a sequence of commands inside a block construct.

        command_term     : and_or (trailer and_or)* ;
        trailer          : sync_op newline_ok
                         | NEWLINES;
        sync_op          : '&' | ';';

        This is handled in imperative style, like _ParseCommandLine.
        Called by _ParseCommandList for all blocks, and also for ParseCaseArm,
        which is slightly different.  (HOW?  Is it the DSEMI?)

        Returns:
          syntax_asdl.command
        """
        # Token types that will end the command term.
        END_LIST = [self.eof_id, Id.Right_Subshell, Id.Lit_RBrace, Id.Op_DSemi]

        # NOTE: This is similar to _ParseCommandLine.
        #
        # - Why aren't we doing END_LIST in _ParseCommandLine?
        #   - Because you will never be inside $() at the top level.
        #   - We also know it will end in a newline.  It can't end in "fi"!
        #     - example: if true; then { echo hi; } fi

        children = []  # type: List[command_t]
        done = False
        while not done:
            # Most keywords are valid "first words".  But do/done/then do not BEGIN
            # commands, so they are not valid.
            if self._AtSecondaryKeyword():
                break

            child = self.ParseAndOr()

            self._GetWord()
            if self.c_id == Id.Op_Newline:
                self._SetNext()

                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in (Id.Op_Semi, Id.Op_Amp):
                # Wrap in a Sentence to record the ';' or '&' terminator.
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                self._GetWord()
                if self.c_id == Id.Op_Newline:
                    self._SetNext()  # skip over newline

                    # Test if we should keep going.  There might be another command after
                    # the semi and newline.
                    self._GetWord()
                    if self.c_id in END_LIST:  # \n EOF
                        done = True

                elif self.c_id in END_LIST:  # ; EOF
                    done = True

            elif self.c_id in END_LIST:  # EOF
                done = True

            # For if test -f foo; test -f bar {
            elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
                done = True

            elif self.c_kind != Kind.Word:
                # e.g. f() { echo (( x )) ; }
                # but can't fail on 'fi fi', see osh/cmd_parse_test.py

                #log("Invalid %s", self.cur_word)
                p_die("Invalid word while parsing command list",
                      loc.Word(self.cur_word))

            children.append(child)

        return command.CommandList(children)
2521
2522 def _ParseCommandList(self):
2523 # type: () -> command.CommandList
2524 """
2525 command_list : newline_ok command_term trailer? ;
2526
2527 This one is called by all the compound commands. It's basically a command
2528 block.
2529
2530 NOTE: Rather than translating the CFG directly, the code follows a style
2531 more like this: more like this: (and_or trailer)+. It makes capture
2532 easier.
2533 """
2534 self._NewlineOk()
2535 return self._ParseCommandTerm()
2536
    def ParseLogicalLine(self):
        # type: () -> Optional[command_t]
        """Parse a single line for main_loop.

        A wrapper around _ParseCommandLine().  Similar but not identical to
        _ParseCommandList() and ParseCommandSub().

        Returns:
          None on Id.Eof_Real, so the caller can check for pending here docs.

        Raises:
          ParseError
        """
        self._NewlineOk()
        self._GetWord()
        if self.c_id == Id.Eof_Real:
            return None  # main loop checks for here docs
        node = self._ParseCommandLine()
        return node
2553
2554 def ParseInteractiveLine(self):
2555 # type: () -> parse_result_t
2556 """Parse a single line for Interactive main_loop.
2557
2558 Different from ParseLogicalLine because newlines are handled differently.
2559
2560 Raises:
2561 ParseError
2562 """
2563 self._GetWord()
2564 if self.c_id == Id.Op_Newline:
2565 return parse_result.EmptyLine
2566 if self.c_id == Id.Eof_Real:
2567 return parse_result.Eof
2568
2569 node = self._ParseCommandLine()
2570 return parse_result.Node(node)
2571
2572 def ParseCommandSub(self):
2573 # type: () -> command_t
2574 """Parse $(echo hi) and `echo hi` for word_parse.py.
2575
2576 They can have multiple lines, like this: echo $( echo one echo
2577 two )
2578 """
2579 self._NewlineOk()
2580
2581 self._GetWord()
2582 if self.c_kind == Kind.Eof: # e.g. $()
2583 return command.NoOp
2584
2585 c_list = self._ParseCommandTerm()
2586 if len(c_list.children) == 1:
2587 return c_list.children[0]
2588 else:
2589 return c_list
2590
2591 def CheckForPendingHereDocs(self):
2592 # type: () -> None
2593 # NOTE: This happens when there is no newline at the end of a file, like
2594 # osh -c 'cat <<EOF'
2595 if len(self.pending_here_docs):
2596 node = self.pending_here_docs[0] # Just show the first one?
2597 h = cast(redir_param.HereDoc, node.arg)
2598 p_die('Unterminated here doc began here', loc.Word(h.here_begin))