1 # Copyright 2016 Andy Chu. All rights reserved.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at
5 #
6 # http://www.apache.org/licenses/LICENSE-2.0
7 """
8 cmd_parse.py - Parse high level shell commands.
9 """
10 from __future__ import print_function
11
12 from _devbuild.gen import grammar_nt
13 from _devbuild.gen.id_kind_asdl import Id, Id_t, Kind, Kind_str
14 from _devbuild.gen.types_asdl import lex_mode_e, cmd_mode_e, cmd_mode_t
15 from _devbuild.gen.syntax_asdl import (
16 loc,
17 SourceLine,
18 source,
19 parse_result,
20 parse_result_t,
21 command,
22 command_t,
23 condition,
24 condition_t,
25 for_iter,
26 ArgList,
27 BraceGroup,
28 BlockArg,
29 CaseArm,
30 case_arg,
31 IfArm,
32 pat,
33 pat_t,
34 Redir,
35 redir_param,
36 redir_loc,
37 redir_loc_t,
38 word_e,
39 word_t,
40 CompoundWord,
41 Token,
42 word_part_e,
43 word_part_t,
44 rhs_word,
45 rhs_word_t,
46 sh_lhs_expr,
47 sh_lhs_expr_t,
48 AssignPair,
49 EnvPair,
50 assign_op_e,
51 NameType,
52 proc_sig,
53 proc_sig_e,
54 )
55 from core import alloc
56 from core import error
57 from core.error import p_die
58 from core import ui
59 from frontend import consts
60 from frontend import lexer
61 from frontend import location
62 from frontend import match
63 from frontend import reader
64 from mycpp.mylib import log
65 from osh import braces
66 from osh import bool_parse
67 from osh import word_
68
69 from typing import Optional, List, Dict, Any, Tuple, cast, TYPE_CHECKING
70 if TYPE_CHECKING:
71 from core.alloc import Arena
72 from core import optview
73 from frontend.lexer import Lexer
74 from frontend.parse_lib import ParseContext, AliasesInFlight
75 from frontend.reader import _Reader
76 from osh.word_parse import WordParser
77
_ = Kind_str  # for debug prints

# Byte values of whitespace chars, used for checks like "space before '('".
TAB_CH = 9  # ord('\t')
SPACE_CH = 32  # ord(' ')
82
83
def _ReadHereLines(
        line_reader,  # type: _Reader
        h,  # type: Redir
        delimiter,  # type: str
):
    # type: (...) -> Tuple[List[Tuple[SourceLine, int]], Tuple[SourceLine, int]]
    """Collect the body lines of a here doc, up to the delimiter line.

    All lines are read eagerly, rather than parsed one at a time, because of
    cases like this:
      cat <<EOF
      1 $(echo 2
      echo 3) 4
      EOF

    Returns:
      (body lines, terminator line), each entry a (SourceLine, start offset)
      pair.  The offset skips leading tabs when the operator is <<-.
    """
    body = []  # type: List[Tuple[SourceLine, int]]
    found_delim = None  # type: Tuple[SourceLine, int]
    strip_tabs = (h.op.id == Id.Redir_DLessDash)

    while True:
        src_line, unused_offset = line_reader.GetLine()

        if src_line is None:  # EOF
            # Bash merely warns about an unterminated here doc.  We make it
            # fatal because we want to be strict, and because it causes
            # problems reporting other errors.
            # Attribute it to the << in <<EOF for now.
            p_die("Couldn't find terminator for here doc that starts here",
                  h.op)

        assert len(src_line.content) != 0  # None should be the empty line

        line = src_line.content

        # For <<-, skip over ALL leading tabs -- not spaces, and not just the
        # first tab.
        offset = 0
        if strip_tabs:
            while offset < len(line) and line[offset] == '\t':
                offset += 1

        if line[offset:].rstrip() == delimiter:
            found_delim = (src_line, offset)
            break

        body.append((src_line, offset))

    return body, found_delim
134
135
def _MakeLiteralHereLines(
        here_lines,  # type: List[Tuple[SourceLine, int]]
        arena,  # type: Arena
):
    # type: (...) -> List[word_part_t]  # less precise because List is invariant type
    """Turn each here doc line into a single Lit_Chars token / word part.

    Used for quoted delimiters (<< 'EOF'), where no expansion happens.
    """
    parts = []  # type: List[word_part_t]
    for src_line, start_offset in here_lines:
        tok = arena.NewToken(Id.Lit_Chars, start_offset,
                             len(src_line.content), src_line,
                             src_line.content[start_offset:])
        parts.append(cast(word_part_t, tok))
    return parts
149
150
def _ParseHereDocBody(parse_ctx, r, line_reader, arena):
    # type: (ParseContext, Redir, _Reader, Arena) -> None
    """Fill in attributes of a pending here doc node.

    Reads the here doc body from line_reader and stores the parts in r.arg
    (a redir_param.HereDoc), along with a token for the terminator line.
    """
    h = cast(redir_param.HereDoc, r.arg)
    # "If any character in word is quoted, the delimiter shall be formed by
    # performing quote removal on word, and the here-document lines shall not
    # be expanded. Otherwise, the delimiter shall be the word itself."
    # NOTE: \EOF counts, or even E\OF
    ok, delimiter, delim_quoted = word_.StaticEval(h.here_begin)
    if not ok:
        p_die('Invalid here doc delimiter', loc.Word(h.here_begin))

    here_lines, last_line = _ReadHereLines(line_reader, r, delimiter)

    if delim_quoted:  # << 'EOF'
        # Literal for each line: no $var or $(command sub) expansion.
        h.stdin_parts = _MakeLiteralHereLines(here_lines, arena)
    else:
        # Unquoted delimiter: re-parse the body lines for substitutions.
        line_reader = reader.VirtualLineReader(here_lines, arena)
        w_parser = parse_ctx.MakeWordParserForHereDoc(line_reader)
        w_parser.ReadHereDocBody(h.stdin_parts)  # fills this in

    end_line, end_pos = last_line

    # Create a Token with the end terminator.  Maintains the invariant that the
    # tokens "add up".
    h.here_end_tok = arena.NewToken(Id.Undefined_Tok, end_pos,
                                    len(end_line.content), end_line, '')
179
180
def _MakeAssignPair(parse_ctx, preparsed, arena):
    # type: (ParseContext, PreParsedItem, Arena) -> AssignPair
    """Create an AssignPair from a 4-tuple from DetectShAssignment."""

    left_token, close_token, part_offset, w = preparsed

    if left_token.id == Id.Lit_VarLike:  # s=1
        if lexer.IsPlusEquals(left_token):
            # 'name+=' -- slice off the trailing '+='
            var_name = lexer.TokenSliceRight(left_token, -2)
            op = assign_op_e.PlusEqual
        else:
            # 'name=' -- slice off the trailing '='
            var_name = lexer.TokenSliceRight(left_token, -1)
            op = assign_op_e.Equal

        tmp = sh_lhs_expr.Name(left_token, var_name)

        lhs = cast(sh_lhs_expr_t, tmp)

    elif left_token.id == Id.Lit_ArrayLhsOpen and parse_ctx.one_pass_parse:
        # a[x]=1 in one-pass mode: keep the index expression as an unparsed
        # string instead of building an arith AST now.
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        assert left_token.line == close_token.line, \
            '%s and %s not on same line' % (left_token, close_token)

        # Slice the line to get the text between 'a[' and ']='
        left_pos = left_token.col + left_token.length
        index_str = left_token.line.content[left_pos:close_token.col]
        lhs = sh_lhs_expr.UnparsedIndex(left_token, var_name, index_str)

    elif left_token.id == Id.Lit_ArrayLhsOpen:  # a[x++]=1
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        span1 = left_token
        span2 = close_token
        # Similar to SnipCodeString / SnipCodeBlock
        if span1.line == span2.line:
            # extract what's between brackets
            code_str = span1.line.content[span1.col + span1.length:span2.col]
        else:
            raise NotImplementedError('%s != %s' % (span1.line, span2.line))
        a_parser = parse_ctx.MakeArithParser(code_str)

        # a[i+1]= is a place
        src = source.Reparsed('array place', left_token, close_token)
        with alloc.ctx_SourceCode(arena, src):
            index_node = a_parser.Parse()  # may raise error.Parse

        tmp3 = sh_lhs_expr.IndexedName(left_token, var_name, index_node)

        lhs = cast(sh_lhs_expr_t, tmp3)

    else:
        # DetectShAssignment should only produce the token Ids handled above
        raise AssertionError()

    # TODO: Should we also create a rhs_expr.ArrayLiteral here?
    n = len(w.parts)
    if part_offset == n:
        # Nothing after the '=', e.g. 'x='
        rhs = rhs_word.Empty  # type: rhs_word_t
    else:
        # tmp2 is for intersection of C++/MyPy type systems
        tmp2 = CompoundWord(w.parts[part_offset:])
        word_.TildeDetectAssign(tmp2)
        rhs = tmp2

    return AssignPair(left_token, lhs, op, rhs)
253
254
def _AppendMoreEnv(preparsed_list, more_env):
    # type: (PreParsedList, List[EnvPair]) -> None
    """Helper to modify a SimpleCommand node.

    Args:
      preparsed_list: a list of 4-tuples from DetectShAssignment
      more_env: a list to append EnvPair instances to
    """
    for left_token, _, part_offset, w in preparsed_list:
        # a[x]=1 style bindings are never valid as env prefixes
        if left_token.id != Id.Lit_VarLike:
            p_die("Environment binding shouldn't look like an array assignment",
                  left_token)

        # += is mutation, which doesn't make sense for an env binding
        if lexer.IsPlusEquals(left_token):
            p_die('Expected = in environment binding, got +=', left_token)

        name = lexer.TokenSliceRight(left_token, -1)  # strip trailing '='
        if part_offset == len(w.parts):
            rhs = rhs_word.Empty  # type: rhs_word_t
        else:
            rhs = CompoundWord(w.parts[part_offset:])

        more_env.append(EnvPair(left_token, name, rhs))
280
281
if TYPE_CHECKING:
    # The 3 items returned by word_.DetectShAssignment, plus the word itself
    # (see _SplitSimpleCommandPrefix): left token, closing token for a[i]=
    # forms, and the index of the first RHS part within the word.
    PreParsedItem = Tuple[Token, Optional[Token], int, CompoundWord]
    PreParsedList = List[PreParsedItem]
285
286
def _SplitSimpleCommandPrefix(words):
    # type: (List[CompoundWord]) -> Tuple[PreParsedList, List[CompoundWord]]
    """Second pass of SimpleCommand parsing: look for assignment words."""
    assignments = []  # type: PreParsedList
    rest = []  # type: List[CompoundWord]

    saw_command_word = False
    for w in words:
        if not saw_command_word:
            left_token, close_token, part_offset = word_.DetectShAssignment(w)
            if left_token:
                assignments.append((left_token, close_token, part_offset, w))
                continue
            # The first non-assignment word ends the prefix.
            saw_command_word = True
        rest.append(w)

    return assignments, rest
307
308
def _MakeSimpleCommand(
        preparsed_list,  # type: PreParsedList
        suffix_words,  # type: List[CompoundWord]
        redirects,  # type: List[Redir]
        typed_args,  # type: Optional[ArgList]
        block,  # type: Optional[BlockArg]
):
    # type: (...) -> command.Simple
    """Create a command.Simple node.

    Args:
      preparsed_list: env bindings like FOO=bar preceding the command word
      suffix_words: the command word and its arguments; must be non-empty
      typed_args: typed args after '(', if any
      block: trailing { ... } block arg, if any
    """

    # FOO=(1 2 3) ls is not allowed.
    for _, _, _, w in preparsed_list:
        if word_.HasArrayPart(w):
            p_die("Environment bindings can't contain array literals",
                  loc.Word(w))

    # NOTE: It would be possible to add this check back. But it already happens
    # at runtime in EvalWordSequence2.
    # echo FOO=(1 2 3) is not allowed (but we should NOT fail on echo FOO[x]=1).
    if 0:
        for w in suffix_words:
            if word_.HasArrayPart(w):
                p_die("Commands can't contain array literals", loc.Word(w))

    assert len(suffix_words) != 0
    # {a,b,c}   # Use { before brace detection
    # ~/bin/ls  # Use ~ before tilde detection
    part0 = suffix_words[0].parts[0]
    blame_tok = location.LeftTokenForWordPart(part0)

    # NOTE: We only do brace DETECTION here, not brace EXPANSION. Therefore we
    # can't implement bash's behavior of having say {~bob,~jane}/src work,
    # because we only have a BracedTree.
    # This is documented in spec/brace-expansion.
    # NOTE: Technically we could do expansion outside of 'oshc translate', but
    # it doesn't seem worth it.
    words2 = braces.BraceDetectAll(suffix_words)
    words3 = word_.TildeDetectAll(words2)

    more_env = []  # type: List[EnvPair]
    _AppendMoreEnv(preparsed_list, more_env)

    # do_fork by default
    return command.Simple(blame_tok, more_env, words3, redirects, typed_args,
                          block, True)
354
355
class VarChecker(object):
    """Statically check for proc and variable usage errors."""

    def __init__(self):
        # type: () -> None
        # Parallel stacks, one entry per proc/function scope:
        # - self.tokens: blame token ('proc' or another token), for location
        #   info in error messages
        # - self.names: names declared in that scope, mapped to the declaring
        #   keyword Id
        self.tokens = []  # type: List[Token]
        self.names = []  # type: List[Dict[str, Id_t]]

    def Push(self, blame_tok):
        # type: (Token) -> None
        """Enter a new proc / shell function scope.

        Bash allows nesting, but it's confusing because it's the same as two
        functions at the top level:

        f() {
          g() {
            echo 'top level function defined in another one'
          }
        }

        YSH disallows nested procs.
        """
        if len(self.tokens):
            involves_proc = (self.tokens[0].id == Id.KW_Proc or
                             blame_tok.id == Id.KW_Proc)
            if involves_proc:
                p_die("procs and shell functions can't be nested", blame_tok)

        self.tokens.append(blame_tok)
        scope = {}  # type: Dict[str, Id_t]
        self.names.append(scope)

    def Pop(self):
        # type: () -> None
        """Leave the current proc/function scope."""
        self.names.pop()
        self.tokens.pop()

    def Check(self, keyword_id, name_tok):
        # type: (Id_t, Token) -> None
        """Check for errors in declaration and mutation errors.

        var x, const x:
          x already declared
        setvar x:
          x is not declared
          x is constant
        setglobal x:
          No errors are possible; we would need all these many conditions to
          statically know the names:
          - no 'source'
          - shopt -u copy_env.
          - AND use lib has to be static
        setref x:
          Should only mutate out params

        Also should p(:out) declare 'out' as well as '__out'? Then you can't
        have local variables with the same name.
        """
        # Don't check the global level! Semantics are different here!
        if len(self.names) == 0:
            return

        scope = self.names[-1]
        name = name_tok.tval

        if keyword_id in (Id.KW_Const, Id.KW_Var):
            if name in scope:
                p_die('%r was already declared' % name, name_tok)
            else:
                scope[name] = keyword_id

        if keyword_id == Id.KW_SetVar:
            if name not in scope:
                p_die("%r hasn't been declared" % name, name_tok)
            elif scope[name] == Id.KW_Const:
                p_die("Can't modify constant %r" % name, name_tok)

        # TODO: setref should only mutate out params.
436
437
class ctx_VarChecker(object):
    """Context manager that opens a VarChecker scope for a proc/function.

    The scope is pushed immediately on construction and popped on exit.
    """

    def __init__(self, var_checker, blame_tok):
        # type: (VarChecker, Token) -> None
        var_checker.Push(blame_tok)
        self.var_checker = var_checker

    def __enter__(self):
        # type: () -> None
        return None

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.var_checker.Pop()
452
453
class ctx_CmdMode(object):
    """Context manager that temporarily switches the parser's cmd_mode.

    The mode is switched immediately on construction and restored on exit.
    """

    def __init__(self, cmd_parse, new_cmd_mode):
        # type: (CommandParser, cmd_mode_t) -> None
        self.cmd_parse = cmd_parse
        self.prev_cmd_mode = cmd_parse.cmd_mode
        cmd_parse.cmd_mode = new_cmd_mode

    def __enter__(self):
        # type: () -> None
        return None

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.cmd_parse.cmd_mode = self.prev_cmd_mode
469
470
471
# Keywords that appear in the middle or at the end of compound commands
# (do/done, then/fi/elif/else, esac).  A word matching one of these can't
# begin a new command; see _AtSecondaryKeyword.
SECONDARY_KEYWORDS = [
    Id.KW_Do, Id.KW_Done, Id.KW_Then, Id.KW_Fi, Id.KW_Elif, Id.KW_Else,
    Id.KW_Esac
]
476
477
478 class CommandParser(object):
479 """Recursive descent parser derived from POSIX shell grammar.
480
481 This is a BNF grammar:
482 https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_10
483
484 - Augmented with both bash/OSH and YSH constructs.
485
486 - We use regex-like iteration rather than recursive references
487 ? means optional (0 or 1)
488 * means 0 or more
489 + means 1 or more
490
491 - Keywords are spelled in Caps:
492 If Elif Case
493
494 - Operator tokens are quoted:
495 '(' '|'
496
497 or can be spelled directly if it matters:
498
499 Op_LParen Op_Pipe
500
501 - Non-terminals are snake_case:
502 brace_group subshell
503
504 Methods in this class should ROUGHLY CORRESPOND to grammar productions, and
505 the production should be in the method docstrings, e.g.
506
507 def ParseSubshell():
508 "
509 subshell : '(' compound_list ')'
510
511 Looking at Op_LParen # Comment to say how this method is called
512 "
513
514 The grammar may be factored to make parsing easier.
515 """
516
    def __init__(self,
                 parse_ctx,
                 parse_opts,
                 w_parser,
                 lexer,
                 line_reader,
                 eof_id=Id.Eof_Real):
        # type: (ParseContext, optview.Parse, WordParser, Lexer, _Reader, Id_t) -> None
        """
        Args:
          parse_ctx: shared parse-time state, including aliases
          parse_opts: options like parse_brace, parse_at
          w_parser: the word parser this command parser drives
          lexer: for pushing hints and lookahead to (
          line_reader: for reading here doc bodies
          eof_id: Id that terminates parsing, Eof_Real by default
        """
        self.parse_ctx = parse_ctx
        self.aliases = parse_ctx.aliases  # aliases to expand at parse time

        self.parse_opts = parse_opts
        self.w_parser = w_parser  # type: WordParser  # for normal parsing
        self.lexer = lexer  # for pushing hints, lookahead to (
        self.line_reader = line_reader  # for here docs
        self.eof_id = eof_id

        self.arena = line_reader.arena  # for adding here doc and alias spans
        self.aliases_in_flight = []  # type: AliasesInFlight

        # A hacky boolean to remove 'if cd / {' ambiguity.
        self.allow_block = True

        # Stack of booleans for nested Attr and SHELL nodes.
        # Attr nodes allow bare assignment x = 42, but not shell x=42.
        # SHELL nodes are the inverse.  'var x = 42' is preferred in shell
        # nodes, but x42 is still allowed.
        #
        # Note: this stack could be optimized by turning it into an integer and
        # binary encoding.
        self.hay_attrs_stack = []  # type: List[bool]

        # Note: VarChecker is instantiated with each CommandParser, which means
        # that two 'proc foo' -- inside a command sub and outside -- don't
        # conflict, because they use different CommandParser instances.  I
        # think this OK but you can imagine different behaviors.
        self.var_checker = VarChecker()

        self.cmd_mode = cmd_mode_e.Shell  # type: cmd_mode_t

        self.Reset()
558
    # Init_() function for "keyword arg"
    def Init_AliasesInFlight(self, aliases_in_flight):
        # type: (AliasesInFlight) -> None
        """Share the in-flight alias list with a parent parser.

        Called by _MaybeExpandAliases on the child parser it creates, so that
        recursive alias expansion can be detected across parser instances.
        """
        self.aliases_in_flight = aliases_in_flight
563
    def Reset(self):
        # type: () -> None
        """Reset our own internal state.

        Called by the interactive loop.
        """
        # Cursor state set by _GetWord()
        self.next_lex_mode = lex_mode_e.ShCommand
        self.cur_word = None  # type: word_t  # current word
        self.c_kind = Kind.Undefined
        self.c_id = Id.Undefined_Tok

        # Queued by ParseRedirect, drained by _GetWord at the next newline.
        self.pending_here_docs = [
        ]  # type: List[Redir]  # should have HereLiteral arg
578
    def ResetInputObjects(self):
        # type: () -> None
        """Reset the internal state of our inputs.

        Called by the interactive loop.  Delegates to the word parser, the
        lexer, and the line reader.
        """
        self.w_parser.Reset()
        self.lexer.ResetInputObjects()
        self.line_reader.Reset()
588
    def _SetNext(self):
        # type: () -> None
        """Call this when you no longer need the current token.

        This method is lazy.  A subsequent call to _GetWord() will
        actually read the next Token.
        """
        # Any mode other than Undefined marks the cursor as "needs advancing";
        # _GetWord() checks for that.
        self.next_lex_mode = lex_mode_e.ShCommand
597
    def _GetWord(self):
        # type: () -> None
        """Call this when you need to make a decision based on Id or Kind.

        If there was an "unfulfilled" call to _SetNext(), it reads a word and
        sets self.c_id and self.c_kind.

        Otherwise it does nothing.
        """
        # Undefined means the cursor is already "fulfilled"; see _SetNext().
        if self.next_lex_mode != lex_mode_e.Undefined:
            w = self.w_parser.ReadWord(self.next_lex_mode)
            #log("w %s", w)

            # Here docs only happen in command mode, so other kinds of newlines
            # don't count.
            if w.tag() == word_e.Operator:
                tok = cast(Token, w)
                if tok.id == Id.Op_Newline:
                    # A newline is where pending here doc bodies begin; they
                    # were queued earlier by ParseRedirect.
                    for h in self.pending_here_docs:
                        _ParseHereDocBody(self.parse_ctx, h, self.line_reader,
                                          self.arena)
                    del self.pending_here_docs[:]  # No .clear() until Python 3.3.

            self.cur_word = w

            self.c_kind = word_.CommandKind(self.cur_word)
            self.c_id = word_.CommandId(self.cur_word)
            # Mark the cursor fulfilled until the next _SetNext().
            self.next_lex_mode = lex_mode_e.Undefined
626
627 def _Eat(self, c_id, msg=None):
628 # type: (Id_t, Optional[str]) -> word_t
629 """Consume a word of a type, maybe showing a custom error message.
630
631 Args:
632 c_id: the Id we expected
633 msg: improved error message
634 """
635 self._GetWord()
636 if self.c_id != c_id:
637 if msg is None:
638 msg = 'Expected word type %s, got %s' % (ui.PrettyId(c_id),
639 ui.PrettyId(self.c_id))
640 p_die(msg, loc.Word(self.cur_word))
641
642 skipped = self.cur_word
643 self._SetNext()
644 return skipped
645
    def _NewlineOk(self):
        # type: () -> None
        """Check for optional newline and consume it.

        Used where the grammar allows (but doesn't require) a line break.
        """
        self._GetWord()
        if self.c_id == Id.Op_Newline:
            self._SetNext()
652
653 def _AtSecondaryKeyword(self):
654 # type: () -> bool
655 self._GetWord()
656 if self.c_id in SECONDARY_KEYWORDS:
657 return True
658 return False
659
    def ParseRedirect(self):
        # type: () -> Redir
        """Parse one redirect: operator, optional fd/var target, and operand.

        Precondition: the cursor is at a Kind.Redir token.  For here docs
        (<< and <<-), the returned node has a placeholder HereDoc arg that is
        filled in later by _ParseHereDocBody, at the next newline.
        """
        self._GetWord()
        assert self.c_kind == Kind.Redir, self.cur_word
        op_tok = cast(Token, self.cur_word)  # for MyPy

        # Note: the lexer could distinguish between
        #   >out
        #   3>out
        #   {fd}>out
        #
        # which would make the code below faster.  But small string
        # optimization would also speed it up, since redirects are small.

        op_val = lexer.TokenVal(op_tok)
        if op_val[0] == '{':
            # {fd}>out -- named descriptor variable
            pos = op_val.find('}')
            assert pos != -1  # lexer ensures this
            where = redir_loc.VarName(op_val[1:pos])  # type: redir_loc_t

        elif op_val[0].isdigit():
            # 3>out or 10>out -- explicit descriptor number.
            # NOTE(review): assumes at most 2 leading digits -- presumably
            # guaranteed by the lexer; confirm.
            pos = 1
            if op_val[1].isdigit():
                pos = 2
            where = redir_loc.Fd(int(op_val[:pos]))

        else:
            # Bare operator like >out: use its default descriptor (0 or 1).
            where = redir_loc.Fd(consts.RedirDefaultFd(op_tok.id))

        self._SetNext()

        self._GetWord()
        # Other redirect
        if self.c_kind != Kind.Word:
            p_die('Invalid token after redirect operator',
                  loc.Word(self.cur_word))

        # Here doc
        if op_tok.id in (Id.Redir_DLess, Id.Redir_DLessDash):
            arg = redir_param.HereDoc.CreateNull()
            arg.here_begin = self.cur_word
            arg.stdin_parts = []

            r = Redir(op_tok, where, arg)

            self.pending_here_docs.append(r)  # will be filled on next newline.

            self._SetNext()
            return r

        arg_word = self.cur_word
        tilde = word_.TildeDetect(arg_word)
        if tilde:
            arg_word = tilde
        self._SetNext()

        # We should never get Empty, Token, etc.
        assert arg_word.tag() == word_e.Compound, arg_word
        return Redir(op_tok, where, cast(CompoundWord, arg_word))
719
720 def _ParseRedirectList(self):
721 # type: () -> List[Redir]
722 """Try parsing any redirects at the cursor.
723
724 This is used for blocks only, not commands.
725 """
726 redirects = [] # type: List[Redir]
727 while True:
728 # This prediction needs to ONLY accept redirect operators. Should we
729 # make them a separate Kind?
730 self._GetWord()
731 if self.c_kind != Kind.Redir:
732 break
733
734 node = self.ParseRedirect()
735 redirects.append(node)
736 self._SetNext()
737
738 return redirects
739
    def _ScanSimpleCommand(self):
        # type: () -> Tuple[List[Redir], List[CompoundWord], Optional[ArgList], Optional[BlockArg]]
        """First pass: Split into redirects and words.

        Also recognizes, when the corresponding parse options are on:
        - a trailing { ... } block arg (parse_brace)
        - typed proc call args after '('
        """
        redirects = []  # type: List[Redir]
        words = []  # type: List[CompoundWord]
        typed_args = None  # type: Optional[ArgList]
        block = None  # type: Optional[BlockArg]

        first_word_caps = False  # does first word look like Caps, but not CAPS

        i = 0
        while True:
            self._GetWord()
            if self.c_kind == Kind.Redir:
                node = self.ParseRedirect()
                redirects.append(node)

            elif self.c_kind == Kind.Word:
                if self.parse_opts.parse_brace():
                    # Treat { and } more like operators
                    if self.c_id == Id.Lit_LBrace:
                        if self.allow_block:  # Disabled for if/while condition, etc.

                            # allow x = 42
                            self.hay_attrs_stack.append(first_word_caps)
                            brace_group = self.ParseBraceGroup()

                            # So we can get the source code back later
                            lines = self.arena.SaveLinesAndDiscard(
                                brace_group.left, brace_group.right)
                            block = BlockArg(brace_group, lines)

                            self.hay_attrs_stack.pop()

                        if 0:
                            print('--')
                            block.PrettyPrint()
                            print('\n--')
                        # Either way, '{' ends the simple command.  When
                        # allow_block is off, the word is left for the caller.
                        break
                    elif self.c_id == Id.Lit_RBrace:
                        # Another thing: { echo hi }
                        # We're DONE!!!
                        break

                w = cast(CompoundWord, self.cur_word)  # Kind.Word ensures this
                words.append(w)
                if i == 0:
                    ok, word_str, quoted = word_.StaticEval(w)
                    # Foo { a = 1 } is OK, but not foo { a = 1 } or FOO { a = 1 }
                    if (ok and len(word_str) and word_str[0].isupper() and
                            not word_str.isupper()):
                        first_word_caps = True
                    #log('W %s', word_str)

            elif self.c_id == Id.Op_LParen:
                # 1. Check that there's a preceding space
                prev_byte = self.lexer.ByteLookBack()
                if prev_byte not in (SPACE_CH, TAB_CH):
                    if self.parse_opts.parse_at():
                        p_die('Space required before (',
                              loc.Word(self.cur_word))
                    else:
                        # inline func call like @sorted(x) is invalid in OSH,
                        # but the solution isn't a space
                        p_die(
                            'Unexpected left paren (might need a space before it)',
                            loc.Word(self.cur_word))

                # 2. Check that it's not ().  We disallow this because it's a
                # no-op and there could be confusion with shell func defs.
                # For some reason we need to call lexer.LookPastSpace, not
                # w_parser.LookPastSpace.  I think this is because we're at (,
                # which is an operator token.  All the other cases are like
                # 'x=', which is PART of a word, and we don't know if it will
                # end.
                next_id = self.lexer.LookPastSpace(lex_mode_e.ShCommand)
                if next_id == Id.Op_RParen:
                    p_die('Empty arg list not allowed', loc.Word(self.cur_word))

                typed_args = self.w_parser.ParseProcCallArgs()

            else:
                break

            self._SetNext()
            i += 1
        return redirects, words, typed_args, block
826
    def _MaybeExpandAliases(self, words):
        # type: (List[CompoundWord]) -> Optional[command_t]
        """Try to expand aliases.

        Args:
          words: A list of Compound

        Returns:
          A command node if any aliases were expanded, or None otherwise.

        Our implementation of alias has two design choices:
        - Where to insert it in parsing.  We do it at the end of
          ParseSimpleCommand.
        - What grammar rule to parse the expanded alias buffer with.  In our
          case it's ParseCommand().

        This doesn't quite match what other shells do, but I can't figure out a
        better place.

        Most test cases pass, except for ones like:

        alias LBRACE='{'
        LBRACE echo one; echo two; }

        alias MULTILINE='echo 1
        echo 2
        echo 3'
        MULTILINE

        NOTE: dash handles aliases in a totally different way.  It has a global
        variable checkkwd in parser.c.  It assigns it all over the grammar,
        like this:

        checkkwd = CHKNL | CHKKWD | CHKALIAS;

        The readtoken() function checks (checkkwd & CHKALIAS) and then calls
        lookupalias().  This seems to provide a consistent behavior among
        shells, but it's less modular and testable.

        Bash also uses a global 'parser_state & PST_ALEXPNEXT'.
        """
        # Start a new list if there aren't any.  This will be passed
        # recursively through CommandParser instances.
        aliases_in_flight = (self.aliases_in_flight
                             if len(self.aliases_in_flight) else [])

        # for error message
        first_word_str = None  # type: Optional[str]
        argv0_loc = loc.Word(words[0])

        expanded = []  # type: List[str]
        i = 0
        n = len(words)

        while i < n:
            w = words[i]

            # Only a statically-evaluable, unquoted word can name an alias.
            ok, word_str, quoted = word_.StaticEval(w)
            if not ok or quoted:
                break

            alias_exp = self.aliases.get(word_str)
            if alias_exp is None:
                break

            # Prevent infinite loops.  This is subtle: we want to prevent
            # infinite expansion of alias echo='echo x'.  But we don't want to
            # prevent expansion of the second word in 'echo echo', so we add
            # 'i' to "aliases_in_flight".
            if (word_str, i) in aliases_in_flight:
                break

            if i == 0:
                first_word_str = word_str  # for error message

            #log('%r -> %r', word_str, alias_exp)
            aliases_in_flight.append((word_str, i))
            expanded.append(alias_exp)
            i += 1

            if not alias_exp.endswith(' '):
                # alias e='echo [ ' is the same expansion as
                # alias e='echo ['
                # The trailing space indicates whether we should continue to
                # expand aliases; it's not part of it.
                expanded.append(' ')
                break  # No more expansions

        if len(expanded) == 0:  # No expansions; caller does parsing.
            return None

        # We are expanding an alias, so copy the rest of the words and
        # re-parse.
        if i < n:
            left_tok = location.LeftTokenForWord(words[i])
            right_tok = location.RightTokenForWord(words[-1])

            # OLD CONSTRAINT
            #assert left_tok.line_id == right_tok.line_id

            words_str = self.arena.SnipCodeString(left_tok, right_tok)
            expanded.append(words_str)

        code_str = ''.join(expanded)

        # TODO:
        # Aliases break static parsing (like backticks), so use our own Arena.
        # This matters for Hay, which calls SaveLinesAndDiscard().
        # arena = alloc.Arena()
        arena = self.arena

        line_reader = reader.StringLineReader(code_str, arena)
        cp = self.parse_ctx.MakeOshParser(line_reader)
        cp.Init_AliasesInFlight(aliases_in_flight)

        # break circular dep
        from frontend import parse_lib

        # The interaction between COMPLETION and ALIASES requires special care.
        # See docstring of BeginAliasExpansion() in parse_lib.py.
        src = source.Alias(first_word_str, argv0_loc)
        with alloc.ctx_SourceCode(arena, src):
            with parse_lib.ctx_Alias(self.parse_ctx.trail):
                try:
                    # _ParseCommandTerm() handles multiline commands, compound
                    # commands, etc. as opposed to ParseLogicalLine()
                    node = cp._ParseCommandTerm()
                except error.Parse as e:
                    # Failure to parse alias expansion is a fatal error.
                    # We don't need more handling here.
                    raise

        if 0:
            log('AFTER expansion:')
            node.PrettyPrint()

        return node
965
966 def ParseSimpleCommand(self):
967 # type: () -> command_t
968 """Fixed transcription of the POSIX grammar (TODO: port to
969 grammar/Shell.g)
970
971 io_file : '<' filename
972 | LESSAND filename
973 ...
974
975 io_here : DLESS here_end
976 | DLESSDASH here_end
977
978 redirect : IO_NUMBER (io_redirect | io_here)
979
980 prefix_part : ASSIGNMENT_WORD | redirect
981 cmd_part : WORD | redirect
982
983 assign_kw : Declare | Export | Local | Readonly
984
985 # Without any words it is parsed as a command, not an assignment
986 assign_listing : assign_kw
987
988 # Now we have something to do (might be changing assignment flags too)
989 # NOTE: any prefixes should be a warning, but they are allowed in shell.
990 assignment : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+
991
992 # an external command, a function call, or a builtin -- a "word_command"
993 word_command : prefix_part* cmd_part+
994
995 simple_command : assign_listing
996 | assignment
997 | proc_command
998
999 Simple imperative algorithm:
1000
1001 1) Read a list of words and redirects. Append them to separate lists.
1002 2) Look for the first non-assignment word. If it's declare, etc., then
1003 keep parsing words AND assign words. Otherwise, just parse words.
1004 3) If there are no non-assignment words, then it's a global assignment.
1005
1006 { redirects, global assignments } OR
1007 { redirects, prefix_bindings, words } OR
1008 { redirects, ERROR_prefix_bindings, keyword, assignments, words }
1009
1010 THEN CHECK that prefix bindings don't have any array literal parts!
1011 global assignment and keyword assignments can have the of course.
1012 well actually EXPORT shouldn't have them either -- WARNING
1013
1014 3 cases we want to warn: prefix_bindings for assignment, and array literal
1015 in prefix bindings, or export
1016
1017 A command can be an assignment word, word, or redirect on its own.
1018
1019 ls
1020 >out.txt
1021
1022 >out.txt FOO=bar # this touches the file
1023
1024 Or any sequence:
1025 ls foo bar
1026 <in.txt ls foo bar >out.txt
1027 <in.txt ls >out.txt foo bar
1028
1029 Or add one or more environment bindings:
1030 VAR=val env
1031 >out.txt VAR=val env
1032
1033 here_end vs filename is a matter of whether we test that it's quoted. e.g.
1034 <<EOF vs <<'EOF'.
1035 """
1036 redirects, words, typed_args, block = self._ScanSimpleCommand()
1037
1038 typed_loc = None # type: Optional[Token]
1039 if block:
1040 typed_loc = block.brace_group.left
1041 if typed_args:
1042 typed_loc = typed_args.left # preferred over block location
1043
1044 if len(words) == 0: # e.g. >out.txt # redirect without words
1045 assert len(redirects) != 0
1046 if typed_loc is not None:
1047 p_die("Unexpected typed args", typed_loc)
1048
1049 simple = command.Simple.CreateNull()
1050 simple.blame_tok = redirects[0].op
1051 simple.more_env = []
1052 simple.words = []
1053 simple.redirects = redirects
1054 return simple
1055
1056 # Disallow =a because it's confusing
1057 part0 = words[0].parts[0]
1058 if part0.tag() == word_part_e.Literal:
1059 tok = cast(Token, part0)
1060 if tok.id == Id.Lit_Equals:
1061 p_die(
1062 "=word isn't allowed. Hint: either quote it or add a space after =\n"
1063 "to pretty print an expression", tok)
1064
1065 preparsed_list, suffix_words = _SplitSimpleCommandPrefix(words)
1066 if len(preparsed_list):
1067 left_token, _, _, _ = preparsed_list[0]
1068
1069 # Disallow X=Y inside proc and func
1070 # and inside Hay Attr blocks
1071 # But allow X=Y at the top level
1072 # for interactive use foo=bar
1073 # for global constants GLOBAL=~/src
1074 # because YSH assignment doesn't have tilde sub
1075 if len(suffix_words) == 0:
1076 if self.cmd_mode != cmd_mode_e.Shell or (
1077 len(self.hay_attrs_stack) and self.hay_attrs_stack[-1]):
1078 p_die('Use var/setvar to assign in YSH', left_token)
1079
1080 # Set a reference to words and redirects for completion. We want to
1081 # inspect this state after a failed parse.
1082 self.parse_ctx.trail.SetLatestWords(suffix_words, redirects)
1083
1084 if len(suffix_words) == 0:
1085 if typed_loc is not None:
1086 p_die("Unexpected typed args", typed_loc)
1087
1088 # ShAssignment: No suffix words like ONE=1 a[x]=1 TWO=2
1089 pairs = [] # type: List[AssignPair]
1090 for preparsed in preparsed_list:
1091 pairs.append(
1092 _MakeAssignPair(self.parse_ctx, preparsed, self.arena))
1093
1094 left_tok = location.LeftTokenForCompoundWord(words[0])
1095 return command.ShAssignment(left_tok, pairs, redirects)
1096
1097 kind, kw_token = word_.IsControlFlow(suffix_words[0])
1098
1099 if kind == Kind.ControlFlow:
1100 if kw_token.id == Id.ControlFlow_Return:
1101 # return x - inside procs and shell functions
1102 # return (x) - inside funcs
1103 if typed_args is None:
1104 if self.cmd_mode not in (cmd_mode_e.Shell, cmd_mode_e.Proc):
1105 p_die('Shell-style returns not allowed here', kw_token)
1106 else:
1107 if self.cmd_mode != cmd_mode_e.Func:
1108 p_die('Typed return is only allowed inside func',
1109 typed_loc)
1110 if len(typed_args.pos_args) != 1:
1111 p_die("Typed return expects one argument", typed_loc)
1112 if len(typed_args.named_args) != 0:
1113 p_die("Typed return doesn't take named arguments",
1114 typed_loc)
1115 return command.Retval(kw_token, typed_args.pos_args[0])
1116
1117 if typed_loc is not None:
1118 p_die("Unexpected typed args", typed_loc)
1119 if not self.parse_opts.parse_ignored() and len(redirects):
1120 p_die("Control flow shouldn't have redirects", kw_token)
1121
1122 if len(preparsed_list): # FOO=bar local spam=eggs not allowed
1123 # TODO: Change location as above
1124 left_token, _, _, _ = preparsed_list[0]
1125 p_die("Control flow shouldn't have environment bindings",
1126 left_token)
1127
1128 # Attach the token for errors. (ShAssignment may not need it.)
1129 if len(suffix_words) == 1:
1130 arg_word = None # type: Optional[word_t]
1131 elif len(suffix_words) == 2:
1132 arg_word = suffix_words[1]
1133 else:
1134 p_die('Unexpected argument to %r' % lexer.TokenVal(kw_token),
1135 loc.Word(suffix_words[2]))
1136
1137 return command.ControlFlow(kw_token, arg_word)
1138
1139 # Alias expansion only understands words, not typed args ( ) or block { }
1140 if not typed_args and not block and self.parse_opts.expand_aliases():
1141 # If any expansions were detected, then parse again.
1142 expanded_node = self._MaybeExpandAliases(suffix_words)
1143 if expanded_node:
1144 # Attach env bindings and redirects to the expanded node.
1145 more_env = [] # type: List[EnvPair]
1146 _AppendMoreEnv(preparsed_list, more_env)
1147 exp = command.ExpandedAlias(expanded_node, redirects, more_env)
1148 return exp
1149
1150 # TODO: check that we don't have env1=x x[1]=y env2=z here.
1151
1152 # FOO=bar printenv.py FOO
1153 node = _MakeSimpleCommand(preparsed_list, suffix_words, redirects,
1154 typed_args, block)
1155 return node
1156
    def ParseBraceGroup(self):
        # type: () -> BraceGroup
        """Parse { ... }, optionally capturing a YSH doc comment.

        Original:
          brace_group : LBrace command_list RBrace ;

        YSH:
          brace_group : LBrace (Op_Newline IgnoredComment?)? command_list RBrace ;

        The doc comment can only occur if there's a newline.
        """
        ate = self._Eat(Id.Lit_LBrace)
        left = word_.BraceToken(ate)

        # Doc comment token (e.g. '### summary' right after the newline);
        # stays None when absent.
        doc_token = None  # type: Token
        self._GetWord()
        if self.c_id == Id.Op_Newline:
            self._SetNext()
            # Temporarily make the word parser emit Ignored_Comment tokens so
            # a doc comment on the next line can be captured.
            with word_.ctx_EmitDocToken(self.w_parser):
                self._GetWord()

        if self.c_id == Id.Ignored_Comment:
            doc_token = cast(Token, self.cur_word)
            self._SetNext()

        c_list = self._ParseCommandList()

        ate = self._Eat(Id.Lit_RBrace)
        right = word_.BraceToken(ate)

        # Note(andychu): Related ASDL bug #1216. Choosing the Python [] behavior
        # would allow us to revert this back to None, which was changed in
        # https://github.com/oilshell/oil/pull/1211. Choosing the C++ nullptr
        # behavior saves allocations, but is less type safe.
        return BraceGroup(left, doc_token, c_list.children, [],
                          right)  # no redirects yet
1193
1194 def ParseDoGroup(self):
1195 # type: () -> command.DoGroup
1196 """Used by ForEach, ForExpr, While, Until. Should this be a Do node?
1197
1198 do_group : Do command_list Done ; /* Apply rule 6 */
1199 """
1200 ate = self._Eat(Id.KW_Do)
1201 do_kw = word_.AsKeywordToken(ate)
1202
1203 c_list = self._ParseCommandList() # could be anything
1204
1205 ate = self._Eat(Id.KW_Done)
1206 done_kw = word_.AsKeywordToken(ate)
1207
1208 return command.DoGroup(do_kw, c_list.children, done_kw)
1209
    def ParseForWords(self):
        # type: () -> Tuple[List[CompoundWord], Optional[Token]]
        """Read the word list of a for loop, up to its separator.

        for_words : WORD* for_sep
                  ;
        for_sep   : ';' newline_ok
                  | NEWLINES
                  ;

        Returns the words and the ';' token (if any) so the caller can
        record or remove it.
        """
        words = []  # type: List[CompoundWord]
        # The span_id of any semi-colon, so we can remove it.
        semi_tok = None  # type: Optional[Token]

        while True:
            self._GetWord()
            if self.c_id == Id.Op_Semi:
                tok = cast(Token, self.cur_word)
                semi_tok = tok
                self._SetNext()
                self._NewlineOk()
                break
            elif self.c_id == Id.Op_Newline:
                self._SetNext()
                break
            # YSH: '{' ends the word list when parse_brace is on, e.g.
            # 'for x in a b {'
            elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
                break

            if self.cur_word.tag() != word_e.Compound:
                # TODO: Can we also show a pointer to the 'for' keyword?
                p_die('Invalid word in for loop', loc.Word(self.cur_word))

            w2 = cast(CompoundWord, self.cur_word)
            words.append(w2)
            self._SetNext()
        return words, semi_tok
1245
    def _ParseForExprLoop(self, for_kw):
        # type: (Token) -> command.ForExpr
        """Parse the rest of a C-style for loop; '((' was already detected.

        Shell:
          for '((' init ';' cond ';' update '))' for_sep? do_group

        YSH:
          for '((' init ';' cond ';' update '))' for_sep? brace_group
        """
        node = self.w_parser.ReadForExpression()
        node.keyword = for_kw

        self._SetNext()

        # Accept an optional separator between '))' and the body.
        self._GetWord()
        if self.c_id == Id.Op_Semi:
            self._SetNext()
            self._NewlineOk()
        elif self.c_id == Id.Op_Newline:
            self._SetNext()
        elif self.c_id == Id.KW_Do:  # missing semicolon/newline allowed
            pass
        elif self.c_id == Id.Lit_LBrace:  # does NOT require parse_brace
            pass
        else:
            p_die('Invalid word after for expression', loc.Word(self.cur_word))

        # YSH brace body or shell do...done body.
        if self.c_id == Id.Lit_LBrace:
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()
        return node
1278
    def _ParseForEachLoop(self, for_kw):
        # type: (Token) -> command.ForEach
        """Parse 'for NAME... [in ...]' with a do-group or brace-group body.

        Accepts up to 3 loop variables for the YSH expression form, and at
        most 2 when iterating over words (checked after the iterable is
        known).
        """
        node = command.ForEach.CreateNull(alloc_lists=True)
        node.keyword = for_kw

        num_iter_names = 0
        while True:
            w = self.cur_word

            # Hack that makes the language more familiar:
            # - 'x, y' is accepted, but not 'x,y' or 'x ,y'
            # - 'x y' is also accepted but not idiomatic.
            UP_w = w
            if w.tag() == word_e.Compound:
                w = cast(CompoundWord, UP_w)
                # Strip a trailing comma token from 'x,' so the name
                # validates below.
                if word_.LiteralId(w.parts[-1]) == Id.Lit_Comma:
                    w.parts.pop()

            ok, iter_name, quoted = word_.StaticEval(w)
            if not ok or quoted:  # error: for $x
                p_die('Expected loop variable (a constant word)', loc.Word(w))

            if not match.IsValidVarName(iter_name):  # error: for -
                # TODO: consider commas?
                if ',' in iter_name:
                    p_die('Loop variables look like x, y (fix spaces)',
                          loc.Word(w))
                p_die('Invalid loop variable name %r' % iter_name, loc.Word(w))

            node.iter_names.append(iter_name)
            num_iter_names += 1
            self._SetNext()

            self._GetWord()
            # 'in' or 'do' or ';' or Op_Newline marks the end of variable names
            # Subtlety: 'var' is KW_Var and is a valid loop name
            if self.c_id in (Id.KW_In, Id.KW_Do) or self.c_kind == Kind.Op:
                break

            if num_iter_names == 3:
                p_die('Unexpected word after 3 loop variables',
                      loc.Word(self.cur_word))

        self._NewlineOk()

        self._GetWord()
        if self.c_id == Id.KW_In:
            # Ideally we would want ( not 'in'. But we still have to fix the bug
            # where we require a SPACE between in and (
            # for x in(y)  # should be accepted, but isn't

            expr_blame = word_.AsKeywordToken(self.cur_word)

            self._SetNext()  # skip in
            if self.w_parser.LookPastSpace() == Id.Op_LParen:
                # YSH expression iterable: for x in (mylist) { ... }
                enode, last_token = self.parse_ctx.ParseYshExpr(
                    self.lexer, grammar_nt.oil_expr)
                node.iterable = for_iter.YshExpr(enode, expr_blame)

                # For simplicity, we don't accept for x in (obj); do ...
                self._GetWord()
                if self.c_id != Id.Lit_LBrace:
                    p_die('Expected { after iterable expression',
                          loc.Word(self.cur_word))
            else:
                # Word iterable: for x in a b c; do ...
                semi_tok = None  # type: Optional[Token]
                iter_words, semi_tok = self.ParseForWords()
                node.semi_tok = semi_tok

                if not self.parse_opts.parse_bare_word() and len(
                        iter_words) == 1:
                    ok, s, quoted = word_.StaticEval(iter_words[0])
                    if ok and match.IsValidVarName(s) and not quoted:
                        p_die(
                            'Surround this word with either parens or quotes (parse_bare_word)',
                            loc.Word(iter_words[0]))

                # Expand braces and tildes statically in the word list.
                words2 = braces.BraceDetectAll(iter_words)
                words3 = word_.TildeDetectAll(words2)
                node.iterable = for_iter.Words(words3)

                # Now that we know there are words, do an extra check
                if num_iter_names > 2:
                    p_die('Expected at most 2 loop variables', for_kw)

        elif self.c_id == Id.KW_Do:
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            # do not advance

        elif self.c_id == Id.Op_Semi:  # for x; do
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            self._SetNext()

        else:  # for foo BAD
            p_die('Unexpected word after for loop variable',
                  loc.Word(self.cur_word))

        self._GetWord()
        if self.c_id == Id.Lit_LBrace:  # parse_opts.parse_brace() must be on
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()

        return node
1383
1384 def ParseFor(self):
1385 # type: () -> command_t
1386 """
1387 TODO: Update the grammar
1388
1389 for_clause : For for_name newline_ok (in for_words? for_sep)? do_group ;
1390 | For '((' ... TODO
1391 """
1392 ate = self._Eat(Id.KW_For)
1393 for_kw = word_.AsKeywordToken(ate)
1394
1395 self._GetWord()
1396 if self.c_id == Id.Op_DLeftParen:
1397 if not self.parse_opts.parse_dparen():
1398 p_die("Bash for loops aren't allowed (parse_dparen)",
1399 loc.Word(self.cur_word))
1400
1401 # for (( i = 0; i < 10; i++)
1402 n1 = self._ParseForExprLoop(for_kw)
1403 n1.redirects = self._ParseRedirectList()
1404 return n1
1405 else:
1406 # for x in a b; do echo hi; done
1407 n2 = self._ParseForEachLoop(for_kw)
1408 n2.redirects = self._ParseRedirectList()
1409 return n2
1410
1411 def _ParseConditionList(self):
1412 # type: () -> condition_t
1413 """
1414 condition_list: command_list
1415
1416 This is a helper to parse a condition list for if commands and while/until
1417 loops. It will throw a parse error if there are no conditions in the list.
1418 """
1419 self.allow_block = False
1420 commands = self._ParseCommandList()
1421 self.allow_block = True
1422
1423 if len(commands.children) == 0:
1424 p_die("Expected a condition", loc.Word(self.cur_word))
1425
1426 return condition.Shell(commands.children)
1427
1428 def ParseWhileUntil(self, keyword):
1429 # type: (Token) -> command.WhileUntil
1430 """
1431 while_clause : While command_list do_group ;
1432 until_clause : Until command_list do_group ;
1433 """
1434 self._SetNext() # skip keyword
1435
1436 if self.parse_opts.parse_paren() and self.w_parser.LookPastSpace(
1437 ) == Id.Op_LParen:
1438 enode, _ = self.parse_ctx.ParseYshExpr(self.lexer,
1439 grammar_nt.oil_expr)
1440 cond = condition.YshExpr(enode) # type: condition_t
1441 else:
1442 cond = self._ParseConditionList()
1443
1444 # NOTE: The LSTs will be different for OSH and YSH, but the execution
1445 # should be unchanged. To be sure we should desugar.
1446 self._GetWord()
1447 if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
1448 # while test -f foo {
1449 body_node = self.ParseBraceGroup() # type: command_t
1450 else:
1451 body_node = self.ParseDoGroup()
1452
1453 # no redirects yet
1454 return command.WhileUntil(keyword, cond, body_node, None)
1455
    def ParseCaseArm(self):
        # type: () -> CaseArm
        """Parse one arm of a POSIX case statement.

        case_item: '('? pattern ('|' pattern)* ')'
                   newline_ok command_term? trailer? ;

        Looking at '(' or pattern
        """
        # The ')' that closes the pattern list must lex as Right_CasePat,
        # not as an ordinary Op_RParen.
        self.lexer.PushHint(Id.Op_RParen, Id.Right_CasePat)

        left_tok = location.LeftTokenForWord(self.cur_word)  # ( or pat

        if self.c_id == Id.Op_LParen:  # Optional (
            self._SetNext()

        pat_words = []  # type: List[word_t]
        while True:
            self._GetWord()
            if self.c_kind != Kind.Word:
                p_die('Expected case pattern', loc.Word(self.cur_word))
            pat_words.append(self.cur_word)
            self._SetNext()

            # Alternative patterns are separated by |
            self._GetWord()
            if self.c_id == Id.Op_Pipe:
                self._SetNext()
            else:
                break

        ate = self._Eat(Id.Right_CasePat)
        middle_tok = word_.AsOperatorToken(ate)

        self._NewlineOk()

        # The action is optional; ;; or esac may follow the pattern directly.
        self._GetWord()
        if self.c_id not in (Id.Op_DSemi, Id.KW_Esac):
            c_list = self._ParseCommandTerm()
            action_children = c_list.children
        else:
            action_children = []

        dsemi_tok = None  # type: Token
        self._GetWord()
        if self.c_id == Id.KW_Esac:  # missing last ;;
            pass
        elif self.c_id == Id.Op_DSemi:
            dsemi_tok = word_.AsOperatorToken(self.cur_word)
            self._SetNext()
        else:
            # Happens on EOF
            p_die('Expected ;; or esac', loc.Word(self.cur_word))

        self._NewlineOk()

        return CaseArm(left_tok, pat.Words(pat_words), middle_tok,
                       action_children, dsemi_tok)
1512
    def ParseYshCaseArm(self, discriminant):
        # type: (Id_t) -> CaseArm
        """Parse one arm of a YSH case.

        case_item   : pattern newline_ok brace_group newline_ok
        pattern     : pat_words
                    | pat_exprs
                    | pat_eggex
                    | pat_else
        pat_words   : pat_word (newline_ok '|' newline_ok pat_word)*
        pat_exprs   : pat_expr (newline_ok '|' newline_ok pat_expr)*
        pat_word    : WORD
        pat_eggex   : '/' oil_eggex '/'
        pat_expr    : '(' oil_expr ')'
        pat_else    : '(' Id.KW_Else ')'

        Looking at: 'pattern'

        Note that the trailing `newline_ok` in `case_item` is handled by
        `ParseYshCase`. We do this because parsing that `newline_ok` returns
        the next "discriminant" for the next token, so it makes more sense to
        handle it there.
        """
        left_tok = None  # type: Token
        pattern = None  # type: pat_t

        if discriminant in (Id.Op_LParen, Id.Arith_Slash):
            # pat_exprs, pat_else or pat_eggex
            pattern, left_tok = self.w_parser.ParseYshCasePattern()
        else:
            # pat_words
            pat_words = []  # type: List[word_t]
            while True:
                self._GetWord()
                if self.c_kind != Kind.Word:
                    p_die('Expected case pattern', loc.Word(self.cur_word))
                pat_words.append(self.cur_word)
                self._SetNext()

                # NOTE(review): cur_word has already advanced past the pattern
                # word at this point, so left_tok points at the token AFTER the
                # first pattern -- confirm this is the intended blame location.
                if not left_tok:
                    left_tok = location.LeftTokenForWord(self.cur_word)

                self._NewlineOk()

                # Alternative patterns are separated by |
                self._GetWord()
                if self.c_id == Id.Op_Pipe:
                    self._SetNext()
                    self._NewlineOk()
                else:
                    break
            pattern = pat.Words(pat_words)

        self._NewlineOk()
        action = self.ParseBraceGroup()

        # The left token of the action is our "middle" token
        return CaseArm(left_tok, pattern, action.left, action.children,
                       action.right)
1570
1571 def ParseYshCase(self, case_kw):
1572 # type: (Token) -> command.Case
1573 """
1574 ysh_case : Case '(' expr ')' LBrace newline_ok ysh_case_arm* RBrace ;
1575
1576 Looking at: token after 'case'
1577 """
1578 enode, _ = self.parse_ctx.ParseYshExpr(self.lexer, grammar_nt.oil_expr)
1579 to_match = case_arg.YshExpr(enode)
1580
1581 ate = self._Eat(Id.Lit_LBrace)
1582 arms_start = word_.BraceToken(ate)
1583
1584 discriminant = self.w_parser.NewlineOkForYshCase()
1585
1586 # Note: for now, zero arms are accepted, just like POSIX case $x in esac
1587 arms = [] # type: List[CaseArm]
1588 while discriminant != Id.Op_RBrace:
1589 arm = self.ParseYshCaseArm(discriminant)
1590 arms.append(arm)
1591
1592 discriminant = self.w_parser.NewlineOkForYshCase()
1593
1594 # NewlineOkForYshCase leaves the lexer in lex_mode_e.Expr. So the '}'
1595 # token is read as an Id.Op_RBrace, but we need to store this as a
1596 # Id.Lit_RBrace.
1597 ate = self._Eat(Id.Op_RBrace)
1598 arms_end = word_.AsOperatorToken(ate)
1599 arms_end.id = Id.Lit_RBrace
1600
1601 return command.Case(case_kw, to_match, arms_start, arms, arms_end, None)
1602
    def ParseOldCase(self, case_kw):
        # type: (Token) -> command.Case
        """Parse a POSIX-style case WORD in ... esac.

        case_clause : Case WORD newline_ok In newline_ok case_arm* Esac ;

        -> Looking at WORD

        FYI original POSIX case list, which takes pains for DSEMI

        case_list: case_item (DSEMI newline_ok case_item)* DSEMI? newline_ok;
        """
        self._GetWord()
        w = self.cur_word
        # YSH: a bare constant word after 'case' is almost always a mistake.
        if not self.parse_opts.parse_bare_word():
            ok, s, quoted = word_.StaticEval(w)
            if ok and not quoted:
                p_die(
                    "This is a constant string. You may want a variable like $x (parse_bare_word)",
                    loc.Word(w))

        if w.tag() != word_e.Compound:
            p_die("Expected a word to match against", loc.Word(w))

        to_match = case_arg.Word(w)
        self._SetNext()  # past WORD

        self._NewlineOk()

        ate = self._Eat(Id.KW_In)
        arms_start = word_.AsKeywordToken(ate)

        self._NewlineOk()

        arms = []  # type: List[CaseArm]
        while True:
            self._GetWord()
            if self.c_id == Id.KW_Esac:  # this is Kind.Word
                break
            # case arm should begin with a pattern word or (
            if self.c_kind != Kind.Word and self.c_id != Id.Op_LParen:
                break

            arm = self.ParseCaseArm()
            arms.append(arm)

        ate = self._Eat(Id.KW_Esac)
        arms_end = word_.AsKeywordToken(ate)

        # no redirects yet
        return command.Case(case_kw, to_match, arms_start, arms, arms_end, None)
1653
1654 def ParseCase(self):
1655 # type: () -> command.Case
1656 """
1657 case_clause : old_case # from POSIX
1658 | ysh_case
1659 ;
1660
1661 Looking at 'Case'
1662 """
1663 case_kw = word_.AsKeywordToken(self.cur_word)
1664 self._SetNext() # past 'case'
1665
1666 if self.w_parser.LookPastSpace() == Id.Op_LParen:
1667 return self.ParseYshCase(case_kw)
1668 else:
1669 return self.ParseOldCase(case_kw)
1670
    def _ParseYshElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse the 'elif' arms and optional 'else' of a YSH if, in place.

        if test -f foo {
          echo foo
        } elif test -f bar; test -f spam {
          # ^ we parsed up to here
          echo bar
        } else {
          echo none
        }
        """
        arms = if_node.arms

        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # skip elif
            # elif (expr) { ... } -- YSH expression condition
            if (self.parse_opts.parse_paren() and
                    self.w_parser.LookPastSpace() == Id.Op_LParen):
                enode, _ = self.parse_ctx.ParseYshExpr(self.lexer,
                                                       grammar_nt.oil_expr)
                cond = condition.YshExpr(enode)  # type: condition_t
            else:
                # Shell command-list condition; block args are ambiguous here.
                self.allow_block = False
                commands = self._ParseCommandList()
                self.allow_block = True
                cond = condition.Shell(commands.children)

            body = self.ParseBraceGroup()
            self._GetWord()

            arm = IfArm(elif_kw, cond, None, body.children, [elif_kw.span_id])
            arms.append(arm)

        self._GetWord()
        if self.c_id == Id.KW_Else:
            self._SetNext()
            body = self.ParseBraceGroup()
            if_node.else_action = body.children
1705
    def _ParseYshIf(self, if_kw, cond):
        # type: (Token, condition_t) -> command.If
        """Parse the brace-group body and elif/else of a YSH if.

        if test -f foo {
          # ^ we parsed up to here
          echo foo
        } elif test -f bar; test -f spam {
          echo bar
        } else {
          echo none
        }

        NOTE: If you do something like if test -n foo{, the parser keeps going, and
        the error is confusing because it doesn't point to the right place.

        I think we might need strict_brace so that foo{ is disallowed. It has to
        be foo\{ or foo{a,b}. Or just turn that on with parse_brace? After you
        form ANY CompoundWord, make sure it's balanced for Lit_LBrace and
        Lit_RBrace? Maybe this is pre-parsing step in the WordParser?
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_node.if_kw = if_kw

        body1 = self.ParseBraceGroup()
        # Every arm has 1 spid, unlike shell-style
        # TODO: We could get the spids from the brace group.
        arm = IfArm(if_kw, cond, None, body1.children, [if_kw.span_id])

        if_node.arms.append(arm)

        self._GetWord()
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseYshElifElse(if_node)
        # the whole if node has the 'else' spid, unlike shell-style there's no 'fi'
        # spid because that's in the BraceGroup.
        return if_node
1741
    def _ParseElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse shell-style elif/else arms, mutating if_node in place.

        else_part: (Elif command_list Then command_list)* Else command_list ;
        """
        arms = if_node.arms

        self._GetWord()
        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # past 'elif'

            cond = self._ParseConditionList()

            ate = self._Eat(Id.KW_Then)
            then_kw = word_.AsKeywordToken(ate)

            body = self._ParseCommandList()
            arm = IfArm(elif_kw, cond, then_kw, body.children,
                        [elif_kw.span_id, then_kw.span_id])

            arms.append(arm)

            # Refresh c_id for the loop condition.
            self._GetWord()
        if self.c_id == Id.KW_Else:
            else_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # past 'else'
            body = self._ParseCommandList()
            if_node.else_action = body.children
        else:
            else_kw = None

        if_node.else_kw = else_kw
1775
    def ParseIf(self):
        # type: () -> command.If
        """Parse an if command, dispatching to the YSH form when appropriate.

        if_clause : If command_list Then command_list else_part? Fi ;

        open      : '{' | Then
        close     : '}' | Fi

        ysh_if    : If ( command_list | '(' expr ')' )
                    open command_list else_part? close;

        There are 2 conditionals here: parse_paren, then parse_brace
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_kw = word_.AsKeywordToken(self.cur_word)
        if_node.if_kw = if_kw
        self._SetNext()  # past 'if'

        if self.parse_opts.parse_paren() and self.w_parser.LookPastSpace(
        ) == Id.Op_LParen:
            # if (x + 1)
            enode, _ = self.parse_ctx.ParseYshExpr(self.lexer,
                                                   grammar_nt.oil_expr)
            cond = condition.YshExpr(enode)  # type: condition_t
        else:
            # if echo 1; echo 2; then
            # Remove ambiguity with if cd / {
            cond = self._ParseConditionList()

        # A '{' body means the YSH form; delegate and return early.
        self._GetWord()
        if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            return self._ParseYshIf(if_kw, cond)

        ate = self._Eat(Id.KW_Then)
        then_kw = word_.AsKeywordToken(ate)

        body = self._ParseCommandList()

        # First arm
        arm = IfArm(if_kw, cond, then_kw, body.children,
                    [if_kw.span_id, then_kw.span_id])
        if_node.arms.append(arm)

        # 2nd to Nth arm
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseElifElse(if_node)

        ate = self._Eat(Id.KW_Fi)
        if_node.fi_kw = word_.AsKeywordToken(ate)

        return if_node
1827
1828 def ParseTime(self):
1829 # type: () -> command_t
1830 """Time [-p] pipeline.
1831
1832 According to bash help.
1833 """
1834 time_kw = word_.AsKeywordToken(self.cur_word)
1835 self._SetNext() # skip time
1836 pipeline = self.ParsePipeline()
1837 return command.TimeBlock(time_kw, pipeline)
1838
1839 def ParseCompoundCommand(self):
1840 # type: () -> command_t
1841 """
1842 Refactoring: we put io_redirect* here instead of in function_body and
1843 command.
1844
1845 compound_command : brace_group io_redirect*
1846 | subshell io_redirect*
1847 | for_clause io_redirect*
1848 | while_clause io_redirect*
1849 | until_clause io_redirect*
1850 | if_clause io_redirect*
1851 | case_clause io_redirect*
1852
1853 # bash extensions
1854 | time_clause
1855 | [[ BoolExpr ]]
1856 | (( ArithExpr ))
1857 """
1858 self._GetWord()
1859 if self.c_id == Id.Lit_LBrace:
1860 n1 = self.ParseBraceGroup()
1861 n1.redirects = self._ParseRedirectList()
1862 return n1
1863 if self.c_id == Id.Op_LParen:
1864 n2 = self.ParseSubshell()
1865 n2.redirects = self._ParseRedirectList()
1866 return n2
1867
1868 if self.c_id == Id.KW_For:
1869 # Note: Redirects parsed in this call. POSIX for and bash for (( have
1870 # redirects, but YSH for doesn't.
1871 return self.ParseFor()
1872 if self.c_id in (Id.KW_While, Id.KW_Until):
1873 keyword = word_.AsKeywordToken(self.cur_word)
1874 n3 = self.ParseWhileUntil(keyword)
1875 n3.redirects = self._ParseRedirectList()
1876 return n3
1877
1878 if self.c_id == Id.KW_If:
1879 n4 = self.ParseIf()
1880 n4.redirects = self._ParseRedirectList()
1881 return n4
1882 if self.c_id == Id.KW_Case:
1883 n5 = self.ParseCase()
1884 n5.redirects = self._ParseRedirectList()
1885 return n5
1886
1887 if self.c_id == Id.KW_DLeftBracket:
1888 n6 = self.ParseDBracket()
1889 n6.redirects = self._ParseRedirectList()
1890 return n6
1891 if self.c_id == Id.Op_DLeftParen:
1892 if not self.parse_opts.parse_dparen():
1893 p_die('You may want a space between parens (parse_dparen)',
1894 loc.Word(self.cur_word))
1895 n7 = self.ParseDParen()
1896 n7.redirects = self._ParseRedirectList()
1897 return n7
1898
1899 # bash extensions: no redirects
1900 if self.c_id == Id.KW_Time:
1901 return self.ParseTime()
1902
1903 # Happens in function body, e.g. myfunc() oops
1904 p_die('Unexpected word while parsing compound command',
1905 loc.Word(self.cur_word))
1906 assert False # for MyPy
1907
    def ParseFunctionDef(self):
        # type: () -> command.ShFunction
        """Parse a POSIX-style shell function definition.

        function_header : fname '(' ')'
        function_def    : function_header newline_ok function_body ;

        Precondition: Looking at the function name.

        NOTE: There is an ambiguity with:

        function foo ( echo hi ) and
        function foo () ( echo hi )

        Bash only accepts the latter, though it doesn't really follow a grammar.
        """
        word0 = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(word0)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid function name', loc.Word(word0))

        part0 = word0.parts[0]
        # If we got a non-empty string from ShFunctionName, this should be true.
        assert part0.tag() == word_part_e.Literal
        blame_tok = cast(Token, part0)  # for ctx_VarChecker

        self._SetNext()  # move past function name

        # Must be true because of lookahead
        self._GetWord()
        assert self.c_id == Id.Op_LParen, self.cur_word

        self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
        self._SetNext()

        self._GetWord()
        if self.c_id == Id.Right_ShFunction:
            # 'f ()' implies a function definition, since invoking it with no args
            # would just be 'f'
            self._SetNext()

            self._NewlineOk()

            func = command.ShFunction.CreateNull()
            func.name = name
            with ctx_VarChecker(self.var_checker, blame_tok):
                func.body = self.ParseCompoundCommand()

            func.name_tok = location.LeftTokenForCompoundWord(word0)
            return func
        else:
            p_die('Expected ) in function definition', loc.Word(self.cur_word))
            # Unreachable: p_die raises; the return satisfies MyPy.
            return None
1960
    def ParseKshFunctionDef(self):
        # type: () -> command.ShFunction
        """Parse a ksh-style 'function NAME' definition.

        ksh_function_def : 'function' fname ( '(' ')' )? newline_ok function_body
        """
        keyword_tok = word_.AsKeywordToken(self.cur_word)

        self._SetNext()  # skip past 'function'
        self._GetWord()

        cur_word = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(cur_word)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid KSH-style function name', loc.Word(cur_word))

        name_word = self.cur_word
        self._SetNext()  # skip past the function name

        # The '()' is optional in the ksh form.
        self._GetWord()
        if self.c_id == Id.Op_LParen:
            self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
            self._SetNext()
            self._Eat(Id.Right_ShFunction)

        self._NewlineOk()

        func = command.ShFunction.CreateNull()
        func.name = name
        with ctx_VarChecker(self.var_checker, keyword_tok):
            func.body = self.ParseCompoundCommand()

        func.keyword = keyword_tok
        func.name_tok = location.LeftTokenForWord(name_word)
        return func
1995
    def ParseYshProc(self):
        # type: () -> command.Proc
        """Parse a YSH 'proc' definition; looking at the proc keyword."""
        node = command.Proc.CreateNull(alloc_lists=True)

        keyword_tok = word_.AsKeywordToken(self.cur_word)
        node.keyword = keyword_tok

        with ctx_VarChecker(self.var_checker, keyword_tok):
            with ctx_CmdMode(self, cmd_mode_e.Proc):
                self.w_parser.ParseProc(node)
                if node.sig.tag() == proc_sig_e.Closed:  # Register params
                    sig = cast(proc_sig.Closed, node.sig)

                    # Treat params as variables.
                    for param in sig.word_params:
                        # TODO: Check() should not look at tval
                        name_tok = param.blame_tok
                        self.var_checker.Check(Id.KW_Var, name_tok)
                    if sig.rest_of_words:
                        name_tok = sig.rest_of_words.blame_tok
                        self.var_checker.Check(Id.KW_Var, name_tok)
                    # We COULD register __out here but it would require a different API.
                    #if param.prefix and param.prefix.id == Id.Arith_Colon:
                    #    self.var_checker.Check(Id.KW_Var, '__' + param.name)

                self._SetNext()
                node.body = self.ParseBraceGroup()
                # No redirects for YSH procs (only at call site)

        return node
2026
    def ParseYshFunc(self):
        # type: () -> command.Func
        """Parse a YSH 'func' definition.

        ysh_func: KW_Func Expr_Name '(' [func_params] [';' func_params] ')' brace_group

        Looking at KW_Func
        """
        node = command.Func.CreateNull(alloc_lists=True)

        keyword_tok = word_.AsKeywordToken(self.cur_word)
        node.keyword = keyword_tok

        with ctx_VarChecker(self.var_checker, keyword_tok):
            self.parse_ctx.ParseFunc(self.lexer, node)

            # Register positional params as defined variables, so the body
            # can't shadow or redefine them with 'var'.
            for param in node.pos_params:
                name_tok = param.blame_tok
                self.var_checker.Check(Id.KW_Var, name_tok)
            if node.rest_of_pos:
                name_tok = node.rest_of_pos.blame_tok
                self.var_checker.Check(Id.KW_Var, name_tok)

            self._SetNext()
            # The body is parsed in Func mode, e.g. so 'return (x)' is allowed.
            with ctx_CmdMode(self, cmd_mode_e.Func):
                node.body = self.ParseBraceGroup()

        return node
2054
    def ParseCoproc(self):
        # type: () -> command_t
        """Parse a bash 'coproc' command -- not implemented yet.

        TODO: command.Coproc?
        """
        raise NotImplementedError()
2061
2062 def ParseSubshell(self):
2063 # type: () -> command.Subshell
2064 """
2065 subshell : '(' compound_list ')'
2066
2067 Looking at Op_LParen
2068 """
2069 left = word_.AsOperatorToken(self.cur_word)
2070 self._SetNext() # skip past (
2071
2072 # Ensure that something $( (cd / && pwd) ) works. If ) is already on the
2073 # translation stack, we want to delay it.
2074
2075 self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
2076
2077 c_list = self._ParseCommandList()
2078 if len(c_list.children) == 1:
2079 child = c_list.children[0]
2080 else:
2081 child = c_list
2082
2083 ate = self._Eat(Id.Right_Subshell)
2084 right = word_.AsOperatorToken(ate)
2085
2086 return command.Subshell(left, child, right, None) # no redirects yet
2087
2088 def ParseDBracket(self):
2089 # type: () -> command.DBracket
2090 """Pass the underlying word parser off to the boolean expression
2091 parser."""
2092 left = word_.AsKeywordToken(self.cur_word)
2093 # TODO: Test interactive. Without closing ]], you should get > prompt
2094 # (PS2)
2095
2096 self._SetNext() # skip [[
2097 b_parser = bool_parse.BoolParser(self.w_parser)
2098 bnode, right = b_parser.Parse() # May raise
2099 return command.DBracket(left, bnode, right, None) # no redirects yet
2100
2101 def ParseDParen(self):
2102 # type: () -> command.DParen
2103 left = word_.AsOperatorToken(self.cur_word)
2104
2105 self._SetNext() # skip ((
2106 anode, right = self.w_parser.ReadDParen()
2107 assert anode is not None
2108
2109 return command.DParen(left, anode, right, None) # no redirects yet
2110
    def ParseCommand(self):
        # type: () -> command_t
        """Top-level dispatch for a single command.

        command          : simple_command
                         | compound_command   # OSH edit: io_redirect* folded in
                         | function_def
                         | ksh_function_def

                           # YSH extensions
                         | proc NAME ...
                         | const ...
                         | var ...
                         | setglobal ...
                         | setref ...
                         | setvar ...
                         | _ EXPR
                         | = EXPR
                         ;

        Note: the reason const / var are not part of compound_command is because
        they can't be alone in a shell function body.

        Example:
        This is valid shell   f() if true; then echo hi; fi
        This is invalid       f() var x = 1

        Raises:
          ParseError (via p_die) on unexpected words / EOF.
        """
        # do/done/then/etc. cannot BEGIN a command.
        if self._AtSecondaryKeyword():
            p_die('Unexpected word when parsing command',
                  loc.Word(self.cur_word))

        # YSH Extensions

        if self.c_id == Id.KW_Proc:  # proc p { ... }
            # proc is hidden because of the 'local reasoning' principle
            # Code inside procs should be YSH, full stop.  That means oil:upgrade is
            # on.
            if self.parse_opts.parse_proc():
                return self.ParseYshProc()

            # Otherwise silently pass.  This is to support scripts like:
            #   $ bash -c 'proc() { echo p; }; proc'

        if self.c_id == Id.KW_Func:  # func f(x) { ... }
            if self.parse_opts.parse_func() and not self.parse_opts.parse_tea():
                return self.ParseYshFunc()

            # Otherwise silently pass, like for the procs.
            # (With parse_tea, 'func' is handled in the parse_tea block below.)

        if self.c_id in (Id.KW_Var, Id.KW_Const):  # var x = 1
            keyword_id = self.c_id  # remember var vs. const for the var checker
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n8 = self.w_parser.ParseVarDecl(kw_token)
            # Register each declared name so later references can be checked.
            for lhs in n8.lhs:
                self.var_checker.Check(keyword_id, lhs.name)
            return n8

        if self.c_id in (Id.KW_SetVar, Id.KW_SetRef, Id.KW_SetGlobal):
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n9 = self.w_parser.ParsePlaceMutation(kw_token, self.var_checker)
            return n9

        if self.c_id in (Id.Lit_Underscore, Id.Lit_Equals):  # = 42 + 1
            keyword = word_.LiteralToken(self.cur_word)
            assert keyword is not None
            self._SetNext()
            enode = self.w_parser.ParseCommandExpr()
            return command.Expr(keyword, enode)

        if self.c_id == Id.KW_Function:
            return self.ParseKshFunctionDef()

        # Top-level keywords to hide: func, data, enum, class/mod. Not sure about
        # 'use'.
        if self.parse_opts.parse_tea():
            if self.c_id == Id.KW_Func:
                out0 = command.TeaFunc.CreateNull(alloc_lists=True)
                self.parse_ctx.ParseTeaFunc(self.lexer, out0)
                self._SetNext()
                return out0
            if self.c_id == Id.KW_Data:
                out1 = command.Data.CreateNull(alloc_lists=True)
                self.parse_ctx.ParseDataType(self.lexer, out1)
                self._SetNext()
                return out1
            if self.c_id == Id.KW_Enum:
                out2 = command.Enum.CreateNull(alloc_lists=True)
                self.parse_ctx.ParseEnum(self.lexer, out2)
                self._SetNext()
                return out2
            if self.c_id == Id.KW_Class:
                out3 = command.Class.CreateNull(alloc_lists=True)
                self.parse_ctx.ParseClass(self.lexer, out3)
                self._SetNext()
                return out3
            if self.c_id == Id.KW_Import:
                # Needs last_token because it ends with an optional thing?
                out4 = command.Import.CreateNull(alloc_lists=True)
                self.w_parser.ParseImport(out4)
                self._SetNext()
                return out4

        if self.c_id in (Id.KW_DLeftBracket, Id.Op_DLeftParen, Id.Op_LParen,
                         Id.Lit_LBrace, Id.KW_For, Id.KW_While, Id.KW_Until,
                         Id.KW_If, Id.KW_Case, Id.KW_Time):
            return self.ParseCompoundCommand()

        # Syntax error for '}' starting a line, which all shells disallow.
        if self.c_id == Id.Lit_RBrace:
            p_die('Unexpected right brace', loc.Word(self.cur_word))

        if self.c_kind == Kind.Redir:  # Leading redirect
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Word:
            cur_word = cast(CompoundWord, self.cur_word)  # ensured by Kind.Word

            # NOTE: At the top level, only Token and Compound are possible.
            # Can this be modelled better in the type system, removing asserts?
            #
            # TODO: This can be a proc INVOCATION!  (Doesn't even need parse_paren)
            # Problem: We have to distinguish f( ) { echo ; } and myproc (x, y)
            # That requires 2 tokens of lookahead, which we don't have
            #
            # Or maybe we don't just have ParseSimpleCommand -- we will have
            # ParseYshCommand or something

            if (self.w_parser.LookAheadFuncParens() and
                    not word_.IsVarLike(cur_word)):
                return self.ParseFunctionDef()  # f() { echo; }  # function

            # Parse x = 1+2*3 when inside HayNode { } blocks
            parts = cur_word.parts
            if self.parse_opts.parse_equals() and len(parts) == 1:
                part0 = parts[0]
                if part0.tag() == word_part_e.Literal:
                    tok = cast(Token, part0)
                    # Bare assignment only when the word is a valid var name
                    # followed (after spaces) by '='.
                    if (match.IsValidVarName(tok.tval) and
                            self.w_parser.LookPastSpace() == Id.Lit_Equals):
                        assert tok.id == Id.Lit_Chars, tok

                        if len(self.hay_attrs_stack) and self.hay_attrs_stack[-1]:
                            # Note: no static var_checker.Check() for bare assignment
                            enode = self.w_parser.ParseBareDecl()
                            self._SetNext()  # Somehow this is necessary
                            # TODO: Use BareDecl here. Well, do that when we
                            # treat it as const or lazy.
                            return command.VarDecl(None, [NameType(tok, None)],
                                                   enode)
                        else:
                            # x=1 outside a Hay attribute block is an error.
                            self._SetNext()
                            self._GetWord()
                            p_die(
                                'Unexpected = (Hint: use var/setvar, or quote it)',
                                loc.Word(self.cur_word))

            # echo foo
            # f=(a b c)  # array
            # array[1+2]+=1
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Eof:
            p_die("Unexpected EOF while parsing command",
                  loc.Word(self.cur_word))

        # NOTE: This only happens in batch mode in the second turn of the loop!
        # e.g. )
        p_die("Invalid word while parsing command", loc.Word(self.cur_word))

        assert False  # for MyPy  (p_die above always raises)
2282
2283 def ParsePipeline(self):
2284 # type: () -> command_t
2285 """
2286 pipeline : Bang? command ( '|' newline_ok command )* ;
2287 """
2288 negated = None # type: Optional[Token]
2289
2290 self._GetWord()
2291 if self.c_id == Id.KW_Bang:
2292 negated = word_.AsKeywordToken(self.cur_word)
2293 self._SetNext()
2294
2295 child = self.ParseCommand()
2296 assert child is not None
2297
2298 children = [child]
2299
2300 self._GetWord()
2301 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2302 if negated is not None:
2303 node = command.Pipeline(negated, children, [])
2304 return node
2305 else:
2306 return child # no pipeline
2307
2308 # | or |&
2309 ops = [] # type: List[Token]
2310 while True:
2311 op = word_.AsOperatorToken(self.cur_word)
2312 ops.append(op)
2313
2314 self._SetNext() # skip past Id.Op_Pipe or Id.Op_PipeAmp
2315 self._NewlineOk()
2316
2317 child = self.ParseCommand()
2318 children.append(child)
2319
2320 self._GetWord()
2321 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2322 break
2323
2324 return command.Pipeline(negated, children, ops)
2325
2326 def ParseAndOr(self):
2327 # type: () -> command_t
2328 self._GetWord()
2329 if self.c_id == Id.Word_Compound:
2330 first_word_tok = word_.LiteralToken(self.cur_word)
2331 if first_word_tok is not None and first_word_tok.id == Id.Lit_TDot:
2332 # We got '...', so parse in multiline mode
2333 self._SetNext()
2334 with word_.ctx_Multiline(self.w_parser):
2335 return self._ParseAndOr()
2336
2337 # Parse in normal mode, not multiline
2338 return self._ParseAndOr()
2339
2340 def _ParseAndOr(self):
2341 # type: () -> command_t
2342 """
2343 and_or : and_or ( AND_IF | OR_IF ) newline_ok pipeline
2344 | pipeline
2345
2346 Note that it is left recursive and left associative. We parse it
2347 iteratively with a token of lookahead.
2348 """
2349 child = self.ParsePipeline()
2350 assert child is not None
2351
2352 self._GetWord()
2353 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2354 return child
2355
2356 ops = [] # type: List[Token]
2357 children = [child]
2358
2359 while True:
2360 ops.append(word_.AsOperatorToken(self.cur_word))
2361
2362 self._SetNext() # skip past || &&
2363 self._NewlineOk()
2364
2365 child = self.ParsePipeline()
2366 children.append(child)
2367
2368 self._GetWord()
2369 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2370 break
2371
2372 return command.AndOr(children, ops)
2373
2374 # NOTE: _ParseCommandLine and _ParseCommandTerm are similar, but different.
2375
2376 # At the top level, we execute after every line, e.g. to
2377 # - process alias (a form of dynamic parsing)
2378 # - process 'exit', because invalid syntax might appear after it
2379
2380 # On the other hand, for a while loop body, we parse the whole thing at once,
2381 # and then execute it. We don't want to parse it over and over again!
2382
2383 # COMPARE
2384 # command_line : and_or (sync_op and_or)* trailer? ; # TOP LEVEL
2385 # command_term : and_or (trailer and_or)* ; # CHILDREN
2386
    def _ParseCommandLine(self):
        # type: () -> command_t
        """Parse one logical line for the top level (batch or interactive).

        command_line     : and_or (sync_op and_or)* trailer? ;
        trailer          : sync_op newline_ok
                         | NEWLINES;
        sync_op          : '&' | ';';

        NOTE: This rule causes LL(k > 1) behavior.  We would have to peek to see if
        there is another command word after the sync op.

        But it's easier to express imperatively.  Do the following in a loop:
        1. ParseAndOr
        2. Peek.
           a. If there's a newline, then return.  (We're only parsing a single
              line.)
           b. If there's a sync_op, process it.  Then look for a newline and
              return.  Otherwise, parse another AndOr.
        """
        # This END_LIST is slightly different than END_LIST in _ParseCommandTerm.
        # I don't think we should add anything else here; otherwise it will be
        # ignored at the end of ParseInteractiveLine(), e.g. leading to bug #301.
        END_LIST = [Id.Op_Newline, Id.Eof_Real]

        children = []  # type: List[command_t]
        done = False
        while not done:
            child = self.ParseAndOr()

            self._GetWord()
            if self.c_id in (Id.Op_Semi, Id.Op_Amp):
                # Wrap the command in a Sentence to record the ';' or '&'.
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                # After the sync op, a newline or EOF ends the line; anything
                # else is another and_or on the same line.
                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in END_LIST:
                done = True

            else:
                # e.g. echo a(b)
                p_die('Invalid word while parsing command line',
                      loc.Word(self.cur_word))

            children.append(child)

        # Simplify the AST: a single child doesn't need a CommandList wrapper.
        if len(children) > 1:
            return command.CommandList(children)
        else:
            return children[0]
2441
    def _ParseCommandTerm(self):
        # type: () -> command.CommandList
        """Parse a sequence of commands inside a block construct.

        command_term     : and_or (trailer and_or)* ;
        trailer          : sync_op newline_ok
                         | NEWLINES;
        sync_op          : '&' | ';';

        This is handled in imperative style, like _ParseCommandLine.
        Called by _ParseCommandList for all blocks, and also for ParseCaseArm,
        which is slightly different.  (HOW?  Is it the DSEMI?)

        Returns:
          syntax_asdl.command
        """
        # Token types that will end the command term.
        END_LIST = [self.eof_id, Id.Right_Subshell, Id.Lit_RBrace, Id.Op_DSemi]

        # NOTE: This is similar to _ParseCommandLine.
        #
        # - Why aren't we doing END_LIST in _ParseCommandLine?
        #   - Because you will never be inside $() at the top level.
        #   - We also know it will end in a newline.  It can't end in "fi"!
        #     - example: if true; then { echo hi; } fi

        children = []  # type: List[command_t]
        done = False
        while not done:
            # Most keywords are valid "first words".  But do/done/then do not BEGIN
            # commands, so they are not valid.
            if self._AtSecondaryKeyword():
                break

            child = self.ParseAndOr()

            self._GetWord()
            if self.c_id == Id.Op_Newline:
                self._SetNext()

                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in (Id.Op_Semi, Id.Op_Amp):
                # Wrap in a Sentence to record the ';' or '&' terminator.
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                self._GetWord()
                if self.c_id == Id.Op_Newline:
                    self._SetNext()  # skip over newline

                    # Test if we should keep going.  There might be another command after
                    # the semi and newline.
                    self._GetWord()
                    if self.c_id in END_LIST:  # \n EOF
                        done = True

                elif self.c_id in END_LIST:  # ; EOF
                    done = True

            elif self.c_id in END_LIST:  # EOF
                done = True

            # For if test -f foo; test -f bar {
            elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
                done = True

            elif self.c_kind != Kind.Word:
                # e.g. f() { echo (( x )) ; }
                # but can't fail on 'fi fi', see osh/cmd_parse_test.py

                #log("Invalid %s", self.cur_word)
                p_die("Invalid word while parsing command list",
                      loc.Word(self.cur_word))

            children.append(child)

        return command.CommandList(children)
2521
2522 def _ParseCommandList(self):
2523 # type: () -> command.CommandList
2524 """
2525 command_list : newline_ok command_term trailer? ;
2526
2527 This one is called by all the compound commands. It's basically a command
2528 block.
2529
2530 NOTE: Rather than translating the CFG directly, the code follows a style
2531 more like this: more like this: (and_or trailer)+. It makes capture
2532 easier.
2533 """
2534 self._NewlineOk()
2535 return self._ParseCommandTerm()
2536
    def ParseLogicalLine(self):
        # type: () -> Optional[command_t]
        """Parse a single line for main_loop.

        A wrapper around _ParseCommandLine().  Similar but not identical to
        _ParseCommandList() and ParseCommandSub().

        Returns:
          None on Id.Eof_Real, so the caller can check for pending here docs.

        Raises:
          ParseError
        """
        self._NewlineOk()
        self._GetWord()
        if self.c_id == Id.Eof_Real:
            return None  # main loop checks for here docs
        node = self._ParseCommandLine()
        return node
2553
2554 def ParseInteractiveLine(self):
2555 # type: () -> parse_result_t
2556 """Parse a single line for Interactive main_loop.
2557
2558 Different from ParseLogicalLine because newlines are handled differently.
2559
2560 Raises:
2561 ParseError
2562 """
2563 self._GetWord()
2564 if self.c_id == Id.Op_Newline:
2565 return parse_result.EmptyLine
2566 if self.c_id == Id.Eof_Real:
2567 return parse_result.Eof
2568
2569 node = self._ParseCommandLine()
2570 return parse_result.Node(node)
2571
2572 def ParseCommandSub(self):
2573 # type: () -> command_t
2574 """Parse $(echo hi) and `echo hi` for word_parse.py.
2575
2576 They can have multiple lines, like this: echo $( echo one echo
2577 two )
2578 """
2579 self._NewlineOk()
2580
2581 self._GetWord()
2582 if self.c_kind == Kind.Eof: # e.g. $()
2583 return command.NoOp
2584
2585 c_list = self._ParseCommandTerm()
2586 if len(c_list.children) == 1:
2587 return c_list.children[0]
2588 else:
2589 return c_list
2590
2591 def CheckForPendingHereDocs(self):
2592 # type: () -> None
2593 # NOTE: This happens when there is no newline at the end of a file, like
2594 # osh -c 'cat <<EOF'
2595 if len(self.pending_here_docs):
2596 node = self.pending_here_docs[0] # Just show the first one?
2597 h = cast(redir_param.HereDoc, node.arg)
2598 p_die('Unterminated here doc began here', loc.Word(h.here_begin))