OILS / ysh / grammar_gen.py View on Github | oilshell.org

244 lines, 167 significant
1#!/usr/bin/env python2
2"""
3grammar_gen.py - Use pgen2 to generate tables from Oil's grammar.
4"""
5from __future__ import print_function
6
7import os
8import sys
9
10from _devbuild.gen.id_kind_asdl import Id, Kind
11from _devbuild.gen.syntax_asdl import source
12
13from core import alloc
14from core import optview
15from mycpp.mylib import log
16from frontend import lexer
17from frontend import lexer_def
18from frontend import reader
19from pgen2 import parse, pgen, token
20
21
class OilTokenDef(object):
    """Token definition passed to pgen2 when building Oil's grammar tables.

    pgen2 calls the three Get*Num() methods to translate the terminal names,
    operator strings, and keyword strings that appear in the grammar file
    into integer token IDs.  Every ID returned must be below
    token.NT_OFFSET, since values at or above that mark nonterminals.
    """

    def __init__(self, ops, more_ops, keyword_ops):
        # ops: {op string: Id} built from OPS + lexer_def.EXPR_OPS
        # more_ops: {op string: Id} for tokens like / ${ @{ (arith, subs)
        # keyword_ops: {keyword string: Id} for words like 'for', 'in'
        self.ops = ops
        self.more_ops = more_ops
        self.keyword_ops = keyword_ops

    def GetTerminalNum(self, label):
        """e.g. translate Expr_Name in the grammar to 178."""
        id_ = getattr(Id, label)
        #log('Id %s = %d', id_, id_)
        assert id_ < token.NT_OFFSET, id_
        return id_

    def GetKeywordNum(self, s):
        """e.g 'xor' -> Id.Expr_Xor.

        Python doesn't have this, but Oil does.  Returns None if not
        found, so the caller can fall back to other token kinds.
        """
        id_ = self.keyword_ops.get(s)
        if id_ is None:
            return None
        assert id_ < token.NT_OFFSET, id_
        return id_

    def GetOpNum(self, op_str):
        """
        Args:
          op_str: '>='

        Returns:
          Integer for '>=' or Id.Arith_GreatEqual

        Raises:
          KeyError: if op_str is in neither table.
        """
        # Use an explicit membership test rather than
        # 'self.ops.get(op_str) or self.more_ops[op_str]': the 'or' form
        # silently falls through to more_ops (or raises a misleading
        # KeyError) whenever a stored ID happens to be falsy (0).
        if op_str in self.ops:
            id_ = self.ops[op_str]
        else:
            id_ = self.more_ops[op_str]  # fail loudly if not there either
        assert id_ < token.NT_OFFSET, id_
        return id_
60
61
def MakeOilLexer(code_str, arena):
    """Build a Lexer that reads Oil code from the given string.

    Registers a synthetic 'pgen2_main' source on the arena so that token
    locations produced by the lexer resolve to something meaningful.
    """
    arena.PushSource(source.MainFile('pgen2_main'))
    line_reader = reader.StringLineReader(code_str, arena)
    return lexer.Lexer(lexer.LineLexer(arena), line_reader)
68
69
def main(argv):
    """Dispatch on argv[1]: 'py', 'cpp', 'parse', or 'stdlib-test'.

    'py' / 'cpp' read a grammar file and emit marshal/Python or C++
    tables; 'parse' additionally parses a code string with the generated
    grammar; 'stdlib-test' demos Python's builtin parser module.
    Returns an int exit status (or None); raises RuntimeError on an
    unknown action.
    """
    action = argv[1]
    argv = argv[2:]  # remaining args are interpreted per-action below

    # Used at grammar BUILD time.
    # Operator strings that appear in the grammar, mapped to token IDs.
    OPS = {
        '!': Id.Expr_Bang,
        '.': Id.Expr_Dot,
        '..': Id.Expr_DDot,
        '->': Id.Expr_RArrow,
        '=>': Id.Expr_RDArrow,
        '//': Id.Expr_DSlash,
        '++': Id.Arith_DPlus,
        '!~': Id.Expr_NotTilde,
        '~~': Id.Expr_DTilde,
        '!~~': Id.Expr_NotDTilde,
        '~==': Id.Expr_TildeDEqual,
        '===': Id.Expr_TEqual,
        '!==': Id.Expr_NotDEqual,
        '@': Id.Expr_At,
        '...': Id.Expr_Ellipsis,
        '$': Id.Expr_Dollar,  # Only for legacy eggex /d+$/
        '**=': Id.Expr_DStarEqual,
        '//=': Id.Expr_DSlashEqual,
    }

    # Note: We have two lists of ops because Id.Op_Semi is used, not
    # Id.Arith_Semi.
    for _, token_str, id_ in lexer_def.EXPR_OPS:
        assert token_str not in OPS, token_str  # catch duplicate op strings
        OPS[token_str] = id_

    # Tokens that look like / or ${ or @{
    triples = (lexer_def.ID_SPEC.LexerPairs(Kind.Arith) +
               lexer_def.YSH_LEFT_SUBS + lexer_def.YSH_LEFT_UNQUOTED +
               lexer_def.EXPR_WORDS)
    more_ops = {}
    for _, token_str, id_ in triples:
        if token_str in more_ops:
            # Duplicate token string across the merged lists; dump the
            # table so the collision is easy to diagnose.
            import pprint
            raise AssertionError(
                '%r %s' % (token_str, pprint.pformat(more_ops, indent=2)))
        more_ops[token_str] = id_

    # Tokens that look like 'for'
    keyword_ops = {}
    for _, token_str, id_ in lexer_def.EXPR_WORDS:  # for, in, etc.
        assert token_str not in keyword_ops, token_str
        keyword_ops[token_str] = id_

    if 0:  # debug: dump the three token tables
        from pprint import pprint
        pprint(OPS)
        print('---')
        pprint(more_ops)
        print('---')
        pprint(keyword_ops)
        print('---')

    tok_def = OilTokenDef(OPS, more_ops, keyword_ops)

    if action == 'py':  # generate the grammar and parse it
        grammar_path = argv[0]
        out_dir = argv[1]

        basename, _ = os.path.splitext(os.path.basename(grammar_path))

        # HACK for find:
        if basename == 'find':
            from tools.find import tokenizer as find_tokenizer
            tok_def = find_tokenizer.TokenDef()

        with open(grammar_path) as f:
            gr = pgen.MakeGrammar(f, tok_def=tok_def)

        # Serialized grammar tables, loaded at runtime.
        marshal_path = os.path.join(out_dir, basename + '.marshal')
        with open(marshal_path, 'wb') as out_f:
            gr.dump(out_f)

        # Generated module of nonterminal number constants.
        nonterm_py = os.path.join(out_dir, basename + '_nt.py')
        with open(nonterm_py, 'w') as out_f:
            gr.dump_nonterminals_py(out_f)

        log('%s -> (ysh/grammar_gen) -> %s/%s{.marshal,_nt.py}', grammar_path,
            out_dir, basename)

        #gr.report()

    elif action == 'cpp':  # generate the grammar and parse it
        grammar_path = argv[0]
        out_dir = argv[1]

        basename, _ = os.path.splitext(os.path.basename(grammar_path))

        with open(grammar_path) as f:
            gr = pgen.MakeGrammar(f, tok_def=tok_def)

        # C++ header of nonterminal constants.
        nonterm_h = os.path.join(out_dir, basename + '_nt.h')
        with open(nonterm_h, 'w') as out_f:
            gr.dump_nonterminals_cpp(out_f)

        # C++ source with the grammar tables themselves.
        grammar_cpp_path = os.path.join(out_dir, basename + '_tables.cc')
        with open(grammar_cpp_path, 'w') as src_f:
            gr.dump_cpp(src_f)

        if 0:  # debug logging, disabled
            log('%s -> (ysh/grammar_gen) -> %s/%s._nt.h', grammar_path,
                out_dir, basename)

    elif action == 'parse':  # generate the grammar and parse it
        # Remove build dependency
        from frontend import parse_lib
        from ysh import expr_parse
        from ysh import expr_to_ast

        grammar_path = argv[0]
        start_symbol = argv[1]
        code_str = argv[2]

        # For choosing lexer and semantic actions
        grammar_name, _ = os.path.splitext(os.path.basename(grammar_path))

        with open(grammar_path) as f:
            gr = pgen.MakeGrammar(f, tok_def=tok_def)

        arena = alloc.Arena()
        lex_ = MakeOilLexer(code_str, arena)

        # Only these grammars get an AST transformation pass below.
        is_expr = grammar_name in ('calc', 'grammar')

        parse_opts = optview.Parse([], [])
        parse_ctx = parse_lib.ParseContext(arena, parse_opts, {}, gr)
        p = expr_parse.ExprParser(parse_ctx, gr, False)
        try:
            with expr_parse.ctx_PNodeAllocator(p):
                pnode, _ = p.Parse(lex_, gr.symbol2number[start_symbol])
        except parse.ParseError as e:
            log('Parse Error: %s', e)
            return 1

        names = expr_to_ast.MakeGrammarNames(gr)
        p_printer = expr_parse.ParseTreePrinter(names)  # print raw nodes
        p_printer.Print(pnode)

        if is_expr:
            # Transform the raw parse tree into a typed AST; the
            # transformation depends on the start symbol used.
            tr = expr_to_ast.Transformer(gr)
            if start_symbol == 'eval_input':
                ast_node = tr.Expr(pnode)
            elif start_symbol == 'ysh_case_pat':
                ast_node = tr.YshCasePattern(pnode)
            else:
                ast_node = tr.VarDecl(pnode)
            ast_node.PrettyPrint()
            print()

    elif action == 'stdlib-test':
        # This shows how deep Python's parse tree is.  It doesn't use semantic
        # actions to prune on the fly!

        import parser  # builtin module
        t = parser.expr('1+2')
        print(t)
        t2 = parser.st2tuple(t)
        print(t2)

    else:
        raise RuntimeError('Invalid action %r' % action)
237
238
if __name__ == '__main__':
    # Report RuntimeError (e.g. an invalid action) as a fatal error with
    # exit status 1; otherwise exit with whatever main() returns.
    try:
        status = main(sys.argv)
    except RuntimeError as e:
        print('FATAL: %s' % e, file=sys.stderr)
        sys.exit(1)
    sys.exit(status)