OILS
/
osh
/
builtin_printf.py
1 |
#!/usr/bin/env python2
|
2 |
"""Builtin_printf.py."""
|
3 |
from __future__ import print_function
|
4 |
|
5 |
import time as time_ # avoid name conflict
|
6 |
|
7 |
from _devbuild.gen import arg_types
|
8 |
from _devbuild.gen.id_kind_asdl import Id, Kind, Id_t, Kind_t
|
9 |
from _devbuild.gen.runtime_asdl import cmd_value, value, value_e
|
10 |
from _devbuild.gen.syntax_asdl import (
|
11 |
loc,
|
12 |
loc_e,
|
13 |
loc_t,
|
14 |
source,
|
15 |
Token,
|
16 |
CompoundWord,
|
17 |
printf_part,
|
18 |
printf_part_e,
|
19 |
printf_part_t,
|
20 |
)
|
21 |
from _devbuild.gen.types_asdl import lex_mode_e, lex_mode_t
|
22 |
|
23 |
from core import alloc
|
24 |
from core import error
|
25 |
from core.error import e_die, p_die
|
26 |
from core import state
|
27 |
from core import vm
|
28 |
from frontend import flag_spec
|
29 |
from frontend import consts
|
30 |
from frontend import lexer
|
31 |
from frontend import match
|
32 |
from frontend import reader
|
33 |
from mycpp import mylib
|
34 |
from mycpp.mylib import log
|
35 |
from osh import sh_expr_eval
|
36 |
from osh import word_compile
|
37 |
from data_lang import qsn
|
38 |
|
39 |
import posix_ as posix
|
40 |
|
41 |
from typing import Dict, List, TYPE_CHECKING, cast
|
42 |
|
43 |
if TYPE_CHECKING:
|
44 |
from core import ui
|
45 |
from core.state import Mem
|
46 |
from frontend import parse_lib
|
47 |
|
48 |
_ = log
|
49 |
|
50 |
|
51 |
class _FormatStringParser(object):
    """Turn a printf format string into a list of printf_part_t nodes.

    Grammar:

      width         = Num | Star
      precision     = Dot (Num | Star | Zero)?
      fmt           = Percent (Flag | Zero)* width? precision? (Type | Time)
      part          = Char_* | Format_EscapedPercent | fmt
      printf_format = part* Eof_Real   # we're using the main lexer

    The Time alternative carries bash-style %(strftime)T conversions.
    """

    def __init__(self, lexer):
        # type: (lexer.Lexer) -> None
        self.lexer = lexer

        # Placeholders; real values are filled in by the first _Next() call.
        self.token_kind = Kind.Undefined  # type: Kind_t
        self.token_type = Id.Undefined_Tok  # type: Id_t
        self.cur_token = None  # type: Token

    def _Next(self, lex_mode):
        # type: (lex_mode_t) -> None
        """Read one token in lex_mode and cache the token, its id and kind."""
        tok = self.lexer.Read(lex_mode)
        self.cur_token = tok
        self.token_type = tok.id
        self.token_kind = consts.GetKind(tok.id)

    def _ParseFormatStr(self):
        # type: () -> printf_part_t
        """Parse the fmt production.

        On entry the current token is the Percent; on exit the current token
        is the conversion type (Type or Time), which the caller skips.
        Unsupported flags and types are reported through p_die().
        """
        self._Next(lex_mode_e.PrintfPercent)  # step over the %

        percent = printf_part.Percent.CreateNull(alloc_lists=True)

        # (Flag | Zero)*
        while self.token_type in (Id.Format_Flag, Id.Format_Zero):
            flag_tok = self.cur_token
            # Note: space and + could be implemented
            flag_str = lexer.TokenVal(flag_tok)  # allocation will be cached
            if flag_str in '# +':
                p_die("osh printf doesn't support the %r flag" % flag_str,
                      flag_tok)

            percent.flags.append(flag_tok)
            self._Next(lex_mode_e.PrintfPercent)

        # width?
        if self.token_type in (Id.Format_Num, Id.Format_Star):
            percent.width = self.cur_token
            self._Next(lex_mode_e.PrintfPercent)

        # precision?
        if self.token_type == Id.Format_Dot:
            # A lone dot stays recorded as the precision unless a number,
            # star or zero follows and replaces it.
            percent.precision = self.cur_token
            self._Next(lex_mode_e.PrintfPercent)  # past dot
            if self.token_type in (Id.Format_Num, Id.Format_Star,
                                   Id.Format_Zero):
                percent.precision = self.cur_token
                self._Next(lex_mode_e.PrintfPercent)

        # (Type | Time)
        if self.token_type == Id.Unknown_Tok:
            p_die('Invalid printf format character', self.cur_token)

        elif self.token_type in (Id.Format_Type, Id.Format_Time):
            type_tok = self.cur_token
            percent.type = type_tok

            # Extra validation that is not expressed in the grammar.
            type_str = lexer.TokenVal(type_tok)  # allocation will be cached
            if type_str in 'eEfFgG':
                p_die("osh printf doesn't support floating point", type_tok)
            # %c could be implemented, but it needs utf-8 decoding.
            if type_str == 'c':
                p_die("osh printf doesn't support single characters (bytes)",
                      type_tok)

        else:
            p_die('Expected a printf format character', self.cur_token)

        return percent

    def Parse(self):
        # type: () -> List[printf_part_t]
        """Parse the whole format string and return its parts in order."""
        result = []  # type: List[printf_part_t]

        self._Next(lex_mode_e.PrintfOuter)
        while True:
            # Like echo -e, Unknown_Backslash is not an error here even under
            # shopt -u parse_backslash: this happens at runtime rather than
            # parse time.  Users should use $'' or the future static printf
            # ${x %.3f}.
            is_literal = (self.token_kind == Kind.Char or
                          self.token_type == Id.Format_EscapedPercent or
                          self.token_type == Id.Unknown_Backslash)

            if is_literal:
                result.append(printf_part.Literal(self.cur_token))

            elif self.token_type == Id.Format_Percent:
                result.append(self._ParseFormatStr())

            elif self.token_type in (Id.Eof_Real, Id.Eol_Tok):
                # Id.Eol_Tok: special case for format string of '\x00'.
                break

            else:
                raise AssertionError(self.token_type)

            self._Next(lex_mode_e.PrintfOuter)

        return result
|
157 |
|
158 |
|
159 |
class Printf(vm._Builtin):
    """The printf builtin: printf [-v var] format [argument ...]"""

    def __init__(self, mem, parse_ctx, unsafe_arith, errfmt):
        # type: (Mem, parse_lib.ParseContext, sh_expr_eval.UnsafeArith, ui.ErrorFormatter) -> None
        self.mem = mem
        self.parse_ctx = parse_ctx
        self.unsafe_arith = unsafe_arith
        self.errfmt = errfmt

        # Parsed format strings, keyed by the raw format text, so a format
        # that is used repeatedly is only parsed once.
        self.parse_cache = {}  # type: Dict[str, List[printf_part_t]]

        # Used for the %(...)T special argument -2 ("time the shell was
        # invoked").  This object is initialized in main().
        self.shell_start_time = time_.time()

    def _Format(self, parts, varargs, locs, out):
        # type: (List[printf_part_t], List[str], List[CompoundWord], List[str]) -> int
        """Hairy printf formatting logic.

        Args:
          parts: parsed format string
          varargs: argument strings that follow the format
          locs: locations of varargs, index-aligned, used for error messages
          out: formatted pieces are appended here

        Returns:
          0 on success; 1 after printing an error for an invalid width,
          precision, or integer argument.  Formatting a negative number with
          %o %u %x %X is reported through e_die().

        The format is re-applied ("recycled") while arguments remain, unless a
        %b argument contained \\c, which stops all further output.
        """
        arg_index = 0
        num_args = len(varargs)
        backslash_c = False

        while True:  # loop over arguments
            for part in parts:  # loop over parsed format string
                UP_part = part
                if part.tag() == printf_part_e.Literal:
                    part = cast(printf_part.Literal, UP_part)
                    token = part.token
                    if token.id == Id.Format_EscapedPercent:
                        s = '%'
                    else:
                        s = word_compile.EvalCStringToken(token)
                    out.append(s)

                elif part.tag() == printf_part_e.Percent:
                    # Note: This case is very long, but hard to refactor because of the
                    # error cases and "recycling" of args! (arg_index, return 1, etc.)
                    part = cast(printf_part.Percent, UP_part)

                    # TODO: These calculations are independent of the data, so could be
                    # cached
                    flags = []  # type: List[str]
                    if len(part.flags) > 0:
                        for flag_token in part.flags:
                            flags.append(lexer.TokenVal(flag_token))

                    width = -1  # nonexistent
                    if part.width:
                        if part.width.id in (Id.Format_Num, Id.Format_Zero):
                            width_str = lexer.TokenVal(part.width)
                            width_loc = part.width  # type: loc_t
                        elif part.width.id == Id.Format_Star:
                            # '*' takes the width from the next argument
                            if arg_index < num_args:
                                width_str = varargs[arg_index]
                                width_loc = locs[arg_index]
                                arg_index += 1
                            else:
                                width_str = ''  # invalid
                                width_loc = loc.Missing
                        else:
                            raise AssertionError()

                        try:
                            width = int(width_str)
                        except ValueError:
                            # No argument to blame: point at the format instead
                            if width_loc.tag() == loc_e.Missing:
                                width_loc = part.width
                            self.errfmt.Print_("printf got invalid width %r" %
                                               width_str,
                                               blame_loc=width_loc)
                            return 1

                    precision = -1  # nonexistent
                    if part.precision:
                        if part.precision.id == Id.Format_Dot:
                            # A bare '.' means precision 0
                            precision_str = '0'
                            precision_loc = part.precision  # type: loc_t
                        elif part.precision.id in (Id.Format_Num,
                                                   Id.Format_Zero):
                            precision_str = lexer.TokenVal(part.precision)
                            precision_loc = part.precision
                        elif part.precision.id == Id.Format_Star:
                            # '*' takes the precision from the next argument
                            if arg_index < num_args:
                                precision_str = varargs[arg_index]
                                precision_loc = locs[arg_index]
                                arg_index += 1
                            else:
                                precision_str = ''
                                precision_loc = loc.Missing
                        else:
                            raise AssertionError()

                        try:
                            precision = int(precision_str)
                        except ValueError:
                            # No argument to blame: point at the format instead
                            if precision_loc.tag() == loc_e.Missing:
                                precision_loc = part.precision
                            self.errfmt.Print_(
                                'printf got invalid precision %r' %
                                precision_str,
                                blame_loc=precision_loc)
                            return 1

                    # The value to convert; a missing argument is treated as ''
                    if arg_index < num_args:
                        s = varargs[arg_index]
                        word_loc = locs[arg_index]  # type: loc_t
                        arg_index += 1
                        has_arg = True
                    else:
                        s = ''
                        word_loc = loc.Missing
                        has_arg = False

                    # Note: %s could be lexed into Id.Percent_S. Although small string
                    # optimization would remove the allocation as well.
                    typ = lexer.TokenVal(part.type)
                    if typ == 's':
                        if precision >= 0:
                            s = s[:precision]  # truncate

                    elif typ == 'q':
                        # TODO: most shells give \' for single quote, while OSH gives $'\''
                        # this could matter when SSH'ing
                        s = qsn.maybe_shell_encode(s)

                    elif typ == 'b':
                        # Process just like echo -e, except \c handling is simpler.

                        c_parts = []  # type: List[str]
                        lex = match.EchoLexer(s)
                        while True:
                            id_, tok_val = lex.Next()
                            if id_ == Id.Eol_Tok:  # Note: This is really a NUL terminator
                                break

                            # Note: DummyToken is OK because EvalCStringToken() doesn't have
                            # any syntax errors.
                            tok = lexer.DummyToken(id_, tok_val)
                            p = word_compile.EvalCStringToken(tok)

                            # Unusual behavior: '\c' aborts processing!
                            if p is None:
                                backslash_c = True
                                break

                            c_parts.append(p)
                        s = ''.join(c_parts)

                    elif part.type.id == Id.Format_Time or typ in 'diouxX':
                        # %(...)T and %d share this complex integer conversion logic

                        try:
                            # note: spaces like ' -42 ' accepted and normalized
                            d = int(s)

                        except ValueError:
                            # 'a is interpreted as the ASCII value of 'a'
                            if len(s) >= 1 and s[0] in '\'"':
                                # TODO: utf-8 decode s[1:] to be more correct. Probably
                                # depends on issue #366, a utf-8 library.
                                # Note: len(s) == 1 means there is a NUL (0) after the quote..
                                d = ord(s[1]) if len(s) >= 2 else 0

                            # No argument means -1 for %(...)T as in Bash Reference Manual
                            # 4.2 "If no argument is specified, conversion behaves as if -1
                            # had been given."
                            elif not has_arg and part.type.id == Id.Format_Time:
                                d = -1

                            else:
                                if has_arg:
                                    blame_loc = word_loc  # type: loc_t
                                else:
                                    blame_loc = part.type
                                self.errfmt.Print_(
                                    'printf expected an integer, got %r' % s,
                                    blame_loc)
                                return 1

                        if part.type.id == Id.Format_Time:
                            # Initialize timezone:
                            #   `localtime' uses the current timezone information initialized
                            #   by `tzset'.  The function `tzset' refers to the environment
                            #   variable `TZ'.  When the exported variable `TZ' is present,
                            #   its value should be reflected in the real environment
                            #   variable `TZ' before call of `tzset'.
                            #
                            # Note: unlike LANG, TZ doesn't seem to change behavior if it's
                            # not exported.
                            #
                            # TODO: In YSH, provide an API that doesn't rely on libc's global
                            # state.

                            tzcell = self.mem.GetCell('TZ')
                            if tzcell and tzcell.exported and tzcell.val.tag(
                            ) == value_e.Str:
                                tzval = cast(value.Str, tzcell.val)
                                posix.putenv('TZ', tzval.s)

                            time_.tzset()

                            # Handle special values:
                            #   User can specify two special values -1 and -2 as in Bash
                            #   Reference Manual 4.2: "Two special argument values may be
                            #   used: -1 represents the current time, and -2 represents the
                            #   time the shell was invoked." from
                            #   https://www.gnu.org/software/bash/manual/html_node/Bash-Builtins.html#index-printf
                            if d == -1:  # the current time
                                ts = time_.time()
                            elif d == -2:  # the shell start time
                                ts = self.shell_start_time
                            else:
                                ts = d

                            # typ looks like '(FORMAT)T'; typ[1:-2] is FORMAT
                            s = time_.strftime(typ[1:-2], time_.localtime(ts))
                            if precision >= 0:
                                s = s[:precision]  # truncate

                        else:  # typ in 'diouxX'
                            # Disallowed because it depends on 32- or 64- bit
                            if d < 0 and typ in 'ouxX':
                                e_die(
                                    "Can't format negative number %d with %%%s"
                                    % (d, typ), part.type)

                            if typ == 'o':
                                s = mylib.octal(d)
                            elif typ == 'x':
                                s = mylib.hex_lower(d)
                            elif typ == 'X':
                                s = mylib.hex_upper(d)
                            else:  # diu
                                s = str(d)  # without spaces like ' -42 '

                            # There are TWO different ways to ZERO PAD, and they differ on
                            # the negative sign!  See spec/builtin-printf

                            zero_pad = 0  # no zero padding
                            if width >= 0 and '0' in flags:
                                zero_pad = 1  # style 1
                            elif precision > 0 and len(s) < precision:
                                zero_pad = 2  # style 2

                            if zero_pad:
                                negative = (s[0] == '-')
                                if negative:
                                    digits = s[1:]
                                    sign = '-'
                                    if zero_pad == 1:
                                        # [%06d] -42 becomes [-00042] (6 TOTAL)
                                        n = width - 1
                                    else:
                                        # [%6.6d] -42 becomes [-000042] (1 for '-' + 6)
                                        n = precision
                                else:
                                    digits = s
                                    sign = ''
                                    if zero_pad == 1:
                                        n = width
                                    else:
                                        n = precision
                                s = sign + digits.rjust(n, '0')

                    else:
                        raise AssertionError()

                    if width >= 0:
                        if '-' in flags:
                            s = s.ljust(width, ' ')
                        else:
                            s = s.rjust(width, ' ')

                    out.append(s)

                else:
                    raise AssertionError()

                if backslash_c:  # 'printf %b a\cb xx' - \c terminates processing!
                    break

            if backslash_c:
                # \c must also suppress the remaining arguments.  Without this
                # check, the arg_index tests below would recycle the format
                # and keep printing.
                break
            if arg_index == 0:
                # We went through ALL parts and didn't consume ANY arg.
                # Example: print x y
                break
            if arg_index >= num_args:
                # We printed all args
                break
            # There are more arg: Implement the 'arg recycling' behavior.

        return 0

    def Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """
        printf: printf [-v var] format [argument ...]

        Returns 0 on success, 2 when the format string fails to parse, or the
        non-zero status from _Format().  With -v the result is assigned to the
        named variable; otherwise it is written to stdout.
        """
        attrs, arg_r = flag_spec.ParseCmdVal('printf', cmd_val)
        arg = arg_types.printf(attrs.attrs)

        fmt, fmt_loc = arg_r.ReadRequired2('requires a format string')
        varargs, locs = arg_r.Rest2()

        #log('fmt %s', fmt)
        #log('vals %s', vals)

        arena = self.parse_ctx.arena
        if fmt in self.parse_cache:
            parts = self.parse_cache[fmt]
        else:
            line_reader = reader.StringLineReader(fmt, arena)
            # TODO: Make public
            # Named fmt_lexer so the imported 'lexer' module isn't shadowed.
            fmt_lexer = self.parse_ctx.MakeLexer(line_reader)
            parser = _FormatStringParser(fmt_lexer)

            with alloc.ctx_SourceCode(arena,
                                      source.ArgvWord('printf', fmt_loc)):
                try:
                    parts = parser.Parse()
                except error.Parse as e:
                    self.errfmt.PrettyPrintError(e)
                    return 2  # parse error

            self.parse_cache[fmt] = parts

        if 0:
            print()
            for part in parts:
                part.PrettyPrint()
                print()

        out = []  # type: List[str]
        status = self._Format(parts, varargs, locs, out)
        if status != 0:
            return status  # failure

        result = ''.join(out)
        if arg.v is not None:
            # TODO: get the location for arg.v!
            v_loc = loc.Missing
            lval = self.unsafe_arith.ParseLValue(arg.v, v_loc)
            state.BuiltinSetValue(self.mem, lval, value.Str(result))
        else:
            mylib.Stdout().write(result)
        return 0
|