Line data Source code
1 : /*
2 : string_format.h -- implementation of string.format().
3 :
4 : It uses the Objects/stringlib conventions, so that it can be
5 : compiled for both unicode and string objects.
6 : */
7 :
8 :
9 : /* Defines for Python 2.6 compatibility */
10 : #if PY_VERSION_HEX < 0x03000000
11 : #define PyLong_FromSsize_t _PyLong_FromSsize_t
12 : #endif
13 :
14 : /* Defines for more efficiently reallocating the string buffer */
15 : #define INITIAL_SIZE_INCREMENT 100
16 : #define SIZE_MULTIPLIER 2
17 : #define MAX_SIZE_INCREMENT 3200
18 :
19 :
20 : /************************************************************************/
21 : /*********** Global data structures and forward declarations *********/
22 : /************************************************************************/
23 :
24 : /*
25 : A SubString consists of the characters between two string or
26 : unicode pointers.
27 : */
28 : typedef struct {
29 : STRINGLIB_CHAR *ptr; /* first character of the span; NULL when the span is unset (see SubString_init) */
30 : STRINGLIB_CHAR *end; /* one past the last character; NULL whenever ptr is NULL */
31 : } SubString;
32 :
33 :
34 : typedef enum {
35 : ANS_INIT, /* no numerically indexed field has been processed yet */
36 : ANS_AUTO, /* first numeric field had an empty name: automatic numbering */
37 : ANS_MANUAL /* first numeric field had an explicit number: manual numbering */
38 : } AutoNumberState; /* Keep track if we're auto-numbering fields */
39 :
40 : /* Keeps track of our auto-numbering state, and which number field we're on */
41 : typedef struct {
42 : AutoNumberState an_state; /* numbering mode chosen so far (ANS_INIT until one is chosen) */
43 : int an_field_number; /* index that the next auto-numbered field will receive */
44 : } AutoNumber;
45 :
46 :
47 : /* forward declaration for recursion */
48 : static PyObject *
49 : build_string(SubString *input, PyObject *args, PyObject *kwargs,
50 : int recursion_depth, AutoNumber *auto_number);
51 :
52 :
53 :
54 : /************************************************************************/
55 : /************************** Utility functions ************************/
56 : /************************************************************************/
57 :
58 : static void
59 351 : AutoNumber_Init(AutoNumber *auto_number)
60 : {
61 351 : auto_number->an_state = ANS_INIT;
62 351 : auto_number->an_field_number = 0;
63 351 : }
64 :
65 : /* fill in a SubString from a pointer and length */
66 : Py_LOCAL_INLINE(void)
67 9408 : SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
68 : {
69 9408 : str->ptr = p;
70 9408 : if (p == NULL)
71 5754 : str->end = NULL;
72 : else
73 3654 : str->end = str->ptr + len;
74 9408 : }
75 :
76 : /* return a new string. if str->ptr is NULL, return None */
77 : Py_LOCAL_INLINE(PyObject *)
78 888 : SubString_new_object(SubString *str)
79 : {
80 888 : if (str->ptr == NULL) {
81 0 : Py_INCREF(Py_None);
82 0 : return Py_None;
83 : }
84 888 : return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
85 : }
86 :
87 : /* return a new string. if str->ptr is NULL, return None */
88 : Py_LOCAL_INLINE(PyObject *)
89 0 : SubString_new_object_or_empty(SubString *str)
90 : {
91 0 : if (str->ptr == NULL) {
92 0 : return STRINGLIB_NEW(NULL, 0);
93 : }
94 0 : return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
95 : }
96 :
97 : /* Return 1 if an error has been detected switching between automatic
98 : field numbering and manual field specification, else return 0. Set
99 : ValueError on error. */
100 : static int
101 96 : autonumber_state_error(AutoNumberState state, int field_name_is_empty)
102 : {
103 96 : if (state == ANS_MANUAL) {
104 96 : if (field_name_is_empty) {
105 0 : PyErr_SetString(PyExc_ValueError, "cannot switch from "
106 : "manual field specification to "
107 : "automatic field numbering");
108 0 : return 1;
109 : }
110 : }
111 : else {
112 0 : if (!field_name_is_empty) {
113 0 : PyErr_SetString(PyExc_ValueError, "cannot switch from "
114 : "automatic field numbering to "
115 : "manual field specification");
116 0 : return 1;
117 : }
118 : }
119 96 : return 0;
120 : }
121 :
122 :
123 : /************************************************************************/
124 : /*********** Output string management functions ****************/
125 : /************************************************************************/
126 :
127 : typedef struct {
128 : STRINGLIB_CHAR *ptr; /* next write position inside obj's character buffer */
129 : STRINGLIB_CHAR *end; /* one past the last usable character of obj's buffer */
130 : PyObject *obj; /* string object whose buffer is being filled */
131 : Py_ssize_t size_increment; /* slack added on the next output_extend call */
132 : } OutputString;
133 :
134 : /* initialize an OutputString object, reserving size characters */
135 : static int
136 351 : output_initialize(OutputString *output, Py_ssize_t size)
137 : {
138 351 : output->obj = STRINGLIB_NEW(NULL, size);
139 351 : if (output->obj == NULL)
140 0 : return 0;
141 :
142 351 : output->ptr = STRINGLIB_STR(output->obj);
143 351 : output->end = STRINGLIB_LEN(output->obj) + output->ptr;
144 351 : output->size_increment = INITIAL_SIZE_INCREMENT;
145 :
146 351 : return 1;
147 : }
148 :
149 : /*
150 : output_extend reallocates the output string buffer.
151 : It returns a status: 0 for a failed reallocation,
152 : 1 for success.
153 : */
154 :
155 : static int
156 48 : output_extend(OutputString *output, Py_ssize_t count)
157 : {
158 48 : STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
159 48 : Py_ssize_t curlen = output->ptr - startptr;
160 48 : Py_ssize_t maxlen = curlen + count + output->size_increment;
161 :
162 48 : if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
163 0 : return 0;
164 48 : startptr = STRINGLIB_STR(output->obj);
165 48 : output->ptr = startptr + curlen;
166 48 : output->end = startptr + maxlen;
167 48 : if (output->size_increment < MAX_SIZE_INCREMENT)
168 48 : output->size_increment *= SIZE_MULTIPLIER;
169 48 : return 1;
170 : }
171 :
172 : /*
173 : output_data dumps characters into our output string
174 : buffer.
175 :
176 : In some cases, it has to reallocate the string.
177 :
178 : It returns a status: 0 for a failed reallocation,
179 : 1 for success.
180 : */
181 : static int
182 2223 : output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
183 : {
184 2223 : if ((count > output->end - output->ptr) && !output_extend(output, count))
185 0 : return 0;
186 2223 : memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
187 2223 : output->ptr += count;
188 2223 : return 1;
189 : }
190 :
191 : /************************************************************************/
192 : /*********** Format string parsing -- integers and identifiers *********/
193 : /************************************************************************/
194 :
195 : static Py_ssize_t
196 984 : get_integer(const SubString *str)
197 : {
198 984 : Py_ssize_t accumulator = 0;
199 : Py_ssize_t digitval;
200 : STRINGLIB_CHAR *p;
201 :
202 : /* empty string is an error */
203 984 : if (str->ptr >= str->end)
204 0 : return -1;
205 :
206 1080 : for (p = str->ptr; p < str->end; p++) {
207 984 : digitval = STRINGLIB_TODECIMAL(*p);
208 984 : if (digitval < 0)
209 888 : return -1;
210 : /*
211 : Detect possible overflow before it happens:
212 :
213 : accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
214 : accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
215 : */
216 96 : if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
217 0 : PyErr_Format(PyExc_ValueError,
218 : "Too many decimal digits in format string");
219 0 : return -1;
220 : }
221 96 : accumulator = accumulator * 10 + digitval;
222 : }
223 96 : return accumulator;
224 : }
225 :
226 : /************************************************************************/
227 : /******** Functions to get field objects and specification strings ******/
228 : /************************************************************************/
229 :
230 : /* do the equivalent of obj.name */
231 : static PyObject *
232 0 : getattr(PyObject *obj, SubString *name)
233 : {
234 : PyObject *newobj;
235 0 : PyObject *str = SubString_new_object(name);
236 0 : if (str == NULL)
237 0 : return NULL;
238 0 : newobj = PyObject_GetAttr(obj, str);
239 0 : Py_DECREF(str);
240 0 : return newobj;
241 : }
242 :
243 : /* do the equivalent of obj[idx], where obj is a sequence */
244 : static PyObject *
245 0 : getitem_sequence(PyObject *obj, Py_ssize_t idx)
246 : {
247 0 : return PySequence_GetItem(obj, idx);
248 : }
249 :
250 : /* do the equivalent of obj[idx], where obj is not a sequence */
251 : static PyObject *
252 0 : getitem_idx(PyObject *obj, Py_ssize_t idx)
253 : {
254 : PyObject *newobj;
255 0 : PyObject *idx_obj = PyLong_FromSsize_t(idx);
256 0 : if (idx_obj == NULL)
257 0 : return NULL;
258 0 : newobj = PyObject_GetItem(obj, idx_obj);
259 0 : Py_DECREF(idx_obj);
260 0 : return newobj;
261 : }
262 :
263 : /* do the equivalent of obj[name] */
264 : static PyObject *
265 0 : getitem_str(PyObject *obj, SubString *name)
266 : {
267 : PyObject *newobj;
268 0 : PyObject *str = SubString_new_object(name);
269 0 : if (str == NULL)
270 0 : return NULL;
271 0 : newobj = PyObject_GetItem(obj, str);
272 0 : Py_DECREF(str);
273 0 : return newobj;
274 : }
275 :
276 : typedef struct {
277 : /* the entire string we're parsing. we assume that someone else
278 : is managing its lifetime, and that it will exist for the
279 : lifetime of the iterator. can be empty */
280 : SubString str;
281 :
282 : /* current parse position inside str; starts at str.ptr and is
   advanced by FieldNameIterator_next as each ".attr" or "[item]"
   component is consumed */
283 : STRINGLIB_CHAR *ptr;
284 : } FieldNameIterator;
285 :
286 :
287 : static int
288 984 : FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
289 : Py_ssize_t len)
290 : {
291 984 : SubString_init(&self->str, ptr, len);
292 984 : self->ptr = self->str.ptr;
293 984 : return 1;
294 : }
295 :
296 : static int
297 0 : _FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
298 : {
299 : STRINGLIB_CHAR c;
300 :
301 0 : name->ptr = self->ptr;
302 :
303 : /* return everything until '.' or '[' */
304 0 : while (self->ptr < self->str.end) {
305 0 : switch (c = *self->ptr++) {
306 : case '[':
307 : case '.':
308 : /* backup so that we this character will be seen next time */
309 0 : self->ptr--;
310 0 : break;
311 : default:
312 0 : continue;
313 : }
314 0 : break;
315 : }
316 : /* end of string is okay */
317 0 : name->end = self->ptr;
318 0 : return 1;
319 : }
320 :
321 : static int
322 0 : _FieldNameIterator_item(FieldNameIterator *self, SubString *name)
323 : {
324 0 : int bracket_seen = 0;
325 : STRINGLIB_CHAR c;
326 :
327 0 : name->ptr = self->ptr;
328 :
329 : /* return everything until ']' */
330 0 : while (self->ptr < self->str.end) {
331 0 : switch (c = *self->ptr++) {
332 : case ']':
333 0 : bracket_seen = 1;
334 0 : break;
335 : default:
336 0 : continue;
337 : }
338 0 : break;
339 : }
340 : /* make sure we ended with a ']' */
341 0 : if (!bracket_seen) {
342 0 : PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
343 0 : return 0;
344 : }
345 :
346 : /* end of string is okay */
347 : /* don't include the ']' */
348 0 : name->end = self->ptr-1;
349 0 : return 1;
350 : }
351 :
352 : /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
353 : static int
354 984 : FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
355 : Py_ssize_t *name_idx, SubString *name)
356 : {
357 : /* check at end of input */
358 984 : if (self->ptr >= self->str.end)
359 984 : return 1;
360 :
361 0 : switch (*self->ptr++) {
362 : case '.':
363 0 : *is_attribute = 1;
364 0 : if (_FieldNameIterator_attr(self, name) == 0)
365 0 : return 0;
366 0 : *name_idx = -1;
367 0 : break;
368 : case '[':
369 0 : *is_attribute = 0;
370 0 : if (_FieldNameIterator_item(self, name) == 0)
371 0 : return 0;
372 0 : *name_idx = get_integer(name);
373 0 : if (*name_idx == -1 && PyErr_Occurred())
374 0 : return 0;
375 0 : break;
376 : default:
377 : /* Invalid character follows ']' */
378 0 : PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
379 : "follow ']' in format field specifier");
380 0 : return 0;
381 : }
382 :
383 : /* empty string is an error */
384 0 : if (name->ptr == name->end) {
385 0 : PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
386 0 : return 0;
387 : }
388 :
389 0 : return 2;
390 : }
391 :
392 :
393 : /* input: field_name
394 : output: 'first' points to the part before the first '[' or '.'
395 : 'first_idx' is -1 if 'first' is not an integer, otherwise
396 : it's the value of first converted to an integer
397 : 'rest' is an iterator to return the rest
398 : */
399 : static int
400 984 : field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
401 : Py_ssize_t *first_idx, FieldNameIterator *rest,
402 : AutoNumber *auto_number)
403 : {
404 : STRINGLIB_CHAR c;
405 984 : STRINGLIB_CHAR *p = ptr;
406 984 : STRINGLIB_CHAR *end = ptr + len;
407 : int field_name_is_empty;
408 : int using_numeric_index;
409 :
410 : /* find the part up until the first '.' or '[' */
411 7896 : while (p < end) {
412 5928 : switch (c = *p++) {
413 : case '[':
414 : case '.':
415 : /* backup so that we this character is available to the
416 : "rest" iterator */
417 0 : p--;
418 0 : break;
419 : default:
420 5928 : continue;
421 : }
422 0 : break;
423 : }
424 :
425 : /* set up the return values */
426 984 : SubString_init(first, ptr, p - ptr);
427 984 : FieldNameIterator_init(rest, p, end - p);
428 :
429 : /* see if "first" is an integer, in which case it's used as an index */
430 984 : *first_idx = get_integer(first);
431 984 : if (*first_idx == -1 && PyErr_Occurred())
432 0 : return 0;
433 :
434 984 : field_name_is_empty = first->ptr >= first->end;
435 :
436 : /* If the field name is omitted or if we have a numeric index
437 : specified, then we're doing numeric indexing into args. */
438 984 : using_numeric_index = field_name_is_empty || *first_idx != -1;
439 :
440 : /* We always get here exactly one time for each field we're
441 : processing. And we get here in field order (counting by left
442 : braces). So this is the perfect place to handle automatic field
443 : numbering if the field name is omitted. */
444 :
445 : /* Check if we need to do the auto-numbering. It's not needed if
446 : we're called from string.Format routines, because it's handled
447 : in that class by itself. */
448 984 : if (auto_number) {
449 : /* Initialize our auto numbering state if this is the first
450 : time we're either auto-numbering or manually numbering. */
451 984 : if (auto_number->an_state == ANS_INIT && using_numeric_index)
452 96 : auto_number->an_state = field_name_is_empty ?
453 : ANS_AUTO : ANS_MANUAL;
454 :
455 : /* Make sure our state is consistent with what we're doing
456 : this time through. Only check if we're using a numeric
457 : index. */
458 984 : if (using_numeric_index)
459 96 : if (autonumber_state_error(auto_number->an_state,
460 : field_name_is_empty))
461 0 : return 0;
462 : /* Zero length field means we want to do auto-numbering of the
463 : fields. */
464 984 : if (field_name_is_empty)
465 0 : *first_idx = (auto_number->an_field_number)++;
466 : }
467 :
468 984 : return 1;
469 : }
470 :
471 :
472 : /*
473 : get_field_object returns the object inside {}, before the
474 : format_spec. It handles getindex and getattr lookups and consumes
475 : the entire input string.
476 : */
477 : static PyObject *
478 984 : get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
479 : AutoNumber *auto_number)
480 : {
481 984 : PyObject *obj = NULL;
482 : int ok;
483 : int is_attribute;
484 : SubString name;
485 : SubString first;
486 : Py_ssize_t index;
487 : FieldNameIterator rest;
488 :
489 984 : if (!field_name_split(input->ptr, input->end - input->ptr, &first,
490 : &index, &rest, auto_number)) {
491 0 : goto error;
492 : }
493 :
494 984 : if (index == -1) {
495 : /* look up in kwargs */
496 888 : PyObject *key = SubString_new_object(&first);
497 888 : if (key == NULL)
498 0 : goto error;
499 888 : if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
500 0 : PyErr_SetObject(PyExc_KeyError, key);
501 0 : Py_DECREF(key);
502 0 : goto error;
503 : }
504 888 : Py_DECREF(key);
505 888 : Py_INCREF(obj);
506 : }
507 : else {
508 : /* look up in args */
509 96 : obj = PySequence_GetItem(args, index);
510 96 : if (obj == NULL)
511 0 : goto error;
512 : }
513 :
514 : /* iterate over the rest of the field_name */
515 1968 : while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
516 : &name)) == 2) {
517 : PyObject *tmp;
518 :
519 0 : if (is_attribute)
520 : /* getattr lookup "." */
521 0 : tmp = getattr(obj, &name);
522 : else
523 : /* getitem lookup "[]" */
524 0 : if (index == -1)
525 0 : tmp = getitem_str(obj, &name);
526 : else
527 0 : if (PySequence_Check(obj))
528 0 : tmp = getitem_sequence(obj, index);
529 : else
530 : /* not a sequence */
531 0 : tmp = getitem_idx(obj, index);
532 0 : if (tmp == NULL)
533 0 : goto error;
534 :
535 : /* assign to obj */
536 0 : Py_DECREF(obj);
537 0 : obj = tmp;
538 : }
539 : /* end of iterator, this is the non-error case */
540 984 : if (ok == 1)
541 984 : return obj;
542 : error:
543 0 : Py_XDECREF(obj);
544 0 : return NULL;
545 : }
546 :
547 : /************************************************************************/
548 : /***************** Field rendering functions **************************/
549 : /************************************************************************/
550 :
551 : /*
552 : render_field() is the main function in this section. It takes the
553 : field object and field specification string generated by
554 : get_field_and_spec, and renders the field into the output string.
555 :
556 : render_field calls fieldobj.__format__(format_spec) method, and
557 : appends to the output.
558 : */
559 : static int
560 984 : render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
561 : {
562 984 : int ok = 0;
563 984 : PyObject *result = NULL;
564 984 : PyObject *format_spec_object = NULL;
565 984 : PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL;
566 984 : STRINGLIB_CHAR* format_spec_start = format_spec->ptr ?
567 : format_spec->ptr : NULL;
568 1968 : Py_ssize_t format_spec_len = format_spec->ptr ?
569 984 : format_spec->end - format_spec->ptr : 0;
570 :
571 : /* If we know the type exactly, skip the lookup of __format__ and just
572 : call the formatter directly. */
573 : #if STRINGLIB_IS_UNICODE
574 0 : if (PyUnicode_CheckExact(fieldobj))
575 0 : formatter = _PyUnicode_FormatAdvanced;
576 : /* Unfortunately, there's a problem with checking for int, long,
577 : and float here. If we're being included as unicode, their
578 : formatters expect string format_spec args. For now, just skip
579 : this optimization for unicode. This could be fixed, but it's a
580 : hassle. */
581 : #else
582 984 : if (PyString_CheckExact(fieldobj))
583 606 : formatter = _PyBytes_FormatAdvanced;
584 378 : else if (PyInt_CheckExact(fieldobj))
585 378 : formatter =_PyInt_FormatAdvanced;
586 0 : else if (PyLong_CheckExact(fieldobj))
587 0 : formatter =_PyLong_FormatAdvanced;
588 0 : else if (PyFloat_CheckExact(fieldobj))
589 0 : formatter = _PyFloat_FormatAdvanced;
590 : #endif
591 :
592 984 : if (formatter) {
593 : /* we know exactly which formatter will be called when __format__ is
594 : looked up, so call it directly, instead. */
595 984 : result = formatter(fieldobj, format_spec_start, format_spec_len);
596 : }
597 : else {
598 : /* We need to create an object out of the pointers we have, because
599 : __format__ takes a string/unicode object for format_spec. */
600 0 : format_spec_object = STRINGLIB_NEW(format_spec_start,
601 : format_spec_len);
602 0 : if (format_spec_object == NULL)
603 0 : goto done;
604 :
605 0 : result = PyObject_Format(fieldobj, format_spec_object);
606 : }
607 984 : if (result == NULL)
608 0 : goto done;
609 :
610 : #if PY_VERSION_HEX >= 0x03000000
611 : assert(PyUnicode_Check(result));
612 : #else
613 : assert(PyString_Check(result) || PyUnicode_Check(result));
614 :
615 : /* Convert result to our type. We could be str, and result could
616 : be unicode */
617 : {
618 984 : PyObject *tmp = STRINGLIB_TOSTR(result);
619 984 : if (tmp == NULL)
620 0 : goto done;
621 984 : Py_DECREF(result);
622 984 : result = tmp;
623 : }
624 : #endif
625 :
626 1968 : ok = output_data(output,
627 984 : STRINGLIB_STR(result), STRINGLIB_LEN(result));
628 : done:
629 984 : Py_XDECREF(format_spec_object);
630 984 : Py_XDECREF(result);
631 984 : return ok;
632 : }
633 :
634 : static int
635 984 : parse_field(SubString *str, SubString *field_name, SubString *format_spec,
636 : STRINGLIB_CHAR *conversion)
637 : {
638 : /* Note this function works if the field name is zero length,
639 : which is good. Zero length field names are handled later, in
640 : field_name_split. */
641 :
642 984 : STRINGLIB_CHAR c = 0;
643 :
644 : /* initialize these, as they may be empty */
645 984 : *conversion = '\0';
646 984 : SubString_init(format_spec, NULL, 0);
647 :
648 : /* Search for the field name. it's terminated by the end of
649 : the string, or a ':' or '!' */
650 984 : field_name->ptr = str->ptr;
651 7896 : while (str->ptr < str->end) {
652 6360 : switch (c = *(str->ptr++)) {
653 : case ':':
654 : case '!':
655 432 : break;
656 : default:
657 5928 : continue;
658 : }
659 432 : break;
660 : }
661 :
662 984 : if (c == '!' || c == ':') {
663 : /* we have a format specifier and/or a conversion */
664 : /* don't include the last character */
665 432 : field_name->end = str->ptr-1;
666 :
667 : /* the format specifier is the rest of the string */
668 432 : format_spec->ptr = str->ptr;
669 432 : format_spec->end = str->end;
670 :
671 : /* see if there's a conversion specifier */
672 864 : if (c == '!') {
673 : /* there must be another character present */
674 54 : if (format_spec->ptr >= format_spec->end) {
675 0 : PyErr_SetString(PyExc_ValueError,
676 : "end of format while looking for conversion "
677 : "specifier");
678 0 : return 0;
679 : }
680 54 : *conversion = *(format_spec->ptr++);
681 :
682 : /* if there is another character, it must be a colon */
683 54 : if (format_spec->ptr < format_spec->end) {
684 0 : c = *(format_spec->ptr++);
685 0 : if (c != ':') {
686 0 : PyErr_SetString(PyExc_ValueError,
687 : "expected ':' after format specifier");
688 0 : return 0;
689 : }
690 : }
691 : }
692 : }
693 : else
694 : /* end of string, there's no format_spec or conversion */
695 552 : field_name->end = str->ptr;
696 :
697 984 : return 1;
698 : }
699 :
700 : /************************************************************************/
701 : /******* Output string allocation and escape-to-markup processing ******/
702 : /************************************************************************/
703 :
704 : /* MarkupIterator breaks the string into pieces of either literal
705 : text, or things inside {} that need to be marked up. it is
706 : designed to make it easy to wrap a Python iterator around it, for
707 : use with the Formatter class */
708 :
709 : typedef struct {
710 : SubString str; /* not-yet-consumed remainder of the format string; str.ptr advances as MarkupIterator_next returns pieces */
711 : } MarkupIterator;
712 :
713 : static int
714 351 : MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
715 : {
716 351 : SubString_init(&self->str, ptr, len);
717 351 : return 1;
718 : }
719 :
720 : /* returns 0 on error, 1 on non-error termination, and 2 if it got a
721 : string (or something to be expanded) */
722 : static int
723 1590 : MarkupIterator_next(MarkupIterator *self, SubString *literal,
724 : int *field_present, SubString *field_name,
725 : SubString *format_spec, STRINGLIB_CHAR *conversion,
726 : int *format_spec_needs_expanding)
727 : {
728 : int at_end;
729 1590 : STRINGLIB_CHAR c = 0;
730 : STRINGLIB_CHAR *start;
731 : int count;
732 : Py_ssize_t len;
733 1590 : int markup_follows = 0;
734 :
735 : /* initialize all of the output variables */
736 1590 : SubString_init(literal, NULL, 0);
737 1590 : SubString_init(field_name, NULL, 0);
738 1590 : SubString_init(format_spec, NULL, 0);
739 1590 : *conversion = '\0';
740 1590 : *format_spec_needs_expanding = 0;
741 1590 : *field_present = 0;
742 :
743 : /* No more input, end of iterator. This is the normal exit
744 : path. */
745 1590 : if (self->str.ptr >= self->str.end)
746 351 : return 1;
747 :
748 1239 : start = self->str.ptr;
749 :
750 : /* First read any literal text. Read until the end of string, an
751 : escaped '{' or '}', or an unescaped '{'. In order to never
752 : allocate memory and so I can just pass pointers around, if
753 : there's an escaped '{' or '}' then we'll return the literal
754 : including the brace, but no format object. The next time
755 : through, we'll return the rest of the literal, skipping past
756 : the second consecutive brace. */
757 43728 : while (self->str.ptr < self->str.end) {
758 42234 : switch (c = *(self->str.ptr++)) {
759 : case '{':
760 : case '}':
761 984 : markup_follows = 1;
762 984 : break;
763 : default:
764 41250 : continue;
765 : }
766 984 : break;
767 : }
768 :
769 1239 : at_end = self->str.ptr >= self->str.end;
770 1239 : len = self->str.ptr - start;
771 :
772 1239 : if ((c == '}') && (at_end || (c != *self->str.ptr))) {
773 0 : PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
774 : "in format string");
775 0 : return 0;
776 : }
777 1239 : if (at_end && c == '{') {
778 0 : PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
779 : "in format string");
780 0 : return 0;
781 : }
782 1239 : if (!at_end) {
783 984 : if (c == *self->str.ptr) {
784 : /* escaped } or {, skip it in the input. there is no
785 : markup object following us, just this literal text */
786 0 : self->str.ptr++;
787 0 : markup_follows = 0;
788 : }
789 : else
790 984 : len--;
791 : }
792 :
793 : /* record the literal text */
794 1239 : literal->ptr = start;
795 1239 : literal->end = start + len;
796 :
797 1239 : if (!markup_follows)
798 255 : return 2;
799 :
800 : /* this is markup, find the end of the string by counting nested
801 : braces. note that this prohibits escaped braces, so that
802 : format_specs cannot have braces in them. */
803 984 : *field_present = 1;
804 984 : count = 1;
805 :
806 984 : start = self->str.ptr;
807 :
808 : /* we know we can't have a zero length string, so don't worry
809 : about that case */
810 8952 : while (self->str.ptr < self->str.end) {
811 7968 : switch (c = *(self->str.ptr++)) {
812 : case '{':
813 : /* the format spec needs to be recursively expanded.
814 : this is an optimization, and not strictly needed */
815 0 : *format_spec_needs_expanding = 1;
816 0 : count++;
817 0 : break;
818 : case '}':
819 984 : count--;
820 984 : if (count <= 0) {
821 : /* we're done. parse and get out */
822 : SubString s;
823 :
824 984 : SubString_init(&s, start, self->str.ptr - 1 - start);
825 984 : if (parse_field(&s, field_name, format_spec, conversion) == 0)
826 0 : return 0;
827 :
828 : /* success */
829 984 : return 2;
830 : }
831 0 : break;
832 : }
833 : }
834 :
835 : /* end of string while searching for matching '}' */
836 0 : PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
837 0 : return 0;
838 : }
839 :
840 :
841 : /* do the !r or !s conversion on obj */
842 : static PyObject *
843 54 : do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
844 : {
845 : /* XXX in pre-3.0, do we need to convert this to unicode, since it
846 : might have returned a string? */
847 54 : switch (conversion) {
848 : case 'r':
849 54 : return PyObject_Repr(obj);
850 : case 's':
851 0 : return STRINGLIB_TOSTR(obj);
852 : default:
853 0 : if (conversion > 32 && conversion < 127) {
854 : /* It's the ASCII subrange; casting to char is safe
855 : (assuming the execution character set is an ASCII
856 : superset). */
857 0 : PyErr_Format(PyExc_ValueError,
858 : "Unknown conversion specifier %c",
859 0 : (char)conversion);
860 : } else
861 0 : PyErr_Format(PyExc_ValueError,
862 : "Unknown conversion specifier \\x%x",
863 : (unsigned int)conversion);
864 0 : return NULL;
865 : }
866 : }
867 :
868 : /* given:
869 :
870 : {field_name!conversion:format_spec}
871 :
872 : compute the result and write it to output.
873 : format_spec_needs_expanding is an optimization. if it's false,
874 : just output the string directly, otherwise recursively expand the
875 : format_spec string.
876 :
877 : field_name is allowed to be zero length, in which case we
878 : are doing auto field numbering.
879 : */
880 :
881 : static int
882 984 : output_markup(SubString *field_name, SubString *format_spec,
883 : int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
884 : OutputString *output, PyObject *args, PyObject *kwargs,
885 : int recursion_depth, AutoNumber *auto_number)
886 : {
887 984 : PyObject *tmp = NULL;
888 984 : PyObject *fieldobj = NULL;
889 : SubString expanded_format_spec;
890 : SubString *actual_format_spec;
891 984 : int result = 0;
892 :
893 : /* convert field_name to an object */
894 984 : fieldobj = get_field_object(field_name, args, kwargs, auto_number);
895 984 : if (fieldobj == NULL)
896 0 : goto done;
897 :
898 984 : if (conversion != '\0') {
899 54 : tmp = do_conversion(fieldobj, conversion);
900 54 : if (tmp == NULL)
901 0 : goto done;
902 :
903 : /* do the assignment, transferring ownership: fieldobj = tmp */
904 54 : Py_DECREF(fieldobj);
905 54 : fieldobj = tmp;
906 54 : tmp = NULL;
907 : }
908 :
909 : /* if needed, recurively compute the format_spec */
910 984 : if (format_spec_needs_expanding) {
911 0 : tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
912 : auto_number);
913 0 : if (tmp == NULL)
914 0 : goto done;
915 :
916 : /* note that in the case we're expanding the format string,
917 : tmp must be kept around until after the call to
918 : render_field. */
919 0 : SubString_init(&expanded_format_spec,
920 0 : STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
921 0 : actual_format_spec = &expanded_format_spec;
922 : }
923 : else
924 984 : actual_format_spec = format_spec;
925 :
926 984 : if (render_field(fieldobj, actual_format_spec, output) == 0)
927 0 : goto done;
928 :
929 984 : result = 1;
930 :
931 : done:
932 984 : Py_XDECREF(fieldobj);
933 984 : Py_XDECREF(tmp);
934 :
935 984 : return result;
936 : }
937 :
938 : /*
939 : do_markup is the top-level loop for the format() method. It
940 : searches through the format string for escapes to markup codes, and
941 : calls other functions to move non-markup text to the output,
942 : and to perform the markup to the output.
943 : */
944 : static int
945 351 : do_markup(SubString *input, PyObject *args, PyObject *kwargs,
946 : OutputString *output, int recursion_depth, AutoNumber *auto_number)
947 : {
948 : MarkupIterator iter;
949 : int format_spec_needs_expanding;
950 : int result;
951 : int field_present;
952 : SubString literal;
953 : SubString field_name;
954 : SubString format_spec;
955 : STRINGLIB_CHAR conversion;
956 :
957 351 : MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
958 351 : while ((result = MarkupIterator_next(&iter, &literal, &field_present,
959 : &field_name, &format_spec,
960 : &conversion,
961 : &format_spec_needs_expanding)) == 2) {
962 1239 : if (!output_data(output, literal.ptr, literal.end - literal.ptr))
963 0 : return 0;
964 1239 : if (field_present)
965 984 : if (!output_markup(&field_name, &format_spec,
966 : format_spec_needs_expanding, conversion, output,
967 : args, kwargs, recursion_depth, auto_number))
968 0 : return 0;
969 : }
970 351 : return result;
971 : }
972 :
973 :
974 : /*
975 : build_string allocates the output string and then
976 : calls do_markup to do the heavy lifting.
977 : */
978 : static PyObject *
979 351 : build_string(SubString *input, PyObject *args, PyObject *kwargs,
980 : int recursion_depth, AutoNumber *auto_number)
981 : {
982 : OutputString output;
983 351 : PyObject *result = NULL;
984 : Py_ssize_t count;
985 :
986 351 : output.obj = NULL; /* needed so cleanup code always works */
987 :
988 : /* check the recursion level */
989 351 : if (recursion_depth <= 0) {
990 0 : PyErr_SetString(PyExc_ValueError,
991 : "Max string recursion exceeded");
992 0 : goto done;
993 : }
994 :
995 : /* initial size is the length of the format string, plus the size
996 : increment. seems like a reasonable default */
997 351 : if (!output_initialize(&output,
998 351 : input->end - input->ptr +
999 : INITIAL_SIZE_INCREMENT))
1000 0 : goto done;
1001 :
1002 351 : if (!do_markup(input, args, kwargs, &output, recursion_depth,
1003 : auto_number)) {
1004 0 : goto done;
1005 : }
1006 :
1007 351 : count = output.ptr - STRINGLIB_STR(output.obj);
1008 351 : if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
1009 0 : goto done;
1010 : }
1011 :
1012 : /* transfer ownership to result */
1013 351 : result = output.obj;
1014 351 : output.obj = NULL;
1015 :
1016 : done:
1017 351 : Py_XDECREF(output.obj);
1018 351 : return result;
1019 : }
1020 :
1021 : /************************************************************************/
1022 : /*********** main routine ***********************************************/
1023 : /************************************************************************/
1024 :
1025 : /* this is the main entry point */
1026 : static PyObject *
1027 351 : do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
1028 : {
1029 : SubString input;
1030 :
1031 : /* PEP 3101 says only 2 levels, so that
1032 : "{0:{1}}".format('abc', 's') # works
1033 : "{0:{1:{2}}}".format('abc', 's', '') # fails
1034 : */
1035 351 : int recursion_depth = 2;
1036 :
1037 : AutoNumber auto_number;
1038 :
1039 351 : AutoNumber_Init(&auto_number);
1040 351 : SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
1041 351 : return build_string(&input, args, kwargs, recursion_depth, &auto_number);
1042 : }
1043 :
1044 :
1045 :
1046 : /************************************************************************/
1047 : /*********** formatteriterator ******************************************/
1048 : /************************************************************************/
1049 :
1050 : /* This is used to implement string.Formatter.vparse(). It exists so
1051 : Formatter can share code with the built in unicode.format() method.
1052 : It's really just a wrapper around MarkupIterator that is callable
1053 : from Python. */
1054 :
1055 : typedef struct {
1056 : PyObject_HEAD
1057 :
1058 : STRINGLIB_OBJECT *str;
1059 :
1060 : MarkupIterator it_markup;
1061 : } formatteriterobject;
1062 :
1063 : static void
1064 0 : formatteriter_dealloc(formatteriterobject *it)
1065 : {
1066 0 : Py_XDECREF(it->str);
1067 0 : PyObject_FREE(it);
1068 0 : }
1069 :
1070 : /* returns a tuple:
1071 : (literal, field_name, format_spec, conversion)
1072 :
1073 : literal is any literal text to output. might be zero length
1074 : field_name is the string before the ':'. might be None
1075 : format_spec is the string after the ':'. mibht be None
1076 : conversion is either None, or the string after the '!'
1077 : */
1078 : static PyObject *
1079 0 : formatteriter_next(formatteriterobject *it)
1080 : {
1081 : SubString literal;
1082 : SubString field_name;
1083 : SubString format_spec;
1084 : STRINGLIB_CHAR conversion;
1085 : int format_spec_needs_expanding;
1086 : int field_present;
1087 0 : int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1088 : &field_name, &format_spec, &conversion,
1089 : &format_spec_needs_expanding);
1090 :
1091 : /* all of the SubString objects point into it->str, so no
1092 : memory management needs to be done on them */
1093 : assert(0 <= result && result <= 2);
1094 0 : if (result == 0 || result == 1)
1095 : /* if 0, error has already been set, if 1, iterator is empty */
1096 0 : return NULL;
1097 : else {
1098 0 : PyObject *literal_str = NULL;
1099 0 : PyObject *field_name_str = NULL;
1100 0 : PyObject *format_spec_str = NULL;
1101 0 : PyObject *conversion_str = NULL;
1102 0 : PyObject *tuple = NULL;
1103 :
1104 0 : literal_str = SubString_new_object(&literal);
1105 0 : if (literal_str == NULL)
1106 0 : goto done;
1107 :
1108 0 : field_name_str = SubString_new_object(&field_name);
1109 0 : if (field_name_str == NULL)
1110 0 : goto done;
1111 :
1112 : /* if field_name is non-zero length, return a string for
1113 : format_spec (even if zero length), else return None */
1114 0 : format_spec_str = (field_present ?
1115 0 : SubString_new_object_or_empty :
1116 : SubString_new_object)(&format_spec);
1117 0 : if (format_spec_str == NULL)
1118 0 : goto done;
1119 :
1120 : /* if the conversion is not specified, return a None,
1121 : otherwise create a one length string with the conversion
1122 : character */
1123 0 : if (conversion == '\0') {
1124 0 : conversion_str = Py_None;
1125 0 : Py_INCREF(conversion_str);
1126 : }
1127 : else
1128 0 : conversion_str = STRINGLIB_NEW(&conversion, 1);
1129 0 : if (conversion_str == NULL)
1130 0 : goto done;
1131 :
1132 0 : tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1133 : conversion_str);
1134 : done:
1135 0 : Py_XDECREF(literal_str);
1136 0 : Py_XDECREF(field_name_str);
1137 0 : Py_XDECREF(format_spec_str);
1138 0 : Py_XDECREF(conversion_str);
1139 0 : return tuple;
1140 : }
1141 : }
1142 :
1143 : static PyMethodDef formatteriter_methods[] = {
1144 : {NULL, NULL} /* sentinel */
1145 : };
1146 :
1147 : static PyTypeObject PyFormatterIter_Type = {
1148 : PyVarObject_HEAD_INIT(&PyType_Type, 0)
1149 : "formatteriterator", /* tp_name */
1150 : sizeof(formatteriterobject), /* tp_basicsize */
1151 : 0, /* tp_itemsize */
1152 : /* methods */
1153 : (destructor)formatteriter_dealloc, /* tp_dealloc */
1154 : 0, /* tp_print */
1155 : 0, /* tp_getattr */
1156 : 0, /* tp_setattr */
1157 : 0, /* tp_compare */
1158 : 0, /* tp_repr */
1159 : 0, /* tp_as_number */
1160 : 0, /* tp_as_sequence */
1161 : 0, /* tp_as_mapping */
1162 : 0, /* tp_hash */
1163 : 0, /* tp_call */
1164 : 0, /* tp_str */
1165 : PyObject_GenericGetAttr, /* tp_getattro */
1166 : 0, /* tp_setattro */
1167 : 0, /* tp_as_buffer */
1168 : Py_TPFLAGS_DEFAULT, /* tp_flags */
1169 : 0, /* tp_doc */
1170 : 0, /* tp_traverse */
1171 : 0, /* tp_clear */
1172 : 0, /* tp_richcompare */
1173 : 0, /* tp_weaklistoffset */
1174 : PyObject_SelfIter, /* tp_iter */
1175 : (iternextfunc)formatteriter_next, /* tp_iternext */
1176 : formatteriter_methods, /* tp_methods */
1177 : 0,
1178 : };
1179 :
1180 : /* unicode_formatter_parser is used to implement
1181 : string.Formatter.vformat. it parses a string and returns tuples
1182 : describing the parsed elements. It's a wrapper around
1183 : stringlib/string_format.h's MarkupIterator */
1184 : static PyObject *
1185 0 : formatter_parser(STRINGLIB_OBJECT *self)
1186 : {
1187 : formatteriterobject *it;
1188 :
1189 0 : it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1190 0 : if (it == NULL)
1191 0 : return NULL;
1192 :
1193 : /* take ownership, give the object to the iterator */
1194 0 : Py_INCREF(self);
1195 0 : it->str = self;
1196 :
1197 : /* initialize the contained MarkupIterator */
1198 0 : MarkupIterator_init(&it->it_markup,
1199 0 : STRINGLIB_STR(self),
1200 : STRINGLIB_LEN(self));
1201 :
1202 0 : return (PyObject *)it;
1203 : }
1204 :
1205 :
1206 : /************************************************************************/
1207 : /*********** fieldnameiterator ******************************************/
1208 : /************************************************************************/
1209 :
1210 :
1211 : /* This is used to implement string.Formatter.vparse(). It parses the
1212 : field name into attribute and item values. It's a Python-callable
1213 : wrapper around FieldNameIterator */
1214 :
1215 : typedef struct {
1216 : PyObject_HEAD
1217 :
1218 : STRINGLIB_OBJECT *str;
1219 :
1220 : FieldNameIterator it_field;
1221 : } fieldnameiterobject;
1222 :
1223 : static void
1224 0 : fieldnameiter_dealloc(fieldnameiterobject *it)
1225 : {
1226 0 : Py_XDECREF(it->str);
1227 0 : PyObject_FREE(it);
1228 0 : }
1229 :
1230 : /* returns a tuple:
1231 : (is_attr, value)
1232 : is_attr is true if we used attribute syntax (e.g., '.foo')
1233 : false if we used index syntax (e.g., '[foo]')
1234 : value is an integer or string
1235 : */
1236 : static PyObject *
1237 0 : fieldnameiter_next(fieldnameiterobject *it)
1238 : {
1239 : int result;
1240 : int is_attr;
1241 : Py_ssize_t idx;
1242 : SubString name;
1243 :
1244 0 : result = FieldNameIterator_next(&it->it_field, &is_attr,
1245 : &idx, &name);
1246 0 : if (result == 0 || result == 1)
1247 : /* if 0, error has already been set, if 1, iterator is empty */
1248 0 : return NULL;
1249 : else {
1250 0 : PyObject* result = NULL;
1251 0 : PyObject* is_attr_obj = NULL;
1252 0 : PyObject* obj = NULL;
1253 :
1254 0 : is_attr_obj = PyBool_FromLong(is_attr);
1255 0 : if (is_attr_obj == NULL)
1256 0 : goto done;
1257 :
1258 : /* either an integer or a string */
1259 0 : if (idx != -1)
1260 0 : obj = PyLong_FromSsize_t(idx);
1261 : else
1262 0 : obj = SubString_new_object(&name);
1263 0 : if (obj == NULL)
1264 0 : goto done;
1265 :
1266 : /* return a tuple of values */
1267 0 : result = PyTuple_Pack(2, is_attr_obj, obj);
1268 :
1269 : done:
1270 0 : Py_XDECREF(is_attr_obj);
1271 0 : Py_XDECREF(obj);
1272 0 : return result;
1273 : }
1274 : }
1275 :
1276 : static PyMethodDef fieldnameiter_methods[] = {
1277 : {NULL, NULL} /* sentinel */
1278 : };
1279 :
1280 : static PyTypeObject PyFieldNameIter_Type = {
1281 : PyVarObject_HEAD_INIT(&PyType_Type, 0)
1282 : "fieldnameiterator", /* tp_name */
1283 : sizeof(fieldnameiterobject), /* tp_basicsize */
1284 : 0, /* tp_itemsize */
1285 : /* methods */
1286 : (destructor)fieldnameiter_dealloc, /* tp_dealloc */
1287 : 0, /* tp_print */
1288 : 0, /* tp_getattr */
1289 : 0, /* tp_setattr */
1290 : 0, /* tp_compare */
1291 : 0, /* tp_repr */
1292 : 0, /* tp_as_number */
1293 : 0, /* tp_as_sequence */
1294 : 0, /* tp_as_mapping */
1295 : 0, /* tp_hash */
1296 : 0, /* tp_call */
1297 : 0, /* tp_str */
1298 : PyObject_GenericGetAttr, /* tp_getattro */
1299 : 0, /* tp_setattro */
1300 : 0, /* tp_as_buffer */
1301 : Py_TPFLAGS_DEFAULT, /* tp_flags */
1302 : 0, /* tp_doc */
1303 : 0, /* tp_traverse */
1304 : 0, /* tp_clear */
1305 : 0, /* tp_richcompare */
1306 : 0, /* tp_weaklistoffset */
1307 : PyObject_SelfIter, /* tp_iter */
1308 : (iternextfunc)fieldnameiter_next, /* tp_iternext */
1309 : fieldnameiter_methods, /* tp_methods */
1310 : 0};
1311 :
1312 : /* unicode_formatter_field_name_split is used to implement
1313 : string.Formatter.vformat. it takes a PEP 3101 "field name", and
1314 : returns a tuple of (first, rest): "first", the part before the
1315 : first '.' or '['; and "rest", an iterator for the rest of the field
1316 : name. it's a wrapper around stringlib/string_format.h's
1317 : field_name_split. The iterator it returns is a
1318 : FieldNameIterator */
1319 : static PyObject *
1320 0 : formatter_field_name_split(STRINGLIB_OBJECT *self)
1321 : {
1322 : SubString first;
1323 : Py_ssize_t first_idx;
1324 : fieldnameiterobject *it;
1325 :
1326 0 : PyObject *first_obj = NULL;
1327 0 : PyObject *result = NULL;
1328 :
1329 0 : it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1330 0 : if (it == NULL)
1331 0 : return NULL;
1332 :
1333 : /* take ownership, give the object to the iterator. this is
1334 : just to keep the field_name alive */
1335 0 : Py_INCREF(self);
1336 0 : it->str = self;
1337 :
1338 : /* Pass in auto_number = NULL. We'll return an empty string for
1339 : first_obj in that case. */
1340 0 : if (!field_name_split(STRINGLIB_STR(self),
1341 : STRINGLIB_LEN(self),
1342 : &first, &first_idx, &it->it_field, NULL))
1343 0 : goto done;
1344 :
1345 : /* first becomes an integer, if possible; else a string */
1346 0 : if (first_idx != -1)
1347 0 : first_obj = PyLong_FromSsize_t(first_idx);
1348 : else
1349 : /* convert "first" into a string object */
1350 0 : first_obj = SubString_new_object(&first);
1351 0 : if (first_obj == NULL)
1352 0 : goto done;
1353 :
1354 : /* return a tuple of values */
1355 0 : result = PyTuple_Pack(2, first_obj, it);
1356 :
1357 : done:
1358 0 : Py_XDECREF(it);
1359 0 : Py_XDECREF(first_obj);
1360 0 : return result;
1361 : }
|