Line data Source code
1 : /* String (str/bytes) object implementation */
2 :
3 : #define PY_SSIZE_T_CLEAN
4 :
5 : #include "Python.h"
6 : #include <ctype.h>
7 : #include <stddef.h>
8 :
9 : #ifdef COUNT_ALLOCS
10 : Py_ssize_t null_strings, one_strings;
11 : #endif
12 :
/* Cache of shared one-byte string objects, indexed by the byte's value.
   Slots start out NULL and are filled in by PyString_FromStringAndSize()
   and PyString_FromString() the first time a given one-byte string is
   created. */
13 : static PyStringObject *characters[UCHAR_MAX + 1];
/* Shared empty-string object; NULL until one of the constructors above
   creates the first empty string. */
14 : static PyStringObject *nullstring;
15 :
16 : /* This dictionary holds all interned strings. Note that references to
17 : strings in this dictionary are *not* counted in the string's ob_refcnt.
18 : When the interned string reaches a refcnt of 0 the string deallocation
19 : function will delete the reference from this dictionary.
20 :
21 : Another way to look at this is to say that the actual reference
22 : count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23 : */
24 : static PyObject *interned;
25 :
26 : /* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 : for a string of length n should request PyStringObject_SIZE + n bytes.
28 :
29 : Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 : 3 bytes per string allocation on a typical system.
31 : */
32 : #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33 :
34 : /*
35 : For PyString_FromString(), the parameter `str' points to a null-terminated
36 : string containing exactly `size' bytes.
37 :
38 : For PyString_FromStringAndSize(), the parameter `str' is
39 : either NULL or else points to a string containing at least `size' bytes.
40 : For PyString_FromStringAndSize(), the string in the `str' parameter does
41 : not have to be null-terminated. (Therefore it is safe to construct a
42 : substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
43 : If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
44 : bytes (setting the last byte to the null terminating character) and you can
45 : fill in the data yourself. If `str' is non-NULL then the resulting
46 : PyString object must be treated as immutable and you must not fill in nor
47 : alter the data yourself, since the strings may be shared.
48 :
49 : The PyObject member `op->ob_size', which denotes the number of "extra
50 : items" in a variable-size object, will contain the number of bytes
51 : allocated for string data, not counting the null terminating character.
52 : It is therefore equal to the `size' parameter (for
53 : PyString_FromStringAndSize()) or the length of the string in the `str'
54 : parameter (for PyString_FromString()).
55 : */
56 : PyObject *
57 89238 : PyString_FromStringAndSize(const char *str, Py_ssize_t size)
58 : {
59 : register PyStringObject *op;
60 89238 : if (size < 0) {
61 0 : PyErr_SetString(PyExc_SystemError,
62 : "Negative size passed to PyString_FromStringAndSize");
63 0 : return NULL;
64 : }
65 89238 : if (size == 0 && (op = nullstring) != NULL) {
66 : #ifdef COUNT_ALLOCS
67 : null_strings++;
68 : #endif
69 1453 : Py_INCREF(op);
70 1453 : return (PyObject *)op;
71 : }
72 93821 : if (size == 1 && str != NULL &&
73 6036 : (op = characters[*str & UCHAR_MAX]) != NULL)
74 : {
75 : #ifdef COUNT_ALLOCS
76 : one_strings++;
77 : #endif
78 5298 : Py_INCREF(op);
79 5298 : return (PyObject *)op;
80 : }
81 :
82 82487 : if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83 0 : PyErr_SetString(PyExc_OverflowError, "string is too large");
84 0 : return NULL;
85 : }
86 :
87 : /* Inline PyObject_NewVar */
88 82487 : op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
89 82487 : if (op == NULL)
90 0 : return PyErr_NoMemory();
91 82487 : (void)PyObject_INIT_VAR(op, &PyString_Type, size);
92 82487 : op->ob_shash = -1;
93 82487 : op->ob_sstate = SSTATE_NOT_INTERNED;
94 82487 : if (str != NULL)
95 11495 : Py_MEMCPY(op->ob_sval, str, size);
96 82487 : op->ob_sval[size] = '\0';
97 : /* share short strings */
98 82487 : if (size == 0) {
99 3 : PyObject *t = (PyObject *)op;
100 3 : PyString_InternInPlace(&t);
101 3 : op = (PyStringObject *)t;
102 3 : nullstring = op;
103 3 : Py_INCREF(op);
104 82484 : } else if (size == 1 && str != NULL) {
105 738 : PyObject *t = (PyObject *)op;
106 738 : PyString_InternInPlace(&t);
107 738 : op = (PyStringObject *)t;
108 738 : characters[*str & UCHAR_MAX] = op;
109 738 : Py_INCREF(op);
110 : }
111 82487 : return (PyObject *) op;
112 : }
113 :
114 : PyObject *
115 75725 : PyString_FromString(const char *str)
116 : {
117 : register size_t size;
118 : register PyStringObject *op;
119 :
120 : assert(str != NULL);
121 75725 : size = strlen(str);
122 75725 : if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123 0 : PyErr_SetString(PyExc_OverflowError,
124 : "string is too long for a Python string");
125 0 : return NULL;
126 : }
127 75725 : if (size == 0 && (op = nullstring) != NULL) {
128 : #ifdef COUNT_ALLOCS
129 : null_strings++;
130 : #endif
131 984 : Py_INCREF(op);
132 984 : return (PyObject *)op;
133 : }
134 74741 : if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
135 : #ifdef COUNT_ALLOCS
136 : one_strings++;
137 : #endif
138 1500 : Py_INCREF(op);
139 1500 : return (PyObject *)op;
140 : }
141 :
142 : /* Inline PyObject_NewVar */
143 73241 : op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
144 73241 : if (op == NULL)
145 0 : return PyErr_NoMemory();
146 73241 : (void)PyObject_INIT_VAR(op, &PyString_Type, size);
147 73241 : op->ob_shash = -1;
148 73241 : op->ob_sstate = SSTATE_NOT_INTERNED;
149 73241 : Py_MEMCPY(op->ob_sval, str, size+1);
150 : /* share short strings */
151 73241 : if (size == 0) {
152 0 : PyObject *t = (PyObject *)op;
153 0 : PyString_InternInPlace(&t);
154 0 : op = (PyStringObject *)t;
155 0 : nullstring = op;
156 0 : Py_INCREF(op);
157 73241 : } else if (size == 1) {
158 30 : PyObject *t = (PyObject *)op;
159 30 : PyString_InternInPlace(&t);
160 30 : op = (PyStringObject *)t;
161 30 : characters[*str & UCHAR_MAX] = op;
162 30 : Py_INCREF(op);
163 : }
164 73241 : return (PyObject *) op;
165 : }
166 :
167 : PyObject *
168 5657 : PyString_FromFormatV(const char *format, va_list vargs)
169 : {
170 : va_list count;
171 5657 : Py_ssize_t n = 0;
172 : const char* f;
173 : char *s;
174 : PyObject* string;
175 :
176 : #ifdef VA_LIST_IS_ARRAY
177 5657 : Py_MEMCPY(count, vargs, sizeof(va_list));
178 : #else
179 : #ifdef __va_copy
180 : __va_copy(count, vargs);
181 : #else
182 : count = vargs;
183 : #endif
184 : #endif
185 : /* step 1: figure out how large a buffer we need */
186 178281 : for (f = format; *f; f++) {
187 172624 : if (*f == '%') {
188 : #ifdef HAVE_LONG_LONG
189 11009 : int longlongflag = 0;
190 : #endif
191 11009 : const char* p = f;
192 11009 : while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
193 : ;
194 :
195 : /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196 : * they don't affect the amount of space we reserve.
197 : */
198 11009 : if (*f == 'l') {
199 0 : if (f[1] == 'd' || f[1] == 'u') {
200 0 : ++f;
201 : }
202 : #ifdef HAVE_LONG_LONG
203 0 : else if (f[1] == 'l' &&
204 0 : (f[2] == 'd' || f[2] == 'u')) {
205 0 : longlongflag = 1;
206 0 : f += 2;
207 : }
208 : #endif
209 : }
210 11009 : else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
211 0 : ++f;
212 : }
213 :
214 11009 : switch (*f) {
215 : case 'c':
216 0 : (void)va_arg(count, int);
217 : /* fall through... */
218 : case '%':
219 0 : n++;
220 0 : break;
221 : case 'd': case 'u': case 'i': case 'x':
222 11 : (void) va_arg(count, int);
223 : #ifdef HAVE_LONG_LONG
224 : /* Need at most
225 : ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226 : plus 1 for the sign. 53/22 is an upper
227 : bound for log10(256). */
228 11 : if (longlongflag)
229 0 : n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
230 : else
231 : #endif
232 : /* 20 bytes is enough to hold a 64-bit
233 : integer. Decimal takes the most
234 : space. This isn't enough for
235 : octal. */
236 11 : n += 20;
237 :
238 11 : break;
239 : case 's':
240 10998 : s = va_arg(count, char*);
241 10998 : n += strlen(s);
242 10998 : break;
243 : case 'p':
244 0 : (void) va_arg(count, int);
245 : /* maximum 64-bit pointer representation:
246 : * 0xffffffffffffffff
247 : * so 19 characters is enough.
248 : * XXX I count 18 -- what's the extra for?
249 : */
250 0 : n += 19;
251 0 : break;
252 : default:
253 : /* if we stumble upon an unknown
254 : formatting code, copy the rest of
255 : the format string to the output
256 : string. (we cannot just skip the
257 : code, since there's no way to know
258 : what's in the argument list) */
259 0 : n += strlen(p);
260 0 : goto expand;
261 : }
262 : } else
263 161615 : n++;
264 : }
265 : expand:
266 : /* step 2: fill the buffer */
267 : /* Since we've analyzed how much space we need for the worst case,
268 : use sprintf directly instead of the slower PyOS_snprintf. */
269 5657 : string = PyString_FromStringAndSize(NULL, n);
270 5657 : if (!string)
271 0 : return NULL;
272 :
273 5657 : s = PyString_AsString(string);
274 :
275 178281 : for (f = format; *f; f++) {
276 172624 : if (*f == '%') {
277 11009 : const char* p = f++;
278 : Py_ssize_t i;
279 11009 : int longflag = 0;
280 : #ifdef HAVE_LONG_LONG
281 11009 : int longlongflag = 0;
282 : #endif
283 11009 : int size_tflag = 0;
284 : /* parse the width.precision part (we're only
285 : interested in the precision value, if any) */
286 11009 : n = 0;
287 22018 : while (isdigit(Py_CHARMASK(*f)))
288 0 : n = (n*10) + *f++ - '0';
289 11009 : if (*f == '.') {
290 10995 : f++;
291 10995 : n = 0;
292 49626 : while (isdigit(Py_CHARMASK(*f)))
293 27636 : n = (n*10) + *f++ - '0';
294 : }
295 22018 : while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
296 0 : f++;
297 : /* Handle %ld, %lu, %lld and %llu. */
298 11009 : if (*f == 'l') {
299 0 : if (f[1] == 'd' || f[1] == 'u') {
300 0 : longflag = 1;
301 0 : ++f;
302 : }
303 : #ifdef HAVE_LONG_LONG
304 0 : else if (f[1] == 'l' &&
305 0 : (f[2] == 'd' || f[2] == 'u')) {
306 0 : longlongflag = 1;
307 0 : f += 2;
308 : }
309 : #endif
310 : }
311 : /* handle the size_t flag. */
312 11009 : else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
313 0 : size_tflag = 1;
314 0 : ++f;
315 : }
316 :
317 11009 : switch (*f) {
318 : case 'c':
319 0 : *s++ = va_arg(vargs, int);
320 0 : break;
321 : case 'd':
322 11 : if (longflag)
323 0 : sprintf(s, "%ld", va_arg(vargs, long));
324 : #ifdef HAVE_LONG_LONG
325 11 : else if (longlongflag)
326 0 : sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327 : va_arg(vargs, PY_LONG_LONG));
328 : #endif
329 11 : else if (size_tflag)
330 0 : sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331 : va_arg(vargs, Py_ssize_t));
332 : else
333 11 : sprintf(s, "%d", va_arg(vargs, int));
334 11 : s += strlen(s);
335 11 : break;
336 : case 'u':
337 0 : if (longflag)
338 0 : sprintf(s, "%lu",
339 : va_arg(vargs, unsigned long));
340 : #ifdef HAVE_LONG_LONG
341 0 : else if (longlongflag)
342 0 : sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343 : va_arg(vargs, PY_LONG_LONG));
344 : #endif
345 0 : else if (size_tflag)
346 0 : sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347 : va_arg(vargs, size_t));
348 : else
349 0 : sprintf(s, "%u",
350 : va_arg(vargs, unsigned int));
351 0 : s += strlen(s);
352 0 : break;
353 : case 'i':
354 0 : sprintf(s, "%i", va_arg(vargs, int));
355 0 : s += strlen(s);
356 0 : break;
357 : case 'x':
358 0 : sprintf(s, "%x", va_arg(vargs, int));
359 0 : s += strlen(s);
360 0 : break;
361 : case 's':
362 10998 : p = va_arg(vargs, char*);
363 10998 : i = strlen(p);
364 10998 : if (n > 0 && i > n)
365 0 : i = n;
366 10998 : Py_MEMCPY(s, p, i);
367 10998 : s += i;
368 10998 : break;
369 : case 'p':
370 0 : sprintf(s, "%p", va_arg(vargs, void*));
371 : /* %p is ill-defined: ensure leading 0x. */
372 0 : if (s[1] == 'X')
373 0 : s[1] = 'x';
374 0 : else if (s[1] != 'x') {
375 0 : memmove(s+2, s, strlen(s)+1);
376 0 : s[0] = '0';
377 0 : s[1] = 'x';
378 : }
379 0 : s += strlen(s);
380 0 : break;
381 : case '%':
382 0 : *s++ = '%';
383 0 : break;
384 : default:
385 0 : strcpy(s, p);
386 0 : s += strlen(s);
387 0 : goto end;
388 : }
389 : } else
390 161615 : *s++ = *f;
391 : }
392 :
393 : end:
394 5657 : if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
395 0 : return NULL;
396 5657 : return string;
397 : }
398 :
399 : PyObject *
400 11 : PyString_FromFormat(const char *format, ...)
401 : {
402 : PyObject* ret;
403 : va_list vargs;
404 :
405 : #ifdef HAVE_STDARG_PROTOTYPES
406 11 : va_start(vargs, format);
407 : #else
408 : va_start(vargs);
409 : #endif
410 11 : ret = PyString_FromFormatV(format, vargs);
411 11 : va_end(vargs);
412 11 : return ret;
413 : }
414 :
415 :
416 0 : PyObject *PyString_Decode(const char *s,
417 : Py_ssize_t size,
418 : const char *encoding,
419 : const char *errors)
420 : {
421 : PyObject *v, *str;
422 :
423 0 : str = PyString_FromStringAndSize(s, size);
424 0 : if (str == NULL)
425 0 : return NULL;
426 0 : v = PyString_AsDecodedString(str, encoding, errors);
427 0 : Py_DECREF(str);
428 0 : return v;
429 : }
430 :
431 0 : PyObject *PyString_AsDecodedObject(PyObject *str,
432 : const char *encoding,
433 : const char *errors)
434 : {
435 : PyObject *v;
436 :
437 0 : if (!PyString_Check(str)) {
438 0 : PyErr_BadArgument();
439 0 : goto onError;
440 : }
441 :
442 0 : if (encoding == NULL) {
443 : #ifdef Py_USING_UNICODE
444 0 : encoding = PyUnicode_GetDefaultEncoding();
445 : #else
446 : PyErr_SetString(PyExc_ValueError, "no encoding specified");
447 : goto onError;
448 : #endif
449 : }
450 :
451 : /* Decode via the codec registry */
452 0 : v = _PyCodec_DecodeText(str, encoding, errors);
453 0 : if (v == NULL)
454 0 : goto onError;
455 :
456 0 : return v;
457 :
458 : onError:
459 0 : return NULL;
460 : }
461 :
462 0 : PyObject *PyString_AsDecodedString(PyObject *str,
463 : const char *encoding,
464 : const char *errors)
465 : {
466 : PyObject *v;
467 :
468 0 : v = PyString_AsDecodedObject(str, encoding, errors);
469 0 : if (v == NULL)
470 0 : goto onError;
471 :
472 : #ifdef Py_USING_UNICODE
473 : /* Convert Unicode to a string using the default encoding */
474 0 : if (PyUnicode_Check(v)) {
475 0 : PyObject *temp = v;
476 0 : v = PyUnicode_AsEncodedString(v, NULL, NULL);
477 0 : Py_DECREF(temp);
478 0 : if (v == NULL)
479 0 : goto onError;
480 : }
481 : #endif
482 0 : if (!PyString_Check(v)) {
483 0 : PyErr_Format(PyExc_TypeError,
484 : "decoder did not return a string object (type=%.400s)",
485 0 : Py_TYPE(v)->tp_name);
486 0 : Py_DECREF(v);
487 0 : goto onError;
488 : }
489 :
490 0 : return v;
491 :
492 : onError:
493 0 : return NULL;
494 : }
495 :
496 0 : PyObject *PyString_Encode(const char *s,
497 : Py_ssize_t size,
498 : const char *encoding,
499 : const char *errors)
500 : {
501 : PyObject *v, *str;
502 :
503 0 : str = PyString_FromStringAndSize(s, size);
504 0 : if (str == NULL)
505 0 : return NULL;
506 0 : v = PyString_AsEncodedString(str, encoding, errors);
507 0 : Py_DECREF(str);
508 0 : return v;
509 : }
510 :
511 0 : PyObject *PyString_AsEncodedObject(PyObject *str,
512 : const char *encoding,
513 : const char *errors)
514 : {
515 : PyObject *v;
516 :
517 0 : if (!PyString_Check(str)) {
518 0 : PyErr_BadArgument();
519 0 : goto onError;
520 : }
521 :
522 0 : if (encoding == NULL) {
523 : #ifdef Py_USING_UNICODE
524 0 : encoding = PyUnicode_GetDefaultEncoding();
525 : #else
526 : PyErr_SetString(PyExc_ValueError, "no encoding specified");
527 : goto onError;
528 : #endif
529 : }
530 :
531 : /* Encode via the codec registry */
532 0 : v = _PyCodec_EncodeText(str, encoding, errors);
533 0 : if (v == NULL)
534 0 : goto onError;
535 :
536 0 : return v;
537 :
538 : onError:
539 0 : return NULL;
540 : }
541 :
542 0 : PyObject *PyString_AsEncodedString(PyObject *str,
543 : const char *encoding,
544 : const char *errors)
545 : {
546 : PyObject *v;
547 :
548 0 : v = PyString_AsEncodedObject(str, encoding, errors);
549 0 : if (v == NULL)
550 0 : goto onError;
551 :
552 : #ifdef Py_USING_UNICODE
553 : /* Convert Unicode to a string using the default encoding */
554 0 : if (PyUnicode_Check(v)) {
555 0 : PyObject *temp = v;
556 0 : v = PyUnicode_AsEncodedString(v, NULL, NULL);
557 0 : Py_DECREF(temp);
558 0 : if (v == NULL)
559 0 : goto onError;
560 : }
561 : #endif
562 0 : if (!PyString_Check(v)) {
563 0 : PyErr_Format(PyExc_TypeError,
564 : "encoder did not return a string object (type=%.400s)",
565 0 : Py_TYPE(v)->tp_name);
566 0 : Py_DECREF(v);
567 0 : goto onError;
568 : }
569 :
570 0 : return v;
571 :
572 : onError:
573 0 : return NULL;
574 : }
575 :
576 : static void
577 143874 : string_dealloc(PyObject *op)
578 : {
579 143874 : switch (PyString_CHECK_INTERNED(op)) {
580 : case SSTATE_NOT_INTERNED:
581 129380 : break;
582 :
583 : case SSTATE_INTERNED_MORTAL:
584 : /* revive dead object temporarily for DelItem */
585 14494 : Py_REFCNT(op) = 3;
586 14494 : if (PyDict_DelItem(interned, op) != 0)
587 0 : Py_FatalError(
588 : "deletion of interned string failed");
589 14494 : break;
590 :
591 : case SSTATE_INTERNED_IMMORTAL:
592 0 : Py_FatalError("Immortal interned string died.");
593 :
594 : default:
595 0 : Py_FatalError("Inconsistent interned string state.");
596 : }
597 143874 : Py_TYPE(op)->tp_free(op);
598 143874 : }
599 :
600 : /* Unescape a backslash-escaped string. If unicode is non-zero,
601 : the string is a u-literal. If recode_encoding is non-zero,
602 : the string is UTF-8 encoded and should be re-encoded in the
603 : specified encoding. */
604 :
605 69 : PyObject *PyString_DecodeEscape(const char *s,
606 : Py_ssize_t len,
607 : const char *errors,
608 : Py_ssize_t unicode,
609 : const char *recode_encoding)
610 : {
611 : int c;
612 : char *p, *buf;
613 : const char *end;
614 : PyObject *v;
615 69 : Py_ssize_t newlen = recode_encoding ? 4*len:len;
616 69 : v = PyString_FromStringAndSize((char *)NULL, newlen);
617 69 : if (v == NULL)
618 0 : return NULL;
619 69 : p = buf = PyString_AsString(v);
620 69 : end = s + len;
621 5547 : while (s < end) {
622 5409 : if (*s != '\\') {
623 : non_esc:
624 : #ifdef Py_USING_UNICODE
625 5325 : if (recode_encoding && (*s & 0x80)) {
626 : PyObject *u, *w;
627 : char *r;
628 : const char* t;
629 : Py_ssize_t rn;
630 0 : t = s;
631 : /* Decode non-ASCII bytes as UTF-8. */
632 0 : while (t < end && (*t & 0x80)) t++;
633 0 : u = PyUnicode_DecodeUTF8(s, t - s, errors);
634 0 : if(!u) goto failed;
635 :
636 : /* Recode them in target encoding. */
637 0 : w = PyUnicode_AsEncodedString(
638 : u, recode_encoding, errors);
639 0 : Py_DECREF(u);
640 0 : if (!w) goto failed;
641 :
642 : /* Append bytes to output buffer. */
643 : assert(PyString_Check(w));
644 0 : r = PyString_AS_STRING(w);
645 0 : rn = PyString_GET_SIZE(w);
646 0 : Py_MEMCPY(p, r, rn);
647 0 : p += rn;
648 0 : Py_DECREF(w);
649 0 : s = t;
650 : } else {
651 5325 : *p++ = *s++;
652 : }
653 : #else
654 : *p++ = *s++;
655 : #endif
656 5325 : continue;
657 : }
658 84 : s++;
659 84 : if (s==end) {
660 0 : PyErr_SetString(PyExc_ValueError,
661 : "Trailing \\ in string");
662 0 : goto failed;
663 : }
664 84 : switch (*s++) {
665 : /* XXX This assumes ASCII! */
666 1 : case '\n': break;
667 8 : case '\\': *p++ = '\\'; break;
668 0 : case '\'': *p++ = '\''; break;
669 0 : case '\"': *p++ = '\"'; break;
670 0 : case 'b': *p++ = '\b'; break;
671 0 : case 'f': *p++ = '\014'; break; /* FF */
672 11 : case 't': *p++ = '\t'; break;
673 36 : case 'n': *p++ = '\n'; break;
674 0 : case 'r': *p++ = '\r'; break;
675 0 : case 'v': *p++ = '\013'; break; /* VT */
676 0 : case 'a': *p++ = '\007'; break; /* BEL, not classic C */
677 : case '0': case '1': case '2': case '3':
678 : case '4': case '5': case '6': case '7':
679 26 : c = s[-1] - '0';
680 26 : if (s < end && '0' <= *s && *s <= '7') {
681 14 : c = (c<<3) + *s++ - '0';
682 14 : if (s < end && '0' <= *s && *s <= '7')
683 14 : c = (c<<3) + *s++ - '0';
684 : }
685 26 : *p++ = c;
686 26 : break;
687 : case 'x':
688 4 : if (s+1 < end &&
689 4 : isxdigit(Py_CHARMASK(s[0])) &&
690 2 : isxdigit(Py_CHARMASK(s[1])))
691 : {
692 2 : unsigned int x = 0;
693 2 : c = Py_CHARMASK(*s);
694 2 : s++;
695 2 : if (isdigit(c))
696 2 : x = c - '0';
697 0 : else if (islower(c))
698 0 : x = 10 + c - 'a';
699 : else
700 0 : x = 10 + c - 'A';
701 2 : x = x << 4;
702 2 : c = Py_CHARMASK(*s);
703 2 : s++;
704 2 : if (isdigit(c))
705 2 : x += c - '0';
706 0 : else if (islower(c))
707 0 : x += 10 + c - 'a';
708 : else
709 0 : x += 10 + c - 'A';
710 2 : *p++ = x;
711 2 : break;
712 : }
713 0 : if (!errors || strcmp(errors, "strict") == 0) {
714 0 : PyErr_SetString(PyExc_ValueError,
715 : "invalid \\x escape");
716 0 : goto failed;
717 : }
718 0 : if (strcmp(errors, "replace") == 0) {
719 0 : *p++ = '?';
720 0 : } else if (strcmp(errors, "ignore") == 0)
721 : /* do nothing */;
722 : else {
723 0 : PyErr_Format(PyExc_ValueError,
724 : "decoding error; "
725 : "unknown error handling code: %.400s",
726 : errors);
727 0 : goto failed;
728 : }
729 : /* skip \x */
730 0 : if (s < end && isxdigit(Py_CHARMASK(s[0])))
731 0 : s++; /* and a hexdigit */
732 0 : break;
733 : #ifndef Py_USING_UNICODE
734 : case 'u':
735 : case 'U':
736 : case 'N':
737 : if (unicode) {
738 : PyErr_SetString(PyExc_ValueError,
739 : "Unicode escapes not legal "
740 : "when Unicode disabled");
741 : goto failed;
742 : }
743 : #endif
744 : default:
745 0 : *p++ = '\\';
746 0 : s--;
747 0 : goto non_esc; /* an arbitrary number of unescaped
748 : UTF-8 bytes may follow. */
749 : }
750 : }
751 69 : if (p-buf < newlen)
752 69 : _PyString_Resize(&v, p - buf); /* v is cleared on error */
753 69 : return v;
754 : failed:
755 0 : Py_DECREF(v);
756 0 : return NULL;
757 : }
758 :
759 : /* -------------------------------------------------------------------- */
760 : /* object api */
761 :
762 : static Py_ssize_t
763 0 : string_getsize(register PyObject *op)
764 : {
765 : char *s;
766 : Py_ssize_t len;
767 0 : if (PyString_AsStringAndSize(op, &s, &len))
768 0 : return -1;
769 0 : return len;
770 : }
771 :
772 : static /*const*/ char *
773 0 : string_getbuffer(register PyObject *op)
774 : {
775 : char *s;
776 : Py_ssize_t len;
777 0 : if (PyString_AsStringAndSize(op, &s, &len))
778 0 : return NULL;
779 0 : return s;
780 : }
781 :
782 : Py_ssize_t
783 6499 : PyString_Size(register PyObject *op)
784 : {
785 6499 : if (!PyString_Check(op))
786 0 : return string_getsize(op);
787 6499 : return Py_SIZE(op);
788 : }
789 :
790 : /*const*/ char *
791 308022 : PyString_AsString(register PyObject *op)
792 : {
793 308022 : if (!PyString_Check(op))
794 0 : return string_getbuffer(op);
795 308022 : return ((PyStringObject *)op) -> ob_sval;
796 : }
797 :
798 : int
799 27 : PyString_AsStringAndSize(register PyObject *obj,
800 : register char **s,
801 : register Py_ssize_t *len)
802 : {
803 27 : if (s == NULL) {
804 0 : PyErr_BadInternalCall();
805 0 : return -1;
806 : }
807 :
808 27 : if (!PyString_Check(obj)) {
809 : #ifdef Py_USING_UNICODE
810 0 : if (PyUnicode_Check(obj)) {
811 0 : obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
812 0 : if (obj == NULL)
813 0 : return -1;
814 : }
815 : else
816 : #endif
817 : {
818 0 : PyErr_Format(PyExc_TypeError,
819 : "expected string or Unicode object, "
820 0 : "%.200s found", Py_TYPE(obj)->tp_name);
821 0 : return -1;
822 : }
823 : }
824 :
825 27 : *s = PyString_AS_STRING(obj);
826 27 : if (len != NULL)
827 0 : *len = PyString_GET_SIZE(obj);
828 27 : else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
829 0 : PyErr_SetString(PyExc_TypeError,
830 : "expected string without null bytes");
831 0 : return -1;
832 : }
833 27 : return 0;
834 : }
835 :
836 : /* -------------------------------------------------------------------- */
837 : /* Methods */
838 :
839 : #include "stringlib/stringdefs.h"
840 : #include "stringlib/fastsearch.h"
841 :
842 : #include "stringlib/count.h"
843 : #include "stringlib/find.h"
844 : #include "stringlib/partition.h"
845 : #include "stringlib/split.h"
846 :
847 : #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
848 : #include "stringlib/localeutil.h"
849 :
850 :
851 :
852 : static int
853 0 : string_print(PyStringObject *op, FILE *fp, int flags)
854 : {
855 : Py_ssize_t i, str_len;
856 : char c;
857 : int quote;
858 :
859 : /* XXX Ought to check for interrupts when writing long strings */
860 0 : if (! PyString_CheckExact(op)) {
861 : int ret;
862 : /* A str subclass may have its own __str__ method. */
863 0 : op = (PyStringObject *) PyObject_Str((PyObject *)op);
864 0 : if (op == NULL)
865 0 : return -1;
866 0 : ret = string_print(op, fp, flags);
867 0 : Py_DECREF(op);
868 0 : return ret;
869 : }
870 0 : if (flags & Py_PRINT_RAW) {
871 0 : char *data = op->ob_sval;
872 0 : Py_ssize_t size = Py_SIZE(op);
873 : Py_BEGIN_ALLOW_THREADS
874 0 : while (size > INT_MAX) {
875 : /* Very long strings cannot be written atomically.
876 : * But don't write exactly INT_MAX bytes at a time
877 : * to avoid memory aligment issues.
878 : */
879 0 : const int chunk_size = INT_MAX & ~0x3FFF;
880 0 : fwrite(data, 1, chunk_size, fp);
881 0 : data += chunk_size;
882 0 : size -= chunk_size;
883 : }
884 : #ifdef __VMS
885 : if (size) fwrite(data, (size_t)size, 1, fp);
886 : #else
887 0 : fwrite(data, 1, (size_t)size, fp);
888 : #endif
889 : Py_END_ALLOW_THREADS
890 0 : return 0;
891 : }
892 :
893 : /* figure out which quote to use; single is preferred */
894 0 : quote = '\'';
895 0 : if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
896 0 : !memchr(op->ob_sval, '"', Py_SIZE(op)))
897 0 : quote = '"';
898 :
899 0 : str_len = Py_SIZE(op);
900 : Py_BEGIN_ALLOW_THREADS
901 0 : fputc(quote, fp);
902 0 : for (i = 0; i < str_len; i++) {
903 : /* Since strings are immutable and the caller should have a
904 : reference, accessing the internal buffer should not be an issue
905 : with the GIL released. */
906 0 : c = op->ob_sval[i];
907 0 : if (c == quote || c == '\\')
908 0 : fprintf(fp, "\\%c", c);
909 0 : else if (c == '\t')
910 0 : fprintf(fp, "\\t");
911 0 : else if (c == '\n')
912 0 : fprintf(fp, "\\n");
913 0 : else if (c == '\r')
914 0 : fprintf(fp, "\\r");
915 0 : else if (c < ' ' || c >= 0x7f)
916 0 : fprintf(fp, "\\x%02x", c & 0xff);
917 : else
918 0 : fputc(c, fp);
919 : }
920 0 : fputc(quote, fp);
921 : Py_END_ALLOW_THREADS
922 0 : return 0;
923 : }
924 :
925 : PyObject *
926 342 : PyString_Repr(PyObject *obj, int smartquotes)
927 : {
928 342 : register PyStringObject* op = (PyStringObject*) obj;
929 : size_t newsize;
930 : PyObject *v;
931 342 : if (Py_SIZE(op) > (PY_SSIZE_T_MAX - 2)/4) {
932 0 : PyErr_SetString(PyExc_OverflowError,
933 : "string is too large to make repr");
934 0 : return NULL;
935 : }
936 342 : newsize = 2 + 4*Py_SIZE(op);
937 342 : v = PyString_FromStringAndSize((char *)NULL, newsize);
938 342 : if (v == NULL) {
939 0 : return NULL;
940 : }
941 : else {
942 : register Py_ssize_t i;
943 : register char c;
944 : register char *p;
945 : int quote;
946 :
947 : /* figure out which quote to use; single is preferred */
948 342 : quote = '\'';
949 684 : if (smartquotes &&
950 342 : memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
951 0 : !memchr(op->ob_sval, '"', Py_SIZE(op)))
952 0 : quote = '"';
953 :
954 342 : p = PyString_AS_STRING(v);
955 342 : *p++ = quote;
956 2520 : for (i = 0; i < Py_SIZE(op); i++) {
957 : /* There's at least enough room for a hex escape
958 : and a closing quote. */
959 : assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
960 2178 : c = op->ob_sval[i];
961 2178 : if (c == quote || c == '\\')
962 0 : *p++ = '\\', *p++ = c;
963 2178 : else if (c == '\t')
964 0 : *p++ = '\\', *p++ = 't';
965 2178 : else if (c == '\n')
966 0 : *p++ = '\\', *p++ = 'n';
967 2178 : else if (c == '\r')
968 0 : *p++ = '\\', *p++ = 'r';
969 2178 : else if (c < ' ' || c >= 0x7f) {
970 : /* For performance, we don't want to call
971 : PyOS_snprintf here (extra layers of
972 : function call). */
973 0 : sprintf(p, "\\x%02x", c & 0xff);
974 0 : p += 4;
975 : }
976 : else
977 2178 : *p++ = c;
978 : }
979 : assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
980 342 : *p++ = quote;
981 342 : *p = '\0';
982 342 : if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
983 0 : return NULL;
984 342 : return v;
985 : }
986 : }
987 :
988 : static PyObject *
989 342 : string_repr(PyObject *op)
990 : {
991 342 : return PyString_Repr(op, 1);
992 : }
993 :
994 : static PyObject *
995 0 : string_str(PyObject *s)
996 : {
997 : assert(PyString_Check(s));
998 0 : if (PyString_CheckExact(s)) {
999 0 : Py_INCREF(s);
1000 0 : return s;
1001 : }
1002 : else {
1003 : /* Subtype -- return genuine string with the same value. */
1004 0 : PyStringObject *t = (PyStringObject *) s;
1005 0 : return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1006 : }
1007 : }
1008 :
1009 : static Py_ssize_t
1010 36678 : string_length(PyStringObject *a)
1011 : {
1012 36678 : return Py_SIZE(a);
1013 : }
1014 :
1015 : static PyObject *
1016 6739 : string_concat(register PyStringObject *a, register PyObject *bb)
1017 : {
1018 : register Py_ssize_t size;
1019 : register PyStringObject *op;
1020 6739 : if (!PyString_Check(bb)) {
1021 : #ifdef Py_USING_UNICODE
1022 0 : if (PyUnicode_Check(bb))
1023 0 : return PyUnicode_Concat((PyObject *)a, bb);
1024 : #endif
1025 0 : if (PyByteArray_Check(bb))
1026 0 : return PyByteArray_Concat((PyObject *)a, bb);
1027 0 : PyErr_Format(PyExc_TypeError,
1028 : "cannot concatenate 'str' and '%.200s' objects",
1029 0 : Py_TYPE(bb)->tp_name);
1030 0 : return NULL;
1031 : }
1032 : #define b ((PyStringObject *)bb)
1033 : /* Optimize cases with empty left or right operand */
1034 6793 : if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1035 108 : PyString_CheckExact(a) && PyString_CheckExact(b)) {
1036 54 : if (Py_SIZE(a) == 0) {
1037 54 : Py_INCREF(bb);
1038 54 : return bb;
1039 : }
1040 0 : Py_INCREF(a);
1041 0 : return (PyObject *)a;
1042 : }
1043 : /* Check that string sizes are not negative, to prevent an
1044 : overflow in cases where we are passed incorrectly-created
1045 : strings with negative lengths (due to a bug in other code).
1046 : */
1047 13370 : if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1048 6685 : Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1049 0 : PyErr_SetString(PyExc_OverflowError,
1050 : "strings are too large to concat");
1051 0 : return NULL;
1052 : }
1053 6685 : size = Py_SIZE(a) + Py_SIZE(b);
1054 :
1055 : /* Inline PyObject_NewVar */
1056 6685 : if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1057 0 : PyErr_SetString(PyExc_OverflowError,
1058 : "strings are too large to concat");
1059 0 : return NULL;
1060 : }
1061 6685 : op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1062 6685 : if (op == NULL)
1063 0 : return PyErr_NoMemory();
1064 6685 : (void)PyObject_INIT_VAR(op, &PyString_Type, size);
1065 6685 : op->ob_shash = -1;
1066 6685 : op->ob_sstate = SSTATE_NOT_INTERNED;
1067 6685 : Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1068 6685 : Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1069 6685 : op->ob_sval[size] = '\0';
1070 6685 : return (PyObject *) op;
1071 : #undef b
1072 : }
1073 :
1074 : static PyObject *
1075 24 : string_repeat(register PyStringObject *a, register Py_ssize_t n)
1076 : {
1077 : register Py_ssize_t i;
1078 : register Py_ssize_t j;
1079 : register Py_ssize_t size;
1080 : register PyStringObject *op;
1081 : size_t nbytes;
1082 24 : if (n < 0)
1083 0 : n = 0;
1084 : /* watch out for overflows: the size can overflow Py_ssize_t,
1085 : * and the # of bytes needed can overflow size_t
1086 : */
1087 24 : if (n && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1088 0 : PyErr_SetString(PyExc_OverflowError,
1089 : "repeated string is too long");
1090 0 : return NULL;
1091 : }
1092 24 : size = Py_SIZE(a) * n;
1093 24 : if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1094 9 : Py_INCREF(a);
1095 9 : return (PyObject *)a;
1096 : }
1097 15 : nbytes = (size_t)size;
1098 15 : if (nbytes + PyStringObject_SIZE <= nbytes) {
1099 0 : PyErr_SetString(PyExc_OverflowError,
1100 : "repeated string is too long");
1101 0 : return NULL;
1102 : }
1103 15 : op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1104 15 : if (op == NULL)
1105 0 : return PyErr_NoMemory();
1106 15 : (void)PyObject_INIT_VAR(op, &PyString_Type, size);
1107 15 : op->ob_shash = -1;
1108 15 : op->ob_sstate = SSTATE_NOT_INTERNED;
1109 15 : op->ob_sval[size] = '\0';
1110 15 : if (Py_SIZE(a) == 1 && n > 0) {
1111 15 : memset(op->ob_sval, a->ob_sval[0] , n);
1112 15 : return (PyObject *) op;
1113 : }
1114 0 : i = 0;
1115 0 : if (i < size) {
1116 0 : Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1117 0 : i = Py_SIZE(a);
1118 : }
1119 0 : while (i < size) {
1120 0 : j = (i <= size-i) ? i : size-i;
1121 0 : Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1122 0 : i += j;
1123 : }
1124 0 : return (PyObject *) op;
1125 : }
1126 :
1127 : /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1128 :
1129 : static PyObject *
1130 8253 : string_slice(register PyStringObject *a, register Py_ssize_t i,
1131 : register Py_ssize_t j)
1132 : /* j -- may be negative! */
1133 : {
1134 8253 : if (i < 0)
1135 0 : i = 0;
1136 8253 : if (j < 0)
1137 0 : j = 0; /* Avoid signed/unsigned bug in next line */
1138 8253 : if (j > Py_SIZE(a))
1139 1257 : j = Py_SIZE(a);
1140 8253 : if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1141 : /* It's the same as a */
1142 1503 : Py_INCREF(a);
1143 1503 : return (PyObject *)a;
1144 : }
1145 6750 : if (j < i)
1146 0 : j = i;
1147 6750 : return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1148 : }
1149 :
1150 : static int
1151 5535 : string_contains(PyObject *str_obj, PyObject *sub_obj)
1152 : {
1153 5535 : if (!PyString_CheckExact(sub_obj)) {
1154 : #ifdef Py_USING_UNICODE
1155 0 : if (PyUnicode_Check(sub_obj))
1156 0 : return PyUnicode_Contains(str_obj, sub_obj);
1157 : #endif
1158 0 : if (!PyString_Check(sub_obj)) {
1159 0 : PyErr_Format(PyExc_TypeError,
1160 : "'in <string>' requires string as left operand, "
1161 0 : "not %.200s", Py_TYPE(sub_obj)->tp_name);
1162 0 : return -1;
1163 : }
1164 : }
1165 :
1166 5535 : return stringlib_contains_obj(str_obj, sub_obj);
1167 : }
1168 :
1169 : static PyObject *
1170 43455 : string_item(PyStringObject *a, register Py_ssize_t i)
1171 : {
1172 : char pchar;
1173 : PyObject *v;
1174 43455 : if (i < 0 || i >= Py_SIZE(a)) {
1175 2297 : PyErr_SetString(PyExc_IndexError, "string index out of range");
1176 2297 : return NULL;
1177 : }
1178 41158 : pchar = a->ob_sval[i];
1179 41158 : v = (PyObject *)characters[pchar & UCHAR_MAX];
1180 41158 : if (v == NULL)
1181 45 : v = PyString_FromStringAndSize(&pchar, 1);
1182 : else {
1183 : #ifdef COUNT_ALLOCS
1184 : one_strings++;
1185 : #endif
1186 41113 : Py_INCREF(v);
1187 : }
1188 41158 : return v;
1189 : }
1190 :
1191 : static PyObject*
1192 91364 : string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1193 : {
1194 : int c;
1195 : Py_ssize_t len_a, len_b;
1196 : Py_ssize_t min_len;
1197 : PyObject *result;
1198 :
1199 : /* Make sure both arguments are strings. */
1200 91364 : if (!(PyString_Check(a) && PyString_Check(b))) {
1201 2700 : result = Py_NotImplemented;
1202 2700 : goto out;
1203 : }
1204 88664 : if (a == b) {
1205 4232 : switch (op) {
1206 : case Py_EQ:case Py_LE:case Py_GE:
1207 3605 : result = Py_True;
1208 3605 : goto out;
1209 : case Py_NE:case Py_LT:case Py_GT:
1210 627 : result = Py_False;
1211 627 : goto out;
1212 : }
1213 : }
1214 84432 : if (op == Py_EQ) {
1215 : /* Supporting Py_NE here as well does not save
1216 : much time, since Py_NE is rarely used. */
1217 46425 : if (Py_SIZE(a) == Py_SIZE(b)
1218 24698 : && (a->ob_sval[0] == b->ob_sval[0]
1219 4402 : && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1220 3823 : result = Py_True;
1221 : } else {
1222 42602 : result = Py_False;
1223 : }
1224 46425 : goto out;
1225 : }
1226 38007 : len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1227 38007 : min_len = (len_a < len_b) ? len_a : len_b;
1228 38007 : if (min_len > 0) {
1229 38007 : c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1230 38007 : if (c==0)
1231 7891 : c = memcmp(a->ob_sval, b->ob_sval, min_len);
1232 : } else
1233 0 : c = 0;
1234 38007 : if (c == 0)
1235 910 : c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1236 38007 : switch (op) {
1237 36120 : case Py_LT: c = c < 0; break;
1238 618 : case Py_LE: c = c <= 0; break;
1239 0 : case Py_EQ: assert(0); break; /* unreachable */
1240 1269 : case Py_NE: c = c != 0; break;
1241 0 : case Py_GT: c = c > 0; break;
1242 0 : case Py_GE: c = c >= 0; break;
1243 : default:
1244 0 : result = Py_NotImplemented;
1245 0 : goto out;
1246 : }
1247 38007 : result = c ? Py_True : Py_False;
1248 : out:
1249 91364 : Py_INCREF(result);
1250 91364 : return result;
1251 : }
1252 :
1253 : int
1254 73653 : _PyString_Eq(PyObject *o1, PyObject *o2)
1255 : {
1256 73653 : PyStringObject *a = (PyStringObject*) o1;
1257 73653 : PyStringObject *b = (PyStringObject*) o2;
1258 147306 : return Py_SIZE(a) == Py_SIZE(b)
1259 73653 : && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1260 : }
1261 :
1262 : static long
1263 198233 : string_hash(PyStringObject *a)
1264 : {
1265 : register Py_ssize_t len;
1266 : register unsigned char *p;
1267 : register long x;
1268 :
1269 : #ifdef Py_DEBUG
1270 : assert(_Py_HashSecret_Initialized);
1271 : #endif
1272 198233 : if (a->ob_shash != -1)
1273 85330 : return a->ob_shash;
1274 112903 : len = Py_SIZE(a);
1275 : /*
1276 : We make the hash of the empty string be 0, rather than using
1277 : (prefix ^ suffix), since this slightly obfuscates the hash secret
1278 : */
1279 112903 : if (len == 0) {
1280 3 : a->ob_shash = 0;
1281 3 : return 0;
1282 : }
1283 112900 : p = (unsigned char *) a->ob_sval;
1284 112900 : x = _Py_HashSecret.prefix;
1285 112900 : x ^= *p << 7;
1286 1261290 : while (--len >= 0)
1287 1035490 : x = (1000003*x) ^ *p++;
1288 112900 : x ^= Py_SIZE(a);
1289 112900 : x ^= _Py_HashSecret.suffix;
1290 112900 : if (x == -1)
1291 0 : x = -2;
1292 112900 : a->ob_shash = x;
1293 112900 : return x;
1294 : }
1295 :
1296 : static PyObject*
1297 34513 : string_subscript(PyStringObject* self, PyObject* item)
1298 : {
1299 34513 : if (PyIndex_Check(item)) {
1300 34315 : Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1301 34315 : if (i == -1 && PyErr_Occurred())
1302 0 : return NULL;
1303 34315 : if (i < 0)
1304 0 : i += PyString_GET_SIZE(self);
1305 34315 : return string_item(self, i);
1306 : }
1307 198 : else if (PySlice_Check(item)) {
1308 : Py_ssize_t start, stop, step, slicelength, cur, i;
1309 : char* source_buf;
1310 : char* result_buf;
1311 : PyObject* result;
1312 :
1313 198 : if (PySlice_GetIndicesEx((PySliceObject*)item,
1314 : PyString_GET_SIZE(self),
1315 : &start, &stop, &step, &slicelength) < 0) {
1316 0 : return NULL;
1317 : }
1318 :
1319 198 : if (slicelength <= 0) {
1320 0 : return PyString_FromStringAndSize("", 0);
1321 : }
1322 198 : else if (start == 0 && step == 1 &&
1323 0 : slicelength == PyString_GET_SIZE(self) &&
1324 0 : PyString_CheckExact(self)) {
1325 0 : Py_INCREF(self);
1326 0 : return (PyObject *)self;
1327 : }
1328 198 : else if (step == 1) {
1329 0 : return PyString_FromStringAndSize(
1330 0 : PyString_AS_STRING(self) + start,
1331 : slicelength);
1332 : }
1333 : else {
1334 198 : source_buf = PyString_AsString((PyObject*)self);
1335 198 : result_buf = (char *)PyMem_Malloc(slicelength);
1336 198 : if (result_buf == NULL)
1337 0 : return PyErr_NoMemory();
1338 :
1339 51084 : for (cur = start, i = 0; i < slicelength;
1340 50688 : cur += step, i++) {
1341 50688 : result_buf[i] = source_buf[cur];
1342 : }
1343 :
1344 198 : result = PyString_FromStringAndSize(result_buf,
1345 : slicelength);
1346 198 : PyMem_Free(result_buf);
1347 198 : return result;
1348 : }
1349 : }
1350 : else {
1351 0 : PyErr_Format(PyExc_TypeError,
1352 : "string indices must be integers, not %.200s",
1353 0 : Py_TYPE(item)->tp_name);
1354 0 : return NULL;
1355 : }
1356 : }
1357 :
1358 : static Py_ssize_t
1359 2898 : string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1360 : {
1361 2898 : if ( index != 0 ) {
1362 0 : PyErr_SetString(PyExc_SystemError,
1363 : "accessing non-existent string segment");
1364 0 : return -1;
1365 : }
1366 2898 : *ptr = (void *)self->ob_sval;
1367 2898 : return Py_SIZE(self);
1368 : }
1369 :
1370 : static Py_ssize_t
1371 0 : string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1372 : {
1373 0 : PyErr_SetString(PyExc_TypeError,
1374 : "Cannot use string as modifiable buffer");
1375 0 : return -1;
1376 : }
1377 :
1378 : static Py_ssize_t
1379 10008 : string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1380 : {
1381 10008 : if ( lenp )
1382 0 : *lenp = Py_SIZE(self);
1383 10008 : return 1;
1384 : }
1385 :
1386 : static Py_ssize_t
1387 63 : string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1388 : {
1389 63 : if ( index != 0 ) {
1390 0 : PyErr_SetString(PyExc_SystemError,
1391 : "accessing non-existent string segment");
1392 0 : return -1;
1393 : }
1394 63 : *ptr = self->ob_sval;
1395 63 : return Py_SIZE(self);
1396 : }
1397 :
1398 : static int
1399 2442 : string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1400 : {
1401 4884 : return PyBuffer_FillInfo(view, (PyObject*)self,
1402 2442 : (void *)self->ob_sval, Py_SIZE(self),
1403 : 1, flags);
1404 : }
1405 :
1406 : static PySequenceMethods string_as_sequence = {
1407 : (lenfunc)string_length, /*sq_length*/
1408 : (binaryfunc)string_concat, /*sq_concat*/
1409 : (ssizeargfunc)string_repeat, /*sq_repeat*/
1410 : (ssizeargfunc)string_item, /*sq_item*/
1411 : (ssizessizeargfunc)string_slice, /*sq_slice*/
1412 : 0, /*sq_ass_item*/
1413 : 0, /*sq_ass_slice*/
1414 : (objobjproc)string_contains /*sq_contains*/
1415 : };
1416 :
1417 : static PyMappingMethods string_as_mapping = {
1418 : (lenfunc)string_length,
1419 : (binaryfunc)string_subscript,
1420 : 0,
1421 : };
1422 :
1423 : static PyBufferProcs string_as_buffer = {
1424 : (readbufferproc)string_buffer_getreadbuf,
1425 : (writebufferproc)string_buffer_getwritebuf,
1426 : (segcountproc)string_buffer_getsegcount,
1427 : (charbufferproc)string_buffer_getcharbuf,
1428 : (getbufferproc)string_buffer_getbuffer,
1429 : 0, /* XXX */
1430 : };
1431 :
1432 :
1433 :
/* Strip modes; the values double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the strip mode above. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for a strip mode: the format string minus its 3-character
 * "|O:" prefix.
 */
#define STRIPNAME(i) (stripformat[(i)] + 3)
1442 :
1443 : PyDoc_STRVAR(split__doc__,
1444 : "S.split([sep [,maxsplit]]) -> list of strings\n\
1445 : \n\
1446 : Return a list of the words in the string S, using sep as the\n\
1447 : delimiter string. If maxsplit is given, at most maxsplit\n\
1448 : splits are done. If sep is not specified or is None, any\n\
1449 : whitespace string is a separator and empty strings are removed\n\
1450 : from the result.");
1451 :
1452 : static PyObject *
1453 78 : string_split(PyStringObject *self, PyObject *args)
1454 : {
1455 78 : Py_ssize_t len = PyString_GET_SIZE(self), n;
1456 78 : Py_ssize_t maxsplit = -1;
1457 78 : const char *s = PyString_AS_STRING(self), *sub;
1458 78 : PyObject *subobj = Py_None;
1459 :
1460 78 : if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1461 0 : return NULL;
1462 78 : if (maxsplit < 0)
1463 78 : maxsplit = PY_SSIZE_T_MAX;
1464 78 : if (subobj == Py_None)
1465 51 : return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1466 27 : if (PyString_Check(subobj)) {
1467 27 : sub = PyString_AS_STRING(subobj);
1468 27 : n = PyString_GET_SIZE(subobj);
1469 : }
1470 : #ifdef Py_USING_UNICODE
1471 0 : else if (PyUnicode_Check(subobj))
1472 0 : return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1473 : #endif
1474 0 : else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1475 0 : return NULL;
1476 :
1477 27 : return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1478 : }
1479 :
1480 : PyDoc_STRVAR(partition__doc__,
1481 : "S.partition(sep) -> (head, sep, tail)\n\
1482 : \n\
1483 : Search for the separator sep in S, and return the part before it,\n\
1484 : the separator itself, and the part after it. If the separator is not\n\
1485 : found, return S and two empty strings.");
1486 :
1487 : static PyObject *
1488 0 : string_partition(PyStringObject *self, PyObject *sep_obj)
1489 : {
1490 : const char *sep;
1491 : Py_ssize_t sep_len;
1492 :
1493 0 : if (PyString_Check(sep_obj)) {
1494 0 : sep = PyString_AS_STRING(sep_obj);
1495 0 : sep_len = PyString_GET_SIZE(sep_obj);
1496 : }
1497 : #ifdef Py_USING_UNICODE
1498 0 : else if (PyUnicode_Check(sep_obj))
1499 0 : return PyUnicode_Partition((PyObject *) self, sep_obj);
1500 : #endif
1501 0 : else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1502 0 : return NULL;
1503 :
1504 0 : return stringlib_partition(
1505 : (PyObject*) self,
1506 0 : PyString_AS_STRING(self), PyString_GET_SIZE(self),
1507 : sep_obj, sep, sep_len
1508 : );
1509 : }
1510 :
1511 : PyDoc_STRVAR(rpartition__doc__,
1512 : "S.rpartition(sep) -> (head, sep, tail)\n\
1513 : \n\
1514 : Search for the separator sep in S, starting at the end of S, and return\n\
1515 : the part before it, the separator itself, and the part after it. If the\n\
1516 : separator is not found, return two empty strings and S.");
1517 :
1518 : static PyObject *
1519 0 : string_rpartition(PyStringObject *self, PyObject *sep_obj)
1520 : {
1521 : const char *sep;
1522 : Py_ssize_t sep_len;
1523 :
1524 0 : if (PyString_Check(sep_obj)) {
1525 0 : sep = PyString_AS_STRING(sep_obj);
1526 0 : sep_len = PyString_GET_SIZE(sep_obj);
1527 : }
1528 : #ifdef Py_USING_UNICODE
1529 0 : else if (PyUnicode_Check(sep_obj))
1530 0 : return PyUnicode_RPartition((PyObject *) self, sep_obj);
1531 : #endif
1532 0 : else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1533 0 : return NULL;
1534 :
1535 0 : return stringlib_rpartition(
1536 : (PyObject*) self,
1537 0 : PyString_AS_STRING(self), PyString_GET_SIZE(self),
1538 : sep_obj, sep, sep_len
1539 : );
1540 : }
1541 :
1542 : PyDoc_STRVAR(rsplit__doc__,
1543 : "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1544 : \n\
1545 : Return a list of the words in the string S, using sep as the\n\
1546 : delimiter string, starting at the end of the string and working\n\
1547 : to the front. If maxsplit is given, at most maxsplit splits are\n\
1548 : done. If sep is not specified or is None, any whitespace string\n\
1549 : is a separator.");
1550 :
1551 : static PyObject *
1552 0 : string_rsplit(PyStringObject *self, PyObject *args)
1553 : {
1554 0 : Py_ssize_t len = PyString_GET_SIZE(self), n;
1555 0 : Py_ssize_t maxsplit = -1;
1556 0 : const char *s = PyString_AS_STRING(self), *sub;
1557 0 : PyObject *subobj = Py_None;
1558 :
1559 0 : if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1560 0 : return NULL;
1561 0 : if (maxsplit < 0)
1562 0 : maxsplit = PY_SSIZE_T_MAX;
1563 0 : if (subobj == Py_None)
1564 0 : return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1565 0 : if (PyString_Check(subobj)) {
1566 0 : sub = PyString_AS_STRING(subobj);
1567 0 : n = PyString_GET_SIZE(subobj);
1568 : }
1569 : #ifdef Py_USING_UNICODE
1570 0 : else if (PyUnicode_Check(subobj))
1571 0 : return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1572 : #endif
1573 0 : else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1574 0 : return NULL;
1575 :
1576 0 : return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1577 : }
1578 :
1579 :
1580 : PyDoc_STRVAR(join__doc__,
1581 : "S.join(iterable) -> string\n\
1582 : \n\
1583 : Return a string which is the concatenation of the strings in the\n\
1584 : iterable. The separator between elements is S.");
1585 :
1586 : static PyObject *
1587 1222 : string_join(PyStringObject *self, PyObject *orig)
1588 : {
1589 1222 : char *sep = PyString_AS_STRING(self);
1590 1222 : const Py_ssize_t seplen = PyString_GET_SIZE(self);
1591 1222 : PyObject *res = NULL;
1592 : char *p;
1593 1222 : Py_ssize_t seqlen = 0;
1594 1222 : size_t sz = 0;
1595 : Py_ssize_t i;
1596 : PyObject *seq, *item;
1597 :
1598 1222 : seq = PySequence_Fast(orig, "can only join an iterable");
1599 1222 : if (seq == NULL) {
1600 0 : return NULL;
1601 : }
1602 :
1603 1222 : seqlen = PySequence_Size(seq);
1604 1222 : if (seqlen == 0) {
1605 0 : Py_DECREF(seq);
1606 0 : return PyString_FromString("");
1607 : }
1608 1222 : if (seqlen == 1) {
1609 400 : item = PySequence_Fast_GET_ITEM(seq, 0);
1610 400 : if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1611 400 : Py_INCREF(item);
1612 400 : Py_DECREF(seq);
1613 400 : return item;
1614 : }
1615 : }
1616 :
1617 : /* There are at least two things to join, or else we have a subclass
1618 : * of the builtin types in the sequence.
1619 : * Do a pre-pass to figure out the total amount of space we'll
1620 : * need (sz), see whether any argument is absurd, and defer to
1621 : * the Unicode join if appropriate.
1622 : */
1623 4584 : for (i = 0; i < seqlen; i++) {
1624 3762 : const size_t old_sz = sz;
1625 3762 : item = PySequence_Fast_GET_ITEM(seq, i);
1626 3762 : if (!PyString_Check(item)){
1627 : #ifdef Py_USING_UNICODE
1628 0 : if (PyUnicode_Check(item)) {
1629 : /* Defer to Unicode join.
1630 : * CAUTION: There's no guarantee that the
1631 : * original sequence can be iterated over
1632 : * again, so we must pass seq here.
1633 : */
1634 : PyObject *result;
1635 0 : result = PyUnicode_Join((PyObject *)self, seq);
1636 0 : Py_DECREF(seq);
1637 0 : return result;
1638 : }
1639 : #endif
1640 0 : PyErr_Format(PyExc_TypeError,
1641 : "sequence item %zd: expected string,"
1642 : " %.80s found",
1643 0 : i, Py_TYPE(item)->tp_name);
1644 0 : Py_DECREF(seq);
1645 0 : return NULL;
1646 : }
1647 3762 : sz += PyString_GET_SIZE(item);
1648 3762 : if (i != 0)
1649 2940 : sz += seplen;
1650 3762 : if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1651 0 : PyErr_SetString(PyExc_OverflowError,
1652 : "join() result is too long for a Python string");
1653 0 : Py_DECREF(seq);
1654 0 : return NULL;
1655 : }
1656 : }
1657 :
1658 : /* Allocate result space. */
1659 822 : res = PyString_FromStringAndSize((char*)NULL, sz);
1660 822 : if (res == NULL) {
1661 0 : Py_DECREF(seq);
1662 0 : return NULL;
1663 : }
1664 :
1665 : /* Catenate everything. */
1666 822 : p = PyString_AS_STRING(res);
1667 4584 : for (i = 0; i < seqlen; ++i) {
1668 : size_t n;
1669 3762 : item = PySequence_Fast_GET_ITEM(seq, i);
1670 3762 : n = PyString_GET_SIZE(item);
1671 3762 : Py_MEMCPY(p, PyString_AS_STRING(item), n);
1672 3762 : p += n;
1673 3762 : if (i < seqlen - 1) {
1674 2940 : Py_MEMCPY(p, sep, seplen);
1675 2940 : p += seplen;
1676 : }
1677 : }
1678 :
1679 822 : Py_DECREF(seq);
1680 822 : return res;
1681 : }
1682 :
1683 : PyObject *
1684 81 : _PyString_Join(PyObject *sep, PyObject *x)
1685 : {
1686 : assert(sep != NULL && PyString_Check(sep));
1687 : assert(x != NULL);
1688 81 : return string_join((PyStringObject *)sep, x);
1689 : }
1690 :
/* Helper macro to fix up start/end slice values against a length `len`.
 *
 * `end` is capped at `len`; a negative `end` or `start` has `len` added
 * and is then raised to 0 if still negative.  `start` and `end` must be
 * modifiable lvalues; each argument is evaluated more than once, so do
 * not pass expressions with side effects.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed
 * by a semicolon behaves as exactly one statement, including inside an
 * unbraced if/else.  Arguments are parenthesised so compound expressions
 * expand safely.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
1705 :
1706 : Py_LOCAL_INLINE(Py_ssize_t)
1707 126 : string_find_internal(PyStringObject *self, PyObject *args, int dir)
1708 : {
1709 : PyObject *subobj;
1710 : const char *sub;
1711 : Py_ssize_t sub_len;
1712 126 : Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1713 :
1714 126 : if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1715 : args, &subobj, &start, &end))
1716 0 : return -2;
1717 :
1718 126 : if (PyString_Check(subobj)) {
1719 126 : sub = PyString_AS_STRING(subobj);
1720 126 : sub_len = PyString_GET_SIZE(subobj);
1721 : }
1722 : #ifdef Py_USING_UNICODE
1723 0 : else if (PyUnicode_Check(subobj))
1724 0 : return PyUnicode_Find(
1725 : (PyObject *)self, subobj, start, end, dir);
1726 : #endif
1727 0 : else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1728 : /* XXX - the "expected a character buffer object" is pretty
1729 : confusing for a non-expert. remap to something else ? */
1730 0 : return -2;
1731 :
1732 126 : if (dir > 0)
1733 216 : return stringlib_find_slice(
1734 108 : PyString_AS_STRING(self), PyString_GET_SIZE(self),
1735 : sub, sub_len, start, end);
1736 : else
1737 36 : return stringlib_rfind_slice(
1738 18 : PyString_AS_STRING(self), PyString_GET_SIZE(self),
1739 : sub, sub_len, start, end);
1740 : }
1741 :
1742 :
1743 : PyDoc_STRVAR(find__doc__,
1744 : "S.find(sub [,start [,end]]) -> int\n\
1745 : \n\
1746 : Return the lowest index in S where substring sub is found,\n\
1747 : such that sub is contained within S[start:end]. Optional\n\
1748 : arguments start and end are interpreted as in slice notation.\n\
1749 : \n\
1750 : Return -1 on failure.");
1751 :
1752 : static PyObject *
1753 108 : string_find(PyStringObject *self, PyObject *args)
1754 : {
1755 108 : Py_ssize_t result = string_find_internal(self, args, +1);
1756 108 : if (result == -2)
1757 0 : return NULL;
1758 108 : return PyInt_FromSsize_t(result);
1759 : }
1760 :
1761 :
1762 : PyDoc_STRVAR(index__doc__,
1763 : "S.index(sub [,start [,end]]) -> int\n\
1764 : \n\
1765 : Like S.find() but raise ValueError when the substring is not found.");
1766 :
1767 : static PyObject *
1768 0 : string_index(PyStringObject *self, PyObject *args)
1769 : {
1770 0 : Py_ssize_t result = string_find_internal(self, args, +1);
1771 0 : if (result == -2)
1772 0 : return NULL;
1773 0 : if (result == -1) {
1774 0 : PyErr_SetString(PyExc_ValueError,
1775 : "substring not found");
1776 0 : return NULL;
1777 : }
1778 0 : return PyInt_FromSsize_t(result);
1779 : }
1780 :
1781 :
1782 : PyDoc_STRVAR(rfind__doc__,
1783 : "S.rfind(sub [,start [,end]]) -> int\n\
1784 : \n\
1785 : Return the highest index in S where substring sub is found,\n\
1786 : such that sub is contained within S[start:end]. Optional\n\
1787 : arguments start and end are interpreted as in slice notation.\n\
1788 : \n\
1789 : Return -1 on failure.");
1790 :
1791 : static PyObject *
1792 18 : string_rfind(PyStringObject *self, PyObject *args)
1793 : {
1794 18 : Py_ssize_t result = string_find_internal(self, args, -1);
1795 18 : if (result == -2)
1796 0 : return NULL;
1797 18 : return PyInt_FromSsize_t(result);
1798 : }
1799 :
1800 :
1801 : PyDoc_STRVAR(rindex__doc__,
1802 : "S.rindex(sub [,start [,end]]) -> int\n\
1803 : \n\
1804 : Like S.rfind() but raise ValueError when the substring is not found.");
1805 :
1806 : static PyObject *
1807 0 : string_rindex(PyStringObject *self, PyObject *args)
1808 : {
1809 0 : Py_ssize_t result = string_find_internal(self, args, -1);
1810 0 : if (result == -2)
1811 0 : return NULL;
1812 0 : if (result == -1) {
1813 0 : PyErr_SetString(PyExc_ValueError,
1814 : "substring not found");
1815 0 : return NULL;
1816 : }
1817 0 : return PyInt_FromSsize_t(result);
1818 : }
1819 :
1820 :
1821 : Py_LOCAL_INLINE(PyObject *)
1822 9 : do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1823 : {
1824 9 : char *s = PyString_AS_STRING(self);
1825 9 : Py_ssize_t len = PyString_GET_SIZE(self);
1826 9 : char *sep = PyString_AS_STRING(sepobj);
1827 9 : Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1828 : Py_ssize_t i, j;
1829 :
1830 9 : i = 0;
1831 9 : if (striptype != RIGHTSTRIP) {
1832 0 : while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1833 0 : i++;
1834 : }
1835 : }
1836 :
1837 9 : j = len;
1838 9 : if (striptype != LEFTSTRIP) {
1839 : do {
1840 18 : j--;
1841 18 : } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1842 9 : j++;
1843 : }
1844 :
1845 9 : if (i == 0 && j == len && PyString_CheckExact(self)) {
1846 0 : Py_INCREF(self);
1847 0 : return (PyObject*)self;
1848 : }
1849 : else
1850 9 : return PyString_FromStringAndSize(s+i, j-i);
1851 : }
1852 :
1853 :
1854 : Py_LOCAL_INLINE(PyObject *)
1855 753 : do_strip(PyStringObject *self, int striptype)
1856 : {
1857 753 : char *s = PyString_AS_STRING(self);
1858 753 : Py_ssize_t len = PyString_GET_SIZE(self), i, j;
1859 :
1860 753 : i = 0;
1861 753 : if (striptype != RIGHTSTRIP) {
1862 2967 : while (i < len && isspace(Py_CHARMASK(s[i]))) {
1863 1461 : i++;
1864 : }
1865 : }
1866 :
1867 753 : j = len;
1868 753 : if (striptype != LEFTSTRIP) {
1869 : do {
1870 801 : j--;
1871 801 : } while (j >= i && isspace(Py_CHARMASK(s[j])));
1872 753 : j++;
1873 : }
1874 :
1875 753 : if (i == 0 && j == len && PyString_CheckExact(self)) {
1876 213 : Py_INCREF(self);
1877 213 : return (PyObject*)self;
1878 : }
1879 : else
1880 540 : return PyString_FromStringAndSize(s+i, j-i);
1881 : }
1882 :
1883 :
1884 : Py_LOCAL_INLINE(PyObject *)
1885 9 : do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1886 : {
1887 9 : PyObject *sep = NULL;
1888 :
1889 9 : if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1890 0 : return NULL;
1891 :
1892 9 : if (sep != NULL && sep != Py_None) {
1893 9 : if (PyString_Check(sep))
1894 9 : return do_xstrip(self, striptype, sep);
1895 : #ifdef Py_USING_UNICODE
1896 0 : else if (PyUnicode_Check(sep)) {
1897 0 : PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1898 : PyObject *res;
1899 0 : if (uniself==NULL)
1900 0 : return NULL;
1901 0 : res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1902 : striptype, sep);
1903 0 : Py_DECREF(uniself);
1904 0 : return res;
1905 : }
1906 : #endif
1907 0 : PyErr_Format(PyExc_TypeError,
1908 : #ifdef Py_USING_UNICODE
1909 : "%s arg must be None, str or unicode",
1910 : #else
1911 : "%s arg must be None or str",
1912 : #endif
1913 0 : STRIPNAME(striptype));
1914 0 : return NULL;
1915 : }
1916 :
1917 0 : return do_strip(self, striptype);
1918 : }
1919 :
1920 :
1921 : PyDoc_STRVAR(strip__doc__,
1922 : "S.strip([chars]) -> string or unicode\n\
1923 : \n\
1924 : Return a copy of the string S with leading and trailing\n\
1925 : whitespace removed.\n\
1926 : If chars is given and not None, remove characters in chars instead.\n\
1927 : If chars is unicode, S will be converted to unicode before stripping");
1928 :
1929 : static PyObject *
1930 753 : string_strip(PyStringObject *self, PyObject *args)
1931 : {
1932 753 : if (PyTuple_GET_SIZE(args) == 0)
1933 753 : return do_strip(self, BOTHSTRIP); /* Common case */
1934 : else
1935 0 : return do_argstrip(self, BOTHSTRIP, args);
1936 : }
1937 :
1938 :
1939 : PyDoc_STRVAR(lstrip__doc__,
1940 : "S.lstrip([chars]) -> string or unicode\n\
1941 : \n\
1942 : Return a copy of the string S with leading whitespace removed.\n\
1943 : If chars is given and not None, remove characters in chars instead.\n\
1944 : If chars is unicode, S will be converted to unicode before stripping");
1945 :
1946 : static PyObject *
1947 0 : string_lstrip(PyStringObject *self, PyObject *args)
1948 : {
1949 0 : if (PyTuple_GET_SIZE(args) == 0)
1950 0 : return do_strip(self, LEFTSTRIP); /* Common case */
1951 : else
1952 0 : return do_argstrip(self, LEFTSTRIP, args);
1953 : }
1954 :
1955 :
1956 : PyDoc_STRVAR(rstrip__doc__,
1957 : "S.rstrip([chars]) -> string or unicode\n\
1958 : \n\
1959 : Return a copy of the string S with trailing whitespace removed.\n\
1960 : If chars is given and not None, remove characters in chars instead.\n\
1961 : If chars is unicode, S will be converted to unicode before stripping");
1962 :
1963 : static PyObject *
1964 9 : string_rstrip(PyStringObject *self, PyObject *args)
1965 : {
1966 9 : if (PyTuple_GET_SIZE(args) == 0)
1967 0 : return do_strip(self, RIGHTSTRIP); /* Common case */
1968 : else
1969 9 : return do_argstrip(self, RIGHTSTRIP, args);
1970 : }
1971 :
1972 :
1973 : PyDoc_STRVAR(lower__doc__,
1974 : "S.lower() -> string\n\
1975 : \n\
1976 : Return a copy of the string S converted to lowercase.");
1977 :
1978 : /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1979 : #ifndef _tolower
1980 : #define _tolower tolower
1981 : #endif
1982 :
1983 : static PyObject *
1984 0 : string_lower(PyStringObject *self)
1985 : {
1986 : char *s;
1987 0 : Py_ssize_t i, n = PyString_GET_SIZE(self);
1988 : PyObject *newobj;
1989 :
1990 0 : newobj = PyString_FromStringAndSize(NULL, n);
1991 0 : if (!newobj)
1992 0 : return NULL;
1993 :
1994 0 : s = PyString_AS_STRING(newobj);
1995 :
1996 0 : Py_MEMCPY(s, PyString_AS_STRING(self), n);
1997 :
1998 0 : for (i = 0; i < n; i++) {
1999 0 : int c = Py_CHARMASK(s[i]);
2000 0 : if (isupper(c))
2001 0 : s[i] = _tolower(c);
2002 : }
2003 :
2004 0 : return newobj;
2005 : }
2006 :
2007 : PyDoc_STRVAR(upper__doc__,
2008 : "S.upper() -> string\n\
2009 : \n\
2010 : Return a copy of the string S converted to uppercase.");
2011 :
2012 : #ifndef _toupper
2013 : #define _toupper toupper
2014 : #endif
2015 :
2016 : static PyObject *
2017 0 : string_upper(PyStringObject *self)
2018 : {
2019 : char *s;
2020 0 : Py_ssize_t i, n = PyString_GET_SIZE(self);
2021 : PyObject *newobj;
2022 :
2023 0 : newobj = PyString_FromStringAndSize(NULL, n);
2024 0 : if (!newobj)
2025 0 : return NULL;
2026 :
2027 0 : s = PyString_AS_STRING(newobj);
2028 :
2029 0 : Py_MEMCPY(s, PyString_AS_STRING(self), n);
2030 :
2031 0 : for (i = 0; i < n; i++) {
2032 0 : int c = Py_CHARMASK(s[i]);
2033 0 : if (islower(c))
2034 0 : s[i] = _toupper(c);
2035 : }
2036 :
2037 0 : return newobj;
2038 : }
2039 :
2040 : PyDoc_STRVAR(title__doc__,
2041 : "S.title() -> string\n\
2042 : \n\
2043 : Return a titlecased version of S, i.e. words start with uppercase\n\
2044 : characters, all remaining cased characters have lowercase.");
2045 :
2046 : static PyObject*
2047 0 : string_title(PyStringObject *self)
2048 : {
2049 0 : char *s = PyString_AS_STRING(self), *s_new;
2050 0 : Py_ssize_t i, n = PyString_GET_SIZE(self);
2051 0 : int previous_is_cased = 0;
2052 : PyObject *newobj;
2053 :
2054 0 : newobj = PyString_FromStringAndSize(NULL, n);
2055 0 : if (newobj == NULL)
2056 0 : return NULL;
2057 0 : s_new = PyString_AsString(newobj);
2058 0 : for (i = 0; i < n; i++) {
2059 0 : int c = Py_CHARMASK(*s++);
2060 0 : if (islower(c)) {
2061 0 : if (!previous_is_cased)
2062 0 : c = toupper(c);
2063 0 : previous_is_cased = 1;
2064 0 : } else if (isupper(c)) {
2065 0 : if (previous_is_cased)
2066 0 : c = tolower(c);
2067 0 : previous_is_cased = 1;
2068 : } else
2069 0 : previous_is_cased = 0;
2070 0 : *s_new++ = c;
2071 : }
2072 0 : return newobj;
2073 : }
2074 :
2075 : PyDoc_STRVAR(capitalize__doc__,
2076 : "S.capitalize() -> string\n\
2077 : \n\
2078 : Return a copy of the string S with only its first character\n\
2079 : capitalized.");
2080 :
2081 : static PyObject *
2082 0 : string_capitalize(PyStringObject *self)
2083 : {
2084 0 : char *s = PyString_AS_STRING(self), *s_new;
2085 0 : Py_ssize_t i, n = PyString_GET_SIZE(self);
2086 : PyObject *newobj;
2087 :
2088 0 : newobj = PyString_FromStringAndSize(NULL, n);
2089 0 : if (newobj == NULL)
2090 0 : return NULL;
2091 0 : s_new = PyString_AsString(newobj);
2092 0 : if (0 < n) {
2093 0 : int c = Py_CHARMASK(*s++);
2094 0 : if (islower(c))
2095 0 : *s_new = toupper(c);
2096 : else
2097 0 : *s_new = c;
2098 0 : s_new++;
2099 : }
2100 0 : for (i = 1; i < n; i++) {
2101 0 : int c = Py_CHARMASK(*s++);
2102 0 : if (isupper(c))
2103 0 : *s_new = tolower(c);
2104 : else
2105 0 : *s_new = c;
2106 0 : s_new++;
2107 : }
2108 0 : return newobj;
2109 : }
2110 :
2111 :
2112 : PyDoc_STRVAR(count__doc__,
2113 : "S.count(sub[, start[, end]]) -> int\n\
2114 : \n\
2115 : Return the number of non-overlapping occurrences of substring sub in\n\
2116 : string S[start:end]. Optional arguments start and end are interpreted\n\
2117 : as in slice notation.");
2118 :
2119 : static PyObject *
2120 0 : string_count(PyStringObject *self, PyObject *args)
2121 : {
2122 : PyObject *sub_obj;
2123 0 : const char *str = PyString_AS_STRING(self), *sub;
2124 : Py_ssize_t sub_len;
2125 0 : Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2126 :
2127 0 : if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
2128 0 : return NULL;
2129 :
2130 0 : if (PyString_Check(sub_obj)) {
2131 0 : sub = PyString_AS_STRING(sub_obj);
2132 0 : sub_len = PyString_GET_SIZE(sub_obj);
2133 : }
2134 : #ifdef Py_USING_UNICODE
2135 0 : else if (PyUnicode_Check(sub_obj)) {
2136 : Py_ssize_t count;
2137 0 : count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2138 0 : if (count == -1)
2139 0 : return NULL;
2140 : else
2141 0 : return PyInt_FromSsize_t(count);
2142 : }
2143 : #endif
2144 0 : else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2145 0 : return NULL;
2146 :
2147 0 : ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
2148 :
2149 0 : return PyInt_FromSsize_t(
2150 : stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2151 : );
2152 : }
2153 :
2154 : PyDoc_STRVAR(swapcase__doc__,
2155 : "S.swapcase() -> string\n\
2156 : \n\
2157 : Return a copy of the string S with uppercase characters\n\
2158 : converted to lowercase and vice versa.");
2159 :
2160 : static PyObject *
2161 0 : string_swapcase(PyStringObject *self)
2162 : {
2163 0 : char *s = PyString_AS_STRING(self), *s_new;
2164 0 : Py_ssize_t i, n = PyString_GET_SIZE(self);
2165 : PyObject *newobj;
2166 :
2167 0 : newobj = PyString_FromStringAndSize(NULL, n);
2168 0 : if (newobj == NULL)
2169 0 : return NULL;
2170 0 : s_new = PyString_AsString(newobj);
2171 0 : for (i = 0; i < n; i++) {
2172 0 : int c = Py_CHARMASK(*s++);
2173 0 : if (islower(c)) {
2174 0 : *s_new = toupper(c);
2175 : }
2176 0 : else if (isupper(c)) {
2177 0 : *s_new = tolower(c);
2178 : }
2179 : else
2180 0 : *s_new = c;
2181 0 : s_new++;
2182 : }
2183 0 : return newobj;
2184 : }
2185 :
2186 :
2187 : PyDoc_STRVAR(translate__doc__,
2188 : "S.translate(table [,deletechars]) -> string\n\
2189 : \n\
2190 : Return a copy of the string S, where all characters occurring\n\
2191 : in the optional argument deletechars are removed, and the\n\
2192 : remaining characters have been mapped through the given\n\
2193 : translation table, which must be a string of length 256 or None.\n\
2194 : If the table argument is None, no translation is applied and\n\
2195 : the operation simply removes the characters in deletechars.");
2196 :
2197 : static PyObject *
2198 237 : string_translate(PyStringObject *self, PyObject *args)
2199 : {
2200 : register char *input, *output;
2201 : const char *table;
2202 237 : register Py_ssize_t i, c, changed = 0;
2203 237 : PyObject *input_obj = (PyObject*)self;
2204 237 : const char *output_start, *del_table=NULL;
2205 237 : Py_ssize_t inlen, tablen, dellen = 0;
2206 : PyObject *result;
2207 : int trans_table[256];
2208 237 : PyObject *tableobj, *delobj = NULL;
2209 :
2210 237 : if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2211 : &tableobj, &delobj))
2212 0 : return NULL;
2213 :
2214 237 : if (PyString_Check(tableobj)) {
2215 237 : table = PyString_AS_STRING(tableobj);
2216 237 : tablen = PyString_GET_SIZE(tableobj);
2217 : }
2218 0 : else if (tableobj == Py_None) {
2219 0 : table = NULL;
2220 0 : tablen = 256;
2221 : }
2222 : #ifdef Py_USING_UNICODE
2223 0 : else if (PyUnicode_Check(tableobj)) {
2224 : /* Unicode .translate() does not support the deletechars
2225 : parameter; instead a mapping to None will cause characters
2226 : to be deleted. */
2227 0 : if (delobj != NULL) {
2228 0 : PyErr_SetString(PyExc_TypeError,
2229 : "deletions are implemented differently for unicode");
2230 0 : return NULL;
2231 : }
2232 0 : return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2233 : }
2234 : #endif
2235 0 : else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2236 0 : return NULL;
2237 :
2238 237 : if (tablen != 256) {
2239 0 : PyErr_SetString(PyExc_ValueError,
2240 : "translation table must be 256 characters long");
2241 0 : return NULL;
2242 : }
2243 :
2244 237 : if (delobj != NULL) {
2245 0 : if (PyString_Check(delobj)) {
2246 0 : del_table = PyString_AS_STRING(delobj);
2247 0 : dellen = PyString_GET_SIZE(delobj);
2248 : }
2249 : #ifdef Py_USING_UNICODE
2250 0 : else if (PyUnicode_Check(delobj)) {
2251 0 : PyErr_SetString(PyExc_TypeError,
2252 : "deletions are implemented differently for unicode");
2253 0 : return NULL;
2254 : }
2255 : #endif
2256 0 : else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2257 0 : return NULL;
2258 : }
2259 : else {
2260 237 : del_table = NULL;
2261 237 : dellen = 0;
2262 : }
2263 :
2264 237 : inlen = PyString_GET_SIZE(input_obj);
2265 237 : result = PyString_FromStringAndSize((char *)NULL, inlen);
2266 237 : if (result == NULL)
2267 0 : return NULL;
2268 237 : output_start = output = PyString_AsString(result);
2269 237 : input = PyString_AS_STRING(input_obj);
2270 :
2271 237 : if (dellen == 0 && table != NULL) {
2272 : /* If no deletions are required, use faster code */
2273 51285 : for (i = inlen; --i >= 0; ) {
2274 50811 : c = Py_CHARMASK(*input++);
2275 50811 : if (Py_CHARMASK((*output++ = table[c])) != c)
2276 50745 : changed = 1;
2277 : }
2278 237 : if (changed || !PyString_CheckExact(input_obj))
2279 237 : return result;
2280 0 : Py_DECREF(result);
2281 0 : Py_INCREF(input_obj);
2282 0 : return input_obj;
2283 : }
2284 :
2285 0 : if (table == NULL) {
2286 0 : for (i = 0; i < 256; i++)
2287 0 : trans_table[i] = Py_CHARMASK(i);
2288 : } else {
2289 0 : for (i = 0; i < 256; i++)
2290 0 : trans_table[i] = Py_CHARMASK(table[i]);
2291 : }
2292 :
2293 0 : for (i = 0; i < dellen; i++)
2294 0 : trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2295 :
2296 0 : for (i = inlen; --i >= 0; ) {
2297 0 : c = Py_CHARMASK(*input++);
2298 0 : if (trans_table[c] != -1)
2299 0 : if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2300 0 : continue;
2301 0 : changed = 1;
2302 : }
2303 0 : if (!changed && PyString_CheckExact(input_obj)) {
2304 0 : Py_DECREF(result);
2305 0 : Py_INCREF(input_obj);
2306 0 : return input_obj;
2307 : }
2308 : /* Fix the size of the resulting string */
2309 0 : if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2310 0 : return NULL;
2311 0 : return result;
2312 : }
2313 :
2314 :
2315 : /* find and count characters and substrings */
2316 :
/* Locate the first occurrence of byte `c` within the first `target_len`
   bytes starting at `target`.  Evaluates to a `char *` pointing at the
   match, or NULL when the byte does not occur.  Thin wrapper around
   memchr(); every macro argument is parenthesized in the expansion so
   that arbitrary expressions can be passed safely. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))
2319 :
2320 : /* String ops must return a string. */
2321 : /* If the object is subclass of string, create a copy */
2322 : Py_LOCAL(PyStringObject *)
2323 30 : return_self(PyStringObject *self)
2324 : {
2325 30 : if (PyString_CheckExact(self)) {
2326 30 : Py_INCREF(self);
2327 30 : return self;
2328 : }
2329 0 : return (PyStringObject *)PyString_FromStringAndSize(
2330 0 : PyString_AS_STRING(self),
2331 : PyString_GET_SIZE(self));
2332 : }
2333 :
2334 : Py_LOCAL_INLINE(Py_ssize_t)
2335 27 : countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
2336 : {
2337 27 : Py_ssize_t count=0;
2338 27 : const char *start=target;
2339 27 : const char *end=target+target_len;
2340 :
2341 282 : while ( (start=findchar(start, end-start, c)) != NULL ) {
2342 228 : count++;
2343 228 : if (count >= maxcount)
2344 0 : break;
2345 228 : start += 1;
2346 : }
2347 27 : return count;
2348 : }
2349 :
2350 :
2351 : /* Algorithms for different cases of string replacement */
2352 :
2353 : /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2354 : Py_LOCAL(PyStringObject *)
2355 0 : replace_interleave(PyStringObject *self,
2356 : const char *to_s, Py_ssize_t to_len,
2357 : Py_ssize_t maxcount)
2358 : {
2359 : char *self_s, *result_s;
2360 : Py_ssize_t self_len, result_len;
2361 : Py_ssize_t count, i, product;
2362 : PyStringObject *result;
2363 :
2364 0 : self_len = PyString_GET_SIZE(self);
2365 :
2366 : /* 1 at the end plus 1 after every character */
2367 0 : count = self_len+1;
2368 0 : if (maxcount < count)
2369 0 : count = maxcount;
2370 :
2371 : /* Check for overflow */
2372 : /* result_len = count * to_len + self_len; */
2373 0 : product = count * to_len;
2374 0 : if (product / to_len != count) {
2375 0 : PyErr_SetString(PyExc_OverflowError,
2376 : "replace string is too long");
2377 0 : return NULL;
2378 : }
2379 0 : result_len = product + self_len;
2380 0 : if (result_len < 0) {
2381 0 : PyErr_SetString(PyExc_OverflowError,
2382 : "replace string is too long");
2383 0 : return NULL;
2384 : }
2385 :
2386 0 : if (! (result = (PyStringObject *)
2387 : PyString_FromStringAndSize(NULL, result_len)) )
2388 0 : return NULL;
2389 :
2390 0 : self_s = PyString_AS_STRING(self);
2391 0 : result_s = PyString_AS_STRING(result);
2392 :
2393 : /* TODO: special case single character, which doesn't need memcpy */
2394 :
2395 : /* Lay the first one down (guaranteed this will occur) */
2396 0 : Py_MEMCPY(result_s, to_s, to_len);
2397 0 : result_s += to_len;
2398 0 : count -= 1;
2399 :
2400 0 : for (i=0; i<count; i++) {
2401 0 : *result_s++ = *self_s++;
2402 0 : Py_MEMCPY(result_s, to_s, to_len);
2403 0 : result_s += to_len;
2404 : }
2405 :
2406 : /* Copy the rest of the original string */
2407 0 : Py_MEMCPY(result_s, self_s, self_len-i);
2408 :
2409 0 : return result;
2410 : }
2411 :
2412 : /* Special case for deleting a single character */
2413 : /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2414 : Py_LOCAL(PyStringObject *)
2415 27 : replace_delete_single_character(PyStringObject *self,
2416 : char from_c, Py_ssize_t maxcount)
2417 : {
2418 : char *self_s, *result_s;
2419 : char *start, *next, *end;
2420 : Py_ssize_t self_len, result_len;
2421 : Py_ssize_t count;
2422 : PyStringObject *result;
2423 :
2424 27 : self_len = PyString_GET_SIZE(self);
2425 27 : self_s = PyString_AS_STRING(self);
2426 :
2427 27 : count = countchar(self_s, self_len, from_c, maxcount);
2428 27 : if (count == 0) {
2429 0 : return return_self(self);
2430 : }
2431 :
2432 27 : result_len = self_len - count; /* from_len == 1 */
2433 : assert(result_len>=0);
2434 :
2435 27 : if ( (result = (PyStringObject *)
2436 : PyString_FromStringAndSize(NULL, result_len)) == NULL)
2437 0 : return NULL;
2438 27 : result_s = PyString_AS_STRING(result);
2439 :
2440 27 : start = self_s;
2441 27 : end = self_s + self_len;
2442 282 : while (count-- > 0) {
2443 228 : next = findchar(start, end-start, from_c);
2444 228 : if (next == NULL)
2445 0 : break;
2446 228 : Py_MEMCPY(result_s, start, next-start);
2447 228 : result_s += (next-start);
2448 228 : start = next+1;
2449 : }
2450 27 : Py_MEMCPY(result_s, start, end-start);
2451 :
2452 27 : return result;
2453 : }
2454 :
2455 : /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2456 :
2457 : Py_LOCAL(PyStringObject *)
2458 0 : replace_delete_substring(PyStringObject *self,
2459 : const char *from_s, Py_ssize_t from_len,
2460 : Py_ssize_t maxcount) {
2461 : char *self_s, *result_s;
2462 : char *start, *next, *end;
2463 : Py_ssize_t self_len, result_len;
2464 : Py_ssize_t count, offset;
2465 : PyStringObject *result;
2466 :
2467 0 : self_len = PyString_GET_SIZE(self);
2468 0 : self_s = PyString_AS_STRING(self);
2469 :
2470 0 : count = stringlib_count(self_s, self_len,
2471 : from_s, from_len,
2472 : maxcount);
2473 :
2474 0 : if (count == 0) {
2475 : /* no matches */
2476 0 : return return_self(self);
2477 : }
2478 :
2479 0 : result_len = self_len - (count * from_len);
2480 : assert (result_len>=0);
2481 :
2482 0 : if ( (result = (PyStringObject *)
2483 : PyString_FromStringAndSize(NULL, result_len)) == NULL )
2484 0 : return NULL;
2485 :
2486 0 : result_s = PyString_AS_STRING(result);
2487 :
2488 0 : start = self_s;
2489 0 : end = self_s + self_len;
2490 0 : while (count-- > 0) {
2491 0 : offset = stringlib_find(start, end-start,
2492 : from_s, from_len,
2493 : 0);
2494 0 : if (offset == -1)
2495 0 : break;
2496 0 : next = start + offset;
2497 :
2498 0 : Py_MEMCPY(result_s, start, next-start);
2499 :
2500 0 : result_s += (next-start);
2501 0 : start = next+from_len;
2502 : }
2503 0 : Py_MEMCPY(result_s, start, end-start);
2504 0 : return result;
2505 : }
2506 :
2507 : /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2508 : Py_LOCAL(PyStringObject *)
2509 30 : replace_single_character_in_place(PyStringObject *self,
2510 : char from_c, char to_c,
2511 : Py_ssize_t maxcount)
2512 : {
2513 : char *self_s, *result_s, *start, *end, *next;
2514 : Py_ssize_t self_len;
2515 : PyStringObject *result;
2516 :
2517 : /* The result string will be the same size */
2518 30 : self_s = PyString_AS_STRING(self);
2519 30 : self_len = PyString_GET_SIZE(self);
2520 :
2521 30 : next = findchar(self_s, self_len, from_c);
2522 :
2523 30 : if (next == NULL) {
2524 : /* No matches; return the original string */
2525 30 : return return_self(self);
2526 : }
2527 :
2528 : /* Need to make a new string */
2529 0 : result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2530 0 : if (result == NULL)
2531 0 : return NULL;
2532 0 : result_s = PyString_AS_STRING(result);
2533 0 : Py_MEMCPY(result_s, self_s, self_len);
2534 :
2535 : /* change everything in-place, starting with this one */
2536 0 : start = result_s + (next-self_s);
2537 0 : *start = to_c;
2538 0 : start++;
2539 0 : end = result_s + self_len;
2540 :
2541 0 : while (--maxcount > 0) {
2542 0 : next = findchar(start, end-start, from_c);
2543 0 : if (next == NULL)
2544 0 : break;
2545 0 : *next = to_c;
2546 0 : start = next+1;
2547 : }
2548 :
2549 0 : return result;
2550 : }
2551 :
2552 : /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2553 : Py_LOCAL(PyStringObject *)
2554 0 : replace_substring_in_place(PyStringObject *self,
2555 : const char *from_s, Py_ssize_t from_len,
2556 : const char *to_s, Py_ssize_t to_len,
2557 : Py_ssize_t maxcount)
2558 : {
2559 : char *result_s, *start, *end;
2560 : char *self_s;
2561 : Py_ssize_t self_len, offset;
2562 : PyStringObject *result;
2563 :
2564 : /* The result string will be the same size */
2565 :
2566 0 : self_s = PyString_AS_STRING(self);
2567 0 : self_len = PyString_GET_SIZE(self);
2568 :
2569 0 : offset = stringlib_find(self_s, self_len,
2570 : from_s, from_len,
2571 : 0);
2572 0 : if (offset == -1) {
2573 : /* No matches; return the original string */
2574 0 : return return_self(self);
2575 : }
2576 :
2577 : /* Need to make a new string */
2578 0 : result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2579 0 : if (result == NULL)
2580 0 : return NULL;
2581 0 : result_s = PyString_AS_STRING(result);
2582 0 : Py_MEMCPY(result_s, self_s, self_len);
2583 :
2584 : /* change everything in-place, starting with this one */
2585 0 : start = result_s + offset;
2586 0 : Py_MEMCPY(start, to_s, from_len);
2587 0 : start += from_len;
2588 0 : end = result_s + self_len;
2589 :
2590 0 : while ( --maxcount > 0) {
2591 0 : offset = stringlib_find(start, end-start,
2592 : from_s, from_len,
2593 : 0);
2594 0 : if (offset==-1)
2595 0 : break;
2596 0 : Py_MEMCPY(start+offset, to_s, from_len);
2597 0 : start += offset+from_len;
2598 : }
2599 :
2600 0 : return result;
2601 : }
2602 :
2603 : /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2604 : Py_LOCAL(PyStringObject *)
2605 0 : replace_single_character(PyStringObject *self,
2606 : char from_c,
2607 : const char *to_s, Py_ssize_t to_len,
2608 : Py_ssize_t maxcount)
2609 : {
2610 : char *self_s, *result_s;
2611 : char *start, *next, *end;
2612 : Py_ssize_t self_len, result_len;
2613 : Py_ssize_t count, product;
2614 : PyStringObject *result;
2615 :
2616 0 : self_s = PyString_AS_STRING(self);
2617 0 : self_len = PyString_GET_SIZE(self);
2618 :
2619 0 : count = countchar(self_s, self_len, from_c, maxcount);
2620 0 : if (count == 0) {
2621 : /* no matches, return unchanged */
2622 0 : return return_self(self);
2623 : }
2624 :
2625 : /* use the difference between current and new, hence the "-1" */
2626 : /* result_len = self_len + count * (to_len-1) */
2627 0 : product = count * (to_len-1);
2628 0 : if (product / (to_len-1) != count) {
2629 0 : PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2630 0 : return NULL;
2631 : }
2632 0 : result_len = self_len + product;
2633 0 : if (result_len < 0) {
2634 0 : PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2635 0 : return NULL;
2636 : }
2637 :
2638 0 : if ( (result = (PyStringObject *)
2639 : PyString_FromStringAndSize(NULL, result_len)) == NULL)
2640 0 : return NULL;
2641 0 : result_s = PyString_AS_STRING(result);
2642 :
2643 0 : start = self_s;
2644 0 : end = self_s + self_len;
2645 0 : while (count-- > 0) {
2646 0 : next = findchar(start, end-start, from_c);
2647 0 : if (next == NULL)
2648 0 : break;
2649 :
2650 0 : if (next == start) {
2651 : /* replace with the 'to' */
2652 0 : Py_MEMCPY(result_s, to_s, to_len);
2653 0 : result_s += to_len;
2654 0 : start += 1;
2655 : } else {
2656 : /* copy the unchanged old then the 'to' */
2657 0 : Py_MEMCPY(result_s, start, next-start);
2658 0 : result_s += (next-start);
2659 0 : Py_MEMCPY(result_s, to_s, to_len);
2660 0 : result_s += to_len;
2661 0 : start = next+1;
2662 : }
2663 : }
2664 : /* Copy the remainder of the remaining string */
2665 0 : Py_MEMCPY(result_s, start, end-start);
2666 :
2667 0 : return result;
2668 : }
2669 :
2670 : /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2671 : Py_LOCAL(PyStringObject *)
2672 0 : replace_substring(PyStringObject *self,
2673 : const char *from_s, Py_ssize_t from_len,
2674 : const char *to_s, Py_ssize_t to_len,
2675 : Py_ssize_t maxcount) {
2676 : char *self_s, *result_s;
2677 : char *start, *next, *end;
2678 : Py_ssize_t self_len, result_len;
2679 : Py_ssize_t count, offset, product;
2680 : PyStringObject *result;
2681 :
2682 0 : self_s = PyString_AS_STRING(self);
2683 0 : self_len = PyString_GET_SIZE(self);
2684 :
2685 0 : count = stringlib_count(self_s, self_len,
2686 : from_s, from_len,
2687 : maxcount);
2688 :
2689 0 : if (count == 0) {
2690 : /* no matches, return unchanged */
2691 0 : return return_self(self);
2692 : }
2693 :
2694 : /* Check for overflow */
2695 : /* result_len = self_len + count * (to_len-from_len) */
2696 0 : product = count * (to_len-from_len);
2697 0 : if (product / (to_len-from_len) != count) {
2698 0 : PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2699 0 : return NULL;
2700 : }
2701 0 : result_len = self_len + product;
2702 0 : if (result_len < 0) {
2703 0 : PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2704 0 : return NULL;
2705 : }
2706 :
2707 0 : if ( (result = (PyStringObject *)
2708 : PyString_FromStringAndSize(NULL, result_len)) == NULL)
2709 0 : return NULL;
2710 0 : result_s = PyString_AS_STRING(result);
2711 :
2712 0 : start = self_s;
2713 0 : end = self_s + self_len;
2714 0 : while (count-- > 0) {
2715 0 : offset = stringlib_find(start, end-start,
2716 : from_s, from_len,
2717 : 0);
2718 0 : if (offset == -1)
2719 0 : break;
2720 0 : next = start+offset;
2721 0 : if (next == start) {
2722 : /* replace with the 'to' */
2723 0 : Py_MEMCPY(result_s, to_s, to_len);
2724 0 : result_s += to_len;
2725 0 : start += from_len;
2726 : } else {
2727 : /* copy the unchanged old then the 'to' */
2728 0 : Py_MEMCPY(result_s, start, next-start);
2729 0 : result_s += (next-start);
2730 0 : Py_MEMCPY(result_s, to_s, to_len);
2731 0 : result_s += to_len;
2732 0 : start = next+from_len;
2733 : }
2734 : }
2735 : /* Copy the remainder of the remaining string */
2736 0 : Py_MEMCPY(result_s, start, end-start);
2737 :
2738 0 : return result;
2739 : }
2740 :
2741 :
2742 : Py_LOCAL(PyStringObject *)
2743 57 : replace(PyStringObject *self,
2744 : const char *from_s, Py_ssize_t from_len,
2745 : const char *to_s, Py_ssize_t to_len,
2746 : Py_ssize_t maxcount)
2747 : {
2748 57 : if (maxcount < 0) {
2749 57 : maxcount = PY_SSIZE_T_MAX;
2750 0 : } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2751 : /* nothing to do; return the original string */
2752 0 : return return_self(self);
2753 : }
2754 :
2755 57 : if (maxcount == 0 ||
2756 0 : (from_len == 0 && to_len == 0)) {
2757 : /* nothing to do; return the original string */
2758 0 : return return_self(self);
2759 : }
2760 :
2761 : /* Handle zero-length special cases */
2762 :
2763 57 : if (from_len == 0) {
2764 : /* insert the 'to' string everywhere. */
2765 : /* >>> "Python".replace("", ".") */
2766 : /* '.P.y.t.h.o.n.' */
2767 0 : return replace_interleave(self, to_s, to_len, maxcount);
2768 : }
2769 :
2770 : /* Except for "".replace("", "A") == "A" there is no way beyond this */
2771 : /* point for an empty self string to generate a non-empty string */
2772 : /* Special case so the remaining code always gets a non-empty string */
2773 57 : if (PyString_GET_SIZE(self) == 0) {
2774 0 : return return_self(self);
2775 : }
2776 :
2777 57 : if (to_len == 0) {
2778 : /* delete all occurrences of 'from' string */
2779 27 : if (from_len == 1) {
2780 27 : return replace_delete_single_character(
2781 27 : self, from_s[0], maxcount);
2782 : } else {
2783 0 : return replace_delete_substring(self, from_s, from_len, maxcount);
2784 : }
2785 : }
2786 :
2787 : /* Handle special case where both strings have the same length */
2788 :
2789 30 : if (from_len == to_len) {
2790 30 : if (from_len == 1) {
2791 60 : return replace_single_character_in_place(
2792 : self,
2793 30 : from_s[0],
2794 30 : to_s[0],
2795 : maxcount);
2796 : } else {
2797 0 : return replace_substring_in_place(
2798 : self, from_s, from_len, to_s, to_len, maxcount);
2799 : }
2800 : }
2801 :
2802 : /* Otherwise use the more generic algorithms */
2803 0 : if (from_len == 1) {
2804 0 : return replace_single_character(self, from_s[0],
2805 : to_s, to_len, maxcount);
2806 : } else {
2807 : /* len('from')>=2, len('to')>=1 */
2808 0 : return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2809 : }
2810 : }
2811 :
2812 : PyDoc_STRVAR(replace__doc__,
2813 : "S.replace(old, new[, count]) -> string\n\
2814 : \n\
2815 : Return a copy of string S with all occurrences of substring\n\
2816 : old replaced by new. If the optional argument count is\n\
2817 : given, only the first count occurrences are replaced.");
2818 :
2819 : static PyObject *
2820 57 : string_replace(PyStringObject *self, PyObject *args)
2821 : {
2822 57 : Py_ssize_t count = -1;
2823 : PyObject *from, *to;
2824 : const char *from_s, *to_s;
2825 : Py_ssize_t from_len, to_len;
2826 :
2827 57 : if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2828 0 : return NULL;
2829 :
2830 57 : if (PyString_Check(from)) {
2831 57 : from_s = PyString_AS_STRING(from);
2832 57 : from_len = PyString_GET_SIZE(from);
2833 : }
2834 : #ifdef Py_USING_UNICODE
2835 57 : if (PyUnicode_Check(from))
2836 0 : return PyUnicode_Replace((PyObject *)self,
2837 : from, to, count);
2838 : #endif
2839 57 : else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2840 0 : return NULL;
2841 :
2842 57 : if (PyString_Check(to)) {
2843 57 : to_s = PyString_AS_STRING(to);
2844 57 : to_len = PyString_GET_SIZE(to);
2845 : }
2846 : #ifdef Py_USING_UNICODE
2847 0 : else if (PyUnicode_Check(to))
2848 0 : return PyUnicode_Replace((PyObject *)self,
2849 : from, to, count);
2850 : #endif
2851 0 : else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2852 0 : return NULL;
2853 :
2854 57 : return (PyObject *)replace((PyStringObject *) self,
2855 : from_s, from_len,
2856 : to_s, to_len, count);
2857 : }
2858 :
2859 : /** End DALKE **/
2860 :
2861 : /* Matches the end (direction >= 0) or start (direction < 0) of self
2862 : * against substr, using the start and end arguments. Returns
2863 : * -1 on error, 0 if not found and 1 if found.
2864 : */
2865 : Py_LOCAL(int)
2866 804 : _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2867 : Py_ssize_t end, int direction)
2868 : {
2869 804 : Py_ssize_t len = PyString_GET_SIZE(self);
2870 : Py_ssize_t slen;
2871 : const char* sub;
2872 : const char* str;
2873 :
2874 804 : if (PyString_Check(substr)) {
2875 804 : sub = PyString_AS_STRING(substr);
2876 804 : slen = PyString_GET_SIZE(substr);
2877 : }
2878 : #ifdef Py_USING_UNICODE
2879 0 : else if (PyUnicode_Check(substr))
2880 0 : return PyUnicode_Tailmatch((PyObject *)self,
2881 : substr, start, end, direction);
2882 : #endif
2883 0 : else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2884 0 : return -1;
2885 804 : str = PyString_AS_STRING(self);
2886 :
2887 804 : ADJUST_INDICES(start, end, len);
2888 :
2889 804 : if (direction < 0) {
2890 : /* startswith */
2891 570 : if (start+slen > len)
2892 0 : return 0;
2893 : } else {
2894 : /* endswith */
2895 234 : if (end-start < slen || start > len)
2896 0 : return 0;
2897 :
2898 234 : if (end-slen > start)
2899 234 : start = end - slen;
2900 : }
2901 804 : if (end-start >= slen)
2902 804 : return ! memcmp(str+start, sub, slen);
2903 0 : return 0;
2904 : }
2905 :
2906 :
2907 : PyDoc_STRVAR(startswith__doc__,
2908 : "S.startswith(prefix[, start[, end]]) -> bool\n\
2909 : \n\
2910 : Return True if S starts with the specified prefix, False otherwise.\n\
2911 : With optional start, test S beginning at that position.\n\
2912 : With optional end, stop comparing S at that position.\n\
2913 : prefix can also be a tuple of strings to try.");
2914 :
2915 : static PyObject *
2916 570 : string_startswith(PyStringObject *self, PyObject *args)
2917 : {
2918 570 : Py_ssize_t start = 0;
2919 570 : Py_ssize_t end = PY_SSIZE_T_MAX;
2920 : PyObject *subobj;
2921 : int result;
2922 :
2923 570 : if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
2924 0 : return NULL;
2925 570 : if (PyTuple_Check(subobj)) {
2926 : Py_ssize_t i;
2927 0 : for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2928 0 : result = _string_tailmatch(self,
2929 0 : PyTuple_GET_ITEM(subobj, i),
2930 : start, end, -1);
2931 0 : if (result == -1)
2932 0 : return NULL;
2933 0 : else if (result) {
2934 0 : Py_RETURN_TRUE;
2935 : }
2936 : }
2937 0 : Py_RETURN_FALSE;
2938 : }
2939 570 : result = _string_tailmatch(self, subobj, start, end, -1);
2940 570 : if (result == -1) {
2941 0 : if (PyErr_ExceptionMatches(PyExc_TypeError))
2942 0 : PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
2943 0 : "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2944 0 : return NULL;
2945 : }
2946 : else
2947 570 : return PyBool_FromLong(result);
2948 : }
2949 :
2950 :
2951 : PyDoc_STRVAR(endswith__doc__,
2952 : "S.endswith(suffix[, start[, end]]) -> bool\n\
2953 : \n\
2954 : Return True if S ends with the specified suffix, False otherwise.\n\
2955 : With optional start, test S beginning at that position.\n\
2956 : With optional end, stop comparing S at that position.\n\
2957 : suffix can also be a tuple of strings to try.");
2958 :
2959 : static PyObject *
2960 234 : string_endswith(PyStringObject *self, PyObject *args)
2961 : {
2962 234 : Py_ssize_t start = 0;
2963 234 : Py_ssize_t end = PY_SSIZE_T_MAX;
2964 : PyObject *subobj;
2965 : int result;
2966 :
2967 234 : if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
2968 0 : return NULL;
2969 234 : if (PyTuple_Check(subobj)) {
2970 : Py_ssize_t i;
2971 0 : for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2972 0 : result = _string_tailmatch(self,
2973 0 : PyTuple_GET_ITEM(subobj, i),
2974 : start, end, +1);
2975 0 : if (result == -1)
2976 0 : return NULL;
2977 0 : else if (result) {
2978 0 : Py_RETURN_TRUE;
2979 : }
2980 : }
2981 0 : Py_RETURN_FALSE;
2982 : }
2983 234 : result = _string_tailmatch(self, subobj, start, end, +1);
2984 234 : if (result == -1) {
2985 0 : if (PyErr_ExceptionMatches(PyExc_TypeError))
2986 0 : PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
2987 0 : "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2988 0 : return NULL;
2989 : }
2990 : else
2991 234 : return PyBool_FromLong(result);
2992 : }
2993 :
2994 :
2995 : PyDoc_STRVAR(encode__doc__,
2996 : "S.encode([encoding[,errors]]) -> object\n\
2997 : \n\
2998 : Encodes S using the codec registered for encoding. encoding defaults\n\
2999 : to the default encoding. errors may be given to set a different error\n\
3000 : handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3001 : a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3002 : 'xmlcharrefreplace' as well as any other name registered with\n\
3003 : codecs.register_error that is able to handle UnicodeEncodeErrors.");
3004 :
3005 : static PyObject *
3006 0 : string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3007 : {
3008 : static char *kwlist[] = {"encoding", "errors", 0};
3009 0 : char *encoding = NULL;
3010 0 : char *errors = NULL;
3011 : PyObject *v;
3012 :
3013 0 : if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
3014 : kwlist, &encoding, &errors))
3015 0 : return NULL;
3016 0 : v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3017 0 : if (v == NULL)
3018 0 : goto onError;
3019 0 : if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3020 0 : PyErr_Format(PyExc_TypeError,
3021 : "encoder did not return a string/unicode object "
3022 : "(type=%.400s)",
3023 0 : Py_TYPE(v)->tp_name);
3024 0 : Py_DECREF(v);
3025 0 : return NULL;
3026 : }
3027 0 : return v;
3028 :
3029 : onError:
3030 0 : return NULL;
3031 : }
3032 :
3033 :
3034 : PyDoc_STRVAR(decode__doc__,
3035 : "S.decode([encoding[,errors]]) -> object\n\
3036 : \n\
3037 : Decodes S using the codec registered for encoding. encoding defaults\n\
3038 : to the default encoding. errors may be given to set a different error\n\
3039 : handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3040 : a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3041 : as well as any other name registered with codecs.register_error that is\n\
3042 : able to handle UnicodeDecodeErrors.");
3043 :
3044 : static PyObject *
3045 0 : string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3046 : {
3047 : static char *kwlist[] = {"encoding", "errors", 0};
3048 0 : char *encoding = NULL;
3049 0 : char *errors = NULL;
3050 : PyObject *v;
3051 :
3052 0 : if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3053 : kwlist, &encoding, &errors))
3054 0 : return NULL;
3055 0 : v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3056 0 : if (v == NULL)
3057 0 : goto onError;
3058 0 : if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3059 0 : PyErr_Format(PyExc_TypeError,
3060 : "decoder did not return a string/unicode object "
3061 : "(type=%.400s)",
3062 0 : Py_TYPE(v)->tp_name);
3063 0 : Py_DECREF(v);
3064 0 : return NULL;
3065 : }
3066 0 : return v;
3067 :
3068 : onError:
3069 0 : return NULL;
3070 : }
3071 :
3072 :
3073 : PyDoc_STRVAR(expandtabs__doc__,
3074 : "S.expandtabs([tabsize]) -> string\n\
3075 : \n\
3076 : Return a copy of S where all tab characters are expanded using spaces.\n\
3077 : If tabsize is not given, a tab size of 8 characters is assumed.");
3078 :
3079 : static PyObject*
3080 0 : string_expandtabs(PyStringObject *self, PyObject *args)
3081 : {
3082 : const char *e, *p, *qe;
3083 : char *q;
3084 : Py_ssize_t i, j, incr;
3085 : PyObject *u;
3086 0 : int tabsize = 8;
3087 :
3088 0 : if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3089 0 : return NULL;
3090 :
3091 : /* First pass: determine size of output string */
3092 0 : i = 0; /* chars up to and including most recent \n or \r */
3093 0 : j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3094 0 : e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3095 0 : for (p = PyString_AS_STRING(self); p < e; p++) {
3096 0 : if (*p == '\t') {
3097 0 : if (tabsize > 0) {
3098 0 : incr = tabsize - (j % tabsize);
3099 0 : if (j > PY_SSIZE_T_MAX - incr)
3100 0 : goto overflow1;
3101 0 : j += incr;
3102 : }
3103 : }
3104 : else {
3105 0 : if (j > PY_SSIZE_T_MAX - 1)
3106 0 : goto overflow1;
3107 0 : j++;
3108 0 : if (*p == '\n' || *p == '\r') {
3109 0 : if (i > PY_SSIZE_T_MAX - j)
3110 0 : goto overflow1;
3111 0 : i += j;
3112 0 : j = 0;
3113 : }
3114 : }
3115 : }
3116 :
3117 0 : if (i > PY_SSIZE_T_MAX - j)
3118 0 : goto overflow1;
3119 :
3120 : /* Second pass: create output string and fill it */
3121 0 : u = PyString_FromStringAndSize(NULL, i + j);
3122 0 : if (!u)
3123 0 : return NULL;
3124 :
3125 0 : j = 0; /* same as in first pass */
3126 0 : q = PyString_AS_STRING(u); /* next output char */
3127 0 : qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3128 :
3129 0 : for (p = PyString_AS_STRING(self); p < e; p++) {
3130 0 : if (*p == '\t') {
3131 0 : if (tabsize > 0) {
3132 0 : i = tabsize - (j % tabsize);
3133 0 : j += i;
3134 0 : while (i--) {
3135 0 : if (q >= qe)
3136 0 : goto overflow2;
3137 0 : *q++ = ' ';
3138 : }
3139 : }
3140 : }
3141 : else {
3142 0 : if (q >= qe)
3143 0 : goto overflow2;
3144 0 : *q++ = *p;
3145 0 : j++;
3146 0 : if (*p == '\n' || *p == '\r')
3147 0 : j = 0;
3148 : }
3149 : }
3150 :
3151 0 : return u;
3152 :
3153 : overflow2:
3154 0 : Py_DECREF(u);
3155 : overflow1:
3156 0 : PyErr_SetString(PyExc_OverflowError, "new string is too long");
3157 0 : return NULL;
3158 : }
3159 :
3160 : Py_LOCAL_INLINE(PyObject *)
3161 0 : pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3162 : {
3163 : PyObject *u;
3164 :
3165 0 : if (left < 0)
3166 0 : left = 0;
3167 0 : if (right < 0)
3168 0 : right = 0;
3169 :
3170 0 : if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3171 0 : Py_INCREF(self);
3172 0 : return (PyObject *)self;
3173 : }
3174 :
3175 0 : u = PyString_FromStringAndSize(NULL,
3176 0 : left + PyString_GET_SIZE(self) + right);
3177 0 : if (u) {
3178 0 : if (left)
3179 0 : memset(PyString_AS_STRING(u), fill, left);
3180 0 : Py_MEMCPY(PyString_AS_STRING(u) + left,
3181 0 : PyString_AS_STRING(self),
3182 0 : PyString_GET_SIZE(self));
3183 0 : if (right)
3184 0 : memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3185 : fill, right);
3186 : }
3187 :
3188 0 : return u;
3189 : }
3190 :
3191 : PyDoc_STRVAR(ljust__doc__,
3192 : "S.ljust(width[, fillchar]) -> string\n"
3193 : "\n"
3194 : "Return S left-justified in a string of length width. Padding is\n"
3195 : "done using the specified fill character (default is a space).");
3196 :
3197 : static PyObject *
3198 0 : string_ljust(PyStringObject *self, PyObject *args)
3199 : {
3200 : Py_ssize_t width;
3201 0 : char fillchar = ' ';
3202 :
3203 0 : if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3204 0 : return NULL;
3205 :
3206 0 : if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3207 0 : Py_INCREF(self);
3208 0 : return (PyObject*) self;
3209 : }
3210 :
3211 0 : return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3212 : }
3213 :
3214 :
3215 : PyDoc_STRVAR(rjust__doc__,
3216 : "S.rjust(width[, fillchar]) -> string\n"
3217 : "\n"
3218 : "Return S right-justified in a string of length width. Padding is\n"
3219 : "done using the specified fill character (default is a space)");
3220 :
3221 : static PyObject *
3222 0 : string_rjust(PyStringObject *self, PyObject *args)
3223 : {
3224 : Py_ssize_t width;
3225 0 : char fillchar = ' ';
3226 :
3227 0 : if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3228 0 : return NULL;
3229 :
3230 0 : if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3231 0 : Py_INCREF(self);
3232 0 : return (PyObject*) self;
3233 : }
3234 :
3235 0 : return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3236 : }
3237 :
3238 :
3239 : PyDoc_STRVAR(center__doc__,
3240 : "S.center(width[, fillchar]) -> string\n"
3241 : "\n"
3242 : "Return S centered in a string of length width. Padding is\n"
3243 : "done using the specified fill character (default is a space)");
3244 :
3245 : static PyObject *
3246 0 : string_center(PyStringObject *self, PyObject *args)
3247 : {
3248 : Py_ssize_t marg, left;
3249 : Py_ssize_t width;
3250 0 : char fillchar = ' ';
3251 :
3252 0 : if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3253 0 : return NULL;
3254 :
3255 0 : if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3256 0 : Py_INCREF(self);
3257 0 : return (PyObject*) self;
3258 : }
3259 :
3260 0 : marg = width - PyString_GET_SIZE(self);
3261 0 : left = marg / 2 + (marg & width & 1);
3262 :
3263 0 : return pad(self, left, marg - left, fillchar);
3264 : }
3265 :
3266 : PyDoc_STRVAR(zfill__doc__,
3267 : "S.zfill(width) -> string\n"
3268 : "\n"
3269 : "Pad a numeric string S with zeros on the left, to fill a field\n"
3270 : "of the specified width. The string S is never truncated.");
3271 :
3272 : static PyObject *
3273 0 : string_zfill(PyStringObject *self, PyObject *args)
3274 : {
3275 : Py_ssize_t fill;
3276 : PyObject *s;
3277 : char *p;
3278 : Py_ssize_t width;
3279 :
3280 0 : if (!PyArg_ParseTuple(args, "n:zfill", &width))
3281 0 : return NULL;
3282 :
3283 0 : if (PyString_GET_SIZE(self) >= width) {
3284 0 : if (PyString_CheckExact(self)) {
3285 0 : Py_INCREF(self);
3286 0 : return (PyObject*) self;
3287 : }
3288 : else
3289 0 : return PyString_FromStringAndSize(
3290 0 : PyString_AS_STRING(self),
3291 : PyString_GET_SIZE(self)
3292 : );
3293 : }
3294 :
3295 0 : fill = width - PyString_GET_SIZE(self);
3296 :
3297 0 : s = pad(self, fill, 0, '0');
3298 :
3299 0 : if (s == NULL)
3300 0 : return NULL;
3301 :
3302 0 : p = PyString_AS_STRING(s);
3303 0 : if (p[fill] == '+' || p[fill] == '-') {
3304 : /* move sign to beginning of string */
3305 0 : p[0] = p[fill];
3306 0 : p[fill] = '0';
3307 : }
3308 :
3309 0 : return (PyObject*) s;
3310 : }
3311 :
3312 : PyDoc_STRVAR(isspace__doc__,
3313 : "S.isspace() -> bool\n\
3314 : \n\
3315 : Return True if all characters in S are whitespace\n\
3316 : and there is at least one character in S, False otherwise.");
3317 :
3318 : static PyObject*
3319 0 : string_isspace(PyStringObject *self)
3320 : {
3321 0 : register const unsigned char *p
3322 : = (unsigned char *) PyString_AS_STRING(self);
3323 : register const unsigned char *e;
3324 :
3325 : /* Shortcut for single character strings */
3326 0 : if (PyString_GET_SIZE(self) == 1 &&
3327 0 : isspace(*p))
3328 0 : return PyBool_FromLong(1);
3329 :
3330 : /* Special case for empty strings */
3331 0 : if (PyString_GET_SIZE(self) == 0)
3332 0 : return PyBool_FromLong(0);
3333 :
3334 0 : e = p + PyString_GET_SIZE(self);
3335 0 : for (; p < e; p++) {
3336 0 : if (!isspace(*p))
3337 0 : return PyBool_FromLong(0);
3338 : }
3339 0 : return PyBool_FromLong(1);
3340 : }
3341 :
3342 :
3343 : PyDoc_STRVAR(isalpha__doc__,
3344 : "S.isalpha() -> bool\n\
3345 : \n\
3346 : Return True if all characters in S are alphabetic\n\
3347 : and there is at least one character in S, False otherwise.");
3348 :
3349 : static PyObject*
3350 2760 : string_isalpha(PyStringObject *self)
3351 : {
3352 2760 : register const unsigned char *p
3353 : = (unsigned char *) PyString_AS_STRING(self);
3354 : register const unsigned char *e;
3355 :
3356 : /* Shortcut for single character strings */
3357 5520 : if (PyString_GET_SIZE(self) == 1 &&
3358 2760 : isalpha(*p))
3359 1272 : return PyBool_FromLong(1);
3360 :
3361 : /* Special case for empty strings */
3362 1488 : if (PyString_GET_SIZE(self) == 0)
3363 0 : return PyBool_FromLong(0);
3364 :
3365 1488 : e = p + PyString_GET_SIZE(self);
3366 1488 : for (; p < e; p++) {
3367 1488 : if (!isalpha(*p))
3368 1488 : return PyBool_FromLong(0);
3369 : }
3370 0 : return PyBool_FromLong(1);
3371 : }
3372 :
3373 :
3374 : PyDoc_STRVAR(isalnum__doc__,
3375 : "S.isalnum() -> bool\n\
3376 : \n\
3377 : Return True if all characters in S are alphanumeric\n\
3378 : and there is at least one character in S, False otherwise.");
3379 :
3380 : static PyObject*
3381 960 : string_isalnum(PyStringObject *self)
3382 : {
3383 960 : register const unsigned char *p
3384 : = (unsigned char *) PyString_AS_STRING(self);
3385 : register const unsigned char *e;
3386 :
3387 : /* Shortcut for single character strings */
3388 1920 : if (PyString_GET_SIZE(self) == 1 &&
3389 960 : isalnum(*p))
3390 951 : return PyBool_FromLong(1);
3391 :
3392 : /* Special case for empty strings */
3393 9 : if (PyString_GET_SIZE(self) == 0)
3394 0 : return PyBool_FromLong(0);
3395 :
3396 9 : e = p + PyString_GET_SIZE(self);
3397 9 : for (; p < e; p++) {
3398 9 : if (!isalnum(*p))
3399 9 : return PyBool_FromLong(0);
3400 : }
3401 0 : return PyBool_FromLong(1);
3402 : }
3403 :
3404 :
3405 : PyDoc_STRVAR(isdigit__doc__,
3406 : "S.isdigit() -> bool\n\
3407 : \n\
3408 : Return True if all characters in S are digits\n\
3409 : and there is at least one character in S, False otherwise.");
3410 :
3411 : static PyObject*
3412 141 : string_isdigit(PyStringObject *self)
3413 : {
3414 141 : register const unsigned char *p
3415 : = (unsigned char *) PyString_AS_STRING(self);
3416 : register const unsigned char *e;
3417 :
3418 : /* Shortcut for single character strings */
3419 282 : if (PyString_GET_SIZE(self) == 1 &&
3420 141 : isdigit(*p))
3421 0 : return PyBool_FromLong(1);
3422 :
3423 : /* Special case for empty strings */
3424 141 : if (PyString_GET_SIZE(self) == 0)
3425 0 : return PyBool_FromLong(0);
3426 :
3427 141 : e = p + PyString_GET_SIZE(self);
3428 141 : for (; p < e; p++) {
3429 141 : if (!isdigit(*p))
3430 141 : return PyBool_FromLong(0);
3431 : }
3432 0 : return PyBool_FromLong(1);
3433 : }
3434 :
3435 :
3436 : PyDoc_STRVAR(islower__doc__,
3437 : "S.islower() -> bool\n\
3438 : \n\
3439 : Return True if all cased characters in S are lowercase and there is\n\
3440 : at least one cased character in S, False otherwise.");
3441 :
3442 : static PyObject*
3443 0 : string_islower(PyStringObject *self)
3444 : {
3445 0 : register const unsigned char *p
3446 : = (unsigned char *) PyString_AS_STRING(self);
3447 : register const unsigned char *e;
3448 : int cased;
3449 :
3450 : /* Shortcut for single character strings */
3451 0 : if (PyString_GET_SIZE(self) == 1)
3452 0 : return PyBool_FromLong(islower(*p) != 0);
3453 :
3454 : /* Special case for empty strings */
3455 0 : if (PyString_GET_SIZE(self) == 0)
3456 0 : return PyBool_FromLong(0);
3457 :
3458 0 : e = p + PyString_GET_SIZE(self);
3459 0 : cased = 0;
3460 0 : for (; p < e; p++) {
3461 0 : if (isupper(*p))
3462 0 : return PyBool_FromLong(0);
3463 0 : else if (!cased && islower(*p))
3464 0 : cased = 1;
3465 : }
3466 0 : return PyBool_FromLong(cased);
3467 : }
3468 :
3469 :
3470 : PyDoc_STRVAR(isupper__doc__,
3471 : "S.isupper() -> bool\n\
3472 : \n\
3473 : Return True if all cased characters in S are uppercase and there is\n\
3474 : at least one cased character in S, False otherwise.");
3475 :
3476 : static PyObject*
3477 1272 : string_isupper(PyStringObject *self)
3478 : {
3479 1272 : register const unsigned char *p
3480 : = (unsigned char *) PyString_AS_STRING(self);
3481 : register const unsigned char *e;
3482 : int cased;
3483 :
3484 : /* Shortcut for single character strings */
3485 1272 : if (PyString_GET_SIZE(self) == 1)
3486 1272 : return PyBool_FromLong(isupper(*p) != 0);
3487 :
3488 : /* Special case for empty strings */
3489 0 : if (PyString_GET_SIZE(self) == 0)
3490 0 : return PyBool_FromLong(0);
3491 :
3492 0 : e = p + PyString_GET_SIZE(self);
3493 0 : cased = 0;
3494 0 : for (; p < e; p++) {
3495 0 : if (islower(*p))
3496 0 : return PyBool_FromLong(0);
3497 0 : else if (!cased && isupper(*p))
3498 0 : cased = 1;
3499 : }
3500 0 : return PyBool_FromLong(cased);
3501 : }
3502 :
3503 :
3504 : PyDoc_STRVAR(istitle__doc__,
3505 : "S.istitle() -> bool\n\
3506 : \n\
3507 : Return True if S is a titlecased string and there is at least one\n\
3508 : character in S, i.e. uppercase characters may only follow uncased\n\
3509 : characters and lowercase characters only cased ones. Return False\n\
3510 : otherwise.");
3511 :
3512 : static PyObject*
3513 0 : string_istitle(PyStringObject *self, PyObject *uncased)
3514 : {
3515 0 : register const unsigned char *p
3516 : = (unsigned char *) PyString_AS_STRING(self);
3517 : register const unsigned char *e;
3518 : int cased, previous_is_cased;
3519 :
3520 : /* Shortcut for single character strings */
3521 0 : if (PyString_GET_SIZE(self) == 1)
3522 0 : return PyBool_FromLong(isupper(*p) != 0);
3523 :
3524 : /* Special case for empty strings */
3525 0 : if (PyString_GET_SIZE(self) == 0)
3526 0 : return PyBool_FromLong(0);
3527 :
3528 0 : e = p + PyString_GET_SIZE(self);
3529 0 : cased = 0;
3530 0 : previous_is_cased = 0;
3531 0 : for (; p < e; p++) {
3532 0 : register const unsigned char ch = *p;
3533 :
3534 0 : if (isupper(ch)) {
3535 0 : if (previous_is_cased)
3536 0 : return PyBool_FromLong(0);
3537 0 : previous_is_cased = 1;
3538 0 : cased = 1;
3539 : }
3540 0 : else if (islower(ch)) {
3541 0 : if (!previous_is_cased)
3542 0 : return PyBool_FromLong(0);
3543 0 : previous_is_cased = 1;
3544 0 : cased = 1;
3545 : }
3546 : else
3547 0 : previous_is_cased = 0;
3548 : }
3549 0 : return PyBool_FromLong(cased);
3550 : }
3551 :
3552 :
3553 : PyDoc_STRVAR(splitlines__doc__,
3554 : "S.splitlines(keepends=False) -> list of strings\n\
3555 : \n\
3556 : Return a list of the lines in S, breaking at line boundaries.\n\
3557 : Line breaks are not included in the resulting list unless keepends\n\
3558 : is given and true.");
3559 :
3560 : static PyObject*
3561 9 : string_splitlines(PyStringObject *self, PyObject *args)
3562 : {
3563 9 : int keepends = 0;
3564 :
3565 9 : if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3566 0 : return NULL;
3567 :
3568 18 : return stringlib_splitlines(
3569 9 : (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3570 : keepends
3571 : );
3572 : }
3573 :
3574 : PyDoc_STRVAR(sizeof__doc__,
3575 : "S.__sizeof__() -> size of S in memory, in bytes");
3576 :
3577 : static PyObject *
3578 0 : string_sizeof(PyStringObject *v)
3579 : {
3580 : Py_ssize_t res;
3581 0 : res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3582 0 : return PyInt_FromSsize_t(res);
3583 : }
3584 :
3585 : static PyObject *
3586 0 : string_getnewargs(PyStringObject *v)
3587 : {
3588 0 : return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3589 : }
3590 :
3591 :
3592 : #include "stringlib/string_format.h"
3593 :
3594 : PyDoc_STRVAR(format__doc__,
3595 : "S.format(*args, **kwargs) -> string\n\
3596 : \n\
3597 : Return a formatted version of S, using substitutions from args and kwargs.\n\
3598 : The substitutions are identified by braces ('{' and '}').");
3599 :
3600 : static PyObject *
3601 0 : string__format__(PyObject* self, PyObject* args)
3602 : {
3603 : PyObject *format_spec;
3604 0 : PyObject *result = NULL;
3605 0 : PyObject *tmp = NULL;
3606 :
3607 : /* If 2.x, convert format_spec to the same type as value */
3608 : /* This is to allow things like u''.format('') */
3609 0 : if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3610 0 : goto done;
3611 0 : if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
3612 0 : PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3613 0 : "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3614 0 : goto done;
3615 : }
3616 0 : tmp = PyObject_Str(format_spec);
3617 0 : if (tmp == NULL)
3618 0 : goto done;
3619 0 : format_spec = tmp;
3620 :
3621 0 : result = _PyBytes_FormatAdvanced(self,
3622 0 : PyString_AS_STRING(format_spec),
3623 0 : PyString_GET_SIZE(format_spec));
3624 : done:
3625 0 : Py_XDECREF(tmp);
3626 0 : return result;
3627 : }
3628 :
3629 : PyDoc_STRVAR(p_format__doc__,
3630 : "S.__format__(format_spec) -> string\n\
3631 : \n\
3632 : Return a formatted version of S as described by format_spec.");
3633 :
3634 :
/* Method table for the str type (installed as tp_methods of PyString_Type).
 * Each entry is {python name, C function, calling-convention flags, doc}.
 * Entries with only three fields ("_formatter_field_name_split",
 * "_formatter_parser", "__getnewargs__") leave the doc pointer
 * zero-initialised.  The table ends with an all-NULL sentinel.
 */
3635 : static PyMethodDef
3636 : string_methods[] = {
3637 : /* Counterparts of the obsolete stropmodule functions; except
3638 : string.maketrans(). */
3639 : {"join", (PyCFunction)string_join, METH_O, join__doc__},
3640 : {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3641 : {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3642 : {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3643 : {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3644 : {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3645 : {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3646 : {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3647 : {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3648 : {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3649 : {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3650 : {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3651 : {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3652 : capitalize__doc__},
3653 : {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3654 : {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3655 : endswith__doc__},
3656 : {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3657 : {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3658 : {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3659 : {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3660 : {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3661 : {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3662 : {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3663 : {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3664 : {"rpartition", (PyCFunction)string_rpartition, METH_O,
3665 : rpartition__doc__},
3666 : {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3667 : startswith__doc__},
3668 : {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3669 : {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3670 : swapcase__doc__},
3671 : {"translate", (PyCFunction)string_translate, METH_VARARGS,
3672 : translate__doc__},
3673 : {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3674 : {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3675 : {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3676 : {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3677 : {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
/* str.format() and its support methods; the implementations come from
   the stringlib/string_format.h include earlier in this file --
   NOTE(review): inferred from the include position, confirm. */
3678 : {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3679 : {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3680 : {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3681 : {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3682 : {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3683 : {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3684 : {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3685 : expandtabs__doc__},
3686 : {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3687 : splitlines__doc__},
3688 : {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3689 : sizeof__doc__},
3690 : {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3691 : {NULL, NULL} /* sentinel */
3692 : };
3693 :
3694 : static PyObject *
3695 : str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3696 :
3697 : static PyObject *
3698 1119 : string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3699 : {
3700 1119 : PyObject *x = NULL;
3701 : static char *kwlist[] = {"object", 0};
3702 :
3703 1119 : if (type != &PyString_Type)
3704 0 : return str_subtype_new(type, args, kwds);
3705 1119 : if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3706 0 : return NULL;
3707 1119 : if (x == NULL)
3708 0 : return PyString_FromString("");
3709 1119 : return PyObject_Str(x);
3710 : }
3711 :
3712 : static PyObject *
3713 0 : str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3714 : {
3715 : PyObject *tmp, *pnew;
3716 : Py_ssize_t n;
3717 :
3718 : assert(PyType_IsSubtype(type, &PyString_Type));
3719 0 : tmp = string_new(&PyString_Type, args, kwds);
3720 0 : if (tmp == NULL)
3721 0 : return NULL;
3722 : assert(PyString_Check(tmp));
3723 0 : n = PyString_GET_SIZE(tmp);
3724 0 : pnew = type->tp_alloc(type, n);
3725 0 : if (pnew != NULL) {
3726 0 : Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3727 0 : ((PyStringObject *)pnew)->ob_shash =
3728 0 : ((PyStringObject *)tmp)->ob_shash;
3729 0 : ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3730 : }
3731 0 : Py_DECREF(tmp);
3732 0 : return pnew;
3733 : }
3734 :
3735 : static PyObject *
3736 0 : basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3737 : {
3738 0 : PyErr_SetString(PyExc_TypeError,
3739 : "The basestring type cannot be instantiated");
3740 0 : return NULL;
3741 : }
3742 :
3743 : static PyObject *
3744 0 : string_mod(PyObject *v, PyObject *w)
3745 : {
3746 0 : if (!PyString_Check(v)) {
3747 0 : Py_INCREF(Py_NotImplemented);
3748 0 : return Py_NotImplemented;
3749 : }
3750 0 : return PyString_Format(v, w);
3751 : }
3752 :
3753 : PyDoc_STRVAR(basestring_doc,
3754 : "Type basestring cannot be instantiated; it is the base for str and unicode.");
3755 :
/* Number-protocol slot table for str (referenced as tp_as_number by
 * PyString_Type).  Only nb_remainder is filled in, with string_mod; the
 * slots listed before it are explicitly 0 and all slots after it are
 * zero-initialised because the initializer stops here.
 */
3756 : static PyNumberMethods string_as_number = {
3757 : 0, /*nb_add*/
3758 : 0, /*nb_subtract*/
3759 : 0, /*nb_multiply*/
3760 : 0, /*nb_divide*/
3761 : string_mod, /*nb_remainder*/
3762 : };
3763 :
3764 :
/* Type object for `basestring`.  It derives from object (tp_base), may be
 * subclassed (Py_TPFLAGS_BASETYPE), and its tp_new is basestring_new.
 * Every other slot is left 0.  Slots are positional: the order of the
 * entries below is the order of the fields in PyTypeObject.
 */
3765 : PyTypeObject PyBaseString_Type = {
3766 : PyVarObject_HEAD_INIT(&PyType_Type, 0)
3767 : "basestring", /* tp_name */
3768 : 0, /* tp_basicsize */
3769 : 0, /* tp_itemsize */
3770 : 0, /* tp_dealloc */
3771 : 0, /* tp_print */
3772 : 0, /* tp_getattr */
3773 : 0, /* tp_setattr */
3774 : 0, /* tp_compare */
3775 : 0, /* tp_repr */
3776 : 0, /* tp_as_number */
3777 : 0, /* tp_as_sequence */
3778 : 0, /* tp_as_mapping */
3779 : 0, /* tp_hash */
3780 : 0, /* tp_call */
3781 : 0, /* tp_str */
3782 : 0, /* tp_getattro */
3783 : 0, /* tp_setattro */
3784 : 0, /* tp_as_buffer */
3785 : Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3786 : basestring_doc, /* tp_doc */
3787 : 0, /* tp_traverse */
3788 : 0, /* tp_clear */
3789 : 0, /* tp_richcompare */
3790 : 0, /* tp_weaklistoffset */
3791 : 0, /* tp_iter */
3792 : 0, /* tp_iternext */
3793 : 0, /* tp_methods */
3794 : 0, /* tp_members */
3795 : 0, /* tp_getset */
3796 : &PyBaseObject_Type, /* tp_base */
3797 : 0, /* tp_dict */
3798 : 0, /* tp_descr_get */
3799 : 0, /* tp_descr_set */
3800 : 0, /* tp_dictoffset */
3801 : 0, /* tp_init */
3802 : 0, /* tp_alloc */
3803 : basestring_new, /* tp_new */
3804 : 0, /* tp_free */
3805 : };
3806 :
3807 : PyDoc_STRVAR(string_doc,
3808 : "str(object='') -> string\n\
3809 : \n\
3810 : Return a nice string representation of the object.\n\
3811 : If the argument is a string, the return value is the same object.");
3812 :
/* Type object for `str`.  It derives from basestring (tp_base), is a
 * variable-size type whose items are single chars, and wires in the
 * method table, protocol tables and slot functions defined in this file.
 * Slots are positional: the order of the entries below is the order of
 * the fields in PyTypeObject.
 */
3813 : PyTypeObject PyString_Type = {
3814 : PyVarObject_HEAD_INIT(&PyType_Type, 0)
3815 : "str", /* tp_name */
3816 : PyStringObject_SIZE, /* tp_basicsize */
3817 : sizeof(char), /* tp_itemsize */
3818 : string_dealloc, /* tp_dealloc */
3819 : (printfunc)string_print, /* tp_print */
3820 : 0, /* tp_getattr */
3821 : 0, /* tp_setattr */
3822 : 0, /* tp_compare */
3823 : string_repr, /* tp_repr */
3824 : &string_as_number, /* tp_as_number */
3825 : &string_as_sequence, /* tp_as_sequence */
3826 : &string_as_mapping, /* tp_as_mapping */
3827 : (hashfunc)string_hash, /* tp_hash */
3828 : 0, /* tp_call */
3829 : string_str, /* tp_str */
3830 : PyObject_GenericGetAttr, /* tp_getattro */
3831 : 0, /* tp_setattro */
3832 : &string_as_buffer, /* tp_as_buffer */
3833 : Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3834 : Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
3835 : Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3836 : string_doc, /* tp_doc */
3837 : 0, /* tp_traverse */
3838 : 0, /* tp_clear */
3839 : (richcmpfunc)string_richcompare, /* tp_richcompare */
3840 : 0, /* tp_weaklistoffset */
3841 : 0, /* tp_iter */
3842 : 0, /* tp_iternext */
3843 : string_methods, /* tp_methods */
3844 : 0, /* tp_members */
3845 : 0, /* tp_getset */
3846 : &PyBaseString_Type, /* tp_base */
3847 : 0, /* tp_dict */
3848 : 0, /* tp_descr_get */
3849 : 0, /* tp_descr_set */
3850 : 0, /* tp_dictoffset */
3851 : 0, /* tp_init */
3852 : 0, /* tp_alloc */
3853 : string_new, /* tp_new */
3854 : PyObject_Del, /* tp_free */
3855 : };
3856 :
3857 : void
3858 6739 : PyString_Concat(register PyObject **pv, register PyObject *w)
3859 : {
3860 : register PyObject *v;
3861 6739 : if (*pv == NULL)
3862 0 : return;
3863 6739 : if (w == NULL || !PyString_Check(*pv)) {
3864 0 : Py_CLEAR(*pv);
3865 0 : return;
3866 : }
3867 6739 : v = string_concat((PyStringObject *) *pv, w);
3868 6739 : Py_SETREF(*pv, v);
3869 : }
3870 :
3871 : void
3872 168 : PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
3873 : {
3874 168 : PyString_Concat(pv, w);
3875 168 : Py_XDECREF(w);
3876 168 : }
3877 :
3878 :
3879 : /* The following function breaks the notion that strings are immutable:
3880 : it changes the size of a string. We get away with this only if there
3881 : is only one module referencing the object. You can also think of it
3882 : as creating a new string object and destroying the old one, only
3883 : more efficiently. In any case, don't use this if the string may
3884 : already be known to some other part of the code...
3885 : Note that if there's not enough memory to resize the string, the original
3886 : string object at *pv is deallocated, *pv is set to NULL, an "out of
3887 : memory" exception is set, and -1 is returned. Else (on success) 0 is
3888 : returned, and the value in *pv may or may not be the same as on input.
3889 : As always, an extra byte is allocated for a trailing \0 byte (newsize
3890 : does *not* include that), and a trailing \0 byte is stored.
3891 : */
3892 :
3893 : int
3894 11473 : _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
3895 : {
3896 : register PyObject *v;
3897 : register PyStringObject *sv;
3898 11473 : v = *pv;
3899 22946 : if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3900 11473 : PyString_CHECK_INTERNED(v)) {
3901 0 : *pv = 0;
3902 0 : Py_DECREF(v);
3903 0 : PyErr_BadInternalCall();
3904 0 : return -1;
3905 : }
3906 : /* XXX UNREF/NEWREF interface should be more symmetrical */
3907 : _Py_DEC_REFTOTAL;
3908 : _Py_ForgetReference(v);
3909 11473 : *pv = (PyObject *)
3910 11473 : PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3911 11473 : if (*pv == NULL) {
3912 0 : PyObject_Del(v);
3913 0 : PyErr_NoMemory();
3914 0 : return -1;
3915 : }
3916 11473 : _Py_NewReference(*pv);
3917 11473 : sv = (PyStringObject *) *pv;
3918 11473 : Py_SIZE(sv) = newsize;
3919 11473 : sv->ob_sval[newsize] = '\0';
3920 11473 : sv->ob_shash = -1; /* invalidate cached hash value */
3921 11473 : return 0;
3922 : }
3923 :
3924 : /* Helpers for formatstring */
3925 :
3926 : Py_LOCAL_INLINE(PyObject *)
3927 2220 : getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3928 : {
3929 2220 : Py_ssize_t argidx = *p_argidx;
3930 2220 : if (argidx < arglen) {
3931 2220 : (*p_argidx)++;
3932 2220 : if (arglen < 0)
3933 114 : return args;
3934 : else
3935 2106 : return PyTuple_GetItem(args, argidx);
3936 : }
3937 0 : PyErr_SetString(PyExc_TypeError,
3938 : "not enough arguments for format string");
3939 0 : return NULL;
3940 : }
3941 :
3942 : /* Format codes: one bit per conversion flag, OR-ed together into the
3943 : * `flags` bitmask taken by the format helpers (flag character at right)
3943 : * F_LJUST '-'
3944 : * F_SIGN '+'
3945 : * F_BLANK ' '
3946 : * F_ALT '#'
3947 : * F_ZERO '0'
3948 : */
3949 : #define F_LJUST (1<<0)
3950 : #define F_SIGN (1<<1)
3951 : #define F_BLANK (1<<2)
3952 : #define F_ALT (1<<3)
3953 : #define F_ZERO (1<<4)
3954 :
3955 : /* Returns a new reference to a PyString object, or NULL on failure. */
3956 :
3957 : static PyObject *
3958 0 : formatfloat(PyObject *v, int flags, int prec, int type)
3959 : {
3960 : char *p;
3961 : PyObject *result;
3962 : double x;
3963 :
3964 0 : x = PyFloat_AsDouble(v);
3965 0 : if (x == -1.0 && PyErr_Occurred()) {
3966 0 : PyErr_Format(PyExc_TypeError, "float argument required, "
3967 0 : "not %.200s", Py_TYPE(v)->tp_name);
3968 0 : return NULL;
3969 : }
3970 :
3971 0 : if (prec < 0)
3972 0 : prec = 6;
3973 :
3974 0 : p = PyOS_double_to_string(x, type, prec,
3975 0 : (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
3976 :
3977 0 : if (p == NULL)
3978 0 : return NULL;
3979 0 : result = PyString_FromStringAndSize(p, strlen(p));
3980 0 : PyMem_Free(p);
3981 0 : return result;
3982 : }
3983 :
3984 : /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3985 : * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3986 : * Python's regular ints.
3987 : * Return value: a new PyString*, or NULL if error.
3988 : * . *pbuf is set to point into it,
3989 : * *plen set to the # of chars following that.
3990 : * Caller must decref it when done using pbuf.
3991 : * The string starting at *pbuf is of the form
3992 : * "-"? ("0x" | "0X")? digit+
3993 : * "0x"/"0X" are present only for x and X conversions, with F_ALT
3994 : * set in flags. The case of hex digits will be correct,
3995 : * There will be at least prec digits, zero-filled on the left if
3996 : * necessary to get that many.
3997 : * val object to be converted
3998 : * flags bitmask of format flags; only F_ALT is looked at
3999 : * prec minimum number of digits; 0-fill on left if needed
4000 : * type a character in [duoxX]; u acts the same as d
4001 : *
4002 : * CAUTION: o, x and X conversions on regular ints can never
4003 : * produce a '-' sign, but can for Python's unbounded ints.
4004 : */
4005 : PyObject*
4006 0 : _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4007 : char **pbuf, int *plen)
4008 : {
4009 0 : PyObject *result = NULL, *r1;
4010 : const char *s;
4011 : char *buf;
4012 : Py_ssize_t i;
4013 : int sign; /* 1 if '-', else 0 */
4014 : int len; /* number of characters */
4015 : Py_ssize_t llen;
4016 : int numdigits; /* len == numnondigits + skipped + numdigits */
4017 : int numnondigits, skipped, filled;
4018 : const char *method;
4019 :
4020 0 : switch (type) {
4021 : case 'd':
4022 : case 'u':
4023 0 : method = "str";
4024 0 : result = Py_TYPE(val)->tp_str(val);
4025 0 : break;
4026 : case 'o':
4027 0 : method = "oct";
4028 0 : result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4029 0 : break;
4030 : case 'x':
4031 : case 'X':
4032 0 : method = "hex";
4033 0 : result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4034 0 : break;
4035 : default:
4036 : assert(!"'type' not in [duoxX]");
4037 : }
4038 0 : if (!result)
4039 0 : return NULL;
4040 :
4041 0 : if (PyString_AsStringAndSize(result, (char **)&s, &llen) < 0) {
4042 0 : Py_DECREF(result);
4043 0 : return NULL;
4044 : }
4045 0 : if (llen > INT_MAX) {
4046 0 : PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4047 0 : Py_DECREF(result);
4048 0 : return NULL;
4049 : }
4050 0 : len = (int)llen;
4051 0 : if (len > 0 && s[len-1] == 'L') {
4052 0 : --len;
4053 0 : if (len == 0)
4054 0 : goto error;
4055 : }
4056 0 : sign = s[0] == '-';
4057 0 : numnondigits = sign;
4058 :
4059 : /* Need to skip 0x, 0X or 0. */
4060 0 : skipped = 0;
4061 0 : switch (type) {
4062 : case 'o':
4063 0 : if (s[sign] != '0')
4064 0 : goto error;
4065 : /* If 0 is only digit, leave it alone. */
4066 0 : if ((flags & F_ALT) == 0 && len - sign > 1)
4067 0 : skipped = 1;
4068 0 : break;
4069 : case 'x':
4070 : case 'X':
4071 0 : if (s[sign] != '0' || (s[sign + 1] != 'x' && s[sign + 1] != 'X'))
4072 : goto error;
4073 0 : if ((flags & F_ALT) == 0)
4074 0 : skipped = 2;
4075 : else
4076 0 : numnondigits += 2;
4077 0 : break;
4078 : }
4079 0 : numdigits = len - numnondigits - skipped;
4080 0 : if (numdigits <= 0)
4081 0 : goto error;
4082 :
4083 0 : filled = prec - numdigits;
4084 0 : if (filled < 0)
4085 0 : filled = 0;
4086 0 : len = numnondigits + filled + numdigits;
4087 :
4088 : /* To modify the string in-place, there can only be one reference. */
4089 0 : if (skipped >= filled &&
4090 0 : PyString_CheckExact(result) &&
4091 0 : Py_REFCNT(result) == 1 &&
4092 0 : !PyString_CHECK_INTERNED(result))
4093 : {
4094 0 : r1 = NULL;
4095 0 : buf = (char *)s + skipped - filled;
4096 : }
4097 : else {
4098 0 : r1 = result;
4099 0 : result = PyString_FromStringAndSize(NULL, len);
4100 0 : if (!result) {
4101 0 : Py_DECREF(r1);
4102 0 : return NULL;
4103 : }
4104 0 : buf = PyString_AS_STRING(result);
4105 : }
4106 :
4107 0 : for (i = numnondigits; --i >= 0;)
4108 0 : buf[i] = s[i];
4109 0 : buf += numnondigits;
4110 0 : s += numnondigits + skipped;
4111 0 : for (i = 0; i < filled; i++)
4112 0 : *buf++ = '0';
4113 0 : if (r1 == NULL) {
4114 : assert(buf == s);
4115 0 : buf += numdigits;
4116 : }
4117 : else {
4118 0 : for (i = 0; i < numdigits; i++)
4119 0 : *buf++ = *s++;
4120 : }
4121 0 : *buf = '\0';
4122 0 : buf -= len;
4123 0 : Py_XDECREF(r1);
4124 :
4125 : /* Fix up case for hex conversions. */
4126 0 : if (type == 'X') {
4127 : /* Need to convert all lower case letters to upper case.
4128 : and need to convert 0x to 0X (and -0x to -0X). */
4129 0 : for (i = 0; i < len; i++) {
4130 0 : if (buf[i] >= 'a' && buf[i] <= 'z')
4131 0 : buf[i] -= 'a'-'A';
4132 : }
4133 : }
4134 0 : *pbuf = buf;
4135 0 : *plen = len;
4136 0 : return result;
4137 :
4138 : error:
4139 0 : PyErr_Format(PyExc_ValueError,
4140 : "%%%c format: invalid result of __%s__ (type=%.200s)",
4141 0 : type, method, Py_TYPE(val)->tp_name);
4142 0 : Py_DECREF(result);
4143 0 : return NULL;
4144 : }
4145 :
4146 : Py_LOCAL_INLINE(int)
4147 0 : formatint(char *buf, size_t buflen, int flags,
4148 : int prec, int type, PyObject *v)
4149 : {
4150 : /* fmt = '%#.' + `prec` + 'l' + `type`
4151 : worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4152 : + 1 + 1 = 24 */
4153 : char fmt[64]; /* plenty big enough! */
4154 : char *sign;
4155 : long x;
4156 :
4157 0 : x = PyInt_AsLong(v);
4158 0 : if (x == -1 && PyErr_Occurred()) {
4159 0 : PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4160 0 : Py_TYPE(v)->tp_name);
4161 0 : return -1;
4162 : }
4163 0 : if (x < 0 && type == 'u') {
4164 0 : type = 'd';
4165 : }
4166 0 : if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4167 0 : sign = "-";
4168 : else
4169 0 : sign = "";
4170 0 : if (prec < 0)
4171 0 : prec = 1;
4172 :
4173 0 : if ((flags & F_ALT) &&
4174 0 : (type == 'x' || type == 'X')) {
4175 : /* When converting under %#x or %#X, there are a number
4176 : * of issues that cause pain:
4177 : * - when 0 is being converted, the C standard leaves off
4178 : * the '0x' or '0X', which is inconsistent with other
4179 : * %#x/%#X conversions and inconsistent with Python's
4180 : * hex() function
4181 : * - there are platforms that violate the standard and
4182 : * convert 0 with the '0x' or '0X'
4183 : * (Metrowerks, Compaq Tru64)
4184 : * - there are platforms that give '0x' when converting
4185 : * under %#X, but convert 0 in accordance with the
4186 : * standard (OS/2 EMX)
4187 : *
4188 : * We can achieve the desired consistency by inserting our
4189 : * own '0x' or '0X' prefix, and substituting %x/%X in place
4190 : * of %#x/%#X.
4191 : *
4192 : * Note that this is the same approach as used in
4193 : * formatint() in unicodeobject.c
4194 : */
4195 0 : PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4196 : sign, type, prec, type);
4197 : }
4198 : else {
4199 0 : PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4200 0 : sign, (flags&F_ALT) ? "#" : "",
4201 : prec, type);
4202 : }
4203 :
4204 : /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4205 : * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4206 : */
4207 0 : if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4208 0 : PyErr_SetString(PyExc_OverflowError,
4209 : "formatted integer is too long (precision too large?)");
4210 0 : return -1;
4211 : }
4212 0 : if (sign[0])
4213 0 : PyOS_snprintf(buf, buflen, fmt, -x);
4214 : else
4215 0 : PyOS_snprintf(buf, buflen, fmt, x);
4216 0 : return (int)strlen(buf);
4217 : }
4218 :
4219 : Py_LOCAL_INLINE(int)
4220 0 : formatchar(char *buf, size_t buflen, PyObject *v)
4221 : {
4222 : /* presume that the buffer is at least 2 characters long */
4223 0 : if (PyString_Check(v)) {
4224 0 : if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4225 0 : return -1;
4226 : }
4227 : else {
4228 0 : if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4229 0 : return -1;
4230 : }
4231 0 : buf[1] = '\0';
4232 0 : return 1;
4233 : }
4234 :
4235 : /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4236 :
4237 : FORMATBUFLEN is the length of the buffer in which the ints &
4238 : chars are formatted. XXX This is a magic number. Each formatting
4239 : routine does bounds checking to ensure no overflow, but a better
4240 : solution may be to malloc a buffer of appropriate size for each
4241 : format. For now, the current solution is sufficient.
4242 : */
4243 : #define FORMATBUFLEN (size_t)120
4244 :
4245 : PyObject *
4246 1542 : PyString_Format(PyObject *format, PyObject *args)
4247 : {
4248 : char *fmt, *res;
4249 : Py_ssize_t arglen, argidx;
4250 : Py_ssize_t reslen, rescnt, fmtcnt;
4251 1542 : int args_owned = 0;
4252 : PyObject *result, *orig_args;
4253 : #ifdef Py_USING_UNICODE
4254 : PyObject *v, *w;
4255 : #endif
4256 1542 : PyObject *dict = NULL;
4257 1542 : if (format == NULL || !PyString_Check(format) || args == NULL) {
4258 0 : PyErr_BadInternalCall();
4259 0 : return NULL;
4260 : }
4261 1542 : orig_args = args;
4262 1542 : fmt = PyString_AS_STRING(format);
4263 1542 : fmtcnt = PyString_GET_SIZE(format);
4264 1542 : reslen = rescnt = fmtcnt + 100;
4265 1542 : result = PyString_FromStringAndSize((char *)NULL, reslen);
4266 1542 : if (result == NULL)
4267 0 : return NULL;
4268 1542 : res = PyString_AsString(result);
4269 1542 : if (PyTuple_Check(args)) {
4270 1437 : arglen = PyTuple_GET_SIZE(args);
4271 1437 : argidx = 0;
4272 : }
4273 : else {
4274 105 : arglen = -1;
4275 105 : argidx = -2;
4276 : }
4277 3084 : if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
4278 1647 : !PyTuple_Check(args) && !PyObject_TypeCheck(args, &PyBaseString_Type))
4279 3 : dict = args;
4280 8898 : while (--fmtcnt >= 0) {
4281 5814 : if (*fmt != '%') {
4282 3594 : if (--rescnt < 0) {
4283 0 : rescnt = fmtcnt + 100;
4284 0 : reslen += rescnt;
4285 0 : if (_PyString_Resize(&result, reslen))
4286 0 : return NULL;
4287 0 : res = PyString_AS_STRING(result)
4288 0 : + reslen - rescnt;
4289 0 : --rescnt;
4290 : }
4291 3594 : *res++ = *fmt++;
4292 : }
4293 : else {
4294 : /* Got a format specifier */
4295 2220 : int flags = 0;
4296 2220 : Py_ssize_t width = -1;
4297 2220 : int prec = -1;
4298 2220 : int c = '\0';
4299 : int fill;
4300 : int isnumok;
4301 2220 : PyObject *v = NULL;
4302 2220 : PyObject *temp = NULL;
4303 : char *pbuf;
4304 : int sign;
4305 : Py_ssize_t len;
4306 : char formatbuf[FORMATBUFLEN];
4307 : /* For format{int,char}() */
4308 : #ifdef Py_USING_UNICODE
4309 2220 : char *fmt_start = fmt;
4310 2220 : Py_ssize_t argidx_start = argidx;
4311 : #endif
4312 :
4313 2220 : fmt++;
4314 2220 : if (*fmt == '(') {
4315 : char *keystart;
4316 : Py_ssize_t keylen;
4317 : PyObject *key;
4318 12 : int pcount = 1;
4319 :
4320 12 : if (dict == NULL) {
4321 0 : PyErr_SetString(PyExc_TypeError,
4322 : "format requires a mapping");
4323 0 : goto error;
4324 : }
4325 12 : ++fmt;
4326 12 : --fmtcnt;
4327 12 : keystart = fmt;
4328 : /* Skip over balanced parentheses */
4329 78 : while (pcount > 0 && --fmtcnt >= 0) {
4330 54 : if (*fmt == ')')
4331 12 : --pcount;
4332 42 : else if (*fmt == '(')
4333 0 : ++pcount;
4334 54 : fmt++;
4335 : }
4336 12 : keylen = fmt - keystart - 1;
4337 12 : if (fmtcnt < 0 || pcount > 0) {
4338 0 : PyErr_SetString(PyExc_ValueError,
4339 : "incomplete format key");
4340 0 : goto error;
4341 : }
4342 12 : key = PyString_FromStringAndSize(keystart,
4343 : keylen);
4344 12 : if (key == NULL)
4345 0 : goto error;
4346 12 : if (args_owned) {
4347 9 : Py_DECREF(args);
4348 9 : args_owned = 0;
4349 : }
4350 12 : args = PyObject_GetItem(dict, key);
4351 12 : Py_DECREF(key);
4352 12 : if (args == NULL) {
4353 0 : goto error;
4354 : }
4355 12 : args_owned = 1;
4356 12 : arglen = -1;
4357 12 : argidx = -2;
4358 : }
4359 4440 : while (--fmtcnt >= 0) {
4360 2220 : switch (c = *fmt++) {
4361 0 : case '-': flags |= F_LJUST; continue;
4362 0 : case '+': flags |= F_SIGN; continue;
4363 0 : case ' ': flags |= F_BLANK; continue;
4364 0 : case '#': flags |= F_ALT; continue;
4365 0 : case '0': flags |= F_ZERO; continue;
4366 : }
4367 2220 : break;
4368 : }
4369 2220 : if (c == '*') {
4370 0 : v = getnextarg(args, arglen, &argidx);
4371 0 : if (v == NULL)
4372 0 : goto error;
4373 0 : if (!PyInt_Check(v)) {
4374 0 : PyErr_SetString(PyExc_TypeError,
4375 : "* wants int");
4376 0 : goto error;
4377 : }
4378 0 : width = PyInt_AsSsize_t(v);
4379 0 : if (width == -1 && PyErr_Occurred())
4380 0 : goto error;
4381 0 : if (width < 0) {
4382 0 : flags |= F_LJUST;
4383 0 : width = -width;
4384 : }
4385 0 : if (--fmtcnt >= 0)
4386 0 : c = *fmt++;
4387 : }
4388 2220 : else if (c >= 0 && isdigit(c)) {
4389 0 : width = c - '0';
4390 0 : while (--fmtcnt >= 0) {
4391 0 : c = Py_CHARMASK(*fmt++);
4392 0 : if (!isdigit(c))
4393 0 : break;
4394 0 : if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
4395 0 : PyErr_SetString(
4396 : PyExc_ValueError,
4397 : "width too big");
4398 0 : goto error;
4399 : }
4400 0 : width = width*10 + (c - '0');
4401 : }
4402 : }
4403 2220 : if (c == '.') {
4404 0 : prec = 0;
4405 0 : if (--fmtcnt >= 0)
4406 0 : c = *fmt++;
4407 0 : if (c == '*') {
4408 0 : v = getnextarg(args, arglen, &argidx);
4409 0 : if (v == NULL)
4410 0 : goto error;
4411 0 : if (!PyInt_Check(v)) {
4412 0 : PyErr_SetString(
4413 : PyExc_TypeError,
4414 : "* wants int");
4415 0 : goto error;
4416 : }
4417 0 : prec = _PyInt_AsInt(v);
4418 0 : if (prec == -1 && PyErr_Occurred())
4419 0 : goto error;
4420 0 : if (prec < 0)
4421 0 : prec = 0;
4422 0 : if (--fmtcnt >= 0)
4423 0 : c = *fmt++;
4424 : }
4425 0 : else if (c >= 0 && isdigit(c)) {
4426 0 : prec = c - '0';
4427 0 : while (--fmtcnt >= 0) {
4428 0 : c = Py_CHARMASK(*fmt++);
4429 0 : if (!isdigit(c))
4430 0 : break;
4431 0 : if (prec > (INT_MAX - ((int)c - '0')) / 10) {
4432 0 : PyErr_SetString(
4433 : PyExc_ValueError,
4434 : "prec too big");
4435 0 : goto error;
4436 : }
4437 0 : prec = prec*10 + (c - '0');
4438 : }
4439 : }
4440 : } /* prec */
4441 2220 : if (fmtcnt >= 0) {
4442 2220 : if (c == 'h' || c == 'l' || c == 'L') {
4443 0 : if (--fmtcnt >= 0)
4444 0 : c = *fmt++;
4445 : }
4446 : }
4447 2220 : if (fmtcnt < 0) {
4448 0 : PyErr_SetString(PyExc_ValueError,
4449 : "incomplete format");
4450 0 : goto error;
4451 : }
4452 2220 : if (c != '%') {
4453 2220 : v = getnextarg(args, arglen, &argidx);
4454 2220 : if (v == NULL)
4455 0 : goto error;
4456 : }
4457 2220 : sign = 0;
4458 2220 : fill = ' ';
4459 2220 : switch (c) {
4460 : case '%':
4461 0 : pbuf = "%";
4462 0 : len = 1;
4463 0 : break;
4464 : case 's':
4465 : #ifdef Py_USING_UNICODE
4466 1452 : if (PyUnicode_Check(v)) {
4467 0 : fmt = fmt_start;
4468 0 : argidx = argidx_start;
4469 0 : goto unicode;
4470 : }
4471 : #endif
4472 1452 : temp = _PyObject_Str(v);
4473 : #ifdef Py_USING_UNICODE
4474 1452 : if (temp != NULL && PyUnicode_Check(temp)) {
4475 0 : Py_DECREF(temp);
4476 0 : fmt = fmt_start;
4477 0 : argidx = argidx_start;
4478 0 : goto unicode;
4479 : }
4480 : #endif
4481 : /* Fall through */
4482 : case 'r':
4483 2220 : if (c == 'r')
4484 768 : temp = PyObject_Repr(v);
4485 2220 : if (temp == NULL)
4486 0 : goto error;
4487 2220 : if (!PyString_Check(temp)) {
4488 0 : PyErr_SetString(PyExc_TypeError,
4489 : "%s argument has non-string str()");
4490 0 : Py_DECREF(temp);
4491 0 : goto error;
4492 : }
4493 2220 : pbuf = PyString_AS_STRING(temp);
4494 2220 : len = PyString_GET_SIZE(temp);
4495 2220 : if (prec >= 0 && len > prec)
4496 0 : len = prec;
4497 2220 : break;
4498 : case 'i':
4499 : case 'd':
4500 : case 'u':
4501 : case 'o':
4502 : case 'x':
4503 : case 'X':
4504 0 : if (c == 'i')
4505 0 : c = 'd';
4506 0 : isnumok = 0;
4507 0 : if (PyNumber_Check(v)) {
4508 0 : PyObject *iobj=NULL;
4509 :
4510 0 : if (PyInt_Check(v) || (PyLong_Check(v))) {
4511 0 : iobj = v;
4512 0 : Py_INCREF(iobj);
4513 : }
4514 : else {
4515 0 : iobj = PyNumber_Int(v);
4516 0 : if (iobj==NULL) {
4517 0 : PyErr_Clear();
4518 0 : iobj = PyNumber_Long(v);
4519 : }
4520 : }
4521 0 : if (iobj!=NULL) {
4522 0 : if (PyInt_Check(iobj)) {
4523 0 : isnumok = 1;
4524 0 : pbuf = formatbuf;
4525 0 : len = formatint(pbuf,
4526 : sizeof(formatbuf),
4527 : flags, prec, c, iobj);
4528 0 : Py_DECREF(iobj);
4529 0 : if (len < 0)
4530 0 : goto error;
4531 0 : sign = 1;
4532 : }
4533 0 : else if (PyLong_Check(iobj)) {
4534 : int ilen;
4535 :
4536 0 : isnumok = 1;
4537 0 : temp = _PyString_FormatLong(iobj, flags,
4538 : prec, c, &pbuf, &ilen);
4539 0 : Py_DECREF(iobj);
4540 0 : len = ilen;
4541 0 : if (!temp)
4542 0 : goto error;
4543 0 : sign = 1;
4544 : }
4545 : else {
4546 0 : Py_DECREF(iobj);
4547 : }
4548 : }
4549 : }
4550 0 : if (!isnumok) {
4551 0 : PyErr_Format(PyExc_TypeError,
4552 : "%%%c format: a number is required, "
4553 0 : "not %.200s", c, Py_TYPE(v)->tp_name);
4554 0 : goto error;
4555 : }
4556 0 : if (flags & F_ZERO)
4557 0 : fill = '0';
4558 0 : break;
4559 : case 'e':
4560 : case 'E':
4561 : case 'f':
4562 : case 'F':
4563 : case 'g':
4564 : case 'G':
4565 0 : temp = formatfloat(v, flags, prec, c);
4566 0 : if (temp == NULL)
4567 0 : goto error;
4568 0 : pbuf = PyString_AS_STRING(temp);
4569 0 : len = PyString_GET_SIZE(temp);
4570 0 : sign = 1;
4571 0 : if (flags & F_ZERO)
4572 0 : fill = '0';
4573 0 : break;
4574 : case 'c':
4575 : #ifdef Py_USING_UNICODE
4576 0 : if (PyUnicode_Check(v)) {
4577 0 : fmt = fmt_start;
4578 0 : argidx = argidx_start;
4579 0 : goto unicode;
4580 : }
4581 : #endif
4582 0 : pbuf = formatbuf;
4583 0 : len = formatchar(pbuf, sizeof(formatbuf), v);
4584 0 : if (len < 0)
4585 0 : goto error;
4586 0 : break;
4587 : default:
4588 0 : PyErr_Format(PyExc_ValueError,
4589 : "unsupported format character '%c' (0x%x) "
4590 : "at index %zd",
4591 : c, c,
4592 0 : (Py_ssize_t)(fmt - 1 -
4593 0 : PyString_AsString(format)));
4594 0 : goto error;
4595 : }
4596 2220 : if (sign) {
4597 0 : if (*pbuf == '-' || *pbuf == '+') {
4598 0 : sign = *pbuf++;
4599 0 : len--;
4600 : }
4601 0 : else if (flags & F_SIGN)
4602 0 : sign = '+';
4603 0 : else if (flags & F_BLANK)
4604 0 : sign = ' ';
4605 : else
4606 0 : sign = 0;
4607 : }
4608 2220 : if (width < len)
4609 2220 : width = len;
4610 2220 : if (rescnt - (sign != 0) < width) {
4611 0 : reslen -= rescnt;
4612 0 : rescnt = width + fmtcnt + 100;
4613 0 : reslen += rescnt;
4614 0 : if (reslen < 0) {
4615 0 : Py_DECREF(result);
4616 0 : Py_XDECREF(temp);
4617 0 : return PyErr_NoMemory();
4618 : }
4619 0 : if (_PyString_Resize(&result, reslen)) {
4620 0 : Py_XDECREF(temp);
4621 0 : return NULL;
4622 : }
4623 0 : res = PyString_AS_STRING(result)
4624 0 : + reslen - rescnt;
4625 : }
4626 2220 : if (sign) {
4627 0 : if (fill != ' ')
4628 0 : *res++ = sign;
4629 0 : rescnt--;
4630 0 : if (width > len)
4631 0 : width--;
4632 : }
4633 2220 : if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4634 : assert(pbuf[0] == '0');
4635 : assert(pbuf[1] == c);
4636 0 : if (fill != ' ') {
4637 0 : *res++ = *pbuf++;
4638 0 : *res++ = *pbuf++;
4639 : }
4640 0 : rescnt -= 2;
4641 0 : width -= 2;
4642 0 : if (width < 0)
4643 0 : width = 0;
4644 0 : len -= 2;
4645 : }
4646 2220 : if (width > len && !(flags & F_LJUST)) {
4647 : do {
4648 0 : --rescnt;
4649 0 : *res++ = fill;
4650 0 : } while (--width > len);
4651 : }
4652 2220 : if (fill == ' ') {
4653 2220 : if (sign)
4654 0 : *res++ = sign;
4655 2220 : if ((flags & F_ALT) &&
4656 0 : (c == 'x' || c == 'X')) {
4657 : assert(pbuf[0] == '0');
4658 : assert(pbuf[1] == c);
4659 0 : *res++ = *pbuf++;
4660 0 : *res++ = *pbuf++;
4661 : }
4662 : }
4663 2220 : Py_MEMCPY(res, pbuf, len);
4664 2220 : res += len;
4665 2220 : rescnt -= len;
4666 4440 : while (--width >= len) {
4667 0 : --rescnt;
4668 0 : *res++ = ' ';
4669 : }
4670 2220 : if (dict && (argidx < arglen) && c != '%') {
4671 0 : PyErr_SetString(PyExc_TypeError,
4672 : "not all arguments converted during string formatting");
4673 0 : Py_XDECREF(temp);
4674 0 : goto error;
4675 : }
4676 2220 : Py_XDECREF(temp);
4677 : } /* '%' */
4678 : } /* until end */
4679 1542 : if (argidx < arglen && !dict) {
4680 0 : PyErr_SetString(PyExc_TypeError,
4681 : "not all arguments converted during string formatting");
4682 0 : goto error;
4683 : }
4684 1542 : if (args_owned) {
4685 3 : Py_DECREF(args);
4686 : }
4687 1542 : if (_PyString_Resize(&result, reslen - rescnt))
4688 0 : return NULL;
4689 1542 : return result;
4690 :
4691 : #ifdef Py_USING_UNICODE
4692 : unicode:
4693 0 : if (args_owned) {
4694 0 : Py_DECREF(args);
4695 0 : args_owned = 0;
4696 : }
4697 : /* Fiddle args right (remove the first argidx arguments) */
4698 0 : if (PyTuple_Check(orig_args) && argidx > 0) {
4699 : PyObject *v;
4700 0 : Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4701 0 : v = PyTuple_New(n);
4702 0 : if (v == NULL)
4703 0 : goto error;
4704 0 : while (--n >= 0) {
4705 0 : PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4706 0 : Py_INCREF(w);
4707 0 : PyTuple_SET_ITEM(v, n, w);
4708 : }
4709 0 : args = v;
4710 : } else {
4711 0 : Py_INCREF(orig_args);
4712 0 : args = orig_args;
4713 : }
4714 0 : args_owned = 1;
4715 : /* Take what we have of the result and let the Unicode formatting
4716 : function format the rest of the input. */
4717 0 : rescnt = res - PyString_AS_STRING(result);
4718 0 : if (_PyString_Resize(&result, rescnt))
4719 0 : goto error;
4720 0 : fmtcnt = PyString_GET_SIZE(format) - \
4721 0 : (fmt - PyString_AS_STRING(format));
4722 0 : format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4723 0 : if (format == NULL)
4724 0 : goto error;
4725 0 : v = PyUnicode_Format(format, args);
4726 0 : Py_DECREF(format);
4727 0 : if (v == NULL)
4728 0 : goto error;
4729 : /* Paste what we have (result) to what the Unicode formatting
4730 : function returned (v) and return the result (or error) */
4731 0 : w = PyUnicode_Concat(result, v);
4732 0 : Py_DECREF(result);
4733 0 : Py_DECREF(v);
4734 0 : Py_DECREF(args);
4735 0 : return w;
4736 : #endif /* Py_USING_UNICODE */
4737 :
4738 : error:
4739 0 : Py_DECREF(result);
4740 0 : if (args_owned) {
4741 0 : Py_DECREF(args);
4742 : }
4743 0 : return NULL;
4744 : }
4745 :
4746 : void
4747 217401 : PyString_InternInPlace(PyObject **p)
4748 : {
4749 217401 : register PyStringObject *s = (PyStringObject *)(*p);
4750 : PyObject *t;
4751 217401 : if (s == NULL || !PyString_Check(s))
4752 0 : Py_FatalError("PyString_InternInPlace: strings only please!");
4753 : /* If it's a string subclass, we don't really know what putting
4754 : it in the interned dict might do. */
4755 217401 : if (!PyString_CheckExact(s))
4756 0 : return;
4757 217401 : if (PyString_CHECK_INTERNED(s))
4758 132757 : return;
4759 84644 : if (interned == NULL) {
4760 3 : interned = PyDict_New();
4761 3 : if (interned == NULL) {
4762 0 : PyErr_Clear(); /* Don't leave an exception */
4763 0 : return;
4764 : }
4765 : }
4766 84644 : t = PyDict_GetItem(interned, (PyObject *)s);
4767 84644 : if (t) {
4768 62095 : Py_INCREF(t);
4769 62095 : Py_SETREF(*p, t);
4770 62095 : return;
4771 : }
4772 :
4773 22549 : if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4774 0 : PyErr_Clear();
4775 0 : return;
4776 : }
4777 : /* The two references in interned are not counted by refcnt.
4778 : The string deallocator will take care of this */
4779 22549 : Py_REFCNT(s) -= 2;
4780 22549 : PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4781 : }
4782 :
4783 : void
4784 0 : PyString_InternImmortal(PyObject **p)
4785 : {
4786 0 : PyString_InternInPlace(p);
4787 0 : if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4788 0 : PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4789 0 : Py_INCREF(*p);
4790 : }
4791 0 : }
4792 :
4793 :
4794 : PyObject *
4795 34550 : PyString_InternFromString(const char *cp)
4796 : {
4797 34550 : PyObject *s = PyString_FromString(cp);
4798 34550 : if (s == NULL)
4799 0 : return NULL;
4800 34550 : PyString_InternInPlace(&s);
4801 34550 : return s;
4802 : }
4803 :
4804 : void
4805 3 : PyString_Fini(void)
4806 : {
4807 : int i;
4808 771 : for (i = 0; i < UCHAR_MAX + 1; i++)
4809 768 : Py_CLEAR(characters[i]);
4810 3 : Py_CLEAR(nullstring);
4811 3 : }
4812 :
4813 0 : void _Py_ReleaseInternedStrings(void)
4814 : {
4815 : PyObject *keys;
4816 : PyStringObject *s;
4817 : Py_ssize_t i, n;
4818 0 : Py_ssize_t immortal_size = 0, mortal_size = 0;
4819 :
4820 0 : if (interned == NULL || !PyDict_Check(interned))
4821 0 : return;
4822 0 : keys = PyDict_Keys(interned);
4823 0 : if (keys == NULL || !PyList_Check(keys)) {
4824 0 : PyErr_Clear();
4825 0 : return;
4826 : }
4827 :
4828 : /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4829 : detector, interned strings are not forcibly deallocated; rather, we
4830 : give them their stolen references back, and then clear and DECREF
4831 : the interned dict. */
4832 :
4833 0 : n = PyList_GET_SIZE(keys);
4834 0 : fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4835 : n);
4836 0 : for (i = 0; i < n; i++) {
4837 0 : s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4838 0 : switch (s->ob_sstate) {
4839 : case SSTATE_NOT_INTERNED:
4840 : /* XXX Shouldn't happen */
4841 0 : break;
4842 : case SSTATE_INTERNED_IMMORTAL:
4843 0 : Py_REFCNT(s) += 1;
4844 0 : immortal_size += Py_SIZE(s);
4845 0 : break;
4846 : case SSTATE_INTERNED_MORTAL:
4847 0 : Py_REFCNT(s) += 2;
4848 0 : mortal_size += Py_SIZE(s);
4849 0 : break;
4850 : default:
4851 0 : Py_FatalError("Inconsistent interned string state.");
4852 : }
4853 0 : s->ob_sstate = SSTATE_NOT_INTERNED;
4854 : }
4855 0 : fprintf(stderr, "total size of all interned strings: "
4856 : "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4857 : "mortal/immortal\n", mortal_size, immortal_size);
4858 0 : Py_DECREF(keys);
4859 0 : PyDict_Clear(interned);
4860 0 : Py_CLEAR(interned);
4861 : }
|