LCOV - code coverage report
Current view: top level - Objects - stringobject.c (source / functions) Hit Total Coverage
Test: CPython lcov report Lines: 790 2265 34.9 %
Date: 2017-04-19 Functions: 60 111 54.1 %

          Line data    Source code
       1             : /* String (str/bytes) object implementation */
       2             : 
       3             : #define PY_SSIZE_T_CLEAN
       4             : 
       5             : #include "Python.h"
       6             : #include <ctype.h>
       7             : #include <stddef.h>
       8             : 
       9             : #ifdef COUNT_ALLOCS
      10             : Py_ssize_t null_strings, one_strings;
      11             : #endif
      12             : 
      13             : static PyStringObject *characters[UCHAR_MAX + 1];
      14             : static PyStringObject *nullstring;
      15             : 
      16             : /* This dictionary holds all interned strings.  Note that references to
      17             :    strings in this dictionary are *not* counted in the string's ob_refcnt.
      18             :    When the interned string reaches a refcnt of 0 the string deallocation
      19             :    function will delete the reference from this dictionary.
      20             : 
      21             :    Another way to look at this is to say that the actual reference
      22             :    count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
      23             : */
      24             : static PyObject *interned;
      25             : 
      26             : /* PyStringObject_SIZE gives the basic size of a string; any memory allocation
      27             :    for a string of length n should request PyStringObject_SIZE + n bytes.
      28             : 
      29             :    Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
      30             :    3 bytes per string allocation on a typical system.
      31             : */
      32             : #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
      33             : 
      34             : /*
      35             :    For PyString_FromString(), the parameter `str' points to a null-terminated
      36             :    string; its length is determined with strlen().
      37             : 
      38             :    For PyString_FromStringAndSize(), the parameter `str' is
      39             :    either NULL or else points to a string containing at least `size' bytes.
      40             :    For PyString_FromStringAndSize(), the string in the `str' parameter does
      41             :    not have to be null-terminated.  (Therefore it is safe to construct a
      42             :    substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
      43             :    If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
      44             :    bytes (setting the last byte to the null terminating character) and you can
      45             :    fill in the data yourself.  If `str' is non-NULL then the resulting
      46             :    PyString object must be treated as immutable and you must not fill in nor
      47             :    alter the data yourself, since the strings may be shared.
      48             : 
      49             :    The PyObject member `op->ob_size', which denotes the number of "extra
      50             :    items" in a variable-size object, will contain the number of bytes
      51             :    allocated for string data, not counting the null terminating character.
      52             :    It is therefore equal to the `size' parameter (for
      53             :    PyString_FromStringAndSize()) or the length of the string in the `str'
      54             :    parameter (for PyString_FromString()).
      55             : */
      56             : PyObject *
      57       89238 : PyString_FromStringAndSize(const char *str, Py_ssize_t size)
      58             : {
      59             :     register PyStringObject *op;
      60       89238 :     if (size < 0) {
      61           0 :         PyErr_SetString(PyExc_SystemError,
      62             :             "Negative size passed to PyString_FromStringAndSize");
      63           0 :         return NULL;
      64             :     }
      65       89238 :     if (size == 0 && (op = nullstring) != NULL) {
      66             : #ifdef COUNT_ALLOCS
      67             :         null_strings++;
      68             : #endif
      69        1453 :         Py_INCREF(op);
      70        1453 :         return (PyObject *)op;
      71             :     }
      72       93821 :     if (size == 1 && str != NULL &&
      73        6036 :         (op = characters[*str & UCHAR_MAX]) != NULL)
      74             :     {
      75             : #ifdef COUNT_ALLOCS
      76             :         one_strings++;
      77             : #endif
      78        5298 :         Py_INCREF(op);
      79        5298 :         return (PyObject *)op;
      80             :     }
      81             : 
      82       82487 :     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
      83           0 :         PyErr_SetString(PyExc_OverflowError, "string is too large");
      84           0 :         return NULL;
      85             :     }
      86             : 
      87             :     /* Inline PyObject_NewVar */
      88       82487 :     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
      89       82487 :     if (op == NULL)
      90           0 :         return PyErr_NoMemory();
      91       82487 :     (void)PyObject_INIT_VAR(op, &PyString_Type, size);
      92       82487 :     op->ob_shash = -1;
      93       82487 :     op->ob_sstate = SSTATE_NOT_INTERNED;
      94       82487 :     if (str != NULL)
      95       11495 :         Py_MEMCPY(op->ob_sval, str, size);
      96       82487 :     op->ob_sval[size] = '\0';
      97             :     /* share short strings */
      98       82487 :     if (size == 0) {
      99           3 :         PyObject *t = (PyObject *)op;
     100           3 :         PyString_InternInPlace(&t);
     101           3 :         op = (PyStringObject *)t;
     102           3 :         nullstring = op;
     103           3 :         Py_INCREF(op);
     104       82484 :     } else if (size == 1 && str != NULL) {
     105         738 :         PyObject *t = (PyObject *)op;
     106         738 :         PyString_InternInPlace(&t);
     107         738 :         op = (PyStringObject *)t;
     108         738 :         characters[*str & UCHAR_MAX] = op;
     109         738 :         Py_INCREF(op);
     110             :     }
     111       82487 :     return (PyObject *) op;
     112             : }
     113             : 
     114             : PyObject *
     115       75725 : PyString_FromString(const char *str)
     116             : {
     117             :     register size_t size;
     118             :     register PyStringObject *op;
     119             : 
     120             :     assert(str != NULL);
     121       75725 :     size = strlen(str);
     122       75725 :     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
     123           0 :         PyErr_SetString(PyExc_OverflowError,
     124             :             "string is too long for a Python string");
     125           0 :         return NULL;
     126             :     }
     127       75725 :     if (size == 0 && (op = nullstring) != NULL) {
     128             : #ifdef COUNT_ALLOCS
     129             :         null_strings++;
     130             : #endif
     131         984 :         Py_INCREF(op);
     132         984 :         return (PyObject *)op;
     133             :     }
     134       74741 :     if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
     135             : #ifdef COUNT_ALLOCS
     136             :         one_strings++;
     137             : #endif
     138        1500 :         Py_INCREF(op);
     139        1500 :         return (PyObject *)op;
     140             :     }
     141             : 
     142             :     /* Inline PyObject_NewVar */
     143       73241 :     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
     144       73241 :     if (op == NULL)
     145           0 :         return PyErr_NoMemory();
     146       73241 :     (void)PyObject_INIT_VAR(op, &PyString_Type, size);
     147       73241 :     op->ob_shash = -1;
     148       73241 :     op->ob_sstate = SSTATE_NOT_INTERNED;
     149       73241 :     Py_MEMCPY(op->ob_sval, str, size+1);
     150             :     /* share short strings */
     151       73241 :     if (size == 0) {
     152           0 :         PyObject *t = (PyObject *)op;
     153           0 :         PyString_InternInPlace(&t);
     154           0 :         op = (PyStringObject *)t;
     155           0 :         nullstring = op;
     156           0 :         Py_INCREF(op);
     157       73241 :     } else if (size == 1) {
     158          30 :         PyObject *t = (PyObject *)op;
     159          30 :         PyString_InternInPlace(&t);
     160          30 :         op = (PyStringObject *)t;
     161          30 :         characters[*str & UCHAR_MAX] = op;
     162          30 :         Py_INCREF(op);
     163             :     }
     164       73241 :     return (PyObject *) op;
     165             : }
     166             : 
     167             : PyObject *
     168        5657 : PyString_FromFormatV(const char *format, va_list vargs)
     169             : {
     170             :     va_list count;
     171        5657 :     Py_ssize_t n = 0;
     172             :     const char* f;
     173             :     char *s;
     174             :     PyObject* string;
     175             : 
     176             : #ifdef VA_LIST_IS_ARRAY
     177        5657 :     Py_MEMCPY(count, vargs, sizeof(va_list));
     178             : #else
     179             : #ifdef  __va_copy
     180             :     __va_copy(count, vargs);
     181             : #else
     182             :     count = vargs;
     183             : #endif
     184             : #endif
     185             :     /* step 1: figure out how large a buffer we need */
     186      178281 :     for (f = format; *f; f++) {
     187      172624 :         if (*f == '%') {
     188             : #ifdef HAVE_LONG_LONG
     189       11009 :             int longlongflag = 0;
     190             : #endif
     191       11009 :             const char* p = f;
     192       11009 :             while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
     193             :                 ;
     194             : 
     195             :             /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
     196             :              * they don't affect the amount of space we reserve.
     197             :              */
     198       11009 :             if (*f == 'l') {
     199           0 :                 if (f[1] == 'd' || f[1] == 'u') {
     200           0 :                     ++f;
     201             :                 }
     202             : #ifdef HAVE_LONG_LONG
     203           0 :                 else if (f[1] == 'l' &&
     204           0 :                          (f[2] == 'd' || f[2] == 'u')) {
     205           0 :                     longlongflag = 1;
     206           0 :                     f += 2;
     207             :                 }
     208             : #endif
     209             :             }
     210       11009 :             else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
     211           0 :                 ++f;
     212             :             }
     213             : 
     214       11009 :             switch (*f) {
     215             :             case 'c':
     216           0 :                 (void)va_arg(count, int);
     217             :                 /* fall through... */
     218             :             case '%':
     219           0 :                 n++;
     220           0 :                 break;
     221             :             case 'd': case 'u': case 'i': case 'x':
     222          11 :                 (void) va_arg(count, int);
     223             : #ifdef HAVE_LONG_LONG
     224             :                 /* Need at most
     225             :                    ceil(log10(256)*SIZEOF_LONG_LONG) digits,
     226             :                    plus 1 for the sign.  53/22 is an upper
     227             :                    bound for log10(256). */
     228          11 :                 if (longlongflag)
     229           0 :                     n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
     230             :                 else
     231             : #endif
     232             :                     /* 20 bytes is enough to hold a 64-bit
     233             :                        integer.  Decimal takes the most
     234             :                        space.  This isn't enough for
     235             :                        octal. */
     236          11 :                     n += 20;
     237             : 
     238          11 :                 break;
     239             :             case 's':
     240       10998 :                 s = va_arg(count, char*);
     241       10998 :                 n += strlen(s);
     242       10998 :                 break;
     243             :             case 'p':
     244           0 :                 (void) va_arg(count, int);
     245             :                 /* maximum 64-bit pointer representation:
     246             :                  * 0xffffffffffffffff
     247             :                  * so 19 characters is enough.
     248             :                  * XXX I count 18 -- what's the extra for?
     249             :                  */
     250           0 :                 n += 19;
     251           0 :                 break;
     252             :             default:
     253             :                 /* if we stumble upon an unknown
     254             :                    formatting code, copy the rest of
     255             :                    the format string to the output
     256             :                    string. (we cannot just skip the
     257             :                    code, since there's no way to know
     258             :                    what's in the argument list) */
     259           0 :                 n += strlen(p);
     260           0 :                 goto expand;
     261             :             }
     262             :         } else
     263      161615 :             n++;
     264             :     }
     265             :  expand:
     266             :     /* step 2: fill the buffer */
     267             :     /* Since we've analyzed how much space we need for the worst case,
     268             :        use sprintf directly instead of the slower PyOS_snprintf. */
     269        5657 :     string = PyString_FromStringAndSize(NULL, n);
     270        5657 :     if (!string)
     271           0 :         return NULL;
     272             : 
     273        5657 :     s = PyString_AsString(string);
     274             : 
     275      178281 :     for (f = format; *f; f++) {
     276      172624 :         if (*f == '%') {
     277       11009 :             const char* p = f++;
     278             :             Py_ssize_t i;
     279       11009 :             int longflag = 0;
     280             : #ifdef HAVE_LONG_LONG
     281       11009 :             int longlongflag = 0;
     282             : #endif
     283       11009 :             int size_tflag = 0;
     284             :             /* parse the width.precision part (we're only
     285             :                interested in the precision value, if any) */
     286       11009 :             n = 0;
     287       22018 :             while (isdigit(Py_CHARMASK(*f)))
     288           0 :                 n = (n*10) + *f++ - '0';
     289       11009 :             if (*f == '.') {
     290       10995 :                 f++;
     291       10995 :                 n = 0;
     292       49626 :                 while (isdigit(Py_CHARMASK(*f)))
     293       27636 :                     n = (n*10) + *f++ - '0';
     294             :             }
     295       22018 :             while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
     296           0 :                 f++;
     297             :             /* Handle %ld, %lu, %lld and %llu. */
     298       11009 :             if (*f == 'l') {
     299           0 :                 if (f[1] == 'd' || f[1] == 'u') {
     300           0 :                     longflag = 1;
     301           0 :                     ++f;
     302             :                 }
     303             : #ifdef HAVE_LONG_LONG
     304           0 :                 else if (f[1] == 'l' &&
     305           0 :                          (f[2] == 'd' || f[2] == 'u')) {
     306           0 :                     longlongflag = 1;
     307           0 :                     f += 2;
     308             :                 }
     309             : #endif
     310             :             }
     311             :             /* handle the size_t flag. */
     312       11009 :             else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
     313           0 :                 size_tflag = 1;
     314           0 :                 ++f;
     315             :             }
     316             : 
     317       11009 :             switch (*f) {
     318             :             case 'c':
     319           0 :                 *s++ = va_arg(vargs, int);
     320           0 :                 break;
     321             :             case 'd':
     322          11 :                 if (longflag)
     323           0 :                     sprintf(s, "%ld", va_arg(vargs, long));
     324             : #ifdef HAVE_LONG_LONG
     325          11 :                 else if (longlongflag)
     326           0 :                     sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
     327             :                         va_arg(vargs, PY_LONG_LONG));
     328             : #endif
     329          11 :                 else if (size_tflag)
     330           0 :                     sprintf(s, "%" PY_FORMAT_SIZE_T "d",
     331             :                         va_arg(vargs, Py_ssize_t));
     332             :                 else
     333          11 :                     sprintf(s, "%d", va_arg(vargs, int));
     334          11 :                 s += strlen(s);
     335          11 :                 break;
     336             :             case 'u':
     337           0 :                 if (longflag)
     338           0 :                     sprintf(s, "%lu",
     339             :                         va_arg(vargs, unsigned long));
     340             : #ifdef HAVE_LONG_LONG
     341           0 :                 else if (longlongflag)
     342           0 :                     sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
     343             :                         va_arg(vargs, PY_LONG_LONG));
     344             : #endif
     345           0 :                 else if (size_tflag)
     346           0 :                     sprintf(s, "%" PY_FORMAT_SIZE_T "u",
     347             :                         va_arg(vargs, size_t));
     348             :                 else
     349           0 :                     sprintf(s, "%u",
     350             :                         va_arg(vargs, unsigned int));
     351           0 :                 s += strlen(s);
     352           0 :                 break;
     353             :             case 'i':
     354           0 :                 sprintf(s, "%i", va_arg(vargs, int));
     355           0 :                 s += strlen(s);
     356           0 :                 break;
     357             :             case 'x':
     358           0 :                 sprintf(s, "%x", va_arg(vargs, int));
     359           0 :                 s += strlen(s);
     360           0 :                 break;
     361             :             case 's':
     362       10998 :                 p = va_arg(vargs, char*);
     363       10998 :                 i = strlen(p);
     364       10998 :                 if (n > 0 && i > n)
     365           0 :                     i = n;
     366       10998 :                 Py_MEMCPY(s, p, i);
     367       10998 :                 s += i;
     368       10998 :                 break;
     369             :             case 'p':
     370           0 :                 sprintf(s, "%p", va_arg(vargs, void*));
     371             :                 /* %p is ill-defined:  ensure leading 0x. */
     372           0 :                 if (s[1] == 'X')
     373           0 :                     s[1] = 'x';
     374           0 :                 else if (s[1] != 'x') {
     375           0 :                     memmove(s+2, s, strlen(s)+1);
     376           0 :                     s[0] = '0';
     377           0 :                     s[1] = 'x';
     378             :                 }
     379           0 :                 s += strlen(s);
     380           0 :                 break;
     381             :             case '%':
     382           0 :                 *s++ = '%';
     383           0 :                 break;
     384             :             default:
     385           0 :                 strcpy(s, p);
     386           0 :                 s += strlen(s);
     387           0 :                 goto end;
     388             :             }
     389             :         } else
     390      161615 :             *s++ = *f;
     391             :     }
     392             : 
     393             :  end:
     394        5657 :     if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
     395           0 :         return NULL;
     396        5657 :     return string;
     397             : }
     398             : 
     399             : PyObject *
     400          11 : PyString_FromFormat(const char *format, ...)
     401             : {
     402             :     PyObject* ret;
     403             :     va_list vargs;
     404             : 
     405             : #ifdef HAVE_STDARG_PROTOTYPES
     406          11 :     va_start(vargs, format);
     407             : #else
     408             :     va_start(vargs);
     409             : #endif
     410          11 :     ret = PyString_FromFormatV(format, vargs);
     411          11 :     va_end(vargs);
     412          11 :     return ret;
     413             : }
     414             : 
     415             : 
     416           0 : PyObject *PyString_Decode(const char *s,
     417             :                           Py_ssize_t size,
     418             :                           const char *encoding,
     419             :                           const char *errors)
     420             : {
     421             :     PyObject *v, *str;
     422             : 
     423           0 :     str = PyString_FromStringAndSize(s, size);
     424           0 :     if (str == NULL)
     425           0 :         return NULL;
     426           0 :     v = PyString_AsDecodedString(str, encoding, errors);
     427           0 :     Py_DECREF(str);
     428           0 :     return v;
     429             : }
     430             : 
     431           0 : PyObject *PyString_AsDecodedObject(PyObject *str,
     432             :                                    const char *encoding,
     433             :                                    const char *errors)
     434             : {
     435             :     PyObject *v;
     436             : 
     437           0 :     if (!PyString_Check(str)) {
     438           0 :         PyErr_BadArgument();
     439           0 :         goto onError;
     440             :     }
     441             : 
     442           0 :     if (encoding == NULL) {
     443             : #ifdef Py_USING_UNICODE
     444           0 :         encoding = PyUnicode_GetDefaultEncoding();
     445             : #else
     446             :         PyErr_SetString(PyExc_ValueError, "no encoding specified");
     447             :         goto onError;
     448             : #endif
     449             :     }
     450             : 
     451             :     /* Decode via the codec registry */
     452           0 :     v = _PyCodec_DecodeText(str, encoding, errors);
     453           0 :     if (v == NULL)
     454           0 :         goto onError;
     455             : 
     456           0 :     return v;
     457             : 
     458             :  onError:
     459           0 :     return NULL;
     460             : }
     461             : 
     462           0 : PyObject *PyString_AsDecodedString(PyObject *str,
     463             :                                    const char *encoding,
     464             :                                    const char *errors)
     465             : {
     466             :     PyObject *v;
     467             : 
     468           0 :     v = PyString_AsDecodedObject(str, encoding, errors);
     469           0 :     if (v == NULL)
     470           0 :         goto onError;
     471             : 
     472             : #ifdef Py_USING_UNICODE
     473             :     /* Convert Unicode to a string using the default encoding */
     474           0 :     if (PyUnicode_Check(v)) {
     475           0 :         PyObject *temp = v;
     476           0 :         v = PyUnicode_AsEncodedString(v, NULL, NULL);
     477           0 :         Py_DECREF(temp);
     478           0 :         if (v == NULL)
     479           0 :             goto onError;
     480             :     }
     481             : #endif
     482           0 :     if (!PyString_Check(v)) {
     483           0 :         PyErr_Format(PyExc_TypeError,
     484             :                      "decoder did not return a string object (type=%.400s)",
     485           0 :                      Py_TYPE(v)->tp_name);
     486           0 :         Py_DECREF(v);
     487           0 :         goto onError;
     488             :     }
     489             : 
     490           0 :     return v;
     491             : 
     492             :  onError:
     493           0 :     return NULL;
     494             : }
     495             : 
     496           0 : PyObject *PyString_Encode(const char *s,
     497             :                           Py_ssize_t size,
     498             :                           const char *encoding,
     499             :                           const char *errors)
     500             : {
     501             :     PyObject *v, *str;
     502             : 
     503           0 :     str = PyString_FromStringAndSize(s, size);
     504           0 :     if (str == NULL)
     505           0 :         return NULL;
     506           0 :     v = PyString_AsEncodedString(str, encoding, errors);
     507           0 :     Py_DECREF(str);
     508           0 :     return v;
     509             : }
     510             : 
     511           0 : PyObject *PyString_AsEncodedObject(PyObject *str,
     512             :                                    const char *encoding,
     513             :                                    const char *errors)
     514             : {
     515             :     PyObject *v;
     516             : 
     517           0 :     if (!PyString_Check(str)) {
     518           0 :         PyErr_BadArgument();
     519           0 :         goto onError;
     520             :     }
     521             : 
     522           0 :     if (encoding == NULL) {
     523             : #ifdef Py_USING_UNICODE
     524           0 :         encoding = PyUnicode_GetDefaultEncoding();
     525             : #else
     526             :         PyErr_SetString(PyExc_ValueError, "no encoding specified");
     527             :         goto onError;
     528             : #endif
     529             :     }
     530             : 
     531             :     /* Encode via the codec registry */
     532           0 :     v = _PyCodec_EncodeText(str, encoding, errors);
     533           0 :     if (v == NULL)
     534           0 :         goto onError;
     535             : 
     536           0 :     return v;
     537             : 
     538             :  onError:
     539           0 :     return NULL;
     540             : }
     541             : 
     542           0 : PyObject *PyString_AsEncodedString(PyObject *str,
     543             :                                    const char *encoding,
     544             :                                    const char *errors)
     545             : {
     546             :     PyObject *v;
     547             : 
     548           0 :     v = PyString_AsEncodedObject(str, encoding, errors);
     549           0 :     if (v == NULL)
     550           0 :         goto onError;
     551             : 
     552             : #ifdef Py_USING_UNICODE
     553             :     /* Convert Unicode to a string using the default encoding */
     554           0 :     if (PyUnicode_Check(v)) {
     555           0 :         PyObject *temp = v;
     556           0 :         v = PyUnicode_AsEncodedString(v, NULL, NULL);
     557           0 :         Py_DECREF(temp);
     558           0 :         if (v == NULL)
     559           0 :             goto onError;
     560             :     }
     561             : #endif
     562           0 :     if (!PyString_Check(v)) {
     563           0 :         PyErr_Format(PyExc_TypeError,
     564             :                      "encoder did not return a string object (type=%.400s)",
     565           0 :                      Py_TYPE(v)->tp_name);
     566           0 :         Py_DECREF(v);
     567           0 :         goto onError;
     568             :     }
     569             : 
     570           0 :     return v;
     571             : 
     572             :  onError:
     573           0 :     return NULL;
     574             : }
     575             : 
     576             : static void
     577      143874 : string_dealloc(PyObject *op)
     578             : {
     579      143874 :     switch (PyString_CHECK_INTERNED(op)) {
     580             :         case SSTATE_NOT_INTERNED:
     581      129380 :             break;
     582             : 
     583             :         case SSTATE_INTERNED_MORTAL:
     584             :             /* revive dead object temporarily for DelItem */
     585       14494 :             Py_REFCNT(op) = 3;
     586       14494 :             if (PyDict_DelItem(interned, op) != 0)
     587           0 :                 Py_FatalError(
     588             :                     "deletion of interned string failed");
     589       14494 :             break;
     590             : 
     591             :         case SSTATE_INTERNED_IMMORTAL:
     592           0 :             Py_FatalError("Immortal interned string died.");
     593             : 
     594             :         default:
     595           0 :             Py_FatalError("Inconsistent interned string state.");
     596             :     }
     597      143874 :     Py_TYPE(op)->tp_free(op);
     598      143874 : }
     599             : 
     600             : /* Unescape a backslash-escaped string. If unicode is non-zero,
     601             :    the string is a u-literal. If recode_encoding is non-zero,
     602             :    the string is UTF-8 encoded and should be re-encoded in the
     603             :    specified encoding.  */
     604             : 
     605          69 : PyObject *PyString_DecodeEscape(const char *s,
     606             :                                 Py_ssize_t len,
     607             :                                 const char *errors,
     608             :                                 Py_ssize_t unicode,
     609             :                                 const char *recode_encoding)
     610             : {
     611             :     int c;
     612             :     char *p, *buf;
     613             :     const char *end;
     614             :     PyObject *v;
     615          69 :     Py_ssize_t newlen = recode_encoding ? 4*len:len;
     616          69 :     v = PyString_FromStringAndSize((char *)NULL, newlen);
     617          69 :     if (v == NULL)
     618           0 :         return NULL;
     619          69 :     p = buf = PyString_AsString(v);
     620          69 :     end = s + len;
     621        5547 :     while (s < end) {
     622        5409 :         if (*s != '\\') {
     623             :           non_esc:
     624             : #ifdef Py_USING_UNICODE
     625        5325 :             if (recode_encoding && (*s & 0x80)) {
     626             :                 PyObject *u, *w;
     627             :                 char *r;
     628             :                 const char* t;
     629             :                 Py_ssize_t rn;
     630           0 :                 t = s;
     631             :                 /* Decode non-ASCII bytes as UTF-8. */
     632           0 :                 while (t < end && (*t & 0x80)) t++;
     633           0 :                 u = PyUnicode_DecodeUTF8(s, t - s, errors);
     634           0 :                 if(!u) goto failed;
     635             : 
     636             :                 /* Recode them in target encoding. */
     637           0 :                 w = PyUnicode_AsEncodedString(
     638             :                     u, recode_encoding, errors);
     639           0 :                 Py_DECREF(u);
     640           0 :                 if (!w)                 goto failed;
     641             : 
     642             :                 /* Append bytes to output buffer. */
     643             :                 assert(PyString_Check(w));
     644           0 :                 r = PyString_AS_STRING(w);
     645           0 :                 rn = PyString_GET_SIZE(w);
     646           0 :                 Py_MEMCPY(p, r, rn);
     647           0 :                 p += rn;
     648           0 :                 Py_DECREF(w);
     649           0 :                 s = t;
     650             :             } else {
     651        5325 :                 *p++ = *s++;
     652             :             }
     653             : #else
     654             :             *p++ = *s++;
     655             : #endif
     656        5325 :             continue;
     657             :         }
     658          84 :         s++;
     659          84 :         if (s==end) {
     660           0 :             PyErr_SetString(PyExc_ValueError,
     661             :                             "Trailing \\ in string");
     662           0 :             goto failed;
     663             :         }
     664          84 :         switch (*s++) {
     665             :         /* XXX This assumes ASCII! */
     666           1 :         case '\n': break;
     667           8 :         case '\\': *p++ = '\\'; break;
     668           0 :         case '\'': *p++ = '\''; break;
     669           0 :         case '\"': *p++ = '\"'; break;
     670           0 :         case 'b': *p++ = '\b'; break;
     671           0 :         case 'f': *p++ = '\014'; break; /* FF */
     672          11 :         case 't': *p++ = '\t'; break;
     673          36 :         case 'n': *p++ = '\n'; break;
     674           0 :         case 'r': *p++ = '\r'; break;
     675           0 :         case 'v': *p++ = '\013'; break; /* VT */
     676           0 :         case 'a': *p++ = '\007'; break; /* BEL, not classic C */
     677             :         case '0': case '1': case '2': case '3':
     678             :         case '4': case '5': case '6': case '7':
     679          26 :             c = s[-1] - '0';
     680          26 :             if (s < end && '0' <= *s && *s <= '7') {
     681          14 :                 c = (c<<3) + *s++ - '0';
     682          14 :                 if (s < end && '0' <= *s && *s <= '7')
     683          14 :                     c = (c<<3) + *s++ - '0';
     684             :             }
     685          26 :             *p++ = c;
     686          26 :             break;
     687             :         case 'x':
     688           4 :             if (s+1 < end &&
     689           4 :                 isxdigit(Py_CHARMASK(s[0])) &&
     690           2 :                 isxdigit(Py_CHARMASK(s[1])))
     691             :             {
     692           2 :                 unsigned int x = 0;
     693           2 :                 c = Py_CHARMASK(*s);
     694           2 :                 s++;
     695           2 :                 if (isdigit(c))
     696           2 :                     x = c - '0';
     697           0 :                 else if (islower(c))
     698           0 :                     x = 10 + c - 'a';
     699             :                 else
     700           0 :                     x = 10 + c - 'A';
     701           2 :                 x = x << 4;
     702           2 :                 c = Py_CHARMASK(*s);
     703           2 :                 s++;
     704           2 :                 if (isdigit(c))
     705           2 :                     x += c - '0';
     706           0 :                 else if (islower(c))
     707           0 :                     x += 10 + c - 'a';
     708             :                 else
     709           0 :                     x += 10 + c - 'A';
     710           2 :                 *p++ = x;
     711           2 :                 break;
     712             :             }
     713           0 :             if (!errors || strcmp(errors, "strict") == 0) {
     714           0 :                 PyErr_SetString(PyExc_ValueError,
     715             :                                 "invalid \\x escape");
     716           0 :                 goto failed;
     717             :             }
     718           0 :             if (strcmp(errors, "replace") == 0) {
     719           0 :                 *p++ = '?';
     720           0 :             } else if (strcmp(errors, "ignore") == 0)
     721             :                 /* do nothing */;
     722             :             else {
     723           0 :                 PyErr_Format(PyExc_ValueError,
     724             :                              "decoding error; "
     725             :                              "unknown error handling code: %.400s",
     726             :                              errors);
     727           0 :                 goto failed;
     728             :             }
     729             :             /* skip \x */
     730           0 :             if (s < end && isxdigit(Py_CHARMASK(s[0])))
     731           0 :                 s++; /* and a hexdigit */
     732           0 :             break;
     733             : #ifndef Py_USING_UNICODE
     734             :         case 'u':
     735             :         case 'U':
     736             :         case 'N':
     737             :             if (unicode) {
     738             :                 PyErr_SetString(PyExc_ValueError,
     739             :                           "Unicode escapes not legal "
     740             :                           "when Unicode disabled");
     741             :                 goto failed;
     742             :             }
     743             : #endif
     744             :         default:
     745           0 :             *p++ = '\\';
     746           0 :             s--;
     747           0 :             goto non_esc; /* an arbitrary number of unescaped
     748             :                              UTF-8 bytes may follow. */
     749             :         }
     750             :     }
     751          69 :     if (p-buf < newlen)
     752          69 :         _PyString_Resize(&v, p - buf); /* v is cleared on error */
     753          69 :     return v;
     754             :   failed:
     755           0 :     Py_DECREF(v);
     756           0 :     return NULL;
     757             : }
     758             : 
     759             : /* -------------------------------------------------------------------- */
     760             : /* object api */
     761             : 
     762             : static Py_ssize_t
     763           0 : string_getsize(register PyObject *op)
     764             : {
     765             :     char *s;
     766             :     Py_ssize_t len;
     767           0 :     if (PyString_AsStringAndSize(op, &s, &len))
     768           0 :         return -1;
     769           0 :     return len;
     770             : }
     771             : 
     772             : static /*const*/ char *
     773           0 : string_getbuffer(register PyObject *op)
     774             : {
     775             :     char *s;
     776             :     Py_ssize_t len;
     777           0 :     if (PyString_AsStringAndSize(op, &s, &len))
     778           0 :         return NULL;
     779           0 :     return s;
     780             : }
     781             : 
     782             : Py_ssize_t
     783        6499 : PyString_Size(register PyObject *op)
     784             : {
     785        6499 :     if (!PyString_Check(op))
     786           0 :         return string_getsize(op);
     787        6499 :     return Py_SIZE(op);
     788             : }
     789             : 
     790             : /*const*/ char *
     791      308022 : PyString_AsString(register PyObject *op)
     792             : {
     793      308022 :     if (!PyString_Check(op))
     794           0 :         return string_getbuffer(op);
     795      308022 :     return ((PyStringObject *)op) -> ob_sval;
     796             : }
     797             : 
     798             : int
     799          27 : PyString_AsStringAndSize(register PyObject *obj,
     800             :                          register char **s,
     801             :                          register Py_ssize_t *len)
     802             : {
     803          27 :     if (s == NULL) {
     804           0 :         PyErr_BadInternalCall();
     805           0 :         return -1;
     806             :     }
     807             : 
     808          27 :     if (!PyString_Check(obj)) {
     809             : #ifdef Py_USING_UNICODE
     810           0 :         if (PyUnicode_Check(obj)) {
     811           0 :             obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
     812           0 :             if (obj == NULL)
     813           0 :                 return -1;
     814             :         }
     815             :         else
     816             : #endif
     817             :         {
     818           0 :             PyErr_Format(PyExc_TypeError,
     819             :                          "expected string or Unicode object, "
     820           0 :                          "%.200s found", Py_TYPE(obj)->tp_name);
     821           0 :             return -1;
     822             :         }
     823             :     }
     824             : 
     825          27 :     *s = PyString_AS_STRING(obj);
     826          27 :     if (len != NULL)
     827           0 :         *len = PyString_GET_SIZE(obj);
     828          27 :     else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
     829           0 :         PyErr_SetString(PyExc_TypeError,
     830             :                         "expected string without null bytes");
     831           0 :         return -1;
     832             :     }
     833          27 :     return 0;
     834             : }
     835             : 
     836             : /* -------------------------------------------------------------------- */
     837             : /* Methods */
     838             : 
     839             : #include "stringlib/stringdefs.h"
     840             : #include "stringlib/fastsearch.h"
     841             : 
     842             : #include "stringlib/count.h"
     843             : #include "stringlib/find.h"
     844             : #include "stringlib/partition.h"
     845             : #include "stringlib/split.h"
     846             : 
     847             : #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
     848             : #include "stringlib/localeutil.h"
     849             : 
     850             : 
     851             : 
     852             : static int
     853           0 : string_print(PyStringObject *op, FILE *fp, int flags)
     854             : {
     855             :     Py_ssize_t i, str_len;
     856             :     char c;
     857             :     int quote;
     858             : 
     859             :     /* XXX Ought to check for interrupts when writing long strings */
     860           0 :     if (! PyString_CheckExact(op)) {
     861             :         int ret;
     862             :         /* A str subclass may have its own __str__ method. */
     863           0 :         op = (PyStringObject *) PyObject_Str((PyObject *)op);
     864           0 :         if (op == NULL)
     865           0 :             return -1;
     866           0 :         ret = string_print(op, fp, flags);
     867           0 :         Py_DECREF(op);
     868           0 :         return ret;
     869             :     }
     870           0 :     if (flags & Py_PRINT_RAW) {
     871           0 :         char *data = op->ob_sval;
     872           0 :         Py_ssize_t size = Py_SIZE(op);
     873             :         Py_BEGIN_ALLOW_THREADS
     874           0 :         while (size > INT_MAX) {
     875             :             /* Very long strings cannot be written atomically.
     876             :              * But don't write exactly INT_MAX bytes at a time
     877             :              * to avoid memory aligment issues.
     878             :              */
     879           0 :             const int chunk_size = INT_MAX & ~0x3FFF;
     880           0 :             fwrite(data, 1, chunk_size, fp);
     881           0 :             data += chunk_size;
     882           0 :             size -= chunk_size;
     883             :         }
     884             : #ifdef __VMS
     885             :         if (size) fwrite(data, (size_t)size, 1, fp);
     886             : #else
     887           0 :         fwrite(data, 1, (size_t)size, fp);
     888             : #endif
     889             :         Py_END_ALLOW_THREADS
     890           0 :         return 0;
     891             :     }
     892             : 
     893             :     /* figure out which quote to use; single is preferred */
     894           0 :     quote = '\'';
     895           0 :     if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
     896           0 :         !memchr(op->ob_sval, '"', Py_SIZE(op)))
     897           0 :         quote = '"';
     898             : 
     899           0 :     str_len = Py_SIZE(op);
     900             :     Py_BEGIN_ALLOW_THREADS
     901           0 :     fputc(quote, fp);
     902           0 :     for (i = 0; i < str_len; i++) {
     903             :         /* Since strings are immutable and the caller should have a
     904             :         reference, accessing the internal buffer should not be an issue
     905             :         with the GIL released. */
     906           0 :         c = op->ob_sval[i];
     907           0 :         if (c == quote || c == '\\')
     908           0 :             fprintf(fp, "\\%c", c);
     909           0 :         else if (c == '\t')
     910           0 :             fprintf(fp, "\\t");
     911           0 :         else if (c == '\n')
     912           0 :             fprintf(fp, "\\n");
     913           0 :         else if (c == '\r')
     914           0 :             fprintf(fp, "\\r");
     915           0 :         else if (c < ' ' || c >= 0x7f)
     916           0 :             fprintf(fp, "\\x%02x", c & 0xff);
     917             :         else
     918           0 :             fputc(c, fp);
     919             :     }
     920           0 :     fputc(quote, fp);
     921             :     Py_END_ALLOW_THREADS
     922           0 :     return 0;
     923             : }
     924             : 
     925             : PyObject *
     926         342 : PyString_Repr(PyObject *obj, int smartquotes)
     927             : {
     928         342 :     register PyStringObject* op = (PyStringObject*) obj;
     929             :     size_t newsize;
     930             :     PyObject *v;
     931         342 :     if (Py_SIZE(op) > (PY_SSIZE_T_MAX - 2)/4) {
     932           0 :         PyErr_SetString(PyExc_OverflowError,
     933             :             "string is too large to make repr");
     934           0 :         return NULL;
     935             :     }
     936         342 :     newsize = 2 + 4*Py_SIZE(op);
     937         342 :     v = PyString_FromStringAndSize((char *)NULL, newsize);
     938         342 :     if (v == NULL) {
     939           0 :         return NULL;
     940             :     }
     941             :     else {
     942             :         register Py_ssize_t i;
     943             :         register char c;
     944             :         register char *p;
     945             :         int quote;
     946             : 
     947             :         /* figure out which quote to use; single is preferred */
     948         342 :         quote = '\'';
     949         684 :         if (smartquotes &&
     950         342 :             memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
     951           0 :             !memchr(op->ob_sval, '"', Py_SIZE(op)))
     952           0 :             quote = '"';
     953             : 
     954         342 :         p = PyString_AS_STRING(v);
     955         342 :         *p++ = quote;
     956        2520 :         for (i = 0; i < Py_SIZE(op); i++) {
     957             :             /* There's at least enough room for a hex escape
     958             :                and a closing quote. */
     959             :             assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
     960        2178 :             c = op->ob_sval[i];
     961        2178 :             if (c == quote || c == '\\')
     962           0 :                 *p++ = '\\', *p++ = c;
     963        2178 :             else if (c == '\t')
     964           0 :                 *p++ = '\\', *p++ = 't';
     965        2178 :             else if (c == '\n')
     966           0 :                 *p++ = '\\', *p++ = 'n';
     967        2178 :             else if (c == '\r')
     968           0 :                 *p++ = '\\', *p++ = 'r';
     969        2178 :             else if (c < ' ' || c >= 0x7f) {
     970             :                 /* For performance, we don't want to call
     971             :                    PyOS_snprintf here (extra layers of
     972             :                    function call). */
     973           0 :                 sprintf(p, "\\x%02x", c & 0xff);
     974           0 :                 p += 4;
     975             :             }
     976             :             else
     977        2178 :                 *p++ = c;
     978             :         }
     979             :         assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
     980         342 :         *p++ = quote;
     981         342 :         *p = '\0';
     982         342 :         if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
     983           0 :             return NULL;
     984         342 :         return v;
     985             :     }
     986             : }
     987             : 
     988             : static PyObject *
     989         342 : string_repr(PyObject *op)
     990             : {
     991         342 :     return PyString_Repr(op, 1);
     992             : }
     993             : 
     994             : static PyObject *
     995           0 : string_str(PyObject *s)
     996             : {
     997             :     assert(PyString_Check(s));
     998           0 :     if (PyString_CheckExact(s)) {
     999           0 :         Py_INCREF(s);
    1000           0 :         return s;
    1001             :     }
    1002             :     else {
    1003             :         /* Subtype -- return genuine string with the same value. */
    1004           0 :         PyStringObject *t = (PyStringObject *) s;
    1005           0 :         return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
    1006             :     }
    1007             : }
    1008             : 
    1009             : static Py_ssize_t
    1010       36678 : string_length(PyStringObject *a)
    1011             : {
    1012       36678 :     return Py_SIZE(a);
    1013             : }
    1014             : 
    1015             : static PyObject *
    1016        6739 : string_concat(register PyStringObject *a, register PyObject *bb)
    1017             : {
    1018             :     register Py_ssize_t size;
    1019             :     register PyStringObject *op;
    1020        6739 :     if (!PyString_Check(bb)) {
    1021             : #ifdef Py_USING_UNICODE
    1022           0 :         if (PyUnicode_Check(bb))
    1023           0 :             return PyUnicode_Concat((PyObject *)a, bb);
    1024             : #endif
    1025           0 :         if (PyByteArray_Check(bb))
    1026           0 :             return PyByteArray_Concat((PyObject *)a, bb);
    1027           0 :         PyErr_Format(PyExc_TypeError,
    1028             :                      "cannot concatenate 'str' and '%.200s' objects",
    1029           0 :                      Py_TYPE(bb)->tp_name);
    1030           0 :         return NULL;
    1031             :     }
    1032             : #define b ((PyStringObject *)bb)
    1033             :     /* Optimize cases with empty left or right operand */
    1034        6793 :     if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
    1035         108 :         PyString_CheckExact(a) && PyString_CheckExact(b)) {
    1036          54 :         if (Py_SIZE(a) == 0) {
    1037          54 :             Py_INCREF(bb);
    1038          54 :             return bb;
    1039             :         }
    1040           0 :         Py_INCREF(a);
    1041           0 :         return (PyObject *)a;
    1042             :     }
    1043             :     /* Check that string sizes are not negative, to prevent an
    1044             :        overflow in cases where we are passed incorrectly-created
    1045             :        strings with negative lengths (due to a bug in other code).
    1046             :     */
    1047       13370 :     if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
    1048        6685 :         Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
    1049           0 :         PyErr_SetString(PyExc_OverflowError,
    1050             :                         "strings are too large to concat");
    1051           0 :         return NULL;
    1052             :     }
    1053        6685 :     size = Py_SIZE(a) + Py_SIZE(b);
    1054             : 
    1055             :     /* Inline PyObject_NewVar */
    1056        6685 :     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
    1057           0 :         PyErr_SetString(PyExc_OverflowError,
    1058             :                         "strings are too large to concat");
    1059           0 :         return NULL;
    1060             :     }
    1061        6685 :     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
    1062        6685 :     if (op == NULL)
    1063           0 :         return PyErr_NoMemory();
    1064        6685 :     (void)PyObject_INIT_VAR(op, &PyString_Type, size);
    1065        6685 :     op->ob_shash = -1;
    1066        6685 :     op->ob_sstate = SSTATE_NOT_INTERNED;
    1067        6685 :     Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
    1068        6685 :     Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
    1069        6685 :     op->ob_sval[size] = '\0';
    1070        6685 :     return (PyObject *) op;
    1071             : #undef b
    1072             : }
    1073             : 
    1074             : static PyObject *
    1075          24 : string_repeat(register PyStringObject *a, register Py_ssize_t n)
    1076             : {
    1077             :     register Py_ssize_t i;
    1078             :     register Py_ssize_t j;
    1079             :     register Py_ssize_t size;
    1080             :     register PyStringObject *op;
    1081             :     size_t nbytes;
    1082          24 :     if (n < 0)
    1083           0 :         n = 0;
    1084             :     /* watch out for overflows:  the size can overflow Py_ssize_t,
    1085             :      * and the # of bytes needed can overflow size_t
    1086             :      */
    1087          24 :     if (n && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
    1088           0 :         PyErr_SetString(PyExc_OverflowError,
    1089             :             "repeated string is too long");
    1090           0 :         return NULL;
    1091             :     }
    1092          24 :     size = Py_SIZE(a) * n;
    1093          24 :     if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
    1094           9 :         Py_INCREF(a);
    1095           9 :         return (PyObject *)a;
    1096             :     }
    1097          15 :     nbytes = (size_t)size;
    1098          15 :     if (nbytes + PyStringObject_SIZE <= nbytes) {
    1099           0 :         PyErr_SetString(PyExc_OverflowError,
    1100             :             "repeated string is too long");
    1101           0 :         return NULL;
    1102             :     }
    1103          15 :     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
    1104          15 :     if (op == NULL)
    1105           0 :         return PyErr_NoMemory();
    1106          15 :     (void)PyObject_INIT_VAR(op, &PyString_Type, size);
    1107          15 :     op->ob_shash = -1;
    1108          15 :     op->ob_sstate = SSTATE_NOT_INTERNED;
    1109          15 :     op->ob_sval[size] = '\0';
    1110          15 :     if (Py_SIZE(a) == 1 && n > 0) {
    1111          15 :         memset(op->ob_sval, a->ob_sval[0] , n);
    1112          15 :         return (PyObject *) op;
    1113             :     }
    1114           0 :     i = 0;
    1115           0 :     if (i < size) {
    1116           0 :         Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
    1117           0 :         i = Py_SIZE(a);
    1118             :     }
    1119           0 :     while (i < size) {
    1120           0 :         j = (i <= size-i)  ?  i  :  size-i;
    1121           0 :         Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
    1122           0 :         i += j;
    1123             :     }
    1124           0 :     return (PyObject *) op;
    1125             : }
    1126             : 
    1127             : /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
    1128             : 
    1129             : static PyObject *
    1130        8253 : string_slice(register PyStringObject *a, register Py_ssize_t i,
    1131             :              register Py_ssize_t j)
    1132             :      /* j -- may be negative! */
    1133             : {
    1134        8253 :     if (i < 0)
    1135           0 :         i = 0;
    1136        8253 :     if (j < 0)
    1137           0 :         j = 0; /* Avoid signed/unsigned bug in next line */
    1138        8253 :     if (j > Py_SIZE(a))
    1139        1257 :         j = Py_SIZE(a);
    1140        8253 :     if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
    1141             :         /* It's the same as a */
    1142        1503 :         Py_INCREF(a);
    1143        1503 :         return (PyObject *)a;
    1144             :     }
    1145        6750 :     if (j < i)
    1146           0 :         j = i;
    1147        6750 :     return PyString_FromStringAndSize(a->ob_sval + i, j-i);
    1148             : }
    1149             : 
    1150             : static int
    1151        5535 : string_contains(PyObject *str_obj, PyObject *sub_obj)
    1152             : {
    1153        5535 :     if (!PyString_CheckExact(sub_obj)) {
    1154             : #ifdef Py_USING_UNICODE
    1155           0 :         if (PyUnicode_Check(sub_obj))
    1156           0 :             return PyUnicode_Contains(str_obj, sub_obj);
    1157             : #endif
    1158           0 :         if (!PyString_Check(sub_obj)) {
    1159           0 :             PyErr_Format(PyExc_TypeError,
    1160             :                 "'in <string>' requires string as left operand, "
    1161           0 :                 "not %.200s", Py_TYPE(sub_obj)->tp_name);
    1162           0 :             return -1;
    1163             :         }
    1164             :     }
    1165             : 
    1166        5535 :     return stringlib_contains_obj(str_obj, sub_obj);
    1167             : }
    1168             : 
    1169             : static PyObject *
    1170       43455 : string_item(PyStringObject *a, register Py_ssize_t i)
    1171             : {
    1172             :     char pchar;
    1173             :     PyObject *v;
    1174       43455 :     if (i < 0 || i >= Py_SIZE(a)) {
    1175        2297 :         PyErr_SetString(PyExc_IndexError, "string index out of range");
    1176        2297 :         return NULL;
    1177             :     }
    1178       41158 :     pchar = a->ob_sval[i];
    1179       41158 :     v = (PyObject *)characters[pchar & UCHAR_MAX];
    1180       41158 :     if (v == NULL)
    1181          45 :         v = PyString_FromStringAndSize(&pchar, 1);
    1182             :     else {
    1183             : #ifdef COUNT_ALLOCS
    1184             :         one_strings++;
    1185             : #endif
    1186       41113 :         Py_INCREF(v);
    1187             :     }
    1188       41158 :     return v;
    1189             : }
    1190             : 
    /* Rich comparison for string objects.
     *
     * Returns a new reference to Py_True or Py_False for "a <op> b", or to
     * Py_NotImplemented when either operand fails PyString_Check() or op is
     * not one of the six Py_LT..Py_GE codes.  Ordering is memcmp() over the
     * common prefix; when that prefix is equal the shorter string sorts first.
     */
    1191             : static PyObject*
    1192       91364 : string_richcompare(PyStringObject *a, PyStringObject *b, int op)
    1193             : {
    1194             :     int c;
    1195             :     Py_ssize_t len_a, len_b;
    1196             :     Py_ssize_t min_len;
    1197             :     PyObject *result;
    1198             : 
    1199             :     /* Make sure both arguments are strings. */
    1200       91364 :     if (!(PyString_Check(a) && PyString_Check(b))) {
    1201        2700 :         result = Py_NotImplemented;
    1202        2700 :         goto out;
    1203             :     }
                           /* Same object: the answer depends on the operator alone.  An
                              unrecognised op falls out of this switch and is rejected by
                              the default case of the second switch further down. */
    1204       88664 :     if (a == b) {
    1205        4232 :         switch (op) {
    1206             :         case Py_EQ:case Py_LE:case Py_GE:
    1207        3605 :             result = Py_True;
    1208        3605 :             goto out;
    1209             :         case Py_NE:case Py_LT:case Py_GT:
    1210         627 :             result = Py_False;
    1211         627 :             goto out;
    1212             :         }
    1213             :     }
    1214       84432 :     if (op == Py_EQ) {
    1215             :         /* Supporting Py_NE here as well does not save
    1216             :            much time, since Py_NE is rarely used.  */
                               /* Size and first byte are tested first, so memcmp() only
                                  runs when both already match. */
    1217       46425 :         if (Py_SIZE(a) == Py_SIZE(b)
    1218       24698 :             && (a->ob_sval[0] == b->ob_sval[0]
    1219        4402 :             && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
    1220        3823 :             result = Py_True;
    1221             :         } else {
    1222       42602 :             result = Py_False;
    1223             :         }
    1224       46425 :         goto out;
    1225             :     }
                           /* General ordering: c ends up <0, 0 or >0.  The first bytes are
                              compared directly; memcmp() over the common prefix is only
                              needed when they are equal. */
    1226       38007 :     len_a = Py_SIZE(a); len_b = Py_SIZE(b);
    1227       38007 :     min_len = (len_a < len_b) ? len_a : len_b;
    1228       38007 :     if (min_len > 0) {
    1229       38007 :         c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
    1230       38007 :         if (c==0)
    1231        7891 :             c = memcmp(a->ob_sval, b->ob_sval, min_len);
    1232             :     } else
    1233           0 :         c = 0;
                           /* Equal common prefix: the lengths decide. */
    1234       38007 :     if (c == 0)
    1235         910 :         c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
                           /* Turn the three-way result into the truth value of "a <op> b". */
    1236       38007 :     switch (op) {
    1237       36120 :     case Py_LT: c = c <  0; break;
    1238         618 :     case Py_LE: c = c <= 0; break;
    1239           0 :     case Py_EQ: assert(0);  break; /* unreachable */
    1240        1269 :     case Py_NE: c = c != 0; break;
    1241           0 :     case Py_GT: c = c >  0; break;
    1242           0 :     case Py_GE: c = c >= 0; break;
    1243             :     default:
    1244           0 :         result = Py_NotImplemented;
    1245           0 :         goto out;
    1246             :     }
    1247       38007 :     result = c ? Py_True : Py_False;
    1248             :   out:
                           /* Every path hands back a new reference to a shared singleton. */
    1249       91364 :     Py_INCREF(result);
    1250       91364 :     return result;
    1251             : }
    1252             : 
    /* Return nonzero when o1 and o2 have the same size and identical bytes,
     * else 0.  Both arguments are cast to PyStringObject* without any type
     * check, so the caller must already know they are string objects.
     */
    1253             : int
    1254       73653 : _PyString_Eq(PyObject *o1, PyObject *o2)
    1255             : {
    1256       73653 :     PyStringObject *a = (PyStringObject*) o1;
    1257       73653 :     PyStringObject *b = (PyStringObject*) o2;
    1258      147306 :     return Py_SIZE(a) == Py_SIZE(b)
    1259       73653 :       && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
    1260             : }
    1261             : 
    /* Return the hash of string a, computing and caching it on first use.
     *
     * a->ob_shash holds the cached value; -1 there means "not computed yet".
     * The empty string always hashes to 0.  For any other string the value is
     * seeded from _Py_HashSecret.prefix and the first byte, folded over every
     * byte with the multiplier 1000003, then xor'ed with the length and with
     * _Py_HashSecret.suffix.
     */
    1262             : static long
    1263      198233 : string_hash(PyStringObject *a)
    1264             : {
    1265             :     register Py_ssize_t len;
    1266             :     register unsigned char *p;
    1267             :     register long x;
    1268             : 
    1269             : #ifdef Py_DEBUG
    1270             :     assert(_Py_HashSecret_Initialized);
    1271             : #endif
                           /* Fast path: the hash was already computed and cached. */
    1272      198233 :     if (a->ob_shash != -1)
    1273       85330 :         return a->ob_shash;
    1274      112903 :     len = Py_SIZE(a);
    1275             :     /*
    1276             :       We make the hash of the empty string be 0, rather than using
    1277             :       (prefix ^ suffix), since this slightly obfuscates the hash secret
    1278             :     */
    1279      112903 :     if (len == 0) {
    1280           3 :         a->ob_shash = 0;
    1281           3 :         return 0;
    1282             :     }
    1283      112900 :     p = (unsigned char *) a->ob_sval;
    1284      112900 :     x = _Py_HashSecret.prefix;
    1285      112900 :     x ^= *p << 7;
    1286     1261290 :     while (--len >= 0)
    1287     1035490 :         x = (1000003*x) ^ *p++;
    1288      112900 :     x ^= Py_SIZE(a);
    1289      112900 :     x ^= _Py_HashSecret.suffix;
                           /* Remap -1 so the cached value can never equal the
                              "not computed yet" marker tested at the top. */
    1290      112900 :     if (x == -1)
    1291           0 :         x = -2;
    1292      112900 :     a->ob_shash = x;
    1293      112900 :     return x;
    1294             : }
    1295             : 
    /* self[item] for an index or a slice (installed as the mapping subscript
     * slot in string_as_mapping).
     *
     * - Index-like item: converted with PyNumber_AsSsize_t(), passing
     *   PyExc_IndexError as the overflow exception; a negative value is offset
     *   by the length once and the lookup is delegated to string_item().
     * - Slice: resolved with PySlice_GetIndicesEx(); see the branches below.
     * - Anything else: sets TypeError and returns NULL.
     */
    1296             : static PyObject*
    1297       34513 : string_subscript(PyStringObject* self, PyObject* item)
    1298             : {
    1299       34513 :     if (PyIndex_Check(item)) {
    1300       34315 :         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
                               /* -1 is also a valid index, so an error is only reported
                                  when an exception is actually pending. */
    1301       34315 :         if (i == -1 && PyErr_Occurred())
    1302           0 :             return NULL;
    1303       34315 :         if (i < 0)
    1304           0 :             i += PyString_GET_SIZE(self);
    1305       34315 :         return string_item(self, i);
    1306             :     }
    1307         198 :     else if (PySlice_Check(item)) {
    1308             :         Py_ssize_t start, stop, step, slicelength, cur, i;
    1309             :         char* source_buf;
    1310             :         char* result_buf;
    1311             :         PyObject* result;
    1312             : 
    1313         198 :         if (PySlice_GetIndicesEx((PySliceObject*)item,
    1314             :                          PyString_GET_SIZE(self),
    1315             :                          &start, &stop, &step, &slicelength) < 0) {
    1316           0 :             return NULL;
    1317             :         }
    1318             : 
                               /* Empty slice: return an empty string. */
    1319         198 :         if (slicelength <= 0) {
    1320           0 :             return PyString_FromStringAndSize("", 0);
    1321             :         }
                               /* Whole-string slice of an exact str: hand back self itself
                                  with a new reference instead of copying. */
    1322         198 :         else if (start == 0 && step == 1 &&
    1323           0 :                  slicelength == PyString_GET_SIZE(self) &&
    1324           0 :                  PyString_CheckExact(self)) {
    1325           0 :             Py_INCREF(self);
    1326           0 :             return (PyObject *)self;
    1327             :         }
                               /* Contiguous slice: copy straight out of self's buffer. */
    1328         198 :         else if (step == 1) {
    1329           0 :             return PyString_FromStringAndSize(
    1330           0 :                 PyString_AS_STRING(self) + start,
    1331             :                 slicelength);
    1332             :         }
                               /* Extended slice (step != 1): gather the selected bytes into
                                  a temporary buffer, build the result from it, then free
                                  the buffer. */
    1333             :         else {
    1334         198 :             source_buf = PyString_AsString((PyObject*)self);
    1335         198 :             result_buf = (char *)PyMem_Malloc(slicelength);
    1336         198 :             if (result_buf == NULL)
    1337           0 :                 return PyErr_NoMemory();
    1338             : 
    1339       51084 :             for (cur = start, i = 0; i < slicelength;
    1340       50688 :                  cur += step, i++) {
    1341       50688 :                 result_buf[i] = source_buf[cur];
    1342             :             }
    1343             : 
    1344         198 :             result = PyString_FromStringAndSize(result_buf,
    1345             :                                                 slicelength);
    1346         198 :             PyMem_Free(result_buf);
    1347         198 :             return result;
    1348             :         }
    1349             :     }
    1350             :     else {
    1351           0 :         PyErr_Format(PyExc_TypeError,
    1352             :                      "string indices must be integers, not %.200s",
    1353           0 :                      Py_TYPE(item)->tp_name);
    1354           0 :         return NULL;
    1355             :     }
    1356             : }
    1357             : 
    /* Read-buffer accessor (first entry of string_as_buffer).
     * Only segment 0 exists: for index 0, store a pointer to the string's
     * bytes in *ptr and return the string's size.  Any other index sets
     * SystemError and returns -1.
     */
    1358             : static Py_ssize_t
    1359        2898 : string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
    1360             : {
    1361        2898 :     if ( index != 0 ) {
    1362           0 :         PyErr_SetString(PyExc_SystemError,
    1363             :                         "accessing non-existent string segment");
    1364           0 :         return -1;
    1365             :     }
    1366        2898 :     *ptr = (void *)self->ob_sval;
    1367        2898 :     return Py_SIZE(self);
    1368             : }
    1369             : 
    /* Write-buffer accessor (second entry of string_as_buffer).
     * Always fails: sets TypeError and returns -1 without looking at
     * self, index or ptr.
     */
    1370             : static Py_ssize_t
    1371           0 : string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
    1372             : {
    1373           0 :     PyErr_SetString(PyExc_TypeError,
    1374             :                     "Cannot use string as modifiable buffer");
    1375           0 :     return -1;
    1376             : }
    1377             : 
    /* Segment-count accessor (third entry of string_as_buffer).
     * Always returns 1.  When lenp is non-NULL the string's size is also
     * stored in *lenp.
     */
    1378             : static Py_ssize_t
    1379       10008 : string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
    1380             : {
    1381       10008 :     if ( lenp )
    1382           0 :         *lenp = Py_SIZE(self);
    1383       10008 :     return 1;
    1384             : }
    1385             : 
    /* Char-buffer accessor (fourth entry of string_as_buffer).
     * Same contract as string_buffer_getreadbuf() but with a const char **
     * out-parameter: index 0 yields the string's bytes and size, any other
     * index sets SystemError and returns -1.
     */
    1386             : static Py_ssize_t
    1387          63 : string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
    1388             : {
    1389          63 :     if ( index != 0 ) {
    1390           0 :         PyErr_SetString(PyExc_SystemError,
    1391             :                         "accessing non-existent string segment");
    1392           0 :         return -1;
    1393             :     }
    1394          63 :     *ptr = self->ob_sval;
    1395          63 :     return Py_SIZE(self);
    1396             : }
    1397             : 
    /* New-style buffer export (fifth entry of string_as_buffer).
     * Delegates to PyBuffer_FillInfo() with the string's bytes and size and
     * returns its result unchanged.  The literal 1 is PyBuffer_FillInfo()'s
     * readonly argument, so the exported view is read-only.
     */
    1398             : static int
    1399        2442 : string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
    1400             : {
    1401        4884 :     return PyBuffer_FillInfo(view, (PyObject*)self,
    1402        2442 :                              (void *)self->ob_sval, Py_SIZE(self),
    1403             :                              1, flags);
    1404             : }
    1405             : 
    /* Sequence slot table for string objects.  Both assignment slots are 0,
     * i.e. no item or slice assignment handler is installed.
     */
    1406             : static PySequenceMethods string_as_sequence = {
    1407             :     (lenfunc)string_length, /*sq_length*/
    1408             :     (binaryfunc)string_concat, /*sq_concat*/
    1409             :     (ssizeargfunc)string_repeat, /*sq_repeat*/
    1410             :     (ssizeargfunc)string_item, /*sq_item*/
    1411             :     (ssizessizeargfunc)string_slice, /*sq_slice*/
    1412             :     0,                  /*sq_ass_item*/
    1413             :     0,                  /*sq_ass_slice*/
    1414             :     (objobjproc)string_contains /*sq_contains*/
    1415             : };
    1416             : 
    /* Mapping slot table for string objects; string_subscript() is what
     * handles s[i] and s[i:j:k].  The third slot is 0, so no subscript
     * assignment handler is installed.
     */
    1417             : static PyMappingMethods string_as_mapping = {
    1418             :     (lenfunc)string_length, /*mp_length*/
    1419             :     (binaryfunc)string_subscript, /*mp_subscript*/
    1420             :     0, /*mp_ass_subscript*/
    1421             : };
    1422             : 
    /* Buffer slot table for string objects: the four old-style accessors
     * followed by the new-style getbuffer hook.  The last slot is 0.
     */
    1423             : static PyBufferProcs string_as_buffer = {
    1424             :     (readbufferproc)string_buffer_getreadbuf, /*bf_getreadbuffer*/
    1425             :     (writebufferproc)string_buffer_getwritebuf, /*bf_getwritebuffer*/
    1426             :     (segcountproc)string_buffer_getsegcount, /*bf_getsegcount*/
    1427             :     (charbufferproc)string_buffer_getcharbuf, /*bf_getcharbuffer*/
    1428             :     (getbufferproc)string_buffer_getbuffer, /*bf_getbuffer*/
    1429             :     0, /* bf_releasebuffer -- XXX */
    1430             : };
    1431             : 
    1432             : 
    1433             : 
    /* Strip-direction selectors.  do_strip() and do_xstrip() take one of these
     * as their striptype argument, and the same values index stripformat[].
     */
    1434             : #define LEFTSTRIP 0
    1435             : #define RIGHTSTRIP 1
    1436             : #define BOTHSTRIP 2
    1437             : 
    1438             : /* Arrays indexed by above */
    1439             : static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
    1440             : 
                       /* Skip the 3-character "|O:" prefix of a stripformat entry, leaving
                          just the method name ("lstrip", "rstrip" or "strip"). */
    1441             : #define STRIPNAME(i) (stripformat[i]+3)
    1442             : 
    1443             : PyDoc_STRVAR(split__doc__,
    1444             : "S.split([sep [,maxsplit]]) -> list of strings\n\
    1445             : \n\
    1446             : Return a list of the words in the string S, using sep as the\n\
    1447             : delimiter string.  If maxsplit is given, at most maxsplit\n\
    1448             : splits are done. If sep is not specified or is None, any\n\
    1449             : whitespace string is a separator and empty strings are removed\n\
    1450             : from the result.");
    1451             : 
    /* Implementation of S.split([sep [,maxsplit]]).
     *
     * args holds the optional separator object and the optional maxsplit;
     * a negative (or omitted) maxsplit becomes PY_SSIZE_T_MAX.  Dispatch on
     * the separator:
     *   - None (or omitted): stringlib_split_whitespace()
     *   - string object:      stringlib_split() on its bytes
     *   - unicode object:     PyUnicode_Split() (only if Py_USING_UNICODE)
     *   - anything else:      its bytes are fetched via PyObject_AsCharBuffer()
     * Returns NULL when argument parsing or the char-buffer lookup fails.
     */
    1452             : static PyObject *
    1453          78 : string_split(PyStringObject *self, PyObject *args)
    1454             : {
    1455          78 :     Py_ssize_t len = PyString_GET_SIZE(self), n;
    1456          78 :     Py_ssize_t maxsplit = -1;
    1457          78 :     const char *s = PyString_AS_STRING(self), *sub;
    1458          78 :     PyObject *subobj = Py_None;
    1459             : 
    1460          78 :     if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
    1461           0 :         return NULL;
    1462          78 :     if (maxsplit < 0)
    1463          78 :         maxsplit = PY_SSIZE_T_MAX;
    1464          78 :     if (subobj == Py_None)
    1465          51 :         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
    1466          27 :     if (PyString_Check(subobj)) {
    1467          27 :         sub = PyString_AS_STRING(subobj);
    1468          27 :         n = PyString_GET_SIZE(subobj);
    1469             :     }
    1470             : #ifdef Py_USING_UNICODE
    1471           0 :     else if (PyUnicode_Check(subobj))
    1472           0 :         return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
    1473             : #endif
    1474           0 :     else if (PyObject_AsCharBuffer(subobj, &sub, &n))
    1475           0 :         return NULL;
    1476             : 
    1477          27 :     return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
    1478             : }
    1479             : 
    1480             : PyDoc_STRVAR(partition__doc__,
    1481             : "S.partition(sep) -> (head, sep, tail)\n\
    1482             : \n\
    1483             : Search for the separator sep in S, and return the part before it,\n\
    1484             : the separator itself, and the part after it.  If the separator is not\n\
    1485             : found, return S and two empty strings.");
    1486             : 
    /* Implementation of S.partition(sep).
     *
     * A string separator is used directly; a unicode separator hands the
     * whole call to PyUnicode_Partition() (only if Py_USING_UNICODE); any
     * other object must provide its bytes through PyObject_AsCharBuffer(),
     * and NULL is returned when that fails.  The split itself is done by
     * stringlib_partition().
     */
    1487             : static PyObject *
    1488           0 : string_partition(PyStringObject *self, PyObject *sep_obj)
    1489             : {
    1490             :     const char *sep;
    1491             :     Py_ssize_t sep_len;
    1492             : 
    1493           0 :     if (PyString_Check(sep_obj)) {
    1494           0 :         sep = PyString_AS_STRING(sep_obj);
    1495           0 :         sep_len = PyString_GET_SIZE(sep_obj);
    1496             :     }
    1497             : #ifdef Py_USING_UNICODE
    1498           0 :     else if (PyUnicode_Check(sep_obj))
    1499           0 :         return PyUnicode_Partition((PyObject *) self, sep_obj);
    1500             : #endif
    1501           0 :     else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
    1502           0 :         return NULL;
    1503             : 
    1504           0 :     return stringlib_partition(
    1505             :         (PyObject*) self,
    1506           0 :         PyString_AS_STRING(self), PyString_GET_SIZE(self),
    1507             :         sep_obj, sep, sep_len
    1508             :         );
    1509             : }
    1510             : 
    1511             : PyDoc_STRVAR(rpartition__doc__,
    1512             : "S.rpartition(sep) -> (head, sep, tail)\n\
    1513             : \n\
    1514             : Search for the separator sep in S, starting at the end of S, and return\n\
    1515             : the part before it, the separator itself, and the part after it.  If the\n\
    1516             : separator is not found, return two empty strings and S.");
    1517             : 
    /* Implementation of S.rpartition(sep).
     *
     * Separator handling mirrors string_partition(): string bytes are used
     * directly, a unicode separator defers to PyUnicode_RPartition() (only if
     * Py_USING_UNICODE), and any other object goes through
     * PyObject_AsCharBuffer(), returning NULL on failure.  The split itself
     * is done by stringlib_rpartition().
     */
    1518             : static PyObject *
    1519           0 : string_rpartition(PyStringObject *self, PyObject *sep_obj)
    1520             : {
    1521             :     const char *sep;
    1522             :     Py_ssize_t sep_len;
    1523             : 
    1524           0 :     if (PyString_Check(sep_obj)) {
    1525           0 :         sep = PyString_AS_STRING(sep_obj);
    1526           0 :         sep_len = PyString_GET_SIZE(sep_obj);
    1527             :     }
    1528             : #ifdef Py_USING_UNICODE
    1529           0 :     else if (PyUnicode_Check(sep_obj))
    1530           0 :         return PyUnicode_RPartition((PyObject *) self, sep_obj);
    1531             : #endif
    1532           0 :     else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
    1533           0 :         return NULL;
    1534             : 
    1535           0 :     return stringlib_rpartition(
    1536             :         (PyObject*) self,
    1537           0 :         PyString_AS_STRING(self), PyString_GET_SIZE(self),
    1538             :         sep_obj, sep, sep_len
    1539             :         );
    1540             : }
    1541             : 
    1542             : PyDoc_STRVAR(rsplit__doc__,
    1543             : "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
    1544             : \n\
    1545             : Return a list of the words in the string S, using sep as the\n\
    1546             : delimiter string, starting at the end of the string and working\n\
    1547             : to the front.  If maxsplit is given, at most maxsplit splits are\n\
    1548             : done. If sep is not specified or is None, any whitespace string\n\
    1549             : is a separator.");
    1550             : 
    /* Implementation of S.rsplit([sep [,maxsplit]]).
     *
     * Same argument handling and separator dispatch as string_split(), but
     * using the right-to-left helpers: stringlib_rsplit_whitespace() for a
     * None separator, PyUnicode_RSplit() for a unicode separator (only if
     * Py_USING_UNICODE) and stringlib_rsplit() otherwise.  A negative (or
     * omitted) maxsplit becomes PY_SSIZE_T_MAX.  Returns NULL when argument
     * parsing or PyObject_AsCharBuffer() fails.
     */
    1551             : static PyObject *
    1552           0 : string_rsplit(PyStringObject *self, PyObject *args)
    1553             : {
    1554           0 :     Py_ssize_t len = PyString_GET_SIZE(self), n;
    1555           0 :     Py_ssize_t maxsplit = -1;
    1556           0 :     const char *s = PyString_AS_STRING(self), *sub;
    1557           0 :     PyObject *subobj = Py_None;
    1558             : 
    1559           0 :     if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
    1560           0 :         return NULL;
    1561           0 :     if (maxsplit < 0)
    1562           0 :         maxsplit = PY_SSIZE_T_MAX;
    1563           0 :     if (subobj == Py_None)
    1564           0 :         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
    1565           0 :     if (PyString_Check(subobj)) {
    1566           0 :         sub = PyString_AS_STRING(subobj);
    1567           0 :         n = PyString_GET_SIZE(subobj);
    1568             :     }
    1569             : #ifdef Py_USING_UNICODE
    1570           0 :     else if (PyUnicode_Check(subobj))
    1571           0 :         return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
    1572             : #endif
    1573           0 :     else if (PyObject_AsCharBuffer(subobj, &sub, &n))
    1574           0 :         return NULL;
    1575             : 
    1576           0 :     return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
    1577             : }
    1578             : 
    1579             : 
    1580             : PyDoc_STRVAR(join__doc__,
    1581             : "S.join(iterable) -> string\n\
    1582             : \n\
    1583             : Return a string which is the concatenation of the strings in the\n\
    1584             : iterable.  The separator between elements is S.");
    1585             : 
    /* Implementation of S.join(iterable): concatenate the items of orig with
     * self between each pair.
     *
     * orig is first materialised with PySequence_Fast().  An empty sequence
     * gives ""; a single item that is an exact str or unicode object is
     * returned as-is with a new reference.  Otherwise a first pass validates
     * every item and sums the result size, and a second pass copies the bytes
     * into a freshly allocated string.
     *
     * If any item is a unicode object the whole job is handed to
     * PyUnicode_Join().  Any other non-string item raises TypeError, and a
     * total size that wraps around or exceeds PY_SSIZE_T_MAX raises
     * OverflowError.  Returns NULL on error; seq is released on every path.
     */
    1586             : static PyObject *
    1587        1222 : string_join(PyStringObject *self, PyObject *orig)
    1588             : {
    1589        1222 :     char *sep = PyString_AS_STRING(self);
    1590        1222 :     const Py_ssize_t seplen = PyString_GET_SIZE(self);
    1591        1222 :     PyObject *res = NULL;
    1592             :     char *p;
    1593        1222 :     Py_ssize_t seqlen = 0;
    1594        1222 :     size_t sz = 0;
    1595             :     Py_ssize_t i;
    1596             :     PyObject *seq, *item;
    1597             : 
    1598        1222 :     seq = PySequence_Fast(orig, "can only join an iterable");
    1599        1222 :     if (seq == NULL) {
    1600           0 :         return NULL;
    1601             :     }
    1602             : 
    1603        1222 :     seqlen = PySequence_Size(seq);
    1604        1222 :     if (seqlen == 0) {
    1605           0 :         Py_DECREF(seq);
    1606           0 :         return PyString_FromString("");
    1607             :     }
                           /* One exact str/unicode item: the result is that item itself.
                              A single item of any other type falls through to the general
                              path below. */
    1608        1222 :     if (seqlen == 1) {
    1609         400 :         item = PySequence_Fast_GET_ITEM(seq, 0);
    1610         400 :         if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
    1611         400 :             Py_INCREF(item);
    1612         400 :             Py_DECREF(seq);
    1613         400 :             return item;
    1614             :         }
    1615             :     }
    1616             : 
    1617             :     /* There are at least two things to join, or else we have a subclass
    1618             :      * of the builtin types in the sequence.
    1619             :      * Do a pre-pass to figure out the total amount of space we'll
    1620             :      * need (sz), see whether any argument is absurd, and defer to
    1621             :      * the Unicode join if appropriate.
    1622             :      */
    1623        4584 :     for (i = 0; i < seqlen; i++) {
    1624        3762 :         const size_t old_sz = sz;
    1625        3762 :         item = PySequence_Fast_GET_ITEM(seq, i);
    1626        3762 :         if (!PyString_Check(item)){
    1627             : #ifdef Py_USING_UNICODE
    1628           0 :             if (PyUnicode_Check(item)) {
    1629             :                 /* Defer to Unicode join.
    1630             :                  * CAUTION:  There's no guarantee that the
    1631             :                  * original sequence can be iterated over
    1632             :                  * again, so we must pass seq here.
    1633             :                  */
    1634             :                 PyObject *result;
    1635           0 :                 result = PyUnicode_Join((PyObject *)self, seq);
    1636           0 :                 Py_DECREF(seq);
    1637           0 :                 return result;
    1638             :             }
    1639             : #endif
    1640           0 :             PyErr_Format(PyExc_TypeError,
    1641             :                          "sequence item %zd: expected string,"
    1642             :                          " %.80s found",
    1643           0 :                          i, Py_TYPE(item)->tp_name);
    1644           0 :             Py_DECREF(seq);
    1645           0 :             return NULL;
    1646             :         }
    1647        3762 :         sz += PyString_GET_SIZE(item);
    1648        3762 :         if (i != 0)
    1649        2940 :             sz += seplen;
                               /* sz is unsigned: a value smaller than before means the
                                  addition wrapped around. */
    1650        3762 :         if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
    1651           0 :             PyErr_SetString(PyExc_OverflowError,
    1652             :                 "join() result is too long for a Python string");
    1653           0 :             Py_DECREF(seq);
    1654           0 :             return NULL;
    1655             :         }
    1656             :     }
    1657             : 
    1658             :     /* Allocate result space. */
    1659         822 :     res = PyString_FromStringAndSize((char*)NULL, sz);
    1660         822 :     if (res == NULL) {
    1661           0 :         Py_DECREF(seq);
    1662           0 :         return NULL;
    1663             :     }
    1664             : 
    1665             :     /* Catenate everything. */
    1666         822 :     p = PyString_AS_STRING(res);
    1667        4584 :     for (i = 0; i < seqlen; ++i) {
    1668             :         size_t n;
    1669        3762 :         item = PySequence_Fast_GET_ITEM(seq, i);
    1670        3762 :         n = PyString_GET_SIZE(item);
    1671        3762 :         Py_MEMCPY(p, PyString_AS_STRING(item), n);
    1672        3762 :         p += n;
                               /* The separator goes between items, not after the last. */
    1673        3762 :         if (i < seqlen - 1) {
    1674        2940 :             Py_MEMCPY(p, sep, seplen);
    1675        2940 :             p += seplen;
    1676             :         }
    1677             :     }
    1678             : 
    1679         822 :     Py_DECREF(seq);
    1680         822 :     return res;
    1681             : }
    1682             : 
    /* Non-static entry point for joining: forwards to string_join() with sep
     * as the separator string and x as the iterable.  The asserts require sep
     * to be a non-NULL string object and x to be non-NULL.
     */
    1683             : PyObject *
    1684          81 : _PyString_Join(PyObject *sep, PyObject *x)
    1685             : {
    1686             :     assert(sep != NULL && PyString_Check(sep));
    1687             :     assert(x != NULL);
    1688          81 :     return string_join((PyStringObject *)sep, x);
    1689             : }
    1690             : 
    1691             : /* helper macro to fixup start/end slice values */
                       /* end is clamped to len; a negative end or start is offset by len
                          and then floored at 0.  start is NOT clamped to len.
                          The macro modifies start and end in place and expands to bare
                          if-statements (no do/while(0) wrapper), and its arguments are
                          evaluated several times, so pass plain side-effect-free
                          lvalues and do not use it as the unbraced body of an if/else. */
    1692             : #define ADJUST_INDICES(start, end, len)         \
    1693             :     if (end > len)                          \
    1694             :         end = len;                          \
    1695             :     else if (end < 0) {                     \
    1696             :         end += len;                         \
    1697             :         if (end < 0)                        \
    1698             :         end = 0;                        \
    1699             :     }                                       \
    1700             :     if (start < 0) {                        \
    1701             :         start += len;                       \
    1702             :         if (start < 0)                      \
    1703             :         start = 0;                      \
    1704             :     }
    1705             : 
    /* Shared worker for find/rfind/index/rindex.
     *
     * args carries the substring object plus optional start/end, parsed by
     * stringlib_parse_args_finds().  dir > 0 searches forward with
     * stringlib_find_slice(), otherwise backward with stringlib_rfind_slice().
     * A unicode substring hands the search to PyUnicode_Find() (only if
     * Py_USING_UNICODE).
     *
     * Returns -2 when argument parsing or PyObject_AsCharBuffer() fails; the
     * callers in this file map -2 to a NULL return and treat -1 as
     * "not found".
     */
    1706             : Py_LOCAL_INLINE(Py_ssize_t)
    1707         126 : string_find_internal(PyStringObject *self, PyObject *args, int dir)
    1708             : {
    1709             :     PyObject *subobj;
    1710             :     const char *sub;
    1711             :     Py_ssize_t sub_len;
    1712         126 :     Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
    1713             : 
    1714         126 :     if (!stringlib_parse_args_finds("find/rfind/index/rindex",
    1715             :                                     args, &subobj, &start, &end))
    1716           0 :         return -2;
    1717             : 
    1718         126 :     if (PyString_Check(subobj)) {
    1719         126 :         sub = PyString_AS_STRING(subobj);
    1720         126 :         sub_len = PyString_GET_SIZE(subobj);
    1721             :     }
    1722             : #ifdef Py_USING_UNICODE
    1723           0 :     else if (PyUnicode_Check(subobj))
    1724           0 :         return PyUnicode_Find(
    1725             :             (PyObject *)self, subobj, start, end, dir);
    1726             : #endif
    1727           0 :     else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
    1728             :         /* XXX - the "expected a character buffer object" is pretty
    1729             :            confusing for a non-expert.  remap to something else ? */
    1730           0 :         return -2;
    1731             : 
    1732         126 :     if (dir > 0)
    1733         216 :         return stringlib_find_slice(
    1734         108 :             PyString_AS_STRING(self), PyString_GET_SIZE(self),
    1735             :             sub, sub_len, start, end);
    1736             :     else
    1737          36 :         return stringlib_rfind_slice(
    1738          18 :             PyString_AS_STRING(self), PyString_GET_SIZE(self),
    1739             :             sub, sub_len, start, end);
    1740             : }
    1741             : 
    1742             : 
    1743             : PyDoc_STRVAR(find__doc__,
    1744             : "S.find(sub [,start [,end]]) -> int\n\
    1745             : \n\
    1746             : Return the lowest index in S where substring sub is found,\n\
    1747             : such that sub is contained within S[start:end].  Optional\n\
    1748             : arguments start and end are interpreted as in slice notation.\n\
    1749             : \n\
    1750             : Return -1 on failure.");
    1751             : 
    /* Implementation of S.find(): forward search via string_find_internal().
     * Its -2 result is turned into a NULL return; any other result,
     * including -1, is returned as a Python int.
     */
    1752             : static PyObject *
    1753         108 : string_find(PyStringObject *self, PyObject *args)
    1754             : {
    1755         108 :     Py_ssize_t result = string_find_internal(self, args, +1);
    1756         108 :     if (result == -2)
    1757           0 :         return NULL;
    1758         108 :     return PyInt_FromSsize_t(result);
    1759             : }
    1760             : 
    1761             : 
    1762             : PyDoc_STRVAR(index__doc__,
    1763             : "S.index(sub [,start [,end]]) -> int\n\
    1764             : \n\
    1765             : Like S.find() but raise ValueError when the substring is not found.");
    1766             : 
    /* Implementation of S.index(): forward search via string_find_internal().
     * A -2 result is turned into a NULL return, and -1 sets ValueError and
     * returns NULL instead of being returned as an int.
     */
    1767             : static PyObject *
    1768           0 : string_index(PyStringObject *self, PyObject *args)
    1769             : {
    1770           0 :     Py_ssize_t result = string_find_internal(self, args, +1);
    1771           0 :     if (result == -2)
    1772           0 :         return NULL;
    1773           0 :     if (result == -1) {
    1774           0 :         PyErr_SetString(PyExc_ValueError,
    1775             :                         "substring not found");
    1776           0 :         return NULL;
    1777             :     }
    1778           0 :     return PyInt_FromSsize_t(result);
    1779             : }
    1780             : 
    1781             : 
    1782             : PyDoc_STRVAR(rfind__doc__,
    1783             : "S.rfind(sub [,start [,end]]) -> int\n\
    1784             : \n\
    1785             : Return the highest index in S where substring sub is found,\n\
    1786             : such that sub is contained within S[start:end].  Optional\n\
    1787             : arguments start and end are interpreted as in slice notation.\n\
    1788             : \n\
    1789             : Return -1 on failure.");
    1790             : 
    /* Implementation of S.rfind(): backward search via string_find_internal()
     * (dir = -1).  Its -2 result is turned into a NULL return; any other
     * result, including -1, is returned as a Python int.
     */
    1791             : static PyObject *
    1792          18 : string_rfind(PyStringObject *self, PyObject *args)
    1793             : {
    1794          18 :     Py_ssize_t result = string_find_internal(self, args, -1);
    1795          18 :     if (result == -2)
    1796           0 :         return NULL;
    1797          18 :     return PyInt_FromSsize_t(result);
    1798             : }
    1799             : 
    1800             : 
    1801             : PyDoc_STRVAR(rindex__doc__,
    1802             : "S.rindex(sub [,start [,end]]) -> int\n\
    1803             : \n\
    1804             : Like S.rfind() but raise ValueError when the substring is not found.");
    1805             : 
    /* Implementation of S.rindex(): backward search via string_find_internal()
     * (dir = -1).  A -2 result is turned into a NULL return, and -1 sets
     * ValueError and returns NULL instead of being returned as an int.
     */
    1806             : static PyObject *
    1807           0 : string_rindex(PyStringObject *self, PyObject *args)
    1808             : {
    1809           0 :     Py_ssize_t result = string_find_internal(self, args, -1);
    1810           0 :     if (result == -2)
    1811           0 :         return NULL;
    1812           0 :     if (result == -1) {
    1813           0 :         PyErr_SetString(PyExc_ValueError,
    1814             :                         "substring not found");
    1815           0 :         return NULL;
    1816             :     }
    1817           0 :     return PyInt_FromSsize_t(result);
    1818             : }
    1819             : 
    1820             : 
    /* Strip from self every leading and/or trailing byte that occurs in
     * sepobj, as selected by striptype (LEFTSTRIP, RIGHTSTRIP or BOTHSTRIP).
     *
     * sepobj's bytes are read here without any type check, so the caller
     * must pass a string object.  Returns self itself with a new reference
     * when nothing was stripped and self is an exact str; otherwise a new
     * string holding s[i:j].
     */
    1821             : Py_LOCAL_INLINE(PyObject *)
    1822           9 : do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
    1823             : {
    1824           9 :     char *s = PyString_AS_STRING(self);
    1825           9 :     Py_ssize_t len = PyString_GET_SIZE(self);
    1826           9 :     char *sep = PyString_AS_STRING(sepobj);
    1827           9 :     Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
    1828             :     Py_ssize_t i, j;
    1829             : 
                           /* i: index of the first byte to keep. */
    1830           9 :     i = 0;
    1831           9 :     if (striptype != RIGHTSTRIP) {
    1832           0 :         while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
    1833           0 :             i++;
    1834             :         }
    1835             :     }
    1836             : 
                           /* j: one past the last byte to keep.  The loop steps j down
                              past every strippable byte but never below i, then the
                              final j++ restores the one-past-the-end position. */
    1837           9 :     j = len;
    1838           9 :     if (striptype != LEFTSTRIP) {
    1839             :         do {
    1840          18 :             j--;
    1841          18 :         } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
    1842           9 :         j++;
    1843             :     }
    1844             : 
    1845           9 :     if (i == 0 && j == len && PyString_CheckExact(self)) {
    1846           0 :         Py_INCREF(self);
    1847           0 :         return (PyObject*)self;
    1848             :     }
    1849             :     else
    1850           9 :         return PyString_FromStringAndSize(s+i, j-i);
    1851             : }
    1852             : 
    1853             : 
                       /* Strip whitespace, as classified by isspace(), from `self`.

                          striptype selects the side(s): the left side is stripped unless
                          striptype is RIGHTSTRIP, the right side unless it is LEFTSTRIP.

                          Returns a new reference: `self` itself when nothing was removed
                          and self is an exact string, otherwise a new string holding
                          bytes [i, j) of the original. */
    1854             : Py_LOCAL_INLINE(PyObject *)
    1855         753 : do_strip(PyStringObject *self, int striptype)
    1856             : {
    1857         753 :     char *s = PyString_AS_STRING(self);
    1858         753 :     Py_ssize_t len = PyString_GET_SIZE(self), i, j;
    1859             : 
                           /* i: index of the first byte to keep. */
    1860         753 :     i = 0;
    1861         753 :     if (striptype != RIGHTSTRIP) {
    1862        2967 :         while (i < len && isspace(Py_CHARMASK(s[i]))) {
    1863        1461 :             i++;
    1864             :         }
    1865             :     }
    1866             : 
                           /* j: one past the last byte to keep.  The do/while decrements
                              before testing, so the trailing j++ restores an exclusive
                              end index (and yields j == i when the string is all
                              whitespace). */
    1867         753 :     j = len;
    1868         753 :     if (striptype != LEFTSTRIP) {
    1869             :         do {
    1870         801 :             j--;
    1871         801 :         } while (j >= i && isspace(Py_CHARMASK(s[j])));
    1872         753 :         j++;
    1873             :     }
    1874             : 
                           /* Nothing removed and exact type: hand back self instead of
                              copying. */
    1875         753 :     if (i == 0 && j == len && PyString_CheckExact(self)) {
    1876         213 :         Py_INCREF(self);
    1877         213 :         return (PyObject*)self;
    1878             :     }
    1879             :     else
    1880         540 :         return PyString_FromStringAndSize(s+i, j-i);
    1881             : }
    1882             : 
    1883             : 
                       /* Argument-taking path shared by strip(), lstrip() and rstrip().

                          Parses `args` with the format string stripformat[striptype] into
                          a single object `sep`, then dispatches on it:
                            - sep left NULL by the parser, or None: whitespace stripping
                              via do_strip();
                            - str: do_xstrip() with sep as the set of bytes to remove;
                            - unicode (only when Py_USING_UNICODE is defined): self is
                              converted with PyUnicode_FromObject() and the work is done by
                              _PyUnicode_XStrip();
                            - anything else: TypeError.

                          Returns a new reference, or NULL with an exception set. */
    1884             : Py_LOCAL_INLINE(PyObject *)
    1885           9 : do_argstrip(PyStringObject *self, int striptype, PyObject *args)
    1886             : {
    1887           9 :     PyObject *sep = NULL;
    1888             : 
    1889           9 :     if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
    1890           0 :         return NULL;
    1891             : 
    1892           9 :     if (sep != NULL && sep != Py_None) {
    1893           9 :         if (PyString_Check(sep))
    1894           9 :             return do_xstrip(self, striptype, sep);
    1895             : #ifdef Py_USING_UNICODE
    1896           0 :         else if (PyUnicode_Check(sep)) {
    1897           0 :             PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
    1898             :             PyObject *res;
    1899           0 :             if (uniself==NULL)
    1900           0 :                 return NULL;
    1901           0 :             res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
    1902             :                 striptype, sep);
                                   /* The temporary unicode copy of self is released here;
                                      res is returned as-is (NULL included). */
    1903           0 :             Py_DECREF(uniself);
    1904           0 :             return res;
    1905             :         }
    1906             : #endif
                               /* sep is neither None, str nor (when available) unicode. */
    1907           0 :         PyErr_Format(PyExc_TypeError,
    1908             : #ifdef Py_USING_UNICODE
    1909             :                      "%s arg must be None, str or unicode",
    1910             : #else
    1911             :                      "%s arg must be None or str",
    1912             : #endif
    1913           0 :                      STRIPNAME(striptype));
    1914           0 :         return NULL;
    1915             :     }
    1916             : 
    1917           0 :     return do_strip(self, striptype);
    1918             : }
    1919             : 
    1920             : 
    1921             : PyDoc_STRVAR(strip__doc__,
    1922             : "S.strip([chars]) -> string or unicode\n\
    1923             : \n\
    1924             : Return a copy of the string S with leading and trailing\n\
    1925             : whitespace removed.\n\
    1926             : If chars is given and not None, remove characters in chars instead.\n\
    1927             : If chars is unicode, S will be converted to unicode before stripping");
    1928             : 
                       /* Implementation of S.strip().  With an empty argument tuple the
                          whitespace stripper do_strip() is called directly, bypassing
                          argument parsing; otherwise do_argstrip() parses `args` and
                          dispatches.  Both sides are stripped (BOTHSTRIP). */
    1929             : static PyObject *
    1930         753 : string_strip(PyStringObject *self, PyObject *args)
    1931             : {
    1932         753 :     if (PyTuple_GET_SIZE(args) == 0)
    1933         753 :         return do_strip(self, BOTHSTRIP); /* Common case */
    1934             :     else
    1935           0 :         return do_argstrip(self, BOTHSTRIP, args);
    1936             : }
    1937             : 
    1938             : 
    1939             : PyDoc_STRVAR(lstrip__doc__,
    1940             : "S.lstrip([chars]) -> string or unicode\n\
    1941             : \n\
    1942             : Return a copy of the string S with leading whitespace removed.\n\
    1943             : If chars is given and not None, remove characters in chars instead.\n\
    1944             : If chars is unicode, S will be converted to unicode before stripping");
    1945             : 
                       /* Implementation of S.lstrip().  With an empty argument tuple the
                          whitespace stripper do_strip() is called directly, bypassing
                          argument parsing; otherwise do_argstrip() parses `args` and
                          dispatches.  Only the left side is stripped (LEFTSTRIP). */
    1946             : static PyObject *
    1947           0 : string_lstrip(PyStringObject *self, PyObject *args)
    1948             : {
    1949           0 :     if (PyTuple_GET_SIZE(args) == 0)
    1950           0 :         return do_strip(self, LEFTSTRIP); /* Common case */
    1951             :     else
    1952           0 :         return do_argstrip(self, LEFTSTRIP, args);
    1953             : }
    1954             : 
    1955             : 
    1956             : PyDoc_STRVAR(rstrip__doc__,
    1957             : "S.rstrip([chars]) -> string or unicode\n\
    1958             : \n\
    1959             : Return a copy of the string S with trailing whitespace removed.\n\
    1960             : If chars is given and not None, remove characters in chars instead.\n\
    1961             : If chars is unicode, S will be converted to unicode before stripping");
    1962             : 
                       /* Implementation of S.rstrip().  With an empty argument tuple the
                          whitespace stripper do_strip() is called directly, bypassing
                          argument parsing; otherwise do_argstrip() parses `args` and
                          dispatches.  Only the right side is stripped (RIGHTSTRIP). */
    1963             : static PyObject *
    1964           9 : string_rstrip(PyStringObject *self, PyObject *args)
    1965             : {
    1966           9 :     if (PyTuple_GET_SIZE(args) == 0)
    1967           0 :         return do_strip(self, RIGHTSTRIP); /* Common case */
    1968             :     else
    1969           9 :         return do_argstrip(self, RIGHTSTRIP, args);
    1970             : }
    1971             : 
    1972             : 
    1973             : PyDoc_STRVAR(lower__doc__,
    1974             : "S.lower() -> string\n\
    1975             : \n\
    1976             : Return a copy of the string S converted to lowercase.");
    1977             : 
    1978             : /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
                       /* Fall back to the ISO C tolower() when the platform headers do not
                          provide _tolower as a macro. */
    1979             : #ifndef _tolower
    1980             : #define _tolower tolower
    1981             : #endif
    1982             : 
                       /* Implementation of S.lower(): allocate a new string of the same
                          length, copy self into it, then lowercase in place every byte for
                          which isupper() is true.  Always returns a freshly allocated
                          string (never self), or NULL if the allocation fails. */
    1983             : static PyObject *
    1984           0 : string_lower(PyStringObject *self)
    1985             : {
    1986             :     char *s;
    1987           0 :     Py_ssize_t i, n = PyString_GET_SIZE(self);
    1988             :     PyObject *newobj;
    1989             : 
                           /* NULL source pointer: only the buffer is allocated here; its
                              contents are filled in by the copy below. */
    1990           0 :     newobj = PyString_FromStringAndSize(NULL, n);
    1991           0 :     if (!newobj)
    1992           0 :         return NULL;
    1993             : 
    1994           0 :     s = PyString_AS_STRING(newobj);
    1995             : 
    1996           0 :     Py_MEMCPY(s, PyString_AS_STRING(self), n);
    1997             : 
                           /* _tolower is only applied to bytes that isupper() accepted. */
    1998           0 :     for (i = 0; i < n; i++) {
    1999           0 :         int c = Py_CHARMASK(s[i]);
    2000           0 :         if (isupper(c))
    2001           0 :             s[i] = _tolower(c);
    2002             :     }
    2003             : 
    2004           0 :     return newobj;
    2005             : }
    2006             : 
    2007             : PyDoc_STRVAR(upper__doc__,
    2008             : "S.upper() -> string\n\
    2009             : \n\
    2010             : Return a copy of the string S converted to uppercase.");
    2011             : 
                       /* Fall back to the ISO C toupper() when the platform headers do not
                          provide _toupper as a macro. */
    2012             : #ifndef _toupper
    2013             : #define _toupper toupper
    2014             : #endif
    2015             : 
                       /* Implementation of S.upper(): allocate a new string of the same
                          length, copy self into it, then uppercase in place every byte for
                          which islower() is true.  Always returns a freshly allocated
                          string (never self), or NULL if the allocation fails. */
    2016             : static PyObject *
    2017           0 : string_upper(PyStringObject *self)
    2018             : {
    2019             :     char *s;
    2020           0 :     Py_ssize_t i, n = PyString_GET_SIZE(self);
    2021             :     PyObject *newobj;
    2022             : 
                           /* NULL source pointer: only the buffer is allocated here; its
                              contents are filled in by the copy below. */
    2023           0 :     newobj = PyString_FromStringAndSize(NULL, n);
    2024           0 :     if (!newobj)
    2025           0 :         return NULL;
    2026             : 
    2027           0 :     s = PyString_AS_STRING(newobj);
    2028             : 
    2029           0 :     Py_MEMCPY(s, PyString_AS_STRING(self), n);
    2030             : 
                           /* _toupper is only applied to bytes that islower() accepted. */
    2031           0 :     for (i = 0; i < n; i++) {
    2032           0 :         int c = Py_CHARMASK(s[i]);
    2033           0 :         if (islower(c))
    2034           0 :             s[i] = _toupper(c);
    2035             :     }
    2036             : 
    2037           0 :     return newobj;
    2038             : }
    2039             : 
    2040             : PyDoc_STRVAR(title__doc__,
    2041             : "S.title() -> string\n\
    2042             : \n\
    2043             : Return a titlecased version of S, i.e. words start with uppercase\n\
    2044             : characters, all remaining cased characters have lowercase.");
    2045             : 
                       /* Implementation of S.title().  Walks self byte by byte and writes
                          the result into a newly allocated string of the same length.
                          A "word" is a maximal run of cased bytes (islower() or isupper());
                          the first byte of each run is uppercased, the rest lowercased, and
                          uncased bytes are copied unchanged.  Returns NULL if the
                          allocation fails. */
    2046             : static PyObject*
    2047           0 : string_title(PyStringObject *self)
    2048             : {
    2049           0 :     char *s = PyString_AS_STRING(self), *s_new;
    2050           0 :     Py_ssize_t i, n = PyString_GET_SIZE(self);
                           /* Non-zero while the previous byte was cased, i.e. while we are
                              inside a word. */
    2051           0 :     int previous_is_cased = 0;
    2052             :     PyObject *newobj;
    2053             : 
    2054           0 :     newobj = PyString_FromStringAndSize(NULL, n);
    2055           0 :     if (newobj == NULL)
    2056           0 :         return NULL;
    2057           0 :     s_new = PyString_AsString(newobj);
    2058           0 :     for (i = 0; i < n; i++) {
    2059           0 :         int c = Py_CHARMASK(*s++);
    2060           0 :         if (islower(c)) {
                                   /* Lowercase byte at the start of a word: capitalize. */
    2061           0 :             if (!previous_is_cased)
    2062           0 :                 c = toupper(c);
    2063           0 :             previous_is_cased = 1;
    2064           0 :         } else if (isupper(c)) {
                                   /* Uppercase byte inside a word: lowercase it. */
    2065           0 :             if (previous_is_cased)
    2066           0 :                 c = tolower(c);
    2067           0 :             previous_is_cased = 1;
    2068             :         } else
    2069           0 :             previous_is_cased = 0;
    2070           0 :         *s_new++ = c;
    2071             :     }
    2072           0 :     return newobj;
    2073             : }
    2074             : 
    2075             : PyDoc_STRVAR(capitalize__doc__,
    2076             : "S.capitalize() -> string\n\
    2077             : \n\
    2078             : Return a copy of the string S with only its first character\n\
    2079             : capitalized.");
    2080             : 
                       /* Implementation of S.capitalize().  Writes into a newly allocated
                          string of the same length: the first byte is uppercased if it is
                          lowercase, every following byte is lowercased if it is uppercase,
                          and all other bytes are copied unchanged.  Returns NULL if the
                          allocation fails. */
    2081             : static PyObject *
    2082           0 : string_capitalize(PyStringObject *self)
    2083             : {
    2084           0 :     char *s = PyString_AS_STRING(self), *s_new;
    2085           0 :     Py_ssize_t i, n = PyString_GET_SIZE(self);
    2086             :     PyObject *newobj;
    2087             : 
    2088           0 :     newobj = PyString_FromStringAndSize(NULL, n);
    2089           0 :     if (newobj == NULL)
    2090           0 :         return NULL;
    2091           0 :     s_new = PyString_AsString(newobj);
                           /* First byte (skipped entirely for the empty string). */
    2092           0 :     if (0 < n) {
    2093           0 :         int c = Py_CHARMASK(*s++);
    2094           0 :         if (islower(c))
    2095           0 :             *s_new = toupper(c);
    2096             :         else
    2097           0 :             *s_new = c;
    2098           0 :         s_new++;
    2099             :     }
                           /* Remaining bytes, starting at index 1. */
    2100           0 :     for (i = 1; i < n; i++) {
    2101           0 :         int c = Py_CHARMASK(*s++);
    2102           0 :         if (isupper(c))
    2103           0 :             *s_new = tolower(c);
    2104             :         else
    2105           0 :             *s_new = c;
    2106           0 :         s_new++;
    2107             :     }
    2108           0 :     return newobj;
    2109             : }
    2110             : 
    2111             : 
    2112             : PyDoc_STRVAR(count__doc__,
    2113             : "S.count(sub[, start[, end]]) -> int\n\
    2114             : \n\
    2115             : Return the number of non-overlapping occurrences of substring sub in\n\
    2116             : string S[start:end].  Optional arguments start and end are interpreted\n\
    2117             : as in slice notation.");
    2118             : 
                       /* Implementation of S.count().  Parses (sub, start, end) with
                          stringlib_parse_args_finds(), then obtains the needle bytes:
                            - str: directly from the object;
                            - unicode (only when Py_USING_UNICODE is defined): the whole
                              operation is delegated to PyUnicode_Count();
                            - anything else: through PyObject_AsCharBuffer().
                          start/end default to 0 and PY_SSIZE_T_MAX and are normalised
                          against len(self) by ADJUST_INDICES before counting.
                          Returns a Python int, or NULL with an exception set. */
    2119             : static PyObject *
    2120           0 : string_count(PyStringObject *self, PyObject *args)
    2121             : {
    2122             :     PyObject *sub_obj;
    2123           0 :     const char *str = PyString_AS_STRING(self), *sub;
    2124             :     Py_ssize_t sub_len;
    2125           0 :     Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
    2126             : 
    2127           0 :     if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
    2128           0 :         return NULL;
    2129             : 
    2130           0 :     if (PyString_Check(sub_obj)) {
    2131           0 :         sub = PyString_AS_STRING(sub_obj);
    2132           0 :         sub_len = PyString_GET_SIZE(sub_obj);
    2133             :     }
    2134             : #ifdef Py_USING_UNICODE
    2135           0 :     else if (PyUnicode_Check(sub_obj)) {
    2136             :         Py_ssize_t count;
    2137           0 :         count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
                               /* -1 is treated as the error return of PyUnicode_Count(). */
    2138           0 :         if (count == -1)
    2139           0 :             return NULL;
    2140             :         else
    2141           0 :             return PyInt_FromSsize_t(count);
    2142             :     }
    2143             : #endif
                           /* Non-zero return from PyObject_AsCharBuffer() is treated as
                              failure. */
    2144           0 :     else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
    2145           0 :         return NULL;
    2146             : 
    2147           0 :     ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
    2148             : 
                           /* Count within self[start:end]; PY_SSIZE_T_MAX is passed as the
                              maxcount argument, i.e. no practical limit on matches. */
    2149           0 :     return PyInt_FromSsize_t(
    2150             :         stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
    2151             :         );
    2152             : }
    2153             : 
    2154             : PyDoc_STRVAR(swapcase__doc__,
    2155             : "S.swapcase() -> string\n\
    2156             : \n\
    2157             : Return a copy of the string S with uppercase characters\n\
    2158             : converted to lowercase and vice versa.");
    2159             : 
                       /* Implementation of S.swapcase().  Writes into a newly allocated
                          string of the same length: lowercase bytes are uppercased,
                          uppercase bytes are lowercased, everything else is copied
                          unchanged.  Returns NULL if the allocation fails. */
    2160             : static PyObject *
    2161           0 : string_swapcase(PyStringObject *self)
    2162             : {
    2163           0 :     char *s = PyString_AS_STRING(self), *s_new;
    2164           0 :     Py_ssize_t i, n = PyString_GET_SIZE(self);
    2165             :     PyObject *newobj;
    2166             : 
    2167           0 :     newobj = PyString_FromStringAndSize(NULL, n);
    2168           0 :     if (newobj == NULL)
    2169           0 :         return NULL;
    2170           0 :     s_new = PyString_AsString(newobj);
    2171           0 :     for (i = 0; i < n; i++) {
    2172           0 :         int c = Py_CHARMASK(*s++);
    2173           0 :         if (islower(c)) {
    2174           0 :             *s_new = toupper(c);
    2175             :         }
    2176           0 :         else if (isupper(c)) {
    2177           0 :             *s_new = tolower(c);
    2178             :         }
    2179             :         else
    2180           0 :             *s_new = c;
    2181           0 :         s_new++;
    2182             :     }
    2183           0 :     return newobj;
    2184             : }
    2185             : 
    2186             : 
    2187             : PyDoc_STRVAR(translate__doc__,
    2188             : "S.translate(table [,deletechars]) -> string\n\
    2189             : \n\
    2190             : Return a copy of the string S, where all characters occurring\n\
    2191             : in the optional argument deletechars are removed, and the\n\
    2192             : remaining characters have been mapped through the given\n\
    2193             : translation table, which must be a string of length 256 or None.\n\
    2194             : If the table argument is None, no translation is applied and\n\
    2195             : the operation simply removes the characters in deletechars.");
    2196             : 
                       /* Implementation of S.translate(table [, deletechars]).

                          Phases:
                            1. Unpack 1 or 2 arguments and resolve `table` to a 256-byte
                               buffer (str or char buffer) or NULL for None (identity).
                               A unicode table delegates to PyUnicode_Translate() and
                               rejects deletechars.
                            2. Resolve deletechars to (del_table, dellen); unicode is
                               rejected, absent means no deletions.
                            3. Fast path when there is nothing to delete and a table was
                               given: map every byte straight into the result.
                            4. General path: build an int table where -1 marks deleted
                               bytes, filter/map the input, then shrink the result to the
                               number of bytes actually written.

                          When the output equals the input and self is an exact string,
                          self is returned (new reference) instead of the copy.
                          Returns NULL with an exception set on failure. */
    2197             : static PyObject *
    2198         237 : string_translate(PyStringObject *self, PyObject *args)
    2199             : {
    2200             :     register char *input, *output;
    2201             :     const char *table;
    2202         237 :     register Py_ssize_t i, c, changed = 0;
    2203         237 :     PyObject *input_obj = (PyObject*)self;
    2204         237 :     const char *output_start, *del_table=NULL;
    2205         237 :     Py_ssize_t inlen, tablen, dellen = 0;
    2206             :     PyObject *result;
                           /* Per-byte mapping for the general path; int rather than char
                              so that -1 can serve as the "delete this byte" marker. */
    2207             :     int trans_table[256];
    2208         237 :     PyObject *tableobj, *delobj = NULL;
    2209             : 
    2210         237 :     if (!PyArg_UnpackTuple(args, "translate", 1, 2,
    2211             :                           &tableobj, &delobj))
    2212           0 :         return NULL;
    2213             : 
    2214         237 :     if (PyString_Check(tableobj)) {
    2215         237 :         table = PyString_AS_STRING(tableobj);
    2216         237 :         tablen = PyString_GET_SIZE(tableobj);
    2217             :     }
    2218           0 :     else if (tableobj == Py_None) {
                               /* None: identity mapping.  tablen is set to 256 so the
                                  length check below passes. */
    2219           0 :         table = NULL;
    2220           0 :         tablen = 256;
    2221             :     }
    2222             : #ifdef Py_USING_UNICODE
    2223           0 :     else if (PyUnicode_Check(tableobj)) {
    2224             :         /* Unicode .translate() does not support the deletechars
    2225             :            parameter; instead a mapping to None will cause characters
    2226             :            to be deleted. */
    2227           0 :         if (delobj != NULL) {
    2228           0 :             PyErr_SetString(PyExc_TypeError,
    2229             :             "deletions are implemented differently for unicode");
    2230           0 :             return NULL;
    2231             :         }
    2232           0 :         return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
    2233             :     }
    2234             : #endif
    2235           0 :     else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
    2236           0 :         return NULL;
    2237             : 
    2238         237 :     if (tablen != 256) {
    2239           0 :         PyErr_SetString(PyExc_ValueError,
    2240             :           "translation table must be 256 characters long");
    2241           0 :         return NULL;
    2242             :     }
    2243             : 
    2244         237 :     if (delobj != NULL) {
    2245           0 :         if (PyString_Check(delobj)) {
    2246           0 :             del_table = PyString_AS_STRING(delobj);
    2247           0 :             dellen = PyString_GET_SIZE(delobj);
    2248             :         }
    2249             : #ifdef Py_USING_UNICODE
    2250           0 :         else if (PyUnicode_Check(delobj)) {
    2251           0 :             PyErr_SetString(PyExc_TypeError,
    2252             :             "deletions are implemented differently for unicode");
    2253           0 :             return NULL;
    2254             :         }
    2255             : #endif
    2256           0 :         else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
    2257           0 :             return NULL;
    2258             :     }
    2259             :     else {
    2260         237 :         del_table = NULL;
    2261         237 :         dellen = 0;
    2262             :     }
    2263             : 
                           /* The result starts out as long as the input; the general path
                              shrinks it afterwards if bytes were deleted. */
    2264         237 :     inlen = PyString_GET_SIZE(input_obj);
    2265         237 :     result = PyString_FromStringAndSize((char *)NULL, inlen);
    2266         237 :     if (result == NULL)
    2267           0 :         return NULL;
    2268         237 :     output_start = output = PyString_AsString(result);
    2269         237 :     input = PyString_AS_STRING(input_obj);
    2270             : 
    2271         237 :     if (dellen == 0 && table != NULL) {
    2272             :         /* If no deletions are required, use faster code */
    2273       51285 :         for (i = inlen; --i >= 0; ) {
    2274       50811 :             c = Py_CHARMASK(*input++);
                                   /* Store the mapped byte and note whether it differs
                                      from the input byte. */
    2275       50811 :             if (Py_CHARMASK((*output++ = table[c])) != c)
    2276       50745 :                 changed = 1;
    2277             :         }
    2278         237 :         if (changed || !PyString_CheckExact(input_obj))
    2279         237 :             return result;
                               /* Unchanged exact string: drop the copy and return self. */
    2280           0 :         Py_DECREF(result);
    2281           0 :         Py_INCREF(input_obj);
    2282           0 :         return input_obj;
    2283             :     }
    2284             : 
                           /* General path: start from the identity (table == NULL) or the
                              caller's table ... */
    2285           0 :     if (table == NULL) {
    2286           0 :         for (i = 0; i < 256; i++)
    2287           0 :             trans_table[i] = Py_CHARMASK(i);
    2288             :     } else {
    2289           0 :         for (i = 0; i < 256; i++)
    2290           0 :             trans_table[i] = Py_CHARMASK(table[i]);
    2291             :     }
    2292             : 
                           /* ... then mark every byte listed in deletechars with -1. */
    2293           0 :     for (i = 0; i < dellen; i++)
    2294           0 :         trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
    2295             : 
                           /* `output` only advances for bytes that are kept.  `changed` is
                              set when a byte is deleted or maps to a different value; the
                              `continue` skips that assignment for bytes copied through
                              unchanged. */
    2296           0 :     for (i = inlen; --i >= 0; ) {
    2297           0 :         c = Py_CHARMASK(*input++);
    2298           0 :         if (trans_table[c] != -1)
    2299           0 :             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
    2300           0 :                 continue;
    2301           0 :         changed = 1;
    2302             :     }
    2303           0 :     if (!changed && PyString_CheckExact(input_obj)) {
    2304           0 :         Py_DECREF(result);
    2305           0 :         Py_INCREF(input_obj);
    2306           0 :         return input_obj;
    2307             :     }
    2308             :     /* Fix the size of the resulting string */
                           /* The resize is skipped for empty input.  NOTE(review):
                              presumably because a zero-length result may be a shared
                              object that must not be resized -- confirm against
                              PyString_FromStringAndSize / _PyString_Resize. */
    2309           0 :     if (inlen > 0 && _PyString_Resize(&result, output - output_start))
    2310           0 :         return NULL;
    2311           0 :     return result;
    2312             : }
    2313             : 
    2314             : 
    2315             : /* find and count characters and substrings */
    2316             : 
                       /* findchar(target, target_len, c): thin wrapper around memchr() that
                          returns a char * to the first occurrence of byte c within the
                          first target_len bytes of target, or NULL if there is none.
                          The cast to char * drops any const qualifier of target. */
    2317             : #define findchar(target, target_len, c)                         \
    2318             :   ((char *)memchr((const void *)(target), c, target_len))
    2319             : 
    2320             : /* String ops must return a string.  */
    2321             : /* If the object is subclass of string, create a copy */
                       /* Returns a new reference: self itself when it is an exact string,
                          otherwise a new string built from self's bytes and length
                          (which may be NULL if that allocation fails). */
    2322             : Py_LOCAL(PyStringObject *)
    2323          30 : return_self(PyStringObject *self)
    2324             : {
    2325          30 :     if (PyString_CheckExact(self)) {
    2326          30 :         Py_INCREF(self);
    2327          30 :         return self;
    2328             :     }
    2329           0 :     return (PyStringObject *)PyString_FromStringAndSize(
    2330           0 :         PyString_AS_STRING(self),
    2331             :         PyString_GET_SIZE(self));
    2332             : }
    2333             : 
                       /* Count occurrences of byte c in target[0 .. target_len), stopping
                          early once maxcount occurrences have been seen.

                          Because the counter is incremented before it is compared with
                          maxcount, a single match is still counted when maxcount <= 0;
                          the replace helpers in this file document maxcount >= 1 as a
                          precondition. */
    2334             : Py_LOCAL_INLINE(Py_ssize_t)
    2335          27 : countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
    2336             : {
    2337          27 :     Py_ssize_t count=0;
    2338          27 :     const char *start=target;
    2339          27 :     const char *end=target+target_len;
    2340             : 
                           /* Each iteration finds the next match at or after `start`,
                              then resumes the search one byte past it. */
    2341         282 :     while ( (start=findchar(start, end-start, c)) != NULL ) {
    2342         228 :         count++;
    2343         228 :         if (count >= maxcount)
    2344           0 :             break;
    2345         228 :         start += 1;
    2346             :     }
    2347          27 :     return count;
    2348             : }
    2349             : 
    2350             : 
    2351             : /* Algorithms for different cases of string replacement */
    2352             : 
    2353             : /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
                       /* Replacement of the empty string: insert `to_s` (to_len bytes)
                          before the first byte of self and after each following byte, up
                          to `maxcount` insertions in total (at most len(self)+1).

                          Returns a new string, or NULL with OverflowError set when the
                          result length does not fit, or NULL if the allocation fails.
                          The preconditions listed above are relied upon: in particular
                          to_len is used as a divisor in the overflow check. */
    2354             : Py_LOCAL(PyStringObject *)
    2355           0 : replace_interleave(PyStringObject *self,
    2356             :                    const char *to_s, Py_ssize_t to_len,
    2357             :                    Py_ssize_t maxcount)
    2358             : {
    2359             :     char *self_s, *result_s;
    2360             :     Py_ssize_t self_len, result_len;
    2361             :     Py_ssize_t count, i, product;
    2362             :     PyStringObject *result;
    2363             : 
    2364           0 :     self_len = PyString_GET_SIZE(self);
    2365             : 
    2366             :     /* 1 at the end plus 1 after every character */
    2367           0 :     count = self_len+1;
    2368           0 :     if (maxcount < count)
    2369           0 :         count = maxcount;
    2370             : 
    2371             :     /* Check for overflow */
    2372             :     /*   result_len = count * to_len + self_len; */
                           /* NOTE(review): both checks below inspect the result of an
                              arithmetic operation after it has been performed (divide the
                              product back, test the sum for a negative value).  Overflow of
                              a signed integer type is undefined behaviour in ISO C, so
                              these tests depend on wraparound and a compiler may remove
                              them.  A check made before multiplying, e.g. comparing to_len
                              against (PY_SSIZE_T_MAX - self_len) / count, would avoid
                              that. */
    2373           0 :     product = count * to_len;
    2374           0 :     if (product / to_len != count) {
    2375           0 :         PyErr_SetString(PyExc_OverflowError,
    2376             :                         "replace string is too long");
    2377           0 :         return NULL;
    2378             :     }
    2379           0 :     result_len = product + self_len;
    2380           0 :     if (result_len < 0) {
    2381           0 :         PyErr_SetString(PyExc_OverflowError,
    2382             :                         "replace string is too long");
    2383           0 :         return NULL;
    2384             :     }
    2385             : 
    2386           0 :     if (! (result = (PyStringObject *)
    2387             :                      PyString_FromStringAndSize(NULL, result_len)) )
    2388           0 :         return NULL;
    2389             : 
    2390           0 :     self_s = PyString_AS_STRING(self);
    2391           0 :     result_s = PyString_AS_STRING(result);
    2392             : 
    2393             :     /* TODO: special case single character, which doesn't need memcpy */
    2394             : 
    2395             :     /* Lay the first one down (guaranteed this will occur) */
    2396           0 :     Py_MEMCPY(result_s, to_s, to_len);
    2397           0 :     result_s += to_len;
    2398           0 :     count -= 1;
    2399             : 
                           /* Each remaining insertion is preceded by one byte of self. */
    2400           0 :     for (i=0; i<count; i++) {
    2401           0 :         *result_s++ = *self_s++;
    2402           0 :         Py_MEMCPY(result_s, to_s, to_len);
    2403           0 :         result_s += to_len;
    2404             :     }
    2405             : 
    2406             :     /* Copy the rest of the original string */
                           /* The loop consumed exactly i bytes of self, so self_len-i
                              bytes remain (zero when maxcount did not limit count). */
    2407           0 :     Py_MEMCPY(result_s, self_s, self_len-i);
    2408             : 
    2409           0 :     return result;
    2410             : }
    2411             : 
    2412             : /* Special case for deleting a single character */
    2413             : /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
                       /* Removes up to `maxcount` occurrences of byte from_c from self.
                          Works in two passes: countchar() determines how many bytes will
                          be removed so the result can be allocated at its final size, then
                          the segments between matches are copied across.
                          Returns return_self(self) when there is no match, a new string
                          otherwise, or NULL if the allocation fails. */
    2414             : Py_LOCAL(PyStringObject *)
    2415          27 : replace_delete_single_character(PyStringObject *self,
    2416             :                                 char from_c, Py_ssize_t maxcount)
    2417             : {
    2418             :     char *self_s, *result_s;
    2419             :     char *start, *next, *end;
    2420             :     Py_ssize_t self_len, result_len;
    2421             :     Py_ssize_t count;
    2422             :     PyStringObject *result;
    2423             : 
    2424          27 :     self_len = PyString_GET_SIZE(self);
    2425          27 :     self_s = PyString_AS_STRING(self);
    2426             : 
    2427          27 :     count = countchar(self_s, self_len, from_c, maxcount);
    2428          27 :     if (count == 0) {
    2429           0 :         return return_self(self);
    2430             :     }
    2431             : 
    2432          27 :     result_len = self_len - count;  /* from_len == 1 */
    2433             :     assert(result_len>=0);
    2434             : 
    2435          27 :     if ( (result = (PyStringObject *)
    2436             :                     PyString_FromStringAndSize(NULL, result_len)) == NULL)
    2437           0 :         return NULL;
    2438          27 :     result_s = PyString_AS_STRING(result);
    2439             : 
    2440          27 :     start = self_s;
    2441          27 :     end = self_s + self_len;
                           /* For each counted match: copy the bytes before it, then skip
                              the matched byte itself. */
    2442         282 :     while (count-- > 0) {
    2443         228 :         next = findchar(start, end-start, from_c);
    2444         228 :         if (next == NULL)
    2445           0 :             break;
    2446         228 :         Py_MEMCPY(result_s, start, next-start);
    2447         228 :         result_s += (next-start);
    2448         228 :         start = next+1;
    2449             :     }
                           /* Tail after the last removed byte (includes any matches beyond
                              maxcount, which are kept). */
    2450          27 :     Py_MEMCPY(result_s, start, end-start);
    2451             : 
    2452          27 :     return result;
    2453             : }
    2454             : 
    2455             : /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
    2456             : 
                       /* Removes up to `maxcount` occurrences of the substring
                          from_s[0 .. from_len) from self.
                          Works in two passes: stringlib_count() determines the number of
                          matches so the result can be allocated at its final size, then
                          stringlib_find() locates each match and the segments between
                          matches are copied across.
                          Returns return_self(self) when there is no match, a new string
                          otherwise, or NULL if the allocation fails. */
    2457             : Py_LOCAL(PyStringObject *)
    2458           0 : replace_delete_substring(PyStringObject *self,
    2459             :                          const char *from_s, Py_ssize_t from_len,
    2460             :                          Py_ssize_t maxcount) {
    2461             :     char *self_s, *result_s;
    2462             :     char *start, *next, *end;
    2463             :     Py_ssize_t self_len, result_len;
    2464             :     Py_ssize_t count, offset;
    2465             :     PyStringObject *result;
    2466             : 
    2467           0 :     self_len = PyString_GET_SIZE(self);
    2468           0 :     self_s = PyString_AS_STRING(self);
    2469             : 
    2470           0 :     count = stringlib_count(self_s, self_len,
    2471             :                             from_s, from_len,
    2472             :                             maxcount);
    2473             : 
    2474           0 :     if (count == 0) {
    2475             :         /* no matches */
    2476           0 :         return return_self(self);
    2477             :     }
    2478             : 
                           /* Every removed match shortens the result by from_len bytes. */
    2479           0 :     result_len = self_len - (count * from_len);
    2480             :     assert (result_len>=0);
    2481             : 
    2482           0 :     if ( (result = (PyStringObject *)
    2483             :           PyString_FromStringAndSize(NULL, result_len)) == NULL )
    2484           0 :         return NULL;
    2485             : 
    2486           0 :     result_s = PyString_AS_STRING(result);
    2487             : 
    2488           0 :     start = self_s;
    2489           0 :     end = self_s + self_len;
                           /* For each counted match: copy the bytes before it, then skip
                              the from_len matched bytes. */
    2490           0 :     while (count-- > 0) {
    2491           0 :         offset = stringlib_find(start, end-start,
    2492             :                                 from_s, from_len,
    2493             :                                 0);
    2494           0 :         if (offset == -1)
    2495           0 :             break;
    2496           0 :         next = start + offset;
    2497             : 
    2498           0 :         Py_MEMCPY(result_s, start, next-start);
    2499             : 
    2500           0 :         result_s += (next-start);
    2501           0 :         start = next+from_len;
    2502             :     }
                           /* Tail after the last removed match. */
    2503           0 :     Py_MEMCPY(result_s, start, end-start);
    2504           0 :     return result;
    2505             : }
    2506             : 
    2507             : /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
    2508             : Py_LOCAL(PyStringObject *)
    2509          30 : replace_single_character_in_place(PyStringObject *self,
    2510             :                                   char from_c, char to_c,
    2511             :                                   Py_ssize_t maxcount)
    2512             : {
    2513             :     char *self_s, *result_s, *start, *end, *next;
    2514             :     Py_ssize_t self_len;
    2515             :     PyStringObject *result;
    2516             : 
    2517             :     /* The result string will be the same size */
    2518          30 :     self_s = PyString_AS_STRING(self);
    2519          30 :     self_len = PyString_GET_SIZE(self);
    2520             : 
    2521          30 :     next = findchar(self_s, self_len, from_c);
    2522             : 
    2523          30 :     if (next == NULL) {
    2524             :         /* No matches; return the original string */
    2525          30 :         return return_self(self);
    2526             :     }
    2527             : 
    2528             :     /* Need to make a new string */
    2529           0 :     result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
    2530           0 :     if (result == NULL)
    2531           0 :         return NULL;
    2532           0 :     result_s = PyString_AS_STRING(result);
    2533           0 :     Py_MEMCPY(result_s, self_s, self_len);
    2534             : 
    2535             :     /* change everything in-place, starting with this one */
    2536           0 :     start =  result_s + (next-self_s);
    2537           0 :     *start = to_c;
    2538           0 :     start++;
    2539           0 :     end = result_s + self_len;
    2540             : 
    2541           0 :     while (--maxcount > 0) {
    2542           0 :         next = findchar(start, end-start, from_c);
    2543           0 :         if (next == NULL)
    2544           0 :             break;
    2545           0 :         *next = to_c;
    2546           0 :         start = next+1;
    2547             :     }
    2548             : 
    2549           0 :     return result;
    2550             : }
    2551             : 
    2552             : /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
    2553             : Py_LOCAL(PyStringObject *)
    2554           0 : replace_substring_in_place(PyStringObject *self,
    2555             :                            const char *from_s, Py_ssize_t from_len,
    2556             :                            const char *to_s, Py_ssize_t to_len,
    2557             :                            Py_ssize_t maxcount)
    2558             : {
    2559             :     char *result_s, *start, *end;
    2560             :     char *self_s;
    2561             :     Py_ssize_t self_len, offset;
    2562             :     PyStringObject *result;
    2563             : 
    2564             :     /* The result string will be the same size */
    2565             : 
    2566           0 :     self_s = PyString_AS_STRING(self);
    2567           0 :     self_len = PyString_GET_SIZE(self);
    2568             : 
    2569           0 :     offset = stringlib_find(self_s, self_len,
    2570             :                             from_s, from_len,
    2571             :                             0);
    2572           0 :     if (offset == -1) {
    2573             :         /* No matches; return the original string */
    2574           0 :         return return_self(self);
    2575             :     }
    2576             : 
    2577             :     /* Need to make a new string */
    2578           0 :     result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
    2579           0 :     if (result == NULL)
    2580           0 :         return NULL;
    2581           0 :     result_s = PyString_AS_STRING(result);
    2582           0 :     Py_MEMCPY(result_s, self_s, self_len);
    2583             : 
    2584             :     /* change everything in-place, starting with this one */
    2585           0 :     start =  result_s + offset;
    2586           0 :     Py_MEMCPY(start, to_s, from_len);
    2587           0 :     start += from_len;
    2588           0 :     end = result_s + self_len;
    2589             : 
    2590           0 :     while ( --maxcount > 0) {
    2591           0 :         offset = stringlib_find(start, end-start,
    2592             :                                 from_s, from_len,
    2593             :                                 0);
    2594           0 :         if (offset==-1)
    2595           0 :             break;
    2596           0 :         Py_MEMCPY(start+offset, to_s, from_len);
    2597           0 :         start += offset+from_len;
    2598             :     }
    2599             : 
    2600           0 :     return result;
    2601             : }
    2602             : 
    2603             : /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
    2604             : Py_LOCAL(PyStringObject *)
    2605           0 : replace_single_character(PyStringObject *self,
    2606             :                          char from_c,
    2607             :                          const char *to_s, Py_ssize_t to_len,
    2608             :                          Py_ssize_t maxcount)
    2609             : {
    2610             :     char *self_s, *result_s;
    2611             :     char *start, *next, *end;
    2612             :     Py_ssize_t self_len, result_len;
    2613             :     Py_ssize_t count, product;
    2614             :     PyStringObject *result;
    2615             : 
    2616           0 :     self_s = PyString_AS_STRING(self);
    2617           0 :     self_len = PyString_GET_SIZE(self);
    2618             : 
    2619           0 :     count = countchar(self_s, self_len, from_c, maxcount);
    2620           0 :     if (count == 0) {
    2621             :         /* no matches, return unchanged */
    2622           0 :         return return_self(self);
    2623             :     }
    2624             : 
    2625             :     /* use the difference between current and new, hence the "-1" */
    2626             :     /*   result_len = self_len + count * (to_len-1)  */
    2627           0 :     product = count * (to_len-1);
    2628           0 :     if (product / (to_len-1) != count) {
    2629           0 :         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
    2630           0 :         return NULL;
    2631             :     }
    2632           0 :     result_len = self_len + product;
    2633           0 :     if (result_len < 0) {
    2634           0 :         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
    2635           0 :         return NULL;
    2636             :     }
    2637             : 
    2638           0 :     if ( (result = (PyStringObject *)
    2639             :           PyString_FromStringAndSize(NULL, result_len)) == NULL)
    2640           0 :         return NULL;
    2641           0 :     result_s = PyString_AS_STRING(result);
    2642             : 
    2643           0 :     start = self_s;
    2644           0 :     end = self_s + self_len;
    2645           0 :     while (count-- > 0) {
    2646           0 :         next = findchar(start, end-start, from_c);
    2647           0 :         if (next == NULL)
    2648           0 :             break;
    2649             : 
    2650           0 :         if (next == start) {
    2651             :             /* replace with the 'to' */
    2652           0 :             Py_MEMCPY(result_s, to_s, to_len);
    2653           0 :             result_s += to_len;
    2654           0 :             start += 1;
    2655             :         } else {
    2656             :             /* copy the unchanged old then the 'to' */
    2657           0 :             Py_MEMCPY(result_s, start, next-start);
    2658           0 :             result_s += (next-start);
    2659           0 :             Py_MEMCPY(result_s, to_s, to_len);
    2660           0 :             result_s += to_len;
    2661           0 :             start = next+1;
    2662             :         }
    2663             :     }
    2664             :     /* Copy the remainder of the remaining string */
    2665           0 :     Py_MEMCPY(result_s, start, end-start);
    2666             : 
    2667           0 :     return result;
    2668             : }
    2669             : 
    2670             : /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
    2671             : Py_LOCAL(PyStringObject *)
    2672           0 : replace_substring(PyStringObject *self,
    2673             :                   const char *from_s, Py_ssize_t from_len,
    2674             :                   const char *to_s, Py_ssize_t to_len,
    2675             :                   Py_ssize_t maxcount) {
    2676             :     char *self_s, *result_s;
    2677             :     char *start, *next, *end;
    2678             :     Py_ssize_t self_len, result_len;
    2679             :     Py_ssize_t count, offset, product;
    2680             :     PyStringObject *result;
    2681             : 
    2682           0 :     self_s = PyString_AS_STRING(self);
    2683           0 :     self_len = PyString_GET_SIZE(self);
    2684             : 
    2685           0 :     count = stringlib_count(self_s, self_len,
    2686             :                             from_s, from_len,
    2687             :                             maxcount);
    2688             : 
    2689           0 :     if (count == 0) {
    2690             :         /* no matches, return unchanged */
    2691           0 :         return return_self(self);
    2692             :     }
    2693             : 
    2694             :     /* Check for overflow */
    2695             :     /*    result_len = self_len + count * (to_len-from_len) */
    2696           0 :     product = count * (to_len-from_len);
    2697           0 :     if (product / (to_len-from_len) != count) {
    2698           0 :         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
    2699           0 :         return NULL;
    2700             :     }
    2701           0 :     result_len = self_len + product;
    2702           0 :     if (result_len < 0) {
    2703           0 :         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
    2704           0 :         return NULL;
    2705             :     }
    2706             : 
    2707           0 :     if ( (result = (PyStringObject *)
    2708             :           PyString_FromStringAndSize(NULL, result_len)) == NULL)
    2709           0 :         return NULL;
    2710           0 :     result_s = PyString_AS_STRING(result);
    2711             : 
    2712           0 :     start = self_s;
    2713           0 :     end = self_s + self_len;
    2714           0 :     while (count-- > 0) {
    2715           0 :         offset = stringlib_find(start, end-start,
    2716             :                                 from_s, from_len,
    2717             :                                 0);
    2718           0 :         if (offset == -1)
    2719           0 :             break;
    2720           0 :         next = start+offset;
    2721           0 :         if (next == start) {
    2722             :             /* replace with the 'to' */
    2723           0 :             Py_MEMCPY(result_s, to_s, to_len);
    2724           0 :             result_s += to_len;
    2725           0 :             start += from_len;
    2726             :         } else {
    2727             :             /* copy the unchanged old then the 'to' */
    2728           0 :             Py_MEMCPY(result_s, start, next-start);
    2729           0 :             result_s += (next-start);
    2730           0 :             Py_MEMCPY(result_s, to_s, to_len);
    2731           0 :             result_s += to_len;
    2732           0 :             start = next+from_len;
    2733             :         }
    2734             :     }
    2735             :     /* Copy the remainder of the remaining string */
    2736           0 :     Py_MEMCPY(result_s, start, end-start);
    2737             : 
    2738           0 :     return result;
    2739             : }
    2740             : 
    2741             : 
    2742             : Py_LOCAL(PyStringObject *)
    2743          57 : replace(PyStringObject *self,
    2744             :     const char *from_s, Py_ssize_t from_len,
    2745             :     const char *to_s, Py_ssize_t to_len,
    2746             :     Py_ssize_t maxcount)
    2747             : {
    2748          57 :     if (maxcount < 0) {
    2749          57 :         maxcount = PY_SSIZE_T_MAX;
    2750           0 :     } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
    2751             :         /* nothing to do; return the original string */
    2752           0 :         return return_self(self);
    2753             :     }
    2754             : 
    2755          57 :     if (maxcount == 0 ||
    2756           0 :         (from_len == 0 && to_len == 0)) {
    2757             :         /* nothing to do; return the original string */
    2758           0 :         return return_self(self);
    2759             :     }
    2760             : 
    2761             :     /* Handle zero-length special cases */
    2762             : 
    2763          57 :     if (from_len == 0) {
    2764             :         /* insert the 'to' string everywhere.   */
    2765             :         /*    >>> "Python".replace("", ".")     */
    2766             :         /*    '.P.y.t.h.o.n.'                   */
    2767           0 :         return replace_interleave(self, to_s, to_len, maxcount);
    2768             :     }
    2769             : 
    2770             :     /* Except for "".replace("", "A") == "A" there is no way beyond this */
    2771             :     /* point for an empty self string to generate a non-empty string */
    2772             :     /* Special case so the remaining code always gets a non-empty string */
    2773          57 :     if (PyString_GET_SIZE(self) == 0) {
    2774           0 :         return return_self(self);
    2775             :     }
    2776             : 
    2777          57 :     if (to_len == 0) {
    2778             :         /* delete all occurrences of 'from' string */
    2779          27 :         if (from_len == 1) {
    2780          27 :             return replace_delete_single_character(
    2781          27 :                 self, from_s[0], maxcount);
    2782             :         } else {
    2783           0 :             return replace_delete_substring(self, from_s, from_len, maxcount);
    2784             :         }
    2785             :     }
    2786             : 
    2787             :     /* Handle special case where both strings have the same length */
    2788             : 
    2789          30 :     if (from_len == to_len) {
    2790          30 :         if (from_len == 1) {
    2791          60 :             return replace_single_character_in_place(
    2792             :                 self,
    2793          30 :                 from_s[0],
    2794          30 :                 to_s[0],
    2795             :                 maxcount);
    2796             :         } else {
    2797           0 :             return replace_substring_in_place(
    2798             :                 self, from_s, from_len, to_s, to_len, maxcount);
    2799             :         }
    2800             :     }
    2801             : 
    2802             :     /* Otherwise use the more generic algorithms */
    2803           0 :     if (from_len == 1) {
    2804           0 :         return replace_single_character(self, from_s[0],
    2805             :                                         to_s, to_len, maxcount);
    2806             :     } else {
    2807             :         /* len('from')>=2, len('to')>=1 */
    2808           0 :         return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
    2809             :     }
    2810             : }
    2811             : 
    2812             : PyDoc_STRVAR(replace__doc__,
    2813             : "S.replace(old, new[, count]) -> string\n\
    2814             : \n\
    2815             : Return a copy of string S with all occurrences of substring\n\
    2816             : old replaced by new.  If the optional argument count is\n\
    2817             : given, only the first count occurrences are replaced.");
    2818             : 
    2819             : static PyObject *
    2820          57 : string_replace(PyStringObject *self, PyObject *args)
    2821             : {
    2822          57 :     Py_ssize_t count = -1;
    2823             :     PyObject *from, *to;
    2824             :     const char *from_s, *to_s;
    2825             :     Py_ssize_t from_len, to_len;
    2826             : 
    2827          57 :     if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
    2828           0 :         return NULL;
    2829             : 
    2830          57 :     if (PyString_Check(from)) {
    2831          57 :         from_s = PyString_AS_STRING(from);
    2832          57 :         from_len = PyString_GET_SIZE(from);
    2833             :     }
    2834             : #ifdef Py_USING_UNICODE
    2835          57 :     if (PyUnicode_Check(from))
    2836           0 :         return PyUnicode_Replace((PyObject *)self,
    2837             :                                  from, to, count);
    2838             : #endif
    2839          57 :     else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
    2840           0 :         return NULL;
    2841             : 
    2842          57 :     if (PyString_Check(to)) {
    2843          57 :         to_s = PyString_AS_STRING(to);
    2844          57 :         to_len = PyString_GET_SIZE(to);
    2845             :     }
    2846             : #ifdef Py_USING_UNICODE
    2847           0 :     else if (PyUnicode_Check(to))
    2848           0 :         return PyUnicode_Replace((PyObject *)self,
    2849             :                                  from, to, count);
    2850             : #endif
    2851           0 :     else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
    2852           0 :         return NULL;
    2853             : 
    2854          57 :     return (PyObject *)replace((PyStringObject *) self,
    2855             :                                from_s, from_len,
    2856             :                                to_s, to_len, count);
    2857             : }
    2858             : 
    2859             : /** End DALKE **/
    2860             : 
    2861             : /* Matches the end (direction >= 0) or start (direction < 0) of self
    2862             :  * against substr, using the start and end arguments. Returns
    2863             :  * -1 on error, 0 if not found and 1 if found.
    2864             :  */
    2865             : Py_LOCAL(int)
    2866         804 : _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
    2867             :                   Py_ssize_t end, int direction)
    2868             : {
    2869         804 :     Py_ssize_t len = PyString_GET_SIZE(self);
    2870             :     Py_ssize_t slen;
    2871             :     const char* sub;
    2872             :     const char* str;
    2873             : 
    2874         804 :     if (PyString_Check(substr)) {
    2875         804 :         sub = PyString_AS_STRING(substr);
    2876         804 :         slen = PyString_GET_SIZE(substr);
    2877             :     }
    2878             : #ifdef Py_USING_UNICODE
    2879           0 :     else if (PyUnicode_Check(substr))
    2880           0 :         return PyUnicode_Tailmatch((PyObject *)self,
    2881             :                                    substr, start, end, direction);
    2882             : #endif
    2883           0 :     else if (PyObject_AsCharBuffer(substr, &sub, &slen))
    2884           0 :         return -1;
    2885         804 :     str = PyString_AS_STRING(self);
    2886             : 
    2887         804 :     ADJUST_INDICES(start, end, len);
    2888             : 
    2889         804 :     if (direction < 0) {
    2890             :         /* startswith */
    2891         570 :         if (start+slen > len)
    2892           0 :             return 0;
    2893             :     } else {
    2894             :         /* endswith */
    2895         234 :         if (end-start < slen || start > len)
    2896           0 :             return 0;
    2897             : 
    2898         234 :         if (end-slen > start)
    2899         234 :             start = end - slen;
    2900             :     }
    2901         804 :     if (end-start >= slen)
    2902         804 :         return ! memcmp(str+start, sub, slen);
    2903           0 :     return 0;
    2904             : }
    2905             : 
    2906             : 
    2907             : PyDoc_STRVAR(startswith__doc__,
    2908             : "S.startswith(prefix[, start[, end]]) -> bool\n\
    2909             : \n\
    2910             : Return True if S starts with the specified prefix, False otherwise.\n\
    2911             : With optional start, test S beginning at that position.\n\
    2912             : With optional end, stop comparing S at that position.\n\
    2913             : prefix can also be a tuple of strings to try.");
    2914             : 
    2915             : static PyObject *
    2916         570 : string_startswith(PyStringObject *self, PyObject *args)
    2917             : {
    2918         570 :     Py_ssize_t start = 0;
    2919         570 :     Py_ssize_t end = PY_SSIZE_T_MAX;
    2920             :     PyObject *subobj;
    2921             :     int result;
    2922             : 
    2923         570 :     if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
    2924           0 :         return NULL;
    2925         570 :     if (PyTuple_Check(subobj)) {
    2926             :         Py_ssize_t i;
    2927           0 :         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
    2928           0 :             result = _string_tailmatch(self,
    2929           0 :                             PyTuple_GET_ITEM(subobj, i),
    2930             :                             start, end, -1);
    2931           0 :             if (result == -1)
    2932           0 :                 return NULL;
    2933           0 :             else if (result) {
    2934           0 :                 Py_RETURN_TRUE;
    2935             :             }
    2936             :         }
    2937           0 :         Py_RETURN_FALSE;
    2938             :     }
    2939         570 :     result = _string_tailmatch(self, subobj, start, end, -1);
    2940         570 :     if (result == -1) {
    2941           0 :         if (PyErr_ExceptionMatches(PyExc_TypeError))
    2942           0 :             PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
    2943           0 :                          "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
    2944           0 :         return NULL;
    2945             :     }
    2946             :     else
    2947         570 :         return PyBool_FromLong(result);
    2948             : }
    2949             : 
    2950             : 
    2951             : PyDoc_STRVAR(endswith__doc__,
    2952             : "S.endswith(suffix[, start[, end]]) -> bool\n\
    2953             : \n\
    2954             : Return True if S ends with the specified suffix, False otherwise.\n\
    2955             : With optional start, test S beginning at that position.\n\
    2956             : With optional end, stop comparing S at that position.\n\
    2957             : suffix can also be a tuple of strings to try.");
    2958             : 
    2959             : static PyObject *
    2960         234 : string_endswith(PyStringObject *self, PyObject *args)
    2961             : {
    2962         234 :     Py_ssize_t start = 0;
    2963         234 :     Py_ssize_t end = PY_SSIZE_T_MAX;
    2964             :     PyObject *subobj;
    2965             :     int result;
    2966             : 
    2967         234 :     if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
    2968           0 :         return NULL;
    2969         234 :     if (PyTuple_Check(subobj)) {
    2970             :         Py_ssize_t i;
    2971           0 :         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
    2972           0 :             result = _string_tailmatch(self,
    2973           0 :                             PyTuple_GET_ITEM(subobj, i),
    2974             :                             start, end, +1);
    2975           0 :             if (result == -1)
    2976           0 :                 return NULL;
    2977           0 :             else if (result) {
    2978           0 :                 Py_RETURN_TRUE;
    2979             :             }
    2980             :         }
    2981           0 :         Py_RETURN_FALSE;
    2982             :     }
    2983         234 :     result = _string_tailmatch(self, subobj, start, end, +1);
    2984         234 :     if (result == -1) {
    2985           0 :         if (PyErr_ExceptionMatches(PyExc_TypeError))
    2986           0 :             PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
    2987           0 :                          "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
    2988           0 :         return NULL;
    2989             :     }
    2990             :     else
    2991         234 :         return PyBool_FromLong(result);
    2992             : }
    2993             : 
    2994             : 
    2995             : PyDoc_STRVAR(encode__doc__,
    2996             : "S.encode([encoding[,errors]]) -> object\n\
    2997             : \n\
    2998             : Encodes S using the codec registered for encoding. encoding defaults\n\
    2999             : to the default encoding. errors may be given to set a different error\n\
    3000             : handling scheme. Default is 'strict' meaning that encoding errors raise\n\
    3001             : a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
    3002             : 'xmlcharrefreplace' as well as any other name registered with\n\
    3003             : codecs.register_error that is able to handle UnicodeEncodeErrors.");
    3004             : 
    3005             : static PyObject *
    3006           0 : string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
    3007             : {
    3008             :     static char *kwlist[] = {"encoding", "errors", 0};
    3009           0 :     char *encoding = NULL;
    3010           0 :     char *errors = NULL;
    3011             :     PyObject *v;
    3012             : 
    3013           0 :     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
    3014             :                                      kwlist, &encoding, &errors))
    3015           0 :         return NULL;
    3016           0 :     v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
    3017           0 :     if (v == NULL)
    3018           0 :         goto onError;
    3019           0 :     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
    3020           0 :         PyErr_Format(PyExc_TypeError,
    3021             :                      "encoder did not return a string/unicode object "
    3022             :                      "(type=%.400s)",
    3023           0 :                      Py_TYPE(v)->tp_name);
    3024           0 :         Py_DECREF(v);
    3025           0 :         return NULL;
    3026             :     }
    3027           0 :     return v;
    3028             : 
    3029             :  onError:
    3030           0 :     return NULL;
    3031             : }
    3032             : 
    3033             : 
    3034             : PyDoc_STRVAR(decode__doc__,
    3035             : "S.decode([encoding[,errors]]) -> object\n\
    3036             : \n\
    3037             : Decodes S using the codec registered for encoding. encoding defaults\n\
    3038             : to the default encoding. errors may be given to set a different error\n\
    3039             : handling scheme. Default is 'strict' meaning that encoding errors raise\n\
    3040             : a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
    3041             : as well as any other name registered with codecs.register_error that is\n\
    3042             : able to handle UnicodeDecodeErrors.");
    3043             : 
    3044             : static PyObject *
    3045           0 : string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
    3046             : {
    3047             :     static char *kwlist[] = {"encoding", "errors", 0};
    3048           0 :     char *encoding = NULL;
    3049           0 :     char *errors = NULL;
    3050             :     PyObject *v;
    3051             : 
    3052           0 :     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
    3053             :                                      kwlist, &encoding, &errors))
    3054           0 :         return NULL;
    3055           0 :     v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
    3056           0 :     if (v == NULL)
    3057           0 :         goto onError;
    3058           0 :     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
    3059           0 :         PyErr_Format(PyExc_TypeError,
    3060             :                      "decoder did not return a string/unicode object "
    3061             :                      "(type=%.400s)",
    3062           0 :                      Py_TYPE(v)->tp_name);
    3063           0 :         Py_DECREF(v);
    3064           0 :         return NULL;
    3065             :     }
    3066           0 :     return v;
    3067             : 
    3068             :  onError:
    3069           0 :     return NULL;
    3070             : }
    3071             : 
    3072             : 
    3073             : PyDoc_STRVAR(expandtabs__doc__,
    3074             : "S.expandtabs([tabsize]) -> string\n\
    3075             : \n\
    3076             : Return a copy of S where all tab characters are expanded using spaces.\n\
    3077             : If tabsize is not given, a tab size of 8 characters is assumed.");
    3078             : 
    3079             : static PyObject*
    3080           0 : string_expandtabs(PyStringObject *self, PyObject *args)
    3081             : {
    3082             :     const char *e, *p, *qe;
    3083             :     char *q;
    3084             :     Py_ssize_t i, j, incr;
    3085             :     PyObject *u;
    3086           0 :     int tabsize = 8;
    3087             : 
    3088           0 :     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
    3089           0 :         return NULL;
    3090             : 
    3091             :     /* First pass: determine size of output string */
    3092           0 :     i = 0; /* chars up to and including most recent \n or \r */
    3093           0 :     j = 0; /* chars since most recent \n or \r (use in tab calculations) */
    3094           0 :     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
    3095           0 :     for (p = PyString_AS_STRING(self); p < e; p++) {
    3096           0 :         if (*p == '\t') {
    3097           0 :             if (tabsize > 0) {
    3098           0 :                 incr = tabsize - (j % tabsize);
    3099           0 :                 if (j > PY_SSIZE_T_MAX - incr)
    3100           0 :                     goto overflow1;
    3101           0 :                 j += incr;
    3102             :             }
    3103             :         }
    3104             :         else {
    3105           0 :             if (j > PY_SSIZE_T_MAX - 1)
    3106           0 :                 goto overflow1;
    3107           0 :             j++;
    3108           0 :             if (*p == '\n' || *p == '\r') {
    3109           0 :                 if (i > PY_SSIZE_T_MAX - j)
    3110           0 :                     goto overflow1;
    3111           0 :                 i += j;
    3112           0 :                 j = 0;
    3113             :             }
    3114             :         }
    3115             :     }
    3116             : 
    3117           0 :     if (i > PY_SSIZE_T_MAX - j)
    3118           0 :         goto overflow1;
    3119             : 
    3120             :     /* Second pass: create output string and fill it */
    3121           0 :     u = PyString_FromStringAndSize(NULL, i + j);
    3122           0 :     if (!u)
    3123           0 :         return NULL;
    3124             : 
    3125           0 :     j = 0; /* same as in first pass */
    3126           0 :     q = PyString_AS_STRING(u); /* next output char */
    3127           0 :     qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
    3128             : 
    3129           0 :     for (p = PyString_AS_STRING(self); p < e; p++) {
    3130           0 :         if (*p == '\t') {
    3131           0 :             if (tabsize > 0) {
    3132           0 :                 i = tabsize - (j % tabsize);
    3133           0 :                 j += i;
    3134           0 :                 while (i--) {
    3135           0 :                     if (q >= qe)
    3136           0 :                         goto overflow2;
    3137           0 :                     *q++ = ' ';
    3138             :                 }
    3139             :             }
    3140             :         }
    3141             :         else {
    3142           0 :             if (q >= qe)
    3143           0 :                 goto overflow2;
    3144           0 :             *q++ = *p;
    3145           0 :             j++;
    3146           0 :             if (*p == '\n' || *p == '\r')
    3147           0 :                 j = 0;
    3148             :         }
    3149             :     }
    3150             : 
    3151           0 :     return u;
    3152             : 
    3153             :   overflow2:
    3154           0 :     Py_DECREF(u);
    3155             :   overflow1:
    3156           0 :     PyErr_SetString(PyExc_OverflowError, "new string is too long");
    3157           0 :     return NULL;
    3158             : }
    3159             : 
    3160             : Py_LOCAL_INLINE(PyObject *)
    3161           0 : pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
    3162             : {
    3163             :     PyObject *u;
    3164             : 
    3165           0 :     if (left < 0)
    3166           0 :         left = 0;
    3167           0 :     if (right < 0)
    3168           0 :         right = 0;
    3169             : 
    3170           0 :     if (left == 0 && right == 0 && PyString_CheckExact(self)) {
    3171           0 :         Py_INCREF(self);
    3172           0 :         return (PyObject *)self;
    3173             :     }
    3174             : 
    3175           0 :     u = PyString_FromStringAndSize(NULL,
    3176           0 :                                    left + PyString_GET_SIZE(self) + right);
    3177           0 :     if (u) {
    3178           0 :         if (left)
    3179           0 :             memset(PyString_AS_STRING(u), fill, left);
    3180           0 :         Py_MEMCPY(PyString_AS_STRING(u) + left,
    3181           0 :                PyString_AS_STRING(self),
    3182           0 :                PyString_GET_SIZE(self));
    3183           0 :         if (right)
    3184           0 :             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
    3185             :                fill, right);
    3186             :     }
    3187             : 
    3188           0 :     return u;
    3189             : }
    3190             : 
    3191             : PyDoc_STRVAR(ljust__doc__,
    3192             : "S.ljust(width[, fillchar]) -> string\n"
    3193             : "\n"
    3194             : "Return S left-justified in a string of length width. Padding is\n"
    3195             : "done using the specified fill character (default is a space).");
    3196             : 
    3197             : static PyObject *
    3198           0 : string_ljust(PyStringObject *self, PyObject *args)
    3199             : {
    3200             :     Py_ssize_t width;
    3201           0 :     char fillchar = ' ';
    3202             : 
    3203           0 :     if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
    3204           0 :         return NULL;
    3205             : 
    3206           0 :     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
    3207           0 :         Py_INCREF(self);
    3208           0 :         return (PyObject*) self;
    3209             :     }
    3210             : 
    3211           0 :     return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
    3212             : }
    3213             : 
    3214             : 
    3215             : PyDoc_STRVAR(rjust__doc__,
    3216             : "S.rjust(width[, fillchar]) -> string\n"
    3217             : "\n"
    3218             : "Return S right-justified in a string of length width. Padding is\n"
    3219             : "done using the specified fill character (default is a space)");
    3220             : 
    3221             : static PyObject *
    3222           0 : string_rjust(PyStringObject *self, PyObject *args)
    3223             : {
    3224             :     Py_ssize_t width;
    3225           0 :     char fillchar = ' ';
    3226             : 
    3227           0 :     if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
    3228           0 :         return NULL;
    3229             : 
    3230           0 :     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
    3231           0 :         Py_INCREF(self);
    3232           0 :         return (PyObject*) self;
    3233             :     }
    3234             : 
    3235           0 :     return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
    3236             : }
    3237             : 
    3238             : 
    3239             : PyDoc_STRVAR(center__doc__,
    3240             : "S.center(width[, fillchar]) -> string\n"
    3241             : "\n"
    3242             : "Return S centered in a string of length width. Padding is\n"
    3243             : "done using the specified fill character (default is a space)");
    3244             : 
    3245             : static PyObject *
    3246           0 : string_center(PyStringObject *self, PyObject *args)
    3247             : {
    3248             :     Py_ssize_t marg, left;
    3249             :     Py_ssize_t width;
    3250           0 :     char fillchar = ' ';
    3251             : 
    3252           0 :     if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
    3253           0 :         return NULL;
    3254             : 
    3255           0 :     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
    3256           0 :         Py_INCREF(self);
    3257           0 :         return (PyObject*) self;
    3258             :     }
    3259             : 
    3260           0 :     marg = width - PyString_GET_SIZE(self);
    3261           0 :     left = marg / 2 + (marg & width & 1);
    3262             : 
    3263           0 :     return pad(self, left, marg - left, fillchar);
    3264             : }
    3265             : 
    3266             : PyDoc_STRVAR(zfill__doc__,
    3267             : "S.zfill(width) -> string\n"
    3268             : "\n"
    3269             : "Pad a numeric string S with zeros on the left, to fill a field\n"
    3270             : "of the specified width.  The string S is never truncated.");
    3271             : 
    3272             : static PyObject *
    3273           0 : string_zfill(PyStringObject *self, PyObject *args)
    3274             : {
    3275             :     Py_ssize_t fill;
    3276             :     PyObject *s;
    3277             :     char *p;
    3278             :     Py_ssize_t width;
    3279             : 
    3280           0 :     if (!PyArg_ParseTuple(args, "n:zfill", &width))
    3281           0 :         return NULL;
    3282             : 
    3283           0 :     if (PyString_GET_SIZE(self) >= width) {
    3284           0 :         if (PyString_CheckExact(self)) {
    3285           0 :             Py_INCREF(self);
    3286           0 :             return (PyObject*) self;
    3287             :         }
    3288             :         else
    3289           0 :             return PyString_FromStringAndSize(
    3290           0 :                 PyString_AS_STRING(self),
    3291             :                 PyString_GET_SIZE(self)
    3292             :             );
    3293             :     }
    3294             : 
    3295           0 :     fill = width - PyString_GET_SIZE(self);
    3296             : 
    3297           0 :     s = pad(self, fill, 0, '0');
    3298             : 
    3299           0 :     if (s == NULL)
    3300           0 :         return NULL;
    3301             : 
    3302           0 :     p = PyString_AS_STRING(s);
    3303           0 :     if (p[fill] == '+' || p[fill] == '-') {
    3304             :         /* move sign to beginning of string */
    3305           0 :         p[0] = p[fill];
    3306           0 :         p[fill] = '0';
    3307             :     }
    3308             : 
    3309           0 :     return (PyObject*) s;
    3310             : }
    3311             : 
    3312             : PyDoc_STRVAR(isspace__doc__,
    3313             : "S.isspace() -> bool\n\
    3314             : \n\
    3315             : Return True if all characters in S are whitespace\n\
    3316             : and there is at least one character in S, False otherwise.");
    3317             : 
    3318             : static PyObject*
    3319           0 : string_isspace(PyStringObject *self)
    3320             : {
    3321           0 :     register const unsigned char *p
    3322             :         = (unsigned char *) PyString_AS_STRING(self);
    3323             :     register const unsigned char *e;
    3324             : 
    3325             :     /* Shortcut for single character strings */
    3326           0 :     if (PyString_GET_SIZE(self) == 1 &&
    3327           0 :         isspace(*p))
    3328           0 :         return PyBool_FromLong(1);
    3329             : 
    3330             :     /* Special case for empty strings */
    3331           0 :     if (PyString_GET_SIZE(self) == 0)
    3332           0 :         return PyBool_FromLong(0);
    3333             : 
    3334           0 :     e = p + PyString_GET_SIZE(self);
    3335           0 :     for (; p < e; p++) {
    3336           0 :         if (!isspace(*p))
    3337           0 :             return PyBool_FromLong(0);
    3338             :     }
    3339           0 :     return PyBool_FromLong(1);
    3340             : }
    3341             : 
    3342             : 
    3343             : PyDoc_STRVAR(isalpha__doc__,
    3344             : "S.isalpha() -> bool\n\
    3345             : \n\
    3346             : Return True if all characters in S are alphabetic\n\
    3347             : and there is at least one character in S, False otherwise.");
    3348             : 
    3349             : static PyObject*
    3350        2760 : string_isalpha(PyStringObject *self)
    3351             : {
    3352        2760 :     register const unsigned char *p
    3353             :         = (unsigned char *) PyString_AS_STRING(self);
    3354             :     register const unsigned char *e;
    3355             : 
    3356             :     /* Shortcut for single character strings */
    3357        5520 :     if (PyString_GET_SIZE(self) == 1 &&
    3358        2760 :         isalpha(*p))
    3359        1272 :         return PyBool_FromLong(1);
    3360             : 
    3361             :     /* Special case for empty strings */
    3362        1488 :     if (PyString_GET_SIZE(self) == 0)
    3363           0 :         return PyBool_FromLong(0);
    3364             : 
    3365        1488 :     e = p + PyString_GET_SIZE(self);
    3366        1488 :     for (; p < e; p++) {
    3367        1488 :         if (!isalpha(*p))
    3368        1488 :             return PyBool_FromLong(0);
    3369             :     }
    3370           0 :     return PyBool_FromLong(1);
    3371             : }
    3372             : 
    3373             : 
    3374             : PyDoc_STRVAR(isalnum__doc__,
    3375             : "S.isalnum() -> bool\n\
    3376             : \n\
    3377             : Return True if all characters in S are alphanumeric\n\
    3378             : and there is at least one character in S, False otherwise.");
    3379             : 
    3380             : static PyObject*
    3381         960 : string_isalnum(PyStringObject *self)
    3382             : {
    3383         960 :     register const unsigned char *p
    3384             :         = (unsigned char *) PyString_AS_STRING(self);
    3385             :     register const unsigned char *e;
    3386             : 
    3387             :     /* Shortcut for single character strings */
    3388        1920 :     if (PyString_GET_SIZE(self) == 1 &&
    3389         960 :         isalnum(*p))
    3390         951 :         return PyBool_FromLong(1);
    3391             : 
    3392             :     /* Special case for empty strings */
    3393           9 :     if (PyString_GET_SIZE(self) == 0)
    3394           0 :         return PyBool_FromLong(0);
    3395             : 
    3396           9 :     e = p + PyString_GET_SIZE(self);
    3397           9 :     for (; p < e; p++) {
    3398           9 :         if (!isalnum(*p))
    3399           9 :             return PyBool_FromLong(0);
    3400             :     }
    3401           0 :     return PyBool_FromLong(1);
    3402             : }
    3403             : 
    3404             : 
    3405             : PyDoc_STRVAR(isdigit__doc__,
    3406             : "S.isdigit() -> bool\n\
    3407             : \n\
    3408             : Return True if all characters in S are digits\n\
    3409             : and there is at least one character in S, False otherwise.");
    3410             : 
    3411             : static PyObject*
    3412         141 : string_isdigit(PyStringObject *self)
    3413             : {
    3414         141 :     register const unsigned char *p
    3415             :         = (unsigned char *) PyString_AS_STRING(self);
    3416             :     register const unsigned char *e;
    3417             : 
    3418             :     /* Shortcut for single character strings */
    3419         282 :     if (PyString_GET_SIZE(self) == 1 &&
    3420         141 :         isdigit(*p))
    3421           0 :         return PyBool_FromLong(1);
    3422             : 
    3423             :     /* Special case for empty strings */
    3424         141 :     if (PyString_GET_SIZE(self) == 0)
    3425           0 :         return PyBool_FromLong(0);
    3426             : 
    3427         141 :     e = p + PyString_GET_SIZE(self);
    3428         141 :     for (; p < e; p++) {
    3429         141 :         if (!isdigit(*p))
    3430         141 :             return PyBool_FromLong(0);
    3431             :     }
    3432           0 :     return PyBool_FromLong(1);
    3433             : }
    3434             : 
    3435             : 
    3436             : PyDoc_STRVAR(islower__doc__,
    3437             : "S.islower() -> bool\n\
    3438             : \n\
    3439             : Return True if all cased characters in S are lowercase and there is\n\
    3440             : at least one cased character in S, False otherwise.");
    3441             : 
    3442             : static PyObject*
    3443           0 : string_islower(PyStringObject *self)
    3444             : {
    3445           0 :     register const unsigned char *p
    3446             :         = (unsigned char *) PyString_AS_STRING(self);
    3447             :     register const unsigned char *e;
    3448             :     int cased;
    3449             : 
    3450             :     /* Shortcut for single character strings */
    3451           0 :     if (PyString_GET_SIZE(self) == 1)
    3452           0 :         return PyBool_FromLong(islower(*p) != 0);
    3453             : 
    3454             :     /* Special case for empty strings */
    3455           0 :     if (PyString_GET_SIZE(self) == 0)
    3456           0 :         return PyBool_FromLong(0);
    3457             : 
    3458           0 :     e = p + PyString_GET_SIZE(self);
    3459           0 :     cased = 0;
    3460           0 :     for (; p < e; p++) {
    3461           0 :         if (isupper(*p))
    3462           0 :             return PyBool_FromLong(0);
    3463           0 :         else if (!cased && islower(*p))
    3464           0 :             cased = 1;
    3465             :     }
    3466           0 :     return PyBool_FromLong(cased);
    3467             : }
    3468             : 
    3469             : 
    3470             : PyDoc_STRVAR(isupper__doc__,
    3471             : "S.isupper() -> bool\n\
    3472             : \n\
    3473             : Return True if all cased characters in S are uppercase and there is\n\
    3474             : at least one cased character in S, False otherwise.");
    3475             : 
    3476             : static PyObject*
    3477        1272 : string_isupper(PyStringObject *self)
    3478             : {
    3479        1272 :     register const unsigned char *p
    3480             :         = (unsigned char *) PyString_AS_STRING(self);
    3481             :     register const unsigned char *e;
    3482             :     int cased;
    3483             : 
    3484             :     /* Shortcut for single character strings */
    3485        1272 :     if (PyString_GET_SIZE(self) == 1)
    3486        1272 :         return PyBool_FromLong(isupper(*p) != 0);
    3487             : 
    3488             :     /* Special case for empty strings */
    3489           0 :     if (PyString_GET_SIZE(self) == 0)
    3490           0 :         return PyBool_FromLong(0);
    3491             : 
    3492           0 :     e = p + PyString_GET_SIZE(self);
    3493           0 :     cased = 0;
    3494           0 :     for (; p < e; p++) {
    3495           0 :         if (islower(*p))
    3496           0 :             return PyBool_FromLong(0);
    3497           0 :         else if (!cased && isupper(*p))
    3498           0 :             cased = 1;
    3499             :     }
    3500           0 :     return PyBool_FromLong(cased);
    3501             : }
    3502             : 
    3503             : 
    3504             : PyDoc_STRVAR(istitle__doc__,
    3505             : "S.istitle() -> bool\n\
    3506             : \n\
    3507             : Return True if S is a titlecased string and there is at least one\n\
    3508             : character in S, i.e. uppercase characters may only follow uncased\n\
    3509             : characters and lowercase characters only cased ones. Return False\n\
    3510             : otherwise.");
    3511             : 
    3512             : static PyObject*
    3513           0 : string_istitle(PyStringObject *self, PyObject *uncased)
    3514             : {
    3515           0 :     register const unsigned char *p
    3516             :         = (unsigned char *) PyString_AS_STRING(self);
    3517             :     register const unsigned char *e;
    3518             :     int cased, previous_is_cased;
    3519             : 
    3520             :     /* Shortcut for single character strings */
    3521           0 :     if (PyString_GET_SIZE(self) == 1)
    3522           0 :         return PyBool_FromLong(isupper(*p) != 0);
    3523             : 
    3524             :     /* Special case for empty strings */
    3525           0 :     if (PyString_GET_SIZE(self) == 0)
    3526           0 :         return PyBool_FromLong(0);
    3527             : 
    3528           0 :     e = p + PyString_GET_SIZE(self);
    3529           0 :     cased = 0;
    3530           0 :     previous_is_cased = 0;
    3531           0 :     for (; p < e; p++) {
    3532           0 :         register const unsigned char ch = *p;
    3533             : 
    3534           0 :         if (isupper(ch)) {
    3535           0 :             if (previous_is_cased)
    3536           0 :                 return PyBool_FromLong(0);
    3537           0 :             previous_is_cased = 1;
    3538           0 :             cased = 1;
    3539             :         }
    3540           0 :         else if (islower(ch)) {
    3541           0 :             if (!previous_is_cased)
    3542           0 :                 return PyBool_FromLong(0);
    3543           0 :             previous_is_cased = 1;
    3544           0 :             cased = 1;
    3545             :         }
    3546             :         else
    3547           0 :             previous_is_cased = 0;
    3548             :     }
    3549           0 :     return PyBool_FromLong(cased);
    3550             : }
    3551             : 
    3552             : 
    3553             : PyDoc_STRVAR(splitlines__doc__,
    3554             : "S.splitlines(keepends=False) -> list of strings\n\
    3555             : \n\
    3556             : Return a list of the lines in S, breaking at line boundaries.\n\
    3557             : Line breaks are not included in the resulting list unless keepends\n\
    3558             : is given and true.");
    3559             : 
    3560             : static PyObject*
    3561           9 : string_splitlines(PyStringObject *self, PyObject *args)
    3562             : {
    3563           9 :     int keepends = 0;
    3564             : 
    3565           9 :     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
    3566           0 :         return NULL;
    3567             : 
    3568          18 :     return stringlib_splitlines(
    3569           9 :         (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
    3570             :         keepends
    3571             :     );
    3572             : }
    3573             : 
    3574             : PyDoc_STRVAR(sizeof__doc__,
    3575             : "S.__sizeof__() -> size of S in memory, in bytes");
    3576             : 
    3577             : static PyObject *
    3578           0 : string_sizeof(PyStringObject *v)
    3579             : {
    3580             :     Py_ssize_t res;
    3581           0 :     res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
    3582           0 :     return PyInt_FromSsize_t(res);
    3583             : }
    3584             : 
    3585             : static PyObject *
    3586           0 : string_getnewargs(PyStringObject *v)
    3587             : {
    3588           0 :     return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
    3589             : }
    3590             : 
    3591             : 
    3592             : #include "stringlib/string_format.h"
    3593             : 
    3594             : PyDoc_STRVAR(format__doc__,
    3595             : "S.format(*args, **kwargs) -> string\n\
    3596             : \n\
    3597             : Return a formatted version of S, using substitutions from args and kwargs.\n\
    3598             : The substitutions are identified by braces ('{' and '}').");
    3599             : 
    3600             : static PyObject *
    3601           0 : string__format__(PyObject* self, PyObject* args)
    3602             : {
    3603             :     PyObject *format_spec;
    3604           0 :     PyObject *result = NULL;
    3605           0 :     PyObject *tmp = NULL;
    3606             : 
    3607             :     /* If 2.x, convert format_spec to the same type as value */
    3608             :     /* This is to allow things like u''.format('') */
    3609           0 :     if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
    3610           0 :         goto done;
    3611           0 :     if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
    3612           0 :         PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
    3613           0 :                      "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
    3614           0 :         goto done;
    3615             :     }
    3616           0 :     tmp = PyObject_Str(format_spec);
    3617           0 :     if (tmp == NULL)
    3618           0 :         goto done;
    3619           0 :     format_spec = tmp;
    3620             : 
    3621           0 :     result = _PyBytes_FormatAdvanced(self,
    3622           0 :                                      PyString_AS_STRING(format_spec),
    3623           0 :                                      PyString_GET_SIZE(format_spec));
    3624             : done:
    3625           0 :     Py_XDECREF(tmp);
    3626           0 :     return result;
    3627             : }
    3628             : 
    3629             : PyDoc_STRVAR(p_format__doc__,
    3630             : "S.__format__(format_spec) -> string\n\
    3631             : \n\
    3632             : Return a formatted version of S as described by format_spec.");
    3633             : 
    3634             : 
    3635             : static PyMethodDef
    3636             : string_methods[] = {
    3637             :     /* Counterparts of the obsolete stropmodule functions; except
    3638             :        string.maketrans(). */
    3639             :     {"join", (PyCFunction)string_join, METH_O, join__doc__},
    3640             :     {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    3641             :     {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
    3642             :     {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
    3643             :     {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
    3644             :     {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
    3645             :     {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
    3646             :     {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
    3647             :     {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
    3648             :     {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
    3649             :     {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
    3650             :     {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
    3651             :     {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
    3652             :      capitalize__doc__},
    3653             :     {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    3654             :     {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
    3655             :      endswith__doc__},
    3656             :     {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
    3657             :     {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    3658             :     {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    3659             :     {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
    3660             :     {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    3661             :     {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    3662             :     {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    3663             :     {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
    3664             :     {"rpartition", (PyCFunction)string_rpartition, METH_O,
    3665             :      rpartition__doc__},
    3666             :     {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
    3667             :      startswith__doc__},
    3668             :     {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
    3669             :     {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
    3670             :      swapcase__doc__},
    3671             :     {"translate", (PyCFunction)string_translate, METH_VARARGS,
    3672             :      translate__doc__},
    3673             :     {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
    3674             :     {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
    3675             :     {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
    3676             :     {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
    3677             :     {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
    3678             :     {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
    3679             :     {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
    3680             :     {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
    3681             :     {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
    3682             :     {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
    3683             :     {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
    3684             :     {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
    3685             :      expandtabs__doc__},
    3686             :     {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
    3687             :      splitlines__doc__},
    3688             :     {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
    3689             :      sizeof__doc__},
    3690             :     {"__getnewargs__",          (PyCFunction)string_getnewargs, METH_NOARGS},
    3691             :     {NULL,     NULL}                         /* sentinel */
    3692             : };
    3693             : 
    3694             : static PyObject *
    3695             : str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
    3696             : 
    3697             : static PyObject *
    3698        1119 : string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
    3699             : {
    3700        1119 :     PyObject *x = NULL;
    3701             :     static char *kwlist[] = {"object", 0};
    3702             : 
    3703        1119 :     if (type != &PyString_Type)
    3704           0 :         return str_subtype_new(type, args, kwds);
    3705        1119 :     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
    3706           0 :         return NULL;
    3707        1119 :     if (x == NULL)
    3708           0 :         return PyString_FromString("");
    3709        1119 :     return PyObject_Str(x);
    3710             : }
    3711             : 
    3712             : static PyObject *
    3713           0 : str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
    3714             : {
    3715             :     PyObject *tmp, *pnew;
    3716             :     Py_ssize_t n;
    3717             : 
    3718             :     assert(PyType_IsSubtype(type, &PyString_Type));
    3719           0 :     tmp = string_new(&PyString_Type, args, kwds);
    3720           0 :     if (tmp == NULL)
    3721           0 :         return NULL;
    3722             :     assert(PyString_Check(tmp));
    3723           0 :     n = PyString_GET_SIZE(tmp);
    3724           0 :     pnew = type->tp_alloc(type, n);
    3725           0 :     if (pnew != NULL) {
    3726           0 :         Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
    3727           0 :         ((PyStringObject *)pnew)->ob_shash =
    3728           0 :             ((PyStringObject *)tmp)->ob_shash;
    3729           0 :         ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
    3730             :     }
    3731           0 :     Py_DECREF(tmp);
    3732           0 :     return pnew;
    3733             : }
    3734             : 
    3735             : static PyObject *
    3736           0 : basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
    3737             : {
    3738           0 :     PyErr_SetString(PyExc_TypeError,
    3739             :                     "The basestring type cannot be instantiated");
    3740           0 :     return NULL;
    3741             : }
    3742             : 
    3743             : static PyObject *
    3744           0 : string_mod(PyObject *v, PyObject *w)
    3745             : {
    3746           0 :     if (!PyString_Check(v)) {
    3747           0 :         Py_INCREF(Py_NotImplemented);
    3748           0 :         return Py_NotImplemented;
    3749             :     }
    3750           0 :     return PyString_Format(v, w);
    3751             : }
    3752             : 
    3753             : PyDoc_STRVAR(basestring_doc,
    3754             : "Type basestring cannot be instantiated; it is the base for str and unicode.");
    3755             : 
    3756             : static PyNumberMethods string_as_number = {
    3757             :     0,                          /*nb_add*/
    3758             :     0,                          /*nb_subtract*/
    3759             :     0,                          /*nb_multiply*/
    3760             :     0,                          /*nb_divide*/
    3761             :     string_mod,                 /*nb_remainder*/
    3762             : };
    3763             : 
    3764             : 
    3765             : PyTypeObject PyBaseString_Type = {
    3766             :     PyVarObject_HEAD_INIT(&PyType_Type, 0)
    3767             :     "basestring",
    3768             :     0,
    3769             :     0,
    3770             :     0,                                          /* tp_dealloc */
    3771             :     0,                                          /* tp_print */
    3772             :     0,                                          /* tp_getattr */
    3773             :     0,                                          /* tp_setattr */
    3774             :     0,                                          /* tp_compare */
    3775             :     0,                                          /* tp_repr */
    3776             :     0,                                          /* tp_as_number */
    3777             :     0,                                          /* tp_as_sequence */
    3778             :     0,                                          /* tp_as_mapping */
    3779             :     0,                                          /* tp_hash */
    3780             :     0,                                          /* tp_call */
    3781             :     0,                                          /* tp_str */
    3782             :     0,                                          /* tp_getattro */
    3783             :     0,                                          /* tp_setattro */
    3784             :     0,                                          /* tp_as_buffer */
    3785             :     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    3786             :     basestring_doc,                             /* tp_doc */
    3787             :     0,                                          /* tp_traverse */
    3788             :     0,                                          /* tp_clear */
    3789             :     0,                                          /* tp_richcompare */
    3790             :     0,                                          /* tp_weaklistoffset */
    3791             :     0,                                          /* tp_iter */
    3792             :     0,                                          /* tp_iternext */
    3793             :     0,                                          /* tp_methods */
    3794             :     0,                                          /* tp_members */
    3795             :     0,                                          /* tp_getset */
    3796             :     &PyBaseObject_Type,                         /* tp_base */
    3797             :     0,                                          /* tp_dict */
    3798             :     0,                                          /* tp_descr_get */
    3799             :     0,                                          /* tp_descr_set */
    3800             :     0,                                          /* tp_dictoffset */
    3801             :     0,                                          /* tp_init */
    3802             :     0,                                          /* tp_alloc */
    3803             :     basestring_new,                             /* tp_new */
    3804             :     0,                                          /* tp_free */
    3805             : };
    3806             : 
    3807             : PyDoc_STRVAR(string_doc,
    3808             : "str(object='') -> string\n\
    3809             : \n\
    3810             : Return a nice string representation of the object.\n\
    3811             : If the argument is a string, the return value is the same object.");
    3812             : 
    3813             : PyTypeObject PyString_Type = {
    3814             :     PyVarObject_HEAD_INIT(&PyType_Type, 0)
    3815             :     "str",
    3816             :     PyStringObject_SIZE,
    3817             :     sizeof(char),
    3818             :     string_dealloc,                             /* tp_dealloc */
    3819             :     (printfunc)string_print,                    /* tp_print */
    3820             :     0,                                          /* tp_getattr */
    3821             :     0,                                          /* tp_setattr */
    3822             :     0,                                          /* tp_compare */
    3823             :     string_repr,                                /* tp_repr */
    3824             :     &string_as_number,                          /* tp_as_number */
    3825             :     &string_as_sequence,                        /* tp_as_sequence */
    3826             :     &string_as_mapping,                         /* tp_as_mapping */
    3827             :     (hashfunc)string_hash,                      /* tp_hash */
    3828             :     0,                                          /* tp_call */
    3829             :     string_str,                                 /* tp_str */
    3830             :     PyObject_GenericGetAttr,                    /* tp_getattro */
    3831             :     0,                                          /* tp_setattro */
    3832             :     &string_as_buffer,                          /* tp_as_buffer */
    3833             :     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
    3834             :         Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
    3835             :         Py_TPFLAGS_HAVE_NEWBUFFER,              /* tp_flags */
    3836             :     string_doc,                                 /* tp_doc */
    3837             :     0,                                          /* tp_traverse */
    3838             :     0,                                          /* tp_clear */
    3839             :     (richcmpfunc)string_richcompare,            /* tp_richcompare */
    3840             :     0,                                          /* tp_weaklistoffset */
    3841             :     0,                                          /* tp_iter */
    3842             :     0,                                          /* tp_iternext */
    3843             :     string_methods,                             /* tp_methods */
    3844             :     0,                                          /* tp_members */
    3845             :     0,                                          /* tp_getset */
    3846             :     &PyBaseString_Type,                         /* tp_base */
    3847             :     0,                                          /* tp_dict */
    3848             :     0,                                          /* tp_descr_get */
    3849             :     0,                                          /* tp_descr_set */
    3850             :     0,                                          /* tp_dictoffset */
    3851             :     0,                                          /* tp_init */
    3852             :     0,                                          /* tp_alloc */
    3853             :     string_new,                                 /* tp_new */
    3854             :     PyObject_Del,                               /* tp_free */
    3855             : };
    3856             : 
    3857             : void
    3858        6739 : PyString_Concat(register PyObject **pv, register PyObject *w)
    3859             : {
    3860             :     register PyObject *v;
    3861        6739 :     if (*pv == NULL)
    3862           0 :         return;
    3863        6739 :     if (w == NULL || !PyString_Check(*pv)) {
    3864           0 :         Py_CLEAR(*pv);
    3865           0 :         return;
    3866             :     }
    3867        6739 :     v = string_concat((PyStringObject *) *pv, w);
    3868        6739 :     Py_SETREF(*pv, v);
    3869             : }
    3870             : 
    3871             : void
    3872         168 : PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
    3873             : {
    3874         168 :     PyString_Concat(pv, w);
    3875         168 :     Py_XDECREF(w);
    3876         168 : }
    3877             : 
    3878             : 
    3879             : /* The following function breaks the notion that strings are immutable:
    3880             :    it changes the size of a string.  We get away with this only if there
    3881             :    is only one module referencing the object.  You can also think of it
    3882             :    as creating a new string object and destroying the old one, only
    3883             :    more efficiently.  In any case, don't use this if the string may
    3884             :    already be known to some other part of the code...
    3885             :    Note that if there's not enough memory to resize the string, the original
    3886             :    string object at *pv is deallocated, *pv is set to NULL, an "out of
    3887             :    memory" exception is set, and -1 is returned.  Else (on success) 0 is
    3888             :    returned, and the value in *pv may or may not be the same as on input.
    3889             :    As always, an extra byte is allocated for a trailing \0 byte (newsize
    3890             :    does *not* include that), and a trailing \0 byte is stored.
    3891             : */
    3892             : 
    3893             : int
    3894       11473 : _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
    3895             : {
    3896             :     register PyObject *v;
    3897             :     register PyStringObject *sv;
    3898       11473 :     v = *pv;
    3899       22946 :     if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
    3900       11473 :         PyString_CHECK_INTERNED(v)) {
    3901           0 :         *pv = 0;
    3902           0 :         Py_DECREF(v);
    3903           0 :         PyErr_BadInternalCall();
    3904           0 :         return -1;
    3905             :     }
    3906             :     /* XXX UNREF/NEWREF interface should be more symmetrical */
    3907             :     _Py_DEC_REFTOTAL;
    3908             :     _Py_ForgetReference(v);
    3909       11473 :     *pv = (PyObject *)
    3910       11473 :         PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
    3911       11473 :     if (*pv == NULL) {
    3912           0 :         PyObject_Del(v);
    3913           0 :         PyErr_NoMemory();
    3914           0 :         return -1;
    3915             :     }
    3916       11473 :     _Py_NewReference(*pv);
    3917       11473 :     sv = (PyStringObject *) *pv;
    3918       11473 :     Py_SIZE(sv) = newsize;
    3919       11473 :     sv->ob_sval[newsize] = '\0';
    3920       11473 :     sv->ob_shash = -1;          /* invalidate cached hash value */
    3921       11473 :     return 0;
    3922             : }
    3923             : 
    3924             : /* Helpers for formatstring */
    3925             : 
    3926             : Py_LOCAL_INLINE(PyObject *)
    3927        2220 : getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
    3928             : {
    3929        2220 :     Py_ssize_t argidx = *p_argidx;
    3930        2220 :     if (argidx < arglen) {
    3931        2220 :         (*p_argidx)++;
    3932        2220 :         if (arglen < 0)
    3933         114 :             return args;
    3934             :         else
    3935        2106 :             return PyTuple_GetItem(args, argidx);
    3936             :     }
    3937           0 :     PyErr_SetString(PyExc_TypeError,
    3938             :                     "not enough arguments for format string");
    3939           0 :     return NULL;
    3940             : }
    3941             : 
    3942             : /* Format codes
    3943             :  * F_LJUST      '-'
    3944             :  * F_SIGN       '+'
    3945             :  * F_BLANK      ' '
    3946             :  * F_ALT        '#'
    3947             :  * F_ZERO       '0'
    3948             :  */
    3949             : #define F_LJUST (1<<0)
    3950             : #define F_SIGN  (1<<1)
    3951             : #define F_BLANK (1<<2)
    3952             : #define F_ALT   (1<<3)
    3953             : #define F_ZERO  (1<<4)
    3954             : 
    3955             : /* Returns a new reference to a PyString object, or NULL on failure. */
    3956             : 
    3957             : static PyObject *
    3958           0 : formatfloat(PyObject *v, int flags, int prec, int type)
    3959             : {
    3960             :     char *p;
    3961             :     PyObject *result;
    3962             :     double x;
    3963             : 
    3964           0 :     x = PyFloat_AsDouble(v);
    3965           0 :     if (x == -1.0 && PyErr_Occurred()) {
    3966           0 :         PyErr_Format(PyExc_TypeError, "float argument required, "
    3967           0 :                      "not %.200s", Py_TYPE(v)->tp_name);
    3968           0 :         return NULL;
    3969             :     }
    3970             : 
    3971           0 :     if (prec < 0)
    3972           0 :         prec = 6;
    3973             : 
    3974           0 :     p = PyOS_double_to_string(x, type, prec,
    3975           0 :                               (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
    3976             : 
    3977           0 :     if (p == NULL)
    3978           0 :         return NULL;
    3979           0 :     result = PyString_FromStringAndSize(p, strlen(p));
    3980           0 :     PyMem_Free(p);
    3981           0 :     return result;
    3982             : }
    3983             : 
    3984             : /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
    3985             :  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
    3986             :  * Python's regular ints.
    3987             :  * Return value:  a new PyString*, or NULL if error.
    3988             :  *  .  *pbuf is set to point into it,
    3989             :  *     *plen set to the # of chars following that.
    3990             :  *     Caller must decref it when done using pbuf.
    3991             :  *     The string starting at *pbuf is of the form
    3992             :  *         "-"? ("0x" | "0X")? digit+
    3993             :  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
    3994             :  *         set in flags.  The case of hex digits will be correct,
    3995             :  *     There will be at least prec digits, zero-filled on the left if
    3996             :  *         necessary to get that many.
    3997             :  * val          object to be converted
    3998             :  * flags        bitmask of format flags; only F_ALT is looked at
    3999             :  * prec         minimum number of digits; 0-fill on left if needed
    4000             :  * type         a character in [duoxX]; u acts the same as d
    4001             :  *
    4002             :  * CAUTION:  o, x and X conversions on regular ints can never
    4003             :  * produce a '-' sign, but can for Python's unbounded ints.
    4004             :  */
    4005             : PyObject*
    4006           0 : _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
    4007             :                      char **pbuf, int *plen)
    4008             : {
    4009           0 :     PyObject *result = NULL, *r1;
    4010             :     const char *s;
    4011             :     char *buf;
    4012             :     Py_ssize_t i;
    4013             :     int sign;           /* 1 if '-', else 0 */
    4014             :     int len;            /* number of characters */
    4015             :     Py_ssize_t llen;
    4016             :     int numdigits;      /* len == numnondigits + skipped + numdigits */
    4017             :     int numnondigits, skipped, filled;
    4018             :     const char *method;
    4019             : 
    4020           0 :     switch (type) {
    4021             :     case 'd':
    4022             :     case 'u':
    4023           0 :         method = "str";
    4024           0 :         result = Py_TYPE(val)->tp_str(val);
    4025           0 :         break;
    4026             :     case 'o':
    4027           0 :         method = "oct";
    4028           0 :         result = Py_TYPE(val)->tp_as_number->nb_oct(val);
    4029           0 :         break;
    4030             :     case 'x':
    4031             :     case 'X':
    4032           0 :         method = "hex";
    4033           0 :         result = Py_TYPE(val)->tp_as_number->nb_hex(val);
    4034           0 :         break;
    4035             :     default:
    4036             :         assert(!"'type' not in [duoxX]");
    4037             :     }
    4038           0 :     if (!result)
    4039           0 :         return NULL;
    4040             : 
    4041           0 :     if (PyString_AsStringAndSize(result, (char **)&s, &llen) < 0) {
    4042           0 :         Py_DECREF(result);
    4043           0 :         return NULL;
    4044             :     }
    4045           0 :     if (llen > INT_MAX) {
    4046           0 :         PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
    4047           0 :         Py_DECREF(result);
    4048           0 :         return NULL;
    4049             :     }
    4050           0 :     len = (int)llen;
    4051           0 :     if (len > 0 && s[len-1] == 'L') {
    4052           0 :         --len;
    4053           0 :         if (len == 0)
    4054           0 :             goto error;
    4055             :     }
    4056           0 :     sign = s[0] == '-';
    4057           0 :     numnondigits = sign;
    4058             : 
    4059             :     /* Need to skip 0x, 0X or 0. */
    4060           0 :     skipped = 0;
    4061           0 :     switch (type) {
    4062             :     case 'o':
    4063           0 :         if (s[sign] != '0')
    4064           0 :             goto error;
    4065             :         /* If 0 is only digit, leave it alone. */
    4066           0 :         if ((flags & F_ALT) == 0 && len - sign > 1)
    4067           0 :             skipped = 1;
    4068           0 :         break;
    4069             :     case 'x':
    4070             :     case 'X':
    4071           0 :         if (s[sign] != '0' || (s[sign + 1] != 'x' && s[sign + 1] != 'X'))
    4072             :             goto error;
    4073           0 :         if ((flags & F_ALT) == 0)
    4074           0 :             skipped = 2;
    4075             :         else
    4076           0 :             numnondigits += 2;
    4077           0 :         break;
    4078             :     }
    4079           0 :     numdigits = len - numnondigits - skipped;
    4080           0 :     if (numdigits <= 0)
    4081           0 :         goto error;
    4082             : 
    4083           0 :     filled = prec - numdigits;
    4084           0 :     if (filled < 0)
    4085           0 :         filled = 0;
    4086           0 :     len = numnondigits + filled + numdigits;
    4087             : 
    4088             :     /* To modify the string in-place, there can only be one reference. */
    4089           0 :     if (skipped >= filled &&
    4090           0 :         PyString_CheckExact(result) &&
    4091           0 :         Py_REFCNT(result) == 1 &&
    4092           0 :         !PyString_CHECK_INTERNED(result))
    4093             :     {
    4094           0 :         r1 = NULL;
    4095           0 :         buf = (char *)s + skipped - filled;
    4096             :     }
    4097             :     else {
    4098           0 :         r1 = result;
    4099           0 :         result = PyString_FromStringAndSize(NULL, len);
    4100           0 :         if (!result) {
    4101           0 :             Py_DECREF(r1);
    4102           0 :             return NULL;
    4103             :         }
    4104           0 :         buf = PyString_AS_STRING(result);
    4105             :     }
    4106             : 
    4107           0 :     for (i = numnondigits; --i >= 0;)
    4108           0 :         buf[i] = s[i];
    4109           0 :     buf += numnondigits;
    4110           0 :     s += numnondigits + skipped;
    4111           0 :     for (i = 0; i < filled; i++)
    4112           0 :         *buf++ = '0';
    4113           0 :     if (r1 == NULL) {
    4114             :         assert(buf == s);
    4115           0 :         buf += numdigits;
    4116             :     }
    4117             :     else {
    4118           0 :         for (i = 0; i < numdigits; i++)
    4119           0 :             *buf++ = *s++;
    4120             :     }
    4121           0 :     *buf = '\0';
    4122           0 :     buf -= len;
    4123           0 :     Py_XDECREF(r1);
    4124             : 
    4125             :     /* Fix up case for hex conversions. */
    4126           0 :     if (type == 'X') {
    4127             :         /* Need to convert all lower case letters to upper case.
    4128             :            and need to convert 0x to 0X (and -0x to -0X). */
    4129           0 :         for (i = 0; i < len; i++) {
    4130           0 :             if (buf[i] >= 'a' && buf[i] <= 'z')
    4131           0 :                 buf[i] -= 'a'-'A';
    4132             :         }
    4133             :     }
    4134           0 :     *pbuf = buf;
    4135           0 :     *plen = len;
    4136           0 :     return result;
    4137             : 
    4138             : error:
    4139           0 :     PyErr_Format(PyExc_ValueError,
    4140             :                  "%%%c format: invalid result of __%s__ (type=%.200s)",
    4141           0 :                  type, method, Py_TYPE(val)->tp_name);
    4142           0 :     Py_DECREF(result);
    4143           0 :     return NULL;
    4144             : }
    4145             : 
    4146             : Py_LOCAL_INLINE(int)
    4147           0 : formatint(char *buf, size_t buflen, int flags,
    4148             :           int prec, int type, PyObject *v)
    4149             : {
    4150             :     /* fmt = '%#.' + `prec` + 'l' + `type`
    4151             :        worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
    4152             :        + 1 + 1 = 24 */
    4153             :     char fmt[64];       /* plenty big enough! */
    4154             :     char *sign;
    4155             :     long x;
    4156             : 
    4157           0 :     x = PyInt_AsLong(v);
    4158           0 :     if (x == -1 && PyErr_Occurred()) {
    4159           0 :         PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
    4160           0 :                      Py_TYPE(v)->tp_name);
    4161           0 :         return -1;
    4162             :     }
    4163           0 :     if (x < 0 && type == 'u') {
    4164           0 :         type = 'd';
    4165             :     }
    4166           0 :     if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
    4167           0 :         sign = "-";
    4168             :     else
    4169           0 :         sign = "";
    4170           0 :     if (prec < 0)
    4171           0 :         prec = 1;
    4172             : 
    4173           0 :     if ((flags & F_ALT) &&
    4174           0 :         (type == 'x' || type == 'X')) {
    4175             :         /* When converting under %#x or %#X, there are a number
    4176             :          * of issues that cause pain:
    4177             :          * - when 0 is being converted, the C standard leaves off
    4178             :          *   the '0x' or '0X', which is inconsistent with other
    4179             :          *   %#x/%#X conversions and inconsistent with Python's
    4180             :          *   hex() function
    4181             :          * - there are platforms that violate the standard and
    4182             :          *   convert 0 with the '0x' or '0X'
    4183             :          *   (Metrowerks, Compaq Tru64)
    4184             :          * - there are platforms that give '0x' when converting
    4185             :          *   under %#X, but convert 0 in accordance with the
    4186             :          *   standard (OS/2 EMX)
    4187             :          *
    4188             :          * We can achieve the desired consistency by inserting our
    4189             :          * own '0x' or '0X' prefix, and substituting %x/%X in place
    4190             :          * of %#x/%#X.
    4191             :          *
    4192             :          * Note that this is the same approach as used in
    4193             :          * formatint() in unicodeobject.c
    4194             :          */
    4195           0 :         PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
    4196             :                       sign, type, prec, type);
    4197             :     }
    4198             :     else {
    4199           0 :         PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
    4200           0 :                       sign, (flags&F_ALT) ? "#" : "",
    4201             :                       prec, type);
    4202             :     }
    4203             : 
    4204             :     /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
    4205             :      * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
    4206             :      */
    4207           0 :     if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
    4208           0 :         PyErr_SetString(PyExc_OverflowError,
    4209             :             "formatted integer is too long (precision too large?)");
    4210           0 :         return -1;
    4211             :     }
    4212           0 :     if (sign[0])
    4213           0 :         PyOS_snprintf(buf, buflen, fmt, -x);
    4214             :     else
    4215           0 :         PyOS_snprintf(buf, buflen, fmt, x);
    4216           0 :     return (int)strlen(buf);
    4217             : }
    4218             : 
    4219             : Py_LOCAL_INLINE(int)
    4220           0 : formatchar(char *buf, size_t buflen, PyObject *v)
    4221             : {
    4222             :     /* presume that the buffer is at least 2 characters long */
    4223           0 :     if (PyString_Check(v)) {
    4224           0 :         if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
    4225           0 :             return -1;
    4226             :     }
    4227             :     else {
    4228           0 :         if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
    4229           0 :             return -1;
    4230             :     }
    4231           0 :     buf[1] = '\0';
    4232           0 :     return 1;
    4233             : }
    4234             : 
    4235             : /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
    4236             : 
    4237             :    FORMATBUFLEN is the length of the buffer in which the ints &
    4238             :    chars are formatted. XXX This is a magic number. Each formatting
    4239             :    routine does bounds checking to ensure no overflow, but a better
    4240             :    solution may be to malloc a buffer of appropriate size for each
    4241             :    format. For now, the current solution is sufficient.
    4242             : */
    4243             : #define FORMATBUFLEN (size_t)120
    4244             : 
    4245             : PyObject *
    4246        1542 : PyString_Format(PyObject *format, PyObject *args)
    4247             : {
    4248             :     char *fmt, *res;
    4249             :     Py_ssize_t arglen, argidx;
    4250             :     Py_ssize_t reslen, rescnt, fmtcnt;
    4251        1542 :     int args_owned = 0;
    4252             :     PyObject *result, *orig_args;
    4253             : #ifdef Py_USING_UNICODE
    4254             :     PyObject *v, *w;
    4255             : #endif
    4256        1542 :     PyObject *dict = NULL;
    4257        1542 :     if (format == NULL || !PyString_Check(format) || args == NULL) {
    4258           0 :         PyErr_BadInternalCall();
    4259           0 :         return NULL;
    4260             :     }
    4261        1542 :     orig_args = args;
    4262        1542 :     fmt = PyString_AS_STRING(format);
    4263        1542 :     fmtcnt = PyString_GET_SIZE(format);
    4264        1542 :     reslen = rescnt = fmtcnt + 100;
    4265        1542 :     result = PyString_FromStringAndSize((char *)NULL, reslen);
    4266        1542 :     if (result == NULL)
    4267           0 :         return NULL;
    4268        1542 :     res = PyString_AsString(result);
    4269        1542 :     if (PyTuple_Check(args)) {
    4270        1437 :         arglen = PyTuple_GET_SIZE(args);
    4271        1437 :         argidx = 0;
    4272             :     }
    4273             :     else {
    4274         105 :         arglen = -1;
    4275         105 :         argidx = -2;
    4276             :     }
    4277        3084 :     if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
    4278        1647 :         !PyTuple_Check(args) && !PyObject_TypeCheck(args, &PyBaseString_Type))
    4279           3 :         dict = args;
    4280        8898 :     while (--fmtcnt >= 0) {
    4281        5814 :         if (*fmt != '%') {
    4282        3594 :             if (--rescnt < 0) {
    4283           0 :                 rescnt = fmtcnt + 100;
    4284           0 :                 reslen += rescnt;
    4285           0 :                 if (_PyString_Resize(&result, reslen))
    4286           0 :                     return NULL;
    4287           0 :                 res = PyString_AS_STRING(result)
    4288           0 :                     + reslen - rescnt;
    4289           0 :                 --rescnt;
    4290             :             }
    4291        3594 :             *res++ = *fmt++;
    4292             :         }
    4293             :         else {
    4294             :             /* Got a format specifier */
    4295        2220 :             int flags = 0;
    4296        2220 :             Py_ssize_t width = -1;
    4297        2220 :             int prec = -1;
    4298        2220 :             int c = '\0';
    4299             :             int fill;
    4300             :             int isnumok;
    4301        2220 :             PyObject *v = NULL;
    4302        2220 :             PyObject *temp = NULL;
    4303             :             char *pbuf;
    4304             :             int sign;
    4305             :             Py_ssize_t len;
    4306             :             char formatbuf[FORMATBUFLEN];
    4307             :                  /* For format{int,char}() */
    4308             : #ifdef Py_USING_UNICODE
    4309        2220 :             char *fmt_start = fmt;
    4310        2220 :             Py_ssize_t argidx_start = argidx;
    4311             : #endif
    4312             : 
    4313        2220 :             fmt++;
    4314        2220 :             if (*fmt == '(') {
    4315             :                 char *keystart;
    4316             :                 Py_ssize_t keylen;
    4317             :                 PyObject *key;
    4318          12 :                 int pcount = 1;
    4319             : 
    4320          12 :                 if (dict == NULL) {
    4321           0 :                     PyErr_SetString(PyExc_TypeError,
    4322             :                              "format requires a mapping");
    4323           0 :                     goto error;
    4324             :                 }
    4325          12 :                 ++fmt;
    4326          12 :                 --fmtcnt;
    4327          12 :                 keystart = fmt;
    4328             :                 /* Skip over balanced parentheses */
    4329          78 :                 while (pcount > 0 && --fmtcnt >= 0) {
    4330          54 :                     if (*fmt == ')')
    4331          12 :                         --pcount;
    4332          42 :                     else if (*fmt == '(')
    4333           0 :                         ++pcount;
    4334          54 :                     fmt++;
    4335             :                 }
    4336          12 :                 keylen = fmt - keystart - 1;
    4337          12 :                 if (fmtcnt < 0 || pcount > 0) {
    4338           0 :                     PyErr_SetString(PyExc_ValueError,
    4339             :                                "incomplete format key");
    4340           0 :                     goto error;
    4341             :                 }
    4342          12 :                 key = PyString_FromStringAndSize(keystart,
    4343             :                                                  keylen);
    4344          12 :                 if (key == NULL)
    4345           0 :                     goto error;
    4346          12 :                 if (args_owned) {
    4347           9 :                     Py_DECREF(args);
    4348           9 :                     args_owned = 0;
    4349             :                 }
    4350          12 :                 args = PyObject_GetItem(dict, key);
    4351          12 :                 Py_DECREF(key);
    4352          12 :                 if (args == NULL) {
    4353           0 :                     goto error;
    4354             :                 }
    4355          12 :                 args_owned = 1;
    4356          12 :                 arglen = -1;
    4357          12 :                 argidx = -2;
    4358             :             }
    4359        4440 :             while (--fmtcnt >= 0) {
    4360        2220 :                 switch (c = *fmt++) {
    4361           0 :                 case '-': flags |= F_LJUST; continue;
    4362           0 :                 case '+': flags |= F_SIGN; continue;
    4363           0 :                 case ' ': flags |= F_BLANK; continue;
    4364           0 :                 case '#': flags |= F_ALT; continue;
    4365           0 :                 case '0': flags |= F_ZERO; continue;
    4366             :                 }
    4367        2220 :                 break;
    4368             :             }
    4369        2220 :             if (c == '*') {
    4370           0 :                 v = getnextarg(args, arglen, &argidx);
    4371           0 :                 if (v == NULL)
    4372           0 :                     goto error;
    4373           0 :                 if (!PyInt_Check(v)) {
    4374           0 :                     PyErr_SetString(PyExc_TypeError,
    4375             :                                     "* wants int");
    4376           0 :                     goto error;
    4377             :                 }
    4378           0 :                 width = PyInt_AsSsize_t(v);
    4379           0 :                 if (width == -1 && PyErr_Occurred())
    4380           0 :                     goto error;
    4381           0 :                 if (width < 0) {
    4382           0 :                     flags |= F_LJUST;
    4383           0 :                     width = -width;
    4384             :                 }
    4385           0 :                 if (--fmtcnt >= 0)
    4386           0 :                     c = *fmt++;
    4387             :             }
    4388        2220 :             else if (c >= 0 && isdigit(c)) {
    4389           0 :                 width = c - '0';
    4390           0 :                 while (--fmtcnt >= 0) {
    4391           0 :                     c = Py_CHARMASK(*fmt++);
    4392           0 :                     if (!isdigit(c))
    4393           0 :                         break;
    4394           0 :                     if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
    4395           0 :                         PyErr_SetString(
    4396             :                             PyExc_ValueError,
    4397             :                             "width too big");
    4398           0 :                         goto error;
    4399             :                     }
    4400           0 :                     width = width*10 + (c - '0');
    4401             :                 }
    4402             :             }
    4403        2220 :             if (c == '.') {
    4404           0 :                 prec = 0;
    4405           0 :                 if (--fmtcnt >= 0)
    4406           0 :                     c = *fmt++;
    4407           0 :                 if (c == '*') {
    4408           0 :                     v = getnextarg(args, arglen, &argidx);
    4409           0 :                     if (v == NULL)
    4410           0 :                         goto error;
    4411           0 :                     if (!PyInt_Check(v)) {
    4412           0 :                         PyErr_SetString(
    4413             :                             PyExc_TypeError,
    4414             :                             "* wants int");
    4415           0 :                         goto error;
    4416             :                     }
    4417           0 :                     prec = _PyInt_AsInt(v);
    4418           0 :                     if (prec == -1 && PyErr_Occurred())
    4419           0 :                         goto error;
    4420           0 :                     if (prec < 0)
    4421           0 :                         prec = 0;
    4422           0 :                     if (--fmtcnt >= 0)
    4423           0 :                         c = *fmt++;
    4424             :                 }
    4425           0 :                 else if (c >= 0 && isdigit(c)) {
    4426           0 :                     prec = c - '0';
    4427           0 :                     while (--fmtcnt >= 0) {
    4428           0 :                         c = Py_CHARMASK(*fmt++);
    4429           0 :                         if (!isdigit(c))
    4430           0 :                             break;
    4431           0 :                         if (prec > (INT_MAX - ((int)c - '0')) / 10) {
    4432           0 :                             PyErr_SetString(
    4433             :                                 PyExc_ValueError,
    4434             :                                 "prec too big");
    4435           0 :                             goto error;
    4436             :                         }
    4437           0 :                         prec = prec*10 + (c - '0');
    4438             :                     }
    4439             :                 }
    4440             :             } /* prec */
    4441        2220 :             if (fmtcnt >= 0) {
    4442        2220 :                 if (c == 'h' || c == 'l' || c == 'L') {
    4443           0 :                     if (--fmtcnt >= 0)
    4444           0 :                         c = *fmt++;
    4445             :                 }
    4446             :             }
    4447        2220 :             if (fmtcnt < 0) {
    4448           0 :                 PyErr_SetString(PyExc_ValueError,
    4449             :                                 "incomplete format");
    4450           0 :                 goto error;
    4451             :             }
    4452        2220 :             if (c != '%') {
    4453        2220 :                 v = getnextarg(args, arglen, &argidx);
    4454        2220 :                 if (v == NULL)
    4455           0 :                     goto error;
    4456             :             }
    4457        2220 :             sign = 0;
    4458        2220 :             fill = ' ';
    4459        2220 :             switch (c) {
    4460             :             case '%':
    4461           0 :                 pbuf = "%";
    4462           0 :                 len = 1;
    4463           0 :                 break;
    4464             :             case 's':
    4465             : #ifdef Py_USING_UNICODE
    4466        1452 :                 if (PyUnicode_Check(v)) {
    4467           0 :                     fmt = fmt_start;
    4468           0 :                     argidx = argidx_start;
    4469           0 :                     goto unicode;
    4470             :                 }
    4471             : #endif
    4472        1452 :                 temp = _PyObject_Str(v);
    4473             : #ifdef Py_USING_UNICODE
    4474        1452 :                 if (temp != NULL && PyUnicode_Check(temp)) {
    4475           0 :                     Py_DECREF(temp);
    4476           0 :                     fmt = fmt_start;
    4477           0 :                     argidx = argidx_start;
    4478           0 :                     goto unicode;
    4479             :                 }
    4480             : #endif
    4481             :                 /* Fall through */
    4482             :             case 'r':
    4483        2220 :                 if (c == 'r')
    4484         768 :                     temp = PyObject_Repr(v);
    4485        2220 :                 if (temp == NULL)
    4486           0 :                     goto error;
    4487        2220 :                 if (!PyString_Check(temp)) {
    4488           0 :                     PyErr_SetString(PyExc_TypeError,
    4489             :                       "%s argument has non-string str()");
    4490           0 :                     Py_DECREF(temp);
    4491           0 :                     goto error;
    4492             :                 }
    4493        2220 :                 pbuf = PyString_AS_STRING(temp);
    4494        2220 :                 len = PyString_GET_SIZE(temp);
    4495        2220 :                 if (prec >= 0 && len > prec)
    4496           0 :                     len = prec;
    4497        2220 :                 break;
    4498             :             case 'i':
    4499             :             case 'd':
    4500             :             case 'u':
    4501             :             case 'o':
    4502             :             case 'x':
    4503             :             case 'X':
    4504           0 :                 if (c == 'i')
    4505           0 :                     c = 'd';
    4506           0 :                 isnumok = 0;
    4507           0 :                 if (PyNumber_Check(v)) {
    4508           0 :                     PyObject *iobj=NULL;
    4509             : 
    4510           0 :                     if (PyInt_Check(v) || (PyLong_Check(v))) {
    4511           0 :                         iobj = v;
    4512           0 :                         Py_INCREF(iobj);
    4513             :                     }
    4514             :                     else {
    4515           0 :                         iobj = PyNumber_Int(v);
    4516           0 :                         if (iobj==NULL) {
    4517           0 :                             PyErr_Clear();
    4518           0 :                             iobj = PyNumber_Long(v);
    4519             :                         }
    4520             :                     }
    4521           0 :                     if (iobj!=NULL) {
    4522           0 :                         if (PyInt_Check(iobj)) {
    4523           0 :                             isnumok = 1;
    4524           0 :                             pbuf = formatbuf;
    4525           0 :                             len = formatint(pbuf,
    4526             :                                             sizeof(formatbuf),
    4527             :                                             flags, prec, c, iobj);
    4528           0 :                             Py_DECREF(iobj);
    4529           0 :                             if (len < 0)
    4530           0 :                                 goto error;
    4531           0 :                             sign = 1;
    4532             :                         }
    4533           0 :                         else if (PyLong_Check(iobj)) {
    4534             :                             int ilen;
    4535             : 
    4536           0 :                             isnumok = 1;
    4537           0 :                             temp = _PyString_FormatLong(iobj, flags,
    4538             :                                 prec, c, &pbuf, &ilen);
    4539           0 :                             Py_DECREF(iobj);
    4540           0 :                             len = ilen;
    4541           0 :                             if (!temp)
    4542           0 :                                 goto error;
    4543           0 :                             sign = 1;
    4544             :                         }
    4545             :                         else {
    4546           0 :                             Py_DECREF(iobj);
    4547             :                         }
    4548             :                     }
    4549             :                 }
    4550           0 :                 if (!isnumok) {
    4551           0 :                     PyErr_Format(PyExc_TypeError,
    4552             :                         "%%%c format: a number is required, "
    4553           0 :                         "not %.200s", c, Py_TYPE(v)->tp_name);
    4554           0 :                     goto error;
    4555             :                 }
    4556           0 :                 if (flags & F_ZERO)
    4557           0 :                     fill = '0';
    4558           0 :                 break;
    4559             :             case 'e':
    4560             :             case 'E':
    4561             :             case 'f':
    4562             :             case 'F':
    4563             :             case 'g':
    4564             :             case 'G':
    4565           0 :                 temp = formatfloat(v, flags, prec, c);
    4566           0 :                 if (temp == NULL)
    4567           0 :                     goto error;
    4568           0 :                 pbuf = PyString_AS_STRING(temp);
    4569           0 :                 len = PyString_GET_SIZE(temp);
    4570           0 :                 sign = 1;
    4571           0 :                 if (flags & F_ZERO)
    4572           0 :                     fill = '0';
    4573           0 :                 break;
    4574             :             case 'c':
    4575             : #ifdef Py_USING_UNICODE
    4576           0 :                 if (PyUnicode_Check(v)) {
    4577           0 :                     fmt = fmt_start;
    4578           0 :                     argidx = argidx_start;
    4579           0 :                     goto unicode;
    4580             :                 }
    4581             : #endif
    4582           0 :                 pbuf = formatbuf;
    4583           0 :                 len = formatchar(pbuf, sizeof(formatbuf), v);
    4584           0 :                 if (len < 0)
    4585           0 :                     goto error;
    4586           0 :                 break;
    4587             :             default:
    4588           0 :                 PyErr_Format(PyExc_ValueError,
    4589             :                   "unsupported format character '%c' (0x%x) "
    4590             :                   "at index %zd",
    4591             :                   c, c,
    4592           0 :                   (Py_ssize_t)(fmt - 1 -
    4593           0 :                                PyString_AsString(format)));
    4594           0 :                 goto error;
    4595             :             }
    4596        2220 :             if (sign) {
    4597           0 :                 if (*pbuf == '-' || *pbuf == '+') {
    4598           0 :                     sign = *pbuf++;
    4599           0 :                     len--;
    4600             :                 }
    4601           0 :                 else if (flags & F_SIGN)
    4602           0 :                     sign = '+';
    4603           0 :                 else if (flags & F_BLANK)
    4604           0 :                     sign = ' ';
    4605             :                 else
    4606           0 :                     sign = 0;
    4607             :             }
    4608        2220 :             if (width < len)
    4609        2220 :                 width = len;
    4610        2220 :             if (rescnt - (sign != 0) < width) {
    4611           0 :                 reslen -= rescnt;
    4612           0 :                 rescnt = width + fmtcnt + 100;
    4613           0 :                 reslen += rescnt;
    4614           0 :                 if (reslen < 0) {
    4615           0 :                     Py_DECREF(result);
    4616           0 :                     Py_XDECREF(temp);
    4617           0 :                     return PyErr_NoMemory();
    4618             :                 }
    4619           0 :                 if (_PyString_Resize(&result, reslen)) {
    4620           0 :                     Py_XDECREF(temp);
    4621           0 :                     return NULL;
    4622             :                 }
    4623           0 :                 res = PyString_AS_STRING(result)
    4624           0 :                     + reslen - rescnt;
    4625             :             }
    4626        2220 :             if (sign) {
    4627           0 :                 if (fill != ' ')
    4628           0 :                     *res++ = sign;
    4629           0 :                 rescnt--;
    4630           0 :                 if (width > len)
    4631           0 :                     width--;
    4632             :             }
    4633        2220 :             if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
    4634             :                 assert(pbuf[0] == '0');
    4635             :                 assert(pbuf[1] == c);
    4636           0 :                 if (fill != ' ') {
    4637           0 :                     *res++ = *pbuf++;
    4638           0 :                     *res++ = *pbuf++;
    4639             :                 }
    4640           0 :                 rescnt -= 2;
    4641           0 :                 width -= 2;
    4642           0 :                 if (width < 0)
    4643           0 :                     width = 0;
    4644           0 :                 len -= 2;
    4645             :             }
    4646        2220 :             if (width > len && !(flags & F_LJUST)) {
    4647             :                 do {
    4648           0 :                     --rescnt;
    4649           0 :                     *res++ = fill;
    4650           0 :                 } while (--width > len);
    4651             :             }
    4652        2220 :             if (fill == ' ') {
    4653        2220 :                 if (sign)
    4654           0 :                     *res++ = sign;
    4655        2220 :                 if ((flags & F_ALT) &&
    4656           0 :                     (c == 'x' || c == 'X')) {
    4657             :                     assert(pbuf[0] == '0');
    4658             :                     assert(pbuf[1] == c);
    4659           0 :                     *res++ = *pbuf++;
    4660           0 :                     *res++ = *pbuf++;
    4661             :                 }
    4662             :             }
    4663        2220 :             Py_MEMCPY(res, pbuf, len);
    4664        2220 :             res += len;
    4665        2220 :             rescnt -= len;
    4666        4440 :             while (--width >= len) {
    4667           0 :                 --rescnt;
    4668           0 :                 *res++ = ' ';
    4669             :             }
    4670        2220 :             if (dict && (argidx < arglen) && c != '%') {
    4671           0 :                 PyErr_SetString(PyExc_TypeError,
    4672             :                            "not all arguments converted during string formatting");
    4673           0 :                 Py_XDECREF(temp);
    4674           0 :                 goto error;
    4675             :             }
    4676        2220 :             Py_XDECREF(temp);
    4677             :         } /* '%' */
    4678             :     } /* until end */
    4679        1542 :     if (argidx < arglen && !dict) {
    4680           0 :         PyErr_SetString(PyExc_TypeError,
    4681             :                         "not all arguments converted during string formatting");
    4682           0 :         goto error;
    4683             :     }
    4684        1542 :     if (args_owned) {
    4685           3 :         Py_DECREF(args);
    4686             :     }
    4687        1542 :     if (_PyString_Resize(&result, reslen - rescnt))
    4688           0 :         return NULL;
    4689        1542 :     return result;
    4690             : 
    4691             : #ifdef Py_USING_UNICODE
    4692             :  unicode:
    4693           0 :     if (args_owned) {
    4694           0 :         Py_DECREF(args);
    4695           0 :         args_owned = 0;
    4696             :     }
    4697             :     /* Fiddle args right (remove the first argidx arguments) */
    4698           0 :     if (PyTuple_Check(orig_args) && argidx > 0) {
    4699             :         PyObject *v;
    4700           0 :         Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
    4701           0 :         v = PyTuple_New(n);
    4702           0 :         if (v == NULL)
    4703           0 :             goto error;
    4704           0 :         while (--n >= 0) {
    4705           0 :             PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
    4706           0 :             Py_INCREF(w);
    4707           0 :             PyTuple_SET_ITEM(v, n, w);
    4708             :         }
    4709           0 :         args = v;
    4710             :     } else {
    4711           0 :         Py_INCREF(orig_args);
    4712           0 :         args = orig_args;
    4713             :     }
    4714           0 :     args_owned = 1;
    4715             :     /* Take what we have of the result and let the Unicode formatting
    4716             :        function format the rest of the input. */
    4717           0 :     rescnt = res - PyString_AS_STRING(result);
    4718           0 :     if (_PyString_Resize(&result, rescnt))
    4719           0 :         goto error;
    4720           0 :     fmtcnt = PyString_GET_SIZE(format) - \
    4721           0 :              (fmt - PyString_AS_STRING(format));
    4722           0 :     format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
    4723           0 :     if (format == NULL)
    4724           0 :         goto error;
    4725           0 :     v = PyUnicode_Format(format, args);
    4726           0 :     Py_DECREF(format);
    4727           0 :     if (v == NULL)
    4728           0 :         goto error;
    4729             :     /* Paste what we have (result) to what the Unicode formatting
    4730             :        function returned (v) and return the result (or error) */
    4731           0 :     w = PyUnicode_Concat(result, v);
    4732           0 :     Py_DECREF(result);
    4733           0 :     Py_DECREF(v);
    4734           0 :     Py_DECREF(args);
    4735           0 :     return w;
    4736             : #endif /* Py_USING_UNICODE */
    4737             : 
    4738             :  error:
    4739           0 :     Py_DECREF(result);
    4740           0 :     if (args_owned) {
    4741           0 :         Py_DECREF(args);
    4742             :     }
    4743           0 :     return NULL;
    4744             : }
    4745             : 
    4746             : void
    4747      217401 : PyString_InternInPlace(PyObject **p)
    4748             : {
    4749      217401 :     register PyStringObject *s = (PyStringObject *)(*p);
    4750             :     PyObject *t;
    4751      217401 :     if (s == NULL || !PyString_Check(s))
    4752           0 :         Py_FatalError("PyString_InternInPlace: strings only please!");
    4753             :     /* If it's a string subclass, we don't really know what putting
    4754             :        it in the interned dict might do. */
    4755      217401 :     if (!PyString_CheckExact(s))
    4756           0 :         return;
    4757      217401 :     if (PyString_CHECK_INTERNED(s))
    4758      132757 :         return;
    4759       84644 :     if (interned == NULL) {
    4760           3 :         interned = PyDict_New();
    4761           3 :         if (interned == NULL) {
    4762           0 :             PyErr_Clear(); /* Don't leave an exception */
    4763           0 :             return;
    4764             :         }
    4765             :     }
    4766       84644 :     t = PyDict_GetItem(interned, (PyObject *)s);
    4767       84644 :     if (t) {
    4768       62095 :         Py_INCREF(t);
    4769       62095 :         Py_SETREF(*p, t);
    4770       62095 :         return;
    4771             :     }
    4772             : 
    4773       22549 :     if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
    4774           0 :         PyErr_Clear();
    4775           0 :         return;
    4776             :     }
    4777             :     /* The two references in interned are not counted by refcnt.
    4778             :        The string deallocator will take care of this */
    4779       22549 :     Py_REFCNT(s) -= 2;
    4780       22549 :     PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
    4781             : }
    4782             : 
    4783             : void
    4784           0 : PyString_InternImmortal(PyObject **p)
    4785             : {
    4786           0 :     PyString_InternInPlace(p);
    4787           0 :     if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
    4788           0 :         PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
    4789           0 :         Py_INCREF(*p);
    4790             :     }
    4791           0 : }
    4792             : 
    4793             : 
    4794             : PyObject *
    4795       34550 : PyString_InternFromString(const char *cp)
    4796             : {
    4797       34550 :     PyObject *s = PyString_FromString(cp);
    4798       34550 :     if (s == NULL)
    4799           0 :         return NULL;
    4800       34550 :     PyString_InternInPlace(&s);
    4801       34550 :     return s;
    4802             : }
    4803             : 
    4804             : void
    4805           3 : PyString_Fini(void)
    4806             : {
    4807             :     int i;
    4808         771 :     for (i = 0; i < UCHAR_MAX + 1; i++)
    4809         768 :         Py_CLEAR(characters[i]);
    4810           3 :     Py_CLEAR(nullstring);
    4811           3 : }
    4812             : 
    4813           0 : void _Py_ReleaseInternedStrings(void)
    4814             : {
    4815             :     PyObject *keys;
    4816             :     PyStringObject *s;
    4817             :     Py_ssize_t i, n;
    4818           0 :     Py_ssize_t immortal_size = 0, mortal_size = 0;
    4819             : 
    4820           0 :     if (interned == NULL || !PyDict_Check(interned))
    4821           0 :         return;
    4822           0 :     keys = PyDict_Keys(interned);
    4823           0 :     if (keys == NULL || !PyList_Check(keys)) {
    4824           0 :         PyErr_Clear();
    4825           0 :         return;
    4826             :     }
    4827             : 
    4828             :     /* Since _Py_ReleaseInternedStrings() is intended to help a leak
    4829             :        detector, interned strings are not forcibly deallocated; rather, we
    4830             :        give them their stolen references back, and then clear and DECREF
    4831             :        the interned dict. */
    4832             : 
    4833           0 :     n = PyList_GET_SIZE(keys);
    4834           0 :     fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
    4835             :         n);
    4836           0 :     for (i = 0; i < n; i++) {
    4837           0 :         s = (PyStringObject *) PyList_GET_ITEM(keys, i);
    4838           0 :         switch (s->ob_sstate) {
    4839             :         case SSTATE_NOT_INTERNED:
    4840             :             /* XXX Shouldn't happen */
    4841           0 :             break;
    4842             :         case SSTATE_INTERNED_IMMORTAL:
    4843           0 :             Py_REFCNT(s) += 1;
    4844           0 :             immortal_size += Py_SIZE(s);
    4845           0 :             break;
    4846             :         case SSTATE_INTERNED_MORTAL:
    4847           0 :             Py_REFCNT(s) += 2;
    4848           0 :             mortal_size += Py_SIZE(s);
    4849           0 :             break;
    4850             :         default:
    4851           0 :             Py_FatalError("Inconsistent interned string state.");
    4852             :         }
    4853           0 :         s->ob_sstate = SSTATE_NOT_INTERNED;
    4854             :     }
    4855           0 :     fprintf(stderr, "total size of all interned strings: "
    4856             :                     "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
    4857             :                     "mortal/immortal\n", mortal_size, immortal_size);
    4858           0 :     Py_DECREF(keys);
    4859           0 :     PyDict_Clear(interned);
    4860           0 :     Py_CLEAR(interned);
    4861             : }

Generated by: LCOV version 1.10