Line data Source code
1 : /* strop module */
2 :
3 : #define PY_SSIZE_T_CLEAN
4 : #include "Python.h"
5 : #include <ctype.h>
6 :
7 : PyDoc_STRVAR(strop_module__doc__,
8 : "Common string manipulations, optimized for speed.\n"
9 : "\n"
10 : "Always use \"import string\" rather than referencing\n"
11 : "this module directly.");
12 :
13 : /* XXX This file assumes that the <ctype.h> is*() functions
14 : XXX are defined for all 8-bit characters! */
15 :
/* Issue the module-wide DeprecationWarning.  When PyErr_Warn() returns
   nonzero the enclosing function returns NULL immediately.

   The body is wrapped in do { ... } while (0) so that "WARN;" is exactly
   one statement: a bare "if" here would silently capture an "else" that
   follows the macro at a call site. */
#define WARN \
    do { \
        if (PyErr_Warn(PyExc_DeprecationWarning, \
                       "strop functions are obsolete; use string methods")) \
            return NULL; \
    } while (0)
19 :
/* strip(), lstrip() and rstrip() all delegate to do_strip(); the value
   passed as its second argument selects which end(s) get trimmed. */
enum {
    LEFTSTRIP  = 0,   /* trim leading whitespace only */
    RIGHTSTRIP = 1,   /* trim trailing whitespace only */
    BOTHSTRIP  = 2    /* trim both ends */
};
27 :
28 :
29 : static PyObject *
30 0 : split_whitespace(char *s, Py_ssize_t len, Py_ssize_t maxsplit)
31 : {
32 0 : Py_ssize_t i = 0, j;
33 : int err;
34 0 : Py_ssize_t countsplit = 0;
35 : PyObject* item;
36 0 : PyObject *list = PyList_New(0);
37 :
38 0 : if (list == NULL)
39 0 : return NULL;
40 :
41 0 : while (i < len) {
42 0 : while (i < len && isspace(Py_CHARMASK(s[i]))) {
43 0 : i = i+1;
44 : }
45 0 : j = i;
46 0 : while (i < len && !isspace(Py_CHARMASK(s[i]))) {
47 0 : i = i+1;
48 : }
49 0 : if (j < i) {
50 0 : item = PyString_FromStringAndSize(s+j, i-j);
51 0 : if (item == NULL)
52 0 : goto finally;
53 :
54 0 : err = PyList_Append(list, item);
55 0 : Py_DECREF(item);
56 0 : if (err < 0)
57 0 : goto finally;
58 :
59 0 : countsplit++;
60 0 : while (i < len && isspace(Py_CHARMASK(s[i]))) {
61 0 : i = i+1;
62 : }
63 0 : if (maxsplit && (countsplit >= maxsplit) && i < len) {
64 0 : item = PyString_FromStringAndSize(
65 : s+i, len - i);
66 0 : if (item == NULL)
67 0 : goto finally;
68 :
69 0 : err = PyList_Append(list, item);
70 0 : Py_DECREF(item);
71 0 : if (err < 0)
72 0 : goto finally;
73 :
74 0 : i = len;
75 : }
76 : }
77 : }
78 0 : return list;
79 : finally:
80 0 : Py_DECREF(list);
81 0 : return NULL;
82 : }
83 :
84 :
85 : PyDoc_STRVAR(splitfields__doc__,
86 : "split(s [,sep [,maxsplit]]) -> list of strings\n"
87 : "splitfields(s [,sep [,maxsplit]]) -> list of strings\n"
88 : "\n"
89 : "Return a list of the words in the string s, using sep as the\n"
90 : "delimiter string. If maxsplit is nonzero, splits into at most\n"
91 : "maxsplit words. If sep is not specified, any whitespace string\n"
92 : "is a separator. Maxsplit defaults to 0.\n"
93 : "\n"
94 : "(split and splitfields are synonymous)");
95 :
96 : static PyObject *
97 0 : strop_splitfields(PyObject *self, PyObject *args)
98 : {
99 : Py_ssize_t len, n, i, j, err;
100 : Py_ssize_t splitcount, maxsplit;
101 : char *s, *sub;
102 : PyObject *list, *item;
103 :
104 0 : WARN;
105 0 : sub = NULL;
106 0 : n = 0;
107 0 : splitcount = 0;
108 0 : maxsplit = 0;
109 0 : if (!PyArg_ParseTuple(args, "t#|z#n:split", &s, &len, &sub, &n, &maxsplit))
110 0 : return NULL;
111 0 : if (sub == NULL)
112 0 : return split_whitespace(s, len, maxsplit);
113 0 : if (n == 0) {
114 0 : PyErr_SetString(PyExc_ValueError, "empty separator");
115 0 : return NULL;
116 : }
117 :
118 0 : list = PyList_New(0);
119 0 : if (list == NULL)
120 0 : return NULL;
121 :
122 0 : i = j = 0;
123 0 : while (i+n <= len) {
124 0 : if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
125 0 : item = PyString_FromStringAndSize(s+j, i-j);
126 0 : if (item == NULL)
127 0 : goto fail;
128 0 : err = PyList_Append(list, item);
129 0 : Py_DECREF(item);
130 0 : if (err < 0)
131 0 : goto fail;
132 0 : i = j = i + n;
133 0 : splitcount++;
134 0 : if (maxsplit && (splitcount >= maxsplit))
135 0 : break;
136 : }
137 : else
138 0 : i++;
139 : }
140 0 : item = PyString_FromStringAndSize(s+j, len-j);
141 0 : if (item == NULL)
142 0 : goto fail;
143 0 : err = PyList_Append(list, item);
144 0 : Py_DECREF(item);
145 0 : if (err < 0)
146 0 : goto fail;
147 :
148 0 : return list;
149 :
150 : fail:
151 0 : Py_DECREF(list);
152 0 : return NULL;
153 : }
154 :
155 :
156 : PyDoc_STRVAR(joinfields__doc__,
157 : "join(list [,sep]) -> string\n"
158 : "joinfields(list [,sep]) -> string\n"
159 : "\n"
160 : "Return a string composed of the words in list, with\n"
161 : "intervening occurrences of sep. Sep defaults to a single\n"
162 : "space.\n"
163 : "\n"
164 : "(join and joinfields are synonymous)");
165 :
166 : static PyObject *
167 0 : strop_joinfields(PyObject *self, PyObject *args)
168 : {
169 : PyObject *seq;
170 0 : char *sep = NULL;
171 0 : Py_ssize_t seqlen, seplen = 0;
172 0 : Py_ssize_t i, reslen = 0, slen = 0, sz = 100;
173 0 : PyObject *res = NULL;
174 0 : char* p = NULL;
175 : ssizeargfunc getitemfunc;
176 :
177 0 : WARN;
178 0 : if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
179 0 : return NULL;
180 0 : if (sep == NULL) {
181 0 : sep = " ";
182 0 : seplen = 1;
183 : }
184 :
185 0 : seqlen = PySequence_Size(seq);
186 0 : if (seqlen < 0 && PyErr_Occurred())
187 0 : return NULL;
188 :
189 0 : if (seqlen == 1) {
190 : /* Optimization if there's only one item */
191 0 : PyObject *item = PySequence_GetItem(seq, 0);
192 0 : if (item && !PyString_Check(item)) {
193 0 : PyErr_SetString(PyExc_TypeError,
194 : "first argument must be sequence of strings");
195 0 : Py_DECREF(item);
196 0 : return NULL;
197 : }
198 0 : return item;
199 : }
200 :
201 0 : if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
202 0 : return NULL;
203 0 : p = PyString_AsString(res);
204 :
205 : /* optimize for lists, since it's the most common case. all others
206 : * (tuples and arbitrary sequences) just use the sequence abstract
207 : * interface.
208 : */
209 0 : if (PyList_Check(seq)) {
210 0 : for (i = 0; i < seqlen; i++) {
211 0 : PyObject *item = PyList_GET_ITEM(seq, i);
212 0 : if (!PyString_Check(item)) {
213 0 : PyErr_SetString(PyExc_TypeError,
214 : "first argument must be sequence of strings");
215 0 : Py_DECREF(res);
216 0 : return NULL;
217 : }
218 0 : slen = PyString_GET_SIZE(item);
219 0 : if (slen > PY_SSIZE_T_MAX - reslen ||
220 0 : seplen > PY_SSIZE_T_MAX - reslen - seplen) {
221 0 : PyErr_SetString(PyExc_OverflowError,
222 : "input too long");
223 0 : Py_DECREF(res);
224 0 : return NULL;
225 : }
226 0 : while (reslen + slen + seplen >= sz) {
227 0 : if (_PyString_Resize(&res, sz * 2) < 0)
228 0 : return NULL;
229 0 : sz *= 2;
230 0 : p = PyString_AsString(res) + reslen;
231 : }
232 0 : if (i > 0) {
233 0 : memcpy(p, sep, seplen);
234 0 : p += seplen;
235 0 : reslen += seplen;
236 : }
237 0 : memcpy(p, PyString_AS_STRING(item), slen);
238 0 : p += slen;
239 0 : reslen += slen;
240 : }
241 0 : _PyString_Resize(&res, reslen);
242 0 : return res;
243 : }
244 :
245 0 : if (seq->ob_type->tp_as_sequence == NULL ||
246 0 : (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
247 : {
248 0 : PyErr_SetString(PyExc_TypeError,
249 : "first argument must be a sequence");
250 0 : return NULL;
251 : }
252 : /* This is now type safe */
253 0 : for (i = 0; i < seqlen; i++) {
254 0 : PyObject *item = getitemfunc(seq, i);
255 0 : if (!item || !PyString_Check(item)) {
256 0 : PyErr_SetString(PyExc_TypeError,
257 : "first argument must be sequence of strings");
258 0 : Py_DECREF(res);
259 0 : Py_XDECREF(item);
260 0 : return NULL;
261 : }
262 0 : slen = PyString_GET_SIZE(item);
263 0 : if (slen > PY_SSIZE_T_MAX - reslen ||
264 0 : seplen > PY_SSIZE_T_MAX - reslen - seplen) {
265 0 : PyErr_SetString(PyExc_OverflowError,
266 : "input too long");
267 0 : Py_DECREF(res);
268 0 : Py_XDECREF(item);
269 0 : return NULL;
270 : }
271 0 : while (reslen + slen + seplen >= sz) {
272 0 : if (_PyString_Resize(&res, sz * 2) < 0) {
273 0 : Py_DECREF(item);
274 0 : return NULL;
275 : }
276 0 : sz *= 2;
277 0 : p = PyString_AsString(res) + reslen;
278 : }
279 0 : if (i > 0) {
280 0 : memcpy(p, sep, seplen);
281 0 : p += seplen;
282 0 : reslen += seplen;
283 : }
284 0 : memcpy(p, PyString_AS_STRING(item), slen);
285 0 : p += slen;
286 0 : reslen += slen;
287 0 : Py_DECREF(item);
288 : }
289 0 : _PyString_Resize(&res, reslen);
290 0 : return res;
291 : }
292 :
293 :
294 : PyDoc_STRVAR(find__doc__,
295 : "find(s, sub [,start [,end]]) -> in\n"
296 : "\n"
297 : "Return the lowest index in s where substring sub is found,\n"
298 : "such that sub is contained within s[start,end]. Optional\n"
299 : "arguments start and end are interpreted as in slice notation.\n"
300 : "\n"
301 : "Return -1 on failure.");
302 :
303 : static PyObject *
304 0 : strop_find(PyObject *self, PyObject *args)
305 : {
306 : char *s, *sub;
307 0 : Py_ssize_t len, n, i = 0, last = PY_SSIZE_T_MAX;
308 :
309 0 : WARN;
310 0 : if (!PyArg_ParseTuple(args, "t#t#|nn:find", &s, &len, &sub, &n, &i, &last))
311 0 : return NULL;
312 :
313 0 : if (last > len)
314 0 : last = len;
315 0 : if (last < 0)
316 0 : last += len;
317 0 : if (last < 0)
318 0 : last = 0;
319 0 : if (i < 0)
320 0 : i += len;
321 0 : if (i < 0)
322 0 : i = 0;
323 :
324 0 : if (n == 0 && i <= last)
325 0 : return PyInt_FromLong((long)i);
326 :
327 0 : last -= n;
328 0 : for (; i <= last; ++i)
329 0 : if (s[i] == sub[0] &&
330 0 : (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
331 0 : return PyInt_FromLong((long)i);
332 :
333 0 : return PyInt_FromLong(-1L);
334 : }
335 :
336 :
337 : PyDoc_STRVAR(rfind__doc__,
338 : "rfind(s, sub [,start [,end]]) -> int\n"
339 : "\n"
340 : "Return the highest index in s where substring sub is found,\n"
341 : "such that sub is contained within s[start,end]. Optional\n"
342 : "arguments start and end are interpreted as in slice notation.\n"
343 : "\n"
344 : "Return -1 on failure.");
345 :
346 : static PyObject *
347 0 : strop_rfind(PyObject *self, PyObject *args)
348 : {
349 : char *s, *sub;
350 : Py_ssize_t len, n, j;
351 0 : Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
352 :
353 0 : WARN;
354 0 : if (!PyArg_ParseTuple(args, "t#t#|nn:rfind", &s, &len, &sub, &n, &i, &last))
355 0 : return NULL;
356 :
357 0 : if (last > len)
358 0 : last = len;
359 0 : if (last < 0)
360 0 : last += len;
361 0 : if (last < 0)
362 0 : last = 0;
363 0 : if (i < 0)
364 0 : i += len;
365 0 : if (i < 0)
366 0 : i = 0;
367 :
368 0 : if (n == 0 && i <= last)
369 0 : return PyInt_FromLong((long)last);
370 :
371 0 : for (j = last-n; j >= i; --j)
372 0 : if (s[j] == sub[0] &&
373 0 : (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
374 0 : return PyInt_FromLong((long)j);
375 :
376 0 : return PyInt_FromLong(-1L);
377 : }
378 :
379 :
380 : static PyObject *
381 0 : do_strip(PyObject *args, int striptype)
382 : {
383 : char *s;
384 : Py_ssize_t len, i, j;
385 :
386 :
387 0 : if (PyString_AsStringAndSize(args, &s, &len))
388 0 : return NULL;
389 :
390 0 : i = 0;
391 0 : if (striptype != RIGHTSTRIP) {
392 0 : while (i < len && isspace(Py_CHARMASK(s[i]))) {
393 0 : i++;
394 : }
395 : }
396 :
397 0 : j = len;
398 0 : if (striptype != LEFTSTRIP) {
399 : do {
400 0 : j--;
401 0 : } while (j >= i && isspace(Py_CHARMASK(s[j])));
402 0 : j++;
403 : }
404 :
405 0 : if (i == 0 && j == len) {
406 0 : Py_INCREF(args);
407 0 : return args;
408 : }
409 : else
410 0 : return PyString_FromStringAndSize(s+i, j-i);
411 : }
412 :
413 :
414 : PyDoc_STRVAR(strip__doc__,
415 : "strip(s) -> string\n"
416 : "\n"
417 : "Return a copy of the string s with leading and trailing\n"
418 : "whitespace removed.");
419 :
420 : static PyObject *
421 0 : strop_strip(PyObject *self, PyObject *args)
422 : {
423 0 : WARN;
424 0 : return do_strip(args, BOTHSTRIP);
425 : }
426 :
427 :
428 : PyDoc_STRVAR(lstrip__doc__,
429 : "lstrip(s) -> string\n"
430 : "\n"
431 : "Return a copy of the string s with leading whitespace removed.");
432 :
433 : static PyObject *
434 0 : strop_lstrip(PyObject *self, PyObject *args)
435 : {
436 0 : WARN;
437 0 : return do_strip(args, LEFTSTRIP);
438 : }
439 :
440 :
441 : PyDoc_STRVAR(rstrip__doc__,
442 : "rstrip(s) -> string\n"
443 : "\n"
444 : "Return a copy of the string s with trailing whitespace removed.");
445 :
446 : static PyObject *
447 0 : strop_rstrip(PyObject *self, PyObject *args)
448 : {
449 0 : WARN;
450 0 : return do_strip(args, RIGHTSTRIP);
451 : }
452 :
453 :
454 : PyDoc_STRVAR(lower__doc__,
455 : "lower(s) -> string\n"
456 : "\n"
457 : "Return a copy of the string s converted to lowercase.");
458 :
459 : static PyObject *
460 0 : strop_lower(PyObject *self, PyObject *args)
461 : {
462 : char *s, *s_new;
463 : Py_ssize_t i, n;
464 : PyObject *newstr;
465 : int changed;
466 :
467 0 : WARN;
468 0 : if (PyString_AsStringAndSize(args, &s, &n))
469 0 : return NULL;
470 0 : newstr = PyString_FromStringAndSize(NULL, n);
471 0 : if (newstr == NULL)
472 0 : return NULL;
473 0 : s_new = PyString_AsString(newstr);
474 0 : changed = 0;
475 0 : for (i = 0; i < n; i++) {
476 0 : int c = Py_CHARMASK(*s++);
477 0 : if (isupper(c)) {
478 0 : changed = 1;
479 0 : *s_new = tolower(c);
480 : } else
481 0 : *s_new = c;
482 0 : s_new++;
483 : }
484 0 : if (!changed) {
485 0 : Py_DECREF(newstr);
486 0 : Py_INCREF(args);
487 0 : return args;
488 : }
489 0 : return newstr;
490 : }
491 :
492 :
493 : PyDoc_STRVAR(upper__doc__,
494 : "upper(s) -> string\n"
495 : "\n"
496 : "Return a copy of the string s converted to uppercase.");
497 :
498 : static PyObject *
499 0 : strop_upper(PyObject *self, PyObject *args)
500 : {
501 : char *s, *s_new;
502 : Py_ssize_t i, n;
503 : PyObject *newstr;
504 : int changed;
505 :
506 0 : WARN;
507 0 : if (PyString_AsStringAndSize(args, &s, &n))
508 0 : return NULL;
509 0 : newstr = PyString_FromStringAndSize(NULL, n);
510 0 : if (newstr == NULL)
511 0 : return NULL;
512 0 : s_new = PyString_AsString(newstr);
513 0 : changed = 0;
514 0 : for (i = 0; i < n; i++) {
515 0 : int c = Py_CHARMASK(*s++);
516 0 : if (islower(c)) {
517 0 : changed = 1;
518 0 : *s_new = toupper(c);
519 : } else
520 0 : *s_new = c;
521 0 : s_new++;
522 : }
523 0 : if (!changed) {
524 0 : Py_DECREF(newstr);
525 0 : Py_INCREF(args);
526 0 : return args;
527 : }
528 0 : return newstr;
529 : }
530 :
531 :
532 : PyDoc_STRVAR(capitalize__doc__,
533 : "capitalize(s) -> string\n"
534 : "\n"
535 : "Return a copy of the string s with only its first character\n"
536 : "capitalized.");
537 :
538 : static PyObject *
539 0 : strop_capitalize(PyObject *self, PyObject *args)
540 : {
541 : char *s, *s_new;
542 : Py_ssize_t i, n;
543 : PyObject *newstr;
544 : int changed;
545 :
546 0 : WARN;
547 0 : if (PyString_AsStringAndSize(args, &s, &n))
548 0 : return NULL;
549 0 : newstr = PyString_FromStringAndSize(NULL, n);
550 0 : if (newstr == NULL)
551 0 : return NULL;
552 0 : s_new = PyString_AsString(newstr);
553 0 : changed = 0;
554 0 : if (0 < n) {
555 0 : int c = Py_CHARMASK(*s++);
556 0 : if (islower(c)) {
557 0 : changed = 1;
558 0 : *s_new = toupper(c);
559 : } else
560 0 : *s_new = c;
561 0 : s_new++;
562 : }
563 0 : for (i = 1; i < n; i++) {
564 0 : int c = Py_CHARMASK(*s++);
565 0 : if (isupper(c)) {
566 0 : changed = 1;
567 0 : *s_new = tolower(c);
568 : } else
569 0 : *s_new = c;
570 0 : s_new++;
571 : }
572 0 : if (!changed) {
573 0 : Py_DECREF(newstr);
574 0 : Py_INCREF(args);
575 0 : return args;
576 : }
577 0 : return newstr;
578 : }
579 :
580 :
581 : PyDoc_STRVAR(expandtabs__doc__,
582 : "expandtabs(string, [tabsize]) -> string\n"
583 : "\n"
584 : "Expand tabs in a string, i.e. replace them by one or more spaces,\n"
585 : "depending on the current column and the given tab size (default 8).\n"
586 : "The column number is reset to zero after each newline occurring in the\n"
587 : "string. This doesn't understand other non-printing characters.");
588 :
589 : static PyObject *
590 0 : strop_expandtabs(PyObject *self, PyObject *args)
591 : {
592 : /* Original by Fredrik Lundh */
593 : char* e;
594 : char* p;
595 : char* q;
596 : Py_ssize_t i, j;
597 : PyObject* out;
598 : char* string;
599 : Py_ssize_t stringlen;
600 0 : int tabsize = 8;
601 :
602 0 : WARN;
603 : /* Get arguments */
604 0 : if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
605 0 : return NULL;
606 0 : if (tabsize < 1) {
607 0 : PyErr_SetString(PyExc_ValueError,
608 : "tabsize must be at least 1");
609 0 : return NULL;
610 : }
611 :
612 : /* First pass: determine size of output string */
613 0 : i = j = 0; /* j: current column; i: total of previous lines */
614 0 : e = string + stringlen;
615 0 : for (p = string; p < e; p++) {
616 0 : if (*p == '\t') {
617 0 : Py_ssize_t incr = tabsize - (j%tabsize);
618 0 : if (j > PY_SSIZE_T_MAX - incr)
619 0 : goto overflow;
620 0 : j += incr;
621 : } else {
622 0 : if (j > PY_SSIZE_T_MAX - 1)
623 0 : goto overflow;
624 0 : j++;
625 0 : if (*p == '\n') {
626 0 : if (i > PY_SSIZE_T_MAX - j)
627 0 : goto overflow;
628 0 : i += j;
629 0 : j = 0;
630 : }
631 : }
632 : }
633 :
634 0 : if (i > PY_SSIZE_T_MAX - j)
635 0 : goto overflow;
636 :
637 : /* Second pass: create output string and fill it */
638 0 : out = PyString_FromStringAndSize(NULL, i+j);
639 0 : if (out == NULL)
640 0 : return NULL;
641 :
642 0 : i = 0;
643 0 : q = PyString_AS_STRING(out);
644 :
645 0 : for (p = string; p < e; p++) {
646 0 : if (*p == '\t') {
647 0 : j = tabsize - (i%tabsize);
648 0 : i += j;
649 0 : while (j-- > 0)
650 0 : *q++ = ' ';
651 : } else {
652 0 : *q++ = *p;
653 0 : i++;
654 0 : if (*p == '\n')
655 0 : i = 0;
656 : }
657 : }
658 :
659 0 : return out;
660 : overflow:
661 0 : PyErr_SetString(PyExc_OverflowError, "result is too long");
662 0 : return NULL;
663 : }
664 :
665 :
666 : PyDoc_STRVAR(count__doc__,
667 : "count(s, sub[, start[, end]]) -> int\n"
668 : "\n"
669 : "Return the number of occurrences of substring sub in string\n"
670 : "s[start:end]. Optional arguments start and end are\n"
671 : "interpreted as in slice notation.");
672 :
673 : static PyObject *
674 0 : strop_count(PyObject *self, PyObject *args)
675 : {
676 : char *s, *sub;
677 : Py_ssize_t len, n;
678 0 : Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
679 : Py_ssize_t m, r;
680 :
681 0 : WARN;
682 0 : if (!PyArg_ParseTuple(args, "t#t#|nn:count", &s, &len, &sub, &n, &i, &last))
683 0 : return NULL;
684 0 : if (last > len)
685 0 : last = len;
686 0 : if (last < 0)
687 0 : last += len;
688 0 : if (last < 0)
689 0 : last = 0;
690 0 : if (i < 0)
691 0 : i += len;
692 0 : if (i < 0)
693 0 : i = 0;
694 0 : m = last + 1 - n;
695 0 : if (n == 0)
696 0 : return PyInt_FromLong((long) (m-i));
697 :
698 0 : r = 0;
699 0 : while (i < m) {
700 0 : if (!memcmp(s+i, sub, n)) {
701 0 : r++;
702 0 : i += n;
703 : } else {
704 0 : i++;
705 : }
706 : }
707 0 : return PyInt_FromLong((long) r);
708 : }
709 :
710 :
711 : PyDoc_STRVAR(swapcase__doc__,
712 : "swapcase(s) -> string\n"
713 : "\n"
714 : "Return a copy of the string s with upper case characters\n"
715 : "converted to lowercase and vice versa.");
716 :
717 : static PyObject *
718 0 : strop_swapcase(PyObject *self, PyObject *args)
719 : {
720 : char *s, *s_new;
721 : Py_ssize_t i, n;
722 : PyObject *newstr;
723 : int changed;
724 :
725 0 : WARN;
726 0 : if (PyString_AsStringAndSize(args, &s, &n))
727 0 : return NULL;
728 0 : newstr = PyString_FromStringAndSize(NULL, n);
729 0 : if (newstr == NULL)
730 0 : return NULL;
731 0 : s_new = PyString_AsString(newstr);
732 0 : changed = 0;
733 0 : for (i = 0; i < n; i++) {
734 0 : int c = Py_CHARMASK(*s++);
735 0 : if (islower(c)) {
736 0 : changed = 1;
737 0 : *s_new = toupper(c);
738 : }
739 0 : else if (isupper(c)) {
740 0 : changed = 1;
741 0 : *s_new = tolower(c);
742 : }
743 : else
744 0 : *s_new = c;
745 0 : s_new++;
746 : }
747 0 : if (!changed) {
748 0 : Py_DECREF(newstr);
749 0 : Py_INCREF(args);
750 0 : return args;
751 : }
752 0 : return newstr;
753 : }
754 :
755 :
756 : PyDoc_STRVAR(atoi__doc__,
757 : "atoi(s [,base]) -> int\n"
758 : "\n"
759 : "Return the integer represented by the string s in the given\n"
760 : "base, which defaults to 10. The string s must consist of one\n"
761 : "or more digits, possibly preceded by a sign. If base is 0, it\n"
762 : "is chosen from the leading characters of s, 0 for octal, 0x or\n"
763 : "0X for hexadecimal. If base is 16, a preceding 0x or 0X is\n"
764 : "accepted.");
765 :
766 : static PyObject *
767 0 : strop_atoi(PyObject *self, PyObject *args)
768 : {
769 : char *s, *end;
770 0 : int base = 10;
771 : long x;
772 : char buffer[256]; /* For errors */
773 :
774 0 : WARN;
775 0 : if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
776 0 : return NULL;
777 :
778 0 : if ((base != 0 && base < 2) || base > 36) {
779 0 : PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
780 0 : return NULL;
781 : }
782 :
783 0 : while (*s && isspace(Py_CHARMASK(*s)))
784 0 : s++;
785 0 : errno = 0;
786 0 : if (base == 0 && s[0] == '0')
787 0 : x = (long) PyOS_strtoul(s, &end, base);
788 : else
789 0 : x = PyOS_strtol(s, &end, base);
790 0 : if (end == s || !isalnum(Py_CHARMASK(end[-1])))
791 : goto bad;
792 0 : while (*end && isspace(Py_CHARMASK(*end)))
793 0 : end++;
794 0 : if (*end != '\0') {
795 : bad:
796 0 : PyOS_snprintf(buffer, sizeof(buffer),
797 : "invalid literal for atoi(): %.200s", s);
798 0 : PyErr_SetString(PyExc_ValueError, buffer);
799 0 : return NULL;
800 : }
801 0 : else if (errno != 0) {
802 0 : PyOS_snprintf(buffer, sizeof(buffer),
803 : "atoi() literal too large: %.200s", s);
804 0 : PyErr_SetString(PyExc_ValueError, buffer);
805 0 : return NULL;
806 : }
807 0 : return PyInt_FromLong(x);
808 : }
809 :
810 :
811 : PyDoc_STRVAR(atol__doc__,
812 : "atol(s [,base]) -> long\n"
813 : "\n"
814 : "Return the long integer represented by the string s in the\n"
815 : "given base, which defaults to 10. The string s must consist\n"
816 : "of one or more digits, possibly preceded by a sign. If base\n"
817 : "is 0, it is chosen from the leading characters of s, 0 for\n"
818 : "octal, 0x or 0X for hexadecimal. If base is 16, a preceding\n"
819 : "0x or 0X is accepted. A trailing L or l is not accepted,\n"
820 : "unless base is 0.");
821 :
822 : static PyObject *
823 0 : strop_atol(PyObject *self, PyObject *args)
824 : {
825 : char *s, *end;
826 0 : int base = 10;
827 : PyObject *x;
828 : char buffer[256]; /* For errors */
829 :
830 0 : WARN;
831 0 : if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
832 0 : return NULL;
833 :
834 0 : if ((base != 0 && base < 2) || base > 36) {
835 0 : PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
836 0 : return NULL;
837 : }
838 :
839 0 : while (*s && isspace(Py_CHARMASK(*s)))
840 0 : s++;
841 0 : if (s[0] == '\0') {
842 0 : PyErr_SetString(PyExc_ValueError, "empty string for atol()");
843 0 : return NULL;
844 : }
845 0 : x = PyLong_FromString(s, &end, base);
846 0 : if (x == NULL)
847 0 : return NULL;
848 0 : if (base == 0 && (*end == 'l' || *end == 'L'))
849 0 : end++;
850 0 : while (*end && isspace(Py_CHARMASK(*end)))
851 0 : end++;
852 0 : if (*end != '\0') {
853 0 : PyOS_snprintf(buffer, sizeof(buffer),
854 : "invalid literal for atol(): %.200s", s);
855 0 : PyErr_SetString(PyExc_ValueError, buffer);
856 0 : Py_DECREF(x);
857 0 : return NULL;
858 : }
859 0 : return x;
860 : }
861 :
862 :
863 : PyDoc_STRVAR(atof__doc__,
864 : "atof(s) -> float\n"
865 : "\n"
866 : "Return the floating point number represented by the string s.");
867 :
868 : static PyObject *
869 0 : strop_atof(PyObject *self, PyObject *args)
870 : {
871 : char *s, *end;
872 : double x;
873 : char buffer[256]; /* For errors */
874 :
875 0 : WARN;
876 0 : if (!PyArg_ParseTuple(args, "s:atof", &s))
877 0 : return NULL;
878 0 : while (*s && isspace(Py_CHARMASK(*s)))
879 0 : s++;
880 0 : if (s[0] == '\0') {
881 0 : PyErr_SetString(PyExc_ValueError, "empty string for atof()");
882 0 : return NULL;
883 : }
884 :
885 : PyFPE_START_PROTECT("strop_atof", return 0)
886 0 : x = PyOS_string_to_double(s, &end, PyExc_OverflowError);
887 : PyFPE_END_PROTECT(x)
888 0 : if (x == -1 && PyErr_Occurred())
889 0 : return NULL;
890 0 : while (*end && isspace(Py_CHARMASK(*end)))
891 0 : end++;
892 0 : if (*end != '\0') {
893 0 : PyOS_snprintf(buffer, sizeof(buffer),
894 : "invalid literal for atof(): %.200s", s);
895 0 : PyErr_SetString(PyExc_ValueError, buffer);
896 0 : return NULL;
897 : }
898 0 : return PyFloat_FromDouble(x);
899 : }
900 :
901 :
902 : PyDoc_STRVAR(maketrans__doc__,
903 : "maketrans(frm, to) -> string\n"
904 : "\n"
905 : "Return a translation table (a string of 256 bytes long)\n"
906 : "suitable for use in string.translate. The strings frm and to\n"
907 : "must be of the same length.");
908 :
909 : static PyObject *
910 3 : strop_maketrans(PyObject *self, PyObject *args)
911 : {
912 3 : unsigned char *c, *from=NULL, *to=NULL;
913 3 : Py_ssize_t i, fromlen=0, tolen=0;
914 : PyObject *result;
915 :
916 3 : if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
917 0 : return NULL;
918 :
919 3 : if (fromlen != tolen) {
920 0 : PyErr_SetString(PyExc_ValueError,
921 : "maketrans arguments must have same length");
922 0 : return NULL;
923 : }
924 :
925 3 : result = PyString_FromStringAndSize((char *)NULL, 256);
926 3 : if (result == NULL)
927 0 : return NULL;
928 3 : c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
929 771 : for (i = 0; i < 256; i++)
930 768 : c[i]=(unsigned char)i;
931 21 : for (i = 0; i < fromlen; i++)
932 18 : c[from[i]]=to[i];
933 :
934 3 : return result;
935 : }
936 :
937 :
938 : PyDoc_STRVAR(translate__doc__,
939 : "translate(s,table [,deletechars]) -> string\n"
940 : "\n"
941 : "Return a copy of the string s, where all characters occurring\n"
942 : "in the optional argument deletechars are removed, and the\n"
943 : "remaining characters have been mapped through the given\n"
944 : "translation table, which must be a string of length 256.");
945 :
946 : static PyObject *
947 0 : strop_translate(PyObject *self, PyObject *args)
948 : {
949 : register char *input, *table, *output;
950 : Py_ssize_t i;
951 0 : int c, changed = 0;
952 : PyObject *input_obj;
953 0 : char *table1, *output_start, *del_table=NULL;
954 0 : Py_ssize_t inlen, tablen, dellen = 0;
955 : PyObject *result;
956 : int trans_table[256];
957 :
958 0 : WARN;
959 0 : if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
960 : &table1, &tablen, &del_table, &dellen))
961 0 : return NULL;
962 0 : if (tablen != 256) {
963 0 : PyErr_SetString(PyExc_ValueError,
964 : "translation table must be 256 characters long");
965 0 : return NULL;
966 : }
967 :
968 0 : table = table1;
969 0 : inlen = PyString_GET_SIZE(input_obj);
970 0 : result = PyString_FromStringAndSize((char *)NULL, inlen);
971 0 : if (result == NULL)
972 0 : return NULL;
973 0 : output_start = output = PyString_AsString(result);
974 0 : input = PyString_AsString(input_obj);
975 :
976 0 : if (dellen == 0) {
977 : /* If no deletions are required, use faster code */
978 0 : for (i = inlen; --i >= 0; ) {
979 0 : c = Py_CHARMASK(*input++);
980 0 : if (Py_CHARMASK((*output++ = table[c])) != c)
981 0 : changed = 1;
982 : }
983 0 : if (changed)
984 0 : return result;
985 0 : Py_DECREF(result);
986 0 : Py_INCREF(input_obj);
987 0 : return input_obj;
988 : }
989 :
990 0 : for (i = 0; i < 256; i++)
991 0 : trans_table[i] = Py_CHARMASK(table[i]);
992 :
993 0 : for (i = 0; i < dellen; i++)
994 0 : trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
995 :
996 0 : for (i = inlen; --i >= 0; ) {
997 0 : c = Py_CHARMASK(*input++);
998 0 : if (trans_table[c] != -1)
999 0 : if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1000 0 : continue;
1001 0 : changed = 1;
1002 : }
1003 0 : if (!changed) {
1004 0 : Py_DECREF(result);
1005 0 : Py_INCREF(input_obj);
1006 0 : return input_obj;
1007 : }
1008 : /* Fix the size of the resulting string */
1009 0 : if (inlen > 0)
1010 0 : _PyString_Resize(&result, output - output_start);
1011 0 : return result;
1012 : }
1013 :
1014 :
1015 : /* What follows is used for implementing replace(). Perry Stoll. */
1016 :
/*
  mymemfind

  strstr() counterpart for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT and returns its offset into MEM, or -1 when there is
  none.  A pattern longer than MEM is never found.  PAT must hold at
  least one byte, since its first byte is always read.
*/
static Py_ssize_t
mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
{
    /* Highest offset at which the whole pattern still fits. */
    const Py_ssize_t last_start = len - pat_len;
    Py_ssize_t pos = 0;

    while (pos <= last_start) {
        /* Compare the first byte inline; memcmp() only for the rest. */
        if (mem[pos] == pat[0] &&
            (pat_len == 1 ||
             memcmp(mem + pos + 1, pat + 1, pat_len - 1) == 0))
            return pos;
        pos++;
    }
    return -1;
}
1044 :
1045 : /*
1046 : mymemcnt
1047 :
1048 : Return the number of distinct times PAT is found in MEM.
1049 : meaning mem=1111 and pat==11 returns 2.
1050 : mem=11111 and pat==11 also return 2.
1051 : */
1052 : static Py_ssize_t
1053 0 : mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
1054 : {
1055 0 : register Py_ssize_t offset = 0;
1056 0 : Py_ssize_t nfound = 0;
1057 :
1058 0 : while (len >= 0) {
1059 0 : offset = mymemfind(mem, len, pat, pat_len);
1060 0 : if (offset == -1)
1061 0 : break;
1062 0 : mem += offset + pat_len;
1063 0 : len -= offset + pat_len;
1064 0 : nfound++;
1065 : }
1066 0 : return nfound;
1067 : }
1068 :
1069 : /*
1070 : mymemreplace
1071 :
1072 : Return a string in which all occurrences of PAT in memory STR are
1073 : replaced with SUB.
1074 :
1075 : If length of PAT is less than length of STR or there are no occurrences
1076 : of PAT in STR, then the original string is returned. Otherwise, a new
1077 : string is allocated here and returned.
1078 :
1079 : on return, out_len is:
1080 : the length of output string, or
1081 : -1 if the input string is returned, or
1082 : unchanged if an error occurs (no memory).
1083 :
1084 : return value is:
1085 : the new string allocated locally, or
1086 : NULL if an error occurred.
1087 : */
1088 : static char *
1089 0 : mymemreplace(const char *str, Py_ssize_t len, /* input string */
1090 : const char *pat, Py_ssize_t pat_len, /* pattern string to find */
1091 : const char *sub, Py_ssize_t sub_len, /* substitution string */
1092 : Py_ssize_t count, /* number of replacements */
1093 : Py_ssize_t *out_len)
1094 : {
1095 : char *out_s;
1096 : char *new_s;
1097 : Py_ssize_t nfound, offset, new_len, delta_len, abs_delta;
1098 :
1099 0 : if (len == 0 || pat_len > len)
1100 : goto return_same;
1101 :
1102 : /* find length of output string */
1103 0 : nfound = mymemcnt(str, len, pat, pat_len);
1104 0 : if (count < 0)
1105 0 : count = PY_SSIZE_T_MAX;
1106 0 : else if (nfound > count)
1107 0 : nfound = count;
1108 0 : if (nfound == 0)
1109 0 : goto return_same;
1110 :
1111 0 : delta_len = sub_len - pat_len;
1112 0 : abs_delta = (delta_len < 0) ? -delta_len : delta_len;
1113 0 : if (PY_SSIZE_T_MAX/nfound < abs_delta)
1114 0 : return NULL;
1115 0 : delta_len *= nfound;
1116 0 : if (PY_SSIZE_T_MAX - len < delta_len)
1117 0 : return NULL;
1118 0 : new_len = len + delta_len;
1119 0 : if (new_len == 0) {
1120 : /* Have to allocate something for the caller to free(). */
1121 0 : out_s = (char *)PyMem_MALLOC(1);
1122 0 : if (out_s == NULL)
1123 0 : return NULL;
1124 0 : out_s[0] = '\0';
1125 : }
1126 : else {
1127 : assert(new_len > 0);
1128 0 : new_s = (char *)PyMem_MALLOC(new_len);
1129 0 : if (new_s == NULL)
1130 0 : return NULL;
1131 0 : out_s = new_s;
1132 :
1133 0 : for (; count > 0 && len > 0; --count) {
1134 : /* find index of next instance of pattern */
1135 0 : offset = mymemfind(str, len, pat, pat_len);
1136 0 : if (offset == -1)
1137 0 : break;
1138 :
1139 : /* copy non matching part of input string */
1140 0 : memcpy(new_s, str, offset);
1141 0 : str += offset + pat_len;
1142 0 : len -= offset + pat_len;
1143 :
1144 : /* copy substitute into the output string */
1145 0 : new_s += offset;
1146 0 : memcpy(new_s, sub, sub_len);
1147 0 : new_s += sub_len;
1148 : }
1149 : /* copy any remaining values into output string */
1150 0 : if (len > 0)
1151 0 : memcpy(new_s, str, len);
1152 : }
1153 0 : *out_len = new_len;
1154 0 : return out_s;
1155 :
1156 : return_same:
1157 0 : *out_len = -1;
1158 0 : return (char *)str; /* cast away const */
1159 : }
1160 :
1161 :
1162 : PyDoc_STRVAR(replace__doc__,
1163 : "replace (str, old, new[, maxsplit]) -> string\n"
1164 : "\n"
1165 : "Return a copy of string str with all occurrences of substring\n"
1166 : "old replaced by new. If the optional argument maxsplit is\n"
1167 : "given, only the first maxsplit occurrences are replaced.");
1168 :
1169 : static PyObject *
1170 0 : strop_replace(PyObject *self, PyObject *args)
1171 : {
1172 : char *str, *pat,*sub,*new_s;
1173 : Py_ssize_t len,pat_len,sub_len,out_len;
1174 0 : Py_ssize_t count = -1;
1175 : PyObject *newstr;
1176 :
1177 0 : WARN;
1178 0 : if (!PyArg_ParseTuple(args, "t#t#t#|n:replace",
1179 : &str, &len, &pat, &pat_len, &sub, &sub_len,
1180 : &count))
1181 0 : return NULL;
1182 0 : if (pat_len <= 0) {
1183 0 : PyErr_SetString(PyExc_ValueError, "empty pattern string");
1184 0 : return NULL;
1185 : }
1186 : /* CAUTION: strop treats a replace count of 0 as infinity, unlke
1187 : * current (2.1) string.py and string methods. Preserve this for
1188 : * ... well, hard to say for what <wink>.
1189 : */
1190 0 : if (count == 0)
1191 0 : count = -1;
1192 0 : new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
1193 0 : if (new_s == NULL) {
1194 0 : PyErr_NoMemory();
1195 0 : return NULL;
1196 : }
1197 0 : if (out_len == -1) {
1198 : /* we're returning another reference to the input string */
1199 0 : newstr = PyTuple_GetItem(args, 0);
1200 0 : Py_XINCREF(newstr);
1201 : }
1202 : else {
1203 0 : newstr = PyString_FromStringAndSize(new_s, out_len);
1204 0 : PyMem_FREE(new_s);
1205 : }
1206 0 : return newstr;
1207 : }
1208 :
1209 :
1210 : /* List of functions defined in the module */
1211 :
1212 : static PyMethodDef
1213 : strop_methods[] = {
1214 : {"atof", strop_atof, METH_VARARGS, atof__doc__},
1215 : {"atoi", strop_atoi, METH_VARARGS, atoi__doc__},
1216 : {"atol", strop_atol, METH_VARARGS, atol__doc__},
1217 : {"capitalize", strop_capitalize, METH_O, capitalize__doc__},
1218 : {"count", strop_count, METH_VARARGS, count__doc__},
1219 : {"expandtabs", strop_expandtabs, METH_VARARGS, expandtabs__doc__},
1220 : {"find", strop_find, METH_VARARGS, find__doc__},
1221 : {"join", strop_joinfields, METH_VARARGS, joinfields__doc__},
1222 : {"joinfields", strop_joinfields, METH_VARARGS, joinfields__doc__},
1223 : {"lstrip", strop_lstrip, METH_O, lstrip__doc__},
1224 : {"lower", strop_lower, METH_O, lower__doc__},
1225 : {"maketrans", strop_maketrans, METH_VARARGS, maketrans__doc__},
1226 : {"replace", strop_replace, METH_VARARGS, replace__doc__},
1227 : {"rfind", strop_rfind, METH_VARARGS, rfind__doc__},
1228 : {"rstrip", strop_rstrip, METH_O, rstrip__doc__},
1229 : {"split", strop_splitfields, METH_VARARGS, splitfields__doc__},
1230 : {"splitfields", strop_splitfields, METH_VARARGS, splitfields__doc__},
1231 : {"strip", strop_strip, METH_O, strip__doc__},
1232 : {"swapcase", strop_swapcase, METH_O, swapcase__doc__},
1233 : {"translate", strop_translate, METH_VARARGS, translate__doc__},
1234 : {"upper", strop_upper, METH_O, upper__doc__},
1235 : {NULL, NULL} /* sentinel */
1236 : };
1237 :
1238 :
1239 : PyMODINIT_FUNC
1240 3 : initstrop(void)
1241 : {
1242 : PyObject *m, *s;
1243 : char buf[256];
1244 : int c, n;
1245 3 : m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1246 : (PyObject*)NULL, PYTHON_API_VERSION);
1247 3 : if (m == NULL)
1248 3 : return;
1249 :
1250 : /* Create 'whitespace' object */
1251 3 : n = 0;
1252 771 : for (c = 0; c < 256; c++) {
1253 768 : if (isspace(c))
1254 18 : buf[n++] = c;
1255 : }
1256 3 : s = PyString_FromStringAndSize(buf, n);
1257 3 : if (s)
1258 3 : PyModule_AddObject(m, "whitespace", s);
1259 :
1260 : /* Create 'lowercase' object */
1261 3 : n = 0;
1262 771 : for (c = 0; c < 256; c++) {
1263 768 : if (islower(c))
1264 78 : buf[n++] = c;
1265 : }
1266 3 : s = PyString_FromStringAndSize(buf, n);
1267 3 : if (s)
1268 3 : PyModule_AddObject(m, "lowercase", s);
1269 :
1270 : /* Create 'uppercase' object */
1271 3 : n = 0;
1272 771 : for (c = 0; c < 256; c++) {
1273 768 : if (isupper(c))
1274 78 : buf[n++] = c;
1275 : }
1276 3 : s = PyString_FromStringAndSize(buf, n);
1277 3 : if (s)
1278 3 : PyModule_AddObject(m, "uppercase", s);
1279 : }
|