Line data Source code
1 : /*
2 : An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3 :
4 : Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5 :
6 : Written by Amaury Forgeot d'Arc and Antoine Pitrou
7 : */
8 :
9 : #define PY_SSIZE_T_CLEAN
10 : #include "Python.h"
11 : #include "structmember.h"
12 : #include "_iomodule.h"
13 :
14 : /* TextIOBase */
15 :
/* Docstring text for the _io._TextIOBase type; referenced from the tp_doc
   slot of PyTextIOBase_Type. */
16 : PyDoc_STRVAR(textiobase_doc,
17 : "Base class for text I/O.\n"
18 : "\n"
19 : "This class provides a character and line based interface to stream\n"
20 : "I/O. There is no readinto method because Python's character strings\n"
21 : "are immutable. There is no public constructor.\n"
22 : );
23 :
24 : static PyObject *
25 0 : _unsupported(const char *message)
26 : {
27 0 : PyErr_SetString(_PyIO_unsupported_operation, message);
28 0 : return NULL;
29 : }
30 :
31 : PyDoc_STRVAR(textiobase_detach_doc,
32 : "Separate the underlying buffer from the TextIOBase and return it.\n"
33 : "\n"
34 : "After the underlying buffer has been detached, the TextIO is in an\n"
35 : "unusable state.\n"
36 : );
37 :
38 : static PyObject *
39 0 : textiobase_detach(PyObject *self)
40 : {
41 0 : return _unsupported("detach");
42 : }
43 :
44 : PyDoc_STRVAR(textiobase_read_doc,
45 : "Read at most n characters from stream.\n"
46 : "\n"
47 : "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 : "If n is negative or omitted, read until EOF.\n"
49 : );
50 :
51 : static PyObject *
52 0 : textiobase_read(PyObject *self, PyObject *args)
53 : {
54 0 : return _unsupported("read");
55 : }
56 :
57 : PyDoc_STRVAR(textiobase_readline_doc,
58 : "Read until newline or EOF.\n"
59 : "\n"
60 : "Returns an empty string if EOF is hit immediately.\n"
61 : );
62 :
63 : static PyObject *
64 0 : textiobase_readline(PyObject *self, PyObject *args)
65 : {
66 0 : return _unsupported("readline");
67 : }
68 :
69 : PyDoc_STRVAR(textiobase_write_doc,
70 : "Write string to stream.\n"
71 : "Returns the number of characters written (which is always equal to\n"
72 : "the length of the string).\n"
73 : );
74 :
75 : static PyObject *
76 0 : textiobase_write(PyObject *self, PyObject *args)
77 : {
78 0 : return _unsupported("write");
79 : }
80 :
81 : PyDoc_STRVAR(textiobase_encoding_doc,
82 : "Encoding of the text stream.\n"
83 : "\n"
84 : "Subclasses should override.\n"
85 : );
86 :
87 : static PyObject *
88 0 : textiobase_encoding_get(PyObject *self, void *context)
89 : {
90 0 : Py_RETURN_NONE;
91 : }
92 :
93 : PyDoc_STRVAR(textiobase_newlines_doc,
94 : "Line endings translated so far.\n"
95 : "\n"
96 : "Only line endings translated during reading are considered.\n"
97 : "\n"
98 : "Subclasses should override.\n"
99 : );
100 :
101 : static PyObject *
102 0 : textiobase_newlines_get(PyObject *self, void *context)
103 : {
104 0 : Py_RETURN_NONE;
105 : }
106 :
107 : PyDoc_STRVAR(textiobase_errors_doc,
108 : "The error setting of the decoder or encoder.\n"
109 : "\n"
110 : "Subclasses should override.\n"
111 : );
112 :
113 : static PyObject *
114 0 : textiobase_errors_get(PyObject *self, void *context)
115 : {
116 0 : Py_RETURN_NONE;
117 : }
118 :
119 :
/* Method table of _io._TextIOBase.  Each entry binds a Python-level name to
   one of the stub functions above together with its docstring; the
   {NULL, NULL} entry terminates the table. */
120 : static PyMethodDef textiobase_methods[] = {
121 : {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
122 : {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
123 : {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
124 : {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
125 : {NULL, NULL}
126 : };
127 :
/* Attribute descriptors of _io._TextIOBase.  Every entry has a getter and a
   NULL setter; the {NULL} entry terminates the table. */
128 : static PyGetSetDef textiobase_getset[] = {
129 : {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
130 : {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
131 : {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
132 : {NULL}
133 : };
134 :
/* Type object for _io._TextIOBase.  It is derived from PyIOBase_Type
   (tp_base), may itself be subclassed (Py_TPFLAGS_BASETYPE) and only fills
   in the doc, methods and getset slots; every other slot is left at 0. */
135 : PyTypeObject PyTextIOBase_Type = {
136 : PyVarObject_HEAD_INIT(NULL, 0)
137 : "_io._TextIOBase", /*tp_name*/
138 : 0, /*tp_basicsize*/
139 : 0, /*tp_itemsize*/
140 : 0, /*tp_dealloc*/
141 : 0, /*tp_print*/
142 : 0, /*tp_getattr*/
143 : 0, /*tp_setattr*/
144 : 0, /*tp_compare */
145 : 0, /*tp_repr*/
146 : 0, /*tp_as_number*/
147 : 0, /*tp_as_sequence*/
148 : 0, /*tp_as_mapping*/
149 : 0, /*tp_hash */
150 : 0, /*tp_call*/
151 : 0, /*tp_str*/
152 : 0, /*tp_getattro*/
153 : 0, /*tp_setattro*/
154 : 0, /*tp_as_buffer*/
155 : Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
156 : textiobase_doc, /* tp_doc */
157 : 0, /* tp_traverse */
158 : 0, /* tp_clear */
159 : 0, /* tp_richcompare */
160 : 0, /* tp_weaklistoffset */
161 : 0, /* tp_iter */
162 : 0, /* tp_iternext */
163 : textiobase_methods, /* tp_methods */
164 : 0, /* tp_members */
165 : textiobase_getset, /* tp_getset */
166 : &PyIOBase_Type, /* tp_base */
167 : 0, /* tp_dict */
168 : 0, /* tp_descr_get */
169 : 0, /* tp_descr_set */
170 : 0, /* tp_dictoffset */
171 : 0, /* tp_init */
172 : 0, /* tp_alloc */
173 : 0, /* tp_new */
174 : };
175 :
176 :
177 : /* IncrementalNewlineDecoder */
178 :
/* Docstring text for _io.IncrementalNewlineDecoder; referenced from the
   tp_doc slot of PyIncrementalNewlineDecoder_Type. */
179 : PyDoc_STRVAR(incrementalnewlinedecoder_doc,
180 : "Codec used when reading a file in universal newlines mode. It wraps\n"
181 : "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
182 : "records the types of newlines encountered. When used with\n"
183 : "translate=False, it ensures that the newline sequence is returned in\n"
184 : "one piece. When used with decoder=None, it expects unicode strings as\n"
185 : "decode input and translates newlines without first invoking an external\n"
186 : "decoder.\n"
187 : );
188 :
189 : typedef struct {
190 : PyObject_HEAD
191 : PyObject *decoder;
192 : PyObject *errors;
193 : signed int pendingcr: 1;
194 : signed int translate: 1;
195 : unsigned int seennl: 3;
196 : } nldecoder_object;
197 :
198 : static int
199 0 : incrementalnewlinedecoder_init(nldecoder_object *self,
200 : PyObject *args, PyObject *kwds)
201 : {
202 : PyObject *decoder;
203 : int translate;
204 0 : PyObject *errors = NULL;
205 0 : char *kwlist[] = {"decoder", "translate", "errors", NULL};
206 :
207 0 : if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208 : kwlist, &decoder, &translate, &errors))
209 0 : return -1;
210 :
211 0 : self->decoder = decoder;
212 0 : Py_INCREF(decoder);
213 :
214 0 : if (errors == NULL) {
215 0 : self->errors = PyUnicode_FromString("strict");
216 0 : if (self->errors == NULL)
217 0 : return -1;
218 : }
219 : else {
220 0 : Py_INCREF(errors);
221 0 : self->errors = errors;
222 : }
223 :
224 0 : self->translate = translate;
225 0 : self->seennl = 0;
226 0 : self->pendingcr = 0;
227 :
228 0 : return 0;
229 : }
230 :
231 : static void
232 0 : incrementalnewlinedecoder_dealloc(nldecoder_object *self)
233 : {
234 0 : Py_CLEAR(self->decoder);
235 0 : Py_CLEAR(self->errors);
236 0 : Py_TYPE(self)->tp_free((PyObject *)self);
237 0 : }
238 :
239 : static int
240 0 : check_decoded(PyObject *decoded)
241 : {
242 0 : if (decoded == NULL)
243 0 : return -1;
244 0 : if (!PyUnicode_Check(decoded)) {
245 0 : PyErr_Format(PyExc_TypeError,
246 : "decoder should return a string result, not '%.200s'",
247 0 : Py_TYPE(decoded)->tp_name);
248 0 : Py_DECREF(decoded);
249 0 : return -1;
250 : }
251 0 : return 0;
252 : }
253 :
/* Bit flags stored in nldecoder_object.seennl, one per kind of line ending
   encountered by the decoder; SEEN_ALL is the union of the three. */
254 : #define SEEN_CR 1
255 : #define SEEN_LF 2
256 : #define SEEN_CRLF 4
257 : #define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
258 :
/* Core of IncrementalNewlineDecoder.decode().

   _self  - an nldecoder_object (cast without checking).
   input  - passed to self->decoder.decode(); when self->decoder is None it
            is used directly and must already be a unicode object.
   final  - non-zero when no more input will follow.

   Steps performed:
     1. decode `input` (or take it as is) and verify the result is unicode;
     2. if a '\r' was held back by the previous call, put it in front;
     3. unless `final`, hold back a trailing '\r' for the next call;
     4. scan the text, OR-ing the newline kinds found into self->seennl and,
        when self->translate is set, rewriting "\r\n" and "\r" to "\n".

   Returns a new reference to the resulting unicode object, or NULL with an
   exception set. */
259 : PyObject *
260 0 : _PyIncrementalNewlineDecoder_decode(PyObject *_self,
261 : PyObject *input, int final)
262 : {
263 : PyObject *output;
264 : Py_ssize_t output_len;
265 0 : nldecoder_object *self = (nldecoder_object *) _self;
266 :
267 0 : if (self->decoder == NULL) {
268 0 : PyErr_SetString(PyExc_ValueError,
269 : "IncrementalNewlineDecoder.__init__ not called");
270 0 : return NULL;
271 : }
272 :
273 : /* decode input (with the eventual \r from a previous pass) */
274 0 : if (self->decoder != Py_None) {
275 0 : output = PyObject_CallMethodObjArgs(self->decoder,
276 : _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
277 : }
278 : else {
279 0 : output = input;
280 0 : Py_INCREF(output);
281 : }
282 :
/* check_decoded() releases `output` itself when it has the wrong type */
283 0 : if (check_decoded(output) < 0)
284 0 : return NULL;
285 :
286 0 : output_len = PyUnicode_GET_SIZE(output);
/* Re-insert the '\r' held back by the previous call.  With no new text and
   final == 0 the '\r' simply stays pending. */
287 0 : if (self->pendingcr && (final || output_len > 0)) {
288 : Py_UNICODE *out;
289 0 : PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
290 0 : if (modified == NULL)
291 0 : goto error;
292 0 : out = PyUnicode_AS_UNICODE(modified);
293 0 : out[0] = '\r';
294 0 : memcpy(out + 1, PyUnicode_AS_UNICODE(output),
295 : output_len * sizeof(Py_UNICODE));
296 0 : Py_DECREF(output);
297 0 : output = modified;
298 0 : self->pendingcr = 0;
299 0 : output_len++;
300 : }
301 :
302 : /* retain last \r even when not translating data:
303 : * then readline() is sure to get \r\n in one pass
304 : */
305 0 : if (!final) {
306 0 : if (output_len > 0
307 0 : && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
308 :
/* Shrink in place only when we hold the sole reference; otherwise build a
   shortened copy so the shared object is left untouched. */
309 0 : if (Py_REFCNT(output) == 1) {
310 0 : if (PyUnicode_Resize(&output, output_len - 1) < 0)
311 0 : goto error;
312 : }
313 : else {
314 0 : PyObject *modified = PyUnicode_FromUnicode(
315 0 : PyUnicode_AS_UNICODE(output),
316 : output_len - 1);
317 0 : if (modified == NULL)
318 0 : goto error;
319 0 : Py_DECREF(output);
320 0 : output = modified;
321 : }
322 0 : self->pendingcr = 1;
323 : }
324 : }
325 :
326 : /* Record which newlines are read and do newline translation if desired,
327 : all in one pass. */
328 : {
329 : Py_UNICODE *in_str;
330 : Py_ssize_t len;
331 0 : int seennl = self->seennl;
332 0 : int only_lf = 0;
333 :
334 0 : in_str = PyUnicode_AS_UNICODE(output);
335 0 : len = PyUnicode_GET_SIZE(output);
336 :
337 0 : if (len == 0)
338 0 : return output;
339 :
340 : /* If, up to now, newlines are consistently \n, do a quick check
341 : for the \r *byte* with the libc's optimized memchr.
342 : */
/* The search is byte-wise over the whole buffer, so a hit need not be a
   real '\r' character; a false positive only selects the slower scans. */
343 0 : if (seennl == SEEN_LF || seennl == 0) {
344 0 : only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
345 : }
346 :
347 0 : if (only_lf) {
348 : /* If not already seen, quick scan for a possible "\n" character.
349 : (there's nothing else to be done, even when in translation mode)
350 : */
351 0 : if (seennl == 0 &&
352 0 : memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
353 : Py_UNICODE *s, *end;
354 0 : s = in_str;
355 0 : end = in_str + len;
/* NOTE(review): the inner loop has no bounds check of its own; it presumably
   relies on the unicode buffer ending with a NUL character -- confirm. */
356 : for (;;) {
357 : Py_UNICODE c;
358 : /* Fast loop for non-control characters */
359 0 : while (*s > '\n')
360 0 : s++;
361 0 : c = *s++;
362 0 : if (c == '\n') {
363 0 : seennl |= SEEN_LF;
364 0 : break;
365 : }
366 0 : if (s > end)
367 0 : break;
368 0 : }
369 : }
370 : /* Finished: we have scanned for newlines, and none of them
371 : need translating */
372 : }
373 0 : else if (!self->translate) {
374 : Py_UNICODE *s, *end;
375 : /* We have already seen all newline types, no need to scan again */
376 0 : if (seennl == SEEN_ALL)
377 0 : goto endscan;
378 0 : s = in_str;
379 0 : end = in_str + len;
/* Scan only: classify each line ending, leave the text unchanged. */
380 : for (;;) {
381 : Py_UNICODE c;
382 : /* Fast loop for non-control characters */
383 0 : while (*s > '\r')
384 0 : s++;
385 0 : c = *s++;
386 0 : if (c == '\n')
387 0 : seennl |= SEEN_LF;
388 0 : else if (c == '\r') {
389 0 : if (*s == '\n') {
390 0 : seennl |= SEEN_CRLF;
391 0 : s++;
392 : }
393 : else
394 0 : seennl |= SEEN_CR;
395 : }
396 0 : if (s > end)
397 0 : break;
398 0 : if (seennl == SEEN_ALL)
399 0 : break;
400 0 : }
401 : endscan:
402 : ;
403 : }
404 : else {
405 0 : PyObject *translated = NULL;
406 : Py_UNICODE *out_str;
407 : Py_UNICODE *in, *out, *end;
408 0 : if (Py_REFCNT(output) != 1) {
409 : /* We could try to optimize this so that we only do a copy
410 : when there is something to translate. On the other hand,
411 : most decoders should only output non-shared strings, i.e.
412 : translation is done in place. */
413 0 : translated = PyUnicode_FromUnicode(NULL, len);
414 0 : if (translated == NULL)
415 0 : goto error;
416 : assert(Py_REFCNT(translated) == 1);
417 0 : memcpy(PyUnicode_AS_UNICODE(translated),
418 0 : PyUnicode_AS_UNICODE(output),
419 : len * sizeof(Py_UNICODE));
420 : }
421 : else {
422 0 : translated = output;
423 : }
424 0 : out_str = PyUnicode_AS_UNICODE(translated);
425 0 : in = in_str;
426 0 : out = out_str;
427 0 : end = in_str + len;
/* Copy from `in` to `out`, writing a single '\n' for every "\r\n" or "\r".
   `out` never gets ahead of `in`, so this also works when both point into
   the same buffer (translated == output). */
428 : for (;;) {
429 : Py_UNICODE c;
430 : /* Fast loop for non-control characters */
431 0 : while ((c = *in++) > '\r')
432 0 : *out++ = c;
433 0 : if (c == '\n') {
434 0 : *out++ = c;
435 0 : seennl |= SEEN_LF;
436 0 : continue;
437 : }
438 0 : if (c == '\r') {
439 0 : if (*in == '\n') {
440 0 : in++;
441 0 : seennl |= SEEN_CRLF;
442 : }
443 : else
444 0 : seennl |= SEEN_CR;
445 0 : *out++ = '\n';
446 0 : continue;
447 : }
448 0 : if (in > end)
449 0 : break;
450 0 : *out++ = c;
451 0 : }
452 0 : if (translated != output) {
453 0 : Py_DECREF(output);
454 0 : output = translated;
455 : }
/* Each translated "\r\n" made the text one character shorter. */
456 0 : if (out - out_str != len) {
457 0 : if (PyUnicode_Resize(&output, out - out_str) < 0)
458 0 : goto error;
459 : }
460 : }
461 0 : self->seennl |= seennl;
462 : }
463 :
464 0 : return output;
465 :
466 : error:
467 0 : Py_DECREF(output);
468 0 : return NULL;
469 : }
470 :
471 : static PyObject *
472 0 : incrementalnewlinedecoder_decode(nldecoder_object *self,
473 : PyObject *args, PyObject *kwds)
474 : {
475 0 : char *kwlist[] = {"input", "final", NULL};
476 : PyObject *input;
477 0 : int final = 0;
478 :
479 0 : if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
480 : kwlist, &input, &final))
481 0 : return NULL;
482 0 : return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
483 : }
484 :
485 : static PyObject *
486 0 : incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
487 : {
488 : PyObject *buffer;
489 : unsigned PY_LONG_LONG flag;
490 :
491 0 : if (self->decoder != Py_None) {
492 0 : PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
493 : _PyIO_str_getstate, NULL);
494 0 : if (state == NULL)
495 0 : return NULL;
496 0 : if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
497 0 : Py_DECREF(state);
498 0 : return NULL;
499 : }
500 0 : Py_INCREF(buffer);
501 0 : Py_DECREF(state);
502 : }
503 : else {
504 0 : buffer = PyBytes_FromString("");
505 0 : flag = 0;
506 : }
507 0 : flag <<= 1;
508 0 : if (self->pendingcr)
509 0 : flag |= 1;
510 0 : return Py_BuildValue("NK", buffer, flag);
511 : }
512 :
513 : static PyObject *
514 0 : incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
515 : {
516 : PyObject *buffer;
517 : unsigned PY_LONG_LONG flag;
518 :
519 0 : if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
520 0 : return NULL;
521 :
522 0 : self->pendingcr = (int) flag & 1;
523 0 : flag >>= 1;
524 :
525 0 : if (self->decoder != Py_None)
526 0 : return PyObject_CallMethod(self->decoder,
527 : "setstate", "((OK))", buffer, flag);
528 : else
529 0 : Py_RETURN_NONE;
530 : }
531 :
532 : static PyObject *
533 0 : incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
534 : {
535 0 : self->seennl = 0;
536 0 : self->pendingcr = 0;
537 0 : if (self->decoder != Py_None)
538 0 : return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
539 : else
540 0 : Py_RETURN_NONE;
541 : }
542 :
543 : static PyObject *
544 0 : incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
545 : {
546 0 : switch (self->seennl) {
547 : case SEEN_CR:
548 0 : return PyUnicode_FromString("\r");
549 : case SEEN_LF:
550 0 : return PyUnicode_FromString("\n");
551 : case SEEN_CRLF:
552 0 : return PyUnicode_FromString("\r\n");
553 : case SEEN_CR | SEEN_LF:
554 0 : return Py_BuildValue("ss", "\r", "\n");
555 : case SEEN_CR | SEEN_CRLF:
556 0 : return Py_BuildValue("ss", "\r", "\r\n");
557 : case SEEN_LF | SEEN_CRLF:
558 0 : return Py_BuildValue("ss", "\n", "\r\n");
559 : case SEEN_CR | SEEN_LF | SEEN_CRLF:
560 0 : return Py_BuildValue("sss", "\r", "\n", "\r\n");
561 : default:
562 0 : Py_RETURN_NONE;
563 : }
564 :
565 : }
566 :
567 :
/* Method table of _io.IncrementalNewlineDecoder.  No docstrings are given
   (the fourth field of each entry is omitted); {NULL} terminates the
   table. */
568 : static PyMethodDef incrementalnewlinedecoder_methods[] = {
569 : {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
570 : {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
571 : {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
572 : {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
573 : {NULL}
574 : };
575 :
/* Attribute descriptors of _io.IncrementalNewlineDecoder: a single
   `newlines` attribute with a getter, no setter and no docstring. */
576 : static PyGetSetDef incrementalnewlinedecoder_getset[] = {
577 : {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
578 : {NULL}
579 : };
580 :
/* Type object for _io.IncrementalNewlineDecoder.  Instances are
   nldecoder_object structs created by PyType_GenericNew, set up by
   incrementalnewlinedecoder_init and torn down by
   incrementalnewlinedecoder_dealloc.  tp_traverse and tp_clear are left
   at 0. */
581 : PyTypeObject PyIncrementalNewlineDecoder_Type = {
582 : PyVarObject_HEAD_INIT(NULL, 0)
583 : "_io.IncrementalNewlineDecoder", /*tp_name*/
584 : sizeof(nldecoder_object), /*tp_basicsize*/
585 : 0, /*tp_itemsize*/
586 : (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
587 : 0, /*tp_print*/
588 : 0, /*tp_getattr*/
589 : 0, /*tp_setattr*/
590 : 0, /*tp_compare */
591 : 0, /*tp_repr*/
592 : 0, /*tp_as_number*/
593 : 0, /*tp_as_sequence*/
594 : 0, /*tp_as_mapping*/
595 : 0, /*tp_hash */
596 : 0, /*tp_call*/
597 : 0, /*tp_str*/
598 : 0, /*tp_getattro*/
599 : 0, /*tp_setattro*/
600 : 0, /*tp_as_buffer*/
601 : Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
602 : incrementalnewlinedecoder_doc, /* tp_doc */
603 : 0, /* tp_traverse */
604 : 0, /* tp_clear */
605 : 0, /* tp_richcompare */
606 : 0, /*tp_weaklistoffset*/
607 : 0, /* tp_iter */
608 : 0, /* tp_iternext */
609 : incrementalnewlinedecoder_methods, /* tp_methods */
610 : 0, /* tp_members */
611 : incrementalnewlinedecoder_getset, /* tp_getset */
612 : 0, /* tp_base */
613 : 0, /* tp_dict */
614 : 0, /* tp_descr_get */
615 : 0, /* tp_descr_set */
616 : 0, /* tp_dictoffset */
617 : (initproc)incrementalnewlinedecoder_init, /* tp_init */
618 : 0, /* tp_alloc */
619 : PyType_GenericNew, /* tp_new */
620 : };
621 :
622 :
623 : /* TextIOWrapper */
624 :
/* Docstring text for the TextIOWrapper type.  It describes the constructor
   arguments parsed by textiowrapper_init: buffer, encoding, errors, newline
   and line_buffering. */
625 : PyDoc_STRVAR(textiowrapper_doc,
626 : "Character and line based layer over a BufferedIOBase object, buffer.\n"
627 : "\n"
628 : "encoding gives the name of the encoding that the stream will be\n"
629 : "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
630 : "\n"
631 : "errors determines the strictness of encoding and decoding (see the\n"
632 : "codecs.register) and defaults to \"strict\".\n"
633 : "\n"
634 : "newline controls how line endings are handled. It can be None, '',\n"
635 : "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
636 : "\n"
637 : "* On input, if newline is None, universal newlines mode is\n"
638 : " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
639 : " these are translated into '\\n' before being returned to the\n"
640 : " caller. If it is '', universal newline mode is enabled, but line\n"
641 : " endings are returned to the caller untranslated. If it has any of\n"
642 : " the other legal values, input lines are only terminated by the given\n"
643 : " string, and the line ending is returned to the caller untranslated.\n"
644 : "\n"
645 : "* On output, if newline is None, any '\\n' characters written are\n"
646 : " translated to the system default line separator, os.linesep. If\n"
647 : " newline is '', no translation takes place. If newline is any of the\n"
648 : " other legal values, any '\\n' characters written are translated to\n"
649 : " the given string.\n"
650 : "\n"
651 : "If line_buffering is True, a call to flush is implied when a call to\n"
652 : "write contains a newline character."
653 : );
654 :
/* Signature shared by the specialized encoders below: two object pointers
   in, one object pointer out.  The encoders are defined with a `textio *`
   first parameter and cast to this type when stored in encodefuncs[]. */
655 : typedef PyObject *
656 : (*encodefunc_t)(PyObject *, PyObject *);
657 :
/* Instance layout of TextIOWrapper objects.  The object-pointer members
   from `buffer` to `raw` are reset by textiowrapper_init and released by
   _textiowrapper_clear. */
658 : typedef struct
659 : {
660 : PyObject_HEAD
661 : int ok; /* initialized? */
662 : int detached;
663 : Py_ssize_t chunk_size;
664 : PyObject *buffer;
665 : PyObject *encoding;
666 : PyObject *encoder;
667 : PyObject *decoder;
668 : PyObject *readnl;
669 : PyObject *errors;
670 : const char *writenl; /* utf-8 encoded, NULL stands for \n */
671 : char line_buffering;
672 : char readuniversal;
673 : char readtranslate;
674 : char writetranslate;
675 : char seekable;
676 : char telling;
677 : /* Specialized encoding func (see below) */
678 : encodefunc_t encodefunc;
679 : /* Whether or not it's the start of the stream */
680 : char encoding_start_of_stream;
681 :
682 : /* Reads and writes are internally buffered in order to speed things up.
683 : However, any read will first flush the write buffer if it isn't empty.
684 :
685 : Please also note that text to be written is first encoded before being
686 : buffered. This is necessary so that encoding errors are immediately
687 : reported to the caller, but it unfortunately means that the
688 : IncrementalEncoder (whose encode() method is always written in Python)
689 : becomes a bottleneck for small writes.
690 : */
691 : PyObject *decoded_chars; /* buffer for text returned from decoder */
692 : Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
693 : PyObject *pending_bytes; /* list of bytes objects waiting to be
694 : written, or NULL */
695 : Py_ssize_t pending_bytes_count;
696 : PyObject *snapshot;
697 : /* snapshot is either None, or a tuple (dec_flags, next_input) where
698 : * dec_flags is the second (integer) item of the decoder state and
699 : * next_input is the chunk of input bytes that comes next after the
700 : * snapshot point. We use this to reconstruct decoder states in tell().
701 : */
702 :
703 : /* Cache raw object if it's a FileIO object */
704 : PyObject *raw;
705 :
706 : PyObject *weakreflist;
707 : PyObject *dict;
708 : } textio;
709 :
710 :
711 : /* A couple of specialized cases in order to bypass the slow incremental
712 : encoding methods for the most popular encodings. */
713 :
714 : static PyObject *
715 0 : ascii_encode(textio *self, PyObject *text)
716 : {
717 0 : return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
718 : PyUnicode_GET_SIZE(text),
719 0 : PyBytes_AS_STRING(self->errors));
720 : }
721 :
722 : static PyObject *
723 0 : utf16be_encode(textio *self, PyObject *text)
724 : {
725 0 : return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
726 : PyUnicode_GET_SIZE(text),
727 0 : PyBytes_AS_STRING(self->errors), 1);
728 : }
729 :
730 : static PyObject *
731 0 : utf16le_encode(textio *self, PyObject *text)
732 : {
733 0 : return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
734 : PyUnicode_GET_SIZE(text),
735 0 : PyBytes_AS_STRING(self->errors), -1);
736 : }
737 :
738 : static PyObject *
739 0 : utf16_encode(textio *self, PyObject *text)
740 : {
741 0 : if (!self->encoding_start_of_stream) {
742 : /* Skip the BOM and use native byte ordering */
743 : #if defined(WORDS_BIGENDIAN)
744 : return utf16be_encode(self, text);
745 : #else
746 0 : return utf16le_encode(self, text);
747 : #endif
748 : }
749 0 : return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
750 : PyUnicode_GET_SIZE(text),
751 0 : PyBytes_AS_STRING(self->errors), 0);
752 : }
753 :
754 : static PyObject *
755 0 : utf32be_encode(textio *self, PyObject *text)
756 : {
757 0 : return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
758 : PyUnicode_GET_SIZE(text),
759 0 : PyBytes_AS_STRING(self->errors), 1);
760 : }
761 :
762 : static PyObject *
763 0 : utf32le_encode(textio *self, PyObject *text)
764 : {
765 0 : return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
766 : PyUnicode_GET_SIZE(text),
767 0 : PyBytes_AS_STRING(self->errors), -1);
768 : }
769 :
770 : static PyObject *
771 0 : utf32_encode(textio *self, PyObject *text)
772 : {
773 0 : if (!self->encoding_start_of_stream) {
774 : /* Skip the BOM and use native byte ordering */
775 : #if defined(WORDS_BIGENDIAN)
776 : return utf32be_encode(self, text);
777 : #else
778 0 : return utf32le_encode(self, text);
779 : #endif
780 : }
781 0 : return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
782 : PyUnicode_GET_SIZE(text),
783 0 : PyBytes_AS_STRING(self->errors), 0);
784 : }
785 :
786 : static PyObject *
787 0 : utf8_encode(textio *self, PyObject *text)
788 : {
789 0 : return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
790 : PyUnicode_GET_SIZE(text),
791 0 : PyBytes_AS_STRING(self->errors));
792 : }
793 :
794 : static PyObject *
795 0 : latin1_encode(textio *self, PyObject *text)
796 : {
797 0 : return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
798 : PyUnicode_GET_SIZE(text),
799 0 : PyBytes_AS_STRING(self->errors));
800 : }
801 :
802 : /* Map normalized encoding names onto the specialized encoding funcs */
803 :
/* One row of the encodefuncs[] lookup table: a codec name and the
   specialized encoder to use for it. */
804 : typedef struct {
805 : const char *name;
806 : encodefunc_t encodefunc;
807 : } encodefuncentry;
808 :
/* Lookup table searched linearly by textiowrapper_init, which compares the
   codec's `name` attribute against each entry with strcmp().  The
   {NULL, NULL} entry terminates the search. */
809 : static encodefuncentry encodefuncs[] = {
810 : {"ascii", (encodefunc_t) ascii_encode},
811 : {"iso8859-1", (encodefunc_t) latin1_encode},
812 : {"utf-8", (encodefunc_t) utf8_encode},
813 : {"utf-16-be", (encodefunc_t) utf16be_encode},
814 : {"utf-16-le", (encodefunc_t) utf16le_encode},
815 : {"utf-16", (encodefunc_t) utf16_encode},
816 : {"utf-32-be", (encodefunc_t) utf32be_encode},
817 : {"utf-32-le", (encodefunc_t) utf32le_encode},
818 : {"utf-32", (encodefunc_t) utf32_encode},
819 : {NULL, NULL}
820 : };
821 :
822 :
823 : static int
824 0 : textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
825 : {
826 0 : char *kwlist[] = {"buffer", "encoding", "errors",
827 : "newline", "line_buffering",
828 : NULL};
829 0 : PyObject *buffer, *raw, *codec_info = NULL;
830 0 : char *encoding = NULL;
831 0 : char *errors = NULL;
832 0 : char *newline = NULL;
833 0 : int line_buffering = 0;
834 :
835 : PyObject *res;
836 : int r;
837 :
838 0 : self->ok = 0;
839 0 : self->detached = 0;
840 0 : if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
841 : kwlist, &buffer, &encoding, &errors,
842 : &newline, &line_buffering))
843 0 : return -1;
844 :
845 0 : if (newline && newline[0] != '\0'
846 0 : && !(newline[0] == '\n' && newline[1] == '\0')
847 0 : && !(newline[0] == '\r' && newline[1] == '\0')
848 0 : && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
849 0 : PyErr_Format(PyExc_ValueError,
850 : "illegal newline value: %s", newline);
851 0 : return -1;
852 : }
853 :
854 0 : Py_CLEAR(self->buffer);
855 0 : Py_CLEAR(self->encoding);
856 0 : Py_CLEAR(self->encoder);
857 0 : Py_CLEAR(self->decoder);
858 0 : Py_CLEAR(self->readnl);
859 0 : Py_CLEAR(self->decoded_chars);
860 0 : Py_CLEAR(self->pending_bytes);
861 0 : Py_CLEAR(self->snapshot);
862 0 : Py_CLEAR(self->errors);
863 0 : Py_CLEAR(self->raw);
864 0 : self->decoded_chars_used = 0;
865 0 : self->pending_bytes_count = 0;
866 0 : self->encodefunc = NULL;
867 0 : self->writenl = NULL;
868 :
869 0 : if (encoding == NULL && self->encoding == NULL) {
870 0 : if (_PyIO_locale_module == NULL) {
871 0 : _PyIO_locale_module = PyImport_ImportModule("locale");
872 0 : if (_PyIO_locale_module == NULL)
873 0 : goto catch_ImportError;
874 : else
875 0 : goto use_locale;
876 : }
877 : else {
878 : use_locale:
879 0 : self->encoding = PyObject_CallMethod(
880 : _PyIO_locale_module, "getpreferredencoding", NULL);
881 0 : if (self->encoding == NULL) {
882 : catch_ImportError:
883 : /*
884 : Importing locale can raise an ImportError because of
885 : _functools, and locale.getpreferredencoding can raise an
886 : ImportError if _locale is not available. These will happen
887 : during module building.
888 : */
889 0 : if (PyErr_ExceptionMatches(PyExc_ImportError)) {
890 0 : PyErr_Clear();
891 0 : self->encoding = PyString_FromString("ascii");
892 : }
893 : else
894 0 : goto error;
895 : }
896 0 : else if (!PyString_Check(self->encoding))
897 0 : Py_CLEAR(self->encoding);
898 : }
899 : }
900 0 : if (self->encoding != NULL)
901 0 : encoding = PyString_AsString(self->encoding);
902 0 : else if (encoding != NULL) {
903 0 : self->encoding = PyString_FromString(encoding);
904 0 : if (self->encoding == NULL)
905 0 : goto error;
906 : }
907 : else {
908 0 : PyErr_SetString(PyExc_IOError,
909 : "could not determine default encoding");
910 : }
911 :
912 : /* Check we have been asked for a real text encoding */
913 0 : codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
914 0 : if (codec_info == NULL) {
915 0 : Py_CLEAR(self->encoding);
916 0 : goto error;
917 : }
918 :
919 : /* XXX: Failures beyond this point have the potential to leak elements
920 : * of the partially constructed object (like self->encoding)
921 : */
922 :
923 0 : if (errors == NULL)
924 0 : errors = "strict";
925 0 : self->errors = PyBytes_FromString(errors);
926 0 : if (self->errors == NULL)
927 0 : goto error;
928 :
929 0 : self->chunk_size = 8192;
930 0 : self->readuniversal = (newline == NULL || newline[0] == '\0');
931 0 : self->line_buffering = line_buffering;
932 0 : self->readtranslate = (newline == NULL);
933 0 : if (newline) {
934 0 : self->readnl = PyString_FromString(newline);
935 0 : if (self->readnl == NULL)
936 0 : goto error;
937 : }
938 0 : self->writetranslate = (newline == NULL || newline[0] != '\0');
939 0 : if (!self->readuniversal && self->writetranslate) {
940 0 : self->writenl = PyString_AsString(self->readnl);
941 0 : if (!strcmp(self->writenl, "\n"))
942 0 : self->writenl = NULL;
943 : }
944 : #ifdef MS_WINDOWS
945 : else
946 : self->writenl = "\r\n";
947 : #endif
948 :
949 : /* Build the decoder object */
950 0 : res = PyObject_CallMethod(buffer, "readable", NULL);
951 0 : if (res == NULL)
952 0 : goto error;
953 0 : r = PyObject_IsTrue(res);
954 0 : Py_DECREF(res);
955 0 : if (r == -1)
956 0 : goto error;
957 0 : if (r == 1) {
958 0 : self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info,
959 : errors);
960 0 : if (self->decoder == NULL)
961 0 : goto error;
962 :
963 0 : if (self->readuniversal) {
964 0 : PyObject *incrementalDecoder = PyObject_CallFunction(
965 : (PyObject *)&PyIncrementalNewlineDecoder_Type,
966 0 : "Oi", self->decoder, (int)self->readtranslate);
967 0 : if (incrementalDecoder == NULL)
968 0 : goto error;
969 0 : Py_XSETREF(self->decoder, incrementalDecoder);
970 : }
971 : }
972 :
973 : /* Build the encoder object */
974 0 : res = PyObject_CallMethod(buffer, "writable", NULL);
975 0 : if (res == NULL)
976 0 : goto error;
977 0 : r = PyObject_IsTrue(res);
978 0 : Py_DECREF(res);
979 0 : if (r == -1)
980 0 : goto error;
981 0 : if (r == 1) {
982 0 : self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info,
983 : errors);
984 0 : if (self->encoder == NULL)
985 0 : goto error;
986 : /* Get the normalized named of the codec */
987 0 : res = PyObject_GetAttrString(codec_info, "name");
988 0 : if (res == NULL) {
989 0 : if (PyErr_ExceptionMatches(PyExc_AttributeError))
990 0 : PyErr_Clear();
991 : else
992 0 : goto error;
993 : }
994 0 : else if (PyString_Check(res)) {
995 0 : encodefuncentry *e = encodefuncs;
996 0 : while (e->name != NULL) {
997 0 : if (!strcmp(PyString_AS_STRING(res), e->name)) {
998 0 : self->encodefunc = e->encodefunc;
999 0 : break;
1000 : }
1001 0 : e++;
1002 : }
1003 : }
1004 0 : Py_XDECREF(res);
1005 : }
1006 :
1007 : /* Finished sorting out the codec details */
1008 0 : Py_DECREF(codec_info);
1009 :
1010 0 : self->buffer = buffer;
1011 0 : Py_INCREF(buffer);
1012 :
1013 0 : if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1014 0 : Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1015 0 : Py_TYPE(buffer) == &PyBufferedRandom_Type) {
1016 0 : raw = PyObject_GetAttrString(buffer, "raw");
1017 : /* Cache the raw FileIO object to speed up 'closed' checks */
1018 0 : if (raw == NULL) {
1019 0 : if (PyErr_ExceptionMatches(PyExc_AttributeError))
1020 0 : PyErr_Clear();
1021 : else
1022 0 : goto error;
1023 : }
1024 0 : else if (Py_TYPE(raw) == &PyFileIO_Type)
1025 0 : self->raw = raw;
1026 : else
1027 0 : Py_DECREF(raw);
1028 : }
1029 :
1030 0 : res = PyObject_CallMethod(buffer, "seekable", NULL);
1031 0 : if (res == NULL)
1032 0 : goto error;
1033 0 : r = PyObject_IsTrue(res);
1034 0 : Py_DECREF(res);
1035 0 : if (r < 0)
1036 0 : goto error;
1037 0 : self->seekable = self->telling = r;
1038 :
1039 0 : self->encoding_start_of_stream = 0;
1040 0 : if (self->seekable && self->encoder) {
1041 : PyObject *cookieObj;
1042 : int cmp;
1043 :
1044 0 : self->encoding_start_of_stream = 1;
1045 :
1046 0 : cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1047 0 : if (cookieObj == NULL)
1048 0 : goto error;
1049 :
1050 0 : cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1051 0 : Py_DECREF(cookieObj);
1052 0 : if (cmp < 0) {
1053 0 : goto error;
1054 : }
1055 :
1056 0 : if (cmp == 0) {
1057 0 : self->encoding_start_of_stream = 0;
1058 0 : res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1059 : _PyIO_zero, NULL);
1060 0 : if (res == NULL)
1061 0 : goto error;
1062 0 : Py_DECREF(res);
1063 : }
1064 : }
1065 :
1066 0 : self->ok = 1;
1067 0 : return 0;
1068 :
1069 : error:
1070 0 : Py_XDECREF(codec_info);
1071 0 : return -1;
1072 : }
1073 :
/* Release every object reference held by the wrapper and mark it as not
   initialized (ok = 0).  Called by both textiowrapper_clear() and
   textiowrapper_dealloc().  Py_CLEAR() tolerates members that are already
   NULL, so this is safe on a partially initialized object. */
static void
_textiowrapper_clear(textio *self)
{
    /* Flag the object as unusable before dropping its members. */
    self->ok = 0;
    Py_CLEAR(self->buffer);
    Py_CLEAR(self->encoding);
    Py_CLEAR(self->encoder);
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->decoded_chars);
    Py_CLEAR(self->pending_bytes);
    Py_CLEAR(self->snapshot);
    Py_CLEAR(self->errors);
    Py_CLEAR(self->raw);

    Py_CLEAR(self->dict);
}
1091 :
1092 : static void
1093 0 : textiowrapper_dealloc(textio *self)
1094 : {
1095 0 : if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1096 0 : return;
1097 0 : _PyObject_GC_UNTRACK(self);
1098 0 : if (self->weakreflist != NULL)
1099 0 : PyObject_ClearWeakRefs((PyObject *)self);
1100 0 : _textiowrapper_clear(self);
1101 0 : Py_TYPE(self)->tp_free((PyObject *)self);
1102 : }
1103 :
/* GC traversal: report every object reference owned by the wrapper to the
   `visit` callback.  Py_VISIT() returns early from this function if the
   callback yields a nonzero value; otherwise 0 is returned.  The set of
   members visited here mirrors the one released by _textiowrapper_clear(). */
static int
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
{
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    Py_VISIT(self->dict);
    return 0;
}
1121 :
1122 : static int
1123 0 : textiowrapper_clear(textio *self)
1124 : {
1125 0 : if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1126 0 : return -1;
1127 0 : _textiowrapper_clear(self);
1128 0 : return 0;
1129 : }
1130 :
/* Forward declaration: the getter is defined later in the file but is needed
   by the CHECK_CLOSED() macro below. */
static PyObject *
textiowrapper_closed_get(textio *self, void *context);
1133 :
/* This macro takes some shortcuts to make the common case faster.

   It makes the enclosing function `return NULL` when the stream is closed or
   when the closed state cannot be determined:
     - for exact TextIOWrapper instances with a cached raw object
       (self->raw), the closed state is read via _PyFileIO_closed();
     - for exact TextIOWrapper instances without a cached raw object, the
       `closed` getter is called and its result is tested for truth;
     - for any other type, _PyIOBase_check_closed() is used.
   In the first two cases a closed stream raises
   ValueError("I/O operation on closed file.").
   Only usable in functions returning a pointer. */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1160 :
/* Make the enclosing function raise ValueError and `return NULL` when the
   wrapper has not been successfully initialized (self->ok <= 0).
   NOTE: expands to a bare `if` statement (no do/while wrapper), so it must
   be used as a standalone statement. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return NULL; \
    }
1167 :
/* Same as CHECK_INITIALIZED(), and additionally raise ValueError and
   `return NULL` when the underlying buffer has been detached.
   NOTE: expands to two consecutive `if` statements (no do/while wrapper),
   so it must be used as a standalone statement. */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return NULL; \
    }
1175 :
/* Variant of CHECK_ATTACHED() for functions returning an int: the same two
   conditions raise the same ValueError messages, but the enclosing function
   returns -1 instead of NULL. */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return -1; \
    }
1186 :
1187 :
1188 : static PyObject *
1189 0 : textiowrapper_detach(textio *self)
1190 : {
1191 : PyObject *buffer, *res;
1192 0 : CHECK_ATTACHED(self);
1193 0 : res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1194 0 : if (res == NULL)
1195 0 : return NULL;
1196 0 : Py_DECREF(res);
1197 0 : buffer = self->buffer;
1198 0 : self->buffer = NULL;
1199 0 : self->detached = 1;
1200 0 : return buffer;
1201 : }
1202 :
1203 : Py_LOCAL_INLINE(const Py_UNICODE *)
1204 0 : findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1205 : {
1206 : /* like wcschr, but doesn't stop at NULL characters */
1207 0 : while (size-- > 0) {
1208 0 : if (*s == ch)
1209 0 : return s;
1210 0 : s++;
1211 : }
1212 0 : return NULL;
1213 : }
1214 :
1215 : /* Flush the internal write buffer. This doesn't explicitly flush the
1216 : underlying buffered object, though. */
1217 : static int
1218 0 : _textiowrapper_writeflush(textio *self)
1219 : {
1220 : PyObject *pending, *b, *ret;
1221 :
1222 0 : if (self->pending_bytes == NULL)
1223 0 : return 0;
1224 :
1225 0 : pending = self->pending_bytes;
1226 0 : Py_INCREF(pending);
1227 0 : self->pending_bytes_count = 0;
1228 0 : Py_CLEAR(self->pending_bytes);
1229 :
1230 0 : b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1231 0 : Py_DECREF(pending);
1232 0 : if (b == NULL)
1233 0 : return -1;
1234 0 : ret = NULL;
1235 : do {
1236 0 : ret = PyObject_CallMethodObjArgs(self->buffer,
1237 : _PyIO_str_write, b, NULL);
1238 0 : } while (ret == NULL && _PyIO_trap_eintr());
1239 0 : Py_DECREF(b);
1240 0 : if (ret == NULL)
1241 0 : return -1;
1242 0 : Py_DECREF(ret);
1243 0 : return 0;
1244 : }
1245 :
1246 : static PyObject *
1247 0 : textiowrapper_write(textio *self, PyObject *args)
1248 : {
1249 : PyObject *ret;
1250 : PyObject *text; /* owned reference */
1251 : PyObject *b;
1252 : Py_ssize_t textlen;
1253 0 : int haslf = 0;
1254 0 : int needflush = 0;
1255 :
1256 0 : CHECK_ATTACHED(self);
1257 :
1258 0 : if (!PyArg_ParseTuple(args, "U:write", &text)) {
1259 0 : return NULL;
1260 : }
1261 :
1262 0 : CHECK_CLOSED(self);
1263 :
1264 0 : if (self->encoder == NULL) {
1265 0 : PyErr_SetString(PyExc_IOError, "not writable");
1266 0 : return NULL;
1267 : }
1268 :
1269 0 : Py_INCREF(text);
1270 :
1271 0 : textlen = PyUnicode_GetSize(text);
1272 :
1273 0 : if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1274 0 : if (findchar(PyUnicode_AS_UNICODE(text),
1275 0 : PyUnicode_GET_SIZE(text), '\n'))
1276 0 : haslf = 1;
1277 :
1278 0 : if (haslf && self->writetranslate && self->writenl != NULL) {
1279 0 : PyObject *newtext = PyObject_CallMethod(
1280 : text, "replace", "ss", "\n", self->writenl);
1281 0 : Py_DECREF(text);
1282 0 : if (newtext == NULL)
1283 0 : return NULL;
1284 0 : text = newtext;
1285 : }
1286 :
1287 0 : if (self->line_buffering &&
1288 0 : (haslf ||
1289 0 : findchar(PyUnicode_AS_UNICODE(text),
1290 0 : PyUnicode_GET_SIZE(text), '\r')))
1291 0 : needflush = 1;
1292 :
1293 : /* XXX What if we were just reading? */
1294 0 : if (self->encodefunc != NULL) {
1295 0 : b = (*self->encodefunc)((PyObject *) self, text);
1296 0 : self->encoding_start_of_stream = 0;
1297 : }
1298 : else
1299 0 : b = PyObject_CallMethodObjArgs(self->encoder,
1300 : _PyIO_str_encode, text, NULL);
1301 0 : Py_DECREF(text);
1302 0 : if (b == NULL)
1303 0 : return NULL;
1304 :
1305 0 : if (self->pending_bytes == NULL) {
1306 0 : self->pending_bytes = PyList_New(0);
1307 0 : if (self->pending_bytes == NULL) {
1308 0 : Py_DECREF(b);
1309 0 : return NULL;
1310 : }
1311 0 : self->pending_bytes_count = 0;
1312 : }
1313 0 : if (PyList_Append(self->pending_bytes, b) < 0) {
1314 0 : Py_DECREF(b);
1315 0 : return NULL;
1316 : }
1317 0 : self->pending_bytes_count += PyBytes_GET_SIZE(b);
1318 0 : Py_DECREF(b);
1319 0 : if (self->pending_bytes_count > self->chunk_size || needflush) {
1320 0 : if (_textiowrapper_writeflush(self) < 0)
1321 0 : return NULL;
1322 : }
1323 :
1324 0 : if (needflush) {
1325 0 : ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1326 0 : if (ret == NULL)
1327 0 : return NULL;
1328 0 : Py_DECREF(ret);
1329 : }
1330 :
1331 0 : Py_CLEAR(self->snapshot);
1332 :
1333 0 : if (self->decoder) {
1334 0 : ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1335 0 : if (ret == NULL)
1336 0 : return NULL;
1337 0 : Py_DECREF(ret);
1338 : }
1339 :
1340 0 : return PyLong_FromSsize_t(textlen);
1341 : }
1342 :
1343 : /* Steal a reference to chars and store it in the decoded_char buffer;
1344 : */
1345 : static void
1346 0 : textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1347 : {
1348 0 : Py_XSETREF(self->decoded_chars, chars);
1349 0 : self->decoded_chars_used = 0;
1350 0 : }
1351 :
1352 : static PyObject *
1353 0 : textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1354 : {
1355 : PyObject *chars;
1356 : Py_ssize_t avail;
1357 :
1358 0 : if (self->decoded_chars == NULL)
1359 0 : return PyUnicode_FromStringAndSize(NULL, 0);
1360 :
1361 0 : avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1362 0 : - self->decoded_chars_used);
1363 :
1364 : assert(avail >= 0);
1365 :
1366 0 : if (n < 0 || n > avail)
1367 0 : n = avail;
1368 :
1369 0 : if (self->decoded_chars_used > 0 || n < avail) {
1370 0 : chars = PyUnicode_FromUnicode(
1371 0 : PyUnicode_AS_UNICODE(self->decoded_chars)
1372 0 : + self->decoded_chars_used, n);
1373 0 : if (chars == NULL)
1374 0 : return NULL;
1375 : }
1376 : else {
1377 0 : chars = self->decoded_chars;
1378 0 : Py_INCREF(chars);
1379 : }
1380 :
1381 0 : self->decoded_chars_used += n;
1382 0 : return chars;
1383 : }
1384 :
1385 : /* Read and decode the next chunk of data from the BufferedReader.
1386 : */
1387 : static int
1388 0 : textiowrapper_read_chunk(textio *self)
1389 : {
1390 0 : PyObject *dec_buffer = NULL;
1391 0 : PyObject *dec_flags = NULL;
1392 0 : PyObject *input_chunk = NULL;
1393 : PyObject *decoded_chars, *chunk_size;
1394 : int eof;
1395 :
1396 : /* The return value is True unless EOF was reached. The decoded string is
1397 : * placed in self._decoded_chars (replacing its previous value). The
1398 : * entire input chunk is sent to the decoder, though some of it may remain
1399 : * buffered in the decoder, yet to be converted.
1400 : */
1401 :
1402 0 : if (self->decoder == NULL) {
1403 0 : PyErr_SetString(PyExc_IOError, "not readable");
1404 0 : return -1;
1405 : }
1406 :
1407 0 : if (self->telling) {
1408 : /* To prepare for tell(), we need to snapshot a point in the file
1409 : * where the decoder's input buffer is empty.
1410 : */
1411 :
1412 0 : PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1413 : _PyIO_str_getstate, NULL);
1414 0 : if (state == NULL)
1415 0 : return -1;
1416 : /* Given this, we know there was a valid snapshot point
1417 : * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1418 : */
1419 0 : if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1420 0 : Py_DECREF(state);
1421 0 : return -1;
1422 : }
1423 0 : Py_INCREF(dec_buffer);
1424 0 : Py_INCREF(dec_flags);
1425 0 : Py_DECREF(state);
1426 : }
1427 :
1428 : /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1429 0 : chunk_size = PyLong_FromSsize_t(self->chunk_size);
1430 0 : if (chunk_size == NULL)
1431 0 : goto fail;
1432 0 : input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1433 : _PyIO_str_read1, chunk_size, NULL);
1434 0 : Py_DECREF(chunk_size);
1435 0 : if (input_chunk == NULL)
1436 0 : goto fail;
1437 0 : if (!PyBytes_Check(input_chunk)) {
1438 0 : PyErr_Format(PyExc_TypeError,
1439 : "underlying read1() should have returned a bytes object, "
1440 0 : "not '%.200s'", Py_TYPE(input_chunk)->tp_name);
1441 0 : goto fail;
1442 : }
1443 :
1444 0 : eof = (PyBytes_Size(input_chunk) == 0);
1445 :
1446 0 : if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1447 0 : decoded_chars = _PyIncrementalNewlineDecoder_decode(
1448 : self->decoder, input_chunk, eof);
1449 : }
1450 : else {
1451 0 : decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1452 : _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1453 : }
1454 :
1455 0 : if (check_decoded(decoded_chars) < 0)
1456 0 : goto fail;
1457 0 : textiowrapper_set_decoded_chars(self, decoded_chars);
1458 0 : if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1459 0 : eof = 0;
1460 :
1461 0 : if (self->telling) {
1462 : /* At the snapshot point, len(dec_buffer) bytes before the read, the
1463 : * next input to be decoded is dec_buffer + input_chunk.
1464 : */
1465 0 : PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1466 0 : if (next_input == NULL)
1467 0 : goto fail;
1468 0 : if (!PyBytes_Check(next_input)) {
1469 0 : PyErr_Format(PyExc_TypeError,
1470 : "decoder getstate() should have returned a bytes "
1471 : "object, not '%.200s'",
1472 0 : Py_TYPE(next_input)->tp_name);
1473 0 : Py_DECREF(next_input);
1474 0 : goto fail;
1475 : }
1476 0 : Py_DECREF(dec_buffer);
1477 0 : Py_XSETREF(self->snapshot, Py_BuildValue("NN", dec_flags, next_input));
1478 : }
1479 0 : Py_DECREF(input_chunk);
1480 :
1481 0 : return (eof == 0);
1482 :
1483 : fail:
1484 0 : Py_XDECREF(dec_buffer);
1485 0 : Py_XDECREF(dec_flags);
1486 0 : Py_XDECREF(input_chunk);
1487 0 : return -1;
1488 : }
1489 :
1490 : static PyObject *
1491 0 : textiowrapper_read(textio *self, PyObject *args)
1492 : {
1493 0 : Py_ssize_t n = -1;
1494 0 : PyObject *result = NULL, *chunks = NULL;
1495 :
1496 0 : CHECK_ATTACHED(self);
1497 :
1498 0 : if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
1499 0 : return NULL;
1500 :
1501 0 : CHECK_CLOSED(self);
1502 :
1503 0 : if (self->decoder == NULL) {
1504 0 : PyErr_SetString(PyExc_IOError, "not readable");
1505 0 : return NULL;
1506 : }
1507 :
1508 0 : if (_textiowrapper_writeflush(self) < 0)
1509 0 : return NULL;
1510 :
1511 0 : if (n < 0) {
1512 : /* Read everything */
1513 0 : PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1514 : PyObject *decoded, *final;
1515 0 : if (bytes == NULL)
1516 0 : goto fail;
1517 0 : decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1518 : bytes, Py_True, NULL);
1519 0 : Py_DECREF(bytes);
1520 0 : if (check_decoded(decoded) < 0)
1521 0 : goto fail;
1522 :
1523 0 : result = textiowrapper_get_decoded_chars(self, -1);
1524 :
1525 0 : if (result == NULL) {
1526 0 : Py_DECREF(decoded);
1527 0 : return NULL;
1528 : }
1529 :
1530 0 : final = PyUnicode_Concat(result, decoded);
1531 0 : Py_DECREF(result);
1532 0 : Py_DECREF(decoded);
1533 0 : if (final == NULL)
1534 0 : goto fail;
1535 :
1536 0 : Py_CLEAR(self->snapshot);
1537 0 : return final;
1538 : }
1539 : else {
1540 0 : int res = 1;
1541 0 : Py_ssize_t remaining = n;
1542 :
1543 0 : result = textiowrapper_get_decoded_chars(self, n);
1544 0 : if (result == NULL)
1545 0 : goto fail;
1546 0 : remaining -= PyUnicode_GET_SIZE(result);
1547 :
1548 : /* Keep reading chunks until we have n characters to return */
1549 0 : while (remaining > 0) {
1550 0 : res = textiowrapper_read_chunk(self);
1551 0 : if (res < 0) {
1552 : /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1553 : when EINTR occurs so we needn't do it ourselves. */
1554 0 : if (_PyIO_trap_eintr()) {
1555 0 : continue;
1556 : }
1557 0 : goto fail;
1558 : }
1559 0 : if (res == 0) /* EOF */
1560 0 : break;
1561 0 : if (chunks == NULL) {
1562 0 : chunks = PyList_New(0);
1563 0 : if (chunks == NULL)
1564 0 : goto fail;
1565 : }
1566 0 : if (PyList_Append(chunks, result) < 0)
1567 0 : goto fail;
1568 0 : Py_DECREF(result);
1569 0 : result = textiowrapper_get_decoded_chars(self, remaining);
1570 0 : if (result == NULL)
1571 0 : goto fail;
1572 0 : remaining -= PyUnicode_GET_SIZE(result);
1573 : }
1574 0 : if (chunks != NULL) {
1575 0 : if (result != NULL && PyList_Append(chunks, result) < 0)
1576 0 : goto fail;
1577 0 : Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
1578 0 : if (result == NULL)
1579 0 : goto fail;
1580 0 : Py_CLEAR(chunks);
1581 : }
1582 0 : return result;
1583 : }
1584 : fail:
1585 0 : Py_XDECREF(result);
1586 0 : Py_XDECREF(chunks);
1587 0 : return NULL;
1588 : }
1589 :
1590 :
1591 : /* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1592 : that is to the NUL character. Otherwise the function will produce
1593 : incorrect results. */
1594 : static Py_UNICODE *
1595 0 : find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1596 : {
1597 0 : Py_UNICODE *s = start;
1598 : for (;;) {
1599 0 : while (*s > ch)
1600 0 : s++;
1601 0 : if (*s == ch)
1602 0 : return s;
1603 0 : if (s == end)
1604 0 : return NULL;
1605 0 : s++;
1606 0 : }
1607 : }
1608 :
/* Search [start, end) for a line ending.

   translated: newlines were already translated, so only '\n' is searched.
   universal:  (when not translated) any of '\r', '\r\n' or '\n' ends a line.
   readnl:     (when neither flag is set) byte string holding the exact line
               terminator to look for.
   start, end: bounds of the text; the scanning loops rely on the storage
               being NUL-terminated at `end`.
   consumed:   output, only written when no line ending is found.

   Returns the offset, relative to `start`, of the position just past the
   line ending.  If no line ending is found, returns -1 and stores in
   *consumed the number of characters from `start` that the caller can put
   aside (fewer than the full length when the tail might be the beginning of
   a multi-character terminator). */
Py_ssize_t
_PyIO_find_line_ending(
    int translated, int universal, PyObject *readnl,
    Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
{
    Py_ssize_t len = end - start;

    if (translated) {
        /* Newlines are already translated, only search for \n */
        Py_UNICODE *pos = find_control_char(start, end, '\n');
        if (pos != NULL)
            return pos - start + 1;
        else {
            *consumed = len;
            return -1;
        }
    }
    else if (universal) {
        /* Universal newline search. Find any of \r, \r\n, \n
         * The decoder ensures that \r\n are not split in two pieces
         */
        Py_UNICODE *s = start;
        for (;;) {
            Py_UNICODE ch;
            /* Fast path for non-control chars. The loop always ends
               since the Py_UNICODE storage is NUL-terminated. */
            while (*s > '\r')
                s++;
            if (s >= end) {
                *consumed = len;
                return -1;
            }
            ch = *s++;
            if (ch == '\n')
                return s - start;
            if (ch == '\r') {
                /* A '\n' right after the '\r' belongs to the same line
                   ending. */
                if (*s == '\n')
                    return s - start + 1;
                else
                    return s - start;
            }
        }
    }
    else {
        /* Non-universal mode. */
        Py_ssize_t readnl_len = PyString_GET_SIZE(readnl);
        unsigned char *nl = (unsigned char *) PyString_AS_STRING(readnl);
        if (readnl_len == 1) {
            /* Single-character terminator: a plain character search. */
            Py_UNICODE *pos = find_control_char(start, end, nl[0]);
            if (pos != NULL)
                return pos - start + 1;
            *consumed = len;
            return -1;
        }
        else {
            Py_UNICODE *s = start;
            /* `e` is one past the last position where a complete terminator
               could still start. */
            Py_UNICODE *e = end - readnl_len + 1;
            Py_UNICODE *pos;
            if (e < s)
                e = s;
            while (s < e) {
                Py_ssize_t i;
                /* NOTE: this `pos` shadows the outer declaration. */
                Py_UNICODE *pos = find_control_char(s, end, nl[0]);
                if (pos == NULL || pos >= e)
                    break;
                /* First character matched: compare the rest. */
                for (i = 1; i < readnl_len; i++) {
                    if (pos[i] != nl[i])
                        break;
                }
                if (i == readnl_len)
                    return pos - start + readnl_len;
                s = pos + 1;
            }
            /* No complete terminator.  If its first character occurs in the
               tail, only the text before it is reported as consumed. */
            pos = find_control_char(e, end, nl[0]);
            if (pos == NULL)
                *consumed = len;
            else
                *consumed = pos - start;
            return -1;
        }
    }
}
1691 :
/* Core of readline(): return the next line as a new unicode object.

   `limit` is the maximum number of characters to return; a negative value
   means no limit.  Pending written data is flushed first.  Decoded chunks
   are read until a line ending is found, the limit is reached or EOF is
   hit.  Returns an empty string at EOF, or NULL with an exception set
   (ValueError if the stream is closed, or whatever the buffer/decoder
   raised).

   Locals:
     line             - string currently being scanned (owned reference)
     chunks           - list of pieces put aside while no line ending was seen
     remaining        - tail of the previous buffer that must be prepended to
                        the next chunk
     chunked          - total number of characters stored in `chunks`
     start, endpos    - offsets into `line` delimiting the result
     offset_to_buffer - length of the `remaining` prefix in `line`, used to
                        convert `endpos` into an offset in decoded_chars */
static PyObject *
_textiowrapper_readline(textio *self, Py_ssize_t limit)
{
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
    int res;

    CHECK_CLOSED(self);

    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    chunked = 0;

    while (1) {
        Py_UNICODE *ptr;
        Py_ssize_t line_len;
        Py_ssize_t consumed = 0;

        /* First, get some data if necessary */
        res = 1;
        while (!self->decoded_chars ||
               !PyUnicode_GET_SIZE(self->decoded_chars)) {
            res = textiowrapper_read_chunk(self);
            if (res < 0) {
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
                   when EINTR occurs so we needn't do it ourselves. */
                if (_PyIO_trap_eintr()) {
                    continue;
                }
                goto error;
            }
            if (res == 0)
                break;
        }
        if (res == 0) {
            /* end of file */
            textiowrapper_set_decoded_chars(self, NULL);
            Py_CLEAR(self->snapshot);
            start = endpos = offset_to_buffer = 0;
            break;
        }

        if (remaining == NULL) {
            /* Scan the decoded buffer in place, from the read offset. */
            line = self->decoded_chars;
            start = self->decoded_chars_used;
            offset_to_buffer = 0;
            Py_INCREF(line);
        }
        else {
            /* Prepend the leftover of the previous buffer to the new one. */
            assert(self->decoded_chars_used == 0);
            line = PyUnicode_Concat(remaining, self->decoded_chars);
            start = 0;
            offset_to_buffer = PyUnicode_GET_SIZE(remaining);
            Py_CLEAR(remaining);
            if (line == NULL)
                goto error;
        }

        ptr = PyUnicode_AS_UNICODE(line);
        line_len = PyUnicode_GET_SIZE(line);

        endpos = _PyIO_find_line_ending(
            self->readtranslate, self->readuniversal, self->readnl,
            ptr + start, ptr + line_len, &consumed);
        if (endpos >= 0) {
            /* Line ending found: make endpos relative to `line` and clamp
               it to the limit. */
            endpos += start;
            if (limit >= 0 && (endpos - start) + chunked >= limit)
                endpos = start + limit - chunked;
            break;
        }

        /* We can put aside up to `endpos` */
        endpos = consumed + start;
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
            /* Didn't find line ending, but reached length limit */
            endpos = start + limit - chunked;
            break;
        }

        if (endpos > start) {
            /* No line ending seen yet - put aside current data */
            PyObject *s;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto error;
            }
            s = PyUnicode_FromUnicode(ptr + start, endpos - start);
            if (s == NULL)
                goto error;
            if (PyList_Append(chunks, s) < 0) {
                Py_DECREF(s);
                goto error;
            }
            chunked += PyUnicode_GET_SIZE(s);
            Py_DECREF(s);
        }
        /* There may be some remaining bytes we'll have to prepend to the
           next chunk of data */
        if (endpos < line_len) {
            remaining = PyUnicode_FromUnicode(
                    ptr + endpos, line_len - endpos);
            if (remaining == NULL)
                goto error;
        }
        Py_CLEAR(line);
        /* We have consumed the buffer */
        textiowrapper_set_decoded_chars(self, NULL);
    }

    if (line != NULL) {
        /* Our line ends in the current buffer */
        self->decoded_chars_used = endpos - offset_to_buffer;
        if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
            /* Only part of `line` is returned.  It is truncated in place
               when nobody else references it, copied otherwise. */
            if (start == 0 && Py_REFCNT(line) == 1) {
                if (PyUnicode_Resize(&line, endpos) < 0)
                    goto error;
            }
            else {
                PyObject *s = PyUnicode_FromUnicode(
                        PyUnicode_AS_UNICODE(line) + start, endpos - start);
                Py_CLEAR(line);
                if (s == NULL)
                    goto error;
                line = s;
            }
        }
    }
    if (remaining != NULL) {
        /* A leftover that was never prepended to a new chunk (EOF reached)
           is part of the result. */
        if (chunks == NULL) {
            chunks = PyList_New(0);
            if (chunks == NULL)
                goto error;
        }
        if (PyList_Append(chunks, remaining) < 0)
            goto error;
        Py_CLEAR(remaining);
    }
    if (chunks != NULL) {
        /* Assemble the pieces put aside, followed by the final part. */
        if (line != NULL && PyList_Append(chunks, line) < 0)
            goto error;
        Py_XSETREF(line, PyUnicode_Join(_PyIO_empty_str, chunks));
        if (line == NULL)
            goto error;
        Py_DECREF(chunks);
    }
    if (line == NULL)
        line = PyUnicode_FromStringAndSize(NULL, 0);

    return line;

  error:
    Py_XDECREF(chunks);
    Py_XDECREF(remaining);
    Py_XDECREF(line);
    return NULL;
}
1850 :
1851 : static PyObject *
1852 0 : textiowrapper_readline(textio *self, PyObject *args)
1853 : {
1854 0 : PyObject *limitobj = NULL;
1855 0 : Py_ssize_t limit = -1;
1856 :
1857 0 : CHECK_ATTACHED(self);
1858 0 : if (!PyArg_ParseTuple(args, "|O:readline", &limitobj)) {
1859 0 : return NULL;
1860 : }
1861 0 : if (limitobj) {
1862 0 : if (!PyNumber_Check(limitobj)) {
1863 0 : PyErr_Format(PyExc_TypeError,
1864 : "integer argument expected, got '%.200s'",
1865 0 : Py_TYPE(limitobj)->tp_name);
1866 0 : return NULL;
1867 : }
1868 0 : limit = PyNumber_AsSsize_t(limitobj, PyExc_OverflowError);
1869 0 : if (limit == -1 && PyErr_Occurred())
1870 0 : return NULL;
1871 : }
1872 0 : return _textiowrapper_readline(self, limit);
1873 : }
1874 :
1875 : /* Seek and Tell */
1876 :
/* Unpacked form of the integer position cookie handled by seek(); converted
   from/to a Python integer by textiowrapper_parse_cookie() and
   textiowrapper_build_cookie(). */
typedef struct {
    Py_off_t start_pos;   /* position in the underlying buffer that seek()
                             goes back to (the "safe start point") */
    int dec_flags;        /* decoder flags restored through
                             decoder.setstate() */
    int bytes_to_feed;    /* number of bytes seek() reads from the buffer to
                             feed the decoder */
    int chars_to_skip;    /* nonzero when seek() has to replay a read from
                             start_pos; presumably the number of decoded
                             characters to skip -- TODO confirm */
    char need_eof;        /* NOTE(review): not used in the code visible here;
                             presumably tells whether the decoder must be
                             given EOF -- confirm against seek()/tell() */
} cookie_type;
1884 :
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte buffer and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (resp.).  The following macros define at which offsets in the intermediary
   byte buffer the various cookie_type fields will be stored.
*/

/* Size of the intermediary buffer: one Py_off_t, three ints and one char,
   stored back to back without padding. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

/* Byte-order flag handed to _PyLong_AsByteArray()/_PyLong_FromByteArray(). */
# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else

/* Byte-order flag handed to _PyLong_AsByteArray()/_PyLong_FromByteArray(). */
# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1922 :
1923 : static int
1924 0 : textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
1925 : {
1926 : unsigned char buffer[COOKIE_BUF_LEN];
1927 0 : PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1928 0 : if (cookieLong == NULL)
1929 0 : return -1;
1930 :
1931 0 : if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1932 : IS_LITTLE_ENDIAN, 0) < 0) {
1933 0 : Py_DECREF(cookieLong);
1934 0 : return -1;
1935 : }
1936 0 : Py_DECREF(cookieLong);
1937 :
1938 0 : memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1939 0 : memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1940 0 : memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1941 0 : memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1942 0 : memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
1943 :
1944 0 : return 0;
1945 : }
1946 :
1947 : static PyObject *
1948 0 : textiowrapper_build_cookie(cookie_type *cookie)
1949 : {
1950 : unsigned char buffer[COOKIE_BUF_LEN];
1951 :
1952 0 : memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1953 0 : memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1954 0 : memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1955 0 : memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1956 0 : memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
1957 :
1958 0 : return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1959 : }
1960 : #undef IS_LITTLE_ENDIAN
1961 :
1962 : static int
1963 0 : _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
1964 : {
1965 : PyObject *res;
1966 : /* When seeking to the start of the stream, we call decoder.reset()
1967 : rather than decoder.getstate().
1968 : This is for a few decoders such as utf-16 for which the state value
1969 : at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1970 : utf-16, that we are expecting a BOM).
1971 : */
1972 0 : if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1973 0 : res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1974 : else
1975 0 : res = PyObject_CallMethod(self->decoder, "setstate",
1976 : "((si))", "", cookie->dec_flags);
1977 0 : if (res == NULL)
1978 0 : return -1;
1979 0 : Py_DECREF(res);
1980 0 : return 0;
1981 : }
1982 :
1983 : static int
1984 0 : _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
1985 : {
1986 : PyObject *res;
1987 : /* Same as _textiowrapper_decoder_setstate() above. */
1988 0 : if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1989 0 : res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1990 0 : self->encoding_start_of_stream = 1;
1991 : }
1992 : else {
1993 0 : res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1994 : _PyIO_zero, NULL);
1995 0 : self->encoding_start_of_stream = 0;
1996 : }
1997 0 : if (res == NULL)
1998 0 : return -1;
1999 0 : Py_DECREF(res);
2000 0 : return 0;
2001 : }
2002 :
2003 : static PyObject *
2004 0 : textiowrapper_seek(textio *self, PyObject *args)
2005 : {
2006 : PyObject *cookieObj, *posobj;
2007 : cookie_type cookie;
2008 0 : int whence = 0;
2009 : PyObject *res;
2010 : int cmp;
2011 :
2012 0 : CHECK_ATTACHED(self);
2013 :
2014 0 : if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2015 0 : return NULL;
2016 0 : CHECK_CLOSED(self);
2017 :
2018 0 : Py_INCREF(cookieObj);
2019 :
2020 0 : if (!self->seekable) {
2021 0 : PyErr_SetString(PyExc_IOError,
2022 : "underlying stream is not seekable");
2023 0 : goto fail;
2024 : }
2025 :
2026 0 : if (whence == 1) {
2027 : /* seek relative to current position */
2028 0 : cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2029 0 : if (cmp < 0)
2030 0 : goto fail;
2031 :
2032 0 : if (cmp == 0) {
2033 0 : PyErr_SetString(PyExc_IOError,
2034 : "can't do nonzero cur-relative seeks");
2035 0 : goto fail;
2036 : }
2037 :
2038 : /* Seeking to the current position should attempt to
2039 : * sync the underlying buffer with the current position.
2040 : */
2041 0 : Py_DECREF(cookieObj);
2042 0 : cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
2043 0 : if (cookieObj == NULL)
2044 0 : goto fail;
2045 : }
2046 0 : else if (whence == 2) {
2047 : /* seek relative to end of file */
2048 :
2049 0 : cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2050 0 : if (cmp < 0)
2051 0 : goto fail;
2052 :
2053 0 : if (cmp == 0) {
2054 0 : PyErr_SetString(PyExc_IOError,
2055 : "can't do nonzero end-relative seeks");
2056 0 : goto fail;
2057 : }
2058 :
2059 0 : res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2060 0 : if (res == NULL)
2061 0 : goto fail;
2062 0 : Py_DECREF(res);
2063 :
2064 0 : textiowrapper_set_decoded_chars(self, NULL);
2065 0 : Py_CLEAR(self->snapshot);
2066 0 : if (self->decoder) {
2067 0 : res = PyObject_CallMethod(self->decoder, "reset", NULL);
2068 0 : if (res == NULL)
2069 0 : goto fail;
2070 0 : Py_DECREF(res);
2071 : }
2072 :
2073 0 : res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2074 0 : Py_XDECREF(cookieObj);
2075 0 : return res;
2076 : }
2077 0 : else if (whence != 0) {
2078 0 : PyErr_Format(PyExc_ValueError,
2079 : "invalid whence (%d, should be 0, 1 or 2)", whence);
2080 0 : goto fail;
2081 : }
2082 :
2083 0 : cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
2084 0 : if (cmp < 0)
2085 0 : goto fail;
2086 :
2087 0 : if (cmp == 1) {
2088 0 : PyObject *repr = PyObject_Repr(cookieObj);
2089 0 : if (repr != NULL) {
2090 0 : PyErr_Format(PyExc_ValueError,
2091 : "negative seek position %s",
2092 0 : PyString_AS_STRING(repr));
2093 0 : Py_DECREF(repr);
2094 : }
2095 0 : goto fail;
2096 : }
2097 :
2098 0 : res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2099 0 : if (res == NULL)
2100 0 : goto fail;
2101 0 : Py_DECREF(res);
2102 :
2103 : /* The strategy of seek() is to go back to the safe start point
2104 : * and replay the effect of read(chars_to_skip) from there.
2105 : */
2106 0 : if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2107 0 : goto fail;
2108 :
2109 : /* Seek back to the safe start point. */
2110 0 : posobj = PyLong_FromOff_t(cookie.start_pos);
2111 0 : if (posobj == NULL)
2112 0 : goto fail;
2113 0 : res = PyObject_CallMethodObjArgs(self->buffer,
2114 : _PyIO_str_seek, posobj, NULL);
2115 0 : Py_DECREF(posobj);
2116 0 : if (res == NULL)
2117 0 : goto fail;
2118 0 : Py_DECREF(res);
2119 :
2120 0 : textiowrapper_set_decoded_chars(self, NULL);
2121 0 : Py_CLEAR(self->snapshot);
2122 :
2123 : /* Restore the decoder to its state from the safe start point. */
2124 0 : if (self->decoder) {
2125 0 : if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2126 0 : goto fail;
2127 : }
2128 :
2129 0 : if (cookie.chars_to_skip) {
2130 : /* Just like _read_chunk, feed the decoder and save a snapshot. */
2131 0 : PyObject *input_chunk = PyObject_CallMethod(
2132 : self->buffer, "read", "i", cookie.bytes_to_feed);
2133 : PyObject *decoded;
2134 :
2135 0 : if (input_chunk == NULL)
2136 0 : goto fail;
2137 :
2138 0 : if (!PyBytes_Check(input_chunk)) {
2139 0 : PyErr_Format(PyExc_TypeError,
2140 : "underlying read() should have returned a bytes "
2141 : "object, not '%.200s'",
2142 0 : Py_TYPE(input_chunk)->tp_name);
2143 0 : Py_DECREF(input_chunk);
2144 0 : goto fail;
2145 : }
2146 :
2147 0 : self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2148 0 : if (self->snapshot == NULL) {
2149 0 : Py_DECREF(input_chunk);
2150 0 : goto fail;
2151 : }
2152 :
2153 0 : decoded = PyObject_CallMethod(self->decoder, "decode",
2154 0 : "Oi", input_chunk, (int)cookie.need_eof);
2155 :
2156 0 : if (check_decoded(decoded) < 0)
2157 0 : goto fail;
2158 :
2159 0 : textiowrapper_set_decoded_chars(self, decoded);
2160 :
2161 : /* Skip chars_to_skip of the decoded characters. */
2162 0 : if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2163 0 : PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2164 0 : goto fail;
2165 : }
2166 0 : self->decoded_chars_used = cookie.chars_to_skip;
2167 : }
2168 : else {
2169 0 : self->snapshot = Py_BuildValue("is", cookie.dec_flags, "");
2170 0 : if (self->snapshot == NULL)
2171 0 : goto fail;
2172 : }
2173 :
2174 : /* Finally, reset the encoder (merely useful for proper BOM handling) */
2175 0 : if (self->encoder) {
2176 0 : if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2177 0 : goto fail;
2178 : }
2179 0 : return cookieObj;
2180 : fail:
2181 0 : Py_XDECREF(cookieObj);
2182 0 : return NULL;
2183 :
2184 : }
2185 :
2186 : static PyObject *
2187 0 : textiowrapper_tell(textio *self, PyObject *args)
2188 : {
2189 : PyObject *res;
2190 0 : PyObject *posobj = NULL;
2191 0 : cookie_type cookie = {0,0,0,0,0};
2192 : PyObject *next_input;
2193 : Py_ssize_t chars_to_skip, chars_decoded;
2194 0 : PyObject *saved_state = NULL;
2195 : char *input, *input_end;
2196 :
2197 0 : CHECK_ATTACHED(self);
2198 0 : CHECK_CLOSED(self);
2199 :
2200 0 : if (!self->seekable) {
2201 0 : PyErr_SetString(PyExc_IOError,
2202 : "underlying stream is not seekable");
2203 0 : goto fail;
2204 : }
2205 0 : if (!self->telling) {
2206 0 : PyErr_SetString(PyExc_IOError,
2207 : "telling position disabled by next() call");
2208 0 : goto fail;
2209 : }
2210 :
2211 0 : if (_textiowrapper_writeflush(self) < 0)
2212 0 : return NULL;
2213 0 : res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2214 0 : if (res == NULL)
2215 0 : goto fail;
2216 0 : Py_DECREF(res);
2217 :
2218 0 : posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2219 0 : if (posobj == NULL)
2220 0 : goto fail;
2221 :
2222 0 : if (self->decoder == NULL || self->snapshot == NULL) {
2223 : assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2224 0 : return posobj;
2225 : }
2226 :
2227 : #if defined(HAVE_LARGEFILE_SUPPORT)
2228 : cookie.start_pos = PyLong_AsLongLong(posobj);
2229 : #else
2230 0 : cookie.start_pos = PyLong_AsLong(posobj);
2231 : #endif
2232 0 : if (PyErr_Occurred())
2233 0 : goto fail;
2234 :
2235 : /* Skip backward to the snapshot point (see _read_chunk). */
2236 0 : if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2237 0 : goto fail;
2238 :
2239 : assert (PyBytes_Check(next_input));
2240 :
2241 0 : cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2242 :
2243 : /* How many decoded characters have been used up since the snapshot? */
2244 0 : if (self->decoded_chars_used == 0) {
2245 : /* We haven't moved from the snapshot point. */
2246 0 : Py_DECREF(posobj);
2247 0 : return textiowrapper_build_cookie(&cookie);
2248 : }
2249 :
2250 0 : chars_to_skip = self->decoded_chars_used;
2251 :
2252 : /* Starting from the snapshot position, we will walk the decoder
2253 : * forward until it gives us enough decoded characters.
2254 : */
2255 0 : saved_state = PyObject_CallMethodObjArgs(self->decoder,
2256 : _PyIO_str_getstate, NULL);
2257 0 : if (saved_state == NULL)
2258 0 : goto fail;
2259 :
2260 : /* Note our initial start point. */
2261 0 : if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2262 0 : goto fail;
2263 :
2264 : /* Feed the decoder one byte at a time. As we go, note the
2265 : * nearest "safe start point" before the current location
2266 : * (a point where the decoder has nothing buffered, so seek()
2267 : * can safely start from there and advance to this location).
2268 : */
2269 0 : chars_decoded = 0;
2270 0 : input = PyBytes_AS_STRING(next_input);
2271 0 : input_end = input + PyBytes_GET_SIZE(next_input);
2272 0 : while (input < input_end) {
2273 : PyObject *state;
2274 : char *dec_buffer;
2275 : Py_ssize_t dec_buffer_len;
2276 : int dec_flags;
2277 :
2278 0 : PyObject *decoded = PyObject_CallMethod(
2279 : self->decoder, "decode", "s#", input, (Py_ssize_t)1);
2280 0 : if (check_decoded(decoded) < 0)
2281 0 : goto fail;
2282 0 : chars_decoded += PyUnicode_GET_SIZE(decoded);
2283 0 : Py_DECREF(decoded);
2284 :
2285 0 : cookie.bytes_to_feed += 1;
2286 :
2287 0 : state = PyObject_CallMethodObjArgs(self->decoder,
2288 : _PyIO_str_getstate, NULL);
2289 0 : if (state == NULL)
2290 0 : goto fail;
2291 0 : if (!PyArg_Parse(state, "(s#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2292 0 : Py_DECREF(state);
2293 0 : goto fail;
2294 : }
2295 0 : Py_DECREF(state);
2296 :
2297 0 : if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2298 : /* Decoder buffer is empty, so this is a safe start point. */
2299 0 : cookie.start_pos += cookie.bytes_to_feed;
2300 0 : chars_to_skip -= chars_decoded;
2301 0 : cookie.dec_flags = dec_flags;
2302 0 : cookie.bytes_to_feed = 0;
2303 0 : chars_decoded = 0;
2304 : }
2305 0 : if (chars_decoded >= chars_to_skip)
2306 0 : break;
2307 0 : input++;
2308 : }
2309 0 : if (input == input_end) {
2310 : /* We didn't get enough decoded data; signal EOF to get more. */
2311 0 : PyObject *decoded = PyObject_CallMethod(
2312 : self->decoder, "decode", "si", "", /* final = */ 1);
2313 0 : if (check_decoded(decoded) < 0)
2314 0 : goto fail;
2315 0 : chars_decoded += PyUnicode_GET_SIZE(decoded);
2316 0 : Py_DECREF(decoded);
2317 0 : cookie.need_eof = 1;
2318 :
2319 0 : if (chars_decoded < chars_to_skip) {
2320 0 : PyErr_SetString(PyExc_IOError,
2321 : "can't reconstruct logical file position");
2322 0 : goto fail;
2323 : }
2324 : }
2325 :
2326 : /* finally */
2327 0 : Py_XDECREF(posobj);
2328 0 : res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2329 0 : Py_DECREF(saved_state);
2330 0 : if (res == NULL)
2331 0 : return NULL;
2332 0 : Py_DECREF(res);
2333 :
2334 : /* The returned cookie corresponds to the last safe start point. */
2335 0 : cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2336 0 : return textiowrapper_build_cookie(&cookie);
2337 :
2338 : fail:
2339 0 : Py_XDECREF(posobj);
2340 0 : if (saved_state) {
2341 : PyObject *type, *value, *traceback;
2342 0 : PyErr_Fetch(&type, &value, &traceback);
2343 :
2344 0 : res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2345 0 : _PyErr_ReplaceException(type, value, traceback);
2346 0 : Py_DECREF(saved_state);
2347 0 : Py_XDECREF(res);
2348 : }
2349 0 : return NULL;
2350 : }
2351 :
2352 : static PyObject *
2353 0 : textiowrapper_truncate(textio *self, PyObject *args)
2354 : {
2355 0 : PyObject *pos = Py_None;
2356 : PyObject *res;
2357 :
2358 0 : CHECK_ATTACHED(self)
2359 0 : if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2360 0 : return NULL;
2361 : }
2362 :
2363 0 : res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2364 0 : if (res == NULL)
2365 0 : return NULL;
2366 0 : Py_DECREF(res);
2367 :
2368 0 : return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
2369 : }
2370 :
2371 : static PyObject *
2372 0 : textiowrapper_repr(textio *self)
2373 : {
2374 : PyObject *nameobj, *res;
2375 0 : PyObject *namerepr = NULL, *encrepr = NULL;
2376 :
2377 0 : CHECK_INITIALIZED(self);
2378 :
2379 0 : nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2380 0 : if (nameobj == NULL) {
2381 0 : if (PyErr_ExceptionMatches(PyExc_Exception))
2382 0 : PyErr_Clear();
2383 : else
2384 0 : goto error;
2385 0 : encrepr = PyObject_Repr(self->encoding);
2386 0 : res = PyString_FromFormat("<_io.TextIOWrapper encoding=%s>",
2387 0 : PyString_AS_STRING(encrepr));
2388 : }
2389 : else {
2390 0 : encrepr = PyObject_Repr(self->encoding);
2391 0 : namerepr = PyObject_Repr(nameobj);
2392 0 : res = PyString_FromFormat("<_io.TextIOWrapper name=%s encoding=%s>",
2393 0 : PyString_AS_STRING(namerepr),
2394 0 : PyString_AS_STRING(encrepr));
2395 0 : Py_DECREF(nameobj);
2396 : }
2397 0 : Py_XDECREF(namerepr);
2398 0 : Py_XDECREF(encrepr);
2399 0 : return res;
2400 :
2401 : error:
2402 0 : Py_XDECREF(namerepr);
2403 0 : Py_XDECREF(encrepr);
2404 0 : return NULL;
2405 : }
2406 :
2407 :
2408 : /* Inquiries */
2409 :
2410 : static PyObject *
2411 0 : textiowrapper_fileno(textio *self, PyObject *args)
2412 : {
2413 0 : CHECK_ATTACHED(self);
2414 0 : return PyObject_CallMethod(self->buffer, "fileno", NULL);
2415 : }
2416 :
2417 : static PyObject *
2418 0 : textiowrapper_seekable(textio *self, PyObject *args)
2419 : {
2420 0 : CHECK_ATTACHED(self);
2421 0 : return PyObject_CallMethod(self->buffer, "seekable", NULL);
2422 : }
2423 :
2424 : static PyObject *
2425 0 : textiowrapper_readable(textio *self, PyObject *args)
2426 : {
2427 0 : CHECK_ATTACHED(self);
2428 0 : return PyObject_CallMethod(self->buffer, "readable", NULL);
2429 : }
2430 :
2431 : static PyObject *
2432 0 : textiowrapper_writable(textio *self, PyObject *args)
2433 : {
2434 0 : CHECK_ATTACHED(self);
2435 0 : return PyObject_CallMethod(self->buffer, "writable", NULL);
2436 : }
2437 :
2438 : static PyObject *
2439 0 : textiowrapper_isatty(textio *self, PyObject *args)
2440 : {
2441 0 : CHECK_ATTACHED(self);
2442 0 : return PyObject_CallMethod(self->buffer, "isatty", NULL);
2443 : }
2444 :
2445 : static PyObject *
2446 0 : textiowrapper_flush(textio *self, PyObject *args)
2447 : {
2448 0 : CHECK_ATTACHED(self);
2449 0 : CHECK_CLOSED(self);
2450 0 : self->telling = self->seekable;
2451 0 : if (_textiowrapper_writeflush(self) < 0)
2452 0 : return NULL;
2453 0 : return PyObject_CallMethod(self->buffer, "flush", NULL);
2454 : }
2455 :
2456 : static PyObject *
2457 0 : textiowrapper_close(textio *self, PyObject *args)
2458 : {
2459 : PyObject *res;
2460 : int r;
2461 0 : CHECK_ATTACHED(self);
2462 :
2463 0 : res = textiowrapper_closed_get(self, NULL);
2464 0 : if (res == NULL)
2465 0 : return NULL;
2466 0 : r = PyObject_IsTrue(res);
2467 0 : Py_DECREF(res);
2468 0 : if (r < 0)
2469 0 : return NULL;
2470 :
2471 0 : if (r > 0) {
2472 0 : Py_RETURN_NONE; /* stream already closed */
2473 : }
2474 : else {
2475 0 : PyObject *exc = NULL, *val, *tb;
2476 0 : res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2477 0 : if (res == NULL)
2478 0 : PyErr_Fetch(&exc, &val, &tb);
2479 : else
2480 0 : Py_DECREF(res);
2481 :
2482 0 : res = PyObject_CallMethod(self->buffer, "close", NULL);
2483 0 : if (exc != NULL) {
2484 0 : _PyErr_ReplaceException(exc, val, tb);
2485 0 : Py_CLEAR(res);
2486 : }
2487 0 : return res;
2488 : }
2489 : }
2490 :
2491 : static PyObject *
2492 0 : textiowrapper_iternext(textio *self)
2493 : {
2494 : PyObject *line;
2495 :
2496 0 : CHECK_ATTACHED(self);
2497 :
2498 0 : self->telling = 0;
2499 0 : if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2500 : /* Skip method call overhead for speed */
2501 0 : line = _textiowrapper_readline(self, -1);
2502 : }
2503 : else {
2504 0 : line = PyObject_CallMethodObjArgs((PyObject *)self,
2505 : _PyIO_str_readline, NULL);
2506 0 : if (line && !PyUnicode_Check(line)) {
2507 0 : PyErr_Format(PyExc_IOError,
2508 : "readline() should have returned an str object, "
2509 0 : "not '%.200s'", Py_TYPE(line)->tp_name);
2510 0 : Py_DECREF(line);
2511 0 : return NULL;
2512 : }
2513 : }
2514 :
2515 0 : if (line == NULL)
2516 0 : return NULL;
2517 :
2518 0 : if (PyUnicode_GET_SIZE(line) == 0) {
2519 : /* Reached EOF or would have blocked */
2520 0 : Py_DECREF(line);
2521 0 : Py_CLEAR(self->snapshot);
2522 0 : self->telling = self->seekable;
2523 0 : return NULL;
2524 : }
2525 :
2526 0 : return line;
2527 : }
2528 :
2529 : static PyObject *
2530 0 : textiowrapper_name_get(textio *self, void *context)
2531 : {
2532 0 : CHECK_ATTACHED(self);
2533 0 : return PyObject_GetAttrString(self->buffer, "name");
2534 : }
2535 :
2536 : static PyObject *
2537 0 : textiowrapper_closed_get(textio *self, void *context)
2538 : {
2539 0 : CHECK_ATTACHED(self);
2540 0 : return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2541 : }
2542 :
2543 : static PyObject *
2544 0 : textiowrapper_newlines_get(textio *self, void *context)
2545 : {
2546 : PyObject *res;
2547 0 : CHECK_ATTACHED(self);
2548 0 : if (self->decoder == NULL)
2549 0 : Py_RETURN_NONE;
2550 0 : res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2551 0 : if (res == NULL) {
2552 0 : if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2553 0 : PyErr_Clear();
2554 0 : Py_RETURN_NONE;
2555 : }
2556 : else {
2557 0 : return NULL;
2558 : }
2559 : }
2560 0 : return res;
2561 : }
2562 :
2563 : static PyObject *
2564 0 : textiowrapper_errors_get(textio *self, void *context)
2565 : {
2566 0 : CHECK_INITIALIZED(self);
2567 0 : Py_INCREF(self->errors);
2568 0 : return self->errors;
2569 : }
2570 :
2571 : static PyObject *
2572 0 : textiowrapper_chunk_size_get(textio *self, void *context)
2573 : {
2574 0 : CHECK_ATTACHED(self);
2575 0 : return PyLong_FromSsize_t(self->chunk_size);
2576 : }
2577 :
2578 : static int
2579 0 : textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
2580 : {
2581 : Py_ssize_t n;
2582 0 : CHECK_ATTACHED_INT(self);
2583 0 : n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2584 0 : if (n == -1 && PyErr_Occurred())
2585 0 : return -1;
2586 0 : if (n <= 0) {
2587 0 : PyErr_SetString(PyExc_ValueError,
2588 : "a strictly positive integer is required");
2589 0 : return -1;
2590 : }
2591 0 : self->chunk_size = n;
2592 0 : return 0;
2593 : }
2594 :
2595 : static PyMethodDef textiowrapper_methods[] = {
2596 : {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2597 : {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2598 : {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2599 : {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2600 : {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2601 : {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
2602 :
2603 : {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2604 : {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2605 : {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2606 : {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2607 : {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
2608 :
2609 : {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2610 : {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2611 : {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
2612 : {NULL, NULL}
2613 : };
2614 :
2615 : static PyMemberDef textiowrapper_members[] = {
2616 : {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2617 : {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2618 : {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
2619 : {NULL}
2620 : };
2621 :
2622 : static PyGetSetDef textiowrapper_getset[] = {
2623 : {"name", (getter)textiowrapper_name_get, NULL, NULL},
2624 : {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
2625 : /* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2626 : */
2627 : {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2628 : {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2629 : {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2630 : (setter)textiowrapper_chunk_size_set, NULL},
2631 : {NULL}
2632 : };
2633 :
2634 : PyTypeObject PyTextIOWrapper_Type = {
2635 : PyVarObject_HEAD_INIT(NULL, 0)
2636 : "_io.TextIOWrapper", /*tp_name*/
2637 : sizeof(textio), /*tp_basicsize*/
2638 : 0, /*tp_itemsize*/
2639 : (destructor)textiowrapper_dealloc, /*tp_dealloc*/
2640 : 0, /*tp_print*/
2641 : 0, /*tp_getattr*/
2642 : 0, /*tps_etattr*/
2643 : 0, /*tp_compare */
2644 : (reprfunc)textiowrapper_repr,/*tp_repr*/
2645 : 0, /*tp_as_number*/
2646 : 0, /*tp_as_sequence*/
2647 : 0, /*tp_as_mapping*/
2648 : 0, /*tp_hash */
2649 : 0, /*tp_call*/
2650 : 0, /*tp_str*/
2651 : 0, /*tp_getattro*/
2652 : 0, /*tp_setattro*/
2653 : 0, /*tp_as_buffer*/
2654 : Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2655 : | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
2656 : textiowrapper_doc, /* tp_doc */
2657 : (traverseproc)textiowrapper_traverse, /* tp_traverse */
2658 : (inquiry)textiowrapper_clear, /* tp_clear */
2659 : 0, /* tp_richcompare */
2660 : offsetof(textio, weakreflist), /*tp_weaklistoffset*/
2661 : 0, /* tp_iter */
2662 : (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2663 : textiowrapper_methods, /* tp_methods */
2664 : textiowrapper_members, /* tp_members */
2665 : textiowrapper_getset, /* tp_getset */
2666 : 0, /* tp_base */
2667 : 0, /* tp_dict */
2668 : 0, /* tp_descr_get */
2669 : 0, /* tp_descr_set */
2670 : offsetof(textio, dict), /*tp_dictoffset*/
2671 : (initproc)textiowrapper_init, /* tp_init */
2672 : 0, /* tp_alloc */
2673 : PyType_GenericNew, /* tp_new */
2674 : };
|