1 """
2 Python implementation of the io module.
3 """
4
5 import os
6 import abc
7 import codecs
8 import errno
9 import stat
10 import sys
11 # Import _thread instead of threading to reduce startup cost
12 from _thread import allocate_lock as Lock
13 if sys.platform in {'win32', 'cygwin'}:
14 from msvcrt import setmode as _setmode
15 else:
16 _setmode = None
17
18 import io
19 from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
20
21 valid_seek_flags = {0, 1, 2} # Hardwired values
22 if hasattr(os, 'SEEK_HOLE'):
23 valid_seek_flags.add(os.SEEK_HOLE)
24 valid_seek_flags.add(os.SEEK_DATA)
25
26 # open() uses st_blksize whenever we can
27 DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
28
29 # NOTE: Base classes defined here are registered with the "official" ABCs
30 # defined in io.py. We don't use real inheritance though, because we don't want
31 # to inherit the C implementations.
32
33 # Rebind for compatibility
34 BlockingIOError = BlockingIOError
35
36 # Does io.IOBase finalizer log the exception if the close() method fails?
37 # The exception is ignored silently by default in release builds.
38 _IOBASE_EMITS_UNRAISABLE = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode)
39 # Does open() check its 'errors' argument?
40 _CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE
41
42
43 def text_encoding(encoding, stacklevel=2):
44 """
45 A helper function to choose the text encoding.
46
47 When encoding is not None, this function returns it.
48 Otherwise, this function returns the default text encoding
49 (i.e. "locale" or "utf-8" depends on UTF-8 mode).
50
51 This function emits an EncodingWarning if *encoding* is None and
52 sys.flags.warn_default_encoding is true.
53
54 This can be used in APIs with an encoding=None parameter
55 that pass it to TextIOWrapper or open.
56 However, please consider using encoding="utf-8" for new APIs.
57 """
58 if encoding is None:
59 if sys.flags.utf8_mode:
60 encoding = "utf-8"
61 else:
62 encoding = "locale"
63 if sys.flags.warn_default_encoding:
64 import warnings
65 warnings.warn("'encoding' argument not specified.",
66 EncodingWarning, stacklevel + 1)
67 return encoding
68
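# Illustrative sketch (not part of this module): how an API with an
# encoding=None parameter is expected to use text_encoding() before handing
# the value to open() or TextIOWrapper. The helper name and path argument
# below are hypothetical.
def _example_read_text(path, encoding=None):
    # Resolves None to "locale" or "utf-8" and emits EncodingWarning when
    # sys.flags.warn_default_encoding is set, just as open() itself does.
    encoding = text_encoding(encoding)
    with open(path, encoding=encoding) as f:
        return f.read()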
69
70 # Wrapper for builtins.open
71 #
72 # Trick so that open() won't become a bound method when stored
73 # as a class variable (as dbm.dumb does).
74 #
75 # See init_set_builtins_open() in Python/pylifecycle.c.
76 @staticmethod
77 def open(file, mode="r", buffering=-1, encoding=None, errors=None,
78 newline=None, closefd=True, opener=None):
79
80 r"""Open file and return a stream. Raise OSError upon failure.
81
82 file is either a text or byte string giving the name (and the path
83 if the file isn't in the current working directory) of the file to
84 be opened or an integer file descriptor of the file to be
85 wrapped. (If a file descriptor is given, it is closed when the
86 returned I/O object is closed, unless closefd is set to False.)
87
88 mode is an optional string that specifies the mode in which the file is
89 opened. It defaults to 'r' which means open for reading in text mode. Other
90 common values are 'w' for writing (truncating the file if it already
91 exists), 'x' for exclusive creation of a new file, and 'a' for appending
92 (which on some Unix systems means that all writes append to the end of the
93 file regardless of the current seek position). In text mode, if encoding is
94 not specified the encoding used is platform dependent. (For reading and
95 writing raw bytes use binary mode and leave encoding unspecified.) The
96 available modes are:
97
98 ========= ===============================================================
99 Character Meaning
100 --------- ---------------------------------------------------------------
101 'r' open for reading (default)
102 'w' open for writing, truncating the file first
103 'x' create a new file and open it for writing
104 'a' open for writing, appending to the end of the file if it exists
105 'b' binary mode
106 't' text mode (default)
107 '+' open a disk file for updating (reading and writing)
108 ========= ===============================================================
109
110 The default mode is 'rt' (open for reading text). For binary random
111 access, the mode 'w+b' opens and truncates the file to 0 bytes, while
112 'r+b' opens the file without truncation. The 'x' mode implies 'w' and
113 raises a `FileExistsError` if the file already exists.
114
115 Python distinguishes between files opened in binary and text modes,
116 even when the underlying operating system doesn't. Files opened in
117 binary mode (appending 'b' to the mode argument) return contents as
118 bytes objects without any decoding. In text mode (the default, or when
119 't' is appended to the mode argument), the contents of the file are
120 returned as strings, the bytes having been first decoded using a
121 platform-dependent encoding or using the specified encoding if given.
122
123 buffering is an optional integer used to set the buffering policy.
124 Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
125 line buffering (only usable in text mode), and an integer > 1 to indicate
126 the size of a fixed-size chunk buffer. When no buffering argument is
127 given, the default buffering policy works as follows:
128
129 * Binary files are buffered in fixed-size chunks; the size of the buffer
130 is chosen using a heuristic trying to determine the underlying device's
131 "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
132 On many systems, the buffer will typically be 4096 or 8192 bytes long.
133
134 * "Interactive" text files (files for which isatty() returns True)
135 use line buffering. Other text files use the policy described above
136 for binary files.
137
138 encoding is the str name of the encoding used to decode or encode the
139 file. This should only be used in text mode. The default encoding is
140 platform dependent, but any encoding supported by Python can be
141 passed. See the codecs module for the list of supported encodings.
142
143 errors is an optional string that specifies how encoding errors are to
144 be handled---this argument should not be used in binary mode. Pass
145 'strict' to raise a ValueError exception if there is an encoding error
146 (the default of None has the same effect), or pass 'ignore' to ignore
147 errors. (Note that ignoring encoding errors can lead to data loss.)
148 See the documentation for codecs.register for a list of the permitted
149 encoding error strings.
150
151 newline is a string controlling how universal newlines works (it only
152 applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It works
153 as follows:
154
155 * On input, if newline is None, universal newlines mode is
156 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
157 these are translated into '\n' before being returned to the
158 caller. If it is '', universal newline mode is enabled, but line
159 endings are returned to the caller untranslated. If it has any of
160 the other legal values, input lines are only terminated by the given
161 string, and the line ending is returned to the caller untranslated.
162
163 * On output, if newline is None, any '\n' characters written are
164 translated to the system default line separator, os.linesep. If
165 newline is '', no translation takes place. If newline is any of the
166 other legal values, any '\n' characters written are translated to
167 the given string.
168
169 closefd is a bool. If closefd is False, the underlying file descriptor will
170 be kept open when the file is closed. This does not work when a file name is
171 given and must be True in that case.
172
173 The newly created file is non-inheritable.
174
175 A custom opener can be used by passing a callable as *opener*. The
176 underlying file descriptor for the file object is then obtained by calling
177 *opener* with (*file*, *flags*). *opener* must return an open file
178 descriptor (passing os.open as *opener* results in functionality similar to
179 passing None).
180
181 open() returns a file object whose type depends on the mode, and
182 through which the standard file operations such as reading and writing
183 are performed. When open() is used to open a file in a text mode ('w',
184 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
185 a file in a binary mode, the returned class varies: in read binary
186 mode, it returns a BufferedReader; in write binary and append binary
187 modes, it returns a BufferedWriter, and in read/write mode, it returns
188 a BufferedRandom.
189
190 It is also possible to use a string or bytearray as a file for both
191 reading and writing. For strings StringIO can be used like a file
192 opened in a text mode, and for bytes a BytesIO can be used like a file
193 opened in a binary mode.
194 """
195 if not isinstance(file, int):
196 file = os.fspath(file)
197 if not isinstance(file, (str, bytes, int)):
198 raise TypeError("invalid file: %r" % file)
199 if not isinstance(mode, str):
200 raise TypeError("invalid mode: %r" % mode)
201 if not isinstance(buffering, int):
202 raise TypeError("invalid buffering: %r" % buffering)
203 if encoding is not None and not isinstance(encoding, str):
204 raise TypeError("invalid encoding: %r" % encoding)
205 if errors is not None and not isinstance(errors, str):
206 raise TypeError("invalid errors: %r" % errors)
207 modes = set(mode)
208 if modes - set("axrwb+t") or len(mode) > len(modes):
209 raise ValueError("invalid mode: %r" % mode)
210 creating = "x" in modes
211 reading = "r" in modes
212 writing = "w" in modes
213 appending = "a" in modes
214 updating = "+" in modes
215 text = "t" in modes
216 binary = "b" in modes
217 if text and binary:
218 raise ValueError("can't have text and binary mode at once")
219 if creating + reading + writing + appending > 1:
220 raise ValueError("can't have read/write/append mode at once")
221 if not (creating or reading or writing or appending):
222 raise ValueError("must have exactly one of read/write/append mode")
223 if binary and encoding is not None:
224 raise ValueError("binary mode doesn't take an encoding argument")
225 if binary and errors is not None:
226 raise ValueError("binary mode doesn't take an errors argument")
227 if binary and newline is not None:
228 raise ValueError("binary mode doesn't take a newline argument")
229 if binary and buffering == 1:
230 import warnings
231 warnings.warn("line buffering (buffering=1) isn't supported in binary "
232 "mode, the default buffer size will be used",
233 RuntimeWarning, 2)
234 raw = FileIO(file,
235 (creating and "x" or "") +
236 (reading and "r" or "") +
237 (writing and "w" or "") +
238 (appending and "a" or "") +
239 (updating and "+" or ""),
240 closefd, opener=opener)
241 result = raw
242 try:
243 line_buffering = False
244 if buffering == 1 or buffering < 0 and raw.isatty():
245 buffering = -1
246 line_buffering = True
247 if buffering < 0:
248 buffering = DEFAULT_BUFFER_SIZE
249 try:
250 bs = os.fstat(raw.fileno()).st_blksize
251 except (OSError, AttributeError):
252 pass
253 else:
254 if bs > 1:
255 buffering = bs
256 if buffering < 0:
257 raise ValueError("invalid buffering size")
258 if buffering == 0:
259 if binary:
260 return result
261 raise ValueError("can't have unbuffered text I/O")
262 if updating:
263 buffer = BufferedRandom(raw, buffering)
264 elif creating or writing or appending:
265 buffer = BufferedWriter(raw, buffering)
266 elif reading:
267 buffer = BufferedReader(raw, buffering)
268 else:
269 raise ValueError("unknown mode: %r" % mode)
270 result = buffer
271 if binary:
272 return result
273 encoding = text_encoding(encoding)
274 text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
275 result = text
276 text.mode = mode
277 return result
278 except:
279 result.close()
280 raise
281
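# Illustrative sketch (not part of this module): the concrete object returned
# by open() depends on the mode and buffering, as described in its docstring.
# The file path used here is hypothetical.
def _example_open_layers(path):
    with open(path, "wb") as f:                  # BufferedWriter wrapping a FileIO
        f.write(b"spam\n")
    with open(path, "rb", buffering=0) as f:     # unbuffered: the raw FileIO itself
        raw_data = f.read()
    with open(path, "r", encoding="utf-8") as f: # TextIOWrapper over a BufferedReader
        text_data = f.read()
    return raw_data, text_data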
282 # Define a default pure-Python implementation for open_code()
283 # that does not allow hooks. Warn on first use. Defined for tests.
284 def _open_code_with_warning(path):
285 """Opens the provided file with mode ``'rb'``. This function
286 should be used when the intent is to treat the contents as
287 executable code.
288
289 ``path`` should be an absolute path.
290
291 When supported by the runtime, this function can be hooked
292 in order to allow embedders more control over code files.
293 This functionality is not supported on the current runtime.
294 """
295 import warnings
296 warnings.warn("_pyio.open_code() may not be using hooks",
297 RuntimeWarning, 2)
298 return open(path, "rb")
299
300 try:
301 open_code = io.open_code
302 except AttributeError:
303 open_code = _open_code_with_warning
304
305
306 # In normal operation, both `UnsupportedOperation`s should be bound to the
307 # same object.
308 try:
309 UnsupportedOperation = io.UnsupportedOperation
310 except AttributeError:
311 class UnsupportedOperation(OSError, ValueError):
312 pass
313
314
315 class IOBase(metaclass=abc.ABCMeta):
316
317 """The abstract base class for all I/O classes.
318
319 This class provides dummy implementations for many methods that
320 derived classes can override selectively; the default implementations
321 represent a file that cannot be read, written or seeked.
322
323 Even though IOBase does not declare read or write because
324 their signatures will vary, implementations and clients should
325 consider those methods part of the interface. Also, implementations
326 may raise UnsupportedOperation when operations they do not support are
327 called.
328
329 The basic type used for binary data read from or written to a file is
330 bytes. Other bytes-like objects are accepted as method arguments too.
331 Text I/O classes work with str data.
332
333 Note that calling any method (even inquiries) on a closed stream is
334 undefined. Implementations may raise OSError in this case.
335
336 IOBase (and its subclasses) support the iterator protocol, meaning
337 that an IOBase object can be iterated over yielding the lines in a
338 stream.
339
340 IOBase also supports the :keyword:`with` statement. In this example,
341 fp is closed after the suite of the with statement is complete:
342
343 with open('spam.txt', 'w') as fp:
344 fp.write('Spam and eggs!')
345 """
346
347 ### Internal ###
348
349 def _unsupported(self, name):
350 """Internal: raise an OSError exception for unsupported operations."""
351 raise UnsupportedOperation("%s.%s() not supported" %
352 (self.__class__.__name__, name))
353
354 ### Positioning ###
355
356 def seek(self, pos, whence=0):
357 """Change stream position.
358
359 Change the stream position to byte offset pos. Argument pos is
360 interpreted relative to the position indicated by whence. Values
361 for whence are ints:
362
363 * 0 -- start of stream (the default); offset should be zero or positive
364 * 1 -- current stream position; offset may be negative
365 * 2 -- end of stream; offset is usually negative
366 Some operating systems / file systems could provide additional values.
367
368 Return an int indicating the new absolute position.
369 """
370 self._unsupported("seek")
371
372 def tell(self):
373 """Return an int indicating the current stream position."""
374 return self.seek(0, 1)
375
376 def truncate(self, pos=None):
377 """Truncate file to size bytes.
378
379 Size defaults to the current IO position as reported by tell(). Return
380 the new size.
381 """
382 self._unsupported("truncate")
383
384 ### Flush and close ###
385
386 def flush(self):
387 """Flush write buffers, if applicable.
388
389 This is not implemented for read-only and non-blocking streams.
390 """
391 self._checkClosed()
392 # XXX Should this return the number of bytes written???
393
394 __closed = False
395
396 def close(self):
397 """Flush and close the IO object.
398
399 This method has no effect if the file is already closed.
400 """
401 if not self.__closed:
402 try:
403 self.flush()
404 finally:
405 self.__closed = True
406
407 def __del__(self):
408 """Destructor. Calls close()."""
409 try:
410 closed = self.closed
411 except AttributeError:
412 # If getting closed fails, then the object is probably
413 # in an unusable state, so ignore.
414 return
415
416 if closed:
417 return
418
419 if _IOBASE_EMITS_UNRAISABLE:
420 self.close()
421 else:
422 # The try/except block is in case this is called at program
423 # exit time, when it's possible that globals have already been
424 # deleted, and then the close() call might fail. Since
425 # there's nothing we can do about such failures and they annoy
426 # the end users, we suppress the traceback.
427 try:
428 self.close()
429 except:
430 pass
431
432 ### Inquiries ###
433
434 def seekable(self):
435 """Return a bool indicating whether object supports random access.
436
437 If False, seek(), tell() and truncate() will raise OSError.
438 This method may need to do a test seek().
439 """
440 return False
441
442 def _checkSeekable(self, msg=None):
443 """Internal: raise UnsupportedOperation if file is not seekable
444 """
445 if not self.seekable():
446 raise UnsupportedOperation("File or stream is not seekable."
447 if msg is None else msg)
448
449 def readable(self):
450 """Return a bool indicating whether object was opened for reading.
451
452 If False, read() will raise OSError.
453 """
454 return False
455
456 def _checkReadable(self, msg=None):
457 """Internal: raise UnsupportedOperation if file is not readable
458 """
459 if not self.readable():
460 raise UnsupportedOperation("File or stream is not readable."
461 if msg is None else msg)
462
463 def writable(self):
464 """Return a bool indicating whether object was opened for writing.
465
466 If False, write() and truncate() will raise OSError.
467 """
468 return False
469
470 def _checkWritable(self, msg=None):
471 """Internal: raise UnsupportedOperation if file is not writable
472 """
473 if not self.writable():
474 raise UnsupportedOperation("File or stream is not writable."
475 if msg is None else msg)
476
477 @property
478 def closed(self):
479 """closed: bool. True iff the file has been closed.
480
481 For backwards compatibility, this is a property, not a predicate.
482 """
483 return self.__closed
484
485 def _checkClosed(self, msg=None):
486 """Internal: raise a ValueError if file is closed
487 """
488 if self.closed:
489 raise ValueError("I/O operation on closed file."
490 if msg is None else msg)
491
492 ### Context manager ###
493
494 def __enter__(self): # That's a forward reference
495 """Context management protocol. Returns self (an instance of IOBase)."""
496 self._checkClosed()
497 return self
498
499 def __exit__(self, *args):
500 """Context management protocol. Calls close()"""
501 self.close()
502
503 ### Lower-level APIs ###
504
505 # XXX Should these be present even if unimplemented?
506
507 def fileno(self):
508 """Returns underlying file descriptor (an int) if one exists.
509
510 An OSError is raised if the IO object does not use a file descriptor.
511 """
512 self._unsupported("fileno")
513
514 def isatty(self):
515 """Return a bool indicating whether this is an 'interactive' stream.
516
517 Return False if it can't be determined.
518 """
519 self._checkClosed()
520 return False
521
522 ### Readline[s] and writelines ###
523
524 def readline(self, size=-1):
525 r"""Read and return a line of bytes from the stream.
526
527 If size is specified, at most size bytes will be read.
528 Size should be an int.
529
530 The line terminator is always b'\n' for binary files; for text
531 files, the newline argument to open can be used to select the line
532 terminator(s) recognized.
533 """
534 # For backwards compatibility, a (slowish) readline().
535 if hasattr(self, "peek"):
536 def nreadahead():
537 readahead = self.peek(1)
538 if not readahead:
539 return 1
540 n = (readahead.find(b"\n") + 1) or len(readahead)
541 if size >= 0:
542 n = min(n, size)
543 return n
544 else:
545 def nreadahead():
546 return 1
547 if size is None:
548 size = -1
549 else:
550 try:
551 size_index = size.__index__
552 except AttributeError:
553 raise TypeError(f"{size!r} is not an integer")
554 else:
555 size = size_index()
556 res = bytearray()
557 while size < 0 or len(res) < size:
558 b = self.read(nreadahead())
559 if not b:
560 break
561 res += b
562 if res.endswith(b"\n"):
563 break
564 return bytes(res)
565
566 def __iter__(self):
567 self._checkClosed()
568 return self
569
570 def __next__(self):
571 line = self.readline()
572 if not line:
573 raise StopIteration
574 return line
575
576 def readlines(self, hint=None):
577 """Return a list of lines from the stream.
578
579 hint can be specified to control the number of lines read: no more
580 lines will be read if the total size (in bytes/characters) of all
581 lines so far exceeds hint.
582 """
583 if hint is None or hint <= 0:
584 return list(self)
585 n = 0
586 lines = []
587 for line in self:
588 lines.append(line)
589 n += len(line)
590 if n >= hint:
591 break
592 return lines
593
594 def writelines(self, lines):
595 """Write a list of lines to the stream.
596
597 Line separators are not added, so it is usual for each of the lines
598 provided to have a line separator at the end.
599 """
600 self._checkClosed()
601 for line in lines:
602 self.write(line)
603
604 io.IOBase.register(IOBase)
605
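# Illustrative sketch (not part of this module): a minimal IOBase subclass.
# Only read() and readable() are supplied; readline(), readlines() and
# iteration then come from the default implementations above. The class name
# is hypothetical and it is never instantiated here.
class _ExampleByteSource(IOBase):
    def __init__(self, data):
        self._data = bytes(data)
        self._offset = 0

    def readable(self):
        return True

    def read(self, size=-1):
        # Serve at most `size` bytes from the in-memory data.
        if size is None or size < 0:
            size = len(self._data) - self._offset
        chunk = self._data[self._offset:self._offset + size]
        self._offset += len(chunk)
        return chunk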
606
607 class RawIOBase(IOBase):
608
609 """Base class for raw binary I/O."""
610
611 # The read() method is implemented by calling readinto(); derived
612 # classes that want to support read() only need to implement
613 # readinto() as a primitive operation. In general, readinto() can be
614 # more efficient than read().
615
616 # (It would be tempting to also provide an implementation of
617 # readinto() in terms of read(), in case the latter is a more suitable
618 # primitive operation, but that would lead to nasty recursion in case
619 # a subclass doesn't implement either.)
620
621 def read(self, size=-1):
622 """Read and return up to size bytes, where size is an int.
623
624 Returns an empty bytes object on EOF, or None if the object is
625 set not to block and has no data to read.
626 """
627 if size is None:
628 size = -1
629 if size < 0:
630 return self.readall()
631 b = bytearray(size.__index__())
632 n = self.readinto(b)
633 if n is None:
634 return None
635 del b[n:]
636 return bytes(b)
637
638 def readall(self):
639 """Read until EOF, using multiple read() call."""
640 res = bytearray()
641 while data := self.read(DEFAULT_BUFFER_SIZE):
642 res += data
643 if res:
644 return bytes(res)
645 else:
646 # b'' or None
647 return data
648
649 def readinto(self, b):
650 """Read bytes into a pre-allocated bytes-like object b.
651
652 Returns an int representing the number of bytes read (0 for EOF), or
653 None if the object is set not to block and has no data to read.
654 """
655 self._unsupported("readinto")
656
657 def write(self, b):
658 """Write the given buffer to the IO stream.
659
660 Returns the number of bytes written, which may be less than the
661 length of b in bytes.
662 """
663 self._unsupported("write")
664
665 io.RawIOBase.register(RawIOBase)
666 from _io import FileIO
667 RawIOBase.register(FileIO)
668
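# Illustrative sketch (not part of this module): RawIOBase derives read() and
# readall() from readinto(), so a raw stream only needs the latter. This toy
# class, whose name is hypothetical, yields the bytes 0..9 once and then EOF.
class _ExampleRawCounter(RawIOBase):
    def __init__(self):
        self._remaining = bytes(range(10))

    def readable(self):
        return True

    def readinto(self, b):
        # Copy as much pending data as fits into the caller's buffer.
        n = min(len(b), len(self._remaining))
        b[:n] = self._remaining[:n]
        self._remaining = self._remaining[n:]
        return n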
669
670 class BufferedIOBase(IOBase):
671
672 """Base class for buffered IO objects.
673
674 The main difference with RawIOBase is that the read() method
675 supports omitting the size argument, and does not have a default
676 implementation that defers to readinto().
677
678 In addition, read(), readinto() and write() may raise
679 BlockingIOError if the underlying raw stream is in non-blocking
680 mode and not ready; unlike their raw counterparts, they will never
681 return None.
682
683 A typical implementation should not inherit from a RawIOBase
684 implementation, but wrap one.
685 """
686
687 def read(self, size=-1):
688 """Read and return up to size bytes, where size is an int.
689
690 If the argument is omitted, None, or negative, reads and
691 returns all data until EOF.
692
693 If the argument is positive, and the underlying raw stream is
694 not 'interactive', multiple raw reads may be issued to satisfy
695 the byte count (unless EOF is reached first). But for
696 interactive raw streams (XXX and for pipes?), at most one raw
697 read will be issued, and a short result does not imply that
698 EOF is imminent.
699
700 Returns an empty bytes array on EOF.
701
702 Raises BlockingIOError if the underlying raw stream has no
703 data at the moment.
704 """
705 self._unsupported("read")
706
707 def read1(self, size=-1):
708 """Read up to size bytes with at most one read() system call,
709 where size is an int.
710 """
711 self._unsupported("read1")
712
713 def readinto(self, b):
714 """Read bytes into a pre-allocated bytes-like object b.
715
716 Like read(), this may issue multiple reads to the underlying raw
717 stream, unless the latter is 'interactive'.
718
719 Returns an int representing the number of bytes read (0 for EOF).
720
721 Raises BlockingIOError if the underlying raw stream has no
722 data at the moment.
723 """
724
725 return self._readinto(b, read1=False)
726
727 def readinto1(self, b):
728 """Read bytes into buffer *b*, using at most one system call
729
730 Returns an int representing the number of bytes read (0 for EOF).
731
732 Raises BlockingIOError if the underlying raw stream has no
733 data at the moment.
734 """
735
736 return self._readinto(b, read1=True)
737
738 def _readinto(self, b, read1):
739 if not isinstance(b, memoryview):
740 b = memoryview(b)
741 b = b.cast('B')
742
743 if read1:
744 data = self.read1(len(b))
745 else:
746 data = self.read(len(b))
747 n = len(data)
748
749 b[:n] = data
750
751 return n
752
753 def write(self, b):
754 """Write the given bytes buffer to the IO stream.
755
756 Return the number of bytes written, which is always the length of b
757 in bytes.
758
759 Raises BlockingIOError if the buffer is full and the
760 underlying raw stream cannot accept more data at the moment.
761 """
762 self._unsupported("write")
763
764 def detach(self):
765 """
766 Separate the underlying raw stream from the buffer and return it.
767
768 After the raw stream has been detached, the buffer is in an unusable
769 state.
770 """
771 self._unsupported("detach")
772
773 io.BufferedIOBase.register(BufferedIOBase)
774
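# Illustrative sketch (not part of this module): the default readinto() and
# readinto1() above are built on read()/read1() via _readinto(), so any
# BufferedIOBase subclass that defines read() gets them for free. BytesIO
# (defined later in this file) is used purely as a convenient demonstration.
def _example_readinto_default():
    buf = bytearray(5)
    n = BytesIO(b"hello world").readinto(buf)  # served by _readinto()/read()
    return n, bytes(buf)                       # (5, b'hello')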
775
776 class _BufferedIOMixin(BufferedIOBase):
777
778 """A mixin implementation of BufferedIOBase with an underlying raw stream.
779
780 This passes most requests on to the underlying raw stream. It
781 does *not* provide implementations of read(), readinto() or
782 write().
783 """
784
785 def __init__(self, raw):
786 self._raw = raw
787
788 ### Positioning ###
789
790 def seek(self, pos, whence=0):
791 new_position = self.raw.seek(pos, whence)
792 if new_position < 0:
793 raise OSError("seek() returned an invalid position")
794 return new_position
795
796 def tell(self):
797 pos = self.raw.tell()
798 if pos < 0:
799 raise OSError("tell() returned an invalid position")
800 return pos
801
802 def truncate(self, pos=None):
803 self._checkClosed()
804 self._checkWritable()
805
806 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
807 # and a flush may be necessary to synch both views of the current
808 # file state.
809 self.flush()
810
811 if pos is None:
812 pos = self.tell()
813 # XXX: Should seek() be used, instead of passing the position
814 # XXX directly to truncate?
815 return self.raw.truncate(pos)
816
817 ### Flush and close ###
818
819 def flush(self):
820 if self.closed:
821 raise ValueError("flush on closed file")
822 self.raw.flush()
823
824 def close(self):
825 if self.raw is not None and not self.closed:
826 try:
827 # may raise BlockingIOError or BrokenPipeError etc
828 self.flush()
829 finally:
830 self.raw.close()
831
832 def detach(self):
833 if self.raw is None:
834 raise ValueError("raw stream already detached")
835 self.flush()
836 raw = self._raw
837 self._raw = None
838 return raw
839
840 ### Inquiries ###
841
842 def seekable(self):
843 return self.raw.seekable()
844
845 @property
846 def raw(self):
847 return self._raw
848
849 @property
850 def closed(self):
851 return self.raw.closed
852
853 @property
854 def name(self):
855 return self.raw.name
856
857 @property
858 def mode(self):
859 return self.raw.mode
860
861 def __getstate__(self):
862 raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")
863
864 def __repr__(self):
865 modname = self.__class__.__module__
866 clsname = self.__class__.__qualname__
867 try:
868 name = self.name
869 except AttributeError:
870 return "<{}.{}>".format(modname, clsname)
871 else:
872 return "<{}.{} name={!r}>".format(modname, clsname, name)
873
874 ### Lower-level APIs ###
875
876 def fileno(self):
877 return self.raw.fileno()
878
879 def isatty(self):
880 return self.raw.isatty()
881
882
883 class BytesIO(BufferedIOBase):
884
885 """Buffered I/O implementation using an in-memory bytes buffer."""
886
887 # Initialize _buffer as soon as possible since it's used by __del__()
888 # which calls close()
889 _buffer = None
890
891 def __init__(self, initial_bytes=None):
892 buf = bytearray()
893 if initial_bytes is not None:
894 buf += initial_bytes
895 self._buffer = buf
896 self._pos = 0
897
898 def __getstate__(self):
899 if self.closed:
900 raise ValueError("__getstate__ on closed file")
901 return self.__dict__.copy()
902
903 def getvalue(self):
904 """Return the bytes value (contents) of the buffer
905 """
906 if self.closed:
907 raise ValueError("getvalue on closed file")
908 return bytes(self._buffer)
909
910 def getbuffer(self):
911 """Return a readable and writable view of the buffer.
912 """
913 if self.closed:
914 raise ValueError("getbuffer on closed file")
915 return memoryview(self._buffer)
916
917 def close(self):
918 if self._buffer is not None:
919 self._buffer.clear()
920 super().close()
921
922 def read(self, size=-1):
923 if self.closed:
924 raise ValueError("read from closed file")
925 if size is None:
926 size = -1
927 else:
928 try:
929 size_index = size.__index__
930 except AttributeError:
931 raise TypeError(f"{size!r} is not an integer")
932 else:
933 size = size_index()
934 if size < 0:
935 size = len(self._buffer)
936 if len(self._buffer) <= self._pos:
937 return b""
938 newpos = min(len(self._buffer), self._pos + size)
939 b = self._buffer[self._pos : newpos]
940 self._pos = newpos
941 return bytes(b)
942
943 def read1(self, size=-1):
944 """This is the same as read.
945 """
946 return self.read(size)
947
948 def write(self, b):
949 if self.closed:
950 raise ValueError("write to closed file")
951 if isinstance(b, str):
952 raise TypeError("can't write str to binary stream")
953 with memoryview(b) as view:
954 n = view.nbytes # Size of any bytes-like object
955 if n == 0:
956 return 0
957 pos = self._pos
958 if pos > len(self._buffer):
959 # Inserts null bytes between the current end of the file
960 # and the new write position.
961 padding = b'\x00' * (pos - len(self._buffer))
962 self._buffer += padding
963 self._buffer[pos:pos + n] = b
964 self._pos += n
965 return n
966
967 def seek(self, pos, whence=0):
968 if self.closed:
969 raise ValueError("seek on closed file")
970 try:
971 pos_index = pos.__index__
972 except AttributeError:
973 raise TypeError(f"{pos!r} is not an integer")
974 else:
975 pos = pos_index()
976 if whence == 0:
977 if pos < 0:
978 raise ValueError("negative seek position %r" % (pos,))
979 self._pos = pos
980 elif whence == 1:
981 self._pos = max(0, self._pos + pos)
982 elif whence == 2:
983 self._pos = max(0, len(self._buffer) + pos)
984 else:
985 raise ValueError("unsupported whence value")
986 return self._pos
987
988 def tell(self):
989 if self.closed:
990 raise ValueError("tell on closed file")
991 return self._pos
992
993 def truncate(self, pos=None):
994 if self.closed:
995 raise ValueError("truncate on closed file")
996 if pos is None:
997 pos = self._pos
998 else:
999 try:
1000 pos_index = pos.__index__
1001 except AttributeError:
1002 raise TypeError(f"{pos!r} is not an integer")
1003 else:
1004 pos = pos_index()
1005 if pos < 0:
1006 raise ValueError("negative truncate position %r" % (pos,))
1007 del self._buffer[pos:]
1008 return pos
1009
1010 def readable(self):
1011 if self.closed:
1012 raise ValueError("I/O operation on closed file.")
1013 return True
1014
1015 def writable(self):
1016 if self.closed:
1017 raise ValueError("I/O operation on closed file.")
1018 return True
1019
1020 def seekable(self):
1021 if self.closed:
1022 raise ValueError("I/O operation on closed file.")
1023 return True
1024
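# Illustrative sketch (not part of this module): a typical in-memory round
# trip with BytesIO, exercising write(), seek(), read() and getvalue().
def _example_bytesio_roundtrip():
    bio = BytesIO()
    bio.write(b"spam")
    bio.seek(0)                  # rewind before reading
    head = bio.read(2)           # b'sp'
    whole = bio.getvalue()       # b'spam', regardless of the current position
    return head, whole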
1025
1026 class BufferedReader(_BufferedIOMixin):
1027
1028 """BufferedReader(raw[, buffer_size])
1029
1030 A buffer for a readable, sequential RawIOBase object.
1031
1032 The constructor creates a BufferedReader for the given readable raw
1033 stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
1034 is used.
1035 """
1036
1037 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
1038 """Create a new buffered reader using the given readable raw IO object.
1039 """
1040 if not raw.readable():
1041 raise OSError('"raw" argument must be readable.')
1042
1043 _BufferedIOMixin.__init__(self, raw)
1044 if buffer_size <= 0:
1045 raise ValueError("invalid buffer size")
1046 self.buffer_size = buffer_size
1047 self._reset_read_buf()
1048 self._read_lock = Lock()
1049
1050 def readable(self):
1051 return self.raw.readable()
1052
1053 def _reset_read_buf(self):
1054 self._read_buf = b""
1055 self._read_pos = 0
1056
1057 def read(self, size=None):
1058 """Read size bytes.
1059
1060 Returns exactly size bytes of data unless the underlying raw IO
1061 stream reaches EOF or if the call would block in non-blocking
1062 mode. If size is negative, read until EOF or until read() would
1063 block.
1064 """
1065 if size is not None and size < -1:
1066 raise ValueError("invalid number of bytes to read")
1067 with self._read_lock:
1068 return self._read_unlocked(size)
1069
1070 def _read_unlocked(self, n=None):
1071 nodata_val = b""
1072 empty_values = (b"", None)
1073 buf = self._read_buf
1074 pos = self._read_pos
1075
1076 # Special case for when the number of bytes to read is unspecified.
1077 if n is None or n == -1:
1078 self._reset_read_buf()
1079 if hasattr(self.raw, 'readall'):
1080 chunk = self.raw.readall()
1081 if chunk is None:
1082 return buf[pos:] or None
1083 else:
1084 return buf[pos:] + chunk
1085 chunks = [buf[pos:]] # Strip the consumed bytes.
1086 current_size = 0
1087 while True:
1088 # Read until EOF or until read() would block.
1089 chunk = self.raw.read()
1090 if chunk in empty_values:
1091 nodata_val = chunk
1092 break
1093 current_size += len(chunk)
1094 chunks.append(chunk)
1095 return b"".join(chunks) or nodata_val
1096
1097 # The number of bytes to read is specified, return at most n bytes.
1098 avail = len(buf) - pos # Length of the available buffered data.
1099 if n <= avail:
1100 # Fast path: the data to read is fully buffered.
1101 self._read_pos += n
1102 return buf[pos:pos+n]
1103 # Slow path: read from the stream until enough bytes are read,
1104 # or until an EOF occurs or until read() would block.
1105 chunks = [buf[pos:]]
1106 wanted = max(self.buffer_size, n)
1107 while avail < n:
1108 chunk = self.raw.read(wanted)
1109 if chunk in empty_values:
1110 nodata_val = chunk
1111 break
1112 avail += len(chunk)
1113 chunks.append(chunk)
1114 # n is more than avail only when an EOF occurred or when
1115 # read() would have blocked.
1116 n = min(n, avail)
1117 out = b"".join(chunks)
1118 self._read_buf = out[n:] # Save the extra data in the buffer.
1119 self._read_pos = 0
1120 return out[:n] if out else nodata_val
1121
1122 def peek(self, size=0):
1123 """Returns buffered bytes without advancing the position.
1124
1125 The argument indicates a desired minimal number of bytes; we
1126 do at most one raw read to satisfy it. We never return more
1127 than self.buffer_size.
1128 """
1129 self._checkClosed("peek of closed file")
1130 with self._read_lock:
1131 return self._peek_unlocked(size)
1132
1133 def _peek_unlocked(self, n=0):
1134 want = min(n, self.buffer_size)
1135 have = len(self._read_buf) - self._read_pos
1136 if have < want or have <= 0:
1137 to_read = self.buffer_size - have
1138 current = self.raw.read(to_read)
1139 if current:
1140 self._read_buf = self._read_buf[self._read_pos:] + current
1141 self._read_pos = 0
1142 return self._read_buf[self._read_pos:]
1143
1144 def read1(self, size=-1):
1145 """Reads up to size bytes, with at most one read() system call."""
1146 # Returns up to size bytes. If at least one byte is buffered, we
1147 # only return buffered bytes. Otherwise, we do one raw read.
1148 self._checkClosed("read of closed file")
1149 if size < 0:
1150 size = self.buffer_size
1151 if size == 0:
1152 return b""
1153 with self._read_lock:
1154 self._peek_unlocked(1)
1155 return self._read_unlocked(
1156 min(size, len(self._read_buf) - self._read_pos))
1157
1158 # Implementing readinto() and readinto1() is not strictly necessary (we
1159 # could rely on the base class that provides an implementation in terms of
1160 # read() and read1()). We do it anyway to keep the _pyio implementation
1161 # similar to the io implementation (which implements the methods for
1162 # performance reasons).
1163 def _readinto(self, buf, read1):
1164 """Read data into *buf* with at most one system call."""
1165
1166 self._checkClosed("readinto of closed file")
1167
1168 # Need to create a memoryview object of type 'b', otherwise
1169 # we may not be able to assign bytes to it, and slicing it
1170 # would create a new object.
1171 if not isinstance(buf, memoryview):
1172 buf = memoryview(buf)
1173 if buf.nbytes == 0:
1174 return 0
1175 buf = buf.cast('B')
1176
1177 written = 0
1178 with self._read_lock:
1179 while written < len(buf):
1180
1181 # First try to read from internal buffer
1182 avail = min(len(self._read_buf) - self._read_pos, len(buf))
1183 if avail:
1184 buf[written:written+avail] = \
1185 self._read_buf[self._read_pos:self._read_pos+avail]
1186 self._read_pos += avail
1187 written += avail
1188 if written == len(buf):
1189 break
1190
1191 # If the remaining space in the caller's buffer is larger than the
1192 # internal buffer, read directly into the caller's buffer
1193 if len(buf) - written > self.buffer_size:
1194 n = self.raw.readinto(buf[written:])
1195 if not n:
1196 break # eof
1197 written += n
1198
1199 # Otherwise refill internal buffer - unless we're
1200 # in read1 mode and already got some data
1201 elif not (read1 and written):
1202 if not self._peek_unlocked(1):
1203 break # eof
1204
1205 # In readinto1 mode, return as soon as we have some data
1206 if read1 and written:
1207 break
1208
1209 return written
1210
1211 def tell(self):
1212 return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
1213
1214 def seek(self, pos, whence=0):
1215 if whence not in valid_seek_flags:
1216 raise ValueError("invalid whence value")
1217 self._checkClosed("seek of closed file")
1218 with self._read_lock:
1219 if whence == 1:
1220 pos -= len(self._read_buf) - self._read_pos
1221 pos = _BufferedIOMixin.seek(self, pos, whence)
1222 self._reset_read_buf()
1223 return pos
1224
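# Illustrative sketch (not part of this module): peek() returns buffered bytes
# without advancing the position, while readline() consumes them. A BytesIO
# stands in for the raw stream here; a FileIO would be typical in practice.
def _example_buffered_reader():
    reader = BufferedReader(BytesIO(b"line one\nline two\n"))
    ahead = reader.peek(4)       # some buffered bytes, position unchanged
    first = reader.readline()    # b'line one\n'
    return ahead, first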
1225 class BufferedWriter(_BufferedIOMixin):
1226
1227 """A buffer for a writeable sequential RawIO object.
1228
1229 The constructor creates a BufferedWriter for the given writeable raw
1230 stream. If the buffer_size is not given, it defaults to
1231 DEFAULT_BUFFER_SIZE.
1232 """
1233
1234 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
1235 if not raw.writable():
1236 raise OSError('"raw" argument must be writable.')
1237
1238 _BufferedIOMixin.__init__(self, raw)
1239 if buffer_size <= 0:
1240 raise ValueError("invalid buffer size")
1241 self.buffer_size = buffer_size
1242 self._write_buf = bytearray()
1243 self._write_lock = Lock()
1244
1245 def writable(self):
1246 return self.raw.writable()
1247
1248 def write(self, b):
1249 if isinstance(b, str):
1250 raise TypeError("can't write str to binary stream")
1251 with self._write_lock:
1252 if self.closed:
1253 raise ValueError("write to closed file")
1254 # XXX we can implement some more tricks to try and avoid
1255 # partial writes
1256 if len(self._write_buf) > self.buffer_size:
1257 # We're full, so let's pre-flush the buffer. (This may
1258 # raise BlockingIOError with characters_written == 0.)
1259 self._flush_unlocked()
1260 before = len(self._write_buf)
1261 self._write_buf.extend(b)
1262 written = len(self._write_buf) - before
1263 if len(self._write_buf) > self.buffer_size:
1264 try:
1265 self._flush_unlocked()
1266 except BlockingIOError as e:
1267 if len(self._write_buf) > self.buffer_size:
1268 # We've hit the buffer_size. We have to accept a partial
1269 # write and cut back our buffer.
1270 overage = len(self._write_buf) - self.buffer_size
1271 written -= overage
1272 self._write_buf = self._write_buf[:self.buffer_size]
1273 raise BlockingIOError(e.errno, e.strerror, written)
1274 return written
1275
1276 def truncate(self, pos=None):
1277 with self._write_lock:
1278 self._flush_unlocked()
1279 if pos is None:
1280 pos = self.raw.tell()
1281 return self.raw.truncate(pos)
1282
1283 def flush(self):
1284 with self._write_lock:
1285 self._flush_unlocked()
1286
1287 def _flush_unlocked(self):
1288 if self.closed:
1289 raise ValueError("flush on closed file")
1290 while self._write_buf:
1291 try:
1292 n = self.raw.write(self._write_buf)
1293 except BlockingIOError:
1294 raise RuntimeError("self.raw should implement RawIOBase: it "
1295 "should not raise BlockingIOError")
1296 if n is None:
1297 raise BlockingIOError(
1298 errno.EAGAIN,
1299 "write could not complete without blocking", 0)
1300 if n > len(self._write_buf) or n < 0:
1301 raise OSError("write() returned incorrect number of bytes")
1302 del self._write_buf[:n]
1303
1304 def tell(self):
1305 return _BufferedIOMixin.tell(self) + len(self._write_buf)
1306
1307 def seek(self, pos, whence=0):
1308 if whence not in valid_seek_flags:
1309 raise ValueError("invalid whence value")
1310 with self._write_lock:
1311 self._flush_unlocked()
1312 return _BufferedIOMixin.seek(self, pos, whence)
1313
1314 def close(self):
1315 with self._write_lock:
1316 if self.raw is None or self.closed:
1317 return
1318 # We have to release the lock and call self.flush() (which will
1319 # probably just re-take the lock) in case flush has been overridden in
1320 # a subclass or the user set self.flush to something. This is the same
1321 # behavior as the C implementation.
1322 try:
1323 # may raise BlockingIOError or BrokenPipeError etc
1324 self.flush()
1325 finally:
1326 with self._write_lock:
1327 self.raw.close()
1328
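# Illustrative sketch (not part of this module): writes are held in the
# internal buffer until flush() (or close(), or a full buffer) pushes them to
# the raw stream. A BytesIO stands in for the raw stream here.
def _example_buffered_writer():
    raw = BytesIO()
    writer = BufferedWriter(raw)
    writer.write(b"buffered")
    before = raw.getvalue()      # b'' -- nothing written through yet
    writer.flush()
    after = raw.getvalue()       # b'buffered'
    return before, after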
1329
1330 class BufferedRWPair(BufferedIOBase):
1331
1332 """A buffered reader and writer object together.
1333
1334 A buffered reader object and buffered writer object put together to
1335 form a sequential IO object that can read and write. This is typically
1336 used with a socket or two-way pipe.
1337
1338 reader and writer are RawIOBase objects that are readable and
1339 writeable respectively. If the buffer_size is omitted it defaults to
1340 DEFAULT_BUFFER_SIZE.
1341 """
1342
1343 # XXX The usefulness of this (compared to having two separate IO
1344 # objects) is questionable.
1345
1346 def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
1347 """Constructor.
1348
1349 The arguments are two RawIO instances.
1350 """
1351 if not reader.readable():
1352 raise OSError('"reader" argument must be readable.')
1353
1354 if not writer.writable():
1355 raise OSError('"writer" argument must be writable.')
1356
1357 self.reader = BufferedReader(reader, buffer_size)
1358 self.writer = BufferedWriter(writer, buffer_size)
1359
1360 def read(self, size=-1):
1361 if size is None:
1362 size = -1
1363 return self.reader.read(size)
1364
1365 def readinto(self, b):
1366 return self.reader.readinto(b)
1367
1368 def write(self, b):
1369 return self.writer.write(b)
1370
1371 def peek(self, size=0):
1372 return self.reader.peek(size)
1373
1374 def read1(self, size=-1):
1375 return self.reader.read1(size)
1376
1377 def readinto1(self, b):
1378 return self.reader.readinto1(b)
1379
1380 def readable(self):
1381 return self.reader.readable()
1382
1383 def writable(self):
1384 return self.writer.writable()
1385
1386 def flush(self):
1387 return self.writer.flush()
1388
1389 def close(self):
1390 try:
1391 self.writer.close()
1392 finally:
1393 self.reader.close()
1394
1395 def isatty(self):
1396 return self.reader.isatty() or self.writer.isatty()
1397
1398 @property
1399 def closed(self):
1400 return self.writer.closed
1401
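# Illustrative sketch (not part of this module): BufferedRWPair combines two
# independent streams, e.g. the two directions of a socket or pipe. Two
# BytesIO objects stand in for them here.
def _example_rw_pair():
    incoming = BytesIO(b"request data")
    outgoing = BytesIO()
    pair = BufferedRWPair(incoming, outgoing)
    request = pair.read(7)       # b'request', taken from `incoming`
    pair.write(b"response")
    pair.flush()                 # pushes the write through to `outgoing`
    return request, outgoing.getvalue()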
1402
1403 class BufferedRandom(BufferedWriter, BufferedReader):
1404
1405 """A buffered interface to random access streams.
1406
1407 The constructor creates a reader and writer for a seekable stream,
1408 raw, given in the first argument. If the buffer_size is omitted it
1409 defaults to DEFAULT_BUFFER_SIZE.
1410 """
1411
1412 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
1413 raw._checkSeekable()
1414 BufferedReader.__init__(self, raw, buffer_size)
1415 BufferedWriter.__init__(self, raw, buffer_size)
1416
1417 def seek(self, pos, whence=0):
1418 if whence not in valid_seek_flags:
1419 raise ValueError("invalid whence value")
1420 self.flush()
1421 if self._read_buf:
1422 # Undo read ahead.
1423 with self._read_lock:
1424 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1425 # First do the raw seek, then empty the read buffer, so that
1426 # if the raw seek fails, we don't lose buffered data forever.
1427 pos = self.raw.seek(pos, whence)
1428 with self._read_lock:
1429 self._reset_read_buf()
1430 if pos < 0:
1431 raise OSError("seek() returned invalid position")
1432 return pos
1433
1434 def tell(self):
1435 if self._write_buf:
1436 return BufferedWriter.tell(self)
1437 else:
1438 return BufferedReader.tell(self)
1439
1440 def truncate(self, pos=None):
1441 if pos is None:
1442 pos = self.tell()
1443 # Use seek to flush the read buffer.
1444 return BufferedWriter.truncate(self, pos)
1445
1446 def read(self, size=None):
1447 if size is None:
1448 size = -1
1449 self.flush()
1450 return BufferedReader.read(self, size)
1451
1452 def readinto(self, b):
1453 self.flush()
1454 return BufferedReader.readinto(self, b)
1455
1456 def peek(self, size=0):
1457 self.flush()
1458 return BufferedReader.peek(self, size)
1459
1460 def read1(self, size=-1):
1461 self.flush()
1462 return BufferedReader.read1(self, size)
1463
1464 def readinto1(self, b):
1465 self.flush()
1466 return BufferedReader.readinto1(self, b)
1467
1468 def write(self, b):
1469 if self._read_buf:
1470 # Undo readahead
1471 with self._read_lock:
1472 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1473 self._reset_read_buf()
1474 return BufferedWriter.write(self, b)
1475
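# Illustrative sketch (not part of this module): BufferedRandom gives buffered
# reads and writes over one seekable raw stream; this is what open() builds
# for 'rb+'/'wb+' modes. A BytesIO stands in for the raw stream here.
def _example_buffered_random():
    stream = BufferedRandom(BytesIO(b"0123456789"))
    stream.seek(4)
    stream.write(b"XY")          # overwrite two bytes in place
    stream.seek(0)
    return stream.read()         # b'0123XY6789'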
1476
1477 class FileIO(RawIOBase):
1478 _fd = -1
1479 _created = False
1480 _readable = False
1481 _writable = False
1482 _appending = False
1483 _seekable = None
1484 _closefd = True
1485
1486 def __init__(self, file, mode='r', closefd=True, opener=None):
1487 """Open a file. The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
1488 writing, exclusive creation or appending. The file will be created if it
1489 doesn't exist when opened for writing or appending; it will be truncated
1490 when opened for writing. A FileExistsError will be raised if it already
1491 exists when opened for creating. Opening a file for creating implies
1492 writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
1493 to allow simultaneous reading and writing. A custom opener can be used by
1494 passing a callable as *opener*. The underlying file descriptor for the file
1495 object is then obtained by calling opener with (*name*, *flags*).
1496 *opener* must return an open file descriptor (passing os.open as *opener*
1497 results in functionality similar to passing None).
1498 """
1499 if self._fd >= 0:
1500 # Have to close the existing file first.
1501 try:
1502 if self._closefd:
1503 os.close(self._fd)
1504 finally:
1505 self._fd = -1
1506
1507 if isinstance(file, float):
1508 raise TypeError('integer argument expected, got float')
1509 if isinstance(file, int):
1510 fd = file
1511 if fd < 0:
1512 raise ValueError('negative file descriptor')
1513 else:
1514 fd = -1
1515
1516 if not isinstance(mode, str):
1517 raise TypeError('invalid mode: %s' % (mode,))
1518 if not set(mode) <= set('xrwab+'):
1519 raise ValueError('invalid mode: %s' % (mode,))
1520 if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
1521 raise ValueError('Must have exactly one of create/read/write/append '
1522 'mode and at most one plus')
1523
1524 if 'x' in mode:
1525 self._created = True
1526 self._writable = True
1527 flags = os.O_EXCL | os.O_CREAT
1528 elif 'r' in mode:
1529 self._readable = True
1530 flags = 0
1531 elif 'w' in mode:
1532 self._writable = True
1533 flags = os.O_CREAT | os.O_TRUNC
1534 elif 'a' in mode:
1535 self._writable = True
1536 self._appending = True
1537 flags = os.O_APPEND | os.O_CREAT
1538
1539 if '+' in mode:
1540 self._readable = True
1541 self._writable = True
1542
1543 if self._readable and self._writable:
1544 flags |= os.O_RDWR
1545 elif self._readable:
1546 flags |= os.O_RDONLY
1547 else:
1548 flags |= os.O_WRONLY
1549
1550 flags |= getattr(os, 'O_BINARY', 0)
1551
1552 noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
1553 getattr(os, 'O_CLOEXEC', 0))
1554 flags |= noinherit_flag
1555
1556 owned_fd = None
1557 try:
1558 if fd < 0:
1559 if not closefd:
1560 raise ValueError('Cannot use closefd=False with file name')
1561 if opener is None:
1562 fd = os.open(file, flags, 0o666)
1563 else:
1564 fd = opener(file, flags)
1565 if not isinstance(fd, int):
1566 raise TypeError('expected integer from opener')
1567 if fd < 0:
1568 raise OSError('Negative file descriptor')
1569 owned_fd = fd
1570 if not noinherit_flag:
1571 os.set_inheritable(fd, False)
1572
1573 self._closefd = closefd
1574 fdfstat = os.fstat(fd)
1575 try:
1576 if stat.S_ISDIR(fdfstat.st_mode):
1577 raise IsADirectoryError(errno.EISDIR,
1578 os.strerror(errno.EISDIR), file)
1579 except AttributeError:
1580 # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
1581 # don't exist.
1582 pass
1583 self._blksize = getattr(fdfstat, 'st_blksize', 0)
1584 if self._blksize <= 1:
1585 self._blksize = DEFAULT_BUFFER_SIZE
1586
1587 if _setmode:
1588 # don't translate newlines (\r\n <=> \n)
1589 _setmode(fd, os.O_BINARY)
1590
1591 self.name = file
1592 if self._appending:
1593 # For consistent behaviour, we explicitly seek to the
1594 # end of file (otherwise, it might be done only on the
1595 # first write()).
1596 try:
1597 os.lseek(fd, 0, SEEK_END)
1598 except OSError as e:
1599 if e.errno != errno.ESPIPE:
1600 raise
1601 except:
1602 if owned_fd is not None:
1603 os.close(owned_fd)
1604 raise
1605 self._fd = fd
1606
1607 def __del__(self):
1608 if self._fd >= 0 and self._closefd and not self.closed:
1609 import warnings
1610 warnings.warn('unclosed file %r' % (self,), ResourceWarning,
1611 stacklevel=2, source=self)
1612 self.close()
1613
1614 def __getstate__(self):
1615 raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")
1616
1617 def __repr__(self):
1618 class_name = '%s.%s' % (self.__class__.__module__,
1619 self.__class__.__qualname__)
1620 if self.closed:
1621 return '<%s [closed]>' % class_name
1622 try:
1623 name = self.name
1624 except AttributeError:
1625 return ('<%s fd=%d mode=%r closefd=%r>' %
1626 (class_name, self._fd, self.mode, self._closefd))
1627 else:
1628 return ('<%s name=%r mode=%r closefd=%r>' %
1629 (class_name, name, self.mode, self._closefd))
1630
1631 def _checkReadable(self):
1632 if not self._readable:
1633 raise UnsupportedOperation('File not open for reading')
1634
1635 def _checkWritable(self, msg=None):
1636 if not self._writable:
1637 raise UnsupportedOperation('File not open for writing')
1638
1639 def read(self, size=None):
1640 """Read at most size bytes, returned as bytes.
1641
1642 Only makes one system call, so less data may be returned than requested.
1643 In non-blocking mode, returns None if no data is available.
1644 Return an empty bytes object at EOF.
1645 """
1646 self._checkClosed()
1647 self._checkReadable()
1648 if size is None or size < 0:
1649 return self.readall()
1650 try:
1651 return os.read(self._fd, size)
1652 except BlockingIOError:
1653 return None
1654
1655 def readall(self):
1656 """Read all data from the file, returned as bytes.
1657
1658 In non-blocking mode, returns as much as is immediately available,
1659 or None if no data is available. Return an empty bytes object at EOF.
1660 """
1661 self._checkClosed()
1662 self._checkReadable()
1663 bufsize = DEFAULT_BUFFER_SIZE
1664 try:
1665 pos = os.lseek(self._fd, 0, SEEK_CUR)
1666 end = os.fstat(self._fd).st_size
1667 if end >= pos:
1668 bufsize = end - pos + 1
1669 except OSError:
1670 pass
1671
1672 result = bytearray()
1673 while True:
1674 if len(result) >= bufsize:
1675 bufsize = len(result)
1676 bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
1677 n = bufsize - len(result)
1678 try:
1679 chunk = os.read(self._fd, n)
1680 except BlockingIOError:
1681 if result:
1682 break
1683 return None
1684 if not chunk: # reached the end of the file
1685 break
1686 result += chunk
1687
1688 return bytes(result)
1689
1690 def readinto(self, b):
1691 """Same as RawIOBase.readinto()."""
1692 m = memoryview(b).cast('B')
1693 data = self.read(len(m))
1694 n = len(data)
1695 m[:n] = data
1696 return n
1697
1698 def write(self, b):
1699 """Write bytes b to file, return number written.
1700
1701 Only makes one system call, so not all of the data may be written.
1702 The number of bytes actually written is returned. In non-blocking mode,
1703 returns None if the write would block.
1704 """
1705 self._checkClosed()
1706 self._checkWritable()
1707 try:
1708 return os.write(self._fd, b)
1709 except BlockingIOError:
1710 return None
1711
1712 def seek(self, pos, whence=SEEK_SET):
1713 """Move to new file position.
1714
1715 Argument offset is a byte count. Optional argument whence defaults to
1716 SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values
1717 are SEEK_CUR or 1 (move relative to current position, positive or negative),
1718 and SEEK_END or 2 (move relative to end of file, usually negative, although
1719 many platforms allow seeking beyond the end of a file).
1720
1721 Note that not all file objects are seekable.
1722 """
1723 if isinstance(pos, float):
1724 raise TypeError('an integer is required')
1725 self._checkClosed()
1726 return os.lseek(self._fd, pos, whence)
1727
1728 def tell(self):
1729 """tell() -> int. Current file position.
1730
1731 Can raise OSError for non seekable files."""
1732 self._checkClosed()
1733 return os.lseek(self._fd, 0, SEEK_CUR)
1734
1735 def truncate(self, size=None):
1736 """Truncate the file to at most size bytes.
1737
1738 Size defaults to the current file position, as returned by tell().
1739 The current file position is left unchanged.
1740 """
1741 self._checkClosed()
1742 self._checkWritable()
1743 if size is None:
1744 size = self.tell()
1745 os.ftruncate(self._fd, size)
1746 return size
1747
1748 def close(self):
1749 """Close the file.
1750
1751 A closed file cannot be used for further I/O operations. close() may be
1752 called more than once without error.
1753 """
1754 if not self.closed:
1755 try:
1756 if self._closefd:
1757 os.close(self._fd)
1758 finally:
1759 super().close()
1760
1761 def seekable(self):
1762 """True if file supports random-access."""
1763 self._checkClosed()
1764 if self._seekable is None:
1765 try:
1766 self.tell()
1767 except OSError:
1768 self._seekable = False
1769 else:
1770 self._seekable = True
1771 return self._seekable
1772
1773 def readable(self):
1774 """True if file was opened in a read mode."""
1775 self._checkClosed()
1776 return self._readable
1777
1778 def writable(self):
1779 """True if file was opened in a write mode."""
1780 self._checkClosed()
1781 return self._writable
1782
1783 def fileno(self):
1784 """Return the underlying file descriptor (an integer)."""
1785 self._checkClosed()
1786 return self._fd
1787
1788 def isatty(self):
1789 """True if the file is connected to a TTY device."""
1790 self._checkClosed()
1791 return os.isatty(self._fd)
1792
1793 @property
1794 def closefd(self):
1795 """True if the file descriptor will be closed by close()."""
1796 return self._closefd
1797
1798 @property
1799 def mode(self):
1800 """String giving the file mode"""
1801 if self._created:
1802 if self._readable:
1803 return 'xb+'
1804 else:
1805 return 'xb'
1806 elif self._appending:
1807 if self._readable:
1808 return 'ab+'
1809 else:
1810 return 'ab'
1811 elif self._readable:
1812 if self._writable:
1813 return 'rb+'
1814 else:
1815 return 'rb'
1816 else:
1817 return 'wb'
1818
1819
1820 class TextIOBase(IOBase):
1821
1822 """Base class for text I/O.
1823
1824 This class provides a character and line based interface to stream
1825 I/O.
1826 """
1827
1828 def read(self, size=-1):
1829 """Read at most size characters from stream, where size is an int.
1830
1831 Read from underlying buffer until we have size characters or we hit EOF.
1832 If size is negative or omitted, read until EOF.
1833
1834 Returns a string.
1835 """
1836 self._unsupported("read")
1837
1838 def write(self, s):
1839         """Write string s to the stream and return the number of characters written."""
1840 self._unsupported("write")
1841
1842 def truncate(self, pos=None):
1843 """Truncate size to pos, where pos is an int."""
1844 self._unsupported("truncate")
1845
1846 def readline(self):
1847 """Read until newline or EOF.
1848
1849 Returns an empty string if EOF is hit immediately.
1850 """
1851 self._unsupported("readline")
1852
1853 def detach(self):
1854 """
1855 Separate the underlying buffer from the TextIOBase and return it.
1856
1857 After the underlying buffer has been detached, the TextIO is in an
1858 unusable state.
1859 """
1860 self._unsupported("detach")
1861
1862 @property
1863 def encoding(self):
1864 """Subclasses should override."""
1865 return None
1866
1867 @property
1868 def newlines(self):
1869 """Line endings translated so far.
1870
1871 Only line endings translated during reading are considered.
1872
1873 Subclasses should override.
1874 """
1875 return None
1876
1877 @property
1878 def errors(self):
1879 """Error setting of the decoder or encoder.
1880
1881 Subclasses should override."""
1882 return None
1883
1884 io.TextIOBase.register(TextIOBase)
1885
1886
1887 class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
1888 r"""Codec used when reading a file in universal newlines mode. It wraps
1889 another incremental decoder, translating \r\n and \r into \n. It also
1890 records the types of newlines encountered. When used with
1891 translate=False, it ensures that the newline sequence is returned in
1892 one piece.
1893 """
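    # Behaviour sketch (decoder=None passes already-decoded text straight
    # through); a "\r\n" split across two chunks still comes back in one piece
    # and is recorded in the newlines attribute:
    #
    #     >>> d = IncrementalNewlineDecoder(None, translate=True)
    #     >>> d.decode("a\r")              # the trailing '\r' is held back
    #     'a'
    #     >>> d.decode("\nb", final=True)  # '\r\n' re-joined, then translated
    #     '\nb'
    #     >>> d.newlines
    #     '\r\n'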
1894 def __init__(self, decoder, translate, errors='strict'):
1895 codecs.IncrementalDecoder.__init__(self, errors=errors)
1896 self.translate = translate
1897 self.decoder = decoder
1898 self.seennl = 0
1899 self.pendingcr = False
1900
1901 def decode(self, input, final=False):
1902 # decode input (with the eventual \r from a previous pass)
1903 if self.decoder is None:
1904 output = input
1905 else:
1906 output = self.decoder.decode(input, final=final)
1907 if self.pendingcr and (output or final):
1908 output = "\r" + output
1909 self.pendingcr = False
1910
1911 # retain last \r even when not translating data:
1912 # then readline() is sure to get \r\n in one pass
1913 if output.endswith("\r") and not final:
1914 output = output[:-1]
1915 self.pendingcr = True
1916
1917 # Record which newlines are read
1918 crlf = output.count('\r\n')
1919 cr = output.count('\r') - crlf
1920 lf = output.count('\n') - crlf
1921 self.seennl |= (lf and self._LF) | (cr and self._CR) \
1922 | (crlf and self._CRLF)
1923
1924 if self.translate:
1925 if crlf:
1926 output = output.replace("\r\n", "\n")
1927 if cr:
1928 output = output.replace("\r", "\n")
1929
1930 return output
1931
1932 def getstate(self):
1933 if self.decoder is None:
1934 buf = b""
1935 flag = 0
1936 else:
1937 buf, flag = self.decoder.getstate()
1938 flag <<= 1
1939 if self.pendingcr:
1940 flag |= 1
1941 return buf, flag
1942
1943 def setstate(self, state):
1944 buf, flag = state
1945 self.pendingcr = bool(flag & 1)
1946 if self.decoder is not None:
1947 self.decoder.setstate((buf, flag >> 1))
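
    # Note on the packed state: bit 0 of the integer flag carries pendingcr,
    # while the wrapped decoder's own flags occupy the remaining bits, i.e.
    # roughly flag = (inner_flags << 1) | pendingcr, undone again in setstate().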
1948
1949 def reset(self):
1950 self.seennl = 0
1951 self.pendingcr = False
1952 if self.decoder is not None:
1953 self.decoder.reset()
1954
1955 _LF = 1
1956 _CR = 2
1957 _CRLF = 4
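    # seennl is a bitmask of the three constants above; the newlines property
    # below uses it directly as an index, e.g. _LF | _CRLF == 5 selects
    # ("\n", "\r\n").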
1958
1959 @property
1960 def newlines(self):
1961 return (None,
1962 "\n",
1963 "\r",
1964 ("\r", "\n"),
1965 "\r\n",
1966 ("\n", "\r\n"),
1967 ("\r", "\r\n"),
1968 ("\r", "\n", "\r\n")
1969 )[self.seennl]
1970
1971
1972 class TextIOWrapper(TextIOBase):
1973
1974 r"""Character and line based layer over a BufferedIOBase object, buffer.
1975
1976 encoding gives the name of the encoding that the stream will be
1977 decoded or encoded with. It defaults to locale.getencoding().
1978
1979     errors determines the strictness of encoding and decoding (see
1980     codecs.register) and defaults to "strict".
1981
1982 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1983 handling of line endings. If it is None, universal newlines is
1984     enabled. With this enabled, on input, the line endings '\n', '\r',
1985 or '\r\n' are translated to '\n' before being returned to the
1986 caller. Conversely, on output, '\n' is translated to the system
1987 default line separator, os.linesep. If newline is any other of its
1988 legal values, that newline becomes the newline when the file is read
1989 and it is returned untranslated. On output, '\n' is converted to the
1990 newline.
1991
1992 If line_buffering is True, a call to flush is implied when a call to
1993 write contains a newline character.
1994 """
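    # Minimal usage sketch, using the BytesIO class defined earlier in this
    # module (any buffered binary stream works the same way):
    #
    #     >>> t = TextIOWrapper(BytesIO(), encoding="utf-8", newline="\n")
    #     >>> t.write("spam\n")
    #     5
    #     >>> _ = t.seek(0)
    #     >>> t.readline()
    #     'spam\n'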
1995
1996 _CHUNK_SIZE = 2048
1997
1998 # Initialize _buffer as soon as possible since it's used by __del__()
1999 # which calls close()
2000 _buffer = None
2001
2002 # The write_through argument has no effect here since this
2003 # implementation always writes through. The argument is present only
2004 # so that the signature can match the signature of the C version.
2005 def __init__(self, buffer, encoding=None, errors=None, newline=None,
2006 line_buffering=False, write_through=False):
2007 self._check_newline(newline)
2008 encoding = text_encoding(encoding)
2009
2010 if encoding == "locale":
2011 encoding = self._get_locale_encoding()
2012
2013 if not isinstance(encoding, str):
2014 raise ValueError("invalid encoding: %r" % encoding)
2015
2016 if not codecs.lookup(encoding)._is_text_encoding:
2017 msg = ("%r is not a text encoding; "
2018 "use codecs.open() to handle arbitrary codecs")
2019 raise LookupError(msg % encoding)
2020
2021 if errors is None:
2022 errors = "strict"
2023 else:
2024 if not isinstance(errors, str):
2025 raise ValueError("invalid errors: %r" % errors)
2026 if _CHECK_ERRORS:
2027 codecs.lookup_error(errors)
2028
2029 self._buffer = buffer
2030 self._decoded_chars = '' # buffer for text returned from decoder
2031 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
2032 self._snapshot = None # info for reconstructing decoder state
2033 self._seekable = self._telling = self.buffer.seekable()
2034 self._has_read1 = hasattr(self.buffer, 'read1')
2035 self._configure(encoding, errors, newline,
2036 line_buffering, write_through)
2037
2038 def _check_newline(self, newline):
2039 if newline is not None and not isinstance(newline, str):
2040 raise TypeError("illegal newline type: %r" % (type(newline),))
2041 if newline not in (None, "", "\n", "\r", "\r\n"):
2042 raise ValueError("illegal newline value: %r" % (newline,))
2043
2044 def _configure(self, encoding=None, errors=None, newline=None,
2045 line_buffering=False, write_through=False):
2046 self._encoding = encoding
2047 self._errors = errors
2048 self._encoder = None
2049 self._decoder = None
2050 self._b2cratio = 0.0
2051
2052 self._readuniversal = not newline
2053 self._readtranslate = newline is None
2054 self._readnl = newline
2055 self._writetranslate = newline != ''
2056 self._writenl = newline or os.linesep
2057
2058 self._line_buffering = line_buffering
2059 self._write_through = write_through
2060
2061 # don't write a BOM in the middle of a file
2062 if self._seekable and self.writable():
2063 position = self.buffer.tell()
2064 if position != 0:
2065 try:
2066 self._get_encoder().setstate(0)
2067 except LookupError:
2068 # Sometimes the encoder doesn't exist
2069 pass
2070
2071 # self._snapshot is either None, or a tuple (dec_flags, next_input)
2072 # where dec_flags is the second (integer) item of the decoder state
2073 # and next_input is the chunk of input bytes that comes next after the
2074 # snapshot point. We use this to reconstruct decoder states in tell().
2075
2076 # Naming convention:
2077 # - "bytes_..." for integer variables that count input bytes
2078 # - "chars_..." for integer variables that count decoded characters
2079
2080 def __repr__(self):
2081 result = "<{}.{}".format(self.__class__.__module__,
2082 self.__class__.__qualname__)
2083 try:
2084 name = self.name
2085 except AttributeError:
2086 pass
2087 else:
2088 result += " name={0!r}".format(name)
2089 try:
2090 mode = self.mode
2091 except AttributeError:
2092 pass
2093 else:
2094 result += " mode={0!r}".format(mode)
2095 return result + " encoding={0!r}>".format(self.encoding)
2096
2097 @property
2098 def encoding(self):
2099 return self._encoding
2100
2101 @property
2102 def errors(self):
2103 return self._errors
2104
2105 @property
2106 def line_buffering(self):
2107 return self._line_buffering
2108
2109 @property
2110 def write_through(self):
2111 return self._write_through
2112
2113 @property
2114 def buffer(self):
2115 return self._buffer
2116
2117 def reconfigure(self, *,
2118 encoding=None, errors=None, newline=Ellipsis,
2119 line_buffering=None, write_through=None):
2120 """Reconfigure the text stream with new parameters.
2121
2122 This also flushes the stream.
2123 """
2124 if (self._decoder is not None
2125 and (encoding is not None or errors is not None
2126 or newline is not Ellipsis)):
2127 raise UnsupportedOperation(
2128                 "It is not possible to set the encoding or newline of the stream "
2129 "after the first read")
2130
2131 if errors is None:
2132 if encoding is None:
2133 errors = self._errors
2134 else:
2135 errors = 'strict'
2136 elif not isinstance(errors, str):
2137 raise TypeError("invalid errors: %r" % errors)
2138
2139 if encoding is None:
2140 encoding = self._encoding
2141 else:
2142 if not isinstance(encoding, str):
2143 raise TypeError("invalid encoding: %r" % encoding)
2144 if encoding == "locale":
2145 encoding = self._get_locale_encoding()
2146
2147 if newline is Ellipsis:
2148 newline = self._readnl
2149 self._check_newline(newline)
2150
2151 if line_buffering is None:
2152 line_buffering = self.line_buffering
2153 if write_through is None:
2154 write_through = self.write_through
2155
2156 self.flush()
2157 self._configure(encoding, errors, newline,
2158 line_buffering, write_through)
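
    # Sketch: before the first read, encoding, errors and newline may still be
    # changed; line_buffering and write_through can be flipped at any time.
    #
    #     >>> t = TextIOWrapper(BytesIO(), encoding="ascii")
    #     >>> t.reconfigure(encoding="utf-8", errors="replace")
    #     >>> (t.encoding, t.errors)
    #     ('utf-8', 'replace')
    #     >>> t.reconfigure(line_buffering=True)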
2159
2160 def seekable(self):
2161 if self.closed:
2162 raise ValueError("I/O operation on closed file.")
2163 return self._seekable
2164
2165 def readable(self):
2166 return self.buffer.readable()
2167
2168 def writable(self):
2169 return self.buffer.writable()
2170
2171 def flush(self):
2172 self.buffer.flush()
2173 self._telling = self._seekable
2174
2175 def close(self):
2176 if self.buffer is not None and not self.closed:
2177 try:
2178 self.flush()
2179 finally:
2180 self.buffer.close()
2181
2182 @property
2183 def closed(self):
2184 return self.buffer.closed
2185
2186 @property
2187 def name(self):
2188 return self.buffer.name
2189
2190 def fileno(self):
2191 return self.buffer.fileno()
2192
2193 def isatty(self):
2194 return self.buffer.isatty()
2195
2196 def write(self, s):
2197         """Write data, where s is a str."""
2198 if self.closed:
2199 raise ValueError("write to closed file")
2200 if not isinstance(s, str):
2201 raise TypeError("can't write %s to text stream" %
2202 s.__class__.__name__)
2203 length = len(s)
2204 haslf = (self._writetranslate or self._line_buffering) and "\n" in s
2205 if haslf and self._writetranslate and self._writenl != "\n":
2206 s = s.replace("\n", self._writenl)
2207 encoder = self._encoder or self._get_encoder()
2208 # XXX What if we were just reading?
2209 b = encoder.encode(s)
2210 self.buffer.write(b)
2211 if self._line_buffering and (haslf or "\r" in s):
2212 self.flush()
2213 self._set_decoded_chars('')
2214 self._snapshot = None
2215 if self._decoder:
2216 self._decoder.reset()
2217 return length
2218
2219 def _get_encoder(self):
2220 make_encoder = codecs.getincrementalencoder(self._encoding)
2221 self._encoder = make_encoder(self._errors)
2222 return self._encoder
2223
2224 def _get_decoder(self):
2225 make_decoder = codecs.getincrementaldecoder(self._encoding)
2226 decoder = make_decoder(self._errors)
2227 if self._readuniversal:
2228 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
2229 self._decoder = decoder
2230 return decoder
2231
2232 # The following three methods implement an ADT for _decoded_chars.
2233 # Text returned from the decoder is buffered here until the client
2234 # requests it by calling our read() or readline() method.
2235 def _set_decoded_chars(self, chars):
2236 """Set the _decoded_chars buffer."""
2237 self._decoded_chars = chars
2238 self._decoded_chars_used = 0
2239
2240 def _get_decoded_chars(self, n=None):
2241 """Advance into the _decoded_chars buffer."""
2242 offset = self._decoded_chars_used
2243 if n is None:
2244 chars = self._decoded_chars[offset:]
2245 else:
2246 chars = self._decoded_chars[offset:offset + n]
2247 self._decoded_chars_used += len(chars)
2248 return chars
2249
2250 def _get_locale_encoding(self):
2251 try:
2252 import locale
2253 except ImportError:
2254 # Importing locale may fail if Python is being built
2255 return "utf-8"
2256 else:
2257 return locale.getencoding()
2258
2259 def _rewind_decoded_chars(self, n):
2260 """Rewind the _decoded_chars buffer."""
2261 if self._decoded_chars_used < n:
2262 raise AssertionError("rewind decoded_chars out of bounds")
2263 self._decoded_chars_used -= n
2264
2265 def _read_chunk(self):
2266 """
2267 Read and decode the next chunk of data from the BufferedReader.
2268 """
2269
2270 # The return value is True unless EOF was reached. The decoded
2271 # string is placed in self._decoded_chars (replacing its previous
2272 # value). The entire input chunk is sent to the decoder, though
2273 # some of it may remain buffered in the decoder, yet to be
2274 # converted.
2275
2276 if self._decoder is None:
2277 raise ValueError("no decoder")
2278
2279 if self._telling:
2280 # To prepare for tell(), we need to snapshot a point in the
2281 # file where the decoder's input buffer is empty.
2282
2283 dec_buffer, dec_flags = self._decoder.getstate()
2284 # Given this, we know there was a valid snapshot point
2285 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
2286
2287 # Read a chunk, decode it, and put the result in self._decoded_chars.
2288 if self._has_read1:
2289 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
2290 else:
2291 input_chunk = self.buffer.read(self._CHUNK_SIZE)
2292 eof = not input_chunk
2293 decoded_chars = self._decoder.decode(input_chunk, eof)
2294 self._set_decoded_chars(decoded_chars)
2295 if decoded_chars:
2296 self._b2cratio = len(input_chunk) / len(self._decoded_chars)
2297 else:
2298 self._b2cratio = 0.0
2299
2300 if self._telling:
2301 # At the snapshot point, len(dec_buffer) bytes before the read,
2302 # the next input to be decoded is dec_buffer + input_chunk.
2303 self._snapshot = (dec_flags, dec_buffer + input_chunk)
2304
2305 return not eof
2306
2307 def _pack_cookie(self, position, dec_flags=0,
2308 bytes_to_feed=0, need_eof=False, chars_to_skip=0):
2309 # The meaning of a tell() cookie is: seek to position, set the
2310 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
2311 # into the decoder with need_eof as the EOF flag, then skip
2312 # chars_to_skip characters of the decoded result. For most simple
2313 # decoders, tell() will often just give a byte offset in the file.
2314 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
2315 (chars_to_skip<<192) | bool(need_eof)<<256)
2316
2317 def _unpack_cookie(self, bigint):
2318 rest, position = divmod(bigint, 1<<64)
2319 rest, dec_flags = divmod(rest, 1<<64)
2320 rest, bytes_to_feed = divmod(rest, 1<<64)
2321 need_eof, chars_to_skip = divmod(rest, 1<<64)
2322 return position, dec_flags, bytes_to_feed, bool(need_eof), chars_to_skip
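
    # Worked sketch of the cookie layout: each field occupies its own 64-bit
    # slot, so packing and unpacking round-trip exactly (private helpers shown
    # for illustration only):
    #
    #     >>> t = TextIOWrapper(BytesIO(), encoding="utf-8")
    #     >>> cookie = t._pack_cookie(10, dec_flags=0, bytes_to_feed=3,
    #     ...                         chars_to_skip=2)
    #     >>> t._unpack_cookie(cookie)
    #     (10, 0, 3, False, 2)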
2323
2324 def tell(self):
2325 if not self._seekable:
2326 raise UnsupportedOperation("underlying stream is not seekable")
2327 if not self._telling:
2328 raise OSError("telling position disabled by next() call")
2329 self.flush()
2330 position = self.buffer.tell()
2331 decoder = self._decoder
2332 if decoder is None or self._snapshot is None:
2333 if self._decoded_chars:
2334 # This should never happen.
2335 raise AssertionError("pending decoded text")
2336 return position
2337
2338 # Skip backward to the snapshot point (see _read_chunk).
2339 dec_flags, next_input = self._snapshot
2340 position -= len(next_input)
2341
2342 # How many decoded characters have been used up since the snapshot?
2343 chars_to_skip = self._decoded_chars_used
2344 if chars_to_skip == 0:
2345 # We haven't moved from the snapshot point.
2346 return self._pack_cookie(position, dec_flags)
2347
2348 # Starting from the snapshot position, we will walk the decoder
2349 # forward until it gives us enough decoded characters.
2350 saved_state = decoder.getstate()
2351 try:
2352 # Fast search for an acceptable start point, close to our
2353 # current pos.
2354 # Rationale: calling decoder.decode() has a large overhead
2355 # regardless of chunk size; we want the number of such calls to
2356 # be O(1) in most situations (common decoders, sensible input).
2357 # Actually, it will be exactly 1 for fixed-size codecs (all
2358 # 8-bit codecs, also UTF-16 and UTF-32).
2359 skip_bytes = int(self._b2cratio * chars_to_skip)
2360 skip_back = 1
2361 assert skip_bytes <= len(next_input)
2362 while skip_bytes > 0:
2363 decoder.setstate((b'', dec_flags))
2364                 # Decode up to the tentative start point
2365 n = len(decoder.decode(next_input[:skip_bytes]))
2366 if n <= chars_to_skip:
2367 b, d = decoder.getstate()
2368 if not b:
2369 # Before pos and no bytes buffered in decoder => OK
2370 dec_flags = d
2371 chars_to_skip -= n
2372 break
2373 # Skip back by buffered amount and reset heuristic
2374 skip_bytes -= len(b)
2375 skip_back = 1
2376 else:
2377 # We're too far ahead, skip back a bit
2378 skip_bytes -= skip_back
2379 skip_back = skip_back * 2
2380 else:
2381 skip_bytes = 0
2382 decoder.setstate((b'', dec_flags))
2383
2384 # Note our initial start point.
2385 start_pos = position + skip_bytes
2386 start_flags = dec_flags
2387 if chars_to_skip == 0:
2388 # We haven't moved from the start point.
2389 return self._pack_cookie(start_pos, start_flags)
2390
2391 # Feed the decoder one byte at a time. As we go, note the
2392 # nearest "safe start point" before the current location
2393 # (a point where the decoder has nothing buffered, so seek()
2394 # can safely start from there and advance to this location).
2395 bytes_fed = 0
2396 need_eof = False
2397 # Chars decoded since `start_pos`
2398 chars_decoded = 0
2399 for i in range(skip_bytes, len(next_input)):
2400 bytes_fed += 1
2401 chars_decoded += len(decoder.decode(next_input[i:i+1]))
2402 dec_buffer, dec_flags = decoder.getstate()
2403 if not dec_buffer and chars_decoded <= chars_to_skip:
2404 # Decoder buffer is empty, so this is a safe start point.
2405 start_pos += bytes_fed
2406 chars_to_skip -= chars_decoded
2407 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
2408 if chars_decoded >= chars_to_skip:
2409 break
2410 else:
2411 # We didn't get enough decoded data; signal EOF to get more.
2412 chars_decoded += len(decoder.decode(b'', final=True))
2413 need_eof = True
2414 if chars_decoded < chars_to_skip:
2415 raise OSError("can't reconstruct logical file position")
2416
2417 # The returned cookie corresponds to the last safe start point.
2418 return self._pack_cookie(
2419 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
2420 finally:
2421 decoder.setstate(saved_state)
2422
2423 def truncate(self, pos=None):
2424 self.flush()
2425 if pos is None:
2426 pos = self.tell()
2427 return self.buffer.truncate(pos)
2428
2429 def detach(self):
2430 if self.buffer is None:
2431 raise ValueError("buffer is already detached")
2432 self.flush()
2433 buffer = self._buffer
2434 self._buffer = None
2435 return buffer
2436
2437 def seek(self, cookie, whence=0):
2438 def _reset_encoder(position):
2439 """Reset the encoder (merely useful for proper BOM handling)"""
2440 try:
2441 encoder = self._encoder or self._get_encoder()
2442 except LookupError:
2443 # Sometimes the encoder doesn't exist
2444 pass
2445 else:
2446 if position != 0:
2447 encoder.setstate(0)
2448 else:
2449 encoder.reset()
2450
2451 if self.closed:
2452             raise ValueError("seek on closed file")
2453 if not self._seekable:
2454 raise UnsupportedOperation("underlying stream is not seekable")
2455 if whence == SEEK_CUR:
2456 if cookie != 0:
2457 raise UnsupportedOperation("can't do nonzero cur-relative seeks")
2458 # Seeking to the current position should attempt to
2459 # sync the underlying buffer with the current position.
2460 whence = 0
2461 cookie = self.tell()
2462 elif whence == SEEK_END:
2463 if cookie != 0:
2464 raise UnsupportedOperation("can't do nonzero end-relative seeks")
2465 self.flush()
2466 position = self.buffer.seek(0, whence)
2467 self._set_decoded_chars('')
2468 self._snapshot = None
2469 if self._decoder:
2470 self._decoder.reset()
2471 _reset_encoder(position)
2472 return position
2473 if whence != 0:
2474 raise ValueError("unsupported whence (%r)" % (whence,))
2475 if cookie < 0:
2476 raise ValueError("negative seek position %r" % (cookie,))
2477 self.flush()
2478
2479 # The strategy of seek() is to go back to the safe start point
2480 # and replay the effect of read(chars_to_skip) from there.
2481 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
2482 self._unpack_cookie(cookie)
2483
2484 # Seek back to the safe start point.
2485 self.buffer.seek(start_pos)
2486 self._set_decoded_chars('')
2487 self._snapshot = None
2488
2489 # Restore the decoder to its state from the safe start point.
2490 if cookie == 0 and self._decoder:
2491 self._decoder.reset()
2492 elif self._decoder or dec_flags or chars_to_skip:
2493 self._decoder = self._decoder or self._get_decoder()
2494 self._decoder.setstate((b'', dec_flags))
2495 self._snapshot = (dec_flags, b'')
2496
2497 if chars_to_skip:
2498 # Just like _read_chunk, feed the decoder and save a snapshot.
2499 input_chunk = self.buffer.read(bytes_to_feed)
2500 self._set_decoded_chars(
2501 self._decoder.decode(input_chunk, need_eof))
2502 self._snapshot = (dec_flags, input_chunk)
2503
2504 # Skip chars_to_skip of the decoded characters.
2505 if len(self._decoded_chars) < chars_to_skip:
2506 raise OSError("can't restore logical file position")
2507 self._decoded_chars_used = chars_to_skip
2508
2509 _reset_encoder(cookie)
2510 return cookie
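
    # Round-trip sketch: a cookie returned by tell() can be passed straight
    # back to seek() to restore the exact text position:
    #
    #     >>> t = TextIOWrapper(BytesIO(b"spam\neggs\n"), encoding="utf-8")
    #     >>> t.readline()
    #     'spam\n'
    #     >>> pos = t.tell()
    #     >>> t.readline()
    #     'eggs\n'
    #     >>> pos == t.seek(pos)
    #     True
    #     >>> t.readline()
    #     'eggs\n'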
2511
2512 def read(self, size=None):
2513 self._checkReadable()
2514 if size is None:
2515 size = -1
2516 else:
2517 try:
2518 size_index = size.__index__
2519 except AttributeError:
2520 raise TypeError(f"{size!r} is not an integer")
2521 else:
2522 size = size_index()
2523 decoder = self._decoder or self._get_decoder()
2524 if size < 0:
2525 # Read everything.
2526 result = (self._get_decoded_chars() +
2527 decoder.decode(self.buffer.read(), final=True))
2528 self._set_decoded_chars('')
2529 self._snapshot = None
2530 return result
2531 else:
2532 # Keep reading chunks until we have size characters to return.
2533 eof = False
2534 result = self._get_decoded_chars(size)
2535 while len(result) < size and not eof:
2536 eof = not self._read_chunk()
2537 result += self._get_decoded_chars(size - len(result))
2538 return result
2539
2540 def __next__(self):
2541 self._telling = False
2542 line = self.readline()
2543 if not line:
2544 self._snapshot = None
2545 self._telling = self._seekable
2546 raise StopIteration
2547 return line
2548
2549 def readline(self, size=None):
2550 if self.closed:
2551 raise ValueError("read from closed file")
2552 if size is None:
2553 size = -1
2554 else:
2555 try:
2556 size_index = size.__index__
2557 except AttributeError:
2558 raise TypeError(f"{size!r} is not an integer")
2559 else:
2560 size = size_index()
2561
2562 # Grab all the decoded text (we will rewind any extra bits later).
2563 line = self._get_decoded_chars()
2564
2565 start = 0
2566 # Make the decoder if it doesn't already exist.
2567 if not self._decoder:
2568 self._get_decoder()
2569
2570 pos = endpos = None
2571 while True:
2572 if self._readtranslate:
2573 # Newlines are already translated, only search for \n
2574 pos = line.find('\n', start)
2575 if pos >= 0:
2576 endpos = pos + 1
2577 break
2578 else:
2579 start = len(line)
2580
2581 elif self._readuniversal:
2582 # Universal newline search. Find any of \r, \r\n, \n
2583 # The decoder ensures that \r\n are not split in two pieces
2584
2585 # In C we'd look for these in parallel of course.
2586 nlpos = line.find("\n", start)
2587 crpos = line.find("\r", start)
2588 if crpos == -1:
2589 if nlpos == -1:
2590 # Nothing found
2591 start = len(line)
2592 else:
2593 # Found \n
2594 endpos = nlpos + 1
2595 break
2596 elif nlpos == -1:
2597 # Found lone \r
2598 endpos = crpos + 1
2599 break
2600 elif nlpos < crpos:
2601 # Found \n
2602 endpos = nlpos + 1
2603 break
2604 elif nlpos == crpos + 1:
2605 # Found \r\n
2606 endpos = crpos + 2
2607 break
2608 else:
2609 # Found \r
2610 endpos = crpos + 1
2611 break
2612 else:
2613 # non-universal
2614 pos = line.find(self._readnl)
2615 if pos >= 0:
2616 endpos = pos + len(self._readnl)
2617 break
2618
2619 if size >= 0 and len(line) >= size:
2620 endpos = size # reached length size
2621 break
2622
2623             # No line ending seen yet - get more data
2624 while self._read_chunk():
2625 if self._decoded_chars:
2626 break
2627 if self._decoded_chars:
2628 line += self._get_decoded_chars()
2629 else:
2630 # end of file
2631 self._set_decoded_chars('')
2632 self._snapshot = None
2633 return line
2634
2635 if size >= 0 and endpos > size:
2636 endpos = size # don't exceed size
2637
2638 # Rewind _decoded_chars to just after the line ending we found.
2639 self._rewind_decoded_chars(len(line) - endpos)
2640 return line[:endpos]
2641
2642 @property
2643 def newlines(self):
2644 return self._decoder.newlines if self._decoder else None
2645
2646
2647 class StringIO(TextIOWrapper):
2648 """Text I/O implementation using an in-memory buffer.
2649
2650     The initial_value argument sets the initial value of the object. The
2651     newline argument works like the one of TextIOWrapper's constructor.
2652 """
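    # Minimal usage sketch:
    #
    #     >>> s = StringIO("spam\neggs\n")
    #     >>> s.readline()
    #     'spam\n'
    #     >>> s.getvalue()
    #     'spam\neggs\n'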
2653
2654 def __init__(self, initial_value="", newline="\n"):
2655 super(StringIO, self).__init__(BytesIO(),
2656 encoding="utf-8",
2657 errors="surrogatepass",
2658 newline=newline)
2659 # Issue #5645: make universal newlines semantics the same as in the
2660 # C version, even under Windows.
2661 if newline is None:
2662 self._writetranslate = False
2663 if initial_value is not None:
2664 if not isinstance(initial_value, str):
2665 raise TypeError("initial_value must be str or None, not {0}"
2666 .format(type(initial_value).__name__))
2667 self.write(initial_value)
2668 self.seek(0)
2669
2670 def getvalue(self):
2671 self.flush()
2672 decoder = self._decoder or self._get_decoder()
2673 old_state = decoder.getstate()
2674 decoder.reset()
2675 try:
2676 return decoder.decode(self.buffer.getvalue(), final=True)
2677 finally:
2678 decoder.setstate(old_state)
2679
2680 def __repr__(self):
2681         # TextIOWrapper includes the encoding in its repr. In StringIO,
2682 # that's an implementation detail.
2683 return object.__repr__(self)
2684
2685 @property
2686 def errors(self):
2687 return None
2688
2689 @property
2690 def encoding(self):
2691 return None
2692
2693 def detach(self):
2694 # This doesn't make sense on StringIO.
2695 self._unsupported("detach")