1 """
2 Python implementation of the io module.
3 """
4
5 import os
6 import abc
7 import codecs
8 import errno
9 import stat
10 import sys
11 # Import _thread instead of threading to reduce startup cost
12 from _thread import allocate_lock as Lock
13 if sys.platform in {'win32', 'cygwin'}:
14 from msvcrt import setmode as _setmode
15 else:
16 _setmode = None
17
18 import io
19 from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
20
21 valid_seek_flags = {0, 1, 2} # Hardwired values
if hasattr(os, 'SEEK_HOLE'):
23 valid_seek_flags.add(os.SEEK_HOLE)
24 valid_seek_flags.add(os.SEEK_DATA)
25
26 # open() uses st_blksize whenever we can
27 DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
28
29 # NOTE: Base classes defined here are registered with the "official" ABCs
30 # defined in io.py. We don't use real inheritance though, because we don't want
31 # to inherit the C implementations.
32
33 # Rebind for compatibility
34 BlockingIOError = BlockingIOError
35
36 # Does io.IOBase finalizer log the exception if the close() method fails?
# The exception is ignored silently by default in a release build.
38 _IOBASE_EMITS_UNRAISABLE = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode)
39 # Does open() check its 'errors' argument?
40 _CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE
41
42
43 def text_encoding(encoding, stacklevel=2):
44 """
45 A helper function to choose the text encoding.
46
47 When encoding is not None, this function returns it.
48 Otherwise, this function returns the default text encoding
    (i.e. "locale" or "utf-8", depending on the UTF-8 mode).
50
51 This function emits an EncodingWarning if *encoding* is None and
52 sys.flags.warn_default_encoding is true.
53
54 This can be used in APIs with an encoding=None parameter
55 that pass it to TextIOWrapper or open.
56 However, please consider using encoding="utf-8" for new APIs.
57 """
58 if encoding is None:
59 if sys.flags.utf8_mode:
60 encoding = "utf-8"
61 else:
62 encoding = "locale"
63 if sys.flags.warn_default_encoding:
64 import warnings
65 warnings.warn("'encoding' argument not specified.",
66 EncodingWarning, stacklevel + 1)
67 return encoding
68
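# An illustrative sketch (not part of this module): an API that accepts
# encoding=None can resolve the default with text_encoding() before
# passing it on to open() or TextIOWrapper.  read_config() and its path
# argument are hypothetical names used only for this example.
#
#     def read_config(path, encoding=None):
#         encoding = text_encoding(encoding)
#         with open(path, encoding=encoding) as f:
#             return f.read()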
69
70 # Wrapper for builtins.open
71 #
72 # Trick so that open() won't become a bound method when stored
73 # as a class variable (as dbm.dumb does).
74 #
75 # See init_set_builtins_open() in Python/pylifecycle.c.
76 @staticmethod
77 def open(file, mode="r", buffering=-1, encoding=None, errors=None,
78 newline=None, closefd=True, opener=None):
79
80 r"""Open file and return a stream. Raise OSError upon failure.
81
82 file is either a text or byte string giving the name (and the path
83 if the file isn't in the current working directory) of the file to
84 be opened or an integer file descriptor of the file to be
85 wrapped. (If a file descriptor is given, it is closed when the
86 returned I/O object is closed, unless closefd is set to False.)
87
88 mode is an optional string that specifies the mode in which the file is
89 opened. It defaults to 'r' which means open for reading in text mode. Other
90 common values are 'w' for writing (truncating the file if it already
91 exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems means that all writes append to the end of the
93 file regardless of the current seek position). In text mode, if encoding is
94 not specified the encoding used is platform dependent. (For reading and
95 writing raw bytes use binary mode and leave encoding unspecified.) The
96 available modes are:
97
98 ========= ===============================================================
99 Character Meaning
100 --------- ---------------------------------------------------------------
101 'r' open for reading (default)
102 'w' open for writing, truncating the file first
103 'x' create a new file and open it for writing
104 'a' open for writing, appending to the end of the file if it exists
105 'b' binary mode
106 't' text mode (default)
107 '+' open a disk file for updating (reading and writing)
108 ========= ===============================================================
109
110 The default mode is 'rt' (open for reading text). For binary random
111 access, the mode 'w+b' opens and truncates the file to 0 bytes, while
112 'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.
114
115 Python distinguishes between files opened in binary and text modes,
116 even when the underlying operating system doesn't. Files opened in
117 binary mode (appending 'b' to the mode argument) return contents as
118 bytes objects without any decoding. In text mode (the default, or when
119 't' is appended to the mode argument), the contents of the file are
120 returned as strings, the bytes having been first decoded using a
121 platform-dependent encoding or using the specified encoding if given.
122
123 buffering is an optional integer used to set the buffering policy.
124 Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
125 line buffering (only usable in text mode), and an integer > 1 to indicate
126 the size of a fixed-size chunk buffer. When no buffering argument is
127 given, the default buffering policy works as follows:
128
129 * Binary files are buffered in fixed-size chunks; the size of the buffer
130 is chosen using a heuristic trying to determine the underlying device's
131 "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
132 On many systems, the buffer will typically be 4096 or 8192 bytes long.
133
134 * "Interactive" text files (files for which isatty() returns True)
135 use line buffering. Other text files use the policy described above
136 for binary files.
137
138 encoding is the str name of the encoding used to decode or encode the
139 file. This should only be used in text mode. The default encoding is
140 platform dependent, but any encoding supported by Python can be
141 passed. See the codecs module for the list of supported encodings.
142
143 errors is an optional string that specifies how encoding errors are to
144 be handled---this argument should not be used in binary mode. Pass
145 'strict' to raise a ValueError exception if there is an encoding error
146 (the default of None has the same effect), or pass 'ignore' to ignore
147 errors. (Note that ignoring encoding errors can lead to data loss.)
148 See the documentation for codecs.register for a list of the permitted
149 encoding error strings.
150
    newline is a string controlling how universal newlines mode works (it only
152 applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It works
153 as follows:
154
155 * On input, if newline is None, universal newlines mode is
156 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
157 these are translated into '\n' before being returned to the
158 caller. If it is '', universal newline mode is enabled, but line
159 endings are returned to the caller untranslated. If it has any of
160 the other legal values, input lines are only terminated by the given
161 string, and the line ending is returned to the caller untranslated.
162
163 * On output, if newline is None, any '\n' characters written are
164 translated to the system default line separator, os.linesep. If
165 newline is '', no translation takes place. If newline is any of the
166 other legal values, any '\n' characters written are translated to
167 the given string.
168
    closefd is a bool. If closefd is False, the underlying file descriptor will
170 be kept open when the file is closed. This does not work when a file name is
171 given and must be True in that case.
172
173 The newly created file is non-inheritable.
174
175 A custom opener can be used by passing a callable as *opener*. The
176 underlying file descriptor for the file object is then obtained by calling
177 *opener* with (*file*, *flags*). *opener* must return an open file
178 descriptor (passing os.open as *opener* results in functionality similar to
179 passing None).
180
181 open() returns a file object whose type depends on the mode, and
182 through which the standard file operations such as reading and writing
183 are performed. When open() is used to open a file in a text mode ('w',
184 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
185 a file in a binary mode, the returned class varies: in read binary
186 mode, it returns a BufferedReader; in write binary and append binary
187 modes, it returns a BufferedWriter, and in read/write mode, it returns
188 a BufferedRandom.
189
190 It is also possible to use a string or bytearray as a file for both
191 reading and writing. For strings StringIO can be used like a file
192 opened in a text mode, and for bytes a BytesIO can be used like a file
193 opened in a binary mode.
194 """
195 if not isinstance(file, int):
196 file = os.fspath(file)
197 if not isinstance(file, (str, bytes, int)):
198 raise TypeError("invalid file: %r" % file)
199 if not isinstance(mode, str):
200 raise TypeError("invalid mode: %r" % mode)
201 if not isinstance(buffering, int):
202 raise TypeError("invalid buffering: %r" % buffering)
203 if encoding is not None and not isinstance(encoding, str):
204 raise TypeError("invalid encoding: %r" % encoding)
205 if errors is not None and not isinstance(errors, str):
206 raise TypeError("invalid errors: %r" % errors)
207 modes = set(mode)
208 if modes - set("axrwb+t") or len(mode) > len(modes):
209 raise ValueError("invalid mode: %r" % mode)
210 creating = "x" in modes
211 reading = "r" in modes
212 writing = "w" in modes
213 appending = "a" in modes
214 updating = "+" in modes
215 text = "t" in modes
216 binary = "b" in modes
217 if text and binary:
218 raise ValueError("can't have text and binary mode at once")
219 if creating + reading + writing + appending > 1:
220 raise ValueError("can't have read/write/append mode at once")
221 if not (creating or reading or writing or appending):
222 raise ValueError("must have exactly one of read/write/append mode")
223 if binary and encoding is not None:
224 raise ValueError("binary mode doesn't take an encoding argument")
225 if binary and errors is not None:
226 raise ValueError("binary mode doesn't take an errors argument")
227 if binary and newline is not None:
228 raise ValueError("binary mode doesn't take a newline argument")
229 if binary and buffering == 1:
230 import warnings
231 warnings.warn("line buffering (buffering=1) isn't supported in binary "
232 "mode, the default buffer size will be used",
233 RuntimeWarning, 2)
234 raw = FileIO(file,
235 (creating and "x" or "") +
236 (reading and "r" or "") +
237 (writing and "w" or "") +
238 (appending and "a" or "") +
239 (updating and "+" or ""),
240 closefd, opener=opener)
241 result = raw
242 try:
243 line_buffering = False
244 if buffering == 1 or buffering < 0 and raw.isatty():
245 buffering = -1
246 line_buffering = True
247 if buffering < 0:
248 buffering = DEFAULT_BUFFER_SIZE
249 try:
250 bs = os.fstat(raw.fileno()).st_blksize
251 except (OSError, AttributeError):
252 pass
253 else:
254 if bs > 1:
255 buffering = bs
256 if buffering < 0:
257 raise ValueError("invalid buffering size")
258 if buffering == 0:
259 if binary:
260 return result
261 raise ValueError("can't have unbuffered text I/O")
262 if updating:
263 buffer = BufferedRandom(raw, buffering)
264 elif creating or writing or appending:
265 buffer = BufferedWriter(raw, buffering)
266 elif reading:
267 buffer = BufferedReader(raw, buffering)
268 else:
269 raise ValueError("unknown mode: %r" % mode)
270 result = buffer
271 if binary:
272 return result
273 encoding = text_encoding(encoding)
274 text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
275 result = text
276 text.mode = mode
277 return result
278 except:
279 result.close()
280 raise
281
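# An illustrative sketch of the layering described in the open() docstring
# above (the file names are hypothetical): binary modes stop at the
# buffered layer, text modes add a TextIOWrapper on top.
#
#     open("data.bin", "rb")      # -> BufferedReader(FileIO)
#     open("data.bin", "wb")      # -> BufferedWriter(FileIO)
#     open("data.bin", "r+b")     # -> BufferedRandom(FileIO)
#     open("data.bin", "rb", 0)   # -> FileIO (unbuffered)
#     open("notes.txt", "r")      # -> TextIOWrapper(BufferedReader(FileIO))
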
282 # Define a default pure-Python implementation for open_code()
283 # that does not allow hooks. Warn on first use. Defined for tests.
284 def _open_code_with_warning(path):
285 """Opens the provided file with mode ``'rb'``. This function
286 should be used when the intent is to treat the contents as
287 executable code.
288
289 ``path`` should be an absolute path.
290
291 When supported by the runtime, this function can be hooked
292 in order to allow embedders more control over code files.
293 This functionality is not supported on the current runtime.
294 """
295 import warnings
296 warnings.warn("_pyio.open_code() may not be using hooks",
297 RuntimeWarning, 2)
298 return open(path, "rb")
299
300 try:
301 open_code = io.open_code
302 except AttributeError:
303 open_code = _open_code_with_warning
304
305
306 def __getattr__(name):
307 if name == "OpenWrapper":
308 # bpo-43680: Until Python 3.9, _pyio.open was not a static method and
309 # builtins.open was set to OpenWrapper to not become a bound method
310 # when set to a class variable. _io.open is a built-in function whereas
311 # _pyio.open is a Python function. In Python 3.10, _pyio.open() is now
312 # a static method, and builtins.open() is now io.open().
313 import warnings
314 warnings.warn('OpenWrapper is deprecated, use open instead',
315 DeprecationWarning, stacklevel=2)
316 global OpenWrapper
317 OpenWrapper = open
318 return OpenWrapper
319 raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
320
321
322 # In normal operation, both `UnsupportedOperation`s should be bound to the
323 # same object.
324 try:
325 UnsupportedOperation = io.UnsupportedOperation
326 except AttributeError:
    class UnsupportedOperation(OSError, ValueError):
328 pass
329
330
class IOBase(metaclass=abc.ABCMeta):
332
333 """The abstract base class for all I/O classes.
334
335 This class provides dummy implementations for many methods that
336 derived classes can override selectively; the default implementations
337 represent a file that cannot be read, written or seeked.
338
339 Even though IOBase does not declare read or write because
340 their signatures will vary, implementations and clients should
341 consider those methods part of the interface. Also, implementations
342 may raise UnsupportedOperation when operations they do not support are
343 called.
344
345 The basic type used for binary data read from or written to a file is
346 bytes. Other bytes-like objects are accepted as method arguments too.
347 Text I/O classes work with str data.
348
349 Note that calling any method (even inquiries) on a closed stream is
350 undefined. Implementations may raise OSError in this case.
351
352 IOBase (and its subclasses) support the iterator protocol, meaning
353 that an IOBase object can be iterated over yielding the lines in a
354 stream.
355
356 IOBase also supports the :keyword:`with` statement. In this example,
357 fp is closed after the suite of the with statement is complete:
358
    with open('spam.txt', 'w') as fp:
360 fp.write('Spam and eggs!')
361 """
362
363 ### Internal ###
364
365 def _unsupported(self, name):
366 """Internal: raise an OSError exception for unsupported operations."""
367 raise UnsupportedOperation("%s.%s() not supported" %
368 (self.__class__.__name__, name))
369
370 ### Positioning ###
371
372 def seek(self, pos, whence=0):
373 """Change stream position.
374
375 Change the stream position to byte offset pos. Argument pos is
376 interpreted relative to the position indicated by whence. Values
377 for whence are ints:
378
379 * 0 -- start of stream (the default); offset should be zero or positive
380 * 1 -- current stream position; offset may be negative
381 * 2 -- end of stream; offset is usually negative
382 Some operating systems / file systems could provide additional values.
383
384 Return an int indicating the new absolute position.
385 """
386 self._unsupported("seek")
387
388 def tell(self):
389 """Return an int indicating the current stream position."""
390 return self.seek(0, 1)
391
392 def truncate(self, pos=None):
393 """Truncate file to size bytes.
394
395 Size defaults to the current IO position as reported by tell(). Return
396 the new size.
397 """
398 self._unsupported("truncate")
399
400 ### Flush and close ###
401
402 def flush(self):
403 """Flush write buffers, if applicable.
404
405 This is not implemented for read-only and non-blocking streams.
406 """
407 self._checkClosed()
408 # XXX Should this return the number of bytes written???
409
410 __closed = False
411
412 def close(self):
413 """Flush and close the IO object.
414
415 This method has no effect if the file is already closed.
416 """
417 if not self.__closed:
418 try:
419 self.flush()
420 finally:
421 self.__closed = True
422
423 def __del__(self):
424 """Destructor. Calls close()."""
425 try:
426 closed = self.closed
427 except AttributeError:
428 # If getting closed fails, then the object is probably
429 # in an unusable state, so ignore.
430 return
431
432 if closed:
433 return
434
435 if _IOBASE_EMITS_UNRAISABLE:
436 self.close()
437 else:
438 # The try/except block is in case this is called at program
439 # exit time, when it's possible that globals have already been
440 # deleted, and then the close() call might fail. Since
441 # there's nothing we can do about such failures and they annoy
442 # the end users, we suppress the traceback.
443 try:
444 self.close()
445 except:
446 pass
447
448 ### Inquiries ###
449
450 def seekable(self):
451 """Return a bool indicating whether object supports random access.
452
453 If False, seek(), tell() and truncate() will raise OSError.
454 This method may need to do a test seek().
455 """
456 return False
457
458 def _checkSeekable(self, msg=None):
459 """Internal: raise UnsupportedOperation if file is not seekable
460 """
461 if not self.seekable():
462 raise UnsupportedOperation("File or stream is not seekable."
463 if msg is None else msg)
464
465 def readable(self):
466 """Return a bool indicating whether object was opened for reading.
467
468 If False, read() will raise OSError.
469 """
470 return False
471
472 def _checkReadable(self, msg=None):
473 """Internal: raise UnsupportedOperation if file is not readable
474 """
475 if not self.readable():
476 raise UnsupportedOperation("File or stream is not readable."
477 if msg is None else msg)
478
479 def writable(self):
480 """Return a bool indicating whether object was opened for writing.
481
482 If False, write() and truncate() will raise OSError.
483 """
484 return False
485
486 def _checkWritable(self, msg=None):
487 """Internal: raise UnsupportedOperation if file is not writable
488 """
489 if not self.writable():
490 raise UnsupportedOperation("File or stream is not writable."
491 if msg is None else msg)
492
493 @property
494 def closed(self):
495 """closed: bool. True iff the file has been closed.
496
497 For backwards compatibility, this is a property, not a predicate.
498 """
499 return self.__closed
500
501 def _checkClosed(self, msg=None):
502 """Internal: raise a ValueError if file is closed
503 """
504 if self.closed:
505 raise ValueError("I/O operation on closed file."
506 if msg is None else msg)
507
508 ### Context manager ###
509
510 def __enter__(self): # That's a forward reference
511 """Context management protocol. Returns self (an instance of IOBase)."""
512 self._checkClosed()
513 return self
514
515 def __exit__(self, *args):
516 """Context management protocol. Calls close()"""
517 self.close()
518
519 ### Lower-level APIs ###
520
521 # XXX Should these be present even if unimplemented?
522
523 def fileno(self):
524 """Returns underlying file descriptor (an int) if one exists.
525
526 An OSError is raised if the IO object does not use a file descriptor.
527 """
528 self._unsupported("fileno")
529
530 def isatty(self):
531 """Return a bool indicating whether this is an 'interactive' stream.
532
533 Return False if it can't be determined.
534 """
535 self._checkClosed()
536 return False
537
538 ### Readline[s] and writelines ###
539
540 def readline(self, size=-1):
541 r"""Read and return a line of bytes from the stream.
542
543 If size is specified, at most size bytes will be read.
544 Size should be an int.
545
546 The line terminator is always b'\n' for binary files; for text
547 files, the newlines argument to open can be used to select the line
548 terminator(s) recognized.
549 """
550 # For backwards compatibility, a (slowish) readline().
551 if hasattr(self, "peek"):
552 def nreadahead():
553 readahead = self.peek(1)
554 if not readahead:
555 return 1
556 n = (readahead.find(b"\n") + 1) or len(readahead)
557 if size >= 0:
558 n = min(n, size)
559 return n
560 else:
561 def nreadahead():
562 return 1
563 if size is None:
564 size = -1
565 else:
566 try:
567 size_index = size.__index__
568 except AttributeError:
569 raise TypeError(f"{size!r} is not an integer")
570 else:
571 size = size_index()
572 res = bytearray()
573 while size < 0 or len(res) < size:
574 b = self.read(nreadahead())
575 if not b:
576 break
577 res += b
578 if res.endswith(b"\n"):
579 break
580 return bytes(res)
581
582 def __iter__(self):
583 self._checkClosed()
584 return self
585
586 def __next__(self):
587 line = self.readline()
588 if not line:
589 raise StopIteration
590 return line
591
592 def readlines(self, hint=None):
593 """Return a list of lines from the stream.
594
595 hint can be specified to control the number of lines read: no more
596 lines will be read if the total size (in bytes/characters) of all
597 lines so far exceeds hint.
598 """
599 if hint is None or hint <= 0:
600 return list(self)
601 n = 0
602 lines = []
603 for line in self:
604 lines.append(line)
605 n += len(line)
606 if n >= hint:
607 break
608 return lines
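
    # Illustrative only: with a hint, iteration stops once the running total
    # of line lengths reaches it, so the call below returns just the first
    # two lines (2 + 2 = 4 >= 3).
    #
    #     BytesIO(b"a\nb\nc\n").readlines(3)    # -> [b'a\n', b'b\n']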
609
610 def writelines(self, lines):
611 """Write a list of lines to the stream.
612
613 Line separators are not added, so it is usual for each of the lines
614 provided to have a line separator at the end.
615 """
616 self._checkClosed()
617 for line in lines:
618 self.write(line)
619
620 io.IOBase.register(IOBase)
621
622
class RawIOBase(IOBase):
624
625 """Base class for raw binary I/O."""
626
627 # The read() method is implemented by calling readinto(); derived
628 # classes that want to support read() only need to implement
629 # readinto() as a primitive operation. In general, readinto() can be
630 # more efficient than read().
631
632 # (It would be tempting to also provide an implementation of
633 # readinto() in terms of read(), in case the latter is a more suitable
634 # primitive operation, but that would lead to nasty recursion in case
635 # a subclass doesn't implement either.)
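
    # A minimal sketch (illustrative, not part of this module): a subclass
    # that implements only readinto() inherits a working read() from the
    # default implementation below.  ZeroStream is a hypothetical name.
    #
    #     class ZeroStream(RawIOBase):
    #         def readable(self):
    #             return True
    #         def readinto(self, b):
    #             b[:] = bytes(len(b))    # fill the caller's buffer with NULs
    #             return len(b)
    #
    #     ZeroStream().read(4)    # -> b'\x00\x00\x00\x00'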
636
637 def read(self, size=-1):
638 """Read and return up to size bytes, where size is an int.
639
640 Returns an empty bytes object on EOF, or None if the object is
641 set not to block and has no data to read.
642 """
643 if size is None:
644 size = -1
645 if size < 0:
646 return self.readall()
647 b = bytearray(size.__index__())
648 n = self.readinto(b)
649 if n is None:
650 return None
651 del b[n:]
652 return bytes(b)
653
654 def readall(self):
655 """Read until EOF, using multiple read() call."""
656 res = bytearray()
657 while True:
658 data = self.read(DEFAULT_BUFFER_SIZE)
659 if not data:
660 break
661 res += data
662 if res:
663 return bytes(res)
664 else:
665 # b'' or None
666 return data
667
668 def readinto(self, b):
669 """Read bytes into a pre-allocated bytes-like object b.
670
671 Returns an int representing the number of bytes read (0 for EOF), or
672 None if the object is set not to block and has no data to read.
673 """
674 self._unsupported("readinto")
675
676 def write(self, b):
677 """Write the given buffer to the IO stream.
678
679 Returns the number of bytes written, which may be less than the
680 length of b in bytes.
681 """
682 self._unsupported("write")
683
684 io.RawIOBase.register(RawIOBase)
685 from _io import FileIO
686 RawIOBase.register(FileIO)
687
688
class BufferedIOBase(IOBase):
690
691 """Base class for buffered IO objects.
692
693 The main difference with RawIOBase is that the read() method
694 supports omitting the size argument, and does not have a default
695 implementation that defers to readinto().
696
697 In addition, read(), readinto() and write() may raise
698 BlockingIOError if the underlying raw stream is in non-blocking
699 mode and not ready; unlike their raw counterparts, they will never
700 return None.
701
702 A typical implementation should not inherit from a RawIOBase
703 implementation, but wrap one.
704 """
705
706 def read(self, size=-1):
707 """Read and return up to size bytes, where size is an int.
708
709 If the argument is omitted, None, or negative, reads and
710 returns all data until EOF.
711
712 If the argument is positive, and the underlying raw stream is
713 not 'interactive', multiple raw reads may be issued to satisfy
714 the byte count (unless EOF is reached first). But for
715 interactive raw streams (XXX and for pipes?), at most one raw
716 read will be issued, and a short result does not imply that
717 EOF is imminent.
718
719 Returns an empty bytes array on EOF.
720
721 Raises BlockingIOError if the underlying raw stream has no
722 data at the moment.
723 """
724 self._unsupported("read")
725
726 def read1(self, size=-1):
727 """Read up to size bytes with at most one read() system call,
728 where size is an int.
729 """
730 self._unsupported("read1")
731
732 def readinto(self, b):
733 """Read bytes into a pre-allocated bytes-like object b.
734
735 Like read(), this may issue multiple reads to the underlying raw
736 stream, unless the latter is 'interactive'.
737
738 Returns an int representing the number of bytes read (0 for EOF).
739
740 Raises BlockingIOError if the underlying raw stream has no
741 data at the moment.
742 """
743
744 return self._readinto(b, read1=False)
745
746 def readinto1(self, b):
747 """Read bytes into buffer *b*, using at most one system call
748
749 Returns an int representing the number of bytes read (0 for EOF).
750
751 Raises BlockingIOError if the underlying raw stream has no
752 data at the moment.
753 """
754
755 return self._readinto(b, read1=True)
756
757 def _readinto(self, b, read1):
758 if not isinstance(b, memoryview):
759 b = memoryview(b)
760 b = b.cast('B')
761
762 if read1:
763 data = self.read1(len(b))
764 else:
765 data = self.read(len(b))
766 n = len(data)
767
768 b[:n] = data
769
770 return n
771
772 def write(self, b):
773 """Write the given bytes buffer to the IO stream.
774
775 Return the number of bytes written, which is always the length of b
776 in bytes.
777
778 Raises BlockingIOError if the buffer is full and the
779 underlying raw stream cannot accept more data at the moment.
780 """
781 self._unsupported("write")
782
783 def detach(self):
784 """
785 Separate the underlying raw stream from the buffer and return it.
786
787 After the raw stream has been detached, the buffer is in an unusable
788 state.
789 """
790 self._unsupported("detach")
791
792 io.BufferedIOBase.register(BufferedIOBase)
793
794
class _BufferedIOMixin(BufferedIOBase):
796
797 """A mixin implementation of BufferedIOBase with an underlying raw stream.
798
799 This passes most requests on to the underlying raw stream. It
800 does *not* provide implementations of read(), readinto() or
801 write().
802 """
803
804 def __init__(self, raw):
805 self._raw = raw
806
807 ### Positioning ###
808
809 def seek(self, pos, whence=0):
810 new_position = self.raw.seek(pos, whence)
811 if new_position < 0:
812 raise OSError("seek() returned an invalid position")
813 return new_position
814
815 def tell(self):
816 pos = self.raw.tell()
817 if pos < 0:
818 raise OSError("tell() returned an invalid position")
819 return pos
820
821 def truncate(self, pos=None):
822 self._checkClosed()
823 self._checkWritable()
824
825 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
826 # and a flush may be necessary to synch both views of the current
827 # file state.
828 self.flush()
829
830 if pos is None:
831 pos = self.tell()
832 # XXX: Should seek() be used, instead of passing the position
833 # XXX directly to truncate?
834 return self.raw.truncate(pos)
835
836 ### Flush and close ###
837
838 def flush(self):
839 if self.closed:
840 raise ValueError("flush on closed file")
841 self.raw.flush()
842
843 def close(self):
844 if self.raw is not None and not self.closed:
845 try:
846 # may raise BlockingIOError or BrokenPipeError etc
847 self.flush()
848 finally:
849 self.raw.close()
850
851 def detach(self):
852 if self.raw is None:
853 raise ValueError("raw stream already detached")
854 self.flush()
855 raw = self._raw
856 self._raw = None
857 return raw
858
859 ### Inquiries ###
860
861 def seekable(self):
862 return self.raw.seekable()
863
864 @property
865 def raw(self):
866 return self._raw
867
868 @property
869 def closed(self):
870 return self.raw.closed
871
872 @property
873 def name(self):
874 return self.raw.name
875
876 @property
877 def mode(self):
878 return self.raw.mode
879
880 def __getstate__(self):
881 raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")
882
883 def __repr__(self):
884 modname = self.__class__.__module__
885 clsname = self.__class__.__qualname__
886 try:
887 name = self.name
888 except AttributeError:
889 return "<{}.{}>".format(modname, clsname)
890 else:
891 return "<{}.{} name={!r}>".format(modname, clsname, name)
892
893 ### Lower-level APIs ###
894
895 def fileno(self):
896 return self.raw.fileno()
897
898 def isatty(self):
899 return self.raw.isatty()
900
901
class BytesIO(BufferedIOBase):
903
904 """Buffered I/O implementation using an in-memory bytes buffer."""
905
906 # Initialize _buffer as soon as possible since it's used by __del__()
907 # which calls close()
908 _buffer = None
909
910 def __init__(self, initial_bytes=None):
911 buf = bytearray()
912 if initial_bytes is not None:
913 buf += initial_bytes
914 self._buffer = buf
915 self._pos = 0
916
917 def __getstate__(self):
918 if self.closed:
919 raise ValueError("__getstate__ on closed file")
920 return self.__dict__.copy()
921
922 def getvalue(self):
923 """Return the bytes value (contents) of the buffer
924 """
925 if self.closed:
926 raise ValueError("getvalue on closed file")
927 return bytes(self._buffer)
928
929 def getbuffer(self):
930 """Return a readable and writable view of the buffer.
931 """
932 if self.closed:
933 raise ValueError("getbuffer on closed file")
934 return memoryview(self._buffer)
935
936 def close(self):
937 if self._buffer is not None:
938 self._buffer.clear()
939 super().close()
940
941 def read(self, size=-1):
942 if self.closed:
943 raise ValueError("read from closed file")
944 if size is None:
945 size = -1
946 else:
947 try:
948 size_index = size.__index__
949 except AttributeError:
950 raise TypeError(f"{size!r} is not an integer")
951 else:
952 size = size_index()
953 if size < 0:
954 size = len(self._buffer)
955 if len(self._buffer) <= self._pos:
956 return b""
957 newpos = min(len(self._buffer), self._pos + size)
958 b = self._buffer[self._pos : newpos]
959 self._pos = newpos
960 return bytes(b)
961
962 def read1(self, size=-1):
963 """This is the same as read.
964 """
965 return self.read(size)
966
967 def write(self, b):
968 if self.closed:
969 raise ValueError("write to closed file")
970 if isinstance(b, str):
971 raise TypeError("can't write str to binary stream")
972 with memoryview(b) as view:
973 n = view.nbytes # Size of any bytes-like object
974 if n == 0:
975 return 0
976 pos = self._pos
977 if pos > len(self._buffer):
978 # Inserts null bytes between the current end of the file
979 # and the new write position.
980 padding = b'\x00' * (pos - len(self._buffer))
981 self._buffer += padding
982 self._buffer[pos:pos + n] = b
983 self._pos += n
984 return n
985
986 def seek(self, pos, whence=0):
987 if self.closed:
988 raise ValueError("seek on closed file")
989 try:
990 pos_index = pos.__index__
991 except AttributeError:
992 raise TypeError(f"{pos!r} is not an integer")
993 else:
994 pos = pos_index()
995 if whence == 0:
996 if pos < 0:
997 raise ValueError("negative seek position %r" % (pos,))
998 self._pos = pos
999 elif whence == 1:
1000 self._pos = max(0, self._pos + pos)
1001 elif whence == 2:
1002 self._pos = max(0, len(self._buffer) + pos)
1003 else:
1004 raise ValueError("unsupported whence value")
1005 return self._pos
1006
1007 def tell(self):
1008 if self.closed:
1009 raise ValueError("tell on closed file")
1010 return self._pos
1011
1012 def truncate(self, pos=None):
1013 if self.closed:
1014 raise ValueError("truncate on closed file")
1015 if pos is None:
1016 pos = self._pos
1017 else:
1018 try:
1019 pos_index = pos.__index__
1020 except AttributeError:
1021 raise TypeError(f"{pos!r} is not an integer")
1022 else:
1023 pos = pos_index()
1024 if pos < 0:
1025 raise ValueError("negative truncate position %r" % (pos,))
1026 del self._buffer[pos:]
1027 return pos
1028
1029 def readable(self):
1030 if self.closed:
1031 raise ValueError("I/O operation on closed file.")
1032 return True
1033
1034 def writable(self):
1035 if self.closed:
1036 raise ValueError("I/O operation on closed file.")
1037 return True
1038
1039 def seekable(self):
1040 if self.closed:
1041 raise ValueError("I/O operation on closed file.")
1042 return True
1043
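# Illustrative sketch of the zero-fill behaviour implemented in
# BytesIO.write() above: seeking past the end and then writing pads the
# gap with null bytes.
#
#     b = BytesIO()
#     b.seek(4)
#     b.write(b"xy")
#     b.getvalue()    # -> b'\x00\x00\x00\x00xy'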
1044
class BufferedReader(_BufferedIOMixin):
1046
1047 """BufferedReader(raw[, buffer_size])
1048
1049 A buffer for a readable, sequential BaseRawIO object.
1050
1051 The constructor creates a BufferedReader for the given readable raw
1052 stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
1053 is used.
1054 """
1055
1056 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
1057 """Create a new buffered reader using the given readable raw IO object.
1058 """
1059 if not raw.readable():
1060 raise OSError('"raw" argument must be readable.')
1061
1062 _BufferedIOMixin.__init__(self, raw)
1063 if buffer_size <= 0:
1064 raise ValueError("invalid buffer size")
1065 self.buffer_size = buffer_size
1066 self._reset_read_buf()
1067 self._read_lock = Lock()
1068
1069 def readable(self):
1070 return self.raw.readable()
1071
1072 def _reset_read_buf(self):
1073 self._read_buf = b""
1074 self._read_pos = 0
1075
1076 def read(self, size=None):
1077 """Read size bytes.
1078
1079 Returns exactly size bytes of data unless the underlying raw IO
1080 stream reaches EOF or if the call would block in non-blocking
1081 mode. If size is negative, read until EOF or until read() would
1082 block.
1083 """
1084 if size is not None and size < -1:
1085 raise ValueError("invalid number of bytes to read")
1086 with self._read_lock:
1087 return self._read_unlocked(size)
1088
1089 def _read_unlocked(self, n=None):
1090 nodata_val = b""
1091 empty_values = (b"", None)
1092 buf = self._read_buf
1093 pos = self._read_pos
1094
1095 # Special case for when the number of bytes to read is unspecified.
1096 if n is None or n == -1:
1097 self._reset_read_buf()
1098 if hasattr(self.raw, 'readall'):
1099 chunk = self.raw.readall()
1100 if chunk is None:
1101 return buf[pos:] or None
1102 else:
1103 return buf[pos:] + chunk
1104 chunks = [buf[pos:]] # Strip the consumed bytes.
1105 current_size = 0
1106 while True:
1107 # Read until EOF or until read() would block.
1108 chunk = self.raw.read()
1109 if chunk in empty_values:
1110 nodata_val = chunk
1111 break
1112 current_size += len(chunk)
1113 chunks.append(chunk)
1114 return b"".join(chunks) or nodata_val
1115
1116 # The number of bytes to read is specified, return at most n bytes.
1117 avail = len(buf) - pos # Length of the available buffered data.
1118 if n <= avail:
1119 # Fast path: the data to read is fully buffered.
1120 self._read_pos += n
1121 return buf[pos:pos+n]
1122 # Slow path: read from the stream until enough bytes are read,
1123 # or until an EOF occurs or until read() would block.
1124 chunks = [buf[pos:]]
1125 wanted = max(self.buffer_size, n)
1126 while avail < n:
1127 chunk = self.raw.read(wanted)
1128 if chunk in empty_values:
1129 nodata_val = chunk
1130 break
1131 avail += len(chunk)
1132 chunks.append(chunk)
1133 # n is more than avail only when an EOF occurred or when
1134 # read() would have blocked.
1135 n = min(n, avail)
1136 out = b"".join(chunks)
1137 self._read_buf = out[n:] # Save the extra data in the buffer.
1138 self._read_pos = 0
1139 return out[:n] if out else nodata_val
1140
1141 def peek(self, size=0):
1142 """Returns buffered bytes without advancing the position.
1143
1144 The argument indicates a desired minimal number of bytes; we
1145 do at most one raw read to satisfy it. We never return more
1146 than self.buffer_size.
1147 """
1148 with self._read_lock:
1149 return self._peek_unlocked(size)
1150
1151 def _peek_unlocked(self, n=0):
1152 want = min(n, self.buffer_size)
1153 have = len(self._read_buf) - self._read_pos
1154 if have < want or have <= 0:
1155 to_read = self.buffer_size - have
1156 current = self.raw.read(to_read)
1157 if current:
1158 self._read_buf = self._read_buf[self._read_pos:] + current
1159 self._read_pos = 0
1160 return self._read_buf[self._read_pos:]
1161
1162 def read1(self, size=-1):
1163 """Reads up to size bytes, with at most one read() system call."""
1164 # Returns up to size bytes. If at least one byte is buffered, we
1165 # only return buffered bytes. Otherwise, we do one raw read.
1166 if size < 0:
1167 size = self.buffer_size
1168 if size == 0:
1169 return b""
1170 with self._read_lock:
1171 self._peek_unlocked(1)
1172 return self._read_unlocked(
1173 min(size, len(self._read_buf) - self._read_pos))
1174
1175 # Implementing readinto() and readinto1() is not strictly necessary (we
1176 # could rely on the base class that provides an implementation in terms of
1177 # read() and read1()). We do it anyway to keep the _pyio implementation
1178 # similar to the io implementation (which implements the methods for
1179 # performance reasons).
1180 def _readinto(self, buf, read1):
1181 """Read data into *buf* with at most one system call."""
1182
1183 # Need to create a memoryview object of type 'b', otherwise
1184 # we may not be able to assign bytes to it, and slicing it
1185 # would create a new object.
1186 if not isinstance(buf, memoryview):
1187 buf = memoryview(buf)
1188 if buf.nbytes == 0:
1189 return 0
1190 buf = buf.cast('B')
1191
1192 written = 0
1193 with self._read_lock:
1194 while written < len(buf):
1195
1196 # First try to read from internal buffer
1197 avail = min(len(self._read_buf) - self._read_pos, len(buf))
1198 if avail:
1199 buf[written:written+avail] = \
1200 self._read_buf[self._read_pos:self._read_pos+avail]
1201 self._read_pos += avail
1202 written += avail
1203 if written == len(buf):
1204 break
1205
                # If remaining space in the caller's buffer is larger than the
                # internal buffer, read directly into the caller's buffer
1208 if len(buf) - written > self.buffer_size:
1209 n = self.raw.readinto(buf[written:])
1210 if not n:
1211 break # eof
1212 written += n
1213
1214 # Otherwise refill internal buffer - unless we're
1215 # in read1 mode and already got some data
1216 elif not (read1 and written):
1217 if not self._peek_unlocked(1):
1218 break # eof
1219
1220 # In readinto1 mode, return as soon as we have some data
1221 if read1 and written:
1222 break
1223
1224 return written
1225
1226 def tell(self):
1227 return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
1228
1229 def seek(self, pos, whence=0):
1230 if whence not in valid_seek_flags:
1231 raise ValueError("invalid whence value")
1232 with self._read_lock:
1233 if whence == 1:
1234 pos -= len(self._read_buf) - self._read_pos
1235 pos = _BufferedIOMixin.seek(self, pos, whence)
1236 self._reset_read_buf()
1237 return pos
1238
class BufferedWriter(_BufferedIOMixin):
1240
1241 """A buffer for a writeable sequential RawIO object.
1242
1243 The constructor creates a BufferedWriter for the given writeable raw
1244 stream. If the buffer_size is not given, it defaults to
1245 DEFAULT_BUFFER_SIZE.
1246 """
1247
1248 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
1249 if not raw.writable():
1250 raise OSError('"raw" argument must be writable.')
1251
1252 _BufferedIOMixin.__init__(self, raw)
1253 if buffer_size <= 0:
1254 raise ValueError("invalid buffer size")
1255 self.buffer_size = buffer_size
1256 self._write_buf = bytearray()
1257 self._write_lock = Lock()
1258
1259 def writable(self):
1260 return self.raw.writable()
1261
1262 def write(self, b):
1263 if isinstance(b, str):
1264 raise TypeError("can't write str to binary stream")
1265 with self._write_lock:
1266 if self.closed:
1267 raise ValueError("write to closed file")
1268 # XXX we can implement some more tricks to try and avoid
1269 # partial writes
1270 if len(self._write_buf) > self.buffer_size:
1271 # We're full, so let's pre-flush the buffer. (This may
1272 # raise BlockingIOError with characters_written == 0.)
1273 self._flush_unlocked()
1274 before = len(self._write_buf)
1275 self._write_buf.extend(b)
1276 written = len(self._write_buf) - before
1277 if len(self._write_buf) > self.buffer_size:
1278 try:
1279 self._flush_unlocked()
1280 except BlockingIOError as e:
1281 if len(self._write_buf) > self.buffer_size:
1282 # We've hit the buffer_size. We have to accept a partial
1283 # write and cut back our buffer.
1284 overage = len(self._write_buf) - self.buffer_size
1285 written -= overage
1286 self._write_buf = self._write_buf[:self.buffer_size]
1287 raise BlockingIOError(e.errno, e.strerror, written)
1288 return written
1289
1290 def truncate(self, pos=None):
1291 with self._write_lock:
1292 self._flush_unlocked()
1293 if pos is None:
1294 pos = self.raw.tell()
1295 return self.raw.truncate(pos)
1296
1297 def flush(self):
1298 with self._write_lock:
1299 self._flush_unlocked()
1300
1301 def _flush_unlocked(self):
1302 if self.closed:
1303 raise ValueError("flush on closed file")
1304 while self._write_buf:
1305 try:
1306 n = self.raw.write(self._write_buf)
1307 except BlockingIOError:
1308 raise RuntimeError("self.raw should implement RawIOBase: it "
1309 "should not raise BlockingIOError")
1310 if n is None:
1311 raise BlockingIOError(
1312 errno.EAGAIN,
1313 "write could not complete without blocking", 0)
1314 if n > len(self._write_buf) or n < 0:
1315 raise OSError("write() returned incorrect number of bytes")
1316 del self._write_buf[:n]
1317
1318 def tell(self):
1319 return _BufferedIOMixin.tell(self) + len(self._write_buf)
1320
1321 def seek(self, pos, whence=0):
1322 if whence not in valid_seek_flags:
1323 raise ValueError("invalid whence value")
1324 with self._write_lock:
1325 self._flush_unlocked()
1326 return _BufferedIOMixin.seek(self, pos, whence)
1327
1328 def close(self):
1329 with self._write_lock:
1330 if self.raw is None or self.closed:
1331 return
1332 # We have to release the lock and call self.flush() (which will
1333 # probably just re-take the lock) in case flush has been overridden in
1334 # a subclass or the user set self.flush to something. This is the same
1335 # behavior as the C implementation.
1336 try:
1337 # may raise BlockingIOError or BrokenPipeError etc
1338 self.flush()
1339 finally:
1340 with self._write_lock:
1341 self.raw.close()
1342
1343
class BufferedRWPair(BufferedIOBase):
1345
1346 """A buffered reader and writer object together.
1347
1348 A buffered reader object and buffered writer object put together to
1349 form a sequential IO object that can read and write. This is typically
1350 used with a socket or two-way pipe.
1351
1352 reader and writer are RawIOBase objects that are readable and
1353 writeable respectively. If the buffer_size is omitted it defaults to
1354 DEFAULT_BUFFER_SIZE.
1355 """
1356
1357 # XXX The usefulness of this (compared to having two separate IO
1358 # objects) is questionable.
1359
1360 def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
1361 """Constructor.
1362
1363 The arguments are two RawIO instances.
1364 """
1365 if not reader.readable():
1366 raise OSError('"reader" argument must be readable.')
1367
1368 if not writer.writable():
1369 raise OSError('"writer" argument must be writable.')
1370
1371 self.reader = BufferedReader(reader, buffer_size)
1372 self.writer = BufferedWriter(writer, buffer_size)
1373
1374 def read(self, size=-1):
1375 if size is None:
1376 size = -1
1377 return self.reader.read(size)
1378
1379 def readinto(self, b):
1380 return self.reader.readinto(b)
1381
1382 def write(self, b):
1383 return self.writer.write(b)
1384
1385 def peek(self, size=0):
1386 return self.reader.peek(size)
1387
1388 def read1(self, size=-1):
1389 return self.reader.read1(size)
1390
1391 def readinto1(self, b):
1392 return self.reader.readinto1(b)
1393
1394 def readable(self):
1395 return self.reader.readable()
1396
1397 def writable(self):
1398 return self.writer.writable()
1399
1400 def flush(self):
1401 return self.writer.flush()
1402
1403 def close(self):
1404 try:
1405 self.writer.close()
1406 finally:
1407 self.reader.close()
1408
1409 def isatty(self):
1410 return self.reader.isatty() or self.writer.isatty()
1411
1412 @property
1413 def closed(self):
1414 return self.writer.closed
1415
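# An illustrative sketch (the pipe loopback is only for demonstration):
# BufferedRWPair combines a readable raw stream and a writable raw stream
# into one sequential read/write object.
#
#     r, w = os.pipe()
#     pair = BufferedRWPair(FileIO(r, "r"), FileIO(w, "w"))
#     pair.write(b"ping")
#     pair.flush()
#     pair.read(4)    # -> b'ping'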
1416
class BufferedRandom(BufferedWriter, BufferedReader):
1418
1419 """A buffered interface to random access streams.
1420
1421 The constructor creates a reader and writer for a seekable stream,
1422 raw, given in the first argument. If the buffer_size is omitted it
1423 defaults to DEFAULT_BUFFER_SIZE.
1424 """
1425
1426 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
1427 raw._checkSeekable()
1428 BufferedReader.__init__(self, raw, buffer_size)
1429 BufferedWriter.__init__(self, raw, buffer_size)
1430
1431 def seek(self, pos, whence=0):
1432 if whence not in valid_seek_flags:
1433 raise ValueError("invalid whence value")
1434 self.flush()
1435 if self._read_buf:
1436 # Undo read ahead.
1437 with self._read_lock:
1438 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1439 # First do the raw seek, then empty the read buffer, so that
1440 # if the raw seek fails, we don't lose buffered data forever.
1441 pos = self.raw.seek(pos, whence)
1442 with self._read_lock:
1443 self._reset_read_buf()
1444 if pos < 0:
1445 raise OSError("seek() returned invalid position")
1446 return pos
1447
1448 def tell(self):
1449 if self._write_buf:
1450 return BufferedWriter.tell(self)
1451 else:
1452 return BufferedReader.tell(self)
1453
1454 def truncate(self, pos=None):
1455 if pos is None:
1456 pos = self.tell()
1457 # Use seek to flush the read buffer.
1458 return BufferedWriter.truncate(self, pos)
1459
1460 def read(self, size=None):
1461 if size is None:
1462 size = -1
1463 self.flush()
1464 return BufferedReader.read(self, size)
1465
1466 def readinto(self, b):
1467 self.flush()
1468 return BufferedReader.readinto(self, b)
1469
1470 def peek(self, size=0):
1471 self.flush()
1472 return BufferedReader.peek(self, size)
1473
1474 def read1(self, size=-1):
1475 self.flush()
1476 return BufferedReader.read1(self, size)
1477
1478 def readinto1(self, b):
1479 self.flush()
1480 return BufferedReader.readinto1(self, b)
1481
1482 def write(self, b):
1483 if self._read_buf:
1484 # Undo readahead
1485 with self._read_lock:
1486 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1487 self._reset_read_buf()
1488 return BufferedWriter.write(self, b)
1489
1490
class FileIO(RawIOBase):
1492 _fd = -1
1493 _created = False
1494 _readable = False
1495 _writable = False
1496 _appending = False
1497 _seekable = None
1498 _closefd = True
1499
1500 def __init__(self, file, mode='r', closefd=True, opener=None):
1501 """Open a file. The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
1502 writing, exclusive creation or appending. The file will be created if it
1503 doesn't exist when opened for writing or appending; it will be truncated
1504 when opened for writing. A FileExistsError will be raised if it already
1505 exists when opened for creating. Opening a file for creating implies
1506 writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
1507 to allow simultaneous reading and writing. A custom opener can be used by
1508 passing a callable as *opener*. The underlying file descriptor for the file
1509 object is then obtained by calling opener with (*name*, *flags*).
1510 *opener* must return an open file descriptor (passing os.open as *opener*
1511 results in functionality similar to passing None).
1512 """
1513 if self._fd >= 0:
1514 # Have to close the existing file first.
1515 try:
1516 if self._closefd:
1517 os.close(self._fd)
1518 finally:
1519 self._fd = -1
1520
1521 if isinstance(file, float):
1522 raise TypeError('integer argument expected, got float')
1523 if isinstance(file, int):
1524 fd = file
1525 if fd < 0:
1526 raise ValueError('negative file descriptor')
1527 else:
1528 fd = -1
1529
1530 if not isinstance(mode, str):
1531 raise TypeError('invalid mode: %s' % (mode,))
1532 if not set(mode) <= set('xrwab+'):
1533 raise ValueError('invalid mode: %s' % (mode,))
1534 if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
1535 raise ValueError('Must have exactly one of create/read/write/append '
1536 'mode and at most one plus')
1537
1538 if 'x' in mode:
1539 self._created = True
1540 self._writable = True
1541 flags = os.O_EXCL | os.O_CREAT
1542 elif 'r' in mode:
1543 self._readable = True
1544 flags = 0
1545 elif 'w' in mode:
1546 self._writable = True
1547 flags = os.O_CREAT | os.O_TRUNC
1548 elif 'a' in mode:
1549 self._writable = True
1550 self._appending = True
1551 flags = os.O_APPEND | os.O_CREAT
1552
1553 if '+' in mode:
1554 self._readable = True
1555 self._writable = True
1556
1557 if self._readable and self._writable:
1558 flags |= os.O_RDWR
1559 elif self._readable:
1560 flags |= os.O_RDONLY
1561 else:
1562 flags |= os.O_WRONLY
1563
1564 flags |= getattr(os, 'O_BINARY', 0)
1565
1566 noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
1567 getattr(os, 'O_CLOEXEC', 0))
1568 flags |= noinherit_flag
1569
1570 owned_fd = None
1571 try:
1572 if fd < 0:
1573 if not closefd:
1574 raise ValueError('Cannot use closefd=False with file name')
1575 if opener is None:
1576 fd = os.open(file, flags, 0o666)
1577 else:
1578 fd = opener(file, flags)
1579 if not isinstance(fd, int):
1580 raise TypeError('expected integer from opener')
1581 if fd < 0:
1582 raise OSError('Negative file descriptor')
1583 owned_fd = fd
1584 if not noinherit_flag:
1585 os.set_inheritable(fd, False)
1586
1587 self._closefd = closefd
1588 fdfstat = os.fstat(fd)
1589 try:
1590 if stat.S_ISDIR(fdfstat.st_mode):
1591 raise IsADirectoryError(errno.EISDIR,
1592 os.strerror(errno.EISDIR), file)
1593 except AttributeError:
1594 # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
1595 # don't exist.
1596 pass
1597 self._blksize = getattr(fdfstat, 'st_blksize', 0)
1598 if self._blksize <= 1:
1599 self._blksize = DEFAULT_BUFFER_SIZE
1600
1601 if _setmode:
1602 # don't translate newlines (\r\n <=> \n)
1603 _setmode(fd, os.O_BINARY)
1604
1605 self.name = file
1606 if self._appending:
1607 # For consistent behaviour, we explicitly seek to the
1608 # end of file (otherwise, it might be done only on the
1609 # first write()).
1610 try:
1611 os.lseek(fd, 0, SEEK_END)
1612 except OSError as e:
1613 if e.errno != errno.ESPIPE:
1614 raise
1615 except:
1616 if owned_fd is not None:
1617 os.close(owned_fd)
1618 raise
1619 self._fd = fd
1620
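    # Illustrative sketch of the *opener* parameter documented above (the
    # some_dir_fd value is a hypothetical assumption): the opener receives
    # the (name, flags) pair computed in __init__ and must return an open
    # file descriptor.
    #
    #     def _opener(name, flags):
    #         return os.open(name, flags, dir_fd=some_dir_fd)
    #
    #     f = FileIO("relative/path.txt", "w", opener=_opener)
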
1621 def __del__(self):
1622 if self._fd >= 0 and self._closefd and not self.closed:
1623 import warnings
1624 warnings.warn('unclosed file %r' % (self,), ResourceWarning,
1625 stacklevel=2, source=self)
1626 self.close()
1627
1628 def __getstate__(self):
1629 raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")
1630
1631 def __repr__(self):
1632 class_name = '%s.%s' % (self.__class__.__module__,
1633 self.__class__.__qualname__)
1634 if self.closed:
1635 return '<%s [closed]>' % class_name
1636 try:
1637 name = self.name
1638 except AttributeError:
1639 return ('<%s fd=%d mode=%r closefd=%r>' %
1640 (class_name, self._fd, self.mode, self._closefd))
1641 else:
1642 return ('<%s name=%r mode=%r closefd=%r>' %
1643 (class_name, name, self.mode, self._closefd))
1644
1645 def _checkReadable(self):
1646 if not self._readable:
1647 raise UnsupportedOperation('File not open for reading')
1648
1649 def _checkWritable(self, msg=None):
1650 if not self._writable:
1651 raise UnsupportedOperation('File not open for writing')
1652
1653 def read(self, size=None):
1654 """Read at most size bytes, returned as bytes.
1655
        Only makes one system call, so less data may be returned than requested.
1657 In non-blocking mode, returns None if no data is available.
1658 Return an empty bytes object at EOF.
1659 """
1660 self._checkClosed()
1661 self._checkReadable()
1662 if size is None or size < 0:
1663 return self.readall()
1664 try:
1665 return os.read(self._fd, size)
1666 except BlockingIOError:
1667 return None
1668
1669 def readall(self):
1670 """Read all data from the file, returned as bytes.
1671
1672 In non-blocking mode, returns as much as is immediately available,
1673 or None if no data is available. Return an empty bytes object at EOF.
1674 """
1675 self._checkClosed()
1676 self._checkReadable()
1677 bufsize = DEFAULT_BUFFER_SIZE
1678 try:
1679 pos = os.lseek(self._fd, 0, SEEK_CUR)
1680 end = os.fstat(self._fd).st_size
1681 if end >= pos:
1682 bufsize = end - pos + 1
1683 except OSError:
1684 pass
1685
1686 result = bytearray()
1687 while True:
1688 if len(result) >= bufsize:
1689 bufsize = len(result)
1690 bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
1691 n = bufsize - len(result)
1692 try:
1693 chunk = os.read(self._fd, n)
1694 except BlockingIOError:
1695 if result:
1696 break
1697 return None
1698 if not chunk: # reached the end of the file
1699 break
1700 result += chunk
1701
1702 return bytes(result)
1703
1704 def readinto(self, b):
1705 """Same as RawIOBase.readinto()."""
1706 m = memoryview(b).cast('B')
1707 data = self.read(len(m))
1708 n = len(data)
1709 m[:n] = data
1710 return n
1711
1712 def write(self, b):
1713 """Write bytes b to file, return number written.
1714
1715 Only makes one system call, so not all of the data may be written.
1716 The number of bytes actually written is returned. In non-blocking mode,
1717 returns None if the write would block.
1718 """
1719 self._checkClosed()
1720 self._checkWritable()
1721 try:
1722 return os.write(self._fd, b)
1723 except BlockingIOError:
1724 return None
1725
1726 def seek(self, pos, whence=SEEK_SET):
1727 """Move to new file position.
1728
        Argument pos is a byte count. Optional argument whence defaults to
1730 SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values
1731 are SEEK_CUR or 1 (move relative to current position, positive or negative),
1732 and SEEK_END or 2 (move relative to end of file, usually negative, although
1733 many platforms allow seeking beyond the end of a file).
1734
1735 Note that not all file objects are seekable.
1736 """
1737 if isinstance(pos, float):
1738 raise TypeError('an integer is required')
1739 self._checkClosed()
1740 return os.lseek(self._fd, pos, whence)
1741
1742 def tell(self):
1743 """tell() -> int. Current file position.
1744
1745 Can raise OSError for non seekable files."""
1746 self._checkClosed()
1747 return os.lseek(self._fd, 0, SEEK_CUR)
1748
1749 def truncate(self, size=None):
1750 """Truncate the file to at most size bytes.
1751
1752 Size defaults to the current file position, as returned by tell().
1753 The current file position is changed to the value of size.
1754 """
1755 self._checkClosed()
1756 self._checkWritable()
1757 if size is None:
1758 size = self.tell()
1759 os.ftruncate(self._fd, size)
1760 return size
1761
1762 def close(self):
1763 """Close the file.
1764
1765 A closed file cannot be used for further I/O operations. close() may be
1766 called more than once without error.
1767 """
1768 if not self.closed:
1769 try:
1770 if self._closefd:
1771 os.close(self._fd)
1772 finally:
1773 super().close()
1774
1775 def seekable(self):
1776 """True if file supports random-access."""
1777 self._checkClosed()
1778 if self._seekable is None:
1779 try:
1780 self.tell()
1781 except OSError:
1782 self._seekable = False
1783 else:
1784 self._seekable = True
1785 return self._seekable
1786
1787 def readable(self):
1788 """True if file was opened in a read mode."""
1789 self._checkClosed()
1790 return self._readable
1791
1792 def writable(self):
1793 """True if file was opened in a write mode."""
1794 self._checkClosed()
1795 return self._writable
1796
1797 def fileno(self):
1798 """Return the underlying file descriptor (an integer)."""
1799 self._checkClosed()
1800 return self._fd
1801
1802 def isatty(self):
1803 """True if the file is connected to a TTY device."""
1804 self._checkClosed()
1805 return os.isatty(self._fd)
1806
1807 @property
1808 def closefd(self):
1809 """True if the file descriptor will be closed by close()."""
1810 return self._closefd
1811
1812 @property
1813 def mode(self):
1814 """String giving the file mode"""
1815 if self._created:
1816 if self._readable:
1817 return 'xb+'
1818 else:
1819 return 'xb'
1820 elif self._appending:
1821 if self._readable:
1822 return 'ab+'
1823 else:
1824 return 'ab'
1825 elif self._readable:
1826 if self._writable:
1827 return 'rb+'
1828 else:
1829 return 'rb'
1830 else:
1831 return 'wb'
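# For illustration only (not part of the implementation): a descriptor opened
# with _created and _readable set reports 'xb+', while one that is neither
# created, appending nor readable reports 'wb'.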
1832
1833
1834 class TextIOBase(IOBase):
1835
1836 """Base class for text I/O.
1837
1838 This class provides a character and line based interface to stream
1839 I/O.
1840 """
1841
1842 def read(self, size=-1):
1843 """Read at most size characters from stream, where size is an int.
1844
1845 Read from underlying buffer until we have size characters or we hit EOF.
1846 If size is negative or omitted, read until EOF.
1847
1848 Returns a string.
1849 """
1850 self._unsupported("read")
1851
1852 def write(self, s):
1853 """Write string s to stream and returning an int."""
1854 self._unsupported("write")
1855
1856 def truncate(self, pos=None):
1857 """Truncate size to pos, where pos is an int."""
1858 self._unsupported("truncate")
1859
1860 def readline(self):
1861 """Read until newline or EOF.
1862
1863 Returns an empty string if EOF is hit immediately.
1864 """
1865 self._unsupported("readline")
1866
1867 def detach(self):
1868 """
1869 Separate the underlying buffer from the TextIOBase and return it.
1870
1871 After the underlying buffer has been detached, the TextIO is in an
1872 unusable state.
1873 """
1874 self._unsupported("detach")
1875
1876 @property
1877 def encoding(self):
1878 """Subclasses should override."""
1879 return None
1880
1881 @property
1882 def newlines(self):
1883 """Line endings translated so far.
1884
1885 Only line endings translated during reading are considered.
1886
1887 Subclasses should override.
1888 """
1889 return None
1890
1891 @property
1892 def errors(self):
1893 """Error setting of the decoder or encoder.
1894
1895 Subclasses should override."""
1896 return None
1897
1898 io.TextIOBase.register(TextIOBase)
1899
1900
1901 class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
1902 r"""Codec used when reading a file in universal newlines mode. It wraps
1903 another incremental decoder, translating \r\n and \r into \n. It also
1904 records the types of newlines encountered. When used with
1905 translate=False, it ensures that the newline sequence is returned in
1906 one piece.
1907 """
1908 def __init__(self, decoder, translate, errors='strict'):
1909 codecs.IncrementalDecoder.__init__(self, errors=errors)
1910 self.translate = translate
1911 self.decoder = decoder
1912 self.seennl = 0
1913 self.pendingcr = False
1914
1915 def decode(self, input, final=False):
1916 # decode input (with the eventual \r from a previous pass)
1917 if self.decoder is None:
1918 output = input
1919 else:
1920 output = self.decoder.decode(input, final=final)
1921 if self.pendingcr and (output or final):
1922 output = "\r" + output
1923 self.pendingcr = False
1924
1925 # retain last \r even when not translating data:
1926 # then readline() is sure to get \r\n in one pass
1927 if output.endswith("\r") and not final:
1928 output = output[:-1]
1929 self.pendingcr = True
1930
1931 # Record which newlines are read
1932 crlf = output.count('\r\n')
1933 cr = output.count('\r') - crlf
1934 lf = output.count('\n') - crlf
1935 self.seennl |= (lf and self._LF) | (cr and self._CR) \
1936 | (crlf and self._CRLF)
1937
1938 if self.translate:
1939 if crlf:
1940 output = output.replace("\r\n", "\n")
1941 if cr:
1942 output = output.replace("\r", "\n")
1943
1944 return output
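# For illustration only (not part of the implementation); 'd' is a
# hypothetical IncrementalNewlineDecoder(None, translate=True):
#   d.decode("a\r")  -> "a"    (the trailing "\r" is held back)
#   d.decode("\nb")  -> "\nb"  (the held "\r" rejoins the "\n", and the
#                               resulting "\r\n" is translated to "\n")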
1945
1946 def getstate(self):
1947 if self.decoder is None:
1948 buf = b""
1949 flag = 0
1950 else:
1951 buf, flag = self.decoder.getstate()
1952 flag <<= 1
1953 if self.pendingcr:
1954 flag |= 1
1955 return buf, flag
1956
1957 def setstate(self, state):
1958 buf, flag = state
1959 self.pendingcr = bool(flag & 1)
1960 if self.decoder is not None:
1961 self.decoder.setstate((buf, flag >> 1))
1962
1963 def reset(self):
1964 self.seennl = 0
1965 self.pendingcr = False
1966 if self.decoder is not None:
1967 self.decoder.reset()
1968
1969 _LF = 1
1970 _CR = 2
1971 _CRLF = 4
1972
1973 @property
1974 def newlines(self):
1975 return (None,
1976 "\n",
1977 "\r",
1978 ("\r", "\n"),
1979 "\r\n",
1980 ("\n", "\r\n"),
1981 ("\r", "\r\n"),
1982 ("\r", "\n", "\r\n")
1983 )[self.seennl]
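# For illustration only (not part of the implementation): seennl is a
# bitmask, so after decoding text containing both "\n" (_LF == 1) and
# "\r\n" (_CRLF == 4), seennl == 5 and the property above returns
# ("\n", "\r\n").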
1984
1985
1986 class TextIOWrapper(TextIOBase):
1987
1988 r"""Character and line based layer over a BufferedIOBase object, buffer.
1989
1990 encoding gives the name of the encoding that the stream will be
1991 decoded or encoded with. It defaults to locale.getencoding().
1992
1993 errors determines the strictness of encoding and decoding (see the
1994 documentation for codecs.register_error) and defaults to "strict".
1995
1996 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1997 handling of line endings. If it is None, universal newlines is
1998 enabled. With this enabled, on input, the line endings '\n', '\r',
1999 or '\r\n' are translated to '\n' before being returned to the
2000 caller. Conversely, on output, '\n' is translated to the system
2001 default line separator, os.linesep. If newline is any other of its
2002 legal values, that newline becomes the newline when the file is read
2003 and it is returned untranslated. On output, '\n' is converted to the
2004 newline.
2005
2006 If line_buffering is True, a call to flush is implied when a call to
2007 write contains a newline character.
2008 """
2009
2010 _CHUNK_SIZE = 2048
2011
2012 # Initialize _buffer as soon as possible since it's used by __del__()
2013 # which calls close()
2014 _buffer = None
2015
2016 # The write_through argument has no effect here since this
2017 # implementation always writes through. The argument is present only
2018 # so that the signature can match the signature of the C version.
2019 def __init__(self, buffer, encoding=None, errors=None, newline=None,
2020 line_buffering=False, write_through=False):
2021 self._check_newline(newline)
2022 encoding = text_encoding(encoding)
2023
2024 if encoding == "locale":
2025 encoding = self._get_locale_encoding()
2026
2027 if not isinstance(encoding, str):
2028 raise ValueError("invalid encoding: %r" % encoding)
2029
2030 if not codecs.lookup(encoding)._is_text_encoding:
2031 msg = ("%r is not a text encoding; "
2032 "use codecs.open() to handle arbitrary codecs")
2033 raise LookupError(msg % encoding)
2034
2035 if errors is None:
2036 errors = "strict"
2037 else:
2038 if not isinstance(errors, str):
2039 raise ValueError("invalid errors: %r" % errors)
2040 if _CHECK_ERRORS:
2041 codecs.lookup_error(errors)
2042
2043 self._buffer = buffer
2044 self._decoded_chars = '' # buffer for text returned from decoder
2045 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
2046 self._snapshot = None # info for reconstructing decoder state
2047 self._seekable = self._telling = self.buffer.seekable()
2048 self._has_read1 = hasattr(self.buffer, 'read1')
2049 self._configure(encoding, errors, newline,
2050 line_buffering, write_through)
2051
2052 def _check_newline(self, newline):
2053 if newline is not None and not isinstance(newline, str):
2054 raise TypeError("illegal newline type: %r" % (type(newline),))
2055 if newline not in (None, "", "\n", "\r", "\r\n"):
2056 raise ValueError("illegal newline value: %r" % (newline,))
2057
2058 def _configure(self, encoding=None, errors=None, newline=None,
2059 line_buffering=False, write_through=False):
2060 self._encoding = encoding
2061 self._errors = errors
2062 self._encoder = None
2063 self._decoder = None
2064 self._b2cratio = 0.0
2065
2066 self._readuniversal = not newline
2067 self._readtranslate = newline is None
2068 self._readnl = newline
2069 self._writetranslate = newline != ''
2070 self._writenl = newline or os.linesep
2071
2072 self._line_buffering = line_buffering
2073 self._write_through = write_through
2074
2075 # don't write a BOM in the middle of a file
2076 if self._seekable and self.writable():
2077 position = self.buffer.tell()
2078 if position != 0:
2079 try:
2080 self._get_encoder().setstate(0)
2081 except LookupError:
2082 # Sometimes the encoder doesn't exist
2083 pass
2084
2085 # self._snapshot is either None, or a tuple (dec_flags, next_input)
2086 # where dec_flags is the second (integer) item of the decoder state
2087 # and next_input is the chunk of input bytes that comes next after the
2088 # snapshot point. We use this to reconstruct decoder states in tell().
2089
2090 # Naming convention:
2091 # - "bytes_..." for integer variables that count input bytes
2092 # - "chars_..." for integer variables that count decoded characters
2093
2094 def __repr__(self):
2095 result = "<{}.{}".format(self.__class__.__module__,
2096 self.__class__.__qualname__)
2097 try:
2098 name = self.name
2099 except AttributeError:
2100 pass
2101 else:
2102 result += " name={0!r}".format(name)
2103 try:
2104 mode = self.mode
2105 except AttributeError:
2106 pass
2107 else:
2108 result += " mode={0!r}".format(mode)
2109 return result + " encoding={0!r}>".format(self.encoding)
2110
2111 @property
2112 def encoding(self):
2113 return self._encoding
2114
2115 @property
2116 def errors(self):
2117 return self._errors
2118
2119 @property
2120 def line_buffering(self):
2121 return self._line_buffering
2122
2123 @property
2124 def write_through(self):
2125 return self._write_through
2126
2127 @property
2128 def buffer(self):
2129 return self._buffer
2130
2131 def reconfigure(self, *,
2132 encoding=None, errors=None, newline=Ellipsis,
2133 line_buffering=None, write_through=None):
2134 """Reconfigure the text stream with new parameters.
2135
2136 This also flushes the stream.
2137 """
2138 if (self._decoder is not None
2139 and (encoding is not None or errors is not None
2140 or newline is not Ellipsis)):
2141 raise UnsupportedOperation(
2142 "It is not possible to set the encoding or newline of stream "
2143 "after the first read")
2144
2145 if errors is None:
2146 if encoding is None:
2147 errors = self._errors
2148 else:
2149 errors = 'strict'
2150 elif not isinstance(errors, str):
2151 raise TypeError("invalid errors: %r" % errors)
2152
2153 if encoding is None:
2154 encoding = self._encoding
2155 else:
2156 if not isinstance(encoding, str):
2157 raise TypeError("invalid encoding: %r" % encoding)
2158 if encoding == "locale":
2159 encoding = self._get_locale_encoding()
2160
2161 if newline is Ellipsis:
2162 newline = self._readnl
2163 self._check_newline(newline)
2164
2165 if line_buffering is None:
2166 line_buffering = self.line_buffering
2167 if write_through is None:
2168 write_through = self.write_through
2169
2170 self.flush()
2171 self._configure(encoding, errors, newline,
2172 line_buffering, write_through)
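# For illustration only (a hypothetical usage sketch, not part of the
# implementation); 'log_file' is an assumed text stream opened for writing:
#   log_file.reconfigure(line_buffering=True)   # flush after each newline
#   log_file.reconfigure(errors="replace")      # change the error handler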
2173
2174 def seekable(self):
2175 if self.closed:
2176 raise ValueError("I/O operation on closed file.")
2177 return self._seekable
2178
2179 def readable(self):
2180 return self.buffer.readable()
2181
2182 def writable(self):
2183 return self.buffer.writable()
2184
2185 def flush(self):
2186 self.buffer.flush()
2187 self._telling = self._seekable
2188
2189 def close(self):
2190 if self.buffer is not None and not self.closed:
2191 try:
2192 self.flush()
2193 finally:
2194 self.buffer.close()
2195
2196 @property
2197 def closed(self):
2198 return self.buffer.closed
2199
2200 @property
2201 def name(self):
2202 return self.buffer.name
2203
2204 def fileno(self):
2205 return self.buffer.fileno()
2206
2207 def isatty(self):
2208 return self.buffer.isatty()
2209
2210 def write(self, s):
2211 """Write data, where s is a str."""
2212 if self.closed:
2213 raise ValueError("write to closed file")
2214 if not isinstance(s, str):
2215 raise TypeError("can't write %s to text stream" %
2216 s.__class__.__name__)
2217 length = len(s)
2218 haslf = (self._writetranslate or self._line_buffering) and "\n" in s
2219 if haslf and self._writetranslate and self._writenl != "\n":
2220 s = s.replace("\n", self._writenl)
2221 encoder = self._encoder or self._get_encoder()
2222 # XXX What if we were just reading?
2223 b = encoder.encode(s)
2224 self.buffer.write(b)
2225 if self._line_buffering and (haslf or "\r" in s):
2226 self.flush()
2227 self._set_decoded_chars('')
2228 self._snapshot = None
2229 if self._decoder:
2230 self._decoder.reset()
2231 return length
2232
2233 def _get_encoder(self):
2234 make_encoder = codecs.getincrementalencoder(self._encoding)
2235 self._encoder = make_encoder(self._errors)
2236 return self._encoder
2237
2238 def _get_decoder(self):
2239 make_decoder = codecs.getincrementaldecoder(self._encoding)
2240 decoder = make_decoder(self._errors)
2241 if self._readuniversal:
2242 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
2243 self._decoder = decoder
2244 return decoder
2245
2246 # The following three methods implement an ADT for _decoded_chars.
2247 # Text returned from the decoder is buffered here until the client
2248 # requests it by calling our read() or readline() method.
2249 def _set_decoded_chars(self, chars):
2250 """Set the _decoded_chars buffer."""
2251 self._decoded_chars = chars
2252 self._decoded_chars_used = 0
2253
2254 def _get_decoded_chars(self, n=None):
2255 """Advance into the _decoded_chars buffer."""
2256 offset = self._decoded_chars_used
2257 if n is None:
2258 chars = self._decoded_chars[offset:]
2259 else:
2260 chars = self._decoded_chars[offset:offset + n]
2261 self._decoded_chars_used += len(chars)
2262 return chars
2263
2264 def _get_locale_encoding(self):
2265 try:
2266 import locale
2267 except ImportError:
2268 # Importing locale may fail if Python is being built
2269 return "utf-8"
2270 else:
2271 return locale.getencoding()
2272
2273 def _rewind_decoded_chars(self, n):
2274 """Rewind the _decoded_chars buffer."""
2275 if self._decoded_chars_used < n:
2276 raise AssertionError("rewind decoded_chars out of bounds")
2277 self._decoded_chars_used -= n
2278
2279 def _read_chunk(self):
2280 """
2281 Read and decode the next chunk of data from the BufferedReader.
2282 """
2283
2284 # The return value is True unless EOF was reached. The decoded
2285 # string is placed in self._decoded_chars (replacing its previous
2286 # value). The entire input chunk is sent to the decoder, though
2287 # some of it may remain buffered in the decoder, yet to be
2288 # converted.
2289
2290 if self._decoder is None:
2291 raise ValueError("no decoder")
2292
2293 if self._telling:
2294 # To prepare for tell(), we need to snapshot a point in the
2295 # file where the decoder's input buffer is empty.
2296
2297 dec_buffer, dec_flags = self._decoder.getstate()
2298 # Given this, we know there was a valid snapshot point
2299 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
2300
2301 # Read a chunk, decode it, and put the result in self._decoded_chars.
2302 if self._has_read1:
2303 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
2304 else:
2305 input_chunk = self.buffer.read(self._CHUNK_SIZE)
2306 eof = not input_chunk
2307 decoded_chars = self._decoder.decode(input_chunk, eof)
2308 self._set_decoded_chars(decoded_chars)
2309 if decoded_chars:
2310 self._b2cratio = len(input_chunk) / len(self._decoded_chars)
2311 else:
2312 self._b2cratio = 0.0
2313
2314 if self._telling:
2315 # At the snapshot point, len(dec_buffer) bytes before the read,
2316 # the next input to be decoded is dec_buffer + input_chunk.
2317 self._snapshot = (dec_flags, dec_buffer + input_chunk)
2318
2319 return not eof
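# For illustration only (not part of the implementation), assuming a UTF-8
# decoder: if the decoder had b"\xc3" of an incomplete sequence buffered
# before the read and the chunk read is b"\xa9abc", the snapshot becomes
# (dec_flags, b"\xc3\xa9abc") -- everything tell() needs in order to re-feed
# the decoder from the last clean start point.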
2320
2321 def _pack_cookie(self, position, dec_flags=0,
2322 bytes_to_feed=0, need_eof=False, chars_to_skip=0):
2323 # The meaning of a tell() cookie is: seek to position, set the
2324 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
2325 # into the decoder with need_eof as the EOF flag, then skip
2326 # chars_to_skip characters of the decoded result. For most simple
2327 # decoders, tell() will often just give a byte offset in the file.
2328 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
2329 (chars_to_skip<<192) | bool(need_eof)<<256)
2330
2331 def _unpack_cookie(self, bigint):
2332 rest, position = divmod(bigint, 1<<64)
2333 rest, dec_flags = divmod(rest, 1<<64)
2334 rest, bytes_to_feed = divmod(rest, 1<<64)
2335 need_eof, chars_to_skip = divmod(rest, 1<<64)
2336 return position, dec_flags, bytes_to_feed, bool(need_eof), chars_to_skip
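# For illustration only (not part of the implementation): the two helpers
# above are inverses as long as every field fits in 64 bits, e.g.
#   cookie = self._pack_cookie(10, dec_flags=1, bytes_to_feed=3,
#                              need_eof=False, chars_to_skip=2)
#   self._unpack_cookie(cookie) == (10, 1, 3, False, 2)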
2337
2338 def tell(self):
2339 if not self._seekable:
2340 raise UnsupportedOperation("underlying stream is not seekable")
2341 if not self._telling:
2342 raise OSError("telling position disabled by next() call")
2343 self.flush()
2344 position = self.buffer.tell()
2345 decoder = self._decoder
2346 if decoder is None or self._snapshot is None:
2347 if self._decoded_chars:
2348 # This should never happen.
2349 raise AssertionError("pending decoded text")
2350 return position
2351
2352 # Skip backward to the snapshot point (see _read_chunk).
2353 dec_flags, next_input = self._snapshot
2354 position -= len(next_input)
2355
2356 # How many decoded characters have been used up since the snapshot?
2357 chars_to_skip = self._decoded_chars_used
2358 if chars_to_skip == 0:
2359 # We haven't moved from the snapshot point.
2360 return self._pack_cookie(position, dec_flags)
2361
2362 # Starting from the snapshot position, we will walk the decoder
2363 # forward until it gives us enough decoded characters.
2364 saved_state = decoder.getstate()
2365 try:
2366 # Fast search for an acceptable start point, close to our
2367 # current pos.
2368 # Rationale: calling decoder.decode() has a large overhead
2369 # regardless of chunk size; we want the number of such calls to
2370 # be O(1) in most situations (common decoders, sensible input).
2371 # Actually, it will be exactly 1 for fixed-size codecs (all
2372 # 8-bit codecs, also UTF-16 and UTF-32).
2373 skip_bytes = int(self._b2cratio * chars_to_skip)
2374 skip_back = 1
2375 assert skip_bytes <= len(next_input)
2376 while skip_bytes > 0:
2377 decoder.setstate((b'', dec_flags))
2378 # Decode up to the tentative start point
2379 n = len(decoder.decode(next_input[:skip_bytes]))
2380 if n <= chars_to_skip:
2381 b, d = decoder.getstate()
2382 if not b:
2383 # Before pos and no bytes buffered in decoder => OK
2384 dec_flags = d
2385 chars_to_skip -= n
2386 break
2387 # Skip back by buffered amount and reset heuristic
2388 skip_bytes -= len(b)
2389 skip_back = 1
2390 else:
2391 # We're too far ahead, skip back a bit
2392 skip_bytes -= skip_back
2393 skip_back = skip_back * 2
2394 else:
2395 skip_bytes = 0
2396 decoder.setstate((b'', dec_flags))
2397
2398 # Note our initial start point.
2399 start_pos = position + skip_bytes
2400 start_flags = dec_flags
2401 if chars_to_skip == 0:
2402 # We haven't moved from the start point.
2403 return self._pack_cookie(start_pos, start_flags)
2404
2405 # Feed the decoder one byte at a time. As we go, note the
2406 # nearest "safe start point" before the current location
2407 # (a point where the decoder has nothing buffered, so seek()
2408 # can safely start from there and advance to this location).
2409 bytes_fed = 0
2410 need_eof = False
2411 # Chars decoded since `start_pos`
2412 chars_decoded = 0
2413 for i in range(skip_bytes, len(next_input)):
2414 bytes_fed += 1
2415 chars_decoded += len(decoder.decode(next_input[i:i+1]))
2416 dec_buffer, dec_flags = decoder.getstate()
2417 if not dec_buffer and chars_decoded <= chars_to_skip:
2418 # Decoder buffer is empty, so this is a safe start point.
2419 start_pos += bytes_fed
2420 chars_to_skip -= chars_decoded
2421 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
2422 if chars_decoded >= chars_to_skip:
2423 break
2424 else:
2425 # We didn't get enough decoded data; signal EOF to get more.
2426 chars_decoded += len(decoder.decode(b'', final=True))
2427 need_eof = True
2428 if chars_decoded < chars_to_skip:
2429 raise OSError("can't reconstruct logical file position")
2430
2431 # The returned cookie corresponds to the last safe start point.
2432 return self._pack_cookie(
2433 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
2434 finally:
2435 decoder.setstate(saved_state)
2436
2437 def truncate(self, pos=None):
2438 self.flush()
2439 if pos is None:
2440 pos = self.tell()
2441 return self.buffer.truncate(pos)
2442
2443 def detach(self):
2444 if self.buffer is None:
2445 raise ValueError("buffer is already detached")
2446 self.flush()
2447 buffer = self._buffer
2448 self._buffer = None
2449 return buffer
2450
2451 def seek(self, cookie, whence=0):
2452 def _reset_encoder(position):
2453 """Reset the encoder (merely useful for proper BOM handling)"""
2454 try:
2455 encoder = self._encoder or self._get_encoder()
2456 except LookupError:
2457 # Sometimes the encoder doesn't exist
2458 pass
2459 else:
2460 if position != 0:
2461 encoder.setstate(0)
2462 else:
2463 encoder.reset()
2464
2465 if self.closed:
2466 raise ValueError("tell on closed file")
2467 if not self._seekable:
2468 raise UnsupportedOperation("underlying stream is not seekable")
2469 if whence == SEEK_CUR:
2470 if cookie != 0:
2471 raise UnsupportedOperation("can't do nonzero cur-relative seeks")
2472 # Seeking to the current position should attempt to
2473 # sync the underlying buffer with the current position.
2474 whence = 0
2475 cookie = self.tell()
2476 elif whence == SEEK_END:
2477 if cookie != 0:
2478 raise UnsupportedOperation("can't do nonzero end-relative seeks")
2479 self.flush()
2480 position = self.buffer.seek(0, whence)
2481 self._set_decoded_chars('')
2482 self._snapshot = None
2483 if self._decoder:
2484 self._decoder.reset()
2485 _reset_encoder(position)
2486 return position
2487 if whence != 0:
2488 raise ValueError("unsupported whence (%r)" % (whence,))
2489 if cookie < 0:
2490 raise ValueError("negative seek position %r" % (cookie,))
2491 self.flush()
2492
2493 # The strategy of seek() is to go back to the safe start point
2494 # and replay the effect of read(chars_to_skip) from there.
2495 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
2496 self._unpack_cookie(cookie)
2497
2498 # Seek back to the safe start point.
2499 self.buffer.seek(start_pos)
2500 self._set_decoded_chars('')
2501 self._snapshot = None
2502
2503 # Restore the decoder to its state from the safe start point.
2504 if cookie == 0 and self._decoder:
2505 self._decoder.reset()
2506 elif self._decoder or dec_flags or chars_to_skip:
2507 self._decoder = self._decoder or self._get_decoder()
2508 self._decoder.setstate((b'', dec_flags))
2509 self._snapshot = (dec_flags, b'')
2510
2511 if chars_to_skip:
2512 # Just like _read_chunk, feed the decoder and save a snapshot.
2513 input_chunk = self.buffer.read(bytes_to_feed)
2514 self._set_decoded_chars(
2515 self._decoder.decode(input_chunk, need_eof))
2516 self._snapshot = (dec_flags, input_chunk)
2517
2518 # Skip chars_to_skip of the decoded characters.
2519 if len(self._decoded_chars) < chars_to_skip:
2520 raise OSError("can't restore logical file position")
2521 self._decoded_chars_used = chars_to_skip
2522
2523 _reset_encoder(cookie)
2524 return cookie
2525
2526 def read(self, size=None):
2527 self._checkReadable()
2528 if size is None:
2529 size = -1
2530 else:
2531 try:
2532 size_index = size.__index__
2533 except AttributeError:
2534 raise TypeError(f"{size!r} is not an integer")
2535 else:
2536 size = size_index()
2537 decoder = self._decoder or self._get_decoder()
2538 if size < 0:
2539 # Read everything.
2540 result = (self._get_decoded_chars() +
2541 decoder.decode(self.buffer.read(), final=True))
2542 self._set_decoded_chars('')
2543 self._snapshot = None
2544 return result
2545 else:
2546 # Keep reading chunks until we have size characters to return.
2547 eof = False
2548 result = self._get_decoded_chars(size)
2549 while len(result) < size and not eof:
2550 eof = not self._read_chunk()
2551 result += self._get_decoded_chars(size - len(result))
2552 return result
2553
2554 def __next__(self):
2555 self._telling = False
2556 line = self.readline()
2557 if not line:
2558 self._snapshot = None
2559 self._telling = self._seekable
2560 raise StopIteration
2561 return line
2562
2563 def readline(self, size=None):
2564 if self.closed:
2565 raise ValueError("read from closed file")
2566 if size is None:
2567 size = -1
2568 else:
2569 try:
2570 size_index = size.__index__
2571 except AttributeError:
2572 raise TypeError(f"{size!r} is not an integer")
2573 else:
2574 size = size_index()
2575
2576 # Grab all the decoded text (we will rewind any extra bits later).
2577 line = self._get_decoded_chars()
2578
2579 start = 0
2580 # Make the decoder if it doesn't already exist.
2581 if not self._decoder:
2582 self._get_decoder()
2583
2584 pos = endpos = None
2585 while True:
2586 if self._readtranslate:
2587 # Newlines are already translated, only search for \n
2588 pos = line.find('\n', start)
2589 if pos >= 0:
2590 endpos = pos + 1
2591 break
2592 else:
2593 start = len(line)
2594
2595 elif self._readuniversal:
2596 # Universal newline search. Find any of \r, \r\n, \n
2597 # The decoder ensures that \r\n are not split in two pieces
2598
2599 # In C we'd look for these in parallel of course.
2600 nlpos = line.find("\n", start)
2601 crpos = line.find("\r", start)
2602 if crpos == -1:
2603 if nlpos == -1:
2604 # Nothing found
2605 start = len(line)
2606 else:
2607 # Found \n
2608 endpos = nlpos + 1
2609 break
2610 elif nlpos == -1:
2611 # Found lone \r
2612 endpos = crpos + 1
2613 break
2614 elif nlpos < crpos:
2615 # Found \n
2616 endpos = nlpos + 1
2617 break
2618 elif nlpos == crpos + 1:
2619 # Found \r\n
2620 endpos = crpos + 2
2621 break
2622 else:
2623 # Found \r
2624 endpos = crpos + 1
2625 break
2626 else:
2627 # non-universal
2628 pos = line.find(self._readnl)
2629 if pos >= 0:
2630 endpos = pos + len(self._readnl)
2631 break
2632
2633 if size >= 0 and len(line) >= size:
2634 endpos = size # reached length size
2635 break
2636
2637 # No line ending seen yet - get more data
2638 while self._read_chunk():
2639 if self._decoded_chars:
2640 break
2641 if self._decoded_chars:
2642 line += self._get_decoded_chars()
2643 else:
2644 # end of file
2645 self._set_decoded_chars('')
2646 self._snapshot = None
2647 return line
2648
2649 if size >= 0 and endpos > size:
2650 endpos = size # don't exceed size
2651
2652 # Rewind _decoded_chars to just after the line ending we found.
2653 self._rewind_decoded_chars(len(line) - endpos)
2654 return line[:endpos]
2655
2656 @property
2657 def newlines(self):
2658 return self._decoder.newlines if self._decoder else None
2659
2660
2661 class StringIO(TextIOWrapper):
2662 """Text I/O implementation using an in-memory buffer.
2663
2664 The initial_value argument sets the value of the object. The newline
2665 argument works like that of TextIOWrapper's constructor.
2666 """
2667
2668 def __init__(self, initial_value="", newline="\n"):
2669 super(StringIO, self).__init__(BytesIO(),
2670 encoding="utf-8",
2671 errors="surrogatepass",
2672 newline=newline)
2673 # Issue #5645: make universal newlines semantics the same as in the
2674 # C version, even under Windows.
2675 if newline is None:
2676 self._writetranslate = False
2677 if initial_value is not None:
2678 if not isinstance(initial_value, str):
2679 raise TypeError("initial_value must be str or None, not {0}"
2680 .format(type(initial_value).__name__))
2681 self.write(initial_value)
2682 self.seek(0)
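# For illustration only (not part of the implementation):
#   s = StringIO("hello\n")
#   s.read()         -> "hello\n"
#   s.write("bye")   # appends at the current position (EOF here)
#   s.getvalue()     -> "hello\nbye"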
2683
2684 def getvalue(self):
2685 self.flush()
2686 decoder = self._decoder or self._get_decoder()
2687 old_state = decoder.getstate()
2688 decoder.reset()
2689 try:
2690 return decoder.decode(self.buffer.getvalue(), final=True)
2691 finally:
2692 decoder.setstate(old_state)
2693
2694 def __repr__(self):
2695 # TextIOWrapper tells the encoding in its repr. In StringIO,
2696 # that's an implementation detail.
2697 return object.__repr__(self)
2698
2699 @property
2700 def errors(self):
2701 return None
2702
2703 @property
2704 def encoding(self):
2705 return None
2706
2707 def detach(self):
2708 # This doesn't make sense on StringIO.
2709 self._unsupported("detach")