python (3.11.7)
       1  # SPDX-FileCopyrightText: 2015 Eric Larson
       2  #
       3  # SPDX-License-Identifier: Apache-2.0
       4  
       5  from tempfile import NamedTemporaryFile
       6  import mmap
       7  
       8  
       9  class ESC[4;38;5;81mCallbackFileWrapper(ESC[4;38;5;149mobject):
      10      """
      11      Small wrapper around a fp object which will tee everything read into a
      12      buffer, and when that file is closed it will execute a callback with the
      13      contents of that buffer.
      14  
      15      All attributes are proxied to the underlying file object.
      16  
      17      This class uses members with a double underscore (__) leading prefix so as
      18      not to accidentally shadow an attribute.
      19  
      20      The data is stored in a temporary file until it is all available.  As long
      21      as the temporary files directory is disk-based (sometimes it's a
      22      memory-backed-``tmpfs`` on Linux), data will be unloaded to disk if memory
      23      pressure is high.  For small files the disk usually won't be used at all,
      24      it'll all be in the filesystem memory cache, so there should be no
      25      performance impact.
      26      """
      27  
      28      def __init__(self, fp, callback):
      29          self.__buf = NamedTemporaryFile("rb+", delete=True)
      30          self.__fp = fp
      31          self.__callback = callback
      32  
      33      def __getattr__(self, name):
      34          # The vaguaries of garbage collection means that self.__fp is
      35          # not always set.  By using __getattribute__ and the private
      36          # name[0] allows looking up the attribute value and raising an
      37          # AttributeError when it doesn't exist. This stop thigns from
      38          # infinitely recursing calls to getattr in the case where
      39          # self.__fp hasn't been set.
      40          #
      41          # [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers
      42          fp = self.__getattribute__("_CallbackFileWrapper__fp")
      43          return getattr(fp, name)
      44  
      45      def __is_fp_closed(self):
      46          try:
      47              return self.__fp.fp is None
      48  
      49          except AttributeError:
      50              pass
      51  
      52          try:
      53              return self.__fp.closed
      54  
      55          except AttributeError:
      56              pass
      57  
      58          # We just don't cache it then.
      59          # TODO: Add some logging here...
      60          return False
      61  
      62      def _close(self):
      63          if self.__callback:
      64              if self.__buf.tell() == 0:
      65                  # Empty file:
      66                  result = b""
      67              else:
      68                  # Return the data without actually loading it into memory,
      69                  # relying on Python's buffer API and mmap(). mmap() just gives
      70                  # a view directly into the filesystem's memory cache, so it
      71                  # doesn't result in duplicate memory use.
      72                  self.__buf.seek(0, 0)
      73                  result = memoryview(
      74                      mmap.mmap(self.__buf.fileno(), 0, access=mmap.ACCESS_READ)
      75                  )
      76              self.__callback(result)
      77  
      78          # We assign this to None here, because otherwise we can get into
      79          # really tricky problems where the CPython interpreter dead locks
      80          # because the callback is holding a reference to something which
      81          # has a __del__ method. Setting this to None breaks the cycle
      82          # and allows the garbage collector to do it's thing normally.
      83          self.__callback = None
      84  
      85          # Closing the temporary file releases memory and frees disk space.
      86          # Important when caching big files.
      87          self.__buf.close()
      88  
      89      def read(self, amt=None):
      90          data = self.__fp.read(amt)
      91          if data:
      92              # We may be dealing with b'', a sign that things are over:
      93              # it's passed e.g. after we've already closed self.__buf.
      94              self.__buf.write(data)
      95          if self.__is_fp_closed():
      96              self._close()
      97  
      98          return data
      99  
     100      def _safe_read(self, amt):
     101          data = self.__fp._safe_read(amt)
     102          if amt == 2 and data == b"\r\n":
     103              # urllib executes this read to toss the CRLF at the end
     104              # of the chunk.
     105              return data
     106  
     107          self.__buf.write(data)
     108          if self.__is_fp_closed():
     109              self._close()
     110  
     111          return data