python (3.11.7)

(root)/
lib/
python3.11/
site-packages/
pip/
_internal/
cache.py
       1  """Cache Management
       2  """
       3  
       4  import hashlib
       5  import json
       6  import logging
       7  import os
       8  from pathlib import Path
       9  from typing import Any, Dict, List, Optional
      10  
      11  from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version
      12  from pip._vendor.packaging.utils import canonicalize_name
      13  
      14  from pip._internal.exceptions import InvalidWheelFilename
      15  from pip._internal.models.direct_url import DirectUrl
      16  from pip._internal.models.link import Link
      17  from pip._internal.models.wheel import Wheel
      18  from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
      19  from pip._internal.utils.urls import path_to_url
      20  
      21  logger = logging.getLogger(__name__)
      22  
      23  ORIGIN_JSON_NAME = "origin.json"
      24  
      25  
      26  def _hash_dict(d: Dict[str, str]) -> str:
      27      """Return a stable sha224 of a dictionary."""
      28      s = json.dumps(d, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
      29      return hashlib.sha224(s.encode("ascii")).hexdigest()
      30  
      31  
      32  class ESC[4;38;5;81mCache:
      33      """An abstract class - provides cache directories for data from links
      34  
      35      :param cache_dir: The root of the cache.
      36      """
      37  
      38      def __init__(self, cache_dir: str) -> None:
      39          super().__init__()
      40          assert not cache_dir or os.path.isabs(cache_dir)
      41          self.cache_dir = cache_dir or None
      42  
      43      def _get_cache_path_parts(self, link: Link) -> List[str]:
      44          """Get parts of part that must be os.path.joined with cache_dir"""
      45  
      46          # We want to generate an url to use as our cache key, we don't want to
      47          # just re-use the URL because it might have other items in the fragment
      48          # and we don't care about those.
      49          key_parts = {"url": link.url_without_fragment}
      50          if link.hash_name is not None and link.hash is not None:
      51              key_parts[link.hash_name] = link.hash
      52          if link.subdirectory_fragment:
      53              key_parts["subdirectory"] = link.subdirectory_fragment
      54  
      55          # Include interpreter name, major and minor version in cache key
      56          # to cope with ill-behaved sdists that build a different wheel
      57          # depending on the python version their setup.py is being run on,
      58          # and don't encode the difference in compatibility tags.
      59          # https://github.com/pypa/pip/issues/7296
      60          key_parts["interpreter_name"] = interpreter_name()
      61          key_parts["interpreter_version"] = interpreter_version()
      62  
      63          # Encode our key url with sha224, we'll use this because it has similar
      64          # security properties to sha256, but with a shorter total output (and
      65          # thus less secure). However the differences don't make a lot of
      66          # difference for our use case here.
      67          hashed = _hash_dict(key_parts)
      68  
      69          # We want to nest the directories some to prevent having a ton of top
      70          # level directories where we might run out of sub directories on some
      71          # FS.
      72          parts = [hashed[:2], hashed[2:4], hashed[4:6], hashed[6:]]
      73  
      74          return parts
      75  
      76      def _get_candidates(self, link: Link, canonical_package_name: str) -> List[Any]:
      77          can_not_cache = not self.cache_dir or not canonical_package_name or not link
      78          if can_not_cache:
      79              return []
      80  
      81          candidates = []
      82          path = self.get_path_for_link(link)
      83          if os.path.isdir(path):
      84              for candidate in os.listdir(path):
      85                  candidates.append((candidate, path))
      86          return candidates
      87  
      88      def get_path_for_link(self, link: Link) -> str:
      89          """Return a directory to store cached items in for link."""
      90          raise NotImplementedError()
      91  
      92      def get(
      93          self,
      94          link: Link,
      95          package_name: Optional[str],
      96          supported_tags: List[Tag],
      97      ) -> Link:
      98          """Returns a link to a cached item if it exists, otherwise returns the
      99          passed link.
     100          """
     101          raise NotImplementedError()
     102  
     103  
     104  class ESC[4;38;5;81mSimpleWheelCache(ESC[4;38;5;149mCache):
     105      """A cache of wheels for future installs."""
     106  
     107      def __init__(self, cache_dir: str) -> None:
     108          super().__init__(cache_dir)
     109  
     110      def get_path_for_link(self, link: Link) -> str:
     111          """Return a directory to store cached wheels for link
     112  
     113          Because there are M wheels for any one sdist, we provide a directory
     114          to cache them in, and then consult that directory when looking up
     115          cache hits.
     116  
     117          We only insert things into the cache if they have plausible version
     118          numbers, so that we don't contaminate the cache with things that were
     119          not unique. E.g. ./package might have dozens of installs done for it
     120          and build a version of 0.0...and if we built and cached a wheel, we'd
     121          end up using the same wheel even if the source has been edited.
     122  
     123          :param link: The link of the sdist for which this will cache wheels.
     124          """
     125          parts = self._get_cache_path_parts(link)
     126          assert self.cache_dir
     127          # Store wheels within the root cache_dir
     128          return os.path.join(self.cache_dir, "wheels", *parts)
     129  
     130      def get(
     131          self,
     132          link: Link,
     133          package_name: Optional[str],
     134          supported_tags: List[Tag],
     135      ) -> Link:
     136          candidates = []
     137  
     138          if not package_name:
     139              return link
     140  
     141          canonical_package_name = canonicalize_name(package_name)
     142          for wheel_name, wheel_dir in self._get_candidates(link, canonical_package_name):
     143              try:
     144                  wheel = Wheel(wheel_name)
     145              except InvalidWheelFilename:
     146                  continue
     147              if canonicalize_name(wheel.name) != canonical_package_name:
     148                  logger.debug(
     149                      "Ignoring cached wheel %s for %s as it "
     150                      "does not match the expected distribution name %s.",
     151                      wheel_name,
     152                      link,
     153                      package_name,
     154                  )
     155                  continue
     156              if not wheel.supported(supported_tags):
     157                  # Built for a different python/arch/etc
     158                  continue
     159              candidates.append(
     160                  (
     161                      wheel.support_index_min(supported_tags),
     162                      wheel_name,
     163                      wheel_dir,
     164                  )
     165              )
     166  
     167          if not candidates:
     168              return link
     169  
     170          _, wheel_name, wheel_dir = min(candidates)
     171          return Link(path_to_url(os.path.join(wheel_dir, wheel_name)))
     172  
     173  
     174  class ESC[4;38;5;81mEphemWheelCache(ESC[4;38;5;149mSimpleWheelCache):
     175      """A SimpleWheelCache that creates it's own temporary cache directory"""
     176  
     177      def __init__(self) -> None:
     178          self._temp_dir = TempDirectory(
     179              kind=tempdir_kinds.EPHEM_WHEEL_CACHE,
     180              globally_managed=True,
     181          )
     182  
     183          super().__init__(self._temp_dir.path)
     184  
     185  
     186  class ESC[4;38;5;81mCacheEntry:
     187      def __init__(
     188          self,
     189          link: Link,
     190          persistent: bool,
     191      ):
     192          self.link = link
     193          self.persistent = persistent
     194          self.origin: Optional[DirectUrl] = None
     195          origin_direct_url_path = Path(self.link.file_path).parent / ORIGIN_JSON_NAME
     196          if origin_direct_url_path.exists():
     197              try:
     198                  self.origin = DirectUrl.from_json(
     199                      origin_direct_url_path.read_text(encoding="utf-8")
     200                  )
     201              except Exception as e:
     202                  logger.warning(
     203                      "Ignoring invalid cache entry origin file %s for %s (%s)",
     204                      origin_direct_url_path,
     205                      link.filename,
     206                      e,
     207                  )
     208  
     209  
     210  class ESC[4;38;5;81mWheelCache(ESC[4;38;5;149mCache):
     211      """Wraps EphemWheelCache and SimpleWheelCache into a single Cache
     212  
     213      This Cache allows for gracefully degradation, using the ephem wheel cache
     214      when a certain link is not found in the simple wheel cache first.
     215      """
     216  
     217      def __init__(self, cache_dir: str) -> None:
     218          super().__init__(cache_dir)
     219          self._wheel_cache = SimpleWheelCache(cache_dir)
     220          self._ephem_cache = EphemWheelCache()
     221  
     222      def get_path_for_link(self, link: Link) -> str:
     223          return self._wheel_cache.get_path_for_link(link)
     224  
     225      def get_ephem_path_for_link(self, link: Link) -> str:
     226          return self._ephem_cache.get_path_for_link(link)
     227  
     228      def get(
     229          self,
     230          link: Link,
     231          package_name: Optional[str],
     232          supported_tags: List[Tag],
     233      ) -> Link:
     234          cache_entry = self.get_cache_entry(link, package_name, supported_tags)
     235          if cache_entry is None:
     236              return link
     237          return cache_entry.link
     238  
     239      def get_cache_entry(
     240          self,
     241          link: Link,
     242          package_name: Optional[str],
     243          supported_tags: List[Tag],
     244      ) -> Optional[CacheEntry]:
     245          """Returns a CacheEntry with a link to a cached item if it exists or
     246          None. The cache entry indicates if the item was found in the persistent
     247          or ephemeral cache.
     248          """
     249          retval = self._wheel_cache.get(
     250              link=link,
     251              package_name=package_name,
     252              supported_tags=supported_tags,
     253          )
     254          if retval is not link:
     255              return CacheEntry(retval, persistent=True)
     256  
     257          retval = self._ephem_cache.get(
     258              link=link,
     259              package_name=package_name,
     260              supported_tags=supported_tags,
     261          )
     262          if retval is not link:
     263              return CacheEntry(retval, persistent=False)
     264  
     265          return None
     266  
     267      @staticmethod
     268      def record_download_origin(cache_dir: str, download_info: DirectUrl) -> None:
     269          origin_path = Path(cache_dir) / ORIGIN_JSON_NAME
     270          if origin_path.exists():
     271              try:
     272                  origin = DirectUrl.from_json(origin_path.read_text(encoding="utf-8"))
     273              except Exception as e:
     274                  logger.warning(
     275                      "Could not read origin file %s in cache entry (%s). "
     276                      "Will attempt to overwrite it.",
     277                      origin_path,
     278                      e,
     279                  )
     280              else:
     281                  # TODO: use DirectUrl.equivalent when
     282                  # https://github.com/pypa/pip/pull/10564 is merged.
     283                  if origin.url != download_info.url:
     284                      logger.warning(
     285                          "Origin URL %s in cache entry %s does not match download URL "
     286                          "%s. This is likely a pip bug or a cache corruption issue. "
     287                          "Will overwrite it with the new value.",
     288                          origin.url,
     289                          cache_dir,
     290                          download_info.url,
     291                      )
     292          origin_path.write_text(download_info.to_json(), encoding="utf-8")