python (3.11.7)
       1  import logging
       2  import mimetypes
       3  import os
       4  import pathlib
       5  from typing import Callable, Iterable, Optional, Tuple
       6  
       7  from pip._internal.models.candidate import InstallationCandidate
       8  from pip._internal.models.link import Link
       9  from pip._internal.utils.urls import path_to_url, url_to_path
      10  from pip._internal.vcs import is_url
      11  
# Module-level logger; used by ``build_source`` to warn about ignored locations.
logger = logging.getLogger(__name__)

# Iterable of installation candidates; the return type of ``page_candidates``.
FoundCandidates = Iterable[InstallationCandidate]
# Iterable of links; the return type of ``file_links``.
FoundLinks = Iterable[Link]
# Callable that receives a page link and returns the candidates found on it.
CandidatesFromPage = Callable[[Link], Iterable[InstallationCandidate]]
# Predicate that receives a link and says whether it may be parsed as a page.
PageValidator = Callable[[Link], bool]
      18  
      19  
      20  class ESC[4;38;5;81mLinkSource:
      21      @property
      22      def link(self) -> Optional[Link]:
      23          """Returns the underlying link, if there's one."""
      24          raise NotImplementedError()
      25  
      26      def page_candidates(self) -> FoundCandidates:
      27          """Candidates found by parsing an archive listing HTML file."""
      28          raise NotImplementedError()
      29  
      30      def file_links(self) -> FoundLinks:
      31          """Links found by specifying archives directly."""
      32          raise NotImplementedError()
      33  
      34  
      35  def _is_html_file(file_url: str) -> bool:
      36      return mimetypes.guess_type(file_url, strict=False)[0] == "text/html"
      37  
      38  
      39  class ESC[4;38;5;81m_FlatDirectorySource(ESC[4;38;5;149mLinkSource):
      40      """Link source specified by ``--find-links=<path-to-dir>``.
      41  
      42      This looks the content of the directory, and returns:
      43  
      44      * ``page_candidates``: Links listed on each HTML file in the directory.
      45      * ``file_candidates``: Archives in the directory.
      46      """
      47  
      48      def __init__(
      49          self,
      50          candidates_from_page: CandidatesFromPage,
      51          path: str,
      52      ) -> None:
      53          self._candidates_from_page = candidates_from_page
      54          self._path = pathlib.Path(os.path.realpath(path))
      55  
      56      @property
      57      def link(self) -> Optional[Link]:
      58          return None
      59  
      60      def page_candidates(self) -> FoundCandidates:
      61          for path in self._path.iterdir():
      62              url = path_to_url(str(path))
      63              if not _is_html_file(url):
      64                  continue
      65              yield from self._candidates_from_page(Link(url))
      66  
      67      def file_links(self) -> FoundLinks:
      68          for path in self._path.iterdir():
      69              url = path_to_url(str(path))
      70              if _is_html_file(url):
      71                  continue
      72              yield Link(url)
      73  
      74  
      75  class ESC[4;38;5;81m_LocalFileSource(ESC[4;38;5;149mLinkSource):
      76      """``--find-links=<path-or-url>`` or ``--[extra-]index-url=<path-or-url>``.
      77  
      78      If a URL is supplied, it must be a ``file:`` URL. If a path is supplied to
      79      the option, it is converted to a URL first. This returns:
      80  
      81      * ``page_candidates``: Links listed on an HTML file.
      82      * ``file_candidates``: The non-HTML file.
      83      """
      84  
      85      def __init__(
      86          self,
      87          candidates_from_page: CandidatesFromPage,
      88          link: Link,
      89      ) -> None:
      90          self._candidates_from_page = candidates_from_page
      91          self._link = link
      92  
      93      @property
      94      def link(self) -> Optional[Link]:
      95          return self._link
      96  
      97      def page_candidates(self) -> FoundCandidates:
      98          if not _is_html_file(self._link.url):
      99              return
     100          yield from self._candidates_from_page(self._link)
     101  
     102      def file_links(self) -> FoundLinks:
     103          if _is_html_file(self._link.url):
     104              return
     105          yield self._link
     106  
     107  
     108  class ESC[4;38;5;81m_RemoteFileSource(ESC[4;38;5;149mLinkSource):
     109      """``--find-links=<url>`` or ``--[extra-]index-url=<url>``.
     110  
     111      This returns:
     112  
     113      * ``page_candidates``: Links listed on an HTML file.
     114      * ``file_candidates``: The non-HTML file.
     115      """
     116  
     117      def __init__(
     118          self,
     119          candidates_from_page: CandidatesFromPage,
     120          page_validator: PageValidator,
     121          link: Link,
     122      ) -> None:
     123          self._candidates_from_page = candidates_from_page
     124          self._page_validator = page_validator
     125          self._link = link
     126  
     127      @property
     128      def link(self) -> Optional[Link]:
     129          return self._link
     130  
     131      def page_candidates(self) -> FoundCandidates:
     132          if not self._page_validator(self._link):
     133              return
     134          yield from self._candidates_from_page(self._link)
     135  
     136      def file_links(self) -> FoundLinks:
     137          yield self._link
     138  
     139  
     140  class ESC[4;38;5;81m_IndexDirectorySource(ESC[4;38;5;149mLinkSource):
     141      """``--[extra-]index-url=<path-to-directory>``.
     142  
     143      This is treated like a remote URL; ``candidates_from_page`` contains logic
     144      for this by appending ``index.html`` to the link.
     145      """
     146  
     147      def __init__(
     148          self,
     149          candidates_from_page: CandidatesFromPage,
     150          link: Link,
     151      ) -> None:
     152          self._candidates_from_page = candidates_from_page
     153          self._link = link
     154  
     155      @property
     156      def link(self) -> Optional[Link]:
     157          return self._link
     158  
     159      def page_candidates(self) -> FoundCandidates:
     160          yield from self._candidates_from_page(self._link)
     161  
     162      def file_links(self) -> FoundLinks:
     163          return ()
     164  
     165  
     166  def build_source(
     167      location: str,
     168      *,
     169      candidates_from_page: CandidatesFromPage,
     170      page_validator: PageValidator,
     171      expand_dir: bool,
     172      cache_link_parsing: bool,
     173  ) -> Tuple[Optional[str], Optional[LinkSource]]:
     174      path: Optional[str] = None
     175      url: Optional[str] = None
     176      if os.path.exists(location):  # Is a local path.
     177          url = path_to_url(location)
     178          path = location
     179      elif location.startswith("file:"):  # A file: URL.
     180          url = location
     181          path = url_to_path(location)
     182      elif is_url(location):
     183          url = location
     184  
     185      if url is None:
     186          msg = (
     187              "Location '%s' is ignored: "
     188              "it is either a non-existing path or lacks a specific scheme."
     189          )
     190          logger.warning(msg, location)
     191          return (None, None)
     192  
     193      if path is None:
     194          source: LinkSource = _RemoteFileSource(
     195              candidates_from_page=candidates_from_page,
     196              page_validator=page_validator,
     197              link=Link(url, cache_link_parsing=cache_link_parsing),
     198          )
     199          return (url, source)
     200  
     201      if os.path.isdir(path):
     202          if expand_dir:
     203              source = _FlatDirectorySource(
     204                  candidates_from_page=candidates_from_page,
     205                  path=path,
     206              )
     207          else:
     208              source = _IndexDirectorySource(
     209                  candidates_from_page=candidates_from_page,
     210                  link=Link(url, cache_link_parsing=cache_link_parsing),
     211              )
     212          return (url, source)
     213      elif os.path.isfile(path):
     214          source = _LocalFileSource(
     215              candidates_from_page=candidates_from_page,
     216              link=Link(url, cache_link_parsing=cache_link_parsing),
     217          )
     218          return (url, source)
     219      logger.warning(
     220          "Location '%s' is ignored: it is neither a file nor a directory.",
     221          location,
     222      )
     223      return (url, None)