# Viewing file: sources.py (6.4 KB)  -rw-r--r--
 
 import logging
 import mimetypes
 import os
 import pathlib
 from typing import Callable, Iterable, Optional, Tuple

 from pip._internal.models.candidate import InstallationCandidate
 from pip._internal.models.link import Link
 from pip._internal.utils.urls import path_to_url, url_to_path
 from pip._internal.vcs import is_url
 
 # Module-level logger, named after this module.
 logger = logging.getLogger(__name__)

 # Type aliases used by the link sources defined in this module.
 # Return type of ``page_candidates()``.
 FoundCandidates = Iterable[InstallationCandidate]
 # Return type of ``file_links()``.
 FoundLinks = Iterable[Link]
 # Callback that receives a link and returns the candidates obtained from it.
 CandidatesFromPage = Callable[[Link], Iterable[InstallationCandidate]]
 # Predicate over a link; _RemoteFileSource consults it before producing
 # page candidates.
 PageValidator = Callable[[Link], bool]
 
 
 class LinkSource:
     """Common interface of the link sources defined in this module.

     The members below are placeholders that only raise
     ``NotImplementedError``; the concrete sources override all three.
     """

     @property
     def link(self) -> Optional[Link]:
         """The link backing this source, or ``None`` when it has none."""
         raise NotImplementedError

     def page_candidates(self) -> FoundCandidates:
         """Candidates obtained by parsing an HTML archive-listing page."""
         raise NotImplementedError

     def file_links(self) -> FoundLinks:
         """Links to archives that were specified directly."""
         raise NotImplementedError
 
 
 def _is_html_file(file_url: str) -> bool:
     """Tell whether ``file_url`` names an HTML document.

     The decision rests solely on the MIME type that ``mimetypes`` guesses
     from the URL (with ``strict=False``); the target is never opened.
     """
     guessed_type, _encoding = mimetypes.guess_type(file_url, strict=False)
     return guessed_type == "text/html"
 
 
 class _FlatDirectorySource(LinkSource):
     """Link source specified by ``--find-links=<path-to-dir>``.

     This looks at the content of the directory, and returns:

     * ``page_candidates``: Candidates listed on each HTML file in the
       directory.
     * ``file_links``: A link for every other entry in the directory.
     """

     def __init__(
         self,
         candidates_from_page: CandidatesFromPage,
         path: str,
     ) -> None:
         # Canonicalize the directory once (``os.path.realpath``); both
         # iterators below walk this stored path.
         resolved = os.path.realpath(path)
         self._path = pathlib.Path(resolved)
         self._candidates_from_page = candidates_from_page

     @property
     def link(self) -> Optional[Link]:
         """Always ``None`` for a flat directory source."""
         return None

     def page_candidates(self) -> FoundCandidates:
         """Yield the candidates from every HTML file in the directory."""
         for entry in self._path.iterdir():
             entry_url = path_to_url(str(entry))
             if _is_html_file(entry_url):
                 yield from self._candidates_from_page(Link(entry_url))

     def file_links(self) -> FoundLinks:
         """Yield a link for every directory entry that is not an HTML file."""
         for entry in self._path.iterdir():
             entry_url = path_to_url(str(entry))
             if not _is_html_file(entry_url):
                 yield Link(entry_url)
 
 
 class _LocalFileSource(LinkSource):
     """``--find-links=<path-or-url>`` or ``--[extra-]index-url=<path-or-url>``.

     If a URL is supplied, it must be a ``file:`` URL. If a path is supplied
     to the option, it is converted to a URL first. This returns:

     * ``page_candidates``: Candidates listed on the file, if it is HTML.
     * ``file_links``: The file itself, if it is not HTML.
     """

     def __init__(
         self,
         candidates_from_page: CandidatesFromPage,
         link: Link,
     ) -> None:
         self._link = link
         self._candidates_from_page = candidates_from_page

     @property
     def link(self) -> Optional[Link]:
         """The link this source was created with."""
         return self._link

     def page_candidates(self) -> FoundCandidates:
         """Yield the page's candidates; yield nothing for a non-HTML file."""
         if _is_html_file(self._link.url):
             yield from self._candidates_from_page(self._link)

     def file_links(self) -> FoundLinks:
         """Yield the link itself; yield nothing when it is an HTML file."""
         if not _is_html_file(self._link.url):
             yield self._link
 
 
 class _RemoteFileSource(LinkSource):
     """``--find-links=<url>`` or ``--[extra-]index-url=<url>``.

     This returns:

     * ``page_candidates``: Candidates listed on the page, provided the page
       validator accepts the link.
     * ``file_links``: The link itself, unconditionally.
     """

     def __init__(
         self,
         candidates_from_page: CandidatesFromPage,
         page_validator: PageValidator,
         link: Link,
     ) -> None:
         self._link = link
         self._page_validator = page_validator
         self._candidates_from_page = candidates_from_page

     @property
     def link(self) -> Optional[Link]:
         """The link this source was created with."""
         return self._link

     def page_candidates(self) -> FoundCandidates:
         """Yield the page's candidates if the validator accepts the link."""
         if self._page_validator(self._link):
             yield from self._candidates_from_page(self._link)

     def file_links(self) -> FoundLinks:
         """Yield the link itself, without checking what it points to."""
         yield self._link
 
 
 class _IndexDirectorySource(LinkSource):
     """``--[extra-]index-url=<path-to-directory>``.

     This is treated like a remote URL; ``candidates_from_page`` contains
     logic for this by appending ``index.html`` to the link.
     """

     def __init__(
         self,
         candidates_from_page: CandidatesFromPage,
         link: Link,
     ) -> None:
         self._link = link
         self._candidates_from_page = candidates_from_page

     @property
     def link(self) -> Optional[Link]:
         """The link this source was created with."""
         return self._link

     def page_candidates(self) -> FoundCandidates:
         """Yield whatever ``candidates_from_page`` produces for the link."""
         directory_link = self._link
         yield from self._candidates_from_page(directory_link)

     def file_links(self) -> FoundLinks:
         """Return an empty tuple: this source contributes no file links."""
         return ()
 
 
 def build_source(
     location: str,
     *,
     candidates_from_page: CandidatesFromPage,
     page_validator: PageValidator,
     expand_dir: bool,
     cache_link_parsing: bool,
 ) -> Tuple[Optional[str], Optional[LinkSource]]:
     """Create the link source that matches ``location``.

     ``location`` may be an existing local path, a ``file:`` URL, or any
     other string accepted by ``is_url``.

     :param location: The path or URL to build a source for.
     :param candidates_from_page: Callback handed to the created source.
     :param page_validator: Predicate handed to ``_RemoteFileSource``.
     :param expand_dir: For a local directory, choose
         ``_FlatDirectorySource`` when true and ``_IndexDirectorySource``
         otherwise.
     :param cache_link_parsing: Forwarded to every ``Link`` created here.
     :return: A ``(url, source)`` pair. It is ``(None, None)`` when the
         location is not recognised, and ``(url, None)`` when a local
         location is neither a file nor a directory; a warning is logged in
         both cases.
     """
     # Work out the URL form of the location and, when it is local, the
     # filesystem path that goes with it.
     local_path: Optional[str] = None
     resolved_url: Optional[str] = None
     if os.path.exists(location):  # Is a local path.
         resolved_url = path_to_url(location)
         local_path = location
     elif location.startswith("file:"):  # A file: URL.
         resolved_url = location
         local_path = url_to_path(location)
     elif is_url(location):
         resolved_url = location

     if resolved_url is None:
         logger.warning(
             "Location '%s' is ignored: "
             "it is either a non-existing path or lacks a specific scheme.",
             location,
         )
         return (None, None)

     # No local path means the location is a non-file URL.
     if local_path is None:
         return (
             resolved_url,
             _RemoteFileSource(
                 candidates_from_page=candidates_from_page,
                 page_validator=page_validator,
                 link=Link(resolved_url, cache_link_parsing=cache_link_parsing),
             ),
         )

     if os.path.isdir(local_path):
         if not expand_dir:
             return (
                 resolved_url,
                 _IndexDirectorySource(
                     candidates_from_page=candidates_from_page,
                     link=Link(
                         resolved_url,
                         cache_link_parsing=cache_link_parsing,
                     ),
                 ),
             )
         return (
             resolved_url,
             _FlatDirectorySource(
                 candidates_from_page=candidates_from_page,
                 path=local_path,
             ),
         )

     if os.path.isfile(local_path):
         return (
             resolved_url,
             _LocalFileSource(
                 candidates_from_page=candidates_from_page,
                 link=Link(resolved_url, cache_link_parsing=cache_link_parsing),
             ),
         )

     # A local location that is neither a directory nor a regular file.
     logger.warning(
         "Location '%s' is ignored: it is neither a file nor a directory.",
         location,
     )
     return (resolved_url, None)
 
 |