Commit 070ea6b4 by Arun Babu Neelicattu Committed by Bjorn Neergaard

improve http request handling for sources

This change refactors HTTP repository source implementations. The
following changes have been made.

- CacheControl cache now lives within Authenticator.
- Authenticator manages unique sessions for individual netloc.
- CacheControl usage now respects disable cache parameter in repos.
- Certificate and authentication logic is now managed solely within
  Authenticator for source repositories taking advantage of recent
  enhancements.

These changes should allow for better handling of cases like those
described in #3041. Additionally, this forms the foundation for
unifying HTTP specific logic within the code base and possibly allowing
for migration of requests etc. if/when required.
parent cdd6e2bd
...@@ -192,8 +192,6 @@ class Factory(BaseFactory): ...@@ -192,8 +192,6 @@ class Factory(BaseFactory):
cls, source: dict[str, str], auth_config: Config, disable_cache: bool = False cls, source: dict[str, str], auth_config: Config, disable_cache: bool = False
) -> LegacyRepository: ) -> LegacyRepository:
from poetry.repositories.legacy_repository import LegacyRepository from poetry.repositories.legacy_repository import LegacyRepository
from poetry.utils.helpers import get_cert
from poetry.utils.helpers import get_client_cert
if "url" not in source: if "url" not in source:
raise RuntimeError("Unsupported source specified") raise RuntimeError("Unsupported source specified")
...@@ -208,8 +206,6 @@ class Factory(BaseFactory): ...@@ -208,8 +206,6 @@ class Factory(BaseFactory):
name, name,
url, url,
config=auth_config, config=auth_config,
cert=get_cert(auth_config, name),
client_cert=get_client_cert(auth_config, name),
disable_cache=disable_cache, disable_cache=disable_cache,
) )
......
...@@ -4,7 +4,6 @@ from abc import ABC ...@@ -4,7 +4,6 @@ from abc import ABC
from abc import abstractmethod from abc import abstractmethod
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from cachecontrol.caches import FileCache
from cachy import CacheManager from cachy import CacheManager
from poetry.core.semver.helpers import parse_constraint from poetry.core.semver.helpers import parse_constraint
...@@ -21,7 +20,7 @@ if TYPE_CHECKING: ...@@ -21,7 +20,7 @@ if TYPE_CHECKING:
class CachedRepository(Repository, ABC): class CachedRepository(Repository, ABC):
CACHE_VERSION = parse_constraint("1.0.0") CACHE_VERSION = parse_constraint("1.0.0")
def __init__(self, name: str, cache_group: str, disable_cache: bool = False): def __init__(self, name: str, disable_cache: bool = False):
super().__init__(name) super().__init__(name)
self._disable_cache = disable_cache self._disable_cache = disable_cache
self._cache_dir = REPOSITORY_CACHE_DIR / name self._cache_dir = REPOSITORY_CACHE_DIR / name
...@@ -36,7 +35,6 @@ class CachedRepository(Repository, ABC): ...@@ -36,7 +35,6 @@ class CachedRepository(Repository, ABC):
}, },
} }
) )
self._cache_control_cache = FileCache(str(self._cache_dir / cache_group))
@abstractmethod @abstractmethod
def _get_release_info(self, name: str, version: str) -> dict: def _get_release_info(self, name: str, version: str) -> dict:
......
from __future__ import annotations from __future__ import annotations
import contextlib
import hashlib import hashlib
import os import os
import urllib import urllib
import urllib.parse
from abc import ABC from abc import ABC
from collections import defaultdict from collections import defaultdict
from pathlib import Path from pathlib import Path
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from typing import Any from typing import Any
from urllib.parse import quote
import requests import requests
import requests.auth
from cachecontrol import CacheControl
from poetry.core.packages.dependency import Dependency from poetry.core.packages.dependency import Dependency
from poetry.core.packages.utils.link import Link from poetry.core.packages.utils.link import Link
from poetry.core.version.markers import parse_marker from poetry.core.version.markers import parse_marker
...@@ -42,41 +39,18 @@ class HTTPRepository(CachedRepository, ABC): ...@@ -42,41 +39,18 @@ class HTTPRepository(CachedRepository, ABC):
url: str, url: str,
config: Config | None = None, config: Config | None = None,
disable_cache: bool = False, disable_cache: bool = False,
cert: Path | None = None,
client_cert: Path | None = None,
) -> None: ) -> None:
super().__init__(name, "_http", disable_cache) super().__init__(name, disable_cache)
self._url = url self._url = url
self._client_cert = client_cert
self._cert = cert
self._authenticator = Authenticator( self._authenticator = Authenticator(
config=config or Config(use_environment=True) config=config or Config(use_environment=True),
) cache_id=name,
disable_cache=disable_cache,
self._session = CacheControl(
self._authenticator.session, cache=self._cache_control_cache
) )
username, password = self._authenticator.get_credentials_for_url(self._url)
if username is not None and password is not None:
self._authenticator.session.auth = requests.auth.HTTPBasicAuth(
username, password
)
if self._cert:
self._authenticator.session.verify = str(self._cert)
if self._client_cert:
self._authenticator.session.cert = str(self._client_cert)
@property @property
def session(self) -> CacheControl: def session(self) -> Authenticator:
return self._session return self._authenticator
def __del__(self) -> None:
with contextlib.suppress(AttributeError):
self._session.close()
@property @property
def url(self) -> str: def url(self) -> str:
...@@ -84,22 +58,21 @@ class HTTPRepository(CachedRepository, ABC): ...@@ -84,22 +58,21 @@ class HTTPRepository(CachedRepository, ABC):
@property @property
def cert(self) -> Path | None: def cert(self) -> Path | None:
return self._cert cert = self._authenticator.get_certs_for_url(self.url).get("verify")
if cert:
return Path(cert)
return None
@property @property
def client_cert(self) -> Path | None: def client_cert(self) -> Path | None:
return self._client_cert cert = self._authenticator.get_certs_for_url(self.url).get("cert")
if cert:
return Path(cert)
return None
@property @property
def authenticated_url(self) -> str: def authenticated_url(self) -> str:
if not self._session.auth: return self._authenticator.authenticated_url(url=self.url)
return self.url
parsed = urllib.parse.urlparse(self.url)
username = quote(self._session.auth.username, safe="")
password = quote(self._session.auth.password, safe="")
return f"{parsed.scheme}://{username}:{password}@{parsed.netloc}{parsed.path}"
def _download(self, url: str, dest: str) -> None: def _download(self, url: str, dest: str) -> None:
return download_file(url, dest, session=self.session) return download_file(url, dest, session=self.session)
...@@ -286,7 +259,7 @@ class HTTPRepository(CachedRepository, ABC): ...@@ -286,7 +259,7 @@ class HTTPRepository(CachedRepository, ABC):
def _get_response(self, endpoint: str) -> requests.Response | None: def _get_response(self, endpoint: str) -> requests.Response | None:
url = self._url + endpoint url = self._url + endpoint
try: try:
response = self.session.get(url) response = self.session.get(url, raise_for_status=False)
if response.status_code in (401, 403): if response.status_code in (401, 403):
self._log( self._log(
f"Authorization error accessing {url}", f"Authorization error accessing {url}",
......
...@@ -13,8 +13,6 @@ from poetry.utils.helpers import canonicalize_name ...@@ -13,8 +13,6 @@ from poetry.utils.helpers import canonicalize_name
if TYPE_CHECKING: if TYPE_CHECKING:
from pathlib import Path
from poetry.core.packages.dependency import Dependency from poetry.core.packages.dependency import Dependency
from poetry.core.packages.utils.link import Link from poetry.core.packages.utils.link import Link
...@@ -28,15 +26,11 @@ class LegacyRepository(HTTPRepository): ...@@ -28,15 +26,11 @@ class LegacyRepository(HTTPRepository):
url: str, url: str,
config: Config | None = None, config: Config | None = None,
disable_cache: bool = False, disable_cache: bool = False,
cert: Path | None = None,
client_cert: Path | None = None,
) -> None: ) -> None:
if name == "pypi": if name == "pypi":
raise ValueError("The name [pypi] is reserved for repositories") raise ValueError("The name [pypi] is reserved for repositories")
super().__init__( super().__init__(name, url.rstrip("/"), config, disable_cache)
name, url.rstrip("/"), config, disable_cache, cert, client_cert
)
def find_packages(self, dependency: Dependency) -> list[Package]: def find_packages(self, dependency: Dependency) -> list[Package]:
packages = [] packages = []
......
...@@ -232,7 +232,7 @@ class PyPiRepository(HTTPRepository): ...@@ -232,7 +232,7 @@ class PyPiRepository(HTTPRepository):
except requests.exceptions.TooManyRedirects: except requests.exceptions.TooManyRedirects:
# Cache control redirect loop. # Cache control redirect loop.
# We try to remove the cache and try again # We try to remove the cache and try again
self._cache_control_cache.delete(self._base_url + endpoint) self.session.delete_cache(self._base_url + endpoint)
json_response = self.session.get(self._base_url + endpoint) json_response = self.session.get(self._base_url + endpoint)
if json_response.status_code == 404: if json_response.status_code == 404:
......
from __future__ import annotations from __future__ import annotations
import contextlib
import logging import logging
import time import time
import urllib.parse import urllib.parse
...@@ -12,7 +13,11 @@ import requests ...@@ -12,7 +13,11 @@ import requests
import requests.auth import requests.auth
import requests.exceptions import requests.exceptions
from cachecontrol import CacheControl
from cachecontrol.caches import FileCache
from poetry.exceptions import PoetryException from poetry.exceptions import PoetryException
from poetry.locations import REPOSITORY_CACHE_DIR
from poetry.utils.helpers import get_cert from poetry.utils.helpers import get_cert
from poetry.utils.helpers import get_client_cert from poetry.utils.helpers import get_client_cert
from poetry.utils.password_manager import PasswordManager from poetry.utils.password_manager import PasswordManager
...@@ -26,43 +31,98 @@ if TYPE_CHECKING: ...@@ -26,43 +31,98 @@ if TYPE_CHECKING:
from poetry.config.config import Config from poetry.config.config import Config
logger = logging.getLogger() logger = logging.getLogger(__name__)
class Authenticator: class Authenticator:
def __init__(self, config: Config, io: IO | None = None) -> None: def __init__(
self,
config: Config,
io: IO | None = None,
cache_id: str | None = None,
disable_cache: bool = False,
) -> None:
self._config = config self._config = config
self._io = io self._io = io
self._session: requests.Session | None = None self._session: requests.Session | None = None
self._sessions_for_netloc: dict[str, requests.Session] = {}
self._credentials: dict[str, tuple[str, str]] = {} self._credentials: dict[str, tuple[str, str]] = {}
self._certs: dict[str, dict[str, Path | None]] = {} self._certs: dict[str, dict[str, Path | None]] = {}
self._password_manager = PasswordManager(self._config) self._password_manager = PasswordManager(self._config)
self._cache_control = (
FileCache(
str(REPOSITORY_CACHE_DIR / (cache_id or "_default_cache") / "_http")
)
if not disable_cache
else None
)
def _log(self, message: str, level: str = "debug") -> None: @property
if self._io is not None: def cache(self) -> FileCache | None:
self._io.write_line(f"<{level}>{message}</{level}>") return self._cache_control
else:
getattr(logger, level, logger.debug)(message)
@property @property
def session(self) -> requests.Session: def is_cached(self) -> bool:
if self._session is None: return self._cache_control is not None
self._session = requests.Session()
def create_session(self) -> requests.Session:
session = requests.Session()
if not self.is_cached:
return session
return CacheControl(sess=session, cache=self._cache_control)
def get_session(self, url: str | None = None) -> requests.Session:
if not url:
return self.create_session()
parsed_url = urllib.parse.urlsplit(url)
netloc = parsed_url.netloc
return self._session if netloc not in self._sessions_for_netloc:
logger.debug("Creating new session for %s", netloc)
self._sessions_for_netloc[netloc] = self.create_session()
return self._sessions_for_netloc[netloc]
def close(self) -> None:
for session in [self._session, *self._sessions_for_netloc.values()]:
if session is not None:
with contextlib.suppress(AttributeError):
session.close()
def __del__(self) -> None: def __del__(self) -> None:
if self._session is not None: self.close()
self._session.close()
def delete_cache(self, url: str) -> None:
if self.is_cached:
self._cache_control.delete(key=url)
def request(self, method: str, url: str, **kwargs: Any) -> requests.Response: def authenticated_url(self, url: str) -> str:
parsed = urllib.parse.urlparse(url)
username, password = self.get_credentials_for_url(url)
if username is not None and password is not None:
username = urllib.parse.quote(username, safe="")
password = urllib.parse.quote(password, safe="")
return (
f"{parsed.scheme}://{username}:{password}@{parsed.netloc}{parsed.path}"
)
return url
def request(
self, method: str, url: str, raise_for_status: bool = True, **kwargs: Any
) -> requests.Response:
request = requests.Request(method, url) request = requests.Request(method, url)
username, password = self.get_credentials_for_url(url) username, password = self.get_credentials_for_url(url)
if username is not None and password is not None: if username is not None and password is not None:
request = requests.auth.HTTPBasicAuth(username, password)(request) request = requests.auth.HTTPBasicAuth(username, password)(request)
session = self.session session = self.get_session(url=url)
prepared_request = session.prepare_request(request) prepared_request = session.prepare_request(request)
proxies = kwargs.get("proxies", {}) proxies = kwargs.get("proxies", {})
...@@ -100,19 +160,26 @@ class Authenticator: ...@@ -100,19 +160,26 @@ class Authenticator:
raise e raise e
else: else:
if resp.status_code not in [502, 503, 504] or is_last_attempt: if resp.status_code not in [502, 503, 504] or is_last_attempt:
resp.raise_for_status() if resp.status_code is not None and raise_for_status:
resp.raise_for_status()
return resp return resp
if not is_last_attempt: if not is_last_attempt:
attempt += 1 attempt += 1
delay = 0.5 * attempt delay = 0.5 * attempt
self._log(f"Retrying HTTP request in {delay} seconds.", level="debug") logger.debug(f"Retrying HTTP request in {delay} seconds.")
time.sleep(delay) time.sleep(delay)
continue continue
# this should never really be hit under any sane circumstance # this should never really be hit under any sane circumstance
raise PoetryException("Failed HTTP {} request", method.upper()) raise PoetryException("Failed HTTP {} request", method.upper())
def get(self, url: str, **kwargs: Any) -> requests.Response:
return self.request("get", url, **kwargs)
def post(self, url: str, **kwargs: Any) -> requests.Response:
return self.request("post", url, **kwargs)
def get_credentials_for_url(self, url: str) -> tuple[str | None, str | None]: def get_credentials_for_url(self, url: str) -> tuple[str | None, str | None]:
parsed_url = urllib.parse.urlsplit(url) parsed_url = urllib.parse.urlsplit(url)
......
...@@ -20,6 +20,7 @@ if TYPE_CHECKING: ...@@ -20,6 +20,7 @@ if TYPE_CHECKING:
from requests import Session from requests import Session
from poetry.config.config import Config from poetry.config.config import Config
from poetry.utils.authenticator import Authenticator
_canonicalize_regex = re.compile("[-_]+") _canonicalize_regex = re.compile("[-_]+")
...@@ -94,7 +95,7 @@ def merge_dicts(d1: dict, d2: dict) -> None: ...@@ -94,7 +95,7 @@ def merge_dicts(d1: dict, d2: dict) -> None:
def download_file( def download_file(
url: str, url: str,
dest: str, dest: str,
session: Session | None = None, session: Authenticator | Session | None = None,
chunk_size: int = 1024, chunk_size: int = 1024,
) -> None: ) -> None:
import requests import requests
......
...@@ -117,44 +117,22 @@ def test_install_with_non_pypi_default_repository(pool: Pool, installer: PipInst ...@@ -117,44 +117,22 @@ def test_install_with_non_pypi_default_repository(pool: Pool, installer: PipInst
installer.install(bar) installer.install(bar)
def test_install_with_cert(): @pytest.mark.parametrize(
ca_path = "path/to/cert.pem" ("key", "option"),
pool = Pool() [
("cert", "client-cert"),
default = LegacyRepository("default", "https://foo.bar", cert=Path(ca_path)) ("verify", "cert"),
],
pool.add_repository(default, default=True) )
def test_install_with_certs(mocker: MockerFixture, key: str, option: str):
null_env = NullEnv()
installer = PipInstaller(null_env, NullIO(), pool)
foo = Package(
"foo",
"0.0.0",
source_type="legacy",
source_reference=default.name,
source_url=default.url,
)
installer.install(foo)
assert len(null_env.executed) == 1
cmd = null_env.executed[0]
assert "--cert" in cmd
cert_index = cmd.index("--cert")
# Need to do the str(Path()) bit because Windows paths get modified by Path
assert cmd[cert_index + 1] == str(Path(ca_path))
def test_install_with_client_cert():
client_path = "path/to/client.pem" client_path = "path/to/client.pem"
pool = Pool() mocker.patch(
"poetry.utils.authenticator.Authenticator.get_certs_for_url",
default = LegacyRepository( return_value={key: client_path},
"default", "https://foo.bar", client_cert=Path(client_path)
) )
default = LegacyRepository("default", "https://foo.bar")
pool = Pool()
pool.add_repository(default, default=True) pool.add_repository(default, default=True)
null_env = NullEnv() null_env = NullEnv()
...@@ -173,8 +151,8 @@ def test_install_with_client_cert(): ...@@ -173,8 +151,8 @@ def test_install_with_client_cert():
assert len(null_env.executed) == 1 assert len(null_env.executed) == 1
cmd = null_env.executed[0] cmd = null_env.executed[0]
assert "--client-cert" in cmd assert f"--{option}" in cmd
cert_index = cmd.index("--client-cert") cert_index = cmd.index(f"--{option}")
# Need to do the str(Path()) bit because Windows paths get modified by Path # Need to do the str(Path()) bit because Windows paths get modified by Path
assert cmd[cert_index + 1] == str(Path(client_path)) assert cmd[cert_index + 1] == str(Path(client_path))
......
...@@ -405,7 +405,7 @@ def test_get_redirected_response_url( ...@@ -405,7 +405,7 @@ def test_get_redirected_response_url(
repo = MockHttpRepository({"/foo": 200}, http) repo = MockHttpRepository({"/foo": 200}, http)
redirect_url = "http://legacy.redirect.bar" redirect_url = "http://legacy.redirect.bar"
def get_mock(url: str) -> requests.Response: def get_mock(url: str, raise_for_status: bool = True) -> requests.Response:
response = requests.Response() response = requests.Response()
response.status_code = 200 response.status_code = 200
response.url = redirect_url + "/foo" response.url = redirect_url + "/foo"
......
...@@ -218,10 +218,11 @@ def test_get_should_invalid_cache_on_too_many_redirects_error(mocker: MockerFixt ...@@ -218,10 +218,11 @@ def test_get_should_invalid_cache_on_too_many_redirects_error(mocker: MockerFixt
delete_cache = mocker.patch("cachecontrol.caches.file_cache.FileCache.delete") delete_cache = mocker.patch("cachecontrol.caches.file_cache.FileCache.delete")
response = Response() response = Response()
response.status_code = 200
response.encoding = "utf-8" response.encoding = "utf-8"
response.raw = BytesIO(encode('{"foo": "bar"}')) response.raw = BytesIO(encode('{"foo": "bar"}'))
mocker.patch( mocker.patch(
"cachecontrol.adapter.CacheControlAdapter.send", "poetry.utils.authenticator.Authenticator.get",
side_effect=[TooManyRedirects(), response], side_effect=[TooManyRedirects(), response],
) )
repository = PyPiRepository() repository = PyPiRepository()
......
...@@ -339,10 +339,12 @@ def test_authenticator_uses_certs_from_config_if_not_provided( ...@@ -339,10 +339,12 @@ def test_authenticator_uses_certs_from_config_if_not_provided(
) )
authenticator = Authenticator(config, NullIO()) authenticator = Authenticator(config, NullIO())
session_send = mocker.patch.object(authenticator.session, "send") url = "https://foo.bar/files/foo-0.1.0.tar.gz"
session = authenticator.get_session(url)
session_send = mocker.patch.object(session, "send")
authenticator.request( authenticator.request(
"get", "get",
"https://foo.bar/files/foo-0.1.0.tar.gz", url,
verify=cert, verify=cert,
cert=client_cert, cert=client_cert,
) )
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment