Commit 070ea6b4 by Arun Babu Neelicattu Committed by Bjorn Neergaard

improve http request handling for sources

This change refactors HTTP repository source implementations. The
following changes have been made.

- CacheControl cache now lives within Authenticator.
- Authenticator manages a separate session for each netloc.
- CacheControl usage now respects the `disable_cache` parameter of repositories.
- Certificate and authentication logic is now managed solely within
  Authenticator for source repositories taking advantage of recent
  enhancements.

These changes should allow for better handling of cases like those
described in #3041. Additionally, this forms the foundation for
unifying HTTP specific logic within the code base and possibly allowing
for migration of requests etc. if/when required.
parent cdd6e2bd
......@@ -192,8 +192,6 @@ class Factory(BaseFactory):
cls, source: dict[str, str], auth_config: Config, disable_cache: bool = False
) -> LegacyRepository:
from poetry.repositories.legacy_repository import LegacyRepository
from poetry.utils.helpers import get_cert
from poetry.utils.helpers import get_client_cert
if "url" not in source:
raise RuntimeError("Unsupported source specified")
......@@ -208,8 +206,6 @@ class Factory(BaseFactory):
name,
url,
config=auth_config,
cert=get_cert(auth_config, name),
client_cert=get_client_cert(auth_config, name),
disable_cache=disable_cache,
)
......
......@@ -4,7 +4,6 @@ from abc import ABC
from abc import abstractmethod
from typing import TYPE_CHECKING
from cachecontrol.caches import FileCache
from cachy import CacheManager
from poetry.core.semver.helpers import parse_constraint
......@@ -21,7 +20,7 @@ if TYPE_CHECKING:
class CachedRepository(Repository, ABC):
CACHE_VERSION = parse_constraint("1.0.0")
def __init__(self, name: str, cache_group: str, disable_cache: bool = False):
def __init__(self, name: str, disable_cache: bool = False):
super().__init__(name)
self._disable_cache = disable_cache
self._cache_dir = REPOSITORY_CACHE_DIR / name
......@@ -36,7 +35,6 @@ class CachedRepository(Repository, ABC):
},
}
)
self._cache_control_cache = FileCache(str(self._cache_dir / cache_group))
@abstractmethod
def _get_release_info(self, name: str, version: str) -> dict:
......
from __future__ import annotations
import contextlib
import hashlib
import os
import urllib
import urllib.parse
from abc import ABC
from collections import defaultdict
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Any
from urllib.parse import quote
import requests
import requests.auth
from cachecontrol import CacheControl
from poetry.core.packages.dependency import Dependency
from poetry.core.packages.utils.link import Link
from poetry.core.version.markers import parse_marker
......@@ -42,41 +39,18 @@ class HTTPRepository(CachedRepository, ABC):
url: str,
config: Config | None = None,
disable_cache: bool = False,
cert: Path | None = None,
client_cert: Path | None = None,
) -> None:
super().__init__(name, "_http", disable_cache)
super().__init__(name, disable_cache)
self._url = url
self._client_cert = client_cert
self._cert = cert
self._authenticator = Authenticator(
config=config or Config(use_environment=True)
)
self._session = CacheControl(
self._authenticator.session, cache=self._cache_control_cache
config=config or Config(use_environment=True),
cache_id=name,
disable_cache=disable_cache,
)
username, password = self._authenticator.get_credentials_for_url(self._url)
if username is not None and password is not None:
self._authenticator.session.auth = requests.auth.HTTPBasicAuth(
username, password
)
if self._cert:
self._authenticator.session.verify = str(self._cert)
if self._client_cert:
self._authenticator.session.cert = str(self._client_cert)
@property
def session(self) -> CacheControl:
return self._session
def __del__(self) -> None:
with contextlib.suppress(AttributeError):
self._session.close()
def session(self) -> Authenticator:
return self._authenticator
@property
def url(self) -> str:
......@@ -84,22 +58,21 @@ class HTTPRepository(CachedRepository, ABC):
@property
def cert(self) -> Path | None:
return self._cert
cert = self._authenticator.get_certs_for_url(self.url).get("verify")
if cert:
return Path(cert)
return None
@property
def client_cert(self) -> Path | None:
return self._client_cert
cert = self._authenticator.get_certs_for_url(self.url).get("cert")
if cert:
return Path(cert)
return None
@property
def authenticated_url(self) -> str:
if not self._session.auth:
return self.url
parsed = urllib.parse.urlparse(self.url)
username = quote(self._session.auth.username, safe="")
password = quote(self._session.auth.password, safe="")
return f"{parsed.scheme}://{username}:{password}@{parsed.netloc}{parsed.path}"
return self._authenticator.authenticated_url(url=self.url)
def _download(self, url: str, dest: str) -> None:
return download_file(url, dest, session=self.session)
......@@ -286,7 +259,7 @@ class HTTPRepository(CachedRepository, ABC):
def _get_response(self, endpoint: str) -> requests.Response | None:
url = self._url + endpoint
try:
response = self.session.get(url)
response = self.session.get(url, raise_for_status=False)
if response.status_code in (401, 403):
self._log(
f"Authorization error accessing {url}",
......
......@@ -13,8 +13,6 @@ from poetry.utils.helpers import canonicalize_name
if TYPE_CHECKING:
from pathlib import Path
from poetry.core.packages.dependency import Dependency
from poetry.core.packages.utils.link import Link
......@@ -28,15 +26,11 @@ class LegacyRepository(HTTPRepository):
url: str,
config: Config | None = None,
disable_cache: bool = False,
cert: Path | None = None,
client_cert: Path | None = None,
) -> None:
if name == "pypi":
raise ValueError("The name [pypi] is reserved for repositories")
super().__init__(
name, url.rstrip("/"), config, disable_cache, cert, client_cert
)
super().__init__(name, url.rstrip("/"), config, disable_cache)
def find_packages(self, dependency: Dependency) -> list[Package]:
packages = []
......
......@@ -232,7 +232,7 @@ class PyPiRepository(HTTPRepository):
except requests.exceptions.TooManyRedirects:
# Cache control redirect loop.
# We try to remove the cache and try again
self._cache_control_cache.delete(self._base_url + endpoint)
self.session.delete_cache(self._base_url + endpoint)
json_response = self.session.get(self._base_url + endpoint)
if json_response.status_code == 404:
......
from __future__ import annotations
import contextlib
import logging
import time
import urllib.parse
......@@ -12,7 +13,11 @@ import requests
import requests.auth
import requests.exceptions
from cachecontrol import CacheControl
from cachecontrol.caches import FileCache
from poetry.exceptions import PoetryException
from poetry.locations import REPOSITORY_CACHE_DIR
from poetry.utils.helpers import get_cert
from poetry.utils.helpers import get_client_cert
from poetry.utils.password_manager import PasswordManager
......@@ -26,43 +31,98 @@ if TYPE_CHECKING:
from poetry.config.config import Config
logger = logging.getLogger()
logger = logging.getLogger(__name__)
class Authenticator:
def __init__(self, config: Config, io: IO | None = None) -> None:
def __init__(
self,
config: Config,
io: IO | None = None,
cache_id: str | None = None,
disable_cache: bool = False,
) -> None:
self._config = config
self._io = io
self._session: requests.Session | None = None
self._sessions_for_netloc: dict[str, requests.Session] = {}
self._credentials: dict[str, tuple[str, str]] = {}
self._certs: dict[str, dict[str, Path | None]] = {}
self._password_manager = PasswordManager(self._config)
self._cache_control = (
FileCache(
str(REPOSITORY_CACHE_DIR / (cache_id or "_default_cache") / "_http")
)
if not disable_cache
else None
)
def _log(self, message: str, level: str = "debug") -> None:
    """Emit ``message`` tagged with ``level``.

    When an IO object is set, the message is written to it wrapped in
    ``<level>...</level>`` tags. Otherwise it is passed to the module
    logger method named ``level``, falling back to ``logger.debug`` when
    the logger has no attribute of that name.
    """
    io = self._io
    if io is None:
        emit = getattr(logger, level, logger.debug)
        emit(message)
        return
    io.write_line(f"<{level}>{message}</{level}>")
@property
def cache(self) -> FileCache | None:
    """Return the cache object stored on this instance (``None`` if unset)."""
    cache_store = self._cache_control
    return cache_store
@property
def session(self) -> requests.Session:
if self._session is None:
self._session = requests.Session()
def is_cached(self) -> bool:
return self._cache_control is not None
def create_session(self) -> requests.Session:
    """Build a new HTTP session.

    Returns a plain ``requests.Session`` when ``is_cached`` is falsy;
    otherwise the new session is wrapped with ``CacheControl`` using this
    instance's cache object.
    """
    plain = requests.Session()
    if self.is_cached:
        return CacheControl(sess=plain, cache=self._cache_control)
    return plain
def get_session(self, url: str | None = None) -> requests.Session:
if not url:
return self.create_session()
parsed_url = urllib.parse.urlsplit(url)
netloc = parsed_url.netloc
return self._session
if netloc not in self._sessions_for_netloc:
logger.debug("Creating new session for %s", netloc)
self._sessions_for_netloc[netloc] = self.create_session()
return self._sessions_for_netloc[netloc]
def close(self) -> None:
    """Close the default session and every per-netloc session.

    Entries that are ``None`` are skipped, and an ``AttributeError`` raised
    while closing an individual session is ignored.
    """
    candidates = [self._session]
    candidates.extend(self._sessions_for_netloc.values())
    for candidate in candidates:
        if candidate is None:
            continue
        with contextlib.suppress(AttributeError):
            candidate.close()
def __del__(self) -> None:
if self._session is not None:
self._session.close()
self.close()
def delete_cache(self, url: str) -> None:
    """Delete the cache entry keyed by ``url``; no-op when not cached."""
    if not self.is_cached:
        return
    self._cache_control.delete(key=url)
def authenticated_url(self, url: str) -> str:
parsed = urllib.parse.urlparse(url)
username, password = self.get_credentials_for_url(url)
if username is not None and password is not None:
username = urllib.parse.quote(username, safe="")
password = urllib.parse.quote(password, safe="")
return (
f"{parsed.scheme}://{username}:{password}@{parsed.netloc}{parsed.path}"
)
def request(self, method: str, url: str, **kwargs: Any) -> requests.Response:
return url
def request(
self, method: str, url: str, raise_for_status: bool = True, **kwargs: Any
) -> requests.Response:
request = requests.Request(method, url)
username, password = self.get_credentials_for_url(url)
if username is not None and password is not None:
request = requests.auth.HTTPBasicAuth(username, password)(request)
session = self.session
session = self.get_session(url=url)
prepared_request = session.prepare_request(request)
proxies = kwargs.get("proxies", {})
......@@ -100,19 +160,26 @@ class Authenticator:
raise e
else:
if resp.status_code not in [502, 503, 504] or is_last_attempt:
if resp.status_code is not None and raise_for_status:
resp.raise_for_status()
return resp
if not is_last_attempt:
attempt += 1
delay = 0.5 * attempt
self._log(f"Retrying HTTP request in {delay} seconds.", level="debug")
logger.debug(f"Retrying HTTP request in {delay} seconds.")
time.sleep(delay)
continue
# this should never really be hit under any sane circumstance
raise PoetryException("Failed HTTP {} request", method.upper())
def get(self, url: str, **kwargs: Any) -> requests.Response:
    """Send a GET request to ``url``, forwarding ``kwargs`` to ``request``."""
    method = "get"
    return self.request(method, url, **kwargs)
def post(self, url: str, **kwargs: Any) -> requests.Response:
    """Send a POST request to ``url``, forwarding ``kwargs`` to ``request``."""
    method = "post"
    return self.request(method, url, **kwargs)
def get_credentials_for_url(self, url: str) -> tuple[str | None, str | None]:
parsed_url = urllib.parse.urlsplit(url)
......
......@@ -20,6 +20,7 @@ if TYPE_CHECKING:
from requests import Session
from poetry.config.config import Config
from poetry.utils.authenticator import Authenticator
_canonicalize_regex = re.compile("[-_]+")
......@@ -94,7 +95,7 @@ def merge_dicts(d1: dict, d2: dict) -> None:
def download_file(
url: str,
dest: str,
session: Session | None = None,
session: Authenticator | Session | None = None,
chunk_size: int = 1024,
) -> None:
import requests
......
......@@ -117,44 +117,22 @@ def test_install_with_non_pypi_default_repository(pool: Pool, installer: PipInst
installer.install(bar)
def test_install_with_cert():
ca_path = "path/to/cert.pem"
pool = Pool()
default = LegacyRepository("default", "https://foo.bar", cert=Path(ca_path))
pool.add_repository(default, default=True)
null_env = NullEnv()
installer = PipInstaller(null_env, NullIO(), pool)
foo = Package(
"foo",
"0.0.0",
source_type="legacy",
source_reference=default.name,
source_url=default.url,
)
installer.install(foo)
assert len(null_env.executed) == 1
cmd = null_env.executed[0]
assert "--cert" in cmd
cert_index = cmd.index("--cert")
# Need to do the str(Path()) bit because Windows paths get modified by Path
assert cmd[cert_index + 1] == str(Path(ca_path))
def test_install_with_client_cert():
@pytest.mark.parametrize(
("key", "option"),
[
("cert", "client-cert"),
("verify", "cert"),
],
)
def test_install_with_certs(mocker: MockerFixture, key: str, option: str):
client_path = "path/to/client.pem"
pool = Pool()
default = LegacyRepository(
"default", "https://foo.bar", client_cert=Path(client_path)
mocker.patch(
"poetry.utils.authenticator.Authenticator.get_certs_for_url",
return_value={key: client_path},
)
default = LegacyRepository("default", "https://foo.bar")
pool = Pool()
pool.add_repository(default, default=True)
null_env = NullEnv()
......@@ -173,8 +151,8 @@ def test_install_with_client_cert():
assert len(null_env.executed) == 1
cmd = null_env.executed[0]
assert "--client-cert" in cmd
cert_index = cmd.index("--client-cert")
assert f"--{option}" in cmd
cert_index = cmd.index(f"--{option}")
# Need to do the str(Path()) bit because Windows paths get modified by Path
assert cmd[cert_index + 1] == str(Path(client_path))
......
......@@ -405,7 +405,7 @@ def test_get_redirected_response_url(
repo = MockHttpRepository({"/foo": 200}, http)
redirect_url = "http://legacy.redirect.bar"
def get_mock(url: str) -> requests.Response:
def get_mock(url: str, raise_for_status: bool = True) -> requests.Response:
response = requests.Response()
response.status_code = 200
response.url = redirect_url + "/foo"
......
......@@ -218,10 +218,11 @@ def test_get_should_invalid_cache_on_too_many_redirects_error(mocker: MockerFixt
delete_cache = mocker.patch("cachecontrol.caches.file_cache.FileCache.delete")
response = Response()
response.status_code = 200
response.encoding = "utf-8"
response.raw = BytesIO(encode('{"foo": "bar"}'))
mocker.patch(
"cachecontrol.adapter.CacheControlAdapter.send",
"poetry.utils.authenticator.Authenticator.get",
side_effect=[TooManyRedirects(), response],
)
repository = PyPiRepository()
......
......@@ -339,10 +339,12 @@ def test_authenticator_uses_certs_from_config_if_not_provided(
)
authenticator = Authenticator(config, NullIO())
session_send = mocker.patch.object(authenticator.session, "send")
url = "https://foo.bar/files/foo-0.1.0.tar.gz"
session = authenticator.get_session(url)
session_send = mocker.patch.object(session, "send")
authenticator.request(
"get",
"https://foo.bar/files/foo-0.1.0.tar.gz",
url,
verify=cert,
cert=client_cert,
)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment