Commit 0dc3d54f by David Hotham Committed by GitHub

feat: cache simple repository pages (#6932)

At #5868, I
[suggested](https://github.com/python-poetry/poetry/pull/5868#discussion_r931507885)
that some of the caching that was being reworked could be removed
altogether, because cachecontrol was already taking care of it just
fine.

But now I find myself using an Azure artifacts repository, and it is
returning headers that insist that the client does not do any caching:
```
cache-control: no-cache
pragma: no-cache           
x-cache: CONFIG_NOCACHE 
```
(pypi, by contrast, sets max-age to 10 minutes here).

So I was wrong! And now I am seeing a big performance hit in some
projects where the solve involves overrides and backtracking: and
therefore hitting the legacy simple API repeatedly.

However, we don't need all the mechanism of cachy and its like for this,
a well-placed `@lru_cache()` seems more than sufficient.

This makes me wonder whether it wouldn't be better to do similar for
pypi anyway, and rip out cachecontrol altogether. But let's keep it
simple for now: this is an easy fix to a performance regression.
parent 4896c4bd
from __future__ import annotations from __future__ import annotations
import functools
import hashlib import hashlib
import os import os
import urllib import urllib
...@@ -50,6 +51,7 @@ class HTTPRepository(CachedRepository): ...@@ -50,6 +51,7 @@ class HTTPRepository(CachedRepository):
disable_cache=disable_cache, disable_cache=disable_cache,
) )
self._authenticator.add_repository(name, url) self._authenticator.add_repository(name, url)
self.get_page = functools.lru_cache(maxsize=None)(self._get_page)
@property @property
def session(self) -> Authenticator: def session(self) -> Authenticator:
......
...@@ -72,7 +72,7 @@ class LegacyRepository(HTTPRepository): ...@@ -72,7 +72,7 @@ class LegacyRepository(HTTPRepository):
return package return package
def find_links_for_package(self, package: Package) -> list[Link]: def find_links_for_package(self, package: Package) -> list[Link]:
page = self._get_page(f"/{package.name}/") page = self.get_page(f"/{package.name}/")
if page is None: if page is None:
return [] return []
...@@ -90,7 +90,7 @@ class LegacyRepository(HTTPRepository): ...@@ -90,7 +90,7 @@ class LegacyRepository(HTTPRepository):
if not constraint.is_any(): if not constraint.is_any():
key = f"{key}:{constraint!s}" key = f"{key}:{constraint!s}"
page = self._get_page(f"/{name}/") page = self.get_page(f"/{name}/")
if page is None: if page is None:
self._log( self._log(
f"No packages found for {name}", f"No packages found for {name}",
...@@ -119,7 +119,7 @@ class LegacyRepository(HTTPRepository): ...@@ -119,7 +119,7 @@ class LegacyRepository(HTTPRepository):
def _get_release_info( def _get_release_info(
self, name: NormalizedName, version: Version self, name: NormalizedName, version: Version
) -> dict[str, Any]: ) -> dict[str, Any]:
page = self._get_page(f"/{name}/") page = self.get_page(f"/{name}/")
if page is None: if page is None:
raise PackageNotFound(f'No package named "{name}"') raise PackageNotFound(f'No package named "{name}"')
......
...@@ -73,7 +73,7 @@ def test_packages_property_returns_empty_list() -> None: ...@@ -73,7 +73,7 @@ def test_packages_property_returns_empty_list() -> None:
def test_page_relative_links_path_are_correct() -> None: def test_page_relative_links_path_are_correct() -> None:
repo = MockRepository() repo = MockRepository()
page = repo._get_page("/relative") page = repo.get_page("/relative")
assert page is not None assert page is not None
for link in page.links: for link in page.links:
...@@ -84,7 +84,7 @@ def test_page_relative_links_path_are_correct() -> None: ...@@ -84,7 +84,7 @@ def test_page_relative_links_path_are_correct() -> None:
def test_page_absolute_links_path_are_correct() -> None: def test_page_absolute_links_path_are_correct() -> None:
repo = MockRepository() repo = MockRepository()
page = repo._get_page("/absolute") page = repo.get_page("/absolute")
assert page is not None assert page is not None
for link in page.links: for link in page.links:
...@@ -95,7 +95,7 @@ def test_page_absolute_links_path_are_correct() -> None: ...@@ -95,7 +95,7 @@ def test_page_absolute_links_path_are_correct() -> None:
def test_page_clean_link() -> None: def test_page_clean_link() -> None:
repo = MockRepository() repo = MockRepository()
page = repo._get_page("/relative") page = repo.get_page("/relative")
assert page is not None assert page is not None
cleaned = page.clean_link('https://legacy.foo.bar/test /the"/cleaning\0') cleaned = page.clean_link('https://legacy.foo.bar/test /the"/cleaning\0')
...@@ -105,7 +105,7 @@ def test_page_clean_link() -> None: ...@@ -105,7 +105,7 @@ def test_page_clean_link() -> None:
def test_page_invalid_version_link() -> None: def test_page_invalid_version_link() -> None:
repo = MockRepository() repo = MockRepository()
page = repo._get_page("/invalid-version") page = repo.get_page("/invalid-version")
assert page is not None assert page is not None
links = list(page.links) links = list(page.links)
...@@ -123,7 +123,7 @@ def test_page_invalid_version_link() -> None: ...@@ -123,7 +123,7 @@ def test_page_invalid_version_link() -> None:
def test_sdist_format_support() -> None: def test_sdist_format_support() -> None:
repo = MockRepository() repo = MockRepository()
page = repo._get_page("/relative") page = repo.get_page("/relative")
assert page is not None assert page is not None
bz2_links = list(filter(lambda link: link.ext == ".tar.bz2", page.links)) bz2_links = list(filter(lambda link: link.ext == ".tar.bz2", page.links))
assert len(bz2_links) == 1 assert len(bz2_links) == 1
...@@ -490,7 +490,7 @@ class MockHttpRepository(LegacyRepository): ...@@ -490,7 +490,7 @@ class MockHttpRepository(LegacyRepository):
def test_get_200_returns_page(http: type[httpretty.httpretty]) -> None: def test_get_200_returns_page(http: type[httpretty.httpretty]) -> None:
repo = MockHttpRepository({"/foo": 200}, http) repo = MockHttpRepository({"/foo": 200}, http)
assert repo._get_page("/foo") assert repo.get_page("/foo")
@pytest.mark.parametrize("status_code", [401, 403, 404]) @pytest.mark.parametrize("status_code", [401, 403, 404])
...@@ -499,14 +499,14 @@ def test_get_40x_and_returns_none( ...@@ -499,14 +499,14 @@ def test_get_40x_and_returns_none(
) -> None: ) -> None:
repo = MockHttpRepository({"/foo": status_code}, http) repo = MockHttpRepository({"/foo": status_code}, http)
assert repo._get_page("/foo") is None assert repo.get_page("/foo") is None
def test_get_5xx_raises(http: type[httpretty.httpretty]) -> None: def test_get_5xx_raises(http: type[httpretty.httpretty]) -> None:
repo = MockHttpRepository({"/foo": 500}, http) repo = MockHttpRepository({"/foo": 500}, http)
with pytest.raises(RepositoryError): with pytest.raises(RepositoryError):
repo._get_page("/foo") repo.get_page("/foo")
def test_get_redirected_response_url( def test_get_redirected_response_url(
...@@ -524,7 +524,7 @@ def test_get_redirected_response_url( ...@@ -524,7 +524,7 @@ def test_get_redirected_response_url(
return response return response
monkeypatch.setattr(repo.session, "get", get_mock) monkeypatch.setattr(repo.session, "get", get_mock)
page = repo._get_page("/foo") page = repo.get_page("/foo")
assert page is not None assert page is not None
assert page._url == "http://legacy.redirect.bar/foo/" assert page._url == "http://legacy.redirect.bar/foo/"
...@@ -560,7 +560,7 @@ def test_authenticator_with_implicit_repository_configuration( ...@@ -560,7 +560,7 @@ def test_authenticator_with_implicit_repository_configuration(
) )
repo = LegacyRepository(name="source", url="https://foo.bar/simple", config=config) repo = LegacyRepository(name="source", url="https://foo.bar/simple", config=config)
repo._get_page("/foo") repo.get_page("/foo")
request = http.last_request() request = http.last_request()
......
...@@ -35,7 +35,7 @@ class MockSinglePageRepository(SinglePageRepository): ...@@ -35,7 +35,7 @@ class MockSinglePageRepository(SinglePageRepository):
def test_single_page_repository_get_page(): def test_single_page_repository_get_page():
repo = MockSinglePageRepository("jax_releases") repo = MockSinglePageRepository("jax_releases")
page = repo._get_page("/ignored") page = repo.get_page("/ignored")
links = list(page.links) links = list(page.links)
assert len(links) == 21 assert len(links) == 21
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment