Commit c4b22537 by Randy Döring Committed by GitHub

fix: performance regression when parsing links from legacy repositories (#6442)

Resolves: #6436

Measurements of `poetry lock` with warm cache with example
pyproject.toml from #6436:

|test case|time in s|peak memory usage in MB|
|---|---|---|
|legacy repository (before)|422|113|
|legacy repository (after)|3|118|
|pypi repository|1|92|

`backports.cached-property` is used in order to support
cached_property on Python 3.7. 

Co-authored-by: Jarrod Moore <jmo@jmo.name>
Co-authored-by: Bjorn Neergaard <bjorn@neersighted.com>
parent 692bdaa9
...@@ -21,6 +21,14 @@ tests = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900 ...@@ -21,6 +21,14 @@ tests = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900
tests_no_zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins"] tests_no_zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins"]
[[package]] [[package]]
name = "backports.cached-property"
version = "1.0.2"
description = "cached_property() - computed once per instance, cached as attribute"
category = "main"
optional = false
python-versions = ">=3.6.0"
[[package]]
name = "CacheControl" name = "CacheControl"
version = "0.12.11" version = "0.12.11"
description = "httplib2 caching for requests" description = "httplib2 caching for requests"
...@@ -931,7 +939,7 @@ testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>= ...@@ -931,7 +939,7 @@ testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>=
[metadata] [metadata]
lock-version = "1.1" lock-version = "1.1"
python-versions = "^3.7" python-versions = "^3.7"
content-hash = "3810aef128102dea7cd2797cfcee77f1345831ed057aac5a962916a76e29fb6a" content-hash = "7e40cde2399843d53e715f657b703397ce236196663788ada896d7e2a53c869a"
[metadata.files] [metadata.files]
atomicwrites = [ atomicwrites = [
...@@ -941,6 +949,10 @@ attrs = [ ...@@ -941,6 +949,10 @@ attrs = [
{file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"}, {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"},
{file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"}, {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"},
] ]
"backports.cached-property" = [
{file = "backports.cached-property-1.0.2.tar.gz", hash = "sha256:9306f9eed6ec55fd156ace6bc1094e2c86fae5fb2bf07b6a9c00745c656e75dd"},
{file = "backports.cached_property-1.0.2-py3-none-any.whl", hash = "sha256:baeb28e1cd619a3c9ab8941431fe34e8490861fb998c6c4590693d50171db0cc"},
]
CacheControl = [ CacheControl = [
{file = "CacheControl-0.12.11-py2.py3-none-any.whl", hash = "sha256:2c75d6a8938cb1933c75c50184549ad42728a27e9f6b92fd677c3151aa72555b"}, {file = "CacheControl-0.12.11-py2.py3-none-any.whl", hash = "sha256:2c75d6a8938cb1933c75c50184549ad42728a27e9f6b92fd677c3151aa72555b"},
{file = "CacheControl-0.12.11.tar.gz", hash = "sha256:a5b9fcc986b184db101aa280b42ecdcdfc524892596f606858e0b7a8b4d9e144"}, {file = "CacheControl-0.12.11.tar.gz", hash = "sha256:a5b9fcc986b184db101aa280b42ecdcdfc524892596f606858e0b7a8b4d9e144"},
......
...@@ -46,6 +46,7 @@ python = "^3.7" ...@@ -46,6 +46,7 @@ python = "^3.7"
poetry-core = "^1.1.0" poetry-core = "^1.1.0"
poetry-plugin-export = "^1.0.6" poetry-plugin-export = "^1.0.6"
"backports.cached-property" = { version = "^1.0.2", python = "<3.8" }
cachecontrol = { version = "^0.12.9", extras = ["filecache"] } cachecontrol = { version = "^0.12.9", extras = ["filecache"] }
cachy = "^0.3.0" cachy = "^0.3.0"
cleo = "^1.0.0a5" cleo = "^1.0.0a5"
......
...@@ -3,13 +3,15 @@ from __future__ import annotations ...@@ -3,13 +3,15 @@ from __future__ import annotations
import logging import logging
import re import re
from abc import abstractmethod
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from typing import DefaultDict
from typing import List
from packaging.utils import canonicalize_name from packaging.utils import canonicalize_name
from poetry.core.packages.package import Package from poetry.core.packages.package import Package
from poetry.core.semver.version import Version from poetry.core.semver.version import Version
from poetry.utils._compat import cached_property
from poetry.utils.patterns import sdist_file_re from poetry.utils.patterns import sdist_file_re
from poetry.utils.patterns import wheel_file_re from poetry.utils.patterns import wheel_file_re
...@@ -20,6 +22,8 @@ if TYPE_CHECKING: ...@@ -20,6 +22,8 @@ if TYPE_CHECKING:
from packaging.utils import NormalizedName from packaging.utils import NormalizedName
from poetry.core.packages.utils.link import Link from poetry.core.packages.utils.link import Link
LinkCache = DefaultDict[NormalizedName, DefaultDict[Version, List[Link]]]
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -44,16 +48,8 @@ class LinkSource: ...@@ -44,16 +48,8 @@ class LinkSource:
def url(self) -> str: def url(self) -> str:
return self._url return self._url
def versions(self, name: str) -> Iterator[Version]: def versions(self, name: NormalizedName) -> Iterator[Version]:
name = canonicalize_name(name) yield from self._link_cache[name]
seen: set[Version] = set()
for link in self.links:
pkg = self.link_package_data(link)
if pkg and pkg.name == name and pkg.version not in seen:
seen.add(pkg.version)
yield pkg.version
@property @property
def packages(self) -> Iterator[Package]: def packages(self) -> Iterator[Package]:
...@@ -64,9 +60,10 @@ class LinkSource: ...@@ -64,9 +60,10 @@ class LinkSource:
yield pkg yield pkg
@property @property
@abstractmethod
def links(self) -> Iterator[Link]: def links(self) -> Iterator[Link]:
raise NotImplementedError() for links_per_version in self._link_cache.values():
for links in links_per_version.values():
yield from links
@classmethod @classmethod
def link_package_data(cls, link: Link) -> Package | None: def link_package_data(cls, link: Link) -> Package | None:
...@@ -102,11 +99,7 @@ class LinkSource: ...@@ -102,11 +99,7 @@ class LinkSource:
def links_for_version( def links_for_version(
self, name: NormalizedName, version: Version self, name: NormalizedName, version: Version
) -> Iterator[Link]: ) -> Iterator[Link]:
for link in self.links: yield from self._link_cache[name][version]
pkg = self.link_package_data(link)
if pkg and pkg.name == name and pkg.version == version:
yield link
def clean_link(self, url: str) -> str: def clean_link(self, url: str) -> str:
"""Makes sure a link is fully encoded. That is, if a ' ' shows up in """Makes sure a link is fully encoded. That is, if a ' ' shows up in
...@@ -127,3 +120,7 @@ class LinkSource: ...@@ -127,3 +120,7 @@ class LinkSource:
if reasons: if reasons:
return "\n".join(sorted(reasons)) return "\n".join(sorted(reasons))
return True return True
@cached_property
def _link_cache(self) -> LinkCache:
raise NotImplementedError()
...@@ -3,16 +3,19 @@ from __future__ import annotations ...@@ -3,16 +3,19 @@ from __future__ import annotations
import urllib.parse import urllib.parse
import warnings import warnings
from collections import defaultdict
from html import unescape from html import unescape
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from poetry.core.packages.utils.link import Link from poetry.core.packages.utils.link import Link
from poetry.repositories.link_sources.base import LinkSource from poetry.repositories.link_sources.base import LinkSource
from poetry.utils._compat import cached_property
if TYPE_CHECKING: if TYPE_CHECKING:
from collections.abc import Iterator from poetry.repositories.link_sources.base import LinkCache
with warnings.catch_warnings(): with warnings.catch_warnings():
warnings.simplefilter("ignore") warnings.simplefilter("ignore")
...@@ -25,8 +28,9 @@ class HTMLPage(LinkSource): ...@@ -25,8 +28,9 @@ class HTMLPage(LinkSource):
self._parsed = html5lib.parse(content, namespaceHTMLElements=False) self._parsed = html5lib.parse(content, namespaceHTMLElements=False)
@property @cached_property
def links(self) -> Iterator[Link]: def _link_cache(self) -> LinkCache:
links: LinkCache = defaultdict(lambda: defaultdict(list))
for anchor in self._parsed.findall(".//a"): for anchor in self._parsed.findall(".//a"):
if anchor.get("href"): if anchor.get("href"):
href = anchor.get("href") href = anchor.get("href")
...@@ -44,7 +48,11 @@ class HTMLPage(LinkSource): ...@@ -44,7 +48,11 @@ class HTMLPage(LinkSource):
if link.ext not in self.SUPPORTED_FORMATS: if link.ext not in self.SUPPORTED_FORMATS:
continue continue
yield link pkg = self.link_package_data(link)
if pkg:
links[pkg.name][pkg.version].append(link)
return links
class SimpleRepositoryPage(HTMLPage): class SimpleRepositoryPage(HTMLPage):
......
...@@ -13,6 +13,12 @@ if sys.version_info < (3, 10): ...@@ -13,6 +13,12 @@ if sys.version_info < (3, 10):
else: else:
from importlib import metadata from importlib import metadata
if sys.version_info < (3, 8):
# compatibility for python <3.8
from backports.cached_property import cached_property
else:
from functools import cached_property
WINDOWS = sys.platform == "win32" WINDOWS = sys.platform == "win32"
...@@ -53,4 +59,12 @@ def list_to_shell_command(cmd: list[str]) -> str: ...@@ -53,4 +59,12 @@ def list_to_shell_command(cmd: list[str]) -> str:
) )
__all__ = ["WINDOWS", "decode", "encode", "list_to_shell_command", "metadata", "to_str"] __all__ = [
"WINDOWS",
"cached_property",
"decode",
"encode",
"list_to_shell_command",
"metadata",
"to_str",
]
from __future__ import annotations from __future__ import annotations
from collections import defaultdict
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from unittest.mock import PropertyMock from unittest.mock import PropertyMock
...@@ -24,16 +25,22 @@ def link_source(mocker: MockerFixture) -> LinkSource: ...@@ -24,16 +25,22 @@ def link_source(mocker: MockerFixture) -> LinkSource:
url = "https://example.org" url = "https://example.org"
link_source = LinkSource(url) link_source = LinkSource(url)
mocker.patch( mocker.patch(
f"{LinkSource.__module__}.{LinkSource.__qualname__}.links", f"{LinkSource.__module__}.{LinkSource.__qualname__}._link_cache",
new_callable=PropertyMock, new_callable=PropertyMock,
return_value=iter( return_value=defaultdict(
[ lambda: defaultdict(list),
Link(f"{url}/demo-0.1.0.tar.gz"), {
Link(f"{url}/demo-0.1.0_invalid.tar.gz"), canonicalize_name("demo"): defaultdict(
Link(f"{url}/invalid.tar.gz"), list,
Link(f"{url}/demo-0.1.0-py2.py3-none-any.whl"), {
Link(f"{url}/demo-0.1.1.tar.gz"), Version.parse("0.1.0"): [
] Link(f"{url}/demo-0.1.0.tar.gz"),
Link(f"{url}/demo-0.1.0-py2.py3-none-any.whl"),
],
Version.parse("0.1.1"): [Link(f"{url}/demo-0.1.1.tar.gz")],
},
),
},
), ),
) )
return link_source return link_source
...@@ -63,7 +70,7 @@ def test_link_package_data(filename: str, expected: Package | None) -> None: ...@@ -63,7 +70,7 @@ def test_link_package_data(filename: str, expected: Package | None) -> None:
], ],
) )
def test_versions(name: str, expected: set[Version], link_source: LinkSource) -> None: def test_versions(name: str, expected: set[Version], link_source: LinkSource) -> None:
assert set(link_source.versions(name)) == expected assert set(link_source.versions(canonicalize_name(name))) == expected
def test_packages(link_source: LinkSource) -> None: def test_packages(link_source: LinkSource) -> None:
......
...@@ -2,6 +2,7 @@ from __future__ import annotations ...@@ -2,6 +2,7 @@ from __future__ import annotations
import pytest import pytest
from packaging.utils import canonicalize_name
from poetry.core.packages.utils.link import Link from poetry.core.packages.utils.link import Link
from poetry.core.semver.version import Version from poetry.core.semver.version import Version
...@@ -90,4 +91,4 @@ def test_yanked(yanked_attrs: tuple[str, str], expected: bool | str) -> None: ...@@ -90,4 +91,4 @@ def test_yanked(yanked_attrs: tuple[str, str], expected: bool | str) -> None:
content = DEMO_TEMPLATE.format(anchors) content = DEMO_TEMPLATE.format(anchors)
page = HTMLPage("https://example.org", content) page = HTMLPage("https://example.org", content)
assert page.yanked("demo", Version.parse("0.1")) == expected assert page.yanked(canonicalize_name("demo"), Version.parse("0.1")) == expected
...@@ -102,25 +102,12 @@ def test_page_invalid_version_link() -> None: ...@@ -102,25 +102,12 @@ def test_page_invalid_version_link() -> None:
assert page is not None assert page is not None
links = list(page.links) links = list(page.links)
assert len(links) == 2 assert len(links) == 1
versions = list(page.versions("poetry")) versions = list(page.versions(canonicalize_name("poetry")))
assert len(versions) == 1 assert len(versions) == 1
assert versions[0].to_string() == "0.1.0" assert versions[0].to_string() == "0.1.0"
invalid_link = None
for link in links:
if link.filename.startswith("poetry-21"):
invalid_link = link
break
links_010 = list(page.links_for_version(canonicalize_name("poetry"), versions[0]))
assert invalid_link not in links_010
assert invalid_link
assert not page.link_package_data(invalid_link)
packages = list(page.packages) packages = list(page.packages)
assert len(packages) == 1 assert len(packages) == 1
assert packages[0].name == "poetry" assert packages[0].name == "poetry"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment