Commit fd706c8c by Maxim Koltsov Committed by Randy Döring

Cache git dependencies as wheels (#7473)

Currently, `poetry install` will clone, build and install every git
dependency when it is not present in the environment. This is fine for
developers' machines, but not for CI — there the environment is always
fresh, so installing git dependencies takes significant time on each CI
run, especially if the dependency has C extensions that need to be
built.

This commit builds a wheel for every git dependency that has a precise
reference hash in the lock file and is not required to be in editable mode,
stores that wheel in a cache directory, and installs from it instead of
cloning the repository again.
parent dfb49048
......@@ -94,8 +94,8 @@ class Chef:
return archive
if archive.is_dir():
tmp_dir = tempfile.mkdtemp(prefix="poetry-chef-")
return self._prepare(archive, Path(tmp_dir), editable=editable)
destination = output_dir or Path(tempfile.mkdtemp(prefix="poetry-chef-"))
return self._prepare(archive, destination=destination, editable=editable)
return self._prepare_sdist(archive, destination=output_dir)
......
......@@ -529,7 +529,7 @@ class Executor:
cleanup_archive: bool = False
if package.source_type == "git":
archive = self._prepare_git_archive(operation)
cleanup_archive = True
cleanup_archive = operation.package.develop
elif package.source_type == "file":
archive = self._prepare_archive(operation)
elif package.source_type == "directory":
......@@ -584,7 +584,9 @@ class Executor:
raise
def _prepare_archive(self, operation: Install | Update) -> Path:
def _prepare_archive(
self, operation: Install | Update, *, output_dir: Path | None = None
) -> Path:
package = operation.package
operation_message = self.get_operation_message(operation)
......@@ -603,12 +605,28 @@ class Executor:
self._populate_hashes_dict(archive, package)
return self._chef.prepare(archive, editable=package.develop)
return self._chef.prepare(
archive, editable=package.develop, output_dir=output_dir
)
def _prepare_git_archive(self, operation: Install | Update) -> Path:
from poetry.vcs.git import Git
package = operation.package
assert package.source_url is not None
if package.source_resolved_reference and not package.develop:
# Only cache git archives when we know precise reference hash,
# otherwise we might get stale archives
cached_archive = self._artifact_cache.get_cached_archive_for_git(
package.source_url,
package.source_resolved_reference,
package.source_subdirectory,
env=self._env,
)
if cached_archive is not None:
return cached_archive
operation_message = self.get_operation_message(operation)
message = (
......@@ -616,7 +634,6 @@ class Executor:
)
self._write(operation, message)
assert package.source_url is not None
source = Git.clone(
url=package.source_url,
source_root=self._env.path / "src",
......@@ -627,10 +644,22 @@ class Executor:
original_url = package.source_url
package._source_url = str(source.path)
archive = self._prepare_archive(operation)
output_dir = None
if package.source_resolved_reference and not package.develop:
output_dir = self._artifact_cache.get_cache_directory_for_git(
original_url,
package.source_resolved_reference,
package.source_subdirectory,
)
archive = self._prepare_archive(operation, output_dir=output_dir)
package._source_url = original_url
if output_dir is not None and output_dir.is_dir():
# Mark directories with cached git packages, to distinguish from
# "normal" cache
(output_dir / ".created_from_git_dependency").touch()
return archive
def _install_directory_without_wheel_installer(
......
......@@ -231,6 +231,9 @@ class ArtifactCache:
if link.subdirectory_fragment:
key_parts["subdirectory"] = link.subdirectory_fragment
return self._get_directory_from_hash(key_parts)
def _get_directory_from_hash(self, key_parts: object) -> Path:
key = hashlib.sha256(
json.dumps(
key_parts, sort_keys=True, separators=(",", ":"), ensure_ascii=True
......@@ -238,9 +241,17 @@ class ArtifactCache:
).hexdigest()
split_key = [key[:2], key[2:4], key[4:6], key[6:]]
return self._cache_dir.joinpath(*split_key)
def get_cache_directory_for_git(
    self, url: str, ref: str, subdirectory: str | None
) -> Path:
    """Return the cache directory for a git dependency.

    The directory is derived from a hash over the repository URL, the
    resolved reference and (when given) the subdirectory, so identical
    sources always map to the same location.
    """
    # Falsy subdirectory (None or "") is deliberately excluded from the key.
    key_parts = {
        "url": url,
        "ref": ref,
        **({"subdirectory": subdirectory} if subdirectory else {}),
    }
    return self._get_directory_from_hash(key_parts)
def get_cached_archive_for_link(
self,
link: Link,
......@@ -248,18 +259,42 @@ class ArtifactCache:
strict: bool,
env: Env | None = None,
) -> Path | None:
cache_dir = self.get_cache_directory_for_link(link)
return self._get_cached_archive(
cache_dir, strict=strict, filename=link.filename, env=env
)
def get_cached_archive_for_git(
    self, url: str, reference: str, subdirectory: str | None, env: Env
) -> Path | None:
    """Return a previously cached archive for a git source, or None.

    Lookup is non-strict: any compatible archive found in the cache
    directory for this (url, reference, subdirectory) triple is accepted.
    """
    return self._get_cached_archive(
        self.get_cache_directory_for_git(url, reference, subdirectory),
        strict=False,
        env=env,
    )
def _get_cached_archive(
self,
cache_dir: Path,
*,
strict: bool,
filename: str | None = None,
env: Env | None = None,
) -> Path | None:
assert strict or env is not None
# implication "strict -> filename should not be None"
assert not strict or filename is not None
archives = self._get_cached_archives_for_link(link)
archives = self._get_cached_archives(cache_dir)
if not archives:
return None
candidates: list[tuple[float | None, Path]] = []
for archive in archives:
if strict:
# in strict mode return the original cached archive instead of the
# prioritized archive type.
if link.filename == archive.name:
if filename == archive.name:
return archive
continue
......@@ -286,9 +321,7 @@ class ArtifactCache:
return min(candidates)[1]
def _get_cached_archives_for_link(self, link: Link) -> list[Path]:
cache_dir = self.get_cache_directory_for_link(link)
def _get_cached_archives(self, cache_dir: Path) -> list[Path]:
archive_types = ["whl", "tar.gz", "tar.bz2", "bz2", "zip"]
paths: list[Path] = []
for archive_type in archive_types:
......
......@@ -34,6 +34,7 @@ from poetry.installation.wheel_installer import WheelInstaller
from poetry.repositories.repository_pool import RepositoryPool
from poetry.utils.cache import ArtifactCache
from poetry.utils.env import MockEnv
from poetry.vcs.git.backend import Git
from tests.repositories.test_pypi_repository import MockRepository
......@@ -81,7 +82,10 @@ class Chef(BaseChef):
wheel = self._directory_wheels.pop(0)
self._directory_wheels.append(wheel)
return wheel
destination.mkdir(parents=True, exist_ok=True)
dst_wheel = destination / wheel.name
shutil.copyfile(wheel, dst_wheel)
return dst_wheel
return super()._prepare(directory, destination, editable=editable)
......@@ -276,8 +280,8 @@ Package operations: 4 installs, 1 update, 1 removal
assert prepare_spy.call_count == 2
assert prepare_spy.call_args_list == [
mocker.call(chef, mocker.ANY, mocker.ANY, editable=False),
mocker.call(chef, mocker.ANY, mocker.ANY, editable=True),
mocker.call(chef, mocker.ANY, destination=mocker.ANY, editable=False),
mocker.call(chef, mocker.ANY, destination=mocker.ANY, editable=True),
]
......@@ -675,6 +679,7 @@ def test_executor_should_not_write_pep610_url_references_for_cached_package(
executor = Executor(tmp_venv, pool, config, io)
executor.execute([Install(package)])
verify_installed_distribution(tmp_venv, package)
assert link_cached.exists(), "cached file should not be deleted"
def test_executor_should_write_pep610_url_references_for_wheel_files(
......@@ -707,6 +712,7 @@ def test_executor_should_write_pep610_url_references_for_wheel_files(
"url": url.as_uri(),
}
verify_installed_distribution(tmp_venv, package, expected_url_reference)
assert url.exists(), "source file should not be deleted"
def test_executor_should_write_pep610_url_references_for_non_wheel_files(
......@@ -739,6 +745,7 @@ def test_executor_should_write_pep610_url_references_for_non_wheel_files(
"url": url.as_uri(),
}
verify_installed_distribution(tmp_venv, package, expected_url_reference)
assert url.exists(), "source file should not be deleted"
def test_executor_should_write_pep610_url_references_for_directories(
......@@ -749,6 +756,7 @@ def test_executor_should_write_pep610_url_references_for_directories(
io: BufferedIO,
wheel: Path,
fixture_dir: FixtureDirGetter,
mocker: MockerFixture,
):
url = (fixture_dir("git") / "github.com" / "demo" / "demo").resolve()
package = Package(
......@@ -757,6 +765,7 @@ def test_executor_should_write_pep610_url_references_for_directories(
chef = Chef(artifact_cache, tmp_venv, Factory.create_pool(config))
chef.set_directory_wheel(wheel)
prepare_spy = mocker.spy(chef, "prepare")
executor = Executor(tmp_venv, pool, config, io)
executor._chef = chef
......@@ -764,6 +773,7 @@ def test_executor_should_write_pep610_url_references_for_directories(
verify_installed_distribution(
tmp_venv, package, {"dir_info": {}, "url": url.as_uri()}
)
assert not prepare_spy.spy_return.exists(), "archive not cleaned up"
def test_executor_should_write_pep610_url_references_for_editable_directories(
......@@ -774,6 +784,7 @@ def test_executor_should_write_pep610_url_references_for_editable_directories(
io: BufferedIO,
wheel: Path,
fixture_dir: FixtureDirGetter,
mocker: MockerFixture,
):
url = (fixture_dir("git") / "github.com" / "demo" / "demo").resolve()
package = Package(
......@@ -786,6 +797,7 @@ def test_executor_should_write_pep610_url_references_for_editable_directories(
chef = Chef(artifact_cache, tmp_venv, Factory.create_pool(config))
chef.set_directory_wheel(wheel)
prepare_spy = mocker.spy(chef, "prepare")
executor = Executor(tmp_venv, pool, config, io)
executor._chef = chef
......@@ -793,6 +805,7 @@ def test_executor_should_write_pep610_url_references_for_editable_directories(
verify_installed_distribution(
tmp_venv, package, {"dir_info": {"editable": True}, "url": url.as_uri()}
)
assert not prepare_spy.spy_return.exists(), "archive not cleaned up"
@pytest.mark.parametrize("is_artifact_cached", [False, True])
......@@ -848,6 +861,7 @@ def test_executor_should_write_pep610_url_references_for_wheel_urls(
download_spy.assert_called_once_with(
mocker.ANY, operation, Link(package.source_url)
)
assert download_spy.spy_return.exists(), "cached file should not be deleted"
@pytest.mark.parametrize(
......@@ -938,10 +952,12 @@ def test_executor_should_write_pep610_url_references_for_non_wheel_urls(
download_spy.assert_called_once_with(
mocker.ANY, operation, Link(package.source_url)
)
assert download_spy.spy_return.exists(), "cached file should not be deleted"
else:
download_spy.assert_not_called()
@pytest.mark.parametrize("is_artifact_cached", [False, True])
def test_executor_should_write_pep610_url_references_for_git(
tmp_venv: VirtualEnv,
pool: RepositoryPool,
......@@ -950,18 +966,33 @@ def test_executor_should_write_pep610_url_references_for_git(
io: BufferedIO,
mock_file_downloads: None,
wheel: Path,
mocker: MockerFixture,
fixture_dir: FixtureDirGetter,
is_artifact_cached: bool,
):
if is_artifact_cached:
link_cached = fixture_dir("distributions") / "demo-0.1.2-py2.py3-none-any.whl"
mocker.patch(
"poetry.installation.executor.ArtifactCache.get_cached_archive_for_git",
return_value=link_cached,
)
clone_spy = mocker.spy(Git, "clone")
source_resolved_reference = "123456"
source_url = "https://github.com/demo/demo.git"
package = Package(
"demo",
"0.1.2",
source_type="git",
source_reference="master",
source_resolved_reference="123456",
source_url="https://github.com/demo/demo.git",
source_resolved_reference=source_resolved_reference,
source_url=source_url,
)
chef = Chef(artifact_cache, tmp_venv, Factory.create_pool(config))
chef.set_directory_wheel(wheel)
prepare_spy = mocker.spy(chef, "prepare")
executor = Executor(tmp_venv, pool, config, io)
executor._chef = chef
......@@ -979,6 +1010,68 @@ def test_executor_should_write_pep610_url_references_for_git(
},
)
if is_artifact_cached:
clone_spy.assert_not_called()
prepare_spy.assert_not_called()
else:
clone_spy.assert_called_once_with(
url=source_url, source_root=mocker.ANY, revision=source_resolved_reference
)
prepare_spy.assert_called_once()
assert prepare_spy.spy_return.exists(), "cached file should not be deleted"
assert (prepare_spy.spy_return.parent / ".created_from_git_dependency").exists()
def test_executor_should_write_pep610_url_references_for_editable_git(
    tmp_venv: VirtualEnv,
    pool: RepositoryPool,
    config: Config,
    artifact_cache: ArtifactCache,
    io: BufferedIO,
    mock_file_downloads: None,
    wheel: Path,
    mocker: MockerFixture,
    fixture_dir: FixtureDirGetter,
):
    """An editable git install must bypass the wheel cache entirely.

    The artifact cache is never consulted, the built archive is cleaned
    up after installation, and no cache marker file is left behind.
    """
    resolved_ref = "123456"
    repo_url = "https://github.com/demo/demo.git"
    package = Package(
        "demo",
        "0.1.2",
        source_type="git",
        source_reference="master",
        source_resolved_reference=resolved_ref,
        source_url=repo_url,
        develop=True,  # editable mode is what disables git wheel caching
    )

    chef = Chef(artifact_cache, tmp_venv, Factory.create_pool(config))
    chef.set_directory_wheel(wheel)
    prepare_spy = mocker.spy(chef, "prepare")
    cache_spy = mocker.spy(artifact_cache, "get_cached_archive_for_git")

    executor = Executor(tmp_venv, pool, config, io)
    executor._chef = chef
    executor.execute([Install(package)])

    verify_installed_distribution(
        tmp_venv,
        package,
        {
            "vcs_info": {
                "vcs": "git",
                "requested_revision": "master",
                "commit_id": "123456",
            },
            "url": package.source_url,
        },
    )

    # Editable installs must neither read from nor write to the git wheel cache.
    cache_spy.assert_not_called()
    prepare_spy.assert_called_once()
    assert not prepare_spy.spy_return.exists(), "editable git should not be cached"
    assert not (prepare_spy.spy_return.parent / ".created_from_git_dependency").exists()
def test_executor_should_append_subdirectory_for_git(
mocker: MockerFixture,
......
......@@ -270,20 +270,32 @@ def test_get_cache_directory_for_link(tmp_path: Path) -> None:
assert directory == expected
def test_get_cached_archives_for_link(
fixture_dir: FixtureDirGetter, mocker: MockerFixture
) -> None:
@pytest.mark.parametrize("subdirectory", [None, "subdir"])
def test_get_cache_directory_for_git(tmp_path: Path, subdirectory: str | None) -> None:
    """The git cache directory is deterministic and changes with the subdirectory."""
    cache = ArtifactCache(cache_dir=tmp_path)

    directory = cache.get_cache_directory_for_git(
        url="https://github.com/demo/demo.git", ref="123456", subdirectory=subdirectory
    )

    # Pinned sha256-derived paths: including the subdirectory in the hash key
    # must produce a different directory.
    if subdirectory:
        suffix = "53/08/33/7851e5806669aa15ab0c555b13bd5523978057323c6a23a9cee18ec51c"
    else:
        suffix = "61/14/30/7c57f8fd71e4eee40b18893b9b586cba45177f15e300f4fb8b14ccc933"
    assert directory == Path(f"{tmp_path.as_posix()}/{suffix}")
def test_get_cached_archives(fixture_dir: FixtureDirGetter) -> None:
distributions = fixture_dir("distributions")
cache = ArtifactCache(cache_dir=Path())
mocker.patch.object(
cache,
"get_cache_directory_for_link",
return_value=distributions,
)
archives = cache._get_cached_archives_for_link(
Link("https://files.python-poetry.org/demo-0.1.0.tar.gz")
)
archives = cache._get_cached_archives(distributions)
assert archives
assert set(archives) == set(distributions.glob("*.whl")) | set(
......@@ -328,7 +340,7 @@ def test_get_not_found_cached_archive_for_link(
mocker.patch.object(
cache,
"_get_cached_archives_for_link",
"_get_cached_archives",
return_value=available_packages,
)
......@@ -380,7 +392,7 @@ def test_get_found_cached_archive_for_link(
mocker.patch.object(
cache,
"_get_cached_archives_for_link",
"_get_cached_archives",
return_value=[
Path("/cache/demo-0.1.0-py2.py3-none-any"),
Path("/cache/demo-0.1.0.tar.gz"),
......@@ -392,3 +404,10 @@ def test_get_found_cached_archive_for_link(
archive = cache.get_cached_archive_for_link(Link(link), strict=strict, env=env)
assert Path(cached) == archive
def test_get_cached_archive_for_git() -> None:
    """Smoke test: an empty cache yields no archive and raises no assertion."""
    empty_cache = ArtifactCache(cache_dir=Path())
    result = empty_cache.get_cached_archive_for_git(
        "url", "ref", "subdirectory", MockEnv()
    )
    assert result is None
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment