Commit 7687539d by martin-kokos Committed by GitHub

Cache: Add logic and tests for FileCache handling corrupt files (#7453)

parent 2b15ce10
......@@ -4,6 +4,7 @@ import contextlib
import dataclasses
import hashlib
import json
import logging
import shutil
import time
......@@ -28,6 +29,8 @@ if TYPE_CHECKING:
MAX_DATE = 9999999999
T = TypeVar("T")
logger = logging.getLogger(__name__)
def decode(string: bytes, encodings: list[str] | None = None) -> str:
"""
......@@ -182,7 +185,14 @@ class FileCache(Generic[T]):
return None
with open(path, "rb") as f:
payload = self._deserialize(f.read())
file_content = f.read()
try:
payload = self._deserialize(file_content)
except (json.JSONDecodeError, ValueError):
self.forget(key)
logger.warning("Corrupt cache file was detected and cleaned up.")
return None
if payload.expired:
self.forget(key)
......
from __future__ import annotations
import shutil
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Any
......@@ -198,6 +200,62 @@ def test_cachy_compatibility(
assert cachy_file_cache.get("key4") == test_obj
def test_missing_cache_file(poetry_file_cache: FileCache) -> None:
    """A key whose backing file has been deleted must read back as ``None``."""
    poetry_file_cache.put("key1", "value")

    relative_location = "81/74/09/96/87/a2/66/21/8174099687a26621f4e2cdd7cc03b3dacedb3fb962255b1aafd033cabe831530"  # noqa: E501
    stored_file = poetry_file_cache.path / relative_location
    assert stored_file.exists()

    # Damage the cache on purpose: the entry's file vanishes from disk.
    stored_file.unlink()

    assert poetry_file_cache.get("key1") is None
def test_missing_cache_path(poetry_file_cache: FileCache) -> None:
    """A key must read back as ``None`` once a directory above its file is gone."""
    poetry_file_cache.put("key1", "value")

    containing_dir = poetry_file_cache.path / "81/74/09/96/87/a2/"
    assert containing_dir.exists()

    # Damage the cache on purpose: wipe a subdirectory that holds the key file.
    shutil.rmtree(containing_dir)

    assert poetry_file_cache.get("key1") is None
@pytest.mark.parametrize(
    "corrupt_payload",
    [
        "",  # empty file
        b"\x00",  # null
        "99999999",  # truncated file
        '999999a999"value"',  # corrupt lifetime
        b'9999999999"va\xd8\x00"',  # invalid unicode
        "fil3systemFa!led",  # garbage file
    ],
)
def test_detect_corrupted_cache_key_file(
    corrupt_payload: str | bytes, poetry_file_cache: FileCache
) -> None:
    """Overwriting a key's file with unparsable content must make ``get`` return ``None``."""
    poetry_file_cache.put("key1", "value")

    relative_location = "81/74/09/96/87/a2/66/21/8174099687a26621f4e2cdd7cc03b3dacedb3fb962255b1aafd033cabe831530"  # noqa: E501
    stored_file = poetry_file_cache.path / relative_location
    assert stored_file.exists()

    # The file originally holds: 9999999999"value"
    # Text payloads are written in text mode, byte payloads in binary mode.
    if isinstance(corrupt_payload, str):
        open_mode = "w"
    else:
        open_mode = "wb"

    with open(stored_file, open_mode) as handle:
        handle.write(corrupt_payload)  # replace the valid entry with corrupt data

    assert poetry_file_cache.get("key1") is None
def test_get_cache_directory_for_link(tmp_path: Path) -> None:
cache = ArtifactCache(cache_dir=tmp_path)
directory = cache.get_cache_directory_for_link(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment