Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 59 additions & 7 deletions src/python_inspector/utils_pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import attr
import packageurl
import requests
import zipfile
from bs4 import BeautifulSoup
from commoncode import fileutils
from commoncode.hash import multi_checksums
Expand Down Expand Up @@ -1698,6 +1699,20 @@ async def get(

cache_valid = os.path.exists(cached) and os.path.getsize(cached) > 0

# Validate cached wheel/egg files.
if cache_valid and not as_text:
if path_or_url.endswith((".whl", ".egg", ".zip")):
try:
if not zipfile.is_zipfile(cached):
if TRACE_DEEP:
print(f" FILE CACHE INVALID (corrupted zip): {path_or_url}")
cache_valid = False
except (FileNotFoundError, OSError):
# File was deleted/modified by another task - treat as cache miss
if TRACE_DEEP:
print(f" FILE CACHE VANISHED during validation: {path_or_url}")
cache_valid = False

if force or not cache_valid:
if not cache_valid and os.path.exists(cached):
if TRACE_DEEP:
Expand All @@ -1715,17 +1730,54 @@ async def get(
)
wmode = "w" if as_text else "wb"

# acquire lock and wait until timeout to get a lock or die
with lockfile.FileLock(lock_file).locked(timeout=PYINSP_CACHE_LOCK_TIMEOUT):
async with aiofiles.open(cached, mode=wmode) as fo:
await fo.write(content)
# Use atomic file operations.
temp_file = f"{cached}.tmp.{os.getpid()}"

try:
# acquire lock and wait until timeout to get a lock or die
with lockfile.FileLock(lock_file).locked(timeout=PYINSP_CACHE_LOCK_TIMEOUT):
async with aiofiles.open(temp_file, mode=wmode) as fo:
await fo.write(content)

# Validate zip files before making them "live"
if not as_text and path_or_url.endswith((".whl", ".egg", ".zip")):
if not zipfile.is_zipfile(temp_file):
raise Exception(
f"Downloaded file is not a valid zip: {path_or_url}\n"
f"Size: {os.path.getsize(temp_file)} bytes"
)

# Atomic rename - readers will never see partial/corrupt file
os.rename(temp_file, cached)

except Exception:
# Clean up temp file on any error
if os.path.exists(temp_file):
os.remove(temp_file)
raise

return content, cached
else:
if TRACE_DEEP:
print(f" FILE CACHE HIT: {path_or_url}")
# also lock on read to avoid race conditions
with lockfile.FileLock(lock_file).locked(timeout=PYINSP_CACHE_LOCK_TIMEOUT):
return await get_local_file_content(path=cached, as_text=as_text), cached

# File passed validation, lock and read
# Handle race condition where file might be deleted between validation and lock
try:
with lockfile.FileLock(lock_file).locked(timeout=PYINSP_CACHE_LOCK_TIMEOUT):
return await get_local_file_content(path=cached, as_text=as_text), cached
except FileNotFoundError:
# File was deleted by another task after validation - retry with force.
if TRACE_DEEP:
print(f" FILE VANISHED after validation, re-downloading: {path_or_url}")
return await self.get(
credentials=credentials,
path_or_url=path_or_url,
as_text=as_text,
force=True,
verbose=verbose,
echo_func=echo_func,
)


CACHE = Cache()
Expand Down
111 changes: 58 additions & 53 deletions tests/data/azure-devops.req-310-expected.json

Large diffs are not rendered by default.

111 changes: 58 additions & 53 deletions tests/data/azure-devops.req-312-expected.json

Large diffs are not rendered by default.

111 changes: 58 additions & 53 deletions tests/data/azure-devops.req-313-expected.json

Large diffs are not rendered by default.

111 changes: 58 additions & 53 deletions tests/data/azure-devops.req-314-expected.json

Large diffs are not rendered by default.

58 changes: 29 additions & 29 deletions tests/data/azure-devops.req-38-expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -317,12 +317,12 @@
"type": "pypi",
"namespace": null,
"name": "certifi",
"version": "2025.10.5",
"version": "2026.1.4",
"qualifiers": {},
"subpath": null,
"primary_language": "Python",
"description": "Python package for providing Mozilla's CA Bundle.\nCertifi: Python SSL Certificates\n================================\n\nCertifi provides Mozilla's carefully curated collection of Root Certificates for\nvalidating the trustworthiness of SSL certificates while verifying the identity\nof TLS hosts. It has been extracted from the `Requests`_ project.\n\nInstallation\n------------\n\n``certifi`` is available on PyPI. Simply install it with ``pip``::\n\n $ pip install certifi\n\nUsage\n-----\n\nTo reference the installed certificate authority (CA) bundle, you can use the\nbuilt-in function::\n\n >>> import certifi\n\n >>> certifi.where()\n '/usr/local/lib/python3.7/site-packages/certifi/cacert.pem'\n\nOr from the command line::\n\n $ python -m certifi\n /usr/local/lib/python3.7/site-packages/certifi/cacert.pem\n\nEnjoy!\n\n.. _`Requests`: https://requests.readthedocs.io/en/master/\n\nAddition/Removal of Certificates\n--------------------------------\n\nCertifi does not support any addition/removal or other modification of the\nCA trust store content. This project is intended to provide a reliable and\nhighly portable root of trust to python deployments. Look to upstream projects\nfor methods to use alternate trust.",
"release_date": "2025-10-05T04:12:14",
"release_date": "2026-01-04T02:42:40",
"parties": [
{
"type": "person",
Expand All @@ -349,11 +349,11 @@
"Programming Language :: Python :: 3.9"
],
"homepage_url": "https://github.com/certifi/python-certifi",
"download_url": "https://files.pythonhosted.org/packages/e4/37/af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed/certifi-2025.10.5-py3-none-any.whl",
"size": 163286,
"download_url": "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl",
"size": 152900,
"sha1": null,
"md5": "7b56f7121949a196441739c539fd01be",
"sha256": "0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de",
"md5": "1dab98768140ad2d8dbc9be8f14a2af9",
"sha256": "9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c",
"sha512": null,
"bug_tracking_url": null,
"code_view_url": "https://github.com/certifi/python-certifi",
Expand All @@ -373,9 +373,9 @@
"dependencies": [],
"repository_homepage_url": null,
"repository_download_url": null,
"api_data_url": "https://pypi.org/pypi/certifi/2025.10.5/json",
"api_data_url": "https://pypi.org/pypi/certifi/2026.1.4/json",
"datasource_id": null,
"purl": "pkg:pypi/certifi@2025.10.5"
"purl": "pkg:pypi/certifi@2026.1.4"
},
{
"type": "pypi",
Expand Down Expand Up @@ -576,17 +576,17 @@
"type": "pypi",
"namespace": null,
"name": "cryptography",
"version": "45.0.7",
"version": "46.0.0",
"qualifiers": {},
"subpath": null,
"primary_language": "Python",
"description": "cryptography is a package which provides cryptographic recipes and primitives to Python developers.\npyca/cryptography\n=================\n\n.. image:: https://img.shields.io/pypi/v/cryptography.svg\n :target: https://pypi.org/project/cryptography/\n :alt: Latest Version\n\n.. image:: https://readthedocs.org/projects/cryptography/badge/?version=latest\n :target: https://cryptography.io\n :alt: Latest Docs\n\n.. image:: https://github.com/pyca/cryptography/workflows/CI/badge.svg?branch=main\n :target: https://github.com/pyca/cryptography/actions?query=workflow%3ACI+branch%3Amain\n\n\n``cryptography`` is a package which provides cryptographic recipes and\nprimitives to Python developers. Our goal is for it to be your \"cryptographic\nstandard library\". It supports Python 3.7+ and PyPy3 7.3.11+.\n\n``cryptography`` includes both high level recipes and low level interfaces to\ncommon cryptographic algorithms such as symmetric ciphers, message digests, and\nkey derivation functions. For example, to encrypt something with\n``cryptography``'s high level symmetric encryption recipe:\n\n.. code-block:: pycon\n\n >>> from cryptography.fernet import Fernet\n >>> # Put this somewhere safe!\n >>> key = Fernet.generate_key()\n >>> f = Fernet(key)\n >>> token = f.encrypt(b\"A really secret message. Not for prying eyes.\")\n >>> token\n b'...'\n >>> f.decrypt(token)\n b'A really secret message. Not for prying eyes.'\n\nYou can find more information in the `documentation`_.\n\nYou can install ``cryptography`` with:\n\n.. code-block:: console\n\n $ pip install cryptography\n\nFor full details see `the installation documentation`_.\n\nDiscussion\n~~~~~~~~~~\n\nIf you run into bugs, you can file them in our `issue tracker`_.\n\nWe maintain a `cryptography-dev`_ mailing list for development discussion.\n\nYou can also join ``#pyca`` on ``irc.libera.chat`` to ask questions or get\ninvolved.\n\nSecurity\n~~~~~~~~\n\nNeed to report a security issue? Please consult our `security reporting`_\ndocumentation.\n\n\n.. _`documentation`: https://cryptography.io/\n.. _`the installation documentation`: https://cryptography.io/en/latest/installation/\n.. _`issue tracker`: https://github.com/pyca/cryptography/issues\n.. _`cryptography-dev`: https://mail.python.org/mailman/listinfo/cryptography-dev\n.. _`security reporting`: https://cryptography.io/en/latest/security/",
"release_date": "2025-09-01T11:14:24",
"description": "cryptography is a package which provides cryptographic recipes and primitives to Python developers.\npyca/cryptography\n=================\n\n.. image:: https://img.shields.io/pypi/v/cryptography.svg\n :target: https://pypi.org/project/cryptography/\n :alt: Latest Version\n\n.. image:: https://readthedocs.org/projects/cryptography/badge/?version=latest\n :target: https://cryptography.io\n :alt: Latest Docs\n\n.. image:: https://github.com/pyca/cryptography/actions/workflows/ci.yml/badge.svg\n :target: https://github.com/pyca/cryptography/actions/workflows/ci.yml?query=branch%3Amain\n\n``cryptography`` is a package which provides cryptographic recipes and\nprimitives to Python developers. Our goal is for it to be your \"cryptographic\nstandard library\". It supports Python 3.8+ and PyPy3 7.3.11+.\n\n``cryptography`` includes both high level recipes and low level interfaces to\ncommon cryptographic algorithms such as symmetric ciphers, message digests, and\nkey derivation functions. For example, to encrypt something with\n``cryptography``'s high level symmetric encryption recipe:\n\n.. code-block:: pycon\n\n >>> from cryptography.fernet import Fernet\n >>> # Put this somewhere safe!\n >>> key = Fernet.generate_key()\n >>> f = Fernet(key)\n >>> token = f.encrypt(b\"A really secret message. Not for prying eyes.\")\n >>> token\n b'...'\n >>> f.decrypt(token)\n b'A really secret message. Not for prying eyes.'\n\nYou can find more information in the `documentation`_.\n\nYou can install ``cryptography`` with:\n\n.. code-block:: console\n\n $ pip install cryptography\n\nFor full details see `the installation documentation`_.\n\nDiscussion\n~~~~~~~~~~\n\nIf you run into bugs, you can file them in our `issue tracker`_.\n\nWe maintain a `cryptography-dev`_ mailing list for development discussion.\n\nYou can also join ``#pyca`` on ``irc.libera.chat`` to ask questions or get\ninvolved.\n\nSecurity\n~~~~~~~~\n\nNeed to report a security issue? Please consult our `security reporting`_\ndocumentation.\n\n\n.. _`documentation`: https://cryptography.io/\n.. _`the installation documentation`: https://cryptography.io/en/latest/installation/\n.. _`issue tracker`: https://github.com/pyca/cryptography/issues\n.. _`cryptography-dev`: https://mail.python.org/mailman/listinfo/cryptography-dev\n.. _`security reporting`: https://cryptography.io/en/latest/security/",
"release_date": "2025-09-16T21:07:03",
"parties": [
{
"type": "person",
"role": "author",
"name": "The cryptography developers <cryptography-dev@python.org>",
"name": null,
"email": "The Python Cryptographic Authority and individual contributors <cryptography-dev@python.org>",
"url": null
}
Expand All @@ -607,38 +607,37 @@
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.14",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: Free Threading :: 3 - Stable",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
"Topic :: Security :: Cryptography"
],
"homepage_url": null,
"download_url": "https://files.pythonhosted.org/packages/eb/ac/59b7790b4ccaed739fc44775ce4645c9b8ce54cbec53edf16c74fd80cb2b/cryptography-45.0.7-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl",
"size": 4423075,
"download_url": "https://files.pythonhosted.org/packages/da/94/f1c1f30110c05fa5247bf460b17acfd52fa3f5c77e94ba19cff8957dc5e6/cryptography-46.0.0-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl",
"size": 4562561,
"sha1": null,
"md5": "d7c4a989694c8af7d27560ff4125516f",
"sha256": "3994c809c17fc570c2af12c9b840d7cea85a9fd3e5c0e0491f4fa3c029216d59",
"md5": "4fe77bef21236be92883b98ea2b89580",
"sha256": "c3cd09b1490c1509bf3892bde9cef729795fae4a2fee0621f19be3321beca7e4",
"sha512": null,
"bug_tracking_url": null,
"code_view_url": null,
"vcs_url": null,
"copyright": null,
"license_expression": null,
"declared_license": {
"license": "Apache-2.0 OR BSD-3-Clause"
},
"license_expression": "Apache-2.0 OR BSD-3-Clause",
"declared_license": {},
"notice_text": null,
"source_packages": [],
"file_references": [],
"extra_data": {},
"dependencies": [],
"repository_homepage_url": null,
"repository_download_url": null,
"api_data_url": "https://pypi.org/pypi/cryptography/45.0.7/json",
"api_data_url": "https://pypi.org/pypi/cryptography/46.0.0/json",
"datasource_id": null,
"purl": "pkg:pypi/cryptography@45.0.7"
"purl": "pkg:pypi/cryptography@46.0.0"
},
{
"type": "pypi",
Expand Down Expand Up @@ -1328,13 +1327,13 @@
"package": "pkg:pypi/azure-storage-blob@12.26.0",
"dependencies": [
"pkg:pypi/azure-core@1.33.0",
"pkg:pypi/cryptography@45.0.7",
"pkg:pypi/cryptography@46.0.0",
"pkg:pypi/isodate@0.7.2",
"pkg:pypi/typing-extensions@4.13.2"
]
},
{
"package": "pkg:pypi/certifi@2025.10.5",
"package": "pkg:pypi/certifi@2026.1.4",
"dependencies": []
},
{
Expand All @@ -1352,9 +1351,10 @@
"dependencies": []
},
{
"package": "pkg:pypi/cryptography@45.0.7",
"package": "pkg:pypi/cryptography@46.0.0",
"dependencies": [
"pkg:pypi/cffi@1.17.1"
"pkg:pypi/cffi@1.17.1",
"pkg:pypi/typing-extensions@4.13.2"
]
},
{
Expand All @@ -1369,7 +1369,7 @@
"package": "pkg:pypi/msrest@0.7.1",
"dependencies": [
"pkg:pypi/azure-core@1.33.0",
"pkg:pypi/certifi@2025.10.5",
"pkg:pypi/certifi@2026.1.4",
"pkg:pypi/isodate@0.7.2",
"pkg:pypi/requests-oauthlib@2.0.0",
"pkg:pypi/requests@2.32.4"
Expand All @@ -1393,7 +1393,7 @@
{
"package": "pkg:pypi/requests@2.32.4",
"dependencies": [
"pkg:pypi/certifi@2025.10.5",
"pkg:pypi/certifi@2026.1.4",
"pkg:pypi/charset-normalizer@3.4.4",
"pkg:pypi/idna@3.11",
"pkg:pypi/urllib3@2.2.3"
Expand Down
Loading