Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Add support to fetch purl
Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
  • Loading branch information
TG1999 committed Aug 6, 2025
commit ac68acb18be1d5d3f39cb4c57febafe4236685ce
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ MarkupSafe==2.0.1
more-itertools==8.13.0
normality==2.3.3
packagedcode-msitools==0.101.210706
packageurl-python==0.9.9
packaging==21.3
packageurl-python==0.17.4
packaging==24.0
parameter-expansion-patched==0.3.1
patch==1.16
pdfminer-six==20220506
Expand Down
26 changes: 21 additions & 5 deletions src/fetchcode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from urllib.parse import urlparse

import requests
from packageurl.contrib.purl2url import get_download_url


class Response:
Expand Down Expand Up @@ -89,19 +90,34 @@ def fetch_ftp(url, location):
return resp


def fetch_purl(purl, location=None):
    """
    Return a `Response` object built from fetching the content for the Package-URL `purl`
    string, or None if no download URL can be resolved for this `purl`.

    The download URL is resolved first with packageurl's `get_download_url` and then with
    fetchcode's own `download_url` resolver; the first non-empty URL is the one fetched.

    If `location` is provided and the resolved URL uses an "http", "https" or "ftp" scheme,
    the content is saved in a file at `location`. Otherwise the download is delegated to
    `fetch`, which stores the content in a temporary file of its own.
    """
    # Kept as a function-level import, as in the original code (presumably to avoid a
    # circular import with fetchcode.download_urls -- TODO confirm).
    from fetchcode.download_urls import download_url as get_download_url_from_fetchcode

    # Fetchers that accept an explicit target location, keyed by URL scheme.
    fetchers_by_scheme = {"ftp": fetch_ftp, "http": fetch_http, "https": fetch_http}

    for resolver in (get_download_url, get_download_url_from_fetchcode):
        url = resolver(purl)
        if not url:
            # This resolver does not know the purl: try the next one.
            continue

        fetcher = fetchers_by_scheme.get(urlparse(url).scheme)
        if location and fetcher is not None:
            # Honor the caller-provided location instead of silently ignoring it.
            return fetcher(url, location)

        # No location requested, or a scheme without a location-aware fetcher:
        # let `fetch` pick a temporary file (or report the unsupported scheme).
        return fetch(url=url)

    return None


def fetch(url):
"""
Return a `Response` object built from fetching the content at the `url` URL string and
store content at a temporary file.
"""

temp = tempfile.NamedTemporaryFile(delete=False)
location = temp.name

url_parts = urlparse(url)
scheme = url_parts.scheme
location = None

if scheme != "purl":
temp = tempfile.NamedTemporaryFile(delete=False)
location = temp.name

fetchers = {"ftp": fetch_ftp, "http": fetch_http, "https": fetch_http}
fetchers = {"ftp": fetch_ftp, "http": fetch_http, "https": fetch_http, "pkg": fetch_purl}

if scheme in fetchers:
return fetchers.get(scheme)(url, location)
Expand Down