Skip to content

Commit 92dedc8

Browse files
authored
Merge pull request #179 from aboutcode-org/add_support_to_fetch_purl
Add support to fetch purl
2 parents b4b9426 + 56156b0 commit 92dedc8

File tree

5 files changed

+212
-7
lines changed

5 files changed

+212
-7
lines changed

README.rst

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,41 @@ Fetch some package metadata and get a ``fetchcode.packagedcode_models.Package``
5959
>>> list(package.info('pkg:rubygems/files'))
6060
[Package(type='rubygems', namespace=None, name='files', version=None)]
6161

62+
Fetch a purl and get a ``fetchcode.Response`` object back::
63+
64+
>>> from fetchcode import fetch
65+
>>> f = fetch('pkg:swift/github.com/Alamofire/Alamofire@5.4.3')
66+
>>> f.location
67+
'/tmp/tmp_cm02xsg'
68+
>>> f.content_type
69+
'application/zip'
70+
>>> f.url
71+
'https://github.com/Alamofire/Alamofire/archive/5.4.3.zip'
72+
73+
Ecosystems supported for fetching a purl from fetchcode:
74+
75+
- alpm
76+
- apk
77+
- bitbucket
78+
- cargo
79+
- composer
80+
- conda
81+
- cpan
82+
- cran
83+
- deb
84+
- gem
85+
- generic
86+
- github
87+
- golang
88+
- hackage
89+
- hex
90+
- luarocks
91+
- maven
92+
- npm
93+
- nuget
94+
- pub
95+
- pypi
96+
- swift
6297

6398
License
6499
--------

requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ MarkupSafe==2.0.1
4141
more-itertools==8.13.0
4242
normality==2.3.3
4343
packagedcode-msitools==0.101.210706
44-
packageurl-python==0.9.9
45-
packaging==21.3
44+
packageurl-python==0.17.4
45+
packaging==24.0
4646
parameter-expansion-patched==0.3.1
4747
patch==1.16
4848
pdfminer-six==20220506

src/fetchcode/__init__.py

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@
2121
from urllib.parse import urlparse
2222

2323
import requests
24+
from packageurl.contrib import purl2url
25+
26+
from fetchcode.utils import _http_exists
2427

2528

2629
class Response:
@@ -89,24 +92,70 @@ def fetch_ftp(url, location):
8992
return resp
9093

9194

95+
def resolve_purl(purl):
    """
    Return a download URL for the ``purl`` Package URL (PURL) string, or None.

    Candidate URLs are computed first with the purl2url library and then with
    fetchcode's own ``download_urls`` module. The first candidate for which
    ``_http_exists`` is true is returned; None is returned when no resolver
    yields such a candidate.
    """
    # Kept as a function-level import, as in the original code
    # (presumably to avoid a circular import -- TODO confirm).
    from fetchcode.download_urls import download_url as get_download_url_from_fetchcode

    resolvers = (purl2url.get_download_url, get_download_url_from_fetchcode)
    for resolve in resolvers:
        candidate = resolve(purl)
        if not candidate:
            continue
        if _http_exists(candidate):
            return candidate

    return None
108+
109+
110+
def get_resolved_url(url, scheme):
    """
    Return a ``(url, scheme)`` tuple obtained by resolving ``url`` with the
    resolution handler registered for ``scheme``.

    Only the ``pkg`` (Package URL) scheme has a handler.
    Raise ValueError if there is no handler for ``scheme``.
    """
    resolution_by_scheme = {
        "pkg": resolve_url_from_purl,
    }
    resolution_handler = resolution_by_scheme.get(scheme)
    if not resolution_handler:
        raise ValueError(f"Not a supported/known scheme: {scheme}")
    url, scheme = resolution_handler(url)
    return url, scheme
119+
120+
121+
def resolve_url_from_purl(url):
    """
    Return a ``(url, scheme)`` tuple for the URL that the ``url`` Package URL
    (PURL) resolves to, where ``scheme`` is the scheme of the resolved URL.
    Raise ValueError if the PURL cannot be resolved.
    """
    resolved = resolve_purl(url)
    if resolved:
        return resolved, get_url_scheme(resolved)
    raise ValueError("Could not resolve PURL to a valid URL.")
131+
132+
133+
def get_url_scheme(url):
    """
    Return the scheme component of the ``url`` string as parsed by ``urlparse``.
    """
    parsed = urlparse(url)
    return parsed.scheme
138+
139+
92140
def fetch(url):
    """
    Return a `Response` object built from fetching the content at the `url` URL string and
    store content at a temporary file.

    A Package URL (``pkg`` scheme) is first resolved to a download URL and is
    then fetched using the scheme of that resolved URL.

    Raise an Exception if the (resolved) scheme has no fetcher. Errors raised
    while resolving a Package URL are not caught here.
    """
    scheme = get_url_scheme(url)

    if scheme == "pkg":
        url, scheme = get_resolved_url(url, scheme)

    fetchers = {"ftp": fetch_ftp, "http": fetch_http, "https": fetch_http}

    fetcher = fetchers.get(scheme)
    if not fetcher:
        # Reject unknown schemes before creating the temporary file so that a
        # failed call does not leave an empty file behind.
        raise Exception(f"Not a supported/known scheme: {scheme}.")

    # delete=False keeps the file on disk once the handle is closed; only its
    # path is handed to the fetcher.
    with tempfile.NamedTemporaryFile(delete=False) as temp:
        location = temp.name

    return fetcher(url, location)
110159

111160

112161
def fetch_json_response(url):

src/fetchcode/composer.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ class Composer:
2626

2727
@classmethod
2828
def get_download_url(cls, purl):
29-
3029
"""
3130
Return the download URL for a Composer PURL.
3231
"""

tests/test_fetch.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
import pytest
2020

2121
from fetchcode import fetch
22+
from fetchcode import resolve_purl
23+
from fetchcode import resolve_url_from_purl
2224

2325

2426
@mock.patch("fetchcode.requests.get")
@@ -63,3 +65,123 @@ def test_fetch_with_scheme_not_present():
6365
url = "abc://speedtest/1KB.zip"
6466
response = fetch(url=url)
6567
assert "Not a supported/known scheme." == e_info
68+
69+
70+
@mock.patch("fetchcode._http_exists")
@mock.patch("fetchcode.fetch_http")
@mock.patch("fetchcode.pypi.fetch_json_response")
def test_fetch_purl_with_fetchcode(mock_fetch_json_response, mock_fetch_http, mock_http_exists):
    """
    Fetching a pypi purl returns what the mocked HTTP fetcher returns, with a
    single URL existence check and a single HTTP fetch.
    """
    pypi_payload = {"urls": [{"url": "https://example.com/sample-1.0.0.zip"}]}
    mock_fetch_json_response.return_value = pypi_payload
    mock_http_exists.return_value = True
    mock_fetch_http.return_value = "mocked_purl_response"

    result = fetch("pkg:pypi/sample@1.0.0")

    assert result == "mocked_purl_response"
    mock_http_exists.assert_called_once()
    mock_fetch_http.assert_called_once()
85+
86+
87+
@mock.patch("fetchcode._http_exists")
@mock.patch("fetchcode.fetch_http")
def test_fetch_purl_with_purl2url(mock_fetch_http, mock_http_exists):
    """
    Fetching an alpm purl returns what the mocked HTTP fetcher returns, with a
    single URL existence check and a single HTTP fetch.
    """
    mock_http_exists.return_value = True
    mock_fetch_http.return_value = "mocked_purl_response"

    result = fetch("pkg:alpm/sample@1.0.0")

    assert result == "mocked_purl_response"
    mock_http_exists.assert_called_once()
    mock_fetch_http.assert_called_once()
98+
99+
100+
@mock.patch("fetchcode.pypi.fetch_json_response")
def test_fetch_invalid_purl(mock_fetch_json_response):
    """
    Fetching a pypi purl fails with a "No download URL found" error when the
    mocked JSON response is empty.
    """
    mock_fetch_json_response.return_value = {}

    with pytest.raises(Exception, match="No download URL found for invalid-package version 1.0.0"):
        fetch("pkg:pypi/invalid-package@1.0.0")
114+
115+
116+
def test_fetch_unsupported_scheme():
    """
    Fetching a URL with the ``s3`` scheme raises the "Not a supported/known scheme" error.
    """
    unsupported_url = "s3://bucket/object"

    with pytest.raises(Exception, match="Not a supported/known scheme"):
        fetch(unsupported_url)
119+
120+
121+
def test_resolve_url_from_purl_invalid():
    """
    Fetching a purl of type ``invalid`` raises the ValueError reporting that
    the PURL could not be resolved.
    """
    unresolvable_purl = "pkg:invalid/invalid-package@1.0.0"

    with pytest.raises(ValueError, match="Could not resolve PURL to a valid URL."):
        fetch(unresolvable_purl)
124+
125+
126+
@mock.patch("fetchcode._http_exists")
def test_resolve_url_from_purl_using_purl2url(mock_http_exists):
    """
    A swift purl resolves to the expected GitHub archive URL, and that URL is
    the only one checked for existence.
    """
    expected_url = "https://github.com/Alamofire/Alamofire/archive/5.4.3.zip"
    mock_http_exists.return_value = True

    url, _ = resolve_url_from_purl("pkg:swift/github.com/Alamofire/Alamofire@5.4.3")

    assert url == "https://github.com/Alamofire/Alamofire/archive/5.4.3.zip"
    mock_http_exists.assert_called_once_with(expected_url)
135+
136+
137+
@mock.patch("fetchcode._http_exists")
@mock.patch("fetchcode.pypi.fetch_json_response")
def test_resolve_url_from_purl_using_fetchcode(mock_fetch_json_response, mock_http_exists):
    """
    A pypi purl resolves to the URL listed in the mocked JSON response, and
    that URL is the only one checked for existence.
    """
    pypi_payload = {"urls": [{"url": "https://example.com/sample-1.0.0.zip"}]}
    mock_fetch_json_response.return_value = pypi_payload
    mock_http_exists.return_value = True

    url, _ = resolve_url_from_purl("pkg:pypi/example@1.0.0")

    assert url == "https://example.com/sample-1.0.0.zip"
    mock_http_exists.assert_called_once_with("https://example.com/sample-1.0.0.zip")
148+
149+
150+
def test_resolve_purl_invalid():
    """
    Resolving a purl of type ``invalid`` returns None.
    """
    resolved = resolve_purl("pkg:invalid/invalid-package@1.0.0")

    assert resolved is None
152+
153+
154+
@mock.patch("fetchcode._http_exists")
def test_resolve_purl_using_purl2url(mock_http_exists):
    """
    A pub purl resolves to the expected pub.dev archive URL.

    The URL existence check is mocked, as in the sibling tests, so the outcome
    does not depend on the URL being reachable when the test runs.
    """
    expected_url = "https://pub.dev/api/archives/http-0.13.3.tar.gz"
    mock_http_exists.return_value = True

    url = resolve_purl("pkg:pub/http@0.13.3")

    assert url == expected_url
    mock_http_exists.assert_called_once_with(expected_url)
157+
158+
159+
@mock.patch("fetchcode._http_exists")
def test_resolve_purl_using_purl2url_url_does_not_exists(mock_http_exists):
    """
    Resolving a pub purl returns None when the existence check reports that
    the URL does not exist.
    """
    mock_http_exists.return_value = False

    resolved = resolve_purl("pkg:pub/http@0.13.3")

    assert resolved is None
164+
165+
166+
@mock.patch("fetchcode._http_exists")
@mock.patch("fetchcode.pypi.fetch_json_response")
def test_resolve_purl_using_fetchcode(mock_fetch_json_response, mock_http_exists):
    """
    A pypi purl resolves to the URL listed in the mocked JSON response when
    the existence check succeeds.
    """
    pypi_payload = {"urls": [{"url": "https://example.com/sample-1.0.0.zip"}]}
    mock_http_exists.return_value = True
    mock_fetch_json_response.return_value = pypi_payload

    resolved = resolve_purl("pkg:pypi/example@1.0.0")

    assert resolved == "https://example.com/sample-1.0.0.zip"
175+
176+
177+
@mock.patch("fetchcode._http_exists")
@mock.patch("fetchcode.pypi.fetch_json_response")
def test_resolve_purl_using_fetchcode_url_does_not_exists(
    mock_fetch_json_response, mock_http_exists
):
    """
    Resolving a pypi purl returns None when the existence check reports that
    the URL from the mocked JSON response does not exist.
    """
    pypi_payload = {"urls": [{"url": "https://example.com/sample-1.0.0.zip"}]}
    mock_http_exists.return_value = False
    mock_fetch_json_response.return_value = pypi_payload

    resolved = resolve_purl("pkg:pypi/example@1.0.0")

    assert resolved is None

0 commit comments

Comments
 (0)