Skip to content

Commit 801e740

Browse files
committed
Refactor code including dictionary access and tests #116
Reference: #116 Signed-off-by: John M. Horan <johnmhoran@gmail.com>
1 parent 3768d2e commit 801e740

11 files changed

+1982
-258
lines changed

src/fetchcode/package.py

Lines changed: 81 additions & 252 deletions
Large diffs are not rendered by default.

src/fetchcode/package_util.py

Lines changed: 298 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,13 +15,20 @@
1515
# specific language governing permissions and limitations under the License.
1616

1717
import dataclasses
18+
import logging
19+
import os
1820
import re
1921

2022
import attr
23+
from bs4 import BeautifulSoup
24+
from univers import versions
2125

2226
from fetchcode import utils
2327
from fetchcode.packagedcode_models import Package
2428

29+
LOG_FILE_LOCATION = os.path.join(os.path.expanduser("~"), "purlcli.log")
30+
logger = logging.getLogger(__name__)
31+
2532

2633
def package_from_dict(package_data):
2734
"""
@@ -723,3 +730,294 @@ def get_package_info(cls, gh_purl, package_name):
723730
"date": "2002-08-19T04:23:00",
724731
},
725732
}
733+
734+
735+
def get_cocoapods_org_url_status(purl, name, cocoapods_org_url):
    """
    Return a mapping describing how ``cocoapods_org_url`` answers a HEAD
    request.

    The mapping always carries a "return_message" key whose value is one of:

    - "cocoapods_org_url_not_found": the URL answered with HTTP 404.
    - "cocoapods_org_url_temporarily_unavailable": the URL answered with
      HTTP 503.
    - "cocoapods_org_url_redirects": the URL answered with HTTP 302 and the
      target is not a ``https://github.com/<owner>/<repo>`` URL.
    - "failed_to_fetch_github_redirect" or "github_redirect_not_found": the
      GitHub redirect target could not be retrieved.
    - "github_redirect_error": the GitHub page has no <head> section or no
      usable 'og:url' meta tag.
    - "cocoapods_org_redirects_to_github": the redirect was resolved; the
      mapping then also holds "corrected_name", "cocoapods_org_pod_name",
      "cocoapods_org_gh_repo_owner", "cocoapods_org_gh_repo_name" and
      "cocoapods_org_version" (always None here).
    - None: any other status code.

    ``purl`` and ``name`` are only used in log messages.
    """
    purl_to_cocoapods_org_url_status = {}
    # NOTE(review): assumes make_head_request() always returns an object with
    # ``status_code`` and ``headers`` -- confirm its failure behavior.
    head_response = utils.make_head_request(cocoapods_org_url)
    status_code = head_response.status_code

    if status_code == 404:
        logger.error(f"cocoapods_org_url not found for {name}")
        purl_to_cocoapods_org_url_status["return_message"] = "cocoapods_org_url_not_found"
        return purl_to_cocoapods_org_url_status

    if status_code == 503:
        logger.error(f"cocoapods_org_url temporarily unavailable for {name}")
        purl_to_cocoapods_org_url_status["return_message"] = "cocoapods_org_url_temporarily_unavailable"
        return purl_to_cocoapods_org_url_status

    if status_code != 302:
        purl_to_cocoapods_org_url_status["return_message"] = None
        return purl_to_cocoapods_org_url_status

    # A 302 without a Location header is reported as a plain redirect rather
    # than raising KeyError.
    redirect_url = head_response.headers.get("Location")
    redirect_message = f"The cocoapods.org URL {cocoapods_org_url} redirects to {redirect_url}"
    logger.warning(redirect_message)
    print(redirect_message)

    # Owner and repo are the first two path segments of a GitHub URL; empty
    # segments (e.g. from a trailing slash) are ignored.
    github_prefix = "https://github.com/"
    repo_path = []
    if redirect_url and redirect_url.startswith(github_prefix):
        repo_path = [
            segment
            for segment in redirect_url[len(github_prefix):].split("/")
            if segment
        ]

    if len(repo_path) < 2:
        purl_to_cocoapods_org_url_status["return_message"] = "cocoapods_org_url_redirects"
        return purl_to_cocoapods_org_url_status

    gh_repo_namespace, gh_repo_name = repo_path[0], repo_path[1]

    redirect_to_gh_response = utils.get_complete_response(redirect_url)
    # NOTE(review): assumes failures are reported as plain strings (as the
    # other fetch helpers in utils do) and successes as a response object
    # with ``.text`` -- confirm against utils.get_complete_response.
    if isinstance(redirect_to_gh_response, str):
        if (
            "not_found" in redirect_to_gh_response
            and "Failed to fetch" not in redirect_to_gh_response
        ):
            redirect_to_gh_not_found = f"Redirect to GitHub not found: {redirect_url}"
            logger.error(redirect_to_gh_not_found)
            print(redirect_to_gh_not_found)
            purl_to_cocoapods_org_url_status["return_message"] = "github_redirect_not_found"
            return purl_to_cocoapods_org_url_status

        logger.error(redirect_to_gh_response)
        print(redirect_to_gh_response)
        purl_to_cocoapods_org_url_status["return_message"] = "failed_to_fetch_github_redirect"
        return purl_to_cocoapods_org_url_status

    soup = BeautifulSoup(redirect_to_gh_response.text, "html.parser")
    head = soup.find("head")
    if not head:
        no_head_section = f"\n<head> section not found in redirect_to_gh_response page for {purl}"
        print(no_head_section)
        logger.error(no_head_section)
        purl_to_cocoapods_org_url_status["return_message"] = "github_redirect_error"
        return purl_to_cocoapods_org_url_status

    og_url_tag = head.find("meta", property="og:url")
    og_url = og_url_tag.get("content") if og_url_tag else None
    if not og_url:
        # Covers both a missing tag and a tag without a "content" attribute.
        no_meta_tag = f"'og:url' meta tag not found in redirect_to_gh_response page for {purl}"
        print(no_meta_tag)
        logger.error(no_meta_tag)
        purl_to_cocoapods_org_url_status["return_message"] = "github_redirect_error"
        return purl_to_cocoapods_org_url_status

    # The last path segment of the canonical GitHub URL is used as the
    # corrected pod name.
    corrected_name = og_url.rstrip("/").split("/")[-1]

    purl_to_cocoapods_org_url_status["corrected_name"] = corrected_name
    purl_to_cocoapods_org_url_status["cocoapods_org_pod_name"] = corrected_name
    purl_to_cocoapods_org_url_status["cocoapods_org_gh_repo_owner"] = gh_repo_namespace
    purl_to_cocoapods_org_url_status["cocoapods_org_gh_repo_name"] = gh_repo_name
    purl_to_cocoapods_org_url_status["cocoapods_org_version"] = None
    purl_to_cocoapods_org_url_status["return_message"] = "cocoapods_org_redirects_to_github"
    return purl_to_cocoapods_org_url_status
815+
816+
817+
def get_pod_data_with_soup(purl, name, cocoapods_org_url):
    """
    Return a mapping of pod data scraped from the ``cocoapods_org_url`` page,
    or None if the page could not be fetched.

    The mapping holds these keys:

    - "cocoapods_org_gh_repo_owner", "cocoapods_org_gh_repo_name" and
      "cocoapods_org_gh_repo_url": taken from the 'GitHub Repo' sidebar link,
      or None when there is no such link.
    - "cocoapods_org_gh_repo_url_status_code": status code of a HEAD request
      to the GitHub repo URL; only present when a GitHub repo was found.
    - "cocoapods_org_podspec_url": the 'See Podspec' sidebar link, or None.
    - "cocoapods_org_pkg_home_url": the 'Homepage' sidebar link, or None.
    - "cocoapods_org_version": the second-to-last path segment of the podspec
      URL, or None when there is no podspec URL.
    - "cocoapods_org_pod_name": content of the 'og:title' meta tag, or None.
    - "no_podspec": a warning message; only present when no podspec URL was
      found.

    ``purl`` and ``name`` are used in log messages; ``name`` is also compared
    with the pod name found on the page to report a name change.
    """
    purl_to_pod_data_with_soup = {}
    cocoapods_org_response = utils.get_complete_response(cocoapods_org_url)
    # NOTE(review): assumes every failure is reported as a plain string and a
    # success as a response object with ``.text`` -- confirm against
    # utils.get_complete_response.
    if isinstance(cocoapods_org_response, str):
        logger.error(cocoapods_org_response)
        print(cocoapods_org_response)
        return None

    soup = BeautifulSoup(cocoapods_org_response.text, "html.parser")
    cocoapods_org_gh_repo_owner = None
    cocoapods_org_gh_repo_name = None
    cocoapods_org_gh_repo_url = None
    cocoapods_org_podspec_url = None
    cocoapods_org_pkg_home_url = None

    for sidebar_links in soup.find_all("ul", class_="links"):
        for nested_link in sidebar_links.find_all("a"):
            link_url = nested_link.get("href")
            if not link_url:
                # An anchor without a target carries no usable data.
                continue

            link_text = nested_link.text
            if link_text == "Homepage":
                cocoapods_org_pkg_home_url = link_url
            elif link_text == "GitHub Repo":
                # Ignore a trailing slash so that the last two segments are
                # the owner and the repo name.
                split_link = link_url.rstrip("/").split("/")
                if len(split_link) >= 2:
                    cocoapods_org_gh_repo_owner = split_link[-2]
                    cocoapods_org_gh_repo_name = split_link[-1]
            elif link_text == "See Podspec":
                cocoapods_org_podspec_url = link_url

    if cocoapods_org_gh_repo_owner and cocoapods_org_gh_repo_name:
        cocoapods_org_gh_repo_url = f"https://github.com/{cocoapods_org_gh_repo_owner}/{cocoapods_org_gh_repo_name}"
        gh_repo_head_response = utils.make_head_request(cocoapods_org_gh_repo_url)
        purl_to_pod_data_with_soup["cocoapods_org_gh_repo_url_status_code"] = gh_repo_head_response.status_code

        # The API response is only inspected to report a failed fetch.
        base_path = "https://api.github.com/repos"
        api_url = f"{base_path}/{cocoapods_org_gh_repo_owner}/{cocoapods_org_gh_repo_name}"
        github_rest_no_exception_response = utils.get_github_rest_no_exception(api_url)
        if "Failed to fetch" in github_rest_no_exception_response:
            logger.error(f"{github_rest_no_exception_response}")
            print(f"{github_rest_no_exception_response}")

    purl_to_pod_data_with_soup["cocoapods_org_gh_repo_owner"] = cocoapods_org_gh_repo_owner
    purl_to_pod_data_with_soup["cocoapods_org_gh_repo_name"] = cocoapods_org_gh_repo_name
    purl_to_pod_data_with_soup["cocoapods_org_gh_repo_url"] = cocoapods_org_gh_repo_url
    purl_to_pod_data_with_soup["cocoapods_org_podspec_url"] = cocoapods_org_podspec_url
    purl_to_pod_data_with_soup["cocoapods_org_pkg_home_url"] = cocoapods_org_pkg_home_url

    if cocoapods_org_gh_repo_owner is None or cocoapods_org_gh_repo_name is None:
        no_github_repo = f"No GitHub repo found on cocoapods.org for {name}"
        print(f"{no_github_repo}")
        logger.warning(no_github_repo)

    if cocoapods_org_podspec_url is None:
        no_podspec = f"No podspec found on cocoapods.org for {name}"
        print(f"{no_podspec}")
        logger.warning(no_podspec)
        purl_to_pod_data_with_soup["no_podspec"] = no_podspec

    # Compute the version before storing it: the version is the directory
    # that contains the podspec file in the podspec URL.
    cocoapods_org_version = None
    if cocoapods_org_podspec_url:
        podspec_url_segments = cocoapods_org_podspec_url.split("/")
        if len(podspec_url_segments) >= 2:
            cocoapods_org_version = podspec_url_segments[-2]
    purl_to_pod_data_with_soup["cocoapods_org_version"] = cocoapods_org_version

    cocoapods_org_pod_name = None
    head = soup.find("head")
    if head:
        og_title_tag = head.find("meta", property="og:title")
        if og_title_tag:
            cocoapods_org_pod_name = og_title_tag.get("content")
        else:
            no_meta_tag = f"'og:title' meta tag not found in cocoapods.org page for {purl}"
            print(no_meta_tag)
            logger.error(no_meta_tag)
    else:
        no_head_section = f"\n<head> section not found in cocoapods.org page for {purl}"
        print(no_head_section)
        logger.error(no_head_section)

    purl_to_pod_data_with_soup["cocoapods_org_pod_name"] = cocoapods_org_pod_name
    if name != cocoapods_org_pod_name:
        name_change = f"Input PURL name '{name}' analyzed as '{cocoapods_org_pod_name}' per {cocoapods_org_url}"
        print(f"{name_change}")
        logger.warning(name_change)

    return purl_to_pod_data_with_soup
906+
907+
908+
def get_cocoapod_tags(spec, cocoapods_org_pod_name):
    """
    Return the versions listed for ``cocoapods_org_pod_name`` in the text
    fetched from the ``spec`` URL, sorted from newest to oldest, or None if
    the pod is not listed or the data cannot be retrieved or sorted.

    Each line of the fetched text is treated as "/"-separated fields: the
    pod name followed by its versions. Only a line whose first field is
    exactly ``cocoapods_org_pod_name`` is used, so a pod whose name merely
    starts with the requested name is never mistaken for it.
    """
    try:
        response = utils.get_text_response(spec)
        for line in response.strip().splitlines():
            pod_name, *pod_versions = line.strip().split("/")
            if pod_name != cocoapods_org_pod_name:
                continue
            return sorted(
                pod_versions,
                key=versions.SemverVersion,
                reverse=True,
            )
        return None
    except Exception:
        # Best effort: report the failure and let the caller handle None.
        # KeyboardInterrupt and SystemExit are deliberately not caught.
        print("Error retrieving cocoapods tag data from cdn.cocoapods.org")
        return None
928+
929+
930+
def construct_cocoapods_package(
    purl,
    name,
    hashed_path,
    repository_homepage_url,
    cocoapods_org_gh_repo_owner,
    cocoapods_org_gh_repo_name,
    tag,
    cocoapods_org_pod_name
):
    """
    Return a `Package` for version ``tag`` of the pod
    ``cocoapods_org_pod_name``, or None if its podspec cannot be fetched.

    The podspec is read from the CocoaPods/Specs GitHub repository at
    ``hashed_path``; it provides the homepage, the license (used when GitHub
    reports none), the download URL and the VCS URL. When both
    ``cocoapods_org_gh_repo_owner`` and ``cocoapods_org_gh_repo_name`` are
    set, the GitHub REST API provides the declared license and the primary
    language, and the bug tracking and code view URLs point to that repo.

    ``purl`` must provide ``to_dict()``; its items are passed to `Package` as
    keyword arguments. ``name`` is accepted for compatibility with existing
    callers and is not used.
    """
    bug_tracking_url = None
    code_view_url = None
    declared_license = None
    primary_language = None

    if cocoapods_org_gh_repo_owner and cocoapods_org_gh_repo_name:
        repo_path = f"{cocoapods_org_gh_repo_owner}/{cocoapods_org_gh_repo_name}"
        gh_repo_api_response = utils.get_github_rest_no_exception(
            f"https://api.github.com/repos/{repo_path}"
        )

        # NOTE(review): the GitHub "homepage" and "git_url" values are not
        # read because the podspec values below always replaced them.
        if isinstance(gh_repo_api_response, dict):
            license_data = gh_repo_api_response.get("license") or {}
            declared_license = license_data.get("spdx_id")
            primary_language = gh_repo_api_response.get("language")

        code_view_url = f"https://github.com/{repo_path}"
        bug_tracking_url = f"{code_view_url}/issues"

    podspec_api_url = f"https://raw.githubusercontent.com/CocoaPods/Specs/master/Specs/{hashed_path}/{cocoapods_org_pod_name}/{tag}/{cocoapods_org_pod_name}.podspec.json"
    podspec_api_response = utils.get_json_response(podspec_api_url)

    # NOTE(review): assumes a successful response is a JSON object and any
    # failure is a non-dict value such as a "Failed to fetch: ..." string.
    if not isinstance(podspec_api_response, dict):
        logger.error(f"{podspec_api_response}")
        print(f"{podspec_api_response}")
        return None

    homepage_url = podspec_api_response.get("homepage")

    # The podspec license (a mapping or a plain string) is only a fallback
    # for the SPDX id reported by GitHub.
    if not declared_license:
        declared_license = podspec_api_response.get("license")

    vcs_url = None
    download_url = None
    source = podspec_api_response.get("source")
    if isinstance(source, dict):
        git_url = source.get("git", "")
        http_url = source.get("http", "")
        if git_url:
            vcs_url = git_url

        if http_url:
            download_url = http_url
        elif git_url and git_url.endswith(".git") and "github" in git_url:
            # Derive a GitHub tag archive URL; prefer the podspec tag when it
            # carries a "v" prefix that ``tag`` lacks.
            source_tag = source.get("tag")
            corrected_tag = tag
            if isinstance(source_tag, str) and source_tag.startswith("v"):
                corrected_tag = source_tag
            download_url = f"{git_url[:-4]}/archive/refs/tags/{corrected_tag}.tar.gz"
    elif isinstance(source, str):
        vcs_url = source

    purl_pkg = Package(
        homepage_url=homepage_url,
        api_url=podspec_api_url,
        bug_tracking_url=bug_tracking_url,
        code_view_url=code_view_url,
        download_url=download_url,
        declared_license=declared_license,
        primary_language=primary_language,
        repository_homepage_url=repository_homepage_url,
        vcs_url=vcs_url,
        **purl.to_dict(),
    )
    purl_pkg.version = tag
    return purl_pkg

src/fetchcode/utils.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -202,10 +202,15 @@ def get_json_response(url, headers=None):
202202
return f"Failed to fetch: {url}"
203203

204204

205+
def get_text_response(url, headers=None, timeout=None):
    """
    Return the text body of a GET request to the ``url`` string, or the
    string "Failed to fetch: <url>" if the status code is not 200.

    ``headers`` and ``timeout`` are passed to ``requests.get``. With the
    default ``timeout`` of None the request waits indefinitely for the
    server; pass a number of seconds to bound the wait.
    """
    resp = requests.get(url, headers=headers, timeout=timeout)
    if resp.status_code == 200:
        return resp.text

    return f"Failed to fetch: {url}"
211+
212+
205213
def get_complete_response(url, headers=None, params=None):
206-
"""
207-
Generate `Package` object for a `url` string
208-
"""
209214
resp = requests.get(url, headers=headers, params=params)
210215
if resp.status_code == 200:
211216
return resp
@@ -216,9 +221,6 @@ def get_complete_response(url, headers=None, params=None):
216221

217222

218223
def make_head_request(url, headers=None):
219-
"""
220-
Check whether the URL status code is 200 or not.
221-
"""
222224
try:
223225
resp = requests.head(url, headers=headers)
224226

0 commit comments

Comments
 (0)