|
15 | 15 | # specific language governing permissions and limitations under the License. |
16 | 16 |
|
17 | 17 | import dataclasses |
| 18 | +import logging |
| 19 | +import os |
18 | 20 | import re |
19 | 21 |
|
20 | 22 | import attr |
| 23 | +from bs4 import BeautifulSoup |
| 24 | +from univers import versions |
21 | 25 |
|
22 | 26 | from fetchcode import utils |
23 | 27 | from fetchcode.packagedcode_models import Package |
24 | 28 |
|
# Path of a "purlcli.log" file located in the current user's home directory.
# NOTE(review): nothing in the visible part of this file reads this constant;
# confirm where (or whether) a log handler is attached to it.
| 29 | +LOG_FILE_LOCATION = os.path.join(os.path.expanduser("~"), "purlcli.log")
# Module-level logger named after this module; the cocoapods helpers below
# report errors and warnings through it.
| 30 | +logger = logging.getLogger(__name__)
| 31 | + |
25 | 32 |
|
26 | 33 | def package_from_dict(package_data): |
27 | 34 | """ |
@@ -723,3 +730,294 @@ def get_package_info(cls, gh_purl, package_name): |
723 | 730 | "date": "2002-08-19T04:23:00", |
724 | 731 | }, |
725 | 732 | } |
| 733 | + |
| 734 | + |
def get_cocoapods_org_url_status(purl, name, cocoapods_org_url):
    """
    Return a mapping describing what a HEAD request to ``cocoapods_org_url``
    reveals about the pod ``name``.

    The mapping always carries a "return_message" key, one of:

    - "cocoapods_org_url_not_found": the HEAD request answered 404.
    - "cocoapods_org_url_temporarily_unavailable": it answered 503.
    - "cocoapods_org_url_redirects": it answered 302 to a location that is
      not under https://github.com/ (or without a Location header).
    - "failed_to_fetch_github_redirect" or "github_redirect_not_found": the
      GitHub page the 302 points to could not be retrieved.
    - "github_redirect_error": the GitHub redirect could not be interpreted
      (no owner/repository in the URL, no <head>, or no usable "og:url").
    - "cocoapods_org_redirects_to_github": success; the mapping then also
      holds "corrected_name", "cocoapods_org_pod_name",
      "cocoapods_org_gh_repo_owner", "cocoapods_org_gh_repo_name" and
      "cocoapods_org_version" (always None on this path).
    - None: any other status code.

    ``purl`` is only used in log messages.
    """
    github_prefix = "https://github.com/"
    purl_to_cocoapods_org_url_status = {}
    head_response = utils.make_head_request(cocoapods_org_url)
    status_code = head_response.status_code

    if status_code == 404:
        logger.error(f"cocoapods_org_url not found for {name}")
        purl_to_cocoapods_org_url_status["return_message"] = "cocoapods_org_url_not_found"
        return purl_to_cocoapods_org_url_status

    if status_code == 503:
        logger.error(f"cocoapods_org_url temporarily unavailable for {name}")
        purl_to_cocoapods_org_url_status["return_message"] = "cocoapods_org_url_temporarily_unavailable"
        return purl_to_cocoapods_org_url_status

    if status_code != 302:
        purl_to_cocoapods_org_url_status["return_message"] = None
        return purl_to_cocoapods_org_url_status

    # A 302 without a Location header must not raise; it is reported like any
    # other non-GitHub redirect.
    redirect_url = head_response.headers.get("Location")
    redirect_message = f"The cocoapods.org URL {cocoapods_org_url} redirects to {redirect_url}"
    logger.warning(redirect_message)
    print(redirect_message)

    if not redirect_url or not redirect_url.startswith(github_prefix):
        purl_to_cocoapods_org_url_status["return_message"] = "cocoapods_org_url_redirects"
        return purl_to_cocoapods_org_url_status

    # Owner and repository are the FIRST two path segments after the host, so
    # a trailing slash, a deeper path, a query string or a fragment cannot
    # shift them.
    repo_path = redirect_url[len(github_prefix):].split("?")[0].split("#")[0]
    path_segments = [segment for segment in repo_path.split("/") if segment]
    if len(path_segments) < 2:
        bad_redirect = f"Cannot determine GitHub owner and repository from redirect: {redirect_url}"
        logger.error(bad_redirect)
        print(bad_redirect)
        purl_to_cocoapods_org_url_status["return_message"] = "github_redirect_error"
        return purl_to_cocoapods_org_url_status
    gh_repo_namespace, gh_repo_name = path_segments[:2]

    # NOTE(review): the membership tests below presume the helper returns a
    # string sentinel on failure and a response object otherwise -- confirm
    # against utils.get_complete_response.
    redirect_to_gh_response = utils.get_complete_response(redirect_url)
    if "Failed to fetch" in redirect_to_gh_response:
        logger.error(redirect_to_gh_response)
        print(redirect_to_gh_response)
        purl_to_cocoapods_org_url_status["return_message"] = "failed_to_fetch_github_redirect"
        return purl_to_cocoapods_org_url_status
    if "not_found" in redirect_to_gh_response:
        redirect_to_gh_not_found = f"Redirect to GitHub not found: {redirect_url}"
        logger.error(redirect_to_gh_not_found)
        print(redirect_to_gh_not_found)
        purl_to_cocoapods_org_url_status["return_message"] = "github_redirect_not_found"
        return purl_to_cocoapods_org_url_status

    soup = BeautifulSoup(redirect_to_gh_response.text, "html.parser")
    head = soup.find("head")
    if not head:
        no_head_section = f"\n<head> section not found in redirect_to_gh_response page for {purl}"
        print(no_head_section)
        logger.error(no_head_section)
        purl_to_cocoapods_org_url_status["return_message"] = "github_redirect_error"
        return purl_to_cocoapods_org_url_status

    og_url_tag = head.find("meta", property="og:url")
    # A tag without a "content" attribute is as unusable as a missing tag.
    og_url = og_url_tag.get("content") if og_url_tag else None
    if not og_url:
        no_meta_tag = f"'og:url' meta tag not found in redirect_to_gh_response page for {purl}"
        print(no_meta_tag)
        logger.error(no_meta_tag)
        purl_to_cocoapods_org_url_status["return_message"] = "github_redirect_error"
        return purl_to_cocoapods_org_url_status

    # The last path segment of the og:url is used as the corrected pod name.
    corrected_name = og_url.rstrip("/").split("/")[-1]

    purl_to_cocoapods_org_url_status["corrected_name"] = corrected_name
    purl_to_cocoapods_org_url_status["cocoapods_org_pod_name"] = corrected_name
    purl_to_cocoapods_org_url_status["cocoapods_org_gh_repo_owner"] = gh_repo_namespace
    purl_to_cocoapods_org_url_status["cocoapods_org_gh_repo_name"] = gh_repo_name
    # No version can be derived from a redirect; the key is kept for callers.
    purl_to_cocoapods_org_url_status["cocoapods_org_version"] = None
    purl_to_cocoapods_org_url_status["return_message"] = "cocoapods_org_redirects_to_github"
    return purl_to_cocoapods_org_url_status
| 815 | + |
| 816 | + |
def get_pod_data_with_soup(purl, name, cocoapods_org_url):
    """
    Scrape the cocoapods.org page at ``cocoapods_org_url`` and return a
    mapping of the data found there, or None when the page cannot be fetched.

    The returned mapping holds these keys:

    - "cocoapods_org_gh_repo_url_status_code": status code of a HEAD request
      to the GitHub repository URL (only present when a "GitHub Repo" link
      was found).
    - "cocoapods_org_gh_repo_owner", "cocoapods_org_gh_repo_name",
      "cocoapods_org_gh_repo_url": taken from the "GitHub Repo" sidebar link.
    - "cocoapods_org_podspec_url": target of the "See Podspec" sidebar link.
    - "cocoapods_org_pkg_home_url": target of the "Homepage" sidebar link.
    - "no_podspec": a message, only present when no podspec link was found.
    - "cocoapods_org_version": second-to-last path segment of the podspec
      URL, or None.
    - "cocoapods_org_pod_name": content of the page's "og:title" meta tag,
      or None.

    ``purl`` and ``name`` are used in log messages; ``name`` is also compared
    with the pod name found on the page to warn about a mismatch.
    """
    purl_to_pod_data_with_soup = {}
    cocoapods_org_response = utils.get_complete_response(cocoapods_org_url)
    if "Failed to fetch" in cocoapods_org_response:
        logger.error(cocoapods_org_response)
        print(cocoapods_org_response)
        return
    if isinstance(cocoapods_org_response, str):
        # Any other string sentinel (get_cocoapods_org_url_status checks the
        # same helper for "not_found") has no ``.text`` to parse.
        page_unavailable = f"cocoapods.org page not available for {name}: {cocoapods_org_response}"
        logger.error(page_unavailable)
        print(page_unavailable)
        return

    soup = BeautifulSoup(cocoapods_org_response.text, "html.parser")
    cocoapods_org_gh_repo_owner = None
    cocoapods_org_gh_repo_name = None
    cocoapods_org_gh_repo_url = None
    cocoapods_org_podspec_url = None
    cocoapods_org_pkg_home_url = None

    for sidebar_links in soup.find_all("ul", class_="links"):
        for nested_link in sidebar_links.find_all("a"):
            link_url = nested_link.get("href")
            if not link_url:
                # An anchor without a target carries no usable data.
                continue
            link_text = nested_link.text
            if link_text == "Homepage":
                cocoapods_org_pkg_home_url = link_url
            elif link_text == "GitHub Repo":
                # Strip a trailing slash so the last two segments really are
                # the owner and the repository name.
                split_link = link_url.rstrip("/").split("/")
                if len(split_link) >= 2:
                    cocoapods_org_gh_repo_owner = split_link[-2]
                    cocoapods_org_gh_repo_name = split_link[-1]
            elif link_text == "See Podspec":
                cocoapods_org_podspec_url = link_url

    if cocoapods_org_gh_repo_owner and cocoapods_org_gh_repo_name:
        cocoapods_org_gh_repo_url = f"https://github.com/{cocoapods_org_gh_repo_owner}/{cocoapods_org_gh_repo_name}"
        repo_head_response = utils.make_head_request(cocoapods_org_gh_repo_url)
        purl_to_pod_data_with_soup["cocoapods_org_gh_repo_url_status_code"] = repo_head_response.status_code

        # The API response is only inspected to report a failed fetch.
        base_path = "https://api.github.com/repos"
        api_url = f"{base_path}/{cocoapods_org_gh_repo_owner}/{cocoapods_org_gh_repo_name}"
        github_rest_no_exception_response = utils.get_github_rest_no_exception(api_url)
        if "Failed to fetch" in github_rest_no_exception_response:
            logger.error(f"{github_rest_no_exception_response}")
            print(f"{github_rest_no_exception_response}")

    purl_to_pod_data_with_soup["cocoapods_org_gh_repo_owner"] = cocoapods_org_gh_repo_owner
    purl_to_pod_data_with_soup["cocoapods_org_gh_repo_name"] = cocoapods_org_gh_repo_name
    purl_to_pod_data_with_soup["cocoapods_org_gh_repo_url"] = cocoapods_org_gh_repo_url
    purl_to_pod_data_with_soup["cocoapods_org_podspec_url"] = cocoapods_org_podspec_url
    purl_to_pod_data_with_soup["cocoapods_org_pkg_home_url"] = cocoapods_org_pkg_home_url

    if cocoapods_org_gh_repo_owner is None or cocoapods_org_gh_repo_name is None:
        no_github_repo = f"No GitHub repo found on cocoapods.org for {name}"
        print(f"{no_github_repo}")
        logger.warning(no_github_repo)

    if cocoapods_org_podspec_url is None:
        no_podspec = f"No podspec found on cocoapods.org for {name}"
        print(f"{no_podspec}")
        logger.warning(no_podspec)
        purl_to_pod_data_with_soup["no_podspec"] = no_podspec

    # Derive the version BEFORE storing it; storing first would always
    # record None.
    cocoapods_org_version = None
    if cocoapods_org_podspec_url:
        podspec_url_segments = cocoapods_org_podspec_url.split("/")
        if len(podspec_url_segments) >= 2:
            cocoapods_org_version = podspec_url_segments[-2]
    purl_to_pod_data_with_soup["cocoapods_org_version"] = cocoapods_org_version

    cocoapods_org_pod_name = None
    head = soup.find("head")
    if head:
        og_title_tag = head.find("meta", property="og:title")
        if og_title_tag:
            cocoapods_org_pod_name = og_title_tag.get("content")
        else:
            no_meta_tag = f"'og:title' meta tag not found in cocoapods.org page for {purl}"
            print(no_meta_tag)
            logger.error(no_meta_tag)
    else:
        no_head_section = f"\n<head> section not found in cocoapods.org page for {purl}"
        print(no_head_section)
        logger.error(no_head_section)

    purl_to_pod_data_with_soup["cocoapods_org_pod_name"] = cocoapods_org_pod_name
    if name != cocoapods_org_pod_name:
        name_change = f"Input PURL name '{name}' analyzed as '{cocoapods_org_pod_name}' per {cocoapods_org_url}"
        print(f"{name_change}")
        logger.warning(name_change)

    return purl_to_pod_data_with_soup
| 906 | + |
| 907 | + |
def get_cocoapod_tags(spec, cocoapods_org_pod_name):
    """
    Return the version tags listed for ``cocoapods_org_pod_name`` in the text
    fetched from the ``spec`` URL, sorted from newest to oldest.

    Each line of the fetched text is read as "<pod name>/<tag>/<tag>/...".
    The tags of the first line whose pod name equals
    ``cocoapods_org_pod_name`` exactly are returned, ordered as
    ``versions.SemverVersion`` objects in descending order.

    Return None when no line matches, or when fetching, parsing or version
    comparison fails.
    """
    try:
        response = utils.get_text_response(spec)
        for line in response.strip().splitlines():
            pod_name, *tags = line.strip().split("/")
            # An exact match is required: a prefix test alone would also
            # accept a pod such as "Foo+Bar" when looking for "Foo".
            if pod_name == cocoapods_org_pod_name:
                return sorted(tags, key=versions.SemverVersion, reverse=True)
        return None
    except Exception:
        # Stay best-effort for callers, but keep the cause in the log and do
        # not swallow KeyboardInterrupt/SystemExit as a bare ``except`` would.
        logger.exception(f"Error retrieving cocoapods tag data from {spec}")
        print("Error retrieving cocoapods tag data from cdn.cocoapods.org")
        return None
| 928 | + |
| 929 | + |
def construct_cocoapods_package(
    purl,
    name,
    hashed_path,
    repository_homepage_url,
    cocoapods_org_gh_repo_owner,
    cocoapods_org_gh_repo_name,
    tag,
    cocoapods_org_pod_name
):
    """
    Return a ``Package`` for version ``tag`` of the pod
    ``cocoapods_org_pod_name``, or None when its podspec JSON cannot be
    fetched.

    Data comes from two places:

    - When both ``cocoapods_org_gh_repo_owner`` and
      ``cocoapods_org_gh_repo_name`` are given, the GitHub REST API supplies
      the declared license ("spdx_id") and the primary language, and the
      issue-tracker and code-view URLs are built from the owner and
      repository name.
    - The podspec JSON in the CocoaPods/Specs repository (located with
      ``hashed_path``, the pod name and ``tag``) supplies the homepage, the
      license when GitHub gave none, and the download and VCS URLs from its
      "source" entry.

    ``purl`` must provide ``to_dict()``; its fields are passed to ``Package``.
    ``name`` is accepted for interface compatibility and is not used.
    """
    bug_tracking_url = None
    code_view_url = None
    declared_license = None
    primary_language = None

    if cocoapods_org_gh_repo_owner and cocoapods_org_gh_repo_name:
        repo_path = f"{cocoapods_org_gh_repo_owner}/{cocoapods_org_gh_repo_name}"
        gh_repo_api_response = utils.get_github_rest_no_exception(
            f"https://api.github.com/repos/{repo_path}"
        )

        # Only license and language are taken from GitHub: homepage and VCS
        # URL always come from the podspec below.
        if "Failed to fetch" not in gh_repo_api_response:
            license_data = gh_repo_api_response.get("license") or {}
            declared_license = license_data.get("spdx_id")
            primary_language = gh_repo_api_response.get("language")

        code_view_url = f"https://github.com/{repo_path}"
        bug_tracking_url = f"{code_view_url}/issues"

    corrected_name = cocoapods_org_pod_name
    podspec_api_url = f"https://raw.githubusercontent.com/CocoaPods/Specs/master/Specs/{hashed_path}/{corrected_name}/{tag}/{corrected_name}.podspec.json"
    podspec_api_response = utils.get_json_response(podspec_api_url)

    if "Failed to fetch" in podspec_api_response:
        logger.error(f"{podspec_api_response}")
        print(f"{podspec_api_response}")
        return

    homepage_url = podspec_api_response.get("homepage")

    # The podspec license (a mapping or a plain string) is used as-is, and
    # only when GitHub did not provide an SPDX id.
    if not declared_license:
        declared_license = podspec_api_response.get("license")

    source = podspec_api_response.get("source")
    vcs_url = None
    download_url = None
    if isinstance(source, dict):
        git_url = source.get("git", "")
        http_url = source.get("http", "")
        if http_url:
            download_url = http_url
        elif git_url and git_url.endswith(".git") and "github" in git_url:
            # No archive URL given: derive the GitHub tag tarball. The tag
            # recorded in the podspec is preferred when it is "v"-prefixed,
            # since the tag on GitHub then differs from the bare version.
            corrected_tag = tag
            source_tag = source.get("tag")
            if isinstance(source_tag, str) and source_tag.startswith("v"):
                corrected_tag = source_tag
            download_url = f"{git_url[:-4]}/archive/refs/tags/{corrected_tag}.tar.gz"
        if git_url:
            # Record the git source whatever its host or suffix.
            vcs_url = git_url
    elif isinstance(source, str):
        vcs_url = source

    purl_pkg = Package(
        homepage_url=homepage_url,
        api_url=podspec_api_url,
        bug_tracking_url=bug_tracking_url,
        code_view_url=code_view_url,
        download_url=download_url,
        declared_license=declared_license,
        primary_language=primary_language,
        repository_homepage_url=repository_homepage_url,
        vcs_url=vcs_url,
        **purl.to_dict(),
    )
    purl_pkg.version = tag
    return purl_pkg
0 commit comments