Skip to content
Merged
2 changes: 1 addition & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
strategy:
max-parallel: 4
matrix:
python-version: [3.8]
python-version: [3.9]

steps:
- name: Checkout code
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
strategy:
max-parallel: 4
matrix:
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.9", "3.10", "3.11"]

steps:
- name: Checkout code
Expand Down
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
Release notes
=============

Version (next)
-------------------

- Add pipeline to flag ghost packages (#1533)
- Add logging configuration (#1533)
- Drop support for Python 3.8 (#1533)


Version v34.0.0
-------------------

Expand Down
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
aboutcode.pipeline==0.1.0
aiosignal==1.2.0
alabaster==0.7.12
asgiref==3.5.2
Expand All @@ -10,6 +11,7 @@ bcrypt==3.2.0
beautifulsoup4==4.10.0
binaryornot==0.4.4
black==22.3.0
bleach==6.1.0
boolean.py==3.8
certifi==2024.7.4
cffi==1.15.0
Expand Down Expand Up @@ -49,6 +51,7 @@ jsonschema==3.2.0
license-expression==21.6.14
lxml==4.9.1
Markdown==3.3.4
markdown-it-py==3.0.0
MarkupSafe==2.1.1
matplotlib-inline==0.1.3
multidict==6.0.2
Expand Down
5 changes: 4 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ license_files =
README.rst

[options]
python_requires = >=3.8
python_requires = >=3.9

packages=find:
include_package_data = true
Expand Down Expand Up @@ -92,6 +92,9 @@ install_requires =
requests>=2.25.1
fetchcode>=0.3.0

#pipeline
aboutcode.pipeline>=0.1.0

#vulntotal
python-dotenv
texttable
Expand Down
2 changes: 2 additions & 0 deletions vulnerabilities/improvers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from vulnerabilities.improvers import valid_versions
from vulnerabilities.improvers import vulnerability_kev
from vulnerabilities.improvers import vulnerability_status
from vulnerabilities.pipelines import flag_ghost_packages

IMPROVERS_REGISTRY = [
valid_versions.GitHubBasicImprover,
Expand All @@ -29,6 +30,7 @@
valid_versions.GithubOSVImprover,
vulnerability_status.VulnerabilityStatusImprover,
vulnerability_kev.VulnerabilityKevImprover,
flag_ghost_packages.FlagGhostPackagePipeline,
]

IMPROVERS_REGISTRY = {x.qualified_name: x for x in IMPROVERS_REGISTRY}
8 changes: 8 additions & 0 deletions vulnerabilities/management/commands/improve.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from vulnerabilities.improve_runner import ImproveRunner
from vulnerabilities.improvers import IMPROVERS_REGISTRY
from vulnerabilities.pipelines import VulnerableCodePipeline


class Command(BaseCommand):
Expand Down Expand Up @@ -56,6 +57,13 @@ def improve_data(self, improvers):

for improver in improvers:
self.stdout.write(f"Improving data using {improver.qualified_name}")
if issubclass(improver, VulnerableCodePipeline):
status, error = improver().execute()
if status != 0:
self.stdout.write(error)
failed_improvers.append(improver.qualified_name)
continue

try:
ImproveRunner(improver_class=improver).run()
self.stdout.write(
Expand Down
21 changes: 21 additions & 0 deletions vulnerabilities/migrations/0062_package_is_ghost.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Generated by Django 4.1.13 on 2024-08-23 12:47

from django.db import migrations, models


class Migration(migrations.Migration):
    # Schema migration that adds the boolean ``is_ghost`` field to the ``package`` model.

    # Depends on migration 0061 of the ``vulnerabilities`` app.
    dependencies = [
        ("vulnerabilities", "0061_alter_packagechangelog_software_version_and_more"),
    ]

    operations = [
        # The new field defaults to False: a package is not a ghost until flagged as one.
        migrations.AddField(
            model_name="package",
            name="is_ghost",
            field=models.BooleanField(
                default=False,
                help_text="True if the package does not exist in the upstream package manager or its repository.",
            ),
        ),
    ]
5 changes: 5 additions & 0 deletions vulnerabilities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,11 @@ class Package(PackageURLMixin):
db_index=True,
)

is_ghost = models.BooleanField(
default=False,
help_text="True if the package does not exist in the upstream package manager or its repository.",
)

objects = PackageQuerySet.as_manager()

def save(self, *args, **kwargs):
Expand Down
34 changes: 34 additions & 0 deletions vulnerabilities/pipelines/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#
import logging
from datetime import datetime
from datetime import timezone

from aboutcode.pipeline import BasePipeline

from vulnerabilities.utils import classproperty

module_logger = logging.getLogger(__name__)


class VulnerableCodePipeline(BasePipeline):
    """Base class for VulnerableCode pipelines: timestamped logging and a qualified name."""

    def log(self, message, level=logging.INFO):
        """
        Prefix `message` with the current local time (millisecond precision), emit it
        on the module logger at `level`, and pass it to `append_to_log`.
        """
        stamp = datetime.now(timezone.utc).astimezone().strftime("%Y-%m-%d %H:%M:%S.%f")
        # "%f" yields microseconds; dropping the last three digits leaves milliseconds.
        entry = f"{stamp[:-3]} {message}"
        module_logger.log(level, entry)
        self.append_to_log(entry)

    @classproperty
    def qualified_name(cls):
        """Return the module path and the class qualified name joined by a dot."""
        module_name = cls.__module__
        class_name = cls.__qualname__
        return f"{module_name}.{class_name}"
102 changes: 102 additions & 0 deletions vulnerabilities/pipelines/flag_ghost_packages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import logging
from itertools import groupby
from traceback import format_exc as traceback_format_exc

from aboutcode.pipeline import LoopProgress
from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS as FETCHCODE_SUPPORTED_ECOSYSTEMS
from fetchcode.package_versions import versions
from packageurl import PackageURL

from vulnerabilities.models import Package
from vulnerabilities.pipelines import VulnerableCodePipeline


class FlagGhostPackagePipeline(VulnerableCodePipeline):
    """Pipeline that flags packages which cannot be found upstream."""

    @classmethod
    def steps(cls):
        """Return the steps of this pipeline: a single ghost-flagging step."""
        pipeline_steps = (cls.flag_ghost_packages,)
        return pipeline_steps

    def flag_ghost_packages(self):
        """Run the ghost package detection, reporting through this pipeline's `log`."""
        detect_and_flag_ghost_packages(logger=self.log)


def detect_and_flag_ghost_packages(logger=None):
    """
    Flag as ghost the stored packages whose version is not known upstream.

    Only packages with a type listed in the fetchcode supported ecosystems and
    with empty qualifiers and subpath are considered. When `logger` is provided,
    it is called at the end with the number of packages flagged.
    """
    purl_fields = ("type", "namespace", "name")

    candidates = Package.objects.filter(
        type__in=FETCHCODE_SUPPORTED_ECOSYSTEMS,
        qualifiers="",
        subpath="",
    ).order_by(*purl_fields)

    # One loop iteration per distinct type/namespace/name; used to size the progress.
    base_purl_count = candidates.values(*purl_fields).distinct(*purl_fields).count()

    # groupby only merges adjacent items: the queryset is ordered by the same fields.
    packages_by_base_purl = groupby(
        candidates.paginated(),
        key=lambda package: (package.type, package.namespace, package.name),
    )

    progress = LoopProgress(total_iterations=base_purl_count, logger=logger)
    flagged_total = sum(
        flag_ghost_packages(
            base_purl=PackageURL(*fields),
            packages=group,
            logger=logger,
        )
        for fields, group in progress.iter(packages_by_base_purl)
    )

    if logger:
        logger(f"Successfully flagged {flagged_total:,d} ghost Packages")


def flag_ghost_packages(base_purl, packages, logger=None):
    """
    Set `is_ghost` on each of `packages` and save it.

    A package is a ghost when its version, ignoring leading "v"/"V" characters,
    is not among the versions known upstream for `base_purl`.
    Return the number of packages flagged as ghost, or 0 without touching any
    package when the upstream versions could not be fetched.
    """
    upstream_versions = get_versions(purl=base_purl, logger=logger)
    # None signals a fetch error: do not flag anything on incomplete information.
    if upstream_versions is None:
        return 0

    flagged = 0
    for package in packages:
        missing_upstream = package.version.lstrip("vV") not in upstream_versions
        package.is_ghost = missing_upstream
        if missing_upstream:
            flagged += 1
            if logger:
                logger(f"Flagging ghost package {package.purl!s}", level=logging.DEBUG)
        # Saved in both cases so that a previously flagged package gets unflagged.
        package.save()

    return flagged


def get_versions(purl, logger=None):
    """
    Return the set of versions known upstream for `purl`, each stripped of
    leading "v"/"V" characters.

    Return None when fetching fails; the error is reported through `logger`
    when one is provided.
    """
    try:
        upstream_versions = versions(str(purl))
        return {version.value.lstrip("vV") for version in upstream_versions}
    except Exception as error:
        # Best effort: a failure for one purl must not abort the caller's loop.
        if logger:
            logger(
                f"Error while fetching known versions for {purl!s}: {error!r} \n {traceback_format_exc()}",
                level=logging.ERROR,
            )
        return None
15 changes: 15 additions & 0 deletions vulnerabilities/templates/package_details.html
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,21 @@
{{ fixed_package_details.purl.to_string }}
</td>
</tr>
{% if package.is_ghost %}
<tr>
<td class="two-col-left">
Tags
</td>
<td class="two-col-right">
<span
class="tag is-warning is-hoverable has-tooltip-multiline has-tooltip-black"
data-tooltip="This package does not exist in the upstream package manager or its repository."
style="margin-right: 8px;">
Ghost
</span>
</td>
</tr>
{% endif %}
</tbody>
</table>
</div>
Expand Down
20 changes: 20 additions & 0 deletions vulnerabilities/tests/pipelines/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import io


class TestLogger:
    """
    In-memory logger for tests: collects every message passed to `write`.

    Each instance owns its buffer, so messages logged during one test never
    show up in the output read by another test.
    """

    # Helper, not a test case: stop pytest from collecting it because of the "Test" prefix.
    __test__ = False

    def __init__(self):
        self.buffer = io.StringIO()

    def write(self, msg, level=None):
        """Append `msg` to the buffer; `level` is accepted but ignored."""
        self.buffer.write(msg)

    def getvalue(self):
        """Return everything written to this logger so far as a single string."""
        return self.buffer.getvalue()
71 changes: 71 additions & 0 deletions vulnerabilities/tests/pipelines/test_flag_ghost_packages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#


from pathlib import Path
from unittest import mock

from django.test import TestCase
from fetchcode.package_versions import PackageVersion
from packageurl import PackageURL

from vulnerabilities.models import Package
from vulnerabilities.pipelines import flag_ghost_packages
from vulnerabilities.tests.pipelines import TestLogger


class FlagGhostPackagePipelineTest(TestCase):
    """Tests for the ghost package flagging functions, with fetchcode `versions` mocked."""

    data = Path(__file__).parent.parent / "test_data"

    def ghost_count(self):
        """Return the number of stored packages currently flagged as ghost."""
        return Package.objects.filter(is_ghost=True).count()

    @mock.patch("vulnerabilities.pipelines.flag_ghost_packages.versions")
    def test_flag_ghost_package(self, mock_fetchcode_versions):
        """Only the package whose version is unknown upstream is flagged."""
        for version in ("2.3.0", "3.0.0"):
            Package.objects.create(type="pypi", name="foo", version=version)

        # Upstream only knows 2.3.0, so 3.0.0 is the ghost.
        mock_fetchcode_versions.return_value = [
            PackageVersion(value="2.3.0"),
        ]
        all_packages = Package.objects.all()
        foo_purl = PackageURL(type="pypi", name="foo")

        self.assertEqual(0, self.ghost_count())

        flagged = flag_ghost_packages.flag_ghost_packages(
            base_purl=foo_purl,
            packages=all_packages,
        )
        self.assertEqual(1, flagged)
        self.assertEqual(1, self.ghost_count())

    @mock.patch("vulnerabilities.pipelines.flag_ghost_packages.versions")
    def test_detect_and_flag_ghost_packages(self, mock_fetchcode_versions):
        """The full detection flags one package and logs the final count."""
        for version in ("2.3.0", "3.0.0"):
            Package.objects.create(type="pypi", name="foo", version=version)
        Package.objects.create(
            type="deb",
            namespace="debian",
            name="foo",
            version="3.0.0",
            qualifiers={"distro": "trixie"},
        )

        mock_fetchcode_versions.return_value = [
            PackageVersion(value="2.3.0"),
        ]

        self.assertEqual(3, Package.objects.count())
        self.assertEqual(0, self.ghost_count())

        test_logger = TestLogger()
        flag_ghost_packages.detect_and_flag_ghost_packages(logger=test_logger.write)

        expected = "Successfully flagged 1 ghost Packages"
        self.assertIn(expected, test_logger.getvalue())
        self.assertEqual(1, self.ghost_count())
Loading