Skip to content

Conversation

@keshav-space
Copy link
Member

There are nearly 15 million package vulnerability relationships in VCIO, and loading them all at once during data migration consumes all the memory.

- Avoid loading all records into memory at once

Signed-off-by: Keshav Priyadarshi <git@keshav.space>
@TG1999
Copy link
Contributor

TG1999 commented Oct 28, 2024

@keshav-space thanks for this, can we use our own paginate here?

@keshav-space
Copy link
Member Author

keshav-space commented Oct 28, 2024

> @keshav-space thanks for this, can we use our own paginate here?

@TG1999 We cannot use .paginated() in data migrations for PackageRelatedVulnerability, because this model was created without our custom queryset manager.

class PackageRelatedVulnerability(models.Model):
    """
    Track the relationship between a Package and Vulnerability.

    Each row links one Package to one Vulnerability and records who created
    the link (``created_by``), how much it is trusted (``confidence``) and
    whether the package fixes the vulnerability (``fix``). ``Meta`` declares
    the (package, vulnerability) pair as unique together.

    No custom manager is declared on this model, so only the default
    ``objects`` manager is used here.
    """

    # TODO: Fix related_name
    package = models.ForeignKey(
        Package,
        on_delete=models.CASCADE,
    )

    vulnerability = models.ForeignKey(
        Vulnerability,
        on_delete=models.CASCADE,
    )

    # NOTE(review): the adjacent string literals below are concatenated with
    # no separating space, so the resulting text reads "...with themodule
    # name..." and "Eg:vulnerabilities...". Changing the text alters the field
    # definition, so it would presumably need an accompanying migration.
    created_by = models.CharField(
        max_length=100,
        blank=True,
        help_text="Fully qualified name of the improver prefixed with the"
        "module name responsible for creating this relation. Eg:"
        "vulnerabilities.importers.nginx.NginxBasicImprover",
    )

    # Imported inside the class body rather than at module level; presumably
    # this avoids a circular import with vulnerabilities.improver -- confirm.
    from vulnerabilities.improver import MAX_CONFIDENCE

    # Validated to lie between 0 and MAX_CONFIDENCE; defaults to MAX_CONFIDENCE.
    confidence = models.PositiveIntegerField(
        default=MAX_CONFIDENCE,
        validators=[MinValueValidator(0), MaxValueValidator(MAX_CONFIDENCE)],
        help_text="Confidence score for this relation",
    )

    # True when this relation fixes the vulnerability; the false branch is
    # logged as "affected by" in add_package_vulnerability_changelog().
    # NOTE(review): `fix` is indexed here via db_index=True and again through
    # Meta.indexes below -- this looks redundant; confirm before changing.
    fix = models.BooleanField(
        default=False,
        db_index=True,
        help_text="Does this relation fix the specified vulnerability ?",
    )

    class Meta:
        unique_together = ["package", "vulnerability"]
        verbose_name_plural = "PackageRelatedVulnerabilities"
        indexes = [models.Index(fields=["fix"])]
        ordering = ["package", "vulnerability"]

    def __str__(self):
        return f"{self.package.package_url} {self.vulnerability.vulnerability_id}"

    def update_or_create(self, advisory):
        """
        Update if supplied record has more confidence than existing record
        Create if doesn't exist

        ``self`` carries the candidate values (package, vulnerability,
        created_by, confidence, fix); it is not saved itself. The stored row
        for the same (vulnerability, package) pair is looked up and either
        updated in place or created through the default manager.

        ``advisory`` is forwarded to add_package_vulnerability_changelog(),
        which is called after an update and after a create.
        """
        try:
            existing = PackageRelatedVulnerability.objects.get(
                vulnerability=self.vulnerability, package=self.package
            )
            # NOTE(review): the quoted snippet had no indentation; the save,
            # log and changelog calls are assumed to sit inside this `if`, so
            # that an existing row with equal or higher confidence is left
            # untouched -- confirm against the real source file.
            if self.confidence > existing.confidence:
                existing.created_by = self.created_by
                existing.confidence = self.confidence
                existing.fix = self.fix
                existing.save()
                # TODO: later we want these to be part of a log field in the DB
                logger.info(
                    f"Confidence improved for {self.package} R {self.vulnerability}, "
                    f"new confidence: {self.confidence}"
                )
                self.add_package_vulnerability_changelog(advisory=advisory)

        except self.DoesNotExist:
            # No stored relation for this pair yet: create it from self's values.
            PackageRelatedVulnerability.objects.create(
                vulnerability=self.vulnerability,
                created_by=self.created_by,
                package=self.package,
                confidence=self.confidence,
                fix=self.fix,
            )

            logger.info(
                f"New relationship {self.package} R {self.vulnerability}, "
                f"fix: {self.fix}, confidence: {self.confidence}"
            )

            self.add_package_vulnerability_changelog(advisory=advisory)

    @transaction.atomic
    def add_package_vulnerability_changelog(self, advisory):
        """
        Record a PackageChangeLog entry for this relation.

        Calls ``PackageChangeLog.log_fixing`` when ``self.fix`` is true and
        ``PackageChangeLog.log_affected_by`` otherwise, passing the package,
        the importer name derived from ``advisory``, the advisory URL (or
        None when it is empty) and the string form of the vulnerability.
        """
        # Function-scope import; presumably avoids a circular import with
        # vulnerabilities.utils -- confirm.
        from vulnerabilities.utils import get_importer_name

        importer_name = get_importer_name(advisory)
        if self.fix:
            change_logger = PackageChangeLog.log_fixing
        else:
            change_logger = PackageChangeLog.log_affected_by
        change_logger(
            package=self.package,
            importer=importer_name,
            # Falsy URLs (e.g. an empty string) are passed as None.
            source_url=advisory.url or None,
            related_vulnerability=str(self.vulnerability),
        )

Also, as far as I know, custom querysets are not directly available in data migrations, so in this case it is better to use the built-in queryset iterator.

@keshav-space
Copy link
Member Author

Merging this now!

@keshav-space keshav-space merged commit 38b97ff into main Oct 29, 2024
9 checks passed
@keshav-space keshav-space deleted the fix-migration branch October 29, 2024 11:34
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

3 participants