use crate::models::{Advisory, Enrichment};
use once_cell::sync::Lazy;
use regex_lite::Regex;
use std::collections::{HashMap, HashSet};
static CVE_REGEX: Lazy<Result<Regex, regex_lite::Error>> =
Lazy::new(|| Regex::new(r"(?i)(CVE-\d{4}-\d{4,})"));
/// Stateless entry point for de-duplicating and merging advisories.
///
/// The type carries no data; the functionality in this file is exposed as
/// associated functions such as `ReportAggregator::aggregate`.
pub struct ReportAggregator;
impl ReportAggregator {
    /// Collects every CVE identifier mentioned by `advisory`.
    ///
    /// The advisory id, each alias and each reference URL are scanned with
    /// [`Self::extract_cve_from_string`], so at most one CVE is taken from each
    /// of those strings. Results are upper-cased and de-duplicated.
    fn extract_cve_ids(advisory: &Advisory) -> HashSet<String> {
        std::iter::once(advisory.id.as_str())
            .chain(advisory.aliases.iter().flatten().map(String::as_str))
            .chain(
                advisory
                    .references
                    .iter()
                    .map(|reference| reference.url.as_str()),
            )
            .filter_map(Self::extract_cve_from_string)
            .collect()
    }

    /// Returns the first CVE identifier found in `text`, upper-cased.
    ///
    /// Matching is case-insensitive and the identifier may appear anywhere in
    /// the string (for example inside a URL). Returns `None` when nothing
    /// matches, or when the shared pattern failed to compile.
    pub fn extract_cve_from_string(text: &str) -> Option<String> {
        let Ok(regex) = &*CVE_REGEX else {
            return None;
        };
        regex
            .captures(text)
            .and_then(|caps| caps.get(1))
            .map(|found| found.as_str().to_uppercase())
    }

    /// Merges advisories that describe the same vulnerability.
    ///
    /// Two advisories belong to the same group when they share any identifier:
    /// their id, one of their aliases, or a CVE extracted by
    /// [`Self::extract_cve_ids`]. Grouping is transitive, so a chain such as
    /// `A -(CVE-1)- B -(CVE-2)- C` collapses into a single record.
    ///
    /// The earliest advisory of each group (in input order) is the canonical
    /// record; later members are folded into it with [`Self::merge`]. The
    /// returned vector preserves the order in which each group first appears,
    /// so the result is deterministic for a given input.
    pub fn aggregate(advisories: Vec<Advisory>) -> Vec<Advisory> {
        // Union-find forest over advisory indices. `link` always keeps the
        // smaller index as the root, so a root is the earliest member of its
        // group.
        let mut parent: Vec<usize> = (0..advisories.len()).collect();
        // Identifier -> index of the first advisory that mentioned it.
        let mut first_owner: HashMap<String, usize> = HashMap::new();

        for (index, advisory) in advisories.iter().enumerate() {
            for key in Self::identity_keys(advisory) {
                let owner = *first_owner.entry(key).or_insert(index);
                if owner != index {
                    Self::link(&mut parent, owner, index);
                }
            }
        }

        let mut merged: Vec<Advisory> = Vec::new();
        // Group root -> position of that group's record inside `merged`.
        let mut slot_of_root: HashMap<usize, usize> = HashMap::new();

        for (index, advisory) in advisories.into_iter().enumerate() {
            let root = Self::find_root(&mut parent, index);
            match slot_of_root.get(&root).copied() {
                Some(slot) => Self::merge(&mut merged[slot], advisory),
                None => {
                    // The root is the smallest index of its group, so it is
                    // always the first member reached here and keeps its own id.
                    slot_of_root.insert(root, merged.len());
                    merged.push(advisory);
                }
            }
        }

        merged
    }

    /// Returns every identifier under which `advisory` may be known: its id,
    /// its aliases, and the CVEs found by [`Self::extract_cve_ids`].
    fn identity_keys(advisory: &Advisory) -> HashSet<String> {
        let mut keys = Self::extract_cve_ids(advisory);
        keys.insert(advisory.id.clone());
        if let Some(aliases) = &advisory.aliases {
            keys.extend(aliases.iter().cloned());
        }
        keys
    }

    /// Finds the representative of `node`'s group, shortening the path on the way.
    fn find_root(parent: &mut [usize], mut node: usize) -> usize {
        while parent[node] != node {
            let grandparent = parent[parent[node]];
            parent[node] = grandparent;
            node = grandparent;
        }
        node
    }

    /// Joins the groups containing `a` and `b`, keeping the smaller root index.
    fn link(parent: &mut [usize], a: usize, b: usize) {
        let root_a = Self::find_root(parent, a);
        let root_b = Self::find_root(parent, b);
        if root_a != root_b {
            let (keep, absorb) = if root_a < root_b {
                (root_a, root_b)
            } else {
                (root_b, root_a)
            };
            parent[absorb] = keep;
        }
    }

    /// Folds `source` into `target`.
    ///
    /// * aliases: union of both alias lists plus `source.id` when it differs
    ///   from `target.id`; first-seen order is kept and `target.id` itself is
    ///   never listed as an alias. `target.aliases` is always `Some` afterwards.
    /// * references: concatenated, keeping the first entry for each URL.
    /// * affected: `source` entries are appended without de-duplication.
    /// * details / summary: the longer text wins; ties keep `target`'s value.
    /// * enrichment: combined by [`Self::merge_enrichment`].
    ///
    /// Every other field of `target` is left untouched.
    fn merge(target: &mut Advisory, source: Advisory) {
        let previous_aliases = target.aliases.take().unwrap_or_default();
        let absorbed_id = (source.id != target.id).then_some(source.id);
        let candidates = previous_aliases
            .into_iter()
            .chain(source.aliases.into_iter().flatten())
            .chain(absorbed_id);

        let mut seen_aliases: HashSet<String> = HashSet::new();
        let mut aliases: Vec<String> = Vec::new();
        for alias in candidates {
            if alias != target.id && seen_aliases.insert(alias.clone()) {
                aliases.push(alias);
            }
        }
        target.aliases = Some(aliases);

        // `retain` visits elements in order, so the first occurrence of each
        // URL survives.
        let mut seen_urls: HashSet<String> = HashSet::new();
        target.references.extend(source.references);
        target
            .references
            .retain(|reference| seen_urls.insert(reference.url.clone()));

        target.affected.extend(source.affected);

        Self::keep_longer(&mut target.details, source.details);
        Self::keep_longer(&mut target.summary, source.summary);

        Self::merge_enrichment(target, source.enrichment);
    }

    /// Stores `candidate` in `current` when `current` is empty or strictly
    /// shorter (in bytes) than `candidate`.
    fn keep_longer(current: &mut Option<String>, candidate: Option<String>) {
        let Some(candidate) = candidate else {
            return;
        };
        let should_replace = current
            .as_ref()
            .map_or(true, |existing| candidate.len() > existing.len());
        if should_replace {
            *current = Some(candidate);
        }
    }

    /// Combines `source_enrichment` into `target.enrichment`.
    ///
    /// Does nothing when `source_enrichment` is `None`; otherwise a default
    /// `Enrichment` is created on `target` if it has none yet.
    ///
    /// * EPSS: the higher score wins and carries its percentile and date along.
    /// * KEV: `is_kev` is OR-ed; each KEV detail field is overwritten only
    ///   when the source provides a value.
    /// * CVSS v3: the higher score wins and carries its severity along.
    fn merge_enrichment(target: &mut Advisory, source_enrichment: Option<Enrichment>) {
        let Some(source) = source_enrichment else {
            return;
        };
        let enrichment = target.enrichment.get_or_insert_with(Enrichment::default);

        if let Some(score) = source.epss_score {
            if enrichment.epss_score.map_or(true, |current| score > current) {
                // Percentile and date belong to the score, so they move together.
                enrichment.epss_score = Some(score);
                enrichment.epss_percentile = source.epss_percentile;
                enrichment.epss_date = source.epss_date;
            }
        }

        enrichment.is_kev |= source.is_kev;
        if let Some(due_date) = source.kev_due_date {
            enrichment.kev_due_date = Some(due_date);
        }
        if let Some(date_added) = source.kev_date_added {
            enrichment.kev_date_added = Some(date_added);
        }
        if let Some(ransomware) = source.kev_ransomware {
            enrichment.kev_ransomware = Some(ransomware);
        }

        if let Some(score) = source.cvss_v3_score {
            if enrichment
                .cvss_v3_score
                .map_or(true, |current| score > current)
            {
                enrichment.cvss_v3_score = Some(score);
                enrichment.cvss_v3_severity = source.cvss_v3_severity;
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::{Affected, Package, Reference, ReferenceType};
    use chrono::Utc;

    /// Builds a `Web` reference pointing at `url`.
    fn web_reference(url: &str) -> Reference {
        Reference {
            reference_type: ReferenceType::Web,
            url: url.to_string(),
        }
    }

    /// Builds an affected entry for `name` in `ecosystem` with no ranges or versions.
    fn affected_entry(ecosystem: &str, name: &str) -> Affected {
        Affected {
            package: Package {
                ecosystem: ecosystem.to_string(),
                name: name.to_string(),
                purl: None,
            },
            ranges: vec![],
            versions: vec![],
            ecosystem_specific: None,
            database_specific: None,
        }
    }

    /// Builds an advisory affecting the single package `pypi/test-package`,
    /// with the given id, summary, aliases and web reference URLs.
    fn create_advisory(
        id: &str,
        summary: Option<&str>,
        aliases: Option<Vec<&str>>,
        references: Option<Vec<&str>>,
    ) -> Advisory {
        let references = references
            .unwrap_or_default()
            .into_iter()
            .map(web_reference)
            .collect();
        let aliases = aliases.map(|names| names.into_iter().map(|s| s.to_string()).collect());

        Advisory {
            id: id.to_string(),
            summary: summary.map(|s| s.to_string()),
            details: None,
            affected: vec![affected_entry("pypi", "test-package")],
            references,
            published: Some(Utc::now()),
            modified: Some(Utc::now()),
            aliases,
            database_specific: None,
            enrichment: None,
        }
    }

    /// Builds an advisory that carries nothing but an id and one web reference.
    fn bare_advisory(id: &str, reference_url: &str) -> Advisory {
        Advisory {
            id: id.to_string(),
            summary: None,
            details: None,
            affected: vec![],
            references: vec![web_reference(reference_url)],
            published: None,
            modified: None,
            aliases: None,
            database_specific: None,
            enrichment: None,
        }
    }

    #[test]
    fn test_extract_cve_from_id() {
        let found = ReportAggregator::extract_cve_from_string("CVE-2023-12345");
        assert_eq!(found.as_deref(), Some("CVE-2023-12345"));
    }

    #[test]
    fn test_extract_cve_case_insensitive() {
        let found = ReportAggregator::extract_cve_from_string("cve-2023-12345");
        assert_eq!(found.as_deref(), Some("CVE-2023-12345"));
    }

    #[test]
    fn test_extract_cve_from_url() {
        let found = ReportAggregator::extract_cve_from_string(
            "https://nvd.nist.gov/vuln/detail/CVE-2023-12345",
        );
        assert_eq!(found.as_deref(), Some("CVE-2023-12345"));
    }

    #[test]
    fn test_extract_cve_from_text() {
        let found = ReportAggregator::extract_cve_from_string(
            "This affects CVE-2024-99999 in the codebase",
        );
        assert_eq!(found.as_deref(), Some("CVE-2024-99999"));
    }

    #[test]
    fn test_extract_cve_not_found() {
        let found = ReportAggregator::extract_cve_from_string("GHSA-1234-5678-90ab");
        assert!(found.is_none());
    }

    #[test]
    fn test_extract_cve_ids_from_advisory() {
        // One CVE each in the id, the alias list and a reference URL.
        let advisory = create_advisory(
            "CVE-2023-12345",
            Some("Test vulnerability"),
            Some(vec!["CVE-2023-54321"]),
            Some(vec!["https://nvd.nist.gov/vuln/detail/CVE-2023-99999"]),
        );

        let cves = ReportAggregator::extract_cve_ids(&advisory);

        assert_eq!(cves.len(), 3);
        for expected in ["CVE-2023-12345", "CVE-2023-54321", "CVE-2023-99999"] {
            assert!(cves.contains(expected));
        }
    }

    #[test]
    fn test_no_duplication_same_id() {
        let original = create_advisory("CVE-2023-12345", Some("Test"), None, None);
        let duplicate = original.clone();

        let result = ReportAggregator::aggregate(vec![duplicate, original]);

        assert_eq!(result.len(), 1);
        assert_eq!(result[0].id, "CVE-2023-12345");
    }

    #[test]
    fn test_merge_with_aliases() {
        let ghsa_advisory = create_advisory(
            "GHSA-1234-5678-90ab",
            Some("GHSA Description"),
            Some(vec!["CVE-2023-12345"]),
            None,
        );
        let nvd_advisory = create_advisory(
            "CVE-2023-12345",
            Some("NVD Description"),
            None,
            Some(vec!["https://nvd.nist.gov/vuln/detail/CVE-2023-12345"]),
        );

        let result = ReportAggregator::aggregate(vec![ghsa_advisory, nvd_advisory]);

        assert_eq!(result.len(), 1);
        let merged = &result[0];
        assert_eq!(merged.id, "GHSA-1234-5678-90ab");
        let aliases = merged.aliases.as_ref().unwrap();
        assert!(aliases.iter().any(|alias| alias == "CVE-2023-12345"));
    }

    #[test]
    fn test_merge_cross_source_cves() {
        let ghsa_advisory = create_advisory(
            "GHSA-xxxx-xxxx-xxxx",
            Some("GHSA Report"),
            Some(vec!["CVE-2023-11111"]),
            None,
        );
        let nvd_advisory = create_advisory("CVE-2023-11111", Some("NVD Report"), None, None);
        let osv_advisory = create_advisory(
            "OSV-2023-1234",
            Some("OSV Report"),
            Some(vec!["CVE-2023-11111"]),
            None,
        );

        let result = ReportAggregator::aggregate(vec![ghsa_advisory, nvd_advisory, osv_advisory]);

        assert_eq!(
            result.len(),
            1,
            "Expected all three to be merged into one advisory"
        );
        let merged = &result[0];
        let aliases = merged.aliases.as_ref().unwrap();
        let cve_listed_as_alias = aliases.iter().any(|alias| alias == "CVE-2023-11111");
        assert!(cve_listed_as_alias || merged.id == "CVE-2023-11111");
    }

    #[test]
    fn test_merge_references() {
        let first = bare_advisory("CVE-2023-12345", "https://example.com/1");
        let second = bare_advisory("CVE-2023-12345", "https://example.com/2");

        let result = ReportAggregator::aggregate(vec![first, second]);

        assert_eq!(result.len(), 1);
        assert_eq!(result[0].references.len(), 2);
    }

    #[test]
    fn test_merge_details_prefer_longer() {
        let long_details = "This is a very detailed description of the vulnerability";
        let without_details = create_advisory("CVE-2023-12345", None, None, None);
        let mut with_details = create_advisory("CVE-2023-12345", None, None, None);
        with_details.details = Some(long_details.to_string());

        let result = ReportAggregator::aggregate(vec![without_details, with_details]);

        assert_eq!(result.len(), 1);
        assert_eq!(result[0].details.as_deref(), Some(long_details));
    }

    #[test]
    fn test_merge_enrichment_data() {
        let mut lower_epss = create_advisory("CVE-2023-12345", None, None, None);
        lower_epss.enrichment = Some(Enrichment {
            epss_score: Some(0.5),
            epss_percentile: Some(0.6),
            epss_date: None,
            is_kev: false,
            kev_due_date: None,
            kev_date_added: None,
            kev_ransomware: None,
            cvss_v3_score: Some(7.5),
            cvss_v3_severity: None,
        });

        let mut higher_epss = create_advisory("CVE-2023-12345", None, None, None);
        higher_epss.enrichment = Some(Enrichment {
            epss_score: Some(0.7),
            epss_percentile: Some(0.8),
            epss_date: None,
            is_kev: true,
            kev_due_date: None,
            kev_date_added: None,
            kev_ransomware: None,
            cvss_v3_score: Some(6.0),
            cvss_v3_severity: None,
        });

        let result = ReportAggregator::aggregate(vec![lower_epss, higher_epss]);

        assert_eq!(result.len(), 1);
        let enrichment = result[0].enrichment.as_ref().unwrap();
        // Highest EPSS and CVSS scores win; the KEV flag is sticky.
        assert_eq!(enrichment.epss_score, Some(0.7));
        assert!(enrichment.is_kev);
        assert_eq!(enrichment.cvss_v3_score, Some(7.5));
    }

    #[test]
    fn test_different_cves_no_merge() {
        let first = create_advisory("CVE-2023-11111", Some("Vuln 1"), None, None);
        let second = create_advisory("CVE-2023-22222", Some("Vuln 2"), None, None);

        let result = ReportAggregator::aggregate(vec![first, second]);

        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_complex_cross_source_scenario() {
        let ghsa = create_advisory(
            "GHSA-1234-5678-90ab",
            Some("Improper validation in library X"),
            Some(vec!["CVE-2024-1234"]),
            Some(vec!["https://github.com/advisories/GHSA-1234-5678-90ab"]),
        );
        let nvd = create_advisory(
            "CVE-2024-1234",
            Some("Library X improper validation vulnerability"),
            None,
            Some(vec!["https://nvd.nist.gov/vuln/detail/CVE-2024-1234"]),
        );
        let osv = create_advisory(
            "OSV-2024-5678",
            Some("A validation flaw in X"),
            Some(vec!["CVE-2024-1234", "GHSA-1234-5678-90ab"]),
            None,
        );

        let result = ReportAggregator::aggregate(vec![ghsa, nvd, osv]);

        assert_eq!(
            result.len(),
            1,
            "Expected all sources to be merged into one"
        );
        let aliases = result[0].aliases.as_ref().unwrap();
        assert!(aliases.len() >= 2, "Expected multiple aliases");
    }

    #[test]
    fn test_cve_extraction_with_extended_numbers() {
        let found = ReportAggregator::extract_cve_from_string("CVE-2024-123456789");
        assert_eq!(found.as_deref(), Some("CVE-2024-123456789"));
    }

    #[test]
    fn test_empty_advisory_list() {
        let result = ReportAggregator::aggregate(Vec::new());
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_single_advisory() {
        let only = create_advisory("CVE-2023-12345", Some("Test"), None, None);

        let result = ReportAggregator::aggregate(vec![only]);

        assert_eq!(result.len(), 1);
        assert_eq!(result[0].id, "CVE-2023-12345");
    }

    #[test]
    fn test_affected_packages_merged() {
        let first = create_advisory("CVE-2023-12345", None, None, None);
        let mut second = create_advisory("CVE-2023-12345", None, None, None);
        second
            .affected
            .push(affected_entry("npm", "another-package"));

        let result = ReportAggregator::aggregate(vec![first, second]);

        assert_eq!(result.len(), 1);
        // One entry from `first` plus two from `second`; entries are appended as-is.
        assert_eq!(result[0].affected.len(), 3);
    }

    #[test]
    fn test_no_duplicate_references() {
        let shared_url = "https://example.com/advisory";
        let first = bare_advisory("CVE-2023-12345", shared_url);
        let second = bare_advisory("CVE-2023-12345", shared_url);

        let result = ReportAggregator::aggregate(vec![first, second]);

        assert_eq!(result.len(), 1);
        assert_eq!(result[0].references.len(), 1);
    }

    #[test]
    fn test_aiohttp_real_world_scenario() {
        // Three advisories with distinct ids and no aliases or references.
        let entries = [
            ("CVE-PYSEC-2023-251", "HTTP request smuggling via HTTP method"),
            ("CVE-PYSEC-2023-120", "Request smuggling via llhttp"),
            ("CVE-PYSEC-2021-76", "Open redirect vulnerability"),
        ];
        let advisories: Vec<Advisory> = entries
            .into_iter()
            .map(|(id, summary)| create_advisory(id, Some(summary), None, None))
            .collect();

        let result = ReportAggregator::aggregate(advisories);

        assert_eq!(result.len(), 3);
    }
}