#![doc = include_str!("../readme.md")]
use sbom_model::{Component, ComponentId, Sbom};
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, BTreeSet, HashSet};
pub mod renderer;
/// The outcome of comparing two SBOMs with [`Differ::diff`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Diff {
    /// Components of the new SBOM that could not be paired with any old component.
    pub added: Vec<Component>,
    /// Components of the old SBOM that could not be paired with any new component.
    pub removed: Vec<Component>,
    /// Paired components for which at least one compared field differs.
    pub changed: Vec<ComponentChange>,
    /// Dependency edge differences, one entry per parent whose children changed.
    ///
    /// Left empty when the field filter passed to [`Differ::diff`] excludes [`Field::Deps`].
    pub edge_diffs: Vec<EdgeDiff>,
    /// `true` when the metadata of the two SBOMs differs after both were normalized.
    pub metadata_changed: bool,
}
/// A component that exists in both SBOMs but differs in one or more compared fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComponentChange {
    /// Identifier of the component as it appears in the new SBOM.
    pub id: ComponentId,
    /// Snapshot of the component taken from the old SBOM.
    pub old: Component,
    /// Snapshot of the component taken from the new SBOM.
    pub new: Component,
    /// The individual field differences; never empty when produced by [`Differ::diff`].
    pub changes: Vec<FieldChange>,
}
/// Changes to the direct dependencies of a single parent component.
///
/// All identifiers are expressed in terms of the new SBOM wherever an old
/// component could be paired with a new one.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EdgeDiff {
    /// The component whose outgoing dependency edges changed.
    pub parent: ComponentId,
    /// Children the parent depends on in the new SBOM but not in the old one.
    pub added: BTreeSet<ComponentId>,
    /// Children the parent depended on in the old SBOM but no longer does.
    pub removed: BTreeSet<ComponentId>,
}
/// One field-level difference between the old and new snapshot of a component.
///
/// Variants that carry two values hold them in `(old, new)` order.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum FieldChange {
    /// The version changed; a missing version is recorded as an empty string.
    Version(String, String),
    /// The set of licenses changed.
    License(BTreeSet<String>, BTreeSet<String>),
    /// The supplier changed (`None` means no supplier was recorded).
    Supplier(Option<String>, Option<String>),
    /// The package URL changed (`None` means no purl was recorded).
    Purl(Option<String>, Option<String>),
    /// The hashes differ; the actual hash values are not carried here.
    Hashes,
}
/// Selects which aspects of a component are compared by [`Differ::diff`].
///
/// Passed as the `only` filter; when no filter is given every field is compared.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Field {
    /// Compare component versions.
    Version,
    /// Compare component license sets.
    License,
    /// Compare component suppliers.
    Supplier,
    /// Compare component package URLs.
    Purl,
    /// Compare component hashes.
    Hashes,
    /// Compare dependency edges between components.
    Deps,
}
/// Stateless entry point for comparing two SBOMs.
pub struct Differ;

impl Differ {
    /// Compares `old` against `new` and reports what changed.
    ///
    /// Both inputs are cloned and passed through `Sbom::normalize` before the
    /// comparison, so the caller's values are left untouched. Components are
    /// paired in two passes:
    ///
    /// 1. components whose [`ComponentId`] is present in both SBOMs;
    /// 2. remaining components that share an `(ecosystem, name)` identity. A
    ///    component without an ecosystem may pair with a same-named component
    ///    from any ecosystem, while two components with different, known
    ///    ecosystems never pair.
    ///
    /// Components left unpaired are reported as added or removed.
    ///
    /// When `only` is `Some`, field changes are limited to the listed
    /// [`Field`]s and dependency edges are diffed only if [`Field::Deps`] is
    /// listed. Added/removed components and the metadata flag are always
    /// reported, regardless of the filter.
    pub fn diff(old: &Sbom, new: &Sbom, only: Option<&[Field]>) -> Diff {
        let mut old = old.clone();
        let mut new = new.clone();
        old.normalize();
        new.normalize();

        let mut added = Vec::new();
        let mut removed = Vec::new();
        let mut changed = Vec::new();

        let mut processed_old = HashSet::new();
        let mut processed_new = HashSet::new();
        // Maps the id of every paired old component to the id of its new counterpart.
        let mut id_mapping: BTreeMap<ComponentId, ComponentId> = BTreeMap::new();

        // Pass 1: pair components whose id is identical in both SBOMs.
        for (id, new_comp) in &new.components {
            if let Some(old_comp) = old.components.get(id) {
                processed_old.insert(id.clone());
                processed_new.insert(id.clone());
                id_mapping.insert(id.clone(), id.clone());

                if let Some(change) = Self::compute_change(old_comp, new_comp, only) {
                    changed.push(change);
                }
            }
        }

        // Pass 2: index the still-unpaired old components by (ecosystem, name)
        // so that id changes (e.g. a version bump inside a purl) are reported
        // as a change rather than as an add/remove pair.
        let mut old_identity_map: BTreeMap<(Option<String>, String), Vec<ComponentId>> =
            BTreeMap::new();
        for (id, comp) in &old.components {
            if !processed_old.contains(id) {
                old_identity_map
                    .entry((comp.ecosystem.clone(), comp.name.clone()))
                    .or_default()
                    .push(id.clone());
            }
        }

        for (id, new_comp) in &new.components {
            if processed_new.contains(id) {
                continue;
            }

            let identity = (new_comp.ecosystem.clone(), new_comp.name.clone());
            let matched_old_id = old_identity_map
                .get_mut(&identity)
                .and_then(|ids| ids.pop())
                .or_else(|| {
                    if new_comp.ecosystem.is_some() {
                        // The new side knows its ecosystem: fall back to an old
                        // component of the same name with an unknown ecosystem.
                        old_identity_map
                            .get_mut(&(None, new_comp.name.clone()))
                            .and_then(|ids| ids.pop())
                    } else {
                        // The new side has no ecosystem: accept any remaining old
                        // component with the same name, whatever its ecosystem.
                        old_identity_map
                            .iter_mut()
                            .find(|((_, name), ids)| name == &new_comp.name && !ids.is_empty())
                            .and_then(|(_, ids)| ids.pop())
                    }
                });

            if let Some(old_id) = matched_old_id {
                if let Some(old_comp) = old.components.get(&old_id) {
                    processed_old.insert(old_id.clone());
                    processed_new.insert(id.clone());
                    id_mapping.insert(old_id, id.clone());

                    if let Some(change) = Self::compute_change(old_comp, new_comp, only) {
                        changed.push(change);
                    }
                    continue;
                }
            }

            added.push(new_comp.clone());
            processed_new.insert(id.clone());
        }

        // Whatever is still unpaired on the old side has been removed.
        for (id, old_comp) in &old.components {
            if !processed_old.contains(id) {
                removed.push(old_comp.clone());
            }
        }

        let should_include_deps = only.is_none_or(|fields| fields.contains(&Field::Deps));
        let edge_diffs = if should_include_deps {
            Self::compute_edge_diffs(&old, &new, &id_mapping)
        } else {
            Vec::new()
        };

        Diff {
            added,
            removed,
            changed,
            edge_diffs,
            metadata_changed: old.metadata != new.metadata,
        }
    }

    /// Computes per-parent dependency edge differences between `old` and `new`.
    ///
    /// `id_mapping` maps the id of each paired old component to the id of its
    /// new counterpart. Old ids are translated through it so that a component
    /// whose id changed between the two SBOMs is not reported as an edge being
    /// removed and re-added. Ids without a mapping are used unchanged.
    ///
    /// Only parents whose set of children actually differs produce an entry.
    fn compute_edge_diffs(
        old: &Sbom,
        new: &Sbom,
        id_mapping: &BTreeMap<ComponentId, ComponentId>,
    ) -> Vec<EdgeDiff> {
        let translate_id = |old_id: &ComponentId| -> ComponentId {
            id_mapping
                .get(old_id)
                .cloned()
                .unwrap_or_else(|| old_id.clone())
        };

        // Reverse index (new id -> old id), built once so that each parent is
        // resolved with a map lookup instead of a linear scan over the whole
        // mapping. `diff` pairs every component at most once, so the mapping is
        // one-to-one and inverting it loses nothing.
        let reverse_mapping: BTreeMap<&ComponentId, &ComponentId> = id_mapping
            .iter()
            .map(|(old_id, new_id)| (new_id, old_id))
            .collect();

        // Every parent that has edges on either side, expressed as a new-side id.
        let mut all_parents: BTreeSet<ComponentId> = new.dependencies.keys().cloned().collect();
        all_parents.extend(old.dependencies.keys().map(&translate_id));

        let mut edge_diffs = Vec::new();
        for parent_id in all_parents {
            let new_children: BTreeSet<ComponentId> = new
                .dependencies
                .get(&parent_id)
                .cloned()
                .unwrap_or_default();

            // Locate the same parent on the old side; unpaired parents keep their id.
            let old_parent_id: &ComponentId = reverse_mapping
                .get(&parent_id)
                .copied()
                .unwrap_or(&parent_id);
            let old_children: BTreeSet<ComponentId> = old
                .dependencies
                .get(old_parent_id)
                .map(|children| children.iter().map(&translate_id).collect())
                .unwrap_or_default();

            let added: BTreeSet<ComponentId> =
                new_children.difference(&old_children).cloned().collect();
            let removed: BTreeSet<ComponentId> =
                old_children.difference(&new_children).cloned().collect();

            if !added.is_empty() || !removed.is_empty() {
                edge_diffs.push(EdgeDiff {
                    parent: parent_id,
                    added,
                    removed,
                });
            }
        }

        edge_diffs
    }

    /// Compares two paired components field by field.
    ///
    /// Only fields admitted by `only` are compared (all of them when `only` is
    /// `None`). Returns `None` when no compared field differs; otherwise the
    /// returned change carries the new component's id plus clones of both
    /// snapshots.
    fn compute_change(
        old: &Component,
        new: &Component,
        only: Option<&[Field]>,
    ) -> Option<ComponentChange> {
        let should_include = |f: Field| only.is_none_or(|fields| fields.contains(&f));
        let mut changes = Vec::new();

        if should_include(Field::Version) && old.version != new.version {
            // A missing version is represented as an empty string in the change.
            changes.push(FieldChange::Version(
                old.version.clone().unwrap_or_default(),
                new.version.clone().unwrap_or_default(),
            ));
        }

        if should_include(Field::License) && old.licenses != new.licenses {
            changes.push(FieldChange::License(
                old.licenses.clone(),
                new.licenses.clone(),
            ));
        }

        if should_include(Field::Supplier) && old.supplier != new.supplier {
            changes.push(FieldChange::Supplier(
                old.supplier.clone(),
                new.supplier.clone(),
            ));
        }

        if should_include(Field::Purl) && old.purl != new.purl {
            changes.push(FieldChange::Purl(old.purl.clone(), new.purl.clone()));
        }

        if should_include(Field::Hashes) && old.hashes != new.hashes {
            changes.push(FieldChange::Hashes);
        }

        if changes.is_empty() {
            return None;
        }

        Some(ComponentChange {
            id: new.id.clone(),
            old: old.clone(),
            new: new.clone(),
            changes,
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a component through `Component::new` from borrowed name and version.
    fn component(name: &str, version: &str) -> Component {
        Component::new(name.to_string(), Some(version.to_string()))
    }

    /// Builds a component that carries a purl and an ecosystem and whose id is
    /// derived from that purl.
    fn purl_component(name: &str, version: &str, ecosystem: &str, purl: &str) -> Component {
        let mut comp = component(name, version);
        comp.purl = Some(purl.to_string());
        comp.ecosystem = Some(ecosystem.to_string());
        comp.id = ComponentId::new(comp.purl.as_deref(), &[]);
        comp
    }

    /// Builds `lodash` 4.17.21 without purl or ecosystem, identified by its
    /// name and version properties.
    fn bare_lodash() -> Component {
        let mut comp = component("lodash", "4.17.21");
        comp.purl = None;
        comp.ecosystem = None;
        comp.id = ComponentId::new(None, &[("name", "lodash"), ("version", "4.17.21")]);
        comp
    }

    /// Builds an SBOM containing exactly `components`, each keyed by its own id.
    fn sbom_of<I: IntoIterator<Item = Component>>(components: I) -> Sbom {
        let mut sbom = Sbom::default();
        for comp in components {
            sbom.components.insert(comp.id.clone(), comp);
        }
        sbom
    }

    /// Records a `parent -> child` dependency edge in `sbom`.
    fn add_edge(sbom: &mut Sbom, parent: &ComponentId, child: &ComponentId) {
        sbom.dependencies
            .entry(parent.clone())
            .or_default()
            .insert(child.clone());
    }

    /// Returns whether the first changed component reports a change accepted by `pred`.
    fn first_change_has(diff: &Diff, pred: impl Fn(&FieldChange) -> bool) -> bool {
        diff.changed[0].changes.iter().any(pred)
    }

    #[test]
    fn test_diff_added_removed() {
        let old = sbom_of([component("pkg-a", "1.0")]);
        let new = sbom_of([component("pkg-b", "1.0")]);

        let diff = Differ::diff(&old, &new, None);

        assert_eq!(diff.added.len(), 1);
        assert_eq!(diff.removed.len(), 1);
        assert_eq!(diff.changed.len(), 0);
    }

    #[test]
    fn test_diff_changed() {
        let before = component("pkg-a", "1.0");
        // Cloning keeps the id, so both sides are paired by id.
        let mut after = before.clone();
        after.version = Some("1.1".to_string());

        let diff = Differ::diff(&sbom_of([before]), &sbom_of([after]), None);

        assert_eq!(diff.added.len(), 0);
        assert_eq!(diff.removed.len(), 0);
        assert_eq!(diff.changed.len(), 1);
        assert!(matches!(
            &diff.changed[0].changes[0],
            FieldChange::Version(..)
        ));
    }

    #[test]
    fn test_diff_identity_reconciliation() {
        // Two independently constructed components that share only their name.
        let old = sbom_of([component("pkg-a", "1.0")]);
        let new = sbom_of([component("pkg-a", "1.1")]);

        let diff = Differ::diff(&old, &new, None);

        assert_eq!(diff.changed.len(), 1);
        assert_eq!(diff.added.len(), 0);
    }

    #[test]
    fn test_diff_license_change() {
        let mut before = component("pkg-a", "1.0");
        before.licenses.insert("MIT".into());
        let mut after = before.clone();
        after.licenses = BTreeSet::from(["Apache-2.0".into()]);

        let diff = Differ::diff(&sbom_of([before]), &sbom_of([after]), None);

        assert_eq!(diff.changed.len(), 1);
        assert!(first_change_has(&diff, |c| matches!(
            c,
            FieldChange::License(..)
        )));
    }

    #[test]
    fn test_diff_supplier_change() {
        let mut before = component("pkg-a", "1.0");
        before.supplier = Some("Acme Corp".into());
        let mut after = before.clone();
        after.supplier = Some("New Corp".into());

        let diff = Differ::diff(&sbom_of([before]), &sbom_of([after]), None);

        assert_eq!(diff.changed.len(), 1);
        assert!(first_change_has(&diff, |c| matches!(
            c,
            FieldChange::Supplier(..)
        )));
    }

    #[test]
    fn test_diff_hashes_change() {
        let mut before = component("pkg-a", "1.0");
        before.hashes.insert("sha256".into(), "aaa".into());
        let mut after = before.clone();
        after.hashes.insert("sha256".into(), "bbb".into());

        let diff = Differ::diff(&sbom_of([before]), &sbom_of([after]), None);

        assert_eq!(diff.changed.len(), 1);
        assert!(first_change_has(&diff, |c| matches!(c, FieldChange::Hashes)));
    }

    #[test]
    fn test_diff_multiple_field_changes() {
        let mut before = component("pkg-a", "1.0");
        before.licenses.insert("MIT".into());
        before.supplier = Some("Old Corp".into());
        before.hashes.insert("sha256".into(), "aaa".into());

        let mut after = before.clone();
        after.version = Some("2.0".into());
        after.licenses = BTreeSet::from(["Apache-2.0".into()]);
        after.supplier = Some("New Corp".into());
        after.hashes.insert("sha256".into(), "bbb".into());

        let diff = Differ::diff(&sbom_of([before]), &sbom_of([after]), None);

        assert_eq!(diff.changed.len(), 1);
        // Version, license, supplier and hashes each contribute one entry.
        assert_eq!(diff.changed[0].changes.len(), 4);
    }

    #[test]
    fn test_diff_no_changes() {
        let comp = component("pkg-a", "1.0");
        let old = sbom_of([comp.clone()]);
        let new = sbom_of([comp]);

        let diff = Differ::diff(&old, &new, None);

        assert!(diff.added.is_empty());
        assert!(diff.removed.is_empty());
        assert!(diff.changed.is_empty());
        assert!(diff.edge_diffs.is_empty());
    }

    #[test]
    fn test_diff_metadata_changed() {
        let mut old = Sbom::default();
        old.metadata.timestamp = Some("2024-01-01".into());
        let mut new = Sbom::default();
        new.metadata.timestamp = Some("2024-01-02".into());

        let diff = Differ::diff(&old, &new, None);

        // NOTE(review): presumably `Sbom::normalize` discards the timestamp, which
        // is why differing timestamps are expected to compare equal - confirm.
        assert!(!diff.metadata_changed);
    }

    #[test]
    fn test_diff_filtering() {
        let mut before = component("pkg-a", "1.0");
        before.licenses.insert("MIT".into());
        let mut after = before.clone();
        after.version = Some("1.1".to_string());
        after.licenses = BTreeSet::from(["Apache-2.0".into()]);

        let diff = Differ::diff(
            &sbom_of([before]),
            &sbom_of([after]),
            Some(&[Field::Version]),
        );

        // The license change is filtered out; only the version change remains.
        assert_eq!(diff.changed.len(), 1);
        assert_eq!(diff.changed[0].changes.len(), 1);
        assert!(matches!(
            &diff.changed[0].changes[0],
            FieldChange::Version(..)
        ));
    }

    #[test]
    fn test_purl_change_same_ecosystem_name_is_change_not_add_remove() {
        let old = sbom_of([purl_component(
            "lodash",
            "4.17.20",
            "npm",
            "pkg:npm/lodash@4.17.20",
        )]);
        let new = sbom_of([purl_component(
            "lodash",
            "4.17.21",
            "npm",
            "pkg:npm/lodash@4.17.21",
        )]);

        let diff = Differ::diff(&old, &new, None);

        assert_eq!(diff.added.len(), 0, "Should not have added components");
        assert_eq!(diff.removed.len(), 0, "Should not have removed components");
        assert_eq!(diff.changed.len(), 1, "Should have one changed component");
        assert!(first_change_has(&diff, |c| matches!(
            c,
            FieldChange::Version(..)
        )));
        assert!(first_change_has(&diff, |c| matches!(
            c,
            FieldChange::Purl(..)
        )));
    }

    #[test]
    fn test_purl_removed_is_change() {
        let old = sbom_of([purl_component(
            "lodash",
            "4.17.21",
            "npm",
            "pkg:npm/lodash@4.17.21",
        )]);
        let new = sbom_of([bare_lodash()]);

        let diff = Differ::diff(&old, &new, None);

        assert_eq!(diff.added.len(), 0, "Should not have added components");
        assert_eq!(diff.removed.len(), 0, "Should not have removed components");
        assert_eq!(diff.changed.len(), 1, "Should have one changed component");
        assert!(first_change_has(&diff, |c| matches!(
            c,
            FieldChange::Purl(..)
        )));
    }

    #[test]
    fn test_purl_added_is_change() {
        let old = sbom_of([bare_lodash()]);
        let new = sbom_of([purl_component(
            "lodash",
            "4.17.21",
            "npm",
            "pkg:npm/lodash@4.17.21",
        )]);

        let diff = Differ::diff(&old, &new, None);

        assert_eq!(diff.added.len(), 0, "Should not have added components");
        assert_eq!(diff.removed.len(), 0, "Should not have removed components");
        assert_eq!(diff.changed.len(), 1, "Should have one changed component");
    }

    #[test]
    fn test_same_name_different_ecosystems_not_matched() {
        let old = sbom_of([purl_component(
            "utils",
            "1.0.0",
            "npm",
            "pkg:npm/utils@1.0.0",
        )]);
        let new = sbom_of([purl_component(
            "utils",
            "1.0.0",
            "pypi",
            "pkg:pypi/utils@1.0.0",
        )]);

        let diff = Differ::diff(&old, &new, None);

        assert_eq!(diff.added.len(), 1, "pypi/utils should be added");
        assert_eq!(diff.removed.len(), 1, "npm/utils should be removed");
        assert_eq!(
            diff.changed.len(),
            0,
            "Should not match different ecosystems"
        );
    }

    #[test]
    fn test_same_name_both_no_ecosystem_matched() {
        let mut before = component("mystery-pkg", "1.0.0");
        before.ecosystem = None;
        let mut after = component("mystery-pkg", "2.0.0");
        after.ecosystem = None;

        let diff = Differ::diff(&sbom_of([before]), &sbom_of([after]), None);

        assert_eq!(diff.added.len(), 0);
        assert_eq!(diff.removed.len(), 0);
        assert_eq!(
            diff.changed.len(),
            1,
            "Same name with None ecosystems should match"
        );
    }

    #[test]
    fn test_edge_diff_added_removed() {
        let parent = component("parent", "1.0");
        let child_a = component("child-a", "1.0");
        let child_b = component("child-b", "1.0");
        let parent_id = parent.id.clone();
        let child_a_id = child_a.id.clone();
        let child_b_id = child_b.id.clone();

        // Same components on both sides; only the parent's edge is swapped.
        let mut old = sbom_of([parent.clone(), child_a.clone(), child_b.clone()]);
        let mut new = sbom_of([parent, child_a, child_b]);
        add_edge(&mut old, &parent_id, &child_a_id);
        add_edge(&mut new, &parent_id, &child_b_id);

        let diff = Differ::diff(&old, &new, None);

        assert_eq!(diff.edge_diffs.len(), 1);
        assert_eq!(diff.edge_diffs[0].parent, parent_id);
        assert!(diff.edge_diffs[0].added.contains(&child_b_id));
        assert!(diff.edge_diffs[0].removed.contains(&child_a_id));
    }

    #[test]
    fn test_edge_diff_with_identity_reconciliation() {
        let parent_old = purl_component("parent", "1.0", "npm", "pkg:npm/parent@1.0");
        let parent_new = purl_component("parent", "1.1", "npm", "pkg:npm/parent@1.1");
        let child = component("child", "1.0");

        let mut old = sbom_of([parent_old.clone(), child.clone()]);
        let mut new = sbom_of([parent_new.clone(), child.clone()]);
        add_edge(&mut old, &parent_old.id, &child.id);
        add_edge(&mut new, &parent_new.id, &child.id);

        let diff = Differ::diff(&old, &new, None);

        assert_eq!(
            diff.edge_diffs.len(),
            0,
            "No edge changes expected when parent is reconciled by identity"
        );
    }

    #[test]
    fn test_edge_diff_filtering() {
        let parent = component("parent", "1.0");
        let child = component("child", "1.0");
        let parent_id = parent.id.clone();
        let child_id = child.id.clone();

        // Only the new SBOM has the parent -> child edge.
        let old = sbom_of([parent.clone(), child.clone()]);
        let mut new = sbom_of([parent, child]);
        add_edge(&mut new, &parent_id, &child_id);

        let unfiltered = Differ::diff(&old, &new, None);
        assert_eq!(unfiltered.edge_diffs.len(), 1);

        let version_only = Differ::diff(&old, &new, Some(&[Field::Version]));
        assert_eq!(version_only.edge_diffs.len(), 0);

        let deps_only = Differ::diff(&old, &new, Some(&[Field::Deps]));
        assert_eq!(deps_only.edge_diffs.len(), 1);
    }
}