Skip to main content

sbom_tools/diff/changes/
components.rs

1//! Component change computer implementation.
2
3use crate::diff::traits::{ChangeComputer, ComponentChangeSet, ComponentMatches};
4use crate::diff::{ComponentChange, CostModel, FieldChange};
5use crate::model::{
6    Component, CryptoAssetType, CryptoProperties, DatasetInfo, DatasetRef, MlModelInfo,
7    NormalizedSbom,
8};
9use std::collections::HashSet;
10
/// Computes component-level changes between SBOMs.
///
/// Holds the [`CostModel`] that is consulted for the cost of every added,
/// removed, or modified component and of each individual field change.
pub struct ComponentChangeComputer {
    /// Weights charged for each kind of detected change.
    cost_model: CostModel,
}
15
16impl ComponentChangeComputer {
    /// Create a new component change computer with the given cost model.
    ///
    /// The cost model is stored as-is; every cost this computer reports is
    /// read from it.
    #[must_use]
    pub const fn new(cost_model: CostModel) -> Self {
        Self { cost_model }
    }
22
23    /// Compute individual field changes between two components.
24    fn compute_field_changes(&self, old: &Component, new: &Component) -> (Vec<FieldChange>, u32) {
25        let mut changes = Vec::new();
26        let mut total_cost = 0u32;
27
28        // Version change
29        if old.version != new.version {
30            changes.push(FieldChange {
31                field: "version".to_string(),
32                old_value: old.version.clone(),
33                new_value: new.version.clone(),
34            });
35            total_cost += self
36                .cost_model
37                .version_change_cost(&old.semver, &new.semver);
38        }
39
40        // License change
41        let old_licenses: HashSet<_> = old
42            .licenses
43            .declared
44            .iter()
45            .map(|l| &l.expression)
46            .collect();
47        let new_licenses: HashSet<_> = new
48            .licenses
49            .declared
50            .iter()
51            .map(|l| &l.expression)
52            .collect();
53        if old_licenses != new_licenses {
54            changes.push(FieldChange {
55                field: "licenses".to_string(),
56                old_value: Some(
57                    old.licenses
58                        .declared
59                        .iter()
60                        .map(|l| l.expression.clone())
61                        .collect::<Vec<_>>()
62                        .join(", "),
63                ),
64                new_value: Some(
65                    new.licenses
66                        .declared
67                        .iter()
68                        .map(|l| l.expression.clone())
69                        .collect::<Vec<_>>()
70                        .join(", "),
71                ),
72            });
73            total_cost += self.cost_model.license_changed;
74        }
75
76        // Supplier change
77        if old.supplier != new.supplier {
78            changes.push(FieldChange {
79                field: "supplier".to_string(),
80                old_value: old.supplier.as_ref().map(|s| s.name.clone()),
81                new_value: new.supplier.as_ref().map(|s| s.name.clone()),
82            });
83            total_cost += self.cost_model.supplier_changed;
84        }
85
86        // Hash change (same version but different hash = integrity concern)
87        if old.version == new.version && !old.hashes.is_empty() && !new.hashes.is_empty() {
88            let old_hashes: HashSet<_> = old.hashes.iter().map(|h| &h.value).collect();
89            let new_hashes: HashSet<_> = new.hashes.iter().map(|h| &h.value).collect();
90            if old_hashes.is_disjoint(&new_hashes) {
91                changes.push(FieldChange {
92                    field: "hashes".to_string(),
93                    old_value: Some(
94                        old.hashes
95                            .first()
96                            .map(|h| h.value.clone())
97                            .unwrap_or_default(),
98                    ),
99                    new_value: Some(
100                        new.hashes
101                            .first()
102                            .map(|h| h.value.clone())
103                            .unwrap_or_default(),
104                    ),
105                });
106                total_cost += self.cost_model.hash_mismatch;
107            }
108        }
109
110        // ML model metadata changes (granular, prefixed per-field)
111        if old.ml_model != new.ml_model {
112            total_cost += Self::compute_ml_changes(&self.cost_model, old, new, &mut changes);
113        }
114
115        // Dataset metadata changes (granular, prefixed per-field)
116        if old.dataset != new.dataset {
117            total_cost += Self::compute_dataset_changes(&self.cost_model, old, new, &mut changes);
118        }
119
120        // Cryptographic property changes
121        if old.crypto_properties != new.crypto_properties {
122            total_cost += Self::compute_crypto_changes(&self.cost_model, old, new, &mut changes);
123        }
124
125        (changes, total_cost)
126    }
127
128    /// Push a scalar `Option<String>` field change when the values differ.
129    fn push_scalar_change(
130        changes: &mut Vec<FieldChange>,
131        field: &str,
132        old: &Option<String>,
133        new: &Option<String>,
134        cost: &mut u32,
135        field_cost: u32,
136    ) {
137        if old != new {
138            changes.push(FieldChange {
139                field: field.to_string(),
140                old_value: old.clone(),
141                new_value: new.clone(),
142            });
143            *cost += field_cost;
144        }
145    }
146
147    /// Stable identity key for a training-dataset reference: prefer the BOM-ref,
148    /// then the name, then the PURL. Used to detect added/removed datasets across
149    /// two model revisions.
150    fn dataset_ref_key(reference: &DatasetRef) -> Option<&str> {
151        reference
152            .reference
153            .as_deref()
154            .or(reference.name.as_deref())
155            .or(reference.purl.as_deref())
156    }
157
158    /// Compute ML-model-specific field changes between two components.
159    ///
160    /// Emits granular, prefixed `ml_*` field changes (approach, architecture, task,
161    /// quantization, model card) plus per-dataset `ml_training_dataset` add/remove
162    /// entries, rather than one opaque serialized blob. This surfaces model-swap
163    /// signals such as `fp32 -> int4` re-quantization or training-data provenance loss.
164    fn compute_ml_changes(
165        cost_model: &CostModel,
166        old: &Component,
167        new: &Component,
168        changes: &mut Vec<FieldChange>,
169    ) -> u32 {
170        let mut cost = 0u32;
171
172        match (&old.ml_model, &new.ml_model) {
173            (Some(old_ml), Some(new_ml)) => {
174                cost += Self::compute_ml_sub_changes(cost_model, old_ml, new_ml, changes);
175            }
176            (None, Some(_)) | (Some(_), None) => {
177                // Model metadata appeared or disappeared wholesale.
178                changes.push(FieldChange {
179                    field: "ml_model".to_string(),
180                    old_value: old.ml_model.as_ref().map(|_| "present".to_string()),
181                    new_value: new.ml_model.as_ref().map(|_| "present".to_string()),
182                });
183                cost += cost_model.ml_model_changed;
184            }
185            (None, None) => {}
186        }
187
188        cost
189    }
190
191    fn compute_ml_sub_changes(
192        cost_model: &CostModel,
193        old: &MlModelInfo,
194        new: &MlModelInfo,
195        changes: &mut Vec<FieldChange>,
196    ) -> u32 {
197        let mut cost = 0u32;
198
199        Self::push_scalar_change(
200            changes,
201            "ml_approach",
202            &old.approach,
203            &new.approach,
204            &mut cost,
205            cost_model.ml_approach_changed,
206        );
207
208        // Architecture family and name are reported under a single prefixed field
209        // so a "resnet -> bert" or "cnn -> transformer" swap reads as one signal.
210        let old_arch = Self::join_architecture(old);
211        let new_arch = Self::join_architecture(new);
212        Self::push_scalar_change(
213            changes,
214            "ml_architecture",
215            &old_arch,
216            &new_arch,
217            &mut cost,
218            cost_model.ml_architecture_changed,
219        );
220
221        Self::push_scalar_change(
222            changes,
223            "ml_task",
224            &old.task,
225            &new.task,
226            &mut cost,
227            cost_model.ml_task_changed,
228        );
229        Self::push_scalar_change(
230            changes,
231            "ml_quantization",
232            &old.quantization,
233            &new.quantization,
234            &mut cost,
235            cost_model.ml_quantization_changed,
236        );
237        Self::push_scalar_change(
238            changes,
239            "ml_model_card",
240            &old.model_card_url,
241            &new.model_card_url,
242            &mut cost,
243            cost_model.ml_model_card_changed,
244        );
245
246        cost += Self::compute_training_dataset_changes(cost_model, old, new, changes);
247
248        cost
249    }
250
251    /// Combine architecture family and name into a single display value.
252    fn join_architecture(ml: &MlModelInfo) -> Option<String> {
253        match (&ml.architecture_family, &ml.architecture_name) {
254            (Some(family), Some(name)) => Some(format!("{family}/{name}")),
255            (Some(value), None) | (None, Some(value)) => Some(value.clone()),
256            (None, None) => None,
257        }
258    }
259
260    /// Emit per-dataset `ml_training_dataset` add/remove changes keyed by
261    /// `DatasetRef.reference`-or-`name`. Training-dataset removal is treated as a
262    /// provenance-loss signal and carries a high cost.
263    fn compute_training_dataset_changes(
264        cost_model: &CostModel,
265        old: &MlModelInfo,
266        new: &MlModelInfo,
267        changes: &mut Vec<FieldChange>,
268    ) -> u32 {
269        let mut cost = 0u32;
270
271        let old_keys: HashSet<&str> = old
272            .training_datasets
273            .iter()
274            .filter_map(Self::dataset_ref_key)
275            .collect();
276        let new_keys: HashSet<&str> = new
277            .training_datasets
278            .iter()
279            .filter_map(Self::dataset_ref_key)
280            .collect();
281
282        // Removed training datasets (present in old, absent in new). Sorted for
283        // deterministic output.
284        let mut removed: Vec<&str> = old_keys.difference(&new_keys).copied().collect();
285        removed.sort_unstable();
286        for key in removed {
287            changes.push(FieldChange {
288                field: "ml_training_dataset".to_string(),
289                old_value: Some(key.to_string()),
290                new_value: None,
291            });
292            cost += cost_model.ml_training_dataset_removed;
293        }
294
295        // Added training datasets (absent in old, present in new).
296        let mut added: Vec<&str> = new_keys.difference(&old_keys).copied().collect();
297        added.sort_unstable();
298        for key in added {
299            changes.push(FieldChange {
300                field: "ml_training_dataset".to_string(),
301                old_value: None,
302                new_value: Some(key.to_string()),
303            });
304            cost += cost_model.ml_training_dataset_added;
305        }
306
307        cost
308    }
309
310    /// Compute dataset-component-specific field changes between two components.
311    ///
312    /// Emits granular, prefixed `dataset_*` field changes: type, per-classification
313    /// sensitivity add/remove, and governance. Gaining a sensitivity classification
314    /// (e.g. a dataset newly tagged `pii`) is a data-governance signal and carries
315    /// a high cost.
316    fn compute_dataset_changes(
317        cost_model: &CostModel,
318        old: &Component,
319        new: &Component,
320        changes: &mut Vec<FieldChange>,
321    ) -> u32 {
322        let mut cost = 0u32;
323
324        match (&old.dataset, &new.dataset) {
325            (Some(old_ds), Some(new_ds)) => {
326                cost += Self::compute_dataset_sub_changes(cost_model, old_ds, new_ds, changes);
327            }
328            (None, Some(_)) | (Some(_), None) => {
329                changes.push(FieldChange {
330                    field: "dataset".to_string(),
331                    old_value: old.dataset.as_ref().map(|_| "present".to_string()),
332                    new_value: new.dataset.as_ref().map(|_| "present".to_string()),
333                });
334                cost += cost_model.dataset_changed;
335            }
336            (None, None) => {}
337        }
338
339        cost
340    }
341
342    fn compute_dataset_sub_changes(
343        cost_model: &CostModel,
344        old: &DatasetInfo,
345        new: &DatasetInfo,
346        changes: &mut Vec<FieldChange>,
347    ) -> u32 {
348        let mut cost = 0u32;
349
350        Self::push_scalar_change(
351            changes,
352            "dataset_type",
353            &old.dataset_type,
354            &new.dataset_type,
355            &mut cost,
356            cost_model.dataset_type_changed,
357        );
358
359        // Sensitivity classifications: emit per-classification add/remove so a
360        // dataset newly gaining "pii" is visible and costly.
361        let old_sens: HashSet<&str> = old
362            .sensitivity_classifications
363            .iter()
364            .map(String::as_str)
365            .collect();
366        let new_sens: HashSet<&str> = new
367            .sensitivity_classifications
368            .iter()
369            .map(String::as_str)
370            .collect();
371
372        let mut added: Vec<&str> = new_sens.difference(&old_sens).copied().collect();
373        added.sort_unstable();
374        for class in added {
375            changes.push(FieldChange {
376                field: "dataset_sensitivity".to_string(),
377                old_value: None,
378                new_value: Some(class.to_string()),
379            });
380            cost += cost_model.dataset_sensitivity_added;
381        }
382
383        let mut removed: Vec<&str> = old_sens.difference(&new_sens).copied().collect();
384        removed.sort_unstable();
385        for class in removed {
386            changes.push(FieldChange {
387                field: "dataset_sensitivity".to_string(),
388                old_value: Some(class.to_string()),
389                new_value: None,
390            });
391            cost += cost_model.dataset_sensitivity_removed;
392        }
393
394        // Governance owners: report a single change when the owner set differs.
395        let old_gov: HashSet<&str> = old.governance_owners.iter().map(String::as_str).collect();
396        let new_gov: HashSet<&str> = new.governance_owners.iter().map(String::as_str).collect();
397        if old_gov != new_gov {
398            changes.push(FieldChange {
399                field: "dataset_governance".to_string(),
400                old_value: Self::join_sorted(&old.governance_owners),
401                new_value: Self::join_sorted(&new.governance_owners),
402            });
403            cost += cost_model.dataset_governance_changed;
404        }
405
406        cost
407    }
408
409    /// Join a list of strings into a deterministic, comma-separated display value,
410    /// or `None` when empty.
411    fn join_sorted(values: &[String]) -> Option<String> {
412        if values.is_empty() {
413            return None;
414        }
415        let mut sorted: Vec<&str> = values.iter().map(String::as_str).collect();
416        sorted.sort_unstable();
417        Some(sorted.join(", "))
418    }
419
420    /// Compute crypto-specific field changes between two components.
421    fn compute_crypto_changes(
422        cost_model: &CostModel,
423        old: &Component,
424        new: &Component,
425        changes: &mut Vec<FieldChange>,
426    ) -> u32 {
427        let mut cost = 0u32;
428
429        match (&old.crypto_properties, &new.crypto_properties) {
430            (Some(old_cp), Some(new_cp)) => {
431                cost += Self::compute_crypto_sub_changes(cost_model, old_cp, new_cp, changes);
432            }
433            (None, Some(new_cp)) => {
434                changes.push(FieldChange {
435                    field: "crypto_properties".to_string(),
436                    old_value: None,
437                    new_value: Some(new_cp.asset_type.to_string()),
438                });
439                cost += cost_model.crypto_algorithm_changed;
440            }
441            (Some(old_cp), None) => {
442                changes.push(FieldChange {
443                    field: "crypto_properties".to_string(),
444                    old_value: Some(old_cp.asset_type.to_string()),
445                    new_value: None,
446                });
447                cost += cost_model.crypto_algorithm_changed;
448            }
449            (None, None) => {}
450        }
451
452        cost
453    }
454
455    fn compute_crypto_sub_changes(
456        cost_model: &CostModel,
457        old: &CryptoProperties,
458        new: &CryptoProperties,
459        changes: &mut Vec<FieldChange>,
460    ) -> u32 {
461        let mut cost = 0u32;
462
463        // Algorithm property changes
464        if let (Some(old_algo), Some(new_algo)) =
465            (&old.algorithm_properties, &new.algorithm_properties)
466        {
467            // Algorithm family change
468            if old_algo.algorithm_family != new_algo.algorithm_family {
469                changes.push(FieldChange {
470                    field: "crypto_algorithm".to_string(),
471                    old_value: old_algo.algorithm_family.clone(),
472                    new_value: new_algo.algorithm_family.clone(),
473                });
474                cost += cost_model.crypto_algorithm_changed;
475            }
476
477            // Quantum security level change
478            if old_algo.nist_quantum_security_level != new_algo.nist_quantum_security_level {
479                changes.push(FieldChange {
480                    field: "crypto_quantum_level".to_string(),
481                    old_value: old_algo.nist_quantum_security_level.map(|l| l.to_string()),
482                    new_value: new_algo.nist_quantum_security_level.map(|l| l.to_string()),
483                });
484                cost += cost_model.crypto_quantum_level_changed;
485            }
486
487            // Security downgrade detection: classical security level decreased
488            if let (Some(old_bits), Some(new_bits)) = (
489                old_algo.classical_security_level,
490                new_algo.classical_security_level,
491            ) && new_bits < old_bits
492            {
493                changes.push(FieldChange {
494                    field: "crypto_downgrade".to_string(),
495                    old_value: Some(format!("{old_bits} bits")),
496                    new_value: Some(format!("{new_bits} bits")),
497                });
498                cost += cost_model.crypto_downgrade;
499            }
500        }
501
502        // Key material state changes
503        if let (Some(old_mat), Some(new_mat)) = (
504            &old.related_crypto_material_properties,
505            &new.related_crypto_material_properties,
506        ) && old_mat.state != new_mat.state
507        {
508            changes.push(FieldChange {
509                field: "crypto_key_state".to_string(),
510                old_value: old_mat.state.as_ref().map(|s| s.to_string()),
511                new_value: new_mat.state.as_ref().map(|s| s.to_string()),
512            });
513            cost += cost_model.crypto_key_rotated;
514        }
515
516        // Certificate expiry changes
517        if let (Some(old_cert), Some(new_cert)) =
518            (&old.certificate_properties, &new.certificate_properties)
519            && old_cert.not_valid_after != new_cert.not_valid_after
520        {
521            changes.push(FieldChange {
522                field: "crypto_cert_expiry".to_string(),
523                old_value: old_cert.not_valid_after.map(|d| d.to_rfc3339()),
524                new_value: new_cert.not_valid_after.map(|d| d.to_rfc3339()),
525            });
526            cost += cost_model.crypto_cert_expiry_changed;
527        }
528
529        // Protocol version changes
530        if let (Some(old_proto), Some(new_proto)) =
531            (&old.protocol_properties, &new.protocol_properties)
532            && old_proto.version != new_proto.version
533        {
534            changes.push(FieldChange {
535                field: "crypto_protocol_version".to_string(),
536                old_value: old_proto.version.clone(),
537                new_value: new_proto.version.clone(),
538            });
539            cost += cost_model.crypto_protocol_changed;
540        }
541
542        // Asset type change (e.g., algorithm → protocol)
543        if old.asset_type != new.asset_type
544            && old.asset_type != CryptoAssetType::Other("unknown".to_string())
545        {
546            changes.push(FieldChange {
547                field: "crypto_asset_type".to_string(),
548                old_value: Some(old.asset_type.to_string()),
549                new_value: Some(new.asset_type.to_string()),
550            });
551            cost += cost_model.crypto_algorithm_changed;
552        }
553
554        cost
555    }
556}
557
558impl Default for ComponentChangeComputer {
559    fn default() -> Self {
560        Self::new(CostModel::default())
561    }
562}
563
564impl ChangeComputer for ComponentChangeComputer {
565    type ChangeSet = ComponentChangeSet;
566
567    fn compute(
568        &self,
569        old: &NormalizedSbom,
570        new: &NormalizedSbom,
571        matches: &ComponentMatches,
572    ) -> ComponentChangeSet {
573        let mut result = ComponentChangeSet::new();
574        let matched_new_ids: HashSet<_> = matches
575            .values()
576            .filter_map(std::clone::Clone::clone)
577            .collect();
578
579        // Find removed components
580        for (old_id, new_id_opt) in matches {
581            if new_id_opt.is_none()
582                && let Some(old_comp) = old.components.get(old_id)
583            {
584                result.removed.push(ComponentChange::removed(
585                    old_comp,
586                    self.cost_model.component_removed,
587                ));
588            }
589        }
590
591        // Find added components
592        for new_id in new.components.keys() {
593            if !matched_new_ids.contains(new_id)
594                && let Some(new_comp) = new.components.get(new_id)
595            {
596                result.added.push(ComponentChange::added(
597                    new_comp,
598                    self.cost_model.component_added,
599                ));
600            }
601        }
602
603        // Find modified components
604        for (old_id, new_id_opt) in matches {
605            if let Some(new_id) = new_id_opt
606                && let (Some(old_comp), Some(new_comp)) =
607                    (old.components.get(old_id), new.components.get(new_id))
608            {
609                // Check if component was actually modified
610                if old_comp.content_hash != new_comp.content_hash {
611                    let (field_changes, cost) = self.compute_field_changes(old_comp, new_comp);
612                    if !field_changes.is_empty() {
613                        result.modified.push(ComponentChange::modified(
614                            old_comp,
615                            new_comp,
616                            field_changes,
617                            cost,
618                        ));
619                    }
620                }
621            }
622        }
623
624        // Removed/modified are collected from hash-map iteration; sort by ID
625        // for deterministic output ordering
626        result.removed.sort_by(|a, b| a.id.cmp(&b.id));
627        result.modified.sort_by(|a, b| a.id.cmp(&b.id));
628
629        result
630    }
631
632    fn name(&self) -> &'static str {
633        "ComponentChangeComputer"
634    }
635}
636
#[cfg(test)]
mod tests {
    // `DatasetInfo`, `DatasetRef`, and `MlModelInfo` are brought into scope by
    // the parent module's `use crate::model::{...}` and reach this module
    // through the glob import below.
    use super::*;

    /// `default()` yields a usable computer that reports its own name.
    #[test]
    fn test_component_change_computer_default() {
        let computer = ComponentChangeComputer::default();
        assert_eq!(computer.name(), "ComponentChangeComputer");
    }

    /// Two empty SBOMs with no matches produce an empty change set.
    #[test]
    fn test_empty_sboms() {
        let computer = ComponentChangeComputer::default();
        let old = NormalizedSbom::default();
        let new = NormalizedSbom::default();
        let matches = ComponentMatches::new();

        let result = computer.compute(&old, &new, &matches);
        assert!(result.is_empty());
    }

    /// Return the first field change with the given field name, panicking with
    /// the full change list when there is none.
    fn find_change<'a>(changes: &'a [FieldChange], field: &str) -> &'a FieldChange {
        changes
            .iter()
            .find(|c| c.field == field)
            .unwrap_or_else(|| panic!("expected a `{field}` field change, got {changes:?}"))
    }

    /// A quantization-only difference yields exactly one `ml_quantization`
    /// change and is charged exactly that field's cost.
    #[test]
    fn test_ml_quantization_change_is_granular() {
        let computer = ComponentChangeComputer::default();
        let mut old = Component::new("model".to_string(), "model@1".to_string());
        let mut new = old.clone();

        old.ml_model = Some(MlModelInfo {
            quantization: Some("fp32".to_string()),
            ..MlModelInfo::default()
        });
        new.ml_model = Some(MlModelInfo {
            quantization: Some("int4".to_string()),
            ..MlModelInfo::default()
        });

        let (changes, total_cost) = computer.compute_field_changes(&old, &new);

        // The opaque "ml_model" blob is gone; a prefixed ml_quantization change appears.
        assert!(changes.iter().all(|c| c.field != "ml_model"));
        let change = find_change(&changes, "ml_quantization");
        assert_eq!(change.old_value.as_deref(), Some("fp32"));
        assert_eq!(change.new_value.as_deref(), Some("int4"));
        assert_eq!(total_cost, CostModel::default().ml_quantization_changed);
    }

    /// Architecture family and name are reported together as `family/name`
    /// under `ml_architecture`; the task gets its own `ml_task` change.
    #[test]
    fn test_ml_architecture_and_task_changes_are_granular() {
        let computer = ComponentChangeComputer::default();
        let mut old = Component::new("model".to_string(), "model@1".to_string());
        let mut new = old.clone();

        old.ml_model = Some(MlModelInfo {
            architecture_family: Some("cnn".to_string()),
            architecture_name: Some("resnet".to_string()),
            task: Some("computer-vision".to_string()),
            ..MlModelInfo::default()
        });
        new.ml_model = Some(MlModelInfo {
            architecture_family: Some("transformer".to_string()),
            architecture_name: Some("bert".to_string()),
            task: Some("nlp".to_string()),
            ..MlModelInfo::default()
        });

        let (changes, _) = computer.compute_field_changes(&old, &new);

        let arch = find_change(&changes, "ml_architecture");
        assert_eq!(arch.old_value.as_deref(), Some("cnn/resnet"));
        assert_eq!(arch.new_value.as_deref(), Some("transformer/bert"));
        let task = find_change(&changes, "ml_task");
        assert_eq!(task.old_value.as_deref(), Some("computer-vision"));
        assert_eq!(task.new_value.as_deref(), Some("nlp"));
    }

    /// Dropping one of two training datasets yields a single removal entry,
    /// keyed by the dataset's BOM-ref, charged at the removal cost.
    #[test]
    fn test_ml_training_dataset_removed_has_high_cost() {
        let computer = ComponentChangeComputer::default();
        let mut old = Component::new("model".to_string(), "model@1".to_string());
        let mut new = old.clone();

        old.ml_model = Some(MlModelInfo {
            training_datasets: vec![
                DatasetRef {
                    reference: Some("ds-imagenet".to_string()),
                    name: Some("imagenet".to_string()),
                    purl: None,
                },
                DatasetRef {
                    reference: Some("ds-coco".to_string()),
                    name: Some("coco".to_string()),
                    purl: None,
                },
            ],
            ..MlModelInfo::default()
        });
        new.ml_model = Some(MlModelInfo {
            training_datasets: vec![DatasetRef {
                reference: Some("ds-imagenet".to_string()),
                name: Some("imagenet".to_string()),
                purl: None,
            }],
            ..MlModelInfo::default()
        });

        let (changes, total_cost) = computer.compute_field_changes(&old, &new);

        // The key is the BOM-ref ("ds-coco"), not the name ("coco").
        let removed = find_change(&changes, "ml_training_dataset");
        assert_eq!(removed.old_value.as_deref(), Some("ds-coco"));
        assert_eq!(removed.new_value, None);
        assert_eq!(total_cost, CostModel::default().ml_training_dataset_removed);
    }

    /// Gaining a "pii" classification yields a single `dataset_sensitivity`
    /// addition charged at the sensitivity-added cost.
    #[test]
    fn test_dataset_sensitivity_escalation_has_high_cost() {
        let computer = ComponentChangeComputer::default();
        let mut old = Component::new("dataset".to_string(), "dataset@1".to_string());
        let mut new = old.clone();

        old.dataset = Some(DatasetInfo {
            dataset_type: Some("training".to_string()),
            sensitivity_classifications: vec!["public".to_string()],
            ..DatasetInfo::default()
        });
        new.dataset = Some(DatasetInfo {
            dataset_type: Some("training".to_string()),
            sensitivity_classifications: vec!["public".to_string(), "pii".to_string()],
            ..DatasetInfo::default()
        });

        let (changes, total_cost) = computer.compute_field_changes(&old, &new);

        // No opaque "dataset" blob; a prefixed dataset_sensitivity add appears.
        assert!(changes.iter().all(|c| c.field != "dataset"));
        let escalation = find_change(&changes, "dataset_sensitivity");
        assert_eq!(escalation.old_value, None);
        assert_eq!(escalation.new_value.as_deref(), Some("pii"));
        assert_eq!(total_cost, CostModel::default().dataset_sensitivity_added);
    }

    /// Dataset type and governance owners are each reported as their own
    /// prefixed change.
    #[test]
    fn test_dataset_type_and_governance_changes_are_granular() {
        let computer = ComponentChangeComputer::default();
        let mut old = Component::new("dataset".to_string(), "dataset@1".to_string());
        let mut new = old.clone();

        old.dataset = Some(DatasetInfo {
            dataset_type: Some("training".to_string()),
            governance_owners: vec!["alice".to_string()],
            ..DatasetInfo::default()
        });
        new.dataset = Some(DatasetInfo {
            dataset_type: Some("validation".to_string()),
            governance_owners: vec!["bob".to_string()],
            ..DatasetInfo::default()
        });

        let (changes, _) = computer.compute_field_changes(&old, &new);

        let ty = find_change(&changes, "dataset_type");
        assert_eq!(ty.old_value.as_deref(), Some("training"));
        assert_eq!(ty.new_value.as_deref(), Some("validation"));
        let gov = find_change(&changes, "dataset_governance");
        assert_eq!(gov.old_value.as_deref(), Some("alice"));
        assert_eq!(gov.new_value.as_deref(), Some("bob"));
    }

    /// The security-focused cost model charges more than the default one for
    /// the same change. Only the dataset (PII escalation) side is exercised
    /// here.
    #[test]
    fn test_security_focused_escalates_ml_and_dataset_costs() {
        let secure = ComponentChangeComputer::new(CostModel::security_focused());
        let default = ComponentChangeComputer::default();

        let mut old = Component::new("dataset".to_string(), "dataset@1".to_string());
        let mut new = old.clone();
        old.dataset = Some(DatasetInfo {
            sensitivity_classifications: vec![],
            ..DatasetInfo::default()
        });
        new.dataset = Some(DatasetInfo {
            sensitivity_classifications: vec!["pii".to_string()],
            ..DatasetInfo::default()
        });

        let (_, secure_cost) = secure.compute_field_changes(&old, &new);
        let (_, default_cost) = default.compute_field_changes(&old, &new);
        assert!(
            secure_cost > default_cost,
            "security profile should weight PII escalation higher (secure={secure_cost}, default={default_cost})"
        );
    }
}
837}