Skip to main content

socket_patch_core/vex/
build.rs

1//! Manifest + applied-set → OpenVEX `Document` builder.
2//!
3//! The grouping rule (one statement per vulnerability ID) means we
4//! transpose the manifest: it stores `PURL -> { vulnId -> info }`, but
5//! VEX wants `vulnId -> { products (and subcomponents) }`. We do that
6//! transpose once, then sort to keep output deterministic.
7//!
//! Naming convention: the manifest's vuln-ID key (typically GHSA-xxxx)
//! becomes `Vulnerability.name`, and every entry in that record's `cves`
//! array becomes one of its `aliases`. When two patches fix the same
//! vuln ID they merge into one statement with both PURLs as subcomponents.
13
14use std::collections::BTreeMap;
15
16use crate::manifest::schema::PatchManifest;
17use crate::vex::schema::{
18    Document, Justification, Product, Statement, Status, Subcomponent, Vulnerability,
19    OPENVEX_CONTEXT_V0_2_0,
20};
21use crate::vex::time::now_rfc3339;
22
/// Caller-supplied settings for [`build_document`].
///
/// Everything that is configuration rather than data (`author`,
/// `doc_id`, ...) is passed in here so the builder itself holds no
/// policy about defaults.
#[derive(Clone, Debug)]
pub struct BuildOptions {
    /// Identifier (usually a PURL) of the top-level product; it is
    /// copied into the `id` of every statement's single product.
    pub product_id: String,
    /// Value for the document's `@id` (e.g. `urn:uuid:...`). Left to the
    /// caller so the CLI can honor a `--doc-id` override or fall back to
    /// a random UUID.
    pub doc_id: String,
    /// Value for the document's `author` field. The CLI layer defaults
    /// this to "Socket"; the builder copies it through unchanged.
    pub author: String,
    /// Optional `tooling` string, conventionally
    /// `socket-patch <version>`. `None` is passed through as `None`.
    pub tooling: Option<String>,
}
37
38/// Build a VEX document from a manifest and a set of applied PURLs.
39///
40/// `applied` is a list of PURLs that have been verified (or were
41/// declared verified via `--no-verify`). Manifest entries not in
42/// `applied` are silently dropped — see the design note in
43/// `vex::verify` for why we never emit `affected`.
44///
45/// Returns `None` when no statements can be emitted (no applied
46/// patches matched the manifest). The CLI converts `None` into a
47/// non-zero exit code per the agreed contract.
48pub fn build_document(
49    manifest: &PatchManifest,
50    applied: &[String],
51    opts: &BuildOptions,
52) -> Option<Document> {
53    let timestamp = now_rfc3339();
54    let applied_set: std::collections::HashSet<&str> =
55        applied.iter().map(|s| s.as_str()).collect();
56
57    // vuln-id -> (aliases, impact-statement parts, subcomponent PURLs)
58    // BTreeMap keeps statement order deterministic by vuln id, which
59    // helps reproducibility for downstream diffs.
60    let mut grouped: BTreeMap<String, VulnGroup> = BTreeMap::new();
61
62    for (purl, record) in &manifest.patches {
63        if !applied_set.contains(purl.as_str()) {
64            continue;
65        }
66        for (vuln_id, info) in &record.vulnerabilities {
67            let entry = grouped.entry(vuln_id.clone()).or_default();
68            for cve in &info.cves {
69                if !entry.aliases.contains(cve) {
70                    entry.aliases.push(cve.clone());
71                }
72            }
73            entry.subcomponents.insert(purl.clone());
74            entry
75                .impact_parts
76                .push(format!("Patched via Socket patch {}", record.uuid));
77        }
78    }
79
80    if grouped.is_empty() {
81        return None;
82    }
83
84    let mut statements = Vec::with_capacity(grouped.len());
85    for (vuln_id, group) in grouped {
86        let mut aliases = group.aliases;
87        aliases.sort();
88
89        let mut subcomponent_ids: Vec<String> = group.subcomponents.into_iter().collect();
90        subcomponent_ids.sort();
91        let subcomponents = subcomponent_ids
92            .into_iter()
93            .map(|id| Subcomponent {
94                id,
95                identifiers: None,
96                hashes: None,
97            })
98            .collect();
99
100        let mut parts = group.impact_parts;
101        parts.sort();
102        parts.dedup();
103        // The `parts.is_empty()` branch is unreachable from the
104        // public API: the loop above pushes one entry per applied
105        // (purl, vuln) pair, so every group present in `grouped`
106        // has ≥1 entry. The defensive `None` arm stays in case a
107        // future refactor decouples grouping from impact tracking.
108        let impact_statement = if parts.is_empty() {
109            None
110        } else {
111            Some(parts.join("; "))
112        };
113
114        statements.push(Statement {
115            id: None,
116            vulnerability: Vulnerability {
117                name: vuln_id,
118                aliases,
119            },
120            timestamp: Some(timestamp.clone()),
121            last_updated: None,
122            products: vec![Product {
123                id: opts.product_id.clone(),
124                identifiers: None,
125                hashes: None,
126                subcomponents,
127            }],
128            status: Status::NotAffected,
129            supplier: None,
130            justification: Some(Justification::InlineMitigationsAlreadyExist),
131            impact_statement,
132            action_statement: None,
133        });
134    }
135
136    Some(Document {
137        context: OPENVEX_CONTEXT_V0_2_0.to_string(),
138        id: opts.doc_id.clone(),
139        author: opts.author.clone(),
140        role: None,
141        timestamp,
142        last_updated: None,
143        version: 1,
144        tooling: opts.tooling.clone(),
145        statements,
146    })
147}
148
149#[derive(Default)]
150struct VulnGroup {
151    aliases: Vec<String>,
152    subcomponents: std::collections::HashSet<String>,
153    impact_parts: Vec<String>,
154}
155
156#[cfg(test)]
157mod tests {
158    use super::*;
159    use crate::manifest::schema::{PatchFileInfo, PatchRecord, VulnerabilityInfo};
160    use std::collections::HashMap;
161
162    fn record(uuid: &str, vulns: Vec<(&str, Vec<&str>)>) -> PatchRecord {
163        let mut vmap = HashMap::new();
164        for (vid, cves) in vulns {
165            vmap.insert(
166                vid.to_string(),
167                VulnerabilityInfo {
168                    cves: cves.into_iter().map(String::from).collect(),
169                    summary: String::new(),
170                    severity: "high".to_string(),
171                    description: String::new(),
172                },
173            );
174        }
175        let mut files = HashMap::new();
176        files.insert(
177            "index.js".to_string(),
178            PatchFileInfo {
179                before_hash: "aaaa".to_string(),
180                after_hash: "bbbb".to_string(),
181            },
182        );
183        PatchRecord {
184            uuid: uuid.to_string(),
185            exported_at: "2024-01-01T00:00:00Z".to_string(),
186            files,
187            vulnerabilities: vmap,
188            description: String::new(),
189            license: "MIT".to_string(),
190            tier: "free".to_string(),
191        }
192    }
193
194    fn opts() -> BuildOptions {
195        BuildOptions {
196            product_id: "pkg:npm/app@1.0.0".to_string(),
197            doc_id: "urn:uuid:test".to_string(),
198            author: "Socket".to_string(),
199            tooling: Some("socket-patch 3.0.0".to_string()),
200        }
201    }
202
203    #[test]
204    fn empty_applied_returns_none() {
205        let manifest = PatchManifest::new();
206        assert!(build_document(&manifest, &[], &opts()).is_none());
207    }
208
209    #[test]
210    fn unapplied_patch_is_skipped() {
211        let mut manifest = PatchManifest::new();
212        manifest.patches.insert(
213            "pkg:npm/lodash@4.0.0".to_string(),
214            record("u1", vec![("GHSA-aaaa", vec!["CVE-2024-1"])]),
215        );
216        // applied is empty → no statements → None.
217        assert!(build_document(&manifest, &[], &opts()).is_none());
218    }
219
220    #[test]
221    fn single_patch_single_vuln_produces_one_statement() {
222        let mut manifest = PatchManifest::new();
223        manifest.patches.insert(
224            "pkg:npm/lodash@4.0.0".to_string(),
225            record("u1", vec![("GHSA-aaaa", vec!["CVE-2024-1"])]),
226        );
227        let doc = build_document(
228            &manifest,
229            &["pkg:npm/lodash@4.0.0".to_string()],
230            &opts(),
231        )
232        .unwrap();
233
234        assert_eq!(doc.statements.len(), 1);
235        let st = &doc.statements[0];
236        assert_eq!(st.vulnerability.name, "GHSA-aaaa");
237        assert_eq!(st.vulnerability.aliases, vec!["CVE-2024-1".to_string()]);
238        assert_eq!(st.status, Status::NotAffected);
239        assert_eq!(
240            st.justification,
241            Some(Justification::InlineMitigationsAlreadyExist)
242        );
243        assert_eq!(st.products.len(), 1);
244        assert_eq!(st.products[0].id, "pkg:npm/app@1.0.0");
245        assert_eq!(st.products[0].subcomponents.len(), 1);
246        assert_eq!(
247            st.products[0].subcomponents[0].id,
248            "pkg:npm/lodash@4.0.0"
249        );
250        assert!(st.impact_statement.as_ref().unwrap().contains("u1"));
251    }
252
253    #[test]
254    fn cves_flatten_into_aliases() {
255        let mut manifest = PatchManifest::new();
256        manifest.patches.insert(
257            "pkg:npm/x@1.0.0".to_string(),
258            record(
259                "u1",
260                vec![("GHSA-bbbb", vec!["CVE-2024-2", "CVE-2024-3"])],
261            ),
262        );
263        let doc = build_document(&manifest, &["pkg:npm/x@1.0.0".to_string()], &opts())
264            .unwrap();
265        let aliases = &doc.statements[0].vulnerability.aliases;
266        assert_eq!(aliases.len(), 2);
267        // Sorted for determinism.
268        assert_eq!(aliases[0], "CVE-2024-2");
269        assert_eq!(aliases[1], "CVE-2024-3");
270    }
271
272    #[test]
273    fn two_patches_sharing_ghsa_merge_into_one_statement() {
274        let mut manifest = PatchManifest::new();
275        manifest.patches.insert(
276            "pkg:npm/x@1.0.0".to_string(),
277            record("u1", vec![("GHSA-cccc", vec!["CVE-A"])]),
278        );
279        manifest.patches.insert(
280            "pkg:npm/y@2.0.0".to_string(),
281            record("u2", vec![("GHSA-cccc", vec!["CVE-A"])]),
282        );
283
284        let doc = build_document(
285            &manifest,
286            &[
287                "pkg:npm/x@1.0.0".to_string(),
288                "pkg:npm/y@2.0.0".to_string(),
289            ],
290            &opts(),
291        )
292        .unwrap();
293
294        assert_eq!(doc.statements.len(), 1);
295        let subs = &doc.statements[0].products[0].subcomponents;
296        assert_eq!(subs.len(), 2);
297        let ids: Vec<&str> = subs.iter().map(|s| s.id.as_str()).collect();
298        assert!(ids.contains(&"pkg:npm/x@1.0.0"));
299        assert!(ids.contains(&"pkg:npm/y@2.0.0"));
300        // Both patch UUIDs surface in the impact statement.
301        let imp = doc.statements[0].impact_statement.as_ref().unwrap();
302        assert!(imp.contains("u1"));
303        assert!(imp.contains("u2"));
304    }
305
306    #[test]
307    fn one_patch_multiple_vulns_produces_one_statement_each() {
308        let mut manifest = PatchManifest::new();
309        manifest.patches.insert(
310            "pkg:npm/x@1.0.0".to_string(),
311            record(
312                "u1",
313                vec![
314                    ("GHSA-aaaa", vec!["CVE-1"]),
315                    ("GHSA-bbbb", vec!["CVE-2"]),
316                ],
317            ),
318        );
319
320        let doc = build_document(&manifest, &["pkg:npm/x@1.0.0".to_string()], &opts())
321            .unwrap();
322        assert_eq!(doc.statements.len(), 2);
323        // BTreeMap order → sorted by vuln id.
324        assert_eq!(doc.statements[0].vulnerability.name, "GHSA-aaaa");
325        assert_eq!(doc.statements[1].vulnerability.name, "GHSA-bbbb");
326    }
327
328    #[test]
329    fn doc_carries_caller_supplied_fields() {
330        let mut manifest = PatchManifest::new();
331        manifest.patches.insert(
332            "pkg:npm/x@1.0.0".to_string(),
333            record("u1", vec![("GHSA-aaaa", vec![])]),
334        );
335        let doc = build_document(&manifest, &["pkg:npm/x@1.0.0".to_string()], &opts())
336            .unwrap();
337        assert_eq!(doc.context, OPENVEX_CONTEXT_V0_2_0);
338        assert_eq!(doc.id, "urn:uuid:test");
339        assert_eq!(doc.author, "Socket");
340        assert_eq!(doc.tooling.as_deref(), Some("socket-patch 3.0.0"));
341        assert_eq!(doc.version, 1);
342    }
343
344    // ── Edge-case coverage ────────────────────────────────────────
345
346    /// `applied` references a PURL the manifest doesn't have. Must
347    /// not panic, must not emit a statement for the missing PURL.
348    #[test]
349    fn applied_purl_absent_from_manifest_is_silently_skipped() {
350        let mut manifest = PatchManifest::new();
351        manifest.patches.insert(
352            "pkg:npm/in-manifest@1.0.0".to_string(),
353            record("u1", vec![("GHSA-aaaa", vec!["CVE-1"])]),
354        );
355
356        let doc = build_document(
357            &manifest,
358            &[
359                "pkg:npm/in-manifest@1.0.0".to_string(),
360                "pkg:npm/ghost@9.9.9".to_string(), // not in manifest
361            ],
362            &opts(),
363        )
364        .unwrap();
365
366        assert_eq!(doc.statements.len(), 1);
367        let subs = &doc.statements[0].products[0].subcomponents;
368        assert_eq!(subs.len(), 1);
369        assert_eq!(subs[0].id, "pkg:npm/in-manifest@1.0.0");
370    }
371
372    /// A patch in the manifest with zero vulnerabilities contributes
373    /// no statements. Important: a patch is applied to fix files
374    /// *without* a vuln record (rare but legal) → silently skip.
375    #[test]
376    fn applied_patch_with_zero_vulnerabilities_emits_no_statement() {
377        let mut manifest = PatchManifest::new();
378        manifest.patches.insert(
379            "pkg:npm/with-vuln@1.0.0".to_string(),
380            record("u1", vec![("GHSA-aaaa", vec!["CVE-1"])]),
381        );
382        manifest.patches.insert(
383            "pkg:npm/no-vuln@2.0.0".to_string(),
384            record("u2", vec![]),
385        );
386
387        let doc = build_document(
388            &manifest,
389            &[
390                "pkg:npm/with-vuln@1.0.0".to_string(),
391                "pkg:npm/no-vuln@2.0.0".to_string(),
392            ],
393            &opts(),
394        )
395        .unwrap();
396
397        assert_eq!(doc.statements.len(), 1);
398        let subs = &doc.statements[0].products[0].subcomponents;
399        assert_eq!(subs.len(), 1);
400        assert_eq!(subs[0].id, "pkg:npm/with-vuln@1.0.0");
401    }
402
403    /// A vulnerability with an empty CVE list → statement carries
404    /// no `aliases` key (omit-when-empty per the serde attribute).
405    #[test]
406    fn empty_cve_list_produces_statement_with_no_aliases_key() {
407        let mut manifest = PatchManifest::new();
408        manifest.patches.insert(
409            "pkg:npm/x@1.0.0".to_string(),
410            record("u1", vec![("GHSA-no-cves", vec![])]),
411        );
412        let doc = build_document(&manifest, &["pkg:npm/x@1.0.0".to_string()], &opts())
413            .unwrap();
414        assert_eq!(doc.statements[0].vulnerability.aliases.len(), 0);
415
416        // Serialize and verify the JSON omits the `aliases` key.
417        let v = serde_json::to_value(&doc.statements[0]).unwrap();
418        assert!(v["vulnerability"]
419            .as_object()
420            .unwrap()
421            .get("aliases")
422            .is_none());
423    }
424
425    /// Two patches share a GHSA AND share a CVE → the CVE appears
426    /// once in `aliases` (dedup-by-HashSet semantics).
427    #[test]
428    fn duplicate_cve_across_patches_deduped_in_aliases() {
429        let mut manifest = PatchManifest::new();
430        manifest.patches.insert(
431            "pkg:npm/x@1.0.0".to_string(),
432            record(
433                "u1",
434                vec![("GHSA-shared", vec!["CVE-SHARED", "CVE-X-ONLY"])],
435            ),
436        );
437        manifest.patches.insert(
438            "pkg:npm/y@2.0.0".to_string(),
439            record(
440                "u2",
441                vec![("GHSA-shared", vec!["CVE-SHARED", "CVE-Y-ONLY"])],
442            ),
443        );
444
445        let doc = build_document(
446            &manifest,
447            &[
448                "pkg:npm/x@1.0.0".to_string(),
449                "pkg:npm/y@2.0.0".to_string(),
450            ],
451            &opts(),
452        )
453        .unwrap();
454
455        assert_eq!(doc.statements.len(), 1);
456        let aliases = &doc.statements[0].vulnerability.aliases;
457        // Three unique CVEs, sorted.
458        assert_eq!(
459            aliases.as_slice(),
460            &[
461                "CVE-SHARED".to_string(),
462                "CVE-X-ONLY".to_string(),
463                "CVE-Y-ONLY".to_string(),
464            ]
465        );
466    }
467
468    /// Same patch UUID used by two PURLs that share a GHSA → the
469    /// impact_statement dedups the UUID-mention (no double-count).
470    #[test]
471    fn same_uuid_across_two_purls_deduped_in_impact_statement() {
472        // Two manifest entries, identical UUID and GHSA. Real world:
473        // the same patch package is fingerprinted against multiple
474        // installed versions. Builder must dedup the impact line.
475        let mut manifest = PatchManifest::new();
476        manifest.patches.insert(
477            "pkg:npm/x@1.0.0".to_string(),
478            record("shared-uuid", vec![("GHSA-shared", vec!["CVE-1"])]),
479        );
480        manifest.patches.insert(
481            "pkg:npm/x@1.0.1".to_string(),
482            record("shared-uuid", vec![("GHSA-shared", vec!["CVE-1"])]),
483        );
484
485        let doc = build_document(
486            &manifest,
487            &[
488                "pkg:npm/x@1.0.0".to_string(),
489                "pkg:npm/x@1.0.1".to_string(),
490            ],
491            &opts(),
492        )
493        .unwrap();
494        let imp = doc.statements[0].impact_statement.as_ref().unwrap();
495        // Count occurrences of "shared-uuid" — must be exactly 1.
496        assert_eq!(
497            imp.matches("shared-uuid").count(),
498            1,
499            "duplicate UUID must collapse: {imp}"
500        );
501    }
502
503    /// `BuildOptions.tooling = None` → `Document.tooling` is None and
504    /// the JSON output omits the key. Previously only `Some` was
505    /// asserted.
506    #[test]
507    fn tooling_none_omits_key_in_document() {
508        let mut manifest = PatchManifest::new();
509        manifest.patches.insert(
510            "pkg:npm/x@1.0.0".to_string(),
511            record("u1", vec![("GHSA-x", vec![])]),
512        );
513        let opts = BuildOptions {
514            product_id: "pkg:npm/app@1.0.0".to_string(),
515            doc_id: "urn:uuid:t".to_string(),
516            author: "Socket".to_string(),
517            tooling: None,
518        };
519        let doc =
520            build_document(&manifest, &["pkg:npm/x@1.0.0".to_string()], &opts)
521                .unwrap();
522        assert!(doc.tooling.is_none());
523
524        let v = serde_json::to_value(&doc).unwrap();
525        assert!(v.as_object().unwrap().get("tooling").is_none());
526    }
527
528    /// Empty author string is allowed through unchanged. We don't
529    /// special-case it; the CLI layer ensures a sensible default.
530    #[test]
531    fn empty_author_is_preserved_not_substituted() {
532        let mut manifest = PatchManifest::new();
533        manifest.patches.insert(
534            "pkg:npm/x@1.0.0".to_string(),
535            record("u1", vec![("GHSA-x", vec![])]),
536        );
537        let opts = BuildOptions {
538            product_id: "pkg:npm/app@1.0.0".to_string(),
539            doc_id: "urn:uuid:t".to_string(),
540            author: String::new(),
541            tooling: None,
542        };
543        let doc =
544            build_document(&manifest, &["pkg:npm/x@1.0.0".to_string()], &opts)
545                .unwrap();
546        assert_eq!(doc.author, "");
547    }
548
549    /// Two builds with the same inputs produce statements with
550    /// identical content and ordering. Timestamps may differ (the
551    /// builder calls `now_rfc3339`) but the `statements` field is
552    /// fully determined by the inputs.
553    #[test]
554    fn build_is_deterministic_modulo_timestamps() {
555        let mut manifest = PatchManifest::new();
556        manifest.patches.insert(
557            "pkg:npm/x@1.0.0".to_string(),
558            record(
559                "u1",
560                vec![
561                    ("GHSA-bbbb", vec!["CVE-2", "CVE-1"]),
562                    ("GHSA-aaaa", vec!["CVE-3"]),
563                ],
564            ),
565        );
566        manifest.patches.insert(
567            "pkg:npm/y@2.0.0".to_string(),
568            record("u2", vec![("GHSA-aaaa", vec!["CVE-3"])]),
569        );
570
571        let applied = vec![
572            "pkg:npm/x@1.0.0".to_string(),
573            "pkg:npm/y@2.0.0".to_string(),
574        ];
575
576        let a = build_document(&manifest, &applied, &opts()).unwrap();
577        let b = build_document(&manifest, &applied, &opts()).unwrap();
578
579        // Sanity-strip the per-run timestamp before comparing.
580        let strip = |mut d: Document| -> Document {
581            d.timestamp = String::new();
582            for s in d.statements.iter_mut() {
583                s.timestamp = None;
584            }
585            d
586        };
587        assert_eq!(strip(a), strip(b));
588    }
589
590    /// Every statement's `timestamp` equals the document's `timestamp`.
591    /// Builder pulls `now_rfc3339()` once and clones into each
592    /// statement; the contract is "one wall-clock per invocation".
593    #[test]
594    fn all_statement_timestamps_equal_document_timestamp() {
595        let mut manifest = PatchManifest::new();
596        manifest.patches.insert(
597            "pkg:npm/x@1.0.0".to_string(),
598            record(
599                "u1",
600                vec![("GHSA-a", vec!["CVE-1"]), ("GHSA-b", vec!["CVE-2"])],
601            ),
602        );
603        let doc =
604            build_document(&manifest, &["pkg:npm/x@1.0.0".to_string()], &opts())
605                .unwrap();
606        for st in &doc.statements {
607            assert_eq!(st.timestamp.as_deref(), Some(doc.timestamp.as_str()));
608        }
609    }
610
611    /// Subcomponent IDs are sorted within a merged statement. Pin
612    /// this so downstream tools can rely on stable diff output.
613    #[test]
614    fn merged_subcomponents_are_sorted_alphabetically() {
615        let mut manifest = PatchManifest::new();
616        manifest.patches.insert(
617            "pkg:npm/zzz@1.0.0".to_string(),
618            record("u-z", vec![("GHSA-shared", vec![])]),
619        );
620        manifest.patches.insert(
621            "pkg:npm/aaa@1.0.0".to_string(),
622            record("u-a", vec![("GHSA-shared", vec![])]),
623        );
624        manifest.patches.insert(
625            "pkg:npm/mmm@1.0.0".to_string(),
626            record("u-m", vec![("GHSA-shared", vec![])]),
627        );
628
629        let doc = build_document(
630            &manifest,
631            &[
632                "pkg:npm/zzz@1.0.0".to_string(),
633                "pkg:npm/aaa@1.0.0".to_string(),
634                "pkg:npm/mmm@1.0.0".to_string(),
635            ],
636            &opts(),
637        )
638        .unwrap();
639
640        let subs = &doc.statements[0].products[0].subcomponents;
641        assert_eq!(subs.len(), 3);
642        assert_eq!(subs[0].id, "pkg:npm/aaa@1.0.0");
643        assert_eq!(subs[1].id, "pkg:npm/mmm@1.0.0");
644        assert_eq!(subs[2].id, "pkg:npm/zzz@1.0.0");
645    }
646}