Skip to main content

socket_patch_cli/commands/
scan.rs

1use clap::Args;
2use socket_patch_core::api::client::{
3    build_proxy_fallback_client, get_api_client_with_overrides, is_fallback_candidate,
4};
5use socket_patch_core::api::types::{BatchPackagePatches, PatchSearchResult};
6use socket_patch_core::crawlers::{CrawlerOptions, Ecosystem};
7use socket_patch_core::manifest::operations::{read_manifest, write_manifest};
8use socket_patch_core::manifest::schema::PatchManifest;
9use socket_patch_core::utils::cleanup_blobs::{
10    cleanup_unused_archives, cleanup_unused_blobs, CleanupResult,
11};
12use socket_patch_core::utils::purl::strip_purl_qualifiers;
13use socket_patch_core::utils::telemetry::{track_patch_scan_failed, track_patch_scanned};
14use std::collections::HashSet;
15use std::path::Path;
16
17use crate::args::{apply_env_toggles, GlobalArgs};
18use crate::commands::vex::{generate_vex_from_manifest_path, VexEmbedArgs};
19use crate::ecosystem_dispatch::crawl_all_ecosystems;
20use crate::output::{color, confirm, format_severity, stderr_is_tty, stdout_is_tty};
21
22use super::get::{
23    download_and_apply_patches, select_patches, truncate_with_ellipsis, DownloadParams,
24};
25
/// Default value for `ScanArgs::batch_size` (`--batch-size`): the number of
/// packages queried per API request when the user does not override it.
const DEFAULT_BATCH_SIZE: usize = 100;
27
/// One entry of the `updates` array in `scan --json` output: a PURL whose
/// manifest entry would be replaced by a different (newer) patch UUID from
/// the discovery results. Lets a bot see what an apply would upgrade.
/// The schema is stable — see CLI_CONTRACT.md (`scan` JSON output /
/// `updates` field).
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) struct UpdateInfo {
    /// Package URL of the affected manifest entry.
    pub purl: String,
    /// Patch UUID currently recorded in the manifest.
    pub old_uuid: String,
    /// Patch UUID offered by the discovery results.
    pub new_uuid: String,
}
37
38/// Aggregated outcome of a GC pass (or preview). Serialized into the
39/// `scan --json` output's `gc` sub-object. See CLI_CONTRACT.md for the
40/// stable schema.
41#[derive(Debug, Default)]
42pub(crate) struct GcSummary {
43    /// PURLs removed from the manifest (apply mode) or eligible to be
44    /// removed (preview mode).
45    pub pruned: Vec<String>,
46    pub blobs: CleanupResult,
47    pub diffs: CleanupResult,
48    pub packages: CleanupResult,
49    /// `true` when `--no-prune` was set; the sub-object only carries the
50    /// `skipped: true` field in that case.
51    pub skipped: bool,
52}
53
54impl GcSummary {
55    fn total_bytes(&self) -> u64 {
56        self.blobs.bytes_freed + self.diffs.bytes_freed + self.packages.bytes_freed
57    }
58
59    /// Serialize for a *mutating* GC pass (post-apply).
60    fn to_apply_json(&self) -> serde_json::Value {
61        if self.skipped {
62            return serde_json::json!({ "skipped": true });
63        }
64        serde_json::json!({
65            "prunedManifestEntries": self.pruned,
66            "removedBlobs": self.blobs.blobs_removed,
67            "removedDiffArchives": self.diffs.blobs_removed,
68            "removedPackageArchives": self.packages.blobs_removed,
69            "bytesFreed": self.total_bytes(),
70        })
71    }
72
73    /// Serialize for a *non-mutating* GC pass (read-only preview).
74    fn to_preview_json(&self) -> serde_json::Value {
75        if self.skipped {
76            return serde_json::json!({ "skipped": true });
77        }
78        serde_json::json!({
79            "prunableManifestEntries": self.pruned,
80            "orphanBlobs": self.blobs.blobs_removed,
81            "orphanDiffArchives": self.diffs.blobs_removed,
82            "orphanPackageArchives": self.packages.blobs_removed,
83            "bytesReclaimable": self.total_bytes(),
84        })
85    }
86}
87
88/// Compute GC actions without performing them. `dry_run = true` for the
89/// preview path; `dry_run = false` for the apply path. The cleanup helpers
90/// from `socket_patch_core::utils::cleanup_blobs` natively support dry-run,
91/// so the same function works for both.
92async fn run_gc(
93    manifest: &PatchManifest,
94    pruned: Vec<String>,
95    socket_dir: &Path,
96    dry_run: bool,
97) -> GcSummary {
98    let blobs = cleanup_unused_blobs(manifest, &socket_dir.join("blobs"), dry_run)
99        .await
100        .unwrap_or_default();
101    let diffs = cleanup_unused_archives(manifest, &socket_dir.join("diffs"), dry_run)
102        .await
103        .unwrap_or_default();
104    let packages = cleanup_unused_archives(manifest, &socket_dir.join("packages"), dry_run)
105        .await
106        .unwrap_or_default();
107    GcSummary {
108        pruned,
109        blobs,
110        diffs,
111        packages,
112        skipped: false,
113    }
114}
115
116/// Apply-mode GC: re-read the manifest written by `download_and_apply_patches`,
117/// prune manifest entries for PURLs not in `scanned_purls`, write the manifest
118/// back, then sweep orphan blob/diff/package files. Callers must gate on the
119/// `prune` flag — when GC isn't requested, simply don't call this function and
120/// don't emit a `gc` sub-object.
121async fn run_apply_gc(
122    manifest_path: &Path,
123    socket_dir: &Path,
124    scanned_purls: &HashSet<String>,
125) -> GcSummary {
126    // Re-read the just-written manifest (the apply step may have added
127    // or updated entries we now want to consider for pruning).
128    let mut manifest = match read_manifest(manifest_path).await {
129        Ok(Some(m)) => m,
130        _ => return GcSummary::default(),
131    };
132    let prunable = detect_prunable(&manifest, scanned_purls);
133    for purl in &prunable {
134        manifest.patches.remove(purl);
135    }
136    if !prunable.is_empty() {
137        // If pruning failed mid-write the manifest may be stale, but the
138        // file-level cleanup below still operates on the in-memory copy.
139        let _ = write_manifest(manifest_path, &manifest).await;
140    }
141    run_gc(&manifest, prunable, socket_dir, /*dry_run=*/false).await
142}
143
144/// Dry-run preview of the apply-mode GC pass. Same shape as
145/// [`run_apply_gc`] but emits `prunable*`/`orphan*` field names and
146/// performs no mutation.
147async fn preview_apply_gc(
148    manifest_path: &Path,
149    socket_dir: &Path,
150    scanned_purls: &HashSet<String>,
151) -> GcSummary {
152    let manifest = match read_manifest(manifest_path).await {
153        Ok(Some(m)) => m,
154        _ => return GcSummary::default(),
155    };
156    let prunable = detect_prunable(&manifest, scanned_purls);
157    run_gc(&manifest, prunable, socket_dir, /*dry_run=*/true).await
158}
159
160/// PURL strings present in the manifest but absent from `scanned_purls`.
161/// These are candidates for pruning during `scan`'s GC pass — they
162/// correspond to packages that were once patched but are no longer
163/// installed (or no longer reachable to the crawler). Pure / no I/O so
164/// it's unit-testable.
165///
166/// Comparison is on the **base** PURL (qualifiers stripped) on both
167/// sides: the pypi crawler reports base PURLs, but a manifest may hold
168/// several qualified release variants (`?artifact_id=...`) of one
169/// installed package. Matching on the base keeps every variant of an
170/// installed package while still pruning all variants of one that is
171/// gone — otherwise `scan --all-releases --sync` would prune the very
172/// variants it just downloaded.
173pub(crate) fn detect_prunable(
174    manifest: &PatchManifest,
175    scanned_purls: &HashSet<String>,
176) -> Vec<String> {
177    let scanned_bases: HashSet<&str> =
178        scanned_purls.iter().map(|p| strip_purl_qualifiers(p)).collect();
179    manifest
180        .patches
181        .keys()
182        .filter(|p| !scanned_bases.contains(strip_purl_qualifiers(p)))
183        .cloned()
184        .collect()
185}
186
187/// Cross-reference an existing manifest against discovery results to find
188/// PURLs whose newest available patch UUID differs from the locally-recorded
189/// one. Used by both the discovery JSON path and the table-print path.
190/// Pure / no I/O so it's unit-testable.
191pub(crate) fn detect_updates(
192    existing_manifest: Option<&PatchManifest>,
193    packages: &[BatchPackagePatches],
194) -> Vec<UpdateInfo> {
195    let Some(manifest) = existing_manifest else {
196        return Vec::new();
197    };
198    let mut updates = Vec::new();
199    for pkg in packages {
200        let Some(existing) = manifest.patches.get(&pkg.purl) else {
201            continue;
202        };
203        // Treat the first patch in the batch as the candidate the apply path
204        // would resolve to (mirrors `select_patches` ordering — newest-first
205        // for paid users, single-patch auto-select for free).
206        let Some(candidate) = pkg.patches.first() else {
207            continue;
208        };
209        if candidate.uuid != existing.uuid {
210            updates.push(UpdateInfo {
211                purl: pkg.purl.clone(),
212                old_uuid: existing.uuid.clone(),
213                new_uuid: candidate.uuid.clone(),
214            });
215        }
216    }
217    updates
218}
219
220/// Collect the deduplicated CVE and GHSA identifiers across every patch of
221/// a package, for the scan table's VULNERABILITIES column. CVEs are listed
222/// before GHSAs and each group is sorted, so the rendered output is stable —
223/// the per-patch ID lists and set-based dedup are otherwise nondeterministic
224/// in order. Pure / no I/O so it's unit-testable.
225pub(crate) fn collect_vuln_ids(pkg: &BatchPackagePatches) -> Vec<String> {
226    let mut cves: HashSet<String> = HashSet::new();
227    let mut ghsas: HashSet<String> = HashSet::new();
228    for patch in &pkg.patches {
229        for cve in &patch.cve_ids {
230            cves.insert(cve.clone());
231        }
232        for ghsa in &patch.ghsa_ids {
233            ghsas.insert(ghsa.clone());
234        }
235    }
236    let mut cves: Vec<String> = cves.into_iter().collect();
237    cves.sort();
238    let mut ghsas: Vec<String> = ghsas.into_iter().collect();
239    ghsas.sort();
240    cves.into_iter().chain(ghsas).collect()
241}
242
243#[derive(Args)]
244pub struct ScanArgs {
245    #[command(flatten)]
246    pub common: GlobalArgs,
247
248    /// Number of packages to query per API request.
249    #[arg(long = "batch-size", env = "SOCKET_BATCH_SIZE", default_value_t = DEFAULT_BATCH_SIZE)]
250    pub batch_size: usize,
251
252    /// Download and apply selected patches in JSON mode (non-interactive).
253    /// Without this flag, `scan --json` is read-only — it lists available
254    /// patches plus an `updates` array but does not mutate the manifest.
255    /// Designed for unattended workflows (cron jobs, bots that open PRs);
256    /// pair with `--yes` for clarity though `--json` already implies non-
257    /// interactive confirmation. No effect outside `--json` mode (the
258    /// non-JSON path always prompts the user).
259    #[arg(long, default_value_t = false)]
260    pub apply: bool,
261
262    /// Garbage-collect after the scan: prune manifest entries for
263    /// packages no longer present in the crawl, then delete orphan
264    /// blob, diff, and package-archive files from `.socket/`. Off by
265    /// default to preserve manifest state across temporary uninstalls;
266    /// pair with `--apply` (or use `--sync`) for the auto-update
267    /// workflow.
268    #[arg(long, default_value_t = false)]
269    pub prune: bool,
270
271    /// Convenience flag for the auto-update workflow: implies both
272    /// `--apply` and `--prune`. Designed so a cron job or CI workflow
273    /// can run `socket-patch scan --json --sync --yes` and end up in a
274    /// fully-reconciled state in one invocation.
275    #[arg(long, default_value_t = false)]
276    pub sync: bool,
277
278    /// Download patches for every release/distribution variant of a
279    /// matched package, not just the one(s) matching the locally-
280    /// installed distribution. Affects ecosystems with per-release
281    /// variants — PyPI (wheel/sdist via `artifact_id`), RubyGems
282    /// (`platform`), and Maven (`classifier`). Off by default: narrow
283    /// scans store only the patch(es) for the installed dist, keeping
284    /// `.socket/` small; `--all-releases` makes the manifest portable
285    /// across environments (e.g. cross-platform CI caches).
286    #[arg(
287        long = "all-releases",
288        env = "SOCKET_ALL_RELEASES",
289        default_value_t = false,
290        value_parser = clap::builder::BoolishValueParser::new(),
291    )]
292    pub all_releases: bool,
293
294    /// On a successful scan, also generate an OpenVEX 0.2.0 document.
295    /// `--vex <path>` is the trigger; the `--vex-*` knobs mirror the
296    /// standalone `vex` command. The document is built from the manifest
297    /// as it stands after the scan (including any `--apply`/`--sync`
298    /// writes) and verified against on-disk state. A requested-but-failed
299    /// VEX makes the command exit non-zero.
300    #[command(flatten)]
301    pub vex: VexEmbedArgs,
302}
303
304/// Embedded-VEX side-effect for `scan`'s JSON terminal returns. When
305/// `--vex` was requested and `base_code` is 0, generate the OpenVEX
306/// document from the post-scan manifest and fold the outcome into
307/// `result` — a `vex` object on success, or `status: "error"` + `error`
308/// on failure (per the fail-the-command contract). Returns the final exit
309/// code: `base_code` when not requested / skipped / on VEX success, `1`
310/// when VEX generation failed. Caller prints `result` after this returns.
311async fn embed_vex_into_json(
312    common: &GlobalArgs,
313    vex_args: &VexEmbedArgs,
314    manifest_path: &Path,
315    base_code: i32,
316    result: &mut serde_json::Value,
317) -> i32 {
318    if vex_args.vex.is_none() || base_code != 0 {
319        return base_code;
320    }
321    let params = vex_args.to_build_params();
322    match generate_vex_from_manifest_path(common, &params, manifest_path).await {
323        Ok(summary) => {
324            result["vex"] = serde_json::json!({
325                "path": vex_args.vex.as_ref().unwrap().display().to_string(),
326                "statements": summary.statements,
327                "format": "openvex-0.2.0",
328            });
329            0
330        }
331        Err(e) => {
332            result["status"] = serde_json::json!("error");
333            result["error"] = serde_json::json!({
334                "code": e.code,
335                "message": e.message,
336            });
337            1
338        }
339    }
340}
341
342/// Embedded-VEX side-effect for `scan`'s human-readable terminal returns.
343/// Prints a one-line note (or error) and returns the final exit code:
344/// `base_code` when not requested / skipped / on VEX success, `1` on VEX
345/// failure. No-op unless `--vex` was set and `base_code` is 0.
346async fn embed_vex_human(
347    common: &GlobalArgs,
348    vex_args: &VexEmbedArgs,
349    manifest_path: &Path,
350    base_code: i32,
351) -> i32 {
352    if vex_args.vex.is_none() || base_code != 0 {
353        return base_code;
354    }
355    let params = vex_args.to_build_params();
356    match generate_vex_from_manifest_path(common, &params, manifest_path).await {
357        Ok(summary) => {
358            if !common.silent {
359                println!(
360                    "Wrote OpenVEX document with {} statement(s) to {}",
361                    summary.statements,
362                    vex_args.vex.as_ref().unwrap().display(),
363                );
364            }
365            0
366        }
367        Err(e) => {
368            if !common.silent {
369                eprintln!("Error: VEX generation failed: {}", e.message);
370            }
371            1
372        }
373    }
374}
375
376pub async fn run(args: ScanArgs) -> i32 {
377    apply_env_toggles(&args.common);
378
379    // `--sync` is sugar for `--apply --prune`. Derive locals once and
380    // use them everywhere downstream so the flag interactions are
381    // expressed in one place. `--apply --prune --sync` is redundant
382    // but legal (all three end up true).
383    let apply = args.apply || args.sync;
384    let prune = args.prune || args.sync;
385
386    // Resolved up-front (rather than at the GC site) because the embedded
387    // `--vex` side-effect reads the manifest at several terminal returns,
388    // including the early "no packages" exit before the GC block.
389    let manifest_path = args.common.resolved_manifest_path();
390    let socket_dir = manifest_path.parent().unwrap().to_path_buf();
391
392    let overrides = args.common.api_client_overrides();
393    let (mut api_client, mut use_public_proxy) =
394        get_api_client_with_overrides(overrides.clone()).await;
395    let telemetry_token = api_client.api_token().cloned();
396    let telemetry_org = api_client.org_slug().cloned();
397    // Tracks whether scan was downgraded from the authenticated
398    // endpoint to the public proxy mid-run after a 401/403. Surfaces
399    // in the final `patch_scanned` telemetry event so we can measure
400    // how often stale-token fallbacks fire in the wild.
401    let mut fallback_to_proxy = false;
402
403    // org slug is already stored in the client
404    let effective_org_slug: Option<&str> = None;
405
406    let crawler_options = CrawlerOptions {
407        cwd: args.common.cwd.clone(),
408        global: args.common.global,
409        global_prefix: args.common.global_prefix.clone(),
410        batch_size: args.batch_size,
411    };
412
413    let scan_target = if args.common.global || args.common.global_prefix.is_some() {
414        "global packages"
415    } else {
416        "packages"
417    };
418
419    let show_progress = !args.common.json && stderr_is_tty();
420
421    if show_progress {
422        eprint!("Scanning {scan_target}...");
423    }
424
425    // Crawl packages
426    let (all_crawled, eco_counts) = crawl_all_ecosystems(&crawler_options).await;
427
428    // Filter by --ecosystems if provided
429    let filtered_crawled: Vec<_> = if let Some(ref allowed) = args.common.ecosystems {
430        all_crawled
431            .into_iter()
432            .filter(|pkg| {
433                if let Some(eco) = Ecosystem::from_purl(&pkg.purl) {
434                    allowed.iter().any(|a| a == eco.cli_name())
435                } else {
436                    false
437                }
438            })
439            .collect()
440    } else {
441        all_crawled
442    };
443
444    let all_purls: Vec<String> = filtered_crawled.iter().map(|p| p.purl.clone()).collect();
445    let package_count = all_purls.len();
446
447    if package_count == 0 {
448        if show_progress {
449            eprintln!();
450        }
451        // Telemetry: empty-scan still counts as a successful scan.
452        track_patch_scanned(
453            0,
454            0,
455            0,
456            false,
457            args.common.ecosystems.clone().unwrap_or_default().as_slice(),
458            false,
459            telemetry_token.as_deref(),
460            telemetry_org.as_deref(),
461        )
462        .await;
463        if args.common.json {
464            // When the crawler finds nothing, GC is intentionally skipped
465            // — pruning every manifest entry on the assumption that the
466            // user "uninstalled everything" is too destructive. Bots
467            // that need full cleanup can call `repair` explicitly. No
468            // `gc` field emitted because the user didn't request one.
469            let mut result = serde_json::json!({
470                "status": "success",
471                "scannedPackages": 0,
472                "packagesWithPatches": 0,
473                "totalPatches": 0,
474                "freePatches": 0,
475                "paidPatches": 0,
476                "canAccessPaidPatches": false,
477                "packages": [],
478                "updates": [],
479            });
480            let code =
481                embed_vex_into_json(&args.common, &args.vex, &manifest_path, 0, &mut result).await;
482            println!("{}", serde_json::to_string_pretty(&result).unwrap());
483            return code;
484        } else if args.common.global || args.common.global_prefix.is_some() {
485            println!("No global packages found.");
486        } else {
487            #[allow(unused_mut)]
488            let mut install_cmds = String::from("npm/yarn/pnpm/pip");
489            #[cfg(feature = "cargo")]
490            install_cmds.push_str("/cargo");
491            #[cfg(feature = "golang")]
492            install_cmds.push_str("/go");
493            #[cfg(feature = "maven")]
494            install_cmds.push_str("/mvn");
495            #[cfg(feature = "composer")]
496            install_cmds.push_str("/composer");
497            println!("No packages found. Run {install_cmds} install first.");
498        }
499        return embed_vex_human(&args.common, &args.vex, &manifest_path, 0).await;
500    }
501
502    // Build ecosystem summary
503    let mut eco_parts = Vec::new();
504    for eco in Ecosystem::all() {
505        let count = if args.common.ecosystems.is_some() {
506            // When filtering, count the filtered packages
507            filtered_crawled.iter().filter(|p| Ecosystem::from_purl(&p.purl) == Some(*eco)).count()
508        } else {
509            eco_counts.get(eco).copied().unwrap_or(0)
510        };
511        if count > 0 {
512            eco_parts.push(format!("{count} {}", eco.display_name()));
513        }
514    }
515    let eco_summary = if eco_parts.is_empty() {
516        String::new()
517    } else {
518        format!(" ({})", eco_parts.join(", "))
519    };
520
521    if !args.common.json {
522        if show_progress {
523            eprintln!("\rFound {package_count} packages{eco_summary}");
524        } else {
525            eprintln!("Found {package_count} packages{eco_summary}");
526        }
527    }
528
529    // Query API in batches
530    let mut all_packages_with_patches: Vec<BatchPackagePatches> = Vec::new();
531    let mut can_access_paid_patches = false;
532    let total_batches = all_purls.len().div_ceil(args.batch_size);
533    let mut batch_error_count = 0usize;
534    let mut last_batch_error: Option<String> = None;
535
536    if show_progress {
537        eprint!("Querying API for patches... (batch 1/{total_batches})");
538    }
539
540    for (batch_idx, chunk) in all_purls.chunks(args.batch_size).enumerate() {
541        if show_progress {
542            eprint!(
543                "\rQuerying API for patches... (batch {}/{})",
544                batch_idx + 1,
545                total_batches
546            );
547        }
548
549        let purls: Vec<String> = chunk.to_vec();
550        let mut result = api_client
551            .search_patches_batch(effective_org_slug, &purls)
552            .await;
553
554        // Fallback: a 401/403 against the authenticated endpoint can
555        // mean a stale/revoked token. Retry against the public proxy
556        // (free patches only) once, then continue the rest of the
557        // loop with the downgraded client. Only triggers on the
558        // first authenticated batch; subsequent iterations are
559        // already on the proxy.
560        if !use_public_proxy {
561            if let Err(ref e) = result {
562                if is_fallback_candidate(e) {
563                    eprintln!(
564                        "Warning: authenticated API returned {e}; \
565                         falling back to public patch API proxy (free patches only)."
566                    );
567                    api_client = build_proxy_fallback_client(&overrides);
568                    use_public_proxy = true;
569                    fallback_to_proxy = true;
570                    result = api_client
571                        .search_patches_batch(effective_org_slug, &purls)
572                        .await;
573                }
574            }
575        }
576
577        match result {
578            Ok(response) => {
579                if response.can_access_paid_patches {
580                    can_access_paid_patches = true;
581                }
582                for pkg in response.packages {
583                    if !pkg.patches.is_empty() {
584                        all_packages_with_patches.push(pkg);
585                    }
586                }
587            }
588            Err(e) => {
589                batch_error_count += 1;
590                last_batch_error = Some(e.to_string());
591                if !args.common.json {
592                    eprintln!("\nError querying batch {}: {e}", batch_idx + 1);
593                }
594            }
595        }
596    }
597
598    // If every batch errored, surface this as a full scan failure rather
599    // than silently reporting zero patches (which historically looked
600    // identical to "no patches for these packages").
601    if total_batches > 0 && batch_error_count == total_batches {
602        let err = last_batch_error
603            .unwrap_or_else(|| "all batches failed".to_string());
604        track_patch_scan_failed(
605            &err,
606            fallback_to_proxy,
607            telemetry_token.as_deref(),
608            telemetry_org.as_deref(),
609        )
610        .await;
611    }
612
613    let total_patches_found: usize = all_packages_with_patches
614        .iter()
615        .map(|p| p.patches.len())
616        .sum();
617
618    if !args.common.json {
619        if total_patches_found > 0 {
620            if show_progress {
621                eprintln!(
622                    "\rFound {total_patches_found} patches for {} packages",
623                    all_packages_with_patches.len()
624                );
625            } else {
626                eprintln!(
627                    "Found {total_patches_found} patches for {} packages",
628                    all_packages_with_patches.len()
629                );
630            }
631        } else if show_progress {
632            eprintln!("\rAPI query complete");
633        } else {
634            eprintln!("API query complete");
635        }
636    }
637
638    // Calculate patch counts
639    let mut free_patches = 0usize;
640    let mut paid_patches = 0usize;
641    for pkg in &all_packages_with_patches {
642        for patch in &pkg.patches {
643            if patch.tier == "free" {
644                free_patches += 1;
645            } else {
646                paid_patches += 1;
647            }
648        }
649    }
650    let total_patches = free_patches + paid_patches;
651
652    // Telemetry: record the scan outcome once we have the canonical
653    // per-tier counts. `fallback_to_proxy` is `true` iff the batch
654    // loop downgraded from the authenticated endpoint to the public
655    // proxy after a 401/403.
656    track_patch_scanned(
657        package_count,
658        free_patches,
659        paid_patches,
660        can_access_paid_patches,
661        args.common.ecosystems.clone().unwrap_or_default().as_slice(),
662        fallback_to_proxy,
663        telemetry_token.as_deref(),
664        telemetry_org.as_deref(),
665    )
666    .await;
667
668    // Read existing manifest once for update detection. Used by both the
669    // JSON-mode emission (always includes an `updates` array) and the
670    // non-JSON table-print path (counts `updates_available`).
671    // (`manifest_path`/`socket_dir` are resolved at the top of `run`.)
672    let existing_manifest = read_manifest(&manifest_path).await.ok().flatten();
673    let updates = detect_updates(existing_manifest.as_ref(), &all_packages_with_patches);
674
675    // Crawl PURLs as a set for prunable detection (manifest entries whose
676    // PURL is not in the current crawl results).
677    let scanned_purls: HashSet<String> = all_purls.iter().cloned().collect();
678
679    if args.common.json {
680        let mut result = serde_json::json!({
681            "status": "success",
682            "scannedPackages": package_count,
683            "packagesWithPatches": all_packages_with_patches.len(),
684            "totalPatches": total_patches,
685            "freePatches": free_patches,
686            "paidPatches": paid_patches,
687            "canAccessPaidPatches": can_access_paid_patches,
688            "packages": all_packages_with_patches,
689            "updates": updates.iter().map(|u| serde_json::json!({
690                "purl": u.purl,
691                "oldUuid": u.old_uuid,
692                "newUuid": u.new_uuid,
693            })).collect::<Vec<_>>(),
694        });
695
696        // `apply` and `prune` are computed once at the top of run()
697        // (factoring in --sync, which implies both). They're independent
698        // here: a bot can `--apply` without `--prune`, or `--prune`
699        // without `--apply` (just GC-sweep), or both (full sync).
700        let dry = args.common.dry_run;
701
702        // --- Apply path (if requested) -----------------------------------
703        if apply {
704            let mut all_search_results: Vec<PatchSearchResult> = Vec::new();
705            for pkg in &all_packages_with_patches {
706                match api_client
707                    .search_patches_by_package(effective_org_slug, &pkg.purl)
708                    .await
709                {
710                    Ok(response) => all_search_results.extend(response.patches),
711                    Err(_) => continue,
712                }
713            }
714
715            // For scan-driven bot workflows there's no "specify --id"
716            // option — we're scanning the whole project. Pass
717            // `is_json = false` so `select_one` auto-selects the newest
718            // patch in non-TTY mode rather than erroring with
719            // `selection_required`.
720            let selected = if all_search_results.is_empty() {
721                Vec::new()
722            } else {
723                match select_patches(&all_search_results, can_access_paid_patches, false) {
724                    Ok(s) => s,
725                    Err(code) => return code,
726                }
727            };
728
729            let mut apply_code = 0i32;
730            if dry {
731                // Synthesize the per-patch outcome without touching disk.
732                // `decide_patch_action` consults the existing manifest,
733                // so it accurately reports what `--apply` *would* do.
734                let manifest_for_preview = existing_manifest
735                    .clone()
736                    .unwrap_or_else(PatchManifest::new);
737                let patches: Vec<serde_json::Value> = selected
738                    .iter()
739                    .map(|p| {
740                        match super::get::decide_patch_action(
741                            &manifest_for_preview,
742                            &p.purl,
743                            &p.uuid,
744                        ) {
745                            super::get::PatchAction::Added => serde_json::json!({
746                                "purl": p.purl, "uuid": p.uuid, "action": "added",
747                            }),
748                            super::get::PatchAction::Updated { old_uuid } => serde_json::json!({
749                                "purl": p.purl, "uuid": p.uuid,
750                                "action": "updated", "oldUuid": old_uuid,
751                            }),
752                            super::get::PatchAction::Skipped => serde_json::json!({
753                                "purl": p.purl, "uuid": p.uuid, "action": "skipped",
754                            }),
755                        }
756                    })
757                    .collect();
758                let added = patches.iter().filter(|p| p["action"] == "added").count();
759                let updated = patches.iter().filter(|p| p["action"] == "updated").count();
760                let skipped = patches.iter().filter(|p| p["action"] == "skipped").count();
761                result["apply"] = serde_json::json!({
762                    "found": selected.len(),
763                    "downloaded": 0,
764                    "skipped": skipped,
765                    "failed": 0,
766                    "applied": 0,
767                    "updated": updated,
768                    "added": added,
769                    "patches": patches,
770                    "dryRun": true,
771                });
772            } else if selected.is_empty() {
773                // No patches selected (e.g. all paid for a free user, or
774                // no packages had patches). Emit empty `apply` so JSON
775                // shape is stable, then fall through to GC if requested.
776                result["apply"] = serde_json::json!({
777                    "found": 0, "downloaded": 0, "skipped": 0,
778                    "failed": 0, "applied": 0, "updated": 0,
779                    "patches": [],
780                });
781            } else {
782                let params = DownloadParams {
783                    cwd: args.common.cwd.clone(),
784                    org: args.common.org.clone(),
785                    save_only: false,
786                    one_off: false,
787                    global: args.common.global,
788                    global_prefix: args.common.global_prefix.clone(),
789                    json: true,
790                    silent: true,
791                    download_mode: args.common.download_mode.clone(),
792                    api_overrides: args.common.api_client_overrides(),
793                    all_releases: args.all_releases,
794                };
795                let (code, apply_json) = download_and_apply_patches(&selected, &params).await;
796                apply_code = code;
797                let mut apply_obj = apply_json;
798                if let Some(obj) = apply_obj.as_object_mut() {
799                    obj.remove("status");
800                }
801                result["apply"] = apply_obj;
802                if apply_code != 0 {
803                    result["status"] = serde_json::json!("partial_failure");
804                }
805            }
806
807            // --- GC (if requested) --------------------------------------
808            if prune {
809                let gc = if dry {
810                    preview_apply_gc(&manifest_path, &socket_dir, &scanned_purls).await
811                } else {
812                    run_apply_gc(&manifest_path, &socket_dir, &scanned_purls).await
813                };
814                result["gc"] = if dry {
815                    gc.to_preview_json()
816                } else {
817                    gc.to_apply_json()
818                };
819            }
820
821            let final_code =
822                embed_vex_into_json(&args.common, &args.vex, &manifest_path, apply_code, &mut result)
823                    .await;
824            println!("{}", serde_json::to_string_pretty(&result).unwrap());
825            return final_code;
826        }
827
828        // --- GC-only path (no --apply, just --prune) --------------------
829        if prune {
830            let gc = if dry {
831                preview_apply_gc(&manifest_path, &socket_dir, &scanned_purls).await
832            } else {
833                run_apply_gc(&manifest_path, &socket_dir, &scanned_purls).await
834            };
835            result["gc"] = if dry {
836                gc.to_preview_json()
837            } else {
838                gc.to_apply_json()
839            };
840        }
841
842        let final_code =
843            embed_vex_into_json(&args.common, &args.vex, &manifest_path, 0, &mut result).await;
844        println!("{}", serde_json::to_string_pretty(&result).unwrap());
845        return final_code;
846    }
847
848    let use_color = stdout_is_tty();
849
850    if all_packages_with_patches.is_empty() {
851        println!("\nNo patches available for installed packages.");
852        return embed_vex_human(&args.common, &args.vex, &manifest_path, 0).await;
853    }
854
855    let mut updates_available = 0usize;
856
857    // Print table
858    println!("\n{}", "=".repeat(100));
859    println!(
860        "{}  {}  {}  VULNERABILITIES",
861        "PACKAGE".to_string() + &" ".repeat(33),
862        "PATCHES".to_string() + " ",
863        "SEVERITY".to_string() + &" ".repeat(8),
864    );
865    println!("{}", "=".repeat(100));
866
867    for pkg in &all_packages_with_patches {
868        // Char-safe truncation: a byte slice (`&pkg.purl[..37]`) panics
869        // when the cut lands mid-codepoint. PURLs can carry non-ASCII
870        // names/qualifiers, so route through the shared helper.
871        let display_purl = truncate_with_ellipsis(&pkg.purl, 40);
872
873        let pkg_free = pkg.patches.iter().filter(|p| p.tier == "free").count();
874        let pkg_paid = pkg.patches.iter().filter(|p| p.tier == "paid").count();
875
876        let count_str = if pkg_paid > 0 {
877            if can_access_paid_patches {
878                format!("{}+{}", pkg_free, pkg_paid)
879            } else {
880                format!("{}+{}", pkg_free, color(&pkg_paid.to_string(), "33", use_color))
881            }
882        } else {
883            format!("{}", pkg_free)
884        };
885
886        // Get highest severity
887        let severity = pkg
888            .patches
889            .iter()
890            .filter_map(|p| p.severity.as_deref())
891            .min_by_key(|s| severity_order(s))
892            .unwrap_or("unknown");
893
894        // Collect vuln IDs (deterministic: deduped, CVEs then GHSAs,
895        // each group sorted — see collect_vuln_ids).
896        let vuln_ids = collect_vuln_ids(pkg);
897        let vuln_str = if vuln_ids.len() > 2 {
898            format!(
899                "{} (+{})",
900                vuln_ids[..2].join(", "),
901                vuln_ids.len() - 2
902            )
903        } else if vuln_ids.is_empty() {
904            "-".to_string()
905        } else {
906            vuln_ids.join(", ")
907        };
908
909        // Check for updates
910        let has_update = if let Some(ref manifest) = existing_manifest {
911            if let Some(existing) = manifest.patches.get(&pkg.purl) {
912                // If any patch in the batch has a different UUID than what's in manifest, update available
913                pkg.patches.iter().any(|p| p.uuid != existing.uuid)
914            } else {
915                false
916            }
917        } else {
918            false
919        };
920        if has_update {
921            updates_available += 1;
922        }
923
924        let update_marker = if has_update {
925            color(" [UPDATE]", "33", use_color)
926        } else {
927            String::new()
928        };
929
930        println!(
931            "{:<40}  {:>8}  {:<16}  {}{}",
932            display_purl,
933            count_str,
934            format_severity(severity, use_color),
935            vuln_str,
936            update_marker,
937        );
938    }
939
940    println!("{}", "=".repeat(100));
941
942    // Summary
943    if can_access_paid_patches {
944        println!(
945            "\nSummary: {} package(s) with {} available patch(es)",
946            all_packages_with_patches.len(),
947            total_patches,
948        );
949    } else {
950        println!(
951            "\nSummary: {} package(s) with {} free patch(es)",
952            all_packages_with_patches.len(),
953            free_patches,
954        );
955        if paid_patches > 0 {
956            println!(
957                "{}",
958                color(
959                    &format!("         + {} additional patch(es) available with paid subscription", paid_patches),
960                    "33",
961                    use_color,
962                ),
963            );
964            println!(
965                "\nUpgrade to Socket's paid plan to access all patches: https://socket.dev/pricing"
966            );
967        }
968    }
969
970    if updates_available > 0 {
971        println!(
972            "\n{}",
973            color(
974                &format!("{updates_available} package(s) have newer patches available."),
975                "33",
976                use_color,
977            ),
978        );
979    }
980
981    // Count downloadable patches
982    let downloadable_count = if can_access_paid_patches {
983        all_packages_with_patches.len()
984    } else {
985        all_packages_with_patches
986            .iter()
987            .filter(|pkg| pkg.patches.iter().any(|p| p.tier == "free"))
988            .count()
989    };
990
991    if downloadable_count == 0 {
992        println!("\nNo downloadable patches (paid subscription required).");
993        return embed_vex_human(&args.common, &args.vex, &manifest_path, 0).await;
994    }
995
996    // Fetch full PatchSearchResult for each package that has patches
997    if show_progress {
998        eprint!("\nFetching patch details...");
999    }
1000
1001    let mut all_search_results: Vec<PatchSearchResult> = Vec::new();
1002    for (i, pkg) in all_packages_with_patches.iter().enumerate() {
1003        if show_progress {
1004            eprint!(
1005                "\rFetching patch details... ({}/{})",
1006                i + 1,
1007                all_packages_with_patches.len()
1008            );
1009        }
1010        match api_client
1011            .search_patches_by_package(effective_org_slug, &pkg.purl)
1012            .await
1013        {
1014            Ok(response) => {
1015                all_search_results.extend(response.patches);
1016            }
1017            Err(e) => {
1018                eprintln!("\n  Warning: could not fetch details for {}: {e}", pkg.purl);
1019            }
1020        }
1021    }
1022
1023    if show_progress {
1024        eprintln!();
1025    }
1026
1027    if all_search_results.is_empty() {
1028        eprintln!("Could not fetch patch details.");
1029        return 1;
1030    }
1031
1032    // Smart selection
1033    let selected: Vec<PatchSearchResult> =
1034        match select_patches(&all_search_results, can_access_paid_patches, false) {
1035            Ok(s) => s,
1036            Err(code) => return code,
1037        };
1038
1039    if selected.is_empty() {
1040        println!("No patches selected.");
1041        return embed_vex_human(&args.common, &args.vex, &manifest_path, 0).await;
1042    }
1043
1044    // Display detailed summary of selected patches before confirming
1045    println!("\nPatches to apply:\n");
1046    for patch in &selected {
1047        // Collect CVE/GHSA IDs and highest severity from vulnerabilities
1048        let mut vuln_ids: Vec<String> = Vec::new();
1049        let mut highest_severity: Option<&str> = None;
1050        for (id, vuln) in &patch.vulnerabilities {
1051            if vuln.cves.is_empty() {
1052                vuln_ids.push(id.clone());
1053            } else {
1054                for cve in &vuln.cves {
1055                    vuln_ids.push(cve.clone());
1056                }
1057            }
1058            let sev = vuln.severity.as_str();
1059            if highest_severity
1060                .is_none_or(|cur| severity_order(sev) < severity_order(cur))
1061            {
1062                highest_severity = Some(sev);
1063            }
1064        }
1065
1066        let sev_display = highest_severity.unwrap_or("unknown");
1067        let sev_colored = format_severity(sev_display, use_color);
1068
1069        // Char-safe: descriptions come straight from the API and routinely
1070        // contain non-ASCII text; a `&desc[..69]` byte slice would panic.
1071        let desc = truncate_with_ellipsis(&patch.description, 72);
1072
1073        println!(
1074            "  {} [{}] {}",
1075            patch.purl,
1076            patch.tier.to_uppercase(),
1077            sev_colored,
1078        );
1079        if !vuln_ids.is_empty() {
1080            println!("    Fixes: {}", vuln_ids.join(", "));
1081        }
1082        // Show per-vulnerability summaries
1083        for vuln in patch.vulnerabilities.values() {
1084            if !vuln.summary.is_empty() {
1085                // Char-safe: vulnerability summaries are API-sourced free
1086                // text; a `&summary[..73]` byte slice would panic mid-codepoint.
1087                let summary = truncate_with_ellipsis(&vuln.summary, 76);
1088                let cve_label = if vuln.cves.is_empty() {
1089                    String::new()
1090                } else {
1091                    format!("{}: ", vuln.cves.join(", "))
1092                };
1093                println!("    - {cve_label}{summary}");
1094            }
1095        }
1096        if !desc.is_empty() {
1097            println!("    {desc}");
1098        }
1099        println!();
1100    }
1101
1102    // Prompt to download
1103    let prompt = format!("Download and apply {} patch(es)?", selected.len());
1104    if !confirm(&prompt, true, args.common.yes, args.common.json) {
1105        println!("\nTo apply a patch, run:");
1106        println!("  socket-patch get <package-name-or-purl>");
1107        println!("  socket-patch get <CVE-ID>");
1108        return embed_vex_human(&args.common, &args.vex, &manifest_path, 0).await;
1109    }
1110
1111    // Download and apply
1112    let params = DownloadParams {
1113        cwd: args.common.cwd.clone(),
1114        org: args.common.org.clone(),
1115        save_only: false,
1116        one_off: false,
1117        global: args.common.global,
1118        global_prefix: args.common.global_prefix.clone(),
1119        json: false,
1120        silent: false,
1121        download_mode: args.common.download_mode.clone(),
1122        api_overrides: args.common.api_client_overrides(),
1123        all_releases: args.all_releases,
1124    };
1125
1126    let (code, _) = download_and_apply_patches(&selected, &params).await;
1127
1128    // Post-apply GC: only runs when the user opted in via `--prune` or
1129    // `--sync`. Default `scan --yes` no longer touches the manifest
1130    // beyond what `--apply` added — users wanting to clean up should
1131    // run `socket-patch gc` (or `repair`) explicitly.
1132    if prune {
1133        let gc = run_apply_gc(&manifest_path, &socket_dir, &scanned_purls).await;
1134        let total = gc.blobs.blobs_removed + gc.diffs.blobs_removed + gc.packages.blobs_removed;
1135        if !gc.pruned.is_empty() || total > 0 {
1136            println!(
1137                "\nGC: pruned {} manifest entr{} and removed {} orphan file{} ({}).",
1138                gc.pruned.len(),
1139                if gc.pruned.len() == 1 { "y" } else { "ies" },
1140                total,
1141                if total == 1 { "" } else { "s" },
1142                socket_patch_core::utils::cleanup_blobs::format_bytes(gc.total_bytes()),
1143            );
1144        }
1145    }
1146
1147    embed_vex_human(&args.common, &args.vex, &manifest_path, code).await
1148}
1149
/// Maps a severity label to a sort rank where a lower number means a more
/// severe finding.
///
/// Recognised labels (matched case-insensitively) are `critical` (0),
/// `high` (1), `medium` (2) and `low` (3). Anything else — including the
/// empty string — ranks last with 4, so callers can pick the most severe
/// entry with `min_by_key(|s| severity_order(s))`.
///
/// The comparison uses `eq_ignore_ascii_case`, so no temporary lowercase
/// `String` is allocated; this matters because the function is called from
/// comparator closures.
pub(crate) fn severity_order(s: &str) -> u8 {
    // Rank assigned to any label that is not in `RANKED_LEVELS`.
    const UNKNOWN_RANK: u8 = 4;
    // Known labels paired with their rank, most severe first.
    const RANKED_LEVELS: [(&str, u8); 4] = [
        ("critical", 0),
        ("high", 1),
        ("medium", 2),
        ("low", 3),
    ];

    RANKED_LEVELS
        .iter()
        .find(|(label, _)| s.eq_ignore_ascii_case(label))
        .map_or(UNKNOWN_RANK, |&(_, rank)| rank)
}
1159
#[cfg(test)]
mod tests {
    use super::*;
    use socket_patch_core::api::types::{BatchPackagePatches, BatchPatchInfo};
    use socket_patch_core::manifest::schema::{PatchManifest, PatchRecord};
    use std::collections::HashMap;

    // ======================================================================
    // Shared fixtures
    // ======================================================================

    /// Turns a slice of string literals into owned `String`s.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().copied().map(String::from).collect()
    }

    /// Builds one free-tier patch entry with the given identifiers and no
    /// severity or title.
    fn patch_info(purl: &str, uuid: &str, cves: &[&str], ghsas: &[&str]) -> BatchPatchInfo {
        BatchPatchInfo {
            uuid: uuid.to_string(),
            purl: purl.to_string(),
            tier: "free".to_string(),
            cve_ids: owned(cves),
            ghsa_ids: owned(ghsas),
            severity: None,
            title: String::new(),
        }
    }

    /// Builds a manifest holding one otherwise-empty free-tier record per
    /// `(purl, uuid)` pair.
    fn manifest_with(entries: &[(&str, &str)]) -> PatchManifest {
        let mut manifest = PatchManifest::new();
        for &(purl, uuid) in entries {
            let record = PatchRecord {
                uuid: uuid.to_string(),
                exported_at: String::new(),
                files: HashMap::new(),
                vulnerabilities: HashMap::new(),
                description: String::new(),
                license: String::new(),
                tier: "free".to_string(),
            };
            manifest.patches.insert(purl.to_string(), record);
        }
        manifest
    }

    /// Builds a package whose patches carry the given UUIDs (in order) and
    /// no vulnerability identifiers.
    fn batch_with(purl: &str, uuids: &[&str]) -> BatchPackagePatches {
        let mut patches = Vec::with_capacity(uuids.len());
        for &uuid in uuids {
            patches.push(patch_info(purl, uuid, &[], &[]));
        }
        BatchPackagePatches {
            purl: purl.to_string(),
            patches,
        }
    }

    /// Builds the set of PURLs that a crawl is pretending to have found.
    fn scanned(purls: &[&str]) -> HashSet<String> {
        purls.iter().copied().map(String::from).collect()
    }

    /// Builds a single-patch package whose only patch lists the given CVE
    /// and GHSA identifiers.
    fn batch_with_vulns(purl: &str, cves: &[&str], ghsas: &[&str]) -> BatchPackagePatches {
        BatchPackagePatches {
            purl: purl.to_string(),
            patches: vec![patch_info(purl, "uuid", cves, ghsas)],
        }
    }

    // ======================================================================
    // severity_order
    // ======================================================================

    #[test]
    fn severity_order_critical_is_zero() {
        let rank = severity_order("critical");
        assert_eq!(rank, 0);
    }

    #[test]
    fn severity_order_is_case_insensitive() {
        for (label, expected) in [("Critical", 0), ("CRITICAL", 0), ("High", 1)] {
            assert_eq!(severity_order(label), expected);
        }
    }

    #[test]
    fn severity_order_known_levels() {
        for (label, expected) in [("high", 1), ("medium", 2), ("low", 3)] {
            assert_eq!(severity_order(label), expected);
        }
    }

    #[test]
    fn severity_order_unknown_is_four() {
        for label in ["unknown", "", "informational"] {
            assert_eq!(severity_order(label), 4);
        }
    }

    // ======================================================================
    // detect_updates
    // ======================================================================

    #[test]
    fn detect_updates_returns_empty_when_no_manifest() {
        let packages = [batch_with("pkg:npm/foo@1.0", &["uuid-a"])];
        let updates = detect_updates(None, &packages);
        assert!(updates.is_empty());
    }

    #[test]
    fn detect_updates_returns_empty_for_empty_packages() {
        let manifest = manifest_with(&[("pkg:npm/foo@1.0", "uuid-a")]);
        let updates = detect_updates(Some(&manifest), &[]);
        assert!(updates.is_empty());
    }

    #[test]
    fn detect_updates_returns_empty_when_no_overlap() {
        let manifest = manifest_with(&[("pkg:npm/foo@1.0", "uuid-a")]);
        let packages = [batch_with("pkg:npm/bar@2.0", &["uuid-z"])];
        let updates = detect_updates(Some(&manifest), &packages);
        assert!(updates.is_empty());
    }

    #[test]
    fn detect_updates_skips_same_uuid() {
        let manifest = manifest_with(&[("pkg:npm/foo@1.0", "uuid-a")]);
        let packages = [batch_with("pkg:npm/foo@1.0", &["uuid-a"])];
        let updates = detect_updates(Some(&manifest), &packages);
        assert!(updates.is_empty());
    }

    #[test]
    fn detect_updates_flags_different_uuid() {
        let manifest = manifest_with(&[("pkg:npm/foo@1.0", "uuid-a")]);
        let packages = [batch_with("pkg:npm/foo@1.0", &["uuid-b"])];
        let updates = detect_updates(Some(&manifest), &packages);
        assert_eq!(updates.len(), 1);
        let update = &updates[0];
        assert_eq!(update.purl, "pkg:npm/foo@1.0");
        assert_eq!(update.old_uuid, "uuid-a");
        assert_eq!(update.new_uuid, "uuid-b");
    }

    #[test]
    fn detect_updates_reports_multiple_updates() {
        let manifest = manifest_with(&[
            ("pkg:npm/foo@1.0", "uuid-a"),
            ("pkg:npm/bar@2.0", "uuid-c"),
        ]);
        let packages = [
            batch_with("pkg:npm/foo@1.0", &["uuid-b"]),
            batch_with("pkg:npm/bar@2.0", &["uuid-d"]),
        ];
        let updates = detect_updates(Some(&manifest), &packages);
        assert_eq!(updates.len(), 2);
    }

    #[test]
    fn detect_updates_skips_packages_with_empty_patch_list() {
        // With no candidate patches there is no new UUID to compare the
        // manifest entry against, so the package is expected to be passed
        // over without producing an update.
        let manifest = manifest_with(&[("pkg:npm/foo@1.0", "uuid-a")]);
        let packages = [batch_with("pkg:npm/foo@1.0", &[])];
        let updates = detect_updates(Some(&manifest), &packages);
        assert!(updates.is_empty());
    }

    #[test]
    fn detect_updates_uses_first_patch_as_candidate() {
        // The first patch in the batch is the candidate UUID, matching what
        // `select_patches` picks. Pinned here so a later refactor of
        // `select_patches` cannot make the two drift apart unnoticed.
        let manifest = manifest_with(&[("pkg:npm/foo@1.0", "uuid-a")]);
        let packages = [batch_with("pkg:npm/foo@1.0", &["uuid-b", "uuid-c"])];
        let updates = detect_updates(Some(&manifest), &packages);
        assert_eq!(updates.len(), 1);
        assert_eq!(updates[0].new_uuid, "uuid-b");
    }

    // ======================================================================
    // detect_prunable
    // ======================================================================

    #[test]
    fn detect_prunable_empty_manifest_empty_scanned() {
        let manifest = PatchManifest::new();
        let prunable = detect_prunable(&manifest, &scanned(&[]));
        assert!(prunable.is_empty());
    }

    #[test]
    fn detect_prunable_empty_manifest_nonempty_scanned() {
        // An empty manifest has nothing to prune, even when the crawl found
        // packages the manifest has never heard of.
        let manifest = PatchManifest::new();
        let prunable = detect_prunable(&manifest, &scanned(&["pkg:npm/foo@1"]));
        assert!(prunable.is_empty());
    }

    #[test]
    fn detect_prunable_all_entries_present_in_scan() {
        let manifest = manifest_with(&[
            ("pkg:npm/foo@1.0", "uuid-a"),
            ("pkg:npm/bar@2.0", "uuid-b"),
        ]);
        let installed = scanned(&["pkg:npm/foo@1.0", "pkg:npm/bar@2.0"]);
        let prunable = detect_prunable(&manifest, &installed);
        assert!(prunable.is_empty());
    }

    #[test]
    fn detect_prunable_returns_missing_entries() {
        let manifest = manifest_with(&[
            ("pkg:npm/foo@1.0", "uuid-a"),
            ("pkg:npm/bar@2.0", "uuid-b"),
        ]);
        // Only foo remains installed; bar has disappeared from the crawl.
        let installed = scanned(&["pkg:npm/foo@1.0"]);
        let mut prunable = detect_prunable(&manifest, &installed);
        prunable.sort();
        assert_eq!(prunable, owned(&["pkg:npm/bar@2.0"]));
    }

    #[test]
    fn detect_prunable_returns_everything_when_scan_is_empty() {
        let manifest = manifest_with(&[
            ("pkg:npm/foo@1.0", "uuid-a"),
            ("pkg:npm/bar@2.0", "uuid-b"),
        ]);
        let mut prunable = detect_prunable(&manifest, &scanned(&[]));
        prunable.sort();
        assert_eq!(prunable, owned(&["pkg:npm/bar@2.0", "pkg:npm/foo@1.0"]));
    }

    #[test]
    fn detect_prunable_keeps_pypi_variants_of_installed_base() {
        // Three qualified release variants live in the manifest while the
        // crawl reports just the unqualified base PURL. All three belong to
        // the installed package, so none may be pruned.
        let manifest = manifest_with(&[
            ("pkg:pypi/six@1.16.0?artifact_id=wheel-a", "uuid-a"),
            ("pkg:pypi/six@1.16.0?artifact_id=wheel-b", "uuid-b"),
            ("pkg:pypi/six@1.16.0?artifact_id=sdist", "uuid-c"),
        ]);
        let installed = scanned(&["pkg:pypi/six@1.16.0"]);
        let out = detect_prunable(&manifest, &installed);
        assert!(
            out.is_empty(),
            "variants of an installed base must not be pruned; got {out:?}"
        );
    }

    #[test]
    fn detect_prunable_removes_all_variants_of_uninstalled_base() {
        // An empty crawl means the package is gone, so each of its release
        // variants becomes prunable.
        let manifest = manifest_with(&[
            ("pkg:pypi/six@1.16.0?artifact_id=wheel-a", "uuid-a"),
            ("pkg:pypi/six@1.16.0?artifact_id=sdist", "uuid-c"),
        ]);
        let prunable = detect_prunable(&manifest, &scanned(&[]));
        assert_eq!(
            prunable.len(),
            2,
            "all variants of a gone package should prune"
        );
    }

    // ======================================================================
    // collect_vuln_ids
    // ======================================================================

    #[test]
    fn collect_vuln_ids_empty_when_no_vulns() {
        let package = batch_with_vulns("pkg:npm/foo@1.0", &[], &[]);
        let ids = collect_vuln_ids(&package);
        assert!(ids.is_empty());
    }

    #[test]
    fn collect_vuln_ids_lists_cves_before_ghsas_each_sorted() {
        // Input is intentionally out of order. The expected output is the
        // sorted CVEs followed by the sorted GHSAs, which keeps the rendered
        // table column deterministic.
        let package = batch_with_vulns(
            "pkg:npm/foo@1.0",
            &["CVE-2024-2", "CVE-2024-1"],
            &["GHSA-zzzz-zzzz-zzzz", "GHSA-aaaa-aaaa-aaaa"],
        );
        let expected = owned(&[
            "CVE-2024-1",
            "CVE-2024-2",
            "GHSA-aaaa-aaaa-aaaa",
            "GHSA-zzzz-zzzz-zzzz",
        ]);
        assert_eq!(collect_vuln_ids(&package), expected);
    }

    #[test]
    fn collect_vuln_ids_dedups_across_patches() {
        // Two patches of one package both list the same CVE; the result must
        // contain it only once.
        let purl = "pkg:npm/foo@1.0";
        let package = BatchPackagePatches {
            purl: purl.to_string(),
            patches: vec![
                patch_info(purl, "u1", &["CVE-2024-1"], &[]),
                patch_info(purl, "u2", &["CVE-2024-1"], &["GHSA-aaaa-aaaa-aaaa"]),
            ],
        };
        let expected = owned(&["CVE-2024-1", "GHSA-aaaa-aaaa-aaaa"]);
        assert_eq!(collect_vuln_ids(&package), expected);
    }

    // ======================================================================
    // truncate_with_ellipsis, as used for scan's display columns
    // ======================================================================
    // PURLs, descriptions and vulnerability summaries all come from the API,
    // may contain non-ASCII text, and are rendered into fixed-width columns.
    // These tests pin scan's reliance on the char-safe helper: a raw
    // `&s[..n]` byte slice panics when the cut falls inside a codepoint.

    #[test]
    fn truncate_multibyte_purl_does_not_panic() {
        // Thirty 3-byte characters (90 bytes) after the prefix. The former
        // purl code sliced `&purl[..37]` whenever `len() > 40`, and byte 37
        // falls in the middle of a codepoint for this input.
        let name = "日".repeat(30);
        let purl = format!("pkg:npm/{}", name);
        let shown = truncate_with_ellipsis(&purl, 40);
        assert!(shown.chars().count() <= 40);
    }

    #[test]
    fn truncate_multibyte_description_truncates_on_char_boundary() {
        // One hundred 2-byte characters against the description width of 72.
        let description = "é".repeat(100);
        let shown = truncate_with_ellipsis(&description, 72);
        assert_eq!(shown.chars().count(), 72);
        assert!(shown.ends_with("..."));
    }

    #[test]
    fn truncate_multibyte_summary_truncates_on_char_boundary() {
        // One hundred em dashes (3 bytes each) against the summary width of 76.
        let summary = "—".repeat(100);
        let shown = truncate_with_ellipsis(&summary, 76);
        assert_eq!(shown.chars().count(), 76);
        assert!(shown.ends_with("..."));
    }
}