Skip to main content

socket_patch_core/vex/
product.rs

1//! Top-level product PURL auto-detection.
2//!
3//! Detection chain (first match wins):
4//!   1. `.git/config` `[remote "origin"]` URL — the canonical
5//!      identifier when the repo IS the product. GitHub/GitLab/
6//!      Bitbucket URLs are normalized to
7//!      `pkg:<github|gitlab|bitbucket>/<owner>/<name>`; anything else
8//!      is returned as the raw URL.
9//!   2. `package.json` (npm)        → `pkg:npm/<name>@<version>`
10//!   3. `pyproject.toml` (PyPI)     → `pkg:pypi/<name>@<version>`
11//!   4. `Cargo.toml` (Cargo)        → `pkg:cargo/<name>@<version>`
12//!
//! Returns `None` only when none of these sources yield a usable
//! identifier. Multiple-package-manifest case: we use the
//! highest-priority manifest that actually parses to a PURL (a
//! higher-priority manifest that is present but unusable is skipped)
//! and surface a warning via [`DetectResult::warnings`] so the CLI can
//! echo it to stderr. Git remote presence does NOT trigger that
//! warning even when alongside a package manifest — the priority is
//! documented and stable.
19
20use std::path::Path;
21
/// Outcome of [`detect_product`].
///
/// The `Default` value means "nothing detected, nothing to report":
/// `purl` is `None` and `warnings` is empty. `PartialEq`/`Eq` are
/// derived so callers and tests can compare whole results directly.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct DetectResult {
    /// Detected product PURL, or `None` if nothing matched.
    pub purl: Option<String>,
    /// Non-fatal observations the CLI should print to stderr — e.g.
    /// "found Cargo.toml AND package.json; using package.json".
    pub warnings: Vec<String>,
}
31
32pub async fn detect_product(cwd: &Path) -> DetectResult {
33    let mut result = DetectResult::default();
34
35    // 1. git remote origin (highest priority — canonical when present).
36    if let Some(purl) = detect_git_remote(cwd).await {
37        result.purl = Some(purl);
38        return result;
39    }
40
41    let pkg_json = cwd.join("package.json");
42    let pyproject = cwd.join("pyproject.toml");
43    let cargo = cwd.join("Cargo.toml");
44
45    let pkg_json_exists = tokio::fs::metadata(&pkg_json).await.is_ok();
46    let pyproject_exists = tokio::fs::metadata(&pyproject).await.is_ok();
47    let cargo_exists = tokio::fs::metadata(&cargo).await.is_ok();
48
49    // Names of every manifest present, in priority order — used for the
50    // "detected (...)" portion of the multi-manifest warning.
51    let mut present = Vec::new();
52    if pkg_json_exists {
53        present.push("package.json");
54    }
55    if pyproject_exists {
56        present.push("pyproject.toml");
57    }
58    if cargo_exists {
59        present.push("Cargo.toml");
60    }
61
62    // Read manifests in priority order, taking the first that yields a
63    // usable PURL. `selected` records the manifest ACTUALLY used — not
64    // merely the highest-priority one present, because that one may fail
65    // to parse (invalid JSON, missing version, workspace inheritance) and
66    // fall through to a lower-priority manifest. The warning must name
67    // what we used, otherwise it misreports the source.
68    let mut selected: Option<&str> = None;
69    if pkg_json_exists {
70        if let Some(purl) = read_package_json(&pkg_json).await {
71            result.purl = Some(purl);
72            selected = Some("package.json");
73        }
74    }
75    if result.purl.is_none() && pyproject_exists {
76        if let Some(purl) = read_pyproject(&pyproject).await {
77            result.purl = Some(purl);
78            selected = Some("pyproject.toml");
79        }
80    }
81    if result.purl.is_none() && cargo_exists {
82        if let Some(purl) = read_cargo_toml(&cargo).await {
83            result.purl = Some(purl);
84            selected = Some("Cargo.toml");
85        }
86    }
87
88    // Warn only when more than one manifest is present AND we actually
89    // settled on one — naming the manifest we used.
90    if present.len() > 1 {
91        if let Some(used) = selected {
92            result.warnings.push(format!(
93                "Multiple project manifests detected ({}); using {} for the top-level product",
94                present.join(", "),
95                used
96            ));
97        }
98    }
99
100    result
101}
102
103async fn read_package_json(path: &Path) -> Option<String> {
104    let content = tokio::fs::read_to_string(path).await.ok()?;
105    let v: serde_json::Value = serde_json::from_str(&content).ok()?;
106    let name = v.get("name")?.as_str()?;
107    let version = v.get("version")?.as_str()?;
108    if name.is_empty() || version.is_empty() {
109        return None;
110    }
111    // npm scoped packages keep their `@scope/name` form in the PURL —
112    // matches how socket-patch's manifest already stores them.
113    Some(format!("pkg:npm/{name}@{version}"))
114}
115
116async fn read_pyproject(path: &Path) -> Option<String> {
117    let content = tokio::fs::read_to_string(path).await.ok()?;
118    // PEP 621 `[project]` takes precedence (newer projects favor it),
119    // then fall back to Poetry's `[tool.poetry]` for legacy layouts.
120    let (name, version) = scan_toml_section(&content, "project")
121        .or_else(|| scan_toml_section(&content, "tool.poetry"))?;
122    Some(format!("pkg:pypi/{name}@{version}"))
123}
124
125async fn read_cargo_toml(path: &Path) -> Option<String> {
126    let content = tokio::fs::read_to_string(path).await.ok()?;
127    let (name, version) = scan_toml_section(&content, "package")?;
128    Some(format!("pkg:cargo/{name}@{version}"))
129}
130
131/// Minimal line-based TOML scanner for `[<section>]` blocks. Reads
132/// `name = "..."` and `version = "..."` from the named section and
133/// stops at the next `[` header. Robust enough for the well-formed
134/// `pyproject.toml` / `Cargo.toml` files we expect at the top level —
135/// no full TOML parser dependency.
136///
137/// Returns `None` if either key is missing, both keys appear outside
138/// the section, the value is empty, or the value is `version.workspace
139/// = true` (matches the cargo crawler's behavior of skipping workspace
140/// inheritance).
141fn scan_toml_section(content: &str, section: &str) -> Option<(String, String)> {
142    let mut in_section = false;
143    let mut name: Option<String> = None;
144    let mut version: Option<String> = None;
145    let header = format!("[{section}]");
146
147    for raw in content.lines() {
148        let line = raw.trim();
149        if line.is_empty() || line.starts_with('#') {
150            continue;
151        }
152        if line.starts_with('[') {
153            in_section = line == header;
154            continue;
155        }
156        if !in_section {
157            continue;
158        }
159        if let Some(v) = parse_toml_string_kv(line, "name") {
160            name = Some(v);
161        } else if let Some(v) = parse_toml_string_kv(line, "version") {
162            version = Some(v);
163        }
164    }
165
166    let name = name?;
167    let version = version?;
168    if name.is_empty() || version.is_empty() {
169        return None;
170    }
171    Some((name, version))
172}
173
174/// Walk up from `start` looking for a `.git/config` (the working tree
175/// or any of its ancestors). When found, parse the
176/// `[remote "origin"] url = ...` line and convert that URL to a PURL.
177///
178/// Returns `None` when:
179/// * `cwd` is not inside a git working tree,
180/// * `.git/config` has no `[remote "origin"]` section, or
181/// * the URL is empty / parsing failed catastrophically. (Otherwise
182///   even unrecognized hosts fall through to the raw-URL case.)
183///
184/// Worktrees (`.git` as a file pointing at a real git dir elsewhere)
185/// are deliberately NOT followed — they're rare and the package-
186/// manifest fallback handles them correctly. Submodules likewise:
187/// only the outermost `.git/config` wins.
188async fn detect_git_remote(start: &Path) -> Option<String> {
189    let git_config_path = find_git_config(start).await?;
190    let content = tokio::fs::read_to_string(&git_config_path).await.ok()?;
191    let url = scan_remote_origin_url(&content)?;
192    Some(remote_url_to_purl(&url))
193}
194
195/// Walk ancestors looking for `<dir>/.git/config` as a regular file.
196/// Returns the path to it, or `None` if we exhaust the chain.
197async fn find_git_config(start: &Path) -> Option<std::path::PathBuf> {
198    let mut cursor = match tokio::fs::canonicalize(start).await {
199        Ok(p) => p,
200        Err(_) => start.to_path_buf(),
201    };
202    loop {
203        let candidate = cursor.join(".git").join("config");
204        if tokio::fs::metadata(&candidate)
205            .await
206            .map(|m| m.is_file())
207            .unwrap_or(false)
208        {
209            return Some(candidate);
210        }
211        match cursor.parent() {
212            Some(p) => cursor = p.to_path_buf(),
213            None => return None,
214        }
215    }
216}
217
/// Read the `url = ...` value out of the `[remote "origin"]` section of
/// a git config file. Returns the trimmed URL, or `None` if the section
/// is absent or carries no non-empty `url`.
///
/// Matching follows git's config rules: the section name (`remote`)
/// and the variable name (`url`) are case-insensitive, while the
/// quoted subsection (`"origin"`) is case-sensitive. Any amount of
/// whitespace may separate `remote` from `"origin"`.
///
/// Every unusable line inside the section is SKIPPED rather than
/// aborting the scan, so a later well-formed `url = ...` line is still
/// found. That covers a line without `=`, a `url`-prefixed but
/// different key (e.g. `urlsuffix`), and a `url =` with an empty value.
/// The value is everything after the FIRST `=`, so URLs that themselves
/// contain `=` are preserved intact.
fn scan_remote_origin_url(content: &str) -> Option<String> {
    let mut in_origin = false;
    for line in content.lines().map(str::trim) {
        // Section header: `[<section> "<subsection>"]`.
        if let Some(inner) = line.strip_prefix('[').and_then(|s| s.strip_suffix(']')) {
            in_origin = matches!(
                inner.trim().split_once(char::is_whitespace),
                Some((section, subsection))
                    if section.eq_ignore_ascii_case("remote")
                        && subsection.trim() == "\"origin\""
            );
            continue;
        }
        if !in_origin {
            continue;
        }
        let Some((key, value)) = line.split_once('=') else {
            continue;
        };
        // Only the exact `url` key counts (compared case-insensitively,
        // as git does).
        if !key.trim().eq_ignore_ascii_case("url") {
            continue;
        }
        let value = value.trim();
        if !value.is_empty() {
            return Some(value.to_string());
        }
        // Empty value: malformed entry — keep looking for a usable one.
    }
    None
}
251
252/// Convert a git remote URL to a PURL when possible, else return the
253/// URL itself (OpenVEX `@id` accepts any URI).
254///
255/// Handled forms:
256/// * `git@github.com:owner/repo.git`     → `pkg:github/owner/repo`
257/// * `https://github.com/owner/repo.git` → `pkg:github/owner/repo`
258/// * `https://github.com/owner/repo`     → `pkg:github/owner/repo`
259/// * Same shapes for `gitlab.com` (→ `pkg:gitlab`) and `bitbucket.org`
260///   (→ `pkg:bitbucket`).
261/// * Anything else (self-hosted gitea, generic SSH, etc.) → URL as-is.
262fn remote_url_to_purl(url: &str) -> String {
263    if let Some((host, path)) = split_remote_host_path(url) {
264        // Trim slashes BEFORE stripping `.git`: a URL like
265        // `https://github.com/owner/repo.git/` carries a trailing
266        // slash, so stripping `.git` first would no-op and leave
267        // `repo.git` baked into the PURL. Trim again afterward in case
268        // the `.git` strip exposes a slash.
269        let cleaned = path.trim_matches('/');
270        let cleaned = cleaned.strip_suffix(".git").unwrap_or(cleaned);
271        let cleaned = cleaned.trim_matches('/');
272        let parts: Vec<&str> = cleaned.split('/').collect();
273        if parts.len() == 2 && !parts[0].is_empty() && !parts[1].is_empty() {
274            // Hostnames are case-insensitive per DNS; match on a
275            // lowercased copy so `git@GitHub.com:...` still normalizes.
276            let ecosystem = match host.to_ascii_lowercase().as_str() {
277                "github.com" => Some("github"),
278                "gitlab.com" => Some("gitlab"),
279                "bitbucket.org" => Some("bitbucket"),
280                _ => None,
281            };
282            if let Some(eco) = ecosystem {
283                return format!("pkg:{eco}/{}/{}", parts[0], parts[1]);
284            }
285        }
286    }
287    url.to_string()
288}
289
/// Pull `(host, path)` out of a git remote URL. Returns `None` for
/// shapes we don't recognize — the caller falls back to raw-URL mode.
///
/// Recognized shapes:
/// * scp-style `git@<host>:<path>` — the `:` is a path separator, NOT
///   a port; `None` if there is no `:`;
/// * `ssh://`, `git+ssh://`, `git://`, `https://`, `http://` followed
///   by `[user[:password]@]<host>[:port]/<path>` — `None` if there is
///   no `/` after the authority.
///
/// For the scheme forms the returned host has any `user@` prefix and
/// `:port` suffix removed, and the returned path excludes the leading
/// `/`. The userinfo is looked for ONLY inside the authority (the part
/// before the first `/`), so an `@` in the path — e.g.
/// `https://github.com/owner/repo@v1` — cannot be mistaken for a
/// `user@host` separator.
fn split_remote_host_path(url: &str) -> Option<(&str, &str)> {
    // scp-style SSH form.
    if let Some(rest) = url.strip_prefix("git@") {
        return rest.split_once(':');
    }

    const SCHEMES: [&str; 5] = ["ssh://", "git+ssh://", "git://", "https://", "http://"];
    let rest = SCHEMES
        .iter()
        .find_map(|scheme| url.strip_prefix(*scheme))?;

    // Isolate the authority first; everything after the first `/` is
    // path and must not influence host parsing.
    let (authority, path) = rest.split_once('/')?;

    // Drop optional userinfo. Split on the LAST `@` so a password that
    // itself contains `@` does not leak into the host.
    let host_with_port = authority
        .rsplit_once('@')
        .map_or(authority, |(_, after)| after);

    // Strip a `:port` if present.
    let host = host_with_port
        .split_once(':')
        .map_or(host_with_port, |(h, _)| h);

    Some((host, path))
}
322
/// Parse a single-line TOML `<key> = <string>` assignment and return
/// the string's contents.
///
/// Both TOML string spellings are accepted: basic strings
/// (`name = "pkg"`) and literal strings (`name = 'pkg'`). The value
/// ends at the next occurrence of the SAME quote character, so a
/// trailing `# comment` after the closing quote is ignored. Escape
/// sequences inside basic strings are not interpreted.
///
/// Returns `None` if:
/// * the line has no `=`,
/// * the text left of the first `=` (trimmed) is not exactly `key` —
///   so dotted keys like `version.workspace = true` never match,
/// * the value does not start with a quote (e.g. the inline table
///   `version = { workspace = true }`) or has no closing quote, or
/// * the quoted value is empty (this also rejects the multi-line
///   `"""` / `'''` openers).
fn parse_toml_string_kv(line: &str, key: &str) -> Option<String> {
    let (lhs, rhs) = line.split_once('=')?;
    if lhs.trim() != key {
        return None;
    }

    let rhs = rhs.trim_start();
    // Work out which quote opens the value; the same one must close it.
    let (quote, body) = match rhs.strip_prefix('"') {
        Some(body) => ('"', body),
        None => ('\'', rhs.strip_prefix('\'')?),
    };
    let end = body.find(quote)?;
    let value = &body[..end];

    if value.is_empty() {
        None
    } else {
        Some(value.to_string())
    }
}
343
344#[cfg(test)]
345mod tests {
346    use super::*;
347
348    #[tokio::test]
349    async fn detect_package_json() {
350        let dir = tempfile::tempdir().unwrap();
351        tokio::fs::write(
352            dir.path().join("package.json"),
353            r#"{"name":"my-app","version":"1.2.3"}"#,
354        )
355        .await
356        .unwrap();
357
358        let r = detect_product(dir.path()).await;
359        assert_eq!(r.purl.as_deref(), Some("pkg:npm/my-app@1.2.3"));
360        assert!(r.warnings.is_empty());
361    }
362
363    #[tokio::test]
364    async fn detect_scoped_npm_package() {
365        let dir = tempfile::tempdir().unwrap();
366        tokio::fs::write(
367            dir.path().join("package.json"),
368            r#"{"name":"@socket/foo","version":"0.1.0"}"#,
369        )
370        .await
371        .unwrap();
372
373        let r = detect_product(dir.path()).await;
374        assert_eq!(r.purl.as_deref(), Some("pkg:npm/@socket/foo@0.1.0"));
375    }
376
377    #[tokio::test]
378    async fn detect_pyproject() {
379        let dir = tempfile::tempdir().unwrap();
380        let content = "[project]\nname = \"my-pylib\"\nversion = \"0.4.0\"\n";
381        tokio::fs::write(dir.path().join("pyproject.toml"), content)
382            .await
383            .unwrap();
384
385        let r = detect_product(dir.path()).await;
386        assert_eq!(r.purl.as_deref(), Some("pkg:pypi/my-pylib@0.4.0"));
387    }
388
389    #[tokio::test]
390    async fn detect_cargo_toml() {
391        let dir = tempfile::tempdir().unwrap();
392        let content = "[package]\nname = \"my-rust\"\nversion = \"2.0.0\"\nedition = \"2021\"\n";
393        tokio::fs::write(dir.path().join("Cargo.toml"), content)
394            .await
395            .unwrap();
396
397        let r = detect_product(dir.path()).await;
398        assert_eq!(r.purl.as_deref(), Some("pkg:cargo/my-rust@2.0.0"));
399    }
400
401    #[tokio::test]
402    async fn cargo_workspace_inheritance_is_unsupported() {
403        // `version.workspace = true` is not a quoted string literal,
404        // so detection should report None rather than emit garbage.
405        let dir = tempfile::tempdir().unwrap();
406        let content = "[package]\nname = \"my-rust\"\nversion.workspace = true\n";
407        tokio::fs::write(dir.path().join("Cargo.toml"), content)
408            .await
409            .unwrap();
410
411        let r = detect_product(dir.path()).await;
412        assert!(r.purl.is_none());
413    }
414
415    #[tokio::test]
416    async fn multiple_manifests_warns_and_picks_package_json() {
417        let dir = tempfile::tempdir().unwrap();
418        tokio::fs::write(
419            dir.path().join("package.json"),
420            r#"{"name":"my-app","version":"1.0.0"}"#,
421        )
422        .await
423        .unwrap();
424        tokio::fs::write(
425            dir.path().join("Cargo.toml"),
426            "[package]\nname = \"alt\"\nversion = \"9.9.9\"\n",
427        )
428        .await
429        .unwrap();
430
431        let r = detect_product(dir.path()).await;
432        assert_eq!(r.purl.as_deref(), Some("pkg:npm/my-app@1.0.0"));
433        assert_eq!(r.warnings.len(), 1);
434        assert!(r.warnings[0].contains("Multiple"));
435    }
436
437    #[tokio::test]
438    async fn empty_dir_returns_none() {
439        let dir = tempfile::tempdir().unwrap();
440        let r = detect_product(dir.path()).await;
441        assert!(r.purl.is_none());
442        assert!(r.warnings.is_empty());
443    }
444
445    #[test]
446    fn scan_toml_skips_other_sections() {
447        let toml = "[other]\nname = \"wrong\"\nversion = \"0.0.0\"\n\n[package]\nname = \"right\"\nversion = \"1.0.0\"\n";
448        let (n, v) = scan_toml_section(toml, "package").unwrap();
449        assert_eq!(n, "right");
450        assert_eq!(v, "1.0.0");
451    }
452
453    #[test]
454    fn scan_toml_ignores_comments_and_blank_lines() {
455        let toml = "[package]\n# a comment\n\nname = \"x\"\nversion = \"1.0\"\n";
456        let (n, v) = scan_toml_section(toml, "package").unwrap();
457        assert_eq!(n, "x");
458        assert_eq!(v, "1.0");
459    }
460
461    #[test]
462    fn scan_toml_missing_version_returns_none() {
463        let toml = "[package]\nname = \"only-name\"\n";
464        assert!(scan_toml_section(toml, "package").is_none());
465    }
466
467    // ─────────────────── git-remote detection ───────────────────
468
469    #[test]
470    fn remote_url_github_ssh_becomes_pkg_github() {
471        assert_eq!(
472            remote_url_to_purl("git@github.com:SocketDev/socket-patch.git"),
473            "pkg:github/SocketDev/socket-patch"
474        );
475    }
476
477    #[test]
478    fn remote_url_github_https_becomes_pkg_github() {
479        assert_eq!(
480            remote_url_to_purl("https://github.com/SocketDev/socket-patch.git"),
481            "pkg:github/SocketDev/socket-patch"
482        );
483    }
484
485    #[test]
486    fn remote_url_github_https_no_dot_git() {
487        assert_eq!(
488            remote_url_to_purl("https://github.com/SocketDev/socket-patch"),
489            "pkg:github/SocketDev/socket-patch"
490        );
491    }
492
493    #[test]
494    fn remote_url_gitlab_and_bitbucket() {
495        assert_eq!(
496            remote_url_to_purl("git@gitlab.com:foo/bar.git"),
497            "pkg:gitlab/foo/bar"
498        );
499        assert_eq!(
500            remote_url_to_purl("https://bitbucket.org/foo/bar"),
501            "pkg:bitbucket/foo/bar"
502        );
503    }
504
505    #[test]
506    fn remote_url_unknown_host_returns_url_as_is() {
507        // Self-hosted gitea / unknown forge — VEX `@id` accepts any URI.
508        let raw = "https://git.example.com/team/repo.git";
509        assert_eq!(remote_url_to_purl(raw), raw);
510    }
511
512    #[test]
513    fn remote_url_ssh_protocol_form() {
514        assert_eq!(
515            remote_url_to_purl("ssh://git@github.com/foo/bar.git"),
516            "pkg:github/foo/bar"
517        );
518    }
519
520    #[test]
521    fn scan_origin_url_picks_url_in_section() {
522        let cfg = "[core]\nbare = false\n[remote \"origin\"]\nurl = git@github.com:foo/bar.git\nfetch = +refs/heads/*:refs/remotes/origin/*\n";
523        assert_eq!(
524            scan_remote_origin_url(cfg).as_deref(),
525            Some("git@github.com:foo/bar.git")
526        );
527    }
528
529    #[test]
530    fn scan_origin_url_ignores_other_remotes() {
531        // `[remote "upstream"]` must not be confused for origin.
532        let cfg = "[remote \"upstream\"]\nurl = git@github.com:other/repo.git\n[remote \"origin\"]\nurl = git@github.com:me/repo.git\n";
533        assert_eq!(
534            scan_remote_origin_url(cfg).as_deref(),
535            Some("git@github.com:me/repo.git")
536        );
537    }
538
539    #[test]
540    fn scan_origin_url_returns_none_when_missing() {
541        assert!(scan_remote_origin_url("[core]\nbare = false\n").is_none());
542    }
543
544    /// Regression: a key that merely *starts with* `url` (e.g. a
545    /// custom `urlsuffix` git permits) must NOT be treated as the
546    /// `url` key, and — critically — must not abort the scan before
547    /// the real `url = ...` line that follows it is read.
548    #[test]
549    fn scan_origin_url_ignores_url_prefixed_key_and_keeps_scanning() {
550        let cfg = "[remote \"origin\"]\n\turlsuffix = nonsense\n\turl = git@github.com:foo/bar.git\n";
551        assert_eq!(
552            scan_remote_origin_url(cfg).as_deref(),
553            Some("git@github.com:foo/bar.git")
554        );
555    }
556
557    /// Regression: a malformed `url ...` line WITHOUT an `=` must be
558    /// skipped, allowing a later well-formed `url = ...` line in the
559    /// same section to still be picked up. (Previously the `?` on the
560    /// `=` strip aborted the whole function, returning None.)
561    #[test]
562    fn scan_origin_url_skips_malformed_url_line_then_finds_valid_one() {
563        let cfg = "[remote \"origin\"]\n\turl no-equals-here\n\turl = git@github.com:foo/bar.git\n";
564        assert_eq!(
565            scan_remote_origin_url(cfg).as_deref(),
566            Some("git@github.com:foo/bar.git")
567        );
568    }
569
570    /// A `url` value embedding an `=` (rare, but the scp/https forms
571    /// permit query-ish suffixes) keeps everything after the FIRST
572    /// `=`, matching the prior behavior.
573    #[test]
574    fn scan_origin_url_preserves_equals_inside_value() {
575        let cfg = "[remote \"origin\"]\n\turl = https://host/p?token=abc\n";
576        assert_eq!(
577            scan_remote_origin_url(cfg).as_deref(),
578            Some("https://host/p?token=abc")
579        );
580    }
581
582    #[tokio::test]
583    async fn detect_prefers_git_remote_over_package_manifest() {
584        let dir = tempfile::tempdir().unwrap();
585        // package.json says "from-pkg"; git remote says "from-git".
586        // Git remote must win.
587        tokio::fs::write(
588            dir.path().join("package.json"),
589            r#"{"name":"from-pkg","version":"1.0.0"}"#,
590        )
591        .await
592        .unwrap();
593        let git_dir = dir.path().join(".git");
594        tokio::fs::create_dir_all(&git_dir).await.unwrap();
595        tokio::fs::write(
596            git_dir.join("config"),
597            "[remote \"origin\"]\n\turl = git@github.com:owner/from-git.git\n",
598        )
599        .await
600        .unwrap();
601
602        let r = detect_product(dir.path()).await;
603        assert_eq!(r.purl.as_deref(), Some("pkg:github/owner/from-git"));
604    }
605
606    #[tokio::test]
607    async fn detect_falls_back_to_package_manifest_when_no_git_remote() {
608        // Empty .git/config (no remote) → fall through to package.json.
609        let dir = tempfile::tempdir().unwrap();
610        tokio::fs::write(
611            dir.path().join("package.json"),
612            r#"{"name":"pkg-only","version":"2.0.0"}"#,
613        )
614        .await
615        .unwrap();
616        let git_dir = dir.path().join(".git");
617        tokio::fs::create_dir_all(&git_dir).await.unwrap();
618        tokio::fs::write(git_dir.join("config"), "[core]\nbare = false\n")
619            .await
620            .unwrap();
621
622        let r = detect_product(dir.path()).await;
623        assert_eq!(r.purl.as_deref(), Some("pkg:npm/pkg-only@2.0.0"));
624    }
625
626    #[tokio::test]
627    async fn detect_finds_git_config_in_parent_directory() {
628        // Common case: socket-patch is invoked from a subdir of the repo.
629        let root = tempfile::tempdir().unwrap();
630        let git_dir = root.path().join(".git");
631        tokio::fs::create_dir_all(&git_dir).await.unwrap();
632        tokio::fs::write(
633            git_dir.join("config"),
634            "[remote \"origin\"]\n\turl = git@github.com:org/proj.git\n",
635        )
636        .await
637        .unwrap();
638
639        let nested = root.path().join("packages").join("inner");
640        tokio::fs::create_dir_all(&nested).await.unwrap();
641
642        let r = detect_product(&nested).await;
643        assert_eq!(r.purl.as_deref(), Some("pkg:github/org/proj"));
644    }
645
646    // ── Edge-case + branch coverage ───────────────────────────────
647
648    /// `.git/config` exists but lists only non-origin remotes →
649    /// detection must fall through to package-manifest discovery
650    /// (otherwise the repo would surface no identifier at all).
651    #[tokio::test]
652    async fn git_config_with_only_non_origin_remote_falls_through() {
653        let dir = tempfile::tempdir().unwrap();
654        tokio::fs::write(
655            dir.path().join("package.json"),
656            r#"{"name":"fallback-app","version":"1.0.0"}"#,
657        )
658        .await
659        .unwrap();
660        let git_dir = dir.path().join(".git");
661        tokio::fs::create_dir_all(&git_dir).await.unwrap();
662        tokio::fs::write(
663            git_dir.join("config"),
664            "[remote \"upstream\"]\n\turl = git@github.com:other/proj.git\n",
665        )
666        .await
667        .unwrap();
668
669        let r = detect_product(dir.path()).await;
670        assert_eq!(r.purl.as_deref(), Some("pkg:npm/fallback-app@1.0.0"));
671    }
672
673    /// `url =` with no value after the `=` is a malformed git config.
674    /// Detection must treat it as "no remote" and fall through.
675    #[tokio::test]
676    async fn git_config_with_empty_url_falls_through() {
677        let dir = tempfile::tempdir().unwrap();
678        tokio::fs::write(
679            dir.path().join("package.json"),
680            r#"{"name":"fallback-app","version":"1.0.0"}"#,
681        )
682        .await
683        .unwrap();
684        let git_dir = dir.path().join(".git");
685        tokio::fs::create_dir_all(&git_dir).await.unwrap();
686        tokio::fs::write(git_dir.join("config"), "[remote \"origin\"]\n\turl = \n")
687            .await
688            .unwrap();
689
690        let r = detect_product(dir.path()).await;
691        assert_eq!(r.purl.as_deref(), Some("pkg:npm/fallback-app@1.0.0"));
692    }
693
694    /// CRLF line endings — Rust's `str::lines()` already handles
695    /// `\r\n`, but pin this so a future switch to `split('\n')`
696    /// would surface the regression.
697    #[test]
698    fn scan_origin_url_handles_crlf_line_endings() {
699        let cfg = "[remote \"origin\"]\r\n\turl = git@github.com:foo/bar.git\r\n";
700        assert_eq!(
701            scan_remote_origin_url(cfg).as_deref(),
702            Some("git@github.com:foo/bar.git")
703        );
704    }
705
706    /// `git+ssh://` URL form → `split_remote_host_path` branch.
707    #[test]
708    fn remote_url_git_plus_ssh_form() {
709        assert_eq!(
710            remote_url_to_purl("git+ssh://git@github.com/owner/repo.git"),
711            "pkg:github/owner/repo"
712        );
713    }
714
715    /// `git://` URL form (legacy unauthenticated) — separate branch
716    /// from `ssh://` and `https://`.
717    #[test]
718    fn remote_url_git_protocol_form() {
719        assert_eq!(
720            remote_url_to_purl("git://github.com/owner/repo.git"),
721            "pkg:github/owner/repo"
722        );
723    }
724
725    /// `http://` (plain, not https) — exercises the
726    /// `strip_prefix("http://")` arm in `split_remote_host_path`.
727    #[test]
728    fn remote_url_http_form() {
729        assert_eq!(
730            remote_url_to_purl("http://github.com/owner/repo.git"),
731            "pkg:github/owner/repo"
732        );
733    }
734
735    /// `ssh://git@host:22/path` — port suffix on host must be
736    /// stripped so the ecosystem lookup still matches `github.com`.
737    #[test]
738    fn remote_url_ssh_with_port_strips_port() {
739        assert_eq!(
740            remote_url_to_purl("ssh://git@github.com:22/owner/repo.git"),
741            "pkg:github/owner/repo"
742        );
743    }
744
745    /// Pre-`split_remote_host_path` SSH form WITH NO user prefix:
746    /// `ssh://github.com/foo/bar.git`. Branch where the `@` split
747    /// doesn't fire and the whole rest is treated as `host/path`.
748    #[test]
749    fn remote_url_ssh_no_user_prefix() {
750        assert_eq!(
751            remote_url_to_purl("ssh://github.com/foo/bar.git"),
752            "pkg:github/foo/bar"
753        );
754    }
755
756    /// Truly unrecognized URL form (no recognized scheme prefix and
757    /// no scp-style `git@host:path`) → returned as-is.
758    #[test]
759    fn remote_url_unknown_shape_returned_verbatim() {
760        let weird = "file:///srv/repos/proj.git";
761        assert_eq!(remote_url_to_purl(weird), weird);
762    }
763
764    /// `pyproject.toml` with `[tool.poetry]` (Poetry layout) is now
765    /// supported as a fallback when `[project]` is absent.
766    #[tokio::test]
767    async fn detect_pyproject_tool_poetry_layout() {
768        let dir = tempfile::tempdir().unwrap();
769        let content = "[tool.poetry]\nname = \"poetry-app\"\nversion = \"0.9.0\"\n";
770        tokio::fs::write(dir.path().join("pyproject.toml"), content)
771            .await
772            .unwrap();
773        let r = detect_product(dir.path()).await;
774        assert_eq!(r.purl.as_deref(), Some("pkg:pypi/poetry-app@0.9.0"));
775    }
776
777    /// When `[project]` and `[tool.poetry]` are both present, the
778    /// PEP-621 section wins (modern projects prefer it).
779    #[tokio::test]
780    async fn detect_pyproject_project_section_wins_over_tool_poetry() {
781        let dir = tempfile::tempdir().unwrap();
782        let content = "[project]\nname = \"pep621-app\"\nversion = \"1.0.0\"\n\n[tool.poetry]\nname = \"poetry-app\"\nversion = \"0.9.0\"\n";
783        tokio::fs::write(dir.path().join("pyproject.toml"), content)
784            .await
785            .unwrap();
786        let r = detect_product(dir.path()).await;
787        assert_eq!(r.purl.as_deref(), Some("pkg:pypi/pep621-app@1.0.0"));
788    }
789
790    /// Multi-manifest combo: pyproject + Cargo.toml present, no
791    /// package.json. pyproject wins per the priority list.
792    #[tokio::test]
793    async fn detect_pyproject_over_cargo_when_no_package_json() {
794        let dir = tempfile::tempdir().unwrap();
795        tokio::fs::write(
796            dir.path().join("pyproject.toml"),
797            "[project]\nname = \"py-app\"\nversion = \"1.0.0\"\n",
798        )
799        .await
800        .unwrap();
801        tokio::fs::write(
802            dir.path().join("Cargo.toml"),
803            "[package]\nname = \"rust-app\"\nversion = \"2.0.0\"\n",
804        )
805        .await
806        .unwrap();
807        let r = detect_product(dir.path()).await;
808        assert_eq!(r.purl.as_deref(), Some("pkg:pypi/py-app@1.0.0"));
809        assert_eq!(r.warnings.len(), 1);
810        assert!(r.warnings[0].contains("pyproject.toml"));
811        assert!(r.warnings[0].contains("Cargo.toml"));
812    }
813
814    /// `package.json` with only `version` (no `name`) → None.
815    /// Currently the early `is_empty()` branch in `read_package_json`.
816    #[tokio::test]
817    async fn package_json_missing_name_returns_none() {
818        let dir = tempfile::tempdir().unwrap();
819        tokio::fs::write(dir.path().join("package.json"), r#"{"version":"1.0.0"}"#)
820            .await
821            .unwrap();
822        let r = detect_product(dir.path()).await;
823        assert!(r.purl.is_none());
824    }
825
826    /// `package.json` with empty `name` string → None (is_empty check).
827    #[tokio::test]
828    async fn package_json_empty_name_returns_none() {
829        let dir = tempfile::tempdir().unwrap();
830        tokio::fs::write(
831            dir.path().join("package.json"),
832            r#"{"name":"","version":"1.0.0"}"#,
833        )
834        .await
835        .unwrap();
836        let r = detect_product(dir.path()).await;
837        assert!(r.purl.is_none());
838    }
839
840    /// `package.json` with invalid JSON → None (parse-error branch).
841    #[tokio::test]
842    async fn package_json_invalid_json_returns_none() {
843        let dir = tempfile::tempdir().unwrap();
844        tokio::fs::write(dir.path().join("package.json"), "{ not json")
845            .await
846            .unwrap();
847        let r = detect_product(dir.path()).await;
848        assert!(r.purl.is_none());
849    }
850
/// `parse_toml_string_kv` yields `None` for a line that has no `=`
/// separator at all.
#[test]
fn parse_toml_kv_returns_none_when_no_equals() {
    let line = "name without equals";
    let parsed = parse_toml_string_kv(line, "name");
    assert!(parsed.is_none());
}
856
/// `parse_toml_string_kv` yields `None` when the line is a
/// well-formed string assignment but for a different key than the
/// one requested.
#[test]
fn parse_toml_kv_returns_none_when_key_mismatch() {
    let line = r#"other = "value""#;
    let parsed = parse_toml_string_kv(line, "name");
    assert!(parsed.is_none());
}
862
/// `parse_toml_string_kv` yields `None` when the value opens a quote
/// that is never closed.
#[test]
fn parse_toml_kv_returns_none_when_unterminated_string() {
    let line = r#"name = "no-close"#;
    let parsed = parse_toml_string_kv(line, "name");
    assert!(parsed.is_none());
}
868
/// `parse_toml_string_kv` yields `None` for an empty quoted value:
/// `name = ""` is rejected rather than returned as an empty string.
#[test]
fn parse_toml_kv_returns_none_when_value_empty() {
    let line = r#"name = """#;
    let parsed = parse_toml_string_kv(line, "name");
    assert!(parsed.is_none());
}
875
/// `parse_toml_string_kv` yields `None` for an unquoted value such
/// as `name = 42`; only quoted strings are accepted.
#[test]
fn parse_toml_kv_returns_none_when_value_not_quoted() {
    let line = r#"name = 42"#;
    let parsed = parse_toml_string_kv(line, "name");
    assert!(parsed.is_none());
}
882
/// `split_remote_host_path` yields `None` for a `git@host` remote
/// that lacks the scp-style `:path` part.
#[test]
fn split_host_path_rejects_ssh_without_colon() {
    let remote = "git@github.com";
    let split = split_remote_host_path(remote);
    assert!(split.is_none());
}
889
/// `split_remote_host_path` yields `None` for a scheme URL that
/// names only a host and has no `/path` after it.
#[test]
fn split_host_path_rejects_scheme_url_without_path() {
    let remote = "https://github.com";
    let split = split_remote_host_path(remote);
    assert!(split.is_none());
}
896
/// `remote_url_to_purl` does not rewrite a GitHub URL whose path has
/// three segments (`owner/repo/extra`); the input comes back
/// unchanged instead of a `pkg:github/...` PURL.
#[test]
fn remote_url_three_path_segments_returns_url_as_is() {
    let url = "https://github.com/owner/repo/extra";
    let purl = remote_url_to_purl(url);
    assert_eq!(purl, url);
}
905
/// `remote_url_to_purl` tolerates a trailing slash on the path:
/// `https://github.com/owner/repo/` still maps to
/// `pkg:github/owner/repo`.
#[test]
fn remote_url_trailing_slash_is_normalized() {
    let purl = remote_url_to_purl("https://github.com/owner/repo/");
    assert_eq!(purl, "pkg:github/owner/repo");
}
916
917    /// `Cargo.toml` with `name` only (no `version`) → None. Exercises
918    /// the `version?` early-return path inside `scan_toml_section`.
919    #[tokio::test]
920    async fn cargo_toml_missing_version_returns_none() {
921        let dir = tempfile::tempdir().unwrap();
922        tokio::fs::write(
923            dir.path().join("Cargo.toml"),
924            "[package]\nname = \"only-name\"\n",
925        )
926        .await
927        .unwrap();
928        let r = detect_product(dir.path()).await;
929        assert!(r.purl.is_none());
930    }
931
932    /// Pyproject without `[project]` AND without `[tool.poetry]` →
933    /// None.
934    #[tokio::test]
935    async fn pyproject_with_no_recognized_section_returns_none() {
936        let dir = tempfile::tempdir().unwrap();
937        tokio::fs::write(
938            dir.path().join("pyproject.toml"),
939            "[build-system]\nrequires = [\"setuptools\"]\n",
940        )
941        .await
942        .unwrap();
943        let r = detect_product(dir.path()).await;
944        assert!(r.purl.is_none());
945    }
946
/// The derived `Default` for `DetectResult` carries no PURL and no
/// warnings.
#[test]
fn detect_result_default_is_empty() {
    let DetectResult { purl, warnings } = DetectResult::default();
    assert!(purl.is_none());
    assert!(warnings.is_empty());
}
954
955    /// `find_git_config` returns None for a path that genuinely has
956    /// no `.git/config` on any ancestor. Tempdir on `/var/folders` (macOS)
957    /// or `/tmp` (linux) gives us a tree that escapes the user's home.
958    #[tokio::test]
959    async fn find_git_config_returns_none_when_no_repo_ancestor() {
960        // Walk up from the tempdir — none of its ancestors should
961        // contain `.git/config`. This depends on the test runner's
962        // tempdir living outside any git repo; both macOS
963        // /var/folders and Linux /tmp satisfy that.
964        let dir = tempfile::tempdir().unwrap();
965        let r = find_git_config(dir.path()).await;
966        assert!(r.is_none(), "unexpected .git/config above {dir:?}: {r:?}");
967    }
968
969    /// `find_git_config` handles a non-existent start path via the
970    /// `canonicalize → Err` arm and still walks ancestors of the
971    /// raw input. Returns None when no config is found.
972    #[tokio::test]
973    async fn find_git_config_handles_non_existent_start_path() {
974        let dir = tempfile::tempdir().unwrap();
975        let nonexistent = dir.path().join("does/not/exist");
976        // No I/O panic; the fallback `start.to_path_buf()` arm of
977        // the `canonicalize` match runs.
978        let r = find_git_config(&nonexistent).await;
979        assert!(r.is_none());
980    }
981
982    /// `package.json` where `name` is a number, not a string → None.
983    /// Exercises the `.as_str()?` branch on the JSON value.
984    #[tokio::test]
985    async fn package_json_with_non_string_name_returns_none() {
986        let dir = tempfile::tempdir().unwrap();
987        tokio::fs::write(
988            dir.path().join("package.json"),
989            r#"{"name":42,"version":"1.0.0"}"#,
990        )
991        .await
992        .unwrap();
993        let r = detect_product(dir.path()).await;
994        assert!(r.purl.is_none());
995    }
996
997    /// `package.json` where `version` is a number → None.
998    #[tokio::test]
999    async fn package_json_with_non_string_version_returns_none() {
1000        let dir = tempfile::tempdir().unwrap();
1001        tokio::fs::write(
1002            dir.path().join("package.json"),
1003            r#"{"name":"x","version":42}"#,
1004        )
1005        .await
1006        .unwrap();
1007        let r = detect_product(dir.path()).await;
1008        assert!(r.purl.is_none());
1009    }
1010
/// A `[remote "origin"]` section whose only `url` line has no `=`
/// sign offers no usable URL, so `scan_remote_origin_url` returns
/// `None`.
///
/// NOTE(review): this test cannot tell whether the scanner skips the
/// malformed line and runs out of section, or bails out on it
/// immediately; both paths end in `None`. Confirm against
/// `scan_remote_origin_url` if the distinction matters.
#[test]
fn scan_origin_url_skips_url_line_without_equals_sign() {
    let git_config = "[remote \"origin\"]\n\turl no-equals-here\n";
    let origin_url = scan_remote_origin_url(git_config);
    assert!(origin_url.is_none());
}
1024
1025    /// `package.json` missing the `version` key entirely. Exercises
1026    /// the `v.get("version")?` early-return path (distinct from the
1027    /// `.as_str()?` branch — `get` returns None, not Some(non-string)).
1028    #[tokio::test]
1029    async fn package_json_missing_version_key_returns_none() {
1030        let dir = tempfile::tempdir().unwrap();
1031        tokio::fs::write(dir.path().join("package.json"), r#"{"name":"x"}"#)
1032            .await
1033            .unwrap();
1034        let r = detect_product(dir.path()).await;
1035        assert!(r.purl.is_none());
1036    }
1037
1038    // ── Regression: `.git` strip ordering ─────────────────────────
1039
/// Regression guard: an HTTPS remote that ends in `.git/` (suffix
/// AND trailing slash) must normalize to `pkg:github/owner/repo`.
/// The bug this pins, per the original regression note: `.git` was
/// stripped before the slash was trimmed, so nothing was stripped
/// and the PURL kept `repo.git`.
#[test]
fn remote_url_dotgit_with_trailing_slash_is_normalized() {
    let purl = remote_url_to_purl("https://github.com/owner/repo.git/");
    assert_eq!(purl, "pkg:github/owner/repo");
}
1052
/// Same `.git/` suffix-plus-slash combination, but for the scp-style
/// SSH remote form (`git@host:owner/repo.git/`).
#[test]
fn remote_url_ssh_dotgit_with_trailing_slash_is_normalized() {
    let purl = remote_url_to_purl("git@github.com:owner/repo.git/");
    assert_eq!(purl, "pkg:github/owner/repo");
}
1061
/// Regression guard: host names are case-insensitive (DNS), so a
/// remote spelled with a mixed-case host such as `GitHub.com` or
/// `GitLab.com` must still map to the matching ecosystem instead of
/// falling back to the raw URL.
#[test]
fn remote_url_mixed_case_host_is_normalized() {
    let cases = [
        ("git@GitHub.com:owner/repo.git", "pkg:github/owner/repo"),
        ("https://GitLab.com/foo/bar", "pkg:gitlab/foo/bar"),
    ];
    for (remote, expected) in cases {
        assert_eq!(remote_url_to_purl(remote), expected);
    }
}
1076
/// Case-insensitive host matching must not leak into the path: the
/// owner and repository segments keep their original casing in the
/// resulting PURL, since repository names are case-sensitive.
#[test]
fn remote_url_path_case_is_preserved() {
    let purl = remote_url_to_purl("git@GITHUB.COM:SocketDev/Socket-Patch.git");
    assert_eq!(purl, "pkg:github/SocketDev/Socket-Patch");
}
1087
1088    // ── Regression: multi-manifest warning names the USED manifest ──
1089
1090    /// Regression: when the highest-priority manifest is present but
1091    /// fails to parse (invalid JSON), detection falls through to the
1092    /// next manifest — and the warning must name the manifest ACTUALLY
1093    /// used, not the one that failed. Previously the warning hard-coded
1094    /// `found[0]` ("package.json") even though Cargo.toml was used.
1095    #[tokio::test]
1096    async fn multi_manifest_warning_names_actually_used_manifest() {
1097        let dir = tempfile::tempdir().unwrap();
1098        // package.json present but unparseable → falls through to Cargo.
1099        tokio::fs::write(dir.path().join("package.json"), "{ not json")
1100            .await
1101            .unwrap();
1102        tokio::fs::write(
1103            dir.path().join("Cargo.toml"),
1104            "[package]\nname = \"alt\"\nversion = \"9.9.9\"\n",
1105        )
1106        .await
1107        .unwrap();
1108
1109        let r = detect_product(dir.path()).await;
1110        assert_eq!(r.purl.as_deref(), Some("pkg:cargo/alt@9.9.9"));
1111        assert_eq!(r.warnings.len(), 1);
1112        // The "detected (...)" list still mentions both manifests.
1113        assert!(r.warnings[0].contains("package.json"));
1114        assert!(r.warnings[0].contains("Cargo.toml"));
1115        // But the "using X" clause must name Cargo.toml, the one used.
1116        assert!(
1117            r.warnings[0].contains("using Cargo.toml"),
1118            "warning should name the manifest actually used: {}",
1119            r.warnings[0]
1120        );
1121    }
1122
1123    /// When multiple manifests are present but NONE parse, there is no
1124    /// product to surface and therefore no "using X" warning to emit
1125    /// (it would name a manifest that wasn't actually used).
1126    #[tokio::test]
1127    async fn multi_manifest_all_unparseable_emits_no_warning() {
1128        let dir = tempfile::tempdir().unwrap();
1129        tokio::fs::write(dir.path().join("package.json"), "{ not json")
1130            .await
1131            .unwrap();
1132        // Cargo.toml present but version is workspace-inherited (unsupported).
1133        tokio::fs::write(
1134            dir.path().join("Cargo.toml"),
1135            "[package]\nname = \"alt\"\nversion.workspace = true\n",
1136        )
1137        .await
1138        .unwrap();
1139
1140        let r = detect_product(dir.path()).await;
1141        assert!(r.purl.is_none());
1142        assert!(r.warnings.is_empty());
1143    }
1144}