Skip to main content

socket_patch_core/vex/
product.rs

1//! Top-level product PURL auto-detection.
2//!
3//! Detection chain (first match wins):
4//!   1. `.git/config` `[remote "origin"]` URL — the canonical
5//!      identifier when the repo IS the product. GitHub/GitLab/
6//!      Bitbucket URLs are normalized to
7//!      `pkg:<github|gitlab|bitbucket>/<owner>/<name>`; anything else
8//!      is returned as the raw URL.
9//!   2. `package.json` (npm)        → `pkg:npm/<name>@<version>`
10//!   3. `pyproject.toml` (PyPI)     → `pkg:pypi/<name>@<version>`
11//!   4. `Cargo.toml` (Cargo)        → `pkg:cargo/<name>@<version>`
12//!
13//! Returns `None` only when none of these sources yield a usable
14//! identifier. Multiple-package-manifest case: we pick the highest
15//! package-manifest priority and surface a warning via
16//! [`DetectResult::warnings`] so the CLI can echo it to stderr. Git
17//! remote presence does NOT trigger that warning even when alongside
18//! a package manifest — the priority is documented and stable.
19
20use std::path::Path;
21
/// What [`detect_product`] found for the top-level product.
#[derive(Clone, Debug, Default)]
pub struct DetectResult {
    /// Identifier for the product — a PURL, or the raw remote URL when
    /// the git remote could not be normalized. `None` when no detection
    /// source produced one.
    pub purl: Option<String>,
    /// Non-fatal notes the CLI is expected to echo on stderr, such as
    /// "found Cargo.toml AND package.json; using package.json".
    pub warnings: Vec<String>,
}
31
32pub async fn detect_product(cwd: &Path) -> DetectResult {
33    let mut result = DetectResult::default();
34
35    // 1. git remote origin (highest priority — canonical when present).
36    if let Some(purl) = detect_git_remote(cwd).await {
37        result.purl = Some(purl);
38        return result;
39    }
40
41    let pkg_json = cwd.join("package.json");
42    let pyproject = cwd.join("pyproject.toml");
43    let cargo = cwd.join("Cargo.toml");
44
45    let pkg_json_exists = tokio::fs::metadata(&pkg_json).await.is_ok();
46    let pyproject_exists = tokio::fs::metadata(&pyproject).await.is_ok();
47    let cargo_exists = tokio::fs::metadata(&cargo).await.is_ok();
48
49    // Collect a warning if more than one manifest is present.
50    let present_count = [pkg_json_exists, pyproject_exists, cargo_exists]
51        .iter()
52        .filter(|b| **b)
53        .count();
54    if present_count > 1 {
55        let mut found = Vec::new();
56        if pkg_json_exists {
57            found.push("package.json");
58        }
59        if pyproject_exists {
60            found.push("pyproject.toml");
61        }
62        if cargo_exists {
63            found.push("Cargo.toml");
64        }
65        result.warnings.push(format!(
66            "Multiple project manifests detected ({}); using {} for the top-level product",
67            found.join(", "),
68            found[0]
69        ));
70    }
71
72    if pkg_json_exists {
73        if let Some(purl) = read_package_json(&pkg_json).await {
74            result.purl = Some(purl);
75            return result;
76        }
77    }
78    if pyproject_exists {
79        if let Some(purl) = read_pyproject(&pyproject).await {
80            result.purl = Some(purl);
81            return result;
82        }
83    }
84    if cargo_exists {
85        if let Some(purl) = read_cargo_toml(&cargo).await {
86            result.purl = Some(purl);
87            return result;
88        }
89    }
90
91    result
92}
93
94async fn read_package_json(path: &Path) -> Option<String> {
95    let content = tokio::fs::read_to_string(path).await.ok()?;
96    let v: serde_json::Value = serde_json::from_str(&content).ok()?;
97    let name = v.get("name")?.as_str()?;
98    let version = v.get("version")?.as_str()?;
99    if name.is_empty() || version.is_empty() {
100        return None;
101    }
102    // npm scoped packages keep their `@scope/name` form in the PURL —
103    // matches how socket-patch's manifest already stores them.
104    Some(format!("pkg:npm/{name}@{version}"))
105}
106
107async fn read_pyproject(path: &Path) -> Option<String> {
108    let content = tokio::fs::read_to_string(path).await.ok()?;
109    // PEP 621 `[project]` takes precedence (newer projects favor it),
110    // then fall back to Poetry's `[tool.poetry]` for legacy layouts.
111    let (name, version) = scan_toml_section(&content, "project")
112        .or_else(|| scan_toml_section(&content, "tool.poetry"))?;
113    Some(format!("pkg:pypi/{name}@{version}"))
114}
115
116async fn read_cargo_toml(path: &Path) -> Option<String> {
117    let content = tokio::fs::read_to_string(path).await.ok()?;
118    let (name, version) = scan_toml_section(&content, "package")?;
119    Some(format!("pkg:cargo/{name}@{version}"))
120}
121
122/// Minimal line-based TOML scanner for `[<section>]` blocks. Reads
123/// `name = "..."` and `version = "..."` from the named section and
124/// stops at the next `[` header. Robust enough for the well-formed
125/// `pyproject.toml` / `Cargo.toml` files we expect at the top level —
126/// no full TOML parser dependency.
127///
128/// Returns `None` if either key is missing, both keys appear outside
129/// the section, the value is empty, or the value is `version.workspace
130/// = true` (matches the cargo crawler's behavior of skipping workspace
131/// inheritance).
132fn scan_toml_section(content: &str, section: &str) -> Option<(String, String)> {
133    let mut in_section = false;
134    let mut name: Option<String> = None;
135    let mut version: Option<String> = None;
136    let header = format!("[{section}]");
137
138    for raw in content.lines() {
139        let line = raw.trim();
140        if line.is_empty() || line.starts_with('#') {
141            continue;
142        }
143        if line.starts_with('[') {
144            in_section = line == header;
145            continue;
146        }
147        if !in_section {
148            continue;
149        }
150        if let Some(v) = parse_toml_string_kv(line, "name") {
151            name = Some(v);
152        } else if let Some(v) = parse_toml_string_kv(line, "version") {
153            version = Some(v);
154        }
155    }
156
157    let name = name?;
158    let version = version?;
159    if name.is_empty() || version.is_empty() {
160        return None;
161    }
162    Some((name, version))
163}
164
165/// Walk up from `start` looking for a `.git/config` (the working tree
166/// or any of its ancestors). When found, parse the
167/// `[remote "origin"] url = ...` line and convert that URL to a PURL.
168///
169/// Returns `None` when:
170/// * `cwd` is not inside a git working tree,
171/// * `.git/config` has no `[remote "origin"]` section, or
172/// * the URL is empty / parsing failed catastrophically. (Otherwise
173///   even unrecognized hosts fall through to the raw-URL case.)
174///
175/// Worktrees (`.git` as a file pointing at a real git dir elsewhere)
176/// are deliberately NOT followed — they're rare and the package-
177/// manifest fallback handles them correctly. Submodules likewise:
178/// only the outermost `.git/config` wins.
179async fn detect_git_remote(start: &Path) -> Option<String> {
180    let git_config_path = find_git_config(start).await?;
181    let content = tokio::fs::read_to_string(&git_config_path).await.ok()?;
182    let url = scan_remote_origin_url(&content)?;
183    Some(remote_url_to_purl(&url))
184}
185
186/// Walk ancestors looking for `<dir>/.git/config` as a regular file.
187/// Returns the path to it, or `None` if we exhaust the chain.
188async fn find_git_config(start: &Path) -> Option<std::path::PathBuf> {
189    let mut cursor = match tokio::fs::canonicalize(start).await {
190        Ok(p) => p,
191        Err(_) => start.to_path_buf(),
192    };
193    loop {
194        let candidate = cursor.join(".git").join("config");
195        if tokio::fs::metadata(&candidate)
196            .await
197            .map(|m| m.is_file())
198            .unwrap_or(false)
199        {
200            return Some(candidate);
201        }
202        match cursor.parent() {
203            Some(p) => cursor = p.to_path_buf(),
204            None => return None,
205        }
206    }
207}
208
/// Read the `url = ...` entry out of the `[remote "origin"]` section of
/// a git config file.
///
/// Only lines inside that section are considered. A line counts as the
/// URL entry when the text before its first `=` is exactly `url`
/// (compared case-insensitively, since git variable names are
/// case-insensitive). Other keys — including ones that merely start
/// with `url` — and lines without `=` are skipped rather than ending
/// the scan.
///
/// Returns the trimmed value of the first such entry, or `None` when
/// there is no origin section, no `url` entry in it, or the first
/// `url` entry has an empty value.
fn scan_remote_origin_url(content: &str) -> Option<String> {
    let mut in_section = false;
    for raw in content.lines() {
        let line = raw.trim();
        if line.starts_with('[') && line.ends_with(']') {
            in_section = line == "[remote \"origin\"]";
            continue;
        }
        if !in_section {
            continue;
        }
        if let Some((key, value)) = line.split_once('=') {
            if key.trim().eq_ignore_ascii_case("url") {
                let value = value.trim();
                // An empty URL is treated as "no remote configured".
                return if value.is_empty() {
                    None
                } else {
                    Some(value.to_string())
                };
            }
        }
    }
    None
}
233
234/// Convert a git remote URL to a PURL when possible, else return the
235/// URL itself (OpenVEX `@id` accepts any URI).
236///
237/// Handled forms:
238/// * `git@github.com:owner/repo.git`     → `pkg:github/owner/repo`
239/// * `https://github.com/owner/repo.git` → `pkg:github/owner/repo`
240/// * `https://github.com/owner/repo`     → `pkg:github/owner/repo`
241/// * Same shapes for `gitlab.com` (→ `pkg:gitlab`) and `bitbucket.org`
242///   (→ `pkg:bitbucket`).
243/// * Anything else (self-hosted gitea, generic SSH, etc.) → URL as-is.
244fn remote_url_to_purl(url: &str) -> String {
245    if let Some((host, path)) = split_remote_host_path(url) {
246        let cleaned = path.strip_suffix(".git").unwrap_or(path);
247        let cleaned = cleaned.trim_matches('/');
248        let parts: Vec<&str> = cleaned.split('/').collect();
249        if parts.len() == 2 && !parts[0].is_empty() && !parts[1].is_empty() {
250            let ecosystem = match host {
251                "github.com" => Some("github"),
252                "gitlab.com" => Some("gitlab"),
253                "bitbucket.org" => Some("bitbucket"),
254                _ => None,
255            };
256            if let Some(eco) = ecosystem {
257                return format!("pkg:{eco}/{}/{}", parts[0], parts[1]);
258            }
259        }
260    }
261    url.to_string()
262}
263
/// Pull `(host, path)` out of a git remote URL. Returns `None` for
/// shapes we don't recognize — the caller falls back to raw-URL mode.
///
/// Recognized shapes:
/// * scp-style `git@<host>:<path>`;
/// * `ssh://`, `git+ssh://`, `git://`, `https://` and `http://` URLs of
///   the form `<scheme>[user[:password]@]<host>[:port]/<path>`.
///
/// For scheme URLs the authority (everything before the first `/`) is
/// isolated first, and only then are the `user@` prefix and `:port`
/// suffix removed. An `@` or `:` that appears later in the path
/// therefore can never be mistaken for part of the host.
fn split_remote_host_path(url: &str) -> Option<(&str, &str)> {
    // SSH form: `git@<host>:<path>`. The `:` is a path separator, NOT
    // a port — git's URL parser treats this as scp-style.
    if let Some(rest) = url.strip_prefix("git@") {
        return rest.split_once(':');
    }

    let rest = url
        .strip_prefix("ssh://")
        .or_else(|| url.strip_prefix("git+ssh://"))
        .or_else(|| url.strip_prefix("git://"))
        .or_else(|| url.strip_prefix("https://"))
        .or_else(|| url.strip_prefix("http://"))?;

    // Authority ends at the first `/`; no `/` means there is no path.
    let (authority, path) = rest.split_once('/')?;

    // Drop optional userinfo. The LAST `@` delimits it, so a password
    // containing `@` does not leak into the host.
    let host_with_port = match authority.rsplit_once('@') {
        Some((_, after)) => after,
        None => authority,
    };

    // Strip a `:port` if present.
    let host = match host_with_port.split_once(':') {
        Some((name, _)) => name,
        None => host_with_port,
    };

    Some((host, path))
}
296
/// Parse a single-line TOML string assignment: `<key> = "<value>"`
/// (basic string) or `<key> = '<value>'` (literal string).
///
/// The text before the first `=` must equal `key` after trimming. The
/// value runs from the opening quote to the next occurrence of the
/// same quote character; anything after it (e.g. a trailing comment)
/// is ignored. Escape sequences are not interpreted.
///
/// Returns `None` if the line has no `=`, the key doesn't match, the
/// value doesn't start with a quote, the closing quote is missing, or
/// the value is empty. Dotted or inline-table forms like
/// `version.workspace = true` / `version = { workspace = true }` fail
/// these checks and are skipped by the caller.
fn parse_toml_string_kv(line: &str, key: &str) -> Option<String> {
    let (lhs, rhs) = line.split_once('=')?;
    if lhs.trim() != key {
        return None;
    }

    let rhs = rhs.trim_start();
    let quote = rhs.chars().next()?;
    if quote != '"' && quote != '\'' {
        return None;
    }

    // The closing delimiter must be the same character that opened the
    // string, so `"it's"` keeps its apostrophe.
    let body = &rhs[quote.len_utf8()..];
    let end = body.find(quote)?;
    let value = &body[..end];
    if value.is_empty() {
        None
    } else {
        Some(value.to_string())
    }
}
317
318#[cfg(test)]
319mod tests {
320    use super::*;
321
322    #[tokio::test]
323    async fn detect_package_json() {
324        let dir = tempfile::tempdir().unwrap();
325        tokio::fs::write(
326            dir.path().join("package.json"),
327            r#"{"name":"my-app","version":"1.2.3"}"#,
328        )
329        .await
330        .unwrap();
331
332        let r = detect_product(dir.path()).await;
333        assert_eq!(r.purl.as_deref(), Some("pkg:npm/my-app@1.2.3"));
334        assert!(r.warnings.is_empty());
335    }
336
337    #[tokio::test]
338    async fn detect_scoped_npm_package() {
339        let dir = tempfile::tempdir().unwrap();
340        tokio::fs::write(
341            dir.path().join("package.json"),
342            r#"{"name":"@socket/foo","version":"0.1.0"}"#,
343        )
344        .await
345        .unwrap();
346
347        let r = detect_product(dir.path()).await;
348        assert_eq!(r.purl.as_deref(), Some("pkg:npm/@socket/foo@0.1.0"));
349    }
350
351    #[tokio::test]
352    async fn detect_pyproject() {
353        let dir = tempfile::tempdir().unwrap();
354        let content = "[project]\nname = \"my-pylib\"\nversion = \"0.4.0\"\n";
355        tokio::fs::write(dir.path().join("pyproject.toml"), content)
356            .await
357            .unwrap();
358
359        let r = detect_product(dir.path()).await;
360        assert_eq!(r.purl.as_deref(), Some("pkg:pypi/my-pylib@0.4.0"));
361    }
362
363    #[tokio::test]
364    async fn detect_cargo_toml() {
365        let dir = tempfile::tempdir().unwrap();
366        let content = "[package]\nname = \"my-rust\"\nversion = \"2.0.0\"\nedition = \"2021\"\n";
367        tokio::fs::write(dir.path().join("Cargo.toml"), content)
368            .await
369            .unwrap();
370
371        let r = detect_product(dir.path()).await;
372        assert_eq!(r.purl.as_deref(), Some("pkg:cargo/my-rust@2.0.0"));
373    }
374
375    #[tokio::test]
376    async fn cargo_workspace_inheritance_is_unsupported() {
377        // `version.workspace = true` is not a quoted string literal,
378        // so detection should report None rather than emit garbage.
379        let dir = tempfile::tempdir().unwrap();
380        let content = "[package]\nname = \"my-rust\"\nversion.workspace = true\n";
381        tokio::fs::write(dir.path().join("Cargo.toml"), content)
382            .await
383            .unwrap();
384
385        let r = detect_product(dir.path()).await;
386        assert!(r.purl.is_none());
387    }
388
389    #[tokio::test]
390    async fn multiple_manifests_warns_and_picks_package_json() {
391        let dir = tempfile::tempdir().unwrap();
392        tokio::fs::write(
393            dir.path().join("package.json"),
394            r#"{"name":"my-app","version":"1.0.0"}"#,
395        )
396        .await
397        .unwrap();
398        tokio::fs::write(
399            dir.path().join("Cargo.toml"),
400            "[package]\nname = \"alt\"\nversion = \"9.9.9\"\n",
401        )
402        .await
403        .unwrap();
404
405        let r = detect_product(dir.path()).await;
406        assert_eq!(r.purl.as_deref(), Some("pkg:npm/my-app@1.0.0"));
407        assert_eq!(r.warnings.len(), 1);
408        assert!(r.warnings[0].contains("Multiple"));
409    }
410
411    #[tokio::test]
412    async fn empty_dir_returns_none() {
413        let dir = tempfile::tempdir().unwrap();
414        let r = detect_product(dir.path()).await;
415        assert!(r.purl.is_none());
416        assert!(r.warnings.is_empty());
417    }
418
419    #[test]
420    fn scan_toml_skips_other_sections() {
421        let toml = "[other]\nname = \"wrong\"\nversion = \"0.0.0\"\n\n[package]\nname = \"right\"\nversion = \"1.0.0\"\n";
422        let (n, v) = scan_toml_section(toml, "package").unwrap();
423        assert_eq!(n, "right");
424        assert_eq!(v, "1.0.0");
425    }
426
427    #[test]
428    fn scan_toml_ignores_comments_and_blank_lines() {
429        let toml = "[package]\n# a comment\n\nname = \"x\"\nversion = \"1.0\"\n";
430        let (n, v) = scan_toml_section(toml, "package").unwrap();
431        assert_eq!(n, "x");
432        assert_eq!(v, "1.0");
433    }
434
435    #[test]
436    fn scan_toml_missing_version_returns_none() {
437        let toml = "[package]\nname = \"only-name\"\n";
438        assert!(scan_toml_section(toml, "package").is_none());
439    }
440
441    // ─────────────────── git-remote detection ───────────────────
442
443    #[test]
444    fn remote_url_github_ssh_becomes_pkg_github() {
445        assert_eq!(
446            remote_url_to_purl("git@github.com:SocketDev/socket-patch.git"),
447            "pkg:github/SocketDev/socket-patch"
448        );
449    }
450
451    #[test]
452    fn remote_url_github_https_becomes_pkg_github() {
453        assert_eq!(
454            remote_url_to_purl("https://github.com/SocketDev/socket-patch.git"),
455            "pkg:github/SocketDev/socket-patch"
456        );
457    }
458
459    #[test]
460    fn remote_url_github_https_no_dot_git() {
461        assert_eq!(
462            remote_url_to_purl("https://github.com/SocketDev/socket-patch"),
463            "pkg:github/SocketDev/socket-patch"
464        );
465    }
466
467    #[test]
468    fn remote_url_gitlab_and_bitbucket() {
469        assert_eq!(
470            remote_url_to_purl("git@gitlab.com:foo/bar.git"),
471            "pkg:gitlab/foo/bar"
472        );
473        assert_eq!(
474            remote_url_to_purl("https://bitbucket.org/foo/bar"),
475            "pkg:bitbucket/foo/bar"
476        );
477    }
478
479    #[test]
480    fn remote_url_unknown_host_returns_url_as_is() {
481        // Self-hosted gitea / unknown forge — VEX `@id` accepts any URI.
482        let raw = "https://git.example.com/team/repo.git";
483        assert_eq!(remote_url_to_purl(raw), raw);
484    }
485
486    #[test]
487    fn remote_url_ssh_protocol_form() {
488        assert_eq!(
489            remote_url_to_purl("ssh://git@github.com/foo/bar.git"),
490            "pkg:github/foo/bar"
491        );
492    }
493
494    #[test]
495    fn scan_origin_url_picks_url_in_section() {
496        let cfg = "[core]\nbare = false\n[remote \"origin\"]\nurl = git@github.com:foo/bar.git\nfetch = +refs/heads/*:refs/remotes/origin/*\n";
497        assert_eq!(
498            scan_remote_origin_url(cfg).as_deref(),
499            Some("git@github.com:foo/bar.git")
500        );
501    }
502
503    #[test]
504    fn scan_origin_url_ignores_other_remotes() {
505        // `[remote "upstream"]` must not be confused for origin.
506        let cfg = "[remote \"upstream\"]\nurl = git@github.com:other/repo.git\n[remote \"origin\"]\nurl = git@github.com:me/repo.git\n";
507        assert_eq!(
508            scan_remote_origin_url(cfg).as_deref(),
509            Some("git@github.com:me/repo.git")
510        );
511    }
512
513    #[test]
514    fn scan_origin_url_returns_none_when_missing() {
515        assert!(scan_remote_origin_url("[core]\nbare = false\n").is_none());
516    }
517
518    #[tokio::test]
519    async fn detect_prefers_git_remote_over_package_manifest() {
520        let dir = tempfile::tempdir().unwrap();
521        // package.json says "from-pkg"; git remote says "from-git".
522        // Git remote must win.
523        tokio::fs::write(
524            dir.path().join("package.json"),
525            r#"{"name":"from-pkg","version":"1.0.0"}"#,
526        )
527        .await
528        .unwrap();
529        let git_dir = dir.path().join(".git");
530        tokio::fs::create_dir_all(&git_dir).await.unwrap();
531        tokio::fs::write(
532            git_dir.join("config"),
533            "[remote \"origin\"]\n\turl = git@github.com:owner/from-git.git\n",
534        )
535        .await
536        .unwrap();
537
538        let r = detect_product(dir.path()).await;
539        assert_eq!(r.purl.as_deref(), Some("pkg:github/owner/from-git"));
540    }
541
542    #[tokio::test]
543    async fn detect_falls_back_to_package_manifest_when_no_git_remote() {
544        // Empty .git/config (no remote) → fall through to package.json.
545        let dir = tempfile::tempdir().unwrap();
546        tokio::fs::write(
547            dir.path().join("package.json"),
548            r#"{"name":"pkg-only","version":"2.0.0"}"#,
549        )
550        .await
551        .unwrap();
552        let git_dir = dir.path().join(".git");
553        tokio::fs::create_dir_all(&git_dir).await.unwrap();
554        tokio::fs::write(git_dir.join("config"), "[core]\nbare = false\n")
555            .await
556            .unwrap();
557
558        let r = detect_product(dir.path()).await;
559        assert_eq!(r.purl.as_deref(), Some("pkg:npm/pkg-only@2.0.0"));
560    }
561
562    #[tokio::test]
563    async fn detect_finds_git_config_in_parent_directory() {
564        // Common case: socket-patch is invoked from a subdir of the repo.
565        let root = tempfile::tempdir().unwrap();
566        let git_dir = root.path().join(".git");
567        tokio::fs::create_dir_all(&git_dir).await.unwrap();
568        tokio::fs::write(
569            git_dir.join("config"),
570            "[remote \"origin\"]\n\turl = git@github.com:org/proj.git\n",
571        )
572        .await
573        .unwrap();
574
575        let nested = root.path().join("packages").join("inner");
576        tokio::fs::create_dir_all(&nested).await.unwrap();
577
578        let r = detect_product(&nested).await;
579        assert_eq!(r.purl.as_deref(), Some("pkg:github/org/proj"));
580    }
581
582    // ── Edge-case + branch coverage ───────────────────────────────
583
584    /// `.git/config` exists but lists only non-origin remotes →
585    /// detection must fall through to package-manifest discovery
586    /// (otherwise the repo would surface no identifier at all).
587    #[tokio::test]
588    async fn git_config_with_only_non_origin_remote_falls_through() {
589        let dir = tempfile::tempdir().unwrap();
590        tokio::fs::write(
591            dir.path().join("package.json"),
592            r#"{"name":"fallback-app","version":"1.0.0"}"#,
593        )
594        .await
595        .unwrap();
596        let git_dir = dir.path().join(".git");
597        tokio::fs::create_dir_all(&git_dir).await.unwrap();
598        tokio::fs::write(
599            git_dir.join("config"),
600            "[remote \"upstream\"]\n\turl = git@github.com:other/proj.git\n",
601        )
602        .await
603        .unwrap();
604
605        let r = detect_product(dir.path()).await;
606        assert_eq!(r.purl.as_deref(), Some("pkg:npm/fallback-app@1.0.0"));
607    }
608
609    /// `url =` with no value after the `=` is a malformed git config.
610    /// Detection must treat it as "no remote" and fall through.
611    #[tokio::test]
612    async fn git_config_with_empty_url_falls_through() {
613        let dir = tempfile::tempdir().unwrap();
614        tokio::fs::write(
615            dir.path().join("package.json"),
616            r#"{"name":"fallback-app","version":"1.0.0"}"#,
617        )
618        .await
619        .unwrap();
620        let git_dir = dir.path().join(".git");
621        tokio::fs::create_dir_all(&git_dir).await.unwrap();
622        tokio::fs::write(
623            git_dir.join("config"),
624            "[remote \"origin\"]\n\turl = \n",
625        )
626        .await
627        .unwrap();
628
629        let r = detect_product(dir.path()).await;
630        assert_eq!(r.purl.as_deref(), Some("pkg:npm/fallback-app@1.0.0"));
631    }
632
633    /// CRLF line endings — Rust's `str::lines()` already handles
634    /// `\r\n`, but pin this so a future switch to `split('\n')`
635    /// would surface the regression.
636    #[test]
637    fn scan_origin_url_handles_crlf_line_endings() {
638        let cfg =
639            "[remote \"origin\"]\r\n\turl = git@github.com:foo/bar.git\r\n";
640        assert_eq!(
641            scan_remote_origin_url(cfg).as_deref(),
642            Some("git@github.com:foo/bar.git")
643        );
644    }
645
646    /// `git+ssh://` URL form → `split_remote_host_path` branch.
647    #[test]
648    fn remote_url_git_plus_ssh_form() {
649        assert_eq!(
650            remote_url_to_purl("git+ssh://git@github.com/owner/repo.git"),
651            "pkg:github/owner/repo"
652        );
653    }
654
655    /// `git://` URL form (legacy unauthenticated) — separate branch
656    /// from `ssh://` and `https://`.
657    #[test]
658    fn remote_url_git_protocol_form() {
659        assert_eq!(
660            remote_url_to_purl("git://github.com/owner/repo.git"),
661            "pkg:github/owner/repo"
662        );
663    }
664
665    /// `http://` (plain, not https) — exercises the
666    /// `strip_prefix("http://")` arm in `split_remote_host_path`.
667    #[test]
668    fn remote_url_http_form() {
669        assert_eq!(
670            remote_url_to_purl("http://github.com/owner/repo.git"),
671            "pkg:github/owner/repo"
672        );
673    }
674
675    /// `ssh://git@host:22/path` — port suffix on host must be
676    /// stripped so the ecosystem lookup still matches `github.com`.
677    #[test]
678    fn remote_url_ssh_with_port_strips_port() {
679        assert_eq!(
680            remote_url_to_purl("ssh://git@github.com:22/owner/repo.git"),
681            "pkg:github/owner/repo"
682        );
683    }
684
685    /// Pre-`split_remote_host_path` SSH form WITH NO user prefix:
686    /// `ssh://github.com/foo/bar.git`. Branch where the `@` split
687    /// doesn't fire and the whole rest is treated as `host/path`.
688    #[test]
689    fn remote_url_ssh_no_user_prefix() {
690        assert_eq!(
691            remote_url_to_purl("ssh://github.com/foo/bar.git"),
692            "pkg:github/foo/bar"
693        );
694    }
695
696    /// Truly unrecognized URL form (no recognized scheme prefix and
697    /// no scp-style `git@host:path`) → returned as-is.
698    #[test]
699    fn remote_url_unknown_shape_returned_verbatim() {
700        let weird = "file:///srv/repos/proj.git";
701        assert_eq!(remote_url_to_purl(weird), weird);
702    }
703
704    /// `pyproject.toml` with `[tool.poetry]` (Poetry layout) is now
705    /// supported as a fallback when `[project]` is absent.
706    #[tokio::test]
707    async fn detect_pyproject_tool_poetry_layout() {
708        let dir = tempfile::tempdir().unwrap();
709        let content = "[tool.poetry]\nname = \"poetry-app\"\nversion = \"0.9.0\"\n";
710        tokio::fs::write(dir.path().join("pyproject.toml"), content)
711            .await
712            .unwrap();
713        let r = detect_product(dir.path()).await;
714        assert_eq!(r.purl.as_deref(), Some("pkg:pypi/poetry-app@0.9.0"));
715    }
716
717    /// When `[project]` and `[tool.poetry]` are both present, the
718    /// PEP-621 section wins (modern projects prefer it).
719    #[tokio::test]
720    async fn detect_pyproject_project_section_wins_over_tool_poetry() {
721        let dir = tempfile::tempdir().unwrap();
722        let content = "[project]\nname = \"pep621-app\"\nversion = \"1.0.0\"\n\n[tool.poetry]\nname = \"poetry-app\"\nversion = \"0.9.0\"\n";
723        tokio::fs::write(dir.path().join("pyproject.toml"), content)
724            .await
725            .unwrap();
726        let r = detect_product(dir.path()).await;
727        assert_eq!(r.purl.as_deref(), Some("pkg:pypi/pep621-app@1.0.0"));
728    }
729
730    /// Multi-manifest combo: pyproject + Cargo.toml present, no
731    /// package.json. pyproject wins per the priority list.
732    #[tokio::test]
733    async fn detect_pyproject_over_cargo_when_no_package_json() {
734        let dir = tempfile::tempdir().unwrap();
735        tokio::fs::write(
736            dir.path().join("pyproject.toml"),
737            "[project]\nname = \"py-app\"\nversion = \"1.0.0\"\n",
738        )
739        .await
740        .unwrap();
741        tokio::fs::write(
742            dir.path().join("Cargo.toml"),
743            "[package]\nname = \"rust-app\"\nversion = \"2.0.0\"\n",
744        )
745        .await
746        .unwrap();
747        let r = detect_product(dir.path()).await;
748        assert_eq!(r.purl.as_deref(), Some("pkg:pypi/py-app@1.0.0"));
749        assert_eq!(r.warnings.len(), 1);
750        assert!(r.warnings[0].contains("pyproject.toml"));
751        assert!(r.warnings[0].contains("Cargo.toml"));
752    }
753
754    /// `package.json` with only `version` (no `name`) → None.
755    /// Currently the early `is_empty()` branch in `read_package_json`.
756    #[tokio::test]
757    async fn package_json_missing_name_returns_none() {
758        let dir = tempfile::tempdir().unwrap();
759        tokio::fs::write(
760            dir.path().join("package.json"),
761            r#"{"version":"1.0.0"}"#,
762        )
763        .await
764        .unwrap();
765        let r = detect_product(dir.path()).await;
766        assert!(r.purl.is_none());
767    }
768
769    /// `package.json` with empty `name` string → None (is_empty check).
770    #[tokio::test]
771    async fn package_json_empty_name_returns_none() {
772        let dir = tempfile::tempdir().unwrap();
773        tokio::fs::write(
774            dir.path().join("package.json"),
775            r#"{"name":"","version":"1.0.0"}"#,
776        )
777        .await
778        .unwrap();
779        let r = detect_product(dir.path()).await;
780        assert!(r.purl.is_none());
781    }
782
783    /// `package.json` with invalid JSON → None (parse-error branch).
784    #[tokio::test]
785    async fn package_json_invalid_json_returns_none() {
786        let dir = tempfile::tempdir().unwrap();
787        tokio::fs::write(dir.path().join("package.json"), "{ not json").await.unwrap();
788        let r = detect_product(dir.path()).await;
789        assert!(r.purl.is_none());
790    }
791
792    /// `parse_toml_string_kv`: line without `=` → None.
793    #[test]
794    fn parse_toml_kv_returns_none_when_no_equals() {
795        assert!(parse_toml_string_kv("name without equals", "name").is_none());
796    }
797
798    /// `parse_toml_string_kv`: key mismatch → None even if value is fine.
799    #[test]
800    fn parse_toml_kv_returns_none_when_key_mismatch() {
801        assert!(parse_toml_string_kv(r#"other = "value""#, "name").is_none());
802    }
803
804    /// `parse_toml_string_kv`: missing closing quote → None.
805    #[test]
806    fn parse_toml_kv_returns_none_when_unterminated_string() {
807        assert!(parse_toml_string_kv(r#"name = "no-close"#, "name").is_none());
808    }
809
810    /// `parse_toml_string_kv`: empty quoted value → None (we reject
811    /// `name = ""`).
812    #[test]
813    fn parse_toml_kv_returns_none_when_value_empty() {
814        assert!(parse_toml_string_kv(r#"name = """#, "name").is_none());
815    }
816
817    /// `parse_toml_string_kv`: non-string value (e.g. `key = 42`) →
818    /// None (we only accept quoted strings).
819    #[test]
820    fn parse_toml_kv_returns_none_when_value_not_quoted() {
821        assert!(parse_toml_string_kv(r#"name = 42"#, "name").is_none());
822    }
823
824    /// `split_remote_host_path`: SSH URL with no `:` separator →
825    /// None. Defensive — `git@` prefix without scp-style path.
826    #[test]
827    fn split_host_path_rejects_ssh_without_colon() {
828        assert!(split_remote_host_path("git@github.com").is_none());
829    }
830
831    /// `split_remote_host_path`: stripped scheme but no `/` →
832    /// host-without-path, the inner `split_once('/')` returns None.
833    #[test]
834    fn split_host_path_rejects_scheme_url_without_path() {
835        assert!(split_remote_host_path("https://github.com").is_none());
836    }
837
838    /// `remote_url_to_purl`: GitHub URL with 3 path segments
839    /// (`owner/repo/extra`) falls into the "not exactly 2 parts"
840    /// branch and returns the raw URL.
841    #[test]
842    fn remote_url_three_path_segments_returns_url_as_is() {
843        let raw = "https://github.com/owner/repo/extra";
844        assert_eq!(remote_url_to_purl(raw), raw);
845    }
846
847    /// `remote_url_to_purl`: trailing slash on the path is trimmed
848    /// before splitting, so `https://github.com/owner/repo/` still
849    /// resolves to `pkg:github/owner/repo`.
850    #[test]
851    fn remote_url_trailing_slash_is_normalized() {
852        assert_eq!(
853            remote_url_to_purl("https://github.com/owner/repo/"),
854            "pkg:github/owner/repo"
855        );
856    }
857
858    /// `Cargo.toml` with `name` only (no `version`) → None. Exercises
859    /// the `version?` early-return path inside `scan_toml_section`.
860    #[tokio::test]
861    async fn cargo_toml_missing_version_returns_none() {
862        let dir = tempfile::tempdir().unwrap();
863        tokio::fs::write(
864            dir.path().join("Cargo.toml"),
865            "[package]\nname = \"only-name\"\n",
866        )
867        .await
868        .unwrap();
869        let r = detect_product(dir.path()).await;
870        assert!(r.purl.is_none());
871    }
872
873    /// Pyproject without `[project]` AND without `[tool.poetry]` →
874    /// None.
875    #[tokio::test]
876    async fn pyproject_with_no_recognized_section_returns_none() {
877        let dir = tempfile::tempdir().unwrap();
878        tokio::fs::write(
879            dir.path().join("pyproject.toml"),
880            "[build-system]\nrequires = [\"setuptools\"]\n",
881        )
882        .await
883        .unwrap();
884        let r = detect_product(dir.path()).await;
885        assert!(r.purl.is_none());
886    }
887
888    /// `DetectResult::default()` is empty (purl=None, warnings=[]).
889    #[test]
890    fn detect_result_default_is_empty() {
891        let r = DetectResult::default();
892        assert!(r.purl.is_none());
893        assert!(r.warnings.is_empty());
894    }
895
896    /// `find_git_config` returns None for a path that genuinely has
897    /// no `.git/config` on any ancestor. Tempdir on `/var/folders` (macOS)
898    /// or `/tmp` (linux) gives us a tree that escapes the user's home.
899    #[tokio::test]
900    async fn find_git_config_returns_none_when_no_repo_ancestor() {
901        // Walk up from the tempdir — none of its ancestors should
902        // contain `.git/config`. This depends on the test runner's
903        // tempdir living outside any git repo; both macOS
904        // /var/folders and Linux /tmp satisfy that.
905        let dir = tempfile::tempdir().unwrap();
906        let r = find_git_config(dir.path()).await;
907        assert!(r.is_none(), "unexpected .git/config above {dir:?}: {r:?}");
908    }
909
910    /// `find_git_config` handles a non-existent start path via the
911    /// `canonicalize → Err` arm and still walks ancestors of the
912    /// raw input. Returns None when no config is found.
913    #[tokio::test]
914    async fn find_git_config_handles_non_existent_start_path() {
915        let dir = tempfile::tempdir().unwrap();
916        let nonexistent = dir.path().join("does/not/exist");
917        // No I/O panic; the fallback `start.to_path_buf()` arm of
918        // the `canonicalize` match runs.
919        let r = find_git_config(&nonexistent).await;
920        assert!(r.is_none());
921    }
922
923    /// `package.json` where `name` is a number, not a string → None.
924    /// Exercises the `.as_str()?` branch on the JSON value.
925    #[tokio::test]
926    async fn package_json_with_non_string_name_returns_none() {
927        let dir = tempfile::tempdir().unwrap();
928        tokio::fs::write(
929            dir.path().join("package.json"),
930            r#"{"name":42,"version":"1.0.0"}"#,
931        )
932        .await
933        .unwrap();
934        let r = detect_product(dir.path()).await;
935        assert!(r.purl.is_none());
936    }
937
938    /// `package.json` where `version` is a number → None.
939    #[tokio::test]
940    async fn package_json_with_non_string_version_returns_none() {
941        let dir = tempfile::tempdir().unwrap();
942        tokio::fs::write(
943            dir.path().join("package.json"),
944            r#"{"name":"x","version":42}"#,
945        )
946        .await
947        .unwrap();
948        let r = detect_product(dir.path()).await;
949        assert!(r.purl.is_none());
950    }
951
952    /// `[remote "origin"]` block has a line that starts with `url`
953    /// but has no `=` (e.g. `url ` then EOL). The `strip_prefix('=')?`
954    /// inside `scan_remote_origin_url` returns None and the scanner
955    /// continues — eventually exhausting the section with no url.
956    #[test]
957    fn scan_origin_url_skips_url_line_without_equals_sign() {
958        let cfg = "[remote \"origin\"]\n\turl no-equals-here\n";
959        // The `url` line has no `=`, so the scanner returns None
960        // from the inner `strip_prefix('=')?` — but per the code
961        // shape (line 224 with `?` on an Option), that propagates
962        // out of `scan_remote_origin_url` as None.
963        assert!(scan_remote_origin_url(cfg).is_none());
964    }
965
966    /// `package.json` missing the `version` key entirely. Exercises
967    /// the `v.get("version")?` early-return path (distinct from the
968    /// `.as_str()?` branch — `get` returns None, not Some(non-string)).
969    #[tokio::test]
970    async fn package_json_missing_version_key_returns_none() {
971        let dir = tempfile::tempdir().unwrap();
972        tokio::fs::write(
973            dir.path().join("package.json"),
974            r#"{"name":"x"}"#,
975        )
976        .await
977        .unwrap();
978        let r = detect_product(dir.path()).await;
979        assert!(r.purl.is_none());
980    }
981}