Skip to main content

trusty_memory/
discovery.rs

1//! Automatic project alias discovery.
2//!
3//! Why: Projects have implicit shorthand (cargo package names that differ from
4//! their directory, binary names that differ from packages, common first-
5//! letter abbreviations, repo short names) that should be surfaced
6//! automatically as `is_alias_for` triples without requiring users to call
7//! `add_alias` manually. The model can then resolve "tga" → "trusty-git-
8//! analytics" the first time it sees the shorthand, instead of mis-matching it
9//! against unrelated KG entries.
10//! What: Scans the given project root for Cargo workspace structure, git
11//! remote configuration, and other project signals; returns a flat list of
12//! `(short, full, source)` discoveries. The MCP `discover_aliases` tool feeds
13//! these into the palace KG (deduping against active triples) and rebuilds
14//! the prompt cache.
15//! Test: Unit tests in this module exercise each discovery source against
16//! fixture directories and the live workspace root (cwd).
17
18use anyhow::{Context, Result};
19use serde::Serialize;
20use std::collections::{HashMap, HashSet};
21use std::path::{Path, PathBuf};
22
23/// Where a discovered alias was inferred from.
24///
25/// Why: Surfaced through the MCP tool response so operators can audit *why*
26/// a particular alias landed in the KG (and which signal to trust). Also
27/// serialised into the triple's `provenance` field so retraction tooling can
28/// distinguish auto-discovered facts from hand-asserted ones.
29/// What: `Serialize` for direct JSON emission; `Debug` for tracing logs.
30/// Test: covered indirectly through `discover_project_aliases` tests.
31#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
32pub enum DiscoverySource {
33    /// `[package].name` differs from the containing directory name.
34    CargoPackageName,
35    /// `[[bin]].name` differs from `[package].name`.
36    CargoBinaryName,
37    /// First-letter abbreviation of a hyphenated package name is globally
38    /// unique within the workspace.
39    FirstLetterAbbrev,
40    /// Short name extracted from the `origin` remote URL of the repo
41    /// containing the project root (resolved via `git -C <root> config`, so
42    /// it works inside worktrees as well as normal checkouts).
43    GitRemote,
44}
45
46impl DiscoverySource {
47    /// Stable string representation for triple provenance + JSON.
48    ///
49    /// Why: `serde_json::to_string` on the enum yields `"CargoPackageName"`,
50    /// but the triple's `provenance` field is plain text — we want a single
51    /// canonical spelling that round-trips cleanly.
52    /// What: lowercase, snake-case-ish identifiers matching the variant names.
53    /// Test: indirectly via `discover_and_assert` triples.
54    pub fn as_str(&self) -> &'static str {
55        match self {
56            Self::CargoPackageName => "cargo_package_name",
57            Self::CargoBinaryName => "cargo_binary_name",
58            Self::FirstLetterAbbrev => "first_letter_abbrev",
59            Self::GitRemote => "git_remote",
60        }
61    }
62}
63
64/// A single discovered alias mapping.
65///
66/// Why: Returned by `discover_project_aliases` and forwarded verbatim to the
67/// MCP tool response so callers can see exactly what would be (or was)
68/// asserted.
69/// What: `short` is the subject ("tga"); `full` is the object
70/// ("trusty-git-analytics"); `source` records the discovery signal.
71/// Test: each discovery source has a dedicated unit test asserting the
72/// resulting `AliasDiscovery` shape.
73#[derive(Debug, Clone, Serialize)]
74pub struct AliasDiscovery {
75    pub short: String,
76    pub full: String,
77    pub source: DiscoverySource,
78}
79
80/// Scan `project_root` for alias signals and return every discovery found.
81///
82/// Why: One entry point keeps the orchestration logic in the MCP tool simple
83/// — it just calls this and decides what to assert.
84/// What: Runs each discovery source in order (Cargo workspace, then Cargo
85/// single-crate fallback, then git remote, then first-letter abbreviations
86/// derived from the cargo discoveries). Deduplicates `(short, full)` pairs
87/// within the returned list so the first source wins.
88/// Test: `discovers_trusty_git_analytics_alias`,
89/// `first_letter_abbrev_tm_for_trusty_memory`,
90/// `no_duplicate_short_names_in_results`.
91pub async fn discover_project_aliases(project_root: &Path) -> Result<Vec<AliasDiscovery>> {
92    let root = project_root.to_path_buf();
93    tokio::task::spawn_blocking(move || discover_blocking(&root))
94        .await
95        .context("join discover_project_aliases")?
96}
97
98/// Blocking implementation of [`discover_project_aliases`].
99///
100/// Why: All work here is filesystem + TOML parsing, which is naturally
101/// blocking. Splitting the async wrapper out keeps the algorithm
102/// straightforward and unit-testable without a runtime.
103/// What: Reads the root `Cargo.toml`, expands workspace members, scans each
104/// member's `Cargo.toml`, then walks git config. Returns deduplicated
105/// discoveries.
106/// Test: exercised by every test in this module (most call it directly).
107fn discover_blocking(project_root: &Path) -> Result<Vec<AliasDiscovery>> {
108    let mut discoveries: Vec<AliasDiscovery> = Vec::new();
109    let mut seen_pairs: HashSet<(String, String)> = HashSet::new();
110
111    // Collect (package_name, dir_name) pairs so the first-letter pass can
112    // see every package in the workspace at once.
113    let mut packages: Vec<(String, String)> = Vec::new();
114
115    let root_manifest = project_root.join("Cargo.toml");
116    if root_manifest.is_file() {
117        match std::fs::read_to_string(&root_manifest)
118            .context("read root Cargo.toml")
119            .and_then(|s| toml::from_str::<toml::Value>(&s).context("parse root Cargo.toml"))
120        {
121            Ok(root_toml) => {
122                let members = workspace_members(&root_toml);
123                if !members.is_empty() {
124                    // Workspace mode.
125                    for member in expand_members(project_root, &members) {
126                        scan_member(&member, &mut discoveries, &mut seen_pairs, &mut packages);
127                    }
128                } else if root_toml.get("package").is_some() {
129                    // Single-crate fallback: treat the root manifest as the
130                    // only "member".
131                    scan_member(
132                        project_root,
133                        &mut discoveries,
134                        &mut seen_pairs,
135                        &mut packages,
136                    );
137                }
138            }
139            Err(e) => {
140                tracing::warn!("discovery: skipping root Cargo.toml: {e:#}");
141            }
142        }
143    }
144
145    // Phase 2: first-letter abbreviations for hyphenated package names that
146    // produce a globally-unique abbreviation. Uniqueness is computed across
147    // the union of every package name AND every abbreviation derived in
148    // this pass — so a package whose own name is the same as another
149    // package's abbreviation cannot collide with it.
150    add_first_letter_abbreviations(&packages, &mut discoveries, &mut seen_pairs);
151
152    // Phase 3: git remote short name.
153    if let Some(d) = discover_git_remote(project_root) {
154        push_unique(&mut discoveries, &mut seen_pairs, d);
155    }
156
157    Ok(discoveries)
158}
159
160/// Extract the `[workspace] members = [...]` patterns from a parsed root
161/// `Cargo.toml`.
162///
163/// Why: Workspaces always live under a top-level `[workspace]` table with a
164/// `members` array of glob patterns; reading them at parse time keeps the
165/// downstream expansion code unaware of TOML.
166/// What: Returns the raw pattern strings (typically `"crates/*"`). An absent
167/// or malformed `[workspace]` yields an empty `Vec`.
168/// Test: covered by `discovers_trusty_git_analytics_alias` (which exercises
169/// this against the live root manifest).
170fn workspace_members(root_toml: &toml::Value) -> Vec<String> {
171    root_toml
172        .get("workspace")
173        .and_then(|w| w.get("members"))
174        .and_then(|m| m.as_array())
175        .map(|arr| {
176            arr.iter()
177                .filter_map(|v| v.as_str().map(|s| s.to_string()))
178                .collect()
179        })
180        .unwrap_or_default()
181}
182
/// Expand workspace member patterns into concrete crate directories.
///
/// Why: Cargo permits glob patterns (`crates/*`, `vendor/*/sdk`) in
/// `workspace.members`. We don't pull in the `glob` crate, so a minimal
/// expansion handles the canonical "single trailing `/*`" pattern, with a
/// fallback to a literal directory. The result feeds a first-wins dedup, so
/// the order must not depend on the filesystem: `read_dir` yields entries in
/// an unspecified order, and overlapping patterns (`crates/*` plus
/// `crates/foo`) would otherwise list the same member twice.
/// What: For each pattern, in the order given: if it ends with `/*`, lists
/// the immediate children of the prefix directory (an unreadable directory
/// contributes nothing); otherwise treats the pattern as a literal path
/// relative to `root`. Candidates are sorted within each pattern, kept only
/// if they are directories containing a `Cargo.toml`, and emitted at most
/// once across all patterns.
/// Returns: the matching directories, each joined onto `root`.
/// Test: indirectly via `discovers_trusty_git_analytics_alias` (live workspace
/// expansion).
fn expand_members(root: &Path, patterns: &[String]) -> Vec<PathBuf> {
    let is_crate_dir = |dir: &Path| dir.is_dir() && dir.join("Cargo.toml").is_file();

    // `PathBuf` hashing/equality is component-based, so `a/./b` and `a/b`
    // count as the same member here.
    let mut emitted: HashSet<PathBuf> = HashSet::new();
    let mut out = Vec::new();

    for pattern in patterns {
        let mut candidates: Vec<PathBuf> = match pattern.strip_suffix("/*") {
            Some(prefix) => match std::fs::read_dir(root.join(prefix)) {
                Ok(entries) => entries.flatten().map(|entry| entry.path()).collect(),
                Err(_) => continue,
            },
            None => vec![root.join(pattern)],
        };
        // Make the per-pattern order independent of directory iteration order.
        candidates.sort();

        for path in candidates {
            if is_crate_dir(&path) && emitted.insert(path.clone()) {
                out.push(path);
            }
        }
    }
    out
}
217
218/// Scan one workspace member directory for cargo-derived aliases.
219///
220/// Why: Each member can contribute up to two aliases (package-name vs dir
221/// name, binary-name vs package name). Centralising the per-member logic
222/// lets the caller stay focused on iteration / expansion.
223/// What: Reads `<member>/Cargo.toml`, extracts `[package].name`, then walks
224/// every `[[bin]]` entry. Pushes one `CargoPackageName` discovery when the
225/// package name differs from the directory, and one `CargoBinaryName`
226/// discovery per binary whose name differs from the package. Tracks every
227/// package in `packages` so the first-letter pass can see the full set.
228/// Test: `scan_member_emits_package_and_binary_aliases`.
229fn scan_member(
230    member_dir: &Path,
231    discoveries: &mut Vec<AliasDiscovery>,
232    seen_pairs: &mut HashSet<(String, String)>,
233    packages: &mut Vec<(String, String)>,
234) {
235    let manifest = member_dir.join("Cargo.toml");
236    let Ok(raw) = std::fs::read_to_string(&manifest) else {
237        return;
238    };
239    let Ok(parsed) = toml::from_str::<toml::Value>(&raw) else {
240        tracing::warn!("discovery: failed to parse {}", manifest.display());
241        return;
242    };
243
244    let dir_name = member_dir
245        .file_name()
246        .and_then(|n| n.to_str())
247        .unwrap_or("")
248        .to_string();
249    if dir_name.is_empty() {
250        return;
251    }
252
253    let package_name = parsed
254        .get("package")
255        .and_then(|p| p.get("name"))
256        .and_then(|n| n.as_str())
257        .map(|s| s.to_string());
258
259    if let Some(ref pkg) = package_name {
260        packages.push((pkg.clone(), dir_name.clone()));
261        if pkg != &dir_name {
262            push_unique(
263                discoveries,
264                seen_pairs,
265                AliasDiscovery {
266                    short: pkg.clone(),
267                    full: dir_name.clone(),
268                    source: DiscoverySource::CargoPackageName,
269                },
270            );
271        }
272    }
273
274    if let Some(bins) = parsed.get("bin").and_then(|b| b.as_array()) {
275        let pkg_for_bin = package_name.as_deref().unwrap_or(&dir_name).to_string();
276        for bin in bins {
277            if let Some(bin_name) = bin.get("name").and_then(|n| n.as_str()) {
278                if bin_name != pkg_for_bin {
279                    push_unique(
280                        discoveries,
281                        seen_pairs,
282                        AliasDiscovery {
283                            short: bin_name.to_string(),
284                            full: pkg_for_bin.clone(),
285                            source: DiscoverySource::CargoBinaryName,
286                        },
287                    );
288                }
289            }
290        }
291    }
292}
293
294/// Compute first-letter abbreviations for hyphenated package names and add
295/// the ones that are globally unique within the workspace.
296///
297/// Why: Operators routinely refer to crates by their initials ("tm" for
298/// `trusty-memory`, "tga" for `trusty-git-analytics`). Surfacing these
299/// automatically — but only when there's no ambiguity — avoids polluting the
300/// prompt with collisions like `tmc` (which could be `trusty-mpm-cli` or
301/// `trusty-mpm-core`).
302/// What: Splits each package name on `-`, takes the first letter of every
303/// segment; counts how many distinct full names each abbreviation maps to.
304/// Emits a `FirstLetterAbbrev` discovery only for abbreviations that map to
305/// exactly one full name AND don't equal that full name AND don't collide
306/// with an existing package name (which would suggest a different crate).
307/// Test: `first_letter_abbrev_tm_for_trusty_memory`,
308/// `first_letter_abbrev_skips_ambiguous`.
309fn add_first_letter_abbreviations(
310    packages: &[(String, String)],
311    discoveries: &mut Vec<AliasDiscovery>,
312    seen_pairs: &mut HashSet<(String, String)>,
313) {
314    let package_name_set: HashSet<&str> = packages.iter().map(|(p, _)| p.as_str()).collect();
315
316    // abbrev → set of full package names that produce it.
317    let mut groups: HashMap<String, Vec<&str>> = HashMap::new();
318    for (pkg, _dir) in packages {
319        if !pkg.contains('-') {
320            continue;
321        }
322        let abbrev: String = pkg
323            .split('-')
324            .filter_map(|seg| seg.chars().next())
325            .collect();
326        if abbrev.len() < 2 {
327            continue;
328        }
329        groups.entry(abbrev).or_default().push(pkg.as_str());
330    }
331
332    for (abbrev, fulls) in groups {
333        if fulls.len() != 1 {
334            continue;
335        }
336        let full = fulls[0];
337        if abbrev == full {
338            continue;
339        }
340        // Don't shadow an existing package name. e.g. if "tm" were itself a
341        // package name, we wouldn't want to also assert "tm → trusty-memory".
342        if package_name_set.contains(abbrev.as_str()) {
343            continue;
344        }
345        push_unique(
346            discoveries,
347            seen_pairs,
348            AliasDiscovery {
349                short: abbrev,
350                full: full.to_string(),
351                source: DiscoverySource::FirstLetterAbbrev,
352            },
353        );
354    }
355}
356
357/// Read the git origin URL for `project_root` and extract a short repo name.
358///
359/// Why: Most repos refer to themselves by the trailing path component of the
360/// origin URL ("trusty-tools"), which is rarely the same as the working tree
361/// directory name when checked out under a non-default path. Surfacing it as
362/// an alias for itself isn't useful, but surfacing the workspace dir name as
363/// the canonical full name for the short repo name is — e.g. when working
364/// inside a worktree directory the model still knows "trusty-tools" refers
365/// to the project. The canonical source for `[remote "origin"] url = …` lives
366/// in `<root>/.git/config` for a normal checkout, but in a *worktree* `.git`
367/// is a file containing `gitdir: <parent>/.git/worktrees/<name>/` and the
368/// `[remote]` section is reachable only through the parent repo's
369/// `.git/config`. Direct filesystem reads silently drop the discovery in
370/// worktree-based checkouts.
371///
372/// Issue #116: the previous implementation only handled the normal-checkout
373/// case and returned `None` from inside any git worktree, mirroring the bug
374/// fixed for `kg_bootstrap` in #113 / PR #115.
375///
376/// What: Resolves the origin URL via [`read_origin_url`] (which prefers
377/// `git -C <root> config --get remote.origin.url` and falls back to a manual
378/// INI scan of `<root>/.git/config` when no `git` binary is on PATH — useful
379/// only for fixture-based tests that fabricate a `.git/config` directly).
380/// Extracts the short name, strips a trailing `.git`, and emits a
381/// `GitRemote` discovery iff the short name differs from the directory name.
382/// Test: `extract_origin_url_handles_typical_config`,
383/// `short_repo_name_strips_git_suffix_and_path`,
384/// `git_remote_works_inside_worktree`.
385fn discover_git_remote(project_root: &Path) -> Option<AliasDiscovery> {
386    let url = read_origin_url(project_root)?;
387    let short = short_repo_name(&url)?;
388    let dir_name = project_root
389        .file_name()
390        .and_then(|n| n.to_str())
391        .unwrap_or("")
392        .to_string();
393    if dir_name.is_empty() || short == dir_name {
394        return None;
395    }
396    Some(AliasDiscovery {
397        short,
398        full: dir_name,
399        source: DiscoverySource::GitRemote,
400    })
401}
402
403/// Resolve `remote.origin.url` for the repo rooted at `project_root`,
404/// transparent to worktree vs. normal-checkout layout.
405///
406/// Why: Centralises the worktree-vs-checkout indirection in one place so
407/// `discover_git_remote` stays readable. In a worktree `.git` is a file
408/// (not a directory) containing `gitdir: <parent>/.git/worktrees/<name>/`,
409/// so a naive `std::fs::read_to_string(".git/config")` fails — but the
410/// `[remote "origin"]` section is still reachable via the parent's
411/// `.git/config`. Shelling out to `git` lets us delegate that pointer
412/// resolution instead of re-implementing it.
413/// What: (1) tries `git -C <root> config --get remote.origin.url`, which
414/// works equally well in worktrees, normal checkouts, and submodules; (2)
415/// falls back to a manual INI scan of `<root>/.git/config` for environments
416/// without a `git` binary on PATH (notably fixture tests that fabricate a
417/// `.git/config` in a tempdir without ever initialising a real repo).
418/// Returns `None` if neither path yields a non-empty URL.
419/// Test: `git_remote_works_inside_worktree` (CLI path),
420/// `extract_origin_url_handles_typical_config` (file fallback path, via
421/// `extract_origin_url`).
422fn read_origin_url(project_root: &Path) -> Option<String> {
423    // Strategy 1: ask git directly. This is the only path that handles
424    // worktrees correctly without us re-implementing `gitdir:` resolution.
425    if let Ok(output) = std::process::Command::new("git")
426        .arg("-C")
427        .arg(project_root)
428        .arg("config")
429        .arg("--get")
430        .arg("remote.origin.url")
431        .output()
432    {
433        if output.status.success() {
434            let url = String::from_utf8_lossy(&output.stdout).trim().to_string();
435            if !url.is_empty() {
436                return Some(url);
437            }
438        }
439    }
440
441    // Strategy 2: direct INI scan of `<root>/.git/config`. Only useful for
442    // fixture tests that fabricate a `.git/config` in a tempdir; real-world
443    // worktrees will never reach this branch because the file read fails
444    // (the worktree `.git` is a file, not a directory).
445    let raw = std::fs::read_to_string(project_root.join(".git").join("config")).ok()?;
446    extract_origin_url(&raw)
447}
448
/// Extract the `url = ...` value from the `[remote "origin"]` section of a
/// git config file.
///
/// Why: Git config is a stable INI-ish format, and pulling in `gitoxide` for
/// one field would be disproportionate; a line-based scan is enough. The
/// scan follows git's matching rules where they are cheap to honour: section
/// and variable names are case-insensitive, subsection names (`"origin"`)
/// are case-sensitive, and the deprecated `[remote.origin]` header form is
/// accepted.
/// What: Walks the lines, tracking whether the most recent section header
/// was the origin remote. Inside that section, returns the first `url` entry
/// whose value is non-empty after trimming whitespace and one pair of
/// surrounding double quotes. Keys that merely start with "url" and lines
/// without `=` are ignored.
/// Returns: `None` when there is no origin section or it holds no non-empty
/// `url`.
/// Test: `extract_origin_url_handles_typical_config`.
fn extract_origin_url(config: &str) -> Option<String> {
    /// True when `line` (already trimmed, starting with `[`) is a header for
    /// the `origin` remote. Anything after the closing `]` is ignored.
    fn is_origin_header(line: &str) -> bool {
        let Some((inner, _rest)) = line.strip_prefix('[').and_then(|s| s.split_once(']')) else {
            return false;
        };
        let inner = inner.trim();
        // Deprecated `[section.subsection]` syntax: git lower-cases the
        // subsection, so the comparison is effectively case-insensitive.
        if inner.eq_ignore_ascii_case("remote.origin") {
            return true;
        }
        match inner.split_once(char::is_whitespace) {
            Some((section, subsection)) => {
                section.eq_ignore_ascii_case("remote") && subsection.trim() == "\"origin\""
            }
            None => false,
        }
    }

    let mut in_origin = false;
    for line in config.lines() {
        let trimmed = line.trim();
        if trimmed.starts_with('[') {
            in_origin = is_origin_header(trimmed);
            continue;
        }
        if !in_origin {
            continue;
        }
        // Split at the first `=` only: the URL itself may contain `=`.
        let Some((key, value)) = trimmed.split_once('=') else {
            continue;
        };
        if !key.trim().eq_ignore_ascii_case("url") {
            continue;
        }
        let value = value.trim();
        let value = value
            .strip_prefix('"')
            .and_then(|v| v.strip_suffix('"'))
            .unwrap_or(value)
            .trim();
        if !value.is_empty() {
            return Some(value.to_string());
        }
    }
    None
}
478
/// Extract the short repo name from a git URL.
///
/// Why: Origin URLs come in three flavours — HTTPS (`https://host/owner/repo.git`),
/// SSH (`git@host:owner/repo.git`), and local paths. All three end with
/// `<name>` or `<name>.git`, so the last path component without the suffix
/// is a stable short name. URLs are often copied with a trailing `/` or
/// carry a trailing newline; neither should change the answer.
/// What: Trims surrounding whitespace and trailing `/` characters, splits on
/// both `/` and `:`, takes the last component, and strips one trailing
/// `.git`.
/// Returns: `None` when nothing remains (empty input, or a URL with no name
/// component such as `https://`).
/// Test: `short_repo_name_strips_git_suffix_and_path`.
fn short_repo_name(url: &str) -> Option<String> {
    // Normalise before splitting: whitespace must be removed *before* the
    // `.git` suffix check, and a trailing slash would otherwise make the last
    // component empty.
    let cleaned = url.trim().trim_end_matches('/');
    let last = cleaned.rsplit(['/', ':']).next().unwrap_or("");
    let name = last.strip_suffix(".git").unwrap_or(last).trim();
    (!name.is_empty()).then(|| name.to_string())
}
497
498/// Push a discovery into the result list iff its `short` hasn't been seen yet.
499///
500/// Why: A subject can only have one *active* `is_alias_for` triple at a time
501/// (the temporal KG closes the prior interval whenever a new value is
502/// asserted), so emitting two discoveries with the same `short` would force
503/// every subsequent `discover_aliases` call to flap between them — endlessly
504/// reasserting because neither matches the currently-active object. Deduping
505/// on `short` here makes the discovery list inherently idempotent: one
506/// authoritative mapping per subject, with the first-seen source winning
507/// (`CargoPackageName` > `CargoBinaryName` > `FirstLetterAbbrev` >
508/// `GitRemote`, matching the call order in `discover_blocking`).
509/// What: Tracks every `short` already pushed; subsequent pushes with the
510/// same `short` are dropped. `seen_pairs` is misnamed historically — it now
511/// holds the deduped subjects.
512/// Test: `no_duplicate_short_names_in_results`,
513/// `dispatch_discover_aliases_inserts_new_and_dedupes` (the rerun assertion
514/// only passes when this dedup holds).
515fn push_unique(
516    discoveries: &mut Vec<AliasDiscovery>,
517    seen_subjects: &mut HashSet<(String, String)>,
518    d: AliasDiscovery,
519) {
520    // Repurpose the set as a subject-only dedup: store ("subject", "") so
521    // the existing call sites keep working without renaming the parameter
522    // type across every signature.
523    let key = (d.short.clone(), String::new());
524    if seen_subjects.insert(key) {
525        discoveries.push(d);
526    }
527}
528
529#[cfg(test)]
530mod tests {
531    use super::*;
532
533    /// Why: Smoke-test the live workspace — the prompt test in the task spec
534    /// pins `("tga", "trusty-git-analytics")` as a discovered alias.
535    /// What: Locates the workspace root (parent of this crate dir), runs the
536    /// blocking discovery, and asserts the canonical pair is present with
537    /// the `CargoPackageName` source.
538    /// Test: this test itself.
539    #[test]
540    fn discovers_trusty_git_analytics_alias() {
541        let root = workspace_root();
542        let discoveries = discover_blocking(&root).expect("discover");
543        let hit = discoveries
544            .iter()
545            .find(|d| d.short == "tga" && d.full == "trusty-git-analytics");
546        assert!(
547            hit.is_some(),
548            "expected tga→trusty-git-analytics in discoveries; got: {discoveries:?}"
549        );
550        assert_eq!(hit.unwrap().source, DiscoverySource::CargoPackageName);
551    }
552
553    /// Why: First-letter abbreviation is the most subtle source — confirm
554    /// it fires for at least one crate in the live workspace and pins the
555    /// canonical example (`tc → trusty-common`, the longest-lived shared
556    /// library crate, has a guaranteed-unique two-letter abbreviation).
557    /// Test: this test itself.
558    #[test]
559    fn first_letter_abbrev_emits_unique_workspace_initials() {
560        let root = workspace_root();
561        let discoveries = discover_blocking(&root).expect("discover");
562        let hit = discoveries.iter().find(|d| {
563            d.short == "tc"
564                && d.full == "trusty-common"
565                && d.source == DiscoverySource::FirstLetterAbbrev
566        });
567        assert!(
568            hit.is_some(),
569            "expected tc→trusty-common first-letter abbrev; got: {discoveries:?}"
570        );
571    }
572
573    /// Why: A synthetic fixture pins the abbreviation algorithm against the
574    /// exact scenario the original spec called out — a workspace where
575    /// `tm` would uniquely map to `trusty-memory` if there were no other
576    /// `t-m-…` crates. The live workspace happens to also expose `tm` as a
577    /// binary alias for `trusty-mpm-cli`, which (correctly) takes
578    /// precedence; this isolated test confirms the abbreviation logic
579    /// itself does the right thing.
580    /// Test: this test itself.
581    #[test]
582    fn first_letter_abbrev_tm_unique_when_only_trusty_memory() {
583        let packages = vec![
584            ("trusty-memory".to_string(), "trusty-memory".to_string()),
585            ("trusty-common".to_string(), "trusty-common".to_string()),
586            ("trusty-mpm-cli".to_string(), "trusty-mpm-cli".to_string()),
587        ];
588        let mut discoveries = Vec::new();
589        let mut seen = HashSet::new();
590        add_first_letter_abbreviations(&packages, &mut discoveries, &mut seen);
591        let tm = discoveries
592            .iter()
593            .find(|d| d.short == "tm" && d.source == DiscoverySource::FirstLetterAbbrev);
594        assert_eq!(
595            tm.map(|d| d.full.as_str()),
596            Some("trusty-memory"),
597            "tm must abbreviate trusty-memory in this fixture; got: {discoveries:?}"
598        );
599    }
600
601    /// Why: Calling discovery twice must produce the same result — the
602    /// helper is pure (no mutation of disk state), and the dedup test in
603    /// the spec uses this property to verify idempotency.
604    /// Test: this test itself.
605    #[tokio::test]
606    async fn no_duplicate_short_names_in_results() {
607        let root = workspace_root();
608        let a = discover_project_aliases(&root).await.expect("discover a");
609        let b = discover_project_aliases(&root).await.expect("discover b");
610        assert_eq!(a.len(), b.len(), "two calls must yield equal counts");
611
612        // No (short, full) pair appears twice within a single call.
613        let mut seen = HashSet::new();
614        for d in &a {
615            assert!(
616                seen.insert((d.short.clone(), d.full.clone())),
617                "duplicate discovery: {} → {} ({:?})",
618                d.short,
619                d.full,
620                d.source,
621            );
622        }
623    }
624
625    /// Why: Pin the abbreviation-uniqueness rule against a synthetic
626    /// workspace where two crates share an abbreviation — the algorithm
627    /// must NOT emit a discovery for the ambiguous prefix.
628    /// What: Build two fake packages, both abbreviating to "tm", and assert
629    /// no `FirstLetterAbbrev` for "tm" is produced.
630    /// Test: this test itself.
631    #[test]
632    fn first_letter_abbrev_skips_ambiguous() {
633        let packages = vec![
634            ("trusty-memory".to_string(), "trusty-memory".to_string()),
635            ("trusty-monitor".to_string(), "trusty-monitor".to_string()),
636        ];
637        let mut discoveries = Vec::new();
638        let mut seen = HashSet::new();
639        add_first_letter_abbreviations(&packages, &mut discoveries, &mut seen);
640        let tm = discoveries
641            .iter()
642            .find(|d| d.short == "tm" && d.source == DiscoverySource::FirstLetterAbbrev);
643        assert!(
644            tm.is_none(),
645            "ambiguous tm must not produce an abbrev discovery; got: {discoveries:?}"
646        );
647    }
648
649    /// Why: Pin the parser against the typical `[remote "origin"]` block
650    /// shape. A regression that loses the URL would silently disable the
651    /// GitRemote source.
652    #[test]
653    fn extract_origin_url_handles_typical_config() {
654        let cfg = "\
655[core]
656\trepositoryformatversion = 0
657[remote \"origin\"]
658\turl = git@github.com:bobmatnyc/trusty-tools.git
659\tfetch = +refs/heads/*:refs/remotes/origin/*
660[branch \"main\"]
661\tremote = origin
662";
663        assert_eq!(
664            extract_origin_url(cfg),
665            Some("git@github.com:bobmatnyc/trusty-tools.git".to_string())
666        );
667    }
668
669    /// Why: Three URL flavours must all collapse to the same short name.
670    #[test]
671    fn short_repo_name_strips_git_suffix_and_path() {
672        assert_eq!(
673            short_repo_name("git@github.com:bobmatnyc/trusty-tools.git").as_deref(),
674            Some("trusty-tools")
675        );
676        assert_eq!(
677            short_repo_name("https://github.com/bobmatnyc/trusty-tools.git").as_deref(),
678            Some("trusty-tools")
679        );
680        assert_eq!(
681            short_repo_name("https://github.com/bobmatnyc/trusty-tools").as_deref(),
682            Some("trusty-tools")
683        );
684        assert_eq!(short_repo_name("").as_deref(), None);
685    }
686
687    /// Why: Scan logic must surface both CargoPackageName and
688    /// CargoBinaryName aliases from a single fixture.
689    #[test]
690    fn scan_member_emits_package_and_binary_aliases() {
691        let tmp = tempfile::tempdir().expect("tempdir");
692        let member = tmp.path().join("trusty-git-analytics");
693        std::fs::create_dir_all(&member).expect("mkdir");
694        std::fs::write(
695            member.join("Cargo.toml"),
696            r#"
697[package]
698name = "tga"
699version = "0.1.0"
700
701[[bin]]
702name = "tga_bench"
703path = "src/bench.rs"
704
705[[bin]]
706name = "tga"
707path = "src/main.rs"
708"#,
709        )
710        .expect("write Cargo.toml");
711
712        let mut discoveries = Vec::new();
713        let mut seen = HashSet::new();
714        let mut packages = Vec::new();
715        scan_member(&member, &mut discoveries, &mut seen, &mut packages);
716
717        // Package-name discovery.
718        let pkg_disc = discoveries
719            .iter()
720            .find(|d| d.source == DiscoverySource::CargoPackageName)
721            .expect("package alias");
722        assert_eq!(pkg_disc.short, "tga");
723        assert_eq!(pkg_disc.full, "trusty-git-analytics");
724
725        // Binary-name discovery (only the one that differs from the package).
726        let bin_disc = discoveries
727            .iter()
728            .find(|d| d.source == DiscoverySource::CargoBinaryName)
729            .expect("binary alias");
730        assert_eq!(bin_disc.short, "tga_bench");
731        assert_eq!(bin_disc.full, "tga");
732
733        // The matching-name bin must NOT produce a discovery.
734        assert_eq!(
735            discoveries
736                .iter()
737                .filter(|d| d.source == DiscoverySource::CargoBinaryName)
738                .count(),
739            1
740        );
741    }
742
743    /// Why (issue #116): `discover_git_remote` must return the same remote
744    /// URL inside a git worktree as it does in the parent checkout. Before
745    /// the fix it read `<root>/.git/config` directly, which fails inside a
746    /// worktree because `.git` is a *file* (containing
747    /// `gitdir: <parent>/.git/worktrees/<name>/`), not a directory — and
748    /// the `[remote "origin"]` section lives only in the parent's
749    /// `.git/config`. This test pins the post-fix behaviour: initialise a
750    /// real repo, add a remote, create a worktree off it, and assert
751    /// `discover_git_remote` recovers the URL from inside the worktree.
752    /// What: Builds a tempdir-backed parent repo + worktree pair using the
753    /// real `git` CLI (the same tool the production code delegates to),
754    /// then calls the discovery helper against the worktree path.
755    /// Test: this test itself; serves as the worktree regression guard for #116.
756    #[test]
757    fn git_remote_works_inside_worktree() {
758        // Skip when `git` is unavailable on PATH — the fixture relies on
759        // real worktree semantics that we can't fabricate from pure FS ops.
760        if std::process::Command::new("git")
761            .arg("--version")
762            .output()
763            .ok()
764            .map(|o| !o.status.success())
765            .unwrap_or(true)
766        {
767            eprintln!("skipping git_remote_works_inside_worktree: `git` not on PATH");
768            return;
769        }
770
771        let tmp = tempfile::tempdir().expect("tempdir");
772        // The repo dir name must differ from the short repo name in the
773        // remote URL so that `discover_git_remote` actually emits a
774        // discovery (it skips when `short == dir_name`).
775        let parent = tmp.path().join("local-checkout");
776        std::fs::create_dir_all(&parent).expect("mkdir parent");
777
778        // Initialise a real repo so `.git` is a directory in the parent
779        // and a file (with `gitdir:`) inside the worktree.
780        let run = |args: &[&str], cwd: &Path| {
781            let status = std::process::Command::new("git")
782                .args(args)
783                .current_dir(cwd)
784                .status()
785                .expect("git status");
786            assert!(status.success(), "git {args:?} failed in {cwd:?}");
787        };
788        run(&["init", "--initial-branch=main", "."], &parent);
789        run(&["config", "user.email", "test@example.invalid"], &parent);
790        run(&["config", "user.name", "test"], &parent);
791        run(
792            &[
793                "remote",
794                "add",
795                "origin",
796                "git@github.com:bobmatnyc/trusty-tools.git",
797            ],
798            &parent,
799        );
800        // A real commit + branch is required before `git worktree add` will
801        // accept the source as a base.
802        std::fs::write(parent.join("README.md"), "hi").expect("write README");
803        run(&["add", "README.md"], &parent);
804        run(&["commit", "-m", "init"], &parent);
805
806        // Create the worktree as a sibling directory (outside the parent
807        // checkout, the standard layout). Re-use the same short repo name
808        // as the URL's tail so this also confirms the "short == dir_name"
809        // skip rule works against the worktree dir name (not the parent's).
810        let worktree = tmp.path().join("trusty-tools-feature");
811        run(
812            &[
813                "worktree",
814                "add",
815                "-b",
816                "feature",
817                worktree.to_str().expect("worktree path"),
818            ],
819            &parent,
820        );
821
822        // Sanity: `.git` inside the worktree must be a file, not a dir —
823        // otherwise the fixture isn't actually exercising the bug.
824        let dot_git = worktree.join(".git");
825        assert!(
826            dot_git.is_file(),
827            "expected `.git` to be a file inside the worktree; got {dot_git:?}"
828        );
829
830        // Run discovery against the worktree path. Pre-fix this returned
831        // `None`; post-fix it must return the GitRemote discovery with the
832        // short name extracted from origin.
833        let d = discover_git_remote(&worktree).expect("expected GitRemote discovery from worktree");
834        assert_eq!(d.source, DiscoverySource::GitRemote);
835        assert_eq!(d.short, "trusty-tools");
836        assert_eq!(d.full, "trusty-tools-feature");
837
838        // Also confirm the normal-checkout path still works inside the same
839        // fixture (regression guard: the shell-out must not break the
840        // happy path either).
841        let d_parent = discover_git_remote(&parent)
842            .expect("expected GitRemote discovery from normal checkout");
843        assert_eq!(d_parent.source, DiscoverySource::GitRemote);
844        assert_eq!(d_parent.short, "trusty-tools");
845        assert_eq!(d_parent.full, "local-checkout");
846    }
847
848    /// Resolve the workspace root (parent of `crates/trusty-memory`).
849    ///
850    /// Why: Cargo runs each crate's tests with `CARGO_MANIFEST_DIR` set to
851    /// that crate's directory. The live-workspace tests need the workspace
852    /// root, which is two levels up.
853    fn workspace_root() -> PathBuf {
854        let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
855        manifest_dir
856            .parent() // crates/
857            .and_then(|p| p.parent()) // workspace root
858            .expect("workspace root")
859            .to_path_buf()
860    }
861}