Skip to main content

trusty_memory/
discovery.rs

1//! Automatic project alias discovery.
2//!
3//! Why: Projects have implicit shorthand (cargo package names that differ from
4//! their directory, binary names that differ from packages, common first-
5//! letter abbreviations, repo short names) that should be surfaced
6//! automatically as `is_alias_for` triples without requiring users to call
7//! `add_alias` manually. The model can then resolve "tga" → "trusty-git-
8//! analytics" the first time it sees the shorthand, instead of mis-matching it
9//! against unrelated KG entries.
10//! What: Scans the given project root for Cargo workspace structure, git
11//! remote configuration, and other project signals; returns a flat list of
12//! `(short, full, source)` discoveries. The MCP `discover_aliases` tool feeds
13//! these into the palace KG (deduping against active triples) and rebuilds
14//! the prompt cache.
//! Test: Unit tests in this module exercise the discovery sources against
//! in-memory fixtures, a temporary directory, and the live workspace root
//! (derived from `CARGO_MANIFEST_DIR`, not the current working directory).
17
18use anyhow::{Context, Result};
19use serde::Serialize;
20use std::collections::{HashMap, HashSet};
21use std::path::{Path, PathBuf};
22
/// Where a discovered alias was inferred from.
///
/// Why: Surfaced through the MCP tool response so operators can audit *why*
/// a particular alias landed in the KG (and which signal to trust). Also
/// intended for the triple's `provenance` field (via [`DiscoverySource::as_str`])
/// so retraction tooling can distinguish auto-discovered facts from
/// hand-asserted ones — NOTE(review): the provenance write happens outside
/// this file; confirm against the MCP tool.
/// What: A fieldless, `Copy` enum with one variant per discovery signal.
/// `Serialize` for direct JSON emission; `Debug` for tracing logs;
/// `PartialEq`/`Eq` so tests can compare sources directly.
/// Test: covered indirectly through `discover_project_aliases` tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum DiscoverySource {
    /// `[package].name` differs from the containing directory name.
    /// Emitted by `scan_member` with the package name as `short` and the
    /// directory name as `full`.
    CargoPackageName,
    /// `[[bin]].name` differs from `[package].name`.
    /// Emitted by `scan_member` with the binary name as `short` and the
    /// package name (or the directory name when there is no package) as
    /// `full`.
    CargoBinaryName,
    /// First-letter abbreviation of a hyphenated package name is globally
    /// unique within the workspace.
    FirstLetterAbbrev,
    /// Short name extracted from the `origin` remote URL in `.git/config`.
    /// The project directory name is used as `full`.
    GitRemote,
}
43
44impl DiscoverySource {
45    /// Stable string representation for triple provenance + JSON.
46    ///
47    /// Why: `serde_json::to_string` on the enum yields `"CargoPackageName"`,
48    /// but the triple's `provenance` field is plain text — we want a single
49    /// canonical spelling that round-trips cleanly.
50    /// What: lowercase, snake-case-ish identifiers matching the variant names.
51    /// Test: indirectly via `discover_and_assert` triples.
52    pub fn as_str(&self) -> &'static str {
53        match self {
54            Self::CargoPackageName => "cargo_package_name",
55            Self::CargoBinaryName => "cargo_binary_name",
56            Self::FirstLetterAbbrev => "first_letter_abbrev",
57            Self::GitRemote => "git_remote",
58        }
59    }
60}
61
/// A single discovered alias mapping.
///
/// Why: Returned by `discover_project_aliases` and forwarded verbatim to the
/// MCP tool response so callers can see exactly what would be (or was)
/// asserted.
/// What: `short` is the subject ("tga"); `full` is the object
/// ("trusty-git-analytics"); `source` records the discovery signal.
/// Within one discovery run each `short` appears at most once, because
/// `push_unique` drops later entries with an already-seen `short`.
/// Test: `scan_member_emits_package_and_binary_aliases` and the
/// first-letter tests assert the resulting `AliasDiscovery` shape.
#[derive(Debug, Clone, Serialize)]
pub struct AliasDiscovery {
    /// The shorthand (alias subject), e.g. a package, binary, or repo name.
    pub short: String,
    /// The name the shorthand stands for (alias object).
    pub full: String,
    /// Which discovery signal produced this mapping.
    pub source: DiscoverySource,
}
77
78/// Scan `project_root` for alias signals and return every discovery found.
79///
80/// Why: One entry point keeps the orchestration logic in the MCP tool simple
81/// — it just calls this and decides what to assert.
82/// What: Runs each discovery source in order (Cargo workspace, then Cargo
83/// single-crate fallback, then git remote, then first-letter abbreviations
84/// derived from the cargo discoveries). Deduplicates `(short, full)` pairs
85/// within the returned list so the first source wins.
86/// Test: `discovers_trusty_git_analytics_alias`,
87/// `first_letter_abbrev_tm_for_trusty_memory`,
88/// `no_duplicate_short_names_in_results`.
89pub async fn discover_project_aliases(project_root: &Path) -> Result<Vec<AliasDiscovery>> {
90    let root = project_root.to_path_buf();
91    tokio::task::spawn_blocking(move || discover_blocking(&root))
92        .await
93        .context("join discover_project_aliases")?
94}
95
96/// Blocking implementation of [`discover_project_aliases`].
97///
98/// Why: All work here is filesystem + TOML parsing, which is naturally
99/// blocking. Splitting the async wrapper out keeps the algorithm
100/// straightforward and unit-testable without a runtime.
101/// What: Reads the root `Cargo.toml`, expands workspace members, scans each
102/// member's `Cargo.toml`, then walks git config. Returns deduplicated
103/// discoveries.
104/// Test: exercised by every test in this module (most call it directly).
105fn discover_blocking(project_root: &Path) -> Result<Vec<AliasDiscovery>> {
106    let mut discoveries: Vec<AliasDiscovery> = Vec::new();
107    let mut seen_pairs: HashSet<(String, String)> = HashSet::new();
108
109    // Collect (package_name, dir_name) pairs so the first-letter pass can
110    // see every package in the workspace at once.
111    let mut packages: Vec<(String, String)> = Vec::new();
112
113    let root_manifest = project_root.join("Cargo.toml");
114    if root_manifest.is_file() {
115        match std::fs::read_to_string(&root_manifest)
116            .context("read root Cargo.toml")
117            .and_then(|s| toml::from_str::<toml::Value>(&s).context("parse root Cargo.toml"))
118        {
119            Ok(root_toml) => {
120                let members = workspace_members(&root_toml);
121                if !members.is_empty() {
122                    // Workspace mode.
123                    for member in expand_members(project_root, &members) {
124                        scan_member(&member, &mut discoveries, &mut seen_pairs, &mut packages);
125                    }
126                } else if root_toml.get("package").is_some() {
127                    // Single-crate fallback: treat the root manifest as the
128                    // only "member".
129                    scan_member(
130                        project_root,
131                        &mut discoveries,
132                        &mut seen_pairs,
133                        &mut packages,
134                    );
135                }
136            }
137            Err(e) => {
138                tracing::warn!("discovery: skipping root Cargo.toml: {e:#}");
139            }
140        }
141    }
142
143    // Phase 2: first-letter abbreviations for hyphenated package names that
144    // produce a globally-unique abbreviation. Uniqueness is computed across
145    // the union of every package name AND every abbreviation derived in
146    // this pass — so a package whose own name is the same as another
147    // package's abbreviation cannot collide with it.
148    add_first_letter_abbreviations(&packages, &mut discoveries, &mut seen_pairs);
149
150    // Phase 3: git remote short name.
151    if let Some(d) = discover_git_remote(project_root) {
152        push_unique(&mut discoveries, &mut seen_pairs, d);
153    }
154
155    Ok(discoveries)
156}
157
158/// Extract the `[workspace] members = [...]` patterns from a parsed root
159/// `Cargo.toml`.
160///
161/// Why: Workspaces always live under a top-level `[workspace]` table with a
162/// `members` array of glob patterns; reading them at parse time keeps the
163/// downstream expansion code unaware of TOML.
164/// What: Returns the raw pattern strings (typically `"crates/*"`). An absent
165/// or malformed `[workspace]` yields an empty `Vec`.
166/// Test: covered by `discovers_trusty_git_analytics_alias` (which exercises
167/// this against the live root manifest).
168fn workspace_members(root_toml: &toml::Value) -> Vec<String> {
169    root_toml
170        .get("workspace")
171        .and_then(|w| w.get("members"))
172        .and_then(|m| m.as_array())
173        .map(|arr| {
174            arr.iter()
175                .filter_map(|v| v.as_str().map(|s| s.to_string()))
176                .collect()
177        })
178        .unwrap_or_default()
179}
180
/// Expand workspace member patterns into concrete directories.
///
/// Why: Cargo permits glob patterns (`crates/*`, `vendor/*/sdk`) in
/// `workspace.members`; we don't pull in the `glob` crate, so a minimal
/// expansion handles the canonical "single trailing `*`" pattern, with
/// fallback to a literal directory. The result feeds a "first discovery
/// wins" dedup, so its order must not depend on the filesystem, and a member
/// must not be reported twice (a duplicate would make its first-letter
/// abbreviation look ambiguous).
/// What: For each pattern, in the order given: if it ends with `/*`, list
/// every immediate subdirectory of the prefix, sorted by path; otherwise
/// treat it as a literal path relative to `root`. Entries that are not
/// directories or lack a `Cargo.toml` are skipped, as are glob prefixes that
/// cannot be read. A directory matched by more than one pattern is returned
/// only once, at its first position. Other glob forms (e.g. `vendor/*/sdk`)
/// are not expanded; they are treated as literal paths and therefore
/// normally match nothing.
/// Test: indirectly via `discovers_trusty_git_analytics_alias` (live workspace
/// expansion).
fn expand_members(root: &Path, patterns: &[String]) -> Vec<PathBuf> {
    let is_member = |dir: &Path| dir.is_dir() && dir.join("Cargo.toml").is_file();

    let mut seen: HashSet<PathBuf> = HashSet::new();
    let mut out = Vec::new();
    for pattern in patterns {
        let mut candidates: Vec<PathBuf> = match pattern.strip_suffix("/*") {
            Some(prefix) => match std::fs::read_dir(root.join(prefix)) {
                // Unreadable individual entries are skipped (best effort).
                Ok(entries) => entries.flatten().map(|entry| entry.path()).collect(),
                // A glob over a missing/unreadable directory matches nothing.
                Err(_) => continue,
            },
            None => vec![root.join(pattern)],
        };
        // `read_dir` yields entries in a platform-dependent order; sort so
        // the scan order (and thus which alias wins a tie) is reproducible.
        candidates.sort();

        for dir in candidates {
            if is_member(&dir) && seen.insert(dir.clone()) {
                out.push(dir);
            }
        }
    }
    out
}
215
216/// Scan one workspace member directory for cargo-derived aliases.
217///
218/// Why: Each member can contribute up to two aliases (package-name vs dir
219/// name, binary-name vs package name). Centralising the per-member logic
220/// lets the caller stay focused on iteration / expansion.
221/// What: Reads `<member>/Cargo.toml`, extracts `[package].name`, then walks
222/// every `[[bin]]` entry. Pushes one `CargoPackageName` discovery when the
223/// package name differs from the directory, and one `CargoBinaryName`
224/// discovery per binary whose name differs from the package. Tracks every
225/// package in `packages` so the first-letter pass can see the full set.
226/// Test: `scan_member_emits_package_and_binary_aliases`.
227fn scan_member(
228    member_dir: &Path,
229    discoveries: &mut Vec<AliasDiscovery>,
230    seen_pairs: &mut HashSet<(String, String)>,
231    packages: &mut Vec<(String, String)>,
232) {
233    let manifest = member_dir.join("Cargo.toml");
234    let Ok(raw) = std::fs::read_to_string(&manifest) else {
235        return;
236    };
237    let Ok(parsed) = toml::from_str::<toml::Value>(&raw) else {
238        tracing::warn!("discovery: failed to parse {}", manifest.display());
239        return;
240    };
241
242    let dir_name = member_dir
243        .file_name()
244        .and_then(|n| n.to_str())
245        .unwrap_or("")
246        .to_string();
247    if dir_name.is_empty() {
248        return;
249    }
250
251    let package_name = parsed
252        .get("package")
253        .and_then(|p| p.get("name"))
254        .and_then(|n| n.as_str())
255        .map(|s| s.to_string());
256
257    if let Some(ref pkg) = package_name {
258        packages.push((pkg.clone(), dir_name.clone()));
259        if pkg != &dir_name {
260            push_unique(
261                discoveries,
262                seen_pairs,
263                AliasDiscovery {
264                    short: pkg.clone(),
265                    full: dir_name.clone(),
266                    source: DiscoverySource::CargoPackageName,
267                },
268            );
269        }
270    }
271
272    if let Some(bins) = parsed.get("bin").and_then(|b| b.as_array()) {
273        let pkg_for_bin = package_name.as_deref().unwrap_or(&dir_name).to_string();
274        for bin in bins {
275            if let Some(bin_name) = bin.get("name").and_then(|n| n.as_str()) {
276                if bin_name != pkg_for_bin {
277                    push_unique(
278                        discoveries,
279                        seen_pairs,
280                        AliasDiscovery {
281                            short: bin_name.to_string(),
282                            full: pkg_for_bin.clone(),
283                            source: DiscoverySource::CargoBinaryName,
284                        },
285                    );
286                }
287            }
288        }
289    }
290}
291
292/// Compute first-letter abbreviations for hyphenated package names and add
293/// the ones that are globally unique within the workspace.
294///
295/// Why: Operators routinely refer to crates by their initials ("tm" for
296/// `trusty-memory`, "tga" for `trusty-git-analytics`). Surfacing these
297/// automatically — but only when there's no ambiguity — avoids polluting the
298/// prompt with collisions like `tmc` (which could be `trusty-mpm-cli` or
299/// `trusty-mpm-core`).
300/// What: Splits each package name on `-`, takes the first letter of every
301/// segment; counts how many distinct full names each abbreviation maps to.
302/// Emits a `FirstLetterAbbrev` discovery only for abbreviations that map to
303/// exactly one full name AND don't equal that full name AND don't collide
304/// with an existing package name (which would suggest a different crate).
305/// Test: `first_letter_abbrev_tm_for_trusty_memory`,
306/// `first_letter_abbrev_skips_ambiguous`.
307fn add_first_letter_abbreviations(
308    packages: &[(String, String)],
309    discoveries: &mut Vec<AliasDiscovery>,
310    seen_pairs: &mut HashSet<(String, String)>,
311) {
312    let package_name_set: HashSet<&str> = packages.iter().map(|(p, _)| p.as_str()).collect();
313
314    // abbrev → set of full package names that produce it.
315    let mut groups: HashMap<String, Vec<&str>> = HashMap::new();
316    for (pkg, _dir) in packages {
317        if !pkg.contains('-') {
318            continue;
319        }
320        let abbrev: String = pkg
321            .split('-')
322            .filter_map(|seg| seg.chars().next())
323            .collect();
324        if abbrev.len() < 2 {
325            continue;
326        }
327        groups.entry(abbrev).or_default().push(pkg.as_str());
328    }
329
330    for (abbrev, fulls) in groups {
331        if fulls.len() != 1 {
332            continue;
333        }
334        let full = fulls[0];
335        if abbrev == full {
336            continue;
337        }
338        // Don't shadow an existing package name. e.g. if "tm" were itself a
339        // package name, we wouldn't want to also assert "tm → trusty-memory".
340        if package_name_set.contains(abbrev.as_str()) {
341            continue;
342        }
343        push_unique(
344            discoveries,
345            seen_pairs,
346            AliasDiscovery {
347                short: abbrev,
348                full: full.to_string(),
349                source: DiscoverySource::FirstLetterAbbrev,
350            },
351        );
352    }
353}
354
355/// Read `.git/config` and extract the short repo name from `origin`.
356///
357/// Why: Most repos refer to themselves by the trailing path component of the
358/// origin URL ("trusty-tools"), which is rarely the same as the working tree
359/// directory name when checked out under a non-default path. Surfacing it as
360/// an alias for itself isn't useful, but surfacing the workspace dir name as
361/// the canonical full name for the short repo name is — e.g. when working
362/// inside a worktree directory the model still knows "trusty-tools" refers
363/// to the project.
364/// What: Greps `.git/config` for the `[remote "origin"] url = ...` line,
365/// strips `.git`, takes the last `/`-separated component. Emits a
366/// `GitRemote` discovery only when the short name differs from the directory
367/// name and the directory name is non-empty.
368/// Test: `git_remote_extracts_short_name_from_origin_url`.
369fn discover_git_remote(project_root: &Path) -> Option<AliasDiscovery> {
370    let config_path = project_root.join(".git").join("config");
371    let raw = std::fs::read_to_string(&config_path).ok()?;
372    let url = extract_origin_url(&raw)?;
373    let short = short_repo_name(&url)?;
374    let dir_name = project_root
375        .file_name()
376        .and_then(|n| n.to_str())
377        .unwrap_or("")
378        .to_string();
379    if dir_name.is_empty() || short == dir_name {
380        return None;
381    }
382    Some(AliasDiscovery {
383        short,
384        full: dir_name,
385        source: DiscoverySource::GitRemote,
386    })
387}
388
/// Extract the `url = ...` value from the `[remote "origin"]` section of a
/// git config file.
///
/// Why: Git config is a stable INI-ish format, but pulling in `gitoxide`
/// just for one field would be wildly disproportionate. A line-based scan is
/// sufficient for the layouts git clients write, provided it honours git's
/// own matching rules: section names and variable names are
/// case-insensitive, while the quoted subsection (`"origin"`) is
/// case-sensitive.
/// What: Walks lines, tracking whether the most recent `[...]` header is the
/// `remote "origin"` section, and returns the trimmed value of the first
/// `url` assignment inside it. Headers may have extra whitespace between the
/// section and subsection and may be followed by trailing text such as a
/// comment. Returns `None` when there is no such section or no `url` in it.
/// Values are returned as written: surrounding quotes, escapes and inline
/// comments are not interpreted.
/// Test: `extract_origin_url_handles_typical_config`.
fn extract_origin_url(config: &str) -> Option<String> {
    let mut in_origin = false;
    for line in config.lines() {
        let trimmed = line.trim();
        if let Some(after_bracket) = trimmed.strip_prefix('[') {
            in_origin = is_origin_remote_header(after_bracket);
            continue;
        }
        if !in_origin {
            continue;
        }
        // Only the text before the first '=' is the key; the value may
        // itself contain '='.
        if let Some((key, value)) = trimmed.split_once('=') {
            if key.trim_end().eq_ignore_ascii_case("url") {
                return Some(value.trim().to_string());
            }
        }
    }
    None
}

/// Whether a section header (the text after its opening `[`) names the
/// `remote "origin"` section.
fn is_origin_remote_header(after_bracket: &str) -> bool {
    // Anything after the closing bracket (typically a comment) is ignored.
    let Some((inner, _trailing)) = after_bracket.split_once(']') else {
        return false;
    };
    let Some((section, subsection)) = inner.trim().split_once(char::is_whitespace) else {
        return false;
    };
    section.eq_ignore_ascii_case("remote") && subsection.trim() == "\"origin\""
}
418
/// Extract the short repo name from a git URL.
///
/// Why: Origin URLs come in three flavours — HTTPS (`https://host/owner/repo.git`),
/// SSH (`git@host:owner/repo.git`), and local paths. All three end with
/// `<name>` or `<name>.git`, optionally followed by a trailing `/`;
/// returning the last path component without the suffix gives a stable
/// short name.
/// What: Trims surrounding whitespace and any trailing `/`, splits on both
/// `/` and `:`, takes the last component, and strips a trailing `.git`.
/// Returns `None` when nothing is left (empty input, a bare `/`, a URL
/// ending in `:`, or a component that is just `.git`).
/// Test: `short_repo_name_strips_git_suffix_and_path`.
fn short_repo_name(url: &str) -> Option<String> {
    // Normalise first: a trailing slash would otherwise make the last
    // component empty, and trailing whitespace would hide the `.git` suffix.
    let normalised = url.trim().trim_end_matches('/');
    let last = normalised.rsplit(['/', ':']).next().unwrap_or("");
    let name = last.strip_suffix(".git").unwrap_or(last).trim();
    if name.is_empty() {
        None
    } else {
        Some(name.to_string())
    }
}
440
441/// Push a discovery into the result list iff its `short` hasn't been seen yet.
442///
443/// Why: A subject can only have one *active* `is_alias_for` triple at a time
444/// (the temporal KG closes the prior interval whenever a new value is
445/// asserted), so emitting two discoveries with the same `short` would force
446/// every subsequent `discover_aliases` call to flap between them — endlessly
447/// reasserting because neither matches the currently-active object. Deduping
448/// on `short` here makes the discovery list inherently idempotent: one
449/// authoritative mapping per subject, with the first-seen source winning
450/// (`CargoPackageName` > `CargoBinaryName` > `FirstLetterAbbrev` >
451/// `GitRemote`, matching the call order in `discover_blocking`).
452/// What: Tracks every `short` already pushed; subsequent pushes with the
453/// same `short` are dropped. `seen_pairs` is misnamed historically — it now
454/// holds the deduped subjects.
455/// Test: `no_duplicate_short_names_in_results`,
456/// `dispatch_discover_aliases_inserts_new_and_dedupes` (the rerun assertion
457/// only passes when this dedup holds).
458fn push_unique(
459    discoveries: &mut Vec<AliasDiscovery>,
460    seen_subjects: &mut HashSet<(String, String)>,
461    d: AliasDiscovery,
462) {
463    // Repurpose the set as a subject-only dedup: store ("subject", "") so
464    // the existing call sites keep working without renaming the parameter
465    // type across every signature.
466    let key = (d.short.clone(), String::new());
467    if seen_subjects.insert(key) {
468        discoveries.push(d);
469    }
470}
471
#[cfg(test)]
mod tests {
    use super::*;

    /// Why: Smoke-test the live workspace — the prompt test in the task spec
    /// pins `("tga", "trusty-git-analytics")` as a discovered alias.
    /// What: Locates the workspace root (two levels above this crate's
    /// manifest directory), runs the blocking discovery, and asserts the
    /// canonical pair is present with the `CargoPackageName` source.
    /// Test: this test itself.
    #[test]
    fn discovers_trusty_git_analytics_alias() {
        let root = workspace_root();
        let discoveries = discover_blocking(&root).expect("discover");
        let Some(hit) = discoveries
            .iter()
            .find(|d| d.short == "tga" && d.full == "trusty-git-analytics")
        else {
            panic!("expected tga→trusty-git-analytics in discoveries; got: {discoveries:?}");
        };
        assert_eq!(hit.source, DiscoverySource::CargoPackageName);
    }

    /// Why: First-letter abbreviation is the most subtle source — confirm
    /// it fires for at least one crate in the live workspace and pins the
    /// canonical example (`tc → trusty-common`).
    /// What: Runs the blocking discovery on the live workspace and looks for
    /// the `tc` abbreviation with the `FirstLetterAbbrev` source.
    /// Test: this test itself.
    #[test]
    fn first_letter_abbrev_emits_unique_workspace_initials() {
        let root = workspace_root();
        let discoveries = discover_blocking(&root).expect("discover");
        let found = discoveries.iter().any(|d| {
            d.short == "tc"
                && d.full == "trusty-common"
                && d.source == DiscoverySource::FirstLetterAbbrev
        });
        assert!(
            found,
            "expected tc→trusty-common first-letter abbrev; got: {discoveries:?}"
        );
    }

    /// Why: A synthetic fixture pins the abbreviation algorithm against the
    /// scenario where `tm` uniquely maps to `trusty-memory` because no other
    /// package in the set abbreviates to `tm` (`trusty-mpm-cli` becomes
    /// `tmc`). In the live workspace `tm` may be claimed earlier by another
    /// source; this isolated test confirms the abbreviation logic itself.
    /// What: Feeds three packages straight into
    /// `add_first_letter_abbreviations` and checks the `tm` discovery.
    /// Test: this test itself.
    #[test]
    fn first_letter_abbrev_tm_unique_when_only_trusty_memory() {
        let packages = vec![
            ("trusty-memory".to_string(), "trusty-memory".to_string()),
            ("trusty-common".to_string(), "trusty-common".to_string()),
            ("trusty-mpm-cli".to_string(), "trusty-mpm-cli".to_string()),
        ];
        let mut discoveries = Vec::new();
        let mut seen = HashSet::new();
        add_first_letter_abbreviations(&packages, &mut discoveries, &mut seen);
        let tm = discoveries
            .iter()
            .find(|d| d.short == "tm" && d.source == DiscoverySource::FirstLetterAbbrev);
        assert_eq!(
            tm.map(|d| d.full.as_str()),
            Some("trusty-memory"),
            "tm must abbreviate trusty-memory in this fixture; got: {discoveries:?}"
        );
    }

    /// Why: Discovery must be repeatable and must never return two entries
    /// for the same `short` — that is the invariant `push_unique` exists to
    /// enforce, and the one callers rely on for idempotent assertion.
    /// What: Runs the async entry point twice on the live workspace, checks
    /// both runs return the same set of discoveries (order-insensitive), and
    /// checks that no `short` occurs twice within a run.
    /// Test: this test itself.
    #[tokio::test]
    async fn no_duplicate_short_names_in_results() {
        let root = workspace_root();
        let a = discover_project_aliases(&root).await.expect("discover a");
        let b = discover_project_aliases(&root).await.expect("discover b");
        assert_eq!(a.len(), b.len(), "two calls must yield equal counts");
        assert_eq!(
            as_set(&a),
            as_set(&b),
            "two calls must yield the same discoveries"
        );

        // No `short` appears twice within a single call.
        let mut seen_shorts = HashSet::new();
        for d in &a {
            assert!(
                seen_shorts.insert(d.short.as_str()),
                "duplicate short name: {} → {} ({:?})",
                d.short,
                d.full,
                d.source,
            );
        }
    }

    /// Why: Pin the abbreviation-uniqueness rule against a synthetic
    /// workspace where two crates share an abbreviation — the algorithm
    /// must NOT emit a discovery for the ambiguous prefix.
    /// What: Build two fake packages, both abbreviating to "tm", and assert
    /// no `FirstLetterAbbrev` for "tm" is produced.
    /// Test: this test itself.
    #[test]
    fn first_letter_abbrev_skips_ambiguous() {
        let packages = vec![
            ("trusty-memory".to_string(), "trusty-memory".to_string()),
            ("trusty-monitor".to_string(), "trusty-monitor".to_string()),
        ];
        let mut discoveries = Vec::new();
        let mut seen = HashSet::new();
        add_first_letter_abbreviations(&packages, &mut discoveries, &mut seen);
        let ambiguous_emitted = discoveries
            .iter()
            .any(|d| d.short == "tm" && d.source == DiscoverySource::FirstLetterAbbrev);
        assert!(
            !ambiguous_emitted,
            "ambiguous tm must not produce an abbrev discovery; got: {discoveries:?}"
        );
    }

    /// Why: Pin the parser against the typical `[remote "origin"]` block
    /// shape. A regression that loses the URL would silently disable the
    /// GitRemote source.
    /// Test: this test itself.
    #[test]
    fn extract_origin_url_handles_typical_config() {
        let cfg = "\
[core]
\trepositoryformatversion = 0
[remote \"origin\"]
\turl = git@github.com:bobmatnyc/trusty-tools.git
\tfetch = +refs/heads/*:refs/remotes/origin/*
[branch \"main\"]
\tremote = origin
";
        assert_eq!(
            extract_origin_url(cfg),
            Some("git@github.com:bobmatnyc/trusty-tools.git".to_string())
        );
    }

    /// Why: The SSH and HTTPS URL flavours (with and without `.git`) must
    /// all collapse to the same short name, and empty input must yield
    /// nothing.
    /// Test: this test itself.
    #[test]
    fn short_repo_name_strips_git_suffix_and_path() {
        assert_eq!(
            short_repo_name("git@github.com:bobmatnyc/trusty-tools.git").as_deref(),
            Some("trusty-tools")
        );
        assert_eq!(
            short_repo_name("https://github.com/bobmatnyc/trusty-tools.git").as_deref(),
            Some("trusty-tools")
        );
        assert_eq!(
            short_repo_name("https://github.com/bobmatnyc/trusty-tools").as_deref(),
            Some("trusty-tools")
        );
        assert_eq!(short_repo_name("").as_deref(), None);
    }

    /// Why: Scan logic must surface both CargoPackageName and
    /// CargoBinaryName aliases from a single fixture.
    /// What: Writes a manifest whose package name differs from its directory
    /// and which declares two binaries, only one of which differs from the
    /// package name; asserts exactly one alias of each kind.
    /// Test: this test itself.
    #[test]
    fn scan_member_emits_package_and_binary_aliases() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let member = tmp.path().join("trusty-git-analytics");
        std::fs::create_dir_all(&member).expect("mkdir");
        std::fs::write(
            member.join("Cargo.toml"),
            r#"
[package]
name = "tga"
version = "0.1.0"

[[bin]]
name = "tga_bench"
path = "src/bench.rs"

[[bin]]
name = "tga"
path = "src/main.rs"
"#,
        )
        .expect("write Cargo.toml");

        let mut discoveries = Vec::new();
        let mut seen = HashSet::new();
        let mut packages = Vec::new();
        scan_member(&member, &mut discoveries, &mut seen, &mut packages);

        // Package-name discovery.
        let pkg_disc = discoveries
            .iter()
            .find(|d| d.source == DiscoverySource::CargoPackageName)
            .expect("package alias");
        assert_eq!(pkg_disc.short, "tga");
        assert_eq!(pkg_disc.full, "trusty-git-analytics");

        // Binary-name discovery (only the one that differs from the package).
        let bin_disc = discoveries
            .iter()
            .find(|d| d.source == DiscoverySource::CargoBinaryName)
            .expect("binary alias");
        assert_eq!(bin_disc.short, "tga_bench");
        assert_eq!(bin_disc.full, "tga");

        // The matching-name bin must NOT produce a discovery.
        assert_eq!(
            discoveries
                .iter()
                .filter(|d| d.source == DiscoverySource::CargoBinaryName)
                .count(),
            1
        );
    }

    /// Order-insensitive view of a discovery list, for comparing two runs.
    fn as_set(discoveries: &[AliasDiscovery]) -> HashSet<(&str, &str, &'static str)> {
        discoveries
            .iter()
            .map(|d| (d.short.as_str(), d.full.as_str(), d.source.as_str()))
            .collect()
    }

    /// Resolve the workspace root (parent of `crates/trusty-memory`).
    ///
    /// Why: Cargo runs each crate's tests with `CARGO_MANIFEST_DIR` set to
    /// that crate's directory. The live-workspace tests need the workspace
    /// root, which is two levels up.
    fn workspace_root() -> PathBuf {
        let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        manifest_dir
            .parent() // crates/
            .and_then(|p| p.parent()) // workspace root
            .expect("workspace root")
            .to_path_buf()
    }
}