trusty_memory/discovery.rs
1//! Automatic project alias discovery.
2//!
3//! Why: Projects have implicit shorthand (cargo package names that differ from
4//! their directory, binary names that differ from packages, common first-
5//! letter abbreviations, repo short names) that should be surfaced
6//! automatically as `is_alias_for` triples without requiring users to call
7//! `add_alias` manually. The model can then resolve "tga" → "trusty-git-
8//! analytics" the first time it sees the shorthand, instead of mis-matching it
9//! against unrelated KG entries.
10//! What: Scans the given project root for Cargo workspace structure, git
11//! remote configuration, and other project signals; returns a flat list of
12//! `(short, full, source)` discoveries. The MCP `discover_aliases` tool feeds
13//! these into the palace KG (deduping against active triples) and rebuilds
14//! the prompt cache.
//! Test: Unit tests in this module exercise each discovery source against
//! fixture directories and the live workspace root (resolved from
//! `CARGO_MANIFEST_DIR`, not the process cwd).
17
18use anyhow::{Context, Result};
19use serde::Serialize;
20use std::collections::{HashMap, HashSet};
21use std::path::{Path, PathBuf};
22
23/// Where a discovered alias was inferred from.
24///
25/// Why: Surfaced through the MCP tool response so operators can audit *why*
26/// a particular alias landed in the KG (and which signal to trust). Also
27/// serialised into the triple's `provenance` field so retraction tooling can
28/// distinguish auto-discovered facts from hand-asserted ones.
29/// What: `Serialize` for direct JSON emission; `Debug` for tracing logs.
30/// Test: covered indirectly through `discover_project_aliases` tests.
31#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
32pub enum DiscoverySource {
33 /// `[package].name` differs from the containing directory name.
34 CargoPackageName,
35 /// `[[bin]].name` differs from `[package].name`.
36 CargoBinaryName,
37 /// First-letter abbreviation of a hyphenated package name is globally
38 /// unique within the workspace.
39 FirstLetterAbbrev,
40 /// Short name extracted from the `origin` remote URL of the repo
41 /// containing the project root (resolved via `git -C <root> config`, so
42 /// it works inside worktrees as well as normal checkouts).
43 GitRemote,
44}
45
46impl DiscoverySource {
47 /// Stable string representation for triple provenance + JSON.
48 ///
49 /// Why: `serde_json::to_string` on the enum yields `"CargoPackageName"`,
50 /// but the triple's `provenance` field is plain text — we want a single
51 /// canonical spelling that round-trips cleanly.
52 /// What: lowercase, snake-case-ish identifiers matching the variant names.
53 /// Test: indirectly via `discover_and_assert` triples.
54 pub fn as_str(&self) -> &'static str {
55 match self {
56 Self::CargoPackageName => "cargo_package_name",
57 Self::CargoBinaryName => "cargo_binary_name",
58 Self::FirstLetterAbbrev => "first_letter_abbrev",
59 Self::GitRemote => "git_remote",
60 }
61 }
62}
63
64/// A single discovered alias mapping.
65///
66/// Why: Returned by `discover_project_aliases` and forwarded verbatim to the
67/// MCP tool response so callers can see exactly what would be (or was)
68/// asserted.
69/// What: `short` is the subject ("tga"); `full` is the object
70/// ("trusty-git-analytics"); `source` records the discovery signal.
71/// Test: each discovery source has a dedicated unit test asserting the
72/// resulting `AliasDiscovery` shape.
73#[derive(Debug, Clone, Serialize)]
74pub struct AliasDiscovery {
75 pub short: String,
76 pub full: String,
77 pub source: DiscoverySource,
78}
79
80/// Scan `project_root` for alias signals and return every discovery found.
81///
82/// Why: One entry point keeps the orchestration logic in the MCP tool simple
83/// — it just calls this and decides what to assert.
84/// What: Runs each discovery source in order (Cargo workspace, then Cargo
85/// single-crate fallback, then git remote, then first-letter abbreviations
86/// derived from the cargo discoveries). Deduplicates `(short, full)` pairs
87/// within the returned list so the first source wins.
88/// Test: `discovers_trusty_git_analytics_alias`,
89/// `first_letter_abbrev_tm_for_trusty_memory`,
90/// `no_duplicate_short_names_in_results`.
91pub async fn discover_project_aliases(project_root: &Path) -> Result<Vec<AliasDiscovery>> {
92 let root = project_root.to_path_buf();
93 tokio::task::spawn_blocking(move || discover_blocking(&root))
94 .await
95 .context("join discover_project_aliases")?
96}
97
98/// Blocking implementation of [`discover_project_aliases`].
99///
100/// Why: All work here is filesystem + TOML parsing, which is naturally
101/// blocking. Splitting the async wrapper out keeps the algorithm
102/// straightforward and unit-testable without a runtime.
103/// What: Reads the root `Cargo.toml`, expands workspace members, scans each
104/// member's `Cargo.toml`, then walks git config. Returns deduplicated
105/// discoveries.
106/// Test: exercised by every test in this module (most call it directly).
107fn discover_blocking(project_root: &Path) -> Result<Vec<AliasDiscovery>> {
108 let mut discoveries: Vec<AliasDiscovery> = Vec::new();
109 let mut seen_pairs: HashSet<(String, String)> = HashSet::new();
110
111 // Collect (package_name, dir_name) pairs so the first-letter pass can
112 // see every package in the workspace at once.
113 let mut packages: Vec<(String, String)> = Vec::new();
114
115 let root_manifest = project_root.join("Cargo.toml");
116 if root_manifest.is_file() {
117 match std::fs::read_to_string(&root_manifest)
118 .context("read root Cargo.toml")
119 .and_then(|s| toml::from_str::<toml::Value>(&s).context("parse root Cargo.toml"))
120 {
121 Ok(root_toml) => {
122 let members = workspace_members(&root_toml);
123 if !members.is_empty() {
124 // Workspace mode.
125 for member in expand_members(project_root, &members) {
126 scan_member(&member, &mut discoveries, &mut seen_pairs, &mut packages);
127 }
128 } else if root_toml.get("package").is_some() {
129 // Single-crate fallback: treat the root manifest as the
130 // only "member".
131 scan_member(
132 project_root,
133 &mut discoveries,
134 &mut seen_pairs,
135 &mut packages,
136 );
137 }
138 }
139 Err(e) => {
140 tracing::warn!("discovery: skipping root Cargo.toml: {e:#}");
141 }
142 }
143 }
144
145 // Phase 2: first-letter abbreviations for hyphenated package names that
146 // produce a globally-unique abbreviation. Uniqueness is computed across
147 // the union of every package name AND every abbreviation derived in
148 // this pass — so a package whose own name is the same as another
149 // package's abbreviation cannot collide with it.
150 add_first_letter_abbreviations(&packages, &mut discoveries, &mut seen_pairs);
151
152 // Phase 3: git remote short name.
153 if let Some(d) = discover_git_remote(project_root) {
154 push_unique(&mut discoveries, &mut seen_pairs, d);
155 }
156
157 Ok(discoveries)
158}
159
160/// Extract the `[workspace] members = [...]` patterns from a parsed root
161/// `Cargo.toml`.
162///
163/// Why: Workspaces always live under a top-level `[workspace]` table with a
164/// `members` array of glob patterns; reading them at parse time keeps the
165/// downstream expansion code unaware of TOML.
166/// What: Returns the raw pattern strings (typically `"crates/*"`). An absent
167/// or malformed `[workspace]` yields an empty `Vec`.
168/// Test: covered by `discovers_trusty_git_analytics_alias` (which exercises
169/// this against the live root manifest).
170fn workspace_members(root_toml: &toml::Value) -> Vec<String> {
171 root_toml
172 .get("workspace")
173 .and_then(|w| w.get("members"))
174 .and_then(|m| m.as_array())
175 .map(|arr| {
176 arr.iter()
177 .filter_map(|v| v.as_str().map(|s| s.to_string()))
178 .collect()
179 })
180 .unwrap_or_default()
181}
182
/// Expand workspace member patterns into concrete crate directories.
///
/// Why: Cargo permits glob patterns in `workspace.members`; this module does
/// not use the `glob` crate, so only the common "single trailing `*`" form is
/// expanded, with every other pattern treated as a literal relative path.
/// The result must also be deterministic: downstream deduplication is
/// first-seen-wins, and `std::fs::read_dir` makes no ordering guarantee, so
/// an unsorted expansion could yield different aliases on different machines.
/// What: For each pattern, in manifest order: if it ends with `/*`, every
/// immediate subdirectory of the prefix is a candidate (sorted by path);
/// otherwise the pattern itself is the only candidate. Candidates without a
/// `Cargo.toml` file are dropped, unreadable glob directories are skipped,
/// and a directory matched by more than one pattern is returned only once.
/// Test: indirectly via `discovers_trusty_git_analytics_alias` (live
/// workspace expansion).
fn expand_members(root: &Path, patterns: &[String]) -> Vec<PathBuf> {
    /// A member directory must exist and contain a `Cargo.toml` file.
    fn is_crate_dir(path: &Path) -> bool {
        path.is_dir() && path.join("Cargo.toml").is_file()
    }

    let mut out: Vec<PathBuf> = Vec::new();
    for pattern in patterns {
        let mut matched: Vec<PathBuf> = match pattern.strip_suffix("/*") {
            Some(prefix) => match std::fs::read_dir(root.join(prefix)) {
                Ok(entries) => entries
                    .flatten()
                    .map(|entry| entry.path())
                    .filter(|path| is_crate_dir(path))
                    .collect(),
                // Missing or unreadable directory: the pattern matches nothing.
                Err(_) => continue,
            },
            None => {
                let path = root.join(pattern);
                if is_crate_dir(&path) {
                    vec![path]
                } else {
                    Vec::new()
                }
            }
        };

        // `read_dir` order is platform/filesystem dependent; sort so the
        // expansion (and therefore first-seen-wins dedup) is reproducible.
        matched.sort();

        for path in matched {
            // Member lists are small, so a linear scan is fine here.
            if !out.contains(&path) {
                out.push(path);
            }
        }
    }
    out
}
217
218/// Scan one workspace member directory for cargo-derived aliases.
219///
220/// Why: Each member can contribute up to two aliases (package-name vs dir
221/// name, binary-name vs package name). Centralising the per-member logic
222/// lets the caller stay focused on iteration / expansion.
223/// What: Reads `<member>/Cargo.toml`, extracts `[package].name`, then walks
224/// every `[[bin]]` entry. Pushes one `CargoPackageName` discovery when the
225/// package name differs from the directory, and one `CargoBinaryName`
226/// discovery per binary whose name differs from the package. Tracks every
227/// package in `packages` so the first-letter pass can see the full set.
228/// Test: `scan_member_emits_package_and_binary_aliases`.
229fn scan_member(
230 member_dir: &Path,
231 discoveries: &mut Vec<AliasDiscovery>,
232 seen_pairs: &mut HashSet<(String, String)>,
233 packages: &mut Vec<(String, String)>,
234) {
235 let manifest = member_dir.join("Cargo.toml");
236 let Ok(raw) = std::fs::read_to_string(&manifest) else {
237 return;
238 };
239 let Ok(parsed) = toml::from_str::<toml::Value>(&raw) else {
240 tracing::warn!("discovery: failed to parse {}", manifest.display());
241 return;
242 };
243
244 let dir_name = member_dir
245 .file_name()
246 .and_then(|n| n.to_str())
247 .unwrap_or("")
248 .to_string();
249 if dir_name.is_empty() {
250 return;
251 }
252
253 let package_name = parsed
254 .get("package")
255 .and_then(|p| p.get("name"))
256 .and_then(|n| n.as_str())
257 .map(|s| s.to_string());
258
259 if let Some(ref pkg) = package_name {
260 packages.push((pkg.clone(), dir_name.clone()));
261 if pkg != &dir_name {
262 push_unique(
263 discoveries,
264 seen_pairs,
265 AliasDiscovery {
266 short: pkg.clone(),
267 full: dir_name.clone(),
268 source: DiscoverySource::CargoPackageName,
269 },
270 );
271 }
272 }
273
274 if let Some(bins) = parsed.get("bin").and_then(|b| b.as_array()) {
275 let pkg_for_bin = package_name.as_deref().unwrap_or(&dir_name).to_string();
276 for bin in bins {
277 if let Some(bin_name) = bin.get("name").and_then(|n| n.as_str()) {
278 if bin_name != pkg_for_bin {
279 push_unique(
280 discoveries,
281 seen_pairs,
282 AliasDiscovery {
283 short: bin_name.to_string(),
284 full: pkg_for_bin.clone(),
285 source: DiscoverySource::CargoBinaryName,
286 },
287 );
288 }
289 }
290 }
291 }
292}
293
294/// Compute first-letter abbreviations for hyphenated package names and add
295/// the ones that are globally unique within the workspace.
296///
297/// Why: Operators routinely refer to crates by their initials ("tm" for
298/// `trusty-memory`, "tga" for `trusty-git-analytics`). Surfacing these
299/// automatically — but only when there's no ambiguity — avoids polluting the
300/// prompt with collisions like `tmc` (which could be `trusty-mpm-cli` or
301/// `trusty-mpm-core`).
302/// What: Splits each package name on `-`, takes the first letter of every
303/// segment; counts how many distinct full names each abbreviation maps to.
304/// Emits a `FirstLetterAbbrev` discovery only for abbreviations that map to
305/// exactly one full name AND don't equal that full name AND don't collide
306/// with an existing package name (which would suggest a different crate).
307/// Test: `first_letter_abbrev_tm_for_trusty_memory`,
308/// `first_letter_abbrev_skips_ambiguous`.
309fn add_first_letter_abbreviations(
310 packages: &[(String, String)],
311 discoveries: &mut Vec<AliasDiscovery>,
312 seen_pairs: &mut HashSet<(String, String)>,
313) {
314 let package_name_set: HashSet<&str> = packages.iter().map(|(p, _)| p.as_str()).collect();
315
316 // abbrev → set of full package names that produce it.
317 let mut groups: HashMap<String, Vec<&str>> = HashMap::new();
318 for (pkg, _dir) in packages {
319 if !pkg.contains('-') {
320 continue;
321 }
322 let abbrev: String = pkg
323 .split('-')
324 .filter_map(|seg| seg.chars().next())
325 .collect();
326 if abbrev.len() < 2 {
327 continue;
328 }
329 groups.entry(abbrev).or_default().push(pkg.as_str());
330 }
331
332 for (abbrev, fulls) in groups {
333 if fulls.len() != 1 {
334 continue;
335 }
336 let full = fulls[0];
337 if abbrev == full {
338 continue;
339 }
340 // Don't shadow an existing package name. e.g. if "tm" were itself a
341 // package name, we wouldn't want to also assert "tm → trusty-memory".
342 if package_name_set.contains(abbrev.as_str()) {
343 continue;
344 }
345 push_unique(
346 discoveries,
347 seen_pairs,
348 AliasDiscovery {
349 short: abbrev,
350 full: full.to_string(),
351 source: DiscoverySource::FirstLetterAbbrev,
352 },
353 );
354 }
355}
356
357/// Read the git origin URL for `project_root` and extract a short repo name.
358///
359/// Why: Most repos refer to themselves by the trailing path component of the
360/// origin URL ("trusty-tools"), which is rarely the same as the working tree
361/// directory name when checked out under a non-default path. Surfacing it as
362/// an alias for itself isn't useful, but surfacing the workspace dir name as
363/// the canonical full name for the short repo name is — e.g. when working
364/// inside a worktree directory the model still knows "trusty-tools" refers
365/// to the project. The canonical source for `[remote "origin"] url = …` lives
366/// in `<root>/.git/config` for a normal checkout, but in a *worktree* `.git`
367/// is a file containing `gitdir: <parent>/.git/worktrees/<name>/` and the
368/// `[remote]` section is reachable only through the parent repo's
369/// `.git/config`. Direct filesystem reads silently drop the discovery in
370/// worktree-based checkouts.
371///
372/// Issue #116: the previous implementation only handled the normal-checkout
373/// case and returned `None` from inside any git worktree, mirroring the bug
374/// fixed for `kg_bootstrap` in #113 / PR #115.
375///
376/// What: Resolves the origin URL via [`read_origin_url`] (which prefers
377/// `git -C <root> config --get remote.origin.url` and falls back to a manual
378/// INI scan of `<root>/.git/config` when no `git` binary is on PATH — useful
379/// only for fixture-based tests that fabricate a `.git/config` directly).
380/// Extracts the short name, strips a trailing `.git`, and emits a
381/// `GitRemote` discovery iff the short name differs from the directory name.
382/// Test: `extract_origin_url_handles_typical_config`,
383/// `short_repo_name_strips_git_suffix_and_path`,
384/// `git_remote_works_inside_worktree`.
385fn discover_git_remote(project_root: &Path) -> Option<AliasDiscovery> {
386 let url = read_origin_url(project_root)?;
387 let short = short_repo_name(&url)?;
388 let dir_name = project_root
389 .file_name()
390 .and_then(|n| n.to_str())
391 .unwrap_or("")
392 .to_string();
393 if dir_name.is_empty() || short == dir_name {
394 return None;
395 }
396 Some(AliasDiscovery {
397 short,
398 full: dir_name,
399 source: DiscoverySource::GitRemote,
400 })
401}
402
403/// Resolve `remote.origin.url` for the repo rooted at `project_root`,
404/// transparent to worktree vs. normal-checkout layout.
405///
406/// Why: Centralises the worktree-vs-checkout indirection in one place so
407/// `discover_git_remote` stays readable. In a worktree `.git` is a file
408/// (not a directory) containing `gitdir: <parent>/.git/worktrees/<name>/`,
409/// so a naive `std::fs::read_to_string(".git/config")` fails — but the
410/// `[remote "origin"]` section is still reachable via the parent's
411/// `.git/config`. Shelling out to `git` lets us delegate that pointer
412/// resolution instead of re-implementing it.
413/// What: (1) tries `git -C <root> config --get remote.origin.url`, which
414/// works equally well in worktrees, normal checkouts, and submodules; (2)
415/// falls back to a manual INI scan of `<root>/.git/config` for environments
416/// without a `git` binary on PATH (notably fixture tests that fabricate a
417/// `.git/config` in a tempdir without ever initialising a real repo).
418/// Returns `None` if neither path yields a non-empty URL.
419/// Test: `git_remote_works_inside_worktree` (CLI path),
420/// `extract_origin_url_handles_typical_config` (file fallback path, via
421/// `extract_origin_url`).
422fn read_origin_url(project_root: &Path) -> Option<String> {
423 // Strategy 1: ask git directly. This is the only path that handles
424 // worktrees correctly without us re-implementing `gitdir:` resolution.
425 if let Ok(output) = std::process::Command::new("git")
426 .arg("-C")
427 .arg(project_root)
428 .arg("config")
429 .arg("--get")
430 .arg("remote.origin.url")
431 .output()
432 {
433 if output.status.success() {
434 let url = String::from_utf8_lossy(&output.stdout).trim().to_string();
435 if !url.is_empty() {
436 return Some(url);
437 }
438 }
439 }
440
441 // Strategy 2: direct INI scan of `<root>/.git/config`. Only useful for
442 // fixture tests that fabricate a `.git/config` in a tempdir; real-world
443 // worktrees will never reach this branch because the file read fails
444 // (the worktree `.git` is a file, not a directory).
445 let raw = std::fs::read_to_string(project_root.join(".git").join("config")).ok()?;
446 extract_origin_url(&raw)
447}
448
/// Extract the `url = ...` value from the `[remote "origin"]` section of a
/// git config file.
///
/// Why: Pulling in a full git library for one field would be
/// disproportionate; a line-based scan is sufficient. The scan follows the
/// parts of the git-config syntax that affect this lookup, so hand-edited
/// files work too.
/// What: Walks the lines, tracking whether the current section is
/// `[remote "origin"]`. The section name is matched case-insensitively, the
/// subsection name `"origin"` exactly, and whitespace inside the brackets is
/// tolerated. Within that section the first `url` key (case-insensitive) with
/// a non-empty value is returned. A value wrapped in double quotes is
/// returned without the quotes; in an unquoted value `#` or `;` starts a
/// comment that is dropped.
/// Returns `None` when the section or key is missing or every `url` value is
/// empty.
/// Limitations: backslash escapes, line continuations, a key on the same line
/// as the section header, and the deprecated `[remote.origin]` header form
/// are not interpreted.
/// Test: `extract_origin_url_handles_typical_config`.
fn extract_origin_url(config: &str) -> Option<String> {
    /// True when `header` (a trimmed line starting with `[`) opens the
    /// `[remote "origin"]` section.
    fn is_origin_header(header: &str) -> bool {
        let Some((inner, _after)) = header
            .strip_prefix('[')
            .and_then(|rest| rest.split_once(']'))
        else {
            return false;
        };
        let Some((section, subsection)) = inner.trim().split_once(char::is_whitespace) else {
            return false;
        };
        section.eq_ignore_ascii_case("remote") && subsection.trim() == "\"origin\""
    }

    /// Strip surrounding quotes or a trailing comment from a raw value;
    /// `None` when nothing is left.
    fn clean_value(raw: &str) -> Option<String> {
        let raw = raw.trim();
        let value = match raw.strip_prefix('"') {
            // Quoted: the value runs to the closing quote (or end of line).
            Some(quoted) => quoted.split('"').next().unwrap_or(quoted),
            // Unquoted: `#` and `;` begin a comment.
            None => raw
                .split(|c| c == '#' || c == ';')
                .next()
                .unwrap_or(raw)
                .trim_end(),
        };
        if value.is_empty() {
            None
        } else {
            Some(value.to_string())
        }
    }

    let mut in_origin = false;
    for line in config.lines() {
        let trimmed = line.trim();
        if trimmed.starts_with('[') {
            in_origin = is_origin_header(trimmed);
            continue;
        }
        if !in_origin {
            continue;
        }
        let Some((key, value)) = trimmed.split_once('=') else {
            continue;
        };
        if key.trim().eq_ignore_ascii_case("url") {
            if let Some(url) = clean_value(value) {
                return Some(url);
            }
        }
    }
    None
}
478
/// Extract the short repo name from a git URL.
///
/// Why: Origin URLs come in three flavours — HTTPS
/// (`https://host/owner/repo.git`), SSH (`git@host:owner/repo.git`), and
/// local paths. All of them end with `<name>` or `<name>.git`, optionally
/// followed by a path separator, so the last path component without the
/// suffix is a stable short name.
/// What: Trims surrounding whitespace and trailing `/` or `\` separators,
/// takes the text after the last `/`, `:` or `\`, strips one trailing `.git`,
/// and trims again. Trimming happens before the suffix check so input such as
/// `"repo.git\n"` or `"repo.git/"` still loses its `.git`.
/// Returns `None` when nothing remains (empty input, separators only, or a
/// final component that is exactly `.git`).
/// Test: `short_repo_name_strips_git_suffix_and_path`.
fn short_repo_name(url: &str) -> Option<String> {
    let without_trailing_sep = url.trim().trim_end_matches(|c| c == '/' || c == '\\');
    let last = without_trailing_sep
        .rsplit(['/', ':', '\\'])
        .next()
        .unwrap_or("");
    let name = last.strip_suffix(".git").unwrap_or(last).trim();
    if name.is_empty() {
        None
    } else {
        Some(name.to_string())
    }
}
497
498/// Push a discovery into the result list iff its `short` hasn't been seen yet.
499///
500/// Why: A subject can only have one *active* `is_alias_for` triple at a time
501/// (the temporal KG closes the prior interval whenever a new value is
502/// asserted), so emitting two discoveries with the same `short` would force
503/// every subsequent `discover_aliases` call to flap between them — endlessly
504/// reasserting because neither matches the currently-active object. Deduping
505/// on `short` here makes the discovery list inherently idempotent: one
506/// authoritative mapping per subject, with the first-seen source winning
507/// (`CargoPackageName` > `CargoBinaryName` > `FirstLetterAbbrev` >
508/// `GitRemote`, matching the call order in `discover_blocking`).
509/// What: Tracks every `short` already pushed; subsequent pushes with the
510/// same `short` are dropped. `seen_pairs` is misnamed historically — it now
511/// holds the deduped subjects.
512/// Test: `no_duplicate_short_names_in_results`,
513/// `dispatch_discover_aliases_inserts_new_and_dedupes` (the rerun assertion
514/// only passes when this dedup holds).
515fn push_unique(
516 discoveries: &mut Vec<AliasDiscovery>,
517 seen_subjects: &mut HashSet<(String, String)>,
518 d: AliasDiscovery,
519) {
520 // Repurpose the set as a subject-only dedup: store ("subject", "") so
521 // the existing call sites keep working without renaming the parameter
522 // type across every signature.
523 let key = (d.short.clone(), String::new());
524 if seen_subjects.insert(key) {
525 discoveries.push(d);
526 }
527}
528
#[cfg(test)]
mod tests {
    use super::*;

    /// Why: Smoke-test the live workspace — the prompt test in the task spec
    /// pins `("tga", "trusty-git-analytics")` as a discovered alias.
    /// What: Locates the workspace root (two levels above this crate's
    /// manifest dir), runs the blocking discovery, and asserts the canonical
    /// pair is present with the `CargoPackageName` source.
    /// Test: this test itself.
    #[test]
    fn discovers_trusty_git_analytics_alias() {
        let root = workspace_root();
        let discoveries = discover_blocking(&root).expect("discover");
        let hit = discoveries
            .iter()
            .find(|d| d.short == "tga" && d.full == "trusty-git-analytics");
        assert!(
            hit.is_some(),
            "expected tga→trusty-git-analytics in discoveries; got: {discoveries:?}"
        );
        assert_eq!(hit.unwrap().source, DiscoverySource::CargoPackageName);
    }

    /// Why: First-letter abbreviation is the most subtle source — confirm
    /// it fires for at least one crate in the live workspace and pins the
    /// canonical example (`tc → trusty-common`).
    /// What: Runs the blocking discovery on the live workspace and looks for
    /// the `tc` discovery with the `FirstLetterAbbrev` source.
    /// Test: this test itself.
    #[test]
    fn first_letter_abbrev_emits_unique_workspace_initials() {
        let root = workspace_root();
        let discoveries = discover_blocking(&root).expect("discover");
        let hit = discoveries.iter().find(|d| {
            d.short == "tc"
                && d.full == "trusty-common"
                && d.source == DiscoverySource::FirstLetterAbbrev
        });
        assert!(
            hit.is_some(),
            "expected tc→trusty-common first-letter abbrev; got: {discoveries:?}"
        );
    }

    /// Why: A synthetic fixture pins the abbreviation algorithm in isolation:
    /// `tm` maps only to `trusty-memory` here because the other packages
    /// abbreviate to `tc` and `tmc`. In the live workspace a binary alias may
    /// claim `tm` first, so this cannot be asserted there.
    /// What: Feeds three packages straight into
    /// `add_first_letter_abbreviations` and checks the `tm` discovery.
    /// Test: this test itself.
    #[test]
    fn first_letter_abbrev_tm_unique_when_only_trusty_memory() {
        let packages = vec![
            ("trusty-memory".to_string(), "trusty-memory".to_string()),
            ("trusty-common".to_string(), "trusty-common".to_string()),
            ("trusty-mpm-cli".to_string(), "trusty-mpm-cli".to_string()),
        ];
        let mut discoveries = Vec::new();
        let mut seen = HashSet::new();
        add_first_letter_abbreviations(&packages, &mut discoveries, &mut seen);
        let tm = discoveries
            .iter()
            .find(|d| d.short == "tm" && d.source == DiscoverySource::FirstLetterAbbrev);
        assert_eq!(
            tm.map(|d| d.full.as_str()),
            Some("trusty-memory"),
            "tm must abbreviate trusty-memory in this fixture; got: {discoveries:?}"
        );
    }

    /// Why: The discovery list must hold at most one mapping per `short`
    /// (see `push_unique`), and two runs over an unchanged tree must agree.
    /// What: Runs discovery twice on the live workspace, compares the
    /// `(short, full)` pairs of both runs irrespective of order, and asserts
    /// that no `short` occurs twice within one run. The uniqueness check is
    /// keyed on `short` alone: a check on `(short, full)` pairs would let two
    /// different targets for the same subject slip through.
    /// Test: this test itself.
    #[tokio::test]
    async fn no_duplicate_short_names_in_results() {
        fn sorted_pairs(list: &[AliasDiscovery]) -> Vec<(String, String)> {
            let mut pairs: Vec<(String, String)> = list
                .iter()
                .map(|d| (d.short.clone(), d.full.clone()))
                .collect();
            pairs.sort();
            pairs
        }

        let root = workspace_root();
        let a = discover_project_aliases(&root).await.expect("discover a");
        let b = discover_project_aliases(&root).await.expect("discover b");
        assert_eq!(a.len(), b.len(), "two calls must yield equal counts");
        assert_eq!(
            sorted_pairs(&a),
            sorted_pairs(&b),
            "two calls must yield the same alias mappings"
        );

        // No `short` appears twice within a single call.
        let mut seen = HashSet::new();
        for d in &a {
            assert!(
                seen.insert(d.short.as_str()),
                "duplicate short name: {} → {} ({:?})",
                d.short,
                d.full,
                d.source,
            );
        }
    }

    /// Why: Pin the abbreviation-uniqueness rule against a synthetic
    /// workspace where two crates share an abbreviation — the algorithm
    /// must NOT emit a discovery for the ambiguous prefix.
    /// What: Build two fake packages, both abbreviating to "tm", and assert
    /// no `FirstLetterAbbrev` for "tm" is produced.
    /// Test: this test itself.
    #[test]
    fn first_letter_abbrev_skips_ambiguous() {
        let packages = vec![
            ("trusty-memory".to_string(), "trusty-memory".to_string()),
            ("trusty-monitor".to_string(), "trusty-monitor".to_string()),
        ];
        let mut discoveries = Vec::new();
        let mut seen = HashSet::new();
        add_first_letter_abbreviations(&packages, &mut discoveries, &mut seen);
        let tm = discoveries
            .iter()
            .find(|d| d.short == "tm" && d.source == DiscoverySource::FirstLetterAbbrev);
        assert!(
            tm.is_none(),
            "ambiguous tm must not produce an abbrev discovery; got: {discoveries:?}"
        );
    }

    /// Why: Pin the parser against the typical `[remote "origin"]` block
    /// shape. A regression that loses the URL would silently disable the
    /// GitRemote source.
    #[test]
    fn extract_origin_url_handles_typical_config() {
        let cfg = "\
[core]
\trepositoryformatversion = 0
[remote \"origin\"]
\turl = git@github.com:bobmatnyc/trusty-tools.git
\tfetch = +refs/heads/*:refs/remotes/origin/*
[branch \"main\"]
\tremote = origin
";
        assert_eq!(
            extract_origin_url(cfg),
            Some("git@github.com:bobmatnyc/trusty-tools.git".to_string())
        );
    }

    /// Why: Three URL flavours must all collapse to the same short name.
    #[test]
    fn short_repo_name_strips_git_suffix_and_path() {
        assert_eq!(
            short_repo_name("git@github.com:bobmatnyc/trusty-tools.git").as_deref(),
            Some("trusty-tools")
        );
        assert_eq!(
            short_repo_name("https://github.com/bobmatnyc/trusty-tools.git").as_deref(),
            Some("trusty-tools")
        );
        assert_eq!(
            short_repo_name("https://github.com/bobmatnyc/trusty-tools").as_deref(),
            Some("trusty-tools")
        );
        assert_eq!(short_repo_name("").as_deref(), None);
    }

    /// Why: Scan logic must surface both CargoPackageName and
    /// CargoBinaryName aliases from a single fixture.
    #[test]
    fn scan_member_emits_package_and_binary_aliases() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let member = tmp.path().join("trusty-git-analytics");
        std::fs::create_dir_all(&member).expect("mkdir");
        std::fs::write(
            member.join("Cargo.toml"),
            r#"
[package]
name = "tga"
version = "0.1.0"

[[bin]]
name = "tga_bench"
path = "src/bench.rs"

[[bin]]
name = "tga"
path = "src/main.rs"
"#,
        )
        .expect("write Cargo.toml");

        let mut discoveries = Vec::new();
        let mut seen = HashSet::new();
        let mut packages = Vec::new();
        scan_member(&member, &mut discoveries, &mut seen, &mut packages);

        // Package-name discovery.
        let pkg_disc = discoveries
            .iter()
            .find(|d| d.source == DiscoverySource::CargoPackageName)
            .expect("package alias");
        assert_eq!(pkg_disc.short, "tga");
        assert_eq!(pkg_disc.full, "trusty-git-analytics");

        // Binary-name discovery (only the one that differs from the package).
        let bin_disc = discoveries
            .iter()
            .find(|d| d.source == DiscoverySource::CargoBinaryName)
            .expect("binary alias");
        assert_eq!(bin_disc.short, "tga_bench");
        assert_eq!(bin_disc.full, "tga");

        // The matching-name bin must NOT produce a discovery.
        assert_eq!(
            discoveries
                .iter()
                .filter(|d| d.source == DiscoverySource::CargoBinaryName)
                .count(),
            1
        );
    }

    /// Why (issue #116): `discover_git_remote` must return the same remote
    /// URL inside a git worktree as it does in the parent checkout. Before
    /// the fix it read `<root>/.git/config` directly, which fails inside a
    /// worktree because `.git` is a *file* (containing
    /// `gitdir: <parent>/.git/worktrees/<name>/`), not a directory — and
    /// the `[remote "origin"]` section lives only in the parent's
    /// `.git/config`. This test pins the post-fix behaviour: initialise a
    /// real repo, add a remote, create a worktree off it, and assert
    /// `discover_git_remote` recovers the URL from inside the worktree.
    /// What: Builds a tempdir-backed parent repo + worktree pair using the
    /// real `git` CLI (the same tool the production code delegates to),
    /// then calls the discovery helper against the worktree path.
    /// Test: this test itself; serves as the worktree regression guard for #116.
    #[test]
    fn git_remote_works_inside_worktree() {
        // Skip when `git` is unavailable on PATH — the fixture relies on
        // real worktree semantics that we can't fabricate from pure FS ops.
        if std::process::Command::new("git")
            .arg("--version")
            .output()
            .ok()
            .map(|o| !o.status.success())
            .unwrap_or(true)
        {
            eprintln!("skipping git_remote_works_inside_worktree: `git` not on PATH");
            return;
        }

        let tmp = tempfile::tempdir().expect("tempdir");
        // The repo dir name must differ from the short repo name in the
        // remote URL so that `discover_git_remote` actually emits a
        // discovery (it skips when `short == dir_name`).
        let parent = tmp.path().join("local-checkout");
        std::fs::create_dir_all(&parent).expect("mkdir parent");

        // Initialise a real repo so `.git` is a directory in the parent
        // and a file (with `gitdir:`) inside the worktree.
        let run = |args: &[&str], cwd: &Path| {
            let status = std::process::Command::new("git")
                .args(args)
                .current_dir(cwd)
                .status()
                .expect("git status");
            assert!(status.success(), "git {args:?} failed in {cwd:?}");
        };
        run(&["init", "--initial-branch=main", "."], &parent);
        run(&["config", "user.email", "test@example.invalid"], &parent);
        run(&["config", "user.name", "test"], &parent);
        run(
            &[
                "remote",
                "add",
                "origin",
                "git@github.com:bobmatnyc/trusty-tools.git",
            ],
            &parent,
        );
        // A real commit + branch is required before `git worktree add` will
        // accept the source as a base.
        std::fs::write(parent.join("README.md"), "hi").expect("write README");
        run(&["add", "README.md"], &parent);
        run(&["commit", "-m", "init"], &parent);

        // Create the worktree as a sibling directory (outside the parent
        // checkout, the standard layout). Its directory name differs from
        // the URL's short name, so a discovery is expected here as well.
        let worktree = tmp.path().join("trusty-tools-feature");
        run(
            &[
                "worktree",
                "add",
                "-b",
                "feature",
                worktree.to_str().expect("worktree path"),
            ],
            &parent,
        );

        // Sanity: `.git` inside the worktree must be a file, not a dir —
        // otherwise the fixture isn't actually exercising the bug.
        let dot_git = worktree.join(".git");
        assert!(
            dot_git.is_file(),
            "expected `.git` to be a file inside the worktree; got {dot_git:?}"
        );

        // Run discovery against the worktree path. Pre-fix this returned
        // `None`; post-fix it must return the GitRemote discovery with the
        // short name extracted from origin.
        let d = discover_git_remote(&worktree).expect("expected GitRemote discovery from worktree");
        assert_eq!(d.source, DiscoverySource::GitRemote);
        assert_eq!(d.short, "trusty-tools");
        assert_eq!(d.full, "trusty-tools-feature");

        // Also confirm the normal-checkout path still works inside the same
        // fixture (regression guard: the shell-out must not break the
        // happy path either).
        let d_parent = discover_git_remote(&parent)
            .expect("expected GitRemote discovery from normal checkout");
        assert_eq!(d_parent.source, DiscoverySource::GitRemote);
        assert_eq!(d_parent.short, "trusty-tools");
        assert_eq!(d_parent.full, "local-checkout");
    }

    /// Resolve the workspace root (parent of `crates/trusty-memory`).
    ///
    /// Why: Cargo compiles each crate with `CARGO_MANIFEST_DIR` set to that
    /// crate's directory. The live-workspace tests need the workspace root,
    /// which is two levels up.
    fn workspace_root() -> PathBuf {
        let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        manifest_dir
            .parent() // crates/
            .and_then(|p| p.parent()) // workspace root
            .expect("workspace root")
            .to_path_buf()
    }
}