Skip to main content

trusty_memory/
bootstrap.rs

1//! Knowledge-graph bootstrap helpers.
2//!
3//! Why: Issue #60 — after `palace_create`, the knowledge graph (KG) sits at
4//! zero triples and there is no auto-discovery path. Users have no idea
5//! they're supposed to call `kg_assert` manually before `kg_query` returns
6//! anything useful. `kg_bootstrap` closes this gap by scanning well-known
7//! project files (`Cargo.toml`, `package.json`, `pyproject.toml`, `CLAUDE.md`,
8//! `.git/config`, `go.mod`) and seeding structured triples that describe the
9//! project (language, version, source repo, etc.). It also seeds temporal
10//! metadata (`created_at`, `bootstrapped_at`) so even an empty project at
11//! least has *something* in the KG and a timestamp anchor for future queries.
12//! What: A pure-blocking scanner (`scan_project`) returns a flat list of
13//! `(subject, predicate, object, provenance)` tuples; the public async entry
14//! point `bootstrap_palace` resolves a palace handle, runs the scanner, and
15//! asserts each tuple through the existing `KnowledgeGraph::assert` path.
16//! Test: Unit tests pin each scanner against fixture directories;
17//! `kg_bootstrap` is exercised end-to-end from the MCP tool surface in
18//! `tools.rs`.
19//!
20//! Design notes:
21//! - Missing files are NOT errors — every read is best-effort. The scanner
22//!   returns whatever triples it could derive and skips the rest with a
23//!   debug-level log.
//! - Facts extracted from a manifest use the name found in that manifest as
//!   the triple subject. The first manifest that yields a name also becomes
//!   the project subject for the `CLAUDE.md`, `.git/config`, and temporal
//!   triples. When no project name can be derived from manifests, the palace
//!   ID is used as a fallback so the temporal triples still anchor to a
//!   stable subject.
28//! - Provenance strings are stable identifiers (`bootstrap:cargo.toml`,
29//!   `bootstrap:package.json`, …) so operators can audit which scanner
30//!   asserted each triple and retract by source if needed.
31
32use anyhow::{anyhow, Context, Result};
33use serde::Serialize;
34use std::path::{Path, PathBuf};
35use trusty_common::memory_core::store::kg::Triple;
36
37use crate::AppState;
38
/// One scanner discovery, prior to being promoted to a KG `Triple`.
///
/// Why: Scanner output stays a plain string record (not a full `Triple`) so
/// unit tests can check extraction without building timestamps or picking
/// confidence values. The async caller attaches the live
/// `chrono::Utc::now()` timestamp when it converts these for assertion.
/// What: Subject, predicate, object, plus the provenance tag naming the
/// scanner that produced the fact.
/// Test: Every scanner test checks that the expected `BootstrapTriple`s are
/// present in the result list.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct BootstrapTriple {
    /// Entity the fact is about.
    pub subject: String,
    /// Relation name, e.g. `has_language`.
    pub predicate: String,
    /// Value of the relation, e.g. `Rust`.
    pub object: String,
    /// Scanner tag, e.g. `bootstrap:cargo.toml`.
    pub provenance: String,
}
57
58/// Per-file scan summary returned to the MCP caller.
59///
60/// Why: Operators want to know *which* files contributed to the bootstrap
61/// (and which were absent) without re-running the tool with verbose logging.
62/// What: Filename + count of triples it produced; emitted as JSON in the
63/// MCP response.
64/// Test: `bootstrap_palace_returns_per_file_counts`.
65#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
66pub struct ScannedFile {
67    pub file: String,
68    pub triples: usize,
69}
70
71/// Aggregate result of a bootstrap run.
72///
73/// Why: The MCP `kg_bootstrap` tool returns this verbatim so the model (or a
74/// human operator) can see exactly what was asserted and which files were
75/// scanned.
76/// What: Total triple count + per-file summaries + the resolved project
77/// subject. `Serialize` so it round-trips into the MCP JSON envelope.
78/// Test: `bootstrap_palace_seeds_temporal_metadata_when_no_files`.
79#[derive(Debug, Clone, Serialize)]
80pub struct BootstrapResult {
81    pub palace: String,
82    pub project_subject: String,
83    pub triples_asserted: usize,
84    pub scanned_files: Vec<ScannedFile>,
85}
86
87/// Run the bootstrap scan against a palace.
88///
89/// Why: Single async entry point that the MCP dispatcher (and the
90/// auto-bootstrap hook in `palace_create`) calls. Encapsulates path
91/// resolution, scanning, triple construction, and KG assertion.
92/// What: Resolves `project_path` (caller-supplied), runs the blocking
93/// scanner, seeds temporal metadata triples, and asserts every discovery
94/// through `handle.kg.assert(...)`. Returns a summary of what was written.
95/// Test: `bootstrap_palace_seeds_temporal_metadata_when_no_files`,
96/// `bootstrap_palace_scans_cargo_toml`.
97pub async fn bootstrap_palace(
98    state: &AppState,
99    palace_id: &str,
100    project_path: Option<&Path>,
101) -> Result<BootstrapResult> {
102    let handle = state
103        .registry
104        .open_palace(
105            &state.data_root,
106            &trusty_common::memory_core::palace::PalaceId::new(palace_id),
107        )
108        .with_context(|| format!("open palace {palace_id}"))?;
109
110    // Choose the scan root. When the caller did not supply a project path,
111    // we still scan the palace's own data dir so `CLAUDE.md` or other
112    // operator-placed files inside the palace are picked up.
113    let scan_root: PathBuf = match project_path {
114        Some(p) => p.to_path_buf(),
115        None => handle
116            .data_dir
117            .clone()
118            .unwrap_or_else(|| state.data_root.join(palace_id)),
119    };
120    let palace_id_owned = palace_id.to_string();
121
122    let (triples, scanned_files, project_subject) =
123        tokio::task::spawn_blocking(move || scan_project(&scan_root, &palace_id_owned))
124            .await
125            .context("join scan_project")??;
126
127    // Seed temporal metadata (always present, even for empty projects).
128    let now = chrono::Utc::now();
129    let mut all = triples;
130    all.push(BootstrapTriple {
131        subject: project_subject.clone(),
132        predicate: "bootstrapped_at".to_string(),
133        object: now.to_rfc3339(),
134        provenance: "bootstrap:temporal".to_string(),
135    });
136    // `created_at` is only inserted when the palace doesn't yet have one;
137    // re-running bootstrap must not lie about when the palace first came
138    // into being. The KG's temporal layer would close the prior interval
139    // and the new interval would carry a misleading `valid_from`. Check
140    // `query_active` before writing.
141    let existing = handle
142        .kg
143        .query_active(&project_subject)
144        .await
145        .context("kg.query_active for created_at check")?;
146    if !existing.iter().any(|t| t.predicate == "created_at") {
147        all.push(BootstrapTriple {
148            subject: project_subject.clone(),
149            predicate: "created_at".to_string(),
150            object: now.to_rfc3339(),
151            provenance: "bootstrap:temporal".to_string(),
152        });
153    }
154
155    let mut asserted = 0usize;
156    for bt in &all {
157        let triple = Triple {
158            subject: bt.subject.clone(),
159            predicate: bt.predicate.clone(),
160            object: bt.object.clone(),
161            valid_from: now,
162            valid_to: None,
163            confidence: 1.0,
164            provenance: Some(bt.provenance.clone()),
165        };
166        handle
167            .kg
168            .assert(triple)
169            .await
170            .with_context(|| format!("kg.assert {} {}", bt.subject, bt.predicate))?;
171        asserted += 1;
172    }
173
174    Ok(BootstrapResult {
175        palace: palace_id.to_string(),
176        project_subject,
177        triples_asserted: asserted,
178        scanned_files,
179    })
180}
181
182/// Blocking scanner: walk well-known files under `root` and extract triples.
183///
184/// Why: Pulled out as a sync function so the file I/O + TOML/JSON parsing
185/// run on a blocking thread (via `spawn_blocking`) and the algorithm itself
186/// is trivially unit-testable against fixture directories.
187/// What: Returns `(triples, per_file_summary, project_subject)`. The
188/// project subject is derived from the first manifest that yields a name;
189/// falls back to `fallback_subject` (typically the palace id) when none
190/// match.
191/// Test: `scan_project_extracts_cargo_facts`,
192/// `scan_project_extracts_package_json`,
193/// `scan_project_falls_back_to_palace_id_when_no_manifest`.
194pub fn scan_project(
195    root: &Path,
196    fallback_subject: &str,
197) -> Result<(Vec<BootstrapTriple>, Vec<ScannedFile>, String)> {
198    let mut triples: Vec<BootstrapTriple> = Vec::new();
199    let mut summary: Vec<ScannedFile> = Vec::new();
200    let mut project_subject: Option<String> = None;
201
202    // 1. Cargo.toml
203    let before = triples.len();
204    if let Some(name) = scan_cargo_toml(root, &mut triples) {
205        project_subject.get_or_insert(name);
206    }
207    if triples.len() > before {
208        summary.push(ScannedFile {
209            file: "Cargo.toml".to_string(),
210            triples: triples.len() - before,
211        });
212    }
213
214    // 2. package.json
215    let before = triples.len();
216    if let Some(name) = scan_package_json(root, &mut triples) {
217        project_subject.get_or_insert(name);
218    }
219    if triples.len() > before {
220        summary.push(ScannedFile {
221            file: "package.json".to_string(),
222            triples: triples.len() - before,
223        });
224    }
225
226    // 3. pyproject.toml
227    let before = triples.len();
228    if let Some(name) = scan_pyproject_toml(root, &mut triples) {
229        project_subject.get_or_insert(name);
230    }
231    if triples.len() > before {
232        summary.push(ScannedFile {
233            file: "pyproject.toml".to_string(),
234            triples: triples.len() - before,
235        });
236    }
237
238    // 4. go.mod
239    let before = triples.len();
240    if let Some(name) = scan_go_mod(root, &mut triples) {
241        project_subject.get_or_insert(name);
242    }
243    if triples.len() > before {
244        summary.push(ScannedFile {
245            file: "go.mod".to_string(),
246            triples: triples.len() - before,
247        });
248    }
249
250    // 5. CLAUDE.md — first H1 heading as descriptive name. Does not set
251    //    project_subject (the manifest sources are stronger signals) but
252    //    contributes a `has_description` triple when the subject is known.
253    let before = triples.len();
254    scan_claude_md(root, project_subject.as_deref(), &mut triples);
255    if triples.len() > before {
256        summary.push(ScannedFile {
257            file: "CLAUDE.md".to_string(),
258            triples: triples.len() - before,
259        });
260    }
261
262    // 6. .git/config — source repo URL.
263    let before = triples.len();
264    scan_git_config(root, project_subject.as_deref(), &mut triples);
265    if triples.len() > before {
266        summary.push(ScannedFile {
267            file: ".git/config".to_string(),
268            triples: triples.len() - before,
269        });
270    }
271
272    let subject = project_subject.unwrap_or_else(|| fallback_subject.to_string());
273
274    // Rewrite any triples that used a placeholder subject (only the
275    // CLAUDE.md / .git/config scanners are subject-dependent; if no manifest
276    // matched, those scanners ran with subject=None and produced nothing, so
277    // this is currently a no-op — but keeping the loop makes future scanner
278    // additions safe).
279    for t in &mut triples {
280        if t.subject.is_empty() {
281            t.subject = subject.clone();
282        }
283    }
284
285    Ok((triples, summary, subject))
286}
287
288/// Scan `Cargo.toml`. Returns the package/workspace name if extracted.
289///
290/// Why: Rust projects are the primary trusty-tools target; we want
291/// `has_language=Rust`, `has_version`, `has_edition`, `has_rust_version`,
292/// and `workspace_member` triples auto-populated so `kg_query` against the
293/// project name returns useful context immediately.
294/// What: Parses the TOML; emits `(name, has_language, "Rust")` always when
295/// the manifest exists, plus version/edition/rust-version/workspace member
296/// triples when present.
297/// Test: `scan_project_extracts_cargo_facts`.
298fn scan_cargo_toml(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
299    let manifest = root.join("Cargo.toml");
300    let raw = std::fs::read_to_string(&manifest).ok()?;
301    let parsed: toml::Value = match toml::from_str(&raw) {
302        Ok(v) => v,
303        Err(e) => {
304            tracing::debug!("bootstrap: parse Cargo.toml failed: {e:#}");
305            return None;
306        }
307    };
308
309    // Workspace root manifests may have no [package] section. Use the
310    // workspace.package.name if present; otherwise derive from the dir name.
311    let name = parsed
312        .get("package")
313        .and_then(|p| p.get("name"))
314        .and_then(|n| n.as_str())
315        .map(|s| s.to_string())
316        .or_else(|| {
317            parsed
318                .get("workspace")
319                .and_then(|w| w.get("package"))
320                .and_then(|p| p.get("name"))
321                .and_then(|n| n.as_str())
322                .map(|s| s.to_string())
323        })
324        .or_else(|| {
325            root.file_name()
326                .and_then(|n| n.to_str())
327                .map(|s| s.to_string())
328        })?;
329
330    out.push(BootstrapTriple {
331        subject: name.clone(),
332        predicate: "has_language".to_string(),
333        object: "Rust".to_string(),
334        provenance: "bootstrap:cargo.toml".to_string(),
335    });
336
337    if let Some(version) = parsed
338        .get("package")
339        .and_then(|p| p.get("version"))
340        .and_then(|v| v.as_str())
341    {
342        out.push(BootstrapTriple {
343            subject: name.clone(),
344            predicate: "has_version".to_string(),
345            object: version.to_string(),
346            provenance: "bootstrap:cargo.toml".to_string(),
347        });
348    }
349    if let Some(edition) = parsed
350        .get("package")
351        .and_then(|p| p.get("edition"))
352        .and_then(|v| v.as_str())
353    {
354        out.push(BootstrapTriple {
355            subject: name.clone(),
356            predicate: "has_edition".to_string(),
357            object: edition.to_string(),
358            provenance: "bootstrap:cargo.toml".to_string(),
359        });
360    }
361    if let Some(rv) = parsed
362        .get("package")
363        .and_then(|p| p.get("rust-version"))
364        .and_then(|v| v.as_str())
365    {
366        out.push(BootstrapTriple {
367            subject: name.clone(),
368            predicate: "has_rust_version".to_string(),
369            object: rv.to_string(),
370            provenance: "bootstrap:cargo.toml".to_string(),
371        });
372    }
373
374    // Workspace members (capped at 64 to avoid flooding the KG on huge
375    // monorepos; bootstrap is a coarse seeder, not an exhaustive index).
376    if let Some(members) = parsed
377        .get("workspace")
378        .and_then(|w| w.get("members"))
379        .and_then(|m| m.as_array())
380    {
381        for member in members.iter().take(64) {
382            if let Some(s) = member.as_str() {
383                out.push(BootstrapTriple {
384                    subject: name.clone(),
385                    predicate: "has_workspace_member".to_string(),
386                    object: s.to_string(),
387                    provenance: "bootstrap:cargo.toml".to_string(),
388                });
389            }
390        }
391    }
392
393    Some(name)
394}
395
396/// Scan `package.json`.
397///
398/// Why: Node/TypeScript projects are the second most common target. We want
399/// `has_language=JavaScript`, `has_version`, and `has_dependency` triples.
400/// What: Parses the JSON; emits language/version triples + one
401/// `has_dependency` per top-level key in the `dependencies` object (cap 64).
402/// Test: `scan_project_extracts_package_json`.
403fn scan_package_json(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
404    let manifest = root.join("package.json");
405    let raw = std::fs::read_to_string(&manifest).ok()?;
406    let parsed: serde_json::Value = match serde_json::from_str(&raw) {
407        Ok(v) => v,
408        Err(e) => {
409            tracing::debug!("bootstrap: parse package.json failed: {e:#}");
410            return None;
411        }
412    };
413    let name = parsed.get("name").and_then(|n| n.as_str())?.to_string();
414
415    out.push(BootstrapTriple {
416        subject: name.clone(),
417        predicate: "has_language".to_string(),
418        object: "JavaScript".to_string(),
419        provenance: "bootstrap:package.json".to_string(),
420    });
421
422    if let Some(version) = parsed.get("version").and_then(|v| v.as_str()) {
423        out.push(BootstrapTriple {
424            subject: name.clone(),
425            predicate: "has_version".to_string(),
426            object: version.to_string(),
427            provenance: "bootstrap:package.json".to_string(),
428        });
429    }
430
431    if let Some(deps) = parsed.get("dependencies").and_then(|d| d.as_object()) {
432        for (k, _) in deps.iter().take(64) {
433            out.push(BootstrapTriple {
434                subject: name.clone(),
435                predicate: "has_dependency".to_string(),
436                object: k.clone(),
437                provenance: "bootstrap:package.json".to_string(),
438            });
439        }
440    }
441
442    Some(name)
443}
444
445/// Scan `pyproject.toml`.
446///
447/// Why: Python projects use PEP-621 `[project]` metadata; surfacing the
448/// language tag + version + `requires-python` makes Python repos legible to
449/// the KG without manual assertions.
450/// What: Parses the TOML; emits language/version/requires-python triples
451/// when the `[project]` table is present.
452/// Test: `scan_project_extracts_pyproject`.
453fn scan_pyproject_toml(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
454    let manifest = root.join("pyproject.toml");
455    let raw = std::fs::read_to_string(&manifest).ok()?;
456    let parsed: toml::Value = match toml::from_str(&raw) {
457        Ok(v) => v,
458        Err(e) => {
459            tracing::debug!("bootstrap: parse pyproject.toml failed: {e:#}");
460            return None;
461        }
462    };
463    let project = parsed.get("project")?;
464    let name = project.get("name").and_then(|n| n.as_str())?.to_string();
465
466    out.push(BootstrapTriple {
467        subject: name.clone(),
468        predicate: "has_language".to_string(),
469        object: "Python".to_string(),
470        provenance: "bootstrap:pyproject.toml".to_string(),
471    });
472
473    if let Some(v) = project.get("version").and_then(|v| v.as_str()) {
474        out.push(BootstrapTriple {
475            subject: name.clone(),
476            predicate: "has_version".to_string(),
477            object: v.to_string(),
478            provenance: "bootstrap:pyproject.toml".to_string(),
479        });
480    }
481    if let Some(rp) = project.get("requires-python").and_then(|v| v.as_str()) {
482        out.push(BootstrapTriple {
483            subject: name.clone(),
484            predicate: "requires_python".to_string(),
485            object: rp.to_string(),
486            provenance: "bootstrap:pyproject.toml".to_string(),
487        });
488    }
489
490    Some(name)
491}
492
493/// Scan `go.mod` for the module name.
494///
495/// Why: Go projects encode their canonical name on the `module` line of
496/// `go.mod`; surfacing it as the project subject lets Go repos opt into the
497/// same KG shape as Rust/Node/Python.
498/// What: Reads `go.mod`, extracts the `module <name>` directive, and emits
499/// `(name, has_language, "Go")` plus `(name, has_module_path, <name>)`.
500/// Test: `scan_project_extracts_go_mod`.
501fn scan_go_mod(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
502    let raw = std::fs::read_to_string(root.join("go.mod")).ok()?;
503    let module = raw
504        .lines()
505        .find_map(|line| line.trim().strip_prefix("module "))
506        .map(|s| s.trim().to_string())?;
507    if module.is_empty() {
508        return None;
509    }
510    out.push(BootstrapTriple {
511        subject: module.clone(),
512        predicate: "has_language".to_string(),
513        object: "Go".to_string(),
514        provenance: "bootstrap:go.mod".to_string(),
515    });
516    out.push(BootstrapTriple {
517        subject: module.clone(),
518        predicate: "has_module_path".to_string(),
519        object: module.clone(),
520        provenance: "bootstrap:go.mod".to_string(),
521    });
522    Some(module)
523}
524
525/// Scan `CLAUDE.md` for the first H1 heading; attach as project description.
526///
527/// Why: Trusty-* projects use `CLAUDE.md` as the canonical orientation
528/// document; the first H1 line is invariably the project name/tagline and
529/// makes a good `has_description` triple.
530/// What: Walks lines, finds the first `# Title` heading, strips the prefix,
531/// and pushes a `has_description` triple under `subject` (when known).
532/// Test: `scan_project_extracts_claude_md_h1`.
533fn scan_claude_md(root: &Path, subject: Option<&str>, out: &mut Vec<BootstrapTriple>) {
534    let Some(subject) = subject else {
535        // No project subject yet — skip; we don't want orphan triples.
536        return;
537    };
538    let Ok(raw) = std::fs::read_to_string(root.join("CLAUDE.md")) else {
539        return;
540    };
541    if let Some(h1) = raw.lines().find_map(|line| {
542        let t = line.trim_start();
543        t.strip_prefix("# ")
544            .filter(|rest| !rest.is_empty())
545            .map(|s| s.trim().to_string())
546    }) {
547        out.push(BootstrapTriple {
548            subject: subject.to_string(),
549            predicate: "has_description".to_string(),
550            object: h1,
551            provenance: "bootstrap:claude.md".to_string(),
552        });
553    }
554}
555
556/// Scan the git origin URL for the project rooted at `root`.
557///
558/// Why: Tying a project to its source repo URL is the single highest-signal
559/// fact for downstream tooling (link to issues, find upstream, etc.). The
560/// canonical source is `[remote "origin"] url = …`, but its physical location
561/// depends on whether `root` is a normal checkout or a git worktree:
562///
563/// - Normal checkout: `.git/` is a directory; the config lives in
564///   `<root>/.git/config`.
565/// - Worktree: `.git` is a *file* containing `gitdir: <pointer>` to the
566///   parent repo's `.git/worktrees/<name>/` dir; the `[remote]` section lives
567///   in the parent's `.git/config`, not anywhere reachable by joining
568///   `<root>/.git/config`.
569///
570/// Issue #113: the previous implementation only handled the first case and
571/// silently dropped the `source_repo` triple in any worktree-based checkout.
572///
573/// What: First shells out to `git -C <root> config --get remote.origin.url`,
574/// which natively resolves the `.git`-file pointer for us. Falls back to a
575/// direct file scan of `<root>/.git/config` when `git` is unavailable on PATH
576/// (matters for the fixture-based unit tests, which fabricate a `.git/config`
577/// file in a tempdir without a real repo). Emits a
578/// `(subject, source_repo, url)` triple when a URL is found.
579/// Test: `scan_project_extracts_git_origin` (file fallback path),
580/// `tools::tests::kg_bootstrap_seeds_workspace_facts` (git-CLI path,
581/// exercised inside worktrees).
582fn scan_git_config(root: &Path, subject: Option<&str>, out: &mut Vec<BootstrapTriple>) {
583    let Some(subject) = subject else { return };
584    let Some(url) = read_origin_url(root) else {
585        return;
586    };
587    out.push(BootstrapTriple {
588        subject: subject.to_string(),
589        predicate: "source_repo".to_string(),
590        object: url,
591        provenance: "bootstrap:git.config".to_string(),
592    });
593}
594
/// Resolve `remote.origin.url` for the repo rooted at `root`, transparent to
/// worktree vs. normal-checkout layout, with embedded credentials removed.
///
/// Why: Centralises the worktree-vs-checkout indirection in one place so the
/// bootstrap scanner stays readable. The URL ends up in the knowledge graph
/// and is returned to callers, so secrets embedded in it
/// (`https://user:token@host/...`) must not be passed through.
/// What: (1) tries `git -C <root> config --get remote.origin.url`, which
/// resolves worktree `.git` pointer files natively; (2) falls back to a
/// manual INI scan of `<root>/.git/config` for environments without a usable
/// `git` binary (notably fixture-based unit tests). The URL from either path
/// is passed through `redact_url_credentials`. Returns `None` if neither
/// path yields a non-empty URL.
/// Test: `read_origin_url_returns_none_for_non_git_dir`,
/// `scan_project_extracts_git_origin` (file fallback).
fn read_origin_url(root: &Path) -> Option<String> {
    origin_url_from_git_cli(root)
        .or_else(|| origin_url_from_config_file(root))
        .map(|url| redact_url_credentials(&url))
}

/// Strategy 1: ask git directly. This is the only path that handles
/// worktrees correctly without re-implementing `gitdir:` resolution.
fn origin_url_from_git_cli(root: &Path) -> Option<String> {
    let output = std::process::Command::new("git")
        .arg("-C")
        .arg(root)
        .arg("config")
        .arg("--get")
        .arg("remote.origin.url")
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }
    let url = String::from_utf8_lossy(&output.stdout).trim().to_string();
    if url.is_empty() {
        None
    } else {
        Some(url)
    }
}

/// Strategy 2: direct INI scan of `<root>/.git/config`. In a worktree the
/// read fails (there `.git` is a file, not a directory) and `None` results.
fn origin_url_from_config_file(root: &Path) -> Option<String> {
    let raw = std::fs::read_to_string(root.join(".git").join("config")).ok()?;
    let mut in_origin = false;
    for line in raw.lines() {
        let trimmed = line.trim();
        if trimmed.starts_with('[') {
            // Every section header switches the "inside origin" state.
            in_origin = trimmed == "[remote \"origin\"]";
            continue;
        }
        if !in_origin {
            continue;
        }
        // Accept `url = value` and `url=value`; keys that merely start with
        // "url" fail the `=` check below.
        let value = trimmed
            .strip_prefix("url")
            .map(str::trim_start)
            .and_then(|rest| rest.strip_prefix('='))
            .map(str::trim);
        if let Some(url) = value {
            if !url.is_empty() {
                return Some(url.to_string());
            }
        }
    }
    None
}

/// Remove secrets from the userinfo part of a `scheme://userinfo@host/...` URL.
///
/// For `http`/`https` the whole userinfo is dropped, because access tokens
/// are commonly placed in the username position. For other schemes (e.g.
/// `ssh://git@host/...`) the username is kept, since it is needed to reach
/// the repo, and only a `:password` suffix is removed. Inputs without a
/// `://` separator (scp-style `git@host:path`, local paths) and URLs without
/// userinfo are returned unchanged.
fn redact_url_credentials(url: &str) -> String {
    let Some((scheme, rest)) = url.split_once("://") else {
        return url.to_string();
    };
    // The authority ends at the first path, query, or fragment delimiter, so
    // an '@' later in the URL is never mistaken for a userinfo separator.
    let authority_end = rest
        .find(|c: char| matches!(c, '/' | '?' | '#'))
        .unwrap_or(rest.len());
    let (authority, tail) = rest.split_at(authority_end);
    let Some((userinfo, host)) = authority.rsplit_once('@') else {
        return url.to_string();
    };

    let user = userinfo.split_once(':').map_or(userinfo, |(user, _)| user);
    let is_http = scheme.eq_ignore_ascii_case("http") || scheme.eq_ignore_ascii_case("https");
    if is_http || user.is_empty() {
        format!("{scheme}://{host}{tail}")
    } else {
        format!("{scheme}://{user}@{host}{tail}")
    }
}
653
/// Hint text that `kg_query` returns when the palace KG is empty.
///
/// Why: Issue #60 — a `kg_query` against a brand-new palace yields an empty
/// triples array and gives no sign that `kg_bootstrap` / `kg_assert` exist.
/// Embedding a short hint in the response fixes that with a single line at
/// the call site.
/// What: A static string, defined in this module so tests can pin it.
/// Test: `kg_query_emits_hint_when_palace_empty` in `tools.rs`.
pub const KG_EMPTY_HINT: &str = concat!(
    "Knowledge graph is empty. Run kg_bootstrap to seed it from project files, ",
    "or use kg_assert to add triples manually.",
);
665
666/// Convenience: count active triples across an entire palace.
667///
668/// Why: `kg_query` is per-subject, so to determine "is the KG empty?" the
669/// `kg_query` handler needs a separate broader check. Centralising the
670/// emptiness check here keeps the hint logic in one place and lets future
671/// changes (e.g. counting across closets) live alongside their consumer.
672/// What: Returns `Ok(true)` iff the palace has zero triples for the queried
673/// subject AND the broader "is_anything_asserted" check is empty. Practical
674/// emptiness: we treat the palace as empty if the queried subject returned
675/// no triples — this is the user's signal that something is wrong, even if
676/// other subjects have data.
677/// Test: covered indirectly through `kg_query_emits_hint_when_palace_empty`.
678pub fn is_kg_empty_for_subject(triples: &[Triple]) -> bool {
679    triples.is_empty()
680}
681
682/// Helper: bubble up the bootstrap result as the MCP JSON envelope expects.
683///
684/// Why: `tools.rs` keeps the dispatcher branches small; converting the
685/// `BootstrapResult` into a `serde_json::Value` here keeps the JSON shape
686/// owned by this module and stable for tests.
687/// What: Serialises the result via serde and wraps any failure in
688/// `anyhow::Error` with context.
689/// Test: round-tripped via the MCP dispatcher test.
690pub fn result_to_json(r: &BootstrapResult) -> Result<serde_json::Value> {
691    serde_json::to_value(r).map_err(|e| anyhow!("serialize BootstrapResult: {e}"))
692}
693
694#[cfg(test)]
695mod tests {
696    use super::*;
697    use std::fs;
698
699    fn write(root: &Path, rel: &str, content: &str) {
700        let p = root.join(rel);
701        if let Some(parent) = p.parent() {
702            fs::create_dir_all(parent).expect("mkdir");
703        }
704        fs::write(&p, content).expect("write");
705    }
706
707    /// Why: Pin the Cargo.toml scanner against a realistic single-crate
708    /// manifest. Covers name/version/edition/rust-version extraction.
709    #[test]
710    fn scan_project_extracts_cargo_facts() {
711        let tmp = tempfile::tempdir().expect("tempdir");
712        write(
713            tmp.path(),
714            "Cargo.toml",
715            r#"
716[package]
717name = "demo-crate"
718version = "1.2.3"
719edition = "2021"
720rust-version = "1.88"
721"#,
722        );
723        let (triples, summary, subject) =
724            scan_project(tmp.path(), "fallback").expect("scan_project");
725        assert_eq!(subject, "demo-crate");
726        assert!(summary.iter().any(|s| s.file == "Cargo.toml"));
727
728        let has = |p: &str, o: &str| {
729            triples
730                .iter()
731                .any(|t| t.subject == "demo-crate" && t.predicate == p && t.object == o)
732        };
733        assert!(has("has_language", "Rust"));
734        assert!(has("has_version", "1.2.3"));
735        assert!(has("has_edition", "2021"));
736        assert!(has("has_rust_version", "1.88"));
737    }
738
739    /// Why: Workspace manifests have no `[package]` section but a
740    /// `[workspace]` table with members; the scanner must still produce
741    /// workspace-member triples and fall back to the directory name for
742    /// the subject.
743    #[test]
744    fn scan_project_extracts_workspace_members() {
745        let tmp = tempfile::tempdir().expect("tempdir");
746        let root = tmp.path().join("trusty-tools");
747        fs::create_dir_all(&root).expect("mkdir");
748        write(
749            &root,
750            "Cargo.toml",
751            r#"
752[workspace]
753members = ["crates/foo", "crates/bar"]
754resolver = "2"
755"#,
756        );
757        let (triples, _summary, subject) = scan_project(&root, "fallback").expect("scan_project");
758        assert_eq!(subject, "trusty-tools");
759        assert!(triples
760            .iter()
761            .any(|t| t.predicate == "has_workspace_member" && t.object == "crates/foo"));
762        assert!(triples
763            .iter()
764            .any(|t| t.predicate == "has_workspace_member" && t.object == "crates/bar"));
765    }
766
767    /// Why: package.json is the JS/TS entry point; pin name/version + a
768    /// `has_dependency` triple per top-level dep key.
769    #[test]
770    fn scan_project_extracts_package_json() {
771        let tmp = tempfile::tempdir().expect("tempdir");
772        write(
773            tmp.path(),
774            "package.json",
775            r#"{
776  "name": "my-app",
777  "version": "0.5.0",
778  "dependencies": {
779    "react": "^18.0.0",
780    "lodash": "^4.0.0"
781  }
782}"#,
783        );
784        let (triples, _summary, subject) = scan_project(tmp.path(), "fb").expect("scan");
785        assert_eq!(subject, "my-app");
786        assert!(triples
787            .iter()
788            .any(|t| t.predicate == "has_language" && t.object == "JavaScript"));
789        assert!(triples
790            .iter()
791            .any(|t| t.predicate == "has_version" && t.object == "0.5.0"));
792        assert!(triples
793            .iter()
794            .any(|t| t.predicate == "has_dependency" && t.object == "react"));
795        assert!(triples
796            .iter()
797            .any(|t| t.predicate == "has_dependency" && t.object == "lodash"));
798    }
799
800    /// Why: pyproject.toml uses PEP-621 `[project]` table; confirm
801    /// language/version/requires-python triples land.
802    #[test]
803    fn scan_project_extracts_pyproject() {
804        let tmp = tempfile::tempdir().expect("tempdir");
805        write(
806            tmp.path(),
807            "pyproject.toml",
808            r#"
809[project]
810name = "pydemo"
811version = "2.0.1"
812requires-python = ">=3.10"
813"#,
814        );
815        let (triples, _summary, subject) = scan_project(tmp.path(), "fb").expect("scan");
816        assert_eq!(subject, "pydemo");
817        assert!(triples
818            .iter()
819            .any(|t| t.predicate == "has_language" && t.object == "Python"));
820        assert!(triples
821            .iter()
822            .any(|t| t.predicate == "has_version" && t.object == "2.0.1"));
823        assert!(triples
824            .iter()
825            .any(|t| t.predicate == "requires_python" && t.object == ">=3.10"));
826    }
827
828    /// Why: Go modules name themselves in `go.mod`; confirm module-name
829    /// extraction + language tag.
830    #[test]
831    fn scan_project_extracts_go_mod() {
832        let tmp = tempfile::tempdir().expect("tempdir");
833        write(
834            tmp.path(),
835            "go.mod",
836            "module github.com/example/widget\n\ngo 1.22\n",
837        );
838        let (triples, _summary, subject) = scan_project(tmp.path(), "fb").expect("scan");
839        assert_eq!(subject, "github.com/example/widget");
840        assert!(triples
841            .iter()
842            .any(|t| t.predicate == "has_language" && t.object == "Go"));
843    }
844
845    /// Why: CLAUDE.md's first H1 becomes the project description; pin the
846    /// extractor against a typical heading + leading frontmatter.
847    #[test]
848    fn scan_project_extracts_claude_md_h1() {
849        let tmp = tempfile::tempdir().expect("tempdir");
850        write(
851            tmp.path(),
852            "Cargo.toml",
853            r#"
854[package]
855name = "demo"
856version = "0.1.0"
857"#,
858        );
859        write(
860            tmp.path(),
861            "CLAUDE.md",
862            "\n\n# Demo Project — orientation guide\n\nSome body text.\n",
863        );
864        let (triples, _summary, _subject) = scan_project(tmp.path(), "fb").expect("scan");
865        assert!(triples.iter().any(|t| t.subject == "demo"
866            && t.predicate == "has_description"
867            && t.object == "Demo Project — orientation guide"));
868    }
869
870    /// Why: .git/config is the canonical source-repo URL; confirm extraction
871    /// across SSH-style URLs.
872    #[test]
873    fn scan_project_extracts_git_origin() {
874        let tmp = tempfile::tempdir().expect("tempdir");
875        write(
876            tmp.path(),
877            "Cargo.toml",
878            r#"
879[package]
880name = "demo"
881version = "0.1.0"
882"#,
883        );
884        write(
885            tmp.path(),
886            ".git/config",
887            "[core]\n\trepositoryformatversion = 0\n[remote \"origin\"]\n\turl = git@github.com:example/demo.git\n",
888        );
889        let (triples, _summary, _) = scan_project(tmp.path(), "fb").expect("scan");
890        assert!(
891            triples
892                .iter()
893                .any(|t| t.predicate == "source_repo"
894                    && t.object == "git@github.com:example/demo.git")
895        );
896    }
897
898    /// Why: When no manifest matches, the fallback subject (palace id) must
899    /// be returned so temporal triples still have a stable anchor.
900    #[test]
901    fn scan_project_falls_back_to_palace_id_when_no_manifest() {
902        let tmp = tempfile::tempdir().expect("tempdir");
903        let (triples, summary, subject) = scan_project(tmp.path(), "my-palace").expect("scan");
904        assert_eq!(subject, "my-palace");
905        assert!(triples.is_empty());
906        assert!(summary.is_empty());
907    }
908}