Skip to main content

trusty_memory/
bootstrap.rs

1//! Knowledge-graph bootstrap helpers.
2//!
3//! Why: Issue #60 — after `palace_create`, the knowledge graph (KG) sits at
4//! zero triples and there is no auto-discovery path. Users have no idea
5//! they're supposed to call `kg_assert` manually before `kg_query` returns
6//! anything useful. `kg_bootstrap` closes this gap by scanning well-known
7//! project files (`Cargo.toml`, `package.json`, `pyproject.toml`, `CLAUDE.md`,
8//! `.git/config`, `go.mod`) and seeding structured triples that describe the
9//! project (language, version, source repo, etc.). It also seeds temporal
10//! metadata (`created_at`, `bootstrapped_at`) so even an empty project at
11//! least has *something* in the KG and a timestamp anchor for future queries.
12//! What: A pure-blocking scanner (`scan_project`) returns a flat list of
13//! `(subject, predicate, object, provenance)` tuples; the public async entry
14//! point `bootstrap_palace` resolves a palace handle, runs the scanner, and
15//! asserts each tuple through the existing `KnowledgeGraph::assert` path.
16//! Test: Unit tests pin each scanner against fixture directories;
17//! `kg_bootstrap` is exercised end-to-end from the MCP tool surface in
18//! `tools.rs`.
19//!
20//! Design notes:
21//! - Missing files are NOT errors — every read is best-effort. The scanner
22//!   returns whatever triples it could derive and skips the rest with a
23//!   debug-level log.
//! - Each manifest scanner uses the name it extracted as the subject of its
//!   own triples; the first manifest that yields a name becomes the project
//!   subject. When no project name can be derived from manifests, the palace
//!   ID is used as a fallback so the temporal triples still anchor to a
//!   stable subject.
28//! - Provenance strings are stable identifiers (`bootstrap:cargo.toml`,
29//!   `bootstrap:package.json`, …) so operators can audit which scanner
30//!   asserted each triple and retract by source if needed.
31
32use anyhow::{anyhow, Context, Result};
33use serde::Serialize;
34use std::path::{Path, PathBuf};
35use trusty_common::memory_core::store::kg::Triple;
36
37use crate::AppState;
38
/// One scanner discovery, held in plain-string form until it is asserted.
///
/// Why: The scanners emit these lightweight records instead of full
/// `Triple`s so extraction logic can be unit-tested without timestamps or
/// confidence values. `bootstrap_palace` turns each record into a `Triple`
/// just before assertion, stamping it with the current time.
/// What: Subject / predicate / object plus a provenance tag naming the
/// scanner that produced the fact.
/// Test: Every scanner test checks that the expected `BootstrapTriple`s
/// appear in the scan output.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BootstrapTriple {
    /// Entity the fact is about (a project name or the fallback subject).
    pub subject: String,
    /// Relation name, e.g. `has_language` or `source_repo`.
    pub predicate: String,
    /// Value of the relation.
    pub object: String,
    /// Stable scanner identifier, e.g. `bootstrap:cargo.toml`.
    pub provenance: String,
}
57
58/// Per-file scan summary returned to the MCP caller.
59///
60/// Why: Operators want to know *which* files contributed to the bootstrap
61/// (and which were absent) without re-running the tool with verbose logging.
62/// What: Filename + count of triples it produced; emitted as JSON in the
63/// MCP response.
64/// Test: `bootstrap_palace_returns_per_file_counts`.
65#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
66pub struct ScannedFile {
67    pub file: String,
68    pub triples: usize,
69}
70
71/// Aggregate result of a bootstrap run.
72///
73/// Why: The MCP `kg_bootstrap` tool returns this verbatim so the model (or a
74/// human operator) can see exactly what was asserted and which files were
75/// scanned.
76/// What: Total triple count + per-file summaries + the resolved project
77/// subject. `Serialize` so it round-trips into the MCP JSON envelope.
78/// Test: `bootstrap_palace_seeds_temporal_metadata_when_no_files`.
79#[derive(Debug, Clone, Serialize)]
80pub struct BootstrapResult {
81    pub palace: String,
82    pub project_subject: String,
83    pub triples_asserted: usize,
84    pub scanned_files: Vec<ScannedFile>,
85}
86
87/// Run the bootstrap scan against a palace.
88///
89/// Why: Single async entry point that the MCP dispatcher (and the
90/// auto-bootstrap hook in `palace_create`) calls. Encapsulates path
91/// resolution, scanning, triple construction, and KG assertion.
92/// What: Resolves `project_path` (caller-supplied), runs the blocking
93/// scanner, seeds temporal metadata triples, and asserts every discovery
94/// through `handle.kg.assert(...)`. Returns a summary of what was written.
95/// Test: `bootstrap_palace_seeds_temporal_metadata_when_no_files`,
96/// `bootstrap_palace_scans_cargo_toml`.
97pub async fn bootstrap_palace(
98    state: &AppState,
99    palace_id: &str,
100    project_path: Option<&Path>,
101) -> Result<BootstrapResult> {
102    let handle = state
103        .registry
104        .open_palace(
105            &state.data_root,
106            &trusty_common::memory_core::palace::PalaceId::new(palace_id),
107        )
108        .with_context(|| format!("open palace {palace_id}"))?;
109
110    // Choose the scan root. When the caller did not supply a project path,
111    // we still scan the palace's own data dir so `CLAUDE.md` or other
112    // operator-placed files inside the palace are picked up.
113    let scan_root: PathBuf = match project_path {
114        Some(p) => p.to_path_buf(),
115        None => handle
116            .data_dir
117            .clone()
118            .unwrap_or_else(|| state.data_root.join(palace_id)),
119    };
120    let palace_id_owned = palace_id.to_string();
121
122    let (triples, scanned_files, project_subject) =
123        tokio::task::spawn_blocking(move || scan_project(&scan_root, &palace_id_owned))
124            .await
125            .context("join scan_project")??;
126
127    // Seed temporal metadata (always present, even for empty projects).
128    let now = chrono::Utc::now();
129    let mut all = triples;
130    all.push(BootstrapTriple {
131        subject: project_subject.clone(),
132        predicate: "bootstrapped_at".to_string(),
133        object: now.to_rfc3339(),
134        provenance: "bootstrap:temporal".to_string(),
135    });
136    // `created_at` is only inserted when the palace doesn't yet have one;
137    // re-running bootstrap must not lie about when the palace first came
138    // into being. The KG's temporal layer would close the prior interval
139    // and the new interval would carry a misleading `valid_from`. Check
140    // `query_active` before writing.
141    let existing = handle
142        .kg
143        .query_active(&project_subject)
144        .await
145        .context("kg.query_active for created_at check")?;
146    if !existing.iter().any(|t| t.predicate == "created_at") {
147        all.push(BootstrapTriple {
148            subject: project_subject.clone(),
149            predicate: "created_at".to_string(),
150            object: now.to_rfc3339(),
151            provenance: "bootstrap:temporal".to_string(),
152        });
153    }
154
155    let mut asserted = 0usize;
156    for bt in &all {
157        let triple = Triple {
158            subject: bt.subject.clone(),
159            predicate: bt.predicate.clone(),
160            object: bt.object.clone(),
161            valid_from: now,
162            valid_to: None,
163            confidence: 1.0,
164            provenance: Some(bt.provenance.clone()),
165        };
166        handle
167            .kg
168            .assert(triple)
169            .await
170            .with_context(|| format!("kg.assert {} {}", bt.subject, bt.predicate))?;
171        asserted += 1;
172    }
173
174    Ok(BootstrapResult {
175        palace: palace_id.to_string(),
176        project_subject,
177        triples_asserted: asserted,
178        scanned_files,
179    })
180}
181
182/// Blocking scanner: walk well-known files under `root` and extract triples.
183///
184/// Why: Pulled out as a sync function so the file I/O + TOML/JSON parsing
185/// run on a blocking thread (via `spawn_blocking`) and the algorithm itself
186/// is trivially unit-testable against fixture directories.
187/// What: Returns `(triples, per_file_summary, project_subject)`. The
188/// project subject is derived from the first manifest that yields a name;
189/// falls back to `fallback_subject` (typically the palace id) when none
190/// match.
191/// Test: `scan_project_extracts_cargo_facts`,
192/// `scan_project_extracts_package_json`,
193/// `scan_project_falls_back_to_palace_id_when_no_manifest`.
194pub fn scan_project(
195    root: &Path,
196    fallback_subject: &str,
197) -> Result<(Vec<BootstrapTriple>, Vec<ScannedFile>, String)> {
198    let mut triples: Vec<BootstrapTriple> = Vec::new();
199    let mut summary: Vec<ScannedFile> = Vec::new();
200    let mut project_subject: Option<String> = None;
201
202    // 1. Cargo.toml
203    let before = triples.len();
204    if let Some(name) = scan_cargo_toml(root, &mut triples) {
205        project_subject.get_or_insert(name);
206    }
207    if triples.len() > before {
208        summary.push(ScannedFile {
209            file: "Cargo.toml".to_string(),
210            triples: triples.len() - before,
211        });
212    }
213
214    // 2. package.json
215    let before = triples.len();
216    if let Some(name) = scan_package_json(root, &mut triples) {
217        project_subject.get_or_insert(name);
218    }
219    if triples.len() > before {
220        summary.push(ScannedFile {
221            file: "package.json".to_string(),
222            triples: triples.len() - before,
223        });
224    }
225
226    // 3. pyproject.toml
227    let before = triples.len();
228    if let Some(name) = scan_pyproject_toml(root, &mut triples) {
229        project_subject.get_or_insert(name);
230    }
231    if triples.len() > before {
232        summary.push(ScannedFile {
233            file: "pyproject.toml".to_string(),
234            triples: triples.len() - before,
235        });
236    }
237
238    // 4. go.mod
239    let before = triples.len();
240    if let Some(name) = scan_go_mod(root, &mut triples) {
241        project_subject.get_or_insert(name);
242    }
243    if triples.len() > before {
244        summary.push(ScannedFile {
245            file: "go.mod".to_string(),
246            triples: triples.len() - before,
247        });
248    }
249
250    // 5. CLAUDE.md — first H1 heading as descriptive name. Does not set
251    //    project_subject (the manifest sources are stronger signals) but
252    //    contributes a `has_description` triple when the subject is known.
253    let before = triples.len();
254    scan_claude_md(root, project_subject.as_deref(), &mut triples);
255    if triples.len() > before {
256        summary.push(ScannedFile {
257            file: "CLAUDE.md".to_string(),
258            triples: triples.len() - before,
259        });
260    }
261
262    // 6. .git/config — source repo URL.
263    let before = triples.len();
264    scan_git_config(root, project_subject.as_deref(), &mut triples);
265    if triples.len() > before {
266        summary.push(ScannedFile {
267            file: ".git/config".to_string(),
268            triples: triples.len() - before,
269        });
270    }
271
272    let subject = project_subject.unwrap_or_else(|| fallback_subject.to_string());
273
274    // Rewrite any triples that used a placeholder subject (only the
275    // CLAUDE.md / .git/config scanners are subject-dependent; if no manifest
276    // matched, those scanners ran with subject=None and produced nothing, so
277    // this is currently a no-op — but keeping the loop makes future scanner
278    // additions safe).
279    for t in &mut triples {
280        if t.subject.is_empty() {
281            t.subject = subject.clone();
282        }
283    }
284
285    Ok((triples, summary, subject))
286}
287
288/// Scan `Cargo.toml`. Returns the package/workspace name if extracted.
289///
290/// Why: Rust projects are the primary trusty-tools target; we want
291/// `has_language=Rust`, `has_version`, `has_edition`, `has_rust_version`,
292/// and `workspace_member` triples auto-populated so `kg_query` against the
293/// project name returns useful context immediately.
294/// What: Parses the TOML; emits `(name, has_language, "Rust")` always when
295/// the manifest exists, plus version/edition/rust-version/workspace member
296/// triples when present.
297/// Test: `scan_project_extracts_cargo_facts`.
298fn scan_cargo_toml(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
299    let manifest = root.join("Cargo.toml");
300    let raw = std::fs::read_to_string(&manifest).ok()?;
301    let parsed: toml::Value = match toml::from_str(&raw) {
302        Ok(v) => v,
303        Err(e) => {
304            tracing::debug!("bootstrap: parse Cargo.toml failed: {e:#}");
305            return None;
306        }
307    };
308
309    // Workspace root manifests may have no [package] section. Use the
310    // workspace.package.name if present; otherwise derive from the dir name.
311    let name = parsed
312        .get("package")
313        .and_then(|p| p.get("name"))
314        .and_then(|n| n.as_str())
315        .map(|s| s.to_string())
316        .or_else(|| {
317            parsed
318                .get("workspace")
319                .and_then(|w| w.get("package"))
320                .and_then(|p| p.get("name"))
321                .and_then(|n| n.as_str())
322                .map(|s| s.to_string())
323        })
324        .or_else(|| {
325            root.file_name()
326                .and_then(|n| n.to_str())
327                .map(|s| s.to_string())
328        })?;
329
330    out.push(BootstrapTriple {
331        subject: name.clone(),
332        predicate: "has_language".to_string(),
333        object: "Rust".to_string(),
334        provenance: "bootstrap:cargo.toml".to_string(),
335    });
336
337    if let Some(version) = parsed
338        .get("package")
339        .and_then(|p| p.get("version"))
340        .and_then(|v| v.as_str())
341    {
342        out.push(BootstrapTriple {
343            subject: name.clone(),
344            predicate: "has_version".to_string(),
345            object: version.to_string(),
346            provenance: "bootstrap:cargo.toml".to_string(),
347        });
348    }
349    if let Some(edition) = parsed
350        .get("package")
351        .and_then(|p| p.get("edition"))
352        .and_then(|v| v.as_str())
353    {
354        out.push(BootstrapTriple {
355            subject: name.clone(),
356            predicate: "has_edition".to_string(),
357            object: edition.to_string(),
358            provenance: "bootstrap:cargo.toml".to_string(),
359        });
360    }
361    if let Some(rv) = parsed
362        .get("package")
363        .and_then(|p| p.get("rust-version"))
364        .and_then(|v| v.as_str())
365    {
366        out.push(BootstrapTriple {
367            subject: name.clone(),
368            predicate: "has_rust_version".to_string(),
369            object: rv.to_string(),
370            provenance: "bootstrap:cargo.toml".to_string(),
371        });
372    }
373
374    // Workspace members (capped at 64 to avoid flooding the KG on huge
375    // monorepos; bootstrap is a coarse seeder, not an exhaustive index).
376    if let Some(members) = parsed
377        .get("workspace")
378        .and_then(|w| w.get("members"))
379        .and_then(|m| m.as_array())
380    {
381        for member in members.iter().take(64) {
382            if let Some(s) = member.as_str() {
383                out.push(BootstrapTriple {
384                    subject: name.clone(),
385                    predicate: "has_workspace_member".to_string(),
386                    object: s.to_string(),
387                    provenance: "bootstrap:cargo.toml".to_string(),
388                });
389            }
390        }
391    }
392
393    Some(name)
394}
395
396/// Scan `package.json`.
397///
398/// Why: Node/TypeScript projects are the second most common target. We want
399/// `has_language=JavaScript`, `has_version`, and `has_dependency` triples.
400/// What: Parses the JSON; emits language/version triples + one
401/// `has_dependency` per top-level key in the `dependencies` object (cap 64).
402/// Test: `scan_project_extracts_package_json`.
403fn scan_package_json(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
404    let manifest = root.join("package.json");
405    let raw = std::fs::read_to_string(&manifest).ok()?;
406    let parsed: serde_json::Value = match serde_json::from_str(&raw) {
407        Ok(v) => v,
408        Err(e) => {
409            tracing::debug!("bootstrap: parse package.json failed: {e:#}");
410            return None;
411        }
412    };
413    let name = parsed.get("name").and_then(|n| n.as_str())?.to_string();
414
415    out.push(BootstrapTriple {
416        subject: name.clone(),
417        predicate: "has_language".to_string(),
418        object: "JavaScript".to_string(),
419        provenance: "bootstrap:package.json".to_string(),
420    });
421
422    if let Some(version) = parsed.get("version").and_then(|v| v.as_str()) {
423        out.push(BootstrapTriple {
424            subject: name.clone(),
425            predicate: "has_version".to_string(),
426            object: version.to_string(),
427            provenance: "bootstrap:package.json".to_string(),
428        });
429    }
430
431    if let Some(deps) = parsed.get("dependencies").and_then(|d| d.as_object()) {
432        for (k, _) in deps.iter().take(64) {
433            out.push(BootstrapTriple {
434                subject: name.clone(),
435                predicate: "has_dependency".to_string(),
436                object: k.clone(),
437                provenance: "bootstrap:package.json".to_string(),
438            });
439        }
440    }
441
442    Some(name)
443}
444
445/// Scan `pyproject.toml`.
446///
447/// Why: Python projects use PEP-621 `[project]` metadata; surfacing the
448/// language tag + version + `requires-python` makes Python repos legible to
449/// the KG without manual assertions.
450/// What: Parses the TOML; emits language/version/requires-python triples
451/// when the `[project]` table is present.
452/// Test: `scan_project_extracts_pyproject`.
453fn scan_pyproject_toml(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
454    let manifest = root.join("pyproject.toml");
455    let raw = std::fs::read_to_string(&manifest).ok()?;
456    let parsed: toml::Value = match toml::from_str(&raw) {
457        Ok(v) => v,
458        Err(e) => {
459            tracing::debug!("bootstrap: parse pyproject.toml failed: {e:#}");
460            return None;
461        }
462    };
463    let project = parsed.get("project")?;
464    let name = project.get("name").and_then(|n| n.as_str())?.to_string();
465
466    out.push(BootstrapTriple {
467        subject: name.clone(),
468        predicate: "has_language".to_string(),
469        object: "Python".to_string(),
470        provenance: "bootstrap:pyproject.toml".to_string(),
471    });
472
473    if let Some(v) = project.get("version").and_then(|v| v.as_str()) {
474        out.push(BootstrapTriple {
475            subject: name.clone(),
476            predicate: "has_version".to_string(),
477            object: v.to_string(),
478            provenance: "bootstrap:pyproject.toml".to_string(),
479        });
480    }
481    if let Some(rp) = project.get("requires-python").and_then(|v| v.as_str()) {
482        out.push(BootstrapTriple {
483            subject: name.clone(),
484            predicate: "requires_python".to_string(),
485            object: rp.to_string(),
486            provenance: "bootstrap:pyproject.toml".to_string(),
487        });
488    }
489
490    Some(name)
491}
492
493/// Scan `go.mod` for the module name.
494///
495/// Why: Go projects encode their canonical name on the `module` line of
496/// `go.mod`; surfacing it as the project subject lets Go repos opt into the
497/// same KG shape as Rust/Node/Python.
498/// What: Reads `go.mod`, extracts the `module <name>` directive, and emits
499/// `(name, has_language, "Go")` plus `(name, has_module_path, <name>)`.
500/// Test: `scan_project_extracts_go_mod`.
501fn scan_go_mod(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
502    let raw = std::fs::read_to_string(root.join("go.mod")).ok()?;
503    let module = raw
504        .lines()
505        .find_map(|line| line.trim().strip_prefix("module "))
506        .map(|s| s.trim().to_string())?;
507    if module.is_empty() {
508        return None;
509    }
510    out.push(BootstrapTriple {
511        subject: module.clone(),
512        predicate: "has_language".to_string(),
513        object: "Go".to_string(),
514        provenance: "bootstrap:go.mod".to_string(),
515    });
516    out.push(BootstrapTriple {
517        subject: module.clone(),
518        predicate: "has_module_path".to_string(),
519        object: module.clone(),
520        provenance: "bootstrap:go.mod".to_string(),
521    });
522    Some(module)
523}
524
525/// Scan `CLAUDE.md` for the first H1 heading; attach as project description.
526///
527/// Why: Trusty-* projects use `CLAUDE.md` as the canonical orientation
528/// document; the first H1 line is invariably the project name/tagline and
529/// makes a good `has_description` triple.
530/// What: Walks lines, finds the first `# Title` heading, strips the prefix,
531/// and pushes a `has_description` triple under `subject` (when known).
532/// Test: `scan_project_extracts_claude_md_h1`.
533fn scan_claude_md(root: &Path, subject: Option<&str>, out: &mut Vec<BootstrapTriple>) {
534    let Some(subject) = subject else {
535        // No project subject yet — skip; we don't want orphan triples.
536        return;
537    };
538    let Ok(raw) = std::fs::read_to_string(root.join("CLAUDE.md")) else {
539        return;
540    };
541    if let Some(h1) = raw.lines().find_map(|line| {
542        let t = line.trim_start();
543        t.strip_prefix("# ")
544            .filter(|rest| !rest.is_empty())
545            .map(|s| s.trim().to_string())
546    }) {
547        out.push(BootstrapTriple {
548            subject: subject.to_string(),
549            predicate: "has_description".to_string(),
550            object: h1,
551            provenance: "bootstrap:claude.md".to_string(),
552        });
553    }
554}
555
556/// Scan `.git/config` for the `remote.origin.url`.
557///
558/// Why: Tying a project to its source repo URL is the single highest-signal
559/// fact for downstream tooling (link to issues, find upstream, etc.).
560/// What: Reuses the same INI-ish scan as `discovery::extract_origin_url` but
561/// kept inline here so `bootstrap` is self-contained. Emits a
562/// `(subject, source_repo, url)` triple.
563/// Test: `scan_project_extracts_git_origin`.
564fn scan_git_config(root: &Path, subject: Option<&str>, out: &mut Vec<BootstrapTriple>) {
565    let Some(subject) = subject else { return };
566    let Ok(raw) = std::fs::read_to_string(root.join(".git").join("config")) else {
567        return;
568    };
569    let mut in_origin = false;
570    for line in raw.lines() {
571        let trimmed = line.trim();
572        if trimmed.starts_with('[') {
573            in_origin = trimmed == "[remote \"origin\"]";
574            continue;
575        }
576        if in_origin {
577            if let Some(rest) = trimmed.strip_prefix("url") {
578                let rest = rest.trim_start();
579                if let Some(rest) = rest.strip_prefix('=') {
580                    let url = rest.trim().to_string();
581                    if !url.is_empty() {
582                        out.push(BootstrapTriple {
583                            subject: subject.to_string(),
584                            predicate: "source_repo".to_string(),
585                            object: url,
586                            provenance: "bootstrap:git.config".to_string(),
587                        });
588                        return;
589                    }
590                }
591            }
592        }
593    }
594}
595
/// Hint text attached to a `kg_query` response when the KG has nothing to
/// return.
///
/// Why: Issue #60 — a `kg_query` against a brand-new palace yields an empty
/// triples array with no sign that `kg_bootstrap` / `kg_assert` exist. A
/// short hint in the response points the user at both tools.
/// What: A static string, defined here so tests can pin its exact wording.
/// Test: `kg_query_emits_hint_when_palace_empty` in `tools.rs`.
pub const KG_EMPTY_HINT: &str = concat!(
    "Knowledge graph is empty. Run kg_bootstrap to seed it from project files, ",
    "or use kg_assert to add triples manually.",
);
607
608/// Convenience: count active triples across an entire palace.
609///
610/// Why: `kg_query` is per-subject, so to determine "is the KG empty?" the
611/// `kg_query` handler needs a separate broader check. Centralising the
612/// emptiness check here keeps the hint logic in one place and lets future
613/// changes (e.g. counting across closets) live alongside their consumer.
614/// What: Returns `Ok(true)` iff the palace has zero triples for the queried
615/// subject AND the broader "is_anything_asserted" check is empty. Practical
616/// emptiness: we treat the palace as empty if the queried subject returned
617/// no triples — this is the user's signal that something is wrong, even if
618/// other subjects have data.
619/// Test: covered indirectly through `kg_query_emits_hint_when_palace_empty`.
620pub fn is_kg_empty_for_subject(triples: &[Triple]) -> bool {
621    triples.is_empty()
622}
623
624/// Helper: bubble up the bootstrap result as the MCP JSON envelope expects.
625///
626/// Why: `tools.rs` keeps the dispatcher branches small; converting the
627/// `BootstrapResult` into a `serde_json::Value` here keeps the JSON shape
628/// owned by this module and stable for tests.
629/// What: Serialises the result via serde and wraps any failure in
630/// `anyhow::Error` with context.
631/// Test: round-tripped via the MCP dispatcher test.
632pub fn result_to_json(r: &BootstrapResult) -> Result<serde_json::Value> {
633    serde_json::to_value(r).map_err(|e| anyhow!("serialize BootstrapResult: {e}"))
634}
635
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Create `rel` (and any missing parent directories) under `root` with
    /// the given contents.
    fn write(root: &Path, rel: &str, content: &str) {
        let target = root.join(rel);
        if let Some(dir) = target.parent() {
            fs::create_dir_all(dir).expect("mkdir");
        }
        fs::write(&target, content).expect("write");
    }

    /// True when some triple carries exactly this predicate/object pair,
    /// regardless of subject.
    fn has_fact(triples: &[BootstrapTriple], predicate: &str, object: &str) -> bool {
        triples
            .iter()
            .any(|t| t.predicate == predicate && t.object == object)
    }

    /// True when some triple matches subject, predicate, and object.
    fn has_fact_for(
        triples: &[BootstrapTriple],
        subject: &str,
        predicate: &str,
        object: &str,
    ) -> bool {
        triples
            .iter()
            .any(|t| t.subject == subject && t.predicate == predicate && t.object == object)
    }

    /// Why: Pin the Cargo.toml scanner against a realistic single-crate
    /// manifest. Covers name/version/edition/rust-version extraction.
    #[test]
    fn scan_project_extracts_cargo_facts() {
        let tmp = tempfile::tempdir().expect("tempdir");
        write(
            tmp.path(),
            "Cargo.toml",
            r#"
[package]
name = "demo-crate"
version = "1.2.3"
edition = "2021"
rust-version = "1.88"
"#,
        );
        let (triples, summary, subject) =
            scan_project(tmp.path(), "fallback").expect("scan_project");

        assert_eq!(subject, "demo-crate");
        assert!(summary.iter().any(|s| s.file == "Cargo.toml"));
        for (predicate, object) in [
            ("has_language", "Rust"),
            ("has_version", "1.2.3"),
            ("has_edition", "2021"),
            ("has_rust_version", "1.88"),
        ] {
            assert!(has_fact_for(&triples, "demo-crate", predicate, object));
        }
    }

    /// Why: Workspace manifests have no `[package]` section but a
    /// `[workspace]` table with members; the scanner must still emit
    /// workspace-member triples and take the subject from the directory
    /// name.
    #[test]
    fn scan_project_extracts_workspace_members() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path().join("trusty-tools");
        fs::create_dir_all(&root).expect("mkdir");
        write(
            &root,
            "Cargo.toml",
            r#"
[workspace]
members = ["crates/foo", "crates/bar"]
resolver = "2"
"#,
        );
        let (triples, _summary, subject) = scan_project(&root, "fallback").expect("scan_project");

        assert_eq!(subject, "trusty-tools");
        assert!(has_fact(&triples, "has_workspace_member", "crates/foo"));
        assert!(has_fact(&triples, "has_workspace_member", "crates/bar"));
    }

    /// Why: package.json is the JS/TS entry point; pin name/version plus one
    /// `has_dependency` triple per top-level dependency key.
    #[test]
    fn scan_project_extracts_package_json() {
        let tmp = tempfile::tempdir().expect("tempdir");
        write(
            tmp.path(),
            "package.json",
            r#"{
  "name": "my-app",
  "version": "0.5.0",
  "dependencies": {
    "react": "^18.0.0",
    "lodash": "^4.0.0"
  }
}"#,
        );
        let (triples, _summary, subject) = scan_project(tmp.path(), "fb").expect("scan");

        assert_eq!(subject, "my-app");
        assert!(has_fact(&triples, "has_language", "JavaScript"));
        assert!(has_fact(&triples, "has_version", "0.5.0"));
        assert!(has_fact(&triples, "has_dependency", "react"));
        assert!(has_fact(&triples, "has_dependency", "lodash"));
    }

    /// Why: pyproject.toml uses the PEP-621 `[project]` table; confirm the
    /// language/version/requires-python triples land.
    #[test]
    fn scan_project_extracts_pyproject() {
        let tmp = tempfile::tempdir().expect("tempdir");
        write(
            tmp.path(),
            "pyproject.toml",
            r#"
[project]
name = "pydemo"
version = "2.0.1"
requires-python = ">=3.10"
"#,
        );
        let (triples, _summary, subject) = scan_project(tmp.path(), "fb").expect("scan");

        assert_eq!(subject, "pydemo");
        assert!(has_fact(&triples, "has_language", "Python"));
        assert!(has_fact(&triples, "has_version", "2.0.1"));
        assert!(has_fact(&triples, "requires_python", ">=3.10"));
    }

    /// Why: Go modules name themselves in `go.mod`; confirm module-name
    /// extraction and the language tag.
    #[test]
    fn scan_project_extracts_go_mod() {
        let tmp = tempfile::tempdir().expect("tempdir");
        write(
            tmp.path(),
            "go.mod",
            "module github.com/example/widget\n\ngo 1.22\n",
        );
        let (triples, _summary, subject) = scan_project(tmp.path(), "fb").expect("scan");

        assert_eq!(subject, "github.com/example/widget");
        assert!(has_fact(&triples, "has_language", "Go"));
    }

    /// Why: CLAUDE.md's first H1 becomes the project description; pin the
    /// extractor against a typical heading preceded by blank lines.
    #[test]
    fn scan_project_extracts_claude_md_h1() {
        let tmp = tempfile::tempdir().expect("tempdir");
        write(
            tmp.path(),
            "Cargo.toml",
            r#"
[package]
name = "demo"
version = "0.1.0"
"#,
        );
        write(
            tmp.path(),
            "CLAUDE.md",
            "\n\n# Demo Project — orientation guide\n\nSome body text.\n",
        );
        let (triples, _summary, _subject) = scan_project(tmp.path(), "fb").expect("scan");

        assert!(has_fact_for(
            &triples,
            "demo",
            "has_description",
            "Demo Project — orientation guide"
        ));
    }

    /// Why: .git/config is the canonical source-repo URL; confirm extraction
    /// of an SSH-style origin URL.
    #[test]
    fn scan_project_extracts_git_origin() {
        let tmp = tempfile::tempdir().expect("tempdir");
        write(
            tmp.path(),
            "Cargo.toml",
            r#"
[package]
name = "demo"
version = "0.1.0"
"#,
        );
        write(
            tmp.path(),
            ".git/config",
            "[core]\n\trepositoryformatversion = 0\n[remote \"origin\"]\n\turl = git@github.com:example/demo.git\n",
        );
        let (triples, _summary, _) = scan_project(tmp.path(), "fb").expect("scan");

        assert!(has_fact(
            &triples,
            "source_repo",
            "git@github.com:example/demo.git"
        ));
    }

    /// Why: When no manifest matches, the fallback subject (palace id) must
    /// be returned so temporal triples still have a stable anchor.
    #[test]
    fn scan_project_falls_back_to_palace_id_when_no_manifest() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let (triples, summary, subject) = scan_project(tmp.path(), "my-palace").expect("scan");

        assert_eq!(subject, "my-palace");
        assert!(triples.is_empty());
        assert!(summary.is_empty());
    }
}
850}