1use anyhow::{anyhow, Context, Result};
33use serde::Serialize;
34use std::path::{Path, PathBuf};
35use trusty_common::memory_core::store::kg::Triple;
36
37use crate::AppState;
38
/// A single subject–predicate–object fact produced by the bootstrap scanners,
/// before it is converted into a knowledge-graph [`Triple`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BootstrapTriple {
    /// Entity the fact is about (a package / module / project name in this file).
    pub subject: String,
    /// Relation name, e.g. `has_language` or `has_version`.
    pub predicate: String,
    /// Value of the relation.
    pub object: String,
    /// Tag naming the scanner that produced the fact, e.g. `bootstrap:cargo.toml`.
    pub provenance: String,
}
57
/// Per-file summary entry: how many triples one scanned file contributed.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ScannedFile {
    /// File label relative to the scan root (e.g. `Cargo.toml`, `.git/config`).
    pub file: String,
    /// Number of triples that file contributed.
    pub triples: usize,
}
70
/// Serializable outcome of one [`bootstrap_palace`] run.
#[derive(Debug, Clone, Serialize)]
pub struct BootstrapResult {
    /// Id of the palace that was bootstrapped, as passed by the caller.
    pub palace: String,
    /// Subject the project-level facts were anchored to.
    pub project_subject: String,
    /// Number of triples asserted into the knowledge graph.
    pub triples_asserted: usize,
    /// One entry per scanned file that contributed at least one triple.
    pub scanned_files: Vec<ScannedFile>,
}
86
87pub async fn bootstrap_palace(
98 state: &AppState,
99 palace_id: &str,
100 project_path: Option<&Path>,
101) -> Result<BootstrapResult> {
102 let handle = state
103 .registry
104 .open_palace(
105 &state.data_root,
106 &trusty_common::memory_core::palace::PalaceId::new(palace_id),
107 )
108 .with_context(|| format!("open palace {palace_id}"))?;
109
110 let scan_root: PathBuf = match project_path {
114 Some(p) => p.to_path_buf(),
115 None => handle
116 .data_dir
117 .clone()
118 .unwrap_or_else(|| state.data_root.join(palace_id)),
119 };
120 let palace_id_owned = palace_id.to_string();
121
122 let (triples, scanned_files, project_subject) =
123 tokio::task::spawn_blocking(move || scan_project(&scan_root, &palace_id_owned))
124 .await
125 .context("join scan_project")??;
126
127 let now = chrono::Utc::now();
129 let mut all = triples;
130 all.push(BootstrapTriple {
131 subject: project_subject.clone(),
132 predicate: "bootstrapped_at".to_string(),
133 object: now.to_rfc3339(),
134 provenance: "bootstrap:temporal".to_string(),
135 });
136 let existing = handle
142 .kg
143 .query_active(&project_subject)
144 .await
145 .context("kg.query_active for created_at check")?;
146 if !existing.iter().any(|t| t.predicate == "created_at") {
147 all.push(BootstrapTriple {
148 subject: project_subject.clone(),
149 predicate: "created_at".to_string(),
150 object: now.to_rfc3339(),
151 provenance: "bootstrap:temporal".to_string(),
152 });
153 }
154
155 let mut asserted = 0usize;
156 for bt in &all {
157 let triple = Triple {
158 subject: bt.subject.clone(),
159 predicate: bt.predicate.clone(),
160 object: bt.object.clone(),
161 valid_from: now,
162 valid_to: None,
163 confidence: 1.0,
164 provenance: Some(bt.provenance.clone()),
165 };
166 handle
167 .kg
168 .assert(triple)
169 .await
170 .with_context(|| format!("kg.assert {} {}", bt.subject, bt.predicate))?;
171 asserted += 1;
172 }
173
174 Ok(BootstrapResult {
175 palace: palace_id.to_string(),
176 project_subject,
177 triples_asserted: asserted,
178 scanned_files,
179 })
180}
181
182pub fn scan_project(
195 root: &Path,
196 fallback_subject: &str,
197) -> Result<(Vec<BootstrapTriple>, Vec<ScannedFile>, String)> {
198 let mut triples: Vec<BootstrapTriple> = Vec::new();
199 let mut summary: Vec<ScannedFile> = Vec::new();
200 let mut project_subject: Option<String> = None;
201
202 let before = triples.len();
204 if let Some(name) = scan_cargo_toml(root, &mut triples) {
205 project_subject.get_or_insert(name);
206 }
207 if triples.len() > before {
208 summary.push(ScannedFile {
209 file: "Cargo.toml".to_string(),
210 triples: triples.len() - before,
211 });
212 }
213
214 let before = triples.len();
216 if let Some(name) = scan_package_json(root, &mut triples) {
217 project_subject.get_or_insert(name);
218 }
219 if triples.len() > before {
220 summary.push(ScannedFile {
221 file: "package.json".to_string(),
222 triples: triples.len() - before,
223 });
224 }
225
226 let before = triples.len();
228 if let Some(name) = scan_pyproject_toml(root, &mut triples) {
229 project_subject.get_or_insert(name);
230 }
231 if triples.len() > before {
232 summary.push(ScannedFile {
233 file: "pyproject.toml".to_string(),
234 triples: triples.len() - before,
235 });
236 }
237
238 let before = triples.len();
240 if let Some(name) = scan_go_mod(root, &mut triples) {
241 project_subject.get_or_insert(name);
242 }
243 if triples.len() > before {
244 summary.push(ScannedFile {
245 file: "go.mod".to_string(),
246 triples: triples.len() - before,
247 });
248 }
249
250 let before = triples.len();
254 scan_claude_md(root, project_subject.as_deref(), &mut triples);
255 if triples.len() > before {
256 summary.push(ScannedFile {
257 file: "CLAUDE.md".to_string(),
258 triples: triples.len() - before,
259 });
260 }
261
262 let before = triples.len();
264 scan_git_config(root, project_subject.as_deref(), &mut triples);
265 if triples.len() > before {
266 summary.push(ScannedFile {
267 file: ".git/config".to_string(),
268 triples: triples.len() - before,
269 });
270 }
271
272 let subject = project_subject.unwrap_or_else(|| fallback_subject.to_string());
273
274 for t in &mut triples {
280 if t.subject.is_empty() {
281 t.subject = subject.clone();
282 }
283 }
284
285 Ok((triples, summary, subject))
286}
287
288fn scan_cargo_toml(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
299 let manifest = root.join("Cargo.toml");
300 let raw = std::fs::read_to_string(&manifest).ok()?;
301 let parsed: toml::Value = match toml::from_str(&raw) {
302 Ok(v) => v,
303 Err(e) => {
304 tracing::debug!("bootstrap: parse Cargo.toml failed: {e:#}");
305 return None;
306 }
307 };
308
309 let name = parsed
312 .get("package")
313 .and_then(|p| p.get("name"))
314 .and_then(|n| n.as_str())
315 .map(|s| s.to_string())
316 .or_else(|| {
317 parsed
318 .get("workspace")
319 .and_then(|w| w.get("package"))
320 .and_then(|p| p.get("name"))
321 .and_then(|n| n.as_str())
322 .map(|s| s.to_string())
323 })
324 .or_else(|| {
325 root.file_name()
326 .and_then(|n| n.to_str())
327 .map(|s| s.to_string())
328 })?;
329
330 out.push(BootstrapTriple {
331 subject: name.clone(),
332 predicate: "has_language".to_string(),
333 object: "Rust".to_string(),
334 provenance: "bootstrap:cargo.toml".to_string(),
335 });
336
337 if let Some(version) = parsed
338 .get("package")
339 .and_then(|p| p.get("version"))
340 .and_then(|v| v.as_str())
341 {
342 out.push(BootstrapTriple {
343 subject: name.clone(),
344 predicate: "has_version".to_string(),
345 object: version.to_string(),
346 provenance: "bootstrap:cargo.toml".to_string(),
347 });
348 }
349 if let Some(edition) = parsed
350 .get("package")
351 .and_then(|p| p.get("edition"))
352 .and_then(|v| v.as_str())
353 {
354 out.push(BootstrapTriple {
355 subject: name.clone(),
356 predicate: "has_edition".to_string(),
357 object: edition.to_string(),
358 provenance: "bootstrap:cargo.toml".to_string(),
359 });
360 }
361 if let Some(rv) = parsed
362 .get("package")
363 .and_then(|p| p.get("rust-version"))
364 .and_then(|v| v.as_str())
365 {
366 out.push(BootstrapTriple {
367 subject: name.clone(),
368 predicate: "has_rust_version".to_string(),
369 object: rv.to_string(),
370 provenance: "bootstrap:cargo.toml".to_string(),
371 });
372 }
373
374 if let Some(members) = parsed
377 .get("workspace")
378 .and_then(|w| w.get("members"))
379 .and_then(|m| m.as_array())
380 {
381 for member in members.iter().take(64) {
382 if let Some(s) = member.as_str() {
383 out.push(BootstrapTriple {
384 subject: name.clone(),
385 predicate: "has_workspace_member".to_string(),
386 object: s.to_string(),
387 provenance: "bootstrap:cargo.toml".to_string(),
388 });
389 }
390 }
391 }
392
393 Some(name)
394}
395
396fn scan_package_json(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
404 let manifest = root.join("package.json");
405 let raw = std::fs::read_to_string(&manifest).ok()?;
406 let parsed: serde_json::Value = match serde_json::from_str(&raw) {
407 Ok(v) => v,
408 Err(e) => {
409 tracing::debug!("bootstrap: parse package.json failed: {e:#}");
410 return None;
411 }
412 };
413 let name = parsed.get("name").and_then(|n| n.as_str())?.to_string();
414
415 out.push(BootstrapTriple {
416 subject: name.clone(),
417 predicate: "has_language".to_string(),
418 object: "JavaScript".to_string(),
419 provenance: "bootstrap:package.json".to_string(),
420 });
421
422 if let Some(version) = parsed.get("version").and_then(|v| v.as_str()) {
423 out.push(BootstrapTriple {
424 subject: name.clone(),
425 predicate: "has_version".to_string(),
426 object: version.to_string(),
427 provenance: "bootstrap:package.json".to_string(),
428 });
429 }
430
431 if let Some(deps) = parsed.get("dependencies").and_then(|d| d.as_object()) {
432 for (k, _) in deps.iter().take(64) {
433 out.push(BootstrapTriple {
434 subject: name.clone(),
435 predicate: "has_dependency".to_string(),
436 object: k.clone(),
437 provenance: "bootstrap:package.json".to_string(),
438 });
439 }
440 }
441
442 Some(name)
443}
444
445fn scan_pyproject_toml(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
454 let manifest = root.join("pyproject.toml");
455 let raw = std::fs::read_to_string(&manifest).ok()?;
456 let parsed: toml::Value = match toml::from_str(&raw) {
457 Ok(v) => v,
458 Err(e) => {
459 tracing::debug!("bootstrap: parse pyproject.toml failed: {e:#}");
460 return None;
461 }
462 };
463 let project = parsed.get("project")?;
464 let name = project.get("name").and_then(|n| n.as_str())?.to_string();
465
466 out.push(BootstrapTriple {
467 subject: name.clone(),
468 predicate: "has_language".to_string(),
469 object: "Python".to_string(),
470 provenance: "bootstrap:pyproject.toml".to_string(),
471 });
472
473 if let Some(v) = project.get("version").and_then(|v| v.as_str()) {
474 out.push(BootstrapTriple {
475 subject: name.clone(),
476 predicate: "has_version".to_string(),
477 object: v.to_string(),
478 provenance: "bootstrap:pyproject.toml".to_string(),
479 });
480 }
481 if let Some(rp) = project.get("requires-python").and_then(|v| v.as_str()) {
482 out.push(BootstrapTriple {
483 subject: name.clone(),
484 predicate: "requires_python".to_string(),
485 object: rp.to_string(),
486 provenance: "bootstrap:pyproject.toml".to_string(),
487 });
488 }
489
490 Some(name)
491}
492
493fn scan_go_mod(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
502 let raw = std::fs::read_to_string(root.join("go.mod")).ok()?;
503 let module = raw
504 .lines()
505 .find_map(|line| line.trim().strip_prefix("module "))
506 .map(|s| s.trim().to_string())?;
507 if module.is_empty() {
508 return None;
509 }
510 out.push(BootstrapTriple {
511 subject: module.clone(),
512 predicate: "has_language".to_string(),
513 object: "Go".to_string(),
514 provenance: "bootstrap:go.mod".to_string(),
515 });
516 out.push(BootstrapTriple {
517 subject: module.clone(),
518 predicate: "has_module_path".to_string(),
519 object: module.clone(),
520 provenance: "bootstrap:go.mod".to_string(),
521 });
522 Some(module)
523}
524
525fn scan_claude_md(root: &Path, subject: Option<&str>, out: &mut Vec<BootstrapTriple>) {
534 let Some(subject) = subject else {
535 return;
537 };
538 let Ok(raw) = std::fs::read_to_string(root.join("CLAUDE.md")) else {
539 return;
540 };
541 if let Some(h1) = raw.lines().find_map(|line| {
542 let t = line.trim_start();
543 t.strip_prefix("# ")
544 .filter(|rest| !rest.is_empty())
545 .map(|s| s.trim().to_string())
546 }) {
547 out.push(BootstrapTriple {
548 subject: subject.to_string(),
549 predicate: "has_description".to_string(),
550 object: h1,
551 provenance: "bootstrap:claude.md".to_string(),
552 });
553 }
554}
555
556fn scan_git_config(root: &Path, subject: Option<&str>, out: &mut Vec<BootstrapTriple>) {
565 let Some(subject) = subject else { return };
566 let Ok(raw) = std::fs::read_to_string(root.join(".git").join("config")) else {
567 return;
568 };
569 let mut in_origin = false;
570 for line in raw.lines() {
571 let trimmed = line.trim();
572 if trimmed.starts_with('[') {
573 in_origin = trimmed == "[remote \"origin\"]";
574 continue;
575 }
576 if in_origin {
577 if let Some(rest) = trimmed.strip_prefix("url") {
578 let rest = rest.trim_start();
579 if let Some(rest) = rest.strip_prefix('=') {
580 let url = rest.trim().to_string();
581 if !url.is_empty() {
582 out.push(BootstrapTriple {
583 subject: subject.to_string(),
584 predicate: "source_repo".to_string(),
585 object: url,
586 provenance: "bootstrap:git.config".to_string(),
587 });
588 return;
589 }
590 }
591 }
592 }
593 }
594}
595
/// Hint text for an empty knowledge graph; it names `kg_bootstrap` and
/// `kg_assert` as the two ways to populate it.
pub const KG_EMPTY_HINT: &str =
    "Knowledge graph is empty. Run kg_bootstrap to seed it from project files, \
     or use kg_assert to add triples manually.";
607
608pub fn is_kg_empty_for_subject(triples: &[Triple]) -> bool {
621 triples.is_empty()
622}
623
624pub fn result_to_json(r: &BootstrapResult) -> Result<serde_json::Value> {
633 serde_json::to_value(r).map_err(|e| anyhow!("serialize BootstrapResult: {e}"))
634}
635
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Minimal Cargo manifest shared by the tests that only need a subject.
    const DEMO_MANIFEST: &str = r#"
[package]
name = "demo"
version = "0.1.0"
"#;

    /// Writes `content` to `root/rel`, creating parent directories as needed.
    fn write(root: &Path, rel: &str, content: &str) {
        let target = root.join(rel);
        if let Some(dir) = target.parent() {
            fs::create_dir_all(dir).expect("mkdir");
        }
        fs::write(&target, content).expect("write");
    }

    /// True when some triple states `predicate = object`, whatever its subject.
    fn has_fact(triples: &[BootstrapTriple], predicate: &str, object: &str) -> bool {
        triples
            .iter()
            .any(|t| t.predicate == predicate && t.object == object)
    }

    /// True when `subject` carries the fact `predicate = object`.
    fn has_fact_for(
        triples: &[BootstrapTriple],
        subject: &str,
        predicate: &str,
        object: &str,
    ) -> bool {
        triples
            .iter()
            .any(|t| t.subject == subject && t.predicate == predicate && t.object == object)
    }

    /// `[package]` name, version, edition and rust-version become facts.
    #[test]
    fn scan_project_extracts_cargo_facts() {
        let tmp = tempfile::tempdir().expect("tempdir");
        write(
            tmp.path(),
            "Cargo.toml",
            r#"
[package]
name = "demo-crate"
version = "1.2.3"
edition = "2021"
rust-version = "1.88"
"#,
        );

        let (triples, summary, subject) =
            scan_project(tmp.path(), "fallback").expect("scan_project");

        assert_eq!(subject, "demo-crate");
        assert!(summary.iter().any(|s| s.file == "Cargo.toml"));
        assert!(has_fact_for(&triples, "demo-crate", "has_language", "Rust"));
        assert!(has_fact_for(&triples, "demo-crate", "has_version", "1.2.3"));
        assert!(has_fact_for(&triples, "demo-crate", "has_edition", "2021"));
        assert!(has_fact_for(&triples, "demo-crate", "has_rust_version", "1.88"));
    }

    /// A workspace-only manifest is named after its directory and lists members.
    #[test]
    fn scan_project_extracts_workspace_members() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path().join("trusty-tools");
        fs::create_dir_all(&root).expect("mkdir");
        write(
            &root,
            "Cargo.toml",
            r#"
[workspace]
members = ["crates/foo", "crates/bar"]
resolver = "2"
"#,
        );

        let (triples, _summary, subject) = scan_project(&root, "fallback").expect("scan_project");

        assert_eq!(subject, "trusty-tools");
        assert!(has_fact(&triples, "has_workspace_member", "crates/foo"));
        assert!(has_fact(&triples, "has_workspace_member", "crates/bar"));
    }

    /// `package.json` yields language, version and dependency facts.
    #[test]
    fn scan_project_extracts_package_json() {
        let tmp = tempfile::tempdir().expect("tempdir");
        write(
            tmp.path(),
            "package.json",
            r#"{
  "name": "my-app",
  "version": "0.5.0",
  "dependencies": {
    "react": "^18.0.0",
    "lodash": "^4.0.0"
  }
}"#,
        );

        let (triples, _summary, subject) = scan_project(tmp.path(), "fb").expect("scan");

        assert_eq!(subject, "my-app");
        assert!(has_fact(&triples, "has_language", "JavaScript"));
        assert!(has_fact(&triples, "has_version", "0.5.0"));
        assert!(has_fact(&triples, "has_dependency", "react"));
        assert!(has_fact(&triples, "has_dependency", "lodash"));
    }

    /// `[project]` in `pyproject.toml` yields language, version and python range.
    #[test]
    fn scan_project_extracts_pyproject() {
        let tmp = tempfile::tempdir().expect("tempdir");
        write(
            tmp.path(),
            "pyproject.toml",
            r#"
[project]
name = "pydemo"
version = "2.0.1"
requires-python = ">=3.10"
"#,
        );

        let (triples, _summary, subject) = scan_project(tmp.path(), "fb").expect("scan");

        assert_eq!(subject, "pydemo");
        assert!(has_fact(&triples, "has_language", "Python"));
        assert!(has_fact(&triples, "has_version", "2.0.1"));
        assert!(has_fact(&triples, "requires_python", ">=3.10"));
    }

    /// The `module` directive of `go.mod` becomes the subject.
    #[test]
    fn scan_project_extracts_go_mod() {
        let tmp = tempfile::tempdir().expect("tempdir");
        write(
            tmp.path(),
            "go.mod",
            "module github.com/example/widget\n\ngo 1.22\n",
        );

        let (triples, _summary, subject) = scan_project(tmp.path(), "fb").expect("scan");

        assert_eq!(subject, "github.com/example/widget");
        assert!(has_fact(&triples, "has_language", "Go"));
    }

    /// The first H1 of `CLAUDE.md` becomes the project's description.
    #[test]
    fn scan_project_extracts_claude_md_h1() {
        let tmp = tempfile::tempdir().expect("tempdir");
        write(tmp.path(), "Cargo.toml", DEMO_MANIFEST);
        write(
            tmp.path(),
            "CLAUDE.md",
            "\n\n# Demo Project — orientation guide\n\nSome body text.\n",
        );

        let (triples, _summary, _subject) = scan_project(tmp.path(), "fb").expect("scan");

        assert!(has_fact_for(
            &triples,
            "demo",
            "has_description",
            "Demo Project — orientation guide"
        ));
    }

    /// The `origin` remote URL of `.git/config` becomes `source_repo`.
    #[test]
    fn scan_project_extracts_git_origin() {
        let tmp = tempfile::tempdir().expect("tempdir");
        write(tmp.path(), "Cargo.toml", DEMO_MANIFEST);
        write(
            tmp.path(),
            ".git/config",
            "[core]\n\trepositoryformatversion = 0\n[remote \"origin\"]\n\turl = git@github.com:example/demo.git\n",
        );

        let (triples, _summary, _) = scan_project(tmp.path(), "fb").expect("scan");

        assert!(has_fact(
            &triples,
            "source_repo",
            "git@github.com:example/demo.git"
        ));
    }

    /// With no recognisable files the fallback subject is returned and nothing is emitted.
    #[test]
    fn scan_project_falls_back_to_palace_id_when_no_manifest() {
        let tmp = tempfile::tempdir().expect("tempdir");

        let (triples, summary, subject) = scan_project(tmp.path(), "my-palace").expect("scan");

        assert_eq!(subject, "my-palace");
        assert!(triples.is_empty());
        assert!(summary.is_empty());
    }
}