use anyhow::{anyhow, Context, Result};
use serde::Serialize;
use std::path::{Path, PathBuf};
use trusty_common::memory_core::store::kg::Triple;
use crate::AppState;
/// A single subject/predicate/object fact collected by the project scanners.
///
/// `bootstrap_palace` converts each of these into a knowledge-graph `Triple`
/// before asserting it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BootstrapTriple {
/// Entity the fact is about (the scanners use the detected project name).
pub subject: String,
/// Relation name, e.g. `has_language` or `has_version`.
pub predicate: String,
/// Value of the relation, stored as text.
pub object: String,
/// Tag naming the origin of the fact, e.g. `bootstrap:cargo.toml`.
pub provenance: String,
}
/// Summary entry for one project file that contributed triples during a scan.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ScannedFile {
/// Label of the scanned file, e.g. `Cargo.toml` or `.git/config`.
pub file: String,
/// Number of triples that file contributed.
pub triples: usize,
}
/// Outcome of a `bootstrap_palace` run, serializable for reporting.
#[derive(Debug, Clone, Serialize)]
pub struct BootstrapResult {
/// Identifier of the palace that was bootstrapped.
pub palace: String,
/// Subject under which the scanned facts were recorded.
pub project_subject: String,
/// Count of triples successfully asserted into the knowledge graph.
pub triples_asserted: usize,
/// Per-file breakdown of where the scanned triples came from.
pub scanned_files: Vec<ScannedFile>,
}
pub async fn bootstrap_palace(
state: &AppState,
palace_id: &str,
project_path: Option<&Path>,
) -> Result<BootstrapResult> {
let handle = state
.registry
.open_palace(
&state.data_root,
&trusty_common::memory_core::palace::PalaceId::new(palace_id),
)
.with_context(|| format!("open palace {palace_id}"))?;
let scan_root: PathBuf = match project_path {
Some(p) => p.to_path_buf(),
None => handle
.data_dir
.clone()
.unwrap_or_else(|| state.data_root.join(palace_id)),
};
let palace_id_owned = palace_id.to_string();
let (triples, scanned_files, project_subject) =
tokio::task::spawn_blocking(move || scan_project(&scan_root, &palace_id_owned))
.await
.context("join scan_project")??;
let now = chrono::Utc::now();
let mut all = triples;
all.push(BootstrapTriple {
subject: project_subject.clone(),
predicate: "bootstrapped_at".to_string(),
object: now.to_rfc3339(),
provenance: "bootstrap:temporal".to_string(),
});
let existing = handle
.kg
.query_active(&project_subject)
.await
.context("kg.query_active for created_at check")?;
if !existing.iter().any(|t| t.predicate == "created_at") {
all.push(BootstrapTriple {
subject: project_subject.clone(),
predicate: "created_at".to_string(),
object: now.to_rfc3339(),
provenance: "bootstrap:temporal".to_string(),
});
}
let mut asserted = 0usize;
for bt in &all {
let triple = Triple {
subject: bt.subject.clone(),
predicate: bt.predicate.clone(),
object: bt.object.clone(),
valid_from: now,
valid_to: None,
confidence: 1.0,
provenance: Some(bt.provenance.clone()),
};
handle
.kg
.assert(triple)
.await
.with_context(|| format!("kg.assert {} {}", bt.subject, bt.predicate))?;
asserted += 1;
}
Ok(BootstrapResult {
palace: palace_id.to_string(),
project_subject,
triples_asserted: asserted,
scanned_files,
})
}
pub fn scan_project(
root: &Path,
fallback_subject: &str,
) -> Result<(Vec<BootstrapTriple>, Vec<ScannedFile>, String)> {
let mut triples: Vec<BootstrapTriple> = Vec::new();
let mut summary: Vec<ScannedFile> = Vec::new();
let mut project_subject: Option<String> = None;
let before = triples.len();
if let Some(name) = scan_cargo_toml(root, &mut triples) {
project_subject.get_or_insert(name);
}
if triples.len() > before {
summary.push(ScannedFile {
file: "Cargo.toml".to_string(),
triples: triples.len() - before,
});
}
let before = triples.len();
if let Some(name) = scan_package_json(root, &mut triples) {
project_subject.get_or_insert(name);
}
if triples.len() > before {
summary.push(ScannedFile {
file: "package.json".to_string(),
triples: triples.len() - before,
});
}
let before = triples.len();
if let Some(name) = scan_pyproject_toml(root, &mut triples) {
project_subject.get_or_insert(name);
}
if triples.len() > before {
summary.push(ScannedFile {
file: "pyproject.toml".to_string(),
triples: triples.len() - before,
});
}
let before = triples.len();
if let Some(name) = scan_go_mod(root, &mut triples) {
project_subject.get_or_insert(name);
}
if triples.len() > before {
summary.push(ScannedFile {
file: "go.mod".to_string(),
triples: triples.len() - before,
});
}
let before = triples.len();
scan_claude_md(root, project_subject.as_deref(), &mut triples);
if triples.len() > before {
summary.push(ScannedFile {
file: "CLAUDE.md".to_string(),
triples: triples.len() - before,
});
}
let before = triples.len();
scan_git_config(root, project_subject.as_deref(), &mut triples);
if triples.len() > before {
summary.push(ScannedFile {
file: ".git/config".to_string(),
triples: triples.len() - before,
});
}
let subject = project_subject.unwrap_or_else(|| fallback_subject.to_string());
for t in &mut triples {
if t.subject.is_empty() {
t.subject = subject.clone();
}
}
Ok((triples, summary, subject))
}
/// Extract Rust project facts from `<root>/Cargo.toml`.
///
/// The subject name is `package.name`, then `workspace.package.name`, then the
/// directory name of `root`. Emits `has_language = Rust`, and — when present —
/// `has_version`, `has_edition`, `has_rust_version`, plus up to 64
/// `has_workspace_member` triples.
///
/// Package fields written as `field.workspace = true` are resolved against the
/// `[workspace.package]` table of the same manifest, so inherited values are
/// not silently dropped. If that table is absent (e.g. the workspace root is a
/// different file) the field is skipped, as before.
///
/// Returns the subject name, or `None` when the file is missing, unparsable,
/// or no name can be determined. Nothing is pushed to `out` in that case.
fn scan_cargo_toml(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
    const PROVENANCE: &str = "bootstrap:cargo.toml";

    let manifest = root.join("Cargo.toml");
    let raw = std::fs::read_to_string(&manifest).ok()?;
    let parsed: toml::Value = match toml::from_str(&raw) {
        Ok(v) => v,
        Err(e) => {
            tracing::debug!("bootstrap: parse Cargo.toml failed: {e:#}");
            return None;
        }
    };

    let package = parsed.get("package");
    let workspace = parsed.get("workspace");
    let workspace_package = workspace.and_then(|w| w.get("package"));

    let name = package
        .and_then(|p| p.get("name"))
        .and_then(|n| n.as_str())
        .or_else(|| {
            workspace_package
                .and_then(|p| p.get("name"))
                .and_then(|n| n.as_str())
        })
        .or_else(|| root.file_name().and_then(|n| n.to_str()))?
        .to_string();

    // Resolve a `[package]` field, following `{ workspace = true }` into
    // `[workspace.package]` when the value is inherited.
    let package_field = |key: &str| -> Option<String> {
        let value = package?.get(key)?;
        if let Some(literal) = value.as_str() {
            return Some(literal.to_string());
        }
        let inherited = value.get("workspace").and_then(|w| w.as_bool()) == Some(true);
        if !inherited {
            return None;
        }
        workspace_package?
            .get(key)?
            .as_str()
            .map(|s| s.to_string())
    };

    let mut facts: Vec<(&str, String)> = vec![("has_language", "Rust".to_string())];
    for (key, predicate) in [
        ("version", "has_version"),
        ("edition", "has_edition"),
        ("rust-version", "has_rust_version"),
    ] {
        if let Some(value) = package_field(key) {
            facts.push((predicate, value));
        }
    }

    let members = workspace
        .and_then(|w| w.get("members"))
        .and_then(|m| m.as_array());
    // Cap applies to the raw member list, before non-string entries are skipped.
    for member in members.into_iter().flatten().take(64) {
        if let Some(path) = member.as_str() {
            facts.push(("has_workspace_member", path.to_string()));
        }
    }

    out.extend(facts.into_iter().map(|(predicate, object)| BootstrapTriple {
        subject: name.clone(),
        predicate: predicate.to_string(),
        object,
        provenance: PROVENANCE.to_string(),
    }));

    Some(name)
}
/// Extract JavaScript project facts from `<root>/package.json`.
///
/// Requires a string `name`; emits `has_language = JavaScript`, an optional
/// `has_version`, and up to 64 `has_dependency` triples taken from the
/// `dependencies` object.
///
/// Returns the package name, or `None` (pushing nothing) when the file is
/// missing, unparsable, or has no string `name`.
fn scan_package_json(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
    let raw = std::fs::read_to_string(root.join("package.json")).ok()?;
    let parsed = serde_json::from_str::<serde_json::Value>(&raw)
        .map_err(|e| tracing::debug!("bootstrap: parse package.json failed: {e:#}"))
        .ok()?;

    let name = parsed.get("name")?.as_str()?.to_string();
    let fact = |predicate: &str, object: &str| BootstrapTriple {
        subject: name.clone(),
        predicate: predicate.to_string(),
        object: object.to_string(),
        provenance: "bootstrap:package.json".to_string(),
    };

    out.push(fact("has_language", "JavaScript"));

    let version = parsed.get("version").and_then(|v| v.as_str());
    out.extend(version.map(|v| fact("has_version", v)));

    let dependencies = parsed.get("dependencies").and_then(|d| d.as_object());
    if let Some(table) = dependencies {
        out.extend(table.keys().take(64).map(|dep| fact("has_dependency", dep)));
    }

    Some(name)
}
/// Extract Python project facts from the `[project]` table of
/// `<root>/pyproject.toml`.
///
/// Requires `project.name`; emits `has_language = Python` and, when present as
/// strings, `has_version` and `requires_python`.
///
/// Returns the project name, or `None` (pushing nothing) when the file is
/// missing, unparsable, or lacks `[project]` / a string `name`.
fn scan_pyproject_toml(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
    let raw = std::fs::read_to_string(root.join("pyproject.toml")).ok()?;
    let parsed = toml::from_str::<toml::Value>(&raw)
        .map_err(|e| tracing::debug!("bootstrap: parse pyproject.toml failed: {e:#}"))
        .ok()?;

    let project = parsed.get("project")?;
    let name = project.get("name")?.as_str()?.to_string();
    let fact = |predicate: &str, object: &str| BootstrapTriple {
        subject: name.clone(),
        predicate: predicate.to_string(),
        object: object.to_string(),
        provenance: "bootstrap:pyproject.toml".to_string(),
    };

    out.push(fact("has_language", "Python"));

    // Optional string fields, emitted in this order when present.
    let optional_fields = [
        ("version", "has_version"),
        ("requires-python", "requires_python"),
    ];
    for (key, predicate) in optional_fields {
        let value = project.get(key).and_then(|v| v.as_str());
        out.extend(value.map(|v| fact(predicate, v)));
    }

    Some(name)
}
/// Extract Go project facts from `<root>/go.mod`.
///
/// Finds the first single-line `module <path>` directive and emits
/// `has_language = Go` and `has_module_path = <path>` with the module path as
/// subject. The keyword may be followed by any whitespace (space or tab), a
/// trailing `// comment` on the directive line is ignored, and surrounding
/// quotes around the path are removed.
///
/// Returns the module path, or `None` (pushing nothing) when the file is
/// missing or contains no usable `module` directive.
fn scan_go_mod(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
    let raw = std::fs::read_to_string(root.join("go.mod")).ok()?;
    let module = raw.lines().find_map(|line| {
        // Module paths cannot contain `//`, so everything after it is a comment.
        let code = line.split("//").next().unwrap_or("").trim();
        let rest = code.strip_prefix("module")?;
        // Require a separator so identifiers merely starting with "module" don't match.
        if !rest.starts_with(char::is_whitespace) {
            return None;
        }
        let path = rest.trim().trim_matches(|c: char| c == '"' || c == '`');
        (!path.is_empty()).then(|| path.to_string())
    })?;

    for (predicate, object) in [("has_language", "Go"), ("has_module_path", module.as_str())] {
        out.push(BootstrapTriple {
            subject: module.clone(),
            predicate: predicate.to_string(),
            object: object.to_string(),
            provenance: "bootstrap:go.mod".to_string(),
        });
    }
    Some(module)
}
/// Record the first level-1 heading of `<root>/CLAUDE.md` as the project's
/// `has_description`.
///
/// Does nothing when `subject` is `None` or the file cannot be read. Lines
/// inside fenced code blocks (delimited by ``` or ~~~) are ignored, so a
/// shell-style `# comment` in an example is not mistaken for the title.
/// Headings that are empty after trimming are skipped rather than recorded
/// as an empty description.
fn scan_claude_md(root: &Path, subject: Option<&str>, out: &mut Vec<BootstrapTriple>) {
    let Some(subject) = subject else {
        return;
    };
    let Ok(raw) = std::fs::read_to_string(root.join("CLAUDE.md")) else {
        return;
    };

    let mut in_fence = false;
    let heading = raw.lines().find_map(|line| {
        let t = line.trim_start();
        if t.starts_with("```") || t.starts_with("~~~") {
            // Opening and closing fences both toggle the state.
            in_fence = !in_fence;
            return None;
        }
        if in_fence {
            return None;
        }
        // Trim before the emptiness check so "#    " does not count as a title.
        let title = t.strip_prefix("# ")?.trim();
        (!title.is_empty()).then(|| title.to_string())
    });

    if let Some(h1) = heading {
        out.push(BootstrapTriple {
            subject: subject.to_string(),
            predicate: "has_description".to_string(),
            object: h1,
            provenance: "bootstrap:claude.md".to_string(),
        });
    }
}
/// Record the `origin` remote URL from `<root>/.git/config` as `source_repo`.
///
/// Does nothing when `subject` is `None` or the config cannot be read. Only
/// the section whose header line is exactly `[remote "origin"]` is consulted,
/// and only the first non-empty `url = ...` entry inside it is recorded.
fn scan_git_config(root: &Path, subject: Option<&str>, out: &mut Vec<BootstrapTriple>) {
    let Some(subject) = subject else { return };
    let config_path = root.join(".git").join("config");
    let Ok(contents) = std::fs::read_to_string(config_path) else {
        return;
    };

    let mut inside_origin = false;
    for entry in contents.lines().map(str::trim) {
        // Every section header switches the "are we in origin?" state.
        if entry.starts_with('[') {
            inside_origin = entry == "[remote \"origin\"]";
            continue;
        }
        if !inside_origin {
            continue;
        }

        let remote_url = entry
            .strip_prefix("url")
            .and_then(|after_key| after_key.trim_start().strip_prefix('='))
            .map(str::trim)
            .filter(|value| !value.is_empty());

        if let Some(remote_url) = remote_url {
            out.push(BootstrapTriple {
                subject: subject.to_string(),
                predicate: "source_repo".to_string(),
                object: remote_url.to_string(),
                provenance: "bootstrap:git.config".to_string(),
            });
            return;
        }
    }
}
/// Hint message for an empty knowledge graph, pointing at `kg_bootstrap`
/// (seed from project files) and `kg_assert` (add triples manually).
pub const KG_EMPTY_HINT: &str =
"Knowledge graph is empty. Run kg_bootstrap to seed it from project files, \
or use kg_assert to add triples manually.";
/// Report whether a query result contains no triples at all.
pub fn is_kg_empty_for_subject(triples: &[Triple]) -> bool {
    triples.first().is_none()
}
/// Serialize a [`BootstrapResult`] into a JSON value.
///
/// # Errors
///
/// Returns an error prefixed with `serialize BootstrapResult:` if
/// serialization fails.
pub fn result_to_json(r: &BootstrapResult) -> Result<serde_json::Value> {
    match serde_json::to_value(r) {
        Ok(json) => Ok(json),
        Err(e) => Err(anyhow!("serialize BootstrapResult: {e}")),
    }
}
// Unit tests for `scan_project`: each test builds a throwaway directory with
// one or two fixture files and checks the triples the scan produces.
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
// Write `content` to `root/rel`, creating parent directories as needed.
fn write(root: &Path, rel: &str, content: &str) {
let p = root.join(rel);
if let Some(parent) = p.parent() {
fs::create_dir_all(parent).expect("mkdir");
}
fs::write(&p, content).expect("write");
}
// A `[package]` manifest yields language, version, edition and rust-version
// triples under the package name.
#[test]
fn scan_project_extracts_cargo_facts() {
let tmp = tempfile::tempdir().expect("tempdir");
write(
tmp.path(),
"Cargo.toml",
r#"
[package]
name = "demo-crate"
version = "1.2.3"
edition = "2021"
rust-version = "1.88"
"#,
);
let (triples, summary, subject) =
scan_project(tmp.path(), "fallback").expect("scan_project");
assert_eq!(subject, "demo-crate");
assert!(summary.iter().any(|s| s.file == "Cargo.toml"));
let has = |p: &str, o: &str| {
triples
.iter()
.any(|t| t.subject == "demo-crate" && t.predicate == p && t.object == o)
};
assert!(has("has_language", "Rust"));
assert!(has("has_version", "1.2.3"));
assert!(has("has_edition", "2021"));
assert!(has("has_rust_version", "1.88"));
}
// A manifest with only `[workspace]` takes the directory name as subject and
// lists each workspace member.
#[test]
fn scan_project_extracts_workspace_members() {
let tmp = tempfile::tempdir().expect("tempdir");
let root = tmp.path().join("trusty-tools");
fs::create_dir_all(&root).expect("mkdir");
write(
&root,
"Cargo.toml",
r#"
[workspace]
members = ["crates/foo", "crates/bar"]
resolver = "2"
"#,
);
let (triples, _summary, subject) = scan_project(&root, "fallback").expect("scan_project");
assert_eq!(subject, "trusty-tools");
assert!(triples
.iter()
.any(|t| t.predicate == "has_workspace_member" && t.object == "crates/foo"));
assert!(triples
.iter()
.any(|t| t.predicate == "has_workspace_member" && t.object == "crates/bar"));
}
// `package.json` yields language, version and one triple per dependency key.
#[test]
fn scan_project_extracts_package_json() {
let tmp = tempfile::tempdir().expect("tempdir");
write(
tmp.path(),
"package.json",
r#"{
"name": "my-app",
"version": "0.5.0",
"dependencies": {
"react": "^18.0.0",
"lodash": "^4.0.0"
}
}"#,
);
let (triples, _summary, subject) = scan_project(tmp.path(), "fb").expect("scan");
assert_eq!(subject, "my-app");
assert!(triples
.iter()
.any(|t| t.predicate == "has_language" && t.object == "JavaScript"));
assert!(triples
.iter()
.any(|t| t.predicate == "has_version" && t.object == "0.5.0"));
assert!(triples
.iter()
.any(|t| t.predicate == "has_dependency" && t.object == "react"));
assert!(triples
.iter()
.any(|t| t.predicate == "has_dependency" && t.object == "lodash"));
}
// The `[project]` table of `pyproject.toml` yields language, version and
// the required Python range.
#[test]
fn scan_project_extracts_pyproject() {
let tmp = tempfile::tempdir().expect("tempdir");
write(
tmp.path(),
"pyproject.toml",
r#"
[project]
name = "pydemo"
version = "2.0.1"
requires-python = ">=3.10"
"#,
);
let (triples, _summary, subject) = scan_project(tmp.path(), "fb").expect("scan");
assert_eq!(subject, "pydemo");
assert!(triples
.iter()
.any(|t| t.predicate == "has_language" && t.object == "Python"));
assert!(triples
.iter()
.any(|t| t.predicate == "has_version" && t.object == "2.0.1"));
assert!(triples
.iter()
.any(|t| t.predicate == "requires_python" && t.object == ">=3.10"));
}
// The `module` directive of `go.mod` becomes the subject.
#[test]
fn scan_project_extracts_go_mod() {
let tmp = tempfile::tempdir().expect("tempdir");
write(
tmp.path(),
"go.mod",
"module github.com/example/widget\n\ngo 1.22\n",
);
let (triples, _summary, subject) = scan_project(tmp.path(), "fb").expect("scan");
assert_eq!(subject, "github.com/example/widget");
assert!(triples
.iter()
.any(|t| t.predicate == "has_language" && t.object == "Go"));
}
// The first `# ` heading of CLAUDE.md is recorded as the description of the
// subject detected from Cargo.toml.
#[test]
fn scan_project_extracts_claude_md_h1() {
let tmp = tempfile::tempdir().expect("tempdir");
write(
tmp.path(),
"Cargo.toml",
r#"
[package]
name = "demo"
version = "0.1.0"
"#,
);
write(
tmp.path(),
"CLAUDE.md",
"\n\n# Demo Project — orientation guide\n\nSome body text.\n",
);
let (triples, _summary, _subject) = scan_project(tmp.path(), "fb").expect("scan");
assert!(triples.iter().any(|t| t.subject == "demo"
&& t.predicate == "has_description"
&& t.object == "Demo Project — orientation guide"));
}
// The `url` of the `origin` remote in `.git/config` is recorded as
// `source_repo`.
#[test]
fn scan_project_extracts_git_origin() {
let tmp = tempfile::tempdir().expect("tempdir");
write(
tmp.path(),
"Cargo.toml",
r#"
[package]
name = "demo"
version = "0.1.0"
"#,
);
write(
tmp.path(),
".git/config",
"[core]\n\trepositoryformatversion = 0\n[remote \"origin\"]\n\turl = git@github.com:example/demo.git\n",
);
let (triples, _summary, _) = scan_project(tmp.path(), "fb").expect("scan");
assert!(
triples
.iter()
.any(|t| t.predicate == "source_repo"
&& t.object == "git@github.com:example/demo.git")
);
}
// With no recognizable files the fallback subject is returned and nothing
// is collected.
#[test]
fn scan_project_falls_back_to_palace_id_when_no_manifest() {
let tmp = tempfile::tempdir().expect("tempdir");
let (triples, summary, subject) = scan_project(tmp.path(), "my-palace").expect("scan");
assert_eq!(subject, "my-palace");
assert!(triples.is_empty());
assert!(summary.is_empty());
}
}