use std::path::Path;
use anyhow::Result;
use super::types::{BootstrapTriple, ScannedFile};
pub fn scan_project(
root: &Path,
fallback_subject: &str,
) -> Result<(Vec<BootstrapTriple>, Vec<ScannedFile>, String)> {
let mut triples: Vec<BootstrapTriple> = Vec::new();
let mut summary: Vec<ScannedFile> = Vec::new();
let mut project_subject: Option<String> = None;
let before = triples.len();
if let Some(name) = scan_cargo_toml(root, &mut triples) {
project_subject.get_or_insert(name);
}
if triples.len() > before {
summary.push(ScannedFile {
file: "Cargo.toml".to_string(),
triples: triples.len() - before,
});
}
let before = triples.len();
if let Some(name) = scan_package_json(root, &mut triples) {
project_subject.get_or_insert(name);
}
if triples.len() > before {
summary.push(ScannedFile {
file: "package.json".to_string(),
triples: triples.len() - before,
});
}
let before = triples.len();
if let Some(name) = scan_pyproject_toml(root, &mut triples) {
project_subject.get_or_insert(name);
}
if triples.len() > before {
summary.push(ScannedFile {
file: "pyproject.toml".to_string(),
triples: triples.len() - before,
});
}
let before = triples.len();
if let Some(name) = scan_go_mod(root, &mut triples) {
project_subject.get_or_insert(name);
}
if triples.len() > before {
summary.push(ScannedFile {
file: "go.mod".to_string(),
triples: triples.len() - before,
});
}
let before = triples.len();
scan_claude_md(root, project_subject.as_deref(), &mut triples);
if triples.len() > before {
summary.push(ScannedFile {
file: "CLAUDE.md".to_string(),
triples: triples.len() - before,
});
}
let before = triples.len();
scan_git_config(root, project_subject.as_deref(), &mut triples);
if triples.len() > before {
summary.push(ScannedFile {
file: ".git/config".to_string(),
triples: triples.len() - before,
});
}
let subject = project_subject.unwrap_or_else(|| fallback_subject.to_string());
for t in &mut triples {
if t.subject.is_empty() {
t.subject = subject.clone();
}
}
Ok((triples, summary, subject))
}
/// Reads `<root>/Cargo.toml` and appends the facts it declares to `out`.
///
/// The project name is taken from `package.name`, then
/// `workspace.package.name`, then the final component of `root`. With a name
/// in hand the function emits, in order: `has_language = Rust`, then
/// `has_version`, `has_edition` and `has_rust_version` for whichever of
/// `package.version`, `package.edition` and `package.rust-version` are
/// strings, then one `has_workspace_member` per string entry among the first
/// 64 items of `workspace.members`.
///
/// Returns the project name, or `None` (pushing nothing) when the file cannot
/// be read, fails to parse (logged at debug level), or no name can be derived.
fn scan_cargo_toml(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
    const PROVENANCE: &str = "bootstrap:cargo.toml";

    let text = std::fs::read_to_string(root.join("Cargo.toml")).ok()?;
    let doc = toml::from_str::<toml::Value>(&text)
        .map_err(|e| tracing::debug!("bootstrap: parse Cargo.toml failed: {e:#}"))
        .ok()?;

    let package = doc.get("package");
    let workspace = doc.get("workspace");

    let name = package
        .and_then(|p| p.get("name"))
        .and_then(|n| n.as_str())
        .or_else(|| {
            workspace
                .and_then(|w| w.get("package"))?
                .get("name")?
                .as_str()
        })
        .or_else(|| root.file_name()?.to_str())?
        .to_owned();

    let mut emit = |predicate: &str, object: &str| {
        out.push(BootstrapTriple {
            subject: name.clone(),
            predicate: predicate.to_string(),
            object: object.to_string(),
            provenance: PROVENANCE.to_string(),
        });
    };

    emit("has_language", "Rust");

    // Optional string-valued `[package]` keys, in emission order.
    let package_fields = [
        ("version", "has_version"),
        ("edition", "has_edition"),
        ("rust-version", "has_rust_version"),
    ];
    for (key, predicate) in package_fields {
        if let Some(value) = package.and_then(|p| p.get(key)).and_then(|v| v.as_str()) {
            emit(predicate, value);
        }
    }

    // Only the first 64 member entries are considered; non-string entries
    // within that window are skipped.
    let members = workspace
        .and_then(|w| w.get("members"))
        .and_then(|m| m.as_array());
    if let Some(members) = members {
        for member in members.iter().take(64).filter_map(|m| m.as_str()) {
            emit("has_workspace_member", member);
        }
    }

    Some(name)
}
/// Reads `<root>/package.json` and appends the facts it declares to `out`.
///
/// Requires a string `name` at the top level. Emits `has_language =
/// JavaScript`, `has_version` when `version` is a string, and one
/// `has_dependency` per key for the first 64 entries of `dependencies`.
///
/// Returns the package name, or `None` (pushing nothing) when the file cannot
/// be read, fails to parse (logged at debug level), or has no string `name`.
fn scan_package_json(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
    const PROVENANCE: &str = "bootstrap:package.json";

    let text = std::fs::read_to_string(root.join("package.json")).ok()?;
    let doc = serde_json::from_str::<serde_json::Value>(&text)
        .map_err(|e| tracing::debug!("bootstrap: parse package.json failed: {e:#}"))
        .ok()?;

    let name = doc.get("name")?.as_str()?.to_owned();

    let mut emit = |predicate: &str, object: &str| {
        out.push(BootstrapTriple {
            subject: name.clone(),
            predicate: predicate.to_string(),
            object: object.to_string(),
            provenance: PROVENANCE.to_string(),
        });
    };

    emit("has_language", "JavaScript");

    if let Some(version) = doc.get("version").and_then(|v| v.as_str()) {
        emit("has_version", version);
    }

    // Only dependency names are recorded; version ranges are ignored.
    if let Some(deps) = doc.get("dependencies").and_then(|d| d.as_object()) {
        for dep in deps.keys().take(64) {
            emit("has_dependency", dep.as_str());
        }
    }

    Some(name)
}
/// Reads `<root>/pyproject.toml` and appends the facts declared in its
/// `[project]` table to `out`.
///
/// Requires a string `project.name`. Emits `has_language = Python`, then
/// `has_version` and `requires_python` for whichever of `project.version` and
/// `project.requires-python` are strings.
///
/// Returns the project name, or `None` (pushing nothing) when the file cannot
/// be read, fails to parse (logged at debug level), lacks a `[project]` table,
/// or has no string `name` in it.
fn scan_pyproject_toml(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
    const PROVENANCE: &str = "bootstrap:pyproject.toml";

    let text = std::fs::read_to_string(root.join("pyproject.toml")).ok()?;
    let doc = toml::from_str::<toml::Value>(&text)
        .map_err(|e| tracing::debug!("bootstrap: parse pyproject.toml failed: {e:#}"))
        .ok()?;

    let project = doc.get("project")?;
    let name = project.get("name")?.as_str()?.to_owned();

    let mut emit = |predicate: &str, object: &str| {
        out.push(BootstrapTriple {
            subject: name.clone(),
            predicate: predicate.to_string(),
            object: object.to_string(),
            provenance: PROVENANCE.to_string(),
        });
    };

    emit("has_language", "Python");

    // Optional string-valued `[project]` keys, in emission order.
    let project_fields = [
        ("version", "has_version"),
        ("requires-python", "requires_python"),
    ];
    for (key, predicate) in project_fields {
        if let Some(value) = project.get(key).and_then(|v| v.as_str()) {
            emit(predicate, value);
        }
    }

    Some(name)
}
/// Reads `<root>/go.mod` and appends the facts it declares to `out`.
///
/// The module path is taken from the first usable `module` directive. With a
/// path in hand the function emits `has_language = Go` and
/// `has_module_path = <path>`, both with the module path as subject.
///
/// Returns the module path, or `None` (pushing nothing) when the file cannot
/// be read or contains no `module` directive with a non-empty path.
fn scan_go_mod(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
    const PROVENANCE: &str = "bootstrap:go.mod";

    let raw = std::fs::read_to_string(root.join("go.mod")).ok()?;
    let module = parse_go_module_path(&raw)?;

    out.push(BootstrapTriple {
        subject: module.clone(),
        predicate: "has_language".to_string(),
        object: "Go".to_string(),
        provenance: PROVENANCE.to_string(),
    });
    out.push(BootstrapTriple {
        subject: module.clone(),
        predicate: "has_module_path".to_string(),
        object: module.clone(),
        provenance: PROVENANCE.to_string(),
    });
    Some(module)
}

/// Extracts the module path from the text of a `go.mod` file.
///
/// Accepts any whitespace between the `module` keyword and the path, drops a
/// trailing `// ...` comment, and unwraps a path written as a quoted
/// (`"..."`) or raw (`` `...` ``) string. Lines whose path turns out empty
/// are skipped so a later directive can still match.
fn parse_go_module_path(go_mod: &str) -> Option<String> {
    go_mod.lines().find_map(|line| {
        let rest = line.trim().strip_prefix("module")?;
        // Require a separator so identifiers such as `modulex` do not match.
        if !rest.starts_with(char::is_whitespace) {
            return None;
        }
        let rest = rest.trim_start();
        let path = if let Some(quoted) = rest.strip_prefix('"') {
            quoted.split('"').next().unwrap_or("")
        } else if let Some(raw) = rest.strip_prefix('`') {
            raw.split('`').next().unwrap_or("")
        } else {
            // Unquoted path: everything up to a trailing line comment.
            rest.split("//").next().unwrap_or("").trim_end()
        };
        (!path.is_empty()).then(|| path.to_string())
    })
}
/// Reads `<root>/CLAUDE.md` and records its first level-one heading as the
/// project description.
///
/// Does nothing when `subject` is `None` or the file cannot be read. A line
/// counts as a heading when, after leading whitespace, it starts with `"# "`.
/// The heading text is trimmed *before* the emptiness check, so a heading
/// made only of whitespace is skipped instead of producing a triple with an
/// empty object, and the search continues with later lines.
///
/// On success pushes one `has_description` triple for `subject` onto `out`.
fn scan_claude_md(root: &Path, subject: Option<&str>, out: &mut Vec<BootstrapTriple>) {
    let Some(subject) = subject else {
        return;
    };
    let Ok(raw) = std::fs::read_to_string(root.join("CLAUDE.md")) else {
        return;
    };

    let heading = raw.lines().find_map(|line| {
        let title = line.trim_start().strip_prefix("# ")?.trim();
        (!title.is_empty()).then(|| title.to_string())
    });

    if let Some(h1) = heading {
        out.push(BootstrapTriple {
            subject: subject.to_string(),
            predicate: "has_description".to_string(),
            object: h1,
            provenance: "bootstrap:claude.md".to_string(),
        });
    }
}
/// Records the URL of the repository's `origin` remote as a `source_repo`
/// triple for `subject`.
///
/// Nothing is pushed when `subject` is `None` or when `read_origin_url`
/// finds no URL for `root`. The subject is checked first, so the URL lookup
/// is not attempted without one.
fn scan_git_config(root: &Path, subject: Option<&str>, out: &mut Vec<BootstrapTriple>) {
    if let Some(project) = subject {
        if let Some(origin) = read_origin_url(root) {
            out.push(BootstrapTriple {
                subject: project.to_string(),
                predicate: "source_repo".to_string(),
                object: origin,
                provenance: "bootstrap:git.config".to_string(),
            });
        }
    }
}
/// Returns the URL of the `origin` remote for the repository at `root`.
///
/// First asks the `git` binary (`git -C <root> config --get
/// remote.origin.url`). If that cannot be run, exits unsuccessfully, or
/// prints nothing, falls back to parsing `<root>/.git/config` directly.
///
/// Returns `None` when neither source yields a non-empty URL.
fn read_origin_url(root: &Path) -> Option<String> {
    let from_git = std::process::Command::new("git")
        .arg("-C")
        .arg(root)
        .args(["config", "--get", "remote.origin.url"])
        .output()
        .ok()
        .filter(|output| output.status.success())
        .map(|output| String::from_utf8_lossy(&output.stdout).trim().to_string())
        .filter(|url| !url.is_empty());
    if from_git.is_some() {
        return from_git;
    }

    let raw = std::fs::read_to_string(root.join(".git").join("config")).ok()?;
    parse_origin_url(&raw)
}

/// Finds the first non-empty `url` value inside a `[remote "origin"]`
/// section of git config text.
///
/// The key is compared case-insensitively, matching git's own treatment of
/// variable names.
fn parse_origin_url(config: &str) -> Option<String> {
    let mut in_origin = false;
    for line in config.lines() {
        let line = line.trim();
        if line.starts_with('[') {
            // Every section header switches the "inside origin" state.
            in_origin = is_origin_remote_header(line);
            continue;
        }
        if !in_origin {
            continue;
        }
        let Some((key, value)) = line.split_once('=') else {
            continue;
        };
        let value = value.trim();
        if key.trim().eq_ignore_ascii_case("url") && !value.is_empty() {
            return Some(value.to_string());
        }
    }
    None
}

/// Reports whether `line` (already trimmed, starting with `[`) is the header
/// of the `origin` remote section.
///
/// The section name `remote` is matched case-insensitively and the
/// subsection `"origin"` exactly; anything after the closing `]` (such as a
/// trailing comment) is ignored.
fn is_origin_remote_header(line: &str) -> bool {
    let Some((inner, _after)) = line
        .strip_prefix('[')
        .and_then(|rest| rest.split_once(']'))
    else {
        return false;
    };
    let Some((section, subsection)) = inner.trim().split_once(char::is_whitespace) else {
        return false;
    };
    section.eq_ignore_ascii_case("remote") && subsection.trim() == "\"origin\""
}
// Tests for this module are declared in the `scan_tests` submodule, which is
// only compiled for test builds.
#[cfg(test)]
mod scan_tests;