1use std::path::Path;
16
17use anyhow::Result;
18
19use super::types::{BootstrapTriple, ScannedFile};
20
21pub fn scan_project(
34 root: &Path,
35 fallback_subject: &str,
36) -> Result<(Vec<BootstrapTriple>, Vec<ScannedFile>, String)> {
37 let mut triples: Vec<BootstrapTriple> = Vec::new();
38 let mut summary: Vec<ScannedFile> = Vec::new();
39 let mut project_subject: Option<String> = None;
40
41 let before = triples.len();
43 if let Some(name) = scan_cargo_toml(root, &mut triples) {
44 project_subject.get_or_insert(name);
45 }
46 if triples.len() > before {
47 summary.push(ScannedFile {
48 file: "Cargo.toml".to_string(),
49 triples: triples.len() - before,
50 });
51 }
52
53 let before = triples.len();
55 if let Some(name) = scan_package_json(root, &mut triples) {
56 project_subject.get_or_insert(name);
57 }
58 if triples.len() > before {
59 summary.push(ScannedFile {
60 file: "package.json".to_string(),
61 triples: triples.len() - before,
62 });
63 }
64
65 let before = triples.len();
67 if let Some(name) = scan_pyproject_toml(root, &mut triples) {
68 project_subject.get_or_insert(name);
69 }
70 if triples.len() > before {
71 summary.push(ScannedFile {
72 file: "pyproject.toml".to_string(),
73 triples: triples.len() - before,
74 });
75 }
76
77 let before = triples.len();
79 if let Some(name) = scan_go_mod(root, &mut triples) {
80 project_subject.get_or_insert(name);
81 }
82 if triples.len() > before {
83 summary.push(ScannedFile {
84 file: "go.mod".to_string(),
85 triples: triples.len() - before,
86 });
87 }
88
89 let before = triples.len();
93 scan_claude_md(root, project_subject.as_deref(), &mut triples);
94 if triples.len() > before {
95 summary.push(ScannedFile {
96 file: "CLAUDE.md".to_string(),
97 triples: triples.len() - before,
98 });
99 }
100
101 let before = triples.len();
103 scan_git_config(root, project_subject.as_deref(), &mut triples);
104 if triples.len() > before {
105 summary.push(ScannedFile {
106 file: ".git/config".to_string(),
107 triples: triples.len() - before,
108 });
109 }
110
111 let subject = project_subject.unwrap_or_else(|| fallback_subject.to_string());
112
113 for t in &mut triples {
119 if t.subject.is_empty() {
120 t.subject = subject.clone();
121 }
122 }
123
124 Ok((triples, summary, subject))
125}
126
127fn scan_cargo_toml(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
138 let manifest = root.join("Cargo.toml");
139 let raw = std::fs::read_to_string(&manifest).ok()?;
140 let parsed: toml::Value = match toml::from_str(&raw) {
141 Ok(v) => v,
142 Err(e) => {
143 tracing::debug!("bootstrap: parse Cargo.toml failed: {e:#}");
144 return None;
145 }
146 };
147
148 let name = parsed
151 .get("package")
152 .and_then(|p| p.get("name"))
153 .and_then(|n| n.as_str())
154 .map(|s| s.to_string())
155 .or_else(|| {
156 parsed
157 .get("workspace")
158 .and_then(|w| w.get("package"))
159 .and_then(|p| p.get("name"))
160 .and_then(|n| n.as_str())
161 .map(|s| s.to_string())
162 })
163 .or_else(|| {
164 root.file_name()
165 .and_then(|n| n.to_str())
166 .map(|s| s.to_string())
167 })?;
168
169 out.push(BootstrapTriple {
170 subject: name.clone(),
171 predicate: "has_language".to_string(),
172 object: "Rust".to_string(),
173 provenance: "bootstrap:cargo.toml".to_string(),
174 });
175
176 if let Some(version) = parsed
177 .get("package")
178 .and_then(|p| p.get("version"))
179 .and_then(|v| v.as_str())
180 {
181 out.push(BootstrapTriple {
182 subject: name.clone(),
183 predicate: "has_version".to_string(),
184 object: version.to_string(),
185 provenance: "bootstrap:cargo.toml".to_string(),
186 });
187 }
188 if let Some(edition) = parsed
189 .get("package")
190 .and_then(|p| p.get("edition"))
191 .and_then(|v| v.as_str())
192 {
193 out.push(BootstrapTriple {
194 subject: name.clone(),
195 predicate: "has_edition".to_string(),
196 object: edition.to_string(),
197 provenance: "bootstrap:cargo.toml".to_string(),
198 });
199 }
200 if let Some(rv) = parsed
201 .get("package")
202 .and_then(|p| p.get("rust-version"))
203 .and_then(|v| v.as_str())
204 {
205 out.push(BootstrapTriple {
206 subject: name.clone(),
207 predicate: "has_rust_version".to_string(),
208 object: rv.to_string(),
209 provenance: "bootstrap:cargo.toml".to_string(),
210 });
211 }
212
213 if let Some(members) = parsed
216 .get("workspace")
217 .and_then(|w| w.get("members"))
218 .and_then(|m| m.as_array())
219 {
220 for member in members.iter().take(64) {
221 if let Some(s) = member.as_str() {
222 out.push(BootstrapTriple {
223 subject: name.clone(),
224 predicate: "has_workspace_member".to_string(),
225 object: s.to_string(),
226 provenance: "bootstrap:cargo.toml".to_string(),
227 });
228 }
229 }
230 }
231
232 Some(name)
233}
234
235fn scan_package_json(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
243 let manifest = root.join("package.json");
244 let raw = std::fs::read_to_string(&manifest).ok()?;
245 let parsed: serde_json::Value = match serde_json::from_str(&raw) {
246 Ok(v) => v,
247 Err(e) => {
248 tracing::debug!("bootstrap: parse package.json failed: {e:#}");
249 return None;
250 }
251 };
252 let name = parsed.get("name").and_then(|n| n.as_str())?.to_string();
253
254 out.push(BootstrapTriple {
255 subject: name.clone(),
256 predicate: "has_language".to_string(),
257 object: "JavaScript".to_string(),
258 provenance: "bootstrap:package.json".to_string(),
259 });
260
261 if let Some(version) = parsed.get("version").and_then(|v| v.as_str()) {
262 out.push(BootstrapTriple {
263 subject: name.clone(),
264 predicate: "has_version".to_string(),
265 object: version.to_string(),
266 provenance: "bootstrap:package.json".to_string(),
267 });
268 }
269
270 if let Some(deps) = parsed.get("dependencies").and_then(|d| d.as_object()) {
271 for (k, _) in deps.iter().take(64) {
272 out.push(BootstrapTriple {
273 subject: name.clone(),
274 predicate: "has_dependency".to_string(),
275 object: k.clone(),
276 provenance: "bootstrap:package.json".to_string(),
277 });
278 }
279 }
280
281 Some(name)
282}
283
284fn scan_pyproject_toml(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
293 let manifest = root.join("pyproject.toml");
294 let raw = std::fs::read_to_string(&manifest).ok()?;
295 let parsed: toml::Value = match toml::from_str(&raw) {
296 Ok(v) => v,
297 Err(e) => {
298 tracing::debug!("bootstrap: parse pyproject.toml failed: {e:#}");
299 return None;
300 }
301 };
302 let project = parsed.get("project")?;
303 let name = project.get("name").and_then(|n| n.as_str())?.to_string();
304
305 out.push(BootstrapTriple {
306 subject: name.clone(),
307 predicate: "has_language".to_string(),
308 object: "Python".to_string(),
309 provenance: "bootstrap:pyproject.toml".to_string(),
310 });
311
312 if let Some(v) = project.get("version").and_then(|v| v.as_str()) {
313 out.push(BootstrapTriple {
314 subject: name.clone(),
315 predicate: "has_version".to_string(),
316 object: v.to_string(),
317 provenance: "bootstrap:pyproject.toml".to_string(),
318 });
319 }
320 if let Some(rp) = project.get("requires-python").and_then(|v| v.as_str()) {
321 out.push(BootstrapTriple {
322 subject: name.clone(),
323 predicate: "requires_python".to_string(),
324 object: rp.to_string(),
325 provenance: "bootstrap:pyproject.toml".to_string(),
326 });
327 }
328
329 Some(name)
330}
331
332fn scan_go_mod(root: &Path, out: &mut Vec<BootstrapTriple>) -> Option<String> {
341 let raw = std::fs::read_to_string(root.join("go.mod")).ok()?;
342 let module = raw
343 .lines()
344 .find_map(|line| line.trim().strip_prefix("module "))
345 .map(|s| s.trim().to_string())?;
346 if module.is_empty() {
347 return None;
348 }
349 out.push(BootstrapTriple {
350 subject: module.clone(),
351 predicate: "has_language".to_string(),
352 object: "Go".to_string(),
353 provenance: "bootstrap:go.mod".to_string(),
354 });
355 out.push(BootstrapTriple {
356 subject: module.clone(),
357 predicate: "has_module_path".to_string(),
358 object: module.clone(),
359 provenance: "bootstrap:go.mod".to_string(),
360 });
361 Some(module)
362}
363
364fn scan_claude_md(root: &Path, subject: Option<&str>, out: &mut Vec<BootstrapTriple>) {
373 let Some(subject) = subject else {
374 return;
376 };
377 let Ok(raw) = std::fs::read_to_string(root.join("CLAUDE.md")) else {
378 return;
379 };
380 if let Some(h1) = raw.lines().find_map(|line| {
381 let t = line.trim_start();
382 t.strip_prefix("# ")
383 .filter(|rest| !rest.is_empty())
384 .map(|s| s.trim().to_string())
385 }) {
386 out.push(BootstrapTriple {
387 subject: subject.to_string(),
388 predicate: "has_description".to_string(),
389 object: h1,
390 provenance: "bootstrap:claude.md".to_string(),
391 });
392 }
393}
394
395fn scan_git_config(root: &Path, subject: Option<&str>, out: &mut Vec<BootstrapTriple>) {
422 let Some(subject) = subject else { return };
423 let Some(url) = read_origin_url(root) else {
424 return;
425 };
426 out.push(BootstrapTriple {
427 subject: subject.to_string(),
428 predicate: "source_repo".to_string(),
429 object: url,
430 provenance: "bootstrap:git.config".to_string(),
431 });
432}
433
/// Returns the URL of the `origin` remote for the repository at `root`.
///
/// Two strategies are tried in order:
/// 1. Run `git -C <root> config --get remote.origin.url` and use its trimmed
///    standard output when the command succeeds and prints something.
/// 2. Otherwise read `<root>/.git/config` and scan it line by line for a
///    `url = …` entry inside the section whose header is exactly
///    `[remote "origin"]`.
///
/// Returns `None` when neither strategy yields a non-empty URL.
fn read_origin_url(root: &Path) -> Option<String> {
    let via_git = std::process::Command::new("git")
        .arg("-C")
        .arg(root)
        .args(&["config", "--get", "remote.origin.url"])
        .output()
        .ok()
        .filter(|run| run.status.success())
        .map(|run| String::from_utf8_lossy(&run.stdout).trim().to_string())
        .filter(|url| !url.is_empty());
    if via_git.is_some() {
        return via_git;
    }

    // Fallback: minimal hand-rolled scan of the config file.
    let config = std::fs::read_to_string(root.join(".git").join("config")).ok()?;
    let mut inside_origin = false;
    let from_file = config.lines().map(str::trim).find_map(|line| {
        if line.starts_with('[') {
            // Every section header re-evaluates whether we are in origin.
            inside_origin = line == "[remote \"origin\"]";
            return None;
        }
        if !inside_origin {
            return None;
        }
        // Accept `url`, optional spaces, `=`, then the value.
        let value = line
            .strip_prefix("url")?
            .trim_start()
            .strip_prefix('=')?
            .trim();
        if value.is_empty() {
            None
        } else {
            Some(value.to_string())
        }
    });
    from_file
}
492
493#[cfg(test)]
494mod scan_tests;