lean_ctx/core/
knowledge_bootstrap.rs1use std::path::Path;
2
3use crate::core::graph_index::ProjectIndex;
4use crate::core::knowledge::ProjectKnowledge;
5use crate::core::memory_policy::MemoryPolicy;
6
/// Session label attached to every fact written by `remember_fact` in this
/// module (forwarded as an argument to `knowledge.remember`).
const BOOTSTRAP_SESSION_ID: &str = "auto-bootstrap";
/// Fixed value forwarded with each bootstrap fact to `knowledge.remember`.
/// NOTE(review): presumably interpreted as a confidence score in [0, 1] —
/// confirm against `ProjectKnowledge::remember`.
const BOOTSTRAP_CONFIDENCE: f32 = 0.95;
9
10pub fn bootstrap_if_empty(
15 knowledge: &mut ProjectKnowledge,
16 project_root: &str,
17 index: Option<&ProjectIndex>,
18 policy: &MemoryPolicy,
19) -> bool {
20 if !knowledge.facts.is_empty() {
21 return false;
22 }
23
24 let mut changed = false;
25
26 changed |= remember_fact(knowledge, "workflow", "project_root", project_root, policy);
28
29 if let Some(identity) = crate::core::project_hash::project_identity(project_root) {
30 changed |= remember_fact(
31 knowledge,
32 "architecture",
33 "project_identity",
34 &identity,
35 policy,
36 );
37 if let Some(url) = identity.strip_prefix("git:") {
38 changed |= remember_fact(knowledge, "deployment", "git_remote", url, policy);
39 }
40 }
41
42 let markers = detect_build_markers(project_root);
43 if !markers.is_empty() {
44 changed |= remember_fact(
45 knowledge,
46 "architecture",
47 "build_markers",
48 &markers.join(", "),
49 policy,
50 );
51 }
52
53 if let Some(idx) = index {
54 let file_count = idx.files.len();
55 let symbol_count = idx.symbols.len();
56 let edge_count = idx.edges.len();
57 changed |= remember_fact(
58 knowledge,
59 "workflow",
60 "index_stats",
61 &format!("files={file_count}, symbols={symbol_count}, edges={edge_count}"),
62 policy,
63 );
64
65 if !idx.last_scan.trim().is_empty() {
66 changed |= remember_fact(
67 knowledge,
68 "workflow",
69 "index_last_scan",
70 &idx.last_scan,
71 policy,
72 );
73 }
74
75 let (langs, total_tokens) = summarize_languages_and_tokens(idx);
76 if !langs.is_empty() {
77 changed |= remember_fact(knowledge, "architecture", "languages_top", &langs, policy);
78 }
79 if total_tokens > 0 {
80 changed |= remember_fact(
81 knowledge,
82 "performance",
83 "tokens_indexed",
84 &total_tokens.to_string(),
85 policy,
86 );
87 }
88 }
89
90 changed
91}
92
93fn remember_fact(
94 knowledge: &mut ProjectKnowledge,
95 category: &str,
96 key: &str,
97 value: &str,
98 policy: &MemoryPolicy,
99) -> bool {
100 if value.trim().is_empty() {
101 return false;
102 }
103 knowledge.remember(
104 category,
105 key,
106 value,
107 BOOTSTRAP_SESSION_ID,
108 BOOTSTRAP_CONFIDENCE,
109 policy,
110 );
111 true
112}
113
/// Reports which well-known build/VCS markers exist directly under
/// `project_root`.
///
/// Labels are returned in a fixed order: `git`, `cargo`, `npm`, `python`,
/// `go`, `maven`, `gradle`, `cmake`, and finally `dotnet` when any entry in
/// the root directory has the extension `sln`. A root that cannot be listed
/// simply contributes no `dotnet` label.
fn detect_build_markers(project_root: &str) -> Vec<&'static str> {
    // Label -> paths (relative to the root) whose existence implies it.
    const PATH_MARKERS: &[(&str, &[&str])] = &[
        ("git", &[".git"]),
        ("cargo", &["Cargo.toml"]),
        ("npm", &["package.json"]),
        ("python", &["pyproject.toml"]),
        ("go", &["go.mod"]),
        ("maven", &["pom.xml"]),
        ("gradle", &["build.gradle", "build.gradle.kts"]),
        ("cmake", &["CMakeLists.txt"]),
    ];

    let base = Path::new(project_root);
    let mut found: Vec<&'static str> = Vec::new();

    for (label, candidates) in PATH_MARKERS {
        if candidates.iter().any(|name| base.join(name).exists()) {
            found.push(*label);
        }
    }

    // Solution files have arbitrary names, so scan the directory listing for
    // the extension instead of probing a fixed path.
    let has_solution_file = match std::fs::read_dir(base) {
        Ok(listing) => listing
            .flatten()
            .any(|entry| entry.path().extension().is_some_and(|ext| ext == "sln")),
        Err(_) => false,
    };
    if has_solution_file {
        found.push("dotnet");
    }

    found
}
154
155fn summarize_languages_and_tokens(index: &ProjectIndex) -> (String, u64) {
156 let mut total_tokens: u64 = 0;
157 let mut counts: std::collections::HashMap<&str, usize> = std::collections::HashMap::new();
158
159 for f in index.files.values() {
160 total_tokens = total_tokens.saturating_add(f.token_count as u64);
161 let lang = if f.language.trim().is_empty() {
162 "unknown"
163 } else {
164 f.language.as_str()
165 };
166 *counts.entry(lang).or_insert(0) += 1;
167 }
168
169 let mut entries: Vec<(&str, usize)> = counts.into_iter().collect();
170 entries.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));
171
172 let langs = entries
173 .into_iter()
174 .take(6)
175 .map(|(lang, count)| format!("{lang}:{count}"))
176 .collect::<Vec<_>>()
177 .join(", ");
178
179 (langs, total_tokens)
180}