Skip to main content

lean_ctx/core/
knowledge_bootstrap.rs

1use std::path::Path;
2
3use crate::core::graph_index::ProjectIndex;
4use crate::core::knowledge::ProjectKnowledge;
5use crate::core::memory_policy::MemoryPolicy;
6
/// Session identifier handed to `ProjectKnowledge::remember` for every fact seeded by this
/// module, so bootstrap-created facts share one recognizable origin string.
const BOOTSTRAP_SESSION_ID: &str = "auto-bootstrap";
/// Confidence value handed to `ProjectKnowledge::remember` alongside every seeded fact.
// NOTE(review): presumably on a 0.0..=1.0 scale — confirm against `ProjectKnowledge::remember`.
const BOOTSTRAP_CONFIDENCE: f32 = 0.95;
9
10/// Seed a minimal set of *real*, deterministic facts so the dashboard Knowledge Graph
11/// is never empty on a new project.
12///
13/// This does not use placeholders — it only derives values from the filesystem and/or index.
14pub fn bootstrap_if_empty(
15    knowledge: &mut ProjectKnowledge,
16    project_root: &str,
17    index: Option<&ProjectIndex>,
18    policy: &MemoryPolicy,
19) -> bool {
20    if !knowledge.facts.is_empty() {
21        return false;
22    }
23
24    let mut changed = false;
25
26    // Always safe + real: makes the graph non-empty even for marker-less folders.
27    changed |= remember_fact(knowledge, "workflow", "project_root", project_root, policy);
28
29    if let Some(identity) = crate::core::project_hash::project_identity(project_root) {
30        changed |= remember_fact(
31            knowledge,
32            "architecture",
33            "project_identity",
34            &identity,
35            policy,
36        );
37        if let Some(url) = identity.strip_prefix("git:") {
38            changed |= remember_fact(knowledge, "deployment", "git_remote", url, policy);
39        }
40    }
41
42    let markers = detect_build_markers(project_root);
43    if !markers.is_empty() {
44        changed |= remember_fact(
45            knowledge,
46            "architecture",
47            "build_markers",
48            &markers.join(", "),
49            policy,
50        );
51    }
52
53    if let Some(idx) = index {
54        let file_count = idx.files.len();
55        let symbol_count = idx.symbols.len();
56        let edge_count = idx.edges.len();
57        changed |= remember_fact(
58            knowledge,
59            "workflow",
60            "index_stats",
61            &format!("files={file_count}, symbols={symbol_count}, edges={edge_count}"),
62            policy,
63        );
64
65        if !idx.last_scan.trim().is_empty() {
66            changed |= remember_fact(
67                knowledge,
68                "workflow",
69                "index_last_scan",
70                &idx.last_scan,
71                policy,
72            );
73        }
74
75        let (langs, total_tokens) = summarize_languages_and_tokens(idx);
76        if !langs.is_empty() {
77            changed |= remember_fact(knowledge, "architecture", "languages_top", &langs, policy);
78        }
79        if total_tokens > 0 {
80            changed |= remember_fact(
81                knowledge,
82                "performance",
83                "tokens_indexed",
84                &total_tokens.to_string(),
85                policy,
86            );
87        }
88    }
89
90    changed
91}
92
93fn remember_fact(
94    knowledge: &mut ProjectKnowledge,
95    category: &str,
96    key: &str,
97    value: &str,
98    policy: &MemoryPolicy,
99) -> bool {
100    if value.trim().is_empty() {
101        return false;
102    }
103    knowledge.remember(
104        category,
105        key,
106        value,
107        BOOTSTRAP_SESSION_ID,
108        BOOTSTRAP_CONFIDENCE,
109        policy,
110    );
111    true
112}
113
/// List the build-system / VCS labels whose marker files exist directly inside `project_root`.
///
/// Only the top level of the directory is inspected. Labels come back in a fixed order:
/// `git`, `cargo`, `npm`, `python`, `go`, `maven`, `gradle`, `cmake`, `dotnet`.
/// A missing or unreadable directory yields an empty list.
fn detect_build_markers(project_root: &str) -> Vec<&'static str> {
    // (label, file names): a label is reported when any of its file names exists.
    // Table order is the order of the returned labels.
    const KNOWN_MARKERS: [(&str, &[&str]); 8] = [
        ("git", &[".git"]),
        ("cargo", &["Cargo.toml"]),
        ("npm", &["package.json"]),
        ("python", &["pyproject.toml"]),
        ("go", &["go.mod"]),
        ("maven", &["pom.xml"]),
        ("gradle", &["build.gradle", "build.gradle.kts"]),
        ("cmake", &["CMakeLists.txt"]),
    ];

    let base = Path::new(project_root);

    let mut found: Vec<&'static str> = KNOWN_MARKERS
        .iter()
        .filter(|(_, names)| names.iter().any(|name| base.join(name).exists()))
        .map(|(label, _)| *label)
        .collect();

    // Solution files have no fixed name, so scan the directory for any `*.sln` entry.
    // Entries that fail to read are ignored, as is a directory that cannot be listed.
    let has_solution = std::fs::read_dir(base).is_ok_and(|dir| {
        dir.flatten()
            .any(|entry| matches!(entry.path().extension(), Some(ext) if ext == "sln"))
    });
    if has_solution {
        found.push("dotnet");
    }

    found
}
154
155fn summarize_languages_and_tokens(index: &ProjectIndex) -> (String, u64) {
156    let mut total_tokens: u64 = 0;
157    let mut counts: std::collections::HashMap<&str, usize> = std::collections::HashMap::new();
158
159    for f in index.files.values() {
160        total_tokens = total_tokens.saturating_add(f.token_count as u64);
161        let lang = if f.language.trim().is_empty() {
162            "unknown"
163        } else {
164            f.language.as_str()
165        };
166        *counts.entry(lang).or_insert(0) += 1;
167    }
168
169    let mut entries: Vec<(&str, usize)> = counts.into_iter().collect();
170    entries.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));
171
172    let langs = entries
173        .into_iter()
174        .take(6)
175        .map(|(lang, count)| format!("{lang}:{count}"))
176        .collect::<Vec<_>>()
177        .join(", ");
178
179    (langs, total_tokens)
180}