1pub mod config;
2pub mod db;
3pub mod extraction;
4pub mod graph;
5pub mod mcp;
6pub mod types;
7
8use anyhow::{anyhow, Context, Result};
9use config::{load_config, save_config, CodeGraphConfig};
10use db::Database;
11use extraction::{detect_language, extract_from_source, should_include_file};
12use graph::{GraphTraverser, Subgraph};
13use sha2::{Digest, Sha256};
14use std::collections::BTreeSet;
15use std::fs;
16use std::path::{Path, PathBuf};
17use types::{FileRecord, GraphStats, IndexResult, Node, NodeEdge, SearchOptions, SearchResult};
18
/// Name of the per-project directory that holds CodeGraph state; `CodeGraph::init` creates it
/// directly under the project root.
pub const CODEGRAPH_DIR: &str = ".codegraph";
/// File name of the database stored inside [`CODEGRAPH_DIR`].
pub const DATABASE_FILE: &str = "codegraph.db";
21
/// Handle to a CodeGraph project: its root directory, its configuration and its database.
pub struct CodeGraph {
    /// Project root, i.e. the directory that contains the `.codegraph` directory.
    root: PathBuf,
    /// Project configuration: defaults written by `init`, or loaded from disk by `open`.
    config: CodeGraphConfig,
    /// Database handle for `<root>/.codegraph/codegraph.db`.
    db: Database,
}
27
28impl CodeGraph {
29 pub fn init(root: impl AsRef<Path>) -> Result<Self> {
30 let root = root
31 .as_ref()
32 .canonicalize()
33 .unwrap_or_else(|_| root.as_ref().to_path_buf());
34 let dir = root.join(CODEGRAPH_DIR);
35 if dir.exists() {
36 return Err(anyhow!(
37 "CodeGraph already initialized in {}",
38 root.display()
39 ));
40 }
41 fs::create_dir_all(&dir).with_context(|| format!("creating {}", dir.display()))?;
42 let config = CodeGraphConfig::default_for_root(".");
43 save_config(&root, &config)?;
44 let db = Database::initialize(dir.join(DATABASE_FILE))?;
45 Ok(Self { root, config, db })
46 }
47
48 pub fn open(root: impl AsRef<Path>) -> Result<Self> {
49 let root = find_nearest_codegraph_root(root.as_ref())
50 .ok_or_else(|| anyhow!("CodeGraph not initialized in {}", root.as_ref().display()))?;
51 let config = load_config(&root)?;
52 let db = Database::open(root.join(CODEGRAPH_DIR).join(DATABASE_FILE))?;
53 Ok(Self { root, config, db })
54 }
55
56 pub fn root(&self) -> &Path {
57 &self.root
58 }
59
60 pub fn index_all(&mut self) -> Result<IndexResult> {
61 let start = std::time::Instant::now();
62 self.db.clear_all()?;
63 let files = self.scan_files()?;
64 let mut result = IndexResult::default();
65
66 for path in files {
67 let full = self.root.join(&path);
68 let content = match fs::read_to_string(&full) {
69 Ok(content) => content,
70 Err(err) => {
71 result.files_errored += 1;
72 result.errors.push(format!("{}: {}", path.display(), err));
73 continue;
74 }
75 };
76 let lang = detect_language(&path, &content);
77 if lang.is_unknown() {
78 result.files_skipped += 1;
79 continue;
80 }
81 let extraction = extract_from_source(&path, &content, lang);
82 let hash = content_hash(&content);
83 let metadata = fs::metadata(&full)?;
84 self.db.insert_file(&FileRecord {
85 path: path.to_string_lossy().replace('\\', "/"),
86 content_hash: hash,
87 language: lang,
88 size: metadata.len(),
89 modified_at: metadata
90 .modified()
91 .ok()
92 .and_then(system_time_ms)
93 .unwrap_or_default(),
94 indexed_at: now_ms(),
95 node_count: extraction.nodes.len() as i64,
96 })?;
97 self.db.insert_nodes(&extraction.nodes)?;
98 self.db.insert_edges(&extraction.edges)?;
99 self.db
100 .insert_unresolved_refs(&extraction.unresolved_references)?;
101 result.files_indexed += 1;
102 result.nodes_created += extraction.nodes.len() as i64;
103 result.edges_created += extraction.edges.len() as i64;
104 }
105
106 self.db.resolve_references_by_name()?;
107 result.edges_created = self.db.edge_count()?;
108 result.success = result.files_errored == 0;
109 result.duration_ms = start.elapsed().as_millis() as i64;
110 Ok(result)
111 }
112
113 pub fn sync(&mut self) -> Result<IndexResult> {
114 self.index_all()
115 }
116
117 pub fn stats(&self) -> Result<GraphStats> {
118 self.db.stats()
119 }
120
121 pub fn search_nodes(&self, query: &str, options: SearchOptions) -> Result<Vec<SearchResult>> {
122 self.db.search_nodes(query, options)
123 }
124
125 pub fn get_node(&self, id: &str) -> Result<Option<Node>> {
126 self.db.get_node(id)
127 }
128
129 pub fn get_callers(&self, node_id: &str, max_depth: usize) -> Result<Vec<NodeEdge>> {
130 GraphTraverser::new(&self.db).get_callers(node_id, max_depth)
131 }
132
133 pub fn get_callees(&self, node_id: &str, max_depth: usize) -> Result<Vec<NodeEdge>> {
134 GraphTraverser::new(&self.db).get_callees(node_id, max_depth)
135 }
136
137 pub fn get_impact_radius(&self, node_id: &str, max_depth: usize) -> Result<Subgraph> {
138 GraphTraverser::new(&self.db).get_impact_radius(node_id, max_depth)
139 }
140
141 pub fn get_file_dependents(&self, file_path: &str) -> Result<Vec<String>> {
142 self.db.get_file_dependents(file_path)
143 }
144
145 pub fn get_all_files(&self) -> Result<Vec<FileRecord>> {
146 self.db.get_all_files()
147 }
148
149 pub fn build_context(&self, task: &str, max_nodes: i64, include_code: bool) -> Result<String> {
150 let results = self.find_context_nodes(task, max_nodes)?;
151 let mut out = format!("## Context: {task}\n\n");
152 if results.is_empty() {
153 out.push_str("No matching symbols or files were found.\n\n");
154 out.push_str("Try a concrete symbol name, file name, package/module name, or a shorter code term. ");
155 out.push_str("For candidate discovery, run `cgz query --json <term>`.\n");
156 return Ok(out);
157 }
158 for result in results {
159 let n = result.node;
160 out.push_str(&format!(
161 "- `{}` `{}` at `{}:{}`",
162 n.kind, n.name, n.file_path, n.start_line
163 ));
164 if let Some(sig) = n.signature.as_deref() {
165 out.push_str(&format!(" — `{}`", sig.replace('\n', " ")));
166 }
167 out.push('\n');
168 if include_code {
169 if let Ok(code) = self.read_node_source(&n) {
170 out.push_str("\n```");
171 out.push_str(n.language.as_str());
172 out.push('\n');
173 out.push_str(&code);
174 if !code.ends_with('\n') {
175 out.push('\n');
176 }
177 out.push_str("```\n\n");
178 }
179 }
180 }
181 Ok(out)
182 }
183
184 fn find_context_nodes(&self, task: &str, max_nodes: i64) -> Result<Vec<SearchResult>> {
185 let limit = max_nodes.max(1);
186 let mut out = Vec::new();
187 let mut seen = BTreeSet::new();
188
189 for term in context_search_terms(task) {
190 if out.len() >= limit as usize {
191 break;
192 }
193 let remaining = limit - out.len() as i64;
194 let results = self.search_nodes(
195 &term,
196 SearchOptions {
197 limit: remaining,
198 ..Default::default()
199 },
200 )?;
201 for result in results {
202 if seen.insert(result.node.id.clone()) {
203 out.push(result);
204 if out.len() >= limit as usize {
205 break;
206 }
207 }
208 }
209 }
210
211 Ok(out)
212 }
213
214 pub fn read_node_source(&self, node: &Node) -> Result<String> {
215 let full = self.root.join(&node.file_path);
216 let text =
217 fs::read_to_string(&full).with_context(|| format!("reading {}", full.display()))?;
218 let lines: Vec<&str> = text.lines().collect();
219 let start = (node.start_line.saturating_sub(1) as usize).min(lines.len());
220 let end = (node.end_line.max(node.start_line) as usize).min(lines.len());
221 Ok(lines[start..end].join("\n"))
222 }
223
224 pub fn close(self) {}
225
226 fn scan_files(&self) -> Result<Vec<PathBuf>> {
227 let mut out = Vec::new();
228 let walker = ignore::WalkBuilder::new(&self.root)
229 .hidden(false)
230 .git_ignore(true)
231 .git_global(true)
232 .git_exclude(true)
233 .build();
234 for entry in walker {
235 let entry = entry?;
236 if !entry.file_type().map(|ft| ft.is_file()).unwrap_or(false) {
237 continue;
238 }
239 let rel = entry
240 .path()
241 .strip_prefix(&self.root)
242 .unwrap_or(entry.path())
243 .to_path_buf();
244 if rel.components().any(|c| c.as_os_str() == CODEGRAPH_DIR) {
245 continue;
246 }
247 if should_include_file(&rel, &self.config) {
248 out.push(rel);
249 }
250 }
251 out.sort();
252 Ok(out)
253 }
254}
255
256fn context_search_terms(task: &str) -> Vec<String> {
257 let mut terms = Vec::new();
258 let mut seen = BTreeSet::new();
259 push_context_term(task.trim(), &mut terms, &mut seen);
260
261 for raw in task.split(|c: char| {
262 !(c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '/' || c == '.' || c == ':')
263 }) {
264 let term = raw.trim_matches(|c: char| {
265 !(c.is_ascii_alphanumeric() || c == '_' || c == '/' || c == '.' || c == ':')
266 });
267 if is_useful_context_term(term) {
268 push_context_term(term, &mut terms, &mut seen);
269 }
270 }
271
272 terms
273}
274
/// Appends `term` to `terms` unless it is empty or was already added.
///
/// Duplicates are detected case-insensitively (ASCII) through `seen`, which stores the
/// lowercased form; `terms` keeps the original spelling of the first occurrence.
fn push_context_term(term: &str, terms: &mut Vec<String>, seen: &mut BTreeSet<String>) {
    let is_new = !term.is_empty() && seen.insert(term.to_ascii_lowercase());
    if is_new {
        terms.push(term.to_owned());
    }
}
284
285fn is_useful_context_term(term: &str) -> bool {
286 if term.len() < 3 {
287 return false;
288 }
289 if CONTEXT_STOP_WORDS.contains(&term.to_ascii_lowercase().as_str()) {
290 return false;
291 }
292 term.contains('_')
293 || term.contains('/')
294 || term.contains('.')
295 || term.contains(':')
296 || term.chars().any(|c| c.is_ascii_digit())
297 || term.len() >= 5
298}
299
/// Words that are never used as search terms on their own.
///
/// `is_useful_context_term` compares the lowercased token against this list, so every entry
/// must be lowercase.
const CONTEXT_STOP_WORDS: &[&str] = &[
    "about",
    "after",
    "before",
    "build",
    "change",
    "check",
    "code",
    "context",
    "debug",
    "error",
    "feature",
    "files",
    "fix",
    "from",
    "handle",
    "implement",
    "invalid",
    "issue",
    "order",
    "query",
    "return",
    "should",
    "task",
    "test",
    "tests",
    "update",
    "valid",
    "validation",
    "when",
    "where",
    "with",
];
333
334pub fn is_initialized(root: impl AsRef<Path>) -> bool {
335 root.as_ref()
336 .join(CODEGRAPH_DIR)
337 .join(DATABASE_FILE)
338 .exists()
339}
340
341pub fn find_nearest_codegraph_root(start: impl AsRef<Path>) -> Option<PathBuf> {
342 let mut cur = start
343 .as_ref()
344 .canonicalize()
345 .unwrap_or_else(|_| start.as_ref().to_path_buf());
346 if cur.is_file() {
347 cur.pop();
348 }
349 loop {
350 if is_initialized(&cur) {
351 return Some(cur);
352 }
353 if !cur.pop() {
354 return None;
355 }
356 }
357}
358
359fn content_hash(content: &str) -> String {
360 let mut h = Sha256::new();
361 h.update(content.as_bytes());
362 format!("{:x}", h.finalize())
363}
364
365fn now_ms() -> i64 {
366 system_time_ms(std::time::SystemTime::now()).unwrap_or_default()
367}
368
/// Converts `t` to whole milliseconds since the Unix epoch.
///
/// Returns `None` when `t` lies before the epoch. Sub-millisecond precision is truncated.
fn system_time_ms(t: std::time::SystemTime) -> Option<i64> {
    match t.duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => Some(since_epoch.as_millis() as i64),
        Err(_) => None,
    }
}