Skip to main content

codegraph/
lib.rs

1pub mod config;
2pub mod db;
3pub mod extraction;
4pub mod graph;
5pub mod mcp;
6pub mod types;
7
8use anyhow::{anyhow, Context, Result};
9use config::{load_config, save_config, CodeGraphConfig};
10use db::Database;
11use extraction::{detect_language, extract_from_source, should_include_file};
12use graph::{GraphTraverser, Subgraph};
13use sha2::{Digest, Sha256};
14use std::collections::BTreeSet;
15use std::fs;
16use std::path::{Path, PathBuf};
17use types::{FileRecord, GraphStats, IndexResult, Node, NodeEdge, SearchOptions, SearchResult};
18
/// Name of the per-project directory (joined onto the project root) that holds CodeGraph state.
pub const CODEGRAPH_DIR: &str = ".codegraph";

/// File name of the database stored inside [`CODEGRAPH_DIR`].
pub const DATABASE_FILE: &str = "codegraph.db";
21
22pub struct CodeGraph {
23    root: PathBuf,
24    config: CodeGraphConfig,
25    db: Database,
26}
27
28impl CodeGraph {
29    pub fn init(root: impl AsRef<Path>) -> Result<Self> {
30        let root = root
31            .as_ref()
32            .canonicalize()
33            .unwrap_or_else(|_| root.as_ref().to_path_buf());
34        let dir = root.join(CODEGRAPH_DIR);
35        if dir.exists() {
36            return Err(anyhow!(
37                "CodeGraph already initialized in {}",
38                root.display()
39            ));
40        }
41        fs::create_dir_all(&dir).with_context(|| format!("creating {}", dir.display()))?;
42        let config = CodeGraphConfig::default_for_root(".");
43        save_config(&root, &config)?;
44        let db = Database::initialize(dir.join(DATABASE_FILE))?;
45        Ok(Self { root, config, db })
46    }
47
48    pub fn open(root: impl AsRef<Path>) -> Result<Self> {
49        let root = find_nearest_codegraph_root(root.as_ref())
50            .ok_or_else(|| anyhow!("CodeGraph not initialized in {}", root.as_ref().display()))?;
51        let config = load_config(&root)?;
52        let db = Database::open(root.join(CODEGRAPH_DIR).join(DATABASE_FILE))?;
53        Ok(Self { root, config, db })
54    }
55
56    pub fn root(&self) -> &Path {
57        &self.root
58    }
59
60    pub fn index_all(&mut self) -> Result<IndexResult> {
61        let start = std::time::Instant::now();
62        self.db.clear_all()?;
63        let files = self.scan_files()?;
64        let mut result = IndexResult::default();
65
66        for path in files {
67            let full = self.root.join(&path);
68            let content = match fs::read_to_string(&full) {
69                Ok(content) => content,
70                Err(err) => {
71                    result.files_errored += 1;
72                    result.errors.push(format!("{}: {}", path.display(), err));
73                    continue;
74                }
75            };
76            let lang = detect_language(&path, &content);
77            if lang.is_unknown() {
78                result.files_skipped += 1;
79                continue;
80            }
81            let extraction = extract_from_source(&path, &content, lang);
82            let hash = content_hash(&content);
83            let metadata = fs::metadata(&full)?;
84            self.db.insert_file(&FileRecord {
85                path: path.to_string_lossy().replace('\\', "/"),
86                content_hash: hash,
87                language: lang,
88                size: metadata.len(),
89                modified_at: metadata
90                    .modified()
91                    .ok()
92                    .and_then(system_time_ms)
93                    .unwrap_or_default(),
94                indexed_at: now_ms(),
95                node_count: extraction.nodes.len() as i64,
96            })?;
97            self.db.insert_nodes(&extraction.nodes)?;
98            self.db.insert_edges(&extraction.edges)?;
99            self.db
100                .insert_unresolved_refs(&extraction.unresolved_references)?;
101            result.files_indexed += 1;
102            result.nodes_created += extraction.nodes.len() as i64;
103            result.edges_created += extraction.edges.len() as i64;
104        }
105
106        self.db.resolve_references_by_name()?;
107        result.edges_created = self.db.edge_count()?;
108        result.success = result.files_errored == 0;
109        result.duration_ms = start.elapsed().as_millis() as i64;
110        Ok(result)
111    }
112
113    pub fn sync(&mut self) -> Result<IndexResult> {
114        self.index_all()
115    }
116
117    pub fn stats(&self) -> Result<GraphStats> {
118        self.db.stats()
119    }
120
121    pub fn search_nodes(&self, query: &str, options: SearchOptions) -> Result<Vec<SearchResult>> {
122        self.db.search_nodes(query, options)
123    }
124
125    pub fn get_node(&self, id: &str) -> Result<Option<Node>> {
126        self.db.get_node(id)
127    }
128
129    pub fn get_callers(&self, node_id: &str, max_depth: usize) -> Result<Vec<NodeEdge>> {
130        GraphTraverser::new(&self.db).get_callers(node_id, max_depth)
131    }
132
133    pub fn get_callees(&self, node_id: &str, max_depth: usize) -> Result<Vec<NodeEdge>> {
134        GraphTraverser::new(&self.db).get_callees(node_id, max_depth)
135    }
136
137    pub fn get_impact_radius(&self, node_id: &str, max_depth: usize) -> Result<Subgraph> {
138        GraphTraverser::new(&self.db).get_impact_radius(node_id, max_depth)
139    }
140
141    pub fn get_file_dependents(&self, file_path: &str) -> Result<Vec<String>> {
142        self.db.get_file_dependents(file_path)
143    }
144
145    pub fn get_all_files(&self) -> Result<Vec<FileRecord>> {
146        self.db.get_all_files()
147    }
148
149    pub fn build_context(&self, task: &str, max_nodes: i64, include_code: bool) -> Result<String> {
150        let results = self.find_context_nodes(task, max_nodes)?;
151        let mut out = format!("## Context: {task}\n\n");
152        if results.is_empty() {
153            out.push_str("No matching symbols or files were found.\n\n");
154            out.push_str("Try a concrete symbol name, file name, package/module name, or a shorter code term. ");
155            out.push_str("For candidate discovery, run `cgz query --json <term>`.\n");
156            return Ok(out);
157        }
158        for result in results {
159            let n = result.node;
160            out.push_str(&format!(
161                "- `{}` `{}` at `{}:{}`",
162                n.kind, n.name, n.file_path, n.start_line
163            ));
164            if let Some(sig) = n.signature.as_deref() {
165                out.push_str(&format!(" — `{}`", sig.replace('\n', " ")));
166            }
167            out.push('\n');
168            if include_code {
169                if let Ok(code) = self.read_node_source(&n) {
170                    out.push_str("\n```");
171                    out.push_str(n.language.as_str());
172                    out.push('\n');
173                    out.push_str(&code);
174                    if !code.ends_with('\n') {
175                        out.push('\n');
176                    }
177                    out.push_str("```\n\n");
178                }
179            }
180        }
181        Ok(out)
182    }
183
184    fn find_context_nodes(&self, task: &str, max_nodes: i64) -> Result<Vec<SearchResult>> {
185        let limit = max_nodes.max(1);
186        let mut out = Vec::new();
187        let mut seen = BTreeSet::new();
188
189        for term in context_search_terms(task) {
190            if out.len() >= limit as usize {
191                break;
192            }
193            let remaining = limit - out.len() as i64;
194            let results = self.search_nodes(
195                &term,
196                SearchOptions {
197                    limit: remaining,
198                    ..Default::default()
199                },
200            )?;
201            for result in results {
202                if seen.insert(result.node.id.clone()) {
203                    out.push(result);
204                    if out.len() >= limit as usize {
205                        break;
206                    }
207                }
208            }
209        }
210
211        Ok(out)
212    }
213
214    pub fn read_node_source(&self, node: &Node) -> Result<String> {
215        let full = self.root.join(&node.file_path);
216        let text =
217            fs::read_to_string(&full).with_context(|| format!("reading {}", full.display()))?;
218        let lines: Vec<&str> = text.lines().collect();
219        let start = (node.start_line.saturating_sub(1) as usize).min(lines.len());
220        let end = (node.end_line.max(node.start_line) as usize).min(lines.len());
221        Ok(lines[start..end].join("\n"))
222    }
223
224    pub fn close(self) {}
225
226    fn scan_files(&self) -> Result<Vec<PathBuf>> {
227        let mut out = Vec::new();
228        let walker = ignore::WalkBuilder::new(&self.root)
229            .hidden(false)
230            .git_ignore(true)
231            .git_global(true)
232            .git_exclude(true)
233            .build();
234        for entry in walker {
235            let entry = entry?;
236            if !entry.file_type().map(|ft| ft.is_file()).unwrap_or(false) {
237                continue;
238            }
239            let rel = entry
240                .path()
241                .strip_prefix(&self.root)
242                .unwrap_or(entry.path())
243                .to_path_buf();
244            if rel.components().any(|c| c.as_os_str() == CODEGRAPH_DIR) {
245                continue;
246            }
247            if should_include_file(&rel, &self.config) {
248                out.push(rel);
249            }
250        }
251        out.sort();
252        Ok(out)
253    }
254}
255
/// Derives the ordered list of search terms for a free-form task description.
///
/// The whole trimmed task always comes first. The task is then split into tokens made of
/// ASCII alphanumerics and `_ - / . :`; each token has leading `-` and trailing `-`, `.`
/// and `:` removed, so sentence punctuation ("parser.", "lib.rs:") does not end up inside a
/// term or let a stop word slip through. Tokens accepted by `is_useful_context_term` are
/// appended in order of appearance. Terms are de-duplicated ASCII-case-insensitively,
/// keeping the first spelling seen.
fn context_search_terms(task: &str) -> Vec<String> {
    let mut terms = Vec::new();
    let mut seen = BTreeSet::new();
    push_context_term(task.trim(), &mut terms, &mut seen);

    let is_term_char =
        |c: char| c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '/' | '.' | ':');

    for raw in task.split(|c: char| !is_term_char(c)) {
        // Leading `.`/`:` are kept on purpose (".gitignore", "::path"); only trailing ones
        // are treated as punctuation.
        let term = raw
            .trim_start_matches('-')
            .trim_end_matches(|c: char| matches!(c, '-' | '.' | ':'));
        if is_useful_context_term(term) {
            push_context_term(term, &mut terms, &mut seen);
        }
    }

    terms
}

/// Appends `term` to `terms` unless it is empty or its ASCII-lowercased form is already
/// recorded in `seen`.
fn push_context_term(term: &str, terms: &mut Vec<String>, seen: &mut BTreeSet<String>) {
    if term.is_empty() {
        return;
    }
    if seen.insert(term.to_ascii_lowercase()) {
        terms.push(term.to_string());
    }
}

/// Decides whether a single token is worth searching for on its own.
///
/// Tokens shorter than three bytes and stop words (compared ASCII-case-insensitively) are
/// rejected. Of the rest, a token is kept when it looks like an identifier or path
/// (contains `_`, `/`, `.`, `:` or a digit) or is at least five bytes long.
fn is_useful_context_term(term: &str) -> bool {
    if term.len() < 3 {
        return false;
    }
    if CONTEXT_STOP_WORDS
        .iter()
        .any(|stop| stop.eq_ignore_ascii_case(term))
    {
        return false;
    }
    term.len() >= 5
        || term
            .chars()
            .any(|c| matches!(c, '_' | '/' | '.' | ':') || c.is_ascii_digit())
}

/// Lowercase words that are too generic to be useful as standalone search terms.
const CONTEXT_STOP_WORDS: &[&str] = &[
    "about",
    "after",
    "before",
    "build",
    "change",
    "check",
    "code",
    "context",
    "debug",
    "error",
    "feature",
    "files",
    "fix",
    "from",
    "handle",
    "implement",
    "invalid",
    "issue",
    "order",
    "query",
    "return",
    "should",
    "task",
    "test",
    "tests",
    "update",
    "valid",
    "validation",
    "when",
    "where",
    "with",
];
333
334pub fn is_initialized(root: impl AsRef<Path>) -> bool {
335    root.as_ref()
336        .join(CODEGRAPH_DIR)
337        .join(DATABASE_FILE)
338        .exists()
339}
340
341pub fn find_nearest_codegraph_root(start: impl AsRef<Path>) -> Option<PathBuf> {
342    let mut cur = start
343        .as_ref()
344        .canonicalize()
345        .unwrap_or_else(|_| start.as_ref().to_path_buf());
346    if cur.is_file() {
347        cur.pop();
348    }
349    loop {
350        if is_initialized(&cur) {
351            return Some(cur);
352        }
353        if !cur.pop() {
354            return None;
355        }
356    }
357}
358
359fn content_hash(content: &str) -> String {
360    let mut h = Sha256::new();
361    h.update(content.as_bytes());
362    format!("{:x}", h.finalize())
363}
364
365fn now_ms() -> i64 {
366    system_time_ms(std::time::SystemTime::now()).unwrap_or_default()
367}
368
/// Converts `t` to milliseconds since the Unix epoch.
///
/// Returns `None` when `t` lies before the epoch.
fn system_time_ms(t: std::time::SystemTime) -> Option<i64> {
    match t.duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => Some(elapsed.as_millis() as i64),
        Err(_) => None,
    }
}