1pub mod config;
2pub mod db;
3pub mod extraction;
4pub mod graph;
5pub mod mcp;
6pub mod types;
7
8use anyhow::{anyhow, Context, Result};
9use config::{load_config, save_config, CodeGraphConfig};
10use db::Database;
11use extraction::{detect_language, extract_from_source, should_include_file};
12use graph::{GraphTraverser, Subgraph};
13use sha2::{Digest, Sha256};
14use std::fs;
15use std::path::{Path, PathBuf};
16use types::{FileRecord, GraphStats, IndexResult, Node, NodeEdge, SearchOptions, SearchResult};
17
/// Name of the directory, created directly under the project root, that holds CodeGraph state.
pub const CODEGRAPH_DIR: &str = ".codegraph";
/// File name of the database stored inside [`CODEGRAPH_DIR`].
pub const DATABASE_FILE: &str = "codegraph.db";
20
21pub struct CodeGraph {
22 root: PathBuf,
23 config: CodeGraphConfig,
24 db: Database,
25}
26
27impl CodeGraph {
28 pub fn init(root: impl AsRef<Path>) -> Result<Self> {
29 let root = root
30 .as_ref()
31 .canonicalize()
32 .unwrap_or_else(|_| root.as_ref().to_path_buf());
33 let dir = root.join(CODEGRAPH_DIR);
34 if dir.exists() {
35 return Err(anyhow!(
36 "CodeGraph already initialized in {}",
37 root.display()
38 ));
39 }
40 fs::create_dir_all(&dir).with_context(|| format!("creating {}", dir.display()))?;
41 let config = CodeGraphConfig::default_for_root(".");
42 save_config(&root, &config)?;
43 let db = Database::initialize(dir.join(DATABASE_FILE))?;
44 Ok(Self { root, config, db })
45 }
46
47 pub fn open(root: impl AsRef<Path>) -> Result<Self> {
48 let root = find_nearest_codegraph_root(root.as_ref())
49 .ok_or_else(|| anyhow!("CodeGraph not initialized in {}", root.as_ref().display()))?;
50 let config = load_config(&root)?;
51 let db = Database::open(root.join(CODEGRAPH_DIR).join(DATABASE_FILE))?;
52 Ok(Self { root, config, db })
53 }
54
55 pub fn root(&self) -> &Path {
56 &self.root
57 }
58
59 pub fn index_all(&mut self) -> Result<IndexResult> {
60 let start = std::time::Instant::now();
61 self.db.clear_all()?;
62 let files = self.scan_files()?;
63 let mut result = IndexResult::default();
64
65 for path in files {
66 let full = self.root.join(&path);
67 let content = match fs::read_to_string(&full) {
68 Ok(content) => content,
69 Err(err) => {
70 result.files_errored += 1;
71 result.errors.push(format!("{}: {}", path.display(), err));
72 continue;
73 }
74 };
75 let lang = detect_language(&path, &content);
76 if lang.is_unknown() {
77 result.files_skipped += 1;
78 continue;
79 }
80 let extraction = extract_from_source(&path, &content, lang);
81 let hash = content_hash(&content);
82 let metadata = fs::metadata(&full)?;
83 self.db.insert_file(&FileRecord {
84 path: path.to_string_lossy().replace('\\', "/"),
85 content_hash: hash,
86 language: lang,
87 size: metadata.len(),
88 modified_at: metadata
89 .modified()
90 .ok()
91 .and_then(system_time_ms)
92 .unwrap_or_default(),
93 indexed_at: now_ms(),
94 node_count: extraction.nodes.len() as i64,
95 })?;
96 self.db.insert_nodes(&extraction.nodes)?;
97 self.db.insert_edges(&extraction.edges)?;
98 self.db
99 .insert_unresolved_refs(&extraction.unresolved_references)?;
100 result.files_indexed += 1;
101 result.nodes_created += extraction.nodes.len() as i64;
102 result.edges_created += extraction.edges.len() as i64;
103 }
104
105 self.db.resolve_references_by_name()?;
106 result.edges_created = self.db.edge_count()?;
107 result.success = result.files_errored == 0;
108 result.duration_ms = start.elapsed().as_millis() as i64;
109 Ok(result)
110 }
111
112 pub fn sync(&mut self) -> Result<IndexResult> {
113 self.index_all()
114 }
115
116 pub fn stats(&self) -> Result<GraphStats> {
117 self.db.stats()
118 }
119
120 pub fn search_nodes(&self, query: &str, options: SearchOptions) -> Result<Vec<SearchResult>> {
121 self.db.search_nodes(query, options)
122 }
123
124 pub fn get_node(&self, id: &str) -> Result<Option<Node>> {
125 self.db.get_node(id)
126 }
127
128 pub fn get_callers(&self, node_id: &str, max_depth: usize) -> Result<Vec<NodeEdge>> {
129 GraphTraverser::new(&self.db).get_callers(node_id, max_depth)
130 }
131
132 pub fn get_callees(&self, node_id: &str, max_depth: usize) -> Result<Vec<NodeEdge>> {
133 GraphTraverser::new(&self.db).get_callees(node_id, max_depth)
134 }
135
136 pub fn get_impact_radius(&self, node_id: &str, max_depth: usize) -> Result<Subgraph> {
137 GraphTraverser::new(&self.db).get_impact_radius(node_id, max_depth)
138 }
139
140 pub fn get_file_dependents(&self, file_path: &str) -> Result<Vec<String>> {
141 self.db.get_file_dependents(file_path)
142 }
143
144 pub fn get_all_files(&self) -> Result<Vec<FileRecord>> {
145 self.db.get_all_files()
146 }
147
148 pub fn build_context(&self, task: &str, max_nodes: i64, include_code: bool) -> Result<String> {
149 let results = self.search_nodes(
150 task,
151 SearchOptions {
152 limit: max_nodes,
153 ..Default::default()
154 },
155 )?;
156 let mut out = format!("## Context: {task}\n\n");
157 for result in results {
158 let n = result.node;
159 out.push_str(&format!(
160 "- `{}` `{}` at `{}:{}`",
161 n.kind, n.name, n.file_path, n.start_line
162 ));
163 if let Some(sig) = n.signature.as_deref() {
164 out.push_str(&format!(" — `{}`", sig.replace('\n', " ")));
165 }
166 out.push('\n');
167 if include_code {
168 if let Ok(code) = self.read_node_source(&n) {
169 out.push_str("\n```");
170 out.push_str(n.language.as_str());
171 out.push('\n');
172 out.push_str(&code);
173 if !code.ends_with('\n') {
174 out.push('\n');
175 }
176 out.push_str("```\n\n");
177 }
178 }
179 }
180 Ok(out)
181 }
182
183 pub fn read_node_source(&self, node: &Node) -> Result<String> {
184 let full = self.root.join(&node.file_path);
185 let text =
186 fs::read_to_string(&full).with_context(|| format!("reading {}", full.display()))?;
187 let lines: Vec<&str> = text.lines().collect();
188 let start = (node.start_line.saturating_sub(1) as usize).min(lines.len());
189 let end = (node.end_line.max(node.start_line) as usize).min(lines.len());
190 Ok(lines[start..end].join("\n"))
191 }
192
193 pub fn close(self) {}
194
195 fn scan_files(&self) -> Result<Vec<PathBuf>> {
196 let mut out = Vec::new();
197 let walker = ignore::WalkBuilder::new(&self.root)
198 .hidden(false)
199 .git_ignore(true)
200 .git_global(true)
201 .git_exclude(true)
202 .build();
203 for entry in walker {
204 let entry = entry?;
205 if !entry.file_type().map(|ft| ft.is_file()).unwrap_or(false) {
206 continue;
207 }
208 let rel = entry
209 .path()
210 .strip_prefix(&self.root)
211 .unwrap_or(entry.path())
212 .to_path_buf();
213 if rel.components().any(|c| c.as_os_str() == CODEGRAPH_DIR) {
214 continue;
215 }
216 if should_include_file(&rel, &self.config) {
217 out.push(rel);
218 }
219 }
220 out.sort();
221 Ok(out)
222 }
223}
224
225pub fn is_initialized(root: impl AsRef<Path>) -> bool {
226 root.as_ref()
227 .join(CODEGRAPH_DIR)
228 .join(DATABASE_FILE)
229 .exists()
230}
231
232pub fn find_nearest_codegraph_root(start: impl AsRef<Path>) -> Option<PathBuf> {
233 let mut cur = start
234 .as_ref()
235 .canonicalize()
236 .unwrap_or_else(|_| start.as_ref().to_path_buf());
237 if cur.is_file() {
238 cur.pop();
239 }
240 loop {
241 if is_initialized(&cur) {
242 return Some(cur);
243 }
244 if !cur.pop() {
245 return None;
246 }
247 }
248}
249
250fn content_hash(content: &str) -> String {
251 let mut h = Sha256::new();
252 h.update(content.as_bytes());
253 format!("{:x}", h.finalize())
254}
255
256fn now_ms() -> i64 {
257 system_time_ms(std::time::SystemTime::now()).unwrap_or_default()
258}
259
/// Converts `t` to whole milliseconds since the Unix epoch.
///
/// Returns `None` when `t` lies before the epoch or when the millisecond count
/// does not fit in an `i64` (instead of silently truncating it).
fn system_time_ms(t: std::time::SystemTime) -> Option<i64> {
    let since_epoch = t.duration_since(std::time::UNIX_EPOCH).ok()?;
    // `as_millis` yields a `u128`; a checked conversion avoids wrap-around on huge values.
    i64::try_from(since_epoch.as_millis()).ok()
}