1use std::path::{Path, PathBuf};
2
3use duckdb::params;
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6
7use crate::parser::{EdgeDef, NodeDef};
8
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct Node {
11 pub id: String,
12 pub kind: String,
13 pub name: String,
14 pub path: String,
15 pub line_start: u32,
16 pub line_end: u32,
17 #[serde(default)]
18 pub language: String,
19 #[serde(default)]
20 pub churn: f64,
21 #[serde(default)]
22 pub coupling: f64,
23 #[serde(default)]
24 pub community: i64,
25 #[serde(default)]
26 pub in_degree: i64,
27 #[serde(default)]
28 pub out_degree: i64,
29 #[serde(default)]
30 pub exported: bool,
31 #[serde(default)]
32 pub is_dead_candidate: bool,
33 #[serde(default)]
34 pub dead_reason: Option<String>,
35}
36
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct Edge {
39 pub id: String,
40 pub src: String,
41 pub dst: String,
42 pub kind: String,
43 #[serde(default = "default_weight")]
44 pub weight: f64,
45 #[serde(default = "default_weight")]
46 pub confidence: f64,
47}
48
/// Serde fallback used for `Edge::weight` and `Edge::confidence` when the
/// field is absent from the input.
fn default_weight() -> f64 {
    1.0_f64
}
52
53#[derive(Debug, Clone, Serialize, Deserialize)]
54pub struct RepoStats {
55 pub node_count: u64,
56 pub edge_count: u64,
57 pub language_breakdown: std::collections::HashMap<String, f64>,
58 pub community_count: u32,
59 pub function_count: u64,
60 pub class_count: u64,
61 pub file_count: u64,
62}
63
64pub type CommunityRow = (i64, String, i64, Vec<String>);
65type CommunityGroup = (Vec<(String, i64, String)>, i64); #[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct TagRow {
69 pub id: String,
70 pub file_path: String,
71 pub line: u32,
72 pub tag_type: String,
73 pub text: String,
74 pub comment_type: String,
76}
77
78impl Default for Node {
79 fn default() -> Self {
80 Self {
81 id: String::new(),
82 kind: String::new(),
83 name: String::new(),
84 path: String::new(),
85 line_start: 0,
86 line_end: 0,
87 language: String::new(),
88 churn: 0.0,
89 coupling: 0.0,
90 community: 0,
91 in_degree: 0,
92 out_degree: 0,
93 exported: false,
94 is_dead_candidate: false,
95 dead_reason: None,
96 }
97 }
98}
99
100impl Node {
101 pub fn from_def(d: &NodeDef, language: &str) -> Self {
102 let exported = d
103 .metadata
104 .get("exported")
105 .and_then(|v| v.as_bool())
106 .unwrap_or(false);
107 Self {
108 id: d.id.clone(),
109 kind: d.kind.as_str().to_string(),
110 name: d.name.clone(),
111 path: d.path.clone(),
112 line_start: d.line_start,
113 line_end: d.line_end,
114 language: language.to_string(),
115 churn: 0.0,
116 coupling: 0.0,
117 community: 0,
118 in_degree: 0,
119 out_degree: 0,
120 exported,
121 is_dead_candidate: false,
122 dead_reason: None,
123 }
124 }
125}
126
127impl Edge {
128 pub fn from_def(d: &EdgeDef) -> Self {
129 let id = format!("{}|{}|{}", d.src, d.kind.as_str(), d.dst);
130 Self {
131 id,
132 src: d.src.clone(),
133 dst: d.dst.clone(),
134 kind: d.kind.as_str().to_string(),
135 weight: d.weight,
136 confidence: d.confidence,
137 }
138 }
139}
140
141pub struct GraphDb {
142 pub conn: duckdb::Connection,
143 pub repo_id: String,
144 pub db_path: PathBuf,
145}
146
147impl GraphDb {
148 pub fn open(repo_path: &Path) -> anyhow::Result<Self> {
149 let repo_id = repo_hash(repo_path);
150 let dir = dirs::home_dir()
151 .ok_or_else(|| anyhow::anyhow!("cannot determine home directory"))?
152 .join(".cgx")
153 .join("repos");
154 std::fs::create_dir_all(&dir)?;
155
156 let db_path = dir.join(format!("{}.db", repo_id));
157 let conn = duckdb::Connection::open(&db_path)?;
158
159 conn.execute_batch(
160 "CREATE TABLE IF NOT EXISTS nodes (
161 id VARCHAR PRIMARY KEY,
162 kind VARCHAR NOT NULL,
163 name VARCHAR NOT NULL,
164 path VARCHAR NOT NULL,
165 line_start INTEGER,
166 line_end INTEGER,
167 language VARCHAR,
168 churn DOUBLE DEFAULT 0.0,
169 coupling DOUBLE DEFAULT 0.0,
170 community BIGINT DEFAULT 0,
171 in_degree BIGINT DEFAULT 0,
172 out_degree BIGINT DEFAULT 0,
173 exported TINYINT DEFAULT 0,
174 is_dead_candidate TINYINT DEFAULT 0,
175 dead_reason TEXT,
176 metadata JSON
177 );
178 CREATE TABLE IF NOT EXISTS edges (
179 id VARCHAR PRIMARY KEY,
180 src VARCHAR NOT NULL,
181 dst VARCHAR NOT NULL,
182 kind VARCHAR NOT NULL,
183 weight DOUBLE DEFAULT 1.0,
184 confidence DOUBLE DEFAULT 1.0,
185 metadata JSON
186 );
187 CREATE TABLE IF NOT EXISTS communities (
188 id INTEGER PRIMARY KEY,
189 label VARCHAR,
190 node_count INTEGER,
191 top_nodes JSON
192 );
193 CREATE TABLE IF NOT EXISTS repo_meta (
194 key VARCHAR PRIMARY KEY,
195 value JSON
196 );
197 CREATE TABLE IF NOT EXISTS file_hashes (
198 path VARCHAR PRIMARY KEY,
199 hash VARCHAR NOT NULL,
200 indexed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
201 );
202 CREATE TABLE IF NOT EXISTS tags (
203 id VARCHAR PRIMARY KEY,
204 file_path VARCHAR NOT NULL,
205 line INTEGER NOT NULL,
206 tag_type VARCHAR NOT NULL,
207 text VARCHAR NOT NULL,
208 comment_type VARCHAR NOT NULL DEFAULT 'code'
209 );
210 CREATE INDEX IF NOT EXISTS idx_nodes_kind ON nodes(kind);
211 CREATE INDEX IF NOT EXISTS idx_nodes_path ON nodes(path);
212 CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community);
213 CREATE INDEX IF NOT EXISTS idx_edges_src ON edges(src);
214 CREATE INDEX IF NOT EXISTS idx_edges_dst ON edges(dst);
215 CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind);
216 CREATE INDEX IF NOT EXISTS idx_tags_file ON tags(file_path);
217 CREATE INDEX IF NOT EXISTS idx_tags_type ON tags(tag_type);",
218 )?;
219
220 conn.execute_batch(
224 "ALTER TABLE nodes ADD COLUMN IF NOT EXISTS exported TINYINT DEFAULT 0;
225 ALTER TABLE nodes ADD COLUMN IF NOT EXISTS is_dead_candidate TINYINT DEFAULT 0;
226 ALTER TABLE nodes ADD COLUMN IF NOT EXISTS dead_reason TEXT;
227 CREATE INDEX IF NOT EXISTS idx_nodes_dead ON nodes(is_dead_candidate);",
228 )?;
229
230 Ok(Self {
231 conn,
232 repo_id,
233 db_path,
234 })
235 }
236
237 pub fn upsert_nodes(&self, nodes: &[Node]) -> anyhow::Result<usize> {
238 if nodes.is_empty() {
239 return Ok(0);
240 }
241 let mut count = 0;
242 let mut stmt = self.conn.prepare(
243 "INSERT OR REPLACE INTO nodes (id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, exported)
244 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
245 )?;
246 for node in nodes {
247 stmt.execute(params![
248 node.id,
249 node.kind,
250 node.name,
251 node.path,
252 node.line_start,
253 node.line_end,
254 node.language,
255 node.churn,
256 node.coupling,
257 node.community,
258 node.in_degree,
259 node.out_degree,
260 node.exported as i32,
261 ])?;
262 count += 1;
263 }
264 Ok(count)
265 }
266
267 pub fn upsert_edges(&self, edges: &[Edge]) -> anyhow::Result<usize> {
268 if edges.is_empty() {
269 return Ok(0);
270 }
271 let mut count = 0;
272 let mut stmt = self.conn.prepare(
273 "INSERT OR REPLACE INTO edges (id, src, dst, kind, weight, confidence)
274 VALUES (?, ?, ?, ?, ?, ?)",
275 )?;
276 for edge in edges {
277 stmt.execute(params![
278 edge.id,
279 edge.src,
280 edge.dst,
281 edge.kind,
282 edge.weight,
283 edge.confidence,
284 ])?;
285 count += 1;
286 }
287 Ok(count)
288 }
289
290 pub fn upsert_tags(&self, tags: &[TagRow]) -> anyhow::Result<usize> {
291 if tags.is_empty() {
292 return Ok(0);
293 }
294 let mut count = 0;
295 let mut stmt = self.conn.prepare(
296 "INSERT OR REPLACE INTO tags (id, file_path, line, tag_type, text, comment_type)
297 VALUES (?, ?, ?, ?, ?, ?)",
298 )?;
299 for tag in tags {
300 stmt.execute(params![
301 tag.id,
302 tag.file_path,
303 tag.line,
304 tag.tag_type,
305 tag.text,
306 tag.comment_type,
307 ])?;
308 count += 1;
309 }
310 Ok(count)
311 }
312
313 pub fn get_tags(
314 &self,
315 tag_type_filter: Option<&str>,
316 comment_type_filter: Option<&str>,
317 ) -> anyhow::Result<Vec<TagRow>> {
318 let sql = match (tag_type_filter, comment_type_filter) {
319 (Some(_), Some(_)) => {
320 "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
321 WHERE tag_type = ? AND comment_type = ? ORDER BY file_path, line"
322 }
323 (Some(_), None) => {
324 "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
325 WHERE tag_type = ? ORDER BY file_path, line"
326 }
327 (None, Some(_)) => {
328 "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
329 WHERE comment_type = ? ORDER BY file_path, line"
330 }
331 (None, None) => {
332 "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
333 ORDER BY file_path, line"
334 }
335 };
336
337 let mut stmt = self.conn.prepare(sql)?;
338 let map_row = |row: &duckdb::Row| {
339 Ok(TagRow {
340 id: row.get(0)?,
341 file_path: row.get(1)?,
342 line: row.get::<_, u32>(2)?,
343 tag_type: row.get(3)?,
344 text: row.get(4)?,
345 comment_type: row.get(5)?,
346 })
347 };
348
349 let rows = match (tag_type_filter, comment_type_filter) {
350 (Some(t), Some(c)) => stmt.query_map(params![t, c], map_row)?,
351 (Some(t), None) => stmt.query_map(params![t], map_row)?,
352 (None, Some(c)) => stmt.query_map(params![c], map_row)?,
353 (None, None) => stmt.query_map([], map_row)?,
354 };
355
356 let mut results = Vec::new();
357 for row in rows {
358 results.push(row?);
359 }
360 Ok(results)
361 }
362
363 pub fn clear_all_tags(&self) -> anyhow::Result<()> {
364 self.conn.execute_batch(
365 "DROP TABLE IF EXISTS tags;
366 CREATE TABLE IF NOT EXISTS tags (
367 id VARCHAR PRIMARY KEY,
368 file_path VARCHAR NOT NULL,
369 line INTEGER NOT NULL,
370 tag_type VARCHAR NOT NULL,
371 text VARCHAR NOT NULL,
372 comment_type VARCHAR NOT NULL DEFAULT 'code'
373 );
374 CREATE INDEX IF NOT EXISTS idx_tags_file ON tags(file_path);
375 CREATE INDEX IF NOT EXISTS idx_tags_type ON tags(tag_type);",
376 )?;
377 Ok(())
378 }
379
380 pub fn delete_tags_for_paths(&self, paths: &[String]) -> anyhow::Result<()> {
381 if paths.is_empty() {
382 return Ok(());
383 }
384 let mut stmt = self.conn.prepare("DELETE FROM tags WHERE file_path = ?")?;
385 for path in paths {
386 stmt.execute(params![path])?;
387 }
388 Ok(())
389 }
390
391 pub fn get_node(&self, id: &str) -> anyhow::Result<Option<Node>> {
392 let mut stmt = self
393 .conn
394 .prepare("SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false) as exported, COALESCE(is_dead_candidate, false) as is_dead_candidate, dead_reason FROM nodes WHERE id = ?")?;
395 let mut rows = stmt.query_map(params![id], |row| {
396 Ok(Node {
397 id: row.get(0)?,
398 kind: row.get(1)?,
399 name: row.get(2)?,
400 path: row.get(3)?,
401 line_start: row.get(4)?,
402 line_end: row.get(5)?,
403 language: row.get(6)?,
404 churn: row.get(7)?,
405 coupling: row.get(8)?,
406 community: row.get(9)?,
407 in_degree: row.get(10)?,
408 out_degree: row.get(11)?,
409 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
410 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
411 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
412 })
413 })?;
414
415 match rows.next() {
416 Some(Ok(node)) => Ok(Some(node)),
417 _ => Ok(None),
418 }
419 }
420
421 pub fn get_neighbors(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
422 let mut seen = std::collections::HashSet::new();
423 seen.insert(id.to_string());
424 let mut current = vec![id.to_string()];
425 let mut result: Vec<Node> = Vec::new();
426 let max_depth = depth.min(3);
427
428 for _ in 0..max_depth {
429 if current.is_empty() {
430 break;
431 }
432 let mut next = Vec::new();
433
434 for cur_id in ¤t {
435 let mut stmt = self.conn.prepare(
436 "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, false), COALESCE(n.is_dead_candidate, false), n.dead_reason
437 FROM nodes n
438 INNER JOIN edges e ON (e.dst = n.id AND e.src = ?1) OR (e.src = n.id AND e.dst = ?2)
439 LIMIT 100",
440 )?;
441 let rows = stmt.query_map(params![cur_id, cur_id], |row| {
442 Ok(Node {
443 id: row.get(0)?,
444 kind: row.get(1)?,
445 name: row.get(2)?,
446 path: row.get(3)?,
447 line_start: row.get(4)?,
448 line_end: row.get(5)?,
449 language: row.get(6)?,
450 churn: row.get(7)?,
451 coupling: row.get(8)?,
452 community: row.get(9)?,
453 in_degree: row.get(10)?,
454 out_degree: row.get(11)?,
455 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
456 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
457 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
458 })
459 })?;
460
461 for row in rows {
462 let node = row?;
463 if seen.insert(node.id.clone()) {
464 next.push(node.id.clone());
465 result.push(node);
466 }
467 }
468 }
469 current = next;
470 }
471
472 Ok(result)
473 }
474
475 pub fn get_all_nodes(&self) -> anyhow::Result<Vec<Node>> {
476 let mut stmt = self.conn.prepare(
477 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason FROM nodes",
478 )?;
479 let rows = stmt.query_map([], |row| {
480 Ok(Node {
481 id: row.get(0)?,
482 kind: row.get(1)?,
483 name: row.get(2)?,
484 path: row.get(3)?,
485 line_start: row.get(4)?,
486 line_end: row.get(5)?,
487 language: row.get(6)?,
488 churn: row.get(7)?,
489 coupling: row.get(8)?,
490 community: row.get(9)?,
491 in_degree: row.get(10)?,
492 out_degree: row.get(11)?,
493 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
494 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
495 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
496 })
497 })?;
498
499 let mut nodes = Vec::new();
500 for row in rows {
501 nodes.push(row?);
502 }
503 Ok(nodes)
504 }
505
506 pub fn get_all_edges(&self) -> anyhow::Result<Vec<Edge>> {
507 let mut stmt = self
508 .conn
509 .prepare("SELECT id, src, dst, kind, weight, confidence FROM edges")?;
510 let rows = stmt.query_map([], |row| {
511 Ok(Edge {
512 id: row.get(0)?,
513 src: row.get(1)?,
514 dst: row.get(2)?,
515 kind: row.get(3)?,
516 weight: row.get(4)?,
517 confidence: row.get(5)?,
518 })
519 })?;
520
521 let mut edges = Vec::new();
522 for row in rows {
523 edges.push(row?);
524 }
525 Ok(edges)
526 }
527
528 pub fn node_count(&self) -> anyhow::Result<u64> {
529 let count: i64 = self
530 .conn
531 .query_row("SELECT COUNT(*) FROM nodes", [], |row| row.get(0))?;
532 Ok(count as u64)
533 }
534
535 pub fn edge_count(&self) -> anyhow::Result<u64> {
536 let count: i64 = self
537 .conn
538 .query_row("SELECT COUNT(*) FROM edges", [], |row| row.get(0))?;
539 Ok(count as u64)
540 }
541
542 pub fn clear(&self) -> anyhow::Result<()> {
543 self.conn.execute_batch(
546 "TRUNCATE TABLE edges;
547 TRUNCATE TABLE nodes;
548 TRUNCATE TABLE communities;",
549 )?;
550 Ok(())
551 }
552
553 pub fn get_language_breakdown(&self) -> anyhow::Result<std::collections::HashMap<String, f64>> {
554 let mut stmt = self.conn.prepare(
555 "SELECT language, COUNT(*) as cnt FROM nodes WHERE language != '' GROUP BY language",
556 )?;
557 let rows = stmt.query_map([], |row| {
558 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
559 })?;
560
561 let mut counts: std::collections::HashMap<String, i64> = std::collections::HashMap::new();
562 for row in rows {
563 let (lang, cnt) = row?;
564 *counts.entry(lang).or_default() += cnt;
565 }
566
567 let total: i64 = counts.values().sum();
568 if total == 0 {
569 return Ok(std::collections::HashMap::new());
570 }
571
572 let mut breakdown = std::collections::HashMap::new();
573 for (lang, cnt) in counts {
574 breakdown.insert(lang, cnt as f64 / total as f64);
575 }
576 Ok(breakdown)
577 }
578
579 pub fn get_node_counts_by_kind(
580 &self,
581 ) -> anyhow::Result<std::collections::HashMap<String, u64>> {
582 let mut stmt = self
583 .conn
584 .prepare("SELECT kind, COUNT(*) as cnt FROM nodes GROUP BY kind")?;
585 let rows = stmt.query_map([], |row| {
586 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
587 })?;
588
589 let mut counts = std::collections::HashMap::new();
590 for row in rows {
591 let (kind, cnt) = row?;
592 counts.insert(kind, cnt as u64);
593 }
594 Ok(counts)
595 }
596
597 pub fn upsert_node_scores(
598 &self,
599 node_id: &str,
600 churn: f64,
601 coupling: f64,
602 ) -> anyhow::Result<()> {
603 self.conn.execute(
604 "UPDATE nodes SET churn = ?, coupling = ? WHERE id = ?",
605 params![churn, coupling, node_id],
606 )?;
607 Ok(())
608 }
609
610 pub fn update_in_out_degrees(&self) -> anyhow::Result<()> {
611 self.conn.execute_batch(
612 "UPDATE nodes SET in_degree = 0, out_degree = 0;
613 UPDATE nodes SET out_degree = (SELECT COUNT(*) FROM edges WHERE edges.src = nodes.id);
614 UPDATE nodes SET in_degree = (SELECT COUNT(*) FROM edges WHERE edges.dst = nodes.id);",
615 )?;
616 Ok(())
617 }
618
619 pub fn get_hotspots(&self, limit: usize) -> anyhow::Result<Vec<(String, f64, f64, i64)>> {
620 let mut stmt = self.conn.prepare(
621 "SELECT path, churn, coupling, in_degree
622 FROM nodes
623 WHERE kind = 'File' AND (churn > 0.0 OR in_degree > 0)
624 ORDER BY (churn * COALESCE(coupling, 0.0) + CAST(in_degree AS DOUBLE) * 0.01) DESC
625 LIMIT ?",
626 )?;
627 let rows = stmt.query_map(params![limit as i64], |row| {
628 Ok((
629 row.get::<_, String>(0)?,
630 row.get::<_, f64>(1)?,
631 row.get::<_, f64>(2)?,
632 row.get::<_, i64>(3)?,
633 ))
634 })?;
635 let mut results = Vec::new();
636 for row in rows {
637 results.push(row?);
638 }
639 Ok(results)
640 }
641
642 pub fn get_ownership(&self) -> anyhow::Result<Vec<(String, i64)>> {
643 let mut stmt = self.conn.prepare(
644 "SELECT n.name, COUNT(e.id) as file_count
645 FROM nodes n
646 INNER JOIN edges e ON e.src = n.id AND e.kind = 'OWNS'
647 WHERE n.kind = 'Author'
648 GROUP BY n.name
649 ORDER BY file_count DESC",
650 )?;
651 let rows = stmt.query_map([], |row| {
652 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
653 })?;
654 let mut results = Vec::new();
655 for row in rows {
656 results.push(row?);
657 }
658 Ok(results)
659 }
660
661 pub fn compute_coupling(&self) -> anyhow::Result<()> {
662 self.conn.execute_batch(
663 "UPDATE nodes SET coupling = 0.0;
664 UPDATE nodes SET coupling =
665 CASE
666 WHEN (SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') > 0
667 THEN CAST(in_degree AS DOUBLE) / CAST((SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') AS DOUBLE)
668 ELSE 0.0
669 END
670 WHERE kind = 'File';",
671 )?;
672 Ok(())
673 }
674
675 pub fn update_node_communities(
676 &self,
677 communities: &std::collections::HashMap<String, i64>,
678 ) -> anyhow::Result<usize> {
679 if communities.is_empty() {
680 return Ok(0);
681 }
682 let mut count = 0;
683 let mut stmt = self
684 .conn
685 .prepare("UPDATE nodes SET community = ? WHERE id = ?")?;
686 for (node_id, community) in communities {
687 let affected = stmt.execute(params![*community, node_id.as_str()])?;
688 count += affected;
689 }
690 Ok(count)
691 }
692
693 pub fn get_stats(&self) -> anyhow::Result<RepoStats> {
694 let node_count = self.node_count()?;
695 let edge_count = self.edge_count()?;
696 let lang_breakdown = self.get_language_breakdown()?;
697 let communities = self.get_communities()?;
698 let counts_by_kind = self.get_node_counts_by_kind()?;
699
700 Ok(RepoStats {
701 node_count,
702 edge_count,
703 language_breakdown: lang_breakdown,
704 community_count: communities.len() as u32,
705 function_count: counts_by_kind.get("Function").copied().unwrap_or(0),
706 class_count: counts_by_kind.get("Class").copied().unwrap_or(0),
707 file_count: counts_by_kind.get("File").copied().unwrap_or(0),
708 })
709 }
710
711 pub fn get_entry_points(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
712 let mut stmt = self.conn.prepare(
713 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason
714 FROM nodes
715 WHERE in_degree = 0 AND kind != 'File' AND kind != 'Author'
716 ORDER BY out_degree DESC
717 LIMIT ?",
718 )?;
719 let rows = stmt.query_map(params![limit as i64], |row| {
720 Ok(Node {
721 id: row.get(0)?,
722 kind: row.get(1)?,
723 name: row.get(2)?,
724 path: row.get(3)?,
725 line_start: row.get(4)?,
726 line_end: row.get(5)?,
727 language: row.get(6)?,
728 churn: row.get(7)?,
729 coupling: row.get(8)?,
730 community: row.get(9)?,
731 in_degree: row.get(10)?,
732 out_degree: row.get(11)?,
733 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
734 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
735 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
736 })
737 })?;
738 let mut results = Vec::new();
739 for row in rows {
740 results.push(row?);
741 }
742 Ok(results)
743 }
744
745 pub fn get_god_nodes(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
746 let mut stmt = self.conn.prepare(
747 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason
748 FROM nodes
749 WHERE in_degree > 0 AND kind != 'File' AND kind != 'Author'
750 ORDER BY in_degree DESC
751 LIMIT ?",
752 )?;
753 let rows = stmt.query_map(params![limit as i64], |row| {
754 Ok(Node {
755 id: row.get(0)?,
756 kind: row.get(1)?,
757 name: row.get(2)?,
758 path: row.get(3)?,
759 line_start: row.get(4)?,
760 line_end: row.get(5)?,
761 language: row.get(6)?,
762 churn: row.get(7)?,
763 coupling: row.get(8)?,
764 community: row.get(9)?,
765 in_degree: row.get(10)?,
766 out_degree: row.get(11)?,
767 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
768 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
769 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
770 })
771 })?;
772 let mut results = Vec::new();
773 for row in rows {
774 results.push(row?);
775 }
776 Ok(results)
777 }
778
779 pub fn get_communities(&self) -> anyhow::Result<Vec<CommunityRow>> {
780 let mut stmt = self.conn.prepare(
781 "SELECT community, kind, name, path, in_degree
782 FROM nodes
783 WHERE community > 0
784 ORDER BY community",
785 )?;
786 let rows = stmt.query_map([], |row| {
787 Ok((
788 row.get::<_, i64>(0)?,
789 row.get::<_, String>(1)?,
790 row.get::<_, String>(2)?,
791 row.get::<_, String>(3)?,
792 row.get::<_, i64>(4)?,
793 ))
794 })?;
795
796 let mut community_map: std::collections::HashMap<i64, CommunityGroup> =
797 std::collections::HashMap::new();
798 for row in rows {
799 let (community, kind, name, _path, in_degree) = row?;
800 let entry = community_map
801 .entry(community)
802 .or_insert_with(|| (Vec::new(), 0));
803 entry.0.push((kind, in_degree, name));
804 entry.1 += 1;
805 }
806
807 let mut result: Vec<CommunityRow> = community_map
808 .into_iter()
809 .map(|(community, (mut items, count))| {
810 items.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.2.cmp(&b.2)));
811 let top_nodes: Vec<String> = items
812 .iter()
813 .take(5)
814 .map(|(kind, _deg, name)| format!("{}:{}", kind, name))
815 .collect();
816 let label = top_nodes
817 .first()
818 .cloned()
819 .unwrap_or_else(|| format!("community-{}", community));
820 (community, label, count, top_nodes)
821 })
822 .collect();
823
824 result.sort_by_key(|row| std::cmp::Reverse(row.2));
825 Ok(result)
826 }
827
828 pub fn clear_communities(&self) -> anyhow::Result<()> {
829 self.conn.execute("UPDATE nodes SET community = 0", [])?;
830 self.conn.execute("DELETE FROM communities", [])?;
831 Ok(())
832 }
833
834 pub fn get_dependents(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
837 let mut seen = std::collections::HashSet::new();
838 seen.insert(id.to_string());
839 let mut current = vec![id.to_string()];
840 let mut result: Vec<Node> = Vec::new();
841 let max_depth = depth.min(3);
842
843 for _ in 0..max_depth {
844 if current.is_empty() {
845 break;
846 }
847 let mut next = Vec::new();
848 for cur_id in ¤t {
849 let mut stmt = self.conn.prepare(
850 "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, false), COALESCE(n.is_dead_candidate, false), n.dead_reason
851 FROM nodes n
852 INNER JOIN edges e ON e.src = n.id AND e.dst = ?
853 LIMIT 100",
854 )?;
855 let rows = stmt.query_map(params![cur_id], |row| {
856 Ok(Node {
857 id: row.get(0)?,
858 kind: row.get(1)?,
859 name: row.get(2)?,
860 path: row.get(3)?,
861 line_start: row.get(4)?,
862 line_end: row.get(5)?,
863 language: row.get(6)?,
864 churn: row.get(7)?,
865 coupling: row.get(8)?,
866 community: row.get(9)?,
867 in_degree: row.get(10)?,
868 out_degree: row.get(11)?,
869 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
870 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
871 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
872 })
873 })?;
874 for row in rows {
875 let node = row?;
876 if seen.insert(node.id.clone()) {
877 next.push(node.id.clone());
878 result.push(node);
879 }
880 }
881 }
882 current = next;
883 }
884
885 Ok(result)
886 }
887
888 pub fn get_nodes_by_community(&self, community: i64) -> anyhow::Result<Vec<Node>> {
889 let mut stmt = self.conn.prepare(
890 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason FROM nodes WHERE community = ?",
891 )?;
892 let rows = stmt.query_map(params![community], |row| {
893 Ok(Node {
894 id: row.get(0)?,
895 kind: row.get(1)?,
896 name: row.get(2)?,
897 path: row.get(3)?,
898 line_start: row.get(4)?,
899 line_end: row.get(5)?,
900 language: row.get(6)?,
901 churn: row.get(7)?,
902 coupling: row.get(8)?,
903 community: row.get(9)?,
904 in_degree: row.get(10)?,
905 out_degree: row.get(11)?,
906 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
907 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
908 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
909 })
910 })?;
911 let mut nodes = Vec::new();
912 for row in rows {
913 nodes.push(row?);
914 }
915 Ok(nodes)
916 }
917
918 pub fn mark_dead_candidates(&self, items: &[(String, String)]) -> anyhow::Result<()> {
919 if items.is_empty() {
921 return Ok(());
922 }
923 let mut stmt = self
924 .conn
925 .prepare("UPDATE nodes SET is_dead_candidate = 1, dead_reason = ? WHERE id = ?")?;
926 for (id, reason) in items {
927 stmt.execute(params![reason, id])?;
928 }
929 Ok(())
930 }
931
932 pub fn get_dead_code_stats(&self) -> anyhow::Result<(i64, i64)> {
933 let total: i64 = self
935 .conn
936 .query_row(
937 "SELECT COUNT(*) FROM nodes WHERE is_dead_candidate = 1",
938 [],
939 |r| r.get(0),
940 )
941 .unwrap_or(0);
942 let high: i64 = self.conn.query_row(
944 "SELECT COUNT(*) FROM nodes WHERE is_dead_candidate = 1 AND dead_reason IN ('unreachable', 'disconnected')", [], |r| r.get(0)
945 ).unwrap_or(0);
946 Ok((total, high))
947 }
948
949 pub fn get_edges_by_community(&self, community: i64) -> anyhow::Result<Vec<Edge>> {
950 let mut stmt = self.conn.prepare(
951 "SELECT DISTINCT e.id, e.src, e.dst, e.kind, e.weight, e.confidence
952 FROM edges e
953 INNER JOIN nodes n1 ON e.src = n1.id AND n1.community = ?
954 INNER JOIN nodes n2 ON e.dst = n2.id AND n2.community = ?",
955 )?;
956 let rows = stmt.query_map(params![community, community], |row| {
957 Ok(Edge {
958 id: row.get(0)?,
959 src: row.get(1)?,
960 dst: row.get(2)?,
961 kind: row.get(3)?,
962 weight: row.get(4)?,
963 confidence: row.get(5)?,
964 })
965 })?;
966 let mut edges = Vec::new();
967 for row in rows {
968 edges.push(row?);
969 }
970 Ok(edges)
971 }
972
973 pub fn get_file_hashes(&self) -> anyhow::Result<std::collections::HashMap<String, String>> {
976 let mut stmt = self.conn.prepare("SELECT path, hash FROM file_hashes")?;
977 let rows = stmt.query_map([], |row| {
978 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
979 })?;
980 let mut result = std::collections::HashMap::new();
981 for row in rows {
982 let (path, hash) = row?;
983 result.insert(path, hash);
984 }
985 Ok(result)
986 }
987
988 pub fn set_file_hash(&self, path: &str, hash: &str) -> anyhow::Result<()> {
989 self.conn.execute(
990 "INSERT OR REPLACE INTO file_hashes (path, hash) VALUES (?, ?)",
991 params![path, hash],
992 )?;
993 Ok(())
994 }
995
996 pub fn remove_file_hashes(&self, paths: &[String]) -> anyhow::Result<()> {
997 if paths.is_empty() {
998 return Ok(());
999 }
1000 let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
1001 let sql = format!("DELETE FROM file_hashes WHERE path IN ({})", placeholders);
1002 let mut stmt = self.conn.prepare(&sql)?;
1003 let params: Vec<&dyn duckdb::ToSql> =
1004 paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
1005 stmt.execute(params.as_slice())?;
1006 Ok(())
1007 }
1008
1009 pub fn delete_nodes_by_paths(&self, paths: &[String]) -> anyhow::Result<usize> {
1010 if paths.is_empty() {
1011 return Ok(0);
1012 }
1013 let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
1014 let sql_edges = format!(
1016 "DELETE FROM edges WHERE src IN (SELECT id FROM nodes WHERE path IN ({})) OR dst IN (SELECT id FROM nodes WHERE path IN ({}))",
1017 placeholders, placeholders
1018 );
1019 let mut stmt_edges = self.conn.prepare(&sql_edges)?;
1020 let params_edges: Vec<&dyn duckdb::ToSql> = paths
1021 .iter()
1022 .chain(paths.iter())
1023 .map(|p| p as &dyn duckdb::ToSql)
1024 .collect();
1025 stmt_edges.execute(params_edges.as_slice())?;
1026
1027 let sql_nodes = format!("DELETE FROM nodes WHERE path IN ({})", placeholders);
1029 let mut stmt_nodes = self.conn.prepare(&sql_nodes)?;
1030 let params_nodes: Vec<&dyn duckdb::ToSql> =
1031 paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
1032 let count = stmt_nodes.execute(params_nodes.as_slice())?;
1033 Ok(count)
1034 }
1035}
1036
1037pub fn repo_hash(path: &Path) -> String {
1038 let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
1039 let path_str = canonical.to_string_lossy().to_string();
1040 let mut hasher = Sha256::new();
1041 hasher.update(path_str.as_bytes());
1042 format!("{:x}", hasher.finalize())[..16].to_string()
1043}