1use std::path::{Path, PathBuf};
2
3use duckdb::params;
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6
7use crate::parser::{EdgeDef, NodeDef};
8
/// A vertex in the code graph, mirroring one row of the `nodes` table.
///
/// Fields past `line_end` are filled in by later analysis passes and default
/// to zero/false/empty when deserialized from payloads that omit them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    /// Stable unique identifier (primary key of the `nodes` table).
    pub id: String,
    /// Node kind, e.g. "File", "Function", "Class", "Author" (as used by queries below).
    pub kind: String,
    /// Display name of the entity.
    pub name: String,
    /// Source file path the entity belongs to.
    pub path: String,
    /// First line of the definition within `path`.
    pub line_start: u32,
    /// Last line of the definition within `path`.
    pub line_end: u32,
    /// Language label; empty when unknown.
    #[serde(default)]
    pub language: String,
    /// Churn score written by `upsert_node_scores`; 0.0 until computed.
    #[serde(default)]
    pub churn: f64,
    /// Coupling score (normalized in-degree for File nodes — see `compute_coupling`).
    #[serde(default)]
    pub coupling: f64,
    /// Community id from community detection; 0 means unassigned.
    #[serde(default)]
    pub community: i64,
    /// Count of incoming edges (maintained by `update_in_out_degrees`).
    #[serde(default)]
    pub in_degree: i64,
    /// Count of outgoing edges (maintained by `update_in_out_degrees`).
    #[serde(default)]
    pub out_degree: i64,
    /// Whether the symbol is exported/public (taken from parser metadata).
    #[serde(default)]
    pub exported: bool,
    /// Set by dead-code analysis via `mark_dead_candidates`.
    #[serde(default)]
    pub is_dead_candidate: bool,
    /// Reason recorded when `is_dead_candidate` is set.
    #[serde(default)]
    pub dead_reason: Option<String>,
    /// Complexity score from parser metadata; 0.0 when absent.
    #[serde(default)]
    pub complexity: f64,
    /// Whether the node lives in a test file.
    #[serde(default)]
    pub is_test_file: bool,
    /// Number of tests associated with this node (semantics set by the indexer).
    #[serde(default)]
    pub test_count: i64,
    /// Whether the node is considered covered by tests.
    #[serde(default)]
    pub is_tested: bool,
}
56
/// A directed edge in the code graph, mirroring one row of the `edges` table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
    /// Deterministic id, formatted as `src|kind|dst` (see `Edge::from_def`).
    pub id: String,
    /// Source node id.
    pub src: String,
    /// Destination node id.
    pub dst: String,
    /// Edge kind (e.g. "OWNS" as used by `get_ownership`).
    pub kind: String,
    /// Edge weight; defaults to 1.0 when missing from serialized input.
    #[serde(default = "default_weight")]
    pub weight: f64,
    /// Confidence of the extracted relationship; defaults to 1.0.
    #[serde(default = "default_weight")]
    pub confidence: f64,
}
71
/// Serde default for `Edge::weight` and `Edge::confidence`.
fn default_weight() -> f64 {
    1.0_f64
}
75
/// Headline statistics for a repository graph, produced by `GraphDb::get_stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepoStats {
    /// Total rows in `nodes`.
    pub node_count: u64,
    /// Total rows in `edges`.
    pub edge_count: u64,
    /// Fraction of nodes per language label (values sum to ~1.0).
    pub language_breakdown: std::collections::HashMap<String, f64>,
    /// Number of detected communities.
    pub community_count: u32,
    /// Number of nodes with kind "Function".
    pub function_count: u64,
    /// Number of nodes with kind "Class".
    pub class_count: u64,
    /// Number of nodes with kind "File".
    pub file_count: u64,
}
88
89pub type CommunityRow = (i64, String, i64, Vec<String>);
91pub type DocsCoverage = (f64, Vec<(i64, i64, i64)>, Vec<Node>);
93pub type TestCoverageSummary = (f64, i64, i64, Vec<Node>);
95type CommunityGroup = (Vec<(String, i64, String)>, i64); #[derive(Debug, Clone, Serialize, Deserialize)]
99pub struct SnapshotEntry {
100 pub id: String,
101 pub commit_sha: String,
102 pub commit_date: String,
104 pub commit_msg: String,
105 pub node_count: i64,
106 pub edge_count: i64,
107 pub snapshot_data: Option<String>,
109}
110
/// A source-comment tag occurrence, mirroring one row of the `tags` table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TagRow {
    /// Unique tag id.
    pub id: String,
    /// File the tag was found in.
    pub file_path: String,
    /// 1-based(?) line of the tag — NOTE(review): origin of numbering not visible here; confirm in the scanner.
    pub line: u32,
    /// Tag category (filterable via `get_tags`).
    pub tag_type: String,
    /// The tag's text content.
    pub text: String,
    /// Comment category; the table default is 'code'.
    pub comment_type: String,
}
123
/// A detected code-clone pair, mirroring one row of the `clones` table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CloneRow {
    /// Unique clone-pair id.
    pub id: String,
    /// First node of the pair.
    pub node_a: String,
    /// Second node of the pair.
    pub node_b: String,
    /// Similarity score between the two nodes.
    pub similarity: f64,
    /// Clone kind/classification.
    pub kind: String,
}
135
136impl Default for Node {
137 fn default() -> Self {
138 Self {
139 id: String::new(),
140 kind: String::new(),
141 name: String::new(),
142 path: String::new(),
143 line_start: 0,
144 line_end: 0,
145 language: String::new(),
146 churn: 0.0,
147 coupling: 0.0,
148 community: 0,
149 in_degree: 0,
150 out_degree: 0,
151 exported: false,
152 is_dead_candidate: false,
153 dead_reason: None,
154 complexity: 0.0,
155 is_test_file: false,
156 test_count: 0,
157 is_tested: false,
158 }
159 }
160}
161
162impl Node {
163 pub fn from_def(d: &NodeDef, language: &str) -> Self {
164 let exported = d
165 .metadata
166 .get("exported")
167 .and_then(|v| v.as_bool())
168 .unwrap_or(false);
169 let complexity = d
170 .metadata
171 .get("complexity")
172 .and_then(|v| v.as_f64())
173 .unwrap_or(0.0);
174 Self {
175 id: d.id.clone(),
176 kind: d.kind.as_str().to_string(),
177 name: d.name.clone(),
178 path: d.path.clone(),
179 line_start: d.line_start,
180 line_end: d.line_end,
181 language: language.to_string(),
182 churn: 0.0,
183 coupling: 0.0,
184 community: 0,
185 in_degree: 0,
186 out_degree: 0,
187 exported,
188 is_dead_candidate: false,
189 dead_reason: None,
190 complexity,
191 is_test_file: false,
192 test_count: 0,
193 is_tested: false,
194 }
195 }
196}
197
198impl Edge {
199 pub fn from_def(d: &EdgeDef) -> Self {
200 let id = format!("{}|{}|{}", d.src, d.kind.as_str(), d.dst);
201 Self {
202 id,
203 src: d.src.clone(),
204 dst: d.dst.clone(),
205 kind: d.kind.as_str().to_string(),
206 weight: d.weight,
207 confidence: d.confidence,
208 }
209 }
210}
211
/// Handle to the per-repository DuckDB database plus its identifying info.
pub struct GraphDb {
    /// Open DuckDB connection (not thread-safe sharing — one handle per user).
    pub conn: duckdb::Connection,
    /// Hash-derived repository id (also the database file stem).
    pub repo_id: String,
    /// Absolute path of the backing `.db` file.
    pub db_path: PathBuf,
}
222
223impl GraphDb {
    /// Open (or create) the repository's database at `~/.cgx/repos/<hash>.db`,
    /// creating the base schema and applying idempotent migrations.
    ///
    /// # Errors
    /// Fails when the home directory cannot be determined, the directory
    /// cannot be created, or any DDL statement fails.
    pub fn open(repo_path: &Path) -> anyhow::Result<Self> {
        // One isolated database per repository, keyed by a hash of its path.
        let repo_id = repo_hash(repo_path);
        let dir = dirs::home_dir()
            .ok_or_else(|| anyhow::anyhow!("cannot determine home directory"))?
            .join(".cgx")
            .join("repos");
        std::fs::create_dir_all(&dir)?;

        let db_path = dir.join(format!("{}.db", repo_id));
        let conn = duckdb::Connection::open(&db_path)?;

        // Base schema. Every statement is IF NOT EXISTS, so reopening an
        // existing database is a no-op.
        conn.execute_batch(
            "CREATE TABLE IF NOT EXISTS nodes (
                id VARCHAR PRIMARY KEY,
                kind VARCHAR NOT NULL,
                name VARCHAR NOT NULL,
                path VARCHAR NOT NULL,
                line_start INTEGER,
                line_end INTEGER,
                language VARCHAR,
                churn DOUBLE DEFAULT 0.0,
                coupling DOUBLE DEFAULT 0.0,
                community BIGINT DEFAULT 0,
                in_degree BIGINT DEFAULT 0,
                out_degree BIGINT DEFAULT 0,
                exported TINYINT DEFAULT 0,
                is_dead_candidate TINYINT DEFAULT 0,
                dead_reason TEXT,
                metadata JSON
            );
            CREATE TABLE IF NOT EXISTS edges (
                id VARCHAR PRIMARY KEY,
                src VARCHAR NOT NULL,
                dst VARCHAR NOT NULL,
                kind VARCHAR NOT NULL,
                weight DOUBLE DEFAULT 1.0,
                confidence DOUBLE DEFAULT 1.0,
                metadata JSON
            );
            CREATE TABLE IF NOT EXISTS communities (
                id INTEGER PRIMARY KEY,
                label VARCHAR,
                node_count INTEGER,
                top_nodes JSON
            );
            CREATE TABLE IF NOT EXISTS repo_meta (
                key VARCHAR PRIMARY KEY,
                value JSON
            );
            CREATE TABLE IF NOT EXISTS file_hashes (
                path VARCHAR PRIMARY KEY,
                hash VARCHAR NOT NULL,
                indexed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            );
            CREATE TABLE IF NOT EXISTS tags (
                id VARCHAR PRIMARY KEY,
                file_path VARCHAR NOT NULL,
                line INTEGER NOT NULL,
                tag_type VARCHAR NOT NULL,
                text VARCHAR NOT NULL,
                comment_type VARCHAR NOT NULL DEFAULT 'code'
            );
            CREATE TABLE IF NOT EXISTS clones (
                id VARCHAR PRIMARY KEY,
                node_a VARCHAR NOT NULL,
                node_b VARCHAR NOT NULL,
                similarity FLOAT NOT NULL,
                kind VARCHAR NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_nodes_kind ON nodes(kind);
            CREATE INDEX IF NOT EXISTS idx_nodes_path ON nodes(path);
            CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community);
            CREATE INDEX IF NOT EXISTS idx_edges_src ON edges(src);
            CREATE INDEX IF NOT EXISTS idx_edges_dst ON edges(dst);
            CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind);
            CREATE INDEX IF NOT EXISTS idx_tags_file ON tags(file_path);
            CREATE INDEX IF NOT EXISTS idx_tags_type ON tags(tag_type);
            CREATE INDEX IF NOT EXISTS idx_clones_a ON clones(node_a);
            CREATE INDEX IF NOT EXISTS idx_clones_b ON clones(node_b);",
        )?;

        // Migrations: columns/indexes added after the base schema shipped.
        // ADD COLUMN IF NOT EXISTS keeps this safe to rerun on old databases.
        conn.execute_batch(
            "ALTER TABLE nodes ADD COLUMN IF NOT EXISTS exported TINYINT DEFAULT 0;
            ALTER TABLE nodes ADD COLUMN IF NOT EXISTS is_dead_candidate TINYINT DEFAULT 0;
            ALTER TABLE nodes ADD COLUMN IF NOT EXISTS dead_reason TEXT;
            ALTER TABLE nodes ADD COLUMN IF NOT EXISTS complexity DOUBLE DEFAULT 0.0;
            ALTER TABLE nodes ADD COLUMN IF NOT EXISTS doc_comment TEXT;
            ALTER TABLE nodes ADD COLUMN IF NOT EXISTS is_test_file TINYINT DEFAULT 0;
            ALTER TABLE nodes ADD COLUMN IF NOT EXISTS test_count INTEGER DEFAULT 0;
            ALTER TABLE nodes ADD COLUMN IF NOT EXISTS is_tested TINYINT DEFAULT 0;
            CREATE INDEX IF NOT EXISTS idx_nodes_dead ON nodes(is_dead_candidate);
            CREATE INDEX IF NOT EXISTS idx_nodes_complexity ON nodes(complexity);
            CREATE INDEX IF NOT EXISTS idx_nodes_is_tested ON nodes(is_tested);",
        )?;

        // Snapshot history table (separate batch; also a later addition).
        conn.execute_batch(
            "CREATE TABLE IF NOT EXISTS snapshots (
                id VARCHAR PRIMARY KEY,
                commit_sha VARCHAR NOT NULL,
                commit_date TEXT NOT NULL,
                commit_msg VARCHAR,
                node_count INTEGER,
                edge_count INTEGER,
                snapshot_data TEXT
            );
            CREATE INDEX IF NOT EXISTS idx_snapshots_date ON snapshots(commit_date);",
        )?;

        Ok(Self {
            conn,
            repo_id,
            db_path,
        })
    }
344
    /// Insert or replace the given nodes; returns the number of rows written.
    ///
    /// NOTE(review): `INSERT OR REPLACE` rewrites the whole row, so columns
    /// not listed here (is_dead_candidate, dead_reason, doc_comment, metadata)
    /// presumably revert to NULL/defaults for re-indexed nodes — confirm this
    /// reset is intended before relying on those flags across reindexes.
    pub fn upsert_nodes(&self, nodes: &[Node]) -> anyhow::Result<usize> {
        if nodes.is_empty() {
            return Ok(0);
        }
        let mut count = 0;
        // Prepared once, executed per node.
        let mut stmt = self.conn.prepare(
            "INSERT OR REPLACE INTO nodes (id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, exported, complexity, is_test_file, test_count, is_tested)
             VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
        )?;
        for node in nodes {
            stmt.execute(params![
                node.id,
                node.kind,
                node.name,
                node.path,
                node.line_start,
                node.line_end,
                node.language,
                node.churn,
                node.coupling,
                node.community,
                node.in_degree,
                node.out_degree,
                node.exported as i32, // booleans stored as TINYINT 0/1
                node.complexity,
                node.is_test_file as i32,
                node.test_count,
                node.is_tested as i32,
            ])?;
            count += 1;
        }
        Ok(count)
    }
379
380 pub fn upsert_edges(&self, edges: &[Edge]) -> anyhow::Result<usize> {
382 if edges.is_empty() {
383 return Ok(0);
384 }
385 let mut count = 0;
386 let mut stmt = self.conn.prepare(
387 "INSERT OR REPLACE INTO edges (id, src, dst, kind, weight, confidence)
388 VALUES (?, ?, ?, ?, ?, ?)",
389 )?;
390 for edge in edges {
391 stmt.execute(params![
392 edge.id,
393 edge.src,
394 edge.dst,
395 edge.kind,
396 edge.weight,
397 edge.confidence,
398 ])?;
399 count += 1;
400 }
401 Ok(count)
402 }
403
404 pub fn upsert_tags(&self, tags: &[TagRow]) -> anyhow::Result<usize> {
406 if tags.is_empty() {
407 return Ok(0);
408 }
409 let mut count = 0;
410 let mut stmt = self.conn.prepare(
411 "INSERT OR REPLACE INTO tags (id, file_path, line, tag_type, text, comment_type)
412 VALUES (?, ?, ?, ?, ?, ?)",
413 )?;
414 for tag in tags {
415 stmt.execute(params![
416 tag.id,
417 tag.file_path,
418 tag.line,
419 tag.tag_type,
420 tag.text,
421 tag.comment_type,
422 ])?;
423 count += 1;
424 }
425 Ok(count)
426 }
427
    /// Fetch tag rows, optionally filtered by tag type and/or comment type,
    /// ordered by file path then line.
    ///
    /// The SQL text and the parameter binding are chosen by two matches over
    /// the same `(tag_type_filter, comment_type_filter)` pair — keep them in
    /// sync when editing either one.
    pub fn get_tags(
        &self,
        tag_type_filter: Option<&str>,
        comment_type_filter: Option<&str>,
    ) -> anyhow::Result<Vec<TagRow>> {
        // Select the WHERE-clause shape from which filters are present.
        let sql = match (tag_type_filter, comment_type_filter) {
            (Some(_), Some(_)) => {
                "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
                 WHERE tag_type = ? AND comment_type = ? ORDER BY file_path, line"
            }
            (Some(_), None) => {
                "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
                 WHERE tag_type = ? ORDER BY file_path, line"
            }
            (None, Some(_)) => {
                "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
                 WHERE comment_type = ? ORDER BY file_path, line"
            }
            (None, None) => {
                "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
                 ORDER BY file_path, line"
            }
        };

        let mut stmt = self.conn.prepare(sql)?;
        // Shared row -> TagRow mapping used by all four query shapes.
        let map_row = |row: &duckdb::Row| {
            Ok(TagRow {
                id: row.get(0)?,
                file_path: row.get(1)?,
                line: row.get::<_, u32>(2)?,
                tag_type: row.get(3)?,
                text: row.get(4)?,
                comment_type: row.get(5)?,
            })
        };

        // Bind parameters in the same order as the WHERE clause above.
        let rows = match (tag_type_filter, comment_type_filter) {
            (Some(t), Some(c)) => stmt.query_map(params![t, c], map_row)?,
            (Some(t), None) => stmt.query_map(params![t], map_row)?,
            (None, Some(c)) => stmt.query_map(params![c], map_row)?,
            (None, None) => stmt.query_map([], map_row)?,
        };

        let mut results = Vec::new();
        for row in rows {
            results.push(row?);
        }
        Ok(results)
    }
478
    /// Drop and recreate the `tags` table (plus its indexes), removing every
    /// tag row. The DDL here mirrors the schema created in `open`.
    pub fn clear_all_tags(&self) -> anyhow::Result<()> {
        self.conn.execute_batch(
            "DROP TABLE IF EXISTS tags;
            CREATE TABLE IF NOT EXISTS tags (
                id VARCHAR PRIMARY KEY,
                file_path VARCHAR NOT NULL,
                line INTEGER NOT NULL,
                tag_type VARCHAR NOT NULL,
                text VARCHAR NOT NULL,
                comment_type VARCHAR NOT NULL DEFAULT 'code'
            );
            CREATE INDEX IF NOT EXISTS idx_tags_file ON tags(file_path);
            CREATE INDEX IF NOT EXISTS idx_tags_type ON tags(tag_type);",
        )?;
        Ok(())
    }
496
497 pub fn delete_tags_for_paths(&self, paths: &[String]) -> anyhow::Result<()> {
499 if paths.is_empty() {
500 return Ok(());
501 }
502 let mut stmt = self.conn.prepare("DELETE FROM tags WHERE file_path = ?")?;
503 for path in paths {
504 stmt.execute(params![path])?;
505 }
506 Ok(())
507 }
508
509 pub fn get_node(&self, id: &str) -> anyhow::Result<Option<Node>> {
511 let mut stmt = self
512 .conn
513 .prepare("SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false) as exported, COALESCE(is_dead_candidate, false) as is_dead_candidate, dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0) FROM nodes WHERE id = ?")?;
514 let mut rows = stmt.query_map(params![id], |row| {
515 Ok(Node {
516 id: row.get(0)?,
517 kind: row.get(1)?,
518 name: row.get(2)?,
519 path: row.get(3)?,
520 line_start: row.get(4)?,
521 line_end: row.get(5)?,
522 language: row.get(6)?,
523 churn: row.get(7)?,
524 coupling: row.get(8)?,
525 community: row.get(9)?,
526 in_degree: row.get(10)?,
527 out_degree: row.get(11)?,
528 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
529 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
530 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
531 complexity: row.get::<_, f64>(15).unwrap_or(0.0),
532 is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
533 test_count: row.get::<_, i64>(17).unwrap_or(0),
534 is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
535 })
536 })?;
537
538 match rows.next() {
539 Some(Ok(node)) => Ok(Some(node)),
540 _ => Ok(None),
541 }
542 }
543
544 pub fn get_neighbors(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
546 let mut seen = std::collections::HashSet::new();
547 seen.insert(id.to_string());
548 let mut current = vec![id.to_string()];
549 let mut result: Vec<Node> = Vec::new();
550 let max_depth = depth.min(3);
551
552 for _ in 0..max_depth {
553 if current.is_empty() {
554 break;
555 }
556 let mut next = Vec::new();
557
558 for cur_id in ¤t {
559 let mut stmt = self.conn.prepare(
560 "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, false), COALESCE(n.is_dead_candidate, false), n.dead_reason, COALESCE(n.complexity, 0.0), COALESCE(n.is_test_file, 0), COALESCE(n.test_count, 0), COALESCE(n.is_tested, 0)
561 FROM nodes n
562 INNER JOIN edges e ON (e.dst = n.id AND e.src = ?1) OR (e.src = n.id AND e.dst = ?2)
563 LIMIT 100",
564 )?;
565 let rows = stmt.query_map(params![cur_id, cur_id], |row| {
566 Ok(Node {
567 id: row.get(0)?,
568 kind: row.get(1)?,
569 name: row.get(2)?,
570 path: row.get(3)?,
571 line_start: row.get(4)?,
572 line_end: row.get(5)?,
573 language: row.get(6)?,
574 churn: row.get(7)?,
575 coupling: row.get(8)?,
576 community: row.get(9)?,
577 in_degree: row.get(10)?,
578 out_degree: row.get(11)?,
579 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
580 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
581 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
582 complexity: row.get::<_, f64>(15).unwrap_or(0.0),
583 is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
584 test_count: row.get::<_, i64>(17).unwrap_or(0),
585 is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
586 })
587 })?;
588
589 for row in rows {
590 let node = row?;
591 if seen.insert(node.id.clone()) {
592 next.push(node.id.clone());
593 result.push(node);
594 }
595 }
596 }
597 current = next;
598 }
599
600 Ok(result)
601 }
602
    /// Load every node in the database.
    ///
    /// COALESCE guards the columns added by the `open` migrations, which may
    /// be NULL in rows written before those columns existed.
    pub fn get_all_nodes(&self) -> anyhow::Result<Vec<Node>> {
        let mut stmt = self.conn.prepare(
            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0) FROM nodes",
        )?;
        let rows = stmt.query_map([], |row| {
            Ok(Node {
                id: row.get(0)?,
                kind: row.get(1)?,
                name: row.get(2)?,
                path: row.get(3)?,
                line_start: row.get(4)?,
                line_end: row.get(5)?,
                language: row.get(6)?,
                churn: row.get(7)?,
                coupling: row.get(8)?,
                community: row.get(9)?,
                in_degree: row.get(10)?,
                out_degree: row.get(11)?,
                // TINYINT-backed booleans arrive as integers.
                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
                complexity: row.get::<_, f64>(15).unwrap_or(0.0),
                is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
                test_count: row.get::<_, i64>(17).unwrap_or(0),
                is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
            })
        })?;

        let mut nodes = Vec::new();
        for row in rows {
            nodes.push(row?);
        }
        Ok(nodes)
    }
638
639 pub fn get_all_edges(&self) -> anyhow::Result<Vec<Edge>> {
641 let mut stmt = self
642 .conn
643 .prepare("SELECT id, src, dst, kind, weight, confidence FROM edges")?;
644 let rows = stmt.query_map([], |row| {
645 Ok(Edge {
646 id: row.get(0)?,
647 src: row.get(1)?,
648 dst: row.get(2)?,
649 kind: row.get(3)?,
650 weight: row.get(4)?,
651 confidence: row.get(5)?,
652 })
653 })?;
654
655 let mut edges = Vec::new();
656 for row in rows {
657 edges.push(row?);
658 }
659 Ok(edges)
660 }
661
662 pub fn node_count(&self) -> anyhow::Result<u64> {
664 let count: i64 = self
665 .conn
666 .query_row("SELECT COUNT(*) FROM nodes", [], |row| row.get(0))?;
667 Ok(count as u64)
668 }
669
670 pub fn edge_count(&self) -> anyhow::Result<u64> {
672 let count: i64 = self
673 .conn
674 .query_row("SELECT COUNT(*) FROM edges", [], |row| row.get(0))?;
675 Ok(count as u64)
676 }
677
    /// Remove all graph data (nodes, edges, communities). Leaves file hashes,
    /// tags, clones, and snapshots untouched.
    pub fn clear(&self) -> anyhow::Result<()> {
        self.conn.execute_batch(
            "TRUNCATE TABLE edges;
            TRUNCATE TABLE nodes;
            TRUNCATE TABLE communities;",
        )?;
        Ok(())
    }
689
690 pub fn get_language_breakdown(&self) -> anyhow::Result<std::collections::HashMap<String, f64>> {
692 let mut stmt = self.conn.prepare(
693 "SELECT language, COUNT(*) as cnt FROM nodes WHERE language != '' GROUP BY language",
694 )?;
695 let rows = stmt.query_map([], |row| {
696 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
697 })?;
698
699 let mut counts: std::collections::HashMap<String, i64> = std::collections::HashMap::new();
700 for row in rows {
701 let (lang, cnt) = row?;
702 *counts.entry(lang).or_default() += cnt;
703 }
704
705 let total: i64 = counts.values().sum();
706 if total == 0 {
707 return Ok(std::collections::HashMap::new());
708 }
709
710 let mut breakdown = std::collections::HashMap::new();
711 for (lang, cnt) in counts {
712 breakdown.insert(lang, cnt as f64 / total as f64);
713 }
714 Ok(breakdown)
715 }
716
717 pub fn get_node_counts_by_kind(
719 &self,
720 ) -> anyhow::Result<std::collections::HashMap<String, u64>> {
721 let mut stmt = self
722 .conn
723 .prepare("SELECT kind, COUNT(*) as cnt FROM nodes GROUP BY kind")?;
724 let rows = stmt.query_map([], |row| {
725 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
726 })?;
727
728 let mut counts = std::collections::HashMap::new();
729 for row in rows {
730 let (kind, cnt) = row?;
731 counts.insert(kind, cnt as u64);
732 }
733 Ok(counts)
734 }
735
736 pub fn upsert_node_scores(
738 &self,
739 node_id: &str,
740 churn: f64,
741 coupling: f64,
742 ) -> anyhow::Result<()> {
743 self.conn.execute(
744 "UPDATE nodes SET churn = ?, coupling = ? WHERE id = ?",
745 params![churn, coupling, node_id],
746 )?;
747 Ok(())
748 }
749
    /// Recompute `in_degree`/`out_degree` for every node from the current
    /// edge set. Degrees are zeroed first so nodes that lost all their edges
    /// end up at 0 rather than keeping stale counts.
    pub fn update_in_out_degrees(&self) -> anyhow::Result<()> {
        self.conn.execute_batch(
            "UPDATE nodes SET in_degree = 0, out_degree = 0;
            UPDATE nodes SET out_degree = (SELECT COUNT(*) FROM edges WHERE edges.src = nodes.id);
            UPDATE nodes SET in_degree = (SELECT COUNT(*) FROM edges WHERE edges.dst = nodes.id);",
        )?;
        Ok(())
    }
759
    /// Top `limit` File nodes ranked by a combined hotspot score.
    ///
    /// Returns `(path, churn, coupling, in_degree)` tuples; the ranking key is
    /// `churn * coupling + in_degree * 0.01` (see the ORDER BY), and files with
    /// neither churn nor inbound edges are excluded.
    pub fn get_hotspots(&self, limit: usize) -> anyhow::Result<Vec<(String, f64, f64, i64)>> {
        let mut stmt = self.conn.prepare(
            "SELECT path, churn, coupling, in_degree
             FROM nodes
             WHERE kind = 'File' AND (churn > 0.0 OR in_degree > 0)
             ORDER BY (churn * COALESCE(coupling, 0.0) + CAST(in_degree AS DOUBLE) * 0.01) DESC
             LIMIT ?",
        )?;
        let rows = stmt.query_map(params![limit as i64], |row| {
            Ok((
                row.get::<_, String>(0)?,
                row.get::<_, f64>(1)?,
                row.get::<_, f64>(2)?,
                row.get::<_, i64>(3)?,
            ))
        })?;
        let mut results = Vec::new();
        for row in rows {
            results.push(row?);
        }
        Ok(results)
    }
785
786 pub fn get_ownership(&self) -> anyhow::Result<Vec<(String, i64)>> {
788 let mut stmt = self.conn.prepare(
789 "SELECT n.name, COUNT(e.id) as file_count
790 FROM nodes n
791 INNER JOIN edges e ON e.src = n.id AND e.kind = 'OWNS'
792 WHERE n.kind = 'Author'
793 GROUP BY n.name
794 ORDER BY file_count DESC",
795 )?;
796 let rows = stmt.query_map([], |row| {
797 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
798 })?;
799 let mut results = Vec::new();
800 for row in rows {
801 results.push(row?);
802 }
803 Ok(results)
804 }
805
    /// Derive coupling for File nodes as in-degree normalized by the maximum
    /// File in-degree (so the most-depended-on file scores 1.0). All coupling
    /// values are reset first; non-File nodes stay at 0.0.
    pub fn compute_coupling(&self) -> anyhow::Result<()> {
        self.conn.execute_batch(
            "UPDATE nodes SET coupling = 0.0;
            UPDATE nodes SET coupling =
                CASE
                    WHEN (SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') > 0
                    THEN CAST(in_degree AS DOUBLE) / CAST((SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') AS DOUBLE)
                    ELSE 0.0
                END
            WHERE kind = 'File';",
        )?;
        Ok(())
    }
820
821 pub fn update_node_communities(
823 &self,
824 communities: &std::collections::HashMap<String, i64>,
825 ) -> anyhow::Result<usize> {
826 if communities.is_empty() {
827 return Ok(0);
828 }
829 let mut count = 0;
830 let mut stmt = self
831 .conn
832 .prepare("UPDATE nodes SET community = ? WHERE id = ?")?;
833 for (node_id, community) in communities {
834 let affected = stmt.execute(params![*community, node_id.as_str()])?;
835 count += affected;
836 }
837 Ok(count)
838 }
839
840 pub fn get_stats(&self) -> anyhow::Result<RepoStats> {
842 let node_count = self.node_count()?;
843 let edge_count = self.edge_count()?;
844 let lang_breakdown = self.get_language_breakdown()?;
845 let communities = self.get_communities()?;
846 let counts_by_kind = self.get_node_counts_by_kind()?;
847
848 Ok(RepoStats {
849 node_count,
850 edge_count,
851 language_breakdown: lang_breakdown,
852 community_count: communities.len() as u32,
853 function_count: counts_by_kind.get("Function").copied().unwrap_or(0),
854 class_count: counts_by_kind.get("Class").copied().unwrap_or(0),
855 file_count: counts_by_kind.get("File").copied().unwrap_or(0),
856 })
857 }
858
    /// Likely entry points: non-File, non-Author nodes that nothing calls
    /// (in_degree = 0), ranked by how much they call out to (out_degree).
    pub fn get_entry_points(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
        let mut stmt = self.conn.prepare(
            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0)
             FROM nodes
             WHERE in_degree = 0 AND kind != 'File' AND kind != 'Author'
             ORDER BY out_degree DESC
             LIMIT ?",
        )?;
        let rows = stmt.query_map(params![limit as i64], |row| {
            Ok(Node {
                id: row.get(0)?,
                kind: row.get(1)?,
                name: row.get(2)?,
                path: row.get(3)?,
                line_start: row.get(4)?,
                line_end: row.get(5)?,
                language: row.get(6)?,
                churn: row.get(7)?,
                coupling: row.get(8)?,
                community: row.get(9)?,
                in_degree: row.get(10)?,
                out_degree: row.get(11)?,
                // TINYINT-backed booleans arrive as integers.
                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
                complexity: row.get::<_, f64>(15).unwrap_or(0.0),
                is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
                test_count: row.get::<_, i64>(17).unwrap_or(0),
                is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
            })
        })?;
        let mut results = Vec::new();
        for row in rows {
            results.push(row?);
        }
        Ok(results)
    }
897
    /// "God" nodes: the most depended-upon non-File, non-Author nodes,
    /// ranked by in_degree.
    pub fn get_god_nodes(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
        let mut stmt = self.conn.prepare(
            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0)
             FROM nodes
             WHERE in_degree > 0 AND kind != 'File' AND kind != 'Author'
             ORDER BY in_degree DESC
             LIMIT ?",
        )?;
        let rows = stmt.query_map(params![limit as i64], |row| {
            Ok(Node {
                id: row.get(0)?,
                kind: row.get(1)?,
                name: row.get(2)?,
                path: row.get(3)?,
                line_start: row.get(4)?,
                line_end: row.get(5)?,
                language: row.get(6)?,
                churn: row.get(7)?,
                coupling: row.get(8)?,
                community: row.get(9)?,
                in_degree: row.get(10)?,
                out_degree: row.get(11)?,
                // TINYINT-backed booleans arrive as integers.
                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
                complexity: row.get::<_, f64>(15).unwrap_or(0.0),
                is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
                test_count: row.get::<_, i64>(17).unwrap_or(0),
                is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
            })
        })?;
        let mut results = Vec::new();
        for row in rows {
            results.push(row?);
        }
        Ok(results)
    }
936
    /// Summarize detected communities as `(id, label, member count, top node
    /// labels)`, largest community first.
    ///
    /// The label is the highest-in-degree member's `kind:name`; ties are
    /// broken alphabetically by name so output is deterministic. Community 0
    /// (unassigned) is excluded.
    pub fn get_communities(&self) -> anyhow::Result<Vec<CommunityRow>> {
        let mut stmt = self.conn.prepare(
            "SELECT community, kind, name, path, in_degree
             FROM nodes
             WHERE community > 0
             ORDER BY community",
        )?;
        let rows = stmt.query_map([], |row| {
            Ok((
                row.get::<_, i64>(0)?,
                row.get::<_, String>(1)?,
                row.get::<_, String>(2)?,
                row.get::<_, String>(3)?,
                row.get::<_, i64>(4)?,
            ))
        })?;

        // Group members by community id, tracking (kind, in_degree, name)
        // per member plus a running member count.
        let mut community_map: std::collections::HashMap<i64, CommunityGroup> =
            std::collections::HashMap::new();
        for row in rows {
            let (community, kind, name, _path, in_degree) = row?;
            let entry = community_map
                .entry(community)
                .or_insert_with(|| (Vec::new(), 0));
            entry.0.push((kind, in_degree, name));
            entry.1 += 1;
        }

        let mut result: Vec<CommunityRow> = community_map
            .into_iter()
            .map(|(community, (mut items, count))| {
                // Highest in-degree first, then name for a stable order.
                items.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.2.cmp(&b.2)));
                let top_nodes: Vec<String> = items
                    .iter()
                    .take(5)
                    .map(|(kind, _deg, name)| format!("{}:{}", kind, name))
                    .collect();
                let label = top_nodes
                    .first()
                    .cloned()
                    .unwrap_or_else(|| format!("community-{}", community));
                (community, label, count, top_nodes)
            })
            .collect();

        // Largest communities first.
        result.sort_by_key(|row| std::cmp::Reverse(row.2));
        Ok(result)
    }
986
987 pub fn clear_communities(&self) -> anyhow::Result<()> {
989 self.conn.execute("UPDATE nodes SET community = 0", [])?;
990 self.conn.execute("DELETE FROM communities", [])?;
991 Ok(())
992 }
993
994 pub fn get_dependents(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
997 let mut seen = std::collections::HashSet::new();
998 seen.insert(id.to_string());
999 let mut current = vec![id.to_string()];
1000 let mut result: Vec<Node> = Vec::new();
1001 let max_depth = depth.min(3);
1002
1003 for _ in 0..max_depth {
1004 if current.is_empty() {
1005 break;
1006 }
1007 let mut next = Vec::new();
1008 for cur_id in ¤t {
1009 let mut stmt = self.conn.prepare(
1010 "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, false), COALESCE(n.is_dead_candidate, false), n.dead_reason, COALESCE(n.complexity, 0.0), COALESCE(n.is_test_file, 0), COALESCE(n.test_count, 0), COALESCE(n.is_tested, 0)
1011 FROM nodes n
1012 INNER JOIN edges e ON e.src = n.id AND e.dst = ?
1013 LIMIT 100",
1014 )?;
1015 let rows = stmt.query_map(params![cur_id], |row| {
1016 Ok(Node {
1017 id: row.get(0)?,
1018 kind: row.get(1)?,
1019 name: row.get(2)?,
1020 path: row.get(3)?,
1021 line_start: row.get(4)?,
1022 line_end: row.get(5)?,
1023 language: row.get(6)?,
1024 churn: row.get(7)?,
1025 coupling: row.get(8)?,
1026 community: row.get(9)?,
1027 in_degree: row.get(10)?,
1028 out_degree: row.get(11)?,
1029 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
1030 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
1031 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
1032 complexity: row.get::<_, f64>(15).unwrap_or(0.0),
1033 is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
1034 test_count: row.get::<_, i64>(17).unwrap_or(0),
1035 is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
1036 })
1037 })?;
1038 for row in rows {
1039 let node = row?;
1040 if seen.insert(node.id.clone()) {
1041 next.push(node.id.clone());
1042 result.push(node);
1043 }
1044 }
1045 }
1046 current = next;
1047 }
1048
1049 Ok(result)
1050 }
1051
    /// Load all nodes assigned to the given community id.
    pub fn get_nodes_by_community(&self, community: i64) -> anyhow::Result<Vec<Node>> {
        let mut stmt = self.conn.prepare(
            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0) FROM nodes WHERE community = ?",
        )?;
        let rows = stmt.query_map(params![community], |row| {
            Ok(Node {
                id: row.get(0)?,
                kind: row.get(1)?,
                name: row.get(2)?,
                path: row.get(3)?,
                line_start: row.get(4)?,
                line_end: row.get(5)?,
                language: row.get(6)?,
                churn: row.get(7)?,
                coupling: row.get(8)?,
                community: row.get(9)?,
                in_degree: row.get(10)?,
                out_degree: row.get(11)?,
                // TINYINT-backed booleans arrive as integers.
                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
                complexity: row.get::<_, f64>(15).unwrap_or(0.0),
                is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
                test_count: row.get::<_, i64>(17).unwrap_or(0),
                is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
            })
        })?;
        let mut nodes = Vec::new();
        for row in rows {
            nodes.push(row?);
        }
        Ok(nodes)
    }
1086
1087 pub fn mark_dead_candidates(&self, items: &[(String, String)]) -> anyhow::Result<()> {
1089 if items.is_empty() {
1091 return Ok(());
1092 }
1093 let mut stmt = self
1094 .conn
1095 .prepare("UPDATE nodes SET is_dead_candidate = 1, dead_reason = ? WHERE id = ?")?;
1096 for (id, reason) in items {
1097 stmt.execute(params![reason, id])?;
1098 }
1099 Ok(())
1100 }
1101
    /// Count dead-code candidates: `(total candidates, high-confidence ones)`,
    /// where "high confidence" means the reason is 'unreachable' or
    /// 'disconnected'.
    ///
    /// Best-effort: query failures are deliberately treated as a count of 0
    /// rather than an error.
    pub fn get_dead_code_stats(&self) -> anyhow::Result<(i64, i64)> {
        let total: i64 = self
            .conn
            .query_row(
                "SELECT COUNT(*) FROM nodes WHERE is_dead_candidate = 1",
                [],
                |r| r.get(0),
            )
            .unwrap_or(0);
        let high: i64 = self.conn.query_row(
            "SELECT COUNT(*) FROM nodes WHERE is_dead_candidate = 1 AND dead_reason IN ('unreachable', 'disconnected')", [], |r| r.get(0)
        ).unwrap_or(0);
        Ok((total, high))
    }
1119
    /// Edges whose BOTH endpoints belong to the given community
    /// (cross-community edges are excluded by the double join).
    pub fn get_edges_by_community(&self, community: i64) -> anyhow::Result<Vec<Edge>> {
        let mut stmt = self.conn.prepare(
            "SELECT DISTINCT e.id, e.src, e.dst, e.kind, e.weight, e.confidence
             FROM edges e
             INNER JOIN nodes n1 ON e.src = n1.id AND n1.community = ?
             INNER JOIN nodes n2 ON e.dst = n2.id AND n2.community = ?",
        )?;
        let rows = stmt.query_map(params![community, community], |row| {
            Ok(Edge {
                id: row.get(0)?,
                src: row.get(1)?,
                dst: row.get(2)?,
                kind: row.get(3)?,
                weight: row.get(4)?,
                confidence: row.get(5)?,
            })
        })?;
        let mut edges = Vec::new();
        for row in rows {
            edges.push(row?);
        }
        Ok(edges)
    }
1144
1145 pub fn get_file_hashes(&self) -> anyhow::Result<std::collections::HashMap<String, String>> {
1149 let mut stmt = self.conn.prepare("SELECT path, hash FROM file_hashes")?;
1150 let rows = stmt.query_map([], |row| {
1151 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1152 })?;
1153 let mut result = std::collections::HashMap::new();
1154 for row in rows {
1155 let (path, hash) = row?;
1156 result.insert(path, hash);
1157 }
1158 Ok(result)
1159 }
1160
1161 pub fn set_file_hash(&self, path: &str, hash: &str) -> anyhow::Result<()> {
1163 self.conn.execute(
1164 "INSERT OR REPLACE INTO file_hashes (path, hash) VALUES (?, ?)",
1165 params![path, hash],
1166 )?;
1167 Ok(())
1168 }
1169
1170 pub fn remove_file_hashes(&self, paths: &[String]) -> anyhow::Result<()> {
1172 if paths.is_empty() {
1173 return Ok(());
1174 }
1175 let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
1176 let sql = format!("DELETE FROM file_hashes WHERE path IN ({})", placeholders);
1177 let mut stmt = self.conn.prepare(&sql)?;
1178 let params: Vec<&dyn duckdb::ToSql> =
1179 paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
1180 stmt.execute(params.as_slice())?;
1181 Ok(())
1182 }
1183
1184 pub fn delete_nodes_by_paths(&self, paths: &[String]) -> anyhow::Result<usize> {
1188 if paths.is_empty() {
1189 return Ok(0);
1190 }
1191 let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
1192 let sql_edges = format!(
1194 "DELETE FROM edges WHERE src IN (SELECT id FROM nodes WHERE path IN ({})) OR dst IN (SELECT id FROM nodes WHERE path IN ({}))",
1195 placeholders, placeholders
1196 );
1197 let mut stmt_edges = self.conn.prepare(&sql_edges)?;
1198 let params_edges: Vec<&dyn duckdb::ToSql> = paths
1199 .iter()
1200 .chain(paths.iter())
1201 .map(|p| p as &dyn duckdb::ToSql)
1202 .collect();
1203 stmt_edges.execute(params_edges.as_slice())?;
1204
1205 let sql_nodes = format!("DELETE FROM nodes WHERE path IN ({})", placeholders);
1207 let mut stmt_nodes = self.conn.prepare(&sql_nodes)?;
1208 let params_nodes: Vec<&dyn duckdb::ToSql> =
1209 paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
1210 let count = stmt_nodes.execute(params_nodes.as_slice())?;
1211 Ok(count)
1212 }
1213
1214 pub fn update_node_doc_comment(&self, id: &str, doc: &str) -> anyhow::Result<()> {
1216 self.conn.execute(
1217 "UPDATE nodes SET doc_comment = ? WHERE id = ?",
1218 params![doc, id],
1219 )?;
1220 Ok(())
1221 }
1222
1223 pub fn update_node_complexity(&self, id: &str, complexity: f64) -> anyhow::Result<()> {
1225 self.conn.execute(
1226 "UPDATE nodes SET complexity = ? WHERE id = ?",
1227 params![complexity, id],
1228 )?;
1229 Ok(())
1230 }
1231
1232 pub fn get_nodes_by_complexity(
1234 &self,
1235 limit: usize,
1236 min_score: f64,
1237 ) -> anyhow::Result<Vec<Node>> {
1238 let mut stmt = self.conn.prepare(
1239 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0)
1240 FROM nodes
1241 WHERE kind = 'Function' AND COALESCE(complexity, 0.0) >= ?
1242 ORDER BY complexity DESC
1243 LIMIT ?",
1244 )?;
1245 let rows = stmt.query_map(params![min_score, limit as i64], |row| {
1246 Ok(Node {
1247 id: row.get(0)?,
1248 kind: row.get(1)?,
1249 name: row.get(2)?,
1250 path: row.get(3)?,
1251 line_start: row.get(4)?,
1252 line_end: row.get(5)?,
1253 language: row.get(6)?,
1254 churn: row.get(7)?,
1255 coupling: row.get(8)?,
1256 community: row.get(9)?,
1257 in_degree: row.get(10)?,
1258 out_degree: row.get(11)?,
1259 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
1260 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
1261 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
1262 complexity: row.get::<_, f64>(15).unwrap_or(0.0),
1263 is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
1264 test_count: row.get::<_, i64>(17).unwrap_or(0),
1265 is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
1266 })
1267 })?;
1268 let mut results = Vec::new();
1269 for row in rows {
1270 results.push(row?);
1271 }
1272 Ok(results)
1273 }
1274
1275 pub fn get_docs_coverage(&self) -> anyhow::Result<DocsCoverage> {
1277 let overall: f64 = self
1278 .conn
1279 .query_row(
1280 "SELECT COALESCE(
1281 CAST(SUM(CASE WHEN doc_comment IS NOT NULL AND doc_comment != '' THEN 1 ELSE 0 END) AS DOUBLE)
1282 / NULLIF(CAST(COUNT(*) AS DOUBLE), 0.0) * 100.0,
1283 0.0)
1284 FROM nodes WHERE kind IN ('Function', 'Class') AND path NOT LIKE '%test%'",
1285 [],
1286 |r| r.get(0),
1287 )
1288 .unwrap_or(0.0);
1289
1290 let mut by_community = Vec::new();
1291 let mut stmt = self.conn.prepare(
1292 "SELECT community,
1293 SUM(CASE WHEN doc_comment IS NOT NULL AND doc_comment != '' THEN 1 ELSE 0 END) as documented,
1294 COUNT(*) as total
1295 FROM nodes
1296 WHERE kind IN ('Function', 'Class') AND path NOT LIKE '%test%'
1297 GROUP BY community
1298 ORDER BY community",
1299 )?;
1300 let comm_rows = stmt.query_map([], |row| {
1301 Ok((
1302 row.get::<_, i64>(0)?,
1303 row.get::<_, i64>(1)?,
1304 row.get::<_, i64>(2)?,
1305 ))
1306 })?;
1307 for row in comm_rows {
1308 by_community.push(row?);
1309 }
1310
1311 let mut undoc_stmt = self.conn.prepare(
1312 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0)
1313 FROM nodes
1314 WHERE kind = 'Function' AND (doc_comment IS NULL OR doc_comment = '')
1315 ORDER BY in_degree DESC
1316 LIMIT 10",
1317 )?;
1318 let undoc_rows = undoc_stmt.query_map([], |row| {
1319 Ok(Node {
1320 id: row.get(0)?,
1321 kind: row.get(1)?,
1322 name: row.get(2)?,
1323 path: row.get(3)?,
1324 line_start: row.get(4)?,
1325 line_end: row.get(5)?,
1326 language: row.get(6)?,
1327 churn: row.get(7)?,
1328 coupling: row.get(8)?,
1329 community: row.get(9)?,
1330 in_degree: row.get(10)?,
1331 out_degree: row.get(11)?,
1332 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
1333 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
1334 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
1335 complexity: row.get::<_, f64>(15).unwrap_or(0.0),
1336 is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
1337 test_count: row.get::<_, i64>(17).unwrap_or(0),
1338 is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
1339 })
1340 })?;
1341 let mut undocumented = Vec::new();
1342 for row in undoc_rows {
1343 undocumented.push(row?);
1344 }
1345
1346 Ok((overall, by_community, undocumented))
1347 }
1348
1349 pub fn upsert_clones(&self, clones: &[CloneRow]) -> anyhow::Result<usize> {
1351 if clones.is_empty() {
1352 return Ok(0);
1353 }
1354 let mut count = 0;
1355 let mut stmt = self.conn.prepare(
1356 "INSERT OR REPLACE INTO clones (id, node_a, node_b, similarity, kind) VALUES (?, ?, ?, ?, ?)",
1357 )?;
1358 for c in clones {
1359 stmt.execute(params![c.id, c.node_a, c.node_b, c.similarity, c.kind])?;
1360 count += 1;
1361 }
1362 Ok(count)
1363 }
1364
1365 pub fn get_clones(
1367 &self,
1368 min_similarity: f64,
1369 kind_filter: Option<&str>,
1370 ) -> anyhow::Result<Vec<CloneRow>> {
1371 let (sql, use_kind) = if kind_filter.is_some() {
1372 (
1373 "SELECT id, node_a, node_b, similarity, kind FROM clones WHERE similarity >= ? AND kind = ? ORDER BY similarity DESC",
1374 true,
1375 )
1376 } else {
1377 (
1378 "SELECT id, node_a, node_b, similarity, kind FROM clones WHERE similarity >= ? ORDER BY similarity DESC",
1379 false,
1380 )
1381 };
1382
1383 let mut stmt = self.conn.prepare(sql)?;
1384 let map_row = |row: &duckdb::Row| {
1385 Ok(CloneRow {
1386 id: row.get(0)?,
1387 node_a: row.get(1)?,
1388 node_b: row.get(2)?,
1389 similarity: row.get::<_, f32>(3)? as f64,
1390 kind: row.get(4)?,
1391 })
1392 };
1393
1394 let rows = if use_kind {
1395 stmt.query_map(params![min_similarity, kind_filter.unwrap_or("")], map_row)?
1396 } else {
1397 stmt.query_map(params![min_similarity], map_row)?
1398 };
1399
1400 let mut results = Vec::new();
1401 for row in rows {
1402 results.push(row?);
1403 }
1404 Ok(results)
1405 }
1406
1407 pub fn clear_clones(&self) -> anyhow::Result<()> {
1409 self.conn.execute("DELETE FROM clones", [])?;
1410 Ok(())
1411 }
1412
1413 pub fn mark_test_files(&self, paths: &[String]) -> anyhow::Result<()> {
1415 if paths.is_empty() {
1416 return Ok(());
1417 }
1418 let mut stmt = self
1419 .conn
1420 .prepare("UPDATE nodes SET is_test_file = 1 WHERE path = ?")?;
1421 for path in paths {
1422 stmt.execute(params![path])?;
1423 }
1424 Ok(())
1425 }
1426
    /// Recompute per-node test coverage from `TESTS` edges.
    ///
    /// Two statements run as one batch: the first stores, for every node, the
    /// number of `TESTS` edges pointing at it; the second derives the boolean
    /// `is_tested` flag from that count for non-test-file nodes.
    /// NOTE(review): nodes with `is_test_file = 1` keep whatever `is_tested`
    /// value they had — presumably intentional (tests aren't "tested"); confirm.
    pub fn update_test_coverage(&self) -> anyhow::Result<()> {
        self.conn.execute_batch(
            "UPDATE nodes SET test_count = (
                SELECT COUNT(*) FROM edges
                WHERE edges.dst = nodes.id AND edges.kind = 'TESTS'
            );
            UPDATE nodes SET is_tested = (test_count > 0)
            WHERE is_test_file = 0;",
        )?;
        Ok(())
    }
1439
1440 pub fn get_test_coverage_summary(
1442 &self,
1443 top_n: usize,
1444 ) -> anyhow::Result<(f64, i64, i64, Vec<Node>)> {
1445 let tested: i64 = self
1446 .conn
1447 .query_row(
1448 "SELECT COUNT(*) FROM nodes WHERE kind IN ('Function','Class') AND is_test_file = 0 AND is_tested = 1",
1449 [],
1450 |r| r.get(0),
1451 )
1452 .unwrap_or(0);
1453 let total: i64 = self
1454 .conn
1455 .query_row(
1456 "SELECT COUNT(*) FROM nodes WHERE kind IN ('Function','Class') AND is_test_file = 0",
1457 [],
1458 |r| r.get(0),
1459 )
1460 .unwrap_or(0);
1461
1462 let overall_pct = if total > 0 {
1463 (tested as f64 / total as f64) * 100.0
1464 } else {
1465 0.0
1466 };
1467
1468 let mut gap_stmt = self.conn.prepare(
1469 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0)
1470 FROM nodes
1471 WHERE kind IN ('Function','Class') AND is_test_file = 0 AND COALESCE(is_tested, 0) = 0
1472 ORDER BY (churn * CAST(in_degree AS DOUBLE) + CAST(in_degree AS DOUBLE) * 0.5) DESC
1473 LIMIT ?",
1474 )?;
1475 let gap_rows = gap_stmt.query_map(params![top_n as i64], |row| {
1476 Ok(Node {
1477 id: row.get(0)?,
1478 kind: row.get(1)?,
1479 name: row.get(2)?,
1480 path: row.get(3)?,
1481 line_start: row.get(4)?,
1482 line_end: row.get(5)?,
1483 language: row.get(6)?,
1484 churn: row.get(7)?,
1485 coupling: row.get(8)?,
1486 community: row.get(9)?,
1487 in_degree: row.get(10)?,
1488 out_degree: row.get(11)?,
1489 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
1490 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
1491 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
1492 complexity: row.get::<_, f64>(15).unwrap_or(0.0),
1493 is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
1494 test_count: row.get::<_, i64>(17).unwrap_or(0),
1495 is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
1496 })
1497 })?;
1498 let mut gaps = Vec::new();
1499 for row in gap_rows {
1500 gaps.push(row?);
1501 }
1502
1503 Ok((overall_pct, tested, total - tested, gaps))
1504 }
1505
1506 pub fn upsert_snapshot(&self, entry: &SnapshotEntry) -> anyhow::Result<()> {
1508 self.conn.execute(
1509 "INSERT OR REPLACE INTO snapshots (id, commit_sha, commit_date, commit_msg, node_count, edge_count, snapshot_data)
1510 VALUES (?, ?, ?, ?, ?, ?, ?)",
1511 params![
1512 entry.id,
1513 entry.commit_sha,
1514 entry.commit_date,
1515 entry.commit_msg,
1516 entry.node_count,
1517 entry.edge_count,
1518 entry.snapshot_data,
1519 ],
1520 )?;
1521 Ok(())
1522 }
1523
1524 pub fn get_snapshots(&self, limit: usize) -> anyhow::Result<Vec<SnapshotEntry>> {
1526 let mut stmt = self.conn.prepare(
1527 "SELECT id, commit_sha, commit_date, commit_msg, COALESCE(node_count,0), COALESCE(edge_count,0), snapshot_data
1528 FROM snapshots ORDER BY commit_date DESC LIMIT ?",
1529 )?;
1530 let rows = stmt.query_map(params![limit as i64], |row| {
1531 Ok(SnapshotEntry {
1532 id: row.get(0)?,
1533 commit_sha: row.get(1)?,
1534 commit_date: row.get(2)?,
1535 commit_msg: row.get(3)?,
1536 node_count: row.get(4)?,
1537 edge_count: row.get(5)?,
1538 snapshot_data: row.get(6)?,
1539 })
1540 })?;
1541 let mut result = Vec::new();
1542 for row in rows {
1543 result.push(row?);
1544 }
1545 Ok(result)
1546 }
1547
1548 pub fn get_snapshot_by_sha(&self, sha: &str) -> anyhow::Result<Option<SnapshotEntry>> {
1550 let mut stmt = self.conn.prepare(
1551 "SELECT id, commit_sha, commit_date, commit_msg, COALESCE(node_count,0), COALESCE(edge_count,0), snapshot_data
1552 FROM snapshots WHERE commit_sha = ? OR commit_sha LIKE ? LIMIT 1",
1553 )?;
1554 let prefix = format!("{}%", sha);
1555 let mut rows = stmt.query_map(params![sha, prefix], |row| {
1556 Ok(SnapshotEntry {
1557 id: row.get(0)?,
1558 commit_sha: row.get(1)?,
1559 commit_date: row.get(2)?,
1560 commit_msg: row.get(3)?,
1561 node_count: row.get(4)?,
1562 edge_count: row.get(5)?,
1563 snapshot_data: row.get(6)?,
1564 })
1565 })?;
1566 match rows.next() {
1567 Some(Ok(entry)) => Ok(Some(entry)),
1568 _ => Ok(None),
1569 }
1570 }
1571
1572 pub fn snapshot_count(&self) -> i64 {
1574 self.conn
1575 .query_row("SELECT COUNT(*) FROM snapshots", [], |r| r.get(0))
1576 .unwrap_or(0)
1577 }
1578}
1579
1580pub fn repo_hash(path: &Path) -> String {
1581 let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
1582 let path_str = canonical.to_string_lossy().to_string();
1583 let mut hasher = Sha256::new();
1584 hasher.update(path_str.as_bytes());
1585 format!("{:x}", hasher.finalize())[..16].to_string()
1586}