1use std::path::{Path, PathBuf};
2
3use duckdb::params;
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6
7use crate::parser::{EdgeDef, NodeDef};
8
/// A graph node, mirroring one row of the `nodes` table.
///
/// Fields annotated with `#[serde(default)]` may be missing from serialized
/// input; they then take the default value of their type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    /// Primary key of the node (`nodes.id`).
    pub id: String,
    /// Node kind as text; queries in this file compare it against values such
    /// as `'File'`, `'Author'`, `"Function"` and `"Class"`.
    pub kind: String,
    /// Name of the node as copied from the parser definition.
    pub name: String,
    /// Path copied from the parser definition.
    pub path: String,
    /// First line of the node, copied from the parser definition.
    pub line_start: u32,
    /// Last line of the node, copied from the parser definition.
    pub line_end: u32,
    /// Language label; the empty string is excluded from the language breakdown.
    #[serde(default)]
    pub language: String,
    /// Churn score; written by `GraphDb::upsert_node_scores`.
    #[serde(default)]
    pub churn: f64,
    /// Coupling score; `GraphDb::compute_coupling` sets it for `'File'` nodes
    /// to `in_degree / max(in_degree)`.
    #[serde(default)]
    pub coupling: f64,
    /// Community id; `0` is the value written by `GraphDb::clear_communities`
    /// and is excluded by `GraphDb::get_communities`.
    #[serde(default)]
    pub community: i64,
    /// Number of edges whose `dst` is this node (see `update_in_out_degrees`).
    #[serde(default)]
    pub in_degree: i64,
    /// Number of edges whose `src` is this node (see `update_in_out_degrees`).
    #[serde(default)]
    pub out_degree: i64,
    /// Taken from the `"exported"` metadata entry of the parser definition.
    #[serde(default)]
    pub exported: bool,
    /// Set to true by `GraphDb::mark_dead_candidates`.
    #[serde(default)]
    pub is_dead_candidate: bool,
    /// Reason stored together with the dead-candidate flag, if any.
    #[serde(default)]
    pub dead_reason: Option<String>,
    /// Taken from the `"complexity"` metadata entry of the parser definition.
    #[serde(default)]
    pub complexity: f64,
    /// NOTE(review): presumably marks nodes that live in a test file — the
    /// writer is not visible in this part of the file; confirm.
    #[serde(default)]
    pub is_test_file: bool,
    /// NOTE(review): presumably the number of tests associated with the node — confirm.
    #[serde(default)]
    pub test_count: i64,
    /// NOTE(review): presumably true when at least one test covers the node — confirm.
    #[serde(default)]
    pub is_tested: bool,
}
44
/// A directed graph edge, mirroring one row of the `edges` table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
    /// Primary key; `Edge::from_def` builds it as `src|kind|dst`.
    pub id: String,
    /// Id of the source node.
    pub src: String,
    /// Id of the destination node.
    pub dst: String,
    /// Edge kind as text (for example `'OWNS'`, used by `GraphDb::get_ownership`).
    pub kind: String,
    /// Edge weight; defaults to `1.0` when absent from serialized input.
    #[serde(default = "default_weight")]
    pub weight: f64,
    /// Edge confidence; defaults to `1.0` when absent from serialized input.
    #[serde(default = "default_weight")]
    pub confidence: f64,
}
56
/// Serde fallback used for `Edge::weight` and `Edge::confidence` when the
/// field is missing from the input: always `1.0`.
fn default_weight() -> f64 {
    const NEUTRAL: f64 = 1.0;
    NEUTRAL
}
60
/// Aggregate statistics for one indexed repository, as assembled by
/// `GraphDb::get_stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepoStats {
    /// Total number of rows in `nodes`.
    pub node_count: u64,
    /// Total number of rows in `edges`.
    pub edge_count: u64,
    /// Language label mapped to its fraction (0.0..=1.0) of the nodes that
    /// have a non-empty language.
    pub language_breakdown: std::collections::HashMap<String, f64>,
    /// Number of distinct communities with an id greater than zero.
    pub community_count: u32,
    /// Number of nodes whose kind is `"Function"`.
    pub function_count: u64,
    /// Number of nodes whose kind is `"Class"`.
    pub class_count: u64,
    /// Number of nodes whose kind is `"File"`.
    pub file_count: u64,
}
71
/// One community summary as returned by `GraphDb::get_communities`:
/// `(community id, label, node count, top node labels as "kind:name")`.
pub type CommunityRow = (i64, String, i64, Vec<String>);
/// Documentation-coverage result.
/// NOTE(review): the producer is not visible in this part of the file; the
/// meaning of the individual tuple fields needs to be confirmed there.
pub type DocsCoverage = (f64, Vec<(i64, i64, i64)>, Vec<Node>);
/// Test-coverage summary.
/// NOTE(review): the producer is not visible in this part of the file; the
/// meaning of the individual tuple fields needs to be confirmed there.
pub type TestCoverageSummary = (f64, i64, i64, Vec<Node>);
/// Accumulator used while grouping nodes per community in
/// `GraphDb::get_communities`: `(members as (kind, in_degree, name), member count)`.
type CommunityGroup = (Vec<(String, i64, String)>, i64);

/// One row of the `snapshots` table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SnapshotEntry {
    /// Primary key of the snapshot.
    pub id: String,
    /// Commit SHA the snapshot belongs to.
    pub commit_sha: String,
    /// Commit date, stored as text.
    pub commit_date: String,
    /// Commit message.
    pub commit_msg: String,
    /// Node count recorded for the snapshot.
    pub node_count: i64,
    /// Edge count recorded for the snapshot.
    pub edge_count: i64,
    /// Optional snapshot payload, stored as text.
    /// NOTE(review): payload format is not visible here — confirm with the writer.
    pub snapshot_data: Option<String>,
}
90
/// One row of the `tags` table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TagRow {
    /// Primary key of the tag.
    pub id: String,
    /// Path of the file the tag was found in.
    pub file_path: String,
    /// Line of the tag within the file.
    pub line: u32,
    /// Tag type; usable as a filter in `GraphDb::get_tags`.
    pub tag_type: String,
    /// Text of the tag.
    pub text: String,
    /// Comment type; the table default is `'code'`.
    pub comment_type: String,
}
101
/// One row of the `clones` table: a pair of nodes with a similarity score.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CloneRow {
    /// Primary key of the clone record.
    pub id: String,
    /// Id of the first node of the pair.
    pub node_a: String,
    /// Id of the second node of the pair.
    pub node_b: String,
    /// Similarity score. The table column is declared `FLOAT`, so values read
    /// back from the database may carry less precision than an `f64`.
    pub similarity: f64,
    /// Clone kind as text.
    pub kind: String,
}
110
111impl Default for Node {
112 fn default() -> Self {
113 Self {
114 id: String::new(),
115 kind: String::new(),
116 name: String::new(),
117 path: String::new(),
118 line_start: 0,
119 line_end: 0,
120 language: String::new(),
121 churn: 0.0,
122 coupling: 0.0,
123 community: 0,
124 in_degree: 0,
125 out_degree: 0,
126 exported: false,
127 is_dead_candidate: false,
128 dead_reason: None,
129 complexity: 0.0,
130 is_test_file: false,
131 test_count: 0,
132 is_tested: false,
133 }
134 }
135}
136
137impl Node {
138 pub fn from_def(d: &NodeDef, language: &str) -> Self {
139 let exported = d
140 .metadata
141 .get("exported")
142 .and_then(|v| v.as_bool())
143 .unwrap_or(false);
144 let complexity = d
145 .metadata
146 .get("complexity")
147 .and_then(|v| v.as_f64())
148 .unwrap_or(0.0);
149 Self {
150 id: d.id.clone(),
151 kind: d.kind.as_str().to_string(),
152 name: d.name.clone(),
153 path: d.path.clone(),
154 line_start: d.line_start,
155 line_end: d.line_end,
156 language: language.to_string(),
157 churn: 0.0,
158 coupling: 0.0,
159 community: 0,
160 in_degree: 0,
161 out_degree: 0,
162 exported,
163 is_dead_candidate: false,
164 dead_reason: None,
165 complexity,
166 is_test_file: false,
167 test_count: 0,
168 is_tested: false,
169 }
170 }
171}
172
173impl Edge {
174 pub fn from_def(d: &EdgeDef) -> Self {
175 let id = format!("{}|{}|{}", d.src, d.kind.as_str(), d.dst);
176 Self {
177 id,
178 src: d.src.clone(),
179 dst: d.dst.clone(),
180 kind: d.kind.as_str().to_string(),
181 weight: d.weight,
182 confidence: d.confidence,
183 }
184 }
185}
186
/// Handle to the DuckDB database that stores the graph of one repository.
pub struct GraphDb {
    /// Open DuckDB connection used by every query in this type.
    pub conn: duckdb::Connection,
    /// Identifier derived from the repository path via `repo_hash`.
    pub repo_id: String,
    /// Location of the database file (`~/.cgx/repos/<repo_id>.db`).
    pub db_path: PathBuf,
}
192
193impl GraphDb {
    /// Opens (creating it if necessary) the database for `repo_path` and makes
    /// sure the schema exists.
    ///
    /// The database file lives at `~/.cgx/repos/<repo_id>.db`, where `repo_id`
    /// comes from `repo_hash(repo_path)`.
    ///
    /// # Errors
    /// Fails when the home directory cannot be determined, the directory
    /// cannot be created, the database cannot be opened, or any of the schema
    /// statements fails.
    pub fn open(repo_path: &Path) -> anyhow::Result<Self> {
        let repo_id = repo_hash(repo_path);
        let dir = dirs::home_dir()
            .ok_or_else(|| anyhow::anyhow!("cannot determine home directory"))?
            .join(".cgx")
            .join("repos");
        std::fs::create_dir_all(&dir)?;

        let db_path = dir.join(format!("{}.db", repo_id));
        let conn = duckdb::Connection::open(&db_path)?;

        // Base schema: every statement is idempotent (IF NOT EXISTS).
        conn.execute_batch(
            "CREATE TABLE IF NOT EXISTS nodes (
                id VARCHAR PRIMARY KEY,
                kind VARCHAR NOT NULL,
                name VARCHAR NOT NULL,
                path VARCHAR NOT NULL,
                line_start INTEGER,
                line_end INTEGER,
                language VARCHAR,
                churn DOUBLE DEFAULT 0.0,
                coupling DOUBLE DEFAULT 0.0,
                community BIGINT DEFAULT 0,
                in_degree BIGINT DEFAULT 0,
                out_degree BIGINT DEFAULT 0,
                exported TINYINT DEFAULT 0,
                is_dead_candidate TINYINT DEFAULT 0,
                dead_reason TEXT,
                metadata JSON
            );
            CREATE TABLE IF NOT EXISTS edges (
                id VARCHAR PRIMARY KEY,
                src VARCHAR NOT NULL,
                dst VARCHAR NOT NULL,
                kind VARCHAR NOT NULL,
                weight DOUBLE DEFAULT 1.0,
                confidence DOUBLE DEFAULT 1.0,
                metadata JSON
            );
            CREATE TABLE IF NOT EXISTS communities (
                id INTEGER PRIMARY KEY,
                label VARCHAR,
                node_count INTEGER,
                top_nodes JSON
            );
            CREATE TABLE IF NOT EXISTS repo_meta (
                key VARCHAR PRIMARY KEY,
                value JSON
            );
            CREATE TABLE IF NOT EXISTS file_hashes (
                path VARCHAR PRIMARY KEY,
                hash VARCHAR NOT NULL,
                indexed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            );
            CREATE TABLE IF NOT EXISTS tags (
                id VARCHAR PRIMARY KEY,
                file_path VARCHAR NOT NULL,
                line INTEGER NOT NULL,
                tag_type VARCHAR NOT NULL,
                text VARCHAR NOT NULL,
                comment_type VARCHAR NOT NULL DEFAULT 'code'
            );
            CREATE TABLE IF NOT EXISTS clones (
                id VARCHAR PRIMARY KEY,
                node_a VARCHAR NOT NULL,
                node_b VARCHAR NOT NULL,
                similarity FLOAT NOT NULL,
                kind VARCHAR NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_nodes_kind ON nodes(kind);
            CREATE INDEX IF NOT EXISTS idx_nodes_path ON nodes(path);
            CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community);
            CREATE INDEX IF NOT EXISTS idx_edges_src ON edges(src);
            CREATE INDEX IF NOT EXISTS idx_edges_dst ON edges(dst);
            CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind);
            CREATE INDEX IF NOT EXISTS idx_tags_file ON tags(file_path);
            CREATE INDEX IF NOT EXISTS idx_tags_type ON tags(tag_type);
            CREATE INDEX IF NOT EXISTS idx_clones_a ON clones(node_a);
            CREATE INDEX IF NOT EXISTS idx_clones_b ON clones(node_b);",
        )?;

        // Column additions for `nodes`: these must run after the table exists,
        // and are no-ops when the columns are already present. Several of them
        // (complexity, doc_comment, is_test_file, test_count, is_tested) are
        // not part of the CREATE TABLE statement above.
        conn.execute_batch(
            "ALTER TABLE nodes ADD COLUMN IF NOT EXISTS exported TINYINT DEFAULT 0;
            ALTER TABLE nodes ADD COLUMN IF NOT EXISTS is_dead_candidate TINYINT DEFAULT 0;
            ALTER TABLE nodes ADD COLUMN IF NOT EXISTS dead_reason TEXT;
            ALTER TABLE nodes ADD COLUMN IF NOT EXISTS complexity DOUBLE DEFAULT 0.0;
            ALTER TABLE nodes ADD COLUMN IF NOT EXISTS doc_comment TEXT;
            ALTER TABLE nodes ADD COLUMN IF NOT EXISTS is_test_file TINYINT DEFAULT 0;
            ALTER TABLE nodes ADD COLUMN IF NOT EXISTS test_count INTEGER DEFAULT 0;
            ALTER TABLE nodes ADD COLUMN IF NOT EXISTS is_tested TINYINT DEFAULT 0;
            CREATE INDEX IF NOT EXISTS idx_nodes_dead ON nodes(is_dead_candidate);
            CREATE INDEX IF NOT EXISTS idx_nodes_complexity ON nodes(complexity);
            CREATE INDEX IF NOT EXISTS idx_nodes_is_tested ON nodes(is_tested);",
        )?;

        // Snapshot storage, created separately from the base schema.
        conn.execute_batch(
            "CREATE TABLE IF NOT EXISTS snapshots (
                id VARCHAR PRIMARY KEY,
                commit_sha VARCHAR NOT NULL,
                commit_date TEXT NOT NULL,
                commit_msg VARCHAR,
                node_count INTEGER,
                edge_count INTEGER,
                snapshot_data TEXT
            );
            CREATE INDEX IF NOT EXISTS idx_snapshots_date ON snapshots(commit_date);",
        )?;

        Ok(Self {
            conn,
            repo_id,
            db_path,
        })
    }
311
312 pub fn upsert_nodes(&self, nodes: &[Node]) -> anyhow::Result<usize> {
313 if nodes.is_empty() {
314 return Ok(0);
315 }
316 let mut count = 0;
317 let mut stmt = self.conn.prepare(
318 "INSERT OR REPLACE INTO nodes (id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, exported, complexity, is_test_file, test_count, is_tested)
319 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
320 )?;
321 for node in nodes {
322 stmt.execute(params![
323 node.id,
324 node.kind,
325 node.name,
326 node.path,
327 node.line_start,
328 node.line_end,
329 node.language,
330 node.churn,
331 node.coupling,
332 node.community,
333 node.in_degree,
334 node.out_degree,
335 node.exported as i32,
336 node.complexity,
337 node.is_test_file as i32,
338 node.test_count,
339 node.is_tested as i32,
340 ])?;
341 count += 1;
342 }
343 Ok(count)
344 }
345
346 pub fn upsert_edges(&self, edges: &[Edge]) -> anyhow::Result<usize> {
347 if edges.is_empty() {
348 return Ok(0);
349 }
350 let mut count = 0;
351 let mut stmt = self.conn.prepare(
352 "INSERT OR REPLACE INTO edges (id, src, dst, kind, weight, confidence)
353 VALUES (?, ?, ?, ?, ?, ?)",
354 )?;
355 for edge in edges {
356 stmt.execute(params![
357 edge.id,
358 edge.src,
359 edge.dst,
360 edge.kind,
361 edge.weight,
362 edge.confidence,
363 ])?;
364 count += 1;
365 }
366 Ok(count)
367 }
368
369 pub fn upsert_tags(&self, tags: &[TagRow]) -> anyhow::Result<usize> {
370 if tags.is_empty() {
371 return Ok(0);
372 }
373 let mut count = 0;
374 let mut stmt = self.conn.prepare(
375 "INSERT OR REPLACE INTO tags (id, file_path, line, tag_type, text, comment_type)
376 VALUES (?, ?, ?, ?, ?, ?)",
377 )?;
378 for tag in tags {
379 stmt.execute(params![
380 tag.id,
381 tag.file_path,
382 tag.line,
383 tag.tag_type,
384 tag.text,
385 tag.comment_type,
386 ])?;
387 count += 1;
388 }
389 Ok(count)
390 }
391
392 pub fn get_tags(
393 &self,
394 tag_type_filter: Option<&str>,
395 comment_type_filter: Option<&str>,
396 ) -> anyhow::Result<Vec<TagRow>> {
397 let sql = match (tag_type_filter, comment_type_filter) {
398 (Some(_), Some(_)) => {
399 "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
400 WHERE tag_type = ? AND comment_type = ? ORDER BY file_path, line"
401 }
402 (Some(_), None) => {
403 "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
404 WHERE tag_type = ? ORDER BY file_path, line"
405 }
406 (None, Some(_)) => {
407 "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
408 WHERE comment_type = ? ORDER BY file_path, line"
409 }
410 (None, None) => {
411 "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
412 ORDER BY file_path, line"
413 }
414 };
415
416 let mut stmt = self.conn.prepare(sql)?;
417 let map_row = |row: &duckdb::Row| {
418 Ok(TagRow {
419 id: row.get(0)?,
420 file_path: row.get(1)?,
421 line: row.get::<_, u32>(2)?,
422 tag_type: row.get(3)?,
423 text: row.get(4)?,
424 comment_type: row.get(5)?,
425 })
426 };
427
428 let rows = match (tag_type_filter, comment_type_filter) {
429 (Some(t), Some(c)) => stmt.query_map(params![t, c], map_row)?,
430 (Some(t), None) => stmt.query_map(params![t], map_row)?,
431 (None, Some(c)) => stmt.query_map(params![c], map_row)?,
432 (None, None) => stmt.query_map([], map_row)?,
433 };
434
435 let mut results = Vec::new();
436 for row in rows {
437 results.push(row?);
438 }
439 Ok(results)
440 }
441
442 pub fn clear_all_tags(&self) -> anyhow::Result<()> {
443 self.conn.execute_batch(
444 "DROP TABLE IF EXISTS tags;
445 CREATE TABLE IF NOT EXISTS tags (
446 id VARCHAR PRIMARY KEY,
447 file_path VARCHAR NOT NULL,
448 line INTEGER NOT NULL,
449 tag_type VARCHAR NOT NULL,
450 text VARCHAR NOT NULL,
451 comment_type VARCHAR NOT NULL DEFAULT 'code'
452 );
453 CREATE INDEX IF NOT EXISTS idx_tags_file ON tags(file_path);
454 CREATE INDEX IF NOT EXISTS idx_tags_type ON tags(tag_type);",
455 )?;
456 Ok(())
457 }
458
459 pub fn delete_tags_for_paths(&self, paths: &[String]) -> anyhow::Result<()> {
460 if paths.is_empty() {
461 return Ok(());
462 }
463 let mut stmt = self.conn.prepare("DELETE FROM tags WHERE file_path = ?")?;
464 for path in paths {
465 stmt.execute(params![path])?;
466 }
467 Ok(())
468 }
469
470 pub fn get_node(&self, id: &str) -> anyhow::Result<Option<Node>> {
471 let mut stmt = self
472 .conn
473 .prepare("SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false) as exported, COALESCE(is_dead_candidate, false) as is_dead_candidate, dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0) FROM nodes WHERE id = ?")?;
474 let mut rows = stmt.query_map(params![id], |row| {
475 Ok(Node {
476 id: row.get(0)?,
477 kind: row.get(1)?,
478 name: row.get(2)?,
479 path: row.get(3)?,
480 line_start: row.get(4)?,
481 line_end: row.get(5)?,
482 language: row.get(6)?,
483 churn: row.get(7)?,
484 coupling: row.get(8)?,
485 community: row.get(9)?,
486 in_degree: row.get(10)?,
487 out_degree: row.get(11)?,
488 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
489 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
490 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
491 complexity: row.get::<_, f64>(15).unwrap_or(0.0),
492 is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
493 test_count: row.get::<_, i64>(17).unwrap_or(0),
494 is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
495 })
496 })?;
497
498 match rows.next() {
499 Some(Ok(node)) => Ok(Some(node)),
500 _ => Ok(None),
501 }
502 }
503
504 pub fn get_neighbors(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
505 let mut seen = std::collections::HashSet::new();
506 seen.insert(id.to_string());
507 let mut current = vec![id.to_string()];
508 let mut result: Vec<Node> = Vec::new();
509 let max_depth = depth.min(3);
510
511 for _ in 0..max_depth {
512 if current.is_empty() {
513 break;
514 }
515 let mut next = Vec::new();
516
517 for cur_id in ¤t {
518 let mut stmt = self.conn.prepare(
519 "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, false), COALESCE(n.is_dead_candidate, false), n.dead_reason, COALESCE(n.complexity, 0.0), COALESCE(n.is_test_file, 0), COALESCE(n.test_count, 0), COALESCE(n.is_tested, 0)
520 FROM nodes n
521 INNER JOIN edges e ON (e.dst = n.id AND e.src = ?1) OR (e.src = n.id AND e.dst = ?2)
522 LIMIT 100",
523 )?;
524 let rows = stmt.query_map(params![cur_id, cur_id], |row| {
525 Ok(Node {
526 id: row.get(0)?,
527 kind: row.get(1)?,
528 name: row.get(2)?,
529 path: row.get(3)?,
530 line_start: row.get(4)?,
531 line_end: row.get(5)?,
532 language: row.get(6)?,
533 churn: row.get(7)?,
534 coupling: row.get(8)?,
535 community: row.get(9)?,
536 in_degree: row.get(10)?,
537 out_degree: row.get(11)?,
538 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
539 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
540 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
541 complexity: row.get::<_, f64>(15).unwrap_or(0.0),
542 is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
543 test_count: row.get::<_, i64>(17).unwrap_or(0),
544 is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
545 })
546 })?;
547
548 for row in rows {
549 let node = row?;
550 if seen.insert(node.id.clone()) {
551 next.push(node.id.clone());
552 result.push(node);
553 }
554 }
555 }
556 current = next;
557 }
558
559 Ok(result)
560 }
561
562 pub fn get_all_nodes(&self) -> anyhow::Result<Vec<Node>> {
563 let mut stmt = self.conn.prepare(
564 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0) FROM nodes",
565 )?;
566 let rows = stmt.query_map([], |row| {
567 Ok(Node {
568 id: row.get(0)?,
569 kind: row.get(1)?,
570 name: row.get(2)?,
571 path: row.get(3)?,
572 line_start: row.get(4)?,
573 line_end: row.get(5)?,
574 language: row.get(6)?,
575 churn: row.get(7)?,
576 coupling: row.get(8)?,
577 community: row.get(9)?,
578 in_degree: row.get(10)?,
579 out_degree: row.get(11)?,
580 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
581 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
582 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
583 complexity: row.get::<_, f64>(15).unwrap_or(0.0),
584 is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
585 test_count: row.get::<_, i64>(17).unwrap_or(0),
586 is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
587 })
588 })?;
589
590 let mut nodes = Vec::new();
591 for row in rows {
592 nodes.push(row?);
593 }
594 Ok(nodes)
595 }
596
597 pub fn get_all_edges(&self) -> anyhow::Result<Vec<Edge>> {
598 let mut stmt = self
599 .conn
600 .prepare("SELECT id, src, dst, kind, weight, confidence FROM edges")?;
601 let rows = stmt.query_map([], |row| {
602 Ok(Edge {
603 id: row.get(0)?,
604 src: row.get(1)?,
605 dst: row.get(2)?,
606 kind: row.get(3)?,
607 weight: row.get(4)?,
608 confidence: row.get(5)?,
609 })
610 })?;
611
612 let mut edges = Vec::new();
613 for row in rows {
614 edges.push(row?);
615 }
616 Ok(edges)
617 }
618
619 pub fn node_count(&self) -> anyhow::Result<u64> {
620 let count: i64 = self
621 .conn
622 .query_row("SELECT COUNT(*) FROM nodes", [], |row| row.get(0))?;
623 Ok(count as u64)
624 }
625
626 pub fn edge_count(&self) -> anyhow::Result<u64> {
627 let count: i64 = self
628 .conn
629 .query_row("SELECT COUNT(*) FROM edges", [], |row| row.get(0))?;
630 Ok(count as u64)
631 }
632
633 pub fn clear(&self) -> anyhow::Result<()> {
634 self.conn.execute_batch(
637 "TRUNCATE TABLE edges;
638 TRUNCATE TABLE nodes;
639 TRUNCATE TABLE communities;",
640 )?;
641 Ok(())
642 }
643
644 pub fn get_language_breakdown(&self) -> anyhow::Result<std::collections::HashMap<String, f64>> {
645 let mut stmt = self.conn.prepare(
646 "SELECT language, COUNT(*) as cnt FROM nodes WHERE language != '' GROUP BY language",
647 )?;
648 let rows = stmt.query_map([], |row| {
649 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
650 })?;
651
652 let mut counts: std::collections::HashMap<String, i64> = std::collections::HashMap::new();
653 for row in rows {
654 let (lang, cnt) = row?;
655 *counts.entry(lang).or_default() += cnt;
656 }
657
658 let total: i64 = counts.values().sum();
659 if total == 0 {
660 return Ok(std::collections::HashMap::new());
661 }
662
663 let mut breakdown = std::collections::HashMap::new();
664 for (lang, cnt) in counts {
665 breakdown.insert(lang, cnt as f64 / total as f64);
666 }
667 Ok(breakdown)
668 }
669
670 pub fn get_node_counts_by_kind(
671 &self,
672 ) -> anyhow::Result<std::collections::HashMap<String, u64>> {
673 let mut stmt = self
674 .conn
675 .prepare("SELECT kind, COUNT(*) as cnt FROM nodes GROUP BY kind")?;
676 let rows = stmt.query_map([], |row| {
677 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
678 })?;
679
680 let mut counts = std::collections::HashMap::new();
681 for row in rows {
682 let (kind, cnt) = row?;
683 counts.insert(kind, cnt as u64);
684 }
685 Ok(counts)
686 }
687
688 pub fn upsert_node_scores(
689 &self,
690 node_id: &str,
691 churn: f64,
692 coupling: f64,
693 ) -> anyhow::Result<()> {
694 self.conn.execute(
695 "UPDATE nodes SET churn = ?, coupling = ? WHERE id = ?",
696 params![churn, coupling, node_id],
697 )?;
698 Ok(())
699 }
700
701 pub fn update_in_out_degrees(&self) -> anyhow::Result<()> {
702 self.conn.execute_batch(
703 "UPDATE nodes SET in_degree = 0, out_degree = 0;
704 UPDATE nodes SET out_degree = (SELECT COUNT(*) FROM edges WHERE edges.src = nodes.id);
705 UPDATE nodes SET in_degree = (SELECT COUNT(*) FROM edges WHERE edges.dst = nodes.id);",
706 )?;
707 Ok(())
708 }
709
710 pub fn get_hotspots(&self, limit: usize) -> anyhow::Result<Vec<(String, f64, f64, i64)>> {
711 let mut stmt = self.conn.prepare(
712 "SELECT path, churn, coupling, in_degree
713 FROM nodes
714 WHERE kind = 'File' AND (churn > 0.0 OR in_degree > 0)
715 ORDER BY (churn * COALESCE(coupling, 0.0) + CAST(in_degree AS DOUBLE) * 0.01) DESC
716 LIMIT ?",
717 )?;
718 let rows = stmt.query_map(params![limit as i64], |row| {
719 Ok((
720 row.get::<_, String>(0)?,
721 row.get::<_, f64>(1)?,
722 row.get::<_, f64>(2)?,
723 row.get::<_, i64>(3)?,
724 ))
725 })?;
726 let mut results = Vec::new();
727 for row in rows {
728 results.push(row?);
729 }
730 Ok(results)
731 }
732
733 pub fn get_ownership(&self) -> anyhow::Result<Vec<(String, i64)>> {
734 let mut stmt = self.conn.prepare(
735 "SELECT n.name, COUNT(e.id) as file_count
736 FROM nodes n
737 INNER JOIN edges e ON e.src = n.id AND e.kind = 'OWNS'
738 WHERE n.kind = 'Author'
739 GROUP BY n.name
740 ORDER BY file_count DESC",
741 )?;
742 let rows = stmt.query_map([], |row| {
743 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
744 })?;
745 let mut results = Vec::new();
746 for row in rows {
747 results.push(row?);
748 }
749 Ok(results)
750 }
751
752 pub fn compute_coupling(&self) -> anyhow::Result<()> {
753 self.conn.execute_batch(
754 "UPDATE nodes SET coupling = 0.0;
755 UPDATE nodes SET coupling =
756 CASE
757 WHEN (SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') > 0
758 THEN CAST(in_degree AS DOUBLE) / CAST((SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') AS DOUBLE)
759 ELSE 0.0
760 END
761 WHERE kind = 'File';",
762 )?;
763 Ok(())
764 }
765
766 pub fn update_node_communities(
767 &self,
768 communities: &std::collections::HashMap<String, i64>,
769 ) -> anyhow::Result<usize> {
770 if communities.is_empty() {
771 return Ok(0);
772 }
773 let mut count = 0;
774 let mut stmt = self
775 .conn
776 .prepare("UPDATE nodes SET community = ? WHERE id = ?")?;
777 for (node_id, community) in communities {
778 let affected = stmt.execute(params![*community, node_id.as_str()])?;
779 count += affected;
780 }
781 Ok(count)
782 }
783
784 pub fn get_stats(&self) -> anyhow::Result<RepoStats> {
785 let node_count = self.node_count()?;
786 let edge_count = self.edge_count()?;
787 let lang_breakdown = self.get_language_breakdown()?;
788 let communities = self.get_communities()?;
789 let counts_by_kind = self.get_node_counts_by_kind()?;
790
791 Ok(RepoStats {
792 node_count,
793 edge_count,
794 language_breakdown: lang_breakdown,
795 community_count: communities.len() as u32,
796 function_count: counts_by_kind.get("Function").copied().unwrap_or(0),
797 class_count: counts_by_kind.get("Class").copied().unwrap_or(0),
798 file_count: counts_by_kind.get("File").copied().unwrap_or(0),
799 })
800 }
801
802 pub fn get_entry_points(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
803 let mut stmt = self.conn.prepare(
804 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0)
805 FROM nodes
806 WHERE in_degree = 0 AND kind != 'File' AND kind != 'Author'
807 ORDER BY out_degree DESC
808 LIMIT ?",
809 )?;
810 let rows = stmt.query_map(params![limit as i64], |row| {
811 Ok(Node {
812 id: row.get(0)?,
813 kind: row.get(1)?,
814 name: row.get(2)?,
815 path: row.get(3)?,
816 line_start: row.get(4)?,
817 line_end: row.get(5)?,
818 language: row.get(6)?,
819 churn: row.get(7)?,
820 coupling: row.get(8)?,
821 community: row.get(9)?,
822 in_degree: row.get(10)?,
823 out_degree: row.get(11)?,
824 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
825 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
826 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
827 complexity: row.get::<_, f64>(15).unwrap_or(0.0),
828 is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
829 test_count: row.get::<_, i64>(17).unwrap_or(0),
830 is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
831 })
832 })?;
833 let mut results = Vec::new();
834 for row in rows {
835 results.push(row?);
836 }
837 Ok(results)
838 }
839
840 pub fn get_god_nodes(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
841 let mut stmt = self.conn.prepare(
842 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0)
843 FROM nodes
844 WHERE in_degree > 0 AND kind != 'File' AND kind != 'Author'
845 ORDER BY in_degree DESC
846 LIMIT ?",
847 )?;
848 let rows = stmt.query_map(params![limit as i64], |row| {
849 Ok(Node {
850 id: row.get(0)?,
851 kind: row.get(1)?,
852 name: row.get(2)?,
853 path: row.get(3)?,
854 line_start: row.get(4)?,
855 line_end: row.get(5)?,
856 language: row.get(6)?,
857 churn: row.get(7)?,
858 coupling: row.get(8)?,
859 community: row.get(9)?,
860 in_degree: row.get(10)?,
861 out_degree: row.get(11)?,
862 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
863 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
864 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
865 complexity: row.get::<_, f64>(15).unwrap_or(0.0),
866 is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
867 test_count: row.get::<_, i64>(17).unwrap_or(0),
868 is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
869 })
870 })?;
871 let mut results = Vec::new();
872 for row in rows {
873 results.push(row?);
874 }
875 Ok(results)
876 }
877
878 pub fn get_communities(&self) -> anyhow::Result<Vec<CommunityRow>> {
879 let mut stmt = self.conn.prepare(
880 "SELECT community, kind, name, path, in_degree
881 FROM nodes
882 WHERE community > 0
883 ORDER BY community",
884 )?;
885 let rows = stmt.query_map([], |row| {
886 Ok((
887 row.get::<_, i64>(0)?,
888 row.get::<_, String>(1)?,
889 row.get::<_, String>(2)?,
890 row.get::<_, String>(3)?,
891 row.get::<_, i64>(4)?,
892 ))
893 })?;
894
895 let mut community_map: std::collections::HashMap<i64, CommunityGroup> =
896 std::collections::HashMap::new();
897 for row in rows {
898 let (community, kind, name, _path, in_degree) = row?;
899 let entry = community_map
900 .entry(community)
901 .or_insert_with(|| (Vec::new(), 0));
902 entry.0.push((kind, in_degree, name));
903 entry.1 += 1;
904 }
905
906 let mut result: Vec<CommunityRow> = community_map
907 .into_iter()
908 .map(|(community, (mut items, count))| {
909 items.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.2.cmp(&b.2)));
910 let top_nodes: Vec<String> = items
911 .iter()
912 .take(5)
913 .map(|(kind, _deg, name)| format!("{}:{}", kind, name))
914 .collect();
915 let label = top_nodes
916 .first()
917 .cloned()
918 .unwrap_or_else(|| format!("community-{}", community));
919 (community, label, count, top_nodes)
920 })
921 .collect();
922
923 result.sort_by_key(|row| std::cmp::Reverse(row.2));
924 Ok(result)
925 }
926
927 pub fn clear_communities(&self) -> anyhow::Result<()> {
928 self.conn.execute("UPDATE nodes SET community = 0", [])?;
929 self.conn.execute("DELETE FROM communities", [])?;
930 Ok(())
931 }
932
933 pub fn get_dependents(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
936 let mut seen = std::collections::HashSet::new();
937 seen.insert(id.to_string());
938 let mut current = vec![id.to_string()];
939 let mut result: Vec<Node> = Vec::new();
940 let max_depth = depth.min(3);
941
942 for _ in 0..max_depth {
943 if current.is_empty() {
944 break;
945 }
946 let mut next = Vec::new();
947 for cur_id in ¤t {
948 let mut stmt = self.conn.prepare(
949 "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, false), COALESCE(n.is_dead_candidate, false), n.dead_reason, COALESCE(n.complexity, 0.0), COALESCE(n.is_test_file, 0), COALESCE(n.test_count, 0), COALESCE(n.is_tested, 0)
950 FROM nodes n
951 INNER JOIN edges e ON e.src = n.id AND e.dst = ?
952 LIMIT 100",
953 )?;
954 let rows = stmt.query_map(params![cur_id], |row| {
955 Ok(Node {
956 id: row.get(0)?,
957 kind: row.get(1)?,
958 name: row.get(2)?,
959 path: row.get(3)?,
960 line_start: row.get(4)?,
961 line_end: row.get(5)?,
962 language: row.get(6)?,
963 churn: row.get(7)?,
964 coupling: row.get(8)?,
965 community: row.get(9)?,
966 in_degree: row.get(10)?,
967 out_degree: row.get(11)?,
968 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
969 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
970 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
971 complexity: row.get::<_, f64>(15).unwrap_or(0.0),
972 is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
973 test_count: row.get::<_, i64>(17).unwrap_or(0),
974 is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
975 })
976 })?;
977 for row in rows {
978 let node = row?;
979 if seen.insert(node.id.clone()) {
980 next.push(node.id.clone());
981 result.push(node);
982 }
983 }
984 }
985 current = next;
986 }
987
988 Ok(result)
989 }
990
991 pub fn get_nodes_by_community(&self, community: i64) -> anyhow::Result<Vec<Node>> {
992 let mut stmt = self.conn.prepare(
993 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0) FROM nodes WHERE community = ?",
994 )?;
995 let rows = stmt.query_map(params![community], |row| {
996 Ok(Node {
997 id: row.get(0)?,
998 kind: row.get(1)?,
999 name: row.get(2)?,
1000 path: row.get(3)?,
1001 line_start: row.get(4)?,
1002 line_end: row.get(5)?,
1003 language: row.get(6)?,
1004 churn: row.get(7)?,
1005 coupling: row.get(8)?,
1006 community: row.get(9)?,
1007 in_degree: row.get(10)?,
1008 out_degree: row.get(11)?,
1009 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
1010 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
1011 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
1012 complexity: row.get::<_, f64>(15).unwrap_or(0.0),
1013 is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
1014 test_count: row.get::<_, i64>(17).unwrap_or(0),
1015 is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
1016 })
1017 })?;
1018 let mut nodes = Vec::new();
1019 for row in rows {
1020 nodes.push(row?);
1021 }
1022 Ok(nodes)
1023 }
1024
1025 pub fn mark_dead_candidates(&self, items: &[(String, String)]) -> anyhow::Result<()> {
1026 if items.is_empty() {
1028 return Ok(());
1029 }
1030 let mut stmt = self
1031 .conn
1032 .prepare("UPDATE nodes SET is_dead_candidate = 1, dead_reason = ? WHERE id = ?")?;
1033 for (id, reason) in items {
1034 stmt.execute(params![reason, id])?;
1035 }
1036 Ok(())
1037 }
1038
1039 pub fn get_dead_code_stats(&self) -> anyhow::Result<(i64, i64)> {
1040 let total: i64 = self
1042 .conn
1043 .query_row(
1044 "SELECT COUNT(*) FROM nodes WHERE is_dead_candidate = 1",
1045 [],
1046 |r| r.get(0),
1047 )
1048 .unwrap_or(0);
1049 let high: i64 = self.conn.query_row(
1051 "SELECT COUNT(*) FROM nodes WHERE is_dead_candidate = 1 AND dead_reason IN ('unreachable', 'disconnected')", [], |r| r.get(0)
1052 ).unwrap_or(0);
1053 Ok((total, high))
1054 }
1055
1056 pub fn get_edges_by_community(&self, community: i64) -> anyhow::Result<Vec<Edge>> {
1057 let mut stmt = self.conn.prepare(
1058 "SELECT DISTINCT e.id, e.src, e.dst, e.kind, e.weight, e.confidence
1059 FROM edges e
1060 INNER JOIN nodes n1 ON e.src = n1.id AND n1.community = ?
1061 INNER JOIN nodes n2 ON e.dst = n2.id AND n2.community = ?",
1062 )?;
1063 let rows = stmt.query_map(params![community, community], |row| {
1064 Ok(Edge {
1065 id: row.get(0)?,
1066 src: row.get(1)?,
1067 dst: row.get(2)?,
1068 kind: row.get(3)?,
1069 weight: row.get(4)?,
1070 confidence: row.get(5)?,
1071 })
1072 })?;
1073 let mut edges = Vec::new();
1074 for row in rows {
1075 edges.push(row?);
1076 }
1077 Ok(edges)
1078 }
1079
1080 pub fn get_file_hashes(&self) -> anyhow::Result<std::collections::HashMap<String, String>> {
1083 let mut stmt = self.conn.prepare("SELECT path, hash FROM file_hashes")?;
1084 let rows = stmt.query_map([], |row| {
1085 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1086 })?;
1087 let mut result = std::collections::HashMap::new();
1088 for row in rows {
1089 let (path, hash) = row?;
1090 result.insert(path, hash);
1091 }
1092 Ok(result)
1093 }
1094
1095 pub fn set_file_hash(&self, path: &str, hash: &str) -> anyhow::Result<()> {
1096 self.conn.execute(
1097 "INSERT OR REPLACE INTO file_hashes (path, hash) VALUES (?, ?)",
1098 params![path, hash],
1099 )?;
1100 Ok(())
1101 }
1102
1103 pub fn remove_file_hashes(&self, paths: &[String]) -> anyhow::Result<()> {
1104 if paths.is_empty() {
1105 return Ok(());
1106 }
1107 let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
1108 let sql = format!("DELETE FROM file_hashes WHERE path IN ({})", placeholders);
1109 let mut stmt = self.conn.prepare(&sql)?;
1110 let params: Vec<&dyn duckdb::ToSql> =
1111 paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
1112 stmt.execute(params.as_slice())?;
1113 Ok(())
1114 }
1115
1116 pub fn delete_nodes_by_paths(&self, paths: &[String]) -> anyhow::Result<usize> {
1117 if paths.is_empty() {
1118 return Ok(0);
1119 }
1120 let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
1121 let sql_edges = format!(
1123 "DELETE FROM edges WHERE src IN (SELECT id FROM nodes WHERE path IN ({})) OR dst IN (SELECT id FROM nodes WHERE path IN ({}))",
1124 placeholders, placeholders
1125 );
1126 let mut stmt_edges = self.conn.prepare(&sql_edges)?;
1127 let params_edges: Vec<&dyn duckdb::ToSql> = paths
1128 .iter()
1129 .chain(paths.iter())
1130 .map(|p| p as &dyn duckdb::ToSql)
1131 .collect();
1132 stmt_edges.execute(params_edges.as_slice())?;
1133
1134 let sql_nodes = format!("DELETE FROM nodes WHERE path IN ({})", placeholders);
1136 let mut stmt_nodes = self.conn.prepare(&sql_nodes)?;
1137 let params_nodes: Vec<&dyn duckdb::ToSql> =
1138 paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
1139 let count = stmt_nodes.execute(params_nodes.as_slice())?;
1140 Ok(count)
1141 }
1142
1143 pub fn update_node_doc_comment(&self, id: &str, doc: &str) -> anyhow::Result<()> {
1144 self.conn.execute(
1145 "UPDATE nodes SET doc_comment = ? WHERE id = ?",
1146 params![doc, id],
1147 )?;
1148 Ok(())
1149 }
1150
1151 pub fn update_node_complexity(&self, id: &str, complexity: f64) -> anyhow::Result<()> {
1152 self.conn.execute(
1153 "UPDATE nodes SET complexity = ? WHERE id = ?",
1154 params![complexity, id],
1155 )?;
1156 Ok(())
1157 }
1158
1159 pub fn get_nodes_by_complexity(
1160 &self,
1161 limit: usize,
1162 min_score: f64,
1163 ) -> anyhow::Result<Vec<Node>> {
1164 let mut stmt = self.conn.prepare(
1165 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0)
1166 FROM nodes
1167 WHERE kind = 'Function' AND COALESCE(complexity, 0.0) >= ?
1168 ORDER BY complexity DESC
1169 LIMIT ?",
1170 )?;
1171 let rows = stmt.query_map(params![min_score, limit as i64], |row| {
1172 Ok(Node {
1173 id: row.get(0)?,
1174 kind: row.get(1)?,
1175 name: row.get(2)?,
1176 path: row.get(3)?,
1177 line_start: row.get(4)?,
1178 line_end: row.get(5)?,
1179 language: row.get(6)?,
1180 churn: row.get(7)?,
1181 coupling: row.get(8)?,
1182 community: row.get(9)?,
1183 in_degree: row.get(10)?,
1184 out_degree: row.get(11)?,
1185 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
1186 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
1187 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
1188 complexity: row.get::<_, f64>(15).unwrap_or(0.0),
1189 is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
1190 test_count: row.get::<_, i64>(17).unwrap_or(0),
1191 is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
1192 })
1193 })?;
1194 let mut results = Vec::new();
1195 for row in rows {
1196 results.push(row?);
1197 }
1198 Ok(results)
1199 }
1200
1201 pub fn get_docs_coverage(
1203 &self,
1204 ) -> anyhow::Result<DocsCoverage> {
1205 let overall: f64 = self
1206 .conn
1207 .query_row(
1208 "SELECT COALESCE(
1209 CAST(SUM(CASE WHEN doc_comment IS NOT NULL AND doc_comment != '' THEN 1 ELSE 0 END) AS DOUBLE)
1210 / NULLIF(CAST(COUNT(*) AS DOUBLE), 0.0) * 100.0,
1211 0.0)
1212 FROM nodes WHERE kind IN ('Function', 'Class') AND path NOT LIKE '%test%'",
1213 [],
1214 |r| r.get(0),
1215 )
1216 .unwrap_or(0.0);
1217
1218 let mut by_community = Vec::new();
1219 let mut stmt = self.conn.prepare(
1220 "SELECT community,
1221 SUM(CASE WHEN doc_comment IS NOT NULL AND doc_comment != '' THEN 1 ELSE 0 END) as documented,
1222 COUNT(*) as total
1223 FROM nodes
1224 WHERE kind IN ('Function', 'Class') AND path NOT LIKE '%test%'
1225 GROUP BY community
1226 ORDER BY community",
1227 )?;
1228 let comm_rows = stmt.query_map([], |row| {
1229 Ok((
1230 row.get::<_, i64>(0)?,
1231 row.get::<_, i64>(1)?,
1232 row.get::<_, i64>(2)?,
1233 ))
1234 })?;
1235 for row in comm_rows {
1236 by_community.push(row?);
1237 }
1238
1239 let mut undoc_stmt = self.conn.prepare(
1240 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0)
1241 FROM nodes
1242 WHERE kind = 'Function' AND (doc_comment IS NULL OR doc_comment = '')
1243 ORDER BY in_degree DESC
1244 LIMIT 10",
1245 )?;
1246 let undoc_rows = undoc_stmt.query_map([], |row| {
1247 Ok(Node {
1248 id: row.get(0)?,
1249 kind: row.get(1)?,
1250 name: row.get(2)?,
1251 path: row.get(3)?,
1252 line_start: row.get(4)?,
1253 line_end: row.get(5)?,
1254 language: row.get(6)?,
1255 churn: row.get(7)?,
1256 coupling: row.get(8)?,
1257 community: row.get(9)?,
1258 in_degree: row.get(10)?,
1259 out_degree: row.get(11)?,
1260 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
1261 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
1262 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
1263 complexity: row.get::<_, f64>(15).unwrap_or(0.0),
1264 is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
1265 test_count: row.get::<_, i64>(17).unwrap_or(0),
1266 is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
1267 })
1268 })?;
1269 let mut undocumented = Vec::new();
1270 for row in undoc_rows {
1271 undocumented.push(row?);
1272 }
1273
1274 Ok((overall, by_community, undocumented))
1275 }
1276
1277 pub fn upsert_clones(&self, clones: &[CloneRow]) -> anyhow::Result<usize> {
1278 if clones.is_empty() {
1279 return Ok(0);
1280 }
1281 let mut count = 0;
1282 let mut stmt = self.conn.prepare(
1283 "INSERT OR REPLACE INTO clones (id, node_a, node_b, similarity, kind) VALUES (?, ?, ?, ?, ?)",
1284 )?;
1285 for c in clones {
1286 stmt.execute(params![c.id, c.node_a, c.node_b, c.similarity, c.kind])?;
1287 count += 1;
1288 }
1289 Ok(count)
1290 }
1291
1292 pub fn get_clones(
1293 &self,
1294 min_similarity: f64,
1295 kind_filter: Option<&str>,
1296 ) -> anyhow::Result<Vec<CloneRow>> {
1297 let (sql, use_kind) = if kind_filter.is_some() {
1298 (
1299 "SELECT id, node_a, node_b, similarity, kind FROM clones WHERE similarity >= ? AND kind = ? ORDER BY similarity DESC",
1300 true,
1301 )
1302 } else {
1303 (
1304 "SELECT id, node_a, node_b, similarity, kind FROM clones WHERE similarity >= ? ORDER BY similarity DESC",
1305 false,
1306 )
1307 };
1308
1309 let mut stmt = self.conn.prepare(sql)?;
1310 let map_row = |row: &duckdb::Row| {
1311 Ok(CloneRow {
1312 id: row.get(0)?,
1313 node_a: row.get(1)?,
1314 node_b: row.get(2)?,
1315 similarity: row.get::<_, f32>(3)? as f64,
1316 kind: row.get(4)?,
1317 })
1318 };
1319
1320 let rows = if use_kind {
1321 stmt.query_map(
1322 params![min_similarity, kind_filter.unwrap_or("")],
1323 map_row,
1324 )?
1325 } else {
1326 stmt.query_map(params![min_similarity], map_row)?
1327 };
1328
1329 let mut results = Vec::new();
1330 for row in rows {
1331 results.push(row?);
1332 }
1333 Ok(results)
1334 }
1335
1336 pub fn clear_clones(&self) -> anyhow::Result<()> {
1337 self.conn.execute("DELETE FROM clones", [])?;
1338 Ok(())
1339 }
1340
1341 pub fn mark_test_files(&self, paths: &[String]) -> anyhow::Result<()> {
1342 if paths.is_empty() {
1343 return Ok(());
1344 }
1345 let mut stmt = self
1346 .conn
1347 .prepare("UPDATE nodes SET is_test_file = 1 WHERE path = ?")?;
1348 for path in paths {
1349 stmt.execute(params![path])?;
1350 }
1351 Ok(())
1352 }
1353
1354 pub fn update_test_coverage(&self) -> anyhow::Result<()> {
1356 self.conn.execute_batch(
1357 "UPDATE nodes SET test_count = (
1358 SELECT COUNT(*) FROM edges
1359 WHERE edges.dst = nodes.id AND edges.kind = 'TESTS'
1360 );
1361 UPDATE nodes SET is_tested = (test_count > 0)
1362 WHERE is_test_file = 0;",
1363 )?;
1364 Ok(())
1365 }
1366
1367 pub fn get_test_coverage_summary(
1369 &self,
1370 top_n: usize,
1371 ) -> anyhow::Result<(f64, i64, i64, Vec<Node>)> {
1372 let tested: i64 = self
1373 .conn
1374 .query_row(
1375 "SELECT COUNT(*) FROM nodes WHERE kind IN ('Function','Class') AND is_test_file = 0 AND is_tested = 1",
1376 [],
1377 |r| r.get(0),
1378 )
1379 .unwrap_or(0);
1380 let total: i64 = self
1381 .conn
1382 .query_row(
1383 "SELECT COUNT(*) FROM nodes WHERE kind IN ('Function','Class') AND is_test_file = 0",
1384 [],
1385 |r| r.get(0),
1386 )
1387 .unwrap_or(0);
1388
1389 let overall_pct = if total > 0 {
1390 (tested as f64 / total as f64) * 100.0
1391 } else {
1392 0.0
1393 };
1394
1395 let mut gap_stmt = self.conn.prepare(
1396 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0)
1397 FROM nodes
1398 WHERE kind IN ('Function','Class') AND is_test_file = 0 AND COALESCE(is_tested, 0) = 0
1399 ORDER BY (churn * CAST(in_degree AS DOUBLE) + CAST(in_degree AS DOUBLE) * 0.5) DESC
1400 LIMIT ?",
1401 )?;
1402 let gap_rows = gap_stmt.query_map(params![top_n as i64], |row| {
1403 Ok(Node {
1404 id: row.get(0)?,
1405 kind: row.get(1)?,
1406 name: row.get(2)?,
1407 path: row.get(3)?,
1408 line_start: row.get(4)?,
1409 line_end: row.get(5)?,
1410 language: row.get(6)?,
1411 churn: row.get(7)?,
1412 coupling: row.get(8)?,
1413 community: row.get(9)?,
1414 in_degree: row.get(10)?,
1415 out_degree: row.get(11)?,
1416 exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
1417 is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
1418 dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
1419 complexity: row.get::<_, f64>(15).unwrap_or(0.0),
1420 is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
1421 test_count: row.get::<_, i64>(17).unwrap_or(0),
1422 is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
1423 })
1424 })?;
1425 let mut gaps = Vec::new();
1426 for row in gap_rows {
1427 gaps.push(row?);
1428 }
1429
1430 Ok((overall_pct, tested, total - tested, gaps))
1431 }
1432
1433 pub fn upsert_snapshot(&self, entry: &SnapshotEntry) -> anyhow::Result<()> {
1434 self.conn.execute(
1435 "INSERT OR REPLACE INTO snapshots (id, commit_sha, commit_date, commit_msg, node_count, edge_count, snapshot_data)
1436 VALUES (?, ?, ?, ?, ?, ?, ?)",
1437 params![
1438 entry.id,
1439 entry.commit_sha,
1440 entry.commit_date,
1441 entry.commit_msg,
1442 entry.node_count,
1443 entry.edge_count,
1444 entry.snapshot_data,
1445 ],
1446 )?;
1447 Ok(())
1448 }
1449
1450 pub fn get_snapshots(&self, limit: usize) -> anyhow::Result<Vec<SnapshotEntry>> {
1451 let mut stmt = self.conn.prepare(
1452 "SELECT id, commit_sha, commit_date, commit_msg, COALESCE(node_count,0), COALESCE(edge_count,0), snapshot_data
1453 FROM snapshots ORDER BY commit_date DESC LIMIT ?",
1454 )?;
1455 let rows = stmt.query_map(params![limit as i64], |row| {
1456 Ok(SnapshotEntry {
1457 id: row.get(0)?,
1458 commit_sha: row.get(1)?,
1459 commit_date: row.get(2)?,
1460 commit_msg: row.get(3)?,
1461 node_count: row.get(4)?,
1462 edge_count: row.get(5)?,
1463 snapshot_data: row.get(6)?,
1464 })
1465 })?;
1466 let mut result = Vec::new();
1467 for row in rows {
1468 result.push(row?);
1469 }
1470 Ok(result)
1471 }
1472
1473 pub fn get_snapshot_by_sha(&self, sha: &str) -> anyhow::Result<Option<SnapshotEntry>> {
1474 let mut stmt = self.conn.prepare(
1475 "SELECT id, commit_sha, commit_date, commit_msg, COALESCE(node_count,0), COALESCE(edge_count,0), snapshot_data
1476 FROM snapshots WHERE commit_sha = ? OR commit_sha LIKE ? LIMIT 1",
1477 )?;
1478 let prefix = format!("{}%", sha);
1479 let mut rows = stmt.query_map(params![sha, prefix], |row| {
1480 Ok(SnapshotEntry {
1481 id: row.get(0)?,
1482 commit_sha: row.get(1)?,
1483 commit_date: row.get(2)?,
1484 commit_msg: row.get(3)?,
1485 node_count: row.get(4)?,
1486 edge_count: row.get(5)?,
1487 snapshot_data: row.get(6)?,
1488 })
1489 })?;
1490 match rows.next() {
1491 Some(Ok(entry)) => Ok(Some(entry)),
1492 _ => Ok(None),
1493 }
1494 }
1495
1496 pub fn snapshot_count(&self) -> i64 {
1497 self.conn
1498 .query_row("SELECT COUNT(*) FROM snapshots", [], |r| r.get(0))
1499 .unwrap_or(0)
1500 }
1501}
1502
1503pub fn repo_hash(path: &Path) -> String {
1504 let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
1505 let path_str = canonical.to_string_lossy().to_string();
1506 let mut hasher = Sha256::new();
1507 hasher.update(path_str.as_bytes());
1508 format!("{:x}", hasher.finalize())[..16].to_string()
1509}