Skip to main content

cgx_engine/
graph.rs

1use std::path::{Path, PathBuf};
2
3use duckdb::params;
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6
7use crate::parser::{EdgeDef, NodeDef};
8
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct Node {
11    pub id: String,
12    pub kind: String,
13    pub name: String,
14    pub path: String,
15    pub line_start: u32,
16    pub line_end: u32,
17    #[serde(default)]
18    pub language: String,
19    #[serde(default)]
20    pub churn: f64,
21    #[serde(default)]
22    pub coupling: f64,
23    #[serde(default)]
24    pub community: i64,
25    #[serde(default)]
26    pub in_degree: i64,
27    #[serde(default)]
28    pub out_degree: i64,
29    #[serde(default)]
30    pub exported: bool,
31    #[serde(default)]
32    pub is_dead_candidate: bool,
33    #[serde(default)]
34    pub dead_reason: Option<String>,
35    #[serde(default)]
36    pub complexity: f64,
37    #[serde(default)]
38    pub is_test_file: bool,
39    #[serde(default)]
40    pub test_count: i64,
41    #[serde(default)]
42    pub is_tested: bool,
43}
44
45#[derive(Debug, Clone, Serialize, Deserialize)]
46pub struct Edge {
47    pub id: String,
48    pub src: String,
49    pub dst: String,
50    pub kind: String,
51    #[serde(default = "default_weight")]
52    pub weight: f64,
53    #[serde(default = "default_weight")]
54    pub confidence: f64,
55}
56
/// Serde fallback used for `Edge::weight` and `Edge::confidence`: always `1.0`.
fn default_weight() -> f64 {
    1.0_f64
}
60
61#[derive(Debug, Clone, Serialize, Deserialize)]
62pub struct RepoStats {
63    pub node_count: u64,
64    pub edge_count: u64,
65    pub language_breakdown: std::collections::HashMap<String, f64>,
66    pub community_count: u32,
67    pub function_count: u64,
68    pub class_count: u64,
69    pub file_count: u64,
70}
71
72pub type CommunityRow = (i64, String, i64, Vec<String>);
73/// (overall_pct, Vec<(community_id, documented, total)>, undocumented_high_coupling)
74pub type DocsCoverage = (f64, Vec<(i64, i64, i64)>, Vec<Node>);
75/// (overall_pct, tested_count, untested_count, gaps)
76pub type TestCoverageSummary = (f64, i64, i64, Vec<Node>);
77type CommunityGroup = (Vec<(String, i64, String)>, i64); // (kind, in_degree, name)
78
79#[derive(Debug, Clone, Serialize, Deserialize)]
80pub struct SnapshotEntry {
81    pub id: String,
82    pub commit_sha: String,
83    pub commit_date: String,
84    pub commit_msg: String,
85    pub node_count: i64,
86    pub edge_count: i64,
87    /// JSON blob: {"file_count": N, "insertions": N, "deletions": N}
88    pub snapshot_data: Option<String>,
89}
90
91#[derive(Debug, Clone, Serialize, Deserialize)]
92pub struct TagRow {
93    pub id: String,
94    pub file_path: String,
95    pub line: u32,
96    pub tag_type: String,
97    pub text: String,
98    /// "code", "jsx", or "jsx_commented_code"
99    pub comment_type: String,
100}
101
102#[derive(Debug, Clone, Serialize, Deserialize)]
103pub struct CloneRow {
104    pub id: String,
105    pub node_a: String,
106    pub node_b: String,
107    pub similarity: f64,
108    pub kind: String,
109}
110
111impl Default for Node {
112    fn default() -> Self {
113        Self {
114            id: String::new(),
115            kind: String::new(),
116            name: String::new(),
117            path: String::new(),
118            line_start: 0,
119            line_end: 0,
120            language: String::new(),
121            churn: 0.0,
122            coupling: 0.0,
123            community: 0,
124            in_degree: 0,
125            out_degree: 0,
126            exported: false,
127            is_dead_candidate: false,
128            dead_reason: None,
129            complexity: 0.0,
130            is_test_file: false,
131            test_count: 0,
132            is_tested: false,
133        }
134    }
135}
136
137impl Node {
138    pub fn from_def(d: &NodeDef, language: &str) -> Self {
139        let exported = d
140            .metadata
141            .get("exported")
142            .and_then(|v| v.as_bool())
143            .unwrap_or(false);
144        let complexity = d
145            .metadata
146            .get("complexity")
147            .and_then(|v| v.as_f64())
148            .unwrap_or(0.0);
149        Self {
150            id: d.id.clone(),
151            kind: d.kind.as_str().to_string(),
152            name: d.name.clone(),
153            path: d.path.clone(),
154            line_start: d.line_start,
155            line_end: d.line_end,
156            language: language.to_string(),
157            churn: 0.0,
158            coupling: 0.0,
159            community: 0,
160            in_degree: 0,
161            out_degree: 0,
162            exported,
163            is_dead_candidate: false,
164            dead_reason: None,
165            complexity,
166            is_test_file: false,
167            test_count: 0,
168            is_tested: false,
169        }
170    }
171}
172
173impl Edge {
174    pub fn from_def(d: &EdgeDef) -> Self {
175        let id = format!("{}|{}|{}", d.src, d.kind.as_str(), d.dst);
176        Self {
177            id,
178            src: d.src.clone(),
179            dst: d.dst.clone(),
180            kind: d.kind.as_str().to_string(),
181            weight: d.weight,
182            confidence: d.confidence,
183        }
184    }
185}
186
187pub struct GraphDb {
188    pub conn: duckdb::Connection,
189    pub repo_id: String,
190    pub db_path: PathBuf,
191}
192
193impl GraphDb {
194    pub fn open(repo_path: &Path) -> anyhow::Result<Self> {
195        let repo_id = repo_hash(repo_path);
196        let dir = dirs::home_dir()
197            .ok_or_else(|| anyhow::anyhow!("cannot determine home directory"))?
198            .join(".cgx")
199            .join("repos");
200        std::fs::create_dir_all(&dir)?;
201
202        let db_path = dir.join(format!("{}.db", repo_id));
203        let conn = duckdb::Connection::open(&db_path)?;
204
205        conn.execute_batch(
206            "CREATE TABLE IF NOT EXISTS nodes (
207                id                 VARCHAR PRIMARY KEY,
208                kind               VARCHAR NOT NULL,
209                name               VARCHAR NOT NULL,
210                path               VARCHAR NOT NULL,
211                line_start         INTEGER,
212                line_end           INTEGER,
213                language           VARCHAR,
214                churn              DOUBLE DEFAULT 0.0,
215                coupling           DOUBLE DEFAULT 0.0,
216                community          BIGINT DEFAULT 0,
217                in_degree          BIGINT DEFAULT 0,
218                out_degree         BIGINT DEFAULT 0,
219                exported           TINYINT DEFAULT 0,
220                is_dead_candidate  TINYINT DEFAULT 0,
221                dead_reason        TEXT,
222                metadata           JSON
223            );
224            CREATE TABLE IF NOT EXISTS edges (
225                id         VARCHAR PRIMARY KEY,
226                src        VARCHAR NOT NULL,
227                dst        VARCHAR NOT NULL,
228                kind       VARCHAR NOT NULL,
229                weight     DOUBLE DEFAULT 1.0,
230                confidence DOUBLE DEFAULT 1.0,
231                metadata   JSON
232            );
233            CREATE TABLE IF NOT EXISTS communities (
234                id         INTEGER PRIMARY KEY,
235                label      VARCHAR,
236                node_count INTEGER,
237                top_nodes  JSON
238            );
239            CREATE TABLE IF NOT EXISTS repo_meta (
240                key        VARCHAR PRIMARY KEY,
241                value      JSON
242            );
243            CREATE TABLE IF NOT EXISTS file_hashes (
244                path       VARCHAR PRIMARY KEY,
245                hash       VARCHAR NOT NULL,
246                indexed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
247            );
248            CREATE TABLE IF NOT EXISTS tags (
249                id           VARCHAR PRIMARY KEY,
250                file_path    VARCHAR NOT NULL,
251                line         INTEGER NOT NULL,
252                tag_type     VARCHAR NOT NULL,
253                text         VARCHAR NOT NULL,
254                comment_type VARCHAR NOT NULL DEFAULT 'code'
255            );
256            CREATE TABLE IF NOT EXISTS clones (
257                id         VARCHAR PRIMARY KEY,
258                node_a     VARCHAR NOT NULL,
259                node_b     VARCHAR NOT NULL,
260                similarity FLOAT NOT NULL,
261                kind       VARCHAR NOT NULL
262            );
263            CREATE INDEX IF NOT EXISTS idx_nodes_kind      ON nodes(kind);
264            CREATE INDEX IF NOT EXISTS idx_nodes_path      ON nodes(path);
265            CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community);
266            CREATE INDEX IF NOT EXISTS idx_edges_src       ON edges(src);
267            CREATE INDEX IF NOT EXISTS idx_edges_dst       ON edges(dst);
268            CREATE INDEX IF NOT EXISTS idx_edges_kind      ON edges(kind);
269            CREATE INDEX IF NOT EXISTS idx_tags_file       ON tags(file_path);
270            CREATE INDEX IF NOT EXISTS idx_tags_type       ON tags(tag_type);
271            CREATE INDEX IF NOT EXISTS idx_clones_a        ON clones(node_a);
272            CREATE INDEX IF NOT EXISTS idx_clones_b        ON clones(node_b);",
273        )?;
274
275        // Migration: add new columns to existing DBs that pre-date this schema.
276        // DuckDB 1.x supports "ADD COLUMN IF NOT EXISTS" which is a no-op when
277        // the column is already present — no error, no transaction abort.
278        conn.execute_batch(
279            "ALTER TABLE nodes ADD COLUMN IF NOT EXISTS exported           TINYINT DEFAULT 0;
280             ALTER TABLE nodes ADD COLUMN IF NOT EXISTS is_dead_candidate  TINYINT DEFAULT 0;
281             ALTER TABLE nodes ADD COLUMN IF NOT EXISTS dead_reason        TEXT;
282             ALTER TABLE nodes ADD COLUMN IF NOT EXISTS complexity         DOUBLE DEFAULT 0.0;
283             ALTER TABLE nodes ADD COLUMN IF NOT EXISTS doc_comment        TEXT;
284             ALTER TABLE nodes ADD COLUMN IF NOT EXISTS is_test_file       TINYINT DEFAULT 0;
285             ALTER TABLE nodes ADD COLUMN IF NOT EXISTS test_count         INTEGER DEFAULT 0;
286             ALTER TABLE nodes ADD COLUMN IF NOT EXISTS is_tested          TINYINT DEFAULT 0;
287             CREATE INDEX IF NOT EXISTS idx_nodes_dead       ON nodes(is_dead_candidate);
288             CREATE INDEX IF NOT EXISTS idx_nodes_complexity ON nodes(complexity);
289             CREATE INDEX IF NOT EXISTS idx_nodes_is_tested  ON nodes(is_tested);",
290        )?;
291
292        conn.execute_batch(
293            "CREATE TABLE IF NOT EXISTS snapshots (
294                id           VARCHAR PRIMARY KEY,
295                commit_sha   VARCHAR NOT NULL,
296                commit_date  TEXT NOT NULL,
297                commit_msg   VARCHAR,
298                node_count   INTEGER,
299                edge_count   INTEGER,
300                snapshot_data TEXT
301            );
302            CREATE INDEX IF NOT EXISTS idx_snapshots_date ON snapshots(commit_date);",
303        )?;
304
305        Ok(Self {
306            conn,
307            repo_id,
308            db_path,
309        })
310    }
311
312    pub fn upsert_nodes(&self, nodes: &[Node]) -> anyhow::Result<usize> {
313        if nodes.is_empty() {
314            return Ok(0);
315        }
316        let mut count = 0;
317        let mut stmt = self.conn.prepare(
318            "INSERT OR REPLACE INTO nodes (id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, exported, complexity, is_test_file, test_count, is_tested)
319             VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
320        )?;
321        for node in nodes {
322            stmt.execute(params![
323                node.id,
324                node.kind,
325                node.name,
326                node.path,
327                node.line_start,
328                node.line_end,
329                node.language,
330                node.churn,
331                node.coupling,
332                node.community,
333                node.in_degree,
334                node.out_degree,
335                node.exported as i32,
336                node.complexity,
337                node.is_test_file as i32,
338                node.test_count,
339                node.is_tested as i32,
340            ])?;
341            count += 1;
342        }
343        Ok(count)
344    }
345
346    pub fn upsert_edges(&self, edges: &[Edge]) -> anyhow::Result<usize> {
347        if edges.is_empty() {
348            return Ok(0);
349        }
350        let mut count = 0;
351        let mut stmt = self.conn.prepare(
352            "INSERT OR REPLACE INTO edges (id, src, dst, kind, weight, confidence)
353             VALUES (?, ?, ?, ?, ?, ?)",
354        )?;
355        for edge in edges {
356            stmt.execute(params![
357                edge.id,
358                edge.src,
359                edge.dst,
360                edge.kind,
361                edge.weight,
362                edge.confidence,
363            ])?;
364            count += 1;
365        }
366        Ok(count)
367    }
368
369    pub fn upsert_tags(&self, tags: &[TagRow]) -> anyhow::Result<usize> {
370        if tags.is_empty() {
371            return Ok(0);
372        }
373        let mut count = 0;
374        let mut stmt = self.conn.prepare(
375            "INSERT OR REPLACE INTO tags (id, file_path, line, tag_type, text, comment_type)
376             VALUES (?, ?, ?, ?, ?, ?)",
377        )?;
378        for tag in tags {
379            stmt.execute(params![
380                tag.id,
381                tag.file_path,
382                tag.line,
383                tag.tag_type,
384                tag.text,
385                tag.comment_type,
386            ])?;
387            count += 1;
388        }
389        Ok(count)
390    }
391
392    pub fn get_tags(
393        &self,
394        tag_type_filter: Option<&str>,
395        comment_type_filter: Option<&str>,
396    ) -> anyhow::Result<Vec<TagRow>> {
397        let sql = match (tag_type_filter, comment_type_filter) {
398            (Some(_), Some(_)) => {
399                "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
400                 WHERE tag_type = ? AND comment_type = ? ORDER BY file_path, line"
401            }
402            (Some(_), None) => {
403                "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
404                 WHERE tag_type = ? ORDER BY file_path, line"
405            }
406            (None, Some(_)) => {
407                "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
408                 WHERE comment_type = ? ORDER BY file_path, line"
409            }
410            (None, None) => {
411                "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
412                 ORDER BY file_path, line"
413            }
414        };
415
416        let mut stmt = self.conn.prepare(sql)?;
417        let map_row = |row: &duckdb::Row| {
418            Ok(TagRow {
419                id: row.get(0)?,
420                file_path: row.get(1)?,
421                line: row.get::<_, u32>(2)?,
422                tag_type: row.get(3)?,
423                text: row.get(4)?,
424                comment_type: row.get(5)?,
425            })
426        };
427
428        let rows = match (tag_type_filter, comment_type_filter) {
429            (Some(t), Some(c)) => stmt.query_map(params![t, c], map_row)?,
430            (Some(t), None) => stmt.query_map(params![t], map_row)?,
431            (None, Some(c)) => stmt.query_map(params![c], map_row)?,
432            (None, None) => stmt.query_map([], map_row)?,
433        };
434
435        let mut results = Vec::new();
436        for row in rows {
437            results.push(row?);
438        }
439        Ok(results)
440    }
441
442    pub fn clear_all_tags(&self) -> anyhow::Result<()> {
443        self.conn.execute_batch(
444            "DROP TABLE IF EXISTS tags;
445             CREATE TABLE IF NOT EXISTS tags (
446                 id           VARCHAR PRIMARY KEY,
447                 file_path    VARCHAR NOT NULL,
448                 line         INTEGER NOT NULL,
449                 tag_type     VARCHAR NOT NULL,
450                 text         VARCHAR NOT NULL,
451                 comment_type VARCHAR NOT NULL DEFAULT 'code'
452             );
453             CREATE INDEX IF NOT EXISTS idx_tags_file ON tags(file_path);
454             CREATE INDEX IF NOT EXISTS idx_tags_type ON tags(tag_type);",
455        )?;
456        Ok(())
457    }
458
459    pub fn delete_tags_for_paths(&self, paths: &[String]) -> anyhow::Result<()> {
460        if paths.is_empty() {
461            return Ok(());
462        }
463        let mut stmt = self.conn.prepare("DELETE FROM tags WHERE file_path = ?")?;
464        for path in paths {
465            stmt.execute(params![path])?;
466        }
467        Ok(())
468    }
469
470    pub fn get_node(&self, id: &str) -> anyhow::Result<Option<Node>> {
471        let mut stmt = self
472            .conn
473            .prepare("SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false) as exported, COALESCE(is_dead_candidate, false) as is_dead_candidate, dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0) FROM nodes WHERE id = ?")?;
474        let mut rows = stmt.query_map(params![id], |row| {
475            Ok(Node {
476                id: row.get(0)?,
477                kind: row.get(1)?,
478                name: row.get(2)?,
479                path: row.get(3)?,
480                line_start: row.get(4)?,
481                line_end: row.get(5)?,
482                language: row.get(6)?,
483                churn: row.get(7)?,
484                coupling: row.get(8)?,
485                community: row.get(9)?,
486                in_degree: row.get(10)?,
487                out_degree: row.get(11)?,
488                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
489                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
490                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
491                complexity: row.get::<_, f64>(15).unwrap_or(0.0),
492                is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
493                test_count: row.get::<_, i64>(17).unwrap_or(0),
494                is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
495            })
496        })?;
497
498        match rows.next() {
499            Some(Ok(node)) => Ok(Some(node)),
500            _ => Ok(None),
501        }
502    }
503
504    pub fn get_neighbors(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
505        let mut seen = std::collections::HashSet::new();
506        seen.insert(id.to_string());
507        let mut current = vec![id.to_string()];
508        let mut result: Vec<Node> = Vec::new();
509        let max_depth = depth.min(3);
510
511        for _ in 0..max_depth {
512            if current.is_empty() {
513                break;
514            }
515            let mut next = Vec::new();
516
517            for cur_id in &current {
518                let mut stmt = self.conn.prepare(
519                    "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, false), COALESCE(n.is_dead_candidate, false), n.dead_reason, COALESCE(n.complexity, 0.0), COALESCE(n.is_test_file, 0), COALESCE(n.test_count, 0), COALESCE(n.is_tested, 0)
520                     FROM nodes n
521                     INNER JOIN edges e ON (e.dst = n.id AND e.src = ?1) OR (e.src = n.id AND e.dst = ?2)
522                     LIMIT 100",
523                )?;
524                let rows = stmt.query_map(params![cur_id, cur_id], |row| {
525                    Ok(Node {
526                        id: row.get(0)?,
527                        kind: row.get(1)?,
528                        name: row.get(2)?,
529                        path: row.get(3)?,
530                        line_start: row.get(4)?,
531                        line_end: row.get(5)?,
532                        language: row.get(6)?,
533                        churn: row.get(7)?,
534                        coupling: row.get(8)?,
535                        community: row.get(9)?,
536                        in_degree: row.get(10)?,
537                        out_degree: row.get(11)?,
538                        exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
539                        is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
540                        dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
541                        complexity: row.get::<_, f64>(15).unwrap_or(0.0),
542                        is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
543                        test_count: row.get::<_, i64>(17).unwrap_or(0),
544                        is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
545                    })
546                })?;
547
548                for row in rows {
549                    let node = row?;
550                    if seen.insert(node.id.clone()) {
551                        next.push(node.id.clone());
552                        result.push(node);
553                    }
554                }
555            }
556            current = next;
557        }
558
559        Ok(result)
560    }
561
562    pub fn get_all_nodes(&self) -> anyhow::Result<Vec<Node>> {
563        let mut stmt = self.conn.prepare(
564            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0) FROM nodes",
565        )?;
566        let rows = stmt.query_map([], |row| {
567            Ok(Node {
568                id: row.get(0)?,
569                kind: row.get(1)?,
570                name: row.get(2)?,
571                path: row.get(3)?,
572                line_start: row.get(4)?,
573                line_end: row.get(5)?,
574                language: row.get(6)?,
575                churn: row.get(7)?,
576                coupling: row.get(8)?,
577                community: row.get(9)?,
578                in_degree: row.get(10)?,
579                out_degree: row.get(11)?,
580                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
581                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
582                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
583                complexity: row.get::<_, f64>(15).unwrap_or(0.0),
584                is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
585                test_count: row.get::<_, i64>(17).unwrap_or(0),
586                is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
587            })
588        })?;
589
590        let mut nodes = Vec::new();
591        for row in rows {
592            nodes.push(row?);
593        }
594        Ok(nodes)
595    }
596
597    pub fn get_all_edges(&self) -> anyhow::Result<Vec<Edge>> {
598        let mut stmt = self
599            .conn
600            .prepare("SELECT id, src, dst, kind, weight, confidence FROM edges")?;
601        let rows = stmt.query_map([], |row| {
602            Ok(Edge {
603                id: row.get(0)?,
604                src: row.get(1)?,
605                dst: row.get(2)?,
606                kind: row.get(3)?,
607                weight: row.get(4)?,
608                confidence: row.get(5)?,
609            })
610        })?;
611
612        let mut edges = Vec::new();
613        for row in rows {
614            edges.push(row?);
615        }
616        Ok(edges)
617    }
618
619    pub fn node_count(&self) -> anyhow::Result<u64> {
620        let count: i64 = self
621            .conn
622            .query_row("SELECT COUNT(*) FROM nodes", [], |row| row.get(0))?;
623        Ok(count as u64)
624    }
625
626    pub fn edge_count(&self) -> anyhow::Result<u64> {
627        let count: i64 = self
628            .conn
629            .query_row("SELECT COUNT(*) FROM edges", [], |row| row.get(0))?;
630        Ok(count as u64)
631    }
632
633    pub fn clear(&self) -> anyhow::Result<()> {
634        // TRUNCATE avoids DuckDB ART index bulk-delete failures on large datasets
635        // and is more reliable than DROP+CREATE for data persistence across connections.
636        self.conn.execute_batch(
637            "TRUNCATE TABLE edges;
638             TRUNCATE TABLE nodes;
639             TRUNCATE TABLE communities;",
640        )?;
641        Ok(())
642    }
643
644    pub fn get_language_breakdown(&self) -> anyhow::Result<std::collections::HashMap<String, f64>> {
645        let mut stmt = self.conn.prepare(
646            "SELECT language, COUNT(*) as cnt FROM nodes WHERE language != '' GROUP BY language",
647        )?;
648        let rows = stmt.query_map([], |row| {
649            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
650        })?;
651
652        let mut counts: std::collections::HashMap<String, i64> = std::collections::HashMap::new();
653        for row in rows {
654            let (lang, cnt) = row?;
655            *counts.entry(lang).or_default() += cnt;
656        }
657
658        let total: i64 = counts.values().sum();
659        if total == 0 {
660            return Ok(std::collections::HashMap::new());
661        }
662
663        let mut breakdown = std::collections::HashMap::new();
664        for (lang, cnt) in counts {
665            breakdown.insert(lang, cnt as f64 / total as f64);
666        }
667        Ok(breakdown)
668    }
669
670    pub fn get_node_counts_by_kind(
671        &self,
672    ) -> anyhow::Result<std::collections::HashMap<String, u64>> {
673        let mut stmt = self
674            .conn
675            .prepare("SELECT kind, COUNT(*) as cnt FROM nodes GROUP BY kind")?;
676        let rows = stmt.query_map([], |row| {
677            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
678        })?;
679
680        let mut counts = std::collections::HashMap::new();
681        for row in rows {
682            let (kind, cnt) = row?;
683            counts.insert(kind, cnt as u64);
684        }
685        Ok(counts)
686    }
687
688    pub fn upsert_node_scores(
689        &self,
690        node_id: &str,
691        churn: f64,
692        coupling: f64,
693    ) -> anyhow::Result<()> {
694        self.conn.execute(
695            "UPDATE nodes SET churn = ?, coupling = ? WHERE id = ?",
696            params![churn, coupling, node_id],
697        )?;
698        Ok(())
699    }
700
701    pub fn update_in_out_degrees(&self) -> anyhow::Result<()> {
702        self.conn.execute_batch(
703            "UPDATE nodes SET in_degree = 0, out_degree = 0;
704             UPDATE nodes SET out_degree = (SELECT COUNT(*) FROM edges WHERE edges.src = nodes.id);
705             UPDATE nodes SET in_degree = (SELECT COUNT(*) FROM edges WHERE edges.dst = nodes.id);",
706        )?;
707        Ok(())
708    }
709
710    pub fn get_hotspots(&self, limit: usize) -> anyhow::Result<Vec<(String, f64, f64, i64)>> {
711        let mut stmt = self.conn.prepare(
712            "SELECT path, churn, coupling, in_degree
713             FROM nodes
714             WHERE kind = 'File' AND (churn > 0.0 OR in_degree > 0)
715             ORDER BY (churn * COALESCE(coupling, 0.0) + CAST(in_degree AS DOUBLE) * 0.01) DESC
716             LIMIT ?",
717        )?;
718        let rows = stmt.query_map(params![limit as i64], |row| {
719            Ok((
720                row.get::<_, String>(0)?,
721                row.get::<_, f64>(1)?,
722                row.get::<_, f64>(2)?,
723                row.get::<_, i64>(3)?,
724            ))
725        })?;
726        let mut results = Vec::new();
727        for row in rows {
728            results.push(row?);
729        }
730        Ok(results)
731    }
732
733    pub fn get_ownership(&self) -> anyhow::Result<Vec<(String, i64)>> {
734        let mut stmt = self.conn.prepare(
735            "SELECT n.name, COUNT(e.id) as file_count
736             FROM nodes n
737             INNER JOIN edges e ON e.src = n.id AND e.kind = 'OWNS'
738             WHERE n.kind = 'Author'
739             GROUP BY n.name
740             ORDER BY file_count DESC",
741        )?;
742        let rows = stmt.query_map([], |row| {
743            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
744        })?;
745        let mut results = Vec::new();
746        for row in rows {
747            results.push(row?);
748        }
749        Ok(results)
750    }
751
752    pub fn compute_coupling(&self) -> anyhow::Result<()> {
753        self.conn.execute_batch(
754            "UPDATE nodes SET coupling = 0.0;
755             UPDATE nodes SET coupling = 
756                CASE 
757                    WHEN (SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') > 0
758                    THEN CAST(in_degree AS DOUBLE) / CAST((SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') AS DOUBLE)
759                    ELSE 0.0
760                END
761             WHERE kind = 'File';",
762        )?;
763        Ok(())
764    }
765
766    pub fn update_node_communities(
767        &self,
768        communities: &std::collections::HashMap<String, i64>,
769    ) -> anyhow::Result<usize> {
770        if communities.is_empty() {
771            return Ok(0);
772        }
773        let mut count = 0;
774        let mut stmt = self
775            .conn
776            .prepare("UPDATE nodes SET community = ? WHERE id = ?")?;
777        for (node_id, community) in communities {
778            let affected = stmt.execute(params![*community, node_id.as_str()])?;
779            count += affected;
780        }
781        Ok(count)
782    }
783
784    pub fn get_stats(&self) -> anyhow::Result<RepoStats> {
785        let node_count = self.node_count()?;
786        let edge_count = self.edge_count()?;
787        let lang_breakdown = self.get_language_breakdown()?;
788        let communities = self.get_communities()?;
789        let counts_by_kind = self.get_node_counts_by_kind()?;
790
791        Ok(RepoStats {
792            node_count,
793            edge_count,
794            language_breakdown: lang_breakdown,
795            community_count: communities.len() as u32,
796            function_count: counts_by_kind.get("Function").copied().unwrap_or(0),
797            class_count: counts_by_kind.get("Class").copied().unwrap_or(0),
798            file_count: counts_by_kind.get("File").copied().unwrap_or(0),
799        })
800    }
801
802    pub fn get_entry_points(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
803        let mut stmt = self.conn.prepare(
804            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0)
805             FROM nodes
806             WHERE in_degree = 0 AND kind != 'File' AND kind != 'Author'
807             ORDER BY out_degree DESC
808             LIMIT ?",
809        )?;
810        let rows = stmt.query_map(params![limit as i64], |row| {
811            Ok(Node {
812                id: row.get(0)?,
813                kind: row.get(1)?,
814                name: row.get(2)?,
815                path: row.get(3)?,
816                line_start: row.get(4)?,
817                line_end: row.get(5)?,
818                language: row.get(6)?,
819                churn: row.get(7)?,
820                coupling: row.get(8)?,
821                community: row.get(9)?,
822                in_degree: row.get(10)?,
823                out_degree: row.get(11)?,
824                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
825                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
826                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
827                complexity: row.get::<_, f64>(15).unwrap_or(0.0),
828                is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
829                test_count: row.get::<_, i64>(17).unwrap_or(0),
830                is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
831            })
832        })?;
833        let mut results = Vec::new();
834        for row in rows {
835            results.push(row?);
836        }
837        Ok(results)
838    }
839
840    pub fn get_god_nodes(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
841        let mut stmt = self.conn.prepare(
842            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0)
843             FROM nodes
844             WHERE in_degree > 0 AND kind != 'File' AND kind != 'Author'
845             ORDER BY in_degree DESC
846             LIMIT ?",
847        )?;
848        let rows = stmt.query_map(params![limit as i64], |row| {
849            Ok(Node {
850                id: row.get(0)?,
851                kind: row.get(1)?,
852                name: row.get(2)?,
853                path: row.get(3)?,
854                line_start: row.get(4)?,
855                line_end: row.get(5)?,
856                language: row.get(6)?,
857                churn: row.get(7)?,
858                coupling: row.get(8)?,
859                community: row.get(9)?,
860                in_degree: row.get(10)?,
861                out_degree: row.get(11)?,
862                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
863                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
864                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
865                complexity: row.get::<_, f64>(15).unwrap_or(0.0),
866                is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
867                test_count: row.get::<_, i64>(17).unwrap_or(0),
868                is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
869            })
870        })?;
871        let mut results = Vec::new();
872        for row in rows {
873            results.push(row?);
874        }
875        Ok(results)
876    }
877
878    pub fn get_communities(&self) -> anyhow::Result<Vec<CommunityRow>> {
879        let mut stmt = self.conn.prepare(
880            "SELECT community, kind, name, path, in_degree
881             FROM nodes
882             WHERE community > 0
883             ORDER BY community",
884        )?;
885        let rows = stmt.query_map([], |row| {
886            Ok((
887                row.get::<_, i64>(0)?,
888                row.get::<_, String>(1)?,
889                row.get::<_, String>(2)?,
890                row.get::<_, String>(3)?,
891                row.get::<_, i64>(4)?,
892            ))
893        })?;
894
895        let mut community_map: std::collections::HashMap<i64, CommunityGroup> =
896            std::collections::HashMap::new();
897        for row in rows {
898            let (community, kind, name, _path, in_degree) = row?;
899            let entry = community_map
900                .entry(community)
901                .or_insert_with(|| (Vec::new(), 0));
902            entry.0.push((kind, in_degree, name));
903            entry.1 += 1;
904        }
905
906        let mut result: Vec<CommunityRow> = community_map
907            .into_iter()
908            .map(|(community, (mut items, count))| {
909                items.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.2.cmp(&b.2)));
910                let top_nodes: Vec<String> = items
911                    .iter()
912                    .take(5)
913                    .map(|(kind, _deg, name)| format!("{}:{}", kind, name))
914                    .collect();
915                let label = top_nodes
916                    .first()
917                    .cloned()
918                    .unwrap_or_else(|| format!("community-{}", community));
919                (community, label, count, top_nodes)
920            })
921            .collect();
922
923        result.sort_by_key(|row| std::cmp::Reverse(row.2));
924        Ok(result)
925    }
926
927    pub fn clear_communities(&self) -> anyhow::Result<()> {
928        self.conn.execute("UPDATE nodes SET community = 0", [])?;
929        self.conn.execute("DELETE FROM communities", [])?;
930        Ok(())
931    }
932
933    /// BFS following only incoming edges — returns all nodes that depend on `id`.
934    /// Used for blast-radius analysis: if `id` changes, these nodes are affected.
935    pub fn get_dependents(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
936        let mut seen = std::collections::HashSet::new();
937        seen.insert(id.to_string());
938        let mut current = vec![id.to_string()];
939        let mut result: Vec<Node> = Vec::new();
940        let max_depth = depth.min(3);
941
942        for _ in 0..max_depth {
943            if current.is_empty() {
944                break;
945            }
946            let mut next = Vec::new();
947            for cur_id in &current {
948                let mut stmt = self.conn.prepare(
949                    "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, false), COALESCE(n.is_dead_candidate, false), n.dead_reason, COALESCE(n.complexity, 0.0), COALESCE(n.is_test_file, 0), COALESCE(n.test_count, 0), COALESCE(n.is_tested, 0)
950                     FROM nodes n
951                     INNER JOIN edges e ON e.src = n.id AND e.dst = ?
952                     LIMIT 100",
953                )?;
954                let rows = stmt.query_map(params![cur_id], |row| {
955                    Ok(Node {
956                        id: row.get(0)?,
957                        kind: row.get(1)?,
958                        name: row.get(2)?,
959                        path: row.get(3)?,
960                        line_start: row.get(4)?,
961                        line_end: row.get(5)?,
962                        language: row.get(6)?,
963                        churn: row.get(7)?,
964                        coupling: row.get(8)?,
965                        community: row.get(9)?,
966                        in_degree: row.get(10)?,
967                        out_degree: row.get(11)?,
968                        exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
969                        is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
970                        dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
971                        complexity: row.get::<_, f64>(15).unwrap_or(0.0),
972                        is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
973                        test_count: row.get::<_, i64>(17).unwrap_or(0),
974                        is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
975                    })
976                })?;
977                for row in rows {
978                    let node = row?;
979                    if seen.insert(node.id.clone()) {
980                        next.push(node.id.clone());
981                        result.push(node);
982                    }
983                }
984            }
985            current = next;
986        }
987
988        Ok(result)
989    }
990
991    pub fn get_nodes_by_community(&self, community: i64) -> anyhow::Result<Vec<Node>> {
992        let mut stmt = self.conn.prepare(
993            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0) FROM nodes WHERE community = ?",
994        )?;
995        let rows = stmt.query_map(params![community], |row| {
996            Ok(Node {
997                id: row.get(0)?,
998                kind: row.get(1)?,
999                name: row.get(2)?,
1000                path: row.get(3)?,
1001                line_start: row.get(4)?,
1002                line_end: row.get(5)?,
1003                language: row.get(6)?,
1004                churn: row.get(7)?,
1005                coupling: row.get(8)?,
1006                community: row.get(9)?,
1007                in_degree: row.get(10)?,
1008                out_degree: row.get(11)?,
1009                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
1010                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
1011                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
1012                complexity: row.get::<_, f64>(15).unwrap_or(0.0),
1013                is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
1014                test_count: row.get::<_, i64>(17).unwrap_or(0),
1015                is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
1016            })
1017        })?;
1018        let mut nodes = Vec::new();
1019        for row in rows {
1020            nodes.push(row?);
1021        }
1022        Ok(nodes)
1023    }
1024
1025    pub fn mark_dead_candidates(&self, items: &[(String, String)]) -> anyhow::Result<()> {
1026        // items = vec of (node_id, dead_reason)
1027        if items.is_empty() {
1028            return Ok(());
1029        }
1030        let mut stmt = self
1031            .conn
1032            .prepare("UPDATE nodes SET is_dead_candidate = 1, dead_reason = ? WHERE id = ?")?;
1033        for (id, reason) in items {
1034            stmt.execute(params![reason, id])?;
1035        }
1036        Ok(())
1037    }
1038
1039    pub fn get_dead_code_stats(&self) -> anyhow::Result<(i64, i64)> {
1040        // Returns (total_candidates, high_confidence_count)
1041        let total: i64 = self
1042            .conn
1043            .query_row(
1044                "SELECT COUNT(*) FROM nodes WHERE is_dead_candidate = 1",
1045                [],
1046                |r| r.get(0),
1047            )
1048            .unwrap_or(0);
1049        // High confidence = unreachable or disconnected reasons
1050        let high: i64 = self.conn.query_row(
1051            "SELECT COUNT(*) FROM nodes WHERE is_dead_candidate = 1 AND dead_reason IN ('unreachable', 'disconnected')", [], |r| r.get(0)
1052        ).unwrap_or(0);
1053        Ok((total, high))
1054    }
1055
1056    pub fn get_edges_by_community(&self, community: i64) -> anyhow::Result<Vec<Edge>> {
1057        let mut stmt = self.conn.prepare(
1058            "SELECT DISTINCT e.id, e.src, e.dst, e.kind, e.weight, e.confidence
1059             FROM edges e
1060             INNER JOIN nodes n1 ON e.src = n1.id AND n1.community = ?
1061             INNER JOIN nodes n2 ON e.dst = n2.id AND n2.community = ?",
1062        )?;
1063        let rows = stmt.query_map(params![community, community], |row| {
1064            Ok(Edge {
1065                id: row.get(0)?,
1066                src: row.get(1)?,
1067                dst: row.get(2)?,
1068                kind: row.get(3)?,
1069                weight: row.get(4)?,
1070                confidence: row.get(5)?,
1071            })
1072        })?;
1073        let mut edges = Vec::new();
1074        for row in rows {
1075            edges.push(row?);
1076        }
1077        Ok(edges)
1078    }
1079
1080    // ── File hashes for incremental indexing ────────────────────────────────
1081
1082    pub fn get_file_hashes(&self) -> anyhow::Result<std::collections::HashMap<String, String>> {
1083        let mut stmt = self.conn.prepare("SELECT path, hash FROM file_hashes")?;
1084        let rows = stmt.query_map([], |row| {
1085            Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1086        })?;
1087        let mut result = std::collections::HashMap::new();
1088        for row in rows {
1089            let (path, hash) = row?;
1090            result.insert(path, hash);
1091        }
1092        Ok(result)
1093    }
1094
1095    pub fn set_file_hash(&self, path: &str, hash: &str) -> anyhow::Result<()> {
1096        self.conn.execute(
1097            "INSERT OR REPLACE INTO file_hashes (path, hash) VALUES (?, ?)",
1098            params![path, hash],
1099        )?;
1100        Ok(())
1101    }
1102
1103    pub fn remove_file_hashes(&self, paths: &[String]) -> anyhow::Result<()> {
1104        if paths.is_empty() {
1105            return Ok(());
1106        }
1107        let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
1108        let sql = format!("DELETE FROM file_hashes WHERE path IN ({})", placeholders);
1109        let mut stmt = self.conn.prepare(&sql)?;
1110        let params: Vec<&dyn duckdb::ToSql> =
1111            paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
1112        stmt.execute(params.as_slice())?;
1113        Ok(())
1114    }
1115
1116    pub fn delete_nodes_by_paths(&self, paths: &[String]) -> anyhow::Result<usize> {
1117        if paths.is_empty() {
1118            return Ok(0);
1119        }
1120        let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
1121        // Delete edges connected to nodes from these paths first
1122        let sql_edges = format!(
1123            "DELETE FROM edges WHERE src IN (SELECT id FROM nodes WHERE path IN ({})) OR dst IN (SELECT id FROM nodes WHERE path IN ({}))",
1124            placeholders, placeholders
1125        );
1126        let mut stmt_edges = self.conn.prepare(&sql_edges)?;
1127        let params_edges: Vec<&dyn duckdb::ToSql> = paths
1128            .iter()
1129            .chain(paths.iter())
1130            .map(|p| p as &dyn duckdb::ToSql)
1131            .collect();
1132        stmt_edges.execute(params_edges.as_slice())?;
1133
1134        // Delete nodes
1135        let sql_nodes = format!("DELETE FROM nodes WHERE path IN ({})", placeholders);
1136        let mut stmt_nodes = self.conn.prepare(&sql_nodes)?;
1137        let params_nodes: Vec<&dyn duckdb::ToSql> =
1138            paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
1139        let count = stmt_nodes.execute(params_nodes.as_slice())?;
1140        Ok(count)
1141    }
1142
1143    pub fn update_node_doc_comment(&self, id: &str, doc: &str) -> anyhow::Result<()> {
1144        self.conn.execute(
1145            "UPDATE nodes SET doc_comment = ? WHERE id = ?",
1146            params![doc, id],
1147        )?;
1148        Ok(())
1149    }
1150
1151    pub fn update_node_complexity(&self, id: &str, complexity: f64) -> anyhow::Result<()> {
1152        self.conn.execute(
1153            "UPDATE nodes SET complexity = ? WHERE id = ?",
1154            params![complexity, id],
1155        )?;
1156        Ok(())
1157    }
1158
1159    pub fn get_nodes_by_complexity(
1160        &self,
1161        limit: usize,
1162        min_score: f64,
1163    ) -> anyhow::Result<Vec<Node>> {
1164        let mut stmt = self.conn.prepare(
1165            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0)
1166             FROM nodes
1167             WHERE kind = 'Function' AND COALESCE(complexity, 0.0) >= ?
1168             ORDER BY complexity DESC
1169             LIMIT ?",
1170        )?;
1171        let rows = stmt.query_map(params![min_score, limit as i64], |row| {
1172            Ok(Node {
1173                id: row.get(0)?,
1174                kind: row.get(1)?,
1175                name: row.get(2)?,
1176                path: row.get(3)?,
1177                line_start: row.get(4)?,
1178                line_end: row.get(5)?,
1179                language: row.get(6)?,
1180                churn: row.get(7)?,
1181                coupling: row.get(8)?,
1182                community: row.get(9)?,
1183                in_degree: row.get(10)?,
1184                out_degree: row.get(11)?,
1185                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
1186                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
1187                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
1188                complexity: row.get::<_, f64>(15).unwrap_or(0.0),
1189                is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
1190                test_count: row.get::<_, i64>(17).unwrap_or(0),
1191                is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
1192            })
1193        })?;
1194        let mut results = Vec::new();
1195        for row in rows {
1196            results.push(row?);
1197        }
1198        Ok(results)
1199    }
1200
1201    /// Returns (overall_pct, Vec<(community_id, documented, total)>, Vec<undocumented high-coupling nodes>)
1202    pub fn get_docs_coverage(&self) -> anyhow::Result<DocsCoverage> {
1203        let overall: f64 = self
1204            .conn
1205            .query_row(
1206                "SELECT COALESCE(
1207                    CAST(SUM(CASE WHEN doc_comment IS NOT NULL AND doc_comment != '' THEN 1 ELSE 0 END) AS DOUBLE)
1208                    / NULLIF(CAST(COUNT(*) AS DOUBLE), 0.0) * 100.0,
1209                    0.0)
1210                 FROM nodes WHERE kind IN ('Function', 'Class') AND path NOT LIKE '%test%'",
1211                [],
1212                |r| r.get(0),
1213            )
1214            .unwrap_or(0.0);
1215
1216        let mut by_community = Vec::new();
1217        let mut stmt = self.conn.prepare(
1218            "SELECT community,
1219                    SUM(CASE WHEN doc_comment IS NOT NULL AND doc_comment != '' THEN 1 ELSE 0 END) as documented,
1220                    COUNT(*) as total
1221             FROM nodes
1222             WHERE kind IN ('Function', 'Class') AND path NOT LIKE '%test%'
1223             GROUP BY community
1224             ORDER BY community",
1225        )?;
1226        let comm_rows = stmt.query_map([], |row| {
1227            Ok((
1228                row.get::<_, i64>(0)?,
1229                row.get::<_, i64>(1)?,
1230                row.get::<_, i64>(2)?,
1231            ))
1232        })?;
1233        for row in comm_rows {
1234            by_community.push(row?);
1235        }
1236
1237        let mut undoc_stmt = self.conn.prepare(
1238            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0)
1239             FROM nodes
1240             WHERE kind = 'Function' AND (doc_comment IS NULL OR doc_comment = '')
1241             ORDER BY in_degree DESC
1242             LIMIT 10",
1243        )?;
1244        let undoc_rows = undoc_stmt.query_map([], |row| {
1245            Ok(Node {
1246                id: row.get(0)?,
1247                kind: row.get(1)?,
1248                name: row.get(2)?,
1249                path: row.get(3)?,
1250                line_start: row.get(4)?,
1251                line_end: row.get(5)?,
1252                language: row.get(6)?,
1253                churn: row.get(7)?,
1254                coupling: row.get(8)?,
1255                community: row.get(9)?,
1256                in_degree: row.get(10)?,
1257                out_degree: row.get(11)?,
1258                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
1259                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
1260                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
1261                complexity: row.get::<_, f64>(15).unwrap_or(0.0),
1262                is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
1263                test_count: row.get::<_, i64>(17).unwrap_or(0),
1264                is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
1265            })
1266        })?;
1267        let mut undocumented = Vec::new();
1268        for row in undoc_rows {
1269            undocumented.push(row?);
1270        }
1271
1272        Ok((overall, by_community, undocumented))
1273    }
1274
1275    pub fn upsert_clones(&self, clones: &[CloneRow]) -> anyhow::Result<usize> {
1276        if clones.is_empty() {
1277            return Ok(0);
1278        }
1279        let mut count = 0;
1280        let mut stmt = self.conn.prepare(
1281            "INSERT OR REPLACE INTO clones (id, node_a, node_b, similarity, kind) VALUES (?, ?, ?, ?, ?)",
1282        )?;
1283        for c in clones {
1284            stmt.execute(params![c.id, c.node_a, c.node_b, c.similarity, c.kind])?;
1285            count += 1;
1286        }
1287        Ok(count)
1288    }
1289
1290    pub fn get_clones(
1291        &self,
1292        min_similarity: f64,
1293        kind_filter: Option<&str>,
1294    ) -> anyhow::Result<Vec<CloneRow>> {
1295        let (sql, use_kind) = if kind_filter.is_some() {
1296            (
1297                "SELECT id, node_a, node_b, similarity, kind FROM clones WHERE similarity >= ? AND kind = ? ORDER BY similarity DESC",
1298                true,
1299            )
1300        } else {
1301            (
1302                "SELECT id, node_a, node_b, similarity, kind FROM clones WHERE similarity >= ? ORDER BY similarity DESC",
1303                false,
1304            )
1305        };
1306
1307        let mut stmt = self.conn.prepare(sql)?;
1308        let map_row = |row: &duckdb::Row| {
1309            Ok(CloneRow {
1310                id: row.get(0)?,
1311                node_a: row.get(1)?,
1312                node_b: row.get(2)?,
1313                similarity: row.get::<_, f32>(3)? as f64,
1314                kind: row.get(4)?,
1315            })
1316        };
1317
1318        let rows = if use_kind {
1319            stmt.query_map(params![min_similarity, kind_filter.unwrap_or("")], map_row)?
1320        } else {
1321            stmt.query_map(params![min_similarity], map_row)?
1322        };
1323
1324        let mut results = Vec::new();
1325        for row in rows {
1326            results.push(row?);
1327        }
1328        Ok(results)
1329    }
1330
1331    pub fn clear_clones(&self) -> anyhow::Result<()> {
1332        self.conn.execute("DELETE FROM clones", [])?;
1333        Ok(())
1334    }
1335
1336    pub fn mark_test_files(&self, paths: &[String]) -> anyhow::Result<()> {
1337        if paths.is_empty() {
1338            return Ok(());
1339        }
1340        let mut stmt = self
1341            .conn
1342            .prepare("UPDATE nodes SET is_test_file = 1 WHERE path = ?")?;
1343        for path in paths {
1344            stmt.execute(params![path])?;
1345        }
1346        Ok(())
1347    }
1348
1349    /// After inserting TESTS edges, compute test_count and is_tested for non-test nodes.
1350    pub fn update_test_coverage(&self) -> anyhow::Result<()> {
1351        self.conn.execute_batch(
1352            "UPDATE nodes SET test_count = (
1353                SELECT COUNT(*) FROM edges
1354                WHERE edges.dst = nodes.id AND edges.kind = 'TESTS'
1355             );
1356             UPDATE nodes SET is_tested = (test_count > 0)
1357             WHERE is_test_file = 0;",
1358        )?;
1359        Ok(())
1360    }
1361
1362    /// Returns (overall_pct, tested_count, untested_count, gaps ranked by risk)
1363    pub fn get_test_coverage_summary(
1364        &self,
1365        top_n: usize,
1366    ) -> anyhow::Result<(f64, i64, i64, Vec<Node>)> {
1367        let tested: i64 = self
1368            .conn
1369            .query_row(
1370                "SELECT COUNT(*) FROM nodes WHERE kind IN ('Function','Class') AND is_test_file = 0 AND is_tested = 1",
1371                [],
1372                |r| r.get(0),
1373            )
1374            .unwrap_or(0);
1375        let total: i64 = self
1376            .conn
1377            .query_row(
1378                "SELECT COUNT(*) FROM nodes WHERE kind IN ('Function','Class') AND is_test_file = 0",
1379                [],
1380                |r| r.get(0),
1381            )
1382            .unwrap_or(0);
1383
1384        let overall_pct = if total > 0 {
1385            (tested as f64 / total as f64) * 100.0
1386        } else {
1387            0.0
1388        };
1389
1390        let mut gap_stmt = self.conn.prepare(
1391            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0)
1392             FROM nodes
1393             WHERE kind IN ('Function','Class') AND is_test_file = 0 AND COALESCE(is_tested, 0) = 0
1394             ORDER BY (churn * CAST(in_degree AS DOUBLE) + CAST(in_degree AS DOUBLE) * 0.5) DESC
1395             LIMIT ?",
1396        )?;
1397        let gap_rows = gap_stmt.query_map(params![top_n as i64], |row| {
1398            Ok(Node {
1399                id: row.get(0)?,
1400                kind: row.get(1)?,
1401                name: row.get(2)?,
1402                path: row.get(3)?,
1403                line_start: row.get(4)?,
1404                line_end: row.get(5)?,
1405                language: row.get(6)?,
1406                churn: row.get(7)?,
1407                coupling: row.get(8)?,
1408                community: row.get(9)?,
1409                in_degree: row.get(10)?,
1410                out_degree: row.get(11)?,
1411                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
1412                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
1413                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
1414                complexity: row.get::<_, f64>(15).unwrap_or(0.0),
1415                is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
1416                test_count: row.get::<_, i64>(17).unwrap_or(0),
1417                is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
1418            })
1419        })?;
1420        let mut gaps = Vec::new();
1421        for row in gap_rows {
1422            gaps.push(row?);
1423        }
1424
1425        Ok((overall_pct, tested, total - tested, gaps))
1426    }
1427
1428    pub fn upsert_snapshot(&self, entry: &SnapshotEntry) -> anyhow::Result<()> {
1429        self.conn.execute(
1430            "INSERT OR REPLACE INTO snapshots (id, commit_sha, commit_date, commit_msg, node_count, edge_count, snapshot_data)
1431             VALUES (?, ?, ?, ?, ?, ?, ?)",
1432            params![
1433                entry.id,
1434                entry.commit_sha,
1435                entry.commit_date,
1436                entry.commit_msg,
1437                entry.node_count,
1438                entry.edge_count,
1439                entry.snapshot_data,
1440            ],
1441        )?;
1442        Ok(())
1443    }
1444
1445    pub fn get_snapshots(&self, limit: usize) -> anyhow::Result<Vec<SnapshotEntry>> {
1446        let mut stmt = self.conn.prepare(
1447            "SELECT id, commit_sha, commit_date, commit_msg, COALESCE(node_count,0), COALESCE(edge_count,0), snapshot_data
1448             FROM snapshots ORDER BY commit_date DESC LIMIT ?",
1449        )?;
1450        let rows = stmt.query_map(params![limit as i64], |row| {
1451            Ok(SnapshotEntry {
1452                id: row.get(0)?,
1453                commit_sha: row.get(1)?,
1454                commit_date: row.get(2)?,
1455                commit_msg: row.get(3)?,
1456                node_count: row.get(4)?,
1457                edge_count: row.get(5)?,
1458                snapshot_data: row.get(6)?,
1459            })
1460        })?;
1461        let mut result = Vec::new();
1462        for row in rows {
1463            result.push(row?);
1464        }
1465        Ok(result)
1466    }
1467
1468    pub fn get_snapshot_by_sha(&self, sha: &str) -> anyhow::Result<Option<SnapshotEntry>> {
1469        let mut stmt = self.conn.prepare(
1470            "SELECT id, commit_sha, commit_date, commit_msg, COALESCE(node_count,0), COALESCE(edge_count,0), snapshot_data
1471             FROM snapshots WHERE commit_sha = ? OR commit_sha LIKE ? LIMIT 1",
1472        )?;
1473        let prefix = format!("{}%", sha);
1474        let mut rows = stmt.query_map(params![sha, prefix], |row| {
1475            Ok(SnapshotEntry {
1476                id: row.get(0)?,
1477                commit_sha: row.get(1)?,
1478                commit_date: row.get(2)?,
1479                commit_msg: row.get(3)?,
1480                node_count: row.get(4)?,
1481                edge_count: row.get(5)?,
1482                snapshot_data: row.get(6)?,
1483            })
1484        })?;
1485        match rows.next() {
1486            Some(Ok(entry)) => Ok(Some(entry)),
1487            _ => Ok(None),
1488        }
1489    }
1490
1491    pub fn snapshot_count(&self) -> i64 {
1492        self.conn
1493            .query_row("SELECT COUNT(*) FROM snapshots", [], |r| r.get(0))
1494            .unwrap_or(0)
1495    }
1496}
1497
1498pub fn repo_hash(path: &Path) -> String {
1499    let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
1500    let path_str = canonical.to_string_lossy().to_string();
1501    let mut hasher = Sha256::new();
1502    hasher.update(path_str.as_bytes());
1503    format!("{:x}", hasher.finalize())[..16].to_string()
1504}