Skip to main content

cgx_engine/
graph.rs

1use std::path::{Path, PathBuf};
2
3use duckdb::params;
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6
7use crate::parser::{EdgeDef, NodeDef};
8
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct Node {
11    pub id: String,
12    pub kind: String,
13    pub name: String,
14    pub path: String,
15    pub line_start: u32,
16    pub line_end: u32,
17    #[serde(default)]
18    pub language: String,
19    #[serde(default)]
20    pub churn: f64,
21    #[serde(default)]
22    pub coupling: f64,
23    #[serde(default)]
24    pub community: i64,
25    #[serde(default)]
26    pub in_degree: i64,
27    #[serde(default)]
28    pub out_degree: i64,
29    #[serde(default)]
30    pub exported: bool,
31    #[serde(default)]
32    pub is_dead_candidate: bool,
33    #[serde(default)]
34    pub dead_reason: Option<String>,
35    #[serde(default)]
36    pub complexity: f64,
37    #[serde(default)]
38    pub is_test_file: bool,
39    #[serde(default)]
40    pub test_count: i64,
41    #[serde(default)]
42    pub is_tested: bool,
43}
44
45#[derive(Debug, Clone, Serialize, Deserialize)]
46pub struct Edge {
47    pub id: String,
48    pub src: String,
49    pub dst: String,
50    pub kind: String,
51    #[serde(default = "default_weight")]
52    pub weight: f64,
53    #[serde(default = "default_weight")]
54    pub confidence: f64,
55}
56
/// Serde fallback used for `Edge::weight` and `Edge::confidence`: always `1.0`.
fn default_weight() -> f64 {
    1.0_f64
}
60
61#[derive(Debug, Clone, Serialize, Deserialize)]
62pub struct RepoStats {
63    pub node_count: u64,
64    pub edge_count: u64,
65    pub language_breakdown: std::collections::HashMap<String, f64>,
66    pub community_count: u32,
67    pub function_count: u64,
68    pub class_count: u64,
69    pub file_count: u64,
70}
71
72pub type CommunityRow = (i64, String, i64, Vec<String>);
73/// (overall_pct, Vec<(community_id, documented, total)>, undocumented_high_coupling)
74pub type DocsCoverage = (f64, Vec<(i64, i64, i64)>, Vec<Node>);
75/// (overall_pct, tested_count, untested_count, gaps)
76pub type TestCoverageSummary = (f64, i64, i64, Vec<Node>);
77type CommunityGroup = (Vec<(String, i64, String)>, i64); // (kind, in_degree, name)
78
79#[derive(Debug, Clone, Serialize, Deserialize)]
80pub struct SnapshotEntry {
81    pub id: String,
82    pub commit_sha: String,
83    pub commit_date: String,
84    pub commit_msg: String,
85    pub node_count: i64,
86    pub edge_count: i64,
87    /// JSON blob: {"file_count": N, "insertions": N, "deletions": N}
88    pub snapshot_data: Option<String>,
89}
90
91#[derive(Debug, Clone, Serialize, Deserialize)]
92pub struct TagRow {
93    pub id: String,
94    pub file_path: String,
95    pub line: u32,
96    pub tag_type: String,
97    pub text: String,
98    /// "code", "jsx", or "jsx_commented_code"
99    pub comment_type: String,
100}
101
102#[derive(Debug, Clone, Serialize, Deserialize)]
103pub struct CloneRow {
104    pub id: String,
105    pub node_a: String,
106    pub node_b: String,
107    pub similarity: f64,
108    pub kind: String,
109}
110
111impl Default for Node {
112    fn default() -> Self {
113        Self {
114            id: String::new(),
115            kind: String::new(),
116            name: String::new(),
117            path: String::new(),
118            line_start: 0,
119            line_end: 0,
120            language: String::new(),
121            churn: 0.0,
122            coupling: 0.0,
123            community: 0,
124            in_degree: 0,
125            out_degree: 0,
126            exported: false,
127            is_dead_candidate: false,
128            dead_reason: None,
129            complexity: 0.0,
130            is_test_file: false,
131            test_count: 0,
132            is_tested: false,
133        }
134    }
135}
136
137impl Node {
138    pub fn from_def(d: &NodeDef, language: &str) -> Self {
139        let exported = d
140            .metadata
141            .get("exported")
142            .and_then(|v| v.as_bool())
143            .unwrap_or(false);
144        let complexity = d
145            .metadata
146            .get("complexity")
147            .and_then(|v| v.as_f64())
148            .unwrap_or(0.0);
149        Self {
150            id: d.id.clone(),
151            kind: d.kind.as_str().to_string(),
152            name: d.name.clone(),
153            path: d.path.clone(),
154            line_start: d.line_start,
155            line_end: d.line_end,
156            language: language.to_string(),
157            churn: 0.0,
158            coupling: 0.0,
159            community: 0,
160            in_degree: 0,
161            out_degree: 0,
162            exported,
163            is_dead_candidate: false,
164            dead_reason: None,
165            complexity,
166            is_test_file: false,
167            test_count: 0,
168            is_tested: false,
169        }
170    }
171}
172
173impl Edge {
174    pub fn from_def(d: &EdgeDef) -> Self {
175        let id = format!("{}|{}|{}", d.src, d.kind.as_str(), d.dst);
176        Self {
177            id,
178            src: d.src.clone(),
179            dst: d.dst.clone(),
180            kind: d.kind.as_str().to_string(),
181            weight: d.weight,
182            confidence: d.confidence,
183        }
184    }
185}
186
187pub struct GraphDb {
188    pub conn: duckdb::Connection,
189    pub repo_id: String,
190    pub db_path: PathBuf,
191}
192
193impl GraphDb {
194    pub fn open(repo_path: &Path) -> anyhow::Result<Self> {
195        let repo_id = repo_hash(repo_path);
196        let dir = dirs::home_dir()
197            .ok_or_else(|| anyhow::anyhow!("cannot determine home directory"))?
198            .join(".cgx")
199            .join("repos");
200        std::fs::create_dir_all(&dir)?;
201
202        let db_path = dir.join(format!("{}.db", repo_id));
203        let conn = duckdb::Connection::open(&db_path)?;
204
205        conn.execute_batch(
206            "CREATE TABLE IF NOT EXISTS nodes (
207                id                 VARCHAR PRIMARY KEY,
208                kind               VARCHAR NOT NULL,
209                name               VARCHAR NOT NULL,
210                path               VARCHAR NOT NULL,
211                line_start         INTEGER,
212                line_end           INTEGER,
213                language           VARCHAR,
214                churn              DOUBLE DEFAULT 0.0,
215                coupling           DOUBLE DEFAULT 0.0,
216                community          BIGINT DEFAULT 0,
217                in_degree          BIGINT DEFAULT 0,
218                out_degree         BIGINT DEFAULT 0,
219                exported           TINYINT DEFAULT 0,
220                is_dead_candidate  TINYINT DEFAULT 0,
221                dead_reason        TEXT,
222                metadata           JSON
223            );
224            CREATE TABLE IF NOT EXISTS edges (
225                id         VARCHAR PRIMARY KEY,
226                src        VARCHAR NOT NULL,
227                dst        VARCHAR NOT NULL,
228                kind       VARCHAR NOT NULL,
229                weight     DOUBLE DEFAULT 1.0,
230                confidence DOUBLE DEFAULT 1.0,
231                metadata   JSON
232            );
233            CREATE TABLE IF NOT EXISTS communities (
234                id         INTEGER PRIMARY KEY,
235                label      VARCHAR,
236                node_count INTEGER,
237                top_nodes  JSON
238            );
239            CREATE TABLE IF NOT EXISTS repo_meta (
240                key        VARCHAR PRIMARY KEY,
241                value      JSON
242            );
243            CREATE TABLE IF NOT EXISTS file_hashes (
244                path       VARCHAR PRIMARY KEY,
245                hash       VARCHAR NOT NULL,
246                indexed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
247            );
248            CREATE TABLE IF NOT EXISTS tags (
249                id           VARCHAR PRIMARY KEY,
250                file_path    VARCHAR NOT NULL,
251                line         INTEGER NOT NULL,
252                tag_type     VARCHAR NOT NULL,
253                text         VARCHAR NOT NULL,
254                comment_type VARCHAR NOT NULL DEFAULT 'code'
255            );
256            CREATE TABLE IF NOT EXISTS clones (
257                id         VARCHAR PRIMARY KEY,
258                node_a     VARCHAR NOT NULL,
259                node_b     VARCHAR NOT NULL,
260                similarity FLOAT NOT NULL,
261                kind       VARCHAR NOT NULL
262            );
263            CREATE INDEX IF NOT EXISTS idx_nodes_kind      ON nodes(kind);
264            CREATE INDEX IF NOT EXISTS idx_nodes_path      ON nodes(path);
265            CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community);
266            CREATE INDEX IF NOT EXISTS idx_edges_src       ON edges(src);
267            CREATE INDEX IF NOT EXISTS idx_edges_dst       ON edges(dst);
268            CREATE INDEX IF NOT EXISTS idx_edges_kind      ON edges(kind);
269            CREATE INDEX IF NOT EXISTS idx_tags_file       ON tags(file_path);
270            CREATE INDEX IF NOT EXISTS idx_tags_type       ON tags(tag_type);
271            CREATE INDEX IF NOT EXISTS idx_clones_a        ON clones(node_a);
272            CREATE INDEX IF NOT EXISTS idx_clones_b        ON clones(node_b);",
273        )?;
274
275        // Migration: add new columns to existing DBs that pre-date this schema.
276        // DuckDB 1.x supports "ADD COLUMN IF NOT EXISTS" which is a no-op when
277        // the column is already present — no error, no transaction abort.
278        conn.execute_batch(
279            "ALTER TABLE nodes ADD COLUMN IF NOT EXISTS exported           TINYINT DEFAULT 0;
280             ALTER TABLE nodes ADD COLUMN IF NOT EXISTS is_dead_candidate  TINYINT DEFAULT 0;
281             ALTER TABLE nodes ADD COLUMN IF NOT EXISTS dead_reason        TEXT;
282             ALTER TABLE nodes ADD COLUMN IF NOT EXISTS complexity         DOUBLE DEFAULT 0.0;
283             ALTER TABLE nodes ADD COLUMN IF NOT EXISTS doc_comment        TEXT;
284             ALTER TABLE nodes ADD COLUMN IF NOT EXISTS is_test_file       TINYINT DEFAULT 0;
285             ALTER TABLE nodes ADD COLUMN IF NOT EXISTS test_count         INTEGER DEFAULT 0;
286             ALTER TABLE nodes ADD COLUMN IF NOT EXISTS is_tested          TINYINT DEFAULT 0;
287             CREATE INDEX IF NOT EXISTS idx_nodes_dead       ON nodes(is_dead_candidate);
288             CREATE INDEX IF NOT EXISTS idx_nodes_complexity ON nodes(complexity);
289             CREATE INDEX IF NOT EXISTS idx_nodes_is_tested  ON nodes(is_tested);",
290        )?;
291
292        conn.execute_batch(
293            "CREATE TABLE IF NOT EXISTS snapshots (
294                id           VARCHAR PRIMARY KEY,
295                commit_sha   VARCHAR NOT NULL,
296                commit_date  TEXT NOT NULL,
297                commit_msg   VARCHAR,
298                node_count   INTEGER,
299                edge_count   INTEGER,
300                snapshot_data TEXT
301            );
302            CREATE INDEX IF NOT EXISTS idx_snapshots_date ON snapshots(commit_date);",
303        )?;
304
305        Ok(Self {
306            conn,
307            repo_id,
308            db_path,
309        })
310    }
311
312    pub fn upsert_nodes(&self, nodes: &[Node]) -> anyhow::Result<usize> {
313        if nodes.is_empty() {
314            return Ok(0);
315        }
316        let mut count = 0;
317        let mut stmt = self.conn.prepare(
318            "INSERT OR REPLACE INTO nodes (id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, exported, complexity, is_test_file, test_count, is_tested)
319             VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
320        )?;
321        for node in nodes {
322            stmt.execute(params![
323                node.id,
324                node.kind,
325                node.name,
326                node.path,
327                node.line_start,
328                node.line_end,
329                node.language,
330                node.churn,
331                node.coupling,
332                node.community,
333                node.in_degree,
334                node.out_degree,
335                node.exported as i32,
336                node.complexity,
337                node.is_test_file as i32,
338                node.test_count,
339                node.is_tested as i32,
340            ])?;
341            count += 1;
342        }
343        Ok(count)
344    }
345
346    pub fn upsert_edges(&self, edges: &[Edge]) -> anyhow::Result<usize> {
347        if edges.is_empty() {
348            return Ok(0);
349        }
350        let mut count = 0;
351        let mut stmt = self.conn.prepare(
352            "INSERT OR REPLACE INTO edges (id, src, dst, kind, weight, confidence)
353             VALUES (?, ?, ?, ?, ?, ?)",
354        )?;
355        for edge in edges {
356            stmt.execute(params![
357                edge.id,
358                edge.src,
359                edge.dst,
360                edge.kind,
361                edge.weight,
362                edge.confidence,
363            ])?;
364            count += 1;
365        }
366        Ok(count)
367    }
368
369    pub fn upsert_tags(&self, tags: &[TagRow]) -> anyhow::Result<usize> {
370        if tags.is_empty() {
371            return Ok(0);
372        }
373        let mut count = 0;
374        let mut stmt = self.conn.prepare(
375            "INSERT OR REPLACE INTO tags (id, file_path, line, tag_type, text, comment_type)
376             VALUES (?, ?, ?, ?, ?, ?)",
377        )?;
378        for tag in tags {
379            stmt.execute(params![
380                tag.id,
381                tag.file_path,
382                tag.line,
383                tag.tag_type,
384                tag.text,
385                tag.comment_type,
386            ])?;
387            count += 1;
388        }
389        Ok(count)
390    }
391
392    pub fn get_tags(
393        &self,
394        tag_type_filter: Option<&str>,
395        comment_type_filter: Option<&str>,
396    ) -> anyhow::Result<Vec<TagRow>> {
397        let sql = match (tag_type_filter, comment_type_filter) {
398            (Some(_), Some(_)) => {
399                "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
400                 WHERE tag_type = ? AND comment_type = ? ORDER BY file_path, line"
401            }
402            (Some(_), None) => {
403                "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
404                 WHERE tag_type = ? ORDER BY file_path, line"
405            }
406            (None, Some(_)) => {
407                "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
408                 WHERE comment_type = ? ORDER BY file_path, line"
409            }
410            (None, None) => {
411                "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
412                 ORDER BY file_path, line"
413            }
414        };
415
416        let mut stmt = self.conn.prepare(sql)?;
417        let map_row = |row: &duckdb::Row| {
418            Ok(TagRow {
419                id: row.get(0)?,
420                file_path: row.get(1)?,
421                line: row.get::<_, u32>(2)?,
422                tag_type: row.get(3)?,
423                text: row.get(4)?,
424                comment_type: row.get(5)?,
425            })
426        };
427
428        let rows = match (tag_type_filter, comment_type_filter) {
429            (Some(t), Some(c)) => stmt.query_map(params![t, c], map_row)?,
430            (Some(t), None) => stmt.query_map(params![t], map_row)?,
431            (None, Some(c)) => stmt.query_map(params![c], map_row)?,
432            (None, None) => stmt.query_map([], map_row)?,
433        };
434
435        let mut results = Vec::new();
436        for row in rows {
437            results.push(row?);
438        }
439        Ok(results)
440    }
441
442    pub fn clear_all_tags(&self) -> anyhow::Result<()> {
443        self.conn.execute_batch(
444            "DROP TABLE IF EXISTS tags;
445             CREATE TABLE IF NOT EXISTS tags (
446                 id           VARCHAR PRIMARY KEY,
447                 file_path    VARCHAR NOT NULL,
448                 line         INTEGER NOT NULL,
449                 tag_type     VARCHAR NOT NULL,
450                 text         VARCHAR NOT NULL,
451                 comment_type VARCHAR NOT NULL DEFAULT 'code'
452             );
453             CREATE INDEX IF NOT EXISTS idx_tags_file ON tags(file_path);
454             CREATE INDEX IF NOT EXISTS idx_tags_type ON tags(tag_type);",
455        )?;
456        Ok(())
457    }
458
459    pub fn delete_tags_for_paths(&self, paths: &[String]) -> anyhow::Result<()> {
460        if paths.is_empty() {
461            return Ok(());
462        }
463        let mut stmt = self.conn.prepare("DELETE FROM tags WHERE file_path = ?")?;
464        for path in paths {
465            stmt.execute(params![path])?;
466        }
467        Ok(())
468    }
469
470    pub fn get_node(&self, id: &str) -> anyhow::Result<Option<Node>> {
471        let mut stmt = self
472            .conn
473            .prepare("SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false) as exported, COALESCE(is_dead_candidate, false) as is_dead_candidate, dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0) FROM nodes WHERE id = ?")?;
474        let mut rows = stmt.query_map(params![id], |row| {
475            Ok(Node {
476                id: row.get(0)?,
477                kind: row.get(1)?,
478                name: row.get(2)?,
479                path: row.get(3)?,
480                line_start: row.get(4)?,
481                line_end: row.get(5)?,
482                language: row.get(6)?,
483                churn: row.get(7)?,
484                coupling: row.get(8)?,
485                community: row.get(9)?,
486                in_degree: row.get(10)?,
487                out_degree: row.get(11)?,
488                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
489                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
490                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
491                complexity: row.get::<_, f64>(15).unwrap_or(0.0),
492                is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
493                test_count: row.get::<_, i64>(17).unwrap_or(0),
494                is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
495            })
496        })?;
497
498        match rows.next() {
499            Some(Ok(node)) => Ok(Some(node)),
500            _ => Ok(None),
501        }
502    }
503
504    pub fn get_neighbors(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
505        let mut seen = std::collections::HashSet::new();
506        seen.insert(id.to_string());
507        let mut current = vec![id.to_string()];
508        let mut result: Vec<Node> = Vec::new();
509        let max_depth = depth.min(3);
510
511        for _ in 0..max_depth {
512            if current.is_empty() {
513                break;
514            }
515            let mut next = Vec::new();
516
517            for cur_id in &current {
518                let mut stmt = self.conn.prepare(
519                    "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, false), COALESCE(n.is_dead_candidate, false), n.dead_reason, COALESCE(n.complexity, 0.0), COALESCE(n.is_test_file, 0), COALESCE(n.test_count, 0), COALESCE(n.is_tested, 0)
520                     FROM nodes n
521                     INNER JOIN edges e ON (e.dst = n.id AND e.src = ?1) OR (e.src = n.id AND e.dst = ?2)
522                     LIMIT 100",
523                )?;
524                let rows = stmt.query_map(params![cur_id, cur_id], |row| {
525                    Ok(Node {
526                        id: row.get(0)?,
527                        kind: row.get(1)?,
528                        name: row.get(2)?,
529                        path: row.get(3)?,
530                        line_start: row.get(4)?,
531                        line_end: row.get(5)?,
532                        language: row.get(6)?,
533                        churn: row.get(7)?,
534                        coupling: row.get(8)?,
535                        community: row.get(9)?,
536                        in_degree: row.get(10)?,
537                        out_degree: row.get(11)?,
538                        exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
539                        is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
540                        dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
541                        complexity: row.get::<_, f64>(15).unwrap_or(0.0),
542                        is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
543                        test_count: row.get::<_, i64>(17).unwrap_or(0),
544                        is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
545                    })
546                })?;
547
548                for row in rows {
549                    let node = row?;
550                    if seen.insert(node.id.clone()) {
551                        next.push(node.id.clone());
552                        result.push(node);
553                    }
554                }
555            }
556            current = next;
557        }
558
559        Ok(result)
560    }
561
562    pub fn get_all_nodes(&self) -> anyhow::Result<Vec<Node>> {
563        let mut stmt = self.conn.prepare(
564            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0) FROM nodes",
565        )?;
566        let rows = stmt.query_map([], |row| {
567            Ok(Node {
568                id: row.get(0)?,
569                kind: row.get(1)?,
570                name: row.get(2)?,
571                path: row.get(3)?,
572                line_start: row.get(4)?,
573                line_end: row.get(5)?,
574                language: row.get(6)?,
575                churn: row.get(7)?,
576                coupling: row.get(8)?,
577                community: row.get(9)?,
578                in_degree: row.get(10)?,
579                out_degree: row.get(11)?,
580                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
581                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
582                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
583                complexity: row.get::<_, f64>(15).unwrap_or(0.0),
584                is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
585                test_count: row.get::<_, i64>(17).unwrap_or(0),
586                is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
587            })
588        })?;
589
590        let mut nodes = Vec::new();
591        for row in rows {
592            nodes.push(row?);
593        }
594        Ok(nodes)
595    }
596
597    pub fn get_all_edges(&self) -> anyhow::Result<Vec<Edge>> {
598        let mut stmt = self
599            .conn
600            .prepare("SELECT id, src, dst, kind, weight, confidence FROM edges")?;
601        let rows = stmt.query_map([], |row| {
602            Ok(Edge {
603                id: row.get(0)?,
604                src: row.get(1)?,
605                dst: row.get(2)?,
606                kind: row.get(3)?,
607                weight: row.get(4)?,
608                confidence: row.get(5)?,
609            })
610        })?;
611
612        let mut edges = Vec::new();
613        for row in rows {
614            edges.push(row?);
615        }
616        Ok(edges)
617    }
618
619    pub fn node_count(&self) -> anyhow::Result<u64> {
620        let count: i64 = self
621            .conn
622            .query_row("SELECT COUNT(*) FROM nodes", [], |row| row.get(0))?;
623        Ok(count as u64)
624    }
625
626    pub fn edge_count(&self) -> anyhow::Result<u64> {
627        let count: i64 = self
628            .conn
629            .query_row("SELECT COUNT(*) FROM edges", [], |row| row.get(0))?;
630        Ok(count as u64)
631    }
632
633    pub fn clear(&self) -> anyhow::Result<()> {
634        // TRUNCATE avoids DuckDB ART index bulk-delete failures on large datasets
635        // and is more reliable than DROP+CREATE for data persistence across connections.
636        self.conn.execute_batch(
637            "TRUNCATE TABLE edges;
638             TRUNCATE TABLE nodes;
639             TRUNCATE TABLE communities;",
640        )?;
641        Ok(())
642    }
643
644    pub fn get_language_breakdown(&self) -> anyhow::Result<std::collections::HashMap<String, f64>> {
645        let mut stmt = self.conn.prepare(
646            "SELECT language, COUNT(*) as cnt FROM nodes WHERE language != '' GROUP BY language",
647        )?;
648        let rows = stmt.query_map([], |row| {
649            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
650        })?;
651
652        let mut counts: std::collections::HashMap<String, i64> = std::collections::HashMap::new();
653        for row in rows {
654            let (lang, cnt) = row?;
655            *counts.entry(lang).or_default() += cnt;
656        }
657
658        let total: i64 = counts.values().sum();
659        if total == 0 {
660            return Ok(std::collections::HashMap::new());
661        }
662
663        let mut breakdown = std::collections::HashMap::new();
664        for (lang, cnt) in counts {
665            breakdown.insert(lang, cnt as f64 / total as f64);
666        }
667        Ok(breakdown)
668    }
669
670    pub fn get_node_counts_by_kind(
671        &self,
672    ) -> anyhow::Result<std::collections::HashMap<String, u64>> {
673        let mut stmt = self
674            .conn
675            .prepare("SELECT kind, COUNT(*) as cnt FROM nodes GROUP BY kind")?;
676        let rows = stmt.query_map([], |row| {
677            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
678        })?;
679
680        let mut counts = std::collections::HashMap::new();
681        for row in rows {
682            let (kind, cnt) = row?;
683            counts.insert(kind, cnt as u64);
684        }
685        Ok(counts)
686    }
687
688    pub fn upsert_node_scores(
689        &self,
690        node_id: &str,
691        churn: f64,
692        coupling: f64,
693    ) -> anyhow::Result<()> {
694        self.conn.execute(
695            "UPDATE nodes SET churn = ?, coupling = ? WHERE id = ?",
696            params![churn, coupling, node_id],
697        )?;
698        Ok(())
699    }
700
701    pub fn update_in_out_degrees(&self) -> anyhow::Result<()> {
702        self.conn.execute_batch(
703            "UPDATE nodes SET in_degree = 0, out_degree = 0;
704             UPDATE nodes SET out_degree = (SELECT COUNT(*) FROM edges WHERE edges.src = nodes.id);
705             UPDATE nodes SET in_degree = (SELECT COUNT(*) FROM edges WHERE edges.dst = nodes.id);",
706        )?;
707        Ok(())
708    }
709
710    pub fn get_hotspots(&self, limit: usize) -> anyhow::Result<Vec<(String, f64, f64, i64)>> {
711        let mut stmt = self.conn.prepare(
712            "SELECT path, churn, coupling, in_degree
713             FROM nodes
714             WHERE kind = 'File' AND (churn > 0.0 OR in_degree > 0)
715             ORDER BY (churn * COALESCE(coupling, 0.0) + CAST(in_degree AS DOUBLE) * 0.01) DESC
716             LIMIT ?",
717        )?;
718        let rows = stmt.query_map(params![limit as i64], |row| {
719            Ok((
720                row.get::<_, String>(0)?,
721                row.get::<_, f64>(1)?,
722                row.get::<_, f64>(2)?,
723                row.get::<_, i64>(3)?,
724            ))
725        })?;
726        let mut results = Vec::new();
727        for row in rows {
728            results.push(row?);
729        }
730        Ok(results)
731    }
732
733    pub fn get_ownership(&self) -> anyhow::Result<Vec<(String, i64)>> {
734        let mut stmt = self.conn.prepare(
735            "SELECT n.name, COUNT(e.id) as file_count
736             FROM nodes n
737             INNER JOIN edges e ON e.src = n.id AND e.kind = 'OWNS'
738             WHERE n.kind = 'Author'
739             GROUP BY n.name
740             ORDER BY file_count DESC",
741        )?;
742        let rows = stmt.query_map([], |row| {
743            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
744        })?;
745        let mut results = Vec::new();
746        for row in rows {
747            results.push(row?);
748        }
749        Ok(results)
750    }
751
752    pub fn compute_coupling(&self) -> anyhow::Result<()> {
753        self.conn.execute_batch(
754            "UPDATE nodes SET coupling = 0.0;
755             UPDATE nodes SET coupling = 
756                CASE 
757                    WHEN (SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') > 0
758                    THEN CAST(in_degree AS DOUBLE) / CAST((SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') AS DOUBLE)
759                    ELSE 0.0
760                END
761             WHERE kind = 'File';",
762        )?;
763        Ok(())
764    }
765
766    pub fn update_node_communities(
767        &self,
768        communities: &std::collections::HashMap<String, i64>,
769    ) -> anyhow::Result<usize> {
770        if communities.is_empty() {
771            return Ok(0);
772        }
773        let mut count = 0;
774        let mut stmt = self
775            .conn
776            .prepare("UPDATE nodes SET community = ? WHERE id = ?")?;
777        for (node_id, community) in communities {
778            let affected = stmt.execute(params![*community, node_id.as_str()])?;
779            count += affected;
780        }
781        Ok(count)
782    }
783
784    pub fn get_stats(&self) -> anyhow::Result<RepoStats> {
785        let node_count = self.node_count()?;
786        let edge_count = self.edge_count()?;
787        let lang_breakdown = self.get_language_breakdown()?;
788        let communities = self.get_communities()?;
789        let counts_by_kind = self.get_node_counts_by_kind()?;
790
791        Ok(RepoStats {
792            node_count,
793            edge_count,
794            language_breakdown: lang_breakdown,
795            community_count: communities.len() as u32,
796            function_count: counts_by_kind.get("Function").copied().unwrap_or(0),
797            class_count: counts_by_kind.get("Class").copied().unwrap_or(0),
798            file_count: counts_by_kind.get("File").copied().unwrap_or(0),
799        })
800    }
801
802    pub fn get_entry_points(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
803        let mut stmt = self.conn.prepare(
804            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0)
805             FROM nodes
806             WHERE in_degree = 0 AND kind != 'File' AND kind != 'Author'
807             ORDER BY out_degree DESC
808             LIMIT ?",
809        )?;
810        let rows = stmt.query_map(params![limit as i64], |row| {
811            Ok(Node {
812                id: row.get(0)?,
813                kind: row.get(1)?,
814                name: row.get(2)?,
815                path: row.get(3)?,
816                line_start: row.get(4)?,
817                line_end: row.get(5)?,
818                language: row.get(6)?,
819                churn: row.get(7)?,
820                coupling: row.get(8)?,
821                community: row.get(9)?,
822                in_degree: row.get(10)?,
823                out_degree: row.get(11)?,
824                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
825                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
826                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
827                complexity: row.get::<_, f64>(15).unwrap_or(0.0),
828                is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
829                test_count: row.get::<_, i64>(17).unwrap_or(0),
830                is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
831            })
832        })?;
833        let mut results = Vec::new();
834        for row in rows {
835            results.push(row?);
836        }
837        Ok(results)
838    }
839
840    pub fn get_god_nodes(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
841        let mut stmt = self.conn.prepare(
842            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0)
843             FROM nodes
844             WHERE in_degree > 0 AND kind != 'File' AND kind != 'Author'
845             ORDER BY in_degree DESC
846             LIMIT ?",
847        )?;
848        let rows = stmt.query_map(params![limit as i64], |row| {
849            Ok(Node {
850                id: row.get(0)?,
851                kind: row.get(1)?,
852                name: row.get(2)?,
853                path: row.get(3)?,
854                line_start: row.get(4)?,
855                line_end: row.get(5)?,
856                language: row.get(6)?,
857                churn: row.get(7)?,
858                coupling: row.get(8)?,
859                community: row.get(9)?,
860                in_degree: row.get(10)?,
861                out_degree: row.get(11)?,
862                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
863                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
864                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
865                complexity: row.get::<_, f64>(15).unwrap_or(0.0),
866                is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
867                test_count: row.get::<_, i64>(17).unwrap_or(0),
868                is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
869            })
870        })?;
871        let mut results = Vec::new();
872        for row in rows {
873            results.push(row?);
874        }
875        Ok(results)
876    }
877
878    pub fn get_communities(&self) -> anyhow::Result<Vec<CommunityRow>> {
879        let mut stmt = self.conn.prepare(
880            "SELECT community, kind, name, path, in_degree
881             FROM nodes
882             WHERE community > 0
883             ORDER BY community",
884        )?;
885        let rows = stmt.query_map([], |row| {
886            Ok((
887                row.get::<_, i64>(0)?,
888                row.get::<_, String>(1)?,
889                row.get::<_, String>(2)?,
890                row.get::<_, String>(3)?,
891                row.get::<_, i64>(4)?,
892            ))
893        })?;
894
895        let mut community_map: std::collections::HashMap<i64, CommunityGroup> =
896            std::collections::HashMap::new();
897        for row in rows {
898            let (community, kind, name, _path, in_degree) = row?;
899            let entry = community_map
900                .entry(community)
901                .or_insert_with(|| (Vec::new(), 0));
902            entry.0.push((kind, in_degree, name));
903            entry.1 += 1;
904        }
905
906        let mut result: Vec<CommunityRow> = community_map
907            .into_iter()
908            .map(|(community, (mut items, count))| {
909                items.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.2.cmp(&b.2)));
910                let top_nodes: Vec<String> = items
911                    .iter()
912                    .take(5)
913                    .map(|(kind, _deg, name)| format!("{}:{}", kind, name))
914                    .collect();
915                let label = top_nodes
916                    .first()
917                    .cloned()
918                    .unwrap_or_else(|| format!("community-{}", community));
919                (community, label, count, top_nodes)
920            })
921            .collect();
922
923        result.sort_by_key(|row| std::cmp::Reverse(row.2));
924        Ok(result)
925    }
926
927    pub fn clear_communities(&self) -> anyhow::Result<()> {
928        self.conn.execute("UPDATE nodes SET community = 0", [])?;
929        self.conn.execute("DELETE FROM communities", [])?;
930        Ok(())
931    }
932
933    /// BFS following only incoming edges — returns all nodes that depend on `id`.
934    /// Used for blast-radius analysis: if `id` changes, these nodes are affected.
935    pub fn get_dependents(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
936        let mut seen = std::collections::HashSet::new();
937        seen.insert(id.to_string());
938        let mut current = vec![id.to_string()];
939        let mut result: Vec<Node> = Vec::new();
940        let max_depth = depth.min(3);
941
942        for _ in 0..max_depth {
943            if current.is_empty() {
944                break;
945            }
946            let mut next = Vec::new();
947            for cur_id in &current {
948                let mut stmt = self.conn.prepare(
949                    "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, false), COALESCE(n.is_dead_candidate, false), n.dead_reason, COALESCE(n.complexity, 0.0), COALESCE(n.is_test_file, 0), COALESCE(n.test_count, 0), COALESCE(n.is_tested, 0)
950                     FROM nodes n
951                     INNER JOIN edges e ON e.src = n.id AND e.dst = ?
952                     LIMIT 100",
953                )?;
954                let rows = stmt.query_map(params![cur_id], |row| {
955                    Ok(Node {
956                        id: row.get(0)?,
957                        kind: row.get(1)?,
958                        name: row.get(2)?,
959                        path: row.get(3)?,
960                        line_start: row.get(4)?,
961                        line_end: row.get(5)?,
962                        language: row.get(6)?,
963                        churn: row.get(7)?,
964                        coupling: row.get(8)?,
965                        community: row.get(9)?,
966                        in_degree: row.get(10)?,
967                        out_degree: row.get(11)?,
968                        exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
969                        is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
970                        dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
971                        complexity: row.get::<_, f64>(15).unwrap_or(0.0),
972                        is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
973                        test_count: row.get::<_, i64>(17).unwrap_or(0),
974                        is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
975                    })
976                })?;
977                for row in rows {
978                    let node = row?;
979                    if seen.insert(node.id.clone()) {
980                        next.push(node.id.clone());
981                        result.push(node);
982                    }
983                }
984            }
985            current = next;
986        }
987
988        Ok(result)
989    }
990
991    pub fn get_nodes_by_community(&self, community: i64) -> anyhow::Result<Vec<Node>> {
992        let mut stmt = self.conn.prepare(
993            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0) FROM nodes WHERE community = ?",
994        )?;
995        let rows = stmt.query_map(params![community], |row| {
996            Ok(Node {
997                id: row.get(0)?,
998                kind: row.get(1)?,
999                name: row.get(2)?,
1000                path: row.get(3)?,
1001                line_start: row.get(4)?,
1002                line_end: row.get(5)?,
1003                language: row.get(6)?,
1004                churn: row.get(7)?,
1005                coupling: row.get(8)?,
1006                community: row.get(9)?,
1007                in_degree: row.get(10)?,
1008                out_degree: row.get(11)?,
1009                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
1010                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
1011                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
1012                complexity: row.get::<_, f64>(15).unwrap_or(0.0),
1013                is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
1014                test_count: row.get::<_, i64>(17).unwrap_or(0),
1015                is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
1016            })
1017        })?;
1018        let mut nodes = Vec::new();
1019        for row in rows {
1020            nodes.push(row?);
1021        }
1022        Ok(nodes)
1023    }
1024
1025    pub fn mark_dead_candidates(&self, items: &[(String, String)]) -> anyhow::Result<()> {
1026        // items = vec of (node_id, dead_reason)
1027        if items.is_empty() {
1028            return Ok(());
1029        }
1030        let mut stmt = self
1031            .conn
1032            .prepare("UPDATE nodes SET is_dead_candidate = 1, dead_reason = ? WHERE id = ?")?;
1033        for (id, reason) in items {
1034            stmt.execute(params![reason, id])?;
1035        }
1036        Ok(())
1037    }
1038
1039    pub fn get_dead_code_stats(&self) -> anyhow::Result<(i64, i64)> {
1040        // Returns (total_candidates, high_confidence_count)
1041        let total: i64 = self
1042            .conn
1043            .query_row(
1044                "SELECT COUNT(*) FROM nodes WHERE is_dead_candidate = 1",
1045                [],
1046                |r| r.get(0),
1047            )
1048            .unwrap_or(0);
1049        // High confidence = unreachable or disconnected reasons
1050        let high: i64 = self.conn.query_row(
1051            "SELECT COUNT(*) FROM nodes WHERE is_dead_candidate = 1 AND dead_reason IN ('unreachable', 'disconnected')", [], |r| r.get(0)
1052        ).unwrap_or(0);
1053        Ok((total, high))
1054    }
1055
1056    pub fn get_edges_by_community(&self, community: i64) -> anyhow::Result<Vec<Edge>> {
1057        let mut stmt = self.conn.prepare(
1058            "SELECT DISTINCT e.id, e.src, e.dst, e.kind, e.weight, e.confidence
1059             FROM edges e
1060             INNER JOIN nodes n1 ON e.src = n1.id AND n1.community = ?
1061             INNER JOIN nodes n2 ON e.dst = n2.id AND n2.community = ?",
1062        )?;
1063        let rows = stmt.query_map(params![community, community], |row| {
1064            Ok(Edge {
1065                id: row.get(0)?,
1066                src: row.get(1)?,
1067                dst: row.get(2)?,
1068                kind: row.get(3)?,
1069                weight: row.get(4)?,
1070                confidence: row.get(5)?,
1071            })
1072        })?;
1073        let mut edges = Vec::new();
1074        for row in rows {
1075            edges.push(row?);
1076        }
1077        Ok(edges)
1078    }
1079
1080    // ── File hashes for incremental indexing ────────────────────────────────
1081
1082    pub fn get_file_hashes(&self) -> anyhow::Result<std::collections::HashMap<String, String>> {
1083        let mut stmt = self.conn.prepare("SELECT path, hash FROM file_hashes")?;
1084        let rows = stmt.query_map([], |row| {
1085            Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1086        })?;
1087        let mut result = std::collections::HashMap::new();
1088        for row in rows {
1089            let (path, hash) = row?;
1090            result.insert(path, hash);
1091        }
1092        Ok(result)
1093    }
1094
1095    pub fn set_file_hash(&self, path: &str, hash: &str) -> anyhow::Result<()> {
1096        self.conn.execute(
1097            "INSERT OR REPLACE INTO file_hashes (path, hash) VALUES (?, ?)",
1098            params![path, hash],
1099        )?;
1100        Ok(())
1101    }
1102
1103    pub fn remove_file_hashes(&self, paths: &[String]) -> anyhow::Result<()> {
1104        if paths.is_empty() {
1105            return Ok(());
1106        }
1107        let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
1108        let sql = format!("DELETE FROM file_hashes WHERE path IN ({})", placeholders);
1109        let mut stmt = self.conn.prepare(&sql)?;
1110        let params: Vec<&dyn duckdb::ToSql> =
1111            paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
1112        stmt.execute(params.as_slice())?;
1113        Ok(())
1114    }
1115
1116    pub fn delete_nodes_by_paths(&self, paths: &[String]) -> anyhow::Result<usize> {
1117        if paths.is_empty() {
1118            return Ok(0);
1119        }
1120        let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
1121        // Delete edges connected to nodes from these paths first
1122        let sql_edges = format!(
1123            "DELETE FROM edges WHERE src IN (SELECT id FROM nodes WHERE path IN ({})) OR dst IN (SELECT id FROM nodes WHERE path IN ({}))",
1124            placeholders, placeholders
1125        );
1126        let mut stmt_edges = self.conn.prepare(&sql_edges)?;
1127        let params_edges: Vec<&dyn duckdb::ToSql> = paths
1128            .iter()
1129            .chain(paths.iter())
1130            .map(|p| p as &dyn duckdb::ToSql)
1131            .collect();
1132        stmt_edges.execute(params_edges.as_slice())?;
1133
1134        // Delete nodes
1135        let sql_nodes = format!("DELETE FROM nodes WHERE path IN ({})", placeholders);
1136        let mut stmt_nodes = self.conn.prepare(&sql_nodes)?;
1137        let params_nodes: Vec<&dyn duckdb::ToSql> =
1138            paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
1139        let count = stmt_nodes.execute(params_nodes.as_slice())?;
1140        Ok(count)
1141    }
1142
1143    pub fn update_node_doc_comment(&self, id: &str, doc: &str) -> anyhow::Result<()> {
1144        self.conn.execute(
1145            "UPDATE nodes SET doc_comment = ? WHERE id = ?",
1146            params![doc, id],
1147        )?;
1148        Ok(())
1149    }
1150
1151    pub fn update_node_complexity(&self, id: &str, complexity: f64) -> anyhow::Result<()> {
1152        self.conn.execute(
1153            "UPDATE nodes SET complexity = ? WHERE id = ?",
1154            params![complexity, id],
1155        )?;
1156        Ok(())
1157    }
1158
1159    pub fn get_nodes_by_complexity(
1160        &self,
1161        limit: usize,
1162        min_score: f64,
1163    ) -> anyhow::Result<Vec<Node>> {
1164        let mut stmt = self.conn.prepare(
1165            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0)
1166             FROM nodes
1167             WHERE kind = 'Function' AND COALESCE(complexity, 0.0) >= ?
1168             ORDER BY complexity DESC
1169             LIMIT ?",
1170        )?;
1171        let rows = stmt.query_map(params![min_score, limit as i64], |row| {
1172            Ok(Node {
1173                id: row.get(0)?,
1174                kind: row.get(1)?,
1175                name: row.get(2)?,
1176                path: row.get(3)?,
1177                line_start: row.get(4)?,
1178                line_end: row.get(5)?,
1179                language: row.get(6)?,
1180                churn: row.get(7)?,
1181                coupling: row.get(8)?,
1182                community: row.get(9)?,
1183                in_degree: row.get(10)?,
1184                out_degree: row.get(11)?,
1185                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
1186                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
1187                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
1188                complexity: row.get::<_, f64>(15).unwrap_or(0.0),
1189                is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
1190                test_count: row.get::<_, i64>(17).unwrap_or(0),
1191                is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
1192            })
1193        })?;
1194        let mut results = Vec::new();
1195        for row in rows {
1196            results.push(row?);
1197        }
1198        Ok(results)
1199    }
1200
1201    /// Returns (overall_pct, Vec<(community_id, documented, total)>, Vec<undocumented high-coupling nodes>)
1202    pub fn get_docs_coverage(
1203        &self,
1204    ) -> anyhow::Result<DocsCoverage> {
1205        let overall: f64 = self
1206            .conn
1207            .query_row(
1208                "SELECT COALESCE(
1209                    CAST(SUM(CASE WHEN doc_comment IS NOT NULL AND doc_comment != '' THEN 1 ELSE 0 END) AS DOUBLE)
1210                    / NULLIF(CAST(COUNT(*) AS DOUBLE), 0.0) * 100.0,
1211                    0.0)
1212                 FROM nodes WHERE kind IN ('Function', 'Class') AND path NOT LIKE '%test%'",
1213                [],
1214                |r| r.get(0),
1215            )
1216            .unwrap_or(0.0);
1217
1218        let mut by_community = Vec::new();
1219        let mut stmt = self.conn.prepare(
1220            "SELECT community,
1221                    SUM(CASE WHEN doc_comment IS NOT NULL AND doc_comment != '' THEN 1 ELSE 0 END) as documented,
1222                    COUNT(*) as total
1223             FROM nodes
1224             WHERE kind IN ('Function', 'Class') AND path NOT LIKE '%test%'
1225             GROUP BY community
1226             ORDER BY community",
1227        )?;
1228        let comm_rows = stmt.query_map([], |row| {
1229            Ok((
1230                row.get::<_, i64>(0)?,
1231                row.get::<_, i64>(1)?,
1232                row.get::<_, i64>(2)?,
1233            ))
1234        })?;
1235        for row in comm_rows {
1236            by_community.push(row?);
1237        }
1238
1239        let mut undoc_stmt = self.conn.prepare(
1240            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0)
1241             FROM nodes
1242             WHERE kind = 'Function' AND (doc_comment IS NULL OR doc_comment = '')
1243             ORDER BY in_degree DESC
1244             LIMIT 10",
1245        )?;
1246        let undoc_rows = undoc_stmt.query_map([], |row| {
1247            Ok(Node {
1248                id: row.get(0)?,
1249                kind: row.get(1)?,
1250                name: row.get(2)?,
1251                path: row.get(3)?,
1252                line_start: row.get(4)?,
1253                line_end: row.get(5)?,
1254                language: row.get(6)?,
1255                churn: row.get(7)?,
1256                coupling: row.get(8)?,
1257                community: row.get(9)?,
1258                in_degree: row.get(10)?,
1259                out_degree: row.get(11)?,
1260                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
1261                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
1262                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
1263                complexity: row.get::<_, f64>(15).unwrap_or(0.0),
1264                is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
1265                test_count: row.get::<_, i64>(17).unwrap_or(0),
1266                is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
1267            })
1268        })?;
1269        let mut undocumented = Vec::new();
1270        for row in undoc_rows {
1271            undocumented.push(row?);
1272        }
1273
1274        Ok((overall, by_community, undocumented))
1275    }
1276
1277    pub fn upsert_clones(&self, clones: &[CloneRow]) -> anyhow::Result<usize> {
1278        if clones.is_empty() {
1279            return Ok(0);
1280        }
1281        let mut count = 0;
1282        let mut stmt = self.conn.prepare(
1283            "INSERT OR REPLACE INTO clones (id, node_a, node_b, similarity, kind) VALUES (?, ?, ?, ?, ?)",
1284        )?;
1285        for c in clones {
1286            stmt.execute(params![c.id, c.node_a, c.node_b, c.similarity, c.kind])?;
1287            count += 1;
1288        }
1289        Ok(count)
1290    }
1291
1292    pub fn get_clones(
1293        &self,
1294        min_similarity: f64,
1295        kind_filter: Option<&str>,
1296    ) -> anyhow::Result<Vec<CloneRow>> {
1297        let (sql, use_kind) = if kind_filter.is_some() {
1298            (
1299                "SELECT id, node_a, node_b, similarity, kind FROM clones WHERE similarity >= ? AND kind = ? ORDER BY similarity DESC",
1300                true,
1301            )
1302        } else {
1303            (
1304                "SELECT id, node_a, node_b, similarity, kind FROM clones WHERE similarity >= ? ORDER BY similarity DESC",
1305                false,
1306            )
1307        };
1308
1309        let mut stmt = self.conn.prepare(sql)?;
1310        let map_row = |row: &duckdb::Row| {
1311            Ok(CloneRow {
1312                id: row.get(0)?,
1313                node_a: row.get(1)?,
1314                node_b: row.get(2)?,
1315                similarity: row.get::<_, f32>(3)? as f64,
1316                kind: row.get(4)?,
1317            })
1318        };
1319
1320        let rows = if use_kind {
1321            stmt.query_map(
1322                params![min_similarity, kind_filter.unwrap_or("")],
1323                map_row,
1324            )?
1325        } else {
1326            stmt.query_map(params![min_similarity], map_row)?
1327        };
1328
1329        let mut results = Vec::new();
1330        for row in rows {
1331            results.push(row?);
1332        }
1333        Ok(results)
1334    }
1335
1336    pub fn clear_clones(&self) -> anyhow::Result<()> {
1337        self.conn.execute("DELETE FROM clones", [])?;
1338        Ok(())
1339    }
1340
1341    pub fn mark_test_files(&self, paths: &[String]) -> anyhow::Result<()> {
1342        if paths.is_empty() {
1343            return Ok(());
1344        }
1345        let mut stmt = self
1346            .conn
1347            .prepare("UPDATE nodes SET is_test_file = 1 WHERE path = ?")?;
1348        for path in paths {
1349            stmt.execute(params![path])?;
1350        }
1351        Ok(())
1352    }
1353
1354    /// After inserting TESTS edges, compute test_count and is_tested for non-test nodes.
1355    pub fn update_test_coverage(&self) -> anyhow::Result<()> {
1356        self.conn.execute_batch(
1357            "UPDATE nodes SET test_count = (
1358                SELECT COUNT(*) FROM edges
1359                WHERE edges.dst = nodes.id AND edges.kind = 'TESTS'
1360             );
1361             UPDATE nodes SET is_tested = (test_count > 0)
1362             WHERE is_test_file = 0;",
1363        )?;
1364        Ok(())
1365    }
1366
1367    /// Returns (overall_pct, tested_count, untested_count, gaps ranked by risk)
1368    pub fn get_test_coverage_summary(
1369        &self,
1370        top_n: usize,
1371    ) -> anyhow::Result<(f64, i64, i64, Vec<Node>)> {
1372        let tested: i64 = self
1373            .conn
1374            .query_row(
1375                "SELECT COUNT(*) FROM nodes WHERE kind IN ('Function','Class') AND is_test_file = 0 AND is_tested = 1",
1376                [],
1377                |r| r.get(0),
1378            )
1379            .unwrap_or(0);
1380        let total: i64 = self
1381            .conn
1382            .query_row(
1383                "SELECT COUNT(*) FROM nodes WHERE kind IN ('Function','Class') AND is_test_file = 0",
1384                [],
1385                |r| r.get(0),
1386            )
1387            .unwrap_or(0);
1388
1389        let overall_pct = if total > 0 {
1390            (tested as f64 / total as f64) * 100.0
1391        } else {
1392            0.0
1393        };
1394
1395        let mut gap_stmt = self.conn.prepare(
1396            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason, COALESCE(complexity, 0.0), COALESCE(is_test_file, 0), COALESCE(test_count, 0), COALESCE(is_tested, 0)
1397             FROM nodes
1398             WHERE kind IN ('Function','Class') AND is_test_file = 0 AND COALESCE(is_tested, 0) = 0
1399             ORDER BY (churn * CAST(in_degree AS DOUBLE) + CAST(in_degree AS DOUBLE) * 0.5) DESC
1400             LIMIT ?",
1401        )?;
1402        let gap_rows = gap_stmt.query_map(params![top_n as i64], |row| {
1403            Ok(Node {
1404                id: row.get(0)?,
1405                kind: row.get(1)?,
1406                name: row.get(2)?,
1407                path: row.get(3)?,
1408                line_start: row.get(4)?,
1409                line_end: row.get(5)?,
1410                language: row.get(6)?,
1411                churn: row.get(7)?,
1412                coupling: row.get(8)?,
1413                community: row.get(9)?,
1414                in_degree: row.get(10)?,
1415                out_degree: row.get(11)?,
1416                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
1417                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
1418                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
1419                complexity: row.get::<_, f64>(15).unwrap_or(0.0),
1420                is_test_file: row.get::<_, i64>(16).map(|v| v != 0).unwrap_or(false),
1421                test_count: row.get::<_, i64>(17).unwrap_or(0),
1422                is_tested: row.get::<_, i64>(18).map(|v| v != 0).unwrap_or(false),
1423            })
1424        })?;
1425        let mut gaps = Vec::new();
1426        for row in gap_rows {
1427            gaps.push(row?);
1428        }
1429
1430        Ok((overall_pct, tested, total - tested, gaps))
1431    }
1432
1433    pub fn upsert_snapshot(&self, entry: &SnapshotEntry) -> anyhow::Result<()> {
1434        self.conn.execute(
1435            "INSERT OR REPLACE INTO snapshots (id, commit_sha, commit_date, commit_msg, node_count, edge_count, snapshot_data)
1436             VALUES (?, ?, ?, ?, ?, ?, ?)",
1437            params![
1438                entry.id,
1439                entry.commit_sha,
1440                entry.commit_date,
1441                entry.commit_msg,
1442                entry.node_count,
1443                entry.edge_count,
1444                entry.snapshot_data,
1445            ],
1446        )?;
1447        Ok(())
1448    }
1449
1450    pub fn get_snapshots(&self, limit: usize) -> anyhow::Result<Vec<SnapshotEntry>> {
1451        let mut stmt = self.conn.prepare(
1452            "SELECT id, commit_sha, commit_date, commit_msg, COALESCE(node_count,0), COALESCE(edge_count,0), snapshot_data
1453             FROM snapshots ORDER BY commit_date DESC LIMIT ?",
1454        )?;
1455        let rows = stmt.query_map(params![limit as i64], |row| {
1456            Ok(SnapshotEntry {
1457                id: row.get(0)?,
1458                commit_sha: row.get(1)?,
1459                commit_date: row.get(2)?,
1460                commit_msg: row.get(3)?,
1461                node_count: row.get(4)?,
1462                edge_count: row.get(5)?,
1463                snapshot_data: row.get(6)?,
1464            })
1465        })?;
1466        let mut result = Vec::new();
1467        for row in rows {
1468            result.push(row?);
1469        }
1470        Ok(result)
1471    }
1472
1473    pub fn get_snapshot_by_sha(&self, sha: &str) -> anyhow::Result<Option<SnapshotEntry>> {
1474        let mut stmt = self.conn.prepare(
1475            "SELECT id, commit_sha, commit_date, commit_msg, COALESCE(node_count,0), COALESCE(edge_count,0), snapshot_data
1476             FROM snapshots WHERE commit_sha = ? OR commit_sha LIKE ? LIMIT 1",
1477        )?;
1478        let prefix = format!("{}%", sha);
1479        let mut rows = stmt.query_map(params![sha, prefix], |row| {
1480            Ok(SnapshotEntry {
1481                id: row.get(0)?,
1482                commit_sha: row.get(1)?,
1483                commit_date: row.get(2)?,
1484                commit_msg: row.get(3)?,
1485                node_count: row.get(4)?,
1486                edge_count: row.get(5)?,
1487                snapshot_data: row.get(6)?,
1488            })
1489        })?;
1490        match rows.next() {
1491            Some(Ok(entry)) => Ok(Some(entry)),
1492            _ => Ok(None),
1493        }
1494    }
1495
1496    pub fn snapshot_count(&self) -> i64 {
1497        self.conn
1498            .query_row("SELECT COUNT(*) FROM snapshots", [], |r| r.get(0))
1499            .unwrap_or(0)
1500    }
1501}
1502
1503pub fn repo_hash(path: &Path) -> String {
1504    let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
1505    let path_str = canonical.to_string_lossy().to_string();
1506    let mut hasher = Sha256::new();
1507    hasher.update(path_str.as_bytes());
1508    format!("{:x}", hasher.finalize())[..16].to_string()
1509}