Skip to main content

cgx_engine/
graph.rs

1use std::path::{Path, PathBuf};
2
3use duckdb::params;
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6
7use crate::parser::{EdgeDef, NodeDef};
8
/// A graph node as stored in the `nodes` table and exchanged via serde.
///
/// Every field after `line_end` is optional when deserializing and falls back
/// to its type's default value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    /// Primary key of the node in the `nodes` table.
    pub id: String,
    /// Node kind as a string; queries in this file compare it against values
    /// such as `'File'`, `'Author'`, `'Function'` and `'Class'`.
    pub kind: String,
    /// Name of the node.
    pub name: String,
    /// Path associated with the node.
    pub path: String,
    /// First line of the node (copied from the parser's `NodeDef`).
    pub line_start: u32,
    /// Last line of the node (copied from the parser's `NodeDef`).
    pub line_end: u32,
    /// Language label; an empty string is excluded from the language breakdown.
    #[serde(default)]
    pub language: String,
    /// Churn score, written by `GraphDb::upsert_node_scores`.
    #[serde(default)]
    pub churn: f64,
    /// Coupling score; `GraphDb::compute_coupling` sets it for `File` nodes to
    /// `in_degree / max(in_degree)` over all `File` nodes.
    #[serde(default)]
    pub coupling: f64,
    /// Community id; `GraphDb::get_communities` only reports values greater than 0.
    #[serde(default)]
    pub community: i64,
    /// Number of edges whose `dst` is this node (see `GraphDb::update_in_out_degrees`).
    #[serde(default)]
    pub in_degree: i64,
    /// Number of edges whose `src` is this node (see `GraphDb::update_in_out_degrees`).
    #[serde(default)]
    pub out_degree: i64,
    /// Taken from the `"exported"` metadata flag of the parser definition.
    #[serde(default)]
    pub exported: bool,
    /// Dead-code candidate flag, read back from the `is_dead_candidate` column.
    #[serde(default)]
    pub is_dead_candidate: bool,
    /// Optional text read back from the `dead_reason` column.
    #[serde(default)]
    pub dead_reason: Option<String>,
}
36
/// A directed graph edge as stored in the `edges` table and exchanged via serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
    /// Primary key; `Edge::from_def` builds it as `"{src}|{kind}|{dst}"`.
    pub id: String,
    /// Id of the source node.
    pub src: String,
    /// Id of the destination node.
    pub dst: String,
    /// Edge kind as a string (for example `'OWNS'` is queried by `get_ownership`).
    pub kind: String,
    /// Edge weight; defaults to 1.0 when absent from the serialized form.
    #[serde(default = "default_weight")]
    pub weight: f64,
    /// Edge confidence; defaults to 1.0 when absent from the serialized form.
    #[serde(default = "default_weight")]
    pub confidence: f64,
}
48
/// Serde fallback used for `Edge::weight` and `Edge::confidence` when the
/// field is missing from the input.
fn default_weight() -> f64 {
    1.0_f64
}
52
/// Aggregate statistics about the stored graph, as assembled by `GraphDb::get_stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepoStats {
    /// Total number of rows in the `nodes` table.
    pub node_count: u64,
    /// Total number of rows in the `edges` table.
    pub edge_count: u64,
    /// Language name mapped to its fraction of the nodes that have a non-empty language.
    pub language_breakdown: std::collections::HashMap<String, f64>,
    /// Number of distinct communities with an id greater than 0.
    pub community_count: u32,
    /// Number of nodes whose kind is `Function`.
    pub function_count: u64,
    /// Number of nodes whose kind is `Class`.
    pub class_count: u64,
    /// Number of nodes whose kind is `File`.
    pub file_count: u64,
}
63
/// One community summary: (community id, label, node count, top node names).
/// The top node names are formatted as `"{kind}:{name}"`.
pub type CommunityRow = (i64, String, i64, Vec<String>);
/// Per-community accumulator: the member tuples plus a running member count.
type CommunityGroup = (Vec<(String, i64, String)>, i64); // (kind, in_degree, name)
66
/// One row of the `tags` table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TagRow {
    /// Primary key of the tag row.
    pub id: String,
    /// Path of the file the tag belongs to.
    pub file_path: String,
    /// Line number stored for the tag.
    pub line: u32,
    /// Tag type; usable as a filter in `GraphDb::get_tags`.
    pub tag_type: String,
    /// Text stored for the tag.
    pub text: String,
    /// "code", "jsx", or "jsx_commented_code"
    pub comment_type: String,
}
77
78impl Default for Node {
79    fn default() -> Self {
80        Self {
81            id: String::new(),
82            kind: String::new(),
83            name: String::new(),
84            path: String::new(),
85            line_start: 0,
86            line_end: 0,
87            language: String::new(),
88            churn: 0.0,
89            coupling: 0.0,
90            community: 0,
91            in_degree: 0,
92            out_degree: 0,
93            exported: false,
94            is_dead_candidate: false,
95            dead_reason: None,
96        }
97    }
98}
99
100impl Node {
101    pub fn from_def(d: &NodeDef, language: &str) -> Self {
102        let exported = d
103            .metadata
104            .get("exported")
105            .and_then(|v| v.as_bool())
106            .unwrap_or(false);
107        Self {
108            id: d.id.clone(),
109            kind: d.kind.as_str().to_string(),
110            name: d.name.clone(),
111            path: d.path.clone(),
112            line_start: d.line_start,
113            line_end: d.line_end,
114            language: language.to_string(),
115            churn: 0.0,
116            coupling: 0.0,
117            community: 0,
118            in_degree: 0,
119            out_degree: 0,
120            exported,
121            is_dead_candidate: false,
122            dead_reason: None,
123        }
124    }
125}
126
127impl Edge {
128    pub fn from_def(d: &EdgeDef) -> Self {
129        let id = format!("{}|{}|{}", d.src, d.kind.as_str(), d.dst);
130        Self {
131            id,
132            src: d.src.clone(),
133            dst: d.dst.clone(),
134            kind: d.kind.as_str().to_string(),
135            weight: d.weight,
136            confidence: d.confidence,
137        }
138    }
139}
140
/// Handle to the DuckDB database that stores one repository's graph.
pub struct GraphDb {
    /// Open DuckDB connection used by every query in this type.
    pub conn: duckdb::Connection,
    /// Identifier derived from the repository path via `repo_hash`.
    pub repo_id: String,
    /// Location of the database file (`<home>/.cgx/repos/<repo_id>.db`).
    pub db_path: PathBuf,
}
146
147impl GraphDb {
148    pub fn open(repo_path: &Path) -> anyhow::Result<Self> {
149        let repo_id = repo_hash(repo_path);
150        let dir = dirs::home_dir()
151            .ok_or_else(|| anyhow::anyhow!("cannot determine home directory"))?
152            .join(".cgx")
153            .join("repos");
154        std::fs::create_dir_all(&dir)?;
155
156        let db_path = dir.join(format!("{}.db", repo_id));
157        let conn = duckdb::Connection::open(&db_path)?;
158
159        conn.execute_batch(
160            "CREATE TABLE IF NOT EXISTS nodes (
161                id                 VARCHAR PRIMARY KEY,
162                kind               VARCHAR NOT NULL,
163                name               VARCHAR NOT NULL,
164                path               VARCHAR NOT NULL,
165                line_start         INTEGER,
166                line_end           INTEGER,
167                language           VARCHAR,
168                churn              DOUBLE DEFAULT 0.0,
169                coupling           DOUBLE DEFAULT 0.0,
170                community          BIGINT DEFAULT 0,
171                in_degree          BIGINT DEFAULT 0,
172                out_degree         BIGINT DEFAULT 0,
173                exported           TINYINT DEFAULT 0,
174                is_dead_candidate  TINYINT DEFAULT 0,
175                dead_reason        TEXT,
176                metadata           JSON
177            );
178            CREATE TABLE IF NOT EXISTS edges (
179                id         VARCHAR PRIMARY KEY,
180                src        VARCHAR NOT NULL,
181                dst        VARCHAR NOT NULL,
182                kind       VARCHAR NOT NULL,
183                weight     DOUBLE DEFAULT 1.0,
184                confidence DOUBLE DEFAULT 1.0,
185                metadata   JSON
186            );
187            CREATE TABLE IF NOT EXISTS communities (
188                id         INTEGER PRIMARY KEY,
189                label      VARCHAR,
190                node_count INTEGER,
191                top_nodes  JSON
192            );
193            CREATE TABLE IF NOT EXISTS repo_meta (
194                key        VARCHAR PRIMARY KEY,
195                value      JSON
196            );
197            CREATE TABLE IF NOT EXISTS file_hashes (
198                path       VARCHAR PRIMARY KEY,
199                hash       VARCHAR NOT NULL,
200                indexed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
201            );
202            CREATE TABLE IF NOT EXISTS tags (
203                id           VARCHAR PRIMARY KEY,
204                file_path    VARCHAR NOT NULL,
205                line         INTEGER NOT NULL,
206                tag_type     VARCHAR NOT NULL,
207                text         VARCHAR NOT NULL,
208                comment_type VARCHAR NOT NULL DEFAULT 'code'
209            );
210            CREATE INDEX IF NOT EXISTS idx_nodes_kind      ON nodes(kind);
211            CREATE INDEX IF NOT EXISTS idx_nodes_path      ON nodes(path);
212            CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community);
213            CREATE INDEX IF NOT EXISTS idx_edges_src       ON edges(src);
214            CREATE INDEX IF NOT EXISTS idx_edges_dst       ON edges(dst);
215            CREATE INDEX IF NOT EXISTS idx_edges_kind      ON edges(kind);
216            CREATE INDEX IF NOT EXISTS idx_tags_file       ON tags(file_path);
217            CREATE INDEX IF NOT EXISTS idx_tags_type       ON tags(tag_type);",
218        )?;
219
220        // Migration: add new columns to existing DBs that pre-date this schema.
221        // DuckDB 1.x supports "ADD COLUMN IF NOT EXISTS" which is a no-op when
222        // the column is already present — no error, no transaction abort.
223        conn.execute_batch(
224            "ALTER TABLE nodes ADD COLUMN IF NOT EXISTS exported           TINYINT DEFAULT 0;
225             ALTER TABLE nodes ADD COLUMN IF NOT EXISTS is_dead_candidate  TINYINT DEFAULT 0;
226             ALTER TABLE nodes ADD COLUMN IF NOT EXISTS dead_reason        TEXT;
227             CREATE INDEX IF NOT EXISTS idx_nodes_dead ON nodes(is_dead_candidate);",
228        )?;
229
230        Ok(Self {
231            conn,
232            repo_id,
233            db_path,
234        })
235    }
236
237    pub fn upsert_nodes(&self, nodes: &[Node]) -> anyhow::Result<usize> {
238        if nodes.is_empty() {
239            return Ok(0);
240        }
241        let mut count = 0;
242        let mut stmt = self.conn.prepare(
243            "INSERT OR REPLACE INTO nodes (id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, exported)
244             VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
245        )?;
246        for node in nodes {
247            stmt.execute(params![
248                node.id,
249                node.kind,
250                node.name,
251                node.path,
252                node.line_start,
253                node.line_end,
254                node.language,
255                node.churn,
256                node.coupling,
257                node.community,
258                node.in_degree,
259                node.out_degree,
260                node.exported as i32,
261            ])?;
262            count += 1;
263        }
264        Ok(count)
265    }
266
267    pub fn upsert_edges(&self, edges: &[Edge]) -> anyhow::Result<usize> {
268        if edges.is_empty() {
269            return Ok(0);
270        }
271        let mut count = 0;
272        let mut stmt = self.conn.prepare(
273            "INSERT OR REPLACE INTO edges (id, src, dst, kind, weight, confidence)
274             VALUES (?, ?, ?, ?, ?, ?)",
275        )?;
276        for edge in edges {
277            stmt.execute(params![
278                edge.id,
279                edge.src,
280                edge.dst,
281                edge.kind,
282                edge.weight,
283                edge.confidence,
284            ])?;
285            count += 1;
286        }
287        Ok(count)
288    }
289
290    pub fn upsert_tags(&self, tags: &[TagRow]) -> anyhow::Result<usize> {
291        if tags.is_empty() {
292            return Ok(0);
293        }
294        let mut count = 0;
295        let mut stmt = self.conn.prepare(
296            "INSERT OR REPLACE INTO tags (id, file_path, line, tag_type, text, comment_type)
297             VALUES (?, ?, ?, ?, ?, ?)",
298        )?;
299        for tag in tags {
300            stmt.execute(params![
301                tag.id,
302                tag.file_path,
303                tag.line,
304                tag.tag_type,
305                tag.text,
306                tag.comment_type,
307            ])?;
308            count += 1;
309        }
310        Ok(count)
311    }
312
313    pub fn get_tags(
314        &self,
315        tag_type_filter: Option<&str>,
316        comment_type_filter: Option<&str>,
317    ) -> anyhow::Result<Vec<TagRow>> {
318        let sql = match (tag_type_filter, comment_type_filter) {
319            (Some(_), Some(_)) => {
320                "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
321                 WHERE tag_type = ? AND comment_type = ? ORDER BY file_path, line"
322            }
323            (Some(_), None) => {
324                "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
325                 WHERE tag_type = ? ORDER BY file_path, line"
326            }
327            (None, Some(_)) => {
328                "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
329                 WHERE comment_type = ? ORDER BY file_path, line"
330            }
331            (None, None) => {
332                "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
333                 ORDER BY file_path, line"
334            }
335        };
336
337        let mut stmt = self.conn.prepare(sql)?;
338        let map_row = |row: &duckdb::Row| {
339            Ok(TagRow {
340                id: row.get(0)?,
341                file_path: row.get(1)?,
342                line: row.get::<_, u32>(2)?,
343                tag_type: row.get(3)?,
344                text: row.get(4)?,
345                comment_type: row.get(5)?,
346            })
347        };
348
349        let rows = match (tag_type_filter, comment_type_filter) {
350            (Some(t), Some(c)) => stmt.query_map(params![t, c], map_row)?,
351            (Some(t), None) => stmt.query_map(params![t], map_row)?,
352            (None, Some(c)) => stmt.query_map(params![c], map_row)?,
353            (None, None) => stmt.query_map([], map_row)?,
354        };
355
356        let mut results = Vec::new();
357        for row in rows {
358            results.push(row?);
359        }
360        Ok(results)
361    }
362
363    pub fn clear_all_tags(&self) -> anyhow::Result<()> {
364        self.conn.execute_batch(
365            "DROP TABLE IF EXISTS tags;
366             CREATE TABLE IF NOT EXISTS tags (
367                 id           VARCHAR PRIMARY KEY,
368                 file_path    VARCHAR NOT NULL,
369                 line         INTEGER NOT NULL,
370                 tag_type     VARCHAR NOT NULL,
371                 text         VARCHAR NOT NULL,
372                 comment_type VARCHAR NOT NULL DEFAULT 'code'
373             );
374             CREATE INDEX IF NOT EXISTS idx_tags_file ON tags(file_path);
375             CREATE INDEX IF NOT EXISTS idx_tags_type ON tags(tag_type);",
376        )?;
377        Ok(())
378    }
379
380    pub fn delete_tags_for_paths(&self, paths: &[String]) -> anyhow::Result<()> {
381        if paths.is_empty() {
382            return Ok(());
383        }
384        let mut stmt = self.conn.prepare("DELETE FROM tags WHERE file_path = ?")?;
385        for path in paths {
386            stmt.execute(params![path])?;
387        }
388        Ok(())
389    }
390
391    pub fn get_node(&self, id: &str) -> anyhow::Result<Option<Node>> {
392        let mut stmt = self
393            .conn
394            .prepare("SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false) as exported, COALESCE(is_dead_candidate, false) as is_dead_candidate, dead_reason FROM nodes WHERE id = ?")?;
395        let mut rows = stmt.query_map(params![id], |row| {
396            Ok(Node {
397                id: row.get(0)?,
398                kind: row.get(1)?,
399                name: row.get(2)?,
400                path: row.get(3)?,
401                line_start: row.get(4)?,
402                line_end: row.get(5)?,
403                language: row.get(6)?,
404                churn: row.get(7)?,
405                coupling: row.get(8)?,
406                community: row.get(9)?,
407                in_degree: row.get(10)?,
408                out_degree: row.get(11)?,
409                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
410                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
411                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
412            })
413        })?;
414
415        match rows.next() {
416            Some(Ok(node)) => Ok(Some(node)),
417            _ => Ok(None),
418        }
419    }
420
421    pub fn get_neighbors(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
422        let mut seen = std::collections::HashSet::new();
423        seen.insert(id.to_string());
424        let mut current = vec![id.to_string()];
425        let mut result: Vec<Node> = Vec::new();
426        let max_depth = depth.min(3);
427
428        for _ in 0..max_depth {
429            if current.is_empty() {
430                break;
431            }
432            let mut next = Vec::new();
433
434            for cur_id in &current {
435                let mut stmt = self.conn.prepare(
436                    "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, false), COALESCE(n.is_dead_candidate, false), n.dead_reason
437                     FROM nodes n
438                     INNER JOIN edges e ON (e.dst = n.id AND e.src = ?1) OR (e.src = n.id AND e.dst = ?2)
439                     LIMIT 100",
440                )?;
441                let rows = stmt.query_map(params![cur_id, cur_id], |row| {
442                    Ok(Node {
443                        id: row.get(0)?,
444                        kind: row.get(1)?,
445                        name: row.get(2)?,
446                        path: row.get(3)?,
447                        line_start: row.get(4)?,
448                        line_end: row.get(5)?,
449                        language: row.get(6)?,
450                        churn: row.get(7)?,
451                        coupling: row.get(8)?,
452                        community: row.get(9)?,
453                        in_degree: row.get(10)?,
454                        out_degree: row.get(11)?,
455                        exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
456                        is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
457                        dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
458                    })
459                })?;
460
461                for row in rows {
462                    let node = row?;
463                    if seen.insert(node.id.clone()) {
464                        next.push(node.id.clone());
465                        result.push(node);
466                    }
467                }
468            }
469            current = next;
470        }
471
472        Ok(result)
473    }
474
475    pub fn get_all_nodes(&self) -> anyhow::Result<Vec<Node>> {
476        let mut stmt = self.conn.prepare(
477            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason FROM nodes",
478        )?;
479        let rows = stmt.query_map([], |row| {
480            Ok(Node {
481                id: row.get(0)?,
482                kind: row.get(1)?,
483                name: row.get(2)?,
484                path: row.get(3)?,
485                line_start: row.get(4)?,
486                line_end: row.get(5)?,
487                language: row.get(6)?,
488                churn: row.get(7)?,
489                coupling: row.get(8)?,
490                community: row.get(9)?,
491                in_degree: row.get(10)?,
492                out_degree: row.get(11)?,
493                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
494                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
495                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
496            })
497        })?;
498
499        let mut nodes = Vec::new();
500        for row in rows {
501            nodes.push(row?);
502        }
503        Ok(nodes)
504    }
505
506    pub fn get_all_edges(&self) -> anyhow::Result<Vec<Edge>> {
507        let mut stmt = self
508            .conn
509            .prepare("SELECT id, src, dst, kind, weight, confidence FROM edges")?;
510        let rows = stmt.query_map([], |row| {
511            Ok(Edge {
512                id: row.get(0)?,
513                src: row.get(1)?,
514                dst: row.get(2)?,
515                kind: row.get(3)?,
516                weight: row.get(4)?,
517                confidence: row.get(5)?,
518            })
519        })?;
520
521        let mut edges = Vec::new();
522        for row in rows {
523            edges.push(row?);
524        }
525        Ok(edges)
526    }
527
528    pub fn node_count(&self) -> anyhow::Result<u64> {
529        let count: i64 = self
530            .conn
531            .query_row("SELECT COUNT(*) FROM nodes", [], |row| row.get(0))?;
532        Ok(count as u64)
533    }
534
535    pub fn edge_count(&self) -> anyhow::Result<u64> {
536        let count: i64 = self
537            .conn
538            .query_row("SELECT COUNT(*) FROM edges", [], |row| row.get(0))?;
539        Ok(count as u64)
540    }
541
542    pub fn clear(&self) -> anyhow::Result<()> {
543        // TRUNCATE avoids DuckDB ART index bulk-delete failures on large datasets
544        // and is more reliable than DROP+CREATE for data persistence across connections.
545        self.conn.execute_batch(
546            "TRUNCATE TABLE edges;
547             TRUNCATE TABLE nodes;
548             TRUNCATE TABLE communities;",
549        )?;
550        Ok(())
551    }
552
553    pub fn get_language_breakdown(&self) -> anyhow::Result<std::collections::HashMap<String, f64>> {
554        let mut stmt = self.conn.prepare(
555            "SELECT language, COUNT(*) as cnt FROM nodes WHERE language != '' GROUP BY language",
556        )?;
557        let rows = stmt.query_map([], |row| {
558            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
559        })?;
560
561        let mut counts: std::collections::HashMap<String, i64> = std::collections::HashMap::new();
562        for row in rows {
563            let (lang, cnt) = row?;
564            *counts.entry(lang).or_default() += cnt;
565        }
566
567        let total: i64 = counts.values().sum();
568        if total == 0 {
569            return Ok(std::collections::HashMap::new());
570        }
571
572        let mut breakdown = std::collections::HashMap::new();
573        for (lang, cnt) in counts {
574            breakdown.insert(lang, cnt as f64 / total as f64);
575        }
576        Ok(breakdown)
577    }
578
579    pub fn get_node_counts_by_kind(
580        &self,
581    ) -> anyhow::Result<std::collections::HashMap<String, u64>> {
582        let mut stmt = self
583            .conn
584            .prepare("SELECT kind, COUNT(*) as cnt FROM nodes GROUP BY kind")?;
585        let rows = stmt.query_map([], |row| {
586            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
587        })?;
588
589        let mut counts = std::collections::HashMap::new();
590        for row in rows {
591            let (kind, cnt) = row?;
592            counts.insert(kind, cnt as u64);
593        }
594        Ok(counts)
595    }
596
597    pub fn upsert_node_scores(
598        &self,
599        node_id: &str,
600        churn: f64,
601        coupling: f64,
602    ) -> anyhow::Result<()> {
603        self.conn.execute(
604            "UPDATE nodes SET churn = ?, coupling = ? WHERE id = ?",
605            params![churn, coupling, node_id],
606        )?;
607        Ok(())
608    }
609
610    pub fn update_in_out_degrees(&self) -> anyhow::Result<()> {
611        self.conn.execute_batch(
612            "UPDATE nodes SET in_degree = 0, out_degree = 0;
613             UPDATE nodes SET out_degree = (SELECT COUNT(*) FROM edges WHERE edges.src = nodes.id);
614             UPDATE nodes SET in_degree = (SELECT COUNT(*) FROM edges WHERE edges.dst = nodes.id);",
615        )?;
616        Ok(())
617    }
618
619    pub fn get_hotspots(&self, limit: usize) -> anyhow::Result<Vec<(String, f64, f64, i64)>> {
620        let mut stmt = self.conn.prepare(
621            "SELECT path, churn, coupling, in_degree
622             FROM nodes
623             WHERE kind = 'File' AND (churn > 0.0 OR in_degree > 0)
624             ORDER BY (churn * COALESCE(coupling, 0.0) + CAST(in_degree AS DOUBLE) * 0.01) DESC
625             LIMIT ?",
626        )?;
627        let rows = stmt.query_map(params![limit as i64], |row| {
628            Ok((
629                row.get::<_, String>(0)?,
630                row.get::<_, f64>(1)?,
631                row.get::<_, f64>(2)?,
632                row.get::<_, i64>(3)?,
633            ))
634        })?;
635        let mut results = Vec::new();
636        for row in rows {
637            results.push(row?);
638        }
639        Ok(results)
640    }
641
642    pub fn get_ownership(&self) -> anyhow::Result<Vec<(String, i64)>> {
643        let mut stmt = self.conn.prepare(
644            "SELECT n.name, COUNT(e.id) as file_count
645             FROM nodes n
646             INNER JOIN edges e ON e.src = n.id AND e.kind = 'OWNS'
647             WHERE n.kind = 'Author'
648             GROUP BY n.name
649             ORDER BY file_count DESC",
650        )?;
651        let rows = stmt.query_map([], |row| {
652            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
653        })?;
654        let mut results = Vec::new();
655        for row in rows {
656            results.push(row?);
657        }
658        Ok(results)
659    }
660
661    pub fn compute_coupling(&self) -> anyhow::Result<()> {
662        self.conn.execute_batch(
663            "UPDATE nodes SET coupling = 0.0;
664             UPDATE nodes SET coupling = 
665                CASE 
666                    WHEN (SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') > 0
667                    THEN CAST(in_degree AS DOUBLE) / CAST((SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') AS DOUBLE)
668                    ELSE 0.0
669                END
670             WHERE kind = 'File';",
671        )?;
672        Ok(())
673    }
674
675    pub fn update_node_communities(
676        &self,
677        communities: &std::collections::HashMap<String, i64>,
678    ) -> anyhow::Result<usize> {
679        if communities.is_empty() {
680            return Ok(0);
681        }
682        let mut count = 0;
683        let mut stmt = self
684            .conn
685            .prepare("UPDATE nodes SET community = ? WHERE id = ?")?;
686        for (node_id, community) in communities {
687            let affected = stmt.execute(params![*community, node_id.as_str()])?;
688            count += affected;
689        }
690        Ok(count)
691    }
692
693    pub fn get_stats(&self) -> anyhow::Result<RepoStats> {
694        let node_count = self.node_count()?;
695        let edge_count = self.edge_count()?;
696        let lang_breakdown = self.get_language_breakdown()?;
697        let communities = self.get_communities()?;
698        let counts_by_kind = self.get_node_counts_by_kind()?;
699
700        Ok(RepoStats {
701            node_count,
702            edge_count,
703            language_breakdown: lang_breakdown,
704            community_count: communities.len() as u32,
705            function_count: counts_by_kind.get("Function").copied().unwrap_or(0),
706            class_count: counts_by_kind.get("Class").copied().unwrap_or(0),
707            file_count: counts_by_kind.get("File").copied().unwrap_or(0),
708        })
709    }
710
711    pub fn get_entry_points(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
712        let mut stmt = self.conn.prepare(
713            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason
714             FROM nodes
715             WHERE in_degree = 0 AND kind != 'File' AND kind != 'Author'
716             ORDER BY out_degree DESC
717             LIMIT ?",
718        )?;
719        let rows = stmt.query_map(params![limit as i64], |row| {
720            Ok(Node {
721                id: row.get(0)?,
722                kind: row.get(1)?,
723                name: row.get(2)?,
724                path: row.get(3)?,
725                line_start: row.get(4)?,
726                line_end: row.get(5)?,
727                language: row.get(6)?,
728                churn: row.get(7)?,
729                coupling: row.get(8)?,
730                community: row.get(9)?,
731                in_degree: row.get(10)?,
732                out_degree: row.get(11)?,
733                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
734                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
735                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
736            })
737        })?;
738        let mut results = Vec::new();
739        for row in rows {
740            results.push(row?);
741        }
742        Ok(results)
743    }
744
745    pub fn get_god_nodes(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
746        let mut stmt = self.conn.prepare(
747            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason
748             FROM nodes
749             WHERE in_degree > 0 AND kind != 'File' AND kind != 'Author'
750             ORDER BY in_degree DESC
751             LIMIT ?",
752        )?;
753        let rows = stmt.query_map(params![limit as i64], |row| {
754            Ok(Node {
755                id: row.get(0)?,
756                kind: row.get(1)?,
757                name: row.get(2)?,
758                path: row.get(3)?,
759                line_start: row.get(4)?,
760                line_end: row.get(5)?,
761                language: row.get(6)?,
762                churn: row.get(7)?,
763                coupling: row.get(8)?,
764                community: row.get(9)?,
765                in_degree: row.get(10)?,
766                out_degree: row.get(11)?,
767                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
768                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
769                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
770            })
771        })?;
772        let mut results = Vec::new();
773        for row in rows {
774            results.push(row?);
775        }
776        Ok(results)
777    }
778
779    pub fn get_communities(&self) -> anyhow::Result<Vec<CommunityRow>> {
780        let mut stmt = self.conn.prepare(
781            "SELECT community, kind, name, path, in_degree
782             FROM nodes
783             WHERE community > 0
784             ORDER BY community",
785        )?;
786        let rows = stmt.query_map([], |row| {
787            Ok((
788                row.get::<_, i64>(0)?,
789                row.get::<_, String>(1)?,
790                row.get::<_, String>(2)?,
791                row.get::<_, String>(3)?,
792                row.get::<_, i64>(4)?,
793            ))
794        })?;
795
796        let mut community_map: std::collections::HashMap<i64, CommunityGroup> =
797            std::collections::HashMap::new();
798        for row in rows {
799            let (community, kind, name, _path, in_degree) = row?;
800            let entry = community_map
801                .entry(community)
802                .or_insert_with(|| (Vec::new(), 0));
803            entry.0.push((kind, in_degree, name));
804            entry.1 += 1;
805        }
806
807        let mut result: Vec<CommunityRow> = community_map
808            .into_iter()
809            .map(|(community, (mut items, count))| {
810                items.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.2.cmp(&b.2)));
811                let top_nodes: Vec<String> = items
812                    .iter()
813                    .take(5)
814                    .map(|(kind, _deg, name)| format!("{}:{}", kind, name))
815                    .collect();
816                let label = top_nodes
817                    .first()
818                    .cloned()
819                    .unwrap_or_else(|| format!("community-{}", community));
820                (community, label, count, top_nodes)
821            })
822            .collect();
823
824        result.sort_by_key(|row| std::cmp::Reverse(row.2));
825        Ok(result)
826    }
827
828    pub fn clear_communities(&self) -> anyhow::Result<()> {
829        self.conn.execute("UPDATE nodes SET community = 0", [])?;
830        self.conn.execute("DELETE FROM communities", [])?;
831        Ok(())
832    }
833
834    /// BFS following only incoming edges — returns all nodes that depend on `id`.
835    /// Used for blast-radius analysis: if `id` changes, these nodes are affected.
836    pub fn get_dependents(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
837        let mut seen = std::collections::HashSet::new();
838        seen.insert(id.to_string());
839        let mut current = vec![id.to_string()];
840        let mut result: Vec<Node> = Vec::new();
841        let max_depth = depth.min(3);
842
843        for _ in 0..max_depth {
844            if current.is_empty() {
845                break;
846            }
847            let mut next = Vec::new();
848            for cur_id in &current {
849                let mut stmt = self.conn.prepare(
850                    "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree, COALESCE(n.exported, false), COALESCE(n.is_dead_candidate, false), n.dead_reason
851                     FROM nodes n
852                     INNER JOIN edges e ON e.src = n.id AND e.dst = ?
853                     LIMIT 100",
854                )?;
855                let rows = stmt.query_map(params![cur_id], |row| {
856                    Ok(Node {
857                        id: row.get(0)?,
858                        kind: row.get(1)?,
859                        name: row.get(2)?,
860                        path: row.get(3)?,
861                        line_start: row.get(4)?,
862                        line_end: row.get(5)?,
863                        language: row.get(6)?,
864                        churn: row.get(7)?,
865                        coupling: row.get(8)?,
866                        community: row.get(9)?,
867                        in_degree: row.get(10)?,
868                        out_degree: row.get(11)?,
869                        exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
870                        is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
871                        dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
872                    })
873                })?;
874                for row in rows {
875                    let node = row?;
876                    if seen.insert(node.id.clone()) {
877                        next.push(node.id.clone());
878                        result.push(node);
879                    }
880                }
881            }
882            current = next;
883        }
884
885        Ok(result)
886    }
887
888    pub fn get_nodes_by_community(&self, community: i64) -> anyhow::Result<Vec<Node>> {
889        let mut stmt = self.conn.prepare(
890            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree, COALESCE(exported, false), COALESCE(is_dead_candidate, false), dead_reason FROM nodes WHERE community = ?",
891        )?;
892        let rows = stmt.query_map(params![community], |row| {
893            Ok(Node {
894                id: row.get(0)?,
895                kind: row.get(1)?,
896                name: row.get(2)?,
897                path: row.get(3)?,
898                line_start: row.get(4)?,
899                line_end: row.get(5)?,
900                language: row.get(6)?,
901                churn: row.get(7)?,
902                coupling: row.get(8)?,
903                community: row.get(9)?,
904                in_degree: row.get(10)?,
905                out_degree: row.get(11)?,
906                exported: row.get::<_, i64>(12).map(|v| v != 0).unwrap_or(false),
907                is_dead_candidate: row.get::<_, bool>(13).unwrap_or(false),
908                dead_reason: row.get::<_, Option<String>>(14).unwrap_or(None),
909            })
910        })?;
911        let mut nodes = Vec::new();
912        for row in rows {
913            nodes.push(row?);
914        }
915        Ok(nodes)
916    }
917
918    pub fn mark_dead_candidates(&self, items: &[(String, String)]) -> anyhow::Result<()> {
919        // items = vec of (node_id, dead_reason)
920        if items.is_empty() {
921            return Ok(());
922        }
923        let mut stmt = self
924            .conn
925            .prepare("UPDATE nodes SET is_dead_candidate = 1, dead_reason = ? WHERE id = ?")?;
926        for (id, reason) in items {
927            stmt.execute(params![reason, id])?;
928        }
929        Ok(())
930    }
931
932    pub fn get_dead_code_stats(&self) -> anyhow::Result<(i64, i64)> {
933        // Returns (total_candidates, high_confidence_count)
934        let total: i64 = self
935            .conn
936            .query_row(
937                "SELECT COUNT(*) FROM nodes WHERE is_dead_candidate = 1",
938                [],
939                |r| r.get(0),
940            )
941            .unwrap_or(0);
942        // High confidence = unreachable or disconnected reasons
943        let high: i64 = self.conn.query_row(
944            "SELECT COUNT(*) FROM nodes WHERE is_dead_candidate = 1 AND dead_reason IN ('unreachable', 'disconnected')", [], |r| r.get(0)
945        ).unwrap_or(0);
946        Ok((total, high))
947    }
948
949    pub fn get_edges_by_community(&self, community: i64) -> anyhow::Result<Vec<Edge>> {
950        let mut stmt = self.conn.prepare(
951            "SELECT DISTINCT e.id, e.src, e.dst, e.kind, e.weight, e.confidence
952             FROM edges e
953             INNER JOIN nodes n1 ON e.src = n1.id AND n1.community = ?
954             INNER JOIN nodes n2 ON e.dst = n2.id AND n2.community = ?",
955        )?;
956        let rows = stmt.query_map(params![community, community], |row| {
957            Ok(Edge {
958                id: row.get(0)?,
959                src: row.get(1)?,
960                dst: row.get(2)?,
961                kind: row.get(3)?,
962                weight: row.get(4)?,
963                confidence: row.get(5)?,
964            })
965        })?;
966        let mut edges = Vec::new();
967        for row in rows {
968            edges.push(row?);
969        }
970        Ok(edges)
971    }
972
973    // ── File hashes for incremental indexing ────────────────────────────────
974
975    pub fn get_file_hashes(&self) -> anyhow::Result<std::collections::HashMap<String, String>> {
976        let mut stmt = self.conn.prepare("SELECT path, hash FROM file_hashes")?;
977        let rows = stmt.query_map([], |row| {
978            Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
979        })?;
980        let mut result = std::collections::HashMap::new();
981        for row in rows {
982            let (path, hash) = row?;
983            result.insert(path, hash);
984        }
985        Ok(result)
986    }
987
988    pub fn set_file_hash(&self, path: &str, hash: &str) -> anyhow::Result<()> {
989        self.conn.execute(
990            "INSERT OR REPLACE INTO file_hashes (path, hash) VALUES (?, ?)",
991            params![path, hash],
992        )?;
993        Ok(())
994    }
995
996    pub fn remove_file_hashes(&self, paths: &[String]) -> anyhow::Result<()> {
997        if paths.is_empty() {
998            return Ok(());
999        }
1000        let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
1001        let sql = format!("DELETE FROM file_hashes WHERE path IN ({})", placeholders);
1002        let mut stmt = self.conn.prepare(&sql)?;
1003        let params: Vec<&dyn duckdb::ToSql> =
1004            paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
1005        stmt.execute(params.as_slice())?;
1006        Ok(())
1007    }
1008
1009    pub fn delete_nodes_by_paths(&self, paths: &[String]) -> anyhow::Result<usize> {
1010        if paths.is_empty() {
1011            return Ok(0);
1012        }
1013        let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
1014        // Delete edges connected to nodes from these paths first
1015        let sql_edges = format!(
1016            "DELETE FROM edges WHERE src IN (SELECT id FROM nodes WHERE path IN ({})) OR dst IN (SELECT id FROM nodes WHERE path IN ({}))",
1017            placeholders, placeholders
1018        );
1019        let mut stmt_edges = self.conn.prepare(&sql_edges)?;
1020        let params_edges: Vec<&dyn duckdb::ToSql> = paths
1021            .iter()
1022            .chain(paths.iter())
1023            .map(|p| p as &dyn duckdb::ToSql)
1024            .collect();
1025        stmt_edges.execute(params_edges.as_slice())?;
1026
1027        // Delete nodes
1028        let sql_nodes = format!("DELETE FROM nodes WHERE path IN ({})", placeholders);
1029        let mut stmt_nodes = self.conn.prepare(&sql_nodes)?;
1030        let params_nodes: Vec<&dyn duckdb::ToSql> =
1031            paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
1032        let count = stmt_nodes.execute(params_nodes.as_slice())?;
1033        Ok(count)
1034    }
1035}
1036
1037pub fn repo_hash(path: &Path) -> String {
1038    let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
1039    let path_str = canonical.to_string_lossy().to_string();
1040    let mut hasher = Sha256::new();
1041    hasher.update(path_str.as_bytes());
1042    format!("{:x}", hasher.finalize())[..16].to_string()
1043}