Skip to main content

cgx_engine/
graph.rs

1use std::path::{Path, PathBuf};
2
3use duckdb::params;
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6
7use crate::parser::{EdgeDef, NodeDef};
8
/// A graph node, mirroring one row of the `nodes` table (minus its `metadata` column).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    /// Primary key of the node in the `nodes` table.
    pub id: String,
    /// Node kind as a string; queries in this file match it against values
    /// such as `"File"`, `"Function"`, `"Class"` and `"Author"`.
    pub kind: String,
    /// Node name as copied from the parser definition.
    pub name: String,
    /// Path recorded for this node.
    pub path: String,
    /// `line_start` as reported by the parser definition.
    pub line_start: u32,
    /// `line_end` as reported by the parser definition.
    pub line_end: u32,
    /// Language label; empty when absent from serialized input.
    #[serde(default)]
    pub language: String,
    /// Churn score; starts at 0.0 and is written by `GraphDb::upsert_node_scores`.
    #[serde(default)]
    pub churn: f64,
    /// Coupling score; starts at 0.0 and is recomputed for `File` nodes by
    /// `GraphDb::compute_coupling`.
    #[serde(default)]
    pub coupling: f64,
    /// Community id; 0 means "not assigned" (see `GraphDb::clear_communities`).
    #[serde(default)]
    pub community: i64,
    /// Number of edges whose `dst` is this node (see `GraphDb::update_in_out_degrees`).
    #[serde(default)]
    pub in_degree: i64,
    /// Number of edges whose `src` is this node (see `GraphDb::update_in_out_degrees`).
    #[serde(default)]
    pub out_degree: i64,
}
30
/// A directed graph edge, mirroring one row of the `edges` table (minus its `metadata` column).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
    /// Primary key; `Edge::from_def` builds it as `"{src}|{kind}|{dst}"`.
    pub id: String,
    /// Id of the source node.
    pub src: String,
    /// Id of the destination node.
    pub dst: String,
    /// Edge kind as a string (for example `"OWNS"`, which `GraphDb::get_ownership` filters on).
    pub kind: String,
    /// Edge weight; defaults to 1.0 when missing from serialized input.
    #[serde(default = "default_weight")]
    pub weight: f64,
    /// Edge confidence; defaults to 1.0 when missing from serialized input.
    #[serde(default = "default_weight")]
    pub confidence: f64,
}
42
/// Serde default used for `Edge::weight` and `Edge::confidence` when the
/// field is absent from the input.
fn default_weight() -> f64 {
    1.0_f64
}
46
/// Aggregate statistics for an indexed repository, as produced by `GraphDb::get_stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepoStats {
    /// Total number of rows in the `nodes` table.
    pub node_count: u64,
    /// Total number of rows in the `edges` table.
    pub edge_count: u64,
    /// Fraction (0.0..=1.0) of language-tagged nodes per language.
    pub language_breakdown: std::collections::HashMap<String, f64>,
    /// Number of communities with id greater than 0.
    pub community_count: u32,
    /// Number of nodes whose kind is `"Function"`.
    pub function_count: u64,
    /// Number of nodes whose kind is `"Class"`.
    pub class_count: u64,
    /// Number of nodes whose kind is `"File"`.
    pub file_count: u64,
}
57
/// One community summary: `(community id, label, node count, top node labels)`.
/// The top node labels are formatted as `"kind:name"`.
pub type CommunityRow = (i64, String, i64, Vec<String>);
/// Per-community accumulator: the member list and the running member count.
type CommunityGroup = (Vec<(String, i64, String)>, i64); // (kind, in_degree, name)
60
/// One row of the `tags` table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TagRow {
    /// Primary key of the tag.
    pub id: String,
    /// Path of the file the tag was found in.
    pub file_path: String,
    /// Line number stored for the tag.
    pub line: u32,
    /// Tag type; `GraphDb::get_tags` can filter on this value.
    pub tag_type: String,
    /// Text stored for the tag.
    pub text: String,
    /// "code", "jsx", or "jsx_commented_code"
    pub comment_type: String,
}
71
72impl Node {
73    pub fn from_def(d: &NodeDef, language: &str) -> Self {
74        Self {
75            id: d.id.clone(),
76            kind: d.kind.as_str().to_string(),
77            name: d.name.clone(),
78            path: d.path.clone(),
79            line_start: d.line_start,
80            line_end: d.line_end,
81            language: language.to_string(),
82            churn: 0.0,
83            coupling: 0.0,
84            community: 0,
85            in_degree: 0,
86            out_degree: 0,
87        }
88    }
89}
90
91impl Edge {
92    pub fn from_def(d: &EdgeDef) -> Self {
93        let id = format!("{}|{}|{}", d.src, d.kind.as_str(), d.dst);
94        Self {
95            id,
96            src: d.src.clone(),
97            dst: d.dst.clone(),
98            kind: d.kind.as_str().to_string(),
99            weight: d.weight,
100            confidence: d.confidence,
101        }
102    }
103}
104
/// Handle to the per-repository DuckDB database that stores the code graph.
pub struct GraphDb {
    /// Open DuckDB connection used by every query in this type.
    pub conn: duckdb::Connection,
    /// Repository identifier; also the stem of the database file name.
    pub repo_id: String,
    /// Location of the database file on disk.
    pub db_path: PathBuf,
}
110
111impl GraphDb {
112    pub fn open(repo_path: &Path) -> anyhow::Result<Self> {
113        let repo_id = repo_hash(repo_path);
114        let dir = dirs::home_dir()
115            .ok_or_else(|| anyhow::anyhow!("cannot determine home directory"))?
116            .join(".cgx")
117            .join("repos");
118        std::fs::create_dir_all(&dir)?;
119
120        let db_path = dir.join(format!("{}.db", repo_id));
121        let conn = duckdb::Connection::open(&db_path)?;
122
123        conn.execute_batch(
124            "CREATE TABLE IF NOT EXISTS nodes (
125                id         VARCHAR PRIMARY KEY,
126                kind       VARCHAR NOT NULL,
127                name       VARCHAR NOT NULL,
128                path       VARCHAR NOT NULL,
129                line_start INTEGER,
130                line_end   INTEGER,
131                language   VARCHAR,
132                churn      DOUBLE DEFAULT 0.0,
133                coupling   DOUBLE DEFAULT 0.0,
134                community  BIGINT DEFAULT 0,
135                in_degree  BIGINT DEFAULT 0,
136                out_degree BIGINT DEFAULT 0,
137                metadata   JSON
138            );
139            CREATE TABLE IF NOT EXISTS edges (
140                id         VARCHAR PRIMARY KEY,
141                src        VARCHAR NOT NULL,
142                dst        VARCHAR NOT NULL,
143                kind       VARCHAR NOT NULL,
144                weight     DOUBLE DEFAULT 1.0,
145                confidence DOUBLE DEFAULT 1.0,
146                metadata   JSON
147            );
148            CREATE TABLE IF NOT EXISTS communities (
149                id         INTEGER PRIMARY KEY,
150                label      VARCHAR,
151                node_count INTEGER,
152                top_nodes  JSON
153            );
154            CREATE TABLE IF NOT EXISTS repo_meta (
155                key        VARCHAR PRIMARY KEY,
156                value      JSON
157            );
158            CREATE TABLE IF NOT EXISTS file_hashes (
159                path       VARCHAR PRIMARY KEY,
160                hash       VARCHAR NOT NULL,
161                indexed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
162            );
163            CREATE TABLE IF NOT EXISTS tags (
164                id           VARCHAR PRIMARY KEY,
165                file_path    VARCHAR NOT NULL,
166                line         INTEGER NOT NULL,
167                tag_type     VARCHAR NOT NULL,
168                text         VARCHAR NOT NULL,
169                comment_type VARCHAR NOT NULL DEFAULT 'code'
170            );
171            CREATE INDEX IF NOT EXISTS idx_nodes_kind      ON nodes(kind);
172            CREATE INDEX IF NOT EXISTS idx_nodes_path      ON nodes(path);
173            CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community);
174            CREATE INDEX IF NOT EXISTS idx_edges_src       ON edges(src);
175            CREATE INDEX IF NOT EXISTS idx_edges_dst       ON edges(dst);
176            CREATE INDEX IF NOT EXISTS idx_edges_kind      ON edges(kind);
177            CREATE INDEX IF NOT EXISTS idx_tags_file       ON tags(file_path);
178            CREATE INDEX IF NOT EXISTS idx_tags_type       ON tags(tag_type);",
179        )?;
180
181        Ok(Self {
182            conn,
183            repo_id,
184            db_path,
185        })
186    }
187
188    pub fn upsert_nodes(&self, nodes: &[Node]) -> anyhow::Result<usize> {
189        if nodes.is_empty() {
190            return Ok(0);
191        }
192        let mut count = 0;
193        let mut stmt = self.conn.prepare(
194            "INSERT OR REPLACE INTO nodes (id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree)
195             VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
196        )?;
197        for node in nodes {
198            stmt.execute(params![
199                node.id,
200                node.kind,
201                node.name,
202                node.path,
203                node.line_start,
204                node.line_end,
205                node.language,
206                node.churn,
207                node.coupling,
208                node.community,
209                node.in_degree,
210                node.out_degree,
211            ])?;
212            count += 1;
213        }
214        Ok(count)
215    }
216
217    pub fn upsert_edges(&self, edges: &[Edge]) -> anyhow::Result<usize> {
218        if edges.is_empty() {
219            return Ok(0);
220        }
221        let mut count = 0;
222        let mut stmt = self.conn.prepare(
223            "INSERT OR REPLACE INTO edges (id, src, dst, kind, weight, confidence)
224             VALUES (?, ?, ?, ?, ?, ?)",
225        )?;
226        for edge in edges {
227            stmt.execute(params![
228                edge.id,
229                edge.src,
230                edge.dst,
231                edge.kind,
232                edge.weight,
233                edge.confidence,
234            ])?;
235            count += 1;
236        }
237        Ok(count)
238    }
239
240    pub fn upsert_tags(&self, tags: &[TagRow]) -> anyhow::Result<usize> {
241        if tags.is_empty() {
242            return Ok(0);
243        }
244        let mut count = 0;
245        let mut stmt = self.conn.prepare(
246            "INSERT OR REPLACE INTO tags (id, file_path, line, tag_type, text, comment_type)
247             VALUES (?, ?, ?, ?, ?, ?)",
248        )?;
249        for tag in tags {
250            stmt.execute(params![
251                tag.id,
252                tag.file_path,
253                tag.line,
254                tag.tag_type,
255                tag.text,
256                tag.comment_type,
257            ])?;
258            count += 1;
259        }
260        Ok(count)
261    }
262
263    pub fn get_tags(
264        &self,
265        tag_type_filter: Option<&str>,
266        comment_type_filter: Option<&str>,
267    ) -> anyhow::Result<Vec<TagRow>> {
268        let sql = match (tag_type_filter, comment_type_filter) {
269            (Some(_), Some(_)) => {
270                "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
271                 WHERE tag_type = ? AND comment_type = ? ORDER BY file_path, line"
272            }
273            (Some(_), None) => {
274                "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
275                 WHERE tag_type = ? ORDER BY file_path, line"
276            }
277            (None, Some(_)) => {
278                "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
279                 WHERE comment_type = ? ORDER BY file_path, line"
280            }
281            (None, None) => {
282                "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
283                 ORDER BY file_path, line"
284            }
285        };
286
287        let mut stmt = self.conn.prepare(sql)?;
288        let map_row = |row: &duckdb::Row| {
289            Ok(TagRow {
290                id: row.get(0)?,
291                file_path: row.get(1)?,
292                line: row.get::<_, u32>(2)?,
293                tag_type: row.get(3)?,
294                text: row.get(4)?,
295                comment_type: row.get(5)?,
296            })
297        };
298
299        let rows = match (tag_type_filter, comment_type_filter) {
300            (Some(t), Some(c)) => stmt.query_map(params![t, c], map_row)?,
301            (Some(t), None) => stmt.query_map(params![t], map_row)?,
302            (None, Some(c)) => stmt.query_map(params![c], map_row)?,
303            (None, None) => stmt.query_map([], map_row)?,
304        };
305
306        let mut results = Vec::new();
307        for row in rows {
308            results.push(row?);
309        }
310        Ok(results)
311    }
312
313    pub fn clear_all_tags(&self) -> anyhow::Result<()> {
314        self.conn.execute_batch(
315            "DROP TABLE IF EXISTS tags;
316             CREATE TABLE IF NOT EXISTS tags (
317                 id           VARCHAR PRIMARY KEY,
318                 file_path    VARCHAR NOT NULL,
319                 line         INTEGER NOT NULL,
320                 tag_type     VARCHAR NOT NULL,
321                 text         VARCHAR NOT NULL,
322                 comment_type VARCHAR NOT NULL DEFAULT 'code'
323             );
324             CREATE INDEX IF NOT EXISTS idx_tags_file ON tags(file_path);
325             CREATE INDEX IF NOT EXISTS idx_tags_type ON tags(tag_type);",
326        )?;
327        Ok(())
328    }
329
330    pub fn delete_tags_for_paths(&self, paths: &[String]) -> anyhow::Result<()> {
331        if paths.is_empty() {
332            return Ok(());
333        }
334        let mut stmt = self.conn.prepare("DELETE FROM tags WHERE file_path = ?")?;
335        for path in paths {
336            stmt.execute(params![path])?;
337        }
338        Ok(())
339    }
340
341    pub fn get_node(&self, id: &str) -> anyhow::Result<Option<Node>> {
342        let mut stmt = self
343            .conn
344            .prepare("SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree FROM nodes WHERE id = ?")?;
345        let mut rows = stmt.query_map(params![id], |row| {
346            Ok(Node {
347                id: row.get(0)?,
348                kind: row.get(1)?,
349                name: row.get(2)?,
350                path: row.get(3)?,
351                line_start: row.get(4)?,
352                line_end: row.get(5)?,
353                language: row.get(6)?,
354                churn: row.get(7)?,
355                coupling: row.get(8)?,
356                community: row.get(9)?,
357                in_degree: row.get(10)?,
358                out_degree: row.get(11)?,
359            })
360        })?;
361
362        match rows.next() {
363            Some(Ok(node)) => Ok(Some(node)),
364            _ => Ok(None),
365        }
366    }
367
368    pub fn get_neighbors(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
369        let mut seen = std::collections::HashSet::new();
370        seen.insert(id.to_string());
371        let mut current = vec![id.to_string()];
372        let mut result: Vec<Node> = Vec::new();
373        let max_depth = depth.min(3);
374
375        for _ in 0..max_depth {
376            if current.is_empty() {
377                break;
378            }
379            let mut next = Vec::new();
380
381            for cur_id in &current {
382                let mut stmt = self.conn.prepare(
383                    "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree
384                     FROM nodes n
385                     INNER JOIN edges e ON (e.dst = n.id AND e.src = ?1) OR (e.src = n.id AND e.dst = ?2)
386                     LIMIT 100",
387                )?;
388                let rows = stmt.query_map(params![cur_id, cur_id], |row| {
389                    Ok(Node {
390                        id: row.get(0)?,
391                        kind: row.get(1)?,
392                        name: row.get(2)?,
393                        path: row.get(3)?,
394                        line_start: row.get(4)?,
395                        line_end: row.get(5)?,
396                        language: row.get(6)?,
397                        churn: row.get(7)?,
398                        coupling: row.get(8)?,
399                        community: row.get(9)?,
400                        in_degree: row.get(10)?,
401                        out_degree: row.get(11)?,
402                    })
403                })?;
404
405                for row in rows {
406                    let node = row?;
407                    if seen.insert(node.id.clone()) {
408                        next.push(node.id.clone());
409                        result.push(node);
410                    }
411                }
412            }
413            current = next;
414        }
415
416        Ok(result)
417    }
418
419    pub fn get_all_nodes(&self) -> anyhow::Result<Vec<Node>> {
420        let mut stmt = self.conn.prepare(
421            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree FROM nodes",
422        )?;
423        let rows = stmt.query_map([], |row| {
424            Ok(Node {
425                id: row.get(0)?,
426                kind: row.get(1)?,
427                name: row.get(2)?,
428                path: row.get(3)?,
429                line_start: row.get(4)?,
430                line_end: row.get(5)?,
431                language: row.get(6)?,
432                churn: row.get(7)?,
433                coupling: row.get(8)?,
434                community: row.get(9)?,
435                in_degree: row.get(10)?,
436                out_degree: row.get(11)?,
437            })
438        })?;
439
440        let mut nodes = Vec::new();
441        for row in rows {
442            nodes.push(row?);
443        }
444        Ok(nodes)
445    }
446
447    pub fn get_all_edges(&self) -> anyhow::Result<Vec<Edge>> {
448        let mut stmt = self
449            .conn
450            .prepare("SELECT id, src, dst, kind, weight, confidence FROM edges")?;
451        let rows = stmt.query_map([], |row| {
452            Ok(Edge {
453                id: row.get(0)?,
454                src: row.get(1)?,
455                dst: row.get(2)?,
456                kind: row.get(3)?,
457                weight: row.get(4)?,
458                confidence: row.get(5)?,
459            })
460        })?;
461
462        let mut edges = Vec::new();
463        for row in rows {
464            edges.push(row?);
465        }
466        Ok(edges)
467    }
468
469    pub fn node_count(&self) -> anyhow::Result<u64> {
470        let count: i64 = self
471            .conn
472            .query_row("SELECT COUNT(*) FROM nodes", [], |row| row.get(0))?;
473        Ok(count as u64)
474    }
475
476    pub fn edge_count(&self) -> anyhow::Result<u64> {
477        let count: i64 = self
478            .conn
479            .query_row("SELECT COUNT(*) FROM edges", [], |row| row.get(0))?;
480        Ok(count as u64)
481    }
482
483    pub fn clear(&self) -> anyhow::Result<()> {
484        // Drop and recreate tables instead of DELETE to avoid DuckDB ART index
485        // bulk-delete failures on large datasets (duckdb issue with indexed tables).
486        self.conn.execute_batch(
487            "DROP TABLE IF EXISTS edges;
488             DROP TABLE IF EXISTS nodes;
489             DROP TABLE IF EXISTS communities;
490             CREATE TABLE IF NOT EXISTS nodes (
491                 id         VARCHAR PRIMARY KEY,
492                 kind       VARCHAR NOT NULL,
493                 name       VARCHAR NOT NULL,
494                 path       VARCHAR NOT NULL,
495                 line_start INTEGER,
496                 line_end   INTEGER,
497                 language   VARCHAR,
498                 churn      DOUBLE DEFAULT 0.0,
499                 coupling   DOUBLE DEFAULT 0.0,
500                 community  BIGINT DEFAULT 0,
501                 in_degree  BIGINT DEFAULT 0,
502                 out_degree BIGINT DEFAULT 0,
503                 metadata   JSON
504             );
505             CREATE TABLE IF NOT EXISTS edges (
506                 id         VARCHAR PRIMARY KEY,
507                 src        VARCHAR NOT NULL,
508                 dst        VARCHAR NOT NULL,
509                 kind       VARCHAR NOT NULL,
510                 weight     DOUBLE DEFAULT 1.0,
511                 confidence DOUBLE DEFAULT 1.0,
512                 metadata   JSON
513             );
514             CREATE TABLE IF NOT EXISTS communities (
515                 id         INTEGER PRIMARY KEY,
516                 label      VARCHAR,
517                 node_count INTEGER,
518                 top_nodes  JSON
519             );
520             CREATE INDEX IF NOT EXISTS idx_nodes_kind      ON nodes(kind);
521             CREATE INDEX IF NOT EXISTS idx_nodes_path      ON nodes(path);
522             CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community);
523             CREATE INDEX IF NOT EXISTS idx_edges_src       ON edges(src);
524             CREATE INDEX IF NOT EXISTS idx_edges_dst       ON edges(dst);
525             CREATE INDEX IF NOT EXISTS idx_edges_kind      ON edges(kind);",
526        )?;
527        Ok(())
528    }
529
530    pub fn get_language_breakdown(&self) -> anyhow::Result<std::collections::HashMap<String, f64>> {
531        let mut stmt = self.conn.prepare(
532            "SELECT language, COUNT(*) as cnt FROM nodes WHERE language != '' GROUP BY language",
533        )?;
534        let rows = stmt.query_map([], |row| {
535            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
536        })?;
537
538        let mut counts: std::collections::HashMap<String, i64> = std::collections::HashMap::new();
539        for row in rows {
540            let (lang, cnt) = row?;
541            *counts.entry(lang).or_default() += cnt;
542        }
543
544        let total: i64 = counts.values().sum();
545        if total == 0 {
546            return Ok(std::collections::HashMap::new());
547        }
548
549        let mut breakdown = std::collections::HashMap::new();
550        for (lang, cnt) in counts {
551            breakdown.insert(lang, cnt as f64 / total as f64);
552        }
553        Ok(breakdown)
554    }
555
556    pub fn get_node_counts_by_kind(
557        &self,
558    ) -> anyhow::Result<std::collections::HashMap<String, u64>> {
559        let mut stmt = self
560            .conn
561            .prepare("SELECT kind, COUNT(*) as cnt FROM nodes GROUP BY kind")?;
562        let rows = stmt.query_map([], |row| {
563            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
564        })?;
565
566        let mut counts = std::collections::HashMap::new();
567        for row in rows {
568            let (kind, cnt) = row?;
569            counts.insert(kind, cnt as u64);
570        }
571        Ok(counts)
572    }
573
574    pub fn upsert_node_scores(
575        &self,
576        node_id: &str,
577        churn: f64,
578        coupling: f64,
579    ) -> anyhow::Result<()> {
580        self.conn.execute(
581            "UPDATE nodes SET churn = ?, coupling = ? WHERE id = ?",
582            params![churn, coupling, node_id],
583        )?;
584        Ok(())
585    }
586
587    pub fn update_in_out_degrees(&self) -> anyhow::Result<()> {
588        self.conn.execute_batch(
589            "UPDATE nodes SET in_degree = 0, out_degree = 0;
590             UPDATE nodes SET out_degree = (SELECT COUNT(*) FROM edges WHERE edges.src = nodes.id);
591             UPDATE nodes SET in_degree = (SELECT COUNT(*) FROM edges WHERE edges.dst = nodes.id);",
592        )?;
593        Ok(())
594    }
595
596    pub fn get_hotspots(&self, limit: usize) -> anyhow::Result<Vec<(String, f64, f64, i64)>> {
597        let mut stmt = self.conn.prepare(
598            "SELECT path, churn, coupling, in_degree
599             FROM nodes
600             WHERE kind = 'File' AND (churn > 0.0 OR in_degree > 0)
601             ORDER BY (churn * COALESCE(coupling, 0.0) + CAST(in_degree AS DOUBLE) * 0.01) DESC
602             LIMIT ?",
603        )?;
604        let rows = stmt.query_map(params![limit as i64], |row| {
605            Ok((
606                row.get::<_, String>(0)?,
607                row.get::<_, f64>(1)?,
608                row.get::<_, f64>(2)?,
609                row.get::<_, i64>(3)?,
610            ))
611        })?;
612        let mut results = Vec::new();
613        for row in rows {
614            results.push(row?);
615        }
616        Ok(results)
617    }
618
619    pub fn get_ownership(&self) -> anyhow::Result<Vec<(String, i64)>> {
620        let mut stmt = self.conn.prepare(
621            "SELECT n.name, COUNT(e.id) as file_count
622             FROM nodes n
623             INNER JOIN edges e ON e.src = n.id AND e.kind = 'OWNS'
624             WHERE n.kind = 'Author'
625             GROUP BY n.name
626             ORDER BY file_count DESC",
627        )?;
628        let rows = stmt.query_map([], |row| {
629            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
630        })?;
631        let mut results = Vec::new();
632        for row in rows {
633            results.push(row?);
634        }
635        Ok(results)
636    }
637
638    pub fn compute_coupling(&self) -> anyhow::Result<()> {
639        self.conn.execute_batch(
640            "UPDATE nodes SET coupling = 0.0;
641             UPDATE nodes SET coupling = 
642                CASE 
643                    WHEN (SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') > 0
644                    THEN CAST(in_degree AS DOUBLE) / CAST((SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') AS DOUBLE)
645                    ELSE 0.0
646                END
647             WHERE kind = 'File';",
648        )?;
649        Ok(())
650    }
651
652    pub fn update_node_communities(
653        &self,
654        communities: &std::collections::HashMap<String, i64>,
655    ) -> anyhow::Result<usize> {
656        if communities.is_empty() {
657            return Ok(0);
658        }
659        let mut count = 0;
660        let mut stmt = self
661            .conn
662            .prepare("UPDATE nodes SET community = ? WHERE id = ?")?;
663        for (node_id, community) in communities {
664            let affected = stmt.execute(params![*community, node_id.as_str()])?;
665            count += affected;
666        }
667        Ok(count)
668    }
669
670    pub fn get_stats(&self) -> anyhow::Result<RepoStats> {
671        let node_count = self.node_count()?;
672        let edge_count = self.edge_count()?;
673        let lang_breakdown = self.get_language_breakdown()?;
674        let communities = self.get_communities()?;
675        let counts_by_kind = self.get_node_counts_by_kind()?;
676
677        Ok(RepoStats {
678            node_count,
679            edge_count,
680            language_breakdown: lang_breakdown,
681            community_count: communities.len() as u32,
682            function_count: counts_by_kind.get("Function").copied().unwrap_or(0),
683            class_count: counts_by_kind.get("Class").copied().unwrap_or(0),
684            file_count: counts_by_kind.get("File").copied().unwrap_or(0),
685        })
686    }
687
688    pub fn get_entry_points(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
689        let mut stmt = self.conn.prepare(
690            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree
691             FROM nodes
692             WHERE in_degree = 0 AND kind != 'File' AND kind != 'Author'
693             ORDER BY out_degree DESC
694             LIMIT ?",
695        )?;
696        let rows = stmt.query_map(params![limit as i64], |row| {
697            Ok(Node {
698                id: row.get(0)?,
699                kind: row.get(1)?,
700                name: row.get(2)?,
701                path: row.get(3)?,
702                line_start: row.get(4)?,
703                line_end: row.get(5)?,
704                language: row.get(6)?,
705                churn: row.get(7)?,
706                coupling: row.get(8)?,
707                community: row.get(9)?,
708                in_degree: row.get(10)?,
709                out_degree: row.get(11)?,
710            })
711        })?;
712        let mut results = Vec::new();
713        for row in rows {
714            results.push(row?);
715        }
716        Ok(results)
717    }
718
719    pub fn get_god_nodes(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
720        let mut stmt = self.conn.prepare(
721            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree
722             FROM nodes
723             WHERE in_degree > 0 AND kind != 'File' AND kind != 'Author'
724             ORDER BY in_degree DESC
725             LIMIT ?",
726        )?;
727        let rows = stmt.query_map(params![limit as i64], |row| {
728            Ok(Node {
729                id: row.get(0)?,
730                kind: row.get(1)?,
731                name: row.get(2)?,
732                path: row.get(3)?,
733                line_start: row.get(4)?,
734                line_end: row.get(5)?,
735                language: row.get(6)?,
736                churn: row.get(7)?,
737                coupling: row.get(8)?,
738                community: row.get(9)?,
739                in_degree: row.get(10)?,
740                out_degree: row.get(11)?,
741            })
742        })?;
743        let mut results = Vec::new();
744        for row in rows {
745            results.push(row?);
746        }
747        Ok(results)
748    }
749
750    pub fn get_communities(&self) -> anyhow::Result<Vec<CommunityRow>> {
751        let mut stmt = self.conn.prepare(
752            "SELECT community, kind, name, path, in_degree
753             FROM nodes
754             WHERE community > 0
755             ORDER BY community",
756        )?;
757        let rows = stmt.query_map([], |row| {
758            Ok((
759                row.get::<_, i64>(0)?,
760                row.get::<_, String>(1)?,
761                row.get::<_, String>(2)?,
762                row.get::<_, String>(3)?,
763                row.get::<_, i64>(4)?,
764            ))
765        })?;
766
767        let mut community_map: std::collections::HashMap<i64, CommunityGroup> =
768            std::collections::HashMap::new();
769        for row in rows {
770            let (community, kind, name, _path, in_degree) = row?;
771            let entry = community_map
772                .entry(community)
773                .or_insert_with(|| (Vec::new(), 0));
774            entry.0.push((kind, in_degree, name));
775            entry.1 += 1;
776        }
777
778        let mut result: Vec<CommunityRow> = community_map
779            .into_iter()
780            .map(|(community, (mut items, count))| {
781                items.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.2.cmp(&b.2)));
782                let top_nodes: Vec<String> = items
783                    .iter()
784                    .take(5)
785                    .map(|(kind, _deg, name)| format!("{}:{}", kind, name))
786                    .collect();
787                let label = top_nodes
788                    .first()
789                    .cloned()
790                    .unwrap_or_else(|| format!("community-{}", community));
791                (community, label, count, top_nodes)
792            })
793            .collect();
794
795        result.sort_by_key(|row| std::cmp::Reverse(row.2));
796        Ok(result)
797    }
798
799    pub fn clear_communities(&self) -> anyhow::Result<()> {
800        self.conn.execute("UPDATE nodes SET community = 0", [])?;
801        self.conn.execute("DELETE FROM communities", [])?;
802        Ok(())
803    }
804
805    /// BFS following only incoming edges — returns all nodes that depend on `id`.
806    /// Used for blast-radius analysis: if `id` changes, these nodes are affected.
807    pub fn get_dependents(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
808        let mut seen = std::collections::HashSet::new();
809        seen.insert(id.to_string());
810        let mut current = vec![id.to_string()];
811        let mut result: Vec<Node> = Vec::new();
812        let max_depth = depth.min(3);
813
814        for _ in 0..max_depth {
815            if current.is_empty() {
816                break;
817            }
818            let mut next = Vec::new();
819            for cur_id in &current {
820                let mut stmt = self.conn.prepare(
821                    "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree
822                     FROM nodes n
823                     INNER JOIN edges e ON e.src = n.id AND e.dst = ?
824                     LIMIT 100",
825                )?;
826                let rows = stmt.query_map(params![cur_id], |row| {
827                    Ok(Node {
828                        id: row.get(0)?,
829                        kind: row.get(1)?,
830                        name: row.get(2)?,
831                        path: row.get(3)?,
832                        line_start: row.get(4)?,
833                        line_end: row.get(5)?,
834                        language: row.get(6)?,
835                        churn: row.get(7)?,
836                        coupling: row.get(8)?,
837                        community: row.get(9)?,
838                        in_degree: row.get(10)?,
839                        out_degree: row.get(11)?,
840                    })
841                })?;
842                for row in rows {
843                    let node = row?;
844                    if seen.insert(node.id.clone()) {
845                        next.push(node.id.clone());
846                        result.push(node);
847                    }
848                }
849            }
850            current = next;
851        }
852
853        Ok(result)
854    }
855
856    pub fn get_nodes_by_community(&self, community: i64) -> anyhow::Result<Vec<Node>> {
857        let mut stmt = self.conn.prepare(
858            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree FROM nodes WHERE community = ?",
859        )?;
860        let rows = stmt.query_map(params![community], |row| {
861            Ok(Node {
862                id: row.get(0)?,
863                kind: row.get(1)?,
864                name: row.get(2)?,
865                path: row.get(3)?,
866                line_start: row.get(4)?,
867                line_end: row.get(5)?,
868                language: row.get(6)?,
869                churn: row.get(7)?,
870                coupling: row.get(8)?,
871                community: row.get(9)?,
872                in_degree: row.get(10)?,
873                out_degree: row.get(11)?,
874            })
875        })?;
876        let mut nodes = Vec::new();
877        for row in rows {
878            nodes.push(row?);
879        }
880        Ok(nodes)
881    }
882
883    pub fn get_edges_by_community(&self, community: i64) -> anyhow::Result<Vec<Edge>> {
884        let mut stmt = self.conn.prepare(
885            "SELECT DISTINCT e.id, e.src, e.dst, e.kind, e.weight, e.confidence
886             FROM edges e
887             INNER JOIN nodes n1 ON e.src = n1.id AND n1.community = ?
888             INNER JOIN nodes n2 ON e.dst = n2.id AND n2.community = ?",
889        )?;
890        let rows = stmt.query_map(params![community, community], |row| {
891            Ok(Edge {
892                id: row.get(0)?,
893                src: row.get(1)?,
894                dst: row.get(2)?,
895                kind: row.get(3)?,
896                weight: row.get(4)?,
897                confidence: row.get(5)?,
898            })
899        })?;
900        let mut edges = Vec::new();
901        for row in rows {
902            edges.push(row?);
903        }
904        Ok(edges)
905    }
906
907    // ── File hashes for incremental indexing ────────────────────────────────
908
909    pub fn get_file_hashes(&self) -> anyhow::Result<std::collections::HashMap<String, String>> {
910        let mut stmt = self.conn.prepare("SELECT path, hash FROM file_hashes")?;
911        let rows = stmt.query_map([], |row| {
912            Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
913        })?;
914        let mut result = std::collections::HashMap::new();
915        for row in rows {
916            let (path, hash) = row?;
917            result.insert(path, hash);
918        }
919        Ok(result)
920    }
921
922    pub fn set_file_hash(&self, path: &str, hash: &str) -> anyhow::Result<()> {
923        self.conn.execute(
924            "INSERT OR REPLACE INTO file_hashes (path, hash) VALUES (?, ?)",
925            params![path, hash],
926        )?;
927        Ok(())
928    }
929
930    pub fn remove_file_hashes(&self, paths: &[String]) -> anyhow::Result<()> {
931        if paths.is_empty() {
932            return Ok(());
933        }
934        let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
935        let sql = format!("DELETE FROM file_hashes WHERE path IN ({})", placeholders);
936        let mut stmt = self.conn.prepare(&sql)?;
937        let params: Vec<&dyn duckdb::ToSql> =
938            paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
939        stmt.execute(params.as_slice())?;
940        Ok(())
941    }
942
943    pub fn delete_nodes_by_paths(&self, paths: &[String]) -> anyhow::Result<usize> {
944        if paths.is_empty() {
945            return Ok(0);
946        }
947        let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
948        // Delete edges connected to nodes from these paths first
949        let sql_edges = format!(
950            "DELETE FROM edges WHERE src IN (SELECT id FROM nodes WHERE path IN ({})) OR dst IN (SELECT id FROM nodes WHERE path IN ({}))",
951            placeholders, placeholders
952        );
953        let mut stmt_edges = self.conn.prepare(&sql_edges)?;
954        let params_edges: Vec<&dyn duckdb::ToSql> = paths
955            .iter()
956            .chain(paths.iter())
957            .map(|p| p as &dyn duckdb::ToSql)
958            .collect();
959        stmt_edges.execute(params_edges.as_slice())?;
960
961        // Delete nodes
962        let sql_nodes = format!("DELETE FROM nodes WHERE path IN ({})", placeholders);
963        let mut stmt_nodes = self.conn.prepare(&sql_nodes)?;
964        let params_nodes: Vec<&dyn duckdb::ToSql> =
965            paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
966        let count = stmt_nodes.execute(params_nodes.as_slice())?;
967        Ok(count)
968    }
969}
970
971pub fn repo_hash(path: &Path) -> String {
972    let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
973    let path_str = canonical.to_string_lossy().to_string();
974    let mut hasher = Sha256::new();
975    hasher.update(path_str.as_bytes());
976    format!("{:x}", hasher.finalize())[..16].to_string()
977}