Skip to main content

cgx_engine/
graph.rs

1use std::path::{Path, PathBuf};
2
3use duckdb::params;
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6
7use crate::parser::{EdgeDef, NodeDef};
8
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct Node {
11    pub id: String,
12    pub kind: String,
13    pub name: String,
14    pub path: String,
15    pub line_start: u32,
16    pub line_end: u32,
17    #[serde(default)]
18    pub language: String,
19    #[serde(default)]
20    pub churn: f64,
21    #[serde(default)]
22    pub coupling: f64,
23    #[serde(default)]
24    pub community: i64,
25    #[serde(default)]
26    pub in_degree: i64,
27    #[serde(default)]
28    pub out_degree: i64,
29}
30
31#[derive(Debug, Clone, Serialize, Deserialize)]
32pub struct Edge {
33    pub id: String,
34    pub src: String,
35    pub dst: String,
36    pub kind: String,
37    #[serde(default = "default_weight")]
38    pub weight: f64,
39    #[serde(default = "default_weight")]
40    pub confidence: f64,
41}
42
/// Serde fallback used for `Edge::weight` and `Edge::confidence` when the
/// field is missing from the input.
fn default_weight() -> f64 {
    1.0_f64
}
46
47#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct RepoStats {
49    pub node_count: u64,
50    pub edge_count: u64,
51    pub language_breakdown: std::collections::HashMap<String, f64>,
52    pub community_count: u32,
53    pub function_count: u64,
54    pub class_count: u64,
55    pub file_count: u64,
56}
57
/// One community summary as returned by `GraphDb::get_communities`:
/// `(community id, label, member count, up to five "kind:name" entries)`.
pub type CommunityRow = (i64, String, i64, Vec<String>);
/// Accumulator used while grouping nodes by community: the members seen so
/// far plus the running member count.
type CommunityGroup = (Vec<(String, i64, String)>, i64); // (kind, in_degree, name)
60
61impl Node {
62    pub fn from_def(d: &NodeDef, language: &str) -> Self {
63        Self {
64            id: d.id.clone(),
65            kind: d.kind.as_str().to_string(),
66            name: d.name.clone(),
67            path: d.path.clone(),
68            line_start: d.line_start,
69            line_end: d.line_end,
70            language: language.to_string(),
71            churn: 0.0,
72            coupling: 0.0,
73            community: 0,
74            in_degree: 0,
75            out_degree: 0,
76        }
77    }
78}
79
80impl Edge {
81    pub fn from_def(d: &EdgeDef) -> Self {
82        let id = format!("{}|{}|{}", d.src, d.kind.as_str(), d.dst);
83        Self {
84            id,
85            src: d.src.clone(),
86            dst: d.dst.clone(),
87            kind: d.kind.as_str().to_string(),
88            weight: d.weight,
89            confidence: d.confidence,
90        }
91    }
92}
93
94pub struct GraphDb {
95    pub conn: duckdb::Connection,
96    pub repo_id: String,
97    pub db_path: PathBuf,
98}
99
100impl GraphDb {
101    pub fn open(repo_path: &Path) -> anyhow::Result<Self> {
102        let repo_id = repo_hash(repo_path);
103        let dir = dirs::home_dir()
104            .ok_or_else(|| anyhow::anyhow!("cannot determine home directory"))?
105            .join(".cgx")
106            .join("repos");
107        std::fs::create_dir_all(&dir)?;
108
109        let db_path = dir.join(format!("{}.db", repo_id));
110        let conn = duckdb::Connection::open(&db_path)?;
111
112        conn.execute_batch(
113            "CREATE TABLE IF NOT EXISTS nodes (
114                id         VARCHAR PRIMARY KEY,
115                kind       VARCHAR NOT NULL,
116                name       VARCHAR NOT NULL,
117                path       VARCHAR NOT NULL,
118                line_start INTEGER,
119                line_end   INTEGER,
120                language   VARCHAR,
121                churn      DOUBLE DEFAULT 0.0,
122                coupling   DOUBLE DEFAULT 0.0,
123                community  BIGINT DEFAULT 0,
124                in_degree  BIGINT DEFAULT 0,
125                out_degree BIGINT DEFAULT 0,
126                metadata   JSON
127            );
128            CREATE TABLE IF NOT EXISTS edges (
129                id         VARCHAR PRIMARY KEY,
130                src        VARCHAR NOT NULL,
131                dst        VARCHAR NOT NULL,
132                kind       VARCHAR NOT NULL,
133                weight     DOUBLE DEFAULT 1.0,
134                confidence DOUBLE DEFAULT 1.0,
135                metadata   JSON
136            );
137            CREATE TABLE IF NOT EXISTS communities (
138                id         INTEGER PRIMARY KEY,
139                label      VARCHAR,
140                node_count INTEGER,
141                top_nodes  JSON
142            );
143            CREATE TABLE IF NOT EXISTS repo_meta (
144                key        VARCHAR PRIMARY KEY,
145                value      JSON
146            );
147            CREATE TABLE IF NOT EXISTS file_hashes (
148                path       VARCHAR PRIMARY KEY,
149                hash       VARCHAR NOT NULL,
150                indexed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
151            );
152            CREATE INDEX IF NOT EXISTS idx_nodes_kind      ON nodes(kind);
153            CREATE INDEX IF NOT EXISTS idx_nodes_path      ON nodes(path);
154            CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community);
155            CREATE INDEX IF NOT EXISTS idx_edges_src       ON edges(src);
156            CREATE INDEX IF NOT EXISTS idx_edges_dst       ON edges(dst);
157            CREATE INDEX IF NOT EXISTS idx_edges_kind      ON edges(kind);",
158        )?;
159
160        Ok(Self {
161            conn,
162            repo_id,
163            db_path,
164        })
165    }
166
167    pub fn upsert_nodes(&self, nodes: &[Node]) -> anyhow::Result<usize> {
168        if nodes.is_empty() {
169            return Ok(0);
170        }
171        let mut count = 0;
172        let mut stmt = self.conn.prepare(
173            "INSERT OR REPLACE INTO nodes (id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree)
174             VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
175        )?;
176        for node in nodes {
177            stmt.execute(params![
178                node.id,
179                node.kind,
180                node.name,
181                node.path,
182                node.line_start,
183                node.line_end,
184                node.language,
185                node.churn,
186                node.coupling,
187                node.community,
188                node.in_degree,
189                node.out_degree,
190            ])?;
191            count += 1;
192        }
193        Ok(count)
194    }
195
196    pub fn upsert_edges(&self, edges: &[Edge]) -> anyhow::Result<usize> {
197        if edges.is_empty() {
198            return Ok(0);
199        }
200        let mut count = 0;
201        let mut stmt = self.conn.prepare(
202            "INSERT OR REPLACE INTO edges (id, src, dst, kind, weight, confidence)
203             VALUES (?, ?, ?, ?, ?, ?)",
204        )?;
205        for edge in edges {
206            stmt.execute(params![
207                edge.id,
208                edge.src,
209                edge.dst,
210                edge.kind,
211                edge.weight,
212                edge.confidence,
213            ])?;
214            count += 1;
215        }
216        Ok(count)
217    }
218
219    pub fn get_node(&self, id: &str) -> anyhow::Result<Option<Node>> {
220        let mut stmt = self
221            .conn
222            .prepare("SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree FROM nodes WHERE id = ?")?;
223        let mut rows = stmt.query_map(params![id], |row| {
224            Ok(Node {
225                id: row.get(0)?,
226                kind: row.get(1)?,
227                name: row.get(2)?,
228                path: row.get(3)?,
229                line_start: row.get(4)?,
230                line_end: row.get(5)?,
231                language: row.get(6)?,
232                churn: row.get(7)?,
233                coupling: row.get(8)?,
234                community: row.get(9)?,
235                in_degree: row.get(10)?,
236                out_degree: row.get(11)?,
237            })
238        })?;
239
240        match rows.next() {
241            Some(Ok(node)) => Ok(Some(node)),
242            _ => Ok(None),
243        }
244    }
245
246    pub fn get_neighbors(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
247        let mut seen = std::collections::HashSet::new();
248        seen.insert(id.to_string());
249        let mut current = vec![id.to_string()];
250        let mut result: Vec<Node> = Vec::new();
251        let max_depth = depth.min(3);
252
253        for _ in 0..max_depth {
254            if current.is_empty() {
255                break;
256            }
257            let mut next = Vec::new();
258
259            for cur_id in &current {
260                let mut stmt = self.conn.prepare(
261                    "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree
262                     FROM nodes n
263                     INNER JOIN edges e ON (e.dst = n.id AND e.src = ?1) OR (e.src = n.id AND e.dst = ?2)
264                     LIMIT 100",
265                )?;
266                let rows = stmt.query_map(params![cur_id, cur_id], |row| {
267                    Ok(Node {
268                        id: row.get(0)?,
269                        kind: row.get(1)?,
270                        name: row.get(2)?,
271                        path: row.get(3)?,
272                        line_start: row.get(4)?,
273                        line_end: row.get(5)?,
274                        language: row.get(6)?,
275                        churn: row.get(7)?,
276                        coupling: row.get(8)?,
277                        community: row.get(9)?,
278                        in_degree: row.get(10)?,
279                        out_degree: row.get(11)?,
280                    })
281                })?;
282
283                for row in rows {
284                    let node = row?;
285                    if seen.insert(node.id.clone()) {
286                        next.push(node.id.clone());
287                        result.push(node);
288                    }
289                }
290            }
291            current = next;
292        }
293
294        Ok(result)
295    }
296
297    pub fn get_all_nodes(&self) -> anyhow::Result<Vec<Node>> {
298        let mut stmt = self.conn.prepare(
299            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree FROM nodes",
300        )?;
301        let rows = stmt.query_map([], |row| {
302            Ok(Node {
303                id: row.get(0)?,
304                kind: row.get(1)?,
305                name: row.get(2)?,
306                path: row.get(3)?,
307                line_start: row.get(4)?,
308                line_end: row.get(5)?,
309                language: row.get(6)?,
310                churn: row.get(7)?,
311                coupling: row.get(8)?,
312                community: row.get(9)?,
313                in_degree: row.get(10)?,
314                out_degree: row.get(11)?,
315            })
316        })?;
317
318        let mut nodes = Vec::new();
319        for row in rows {
320            nodes.push(row?);
321        }
322        Ok(nodes)
323    }
324
325    pub fn get_all_edges(&self) -> anyhow::Result<Vec<Edge>> {
326        let mut stmt = self.conn.prepare(
327            "SELECT id, src, dst, kind, weight, confidence FROM edges",
328        )?;
329        let rows = stmt.query_map([], |row| {
330            Ok(Edge {
331                id: row.get(0)?,
332                src: row.get(1)?,
333                dst: row.get(2)?,
334                kind: row.get(3)?,
335                weight: row.get(4)?,
336                confidence: row.get(5)?,
337            })
338        })?;
339
340        let mut edges = Vec::new();
341        for row in rows {
342            edges.push(row?);
343        }
344        Ok(edges)
345    }
346
347    pub fn node_count(&self) -> anyhow::Result<u64> {
348        let count: i64 = self
349            .conn
350            .query_row("SELECT COUNT(*) FROM nodes", [], |row| row.get(0))?;
351        Ok(count as u64)
352    }
353
354    pub fn edge_count(&self) -> anyhow::Result<u64> {
355        let count: i64 = self
356            .conn
357            .query_row("SELECT COUNT(*) FROM edges", [], |row| row.get(0))?;
358        Ok(count as u64)
359    }
360
361    pub fn clear(&self) -> anyhow::Result<()> {
362        // Drop and recreate tables instead of DELETE to avoid DuckDB ART index
363        // bulk-delete failures on large datasets (duckdb issue with indexed tables).
364        self.conn.execute_batch(
365            "DROP TABLE IF EXISTS edges;
366             DROP TABLE IF EXISTS nodes;
367             DROP TABLE IF EXISTS communities;
368             CREATE TABLE IF NOT EXISTS nodes (
369                 id         VARCHAR PRIMARY KEY,
370                 kind       VARCHAR NOT NULL,
371                 name       VARCHAR NOT NULL,
372                 path       VARCHAR NOT NULL,
373                 line_start INTEGER,
374                 line_end   INTEGER,
375                 language   VARCHAR,
376                 churn      DOUBLE DEFAULT 0.0,
377                 coupling   DOUBLE DEFAULT 0.0,
378                 community  BIGINT DEFAULT 0,
379                 in_degree  BIGINT DEFAULT 0,
380                 out_degree BIGINT DEFAULT 0,
381                 metadata   JSON
382             );
383             CREATE TABLE IF NOT EXISTS edges (
384                 id         VARCHAR PRIMARY KEY,
385                 src        VARCHAR NOT NULL,
386                 dst        VARCHAR NOT NULL,
387                 kind       VARCHAR NOT NULL,
388                 weight     DOUBLE DEFAULT 1.0,
389                 confidence DOUBLE DEFAULT 1.0,
390                 metadata   JSON
391             );
392             CREATE TABLE IF NOT EXISTS communities (
393                 id         INTEGER PRIMARY KEY,
394                 label      VARCHAR,
395                 node_count INTEGER,
396                 top_nodes  JSON
397             );
398             CREATE INDEX IF NOT EXISTS idx_nodes_kind      ON nodes(kind);
399             CREATE INDEX IF NOT EXISTS idx_nodes_path      ON nodes(path);
400             CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community);
401             CREATE INDEX IF NOT EXISTS idx_edges_src       ON edges(src);
402             CREATE INDEX IF NOT EXISTS idx_edges_dst       ON edges(dst);
403             CREATE INDEX IF NOT EXISTS idx_edges_kind      ON edges(kind);",
404        )?;
405        Ok(())
406    }
407
408    pub fn get_language_breakdown(&self) -> anyhow::Result<std::collections::HashMap<String, f64>> {
409        let mut stmt = self.conn.prepare(
410            "SELECT language, COUNT(*) as cnt FROM nodes WHERE language != '' GROUP BY language",
411        )?;
412        let rows = stmt.query_map([], |row| {
413            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
414        })?;
415
416        let mut counts: std::collections::HashMap<String, i64> = std::collections::HashMap::new();
417        for row in rows {
418            let (lang, cnt) = row?;
419            *counts.entry(lang).or_default() += cnt;
420        }
421
422        let total: i64 = counts.values().sum();
423        if total == 0 {
424            return Ok(std::collections::HashMap::new());
425        }
426
427        let mut breakdown = std::collections::HashMap::new();
428        for (lang, cnt) in counts {
429            breakdown.insert(lang, cnt as f64 / total as f64);
430        }
431        Ok(breakdown)
432    }
433
434    pub fn get_node_counts_by_kind(&self) -> anyhow::Result<std::collections::HashMap<String, u64>> {
435        let mut stmt = self.conn.prepare(
436            "SELECT kind, COUNT(*) as cnt FROM nodes GROUP BY kind",
437        )?;
438        let rows = stmt.query_map([], |row| {
439            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
440        })?;
441
442        let mut counts = std::collections::HashMap::new();
443        for row in rows {
444            let (kind, cnt) = row?;
445            counts.insert(kind, cnt as u64);
446        }
447        Ok(counts)
448    }
449
450    pub fn upsert_node_scores(&self, node_id: &str, churn: f64, coupling: f64) -> anyhow::Result<()> {
451        self.conn.execute(
452            "UPDATE nodes SET churn = ?, coupling = ? WHERE id = ?",
453            params![churn, coupling, node_id],
454        )?;
455        Ok(())
456    }
457
458    pub fn update_in_out_degrees(&self) -> anyhow::Result<()> {
459        self.conn.execute_batch(
460            "UPDATE nodes SET in_degree = 0, out_degree = 0;
461             UPDATE nodes SET out_degree = (SELECT COUNT(*) FROM edges WHERE edges.src = nodes.id);
462             UPDATE nodes SET in_degree = (SELECT COUNT(*) FROM edges WHERE edges.dst = nodes.id);",
463        )?;
464        Ok(())
465    }
466
467    pub fn get_hotspots(&self, limit: usize) -> anyhow::Result<Vec<(String, f64, f64, i64)>> {
468        let mut stmt = self.conn.prepare(
469            "SELECT path, churn, coupling, in_degree
470             FROM nodes
471             WHERE kind = 'File' AND (churn > 0.0 OR in_degree > 0)
472             ORDER BY (churn * COALESCE(coupling, 0.0) + CAST(in_degree AS DOUBLE) * 0.01) DESC
473             LIMIT ?",
474        )?;
475        let rows = stmt.query_map(params![limit as i64], |row| {
476            Ok((
477                row.get::<_, String>(0)?,
478                row.get::<_, f64>(1)?,
479                row.get::<_, f64>(2)?,
480                row.get::<_, i64>(3)?,
481            ))
482        })?;
483        let mut results = Vec::new();
484        for row in rows {
485            results.push(row?);
486        }
487        Ok(results)
488    }
489
490    pub fn get_ownership(&self) -> anyhow::Result<Vec<(String, i64)>> {
491        let mut stmt = self.conn.prepare(
492            "SELECT n.name, COUNT(e.id) as file_count
493             FROM nodes n
494             INNER JOIN edges e ON e.src = n.id AND e.kind = 'OWNS'
495             WHERE n.kind = 'Author'
496             GROUP BY n.name
497             ORDER BY file_count DESC",
498        )?;
499        let rows = stmt.query_map([], |row| {
500            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
501        })?;
502        let mut results = Vec::new();
503        for row in rows {
504            results.push(row?);
505        }
506        Ok(results)
507    }
508
509    pub fn compute_coupling(&self) -> anyhow::Result<()> {
510        self.conn.execute_batch(
511            "UPDATE nodes SET coupling = 0.0;
512             UPDATE nodes SET coupling = 
513                CASE 
514                    WHEN (SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') > 0
515                    THEN CAST(in_degree AS DOUBLE) / CAST((SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') AS DOUBLE)
516                    ELSE 0.0
517                END
518             WHERE kind = 'File';",
519        )?;
520        Ok(())
521    }
522
523    pub fn update_node_communities(&self, communities: &std::collections::HashMap<String, i64>) -> anyhow::Result<usize> {
524        if communities.is_empty() {
525            return Ok(0);
526        }
527        let mut count = 0;
528        let mut stmt = self.conn.prepare("UPDATE nodes SET community = ? WHERE id = ?")?;
529        for (node_id, community) in communities {
530            let affected = stmt.execute(params![*community, node_id.as_str()])?;
531            count += affected;
532        }
533        Ok(count)
534    }
535
536    pub fn get_stats(&self) -> anyhow::Result<RepoStats> {
537        let node_count = self.node_count()?;
538        let edge_count = self.edge_count()?;
539        let lang_breakdown = self.get_language_breakdown()?;
540        let communities = self.get_communities()?;
541        let counts_by_kind = self.get_node_counts_by_kind()?;
542
543        Ok(RepoStats {
544            node_count,
545            edge_count,
546            language_breakdown: lang_breakdown,
547            community_count: communities.len() as u32,
548            function_count: counts_by_kind.get("Function").copied().unwrap_or(0),
549            class_count: counts_by_kind.get("Class").copied().unwrap_or(0),
550            file_count: counts_by_kind.get("File").copied().unwrap_or(0),
551        })
552    }
553
554    pub fn get_entry_points(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
555        let mut stmt = self.conn.prepare(
556            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree
557             FROM nodes
558             WHERE in_degree = 0 AND kind != 'File' AND kind != 'Author'
559             ORDER BY out_degree DESC
560             LIMIT ?",
561        )?;
562        let rows = stmt.query_map(params![limit as i64], |row| {
563            Ok(Node {
564                id: row.get(0)?,
565                kind: row.get(1)?,
566                name: row.get(2)?,
567                path: row.get(3)?,
568                line_start: row.get(4)?,
569                line_end: row.get(5)?,
570                language: row.get(6)?,
571                churn: row.get(7)?,
572                coupling: row.get(8)?,
573                community: row.get(9)?,
574                in_degree: row.get(10)?,
575                out_degree: row.get(11)?,
576            })
577        })?;
578        let mut results = Vec::new();
579        for row in rows {
580            results.push(row?);
581        }
582        Ok(results)
583    }
584
585    pub fn get_god_nodes(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
586        let mut stmt = self.conn.prepare(
587            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree
588             FROM nodes
589             WHERE in_degree > 0 AND kind != 'File' AND kind != 'Author'
590             ORDER BY in_degree DESC
591             LIMIT ?",
592        )?;
593        let rows = stmt.query_map(params![limit as i64], |row| {
594            Ok(Node {
595                id: row.get(0)?,
596                kind: row.get(1)?,
597                name: row.get(2)?,
598                path: row.get(3)?,
599                line_start: row.get(4)?,
600                line_end: row.get(5)?,
601                language: row.get(6)?,
602                churn: row.get(7)?,
603                coupling: row.get(8)?,
604                community: row.get(9)?,
605                in_degree: row.get(10)?,
606                out_degree: row.get(11)?,
607            })
608        })?;
609        let mut results = Vec::new();
610        for row in rows {
611            results.push(row?);
612        }
613        Ok(results)
614    }
615
616    pub fn get_communities(&self) -> anyhow::Result<Vec<CommunityRow>> {
617        let mut stmt = self.conn.prepare(
618            "SELECT community, kind, name, path, in_degree
619             FROM nodes
620             WHERE community > 0
621             ORDER BY community",
622        )?;
623        let rows = stmt.query_map([], |row| {
624            Ok((
625                row.get::<_, i64>(0)?,
626                row.get::<_, String>(1)?,
627                row.get::<_, String>(2)?,
628                row.get::<_, String>(3)?,
629                row.get::<_, i64>(4)?,
630            ))
631        })?;
632
633        let mut community_map: std::collections::HashMap<i64, CommunityGroup> = std::collections::HashMap::new();
634        for row in rows {
635            let (community, kind, name, _path, in_degree) = row?;
636            let entry = community_map.entry(community).or_insert_with(|| (Vec::new(), 0));
637            entry.0.push((kind, in_degree, name));
638            entry.1 += 1;
639        }
640
641        let mut result: Vec<CommunityRow> = community_map
642            .into_iter()
643            .map(|(community, (mut items, count))| {
644                items.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.2.cmp(&b.2)));
645                let top_nodes: Vec<String> = items
646                    .iter()
647                    .take(5)
648                    .map(|(kind, _deg, name)| format!("{}:{}", kind, name))
649                    .collect();
650                let label = top_nodes.first().cloned().unwrap_or_else(|| format!("community-{}", community));
651                (community, label, count, top_nodes)
652            })
653            .collect();
654
655        result.sort_by(|a, b| b.2.cmp(&a.2));
656        Ok(result)
657    }
658
659    pub fn clear_communities(&self) -> anyhow::Result<()> {
660        self.conn.execute("UPDATE nodes SET community = 0", [])?;
661        self.conn.execute("DELETE FROM communities", [])?;
662        Ok(())
663    }
664
665    /// BFS following only incoming edges — returns all nodes that depend on `id`.
666    /// Used for blast-radius analysis: if `id` changes, these nodes are affected.
667    pub fn get_dependents(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
668        let mut seen = std::collections::HashSet::new();
669        seen.insert(id.to_string());
670        let mut current = vec![id.to_string()];
671        let mut result: Vec<Node> = Vec::new();
672        let max_depth = depth.min(3);
673
674        for _ in 0..max_depth {
675            if current.is_empty() {
676                break;
677            }
678            let mut next = Vec::new();
679            for cur_id in &current {
680                let mut stmt = self.conn.prepare(
681                    "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree
682                     FROM nodes n
683                     INNER JOIN edges e ON e.src = n.id AND e.dst = ?
684                     LIMIT 100",
685                )?;
686                let rows = stmt.query_map(params![cur_id], |row| {
687                    Ok(Node {
688                        id: row.get(0)?,
689                        kind: row.get(1)?,
690                        name: row.get(2)?,
691                        path: row.get(3)?,
692                        line_start: row.get(4)?,
693                        line_end: row.get(5)?,
694                        language: row.get(6)?,
695                        churn: row.get(7)?,
696                        coupling: row.get(8)?,
697                        community: row.get(9)?,
698                        in_degree: row.get(10)?,
699                        out_degree: row.get(11)?,
700                    })
701                })?;
702                for row in rows {
703                    let node = row?;
704                    if seen.insert(node.id.clone()) {
705                        next.push(node.id.clone());
706                        result.push(node);
707                    }
708                }
709            }
710            current = next;
711        }
712
713        Ok(result)
714    }
715
716    pub fn get_nodes_by_community(&self, community: i64) -> anyhow::Result<Vec<Node>> {
717        let mut stmt = self.conn.prepare(
718            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree FROM nodes WHERE community = ?",
719        )?;
720        let rows = stmt.query_map(params![community], |row| {
721            Ok(Node {
722                id: row.get(0)?,
723                kind: row.get(1)?,
724                name: row.get(2)?,
725                path: row.get(3)?,
726                line_start: row.get(4)?,
727                line_end: row.get(5)?,
728                language: row.get(6)?,
729                churn: row.get(7)?,
730                coupling: row.get(8)?,
731                community: row.get(9)?,
732                in_degree: row.get(10)?,
733                out_degree: row.get(11)?,
734            })
735        })?;
736        let mut nodes = Vec::new();
737        for row in rows {
738            nodes.push(row?);
739        }
740        Ok(nodes)
741    }
742
743    pub fn get_edges_by_community(&self, community: i64) -> anyhow::Result<Vec<Edge>> {
744        let mut stmt = self.conn.prepare(
745            "SELECT DISTINCT e.id, e.src, e.dst, e.kind, e.weight, e.confidence
746             FROM edges e
747             INNER JOIN nodes n1 ON e.src = n1.id AND n1.community = ?
748             INNER JOIN nodes n2 ON e.dst = n2.id AND n2.community = ?",
749        )?;
750        let rows = stmt.query_map(params![community, community], |row| {
751            Ok(Edge {
752                id: row.get(0)?,
753                src: row.get(1)?,
754                dst: row.get(2)?,
755                kind: row.get(3)?,
756                weight: row.get(4)?,
757                confidence: row.get(5)?,
758            })
759        })?;
760        let mut edges = Vec::new();
761        for row in rows {
762            edges.push(row?);
763        }
764        Ok(edges)
765    }
766
767    // ── File hashes for incremental indexing ────────────────────────────────
768
769    pub fn get_file_hashes(&self) -> anyhow::Result<std::collections::HashMap<String, String>> {
770        let mut stmt = self.conn.prepare("SELECT path, hash FROM file_hashes")?;
771        let rows = stmt.query_map([], |row| {
772            Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
773        })?;
774        let mut result = std::collections::HashMap::new();
775        for row in rows {
776            let (path, hash) = row?;
777            result.insert(path, hash);
778        }
779        Ok(result)
780    }
781
782    pub fn set_file_hash(&self, path: &str, hash: &str) -> anyhow::Result<()> {
783        self.conn.execute(
784            "INSERT OR REPLACE INTO file_hashes (path, hash) VALUES (?, ?)",
785            params![path, hash],
786        )?;
787        Ok(())
788    }
789
790    pub fn remove_file_hashes(&self, paths: &[String]) -> anyhow::Result<()> {
791        if paths.is_empty() {
792            return Ok(());
793        }
794        let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
795        let sql = format!("DELETE FROM file_hashes WHERE path IN ({})", placeholders);
796        let mut stmt = self.conn.prepare(&sql)?;
797        let params: Vec<&dyn duckdb::ToSql> = paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
798        stmt.execute(params.as_slice())?;
799        Ok(())
800    }
801
802    pub fn delete_nodes_by_paths(&self, paths: &[String]) -> anyhow::Result<usize> {
803        if paths.is_empty() {
804            return Ok(0);
805        }
806        let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
807        // Delete edges connected to nodes from these paths first
808        let sql_edges = format!(
809            "DELETE FROM edges WHERE src IN (SELECT id FROM nodes WHERE path IN ({})) OR dst IN (SELECT id FROM nodes WHERE path IN ({}))",
810            placeholders, placeholders
811        );
812        let mut stmt_edges = self.conn.prepare(&sql_edges)?;
813        let params_edges: Vec<&dyn duckdb::ToSql> = paths.iter().chain(paths.iter()).map(|p| p as &dyn duckdb::ToSql).collect();
814        stmt_edges.execute(params_edges.as_slice())?;
815
816        // Delete nodes
817        let sql_nodes = format!("DELETE FROM nodes WHERE path IN ({})", placeholders);
818        let mut stmt_nodes = self.conn.prepare(&sql_nodes)?;
819        let params_nodes: Vec<&dyn duckdb::ToSql> = paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
820        let count = stmt_nodes.execute(params_nodes.as_slice())?;
821        Ok(count)
822    }
823}
824
825pub fn repo_hash(path: &Path) -> String {
826    let canonical = path
827        .canonicalize()
828        .unwrap_or_else(|_| path.to_path_buf());
829    let path_str = canonical.to_string_lossy().to_string();
830    let mut hasher = Sha256::new();
831    hasher.update(path_str.as_bytes());
832    format!("{:x}", hasher.finalize())[..16].to_string()
833}