Skip to main content

cgx_engine/
graph.rs

1use std::path::{Path, PathBuf};
2
3use duckdb::params;
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6
7use crate::parser::{EdgeDef, NodeDef};
8
/// A graph node as stored in the `nodes` table and exchanged through serde.
///
/// Fields annotated with `#[serde(default)]` may be absent from serialized
/// input, in which case they take their type's default value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    /// Primary key of the row in the `nodes` table.
    pub id: String,
    /// Node category; queries in this file match the values `File`,
    /// `Function`, `Class` and `Author`.
    pub kind: String,
    /// Name of the node (used when labelling communities and owners).
    pub name: String,
    /// Path the node belongs to; nodes are deleted per path during re-indexing.
    pub path: String,
    // NOTE(review): presumably the first source line of the definition — confirm against the parser.
    pub line_start: u32,
    // NOTE(review): presumably the last source line of the definition — confirm against the parser.
    pub line_end: u32,
    /// Language tag supplied to `Node::from_def`; empty values are excluded
    /// from the language breakdown.
    #[serde(default)]
    pub language: String,
    /// Churn score written by `GraphDb::upsert_node_scores`; feeds the hotspot ranking.
    #[serde(default)]
    pub churn: f64,
    /// Coupling score; `GraphDb::compute_coupling` sets it for `File` nodes to
    /// `in_degree` divided by the largest `File` in-degree.
    #[serde(default)]
    pub coupling: f64,
    /// Community id; `0` is the reset value and only ids greater than zero are
    /// reported as communities.
    #[serde(default)]
    pub community: i64,
    /// Number of edges whose `dst` is this node (see `GraphDb::update_in_out_degrees`).
    #[serde(default)]
    pub in_degree: i64,
    /// Number of edges whose `src` is this node (see `GraphDb::update_in_out_degrees`).
    #[serde(default)]
    pub out_degree: i64,
}
30
/// A directed graph edge as stored in the `edges` table and exchanged through serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
    /// Primary key of the row; `Edge::from_def` builds it as `src|kind|dst`.
    pub id: String,
    /// Id of the node the edge starts at.
    pub src: String,
    /// Id of the node the edge points to.
    pub dst: String,
    /// Edge category; this file queries for the value `OWNS`.
    pub kind: String,
    /// Edge weight; defaults to `1.0` when missing from serialized input.
    #[serde(default = "default_weight")]
    pub weight: f64,
    /// Edge confidence; defaults to `1.0` when missing from serialized input.
    #[serde(default = "default_weight")]
    pub confidence: f64,
}
42
/// Serde fallback for [`Edge::weight`] and [`Edge::confidence`]: a neutral `1.0`.
fn default_weight() -> f64 {
    1.0_f64
}
46
/// Summary statistics for an indexed repository, as assembled by `GraphDb::get_stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepoStats {
    /// Total number of rows in the `nodes` table.
    pub node_count: u64,
    /// Total number of rows in the `edges` table.
    pub edge_count: u64,
    /// Language name mapped to its share (0.0..=1.0) of nodes that have a
    /// non-empty language.
    pub language_breakdown: std::collections::HashMap<String, f64>,
    /// Number of distinct community ids greater than zero.
    pub community_count: u32,
    /// Number of nodes whose kind is `Function`.
    pub function_count: u64,
    /// Number of nodes whose kind is `Class`.
    pub class_count: u64,
    /// Number of nodes whose kind is `File`.
    pub file_count: u64,
}
57
/// One community summary: (community id, label, node count, top node labels as `kind:name`).
pub type CommunityRow = (i64, String, i64, Vec<String>);
/// Accumulator used while grouping nodes by community: the member list plus a running count.
type CommunityGroup = (Vec<(String, i64, String)>, i64); // (kind, in_degree, name)
60
61impl Node {
62    pub fn from_def(d: &NodeDef, language: &str) -> Self {
63        Self {
64            id: d.id.clone(),
65            kind: d.kind.as_str().to_string(),
66            name: d.name.clone(),
67            path: d.path.clone(),
68            line_start: d.line_start,
69            line_end: d.line_end,
70            language: language.to_string(),
71            churn: 0.0,
72            coupling: 0.0,
73            community: 0,
74            in_degree: 0,
75            out_degree: 0,
76        }
77    }
78}
79
80impl Edge {
81    pub fn from_def(d: &EdgeDef) -> Self {
82        let id = format!("{}|{}|{}", d.src, d.kind.as_str(), d.dst);
83        Self {
84            id,
85            src: d.src.clone(),
86            dst: d.dst.clone(),
87            kind: d.kind.as_str().to_string(),
88            weight: d.weight,
89            confidence: d.confidence,
90        }
91    }
92}
93
/// Handle to the per-repository DuckDB database holding the code graph.
pub struct GraphDb {
    /// Open DuckDB connection used by every query in this type.
    pub conn: duckdb::Connection,
    /// Repository identifier produced by `repo_hash` from the repository path.
    pub repo_id: String,
    /// Location of the database file (`<home>/.cgx/repos/<repo_id>.db`).
    pub db_path: PathBuf,
}
99
100impl GraphDb {
101    pub fn open(repo_path: &Path) -> anyhow::Result<Self> {
102        let repo_id = repo_hash(repo_path);
103        let dir = dirs::home_dir()
104            .ok_or_else(|| anyhow::anyhow!("cannot determine home directory"))?
105            .join(".cgx")
106            .join("repos");
107        std::fs::create_dir_all(&dir)?;
108
109        let db_path = dir.join(format!("{}.db", repo_id));
110        let conn = duckdb::Connection::open(&db_path)?;
111
112        conn.execute_batch(
113            "CREATE TABLE IF NOT EXISTS nodes (
114                id         VARCHAR PRIMARY KEY,
115                kind       VARCHAR NOT NULL,
116                name       VARCHAR NOT NULL,
117                path       VARCHAR NOT NULL,
118                line_start INTEGER,
119                line_end   INTEGER,
120                language   VARCHAR,
121                churn      DOUBLE DEFAULT 0.0,
122                coupling   DOUBLE DEFAULT 0.0,
123                community  BIGINT DEFAULT 0,
124                in_degree  BIGINT DEFAULT 0,
125                out_degree BIGINT DEFAULT 0,
126                metadata   JSON
127            );
128            CREATE TABLE IF NOT EXISTS edges (
129                id         VARCHAR PRIMARY KEY,
130                src        VARCHAR NOT NULL,
131                dst        VARCHAR NOT NULL,
132                kind       VARCHAR NOT NULL,
133                weight     DOUBLE DEFAULT 1.0,
134                confidence DOUBLE DEFAULT 1.0,
135                metadata   JSON
136            );
137            CREATE TABLE IF NOT EXISTS communities (
138                id         INTEGER PRIMARY KEY,
139                label      VARCHAR,
140                node_count INTEGER,
141                top_nodes  JSON
142            );
143            CREATE TABLE IF NOT EXISTS repo_meta (
144                key        VARCHAR PRIMARY KEY,
145                value      JSON
146            );
147            CREATE TABLE IF NOT EXISTS file_hashes (
148                path       VARCHAR PRIMARY KEY,
149                hash       VARCHAR NOT NULL,
150                indexed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
151            );
152            CREATE INDEX IF NOT EXISTS idx_nodes_kind      ON nodes(kind);
153            CREATE INDEX IF NOT EXISTS idx_nodes_path      ON nodes(path);
154            CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community);
155            CREATE INDEX IF NOT EXISTS idx_edges_src       ON edges(src);
156            CREATE INDEX IF NOT EXISTS idx_edges_dst       ON edges(dst);
157            CREATE INDEX IF NOT EXISTS idx_edges_kind      ON edges(kind);",
158        )?;
159
160        Ok(Self {
161            conn,
162            repo_id,
163            db_path,
164        })
165    }
166
167    pub fn upsert_nodes(&self, nodes: &[Node]) -> anyhow::Result<usize> {
168        if nodes.is_empty() {
169            return Ok(0);
170        }
171        let mut count = 0;
172        let mut stmt = self.conn.prepare(
173            "INSERT OR REPLACE INTO nodes (id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree)
174             VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
175        )?;
176        for node in nodes {
177            stmt.execute(params![
178                node.id,
179                node.kind,
180                node.name,
181                node.path,
182                node.line_start,
183                node.line_end,
184                node.language,
185                node.churn,
186                node.coupling,
187                node.community,
188                node.in_degree,
189                node.out_degree,
190            ])?;
191            count += 1;
192        }
193        Ok(count)
194    }
195
196    pub fn upsert_edges(&self, edges: &[Edge]) -> anyhow::Result<usize> {
197        if edges.is_empty() {
198            return Ok(0);
199        }
200        let mut count = 0;
201        let mut stmt = self.conn.prepare(
202            "INSERT OR REPLACE INTO edges (id, src, dst, kind, weight, confidence)
203             VALUES (?, ?, ?, ?, ?, ?)",
204        )?;
205        for edge in edges {
206            stmt.execute(params![
207                edge.id,
208                edge.src,
209                edge.dst,
210                edge.kind,
211                edge.weight,
212                edge.confidence,
213            ])?;
214            count += 1;
215        }
216        Ok(count)
217    }
218
219    pub fn get_node(&self, id: &str) -> anyhow::Result<Option<Node>> {
220        let mut stmt = self
221            .conn
222            .prepare("SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree FROM nodes WHERE id = ?")?;
223        let mut rows = stmt.query_map(params![id], |row| {
224            Ok(Node {
225                id: row.get(0)?,
226                kind: row.get(1)?,
227                name: row.get(2)?,
228                path: row.get(3)?,
229                line_start: row.get(4)?,
230                line_end: row.get(5)?,
231                language: row.get(6)?,
232                churn: row.get(7)?,
233                coupling: row.get(8)?,
234                community: row.get(9)?,
235                in_degree: row.get(10)?,
236                out_degree: row.get(11)?,
237            })
238        })?;
239
240        match rows.next() {
241            Some(Ok(node)) => Ok(Some(node)),
242            _ => Ok(None),
243        }
244    }
245
246    pub fn get_neighbors(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
247        let mut seen = std::collections::HashSet::new();
248        seen.insert(id.to_string());
249        let mut current = vec![id.to_string()];
250        let mut result: Vec<Node> = Vec::new();
251        let max_depth = depth.min(3);
252
253        for _ in 0..max_depth {
254            if current.is_empty() {
255                break;
256            }
257            let mut next = Vec::new();
258
259            for cur_id in &current {
260                let mut stmt = self.conn.prepare(
261                    "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree
262                     FROM nodes n
263                     INNER JOIN edges e ON (e.dst = n.id AND e.src = ?1) OR (e.src = n.id AND e.dst = ?2)
264                     LIMIT 100",
265                )?;
266                let rows = stmt.query_map(params![cur_id, cur_id], |row| {
267                    Ok(Node {
268                        id: row.get(0)?,
269                        kind: row.get(1)?,
270                        name: row.get(2)?,
271                        path: row.get(3)?,
272                        line_start: row.get(4)?,
273                        line_end: row.get(5)?,
274                        language: row.get(6)?,
275                        churn: row.get(7)?,
276                        coupling: row.get(8)?,
277                        community: row.get(9)?,
278                        in_degree: row.get(10)?,
279                        out_degree: row.get(11)?,
280                    })
281                })?;
282
283                for row in rows {
284                    let node = row?;
285                    if seen.insert(node.id.clone()) {
286                        next.push(node.id.clone());
287                        result.push(node);
288                    }
289                }
290            }
291            current = next;
292        }
293
294        Ok(result)
295    }
296
297    pub fn get_all_nodes(&self) -> anyhow::Result<Vec<Node>> {
298        let mut stmt = self.conn.prepare(
299            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree FROM nodes",
300        )?;
301        let rows = stmt.query_map([], |row| {
302            Ok(Node {
303                id: row.get(0)?,
304                kind: row.get(1)?,
305                name: row.get(2)?,
306                path: row.get(3)?,
307                line_start: row.get(4)?,
308                line_end: row.get(5)?,
309                language: row.get(6)?,
310                churn: row.get(7)?,
311                coupling: row.get(8)?,
312                community: row.get(9)?,
313                in_degree: row.get(10)?,
314                out_degree: row.get(11)?,
315            })
316        })?;
317
318        let mut nodes = Vec::new();
319        for row in rows {
320            nodes.push(row?);
321        }
322        Ok(nodes)
323    }
324
325    pub fn get_all_edges(&self) -> anyhow::Result<Vec<Edge>> {
326        let mut stmt = self
327            .conn
328            .prepare("SELECT id, src, dst, kind, weight, confidence FROM edges")?;
329        let rows = stmt.query_map([], |row| {
330            Ok(Edge {
331                id: row.get(0)?,
332                src: row.get(1)?,
333                dst: row.get(2)?,
334                kind: row.get(3)?,
335                weight: row.get(4)?,
336                confidence: row.get(5)?,
337            })
338        })?;
339
340        let mut edges = Vec::new();
341        for row in rows {
342            edges.push(row?);
343        }
344        Ok(edges)
345    }
346
347    pub fn node_count(&self) -> anyhow::Result<u64> {
348        let count: i64 = self
349            .conn
350            .query_row("SELECT COUNT(*) FROM nodes", [], |row| row.get(0))?;
351        Ok(count as u64)
352    }
353
354    pub fn edge_count(&self) -> anyhow::Result<u64> {
355        let count: i64 = self
356            .conn
357            .query_row("SELECT COUNT(*) FROM edges", [], |row| row.get(0))?;
358        Ok(count as u64)
359    }
360
361    pub fn clear(&self) -> anyhow::Result<()> {
362        // Drop and recreate tables instead of DELETE to avoid DuckDB ART index
363        // bulk-delete failures on large datasets (duckdb issue with indexed tables).
364        self.conn.execute_batch(
365            "DROP TABLE IF EXISTS edges;
366             DROP TABLE IF EXISTS nodes;
367             DROP TABLE IF EXISTS communities;
368             CREATE TABLE IF NOT EXISTS nodes (
369                 id         VARCHAR PRIMARY KEY,
370                 kind       VARCHAR NOT NULL,
371                 name       VARCHAR NOT NULL,
372                 path       VARCHAR NOT NULL,
373                 line_start INTEGER,
374                 line_end   INTEGER,
375                 language   VARCHAR,
376                 churn      DOUBLE DEFAULT 0.0,
377                 coupling   DOUBLE DEFAULT 0.0,
378                 community  BIGINT DEFAULT 0,
379                 in_degree  BIGINT DEFAULT 0,
380                 out_degree BIGINT DEFAULT 0,
381                 metadata   JSON
382             );
383             CREATE TABLE IF NOT EXISTS edges (
384                 id         VARCHAR PRIMARY KEY,
385                 src        VARCHAR NOT NULL,
386                 dst        VARCHAR NOT NULL,
387                 kind       VARCHAR NOT NULL,
388                 weight     DOUBLE DEFAULT 1.0,
389                 confidence DOUBLE DEFAULT 1.0,
390                 metadata   JSON
391             );
392             CREATE TABLE IF NOT EXISTS communities (
393                 id         INTEGER PRIMARY KEY,
394                 label      VARCHAR,
395                 node_count INTEGER,
396                 top_nodes  JSON
397             );
398             CREATE INDEX IF NOT EXISTS idx_nodes_kind      ON nodes(kind);
399             CREATE INDEX IF NOT EXISTS idx_nodes_path      ON nodes(path);
400             CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community);
401             CREATE INDEX IF NOT EXISTS idx_edges_src       ON edges(src);
402             CREATE INDEX IF NOT EXISTS idx_edges_dst       ON edges(dst);
403             CREATE INDEX IF NOT EXISTS idx_edges_kind      ON edges(kind);",
404        )?;
405        Ok(())
406    }
407
408    pub fn get_language_breakdown(&self) -> anyhow::Result<std::collections::HashMap<String, f64>> {
409        let mut stmt = self.conn.prepare(
410            "SELECT language, COUNT(*) as cnt FROM nodes WHERE language != '' GROUP BY language",
411        )?;
412        let rows = stmt.query_map([], |row| {
413            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
414        })?;
415
416        let mut counts: std::collections::HashMap<String, i64> = std::collections::HashMap::new();
417        for row in rows {
418            let (lang, cnt) = row?;
419            *counts.entry(lang).or_default() += cnt;
420        }
421
422        let total: i64 = counts.values().sum();
423        if total == 0 {
424            return Ok(std::collections::HashMap::new());
425        }
426
427        let mut breakdown = std::collections::HashMap::new();
428        for (lang, cnt) in counts {
429            breakdown.insert(lang, cnt as f64 / total as f64);
430        }
431        Ok(breakdown)
432    }
433
434    pub fn get_node_counts_by_kind(
435        &self,
436    ) -> anyhow::Result<std::collections::HashMap<String, u64>> {
437        let mut stmt = self
438            .conn
439            .prepare("SELECT kind, COUNT(*) as cnt FROM nodes GROUP BY kind")?;
440        let rows = stmt.query_map([], |row| {
441            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
442        })?;
443
444        let mut counts = std::collections::HashMap::new();
445        for row in rows {
446            let (kind, cnt) = row?;
447            counts.insert(kind, cnt as u64);
448        }
449        Ok(counts)
450    }
451
452    pub fn upsert_node_scores(
453        &self,
454        node_id: &str,
455        churn: f64,
456        coupling: f64,
457    ) -> anyhow::Result<()> {
458        self.conn.execute(
459            "UPDATE nodes SET churn = ?, coupling = ? WHERE id = ?",
460            params![churn, coupling, node_id],
461        )?;
462        Ok(())
463    }
464
465    pub fn update_in_out_degrees(&self) -> anyhow::Result<()> {
466        self.conn.execute_batch(
467            "UPDATE nodes SET in_degree = 0, out_degree = 0;
468             UPDATE nodes SET out_degree = (SELECT COUNT(*) FROM edges WHERE edges.src = nodes.id);
469             UPDATE nodes SET in_degree = (SELECT COUNT(*) FROM edges WHERE edges.dst = nodes.id);",
470        )?;
471        Ok(())
472    }
473
474    pub fn get_hotspots(&self, limit: usize) -> anyhow::Result<Vec<(String, f64, f64, i64)>> {
475        let mut stmt = self.conn.prepare(
476            "SELECT path, churn, coupling, in_degree
477             FROM nodes
478             WHERE kind = 'File' AND (churn > 0.0 OR in_degree > 0)
479             ORDER BY (churn * COALESCE(coupling, 0.0) + CAST(in_degree AS DOUBLE) * 0.01) DESC
480             LIMIT ?",
481        )?;
482        let rows = stmt.query_map(params![limit as i64], |row| {
483            Ok((
484                row.get::<_, String>(0)?,
485                row.get::<_, f64>(1)?,
486                row.get::<_, f64>(2)?,
487                row.get::<_, i64>(3)?,
488            ))
489        })?;
490        let mut results = Vec::new();
491        for row in rows {
492            results.push(row?);
493        }
494        Ok(results)
495    }
496
497    pub fn get_ownership(&self) -> anyhow::Result<Vec<(String, i64)>> {
498        let mut stmt = self.conn.prepare(
499            "SELECT n.name, COUNT(e.id) as file_count
500             FROM nodes n
501             INNER JOIN edges e ON e.src = n.id AND e.kind = 'OWNS'
502             WHERE n.kind = 'Author'
503             GROUP BY n.name
504             ORDER BY file_count DESC",
505        )?;
506        let rows = stmt.query_map([], |row| {
507            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
508        })?;
509        let mut results = Vec::new();
510        for row in rows {
511            results.push(row?);
512        }
513        Ok(results)
514    }
515
516    pub fn compute_coupling(&self) -> anyhow::Result<()> {
517        self.conn.execute_batch(
518            "UPDATE nodes SET coupling = 0.0;
519             UPDATE nodes SET coupling = 
520                CASE 
521                    WHEN (SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') > 0
522                    THEN CAST(in_degree AS DOUBLE) / CAST((SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') AS DOUBLE)
523                    ELSE 0.0
524                END
525             WHERE kind = 'File';",
526        )?;
527        Ok(())
528    }
529
530    pub fn update_node_communities(
531        &self,
532        communities: &std::collections::HashMap<String, i64>,
533    ) -> anyhow::Result<usize> {
534        if communities.is_empty() {
535            return Ok(0);
536        }
537        let mut count = 0;
538        let mut stmt = self
539            .conn
540            .prepare("UPDATE nodes SET community = ? WHERE id = ?")?;
541        for (node_id, community) in communities {
542            let affected = stmt.execute(params![*community, node_id.as_str()])?;
543            count += affected;
544        }
545        Ok(count)
546    }
547
548    pub fn get_stats(&self) -> anyhow::Result<RepoStats> {
549        let node_count = self.node_count()?;
550        let edge_count = self.edge_count()?;
551        let lang_breakdown = self.get_language_breakdown()?;
552        let communities = self.get_communities()?;
553        let counts_by_kind = self.get_node_counts_by_kind()?;
554
555        Ok(RepoStats {
556            node_count,
557            edge_count,
558            language_breakdown: lang_breakdown,
559            community_count: communities.len() as u32,
560            function_count: counts_by_kind.get("Function").copied().unwrap_or(0),
561            class_count: counts_by_kind.get("Class").copied().unwrap_or(0),
562            file_count: counts_by_kind.get("File").copied().unwrap_or(0),
563        })
564    }
565
566    pub fn get_entry_points(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
567        let mut stmt = self.conn.prepare(
568            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree
569             FROM nodes
570             WHERE in_degree = 0 AND kind != 'File' AND kind != 'Author'
571             ORDER BY out_degree DESC
572             LIMIT ?",
573        )?;
574        let rows = stmt.query_map(params![limit as i64], |row| {
575            Ok(Node {
576                id: row.get(0)?,
577                kind: row.get(1)?,
578                name: row.get(2)?,
579                path: row.get(3)?,
580                line_start: row.get(4)?,
581                line_end: row.get(5)?,
582                language: row.get(6)?,
583                churn: row.get(7)?,
584                coupling: row.get(8)?,
585                community: row.get(9)?,
586                in_degree: row.get(10)?,
587                out_degree: row.get(11)?,
588            })
589        })?;
590        let mut results = Vec::new();
591        for row in rows {
592            results.push(row?);
593        }
594        Ok(results)
595    }
596
597    pub fn get_god_nodes(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
598        let mut stmt = self.conn.prepare(
599            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree
600             FROM nodes
601             WHERE in_degree > 0 AND kind != 'File' AND kind != 'Author'
602             ORDER BY in_degree DESC
603             LIMIT ?",
604        )?;
605        let rows = stmt.query_map(params![limit as i64], |row| {
606            Ok(Node {
607                id: row.get(0)?,
608                kind: row.get(1)?,
609                name: row.get(2)?,
610                path: row.get(3)?,
611                line_start: row.get(4)?,
612                line_end: row.get(5)?,
613                language: row.get(6)?,
614                churn: row.get(7)?,
615                coupling: row.get(8)?,
616                community: row.get(9)?,
617                in_degree: row.get(10)?,
618                out_degree: row.get(11)?,
619            })
620        })?;
621        let mut results = Vec::new();
622        for row in rows {
623            results.push(row?);
624        }
625        Ok(results)
626    }
627
628    pub fn get_communities(&self) -> anyhow::Result<Vec<CommunityRow>> {
629        let mut stmt = self.conn.prepare(
630            "SELECT community, kind, name, path, in_degree
631             FROM nodes
632             WHERE community > 0
633             ORDER BY community",
634        )?;
635        let rows = stmt.query_map([], |row| {
636            Ok((
637                row.get::<_, i64>(0)?,
638                row.get::<_, String>(1)?,
639                row.get::<_, String>(2)?,
640                row.get::<_, String>(3)?,
641                row.get::<_, i64>(4)?,
642            ))
643        })?;
644
645        let mut community_map: std::collections::HashMap<i64, CommunityGroup> =
646            std::collections::HashMap::new();
647        for row in rows {
648            let (community, kind, name, _path, in_degree) = row?;
649            let entry = community_map
650                .entry(community)
651                .or_insert_with(|| (Vec::new(), 0));
652            entry.0.push((kind, in_degree, name));
653            entry.1 += 1;
654        }
655
656        let mut result: Vec<CommunityRow> = community_map
657            .into_iter()
658            .map(|(community, (mut items, count))| {
659                items.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.2.cmp(&b.2)));
660                let top_nodes: Vec<String> = items
661                    .iter()
662                    .take(5)
663                    .map(|(kind, _deg, name)| format!("{}:{}", kind, name))
664                    .collect();
665                let label = top_nodes
666                    .first()
667                    .cloned()
668                    .unwrap_or_else(|| format!("community-{}", community));
669                (community, label, count, top_nodes)
670            })
671            .collect();
672
673        result.sort_by_key(|row| std::cmp::Reverse(row.2));
674        Ok(result)
675    }
676
677    pub fn clear_communities(&self) -> anyhow::Result<()> {
678        self.conn.execute("UPDATE nodes SET community = 0", [])?;
679        self.conn.execute("DELETE FROM communities", [])?;
680        Ok(())
681    }
682
683    /// BFS following only incoming edges — returns all nodes that depend on `id`.
684    /// Used for blast-radius analysis: if `id` changes, these nodes are affected.
685    pub fn get_dependents(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
686        let mut seen = std::collections::HashSet::new();
687        seen.insert(id.to_string());
688        let mut current = vec![id.to_string()];
689        let mut result: Vec<Node> = Vec::new();
690        let max_depth = depth.min(3);
691
692        for _ in 0..max_depth {
693            if current.is_empty() {
694                break;
695            }
696            let mut next = Vec::new();
697            for cur_id in &current {
698                let mut stmt = self.conn.prepare(
699                    "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree
700                     FROM nodes n
701                     INNER JOIN edges e ON e.src = n.id AND e.dst = ?
702                     LIMIT 100",
703                )?;
704                let rows = stmt.query_map(params![cur_id], |row| {
705                    Ok(Node {
706                        id: row.get(0)?,
707                        kind: row.get(1)?,
708                        name: row.get(2)?,
709                        path: row.get(3)?,
710                        line_start: row.get(4)?,
711                        line_end: row.get(5)?,
712                        language: row.get(6)?,
713                        churn: row.get(7)?,
714                        coupling: row.get(8)?,
715                        community: row.get(9)?,
716                        in_degree: row.get(10)?,
717                        out_degree: row.get(11)?,
718                    })
719                })?;
720                for row in rows {
721                    let node = row?;
722                    if seen.insert(node.id.clone()) {
723                        next.push(node.id.clone());
724                        result.push(node);
725                    }
726                }
727            }
728            current = next;
729        }
730
731        Ok(result)
732    }
733
734    pub fn get_nodes_by_community(&self, community: i64) -> anyhow::Result<Vec<Node>> {
735        let mut stmt = self.conn.prepare(
736            "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree FROM nodes WHERE community = ?",
737        )?;
738        let rows = stmt.query_map(params![community], |row| {
739            Ok(Node {
740                id: row.get(0)?,
741                kind: row.get(1)?,
742                name: row.get(2)?,
743                path: row.get(3)?,
744                line_start: row.get(4)?,
745                line_end: row.get(5)?,
746                language: row.get(6)?,
747                churn: row.get(7)?,
748                coupling: row.get(8)?,
749                community: row.get(9)?,
750                in_degree: row.get(10)?,
751                out_degree: row.get(11)?,
752            })
753        })?;
754        let mut nodes = Vec::new();
755        for row in rows {
756            nodes.push(row?);
757        }
758        Ok(nodes)
759    }
760
761    pub fn get_edges_by_community(&self, community: i64) -> anyhow::Result<Vec<Edge>> {
762        let mut stmt = self.conn.prepare(
763            "SELECT DISTINCT e.id, e.src, e.dst, e.kind, e.weight, e.confidence
764             FROM edges e
765             INNER JOIN nodes n1 ON e.src = n1.id AND n1.community = ?
766             INNER JOIN nodes n2 ON e.dst = n2.id AND n2.community = ?",
767        )?;
768        let rows = stmt.query_map(params![community, community], |row| {
769            Ok(Edge {
770                id: row.get(0)?,
771                src: row.get(1)?,
772                dst: row.get(2)?,
773                kind: row.get(3)?,
774                weight: row.get(4)?,
775                confidence: row.get(5)?,
776            })
777        })?;
778        let mut edges = Vec::new();
779        for row in rows {
780            edges.push(row?);
781        }
782        Ok(edges)
783    }
784
785    // ── File hashes for incremental indexing ────────────────────────────────
786
787    pub fn get_file_hashes(&self) -> anyhow::Result<std::collections::HashMap<String, String>> {
788        let mut stmt = self.conn.prepare("SELECT path, hash FROM file_hashes")?;
789        let rows = stmt.query_map([], |row| {
790            Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
791        })?;
792        let mut result = std::collections::HashMap::new();
793        for row in rows {
794            let (path, hash) = row?;
795            result.insert(path, hash);
796        }
797        Ok(result)
798    }
799
800    pub fn set_file_hash(&self, path: &str, hash: &str) -> anyhow::Result<()> {
801        self.conn.execute(
802            "INSERT OR REPLACE INTO file_hashes (path, hash) VALUES (?, ?)",
803            params![path, hash],
804        )?;
805        Ok(())
806    }
807
808    pub fn remove_file_hashes(&self, paths: &[String]) -> anyhow::Result<()> {
809        if paths.is_empty() {
810            return Ok(());
811        }
812        let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
813        let sql = format!("DELETE FROM file_hashes WHERE path IN ({})", placeholders);
814        let mut stmt = self.conn.prepare(&sql)?;
815        let params: Vec<&dyn duckdb::ToSql> =
816            paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
817        stmt.execute(params.as_slice())?;
818        Ok(())
819    }
820
821    pub fn delete_nodes_by_paths(&self, paths: &[String]) -> anyhow::Result<usize> {
822        if paths.is_empty() {
823            return Ok(0);
824        }
825        let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
826        // Delete edges connected to nodes from these paths first
827        let sql_edges = format!(
828            "DELETE FROM edges WHERE src IN (SELECT id FROM nodes WHERE path IN ({})) OR dst IN (SELECT id FROM nodes WHERE path IN ({}))",
829            placeholders, placeholders
830        );
831        let mut stmt_edges = self.conn.prepare(&sql_edges)?;
832        let params_edges: Vec<&dyn duckdb::ToSql> = paths
833            .iter()
834            .chain(paths.iter())
835            .map(|p| p as &dyn duckdb::ToSql)
836            .collect();
837        stmt_edges.execute(params_edges.as_slice())?;
838
839        // Delete nodes
840        let sql_nodes = format!("DELETE FROM nodes WHERE path IN ({})", placeholders);
841        let mut stmt_nodes = self.conn.prepare(&sql_nodes)?;
842        let params_nodes: Vec<&dyn duckdb::ToSql> =
843            paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
844        let count = stmt_nodes.execute(params_nodes.as_slice())?;
845        Ok(count)
846    }
847}
848
849pub fn repo_hash(path: &Path) -> String {
850    let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
851    let path_str = canonical.to_string_lossy().to_string();
852    let mut hasher = Sha256::new();
853    hasher.update(path_str.as_bytes());
854    format!("{:x}", hasher.finalize())[..16].to_string()
855}