1use std::path::{Path, PathBuf};
2
3use duckdb::params;
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6
7use crate::parser::{EdgeDef, NodeDef};
8
/// A graph node, mirroring one row of the `nodes` table.
///
/// Fields marked `#[serde(default)]` fall back to their type's default value
/// when they are missing from deserialized input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    /// Primary key of the node (`nodes.id`).
    pub id: String,
    /// Node kind as a string. Queries in this file compare it against
    /// `'File'`, `'Author'`, `'Function'` and `'Class'`.
    pub kind: String,
    /// Name of the node (`nodes.name`).
    pub name: String,
    /// Path stored in `nodes.path`; `GraphDb::delete_nodes_by_paths` selects
    /// nodes by this value.
    pub path: String,
    /// Value stored in `nodes.line_start`.
    // NOTE(review): presumably the first source line of the definition — confirm against the parser.
    pub line_start: u32,
    /// Value stored in `nodes.line_end`.
    // NOTE(review): presumably the last source line of the definition — confirm against the parser.
    pub line_end: u32,
    /// Language label. Nodes with an empty language are excluded from
    /// `GraphDb::get_language_breakdown`.
    #[serde(default)]
    pub language: String,
    /// Churn score; initialised to `0.0` by `Node::from_def` and written by
    /// `GraphDb::upsert_node_scores`.
    #[serde(default)]
    pub churn: f64,
    /// Coupling score. For `File` nodes `GraphDb::compute_coupling` sets it
    /// to `in_degree` divided by the largest `in_degree` among `File` nodes.
    #[serde(default)]
    pub coupling: f64,
    /// Community id. `0` is what `GraphDb::clear_communities` resets to, and
    /// `GraphDb::get_communities` only reads nodes with a value above `0`.
    #[serde(default)]
    pub community: i64,
    /// Number of edges whose `dst` is this node, as computed by
    /// `GraphDb::update_in_out_degrees`.
    #[serde(default)]
    pub in_degree: i64,
    /// Number of edges whose `src` is this node, as computed by
    /// `GraphDb::update_in_out_degrees`.
    #[serde(default)]
    pub out_degree: i64,
}
30
/// A directed graph edge, mirroring one row of the `edges` table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
    /// Primary key of the edge. `Edge::from_def` builds it as
    /// `"<src>|<kind>|<dst>"`.
    pub id: String,
    /// Id of the source node.
    pub src: String,
    /// Id of the destination node.
    pub dst: String,
    /// Edge kind as a string; `GraphDb::get_ownership` filters on `'OWNS'`.
    pub kind: String,
    /// Edge weight; deserializes to `1.0` when absent.
    #[serde(default = "default_weight")]
    pub weight: f64,
    /// Edge confidence; deserializes to `1.0` when absent.
    #[serde(default = "default_weight")]
    pub confidence: f64,
}
42
/// Serde default for `Edge::weight` and `Edge::confidence`: always `1.0`.
fn default_weight() -> f64 {
    const DEFAULT: f64 = 1.0;
    DEFAULT
}
46
/// Aggregate statistics for one repository graph, as assembled by
/// `GraphDb::get_stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepoStats {
    /// Total number of rows in `nodes`.
    pub node_count: u64,
    /// Total number of rows in `edges`.
    pub edge_count: u64,
    /// Language -> share of nodes with that language, where each share is
    /// the per-language node count divided by the count over all nodes that
    /// have a non-empty language.
    pub language_breakdown: std::collections::HashMap<String, f64>,
    /// Number of communities returned by `GraphDb::get_communities`.
    pub community_count: u32,
    /// Number of nodes whose kind is `"Function"`.
    pub function_count: u64,
    /// Number of nodes whose kind is `"Class"`.
    pub class_count: u64,
    /// Number of nodes whose kind is `"File"`.
    pub file_count: u64,
}
57
/// One community summary as returned by `GraphDb::get_communities`:
/// `(community id, label, member count, top node descriptions)`, where each
/// description is formatted as `"<kind>:<name>"`.
pub type CommunityRow = (i64, String, i64, Vec<String>);
59type CommunityGroup = (Vec<(String, i64, String)>, i64); impl Node {
62 pub fn from_def(d: &NodeDef, language: &str) -> Self {
63 Self {
64 id: d.id.clone(),
65 kind: d.kind.as_str().to_string(),
66 name: d.name.clone(),
67 path: d.path.clone(),
68 line_start: d.line_start,
69 line_end: d.line_end,
70 language: language.to_string(),
71 churn: 0.0,
72 coupling: 0.0,
73 community: 0,
74 in_degree: 0,
75 out_degree: 0,
76 }
77 }
78}
79
80impl Edge {
81 pub fn from_def(d: &EdgeDef) -> Self {
82 let id = format!("{}|{}|{}", d.src, d.kind.as_str(), d.dst);
83 Self {
84 id,
85 src: d.src.clone(),
86 dst: d.dst.clone(),
87 kind: d.kind.as_str().to_string(),
88 weight: d.weight,
89 confidence: d.confidence,
90 }
91 }
92}
93
/// Handle to the on-disk DuckDB database holding one repository's graph.
pub struct GraphDb {
    /// Open DuckDB connection used by every query in this type.
    pub conn: duckdb::Connection,
    /// Identifier derived from the repository path by `repo_hash`.
    pub repo_id: String,
    /// Location of the database file (`<home>/.cgx/repos/<repo_id>.db`).
    pub db_path: PathBuf,
}
99
100impl GraphDb {
101 pub fn open(repo_path: &Path) -> anyhow::Result<Self> {
102 let repo_id = repo_hash(repo_path);
103 let dir = dirs::home_dir()
104 .ok_or_else(|| anyhow::anyhow!("cannot determine home directory"))?
105 .join(".cgx")
106 .join("repos");
107 std::fs::create_dir_all(&dir)?;
108
109 let db_path = dir.join(format!("{}.db", repo_id));
110 let conn = duckdb::Connection::open(&db_path)?;
111
112 conn.execute_batch(
113 "CREATE TABLE IF NOT EXISTS nodes (
114 id VARCHAR PRIMARY KEY,
115 kind VARCHAR NOT NULL,
116 name VARCHAR NOT NULL,
117 path VARCHAR NOT NULL,
118 line_start INTEGER,
119 line_end INTEGER,
120 language VARCHAR,
121 churn DOUBLE DEFAULT 0.0,
122 coupling DOUBLE DEFAULT 0.0,
123 community BIGINT DEFAULT 0,
124 in_degree BIGINT DEFAULT 0,
125 out_degree BIGINT DEFAULT 0,
126 metadata JSON
127 );
128 CREATE TABLE IF NOT EXISTS edges (
129 id VARCHAR PRIMARY KEY,
130 src VARCHAR NOT NULL,
131 dst VARCHAR NOT NULL,
132 kind VARCHAR NOT NULL,
133 weight DOUBLE DEFAULT 1.0,
134 confidence DOUBLE DEFAULT 1.0,
135 metadata JSON
136 );
137 CREATE TABLE IF NOT EXISTS communities (
138 id INTEGER PRIMARY KEY,
139 label VARCHAR,
140 node_count INTEGER,
141 top_nodes JSON
142 );
143 CREATE TABLE IF NOT EXISTS repo_meta (
144 key VARCHAR PRIMARY KEY,
145 value JSON
146 );
147 CREATE TABLE IF NOT EXISTS file_hashes (
148 path VARCHAR PRIMARY KEY,
149 hash VARCHAR NOT NULL,
150 indexed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
151 );
152 CREATE INDEX IF NOT EXISTS idx_nodes_kind ON nodes(kind);
153 CREATE INDEX IF NOT EXISTS idx_nodes_path ON nodes(path);
154 CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community);
155 CREATE INDEX IF NOT EXISTS idx_edges_src ON edges(src);
156 CREATE INDEX IF NOT EXISTS idx_edges_dst ON edges(dst);
157 CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind);",
158 )?;
159
160 Ok(Self {
161 conn,
162 repo_id,
163 db_path,
164 })
165 }
166
167 pub fn upsert_nodes(&self, nodes: &[Node]) -> anyhow::Result<usize> {
168 if nodes.is_empty() {
169 return Ok(0);
170 }
171 let mut count = 0;
172 let mut stmt = self.conn.prepare(
173 "INSERT OR REPLACE INTO nodes (id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree)
174 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
175 )?;
176 for node in nodes {
177 stmt.execute(params![
178 node.id,
179 node.kind,
180 node.name,
181 node.path,
182 node.line_start,
183 node.line_end,
184 node.language,
185 node.churn,
186 node.coupling,
187 node.community,
188 node.in_degree,
189 node.out_degree,
190 ])?;
191 count += 1;
192 }
193 Ok(count)
194 }
195
196 pub fn upsert_edges(&self, edges: &[Edge]) -> anyhow::Result<usize> {
197 if edges.is_empty() {
198 return Ok(0);
199 }
200 let mut count = 0;
201 let mut stmt = self.conn.prepare(
202 "INSERT OR REPLACE INTO edges (id, src, dst, kind, weight, confidence)
203 VALUES (?, ?, ?, ?, ?, ?)",
204 )?;
205 for edge in edges {
206 stmt.execute(params![
207 edge.id,
208 edge.src,
209 edge.dst,
210 edge.kind,
211 edge.weight,
212 edge.confidence,
213 ])?;
214 count += 1;
215 }
216 Ok(count)
217 }
218
219 pub fn get_node(&self, id: &str) -> anyhow::Result<Option<Node>> {
220 let mut stmt = self
221 .conn
222 .prepare("SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree FROM nodes WHERE id = ?")?;
223 let mut rows = stmt.query_map(params![id], |row| {
224 Ok(Node {
225 id: row.get(0)?,
226 kind: row.get(1)?,
227 name: row.get(2)?,
228 path: row.get(3)?,
229 line_start: row.get(4)?,
230 line_end: row.get(5)?,
231 language: row.get(6)?,
232 churn: row.get(7)?,
233 coupling: row.get(8)?,
234 community: row.get(9)?,
235 in_degree: row.get(10)?,
236 out_degree: row.get(11)?,
237 })
238 })?;
239
240 match rows.next() {
241 Some(Ok(node)) => Ok(Some(node)),
242 _ => Ok(None),
243 }
244 }
245
246 pub fn get_neighbors(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
247 let mut seen = std::collections::HashSet::new();
248 seen.insert(id.to_string());
249 let mut current = vec![id.to_string()];
250 let mut result: Vec<Node> = Vec::new();
251 let max_depth = depth.min(3);
252
253 for _ in 0..max_depth {
254 if current.is_empty() {
255 break;
256 }
257 let mut next = Vec::new();
258
259 for cur_id in ¤t {
260 let mut stmt = self.conn.prepare(
261 "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree
262 FROM nodes n
263 INNER JOIN edges e ON (e.dst = n.id AND e.src = ?1) OR (e.src = n.id AND e.dst = ?2)
264 LIMIT 100",
265 )?;
266 let rows = stmt.query_map(params![cur_id, cur_id], |row| {
267 Ok(Node {
268 id: row.get(0)?,
269 kind: row.get(1)?,
270 name: row.get(2)?,
271 path: row.get(3)?,
272 line_start: row.get(4)?,
273 line_end: row.get(5)?,
274 language: row.get(6)?,
275 churn: row.get(7)?,
276 coupling: row.get(8)?,
277 community: row.get(9)?,
278 in_degree: row.get(10)?,
279 out_degree: row.get(11)?,
280 })
281 })?;
282
283 for row in rows {
284 let node = row?;
285 if seen.insert(node.id.clone()) {
286 next.push(node.id.clone());
287 result.push(node);
288 }
289 }
290 }
291 current = next;
292 }
293
294 Ok(result)
295 }
296
297 pub fn get_all_nodes(&self) -> anyhow::Result<Vec<Node>> {
298 let mut stmt = self.conn.prepare(
299 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree FROM nodes",
300 )?;
301 let rows = stmt.query_map([], |row| {
302 Ok(Node {
303 id: row.get(0)?,
304 kind: row.get(1)?,
305 name: row.get(2)?,
306 path: row.get(3)?,
307 line_start: row.get(4)?,
308 line_end: row.get(5)?,
309 language: row.get(6)?,
310 churn: row.get(7)?,
311 coupling: row.get(8)?,
312 community: row.get(9)?,
313 in_degree: row.get(10)?,
314 out_degree: row.get(11)?,
315 })
316 })?;
317
318 let mut nodes = Vec::new();
319 for row in rows {
320 nodes.push(row?);
321 }
322 Ok(nodes)
323 }
324
325 pub fn get_all_edges(&self) -> anyhow::Result<Vec<Edge>> {
326 let mut stmt = self
327 .conn
328 .prepare("SELECT id, src, dst, kind, weight, confidence FROM edges")?;
329 let rows = stmt.query_map([], |row| {
330 Ok(Edge {
331 id: row.get(0)?,
332 src: row.get(1)?,
333 dst: row.get(2)?,
334 kind: row.get(3)?,
335 weight: row.get(4)?,
336 confidence: row.get(5)?,
337 })
338 })?;
339
340 let mut edges = Vec::new();
341 for row in rows {
342 edges.push(row?);
343 }
344 Ok(edges)
345 }
346
347 pub fn node_count(&self) -> anyhow::Result<u64> {
348 let count: i64 = self
349 .conn
350 .query_row("SELECT COUNT(*) FROM nodes", [], |row| row.get(0))?;
351 Ok(count as u64)
352 }
353
354 pub fn edge_count(&self) -> anyhow::Result<u64> {
355 let count: i64 = self
356 .conn
357 .query_row("SELECT COUNT(*) FROM edges", [], |row| row.get(0))?;
358 Ok(count as u64)
359 }
360
361 pub fn clear(&self) -> anyhow::Result<()> {
362 self.conn.execute_batch(
365 "DROP TABLE IF EXISTS edges;
366 DROP TABLE IF EXISTS nodes;
367 DROP TABLE IF EXISTS communities;
368 CREATE TABLE IF NOT EXISTS nodes (
369 id VARCHAR PRIMARY KEY,
370 kind VARCHAR NOT NULL,
371 name VARCHAR NOT NULL,
372 path VARCHAR NOT NULL,
373 line_start INTEGER,
374 line_end INTEGER,
375 language VARCHAR,
376 churn DOUBLE DEFAULT 0.0,
377 coupling DOUBLE DEFAULT 0.0,
378 community BIGINT DEFAULT 0,
379 in_degree BIGINT DEFAULT 0,
380 out_degree BIGINT DEFAULT 0,
381 metadata JSON
382 );
383 CREATE TABLE IF NOT EXISTS edges (
384 id VARCHAR PRIMARY KEY,
385 src VARCHAR NOT NULL,
386 dst VARCHAR NOT NULL,
387 kind VARCHAR NOT NULL,
388 weight DOUBLE DEFAULT 1.0,
389 confidence DOUBLE DEFAULT 1.0,
390 metadata JSON
391 );
392 CREATE TABLE IF NOT EXISTS communities (
393 id INTEGER PRIMARY KEY,
394 label VARCHAR,
395 node_count INTEGER,
396 top_nodes JSON
397 );
398 CREATE INDEX IF NOT EXISTS idx_nodes_kind ON nodes(kind);
399 CREATE INDEX IF NOT EXISTS idx_nodes_path ON nodes(path);
400 CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community);
401 CREATE INDEX IF NOT EXISTS idx_edges_src ON edges(src);
402 CREATE INDEX IF NOT EXISTS idx_edges_dst ON edges(dst);
403 CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind);",
404 )?;
405 Ok(())
406 }
407
408 pub fn get_language_breakdown(&self) -> anyhow::Result<std::collections::HashMap<String, f64>> {
409 let mut stmt = self.conn.prepare(
410 "SELECT language, COUNT(*) as cnt FROM nodes WHERE language != '' GROUP BY language",
411 )?;
412 let rows = stmt.query_map([], |row| {
413 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
414 })?;
415
416 let mut counts: std::collections::HashMap<String, i64> = std::collections::HashMap::new();
417 for row in rows {
418 let (lang, cnt) = row?;
419 *counts.entry(lang).or_default() += cnt;
420 }
421
422 let total: i64 = counts.values().sum();
423 if total == 0 {
424 return Ok(std::collections::HashMap::new());
425 }
426
427 let mut breakdown = std::collections::HashMap::new();
428 for (lang, cnt) in counts {
429 breakdown.insert(lang, cnt as f64 / total as f64);
430 }
431 Ok(breakdown)
432 }
433
434 pub fn get_node_counts_by_kind(
435 &self,
436 ) -> anyhow::Result<std::collections::HashMap<String, u64>> {
437 let mut stmt = self
438 .conn
439 .prepare("SELECT kind, COUNT(*) as cnt FROM nodes GROUP BY kind")?;
440 let rows = stmt.query_map([], |row| {
441 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
442 })?;
443
444 let mut counts = std::collections::HashMap::new();
445 for row in rows {
446 let (kind, cnt) = row?;
447 counts.insert(kind, cnt as u64);
448 }
449 Ok(counts)
450 }
451
452 pub fn upsert_node_scores(
453 &self,
454 node_id: &str,
455 churn: f64,
456 coupling: f64,
457 ) -> anyhow::Result<()> {
458 self.conn.execute(
459 "UPDATE nodes SET churn = ?, coupling = ? WHERE id = ?",
460 params![churn, coupling, node_id],
461 )?;
462 Ok(())
463 }
464
465 pub fn update_in_out_degrees(&self) -> anyhow::Result<()> {
466 self.conn.execute_batch(
467 "UPDATE nodes SET in_degree = 0, out_degree = 0;
468 UPDATE nodes SET out_degree = (SELECT COUNT(*) FROM edges WHERE edges.src = nodes.id);
469 UPDATE nodes SET in_degree = (SELECT COUNT(*) FROM edges WHERE edges.dst = nodes.id);",
470 )?;
471 Ok(())
472 }
473
474 pub fn get_hotspots(&self, limit: usize) -> anyhow::Result<Vec<(String, f64, f64, i64)>> {
475 let mut stmt = self.conn.prepare(
476 "SELECT path, churn, coupling, in_degree
477 FROM nodes
478 WHERE kind = 'File' AND (churn > 0.0 OR in_degree > 0)
479 ORDER BY (churn * COALESCE(coupling, 0.0) + CAST(in_degree AS DOUBLE) * 0.01) DESC
480 LIMIT ?",
481 )?;
482 let rows = stmt.query_map(params![limit as i64], |row| {
483 Ok((
484 row.get::<_, String>(0)?,
485 row.get::<_, f64>(1)?,
486 row.get::<_, f64>(2)?,
487 row.get::<_, i64>(3)?,
488 ))
489 })?;
490 let mut results = Vec::new();
491 for row in rows {
492 results.push(row?);
493 }
494 Ok(results)
495 }
496
497 pub fn get_ownership(&self) -> anyhow::Result<Vec<(String, i64)>> {
498 let mut stmt = self.conn.prepare(
499 "SELECT n.name, COUNT(e.id) as file_count
500 FROM nodes n
501 INNER JOIN edges e ON e.src = n.id AND e.kind = 'OWNS'
502 WHERE n.kind = 'Author'
503 GROUP BY n.name
504 ORDER BY file_count DESC",
505 )?;
506 let rows = stmt.query_map([], |row| {
507 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
508 })?;
509 let mut results = Vec::new();
510 for row in rows {
511 results.push(row?);
512 }
513 Ok(results)
514 }
515
516 pub fn compute_coupling(&self) -> anyhow::Result<()> {
517 self.conn.execute_batch(
518 "UPDATE nodes SET coupling = 0.0;
519 UPDATE nodes SET coupling =
520 CASE
521 WHEN (SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') > 0
522 THEN CAST(in_degree AS DOUBLE) / CAST((SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') AS DOUBLE)
523 ELSE 0.0
524 END
525 WHERE kind = 'File';",
526 )?;
527 Ok(())
528 }
529
530 pub fn update_node_communities(
531 &self,
532 communities: &std::collections::HashMap<String, i64>,
533 ) -> anyhow::Result<usize> {
534 if communities.is_empty() {
535 return Ok(0);
536 }
537 let mut count = 0;
538 let mut stmt = self
539 .conn
540 .prepare("UPDATE nodes SET community = ? WHERE id = ?")?;
541 for (node_id, community) in communities {
542 let affected = stmt.execute(params![*community, node_id.as_str()])?;
543 count += affected;
544 }
545 Ok(count)
546 }
547
548 pub fn get_stats(&self) -> anyhow::Result<RepoStats> {
549 let node_count = self.node_count()?;
550 let edge_count = self.edge_count()?;
551 let lang_breakdown = self.get_language_breakdown()?;
552 let communities = self.get_communities()?;
553 let counts_by_kind = self.get_node_counts_by_kind()?;
554
555 Ok(RepoStats {
556 node_count,
557 edge_count,
558 language_breakdown: lang_breakdown,
559 community_count: communities.len() as u32,
560 function_count: counts_by_kind.get("Function").copied().unwrap_or(0),
561 class_count: counts_by_kind.get("Class").copied().unwrap_or(0),
562 file_count: counts_by_kind.get("File").copied().unwrap_or(0),
563 })
564 }
565
566 pub fn get_entry_points(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
567 let mut stmt = self.conn.prepare(
568 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree
569 FROM nodes
570 WHERE in_degree = 0 AND kind != 'File' AND kind != 'Author'
571 ORDER BY out_degree DESC
572 LIMIT ?",
573 )?;
574 let rows = stmt.query_map(params![limit as i64], |row| {
575 Ok(Node {
576 id: row.get(0)?,
577 kind: row.get(1)?,
578 name: row.get(2)?,
579 path: row.get(3)?,
580 line_start: row.get(4)?,
581 line_end: row.get(5)?,
582 language: row.get(6)?,
583 churn: row.get(7)?,
584 coupling: row.get(8)?,
585 community: row.get(9)?,
586 in_degree: row.get(10)?,
587 out_degree: row.get(11)?,
588 })
589 })?;
590 let mut results = Vec::new();
591 for row in rows {
592 results.push(row?);
593 }
594 Ok(results)
595 }
596
597 pub fn get_god_nodes(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
598 let mut stmt = self.conn.prepare(
599 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree
600 FROM nodes
601 WHERE in_degree > 0 AND kind != 'File' AND kind != 'Author'
602 ORDER BY in_degree DESC
603 LIMIT ?",
604 )?;
605 let rows = stmt.query_map(params![limit as i64], |row| {
606 Ok(Node {
607 id: row.get(0)?,
608 kind: row.get(1)?,
609 name: row.get(2)?,
610 path: row.get(3)?,
611 line_start: row.get(4)?,
612 line_end: row.get(5)?,
613 language: row.get(6)?,
614 churn: row.get(7)?,
615 coupling: row.get(8)?,
616 community: row.get(9)?,
617 in_degree: row.get(10)?,
618 out_degree: row.get(11)?,
619 })
620 })?;
621 let mut results = Vec::new();
622 for row in rows {
623 results.push(row?);
624 }
625 Ok(results)
626 }
627
628 pub fn get_communities(&self) -> anyhow::Result<Vec<CommunityRow>> {
629 let mut stmt = self.conn.prepare(
630 "SELECT community, kind, name, path, in_degree
631 FROM nodes
632 WHERE community > 0
633 ORDER BY community",
634 )?;
635 let rows = stmt.query_map([], |row| {
636 Ok((
637 row.get::<_, i64>(0)?,
638 row.get::<_, String>(1)?,
639 row.get::<_, String>(2)?,
640 row.get::<_, String>(3)?,
641 row.get::<_, i64>(4)?,
642 ))
643 })?;
644
645 let mut community_map: std::collections::HashMap<i64, CommunityGroup> =
646 std::collections::HashMap::new();
647 for row in rows {
648 let (community, kind, name, _path, in_degree) = row?;
649 let entry = community_map
650 .entry(community)
651 .or_insert_with(|| (Vec::new(), 0));
652 entry.0.push((kind, in_degree, name));
653 entry.1 += 1;
654 }
655
656 let mut result: Vec<CommunityRow> = community_map
657 .into_iter()
658 .map(|(community, (mut items, count))| {
659 items.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.2.cmp(&b.2)));
660 let top_nodes: Vec<String> = items
661 .iter()
662 .take(5)
663 .map(|(kind, _deg, name)| format!("{}:{}", kind, name))
664 .collect();
665 let label = top_nodes
666 .first()
667 .cloned()
668 .unwrap_or_else(|| format!("community-{}", community));
669 (community, label, count, top_nodes)
670 })
671 .collect();
672
673 result.sort_by_key(|row| std::cmp::Reverse(row.2));
674 Ok(result)
675 }
676
677 pub fn clear_communities(&self) -> anyhow::Result<()> {
678 self.conn.execute("UPDATE nodes SET community = 0", [])?;
679 self.conn.execute("DELETE FROM communities", [])?;
680 Ok(())
681 }
682
683 pub fn get_dependents(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
686 let mut seen = std::collections::HashSet::new();
687 seen.insert(id.to_string());
688 let mut current = vec![id.to_string()];
689 let mut result: Vec<Node> = Vec::new();
690 let max_depth = depth.min(3);
691
692 for _ in 0..max_depth {
693 if current.is_empty() {
694 break;
695 }
696 let mut next = Vec::new();
697 for cur_id in ¤t {
698 let mut stmt = self.conn.prepare(
699 "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree
700 FROM nodes n
701 INNER JOIN edges e ON e.src = n.id AND e.dst = ?
702 LIMIT 100",
703 )?;
704 let rows = stmt.query_map(params![cur_id], |row| {
705 Ok(Node {
706 id: row.get(0)?,
707 kind: row.get(1)?,
708 name: row.get(2)?,
709 path: row.get(3)?,
710 line_start: row.get(4)?,
711 line_end: row.get(5)?,
712 language: row.get(6)?,
713 churn: row.get(7)?,
714 coupling: row.get(8)?,
715 community: row.get(9)?,
716 in_degree: row.get(10)?,
717 out_degree: row.get(11)?,
718 })
719 })?;
720 for row in rows {
721 let node = row?;
722 if seen.insert(node.id.clone()) {
723 next.push(node.id.clone());
724 result.push(node);
725 }
726 }
727 }
728 current = next;
729 }
730
731 Ok(result)
732 }
733
734 pub fn get_nodes_by_community(&self, community: i64) -> anyhow::Result<Vec<Node>> {
735 let mut stmt = self.conn.prepare(
736 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree FROM nodes WHERE community = ?",
737 )?;
738 let rows = stmt.query_map(params![community], |row| {
739 Ok(Node {
740 id: row.get(0)?,
741 kind: row.get(1)?,
742 name: row.get(2)?,
743 path: row.get(3)?,
744 line_start: row.get(4)?,
745 line_end: row.get(5)?,
746 language: row.get(6)?,
747 churn: row.get(7)?,
748 coupling: row.get(8)?,
749 community: row.get(9)?,
750 in_degree: row.get(10)?,
751 out_degree: row.get(11)?,
752 })
753 })?;
754 let mut nodes = Vec::new();
755 for row in rows {
756 nodes.push(row?);
757 }
758 Ok(nodes)
759 }
760
761 pub fn get_edges_by_community(&self, community: i64) -> anyhow::Result<Vec<Edge>> {
762 let mut stmt = self.conn.prepare(
763 "SELECT DISTINCT e.id, e.src, e.dst, e.kind, e.weight, e.confidence
764 FROM edges e
765 INNER JOIN nodes n1 ON e.src = n1.id AND n1.community = ?
766 INNER JOIN nodes n2 ON e.dst = n2.id AND n2.community = ?",
767 )?;
768 let rows = stmt.query_map(params![community, community], |row| {
769 Ok(Edge {
770 id: row.get(0)?,
771 src: row.get(1)?,
772 dst: row.get(2)?,
773 kind: row.get(3)?,
774 weight: row.get(4)?,
775 confidence: row.get(5)?,
776 })
777 })?;
778 let mut edges = Vec::new();
779 for row in rows {
780 edges.push(row?);
781 }
782 Ok(edges)
783 }
784
785 pub fn get_file_hashes(&self) -> anyhow::Result<std::collections::HashMap<String, String>> {
788 let mut stmt = self.conn.prepare("SELECT path, hash FROM file_hashes")?;
789 let rows = stmt.query_map([], |row| {
790 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
791 })?;
792 let mut result = std::collections::HashMap::new();
793 for row in rows {
794 let (path, hash) = row?;
795 result.insert(path, hash);
796 }
797 Ok(result)
798 }
799
800 pub fn set_file_hash(&self, path: &str, hash: &str) -> anyhow::Result<()> {
801 self.conn.execute(
802 "INSERT OR REPLACE INTO file_hashes (path, hash) VALUES (?, ?)",
803 params![path, hash],
804 )?;
805 Ok(())
806 }
807
808 pub fn remove_file_hashes(&self, paths: &[String]) -> anyhow::Result<()> {
809 if paths.is_empty() {
810 return Ok(());
811 }
812 let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
813 let sql = format!("DELETE FROM file_hashes WHERE path IN ({})", placeholders);
814 let mut stmt = self.conn.prepare(&sql)?;
815 let params: Vec<&dyn duckdb::ToSql> =
816 paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
817 stmt.execute(params.as_slice())?;
818 Ok(())
819 }
820
821 pub fn delete_nodes_by_paths(&self, paths: &[String]) -> anyhow::Result<usize> {
822 if paths.is_empty() {
823 return Ok(0);
824 }
825 let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
826 let sql_edges = format!(
828 "DELETE FROM edges WHERE src IN (SELECT id FROM nodes WHERE path IN ({})) OR dst IN (SELECT id FROM nodes WHERE path IN ({}))",
829 placeholders, placeholders
830 );
831 let mut stmt_edges = self.conn.prepare(&sql_edges)?;
832 let params_edges: Vec<&dyn duckdb::ToSql> = paths
833 .iter()
834 .chain(paths.iter())
835 .map(|p| p as &dyn duckdb::ToSql)
836 .collect();
837 stmt_edges.execute(params_edges.as_slice())?;
838
839 let sql_nodes = format!("DELETE FROM nodes WHERE path IN ({})", placeholders);
841 let mut stmt_nodes = self.conn.prepare(&sql_nodes)?;
842 let params_nodes: Vec<&dyn duckdb::ToSql> =
843 paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
844 let count = stmt_nodes.execute(params_nodes.as_slice())?;
845 Ok(count)
846 }
847}
848
849pub fn repo_hash(path: &Path) -> String {
850 let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
851 let path_str = canonical.to_string_lossy().to_string();
852 let mut hasher = Sha256::new();
853 hasher.update(path_str.as_bytes());
854 format!("{:x}", hasher.finalize())[..16].to_string()
855}