1use std::path::{Path, PathBuf};
2
3use duckdb::params;
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6
7use crate::parser::{EdgeDef, NodeDef};
8
/// A graph node, mirroring one row of the `nodes` table.
///
/// The language and metric fields carry `#[serde(default)]`, so they may be
/// absent from deserialized input and then take their type's default value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    /// Unique identifier; the primary key of the `nodes` table.
    pub id: String,
    /// Node kind as a string. Queries in this file match on `"File"`,
    /// `"Function"`, `"Class"` and `"Author"`.
    pub kind: String,
    /// Name of the node.
    pub name: String,
    /// Path associated with the node; `GraphDb::delete_nodes_by_paths` deletes by it.
    pub path: String,
    /// Start line, copied from `NodeDef::line_start` by `Node::from_def`.
    pub line_start: u32,
    /// End line, copied from `NodeDef::line_end` by `Node::from_def`.
    pub line_end: u32,
    /// Language label; rows with an empty language are left out of the
    /// language breakdown.
    #[serde(default)]
    pub language: String,
    /// Churn score; starts at `0.0` and is written by `GraphDb::upsert_node_scores`.
    #[serde(default)]
    pub churn: f64,
    /// Coupling score; `GraphDb::compute_coupling` sets it to
    /// `in_degree / max(in_degree)` for `File` nodes and `0.0` otherwise.
    #[serde(default)]
    pub coupling: f64,
    /// Community id; `GraphDb::get_communities` only reads rows with
    /// `community > 0`, and `GraphDb::clear_communities` resets it to `0`.
    #[serde(default)]
    pub community: i64,
    /// Number of edges whose `dst` is this node, as computed by
    /// `GraphDb::update_in_out_degrees`.
    #[serde(default)]
    pub in_degree: i64,
    /// Number of edges whose `src` is this node, as computed by
    /// `GraphDb::update_in_out_degrees`.
    #[serde(default)]
    pub out_degree: i64,
}
30
/// A graph edge from `src` to `dst`, mirroring one row of the `edges` table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
    /// Unique identifier; `Edge::from_def` builds it as `src|kind|dst`.
    pub id: String,
    /// Id of the source node.
    pub src: String,
    /// Id of the destination node.
    pub dst: String,
    /// Edge kind as a string (for example `"OWNS"`, which
    /// `GraphDb::get_ownership` filters on).
    pub kind: String,
    /// Edge weight; `1.0` when missing from deserialized input.
    #[serde(default = "default_weight")]
    pub weight: f64,
    /// Confidence value; `1.0` when missing from deserialized input.
    #[serde(default = "default_weight")]
    pub confidence: f64,
}
42
/// Serde fallback used for `Edge::weight` and `Edge::confidence`: always `1.0`.
fn default_weight() -> f64 {
    1.0_f64
}
46
/// Summary statistics for an indexed repository, as assembled by
/// `GraphDb::get_stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepoStats {
    /// Total number of rows in `nodes`.
    pub node_count: u64,
    /// Total number of rows in `edges`.
    pub edge_count: u64,
    /// Fraction of nodes per language (each value is that language's node
    /// count divided by the count of all nodes with a non-empty language).
    pub language_breakdown: std::collections::HashMap<String, f64>,
    /// Number of distinct communities with an id greater than zero.
    pub community_count: u32,
    /// Number of nodes whose kind is `"Function"`.
    pub function_count: u64,
    /// Number of nodes whose kind is `"Class"`.
    pub class_count: u64,
    /// Number of nodes whose kind is `"File"`.
    pub file_count: u64,
}
57
/// One community summary as returned by `GraphDb::get_communities`:
/// `(community id, label, node count, top node labels)`, where each top node
/// label has the form `kind:name`.
pub type CommunityRow = (i64, String, i64, Vec<String>);
59type CommunityGroup = (Vec<(String, i64, String)>, i64); impl Node {
62 pub fn from_def(d: &NodeDef, language: &str) -> Self {
63 Self {
64 id: d.id.clone(),
65 kind: d.kind.as_str().to_string(),
66 name: d.name.clone(),
67 path: d.path.clone(),
68 line_start: d.line_start,
69 line_end: d.line_end,
70 language: language.to_string(),
71 churn: 0.0,
72 coupling: 0.0,
73 community: 0,
74 in_degree: 0,
75 out_degree: 0,
76 }
77 }
78}
79
80impl Edge {
81 pub fn from_def(d: &EdgeDef) -> Self {
82 let id = format!("{}|{}|{}", d.src, d.kind.as_str(), d.dst);
83 Self {
84 id,
85 src: d.src.clone(),
86 dst: d.dst.clone(),
87 kind: d.kind.as_str().to_string(),
88 weight: d.weight,
89 confidence: d.confidence,
90 }
91 }
92}
93
/// Handle to the DuckDB database that stores one repository's graph.
pub struct GraphDb {
    /// Open DuckDB connection used by every query in this type.
    pub conn: duckdb::Connection,
    /// Identifier derived from the repository path via `repo_hash`.
    pub repo_id: String,
    /// Location of the database file on disk.
    pub db_path: PathBuf,
}
99
100impl GraphDb {
101 pub fn open(repo_path: &Path) -> anyhow::Result<Self> {
102 let repo_id = repo_hash(repo_path);
103 let dir = dirs::home_dir()
104 .ok_or_else(|| anyhow::anyhow!("cannot determine home directory"))?
105 .join(".cgx")
106 .join("repos");
107 std::fs::create_dir_all(&dir)?;
108
109 let db_path = dir.join(format!("{}.db", repo_id));
110 let conn = duckdb::Connection::open(&db_path)?;
111
112 conn.execute_batch(
113 "CREATE TABLE IF NOT EXISTS nodes (
114 id VARCHAR PRIMARY KEY,
115 kind VARCHAR NOT NULL,
116 name VARCHAR NOT NULL,
117 path VARCHAR NOT NULL,
118 line_start INTEGER,
119 line_end INTEGER,
120 language VARCHAR,
121 churn DOUBLE DEFAULT 0.0,
122 coupling DOUBLE DEFAULT 0.0,
123 community BIGINT DEFAULT 0,
124 in_degree BIGINT DEFAULT 0,
125 out_degree BIGINT DEFAULT 0,
126 metadata JSON
127 );
128 CREATE TABLE IF NOT EXISTS edges (
129 id VARCHAR PRIMARY KEY,
130 src VARCHAR NOT NULL,
131 dst VARCHAR NOT NULL,
132 kind VARCHAR NOT NULL,
133 weight DOUBLE DEFAULT 1.0,
134 confidence DOUBLE DEFAULT 1.0,
135 metadata JSON
136 );
137 CREATE TABLE IF NOT EXISTS communities (
138 id INTEGER PRIMARY KEY,
139 label VARCHAR,
140 node_count INTEGER,
141 top_nodes JSON
142 );
143 CREATE TABLE IF NOT EXISTS repo_meta (
144 key VARCHAR PRIMARY KEY,
145 value JSON
146 );
147 CREATE TABLE IF NOT EXISTS file_hashes (
148 path VARCHAR PRIMARY KEY,
149 hash VARCHAR NOT NULL,
150 indexed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
151 );
152 CREATE INDEX IF NOT EXISTS idx_nodes_kind ON nodes(kind);
153 CREATE INDEX IF NOT EXISTS idx_nodes_path ON nodes(path);
154 CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community);
155 CREATE INDEX IF NOT EXISTS idx_edges_src ON edges(src);
156 CREATE INDEX IF NOT EXISTS idx_edges_dst ON edges(dst);
157 CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind);",
158 )?;
159
160 Ok(Self {
161 conn,
162 repo_id,
163 db_path,
164 })
165 }
166
167 pub fn upsert_nodes(&self, nodes: &[Node]) -> anyhow::Result<usize> {
168 if nodes.is_empty() {
169 return Ok(0);
170 }
171 let mut count = 0;
172 let mut stmt = self.conn.prepare(
173 "INSERT OR REPLACE INTO nodes (id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree)
174 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
175 )?;
176 for node in nodes {
177 stmt.execute(params![
178 node.id,
179 node.kind,
180 node.name,
181 node.path,
182 node.line_start,
183 node.line_end,
184 node.language,
185 node.churn,
186 node.coupling,
187 node.community,
188 node.in_degree,
189 node.out_degree,
190 ])?;
191 count += 1;
192 }
193 Ok(count)
194 }
195
196 pub fn upsert_edges(&self, edges: &[Edge]) -> anyhow::Result<usize> {
197 if edges.is_empty() {
198 return Ok(0);
199 }
200 let mut count = 0;
201 let mut stmt = self.conn.prepare(
202 "INSERT OR REPLACE INTO edges (id, src, dst, kind, weight, confidence)
203 VALUES (?, ?, ?, ?, ?, ?)",
204 )?;
205 for edge in edges {
206 stmt.execute(params![
207 edge.id,
208 edge.src,
209 edge.dst,
210 edge.kind,
211 edge.weight,
212 edge.confidence,
213 ])?;
214 count += 1;
215 }
216 Ok(count)
217 }
218
219 pub fn get_node(&self, id: &str) -> anyhow::Result<Option<Node>> {
220 let mut stmt = self
221 .conn
222 .prepare("SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree FROM nodes WHERE id = ?")?;
223 let mut rows = stmt.query_map(params![id], |row| {
224 Ok(Node {
225 id: row.get(0)?,
226 kind: row.get(1)?,
227 name: row.get(2)?,
228 path: row.get(3)?,
229 line_start: row.get(4)?,
230 line_end: row.get(5)?,
231 language: row.get(6)?,
232 churn: row.get(7)?,
233 coupling: row.get(8)?,
234 community: row.get(9)?,
235 in_degree: row.get(10)?,
236 out_degree: row.get(11)?,
237 })
238 })?;
239
240 match rows.next() {
241 Some(Ok(node)) => Ok(Some(node)),
242 _ => Ok(None),
243 }
244 }
245
246 pub fn get_neighbors(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
247 let mut seen = std::collections::HashSet::new();
248 seen.insert(id.to_string());
249 let mut current = vec![id.to_string()];
250 let mut result: Vec<Node> = Vec::new();
251 let max_depth = depth.min(3);
252
253 for _ in 0..max_depth {
254 if current.is_empty() {
255 break;
256 }
257 let mut next = Vec::new();
258
259 for cur_id in ¤t {
260 let mut stmt = self.conn.prepare(
261 "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree
262 FROM nodes n
263 INNER JOIN edges e ON (e.dst = n.id AND e.src = ?1) OR (e.src = n.id AND e.dst = ?2)
264 LIMIT 100",
265 )?;
266 let rows = stmt.query_map(params![cur_id, cur_id], |row| {
267 Ok(Node {
268 id: row.get(0)?,
269 kind: row.get(1)?,
270 name: row.get(2)?,
271 path: row.get(3)?,
272 line_start: row.get(4)?,
273 line_end: row.get(5)?,
274 language: row.get(6)?,
275 churn: row.get(7)?,
276 coupling: row.get(8)?,
277 community: row.get(9)?,
278 in_degree: row.get(10)?,
279 out_degree: row.get(11)?,
280 })
281 })?;
282
283 for row in rows {
284 let node = row?;
285 if seen.insert(node.id.clone()) {
286 next.push(node.id.clone());
287 result.push(node);
288 }
289 }
290 }
291 current = next;
292 }
293
294 Ok(result)
295 }
296
297 pub fn get_all_nodes(&self) -> anyhow::Result<Vec<Node>> {
298 let mut stmt = self.conn.prepare(
299 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree FROM nodes",
300 )?;
301 let rows = stmt.query_map([], |row| {
302 Ok(Node {
303 id: row.get(0)?,
304 kind: row.get(1)?,
305 name: row.get(2)?,
306 path: row.get(3)?,
307 line_start: row.get(4)?,
308 line_end: row.get(5)?,
309 language: row.get(6)?,
310 churn: row.get(7)?,
311 coupling: row.get(8)?,
312 community: row.get(9)?,
313 in_degree: row.get(10)?,
314 out_degree: row.get(11)?,
315 })
316 })?;
317
318 let mut nodes = Vec::new();
319 for row in rows {
320 nodes.push(row?);
321 }
322 Ok(nodes)
323 }
324
325 pub fn get_all_edges(&self) -> anyhow::Result<Vec<Edge>> {
326 let mut stmt = self.conn.prepare(
327 "SELECT id, src, dst, kind, weight, confidence FROM edges",
328 )?;
329 let rows = stmt.query_map([], |row| {
330 Ok(Edge {
331 id: row.get(0)?,
332 src: row.get(1)?,
333 dst: row.get(2)?,
334 kind: row.get(3)?,
335 weight: row.get(4)?,
336 confidence: row.get(5)?,
337 })
338 })?;
339
340 let mut edges = Vec::new();
341 for row in rows {
342 edges.push(row?);
343 }
344 Ok(edges)
345 }
346
347 pub fn node_count(&self) -> anyhow::Result<u64> {
348 let count: i64 = self
349 .conn
350 .query_row("SELECT COUNT(*) FROM nodes", [], |row| row.get(0))?;
351 Ok(count as u64)
352 }
353
354 pub fn edge_count(&self) -> anyhow::Result<u64> {
355 let count: i64 = self
356 .conn
357 .query_row("SELECT COUNT(*) FROM edges", [], |row| row.get(0))?;
358 Ok(count as u64)
359 }
360
361 pub fn clear(&self) -> anyhow::Result<()> {
362 self.conn.execute_batch(
365 "DROP TABLE IF EXISTS edges;
366 DROP TABLE IF EXISTS nodes;
367 DROP TABLE IF EXISTS communities;
368 CREATE TABLE IF NOT EXISTS nodes (
369 id VARCHAR PRIMARY KEY,
370 kind VARCHAR NOT NULL,
371 name VARCHAR NOT NULL,
372 path VARCHAR NOT NULL,
373 line_start INTEGER,
374 line_end INTEGER,
375 language VARCHAR,
376 churn DOUBLE DEFAULT 0.0,
377 coupling DOUBLE DEFAULT 0.0,
378 community BIGINT DEFAULT 0,
379 in_degree BIGINT DEFAULT 0,
380 out_degree BIGINT DEFAULT 0,
381 metadata JSON
382 );
383 CREATE TABLE IF NOT EXISTS edges (
384 id VARCHAR PRIMARY KEY,
385 src VARCHAR NOT NULL,
386 dst VARCHAR NOT NULL,
387 kind VARCHAR NOT NULL,
388 weight DOUBLE DEFAULT 1.0,
389 confidence DOUBLE DEFAULT 1.0,
390 metadata JSON
391 );
392 CREATE TABLE IF NOT EXISTS communities (
393 id INTEGER PRIMARY KEY,
394 label VARCHAR,
395 node_count INTEGER,
396 top_nodes JSON
397 );
398 CREATE INDEX IF NOT EXISTS idx_nodes_kind ON nodes(kind);
399 CREATE INDEX IF NOT EXISTS idx_nodes_path ON nodes(path);
400 CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community);
401 CREATE INDEX IF NOT EXISTS idx_edges_src ON edges(src);
402 CREATE INDEX IF NOT EXISTS idx_edges_dst ON edges(dst);
403 CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind);",
404 )?;
405 Ok(())
406 }
407
408 pub fn get_language_breakdown(&self) -> anyhow::Result<std::collections::HashMap<String, f64>> {
409 let mut stmt = self.conn.prepare(
410 "SELECT language, COUNT(*) as cnt FROM nodes WHERE language != '' GROUP BY language",
411 )?;
412 let rows = stmt.query_map([], |row| {
413 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
414 })?;
415
416 let mut counts: std::collections::HashMap<String, i64> = std::collections::HashMap::new();
417 for row in rows {
418 let (lang, cnt) = row?;
419 *counts.entry(lang).or_default() += cnt;
420 }
421
422 let total: i64 = counts.values().sum();
423 if total == 0 {
424 return Ok(std::collections::HashMap::new());
425 }
426
427 let mut breakdown = std::collections::HashMap::new();
428 for (lang, cnt) in counts {
429 breakdown.insert(lang, cnt as f64 / total as f64);
430 }
431 Ok(breakdown)
432 }
433
434 pub fn get_node_counts_by_kind(&self) -> anyhow::Result<std::collections::HashMap<String, u64>> {
435 let mut stmt = self.conn.prepare(
436 "SELECT kind, COUNT(*) as cnt FROM nodes GROUP BY kind",
437 )?;
438 let rows = stmt.query_map([], |row| {
439 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
440 })?;
441
442 let mut counts = std::collections::HashMap::new();
443 for row in rows {
444 let (kind, cnt) = row?;
445 counts.insert(kind, cnt as u64);
446 }
447 Ok(counts)
448 }
449
450 pub fn upsert_node_scores(&self, node_id: &str, churn: f64, coupling: f64) -> anyhow::Result<()> {
451 self.conn.execute(
452 "UPDATE nodes SET churn = ?, coupling = ? WHERE id = ?",
453 params![churn, coupling, node_id],
454 )?;
455 Ok(())
456 }
457
458 pub fn update_in_out_degrees(&self) -> anyhow::Result<()> {
459 self.conn.execute_batch(
460 "UPDATE nodes SET in_degree = 0, out_degree = 0;
461 UPDATE nodes SET out_degree = (SELECT COUNT(*) FROM edges WHERE edges.src = nodes.id);
462 UPDATE nodes SET in_degree = (SELECT COUNT(*) FROM edges WHERE edges.dst = nodes.id);",
463 )?;
464 Ok(())
465 }
466
467 pub fn get_hotspots(&self, limit: usize) -> anyhow::Result<Vec<(String, f64, f64, i64)>> {
468 let mut stmt = self.conn.prepare(
469 "SELECT path, churn, coupling, in_degree
470 FROM nodes
471 WHERE kind = 'File' AND (churn > 0.0 OR in_degree > 0)
472 ORDER BY (churn * COALESCE(coupling, 0.0) + CAST(in_degree AS DOUBLE) * 0.01) DESC
473 LIMIT ?",
474 )?;
475 let rows = stmt.query_map(params![limit as i64], |row| {
476 Ok((
477 row.get::<_, String>(0)?,
478 row.get::<_, f64>(1)?,
479 row.get::<_, f64>(2)?,
480 row.get::<_, i64>(3)?,
481 ))
482 })?;
483 let mut results = Vec::new();
484 for row in rows {
485 results.push(row?);
486 }
487 Ok(results)
488 }
489
490 pub fn get_ownership(&self) -> anyhow::Result<Vec<(String, i64)>> {
491 let mut stmt = self.conn.prepare(
492 "SELECT n.name, COUNT(e.id) as file_count
493 FROM nodes n
494 INNER JOIN edges e ON e.src = n.id AND e.kind = 'OWNS'
495 WHERE n.kind = 'Author'
496 GROUP BY n.name
497 ORDER BY file_count DESC",
498 )?;
499 let rows = stmt.query_map([], |row| {
500 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
501 })?;
502 let mut results = Vec::new();
503 for row in rows {
504 results.push(row?);
505 }
506 Ok(results)
507 }
508
509 pub fn compute_coupling(&self) -> anyhow::Result<()> {
510 self.conn.execute_batch(
511 "UPDATE nodes SET coupling = 0.0;
512 UPDATE nodes SET coupling =
513 CASE
514 WHEN (SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') > 0
515 THEN CAST(in_degree AS DOUBLE) / CAST((SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') AS DOUBLE)
516 ELSE 0.0
517 END
518 WHERE kind = 'File';",
519 )?;
520 Ok(())
521 }
522
523 pub fn update_node_communities(&self, communities: &std::collections::HashMap<String, i64>) -> anyhow::Result<usize> {
524 if communities.is_empty() {
525 return Ok(0);
526 }
527 let mut count = 0;
528 let mut stmt = self.conn.prepare("UPDATE nodes SET community = ? WHERE id = ?")?;
529 for (node_id, community) in communities {
530 let affected = stmt.execute(params![*community, node_id.as_str()])?;
531 count += affected;
532 }
533 Ok(count)
534 }
535
536 pub fn get_stats(&self) -> anyhow::Result<RepoStats> {
537 let node_count = self.node_count()?;
538 let edge_count = self.edge_count()?;
539 let lang_breakdown = self.get_language_breakdown()?;
540 let communities = self.get_communities()?;
541 let counts_by_kind = self.get_node_counts_by_kind()?;
542
543 Ok(RepoStats {
544 node_count,
545 edge_count,
546 language_breakdown: lang_breakdown,
547 community_count: communities.len() as u32,
548 function_count: counts_by_kind.get("Function").copied().unwrap_or(0),
549 class_count: counts_by_kind.get("Class").copied().unwrap_or(0),
550 file_count: counts_by_kind.get("File").copied().unwrap_or(0),
551 })
552 }
553
554 pub fn get_entry_points(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
555 let mut stmt = self.conn.prepare(
556 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree
557 FROM nodes
558 WHERE in_degree = 0 AND kind != 'File' AND kind != 'Author'
559 ORDER BY out_degree DESC
560 LIMIT ?",
561 )?;
562 let rows = stmt.query_map(params![limit as i64], |row| {
563 Ok(Node {
564 id: row.get(0)?,
565 kind: row.get(1)?,
566 name: row.get(2)?,
567 path: row.get(3)?,
568 line_start: row.get(4)?,
569 line_end: row.get(5)?,
570 language: row.get(6)?,
571 churn: row.get(7)?,
572 coupling: row.get(8)?,
573 community: row.get(9)?,
574 in_degree: row.get(10)?,
575 out_degree: row.get(11)?,
576 })
577 })?;
578 let mut results = Vec::new();
579 for row in rows {
580 results.push(row?);
581 }
582 Ok(results)
583 }
584
585 pub fn get_god_nodes(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
586 let mut stmt = self.conn.prepare(
587 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree
588 FROM nodes
589 WHERE in_degree > 0 AND kind != 'File' AND kind != 'Author'
590 ORDER BY in_degree DESC
591 LIMIT ?",
592 )?;
593 let rows = stmt.query_map(params![limit as i64], |row| {
594 Ok(Node {
595 id: row.get(0)?,
596 kind: row.get(1)?,
597 name: row.get(2)?,
598 path: row.get(3)?,
599 line_start: row.get(4)?,
600 line_end: row.get(5)?,
601 language: row.get(6)?,
602 churn: row.get(7)?,
603 coupling: row.get(8)?,
604 community: row.get(9)?,
605 in_degree: row.get(10)?,
606 out_degree: row.get(11)?,
607 })
608 })?;
609 let mut results = Vec::new();
610 for row in rows {
611 results.push(row?);
612 }
613 Ok(results)
614 }
615
616 pub fn get_communities(&self) -> anyhow::Result<Vec<CommunityRow>> {
617 let mut stmt = self.conn.prepare(
618 "SELECT community, kind, name, path, in_degree
619 FROM nodes
620 WHERE community > 0
621 ORDER BY community",
622 )?;
623 let rows = stmt.query_map([], |row| {
624 Ok((
625 row.get::<_, i64>(0)?,
626 row.get::<_, String>(1)?,
627 row.get::<_, String>(2)?,
628 row.get::<_, String>(3)?,
629 row.get::<_, i64>(4)?,
630 ))
631 })?;
632
633 let mut community_map: std::collections::HashMap<i64, CommunityGroup> = std::collections::HashMap::new();
634 for row in rows {
635 let (community, kind, name, _path, in_degree) = row?;
636 let entry = community_map.entry(community).or_insert_with(|| (Vec::new(), 0));
637 entry.0.push((kind, in_degree, name));
638 entry.1 += 1;
639 }
640
641 let mut result: Vec<CommunityRow> = community_map
642 .into_iter()
643 .map(|(community, (mut items, count))| {
644 items.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.2.cmp(&b.2)));
645 let top_nodes: Vec<String> = items
646 .iter()
647 .take(5)
648 .map(|(kind, _deg, name)| format!("{}:{}", kind, name))
649 .collect();
650 let label = top_nodes.first().cloned().unwrap_or_else(|| format!("community-{}", community));
651 (community, label, count, top_nodes)
652 })
653 .collect();
654
655 result.sort_by(|a, b| b.2.cmp(&a.2));
656 Ok(result)
657 }
658
659 pub fn clear_communities(&self) -> anyhow::Result<()> {
660 self.conn.execute("UPDATE nodes SET community = 0", [])?;
661 self.conn.execute("DELETE FROM communities", [])?;
662 Ok(())
663 }
664
665 pub fn get_dependents(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
668 let mut seen = std::collections::HashSet::new();
669 seen.insert(id.to_string());
670 let mut current = vec![id.to_string()];
671 let mut result: Vec<Node> = Vec::new();
672 let max_depth = depth.min(3);
673
674 for _ in 0..max_depth {
675 if current.is_empty() {
676 break;
677 }
678 let mut next = Vec::new();
679 for cur_id in ¤t {
680 let mut stmt = self.conn.prepare(
681 "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree
682 FROM nodes n
683 INNER JOIN edges e ON e.src = n.id AND e.dst = ?
684 LIMIT 100",
685 )?;
686 let rows = stmt.query_map(params![cur_id], |row| {
687 Ok(Node {
688 id: row.get(0)?,
689 kind: row.get(1)?,
690 name: row.get(2)?,
691 path: row.get(3)?,
692 line_start: row.get(4)?,
693 line_end: row.get(5)?,
694 language: row.get(6)?,
695 churn: row.get(7)?,
696 coupling: row.get(8)?,
697 community: row.get(9)?,
698 in_degree: row.get(10)?,
699 out_degree: row.get(11)?,
700 })
701 })?;
702 for row in rows {
703 let node = row?;
704 if seen.insert(node.id.clone()) {
705 next.push(node.id.clone());
706 result.push(node);
707 }
708 }
709 }
710 current = next;
711 }
712
713 Ok(result)
714 }
715
716 pub fn get_nodes_by_community(&self, community: i64) -> anyhow::Result<Vec<Node>> {
717 let mut stmt = self.conn.prepare(
718 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree FROM nodes WHERE community = ?",
719 )?;
720 let rows = stmt.query_map(params![community], |row| {
721 Ok(Node {
722 id: row.get(0)?,
723 kind: row.get(1)?,
724 name: row.get(2)?,
725 path: row.get(3)?,
726 line_start: row.get(4)?,
727 line_end: row.get(5)?,
728 language: row.get(6)?,
729 churn: row.get(7)?,
730 coupling: row.get(8)?,
731 community: row.get(9)?,
732 in_degree: row.get(10)?,
733 out_degree: row.get(11)?,
734 })
735 })?;
736 let mut nodes = Vec::new();
737 for row in rows {
738 nodes.push(row?);
739 }
740 Ok(nodes)
741 }
742
743 pub fn get_edges_by_community(&self, community: i64) -> anyhow::Result<Vec<Edge>> {
744 let mut stmt = self.conn.prepare(
745 "SELECT DISTINCT e.id, e.src, e.dst, e.kind, e.weight, e.confidence
746 FROM edges e
747 INNER JOIN nodes n1 ON e.src = n1.id AND n1.community = ?
748 INNER JOIN nodes n2 ON e.dst = n2.id AND n2.community = ?",
749 )?;
750 let rows = stmt.query_map(params![community, community], |row| {
751 Ok(Edge {
752 id: row.get(0)?,
753 src: row.get(1)?,
754 dst: row.get(2)?,
755 kind: row.get(3)?,
756 weight: row.get(4)?,
757 confidence: row.get(5)?,
758 })
759 })?;
760 let mut edges = Vec::new();
761 for row in rows {
762 edges.push(row?);
763 }
764 Ok(edges)
765 }
766
767 pub fn get_file_hashes(&self) -> anyhow::Result<std::collections::HashMap<String, String>> {
770 let mut stmt = self.conn.prepare("SELECT path, hash FROM file_hashes")?;
771 let rows = stmt.query_map([], |row| {
772 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
773 })?;
774 let mut result = std::collections::HashMap::new();
775 for row in rows {
776 let (path, hash) = row?;
777 result.insert(path, hash);
778 }
779 Ok(result)
780 }
781
782 pub fn set_file_hash(&self, path: &str, hash: &str) -> anyhow::Result<()> {
783 self.conn.execute(
784 "INSERT OR REPLACE INTO file_hashes (path, hash) VALUES (?, ?)",
785 params![path, hash],
786 )?;
787 Ok(())
788 }
789
790 pub fn remove_file_hashes(&self, paths: &[String]) -> anyhow::Result<()> {
791 if paths.is_empty() {
792 return Ok(());
793 }
794 let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
795 let sql = format!("DELETE FROM file_hashes WHERE path IN ({})", placeholders);
796 let mut stmt = self.conn.prepare(&sql)?;
797 let params: Vec<&dyn duckdb::ToSql> = paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
798 stmt.execute(params.as_slice())?;
799 Ok(())
800 }
801
802 pub fn delete_nodes_by_paths(&self, paths: &[String]) -> anyhow::Result<usize> {
803 if paths.is_empty() {
804 return Ok(0);
805 }
806 let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
807 let sql_edges = format!(
809 "DELETE FROM edges WHERE src IN (SELECT id FROM nodes WHERE path IN ({})) OR dst IN (SELECT id FROM nodes WHERE path IN ({}))",
810 placeholders, placeholders
811 );
812 let mut stmt_edges = self.conn.prepare(&sql_edges)?;
813 let params_edges: Vec<&dyn duckdb::ToSql> = paths.iter().chain(paths.iter()).map(|p| p as &dyn duckdb::ToSql).collect();
814 stmt_edges.execute(params_edges.as_slice())?;
815
816 let sql_nodes = format!("DELETE FROM nodes WHERE path IN ({})", placeholders);
818 let mut stmt_nodes = self.conn.prepare(&sql_nodes)?;
819 let params_nodes: Vec<&dyn duckdb::ToSql> = paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
820 let count = stmt_nodes.execute(params_nodes.as_slice())?;
821 Ok(count)
822 }
823}
824
825pub fn repo_hash(path: &Path) -> String {
826 let canonical = path
827 .canonicalize()
828 .unwrap_or_else(|_| path.to_path_buf());
829 let path_str = canonical.to_string_lossy().to_string();
830 let mut hasher = Sha256::new();
831 hasher.update(path_str.as_bytes());
832 format!("{:x}", hasher.finalize())[..16].to_string()
833}