1use std::path::{Path, PathBuf};
2
3use duckdb::params;
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6
7use crate::parser::{EdgeDef, NodeDef};
8
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct Node {
11 pub id: String,
12 pub kind: String,
13 pub name: String,
14 pub path: String,
15 pub line_start: u32,
16 pub line_end: u32,
17 #[serde(default)]
18 pub language: String,
19 #[serde(default)]
20 pub churn: f64,
21 #[serde(default)]
22 pub coupling: f64,
23 #[serde(default)]
24 pub community: i64,
25 #[serde(default)]
26 pub in_degree: i64,
27 #[serde(default)]
28 pub out_degree: i64,
29}
30
31#[derive(Debug, Clone, Serialize, Deserialize)]
32pub struct Edge {
33 pub id: String,
34 pub src: String,
35 pub dst: String,
36 pub kind: String,
37 #[serde(default = "default_weight")]
38 pub weight: f64,
39 #[serde(default = "default_weight")]
40 pub confidence: f64,
41}
42
/// Serde fallback for `Edge::weight` and `Edge::confidence`: always `1.0`.
fn default_weight() -> f64 {
    1.0_f64
}
46
47#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct RepoStats {
49 pub node_count: u64,
50 pub edge_count: u64,
51 pub language_breakdown: std::collections::HashMap<String, f64>,
52 pub community_count: u32,
53 pub function_count: u64,
54 pub class_count: u64,
55 pub file_count: u64,
56}
57
58pub type CommunityRow = (i64, String, i64, Vec<String>);
59type CommunityGroup = (Vec<(String, i64, String)>, i64); #[derive(Debug, Clone, Serialize, Deserialize)]
62pub struct TagRow {
63 pub id: String,
64 pub file_path: String,
65 pub line: u32,
66 pub tag_type: String,
67 pub text: String,
68 pub comment_type: String,
70}
71
72impl Node {
73 pub fn from_def(d: &NodeDef, language: &str) -> Self {
74 Self {
75 id: d.id.clone(),
76 kind: d.kind.as_str().to_string(),
77 name: d.name.clone(),
78 path: d.path.clone(),
79 line_start: d.line_start,
80 line_end: d.line_end,
81 language: language.to_string(),
82 churn: 0.0,
83 coupling: 0.0,
84 community: 0,
85 in_degree: 0,
86 out_degree: 0,
87 }
88 }
89}
90
91impl Edge {
92 pub fn from_def(d: &EdgeDef) -> Self {
93 let id = format!("{}|{}|{}", d.src, d.kind.as_str(), d.dst);
94 Self {
95 id,
96 src: d.src.clone(),
97 dst: d.dst.clone(),
98 kind: d.kind.as_str().to_string(),
99 weight: d.weight,
100 confidence: d.confidence,
101 }
102 }
103}
104
105pub struct GraphDb {
106 pub conn: duckdb::Connection,
107 pub repo_id: String,
108 pub db_path: PathBuf,
109}
110
111impl GraphDb {
112 pub fn open(repo_path: &Path) -> anyhow::Result<Self> {
113 let repo_id = repo_hash(repo_path);
114 let dir = dirs::home_dir()
115 .ok_or_else(|| anyhow::anyhow!("cannot determine home directory"))?
116 .join(".cgx")
117 .join("repos");
118 std::fs::create_dir_all(&dir)?;
119
120 let db_path = dir.join(format!("{}.db", repo_id));
121 let conn = duckdb::Connection::open(&db_path)?;
122
123 conn.execute_batch(
124 "CREATE TABLE IF NOT EXISTS nodes (
125 id VARCHAR PRIMARY KEY,
126 kind VARCHAR NOT NULL,
127 name VARCHAR NOT NULL,
128 path VARCHAR NOT NULL,
129 line_start INTEGER,
130 line_end INTEGER,
131 language VARCHAR,
132 churn DOUBLE DEFAULT 0.0,
133 coupling DOUBLE DEFAULT 0.0,
134 community BIGINT DEFAULT 0,
135 in_degree BIGINT DEFAULT 0,
136 out_degree BIGINT DEFAULT 0,
137 metadata JSON
138 );
139 CREATE TABLE IF NOT EXISTS edges (
140 id VARCHAR PRIMARY KEY,
141 src VARCHAR NOT NULL,
142 dst VARCHAR NOT NULL,
143 kind VARCHAR NOT NULL,
144 weight DOUBLE DEFAULT 1.0,
145 confidence DOUBLE DEFAULT 1.0,
146 metadata JSON
147 );
148 CREATE TABLE IF NOT EXISTS communities (
149 id INTEGER PRIMARY KEY,
150 label VARCHAR,
151 node_count INTEGER,
152 top_nodes JSON
153 );
154 CREATE TABLE IF NOT EXISTS repo_meta (
155 key VARCHAR PRIMARY KEY,
156 value JSON
157 );
158 CREATE TABLE IF NOT EXISTS file_hashes (
159 path VARCHAR PRIMARY KEY,
160 hash VARCHAR NOT NULL,
161 indexed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
162 );
163 CREATE TABLE IF NOT EXISTS tags (
164 id VARCHAR PRIMARY KEY,
165 file_path VARCHAR NOT NULL,
166 line INTEGER NOT NULL,
167 tag_type VARCHAR NOT NULL,
168 text VARCHAR NOT NULL,
169 comment_type VARCHAR NOT NULL DEFAULT 'code'
170 );
171 CREATE INDEX IF NOT EXISTS idx_nodes_kind ON nodes(kind);
172 CREATE INDEX IF NOT EXISTS idx_nodes_path ON nodes(path);
173 CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community);
174 CREATE INDEX IF NOT EXISTS idx_edges_src ON edges(src);
175 CREATE INDEX IF NOT EXISTS idx_edges_dst ON edges(dst);
176 CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind);
177 CREATE INDEX IF NOT EXISTS idx_tags_file ON tags(file_path);
178 CREATE INDEX IF NOT EXISTS idx_tags_type ON tags(tag_type);",
179 )?;
180
181 Ok(Self {
182 conn,
183 repo_id,
184 db_path,
185 })
186 }
187
188 pub fn upsert_nodes(&self, nodes: &[Node]) -> anyhow::Result<usize> {
189 if nodes.is_empty() {
190 return Ok(0);
191 }
192 let mut count = 0;
193 let mut stmt = self.conn.prepare(
194 "INSERT OR REPLACE INTO nodes (id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree)
195 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
196 )?;
197 for node in nodes {
198 stmt.execute(params![
199 node.id,
200 node.kind,
201 node.name,
202 node.path,
203 node.line_start,
204 node.line_end,
205 node.language,
206 node.churn,
207 node.coupling,
208 node.community,
209 node.in_degree,
210 node.out_degree,
211 ])?;
212 count += 1;
213 }
214 Ok(count)
215 }
216
217 pub fn upsert_edges(&self, edges: &[Edge]) -> anyhow::Result<usize> {
218 if edges.is_empty() {
219 return Ok(0);
220 }
221 let mut count = 0;
222 let mut stmt = self.conn.prepare(
223 "INSERT OR REPLACE INTO edges (id, src, dst, kind, weight, confidence)
224 VALUES (?, ?, ?, ?, ?, ?)",
225 )?;
226 for edge in edges {
227 stmt.execute(params![
228 edge.id,
229 edge.src,
230 edge.dst,
231 edge.kind,
232 edge.weight,
233 edge.confidence,
234 ])?;
235 count += 1;
236 }
237 Ok(count)
238 }
239
240 pub fn upsert_tags(&self, tags: &[TagRow]) -> anyhow::Result<usize> {
241 if tags.is_empty() {
242 return Ok(0);
243 }
244 let mut count = 0;
245 let mut stmt = self.conn.prepare(
246 "INSERT OR REPLACE INTO tags (id, file_path, line, tag_type, text, comment_type)
247 VALUES (?, ?, ?, ?, ?, ?)",
248 )?;
249 for tag in tags {
250 stmt.execute(params![
251 tag.id,
252 tag.file_path,
253 tag.line,
254 tag.tag_type,
255 tag.text,
256 tag.comment_type,
257 ])?;
258 count += 1;
259 }
260 Ok(count)
261 }
262
263 pub fn get_tags(
264 &self,
265 tag_type_filter: Option<&str>,
266 comment_type_filter: Option<&str>,
267 ) -> anyhow::Result<Vec<TagRow>> {
268 let sql = match (tag_type_filter, comment_type_filter) {
269 (Some(_), Some(_)) => {
270 "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
271 WHERE tag_type = ? AND comment_type = ? ORDER BY file_path, line"
272 }
273 (Some(_), None) => {
274 "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
275 WHERE tag_type = ? ORDER BY file_path, line"
276 }
277 (None, Some(_)) => {
278 "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
279 WHERE comment_type = ? ORDER BY file_path, line"
280 }
281 (None, None) => {
282 "SELECT id, file_path, line, tag_type, text, comment_type FROM tags \
283 ORDER BY file_path, line"
284 }
285 };
286
287 let mut stmt = self.conn.prepare(sql)?;
288 let map_row = |row: &duckdb::Row| {
289 Ok(TagRow {
290 id: row.get(0)?,
291 file_path: row.get(1)?,
292 line: row.get::<_, u32>(2)?,
293 tag_type: row.get(3)?,
294 text: row.get(4)?,
295 comment_type: row.get(5)?,
296 })
297 };
298
299 let rows = match (tag_type_filter, comment_type_filter) {
300 (Some(t), Some(c)) => stmt.query_map(params![t, c], map_row)?,
301 (Some(t), None) => stmt.query_map(params![t], map_row)?,
302 (None, Some(c)) => stmt.query_map(params![c], map_row)?,
303 (None, None) => stmt.query_map([], map_row)?,
304 };
305
306 let mut results = Vec::new();
307 for row in rows {
308 results.push(row?);
309 }
310 Ok(results)
311 }
312
313 pub fn clear_all_tags(&self) -> anyhow::Result<()> {
314 self.conn.execute_batch(
315 "DROP TABLE IF EXISTS tags;
316 CREATE TABLE IF NOT EXISTS tags (
317 id VARCHAR PRIMARY KEY,
318 file_path VARCHAR NOT NULL,
319 line INTEGER NOT NULL,
320 tag_type VARCHAR NOT NULL,
321 text VARCHAR NOT NULL,
322 comment_type VARCHAR NOT NULL DEFAULT 'code'
323 );
324 CREATE INDEX IF NOT EXISTS idx_tags_file ON tags(file_path);
325 CREATE INDEX IF NOT EXISTS idx_tags_type ON tags(tag_type);",
326 )?;
327 Ok(())
328 }
329
330 pub fn delete_tags_for_paths(&self, paths: &[String]) -> anyhow::Result<()> {
331 if paths.is_empty() {
332 return Ok(());
333 }
334 let mut stmt = self.conn.prepare("DELETE FROM tags WHERE file_path = ?")?;
335 for path in paths {
336 stmt.execute(params![path])?;
337 }
338 Ok(())
339 }
340
341 pub fn get_node(&self, id: &str) -> anyhow::Result<Option<Node>> {
342 let mut stmt = self
343 .conn
344 .prepare("SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree FROM nodes WHERE id = ?")?;
345 let mut rows = stmt.query_map(params![id], |row| {
346 Ok(Node {
347 id: row.get(0)?,
348 kind: row.get(1)?,
349 name: row.get(2)?,
350 path: row.get(3)?,
351 line_start: row.get(4)?,
352 line_end: row.get(5)?,
353 language: row.get(6)?,
354 churn: row.get(7)?,
355 coupling: row.get(8)?,
356 community: row.get(9)?,
357 in_degree: row.get(10)?,
358 out_degree: row.get(11)?,
359 })
360 })?;
361
362 match rows.next() {
363 Some(Ok(node)) => Ok(Some(node)),
364 _ => Ok(None),
365 }
366 }
367
368 pub fn get_neighbors(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
369 let mut seen = std::collections::HashSet::new();
370 seen.insert(id.to_string());
371 let mut current = vec![id.to_string()];
372 let mut result: Vec<Node> = Vec::new();
373 let max_depth = depth.min(3);
374
375 for _ in 0..max_depth {
376 if current.is_empty() {
377 break;
378 }
379 let mut next = Vec::new();
380
381 for cur_id in ¤t {
382 let mut stmt = self.conn.prepare(
383 "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree
384 FROM nodes n
385 INNER JOIN edges e ON (e.dst = n.id AND e.src = ?1) OR (e.src = n.id AND e.dst = ?2)
386 LIMIT 100",
387 )?;
388 let rows = stmt.query_map(params![cur_id, cur_id], |row| {
389 Ok(Node {
390 id: row.get(0)?,
391 kind: row.get(1)?,
392 name: row.get(2)?,
393 path: row.get(3)?,
394 line_start: row.get(4)?,
395 line_end: row.get(5)?,
396 language: row.get(6)?,
397 churn: row.get(7)?,
398 coupling: row.get(8)?,
399 community: row.get(9)?,
400 in_degree: row.get(10)?,
401 out_degree: row.get(11)?,
402 })
403 })?;
404
405 for row in rows {
406 let node = row?;
407 if seen.insert(node.id.clone()) {
408 next.push(node.id.clone());
409 result.push(node);
410 }
411 }
412 }
413 current = next;
414 }
415
416 Ok(result)
417 }
418
419 pub fn get_all_nodes(&self) -> anyhow::Result<Vec<Node>> {
420 let mut stmt = self.conn.prepare(
421 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree FROM nodes",
422 )?;
423 let rows = stmt.query_map([], |row| {
424 Ok(Node {
425 id: row.get(0)?,
426 kind: row.get(1)?,
427 name: row.get(2)?,
428 path: row.get(3)?,
429 line_start: row.get(4)?,
430 line_end: row.get(5)?,
431 language: row.get(6)?,
432 churn: row.get(7)?,
433 coupling: row.get(8)?,
434 community: row.get(9)?,
435 in_degree: row.get(10)?,
436 out_degree: row.get(11)?,
437 })
438 })?;
439
440 let mut nodes = Vec::new();
441 for row in rows {
442 nodes.push(row?);
443 }
444 Ok(nodes)
445 }
446
447 pub fn get_all_edges(&self) -> anyhow::Result<Vec<Edge>> {
448 let mut stmt = self
449 .conn
450 .prepare("SELECT id, src, dst, kind, weight, confidence FROM edges")?;
451 let rows = stmt.query_map([], |row| {
452 Ok(Edge {
453 id: row.get(0)?,
454 src: row.get(1)?,
455 dst: row.get(2)?,
456 kind: row.get(3)?,
457 weight: row.get(4)?,
458 confidence: row.get(5)?,
459 })
460 })?;
461
462 let mut edges = Vec::new();
463 for row in rows {
464 edges.push(row?);
465 }
466 Ok(edges)
467 }
468
469 pub fn node_count(&self) -> anyhow::Result<u64> {
470 let count: i64 = self
471 .conn
472 .query_row("SELECT COUNT(*) FROM nodes", [], |row| row.get(0))?;
473 Ok(count as u64)
474 }
475
476 pub fn edge_count(&self) -> anyhow::Result<u64> {
477 let count: i64 = self
478 .conn
479 .query_row("SELECT COUNT(*) FROM edges", [], |row| row.get(0))?;
480 Ok(count as u64)
481 }
482
483 pub fn clear(&self) -> anyhow::Result<()> {
484 self.conn.execute_batch(
487 "DROP TABLE IF EXISTS edges;
488 DROP TABLE IF EXISTS nodes;
489 DROP TABLE IF EXISTS communities;
490 CREATE TABLE IF NOT EXISTS nodes (
491 id VARCHAR PRIMARY KEY,
492 kind VARCHAR NOT NULL,
493 name VARCHAR NOT NULL,
494 path VARCHAR NOT NULL,
495 line_start INTEGER,
496 line_end INTEGER,
497 language VARCHAR,
498 churn DOUBLE DEFAULT 0.0,
499 coupling DOUBLE DEFAULT 0.0,
500 community BIGINT DEFAULT 0,
501 in_degree BIGINT DEFAULT 0,
502 out_degree BIGINT DEFAULT 0,
503 metadata JSON
504 );
505 CREATE TABLE IF NOT EXISTS edges (
506 id VARCHAR PRIMARY KEY,
507 src VARCHAR NOT NULL,
508 dst VARCHAR NOT NULL,
509 kind VARCHAR NOT NULL,
510 weight DOUBLE DEFAULT 1.0,
511 confidence DOUBLE DEFAULT 1.0,
512 metadata JSON
513 );
514 CREATE TABLE IF NOT EXISTS communities (
515 id INTEGER PRIMARY KEY,
516 label VARCHAR,
517 node_count INTEGER,
518 top_nodes JSON
519 );
520 CREATE INDEX IF NOT EXISTS idx_nodes_kind ON nodes(kind);
521 CREATE INDEX IF NOT EXISTS idx_nodes_path ON nodes(path);
522 CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community);
523 CREATE INDEX IF NOT EXISTS idx_edges_src ON edges(src);
524 CREATE INDEX IF NOT EXISTS idx_edges_dst ON edges(dst);
525 CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind);",
526 )?;
527 Ok(())
528 }
529
530 pub fn get_language_breakdown(&self) -> anyhow::Result<std::collections::HashMap<String, f64>> {
531 let mut stmt = self.conn.prepare(
532 "SELECT language, COUNT(*) as cnt FROM nodes WHERE language != '' GROUP BY language",
533 )?;
534 let rows = stmt.query_map([], |row| {
535 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
536 })?;
537
538 let mut counts: std::collections::HashMap<String, i64> = std::collections::HashMap::new();
539 for row in rows {
540 let (lang, cnt) = row?;
541 *counts.entry(lang).or_default() += cnt;
542 }
543
544 let total: i64 = counts.values().sum();
545 if total == 0 {
546 return Ok(std::collections::HashMap::new());
547 }
548
549 let mut breakdown = std::collections::HashMap::new();
550 for (lang, cnt) in counts {
551 breakdown.insert(lang, cnt as f64 / total as f64);
552 }
553 Ok(breakdown)
554 }
555
556 pub fn get_node_counts_by_kind(
557 &self,
558 ) -> anyhow::Result<std::collections::HashMap<String, u64>> {
559 let mut stmt = self
560 .conn
561 .prepare("SELECT kind, COUNT(*) as cnt FROM nodes GROUP BY kind")?;
562 let rows = stmt.query_map([], |row| {
563 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
564 })?;
565
566 let mut counts = std::collections::HashMap::new();
567 for row in rows {
568 let (kind, cnt) = row?;
569 counts.insert(kind, cnt as u64);
570 }
571 Ok(counts)
572 }
573
574 pub fn upsert_node_scores(
575 &self,
576 node_id: &str,
577 churn: f64,
578 coupling: f64,
579 ) -> anyhow::Result<()> {
580 self.conn.execute(
581 "UPDATE nodes SET churn = ?, coupling = ? WHERE id = ?",
582 params![churn, coupling, node_id],
583 )?;
584 Ok(())
585 }
586
587 pub fn update_in_out_degrees(&self) -> anyhow::Result<()> {
588 self.conn.execute_batch(
589 "UPDATE nodes SET in_degree = 0, out_degree = 0;
590 UPDATE nodes SET out_degree = (SELECT COUNT(*) FROM edges WHERE edges.src = nodes.id);
591 UPDATE nodes SET in_degree = (SELECT COUNT(*) FROM edges WHERE edges.dst = nodes.id);",
592 )?;
593 Ok(())
594 }
595
596 pub fn get_hotspots(&self, limit: usize) -> anyhow::Result<Vec<(String, f64, f64, i64)>> {
597 let mut stmt = self.conn.prepare(
598 "SELECT path, churn, coupling, in_degree
599 FROM nodes
600 WHERE kind = 'File' AND (churn > 0.0 OR in_degree > 0)
601 ORDER BY (churn * COALESCE(coupling, 0.0) + CAST(in_degree AS DOUBLE) * 0.01) DESC
602 LIMIT ?",
603 )?;
604 let rows = stmt.query_map(params![limit as i64], |row| {
605 Ok((
606 row.get::<_, String>(0)?,
607 row.get::<_, f64>(1)?,
608 row.get::<_, f64>(2)?,
609 row.get::<_, i64>(3)?,
610 ))
611 })?;
612 let mut results = Vec::new();
613 for row in rows {
614 results.push(row?);
615 }
616 Ok(results)
617 }
618
619 pub fn get_ownership(&self) -> anyhow::Result<Vec<(String, i64)>> {
620 let mut stmt = self.conn.prepare(
621 "SELECT n.name, COUNT(e.id) as file_count
622 FROM nodes n
623 INNER JOIN edges e ON e.src = n.id AND e.kind = 'OWNS'
624 WHERE n.kind = 'Author'
625 GROUP BY n.name
626 ORDER BY file_count DESC",
627 )?;
628 let rows = stmt.query_map([], |row| {
629 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
630 })?;
631 let mut results = Vec::new();
632 for row in rows {
633 results.push(row?);
634 }
635 Ok(results)
636 }
637
638 pub fn compute_coupling(&self) -> anyhow::Result<()> {
639 self.conn.execute_batch(
640 "UPDATE nodes SET coupling = 0.0;
641 UPDATE nodes SET coupling =
642 CASE
643 WHEN (SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') > 0
644 THEN CAST(in_degree AS DOUBLE) / CAST((SELECT MAX(in_degree) FROM nodes WHERE kind = 'File') AS DOUBLE)
645 ELSE 0.0
646 END
647 WHERE kind = 'File';",
648 )?;
649 Ok(())
650 }
651
652 pub fn update_node_communities(
653 &self,
654 communities: &std::collections::HashMap<String, i64>,
655 ) -> anyhow::Result<usize> {
656 if communities.is_empty() {
657 return Ok(0);
658 }
659 let mut count = 0;
660 let mut stmt = self
661 .conn
662 .prepare("UPDATE nodes SET community = ? WHERE id = ?")?;
663 for (node_id, community) in communities {
664 let affected = stmt.execute(params![*community, node_id.as_str()])?;
665 count += affected;
666 }
667 Ok(count)
668 }
669
670 pub fn get_stats(&self) -> anyhow::Result<RepoStats> {
671 let node_count = self.node_count()?;
672 let edge_count = self.edge_count()?;
673 let lang_breakdown = self.get_language_breakdown()?;
674 let communities = self.get_communities()?;
675 let counts_by_kind = self.get_node_counts_by_kind()?;
676
677 Ok(RepoStats {
678 node_count,
679 edge_count,
680 language_breakdown: lang_breakdown,
681 community_count: communities.len() as u32,
682 function_count: counts_by_kind.get("Function").copied().unwrap_or(0),
683 class_count: counts_by_kind.get("Class").copied().unwrap_or(0),
684 file_count: counts_by_kind.get("File").copied().unwrap_or(0),
685 })
686 }
687
688 pub fn get_entry_points(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
689 let mut stmt = self.conn.prepare(
690 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree
691 FROM nodes
692 WHERE in_degree = 0 AND kind != 'File' AND kind != 'Author'
693 ORDER BY out_degree DESC
694 LIMIT ?",
695 )?;
696 let rows = stmt.query_map(params![limit as i64], |row| {
697 Ok(Node {
698 id: row.get(0)?,
699 kind: row.get(1)?,
700 name: row.get(2)?,
701 path: row.get(3)?,
702 line_start: row.get(4)?,
703 line_end: row.get(5)?,
704 language: row.get(6)?,
705 churn: row.get(7)?,
706 coupling: row.get(8)?,
707 community: row.get(9)?,
708 in_degree: row.get(10)?,
709 out_degree: row.get(11)?,
710 })
711 })?;
712 let mut results = Vec::new();
713 for row in rows {
714 results.push(row?);
715 }
716 Ok(results)
717 }
718
719 pub fn get_god_nodes(&self, limit: usize) -> anyhow::Result<Vec<Node>> {
720 let mut stmt = self.conn.prepare(
721 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree
722 FROM nodes
723 WHERE in_degree > 0 AND kind != 'File' AND kind != 'Author'
724 ORDER BY in_degree DESC
725 LIMIT ?",
726 )?;
727 let rows = stmt.query_map(params![limit as i64], |row| {
728 Ok(Node {
729 id: row.get(0)?,
730 kind: row.get(1)?,
731 name: row.get(2)?,
732 path: row.get(3)?,
733 line_start: row.get(4)?,
734 line_end: row.get(5)?,
735 language: row.get(6)?,
736 churn: row.get(7)?,
737 coupling: row.get(8)?,
738 community: row.get(9)?,
739 in_degree: row.get(10)?,
740 out_degree: row.get(11)?,
741 })
742 })?;
743 let mut results = Vec::new();
744 for row in rows {
745 results.push(row?);
746 }
747 Ok(results)
748 }
749
750 pub fn get_communities(&self) -> anyhow::Result<Vec<CommunityRow>> {
751 let mut stmt = self.conn.prepare(
752 "SELECT community, kind, name, path, in_degree
753 FROM nodes
754 WHERE community > 0
755 ORDER BY community",
756 )?;
757 let rows = stmt.query_map([], |row| {
758 Ok((
759 row.get::<_, i64>(0)?,
760 row.get::<_, String>(1)?,
761 row.get::<_, String>(2)?,
762 row.get::<_, String>(3)?,
763 row.get::<_, i64>(4)?,
764 ))
765 })?;
766
767 let mut community_map: std::collections::HashMap<i64, CommunityGroup> =
768 std::collections::HashMap::new();
769 for row in rows {
770 let (community, kind, name, _path, in_degree) = row?;
771 let entry = community_map
772 .entry(community)
773 .or_insert_with(|| (Vec::new(), 0));
774 entry.0.push((kind, in_degree, name));
775 entry.1 += 1;
776 }
777
778 let mut result: Vec<CommunityRow> = community_map
779 .into_iter()
780 .map(|(community, (mut items, count))| {
781 items.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.2.cmp(&b.2)));
782 let top_nodes: Vec<String> = items
783 .iter()
784 .take(5)
785 .map(|(kind, _deg, name)| format!("{}:{}", kind, name))
786 .collect();
787 let label = top_nodes
788 .first()
789 .cloned()
790 .unwrap_or_else(|| format!("community-{}", community));
791 (community, label, count, top_nodes)
792 })
793 .collect();
794
795 result.sort_by_key(|row| std::cmp::Reverse(row.2));
796 Ok(result)
797 }
798
799 pub fn clear_communities(&self) -> anyhow::Result<()> {
800 self.conn.execute("UPDATE nodes SET community = 0", [])?;
801 self.conn.execute("DELETE FROM communities", [])?;
802 Ok(())
803 }
804
805 pub fn get_dependents(&self, id: &str, depth: u8) -> anyhow::Result<Vec<Node>> {
808 let mut seen = std::collections::HashSet::new();
809 seen.insert(id.to_string());
810 let mut current = vec![id.to_string()];
811 let mut result: Vec<Node> = Vec::new();
812 let max_depth = depth.min(3);
813
814 for _ in 0..max_depth {
815 if current.is_empty() {
816 break;
817 }
818 let mut next = Vec::new();
819 for cur_id in ¤t {
820 let mut stmt = self.conn.prepare(
821 "SELECT DISTINCT n.id, n.kind, n.name, n.path, n.line_start, n.line_end, n.language, n.churn, n.coupling, n.community, n.in_degree, n.out_degree
822 FROM nodes n
823 INNER JOIN edges e ON e.src = n.id AND e.dst = ?
824 LIMIT 100",
825 )?;
826 let rows = stmt.query_map(params![cur_id], |row| {
827 Ok(Node {
828 id: row.get(0)?,
829 kind: row.get(1)?,
830 name: row.get(2)?,
831 path: row.get(3)?,
832 line_start: row.get(4)?,
833 line_end: row.get(5)?,
834 language: row.get(6)?,
835 churn: row.get(7)?,
836 coupling: row.get(8)?,
837 community: row.get(9)?,
838 in_degree: row.get(10)?,
839 out_degree: row.get(11)?,
840 })
841 })?;
842 for row in rows {
843 let node = row?;
844 if seen.insert(node.id.clone()) {
845 next.push(node.id.clone());
846 result.push(node);
847 }
848 }
849 }
850 current = next;
851 }
852
853 Ok(result)
854 }
855
856 pub fn get_nodes_by_community(&self, community: i64) -> anyhow::Result<Vec<Node>> {
857 let mut stmt = self.conn.prepare(
858 "SELECT id, kind, name, path, line_start, line_end, language, churn, coupling, community, in_degree, out_degree FROM nodes WHERE community = ?",
859 )?;
860 let rows = stmt.query_map(params![community], |row| {
861 Ok(Node {
862 id: row.get(0)?,
863 kind: row.get(1)?,
864 name: row.get(2)?,
865 path: row.get(3)?,
866 line_start: row.get(4)?,
867 line_end: row.get(5)?,
868 language: row.get(6)?,
869 churn: row.get(7)?,
870 coupling: row.get(8)?,
871 community: row.get(9)?,
872 in_degree: row.get(10)?,
873 out_degree: row.get(11)?,
874 })
875 })?;
876 let mut nodes = Vec::new();
877 for row in rows {
878 nodes.push(row?);
879 }
880 Ok(nodes)
881 }
882
883 pub fn get_edges_by_community(&self, community: i64) -> anyhow::Result<Vec<Edge>> {
884 let mut stmt = self.conn.prepare(
885 "SELECT DISTINCT e.id, e.src, e.dst, e.kind, e.weight, e.confidence
886 FROM edges e
887 INNER JOIN nodes n1 ON e.src = n1.id AND n1.community = ?
888 INNER JOIN nodes n2 ON e.dst = n2.id AND n2.community = ?",
889 )?;
890 let rows = stmt.query_map(params![community, community], |row| {
891 Ok(Edge {
892 id: row.get(0)?,
893 src: row.get(1)?,
894 dst: row.get(2)?,
895 kind: row.get(3)?,
896 weight: row.get(4)?,
897 confidence: row.get(5)?,
898 })
899 })?;
900 let mut edges = Vec::new();
901 for row in rows {
902 edges.push(row?);
903 }
904 Ok(edges)
905 }
906
907 pub fn get_file_hashes(&self) -> anyhow::Result<std::collections::HashMap<String, String>> {
910 let mut stmt = self.conn.prepare("SELECT path, hash FROM file_hashes")?;
911 let rows = stmt.query_map([], |row| {
912 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
913 })?;
914 let mut result = std::collections::HashMap::new();
915 for row in rows {
916 let (path, hash) = row?;
917 result.insert(path, hash);
918 }
919 Ok(result)
920 }
921
922 pub fn set_file_hash(&self, path: &str, hash: &str) -> anyhow::Result<()> {
923 self.conn.execute(
924 "INSERT OR REPLACE INTO file_hashes (path, hash) VALUES (?, ?)",
925 params![path, hash],
926 )?;
927 Ok(())
928 }
929
930 pub fn remove_file_hashes(&self, paths: &[String]) -> anyhow::Result<()> {
931 if paths.is_empty() {
932 return Ok(());
933 }
934 let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
935 let sql = format!("DELETE FROM file_hashes WHERE path IN ({})", placeholders);
936 let mut stmt = self.conn.prepare(&sql)?;
937 let params: Vec<&dyn duckdb::ToSql> =
938 paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
939 stmt.execute(params.as_slice())?;
940 Ok(())
941 }
942
943 pub fn delete_nodes_by_paths(&self, paths: &[String]) -> anyhow::Result<usize> {
944 if paths.is_empty() {
945 return Ok(0);
946 }
947 let placeholders = paths.iter().map(|_| "?").collect::<Vec<_>>().join(",");
948 let sql_edges = format!(
950 "DELETE FROM edges WHERE src IN (SELECT id FROM nodes WHERE path IN ({})) OR dst IN (SELECT id FROM nodes WHERE path IN ({}))",
951 placeholders, placeholders
952 );
953 let mut stmt_edges = self.conn.prepare(&sql_edges)?;
954 let params_edges: Vec<&dyn duckdb::ToSql> = paths
955 .iter()
956 .chain(paths.iter())
957 .map(|p| p as &dyn duckdb::ToSql)
958 .collect();
959 stmt_edges.execute(params_edges.as_slice())?;
960
961 let sql_nodes = format!("DELETE FROM nodes WHERE path IN ({})", placeholders);
963 let mut stmt_nodes = self.conn.prepare(&sql_nodes)?;
964 let params_nodes: Vec<&dyn duckdb::ToSql> =
965 paths.iter().map(|p| p as &dyn duckdb::ToSql).collect();
966 let count = stmt_nodes.execute(params_nodes.as_slice())?;
967 Ok(count)
968 }
969}
970
971pub fn repo_hash(path: &Path) -> String {
972 let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
973 let path_str = canonical.to_string_lossy().to_string();
974 let mut hasher = Sha256::new();
975 hasher.update(path_str.as_bytes());
976 format!("{:x}", hasher.finalize())[..16].to_string()
977}