1use std::collections::BTreeMap;
2use std::path::Path;
3
4use anyhow::Result;
5use cozo::{DataValue, DbInstance, NamedRows, Num, ScriptMutability};
6
7use std::collections::HashMap;
8
9use super::queries::{
10 format_skeleton, ApiSymbol, BranchInfo, CoverageRow, ExampleTest, FileDeps, HierarchyNode,
11 ImpactRow, ReferenceRow, SkeletonSymbol, SymbolDetail, SymbolRow, TestContext, TestCoverage,
12 TestTarget, TypeHierarchy,
13};
14use super::store::GraphStats;
15use crate::model::{FileExtraction, RelationKind};
16
17type Params = BTreeMap<String, DataValue>;
18
19fn empty_params() -> Params {
20 BTreeMap::new()
21}
22
/// Graph store that wraps a Cozo database instance.
pub struct CozoStore {
    /// Database handle used by every script run and relation import.
    db: DbInstance,
}
26
27impl CozoStore {
28 pub fn open(path: &Path) -> Result<Self> {
29 if let Some(parent) = path.parent() {
30 std::fs::create_dir_all(parent)?;
31 }
32 let db = DbInstance::new("sqlite", path.to_str().unwrap_or(""), Default::default())
33 .map_err(|e| anyhow::anyhow!("failed to open cozo db: {e}"))?;
34 let store = Self { db };
35 store.init_schema()?;
36 Ok(store)
37 }
38
39 fn init_schema(&self) -> Result<()> {
40 for ddl in COZO_SCHEMA {
41 match self
42 .db
43 .run_script(ddl, empty_params(), ScriptMutability::Mutable)
44 {
45 Ok(_) => {}
46 Err(e) => {
47 let msg = format!("{e}");
48 if !msg.contains("already exists") && !msg.contains("conflicts") {
49 return Err(anyhow::anyhow!("schema error: {e}\n DDL: {ddl}"));
50 }
51 }
52 }
53 }
54 for idx in COZO_INDICES {
55 match self
56 .db
57 .run_script(idx, empty_params(), ScriptMutability::Mutable)
58 {
59 Ok(_) => {}
60 Err(e) => {
61 let msg = format!("{e}");
62 if !msg.contains("already exists")
63 && !msg.contains("conflicts")
64 && !msg.contains("duplicate")
65 {
66 return Err(anyhow::anyhow!("index error: {e}\n DDL: {idx}"));
67 }
68 }
69 }
70 }
71 Ok(())
72 }
73
74 fn run(&self, script: &str) -> Result<NamedRows> {
75 self.db
76 .run_script(script, empty_params(), ScriptMutability::Immutable)
77 .map_err(|e| {
78 anyhow::anyhow!(
79 "query failed: {e}\n script: {}",
80 &script[..script.len().min(200)]
81 )
82 })
83 }
84
85 fn run_params(&self, script: &str, params: Params, mutable: bool) -> Result<NamedRows> {
86 let m = if mutable {
87 ScriptMutability::Mutable
88 } else {
89 ScriptMutability::Immutable
90 };
91 self.db.run_script(script, params, m).map_err(|e| {
92 anyhow::anyhow!(
93 "query failed: {e}\n script: {}",
94 &script[..script.len().min(200)]
95 )
96 })
97 }
98
99 pub fn symbols_in_file(&self, file: &str) -> Result<Vec<SymbolRow>> {
102 let mut params = empty_params();
103 params.insert("file".into(), DataValue::Str(file.into()));
104 let r = self.run_params(
105 r#"?[id, name, kind, start_line, end_line] :=
106 *defines{file_id: $file, symbol_id: id},
107 *symbol{id, name, kind, start_line, end_line}
108 :order start_line"#,
109 params,
110 false,
111 )?;
112 Ok(named_rows_to_symbol_rows(&r))
113 }
114
115 pub fn skeleton(&self, file: &str) -> Result<String> {
116 let mut params = empty_params();
117 params.insert("file".into(), DataValue::Str(file.into()));
118
119 let r = self.run_params(
120 r#"?[id, name, kind, start_line, complexity, parameters, return_type, visibility, parent] :=
121 *symbol{id, name, kind, file, start_line, complexity, parameters, return_type, visibility, parent},
122 file = $file
123 :order start_line"#,
124 params.clone(),
125 false,
126 )?;
127
128 if r.rows.is_empty() {
129 return Ok(format!(
130 "No symbols found in '{file}'. File may not be indexed."
131 ));
132 }
133
134 let ids: Vec<String> = r.rows.iter().map(|row| dv_str(&row[0])).collect();
135
136 let mut fan_in: HashMap<String, usize> = HashMap::new();
137 for id in &ids {
138 let mut p = empty_params();
139 p.insert("target".into(), DataValue::Str(id.clone().into()));
140 let cr = self.run_params(
141 r#"?[count(caller)] := *calls{caller, callee: $target}"#,
142 p,
143 false,
144 )?;
145 fan_in.insert(id.clone(), dv_u64(&cr) as usize);
146 }
147
148 let mut stmt_counts: HashMap<String, usize> = HashMap::new();
149 let mut nesting: HashMap<String, u32> = HashMap::new();
150 for id in &ids {
151 let mut p = empty_params();
152 p.insert("sym".into(), DataValue::Str(id.clone().into()));
153 let sr = self.run_params(
154 r#"?[count(stmt_id), max(depth)] :=
155 *has_statement{symbol_id: $sym, statement_id: stmt_id},
156 *statement{id: stmt_id, depth}"#,
157 p,
158 false,
159 )?;
160 if let Some(row) = sr.rows.first() {
161 stmt_counts.insert(id.clone(), dv_u64_val(&row[0]) as usize);
162 nesting.insert(id.clone(), dv_u32(&row[1]));
163 }
164 }
165
166 let symbols: Vec<SkeletonSymbol> = r
167 .rows
168 .iter()
169 .map(|row| {
170 let id = dv_str(&row[0]);
171 SkeletonSymbol {
172 fan_in: fan_in.get(&id).copied().unwrap_or(0),
173 stmt_count: stmt_counts.get(&id).copied().unwrap_or(0),
174 nesting: nesting.get(&id).copied().unwrap_or(0),
175 id,
176 name: dv_str(&row[1]),
177 kind: dv_str(&row[2]),
178 start_line: dv_str(&row[3]),
179 complexity: dv_u32(&row[4]),
180 params: dv_str(&row[5]),
181 return_type: dv_str(&row[6]),
182 visibility: dv_str(&row[7]),
183 parent: dv_str(&row[8]),
184 }
185 })
186 .collect();
187
188 Ok(format_skeleton(file, &symbols))
189 }
190
191 pub fn callers_of(&self, symbol_id: &str) -> Result<Vec<String>> {
192 let mut params = empty_params();
193 params.insert("target".into(), DataValue::Str(symbol_id.into()));
194 let r = self.run_params(
195 r#"?[caller_id] := *calls{caller: caller_id, callee: $target}"#,
196 params,
197 false,
198 )?;
199 Ok(collect_strings(&r))
200 }
201
202 pub fn callees_of(&self, symbol_id: &str) -> Result<Vec<String>> {
203 let mut params = empty_params();
204 params.insert("source".into(), DataValue::Str(symbol_id.into()));
205 let r = self.run_params(
206 r#"?[callee_id] := *calls{caller: $source, callee: callee_id}"#,
207 params,
208 false,
209 )?;
210 Ok(collect_strings(&r))
211 }
212
213 pub fn find_symbol_by_id(&self, symbol_id: &str) -> Result<Option<SymbolDetail>> {
214 let mut params = empty_params();
215 params.insert("id".into(), DataValue::Str(symbol_id.into()));
216 let r = self.run_params(
217 r#"?[id, name, kind, file, start_line, end_line] :=
218 id = $id,
219 *symbol{id, name, kind, file, start_line, end_line}"#,
220 params,
221 false,
222 )?;
223 if let Some(row) = r.rows.first() {
224 Ok(Some(row_to_symbol_detail(row)))
225 } else {
226 Ok(None)
227 }
228 }
229
230 pub fn branches_of(&self, symbol_id: &str) -> Result<Vec<BranchInfo>> {
231 let mut params = empty_params();
232 params.insert("sym".into(), DataValue::Str(symbol_id.into()));
233 let r = self.run_params(
234 r#"?[kind, condition, start_line, depth] :=
235 *has_statement{symbol_id: $sym, statement_id: st_id},
236 *statement{id: st_id, kind, condition, start_line, depth}
237 :order start_line"#,
238 params,
239 false,
240 )?;
241 Ok(r.rows
242 .iter()
243 .map(|row| BranchInfo {
244 kind: dv_str(&row[0]),
245 condition: dv_str(&row[1]),
246 line: dv_u32(&row[2]),
247 depth: dv_u32(&row[3]),
248 })
249 .collect())
250 }
251
252 pub fn transitive_impact(&self, symbol_id: &str, max_depth: u32) -> Result<Vec<ImpactRow>> {
253 let mut params = empty_params();
254 params.insert("target".into(), DataValue::Str(symbol_id.into()));
255 let mut rules = String::new();
257 rules.push_str("layer_1[caller] := *calls{caller, callee: $target}\n");
258 for d in 2..=max_depth {
259 rules.push_str(&format!(
260 "layer_{d}[caller] := layer_{}[mid], *calls{{caller, callee: mid}}\n",
261 d - 1
262 ));
263 }
264 for d in 1..=max_depth {
266 rules.push_str(&format!(
267 "?[id, name, file, kind] := layer_{d}[id], *symbol{{id, name, file, kind}}\n"
268 ));
269 }
270 let r = self.run_params(&rules, params, false)?;
271 Ok(r.rows
272 .iter()
273 .map(|row| ImpactRow {
274 id: dv_str(&row[0]),
275 name: dv_str(&row[1]),
276 file: dv_str(&row[2]),
277 kind: dv_str(&row[3]),
278 })
279 .collect())
280 }
281
282 pub fn symbols_in_range(&self, file: &str, start: u32, end: u32) -> Result<Vec<SymbolDetail>> {
283 let mut params = empty_params();
284 params.insert("file".into(), DataValue::Str(file.into()));
285 params.insert("start".into(), DataValue::from(start as i64));
286 params.insert("end".into(), DataValue::from(end as i64));
287 let r = self.run_params(
288 r#"?[id, name, kind, file, start_line, end_line] :=
289 *defines{file_id: $file, symbol_id: id},
290 *symbol{id, name, kind, file, start_line, end_line},
291 start_line <= $end, end_line >= $start
292 :order start_line"#,
293 params,
294 false,
295 )?;
296 Ok(r.rows.iter().map(|row| row_to_symbol_detail(row)).collect())
297 }
298
299 pub fn find_all_references(&self, symbol_id: &str) -> Result<Vec<ReferenceRow>> {
300 let mut params = empty_params();
301 params.insert("target".into(), DataValue::Str(symbol_id.into()));
302 let r = self.run_params(
303 r#"?[caller_id, caller_name, file, start_line, target_id] :=
304 *calls{caller: caller_id, callee: $target},
305 *symbol{id: caller_id, name: caller_name, file, start_line},
306 target_id = $target"#,
307 params,
308 false,
309 )?;
310 Ok(r.rows
311 .iter()
312 .map(|row| ReferenceRow {
313 caller_id: dv_str(&row[0]),
314 caller_name: dv_str(&row[1]),
315 file: dv_str(&row[2]),
316 line: dv_u32(&row[3]),
317 target_id: dv_str(&row[4]),
318 })
319 .collect())
320 }
321
322 pub fn get_api_surface(&self) -> Result<Vec<ApiSymbol>> {
323 let mut params = empty_params();
324 params.insert("vis".into(), DataValue::Str("public".into()));
325 params.insert("route".into(), DataValue::Str("Route".into()));
326 let r = self.run_params(
327 r#"?[id, name, kind, file, start_line, visibility, docstring] :=
328 visibility = $vis,
329 *symbol{id, name, kind, file, start_line, visibility, docstring}
330 ?[id, name, kind, file, start_line, visibility, docstring] :=
331 kind = $route,
332 *symbol{id, name, kind, file, start_line, visibility, docstring}
333 :order file, start_line"#,
334 params,
335 false,
336 )?;
337 Ok(r.rows
338 .iter()
339 .map(|row| ApiSymbol {
340 id: dv_str(&row[0]),
341 name: dv_str(&row[1]),
342 kind: dv_str(&row[2]),
343 file: dv_str(&row[3]),
344 line: dv_u32(&row[4]),
345 visibility: dv_str(&row[5]),
346 docstring: dv_str(&row[6]),
347 })
348 .collect())
349 }
350
351 pub fn get_file_deps(&self, file: &str) -> Result<FileDeps> {
352 let mut params = empty_params();
353 params.insert("file".into(), DataValue::Str(file.into()));
354
355 let r_out = self.run_params(
356 r#"?[dep_file] := *imports{importer: $file, imported: dep_id},
357 *module{id: dep_id, file: dep_file}"#,
358 params.clone(),
359 false,
360 )?;
361 let imports = collect_strings(&r_out);
362
363 let r_in = self.run_params(
364 r#"?[importer_file] := *imports{importer: imp_id, imported: $file},
365 *module{id: imp_id, file: importer_file}"#,
366 params,
367 false,
368 )?;
369 let imported_by = collect_strings(&r_in);
370
371 Ok(FileDeps {
372 file: file.to_string(),
373 imports,
374 imported_by,
375 })
376 }
377
378 pub fn get_type_hierarchy(&self, symbol_id: &str, max_depth: u32) -> Result<TypeHierarchy> {
379 let mut params = empty_params();
380 params.insert("root".into(), DataValue::Str(symbol_id.into()));
381
382 let mut up_rules = String::new();
384 up_rules.push_str("layer_1[parent] := *inherits{child: $root, parent}\n");
385 for d in 2..=max_depth {
386 up_rules.push_str(&format!(
387 "layer_{d}[gp] := layer_{}[p], *inherits{{child: p, parent: gp}}\n",
388 d - 1
389 ));
390 }
391 for d in 1..=max_depth {
392 up_rules.push_str(&format!(
393 "?[id, name, kind, file] := layer_{d}[id], *symbol{{id, name, kind, file}}\n"
394 ));
395 }
396 let r_up = self.run_params(&up_rules, params.clone(), false)?;
397 let ancestors: Vec<HierarchyNode> = r_up
398 .rows
399 .iter()
400 .map(|row| HierarchyNode {
401 id: dv_str(&row[0]),
402 name: dv_str(&row[1]),
403 kind: dv_str(&row[2]),
404 file: dv_str(&row[3]),
405 })
406 .collect();
407
408 let mut down_rules = String::new();
410 down_rules.push_str("layer_1[child] := *inherits{child, parent: $root}\n");
411 for d in 2..=max_depth {
412 down_rules.push_str(&format!(
413 "layer_{d}[gc] := layer_{}[p], *inherits{{child: gc, parent: p}}\n",
414 d - 1
415 ));
416 }
417 for d in 1..=max_depth {
418 down_rules.push_str(&format!(
419 "?[id, name, kind, file] := layer_{d}[id], *symbol{{id, name, kind, file}}\n"
420 ));
421 }
422 let r_down = self.run_params(&down_rules, params.clone(), false)?;
423 let descendants: Vec<HierarchyNode> = r_down
424 .rows
425 .iter()
426 .map(|row| HierarchyNode {
427 id: dv_str(&row[0]),
428 name: dv_str(&row[1]),
429 kind: dv_str(&row[2]),
430 file: dv_str(&row[3]),
431 })
432 .collect();
433
434 let root_detail = self.find_symbol_by_id(symbol_id)?;
435
436 Ok(TypeHierarchy {
437 root_id: symbol_id.to_string(),
438 root_name: root_detail
439 .as_ref()
440 .map(|s| s.name.clone())
441 .unwrap_or_default(),
442 ancestors,
443 descendants,
444 })
445 }
446
447 pub fn get_test_coverage(&self) -> Result<TestCoverage> {
448 let r_tested = self.run(r#"?[symbol_id, test_id] := *tested_by{symbol_id, test_id}"#)?;
450 let mut tested_map: HashMap<String, String> = HashMap::new();
451 for row in &r_tested.rows {
452 tested_map.insert(dv_str(&row[0]), dv_str(&row[1]));
453 }
454
455 let r_syms = self.run(r#"?[id, name, kind, file] := *symbol{id, name, kind, file}"#)?;
457
458 let mut covered = Vec::new();
459 let mut uncovered = Vec::new();
460 for row in &r_syms.rows {
461 let kind = dv_str(&row[2]);
462 if !is_testable_kind(&kind) {
463 continue;
464 }
465 let id = dv_str(&row[0]);
466 if let Some(test_id) = tested_map.get(&id) {
467 covered.push(CoverageRow {
468 symbol_id: id,
469 symbol_name: dv_str(&row[1]),
470 kind,
471 file: dv_str(&row[3]),
472 test_id: Some(test_id.clone()),
473 });
474 } else {
475 uncovered.push(CoverageRow {
476 symbol_id: id,
477 symbol_name: dv_str(&row[1]),
478 kind,
479 file: dv_str(&row[3]),
480 test_id: None,
481 });
482 }
483 }
484
485 let total = covered.len() + uncovered.len();
486 let pct = (covered.len() * 100).checked_div(total).unwrap_or(0);
487
488 Ok(TestCoverage {
489 covered_count: covered.len(),
490 uncovered_count: uncovered.len(),
491 coverage_pct: pct,
492 covered,
493 uncovered,
494 })
495 }
496
497 pub fn generate_test_context(
498 &self,
499 file_filter: Option<&str>,
500 limit: usize,
501 ) -> Result<TestContext> {
502 let framework = self.detect_test_framework()?;
503 let example_test = self.find_example_test(file_filter)?;
504
505 let r_tested = self.run(r#"?[symbol_id] := *tested_by{symbol_id, test_id: _}"#)?;
507 let tested_ids: std::collections::HashSet<String> =
508 r_tested.rows.iter().map(|row| dv_str(&row[0])).collect();
509
510 let file_clause = if let Some(f) = file_filter {
512 format!(r#", starts_with(file, "{}")"#, f.replace('"', "\\\""))
513 } else {
514 String::new()
515 };
516
517 let r = self.run(&format!(
518 r#"?[id, name, kind, file, start_line, end_line, visibility, parameters, return_type, complexity] :=
519 *symbol{{id, name, kind, file, start_line, end_line, visibility, parameters, return_type, complexity}}{file_clause}
520 :order -complexity, file, start_line"#,
521 ))?;
522
523 let mut targets: Vec<TestTarget> = r
525 .rows
526 .iter()
527 .filter(|row| {
528 let kind = dv_str(&row[2]);
529 is_testable_kind(&kind) && !tested_ids.contains(&dv_str(&row[0]))
530 })
531 .map(|row| {
532 let visibility = dv_str(&row[6]);
533 let complexity = dv_u32(&row[9]);
534 let vis_score: u32 = if visibility == "public" || visibility == "pub" {
535 10
536 } else {
537 0
538 };
539 TestTarget {
540 symbol_id: dv_str(&row[0]),
541 name: dv_str(&row[1]),
542 kind: dv_str(&row[2]),
543 file: dv_str(&row[3]),
544 start_line: dv_u32(&row[4]),
545 end_line: dv_u32(&row[5]),
546 visibility,
547 parameters: dv_str(&row[7]),
548 return_type: dv_str(&row[8]),
549 complexity,
550 callers: Vec::new(),
551 callees: Vec::new(),
552 branches: Vec::new(),
553 priority_score: complexity * 5 + vis_score,
554 }
555 })
556 .take(limit)
557 .collect();
558
559 if !targets.is_empty() {
561 let ids: Vec<&str> = targets.iter().map(|t| t.symbol_id.as_str()).collect();
562 let callers_map = self.batch_callers(&ids)?;
563 let callees_map = self.batch_callees(&ids)?;
564 let mut branches_map = self.batch_branches(&ids)?;
565
566 for t in &mut targets {
567 t.callers = callers_map.get(&t.symbol_id).cloned().unwrap_or_default();
568 t.callees = callees_map.get(&t.symbol_id).cloned().unwrap_or_default();
569 t.branches = branches_map.remove(&t.symbol_id).unwrap_or_default();
570 t.priority_score += t.callers.len() as u32 * 3;
571 }
572 }
573
574 targets.sort_by_key(|t| std::cmp::Reverse(t.priority_score));
575
576 Ok(TestContext {
577 framework,
578 example_test,
579 targets,
580 })
581 }
582
583 fn detect_test_framework(&self) -> Result<String> {
584 let r = self.run(
585 r#"?[lang, count(lang)] := *symbol{kind: "Test", language: lang}
586 :order -count(lang)
587 :limit 1"#,
588 );
589 if let Ok(r) = r {
590 if let Some(row) = r.rows.first() {
591 let lang = dv_str(&row[0]);
592 let fw = match lang.as_str() {
593 "go" => "go (go test)",
594 "rust" => "rust (cargo test)",
595 "python" => "python (unittest/pytest)",
596 "java" | "kotlin" => "java (junit)",
597 "csharp" => "csharp (nunit/xunit)",
598 "javascript" | "typescript" => "javascript (jest/vitest)",
599 _ if !lang.is_empty() => return Ok(format!("{lang} (detected)")),
600 _ => "unknown",
601 };
602 if fw != "unknown" {
603 return Ok(fw.to_string());
604 }
605 }
606 }
607 Ok("unknown".to_string())
608 }
609
610 fn find_example_test(&self, file_filter: Option<&str>) -> Result<Option<ExampleTest>> {
611 let file_clause = if let Some(f) = file_filter {
612 format!(r#", starts_with(file, "{}")"#, f.replace('"', "\\\""))
613 } else {
614 String::new()
615 };
616 let r = self.run(&format!(
617 r#"?[id, name, file, start_line, end_line] :=
618 *symbol{{id, name, kind: "Test", file, start_line, end_line}}{file_clause}
619 :limit 1"#,
620 ))?;
621 if let Some(row) = r.rows.first() {
622 Ok(Some(ExampleTest {
623 symbol_id: dv_str(&row[0]),
624 name: dv_str(&row[1]),
625 file: dv_str(&row[2]),
626 start_line: dv_u32(&row[3]),
627 end_line: dv_u32(&row[4]),
628 }))
629 } else {
630 Ok(None)
631 }
632 }
633
634 pub fn raw_query(&self, script: &str) -> Result<Vec<Vec<String>>> {
635 let r = self.run(script)?;
636 Ok(r.rows
637 .iter()
638 .map(|row| row.iter().map(dv_str).collect())
639 .collect())
640 }
641
642 #[allow(clippy::type_complexity)]
645 pub fn import_symbols(
646 &self,
647 rows: &[(
648 String,
649 String,
650 String,
651 String,
652 i64,
653 i64,
654 String,
655 String,
656 String,
657 String,
658 String,
659 i64,
660 String,
661 String,
662 )],
663 ) -> Result<()> {
664 if rows.is_empty() {
665 return Ok(());
666 }
667 let headers = vec![
668 "id".into(),
669 "name".into(),
670 "kind".into(),
671 "file".into(),
672 "start_line".into(),
673 "end_line".into(),
674 "signature_hash".into(),
675 "language".into(),
676 "visibility".into(),
677 "parent".into(),
678 "docstring".into(),
679 "complexity".into(),
680 "parameters".into(),
681 "return_type".into(),
682 ];
683 let data_rows: Vec<Vec<DataValue>> = rows
684 .iter()
685 .map(|r| {
686 vec![
687 DataValue::Str(r.0.clone().into()),
688 DataValue::Str(r.1.clone().into()),
689 DataValue::Str(r.2.clone().into()),
690 DataValue::Str(r.3.clone().into()),
691 DataValue::from(r.4),
692 DataValue::from(r.5),
693 DataValue::Str(r.6.clone().into()),
694 DataValue::Str(r.7.clone().into()),
695 DataValue::Str(r.8.clone().into()),
696 DataValue::Str(r.9.clone().into()),
697 DataValue::Str(r.10.clone().into()),
698 DataValue::from(r.11),
699 DataValue::Str(r.12.clone().into()),
700 DataValue::Str(r.13.clone().into()),
701 ]
702 })
703 .collect();
704 let named = NamedRows::new(headers, data_rows);
705 let mut map = BTreeMap::new();
706 map.insert("symbol".to_string(), named);
707 self.db
708 .import_relations(map)
709 .map_err(|e| anyhow::anyhow!("import symbols: {e}"))
710 }
711
712 pub fn import_modules(
713 &self,
714 rows: &[(String, String, String, String, String, String)],
715 ) -> Result<()> {
716 if rows.is_empty() {
717 return Ok(());
718 }
719 let headers = vec![
720 "id".into(),
721 "name".into(),
722 "file".into(),
723 "language".into(),
724 "content_hash".into(),
725 "summary".into(),
726 ];
727 let data_rows: Vec<Vec<DataValue>> = rows
728 .iter()
729 .map(|r| {
730 vec![
731 DataValue::Str(r.0.clone().into()),
732 DataValue::Str(r.1.clone().into()),
733 DataValue::Str(r.2.clone().into()),
734 DataValue::Str(r.3.clone().into()),
735 DataValue::Str(r.4.clone().into()),
736 DataValue::Str(r.5.clone().into()),
737 ]
738 })
739 .collect();
740 let named = NamedRows::new(headers, data_rows);
741 let mut map = BTreeMap::new();
742 map.insert("module".to_string(), named);
743 self.db
744 .import_relations(map)
745 .map_err(|e| anyhow::anyhow!("import modules: {e}"))
746 }
747
748 pub fn import_files(&self, rows: &[(String, String, String, String, i64)]) -> Result<()> {
749 if rows.is_empty() {
750 return Ok(());
751 }
752 let headers = vec![
753 "id".into(),
754 "name".into(),
755 "path".into(),
756 "language".into(),
757 "symbol_count".into(),
758 ];
759 let data_rows: Vec<Vec<DataValue>> = rows
760 .iter()
761 .map(|r| {
762 vec![
763 DataValue::Str(r.0.clone().into()),
764 DataValue::Str(r.1.clone().into()),
765 DataValue::Str(r.2.clone().into()),
766 DataValue::Str(r.3.clone().into()),
767 DataValue::from(r.4),
768 ]
769 })
770 .collect();
771 let named = NamedRows::new(headers, data_rows);
772 let mut map = BTreeMap::new();
773 map.insert("file".to_string(), named);
774 self.db
775 .import_relations(map)
776 .map_err(|e| anyhow::anyhow!("import files: {e}"))
777 }
778
779 pub fn import_edges(&self, relation: &str, pairs: &[(String, String)]) -> Result<()> {
780 if pairs.is_empty() {
781 return Ok(());
782 }
783 let (col_a, col_b) = edge_columns(relation);
784 if relation == "calls" {
785 let headers = vec![col_a.to_string(), col_b.to_string(), "line".to_string()];
786 let data_rows: Vec<Vec<DataValue>> = pairs
787 .iter()
788 .map(|(a, b)| {
789 vec![
790 DataValue::Str(a.clone().into()),
791 DataValue::Str(b.clone().into()),
792 DataValue::from(0i64),
793 ]
794 })
795 .collect();
796 let named = NamedRows::new(headers, data_rows);
797 let mut map = BTreeMap::new();
798 map.insert(relation.to_string(), named);
799 self.db
800 .import_relations(map)
801 .map_err(|e| anyhow::anyhow!("import {relation}: {e}"))
802 } else {
803 let headers = vec![col_a.to_string(), col_b.to_string()];
804 let data_rows: Vec<Vec<DataValue>> = pairs
805 .iter()
806 .map(|(a, b)| {
807 vec![
808 DataValue::Str(a.clone().into()),
809 DataValue::Str(b.clone().into()),
810 ]
811 })
812 .collect();
813 let named = NamedRows::new(headers, data_rows);
814 let mut map = BTreeMap::new();
815 map.insert(relation.to_string(), named);
816 self.db
817 .import_relations(map)
818 .map_err(|e| anyhow::anyhow!("import {relation}: {e}"))
819 }
820 }
821
822 pub fn import_calls_with_lines(&self, rows: &[(String, String, i64)]) -> Result<()> {
823 if rows.is_empty() {
824 return Ok(());
825 }
826 let headers = vec!["caller".into(), "callee".into(), "line".into()];
827 let data_rows: Vec<Vec<DataValue>> = rows
828 .iter()
829 .map(|r| {
830 vec![
831 DataValue::Str(r.0.clone().into()),
832 DataValue::Str(r.1.clone().into()),
833 DataValue::from(r.2),
834 ]
835 })
836 .collect();
837 let named = NamedRows::new(headers, data_rows);
838 let mut map = BTreeMap::new();
839 map.insert("calls".to_string(), named);
840 self.db
841 .import_relations(map)
842 .map_err(|e| anyhow::anyhow!("import calls: {e}"))
843 }
844
845 pub fn import_statements(
846 &self,
847 rows: &[(String, String, String, i64, i64, i64, String)],
848 ) -> Result<()> {
849 if rows.is_empty() {
850 return Ok(());
851 }
852 let headers = vec![
853 "id".into(),
854 "kind".into(),
855 "condition".into(),
856 "start_line".into(),
857 "end_line".into(),
858 "depth".into(),
859 "parent_symbol".into(),
860 ];
861 let data_rows: Vec<Vec<DataValue>> = rows
862 .iter()
863 .map(|r| {
864 vec![
865 DataValue::Str(r.0.clone().into()),
866 DataValue::Str(r.1.clone().into()),
867 DataValue::Str(r.2.clone().into()),
868 DataValue::from(r.3),
869 DataValue::from(r.4),
870 DataValue::from(r.5),
871 DataValue::Str(r.6.clone().into()),
872 ]
873 })
874 .collect();
875 let named = NamedRows::new(headers, data_rows);
876 let mut map = BTreeMap::new();
877 map.insert("statement".to_string(), named);
878 self.db
879 .import_relations(map)
880 .map_err(|e| anyhow::anyhow!("import statements: {e}"))
881 }
882
883 pub fn import_raw(
884 &self,
885 relation: &str,
886 headers: Vec<String>,
887 rows: Vec<Vec<DataValue>>,
888 ) -> Result<()> {
889 if rows.is_empty() {
890 return Ok(());
891 }
892 let named = NamedRows::new(headers, rows);
893 let mut map = BTreeMap::new();
894 map.insert(relation.to_string(), named);
895 self.db
896 .import_relations(map)
897 .map_err(|e| anyhow::anyhow!("import {relation}: {e}"))
898 }
899
900 pub fn import_folders(&self, rows: &[(String, String, String)]) -> Result<()> {
901 if rows.is_empty() {
902 return Ok(());
903 }
904 let headers = vec!["id".into(), "name".into(), "path".into()];
905 let data_rows: Vec<Vec<DataValue>> = rows
906 .iter()
907 .map(|r| {
908 vec![
909 DataValue::Str(r.0.clone().into()),
910 DataValue::Str(r.1.clone().into()),
911 DataValue::Str(r.2.clone().into()),
912 ]
913 })
914 .collect();
915 let named = NamedRows::new(headers, data_rows);
916 let mut map = BTreeMap::new();
917 map.insert("folder".to_string(), named);
918 self.db
919 .import_relations(map)
920 .map_err(|e| anyhow::anyhow!("import folders: {e}"))
921 }
922
923 pub fn import_dependencies(
924 &self,
925 rows: &[(String, String, String, String, bool)],
926 ) -> Result<()> {
927 if rows.is_empty() {
928 return Ok(());
929 }
930 let headers = vec![
931 "id".into(),
932 "name".into(),
933 "version".into(),
934 "ecosystem".into(),
935 "is_dev".into(),
936 ];
937 let data_rows: Vec<Vec<DataValue>> = rows
938 .iter()
939 .map(|r| {
940 vec![
941 DataValue::Str(r.0.clone().into()),
942 DataValue::Str(r.1.clone().into()),
943 DataValue::Str(r.2.clone().into()),
944 DataValue::Str(r.3.clone().into()),
945 DataValue::Bool(r.4),
946 ]
947 })
948 .collect();
949 let named = NamedRows::new(headers, data_rows);
950 let mut map = BTreeMap::new();
951 map.insert("dependency".to_string(), named);
952 self.db
953 .import_relations(map)
954 .map_err(|e| anyhow::anyhow!("import dependencies: {e}"))
955 }
956
957 pub fn import_clusters(&self, rows: &[(String, String, String)]) -> Result<()> {
958 if rows.is_empty() {
959 return Ok(());
960 }
961 let headers = vec!["id".into(), "name".into(), "description".into()];
962 let data_rows: Vec<Vec<DataValue>> = rows
963 .iter()
964 .map(|r| {
965 vec![
966 DataValue::Str(r.0.clone().into()),
967 DataValue::Str(r.1.clone().into()),
968 DataValue::Str(r.2.clone().into()),
969 ]
970 })
971 .collect();
972 let named = NamedRows::new(headers, data_rows);
973 let mut map = BTreeMap::new();
974 map.insert("cluster".to_string(), named);
975 self.db
976 .import_relations(map)
977 .map_err(|e| anyhow::anyhow!("import clusters: {e}"))
978 }
979
980 pub fn upsert_file(&self, extraction: &FileExtraction) -> Result<()> {
983 self.delete_file_data(&extraction.file)?;
984 self.insert_file_data(extraction)?;
985 self.invalidate_caches()
986 }
987
988 pub fn upsert_file_batch(&self, extraction: &FileExtraction) -> Result<()> {
989 self.delete_file_data(&extraction.file)?;
990 self.insert_file_data(extraction)
991 }
992
993 fn delete_file_data(&self, file: &str) -> Result<()> {
994 let mut params = empty_params();
995 params.insert("file".into(), DataValue::Str(file.into()));
996
997 let _ = self.run_params(
999 r#"?[id] := *defines{file_id: $file, symbol_id: sym}, *has_statement{symbol_id: sym, statement_id: id}
1000 :rm statement {id}"#,
1001 params.clone(), true,
1002 );
1003 let _ = self.run_params(
1005 r#"?[symbol_id, statement_id] := *defines{file_id: $file, symbol_id}, *has_statement{symbol_id, statement_id}
1006 :rm has_statement {symbol_id, statement_id}"#,
1007 params.clone(), true,
1008 );
1009 let _ = self.run_params(
1011 r#"?[caller, callee, line] := *defines{file_id: $file, symbol_id: caller}, *calls{caller, callee, line}
1012 :rm calls {caller, callee, line}"#,
1013 params.clone(), true,
1014 );
1015 for (rel, col_a, col_b) in &[
1017 ("inherits", "child", "parent"),
1018 ("tested_by", "symbol_id", "test_id"),
1019 ("reads_rel", "reader", "target"),
1020 ("writes_rel", "writer", "target"),
1021 ("has_concern", "symbol_id", "concern_id"),
1022 ("has_config", "symbol_id", "config_id"),
1023 ("resolves_to", "source", "target"),
1024 ("taint_flow", "source", "target"),
1025 ] {
1026 let q = format!(
1027 "?[{col_a}, {col_b}] := *defines{{file_id: $file, symbol_id: {col_a}}}, *{rel}{{{col_a}, {col_b}}}
1028 :rm {rel} {{{col_a}, {col_b}}}"
1029 );
1030 let _ = self.run_params(&q, params.clone(), true);
1031 }
1032 let _ = self.run_params(
1034 r#"?[module_id, symbol_id] := *contains{module_id: $file, symbol_id}
1035 :rm contains {module_id, symbol_id}"#,
1036 params.clone(),
1037 true,
1038 );
1039 let _ = self.run_params(
1041 r#"?[importer, imported] := *imports{importer: $file, imported}
1042 :rm imports {importer, imported}"#,
1043 params.clone(),
1044 true,
1045 );
1046 let _ = self.run_params(
1048 r#"?[file_id, symbol_id] := *defines{file_id: $file, symbol_id}
1049 :rm defines {file_id, symbol_id}"#,
1050 params.clone(),
1051 true,
1052 );
1053 let _ = self.run_params(
1055 r#"?[id] := *symbol{id, file: $file}
1056 :rm symbol {id}"#,
1057 params.clone(),
1058 true,
1059 );
1060 let _ = self.run_params(
1062 r#"?[id] := id = $file
1063 :rm module {id}"#,
1064 params.clone(),
1065 true,
1066 );
1067 let _ = self.run_params(
1069 r#"?[id] := id = $file
1070 :rm file {id}"#,
1071 params,
1072 true,
1073 );
1074 Ok(())
1075 }
1076
1077 fn insert_file_data(&self, extraction: &FileExtraction) -> Result<()> {
1078 let module_id = &extraction.file;
1079 let module_name = extraction
1080 .file
1081 .rsplit_once('/')
1082 .map(|(_, f)| f)
1083 .unwrap_or(&extraction.file);
1084 let file_name = module_name;
1085
1086 self.import_modules(&[(
1088 module_id.clone(),
1089 module_name.to_string(),
1090 extraction.file.clone(),
1091 extraction.language.clone(),
1092 extraction.content_hash.clone(),
1093 String::new(),
1094 )])?;
1095
1096 self.import_files(&[(
1098 extraction.file.clone(),
1099 file_name.to_string(),
1100 extraction.file.clone(),
1101 extraction.language.clone(),
1102 extraction.symbols.len() as i64,
1103 )])?;
1104
1105 if !extraction.symbols.is_empty() {
1107 let sym_rows: Vec<_> = extraction
1108 .symbols
1109 .iter()
1110 .map(|sym| {
1111 (
1112 sym.id.clone(),
1113 sym.name.clone(),
1114 sym.kind.as_str().to_string(),
1115 extraction.file.clone(),
1116 sym.span.start_line as i64,
1117 sym.span.end_line as i64,
1118 sym.signature_hash.clone(),
1119 sym.language.clone(),
1120 sym.visibility.clone().unwrap_or_default(),
1121 sym.parent.clone().unwrap_or_default(),
1122 sym.docstring.clone().unwrap_or_default(),
1123 sym.complexity as i64,
1124 sym.parameters.clone().unwrap_or_default(),
1125 sym.return_type.clone().unwrap_or_default(),
1126 )
1127 })
1128 .collect();
1129 self.import_symbols(&sym_rows)?;
1130
1131 let contains: Vec<_> = extraction
1133 .symbols
1134 .iter()
1135 .map(|s| (module_id.clone(), s.id.clone()))
1136 .collect();
1137 self.import_edges("contains", &contains)?;
1138
1139 let defines: Vec<_> = extraction
1141 .symbols
1142 .iter()
1143 .map(|s| (extraction.file.clone(), s.id.clone()))
1144 .collect();
1145 self.import_edges("defines", &defines)?;
1146 }
1147
1148 let mut calls_rows: Vec<(String, String, i64)> = Vec::new();
1150 let mut inherits_pairs: Vec<(String, String)> = Vec::new();
1151 let mut tested_by_pairs: Vec<(String, String)> = Vec::new();
1152 let mut imports_pairs: Vec<(String, String)> = Vec::new();
1153 let mut reads_pairs: Vec<(String, String)> = Vec::new();
1154 let mut writes_pairs: Vec<(String, String)> = Vec::new();
1155 let mut custom_pairs: HashMap<String, Vec<(String, String)>> = HashMap::new();
1156
1157 for rel in &extraction.relations {
1158 let line = rel.span.as_ref().map(|s| s.start_line as i64).unwrap_or(0);
1159 match &rel.kind {
1160 RelationKind::Calls | RelationKind::CalledBy => {
1161 calls_rows.push((rel.source_id.clone(), rel.target_id.clone(), line));
1162 }
1163 RelationKind::Inherits | RelationKind::InheritedBy => {
1164 inherits_pairs.push((rel.source_id.clone(), rel.target_id.clone()));
1165 }
1166 RelationKind::TestedBy | RelationKind::Tests => {
1167 tested_by_pairs.push((rel.source_id.clone(), rel.target_id.clone()));
1168 }
1169 RelationKind::Imports | RelationKind::ImportedBy => {
1170 imports_pairs.push((rel.source_id.clone(), rel.target_id.clone()));
1171 }
1172 RelationKind::Reads => {
1173 reads_pairs.push((rel.source_id.clone(), rel.target_id.clone()));
1174 }
1175 RelationKind::Writes => {
1176 writes_pairs.push((rel.source_id.clone(), rel.target_id.clone()));
1177 }
1178 RelationKind::Custom(edge_name) => {
1179 custom_pairs
1180 .entry(edge_name.clone())
1181 .or_default()
1182 .push((rel.source_id.clone(), rel.target_id.clone()));
1183 }
1184 _ => {}
1185 }
1186 }
1187
1188 self.import_calls_with_lines(&calls_rows)?;
1189 self.import_edges("inherits", &inherits_pairs)?;
1190 self.import_edges("tested_by", &tested_by_pairs)?;
1191 self.import_edges("imports", &imports_pairs)?;
1192 self.import_edges("reads_rel", &reads_pairs)?;
1193 self.import_edges("writes_rel", &writes_pairs)?;
1194
1195 for (edge_name, pairs) in &custom_pairs {
1196 let lower = edge_name.to_lowercase();
1197 let ddl = format!(":create {lower} {{source: String, target: String}}");
1198 let _ = self.create_custom_edge(&ddl);
1199 self.import_edges(&lower, pairs)?;
1200 }
1201
1202 if !extraction.statements.is_empty() {
1204 let stmt_rows: Vec<_> = extraction
1205 .statements
1206 .iter()
1207 .map(|s| {
1208 (
1209 s.id.clone(),
1210 s.kind.as_str().to_string(),
1211 s.condition.clone(),
1212 s.start_line as i64,
1213 s.end_line as i64,
1214 s.depth as i64,
1215 s.parent_symbol.clone(),
1216 )
1217 })
1218 .collect();
1219 self.import_statements(&stmt_rows)?;
1220
1221 let has_stmt: Vec<_> = extraction
1222 .statements
1223 .iter()
1224 .map(|s| (s.parent_symbol.clone(), s.id.clone()))
1225 .collect();
1226 self.import_edges("has_statement", &has_stmt)?;
1227 }
1228
1229 Ok(())
1230 }
1231
1232 pub fn refresh_materialized(&self) -> Result<()> {
1233 self.refresh_meta()?;
1234 self.refresh_testable()
1235 }
1236
1237 fn invalidate_caches(&self) -> Result<()> {
1238 let _ = self.run_params(
1239 "?[key, val] <- []\n:replace meta_cache {key: String => val: Int}",
1240 empty_params(),
1241 true,
1242 );
1243 let _ = self.run_params(
1244 "?[id] <- []\n:replace testable_cache {id: String}",
1245 empty_params(),
1246 true,
1247 );
1248 Ok(())
1249 }
1250
1251 fn batch_callers(&self, ids: &[&str]) -> Result<HashMap<String, Vec<String>>> {
1252 let vals: Vec<String> = ids
1253 .iter()
1254 .map(|id| format!("[\"{}\"]", id.replace('"', "\\\"")))
1255 .collect();
1256 let script = format!(
1257 "targets[id] <- [{}]\n?[callee, caller] := targets[callee], *calls{{caller, callee}}",
1258 vals.join(", ")
1259 );
1260 let r = self.run(&script)?;
1261 let mut map: HashMap<String, Vec<String>> = HashMap::new();
1262 for row in &r.rows {
1263 map.entry(dv_str(&row[0]))
1264 .or_default()
1265 .push(dv_str(&row[1]));
1266 }
1267 Ok(map)
1268 }
1269
1270 fn batch_callees(&self, ids: &[&str]) -> Result<HashMap<String, Vec<String>>> {
1271 let vals: Vec<String> = ids
1272 .iter()
1273 .map(|id| format!("[\"{}\"]", id.replace('"', "\\\"")))
1274 .collect();
1275 let script = format!(
1276 "targets[id] <- [{}]\n?[caller, callee] := targets[caller], *calls{{caller, callee}}",
1277 vals.join(", ")
1278 );
1279 let r = self.run(&script)?;
1280 let mut map: HashMap<String, Vec<String>> = HashMap::new();
1281 for row in &r.rows {
1282 map.entry(dv_str(&row[0]))
1283 .or_default()
1284 .push(dv_str(&row[1]));
1285 }
1286 Ok(map)
1287 }
1288
1289 fn batch_branches(&self, ids: &[&str]) -> Result<HashMap<String, Vec<BranchInfo>>> {
1290 let vals: Vec<String> = ids
1291 .iter()
1292 .map(|id| format!("[\"{}\"]", id.replace('"', "\\\"")))
1293 .collect();
1294 let script = format!(
1295 "targets[id] <- [{}]\n?[sym, stmt_kind, condition, start_line, depth] := targets[sym], *has_statement{{symbol_id: sym, statement_id: sid}}, *statement{{id: sid, kind: stmt_kind, condition, start_line, depth}}",
1296 vals.join(", ")
1297 );
1298 let r = self.run(&script)?;
1299 let mut map: HashMap<String, Vec<BranchInfo>> = HashMap::new();
1300 for row in &r.rows {
1301 map.entry(dv_str(&row[0])).or_default().push(BranchInfo {
1302 kind: dv_str(&row[1]),
1303 condition: dv_str(&row[2]),
1304 line: dv_u32(&row[3]),
1305 depth: dv_u32(&row[4]),
1306 });
1307 }
1308 Ok(map)
1309 }
1310
1311 fn refresh_meta(&self) -> Result<()> {
1312 let counts: &[(&str, &str)] = &[
1313 ("symbols", "?[count(id)] := *symbol{id}"),
1314 ("modules", "?[count(id)] := *module{id}"),
1315 ("files", "?[count(id)] := *file{id}"),
1316 ("folders", "?[count(id)] := *folder{id}"),
1317 ("calls", "?[count(caller)] := *calls{caller}"),
1318 ("inherits", "?[count(child)] := *inherits{child}"),
1319 ("contains", "?[count(module_id)] := *contains{module_id}"),
1320 ];
1321 let mut rows = Vec::new();
1322 for (key, query) in counts {
1323 let r = self.run(query)?;
1324 let val = dv_u64(&r) as i64;
1325 rows.push(format!("[\"{key}\", {val}]"));
1326 }
1327 let script = format!(
1328 "?[key, val] <- [{}]\n:replace meta_cache {{key: String => val: Int}}",
1329 rows.join(", ")
1330 );
1331 self.run_params(&script, empty_params(), true)?;
1332 Ok(())
1333 }
1334
1335 fn refresh_testable(&self) -> Result<()> {
1336 self.run_params(
1337 r#"?[id] := *symbol{id, kind: "Function"}
1338 ?[id] := *symbol{id, kind: "Method"}
1339 ?[id] := *symbol{id, kind: "Class"}
1340 ?[id] := *symbol{id, kind: "Struct"}
1341 ?[id] := *symbol{id, kind: "Trait"}
1342 ?[id] := *symbol{id, kind: "Interface"}
1343 :replace testable_cache {id: String}"#,
1344 empty_params(),
1345 true,
1346 )?;
1347 Ok(())
1348 }
1349
1350 pub fn create_custom_edge(&self, ddl: &str) -> Result<()> {
1351 match self
1352 .db
1353 .run_script(ddl, empty_params(), ScriptMutability::Mutable)
1354 {
1355 Ok(_) => Ok(()),
1356 Err(e) => {
1357 let msg = format!("{e}");
1358 if msg.contains("already exists") || msg.contains("conflicts") {
1359 Ok(())
1360 } else {
1361 Err(anyhow::anyhow!("create custom edge: {e}"))
1362 }
1363 }
1364 }
1365 }
1366
1367 pub fn stats(&self) -> Result<GraphStats> {
1368 let r = self.run(r#"?[key, val] := *meta_cache{key, val}"#);
1369 if let Ok(r) = &r {
1370 if !r.rows.is_empty() {
1371 let m: HashMap<String, u64> = r
1372 .rows
1373 .iter()
1374 .map(|row| (dv_str(&row[0]), dv_u64_val(&row[1])))
1375 .collect();
1376 return Ok(GraphStats {
1377 symbols: *m.get("symbols").unwrap_or(&0),
1378 modules: *m.get("modules").unwrap_or(&0),
1379 files: *m.get("files").unwrap_or(&0),
1380 folders: *m.get("folders").unwrap_or(&0),
1381 calls: *m.get("calls").unwrap_or(&0),
1382 inherits: *m.get("inherits").unwrap_or(&0),
1383 contains: *m.get("contains").unwrap_or(&0),
1384 });
1385 }
1386 }
1387 self.refresh_meta()?;
1389 self.stats()
1390 }
1391
1392 pub fn get_file_hashes(&self) -> Result<HashMap<String, String>> {
1395 let r = self.run(r#"?[file, content_hash] := *module{file, content_hash}"#)?;
1396 let mut map = HashMap::new();
1397 for row in &r.rows {
1398 map.insert(dv_str(&row[0]), dv_str(&row[1]));
1399 }
1400 Ok(map)
1401 }
1402
1403 pub fn get_all_symbols(&self) -> Result<Vec<(String, String, String, String)>> {
1404 let r = self.run(r#"?[name, id, file, kind] := *symbol{id, name, file, kind}"#)?;
1405 Ok(r.rows
1406 .iter()
1407 .map(|row| {
1408 (
1409 dv_str(&row[0]),
1410 dv_str(&row[1]),
1411 dv_str(&row[2]),
1412 dv_str(&row[3]),
1413 )
1414 })
1415 .collect())
1416 }
1417
1418 pub fn remove_file(&self, file: &str) -> Result<()> {
1419 self.delete_file_data(file)
1420 }
1421
1422 pub fn upsert_all_bulk(&self, extractions: &[FileExtraction]) -> Result<()> {
1423 for e in extractions {
1424 self.upsert_file_batch(e)?;
1425 }
1426 self.invalidate_caches()
1427 }
1428
1429 pub fn derive_tested_by_edges(&self) -> Result<usize> {
1430 let _ = self.run_params(
1431 r#"?[symbol_id, test_id] := *tested_by{symbol_id, test_id}
1432 :rm tested_by {symbol_id, test_id}"#,
1433 empty_params(),
1434 true,
1435 );
1436 self.run_params(
1437 r#"?[symbol_id, test_id] := *calls{caller: test_id, callee: symbol_id},
1438 *symbol{id: test_id, kind: "Test"},
1439 *symbol{id: symbol_id, kind},
1440 kind != "Test"
1441 :put tested_by {symbol_id, test_id}"#,
1442 empty_params(),
1443 true,
1444 )?;
1445 let r = self.run(r#"?[count(symbol_id)] := *tested_by{symbol_id}"#)?;
1446 Ok(dv_u64(&r) as usize)
1447 }
1448
1449 pub fn cross_cutting_for(&self, symbol_id: &str) -> Result<Vec<(String, String)>> {
1450 let mut params = empty_params();
1451 params.insert("sym".into(), DataValue::Str(symbol_id.into()));
1452 let r = self.run_params(
1453 r#"?[kind, detail] := *has_concern{symbol_id: $sym, concern_id: cid}, *concern{id: cid, kind, detail}"#,
1454 params, false,
1455 )?;
1456 Ok(r.rows
1457 .iter()
1458 .map(|row| (dv_str(&row[0]), dv_str(&row[1])))
1459 .collect())
1460 }
1461
1462 pub fn upsert_folders_bulk(&self, file_paths: &[&str]) -> Result<()> {
1463 let mut all_folders: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
1464 for file_path in file_paths {
1465 let parts: Vec<&str> = file_path.rsplitn(2, '/').collect();
1466 if parts.len() < 2 {
1467 continue;
1468 }
1469 let dir_path = parts[1];
1470 let segments: Vec<&str> = dir_path.split('/').collect();
1471 for i in 0..segments.len() {
1472 all_folders.insert(segments[..=i].join("/"));
1473 }
1474 }
1475 if all_folders.is_empty() {
1476 return Ok(());
1477 }
1478
1479 let folder_rows: Vec<(String, String, String)> = all_folders
1480 .iter()
1481 .map(|fp| {
1482 let name = fp.rsplit_once('/').map(|(_, n)| n).unwrap_or(fp.as_str());
1483 (fp.clone(), name.to_string(), fp.clone())
1484 })
1485 .collect();
1486 self.import_folders(&folder_rows)?;
1487
1488 let cf_pairs: Vec<(String, String)> = all_folders
1489 .iter()
1490 .filter_map(|child| {
1491 child
1492 .rsplit_once('/')
1493 .map(|(p, _)| p)
1494 .and_then(|parent_path| {
1495 if all_folders.contains(parent_path) {
1496 Some((parent_path.to_string(), child.clone()))
1497 } else {
1498 None
1499 }
1500 })
1501 })
1502 .collect();
1503 self.import_edges("contains_folder", &cf_pairs)?;
1504
1505 let cfile_pairs: Vec<(String, String)> = file_paths
1506 .iter()
1507 .filter_map(|fp| {
1508 let parts: Vec<&str> = fp.rsplitn(2, '/').collect();
1509 if parts.len() < 2 {
1510 return None;
1511 }
1512 Some((parts[1].to_string(), fp.to_string()))
1513 })
1514 .collect();
1515 self.import_edges("contains_file", &cfile_pairs)?;
1516
1517 Ok(())
1518 }
1519
1520 pub fn import_concerns(&self, rows: &[(String, String, String)]) -> Result<()> {
1521 if rows.is_empty() {
1522 return Ok(());
1523 }
1524 let headers = vec!["id".into(), "kind".into(), "detail".into()];
1525 let data_rows: Vec<Vec<DataValue>> = rows
1526 .iter()
1527 .map(|r| {
1528 vec![
1529 DataValue::Str(r.0.clone().into()),
1530 DataValue::Str(r.1.clone().into()),
1531 DataValue::Str(r.2.clone().into()),
1532 ]
1533 })
1534 .collect();
1535 let named = NamedRows::new(headers, data_rows);
1536 let mut map = BTreeMap::new();
1537 map.insert("concern".to_string(), named);
1538 self.db
1539 .import_relations(map)
1540 .map_err(|e| anyhow::anyhow!("import concerns: {e}"))
1541 }
1542
1543 pub fn import_config_bindings(
1544 &self,
1545 rows: &[(String, String, String, String, String, String)],
1546 ) -> Result<()> {
1547 if rows.is_empty() {
1548 return Ok(());
1549 }
1550 let headers = vec![
1551 "id".into(),
1552 "kind".into(),
1553 "key".into(),
1554 "value".into(),
1555 "profile".into(),
1556 "source_file".into(),
1557 ];
1558 let data_rows: Vec<Vec<DataValue>> = rows
1559 .iter()
1560 .map(|r| {
1561 vec![
1562 DataValue::Str(r.0.clone().into()),
1563 DataValue::Str(r.1.clone().into()),
1564 DataValue::Str(r.2.clone().into()),
1565 DataValue::Str(r.3.clone().into()),
1566 DataValue::Str(r.4.clone().into()),
1567 DataValue::Str(r.5.clone().into()),
1568 ]
1569 })
1570 .collect();
1571 let named = NamedRows::new(headers, data_rows);
1572 let mut map = BTreeMap::new();
1573 map.insert("config_binding".to_string(), named);
1574 self.db
1575 .import_relations(map)
1576 .map_err(|e| anyhow::anyhow!("import config_bindings: {e}"))
1577 }
1578
1579 pub fn import_taint_flows(
1580 &self,
1581 rows: &[(String, String, String, String, String)],
1582 ) -> Result<()> {
1583 if rows.is_empty() {
1584 return Ok(());
1585 }
1586 let headers = vec![
1587 "source".into(),
1588 "target".into(),
1589 "source_kind".into(),
1590 "sink_kind".into(),
1591 "path".into(),
1592 ];
1593 let data_rows: Vec<Vec<DataValue>> = rows
1594 .iter()
1595 .map(|r| {
1596 vec![
1597 DataValue::Str(r.0.clone().into()),
1598 DataValue::Str(r.1.clone().into()),
1599 DataValue::Str(r.2.clone().into()),
1600 DataValue::Str(r.3.clone().into()),
1601 DataValue::Str(r.4.clone().into()),
1602 ]
1603 })
1604 .collect();
1605 let named = NamedRows::new(headers, data_rows);
1606 let mut map = BTreeMap::new();
1607 map.insert("taint_flow".to_string(), named);
1608 self.db
1609 .import_relations(map)
1610 .map_err(|e| anyhow::anyhow!("import taint_flows: {e}"))
1611 }
1612
1613 pub fn import_resolves_to(&self, rows: &[(String, String, String, String)]) -> Result<()> {
1614 if rows.is_empty() {
1615 return Ok(());
1616 }
1617 let headers = vec![
1618 "source".into(),
1619 "target".into(),
1620 "mechanism".into(),
1621 "config_source".into(),
1622 ];
1623 let data_rows: Vec<Vec<DataValue>> = rows
1624 .iter()
1625 .map(|r| {
1626 vec![
1627 DataValue::Str(r.0.clone().into()),
1628 DataValue::Str(r.1.clone().into()),
1629 DataValue::Str(r.2.clone().into()),
1630 DataValue::Str(r.3.clone().into()),
1631 ]
1632 })
1633 .collect();
1634 let named = NamedRows::new(headers, data_rows);
1635 let mut map = BTreeMap::new();
1636 map.insert("resolves_to".to_string(), named);
1637 self.db
1638 .import_relations(map)
1639 .map_err(|e| anyhow::anyhow!("import resolves_to: {e}"))
1640 }
1641
1642 pub fn relation_counts(&self) -> Result<BTreeMap<String, u64>> {
1643 let relations = [
1644 ("symbol", "id"),
1645 ("module", "id"),
1646 ("cluster", "id"),
1647 ("file", "id"),
1648 ("folder", "id"),
1649 ("dependency", "id"),
1650 ("statement", "id"),
1651 ("calls", "caller"),
1652 ("depends_on", "module_id"),
1653 ("imports", "importer"),
1654 ("contains", "module_id"),
1655 ("inherits", "child"),
1656 ("tested_by", "symbol_id"),
1657 ("reads_rel", "reader"),
1658 ("writes_rel", "writer"),
1659 ("member_of", "symbol_id"),
1660 ("similar_to", "symbol_a"),
1661 ("bridge_to", "source"),
1662 ("contains_file", "folder_id"),
1663 ("contains_folder", "parent_id"),
1664 ("defines", "file_id"),
1665 ("calls_service", "caller"),
1666 ("has_statement", "symbol_id"),
1667 ("concern", "id"),
1668 ("has_concern", "symbol_id"),
1669 ("config_binding", "id"),
1670 ("has_config", "symbol_id"),
1671 ("resolves_to", "source"),
1672 ("taint_flow", "source"),
1673 ];
1674 let mut counts = BTreeMap::new();
1675 for (rel, col) in &relations {
1676 let q = format!("?[count({col})] := *{rel}{{{col}}}");
1677 let r = self.run(&q)?;
1678 counts.insert(rel.to_string(), dv_u64(&r));
1679 }
1680 Ok(counts)
1681 }
1682}
1683
1684pub fn cozo_schema_ddl() -> Vec<&'static str> {
1687 COZO_SCHEMA.to_vec()
1688}
1689
/// `:create` statements for every built-in relation.
///
/// `init_schema` runs them one by one when a store is opened and tolerates
/// failures whose message contains "already exists" or "conflicts", so the
/// list can be re-applied to an existing database.
const COZO_SCHEMA: &[&str] = &[
    // Entity relations: `id` on the left of `=>`, attributes on the right.
    ":create symbol {id: String => name: String, kind: String, file: String, start_line: Int, end_line: Int, signature_hash: String default \"\", language: String default \"\", visibility: String default \"\", parent: String default \"\", docstring: String default \"\", complexity: Int default 1, parameters: String default \"\", return_type: String default \"\"}",
    ":create module {id: String => name: String, file: String, language: String, content_hash: String default \"\", summary: String default \"\"}",
    ":create cluster {id: String => name: String, description: String default \"\"}",
    ":create file {id: String => name: String, path: String, language: String, symbol_count: Int default 0}",
    ":create folder {id: String => name: String, path: String}",
    ":create dependency {id: String => name: String, version: String default \"\", ecosystem: String default \"\", is_dev: Bool default false}",
    ":create statement {id: String => kind: String, condition: String default \"\", start_line: Int default 0, end_line: Int default 0, depth: Int default 0, parent_symbol: String default \"\"}",
    // Edge relations between the entities above.
    ":create calls {caller: String, callee: String, line: Int default 0}",
    ":create depends_on {module_id: String, dep_id: String, is_dev: Bool default false}",
    ":create imports {importer: String, imported: String}",
    ":create contains {module_id: String, symbol_id: String}",
    ":create inherits {child: String, parent: String}",
    ":create tested_by {symbol_id: String, test_id: String}",
    ":create reads_rel {reader: String, target: String}",
    ":create writes_rel {writer: String, target: String}",
    ":create member_of {symbol_id: String, cluster_id: String}",
    ":create similar_to {symbol_a: String, symbol_b: String, score: Float default 0.0}",
    ":create bridge_to {source: String, target: String, bridge_kind: String default \"\", detail: String default \"\"}",
    ":create contains_file {folder_id: String, file_id: String}",
    ":create contains_folder {parent_id: String, child_id: String}",
    ":create defines {file_id: String, symbol_id: String}",
    ":create calls_service {caller: String, target: String, method: String default \"\", path: String default \"\", target_service: String default \"\"}",
    ":create has_statement {symbol_id: String, statement_id: String}",
    // Concerns, configuration bindings and derived flow relations.
    ":create concern {id: String => kind: String, detail: String default \"\"}",
    ":create has_concern {symbol_id: String, concern_id: String}",
    ":create config_binding {id: String => kind: String, key: String, value: String default \"\", profile: String default \"\", source_file: String default \"\"}",
    ":create has_config {symbol_id: String, config_id: String}",
    ":create resolves_to {source: String, target: String, mechanism: String default \"\", config_source: String default \"\"}",
    ":create taint_flow {source: String, target: String, source_kind: String default \"\", sink_kind: String default \"\", path: String default \"\"}",
    // Materialized caches rebuilt by `refresh_meta` / `refresh_testable`.
    ":create meta_cache {key: String => val: Int}",
    ":create testable_cache {id: String}",
];
1724
/// `::index create` statements for the secondary indices on the built-in
/// relations.
///
/// `init_schema` runs them after the schema statements and tolerates failures
/// whose message contains "already exists", "conflicts" or "duplicate".
const COZO_INDICES: &[&str] = &[
    // Lookups of edge relations by a column other than their first one.
    "::index create calls:calls_by_callee {callee}",
    "::index create inherits:inherits_by_parent {parent}",
    "::index create tested_by:tested_by_test {test_id}",
    "::index create defines:defines_by_symbol {symbol_id}",
    "::index create contains:contains_by_symbol {symbol_id}",
    "::index create imports:imports_by_imported {imported}",
    "::index create has_statement:has_stmt_by_stmt {statement_id}",
    "::index create reads_rel:reads_by_target {target}",
    "::index create writes_rel:writes_by_target {target}",
    "::index create similar_to:similar_by_b {symbol_b}",
    "::index create bridge_to:bridge_by_target {target}",
    "::index create contains_file:contains_file_by_file {file_id}",
    "::index create contains_folder:contains_folder_by_child {child_id}",
    "::index create calls_service:calls_svc_by_target {target}",
    "::index create member_of:member_by_cluster {cluster_id}",
    "::index create has_concern:has_concern_by_concern {concern_id}",
    "::index create has_config:has_config_by_config {config_id}",
    "::index create resolves_to:resolves_to_by_target {target}",
    "::index create taint_flow:taint_flow_by_target {target}",
    // Lookups of symbols by attribute.
    "::index create symbol:symbol_by_file {file}",
    "::index create symbol:symbol_by_kind {kind}",
    "::index create symbol:symbol_by_visibility {visibility}",
];
1751
/// Returns the names of the first two columns of an edge relation.
///
/// The names follow the `:create` statements in `COZO_SCHEMA`. Every built-in
/// edge relation whose columns are not literally `source`/`target` is listed
/// explicitly, including `contains_file`, `contains_folder`, `member_of`,
/// `depends_on`, `similar_to` and `calls_service`. Any other relation name
/// falls back to `("source", "target")`, the column names that custom edge
/// relations are created with.
fn edge_columns(relation: &str) -> (&'static str, &'static str) {
    match relation {
        "calls" => ("caller", "callee"),
        "inherits" => ("child", "parent"),
        "tested_by" => ("symbol_id", "test_id"),
        "contains" => ("module_id", "symbol_id"),
        "defines" => ("file_id", "symbol_id"),
        "imports" => ("importer", "imported"),
        "has_statement" => ("symbol_id", "statement_id"),
        "reads_rel" => ("reader", "target"),
        "writes_rel" => ("writer", "target"),
        "has_concern" => ("symbol_id", "concern_id"),
        "has_config" => ("symbol_id", "config_id"),
        "contains_file" => ("folder_id", "file_id"),
        "contains_folder" => ("parent_id", "child_id"),
        "member_of" => ("symbol_id", "cluster_id"),
        "depends_on" => ("module_id", "dep_id"),
        "similar_to" => ("symbol_a", "symbol_b"),
        "calls_service" => ("caller", "target"),
        "resolves_to" => ("source", "target"),
        "taint_flow" => ("source", "target"),
        _ => ("source", "target"),
    }
}
1770
1771fn dv_str(v: &DataValue) -> String {
1774 match v {
1775 DataValue::Str(s) => s.to_string(),
1776 DataValue::Null => String::new(),
1777 other => format!("{other:?}"),
1778 }
1779}
1780
1781fn dv_u32(v: &DataValue) -> u32 {
1782 match v {
1783 DataValue::Num(Num::Int(i)) => *i as u32,
1784 DataValue::Num(Num::Float(f)) => *f as u32,
1785 _ => 0,
1786 }
1787}
1788
1789fn dv_u64(r: &NamedRows) -> u64 {
1790 r.rows
1791 .first()
1792 .and_then(|row| row.first())
1793 .map(dv_u64_val)
1794 .unwrap_or(0)
1795}
1796
1797fn dv_u64_val(v: &DataValue) -> u64 {
1798 match v {
1799 DataValue::Num(Num::Int(i)) => *i as u64,
1800 DataValue::Num(Num::Float(f)) => *f as u64,
1801 _ => 0,
1802 }
1803}
1804
/// Reports whether `kind` is one of the symbol kinds considered testable.
///
/// The comparison is exact and case-sensitive.
fn is_testable_kind(kind: &str) -> bool {
    const TESTABLE_KINDS: [&str; 6] = [
        "Function",
        "Method",
        "Class",
        "Struct",
        "Trait",
        "Interface",
    ];
    TESTABLE_KINDS.contains(&kind)
}
1811
1812fn collect_strings(r: &NamedRows) -> Vec<String> {
1813 r.rows
1814 .iter()
1815 .filter_map(|row| row.first().map(dv_str))
1816 .collect()
1817}
1818
1819fn named_rows_to_symbol_rows(r: &NamedRows) -> Vec<SymbolRow> {
1820 r.rows
1821 .iter()
1822 .map(|row| SymbolRow {
1823 id: dv_str(&row[0]),
1824 name: dv_str(&row[1]),
1825 kind: dv_str(&row[2]),
1826 start_line: dv_u32(&row[3]),
1827 end_line: dv_u32(&row[4]),
1828 })
1829 .collect()
1830}
1831
1832fn row_to_symbol_detail(row: &[DataValue]) -> SymbolDetail {
1833 SymbolDetail {
1834 id: dv_str(&row[0]),
1835 name: dv_str(&row[1]),
1836 kind: dv_str(&row[2]),
1837 file: dv_str(&row[3]),
1838 start_line: dv_u32(&row[4]),
1839 end_line: dv_u32(&row[5]),
1840 }
1841}