1use std::collections::{BTreeMap, HashMap};
2use std::fmt;
3use std::time::{SystemTime, UNIX_EPOCH};
4
5use gobby_core::degradation::ServiceState;
6use gobby_core::falkor::{GraphClient, Row};
7use serde::{Deserialize, Serialize};
8use serde_json::Value;
9
10use crate::config::Context;
11use crate::graph::typed_query;
12use crate::models::{ProjectionMetadata, ProjectionProvenance};
13
/// Relationship type name used for the bridge edges reported by this module.
const RELATES_TO_CODE: &str = "RELATES_TO_CODE";
/// `top_n` used by `ProjectGraphReportOptions::default()`.
const DEFAULT_TOP_LIMIT: usize = 10;
16
/// An edge from some source node to a code symbol, reported as a hypothesis.
///
/// Values built through [`BridgeEdgeHypothesis::new`] always carry the label
/// `"inferred hypothesis"`, `read_only == true`, and inferred provenance.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BridgeEdgeHypothesis {
    /// Identifier of the node the edge starts from.
    pub source_id: String,
    /// Identifier of the code symbol the edge points at.
    pub target_symbol_id: String,
    /// Relationship type name of the edge.
    pub relation: String,
    /// Display label; the constructors in this file set it to `"inferred hypothesis"`.
    pub label: String,
    /// The constructors in this file always set this to `true`.
    pub read_only: bool,
    /// Provenance, confidence, and source details attached to the edge.
    pub metadata: ProjectionMetadata,
}
26
27impl BridgeEdgeHypothesis {
28 pub fn new(
29 source_id: impl Into<String>,
30 target_symbol_id: impl Into<String>,
31 relation: impl Into<String>,
32 metadata: ProjectionMetadata,
33 ) -> Self {
34 Self {
35 source_id: source_id.into(),
36 target_symbol_id: target_symbol_id.into(),
37 relation: relation.into(),
38 label: "inferred hypothesis".to_string(),
39 read_only: true,
40 metadata: inferred_bridge_metadata(metadata),
41 }
42 }
43
44 pub fn inferred(
45 source_id: impl Into<String>,
46 target_symbol_id: impl Into<String>,
47 relation: impl Into<String>,
48 source_system: impl Into<String>,
49 confidence: Option<f64>,
50 ) -> Self {
51 Self::new(
52 source_id,
53 target_symbol_id,
54 relation,
55 ProjectionMetadata::inferred(source_system, confidence),
56 )
57 }
58}
59
/// The complete project graph report: structured data plus a markdown rendering.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ProjectGraphReport {
    /// Project the report was generated for.
    pub project_id: String,
    /// Timestamp string recorded when the report was generated.
    pub generated_at: String,
    /// Node and edge totals.
    pub summary: GraphReportSummary,
    /// Ranked high-degree nodes.
    pub hotspots: GraphReportHotspots,
    /// Most frequently called targets of type `unresolved`.
    pub unresolved_targets: Vec<TargetFrequency>,
    /// Most frequently called targets of type `external`.
    pub external_targets: Vec<TargetFrequency>,
    /// Aggregate view of the bridge edges; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bridge_summary: Option<BridgeReportSummary>,
    /// Individual bridge edges; omitted when empty and defaulted to empty when absent.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub bridge_edges: Vec<BridgeEdgeHypothesis>,
    /// Inputs that could not be loaded; omitted when empty and defaulted to empty when absent.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub degradation_details: Vec<ReportDegradation>,
    /// Follow-up questions derived from the report contents.
    pub suggested_investigation_questions: Vec<String>,
    /// Markdown rendering of the report.
    pub markdown: String,
}
77
78#[derive(Debug, Clone, Copy, PartialEq, Eq)]
79pub struct ProjectGraphReportOptions {
80 pub top_n: usize,
81}
82
83impl Default for ProjectGraphReportOptions {
84 fn default() -> Self {
85 Self {
86 top_n: DEFAULT_TOP_LIMIT,
87 }
88 }
89}
90
91impl ProjectGraphReportOptions {
92 fn normalized(self) -> Self {
93 Self {
94 top_n: self.top_n.max(1),
95 }
96 }
97}
98
/// Node and edge totals for the reported graph.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct GraphReportSummary {
    /// Total number of nodes counted.
    pub node_count: usize,
    /// Total number of code edges counted.
    pub edge_count: usize,
    /// Node counts keyed by node type name.
    pub node_counts_by_type: BTreeMap<String, usize>,
    /// Edge counts keyed by edge type name.
    pub code_edge_counts: BTreeMap<String, usize>,
}
106
/// Ranked hotspot lists, one per node category.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct GraphReportHotspots {
    /// File nodes ranked by total degree.
    pub high_degree_files: Vec<GraphHotspot>,
    /// Symbol nodes ranked by total degree.
    pub high_degree_symbols: Vec<GraphHotspot>,
    /// Module nodes ranked by total degree.
    pub high_degree_modules: Vec<GraphHotspot>,
    /// Symbol nodes ranked by number of incoming `CALLS` edges.
    pub incoming_call_hotspots: Vec<GraphHotspot>,
}
114
/// A single node that appears in a hotspot ranking.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct GraphHotspot {
    /// Node identifier.
    pub id: String,
    /// Display name of the node.
    pub name: String,
    /// Node type name; serialized under the key `type`.
    #[serde(rename = "type")]
    pub node_type: String,
    /// Value the entry is ranked by (incoming + outgoing, or incoming calls only).
    pub degree: usize,
    /// Number of incoming edges counted for the node.
    pub incoming: usize,
    /// Number of outgoing edges counted for the node.
    pub outgoing: usize,
    /// File path associated with the node, when known; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub file_path: Option<String>,
}
127
/// How often a particular call target was referenced.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TargetFrequency {
    /// Identifier of the target node.
    pub id: String,
    /// Display name of the target node.
    pub name: String,
    /// Number of `CALLS` edges pointing at the target.
    pub count: usize,
}
134
/// Aggregate description of the bridge edges included in a report.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BridgeReportSummary {
    /// Relationship type name of the summarized edges.
    pub relation: String,
    /// Number of bridge edges summarized.
    pub edge_count: usize,
    /// `summarize_bridge_edges` always sets this to `true`.
    pub inferred: bool,
    /// `summarize_bridge_edges` always sets this to `true`.
    pub read_only: bool,
    /// Edge counts per source system.
    pub source_system_counts: Vec<NamedCount>,
    /// Smallest and largest finite confidence seen; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub confidence_range: Option<ConfidenceRange>,
}
145
/// A name paired with a count.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct NamedCount {
    /// The name being counted.
    pub name: String,
    /// Number of occurrences.
    pub count: usize,
}
151
/// Lower and upper bound of a set of confidence values.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ConfidenceRange {
    /// Smallest confidence value.
    pub min: f64,
    /// Largest confidence value.
    pub max: f64,
}
157
/// Describes a report input that could not be loaded.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ReportDegradation {
    /// Name of the input that was unavailable.
    pub input: String,
    /// Whether the input is required for the report.
    pub required: bool,
    /// Explanation of why the input was unavailable.
    pub detail: String,
}
164
/// Failures that prevent a project graph report from being generated.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ProjectGraphReportError {
    /// No graph service configuration is available.
    GraphServiceNotConfigured,
    /// The graph service is configured but could not be reached.
    GraphServiceUnreachable { message: String },
    /// A query against the graph service failed.
    GraphQueryFailed { message: String },
}

impl fmt::Display for ProjectGraphReportError {
    /// Writes a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::GraphQueryFailed { message } => {
                f.write_fmt(format_args!("project graph report query failed: {message}"))
            }
            Self::GraphServiceUnreachable { message } => f.write_fmt(format_args!(
                "FalkorDB is unreachable; project graph report requires FalkorDB: {message}"
            )),
            Self::GraphServiceNotConfigured => {
                write!(f, "FalkorDB is not configured; project graph report requires FalkorDB")
            }
        }
    }
}

impl std::error::Error for ProjectGraphReportError {}
190
/// Raw material for a report.
///
/// Each `Option` field, when `Some`, is used as-is by
/// `generate_report_from_snapshot_with_options`; when `None`, the value is
/// computed from `nodes` and `code_edges` instead.
#[derive(Debug, Clone, Default, PartialEq)]
struct ReportGraphSnapshot {
    /// Nodes used for in-memory aggregation.
    nodes: Vec<ReportNode>,
    /// Code edges used for in-memory aggregation.
    code_edges: Vec<ReportCodeEdge>,
    /// Precomputed summary, if any.
    summary: Option<GraphReportSummary>,
    /// Precomputed hotspots, if any.
    hotspots: Option<GraphReportHotspots>,
    /// Precomputed unresolved call targets, if any.
    unresolved_targets: Option<Vec<TargetFrequency>>,
    /// Precomputed external call targets, if any.
    external_targets: Option<Vec<TargetFrequency>>,
    /// Bridge edges, or the reason they could not be loaded.
    bridge_edges: BridgeEdgeInput,
}
201
/// A graph node as seen by the in-memory report aggregation.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ReportNode {
    /// Node identifier.
    id: String,
    /// Display name.
    name: String,
    /// Node type name such as `file` or `module`.
    node_type: String,
    /// File path associated with the node, when known.
    file_path: Option<String>,
}

impl ReportNode {
    /// Test-only constructor; the node starts without a file path.
    #[cfg(test)]
    fn new(id: impl Into<String>, name: impl Into<String>, node_type: impl Into<String>) -> Self {
        let (id, name, node_type) = (id.into(), name.into(), node_type.into());
        Self {
            id,
            name,
            node_type,
            file_path: None,
        }
    }

    /// Test-only builder step that attaches a file path to the node.
    #[cfg(test)]
    fn with_file_path(self, file_path: impl Into<String>) -> Self {
        Self {
            file_path: Some(file_path.into()),
            ..self
        }
    }
}
227
/// A directed code edge as seen by the in-memory report aggregation.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ReportCodeEdge {
    /// Identifier of the node the edge starts from.
    source: String,
    /// Identifier of the node the edge points at.
    target: String,
    /// Edge type name such as `CALLS`.
    edge_type: String,
}

impl ReportCodeEdge {
    /// Test-only constructor.
    #[cfg(test)]
    fn new(
        source: impl Into<String>,
        target: impl Into<String>,
        edge_type: impl Into<String>,
    ) -> Self {
        let (source, target, edge_type) = (source.into(), target.into(), edge_type.into());
        Self {
            source,
            target,
            edge_type,
        }
    }
}
249
250#[derive(Debug, Clone, PartialEq)]
251enum BridgeEdgeInput {
252 Available(Vec<BridgeEdgeHypothesis>),
253 Unavailable(String),
254}
255
256impl BridgeEdgeInput {
257 fn available(edges: Vec<BridgeEdgeHypothesis>) -> Self {
258 Self::Available(edges)
259 }
260
261 fn unavailable(reason: impl Into<String>) -> Self {
262 Self::Unavailable(reason.into())
263 }
264}
265
266impl Default for BridgeEdgeInput {
267 fn default() -> Self {
268 Self::Available(vec![])
269 }
270}
271
/// Per-node edge tallies accumulated while scanning code edges.
#[derive(Debug, Clone, Copy, Default)]
struct DegreeStats {
    /// Number of edges whose target is the node.
    incoming: usize,
    /// Number of edges whose source is the node.
    outgoing: usize,
}
277
278pub fn generate_report(ctx: &Context) -> Result<ProjectGraphReport, ProjectGraphReportError> {
279 generate_report_with_options(ctx, ProjectGraphReportOptions::default())
280}
281
282pub fn generate_report_with_options(
283 ctx: &Context,
284 options: ProjectGraphReportOptions,
285) -> Result<ProjectGraphReport, ProjectGraphReportError> {
286 let Some(config) = ctx.falkordb.as_ref() else {
287 return Err(ProjectGraphReportError::GraphServiceNotConfigured);
288 };
289
290 let connection_config = config.connection_config();
291 let options = options.normalized();
292 let result = gobby_core::falkor::with_graph(
293 Some(&connection_config),
294 &config.graph_name,
295 ReportGraphSnapshot::default(),
296 |client| load_report_snapshot(client, &ctx.project_id, options.top_n),
297 );
298
299 match result {
300 Ok((snapshot, ServiceState::Available)) => Ok(generate_report_from_snapshot_with_options(
301 &ctx.project_id,
302 now_iso8601(),
303 snapshot,
304 options,
305 )),
306 Ok((_, ServiceState::NotConfigured)) => {
307 Err(ProjectGraphReportError::GraphServiceNotConfigured)
308 }
309 Ok((_, ServiceState::Unreachable { message })) => {
310 Err(ProjectGraphReportError::GraphServiceUnreachable { message })
311 }
312 Err(error) => Err(ProjectGraphReportError::GraphQueryFailed {
313 message: error.to_string(),
314 }),
315 }
316}
317
318pub fn empty_report(project_id: impl Into<String>) -> ProjectGraphReport {
319 generate_report_from_snapshot(project_id, now_iso8601(), ReportGraphSnapshot::default())
320}
321
322fn generate_report_from_snapshot(
323 project_id: impl Into<String>,
324 generated_at: impl Into<String>,
325 snapshot: ReportGraphSnapshot,
326) -> ProjectGraphReport {
327 generate_report_from_snapshot_with_options(
328 project_id,
329 generated_at,
330 snapshot,
331 ProjectGraphReportOptions::default(),
332 )
333}
334
335fn generate_report_from_snapshot_with_options(
336 project_id: impl Into<String>,
337 generated_at: impl Into<String>,
338 snapshot: ReportGraphSnapshot,
339 options: ProjectGraphReportOptions,
340) -> ProjectGraphReport {
341 let options = options.normalized();
342 let project_id = project_id.into();
343 let generated_at = generated_at.into();
344 let node_by_id = snapshot
345 .nodes
346 .iter()
347 .map(|node| (node.id.as_str(), node))
348 .collect::<HashMap<_, _>>();
349
350 let summary = snapshot
351 .summary
352 .clone()
353 .unwrap_or_else(|| summarize_graph(&snapshot.nodes, &snapshot.code_edges));
354 let hotspots = snapshot.hotspots.clone().unwrap_or_else(|| {
355 summarize_hotspots(&snapshot.nodes, &snapshot.code_edges, options.top_n)
356 });
357 let unresolved_targets = snapshot.unresolved_targets.clone().unwrap_or_else(|| {
358 target_frequencies(
359 &snapshot.code_edges,
360 &node_by_id,
361 "unresolved",
362 options.top_n,
363 )
364 });
365 let external_targets = snapshot.external_targets.clone().unwrap_or_else(|| {
366 target_frequencies(&snapshot.code_edges, &node_by_id, "external", options.top_n)
367 });
368
369 let (bridge_edges, mut degradation_details) = match snapshot.bridge_edges {
370 BridgeEdgeInput::Available(edges) => (normalize_bridge_edges(edges), vec![]),
371 BridgeEdgeInput::Unavailable(reason) => (
372 vec![],
373 vec![ReportDegradation {
374 input: RELATES_TO_CODE.to_string(),
375 required: false,
376 detail: reason,
377 }],
378 ),
379 };
380 let bridge_summary = summarize_bridge_edges(&bridge_edges);
381 let suggested_investigation_questions = suggested_questions(
382 &hotspots,
383 &unresolved_targets,
384 &external_targets,
385 bridge_summary.as_ref(),
386 °radation_details,
387 );
388 let markdown = render_markdown(RenderMarkdownInput {
389 project_id: &project_id,
390 generated_at: &generated_at,
391 summary: &summary,
392 hotspots: &hotspots,
393 unresolved_targets: &unresolved_targets,
394 external_targets: &external_targets,
395 bridge_summary: bridge_summary.as_ref(),
396 degradation_details: °radation_details,
397 top_n: options.top_n,
398 });
399
400 degradation_details.sort_by(|left, right| left.input.cmp(&right.input));
401
402 ProjectGraphReport {
403 project_id,
404 generated_at,
405 summary,
406 hotspots,
407 unresolved_targets,
408 external_targets,
409 bridge_summary,
410 bridge_edges,
411 degradation_details,
412 suggested_investigation_questions,
413 markdown,
414 }
415}
416
417fn load_report_snapshot(
418 client: &mut GraphClient,
419 project_id: &str,
420 top_n: usize,
421) -> anyhow::Result<ReportGraphSnapshot> {
422 let (query, params) = report_node_counts_query(project_id);
423 let node_counts_by_type = rows_to_named_counts(client.query(&query, Some(params))?);
424 let node_count = node_counts_by_type.values().sum();
425
426 let (query, params) = report_code_edge_counts_query(project_id);
427 let code_edge_counts = rows_to_named_counts(client.query(&query, Some(params))?);
428 let edge_count = code_edge_counts.values().sum();
429
430 let summary = GraphReportSummary {
431 node_count,
432 edge_count,
433 node_counts_by_type,
434 code_edge_counts,
435 };
436
437 let hotspots = GraphReportHotspots {
438 high_degree_files: load_hotspots(client, project_id, "file", top_n)?,
439 high_degree_symbols: load_hotspots(client, project_id, "symbol", top_n)?,
440 high_degree_modules: load_hotspots(client, project_id, "module", top_n)?,
441 incoming_call_hotspots: load_incoming_call_hotspots(client, project_id, top_n)?,
442 };
443
444 let unresolved_targets = load_target_frequencies(client, project_id, "unresolved", top_n)?;
445 let external_targets = load_target_frequencies(client, project_id, "external", top_n)?;
446
447 let (query, params) = report_bridge_edges_query(project_id);
448 let bridge_edges = match client.query(&query, Some(params)) {
449 Ok(rows) => BridgeEdgeInput::available(
450 rows.iter()
451 .filter_map(row_to_bridge_edge_hypothesis)
452 .collect(),
453 ),
454 Err(error) => BridgeEdgeInput::unavailable(format!("bridge edge query failed: {error}")),
455 };
456
457 Ok(ReportGraphSnapshot {
458 nodes: vec![],
459 code_edges: vec![],
460 summary: Some(summary),
461 hotspots: Some(hotspots),
462 unresolved_targets: Some(unresolved_targets),
463 external_targets: Some(external_targets),
464 bridge_edges,
465 })
466}
467
/// Returns a Cypher `CASE` expression that maps the labels of the node bound
/// to `alias` to a report type name.
///
/// `CodeSymbol` nodes use their `kind` property, falling back to `'symbol'`;
/// nodes matching none of the labels yield `'node'`.
fn report_node_type_case(alias: &str) -> String {
    let branches = [
        format!("WHEN {alias}:CodeFile THEN 'file'"),
        format!("WHEN {alias}:CodeModule THEN 'module'"),
        format!("WHEN {alias}:CodeSymbol THEN coalesce({alias}.kind, 'symbol')"),
        format!("WHEN {alias}:UnresolvedCallee THEN 'unresolved'"),
        format!("WHEN {alias}:ExternalSymbol THEN 'external'"),
    ];
    format!("CASE {} ELSE 'node' END", branches.join(" "))
}
480
/// Returns a Cypher expression yielding the first non-null of the `id`,
/// `path`, and `name` properties of the node bound to `alias`.
fn report_node_id_expr(alias: &str) -> String {
    let candidates = ["id", "path", "name"].map(|property| format!("{alias}.{property}"));
    format!("coalesce({})", candidates.join(", "))
}
484
/// Returns a Cypher expression yielding the first non-null of the `name`,
/// `path`, and `id` properties of the node bound to `alias`.
fn report_node_name_expr(alias: &str) -> String {
    let candidates = ["name", "path", "id"].map(|property| format!("{alias}.{property}"));
    format!("coalesce({})", candidates.join(", "))
}
488
489fn report_node_counts_query(project_id: &str) -> (String, HashMap<String, String>) {
490 (
491 "MATCH (n {project: $project}) \
492 WHERE n:CodeFile OR n:CodeSymbol OR n:CodeModule OR n:UnresolvedCallee OR n:ExternalSymbol \
493 RETURN CASE \
494 WHEN n:CodeFile THEN 'file' \
495 WHEN n:CodeModule THEN 'module' \
496 WHEN n:CodeSymbol THEN coalesce(n.kind, 'symbol') \
497 WHEN n:UnresolvedCallee THEN 'unresolved' \
498 WHEN n:ExternalSymbol THEN 'external' \
499 ELSE 'node' \
500 END AS name, \
501 count(n) AS count"
502 .to_string(),
503 typed_query::string_params(&[("project", project_id)]),
504 )
505}
506
507fn report_code_edge_counts_query(project_id: &str) -> (String, HashMap<String, String>) {
508 (
509 "MATCH (source {project: $project})-[r]->(target {project: $project}) \
510 WHERE type(r) IN ['DEFINES', 'IMPORTS', 'CALLS'] \
511 RETURN type(r) AS name, count(r) AS count"
512 .to_string(),
513 typed_query::string_params(&[("project", project_id)]),
514 )
515}
516
517fn report_hotspots_query(
518 project_id: &str,
519 node_class: &str,
520 top_n: usize,
521) -> (String, HashMap<String, String>) {
522 let predicate = match node_class {
523 "file" => "n:CodeFile",
524 "module" => "n:CodeModule",
525 _ => "n:CodeSymbol",
526 };
527 let limit = top_n.max(1);
528 (
529 format!(
530 "MATCH (n {{project: $project}}) \
531 WHERE {predicate} \
532 OPTIONAL MATCH (n)-[out]->(out_target {{project: $project}}) \
533 WHERE type(out) IN ['DEFINES', 'IMPORTS', 'CALLS'] \
534 AND (out_target:CodeFile OR out_target:CodeSymbol OR out_target:CodeModule OR out_target:UnresolvedCallee OR out_target:ExternalSymbol) \
535 WITH n, count(out) AS outgoing \
536 OPTIONAL MATCH (in_source {{project: $project}})-[inc]->(n) \
537 WHERE type(inc) IN ['DEFINES', 'IMPORTS', 'CALLS'] \
538 AND (in_source:CodeFile OR in_source:CodeSymbol OR in_source:CodeModule OR in_source:UnresolvedCallee OR in_source:ExternalSymbol) \
539 WITH n, outgoing, count(inc) AS incoming \
540 WITH n, outgoing, incoming, outgoing + incoming AS degree \
541 WHERE degree > 0 \
542 RETURN {} AS id, {} AS name, {} AS node_type, degree, incoming, outgoing, coalesce(n.file_path, n.path) AS file_path \
543 ORDER BY degree DESC, name ASC, id ASC \
544 LIMIT {limit}",
545 report_node_id_expr("n"),
546 report_node_name_expr("n"),
547 report_node_type_case("n")
548 ),
549 typed_query::string_params(&[("project", project_id)]),
550 )
551}
552
553fn report_incoming_call_hotspots_query(
554 project_id: &str,
555 top_n: usize,
556) -> (String, HashMap<String, String>) {
557 let limit = top_n.max(1);
558 (
559 format!(
560 "MATCH (:CodeSymbol {{project: $project}})-[r:CALLS]->(n:CodeSymbol {{project: $project}}) \
561 WITH n, count(r) AS incoming \
562 WHERE incoming > 0 \
563 RETURN n.id AS id, coalesce(n.name, n.id) AS name, {} AS node_type, incoming AS degree, incoming, 0 AS outgoing, n.file_path AS file_path \
564 ORDER BY degree DESC, name ASC, id ASC \
565 LIMIT {limit}",
566 report_node_type_case("n")
567 ),
568 typed_query::string_params(&[("project", project_id)]),
569 )
570}
571
572fn report_target_frequencies_query(
573 project_id: &str,
574 target_type: &str,
575 top_n: usize,
576) -> (String, HashMap<String, String>) {
577 let target_label = if target_type == "external" {
578 "ExternalSymbol"
579 } else {
580 "UnresolvedCallee"
581 };
582 let limit = top_n.max(1);
583 (
584 format!(
585 "MATCH (:CodeSymbol {{project: $project}})-[r:CALLS]->(target:{target_label} {{project: $project}}) \
586 RETURN target.id AS id, coalesce(target.name, target.id) AS name, count(r) AS count \
587 ORDER BY count DESC, name ASC, id ASC \
588 LIMIT {limit}"
589 ),
590 typed_query::string_params(&[("project", project_id)]),
591 )
592}
593
594fn report_bridge_edges_query(project_id: &str) -> (String, HashMap<String, String>) {
595 (
596 "MATCH (source)-[r:RELATES_TO_CODE]->(target:CodeSymbol {project: $project}) \
597 RETURN coalesce(source.id, source.uuid, source.name) AS source_id, \
598 target.id AS target_symbol_id, \
599 'RELATES_TO_CODE' AS relation, \
600 r.provenance AS provenance, \
601 r.confidence AS confidence, \
602 coalesce(r.source_system, 'gobby-memory') AS source_system, \
603 r.source_file_path AS source_file_path, \
604 r.source_line AS source_line, \
605 r.source_symbol_id AS source_symbol_id, \
606 r.matching_method AS matching_method"
607 .to_string(),
608 typed_query::string_params(&[("project", project_id)]),
609 )
610}
611
612fn rows_to_named_counts(rows: Vec<Row>) -> BTreeMap<String, usize> {
613 rows.iter()
614 .filter_map(|row| {
615 let name = row_string(row, &["name"])?;
616 let count = row_usize(row, &["count"]).unwrap_or(0);
617 Some((name, count))
618 })
619 .collect()
620}
621
622fn load_hotspots(
623 client: &mut GraphClient,
624 project_id: &str,
625 node_class: &str,
626 top_n: usize,
627) -> anyhow::Result<Vec<GraphHotspot>> {
628 let (query, params) = report_hotspots_query(project_id, node_class, top_n);
629 Ok(client
630 .query(&query, Some(params))?
631 .iter()
632 .filter_map(row_to_graph_hotspot)
633 .collect())
634}
635
636fn load_incoming_call_hotspots(
637 client: &mut GraphClient,
638 project_id: &str,
639 top_n: usize,
640) -> anyhow::Result<Vec<GraphHotspot>> {
641 let (query, params) = report_incoming_call_hotspots_query(project_id, top_n);
642 Ok(client
643 .query(&query, Some(params))?
644 .iter()
645 .filter_map(row_to_graph_hotspot)
646 .collect())
647}
648
649fn load_target_frequencies(
650 client: &mut GraphClient,
651 project_id: &str,
652 target_type: &str,
653 top_n: usize,
654) -> anyhow::Result<Vec<TargetFrequency>> {
655 let (query, params) = report_target_frequencies_query(project_id, target_type, top_n);
656 Ok(client
657 .query(&query, Some(params))?
658 .iter()
659 .filter_map(row_to_target_frequency)
660 .collect())
661}
662
663fn row_to_graph_hotspot(row: &Row) -> Option<GraphHotspot> {
664 Some(GraphHotspot {
665 id: row_string(row, &["id"])?,
666 name: row_string(row, &["name"])?,
667 node_type: row_string(row, &["node_type"]).unwrap_or_else(|| "node".to_string()),
668 degree: row_usize(row, &["degree"]).unwrap_or(0),
669 incoming: row_usize(row, &["incoming"]).unwrap_or(0),
670 outgoing: row_usize(row, &["outgoing"]).unwrap_or(0),
671 file_path: row_string(row, &["file_path"]),
672 })
673}
674
675fn row_to_target_frequency(row: &Row) -> Option<TargetFrequency> {
676 Some(TargetFrequency {
677 id: row_string(row, &["id"])?,
678 name: row_string(row, &["name"])?,
679 count: row_usize(row, &["count"]).unwrap_or(0),
680 })
681}
682
683fn row_to_bridge_edge_hypothesis(row: &Row) -> Option<BridgeEdgeHypothesis> {
684 let source_id = row_string(row, &["source_id"])?;
685 let target_symbol_id = row_string(row, &["target_symbol_id"])?;
686 let relation = row_string(row, &["relation"]).unwrap_or_else(|| RELATES_TO_CODE.to_string());
687 let source_system =
688 row_string(row, &["source_system"]).unwrap_or_else(|| "gobby-memory".to_string());
689
690 let mut metadata = ProjectionMetadata::new(
691 row_string(row, &["provenance"])
692 .and_then(|value| ProjectionProvenance::from_wire_value(&value))
693 .unwrap_or(ProjectionProvenance::Inferred),
694 source_system,
695 );
696 metadata.confidence = row_f64(row, &["confidence"]);
697 metadata.source_file_path = row_string(row, &["source_file_path"]);
698 metadata.source_line = row_usize(row, &["source_line"]);
699 metadata.source_symbol_id = row_string(row, &["source_symbol_id"]);
700 metadata.matching_method = row_string(row, &["matching_method"]);
701
702 Some(BridgeEdgeHypothesis::new(
703 source_id,
704 target_symbol_id,
705 relation,
706 metadata,
707 ))
708}
709
710fn summarize_graph(nodes: &[ReportNode], edges: &[ReportCodeEdge]) -> GraphReportSummary {
711 let mut node_counts_by_type = BTreeMap::new();
712 for node in nodes {
713 *node_counts_by_type
714 .entry(node.node_type.clone())
715 .or_insert(0) += 1;
716 }
717
718 let mut code_edge_counts = BTreeMap::new();
719 for edge in edges {
720 *code_edge_counts.entry(edge.edge_type.clone()).or_insert(0) += 1;
721 }
722
723 GraphReportSummary {
724 node_count: nodes.len(),
725 edge_count: edges.len(),
726 node_counts_by_type,
727 code_edge_counts,
728 }
729}
730
731fn summarize_hotspots(
732 nodes: &[ReportNode],
733 edges: &[ReportCodeEdge],
734 top_n: usize,
735) -> GraphReportHotspots {
736 let mut degree = HashMap::<&str, DegreeStats>::new();
737 let mut incoming_calls = HashMap::<&str, usize>::new();
738 for edge in edges {
739 degree.entry(&edge.source).or_default().outgoing += 1;
740 degree.entry(&edge.target).or_default().incoming += 1;
741 if edge.edge_type == "CALLS" {
742 *incoming_calls.entry(&edge.target).or_insert(0) += 1;
743 }
744 }
745
746 GraphReportHotspots {
747 high_degree_files: top_hotspots(nodes, °ree, top_n, |node| node.node_type == "file"),
748 high_degree_symbols: top_hotspots(nodes, °ree, top_n, |node| {
749 is_symbol_node(&node.node_type)
750 }),
751 high_degree_modules: top_hotspots(nodes, °ree, top_n, |node| node.node_type == "module"),
752 incoming_call_hotspots: top_incoming_call_hotspots(nodes, &incoming_calls, top_n),
753 }
754}
755
756fn top_hotspots(
757 nodes: &[ReportNode],
758 degree: &HashMap<&str, DegreeStats>,
759 top_n: usize,
760 include: impl Fn(&ReportNode) -> bool,
761) -> Vec<GraphHotspot> {
762 let mut hotspots = nodes
763 .iter()
764 .filter(|node| include(node))
765 .filter_map(|node| {
766 let stats = degree.get(node.id.as_str())?;
767 let total = stats.incoming + stats.outgoing;
768 (total > 0).then(|| GraphHotspot {
769 id: node.id.clone(),
770 name: node.name.clone(),
771 node_type: node.node_type.clone(),
772 degree: total,
773 incoming: stats.incoming,
774 outgoing: stats.outgoing,
775 file_path: node.file_path.clone(),
776 })
777 })
778 .collect::<Vec<_>>();
779 sort_hotspots(&mut hotspots);
780 hotspots.truncate(top_n);
781 hotspots
782}
783
784fn top_incoming_call_hotspots(
785 nodes: &[ReportNode],
786 incoming_calls: &HashMap<&str, usize>,
787 top_n: usize,
788) -> Vec<GraphHotspot> {
789 let mut hotspots = nodes
790 .iter()
791 .filter(|node| is_symbol_node(&node.node_type))
792 .filter_map(|node| {
793 let incoming = incoming_calls.get(node.id.as_str()).copied().unwrap_or(0);
794 (incoming > 0).then(|| GraphHotspot {
795 id: node.id.clone(),
796 name: node.name.clone(),
797 node_type: node.node_type.clone(),
798 degree: incoming,
799 incoming,
800 outgoing: 0,
801 file_path: node.file_path.clone(),
802 })
803 })
804 .collect::<Vec<_>>();
805 sort_hotspots(&mut hotspots);
806 hotspots.truncate(top_n);
807 hotspots
808}
809
810fn target_frequencies(
811 edges: &[ReportCodeEdge],
812 node_by_id: &HashMap<&str, &ReportNode>,
813 target_type: &str,
814 top_n: usize,
815) -> Vec<TargetFrequency> {
816 let mut counts = BTreeMap::<String, TargetFrequency>::new();
817 for edge in edges.iter().filter(|edge| edge.edge_type == "CALLS") {
818 let Some(node) = node_by_id.get(edge.target.as_str()) else {
819 continue;
820 };
821 if node.node_type != target_type {
822 continue;
823 }
824 let entry = counts
825 .entry(node.id.clone())
826 .or_insert_with(|| TargetFrequency {
827 id: node.id.clone(),
828 name: node.name.clone(),
829 count: 0,
830 });
831 entry.count += 1;
832 }
833
834 let mut frequencies = counts.into_values().collect::<Vec<_>>();
835 frequencies.sort_by(|left, right| {
836 right
837 .count
838 .cmp(&left.count)
839 .then_with(|| left.name.cmp(&right.name))
840 .then_with(|| left.id.cmp(&right.id))
841 });
842 frequencies.truncate(top_n);
843 frequencies
844}
845
846fn summarize_bridge_edges(edges: &[BridgeEdgeHypothesis]) -> Option<BridgeReportSummary> {
847 if edges.is_empty() {
848 return None;
849 }
850
851 let mut source_counts = BTreeMap::<String, usize>::new();
852 let mut confidence_min = f64::INFINITY;
853 let mut confidence_max = f64::NEG_INFINITY;
854 let mut has_confidence = false;
855 for edge in edges {
856 *source_counts
857 .entry(edge.metadata.source_system.clone())
858 .or_insert(0) += 1;
859 if let Some(confidence) = edge.metadata.confidence
860 && confidence.is_finite()
861 {
862 confidence_min = confidence_min.min(confidence);
863 confidence_max = confidence_max.max(confidence);
864 has_confidence = true;
865 }
866 }
867
868 let source_system_counts = source_counts
869 .into_iter()
870 .map(|(name, count)| NamedCount { name, count })
871 .collect();
872
873 Some(BridgeReportSummary {
874 relation: RELATES_TO_CODE.to_string(),
875 edge_count: edges.len(),
876 inferred: true,
877 read_only: true,
878 source_system_counts,
879 confidence_range: has_confidence.then_some(ConfidenceRange {
880 min: confidence_min,
881 max: confidence_max,
882 }),
883 })
884}
885
886fn normalize_bridge_edges(edges: Vec<BridgeEdgeHypothesis>) -> Vec<BridgeEdgeHypothesis> {
887 edges
888 .into_iter()
889 .map(|edge| {
890 BridgeEdgeHypothesis::new(
891 edge.source_id,
892 edge.target_symbol_id,
893 edge.relation,
894 edge.metadata,
895 )
896 })
897 .collect()
898}
899
900fn suggested_questions(
901 hotspots: &GraphReportHotspots,
902 unresolved_targets: &[TargetFrequency],
903 external_targets: &[TargetFrequency],
904 bridge_summary: Option<&BridgeReportSummary>,
905 degradation_details: &[ReportDegradation],
906) -> Vec<String> {
907 let mut questions =
908 vec!["Which high-degree files or symbols should be reviewed before refactors?".to_string()];
909
910 if !hotspots.incoming_call_hotspots.is_empty() {
911 questions.push("Which incoming-call hotspots define the largest blast radius?".to_string());
912 }
913 if !unresolved_targets.is_empty() || !external_targets.is_empty() {
914 questions.push(
915 "Which unresolved or external call targets should be resolved first?".to_string(),
916 );
917 }
918 if bridge_summary.is_some() {
919 questions
920 .push("Which inferred RELATES_TO_CODE bridges need human confirmation?".to_string());
921 }
922 if !degradation_details.is_empty() {
923 questions.push(
924 "Which degraded optional inputs should be restored for the next report?".to_string(),
925 );
926 }
927
928 questions
929}
930
/// Borrowed view of everything `render_markdown` needs.
struct RenderMarkdownInput<'a> {
    /// Project identifier shown in the header.
    project_id: &'a str,
    /// Generation timestamp shown in the header.
    generated_at: &'a str,
    /// Node and edge totals.
    summary: &'a GraphReportSummary,
    /// Hotspot rankings.
    hotspots: &'a GraphReportHotspots,
    /// Unresolved call targets.
    unresolved_targets: &'a [TargetFrequency],
    /// External call targets.
    external_targets: &'a [TargetFrequency],
    /// Bridge edge summary, if there are bridge edges.
    bridge_summary: Option<&'a BridgeReportSummary>,
    /// Inputs that could not be loaded.
    degradation_details: &'a [ReportDegradation],
    /// Maximum number of entries rendered per list.
    top_n: usize,
}
942
943fn render_markdown(input: RenderMarkdownInput<'_>) -> String {
944 let mut lines = vec![
945 "# Project Graph Report".to_string(),
946 String::new(),
947 format!("- Project: {}", input.project_id),
948 format!("- Generated: {}", input.generated_at),
949 format!("- Nodes: {}", input.summary.node_count),
950 format!("- Edges: {}", input.summary.edge_count),
951 ];
952
953 if !input.summary.code_edge_counts.is_empty() {
954 lines.push(format!(
955 "- Code edges: {}",
956 named_counts_inline(&input.summary.code_edge_counts)
957 ));
958 }
959
960 append_hotspot_section(
961 &mut lines,
962 "High-degree files",
963 &input.hotspots.high_degree_files,
964 input.top_n,
965 );
966 append_hotspot_section(
967 &mut lines,
968 "High-degree symbols",
969 &input.hotspots.high_degree_symbols,
970 input.top_n,
971 );
972 append_hotspot_section(
973 &mut lines,
974 "Incoming-call hotspots",
975 &input.hotspots.incoming_call_hotspots,
976 input.top_n,
977 );
978 append_target_section(
979 &mut lines,
980 "Unresolved call targets",
981 input.unresolved_targets,
982 input.top_n,
983 );
984 append_target_section(
985 &mut lines,
986 "External call targets",
987 input.external_targets,
988 input.top_n,
989 );
990
991 if let Some(summary) = input.bridge_summary {
992 lines.push(String::new());
993 lines.push("RELATES_TO_CODE bridges".to_string());
994 lines.push(format!(
995 "- {} inferred read-only edge(s)",
996 summary.edge_count
997 ));
998 if let Some(range) = &summary.confidence_range {
999 lines.push(format!("- Confidence: {:.3}..{:.3}", range.min, range.max));
1000 }
1001 }
1002
1003 if !input.degradation_details.is_empty() {
1004 lines.push(String::new());
1005 lines.push("Degradation".to_string());
1006 for detail in input.degradation_details {
1007 lines.push(format!("- {}: {}", detail.input, detail.detail));
1008 }
1009 }
1010
1011 lines.join("\n")
1012}
1013
1014fn append_hotspot_section(
1015 lines: &mut Vec<String>,
1016 title: &str,
1017 hotspots: &[GraphHotspot],
1018 top_n: usize,
1019) {
1020 if hotspots.is_empty() {
1021 return;
1022 }
1023 lines.push(String::new());
1024 lines.push(title.to_string());
1025 for hotspot in hotspots.iter().take(top_n) {
1026 lines.push(format!(
1027 "- {} ({}, degree {})",
1028 hotspot.name, hotspot.node_type, hotspot.degree
1029 ));
1030 }
1031}
1032
1033fn append_target_section(
1034 lines: &mut Vec<String>,
1035 title: &str,
1036 targets: &[TargetFrequency],
1037 top_n: usize,
1038) {
1039 if targets.is_empty() {
1040 return;
1041 }
1042 lines.push(String::new());
1043 lines.push(title.to_string());
1044 for target in targets.iter().take(top_n) {
1045 lines.push(format!("- {} ({})", target.name, target.count));
1046 }
1047}
1048
/// Renders the map as `name=count` pairs separated by `", "`, in key order.
/// An empty map yields an empty string.
fn named_counts_inline(counts: &BTreeMap<String, usize>) -> String {
    let mut rendered = String::new();
    for (index, (name, count)) in counts.iter().enumerate() {
        if index > 0 {
            rendered.push_str(", ");
        }
        rendered.push_str(&format!("{name}={count}"));
    }
    rendered
}
1056
1057fn sort_hotspots(hotspots: &mut [GraphHotspot]) {
1058 hotspots.sort_by(|left, right| {
1059 right
1060 .degree
1061 .cmp(&left.degree)
1062 .then_with(|| left.name.cmp(&right.name))
1063 .then_with(|| left.id.cmp(&right.id))
1064 });
1065}
1066
/// Returns `true` for every node type except `file`, `module`,
/// `unresolved`, and `external`.
fn is_symbol_node(node_type: &str) -> bool {
    match node_type {
        "file" | "module" | "unresolved" | "external" => false,
        _ => true,
    }
}
1070
1071fn inferred_bridge_metadata(mut metadata: ProjectionMetadata) -> ProjectionMetadata {
1072 metadata.provenance = ProjectionProvenance::Inferred;
1073 metadata
1074}
1075
1076fn row_string(row: &Row, keys: &[&str]) -> Option<String> {
1077 keys.iter()
1078 .find_map(|key| row.get(*key).and_then(Value::as_str))
1079 .filter(|value| !value.is_empty())
1080 .map(ToOwned::to_owned)
1081}
1082
1083fn row_usize(row: &Row, keys: &[&str]) -> Option<usize> {
1084 keys.iter()
1085 .find_map(|key| row.get(*key))
1086 .and_then(|value| {
1087 value
1088 .as_u64()
1089 .or_else(|| value.as_i64().and_then(|value| value.try_into().ok()))
1090 })
1091 .map(|value| value as usize)
1092}
1093
1094fn row_f64(row: &Row, keys: &[&str]) -> Option<f64> {
1095 keys.iter()
1096 .find_map(|key| row.get(*key))
1097 .and_then(Value::as_f64)
1098}
1099
1100fn now_iso8601() -> String {
1101 let dur = SystemTime::now()
1102 .duration_since(UNIX_EPOCH)
1103 .unwrap_or_default();
1104 let secs = dur.as_secs();
1105 let micros = dur.subsec_micros();
1106
1107 let (year, month, day) = days_to_ymd(secs / 86400);
1108 let daytime = secs % 86400;
1109 let hour = daytime / 3600;
1110 let minute = (daytime % 3600) / 60;
1111 let second = daytime % 60;
1112
1113 format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}.{micros:06}+00:00")
1114}
1115
/// Converts a count of whole days since 1970-01-01 into a `(year, month, day)` triple.
///
/// `month` is 1-based (1 = January) and `day` is the 1-based day of the month. The
/// computation works on a calendar whose year starts on 1 March, so the leap day falls at
/// the end of the internal year; January and February are attributed to the following
/// calendar year at the end.
fn days_to_ymd(days: u64) -> (u64, u64, u64) {
    // Length of one 400-year Gregorian cycle, in days.
    const DAYS_PER_ERA: i64 = 146097;
    // Offset that moves the origin from 1970-01-01 to the start of a 400-year cycle.
    const EPOCH_SHIFT: i64 = 719468;

    let shifted = days as i64 + EPOCH_SHIFT;
    // Floor division and non-negative remainder split the day count into cycle + offset.
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA) as u64;

    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Month index counted from March (0 = March, ..., 11 = February).
    let march_based_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * march_based_month + 2) / 5 + 1;

    // January and February belong to the next calendar year.
    let (month, year_carry) = if march_based_month < 10 {
        (march_based_month + 3, 0)
    } else {
        (march_based_month - 9, 1)
    };
    let year = year_of_era as i64 + era * 400 + year_carry;

    (year as u64, month, day)
}
1129
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::{CodeVectorSettings, Context};
    use crate::models::{ProjectionMetadata, ProjectionProvenance};
    use std::path::PathBuf;

    /// Project id shared by every fixture in this module.
    const PROJECT_ID: &str = "project-1";
    /// Fixed generation timestamp passed to snapshot-based report generation.
    const GENERATED_AT: &str = "2026-05-28T00:00:00Z";

    /// Generates a report for `snapshot` using the shared fixture id and timestamp, then
    /// serializes it to JSON.
    fn snapshot_report_json(snapshot: ReportGraphSnapshot) -> serde_json::Value {
        let report = generate_report_from_snapshot(PROJECT_ID, GENERATED_AT, snapshot);
        serde_json::to_value(&report).expect("report serializes")
    }

    /// Checks the serialized shape of a report built from a small mixed snapshot: summary
    /// counts, hotspot ordering, unresolved/external targets, bridge summary, markdown and
    /// suggested questions.
    #[test]
    fn report_shape() {
        let nodes = vec![
            ReportNode::new("src/lib.rs", "src/lib.rs", "file"),
            ReportNode::new("mod:api", "api", "module"),
            ReportNode::new("sym:handler", "handler", "function").with_file_path("src/lib.rs"),
            ReportNode::new("sym:parse", "parse", "function").with_file_path("src/lib.rs"),
            ReportNode::new("unresolved:do_work", "do_work", "unresolved"),
            ReportNode::new("external:serde_json", "serde_json", "external"),
        ];
        let code_edges = vec![
            ReportCodeEdge::new("src/lib.rs", "sym:handler", "DEFINES"),
            ReportCodeEdge::new("src/lib.rs", "mod:api", "IMPORTS"),
            ReportCodeEdge::new("sym:handler", "sym:parse", "CALLS"),
            ReportCodeEdge::new("sym:parse", "unresolved:do_work", "CALLS"),
            ReportCodeEdge::new("sym:handler", "external:serde_json", "CALLS"),
        ];
        let bridge_edge = BridgeEdgeHypothesis::inferred(
            "memory-1",
            "sym:handler",
            RELATES_TO_CODE,
            "gobby-memory",
            Some(0.72),
        );

        let json = snapshot_report_json(ReportGraphSnapshot {
            nodes,
            code_edges,
            bridge_edges: BridgeEdgeInput::available(vec![bridge_edge]),
            ..ReportGraphSnapshot::default()
        });

        assert_eq!(json["project_id"], PROJECT_ID);

        let summary = &json["summary"];
        assert_eq!(summary["node_count"], 6);
        assert_eq!(summary["edge_count"], 5);
        assert_eq!(summary["code_edge_counts"]["CALLS"], 3);

        let hotspots = &json["hotspots"];
        assert_eq!(hotspots["high_degree_files"][0]["id"], "src/lib.rs");
        assert_eq!(hotspots["incoming_call_hotspots"][0]["id"], "sym:parse");

        assert_eq!(json["unresolved_targets"][0]["name"], "do_work");
        assert_eq!(json["external_targets"][0]["name"], "serde_json");

        let bridge_summary = &json["bridge_summary"];
        assert_eq!(bridge_summary["relation"], RELATES_TO_CODE);
        assert_eq!(bridge_summary["confidence_range"]["min"], 0.72);

        let markdown = json["markdown"].as_str().unwrap();
        assert!(markdown.contains(PROJECT_ID));

        let questions = json["suggested_investigation_questions"]
            .as_array()
            .unwrap();
        assert!(!questions.is_empty());
    }

    /// A bridge edge built from extracted metadata is still marked read-only and inferred,
    /// both on the value itself and after passing through report generation.
    #[test]
    fn bridge_edges_are_read_only() {
        let extracted = ProjectionMetadata::gcode_extracted();
        let edge = BridgeEdgeHypothesis::new("memory-1", "symbol-1", RELATES_TO_CODE, extracted);

        assert!(edge.read_only);
        assert_eq!(edge.label, "inferred hypothesis");
        assert_eq!(edge.metadata.provenance, ProjectionProvenance::Inferred);

        let json = snapshot_report_json(ReportGraphSnapshot {
            nodes: vec![ReportNode::new("symbol-1", "handler", "function")],
            code_edges: Vec::new(),
            bridge_edges: BridgeEdgeInput::available(vec![edge]),
            ..ReportGraphSnapshot::default()
        });

        let serialized_edge = &json["bridge_edges"][0];
        assert_eq!(serialized_edge["read_only"], true);
        assert_eq!(serialized_edge["metadata"]["provenance"], "INFERRED");
    }

    /// A context without a graph service makes `generate_report` fail, whereas an
    /// unavailable bridge-edge input only adds a non-required degradation detail.
    #[test]
    fn report_degradation_contract() {
        let context_without_graph = Context {
            database_url: "postgresql://localhost/unavailable".to_string(),
            project_root: PathBuf::from("/tmp/project"),
            project_id: PROJECT_ID.to_string(),
            quiet: true,
            falkordb: None,
            qdrant: None,
            embedding: None,
            code_vectors: CodeVectorSettings::default(),
            daemon_url: None,
        };
        let failure = generate_report(&context_without_graph)
            .expect_err("missing graph service is required");
        assert_eq!(failure, ProjectGraphReportError::GraphServiceNotConfigured);

        let degraded_snapshot = ReportGraphSnapshot {
            nodes: vec![ReportNode::new("symbol-1", "handler", "function")],
            code_edges: Vec::new(),
            bridge_edges: BridgeEdgeInput::unavailable("bridge edge query timed out"),
            ..ReportGraphSnapshot::default()
        };
        let report = generate_report_from_snapshot(PROJECT_ID, GENERATED_AT, degraded_snapshot);

        assert_eq!(report.summary.node_count, 1);
        assert_eq!(report.degradation_details.len(), 1);

        let detail = &report.degradation_details[0];
        assert_eq!(detail.input, RELATES_TO_CODE);
        assert!(!detail.required);
    }

    /// An edge created through `BridgeEdgeHypothesis::inferred` is labelled as a hypothesis
    /// and keeps that label and provenance when serialized inside a report.
    #[test]
    fn bridge_edges_are_hypotheses() {
        let edge = BridgeEdgeHypothesis::inferred(
            "memory-1",
            "symbol-1",
            RELATES_TO_CODE,
            "gobby-memory",
            Some(0.72),
        );

        assert_eq!(edge.label, "inferred hypothesis");
        assert_eq!(edge.metadata.provenance, ProjectionProvenance::Inferred);
        assert!(edge.metadata.is_hypothesis());

        let mut report = empty_report(PROJECT_ID);
        report.bridge_edges.push(edge);

        let json = serde_json::to_value(&report).expect("report serializes");
        let serialized_edge = &json["bridge_edges"][0];
        assert_eq!(serialized_edge["label"], "inferred hypothesis");
        assert_eq!(serialized_edge["metadata"]["provenance"], "INFERRED");
    }
}