Skip to main content

gobby_code/graph/
report.rs

1use std::collections::{BTreeMap, HashMap};
2use std::fmt;
3use std::time::{SystemTime, UNIX_EPOCH};
4
5use gobby_core::degradation::ServiceState;
6use gobby_core::falkor::{GraphClient, Row};
7use serde::{Deserialize, Serialize};
8use serde_json::Value;
9
10use crate::config::Context;
11use crate::graph::typed_query;
12use crate::models::{ProjectionMetadata, ProjectionProvenance};
13
/// Relation name used for bridge edges; also used as the `input` name of the
/// degradation entry emitted when bridge edges are unavailable.
const RELATES_TO_CODE: &str = "RELATES_TO_CODE";
/// Default value of `ProjectGraphReportOptions::top_n`.
const DEFAULT_TOP_LIMIT: usize = 10;
16
/// A bridge edge pointing from some source node to a code symbol, reported as
/// a hypothesis rather than as a confirmed fact.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BridgeEdgeHypothesis {
    /// Identifier of the node the edge starts from.
    pub source_id: String,
    /// Identifier of the code symbol the edge points to.
    pub target_symbol_id: String,
    /// Relation name of the edge.
    pub relation: String,
    /// Human-readable label; `new` always sets it to "inferred hypothesis".
    pub label: String,
    /// `new` always sets this to `true`.
    pub read_only: bool,
    /// Projection metadata, passed through `inferred_bridge_metadata` by `new`.
    pub metadata: ProjectionMetadata,
}
26
27impl BridgeEdgeHypothesis {
28    pub fn new(
29        source_id: impl Into<String>,
30        target_symbol_id: impl Into<String>,
31        relation: impl Into<String>,
32        metadata: ProjectionMetadata,
33    ) -> Self {
34        Self {
35            source_id: source_id.into(),
36            target_symbol_id: target_symbol_id.into(),
37            relation: relation.into(),
38            label: "inferred hypothesis".to_string(),
39            read_only: true,
40            metadata: inferred_bridge_metadata(metadata),
41        }
42    }
43
44    pub fn inferred(
45        source_id: impl Into<String>,
46        target_symbol_id: impl Into<String>,
47        relation: impl Into<String>,
48        source_system: impl Into<String>,
49        confidence: Option<f64>,
50    ) -> Self {
51        Self::new(
52            source_id,
53            target_symbol_id,
54            relation,
55            ProjectionMetadata::inferred(source_system, confidence),
56        )
57    }
58}
59
/// The complete project graph report: structured sections plus a rendered
/// markdown version.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ProjectGraphReport {
    /// Project the report was generated for.
    pub project_id: String,
    /// Timestamp string supplied when the report was built.
    pub generated_at: String,
    /// Node and edge totals, broken down by type.
    pub summary: GraphReportSummary,
    /// Highest-degree files, symbols, modules and incoming-call targets.
    pub hotspots: GraphReportHotspots,
    /// Most frequently called targets of type "unresolved".
    pub unresolved_targets: Vec<TargetFrequency>,
    /// Most frequently called targets of type "external".
    pub external_targets: Vec<TargetFrequency>,
    /// Aggregate over `bridge_edges`; `None` when there are no bridge edges.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bridge_summary: Option<BridgeReportSummary>,
    /// Bridge edge hypotheses; omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub bridge_edges: Vec<BridgeEdgeHypothesis>,
    /// Inputs that could not be loaded; omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub degradation_details: Vec<ReportDegradation>,
    /// Follow-up questions derived from which report sections are non-empty.
    pub suggested_investigation_questions: Vec<String>,
    /// Markdown rendering of the report.
    pub markdown: String,
}
77
/// Tunable options for report generation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ProjectGraphReportOptions {
    /// Maximum number of entries per ranked list; values below 1 are raised
    /// to 1 by `normalized`.
    pub top_n: usize,
}
82
83impl Default for ProjectGraphReportOptions {
84    fn default() -> Self {
85        Self {
86            top_n: DEFAULT_TOP_LIMIT,
87        }
88    }
89}
90
91impl ProjectGraphReportOptions {
92    fn normalized(self) -> Self {
93        Self {
94            top_n: self.top_n.max(1),
95        }
96    }
97}
98
/// Totals describing the size of the code graph.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct GraphReportSummary {
    /// Total number of nodes.
    pub node_count: usize,
    /// Total number of code edges.
    pub edge_count: usize,
    /// Node count keyed by node type.
    pub node_counts_by_type: BTreeMap<String, usize>,
    /// Edge count keyed by edge type.
    pub code_edge_counts: BTreeMap<String, usize>,
}
106
/// Ranked hotspot lists, one per node category.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct GraphReportHotspots {
    /// File nodes ranked by degree.
    pub high_degree_files: Vec<GraphHotspot>,
    /// Symbol nodes ranked by degree.
    pub high_degree_symbols: Vec<GraphHotspot>,
    /// Module nodes ranked by degree.
    pub high_degree_modules: Vec<GraphHotspot>,
    /// Symbol nodes ranked by the number of incoming `CALLS` edges.
    pub incoming_call_hotspots: Vec<GraphHotspot>,
}
114
/// One node in a hotspot ranking.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct GraphHotspot {
    /// Node identifier.
    pub id: String,
    /// Display name of the node.
    pub name: String,
    /// Node type; serialized under the key `type`.
    #[serde(rename = "type")]
    pub node_type: String,
    /// Ranking value: `incoming + outgoing` for degree hotspots, and equal to
    /// `incoming` for incoming-call hotspots.
    pub degree: usize,
    /// Number of incoming edges counted for this node.
    pub incoming: usize,
    /// Number of outgoing edges counted for this node (0 for incoming-call hotspots).
    pub outgoing: usize,
    /// File path of the node, when known; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub file_path: Option<String>,
}
127
/// How often a particular call target is referenced.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TargetFrequency {
    /// Identifier of the target node.
    pub id: String,
    /// Display name of the target node.
    pub name: String,
    /// Number of `CALLS` edges pointing at the target.
    pub count: usize,
}
134
/// Aggregate view over the bridge edges included in a report.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BridgeReportSummary {
    /// Relation name the summary covers.
    pub relation: String,
    /// Number of bridge edges summarized.
    pub edge_count: usize,
    /// Set to `true` by `summarize_bridge_edges`.
    pub inferred: bool,
    /// Set to `true` by `summarize_bridge_edges`.
    pub read_only: bool,
    /// Edge count per `metadata.source_system`, ordered by name.
    pub source_system_counts: Vec<NamedCount>,
    /// Smallest and largest finite confidence seen; `None` when no edge has one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub confidence_range: Option<ConfidenceRange>,
}
145
/// A name paired with an occurrence count.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct NamedCount {
    /// The counted name.
    pub name: String,
    /// Number of occurrences.
    pub count: usize,
}
151
/// Inclusive range of confidence values.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ConfidenceRange {
    /// Lowest confidence observed.
    pub min: f64,
    /// Highest confidence observed.
    pub max: f64,
}
157
/// Describes a report input that could not be loaded.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ReportDegradation {
    /// Name of the input that is missing.
    pub input: String,
    /// Whether the input is required; the bridge-edge degradation uses `false`.
    pub required: bool,
    /// Explanation of why the input is unavailable.
    pub detail: String,
}
164
/// Reasons report generation can fail.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ProjectGraphReportError {
    /// No FalkorDB configuration is present, or the graph service reported
    /// itself as not configured.
    GraphServiceNotConfigured,
    /// The graph service reported itself as unreachable.
    GraphServiceUnreachable { message: String },
    /// Loading the report snapshot returned an error.
    GraphQueryFailed { message: String },
}
171
172impl fmt::Display for ProjectGraphReportError {
173    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
174        match self {
175            Self::GraphServiceNotConfigured => {
176                f.write_str("FalkorDB is not configured; project graph report requires FalkorDB")
177            }
178            Self::GraphServiceUnreachable { message } => write!(
179                f,
180                "FalkorDB is unreachable; project graph report requires FalkorDB: {message}"
181            ),
182            Self::GraphQueryFailed { message } => {
183                write!(f, "project graph report query failed: {message}")
184            }
185        }
186    }
187}
188
// Marker impl: relies on the `Debug` and `Display` impls; no source error is exposed.
impl std::error::Error for ProjectGraphReportError {}
190
/// Raw material for building a report.
///
/// Each `Option` section, when `Some`, is used as-is by the report builder;
/// when `None`, the section is derived from `nodes` and `code_edges`.
#[derive(Debug, Clone, Default, PartialEq)]
struct ReportGraphSnapshot {
    /// Graph nodes used to derive sections that are not precomputed.
    nodes: Vec<ReportNode>,
    /// Code edges used to derive sections that are not precomputed.
    code_edges: Vec<ReportCodeEdge>,
    /// Precomputed summary, if any.
    summary: Option<GraphReportSummary>,
    /// Precomputed hotspots, if any.
    hotspots: Option<GraphReportHotspots>,
    /// Precomputed unresolved call targets, if any.
    unresolved_targets: Option<Vec<TargetFrequency>>,
    /// Precomputed external call targets, if any.
    external_targets: Option<Vec<TargetFrequency>>,
    /// Bridge edges, or the reason they are unavailable.
    bridge_edges: BridgeEdgeInput,
}
201
/// A graph node as seen by the in-memory report summarizers.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ReportNode {
    /// Node identifier; edges refer to nodes by this value.
    id: String,
    /// Display name.
    name: String,
    /// Node type string (for example "file", "module", "unresolved", "external").
    node_type: String,
    /// File path associated with the node, when known.
    file_path: Option<String>,
}
209
210impl ReportNode {
211    #[cfg(test)]
212    fn new(id: impl Into<String>, name: impl Into<String>, node_type: impl Into<String>) -> Self {
213        Self {
214            id: id.into(),
215            name: name.into(),
216            node_type: node_type.into(),
217            file_path: None,
218        }
219    }
220
221    #[cfg(test)]
222    fn with_file_path(mut self, file_path: impl Into<String>) -> Self {
223        self.file_path = Some(file_path.into());
224        self
225    }
226}
227
/// A directed code edge as seen by the in-memory report summarizers.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ReportCodeEdge {
    /// Id of the node the edge starts from.
    source: String,
    /// Id of the node the edge points to.
    target: String,
    /// Edge type string; "CALLS" edges feed the call-related rankings.
    edge_type: String,
}
234
235impl ReportCodeEdge {
236    #[cfg(test)]
237    fn new(
238        source: impl Into<String>,
239        target: impl Into<String>,
240        edge_type: impl Into<String>,
241    ) -> Self {
242        Self {
243            source: source.into(),
244            target: target.into(),
245            edge_type: edge_type.into(),
246        }
247    }
248}
249
/// Outcome of loading bridge edges for a report.
#[derive(Debug, Clone, PartialEq)]
enum BridgeEdgeInput {
    /// Bridge edges were loaded (the list may be empty).
    Available(Vec<BridgeEdgeHypothesis>),
    /// Bridge edges could not be loaded; the string explains why and is
    /// surfaced as a degradation detail in the report.
    Unavailable(String),
}
255
256impl BridgeEdgeInput {
257    fn available(edges: Vec<BridgeEdgeHypothesis>) -> Self {
258        Self::Available(edges)
259    }
260
261    fn unavailable(reason: impl Into<String>) -> Self {
262        Self::Unavailable(reason.into())
263    }
264}
265
266impl Default for BridgeEdgeInput {
267    fn default() -> Self {
268        Self::Available(vec![])
269    }
270}
271
/// Per-node edge counters used while ranking hotspots in memory.
#[derive(Debug, Clone, Copy, Default)]
struct DegreeStats {
    /// Number of edges whose target is this node.
    incoming: usize,
    /// Number of edges whose source is this node.
    outgoing: usize,
}
277
278pub fn generate_report(ctx: &Context) -> Result<ProjectGraphReport, ProjectGraphReportError> {
279    generate_report_with_options(ctx, ProjectGraphReportOptions::default())
280}
281
282pub fn generate_report_with_options(
283    ctx: &Context,
284    options: ProjectGraphReportOptions,
285) -> Result<ProjectGraphReport, ProjectGraphReportError> {
286    let Some(config) = ctx.falkordb.as_ref() else {
287        return Err(ProjectGraphReportError::GraphServiceNotConfigured);
288    };
289
290    let connection_config = config.connection_config();
291    let options = options.normalized();
292    let result = gobby_core::falkor::with_graph(
293        Some(&connection_config),
294        &config.graph_name,
295        ReportGraphSnapshot::default(),
296        |client| load_report_snapshot(client, &ctx.project_id, options.top_n),
297    );
298
299    match result {
300        Ok((snapshot, ServiceState::Available)) => Ok(generate_report_from_snapshot_with_options(
301            &ctx.project_id,
302            now_iso8601(),
303            snapshot,
304            options,
305        )),
306        Ok((_, ServiceState::NotConfigured)) => {
307            Err(ProjectGraphReportError::GraphServiceNotConfigured)
308        }
309        Ok((_, ServiceState::Unreachable { message })) => {
310            Err(ProjectGraphReportError::GraphServiceUnreachable { message })
311        }
312        Err(error) => Err(ProjectGraphReportError::GraphQueryFailed {
313            message: error.to_string(),
314        }),
315    }
316}
317
318pub fn empty_report(project_id: impl Into<String>) -> ProjectGraphReport {
319    generate_report_from_snapshot(project_id, now_iso8601(), ReportGraphSnapshot::default())
320}
321
322fn generate_report_from_snapshot(
323    project_id: impl Into<String>,
324    generated_at: impl Into<String>,
325    snapshot: ReportGraphSnapshot,
326) -> ProjectGraphReport {
327    generate_report_from_snapshot_with_options(
328        project_id,
329        generated_at,
330        snapshot,
331        ProjectGraphReportOptions::default(),
332    )
333}
334
335fn generate_report_from_snapshot_with_options(
336    project_id: impl Into<String>,
337    generated_at: impl Into<String>,
338    snapshot: ReportGraphSnapshot,
339    options: ProjectGraphReportOptions,
340) -> ProjectGraphReport {
341    let options = options.normalized();
342    let project_id = project_id.into();
343    let generated_at = generated_at.into();
344    let node_by_id = snapshot
345        .nodes
346        .iter()
347        .map(|node| (node.id.as_str(), node))
348        .collect::<HashMap<_, _>>();
349
350    let summary = snapshot
351        .summary
352        .clone()
353        .unwrap_or_else(|| summarize_graph(&snapshot.nodes, &snapshot.code_edges));
354    let hotspots = snapshot.hotspots.clone().unwrap_or_else(|| {
355        summarize_hotspots(&snapshot.nodes, &snapshot.code_edges, options.top_n)
356    });
357    let unresolved_targets = snapshot.unresolved_targets.clone().unwrap_or_else(|| {
358        target_frequencies(
359            &snapshot.code_edges,
360            &node_by_id,
361            "unresolved",
362            options.top_n,
363        )
364    });
365    let external_targets = snapshot.external_targets.clone().unwrap_or_else(|| {
366        target_frequencies(&snapshot.code_edges, &node_by_id, "external", options.top_n)
367    });
368
369    let (bridge_edges, mut degradation_details) = match snapshot.bridge_edges {
370        BridgeEdgeInput::Available(edges) => (normalize_bridge_edges(edges), vec![]),
371        BridgeEdgeInput::Unavailable(reason) => (
372            vec![],
373            vec![ReportDegradation {
374                input: RELATES_TO_CODE.to_string(),
375                required: false,
376                detail: reason,
377            }],
378        ),
379    };
380    let bridge_summary = summarize_bridge_edges(&bridge_edges);
381    let suggested_investigation_questions = suggested_questions(
382        &hotspots,
383        &unresolved_targets,
384        &external_targets,
385        bridge_summary.as_ref(),
386        &degradation_details,
387    );
388    let markdown = render_markdown(RenderMarkdownInput {
389        project_id: &project_id,
390        generated_at: &generated_at,
391        summary: &summary,
392        hotspots: &hotspots,
393        unresolved_targets: &unresolved_targets,
394        external_targets: &external_targets,
395        bridge_summary: bridge_summary.as_ref(),
396        degradation_details: &degradation_details,
397        top_n: options.top_n,
398    });
399
400    degradation_details.sort_by(|left, right| left.input.cmp(&right.input));
401
402    ProjectGraphReport {
403        project_id,
404        generated_at,
405        summary,
406        hotspots,
407        unresolved_targets,
408        external_targets,
409        bridge_summary,
410        bridge_edges,
411        degradation_details,
412        suggested_investigation_questions,
413        markdown,
414    }
415}
416
417fn load_report_snapshot(
418    client: &mut GraphClient,
419    project_id: &str,
420    top_n: usize,
421) -> anyhow::Result<ReportGraphSnapshot> {
422    let (query, params) = report_node_counts_query(project_id);
423    let node_counts_by_type = rows_to_named_counts(client.query(&query, Some(params))?);
424    let node_count = node_counts_by_type.values().sum();
425
426    let (query, params) = report_code_edge_counts_query(project_id);
427    let code_edge_counts = rows_to_named_counts(client.query(&query, Some(params))?);
428    let edge_count = code_edge_counts.values().sum();
429
430    let summary = GraphReportSummary {
431        node_count,
432        edge_count,
433        node_counts_by_type,
434        code_edge_counts,
435    };
436
437    let hotspots = GraphReportHotspots {
438        high_degree_files: load_hotspots(client, project_id, "file", top_n)?,
439        high_degree_symbols: load_hotspots(client, project_id, "symbol", top_n)?,
440        high_degree_modules: load_hotspots(client, project_id, "module", top_n)?,
441        incoming_call_hotspots: load_incoming_call_hotspots(client, project_id, top_n)?,
442    };
443
444    let unresolved_targets = load_target_frequencies(client, project_id, "unresolved", top_n)?;
445    let external_targets = load_target_frequencies(client, project_id, "external", top_n)?;
446
447    let (query, params) = report_bridge_edges_query(project_id);
448    let bridge_edges = match client.query(&query, Some(params)) {
449        Ok(rows) => BridgeEdgeInput::available(
450            rows.iter()
451                .filter_map(row_to_bridge_edge_hypothesis)
452                .collect(),
453        ),
454        Err(error) => BridgeEdgeInput::unavailable(format!("bridge edge query failed: {error}")),
455    };
456
457    Ok(ReportGraphSnapshot {
458        nodes: vec![],
459        code_edges: vec![],
460        summary: Some(summary),
461        hotspots: Some(hotspots),
462        unresolved_targets: Some(unresolved_targets),
463        external_targets: Some(external_targets),
464        bridge_edges,
465    })
466}
467
/// Builds the Cypher `CASE` expression that maps the labels of the node bound
/// to `alias` onto a report node-type string, falling back to `'node'`.
fn report_node_type_case(alias: &str) -> String {
    let arms = [
        format!("WHEN {alias}:CodeFile THEN 'file'"),
        format!("WHEN {alias}:CodeModule THEN 'module'"),
        format!("WHEN {alias}:CodeSymbol THEN coalesce({alias}.kind, 'symbol')"),
        format!("WHEN {alias}:UnresolvedCallee THEN 'unresolved'"),
        format!("WHEN {alias}:ExternalSymbol THEN 'external'"),
    ];
    format!("CASE {} ELSE 'node' END", arms.join(" "))
}
480
/// Cypher expression yielding the first non-null of `id`, `path`, `name` for
/// the node bound to `alias`.
fn report_node_id_expr(alias: &str) -> String {
    let candidates = ["id", "path", "name"]
        .map(|property| format!("{alias}.{property}"))
        .join(", ");
    format!("coalesce({candidates})")
}
484
/// Cypher expression yielding the first non-null of `name`, `path`, `id` for
/// the node bound to `alias`.
fn report_node_name_expr(alias: &str) -> String {
    let candidates = ["name", "path", "id"]
        .map(|property| format!("{alias}.{property}"))
        .join(", ");
    format!("coalesce({candidates})")
}
488
489fn report_node_counts_query(project_id: &str) -> (String, HashMap<String, String>) {
490    (
491        "MATCH (n {project: $project}) \
492         WHERE n:CodeFile OR n:CodeSymbol OR n:CodeModule OR n:UnresolvedCallee OR n:ExternalSymbol \
493         RETURN CASE \
494                  WHEN n:CodeFile THEN 'file' \
495                  WHEN n:CodeModule THEN 'module' \
496                  WHEN n:CodeSymbol THEN coalesce(n.kind, 'symbol') \
497                  WHEN n:UnresolvedCallee THEN 'unresolved' \
498                  WHEN n:ExternalSymbol THEN 'external' \
499                  ELSE 'node' \
500                END AS name, \
501                count(n) AS count"
502            .to_string(),
503        typed_query::string_params(&[("project", project_id)]),
504    )
505}
506
507fn report_code_edge_counts_query(project_id: &str) -> (String, HashMap<String, String>) {
508    (
509        "MATCH (source {project: $project})-[r]->(target {project: $project}) \
510         WHERE type(r) IN ['DEFINES', 'IMPORTS', 'CALLS'] \
511         RETURN type(r) AS name, count(r) AS count"
512            .to_string(),
513        typed_query::string_params(&[("project", project_id)]),
514    )
515}
516
517fn report_hotspots_query(
518    project_id: &str,
519    node_class: &str,
520    top_n: usize,
521) -> (String, HashMap<String, String>) {
522    let predicate = match node_class {
523        "file" => "n:CodeFile",
524        "module" => "n:CodeModule",
525        _ => "n:CodeSymbol",
526    };
527    let limit = top_n.max(1);
528    (
529        format!(
530            "MATCH (n {{project: $project}}) \
531             WHERE {predicate} \
532             OPTIONAL MATCH (n)-[out]->(out_target {{project: $project}}) \
533             WHERE type(out) IN ['DEFINES', 'IMPORTS', 'CALLS'] \
534               AND (out_target:CodeFile OR out_target:CodeSymbol OR out_target:CodeModule OR out_target:UnresolvedCallee OR out_target:ExternalSymbol) \
535             WITH n, count(out) AS outgoing \
536             OPTIONAL MATCH (in_source {{project: $project}})-[inc]->(n) \
537             WHERE type(inc) IN ['DEFINES', 'IMPORTS', 'CALLS'] \
538               AND (in_source:CodeFile OR in_source:CodeSymbol OR in_source:CodeModule OR in_source:UnresolvedCallee OR in_source:ExternalSymbol) \
539             WITH n, outgoing, count(inc) AS incoming \
540             WITH n, outgoing, incoming, outgoing + incoming AS degree \
541             WHERE degree > 0 \
542             RETURN {} AS id, {} AS name, {} AS node_type, degree, incoming, outgoing, coalesce(n.file_path, n.path) AS file_path \
543             ORDER BY degree DESC, name ASC, id ASC \
544             LIMIT {limit}",
545            report_node_id_expr("n"),
546            report_node_name_expr("n"),
547            report_node_type_case("n")
548        ),
549        typed_query::string_params(&[("project", project_id)]),
550    )
551}
552
553fn report_incoming_call_hotspots_query(
554    project_id: &str,
555    top_n: usize,
556) -> (String, HashMap<String, String>) {
557    let limit = top_n.max(1);
558    (
559        format!(
560            "MATCH (:CodeSymbol {{project: $project}})-[r:CALLS]->(n:CodeSymbol {{project: $project}}) \
561             WITH n, count(r) AS incoming \
562             WHERE incoming > 0 \
563             RETURN n.id AS id, coalesce(n.name, n.id) AS name, {} AS node_type, incoming AS degree, incoming, 0 AS outgoing, n.file_path AS file_path \
564             ORDER BY degree DESC, name ASC, id ASC \
565             LIMIT {limit}",
566            report_node_type_case("n")
567        ),
568        typed_query::string_params(&[("project", project_id)]),
569    )
570}
571
572fn report_target_frequencies_query(
573    project_id: &str,
574    target_type: &str,
575    top_n: usize,
576) -> (String, HashMap<String, String>) {
577    let target_label = if target_type == "external" {
578        "ExternalSymbol"
579    } else {
580        "UnresolvedCallee"
581    };
582    let limit = top_n.max(1);
583    (
584        format!(
585            "MATCH (:CodeSymbol {{project: $project}})-[r:CALLS]->(target:{target_label} {{project: $project}}) \
586             RETURN target.id AS id, coalesce(target.name, target.id) AS name, count(r) AS count \
587             ORDER BY count DESC, name ASC, id ASC \
588             LIMIT {limit}"
589        ),
590        typed_query::string_params(&[("project", project_id)]),
591    )
592}
593
594fn report_bridge_edges_query(project_id: &str) -> (String, HashMap<String, String>) {
595    (
596        "MATCH (source)-[r:RELATES_TO_CODE]->(target:CodeSymbol {project: $project}) \
597         RETURN coalesce(source.id, source.uuid, source.name) AS source_id, \
598                target.id AS target_symbol_id, \
599                'RELATES_TO_CODE' AS relation, \
600                r.provenance AS provenance, \
601                r.confidence AS confidence, \
602                coalesce(r.source_system, 'gobby-memory') AS source_system, \
603                r.source_file_path AS source_file_path, \
604                r.source_line AS source_line, \
605                r.source_symbol_id AS source_symbol_id, \
606                r.matching_method AS matching_method"
607            .to_string(),
608        typed_query::string_params(&[("project", project_id)]),
609    )
610}
611
612fn rows_to_named_counts(rows: Vec<Row>) -> BTreeMap<String, usize> {
613    rows.iter()
614        .filter_map(|row| {
615            let name = row_string(row, &["name"])?;
616            let count = row_usize(row, &["count"]).unwrap_or(0);
617            Some((name, count))
618        })
619        .collect()
620}
621
622fn load_hotspots(
623    client: &mut GraphClient,
624    project_id: &str,
625    node_class: &str,
626    top_n: usize,
627) -> anyhow::Result<Vec<GraphHotspot>> {
628    let (query, params) = report_hotspots_query(project_id, node_class, top_n);
629    Ok(client
630        .query(&query, Some(params))?
631        .iter()
632        .filter_map(row_to_graph_hotspot)
633        .collect())
634}
635
636fn load_incoming_call_hotspots(
637    client: &mut GraphClient,
638    project_id: &str,
639    top_n: usize,
640) -> anyhow::Result<Vec<GraphHotspot>> {
641    let (query, params) = report_incoming_call_hotspots_query(project_id, top_n);
642    Ok(client
643        .query(&query, Some(params))?
644        .iter()
645        .filter_map(row_to_graph_hotspot)
646        .collect())
647}
648
649fn load_target_frequencies(
650    client: &mut GraphClient,
651    project_id: &str,
652    target_type: &str,
653    top_n: usize,
654) -> anyhow::Result<Vec<TargetFrequency>> {
655    let (query, params) = report_target_frequencies_query(project_id, target_type, top_n);
656    Ok(client
657        .query(&query, Some(params))?
658        .iter()
659        .filter_map(row_to_target_frequency)
660        .collect())
661}
662
663fn row_to_graph_hotspot(row: &Row) -> Option<GraphHotspot> {
664    Some(GraphHotspot {
665        id: row_string(row, &["id"])?,
666        name: row_string(row, &["name"])?,
667        node_type: row_string(row, &["node_type"]).unwrap_or_else(|| "node".to_string()),
668        degree: row_usize(row, &["degree"]).unwrap_or(0),
669        incoming: row_usize(row, &["incoming"]).unwrap_or(0),
670        outgoing: row_usize(row, &["outgoing"]).unwrap_or(0),
671        file_path: row_string(row, &["file_path"]),
672    })
673}
674
675fn row_to_target_frequency(row: &Row) -> Option<TargetFrequency> {
676    Some(TargetFrequency {
677        id: row_string(row, &["id"])?,
678        name: row_string(row, &["name"])?,
679        count: row_usize(row, &["count"]).unwrap_or(0),
680    })
681}
682
683fn row_to_bridge_edge_hypothesis(row: &Row) -> Option<BridgeEdgeHypothesis> {
684    let source_id = row_string(row, &["source_id"])?;
685    let target_symbol_id = row_string(row, &["target_symbol_id"])?;
686    let relation = row_string(row, &["relation"]).unwrap_or_else(|| RELATES_TO_CODE.to_string());
687    let source_system =
688        row_string(row, &["source_system"]).unwrap_or_else(|| "gobby-memory".to_string());
689
690    let mut metadata = ProjectionMetadata::new(
691        row_string(row, &["provenance"])
692            .and_then(|value| ProjectionProvenance::from_wire_value(&value))
693            .unwrap_or(ProjectionProvenance::Inferred),
694        source_system,
695    );
696    metadata.confidence = row_f64(row, &["confidence"]);
697    metadata.source_file_path = row_string(row, &["source_file_path"]);
698    metadata.source_line = row_usize(row, &["source_line"]);
699    metadata.source_symbol_id = row_string(row, &["source_symbol_id"]);
700    metadata.matching_method = row_string(row, &["matching_method"]);
701
702    Some(BridgeEdgeHypothesis::new(
703        source_id,
704        target_symbol_id,
705        relation,
706        metadata,
707    ))
708}
709
710fn summarize_graph(nodes: &[ReportNode], edges: &[ReportCodeEdge]) -> GraphReportSummary {
711    let mut node_counts_by_type = BTreeMap::new();
712    for node in nodes {
713        *node_counts_by_type
714            .entry(node.node_type.clone())
715            .or_insert(0) += 1;
716    }
717
718    let mut code_edge_counts = BTreeMap::new();
719    for edge in edges {
720        *code_edge_counts.entry(edge.edge_type.clone()).or_insert(0) += 1;
721    }
722
723    GraphReportSummary {
724        node_count: nodes.len(),
725        edge_count: edges.len(),
726        node_counts_by_type,
727        code_edge_counts,
728    }
729}
730
731fn summarize_hotspots(
732    nodes: &[ReportNode],
733    edges: &[ReportCodeEdge],
734    top_n: usize,
735) -> GraphReportHotspots {
736    let mut degree = HashMap::<&str, DegreeStats>::new();
737    let mut incoming_calls = HashMap::<&str, usize>::new();
738    for edge in edges {
739        degree.entry(&edge.source).or_default().outgoing += 1;
740        degree.entry(&edge.target).or_default().incoming += 1;
741        if edge.edge_type == "CALLS" {
742            *incoming_calls.entry(&edge.target).or_insert(0) += 1;
743        }
744    }
745
746    GraphReportHotspots {
747        high_degree_files: top_hotspots(nodes, &degree, top_n, |node| node.node_type == "file"),
748        high_degree_symbols: top_hotspots(nodes, &degree, top_n, |node| {
749            is_symbol_node(&node.node_type)
750        }),
751        high_degree_modules: top_hotspots(nodes, &degree, top_n, |node| node.node_type == "module"),
752        incoming_call_hotspots: top_incoming_call_hotspots(nodes, &incoming_calls, top_n),
753    }
754}
755
756fn top_hotspots(
757    nodes: &[ReportNode],
758    degree: &HashMap<&str, DegreeStats>,
759    top_n: usize,
760    include: impl Fn(&ReportNode) -> bool,
761) -> Vec<GraphHotspot> {
762    let mut hotspots = nodes
763        .iter()
764        .filter(|node| include(node))
765        .filter_map(|node| {
766            let stats = degree.get(node.id.as_str())?;
767            let total = stats.incoming + stats.outgoing;
768            (total > 0).then(|| GraphHotspot {
769                id: node.id.clone(),
770                name: node.name.clone(),
771                node_type: node.node_type.clone(),
772                degree: total,
773                incoming: stats.incoming,
774                outgoing: stats.outgoing,
775                file_path: node.file_path.clone(),
776            })
777        })
778        .collect::<Vec<_>>();
779    sort_hotspots(&mut hotspots);
780    hotspots.truncate(top_n);
781    hotspots
782}
783
784fn top_incoming_call_hotspots(
785    nodes: &[ReportNode],
786    incoming_calls: &HashMap<&str, usize>,
787    top_n: usize,
788) -> Vec<GraphHotspot> {
789    let mut hotspots = nodes
790        .iter()
791        .filter(|node| is_symbol_node(&node.node_type))
792        .filter_map(|node| {
793            let incoming = incoming_calls.get(node.id.as_str()).copied().unwrap_or(0);
794            (incoming > 0).then(|| GraphHotspot {
795                id: node.id.clone(),
796                name: node.name.clone(),
797                node_type: node.node_type.clone(),
798                degree: incoming,
799                incoming,
800                outgoing: 0,
801                file_path: node.file_path.clone(),
802            })
803        })
804        .collect::<Vec<_>>();
805    sort_hotspots(&mut hotspots);
806    hotspots.truncate(top_n);
807    hotspots
808}
809
810fn target_frequencies(
811    edges: &[ReportCodeEdge],
812    node_by_id: &HashMap<&str, &ReportNode>,
813    target_type: &str,
814    top_n: usize,
815) -> Vec<TargetFrequency> {
816    let mut counts = BTreeMap::<String, TargetFrequency>::new();
817    for edge in edges.iter().filter(|edge| edge.edge_type == "CALLS") {
818        let Some(node) = node_by_id.get(edge.target.as_str()) else {
819            continue;
820        };
821        if node.node_type != target_type {
822            continue;
823        }
824        let entry = counts
825            .entry(node.id.clone())
826            .or_insert_with(|| TargetFrequency {
827                id: node.id.clone(),
828                name: node.name.clone(),
829                count: 0,
830            });
831        entry.count += 1;
832    }
833
834    let mut frequencies = counts.into_values().collect::<Vec<_>>();
835    frequencies.sort_by(|left, right| {
836        right
837            .count
838            .cmp(&left.count)
839            .then_with(|| left.name.cmp(&right.name))
840            .then_with(|| left.id.cmp(&right.id))
841    });
842    frequencies.truncate(top_n);
843    frequencies
844}
845
846fn summarize_bridge_edges(edges: &[BridgeEdgeHypothesis]) -> Option<BridgeReportSummary> {
847    if edges.is_empty() {
848        return None;
849    }
850
851    let mut source_counts = BTreeMap::<String, usize>::new();
852    let mut confidence_min = f64::INFINITY;
853    let mut confidence_max = f64::NEG_INFINITY;
854    let mut has_confidence = false;
855    for edge in edges {
856        *source_counts
857            .entry(edge.metadata.source_system.clone())
858            .or_insert(0) += 1;
859        if let Some(confidence) = edge.metadata.confidence
860            && confidence.is_finite()
861        {
862            confidence_min = confidence_min.min(confidence);
863            confidence_max = confidence_max.max(confidence);
864            has_confidence = true;
865        }
866    }
867
868    let source_system_counts = source_counts
869        .into_iter()
870        .map(|(name, count)| NamedCount { name, count })
871        .collect();
872
873    Some(BridgeReportSummary {
874        relation: RELATES_TO_CODE.to_string(),
875        edge_count: edges.len(),
876        inferred: true,
877        read_only: true,
878        source_system_counts,
879        confidence_range: has_confidence.then_some(ConfidenceRange {
880            min: confidence_min,
881            max: confidence_max,
882        }),
883    })
884}
885
886fn normalize_bridge_edges(edges: Vec<BridgeEdgeHypothesis>) -> Vec<BridgeEdgeHypothesis> {
887    edges
888        .into_iter()
889        .map(|edge| {
890            BridgeEdgeHypothesis::new(
891                edge.source_id,
892                edge.target_symbol_id,
893                edge.relation,
894                edge.metadata,
895            )
896        })
897        .collect()
898}
899
900fn suggested_questions(
901    hotspots: &GraphReportHotspots,
902    unresolved_targets: &[TargetFrequency],
903    external_targets: &[TargetFrequency],
904    bridge_summary: Option<&BridgeReportSummary>,
905    degradation_details: &[ReportDegradation],
906) -> Vec<String> {
907    let mut questions =
908        vec!["Which high-degree files or symbols should be reviewed before refactors?".to_string()];
909
910    if !hotspots.incoming_call_hotspots.is_empty() {
911        questions.push("Which incoming-call hotspots define the largest blast radius?".to_string());
912    }
913    if !unresolved_targets.is_empty() || !external_targets.is_empty() {
914        questions.push(
915            "Which unresolved or external call targets should be resolved first?".to_string(),
916        );
917    }
918    if bridge_summary.is_some() {
919        questions
920            .push("Which inferred RELATES_TO_CODE bridges need human confirmation?".to_string());
921    }
922    if !degradation_details.is_empty() {
923        questions.push(
924            "Which degraded optional inputs should be restored for the next report?".to_string(),
925        );
926    }
927
928    questions
929}
930
931struct RenderMarkdownInput<'a> {
932    project_id: &'a str,
933    generated_at: &'a str,
934    summary: &'a GraphReportSummary,
935    hotspots: &'a GraphReportHotspots,
936    unresolved_targets: &'a [TargetFrequency],
937    external_targets: &'a [TargetFrequency],
938    bridge_summary: Option<&'a BridgeReportSummary>,
939    degradation_details: &'a [ReportDegradation],
940    top_n: usize,
941}
942
943fn render_markdown(input: RenderMarkdownInput<'_>) -> String {
944    let mut lines = vec![
945        "# Project Graph Report".to_string(),
946        String::new(),
947        format!("- Project: {}", input.project_id),
948        format!("- Generated: {}", input.generated_at),
949        format!("- Nodes: {}", input.summary.node_count),
950        format!("- Edges: {}", input.summary.edge_count),
951    ];
952
953    if !input.summary.code_edge_counts.is_empty() {
954        lines.push(format!(
955            "- Code edges: {}",
956            named_counts_inline(&input.summary.code_edge_counts)
957        ));
958    }
959
960    append_hotspot_section(
961        &mut lines,
962        "High-degree files",
963        &input.hotspots.high_degree_files,
964        input.top_n,
965    );
966    append_hotspot_section(
967        &mut lines,
968        "High-degree symbols",
969        &input.hotspots.high_degree_symbols,
970        input.top_n,
971    );
972    append_hotspot_section(
973        &mut lines,
974        "Incoming-call hotspots",
975        &input.hotspots.incoming_call_hotspots,
976        input.top_n,
977    );
978    append_target_section(
979        &mut lines,
980        "Unresolved call targets",
981        input.unresolved_targets,
982        input.top_n,
983    );
984    append_target_section(
985        &mut lines,
986        "External call targets",
987        input.external_targets,
988        input.top_n,
989    );
990
991    if let Some(summary) = input.bridge_summary {
992        lines.push(String::new());
993        lines.push("RELATES_TO_CODE bridges".to_string());
994        lines.push(format!(
995            "- {} inferred read-only edge(s)",
996            summary.edge_count
997        ));
998        if let Some(range) = &summary.confidence_range {
999            lines.push(format!("- Confidence: {:.3}..{:.3}", range.min, range.max));
1000        }
1001    }
1002
1003    if !input.degradation_details.is_empty() {
1004        lines.push(String::new());
1005        lines.push("Degradation".to_string());
1006        for detail in input.degradation_details {
1007            lines.push(format!("- {}: {}", detail.input, detail.detail));
1008        }
1009    }
1010
1011    lines.join("\n")
1012}
1013
1014fn append_hotspot_section(
1015    lines: &mut Vec<String>,
1016    title: &str,
1017    hotspots: &[GraphHotspot],
1018    top_n: usize,
1019) {
1020    if hotspots.is_empty() {
1021        return;
1022    }
1023    lines.push(String::new());
1024    lines.push(title.to_string());
1025    for hotspot in hotspots.iter().take(top_n) {
1026        lines.push(format!(
1027            "- {} ({}, degree {})",
1028            hotspot.name, hotspot.node_type, hotspot.degree
1029        ));
1030    }
1031}
1032
1033fn append_target_section(
1034    lines: &mut Vec<String>,
1035    title: &str,
1036    targets: &[TargetFrequency],
1037    top_n: usize,
1038) {
1039    if targets.is_empty() {
1040        return;
1041    }
1042    lines.push(String::new());
1043    lines.push(title.to_string());
1044    for target in targets.iter().take(top_n) {
1045        lines.push(format!("- {} ({})", target.name, target.count));
1046    }
1047}
1048
/// Formats a count map as `name=count` pairs separated by `", "`.
///
/// Pairs appear in the map's iteration order (ascending key order for a
/// `BTreeMap`). An empty map produces an empty string.
fn named_counts_inline(counts: &BTreeMap<String, usize>) -> String {
    let mut rendered = String::new();
    for (position, (name, count)) in counts.iter().enumerate() {
        // Separator goes between pairs, never before the first one.
        if position > 0 {
            rendered.push_str(", ");
        }
        rendered.push_str(name);
        rendered.push('=');
        rendered.push_str(&count.to_string());
    }
    rendered
}
1056
1057fn sort_hotspots(hotspots: &mut [GraphHotspot]) {
1058    hotspots.sort_by(|left, right| {
1059        right
1060            .degree
1061            .cmp(&left.degree)
1062            .then_with(|| left.name.cmp(&right.name))
1063            .then_with(|| left.id.cmp(&right.id))
1064    });
1065}
1066
/// Returns `true` when `node_type` is anything other than one of the four
/// non-symbol kinds: `file`, `module`, `unresolved` or `external`.
///
/// The comparison is exact and case-sensitive, so unknown or empty type
/// names count as symbols.
fn is_symbol_node(node_type: &str) -> bool {
    const NON_SYMBOL_TYPES: [&str; 4] = ["file", "module", "unresolved", "external"];
    !NON_SYMBOL_TYPES.contains(&node_type)
}
1070
1071fn inferred_bridge_metadata(mut metadata: ProjectionMetadata) -> ProjectionMetadata {
1072    metadata.provenance = ProjectionProvenance::Inferred;
1073    metadata
1074}
1075
1076fn row_string(row: &Row, keys: &[&str]) -> Option<String> {
1077    keys.iter()
1078        .find_map(|key| row.get(*key).and_then(Value::as_str))
1079        .filter(|value| !value.is_empty())
1080        .map(ToOwned::to_owned)
1081}
1082
1083fn row_usize(row: &Row, keys: &[&str]) -> Option<usize> {
1084    keys.iter()
1085        .find_map(|key| row.get(*key))
1086        .and_then(|value| {
1087            value
1088                .as_u64()
1089                .or_else(|| value.as_i64().and_then(|value| value.try_into().ok()))
1090        })
1091        .map(|value| value as usize)
1092}
1093
1094fn row_f64(row: &Row, keys: &[&str]) -> Option<f64> {
1095    keys.iter()
1096        .find_map(|key| row.get(*key))
1097        .and_then(Value::as_f64)
1098}
1099
1100fn now_iso8601() -> String {
1101    let dur = SystemTime::now()
1102        .duration_since(UNIX_EPOCH)
1103        .unwrap_or_default();
1104    let secs = dur.as_secs();
1105    let micros = dur.subsec_micros();
1106
1107    let (year, month, day) = days_to_ymd(secs / 86400);
1108    let daytime = secs % 86400;
1109    let hour = daytime / 3600;
1110    let minute = (daytime % 3600) / 60;
1111    let second = daytime % 60;
1112
1113    format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}.{micros:06}+00:00")
1114}
1115
/// Converts a count of days since 1970-01-01 into a `(year, month, day)`
/// triple in the proleptic Gregorian calendar.
///
/// `month` is 1-based (1 = January) and `day` is 1-based. The computation
/// works on 400-year eras of 146097 days with years counted from March 1,
/// which places the leap day at the end of the counted year.
fn days_to_ymd(days: u64) -> (u64, u64, u64) {
    const DAYS_PER_ERA: i64 = 146097;

    // Shift the origin from 1970-01-01 to 0000-03-01.
    let shifted = days as i64 + 719468;
    let era_numerator = if shifted >= 0 {
        shifted
    } else {
        shifted - (DAYS_PER_ERA - 1)
    };
    let era = era_numerator / DAYS_PER_ERA;

    let day_of_era = (shifted - era * DAYS_PER_ERA) as u64;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Month index counted from March (0 = March .. 11 = February).
    let shifted_month = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * shifted_month + 2) / 5 + 1;
    let month = if shifted_month < 10 {
        shifted_month + 3
    } else {
        shifted_month - 9
    };

    // January and February belong to the following calendar year.
    let march_based_year = year_of_era as i64 + era * 400;
    let year = if month <= 2 {
        march_based_year + 1
    } else {
        march_based_year
    };

    (year as u64, month, day)
}
1129
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::{CodeVectorSettings, Context};
    use crate::models::{ProjectionMetadata, ProjectionProvenance};
    use std::path::PathBuf;

    /// Project id shared by every fixture in this module.
    const PROJECT_ID: &str = "project-1";
    /// Fixed generation timestamp so snapshot-based reports are deterministic.
    const GENERATED_AT: &str = "2026-05-28T00:00:00Z";

    /// A populated snapshot produces every top-level section of the report.
    #[test]
    fn report_shape() {
        let nodes = vec![
            ReportNode::new("src/lib.rs", "src/lib.rs", "file"),
            ReportNode::new("mod:api", "api", "module"),
            ReportNode::new("sym:handler", "handler", "function").with_file_path("src/lib.rs"),
            ReportNode::new("sym:parse", "parse", "function").with_file_path("src/lib.rs"),
            ReportNode::new("unresolved:do_work", "do_work", "unresolved"),
            ReportNode::new("external:serde_json", "serde_json", "external"),
        ];
        let code_edges = vec![
            ReportCodeEdge::new("src/lib.rs", "sym:handler", "DEFINES"),
            ReportCodeEdge::new("src/lib.rs", "mod:api", "IMPORTS"),
            ReportCodeEdge::new("sym:handler", "sym:parse", "CALLS"),
            ReportCodeEdge::new("sym:parse", "unresolved:do_work", "CALLS"),
            ReportCodeEdge::new("sym:handler", "external:serde_json", "CALLS"),
        ];
        let bridge = BridgeEdgeHypothesis::inferred(
            "memory-1",
            "sym:handler",
            RELATES_TO_CODE,
            "gobby-memory",
            Some(0.72),
        );
        let snapshot = ReportGraphSnapshot {
            nodes,
            code_edges,
            bridge_edges: BridgeEdgeInput::available(vec![bridge]),
            ..ReportGraphSnapshot::default()
        };

        let report = generate_report_from_snapshot(PROJECT_ID, GENERATED_AT, snapshot);
        let json = serde_json::to_value(&report).expect("report serializes");

        // Identity and totals.
        assert_eq!(json["project_id"], "project-1");
        assert_eq!(json["summary"]["node_count"], 6);
        assert_eq!(json["summary"]["edge_count"], 5);
        assert_eq!(json["summary"]["code_edge_counts"]["CALLS"], 3);

        // Hotspots and call targets.
        assert_eq!(json["hotspots"]["high_degree_files"][0]["id"], "src/lib.rs");
        assert_eq!(
            json["hotspots"]["incoming_call_hotspots"][0]["id"],
            "sym:parse"
        );
        assert_eq!(json["unresolved_targets"][0]["name"], "do_work");
        assert_eq!(json["external_targets"][0]["name"], "serde_json");

        // Bridge summary.
        assert_eq!(json["bridge_summary"]["relation"], RELATES_TO_CODE);
        assert_eq!(json["bridge_summary"]["confidence_range"]["min"], 0.72);

        // Rendered text and suggested questions.
        let markdown = json["markdown"].as_str().expect("markdown is a string");
        assert!(markdown.contains("project-1"));
        let questions = json["suggested_investigation_questions"]
            .as_array()
            .expect("questions are an array");
        assert!(!questions.is_empty());
    }

    /// Bridge edges stay read-only and inferred even when they are built
    /// from metadata that carries a different provenance.
    #[test]
    fn bridge_edges_are_read_only() {
        let hypothesis = BridgeEdgeHypothesis::new(
            "memory-1",
            "symbol-1",
            RELATES_TO_CODE,
            ProjectionMetadata::gcode_extracted(),
        );

        assert!(hypothesis.read_only);
        assert_eq!(hypothesis.label, "inferred hypothesis");
        assert_eq!(
            hypothesis.metadata.provenance,
            ProjectionProvenance::Inferred
        );

        let snapshot = ReportGraphSnapshot {
            nodes: vec![ReportNode::new("symbol-1", "handler", "function")],
            code_edges: Vec::new(),
            bridge_edges: BridgeEdgeInput::available(vec![hypothesis]),
            ..ReportGraphSnapshot::default()
        };
        let report = generate_report_from_snapshot(PROJECT_ID, GENERATED_AT, snapshot);
        let json = serde_json::to_value(&report).expect("report serializes");
        let first_edge = &json["bridge_edges"][0];

        assert_eq!(first_edge["read_only"], true);
        assert_eq!(first_edge["metadata"]["provenance"], "INFERRED");
    }

    /// A missing graph service is a hard error, while unavailable bridge
    /// edges only degrade the report.
    #[test]
    fn report_degradation_contract() {
        let ctx = Context {
            database_url: "postgresql://localhost/unavailable".to_string(),
            project_root: PathBuf::from("/tmp/project"),
            project_id: PROJECT_ID.to_string(),
            quiet: true,
            falkordb: None,
            qdrant: None,
            embedding: None,
            code_vectors: CodeVectorSettings::default(),
            daemon_url: None,
        };
        let err = generate_report(&ctx).expect_err("missing graph service is required");
        assert_eq!(err, ProjectGraphReportError::GraphServiceNotConfigured);

        let degraded_snapshot = ReportGraphSnapshot {
            nodes: vec![ReportNode::new("symbol-1", "handler", "function")],
            code_edges: Vec::new(),
            bridge_edges: BridgeEdgeInput::unavailable("bridge edge query timed out"),
            ..ReportGraphSnapshot::default()
        };
        let report = generate_report_from_snapshot(PROJECT_ID, GENERATED_AT, degraded_snapshot);

        assert_eq!(report.summary.node_count, 1);
        assert_eq!(report.degradation_details.len(), 1);
        let degradation = &report.degradation_details[0];
        assert_eq!(degradation.input, RELATES_TO_CODE);
        assert!(!degradation.required);
    }

    /// Inferred bridge edges are labelled as hypotheses in memory and in the
    /// serialized report.
    #[test]
    fn bridge_edges_are_hypotheses() {
        let hypothesis = BridgeEdgeHypothesis::inferred(
            "memory-1",
            "symbol-1",
            RELATES_TO_CODE,
            "gobby-memory",
            Some(0.72),
        );

        assert_eq!(hypothesis.label, "inferred hypothesis");
        assert_eq!(
            hypothesis.metadata.provenance,
            ProjectionProvenance::Inferred
        );
        assert!(hypothesis.metadata.is_hypothesis());

        let mut report = empty_report(PROJECT_ID);
        report.bridge_edges.push(hypothesis);

        let json = serde_json::to_value(&report).expect("report serializes");
        let first_edge = &json["bridge_edges"][0];
        assert_eq!(first_edge["label"], "inferred hypothesis");
        assert_eq!(first_edge["metadata"]["provenance"], "INFERRED");
    }
}