Skip to main content

grapha_core/
normalize.rs

1use std::collections::HashMap;
2
3use crate::graph::{Edge, EdgeKind, FlowDirection, Graph, Node, NodeKind, Visibility};
4
5pub fn normalize_graph(mut graph: Graph) -> Graph {
6    fn visibility_rank(visibility: &Visibility) -> u8 {
7        match visibility {
8            Visibility::Private => 0,
9            Visibility::Crate => 1,
10            Visibility::Public => 2,
11        }
12    }
13
14    fn merged_kind(existing: NodeKind, incoming: NodeKind) -> NodeKind {
15        match (existing, incoming) {
16            (NodeKind::Struct, NodeKind::Class) => NodeKind::Class,
17            _ => existing,
18        }
19    }
20
21    fn merge_node(existing: &mut Node, incoming: Node) {
22        existing.kind = merged_kind(existing.kind, incoming.kind);
23        if visibility_rank(&incoming.visibility) > visibility_rank(&existing.visibility) {
24            existing.visibility = incoming.visibility;
25        }
26        if existing.role.is_none() {
27            existing.role = incoming.role;
28        }
29        if existing.signature.is_none() {
30            existing.signature = incoming.signature;
31        }
32        if existing.doc_comment.is_none() {
33            existing.doc_comment = incoming.doc_comment;
34        }
35        if existing.module.is_none() {
36            existing.module = incoming.module;
37        }
38        for (key, value) in incoming.metadata {
39            existing.metadata.entry(key).or_insert(value);
40        }
41    }
42
43    let mut node_index = HashMap::new();
44    let mut normalized_nodes = Vec::with_capacity(graph.nodes.len());
45    for node in graph.nodes {
46        if let Some(existing_index) = node_index.get(&node.id).copied() {
47            merge_node(&mut normalized_nodes[existing_index], node);
48        } else {
49            node_index.insert(node.id.clone(), normalized_nodes.len());
50            normalized_nodes.push(node);
51        }
52    }
53
54    let mut edge_index = HashMap::new();
55    let mut normalized_edges = Vec::with_capacity(graph.edges.len());
56    for edge in graph.edges {
57        let fingerprint = edge_fingerprint(&edge);
58        if let Some(existing_index) = edge_index.get(&fingerprint).copied() {
59            let existing: &mut Edge = &mut normalized_edges[existing_index];
60            existing.confidence = existing.confidence.max(edge.confidence);
61            for provenance in edge.provenance {
62                if !existing
63                    .provenance
64                    .iter()
65                    .any(|current| current == &provenance)
66                {
67                    existing.provenance.push(provenance);
68                }
69            }
70        } else {
71            edge_index.insert(fingerprint, normalized_edges.len());
72            normalized_edges.push(edge);
73        }
74    }
75
76    graph.nodes = normalized_nodes;
77    graph.edges = normalized_edges;
78    graph
79}
80
81pub fn edge_fingerprint(edge: &Edge) -> String {
82    let mut hasher = Fnv1a64::default();
83    hasher.write_component(&edge.source);
84    hasher.write_component(&edge.target);
85    hasher.write_component(edge_kind_tag(edge.kind));
86    hasher.write_component(direction_tag(edge.direction.as_ref()));
87    hasher.write_component(edge.operation.as_deref().unwrap_or(""));
88    hasher.write_component(edge.condition.as_deref().unwrap_or(""));
89    hasher.write_component(bool_tag(edge.async_boundary));
90    // Fast hex encoding without format! allocation overhead
91    let hash = hasher.finish();
92    let mut buf = [0u8; 16];
93    let bytes = hash.to_be_bytes();
94    const HEX: &[u8; 16] = b"0123456789abcdef";
95    for (i, &b) in bytes.iter().enumerate() {
96        buf[i * 2] = HEX[(b >> 4) as usize];
97        buf[i * 2 + 1] = HEX[(b & 0xf) as usize];
98    }
99    // SAFETY: buf only contains ASCII hex chars
100    unsafe { String::from_utf8_unchecked(buf.to_vec()) }
101}
102
103fn edge_kind_tag(kind: EdgeKind) -> &'static str {
104    match kind {
105        EdgeKind::Calls => "calls",
106        EdgeKind::Uses => "uses",
107        EdgeKind::Implements => "implements",
108        EdgeKind::Contains => "contains",
109        EdgeKind::TypeRef => "type_ref",
110        EdgeKind::Inherits => "inherits",
111        EdgeKind::Reads => "reads",
112        EdgeKind::Writes => "writes",
113        EdgeKind::Publishes => "publishes",
114        EdgeKind::Subscribes => "subscribes",
115    }
116}
117
118fn direction_tag(direction: Option<&FlowDirection>) -> &'static str {
119    match direction {
120        Some(FlowDirection::Read) => "read",
121        Some(FlowDirection::Write) => "write",
122        Some(FlowDirection::ReadWrite) => "read_write",
123        Some(FlowDirection::Pure) => "pure",
124        None => "",
125    }
126}
127
/// Encodes a tri-state flag as a short tag: `"1"` for `Some(true)`, `"0"` for
/// `Some(false)`, and the empty string when the flag is absent.
fn bool_tag(value: Option<bool>) -> &'static str {
    value.map_or("", |flag| if flag { "1" } else { "0" })
}
135
/// Incremental 64-bit FNV-1a hasher over a sequence of string components.
///
/// Every component is terminated with a `0xff` byte, so the same characters
/// split at different component boundaries feed different byte streams into
/// the hash.
struct Fnv1a64 {
    state: u64,
}

impl Default for Fnv1a64 {
    /// Starts from the FNV offset basis.
    ///
    /// Seeding here, rather than lazily on the first write, means a state of
    /// zero is never mistaken for "not yet initialised" and reset mid-stream.
    fn default() -> Self {
        Self {
            state: Self::OFFSET_BASIS,
        }
    }
}

impl Fnv1a64 {
    const OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    const PRIME: u64 = 0x100000001b3;
    // 0xff never occurs in valid UTF-8, so it cannot be confused with a byte
    // that belongs to a `&str` component.
    const SEPARATOR: u8 = 0xff;

    /// Mixes `value` followed by the component separator into the hash.
    fn write_component(&mut self, value: &str) {
        for byte in value.bytes().chain(std::iter::once(Self::SEPARATOR)) {
            self.state ^= u64::from(byte);
            self.state = self.state.wrapping_mul(Self::PRIME);
        }
    }

    /// Consumes the hasher and returns the accumulated hash.
    ///
    /// With no components written this is the FNV offset basis.
    fn finish(self) -> u64 {
        self.state
    }
}
165
#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::{EdgeKind, EdgeProvenance, NodeKind, NodeRole, Span, TerminalKind};
    use std::collections::HashMap;
    use std::path::PathBuf;

    /// Wraps the given nodes and edges in a graph with version "0.1.0".
    fn graph_with(nodes: Vec<Node>, edges: Vec<Edge>) -> Graph {
        Graph {
            version: "0.1.0".to_string(),
            nodes,
            edges,
        }
    }

    /// A `Calls` edge from "a" to "b" with confidence 1.0, no optional
    /// attributes and no provenance. Tests override fields via struct update.
    fn plain_call_edge() -> Edge {
        Edge {
            source: "a".to_string(),
            target: "b".to_string(),
            kind: EdgeKind::Calls,
            confidence: 1.0,
            direction: None,
            operation: None,
            condition: None,
            async_boundary: None,
            provenance: Vec::new(),
        }
    }

    /// Provenance located in `a.swift`, attributed to symbol "a".
    fn provenance_at(span: Span) -> EdgeProvenance {
        EdgeProvenance {
            file: PathBuf::from("a.swift"),
            span,
            symbol_id: "a".to_string(),
        }
    }

    /// A node with empty metadata and every optional field unset.
    fn bare_node(
        id: &str,
        name: &str,
        kind: NodeKind,
        file: &str,
        span: Span,
        visibility: Visibility,
    ) -> Node {
        Node {
            id: id.to_string(),
            kind,
            name: name.to_string(),
            file: PathBuf::from(file),
            span,
            visibility,
            metadata: HashMap::new(),
            role: None,
            signature: None,
            doc_comment: None,
            module: None,
            snippet: None,
        }
    }

    #[test]
    fn normalize_graph_merges_duplicate_edges_and_provenance() {
        let weaker = Edge {
            confidence: 0.4,
            provenance: vec![provenance_at(Span {
                start: [1, 0],
                end: [1, 4],
            })],
            ..plain_call_edge()
        };
        let stronger = Edge {
            confidence: 0.9,
            provenance: vec![provenance_at(Span {
                start: [2, 0],
                end: [2, 4],
            })],
            ..plain_call_edge()
        };

        let normalized = normalize_graph(graph_with(vec![], vec![weaker, stronger]));

        assert_eq!(normalized.edges.len(), 1);
        assert_eq!(normalized.edges[0].confidence, 0.9);
        assert_eq!(normalized.edges[0].provenance.len(), 2);
    }

    #[test]
    fn normalize_graph_merges_duplicate_nodes_by_id() {
        let id = "s:RoomPage.centerContentView";
        let sparse = bare_node(
            id,
            "centerContentView",
            NodeKind::Property,
            "RoomPage.swift",
            Span {
                start: [0, 0],
                end: [0, 0],
            },
            Visibility::Private,
        );
        let detailed = Node {
            role: Some(NodeRole::EntryPoint),
            signature: Some("var centerContentView: some View".to_string()),
            doc_comment: Some("helper".to_string()),
            module: Some("Room".to_string()),
            ..bare_node(
                id,
                "centerContentView",
                NodeKind::Property,
                "RoomPage.swift",
                Span {
                    start: [10, 4],
                    end: [10, 20],
                },
                Visibility::Public,
            )
        };

        let normalized = normalize_graph(graph_with(vec![sparse, detailed], vec![]));

        assert_eq!(normalized.nodes.len(), 1);
        let merged = &normalized.nodes[0];
        assert_eq!(merged.visibility, Visibility::Public);
        assert_eq!(merged.role, Some(NodeRole::EntryPoint));
        assert_eq!(
            merged.signature.as_deref(),
            Some("var centerContentView: some View")
        );
        assert_eq!(merged.doc_comment.as_deref(), Some("helper"));
        assert_eq!(merged.module.as_deref(), Some("Room"));
    }

    #[test]
    fn normalize_graph_prefers_class_over_struct_for_same_symbol() {
        // Same symbol reported twice, differing only in the reported kind.
        let app_delegate = |kind: NodeKind| {
            bare_node(
                "AppDelegate",
                "AppDelegate",
                kind,
                "AppDelegate.swift",
                Span {
                    start: [0, 0],
                    end: [1, 0],
                },
                Visibility::Crate,
            )
        };
        let nodes = vec![
            app_delegate(NodeKind::Struct),
            app_delegate(NodeKind::Class),
        ];

        let normalized = normalize_graph(graph_with(nodes, vec![]));

        assert_eq!(normalized.nodes.len(), 1);
        assert_eq!(normalized.nodes[0].kind, NodeKind::Class);
    }

    #[test]
    fn fingerprint_changes_when_effect_shape_changes() {
        let base = plain_call_edge();
        let changed = Edge {
            direction: Some(FlowDirection::Read),
            ..base.clone()
        };

        assert_ne!(edge_fingerprint(&base), edge_fingerprint(&changed));
    }

    #[test]
    fn fingerprint_ignores_confidence_and_provenance() {
        let base = Edge {
            confidence: 0.2,
            direction: Some(FlowDirection::Read),
            operation: Some("HTTP".to_string()),
            ..plain_call_edge()
        };
        let changed = Edge {
            confidence: 0.9,
            provenance: vec![provenance_at(Span {
                start: [1, 0],
                end: [1, 2],
            })],
            ..base.clone()
        };

        assert_eq!(edge_fingerprint(&base), edge_fingerprint(&changed));
    }

    #[test]
    fn terminal_role_is_preserved_by_normalization() {
        let terminal = Node {
            role: Some(NodeRole::Terminal {
                kind: TerminalKind::Network,
            }),
            ..bare_node(
                "terminal",
                "terminal",
                NodeKind::Function,
                "main.rs",
                Span {
                    start: [0, 0],
                    end: [1, 0],
                },
                Visibility::Public,
            )
        };

        let normalized = normalize_graph(graph_with(vec![terminal], vec![]));

        assert_eq!(
            normalized.nodes[0].role,
            Some(NodeRole::Terminal {
                kind: TerminalKind::Network,
            })
        );
    }
}