Skip to main content

grapha_core/
normalize.rs

1use std::collections::HashMap;
2
3use crate::graph::{Edge, EdgeKind, FlowDirection, Graph, Node, Visibility};
4
5pub fn normalize_graph(mut graph: Graph) -> Graph {
6    fn visibility_rank(visibility: &Visibility) -> u8 {
7        match visibility {
8            Visibility::Private => 0,
9            Visibility::Crate => 1,
10            Visibility::Public => 2,
11        }
12    }
13
14    fn merge_node(existing: &mut Node, incoming: Node) {
15        if visibility_rank(&incoming.visibility) > visibility_rank(&existing.visibility) {
16            existing.visibility = incoming.visibility;
17        }
18        if existing.role.is_none() {
19            existing.role = incoming.role;
20        }
21        if existing.signature.is_none() {
22            existing.signature = incoming.signature;
23        }
24        if existing.doc_comment.is_none() {
25            existing.doc_comment = incoming.doc_comment;
26        }
27        if existing.module.is_none() {
28            existing.module = incoming.module;
29        }
30        for (key, value) in incoming.metadata {
31            existing.metadata.entry(key).or_insert(value);
32        }
33    }
34
35    let mut node_index = HashMap::new();
36    let mut normalized_nodes = Vec::with_capacity(graph.nodes.len());
37    for node in graph.nodes {
38        if let Some(existing_index) = node_index.get(&node.id).copied() {
39            merge_node(&mut normalized_nodes[existing_index], node);
40        } else {
41            node_index.insert(node.id.clone(), normalized_nodes.len());
42            normalized_nodes.push(node);
43        }
44    }
45
46    let mut edge_index = HashMap::new();
47    let mut normalized_edges = Vec::with_capacity(graph.edges.len());
48    for edge in graph.edges {
49        let fingerprint = edge_fingerprint(&edge);
50        if let Some(existing_index) = edge_index.get(&fingerprint).copied() {
51            let existing: &mut Edge = &mut normalized_edges[existing_index];
52            existing.confidence = existing.confidence.max(edge.confidence);
53            for provenance in edge.provenance {
54                if !existing
55                    .provenance
56                    .iter()
57                    .any(|current| current == &provenance)
58                {
59                    existing.provenance.push(provenance);
60                }
61            }
62        } else {
63            edge_index.insert(fingerprint, normalized_edges.len());
64            normalized_edges.push(edge);
65        }
66    }
67
68    graph.nodes = normalized_nodes;
69    graph.edges = normalized_edges;
70    graph
71}
72
73pub fn edge_fingerprint(edge: &Edge) -> String {
74    let mut hasher = Fnv1a64::default();
75    hasher.write_component(&edge.source);
76    hasher.write_component(&edge.target);
77    hasher.write_component(edge_kind_tag(edge.kind));
78    hasher.write_component(direction_tag(edge.direction.as_ref()));
79    hasher.write_component(edge.operation.as_deref().unwrap_or(""));
80    hasher.write_component(edge.condition.as_deref().unwrap_or(""));
81    hasher.write_component(bool_tag(edge.async_boundary));
82    // Fast hex encoding without format! allocation overhead
83    let hash = hasher.finish();
84    let mut buf = [0u8; 16];
85    let bytes = hash.to_be_bytes();
86    const HEX: &[u8; 16] = b"0123456789abcdef";
87    for (i, &b) in bytes.iter().enumerate() {
88        buf[i * 2] = HEX[(b >> 4) as usize];
89        buf[i * 2 + 1] = HEX[(b & 0xf) as usize];
90    }
91    // SAFETY: buf only contains ASCII hex chars
92    unsafe { String::from_utf8_unchecked(buf.to_vec()) }
93}
94
95fn edge_kind_tag(kind: EdgeKind) -> &'static str {
96    match kind {
97        EdgeKind::Calls => "calls",
98        EdgeKind::Uses => "uses",
99        EdgeKind::Implements => "implements",
100        EdgeKind::Contains => "contains",
101        EdgeKind::TypeRef => "type_ref",
102        EdgeKind::Inherits => "inherits",
103        EdgeKind::Reads => "reads",
104        EdgeKind::Writes => "writes",
105        EdgeKind::Publishes => "publishes",
106        EdgeKind::Subscribes => "subscribes",
107    }
108}
109
110fn direction_tag(direction: Option<&FlowDirection>) -> &'static str {
111    match direction {
112        Some(FlowDirection::Read) => "read",
113        Some(FlowDirection::Write) => "write",
114        Some(FlowDirection::ReadWrite) => "read_write",
115        Some(FlowDirection::Pure) => "pure",
116        None => "",
117    }
118}
119
/// Encodes a tri-state flag as a tag: `"1"` for `Some(true)`, `"0"` for
/// `Some(false)`, and the empty string when the flag is absent.
fn bool_tag(value: Option<bool>) -> &'static str {
    value.map_or("", |flag| if flag { "1" } else { "0" })
}
127
/// Minimal 64-bit FNV-1a hasher over a sequence of string components.
///
/// Every component is followed by a `0xff` separator byte. That byte value
/// never occurs in UTF-8 text, so the component boundaries are part of the
/// hashed stream: `["ab", "c"]` and `["a", "bc"]` feed different bytes.
struct Fnv1a64 {
    state: u64,
}

impl Default for Fnv1a64 {
    /// Starts the hasher at the FNV offset basis.
    ///
    /// Seeding here (instead of treating `state == 0` as "not yet seeded")
    /// means a hash that legitimately reaches zero is carried forward as-is
    /// rather than being silently re-seeded, which would discard every
    /// component written before that point.
    fn default() -> Self {
        Self {
            state: Self::OFFSET_BASIS,
        }
    }
}

impl Fnv1a64 {
    const OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    const PRIME: u64 = 0x100000001b3;

    /// Mixes the bytes of `value`, then the `0xff` separator, into the state.
    fn write_component(&mut self, value: &str) {
        for byte in value.bytes().chain(std::iter::once(0xff_u8)) {
            // FNV-1a step: xor the byte in first, then multiply by the prime.
            self.state ^= u64::from(byte);
            self.state = self.state.wrapping_mul(Self::PRIME);
        }
    }

    /// Consumes the hasher and returns the accumulated 64-bit hash.
    ///
    /// A hasher that never received a component returns the offset basis.
    fn finish(self) -> u64 {
        self.state
    }
}
157
#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::{EdgeKind, EdgeProvenance, NodeKind, NodeRole, Span, TerminalKind};
    use std::collections::HashMap;
    use std::path::PathBuf;

    /// Wraps the given nodes and edges in a graph carrying the version string
    /// used by every test in this module.
    fn graph_of(nodes: Vec<Node>, edges: Vec<Edge>) -> Graph {
        Graph {
            version: "0.1.0".to_string(),
            nodes,
            edges,
        }
    }

    /// A `Calls` edge from "a" to "b" with confidence 1.0, no optional
    /// attributes and no provenance. Tests override fields via struct update.
    fn plain_call_edge() -> Edge {
        Edge {
            source: "a".to_string(),
            target: "b".to_string(),
            kind: EdgeKind::Calls,
            confidence: 1.0,
            direction: None,
            operation: None,
            condition: None,
            async_boundary: None,
            provenance: Vec::new(),
        }
    }

    /// A provenance record for symbol "a" in `a.swift` at the given span.
    fn provenance_in_a_swift(span: Span) -> EdgeProvenance {
        EdgeProvenance {
            file: PathBuf::from("a.swift"),
            span,
            symbol_id: "a".to_string(),
        }
    }

    /// A node with empty metadata and every optional field unset.
    fn bare_node(
        id: &str,
        name: &str,
        kind: NodeKind,
        file: &str,
        span: Span,
        visibility: Visibility,
    ) -> Node {
        Node {
            id: id.to_string(),
            kind,
            name: name.to_string(),
            file: PathBuf::from(file),
            span,
            visibility,
            metadata: HashMap::new(),
            role: None,
            signature: None,
            doc_comment: None,
            module: None,
            snippet: None,
        }
    }

    #[test]
    fn normalize_graph_merges_duplicate_edges_and_provenance() {
        let weaker = Edge {
            confidence: 0.4,
            provenance: vec![provenance_in_a_swift(Span {
                start: [1, 0],
                end: [1, 4],
            })],
            ..plain_call_edge()
        };
        let stronger = Edge {
            confidence: 0.9,
            provenance: vec![provenance_in_a_swift(Span {
                start: [2, 0],
                end: [2, 4],
            })],
            ..plain_call_edge()
        };

        let normalized = normalize_graph(graph_of(vec![], vec![weaker, stronger]));

        assert_eq!(normalized.edges.len(), 1);
        assert_eq!(normalized.edges[0].confidence, 0.9);
        assert_eq!(normalized.edges[0].provenance.len(), 2);
    }

    #[test]
    fn normalize_graph_merges_duplicate_nodes_by_id() {
        let id = "s:RoomPage.centerContentView";
        let name = "centerContentView";
        let file = "RoomPage.swift";

        // First occurrence: private, with nothing optional filled in.
        let placeholder = bare_node(
            id,
            name,
            NodeKind::Property,
            file,
            Span {
                start: [0, 0],
                end: [0, 0],
            },
            Visibility::Private,
        );
        // Second occurrence: public, with every mergeable field populated.
        let detailed = Node {
            role: Some(NodeRole::EntryPoint),
            signature: Some("var centerContentView: some View".to_string()),
            doc_comment: Some("helper".to_string()),
            module: Some("Room".to_string()),
            ..bare_node(
                id,
                name,
                NodeKind::Property,
                file,
                Span {
                    start: [10, 4],
                    end: [10, 20],
                },
                Visibility::Public,
            )
        };

        let normalized = normalize_graph(graph_of(vec![placeholder, detailed], vec![]));

        assert_eq!(normalized.nodes.len(), 1);
        let merged = &normalized.nodes[0];
        assert_eq!(merged.visibility, Visibility::Public);
        assert_eq!(merged.role, Some(NodeRole::EntryPoint));
        assert_eq!(
            merged.signature.as_deref(),
            Some("var centerContentView: some View")
        );
        assert_eq!(merged.doc_comment.as_deref(), Some("helper"));
        assert_eq!(merged.module.as_deref(), Some("Room"));
    }

    #[test]
    fn fingerprint_changes_when_effect_shape_changes() {
        let base = plain_call_edge();
        let changed = Edge {
            direction: Some(FlowDirection::Read),
            ..base.clone()
        };

        assert_ne!(edge_fingerprint(&base), edge_fingerprint(&changed));
    }

    #[test]
    fn fingerprint_ignores_confidence_and_provenance() {
        let base = Edge {
            confidence: 0.2,
            direction: Some(FlowDirection::Read),
            operation: Some("HTTP".to_string()),
            ..plain_call_edge()
        };
        let changed = Edge {
            confidence: 0.9,
            provenance: vec![provenance_in_a_swift(Span {
                start: [1, 0],
                end: [1, 2],
            })],
            ..base.clone()
        };

        assert_eq!(edge_fingerprint(&base), edge_fingerprint(&changed));
    }

    #[test]
    fn terminal_role_is_preserved_by_normalization() {
        let terminal = Node {
            role: Some(NodeRole::Terminal {
                kind: TerminalKind::Network,
            }),
            ..bare_node(
                "terminal",
                "terminal",
                NodeKind::Function,
                "main.rs",
                Span {
                    start: [0, 0],
                    end: [1, 0],
                },
                Visibility::Public,
            )
        };

        let normalized = normalize_graph(graph_of(vec![terminal], vec![]));

        assert_eq!(
            normalized.nodes[0].role,
            Some(NodeRole::Terminal {
                kind: TerminalKind::Network,
            })
        );
    }
}