Skip to main content

graphy_core/
graph.rs

1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3
4use petgraph::stable_graph::{NodeIndex, StableGraph};
5use petgraph::visit::EdgeRef;
6use petgraph::Direction;
7use serde::{Deserialize, Serialize};
8use tracing::warn;
9
10use crate::gir::{EdgeKind, GirEdge, GirNode, NodeKind, ParseOutput};
11use crate::symbol_id::SymbolId;
12
13/// Serializable representation of the graph (nodes + edges as flat vecs).
14#[derive(Serialize, Deserialize)]
15struct SerializableGraph {
16    nodes: Vec<GirNode>,
17    edges: Vec<(SymbolId, SymbolId, GirEdge)>,
18}
19
20/// The central code knowledge graph.
21///
22/// Wraps a petgraph StableGraph with indexes for fast lookups by ID, name, and file.
23pub struct CodeGraph {
24    pub graph: StableGraph<GirNode, GirEdge>,
25    id_index: HashMap<SymbolId, NodeIndex>,
26    name_index: HashMap<String, Vec<NodeIndex>>,
27    file_index: HashMap<PathBuf, Vec<NodeIndex>>,
28    kind_index: HashMap<NodeKind, Vec<NodeIndex>>,
29}
30
31impl std::fmt::Debug for CodeGraph {
32    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
33        f.debug_struct("CodeGraph")
34            .field("nodes", &self.graph.node_count())
35            .field("edges", &self.graph.edge_count())
36            .finish()
37    }
38}
39
40impl Default for CodeGraph {
41    fn default() -> Self {
42        Self::new()
43    }
44}
45
46impl Serialize for CodeGraph {
47    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
48        let nodes: Vec<GirNode> = self.graph.node_weights().cloned().collect();
49        let edges: Vec<(SymbolId, SymbolId, GirEdge)> = self
50            .graph
51            .edge_indices()
52            .filter_map(|ei| {
53                let (src_idx, tgt_idx) = self.graph.edge_endpoints(ei)?;
54                let src_node = self.graph.node_weight(src_idx)?;
55                let tgt_node = self.graph.node_weight(tgt_idx)?;
56                let edge = self.graph.edge_weight(ei)?;
57                Some((src_node.id, tgt_node.id, edge.clone()))
58            })
59            .collect();
60
61        let sg = SerializableGraph { nodes, edges };
62        sg.serialize(serializer)
63    }
64}
65
66impl<'de> Deserialize<'de> for CodeGraph {
67    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
68        let sg = SerializableGraph::deserialize(deserializer)?;
69        let mut graph = CodeGraph::new();
70        for node in sg.nodes {
71            graph.add_node(node);
72        }
73        let mut dropped_edges = 0u64;
74        for (src, tgt, edge) in sg.edges {
75            if !graph.add_edge(src, tgt, edge) {
76                dropped_edges += 1;
77            }
78        }
79        if dropped_edges > 0 {
80            warn!(
81                "Deserialization: dropped {} edges referencing missing nodes",
82                dropped_edges
83            );
84        }
85        Ok(graph)
86    }
87}
88
89impl CodeGraph {
90    pub fn new() -> Self {
91        Self {
92            graph: StableGraph::new(),
93            id_index: HashMap::new(),
94            name_index: HashMap::new(),
95            file_index: HashMap::new(),
96            kind_index: HashMap::new(),
97        }
98    }
99
100    // ── Mutation ─────────────────────────────────────────
101
102    pub fn add_node(&mut self, node: GirNode) -> NodeIndex {
103        let id = node.id;
104        let name = node.name.clone();
105        let file = node.file_path.clone();
106        let kind = node.kind;
107
108        // Deduplicate: if a node with this ID already exists, update it.
109        // Clean up stale secondary index entries if name/kind/file changed.
110        if let Some(&existing) = self.id_index.get(&id) {
111            if let Some(old) = self.graph.node_weight(existing) {
112                let old_name = old.name.clone();
113                let old_kind = old.kind;
114                let old_file = old.file_path.clone();
115
116                // Update secondary indexes if values changed
117                if old_name != name {
118                    if let Some(names) = self.name_index.get_mut(&old_name) {
119                        names.retain(|i| *i != existing);
120                        if names.is_empty() {
121                            self.name_index.remove(&old_name);
122                        }
123                    }
124                    self.name_index.entry(name).or_default().push(existing);
125                }
126                if old_kind != kind {
127                    if let Some(kinds) = self.kind_index.get_mut(&old_kind) {
128                        kinds.retain(|i| *i != existing);
129                        if kinds.is_empty() {
130                            self.kind_index.remove(&old_kind);
131                        }
132                    }
133                    self.kind_index.entry(kind).or_default().push(existing);
134                }
135                if old_file != file {
136                    if let Some(files) = self.file_index.get_mut(&old_file) {
137                        files.retain(|i| *i != existing);
138                        if files.is_empty() {
139                            self.file_index.remove(&old_file);
140                        }
141                    }
142                    self.file_index.entry(file).or_default().push(existing);
143                }
144            }
145            self.graph[existing] = node;
146            return existing;
147        }
148
149        let idx = self.graph.add_node(node);
150        self.id_index.insert(id, idx);
151        self.name_index.entry(name).or_default().push(idx);
152        self.file_index.entry(file).or_default().push(idx);
153        self.kind_index.entry(kind).or_default().push(idx);
154        idx
155    }
156
157    pub fn add_edge(&mut self, source: SymbolId, target: SymbolId, edge: GirEdge) -> bool {
158        let (Some(&src_idx), Some(&tgt_idx)) =
159            (self.id_index.get(&source), self.id_index.get(&target))
160        else {
161            return false;
162        };
163        // Deduplicate: skip if an edge of the same kind already exists between these nodes
164        let dominated = self
165            .graph
166            .edges_connecting(src_idx, tgt_idx)
167            .any(|e| e.weight().kind == edge.kind);
168        if dominated {
169            return true;
170        }
171        self.graph.add_edge(src_idx, tgt_idx, edge);
172        true
173    }
174
175    /// Merge a ParseOutput into the graph.
176    pub fn merge(&mut self, output: ParseOutput) {
177        for node in output.nodes {
178            self.add_node(node);
179        }
180        for (src, tgt, edge) in output.edges {
181            self.add_edge(src, tgt, edge);
182        }
183    }
184
185    /// Remove all nodes belonging to a file (for incremental re-indexing).
186    ///
187    /// petgraph's `StableGraph::remove_node()` also removes all edges
188    /// incident to the node, so no dangling edges are left behind.
189    pub fn remove_file(&mut self, path: &Path) {
190        let indices: Vec<NodeIndex> = self.file_index.remove(path).unwrap_or_default();
191
192        for idx in &indices {
193            if let Some(node) = self.graph.node_weight(*idx) {
194                let id = node.id;
195                let name = node.name.clone();
196                let kind = node.kind;
197
198                self.id_index.remove(&id);
199                if let Some(names) = self.name_index.get_mut(&name) {
200                    names.retain(|i| i != idx);
201                    if names.is_empty() {
202                        self.name_index.remove(&name);
203                    }
204                }
205                if let Some(kinds) = self.kind_index.get_mut(&kind) {
206                    kinds.retain(|i| i != idx);
207                    if kinds.is_empty() {
208                        self.kind_index.remove(&kind);
209                    }
210                }
211            }
212            // This also removes all edges incident to the node.
213            self.graph.remove_node(*idx);
214        }
215
216        // Note: we intentionally do NOT shrink_to_fit() here.
217        // In watch mode, remove_file is immediately followed by merge (re-add),
218        // so shrinking would cause unnecessary reallocation churn.
219    }
220
221    /// Validate graph invariants. Returns a list of issues found.
222    ///
223    /// Checks that all indexes point to valid nodes and that no stale
224    /// entries exist. Useful for debugging and post-load verification.
225    pub fn validate(&self) -> Vec<String> {
226        let mut errors = Vec::new();
227
228        // Check id_index entries point to valid nodes with matching IDs
229        for (&id, &idx) in &self.id_index {
230            match self.graph.node_weight(idx) {
231                Some(node) => {
232                    if node.id != id {
233                        errors.push(format!(
234                            "id_index mismatch: key={} but node.id={}",
235                            id, node.id
236                        ));
237                    }
238                }
239                None => {
240                    errors.push(format!("id_index stale: {} -> {:?} (no node)", id, idx));
241                }
242            }
243        }
244
245        // Check name_index entries point to valid nodes
246        for (name, indices) in &self.name_index {
247            for idx in indices {
248                if self.graph.node_weight(*idx).is_none() {
249                    errors.push(format!(
250                        "name_index stale: '{}' -> {:?} (no node)",
251                        name, idx
252                    ));
253                }
254            }
255        }
256
257        // Check file_index entries point to valid nodes
258        for (path, indices) in &self.file_index {
259            for idx in indices {
260                if self.graph.node_weight(*idx).is_none() {
261                    errors.push(format!(
262                        "file_index stale: '{}' -> {:?} (no node)",
263                        path.display(),
264                        idx
265                    ));
266                }
267            }
268        }
269
270        // Check kind_index entries point to valid nodes
271        for (kind, indices) in &self.kind_index {
272            for idx in indices {
273                if self.graph.node_weight(*idx).is_none() {
274                    errors.push(format!(
275                        "kind_index stale: {:?} -> {:?} (no node)",
276                        kind, idx
277                    ));
278                }
279            }
280        }
281
282        // Check all graph nodes are in the id_index
283        for idx in self.graph.node_indices() {
284            if let Some(node) = self.graph.node_weight(idx) {
285                if !self.id_index.contains_key(&node.id) {
286                    errors.push(format!(
287                        "node {} not in id_index (idx={:?})",
288                        node.id, idx
289                    ));
290                }
291            }
292        }
293
294        errors
295    }
296
297    // ── Queries ─────────────────────────────────────────
298
299    pub fn node_count(&self) -> usize {
300        self.graph.node_count()
301    }
302
303    pub fn edge_count(&self) -> usize {
304        self.graph.edge_count()
305    }
306
307    pub fn get_node(&self, id: SymbolId) -> Option<&GirNode> {
308        self.id_index
309            .get(&id)
310            .and_then(|idx| self.graph.node_weight(*idx))
311    }
312
313    pub fn get_node_index(&self, id: SymbolId) -> Option<NodeIndex> {
314        self.id_index.get(&id).copied()
315    }
316
317    pub fn find_by_name(&self, name: &str) -> Vec<&GirNode> {
318        self.name_index
319            .get(name)
320            .map(|indices| {
321                indices
322                    .iter()
323                    .filter_map(|idx| self.graph.node_weight(*idx))
324                    .collect()
325            })
326            .unwrap_or_default()
327    }
328
329    pub fn find_by_file(&self, path: &Path) -> Vec<&GirNode> {
330        self.file_index
331            .get(path)
332            .map(|indices| {
333                indices
334                    .iter()
335                    .filter_map(|idx| self.graph.node_weight(*idx))
336                    .collect()
337            })
338            .unwrap_or_default()
339    }
340
341    pub fn find_by_kind(&self, kind: NodeKind) -> Vec<&GirNode> {
342        self.kind_index
343            .get(&kind)
344            .map(|indices| {
345                indices
346                    .iter()
347                    .filter_map(|idx| self.graph.node_weight(*idx))
348                    .collect()
349            })
350            .unwrap_or_default()
351    }
352
353    /// Get all nodes connected by outgoing edges of a specific kind.
354    pub fn outgoing(&self, id: SymbolId, edge_kind: EdgeKind) -> Vec<&GirNode> {
355        let Some(&idx) = self.id_index.get(&id) else {
356            return vec![];
357        };
358        self.graph
359            .edges_directed(idx, Direction::Outgoing)
360            .filter(|e| e.weight().kind == edge_kind)
361            .filter_map(|e| self.graph.node_weight(e.target()))
362            .collect()
363    }
364
365    /// Get all nodes connected by incoming edges of a specific kind.
366    pub fn incoming(&self, id: SymbolId, edge_kind: EdgeKind) -> Vec<&GirNode> {
367        let Some(&idx) = self.id_index.get(&id) else {
368            return vec![];
369        };
370        self.graph
371            .edges_directed(idx, Direction::Incoming)
372            .filter(|e| e.weight().kind == edge_kind)
373            .filter_map(|e| self.graph.node_weight(e.source()))
374            .collect()
375    }
376
377    /// Get callees of a function/method.
378    pub fn callees(&self, id: SymbolId) -> Vec<&GirNode> {
379        self.outgoing(id, EdgeKind::Calls)
380    }
381
382    /// Get callers of a function/method.
383    pub fn callers(&self, id: SymbolId) -> Vec<&GirNode> {
384        self.incoming(id, EdgeKind::Calls)
385    }
386
387    /// Get children (contained symbols) of a node.
388    pub fn children(&self, id: SymbolId) -> Vec<&GirNode> {
389        self.outgoing(id, EdgeKind::Contains)
390    }
391
392    /// Get the parent (container) of a node.
393    pub fn parent(&self, id: SymbolId) -> Option<&GirNode> {
394        self.incoming(id, EdgeKind::Contains).into_iter().next()
395    }
396
397    /// Check if a node is a "phantom" call target — i.e. created by the parser
398    /// to represent a call expression, not an actual symbol definition.
399    /// Phantom nodes have no parent (no incoming Contains edge) and are not
400    /// top-level File/Folder/Module nodes.
401    pub fn is_phantom(&self, id: SymbolId) -> bool {
402        let Some(&idx) = self.id_index.get(&id) else {
403            return false;
404        };
405        let Some(node) = self.graph.node_weight(idx) else {
406            return false;
407        };
408        // File, Folder, Module nodes are legitimate roots without parents
409        if matches!(node.kind, NodeKind::File | NodeKind::Folder | NodeKind::Module) {
410            return false;
411        }
412        self.parent(id).is_none()
413    }
414
415    /// Check if a callable node is an interface/trait implementation method.
416    /// Works across all languages: checks whether the method's parent type
417    /// has any Implements or Inherits edges, meaning its methods may be
418    /// called via dynamic dispatch and shouldn't be flagged as dead.
419    pub fn is_interface_impl(&self, id: SymbolId) -> bool {
420        let Some(parent) = self.parent(id) else {
421            return false;
422        };
423        let parent_id = parent.id;
424        !self.outgoing(parent_id, EdgeKind::Implements).is_empty()
425            || !self.outgoing(parent_id, EdgeKind::Inherits).is_empty()
426    }
427
428    /// Structural check: is this a method on a type that is actively used?
429    ///
430    /// Methods called via `self.method()` / `this.method()` are filtered at
431    /// parse time to avoid noise, so they have no incoming `Calls` edges.
432    /// Instead of hardcoding self/this dispatch, we use a structural signal:
433    /// if the parent type (struct/class/enum) has incoming edges (it's
434    /// constructed or referenced), its private methods are likely alive
435    /// via self-dispatch.
436    pub fn is_method_on_used_type(&self, id: SymbolId) -> bool {
437        let Some(parent) = self.parent(id) else {
438            return false;
439        };
440        if !matches!(
441            parent.kind,
442            NodeKind::Struct | NodeKind::Class | NodeKind::Enum | NodeKind::Trait
443        ) {
444            return false;
445        }
446        let Some(idx) = self.get_node_index(parent.id) else {
447            return false;
448        };
449        self.graph
450            .edges_directed(idx, Direction::Incoming)
451            .any(|e| e.weight().kind != EdgeKind::Contains)
452    }
453
454    /// Structural check: does this symbol (or a parent module) have any
455    /// decorator / attribute annotations?
456    ///
457    /// This is a language-agnostic heuristic for framework-registered symbols
458    /// (tests, route handlers, event listeners, DI-managed beans, etc.).
459    /// Instead of hardcoding per-language name prefixes like `test_`, we rely
460    /// on the graph structure: if the symbol (or an ancestor module) carries
461    /// an `AnnotatedWith` edge to a `Decorator` node, some framework knows
462    /// about it and it should not be considered dead code.
463    pub fn is_decorated(&self, id: SymbolId) -> bool {
464        // Direct decorator on this symbol
465        if !self.outgoing(id, EdgeKind::AnnotatedWith).is_empty() {
466            return true;
467        }
468
469        // Propagate through parent Module / File nodes.
470        // A function inside `#[cfg(test)] mod tests { ... }` inherits
471        // the module's decorator status, but a method inside a struct
472        // with `#[derive(Debug)]` does NOT — derive is metadata, not
473        // a framework registration for the struct's methods.
474        if let Some(parent) = self.parent(id) {
475            if matches!(parent.kind, NodeKind::Module | NodeKind::File) {
476                return self.is_decorated(parent.id);
477            }
478        }
479        false
480    }
481
482    /// Remove orphaned phantom nodes — phantoms with no incoming edges at all.
483    /// Keeps phantom nodes that are call targets (have incoming Calls edges)
484    /// since they represent useful callee information in context views.
485    pub fn remove_phantom_nodes(&mut self) -> usize {
486        let phantoms: Vec<(SymbolId, NodeIndex)> = self
487            .id_index
488            .iter()
489            .filter_map(|(&id, &idx)| {
490                if !self.is_phantom(id) {
491                    return None;
492                }
493                // Keep phantom nodes that have incoming edges (they're referenced)
494                let has_incoming = self
495                    .graph
496                    .edges_directed(idx, Direction::Incoming)
497                    .next()
498                    .is_some();
499                if has_incoming {
500                    return None;
501                }
502                Some((id, idx))
503            })
504            .collect();
505
506        let count = phantoms.len();
507        for (id, idx) in &phantoms {
508            if let Some(node) = self.graph.node_weight(*idx) {
509                let name = node.name.clone();
510                let kind = node.kind;
511                let file = node.file_path.clone();
512                if let Some(names) = self.name_index.get_mut(&name) {
513                    names.retain(|i| i != idx);
514                    if names.is_empty() {
515                        self.name_index.remove(&name);
516                    }
517                }
518                if let Some(kinds) = self.kind_index.get_mut(&kind) {
519                    kinds.retain(|i| i != idx);
520                    if kinds.is_empty() {
521                        self.kind_index.remove(&kind);
522                    }
523                }
524                if let Some(files) = self.file_index.get_mut(&file) {
525                    files.retain(|i| i != idx);
526                    if files.is_empty() {
527                        self.file_index.remove(&file);
528                    }
529                }
530            }
531            self.id_index.remove(id);
532            self.graph.remove_node(*idx);
533        }
534        count
535    }
536
537    /// All indexed file paths.
538    pub fn indexed_files(&self) -> Vec<&PathBuf> {
539        self.file_index.keys().collect()
540    }
541
542    /// Iterator over all nodes.
543    pub fn all_nodes(&self) -> impl Iterator<Item = &GirNode> {
544        self.graph.node_weights()
545    }
546
547    /// Mutable iterator over all nodes.
548    pub fn all_nodes_mut(&mut self) -> impl Iterator<Item = &mut GirNode> {
549        self.graph.node_weights_mut()
550    }
551
552    /// Remove all edges of a given kind from the entire graph.
553    /// Used to clear analysis-phase edges before re-running analysis.
554    pub fn remove_edges_by_kind(&mut self, kind: EdgeKind) -> usize {
555        let to_remove: Vec<_> = self
556            .graph
557            .edge_indices()
558            .filter(|&ei| {
559                self.graph
560                    .edge_weight(ei)
561                    .map_or(false, |e| e.kind == kind)
562            })
563            .collect();
564        let count = to_remove.len();
565        for ei in to_remove {
566            self.graph.remove_edge(ei);
567        }
568        count
569    }
570
571    /// Remove all edges of any of the given kinds from the entire graph.
572    pub fn remove_edges_by_kinds(&mut self, kinds: &[EdgeKind]) -> usize {
573        let to_remove: Vec<_> = self
574            .graph
575            .edge_indices()
576            .filter(|&ei| {
577                self.graph
578                    .edge_weight(ei)
579                    .map_or(false, |e| kinds.contains(&e.kind))
580            })
581            .collect();
582        let count = to_remove.len();
583        for ei in to_remove {
584            self.graph.remove_edge(ei);
585        }
586        count
587    }
588}
589
590#[cfg(test)]
591mod tests {
592    use super::*;
593    use crate::gir::{Language, Span};
594
595    fn make_node(name: &str, kind: NodeKind, line: u32) -> GirNode {
596        GirNode::new(
597            name.to_string(),
598            kind,
599            PathBuf::from("test.py"),
600            Span::new(line, 0, line + 5, 0),
601            Language::Python,
602        )
603    }
604
605    #[test]
606    fn add_and_query_nodes() {
607        let mut g = CodeGraph::new();
608        let func = make_node("my_func", NodeKind::Function, 1);
609        let id = func.id;
610        g.add_node(func);
611
612        assert_eq!(g.node_count(), 1);
613        assert!(g.get_node(id).is_some());
614        assert_eq!(g.find_by_name("my_func").len(), 1);
615    }
616
617    #[test]
618    fn add_edges_and_query() {
619        let mut g = CodeGraph::new();
620        let caller = make_node("caller", NodeKind::Function, 1);
621        let callee = make_node("callee", NodeKind::Function, 10);
622        let caller_id = caller.id;
623        let callee_id = callee.id;
624
625        g.add_node(caller);
626        g.add_node(callee);
627        g.add_edge(caller_id, callee_id, GirEdge::new(EdgeKind::Calls));
628
629        assert_eq!(g.callees(caller_id).len(), 1);
630        assert_eq!(g.callers(callee_id).len(), 1);
631        assert_eq!(g.callees(caller_id)[0].name, "callee");
632    }
633
634    #[test]
635    fn deduplication() {
636        let mut g = CodeGraph::new();
637        let n1 = make_node("foo", NodeKind::Function, 1);
638        let n2 = make_node("foo", NodeKind::Function, 1);
639        g.add_node(n1);
640        g.add_node(n2);
641        assert_eq!(g.node_count(), 1);
642    }
643
644    #[test]
645    fn remove_file() {
646        let mut g = CodeGraph::new();
647        let n1 = make_node("foo", NodeKind::Function, 1);
648        let n2 = make_node("bar", NodeKind::Function, 10);
649        g.add_node(n1);
650        g.add_node(n2);
651        assert_eq!(g.node_count(), 2);
652
653        g.remove_file(Path::new("test.py"));
654        assert_eq!(g.node_count(), 0);
655    }
656
657    #[test]
658    fn remove_file_cleans_edges() {
659        let mut g = CodeGraph::new();
660        let caller = make_node("caller", NodeKind::Function, 1);
661        let callee = GirNode::new(
662            "callee".to_string(),
663            NodeKind::Function,
664            PathBuf::from("other.py"),
665            Span::new(1, 0, 6, 0),
666            Language::Python,
667        );
668        let caller_id = caller.id;
669        let callee_id = callee.id;
670        g.add_node(caller);
671        g.add_node(callee);
672        g.add_edge(caller_id, callee_id, GirEdge::new(EdgeKind::Calls));
673        assert_eq!(g.edge_count(), 1);
674
675        // Remove file containing caller — edge should be cleaned up
676        g.remove_file(Path::new("test.py"));
677        assert_eq!(g.edge_count(), 0);
678        assert_eq!(g.node_count(), 1); // callee in other.py survives
679        assert!(g.validate().is_empty());
680    }
681
682    #[test]
683    fn validate_clean_graph() {
684        let mut g = CodeGraph::new();
685        let n = make_node("foo", NodeKind::Function, 1);
686        g.add_node(n);
687        assert!(g.validate().is_empty());
688    }
689
690    #[test]
691    fn edge_deduplication() {
692        let mut g = CodeGraph::new();
693        let caller = make_node("caller", NodeKind::Function, 1);
694        let callee = make_node("callee", NodeKind::Function, 10);
695        let caller_id = caller.id;
696        let callee_id = callee.id;
697        g.add_node(caller);
698        g.add_node(callee);
699
700        // First edge should succeed
701        assert!(g.add_edge(caller_id, callee_id, GirEdge::new(EdgeKind::Calls)));
702        assert_eq!(g.edge_count(), 1);
703
704        // Same edge kind between same nodes should be deduped
705        assert!(g.add_edge(caller_id, callee_id, GirEdge::new(EdgeKind::Calls)));
706        assert_eq!(g.edge_count(), 1); // Still 1, not 2
707
708        // Different edge kind between same nodes should be allowed
709        assert!(g.add_edge(caller_id, callee_id, GirEdge::new(EdgeKind::Contains)));
710        assert_eq!(g.edge_count(), 2);
711    }
712
713    #[test]
714    fn remove_edges_by_kind() {
715        let mut g = CodeGraph::new();
716        let a = make_node("a", NodeKind::Function, 1);
717        let b = make_node("b", NodeKind::Function, 10);
718        let c = make_node("c", NodeKind::Class, 20);
719        let a_id = a.id;
720        let b_id = b.id;
721        let c_id = c.id;
722        g.add_node(a);
723        g.add_node(b);
724        g.add_node(c);
725
726        g.add_edge(a_id, b_id, GirEdge::new(EdgeKind::Calls));
727        g.add_edge(a_id, c_id, GirEdge::new(EdgeKind::Imports));
728        g.add_edge(b_id, c_id, GirEdge::new(EdgeKind::Calls));
729        assert_eq!(g.edge_count(), 3);
730
731        // Remove only Calls edges
732        let removed = g.remove_edges_by_kind(EdgeKind::Calls);
733        assert_eq!(removed, 2);
734        assert_eq!(g.edge_count(), 1); // Only Imports edge remains
735    }
736
737    #[test]
738    fn serialization_round_trip() {
739        let mut g = CodeGraph::new();
740        let caller = make_node("caller", NodeKind::Function, 1);
741        let callee = make_node("callee", NodeKind::Function, 10);
742        let caller_id = caller.id;
743        let callee_id = callee.id;
744        g.add_node(caller);
745        g.add_node(callee);
746        g.add_edge(caller_id, callee_id, GirEdge::new(EdgeKind::Calls));
747
748        let bytes = bincode::serialize(&g).unwrap();
749        let g2: CodeGraph = bincode::deserialize(&bytes).unwrap();
750
751        assert_eq!(g2.node_count(), 2);
752        assert_eq!(g2.edge_count(), 1);
753        assert_eq!(g2.callees(caller_id).len(), 1);
754        assert!(g2.validate().is_empty());
755    }
756
757    // ── Edge case tests ───────────────────────────────────
758
759    #[test]
760    fn empty_graph() {
761        let g = CodeGraph::new();
762        assert_eq!(g.node_count(), 0);
763        assert_eq!(g.edge_count(), 0);
764        assert!(g.validate().is_empty());
765        assert!(g.find_by_name("anything").is_empty());
766        assert!(g.find_by_file(Path::new("anything.rs")).is_empty());
767        assert!(g.find_by_kind(NodeKind::Function).is_empty());
768        assert!(g.indexed_files().is_empty());
769    }
770
771    #[test]
772    fn add_edge_missing_nodes() {
773        let mut g = CodeGraph::new();
774        let fake_id1 = SymbolId::new(Path::new("a.py"), "x", NodeKind::Function, 1);
775        let fake_id2 = SymbolId::new(Path::new("a.py"), "y", NodeKind::Function, 2);
776        // Should return false when nodes don't exist
777        assert!(!g.add_edge(fake_id1, fake_id2, GirEdge::new(EdgeKind::Calls)));
778        assert_eq!(g.edge_count(), 0);
779    }
780
781    #[test]
782    fn add_edge_one_node_missing() {
783        let mut g = CodeGraph::new();
784        let node = make_node("exists", NodeKind::Function, 1);
785        let id = node.id;
786        g.add_node(node);
787        let fake_id = SymbolId::new(Path::new("a.py"), "ghost", NodeKind::Function, 99);
788        assert!(!g.add_edge(id, fake_id, GirEdge::new(EdgeKind::Calls)));
789        assert!(!g.add_edge(fake_id, id, GirEdge::new(EdgeKind::Calls)));
790    }
791
792    #[test]
793    fn find_by_kind_query() {
794        let mut g = CodeGraph::new();
795        g.add_node(make_node("f1", NodeKind::Function, 1));
796        g.add_node(make_node("f2", NodeKind::Function, 10));
797        g.add_node(make_node("C", NodeKind::Class, 20));
798        assert_eq!(g.find_by_kind(NodeKind::Function).len(), 2);
799        assert_eq!(g.find_by_kind(NodeKind::Class).len(), 1);
800        assert_eq!(g.find_by_kind(NodeKind::Method).len(), 0);
801    }
802
803    #[test]
804    fn get_nonexistent_node() {
805        let g = CodeGraph::new();
806        let fake_id = SymbolId::new(Path::new("a.py"), "x", NodeKind::Function, 1);
807        assert!(g.get_node(fake_id).is_none());
808        assert!(g.get_node_index(fake_id).is_none());
809    }
810
811    #[test]
812    fn outgoing_incoming_nonexistent_node() {
813        let g = CodeGraph::new();
814        let fake_id = SymbolId::new(Path::new("a.py"), "x", NodeKind::Function, 1);
815        assert!(g.outgoing(fake_id, EdgeKind::Calls).is_empty());
816        assert!(g.incoming(fake_id, EdgeKind::Calls).is_empty());
817        assert!(g.callees(fake_id).is_empty());
818        assert!(g.callers(fake_id).is_empty());
819        assert!(g.children(fake_id).is_empty());
820        assert!(g.parent(fake_id).is_none());
821    }
822
823    #[test]
824    fn phantom_detection() {
825        let mut g = CodeGraph::new();
826        // File node — NOT phantom (top-level)
827        let file = GirNode::new(
828            "test.py".to_string(),
829            NodeKind::File,
830            PathBuf::from("test.py"),
831            Span::new(0, 0, 100, 0),
832            Language::Python,
833        );
834        let file_id = file.id;
835        g.add_node(file);
836        assert!(!g.is_phantom(file_id));
837
838        // Function with parent (Contains edge) — NOT phantom
839        let func = make_node("real_func", NodeKind::Function, 5);
840        let func_id = func.id;
841        g.add_node(func);
842        g.add_edge(file_id, func_id, GirEdge::new(EdgeKind::Contains));
843        assert!(!g.is_phantom(func_id));
844
845        // Function WITHOUT parent — IS phantom
846        let phantom = make_node("phantom_call", NodeKind::Function, 99);
847        let phantom_id = phantom.id;
848        g.add_node(phantom);
849        assert!(g.is_phantom(phantom_id));
850    }
851
852    #[test]
853    fn remove_phantom_nodes_cleans_indexes() {
854        let mut g = CodeGraph::new();
855        let file = GirNode::new(
856            "test.py".to_string(),
857            NodeKind::File,
858            PathBuf::from("test.py"),
859            Span::new(0, 0, 100, 0),
860            Language::Python,
861        );
862        let file_id = file.id;
863        g.add_node(file);
864
865        let real = make_node("real", NodeKind::Function, 1);
866        let real_id = real.id;
867        g.add_node(real);
868        g.add_edge(file_id, real_id, GirEdge::new(EdgeKind::Contains));
869
870        // Orphan phantom (no incoming edges at all)
871        let orphan = make_node("orphan", NodeKind::Function, 50);
872        g.add_node(orphan);
873
874        // Referenced phantom (has incoming Calls edge — should be kept)
875        let referenced = make_node("referenced", NodeKind::Function, 60);
876        let ref_id = referenced.id;
877        g.add_node(referenced);
878        g.add_edge(real_id, ref_id, GirEdge::new(EdgeKind::Calls));
879
880        assert_eq!(g.node_count(), 4);
881        let removed = g.remove_phantom_nodes();
882        assert_eq!(removed, 1); // Only orphan removed
883        assert_eq!(g.node_count(), 3);
884        assert!(g.validate().is_empty()); // All indexes consistent
885    }
886
887    #[test]
888    fn remove_file_nonexistent() {
889        let mut g = CodeGraph::new();
890        g.add_node(make_node("f", NodeKind::Function, 1));
891        g.remove_file(Path::new("nonexistent.py"));
892        assert_eq!(g.node_count(), 1); // unchanged
893        assert!(g.validate().is_empty());
894    }
895
896    #[test]
897    fn merge_parse_output() {
898        let mut g = CodeGraph::new();
899        let mut po = ParseOutput::new();
900        let n1 = make_node("a", NodeKind::Function, 1);
901        let n2 = make_node("b", NodeKind::Function, 10);
902        let id1 = n1.id;
903        let id2 = n2.id;
904        po.add_node(n1);
905        po.add_node(n2);
906        po.add_edge(id1, id2, GirEdge::new(EdgeKind::Calls));
907        g.merge(po);
908        assert_eq!(g.node_count(), 2);
909        assert_eq!(g.edge_count(), 1);
910        assert!(g.validate().is_empty());
911    }
912
913    #[test]
914    fn add_node_update_changes_name() {
915        let mut g = CodeGraph::new();
916        let mut n = make_node("old_name", NodeKind::Function, 1);
917        let id = n.id;
918        g.add_node(n.clone());
919        assert_eq!(g.find_by_name("old_name").len(), 1);
920
921        // Re-add with same id but different name
922        n.name = "new_name".to_string();
923        g.add_node(n);
924        assert_eq!(g.node_count(), 1); // Still 1 node
925        assert_eq!(g.find_by_name("old_name").len(), 0); // Old name cleaned
926        assert_eq!(g.find_by_name("new_name").len(), 1); // New name indexed
927        assert_eq!(g.get_node(id).unwrap().name, "new_name");
928        assert!(g.validate().is_empty());
929    }
930
931    #[test]
932    fn self_edge() {
933        let mut g = CodeGraph::new();
934        let func = make_node("recursive", NodeKind::Function, 1);
935        let id = func.id;
936        g.add_node(func);
937        assert!(g.add_edge(id, id, GirEdge::new(EdgeKind::Calls)));
938        assert_eq!(g.edge_count(), 1);
939        assert_eq!(g.callees(id).len(), 1);
940        assert_eq!(g.callers(id).len(), 1);
941    }
942
943    #[test]
944    fn remove_edges_by_kinds_multiple() {
945        let mut g = CodeGraph::new();
946        let a = make_node("a", NodeKind::Function, 1);
947        let b = make_node("b", NodeKind::Function, 10);
948        let a_id = a.id;
949        let b_id = b.id;
950        g.add_node(a);
951        g.add_node(b);
952        g.add_edge(a_id, b_id, GirEdge::new(EdgeKind::Calls));
953        g.add_edge(a_id, b_id, GirEdge::new(EdgeKind::Imports));
954        g.add_edge(a_id, b_id, GirEdge::new(EdgeKind::Contains));
955        assert_eq!(g.edge_count(), 3);
956
957        let removed = g.remove_edges_by_kinds(&[EdgeKind::Calls, EdgeKind::Imports]);
958        assert_eq!(removed, 2);
959        assert_eq!(g.edge_count(), 1); // Only Contains remains
960    }
961
962    #[test]
963    fn is_interface_impl_check() {
964        let mut g = CodeGraph::new();
965        let file = GirNode::new(
966            "test.py".to_string(),
967            NodeKind::File,
968            PathBuf::from("test.py"),
969            Span::new(0, 0, 100, 0),
970            Language::Python,
971        );
972        let file_id = file.id;
973        g.add_node(file);
974
975        let cls = make_node("MyClass", NodeKind::Class, 1);
976        let cls_id = cls.id;
977        g.add_node(cls);
978        g.add_edge(file_id, cls_id, GirEdge::new(EdgeKind::Contains));
979
980        let method = make_node("my_method", NodeKind::Method, 5);
981        let method_id = method.id;
982        g.add_node(method);
983        g.add_edge(cls_id, method_id, GirEdge::new(EdgeKind::Contains));
984
985        // No implements/inherits → not an interface impl
986        assert!(!g.is_interface_impl(method_id));
987
988        // Add Implements edge to class
989        let iface = make_node("MyInterface", NodeKind::Interface, 50);
990        let iface_id = iface.id;
991        g.add_node(iface);
992        g.add_edge(cls_id, iface_id, GirEdge::new(EdgeKind::Implements));
993
994        // Now method IS on an implementing type
995        assert!(g.is_interface_impl(method_id));
996    }
997
998    #[test]
999    fn serialization_round_trip_empty_graph() {
1000        let g = CodeGraph::new();
1001        let bytes = bincode::serialize(&g).unwrap();
1002        let g2: CodeGraph = bincode::deserialize(&bytes).unwrap();
1003        assert_eq!(g2.node_count(), 0);
1004        assert_eq!(g2.edge_count(), 0);
1005        assert!(g2.validate().is_empty());
1006    }
1007
1008    #[test]
1009    fn serialization_drops_edges_with_missing_nodes() {
1010        // Manually craft a SerializableGraph with dangling edge
1011        let node = make_node("only_node", NodeKind::Function, 1);
1012        let fake_id = SymbolId::new(Path::new("x.py"), "ghost", NodeKind::Function, 99);
1013        let sg = serde_json::json!({
1014            "nodes": [serde_json::to_value(&node).unwrap()],
1015            "edges": [[
1016                serde_json::to_value(node.id).unwrap(),
1017                serde_json::to_value(fake_id).unwrap(),
1018                serde_json::to_value(GirEdge::new(EdgeKind::Calls)).unwrap()
1019            ]]
1020        });
1021        let g: CodeGraph = serde_json::from_value(sg).unwrap();
1022        assert_eq!(g.node_count(), 1);
1023        assert_eq!(g.edge_count(), 0); // dangling edge dropped
1024        assert!(g.validate().is_empty());
1025    }
1026
1027    #[test]
1028    fn indexed_files_returns_correct_paths() {
1029        let mut g = CodeGraph::new();
1030        g.add_node(make_node("f1", NodeKind::Function, 1)); // test.py
1031        let other = GirNode::new(
1032            "f2".to_string(),
1033            NodeKind::Function,
1034            PathBuf::from("other.py"),
1035            Span::new(1, 0, 6, 0),
1036            Language::Python,
1037        );
1038        g.add_node(other);
1039        let files = g.indexed_files();
1040        assert_eq!(files.len(), 2);
1041        let paths: Vec<&Path> = files.iter().map(|p| p.as_path()).collect();
1042        assert!(paths.contains(&Path::new("test.py")));
1043        assert!(paths.contains(&Path::new("other.py")));
1044    }
1045}