Skip to main content

agentic_codebase/graph/
code_graph.rs

1//! Core graph structure holding code units and edges.
2//!
3//! The `CodeGraph` is the central data structure. It stores all code units
4//! (nodes) and edges (relationships) and provides O(1) lookup by ID.
5
6use std::collections::{HashMap, HashSet};
7
8use crate::types::{
9    AcbError, AcbResult, CodeUnit, CodeUnitType, Edge, EdgeType, Language, MAX_EDGES_PER_UNIT,
10};
11
12/// The core in-memory graph of code units and their relationships.
13///
14/// Units are stored by their sequential ID. Edges are indexed by source ID
15/// for efficient forward traversal.
16#[derive(Debug, Clone)]
17pub struct CodeGraph {
18    /// All code units, indexed by ID.
19    units: Vec<CodeUnit>,
20
21    /// All edges.
22    edges: Vec<Edge>,
23
24    /// Edges indexed by source unit ID.
25    edges_by_source: HashMap<u64, Vec<usize>>,
26
27    /// Edges indexed by target unit ID (reverse index).
28    edges_by_target: HashMap<u64, Vec<usize>>,
29
30    /// Feature vector dimensionality.
31    dimension: usize,
32
33    /// Set of languages present in the graph.
34    languages: HashSet<Language>,
35}
36
37impl CodeGraph {
38    /// Create a new empty code graph with the given feature vector dimension.
39    pub fn new(dimension: usize) -> Self {
40        Self {
41            units: Vec::new(),
42            edges: Vec::new(),
43            edges_by_source: HashMap::new(),
44            edges_by_target: HashMap::new(),
45            dimension,
46            languages: HashSet::new(),
47        }
48    }
49
50    /// Create with default dimension (256).
51    pub fn with_default_dimension() -> Self {
52        Self::new(crate::types::DEFAULT_DIMENSION)
53    }
54
55    /// Add a code unit to the graph, assigning it a sequential ID.
56    ///
57    /// Returns the assigned ID.
58    pub fn add_unit(&mut self, mut unit: CodeUnit) -> u64 {
59        let id = self.units.len() as u64;
60        unit.id = id;
61        self.languages.insert(unit.language);
62        self.units.push(unit);
63        id
64    }
65
66    /// Add an edge between two code units.
67    ///
68    /// # Errors
69    ///
70    /// - `AcbError::SelfEdge` if source and target are the same.
71    /// - `AcbError::UnitNotFound` if source or target don't exist.
72    /// - `AcbError::TooManyEdges` if the source unit already has too many edges.
73    pub fn add_edge(&mut self, edge: Edge) -> AcbResult<()> {
74        // Validate: no self-edges
75        if edge.source_id == edge.target_id {
76            return Err(AcbError::SelfEdge(edge.source_id));
77        }
78
79        // Validate: source exists
80        if edge.source_id >= self.units.len() as u64 {
81            return Err(AcbError::UnitNotFound(edge.source_id));
82        }
83
84        // Validate: target exists
85        if edge.target_id >= self.units.len() as u64 {
86            return Err(AcbError::InvalidEdgeTarget(edge.target_id));
87        }
88
89        // Validate: not too many edges from source
90        let source_edge_count = self
91            .edges_by_source
92            .get(&edge.source_id)
93            .map(|v| v.len() as u32)
94            .unwrap_or(0);
95        if source_edge_count >= MAX_EDGES_PER_UNIT {
96            return Err(AcbError::TooManyEdges(source_edge_count));
97        }
98
99        let idx = self.edges.len();
100        self.edges_by_source
101            .entry(edge.source_id)
102            .or_default()
103            .push(idx);
104        self.edges_by_target
105            .entry(edge.target_id)
106            .or_default()
107            .push(idx);
108        self.edges.push(edge);
109
110        Ok(())
111    }
112
113    /// Returns the number of code units.
114    pub fn unit_count(&self) -> usize {
115        self.units.len()
116    }
117
118    /// Returns the number of edges.
119    pub fn edge_count(&self) -> usize {
120        self.edges.len()
121    }
122
123    /// Returns the feature vector dimension.
124    pub fn dimension(&self) -> usize {
125        self.dimension
126    }
127
128    /// Returns the set of languages in the graph.
129    pub fn languages(&self) -> &HashSet<Language> {
130        &self.languages
131    }
132
133    /// Get a code unit by ID.
134    pub fn get_unit(&self, id: u64) -> Option<&CodeUnit> {
135        self.units.get(id as usize)
136    }
137
138    /// Get a mutable reference to a code unit by ID.
139    pub fn get_unit_mut(&mut self, id: u64) -> Option<&mut CodeUnit> {
140        self.units.get_mut(id as usize)
141    }
142
143    /// Iterate over all code units.
144    pub fn units(&self) -> &[CodeUnit] {
145        &self.units
146    }
147
148    /// Iterate over all edges.
149    pub fn edges(&self) -> &[Edge] {
150        &self.edges
151    }
152
153    /// Get all edges originating from the given unit.
154    pub fn edges_from(&self, source_id: u64) -> Vec<&Edge> {
155        self.edges_by_source
156            .get(&source_id)
157            .map(|indices| indices.iter().map(|&i| &self.edges[i]).collect())
158            .unwrap_or_default()
159    }
160
161    /// Get all edges targeting the given unit.
162    pub fn edges_to(&self, target_id: u64) -> Vec<&Edge> {
163        self.edges_by_target
164            .get(&target_id)
165            .map(|indices| indices.iter().map(|&i| &self.edges[i]).collect())
166            .unwrap_or_default()
167    }
168
169    /// Get edges from a source filtered by edge type.
170    pub fn edges_from_of_type(&self, source_id: u64, edge_type: EdgeType) -> Vec<&Edge> {
171        self.edges_from(source_id)
172            .into_iter()
173            .filter(|e| e.edge_type == edge_type)
174            .collect()
175    }
176
177    /// Get edges to a target filtered by edge type.
178    pub fn edges_to_of_type(&self, target_id: u64, edge_type: EdgeType) -> Vec<&Edge> {
179        self.edges_to(target_id)
180            .into_iter()
181            .filter(|e| e.edge_type == edge_type)
182            .collect()
183    }
184
185    /// Find units by name (case-insensitive prefix match).
186    pub fn find_units_by_name(&self, prefix: &str) -> Vec<&CodeUnit> {
187        let prefix_lower = prefix.to_lowercase();
188        self.units
189            .iter()
190            .filter(|u| u.name.to_lowercase().starts_with(&prefix_lower))
191            .collect()
192    }
193
194    /// Find units by exact name.
195    pub fn find_units_by_exact_name(&self, name: &str) -> Vec<&CodeUnit> {
196        self.units.iter().filter(|u| u.name == name).collect()
197    }
198
199    /// Find units by type.
200    pub fn find_units_by_type(&self, unit_type: CodeUnitType) -> Vec<&CodeUnit> {
201        self.units
202            .iter()
203            .filter(|u| u.unit_type == unit_type)
204            .collect()
205    }
206
207    /// Find units by language.
208    pub fn find_units_by_language(&self, language: Language) -> Vec<&CodeUnit> {
209        self.units
210            .iter()
211            .filter(|u| u.language == language)
212            .collect()
213    }
214
215    /// Find units in a specific file.
216    pub fn find_units_by_path(&self, path: &std::path::Path) -> Vec<&CodeUnit> {
217        self.units.iter().filter(|u| u.file_path == path).collect()
218    }
219
220    /// Check if an edge between two units of a given type already exists.
221    pub fn has_edge(&self, source_id: u64, target_id: u64, edge_type: EdgeType) -> bool {
222        self.edges_from(source_id)
223            .iter()
224            .any(|e| e.target_id == target_id && e.edge_type == edge_type)
225    }
226
227    /// Get summary statistics about the graph.
228    pub fn stats(&self) -> GraphStats {
229        let mut type_counts: HashMap<CodeUnitType, usize> = HashMap::new();
230        let mut edge_type_counts: HashMap<EdgeType, usize> = HashMap::new();
231        let mut lang_counts: HashMap<Language, usize> = HashMap::new();
232
233        for unit in &self.units {
234            *type_counts.entry(unit.unit_type).or_default() += 1;
235            *lang_counts.entry(unit.language).or_default() += 1;
236        }
237        for edge in &self.edges {
238            *edge_type_counts.entry(edge.edge_type).or_default() += 1;
239        }
240
241        GraphStats {
242            unit_count: self.units.len(),
243            edge_count: self.edges.len(),
244            dimension: self.dimension,
245            type_counts,
246            edge_type_counts,
247            language_counts: lang_counts,
248        }
249    }
250}
251
252impl Default for CodeGraph {
253    fn default() -> Self {
254        Self::with_default_dimension()
255    }
256}
257
258/// Summary statistics about a code graph.
259#[derive(Debug, Clone)]
260pub struct GraphStats {
261    /// Total number of code units.
262    pub unit_count: usize,
263    /// Total number of edges.
264    pub edge_count: usize,
265    /// Feature vector dimension.
266    pub dimension: usize,
267    /// Count of units per type.
268    pub type_counts: HashMap<CodeUnitType, usize>,
269    /// Count of edges per type.
270    pub edge_type_counts: HashMap<EdgeType, usize>,
271    /// Count of units per language.
272    pub language_counts: HashMap<Language, usize>,
273}