// arbor_core/node.rs

//! Code node representation.
//!
//! A `CodeNode` is our abstraction over raw AST nodes. It captures
//! the semantically meaningful parts of code: what it is, where it lives,
//! and enough metadata to be useful for graph construction.
6
7use serde::{Deserialize, Serialize};
8use std::hash::{Hash, Hasher};
9
/// The kind of code entity this node represents.
///
/// We intentionally keep this list focused on the entities that matter
/// for understanding code structure. Helper nodes like expressions
/// or statements are filtered out during extraction.
///
/// Serialized by serde in `snake_case` (e.g. `TypeAlias` becomes `"type_alias"`),
/// which is the same spelling the `Display` impl produces.
///
/// NOTE(review): the derived `Hash` of this enum is fed into
/// `CodeNode::compute_id`, and a derived enum `Hash` depends on the variant's
/// discriminant. Prefer appending new variants at the end; reordering existing
/// ones would presumably change previously computed IDs.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum NodeKind {
    /// A standalone function (not attached to a class).
    Function,
    /// A method inside a class or impl block.
    Method,
    /// A class definition.
    Class,
    /// An interface, protocol, or trait.
    Interface,
    /// A struct (Rust, Go).
    Struct,
    /// An enum definition.
    Enum,
    /// A module-level variable.
    Variable,
    /// A constant or static value.
    Constant,
    /// A type alias.
    TypeAlias,
    /// The file/module itself as a container.
    Module,
    /// An import statement.
    Import,
    /// An export declaration.
    Export,
    /// A constructor (Java, TypeScript class constructors).
    Constructor,
    /// A class field.
    Field,
    /// A document section or heading (for Markdown knowledge graphs in Lattice).
    Section,
}
49
50impl std::fmt::Display for NodeKind {
51    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
52        let s = match self {
53            Self::Function => "function",
54            Self::Method => "method",
55            Self::Class => "class",
56            Self::Interface => "interface",
57            Self::Struct => "struct",
58            Self::Enum => "enum",
59            Self::Variable => "variable",
60            Self::Constant => "constant",
61            Self::TypeAlias => "type_alias",
62            Self::Module => "module",
63            Self::Import => "import",
64            Self::Export => "export",
65            Self::Constructor => "constructor",
66            Self::Field => "field",
67            Self::Section => "section",
68        };
69        write!(f, "{}", s)
70    }
71}
72
/// Visibility of a code entity.
///
/// Defaults to [`Visibility::Private`] and is serialized by serde in
/// `snake_case` (e.g. `"private"`, `"internal"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum Visibility {
    /// Private visibility. This is the `Default` value.
    #[default]
    Private,
    /// Public visibility.
    Public,
    /// Protected visibility (presumably for languages with a `protected`
    /// modifier; confirm against the extractors).
    Protected,
    /// Rust's pub(crate) or similar restricted visibility.
    Internal,
}
84
/// A code entity extracted from source.
///
/// This is the core data type that flows through Arbor. It's designed
/// to be language-agnostic while still capturing the structure we need.
///
/// Equality and hashing for this type look only at `id`; two nodes with the
/// same ID compare equal even if every other field differs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeNode {
    /// Unique identifier, derived from file path + qualified name + kind.
    ///
    /// `CodeNode::new` fills this in via `CodeNode::compute_id`.
    pub id: String,

    /// The simple name (e.g., "validate_user").
    pub name: String,

    /// Fully qualified name including parent scope (e.g., "UserService.validate_user").
    pub qualified_name: String,

    /// What kind of entity this is.
    pub kind: NodeKind,

    /// Path to the source file, relative to project root.
    pub file: String,

    /// Starting line (1-indexed, like editors show).
    ///
    /// `CodeNode::new` leaves this at 0 until `with_lines` is called.
    pub line_start: u32,

    /// Ending line (inclusive).
    ///
    /// `CodeNode::new` leaves this at 0 until `with_lines` is called.
    pub line_end: u32,

    /// Column of the name identifier.
    pub column: u32,

    /// Function/method signature if applicable.
    pub signature: Option<String>,

    /// Visibility modifier.
    pub visibility: Visibility,

    /// Whether this is async.
    pub is_async: bool,

    /// Whether this is static/class-level.
    pub is_static: bool,

    /// Whether this is exported (TS/ES modules).
    pub is_exported: bool,

    /// Docstring or leading comment.
    pub docstring: Option<String>,

    /// Byte offset range in source for incremental updates.
    ///
    /// This is the start of the range; `byte_end` is the other bound.
    pub byte_start: u32,
    /// End of the byte offset range that begins at `byte_start`.
    ///
    /// NOTE(review): whether this bound is inclusive or exclusive is not
    /// visible here; confirm against the extractor that fills it in.
    pub byte_end: u32,

    /// Entities this node references (call targets, type refs, etc).
    /// These are names, not IDs - resolution happens in the graph crate.
    pub references: Vec<String>,
}
141
142impl CodeNode {
143    /// Creates a deterministic ID for this node.
144    ///
145    /// The ID is a hash of (file, qualified_name, kind) so the same
146    /// entity always gets the same ID across parses.
147    pub fn compute_id(file: &str, qualified_name: &str, kind: NodeKind) -> String {
148        use std::collections::hash_map::DefaultHasher;
149
150        let mut hasher = DefaultHasher::new();
151        file.hash(&mut hasher);
152        qualified_name.hash(&mut hasher);
153        kind.hash(&mut hasher);
154
155        format!("{:016x}", hasher.finish())
156    }
157
158    /// Creates a new node and automatically computes its ID.
159    pub fn new(
160        name: impl Into<String>,
161        qualified_name: impl Into<String>,
162        kind: NodeKind,
163        file: impl Into<String>,
164    ) -> Self {
165        let name = name.into();
166        let qualified_name = qualified_name.into();
167        let file = file.into();
168        let id = Self::compute_id(&file, &qualified_name, kind);
169
170        Self {
171            id,
172            name,
173            qualified_name,
174            kind,
175            file,
176            line_start: 0,
177            line_end: 0,
178            column: 0,
179            signature: None,
180            visibility: Visibility::default(),
181            is_async: false,
182            is_static: false,
183            is_exported: false,
184            docstring: None,
185            byte_start: 0,
186            byte_end: 0,
187            references: Vec::new(),
188        }
189    }
190
191    /// Builder pattern: set line range.
192    pub fn with_lines(mut self, start: u32, end: u32) -> Self {
193        self.line_start = start;
194        self.line_end = end;
195        self
196    }
197
198    /// Builder pattern: set byte range.
199    pub fn with_bytes(mut self, start: u32, end: u32) -> Self {
200        self.byte_start = start;
201        self.byte_end = end;
202        self
203    }
204
205    /// Builder pattern: set column.
206    pub fn with_column(mut self, column: u32) -> Self {
207        self.column = column;
208        self
209    }
210
211    /// Builder pattern: set signature.
212    pub fn with_signature(mut self, sig: impl Into<String>) -> Self {
213        self.signature = Some(sig.into());
214        self
215    }
216
217    /// Builder pattern: set visibility.
218    pub fn with_visibility(mut self, vis: Visibility) -> Self {
219        self.visibility = vis;
220        self
221    }
222
223    /// Builder pattern: mark as async.
224    pub fn as_async(mut self) -> Self {
225        self.is_async = true;
226        self
227    }
228
229    /// Builder pattern: mark as static.
230    pub fn as_static(mut self) -> Self {
231        self.is_static = true;
232        self
233    }
234
235    /// Builder pattern: mark as exported.
236    pub fn as_exported(mut self) -> Self {
237        self.is_exported = true;
238        self
239    }
240
241    /// Builder pattern: add references.
242    pub fn with_references(mut self, refs: Vec<String>) -> Self {
243        self.references = refs;
244        self
245    }
246}
247
248impl PartialEq for CodeNode {
249    fn eq(&self, other: &Self) -> bool {
250        self.id == other.id
251    }
252}
253
// Marker impl: `eq` compares the `String` IDs, which is a full equivalence relation.
impl Eq for CodeNode {}
255
256impl Hash for CodeNode {
257    fn hash<H: Hasher>(&self, state: &mut H) {
258        self.id.hash(state);
259    }
260}
261
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    #[test]
    fn test_node_kind_display_all_variants() {
        // Check Display for every NodeKind variant, in declaration order so a
        // missing variant is easy to spot when the enum grows.
        assert_eq!(NodeKind::Function.to_string(), "function");
        assert_eq!(NodeKind::Method.to_string(), "method");
        assert_eq!(NodeKind::Class.to_string(), "class");
        assert_eq!(NodeKind::Interface.to_string(), "interface");
        assert_eq!(NodeKind::Struct.to_string(), "struct");
        assert_eq!(NodeKind::Enum.to_string(), "enum");
        assert_eq!(NodeKind::Variable.to_string(), "variable");
        assert_eq!(NodeKind::Constant.to_string(), "constant");
        assert_eq!(NodeKind::TypeAlias.to_string(), "type_alias");
        assert_eq!(NodeKind::Module.to_string(), "module");
        assert_eq!(NodeKind::Import.to_string(), "import");
        assert_eq!(NodeKind::Export.to_string(), "export");
        assert_eq!(NodeKind::Constructor.to_string(), "constructor");
        assert_eq!(NodeKind::Field.to_string(), "field");
        assert_eq!(NodeKind::Section.to_string(), "section");
    }

    #[test]
    fn test_visibility_default_is_private() {
        let vis = Visibility::default();
        assert!(matches!(vis, Visibility::Private));
    }

    #[test]
    fn test_builder_pattern_chain() {
        // Verify that all builder methods compose correctly
        let node = CodeNode::new("foo", "pkg.foo", NodeKind::Function, "main.rs")
            .with_lines(10, 20)
            .with_bytes(100, 300)
            .with_column(4)
            .with_signature("fn foo(x: i32) -> bool")
            .with_visibility(Visibility::Public)
            .as_async()
            .as_static()
            .as_exported()
            .with_references(vec!["bar".to_string(), "baz".to_string()]);

        assert_eq!(node.name, "foo");
        assert_eq!(node.qualified_name, "pkg.foo");
        assert_eq!(node.file, "main.rs");
        assert_eq!(node.line_start, 10);
        assert_eq!(node.line_end, 20);
        assert_eq!(node.byte_start, 100);
        assert_eq!(node.byte_end, 300);
        assert_eq!(node.column, 4);
        assert_eq!(node.signature.as_deref(), Some("fn foo(x: i32) -> bool"));
        assert!(matches!(node.visibility, Visibility::Public));
        assert!(node.is_async);
        assert!(node.is_static);
        assert!(node.is_exported);
        assert_eq!(node.references.len(), 2);
    }

    #[test]
    fn test_code_node_equality_by_id() {
        // PartialEq compares by ID only, not by other fields
        let node1 = CodeNode::new("foo", "foo", NodeKind::Function, "a.rs");
        let node2 = CodeNode::new("foo", "foo", NodeKind::Function, "a.rs");
        // Same inputs → same ID → equal
        assert_eq!(node1, node2);

        // Different kind → different ID → not equal
        let node3 = CodeNode::new("foo", "foo", NodeKind::Method, "a.rs");
        assert_ne!(node1, node3);
    }

    #[test]
    fn test_code_node_hash_consistency() {
        // Same node should hash identically, and be usable in HashSet
        let node1 = CodeNode::new("foo", "foo", NodeKind::Function, "main.rs");
        let node2 = CodeNode::new("foo", "foo", NodeKind::Function, "main.rs");

        let mut set = HashSet::new();
        set.insert(node1.clone());
        assert!(set.contains(&node2));
        // Inserting duplicate should not increase size
        set.insert(node2);
        assert_eq!(set.len(), 1);
    }

    #[test]
    fn test_compute_id_deterministic() {
        // Same inputs must always produce the same ID
        let id1 = CodeNode::compute_id("test.rs", "main", NodeKind::Function);
        let id2 = CodeNode::compute_id("test.rs", "main", NodeKind::Function);
        assert_eq!(id1, id2);
    }

    #[test]
    fn test_compute_id_different_kinds_differ() {
        // A function and a struct with the same name should have different IDs
        let id_fn = CodeNode::compute_id("test.rs", "Foo", NodeKind::Function);
        let id_struct = CodeNode::compute_id("test.rs", "Foo", NodeKind::Struct);
        assert_ne!(id_fn, id_struct);
    }

    #[test]
    fn test_compute_id_different_files_differ() {
        let id1 = CodeNode::compute_id("a.rs", "main", NodeKind::Function);
        let id2 = CodeNode::compute_id("b.rs", "main", NodeKind::Function);
        assert_ne!(id1, id2);
    }

    #[test]
    fn test_node_default_values() {
        // Verify sensible defaults for a freshly created node
        let node = CodeNode::new("f", "f", NodeKind::Function, "x.rs");
        assert_eq!(node.line_start, 0);
        assert_eq!(node.line_end, 0);
        assert_eq!(node.byte_start, 0);
        assert_eq!(node.byte_end, 0);
        assert_eq!(node.column, 0);
        assert!(node.signature.is_none());
        assert!(node.docstring.is_none());
        assert!(!node.is_async);
        assert!(!node.is_static);
        assert!(!node.is_exported);
        assert!(node.references.is_empty());
        assert!(matches!(node.visibility, Visibility::Private));
    }
}