Skip to main content

arbor_core/
node.rs

//! Code node representation.
//!
//! A `CodeNode` is our abstraction over raw AST nodes. It captures
//! the semantically meaningful parts of code: what it is, where it lives,
//! and enough metadata to be useful for graph construction.
6
7use serde::{Deserialize, Serialize};
8use std::hash::{Hash, Hasher};
9
10/// The kind of code entity this node represents.
11///
12/// We intentionally keep this list focused on the entities that matter
13/// for understanding code structure. Helper nodes like expressions
14/// or statements are filtered out during extraction.
15#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
16#[serde(rename_all = "snake_case")]
17pub enum NodeKind {
18    /// A standalone function (not attached to a class).
19    Function,
20    /// A method inside a class or impl block.
21    Method,
22    /// A class definition.
23    Class,
24    /// An interface, protocol, or trait.
25    Interface,
26    /// A struct (Rust, Go).
27    Struct,
28    /// An enum definition.
29    Enum,
30    /// A module-level variable.
31    Variable,
32    /// A constant or static value.
33    Constant,
34    /// A type alias.
35    TypeAlias,
36    /// The file/module itself as a container.
37    Module,
38    /// An import statement.
39    Import,
40    /// An export declaration.
41    Export,
42    /// A constructor (Java, TypeScript class constructors).
43    Constructor,
44    /// A class field.
45    Field,
46}
47
48impl std::fmt::Display for NodeKind {
49    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
50        let s = match self {
51            Self::Function => "function",
52            Self::Method => "method",
53            Self::Class => "class",
54            Self::Interface => "interface",
55            Self::Struct => "struct",
56            Self::Enum => "enum",
57            Self::Variable => "variable",
58            Self::Constant => "constant",
59            Self::TypeAlias => "type_alias",
60            Self::Module => "module",
61            Self::Import => "import",
62            Self::Export => "export",
63            Self::Constructor => "constructor",
64            Self::Field => "field",
65        };
66        write!(f, "{}", s)
67    }
68}
69
70/// Visibility of a code entity.
71#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
72#[serde(rename_all = "snake_case")]
73pub enum Visibility {
74    #[default]
75    Private,
76    Public,
77    Protected,
78    /// Rust's pub(crate) or similar restricted visibility.
79    Internal,
80}
81
82/// A code entity extracted from source.
83///
84/// This is the core data type that flows through Arbor. It's designed
85/// to be language-agnostic while still capturing the structure we need.
86#[derive(Debug, Clone, Serialize, Deserialize)]
87pub struct CodeNode {
88    /// Unique identifier, derived from file path + qualified name + kind.
89    pub id: String,
90
91    /// The simple name (e.g., "validate_user").
92    pub name: String,
93
94    /// Fully qualified name including parent scope (e.g., "UserService.validate_user").
95    pub qualified_name: String,
96
97    /// What kind of entity this is.
98    pub kind: NodeKind,
99
100    /// Path to the source file, relative to project root.
101    pub file: String,
102
103    /// Starting line (1-indexed, like editors show).
104    pub line_start: u32,
105
106    /// Ending line (inclusive).
107    pub line_end: u32,
108
109    /// Column of the name identifier.
110    pub column: u32,
111
112    /// Function/method signature if applicable.
113    pub signature: Option<String>,
114
115    /// Visibility modifier.
116    pub visibility: Visibility,
117
118    /// Whether this is async.
119    pub is_async: bool,
120
121    /// Whether this is static/class-level.
122    pub is_static: bool,
123
124    /// Whether this is exported (TS/ES modules).
125    pub is_exported: bool,
126
127    /// Docstring or leading comment.
128    pub docstring: Option<String>,
129
130    /// Byte offset range in source for incremental updates.
131    pub byte_start: u32,
132    pub byte_end: u32,
133
134    /// Entities this node references (call targets, type refs, etc).
135    /// These are names, not IDs - resolution happens in the graph crate.
136    pub references: Vec<String>,
137}
138
139impl CodeNode {
140    /// Creates a deterministic ID for this node.
141    ///
142    /// The ID is a hash of (file, qualified_name, kind) so the same
143    /// entity always gets the same ID across parses.
144    pub fn compute_id(file: &str, qualified_name: &str, kind: NodeKind) -> String {
145        use std::collections::hash_map::DefaultHasher;
146
147        let mut hasher = DefaultHasher::new();
148        file.hash(&mut hasher);
149        qualified_name.hash(&mut hasher);
150        kind.hash(&mut hasher);
151
152        format!("{:016x}", hasher.finish())
153    }
154
155    /// Creates a new node and automatically computes its ID.
156    pub fn new(
157        name: impl Into<String>,
158        qualified_name: impl Into<String>,
159        kind: NodeKind,
160        file: impl Into<String>,
161    ) -> Self {
162        let name = name.into();
163        let qualified_name = qualified_name.into();
164        let file = file.into();
165        let id = Self::compute_id(&file, &qualified_name, kind);
166
167        Self {
168            id,
169            name,
170            qualified_name,
171            kind,
172            file,
173            line_start: 0,
174            line_end: 0,
175            column: 0,
176            signature: None,
177            visibility: Visibility::default(),
178            is_async: false,
179            is_static: false,
180            is_exported: false,
181            docstring: None,
182            byte_start: 0,
183            byte_end: 0,
184            references: Vec::new(),
185        }
186    }
187
188    /// Builder pattern: set line range.
189    pub fn with_lines(mut self, start: u32, end: u32) -> Self {
190        self.line_start = start;
191        self.line_end = end;
192        self
193    }
194
195    /// Builder pattern: set byte range.
196    pub fn with_bytes(mut self, start: u32, end: u32) -> Self {
197        self.byte_start = start;
198        self.byte_end = end;
199        self
200    }
201
202    /// Builder pattern: set column.
203    pub fn with_column(mut self, column: u32) -> Self {
204        self.column = column;
205        self
206    }
207
208    /// Builder pattern: set signature.
209    pub fn with_signature(mut self, sig: impl Into<String>) -> Self {
210        self.signature = Some(sig.into());
211        self
212    }
213
214    /// Builder pattern: set visibility.
215    pub fn with_visibility(mut self, vis: Visibility) -> Self {
216        self.visibility = vis;
217        self
218    }
219
220    /// Builder pattern: mark as async.
221    pub fn as_async(mut self) -> Self {
222        self.is_async = true;
223        self
224    }
225
226    /// Builder pattern: mark as static.
227    pub fn as_static(mut self) -> Self {
228        self.is_static = true;
229        self
230    }
231
232    /// Builder pattern: mark as exported.
233    pub fn as_exported(mut self) -> Self {
234        self.is_exported = true;
235        self
236    }
237
238    /// Builder pattern: add references.
239    pub fn with_references(mut self, refs: Vec<String>) -> Self {
240        self.references = refs;
241        self
242    }
243}
244
245impl PartialEq for CodeNode {
246    fn eq(&self, other: &Self) -> bool {
247        self.id == other.id
248    }
249}
250
251impl Eq for CodeNode {}
252
253impl Hash for CodeNode {
254    fn hash<H: Hasher>(&self, state: &mut H) {
255        self.id.hash(state);
256    }
257}