agentic_codebase/types/code_unit.rs
//! CodeUnit — the atomic element of the code graph.
//!
//! A code unit represents any identifiable piece of code: a function, class,
//! module, import, test, or documentation block.
5
6use serde::{Deserialize, Serialize};
7use std::path::PathBuf;
8
9use super::language::Language;
10use super::span::Span;
11use super::DEFAULT_DIMENSION;
12
13/// The type of code unit stored in a node.
14#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
15#[repr(u8)]
16pub enum CodeUnitType {
17 /// A logical grouping (file, package, namespace, module).
18 Module = 0,
19 /// A named entity (function, class, variable, constant).
20 Symbol = 1,
21 /// A type definition (class, struct, interface, enum, type alias).
22 Type = 2,
23 /// A callable unit (function, method, closure).
24 Function = 3,
25 /// A function parameter or struct/class field.
26 Parameter = 4,
27 /// A dependency declaration (import, require, use).
28 Import = 5,
29 /// A test case or test suite.
30 Test = 6,
31 /// Documentation block (docstring, JSDoc, comment block).
32 Doc = 7,
33 /// Configuration value or constant.
34 Config = 8,
35 /// An identified design pattern (Singleton, Factory, etc.).
36 Pattern = 9,
37 /// A trait, interface, or protocol definition.
38 Trait = 10,
39 /// An implementation block (impl, class body).
40 Impl = 11,
41 /// A macro definition or invocation.
42 Macro = 12,
43}
44
45impl CodeUnitType {
46 /// Convert from raw byte value.
47 ///
48 /// Returns `None` for values that don't correspond to a known variant.
49 pub fn from_u8(value: u8) -> Option<Self> {
50 match value {
51 0 => Some(Self::Module),
52 1 => Some(Self::Symbol),
53 2 => Some(Self::Type),
54 3 => Some(Self::Function),
55 4 => Some(Self::Parameter),
56 5 => Some(Self::Import),
57 6 => Some(Self::Test),
58 7 => Some(Self::Doc),
59 8 => Some(Self::Config),
60 9 => Some(Self::Pattern),
61 10 => Some(Self::Trait),
62 11 => Some(Self::Impl),
63 12 => Some(Self::Macro),
64 _ => None,
65 }
66 }
67
68 /// Returns true if this type represents a callable.
69 pub fn is_callable(&self) -> bool {
70 matches!(self, Self::Function | Self::Macro)
71 }
72
73 /// Returns true if this type can have children.
74 pub fn is_container(&self) -> bool {
75 matches!(self, Self::Module | Self::Type | Self::Trait | Self::Impl)
76 }
77
78 /// Returns a human-readable label for this type.
79 pub fn label(&self) -> &'static str {
80 match self {
81 Self::Module => "module",
82 Self::Symbol => "symbol",
83 Self::Type => "type",
84 Self::Function => "function",
85 Self::Parameter => "parameter",
86 Self::Import => "import",
87 Self::Test => "test",
88 Self::Doc => "doc",
89 Self::Config => "config",
90 Self::Pattern => "pattern",
91 Self::Trait => "trait",
92 Self::Impl => "impl",
93 Self::Macro => "macro",
94 }
95 }
96}
97
98impl std::fmt::Display for CodeUnitType {
99 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
100 write!(f, "{}", self.label())
101 }
102}
103
104/// Symbol visibility/accessibility.
105#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
106#[repr(u8)]
107pub enum Visibility {
108 /// Accessible from anywhere.
109 Public = 0,
110 /// Accessible within module/file.
111 Private = 1,
112 /// Accessible within package/crate.
113 Internal = 2,
114 /// Protected (subclass access).
115 Protected = 3,
116 /// Unknown visibility.
117 Unknown = 255,
118}
119
120impl Visibility {
121 /// Convert from raw byte value.
122 pub fn from_u8(value: u8) -> Option<Self> {
123 match value {
124 0 => Some(Self::Public),
125 1 => Some(Self::Private),
126 2 => Some(Self::Internal),
127 3 => Some(Self::Protected),
128 255 => Some(Self::Unknown),
129 _ => None,
130 }
131 }
132}
133
134impl std::fmt::Display for Visibility {
135 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
136 match self {
137 Self::Public => write!(f, "public"),
138 Self::Private => write!(f, "private"),
139 Self::Internal => write!(f, "internal"),
140 Self::Protected => write!(f, "protected"),
141 Self::Unknown => write!(f, "unknown"),
142 }
143 }
144}
145
146/// A single code unit — the atomic element of the code graph.
147///
148/// Code units are the nodes of the semantic graph. Each represents an
149/// identifiable piece of code: a function, class, module, import, etc.
150#[derive(Debug, Clone, Serialize, Deserialize)]
151pub struct CodeUnit {
152 /// Unique identifier (assigned sequentially during compilation).
153 pub id: u64,
154
155 /// Type of code unit.
156 pub unit_type: CodeUnitType,
157
158 /// Programming language.
159 pub language: Language,
160
161 /// Simple name (e.g., "process_payment").
162 pub name: String,
163
164 /// Fully qualified name (e.g., "payments.stripe.process_payment").
165 pub qualified_name: String,
166
167 /// Source file path (relative to repo root).
168 pub file_path: PathBuf,
169
170 /// Location in source file.
171 pub span: Span,
172
173 /// Type signature if applicable (e.g., "(amount: Decimal) -> bool").
174 pub signature: Option<String>,
175
176 /// First line of documentation.
177 pub doc_summary: Option<String>,
178
179 // === Semantic metadata ===
180 /// Visibility level.
181 pub visibility: Visibility,
182
183 /// Cyclomatic complexity (0 for non-functions).
184 pub complexity: u32,
185
186 /// Is this async/await?
187 pub is_async: bool,
188
189 /// Is this a generator/iterator?
190 pub is_generator: bool,
191
192 // === Temporal metadata ===
193 /// First seen timestamp (git commit time, or compile time if no git).
194 pub created_at: u64,
195
196 /// Last modified timestamp.
197 pub last_modified: u64,
198
199 /// Total changes in git history.
200 pub change_count: u32,
201
202 /// Stability score: 0.0 = constantly changing, 1.0 = never changes.
203 pub stability_score: f32,
204
205 // === Collective metadata ===
206 /// Global usage count from collective (0 if private code).
207 pub collective_usage: u64,
208
209 /// Content hash for deduplication (Blake3).
210 pub content_hash: [u8; 32],
211
212 // === Vector for semantic search ===
213 /// Feature vector for similarity (dimension = DEFAULT_DIMENSION).
214 pub feature_vec: Vec<f32>,
215
216 // === Graph position (set by graph builder) ===
217 /// Byte offset into edge table.
218 pub edge_offset: u64,
219
220 /// Number of outgoing edges.
221 pub edge_count: u32,
222}
223
224impl CodeUnit {
225 /// Create a new code unit with required fields only.
226 ///
227 /// Optional fields are initialized to their defaults:
228 /// - `id` is 0 (set by the graph on insertion)
229 /// - `visibility` is `Unknown`
230 /// - `stability_score` is 1.0 (stable by default)
231 /// - `feature_vec` is zero-filled with `DEFAULT_DIMENSION` elements
232 pub fn new(
233 unit_type: CodeUnitType,
234 language: Language,
235 name: String,
236 qualified_name: String,
237 file_path: PathBuf,
238 span: Span,
239 ) -> Self {
240 let now = crate::types::now_micros();
241 Self {
242 id: 0,
243 unit_type,
244 language,
245 name,
246 qualified_name,
247 file_path,
248 span,
249 signature: None,
250 doc_summary: None,
251 visibility: Visibility::Unknown,
252 complexity: 0,
253 is_async: false,
254 is_generator: false,
255 created_at: now,
256 last_modified: now,
257 change_count: 0,
258 stability_score: 1.0,
259 collective_usage: 0,
260 content_hash: [0u8; 32],
261 feature_vec: vec![0.0; DEFAULT_DIMENSION],
262 edge_offset: 0,
263 edge_count: 0,
264 }
265 }
266}
267
268/// Builder for constructing [`CodeUnit`] instances with optional fields.
269///
270/// # Examples
271///
272/// ```
273/// use agentic_codebase::types::*;
274/// use std::path::PathBuf;
275///
276/// let unit = CodeUnitBuilder::new(
277/// CodeUnitType::Function,
278/// Language::Python,
279/// "my_func",
280/// "mymodule.my_func",
281/// PathBuf::from("src/mymodule.py"),
282/// Span::new(10, 0, 20, 0),
283/// )
284/// .signature("(x: int) -> bool")
285/// .doc("Checks if x is valid")
286/// .visibility(Visibility::Public)
287/// .complexity(3)
288/// .build();
289/// ```
290pub struct CodeUnitBuilder {
291 inner: CodeUnit,
292}
293
294impl CodeUnitBuilder {
295 /// Create a new builder with required fields.
296 pub fn new(
297 unit_type: CodeUnitType,
298 language: Language,
299 name: impl Into<String>,
300 qualified_name: impl Into<String>,
301 file_path: impl Into<PathBuf>,
302 span: Span,
303 ) -> Self {
304 Self {
305 inner: CodeUnit::new(
306 unit_type,
307 language,
308 name.into(),
309 qualified_name.into(),
310 file_path.into(),
311 span,
312 ),
313 }
314 }
315
316 /// Set the type signature.
317 pub fn signature(mut self, sig: impl Into<String>) -> Self {
318 self.inner.signature = Some(sig.into());
319 self
320 }
321
322 /// Set the documentation summary.
323 pub fn doc(mut self, doc: impl Into<String>) -> Self {
324 self.inner.doc_summary = Some(doc.into());
325 self
326 }
327
328 /// Set the visibility level.
329 pub fn visibility(mut self, vis: Visibility) -> Self {
330 self.inner.visibility = vis;
331 self
332 }
333
334 /// Set the cyclomatic complexity.
335 pub fn complexity(mut self, c: u32) -> Self {
336 self.inner.complexity = c;
337 self
338 }
339
340 /// Mark this unit as async.
341 pub fn async_fn(mut self) -> Self {
342 self.inner.is_async = true;
343 self
344 }
345
346 /// Mark this unit as a generator.
347 pub fn generator(mut self) -> Self {
348 self.inner.is_generator = true;
349 self
350 }
351
352 /// Set the feature vector.
353 pub fn feature_vec(mut self, vec: Vec<f32>) -> Self {
354 self.inner.feature_vec = vec;
355 self
356 }
357
358 /// Set the content hash.
359 pub fn content_hash(mut self, hash: [u8; 32]) -> Self {
360 self.inner.content_hash = hash;
361 self
362 }
363
364 /// Set timestamps.
365 pub fn timestamps(mut self, created: u64, modified: u64) -> Self {
366 self.inner.created_at = created;
367 self.inner.last_modified = modified;
368 self
369 }
370
371 /// Consume the builder and produce a [`CodeUnit`].
372 pub fn build(self) -> CodeUnit {
373 self.inner
374 }
375}