Skip to main content

agentic_codebase/types/
code_unit.rs

1//! CodeUnit — the atomic element of the code graph.
2//!
3//! A code unit represents any identifiable piece of code: a function, class,
4//! module, import, test, or documentation block.
5
6use serde::{Deserialize, Serialize};
7use std::path::PathBuf;
8
9use super::language::Language;
10use super::span::Span;
11use super::DEFAULT_DIMENSION;
12
13/// The type of code unit stored in a node.
14#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
15#[repr(u8)]
16pub enum CodeUnitType {
17    /// A logical grouping (file, package, namespace, module).
18    Module = 0,
19    /// A named entity (function, class, variable, constant).
20    Symbol = 1,
21    /// A type definition (class, struct, interface, enum, type alias).
22    Type = 2,
23    /// A callable unit (function, method, closure).
24    Function = 3,
25    /// A function parameter or struct/class field.
26    Parameter = 4,
27    /// A dependency declaration (import, require, use).
28    Import = 5,
29    /// A test case or test suite.
30    Test = 6,
31    /// Documentation block (docstring, JSDoc, comment block).
32    Doc = 7,
33    /// Configuration value or constant.
34    Config = 8,
35    /// An identified design pattern (Singleton, Factory, etc.).
36    Pattern = 9,
37    /// A trait, interface, or protocol definition.
38    Trait = 10,
39    /// An implementation block (impl, class body).
40    Impl = 11,
41    /// A macro definition or invocation.
42    Macro = 12,
43}
44
45impl CodeUnitType {
46    /// Convert from raw byte value.
47    ///
48    /// Returns `None` for values that don't correspond to a known variant.
49    pub fn from_u8(value: u8) -> Option<Self> {
50        match value {
51            0 => Some(Self::Module),
52            1 => Some(Self::Symbol),
53            2 => Some(Self::Type),
54            3 => Some(Self::Function),
55            4 => Some(Self::Parameter),
56            5 => Some(Self::Import),
57            6 => Some(Self::Test),
58            7 => Some(Self::Doc),
59            8 => Some(Self::Config),
60            9 => Some(Self::Pattern),
61            10 => Some(Self::Trait),
62            11 => Some(Self::Impl),
63            12 => Some(Self::Macro),
64            _ => None,
65        }
66    }
67
68    /// Returns true if this type represents a callable.
69    pub fn is_callable(&self) -> bool {
70        matches!(self, Self::Function | Self::Macro)
71    }
72
73    /// Returns true if this type can have children.
74    pub fn is_container(&self) -> bool {
75        matches!(self, Self::Module | Self::Type | Self::Trait | Self::Impl)
76    }
77
78    /// Returns a human-readable label for this type.
79    pub fn label(&self) -> &'static str {
80        match self {
81            Self::Module => "module",
82            Self::Symbol => "symbol",
83            Self::Type => "type",
84            Self::Function => "function",
85            Self::Parameter => "parameter",
86            Self::Import => "import",
87            Self::Test => "test",
88            Self::Doc => "doc",
89            Self::Config => "config",
90            Self::Pattern => "pattern",
91            Self::Trait => "trait",
92            Self::Impl => "impl",
93            Self::Macro => "macro",
94        }
95    }
96}
97
98impl std::fmt::Display for CodeUnitType {
99    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
100        write!(f, "{}", self.label())
101    }
102}
103
104/// Symbol visibility/accessibility.
105#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
106#[repr(u8)]
107pub enum Visibility {
108    /// Accessible from anywhere.
109    Public = 0,
110    /// Accessible within module/file.
111    Private = 1,
112    /// Accessible within package/crate.
113    Internal = 2,
114    /// Protected (subclass access).
115    Protected = 3,
116    /// Unknown visibility.
117    Unknown = 255,
118}
119
120impl Visibility {
121    /// Convert from raw byte value.
122    pub fn from_u8(value: u8) -> Option<Self> {
123        match value {
124            0 => Some(Self::Public),
125            1 => Some(Self::Private),
126            2 => Some(Self::Internal),
127            3 => Some(Self::Protected),
128            255 => Some(Self::Unknown),
129            _ => None,
130        }
131    }
132}
133
134impl std::fmt::Display for Visibility {
135    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
136        match self {
137            Self::Public => write!(f, "public"),
138            Self::Private => write!(f, "private"),
139            Self::Internal => write!(f, "internal"),
140            Self::Protected => write!(f, "protected"),
141            Self::Unknown => write!(f, "unknown"),
142        }
143    }
144}
145
146/// A single code unit — the atomic element of the code graph.
147///
148/// Code units are the nodes of the semantic graph. Each represents an
149/// identifiable piece of code: a function, class, module, import, etc.
150#[derive(Debug, Clone, Serialize, Deserialize)]
151pub struct CodeUnit {
152    /// Unique identifier (assigned sequentially during compilation).
153    pub id: u64,
154
155    /// Type of code unit.
156    pub unit_type: CodeUnitType,
157
158    /// Programming language.
159    pub language: Language,
160
161    /// Simple name (e.g., "process_payment").
162    pub name: String,
163
164    /// Fully qualified name (e.g., "payments.stripe.process_payment").
165    pub qualified_name: String,
166
167    /// Source file path (relative to repo root).
168    pub file_path: PathBuf,
169
170    /// Location in source file.
171    pub span: Span,
172
173    /// Type signature if applicable (e.g., "(amount: Decimal) -> bool").
174    pub signature: Option<String>,
175
176    /// First line of documentation.
177    pub doc_summary: Option<String>,
178
179    // === Semantic metadata ===
180    /// Visibility level.
181    pub visibility: Visibility,
182
183    /// Cyclomatic complexity (0 for non-functions).
184    pub complexity: u32,
185
186    /// Is this async/await?
187    pub is_async: bool,
188
189    /// Is this a generator/iterator?
190    pub is_generator: bool,
191
192    // === Temporal metadata ===
193    /// First seen timestamp (git commit time, or compile time if no git).
194    pub created_at: u64,
195
196    /// Last modified timestamp.
197    pub last_modified: u64,
198
199    /// Total changes in git history.
200    pub change_count: u32,
201
202    /// Stability score: 0.0 = constantly changing, 1.0 = never changes.
203    pub stability_score: f32,
204
205    // === Collective metadata ===
206    /// Global usage count from collective (0 if private code).
207    pub collective_usage: u64,
208
209    /// Content hash for deduplication (Blake3).
210    pub content_hash: [u8; 32],
211
212    // === Vector for semantic search ===
213    /// Feature vector for similarity (dimension = DEFAULT_DIMENSION).
214    pub feature_vec: Vec<f32>,
215
216    // === Graph position (set by graph builder) ===
217    /// Byte offset into edge table.
218    pub edge_offset: u64,
219
220    /// Number of outgoing edges.
221    pub edge_count: u32,
222}
223
224impl CodeUnit {
225    /// Create a new code unit with required fields only.
226    ///
227    /// Optional fields are initialized to their defaults:
228    /// - `id` is 0 (set by the graph on insertion)
229    /// - `visibility` is `Unknown`
230    /// - `stability_score` is 1.0 (stable by default)
231    /// - `feature_vec` is zero-filled with `DEFAULT_DIMENSION` elements
232    pub fn new(
233        unit_type: CodeUnitType,
234        language: Language,
235        name: String,
236        qualified_name: String,
237        file_path: PathBuf,
238        span: Span,
239    ) -> Self {
240        let now = crate::types::now_micros();
241        Self {
242            id: 0,
243            unit_type,
244            language,
245            name,
246            qualified_name,
247            file_path,
248            span,
249            signature: None,
250            doc_summary: None,
251            visibility: Visibility::Unknown,
252            complexity: 0,
253            is_async: false,
254            is_generator: false,
255            created_at: now,
256            last_modified: now,
257            change_count: 0,
258            stability_score: 1.0,
259            collective_usage: 0,
260            content_hash: [0u8; 32],
261            feature_vec: vec![0.0; DEFAULT_DIMENSION],
262            edge_offset: 0,
263            edge_count: 0,
264        }
265    }
266}
267
268/// Builder for constructing [`CodeUnit`] instances with optional fields.
269///
270/// # Examples
271///
272/// ```
273/// use agentic_codebase::types::*;
274/// use std::path::PathBuf;
275///
276/// let unit = CodeUnitBuilder::new(
277///     CodeUnitType::Function,
278///     Language::Python,
279///     "my_func",
280///     "mymodule.my_func",
281///     PathBuf::from("src/mymodule.py"),
282///     Span::new(10, 0, 20, 0),
283/// )
284/// .signature("(x: int) -> bool")
285/// .doc("Checks if x is valid")
286/// .visibility(Visibility::Public)
287/// .complexity(3)
288/// .build();
289/// ```
290pub struct CodeUnitBuilder {
291    inner: CodeUnit,
292}
293
294impl CodeUnitBuilder {
295    /// Create a new builder with required fields.
296    pub fn new(
297        unit_type: CodeUnitType,
298        language: Language,
299        name: impl Into<String>,
300        qualified_name: impl Into<String>,
301        file_path: impl Into<PathBuf>,
302        span: Span,
303    ) -> Self {
304        Self {
305            inner: CodeUnit::new(
306                unit_type,
307                language,
308                name.into(),
309                qualified_name.into(),
310                file_path.into(),
311                span,
312            ),
313        }
314    }
315
316    /// Set the type signature.
317    pub fn signature(mut self, sig: impl Into<String>) -> Self {
318        self.inner.signature = Some(sig.into());
319        self
320    }
321
322    /// Set the documentation summary.
323    pub fn doc(mut self, doc: impl Into<String>) -> Self {
324        self.inner.doc_summary = Some(doc.into());
325        self
326    }
327
328    /// Set the visibility level.
329    pub fn visibility(mut self, vis: Visibility) -> Self {
330        self.inner.visibility = vis;
331        self
332    }
333
334    /// Set the cyclomatic complexity.
335    pub fn complexity(mut self, c: u32) -> Self {
336        self.inner.complexity = c;
337        self
338    }
339
340    /// Mark this unit as async.
341    pub fn async_fn(mut self) -> Self {
342        self.inner.is_async = true;
343        self
344    }
345
346    /// Mark this unit as a generator.
347    pub fn generator(mut self) -> Self {
348        self.inner.is_generator = true;
349        self
350    }
351
352    /// Set the feature vector.
353    pub fn feature_vec(mut self, vec: Vec<f32>) -> Self {
354        self.inner.feature_vec = vec;
355        self
356    }
357
358    /// Set the content hash.
359    pub fn content_hash(mut self, hash: [u8; 32]) -> Self {
360        self.inner.content_hash = hash;
361        self
362    }
363
364    /// Set timestamps.
365    pub fn timestamps(mut self, created: u64, modified: u64) -> Self {
366        self.inner.created_at = created;
367        self.inner.last_modified = modified;
368        self
369    }
370
371    /// Consume the builder and produce a [`CodeUnit`].
372    pub fn build(self) -> CodeUnit {
373        self.inner
374    }
375}