aurora_semantic/
types.rs

1//! Core types used throughout the aurora-semantic crate.
2
3use serde::{Deserialize, Serialize};
4use std::path::PathBuf;
5use uuid::Uuid;
6
7/// Unique identifier for a workspace.
8#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
9pub struct WorkspaceId(pub Uuid);
10
11impl WorkspaceId {
12    /// Create a new random workspace ID.
13    pub fn new() -> Self {
14        Self(Uuid::new_v4())
15    }
16
17    /// Create from an existing UUID.
18    pub fn from_uuid(uuid: Uuid) -> Self {
19        Self(uuid)
20    }
21
22    /// Get the inner UUID.
23    pub fn as_uuid(&self) -> &Uuid {
24        &self.0
25    }
26}
27
28impl Default for WorkspaceId {
29    fn default() -> Self {
30        Self::new()
31    }
32}
33
34impl std::fmt::Display for WorkspaceId {
35    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
36        write!(f, "{}", self.0)
37    }
38}
39
40/// Unique identifier for a document (source file).
41#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
42pub struct DocumentId(pub Uuid);
43
44impl DocumentId {
45    /// Create a new unique document ID.
46    pub fn new() -> Self {
47        Self(Uuid::new_v4())
48    }
49}
50
51impl Default for DocumentId {
52    fn default() -> Self {
53        Self::new()
54    }
55}
56
57/// Unique identifier for a chunk within a document.
58#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
59pub struct ChunkId(pub Uuid);
60
61impl ChunkId {
62    /// Create a new unique chunk ID.
63    pub fn new() -> Self {
64        Self(Uuid::new_v4())
65    }
66}
67
68impl Default for ChunkId {
69    fn default() -> Self {
70        Self::new()
71    }
72}
73
74/// Represents a source code document.
75#[derive(Debug, Clone, Serialize, Deserialize)]
76pub struct Document {
77    /// Unique identifier for this document.
78    pub id: DocumentId,
79    /// Path relative to workspace root.
80    pub relative_path: PathBuf,
81    /// Absolute path on disk.
82    pub absolute_path: PathBuf,
83    /// Programming language.
84    pub language: Language,
85    /// Content hash for change detection.
86    pub content_hash: String,
87    /// File size in bytes.
88    pub size_bytes: u64,
89    /// Last modification timestamp.
90    pub modified_at: chrono::DateTime<chrono::Utc>,
91}
92
93/// A chunk of source code extracted from a document.
94#[derive(Debug, Clone, Serialize, Deserialize)]
95pub struct Chunk {
96    /// Unique identifier for this chunk.
97    pub id: ChunkId,
98    /// Document this chunk belongs to.
99    pub document_id: DocumentId,
100    /// The actual source code content.
101    pub content: String,
102    /// Type of code construct this chunk represents.
103    pub chunk_type: ChunkType,
104    /// Starting line number (1-indexed).
105    pub start_line: u32,
106    /// Ending line number (1-indexed).
107    pub end_line: u32,
108    /// Starting byte offset.
109    pub start_byte: usize,
110    /// Ending byte offset.
111    pub end_byte: usize,
112    /// Name of the symbol (function name, class name, etc.).
113    pub symbol_name: Option<String>,
114    /// Parent symbol name for nested constructs.
115    pub parent_symbol: Option<String>,
116}
117
118/// Type of code chunk.
119#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
120pub enum ChunkType {
121    /// Complete function or method.
122    Function,
123    /// Class or struct definition.
124    Class,
125    /// Struct definition (for languages that distinguish).
126    Struct,
127    /// Enum definition.
128    Enum,
129    /// Interface or trait definition.
130    Interface,
131    /// Module or namespace.
132    Module,
133    /// Import/use statements block.
134    Imports,
135    /// Constant or static variable.
136    Constant,
137    /// Type alias or typedef.
138    TypeDef,
139    /// Implementation block (Rust impl).
140    Implementation,
141    /// Generic code block (fallback).
142    Block,
143    /// Comment block or documentation.
144    Comment,
145}
146
147/// Supported programming languages.
148#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
149pub enum Language {
150    /// Rust programming language.
151    Rust,
152    /// Python programming language.
153    Python,
154    /// JavaScript (ES6+).
155    JavaScript,
156    /// TypeScript (including TSX/JSX).
157    TypeScript,
158    /// Go programming language.
159    Go,
160    /// C programming language.
161    C,
162    /// C++ programming language.
163    Cpp,
164    /// Java programming language.
165    Java,
166    /// Unknown or unsupported language.
167    Unknown,
168}
169
170impl Language {
171    /// Detect language from file extension.
172    pub fn from_extension(ext: &str) -> Self {
173        match ext.to_lowercase().as_str() {
174            "rs" => Language::Rust,
175            "py" | "pyw" | "pyi" => Language::Python,
176            "js" | "mjs" | "cjs" => Language::JavaScript,
177            "ts" | "mts" | "cts" => Language::TypeScript,
178            "tsx" | "jsx" => Language::TypeScript, // TSX/JSX treated as TypeScript
179            "go" => Language::Go,
180            "c" | "h" => Language::C,
181            "cpp" | "cc" | "cxx" | "hpp" | "hxx" | "hh" => Language::Cpp,
182            "java" => Language::Java,
183            _ => Language::Unknown,
184        }
185    }
186
187    /// Get file extensions for this language.
188    pub fn extensions(&self) -> &'static [&'static str] {
189        match self {
190            Language::Rust => &["rs"],
191            Language::Python => &["py", "pyw", "pyi"],
192            Language::JavaScript => &["js", "mjs", "cjs", "jsx"],
193            Language::TypeScript => &["ts", "mts", "cts", "tsx"],
194            Language::Go => &["go"],
195            Language::C => &["c", "h"],
196            Language::Cpp => &["cpp", "cc", "cxx", "hpp", "hxx", "hh"],
197            Language::Java => &["java"],
198            Language::Unknown => &[],
199        }
200    }
201
202    /// Get the language name as a string.
203    pub fn as_str(&self) -> &'static str {
204        match self {
205            Language::Rust => "rust",
206            Language::Python => "python",
207            Language::JavaScript => "javascript",
208            Language::TypeScript => "typescript",
209            Language::Go => "go",
210            Language::C => "c",
211            Language::Cpp => "cpp",
212            Language::Java => "java",
213            Language::Unknown => "unknown",
214        }
215    }
216}
217
218/// A search result with relevance score.
219#[derive(Debug, Clone, Serialize, Deserialize)]
220pub struct SearchResult {
221    /// The matching chunk.
222    pub chunk: Chunk,
223    /// Document information.
224    pub document: Document,
225    /// Relevance score (0.0 to 1.0).
226    pub score: f32,
227    /// Which search method found this result.
228    pub match_type: MatchType,
229    /// Highlighted snippets showing matches.
230    pub highlights: Vec<Highlight>,
231}
232
233/// Type of match that produced a search result.
234#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
235pub enum MatchType {
236    /// Found via lexical (keyword) search.
237    Lexical,
238    /// Found via semantic (embedding) search.
239    Semantic,
240    /// Found via hybrid search (combined score).
241    Hybrid,
242}
243
244/// A highlighted portion of text showing a match.
245#[derive(Debug, Clone, Serialize, Deserialize)]
246pub struct Highlight {
247    /// Start offset within the chunk content.
248    pub start: usize,
249    /// End offset within the chunk content.
250    pub end: usize,
251    /// The matched text.
252    pub text: String,
253}
254
255/// Progress information during indexing.
256#[derive(Debug, Clone, Serialize, Deserialize)]
257pub struct IndexProgress {
258    /// Current phase of indexing.
259    pub phase: IndexPhase,
260    /// Number of items processed.
261    pub processed: usize,
262    /// Total number of items to process.
263    pub total: usize,
264    /// Current file being processed (if applicable).
265    pub current_file: Option<PathBuf>,
266    /// Estimated time remaining in seconds.
267    pub eta_seconds: Option<f64>,
268}
269
270impl IndexProgress {
271    /// Get progress as a percentage (0.0 to 100.0).
272    pub fn percentage(&self) -> f64 {
273        if self.total == 0 {
274            0.0
275        } else {
276            (self.processed as f64 / self.total as f64) * 100.0
277        }
278    }
279}
280
281/// Phases of the indexing process.
282#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
283pub enum IndexPhase {
284    /// Scanning filesystem for files.
285    Scanning,
286    /// Parsing source code into chunks.
287    Parsing,
288    /// Generating embeddings for semantic search.
289    Embedding,
290    /// Building search indexes.
291    Indexing,
292    /// Writing index to disk.
293    Persisting,
294    /// Indexing complete.
295    Complete,
296}
297
298/// Statistics about an indexed workspace.
299#[derive(Debug, Clone, Serialize, Deserialize)]
300pub struct WorkspaceStats {
301    /// Workspace identifier.
302    pub workspace_id: WorkspaceId,
303    /// Root path of the workspace.
304    pub root_path: PathBuf,
305    /// Total number of indexed documents.
306    pub document_count: usize,
307    /// Total number of chunks.
308    pub chunk_count: usize,
309    /// Total size of source files in bytes.
310    pub total_bytes: u64,
311    /// Size of the index on disk in bytes.
312    pub index_size_bytes: u64,
313    /// When the index was created.
314    pub created_at: chrono::DateTime<chrono::Utc>,
315    /// When the index was last updated.
316    pub updated_at: chrono::DateTime<chrono::Utc>,
317    /// Breakdown by language.
318    pub language_stats: Vec<LanguageStats>,
319}
320
321/// Statistics for a single language in a workspace.
322#[derive(Debug, Clone, Serialize, Deserialize)]
323pub struct LanguageStats {
324    /// The language.
325    pub language: Language,
326    /// Number of files.
327    pub file_count: usize,
328    /// Number of chunks.
329    pub chunk_count: usize,
330    /// Total bytes.
331    pub total_bytes: u64,
332}