infiniloom_engine/
types.rs

1//! Core type definitions for Infiniloom
2
3use serde::{Deserialize, Serialize};
4use std::fmt;
5use std::path::PathBuf;
6
7// Re-export canonical tokenizer types from tokenizer module
8pub use crate::tokenizer::{TokenCounts, TokenModel};
9
10/// Backward-compatible alias for TokenModel
11pub type TokenizerModel = TokenModel;
12
/// A scanned repository.
///
/// Bundles the repository's identity (`name`, `path`) with the per-file records
/// in `files` and the aggregate statistics in `metadata`.
///
/// Note that the counts stored in `metadata` are separate fields: nothing in this
/// type keeps them in sync with `files` automatically.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Repository {
    /// Repository name (usually directory name)
    pub name: String,
    /// Absolute path to repository root
    pub path: PathBuf,
    /// List of files in the repository
    pub files: Vec<RepoFile>,
    /// Repository metadata and statistics
    pub metadata: RepoMetadata,
}
25
26impl Repository {
27    /// Create a new empty repository
28    pub fn new(name: impl Into<String>, path: impl Into<PathBuf>) -> Self {
29        Self {
30            name: name.into(),
31            path: path.into(),
32            files: Vec::new(),
33            metadata: RepoMetadata::default(),
34        }
35    }
36
37    /// Get total token count for a specific model
38    pub fn total_tokens(&self, model: TokenizerModel) -> u32 {
39        self.files.iter().map(|f| f.token_count.get(model)).sum()
40    }
41
42    /// Get files filtered by language
43    pub fn files_by_language(&self, language: &str) -> Vec<&RepoFile> {
44        self.files
45            .iter()
46            .filter(|f| f.language.as_deref() == Some(language))
47            .collect()
48    }
49
50    /// Get files sorted by importance
51    #[must_use]
52    pub fn files_by_importance(&self) -> Vec<&RepoFile> {
53        let mut files: Vec<_> = self.files.iter().collect();
54        files.sort_by(|a, b| {
55            b.importance
56                .partial_cmp(&a.importance)
57                .unwrap_or(std::cmp::Ordering::Equal)
58        });
59        files
60    }
61}
62
63impl fmt::Display for Repository {
64    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
65        write!(
66            f,
67            "Repository({}: {} files, {} lines)",
68            self.name, self.metadata.total_files, self.metadata.total_lines
69        )
70    }
71}
72
/// A single file in the repository.
///
/// `RepoFile::new` fills in only the two path fields; every other field starts
/// at a neutral value and is expected to be populated afterwards.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepoFile {
    /// Absolute path to file
    pub path: PathBuf,
    /// Path relative to repository root
    pub relative_path: String,
    /// Detected programming language (`None` when no language is recorded)
    pub language: Option<String>,
    /// File size in bytes
    pub size_bytes: u64,
    /// Token counts for different models
    pub token_count: TokenCounts,
    /// Extracted symbols (functions, classes, etc.)
    pub symbols: Vec<Symbol>,
    /// Calculated importance score (0.0 - 1.0); `RepoFile::new` starts it at 0.5
    pub importance: f32,
    /// File content (may be None to save memory)
    pub content: Option<String>,
}
93
94impl RepoFile {
95    /// Create a new file entry
96    pub fn new(path: impl Into<PathBuf>, relative_path: impl Into<String>) -> Self {
97        Self {
98            path: path.into(),
99            relative_path: relative_path.into(),
100            language: None,
101            size_bytes: 0,
102            token_count: TokenCounts::default(),
103            symbols: Vec::new(),
104            importance: 0.5,
105            content: None,
106        }
107    }
108
109    /// Get file extension
110    pub fn extension(&self) -> Option<&str> {
111        self.path.extension().and_then(|e| e.to_str())
112    }
113
114    /// Get filename without path
115    #[must_use]
116    pub fn filename(&self) -> &str {
117        self.path.file_name().and_then(|n| n.to_str()).unwrap_or("")
118    }
119}
120
121impl fmt::Display for RepoFile {
122    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
123        write!(
124            f,
125            "{} ({}, {} tokens)",
126            self.relative_path,
127            self.language.as_deref().unwrap_or("unknown"),
128            self.token_count.claude
129        )
130    }
131}
132
/// Visibility modifier for symbols.
///
/// `Public` is the `Default` variant, so a symbol with no explicit visibility
/// information is treated as public.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum Visibility {
    /// Publicly visible (the default).
    #[default]
    Public,
    /// Private visibility.
    Private,
    /// Protected visibility.
    Protected,
    /// Internal visibility, e.g. C# `internal` or package-private in Java.
    Internal, // For languages like C# or package-private in Java
}
142
143impl Visibility {
144    pub fn name(&self) -> &'static str {
145        match self {
146            Self::Public => "public",
147            Self::Private => "private",
148            Self::Protected => "protected",
149            Self::Internal => "internal",
150        }
151    }
152}
153
/// A code symbol (function, class, variable, etc.)
///
/// `Symbol::new` sets only `name` and `kind`; the remaining fields start empty
/// or zeroed and are expected to be filled in afterwards.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Symbol {
    /// Symbol name
    pub name: String,
    /// Symbol kind
    pub kind: SymbolKind,
    /// Function/method signature (if applicable)
    pub signature: Option<String>,
    /// Documentation string
    pub docstring: Option<String>,
    /// Starting line number (1-indexed; `Symbol::new` leaves it at 0 until set)
    pub start_line: u32,
    /// Ending line number (1-indexed; `Symbol::new` leaves it at 0 until set)
    pub end_line: u32,
    /// Number of references to this symbol
    pub references: u32,
    /// Calculated importance (0.0 - 1.0); `Symbol::new` starts it at 0.5
    pub importance: f32,
    /// Parent symbol name (for methods inside classes)
    pub parent: Option<String>,
    /// Visibility modifier (public, private, etc.)
    pub visibility: Visibility,
    /// Function/method calls made by this symbol (callee names)
    pub calls: Vec<String>,
    /// Base class/parent class name (for class inheritance)
    pub extends: Option<String>,
    /// Implemented interfaces/protocols/traits
    pub implements: Vec<String>,
}
184
185impl Symbol {
186    /// Create a new symbol
187    pub fn new(name: impl Into<String>, kind: SymbolKind) -> Self {
188        Self {
189            name: name.into(),
190            kind,
191            signature: None,
192            docstring: None,
193            start_line: 0,
194            end_line: 0,
195            references: 0,
196            importance: 0.5,
197            parent: None,
198            visibility: Visibility::default(),
199            calls: Vec::new(),
200            extends: None,
201            implements: Vec::new(),
202        }
203    }
204
205    /// Get line count
206    #[must_use]
207    pub fn line_count(&self) -> u32 {
208        if self.end_line >= self.start_line {
209            self.end_line - self.start_line + 1
210        } else {
211            1
212        }
213    }
214}
215
216impl fmt::Display for Symbol {
217    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
218        write!(
219            f,
220            "{}:{} (lines {}-{})",
221            self.kind.name(),
222            self.name,
223            self.start_line,
224            self.end_line
225        )
226    }
227}
228
/// Kind of code symbol.
///
/// Each variant has a lowercase human-readable name (see `SymbolKind::name`),
/// which `SymbolKind::from_str` parses back case-insensitively.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum SymbolKind {
    Function,
    Method,
    Class,
    Interface,
    Struct,
    Enum,
    Constant,
    Variable,
    Import,
    Export,
    /// Named `"type"` by `name()`; `from_str` also accepts `"typealias"`.
    TypeAlias,
    Module,
    Trait,
    Macro,
}
247
248impl SymbolKind {
249    /// Get human-readable name
250    #[must_use]
251    pub fn name(&self) -> &'static str {
252        match self {
253            Self::Function => "function",
254            Self::Method => "method",
255            Self::Class => "class",
256            Self::Interface => "interface",
257            Self::Struct => "struct",
258            Self::Enum => "enum",
259            Self::Constant => "constant",
260            Self::Variable => "variable",
261            Self::Import => "import",
262            Self::Export => "export",
263            Self::TypeAlias => "type",
264            Self::Module => "module",
265            Self::Trait => "trait",
266            Self::Macro => "macro",
267        }
268    }
269
270    /// Parse from string name (inverse of name())
271    #[must_use]
272    #[allow(clippy::should_implement_trait)]
273    pub fn from_str(s: &str) -> Option<Self> {
274        match s.to_lowercase().as_str() {
275            "function" => Some(Self::Function),
276            "method" => Some(Self::Method),
277            "class" => Some(Self::Class),
278            "interface" => Some(Self::Interface),
279            "struct" => Some(Self::Struct),
280            "enum" => Some(Self::Enum),
281            "constant" => Some(Self::Constant),
282            "variable" => Some(Self::Variable),
283            "import" => Some(Self::Import),
284            "export" => Some(Self::Export),
285            "type" | "typealias" => Some(Self::TypeAlias),
286            "module" => Some(Self::Module),
287            "trait" => Some(Self::Trait),
288            "macro" => Some(Self::Macro),
289            _ => None,
290        }
291    }
292}
293
294impl std::str::FromStr for SymbolKind {
295    type Err = ();
296
297    fn from_str(s: &str) -> Result<Self, Self::Err> {
298        SymbolKind::from_str(s).ok_or(())
299    }
300}
301
302impl fmt::Display for SymbolKind {
303    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
304        write!(f, "{}", self.name())
305    }
306}
307
/// Repository metadata and statistics.
///
/// Derives `Default`, which `Repository::new` uses to start with zeroed
/// counters, empty lists and all optional fields set to `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct RepoMetadata {
    /// Total number of files
    pub total_files: u32,
    /// Total lines of code
    pub total_lines: u64,
    /// Aggregate token counts
    pub total_tokens: TokenCounts,
    /// Language breakdown
    pub languages: Vec<LanguageStats>,
    /// Detected framework (e.g., "React", "Django")
    pub framework: Option<String>,
    /// Repository description
    pub description: Option<String>,
    /// Git branch (if in git repo)
    pub branch: Option<String>,
    /// Git commit hash (if in git repo)
    pub commit: Option<String>,
    /// Directory structure tree
    pub directory_structure: Option<String>,
    /// External dependencies (packages/libraries)
    pub external_dependencies: Vec<String>,
    /// Git history (commits and changes) - for structured output
    pub git_history: Option<GitHistory>,
}
334
/// Statistics for a single language.
///
/// Stored as entries of `RepoMetadata::languages`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageStats {
    /// Language name
    pub language: String,
    /// Number of files
    pub files: u32,
    /// Total lines in this language
    pub lines: u64,
    /// Percentage of total codebase
    pub percentage: f32,
}
347
/// A git commit entry for structured output.
///
/// All fields are plain strings; this type does not validate the hash or
/// date formats itself.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GitCommitInfo {
    /// Full commit hash
    pub hash: String,
    /// Short commit hash (7 chars)
    pub short_hash: String,
    /// Author name
    pub author: String,
    /// Commit date (YYYY-MM-DD)
    pub date: String,
    /// Commit message
    pub message: String,
}
362
/// Git history information for structured output.
///
/// Derives `Default`, which yields empty `commits` and `changed_files` lists.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct GitHistory {
    /// Recent commits
    pub commits: Vec<GitCommitInfo>,
    /// Files with uncommitted changes
    pub changed_files: Vec<GitChangedFile>,
}
371
/// A file with uncommitted changes.
///
/// When serialized, `diff_content` is left out entirely if it is `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GitChangedFile {
    /// File path relative to repo root
    pub path: String,
    /// Change status (A=Added, M=Modified, D=Deleted, R=Renamed)
    pub status: String,
    /// Diff content (optional, only populated when --include-diffs is used)
    // Omit the key from serialized output rather than emitting an explicit null.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub diff_content: Option<String>,
}
383
/// Compression level for output.
///
/// `Balanced` is the `Default` variant. Each level has a lowercase name
/// (`CompressionLevel::name`), a description and an approximate expected
/// reduction percentage (`CompressionLevel::expected_reduction`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum CompressionLevel {
    /// No compression
    None,
    /// Remove empty lines, trim whitespace
    Minimal,
    /// Remove comments, normalize whitespace (the default level)
    #[default]
    Balanced,
    /// Remove docstrings, keep signatures only
    Aggressive,
    /// Key symbols only
    Extreme,
    /// Focused: key symbols with small surrounding context
    Focused,
    /// Semantic compression using code understanding
    ///
    /// Uses chunk-based compression that:
    /// - Splits content at semantic boundaries (paragraphs, functions)
    /// - Applies budget-ratio-based selection
    /// - When `embeddings` feature is enabled: clusters similar code and keeps representatives
    /// - When disabled: uses heuristic-based sampling
    ///
    /// This provides intelligent compression that preserves code structure better than
    /// character-based approaches, though it's not as sophisticated as full neural
    /// semantic analysis.
    ///
    /// Expected reduction: ~60-70% (may vary based on content structure)
    Semantic,
}
415
416impl CompressionLevel {
417    /// Expected reduction percentage
418    ///
419    /// Note: These are approximate values. Actual reduction depends on:
420    /// - Content structure (more repetitive = higher reduction)
421    /// - Code density (comments/whitespace ratio)
422    /// - For Semantic: whether `embeddings` feature is enabled
423    pub fn expected_reduction(&self) -> u8 {
424        match self {
425            Self::None => 0,
426            Self::Minimal => 15,
427            Self::Balanced => 35,
428            Self::Aggressive => 60,
429            Self::Extreme => 80,
430            Self::Focused => 75,
431            // Semantic uses chunk-based compression with ~50% budget ratio default
432            // Combined with structure preservation, typically achieves 60-70%
433            Self::Semantic => 65,
434        }
435    }
436
437    /// Get a human-readable description of this compression level
438    pub fn description(&self) -> &'static str {
439        match self {
440            Self::None => "No compression - original content preserved",
441            Self::Minimal => "Remove empty lines, trim whitespace",
442            Self::Balanced => "Remove comments, normalize whitespace",
443            Self::Aggressive => "Remove docstrings, keep signatures only",
444            Self::Extreme => "Key symbols only - minimal context",
445            Self::Focused => "Focused symbols with small surrounding context",
446            Self::Semantic => "Semantic chunking with intelligent sampling",
447        }
448    }
449
450    /// Parse compression level from string
451    ///
452    /// Accepts: "none", "minimal", "balanced", "aggressive", "extreme", "semantic"
453    /// Case-insensitive. Returns `None` for unrecognized values.
454    #[allow(clippy::should_implement_trait)]
455    pub fn from_str(s: &str) -> Option<Self> {
456        match s.to_lowercase().as_str() {
457            "none" => Some(Self::None),
458            "minimal" => Some(Self::Minimal),
459            "balanced" => Some(Self::Balanced),
460            "aggressive" => Some(Self::Aggressive),
461            "extreme" => Some(Self::Extreme),
462            "focused" => Some(Self::Focused),
463            "semantic" => Some(Self::Semantic),
464            _ => None,
465        }
466    }
467
468    /// Get string name of this compression level
469    pub fn name(&self) -> &'static str {
470        match self {
471            Self::None => "none",
472            Self::Minimal => "minimal",
473            Self::Balanced => "balanced",
474            Self::Aggressive => "aggressive",
475            Self::Extreme => "extreme",
476            Self::Focused => "focused",
477            Self::Semantic => "semantic",
478        }
479    }
480
481    /// Get all available compression levels
482    pub fn all() -> &'static [Self] {
483        &[
484            Self::None,
485            Self::Minimal,
486            Self::Balanced,
487            Self::Aggressive,
488            Self::Extreme,
489            Self::Focused,
490            Self::Semantic,
491        ]
492    }
493}
494
495impl std::str::FromStr for CompressionLevel {
496    type Err = ();
497
498    fn from_str(s: &str) -> Result<Self, Self::Err> {
499        CompressionLevel::from_str(s).ok_or(())
500    }
501}
502
/// Unit tests for the core types: constructors, token counts, symbol line
/// counting, name/parse round trips, `FromStr` and `Display` impls.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_repository_new() {
        let repo = Repository::new("test", "/tmp/test");
        assert_eq!(repo.name, "test");
        assert!(repo.files.is_empty());
    }

    #[test]
    fn test_token_counts() {
        let mut counts = TokenCounts::default();
        counts.set(TokenizerModel::Claude, 100);
        assert_eq!(counts.get(TokenizerModel::Claude), 100);
    }

    #[test]
    fn test_symbol_line_count() {
        let mut sym = Symbol::new("test", SymbolKind::Function);
        sym.start_line = 10;
        sym.end_line = 20;
        assert_eq!(sym.line_count(), 11);
    }

    #[test]
    fn test_symbol_line_count_edge_cases() {
        // A freshly created symbol has start == end == 0, i.e. a single line.
        let mut sym = Symbol::new("test", SymbolKind::Function);
        assert_eq!(sym.line_count(), 1);

        // An inverted range is reported as one line rather than underflowing.
        sym.start_line = 20;
        sym.end_line = 10;
        assert_eq!(sym.line_count(), 1);
    }

    #[test]
    fn test_symbol_kind_round_trip() {
        let kinds = [
            SymbolKind::Function,
            SymbolKind::Method,
            SymbolKind::Class,
            SymbolKind::Interface,
            SymbolKind::Struct,
            SymbolKind::Enum,
            SymbolKind::Constant,
            SymbolKind::Variable,
            SymbolKind::Import,
            SymbolKind::Export,
            SymbolKind::TypeAlias,
            SymbolKind::Module,
            SymbolKind::Trait,
            SymbolKind::Macro,
        ];
        for kind in kinds.iter().copied() {
            assert_eq!(SymbolKind::from_str(kind.name()), Some(kind));
        }

        // Alias and case-insensitivity
        assert_eq!(SymbolKind::from_str("TypeAlias"), Some(SymbolKind::TypeAlias));
        assert_eq!(SymbolKind::from_str("FUNCTION"), Some(SymbolKind::Function));

        // Unknown
        assert_eq!(SymbolKind::from_str("unknown"), None);
        assert_eq!(SymbolKind::from_str(""), None);
    }

    #[test]
    fn test_from_str_trait_impls() {
        assert_eq!("trait".parse::<SymbolKind>(), Ok(SymbolKind::Trait));
        assert_eq!("bogus".parse::<SymbolKind>(), Err(()));
        assert_eq!("focused".parse::<CompressionLevel>(), Ok(CompressionLevel::Focused));
        assert_eq!("bogus".parse::<CompressionLevel>(), Err(()));
    }

    #[test]
    fn test_repo_file_path_helpers() {
        let file = RepoFile::new("/tmp/test/src/main.rs", "src/main.rs");
        assert_eq!(file.extension(), Some("rs"));
        assert_eq!(file.filename(), "main.rs");

        let no_ext = RepoFile::new("/tmp/test/Makefile", "Makefile");
        assert_eq!(no_ext.extension(), None);
        assert_eq!(no_ext.filename(), "Makefile");
    }

    #[test]
    fn test_files_by_language_and_importance() {
        let mut repo = Repository::new("test", "/tmp/test");

        let mut low = RepoFile::new("/tmp/test/a.rs", "a.rs");
        low.language = Some("rust".to_owned());
        low.importance = 0.1;
        let mut high = RepoFile::new("/tmp/test/b.py", "b.py");
        high.language = Some("python".to_owned());
        high.importance = 0.9;
        repo.files.push(low);
        repo.files.push(high);

        let rust_files = repo.files_by_language("rust");
        assert_eq!(rust_files.len(), 1);
        assert_eq!(rust_files[0].relative_path, "a.rs");

        let ordered: Vec<&str> = repo
            .files_by_importance()
            .into_iter()
            .map(|f| f.relative_path.as_str())
            .collect();
        assert_eq!(ordered, ["b.py", "a.rs"]);
    }

    #[test]
    fn test_display_impls() {
        let repo = Repository::new("test", "/tmp/test");
        assert_eq!(repo.to_string(), "Repository(test: 0 files, 0 lines)");

        let mut sym = Symbol::new("run", SymbolKind::Function);
        sym.start_line = 10;
        sym.end_line = 20;
        assert_eq!(sym.to_string(), "function:run (lines 10-20)");

        assert_eq!(SymbolKind::TypeAlias.to_string(), "type");
    }

    #[test]
    fn test_visibility_name() {
        assert_eq!(Visibility::default(), Visibility::Public);
        assert_eq!(Visibility::Public.name(), "public");
        assert_eq!(Visibility::Private.name(), "private");
        assert_eq!(Visibility::Protected.name(), "protected");
        assert_eq!(Visibility::Internal.name(), "internal");
    }

    #[test]
    fn test_compression_level_from_str() {
        assert_eq!(CompressionLevel::from_str("none"), Some(CompressionLevel::None));
        assert_eq!(CompressionLevel::from_str("minimal"), Some(CompressionLevel::Minimal));
        assert_eq!(CompressionLevel::from_str("balanced"), Some(CompressionLevel::Balanced));
        assert_eq!(CompressionLevel::from_str("aggressive"), Some(CompressionLevel::Aggressive));
        assert_eq!(CompressionLevel::from_str("extreme"), Some(CompressionLevel::Extreme));
        assert_eq!(CompressionLevel::from_str("focused"), Some(CompressionLevel::Focused));
        assert_eq!(CompressionLevel::from_str("semantic"), Some(CompressionLevel::Semantic));

        // Case insensitive
        assert_eq!(CompressionLevel::from_str("SEMANTIC"), Some(CompressionLevel::Semantic));
        assert_eq!(CompressionLevel::from_str("Balanced"), Some(CompressionLevel::Balanced));

        // Unknown
        assert_eq!(CompressionLevel::from_str("unknown"), None);
        assert_eq!(CompressionLevel::from_str(""), None);
    }

    #[test]
    fn test_compression_level_round_trip() {
        // Every listed level must parse back from its own name.
        for level in CompressionLevel::all() {
            assert_eq!(CompressionLevel::from_str(level.name()), Some(*level));
        }
    }

    #[test]
    fn test_compression_level_name() {
        assert_eq!(CompressionLevel::None.name(), "none");
        assert_eq!(CompressionLevel::Semantic.name(), "semantic");
    }

    #[test]
    fn test_compression_level_expected_reduction() {
        assert_eq!(CompressionLevel::None.expected_reduction(), 0);
        assert_eq!(CompressionLevel::Minimal.expected_reduction(), 15);
        assert_eq!(CompressionLevel::Balanced.expected_reduction(), 35);
        assert_eq!(CompressionLevel::Aggressive.expected_reduction(), 60);
        assert_eq!(CompressionLevel::Extreme.expected_reduction(), 80);
        assert_eq!(CompressionLevel::Focused.expected_reduction(), 75);
        assert_eq!(CompressionLevel::Semantic.expected_reduction(), 65);
    }

    #[test]
    fn test_compression_level_description() {
        // All levels should have non-empty descriptions
        for level in CompressionLevel::all() {
            assert!(!level.description().is_empty());
        }
    }

    #[test]
    fn test_compression_level_all() {
        let all = CompressionLevel::all();
        assert_eq!(all.len(), 7);
        assert!(all.contains(&CompressionLevel::Semantic));
    }
}