Skip to main content

infiniloom_engine/embedding/
error.rs

//! Actionable error types for the embedding system
//!
//! Every error includes:
//! - A clear description of what went wrong
//! - Actionable fix suggestions
//! - Context for debugging
//!
//! # Security
//!
//! Error messages use sanitized paths that strip the user's home directory
//! to prevent leaking sensitive filesystem information.
12
13use std::path::{Path, PathBuf};
14use thiserror::Error;
15
/// Sanitize a path for display in error messages.
///
/// Replaces the user's home directory prefix with `~` so that error output
/// does not leak the account name or home location.
/// Example: `/Users/john/code/project/src/foo.rs` → `~/code/project/src/foo.rs`
///
/// The home directory is taken from the `HOME` environment variable
/// (Unix/macOS) and, failing that, from `USERPROFILE` (Windows). A variable
/// that is unset, not valid Unicode, or empty is ignored. If `path` is not
/// located under any usable home directory it is returned unchanged.
pub fn sanitize_path(path: &Path) -> String {
    // Checked in order; the first variable whose value prefixes `path` wins.
    for var in ["HOME", "USERPROFILE"] {
        match std::env::var(var) {
            // An empty value must be skipped: `strip_prefix("")` succeeds for
            // every path, which would wrongly turn `/etc/passwd` into
            // `~//etc/passwd`.
            Ok(home) if !home.is_empty() => {
                if let Ok(relative) = path.strip_prefix(Path::new(&home)) {
                    return format!("~/{}", relative.display());
                }
            }
            _ => {}
        }
    }

    // No usable home directory, or the path lives elsewhere: show it as-is.
    path.display().to_string()
}
38
39/// Sanitize a PathBuf for display in error messages
40pub fn sanitize_pathbuf(path: &PathBuf) -> String {
41    sanitize_path(path.as_path())
42}
43
44/// A wrapper around PathBuf that sanitizes paths when displayed
45#[derive(Debug, Clone)]
46pub struct SafePath(pub PathBuf);
47
48impl std::fmt::Display for SafePath {
49    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
50        write!(f, "{}", sanitize_path(&self.0))
51    }
52}
53
54/// Actionable error types with helpful messages
55#[derive(Debug, Error)]
56pub enum EmbedError {
57    // === User Errors (Actionable) ===
58    #[error(
59        "Invalid settings: {field} - {reason}\n\nFix: Check your --{field} argument or config file"
60    )]
61    InvalidSettings { field: String, reason: String },
62
63    #[error("Manifest version {found} is newer than supported version {max_supported}\n\nFix: Upgrade infiniloom to latest version, or delete manifest and rebuild:\n  rm .infiniloom-embed.bin && infiniloom embed")]
64    ManifestVersionTooNew { found: u32, max_supported: u32 },
65
66    #[error("Manifest corrupted or tampered\n  Path: {path}\n  Expected checksum: {expected}\n  Actual checksum: {actual}\n\nFix: Delete manifest and rebuild:\n  rm {path} && infiniloom embed", path = path.display())]
67    ManifestCorrupted { path: PathBuf, expected: String, actual: String },
68
69    #[error("Settings changed since last run\n\nPrevious: {previous}\nCurrent:  {current}\n\nImpact: All chunk IDs may change\n\nFix: Run with --full to rebuild, or restore original settings")]
70    SettingsChanged { previous: String, current: String },
71
72    #[error("No code chunks found\n\nPossible causes:\n  - Include patterns too restrictive: {include_patterns}\n  - Exclude patterns too broad: {exclude_patterns}\n  - No supported languages in repository\n\nFix: Check -i/--include and -e/--exclude patterns")]
73    NoChunksGenerated { include_patterns: String, exclude_patterns: String },
74
75    #[error("Secrets detected in {count} chunks\n\nFiles with secrets:\n{files}\n\nFix: Either:\n  1. Remove secrets from code\n  2. Use --redact-secrets to mask them\n  3. Use --no-scan-secrets to skip scanning (not recommended)")]
76    SecretsDetected { count: usize, files: String },
77
78    #[error("Invalid glob pattern: '{pattern}'\n  Error: {reason}\n\nFix: Check -i/--include or -e/--exclude pattern syntax.\n  Examples: '*.rs', 'src/**/*.ts', '!tests/*'")]
79    InvalidPattern { pattern: String, reason: String },
80
81    #[error("Hash collision detected!\n  Chunk ID: {id}\n  Hash 1: {hash1}\n  Hash 2: {hash2}\n\nThis is extremely rare. Please report at https://github.com/infiniloom/issues")]
82    HashCollision { id: String, hash1: String, hash2: String },
83
84    // === Resource Limit Errors ===
85    #[error("File too large: {path} ({size} bytes, max: {max})\n\nFix: Exclude large files with -e/--exclude pattern, or increase --max-file-size", path = path.display())]
86    FileTooLarge { path: PathBuf, size: u64, max: u64 },
87
88    #[error("Line too long in file: {path} ({length} chars, max: {max})\n\nThis is likely a minified file.\n\nFix: Exclude minified files with -e/--exclude pattern (e.g., '*.min.js'), or increase --max-line-length", path = path.display())]
89    LineTooLong { path: PathBuf, length: usize, max: usize },
90
91    #[error(
92        "Too many chunks generated ({count}, max: {max})\n\nFix: Use more restrictive include patterns, or increase --max-chunks limit"
93    )]
94    TooManyChunks { count: usize, max: usize },
95
96    #[error("Too many files to process ({count}, max: {max})\n\nFix: Use more restrictive include patterns, or increase --max-files limit")]
97    TooManyFiles { count: usize, max: usize },
98
99    #[error("Recursion limit exceeded while parsing\n  Depth: {depth}, Max: {max}\n  Context: {context}\n\nFix: File may have unusual nesting. Exclude it with -e pattern")]
100    RecursionLimitExceeded { depth: u32, max: u32, context: String },
101
102    #[error("Path traversal detected\n  Path: {path}\n  Repo root: {repo_root}\n\nFix: Remove symlinks pointing outside repository, or use --no-follow-symlinks", path = path.display(), repo_root = repo_root.display())]
103    PathTraversal { path: PathBuf, repo_root: PathBuf },
104
105    // === System Errors ===
106    #[error("I/O error: {path}\n  {source}", path = path.display())]
107    IoError {
108        path: PathBuf,
109        #[source]
110        source: std::io::Error,
111    },
112
113    #[error("Parse error in {file} at line {line}\n  {message}\n\nFix: Fix syntax error or exclude file with -e pattern")]
114    ParseError { file: String, line: u32, message: String },
115
116    #[error("Serialization error: {reason}")]
117    SerializationError { reason: String },
118
119    #[error("Deserialization error: {reason}\n\nFix: Manifest may be corrupted. Delete and rebuild:\n  rm .infiniloom-embed.bin && infiniloom embed")]
120    DeserializationError { reason: String },
121
122    #[error("Unsupported algorithm version {found} (max supported: {max_supported})\n\nFix: Upgrade infiniloom or regenerate with current version")]
123    UnsupportedAlgorithmVersion { found: u32, max_supported: u32 },
124
125    #[error("Multiple files failed to process:\n{errors}\n\nFix: Address individual errors above")]
126    MultipleErrors { errors: String },
127
128    #[error("Not a directory: {path}", path = path.display())]
129    NotADirectory { path: PathBuf },
130
131    #[error("Too many errors encountered ({count}, max: {max})\n\nFix: Address individual errors, or increase error tolerance")]
132    TooManyErrors { count: usize, max: usize },
133
134    #[error("SQLite manifest error: {reason}\n\nFix: Delete the .infiniloom-embed.db file and retry, or check disk permissions")]
135    SqliteError { reason: String },
136}
137
138impl EmbedError {
139    /// Format multiple file errors into a single error
140    pub fn from_file_errors(errors: Vec<(PathBuf, EmbedError)>) -> Self {
141        let formatted = errors
142            .iter()
143            .map(|(path, err)| format!("  {}: {}", path.display(), err))
144            .collect::<Vec<_>>()
145            .join("\n");
146        Self::MultipleErrors { errors: formatted }
147    }
148
149    /// Check if this error is critical (should stop processing)
150    pub fn is_critical(&self) -> bool {
151        matches!(
152            self,
153            EmbedError::TooManyChunks { .. }
154                | EmbedError::TooManyFiles { .. }
155                | EmbedError::PathTraversal { .. }
156                | EmbedError::HashCollision { .. }
157                | EmbedError::SecretsDetected { .. }
158                | EmbedError::ManifestCorrupted { .. }
159                | EmbedError::InvalidPattern { .. }
160                | EmbedError::InvalidSettings { .. }
161        )
162    }
163
164    /// Check if this error can be recovered from by skipping the file
165    pub fn is_skippable(&self) -> bool {
166        matches!(
167            self,
168            EmbedError::FileTooLarge { .. }
169                | EmbedError::LineTooLong { .. }
170                | EmbedError::ParseError { .. }
171                | EmbedError::IoError { .. }
172                | EmbedError::RecursionLimitExceeded { .. }
173        )
174    }
175
176    /// Get the semantic exit code for this error
177    ///
178    /// Exit codes follow POSIX conventions and are designed for shell scripting:
179    ///
180    /// | Code | Category | Description |
181    /// |------|----------|-------------|
182    /// | 0 | Success | No error |
183    /// | 1 | User Error | Invalid settings, patterns, or arguments |
184    /// | 2 | Input Error | No chunks generated, no data to process |
185    /// | 3 | Security | Secrets detected (use --redact-secrets or --no-scan-secrets) |
186    /// | 4 | Security | Path traversal attempt blocked |
187    /// | 10 | Manifest | Version mismatch, corruption, or settings changed |
188    /// | 11 | Resource | Too many chunks/files, recursion limit |
189    /// | 12 | System | I/O errors, serialization failures |
190    /// | 13 | Internal | Hash collision (extremely rare, report as bug) |
191    /// | 14 | Parse | Source code parse errors (skippable) |
192    /// | 15 | Multiple | Multiple errors encountered |
193    ///
194    /// # Shell Script Example
195    ///
196    /// ```bash
197    /// infiniloom embed /path/to/repo
198    /// case $? in
199    ///     0) echo "Success" ;;
200    ///     1) echo "Invalid settings - check arguments" ;;
201    ///     2) echo "No code found - check include/exclude patterns" ;;
202    ///     3) echo "Secrets detected - use --redact-secrets" ;;
203    ///     4) echo "Security violation - path traversal blocked" ;;
204    ///     10) echo "Manifest issue - delete .infiniloom-embed.bin and retry" ;;
205    ///     11) echo "Resource limit - use more restrictive patterns" ;;
206    ///     12) echo "System error - check disk space and permissions" ;;
207    ///     13) echo "Internal error - please report this bug" ;;
208    ///     14) echo "Parse errors - some files skipped" ;;
209    ///     15) echo "Multiple errors - see above for details" ;;
210    /// esac
211    /// ```
212    pub fn exit_code(&self) -> i32 {
213        match self {
214            // User errors (invalid configuration): 1
215            EmbedError::InvalidSettings { .. } | EmbedError::InvalidPattern { .. } => 1,
216
217            // Input errors (no data): 2
218            EmbedError::NoChunksGenerated { .. } | EmbedError::NotADirectory { .. } => 2,
219
220            // Security - secrets detected: 3
221            EmbedError::SecretsDetected { .. } => 3,
222
223            // Security - path traversal: 4
224            EmbedError::PathTraversal { .. } => 4,
225
226            // Manifest errors: 10
227            EmbedError::ManifestVersionTooNew { .. }
228            | EmbedError::ManifestCorrupted { .. }
229            | EmbedError::SettingsChanged { .. }
230            | EmbedError::UnsupportedAlgorithmVersion { .. } => 10,
231
232            // Resource limit errors: 11
233            EmbedError::TooManyChunks { .. }
234            | EmbedError::TooManyFiles { .. }
235            | EmbedError::TooManyErrors { .. }
236            | EmbedError::RecursionLimitExceeded { .. }
237            | EmbedError::FileTooLarge { .. }
238            | EmbedError::LineTooLong { .. } => 11,
239
240            // System errors (I/O, serialization, SQLite): 12
241            EmbedError::IoError { .. }
242            | EmbedError::SerializationError { .. }
243            | EmbedError::DeserializationError { .. }
244            | EmbedError::SqliteError { .. } => 12,
245
246            // Internal errors (hash collision - extremely rare): 13
247            EmbedError::HashCollision { .. } => 13,
248
249            // Parse errors: 14
250            EmbedError::ParseError { .. } => 14,
251
252            // Multiple errors: 15
253            EmbedError::MultipleErrors { .. } => 15,
254        }
255    }
256
257    /// Get a short error code string for programmatic use
258    ///
259    /// Useful for JSON output or logging systems.
260    pub fn error_code(&self) -> &'static str {
261        match self {
262            EmbedError::InvalidSettings { .. } => "E001_INVALID_SETTINGS",
263            EmbedError::InvalidPattern { .. } => "E002_INVALID_PATTERN",
264            EmbedError::NoChunksGenerated { .. } => "E003_NO_CHUNKS",
265            EmbedError::NotADirectory { .. } => "E004_NOT_DIRECTORY",
266            EmbedError::SecretsDetected { .. } => "E005_SECRETS_DETECTED",
267            EmbedError::PathTraversal { .. } => "E006_PATH_TRAVERSAL",
268            EmbedError::ManifestVersionTooNew { .. } => "E010_MANIFEST_VERSION",
269            EmbedError::ManifestCorrupted { .. } => "E011_MANIFEST_CORRUPTED",
270            EmbedError::SettingsChanged { .. } => "E012_SETTINGS_CHANGED",
271            EmbedError::UnsupportedAlgorithmVersion { .. } => "E013_ALGORITHM_VERSION",
272            EmbedError::TooManyChunks { .. } => "E020_TOO_MANY_CHUNKS",
273            EmbedError::TooManyFiles { .. } => "E021_TOO_MANY_FILES",
274            EmbedError::TooManyErrors { .. } => "E022_TOO_MANY_ERRORS",
275            EmbedError::RecursionLimitExceeded { .. } => "E023_RECURSION_LIMIT",
276            EmbedError::FileTooLarge { .. } => "E024_FILE_TOO_LARGE",
277            EmbedError::LineTooLong { .. } => "E025_LINE_TOO_LONG",
278            EmbedError::IoError { .. } => "E030_IO_ERROR",
279            EmbedError::SerializationError { .. } => "E031_SERIALIZATION",
280            EmbedError::DeserializationError { .. } => "E032_DESERIALIZATION",
281            EmbedError::HashCollision { .. } => "E040_HASH_COLLISION",
282            EmbedError::ParseError { .. } => "E050_PARSE_ERROR",
283            EmbedError::MultipleErrors { .. } => "E099_MULTIPLE_ERRORS",
284            EmbedError::SqliteError { .. } => "E033_SQLITE_ERROR",
285        }
286    }
287}
288
289impl Clone for EmbedError {
290    fn clone(&self) -> Self {
291        match self {
292            Self::InvalidSettings { field, reason } => {
293                Self::InvalidSettings { field: field.clone(), reason: reason.clone() }
294            },
295            Self::ManifestVersionTooNew { found, max_supported } => {
296                Self::ManifestVersionTooNew { found: *found, max_supported: *max_supported }
297            },
298            Self::ManifestCorrupted { path, expected, actual } => Self::ManifestCorrupted {
299                path: path.clone(),
300                expected: expected.clone(),
301                actual: actual.clone(),
302            },
303            Self::SettingsChanged { previous, current } => {
304                Self::SettingsChanged { previous: previous.clone(), current: current.clone() }
305            },
306            Self::NoChunksGenerated { include_patterns, exclude_patterns } => {
307                Self::NoChunksGenerated {
308                    include_patterns: include_patterns.clone(),
309                    exclude_patterns: exclude_patterns.clone(),
310                }
311            },
312            Self::SecretsDetected { count, files } => {
313                Self::SecretsDetected { count: *count, files: files.clone() }
314            },
315            Self::HashCollision { id, hash1, hash2 } => {
316                Self::HashCollision { id: id.clone(), hash1: hash1.clone(), hash2: hash2.clone() }
317            },
318            Self::FileTooLarge { path, size, max } => {
319                Self::FileTooLarge { path: path.clone(), size: *size, max: *max }
320            },
321            Self::LineTooLong { path, length, max } => {
322                Self::LineTooLong { path: path.clone(), length: *length, max: *max }
323            },
324            Self::TooManyChunks { count, max } => Self::TooManyChunks { count: *count, max: *max },
325            Self::TooManyFiles { count, max } => Self::TooManyFiles { count: *count, max: *max },
326            Self::RecursionLimitExceeded { depth, max, context } => {
327                Self::RecursionLimitExceeded { depth: *depth, max: *max, context: context.clone() }
328            },
329            Self::PathTraversal { path, repo_root } => {
330                Self::PathTraversal { path: path.clone(), repo_root: repo_root.clone() }
331            },
332            Self::IoError { path, source } => Self::IoError {
333                path: path.clone(),
334                source: std::io::Error::new(source.kind(), source.to_string()),
335            },
336            Self::ParseError { file, line, message } => {
337                Self::ParseError { file: file.clone(), line: *line, message: message.clone() }
338            },
339            Self::SerializationError { reason } => {
340                Self::SerializationError { reason: reason.clone() }
341            },
342            Self::DeserializationError { reason } => {
343                Self::DeserializationError { reason: reason.clone() }
344            },
345            Self::UnsupportedAlgorithmVersion { found, max_supported } => {
346                Self::UnsupportedAlgorithmVersion { found: *found, max_supported: *max_supported }
347            },
348            Self::MultipleErrors { errors } => Self::MultipleErrors { errors: errors.clone() },
349            Self::NotADirectory { path } => Self::NotADirectory { path: path.clone() },
350            Self::InvalidPattern { pattern, reason } => {
351                Self::InvalidPattern { pattern: pattern.clone(), reason: reason.clone() }
352            },
353            Self::TooManyErrors { count, max } => Self::TooManyErrors { count: *count, max: *max },
354            Self::SqliteError { reason } => Self::SqliteError { reason: reason.clone() },
355        }
356    }
357}
358
#[cfg(test)]
mod tests {
    use super::*;

    /// The rendered message names the offending field and carries a `Fix:` hint.
    #[test]
    fn test_error_display() {
        let err = EmbedError::InvalidSettings {
            field: "max_tokens".to_owned(),
            reason: "exceeds limit of 100000".to_owned(),
        };
        let msg = err.to_string();
        assert!(msg.contains("max_tokens"));
        assert!(msg.contains("Fix:"));
    }

    /// Aggregating per-file errors keeps every file path in the combined message.
    #[test]
    fn test_from_file_errors() {
        let errors = vec![
            (
                PathBuf::from("src/foo.rs"),
                EmbedError::FileTooLarge {
                    path: PathBuf::from("src/foo.rs"),
                    size: 20_000_000,
                    max: 10_000_000,
                },
            ),
            (
                PathBuf::from("src/bar.rs"),
                EmbedError::ParseError {
                    file: "src/bar.rs".to_owned(),
                    line: 42,
                    message: "unexpected token".to_owned(),
                },
            ),
        ];

        let combined = EmbedError::from_file_errors(errors);
        let msg = combined.to_string();
        assert!(msg.contains("src/foo.rs"));
        assert!(msg.contains("src/bar.rs"));
    }

    #[test]
    fn test_is_critical() {
        assert!(EmbedError::TooManyChunks { count: 100, max: 50 }.is_critical());
        assert!(EmbedError::PathTraversal {
            path: PathBuf::from("/etc/passwd"),
            repo_root: PathBuf::from("/home/user/repo"),
        }
        .is_critical());
        assert!(!EmbedError::FileTooLarge { path: PathBuf::from("big.bin"), size: 100, max: 50 }
            .is_critical());
    }

    #[test]
    fn test_is_skippable() {
        assert!(EmbedError::FileTooLarge { path: PathBuf::from("big.bin"), size: 100, max: 50 }
            .is_skippable());
        assert!(EmbedError::ParseError {
            file: "bad.rs".to_owned(),
            line: 1,
            message: "syntax error".to_owned(),
        }
        .is_skippable());
        assert!(!EmbedError::TooManyChunks { count: 100, max: 50 }.is_skippable());
    }

    /// Exercises the hand-written `Clone` impl: the copy must be the same
    /// variant and render the same message, and the original stays usable.
    #[test]
    fn test_error_clone() {
        let err = EmbedError::HashCollision {
            id: "ec_123".to_owned(),
            hash1: "abc".to_owned(),
            hash2: "def".to_owned(),
        };
        // Must call `.clone()` explicitly; a plain `let cloned = err;` is a
        // move and would never run `Clone::clone`.
        let cloned = err.clone();
        assert!(matches!(cloned, EmbedError::HashCollision { .. }));
        assert!(matches!(err, EmbedError::HashCollision { .. }));
        assert_eq!(err.to_string(), cloned.to_string());
    }

    #[test]
    fn test_exit_codes() {
        // User errors: 1
        assert_eq!(
            EmbedError::InvalidSettings {
                field: "max_tokens".to_owned(),
                reason: "too high".to_owned()
            }
            .exit_code(),
            1
        );
        assert_eq!(
            EmbedError::InvalidPattern {
                pattern: "**[".to_owned(),
                reason: "unclosed bracket".to_owned()
            }
            .exit_code(),
            1
        );

        // Input errors: 2
        assert_eq!(
            EmbedError::NoChunksGenerated {
                include_patterns: "*.xyz".to_owned(),
                exclude_patterns: "".to_owned()
            }
            .exit_code(),
            2
        );
        assert_eq!(
            EmbedError::NotADirectory { path: PathBuf::from("/tmp/file.txt") }.exit_code(),
            2
        );

        // Security - secrets: 3
        assert_eq!(
            EmbedError::SecretsDetected { count: 5, files: "config.py".to_owned() }.exit_code(),
            3
        );

        // Security - path traversal: 4
        assert_eq!(
            EmbedError::PathTraversal {
                path: PathBuf::from("../../../etc/passwd"),
                repo_root: PathBuf::from("/repo")
            }
            .exit_code(),
            4
        );

        // Manifest errors: 10
        assert_eq!(
            EmbedError::ManifestVersionTooNew { found: 99, max_supported: 2 }.exit_code(),
            10
        );
        assert_eq!(
            EmbedError::ManifestCorrupted {
                path: PathBuf::from(".infiniloom-embed.bin"),
                expected: "abc".to_owned(),
                actual: "def".to_owned()
            }
            .exit_code(),
            10
        );

        // Resource limits: 11
        assert_eq!(EmbedError::TooManyChunks { count: 100000, max: 50000 }.exit_code(), 11);
        assert_eq!(EmbedError::TooManyFiles { count: 10000, max: 5000 }.exit_code(), 11);
        assert_eq!(
            EmbedError::FileTooLarge {
                path: PathBuf::from("big.bin"),
                size: 100_000_000,
                max: 10_000_000
            }
            .exit_code(),
            11
        );

        // System errors: 12
        assert_eq!(
            EmbedError::IoError {
                path: PathBuf::from("/tmp"),
                source: std::io::Error::new(std::io::ErrorKind::NotFound, "not found")
            }
            .exit_code(),
            12
        );
        assert_eq!(EmbedError::SerializationError { reason: "failed".to_owned() }.exit_code(), 12);

        // Internal errors: 13
        assert_eq!(
            EmbedError::HashCollision {
                id: "ec_123".to_owned(),
                hash1: "abc".to_owned(),
                hash2: "def".to_owned()
            }
            .exit_code(),
            13
        );

        // Parse errors: 14
        assert_eq!(
            EmbedError::ParseError {
                file: "bad.rs".to_owned(),
                line: 42,
                message: "syntax error".to_owned()
            }
            .exit_code(),
            14
        );

        // Multiple errors: 15
        assert_eq!(
            EmbedError::MultipleErrors { errors: "error1\nerror2".to_owned() }.exit_code(),
            15
        );
    }

    #[test]
    fn test_error_codes() {
        assert_eq!(
            EmbedError::InvalidSettings { field: "x".to_owned(), reason: "y".to_owned() }
                .error_code(),
            "E001_INVALID_SETTINGS"
        );
        assert_eq!(
            EmbedError::SecretsDetected { count: 1, files: "f".to_owned() }.error_code(),
            "E005_SECRETS_DETECTED"
        );
        assert_eq!(
            EmbedError::HashCollision {
                id: "i".to_owned(),
                hash1: "a".to_owned(),
                hash2: "b".to_owned()
            }
            .error_code(),
            "E040_HASH_COLLISION"
        );
    }
}