infiniloom_engine/embedding/
error.rs

1//! Actionable error types for the embedding system
2//!
3//! All errors include:
4//! - Clear description of what went wrong
5//! - Actionable fix suggestions
6//! - Context for debugging
7//!
8//! # Security
9//!
10//! Error messages use sanitized paths that strip the user's home directory
11//! to prevent leaking sensitive filesystem information.
12
13use std::path::{Path, PathBuf};
14use thiserror::Error;
15
/// Sanitize a path for display in error messages.
///
/// If `path` lies under the current user's home directory, that prefix is
/// replaced with `~/` so the message does not reveal where the home directory is.
/// Example: `/Users/john/code/project/src/foo.rs` → `~/code/project/src/foo.rs`
///
/// The home directory is looked up in `HOME` first (Unix/macOS) and then in
/// `USERPROFILE` (Windows). A variable that is unset or empty is skipped: an
/// empty prefix matches every path, which would turn `/etc/passwd` into
/// `~//etc/passwd`. Values are read with `var_os`, so a home directory that is
/// not valid Unicode is still recognised.
///
/// Paths outside the home directory, or any path when no home directory is
/// known, are returned unchanged via `Path::display`.
pub fn sanitize_path(path: &Path) -> String {
    // Lookup order matters: the first variable whose value prefixes `path` wins.
    const HOME_VARS: [&str; 2] = ["HOME", "USERPROFILE"];

    HOME_VARS
        .iter()
        .filter_map(|name| std::env::var_os(name))
        // An empty value is not a usable home directory (see doc comment).
        .filter(|home| !home.is_empty())
        .find_map(|home| {
            path.strip_prefix(Path::new(&home))
                .ok()
                .map(|relative| format!("~/{}", relative.display()))
        })
        // No home directory known, or the path is not under it: use as-is.
        .unwrap_or_else(|| path.display().to_string())
}
38
39/// Sanitize a PathBuf for display in error messages
40pub fn sanitize_pathbuf(path: &PathBuf) -> String {
41    sanitize_path(path.as_path())
42}
43
44/// A wrapper around PathBuf that sanitizes paths when displayed
45#[derive(Debug, Clone)]
46pub struct SafePath(pub PathBuf);
47
48impl std::fmt::Display for SafePath {
49    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
50        write!(f, "{}", sanitize_path(&self.0))
51    }
52}
53
54/// Actionable error types with helpful messages
55#[derive(Debug, Error)]
56pub enum EmbedError {
57    // === User Errors (Actionable) ===
58    #[error(
59        "Invalid settings: {field} - {reason}\n\nFix: Check your --{field} argument or config file"
60    )]
61    InvalidSettings { field: String, reason: String },
62
63    #[error("Manifest version {found} is newer than supported version {max_supported}\n\nFix: Upgrade infiniloom to latest version, or delete manifest and rebuild:\n  rm .infiniloom-embed.bin && infiniloom embed")]
64    ManifestVersionTooNew { found: u32, max_supported: u32 },
65
66    #[error("Manifest corrupted or tampered\n  Path: {path}\n  Expected checksum: {expected}\n  Actual checksum: {actual}\n\nFix: Delete manifest and rebuild:\n  rm {path} && infiniloom embed", path = path.display())]
67    ManifestCorrupted { path: PathBuf, expected: String, actual: String },
68
69    #[error("Settings changed since last run\n\nPrevious: {previous}\nCurrent:  {current}\n\nImpact: All chunk IDs may change\n\nFix: Run with --full to rebuild, or restore original settings")]
70    SettingsChanged { previous: String, current: String },
71
72    #[error("No code chunks found\n\nPossible causes:\n  - Include patterns too restrictive: {include_patterns}\n  - Exclude patterns too broad: {exclude_patterns}\n  - No supported languages in repository\n\nFix: Check -i/--include and -e/--exclude patterns")]
73    NoChunksGenerated { include_patterns: String, exclude_patterns: String },
74
75    #[error("Secrets detected in {count} chunks\n\nFiles with secrets:\n{files}\n\nFix: Either:\n  1. Remove secrets from code\n  2. Use --redact-secrets to mask them\n  3. Use --no-scan-secrets to skip scanning (not recommended)")]
76    SecretsDetected { count: usize, files: String },
77
78    #[error("Invalid glob pattern: '{pattern}'\n  Error: {reason}\n\nFix: Check -i/--include or -e/--exclude pattern syntax.\n  Examples: '*.rs', 'src/**/*.ts', '!tests/*'")]
79    InvalidPattern { pattern: String, reason: String },
80
81    #[error("Hash collision detected!\n  Chunk ID: {id}\n  Hash 1: {hash1}\n  Hash 2: {hash2}\n\nThis is extremely rare. Please report at https://github.com/infiniloom/issues")]
82    HashCollision { id: String, hash1: String, hash2: String },
83
84    // === Resource Limit Errors ===
85    #[error("File too large: {path} ({size} bytes, max: {max})\n\nFix: Exclude large files with -e/--exclude pattern, or increase --max-file-size", path = path.display())]
86    FileTooLarge { path: PathBuf, size: u64, max: u64 },
87
88    #[error("Line too long in file: {path} ({length} chars, max: {max})\n\nThis is likely a minified file.\n\nFix: Exclude minified files with -e/--exclude pattern (e.g., '*.min.js'), or increase --max-line-length", path = path.display())]
89    LineTooLong { path: PathBuf, length: usize, max: usize },
90
91    #[error(
92        "Too many chunks generated ({count}, max: {max})\n\nFix: Use more restrictive include patterns, or increase --max-chunks limit"
93    )]
94    TooManyChunks { count: usize, max: usize },
95
96    #[error("Too many files to process ({count}, max: {max})\n\nFix: Use more restrictive include patterns, or increase --max-files limit")]
97    TooManyFiles { count: usize, max: usize },
98
99    #[error("Recursion limit exceeded while parsing\n  Depth: {depth}, Max: {max}\n  Context: {context}\n\nFix: File may have unusual nesting. Exclude it with -e pattern")]
100    RecursionLimitExceeded { depth: u32, max: u32, context: String },
101
102    #[error("Path traversal detected\n  Path: {path}\n  Repo root: {repo_root}\n\nFix: Remove symlinks pointing outside repository, or use --no-follow-symlinks", path = path.display(), repo_root = repo_root.display())]
103    PathTraversal { path: PathBuf, repo_root: PathBuf },
104
105    // === System Errors ===
106    #[error("I/O error: {path}\n  {source}", path = path.display())]
107    IoError {
108        path: PathBuf,
109        #[source]
110        source: std::io::Error,
111    },
112
113    #[error("Parse error in {file} at line {line}\n  {message}\n\nFix: Fix syntax error or exclude file with -e pattern")]
114    ParseError { file: String, line: u32, message: String },
115
116    #[error("Serialization error: {reason}")]
117    SerializationError { reason: String },
118
119    #[error("Deserialization error: {reason}\n\nFix: Manifest may be corrupted. Delete and rebuild:\n  rm .infiniloom-embed.bin && infiniloom embed")]
120    DeserializationError { reason: String },
121
122    #[error("Unsupported algorithm version {found} (max supported: {max_supported})\n\nFix: Upgrade infiniloom or regenerate with current version")]
123    UnsupportedAlgorithmVersion { found: u32, max_supported: u32 },
124
125    #[error("Multiple files failed to process:\n{errors}\n\nFix: Address individual errors above")]
126    MultipleErrors { errors: String },
127
128    #[error("Not a directory: {path}", path = path.display())]
129    NotADirectory { path: PathBuf },
130
131    #[error("Too many errors encountered ({count}, max: {max})\n\nFix: Address individual errors, or increase error tolerance")]
132    TooManyErrors { count: usize, max: usize },
133}
134
135impl EmbedError {
136    /// Format multiple file errors into a single error
137    pub fn from_file_errors(errors: Vec<(PathBuf, EmbedError)>) -> Self {
138        let formatted = errors
139            .iter()
140            .map(|(path, err)| format!("  {}: {}", path.display(), err))
141            .collect::<Vec<_>>()
142            .join("\n");
143        Self::MultipleErrors { errors: formatted }
144    }
145
146    /// Check if this error is critical (should stop processing)
147    pub fn is_critical(&self) -> bool {
148        matches!(
149            self,
150            EmbedError::TooManyChunks { .. }
151                | EmbedError::TooManyFiles { .. }
152                | EmbedError::PathTraversal { .. }
153                | EmbedError::HashCollision { .. }
154                | EmbedError::SecretsDetected { .. }
155                | EmbedError::ManifestCorrupted { .. }
156                | EmbedError::InvalidPattern { .. }
157                | EmbedError::InvalidSettings { .. }
158        )
159    }
160
161    /// Check if this error can be recovered from by skipping the file
162    pub fn is_skippable(&self) -> bool {
163        matches!(
164            self,
165            EmbedError::FileTooLarge { .. }
166                | EmbedError::LineTooLong { .. }
167                | EmbedError::ParseError { .. }
168                | EmbedError::IoError { .. }
169                | EmbedError::RecursionLimitExceeded { .. }
170        )
171    }
172
173    /// Get the semantic exit code for this error
174    ///
175    /// Exit codes follow POSIX conventions and are designed for shell scripting:
176    ///
177    /// | Code | Category | Description |
178    /// |------|----------|-------------|
179    /// | 0 | Success | No error |
180    /// | 1 | User Error | Invalid settings, patterns, or arguments |
181    /// | 2 | Input Error | No chunks generated, no data to process |
182    /// | 3 | Security | Secrets detected (use --redact-secrets or --no-scan-secrets) |
183    /// | 4 | Security | Path traversal attempt blocked |
184    /// | 10 | Manifest | Version mismatch, corruption, or settings changed |
185    /// | 11 | Resource | Too many chunks/files, recursion limit |
186    /// | 12 | System | I/O errors, serialization failures |
187    /// | 13 | Internal | Hash collision (extremely rare, report as bug) |
188    /// | 14 | Parse | Source code parse errors (skippable) |
189    /// | 15 | Multiple | Multiple errors encountered |
190    ///
191    /// # Shell Script Example
192    ///
193    /// ```bash
194    /// infiniloom embed /path/to/repo
195    /// case $? in
196    ///     0) echo "Success" ;;
197    ///     1) echo "Invalid settings - check arguments" ;;
198    ///     2) echo "No code found - check include/exclude patterns" ;;
199    ///     3) echo "Secrets detected - use --redact-secrets" ;;
200    ///     4) echo "Security violation - path traversal blocked" ;;
201    ///     10) echo "Manifest issue - delete .infiniloom-embed.bin and retry" ;;
202    ///     11) echo "Resource limit - use more restrictive patterns" ;;
203    ///     12) echo "System error - check disk space and permissions" ;;
204    ///     13) echo "Internal error - please report this bug" ;;
205    ///     14) echo "Parse errors - some files skipped" ;;
206    ///     15) echo "Multiple errors - see above for details" ;;
207    /// esac
208    /// ```
209    pub fn exit_code(&self) -> i32 {
210        match self {
211            // User errors (invalid configuration): 1
212            EmbedError::InvalidSettings { .. } | EmbedError::InvalidPattern { .. } => 1,
213
214            // Input errors (no data): 2
215            EmbedError::NoChunksGenerated { .. } | EmbedError::NotADirectory { .. } => 2,
216
217            // Security - secrets detected: 3
218            EmbedError::SecretsDetected { .. } => 3,
219
220            // Security - path traversal: 4
221            EmbedError::PathTraversal { .. } => 4,
222
223            // Manifest errors: 10
224            EmbedError::ManifestVersionTooNew { .. }
225            | EmbedError::ManifestCorrupted { .. }
226            | EmbedError::SettingsChanged { .. }
227            | EmbedError::UnsupportedAlgorithmVersion { .. } => 10,
228
229            // Resource limit errors: 11
230            EmbedError::TooManyChunks { .. }
231            | EmbedError::TooManyFiles { .. }
232            | EmbedError::TooManyErrors { .. }
233            | EmbedError::RecursionLimitExceeded { .. }
234            | EmbedError::FileTooLarge { .. }
235            | EmbedError::LineTooLong { .. } => 11,
236
237            // System errors (I/O, serialization): 12
238            EmbedError::IoError { .. }
239            | EmbedError::SerializationError { .. }
240            | EmbedError::DeserializationError { .. } => 12,
241
242            // Internal errors (hash collision - extremely rare): 13
243            EmbedError::HashCollision { .. } => 13,
244
245            // Parse errors: 14
246            EmbedError::ParseError { .. } => 14,
247
248            // Multiple errors: 15
249            EmbedError::MultipleErrors { .. } => 15,
250        }
251    }
252
253    /// Get a short error code string for programmatic use
254    ///
255    /// Useful for JSON output or logging systems.
256    pub fn error_code(&self) -> &'static str {
257        match self {
258            EmbedError::InvalidSettings { .. } => "E001_INVALID_SETTINGS",
259            EmbedError::InvalidPattern { .. } => "E002_INVALID_PATTERN",
260            EmbedError::NoChunksGenerated { .. } => "E003_NO_CHUNKS",
261            EmbedError::NotADirectory { .. } => "E004_NOT_DIRECTORY",
262            EmbedError::SecretsDetected { .. } => "E005_SECRETS_DETECTED",
263            EmbedError::PathTraversal { .. } => "E006_PATH_TRAVERSAL",
264            EmbedError::ManifestVersionTooNew { .. } => "E010_MANIFEST_VERSION",
265            EmbedError::ManifestCorrupted { .. } => "E011_MANIFEST_CORRUPTED",
266            EmbedError::SettingsChanged { .. } => "E012_SETTINGS_CHANGED",
267            EmbedError::UnsupportedAlgorithmVersion { .. } => "E013_ALGORITHM_VERSION",
268            EmbedError::TooManyChunks { .. } => "E020_TOO_MANY_CHUNKS",
269            EmbedError::TooManyFiles { .. } => "E021_TOO_MANY_FILES",
270            EmbedError::TooManyErrors { .. } => "E022_TOO_MANY_ERRORS",
271            EmbedError::RecursionLimitExceeded { .. } => "E023_RECURSION_LIMIT",
272            EmbedError::FileTooLarge { .. } => "E024_FILE_TOO_LARGE",
273            EmbedError::LineTooLong { .. } => "E025_LINE_TOO_LONG",
274            EmbedError::IoError { .. } => "E030_IO_ERROR",
275            EmbedError::SerializationError { .. } => "E031_SERIALIZATION",
276            EmbedError::DeserializationError { .. } => "E032_DESERIALIZATION",
277            EmbedError::HashCollision { .. } => "E040_HASH_COLLISION",
278            EmbedError::ParseError { .. } => "E050_PARSE_ERROR",
279            EmbedError::MultipleErrors { .. } => "E099_MULTIPLE_ERRORS",
280        }
281    }
282}
283
284impl Clone for EmbedError {
285    fn clone(&self) -> Self {
286        match self {
287            Self::InvalidSettings { field, reason } => {
288                Self::InvalidSettings { field: field.clone(), reason: reason.clone() }
289            },
290            Self::ManifestVersionTooNew { found, max_supported } => {
291                Self::ManifestVersionTooNew { found: *found, max_supported: *max_supported }
292            },
293            Self::ManifestCorrupted { path, expected, actual } => Self::ManifestCorrupted {
294                path: path.clone(),
295                expected: expected.clone(),
296                actual: actual.clone(),
297            },
298            Self::SettingsChanged { previous, current } => {
299                Self::SettingsChanged { previous: previous.clone(), current: current.clone() }
300            },
301            Self::NoChunksGenerated { include_patterns, exclude_patterns } => {
302                Self::NoChunksGenerated {
303                    include_patterns: include_patterns.clone(),
304                    exclude_patterns: exclude_patterns.clone(),
305                }
306            },
307            Self::SecretsDetected { count, files } => {
308                Self::SecretsDetected { count: *count, files: files.clone() }
309            },
310            Self::HashCollision { id, hash1, hash2 } => {
311                Self::HashCollision { id: id.clone(), hash1: hash1.clone(), hash2: hash2.clone() }
312            },
313            Self::FileTooLarge { path, size, max } => {
314                Self::FileTooLarge { path: path.clone(), size: *size, max: *max }
315            },
316            Self::LineTooLong { path, length, max } => {
317                Self::LineTooLong { path: path.clone(), length: *length, max: *max }
318            },
319            Self::TooManyChunks { count, max } => Self::TooManyChunks { count: *count, max: *max },
320            Self::TooManyFiles { count, max } => Self::TooManyFiles { count: *count, max: *max },
321            Self::RecursionLimitExceeded { depth, max, context } => {
322                Self::RecursionLimitExceeded { depth: *depth, max: *max, context: context.clone() }
323            },
324            Self::PathTraversal { path, repo_root } => {
325                Self::PathTraversal { path: path.clone(), repo_root: repo_root.clone() }
326            },
327            Self::IoError { path, source } => Self::IoError {
328                path: path.clone(),
329                source: std::io::Error::new(source.kind(), source.to_string()),
330            },
331            Self::ParseError { file, line, message } => {
332                Self::ParseError { file: file.clone(), line: *line, message: message.clone() }
333            },
334            Self::SerializationError { reason } => {
335                Self::SerializationError { reason: reason.clone() }
336            },
337            Self::DeserializationError { reason } => {
338                Self::DeserializationError { reason: reason.clone() }
339            },
340            Self::UnsupportedAlgorithmVersion { found, max_supported } => {
341                Self::UnsupportedAlgorithmVersion { found: *found, max_supported: *max_supported }
342            },
343            Self::MultipleErrors { errors } => Self::MultipleErrors { errors: errors.clone() },
344            Self::NotADirectory { path } => Self::NotADirectory { path: path.clone() },
345            Self::InvalidPattern { pattern, reason } => {
346                Self::InvalidPattern { pattern: pattern.clone(), reason: reason.clone() }
347            },
348            Self::TooManyErrors { count, max } => Self::TooManyErrors { count: *count, max: *max },
349        }
350    }
351}
352
#[cfg(test)]
mod tests {
    use super::*;

    /// The rendered message names the offending field and carries a fix hint.
    #[test]
    fn test_error_display() {
        let err = EmbedError::InvalidSettings {
            field: "max_tokens".to_owned(),
            reason: "exceeds limit of 100000".to_owned(),
        };
        let msg = err.to_string();
        assert!(msg.contains("max_tokens"));
        assert!(msg.contains("Fix:"));
    }

    /// Every per-file entry shows up in the combined message.
    #[test]
    fn test_from_file_errors() {
        let errors = vec![
            (
                PathBuf::from("src/foo.rs"),
                EmbedError::FileTooLarge {
                    path: PathBuf::from("src/foo.rs"),
                    size: 20_000_000,
                    max: 10_000_000,
                },
            ),
            (
                PathBuf::from("src/bar.rs"),
                EmbedError::ParseError {
                    file: "src/bar.rs".to_owned(),
                    line: 42,
                    message: "unexpected token".to_owned(),
                },
            ),
        ];

        let combined = EmbedError::from_file_errors(errors);
        let msg = combined.to_string();
        assert!(msg.contains("src/foo.rs"));
        assert!(msg.contains("src/bar.rs"));
    }

    #[test]
    fn test_is_critical() {
        assert!(EmbedError::TooManyChunks { count: 100, max: 50 }.is_critical());
        assert!(EmbedError::PathTraversal {
            path: PathBuf::from("/etc/passwd"),
            repo_root: PathBuf::from("/home/user/repo"),
        }
        .is_critical());
        assert!(!EmbedError::FileTooLarge { path: PathBuf::from("big.bin"), size: 100, max: 50 }
            .is_critical());
    }

    #[test]
    fn test_is_skippable() {
        assert!(EmbedError::FileTooLarge { path: PathBuf::from("big.bin"), size: 100, max: 50 }
            .is_skippable());
        assert!(EmbedError::ParseError {
            file: "bad.rs".to_owned(),
            line: 1,
            message: "syntax error".to_owned(),
        }
        .is_skippable());
        assert!(!EmbedError::TooManyChunks { count: 100, max: 50 }.is_skippable());
    }

    /// Exercises the hand-written `Clone` impl, including the `IoError` arm
    /// that has to rebuild the non-`Clone` `std::io::Error`.
    #[test]
    fn test_error_clone() {
        let err = EmbedError::HashCollision {
            id: "ec_123".to_owned(),
            hash1: "abc".to_owned(),
            hash2: "def".to_owned(),
        };
        // Call `clone()` explicitly; a plain `let cloned = err;` would only move.
        let cloned = err.clone();
        assert!(matches!(cloned, EmbedError::HashCollision { .. }));
        assert_eq!(cloned.to_string(), err.to_string());

        let io_err = EmbedError::IoError {
            path: PathBuf::from("missing.txt"),
            source: std::io::Error::new(std::io::ErrorKind::NotFound, "not found"),
        };
        match io_err.clone() {
            EmbedError::IoError { path, source } => {
                assert_eq!(path, PathBuf::from("missing.txt"));
                assert_eq!(source.kind(), std::io::ErrorKind::NotFound);
                assert_eq!(source.to_string(), "not found");
            },
            other => panic!("expected IoError, got {:?}", other),
        }
    }

    #[test]
    fn test_exit_codes() {
        // User errors: 1
        assert_eq!(
            EmbedError::InvalidSettings {
                field: "max_tokens".to_owned(),
                reason: "too high".to_owned()
            }
            .exit_code(),
            1
        );
        assert_eq!(
            EmbedError::InvalidPattern {
                pattern: "**[".to_owned(),
                reason: "unclosed bracket".to_owned()
            }
            .exit_code(),
            1
        );

        // Input errors: 2
        assert_eq!(
            EmbedError::NoChunksGenerated {
                include_patterns: "*.xyz".to_owned(),
                exclude_patterns: "".to_owned()
            }
            .exit_code(),
            2
        );
        assert_eq!(
            EmbedError::NotADirectory { path: PathBuf::from("/tmp/file.txt") }.exit_code(),
            2
        );

        // Security - secrets: 3
        assert_eq!(
            EmbedError::SecretsDetected { count: 5, files: "config.py".to_owned() }.exit_code(),
            3
        );

        // Security - path traversal: 4
        assert_eq!(
            EmbedError::PathTraversal {
                path: PathBuf::from("../../../etc/passwd"),
                repo_root: PathBuf::from("/repo")
            }
            .exit_code(),
            4
        );

        // Manifest errors: 10
        assert_eq!(
            EmbedError::ManifestVersionTooNew { found: 99, max_supported: 2 }.exit_code(),
            10
        );
        assert_eq!(
            EmbedError::ManifestCorrupted {
                path: PathBuf::from(".infiniloom-embed.bin"),
                expected: "abc".to_owned(),
                actual: "def".to_owned()
            }
            .exit_code(),
            10
        );

        // Resource limits: 11
        assert_eq!(EmbedError::TooManyChunks { count: 100000, max: 50000 }.exit_code(), 11);
        assert_eq!(EmbedError::TooManyFiles { count: 10000, max: 5000 }.exit_code(), 11);
        assert_eq!(
            EmbedError::FileTooLarge {
                path: PathBuf::from("big.bin"),
                size: 100_000_000,
                max: 10_000_000
            }
            .exit_code(),
            11
        );

        // System errors: 12
        assert_eq!(
            EmbedError::IoError {
                path: PathBuf::from("/tmp"),
                source: std::io::Error::new(std::io::ErrorKind::NotFound, "not found")
            }
            .exit_code(),
            12
        );
        assert_eq!(EmbedError::SerializationError { reason: "failed".to_owned() }.exit_code(), 12);

        // Internal errors: 13
        assert_eq!(
            EmbedError::HashCollision {
                id: "ec_123".to_owned(),
                hash1: "abc".to_owned(),
                hash2: "def".to_owned()
            }
            .exit_code(),
            13
        );

        // Parse errors: 14
        assert_eq!(
            EmbedError::ParseError {
                file: "bad.rs".to_owned(),
                line: 42,
                message: "syntax error".to_owned()
            }
            .exit_code(),
            14
        );

        // Multiple errors: 15
        assert_eq!(
            EmbedError::MultipleErrors { errors: "error1\nerror2".to_owned() }.exit_code(),
            15
        );
    }

    #[test]
    fn test_error_codes() {
        assert_eq!(
            EmbedError::InvalidSettings { field: "x".to_owned(), reason: "y".to_owned() }
                .error_code(),
            "E001_INVALID_SETTINGS"
        );
        assert_eq!(
            EmbedError::SecretsDetected { count: 1, files: "f".to_owned() }.error_code(),
            "E005_SECRETS_DETECTED"
        );
        assert_eq!(
            EmbedError::HashCollision {
                id: "i".to_owned(),
                hash1: "a".to_owned(),
                hash2: "b".to_owned()
            }
            .error_code(),
            "E040_HASH_COLLISION"
        );
    }
}