1use std::path::{Path, PathBuf};
14use thiserror::Error;
15
/// Renders `path` for display with the user's home directory replaced by `~`.
///
/// The home directory is looked up in the `HOME` environment variable first
/// and in `USERPROFILE` second. The first variable that is set, non-empty and
/// a prefix of `path` wins, and the result is `~/` followed by the remainder
/// of the path. A path equal to the home directory itself renders as `~/`.
///
/// If neither variable matches, the path is rendered unchanged via
/// [`Path::display`].
///
/// Empty variables are skipped on purpose: an empty prefix "matches" every
/// path, which would turn `/etc/passwd` into `~//etc/passwd`.
pub fn sanitize_path(path: &Path) -> String {
    // Checked in order; `USERPROFILE` is only consulted when `HOME` is unset,
    // empty, or not a prefix of `path`.
    const HOME_VARS: [&str; 2] = ["HOME", "USERPROFILE"];

    HOME_VARS
        .iter()
        // `var_os` also accepts home directories that are not valid UTF-8.
        .filter_map(|name| std::env::var_os(name))
        .filter(|home| !home.is_empty())
        .find_map(|home| {
            path.strip_prefix(Path::new(&home))
                .ok()
                .map(|relative| format!("~/{}", relative.display()))
        })
        .unwrap_or_else(|| path.display().to_string())
}
38
39pub fn sanitize_pathbuf(path: &PathBuf) -> String {
41 sanitize_path(path.as_path())
42}
43
44#[derive(Debug, Clone)]
46pub struct SafePath(pub PathBuf);
47
48impl std::fmt::Display for SafePath {
49 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
50 write!(f, "{}", sanitize_path(&self.0))
51 }
52}
53
/// Errors reported by the embed pipeline.
///
/// Every variant carries a user-facing message (generated by `thiserror`)
/// and most messages end with a `Fix:` hint. Classification helpers
/// (`is_critical`, `is_skippable`, `exit_code`, `error_code`) live in the
/// inherent `impl` block for this type.
#[derive(Debug, Error)]
pub enum EmbedError {
    /// A setting is invalid; `field` names the setting and `reason` says why.
    #[error(
        "Invalid settings: {field} - {reason}\n\nFix: Check your --{field} argument or config file"
    )]
    InvalidSettings { field: String, reason: String },

    /// The manifest declares version `found`, which is above `max_supported`.
    #[error("Manifest version {found} is newer than supported version {max_supported}\n\nFix: Upgrade infiniloom to latest version, or delete manifest and rebuild:\n rm .infiniloom-embed.bin && infiniloom embed")]
    ManifestVersionTooNew { found: u32, max_supported: u32 },

    /// The manifest at `path` has checksum `actual` instead of `expected`.
    #[error("Manifest corrupted or tampered\n Path: {path}\n Expected checksum: {expected}\n Actual checksum: {actual}\n\nFix: Delete manifest and rebuild:\n rm {path} && infiniloom embed", path = path.display())]
    ManifestCorrupted { path: PathBuf, expected: String, actual: String },

    /// Settings differ from the previous run; both are carried as text.
    #[error("Settings changed since last run\n\nPrevious: {previous}\nCurrent: {current}\n\nImpact: All chunk IDs may change\n\nFix: Run with --full to rebuild, or restore original settings")]
    SettingsChanged { previous: String, current: String },

    /// No chunks were produced; the active include/exclude patterns are
    /// echoed back in the message.
    #[error("No code chunks found\n\nPossible causes:\n - Include patterns too restrictive: {include_patterns}\n - Exclude patterns too broad: {exclude_patterns}\n - No supported languages in repository\n\nFix: Check -i/--include and -e/--exclude patterns")]
    NoChunksGenerated { include_patterns: String, exclude_patterns: String },

    /// Secrets were found in `count` chunks; `files` is the pre-formatted
    /// list of affected files inserted into the message.
    #[error("Secrets detected in {count} chunks\n\nFiles with secrets:\n{files}\n\nFix: Either:\n 1. Remove secrets from code\n 2. Use --redact-secrets to mask them\n 3. Use --no-scan-secrets to skip scanning (not recommended)")]
    SecretsDetected { count: usize, files: String },

    /// A glob `pattern` was rejected; `reason` carries the underlying error text.
    #[error("Invalid glob pattern: '{pattern}'\n Error: {reason}\n\nFix: Check -i/--include or -e/--exclude pattern syntax.\n Examples: '*.rs', 'src/**/*.ts', '!tests/*'")]
    InvalidPattern { pattern: String, reason: String },

    /// Chunk ID `id` is associated with two different hashes.
    #[error("Hash collision detected!\n Chunk ID: {id}\n Hash 1: {hash1}\n Hash 2: {hash2}\n\nThis is extremely rare. Please report at https://github.com/infiniloom/issues")]
    HashCollision { id: String, hash1: String, hash2: String },

    /// The file at `path` is `size` bytes, above the limit `max`.
    #[error("File too large: {path} ({size} bytes, max: {max})\n\nFix: Exclude large files with -e/--exclude pattern, or increase --max-file-size", path = path.display())]
    FileTooLarge { path: PathBuf, size: u64, max: u64 },

    /// A line in `path` is `length` characters long, above the limit `max`.
    #[error("Line too long in file: {path} ({length} chars, max: {max})\n\nThis is likely a minified file.\n\nFix: Exclude minified files with -e/--exclude pattern (e.g., '*.min.js'), or increase --max-line-length", path = path.display())]
    LineTooLong { path: PathBuf, length: usize, max: usize },

    /// `count` chunks were generated, above the limit `max`.
    #[error(
        "Too many chunks generated ({count}, max: {max})\n\nFix: Use more restrictive include patterns, or increase --max-chunks limit"
    )]
    TooManyChunks { count: usize, max: usize },

    /// `count` files were selected for processing, above the limit `max`.
    #[error("Too many files to process ({count}, max: {max})\n\nFix: Use more restrictive include patterns, or increase --max-files limit")]
    TooManyFiles { count: usize, max: usize },

    /// Parsing recursed to `depth`, above `max`; `context` is free-form text
    /// included in the message.
    #[error("Recursion limit exceeded while parsing\n Depth: {depth}, Max: {max}\n Context: {context}\n\nFix: File may have unusual nesting. Exclude it with -e pattern")]
    RecursionLimitExceeded { depth: u32, max: u32, context: String },

    /// `path` was flagged as a traversal relative to `repo_root`.
    #[error("Path traversal detected\n Path: {path}\n Repo root: {repo_root}\n\nFix: Remove symlinks pointing outside repository, or use --no-follow-symlinks", path = path.display(), repo_root = repo_root.display())]
    PathTraversal { path: PathBuf, repo_root: PathBuf },

    /// An I/O operation on `path` failed; the underlying `std::io::Error` is
    /// kept as the error source.
    #[error("I/O error: {path}\n {source}", path = path.display())]
    IoError {
        path: PathBuf,
        #[source]
        source: std::io::Error,
    },

    /// A parse failure in `file` at `line`, with a descriptive `message`.
    #[error("Parse error in {file} at line {line}\n {message}\n\nFix: Fix syntax error or exclude file with -e pattern")]
    ParseError { file: String, line: u32, message: String },

    /// Serialization failed; `reason` carries the underlying error text.
    #[error("Serialization error: {reason}")]
    SerializationError { reason: String },

    /// Deserialization failed; `reason` carries the underlying error text.
    #[error("Deserialization error: {reason}\n\nFix: Manifest may be corrupted. Delete and rebuild:\n rm .infiniloom-embed.bin && infiniloom embed")]
    DeserializationError { reason: String },

    /// Algorithm version `found` is above `max_supported`.
    #[error("Unsupported algorithm version {found} (max supported: {max_supported})\n\nFix: Upgrade infiniloom or regenerate with current version")]
    UnsupportedAlgorithmVersion { found: u32, max_supported: u32 },

    /// Several per-file failures, pre-formatted into `errors`
    /// (see `EmbedError::from_file_errors`).
    #[error("Multiple files failed to process:\n{errors}\n\nFix: Address individual errors above")]
    MultipleErrors { errors: String },

    /// `path` was expected to be a directory but is not.
    #[error("Not a directory: {path}", path = path.display())]
    NotADirectory { path: PathBuf },

    /// `count` errors were encountered, above the tolerated maximum `max`.
    #[error("Too many errors encountered ({count}, max: {max})\n\nFix: Address individual errors, or increase error tolerance")]
    TooManyErrors { count: usize, max: usize },

    /// The SQLite-backed manifest failed; `reason` carries the error text.
    #[error("SQLite manifest error: {reason}\n\nFix: Delete the .infiniloom-embed.db file and retry, or check disk permissions")]
    SqliteError { reason: String },
}
137
138impl EmbedError {
139 pub fn from_file_errors(errors: Vec<(PathBuf, EmbedError)>) -> Self {
141 let formatted = errors
142 .iter()
143 .map(|(path, err)| format!(" {}: {}", path.display(), err))
144 .collect::<Vec<_>>()
145 .join("\n");
146 Self::MultipleErrors { errors: formatted }
147 }
148
149 pub fn is_critical(&self) -> bool {
151 matches!(
152 self,
153 EmbedError::TooManyChunks { .. }
154 | EmbedError::TooManyFiles { .. }
155 | EmbedError::PathTraversal { .. }
156 | EmbedError::HashCollision { .. }
157 | EmbedError::SecretsDetected { .. }
158 | EmbedError::ManifestCorrupted { .. }
159 | EmbedError::InvalidPattern { .. }
160 | EmbedError::InvalidSettings { .. }
161 )
162 }
163
164 pub fn is_skippable(&self) -> bool {
166 matches!(
167 self,
168 EmbedError::FileTooLarge { .. }
169 | EmbedError::LineTooLong { .. }
170 | EmbedError::ParseError { .. }
171 | EmbedError::IoError { .. }
172 | EmbedError::RecursionLimitExceeded { .. }
173 )
174 }
175
176 pub fn exit_code(&self) -> i32 {
213 match self {
214 EmbedError::InvalidSettings { .. } | EmbedError::InvalidPattern { .. } => 1,
216
217 EmbedError::NoChunksGenerated { .. } | EmbedError::NotADirectory { .. } => 2,
219
220 EmbedError::SecretsDetected { .. } => 3,
222
223 EmbedError::PathTraversal { .. } => 4,
225
226 EmbedError::ManifestVersionTooNew { .. }
228 | EmbedError::ManifestCorrupted { .. }
229 | EmbedError::SettingsChanged { .. }
230 | EmbedError::UnsupportedAlgorithmVersion { .. } => 10,
231
232 EmbedError::TooManyChunks { .. }
234 | EmbedError::TooManyFiles { .. }
235 | EmbedError::TooManyErrors { .. }
236 | EmbedError::RecursionLimitExceeded { .. }
237 | EmbedError::FileTooLarge { .. }
238 | EmbedError::LineTooLong { .. } => 11,
239
240 EmbedError::IoError { .. }
242 | EmbedError::SerializationError { .. }
243 | EmbedError::DeserializationError { .. }
244 | EmbedError::SqliteError { .. } => 12,
245
246 EmbedError::HashCollision { .. } => 13,
248
249 EmbedError::ParseError { .. } => 14,
251
252 EmbedError::MultipleErrors { .. } => 15,
254 }
255 }
256
257 pub fn error_code(&self) -> &'static str {
261 match self {
262 EmbedError::InvalidSettings { .. } => "E001_INVALID_SETTINGS",
263 EmbedError::InvalidPattern { .. } => "E002_INVALID_PATTERN",
264 EmbedError::NoChunksGenerated { .. } => "E003_NO_CHUNKS",
265 EmbedError::NotADirectory { .. } => "E004_NOT_DIRECTORY",
266 EmbedError::SecretsDetected { .. } => "E005_SECRETS_DETECTED",
267 EmbedError::PathTraversal { .. } => "E006_PATH_TRAVERSAL",
268 EmbedError::ManifestVersionTooNew { .. } => "E010_MANIFEST_VERSION",
269 EmbedError::ManifestCorrupted { .. } => "E011_MANIFEST_CORRUPTED",
270 EmbedError::SettingsChanged { .. } => "E012_SETTINGS_CHANGED",
271 EmbedError::UnsupportedAlgorithmVersion { .. } => "E013_ALGORITHM_VERSION",
272 EmbedError::TooManyChunks { .. } => "E020_TOO_MANY_CHUNKS",
273 EmbedError::TooManyFiles { .. } => "E021_TOO_MANY_FILES",
274 EmbedError::TooManyErrors { .. } => "E022_TOO_MANY_ERRORS",
275 EmbedError::RecursionLimitExceeded { .. } => "E023_RECURSION_LIMIT",
276 EmbedError::FileTooLarge { .. } => "E024_FILE_TOO_LARGE",
277 EmbedError::LineTooLong { .. } => "E025_LINE_TOO_LONG",
278 EmbedError::IoError { .. } => "E030_IO_ERROR",
279 EmbedError::SerializationError { .. } => "E031_SERIALIZATION",
280 EmbedError::DeserializationError { .. } => "E032_DESERIALIZATION",
281 EmbedError::HashCollision { .. } => "E040_HASH_COLLISION",
282 EmbedError::ParseError { .. } => "E050_PARSE_ERROR",
283 EmbedError::MultipleErrors { .. } => "E099_MULTIPLE_ERRORS",
284 EmbedError::SqliteError { .. } => "E033_SQLITE_ERROR",
285 }
286 }
287}
288
289impl Clone for EmbedError {
290 fn clone(&self) -> Self {
291 match self {
292 Self::InvalidSettings { field, reason } => {
293 Self::InvalidSettings { field: field.clone(), reason: reason.clone() }
294 },
295 Self::ManifestVersionTooNew { found, max_supported } => {
296 Self::ManifestVersionTooNew { found: *found, max_supported: *max_supported }
297 },
298 Self::ManifestCorrupted { path, expected, actual } => Self::ManifestCorrupted {
299 path: path.clone(),
300 expected: expected.clone(),
301 actual: actual.clone(),
302 },
303 Self::SettingsChanged { previous, current } => {
304 Self::SettingsChanged { previous: previous.clone(), current: current.clone() }
305 },
306 Self::NoChunksGenerated { include_patterns, exclude_patterns } => {
307 Self::NoChunksGenerated {
308 include_patterns: include_patterns.clone(),
309 exclude_patterns: exclude_patterns.clone(),
310 }
311 },
312 Self::SecretsDetected { count, files } => {
313 Self::SecretsDetected { count: *count, files: files.clone() }
314 },
315 Self::HashCollision { id, hash1, hash2 } => {
316 Self::HashCollision { id: id.clone(), hash1: hash1.clone(), hash2: hash2.clone() }
317 },
318 Self::FileTooLarge { path, size, max } => {
319 Self::FileTooLarge { path: path.clone(), size: *size, max: *max }
320 },
321 Self::LineTooLong { path, length, max } => {
322 Self::LineTooLong { path: path.clone(), length: *length, max: *max }
323 },
324 Self::TooManyChunks { count, max } => Self::TooManyChunks { count: *count, max: *max },
325 Self::TooManyFiles { count, max } => Self::TooManyFiles { count: *count, max: *max },
326 Self::RecursionLimitExceeded { depth, max, context } => {
327 Self::RecursionLimitExceeded { depth: *depth, max: *max, context: context.clone() }
328 },
329 Self::PathTraversal { path, repo_root } => {
330 Self::PathTraversal { path: path.clone(), repo_root: repo_root.clone() }
331 },
332 Self::IoError { path, source } => Self::IoError {
333 path: path.clone(),
334 source: std::io::Error::new(source.kind(), source.to_string()),
335 },
336 Self::ParseError { file, line, message } => {
337 Self::ParseError { file: file.clone(), line: *line, message: message.clone() }
338 },
339 Self::SerializationError { reason } => {
340 Self::SerializationError { reason: reason.clone() }
341 },
342 Self::DeserializationError { reason } => {
343 Self::DeserializationError { reason: reason.clone() }
344 },
345 Self::UnsupportedAlgorithmVersion { found, max_supported } => {
346 Self::UnsupportedAlgorithmVersion { found: *found, max_supported: *max_supported }
347 },
348 Self::MultipleErrors { errors } => Self::MultipleErrors { errors: errors.clone() },
349 Self::NotADirectory { path } => Self::NotADirectory { path: path.clone() },
350 Self::InvalidPattern { pattern, reason } => {
351 Self::InvalidPattern { pattern: pattern.clone(), reason: reason.clone() }
352 },
353 Self::TooManyErrors { count, max } => Self::TooManyErrors { count: *count, max: *max },
354 Self::SqliteError { reason } => Self::SqliteError { reason: reason.clone() },
355 }
356 }
357}
358
/// Unit tests for `EmbedError`: message rendering, aggregation,
/// classification helpers, the manual `Clone` impl, and exit/error codes.
#[cfg(test)]
mod tests {
    use super::*;

    /// The rendered message names the offending field and includes a fix hint.
    #[test]
    fn test_error_display() {
        let err = EmbedError::InvalidSettings {
            field: "max_tokens".to_owned(),
            reason: "exceeds limit of 100000".to_owned(),
        };
        let msg = err.to_string();
        assert!(msg.contains("max_tokens"));
        assert!(msg.contains("Fix:"));
    }

    /// Aggregated errors mention every failing file.
    #[test]
    fn test_from_file_errors() {
        let errors = vec![
            (
                PathBuf::from("src/foo.rs"),
                EmbedError::FileTooLarge {
                    path: PathBuf::from("src/foo.rs"),
                    size: 20_000_000,
                    max: 10_000_000,
                },
            ),
            (
                PathBuf::from("src/bar.rs"),
                EmbedError::ParseError {
                    file: "src/bar.rs".to_owned(),
                    line: 42,
                    message: "unexpected token".to_owned(),
                },
            ),
        ];

        let combined = EmbedError::from_file_errors(errors);
        let msg = combined.to_string();
        assert!(msg.contains("src/foo.rs"));
        assert!(msg.contains("src/bar.rs"));
    }

    #[test]
    fn test_is_critical() {
        assert!(EmbedError::TooManyChunks { count: 100, max: 50 }.is_critical());
        assert!(EmbedError::PathTraversal {
            path: PathBuf::from("/etc/passwd"),
            repo_root: PathBuf::from("/home/user/repo"),
        }
        .is_critical());
        assert!(!EmbedError::FileTooLarge { path: PathBuf::from("big.bin"), size: 100, max: 50 }
            .is_critical());
    }

    #[test]
    fn test_is_skippable() {
        assert!(EmbedError::FileTooLarge { path: PathBuf::from("big.bin"), size: 100, max: 50 }
            .is_skippable());
        assert!(EmbedError::ParseError {
            file: "bad.rs".to_owned(),
            line: 1,
            message: "syntax error".to_owned(),
        }
        .is_skippable());
        assert!(!EmbedError::TooManyChunks { count: 100, max: 50 }.is_skippable());
    }

    /// Cloning goes through the hand-written `Clone` impl and preserves both
    /// the variant and the rendered message.
    #[test]
    fn test_error_clone() {
        let err = EmbedError::HashCollision {
            id: "ec_123".to_owned(),
            hash1: "abc".to_owned(),
            hash2: "def".to_owned(),
        };
        // Must be a real `.clone()` call: a plain move would bypass the impl.
        let cloned = err.clone();
        assert!(matches!(cloned, EmbedError::HashCollision { .. }));
        // The original is used after the clone, so the clone is not redundant.
        assert_eq!(err.to_string(), cloned.to_string());
        assert_eq!(err.error_code(), cloned.error_code());
    }

    /// `IoError` rebuilds its source on clone; kind and message must survive.
    #[test]
    fn test_io_error_clone_preserves_kind_and_message() {
        let err = EmbedError::IoError {
            path: PathBuf::from("/tmp/missing"),
            source: std::io::Error::new(std::io::ErrorKind::NotFound, "not found"),
        };
        let cloned = err.clone();
        match &cloned {
            EmbedError::IoError { path, source } => {
                assert_eq!(path, &PathBuf::from("/tmp/missing"));
                assert_eq!(source.kind(), std::io::ErrorKind::NotFound);
                assert_eq!(source.to_string(), "not found");
            }
            other => panic!("expected IoError, got {other:?}"),
        }
        assert_eq!(err.to_string(), cloned.to_string());
    }

    #[test]
    fn test_exit_codes() {
        // 1: settings / pattern problems.
        assert_eq!(
            EmbedError::InvalidSettings {
                field: "max_tokens".to_owned(),
                reason: "too high".to_owned()
            }
            .exit_code(),
            1
        );
        assert_eq!(
            EmbedError::InvalidPattern {
                pattern: "**[".to_owned(),
                reason: "unclosed bracket".to_owned()
            }
            .exit_code(),
            1
        );

        // 2: nothing to process.
        assert_eq!(
            EmbedError::NoChunksGenerated {
                include_patterns: "*.xyz".to_owned(),
                exclude_patterns: "".to_owned()
            }
            .exit_code(),
            2
        );
        assert_eq!(
            EmbedError::NotADirectory { path: PathBuf::from("/tmp/file.txt") }.exit_code(),
            2
        );

        // 3: secrets.
        assert_eq!(
            EmbedError::SecretsDetected { count: 5, files: "config.py".to_owned() }.exit_code(),
            3
        );

        // 4: path traversal.
        assert_eq!(
            EmbedError::PathTraversal {
                path: PathBuf::from("../../../etc/passwd"),
                repo_root: PathBuf::from("/repo")
            }
            .exit_code(),
            4
        );

        // 10: manifest / version problems.
        assert_eq!(
            EmbedError::ManifestVersionTooNew { found: 99, max_supported: 2 }.exit_code(),
            10
        );
        assert_eq!(
            EmbedError::ManifestCorrupted {
                path: PathBuf::from(".infiniloom-embed.bin"),
                expected: "abc".to_owned(),
                actual: "def".to_owned()
            }
            .exit_code(),
            10
        );

        // 11: resource limits.
        assert_eq!(EmbedError::TooManyChunks { count: 100000, max: 50000 }.exit_code(), 11);
        assert_eq!(EmbedError::TooManyFiles { count: 10000, max: 5000 }.exit_code(), 11);
        assert_eq!(
            EmbedError::FileTooLarge {
                path: PathBuf::from("big.bin"),
                size: 100_000_000,
                max: 10_000_000
            }
            .exit_code(),
            11
        );

        // 12: I/O and serialization.
        assert_eq!(
            EmbedError::IoError {
                path: PathBuf::from("/tmp"),
                source: std::io::Error::new(std::io::ErrorKind::NotFound, "not found")
            }
            .exit_code(),
            12
        );
        assert_eq!(EmbedError::SerializationError { reason: "failed".to_owned() }.exit_code(), 12);

        // 13: hash collision.
        assert_eq!(
            EmbedError::HashCollision {
                id: "ec_123".to_owned(),
                hash1: "abc".to_owned(),
                hash2: "def".to_owned()
            }
            .exit_code(),
            13
        );

        // 14: parse error.
        assert_eq!(
            EmbedError::ParseError {
                file: "bad.rs".to_owned(),
                line: 42,
                message: "syntax error".to_owned()
            }
            .exit_code(),
            14
        );

        // 15: aggregated errors.
        assert_eq!(
            EmbedError::MultipleErrors { errors: "error1\nerror2".to_owned() }.exit_code(),
            15
        );
    }

    #[test]
    fn test_error_codes() {
        assert_eq!(
            EmbedError::InvalidSettings { field: "x".to_owned(), reason: "y".to_owned() }
                .error_code(),
            "E001_INVALID_SETTINGS"
        );
        assert_eq!(
            EmbedError::SecretsDetected { count: 1, files: "f".to_owned() }.error_code(),
            "E005_SECRETS_DETECTED"
        );
        assert_eq!(
            EmbedError::HashCollision {
                id: "i".to_owned(),
                hash1: "a".to_owned(),
                hash2: "b".to_owned()
            }
            .error_code(),
            "E040_HASH_COLLISION"
        );
    }
}