Skip to main content

base_d/encoders/algorithms/
errors.rs

1use std::fmt;
2
/// Errors that can occur during encoding.
///
/// Implements `Clone` alongside `Debug`/`PartialEq`/`Eq` so callers can keep a
/// copy of a failure (for example to report it and return it).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EncodeError {
    /// A byte mapped to an invalid Unicode codepoint during ByteRange encoding
    InvalidCodepoint {
        /// The invalid codepoint that was produced.
        codepoint: u32,
        /// Start codepoint reported alongside the failure.
        // NOTE(review): presumably `codepoint == start_codepoint + byte`;
        // confirm against the ByteRange encoder.
        start_codepoint: u32,
        /// The input byte that was being encoded.
        byte: u8,
    },
}
13
14impl fmt::Display for EncodeError {
15    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
16        match self {
17            EncodeError::InvalidCodepoint {
18                codepoint,
19                start_codepoint,
20                byte,
21            } => {
22                write!(
23                    f,
24                    "ByteRange encoding produced invalid codepoint U+{:04X} \
25                     (start_codepoint=U+{:04X}, byte=0x{:02X}). \
26                     This dictionary should have been rejected at construction time.",
27                    codepoint, start_codepoint, byte
28                )
29            }
30        }
31    }
32}
33
34impl std::error::Error for EncodeError {}
35
/// Errors that can occur during decoding.
///
/// Implements `Clone` alongside `Debug`/`PartialEq`/`Eq` so callers can keep a
/// copy of a failure (for example to report it and return it).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DecodeError {
    /// The input contains a character not in the dictionary
    InvalidCharacter {
        /// The offending character.
        char: char,
        /// Where the character was found; also used to place the caret when
        /// the error is displayed.
        // NOTE(review): unit (char index vs. byte offset) is not visible here;
        // confirm against the decoders that build this variant.
        position: usize,
        /// The input shown in the diagnostic (shortened to 60 characters when
        /// built through `DecodeError::invalid_character`).
        input: String,
        /// The characters the dictionary accepts, listed in the hint.
        valid_chars: String,
    },
    /// The input contains a word not in the word dictionary
    InvalidWord {
        /// The unrecognised word.
        word: String,
        /// Position reported in the message.
        position: usize,
        /// The input shown in the diagnostic (shortened to 80 characters when
        /// built through `DecodeError::invalid_word`).
        input: String,
    },
    /// The input string is empty
    EmptyInput,
    /// The padding is malformed or incorrect
    InvalidPadding,
    /// Invalid length for the encoding format
    InvalidLength {
        /// Length of the input, reported in characters.
        actual: usize,
        /// Human-readable description of the accepted length.
        expected: String,
        /// Advice printed on the `hint:` line.
        hint: String,
    },
}
63
/// Shorten `s` to its first `max_chars` characters, adding a trailing "..."
/// only when something was actually cut off.
///
/// The cut point is located by walking characters, so it always falls on a
/// UTF-8 boundary and slicing cannot panic on multi-byte input.
pub(crate) fn safe_truncate(s: &str, max_chars: usize) -> String {
    // A character at index `max_chars` exists only when the string is longer
    // than the limit; its byte offset is exactly where the kept prefix ends.
    match s.char_indices().nth(max_chars) {
        Some((cut, _)) => format!("{}...", &s[..cut]),
        None => s.to_string(),
    }
}
74
75impl DecodeError {
76    /// Create an InvalidCharacter error with context
77    pub fn invalid_character(c: char, position: usize, input: &str, valid_chars: &str) -> Self {
78        DecodeError::InvalidCharacter {
79            char: c,
80            position,
81            input: safe_truncate(input, 60),
82            valid_chars: valid_chars.to_string(),
83        }
84    }
85
86    /// Create an InvalidLength error
87    pub fn invalid_length(
88        actual: usize,
89        expected: impl Into<String>,
90        hint: impl Into<String>,
91    ) -> Self {
92        DecodeError::InvalidLength {
93            actual,
94            expected: expected.into(),
95            hint: hint.into(),
96        }
97    }
98
99    /// Create an InvalidWord error for word-based decoding
100    pub fn invalid_word(word: &str, position: usize, input: &str) -> Self {
101        DecodeError::InvalidWord {
102            word: word.to_string(),
103            position,
104            input: safe_truncate(input, 80),
105        }
106    }
107}
108
109impl fmt::Display for DecodeError {
110    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
111        let use_color = should_use_color();
112
113        match self {
114            DecodeError::InvalidCharacter {
115                char: c,
116                position,
117                input,
118                valid_chars,
119            } => {
120                // Error header
121                if use_color {
122                    writeln!(
123                        f,
124                        "\x1b[1;31merror:\x1b[0m invalid character '{}' at position {}",
125                        c, position
126                    )?;
127                } else {
128                    writeln!(
129                        f,
130                        "error: invalid character '{}' at position {}",
131                        c, position
132                    )?;
133                }
134                writeln!(f)?;
135
136                // Show input with caret pointing at error position
137                // Need to account for multi-byte UTF-8 characters
138                let char_position = input.chars().take(*position).count();
139                writeln!(f, "  {}", input)?;
140                write!(f, "  {}", " ".repeat(char_position))?;
141                if use_color {
142                    writeln!(f, "\x1b[1;31m^\x1b[0m")?;
143                } else {
144                    writeln!(f, "^")?;
145                }
146                writeln!(f)?;
147
148                // Hint with valid characters (truncate if too long)
149                let hint_chars = safe_truncate(valid_chars, 80);
150
151                if use_color {
152                    write!(f, "\x1b[1;36mhint:\x1b[0m valid characters: {}", hint_chars)?;
153                } else {
154                    write!(f, "hint: valid characters: {}", hint_chars)?;
155                }
156                Ok(())
157            }
158            DecodeError::InvalidWord {
159                word,
160                position,
161                input,
162            } => {
163                if use_color {
164                    writeln!(
165                        f,
166                        "\x1b[1;31merror:\x1b[0m unknown word '{}' at position {}",
167                        word, position
168                    )?;
169                } else {
170                    writeln!(f, "error: unknown word '{}' at position {}", word, position)?;
171                }
172                writeln!(f)?;
173                writeln!(f, "  {}", input)?;
174                writeln!(f)?;
175                if use_color {
176                    write!(
177                        f,
178                        "\x1b[1;36mhint:\x1b[0m check spelling or verify word is in dictionary"
179                    )?;
180                } else {
181                    write!(f, "hint: check spelling or verify word is in dictionary")?;
182                }
183                Ok(())
184            }
185            DecodeError::EmptyInput => {
186                if use_color {
187                    write!(f, "\x1b[1;31merror:\x1b[0m cannot decode empty input")?;
188                } else {
189                    write!(f, "error: cannot decode empty input")?;
190                }
191                Ok(())
192            }
193            DecodeError::InvalidPadding => {
194                if use_color {
195                    writeln!(f, "\x1b[1;31merror:\x1b[0m invalid padding")?;
196                    write!(
197                        f,
198                        "\n\x1b[1;36mhint:\x1b[0m check for missing or incorrect '=' characters at end of input"
199                    )?;
200                } else {
201                    writeln!(f, "error: invalid padding")?;
202                    write!(
203                        f,
204                        "\nhint: check for missing or incorrect '=' characters at end of input"
205                    )?;
206                }
207                Ok(())
208            }
209            DecodeError::InvalidLength {
210                actual,
211                expected,
212                hint,
213            } => {
214                if use_color {
215                    writeln!(f, "\x1b[1;31merror:\x1b[0m invalid length for decode",)?;
216                } else {
217                    writeln!(f, "error: invalid length for decode")?;
218                }
219                writeln!(f)?;
220                writeln!(f, "  input is {} characters, expected {}", actual, expected)?;
221                writeln!(f)?;
222                if use_color {
223                    write!(f, "\x1b[1;36mhint:\x1b[0m {}", hint)?;
224                } else {
225                    write!(f, "hint: {}", hint)?;
226                }
227                Ok(())
228            }
229        }
230    }
231}
232
233impl std::error::Error for DecodeError {}
234
/// Check if colored output should be used.
///
/// Color is disabled when the `NO_COLOR` environment variable is present with
/// a non-empty value (whatever that value is, including non-Unicode data);
/// an empty `NO_COLOR` is treated as unset, as the NO_COLOR convention asks.
/// Otherwise color is used only when stderr is attached to a terminal.
fn should_use_color() -> bool {
    use std::io::IsTerminal;

    // `var_os` instead of `var`: the value's encoding is irrelevant, only its
    // presence and emptiness matter.
    let opted_out = std::env::var_os("NO_COLOR").is_some_and(|value| !value.is_empty());

    !opted_out && std::io::stderr().is_terminal()
}
246
/// Error when a dictionary is not found
///
/// Implements `Clone`, `PartialEq` and `Eq` in addition to `Debug` so the
/// error can be duplicated and compared (e.g. in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DictionaryNotFoundError {
    /// The dictionary name that was requested.
    pub name: String,
    /// Optional text for the hint line: a suggested dictionary name, or the
    /// `build failed: ...` message stored by `with_cause`.
    pub suggestion: Option<String>,
}
253
254impl DictionaryNotFoundError {
255    pub fn new(name: impl Into<String>) -> Self {
256        Self {
257            name: name.into(),
258            suggestion: None,
259        }
260    }
261
262    pub fn with_suggestion(name: impl Into<String>, suggestion: Option<String>) -> Self {
263        Self {
264            name: name.into(),
265            suggestion,
266        }
267    }
268
269    pub fn with_cause(name: impl Into<String>, cause: impl std::fmt::Display) -> Self {
270        Self {
271            name: name.into(),
272            suggestion: Some(format!("build failed: {}", cause)),
273        }
274    }
275}
276
277impl fmt::Display for DictionaryNotFoundError {
278    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
279        let use_color = should_use_color();
280
281        if use_color {
282            writeln!(
283                f,
284                "\x1b[1;31merror:\x1b[0m dictionary '{}' not found",
285                self.name
286            )?;
287        } else {
288            writeln!(f, "error: dictionary '{}' not found", self.name)?;
289        }
290
291        writeln!(f)?;
292
293        if let Some(suggestion) = &self.suggestion {
294            if use_color {
295                writeln!(f, "\x1b[1;36mhint:\x1b[0m did you mean '{}'?", suggestion)?;
296            } else {
297                writeln!(f, "hint: did you mean '{}'?", suggestion)?;
298            }
299        }
300
301        if use_color {
302            write!(
303                f,
304                "      run \x1b[1m`base-d config --dictionaries`\x1b[0m to see all dictionaries"
305            )?;
306        } else {
307            write!(
308                f,
309                "      run `base-d config --dictionaries` to see all dictionaries"
310            )?;
311        }
312
313        Ok(())
314    }
315}
316
317impl std::error::Error for DictionaryNotFoundError {}
318
/// Compute the Levenshtein (edit) distance between `s1` and `s2`.
///
/// The distance is measured in Unicode scalar values (`char`s), not bytes,
/// and only two rows of the edit matrix are kept in memory at a time.
fn levenshtein_distance(s1: &str, s2: &str) -> usize {
    let target: Vec<char> = s2.chars().collect();
    let width = target.len();

    // Turning an empty string into the other one costs one edit per character.
    if s1.is_empty() {
        return width;
    }
    if width == 0 {
        return s1.chars().count();
    }

    // `above` is the finished previous row, `current` the row being filled.
    let mut above: Vec<usize> = (0..=width).collect();
    let mut current = vec![0usize; width + 1];

    for (row, a) in s1.chars().enumerate() {
        current[0] = row + 1;

        for (col, &b) in target.iter().enumerate() {
            let insertion = current[col] + 1;
            let deletion = above[col + 1] + 1;
            let substitution = above[col] + usize::from(a != b);
            current[col + 1] = insertion.min(deletion).min(substitution);
        }

        std::mem::swap(&mut above, &mut current);
    }

    // After the final swap the last completed row lives in `above`.
    above[width]
}
349
350/// Find the closest matching dictionary name
351pub fn find_closest_dictionary(name: &str, available: &[String]) -> Option<String> {
352    if available.is_empty() {
353        return None;
354    }
355
356    let mut best_match = None;
357    let mut best_distance = usize::MAX;
358
359    for dict_name in available {
360        let distance = levenshtein_distance(name, dict_name);
361
362        // Only suggest if distance is reasonably small
363        // (e.g., 1-2 character typos for short names, up to 3 for longer names)
364        let threshold = if name.len() < 5 { 2 } else { 3 };
365
366        if distance < best_distance && distance <= threshold {
367            best_distance = distance;
368            best_match = Some(dict_name.clone());
369        }
370    }
371
372    best_match
373}
374
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `body` with `NO_COLOR=1` exported and unsets the variable afterwards.
    ///
    /// The variable is left set if `body` panics, exactly as with inline
    /// set/remove calls around the assertions.
    fn with_no_color(body: impl FnOnce()) {
        // Unsafe: environment variable access (not thread-safe)
        // TODO: Audit that the environment access only happens in single-threaded code.
        unsafe {
            std::env::set_var("NO_COLOR", "1");
        }

        body();

        // Unsafe: environment variable access (not thread-safe)
        // TODO: Audit that the environment access only happens in single-threaded code.
        unsafe {
            std::env::remove_var("NO_COLOR");
        }
    }

    #[test]
    fn test_levenshtein_distance() {
        let cases = [
            ("base64", "base64", 0),
            ("base64", "base32", 2),
            ("bas64", "base64", 1),
            ("", "base64", 6),
        ];

        for &(left, right, expected) in &cases {
            assert_eq!(
                levenshtein_distance(left, right),
                expected,
                "distance between {:?} and {:?}",
                left,
                right
            );
        }
    }

    #[test]
    fn test_find_closest_dictionary() {
        let dicts: Vec<String> = ["base64", "base32", "base16", "hex"]
            .iter()
            .map(|name| name.to_string())
            .collect();

        let one_typo = find_closest_dictionary("bas64", &dicts);
        assert_eq!(one_typo, Some("base64".to_string()));

        let wrong_digit = find_closest_dictionary("base63", &dicts);
        assert_eq!(wrong_digit, Some("base64".to_string()));

        // too different
        let unrelated = find_closest_dictionary("hex_radix", &dicts);
        assert_eq!(unrelated, None);
    }

    #[test]
    fn test_error_display_no_color() {
        with_no_color(|| {
            let err =
                DecodeError::invalid_character('_', 12, "SGVsbG9faW52YWxpZA==", "A-Za-z0-9+/=");
            let rendered = err.to_string();

            assert!(rendered.contains("invalid character '_' at position 12"));
            assert!(rendered.contains("SGVsbG9faW52YWxpZA=="));
            assert!(rendered.contains("^"));
            assert!(rendered.contains("hint:"));
        });
    }

    #[test]
    fn test_invalid_length_error() {
        with_no_color(|| {
            let err = DecodeError::invalid_length(
                13,
                "multiple of 4",
                "add padding (=) or check for missing characters",
            );
            let rendered = err.to_string();

            assert!(rendered.contains("invalid length"));
            assert!(rendered.contains("13 characters"));
            assert!(rendered.contains("multiple of 4"));
            assert!(rendered.contains("add padding"));
        });
    }

    #[test]
    fn test_dictionary_not_found_error() {
        with_no_color(|| {
            let err =
                DictionaryNotFoundError::with_suggestion("bas64", Some("base64".to_string()));
            let rendered = err.to_string();

            assert!(rendered.contains("dictionary 'bas64' not found"));
            assert!(rendered.contains("did you mean 'base64'?"));
            assert!(rendered.contains("base-d config --dictionaries"));
        });
    }

    #[test]
    fn test_safe_truncate_multibyte() {
        // 20 chars, 80 bytes
        let mask = "\u{1F3AD}";
        let shortened = safe_truncate(&mask.repeat(20), 10);
        assert_eq!(shortened, format!("{}...", mask.repeat(10)));
    }

    #[test]
    fn test_safe_truncate_no_truncation() {
        let untouched = safe_truncate("hello", 10);
        assert_eq!(untouched, "hello");
    }

    #[test]
    fn test_safe_truncate_exact_boundary() {
        let untouched = safe_truncate("hello", 5);
        assert_eq!(untouched, "hello");
    }

    #[test]
    fn test_invalid_character_multibyte_no_panic() {
        // 30 alchemical symbols, 120 bytes
        let symbols = "\u{1F711}".repeat(30);
        // This must not panic -- the old &input[..60] would have
        let err = DecodeError::invalid_character('x', 0, &symbols, "abc");
        let _ = err.to_string();
    }
}