Skip to main content

ass_core/utils/utf8/validation/
validate.rs

1//! UTF-8 and ASS character validation checks
2//!
3//! Provides detailed UTF-8 validation with position-specific error reporting
4//! and content validation against the ASS-permitted character set.
5
6use crate::utils::CoreError;
7use alloc::format;
8use core::str;
9
10/// Validate UTF-8 with detailed error information
11///
12/// Provides more detailed error reporting than standard UTF-8 validation,
13/// including the position and nature of encoding errors. Essential for
14/// processing subtitle files with encoding issues.
15///
16/// # Arguments
17///
18/// * `bytes` - Byte sequence to validate
19///
20/// # Returns
21///
22/// `Ok(())` if valid UTF-8, detailed error with position if invalid
23///
24/// # Examples
25///
26/// ```rust
27/// # use ass_core::utils::utf8::validate_utf8;
28/// let valid_text = "Hello, 世界!";
29/// assert!(validate_utf8(valid_text.as_bytes()).is_ok());
30///
31/// let invalid_bytes = &[0xFF, 0xFE, 0x80];
32/// assert!(validate_utf8(invalid_bytes).is_err());
33/// ```
34///
35/// # Errors
36///
37/// Returns an error if the byte slice contains invalid UTF-8 sequences.
38pub fn validate_utf8(bytes: &[u8]) -> Result<(), CoreError> {
39    match str::from_utf8(bytes) {
40        Ok(_) => Ok(()),
41        Err(err) => {
42            let position = err.valid_up_to();
43            let message = err.error_len().map_or_else(
44                || format!("Incomplete UTF-8 sequence at position {position}"),
45                |len| format!("Invalid UTF-8 sequence of {len} bytes at position {position}"),
46            );
47
48            Err(CoreError::utf8_error(position, message))
49        }
50    }
51}
52
/// Report whether every character of `text` is acceptable in ASS content
///
/// A character is accepted when it is one of:
///
/// * space, tab, line feed or carriage return
/// * a printable (graphic) ASCII character
/// * any non-ASCII character that is not a control character
///
/// Every other character, such as NUL, DEL or other ASCII control codes,
/// causes the text to be rejected. An empty string is accepted.
///
/// # Arguments
///
/// * `text` - Text content to validate
///
/// # Returns
///
/// `true` if all characters are accepted, `false` as soon as one is not
#[must_use]
pub fn is_valid_ass_text(text: &str) -> bool {
    text.chars().all(|ch| match ch {
        // Whitespace that is explicitly permitted
        ' ' | '\t' | '\n' | '\r' => true,
        // Remaining ASCII must be printable
        _ if ch.is_ascii() => ch.is_ascii_graphic(),
        // Non-ASCII is allowed unless it is a control character
        _ => !ch.is_control(),
    })
}