use super::CoreError;
use alloc::{format, string::String};
use core::fmt;
/// Builds a [`CoreError::Utf8Error`] from a position and a description.
///
/// Both arguments are moved into the variant unchanged; no validation or
/// formatting is performed here.
#[must_use]
pub const fn utf8_error(position: usize, message: String) -> CoreError {
    CoreError::Utf8Error { position, message }
}
/// Wraps any displayable value in a [`CoreError::Validation`].
///
/// The value is rendered through its [`fmt::Display`] implementation and the
/// resulting string becomes the payload of the variant.
pub fn validation_error<T>(message: T) -> CoreError
where
    T: fmt::Display,
{
    // `format!` is used for the rendering because it is explicitly imported
    // from `alloc` at the top of this file.
    let rendered = format!("{message}");
    CoreError::Validation(rendered)
}
/// Checks that `bytes` is well-formed UTF-8 and reports where decoding fails.
///
/// # Errors
///
/// Returns the error built by [`utf8_error`] when `bytes` is not valid UTF-8.
/// The reported position is the length of the longest valid prefix, and the
/// message distinguishes input that stops in the middle of a sequence from
/// input that contains an invalid sequence of a known length.
pub fn validate_utf8_detailed(bytes: &[u8]) -> Result<(), CoreError> {
    core::str::from_utf8(bytes).map(|_| ()).map_err(|err| {
        let position = err.valid_up_to();
        // `error_len` is `None` when the input ended before the sequence was
        // complete, and `Some(len)` when `len` bytes are definitely invalid.
        let message = match err.error_len() {
            Some(len) => format!("Invalid UTF-8 sequence of {len} bytes at position {position}"),
            None => format!("Incomplete UTF-8 sequence at position {position}"),
        };
        utf8_error(position, message)
    })
}
/// Scans `text` and rejects the first character that is not allowed in ASS
/// text content, as decided by `is_valid_ass_char`.
///
/// # Errors
///
/// Returns the error built by [`validation_error`] for the first offending
/// character. The message contains the character in escaped form, its code
/// point in hexadecimal, and its byte offset within `text`.
pub fn validate_ass_text_content(text: &str) -> Result<(), CoreError> {
    let offender = text
        .char_indices()
        .find(|&(_, ch)| !is_valid_ass_char(ch));

    match offender {
        None => Ok(()),
        Some((offset, ch)) => {
            // Escape the character so control codes stay readable in the message.
            let escaped: String = ch.escape_default().collect();
            Err(validation_error(format!(
                "Invalid character '{}' (U+{:04X}) at position {}",
                escaped,
                u32::from(ch),
                offset
            )))
        }
    }
}
/// Tells whether `ch` is accepted in ASS text content.
///
/// ASCII is limited to graphic characters plus space, tab, line feed and
/// carriage return. Every non-ASCII character is accepted unless it is a
/// control character.
fn is_valid_ass_char(ch: char) -> bool {
    if ch.is_ascii() {
        ch.is_ascii_graphic() || matches!(ch, ' ' | '\t' | '\n' | '\r')
    } else {
        !ch.is_control()
    }
}
/// Inspects the start of `bytes` for a byte-order mark and rejects encodings
/// other than UTF-8.
///
/// Input that starts with the UTF-8 BOM (`EF BB BF`), or with no BOM at all,
/// is accepted. Input that starts with any other complete three-byte UTF-8
/// sequence led by `0xEF` (a character in U+F000..=U+FFFF, such as a
/// fullwidth form) is ordinary text and is accepted as well.
///
/// # Errors
///
/// Returns the error built by [`validation_error`] when `bytes` starts with:
///
/// * a UTF-32 BOM (`FF FE 00 00` or `00 00 FE FF`),
/// * a UTF-16 BOM (`FF FE` or `FE FF`),
/// * `EF BB` that is not continued by a UTF-8 continuation byte, or
/// * `EF` that is not the start of a complete three-byte sequence.
pub fn validate_bom_handling(bytes: &[u8]) -> Result<(), CoreError> {
    match bytes {
        // UTF-32 must be tested before UTF-16: the UTF-32 LE BOM begins with
        // the two bytes of the UTF-16 LE BOM and would otherwise be
        // misreported as UTF-16.
        [0xFF, 0xFE, 0x00, 0x00, ..] | [0x00, 0x00, 0xFE, 0xFF, ..] => Err(validation_error(
            "UTF-32 BOM detected - ASS files should be UTF-8",
        )),
        [0xFF, 0xFE, ..] | [0xFE, 0xFF, ..] => Err(validation_error(
            "UTF-16 BOM detected - ASS files should be UTF-8",
        )),
        // A complete three-byte sequence led by 0xEF: either the UTF-8 BOM
        // itself (EF BB BF) or an ordinary character in U+F000..=U+FFFF.
        // Neither is a damaged BOM.
        [0xEF, 0x80..=0xBF, 0x80..=0xBF, ..] => Ok(()),
        // From here on the 0xEF prefix is truncated or malformed.
        [0xEF, 0xBB, ..] => Err(validation_error(
            "Partial UTF-8 BOM detected - file may be corrupted or incorrectly encoded",
        )),
        [0xEF, ..] => Err(validation_error(
            "Suspicious byte sequence that could be partial BOM - check file encoding",
        )),
        _ => Ok(()),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    #[cfg(not(feature = "std"))]
    use alloc::string::ToString;

    /// `utf8_error` keeps the position it was given.
    #[test]
    fn utf8_error_creation() {
        let built = utf8_error(42, "test message".to_string());
        assert!(matches!(built, CoreError::Utf8Error { position: 42, .. }));
    }

    /// `validation_error` always yields the `Validation` variant.
    #[test]
    fn validation_error_creation() {
        let built = validation_error("invalid content");
        assert!(matches!(built, CoreError::Validation(_)));
    }

    /// Mixed ASCII, CJK and emoji text is valid UTF-8.
    #[test]
    fn validate_valid_utf8() {
        let sample = "Hello, 世界! 🎵";
        let outcome = validate_utf8_detailed(sample.as_bytes());
        assert!(outcome.is_ok());
    }

    /// Bytes that can never start a UTF-8 sequence are rejected.
    #[test]
    fn validate_invalid_utf8() {
        let malformed: [u8; 3] = [0xFF, 0xFE, 0x80];
        let outcome = validate_utf8_detailed(&malformed);
        assert!(outcome.is_err());
    }

    /// Plain text, common whitespace and non-ASCII letters pass.
    #[test]
    fn validate_ass_text_valid() {
        for sample in ["Hello World", "Hello\tWorld\n", "Hello 世界"] {
            assert!(validate_ass_text_content(sample).is_ok());
        }
    }

    /// Embedded ASCII control characters fail.
    #[test]
    fn validate_ass_text_invalid() {
        for sample in ["Hello\x00World", "Hello\x1FWorld"] {
            assert!(validate_ass_text_content(sample).is_err());
        }
    }

    /// Spot-checks the per-character classifier.
    #[test]
    fn valid_ass_char_check() {
        for accepted in ['A', ' ', '\n', '世'] {
            assert!(is_valid_ass_char(accepted));
        }
        for rejected in ['\x00', '\x1F'] {
            assert!(!is_valid_ass_char(rejected));
        }
    }

    /// A leading UTF-8 BOM is accepted.
    #[test]
    fn bom_validation_utf8() {
        let input: &[u8] = &[0xEF, 0xBB, 0xBF, b'H', b'i'];
        assert!(validate_bom_handling(input).is_ok());
    }

    /// A leading UTF-16 LE BOM is rejected.
    #[test]
    fn bom_validation_utf16() {
        let input: &[u8] = &[0xFF, 0xFE, b'H', 0x00, b'i', 0x00];
        assert!(validate_bom_handling(input).is_err());
    }

    /// Input without any BOM is accepted.
    #[test]
    fn bom_validation_no_bom() {
        let input: &[u8] = b"Hello World";
        assert!(validate_bom_handling(input).is_ok());
    }

    /// The first two BOM bytes followed by ASCII are rejected.
    #[test]
    fn bom_validation_partial_utf8() {
        let input: &[u8] = &[0xEF, 0xBB, b'H', b'i'];
        assert!(validate_bom_handling(input).is_err());
    }

    /// Only the first BOM byte followed by ASCII is rejected.
    #[test]
    fn bom_validation_single_ef_byte() {
        let input: &[u8] = &[0xEF, b'H', b'i'];
        assert!(validate_bom_handling(input).is_err());
    }

    /// A lone first BOM byte is rejected.
    #[test]
    fn bom_validation_ef_only() {
        let input: &[u8] = &[0xEF];
        assert!(validate_bom_handling(input).is_err());
    }
}