mod bom;
mod encoding;
mod normalization;
mod validation;
pub use bom::{detect_bom, strip_bom, BomType};
pub use encoding::{detect_encoding, is_likely_ass_content, EncodingInfo};
pub use normalization::{
normalize_line_endings, normalize_whitespace, remove_control_chars, trim_lines,
};
pub use validation::{
count_replacement_chars, is_valid_ass_text, recover_utf8, truncate_at_char_boundary,
validate_utf8,
};
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn integration_bom_detection() {
let text_with_bom = "\u{FEFF}Hello World";
let (stripped, had_bom) = strip_bom(text_with_bom);
assert_eq!(stripped, "Hello World");
assert!(had_bom);
let bytes = &[0xEF, 0xBB, 0xBF, b'H', b'i'];
let (bom_type, skip) = detect_bom(bytes).unwrap();
assert_eq!(bom_type, BomType::Utf8);
assert_eq!(skip, 3);
}
#[test]
fn integration_encoding_detection() {
let text = "[Script Info]\nTitle: Test Script";
let encoding = detect_encoding(text.as_bytes());
assert_eq!(encoding.encoding, "UTF-8");
assert!(encoding.confidence > 0.9); assert!(!encoding.has_bom);
assert!(is_likely_ass_content(text));
assert!(!is_likely_ass_content("Just regular text"));
}
#[test]
fn integration_validation_and_recovery() {
let valid_text = "Hello, 世界! 🎵";
assert!(validate_utf8(valid_text.as_bytes()).is_ok());
let invalid_bytes = &[b'H', b'i', 0xFF, b'!'];
let (recovered, replacements) = recover_utf8(invalid_bytes);
assert_eq!(recovered, "Hi�!");
assert_eq!(replacements, 1);
assert_eq!(count_replacement_chars(&recovered), 1);
}
#[test]
fn integration_normalization() {
let input = "Line 1\r\nLine 2\rLine 3\n";
let normalized = normalize_line_endings(input);
assert_eq!(normalized, "Line 1\nLine 2\nLine 3\n");
let whitespace_text = "Hello World Test";
let normalized_ws = normalize_whitespace(whitespace_text, true);
assert_eq!(normalized_ws, "Hello World Test");
let input_with_control = "Hello\x00World\x1FTest\nValid";
let cleaned = remove_control_chars(input_with_control);
assert_eq!(cleaned, "HelloWorldTest\nValid");
}
#[test]
fn integration_text_validation() {
assert!(is_valid_ass_text("Hello World"));
assert!(is_valid_ass_text("Hello\tWorld\n"));
assert!(is_valid_ass_text("Hello 世界"));
assert!(!is_valid_ass_text("Hello\x00World")); assert!(!is_valid_ass_text("Hello\x1FWorld")); }
#[test]
fn integration_truncation() {
let text = "Hello World";
let (truncated, was_truncated) = truncate_at_char_boundary(text, 5);
assert_eq!(truncated, "Hello");
assert!(was_truncated);
let unicode_text = "Hello 世界";
let (truncated, was_truncated) = truncate_at_char_boundary(unicode_text, 8);
assert_eq!(truncated, "Hello "); assert!(was_truncated);
let short_text = "Hi";
let (truncated, was_truncated) = truncate_at_char_boundary(short_text, 10);
assert_eq!(truncated, "Hi");
assert!(!was_truncated);
}
#[test]
fn integration_full_workflow() {
let input = "\u{FEFF}[Script Info]\r\nTitle: Test\x00Script\r\n\r\n[Events]\nDialogue: Hello World";
let (without_bom, had_bom) = strip_bom(input);
assert!(had_bom);
let normalized = normalize_line_endings(without_bom);
let cleaned = remove_control_chars(&normalized);
let final_text = normalize_whitespace(&cleaned, true);
assert!(final_text.contains("[Script Info]"));
assert!(final_text.contains("Title: TestScript")); assert!(!final_text.contains('\r')); assert!(final_text.contains("Dialogue: Hello World")); }
}