ass_core/utils/errors/
encoding.rs1use super::CoreError;
8use alloc::{format, string::String};
9use core::fmt;
10
11#[must_use]
30pub const fn utf8_error(position: usize, message: String) -> CoreError {
31 CoreError::Utf8Error { position, message }
32}
33
34pub fn validation_error<T: fmt::Display>(message: T) -> CoreError {
43 CoreError::Validation(format!("{message}"))
44}
45
46pub fn validate_utf8_detailed(bytes: &[u8]) -> Result<(), CoreError> {
63 match core::str::from_utf8(bytes) {
64 Ok(_) => Ok(()),
65 Err(err) => {
66 let position = err.valid_up_to();
67 let message = err.error_len().map_or_else(
68 || format!("Incomplete UTF-8 sequence at position {position}"),
69 |len| format!("Invalid UTF-8 sequence of {len} bytes at position {position}"),
70 );
71
72 Err(utf8_error(position, message))
73 }
74 }
75}
76
77pub fn validate_ass_text_content(text: &str) -> Result<(), CoreError> {
94 for (pos, ch) in text.char_indices() {
95 if !is_valid_ass_char(ch) {
96 return Err(validation_error(format!(
97 "Invalid character '{}' (U+{:04X}) at position {}",
98 ch.escape_default().collect::<String>(),
99 ch as u32,
100 pos
101 )));
102 }
103 }
104 Ok(())
105}
106
/// Reports whether `ch` is accepted in ASS text content.
///
/// Accepted characters are:
///
/// * ASCII graphic characters,
/// * space, tab, line feed and carriage return,
/// * any non-ASCII character that is not a control character.
///
/// Everything else (the remaining ASCII control characters, and non-ASCII
/// control characters) is rejected.
fn is_valid_ass_char(ch: char) -> bool {
    if ch.is_ascii() {
        // Within ASCII only visible glyphs and the four whitespace characters
        // listed here are allowed.
        ch.is_ascii_graphic() || matches!(ch, ' ' | '\t' | '\n' | '\r')
    } else {
        !ch.is_control()
    }
}
123
124pub fn validate_bom_handling(bytes: &[u8]) -> Result<(), CoreError> {
141 if bytes.len() >= 3 && bytes[0..3] == [0xEF, 0xBB, 0xBF] {
142 return Ok(());
144 }
145
146 if bytes.len() >= 2 && (bytes[0..2] == [0xFF, 0xFE] || bytes[0..2] == [0xFE, 0xFF]) {
147 return Err(validation_error(
148 "UTF-16 BOM detected - ASS files should be UTF-8",
149 ));
150 }
151
152 if bytes.len() >= 4
153 && (bytes[0..4] == [0xFF, 0xFE, 0x00, 0x00] || bytes[0..4] == [0x00, 0x00, 0xFE, 0xFF])
154 {
155 return Err(validation_error(
156 "UTF-32 BOM detected - ASS files should be UTF-8",
157 ));
158 }
159
160 if bytes.len() >= 2 && bytes[0..2] == [0xEF, 0xBB] {
162 return Err(validation_error(
163 "Partial UTF-8 BOM detected - file may be corrupted or incorrectly encoded",
164 ));
165 }
166
167 if !bytes.is_empty() && bytes[0] == 0xEF && (bytes.len() == 1 || bytes[1] != 0xBB) {
168 return Err(validation_error(
169 "Suspicious byte sequence that could be partial BOM - check file encoding",
170 ));
171 }
172
173 Ok(())
174}
175
#[cfg(test)]
mod tests {
    use super::*;
    #[cfg(not(feature = "std"))]
    use alloc::string::ToString;

    /// The supplied position must end up in the `Utf8Error` variant.
    #[test]
    fn utf8_error_creation() {
        let built = utf8_error(42, "test message".to_string());
        assert!(matches!(built, CoreError::Utf8Error { position: 42, .. }));
    }

    /// Any displayable message yields the `Validation` variant.
    #[test]
    fn validation_error_creation() {
        let built = validation_error("invalid content");
        assert!(matches!(built, CoreError::Validation(_)));
    }

    /// Multi-byte characters (CJK, emoji) are valid UTF-8.
    #[test]
    fn validate_valid_utf8() {
        let sample = "Hello, 世界! 🎵".as_bytes();
        let outcome = validate_utf8_detailed(sample);
        assert!(outcome.is_ok());
    }

    /// Bytes that can never start a UTF-8 sequence are rejected.
    #[test]
    fn validate_invalid_utf8() {
        let malformed: &[u8] = &[0xFF, 0xFE, 0x80];
        let outcome = validate_utf8_detailed(malformed);
        assert!(outcome.is_err());
    }

    /// Plain text, permitted whitespace and non-ASCII text all pass.
    #[test]
    fn validate_ass_text_valid() {
        for accepted in ["Hello World", "Hello\tWorld\n", "Hello 世界"] {
            assert!(
                validate_ass_text_content(accepted).is_ok(),
                "expected {accepted:?} to be accepted"
            );
        }
    }

    /// Embedded ASCII control characters are rejected.
    #[test]
    fn validate_ass_text_invalid() {
        for rejected in ["Hello\x00World", "Hello\x1FWorld"] {
            assert!(
                validate_ass_text_content(rejected).is_err(),
                "expected {rejected:?} to be rejected"
            );
        }
    }

    /// Spot-checks the per-character predicate directly.
    #[test]
    fn valid_ass_char_check() {
        for allowed in ['A', ' ', '\n', '世'] {
            assert!(is_valid_ass_char(allowed), "expected {allowed:?} to be valid");
        }
        for forbidden in ['\x00', '\x1F'] {
            assert!(
                !is_valid_ass_char(forbidden),
                "expected {forbidden:?} to be invalid"
            );
        }
    }

    /// A complete UTF-8 BOM is accepted.
    #[test]
    fn bom_validation_utf8() {
        let with_utf8_bom: &[u8] = &[0xEF, 0xBB, 0xBF, b'H', b'i'];
        let outcome = validate_bom_handling(with_utf8_bom);
        assert!(outcome.is_ok());
    }

    /// A UTF-16 LE BOM is rejected.
    #[test]
    fn bom_validation_utf16() {
        let with_utf16_bom: &[u8] = &[0xFF, 0xFE, b'H', 0x00, b'i', 0x00];
        let outcome = validate_bom_handling(with_utf16_bom);
        assert!(outcome.is_err());
    }

    /// Plain ASCII without any BOM is accepted.
    #[test]
    fn bom_validation_no_bom() {
        let plain = b"Hello World";
        let outcome = validate_bom_handling(plain);
        assert!(outcome.is_ok());
    }

    /// The first two BOM bytes without the third are rejected.
    #[test]
    fn bom_validation_partial_utf8() {
        let truncated_bom: &[u8] = &[0xEF, 0xBB, b'H', b'i'];
        let outcome = validate_bom_handling(truncated_bom);
        assert!(outcome.is_err());
    }

    /// A leading `EF` followed by ordinary text is rejected.
    #[test]
    fn bom_validation_single_ef_byte() {
        let leading_ef: &[u8] = &[0xEF, b'H', b'i'];
        let outcome = validate_bom_handling(leading_ef);
        assert!(outcome.is_err());
    }

    /// An input consisting solely of `EF` is rejected.
    #[test]
    fn bom_validation_ef_only() {
        let lone_ef: &[u8] = &[0xEF];
        let outcome = validate_bom_handling(lone_ef);
        assert!(outcome.is_err());
    }
}