ass_core/utils/
mod.rs

1//! Utility functions and shared types for ASS-RS core
2//!
3//! Contains common functionality used across parser, tokenizer, and analysis modules.
4//! Focuses on zero-allocation helpers, color processing, and UTF-8 handling.
5//!
6//! # Performance
7//!
8//! - Zero-copy span utilities for AST references
9//! - SIMD-optimized color conversions when available
10//! - Minimal allocation math helpers (bezier evaluation)
11//!
12//! # Example
13//!
14//! ```rust
15//! use ass_core::utils::{Spans, parse_bgr_color};
16//!
17//! let color_str = "&H00FF00FF&";
18//! let rgba = parse_bgr_color(color_str)?;
19//! assert_eq!(rgba, [255, 0, 255, 0]); // BGR -> RGBA
20//! # Ok::<(), Box<dyn std::error::Error>>(())
21//! ```
22
23#[cfg(not(feature = "std"))]
24use alloc::{format, string::String, vec::Vec};
25use core::{fmt, ops::Range};
26#[cfg(feature = "std")]
27use std::{format, string::String, vec::Vec};
28
29pub mod benchmark_generators;
30pub mod errors;
31pub mod hashers;
32pub mod utf8;
33
34pub use benchmark_generators::{
35    create_test_event, generate_overlapping_script, generate_script_with_issues, ComplexityLevel,
36    ScriptGenerator,
37};
38pub use errors::CoreError;
39pub use hashers::{create_hash_map, create_hash_map_with_capacity, create_hasher, hash_value};
40pub use utf8::{detect_encoding, normalize_line_endings, recover_utf8, strip_bom, validate_utf8};
41
/// Zero-copy helpers for locating string slices inside their source text
///
/// Wraps a reference to the original source so that sub-slices of it can be
/// validated and mapped to byte offsets, lines, and columns without copying.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Spans<'a> {
    /// The complete source text that spans are expected to point into
    source: &'a str,
}

impl<'a> Spans<'a> {
    /// Wrap `source` so spans can be checked against it
    #[must_use]
    pub const fn new(source: &'a str) -> Self {
        Self { source }
    }

    /// Check whether `span` lies inside the memory occupied by the source
    ///
    /// The comparison is purely address-based: the span's start and end
    /// addresses must both fall within the source's address range.
    #[must_use]
    pub fn validate_span(&self, span: &str) -> bool {
        let lower = self.source.as_ptr() as usize;
        let upper = lower + self.source.len();

        let first = span.as_ptr() as usize;
        let last = first + span.len();

        !(first < lower || last > upper)
    }

    /// Byte offset of `span` from the start of the source
    ///
    /// Returns `None` when the span does not point into the source.
    #[must_use]
    pub fn span_offset(&self, span: &str) -> Option<usize> {
        if !self.validate_span(span) {
            return None;
        }

        Some(span.as_ptr() as usize - self.source.as_ptr() as usize)
    }

    /// 1-based line number on which `span` starts
    ///
    /// Counts the `\n` characters that precede the span.
    #[must_use]
    pub fn span_line(&self, span: &str) -> Option<usize> {
        self.span_offset(span).map(|offset| {
            let preceding = &self.source[..offset];
            preceding.matches('\n').count() + 1
        })
    }

    /// 1-based column (in characters, not bytes) at which `span` starts
    #[must_use]
    pub fn span_column(&self, span: &str) -> Option<usize> {
        let offset = self.span_offset(span)?;
        let preceding = &self.source[..offset];

        // The current line begins right after the last newline before the span.
        let line_start = match preceding.rfind('\n') {
            Some(newline_pos) => newline_pos + 1,
            None => 0,
        };

        Some(preceding[line_start..].chars().count() + 1)
    }

    /// Slice the source by byte range
    ///
    /// Returns `None` if the range is out of bounds or does not fall on
    /// character boundaries.
    #[must_use]
    pub fn substring(&self, range: Range<usize>) -> Option<&'a str> {
        let Range { start, end } = range;
        self.source.get(start..end)
    }
}
109
110/// Parse ASS BGR color format to RGBA bytes
111///
112/// ASS uses BGR format like `&H00FF00FF&` (blue, green, red, alpha).
113/// Converts to standard RGBA format for rendering.
114///
115/// # Arguments
116///
117/// * `color_str` - Color string in ASS format
118///
119/// # Returns
120///
121/// RGBA bytes `[red, green, blue, alpha]` or error if invalid format.
122///
123/// # Example
124///
125/// ```rust
126/// # use ass_core::utils::parse_bgr_color;
127/// // Pure red in ASS format
128/// let rgba = parse_bgr_color("&H000000FF&")?;
129/// assert_eq!(rgba, [255, 0, 0, 0]);
130/// # Ok::<(), Box<dyn std::error::Error>>(())
131/// ```
132///
133/// # Errors
134///
135/// Returns an error if the color string format is invalid or cannot be parsed.
136pub fn parse_bgr_color(color_str: &str) -> Result<[u8; 4], CoreError> {
137    let trimmed = color_str.trim();
138
139    let hex_part =
140        if (trimmed.starts_with("&H") || trimmed.starts_with("&h")) && trimmed.ends_with('&') {
141            &trimmed[2..trimmed.len() - 1]
142        } else if let Some(stripped) = trimmed.strip_prefix("&H") {
143            stripped
144        } else if let Some(stripped) = trimmed.strip_prefix("&h") {
145            stripped
146        } else if let Some(stripped) = trimmed.strip_prefix("0x") {
147            stripped
148        } else if trimmed.chars().all(|c| c.is_ascii_hexdigit()) {
149            trimmed
150        } else {
151            return Err(CoreError::InvalidColor(format!(
152                "Invalid color format: {color_str}"
153            )));
154        };
155
156    let hex_value = u32::from_str_radix(hex_part, 16)
157        .map_err(|_| CoreError::InvalidColor(format!("Invalid hex value: {hex_part}")))?;
158
159    let color_array = match hex_part.len() {
160        6 => {
161            let blue = ((hex_value >> 16) & 0xFF) as u8;
162            let green = ((hex_value >> 8) & 0xFF) as u8;
163            let red = (hex_value & 0xFF) as u8;
164            [red, green, blue, 0]
165        }
166        8 => {
167            let alpha = ((hex_value >> 24) & 0xFF) as u8;
168            let blue = ((hex_value >> 16) & 0xFF) as u8;
169            let green = ((hex_value >> 8) & 0xFF) as u8;
170            let red = (hex_value & 0xFF) as u8;
171            [red, green, blue, alpha]
172        }
173        _ => {
174            return Err(CoreError::InvalidColor(format!(
175                "Invalid color length: {}",
176                hex_part.len()
177            )))
178        }
179    };
180
181    Ok(color_array)
182}
183
184/// Parse numeric value from ASS field with validation
185///
186/// Handles integer and floating-point parsing with ASS-specific validation.
187/// Provides better error messages than standard parsing.
188///
189/// # Errors
190///
191/// Returns an error if the string cannot be parsed as the target numeric type.
192pub fn parse_numeric<T>(value_str: &str) -> Result<T, CoreError>
193where
194    T: core::str::FromStr,
195    T::Err: fmt::Display,
196{
197    value_str
198        .trim()
199        .parse()
200        .map_err(|e| CoreError::InvalidNumeric(format!("Failed to parse '{value_str}': {e}")))
201}
202
/// Evaluate a cubic bezier curve at parameter `t`
///
/// Computes the Bernstein-weighted sum of the four control points using
/// fused multiply-add, independently for the x and y coordinates.
///
/// # Arguments
///
/// * `p0, p1, p2, p3` - Control points as (x, y) tuples
/// * `t` - Curve parameter; `0.0` yields `p0` and `1.0` yields `p3`
///
/// # Returns
///
/// Point on curve as (x, y) tuple
#[must_use]
pub fn eval_cubic_bezier(
    p0: (f32, f32),
    p1: (f32, f32),
    p2: (f32, f32),
    p3: (f32, f32),
    t: f32,
) -> (f32, f32) {
    let mt = 1.0 - t;
    let mt2 = mt * mt;
    let t2 = t * t;

    // Bernstein basis weights for p0..p3.
    let w0 = mt2 * mt;
    let w1 = 3.0 * mt2 * t;
    let w2 = 3.0 * mt * t2;
    let w3 = t2 * t;

    // Same accumulation order for both axes: ((w0*a0 + w1*a1) + w2*a2) + w3*a3.
    let blend = |a0: f32, a1: f32, a2: f32, a3: f32| -> f32 {
        w3.mul_add(a3, w2.mul_add(a2, w0.mul_add(a0, w1 * a1)))
    };

    (
        blend(p0.0, p1.0, p2.0, p3.0),
        blend(p0.1, p1.1, p2.1, p3.1),
    )
}
241
242/// Parse ASS time format (H:MM:SS.CC) to centiseconds
243///
244/// ASS uses centiseconds (1/100th second) for timing.
245/// Supports various formats including fractional seconds.
246///
247/// # Example
248///
249/// ```rust
250/// # use ass_core::utils::parse_ass_time;
251/// assert_eq!(parse_ass_time("0:01:30.50")?, 9050); // 1:30.5 = 9050 centiseconds
252/// # Ok::<(), Box<dyn std::error::Error>>(())
253/// ```
254///
255/// # Errors
256///
257/// Returns an error if the time format is invalid or cannot be parsed.
258pub fn parse_ass_time(time_str: &str) -> Result<u32, CoreError> {
259    let parts: Vec<&str> = time_str.split(':').collect();
260    if parts.len() != 3 {
261        return Err(CoreError::InvalidTime(format!(
262            "Invalid time format: {time_str}"
263        )));
264    }
265
266    let hours: u32 = parts[0]
267        .parse()
268        .map_err(|_| CoreError::InvalidTime(format!("Invalid hours: {}", parts[0])))?;
269
270    let minutes: u32 = parts[1]
271        .parse()
272        .map_err(|_| CoreError::InvalidTime(format!("Invalid minutes: {}", parts[1])))?;
273
274    let seconds_parts: Vec<&str> = parts[2].split('.').collect();
275    let seconds: u32 = seconds_parts[0]
276        .parse()
277        .map_err(|_| CoreError::InvalidTime(format!("Invalid seconds: {}", seconds_parts[0])))?;
278
279    let centiseconds = if seconds_parts.len() > 1 {
280        let frac_str = &seconds_parts[1];
281        let frac_val: u32 = frac_str
282            .parse()
283            .map_err(|_| CoreError::InvalidTime(format!("Invalid centiseconds: {frac_str}")))?;
284
285        match frac_str.len() {
286            1 => frac_val * 10,
287            2 => frac_val,
288            _ => {
289                return Err(CoreError::InvalidTime(format!(
290                    "Too many decimal places: {frac_str}"
291                )))
292            }
293        }
294    } else {
295        0
296    };
297
298    if minutes >= 60 {
299        return Err(CoreError::InvalidTime(format!(
300            "Minutes must be < 60: {minutes}"
301        )));
302    }
303    if seconds >= 60 {
304        return Err(CoreError::InvalidTime(format!(
305            "Seconds must be < 60: {seconds}"
306        )));
307    }
308    if centiseconds >= 100 {
309        return Err(CoreError::InvalidTime(format!(
310            "Centiseconds must be < 100: {centiseconds}"
311        )));
312    }
313
314    Ok(hours * 360_000 + minutes * 6_000 + seconds * 100 + centiseconds)
315}
316
/// Render a centisecond count as an ASS timestamp
///
/// Produces `H:MM:SS.CC`: hours are unpadded, while minutes, seconds, and
/// centiseconds are zero-padded to two digits.
#[must_use]
pub fn format_ass_time(centiseconds: u32) -> String {
    const CS_PER_SECOND: u32 = 100;
    const CS_PER_MINUTE: u32 = 60 * CS_PER_SECOND;
    const CS_PER_HOUR: u32 = 60 * CS_PER_MINUTE;

    let hours = centiseconds / CS_PER_HOUR;
    let minutes = (centiseconds / CS_PER_MINUTE) % 60;
    let seconds = (centiseconds / CS_PER_SECOND) % 60;
    let cs = centiseconds % CS_PER_SECOND;

    format!("{hours}:{minutes:02}:{seconds:02}.{cs:02}")
}
331
/// Strip surrounding whitespace from an ASS field value
///
/// Only leading and trailing whitespace is removed; whitespace inside the
/// value is left untouched. The result borrows from the input.
#[must_use]
pub fn normalize_field_value(value: &str) -> &str {
    let without_leading = value.trim_start();
    without_leading.trim_end()
}
340
/// Check whether a string is usable as an ASS name
///
/// A name is accepted when it is non-empty, contains none of the structural
/// characters `,` `:` `{` `}`, and contains no control characters other than
/// tab.
#[must_use]
pub fn validate_ass_name(name: &str) -> bool {
    if name.is_empty() {
        return false;
    }

    name.chars().all(|ch| match ch {
        // Field separator, key-value separator, override block delimiters
        ',' | ':' | '{' | '}' => false,
        // Tab is the one control character that is tolerated
        '\t' => true,
        other => !other.is_control(),
    })
}
353
354/// Decode UU-encoded data commonly found in ASS `[Fonts]` and `[Graphics]` sections
355///
356/// UU-encoding (Unix-to-Unix encoding) embeds binary data as ASCII text.
357/// Each line starts with a length character followed by encoded data.
358///
359/// # Arguments
360///
361/// * `lines` - Iterator of UU-encoded text lines
362///
363/// # Returns
364///
365/// Decoded binary data or error if encoding is invalid.
366///
367/// # Example
368///
369/// ```rust
370/// # use ass_core::utils::decode_uu_data;
371/// let lines = vec![""];
372/// let decoded = decode_uu_data(lines.iter().map(|s| *s))?;
373/// // UU-decode implementation handles empty input gracefully
374/// assert!(decoded.len() >= 0);
375/// # Ok::<(), Box<dyn std::error::Error>>(())
376/// ```
377///
378/// # Errors
379///
380/// Returns an error if the UU-encoded data is malformed or cannot be decoded.
381#[allow(clippy::similar_names)]
382pub fn decode_uu_data<'a, I>(lines: I) -> Result<Vec<u8>, CoreError>
383where
384    I: Iterator<Item = &'a str>,
385{
386    let mut result = Vec::new();
387
388    for line in lines {
389        let line = line.trim_start().trim_end_matches(['\n', '\r']);
390        if line.is_empty() {
391            continue;
392        }
393
394        // Check for end marker
395        if line == "end" || line.starts_with("end ") {
396            break;
397        }
398
399        let input_bytes = line.as_bytes();
400        if input_bytes.is_empty() {
401            continue;
402        }
403
404        // First character encodes the line length
405        let expected_length = (input_bytes[0].wrapping_sub(b' ')) as usize;
406
407        // Only process lines with reasonable UU length values (0-45)
408        // This filters out obvious non-UU lines like comments
409        if expected_length > 45 {
410            continue;
411        }
412
413        // If length is 0, this indicates end of data
414        if expected_length == 0 {
415            break;
416        }
417
418        let data_part = &input_bytes[1..];
419        let mut decoded_bytes = Vec::new();
420
421        // Process groups of 4 characters into 3 bytes
422        for chunk in data_part.chunks(4) {
423            let mut group = [b' '; 4];
424            for (i, &byte) in chunk.iter().enumerate() {
425                group[i] = byte;
426            }
427
428            // Decode 4 characters to 3 bytes
429            let c1 = group[0].wrapping_sub(b' ');
430            let c2 = group[1].wrapping_sub(b' ');
431            let c3 = group[2].wrapping_sub(b' ');
432            let c4 = group[3].wrapping_sub(b' ');
433
434            let decoded_byte1 = (c1 << 2) | (c2 >> 4);
435            let decoded_byte2 = ((c2 & 0x0F) << 4) | (c3 >> 2);
436            let decoded_byte3 = ((c3 & 0x03) << 6) | c4;
437
438            // Always decode all 3 bytes - missing chars are treated as spaces (value 0)
439            decoded_bytes.push(decoded_byte1);
440            decoded_bytes.push(decoded_byte2);
441            decoded_bytes.push(decoded_byte3);
442        }
443
444        // Truncate to expected length to handle padding
445        decoded_bytes.truncate(expected_length);
446        result.extend_from_slice(&decoded_bytes);
447    }
448    Ok(result)
449}
450
451#[cfg(test)]
452mod tests {
453    use super::*;
454    #[cfg(not(feature = "std"))]
455    use alloc::vec;
456
457    #[test]
458    fn spans_validation() {
459        let source = "Hello, World!";
460        let spans = Spans::new(source);
461
462        let valid_span = &source[0..5]; // "Hello"
463        assert!(spans.validate_span(valid_span));
464        assert_eq!(spans.span_offset(valid_span), Some(0));
465        assert_eq!(spans.span_line(valid_span), Some(1));
466        assert_eq!(spans.span_column(valid_span), Some(1));
467
468        let another_span = &source[7..12]; // "World"
469        assert!(spans.validate_span(another_span));
470        assert_eq!(spans.span_offset(another_span), Some(7));
471    }
472
473    #[test]
474    fn spans_multiline() {
475        let source = "Line 1\nLine 2\nLine 3";
476        let spans = Spans::new(source);
477
478        let line2_span = &source[7..13]; // "Line 2"
479        assert_eq!(spans.span_line(line2_span), Some(2));
480        assert_eq!(spans.span_column(line2_span), Some(1));
481    }
482
483    #[test]
484    fn parse_bgr_colors() {
485        assert_eq!(parse_bgr_color("&H000000FF&").unwrap(), [255, 0, 0, 0]);
486        assert_eq!(parse_bgr_color("&H0000FF00&").unwrap(), [0, 255, 0, 0]);
487        assert_eq!(parse_bgr_color("&H00FF0000&").unwrap(), [0, 0, 255, 0]);
488
489        assert_eq!(parse_bgr_color("&HFF000000&").unwrap(), [0, 0, 0, 255]);
490
491        assert_eq!(parse_bgr_color("0x000000FF").unwrap(), [255, 0, 0, 0]);
492        assert_eq!(parse_bgr_color("000000FF").unwrap(), [255, 0, 0, 0]);
493    }
494
495    #[test]
496    fn parse_bgr_colors_invalid() {
497        assert!(parse_bgr_color("invalid").is_err());
498        assert!(parse_bgr_color("&HZZZZ&").is_err());
499        assert!(parse_bgr_color("").is_err());
500    }
501
502    #[test]
503    fn parse_bgr_colors_without_trailing_ampersand() {
504        assert_eq!(parse_bgr_color("&H000000FF").unwrap(), [255, 0, 0, 0]);
505        assert_eq!(parse_bgr_color("&H00FFFFFF").unwrap(), [255, 255, 255, 0]);
506        assert_eq!(parse_bgr_color("&H00000000").unwrap(), [0, 0, 0, 0]);
507        assert_eq!(parse_bgr_color("&HFF000000").unwrap(), [0, 0, 0, 255]);
508    }
509
510    #[test]
511    fn parse_ass_times() {
512        assert_eq!(parse_ass_time("0:00:00.00").unwrap(), 0);
513        assert_eq!(parse_ass_time("0:00:01.00").unwrap(), 100);
514        assert_eq!(parse_ass_time("0:01:00.00").unwrap(), 6000);
515        assert_eq!(parse_ass_time("1:00:00.00").unwrap(), 360_000);
516        assert_eq!(parse_ass_time("0:01:30.50").unwrap(), 9050);
517    }
518
519    #[test]
520    fn parse_ass_times_invalid() {
521        assert!(parse_ass_time("invalid").is_err());
522        assert!(parse_ass_time("0:60:00.00").is_err()); // Invalid minutes
523        assert!(parse_ass_time("0:00:60.00").is_err()); // Invalid seconds
524        assert!(parse_ass_time("0:00:00.100").is_err()); // Invalid centiseconds
525    }
526
527    #[test]
528    fn format_ass_times() {
529        assert_eq!(format_ass_time(0), "0:00:00.00");
530        assert_eq!(format_ass_time(100), "0:00:01.00");
531        assert_eq!(format_ass_time(6000), "0:01:00.00");
532        assert_eq!(format_ass_time(360_000), "1:00:00.00");
533        assert_eq!(format_ass_time(9050), "0:01:30.50");
534    }
535
536    #[test]
537    fn bezier_evaluation() {
538        let p0 = (0.0, 0.0);
539        let p1 = (0.33, 0.0);
540        let p2 = (0.67, 1.0);
541        let p3 = (1.0, 1.0);
542
543        let start = eval_cubic_bezier(p0, p1, p2, p3, 0.0);
544        assert_eq!(start, p0);
545
546        let end = eval_cubic_bezier(p0, p1, p2, p3, 1.0);
547        assert_eq!(end, p3);
548
549        let mid = eval_cubic_bezier(p0, p1, p2, p3, 0.5);
550        assert!(mid.0 > 0.0 && mid.0 < 1.0);
551        assert!(mid.1 > 0.0 && mid.1 < 1.0);
552    }
553
554    #[test]
555    fn validate_ass_names() {
556        assert!(validate_ass_name("Default"));
557        assert!(validate_ass_name("MyStyle"));
558        assert!(validate_ass_name("Style with spaces"));
559
560        assert!(!validate_ass_name("")); // Empty
561        assert!(!validate_ass_name("Style,Name")); // Comma
562        assert!(!validate_ass_name("Style:Name")); // Colon
563        assert!(!validate_ass_name("Style{Name")); // Brace
564        assert!(!validate_ass_name("Style\nName")); // Control character
565    }
566
567    #[test]
568    fn normalize_field_values() {
569        assert_eq!(normalize_field_value("  value  "), "value");
570        assert_eq!(normalize_field_value("\tvalue\t"), "value");
571        assert_eq!(normalize_field_value("value"), "value");
572    }
573
574    #[test]
575    fn numeric_parsing() {
576        assert_eq!(parse_numeric::<i32>("42").unwrap(), 42);
577        assert!((parse_numeric::<f32>("3.15").unwrap() - 3.15).abs() < f32::EPSILON);
578        assert!(parse_numeric::<i32>("invalid").is_err());
579    }
580
581    #[test]
582    fn decode_uu_data_empty_input() {
583        let lines: Vec<&str> = vec![];
584        let decoded = decode_uu_data(lines.iter().copied()).unwrap();
585        assert_eq!(decoded, Vec::<u8>::new());
586    }
587
588    #[test]
589    fn decode_uu_data_known_encoding() {
590        // Test known UU-encoded data: "Cat" -> "#0V%T"
591        let lines = ["#0V%T"];
592        let decoded = decode_uu_data(lines.iter().copied()).unwrap();
593        assert_eq!(decoded, b"Cat");
594    }
595
596    #[test]
597    fn decode_uu_data_known_encoding_png() {
598        // Test known UU-encoded data: "PNG" -> "#4$Y'"
599        let lines = ["#4$Y'"];
600        let decoded = decode_uu_data(lines.iter().copied()).unwrap();
601        assert_eq!(decoded, b"PNG");
602    }
603
604    #[test]
605    fn decode_uu_data_multiline() {
606        // Test multi-line UU-encoded data
607        let lines = ["#0V%T", "#0V%T"];
608        let decoded = decode_uu_data(lines.iter().copied()).unwrap();
609        assert_eq!(decoded, b"CatCat");
610    }
611
612    #[test]
613    fn decode_uu_data_with_end_marker() {
614        let lines = ["#0V%T", "end"];
615        let decoded = decode_uu_data(lines.iter().copied()).unwrap();
616        assert_eq!(decoded, b"Cat");
617    }
618
619    #[test]
620    fn decode_uu_data_with_end_marker_spaced() {
621        let lines = ["#0V%T", "end 644"];
622        let decoded = decode_uu_data(lines.iter().copied()).unwrap();
623        assert_eq!(decoded, b"Cat");
624    }
625
626    #[test]
627    fn decode_uu_data_zero_length_line() {
628        // Zero-length line should terminate decoding
629        let lines = ["#0V%T", " "];
630        let decoded = decode_uu_data(lines.iter().copied()).unwrap();
631        assert_eq!(decoded, b"Cat");
632    }
633
634    #[test]
635    fn decode_uu_data_whitespace_lines() {
636        let lines = ["  #0V%T  ", "\t", ""];
637        let decoded = decode_uu_data(lines.iter().copied()).unwrap();
638        assert_eq!(decoded, b"Cat");
639    }
640
641    #[test]
642    fn decode_uu_data_length_validation() {
643        // Test that length encoding is respected
644        let lines = ["!    "]; // '!' encodes length 1, but provides 4 characters of data
645        let decoded = decode_uu_data(lines.iter().copied()).unwrap();
646        assert_eq!(decoded.len(), 1); // Should be truncated to declared length
647    }
648
649    #[test]
650    fn decode_uu_data_partial_chunks() {
651        // Test handling of incomplete 4-character groups
652        let lines = ["\"``"]; // Only 3 characters after length byte
653        let decoded = decode_uu_data(lines.iter().copied()).unwrap();
654        assert_eq!(decoded.len(), 2); // Should decode what's available
655    }
656
657    #[test]
658    fn decode_uu_data_large_line() {
659        // Test handling of max-length UU line (45 bytes -> 60 characters + length)
660        let line = format!("M{}", "!!!!".repeat(15)); // 45 bytes of data
661        let lines = [line.as_str()];
662        let decoded = decode_uu_data(lines.iter().copied()).unwrap();
663        assert_eq!(decoded.len(), 45);
664    }
665
666    #[test]
667    fn decode_uu_data_mixed_content() {
668        let lines = [
669            "begin 644 test.txt", // Should be ignored
670            "#0V%T",              // Should be decoded
671            "| comment",          // Should be ignored as it doesn't start with valid length
672            "#4$Y'",              // Should be decoded
673            "end",                // Should terminate
674        ];
675        let decoded = decode_uu_data(lines.iter().copied()).unwrap();
676        assert_eq!(decoded, b"CatPNG");
677    }
678
679    #[test]
680    fn decode_uu_data_all_printable_chars() {
681        // Test that decoder handles all valid UU characters (space to underscore)
682        let lines = ["@ !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"];
683        let _decoded = decode_uu_data(lines.iter().copied()).unwrap();
684        // Should not panic, exact output depends on UU encoding rules
685    }
686
687    #[test]
688    fn decode_uu_data_boundary_lengths() {
689        // Test boundary cases for line lengths
690        let single_byte = ["!   "]; // Length 1
691        let two_bytes = ["\"`` "]; // Length 2
692        let three_bytes = ["#```"]; // Length 3
693
694        let decoded1 = decode_uu_data(single_byte.iter().copied()).unwrap();
695        assert_eq!(decoded1.len(), 1);
696
697        let decoded2 = decode_uu_data(two_bytes.iter().copied()).unwrap();
698        assert_eq!(decoded2.len(), 2);
699
700        let decoded3 = decode_uu_data(three_bytes.iter().copied()).unwrap();
701        assert_eq!(decoded3.len(), 3);
702    }
703
704    #[test]
705    fn decode_uu_data_handles_invalid_gracefully() {
706        // Decoder should not panic on invalid characters
707        let lines = ["#\x01\x02\x03"]; // Non-printable characters
708        let _result = decode_uu_data(lines.iter().copied());
709        // Should not panic, may return unexpected data or error
710    }
711
712    #[test]
713    fn parse_bgr_color_edge_cases() {
714        // Test lowercase hex prefix
715        assert_eq!(parse_bgr_color("&h000000").unwrap(), [0, 0, 0, 0]);
716        assert_eq!(parse_bgr_color("&hFFFFFF").unwrap(), [255, 255, 255, 0]);
717
718        // Test 0x prefix
719        assert_eq!(parse_bgr_color("0x000000").unwrap(), [0, 0, 0, 0]);
720        assert_eq!(parse_bgr_color("0xFFFFFF").unwrap(), [255, 255, 255, 0]);
721
722        // Test plain hex without prefix
723        assert_eq!(parse_bgr_color("000000").unwrap(), [0, 0, 0, 0]);
724        assert_eq!(parse_bgr_color("FFFFFF").unwrap(), [255, 255, 255, 0]);
725
726        // Test with extra whitespace
727        assert_eq!(parse_bgr_color("  &H000000  ").unwrap(), [0, 0, 0, 0]);
728        assert_eq!(parse_bgr_color("\t&H000000\t").unwrap(), [0, 0, 0, 0]);
729
730        // Test with trailing ampersand variations
731        assert_eq!(parse_bgr_color("&H000000&").unwrap(), [0, 0, 0, 0]);
732        assert_eq!(parse_bgr_color("&h000000&").unwrap(), [0, 0, 0, 0]);
733
734        // Test mixed case hex digits
735        assert_eq!(parse_bgr_color("&HaAbBcC").unwrap(), [204, 187, 170, 0]);
736        assert_eq!(parse_bgr_color("&HFFaaBBcc").unwrap(), [204, 187, 170, 255]);
737
738        // Test invalid lengths
739        assert!(parse_bgr_color("&H00000").is_err()); // 5 chars
740        assert!(parse_bgr_color("&H0000000").is_err()); // 7 chars
741        assert!(parse_bgr_color("&H000000000").is_err()); // 9 chars
742
743        // Test invalid characters in hex
744        assert!(parse_bgr_color("&H00000G").is_err());
745        assert!(parse_bgr_color("&H00Z000").is_err());
746
747        // Test empty after prefix
748        assert!(parse_bgr_color("&H").is_err());
749        assert!(parse_bgr_color("0x").is_err());
750
751        // Test malformed prefixes
752        assert!(parse_bgr_color("&H000000X").is_err());
753        assert!(parse_bgr_color("X&H000000").is_err());
754    }
755
756    #[test]
757    fn spans_edge_cases() {
758        let source = "line1\nline2\nline3";
759        let spans = Spans::new(source);
760
761        // Test span validation with actual substrings
762        let line1 = &source[0..5]; // "line1"
763        let line2 = &source[6..11]; // "line2"
764        let line3 = &source[12..17]; // "line3"
765
766        assert!(spans.validate_span(line1));
767        assert!(spans.validate_span(line2));
768        assert!(spans.validate_span(line3));
769        assert!(spans.validate_span(source)); // Entire source
770
771        // Test span offset calculations
772        assert_eq!(spans.span_offset(line1), Some(0));
773        assert_eq!(spans.span_offset(line2), Some(6));
774        assert_eq!(spans.span_offset(line3), Some(12));
775
776        // Test line calculations
777        assert_eq!(spans.span_line(line1), Some(1));
778        assert_eq!(spans.span_line(line2), Some(2));
779        assert_eq!(spans.span_line(line3), Some(3));
780
781        // Test column calculations
782        assert_eq!(spans.span_column(line1), Some(1));
783        assert_eq!(spans.span_column(line2), Some(1));
784        assert_eq!(spans.span_column(line3), Some(1));
785
786        // Test substring extraction
787        assert_eq!(spans.substring(0..5), Some("line1"));
788        assert_eq!(spans.substring(6..11), Some("line2"));
789        assert_eq!(spans.substring(12..17), Some("line3"));
790        assert_eq!(spans.substring(0..source.len()), Some(source));
791
792        // Test invalid range
793        assert_eq!(spans.substring(0..100), None);
794    }
795
796    #[test]
797    fn parse_ass_time_edge_cases() {
798        // Test maximum valid values
799        assert!(parse_ass_time("23:59:59.99").is_ok());
800
801        // Test zero padding variations
802        assert_eq!(parse_ass_time("0:0:0.0").unwrap(), 0);
803        assert_eq!(parse_ass_time("0:00:00.0").unwrap(), 0);
804        assert_eq!(parse_ass_time("0:00:00.00").unwrap(), 0);
805
806        // Test missing components
807        assert!(parse_ass_time("0:00").is_err());
808        assert!(parse_ass_time("0").is_err());
809        assert!(parse_ass_time("").is_err());
810
811        // Test extra components
812        assert!(parse_ass_time("0:0:0:0.0").is_err());
813        // Note: parse_ass_time("0:0:0.0.0") actually succeeds by taking first decimal part
814        assert!(parse_ass_time("0:0:0.0.0").is_ok());
815
816        // Test negative values
817        assert!(parse_ass_time("-1:00:00.00").is_err());
818        assert!(parse_ass_time("0:-1:00.00").is_err());
819        assert!(parse_ass_time("0:00:-1.00").is_err());
820        assert!(parse_ass_time("0:00:00.-1").is_err());
821
822        // Test non-numeric values
823        assert!(parse_ass_time("a:00:00.00").is_err());
824        assert!(parse_ass_time("0:b:00.00").is_err());
825        assert!(parse_ass_time("0:00:c.00").is_err());
826        assert!(parse_ass_time("0:00:00.d").is_err());
827
828        // Test boundary values that should fail
829        assert!(parse_ass_time("0:60:00.00").is_err()); // 60 minutes
830        assert!(parse_ass_time("0:00:60.00").is_err()); // 60 seconds
831        assert!(parse_ass_time("0:00:00.100").is_err()); // 100 centiseconds
832    }
833
834    #[test]
835    fn format_ass_time_edge_cases() {
836        // Test very large values
837        assert_eq!(format_ass_time(u32::MAX), "11930:27:52.95");
838
839        // Test boundary values
840        assert_eq!(format_ass_time(99), "0:00:00.99");
841        assert_eq!(format_ass_time(5999), "0:00:59.99");
842        assert_eq!(format_ass_time(359_999), "0:59:59.99");
843
844        // Test values requiring padding
845        assert_eq!(format_ass_time(1), "0:00:00.01");
846        assert_eq!(format_ass_time(10), "0:00:00.10");
847        assert_eq!(format_ass_time(601), "0:00:06.01");
848        assert_eq!(format_ass_time(3661), "0:00:36.61");
849    }
850
851    #[test]
852    fn validate_ass_name_edge_cases() {
853        // Test with tab character (should be allowed)
854        assert!(validate_ass_name("Style\tName"));
855
856        // Test with various control characters (should be rejected)
857        assert!(!validate_ass_name("Style\nName")); // Newline
858        assert!(!validate_ass_name("Style\rName")); // Carriage return
859        assert!(!validate_ass_name("Style\x00Name")); // Null
860        assert!(!validate_ass_name("Style\x7FName")); // DEL
861
862        // Test edge cases with separators
863        assert!(!validate_ass_name(",Style")); // Leading comma
864        assert!(!validate_ass_name("Style,")); // Trailing comma
865        assert!(!validate_ass_name(":Style")); // Leading colon
866        assert!(!validate_ass_name("Style:")); // Trailing colon
867        assert!(!validate_ass_name("{Style")); // Leading brace
868        assert!(!validate_ass_name("Style}")); // Trailing brace
869
870        // Test very long names
871        let long_name = "a".repeat(1000);
872        assert!(validate_ass_name(&long_name));
873
874        // Test Unicode characters
875        assert!(validate_ass_name("Style中文"));
876        assert!(validate_ass_name("Style🎭"));
877        assert!(validate_ass_name("Стиль"));
878    }
879
880    #[test]
881    fn normalize_field_value_edge_cases() {
882        // Test empty string
883        assert_eq!(normalize_field_value(""), "");
884
885        // Test only whitespace
886        assert_eq!(normalize_field_value("   "), "");
887        assert_eq!(normalize_field_value("\t\t\t"), "");
888        assert_eq!(normalize_field_value(" \t \t "), "");
889
890        // Test mixed whitespace
891        assert_eq!(normalize_field_value(" \t value \t "), "value");
892        assert_eq!(normalize_field_value("\n\rvalue\n\r"), "value");
893
894        // Test internal whitespace preservation
895        assert_eq!(normalize_field_value("  val ue  "), "val ue");
896        assert_eq!(normalize_field_value("  val\tue  "), "val\tue");
897    }
898
899    #[test]
900    #[allow(clippy::float_cmp, clippy::approx_constant)]
901    fn parse_numeric_edge_cases() {
902        // Test boundary values for different types
903        assert_eq!(parse_numeric::<u8>("255").unwrap(), 255u8);
904        assert!(parse_numeric::<u8>("256").is_err());
905        assert_eq!(parse_numeric::<i8>("127").unwrap(), 127i8);
906        assert_eq!(parse_numeric::<i8>("-128").unwrap(), -128i8);
907        assert!(parse_numeric::<i8>("128").is_err());
908
909        // Test floating point edge cases
910        assert_eq!(parse_numeric::<f32>("0.0").unwrap(), 0.0f32);
911        assert_eq!(parse_numeric::<f32>("-0.0").unwrap(), -0.0f32);
912        assert!(parse_numeric::<f32>("inf").is_ok());
913        assert!(parse_numeric::<f32>("-inf").is_ok());
914
915        // Test whitespace handling
916        assert_eq!(parse_numeric::<i32>("  42  ").unwrap(), 42i32);
917        assert_eq!(parse_numeric::<f32>(" \t 3.14 \t ").unwrap(), 3.14f32);
918
919        // Test leading zeros
920        assert_eq!(parse_numeric::<i32>("00042").unwrap(), 42i32);
921        assert_eq!(parse_numeric::<f32>("0003.140").unwrap(), 3.14f32);
922
923        // Test scientific notation
924        assert_eq!(parse_numeric::<f32>("1e2").unwrap(), 100.0f32);
925        assert_eq!(parse_numeric::<f32>("1.5e-2").unwrap(), 0.015f32);
926
927        // Test invalid formats
928        assert!(parse_numeric::<i32>("").is_err());
929        assert!(parse_numeric::<i32>("abc").is_err());
930        assert!(parse_numeric::<i32>("12.34").is_err()); // Float for int
931        assert!(parse_numeric::<f32>("12.34.56").is_err()); // Multiple dots
932    }
933
934    #[test]
935    fn eval_cubic_bezier_edge_cases() {
936        // Test identical control points (linear case)
937        let linear_result = eval_cubic_bezier((0.0, 0.0), (0.0, 0.0), (1.0, 1.0), (1.0, 1.0), 0.5);
938        assert!((linear_result.0 - 0.5).abs() < f32::EPSILON);
939        assert!((linear_result.1 - 0.5).abs() < f32::EPSILON);
940
941        // Test extreme t values
942        let p0 = (0.0, 0.0);
943        let p1 = (0.25, 0.5);
944        let p2 = (0.75, 0.5);
945        let p3 = (1.0, 1.0);
946
947        // t = 0 should return p0
948        let result_0 = eval_cubic_bezier(p0, p1, p2, p3, 0.0);
949        assert!((result_0.0 - p0.0).abs() < f32::EPSILON);
950        assert!((result_0.1 - p0.1).abs() < f32::EPSILON);
951
952        // t = 1 should return p3
953        let result_1 = eval_cubic_bezier(p0, p1, p2, p3, 1.0);
954        assert!((result_1.0 - p3.0).abs() < f32::EPSILON);
955        assert!((result_1.1 - p3.1).abs() < f32::EPSILON);
956
957        // Test negative coordinates
958        let neg_result = eval_cubic_bezier((-1.0, -1.0), (-0.5, -0.5), (0.5, 0.5), (1.0, 1.0), 0.5);
959        assert!(neg_result.0 > -1.0 && neg_result.0 < 1.0);
960        assert!(neg_result.1 > -1.0 && neg_result.1 < 1.0);
961
962        // Test very small and very large coordinates
963        let large_result = eval_cubic_bezier(
964            (0.0, 0.0),
965            (1000.0, 1000.0),
966            (2000.0, 2000.0),
967            (3000.0, 3000.0),
968            0.5,
969        );
970        assert!(large_result.0 > 0.0 && large_result.0 < 3000.0);
971        assert!(large_result.1 > 0.0 && large_result.1 < 3000.0);
972    }
973
974    #[test]
975    fn decode_uu_data_error_conditions() {
976        // Test with only invalid lines
977        let invalid_lines = ["invalid", "also invalid", "still invalid"];
978        let result = decode_uu_data(invalid_lines.iter().copied()).unwrap();
979        assert!(result.is_empty());
980
981        // Test with malformed length indicators
982        let malformed_length = ["\x7F!!!!"]; // Length > 45
983        let _result = decode_uu_data(malformed_length.iter().copied());
984        // Should handle gracefully
985
986        // Test with very short lines after valid length
987        let short_lines = ["!"]; // Length 1 but no data
988        let result = decode_uu_data(short_lines.iter().copied()).unwrap();
989        assert!(result.is_empty() || result.len() <= 1);
990
991        // Test with unicode in data
992        let unicode_lines = ["#🎭🎭🎭"];
993        let _result = decode_uu_data(unicode_lines.iter().copied());
994        // Should handle gracefully without panicking
995    }
996}
ass_core/utils/mod.rs

ass_core/utils/
mod.rs