Skip to main content

oxideav_ass/
lib.rs

1//! ASS/SSA subtitle codec + container for oxideav.
2//!
3//! This crate hosts the parser, writer, codec, and container for the
4//! Advanced SubStation Alpha (`.ass`) and SubStation Alpha (`.ssa`) text
5//! subtitle formats. It is a sibling to `oxideav-subtitle`, which hosts
6//! the "lightweight" text formats (SRT, WebVTT) and the shared subtitle
7//! IR re-exports.
8//!
9//! | Format  | Codec id | Container name | Extensions      |
10//! |---------|----------|----------------|-----------------|
11//! | ASS/SSA | `ass`    | `ass`          | `.ass`, `.ssa`  |
12//!
13//! The public API mirrors `oxideav-webp` / `oxideav-gif`:
14//!
15//! * [`register_codecs`] — add the ASS codec.
16//! * [`register_containers`] — add the ASS container + probe.
17//! * [`register`] — do both.
18//!
19//! Format-to-format converters between ASS and the SRT/WebVTT formats
20//! from `oxideav-subtitle` live in [`transform`].
21
22pub mod animate;
23pub mod codec;
24pub mod container;
25pub mod drawing;
26#[cfg(feature = "render")]
27pub mod render;
28pub mod transform;
29
30pub use animate::{
31    extract_cue_animation, parse_overrides, AnimatedTag, ClipRect, CueAnimation, KaraokeKind,
32    KaraokeSpan, RenderState,
33};
34pub use drawing::parse_drawing;
35#[cfg(feature = "render")]
36pub use render::{make_animated_decoder, AnimatedRenderedDecoder};
37
38use oxideav_core::ContainerRegistry;
39use oxideav_core::RuntimeContext;
40use oxideav_core::{CodecCapabilities, CodecId, MediaType};
41use oxideav_core::{CodecInfo, CodecRegistry};
42
43pub use transform::{ass_to_srt, ass_to_webvtt, srt_to_ass, webvtt_to_ass};
44
45// ---------------------------------------------------------------------------
46// Parser / writer (moved verbatim from oxideav-subtitle::ass).
47
48use oxideav_core::{CuePosition, Error, Result, Segment, SubtitleCue, SubtitleStyle, TextAlign};
49use oxideav_subtitle::ir::{SourceFormat, SubtitleTrack};
50
51pub fn parse(bytes: &[u8]) -> Result<SubtitleTrack> {
52    let text = decode_utf8_lossy_stripping_bom(bytes);
53    let mut track = SubtitleTrack {
54        source: Some(SourceFormat::AssOrSsa),
55        ..SubtitleTrack::default()
56    };
57
58    let mut current_section = String::new();
59    let mut style_format: Vec<String> = Vec::new();
60    let mut event_format: Vec<String> = Vec::new();
61    let mut is_ssa = false;
62
63    // extradata: collect everything up to (and including) the [Events] Format line
64    let mut extradata = String::new();
65
66    for line_raw in text.split('\n') {
67        let line = line_raw.trim_end_matches('\r');
68        let trimmed = line.trim();
69        if trimmed.is_empty() {
70            // Keep empty lines in extradata while we're still in header.
71            if !is_events_body(&current_section, &event_format) {
72                extradata.push_str(line);
73                extradata.push('\n');
74            }
75            continue;
76        }
77        if trimmed.starts_with(';') || trimmed.starts_with('!') {
78            // Comment (inside Script Info); preserve in extradata.
79            if !is_events_body(&current_section, &event_format) {
80                extradata.push_str(line);
81                extradata.push('\n');
82            }
83            continue;
84        }
85        if trimmed.starts_with('[') && trimmed.ends_with(']') {
86            current_section = trimmed[1..trimmed.len() - 1].to_ascii_lowercase();
87            if current_section == "v4 styles" {
88                is_ssa = true;
89            }
90            if !is_events_body(&current_section, &event_format) {
91                extradata.push_str(line);
92                extradata.push('\n');
93            }
94            continue;
95        }
96
97        match current_section.as_str() {
98            "script info" => {
99                if let Some((k, v)) = trimmed.split_once(':') {
100                    track.metadata.push((
101                        k.trim().to_ascii_lowercase().replace(' ', "_"),
102                        v.trim().to_string(),
103                    ));
104                }
105                extradata.push_str(line);
106                extradata.push('\n');
107            }
108            "v4+ styles" | "v4 styles" => {
109                extradata.push_str(line);
110                extradata.push('\n');
111                if let Some(rest) = strip_prefix_case(trimmed, "Format:") {
112                    style_format = rest.split(',').map(|s| s.trim().to_string()).collect();
113                } else if let Some(rest) = strip_prefix_case(trimmed, "Style:") {
114                    if let Some(style) = parse_style_line(rest, &style_format, is_ssa) {
115                        track.styles.push(style);
116                    }
117                }
118            }
119            "events" => {
120                if let Some(rest) = strip_prefix_case(trimmed, "Format:") {
121                    event_format = rest.split(',').map(|s| s.trim().to_string()).collect();
122                    extradata.push_str(line);
123                    extradata.push('\n');
124                } else if let Some(rest) = strip_prefix_case(trimmed, "Dialogue:") {
125                    if let Some(cue) = parse_event_line(rest, &event_format) {
126                        track.cues.push(cue);
127                    }
128                } else if let Some(_rest) = strip_prefix_case(trimmed, "Comment:") {
129                    // Ignore comment events.
130                } else {
131                    // Unknown event-section line — drop.
132                }
133            }
134            "fonts" | "graphics" => {
135                // UU-encoded attachment body. We don't decode the
136                // payload but we preserve it verbatim in `extradata`
137                // so a parse → write round-trip keeps the attachment
138                // intact rather than dropping it. The header line
139                // (`[Fonts]` / `[Graphics]`) was already pushed by
140                // the section-header branch above.
141                extradata.push_str(line);
142                extradata.push('\n');
143            }
144            _ => {
145                // Unknown section (e.g. `[Aegisub Project Garbage]`,
146                // `[Aegisub Extradata]`, `[Aegisub Style Storage]`,
147                // editor-specific `[Project]` / `[Garbage]` blocks).
148                // Preserve every body line verbatim so the writer can
149                // emit it back unchanged — dropping the body would
150                // leave a dangling section header and lose editor
151                // state. This is purely structural preservation; we
152                // do not attempt to interpret the keys.
153                extradata.push_str(line);
154                extradata.push('\n');
155            }
156        }
157    }
158
159    track.extradata = extradata.into_bytes();
160    Ok(track)
161}
162
/// Reports whether the parser is inside the `[Events]` section *after* its
/// `Format:` row has been read, i.e. where only event rows are expected.
///
/// `section` must already be lower-cased; the comparison is exact.
fn is_events_body(section: &str, event_format: &[String]) -> bool {
    let format_seen = !event_format.is_empty();
    format_seen && section == "events"
}
166
/// Strips `prefix` from the start of `line`, comparing ASCII letters
/// case-insensitively, and returns the remainder with leading whitespace
/// removed.
///
/// Returns `None` when `line` is shorter than `prefix`, when it does not
/// start with it, or when `prefix.len()` falls inside a multi-byte
/// character of `line` (which can never be a match and previously caused a
/// slicing panic).
fn strip_prefix_case<'a>(line: &'a str, prefix: &str) -> Option<&'a str> {
    // `str::get` yields `None` both for "too short" and for "not on a char
    // boundary", so arbitrary UTF-8 input can no longer panic here.
    let head = line.get(..prefix.len())?;
    if head.eq_ignore_ascii_case(prefix) {
        // `head` exists, so `prefix.len()` is a valid boundary for the tail.
        Some(line[prefix.len()..].trim_start())
    } else {
        None
    }
}
177
178// ---------------------------------------------------------------------------
179// Style parsing
180
181fn parse_style_line(line: &str, fmt: &[String], is_ssa: bool) -> Option<SubtitleStyle> {
182    let fields: Vec<&str> = split_csv(line, fmt.len());
183    if fields.len() < fmt.len() {
184        return None;
185    }
186    let mut style = SubtitleStyle::default();
187    for (k, v) in fmt.iter().zip(fields.iter()) {
188        let key = k.to_ascii_lowercase().replace(' ', "");
189        let val = v.trim();
190        match key.as_str() {
191            "name" => style.name = val.to_string(),
192            "fontname" => style.font_family = Some(val.to_string()),
193            "fontsize" => style.font_size = val.parse().ok(),
194            "primarycolour" | "primarycolor" => {
195                style.primary_color = parse_ass_color(val);
196            }
197            "outlinecolour" | "outlinecolor" => {
198                style.outline_color = parse_ass_color(val);
199            }
200            "backcolour" | "backcolor" => {
201                style.back_color = parse_ass_color(val);
202            }
203            "bold" => style.bold = parse_bool_flag(val),
204            "italic" => style.italic = parse_bool_flag(val),
205            "underline" => style.underline = parse_bool_flag(val),
206            "strikeout" | "strikethrough" => style.strike = parse_bool_flag(val),
207            "alignment" => {
208                style.align = if is_ssa {
209                    ssa_alignment_to_textalign(val.parse().unwrap_or(2))
210                } else {
211                    ass_alignment_to_textalign(val.parse().unwrap_or(2))
212                };
213            }
214            "marginl" => style.margin_l = val.parse().ok(),
215            "marginr" => style.margin_r = val.parse().ok(),
216            "marginv" => style.margin_v = val.parse().ok(),
217            "outline" => style.outline = val.parse().ok(),
218            "shadow" => style.shadow = val.parse().ok(),
219            _ => {}
220        }
221    }
222    if style.name.is_empty() {
223        style.name = "Default".into();
224    }
225    Some(style)
226}
227
228/// ASS `\an<N>`: 1=bl, 2=bc, 3=br, 4=ml, 5=mc, 6=mr, 7=tl, 8=tc, 9=tr.
229fn ass_alignment_to_textalign(n: i32) -> TextAlign {
230    match n {
231        1 | 4 | 7 => TextAlign::Left,
232        2 | 5 | 8 => TextAlign::Center,
233        3 | 6 | 9 => TextAlign::Right,
234        _ => TextAlign::Center,
235    }
236}
237
238/// SSA alignment: low nibble = L/C/R (1/2/3), high bit = top/mid/bot.
239fn ssa_alignment_to_textalign(n: i32) -> TextAlign {
240    match n & 0x03 {
241        1 => TextAlign::Left,
242        3 => TextAlign::Right,
243        _ => TextAlign::Center,
244    }
245}
246
/// Interprets an ASS numeric flag: any integer other than zero is `true`.
///
/// Text that does not parse as an `i32` (including the empty string)
/// counts as `false`. The input is not trimmed.
fn parse_bool_flag(s: &str) -> bool {
    matches!(s.parse::<i32>(), Ok(flag) if flag != 0)
}
251
/// Parses an ASS colour literal such as `&HAABBGGRR&`, `&HBBGGRR` or a
/// bare hex string and returns it as `(r, g, b, a)`.
///
/// ASS stores transparency (00 = opaque, FF = transparent); the returned
/// alpha is inverted so that 255 means fully opaque. Literals of six hex
/// digits or fewer carry no alpha and come back opaque. Returns `None` for
/// an empty literal or one that is not valid hexadecimal for a `u32`.
fn parse_ass_color(s: &str) -> Option<(u8, u8, u8, u8)> {
    // Peel the decoration in the same order every time: surrounding
    // whitespace and `&`, the `H` marker, an optional `0x`, then any
    // trailing `&` / whitespace that is left.
    let digits = s
        .trim()
        .trim_matches('&')
        .trim_start_matches(['H', 'h'])
        .trim_start_matches("0x")
        .trim_end_matches('&')
        .trim();
    if digits.is_empty() {
        return None;
    }

    let packed = u32::from_str_radix(digits, 16).ok()?;
    // Without an alpha byte the top eight bits are forced to 00 (opaque).
    let packed = if digits.len() > 6 {
        packed
    } else {
        packed & 0x00FF_FFFF
    };

    // Big-endian byte order of the packed value is AA BB GG RR.
    let [transparency, blue, green, red] = packed.to_be_bytes();
    Some((red, green, blue, u8::MAX - transparency))
}
277
/// Splits `line` into exactly `n` comma-separated fields.
///
/// Only the first `n - 1` commas act as separators, so the last field
/// keeps any further commas (dialogue text may contain them). When the
/// line has too few commas the missing fields are filled with `""`.
/// With `n == 0` the whole line is returned as a single field.
fn split_csv(line: &str, n: usize) -> Vec<&str> {
    if n == 0 {
        return vec![line];
    }
    // `splitn` stops after `n` pieces, leaving the tail unsplit.
    let mut fields: Vec<&str> = line.splitn(n, ',').collect();
    // Pad short rows so callers always see `n` entries.
    fields.resize(n, "");
    fields
}
298
299// ---------------------------------------------------------------------------
300// Event parsing
301
302fn parse_event_line(line: &str, fmt: &[String]) -> Option<SubtitleCue> {
303    if fmt.is_empty() {
304        return None;
305    }
306    let fields = split_csv(line, fmt.len());
307    if fields.len() < fmt.len() {
308        return None;
309    }
310    let mut start_us: i64 = 0;
311    let mut end_us: i64 = 0;
312    let mut style_ref: Option<String> = None;
313    let mut text: &str = "";
314    for (k, v) in fmt.iter().zip(fields.iter()) {
315        let key = k.to_ascii_lowercase();
316        let val = v.trim();
317        match key.as_str() {
318            "start" => start_us = parse_ass_timestamp(val).unwrap_or(0),
319            "end" => end_us = parse_ass_timestamp(val).unwrap_or(0),
320            "style" if !val.is_empty() => {
321                style_ref = Some(val.to_string());
322            }
323            "text" => text = v,
324            _ => {}
325        }
326    }
327    let (segments, positioning) = parse_ass_text(text);
328    Some(SubtitleCue {
329        start_us,
330        end_us,
331        style_ref,
332        positioning,
333        segments,
334    })
335}
336
/// Parses an ASS timestamp (`H:MM:SS.cc`, centiseconds) into microseconds.
///
/// `MM:SS.cc` without hours is accepted too. The fraction may have one to
/// three or more digits: a single digit is read as tenths of a second,
/// and anything beyond two digits is truncated to centisecond precision.
/// A missing fraction counts as zero.
///
/// Returns `None` for malformed input: a wrong number of `:` groups,
/// non-numeric components (including non-ASCII text in the fraction, which
/// used to panic when sliced), or a value that does not fit in an `i64`
/// number of microseconds (which used to overflow).
fn parse_ass_timestamp(s: &str) -> Option<i64> {
    let (hms, frac) = s.split_once('.').unwrap_or((s, "0"));

    let parts: Vec<&str> = hms.split(':').collect();
    let (h, m, sec) = match parts.as_slice() {
        [h, m, sec] => (
            h.parse::<u32>().ok()?,
            m.parse::<u32>().ok()?,
            sec.parse::<u32>().ok()?,
        ),
        [m, sec] => (0u32, m.parse::<u32>().ok()?, sec.parse::<u32>().ok()?),
        _ => return None,
    };

    // Keep only centisecond precision. `get` refuses to cut through a
    // multi-byte character, turning garbage fractions into `None`.
    let cs_str = if frac.len() > 2 { frac.get(..2)? } else { frac };
    let cs: u32 = if cs_str.is_empty() {
        0
    } else {
        cs_str.parse().ok()?
    };
    // A lone digit means tenths: ".5" is 50 centiseconds.
    let cs = if frac.len() == 1 { cs * 10 } else { cs };

    // Only the hour term can exceed `i64`, but checked arithmetic is used
    // throughout so absurd inputs are rejected instead of overflowing.
    i64::from(h)
        .checked_mul(3_600_000_000)?
        .checked_add(i64::from(m) * 60_000_000)?
        .checked_add(i64::from(sec) * 1_000_000)?
        .checked_add(i64::from(cs) * 10_000)
}
373
/// Formats a microsecond timestamp as ASS `H:MM:SS.cc`.
///
/// Negative inputs are clamped to zero and sub-centisecond precision is
/// truncated. Hours are not padded and may exceed two digits.
fn format_ass_ts(us: i64) -> String {
    let centis_total = us.max(0) / 10_000;
    let (secs_total, cs) = (centis_total / 100, centis_total % 100);
    let (mins_total, s) = (secs_total / 60, secs_total % 60);
    let (h, m) = (mins_total / 60, mins_total % 60);
    format!("{}:{:02}:{:02}.{:02}", h, m, s, cs)
}
384
385// ---------------------------------------------------------------------------
386// ASS override-tag parser
387
388fn parse_ass_text(text: &str) -> (Vec<Segment>, Option<CuePosition>) {
389    let mut out: Vec<Segment> = Vec::new();
390    let mut state = AssState::default();
391    let mut positioning: Option<CuePosition> = None;
392
393    let mut cursor = 0;
394    let bytes = text.as_bytes();
395    let mut text_buf = String::new();
396
397    while cursor < bytes.len() {
398        if bytes[cursor] == b'{' {
399            // Flush accumulated text.
400            if !text_buf.is_empty() {
401                out.push(state.wrap(Segment::Text(std::mem::take(&mut text_buf))));
402            }
403            let end = match text[cursor..].find('}') {
404                Some(e) => cursor + e,
405                None => {
406                    text_buf.push('{');
407                    cursor += 1;
408                    continue;
409                }
410            };
411            let overrides = &text[cursor + 1..end];
412            handle_overrides(overrides, &mut state, &mut positioning, &mut out);
413            cursor = end + 1;
414            continue;
415        }
416        if bytes[cursor] == b'\\' && cursor + 1 < bytes.len() {
417            let c = bytes[cursor + 1] as char;
418            if c == 'N' {
419                if !text_buf.is_empty() {
420                    out.push(state.wrap(Segment::Text(std::mem::take(&mut text_buf))));
421                }
422                out.push(Segment::LineBreak);
423                cursor += 2;
424                continue;
425            }
426            if c == 'n' {
427                // Soft line break — treat like a space in text when word-wrap is on.
428                text_buf.push(' ');
429                cursor += 2;
430                continue;
431            }
432            if c == 'h' {
433                // Hard space.
434                text_buf.push('\u{00A0}');
435                cursor += 2;
436                continue;
437            }
438        }
439        text_buf.push(bytes[cursor] as char);
440        cursor += 1;
441    }
442    if !text_buf.is_empty() {
443        out.push(state.wrap(Segment::Text(text_buf)));
444    }
445
446    (out, positioning)
447}
448
449#[derive(Clone, Debug, Default)]
450struct AssState {
451    bold: bool,
452    italic: bool,
453    underline: bool,
454    strike: bool,
455    color: Option<(u8, u8, u8)>,
456    font_family: Option<String>,
457    font_size: Option<f32>,
458}
459
460impl AssState {
461    fn wrap(&self, seg: Segment) -> Segment {
462        let mut s = seg;
463        if self.bold {
464            s = Segment::Bold(vec![s]);
465        }
466        if self.italic {
467            s = Segment::Italic(vec![s]);
468        }
469        if self.underline {
470            s = Segment::Underline(vec![s]);
471        }
472        if self.strike {
473            s = Segment::Strike(vec![s]);
474        }
475        if let Some(rgb) = self.color {
476            s = Segment::Color {
477                rgb,
478                children: vec![s],
479            };
480        }
481        if self.font_family.is_some() || self.font_size.is_some() {
482            s = Segment::Font {
483                family: self.font_family.clone(),
484                size: self.font_size,
485                children: vec![s],
486            };
487        }
488        s
489    }
490}
491
492fn handle_overrides(
493    block: &str,
494    state: &mut AssState,
495    positioning: &mut Option<CuePosition>,
496    out: &mut Vec<Segment>,
497) {
498    // Walk the block splitting on backslashes. Override names are alphabetic,
499    // or a small digit followed by letters (e.g. `1c`, `2c`, `3c`, `4a`).
500    // Parameters are either parenthesised (may contain commas — `\pos(x,y)`,
501    // `\move(...)`, `\clip(...)`, `\t(...)`) or end at the next `\`.
502    let mut i = 0;
503    let bytes = block.as_bytes();
504    // Skip leading whitespace.
505    while i < bytes.len() && bytes[i].is_ascii_whitespace() {
506        i += 1;
507    }
508    // Overrides we did not consume into state get re-emitted verbatim so
509    // encode-side round-trip keeps them (`{\b1\fad(100,100)}` preserves the
510    // fade even though we only interpret the `b1`).
511    let mut passthrough = String::new();
512    while i < bytes.len() {
513        if bytes[i] != b'\\' {
514            i += 1;
515            continue;
516        }
517        let tag_start = i;
518        i += 1;
519        let start = i;
520        if i < bytes.len() && bytes[i].is_ascii_digit() {
521            i += 1;
522            while i < bytes.len() && bytes[i].is_ascii_alphabetic() {
523                i += 1;
524            }
525        } else {
526            while i < bytes.len() && bytes[i].is_ascii_alphabetic() {
527                i += 1;
528            }
529        }
530        let name = &block[start..i];
531        let param_start = i;
532        let param = if i < bytes.len() && bytes[i] == b'(' {
533            let end = match block[i..].find(')') {
534                Some(e) => i + e,
535                None => block.len(),
536            };
537            let p = &block[i + 1..end];
538            i = (end + 1).min(block.len());
539            p.to_string()
540        } else {
541            while i < bytes.len() && bytes[i] != b'\\' {
542                i += 1;
543            }
544            block[param_start..i].to_string()
545        };
546        let tag_end = i;
547        let name_lc = name.to_ascii_lowercase();
548        let understood = match name_lc.as_str() {
549            "b" => {
550                state.bold = parse_bool_flag(&param);
551                true
552            }
553            "i" => {
554                state.italic = parse_bool_flag(&param);
555                true
556            }
557            "u" => {
558                state.underline = parse_bool_flag(&param);
559                true
560            }
561            "s" => {
562                state.strike = parse_bool_flag(&param);
563                true
564            }
565            "c" | "1c" => {
566                if let Some((r, g, b, _)) = parse_ass_color(&param) {
567                    state.color = Some((r, g, b));
568                }
569                true
570            }
571            "fn" => {
572                state.font_family = Some(param.trim().to_string());
573                true
574            }
575            "fs" => {
576                state.font_size = param.trim().parse().ok();
577                true
578            }
579            "pos" => {
580                let parts: Vec<&str> = param.split(',').map(|s| s.trim()).collect();
581                if parts.len() == 2 {
582                    let cp = positioning.get_or_insert_with(CuePosition::default);
583                    cp.x = parts[0].parse().ok();
584                    cp.y = parts[1].parse().ok();
585                    true
586                } else {
587                    false
588                }
589            }
590            "an" => {
591                // Set cue-level horizontal alignment so the typed
592                // CuePosition path stays in sync, but also fall
593                // through to the passthrough so the animate module
594                // can surface the full 1..=9 numpad value on
595                // RenderState::alignment (the cp.align field only
596                // captures left/center/right and loses the
597                // top/middle/bottom row). Round-trip writes the
598                // tag back verbatim via Segment::Raw.
599                let n: i32 = param.trim().parse().unwrap_or(2);
600                let cp = positioning.get_or_insert_with(CuePosition::default);
601                cp.align = ass_alignment_to_textalign(n);
602                false
603            }
604            "a" => {
605                // Legacy SubStation-Alpha alignment code (still seen
606                // in `.ssa` and older `.ass` files). Apply to the
607                // cue-level horizontal alignment using the SSA-style
608                // mapping (low nibble = L/C/R), then fall through to
609                // passthrough so the animate module can surface the
610                // numpad-converted alignment on RenderState.
611                let n: i32 = param.trim().parse().unwrap_or(2);
612                let cp = positioning.get_or_insert_with(CuePosition::default);
613                cp.align = ssa_alignment_to_textalign(n);
614                false
615            }
616            "k" | "kf" | "ko" => {
617                let cs: u32 = param.trim().parse().unwrap_or(0);
618                // Emit an empty karaoke segment as a timing marker; a full
619                // renderer would group the following text chunk into its
620                // children. Consumers that care about karaoke can walk the
621                // stream and pair markers with the text that follows.
622                out.push(Segment::Karaoke {
623                    cs,
624                    children: Vec::new(),
625                });
626                true
627            }
628            "r" => {
629                // Style reset — clear inline overrides so subsequent text
630                // falls back to the cue's base style.
631                *state = AssState::default();
632                true
633            }
634            _ => false,
635        };
636        if !understood {
637            passthrough.push_str(&block[tag_start..tag_end]);
638        }
639    }
640    if !passthrough.is_empty() {
641        out.push(Segment::Raw(format!("{{{}}}", passthrough)));
642    }
643}
644
645// ---------------------------------------------------------------------------
646// Writer
647
648pub fn write(track: &SubtitleTrack) -> Vec<u8> {
649    // Re-use extradata if present (keeps the user's original script-info
650    // and style rows intact). Otherwise synthesise a minimal header.
651    let mut out = String::new();
652    if !track.extradata.is_empty() {
653        out.push_str(&String::from_utf8_lossy(&track.extradata));
654        if !out.ends_with('\n') {
655            out.push('\n');
656        }
657    } else {
658        out.push_str("[Script Info]\n");
659        out.push_str("ScriptType: v4.00+\n");
660        for (k, v) in &track.metadata {
661            let cap = capitalise_key(k);
662            out.push_str(&format!("{}: {}\n", cap, v));
663        }
664        out.push('\n');
665        out.push_str("[V4+ Styles]\n");
666        out.push_str("Format: Name, Fontname, Fontsize, PrimaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, Alignment, MarginL, MarginR, MarginV, Outline, Shadow\n");
667        let has_default = track.styles.iter().any(|s| s.name == "Default");
668        if !has_default {
669            out.push_str(&default_style_line());
670        }
671        for s in &track.styles {
672            out.push_str(&style_row(s));
673        }
674        out.push('\n');
675        out.push_str("[Events]\n");
676        out.push_str(
677            "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n",
678        );
679    }
680    for cue in &track.cues {
681        let txt = render_event_text(cue);
682        let style = cue.style_ref.clone().unwrap_or_else(|| "Default".into());
683        out.push_str(&format!(
684            "Dialogue: 0,{},{},{},,0,0,0,,{}\n",
685            format_ass_ts(cue.start_us),
686            format_ass_ts(cue.end_us),
687            style,
688            txt
689        ));
690    }
691    out.into_bytes()
692}
693
/// Converts a snake_case metadata key to the CamelCase form used in
/// `[Script Info]`, e.g. `play_res_x` -> `PlayResX`.
///
/// Each `_`-separated part gets its first character ASCII-upper-cased and
/// the parts are concatenated; empty parts vanish. This is a rough
/// heuristic: letters after the first in each part are left as they are.
fn capitalise_key(k: &str) -> String {
    let mut camel = String::with_capacity(k.len());
    for word in k.split('_') {
        let mut letters = word.chars();
        if let Some(initial) = letters.next() {
            camel.push(initial.to_ascii_uppercase());
            camel.push_str(letters.as_str());
        }
    }
    camel
}
707
/// Returns the `Style:` row for the fallback `Default` style used when a
/// header is synthesised.
///
/// The row must have exactly one field per column of the `Format:` line
/// that `write` emits for `[V4+ Styles]` (Name, Fontname, Fontsize,
/// PrimaryColour, OutlineColour, BackColour, Bold, Italic, Underline,
/// StrikeOut, Alignment, MarginL, MarginR, MarginV, Outline, Shadow —
/// 16 columns). That format has no SecondaryColour column, so no secondary
/// colour appears here; an extra field would shift every later value
/// (colours, alignment, margins, outline) into the wrong column.
///
/// Values: white opaque primary, black opaque outline and back colour,
/// no bold/italic/underline/strike-out, alignment 2, margins of 10,
/// outline 1, shadow 0.
fn default_style_line() -> String {
    "Style: Default,Arial,20,&H00FFFFFF,&H00000000,&H00000000,0,0,0,0,2,10,10,10,1,0\n".into()
}
712
713fn style_row(s: &SubtitleStyle) -> String {
714    let col = s
715        .primary_color
716        .map(|(r, g, b, a)| format_ass_color(r, g, b, a))
717        .unwrap_or_else(|| "&H00FFFFFF".into());
718    let outline = s
719        .outline_color
720        .map(|(r, g, b, a)| format_ass_color(r, g, b, a))
721        .unwrap_or_else(|| "&H00000000".into());
722    let back = s
723        .back_color
724        .map(|(r, g, b, a)| format_ass_color(r, g, b, a))
725        .unwrap_or_else(|| "&H00000000".into());
726    let fn_ = s.font_family.clone().unwrap_or_else(|| "Arial".into());
727    let fs = s.font_size.unwrap_or(20.0);
728    let align = match s.align {
729        TextAlign::Left | TextAlign::Start => 1,
730        TextAlign::Center => 2,
731        TextAlign::Right | TextAlign::End => 3,
732    };
733    format!(
734        "Style: {},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n",
735        s.name,
736        fn_,
737        fs,
738        col,
739        outline,
740        back,
741        s.bold as u8,
742        s.italic as u8,
743        s.underline as u8,
744        s.strike as u8,
745        align,
746        s.margin_l.unwrap_or(10),
747        s.margin_r.unwrap_or(10),
748        s.margin_v.unwrap_or(10),
749        s.outline.unwrap_or(1.0),
750        s.shadow.unwrap_or(0.0),
751    )
752}
753
/// Formats an RGBA colour (alpha 255 = opaque) as an ASS `&HAABBGGRR`
/// literal, where `AA` is transparency (00 = opaque).
fn format_ass_color(r: u8, g: u8, b: u8, a: u8) -> String {
    let transparency = u8::MAX - a;
    format!("&H{:02X}{:02X}{:02X}{:02X}", transparency, b, g, r)
}
759
760fn render_event_text(cue: &SubtitleCue) -> String {
761    let mut out = String::new();
762    if let Some(p) = &cue.positioning {
763        if let (Some(x), Some(y)) = (p.x, p.y) {
764            out.push_str(&format!("{{\\pos({},{})}}", x as i32, y as i32));
765        }
766    }
767    append_ass_segments(&cue.segments, &mut out);
768    out
769}
770
771fn append_ass_segments(segments: &[Segment], out: &mut String) {
772    for seg in segments {
773        match seg {
774            Segment::Text(s) => {
775                // Escape `{`, `}`, and newlines.
776                for c in s.chars() {
777                    match c {
778                        '\n' => out.push_str("\\N"),
779                        '{' | '}' => out.push(c),
780                        _ => out.push(c),
781                    }
782                }
783            }
784            Segment::LineBreak => out.push_str("\\N"),
785            Segment::Bold(c) => {
786                out.push_str("{\\b1}");
787                append_ass_segments(c, out);
788                out.push_str("{\\b0}");
789            }
790            Segment::Italic(c) => {
791                out.push_str("{\\i1}");
792                append_ass_segments(c, out);
793                out.push_str("{\\i0}");
794            }
795            Segment::Underline(c) => {
796                out.push_str("{\\u1}");
797                append_ass_segments(c, out);
798                out.push_str("{\\u0}");
799            }
800            Segment::Strike(c) => {
801                out.push_str("{\\s1}");
802                append_ass_segments(c, out);
803                out.push_str("{\\s0}");
804            }
805            Segment::Color { rgb, children } => {
806                out.push_str(&format!(
807                    "{{\\c&H{:02X}{:02X}{:02X}&}}",
808                    rgb.2, rgb.1, rgb.0
809                ));
810                append_ass_segments(children, out);
811                out.push_str("{\\c}");
812            }
813            Segment::Font {
814                family,
815                size,
816                children,
817            } => {
818                if let Some(fam) = family {
819                    out.push_str(&format!("{{\\fn{}}}", fam));
820                }
821                if let Some(sz) = size {
822                    out.push_str(&format!("{{\\fs{}}}", sz));
823                }
824                append_ass_segments(children, out);
825            }
826            Segment::Voice { children, .. } | Segment::Class { children, .. } => {
827                append_ass_segments(children, out);
828            }
829            Segment::Karaoke { cs, children } => {
830                out.push_str(&format!("{{\\k{}}}", cs));
831                append_ass_segments(children, out);
832            }
833            Segment::Timestamp { .. } => {}
834            Segment::Raw(s) => out.push_str(s),
835        }
836    }
837}
838
/// Decodes `bytes` as UTF-8, replacing invalid sequences with U+FFFD,
/// after removing a single leading UTF-8 byte-order mark if present.
fn decode_utf8_lossy_stripping_bom(bytes: &[u8]) -> String {
    const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];
    let payload = bytes.strip_prefix(UTF8_BOM).unwrap_or(bytes);
    String::from_utf8_lossy(payload).into_owned()
}
847
848pub(crate) fn looks_like_ass(buf: &[u8]) -> bool {
849    let text = decode_utf8_lossy_stripping_bom(buf);
850    // Look at the first 2048 chars for `[Script Info]` (case-insensitive).
851    let head: String = text.chars().take(2048).collect();
852    let head_lc = head.to_ascii_lowercase();
853    head_lc.contains("[script info]")
854}
855
/// Serialise one cue to a single `Dialogue:` line. Public alias of
/// the crate-private `cue_to_bytes` so integration tests + external
/// drivers can build packets without going through the demuxer.
///
/// The returned bytes are exactly what `cue_to_bytes` produces for `cue`;
/// this wrapper adds nothing of its own.
pub fn cue_to_bytes_pub(cue: &SubtitleCue) -> Vec<u8> {
    cue_to_bytes(cue)
}
862
863/// Serialise one cue for single-packet container emission.
864pub(crate) fn cue_to_bytes(cue: &SubtitleCue) -> Vec<u8> {
865    let style = cue.style_ref.clone().unwrap_or_else(|| "Default".into());
866    let txt = render_event_text(cue);
867    let line = format!(
868        "Dialogue: 0,{},{},{},,0,0,0,,{}",
869        format_ass_ts(cue.start_us),
870        format_ass_ts(cue.end_us),
871        style,
872        txt
873    );
874    line.into_bytes()
875}
876
877pub(crate) fn bytes_to_cue(bytes: &[u8]) -> Result<SubtitleCue> {
878    let text = decode_utf8_lossy_stripping_bom(bytes);
879    let line = text
880        .lines()
881        .find(|l| !l.trim().is_empty())
882        .ok_or_else(|| Error::invalid("ASS: empty cue"))?;
883    let rest = strip_prefix_case(line.trim(), "Dialogue:")
884        .ok_or_else(|| Error::invalid("ASS: cue missing Dialogue prefix"))?;
885    // Use the default format ordering — Layer,Start,End,Style,Name,ML,MR,MV,Effect,Text
886    let fmt = [
887        "Layer", "Start", "End", "Style", "Name", "MarginL", "MarginR", "MarginV", "Effect", "Text",
888    ]
889    .iter()
890    .map(|s| s.to_string())
891    .collect::<Vec<_>>();
892    parse_event_line(rest, &fmt).ok_or_else(|| Error::invalid("ASS: bad Dialogue line"))
893}
894
895// ---------------------------------------------------------------------------
896// Registration entry points
897
898/// Register the ASS codec (decoder + encoder).
899pub fn register_codecs(reg: &mut CodecRegistry) {
900    let caps = CodecCapabilities {
901        decode: true,
902        encode: true,
903        media_type: MediaType::Subtitle,
904        intra_only: true,
905        lossy: false,
906        lossless: true,
907        hardware_accelerated: false,
908        implementation: "ass_sw".into(),
909        max_width: None,
910        max_height: None,
911        max_bitrate: None,
912        max_sample_rate: None,
913        max_channels: None,
914        priority: 100,
915        accepted_pixel_formats: Vec::new(),
916    };
917    reg.register(
918        CodecInfo::new(CodecId::new(codec::ASS_CODEC_ID))
919            .capabilities(caps)
920            .decoder(codec::make_decoder)
921            .encoder(codec::make_encoder),
922    );
923}
924
/// Register the ASS container (demuxer + muxer + probe).
///
/// Delegates entirely to [`container::register`], handing `reg` through
/// unchanged.
pub fn register_containers(reg: &mut ContainerRegistry) {
    container::register(reg);
}
929
930/// Unified registration entry point — installs the ASS codec into the
931/// codec sub-registry and the ASS container into the container
932/// sub-registry of the supplied [`RuntimeContext`].
933///
934/// Also wired into [`oxideav_meta::register_all`] via the
935/// [`oxideav_core::register!`] macro below.
936pub fn register(ctx: &mut RuntimeContext) {
937    register_codecs(&mut ctx.codecs);
938    register_containers(&mut ctx.containers);
939}
940
// NOTE(review): presumably this makes `register` discoverable by
// `oxideav_meta::register_all` under the name "ass" — confirm against the
// definition of `oxideav_core::register!`.
oxideav_core::register!("ass", register);
942
#[cfg(test)]
mod register_tests {
    use super::*;

    /// A fresh `RuntimeContext` passed through `register` must end up
    /// with the ASS decoder and encoder factories installed and with the
    /// `ass` extension mapped to the `ass` container.
    #[test]
    fn register_via_runtime_context_installs_both_sides() {
        let mut runtime = RuntimeContext::new();
        register(&mut runtime);

        // Codec side.
        let ass_id = CodecId::new(codec::ASS_CODEC_ID);
        let decoder_present = runtime.codecs.has_decoder(&ass_id);
        assert!(
            decoder_present,
            "ASS decoder factory not installed via RuntimeContext"
        );
        let encoder_present = runtime.codecs.has_encoder(&ass_id);
        assert!(
            encoder_present,
            "ASS encoder factory not installed via RuntimeContext"
        );

        // Container side.
        let container_name = runtime.containers.container_for_extension("ass");
        assert_eq!(
            container_name,
            Some("ass"),
            "ASS container extension not installed via RuntimeContext"
        );
    }
}
967
#[cfg(test)]
mod tests {
    use super::*;

    // Minimal script shared by several tests: one style and one cue whose
    // text opens with a bold override. The `Style:` row carries exactly one
    // value per column named in the `[V4+ Styles]` `Format:` row (22 each),
    // so no value is read under the wrong column name.
    const SAMPLE: &str = r"[Script Info]
Title: Test
ScriptType: v4.00+
PlayResX: 384
PlayResY: 288

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,Arial,20,&H00FFFFFF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,1,0,2,10,10,10,1

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
Dialogue: 0,0:00:01.00,0:00:03.00,Default,,0,0,0,,{\b1}Hello{\b0} world
";

    /// The sample yields one cue with the expected timing and style
    /// reference, plus a parsed `Default` style.
    #[test]
    fn parse_sample() {
        let t = parse(SAMPLE.as_bytes()).unwrap();
        assert_eq!(t.cues.len(), 1);
        assert_eq!(t.cues[0].start_us, 1_000_000);
        assert_eq!(t.cues[0].end_us, 3_000_000);
        assert_eq!(t.cues[0].style_ref.as_deref(), Some("Default"));
        assert!(t.styles.iter().any(|s| s.name == "Default"));
    }

    /// `{\b1}…{\b0}` becomes a `Segment::Bold` wrapping the enclosed text.
    #[test]
    fn parse_override() {
        let t = parse(SAMPLE.as_bytes()).unwrap();
        // First segment should be Bold wrapping "Hello".
        let s0 = &t.cues[0].segments[0];
        match s0 {
            Segment::Bold(inner) => match &inner[0] {
                Segment::Text(s) => assert_eq!(s, "Hello"),
                other => panic!("expected text in bold, got {other:?}"),
            },
            other => panic!("expected bold, got {other:?}"),
        }
    }

    /// ASS colours are written `&HAABBGGRR`; the parsed tuple asserted
    /// here is `(0, 0, 255, 255)` for opaque blue.
    #[test]
    fn ass_color_parse() {
        // &H00FF0000 → alpha 00 (opaque), B=FF, G=00, R=00 → blue opaque
        let c = parse_ass_color("&H00FF0000").unwrap();
        assert_eq!(c, (0, 0, 255, 255));
    }

    /// `H:MM:SS.cc` timestamps convert to microseconds.
    #[test]
    fn ass_timestamp() {
        let t = parse_ass_timestamp("0:00:01.50").unwrap();
        assert_eq!(t, 1_500_000);
    }

    /// The probe accepts an ASS script and rejects a WebVTT header.
    #[test]
    fn looks_like() {
        assert!(looks_like_ass(SAMPLE.as_bytes()));
        assert!(!looks_like_ass(b"WEBVTT\n"));
    }

    #[test]
    fn unknown_section_body_round_trips_via_extradata() {
        // Lines inside a section we don't model (Aegisub editor state,
        // user-private blocks) must survive parse + write. Pre-r75 the
        // body was dropped and only the section header remained,
        // leaving a dangling header in the writer's output.
        let src = "[Script Info]\n\
ScriptType: v4.00+\n\
\n\
[Aegisub Project Garbage]\n\
Last Style Storage: Default\n\
Video Zoom Percent: 0.500000\n\
\n\
[Events]\n\
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n\
Dialogue: 0,0:00:01.00,0:00:02.00,Default,,0,0,0,,hi\n";
        let t = parse(src.as_bytes()).unwrap();
        let out = String::from_utf8(write(&t)).unwrap();
        assert!(
            out.contains("[Aegisub Project Garbage]"),
            "section header lost:\n{out}"
        );
        assert!(
            out.contains("Last Style Storage: Default"),
            "body line 1 lost:\n{out}"
        );
        assert!(
            out.contains("Video Zoom Percent: 0.500000"),
            "body line 2 lost:\n{out}"
        );
        // No duplication.
        assert_eq!(out.matches("[Aegisub Project Garbage]").count(), 1);
        assert_eq!(out.matches("Last Style Storage: Default").count(), 1);
    }

    #[test]
    fn fonts_section_body_round_trips() {
        // UU-encoded font payload — opaque to us but the bytes must
        // survive verbatim so downstream re-loads still have the
        // embedded glyphs.
        let src = "[Script Info]\n\
ScriptType: v4.00+\n\
\n\
[Fonts]\n\
fontname: Demo_B.ttf\n\
M0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz=\n\
\n\
[Events]\n\
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n\
Dialogue: 0,0:00:01.00,0:00:02.00,Default,,0,0,0,,x\n";
        let t = parse(src.as_bytes()).unwrap();
        let out = String::from_utf8(write(&t)).unwrap();
        assert!(out.contains("fontname: Demo_B.ttf"));
        assert!(out.contains("M0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz="));
    }

    #[test]
    fn unknown_section_round_trip_is_reparseable() {
        // A round-tripped script (with preserved Aegisub section) must
        // re-parse to an identical cue list. This catches accidental
        // header/body interleaving bugs.
        let src = "[Script Info]\n\
ScriptType: v4.00+\n\
\n\
[Aegisub Project Garbage]\n\
Last Style Storage: Default\n\
Audio File: ?dummy\n\
\n\
[Aegisub Extradata]\n\
Data: 1,_aegi_perspective_ambient_plane,0;0|0;0|0;0|0;0\n\
\n\
[V4+ Styles]\n\
Format: Name, Fontname, Fontsize, PrimaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n\
Style: Default,Arial,20,&H00FFFFFF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,1,0,2,10,10,10,1\n\
\n\
[Events]\n\
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n\
Dialogue: 0,0:00:01.00,0:00:02.00,Default,,0,0,0,,first\n\
Dialogue: 0,0:00:03.00,0:00:04.00,Default,,0,0,0,,second\n";
        let t1 = parse(src.as_bytes()).unwrap();
        let out = String::from_utf8(write(&t1)).unwrap();
        let t2 = parse(out.as_bytes()).unwrap();
        assert_eq!(t1.cues.len(), t2.cues.len());
        for (a, b) in t1.cues.iter().zip(t2.cues.iter()) {
            assert_eq!(a.start_us, b.start_us);
            assert_eq!(a.end_us, b.end_us);
            assert_eq!(a.style_ref, b.style_ref);
        }
        // Body preserved on the re-parse → re-emit cycle too.
        let out2 = String::from_utf8(write(&t2)).unwrap();
        assert!(out2.contains("Last Style Storage: Default"));
        assert!(out2.contains("_aegi_perspective_ambient_plane"));
    }
}