oxideav-ass 0.0.7

ASS/SSA subtitle codec + container for oxideav
Documentation
//! ASS parsing, writing, and round-trip.

use oxideav_ass as ass;
use oxideav_core::Segment;
use oxideav_subtitle::ir::plain_text;

/// Shared ASS fixture used by most tests in this file.
///
/// Contents, as written in the literal below:
/// - `[Script Info]` with a `;` comment line plus `Title`, `ScriptType`,
///   `PlayResX`, `PlayResY` and `WrapStyle` keys.
/// - `[V4+ Styles]` declaring two styles: `Default` (Bold = 0) and
///   `Caption` (Bold = -1).
/// - `[Events]` with three `Dialogue` lines exercising `\b1`/`\b0`,
///   `\pos(100,200)` followed by text that itself contains commas, and
///   `\i1`/`\i0` with a `\N` line break.
///
/// This is a raw string (`r"…"`), so every backslash is literal.
const SAMPLE: &str = r"[Script Info]
; Authored by test
Title: Test Show
ScriptType: v4.00+
PlayResX: 384
PlayResY: 288
WrapStyle: 0

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,Arial,20,&H00FFFFFF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,1,0,2,10,10,10,1
Style: Caption,Verdana,18,&H0000FFFF,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,1,0,8,10,10,10,1

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
Dialogue: 0,0:00:01.00,0:00:03.00,Default,,0,0,0,,{\b1}Hello{\b0} world
Dialogue: 0,0:00:04.00,0:00:05.50,Caption,,0,0,0,,{\pos(100,200)}positioned, line with, commas
Dialogue: 0,0:00:06.00,0:00:08.00,Default,,0,0,0,,{\i1}line one{\i0}\Nline two
";

#[test]
fn parses_script_info_and_styles() {
    let track = ass::parse(SAMPLE.as_bytes()).unwrap();

    // `[Script Info]` entries are looked up under lower-cased keys.
    let has_title = track
        .metadata
        .iter()
        .any(|(key, value)| key == "title" && value == "Test Show");
    assert!(has_title);
    let has_play_res_x = track
        .metadata
        .iter()
        .any(|(key, value)| key == "playresx" && value == "384");
    assert!(has_play_res_x);

    // Both declared styles must be present with their parsed attributes.
    let default_style = track
        .styles
        .iter()
        .find(|style| style.name == "Default")
        .unwrap();
    assert_eq!(default_style.font_size, Some(20.0));
    let caption_style = track
        .styles
        .iter()
        .find(|style| style.name == "Caption")
        .unwrap();
    assert!(caption_style.bold);
}

#[test]
fn parses_dialogue_and_overrides() {
    let track = ass::parse(SAMPLE.as_bytes()).unwrap();
    assert_eq!(track.cues.len(), 3);

    // Timings of the first dialogue line, in microseconds.
    let first_cue = &track.cues[0];
    assert_eq!(first_cue.start_us, 1_000_000);
    assert_eq!(first_cue.end_us, 3_000_000);

    // The leading segment must be a Bold wrapper...
    let bold_children = match &first_cue.segments[0] {
        Segment::Bold(inner) => inner,
        other => panic!("expected bold, got {other:?}"),
    };
    // ...whose first child is the literal text "Hello".
    match &bold_children[0] {
        Segment::Text(s) => assert_eq!(s, "Hello"),
        other => panic!("expected text, got {other:?}"),
    }
}

#[test]
fn parses_pos_override() {
    let track = ass::parse(SAMPLE.as_bytes()).unwrap();
    let positioned_cue = &track.cues[1];

    // `\pos(100,200)` must populate the cue's positioning.
    let placement = positioned_cue.positioning.as_ref().unwrap();
    assert_eq!(placement.x, Some(100.0));
    assert_eq!(placement.y, Some(200.0));

    // Commas inside the Text field are part of the text, not field
    // separators, so they must survive parsing.
    let plain = plain_text(&positioned_cue.segments);
    assert!(plain.contains("line with, commas"), "got: {plain}");
}

#[test]
fn parses_linebreak() {
    let track = ass::parse(SAMPLE.as_bytes()).unwrap();
    let two_line_cue = &track.cues[2];

    // Walk the whole segment tree and count line-break nodes; the `\N`
    // in the third dialogue line must produce at least one.
    let mut line_breaks = 0usize;
    visit(&two_line_cue.segments, &mut |seg| {
        if let Segment::LineBreak = seg {
            line_breaks += 1;
        }
    });
    assert!(line_breaks > 0);
}

#[test]
fn unknown_override_preserved_alongside_known_ones() {
    // The override block below mixes `\b1` with `\fad(...)`. The test
    // expects both to be present in the re-emitted script.
    let src = r"[Script Info]
ScriptType: v4.00+

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
Dialogue: 0,0:00:01.00,0:00:02.00,Default,,0,0,0,,{\b1\fad(100,200)}hello
";
    let track = ass::parse(src.as_bytes()).unwrap();
    let rendered = ass::write(&track);
    let out = String::from_utf8(rendered).unwrap();

    let kept_bold = out.contains("{\\b1}");
    assert!(kept_bold, "bold override lost:\n{out}");
    let kept_fad = out.contains("\\fad(100,200)");
    assert!(kept_fad, "fad override lost:\n{out}");
    // The dialogue text itself must still be there.
    assert!(out.contains("hello"));
}

#[test]
fn reset_override_clears_inline_state() {
    let src = r"[Script Info]
ScriptType: v4.00+

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
Dialogue: 0,0:00:01.00,0:00:02.00,Default,,0,0,0,,{\b1}bold{\r}plain
";
    let track = ass::parse(src.as_bytes()).unwrap();
    // Text following `\r` must appear as a top-level text segment, i.e.
    // not nested inside a Bold wrapper. Only the top level is scanned.
    let segs = &track.cues[0].segments;
    let plain_is_bare = segs
        .iter()
        .any(|seg| matches!(seg, Segment::Text(txt) if txt.contains("plain")));
    assert!(plain_is_bare, "expected `plain` as bare text: {segs:?}");
}

#[test]
fn ssa_negative_bool_flag_parses_as_true() {
    // SSA style flags use `-1` for "on" and `0` for "off"; the `Bolded`
    // style below sets Bold = -1 and Italic = 0.
    let src = r"[Script Info]
ScriptType: v4.00

[V4 Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, TertiaryColour, BackColour, Bold, Italic, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, AlphaLevel, Encoding
Style: Bolded,Arial,20,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,-1,0,1,1,0,2,10,10,10,0,1

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
Dialogue: 0,0:00:01.00,0:00:02.00,Bolded,,0,0,0,,hi
";
    let track = ass::parse(src.as_bytes()).unwrap();
    let bolded = track
        .styles
        .iter()
        .find(|style| style.name == "Bolded")
        .unwrap();
    assert!(bolded.bold);
    assert!(!bolded.italic);
}

#[test]
fn write_preserves_events_and_styles() {
    let original = ass::parse(SAMPLE.as_bytes()).unwrap();
    let serialized = ass::write(&original);
    let out = String::from_utf8(serialized).unwrap();

    // The events section and its first dialogue line are written back.
    assert!(out.contains("[Events]"));
    assert!(out.contains("Dialogue: 0,0:00:01.00,0:00:03.00,Default"));
    // So is the non-default style definition.
    assert!(out.contains("Style: Caption"));
    // Inline bold override survives.
    assert!(out.contains("{\\b1}"));
    // Explicit positioning survives.
    assert!(out.contains("\\pos(100,200)"));

    // A second parse of the written output must agree with the first on
    // cue count, timings and style references.
    let reparsed = ass::parse(out.as_bytes()).unwrap();
    assert_eq!(reparsed.cues.len(), original.cues.len());
    let cue_pairs = original.cues.iter().zip(reparsed.cues.iter());
    for (before, after) in cue_pairs {
        assert_eq!(before.start_us, after.start_us);
        assert_eq!(before.end_us, after.end_us);
        assert_eq!(before.style_ref, after.style_ref);
    }
}

#[test]
fn aegisub_project_garbage_section_round_trips() {
    // Aegisub writes an `[Aegisub Project Garbage]` section between
    // `[V4+ Styles]` and `[Events]` holding editor state (last opened
    // video/audio, scroll position, last style storage). Before r75 the
    // section body was silently dropped, leaving a dangling header; the
    // round-trip is now required to keep every body line.
    let src = "[Script Info]\n\
ScriptType: v4.00+\n\
\n\
[V4+ Styles]\n\
Format: Name, Fontname, Fontsize, PrimaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n\
Style: Default,Arial,20,&H00FFFFFF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,1,0,2,10,10,10,1\n\
\n\
[Aegisub Project Garbage]\n\
Last Style Storage: Default\n\
Audio File: ?dummy\n\
Video File: ?dummy\n\
Video AR Mode: 4\n\
Video AR Value: 1.777778\n\
Video Zoom Percent: 0.500000\n\
Scroll Position: 0\n\
Active Line: 0\n\
\n\
[Events]\n\
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n\
Dialogue: 0,0:00:01.00,0:00:02.00,Default,,0,0,0,,hello\n";
    let track = ass::parse(src.as_bytes()).unwrap();
    let out = String::from_utf8(ass::write(&track)).unwrap();

    // The header and each body line of the Aegisub section must be found
    // in the written output.
    let expected_lines = [
        "[Aegisub Project Garbage]",
        "Last Style Storage: Default",
        "Audio File: ?dummy",
        "Video File: ?dummy",
        "Video AR Mode: 4",
        "Video AR Value: 1.777778",
        "Video Zoom Percent: 0.500000",
        "Scroll Position: 0",
        "Active Line: 0",
    ];
    for needle in expected_lines {
        assert!(
            out.contains(needle),
            "lost `{needle}` on round-trip:\n{out}"
        );
    }

    // The dialogue event is still emitted as well.
    assert!(out.contains("Dialogue: 0,0:00:01.00,0:00:02.00,Default"));

    // Guard against the header being written twice (once from preserved
    // extradata and once by the writer itself).
    let header_count = out.matches("[Aegisub Project Garbage]").count();
    assert_eq!(header_count, 1, "section header duplicated:\n{out}");
}

#[test]
fn multiple_unknown_sections_all_round_trips() {
    // libass/Aegisub-ecosystem files routinely carry several
    // editor-private sections. All must survive round-trip.
    let src = "[Script Info]\n\
ScriptType: v4.00+\n\
\n\
[Aegisub Project Garbage]\n\
Last Style Storage: Default\n\
\n\
[Aegisub Extradata]\n\
Data: 1,_aegi_perspective_ambient_plane,0;0|0;0|0;0|0;0\n\
\n\
[Events]\n\
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n\
Dialogue: 0,0:00:01.00,0:00:02.00,Default,,0,0,0,,first\n";
    let t = ass::parse(src.as_bytes()).unwrap();
    let out = String::from_utf8(ass::write(&t)).unwrap();

    // Header and a representative body token of each unknown section.
    for needle in [
        "[Aegisub Project Garbage]",
        "Last Style Storage: Default",
        "[Aegisub Extradata]",
        "_aegi_perspective_ambient_plane",
    ] {
        assert!(
            out.contains(needle),
            "lost `{needle}` on round-trip:\n{out}"
        );
    }

    // And re-parsing the output yields the same cue count + content.
    let t2 = ass::parse(out.as_bytes()).unwrap();
    assert_eq!(t2.cues.len(), t.cues.len());
    // The length check above guarantees `zip` does not silently drop cues.
    for (a, b) in t.cues.iter().zip(t2.cues.iter()) {
        assert_eq!(a.start_us, b.start_us);
        assert_eq!(a.end_us, b.end_us);
        // Segments are compared through their `Debug` rendering so the
        // check does not depend on `Segment` implementing `PartialEq`.
        assert_eq!(
            format!("{:?}", a.segments),
            format!("{:?}", b.segments),
            "cue content changed on round-trip:\n{out}"
        );
    }
}

#[test]
fn fonts_section_uu_body_round_trips() {
    // Embedded fonts travel as UU-encoded text inside `[Fonts]`. We do
    // not decode that payload, but it has to be written back unchanged
    // so that downstream consumers (e.g. Aegisub re-loading the file)
    // still find the attached font.
    let src = "[Script Info]\n\
ScriptType: v4.00+\n\
\n\
[Fonts]\n\
fontname: Demo_B.ttf\n\
M02AwIDQwMDAwMDAwIDA2MDAwMDA0NjQ4NjU2YzZjNkYwMzAxMDQyMDUwMjAyMDAwMDAw\n\
M02AwIDA0MDAwMDAwIDA3MDAwMDA0NjU2NDc0NjUyMjAyMDIwMjAyMDIwMjAyMDIwMjAw\n\
\n\
[Events]\n\
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n\
Dialogue: 0,0:00:01.00,0:00:02.00,Default,,0,0,0,,x\n";
    let track = ass::parse(src.as_bytes()).unwrap();
    let written = ass::write(&track);
    let out = String::from_utf8(written).unwrap();

    // Section header and the attachment's name line.
    assert!(out.contains("[Fonts]"));
    assert!(out.contains("fontname: Demo_B.ttf"));
    // Both payload lines, character for character.
    assert!(out.contains(
        "M02AwIDQwMDAwMDAwIDA2MDAwMDA0NjQ4NjU2YzZjNkYwMzAxMDQyMDUwMjAyMDAwMDAw"
    ));
    assert!(out.contains(
        "M02AwIDA0MDAwMDAwIDA3MDAwMDA0NjU2NDc0NjUyMjAyMDIwMjAyMDIwMjAyMDIwMjAw"
    ));
}

/// Pre-order walk over a segment tree.
///
/// Calls `f` on each segment of `segs` in order and, for the container
/// variants matched below, recurses into their children right after the
/// container itself has been reported. Every other variant is treated as
/// a leaf.
fn visit<F>(segs: &[Segment], f: &mut F)
where
    F: FnMut(&Segment),
{
    for seg in segs {
        // Report the node itself before anything nested inside it.
        f(seg);
        match seg {
            // Struct-like containers carrying a `children` field.
            Segment::Color { children, .. }
            | Segment::Font { children, .. }
            | Segment::Voice { children, .. }
            | Segment::Class { children, .. }
            | Segment::Karaoke { children, .. } => visit(children, f),
            // Tuple-like styling wrappers.
            Segment::Bold(inner)
            | Segment::Italic(inner)
            | Segment::Underline(inner)
            | Segment::Strike(inner) => visit(inner, f),
            _ => {}
        }
    }
}