Skip to main content

oxideav_ass/
render.rs

1//! Animated subtitle decoder: wraps a base ASS subtitle decoder and
2//! emits rasterised RGBA `Frame::Video`s sampled at a caller-controlled
3//! moment in cue-local time.
4//!
5//! This decoder closes the gap between the static
6//! [`oxideav_subtitle::RenderedSubtitleDecoder`] (one frame per cue) and
7//! the time-varying state produced by
8//! [`crate::CueAnimation::evaluate_at`]: callers can step the
9//! `eval_offset_ms` field between calls to get a series of frames that
10//! reflect the `\t` / `\fad` / `\move` / `\frx` / `\fry` / `\frz` /
11//! `\clip` overrides at successive moments in the cue's lifetime.
12//!
13//! Pipeline (per `receive_frame`):
14//!
15//! 1. Pull the next [`SubtitleCue`] from the wrapped inner decoder.
16//! 2. [`crate::extract_cue_animation`] the cue.
17//! 3. Sample the animation at `(cue.start + eval_offset_ms)` clamped
18//!    into the cue's `[start, end]` lifetime.
19//! 4. Build a [`oxideav_core::VectorFrame`] containing the cue's
20//!    shaped glyph nodes (via the supplied [`oxideav_scribe::FaceChain`])
21//!    placed line-by-line, then wrap them in a `Group` whose:
22//!    - `transform` composes the animation's `move` ∘ pivoted
23//!      `\frx`/`\fry`/`\frz` ∘ `\fscx`/`\fscy` matrix (3D rotations
24//!      reduced to a 2D affine via a small-angle approximation around
25//!      the pivot, so the renderer stays purely 2D);
26//!    - `opacity` is `RenderState::alpha_mul`;
27//!    - `clip` is the `\clip(rect)` rectangle path or, if
28//!      `\clip(drawing)` is active, the drawing-path parsed by
29//!      [`crate::drawing::parse_drawing`].
30//! 5. Rasterise via [`oxideav_raster::Renderer`].
31//!
32//! The returned `Frame::Video` carries the cue's `start_us` as PTS.
33
34use std::collections::VecDeque;
35
36use oxideav_core::{
37    CodecId, Decoder, Frame, Group, Node, Packet, Paint, Path, PathNode, Point, Result,
38    Rgba as CoreRgba, Segment, SubtitleCue, TextAlign, TimeBase, Transform2D, VectorFrame,
39    VideoFrame, VideoPlane,
40};
41use oxideav_scribe::{FaceChain, Shaper};
42
43use crate::animate::{ClipRect, RenderState};
44use crate::{drawing, extract_cue_animation};
45
46/// Animated subtitle decoder. See module docs.
47pub struct AnimatedRenderedDecoder {
48    inner: Box<dyn Decoder>,
49    codec_id: CodecId,
50    width: u32,
51    height: u32,
52    face: FaceChain,
53    /// Pre-cue queue holding the decoded cue + its extracted animation
54    /// so multiple `receive_frame` calls at different `eval_offset_ms`
55    /// settings reuse the same cue without re-pulling it.
56    queue: VecDeque<CachedCue>,
57    /// Offset from the current cue's `start_us`, in milliseconds, at
58    /// which to sample the animation on the next `receive_frame` call.
59    /// Defaults to `0` (cue start). Set via [`Self::set_offset_ms`].
60    eval_offset_ms: i32,
61    /// Default text colour when no `\c` override is active.
62    pub default_color: [u8; 4],
63    /// Nominal font size in pixels passed to the shaper.
64    pub font_size_px: f32,
65    /// Pixel margin between the canvas edge and the text bounding box.
66    pub side_margin_px: u32,
67    /// Pixel margin between the canvas bottom and the lowest baseline.
68    pub bottom_margin_px: u32,
69}
70
71/// One decoded cue + its lazily-evaluated animation.
72struct CachedCue {
73    cue: SubtitleCue,
74}
75
76impl AnimatedRenderedDecoder {
77    /// Build a new `AnimatedRenderedDecoder` wrapping `inner` and
78    /// rendering at `width × height` using `face`.
79    pub fn new(inner: Box<dyn Decoder>, width: u32, height: u32, face: FaceChain) -> Self {
80        let codec_id = inner.codec_id().clone();
81        Self {
82            inner,
83            codec_id,
84            width,
85            height,
86            face,
87            queue: VecDeque::new(),
88            eval_offset_ms: 0,
89            default_color: [255, 255, 255, 255],
90            font_size_px: 24.0,
91            side_margin_px: 8,
92            bottom_margin_px: 24,
93        }
94    }
95
96    /// Set the cue-relative time at which the *next* `receive_frame`
97    /// call will sample the animation. Subsequent calls keep this
98    /// offset until it's changed.
99    pub fn set_offset_ms(&mut self, offset_ms: i32) {
100        self.eval_offset_ms = offset_ms;
101    }
102
103    /// Current sampling offset in cue-relative milliseconds.
104    pub fn offset_ms(&self) -> i32 {
105        self.eval_offset_ms
106    }
107}
108
109impl Decoder for AnimatedRenderedDecoder {
110    fn codec_id(&self) -> &CodecId {
111        &self.codec_id
112    }
113
114    fn send_packet(&mut self, packet: &Packet) -> Result<()> {
115        self.inner.send_packet(packet)
116    }
117
118    fn receive_frame(&mut self) -> Result<Frame> {
119        // Top up the queue.
120        if self.queue.is_empty() {
121            match self.inner.receive_frame()? {
122                Frame::Subtitle(c) => self.queue.push_back(CachedCue { cue: c }),
123                other => return Ok(other),
124            }
125        }
126        let entry = self.queue.front().expect("queue non-empty");
127        let cue = &entry.cue;
128        let dur_ms = ((cue.end_us - cue.start_us) / 1000).max(0) as i32;
129        let t = self.eval_offset_ms.clamp(0, dur_ms);
130        let anim = extract_cue_animation(cue);
131        let state = anim.evaluate_at(t, dur_ms);
132        let vf = self.render_cue_animated(cue, &state);
133        Ok(Frame::Video(vf))
134    }
135
136    fn flush(&mut self) -> Result<()> {
137        self.inner.flush()
138    }
139
140    fn reset(&mut self) -> Result<()> {
141        self.queue.clear();
142        self.eval_offset_ms = 0;
143        self.inner.reset()
144    }
145}
146
147impl AnimatedRenderedDecoder {
148    fn render_cue_animated(&self, cue: &SubtitleCue, state: &RenderState) -> VideoFrame {
149        let mut buf = vec![0u8; (self.width as usize) * (self.height as usize) * 4];
150
151        // Default alignment.
152        let align = cue
153            .positioning
154            .as_ref()
155            .map(|p| p.align)
156            .unwrap_or(TextAlign::Center);
157
158        // Flatten visible text from the cue's segments.
159        let text = collect_visible_text(&cue.segments);
160        if text.is_empty() {
161            return wrap_buf(buf, self.width, cue.start_us);
162        }
163
164        // Lay out one or more visual lines (split on \n; greedy wrap by
165        // shaped width).
166        let face = &self.face;
167        let max_text_w = self.width.saturating_sub(self.side_margin_px * 2);
168        if max_text_w == 0 {
169            return wrap_buf(buf, self.width, cue.start_us);
170        }
171        let logical_lines = text.split('\n').collect::<Vec<_>>();
172        let size_px = if state.font_size.unwrap_or(self.font_size_px) > 0.0 {
173            state.font_size.unwrap_or(self.font_size_px)
174        } else {
175            self.font_size_px
176        };
177        let mut visual_lines: Vec<String> = Vec::new();
178        for line in &logical_lines {
179            for v in wrap_line(line, face, size_px, max_text_w as f32) {
180                visual_lines.push(v);
181            }
182        }
183        if visual_lines.is_empty() {
184            return wrap_buf(buf, self.width, cue.start_us);
185        }
186        // Layout vertical: stack from bottom up using face metrics.
187        let face_line_h = face.primary().line_height_px(size_px).ceil().max(1.0) as u32;
188        let face_descent_abs = (-face.primary().descent_px(size_px)).ceil().max(0.0) as u32;
189        let line_h = face_line_h.max(1);
190        let n_lines = visual_lines.len();
191        let last_baseline = self
192            .height
193            .saturating_sub(self.bottom_margin_px)
194            .saturating_sub(face_descent_abs);
195
196        // Assemble per-glyph nodes inside an inner Group at canvas coords.
197        let mut inner = Group::default();
198        let mut anchor_x = self.width as f32 / 2.0;
199        let anchor_y = last_baseline as f32;
200        let primary_color = state
201            .primary_color
202            .map(|(r, g, b)| [r, g, b, 255])
203            .unwrap_or(self.default_color);
204        for (i, line) in visual_lines.iter().enumerate() {
205            let line_w_px = measure(face, line, size_px);
206            let line_x = match align {
207                TextAlign::Left | TextAlign::Start => self.side_margin_px as f32,
208                TextAlign::Right | TextAlign::End => {
209                    (self.width as f32 - line_w_px - self.side_margin_px as f32)
210                        .max(self.side_margin_px as f32)
211                }
212                TextAlign::Center => ((self.width as f32 - line_w_px) / 2.0).max(0.0),
213            };
214            let baseline_y =
215                last_baseline.saturating_sub(((n_lines - 1 - i) as u32) * line_h) as f32;
216            // Pick the anchor (= alignment point) from the last line for
217            // pivot fallback.
218            anchor_x = line_x + line_w_px / 2.0;
219            let _ = anchor_y;
220
221            let mut pen_x = line_x;
222            let glyphs = Shaper::shape_to_paths(face, line, size_px);
223            let fill = Paint::Solid(rgba_to_core(primary_color));
224            for (_face_idx, node, glyph_xform) in glyphs {
225                let absolute = Transform2D::translate(pen_x, baseline_y).compose(&glyph_xform);
226                let painted = repaint_node(node, &fill);
227                inner.children.push(Node::Group(Group {
228                    transform: absolute,
229                    children: vec![painted],
230                    ..Group::default()
231                }));
232            }
233            pen_x += line_w_px;
234            let _ = pen_x; // silence unused
235        }
236
237        // Compose the animation transform around the anchor (or
238        // \org-supplied pivot).
239        let pivot = state.pivot.unwrap_or((anchor_x, last_baseline as f32));
240        let anim_xf = animation_transform(state, pivot);
241
242        // Optional clip: prefer drawing-path over rect when both set.
243        let clip_path = if let Some(s) = state.clip_drawing.as_ref() {
244            let (scale, body) = drawing::split_clip_arg(s);
245            Some(drawing::parse_drawing(body, scale))
246        } else {
247            state.clip_rect.as_ref().map(rect_to_path)
248        };
249
250        let group = Group {
251            transform: anim_xf,
252            opacity: state.alpha_mul.clamp(0.0, 1.0),
253            clip: clip_path,
254            children: vec![Node::Group(inner)],
255            ..Group::default()
256        };
257
258        // Rasterise.
259        let frame = VectorFrame {
260            width: self.width as f32,
261            height: self.height as f32,
262            view_box: None,
263            root: Group {
264                children: vec![Node::Group(group)],
265                ..Group::default()
266            },
267            pts: None,
268            time_base: TimeBase::new(1, 1),
269        };
270        let renderer = oxideav_raster::Renderer::new(self.width, self.height);
271        let rendered = renderer.render(&frame);
272        if let Some(plane) = rendered.planes.first() {
273            // The renderer hands us the rasterised output sized to the
274            // canvas; copy it straight into our buffer.
275            let n = (self.width as usize) * (self.height as usize) * 4;
276            let want = n.min(plane.data.len()).min(buf.len());
277            buf[..want].copy_from_slice(&plane.data[..want]);
278        }
279        wrap_buf(buf, self.width, cue.start_us)
280    }
281}
282
283fn wrap_buf(data: Vec<u8>, width: u32, start_us: i64) -> VideoFrame {
284    let stride = (width as usize) * 4;
285    VideoFrame {
286        pts: Some(start_us),
287        planes: vec![VideoPlane { stride, data }],
288    }
289}
290
291fn rgba_to_core(c: [u8; 4]) -> CoreRgba {
292    CoreRgba::new(c[0], c[1], c[2], c[3])
293}
294
295fn rect_to_path(r: &ClipRect) -> Path {
296    let mut p = Path::new();
297    p.move_to(Point::new(r.x1, r.y1));
298    p.line_to(Point::new(r.x2, r.y1));
299    p.line_to(Point::new(r.x2, r.y2));
300    p.line_to(Point::new(r.x1, r.y2));
301    p.close();
302    p
303}
304
305fn repaint_node(node: Node, paint: &Paint) -> Node {
306    match node {
307        Node::Path(PathNode {
308            path,
309            stroke,
310            fill_rule,
311            ..
312        }) => Node::Path(PathNode {
313            path,
314            fill: Some(paint.clone()),
315            stroke,
316            fill_rule,
317        }),
318        Node::Group(mut g) => {
319            g.children = g
320                .children
321                .into_iter()
322                .map(|c| repaint_node(c, paint))
323                .collect();
324            Node::Group(g)
325        }
326        other => other,
327    }
328}
329
330fn measure(face: &FaceChain, text: &str, size_px: f32) -> f32 {
331    if text.is_empty() {
332        return 0.0;
333    }
334    match face.shape(text, size_px) {
335        Ok(g) => oxideav_scribe::run_width(&g),
336        Err(_) => 0.0,
337    }
338}
339
340/// Greedy word-wrap by shaped width. Returns visual lines.
341fn wrap_line(line: &str, face: &FaceChain, size_px: f32, max_w: f32) -> Vec<String> {
342    if line.is_empty() {
343        return vec![String::new()];
344    }
345    if measure(face, line, size_px) <= max_w {
346        return vec![line.to_string()];
347    }
348    // Tokenise into space-separated words; greedy fill.
349    let words: Vec<&str> = line.split(' ').collect();
350    let mut out: Vec<String> = Vec::new();
351    let mut cur = String::new();
352    for w in words {
353        let candidate = if cur.is_empty() {
354            w.to_string()
355        } else {
356            format!("{} {}", cur, w)
357        };
358        if measure(face, &candidate, size_px) <= max_w {
359            cur = candidate;
360        } else {
361            if !cur.is_empty() {
362                out.push(std::mem::take(&mut cur));
363            }
364            cur = w.to_string();
365        }
366    }
367    if !cur.is_empty() {
368        out.push(cur);
369    }
370    out
371}
372
373/// Walk the cue segments and return the visible text (LineBreak →
374/// `\n`, override `Raw` blocks dropped). Used to feed the shaper.
375fn collect_visible_text(segs: &[Segment]) -> String {
376    let mut out = String::new();
377    walk_text(segs, &mut out);
378    out
379}
380
381fn walk_text(segs: &[Segment], out: &mut String) {
382    for s in segs {
383        match s {
384            Segment::Text(t) => out.push_str(t),
385            Segment::LineBreak => out.push('\n'),
386            Segment::Bold(c) | Segment::Italic(c) | Segment::Underline(c) | Segment::Strike(c) => {
387                walk_text(c, out)
388            }
389            Segment::Color { children, .. }
390            | Segment::Font { children, .. }
391            | Segment::Voice { children, .. }
392            | Segment::Class { children, .. }
393            | Segment::Karaoke { children, .. } => walk_text(children, out),
394            Segment::Timestamp { .. } => {}
395            // Override-tag round-trip blocks contribute no visible text.
396            Segment::Raw(_) => {}
397        }
398    }
399}
400
401/// Build the affine 2D transform that approximates the animation's
402/// translate / scale / 3D rotations around `pivot`.
403///
404/// The 2D affine pipeline we apply (right-to-left) is:
405///
406/// 1. translate(-pivot)
407/// 2. scale(sx, sy)
408/// 3. shear/squeeze approximating `\fry` (X scale by cos α_y) and
409///    `\frx` (Y scale by cos α_x). True 3D would project onto a
410///    perspective camera; here we use the small-angle / orthographic
411///    approximation: the visible width shrinks by `cos(α_y)` for a
412///    rotation around Y and the visible height by `cos(α_x)` for a
413///    rotation around X. This is the standard "fold in half" effect
414///    most ASS renderers fall back on when no perspective camera is
415///    configured.
416/// 4. rotate(α_z) (`\frz`)
417/// 5. translate(+pivot)
418/// 6. translate(extra_translate) when `\pos` / `\move` set one.
419fn animation_transform(state: &RenderState, pivot: (f32, f32)) -> Transform2D {
420    let (px, py) = pivot;
421    let mut t = Transform2D::translate(-px, -py);
422    let (sx, sy) = state.scale;
423    if (sx - 1.0).abs() > f32::EPSILON || (sy - 1.0).abs() > f32::EPSILON {
424        t = Transform2D::scale(sx, sy).compose(&t);
425    }
426    // 3D approximation: scale x by |cos(fry)|, y by |cos(frx)|.
427    // (True foreshortening; sign change at >90° is not modelled — most
428    // subtitle use cases rotate <90°.)
429    let cy = state.rotate_y_radians.cos();
430    let cx = state.rotate_x_radians.cos();
431    if (cy - 1.0).abs() > 1e-6 || (cx - 1.0).abs() > 1e-6 {
432        let fx = if cy.abs() < 1e-3 { 1e-3 } else { cy };
433        let fy = if cx.abs() < 1e-3 { 1e-3 } else { cx };
434        t = Transform2D::scale(fx, fy).compose(&t);
435    }
436    if state.rotate_radians.abs() > f32::EPSILON {
437        t = Transform2D::rotate(state.rotate_radians).compose(&t);
438    }
439    t = Transform2D::translate(px, py).compose(&t);
440    if let Some((tx, ty)) = state.translate {
441        // \pos / \move sets an absolute target — translate the pivot
442        // there.
443        t = Transform2D::translate(tx - px, ty - py).compose(&t);
444    }
445    t
446}
447
448/// Factory helper: wrap an existing subtitle decoder + face into a
449/// boxed [`AnimatedRenderedDecoder`].
450pub fn make_animated_decoder(
451    inner: Box<dyn Decoder>,
452    width: u32,
453    height: u32,
454    face: FaceChain,
455) -> Box<dyn Decoder> {
456    Box::new(AnimatedRenderedDecoder::new(inner, width, height, face))
457}
458
#[cfg(test)]
mod tests {
    use super::*;
    use oxideav_core::{CuePosition, SubtitleCue};

    /// One-second, centre-aligned cue whose only segment is the text "hi".
    fn dummy_cue() -> SubtitleCue {
        let positioning = CuePosition {
            align: TextAlign::Center,
            ..Default::default()
        };
        SubtitleCue {
            start_us: 0,
            end_us: 1_000_000,
            style_ref: None,
            positioning: Some(positioning),
            segments: vec![Segment::Text("hi".into())],
        }
    }

    #[test]
    fn animation_transform_pivots_around_anchor() {
        // A quarter-turn \frz about (10,10) must leave the pivot fixed.
        let mut state = RenderState::identity();
        state.rotate_radians = std::f32::consts::FRAC_PI_2;
        let xf = animation_transform(&state, (10.0, 10.0));
        let mapped = xf.apply(Point::new(10.0, 10.0));
        assert!((mapped.x - 10.0).abs() < 1e-4);
        assert!((mapped.y - 10.0).abs() < 1e-4);
    }

    #[test]
    fn frx_compresses_y() {
        // cos(60°) = 0.5, so a 60° \frx halves vertical distances
        // measured from the pivot.
        let mut state = RenderState::identity();
        state.rotate_x_radians = std::f32::consts::FRAC_PI_3;
        let xf = animation_transform(&state, (0.0, 0.0));
        let p = xf.apply(Point::new(0.0, 100.0));
        assert!((p.y - 50.0).abs() < 1e-3, "got y={}", p.y);
    }

    #[test]
    fn fry_compresses_x() {
        // Same as above for \fry and horizontal distances.
        let mut state = RenderState::identity();
        state.rotate_y_radians = std::f32::consts::FRAC_PI_3;
        let xf = animation_transform(&state, (0.0, 0.0));
        let p = xf.apply(Point::new(100.0, 0.0));
        assert!((p.x - 50.0).abs() < 1e-3, "got x={}", p.x);
    }

    #[test]
    fn org_overrides_anchor_pivot() {
        // With an explicit pivot set, rotating about it keeps it fixed.
        let mut state = RenderState::identity();
        state.rotate_radians = std::f32::consts::FRAC_PI_2;
        state.pivot = Some((100.0, 100.0));
        let xf = animation_transform(&state, state.pivot.unwrap());
        let mapped = xf.apply(Point::new(100.0, 100.0));
        assert!((mapped.x - 100.0).abs() < 1e-4);
        assert!((mapped.y - 100.0).abs() < 1e-4);
    }

    #[test]
    fn collects_visible_text() {
        // Text and nested bold text are kept, the line break becomes
        // '\n', and the raw override block is dropped.
        let segments = vec![
            Segment::Text("a".into()),
            Segment::LineBreak,
            Segment::Bold(vec![Segment::Text("b".into())]),
            Segment::Raw("{\\fad(0,0)}".into()),
        ];
        assert_eq!(collect_visible_text(&segments), "a\nb");
    }

    #[test]
    fn rect_to_path_has_5_commands() {
        // move_to + three line_to + close.
        let rect = ClipRect {
            x1: 0.0,
            y1: 0.0,
            x2: 10.0,
            y2: 10.0,
        };
        let outline = rect_to_path(&rect);
        assert_eq!(outline.commands.len(), 5);
    }

    #[test]
    fn dummy_cue_yields_text() {
        // Smoke check on the fixture itself.
        let cue = dummy_cue();
        assert_eq!(collect_visible_text(&cue.segments), "hi");
    }
}