Skip to main content

codec/filter/
mod.rs

1//! Video filters — per-frame transforms applied to decoded frames **before**
2//! per-rung scaling and encoding.
3//!
4//! ## Layout
5//!
6//! The canonical representation is a list of [`VideoFilter`] **values**. This
7//! `mod.rs` owns the cross-cutting pieces — the enum, the textual / structured
8//! parsers, the [`apply`] dispatch, the [`FilterChain`], and the shared plane
9//! helpers — while **each filter's implementation lives in its own file**:
10//! [`crop`], [`pad`], [`hflip`], [`vflip`], [`rotate`], [`grayscale`],
11//! [`overlay`], [`invert`], [`brightness`], [`contrast`], [`saturation`], and
12//! the [`denoise`] family (one file per algorithm under `denoise/`).
13//!
14//! Two kinds of filter:
15//!
16//! - **Stateless** ([`apply`] runs them directly): crop, pad, hflip, vflip,
17//!   rotate, grayscale (geometry, any bit depth); invert, brightness, contrast,
18//!   saturation (colour, 8-bit); and `denoise` (selectable algorithm, 8-bit).
19//! - **Resource** filters need one-time setup — `overlay` loads its PNG and
20//!   converts it to YUV + alpha. Build a [`FilterChain`] with
21//!   [`FilterChain::prepare`] (loads overlays once) and call
22//!   [`FilterChain::apply`] per frame.
23//!
24//! Two interchangeable serializations (they round-trip:
25//! `parse_chain(&chain_to_string(c)) == c`):
26//!
27//! - **Structured** objects (serde feature) — a YAML/JSON DSL writes a chain as
28//!   a list of objects: `[{crop: {w,h}}, hflip, {overlay: {image: "logo.png"}}]`.
29//! - **Textual** ffmpeg-`-vf` style — [`parse_chain`] / [`Display`]:
30//!   `crop=1280:720,hflip,overlay=logo.png:24:24`.
31
32use std::fmt;
33
34use anyhow::{Context, Result, bail};
35use bytes::BytesMut;
36
37use crate::frame::{PixelFormat, VideoFrame};
38
39mod brightness;
40mod contrast;
41mod crop;
42mod denoise;
43mod grayscale;
44mod hflip;
45mod invert;
46mod overlay;
47mod pad;
48mod rotate;
49mod saturation;
50mod vflip;
51
52#[cfg(test)]
53mod tests;
54
55pub use denoise::DenoiseMethod;
56
57/// One video-filter step. The canonical, code-interpreted representation.
58#[derive(Debug, Clone, PartialEq)]
59#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
60#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
61pub enum VideoFilter {
62    /// Crop a `w×h` region. Centred when `x`/`y` are omitted, else at `(x, y)`.
63    Crop {
64        w: u32,
65        h: u32,
66        #[cfg_attr(feature = "serde", serde(default, skip_serializing_if = "Option::is_none"))]
67        x: Option<u32>,
68        #[cfg_attr(feature = "serde", serde(default, skip_serializing_if = "Option::is_none"))]
69        y: Option<u32>,
70    },
71    /// Pad into a `w×h` canvas (neutral black). Centred when `x`/`y` are omitted.
72    Pad {
73        w: u32,
74        h: u32,
75        #[cfg_attr(feature = "serde", serde(default, skip_serializing_if = "Option::is_none"))]
76        x: Option<u32>,
77        #[cfg_attr(feature = "serde", serde(default, skip_serializing_if = "Option::is_none"))]
78        y: Option<u32>,
79    },
80    /// Mirror horizontally (left↔right).
81    #[cfg_attr(feature = "serde", serde(rename = "hflip"))]
82    HFlip,
83    /// Mirror vertically (top↔bottom).
84    #[cfg_attr(feature = "serde", serde(rename = "vflip"))]
85    VFlip,
86    /// Rotate clockwise by 90, 180, or 270 degrees (90/270 swap width↔height).
87    Rotate(u32),
88    /// Drop chroma — set U/V to neutral so the image is grayscale.
89    Grayscale,
90    /// Alpha-composite a PNG (logo / watermark) at top-left `(x, y)`. 8-bit only.
91    Overlay {
92        /// Path to a PNG image (with or without an alpha channel).
93        image: String,
94        #[cfg_attr(feature = "serde", serde(default))]
95        x: u32,
96        #[cfg_attr(feature = "serde", serde(default))]
97        y: u32,
98    },
99    /// Invert (negate) luma + chroma. 8-bit only.
100    Invert,
101    /// Add a luma offset (`-255..=255`); brighten/darken. 8-bit only.
102    Brightness(i32),
103    /// Scale luma contrast around mid-grey (`1.0` = unchanged). 8-bit only.
104    Contrast(f32),
105    /// Scale chroma saturation around neutral (`0` = grayscale, `1.0` = unchanged). 8-bit only.
106    Saturation(f32),
107    /// Spatial **denoise** with a selectable algorithm (see [`DenoiseMethod`])
108    /// and a `strength` in `0.0..=1.0` (default `0.5`) that blends the filtered
109    /// result back with the source (`0` = off, `1` = fully denoised). Applied to
110    /// luma + chroma. 8-bit only.
111    Denoise {
112        #[cfg_attr(feature = "serde", serde(default))]
113        method: DenoiseMethod,
114        #[cfg_attr(feature = "serde", serde(default = "denoise::default_denoise_strength"))]
115        strength: f32,
116    },
117}
118
119impl fmt::Display for VideoFilter {
120    /// The textual (ffmpeg-`-vf`) token for this filter.
121    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
122        match self {
123            VideoFilter::Crop { w, h, x: Some(x), y: Some(y) } => write!(f, "crop={w}:{h}:{x}:{y}"),
124            VideoFilter::Crop { w, h, .. } => write!(f, "crop={w}:{h}"),
125            VideoFilter::Pad { w, h, x: Some(x), y: Some(y) } => write!(f, "pad={w}:{h}:{x}:{y}"),
126            VideoFilter::Pad { w, h, .. } => write!(f, "pad={w}:{h}"),
127            VideoFilter::HFlip => write!(f, "hflip"),
128            VideoFilter::VFlip => write!(f, "vflip"),
129            VideoFilter::Rotate(d) => write!(f, "rotate={d}"),
130            VideoFilter::Grayscale => write!(f, "grayscale"),
131            VideoFilter::Overlay { image, x, y } => write!(f, "overlay={image}:{x}:{y}"),
132            VideoFilter::Invert => write!(f, "invert"),
133            VideoFilter::Brightness(b) => write!(f, "brightness={b}"),
134            VideoFilter::Contrast(c) => write!(f, "contrast={c}"),
135            VideoFilter::Saturation(s) => write!(f, "saturation={s}"),
136            VideoFilter::Denoise { method, strength } => write!(f, "denoise={method}:{strength}"),
137        }
138    }
139}
140
141/// A whole chain as a comma-separated textual string (the inverse of
142/// [`parse_chain`]).
143pub fn chain_to_string(chain: &[VideoFilter]) -> String {
144    chain.iter().map(|f| f.to_string()).collect::<Vec<_>>().join(",")
145}
146
/// A filter chain in either serialization, for a DSL field that should accept
/// either a structured list or a chain string. Turn it into concrete filters
/// with [`FilterSpec::resolve`].
///
/// Deserialization is untagged: a string value becomes [`FilterSpec::Chain`],
/// a list becomes [`FilterSpec::List`].
#[cfg(feature = "serde")]
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(untagged)]
pub enum FilterSpec {
    /// A chain string in ffmpeg-`-vf` style, e.g. `"crop=1280:720,hflip"`.
    Chain(String),
    /// A structured list of filters.
    List(Vec<VideoFilter>),
}
159
#[cfg(feature = "serde")]
impl FilterSpec {
    /// Resolve to the concrete, **validated** filter list.
    ///
    /// The string form is parsed (and thereby validated) by [`parse_chain`].
    /// The structured form is validated value-by-value with the same rules the
    /// parser enforces, so e.g. `rotate: 45` is rejected at config time rather
    /// than at apply time. The structured list is then returned unchanged.
    ///
    /// The list is deliberately **not** round-tripped through its textual
    /// rendering: that rendering is lossy for values the text grammar cannot
    /// express (an overlay path containing `:` or `,`, or a crop/pad with only
    /// one of `x`/`y` set), and a round-trip would reject or silently alter
    /// them.
    ///
    /// # Errors
    ///
    /// Fails when the chain is empty, when the chain string does not parse, or
    /// when a structured filter carries an invalid value.
    pub fn resolve(&self) -> Result<Vec<VideoFilter>> {
        match self {
            FilterSpec::Chain(s) => parse_chain(s),
            FilterSpec::List(filters) => {
                // Same rule as the textual form: an empty chain is an error.
                if filters.is_empty() {
                    bail!("empty filter chain");
                }
                for filter in filters {
                    Self::validate(filter)?;
                }
                Ok(filters.clone())
            }
        }
    }

    /// Collapse to the chain-string form (for string-only surfaces).
    ///
    /// A structured list is rendered with [`chain_to_string`]; values the text
    /// grammar cannot express do not survive this conversion.
    pub fn to_chain(&self) -> String {
        match self {
            FilterSpec::Chain(s) => s.clone(),
            FilterSpec::List(v) => chain_to_string(v),
        }
    }

    /// Check one structured filter against the value rules the textual parser
    /// enforces. Variants without such a rule are accepted as-is.
    fn validate(filter: &VideoFilter) -> Result<()> {
        match filter {
            VideoFilter::Rotate(deg) => {
                if !matches!(*deg, 90 | 180 | 270) {
                    bail!("rotate wants 90|180|270, got {deg}");
                }
            }
            VideoFilter::Denoise { strength, .. } => {
                // `contains` is also false for NaN, so NaN is rejected here.
                if !(0.0..=1.0).contains(strength) {
                    bail!("denoise strength must be 0.0..=1.0, got {strength}");
                }
            }
            VideoFilter::Overlay { image, .. } => {
                if image.trim().is_empty() {
                    bail!("overlay needs a PATH");
                }
            }
            VideoFilter::Crop { .. }
            | VideoFilter::Pad { .. }
            | VideoFilter::HFlip
            | VideoFilter::VFlip
            | VideoFilter::Grayscale
            | VideoFilter::Invert
            | VideoFilter::Brightness(_)
            | VideoFilter::Contrast(_)
            | VideoFilter::Saturation(_) => {}
        }
        Ok(())
    }
}
181
182/// Parse an ffmpeg-`-vf`-style chain, e.g. `"crop=1280:720,hflip"`.
183pub fn parse_chain(s: &str) -> Result<Vec<VideoFilter>> {
184    let mut out = Vec::new();
185    for part in s.split(',').map(str::trim).filter(|p| !p.is_empty()) {
186        out.push(parse_one(part)?);
187    }
188    if out.is_empty() {
189        bail!("empty filter chain");
190    }
191    Ok(out)
192}
193
194fn parse_one(spec: &str) -> Result<VideoFilter> {
195    let (name, args) = match spec.split_once('=') {
196        Some((n, a)) => (n.trim(), a.trim()),
197        None => (spec.trim(), ""),
198    };
199    let parts: Vec<&str> = args.split(':').map(str::trim).filter(|s| !s.is_empty()).collect();
200    let nums = || -> Result<Vec<u32>> {
201        parts
202            .iter()
203            .map(|s| s.parse::<u32>().map_err(|_| anyhow::anyhow!("bad number '{s}' in '{spec}'")))
204            .collect()
205    };
206    let one_f32 = || -> Result<f32> {
207        parts
208            .first()
209            .ok_or_else(|| anyhow::anyhow!("'{name}' needs a value"))?
210            .parse::<f32>()
211            .map_err(|_| anyhow::anyhow!("bad number in '{spec}'"))
212    };
213    let f = match name {
214        "crop" => match nums()?.as_slice() {
215            [w, h] => VideoFilter::Crop { w: *w, h: *h, x: None, y: None },
216            [w, h, x, y] => VideoFilter::Crop { w: *w, h: *h, x: Some(*x), y: Some(*y) },
217            _ => bail!("crop wants W:H or W:H:X:Y, got '{args}'"),
218        },
219        "pad" => match nums()?.as_slice() {
220            [w, h] => VideoFilter::Pad { w: *w, h: *h, x: None, y: None },
221            [w, h, x, y] => VideoFilter::Pad { w: *w, h: *h, x: Some(*x), y: Some(*y) },
222            _ => bail!("pad wants W:H or W:H:X:Y, got '{args}'"),
223        },
224        "hflip" => VideoFilter::HFlip,
225        "vflip" => VideoFilter::VFlip,
226        "rotate" | "transpose" => {
227            let deg = if name == "transpose" { 90 } else { *nums()?.first().unwrap_or(&90) };
228            if !matches!(deg, 90 | 180 | 270) {
229                bail!("rotate wants 90|180|270, got {deg}");
230            }
231            VideoFilter::Rotate(deg)
232        }
233        "grayscale" | "gray" => VideoFilter::Grayscale,
234        "overlay" => {
235            // overlay=PATH[:X:Y] — PATH must not contain ':'.
236            let image =
237                parts.first().ok_or_else(|| anyhow::anyhow!("overlay needs a PATH"))?.to_string();
238            let x = parts.get(1).map(|s| s.parse::<u32>()).transpose().map_err(|_| anyhow::anyhow!("bad overlay x in '{spec}'"))?.unwrap_or(0);
239            let y = parts.get(2).map(|s| s.parse::<u32>()).transpose().map_err(|_| anyhow::anyhow!("bad overlay y in '{spec}'"))?.unwrap_or(0);
240            VideoFilter::Overlay { image, x, y }
241        }
242        "invert" | "negate" => VideoFilter::Invert,
243        "brightness" => {
244            let b: i32 = parts.first().ok_or_else(|| anyhow::anyhow!("brightness needs a value"))?.parse().map_err(|_| anyhow::anyhow!("bad brightness in '{spec}'"))?;
245            VideoFilter::Brightness(b)
246        }
247        "contrast" => VideoFilter::Contrast(one_f32()?),
248        "saturation" => VideoFilter::Saturation(one_f32()?),
249        "denoise" | "nr" => {
250            // denoise[=METHOD][:STRENGTH] — METHOD is bilateral|gaussian|median|
251            // mean|nlmeans|anisotropic (default bilateral); STRENGTH is 0..=1
252            // (default 0.5). The two args are order-free: a token that parses as
253            // a number is the strength, anything else is the method.
254            let mut method = DenoiseMethod::Bilateral;
255            let mut strength = 0.5f32;
256            for &p in &parts {
257                match p.parse::<f32>() {
258                    Ok(s) => strength = s,
259                    Err(_) => {
260                        method = match p.to_ascii_lowercase().as_str() {
261                            "bilateral" | "bl" => DenoiseMethod::Bilateral,
262                            "gaussian" | "gauss" | "gs" => DenoiseMethod::Gaussian,
263                            "median" | "md" => DenoiseMethod::Median,
264                            "mean" | "box" | "average" => DenoiseMethod::Mean,
265                            "nlmeans" | "nlm" => DenoiseMethod::Nlmeans,
266                            "anisotropic" | "diffusion" | "pm" => DenoiseMethod::Anisotropic,
267                            o => bail!(
268                                "unknown denoise method '{o}' (want bilateral|gaussian|median|\
269                                 mean|nlmeans|anisotropic)"
270                            ),
271                        };
272                    }
273                }
274            }
275            if !(0.0..=1.0).contains(&strength) {
276                bail!("denoise strength must be 0.0..=1.0, got {strength}");
277            }
278            VideoFilter::Denoise { method, strength }
279        }
280        o => bail!("unknown filter '{o}'"),
281    };
282    Ok(f)
283}
284
285/// Apply a whole **stateless** chain to a frame, in order. Returns an error if
286/// the chain contains an `overlay` (use [`FilterChain`] for that).
287pub fn apply_chain(frame: VideoFrame, chain: &[VideoFilter]) -> Result<VideoFrame> {
288    let mut f = frame;
289    for filter in chain {
290        f = apply(&f, filter)?;
291    }
292    Ok(f)
293}
294
295/// Apply one **stateless** filter, dispatching to its module. (`Overlay` errors
296/// here — use [`FilterChain`].)
297pub fn apply(frame: &VideoFrame, filter: &VideoFilter) -> Result<VideoFrame> {
298    match filter {
299        VideoFilter::Crop { w, h, x, y } => crop::apply(frame, *w, *h, *x, *y),
300        VideoFilter::Pad { w, h, x, y } => pad::apply(frame, *w, *h, *x, *y),
301        VideoFilter::HFlip => hflip::apply(frame),
302        VideoFilter::VFlip => vflip::apply(frame),
303        VideoFilter::Rotate(deg) => rotate::apply(frame, *deg),
304        VideoFilter::Grayscale => grayscale::apply(frame),
305        VideoFilter::Invert => invert::apply(frame),
306        VideoFilter::Brightness(delta) => brightness::apply(frame, *delta),
307        VideoFilter::Contrast(c) => contrast::apply(frame, *c),
308        VideoFilter::Saturation(s) => saturation::apply(frame, *s),
309        VideoFilter::Denoise { method, strength } => denoise::apply(frame, *method, *strength),
310        VideoFilter::Overlay { .. } => {
311            bail!("overlay is a resource filter — build a FilterChain::prepare(..) and call .apply()")
312        }
313    }
314}
315
316// ── shared plane helpers (used by the per-filter modules via `super::`) ───────
317
318/// Bytes-per-sample for the supported 4:2:0 formats.
319fn bps(format: PixelFormat) -> Result<usize> {
320    match format {
321        PixelFormat::Yuv420p => Ok(1),
322        PixelFormat::Yuv420p10le => Ok(2),
323        other => bail!("video filters need Yuv420p / Yuv420p10le, got {other:?}"),
324    }
325}
326
327/// Split a frame into its (Y, U, V) plane byte slices for a `w×h` 4:2:0 frame.
328fn planes(frame: &VideoFrame, bps: usize) -> Result<(&[u8], &[u8], &[u8])> {
329    let w = frame.width as usize;
330    let h = frame.height as usize;
331    let y_len = w * h * bps;
332    let c_len = (w / 2) * (h / 2) * bps;
333    if frame.data.len() < y_len + 2 * c_len {
334        bail!("frame data too small: {} < {} for {}x{}", frame.data.len(), y_len + 2 * c_len, w, h);
335    }
336    let (y, rest) = frame.data.split_at(y_len);
337    let (u, v) = rest.split_at(c_len);
338    Ok((y, &u[..c_len], &v[..c_len]))
339}
340
341/// Reassemble a frame from new Y/U/V planes + new dims.
342fn assemble(src: &VideoFrame, w: u32, h: u32, y: Vec<u8>, u: Vec<u8>, v: Vec<u8>) -> VideoFrame {
343    let mut data = BytesMut::with_capacity(y.len() + u.len() + v.len());
344    data.extend_from_slice(&y);
345    data.extend_from_slice(&u);
346    data.extend_from_slice(&v);
347    VideoFrame::new(data.freeze(), w, h, src.format, src.color_space, src.pts)
348}
349
350/// Require 8-bit `Yuv420p` for the colour / overlay / denoise filters and return
351/// the planes (owned, so callers can mutate them).
352fn planes_8bit(frame: &VideoFrame, what: &str) -> Result<(Vec<u8>, Vec<u8>, Vec<u8>)> {
353    if frame.format != PixelFormat::Yuv420p {
354        bail!("the `{what}` filter needs an 8-bit Yuv420p frame (got {:?}); it applies to SDR output", frame.format);
355    }
356    let (y, u, v) = planes(frame, 1)?;
357    Ok((y.to_vec(), u.to_vec(), v.to_vec()))
358}
359
/// Largest even number that is `<= n` (4:2:0 chroma alignment).
fn even(n: u32) -> u32 {
    // Dropping the remainder of a division by two clears the lowest bit.
    n - n % 2
}
364
365// ── prepared chain (loads overlays once, then applies per frame) ─────────────
366
367enum Step {
368    Plain(VideoFilter),
369    Overlay(overlay::PreparedOverlay),
370}
371
372/// A filter chain with its resources prepared (overlay PNGs loaded + converted).
373/// Build once with [`prepare`](FilterChain::prepare), then [`apply`](FilterChain::apply)
374/// per frame.
375pub struct FilterChain {
376    steps: Vec<Step>,
377}
378
379impl FilterChain {
380    /// Prepare a chain: load + convert every `overlay` image (the rest pass
381    /// through). Fails if an overlay image can't be read or decoded.
382    pub fn prepare(filters: &[VideoFilter]) -> Result<Self> {
383        let mut steps = Vec::with_capacity(filters.len());
384        for f in filters {
385            match f {
386                VideoFilter::Overlay { image, x, y } => {
387                    let img = image::ImageReader::open(image)
388                        .with_context(|| format!("opening overlay image '{image}'"))?
389                        .decode()
390                        .with_context(|| format!("decoding overlay image '{image}'"))?
391                        .to_rgba8();
392                    let (w, h) = (img.width(), img.height());
393                    steps.push(Step::Overlay(overlay::PreparedOverlay::from_rgba(img.as_raw(), w, h, *x, *y)?));
394                }
395                other => steps.push(Step::Plain(other.clone())),
396            }
397        }
398        Ok(Self { steps })
399    }
400
401    /// Apply the whole chain to a frame, in order.
402    pub fn apply(&self, frame: VideoFrame) -> Result<VideoFrame> {
403        let mut f = frame;
404        for step in &self.steps {
405            f = match step {
406                Step::Plain(filt) => apply(&f, filt)?,
407                Step::Overlay(ov) => ov.composite(&f)?,
408            };
409        }
410        Ok(f)
411    }
412
413    /// No filters → applying is a no-op.
414    pub fn is_empty(&self) -> bool {
415        self.steps.is_empty()
416    }
417}