webm/
lib.rs

1//! A crate for muxing one or more video/audio streams into a WebM file.
2//!
3//! Note that this crate is only for muxing media that has already been encoded with the appropriate codec.
4//! Consider a crate such as `vpx` if you need encoding as well.
5//!
6//! Actual writing of muxed data is done through a [`mux::Writer`], which lets you supply your own implementation.
7//! This makes it easy to support muxing to files, in-memory buffers, or whatever else you need. Once you have
8//! a [`mux::Writer`], you create a [`mux::SegmentBuilder`] and add the tracks you need. Finally, you create a
9//! [`mux::Segment`] with that builder, to which you can add media frames.
10//!
11//! In typical usage of this library, where you might mux to a WebM file, you would do:
12//! ```no_run
13//! use std::fs::File;
14//! use webm::mux::{SegmentBuilder, SegmentMode, VideoCodecId, Writer};
15//!
//! let file = File::create("./my-cool-file.webm").unwrap();
17//! let writer = Writer::new(file);
18//!
19//! // Build a segment with a single video track
20//! let builder = SegmentBuilder::new(writer).unwrap();
21//! let builder = builder.set_mode(SegmentMode::Live).unwrap(); // Set live mode for streaming
22//! let (builder, video_track) = builder.add_video_track(640, 480, VideoCodecId::VP8, None).unwrap();
23//! let mut segment = builder.build();
24//!
25//! // Add some video frames
26//! let encoded_video_frame: &[u8] = &[]; // TODO: Your video data here
27//! let timestamp_ns = 0;
28//! let is_keyframe = true;
29//! segment.add_frame(video_track, encoded_video_frame, timestamp_ns, is_keyframe).unwrap();
30//! // TODO: More video frames
31//!
32//! // Done writing frames, finish off the file
33//! _ = segment.finalize(None).inspect_err(|_| eprintln!("Could not finalize WebM file"));
34//! ```
35
36use webm_sys as ffi;
37
38pub mod mux {
39    mod segment;
40    mod writer;
41
42    pub use crate::ffi::mux::TrackNum;
43    pub use segment::{Segment, SegmentBuilder};
44    pub use writer::Writer;
45
46    use crate::ffi;
47    use std::num::NonZeroU64;
48
49    /// This is a copyable handle equivalent to a track number
50    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
51    pub struct VideoTrack(NonZeroU64);
52
53    impl From<VideoTrack> for TrackNum {
54        #[inline]
55        fn from(track: VideoTrack) -> Self {
56            track.0.get()
57        }
58    }
59
60    /// This is a copyable handle equivalent to a track number
61    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
62    pub struct AudioTrack(NonZeroU64);
63
64    impl From<AudioTrack> for TrackNum {
65        #[inline]
66        fn from(track: AudioTrack) -> Self {
67            track.0.get()
68        }
69    }
70
71    pub trait Track {
72        #[must_use]
73        fn is_audio(&self) -> bool {
74            false
75        }
76
77        #[must_use]
78        fn is_video(&self) -> bool {
79            false
80        }
81
82        #[must_use]
83        fn track_number(&self) -> TrackNum;
84    }
85
86    impl Track for VideoTrack {
87        #[inline]
88        fn is_video(&self) -> bool {
89            true
90        }
91
92        #[inline]
93        fn track_number(&self) -> TrackNum {
94            self.0.get()
95        }
96    }
97
98    impl Track for AudioTrack {
99        #[inline]
100        fn is_audio(&self) -> bool {
101            true
102        }
103
104        #[inline]
105        fn track_number(&self) -> TrackNum {
106            self.0.get()
107        }
108    }
109
110    #[derive(Eq, PartialEq, Clone, Copy, Debug)]
111    #[repr(u32)]
112    pub enum AudioCodecId {
113        Opus = ffi::mux::OPUS_CODEC_ID,
114        Vorbis = ffi::mux::VORBIS_CODEC_ID,
115    }
116
117    impl AudioCodecId {
118        const fn get_id(self) -> u32 {
119            self as u32
120        }
121    }
122
123    #[derive(Eq, PartialEq, Clone, Copy, Debug)]
124    #[repr(u32)]
125    pub enum VideoCodecId {
126        VP8 = ffi::mux::VP8_CODEC_ID,
127        VP9 = ffi::mux::VP9_CODEC_ID,
128        AV1 = ffi::mux::AV1_CODEC_ID,
129    }
130
131    impl VideoCodecId {
132        const fn get_id(self) -> u32 {
133            self as u32
134        }
135    }
136
137    /// The error type for this entire crate. More specific error types will
138    /// be added in the future, hence the current marking as non-exhaustive.
139    #[derive(Debug)]
140    #[non_exhaustive]
141    pub enum Error {
142        /// An parameter with an invalid value was passed to a method.
143        BadParam,
144
145        /// An unknown error occurred. While this is typically the result of
146        /// incorrect parameters to methods, an internal error in libwebm is
147        /// also possible.
148        Unknown,
149    }
150
151    impl Error {
152        pub(crate) fn check_code(code: ffi::mux::ResultCode) -> Result<(), Self> {
153            match code {
154                ffi::mux::ResultCode::Ok => Ok(()),
155                ffi::mux::ResultCode::BadParam => Err(Self::BadParam),
156                ffi::mux::ResultCode::UnknownLibwebmError => Err(Self::Unknown),
157            }
158        }
159    }
160
161    impl std::fmt::Display for Error {
162        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
163            match self {
164                Self::BadParam => f.write_str("Bad parameter"),
165                Self::Unknown => f.write_str("Unknown error"),
166            }
167        }
168    }
169
170    impl std::error::Error for Error {}
171
172    /// A specification for how pixels in written video frames are subsampled in chroma channels.
173    ///
174    /// Certain video frame formats (e.g. YUV 4:2:0) have a lower resolution in chroma (Cr/Cb) channels than the
175    /// luminance channel. This structure informs video players how that subsampling is done, using a number of
176    /// subsampling factors. A factor of zero means no subsampling, and a factor of one means that particular dimension
177    /// is half resolution.
178    ///
179    /// You may use [`ColorSubsampling::default()`] to get a specification of no subsampling in any dimension.
180    #[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
181    pub struct ColorSubsampling {
182        /// The subsampling factor for both chroma channels in the horizontal direction.
183        pub chroma_horizontal: u8,
184
185        /// The subsampling factor for both chroma channels in the vertical direction.
186        pub chroma_vertical: u8,
187    }
188
189    /// A specification of how the range of colors in the input video frames has been clipped.
190    ///
191    /// Certain screens struggle with the full range of available colors, and video content is thus sometimes tuned to
192    /// a restricted range.
193    #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
194    pub enum ColorRange {
195        /// No claim is made as to how colors have been restricted.
196        #[default]
197        Unspecified = 0,
198
199        /// Color values are restricted to a "broadcast-safe" range.
200        Broadcast = 1,
201
202        /// No color clipping is performed.
203        Full = 2,
204    }
205
206    /// A specification for the segment writing mode.
207    ///
208    /// This controls how the segment is written and affects features like seeking.
209    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
210    pub enum SegmentMode {
211        /// Live mode - optimized for real-time streaming.
212        /// In this mode, seeking information may not be available.
213        Live,
214
215        /// File mode - optimized for file-based playback.
216        /// This enables full seeking and duration information.
217        File,
218    }
219
220    /// Transfer characteristics (EOTF - Electro-Optical Transfer Function).
221    ///
222    /// Specifies how the video signal values relate to light output.
223    /// See ITU-T H.273 / ISO/IEC 23091-2.
224    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
225    #[repr(u64)]
226    pub enum TransferCharacteristics {
227        /// Rec. ITU-R BT.709
228        Bt709 = 1,
229        /// Unspecified
230        Unspecified = 2,
231        /// Rec. ITU-R BT.470-6 System M
232        Bt470M = 4,
233        /// Rec. ITU-R BT.470-6 System B, G
234        Bt470Bg = 5,
235        /// Rec. ITU-R BT.601-7 525 or 625
236        Bt601 = 6,
237        /// SMPTE ST 240
238        Smpte240M = 7,
239        /// Linear transfer characteristics
240        Linear = 8,
241        /// Logarithmic transfer (100:1 range)
242        Log100 = 9,
243        /// Logarithmic transfer (316.22777:1 range)
244        Log316 = 10,
245        /// IEC 61966-2-4
246        Iec61966_2_4 = 11,
247        /// Rec. ITU-R BT.1361-0 extended colour gamut system
248        Bt1361 = 12,
249        /// IEC 61966-2-1 sRGB
250        Iec61966_2_1 = 13,
251        /// Rec. ITU-R BT.2020-2 (10-bit system)
252        Bt2020_10bit = 14,
253        /// Rec. ITU-R BT.2020-2 (12-bit system)
254        Bt2020_12bit = 15,
255        /// SMPTE ST 2084 - Perceptual Quantizer (PQ) for HDR10
256        Smpte2084 = 16,
257        /// SMPTE ST 428-1
258        Smpte428 = 17,
259        /// ARIB STD-B67 - Hybrid Log-Gamma (HLG)
260        AribStdB67 = 18,
261    }
262
263    /// Color primaries specification.
264    ///
265    /// Defines the chromaticity coordinates of the source primaries.
266    /// See ITU-T H.273 / ISO/IEC 23091-2.
267    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
268    #[repr(u64)]
269    pub enum ColorPrimaries {
270        /// Rec. ITU-R BT.709
271        Bt709 = 1,
272        /// Unspecified
273        Unspecified = 2,
274        /// Rec. ITU-R BT.470-6 System M
275        Bt470M = 4,
276        /// Rec. ITU-R BT.470-6 System B, G
277        Bt470Bg = 5,
278        /// Rec. ITU-R BT.601-7 525 or 625
279        Bt601 = 6,
280        /// SMPTE ST 240
281        Smpte240M = 7,
282        /// Generic film (colour filters using Illuminant C)
283        Film = 8,
284        /// Rec. ITU-R BT.2020 / BT.2100 - Wide color gamut for HDR
285        Bt2020 = 9,
286        /// SMPTE ST 428-1 (CIE 1931 XYZ)
287        Smpte428 = 10,
288        /// SMPTE RP 431-2 - DCI P3
289        Smpte431 = 11,
290        /// SMPTE EG 432-1 - Display P3
291        Smpte432 = 12,
292        /// EBU Tech. 3213-E
293        Ebu3213 = 22,
294    }
295
296    /// Matrix coefficients for deriving luma and chroma from RGB.
297    ///
298    /// See ITU-T H.273 / ISO/IEC 23091-2.
299    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
300    #[repr(u64)]
301    pub enum MatrixCoefficients {
302        /// Identity matrix (RGB)
303        Identity = 0,
304        /// Rec. ITU-R BT.709
305        Bt709 = 1,
306        /// Unspecified
307        Unspecified = 2,
308        /// FCC
309        Fcc = 4,
310        /// Rec. ITU-R BT.470-6 System B, G
311        Bt470Bg = 5,
312        /// Rec. ITU-R BT.601-7 525 or 625
313        Bt601 = 6,
314        /// SMPTE ST 240
315        Smpte240M = 7,
316        /// YCgCo
317        YCgCo = 8,
318        /// Rec. ITU-R BT.2020-2 non-constant luminance
319        Bt2020Ncl = 9,
320        /// Rec. ITU-R BT.2020-2 constant luminance
321        Bt2020Cl = 10,
322        /// SMPTE ST 2085
323        Smpte2085 = 11,
324        /// Chromaticity-derived non-constant luminance
325        ChromaNcl = 12,
326        /// Chromaticity-derived constant luminance
327        ChromaCl = 13,
328        /// ICtCp (Rec. ITU-R BT.2100-0)
329        ICtCp = 14,
330    }
331
332    /// Chromaticity coordinates (CIE 1931 xy).
333    ///
334    /// Values should be in the range 0.0 to 1.0.
335    #[derive(Debug, Clone, Copy, PartialEq)]
336    pub struct Chromaticity {
337        pub x: f32,
338        pub y: f32,
339    }
340
341    impl Chromaticity {
342        /// D65 white point (standard for BT.709, BT.2020)
343        pub const D65: Self = Self {
344            x: 0.3127,
345            y: 0.3290,
346        };
347    }
348
349    /// Display primaries for HDR mastering metadata.
350    #[derive(Debug, Clone, Copy, PartialEq)]
351    pub struct DisplayPrimaries {
352        pub red: Chromaticity,
353        pub green: Chromaticity,
354        pub blue: Chromaticity,
355    }
356
357    impl DisplayPrimaries {
358        /// Rec. ITU-R BT.709 primaries (SDR)
359        pub const BT_709: Self = Self {
360            red: Chromaticity { x: 0.64, y: 0.33 },
361            green: Chromaticity { x: 0.30, y: 0.60 },
362            blue: Chromaticity { x: 0.15, y: 0.06 },
363        };
364
365        /// Rec. ITU-R BT.2020 primaries (HDR/WCG)
366        pub const BT_2020: Self = Self {
367            red: Chromaticity { x: 0.708, y: 0.292 },
368            green: Chromaticity { x: 0.170, y: 0.797 },
369            blue: Chromaticity { x: 0.131, y: 0.046 },
370        };
371
372        /// DCI-P3 primaries
373        pub const DCI_P3: Self = Self {
374            red: Chromaticity { x: 0.680, y: 0.320 },
375            green: Chromaticity { x: 0.265, y: 0.690 },
376            blue: Chromaticity { x: 0.150, y: 0.060 },
377        };
378    }
379
380    /// SMPTE ST 2086 mastering display metadata.
381    ///
382    /// Specifies the color volume and luminance range of the display used for mastering HDR content.
383    #[derive(Debug, Clone, Copy, PartialEq)]
384    pub struct MasteringDisplayMetadata {
385        /// Maximum luminance in candelas per square meter (cd/m² or nits).
386        /// Typical values: 1000.0, 4000.0, 10000.0
387        pub luminance_max: f32,
388
389        /// Minimum luminance in candelas per square meter (cd/m² or nits).
390        /// Typical values: 0.0001, 0.001, 0.01, 0.05
391        pub luminance_min: f32,
392
393        /// Display primaries (red, green, blue chromaticity coordinates)
394        pub primaries: DisplayPrimaries,
395
396        /// White point chromaticity coordinates
397        pub white_point: Chromaticity,
398    }
399
400    /// HDR10 static metadata.
401    ///
402    /// Includes both content light level metadata and mastering display metadata.
403    #[derive(Debug, Clone, Copy, PartialEq)]
404    pub struct HdrMetadata {
405        /// Maximum Content Light Level in cd/m² (nits).
406        /// The maximum light level of any single pixel in the entire video.
407        /// Typical range: 1000-4000 nits.
408        pub max_cll: u64,
409
410        /// Maximum Frame-Average Light Level in cd/m² (nits).
411        /// The maximum average light level of any single frame in the video.
412        /// Typical range: 100-1000 nits.
413        pub max_fall: u64,
414
415        /// Optional SMPTE ST 2086 mastering metadata.
416        pub mastering_metadata: Option<MasteringDisplayMetadata>,
417    }
418}