Skip to main content

moq_video/encode/
encoder.rs

1//! H.264 encoder over ffmpeg, hardware-preferred.
2//!
3//! Accepts decoded [`ffmpeg::frame::Video`] frames in any pixel format
4//! (whatever the camera hands us), scales/converts them to YUV420P, and
5//! emits Annex-B H.264 packets ready for `moq_mux::codec::h264::Import`.
6
7use bytes::Bytes;
8use ffmpeg_next as ffmpeg;
9
10use crate::Error;
11
/// Strategy for choosing the underlying ffmpeg encoder.
///
/// Marked `#[non_exhaustive]` so further strategies can be introduced later
/// without breaking `match` expressions in downstream crates.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
#[non_exhaustive]
pub enum Kind {
	/// Try the platform hardware encoders first, then fall back to software.
	#[default]
	Auto,
	/// Hardware encoders only; it is an error if none is available.
	Hardware,
	/// Software encoders only (libx264 / ffmpeg's built-in encoder).
	Software,
	/// One specific ffmpeg encoder, looked up by name, e.g. `"h264_videotoolbox"`.
	Named(String),
}
27
28/// Encoder configuration. `width` / `height` / `framerate` are the encoded
29/// output; input frames are scaled/converted to match.
30///
31/// `#[non_exhaustive]`: build via [`Config::new`] and set the optional fields,
32/// so future knobs don't break callers.
33#[derive(Clone, Debug)]
34#[non_exhaustive]
35pub struct Config {
36	pub width: u32,
37	pub height: u32,
38	pub framerate: u32,
39	/// Target bitrate in bits per second. `None` derives a sane default
40	/// from resolution and framerate (~0.07 bits per pixel per second).
41	pub bitrate: Option<u64>,
42	/// Keyframe interval in frames. Subscribers joining mid-stream wait at
43	/// most this many frames before they can start decoding.
44	pub gop: u32,
45	pub kind: Kind,
46}
47
48impl Config {
49	pub fn new(width: u32, height: u32, framerate: u32) -> Self {
50		Self {
51			width,
52			height,
53			framerate,
54			bitrate: None,
55			// ~2 seconds at the configured framerate.
56			gop: framerate.saturating_mul(2).max(1),
57			kind: Kind::Auto,
58		}
59	}
60
61	/// Resolved bitrate: explicit override, or a pixels-per-second estimate.
62	fn resolved_bitrate(&self) -> u64 {
63		self.bitrate.unwrap_or_else(|| {
64			let pixels = self.width as u64 * self.height as u64;
65			// 0.07 bits per pixel per second matches the JS publisher's
66			// default and lands ~4.4 Mbps for 1080p30.
67			((pixels * self.framerate as u64) as f64 * 0.07) as u64
68		})
69	}
70}
71
/// Names of the hardware H.264 encoders to probe, highest priority first.
///
/// ffmpeg only compiles each of these in on the platforms that support it,
/// but looking up a name that isn't compiled in just returns `None`, so the
/// full list is harmless everywhere.
const HARDWARE_ENCODERS: &[&str] = &[
	// macOS / iOS
	"h264_videotoolbox",
	// NVIDIA
	"h264_nvenc",
	// Intel QuickSync
	"h264_qsv",
	// Linux VA-API
	"h264_vaapi",
	// AMD (Windows)
	"h264_amf",
	// Linux stateful (e.g. Raspberry Pi)
	"h264_v4l2m2m",
];

/// Names of the software encoders to fall back on, highest priority first.
const SOFTWARE_ENCODERS: &[&str] = &[
	"libx264",
	"h264",
];
87
88/// H.264 encoder. Build one with [`Encoder::new`], feed it raw RGBA frames
89/// via [`encode_rgba`](Self::encode_rgba), and publish the resulting Annex-B
90/// packets through [`Producer`](super::Producer).
91pub struct Encoder {
92	encoder: ffmpeg::encoder::video::Encoder,
93	/// Lazily built once we see the first frame's pixel format/size.
94	scaler: Option<Scaler>,
95	width: u32,
96	height: u32,
97	frame_count: i64,
98	/// The ffmpeg encoder name that opened successfully (for logging).
99	name: String,
100}
101
102struct Scaler {
103	ctx: ffmpeg::software::scaling::Context,
104	src_format: ffmpeg::format::Pixel,
105	src_width: u32,
106	src_height: u32,
107}
108
109impl Encoder {
110	pub fn new(config: &Config) -> Result<Self, Error> {
111		// Validate at the construction boundary so both entry points (the
112		// capture loop and a bring-your-own-frames caller) reject a zero
113		// framerate, which would produce a degenerate `1/0` codec time base.
114		if config.framerate == 0 {
115			return Err(Error::InvalidFramerate(0));
116		}
117		if config.width == 0 || config.height == 0 {
118			return Err(Error::Codec(anyhow::anyhow!(
119				"encoder dimensions must be non-zero (got {}x{})",
120				config.width,
121				config.height
122			)));
123		}
124
125		// Idempotent; ensures codecs are registered even when no Camera opened.
126		ffmpeg::init()?;
127		let candidates = encoder_candidates(&config.kind);
128
129		let mut tried = Vec::new();
130		for name in &candidates {
131			tried.push(name.clone());
132			match open_encoder(name, config) {
133				Ok(encoder) => {
134					tracing::info!(encoder = %name, width = config.width, height = config.height, "opened H.264 encoder");
135					return Ok(Self {
136						encoder,
137						scaler: None,
138						width: config.width,
139						height: config.height,
140						frame_count: 0,
141						name: name.clone(),
142					});
143				}
144				Err(e) => {
145					tracing::debug!(encoder = %name, error = %e, "encoder unavailable, trying next");
146				}
147			}
148		}
149
150		Err(Error::NoEncoder(tried.join(", ")))
151	}
152
153	/// The ffmpeg encoder name in use, e.g. `"h264_videotoolbox"`.
154	pub fn name(&self) -> &str {
155		&self.name
156	}
157
158	/// Encode one tightly-packed RGBA frame (`width * height * 4` bytes),
159	/// returning zero or more Annex-B H.264 packets. Set `keyframe` to force an
160	/// IDR (e.g. on resume so a re-subscribing viewer can start decoding at
161	/// once). The frame is scaled/converted to the encoder's resolution.
162	pub fn encode_rgba(&mut self, rgba: &[u8], width: u32, height: u32, keyframe: bool) -> Result<Vec<Bytes>, Error> {
163		let frame = rgba_frame(rgba, width, height)?;
164		self.encode_frame(&frame, keyframe)
165	}
166
167	/// Encode a decoded frame (camera path). With B-frames disabled (the
168	/// low-latency default) the encoder emits one packet per input frame.
169	pub(crate) fn encode(&mut self, frame: &ffmpeg::frame::Video) -> Result<Vec<Bytes>, Error> {
170		self.encode_frame(frame, false)
171	}
172
173	fn encode_frame(&mut self, frame: &ffmpeg::frame::Video, keyframe: bool) -> Result<Vec<Bytes>, Error> {
174		let mut yuv = self.convert(frame)?;
175		if keyframe {
176			yuv.set_kind(ffmpeg::picture::Type::I);
177		}
178		self.encoder.send_frame(&yuv)?;
179		self.drain()
180	}
181
182	/// Flush the encoder, returning any buffered packets.
183	pub fn finish(&mut self) -> Result<Vec<Bytes>, Error> {
184		self.encoder.send_eof()?;
185		self.drain()
186	}
187
188	fn drain(&mut self) -> Result<Vec<Bytes>, Error> {
189		let mut out = Vec::new();
190		let mut packet = ffmpeg::Packet::empty();
191		loop {
192			match self.encoder.receive_packet(&mut packet) {
193				Ok(()) => {
194					if let Some(data) = packet.data() {
195						out.push(Bytes::copy_from_slice(data));
196					}
197				}
198				Err(ffmpeg::Error::Other { errno }) if errno == ffmpeg::util::error::EAGAIN => break,
199				Err(ffmpeg::Error::Eof) => break,
200				Err(e) => return Err(e.into()),
201			}
202		}
203		Ok(out)
204	}
205
206	/// Scale/convert an arbitrary input frame to the encoder's YUV420P
207	/// surface, rebuilding the scaler if the input geometry changed.
208	fn convert(&mut self, frame: &ffmpeg::frame::Video) -> Result<ffmpeg::frame::Video, Error> {
209		let (src_format, src_w, src_h) = (frame.format(), frame.width(), frame.height());
210
211		let needs_rebuild = match &self.scaler {
212			Some(s) => s.src_format != src_format || s.src_width != src_w || s.src_height != src_h,
213			None => true,
214		};
215		if needs_rebuild {
216			let ctx = ffmpeg::software::scaling::Context::get(
217				src_format,
218				src_w,
219				src_h,
220				ffmpeg::format::Pixel::YUV420P,
221				self.width,
222				self.height,
223				ffmpeg::software::scaling::Flags::BILINEAR,
224			)?;
225			self.scaler = Some(Scaler {
226				ctx,
227				src_format,
228				src_width: src_w,
229				src_height: src_h,
230			});
231		}
232
233		let scaler = self.scaler.as_mut().expect("scaler built above");
234		let mut yuv = ffmpeg::frame::Video::empty();
235		scaler.ctx.run(frame, &mut yuv)?;
236
237		// The encoder times frames off a monotonic count, not the camera
238		// clock; the moq presentation timestamp is attached downstream.
239		yuv.set_pts(Some(self.frame_count));
240		self.frame_count += 1;
241		Ok(yuv)
242	}
243}
244
245/// Wrap tightly-packed RGBA bytes in an ffmpeg frame, copying row-by-row to
246/// honor ffmpeg's stride (which may exceed `width * 4`).
247fn rgba_frame(rgba: &[u8], width: u32, height: u32) -> Result<ffmpeg::frame::Video, Error> {
248	let row_bytes = width as usize * 4;
249	let expected = row_bytes * height as usize;
250	if rgba.len() < expected {
251		return Err(Error::Codec(anyhow::anyhow!(
252			"RGBA buffer too small: {} < {expected} for {width}x{height}",
253			rgba.len()
254		)));
255	}
256
257	let mut frame = ffmpeg::frame::Video::new(ffmpeg::format::Pixel::RGBA, width, height);
258	let stride = frame.stride(0);
259	for y in 0..height as usize {
260		let src = y * row_bytes;
261		let dst = y * stride;
262		frame.data_mut(0)[dst..dst + row_bytes].copy_from_slice(&rgba[src..src + row_bytes]);
263	}
264	Ok(frame)
265}
266
267fn encoder_candidates(kind: &Kind) -> Vec<String> {
268	match kind {
269		Kind::Named(name) => vec![name.clone()],
270		Kind::Hardware => HARDWARE_ENCODERS.iter().map(|s| s.to_string()).collect(),
271		Kind::Software => SOFTWARE_ENCODERS.iter().map(|s| s.to_string()).collect(),
272		Kind::Auto => HARDWARE_ENCODERS
273			.iter()
274			.chain(SOFTWARE_ENCODERS)
275			.map(|s| s.to_string())
276			.collect(),
277	}
278}
279
280fn open_encoder(name: &str, config: &Config) -> Result<ffmpeg::encoder::video::Encoder, Error> {
281	let codec = ffmpeg::encoder::find_by_name(name).ok_or_else(|| Error::NoEncoder(name.to_string()))?;
282
283	let ctx = ffmpeg::codec::context::Context::new_with_codec(codec);
284	let mut enc = ctx.encoder().video()?;
285	enc.set_width(config.width);
286	enc.set_height(config.height);
287	enc.set_format(ffmpeg::format::Pixel::YUV420P);
288	enc.set_time_base(ffmpeg::Rational::new(1, config.framerate as i32));
289	enc.set_frame_rate(Some(ffmpeg::Rational::new(config.framerate as i32, 1)));
290	enc.set_gop(config.gop);
291	enc.set_max_b_frames(0); // Low latency: no reordering.
292	enc.set_bit_rate(config.resolved_bitrate() as usize);
293
294	let mut opts = ffmpeg::Dictionary::new();
295	if name == "libx264" {
296		opts.set("preset", "ultrafast");
297		opts.set("tune", "zerolatency");
298	} else if name == "h264_videotoolbox" {
299		opts.set("realtime", "1");
300		// Fall back to the software VideoToolbox path if no GPU encoder.
301		opts.set("allow_sw", "1");
302	}
303
304	Ok(enc.open_with(opts)?)
305}
306
#[cfg(test)]
mod tests {
	use super::*;

	/// Output size shared by every test in this module.
	const WIDTH: u32 = 320;
	const HEIGHT: u32 = 240;

	/// A `WIDTH`x`HEIGHT` software-only configuration at `framerate`.
	fn software_config(framerate: u32) -> Config {
		let mut config = Config::new(WIDTH, HEIGHT, framerate);
		config.kind = Kind::Software;
		config
	}

	/// Build a mid-gray YUV420P frame so encoding can be tested without a camera.
	fn gray_frame(width: u32, height: u32) -> ffmpeg::frame::Video {
		let mut frame = ffmpeg::frame::Video::new(ffmpeg::format::Pixel::YUV420P, width, height);
		// 128 is mid-gray for luma (plane 0) and neutral for chroma (planes 1/2).
		let planes = frame.planes();
		(0..planes).for_each(|plane| frame.data_mut(plane).fill(128));
		frame
	}

	#[test]
	fn software_encoder_emits_annexb() {
		let config = software_config(30);
		let mut encoder = Encoder::new(&config).expect("libx264 should be available under nix ffmpeg");
		assert_eq!(encoder.name(), "libx264");

		let frame = gray_frame(WIDTH, HEIGHT);
		let mut packets: Vec<_> = (0..30).flat_map(|_| encoder.encode(&frame).unwrap()).collect();
		packets.extend(encoder.finish().unwrap());

		assert!(!packets.is_empty(), "encoder produced no packets");

		// The avc3 importer locates the inline SPS/PPS via the Annex-B start
		// code, so the very first packet has to begin with one.
		let first = &packets[0];
		let start_codes: [&[u8]; 2] = [&[0, 0, 0, 1], &[0, 0, 1]];
		let has_start_code = start_codes.iter().any(|code| first.starts_with(code));
		assert!(
			has_start_code,
			"first packet is not Annex-B: {:02x?}",
			&first[..first.len().min(8)]
		);
	}

	#[test]
	fn encode_rgba_emits_annexb() {
		let config = software_config(30);
		let mut encoder = Encoder::new(&config).unwrap();

		// The input is tightly packed (width*height*4); it only decodes if
		// the row-by-row copy respects ffmpeg's stride.
		let rgba = vec![0x40u8; (WIDTH * HEIGHT * 4) as usize];
		let mut packets = encoder.encode_rgba(&rgba, WIDTH, HEIGHT, true).unwrap();
		packets.extend(encoder.finish().unwrap());
		assert!(!packets.is_empty());
		assert!(packets[0].starts_with(&[0, 0, 0, 1]) || packets[0].starts_with(&[0, 0, 1]));
	}

	#[test]
	fn encode_rgba_rejects_short_buffer() {
		let config = software_config(30);
		let mut encoder = Encoder::new(&config).unwrap();
		// 16 bytes is nowhere near 320*240*4: expect an error, not a panic
		// in the row copy.
		assert!(matches!(
			encoder.encode_rgba(&[0u8; 16], WIDTH, HEIGHT, false),
			Err(Error::Codec(_))
		));
	}

	#[test]
	fn new_rejects_zero_framerate() {
		let config = software_config(0);
		assert!(matches!(Encoder::new(&config), Err(Error::InvalidFramerate(0))));
	}

	#[test]
	fn unknown_named_encoder_errors() {
		let mut config = Config::new(WIDTH, HEIGHT, 30);
		config.kind = Kind::Named("definitely_not_a_codec".into());
		assert!(matches!(Encoder::new(&config), Err(Error::NoEncoder(_))));
	}

	#[test]
	fn default_bitrate_scales_with_resolution() {
		let small = Config::new(WIDTH, HEIGHT, 30).resolved_bitrate();
		let large = Config::new(1920, 1080, 30).resolved_bitrate();
		assert!(large > small);
		assert!(small > 0);
	}
}