Skip to main content

moq_video/
capture.rs

1//! Frame capture via libavdevice.
2//!
3//! Today this is webcam capture (the internal `Camera`): it opens the platform
4//! camera backend (avfoundation on macOS, v4l2 on Linux, dshow on Windows) and
5//! yields decoded `ffmpeg::frame::Video` frames in the source's native pixel
6//! format. Screen capture is the same libavdevice pipeline with a different
7//! input format (avfoundation screen, x11grab/gdigrab, ...), so it would live
8//! here too. The encoder handles conversion to YUV420P, so callers don't have
9//! to care what the source delivers.
10//!
11//! The public type here is [`Config`], which describes the source;
12//! [`encode::publish_capture`](crate::encode::publish_capture) consumes it.
13
14use std::ffi::CString;
15
16use ffmpeg_next as ffmpeg;
17
18use crate::Error;
19
/// Describes which camera to open and the capture mode to ask for.
///
/// Every field is optional and acts as a hint: the backend picks the closest
/// mode it supports.
///
/// The struct is `#[non_exhaustive]`, so start from [`Config::default`] and
/// assign the fields you care about; that lets new options be added later
/// without breaking callers.
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub struct Config {
	/// Which device to open, in the platform backend's own notation. Leave
	/// as `None` to open the default camera.
	///
	/// - macOS (avfoundation): device index (`"0"`) or name (`"FaceTime HD Camera"`).
	/// - Linux (v4l2): a `/dev/videoN` path.
	/// - Windows (dshow): the device name (without the `video=` prefix).
	pub device: Option<String>,
	/// Requested frame width in pixels. Only forwarded to the backend (as
	/// part of `video_size`) when `height` is set as well.
	pub width: Option<u32>,
	/// Requested frame height in pixels. Only forwarded to the backend (as
	/// part of `video_size`) when `width` is set as well.
	pub height: Option<u32>,
	/// Requested capture rate in frames per second, forwarded to the
	/// backend as its `framerate` option.
	pub framerate: Option<u32>,
}
38
39/// An open camera, read frame-by-frame via [`read`](Self::read).
40///
41/// Internal for now: it traffics in `ffmpeg` frame types, so keeping it
42/// private leaves the public API free of that version coupling. Promote to
43/// `pub` (a non-breaking change) once a bring-your-own-frames consumer needs it.
44pub(crate) struct Camera {
45	input: ffmpeg::format::context::Input,
46	decoder: ffmpeg::decoder::Video,
47	stream_index: usize,
48	url: String,
49	framerate: Option<u32>,
50}
51
52impl Camera {
53	/// Open the camera described by `config`.
54	pub fn open(config: &Config) -> Result<Self, Error> {
55		ffmpeg::init()?;
56		ffmpeg::device::register_all();
57
58		let backend = Backend::current();
59		let url = backend.url(config.device.as_deref());
60
61		let input_format = find_input_format(backend.format_name)?;
62		let mut opts = ffmpeg::Dictionary::new();
63		if let (Some(w), Some(h)) = (config.width, config.height) {
64			opts.set("video_size", &format!("{w}x{h}"));
65		}
66		if let Some(fps) = config.framerate {
67			opts.set("framerate", &fps.to_string());
68		}
69
70		let ctx = ffmpeg::format::open_with(&url, &input_format, opts)?;
71		let input = match ctx {
72			ffmpeg::format::context::Context::Input(input) => input,
73			ffmpeg::format::context::Context::Output(_) => {
74				// open_with returns Input for an Input format; this arm is unreachable.
75				return Err(Error::NoVideoStream(url));
76			}
77		};
78
79		let stream = input
80			.streams()
81			.best(ffmpeg::media::Type::Video)
82			.ok_or_else(|| Error::NoVideoStream(url.clone()))?;
83		let stream_index = stream.index();
84		let framerate = stream_framerate(&stream);
85
86		let decoder = ffmpeg::codec::context::Context::from_parameters(stream.parameters())?
87			.decoder()
88			.video()?;
89
90		tracing::info!(
91			device = %url,
92			backend = backend.format_name,
93			width = decoder.width(),
94			height = decoder.height(),
95			framerate,
96			"opened camera"
97		);
98
99		Ok(Self {
100			input,
101			decoder,
102			stream_index,
103			url,
104			framerate,
105		})
106	}
107
108	pub fn width(&self) -> u32 {
109		self.decoder.width()
110	}
111
112	pub fn height(&self) -> u32 {
113		self.decoder.height()
114	}
115
116	/// The camera's negotiated frame rate (rounded), or `None` if the
117	/// backend didn't report one.
118	pub fn framerate(&self) -> Option<u32> {
119		self.framerate
120	}
121
122	/// Block until the next decoded frame is available, or `None` once the
123	/// device stops producing frames.
124	pub fn read(&mut self) -> Result<Option<ffmpeg::frame::Video>, Error> {
125		let mut frame = ffmpeg::frame::Video::empty();
126		loop {
127			match self.decoder.receive_frame(&mut frame) {
128				Ok(()) => return Ok(Some(frame)),
129				Err(ffmpeg::Error::Other { errno }) if errno == ffmpeg::util::error::EAGAIN => {}
130				Err(ffmpeg::Error::Eof) => return Ok(None),
131				Err(e) => return Err(e.into()),
132			}
133
134			// Pull the next packet for our stream. The inner block drops the
135			// packet iterator (and its borrow of `input`) before we touch
136			// `decoder`, keeping the borrow checker happy.
137			let packet = {
138				let mut packets = self.input.packets();
139				loop {
140					match packets.next() {
141						Some((stream, packet)) if stream.index() == self.stream_index => break Some(packet),
142						Some(_) => continue,
143						None => break None,
144					}
145				}
146			};
147
148			match packet {
149				Some(packet) => self.decoder.send_packet(&packet)?,
150				None => {
151					self.decoder.send_eof()?;
152					return match self.decoder.receive_frame(&mut frame) {
153						Ok(()) => Ok(Some(frame)),
154						// Drained: no more frames after EOF.
155						Err(ffmpeg::Error::Eof) => Ok(None),
156						Err(ffmpeg::Error::Other { errno }) if errno == ffmpeg::util::error::EAGAIN => Ok(None),
157						// A real decode failure must not masquerade as end-of-stream.
158						Err(e) => Err(e.into()),
159					};
160				}
161			}
162		}
163	}
164
165	pub fn device(&self) -> &str {
166		&self.url
167	}
168}
169
/// Which libavdevice capture backend to use on this platform.
struct Backend {
	/// Name of the libavdevice input format (`"avfoundation"`, `"v4l2"`,
	/// `"dshow"`).
	format_name: &'static str,
}

impl Backend {
	/// Backend for the compile target: v4l2 on Linux, dshow on Windows, and
	/// avfoundation on macOS as well as on every other target.
	fn current() -> Self {
		let format_name = if cfg!(target_os = "linux") {
			"v4l2"
		} else if cfg!(target_os = "windows") {
			"dshow"
		} else {
			"avfoundation"
		};
		Self { format_name }
	}

	/// Build the libavdevice URL for the requested device, falling back to
	/// a per-backend default when `device` is `None`.
	fn url(&self, device: Option<&str>) -> String {
		// What to open when the caller did not name a device.
		let fallback = match self.format_name {
			"v4l2" => "/dev/video0",
			"dshow" => "",
			_ => "default",
		};
		let name = device.unwrap_or(fallback);

		match self.format_name {
			// avfoundation specs read "<video>:<audio>". Unless the caller
			// already wrote a full spec, pin audio to "none" so only the
			// camera is opened, never a microphone.
			"avfoundation" if !name.contains(':') => format!("{name}:none"),
			"dshow" => format!("video={name}"),
			_ => name.to_owned(),
		}
	}
}
219
220/// The stream's negotiated frame rate, rounded to whole fps. Prefers the
221/// average rate and falls back to the base (`r_frame_rate`) guess; returns
222/// `None` when neither is populated (some backends leave them at 0).
223fn stream_framerate(stream: &ffmpeg::format::stream::Stream) -> Option<u32> {
224	for rate in [stream.avg_frame_rate(), stream.rate()] {
225		let (num, den) = (rate.numerator(), rate.denominator());
226		if num > 0 && den > 0 {
227			let fps = (num as f64 / den as f64).round();
228			if fps >= 1.0 {
229				return Some(fps as u32);
230			}
231		}
232	}
233	None
234}
235
236/// Look up a libavdevice input format by name. The safe `format::list()`
237/// helper is compiled out on ffmpeg >= 5, so we go through the FFI.
238fn find_input_format(name: &str) -> Result<ffmpeg::format::format::Format, Error> {
239	let cname = CString::new(name).expect("format name has no interior NUL");
240	// SAFETY: `av_find_input_format` takes a NUL-terminated string and returns
241	// a borrowed static pointer (or null). We check for null before wrapping.
242	let ptr = unsafe { ffmpeg::ffi::av_find_input_format(cname.as_ptr()) };
243	if ptr.is_null() {
244		return Err(match name {
245			"avfoundation" => Error::NoCaptureBackend("avfoundation"),
246			"v4l2" => Error::NoCaptureBackend("v4l2"),
247			"dshow" => Error::NoCaptureBackend("dshow"),
248			_ => Error::NoCaptureBackend("camera"),
249		});
250	}
251	// SAFETY: `ptr` is a non-null `AVInputFormat` owned statically by
252	// libavdevice; the const->mut cast is sound because `Input` never mutates
253	// through it (the wrapper only reads format fields).
254	let input = unsafe { ffmpeg::format::Input::wrap(ptr as *mut _) };
255	Ok(ffmpeg::format::format::Format::Input(input))
256}