ff_decode/audio/
decoder_inner.rs

1//! Internal audio decoder implementation using FFmpeg.
2//!
3//! This module contains the low-level decoder logic that directly interacts
4//! with FFmpeg's C API through the ff-sys crate. It is not exposed publicly.
5
6// Allow unsafe code in this module as it's necessary for FFmpeg FFI
7#![allow(unsafe_code)]
8// Allow specific clippy lints for FFmpeg FFI code
9#![allow(clippy::similar_names)]
10#![allow(clippy::too_many_lines)]
11#![allow(clippy::cast_sign_loss)]
12#![allow(clippy::cast_possible_truncation)]
13#![allow(clippy::cast_possible_wrap)]
14#![allow(clippy::module_name_repetitions)]
15#![allow(clippy::match_same_arms)]
16#![allow(clippy::ptr_as_ptr)]
17#![allow(clippy::doc_markdown)]
18#![allow(clippy::unnecessary_cast)]
19#![allow(clippy::if_not_else)]
20#![allow(clippy::unnecessary_wraps)]
21#![allow(clippy::cast_precision_loss)]
22#![allow(clippy::if_same_then_else)]
23#![allow(clippy::cast_lossless)]
24
25use std::path::Path;
26use std::ptr;
27use std::time::Duration;
28
29use ff_format::channel::ChannelLayout;
30use ff_format::codec::AudioCodec;
31use ff_format::time::{Rational, Timestamp};
32use ff_format::{AudioFrame, AudioStreamInfo, SampleFormat};
33use ff_sys::{
34    AVCodecContext, AVCodecID, AVFormatContext, AVFrame, AVMediaType_AVMEDIA_TYPE_AUDIO, AVPacket,
35    AVSampleFormat, SwrContext,
36};
37
38use crate::error::DecodeError;
39
40/// RAII guard for `AVFormatContext` to ensure proper cleanup.
41struct AvFormatContextGuard(*mut AVFormatContext);
42
43impl AvFormatContextGuard {
44    /// Creates a new guard by opening an input file.
45    ///
46    /// # Safety
47    ///
48    /// Caller must ensure FFmpeg is initialized and path is valid.
49    unsafe fn new(path: &Path) -> Result<Self, DecodeError> {
50        // SAFETY: Caller ensures FFmpeg is initialized and path is valid
51        let format_ctx = unsafe {
52            ff_sys::avformat::open_input(path).map_err(|e| DecodeError::Ffmpeg {
53                code: e,
54                message: format!("Failed to open file: {}", ff_sys::av_error_string(e)),
55            })?
56        };
57        Ok(Self(format_ctx))
58    }
59
60    /// Returns the raw pointer.
61    const fn as_ptr(&self) -> *mut AVFormatContext {
62        self.0
63    }
64
65    /// Consumes the guard and returns the raw pointer without dropping.
66    fn into_raw(self) -> *mut AVFormatContext {
67        let ptr = self.0;
68        std::mem::forget(self);
69        ptr
70    }
71}
72
73impl Drop for AvFormatContextGuard {
74    fn drop(&mut self) {
75        if !self.0.is_null() {
76            // SAFETY: self.0 is valid and owned by this guard
77            unsafe {
78                ff_sys::avformat::close_input(&mut (self.0 as *mut _));
79            }
80        }
81    }
82}
83
84/// RAII guard for `AVCodecContext` to ensure proper cleanup.
85struct AvCodecContextGuard(*mut AVCodecContext);
86
87impl AvCodecContextGuard {
88    /// Creates a new guard by allocating a codec context.
89    ///
90    /// # Safety
91    ///
92    /// Caller must ensure codec pointer is valid.
93    unsafe fn new(codec: *const ff_sys::AVCodec) -> Result<Self, DecodeError> {
94        // SAFETY: Caller ensures codec pointer is valid
95        let codec_ctx = unsafe {
96            ff_sys::avcodec::alloc_context3(codec).map_err(|e| DecodeError::Ffmpeg {
97                code: e,
98                message: format!("Failed to allocate codec context: {e}"),
99            })?
100        };
101        Ok(Self(codec_ctx))
102    }
103
104    /// Returns the raw pointer.
105    const fn as_ptr(&self) -> *mut AVCodecContext {
106        self.0
107    }
108
109    /// Consumes the guard and returns the raw pointer without dropping.
110    fn into_raw(self) -> *mut AVCodecContext {
111        let ptr = self.0;
112        std::mem::forget(self);
113        ptr
114    }
115}
116
117impl Drop for AvCodecContextGuard {
118    fn drop(&mut self) {
119        if !self.0.is_null() {
120            // SAFETY: self.0 is valid and owned by this guard
121            unsafe {
122                ff_sys::avcodec::free_context(&mut (self.0 as *mut _));
123            }
124        }
125    }
126}
127
128/// RAII guard for `AVPacket` to ensure proper cleanup.
129struct AvPacketGuard(*mut AVPacket);
130
131impl AvPacketGuard {
132    /// Creates a new guard by allocating a packet.
133    ///
134    /// # Safety
135    ///
136    /// Must be called after FFmpeg initialization.
137    unsafe fn new() -> Result<Self, DecodeError> {
138        // SAFETY: Caller ensures FFmpeg is initialized
139        let packet = unsafe { ff_sys::av_packet_alloc() };
140        if packet.is_null() {
141            return Err(DecodeError::Ffmpeg {
142                code: 0,
143                message: "Failed to allocate packet".to_string(),
144            });
145        }
146        Ok(Self(packet))
147    }
148
149    /// Consumes the guard and returns the raw pointer without dropping.
150    fn into_raw(self) -> *mut AVPacket {
151        let ptr = self.0;
152        std::mem::forget(self);
153        ptr
154    }
155}
156
157impl Drop for AvPacketGuard {
158    fn drop(&mut self) {
159        if !self.0.is_null() {
160            // SAFETY: self.0 is valid and owned by this guard
161            unsafe {
162                ff_sys::av_packet_free(&mut (self.0 as *mut _));
163            }
164        }
165    }
166}
167
168/// RAII guard for `AVFrame` to ensure proper cleanup.
169struct AvFrameGuard(*mut AVFrame);
170
171impl AvFrameGuard {
172    /// Creates a new guard by allocating a frame.
173    ///
174    /// # Safety
175    ///
176    /// Must be called after FFmpeg initialization.
177    unsafe fn new() -> Result<Self, DecodeError> {
178        // SAFETY: Caller ensures FFmpeg is initialized
179        let frame = unsafe { ff_sys::av_frame_alloc() };
180        if frame.is_null() {
181            return Err(DecodeError::Ffmpeg {
182                code: 0,
183                message: "Failed to allocate frame".to_string(),
184            });
185        }
186        Ok(Self(frame))
187    }
188
189    /// Consumes the guard and returns the raw pointer without dropping.
190    fn into_raw(self) -> *mut AVFrame {
191        let ptr = self.0;
192        std::mem::forget(self);
193        ptr
194    }
195}
196
197impl Drop for AvFrameGuard {
198    fn drop(&mut self) {
199        if !self.0.is_null() {
200            // SAFETY: self.0 is valid and owned by this guard
201            unsafe {
202                ff_sys::av_frame_free(&mut (self.0 as *mut _));
203            }
204        }
205    }
206}
207
208/// RAII guard for `SwrContext` to ensure proper cleanup.
209struct SwrContextGuard(*mut SwrContext);
210
211impl SwrContextGuard {
212    /// Consumes the guard and returns the raw pointer without dropping.
213    #[allow(dead_code)]
214    fn into_raw(self) -> *mut SwrContext {
215        let ptr = self.0;
216        std::mem::forget(self);
217        ptr
218    }
219}
220
221impl Drop for SwrContextGuard {
222    fn drop(&mut self) {
223        if !self.0.is_null() {
224            // SAFETY: self.0 is valid and owned by this guard
225            unsafe {
226                ff_sys::swr_free(&mut (self.0 as *mut _));
227            }
228        }
229    }
230}
231
232/// Internal decoder state holding FFmpeg contexts.
233///
234/// This structure manages the lifecycle of FFmpeg objects and is responsible
235/// for proper cleanup when dropped.
236pub(crate) struct AudioDecoderInner {
237    /// Format context for reading the media file
238    format_ctx: *mut AVFormatContext,
239    /// Codec context for decoding audio frames
240    codec_ctx: *mut AVCodecContext,
241    /// Audio stream index in the format context
242    stream_index: i32,
243    /// SwResample context for sample format conversion (optional)
244    swr_ctx: Option<*mut SwrContext>,
245    /// Target output sample format (if conversion is needed)
246    output_format: Option<SampleFormat>,
247    /// Target output sample rate (if resampling is needed)
248    output_sample_rate: Option<u32>,
249    /// Target output channel count (if remixing is needed)
250    output_channels: Option<u32>,
251    /// Whether end of file has been reached
252    eof: bool,
253    /// Current playback position
254    position: Duration,
255    /// Reusable packet for reading from file
256    packet: *mut AVPacket,
257    /// Reusable frame for decoding
258    frame: *mut AVFrame,
259}
260
261impl AudioDecoderInner {
262    /// Opens a media file and initializes the audio decoder.
263    ///
264    /// # Arguments
265    ///
266    /// * `path` - Path to the media file
267    /// * `output_format` - Optional target sample format for conversion
268    /// * `output_sample_rate` - Optional target sample rate for resampling
269    /// * `output_channels` - Optional target channel count for remixing
270    ///
271    /// # Errors
272    ///
273    /// Returns an error if:
274    /// - The file cannot be opened
275    /// - No audio stream is found
276    /// - The codec is not supported
277    /// - Decoder initialization fails
278    pub(crate) fn new(
279        path: &Path,
280        output_format: Option<SampleFormat>,
281        output_sample_rate: Option<u32>,
282        output_channels: Option<u32>,
283    ) -> Result<(Self, AudioStreamInfo), DecodeError> {
284        // Ensure FFmpeg is initialized (thread-safe and idempotent)
285        ff_sys::ensure_initialized();
286
287        // Open the input file (with RAII guard)
288        // SAFETY: Path is valid, AvFormatContextGuard ensures cleanup
289        let format_ctx_guard = unsafe { AvFormatContextGuard::new(path)? };
290        let format_ctx = format_ctx_guard.as_ptr();
291
292        // Read stream information
293        // SAFETY: format_ctx is valid and owned by guard
294        unsafe {
295            ff_sys::avformat::find_stream_info(format_ctx).map_err(|e| DecodeError::Ffmpeg {
296                code: e,
297                message: format!("Failed to find stream info: {}", ff_sys::av_error_string(e)),
298            })?;
299        }
300
301        // Find the audio stream
302        // SAFETY: format_ctx is valid
303        let (stream_index, codec_id) =
304            unsafe { Self::find_audio_stream(format_ctx) }.ok_or_else(|| {
305                DecodeError::NoAudioStream {
306                    path: path.to_path_buf(),
307                }
308            })?;
309
310        // Find the decoder for this codec
311        // SAFETY: codec_id is valid from FFmpeg
312        let codec = unsafe {
313            ff_sys::avcodec::find_decoder(codec_id).ok_or_else(|| {
314                DecodeError::UnsupportedCodec {
315                    codec: format!("codec_id={codec_id:?}"),
316                }
317            })?
318        };
319
320        // Allocate codec context (with RAII guard)
321        // SAFETY: codec pointer is valid, AvCodecContextGuard ensures cleanup
322        let codec_ctx_guard = unsafe { AvCodecContextGuard::new(codec)? };
323        let codec_ctx = codec_ctx_guard.as_ptr();
324
325        // Copy codec parameters from stream to context
326        // SAFETY: format_ctx and codec_ctx are valid, stream_index is valid
327        unsafe {
328            let stream = (*format_ctx).streams.add(stream_index as usize);
329            let codecpar = (*(*stream)).codecpar;
330            ff_sys::avcodec::parameters_to_context(codec_ctx, codecpar).map_err(|e| {
331                DecodeError::Ffmpeg {
332                    code: e,
333                    message: format!(
334                        "Failed to copy codec parameters: {}",
335                        ff_sys::av_error_string(e)
336                    ),
337                }
338            })?;
339        }
340
341        // Open the codec
342        // SAFETY: codec_ctx and codec are valid
343        unsafe {
344            ff_sys::avcodec::open2(codec_ctx, codec, ptr::null_mut()).map_err(|e| {
345                DecodeError::Ffmpeg {
346                    code: e,
347                    message: format!("Failed to open codec: {}", ff_sys::av_error_string(e)),
348                }
349            })?;
350        }
351
352        // Extract stream information
353        // SAFETY: All pointers are valid
354        let stream_info =
355            unsafe { Self::extract_stream_info(format_ctx, stream_index as i32, codec_ctx)? };
356
357        // Allocate packet and frame (with RAII guards)
358        // SAFETY: FFmpeg is initialized, guards ensure cleanup
359        let packet_guard = unsafe { AvPacketGuard::new()? };
360        let frame_guard = unsafe { AvFrameGuard::new()? };
361
362        // All initialization successful - transfer ownership to AudioDecoderInner
363        Ok((
364            Self {
365                format_ctx: format_ctx_guard.into_raw(),
366                codec_ctx: codec_ctx_guard.into_raw(),
367                stream_index: stream_index as i32,
368                swr_ctx: None,
369                output_format,
370                output_sample_rate,
371                output_channels,
372                eof: false,
373                position: Duration::ZERO,
374                packet: packet_guard.into_raw(),
375                frame: frame_guard.into_raw(),
376            },
377            stream_info,
378        ))
379    }
380
381    /// Finds the first audio stream in the format context.
382    ///
383    /// # Returns
384    ///
385    /// Returns `Some((index, codec_id))` if an audio stream is found, `None` otherwise.
386    ///
387    /// # Safety
388    ///
389    /// Caller must ensure `format_ctx` is valid and initialized.
390    unsafe fn find_audio_stream(format_ctx: *mut AVFormatContext) -> Option<(usize, AVCodecID)> {
391        // SAFETY: Caller ensures format_ctx is valid
392        unsafe {
393            let nb_streams = (*format_ctx).nb_streams as usize;
394
395            for i in 0..nb_streams {
396                let stream = (*format_ctx).streams.add(i);
397                let codecpar = (*(*stream)).codecpar;
398
399                if (*codecpar).codec_type == AVMediaType_AVMEDIA_TYPE_AUDIO {
400                    return Some((i, (*codecpar).codec_id));
401                }
402            }
403
404            None
405        }
406    }
407
408    /// Extracts audio stream information from FFmpeg structures.
409    unsafe fn extract_stream_info(
410        format_ctx: *mut AVFormatContext,
411        stream_index: i32,
412        codec_ctx: *mut AVCodecContext,
413    ) -> Result<AudioStreamInfo, DecodeError> {
414        // SAFETY: Caller ensures all pointers are valid
415        let (sample_rate, channels, sample_fmt, duration_val, channel_layout, codec_id) = unsafe {
416            let stream = (*format_ctx).streams.add(stream_index as usize);
417            let codecpar = (*(*stream)).codecpar;
418
419            (
420                (*codecpar).sample_rate as u32,
421                (*codecpar).ch_layout.nb_channels as u32,
422                (*codec_ctx).sample_fmt,
423                (*format_ctx).duration,
424                (*codecpar).ch_layout,
425                (*codecpar).codec_id,
426            )
427        };
428
429        // Extract duration
430        let duration = if duration_val > 0 {
431            let duration_secs = duration_val as f64 / 1_000_000.0;
432            Some(Duration::from_secs_f64(duration_secs))
433        } else {
434            None
435        };
436
437        // Extract sample format
438        let sample_format = Self::convert_sample_format(sample_fmt);
439
440        // Extract channel layout
441        let channel_layout_enum = Self::convert_channel_layout(&channel_layout, channels);
442
443        // Extract codec
444        let codec = Self::convert_codec(codec_id);
445
446        // Build stream info
447        let mut builder = AudioStreamInfo::builder()
448            .index(stream_index as u32)
449            .codec(codec)
450            .sample_rate(sample_rate)
451            .channels(channels)
452            .sample_format(sample_format)
453            .channel_layout(channel_layout_enum);
454
455        if let Some(d) = duration {
456            builder = builder.duration(d);
457        }
458
459        Ok(builder.build())
460    }
461
462    /// Converts FFmpeg sample format to our `SampleFormat` enum.
463    fn convert_sample_format(fmt: AVSampleFormat) -> SampleFormat {
464        if fmt == ff_sys::AVSampleFormat_AV_SAMPLE_FMT_U8 {
465            SampleFormat::U8
466        } else if fmt == ff_sys::AVSampleFormat_AV_SAMPLE_FMT_S16 {
467            SampleFormat::I16
468        } else if fmt == ff_sys::AVSampleFormat_AV_SAMPLE_FMT_S32 {
469            SampleFormat::I32
470        } else if fmt == ff_sys::AVSampleFormat_AV_SAMPLE_FMT_FLT {
471            SampleFormat::F32
472        } else if fmt == ff_sys::AVSampleFormat_AV_SAMPLE_FMT_DBL {
473            SampleFormat::F64
474        } else if fmt == ff_sys::AVSampleFormat_AV_SAMPLE_FMT_U8P {
475            SampleFormat::U8p
476        } else if fmt == ff_sys::AVSampleFormat_AV_SAMPLE_FMT_S16P {
477            SampleFormat::I16p
478        } else if fmt == ff_sys::AVSampleFormat_AV_SAMPLE_FMT_S32P {
479            SampleFormat::I32p
480        } else if fmt == ff_sys::AVSampleFormat_AV_SAMPLE_FMT_FLTP {
481            SampleFormat::F32p
482        } else if fmt == ff_sys::AVSampleFormat_AV_SAMPLE_FMT_DBLP {
483            SampleFormat::F64p
484        } else {
485            log::warn!(
486                "sample_format unsupported, falling back to F32 requested={fmt} fallback=F32"
487            );
488            SampleFormat::F32
489        }
490    }
491
492    /// Converts FFmpeg channel layout to our `ChannelLayout` enum.
493    fn convert_channel_layout(layout: &ff_sys::AVChannelLayout, channels: u32) -> ChannelLayout {
494        if layout.order == ff_sys::AVChannelOrder_AV_CHANNEL_ORDER_NATIVE {
495            // SAFETY: When order is AV_CHANNEL_ORDER_NATIVE, the mask field is valid
496            let mask = unsafe { layout.u.mask };
497            match mask {
498                0x4 => ChannelLayout::Mono,
499                0x3 => ChannelLayout::Stereo,
500                0x103 => ChannelLayout::Stereo2_1,
501                0x7 => ChannelLayout::Surround3_0,
502                0x33 => ChannelLayout::Quad,
503                0x37 => ChannelLayout::Surround5_0,
504                0x3F => ChannelLayout::Surround5_1,
505                0x13F => ChannelLayout::Surround6_1,
506                0x63F => ChannelLayout::Surround7_1,
507                _ => {
508                    log::warn!(
509                        "channel_layout mask has no mapping, deriving from channel count \
510                         mask={mask} channels={channels}"
511                    );
512                    ChannelLayout::from_channels(channels)
513                }
514            }
515        } else {
516            log::warn!(
517                "channel_layout order is not NATIVE, deriving from channel count \
518                 order={order} channels={channels}",
519                order = layout.order
520            );
521            ChannelLayout::from_channels(channels)
522        }
523    }
524
525    /// Creates an `AVChannelLayout` from channel count.
526    ///
527    /// # Safety
528    ///
529    /// The returned layout must be freed with `av_channel_layout_uninit`.
530    unsafe fn create_channel_layout(channels: u32) -> ff_sys::AVChannelLayout {
531        // SAFETY: Zeroing AVChannelLayout is safe
532        let mut layout = unsafe { std::mem::zeroed::<ff_sys::AVChannelLayout>() };
533        // SAFETY: Caller ensures proper cleanup
534        unsafe {
535            ff_sys::av_channel_layout_default(&raw mut layout, channels as i32);
536        }
537        layout
538    }
539
540    /// Converts FFmpeg codec ID to our `AudioCodec` enum.
541    fn convert_codec(codec_id: AVCodecID) -> AudioCodec {
542        if codec_id == ff_sys::AVCodecID_AV_CODEC_ID_AAC {
543            AudioCodec::Aac
544        } else if codec_id == ff_sys::AVCodecID_AV_CODEC_ID_MP3 {
545            AudioCodec::Mp3
546        } else if codec_id == ff_sys::AVCodecID_AV_CODEC_ID_OPUS {
547            AudioCodec::Opus
548        } else if codec_id == ff_sys::AVCodecID_AV_CODEC_ID_VORBIS {
549            AudioCodec::Vorbis
550        } else if codec_id == ff_sys::AVCodecID_AV_CODEC_ID_FLAC {
551            AudioCodec::Flac
552        } else if codec_id == ff_sys::AVCodecID_AV_CODEC_ID_PCM_S16LE {
553            AudioCodec::Pcm
554        } else {
555            log::warn!(
556                "audio codec unsupported, falling back to Aac codec_id={codec_id} fallback=Aac"
557            );
558            AudioCodec::Aac
559        }
560    }
561
562    /// Decodes the next audio frame.
563    ///
564    /// # Returns
565    ///
566    /// - `Ok(Some(frame))` - Successfully decoded a frame
567    /// - `Ok(None)` - End of stream reached
568    /// - `Err(_)` - Decoding error occurred
569    pub(crate) fn decode_one(&mut self) -> Result<Option<AudioFrame>, DecodeError> {
570        if self.eof {
571            return Ok(None);
572        }
573
574        unsafe {
575            loop {
576                // Try to receive a frame from the decoder
577                let ret = ff_sys::avcodec_receive_frame(self.codec_ctx, self.frame);
578
579                if ret == 0 {
580                    // Successfully received a frame
581                    let audio_frame = self.convert_frame_to_audio_frame()?;
582
583                    // Update position based on frame timestamp
584                    let pts = (*self.frame).pts;
585                    if pts != ff_sys::AV_NOPTS_VALUE {
586                        let stream = (*self.format_ctx).streams.add(self.stream_index as usize);
587                        let time_base = (*(*stream)).time_base;
588                        let timestamp_secs =
589                            pts as f64 * time_base.num as f64 / time_base.den as f64;
590                        self.position = Duration::from_secs_f64(timestamp_secs);
591                    }
592
593                    return Ok(Some(audio_frame));
594                } else if ret == ff_sys::error_codes::EAGAIN {
595                    // Need to send more packets to the decoder
596                    // Read a packet from the file
597                    let read_ret = ff_sys::av_read_frame(self.format_ctx, self.packet);
598
599                    if read_ret == ff_sys::error_codes::EOF {
600                        // End of file - flush the decoder
601                        ff_sys::avcodec_send_packet(self.codec_ctx, ptr::null());
602                        self.eof = true;
603                        continue;
604                    } else if read_ret < 0 {
605                        return Err(DecodeError::Ffmpeg {
606                            code: read_ret,
607                            message: format!(
608                                "Failed to read frame: {}",
609                                ff_sys::av_error_string(read_ret)
610                            ),
611                        });
612                    }
613
614                    // Check if this packet belongs to the audio stream
615                    if (*self.packet).stream_index == self.stream_index {
616                        // Send the packet to the decoder
617                        let send_ret = ff_sys::avcodec_send_packet(self.codec_ctx, self.packet);
618                        ff_sys::av_packet_unref(self.packet);
619
620                        if send_ret < 0 && send_ret != ff_sys::error_codes::EAGAIN {
621                            return Err(DecodeError::Ffmpeg {
622                                code: send_ret,
623                                message: format!(
624                                    "Failed to send packet: {}",
625                                    ff_sys::av_error_string(send_ret)
626                                ),
627                            });
628                        }
629                    } else {
630                        // Not our stream, unref and continue
631                        ff_sys::av_packet_unref(self.packet);
632                    }
633                } else if ret == ff_sys::error_codes::EOF {
634                    // Decoder has been fully flushed
635                    self.eof = true;
636                    return Ok(None);
637                } else {
638                    return Err(DecodeError::DecodingFailed {
639                        timestamp: Some(self.position),
640                        reason: ff_sys::av_error_string(ret),
641                    });
642                }
643            }
644        }
645    }
646
647    /// Converts an AVFrame to an AudioFrame, applying sample format conversion if needed.
648    unsafe fn convert_frame_to_audio_frame(&mut self) -> Result<AudioFrame, DecodeError> {
649        // SAFETY: Caller ensures self.frame is valid
650        unsafe {
651            let nb_samples = (*self.frame).nb_samples as usize;
652            let channels = (*self.frame).ch_layout.nb_channels as u32;
653            let sample_rate = (*self.frame).sample_rate as u32;
654            let src_format = (*self.frame).format;
655
656            // Determine if we need conversion
657            let needs_conversion = self.output_format.is_some()
658                || self.output_sample_rate.is_some()
659                || self.output_channels.is_some();
660
661            if needs_conversion {
662                self.convert_with_swr(nb_samples, channels, sample_rate, src_format)
663            } else {
664                self.av_frame_to_audio_frame(self.frame)
665            }
666        }
667    }
668
669    /// Converts sample format/rate/channels using SwResample.
670    unsafe fn convert_with_swr(
671        &mut self,
672        nb_samples: usize,
673        src_channels: u32,
674        src_sample_rate: u32,
675        src_format: i32,
676    ) -> Result<AudioFrame, DecodeError> {
677        // Determine target parameters
678        let dst_format = self
679            .output_format
680            .map_or(src_format, Self::sample_format_to_av);
681        let dst_sample_rate = self.output_sample_rate.unwrap_or(src_sample_rate);
682        let dst_channels = self.output_channels.unwrap_or(src_channels);
683
684        // If no conversion is needed, return the frame directly
685        if src_format == dst_format
686            && src_sample_rate == dst_sample_rate
687            && src_channels == dst_channels
688        {
689            return unsafe { self.av_frame_to_audio_frame(self.frame) };
690        }
691
692        // Create channel layouts for source and destination
693        // SAFETY: We'll properly clean up these layouts
694        let mut src_ch_layout = unsafe { Self::create_channel_layout(src_channels) };
695        let mut dst_ch_layout = unsafe { Self::create_channel_layout(dst_channels) };
696
697        // Create SwrContext using swr_alloc_set_opts2
698        let mut swr_ctx: *mut SwrContext = ptr::null_mut();
699
700        // SAFETY: FFmpeg API call with valid parameters
701        let ret = unsafe {
702            ff_sys::swr_alloc_set_opts2(
703                &raw mut swr_ctx,
704                &raw const dst_ch_layout,
705                dst_format,
706                dst_sample_rate as i32,
707                &raw const src_ch_layout,
708                src_format,
709                src_sample_rate as i32,
710                0,
711                ptr::null_mut(),
712            )
713        };
714
715        if ret < 0 {
716            // Clean up channel layouts
717            unsafe {
718                ff_sys::av_channel_layout_uninit(&raw mut src_ch_layout);
719                ff_sys::av_channel_layout_uninit(&raw mut dst_ch_layout);
720            }
721            return Err(DecodeError::Ffmpeg {
722                code: ret,
723                message: format!(
724                    "Failed to allocate SwrContext: {}",
725                    ff_sys::av_error_string(ret)
726                ),
727            });
728        }
729
730        // Wrap in RAII guard for automatic cleanup
731        let _swr_guard = SwrContextGuard(swr_ctx);
732
733        // Initialize the resampler
734        // SAFETY: swr_ctx is valid
735        let ret = unsafe { ff_sys::swr_init(swr_ctx) };
736        if ret < 0 {
737            // Clean up channel layouts
738            unsafe {
739                ff_sys::av_channel_layout_uninit(&raw mut src_ch_layout);
740                ff_sys::av_channel_layout_uninit(&raw mut dst_ch_layout);
741            }
742            return Err(DecodeError::Ffmpeg {
743                code: ret,
744                message: format!(
745                    "Failed to initialize SwrContext: {}",
746                    ff_sys::av_error_string(ret)
747                ),
748            });
749        }
750
751        // Calculate output sample count
752        // SAFETY: swr_ctx is valid and initialized
753        let out_samples = unsafe { ff_sys::swr_get_out_samples(swr_ctx, nb_samples as i32) };
754
755        if out_samples < 0 {
756            // Clean up channel layouts
757            unsafe {
758                ff_sys::av_channel_layout_uninit(&raw mut src_ch_layout);
759                ff_sys::av_channel_layout_uninit(&raw mut dst_ch_layout);
760            }
761            return Err(DecodeError::Ffmpeg {
762                code: 0,
763                message: "Failed to calculate output sample count".to_string(),
764            });
765        }
766
767        let out_samples = out_samples as usize;
768
769        // Calculate buffer size for output
770        let dst_sample_fmt = Self::convert_sample_format(dst_format);
771        let bytes_per_sample = dst_sample_fmt.bytes_per_sample();
772        let is_planar = dst_sample_fmt.is_planar();
773
774        // Allocate output buffer
775        let buffer_size = if is_planar {
776            // For planar formats, each plane has samples * bytes_per_sample
777            out_samples * bytes_per_sample * dst_channels as usize
778        } else {
779            // For packed formats, interleaved samples
780            out_samples * bytes_per_sample * dst_channels as usize
781        };
782
783        let mut out_buffer = vec![0u8; buffer_size];
784
785        // Prepare output pointers for swr_convert
786        let mut out_ptrs = if is_planar {
787            // For planar formats, create separate pointers for each channel
788            let plane_size = out_samples * bytes_per_sample;
789            (0..dst_channels)
790                .map(|i| {
791                    let offset = i as usize * plane_size;
792                    out_buffer[offset..].as_mut_ptr()
793                })
794                .collect::<Vec<_>>()
795        } else {
796            // For packed formats, single pointer
797            vec![out_buffer.as_mut_ptr()]
798        };
799
800        // Get input data pointers from frame
801        // SAFETY: self.frame is valid
802        let in_ptrs = unsafe { (*self.frame).data };
803
804        // Convert samples using SwResample
805        // SAFETY: All pointers are valid and buffers are properly sized
806        let converted_samples = unsafe {
807            ff_sys::swr_convert(
808                swr_ctx,
809                out_ptrs.as_mut_ptr(),
810                out_samples as i32,
811                in_ptrs.as_ptr() as *mut *const u8,
812                nb_samples as i32,
813            )
814        };
815
816        // Clean up channel layouts
817        unsafe {
818            ff_sys::av_channel_layout_uninit(&raw mut src_ch_layout);
819            ff_sys::av_channel_layout_uninit(&raw mut dst_ch_layout);
820        }
821
822        if converted_samples < 0 {
823            return Err(DecodeError::Ffmpeg {
824                code: converted_samples,
825                message: format!(
826                    "Failed to convert samples: {}",
827                    ff_sys::av_error_string(converted_samples)
828                ),
829            });
830        }
831
832        // Extract timestamp from original frame
833        // SAFETY: self.frame is valid
834        let timestamp = unsafe {
835            let pts = (*self.frame).pts;
836            if pts != ff_sys::AV_NOPTS_VALUE {
837                let stream = (*self.format_ctx).streams.add(self.stream_index as usize);
838                let time_base = (*(*stream)).time_base;
839                Timestamp::new(pts, Rational::new(time_base.num, time_base.den))
840            } else {
841                let stream = (*self.format_ctx).streams.add(self.stream_index as usize);
842                let time_base = (*(*stream)).time_base;
843                Timestamp::zero(Rational::new(time_base.num, time_base.den))
844            }
845        };
846
847        // Create planes for AudioFrame
848        let planes = if is_planar {
849            let plane_size = converted_samples as usize * bytes_per_sample;
850            (0..dst_channels)
851                .map(|i| {
852                    let offset = i as usize * plane_size;
853                    out_buffer[offset..offset + plane_size].to_vec()
854                })
855                .collect()
856        } else {
857            // For packed formats, single plane with all data
858            vec![
859                out_buffer[..converted_samples as usize * bytes_per_sample * dst_channels as usize]
860                    .to_vec(),
861            ]
862        };
863
864        AudioFrame::new(
865            planes,
866            converted_samples as usize,
867            dst_channels,
868            dst_sample_rate,
869            dst_sample_fmt,
870            timestamp,
871        )
872        .map_err(|e| DecodeError::Ffmpeg {
873            code: 0,
874            message: format!("Failed to create AudioFrame: {e}"),
875        })
876    }
877
878    /// Converts an AVFrame to an AudioFrame.
879    unsafe fn av_frame_to_audio_frame(
880        &self,
881        frame: *const AVFrame,
882    ) -> Result<AudioFrame, DecodeError> {
883        // SAFETY: Caller ensures frame and format_ctx are valid
884        unsafe {
885            let nb_samples = (*frame).nb_samples as usize;
886            let channels = (*frame).ch_layout.nb_channels as u32;
887            let sample_rate = (*frame).sample_rate as u32;
888            let format = Self::convert_sample_format((*frame).format);
889
890            // Extract timestamp
891            let pts = (*frame).pts;
892            let timestamp = if pts != ff_sys::AV_NOPTS_VALUE {
893                let stream = (*self.format_ctx).streams.add(self.stream_index as usize);
894                let time_base = (*(*stream)).time_base;
895                Timestamp::new(
896                    pts as i64,
897                    Rational::new(time_base.num as i32, time_base.den as i32),
898                )
899            } else {
900                Timestamp::default()
901            };
902
903            // Convert frame to planes
904            let planes = Self::extract_planes(frame, nb_samples, channels, format)?;
905
906            AudioFrame::new(planes, nb_samples, channels, sample_rate, format, timestamp).map_err(
907                |e| DecodeError::Ffmpeg {
908                    code: 0,
909                    message: format!("Failed to create AudioFrame: {e}"),
910                },
911            )
912        }
913    }
914
915    /// Extracts planes from an AVFrame.
916    unsafe fn extract_planes(
917        frame: *const AVFrame,
918        nb_samples: usize,
919        channels: u32,
920        format: SampleFormat,
921    ) -> Result<Vec<Vec<u8>>, DecodeError> {
922        // SAFETY: Caller ensures frame is valid and format matches actual frame format
923        unsafe {
924            let mut planes = Vec::new();
925            let bytes_per_sample = format.bytes_per_sample();
926
927            if format.is_planar() {
928                // Planar: one plane per channel
929                for ch in 0..channels as usize {
930                    let plane_size = nb_samples * bytes_per_sample;
931                    let mut plane_data = vec![0u8; plane_size];
932
933                    let src_ptr = (*frame).data[ch];
934                    std::ptr::copy_nonoverlapping(src_ptr, plane_data.as_mut_ptr(), plane_size);
935
936                    planes.push(plane_data);
937                }
938            } else {
939                // Packed: single plane with interleaved samples
940                let plane_size = nb_samples * channels as usize * bytes_per_sample;
941                let mut plane_data = vec![0u8; plane_size];
942
943                let src_ptr = (*frame).data[0];
944                std::ptr::copy_nonoverlapping(src_ptr, plane_data.as_mut_ptr(), plane_size);
945
946                planes.push(plane_data);
947            }
948
949            Ok(planes)
950        }
951    }
952
953    /// Converts our `SampleFormat` to FFmpeg `AVSampleFormat`.
954    fn sample_format_to_av(format: SampleFormat) -> AVSampleFormat {
955        match format {
956            SampleFormat::U8 => ff_sys::AVSampleFormat_AV_SAMPLE_FMT_U8,
957            SampleFormat::I16 => ff_sys::AVSampleFormat_AV_SAMPLE_FMT_S16,
958            SampleFormat::I32 => ff_sys::AVSampleFormat_AV_SAMPLE_FMT_S32,
959            SampleFormat::F32 => ff_sys::AVSampleFormat_AV_SAMPLE_FMT_FLT,
960            SampleFormat::F64 => ff_sys::AVSampleFormat_AV_SAMPLE_FMT_DBL,
961            SampleFormat::U8p => ff_sys::AVSampleFormat_AV_SAMPLE_FMT_U8P,
962            SampleFormat::I16p => ff_sys::AVSampleFormat_AV_SAMPLE_FMT_S16P,
963            SampleFormat::I32p => ff_sys::AVSampleFormat_AV_SAMPLE_FMT_S32P,
964            SampleFormat::F32p => ff_sys::AVSampleFormat_AV_SAMPLE_FMT_FLTP,
965            SampleFormat::F64p => ff_sys::AVSampleFormat_AV_SAMPLE_FMT_DBLP,
966            _ => {
967                log::warn!(
968                    "sample_format has no AV mapping, falling back to F32 format={format:?} fallback=AV_SAMPLE_FMT_FLT"
969                );
970                ff_sys::AVSampleFormat_AV_SAMPLE_FMT_FLT
971            }
972        }
973    }
974
975    /// Returns the current playback position.
976    pub(crate) fn position(&self) -> Duration {
977        self.position
978    }
979
980    /// Returns whether end of file has been reached.
981    pub(crate) fn is_eof(&self) -> bool {
982        self.eof
983    }
984
985    /// Converts a `Duration` to a presentation timestamp (PTS) in stream time_base units.
986    fn duration_to_pts(&self, duration: Duration) -> i64 {
987        // SAFETY: format_ctx and stream_index are valid (owned by AudioDecoderInner)
988        let time_base = unsafe {
989            let stream = (*self.format_ctx).streams.add(self.stream_index as usize);
990            (*(*stream)).time_base
991        };
992
993        // Convert: duration (seconds) * (time_base.den / time_base.num) = PTS
994        let time_base_f64 = time_base.den as f64 / time_base.num as f64;
995        (duration.as_secs_f64() * time_base_f64) as i64
996    }
997
998    /// Seeks to a specified position in the audio stream.
999    ///
1000    /// # Arguments
1001    ///
1002    /// * `position` - Target position to seek to.
1003    /// * `mode` - Seek mode (Keyframe, Exact, or Backward).
1004    ///
1005    /// # Errors
1006    ///
1007    /// Returns [`DecodeError::SeekFailed`] if the seek operation fails.
1008    pub(crate) fn seek(
1009        &mut self,
1010        position: Duration,
1011        mode: crate::SeekMode,
1012    ) -> Result<(), DecodeError> {
1013        use crate::SeekMode;
1014
1015        let timestamp = self.duration_to_pts(position);
1016        let flags = ff_sys::avformat::seek_flags::BACKWARD;
1017
1018        // 1. Clear any pending packet and frame
1019        // SAFETY: packet and frame are valid (owned by AudioDecoderInner)
1020        unsafe {
1021            ff_sys::av_packet_unref(self.packet);
1022            ff_sys::av_frame_unref(self.frame);
1023        }
1024
1025        // 2. Seek in the format context
1026        // SAFETY: format_ctx and stream_index are valid
1027        unsafe {
1028            ff_sys::avformat::seek_frame(self.format_ctx, self.stream_index, timestamp, flags)
1029                .map_err(|e| DecodeError::SeekFailed {
1030                    target: position,
1031                    reason: ff_sys::av_error_string(e),
1032                })?;
1033        }
1034
1035        // 3. Flush decoder buffers
1036        // SAFETY: codec_ctx is valid (owned by AudioDecoderInner)
1037        unsafe {
1038            ff_sys::avcodec::flush_buffers(self.codec_ctx);
1039        }
1040
1041        // 4. Drain any remaining frames from the decoder after flush
1042        // SAFETY: codec_ctx and frame are valid
1043        unsafe {
1044            loop {
1045                let ret = ff_sys::avcodec_receive_frame(self.codec_ctx, self.frame);
1046                if ret == ff_sys::error_codes::EAGAIN || ret == ff_sys::error_codes::EOF {
1047                    break;
1048                } else if ret == 0 {
1049                    ff_sys::av_frame_unref(self.frame);
1050                } else {
1051                    break;
1052                }
1053            }
1054        }
1055
1056        // 5. Reset internal state
1057        self.eof = false;
1058
1059        // 6. For exact mode, skip frames to reach exact position
1060        if mode == SeekMode::Exact {
1061            self.skip_to_exact(position)?;
1062        }
1063        // For Keyframe/Backward modes, we're already at the keyframe after av_seek_frame
1064
1065        Ok(())
1066    }
1067
1068    /// Skips frames until reaching the exact target position.
1069    ///
1070    /// This is used by [`Self::seek`] when `SeekMode::Exact` is specified.
1071    ///
1072    /// # Arguments
1073    ///
1074    /// * `target` - The exact target position.
1075    fn skip_to_exact(&mut self, target: Duration) -> Result<(), DecodeError> {
1076        // Decode frames until we reach or pass the target
1077        while let Some(frame) = self.decode_one()? {
1078            let frame_time = frame.timestamp().as_duration();
1079            if frame_time >= target {
1080                // We've reached the target position
1081                break;
1082            }
1083            // Continue decoding to get closer (frames are automatically dropped)
1084        }
1085        Ok(())
1086    }
1087
1088    /// Flushes the decoder's internal buffers.
1089    pub(crate) fn flush(&mut self) {
1090        // SAFETY: codec_ctx is valid and owned by this instance
1091        unsafe {
1092            ff_sys::avcodec::flush_buffers(self.codec_ctx);
1093        }
1094        self.eof = false;
1095    }
1096}
1097
1098impl Drop for AudioDecoderInner {
1099    fn drop(&mut self) {
1100        // Free SwResample context if allocated
1101        if let Some(swr_ctx) = self.swr_ctx {
1102            // SAFETY: swr_ctx is valid and owned by this instance
1103            unsafe {
1104                // swr_free frees a SwrContext
1105                ff_sys::swr_free(&mut (swr_ctx as *mut _));
1106            }
1107        }
1108
1109        // Free frame and packet
1110        if !self.frame.is_null() {
1111            // SAFETY: self.frame is valid and owned by this instance
1112            unsafe {
1113                ff_sys::av_frame_free(&mut (self.frame as *mut _));
1114            }
1115        }
1116
1117        if !self.packet.is_null() {
1118            // SAFETY: self.packet is valid and owned by this instance
1119            unsafe {
1120                ff_sys::av_packet_free(&mut (self.packet as *mut _));
1121            }
1122        }
1123
1124        // Free codec context
1125        if !self.codec_ctx.is_null() {
1126            // SAFETY: self.codec_ctx is valid and owned by this instance
1127            unsafe {
1128                ff_sys::avcodec::free_context(&mut (self.codec_ctx as *mut _));
1129            }
1130        }
1131
1132        // Close format context
1133        if !self.format_ctx.is_null() {
1134            // SAFETY: self.format_ctx is valid and owned by this instance
1135            unsafe {
1136                ff_sys::avformat::close_input(&mut (self.format_ctx as *mut _));
1137            }
1138        }
1139    }
1140}
1141
// SAFETY: AudioDecoderInner manages FFmpeg contexts which are thread-safe when not shared.
// We don't expose mutable access across threads, so Send is safe.
// NOTE(review): this relies on the raw pointers released in `Drop` being owned exclusively
// by this instance and never handed out to another owner — confirm whenever a new accessor
// is added. This impl covers `Send` only.
unsafe impl Send for AudioDecoderInner {}
1145
#[cfg(test)]
#[allow(unsafe_code)]
mod tests {
    use ff_format::channel::ChannelLayout;

    use super::AudioDecoderInner;

    /// Constructs an `AVChannelLayout` with `AV_CHANNEL_ORDER_NATIVE` and the given mask.
    fn native_layout(mask: u64, nb_channels: i32) -> ff_sys::AVChannelLayout {
        ff_sys::AVChannelLayout {
            order: ff_sys::AVChannelOrder_AV_CHANNEL_ORDER_NATIVE,
            nb_channels,
            u: ff_sys::AVChannelLayout__bindgen_ty_1 { mask },
            opaque: std::ptr::null_mut(),
        }
    }

    /// Constructs an `AVChannelLayout` with `AV_CHANNEL_ORDER_UNSPEC` and a zero mask.
    fn unspec_layout(nb_channels: i32) -> ff_sys::AVChannelLayout {
        ff_sys::AVChannelLayout {
            order: ff_sys::AVChannelOrder_AV_CHANNEL_ORDER_UNSPEC,
            ..native_layout(0, nb_channels)
        }
    }

    /// Generates one `#[test]` that passes a layout and channel count to
    /// `convert_channel_layout` and compares the result with the expected value.
    macro_rules! layout_case {
        ($test_name:ident, $layout:expr, $channels:expr, $expected:expr $(,)?) => {
            #[test]
            fn $test_name() {
                let layout = $layout;
                assert_eq!(
                    AudioDecoderInner::convert_channel_layout(&layout, $channels),
                    $expected
                );
            }
        };
    }

    // Native-order masks that correspond to a named layout.
    layout_case!(native_mask_mono, native_layout(0x4, 1), 1, ChannelLayout::Mono);
    layout_case!(native_mask_stereo, native_layout(0x3, 2), 2, ChannelLayout::Stereo);
    layout_case!(
        native_mask_stereo2_1,
        native_layout(0x103, 3),
        3,
        ChannelLayout::Stereo2_1,
    );
    layout_case!(
        native_mask_surround3_0,
        native_layout(0x7, 3),
        3,
        ChannelLayout::Surround3_0,
    );
    layout_case!(native_mask_quad, native_layout(0x33, 4), 4, ChannelLayout::Quad);
    layout_case!(
        native_mask_surround5_0,
        native_layout(0x37, 5),
        5,
        ChannelLayout::Surround5_0,
    );
    layout_case!(
        native_mask_surround5_1,
        native_layout(0x3F, 6),
        6,
        ChannelLayout::Surround5_1,
    );
    layout_case!(
        native_mask_surround6_1,
        native_layout(0x13F, 7),
        7,
        ChannelLayout::Surround6_1,
    );
    layout_case!(
        native_mask_surround7_1,
        native_layout(0x63F, 8),
        8,
        ChannelLayout::Surround7_1,
    );

    // mask=0x1 is not a standard layout; should fall back to from_channels(2)
    layout_case!(
        native_mask_unknown_falls_back_to_from_channels,
        native_layout(0x1, 2),
        2,
        ChannelLayout::from_channels(2),
    );

    // A non-native channel order also falls back to the channel count.
    layout_case!(
        non_native_order_falls_back_to_from_channels,
        unspec_layout(6),
        6,
        ChannelLayout::from_channels(6),
    );
}
ff_decode/audio/decoder_inner.rs

ff_decode/audio/
decoder_inner.rs