Skip to main content

lumen_ffmpeg/encode/
audio.rs

1use std::ptr;
2
3use crate::audio::{AudioFrame, SampleFormat};
4use crate::ffi::{self, AvFrame, AvPacket, sys};
5use crate::{FfmpegError, Result};
6use sys::AVMediaType::AVMEDIA_TYPE_AUDIO;
7use sys::AVSampleFormat::{AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP};
8
9use super::codec::{find_audio_encoder, select_audio_sample_format};
10use super::common::refresh_stream_time_base;
11use super::output::OutputContext;
12
/// Settings used by [`AudioEncoder::create`] to open an audio encoder.
#[derive(Debug, Clone)]
pub struct AudioEncoderConfig {
    /// Sample rate of the audio to encode, in Hz.
    pub sample_rate: u32,
    /// Number of interleaved channels in the audio to encode.
    pub channels: u16,
    /// Bit rate handed to the encoder context.
    pub bit_rate: i64,
    /// Optional encoder name forwarded to the encoder lookup.
    // NOTE(review): presumably `None` lets the lookup pick a default encoder —
    // confirm against `find_audio_encoder`.
    pub encoder_name: Option<String>,
}

impl AudioEncoderConfig {
    /// Bit rate used by the [`AudioEncoderConfig::aac`] preset.
    const AAC_BIT_RATE: i64 = 192_000;

    /// Builds the AAC preset for the given sample rate and channel count.
    ///
    /// The bit rate is fixed at 192 000 and no explicit encoder name is set.
    pub fn aac(sample_rate: u32, channels: u16) -> Self {
        let bit_rate = Self::AAC_BIT_RATE;
        let encoder_name = None;
        Self {
            encoder_name,
            bit_rate,
            channels,
            sample_rate,
        }
    }
}
31
32pub struct AudioEncoder {
33    stream_index: usize,
34    stream_time_base: sys::AVRational,
35    context: *mut sys::AVCodecContext,
36    sample_format: sys::AVSampleFormat,
37    sample_rate: u32,
38    channels: u16,
39    frame_size: usize,
40    pending: Vec<f32>,
41    next_pts: i64,
42}
43
44unsafe impl Send for AudioEncoder {}
45
46impl AudioEncoder {
47    pub fn create(output: &mut OutputContext, config: AudioEncoderConfig) -> Result<Self> {
48        if config.sample_rate == 0 || config.channels == 0 {
49            return Err(FfmpegError::new(
50                "AudioEncoder::create",
51                "sample_rate and channels must be greater than zero",
52            ));
53        }
54        let codec = find_audio_encoder(&config)?;
55        let sample_format = select_audio_sample_format(codec)?;
56        let stream = unsafe { sys::avformat_new_stream(output.ptr, ptr::null()) };
57        if stream.is_null() {
58            return Err(FfmpegError::new(
59                "avformat_new_stream",
60                "failed to allocate audio output stream",
61            ));
62        }
63        let context = unsafe { sys::avcodec_alloc_context3(codec) };
64        if context.is_null() {
65            return Err(FfmpegError::new(
66                "avcodec_alloc_context3",
67                "failed to allocate audio encoder context",
68            ));
69        }
70        let time_base = sys::AVRational {
71            num: 1,
72            den: config.sample_rate as i32,
73        };
74        unsafe {
75            (*context).codec_id = sys::AVCodecID::AV_CODEC_ID_AAC;
76            (*context).codec_type = AVMEDIA_TYPE_AUDIO;
77            (*context).sample_rate = config.sample_rate as i32;
78            (*context).sample_fmt = sample_format;
79            (*context).bit_rate = config.bit_rate;
80            (*context).time_base = time_base;
81            sys::av_channel_layout_default(&mut (*context).ch_layout, config.channels as i32);
82            if ((*(*output.ptr).oformat).flags & sys::AVFMT_GLOBALHEADER) != 0 {
83                (*context).flags |= sys::AV_CODEC_FLAG_GLOBAL_HEADER as i32;
84            }
85            ffi::check(
86                sys::avcodec_open2(context, codec, ptr::null_mut()),
87                "avcodec_open2",
88            )?;
89            ffi::check(
90                sys::avcodec_parameters_from_context((*stream).codecpar, context),
91                "avcodec_parameters_from_context",
92            )?;
93            (*stream).time_base = time_base;
94        }
95        let frame_size = unsafe { (*context).frame_size.max(0) as usize };
96        let frame_size = frame_size.max(1);
97        Ok(Self {
98            stream_index: unsafe { (*stream).index as usize },
99            stream_time_base: time_base,
100            context,
101            sample_format,
102            sample_rate: config.sample_rate,
103            channels: config.channels,
104            frame_size,
105            pending: Vec::with_capacity(frame_size.saturating_mul(config.channels as usize)),
106            next_pts: 0,
107        })
108    }
109
110    pub(in crate::encode) fn refresh_stream_time_base(
111        &mut self,
112        output: &OutputContext,
113    ) -> Result<()> {
114        self.stream_time_base = refresh_stream_time_base(
115            output,
116            self.stream_index,
117            "AudioEncoder::refresh_stream_time_base",
118        )?;
119        Ok(())
120    }
121
122    pub(in crate::encode) fn send_audio_frame(
123        &mut self,
124        output: &mut OutputContext,
125        frame: &AudioFrame,
126    ) -> Result<()> {
127        self.validate_audio_frame(frame)?;
128        self.pending.extend_from_slice(&frame.interleaved_f32);
129        let samples_per_packet = self.frame_size.saturating_mul(self.channels as usize);
130        while self.pending.len() >= samples_per_packet {
131            let chunk: Vec<f32> = self.pending.drain(..samples_per_packet).collect();
132            self.send_samples(output, &chunk, self.frame_size)?;
133        }
134        Ok(())
135    }
136
137    fn validate_audio_frame(&self, frame: &AudioFrame) -> Result<()> {
138        if frame.sample_rate != self.sample_rate
139            || frame.channels != self.channels
140            || frame.sample_format != SampleFormat::F32
141        {
142            return Err(FfmpegError::new(
143                "AudioEncoder::send_audio_frame",
144                format!(
145                    "expected {} Hz, {} channel interleaved f32 audio; got {} Hz, {} channel {:?}",
146                    self.sample_rate,
147                    self.channels,
148                    frame.sample_rate,
149                    frame.channels,
150                    frame.sample_format
151                ),
152            ));
153        }
154        let channels = self.channels as usize;
155        if frame.interleaved_f32.len() != frame.samples.saturating_mul(channels) {
156            return Err(FfmpegError::new(
157                "AudioEncoder::send_audio_frame",
158                "audio sample count does not match interleaved buffer length",
159            ));
160        }
161        Ok(())
162    }
163
164    pub(in crate::encode) fn flush(&mut self, output: &mut OutputContext) -> Result<()> {
165        if !self.pending.is_empty() {
166            let channels = self.channels as usize;
167            let samples = self.pending.len().div_ceil(channels);
168            let mut chunk = std::mem::take(&mut self.pending);
169            chunk.resize(samples.saturating_mul(channels), 0.0);
170            self.send_samples(output, &chunk, samples)?;
171        }
172        self.send_frame(output, ptr::null())
173    }
174
175    fn send_samples(
176        &mut self,
177        output: &mut OutputContext,
178        samples: &[f32],
179        sample_count: usize,
180    ) -> Result<()> {
181        let mut frame = AvFrame::new()?;
182        unsafe {
183            (*frame.as_mut_ptr()).format = self.sample_format as i32;
184            (*frame.as_mut_ptr()).nb_samples = sample_count as i32;
185            (*frame.as_mut_ptr()).sample_rate = self.sample_rate as i32;
186            sys::av_channel_layout_default(
187                &mut (*frame.as_mut_ptr()).ch_layout,
188                self.channels as i32,
189            );
190            ffi::check(
191                sys::av_frame_get_buffer(frame.as_mut_ptr(), 0),
192                "av_frame_get_buffer",
193            )?;
194            ffi::check(
195                sys::av_frame_make_writable(frame.as_mut_ptr()),
196                "av_frame_make_writable",
197            )?;
198            fill_audio_frame(frame.as_mut_ptr(), samples, sample_count, self.channels)?;
199            (*frame.as_mut_ptr()).pts = self.next_pts;
200        }
201        self.next_pts = self.next_pts.saturating_add(sample_count as i64);
202        self.send_frame(output, frame.as_ptr())
203    }
204
205    fn send_frame(&mut self, output: &mut OutputContext, frame: *const sys::AVFrame) -> Result<()> {
206        unsafe {
207            ffi::check(
208                sys::avcodec_send_frame(self.context, frame),
209                "avcodec_send_frame",
210            )?;
211        }
212        loop {
213            let mut packet = AvPacket::new()?;
214            let result = unsafe { sys::avcodec_receive_packet(self.context, packet.as_mut_ptr()) };
215            if result == sys::AVERROR(libc::EAGAIN) || result == sys::AVERROR_EOF {
216                break;
217            }
218            if result < 0 {
219                return Err(ffi::error_from_code("avcodec_receive_packet", result));
220            }
221            unsafe {
222                (*packet.as_mut_ptr()).stream_index = self.stream_index as i32;
223                sys::av_packet_rescale_ts(
224                    packet.as_mut_ptr(),
225                    (*self.context).time_base,
226                    self.stream_time_base,
227                );
228                ffi::check(
229                    sys::av_interleaved_write_frame(output.ptr, packet.as_mut_ptr()),
230                    "av_interleaved_write_frame",
231                )
232                .map_err(|error| error.with_path(output.path().to_string()))?;
233            }
234        }
235        Ok(())
236    }
237}
238
239impl Drop for AudioEncoder {
240    fn drop(&mut self) {
241        unsafe {
242            sys::avcodec_free_context(&mut self.context);
243        }
244    }
245}
246
247unsafe fn fill_audio_frame(
248    frame: *mut sys::AVFrame,
249    samples: &[f32],
250    sample_count: usize,
251    channels: u16,
252) -> Result<()> {
253    let channels = channels as usize;
254    let format = unsafe { std::mem::transmute::<i32, sys::AVSampleFormat>((*frame).format) };
255    match format {
256        AV_SAMPLE_FMT_FLTP => {
257            for channel in 0..channels {
258                let plane = unsafe { (*frame).data[channel] }.cast::<f32>();
259                if plane.is_null() {
260                    return Err(FfmpegError::new(
261                        "AudioEncoder::fill_audio_frame",
262                        "audio frame plane is null",
263                    ));
264                }
265                for sample in 0..sample_count {
266                    unsafe {
267                        *plane.add(sample) = samples[sample.saturating_mul(channels) + channel];
268                    }
269                }
270            }
271            Ok(())
272        }
273        AV_SAMPLE_FMT_FLT => {
274            let data = unsafe { (*frame).data[0] }.cast::<f32>();
275            if data.is_null() {
276                return Err(FfmpegError::new(
277                    "AudioEncoder::fill_audio_frame",
278                    "audio frame data is null",
279                ));
280            }
281            unsafe {
282                std::ptr::copy_nonoverlapping(samples.as_ptr(), data, samples.len());
283            }
284            Ok(())
285        }
286        _ => Err(FfmpegError::new(
287            "AudioEncoder::fill_audio_frame",
288            "unsupported audio encoder sample format",
289        )),
290    }
291}