#include <libswresample/swresample.h>
typedef struct AudioResampler {
struct SwrContext* resample_context;
struct AVFrame* tmp_frame;
struct AVFrame* output_frame;
#ifdef FFW_FEATURE_CHANNEL_LAYOUT_V2
AVChannelLayout target_channel_layout;
#else
uint64_t target_channel_layout;
#endif
int target_channels;
int target_sample_format;
int target_sample_rate;
int target_frame_samples;
int source_sample_rate;
int tmp_frame_capacity;
int64_t source_samples;
int64_t expected_source_pts;
int64_t output_samples;
int64_t input_pts_offset;
int64_t output_pts_offset;
int offset;
int flush;
} AudioResampler;
static AVFrame* alloc_frame(
#ifdef FFW_FEATURE_CHANNEL_LAYOUT_V2
const AVChannelLayout* channel_layout,
#else
const uint64_t* channel_layout,
#endif
int sample_fmt,
int sample_rate,
int nb_samples)
{
AVFrame* frame = av_frame_alloc();
if (!frame) {
return NULL;
}
frame->format = sample_fmt;
frame->sample_rate = sample_rate;
frame->nb_samples = nb_samples;
#ifdef FFW_FEATURE_CHANNEL_LAYOUT_V2
if (av_channel_layout_copy(&frame->ch_layout, channel_layout) != 0) {
goto err;
}
#else
frame->channel_layout = *channel_layout;
frame->channels = av_get_channel_layout_nb_channels(*channel_layout);
#endif
if (av_frame_get_buffer(frame, 0) != 0) {
goto err;
}
return frame;
err:
av_frame_free(&frame);
return NULL;
}
void ffw_audio_resampler_free(AudioResampler* resampler);
#ifdef FFW_FEATURE_CHANNEL_LAYOUT_V2
AudioResampler* ffw_audio_resampler_new(
const AVChannelLayout* target_channel_layout,
int target_sample_format,
int target_sample_rate,
int target_frame_samples,
const AVChannelLayout* source_channel_layout,
int source_sample_format,
int source_sample_rate) {
AudioResampler* res;
int ret;
if (!(res = calloc(1, sizeof(AudioResampler)))) {
return NULL;
}
if (av_channel_layout_copy(&res->target_channel_layout, target_channel_layout) != 0) {
goto err;
}
res->target_channels = target_channel_layout->nb_channels;
res->target_sample_format = target_sample_format;
res->target_sample_rate = target_sample_rate;
res->target_frame_samples = target_frame_samples;
res->source_sample_rate = source_sample_rate;
ret = swr_alloc_set_opts2(
&res->resample_context,
(AVChannelLayout*)target_channel_layout,
target_sample_format,
target_sample_rate,
(AVChannelLayout*)source_channel_layout,
source_sample_format,
source_sample_rate,
0,
NULL);
if (ret != 0 || !res->resample_context) {
goto err;
}
if (swr_init(res->resample_context) < 0) {
goto err;
}
return res;
err:
ffw_audio_resampler_free(res);
return NULL;
}
#else
AudioResampler* ffw_audio_resampler_new(
const uint64_t* target_channel_layout,
int target_sample_format,
int target_sample_rate,
int target_frame_samples,
const uint64_t* source_channel_layout,
int source_sample_format,
int source_sample_rate) {
AudioResampler* res = calloc(1, sizeof(AudioResampler));
if (!res) {
return NULL;
}
res->target_channel_layout = *target_channel_layout;
res->target_channels = av_get_channel_layout_nb_channels(*target_channel_layout);
res->target_sample_format = target_sample_format;
res->target_sample_rate = target_sample_rate;
res->target_frame_samples = target_frame_samples;
res->source_sample_rate = source_sample_rate;
res->resample_context = swr_alloc_set_opts(
NULL,
*target_channel_layout,
target_sample_format,
target_sample_rate,
*source_channel_layout,
source_sample_format,
source_sample_rate,
0,
NULL);
if (!res->resample_context) {
goto err;
}
if (swr_init(res->resample_context) < 0) {
goto err;
}
return res;
err:
ffw_audio_resampler_free(res);
return NULL;
}
#endif
int ffw_audio_resampler_push_frame(AudioResampler* resampler, const AVFrame* frame) {
int required_capacity;
int ret;
int64_t pts_delta;
if (resampler->tmp_frame && (resampler->offset < resampler->tmp_frame->nb_samples)) {
return 0;
}
if (frame) {
if (resampler->source_samples == 0) {
resampler->expected_source_pts = frame->pts;
resampler->input_pts_offset = frame->pts;
resampler->output_pts_offset = frame->pts
* resampler->target_sample_rate
/ resampler->source_sample_rate;
}
pts_delta = frame->pts - resampler->expected_source_pts;
if (pts_delta > 0) {
ret = swr_inject_silence(resampler->resample_context, pts_delta);
if (ret < 0) {
return ret;
}
} else if (pts_delta < 0) {
ret = swr_drop_output(
resampler->resample_context,
-pts_delta * resampler->target_sample_rate / resampler->source_sample_rate);
if (ret < 0) {
return ret;
}
}
resampler->source_samples += frame->nb_samples + pts_delta;
resampler->expected_source_pts = resampler->input_pts_offset + resampler->source_samples;
required_capacity = swr_get_out_samples(
resampler->resample_context,
frame->nb_samples);
} else {
resampler->flush = 1;
required_capacity = swr_get_delay(
resampler->resample_context,
resampler->target_sample_rate) + 3;
}
if (required_capacity < 0) {
return required_capacity;
}
if (!resampler->tmp_frame
|| !av_frame_is_writable(resampler->tmp_frame)
|| (required_capacity > resampler->tmp_frame_capacity)) {
av_frame_free(&resampler->tmp_frame);
resampler->tmp_frame = alloc_frame(
&resampler->target_channel_layout,
resampler->target_sample_format,
resampler->target_sample_rate,
required_capacity);
if (!resampler->tmp_frame) {
return -1;
}
resampler->tmp_frame_capacity = required_capacity;
}
resampler->tmp_frame->nb_samples = 0;
resampler->offset = 0;
ret = swr_convert_frame(resampler->resample_context, resampler->tmp_frame, frame);
if (ret < 0) {
return ret;
}
resampler->tmp_frame->pts = resampler->output_samples + resampler->output_pts_offset;
resampler->output_samples += resampler->tmp_frame->nb_samples;
return 1;
}
int ffw_audio_resampler_take_frame(AudioResampler* resampler, AVFrame** frame) {
int available_samples;
int required_samples;
int copy_samples;
if (!resampler->tmp_frame) {
return 0;
}
if (!resampler->target_frame_samples) {
resampler->flush = 0;
if (resampler->tmp_frame->nb_samples > 0) {
*frame = av_frame_clone(resampler->tmp_frame);
resampler->tmp_frame->nb_samples = 0;
return 1;
} else {
return 0;
}
}
if (!resampler->output_frame || !av_frame_is_writable(resampler->output_frame)) {
av_frame_free(&resampler->output_frame);
resampler->output_frame = alloc_frame(
&resampler->target_channel_layout,
resampler->target_sample_format,
resampler->target_sample_rate,
resampler->target_frame_samples);
if (!resampler->output_frame) {
return -1;
}
resampler->output_frame->nb_samples = 0;
}
required_samples = resampler->target_frame_samples - resampler->output_frame->nb_samples;
available_samples = resampler->tmp_frame->nb_samples - resampler->offset;
if (available_samples > required_samples) {
copy_samples = required_samples;
} else {
copy_samples = available_samples;
}
if (copy_samples > 0) {
av_samples_copy(
resampler->output_frame->extended_data,
resampler->tmp_frame->extended_data,
resampler->output_frame->nb_samples,
resampler->offset,
copy_samples,
resampler->target_channels,
resampler->target_sample_format);
if (!resampler->output_frame->nb_samples) {
resampler->output_frame->pts = resampler->tmp_frame->pts + resampler->offset;
}
resampler->offset += copy_samples;
resampler->output_frame->nb_samples += copy_samples;
}
if (!resampler->flush) {
if (resampler->output_frame->nb_samples < resampler->target_frame_samples) {
return 0;
}
}
*frame = av_frame_clone(resampler->output_frame);
resampler->output_frame->nb_samples = 0;
if (resampler->offset >= resampler->tmp_frame->nb_samples) {
resampler->flush = 0;
}
return 1;
}
void ffw_audio_resampler_free(AudioResampler* resampler) {
if (!resampler) {
return;
}
av_frame_free(&resampler->tmp_frame);
av_frame_free(&resampler->output_frame);
swr_free(&resampler->resample_context);
#ifdef FFW_FEATURE_CHANNEL_LAYOUT_V2
av_channel_layout_uninit(&resampler->target_channel_layout);
#endif
free(resampler);
}