#![cfg_attr(
not(any(feature = "asm", feature = "c-ffi", feature = "partial_asm")),
forbid(unsafe_code)
)]
#![deny(unsafe_op_in_unsafe_fn)]
#[cfg(feature = "bitdepth_8")]
use crate::include::common::bitdepth::BitDepth8;
#[cfg(feature = "bitdepth_16")]
use crate::include::common::bitdepth::BitDepth16;
use crate::include::common::validate::validate_input;
use crate::include::dav1d::data::Rav1dData;
use crate::include::dav1d::dav1d::Rav1dDecodeFrameType;
use crate::include::dav1d::dav1d::Rav1dInloopFilterType;
use crate::include::dav1d::dav1d::Rav1dSettings;
use crate::include::dav1d::headers::Rav1dFilmGrainData;
use crate::include::dav1d::picture::Rav1dPicture;
use crate::src::cpu::rav1d_init_cpu;
use crate::src::cpu::rav1d_num_logical_processors;
use crate::src::decode::rav1d_decode_frame_exit;
use crate::src::error::Rav1dError::EAGAIN;
use crate::src::error::Rav1dError::EGeneric;
use crate::src::error::Rav1dError::EINVAL;
use crate::src::error::Rav1dResult;
use crate::src::extensions::OptionError as _;
use crate::src::fg_apply;
use crate::src::internal::Rav1dBitDepthDSPContext;
use crate::src::internal::Rav1dContext;
use crate::src::internal::Rav1dContextFrameThread;
use crate::src::internal::Rav1dContextTaskThread;
use crate::src::internal::Rav1dContextTaskType;
use crate::src::internal::Rav1dFrameContext;
use crate::src::internal::Rav1dState;
use crate::src::internal::Rav1dTaskContext;
use crate::src::internal::Rav1dTaskContextTaskThread;
use crate::src::internal::TaskThreadData;
use crate::src::iter::wrapping_iter;
use crate::src::log::Rav1dLog as _;
use crate::src::log::Rav1dLogger;
use crate::src::mem::try_arc;
use crate::src::obu::rav1d_parse_obus;
use crate::src::picture::PictureFlags;
use crate::src::picture::rav1d_picture_alloc_copy;
#[cfg(feature = "c-ffi")]
use crate::src::send_sync_non_null::SendSyncNonNull;
use crate::src::thread_task::FRAME_ERROR;
use crate::src::thread_task::rav1d_task_delayed_fg;
use crate::src::thread_task::rav1d_worker_task;
use parking_lot::Mutex;
use std::cmp;
#[cfg(feature = "c-ffi")]
use std::ffi::CStr;
#[cfg(feature = "c-ffi")]
use std::ffi::c_void;
use std::mem;
use std::sync::Arc;
use std::sync::Once;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicU32;
use std::sync::atomic::Ordering;
use std::thread;
use std::thread::JoinHandle;
#[cold]
fn init_internal() {
rav1d_init_cpu();
}
#[cfg(feature = "c-ffi")]
pub(crate) const DAV1D_VERSION: &CStr = c"966d63c1";
#[cfg(feature = "c-ffi")]
pub const DAV1D_API_VERSION_MAJOR: u8 = 7;
#[cfg(feature = "c-ffi")]
pub const DAV1D_API_VERSION_MINOR: u8 = 0;
#[cfg(feature = "c-ffi")]
pub const DAV1D_API_VERSION_PATCH: u8 = 0;
impl Default for Rav1dSettings {
fn default() -> Self {
Self {
n_threads: 0,
max_frame_delay: 0,
apply_grain: true,
operating_point: 0,
all_layers: true,
frame_size_limit: 0,
allocator: Default::default(),
logger: Some(Rav1dLogger::default()),
strict_std_compliance: false,
output_invisible_frames: false,
inloop_filters: Rav1dInloopFilterType::all(),
decode_frame_type: Rav1dDecodeFrameType::All,
}
}
}
struct NumThreads {
n_tc: usize,
n_fc: usize,
}
#[cold]
fn get_num_threads(s: &Rav1dSettings) -> NumThreads {
let n_tc = if s.n_threads != 0 {
s.n_threads as usize
} else {
rav1d_num_logical_processors().get().clamp(1, 256)
};
#[allow(unused_variables)]
let n_fc = if s.max_frame_delay != 0 {
cmp::min(s.max_frame_delay as usize, n_tc)
} else {
cmp::min((n_tc as f64).sqrt().ceil() as usize, 8)
};
#[cfg(not(feature = "unchecked"))]
let n_fc = 1;
NumThreads { n_fc, n_tc }
}
#[cfg(feature = "c-ffi")]
#[cold]
pub(crate) fn rav1d_get_frame_delay(s: &Rav1dSettings) -> Rav1dResult<usize> {
validate_input!((s.n_threads >= 0 && s.n_threads <= 256, EINVAL))?;
validate_input!((s.max_frame_delay >= 0 && s.max_frame_delay <= 256, EINVAL))?;
let NumThreads { n_tc: _, n_fc } = get_num_threads(s);
Ok(n_fc)
}
#[cold]
pub(crate) fn rav1d_open(
s: &Rav1dSettings,
) -> Rav1dResult<(Arc<Rav1dContext>, Vec<JoinHandle<()>>)> {
static initted: Once = Once::new();
initted.call_once(|| init_internal());
validate_input!((s.n_threads >= 0 && s.n_threads <= 256, EINVAL))?;
validate_input!((s.max_frame_delay >= 0 && s.max_frame_delay <= 256, EINVAL))?;
validate_input!((s.operating_point <= 31, EINVAL))?;
#[cfg(feature = "c-ffi")]
validate_input!((
!s.allocator.is_default() || s.allocator.cookie.is_none(),
EINVAL
))?;
let frame_size_limit;
if mem::size_of::<usize>() < 8 && s.frame_size_limit.wrapping_sub(1) >= 8192 * 8192 {
frame_size_limit = 8192 * 8192;
if s.frame_size_limit != 0 {
writeln!(
s.logger,
"Frame size limit reduced from {} to {}.",
s.frame_size_limit, frame_size_limit,
);
}
} else {
frame_size_limit = s.frame_size_limit;
}
let NumThreads { n_tc, n_fc } = get_num_threads(s);
crate::include::dav1d::picture::set_tile_threading(n_tc > 1);
let ttd = TaskThreadData {
cur: (n_fc as u32).into(),
reset_task_cur: AtomicU32::new(u32::MAX),
..Default::default()
};
let task_thread = try_arc(ttd)?;
let fc = (0..n_fc)
.map(|i| {
let mut fc = Rav1dFrameContext::default(i);
fc.task_thread.finished = AtomicBool::new(true);
fc.task_thread.ttd = Arc::clone(&task_thread);
let f = fc.data.get_mut();
f.lf.last_sharpness = u8::MAX;
fc
})
.collect();
let state = Mutex::new(Rav1dState {
frame_thread: Rav1dContextFrameThread {
out_delayed: if n_fc > 1 {
(0..n_fc).map(|_| Default::default()).collect()
} else {
Box::new([])
},
..Default::default()
},
..Default::default()
});
let mut worker_handles = Vec::new();
let mut tc_vec = Vec::new();
for n in 0..n_tc {
let task_thread = Arc::clone(&task_thread);
let thread_data = try_arc(Rav1dTaskContextTaskThread::new(task_thread))?;
let thread_data_copy = Arc::clone(&thread_data);
let task = if n_tc > 1 {
let handle = thread::Builder::new()
.name(format!("rav1d-worker-{n}"))
.spawn(|| rav1d_worker_task(thread_data_copy))
.unwrap();
worker_handles.push(handle);
Rav1dContextTaskType::Worker
} else {
Rav1dContextTaskType::Single(Mutex::new(Box::new(Rav1dTaskContext::new(
thread_data_copy,
))))
};
tc_vec.push(Rav1dContextTaskThread { task, thread_data });
}
let tc = tc_vec.into_boxed_slice();
let c = Rav1dContext {
allocator: s.allocator.clone(),
logger: s.logger.clone(),
apply_grain: s.apply_grain,
operating_point: s.operating_point,
all_layers: s.all_layers,
frame_size_limit,
strict_std_compliance: s.strict_std_compliance,
output_invisible_frames: s.output_invisible_frames,
inloop_filters: s.inloop_filters,
decode_frame_type: s.decode_frame_type,
fc,
task_thread,
state,
tc,
flush: Default::default(),
dsp: Default::default(),
picture_pool: Default::default(),
};
#[cfg_attr(not(feature = "c-ffi"), allow(unused_mut))]
let mut c = try_arc(c)?;
#[cfg(feature = "c-ffi")]
if c.allocator.is_default() {
let c = Arc::get_mut(&mut c).unwrap();
c.allocator.cookie = Some(SendSyncNonNull::from_ref(&c.picture_pool).cast::<c_void>());
}
let c = c;
let mut handle_idx = 0;
for tc in c.tc.iter() {
if let Rav1dContextTaskType::Worker = &tc.task {
*tc.thread_data.c.lock() = Some(Arc::clone(&c));
worker_handles[handle_idx].thread().unpark();
handle_idx += 1;
}
}
Ok((c, worker_handles))
}
impl Rav1dFilmGrainData {
fn has_grain(&self) -> bool {
self.num_y_points != 0
|| self.num_uv_points[0] != 0
|| self.num_uv_points[1] != 0
|| self.clip_to_restricted_range && self.chroma_scaling_from_luma
}
}
impl Rav1dPicture {
fn has_grain(&self) -> bool {
self.frame_hdr.as_ref().unwrap().film_grain.data.has_grain()
}
}
fn output_image(c: &Rav1dContext, state: &mut Rav1dState, out: &mut Rav1dPicture) -> Rav1dResult {
let mut res = Ok(());
let use_cache = !c.all_layers && state.max_spatial_id != 0;
let r#in = if !use_cache {
&mut state.out
} else {
&mut state.cache
};
if !c.apply_grain || !r#in.p.has_grain() {
*out = mem::take(&mut r#in.p);
} else {
res = rav1d_apply_grain(c, out, &r#in.p);
}
let _ = mem::take(r#in);
if use_cache && state.out.p.data.is_some() {
state.cache = mem::take(&mut state.out);
}
res
}
fn output_picture_ready(c: &Rav1dContext, state: &mut Rav1dState, drain: bool) -> bool {
if state.cached_error.is_some() {
return true;
}
if !c.all_layers && state.max_spatial_id != 0 {
if state.out.p.data.is_some() && state.cache.p.data.is_some() {
if state.max_spatial_id == state.cache.p.frame_hdr.as_ref().unwrap().spatial_id
|| state.out.flags.contains(PictureFlags::NEW_TEMPORAL_UNIT)
{
return true;
}
state.cache = mem::take(&mut state.out);
return false;
} else {
if state.cache.p.data.is_some() && drain {
return true;
} else {
if state.out.p.data.is_some() {
state.cache = mem::take(&mut state.out);
return false;
}
}
}
}
state.out.p.data.is_some()
}
fn drain_picture(c: &Rav1dContext, state: &mut Rav1dState, out: &mut Rav1dPicture) -> Rav1dResult {
let mut drained = false;
for _ in 0..c.fc.len() {
let next = state.frame_thread.next;
let fc = &c.fc[next as usize];
let mut task_thread_lock = c.task_thread.lock.lock();
while !fc.task_thread.finished.load(Ordering::SeqCst) {
fc.task_thread.cond.wait(&mut task_thread_lock);
}
let out_delayed = &mut state.frame_thread.out_delayed[next as usize];
if out_delayed.p.data.is_some() || fc.task_thread.error.load(Ordering::SeqCst) != 0 {
let first = c.task_thread.first.load(Ordering::SeqCst);
if first as usize + 1 < c.fc.len() {
c.task_thread.first.fetch_add(1, Ordering::SeqCst);
} else {
c.task_thread.first.store(0, Ordering::SeqCst);
}
let _ = c.task_thread.reset_task_cur.compare_exchange(
first,
u32::MAX,
Ordering::SeqCst,
Ordering::SeqCst,
);
let cur = c.task_thread.cur.get();
if cur != 0 && (cur as usize) < c.fc.len() {
c.task_thread.cur.set(cur - 1);
}
drained = true;
} else if drained {
break;
}
state.frame_thread.next = (state.frame_thread.next + 1) % c.fc.len() as u32;
drop(task_thread_lock);
mem::take(&mut *fc.task_thread.retval.try_lock().unwrap())
.err_or(())
.inspect_err(|_| {
state.cached_error_props = out_delayed.p.m.clone();
let _ = mem::take(out_delayed);
})?;
if out_delayed.p.data.is_some() {
let progress = out_delayed.progress.as_ref().unwrap()[1].load(Ordering::Relaxed);
if (out_delayed.visible || c.output_invisible_frames) && progress != FRAME_ERROR {
state.out = out_delayed.clone();
state.event_flags |= out_delayed.flags.into();
}
let _ = mem::take(out_delayed);
if output_picture_ready(c, state, false) {
return output_image(c, state, out);
}
}
}
if output_picture_ready(c, state, true) {
return output_image(c, state, out);
}
Err(EAGAIN)
}
fn gen_picture(c: &Rav1dContext, state: &mut Rav1dState) -> Rav1dResult {
if output_picture_ready(c, state, false) {
return Ok(());
}
let Rav1dData {
data: r#in,
m: props,
} = mem::take(&mut state.in_0);
let Some(mut r#in) = r#in else { return Ok(()) };
while !r#in.is_empty() {
let len = rav1d_parse_obus(c, state, &r#in, &props);
if let Ok(len) = len {
r#in.slice_in_place(len..);
}
if output_picture_ready(c, state, false) {
if !r#in.is_empty() {
state.in_0 = Rav1dData {
data: Some(r#in),
m: props,
}
}
break;
}
len?;
}
Ok(())
}
pub(crate) fn rav1d_send_data(c: &Rav1dContext, in_0: &mut Rav1dData) -> Rav1dResult {
let state = &mut *c.state.try_lock().unwrap();
if in_0.data.is_some() {
let sz = in_0.data.as_ref().unwrap().len();
validate_input!((sz > 0 && sz <= usize::MAX / 2, EINVAL))?;
state.drain = false;
}
if state.in_0.data.is_some() {
return Err(EAGAIN);
}
state.in_0 = in_0.clone();
let res = gen_picture(c, state);
if res.is_ok() {
let _ = mem::take(in_0);
}
res
}
pub(crate) fn rav1d_get_picture(c: &Rav1dContext, out: &mut Rav1dPicture) -> Rav1dResult {
let state = &mut *c.state.try_lock().unwrap();
let drain = mem::replace(&mut state.drain, true);
gen_picture(c, state)?;
mem::take(&mut state.cached_error).err_or(())?;
if output_picture_ready(c, state, c.fc.len() == 1) {
return output_image(c, state, out);
}
if c.fc.len() > 1 && drain {
return drain_picture(c, state, out);
}
Err(EAGAIN)
}
pub(crate) fn rav1d_apply_grain(
c: &Rav1dContext,
out: &mut Rav1dPicture,
in_0: &Rav1dPicture,
) -> Rav1dResult {
if !in_0.has_grain() {
*out = in_0.clone();
return Ok(());
}
let res = rav1d_picture_alloc_copy(&c.logger, out, in_0.p.w, in_0);
if res.is_err() {
let _ = mem::take(out);
return res;
} else {
if c.tc.len() > 1 {
rav1d_task_delayed_fg(c, out, in_0);
} else {
let bpc = out.p.bpc;
let dsp = Rav1dBitDepthDSPContext::get(bpc).unwrap();
let fg = &dsp.fg;
use fg_apply::rav1d_apply_grain;
match bpc {
#[cfg(feature = "bitdepth_8")]
8 => rav1d_apply_grain::<BitDepth8>(fg, out, in_0),
#[cfg(feature = "bitdepth_16")]
10 | 12 => rav1d_apply_grain::<BitDepth16>(fg, out, in_0),
_ => {}
}
}
return Ok(());
};
}
pub(crate) fn rav1d_flush(c: &Rav1dContext) {
let state = &mut *c.state.try_lock().unwrap();
let old_state = mem::take(state);
state.tiles = old_state.tiles;
state.n_tiles = old_state.n_tiles;
state.frame_thread = old_state.frame_thread;
state.operating_point_idc = old_state.operating_point_idc;
state.max_spatial_id = old_state.max_spatial_id;
state.frame_flags = old_state.frame_flags;
state.event_flags = old_state.event_flags;
if c.fc.len() == 1 && c.tc.len() == 1 {
return;
}
c.flush.store(true, Ordering::SeqCst);
if c.tc.len() > 1 {
let mut task_thread_lock = c.task_thread.lock.lock();
for tc in c.tc.iter() {
while !tc.flushed() {
tc.thread_data.cond.wait(&mut task_thread_lock);
}
}
for fc in c.fc.iter() {
fc.task_thread.tasks.clear();
}
c.task_thread.first.store(0, Ordering::SeqCst);
c.task_thread.cur.set(c.fc.len() as u32);
c.task_thread
.reset_task_cur
.store(u32::MAX, Ordering::SeqCst);
c.task_thread.cond_signaled.store(0, Ordering::SeqCst);
}
if c.fc.len() > 1 {
for fc in wrapping_iter(c.fc.iter(), state.frame_thread.next as usize) {
let _ = rav1d_decode_frame_exit(c, fc, Err(EGeneric));
*fc.task_thread.retval.try_lock().unwrap() = None;
let out_delayed = &mut state.frame_thread.out_delayed[fc.index];
if out_delayed.p.frame_hdr.is_some() {
let _ = mem::take(out_delayed);
}
}
state.frame_thread.next = 0;
}
c.flush.store(false, Ordering::SeqCst);
}
#[cfg(any(feature = "c-ffi", test))]
#[cold]
pub(crate) fn rav1d_close(c: Arc<Rav1dContext>) {
let c = &*c;
rav1d_flush(c);
c.tell_worker_threads_to_die();
}
impl Rav1dContext {
pub(crate) fn tell_worker_threads_to_die(&self) {
if self.tc.is_empty() {
return;
}
let ttd = &*self.task_thread;
let _task_thread_lock = ttd.lock.lock();
for tc in self.tc.iter() {
tc.thread_data.die.set(true);
}
ttd.cond.notify_all();
}
}