use core::{
ffi::{c_char, c_void},
ptr,
};
use std::{
ffi::{CStr, CString},
path::{Path, PathBuf},
};
use crate::{
error::{from_swift, VisionError},
ffi,
recognize_text::{BoundingBox, RecognitionLevel, RecognizedText},
};
// Cadence selectors handed to the video bridge as its `cadence_kind` argument; the mapping
// from `VideoCadence` to these values lives in `cadence_to_ffi`.
// NOTE(review): presumably these numbers must match the Swift side of the bridge — confirm
// before renumbering.
/// Selector sent when no cadence is requested or `VideoCadence::EveryFrame` is chosen.
const VIDEO_CADENCE_DEFAULT: i32 = 0;
/// Selector sent for `VideoCadence::FrameRate`; the accompanying value is the frame rate.
const VIDEO_CADENCE_FRAME_RATE: i32 = 1;
/// Selector sent for `VideoCadence::TimeIntervalSeconds`; the accompanying value is the
/// interval in seconds.
const VIDEO_CADENCE_TIME_INTERVAL: i32 = 2;
/// The kind of request a [`Request`] describes.
///
/// Marked `#[non_exhaustive]`, so code outside this crate must include a wildcard arm when
/// matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum RequestKind {
/// Text recognition; the kind set by [`Request::recognize_text`].
RecognizeText,
}
/// Settings for one request, consumed by the image, sequence and video entry points in this
/// module.
///
/// Start from [`Request::recognize_text`] and adjust with the `with_*` methods. Apart from
/// `kind`, every field is passed to the bridge when the request is performed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Request {
/// Which kind of request this is.
kind: RequestKind,
/// Recognition level; sent to the bridge as a raw integer (`Fast` = 0, `Accurate` = 1).
recognition_level: RecognitionLevel,
/// Passed through to the bridge unchanged.
/// NOTE(review): presumably mirrors Vision's `usesLanguageCorrection` — confirm.
uses_language_correction: bool,
/// Passed through to the bridge unchanged.
prefer_background_processing: bool,
/// Passed through to the bridge unchanged.
uses_cpu_only: bool,
/// Explicit revision. `None` is sent as the value 0 together with a `false`
/// "revision present" flag.
revision: Option<usize>,
}
impl Default for Request {
fn default() -> Self {
Self::recognize_text()
}
}
impl Request {
    /// Creates a text-recognition request with the default settings: accurate recognition,
    /// language correction enabled, background preference and CPU-only both disabled, and
    /// no explicit revision.
    #[must_use]
    pub const fn recognize_text() -> Self {
        Self {
            revision: None,
            uses_cpu_only: false,
            prefer_background_processing: false,
            uses_language_correction: true,
            recognition_level: RecognitionLevel::Accurate,
            kind: RequestKind::RecognizeText,
        }
    }

    /// Returns the kind of request this value describes.
    #[must_use]
    pub const fn kind(&self) -> RequestKind {
        self.kind
    }

    /// Returns the request with its recognition level replaced.
    #[must_use]
    pub const fn with_recognition_level(self, recognition_level: RecognitionLevel) -> Self {
        Self {
            recognition_level,
            ..self
        }
    }

    /// Returns the request with language correction switched on or off.
    #[must_use]
    pub const fn with_language_correction(self, enabled: bool) -> Self {
        Self {
            uses_language_correction: enabled,
            ..self
        }
    }

    /// Returns the request with the background-processing preference switched on or off.
    #[must_use]
    pub const fn with_prefer_background_processing(self, enabled: bool) -> Self {
        Self {
            prefer_background_processing: enabled,
            ..self
        }
    }

    /// Returns the request with the CPU-only flag switched on or off.
    #[must_use]
    pub const fn with_uses_cpu_only(self, enabled: bool) -> Self {
        Self {
            uses_cpu_only: enabled,
            ..self
        }
    }

    /// Returns the request pinned to an explicit `revision`.
    #[must_use]
    pub const fn with_revision(self, revision: usize) -> Self {
        Self {
            revision: Some(revision),
            ..self
        }
    }

    /// Returns the configured recognition level.
    #[must_use]
    pub const fn recognition_level(&self) -> RecognitionLevel {
        self.recognition_level
    }

    /// Returns whether language correction is enabled.
    #[must_use]
    pub const fn uses_language_correction(&self) -> bool {
        self.uses_language_correction
    }

    /// Returns whether background processing is preferred.
    #[must_use]
    pub const fn prefer_background_processing(&self) -> bool {
        self.prefer_background_processing
    }

    /// Returns whether the CPU-only flag is set.
    #[must_use]
    pub const fn uses_cpu_only(&self) -> bool {
        self.uses_cpu_only
    }

    /// Returns the explicit revision, or `None` when none was set.
    #[must_use]
    pub const fn revision(&self) -> Option<usize> {
        self.revision
    }

    /// Integer encoding of the recognition level as passed to the bridge:
    /// `Fast` is 0 and `Accurate` is 1.
    const fn recognition_level_raw(&self) -> i32 {
        match self.recognition_level {
            RecognitionLevel::Accurate => 1,
            RecognitionLevel::Fast => 0,
        }
    }
}
/// Fields common to an observation returned by the bridge.
#[derive(Debug, Clone, PartialEq)]
pub struct Observation {
/// Identifier string copied from the bridge record; empty when the bridge supplied a null
/// pointer.
pub uuid: String,
/// Confidence value copied from the bridge record.
/// NOTE(review): presumably in the range 0.0..=1.0 as in Vision — confirm.
pub confidence: f32,
/// Time range attached to the observation; `Some` only when the bridge record flags that
/// one is present.
pub time_range: Option<TimeRange>,
}
/// A start/duration pair, in seconds, copied from a bridge record.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct TimeRange {
/// Start of the range, in seconds.
pub start_seconds: f64,
/// Length of the range, in seconds.
pub duration_seconds: f64,
}
/// A recognized piece of text together with its generic observation data.
#[derive(Debug, Clone, PartialEq)]
pub struct RecognizedTextObservation {
/// UUID, confidence and optional time range of the observation.
pub observation: Observation,
/// The recognized string; empty when the bridge supplied a null pointer.
pub text: String,
/// Bounding box copied from the bridge record.
/// NOTE(review): coordinate space is defined by the bridge — confirm.
pub bounding_box: BoundingBox,
}
impl RecognizedTextObservation {
#[must_use]
pub fn into_recognized_text(self) -> RecognizedText {
self.into()
}
#[must_use]
pub fn as_recognized_text(&self) -> RecognizedText {
self.clone().into()
}
}
impl From<RecognizedTextObservation> for RecognizedText {
fn from(value: RecognizedTextObservation) -> Self {
Self {
text: value.text,
confidence: value.observation.confidence,
bounding_box: value.bounding_box,
}
}
}
/// Performs requests against a single image file, identified by its path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ImageRequestHandler {
/// Path of the image, handed to the bridge as a C string on every `perform` call.
image_path: PathBuf,
}
impl ImageRequestHandler {
#[must_use]
pub fn new(image_path: impl AsRef<Path>) -> Self {
Self {
image_path: image_path.as_ref().to_path_buf(),
}
}
pub fn perform(&self, request: &Request) -> Result<Vec<RecognizedTextObservation>, VisionError> {
let image_c = path_to_cstring(&self.image_path, "image path")?;
let mut out_array: *mut c_void = ptr::null_mut();
let mut out_count: usize = 0;
let mut err_msg: *mut c_char = ptr::null_mut();
let status = unsafe {
ffi::vn_image_request_handler_perform_text_request(
image_c.as_ptr(),
request.recognition_level_raw(),
request.uses_language_correction,
request.prefer_background_processing,
request.uses_cpu_only,
request.revision.unwrap_or_default(),
request.revision.is_some(),
&mut out_array,
&mut out_count,
&mut err_msg,
)
};
if status != ffi::status::OK {
return Err(unsafe { from_swift(status, err_msg) });
}
Ok(collect_request_observations(out_array, out_count))
}
}
/// Owns a sequence-request-handler handle created by the bridge.
///
/// The handle is obtained in `SequenceRequestHandler::new` and released when the value is
/// dropped. `Debug` prints the raw handle address only.
#[derive(Debug)]
pub struct SequenceRequestHandler {
    /// Opaque handle returned by the bridge; `new` never stores a null handle.
    handle: *mut c_void,
}
impl SequenceRequestHandler {
    /// Asks the bridge for a new sequence request handler.
    ///
    /// # Errors
    ///
    /// Returns the error translated by `from_swift` when the bridge reports a non-OK
    /// status, and `VisionError::Unknown` when it reports success but yields a null handle.
    pub fn new() -> Result<Self, VisionError> {
        let mut raw_handle: *mut c_void = ptr::null_mut();
        let mut error_message: *mut c_char = ptr::null_mut();
        // SAFETY: both out-pointers refer to locals that outlive the call.
        let status = unsafe {
            ffi::vn_sequence_request_handler_create(&mut raw_handle, &mut error_message)
        };
        if status != ffi::status::OK {
            // SAFETY: `status` and `error_message` come straight from the bridge call above.
            // NOTE(review): `from_swift`'s pointer contract lives in `crate::error` — confirm.
            Err(unsafe { from_swift(status, error_message) })
        } else if raw_handle.is_null() {
            // A success status with no handle would leave nothing usable to wrap.
            Err(VisionError::Unknown {
                code: ffi::status::UNKNOWN,
                message: "sequence request handler bridge returned a null handle".into(),
            })
        } else {
            Ok(Self { handle: raw_handle })
        }
    }

    /// Runs `request` against the image at `image_path` using this handler's handle.
    ///
    /// # Errors
    ///
    /// Returns `VisionError::InvalidArgument` when the image path is not valid UTF-8 or
    /// contains a NUL byte, and the error translated by `from_swift` when the bridge
    /// reports a non-OK status.
    pub fn perform(
        &mut self,
        image_path: impl AsRef<Path>,
        request: &Request,
    ) -> Result<Vec<RecognizedTextObservation>, VisionError> {
        let c_path = path_to_cstring(image_path.as_ref(), "image path")?;
        // An absent revision is sent as 0 together with a `false` presence flag.
        let (revision, has_revision) = match request.revision {
            Some(value) => (value, true),
            None => (0, false),
        };
        let mut observations: *mut c_void = ptr::null_mut();
        let mut observation_count = 0_usize;
        let mut error_message: *mut c_char = ptr::null_mut();
        // SAFETY: `self.handle` is the non-null handle stored by `new`, `c_path` is a
        // NUL-terminated buffer that stays alive past the call, and the out-pointers refer to
        // locals that outlive it.
        // NOTE(review): the rest of the argument contract is defined by the bridge
        // declaration in `crate::ffi` — confirm there.
        let status = unsafe {
            ffi::vn_sequence_request_handler_perform_text_request(
                self.handle,
                c_path.as_ptr(),
                request.recognition_level_raw(),
                request.uses_language_correction,
                request.prefer_background_processing,
                request.uses_cpu_only,
                revision,
                has_revision,
                &mut observations,
                &mut observation_count,
                &mut error_message,
            )
        };
        if status == ffi::status::OK {
            Ok(collect_request_observations(observations, observation_count))
        } else {
            // SAFETY: `status` and `error_message` come straight from the bridge call above.
            Err(unsafe { from_swift(status, error_message) })
        }
    }
}
impl Drop for SequenceRequestHandler {
    /// Hands the handle back to the bridge; a null handle is skipped.
    fn drop(&mut self) {
        if self.handle.is_null() {
            return;
        }
        // SAFETY: the handle is non-null here. As far as this file shows, it was produced by
        // the bridge's create call in `new` and is freed only in this destructor.
        unsafe { ffi::vn_sequence_request_handler_free(self.handle) };
    }
}
/// How often frames of a video are analyzed.
///
/// Marked `#[non_exhaustive]`, so code outside this crate must include a wildcard arm when
/// matching on it.
#[derive(Debug, Clone, Copy, PartialEq)]
#[non_exhaustive]
pub enum VideoCadence {
/// Sent to the bridge as the default cadence selector with a value of 0.0.
EveryFrame,
/// A frame rate. Must be non-zero and fit in a `u32`, otherwise
/// `VideoProcessor::analyze` returns `VisionError::InvalidArgument`.
FrameRate(usize),
/// An interval in seconds. Must be finite and greater than zero, otherwise
/// `VideoProcessor::analyze` returns `VisionError::InvalidArgument`.
TimeIntervalSeconds(f64),
}
/// Options accepted by `VideoProcessor::analyze`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct VideoProcessingOptions {
/// Requested cadence; `None` is treated the same as `VideoCadence::EveryFrame`.
pub cadence: Option<VideoCadence>,
}
impl Default for VideoProcessingOptions {
fn default() -> Self {
Self::new()
}
}
impl VideoProcessingOptions {
    /// Creates options with no explicit cadence.
    #[must_use]
    pub const fn new() -> Self {
        let cadence = None;
        Self { cadence }
    }

    /// Returns the options with the cadence set to `cadence`.
    #[must_use]
    pub const fn with_cadence(self, cadence: VideoCadence) -> Self {
        let mut updated = self;
        updated.cadence = Some(cadence);
        updated
    }
}
/// Analyzes a single video file, identified by its path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VideoProcessor {
/// Path of the video, handed to the bridge as a C string on every `analyze` call.
video_path: PathBuf,
}
impl VideoProcessor {
#[must_use]
pub fn new(video_path: impl AsRef<Path>) -> Self {
Self {
video_path: video_path.as_ref().to_path_buf(),
}
}
pub fn analyze(
&self,
request: &Request,
options: VideoProcessingOptions,
) -> Result<Vec<RecognizedTextObservation>, VisionError> {
let video_c = path_to_cstring(&self.video_path, "video path")?;
let (cadence_kind, cadence_value) = cadence_to_ffi(options.cadence)?;
let mut out_array: *mut c_void = ptr::null_mut();
let mut out_count: usize = 0;
let mut err_msg: *mut c_char = ptr::null_mut();
let status = unsafe {
ffi::vn_video_processor_analyze_text_request(
video_c.as_ptr(),
request.recognition_level_raw(),
request.uses_language_correction,
request.prefer_background_processing,
request.uses_cpu_only,
request.revision.unwrap_or_default(),
request.revision.is_some(),
cadence_kind,
cadence_value,
&mut out_array,
&mut out_count,
&mut err_msg,
)
};
if status != ffi::status::OK {
return Err(unsafe { from_swift(status, err_msg) });
}
Ok(collect_request_observations(out_array, out_count))
}
}
/// Translates an optional cadence into the `(selector, value)` pair passed to the bridge.
///
/// `None` and `EveryFrame` both map to the default selector with a value of 0.0.
///
/// # Errors
///
/// Returns `VisionError::InvalidArgument` when a frame rate is zero or does not fit in a
/// `u32`, or when a time interval is not a finite number greater than zero.
fn cadence_to_ffi(cadence: Option<VideoCadence>) -> Result<(i32, f64), VisionError> {
    let invalid = |message: &str| VisionError::InvalidArgument(message.to_owned());
    match cadence {
        None | Some(VideoCadence::EveryFrame) => Ok((VIDEO_CADENCE_DEFAULT, 0.0)),
        Some(VideoCadence::FrameRate(0)) => Err(invalid(
            "video cadence frame rate must be greater than zero",
        )),
        // Going through `u32` keeps the conversion to `f64` lossless.
        Some(VideoCadence::FrameRate(frame_rate)) => match u32::try_from(frame_rate) {
            Ok(rate) => Ok((VIDEO_CADENCE_FRAME_RATE, f64::from(rate))),
            Err(_) => Err(invalid(
                "video cadence frame rate exceeds the supported range",
            )),
        },
        Some(VideoCadence::TimeIntervalSeconds(seconds)) => {
            if seconds.is_finite() && seconds > 0.0 {
                Ok((VIDEO_CADENCE_TIME_INTERVAL, seconds))
            } else {
                Err(invalid(
                    "video cadence time interval must be a finite positive number",
                ))
            }
        }
    }
}
fn collect_request_observations(
array: *mut c_void,
count: usize,
) -> Vec<RecognizedTextObservation> {
if array.is_null() || count == 0 {
return Vec::new();
}
let typed = array.cast::<ffi::RequestObservationRaw>();
let mut observations = Vec::with_capacity(count);
for index in 0..count {
let raw = unsafe { &*typed.add(index) };
let uuid = c_string_or_empty(raw.uuid);
let text = c_string_or_empty(raw.text);
let time_range = raw.has_time_range.then_some(TimeRange {
start_seconds: raw.time_range_start_seconds,
duration_seconds: raw.time_range_duration_seconds,
});
observations.push(RecognizedTextObservation {
observation: Observation {
uuid,
confidence: raw.confidence,
time_range,
},
text,
bounding_box: BoundingBox {
x: raw.bbox_x,
y: raw.bbox_y,
width: raw.bbox_w,
height: raw.bbox_h,
},
});
}
unsafe { ffi::vn_request_observations_free(array, count) };
observations
}
/// Copies the C string at `ptr` into an owned `String`, replacing invalid UTF-8 sequences
/// with U+FFFD. A null pointer yields an empty string.
///
/// The pointed-to buffer is only read; freeing it stays the caller's responsibility.
fn c_string_or_empty(ptr: *mut c_char) -> String {
    if ptr.is_null() {
        return String::new();
    }
    // SAFETY: `ptr` is non-null. The caller must pass a NUL-terminated string that remains
    // valid for the duration of this call; nothing here can verify that.
    let borrowed = unsafe { CStr::from_ptr(ptr) };
    String::from_utf8_lossy(borrowed.to_bytes()).into_owned()
}
/// Converts `path` into a NUL-terminated C string for the bridge.
///
/// `label` names the path in error messages (for example `"image path"`).
///
/// # Errors
///
/// Returns `VisionError::InvalidArgument` when the path is not valid UTF-8 or contains an
/// interior NUL byte.
fn path_to_cstring(path: &Path, label: &str) -> Result<CString, VisionError> {
    let utf8 = match path.to_str() {
        Some(text) => text,
        None => {
            return Err(VisionError::InvalidArgument(format!(
                "non-UTF-8 {label}"
            )))
        }
    };
    match CString::new(utf8) {
        Ok(c_path) => Ok(c_path),
        Err(err) => Err(VisionError::InvalidArgument(format!(
            "{label} NUL byte: {err}"
        ))),
    }
}
#[doc(hidden)]
pub fn _test_helper_render_text_video(
first_text: &str,
second_text: &str,
width: i32,
height: i32,
fps: i32,
frames_per_text: i32,
path: &Path,
) -> Result<(), VisionError> {
let first_c =
CString::new(first_text).map_err(|err| VisionError::InvalidArgument(err.to_string()))?;
let second_c =
CString::new(second_text).map_err(|err| VisionError::InvalidArgument(err.to_string()))?;
let path_c = CString::new(path.to_string_lossy().as_ref())
.map_err(|err| VisionError::InvalidArgument(err.to_string()))?;
let status = unsafe {
ffi::vn_test_helper_render_text_video(
first_c.as_ptr(),
second_c.as_ptr(),
width,
height,
fps,
frames_per_text,
path_c.as_ptr(),
)
};
if status != ffi::status::OK {
return Err(VisionError::Unknown {
code: status,
message: "video render helper failed".into(),
});
}
Ok(())
}