1use core::{
12 ffi::{c_char, c_void},
13 ptr,
14};
15use std::{
16 ffi::{CStr, CString},
17 path::{Path, PathBuf},
18};
19
20use crate::{
21 error::{from_swift, VisionError},
22 ffi,
23 recognize_text::{BoundingBox, RecognitionLevel, RecognizedText},
24};
25
// Cadence selectors handed to the video bridge alongside an `f64` payload
// (see `cadence_to_ffi` for the mapping from `VideoCadence`).

/// Selector used for `VideoCadence::EveryFrame` (and for "no cadence given"); payload is `0.0`.
const VIDEO_CADENCE_DEFAULT: i32 = 0;
/// Selector used for `VideoCadence::FrameRate`; payload is the frame rate as `f64`.
const VIDEO_CADENCE_FRAME_RATE: i32 = 1;
/// Selector used for `VideoCadence::TimeIntervalSeconds`; payload is the interval in seconds.
const VIDEO_CADENCE_TIME_INTERVAL: i32 = 2;
29
/// Identifies which kind of request a [`Request`] describes.
///
/// The enum is `#[non_exhaustive]`, so matches written outside this crate
/// need a wildcard arm.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum RequestKind {
    /// A text-recognition request.
    RecognizeText,
}
37
38#[derive(Debug, Clone, PartialEq, Eq)]
40pub struct Request {
41 kind: RequestKind,
42 recognition_level: RecognitionLevel,
43 uses_language_correction: bool,
44 prefer_background_processing: bool,
45 uses_cpu_only: bool,
46 revision: Option<usize>,
47}
48
49impl Default for Request {
50 fn default() -> Self {
51 Self::recognize_text()
52 }
53}
54
55impl Request {
56 #[must_use]
58 pub const fn recognize_text() -> Self {
59 Self {
60 kind: RequestKind::RecognizeText,
61 recognition_level: RecognitionLevel::Accurate,
62 uses_language_correction: true,
63 prefer_background_processing: false,
64 uses_cpu_only: false,
65 revision: None,
66 }
67 }
68
69 #[must_use]
71 pub const fn kind(&self) -> RequestKind {
72 self.kind
73 }
74
75 #[must_use]
77 pub const fn with_recognition_level(mut self, recognition_level: RecognitionLevel) -> Self {
78 self.recognition_level = recognition_level;
79 self
80 }
81
82 #[must_use]
84 pub const fn with_language_correction(mut self, enabled: bool) -> Self {
85 self.uses_language_correction = enabled;
86 self
87 }
88
89 #[must_use]
91 pub const fn with_prefer_background_processing(mut self, enabled: bool) -> Self {
92 self.prefer_background_processing = enabled;
93 self
94 }
95
96 #[must_use]
98 pub const fn with_uses_cpu_only(mut self, enabled: bool) -> Self {
99 self.uses_cpu_only = enabled;
100 self
101 }
102
103 #[must_use]
105 pub const fn with_revision(mut self, revision: usize) -> Self {
106 self.revision = Some(revision);
107 self
108 }
109
110 #[must_use]
111 pub const fn recognition_level(&self) -> RecognitionLevel {
112 self.recognition_level
113 }
114
115 #[must_use]
116 pub const fn uses_language_correction(&self) -> bool {
117 self.uses_language_correction
118 }
119
120 #[must_use]
121 pub const fn prefer_background_processing(&self) -> bool {
122 self.prefer_background_processing
123 }
124
125 #[must_use]
126 pub const fn uses_cpu_only(&self) -> bool {
127 self.uses_cpu_only
128 }
129
130 #[must_use]
131 pub const fn revision(&self) -> Option<usize> {
132 self.revision
133 }
134
135 const fn recognition_level_raw(&self) -> i32 {
136 match self.recognition_level {
137 RecognitionLevel::Fast => 0,
138 RecognitionLevel::Accurate => 1,
139 }
140 }
141}
142
143#[derive(Debug, Clone, PartialEq)]
145pub struct Observation {
146 pub uuid: String,
148 pub confidence: f32,
150 pub time_range: Option<TimeRange>,
152}
153
/// A span of time expressed as a start offset and a duration, both in seconds.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct TimeRange {
    /// Start of the range, in seconds.
    pub start_seconds: f64,
    /// Length of the range, in seconds.
    pub duration_seconds: f64,
}
160
161#[derive(Debug, Clone, PartialEq)]
163pub struct RecognizedTextObservation {
164 pub observation: Observation,
165 pub text: String,
166 pub bounding_box: BoundingBox,
167}
168
169impl RecognizedTextObservation {
170 #[must_use]
173 pub fn into_recognized_text(self) -> RecognizedText {
174 self.into()
175 }
176
177 #[must_use]
179 pub fn as_recognized_text(&self) -> RecognizedText {
180 self.clone().into()
181 }
182}
183
184impl From<RecognizedTextObservation> for RecognizedText {
185 fn from(value: RecognizedTextObservation) -> Self {
186 Self {
187 text: value.text,
188 confidence: value.observation.confidence,
189 bounding_box: value.bounding_box,
190 }
191 }
192}
193
/// Performs requests against a single image identified by its file path.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ImageRequestHandler {
    /// Path of the image handed to the bridge on every `perform` call.
    image_path: PathBuf,
}
199
200impl ImageRequestHandler {
201 #[must_use]
203 pub fn new(image_path: impl AsRef<Path>) -> Self {
204 Self {
205 image_path: image_path.as_ref().to_path_buf(),
206 }
207 }
208
209 pub fn perform(&self, request: &Request) -> Result<Vec<RecognizedTextObservation>, VisionError> {
216 let image_c = path_to_cstring(&self.image_path, "image path")?;
217 let mut out_array: *mut c_void = ptr::null_mut();
218 let mut out_count: usize = 0;
219 let mut err_msg: *mut c_char = ptr::null_mut();
220 let status = unsafe {
221 ffi::vn_image_request_handler_perform_text_request(
222 image_c.as_ptr(),
223 request.recognition_level_raw(),
224 request.uses_language_correction,
225 request.prefer_background_processing,
226 request.uses_cpu_only,
227 request.revision.unwrap_or_default(),
228 request.revision.is_some(),
229 &mut out_array,
230 &mut out_count,
231 &mut err_msg,
232 )
233 };
234 if status != ffi::status::OK {
235 return Err(unsafe { from_swift(status, err_msg) });
236 }
237 Ok(collect_request_observations(out_array, out_count))
238 }
239}
240
/// Owns an opaque sequence-request-handler handle obtained from the bridge.
///
/// The handle is created by `vn_sequence_request_handler_create` and released
/// with `vn_sequence_request_handler_free` when the value is dropped. Because
/// it holds a raw pointer, the type is neither `Send` nor `Sync`.
///
/// `Debug` is derived so this type can be logged or embedded in other
/// `Debug` types like every other public type in this module; the output
/// shows only the pointer value.
#[derive(Debug)]
pub struct SequenceRequestHandler {
    /// Opaque bridge handle; non-null for every value built through `new`.
    handle: *mut c_void,
}
245
246impl SequenceRequestHandler {
247 pub fn new() -> Result<Self, VisionError> {
254 let mut handle: *mut c_void = ptr::null_mut();
255 let mut err_msg: *mut c_char = ptr::null_mut();
256 let status = unsafe { ffi::vn_sequence_request_handler_create(&mut handle, &mut err_msg) };
257 if status != ffi::status::OK {
258 return Err(unsafe { from_swift(status, err_msg) });
259 }
260 if handle.is_null() {
261 return Err(VisionError::Unknown {
262 code: ffi::status::UNKNOWN,
263 message: "sequence request handler bridge returned a null handle".into(),
264 });
265 }
266 Ok(Self { handle })
267 }
268
269 pub fn perform(
277 &mut self,
278 image_path: impl AsRef<Path>,
279 request: &Request,
280 ) -> Result<Vec<RecognizedTextObservation>, VisionError> {
281 let image_c = path_to_cstring(image_path.as_ref(), "image path")?;
282 let mut out_array: *mut c_void = ptr::null_mut();
283 let mut out_count: usize = 0;
284 let mut err_msg: *mut c_char = ptr::null_mut();
285 let status = unsafe {
286 ffi::vn_sequence_request_handler_perform_text_request(
287 self.handle,
288 image_c.as_ptr(),
289 request.recognition_level_raw(),
290 request.uses_language_correction,
291 request.prefer_background_processing,
292 request.uses_cpu_only,
293 request.revision.unwrap_or_default(),
294 request.revision.is_some(),
295 &mut out_array,
296 &mut out_count,
297 &mut err_msg,
298 )
299 };
300 if status != ffi::status::OK {
301 return Err(unsafe { from_swift(status, err_msg) });
302 }
303 Ok(collect_request_observations(out_array, out_count))
304 }
305}
306
307impl Drop for SequenceRequestHandler {
308 fn drop(&mut self) {
309 if !self.handle.is_null() {
310 unsafe { ffi::vn_sequence_request_handler_free(self.handle) };
311 }
312 }
313}
314
/// How often frames of a video are analysed.
///
/// The enum is `#[non_exhaustive]`, so matches written outside this crate
/// need a wildcard arm.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum VideoCadence {
    /// Use the bridge's default cadence selector.
    EveryFrame,
    /// Analyse at the given frame rate; must be non-zero and fit in a `u32`.
    FrameRate(usize),
    /// Analyse at the given interval in seconds; must be finite and positive.
    TimeIntervalSeconds(f64),
}
326
327#[derive(Debug, Clone, Copy, PartialEq)]
329pub struct VideoProcessingOptions {
330 pub cadence: Option<VideoCadence>,
331}
332
333impl Default for VideoProcessingOptions {
334 fn default() -> Self {
335 Self::new()
336 }
337}
338
339impl VideoProcessingOptions {
340 #[must_use]
341 pub const fn new() -> Self {
342 Self { cadence: None }
343 }
344
345 #[must_use]
346 pub const fn with_cadence(mut self, cadence: VideoCadence) -> Self {
347 self.cadence = Some(cadence);
348 self
349 }
350}
351
/// Analyses a single video identified by its file path.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct VideoProcessor {
    /// Path of the video handed to the bridge on every `analyze` call.
    video_path: PathBuf,
}
357
358impl VideoProcessor {
359 #[must_use]
361 pub fn new(video_path: impl AsRef<Path>) -> Self {
362 Self {
363 video_path: video_path.as_ref().to_path_buf(),
364 }
365 }
366
367 pub fn analyze(
374 &self,
375 request: &Request,
376 options: VideoProcessingOptions,
377 ) -> Result<Vec<RecognizedTextObservation>, VisionError> {
378 let video_c = path_to_cstring(&self.video_path, "video path")?;
379 let (cadence_kind, cadence_value) = cadence_to_ffi(options.cadence)?;
380 let mut out_array: *mut c_void = ptr::null_mut();
381 let mut out_count: usize = 0;
382 let mut err_msg: *mut c_char = ptr::null_mut();
383 let status = unsafe {
384 ffi::vn_video_processor_analyze_text_request(
385 video_c.as_ptr(),
386 request.recognition_level_raw(),
387 request.uses_language_correction,
388 request.prefer_background_processing,
389 request.uses_cpu_only,
390 request.revision.unwrap_or_default(),
391 request.revision.is_some(),
392 cadence_kind,
393 cadence_value,
394 &mut out_array,
395 &mut out_count,
396 &mut err_msg,
397 )
398 };
399 if status != ffi::status::OK {
400 return Err(unsafe { from_swift(status, err_msg) });
401 }
402 Ok(collect_request_observations(out_array, out_count))
403 }
404}
405
406fn cadence_to_ffi(cadence: Option<VideoCadence>) -> Result<(i32, f64), VisionError> {
407 match cadence.unwrap_or(VideoCadence::EveryFrame) {
408 VideoCadence::EveryFrame => Ok((VIDEO_CADENCE_DEFAULT, 0.0)),
409 VideoCadence::FrameRate(frame_rate) => {
410 if frame_rate == 0 {
411 return Err(VisionError::InvalidArgument(
412 "video cadence frame rate must be greater than zero".into(),
413 ));
414 }
415 let frame_rate = u32::try_from(frame_rate).map_err(|_| {
416 VisionError::InvalidArgument(
417 "video cadence frame rate exceeds the supported range".into(),
418 )
419 })?;
420 Ok((VIDEO_CADENCE_FRAME_RATE, f64::from(frame_rate)))
421 }
422 VideoCadence::TimeIntervalSeconds(seconds) => {
423 if !seconds.is_finite() || seconds <= 0.0 {
424 return Err(VisionError::InvalidArgument(
425 "video cadence time interval must be a finite positive number".into(),
426 ));
427 }
428 Ok((VIDEO_CADENCE_TIME_INTERVAL, seconds))
429 }
430 }
431}
432
433fn collect_request_observations(
434 array: *mut c_void,
435 count: usize,
436) -> Vec<RecognizedTextObservation> {
437 if array.is_null() || count == 0 {
438 return Vec::new();
439 }
440
441 let typed = array.cast::<ffi::RequestObservationRaw>();
442 let mut observations = Vec::with_capacity(count);
443 for index in 0..count {
444 let raw = unsafe { &*typed.add(index) };
445 let uuid = c_string_or_empty(raw.uuid);
446 let text = c_string_or_empty(raw.text);
447 let time_range = raw.has_time_range.then_some(TimeRange {
448 start_seconds: raw.time_range_start_seconds,
449 duration_seconds: raw.time_range_duration_seconds,
450 });
451 observations.push(RecognizedTextObservation {
452 observation: Observation {
453 uuid,
454 confidence: raw.confidence,
455 time_range,
456 },
457 text,
458 bounding_box: BoundingBox {
459 x: raw.bbox_x,
460 y: raw.bbox_y,
461 width: raw.bbox_w,
462 height: raw.bbox_h,
463 },
464 });
465 }
466
467 unsafe { ffi::vn_request_observations_free(array, count) };
468 observations
469}
470
/// Copies a C string into an owned `String`, mapping a null pointer to `""`.
///
/// Invalid UTF-8 is replaced lossily. The pointer is only read; it is never
/// freed here. A non-null `ptr` must point to a NUL-terminated string that
/// stays valid for the duration of the call.
fn c_string_or_empty(ptr: *mut c_char) -> String {
    if ptr.is_null() {
        return String::new();
    }
    // SAFETY: `ptr` is non-null; the caller guarantees it is NUL-terminated.
    let borrowed = unsafe { CStr::from_ptr(ptr) };
    String::from_utf8_lossy(borrowed.to_bytes()).into_owned()
}
478
479fn path_to_cstring(path: &Path, label: &str) -> Result<CString, VisionError> {
480 let path_str = path
481 .to_str()
482 .ok_or_else(|| VisionError::InvalidArgument(format!("non-UTF-8 {label}")))?;
483 CString::new(path_str)
484 .map_err(|err| VisionError::InvalidArgument(format!("{label} NUL byte: {err}")))
485}
486
487#[doc(hidden)]
488pub fn _test_helper_render_text_video(
492 first_text: &str,
493 second_text: &str,
494 width: i32,
495 height: i32,
496 fps: i32,
497 frames_per_text: i32,
498 path: &Path,
499) -> Result<(), VisionError> {
500 let first_c =
501 CString::new(first_text).map_err(|err| VisionError::InvalidArgument(err.to_string()))?;
502 let second_c =
503 CString::new(second_text).map_err(|err| VisionError::InvalidArgument(err.to_string()))?;
504 let path_c = CString::new(path.to_string_lossy().as_ref())
505 .map_err(|err| VisionError::InvalidArgument(err.to_string()))?;
506 let status = unsafe {
507 ffi::vn_test_helper_render_text_video(
508 first_c.as_ptr(),
509 second_c.as_ptr(),
510 width,
511 height,
512 fps,
513 frames_per_text,
514 path_c.as_ptr(),
515 )
516 };
517 if status != ffi::status::OK {
518 return Err(VisionError::Unknown {
519 code: status,
520 message: "video render helper failed".into(),
521 });
522 }
523 Ok(())
524}