Skip to main content

apple_vision/text_rectangles/
mod.rs

1#![allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
2#![allow(clippy::too_long_first_doc_paragraph)]
3//! `VNDetectTextRectanglesRequest` — text-region detection (no OCR).
4
5use std::ffi::CString;
6use std::path::Path;
7use std::ptr;
8
9use crate::error::{from_swift, VisionError};
10use crate::ffi;
11use crate::request_base::{ImageBasedRequest, NormalizedRect};
12
/// Flat, copyable description of one detected text region.
///
/// Coordinates are normalised to the `0..1` range relative to the image and
/// use Vision's bottom-left origin.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct TextRect {
    /// Normalised X coordinate of the rectangle's origin.
    pub x: f64,
    /// Normalised Y coordinate of the rectangle's origin.
    pub y: f64,
    /// Normalised width of the rectangle.
    pub w: f64,
    /// Normalised height of the rectangle.
    pub h: f64,
    /// Confidence value reported for this detection.
    pub confidence: f32,
}
23
24/// A dedicated `VNTextObservation` wrapper.
25#[derive(Debug, Clone, PartialEq)]
26pub struct TextObservation {
27    pub bounding_box: NormalizedRect,
28    pub confidence: f32,
29    pub character_boxes: Vec<NormalizedRect>,
30}
31
32impl TextObservation {
33    #[must_use]
34    pub fn into_text_rect(self) -> TextRect {
35        TextRect {
36            x: self.bounding_box.x,
37            y: self.bounding_box.y,
38            w: self.bounding_box.width,
39            h: self.bounding_box.height,
40            confidence: self.confidence,
41        }
42    }
43}
44
45impl From<TextObservation> for TextRect {
46    fn from(value: TextObservation) -> Self {
47        value.into_text_rect()
48    }
49}
50
51/// Builder for `VNDetectTextRectanglesRequest`.
52#[derive(Debug, Clone, PartialEq, Default)]
53pub struct TextRectanglesRequest {
54    image_based: ImageBasedRequest,
55    report_character_boxes: bool,
56}
57
58impl TextRectanglesRequest {
59    #[must_use]
60    pub const fn new() -> Self {
61        Self {
62            image_based: ImageBasedRequest::new(),
63            report_character_boxes: false,
64        }
65    }
66
67    #[must_use]
68    pub const fn with_image_based_request(mut self, image_based: ImageBasedRequest) -> Self {
69        self.image_based = image_based;
70        self
71    }
72
73    #[must_use]
74    pub const fn with_report_character_boxes(mut self, report_character_boxes: bool) -> Self {
75        self.report_character_boxes = report_character_boxes;
76        self
77    }
78
79    #[must_use]
80    pub const fn image_based_request(&self) -> &ImageBasedRequest {
81        &self.image_based
82    }
83
84    #[must_use]
85    pub const fn report_character_boxes(&self) -> bool {
86        self.report_character_boxes
87    }
88
89    /// Perform the request against `path` and return dedicated `VNTextObservation`
90    /// wrappers.
91    ///
92    /// # Errors
93    ///
94    /// Returns [`VisionError`] if the image cannot be loaded or Vision rejects
95    /// the request.
96    pub fn perform(&self, path: impl AsRef<Path>) -> Result<Vec<TextObservation>, VisionError> {
97        let path_str = path
98            .as_ref()
99            .to_str()
100            .ok_or_else(|| VisionError::InvalidArgument("non-UTF-8 path".into()))?;
101        let cpath = CString::new(path_str)
102            .map_err(|e| VisionError::InvalidArgument(format!("path NUL byte: {e}")))?;
103        let mut observations_ptr: *mut ffi::TextObservationRaw = ptr::null_mut();
104        let mut count: usize = 0;
105        let mut err = ptr::null_mut();
106        let roi = self.image_based.region_of_interest();
107        // SAFETY: all pointer arguments are valid stack locations or bridge-owned handles; strings are valid C strings for the duration of the call.
108        let status = unsafe {
109            ffi::vn_detect_text_observations_in_path(
110                cpath.as_ptr(),
111                self.report_character_boxes,
112                roi.map_or(0.0, |rect| rect.x),
113                roi.map_or(0.0, |rect| rect.y),
114                roi.map_or(1.0, |rect| rect.width),
115                roi.map_or(1.0, |rect| rect.height),
116                roi.is_some(),
117                self.image_based.prefer_background_processing(),
118                self.image_based.uses_cpu_only(),
119                self.image_based.revision().unwrap_or_default(),
120                self.image_based.revision().is_some(),
121                &mut observations_ptr,
122                &mut count,
123                &mut err,
124            )
125        };
126        if status != ffi::status::OK {
127            // SAFETY: the error pointer is either null or a bridge-allocated C string; `from_swift` frees it.
128            return Err(unsafe { from_swift(status, err) });
129        }
130        if observations_ptr.is_null() || count == 0 {
131            return Ok(Vec::new());
132        }
133        let mut out = Vec::with_capacity(count);
134        for index in 0..count {
135            // SAFETY: the pointer is valid for the reported element count; the index is in bounds.
136            let raw = unsafe { &*observations_ptr.add(index) };
137            let mut character_boxes = Vec::with_capacity(raw.character_box_count);
138            for char_index in 0..raw.character_box_count {
139                // SAFETY: the pointer is valid for the reported element count; the index is in bounds.
140                let character_box = unsafe { &*raw.character_boxes.add(char_index) };
141                character_boxes.push(NormalizedRect::new(
142                    character_box.x,
143                    character_box.y,
144                    character_box.w,
145                    character_box.h,
146                ));
147            }
148            out.push(TextObservation {
149                bounding_box: NormalizedRect::new(raw.bbox_x, raw.bbox_y, raw.bbox_w, raw.bbox_h),
150                confidence: raw.confidence,
151                character_boxes,
152            });
153        }
154        // SAFETY: the pointer/count pair was allocated by the bridge and is freed exactly once here.
155        unsafe { ffi::vn_text_observations_free(observations_ptr.cast(), count) };
156        Ok(out)
157    }
158}
159
160/// Detect dedicated `VNTextObservation` wrappers in the image at `path`.
161///
162/// # Errors
163///
164/// Returns [`VisionError`] if the image fails to load or the Vision request
165/// errors.
166pub fn detect_text_observations(
167    path: impl AsRef<Path>,
168    report_character_boxes: bool,
169) -> Result<Vec<TextObservation>, VisionError> {
170    TextRectanglesRequest::new()
171        .with_report_character_boxes(report_character_boxes)
172        .perform(path)
173}
174
175/// Detect text-region rectangles in the image at `path`.
176///
177/// # Errors
178///
179/// Returns [`VisionError`] when the image fails to load or the Vision request
180/// errors.
181pub fn detect_text_rectangles(
182    path: impl AsRef<Path>,
183    report_character_boxes: bool,
184) -> Result<Vec<TextRect>, VisionError> {
185    detect_text_observations(path, report_character_boxes)
186        .map(|observations| observations.into_iter().map(Into::into).collect())
187}