apple-vision 0.12.0

Safe Rust bindings for Apple's Vision framework — OCR, object detection, face landmarks on macOS
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
//! Raw FFI declarations matching the Swift bridge in
//! `swift-bridge/Sources/VisionBridge/Vision.swift`.

#![allow(missing_docs, non_camel_case_types)]

use core::ffi::{c_char, c_void};

/// Mirrors `VNRecognizedTextRaw` in Vision.swift. Layout-compatible.
///
/// One recognized text item: a C string, a confidence value and a bounding
/// box. Because the struct is `#[repr(C)]`, the field order and types below
/// are part of the ABI and must not be reordered independently of the Swift
/// definition.
#[repr(C)]
pub struct RecognizedTextRaw {
    // NOTE(review): presumably a NUL-terminated string allocated by the Swift
    // bridge and released via `vn_recognized_text_free` — confirm against
    // Vision.swift.
    pub text: *mut c_char,
    pub confidence: f32,
    // Bounding box as origin (`bbox_x`, `bbox_y`) plus size (`bbox_w`,
    // `bbox_h`). NOTE(review): the coordinate space is not visible from this
    // file — confirm against the bridge.
    pub bbox_x: f64,
    pub bbox_y: f64,
    pub bbox_w: f64,
    pub bbox_h: f64,
}

/// Mirrors `VNDetectedFaceRaw` in Vision.swift. Layout-compatible.
///
/// One detected face: a bounding box, a confidence value and three
/// orientation values. The struct is `#[repr(C)]`, so field order and types
/// are part of the ABI shared with the Swift side.
#[repr(C)]
pub struct DetectedFaceRaw {
    // Bounding box as origin plus size. NOTE(review): coordinate space is not
    // visible from this file — confirm against Vision.swift.
    pub bbox_x: f64,
    pub bbox_y: f64,
    pub bbox_w: f64,
    pub bbox_h: f64,
    pub confidence: f32,
    // NOTE(review): units (radians vs. degrees) and the value used when an
    // angle is unavailable are not visible here — confirm against the bridge.
    pub roll: f32,
    pub yaw: f32,
    pub pitch: f32,
}

/// Mirrors `VNDetectedBarcodeRaw` in Vision.swift. Layout-compatible.
///
/// One detected barcode: two C strings (payload and symbology), a confidence
/// value and a bounding box. The struct is `#[repr(C)]`, so field order and
/// types are part of the ABI shared with the Swift side.
#[repr(C)]
pub struct DetectedBarcodeRaw {
    // NOTE(review): both pointers are presumably NUL-terminated strings owned
    // by the bridge and released via `vn_detected_barcodes_free`; whether
    // either may be NULL is not visible here — confirm against Vision.swift.
    pub payload: *mut c_char,
    pub symbology: *mut c_char,
    pub confidence: f32,
    // Bounding box as origin plus size; coordinate space not visible here.
    pub bbox_x: f64,
    pub bbox_y: f64,
    pub bbox_w: f64,
    pub bbox_h: f64,
}

/// Mirrors `VNSaliencyRegionRaw` in Vision.swift. Layout-compatible.
///
/// One salient region: a confidence value followed by a bounding box. The
/// struct is `#[repr(C)]`, so field order and types are part of the ABI
/// shared with the Swift side.
#[repr(C)]
pub struct SaliencyRegionRaw {
    pub confidence: f32,
    // Bounding box as origin plus size. NOTE(review): coordinate space is not
    // visible from this file — confirm against Vision.swift.
    pub bbox_x: f64,
    pub bbox_y: f64,
    pub bbox_w: f64,
    pub bbox_h: f64,
}

/// Mirrors `VNFaceLandmarksRaw` in Vision.swift. Layout-compatible.
///
/// All `*_count` fields are NUMBER OF POINTS; each point buffer is an
/// interleaved `[x0, y0, x1, y1, …]` array of doubles, length
/// `count * 2`. A NULL pointer + 0 count means the region wasn't
/// produced for this face.
///
/// The struct is `#[repr(C)]`: the face header fields come first, followed by
/// twelve (pointer, count) pairs, one per landmark region. Field order and
/// types are part of the ABI shared with the Swift side.
#[repr(C)]
pub struct FaceLandmarksRaw {
    // Face header: bounding box, confidence and three orientation values.
    // NOTE(review): coordinate space and angle units are not visible from
    // this file — confirm against Vision.swift.
    pub bbox_x: f64,
    pub bbox_y: f64,
    pub bbox_w: f64,
    pub bbox_h: f64,
    pub confidence: f32,
    pub roll: f32,
    pub yaw: f32,
    pub pitch: f32,

    // Landmark regions: each buffer is paired with the `*_count` field that
    // immediately follows it. NOTE(review): the buffers are presumably
    // released by `vn_face_landmarks_free` — confirm against the bridge.
    pub face_contour: *mut f64,
    pub face_contour_count: usize,
    pub left_eye: *mut f64,
    pub left_eye_count: usize,
    pub right_eye: *mut f64,
    pub right_eye_count: usize,
    pub left_eyebrow: *mut f64,
    pub left_eyebrow_count: usize,
    pub right_eyebrow: *mut f64,
    pub right_eyebrow_count: usize,
    pub nose: *mut f64,
    pub nose_count: usize,
    pub nose_crest: *mut f64,
    pub nose_crest_count: usize,
    pub median_line: *mut f64,
    pub median_line_count: usize,
    pub outer_lips: *mut f64,
    pub outer_lips_count: usize,
    pub inner_lips: *mut f64,
    pub inner_lips_count: usize,
    pub left_pupil: *mut f64,
    pub left_pupil_count: usize,
    pub right_pupil: *mut f64,
    pub right_pupil_count: usize,
}

/// Mirrors `VNPoseObservationRaw` in Vision.swift. Layout-compatible.
///
/// One pose observation: a bounding box, a confidence value and four joint
/// buffers sharing a single `joint_count`. The struct is `#[repr(C)]`, so
/// field order and types are part of the ABI shared with the Swift side.
#[repr(C)]
pub struct PoseObservationRaw {
    // Bounding box as origin plus size; coordinate space not visible here.
    pub bbox_x: f64,
    pub bbox_y: f64,
    pub bbox_w: f64,
    pub bbox_h: f64,
    pub confidence: f32,
    // NOTE(review): the four buffers below are presumably parallel arrays of
    // `joint_count` elements each (index i describes joint i), with
    // `joint_names` holding NUL-terminated strings, all released by
    // `vn_pose_observations_free` — confirm against Vision.swift.
    pub joint_names: *mut *mut c_char,
    pub joint_xs: *mut f64,
    pub joint_ys: *mut f64,
    pub joint_confidences: *mut f32,
    pub joint_count: usize,
}

/// Mirrors `VNContourRaw` in Vision.swift. Layout-compatible.
///
/// One contour: separate x and y coordinate buffers, a point count, a child
/// count and an aspect ratio. The struct is `#[repr(C)]`, so field order and
/// types are part of the ABI shared with the Swift side.
#[repr(C)]
pub struct ContourRaw {
    // NOTE(review): presumably parallel arrays of `point_count` doubles each,
    // released by `vn_contours_free` — confirm against Vision.swift.
    pub point_xs: *mut f64,
    pub point_ys: *mut f64,
    pub point_count: usize,
    // Signed, unlike the other counts in this file. NOTE(review): whether a
    // negative value carries meaning is not visible here — confirm against
    // the bridge.
    pub child_count: isize,
    pub aspect_ratio: f32,
}

/// Mirrors `VNRecognizedAnimalRaw` in Vision.swift. Layout-compatible.
///
/// One recognized animal: an identifier C string, a confidence value and a
/// bounding box. The struct is `#[repr(C)]`, so field order and types are
/// part of the ABI shared with the Swift side.
#[repr(C)]
pub struct RecognizedAnimalRaw {
    // NOTE(review): presumably a NUL-terminated label string owned by the
    // bridge and released via `vn_recognized_animals_free` — confirm against
    // Vision.swift.
    pub identifier: *mut c_char,
    pub confidence: f32,
    // Bounding box as origin plus size; coordinate space not visible here.
    pub bbox_x: f64,
    pub bbox_y: f64,
    pub bbox_w: f64,
    pub bbox_h: f64,
}

/// Mirrors `VNClassificationRaw` in Vision.swift. Layout-compatible.
///
/// One classification result: an identifier C string paired with a
/// confidence value. The struct is `#[repr(C)]`, so field order and types are
/// part of the ABI shared with the Swift side.
#[repr(C)]
pub struct ClassificationRaw {
    // NOTE(review): presumably a NUL-terminated label string owned by the
    // bridge and released via `vn_classifications_free` — confirm against
    // Vision.swift.
    pub identifier: *mut c_char,
    pub confidence: f32,
}

/// Mirrors `VNRectangleObservationRaw` in Vision.swift. Layout-compatible.
///
/// One rectangle observation: an axis-aligned bounding box, a confidence
/// value and four corner points stored as separate x/y doubles. The struct is
/// `#[repr(C)]`, so field order and types are part of the ABI shared with the
/// Swift side.
#[repr(C)]
pub struct RectangleObservationRaw {
    // Bounding box as origin plus size; coordinate space not visible here.
    pub bbox_x: f64,
    pub bbox_y: f64,
    pub bbox_w: f64,
    pub bbox_h: f64,
    pub confidence: f32,
    // Corner points. NOTE(review): the prefixes presumably stand for
    // top-left, top-right, bottom-left and bottom-right — confirm against
    // Vision.swift.
    pub tl_x: f64,
    pub tl_y: f64,
    pub tr_x: f64,
    pub tr_y: f64,
    pub bl_x: f64,
    pub bl_y: f64,
    pub br_x: f64,
    pub br_y: f64,
}

/// Mirrors `VNFeaturePrintRaw` in Vision.swift. Layout-compatible.
///
/// An image feature print: an element-type code, an element count and an
/// untyped data pointer. The struct is `#[repr(C)]`, so field order and types
/// are part of the ABI shared with the Swift side.
#[repr(C)]
pub struct FeaturePrintRaw {
    // NOTE(review): the meaning of each `element_type` value is defined on
    // the Swift side and is not visible here — confirm against Vision.swift.
    pub element_type: i32,
    pub element_count: usize,
    // NOTE(review): presumably `element_count` elements of the type selected
    // by `element_type`, released via `vn_feature_print_free` — confirm.
    pub bytes: *mut c_void,
}

/// Mirrors `VNHumanObservationRaw` in Vision.swift. Layout-compatible.
///
/// One human-rectangle observation: a bounding box, a confidence value and an
/// upper-body-only flag. The struct is `#[repr(C)]`, so field order and types
/// are part of the ABI shared with the Swift side.
#[repr(C)]
pub struct HumanObservationRaw {
    // Bounding box as origin plus size; coordinate space not visible here.
    pub bbox_x: f64,
    pub bbox_y: f64,
    pub bbox_w: f64,
    pub bbox_h: f64,
    pub confidence: f32,
    // Rust `bool` must only ever hold 0 or 1; the Swift side is responsible
    // for writing a valid value.
    pub upper_body_only: bool,
}

/// Mirrors `VNAestheticsScoresRaw` in Vision.swift. Layout-compatible.
///
/// Aesthetics result: an overall score and a utility flag. The struct is
/// `#[repr(C)]`, so field order and types are part of the ABI shared with the
/// Swift side.
#[repr(C)]
pub struct AestheticsScoresRaw {
    // NOTE(review): the score range is not visible from this file — confirm
    // against Vision.swift.
    pub overall_score: f32,
    // Rust `bool` must only ever hold 0 or 1; the Swift side is responsible
    // for writing a valid value.
    pub is_utility: bool,
}

/// Mirrors `VNFaceQualityRaw` in Vision.swift. Layout-compatible.
///
/// One face capture-quality observation: a bounding box, a confidence value,
/// a quality value and a flag. The struct is `#[repr(C)]`, so field order and
/// types are part of the ABI shared with the Swift side.
#[repr(C)]
pub struct FaceQualityRaw {
    // Bounding box as origin plus size; coordinate space not visible here.
    pub bbox_x: f64,
    pub bbox_y: f64,
    pub bbox_w: f64,
    pub bbox_h: f64,
    pub confidence: f32,
    pub capture_quality: f32,
    // NOTE(review): presumably indicates whether `capture_quality` holds a
    // meaningful value — confirm against Vision.swift.
    pub has_quality: bool,
}

/// Mirrors `VNSegmentationMaskRaw` in Vision.swift. Layout-compatible.
///
/// A 2-D buffer described by width, height and row stride, plus an untyped
/// data pointer. The struct is `#[repr(C)]`, so field order and types are
/// part of the ABI shared with the Swift side.
#[repr(C)]
pub struct SegmentationMaskRaw {
    pub width: usize,
    pub height: usize,
    pub bytes_per_row: usize,
    // NOTE(review): pixel format / element size is not visible from this
    // file, and the buffer is presumably `height * bytes_per_row` bytes,
    // released via `vn_segmentation_mask_free` — confirm against Vision.swift.
    pub bytes: *mut c_void,
}

// Foreign functions exported by the Swift bridge.
//
// Shape shared by most entry points below, as visible from the signatures:
//   * inputs are passed first (a `path` C string or an opaque pixel buffer,
//     plus request-specific options);
//   * results are written through `out_*` pointers;
//   * the `i32` return value is a status code.
//
// NOTE(review): the following is inferred from naming and is not visible in
// this file — confirm against Vision.swift:
//   * the return value is presumably one of the `status` constants;
//   * `out_array` presumably receives a bridge-allocated array of the
//     matching `*Raw` struct with `out_count` elements, to be released with
//     the `*_free` function declared after the producer(s);
//   * `out_error_message` presumably receives a bridge-allocated C string on
//     failure, to be released with `vn_string_free`.
extern "C" {
    // Frees a C string handed out by the bridge.
    pub fn vn_string_free(s: *mut c_char);

    // --- Text recognition -------------------------------------------------

    pub fn vn_recognize_text_in_path(
        path: *const c_char,
        recognition_level: i32,
        uses_language_correction: bool,
        out_array: *mut *mut c_void,
        out_count: *mut usize,
        out_error_message: *mut *mut c_char,
    ) -> i32;

    // Same options as the path variant, but reads from an opaque pixel
    // buffer pointer. NOTE(review): presumably a `CVPixelBufferRef` — confirm.
    pub fn vn_recognize_text_in_pixel_buffer(
        pixel_buffer: *mut c_void,
        recognition_level: i32,
        uses_language_correction: bool,
        out_array: *mut *mut c_void,
        out_count: *mut usize,
        out_error_message: *mut *mut c_char,
    ) -> i32;

    pub fn vn_recognized_text_free(array: *mut c_void, count: usize);

    // --- Face detection ---------------------------------------------------

    pub fn vn_detect_faces_in_path(
        path: *const c_char,
        out_array: *mut *mut c_void,
        out_count: *mut usize,
        out_error_message: *mut *mut c_char,
    ) -> i32;

    pub fn vn_detect_faces_in_pixel_buffer(
        pixel_buffer: *mut c_void,
        out_array: *mut *mut c_void,
        out_count: *mut usize,
        out_error_message: *mut *mut c_char,
    ) -> i32;

    pub fn vn_detected_faces_free(array: *mut c_void, count: usize);

    // --- Barcode detection ------------------------------------------------

    pub fn vn_detect_barcodes_in_path(
        path: *const c_char,
        out_array: *mut *mut c_void,
        out_count: *mut usize,
        out_error_message: *mut *mut c_char,
    ) -> i32;

    pub fn vn_detected_barcodes_free(array: *mut c_void, count: usize);

    // --- Saliency ---------------------------------------------------------

    pub fn vn_attention_saliency_in_path(
        path: *const c_char,
        out_array: *mut *mut c_void,
        out_count: *mut usize,
        out_error_message: *mut *mut c_char,
    ) -> i32;

    pub fn vn_saliency_regions_free(array: *mut c_void, count: usize);

    // --- Face landmarks ---------------------------------------------------

    pub fn vn_detect_face_landmarks_in_path(
        path: *const c_char,
        out_array: *mut *mut c_void,
        out_count: *mut usize,
        out_error_message: *mut *mut c_char,
    ) -> i32;

    pub fn vn_face_landmarks_free(array: *mut c_void, count: usize);

    // --- Body / hand pose -------------------------------------------------
    // NOTE(review): both pose producers presumably share
    // `vn_pose_observations_free` — confirm against Vision.swift.

    pub fn vn_detect_human_body_pose_in_path(
        path: *const c_char,
        out_array: *mut *mut c_void,
        out_count: *mut usize,
        out_error_message: *mut *mut c_char,
    ) -> i32;

    pub fn vn_detect_human_hand_pose_in_path(
        path: *const c_char,
        max_hands: usize,
        out_array: *mut *mut c_void,
        out_count: *mut usize,
        out_error_message: *mut *mut c_char,
    ) -> i32;

    pub fn vn_pose_observations_free(array: *mut c_void, count: usize);

    // --- Contours ---------------------------------------------------------

    pub fn vn_detect_contours_in_path(
        path: *const c_char,
        contrast_adjustment: f32,
        detects_dark_on_light: bool,
        out_array: *mut *mut c_void,
        out_count: *mut usize,
        out_error_message: *mut *mut c_char,
    ) -> i32;

    pub fn vn_contours_free(array: *mut c_void, count: usize);

    // --- Animal recognition -----------------------------------------------

    pub fn vn_recognize_animals_in_path(
        path: *const c_char,
        out_array: *mut *mut c_void,
        out_count: *mut usize,
        out_error_message: *mut *mut c_char,
    ) -> i32;

    pub fn vn_recognized_animals_free(array: *mut c_void, count: usize);

    // --- Image classification ---------------------------------------------

    pub fn vn_classify_image_in_path(
        path: *const c_char,
        out_array: *mut *mut c_void,
        out_count: *mut usize,
        out_error_message: *mut *mut c_char,
    ) -> i32;
    pub fn vn_classifications_free(array: *mut c_void, count: usize);

    // --- Rectangles / document segmentation -------------------------------
    // NOTE(review): both producers presumably share
    // `vn_rectangle_observations_free` — confirm against Vision.swift.

    pub fn vn_detect_rectangles_in_path(
        path: *const c_char,
        max_observations: usize,
        minimum_aspect_ratio: f32,
        maximum_aspect_ratio: f32,
        minimum_size: f32,
        minimum_confidence: f32,
        out_array: *mut *mut c_void,
        out_count: *mut usize,
        out_error_message: *mut *mut c_char,
    ) -> i32;

    pub fn vn_detect_document_segmentation_in_path(
        path: *const c_char,
        out_array: *mut *mut c_void,
        out_count: *mut usize,
        out_error_message: *mut *mut c_char,
    ) -> i32;

    pub fn vn_rectangle_observations_free(array: *mut c_void, count: usize);

    // --- Horizon ----------------------------------------------------------

    // Writes a single angle and a has-value flag instead of an array.
    // NOTE(review): angle units are not visible here — confirm.
    pub fn vn_detect_horizon_in_path(
        path: *const c_char,
        out_angle: *mut f64,
        out_has_value: *mut bool,
        out_error_message: *mut *mut c_char,
    ) -> i32;

    // --- Feature print ----------------------------------------------------

    // Fills a caller-provided `FeaturePrintRaw` rather than returning an array.
    pub fn vn_generate_image_feature_print_in_path(
        path: *const c_char,
        out_feature: *mut FeaturePrintRaw,
        out_error_message: *mut *mut c_char,
    ) -> i32;
    pub fn vn_feature_print_free(feature: *mut FeaturePrintRaw);

    // --- Human rectangles -------------------------------------------------

    pub fn vn_detect_human_rectangles_in_path(
        path: *const c_char,
        upper_body_only: bool,
        out_array: *mut *mut c_void,
        out_count: *mut usize,
        out_error_message: *mut *mut c_char,
    ) -> i32;
    pub fn vn_human_observations_free(array: *mut c_void, count: usize);

    // --- Aesthetics -------------------------------------------------------

    // Fills a caller-provided `AestheticsScoresRaw` plus a has-value flag.
    pub fn vn_calculate_aesthetics_scores_in_path(
        path: *const c_char,
        out_scores: *mut AestheticsScoresRaw,
        out_has_value: *mut bool,
        out_error_message: *mut *mut c_char,
    ) -> i32;

    // --- Face capture quality ---------------------------------------------

    pub fn vn_detect_face_capture_quality_in_path(
        path: *const c_char,
        out_array: *mut *mut c_void,
        out_count: *mut usize,
        out_error_message: *mut *mut c_char,
    ) -> i32;

    pub fn vn_face_quality_observations_free(array: *mut c_void, count: usize);

    // --- Segmentation masks / optical flow --------------------------------
    // The three producers below each fill a caller-provided
    // `SegmentationMaskRaw` plus a has-value flag. NOTE(review): they
    // presumably all share `vn_segmentation_mask_free` — confirm.

    pub fn vn_generate_person_segmentation_in_path(
        path: *const c_char,
        quality_level: i32,
        out_mask: *mut SegmentationMaskRaw,
        out_has_value: *mut bool,
        out_error_message: *mut *mut c_char,
    ) -> i32;

    pub fn vn_generate_foreground_instance_mask_in_path(
        path: *const c_char,
        out_mask: *mut SegmentationMaskRaw,
        out_instance_count: *mut usize,
        out_has_value: *mut bool,
        out_error_message: *mut *mut c_char,
    ) -> i32;

    pub fn vn_segmentation_mask_free(mask: *mut SegmentationMaskRaw);

    // Takes two image paths and reuses `SegmentationMaskRaw` as the output
    // container.
    pub fn vn_generate_optical_flow_in_paths(
        path_a: *const c_char,
        path_b: *const c_char,
        computation_accuracy: i32,
        out_mask: *mut SegmentationMaskRaw,
        out_has_value: *mut bool,
        out_error_message: *mut *mut c_char,
    ) -> i32;

    // --- Core ML ----------------------------------------------------------

    // NOTE(review): no dedicated free function is declared for this producer;
    // the result presumably uses `ClassificationRaw` and
    // `vn_classifications_free` — confirm against Vision.swift.
    pub fn vn_coreml_classify_in_path(
        path: *const c_char,
        model_path: *const c_char,
        out_array: *mut *mut c_void,
        out_count: *mut usize,
        out_error_message: *mut *mut c_char,
    ) -> i32;

    // --- Test support -----------------------------------------------------

    // Has no error-message out-parameter; only the status code is returned.
    pub fn vn_test_helper_render_text_png(
        text: *const c_char,
        width: i32,
        height: i32,
        output_path: *const c_char,
    ) -> i32;
}

/// Named `i32` status codes: `OK` is zero and every other code is negative.
///
/// NOTE(review): these presumably match the values returned by the `vn_*`
/// bridge functions — confirm against Vision.swift.
pub mod status {
    /// Zero: the success value.
    pub const OK: i32 = 0;
    pub const INVALID_ARGUMENT: i32 = -1;
    pub const IMAGE_LOAD_FAILED: i32 = -2;
    pub const REQUEST_FAILED: i32 = -3;
    // Deliberately far from the contiguous -1..-3 range above.
    pub const UNKNOWN: i32 = -99;
}

// Compile-time-only reference to `c_void`, kept so the import always counts
// as used. The value is evaluated during compilation and never read.
const _: usize = core::mem::size_of::<*mut c_void>();