#![doc = include_str!("../README.md")]
#![cfg_attr(docsrs, feature(doc_cfg))]
#![cfg_attr(docsrs, allow(unused_attributes))]
#![deny(missing_docs)]
#[cfg(target_vendor = "apple")]
use std::panic::{AssertUnwindSafe, catch_unwind};
#[cfg(target_vendor = "apple")]
use bytes::Bytes;
use mediaframe::frame::Dimensions;
#[cfg(target_vendor = "apple")]
use mediaschema::domain::aggregates::video::{
Aesthetics, AnimalAnalysis, BarcodeDetection, BodyPose3DDetection, BodyPose3DHeightEstimation,
BodyPose3DJoint, BodyPoseDetection, BodyPoseJoint, BoundingBox, Detection, DocumentSegment,
FaceDetection, FaceLandmarkRegion, FaceLandmarksDetection, HandChirality, HandPoseDetection,
HorizonInfo, HumanAnalysis, PersonInstanceMaskDetection, PersonSegmentationMask, SaliencyRegion,
SubjectDetection, TextDetection,
};
use mediaschema::domain::{ErrorCode, ErrorInfo, Keyframe, KeyframeExtractor, Uuid7};
use mediatime::Timestamp;
#[cfg(target_vendor = "apple")]
type Id = Uuid7;
#[cfg(target_vendor = "apple")]
use objc2::{
encode::{Encode, Encoding},
rc::Retained,
};
#[cfg(target_vendor = "apple")]
use objc2_core_foundation::{CGPoint, CGRect};
#[cfg(target_vendor = "apple")]
use objc2_core_video::{
CVPixelBuffer, CVPixelBufferGetBaseAddress, CVPixelBufferGetBytesPerRow,
CVPixelBufferGetDataSize, CVPixelBufferGetHeight, CVPixelBufferGetPixelFormatType,
CVPixelBufferGetWidth, CVPixelBufferLockBaseAddress, CVPixelBufferLockFlags,
CVPixelBufferUnlockBaseAddress, kCVPixelFormatType_OneComponent8,
kCVPixelFormatType_OneComponent32Float, kCVReturnSuccess,
};
#[cfg(target_vendor = "apple")]
use objc2_foundation::{NSArray, NSData, NSIndexSet, NSNotFound};
#[cfg(target_vendor = "apple")]
use objc2_vision::*;
#[cfg(target_vendor = "apple")]
use smol_str::{SmolStr, StrExt, ToSmolStr};
pub use options::*;
mod options;
/// Four `f32` lanes stored as a C-layout, 16-byte-aligned value.
///
/// NOTE(review): presumably mirrors the C `simd_float4` vector type so it can cross the
/// Objective-C boundary by value — confirm against the call sites that use it.
#[cfg(target_vendor = "apple")]
#[repr(C, align(16))]
#[derive(Clone, Copy, Debug)]
struct SimdFloat4([f32; 4]);
// `Encoding::None` declares that this type has no Objective-C type encoding.
// NOTE(review): the `unsafe impl` is a promise that this matches how the runtime describes
// the value; verify it wherever the type is passed through `msg_send!`.
#[cfg(target_vendor = "apple")]
unsafe impl Encode for SimdFloat4 {
const ENCODING: Encoding = Encoding::None;
}
/// Four [`SimdFloat4`] columns stored as a C-layout, 16-byte-aligned value.
///
/// NOTE(review): presumably mirrors the C `simd_float4x4` matrix type — confirm against the
/// call sites that use it.
#[cfg(target_vendor = "apple")]
#[repr(C, align(16))]
#[derive(Clone, Copy, Debug)]
struct SimdFloat4x4 {
// Column storage; the field name suggests column-major order, which the code here does not verify.
columns: [SimdFloat4; 4],
}
// Encoded as an anonymous struct (`"?"`) wrapping an array of four unencoded elements.
// NOTE(review): the `unsafe impl` is a promise that this matches what the Objective-C
// runtime reports for the corresponding method signatures; verify at the call sites.
#[cfg(target_vendor = "apple")]
unsafe impl Encode for SimdFloat4x4 {
const ENCODING: Encoding = Encoding::Struct("?", &[Encoding::Array(4, &Encoding::None)]);
}
/// Restricts `value` to the closed unit interval `[0.0, 1.0]`.
///
/// Callers are expected to pass a finite value; this is only enforced in debug builds.
#[cfg(target_vendor = "apple")]
#[inline]
fn clamp01(value: f32) -> f32 {
    debug_assert!(
        value.is_finite(),
        "clamp01 expects finite input; got {value}"
    );
    if value < 0.0 {
        0.0
    } else if value > 1.0 {
        1.0
    } else {
        value
    }
}
/// Converts a rectangle reported by Vision into a schema `BoundingBox`.
///
/// The vertical axis is flipped (`top = 1 - (origin.y + height)`), every edge is clamped
/// to `[0, 1]`, and the box is rebuilt from the clamped edges.
///
/// Returns `None` when any component or derived edge is not finite, when the clamped box
/// has no area, or when `BoundingBox::try_new` rejects the result.
#[cfg(target_vendor = "apple")]
fn vision_bbox_to_schema(rect: CGRect) -> Option<BoundingBox> {
    let raw_x = rect.origin.x as f32;
    let raw_y = (1.0 - (rect.origin.y + rect.size.height)) as f32;
    let raw_width = rect.size.width as f32;
    let raw_height = rect.size.height as f32;
    // The far edges are sums of two finite values and can still overflow to ±inf.
    // Check them here so `clamp01` never sees a non-finite input (it asserts in debug builds).
    let raw_right = raw_x + raw_width;
    let raw_bottom = raw_y + raw_height;
    let all_finite = [raw_x, raw_y, raw_width, raw_height, raw_right, raw_bottom]
        .iter()
        .all(|component| component.is_finite());
    if !all_finite {
        return None;
    }
    let left = clamp01(raw_x);
    let top = clamp01(raw_y);
    let right = clamp01(raw_right);
    let bottom = clamp01(raw_bottom);
    let width = right - left;
    let height = bottom - top;
    // Inverted or fully out-of-frame rectangles collapse to a non-positive extent.
    if width <= 0.0 || height <= 0.0 {
        return None;
    }
    BoundingBox::try_new(left, top, width, height).ok()
}
/// Converts a point reported by Vision into schema coordinates.
///
/// The y coordinate is flipped (`1 - y`) and both coordinates are clamped to `[0, 1]`.
/// Returns `None` if either coordinate is not finite after narrowing to `f32`.
#[cfg(target_vendor = "apple")]
#[inline]
fn vision_point_to_schema(x: f64, y: f64) -> Option<(f32, f32)> {
    let (px, py) = (x as f32, (1.0 - y) as f32);
    // Lazy `then`: `clamp01` must only run once both values are known to be finite.
    (px.is_finite() && py.is_finite()).then(|| (clamp01(px), clamp01(py)))
}
/// Passes `v` through when it is finite; NaN and ±infinity become `None`.
#[cfg(target_vendor = "apple")]
#[inline]
fn finite_f32(v: f32) -> Option<f32> {
    v.is_finite().then_some(v)
}
// ---- Limits on a single payload ----

/// Largest single mask payload accepted, in bytes (64 MiB).
#[cfg(target_vendor = "apple")]
const MAX_MASK_BYTES: usize = 64 << 20;
/// Largest encoded input image accepted by `analyze_keyframe`, in bytes (64 MiB).
#[cfg(target_vendor = "apple")]
const MAX_INPUT_IMAGE_BYTES: usize = 64 << 20;
/// Longest Objective-C string, measured in UTF-8 bytes, that is copied into Rust.
#[cfg(target_vendor = "apple")]
const MAX_FFI_STRING_BYTES: usize = 4 << 10;

// ---- Limits per observation ----

/// NOTE(review): not referenced in the visible part of this file; presumably the cap on
/// points read from a single landmark region — confirm at the use site.
#[cfg(target_vendor = "apple")]
const MAX_LANDMARK_POINTS: usize = 1024;
/// Pose observations that report more joints than this are skipped.
#[cfg(target_vendor = "apple")]
const MAX_POSE_JOINTS: usize = 256;
/// Ceiling on instances visited per instance-mask observation (combined with the user option).
#[cfg(target_vendor = "apple")]
const MAX_NESTED_INSTANCES_PER_OBSERVATION: usize = 64;
/// Ceiling on labels inspected per animal observation.
#[cfg(target_vendor = "apple")]
const MAX_NESTED_LABELS_PER_OBSERVATION: usize = 32;
/// NOTE(review): not referenced in the visible part of this file; presumably the cap on
/// candidate strings read per text observation — confirm at the use site.
#[cfg(target_vendor = "apple")]
const MAX_TEXT_CANDIDATES_PER_OBSERVATION: usize = 10;
/// Ceiling applied to the user-configured maximum hand count for the hand-pose request.
#[cfg(target_vendor = "apple")]
const MAX_HAND_POSE_MAXIMUM_HAND_COUNT: usize = 6;

// ---- Limits per frame ----

/// Hard ceiling on how many observations of one request are inspected per frame.
#[cfg(target_vendor = "apple")]
const MAX_VISION_RESULTS_PER_FRAME: usize = 4096;
/// NOTE(review): not referenced in the visible part of this file; presumably the cap on
/// saliency regions kept per frame — confirm at the use site.
#[cfg(target_vendor = "apple")]
const MAX_SALIENCY_REGIONS_PER_FRAME: usize = 64;
/// Ceiling on masks kept per frame; the budget is shared by instance and segmentation masks.
#[cfg(target_vendor = "apple")]
const MAX_TOTAL_MASKS_PER_FRAME: usize = 256;
/// Ceiling on mask payload bytes kept per frame, shared the same way (256 MiB).
#[cfg(target_vendor = "apple")]
const MAX_TOTAL_MASK_BYTES_PER_FRAME: usize = 256 << 20;
/// Ceiling on mask generation attempts per frame: four attempts per allowed mask.
#[cfg(target_vendor = "apple")]
const MAX_TOTAL_MASK_ATTEMPTS_PER_FRAME: usize = 4 * MAX_TOTAL_MASKS_PER_FRAME;
/// Ceiling on face landmark points kept per frame.
#[cfg(target_vendor = "apple")]
const MAX_FACE_LANDMARK_POINTS_PER_FRAME: usize = 16384;
/// Ceiling on face landmark attempts per frame: four attempts per allowed point.
#[cfg(target_vendor = "apple")]
const MAX_FACE_LANDMARK_ATTEMPTS_PER_FRAME: usize = 4 * MAX_FACE_LANDMARK_POINTS_PER_FRAME;
/// Ceiling on animal subjects kept per frame.
#[cfg(target_vendor = "apple")]
const MAX_TOTAL_ANIMAL_SUBJECTS_PER_FRAME: usize = 256;
/// NOTE(review): not referenced in the visible part of this file; presumably the cap on
/// text detections kept per frame — confirm at the use site.
#[cfg(target_vendor = "apple")]
const MAX_TOTAL_TEXT_DETECTIONS_PER_FRAME: usize = 256;
/// Copies an `NSString` received over FFI into a `SmolStr`, enforcing a size cap.
///
/// Returns `None` when the string needs more than `MAX_FFI_STRING_BYTES` bytes of UTF-8,
/// or when Foundation cannot report a UTF-8 length for a non-empty string.
#[cfg(target_vendor = "apple")]
fn ffi_nsstring_to_smolstr(ns_str: &objc2_foundation::NSString) -> Option<SmolStr> {
    const NS_UTF8_STRING_ENCODING: objc2_foundation::NSStringEncoding = 4;
    // Every UTF-16 code unit needs at least one UTF-8 byte, so an over-long string can be
    // rejected from the constant-time length alone.
    let utf16_units: usize = ns_str.length();
    if utf16_units > MAX_FFI_STRING_BYTES {
        return None;
    }
    let utf8_len: usize = ns_str.lengthOfBytesUsingEncoding(NS_UTF8_STRING_ENCODING);
    if utf8_len > MAX_FFI_STRING_BYTES {
        return None;
    }
    // `lengthOfBytesUsingEncoding:` reports 0 when the conversion is not possible. Without
    // this check such a string would pass the cap; treat it as a failure unless the string
    // really is empty.
    if utf8_len == 0 && utf16_units != 0 {
        return None;
    }
    Some(ns_str.to_smolstr())
}
/// Limits a user-configured result count to `MAX_VISION_RESULTS_PER_FRAME`.
#[cfg(target_vendor = "apple")]
#[inline]
fn effective_results_cap(user_max: usize) -> usize {
    core::cmp::min(user_max, MAX_VISION_RESULTS_PER_FRAME)
}
/// Checks that a byte length may back a raw slice.
///
/// Succeeds only when `byte_len` is at most `max_bytes` and also fits in `isize`.
// NOTE(review): `dead_code` is allowed here, presumably because no current caller uses the
// byte-based variant — confirm before removing the attribute.
#[cfg(target_vendor = "apple")]
#[allow(dead_code)]
#[inline]
fn validate_raw_slice_bytes(byte_len: usize, max_bytes: usize) -> Option<()> {
    let within_cap = byte_len <= max_bytes;
    let addressable = isize::try_from(byte_len).is_ok();
    (within_cap && addressable).then_some(())
}
/// Checks that `elem_count` elements of `T` may back a raw slice.
///
/// Succeeds only when the count is at most `max_elems` and the total byte size neither
/// overflows `usize` nor exceeds `isize::MAX`.
#[cfg(target_vendor = "apple")]
#[inline]
fn validate_raw_slice_elems<T>(elem_count: usize, max_elems: usize) -> Option<()> {
    if elem_count > max_elems {
        return None;
    }
    elem_count
        .checked_mul(core::mem::size_of::<T>())
        .filter(|&total_bytes| total_bytes <= isize::MAX as usize)
        .map(|_| ())
}
/// Allocates a zero-filled buffer of `packed_len` bytes without aborting on failure.
///
/// Returns `None` when `packed_len` exceeds `MAX_MASK_BYTES` or the allocation cannot be
/// reserved.
#[cfg(target_vendor = "apple")]
fn try_alloc_packed_mask(packed_len: usize) -> Option<Vec<u8>> {
    if packed_len > MAX_MASK_BYTES {
        return None;
    }
    let mut buffer = Vec::<u8>::new();
    // Reserve fallibly first so the fill below cannot trigger an aborting allocation.
    if buffer.try_reserve_exact(packed_len).is_err() {
        return None;
    }
    buffer.resize(packed_len, 0);
    Some(buffer)
}
/// Normalizes an optional capture-quality score.
///
/// A missing score becomes `Some(0.0)`; a present score is kept only when it is finite.
#[cfg(target_vendor = "apple")]
#[inline]
fn sanitize_capture_quality(raw: Option<f32>) -> Option<f32> {
    raw.map_or(Some(0.0), finite_f32)
}
/// Pairs a body height with its estimation kind, neutralizing non-finite heights.
///
/// A finite `raw_height` is returned unchanged with `measured_or_reference`. Any other
/// value yields `(0.0, BodyPose3DHeightEstimation::Unknown)`.
#[cfg(target_vendor = "apple")]
#[inline]
fn sanitize_body_height_pair(
    raw_height: f32,
    measured_or_reference: BodyPose3DHeightEstimation,
) -> (f32, BodyPose3DHeightEstimation) {
    if raw_height.is_finite() {
        (raw_height, measured_or_reference)
    } else {
        (0.0, BodyPose3DHeightEstimation::Unknown)
    }
}
/// Validates mask dimensions and source length before raw-slice access.
///
/// Succeeds only when `width * height` does not overflow, the product is at most
/// `MAX_MASK_BYTES`, and `total_src_len` fits in `isize`.
#[cfg(target_vendor = "apple")]
#[inline]
fn validate_mask_dims_for_slice(width: usize, height: usize, total_src_len: usize) -> Option<()> {
    let pixel_count = width.checked_mul(height)?;
    let fits_mask_cap = pixel_count <= MAX_MASK_BYTES;
    let source_addressable = total_src_len <= isize::MAX as usize;
    (fits_mask_cap && source_addressable).then_some(())
}
/// Maps a point expressed relative to a face rectangle into that rectangle's coordinate space.
///
/// Each coordinate is scaled by the rectangle's extent and offset by its origin.
#[cfg(target_vendor = "apple")]
#[inline]
fn project_landmark_to_image(point: CGPoint, face_bbox_vision: CGRect) -> CGPoint {
    let origin = face_bbox_vision.origin;
    let size = face_bbox_vision.size;
    let x = origin.x + point.x * size.width;
    let y = origin.y + point.y * size.height;
    CGPoint { x, y }
}
/// Builds a bounding box from the extreme joint coordinates of a pose.
///
/// Returns `None` when any bound is not finite, when the bounds span no area (for example
/// a single joint), or when `BoundingBox::try_new` rejects the result.
#[cfg(target_vendor = "apple")]
fn pose_bbox_from_joint_bounds(
    min_x: f32,
    min_y: f32,
    max_x: f32,
    max_y: f32,
) -> Option<BoundingBox> {
    let bounds = [min_x, min_y, max_x, max_y];
    if bounds.iter().any(|edge| !edge.is_finite()) {
        return None;
    }
    let (width, height) = (max_x - min_x, max_y - min_y);
    if width > 0.0 && height > 0.0 {
        BoundingBox::try_new(min_x, min_y, width, height).ok()
    } else {
        None
    }
}
/// Accepts a confidence score only when it is usable.
///
/// Returns `Some(value)` when `value` is finite, lies in `[0, 1]`, and is at least `min`.
/// Otherwise returns `None`.
#[cfg(target_vendor = "apple")]
#[inline]
fn sanitize_confidence(value: f32, min: f32) -> Option<f32> {
    let in_unit_range = (0.0..=1.0).contains(&value);
    let meets_threshold = value >= min;
    (value.is_finite() && in_unit_range && meets_threshold).then_some(value)
}
/// Scoped base-address lock on a `CVPixelBuffer`.
///
/// The buffer is unlocked with the same flags when the guard is dropped.
#[cfg(target_vendor = "apple")]
struct CVPixelBufferLockGuard<'a> {
    buffer: &'a CVPixelBuffer,
    flags: CVPixelBufferLockFlags,
}

#[cfg(target_vendor = "apple")]
impl<'a> CVPixelBufferLockGuard<'a> {
    /// Locks the buffer's base address, returning `None` if Core Video reports a failure.
    #[inline]
    fn lock(buffer: &'a CVPixelBuffer, flags: CVPixelBufferLockFlags) -> Option<Self> {
        let status = unsafe { CVPixelBufferLockBaseAddress(buffer, flags) };
        // Build the guard only on success. Constructing it eagerly would run `Drop`, and
        // therefore an unlock, for a lock that was never taken.
        match status {
            ok if ok == kCVReturnSuccess => Some(Self { buffer, flags }),
            _ => None,
        }
    }

    /// Borrows the locked buffer.
    #[inline]
    fn buffer(&self) -> &CVPixelBuffer {
        self.buffer
    }
}

#[cfg(target_vendor = "apple")]
impl Drop for CVPixelBufferLockGuard<'_> {
    fn drop(&mut self) {
        // Best effort: the unlock status is deliberately ignored.
        let _unlock_status = unsafe { CVPixelBufferUnlockBaseAddress(self.buffer, self.flags) };
    }
}
/// Analyzer that runs a fixed set of Apple Vision requests against keyframe images.
///
/// Holds the caller-supplied `ServiceOptions` together with the Vision request objects,
/// which are created once at construction and reused for every analyzed keyframe.
#[cfg(target_vendor = "apple")]
#[derive(Debug)]
pub struct VisionAnalyzer {
// Thresholds and caps consulted by the individual extraction steps.
opts: ServiceOptions,
// One pre-configured request object per Vision feature.
requests: VisionRequests,
}
/// The full set of Vision request objects performed for each keyframe.
///
/// Each field is a retained Objective-C request. Results are read back from these same
/// objects after the requests have been performed.
#[cfg(target_vendor = "apple")]
#[derive(Debug)]
struct VisionRequests {
classify: Retained<VNClassifyImageRequest>,
face_rectangles: Retained<VNDetectFaceRectanglesRequest>,
face_landmarks: Retained<VNDetectFaceLandmarksRequest>,
face_quality: Retained<VNDetectFaceCaptureQualityRequest>,
human_rectangles: Retained<VNDetectHumanRectanglesRequest>,
body_pose: Retained<VNDetectHumanBodyPoseRequest>,
body_pose_3d: Retained<VNDetectHumanBodyPose3DRequest>,
hand_pose: Retained<VNDetectHumanHandPoseRequest>,
animals: Retained<VNRecognizeAnimalsRequest>,
animal_body_pose: Retained<VNDetectAnimalBodyPoseRequest>,
person_instance_mask: Retained<VNGeneratePersonInstanceMaskRequest>,
person_segmentation: Retained<VNGeneratePersonSegmentationRequest>,
text: Retained<VNRecognizeTextRequest>,
barcodes: Retained<VNDetectBarcodesRequest>,
attention_saliency: Retained<VNGenerateAttentionBasedSaliencyImageRequest>,
objectness_saliency: Retained<VNGenerateObjectnessBasedSaliencyImageRequest>,
horizon: Retained<VNDetectHorizonRequest>,
document_segments: Retained<VNDetectDocumentSegmentationRequest>,
aesthetics: Retained<VNCalculateImageAestheticsScoresRequest>,
}
/// Builds an `ErrorInfo` from an error code and a message (Apple-target variant).
///
/// Accepts anything convertible into `SmolStr`, so both static and formatted messages work.
#[cfg(target_vendor = "apple")]
fn apple_vision_error(code: ErrorCode, message: impl Into<SmolStr>) -> ErrorInfo {
ErrorInfo::new(code, message)
}
/// Builds an `ErrorInfo` from an error code and a message (non-Apple variant).
///
/// Takes only static strings; the `SmolStr` import is gated to Apple targets.
#[cfg(not(target_vendor = "apple"))]
fn apple_vision_error(code: ErrorCode, message: &'static str) -> ErrorInfo {
ErrorInfo::new(code, message)
}
#[cfg(target_vendor = "apple")]
impl VisionRequests {
/// Creates one instance of every Vision request and pins each to an explicit revision.
///
/// The only option read from `opts` is the hand-pose maximum hand count, which is limited
/// to `MAX_HAND_POSE_MAXIMUM_HAND_COUNT` before being applied.
fn new(opts: ServiceOptions) -> Self {
// NOTE(review): the whole body is one `unsafe` block because the request constructors
// and setters are unsafe bindings; no additional invariant is visible here.
unsafe {
let classify = VNClassifyImageRequest::new();
classify.setRevision(VNClassifyImageRequestRevision2);
let face_rectangles = VNDetectFaceRectanglesRequest::new();
face_rectangles.setRevision(VNDetectFaceRectanglesRequestRevision3);
let face_landmarks = VNDetectFaceLandmarksRequest::new();
face_landmarks.setRevision(VNDetectFaceLandmarksRequestRevision3);
let face_quality = VNDetectFaceCaptureQualityRequest::new();
face_quality.setRevision(VNDetectFaceCaptureQualityRequestRevision3);
let human_rectangles = VNDetectHumanRectanglesRequest::new();
// Detect full bodies rather than upper bodies only.
human_rectangles.setUpperBodyOnly(false);
human_rectangles.setRevision(VNDetectHumanRectanglesRequestRevision2);
let body_pose = VNDetectHumanBodyPoseRequest::new();
body_pose.setRevision(VNDetectHumanBodyPoseRequestRevision1);
let body_pose_3d = VNDetectHumanBodyPose3DRequest::new();
body_pose_3d.setRevision(VNDetectHumanBodyPose3DRequestRevision1);
let hand_pose = VNDetectHumanHandPoseRequest::new();
let user_hand_count = opts.hand_pose().maximum_hand_count();
// The user-configured hand count is limited by the crate-level ceiling.
hand_pose.setMaximumHandCount(user_hand_count.min(MAX_HAND_POSE_MAXIMUM_HAND_COUNT));
hand_pose.setRevision(VNDetectHumanHandPoseRequestRevision1);
let animals = VNRecognizeAnimalsRequest::new();
animals.setRevision(VNRecognizeAnimalsRequestRevision2);
let animal_body_pose = VNDetectAnimalBodyPoseRequest::new();
animal_body_pose.setRevision(VNDetectAnimalBodyPoseRequestRevision1);
let person_instance_mask = VNGeneratePersonInstanceMaskRequest::new();
person_instance_mask.setRevision(VNGeneratePersonInstanceMaskRequestRevision1);
let person_segmentation = VNGeneratePersonSegmentationRequest::new();
person_segmentation.setRevision(VNGeneratePersonSegmentationRequestRevision1);
let text = VNRecognizeTextRequest::new();
text.setRevision(VNRecognizeTextRequestRevision3);
let barcodes = VNDetectBarcodesRequest::new();
barcodes.setRevision(VNDetectBarcodesRequestRevision4);
let attention_saliency = VNGenerateAttentionBasedSaliencyImageRequest::new();
attention_saliency.setRevision(VNGenerateAttentionBasedSaliencyImageRequestRevision2);
let objectness_saliency = VNGenerateObjectnessBasedSaliencyImageRequest::new();
objectness_saliency.setRevision(VNGenerateObjectnessBasedSaliencyImageRequestRevision2);
let horizon = VNDetectHorizonRequest::new();
horizon.setRevision(VNDetectHorizonRequestRevision1);
let document_segments = VNDetectDocumentSegmentationRequest::new();
document_segments.setRevision(VNDetectDocumentSegmentationRequestRevision1);
let aesthetics = VNCalculateImageAestheticsScoresRequest::new();
aesthetics.setRevision(VNCalculateImageAestheticsScoresRequestRevision1);
// The braces around `human_rectangles` and `hand_pose` below are plain block
// expressions that evaluate to the local of the same name.
Self {
classify,
face_rectangles,
face_landmarks,
face_quality,
human_rectangles: { human_rectangles },
body_pose,
body_pose_3d,
hand_pose: { hand_pose },
animals,
animal_body_pose,
person_instance_mask,
person_segmentation,
text,
barcodes,
attention_saliency,
objectness_saliency,
horizon,
document_segments,
aesthetics,
}
}
}
/// Performs all nineteen requests against `data` in a single handler call.
///
/// # Errors
///
/// Returns an `ErrorInfo` with `ErrorCode::AppleVisionRequestFailed` when the handler
/// reports an error. The message is the error's localized description, or a fixed
/// placeholder when the description does not pass `ffi_nsstring_to_smolstr`.
fn perform(&self, handler: &VNSequenceRequestHandler, data: &NSData) -> Result<(), ErrorInfo> {
unsafe {
// Each request is cloned (retain count bump) and cast to the common `VNRequest` type.
// NOTE(review): `cast_unchecked` relies on every concrete request type being a
// `VNRequest` subclass — confirm if a new request kind is added.
let requests = NSArray::from_retained_slice(&[
Retained::cast_unchecked::<VNRequest>(self.classify.clone()),
Retained::cast_unchecked::<VNRequest>(self.face_rectangles.clone()),
Retained::cast_unchecked::<VNRequest>(self.face_landmarks.clone()),
Retained::cast_unchecked::<VNRequest>(self.face_quality.clone()),
Retained::cast_unchecked::<VNRequest>(self.human_rectangles.clone()),
Retained::cast_unchecked::<VNRequest>(self.body_pose.clone()),
Retained::cast_unchecked::<VNRequest>(self.body_pose_3d.clone()),
Retained::cast_unchecked::<VNRequest>(self.hand_pose.clone()),
Retained::cast_unchecked::<VNRequest>(self.animals.clone()),
Retained::cast_unchecked::<VNRequest>(self.animal_body_pose.clone()),
Retained::cast_unchecked::<VNRequest>(self.person_instance_mask.clone()),
Retained::cast_unchecked::<VNRequest>(self.person_segmentation.clone()),
Retained::cast_unchecked::<VNRequest>(self.text.clone()),
Retained::cast_unchecked::<VNRequest>(self.barcodes.clone()),
Retained::cast_unchecked::<VNRequest>(self.attention_saliency.clone()),
Retained::cast_unchecked::<VNRequest>(self.objectness_saliency.clone()),
Retained::cast_unchecked::<VNRequest>(self.horizon.clone()),
Retained::cast_unchecked::<VNRequest>(self.document_segments.clone()),
Retained::cast_unchecked::<VNRequest>(self.aesthetics.clone()),
]);
handler
.performRequests_onImageData_error(&requests, data)
.map_err(|e| {
let raw = e.localizedDescription();
// Fall back to a fixed message when the description fails the FFI string check.
let message: SmolStr = ffi_nsstring_to_smolstr(&raw).unwrap_or_else(|| {
SmolStr::new_static("apple-vision request failed (description elided)")
});
apple_vision_error(ErrorCode::AppleVisionRequestFailed, message)
})
}
}
}
#[cfg(target_vendor = "apple")]
impl VisionAnalyzer {
/// Creates an analyzer configured by `opts`.
///
/// All Vision request objects are built here, once, and reused by every later call to
/// `analyze_keyframe`.
#[cfg_attr(not(tarpaulin), inline(always))]
pub fn new(opts: ServiceOptions) -> Self {
    let requests = VisionRequests::new(opts.clone());
    Self { opts, requests }
}
/// Emits one `tracing` info event listing the revision reported by every Vision request.
///
/// `svc` and `worker_id` are recorded as the `service` and `worker` fields of the event.
/// Only compiled with the `tracing` feature.
// NOTE(review): `dead_code` is allowed, presumably because not every build calls this — confirm.
#[cfg(feature = "tracing")]
#[allow(dead_code)] fn log_request_revisions(&self, svc: &'static str, worker_id: usize) {
// The `revision()` getters are unsafe bindings, hence the enclosing block.
unsafe {
tracing::info!(
service = svc,
worker = worker_id,
classify_rev = self.requests.classify.revision(),
face_rectangles_rev = self.requests.face_rectangles.revision(),
face_landmarks_rev = self.requests.face_landmarks.revision(),
face_quality_rev = self.requests.face_quality.revision(),
human_rectangles_rev = self.requests.human_rectangles.revision(),
body_pose_rev = self.requests.body_pose.revision(),
body_pose_3d_rev = self.requests.body_pose_3d.revision(),
hand_pose_rev = self.requests.hand_pose.revision(),
animals_rev = self.requests.animals.revision(),
animal_body_pose_rev = self.requests.animal_body_pose.revision(),
person_instance_mask_rev = self.requests.person_instance_mask.revision(),
person_segmentation_rev = self.requests.person_segmentation.revision(),
text_rev = self.requests.text.revision(),
barcodes_rev = self.requests.barcodes.revision(),
attention_saliency_rev = self.requests.attention_saliency.revision(),
objectness_saliency_rev = self.requests.objectness_saliency.revision(),
horizon_rev = self.requests.horizon.revision(),
document_segments_rev = self.requests.document_segments.revision(),
aesthetics_rev = self.requests.aesthetics.revision(),
"initialized pinned Apple Vision request revisions"
);
}
}
/// Runs every configured Vision request on one encoded image and assembles a `Keyframe`.
///
/// `scene_id`, `keyframe_id`, `pts`, `dimensions` and `extractor` are passed to
/// `Keyframe::try_new`. `jpeg_data` is handed to Vision as raw image data.
/// NOTE(review): the parameter name implies JPEG, but nothing here checks the format.
///
/// # Errors
///
/// Returns an `ErrorInfo` with `ErrorCode::AppleVisionRequestFailed` when:
/// - `jpeg_data` is larger than `MAX_INPUT_IMAGE_BYTES`;
/// - `Keyframe::try_new` rejects the identifiers, timestamp, dimensions or extractor;
/// - performing the Vision requests fails.
pub fn analyze_keyframe(
&self,
scene_id: Id,
keyframe_id: Id,
pts: Timestamp,
dimensions: Dimensions,
extractor: KeyframeExtractor,
jpeg_data: &[u8],
) -> Result<Keyframe, ErrorInfo> {
// Reject oversized inputs before any data is copied into Foundation objects.
if jpeg_data.len() > MAX_INPUT_IMAGE_BYTES {
return Err(apple_vision_error(
ErrorCode::AppleVisionRequestFailed,
SmolStr::new_static("input image exceeds MAX_INPUT_IMAGE_BYTES"),
));
}
let keyframe =
Keyframe::try_new(keyframe_id, scene_id, pts, dimensions, extractor).map_err(|e| {
apple_vision_error(
ErrorCode::AppleVisionRequestFailed,
SmolStr::from(format!("keyframe construction failed: {e}")),
)
})?;
// Everything that touches Objective-C objects runs inside one autorelease pool scope.
objc2::rc::autoreleasepool(|_| {
let ns_data = NSData::with_bytes(jpeg_data);
let handler = unsafe { VNSequenceRequestHandler::new() };
self.requests.perform(&handler, &ns_data)?;
// Mask budgets are shared between instance masks and segmentation masks, so both
// extractors update the same three counters.
let mut mask_total_bytes: usize = 0;
let mut mask_total_count: usize = 0;
let mut mask_total_attempts: usize = 0;
let instance_masks = self.extract_person_instance_masks(
&mut mask_total_bytes,
&mut mask_total_count,
&mut mask_total_attempts,
);
let segmentation_masks = self.extract_person_segmentation_masks(
&mut mask_total_bytes,
&mut mask_total_count,
&mut mask_total_attempts,
);
Ok(
keyframe
.with_classifications(self.extract_classifications())
.with_humans(
HumanAnalysis::new()
.with_subjects(self.extract_human_subjects())
.with_faces(self.extract_faces())
.with_face_rectangles(self.extract_face_rectangles())
.with_face_landmarks(self.extract_face_landmarks())
.with_body_poses(self.extract_body_poses())
.with_hand_poses(self.extract_hand_poses())
.with_body_poses_3d(self.extract_body_poses_3d())
.with_instance_masks(instance_masks)
.with_segmentation_masks(segmentation_masks),
)
.with_animals(
AnimalAnalysis::new()
.with_subjects(self.extract_animal_subjects())
.with_body_poses(self.extract_animal_body_poses()),
)
.with_text_detections(self.extract_text_detections())
.with_barcodes(self.extract_barcodes())
.with_attention_saliency(self.extract_attention_saliency())
.with_objectness_saliency(self.extract_objectness_saliency())
.with_horizon(self.extract_horizon())
.with_document_segments(self.extract_document_segments())
.with_aesthetics(self.extract_aesthetics()),
)
})
}
/// Collects classification tags from the classify request's results.
///
/// At most `effective_results_cap(max_results)` observations are inspected, in the order
/// Vision returned them. An observation is dropped when its confidence fails
/// `sanitize_confidence`, its identifier fails `ffi_nsstring_to_smolstr`, its normalized
/// label is empty, or `Detection::try_new` rejects it.
fn extract_classifications(&self) -> Vec<Detection> {
    let opts = self.opts.classifications();
    let Some(results) = (unsafe { self.requests.classify.results() }) else {
        return Vec::new();
    };
    let cap = effective_results_cap(opts.max_results());
    let min_confidence = opts.min_confidence();
    // Reserve only what can actually be produced: never more than the number of results
    // and never more than the cap.
    let mut tags = Vec::with_capacity(results.len().min(cap));
    // `take(cap)` already bounds the number of pushes, so no per-iteration length check
    // is needed.
    for obs in results.iter().take(cap) {
        let Some(confidence) = sanitize_confidence(unsafe { obs.confidence() }, min_confidence)
        else {
            continue;
        };
        let identifier = unsafe { obs.identifier() };
        let Some(label) = ffi_nsstring_to_smolstr(&identifier) else {
            continue;
        };
        let label = normalize_classification_label(label);
        if label.is_empty() {
            continue;
        }
        if let Ok(detection) = Detection::try_new(label, confidence) {
            tags.push(detection);
        }
    }
    tags
}
fn extract_faces(&self) -> Vec<FaceDetection> {
let Some(results) = (unsafe { self.requests.face_quality.results() }) else {
return Vec::new();
};
let opts = self.opts.face_capture();
let mut faces = Vec::with_capacity(results.len().min(MAX_VISION_RESULTS_PER_FRAME));
for obs in results.iter().take(MAX_VISION_RESULTS_PER_FRAME) {
let Some(confidence) =
sanitize_confidence(unsafe { obs.confidence() }, opts.min_confidence())
else {
continue;
};
let Some(capture_quality) =
sanitize_capture_quality(unsafe { obs.faceCaptureQuality() }.map(|q| q.floatValue()))
else {
continue;
};
if capture_quality < opts.min_capture_quality() {
continue;
}
let Some(bbox) = vision_bbox_to_schema(unsafe { obs.boundingBox() }.standardize()) else {
continue;
};
let roll = unsafe { obs.roll() }
.map(|v| v.floatValue())
.and_then(finite_f32)
.unwrap_or(0.0);
let yaw = unsafe { obs.yaw() }
.map(|v| v.floatValue())
.and_then(finite_f32)
.unwrap_or(0.0);
let pitch = unsafe { obs.pitch() }
.map(|v| v.floatValue())
.and_then(finite_f32)
.unwrap_or(0.0);
if let Ok(face) = FaceDetection::try_new(bbox, confidence, capture_quality, roll, yaw, pitch)
{
faces.push(face);
}
}
faces
}
fn extract_face_rectangles(&self) -> Vec<FaceDetection> {
let Some(results) = (unsafe { self.requests.face_rectangles.results() }) else {
return Vec::new();
};
let opts = self.opts.face_rectangles();
let mut faces = Vec::with_capacity(results.len().min(MAX_VISION_RESULTS_PER_FRAME));
for obs in results.iter().take(MAX_VISION_RESULTS_PER_FRAME) {
let Some(confidence) =
sanitize_confidence(unsafe { obs.confidence() }, opts.min_confidence())
else {
continue;
};
let Some(bbox) = vision_bbox_to_schema(unsafe { obs.boundingBox() }.standardize()) else {
continue;
};
let roll = unsafe { obs.roll() }
.map(|v| v.floatValue())
.and_then(finite_f32)
.unwrap_or(0.0);
let yaw = unsafe { obs.yaw() }
.map(|v| v.floatValue())
.and_then(finite_f32)
.unwrap_or(0.0);
let pitch = unsafe { obs.pitch() }
.map(|v| v.floatValue())
.and_then(finite_f32)
.unwrap_or(0.0);
if let Ok(face) = FaceDetection::try_new(bbox, confidence, 0.0, roll, yaw, pitch) {
faces.push(face);
}
}
faces
}
/// Collects face landmark detections from the face-landmarks request.
///
/// Two per-frame budgets bound the work: a point budget
/// (`MAX_FACE_LANDMARK_POINTS_PER_FRAME`) and an attempt budget
/// (`MAX_FACE_LANDMARK_ATTEMPTS_PER_FRAME`). Points are charged against the budget only
/// for observations that are kept. Attempts are charged by the region extractor whether
/// or not the observation is kept.
fn extract_face_landmarks(&self) -> Vec<FaceLandmarksDetection> {
    let Some(results) = (unsafe { self.requests.face_landmarks.results() }) else {
        return Vec::new();
    };
    let opts = self.opts.face_landmarks();
    let mut detections = Vec::with_capacity(results.len().min(MAX_VISION_RESULTS_PER_FRAME));
    let mut total_points_remaining: usize = MAX_FACE_LANDMARK_POINTS_PER_FRAME;
    let mut total_landmark_attempts: usize = 0;
    for obs in results.iter().take(MAX_VISION_RESULTS_PER_FRAME) {
        if total_points_remaining == 0
            || total_landmark_attempts >= MAX_FACE_LANDMARK_ATTEMPTS_PER_FRAME
        {
            break;
        }
        let Some(landmarks) = (unsafe { obs.landmarks() }) else {
            continue;
        };
        let Some(confidence) =
            sanitize_confidence(unsafe { landmarks.confidence() }, opts.min_confidence())
        else {
            continue;
        };
        let face_rect_vision = unsafe { obs.boundingBox() }.standardize();
        // Convert the face box once, before any region work. The conversion is pure, so
        // the result stays valid for the detection built below.
        let Some(bbox) = vision_bbox_to_schema(face_rect_vision) else {
            continue;
        };
        // Spend points against a scratch copy and commit only if the observation is kept.
        let mut tentative_remaining = total_points_remaining;
        let regions = extract_face_landmark_regions(
            &landmarks,
            face_rect_vision,
            &mut tentative_remaining,
            &mut total_landmark_attempts,
        );
        if regions.len() < opts.min_region_count() {
            continue;
        }
        let Ok(detection) = FaceLandmarksDetection::try_new(bbox, confidence, regions) else {
            continue;
        };
        total_points_remaining = tentative_remaining;
        detections.push(detection);
    }
    detections
}
/// Collects human subject detections from the human-rectangles request.
///
/// Every kept observation is labeled `"person"`. An observation is dropped when its
/// confidence fails sanitization, its bounding box cannot be converted, or
/// `Detection::try_new` rejects it.
fn extract_human_subjects(&self) -> Vec<SubjectDetection> {
    let Some(results) = (unsafe { self.requests.human_rectangles.results() }) else {
        return Vec::new();
    };
    let opts = self.opts.human_subjects();
    let mut humans = Vec::with_capacity(results.len().min(MAX_VISION_RESULTS_PER_FRAME));
    let candidates = results.iter().take(MAX_VISION_RESULTS_PER_FRAME);
    humans.extend(candidates.filter_map(|obs| {
        let confidence = sanitize_confidence(unsafe { obs.confidence() }, opts.min_confidence())?;
        let bbox = vision_bbox_to_schema(unsafe { obs.boundingBox() }.standardize())?;
        let detection = Detection::try_new(SmolStr::new_static("person"), confidence).ok()?;
        Some(SubjectDetection::new(detection, bbox))
    }));
    humans
}
/// Collects 2D human body poses from the body-pose request.
///
/// For each observation, every recognized joint is converted to schema coordinates and
/// filtered by the configured minimum joint confidence. The pose bounding box is the
/// extent of the joints that were actually kept. A pose is dropped when it reports more
/// than `MAX_POSE_JOINTS` joints, keeps no joints, has a degenerate bounding box, has an
/// unusable overall confidence, or is rejected by `BodyPoseDetection::try_new`.
fn extract_body_poses(&self) -> Vec<BodyPoseDetection> {
    let Some(results) = (unsafe { self.requests.body_pose.results() }) else {
        return Vec::new();
    };
    // Loop-invariant: read the threshold once instead of once per joint.
    let min_joint_confidence = self.opts.body_pose().min_joint_confidence();
    let mut body_poses = Vec::with_capacity(results.len().min(MAX_VISION_RESULTS_PER_FRAME));
    for obs in results.iter().take(MAX_VISION_RESULTS_PER_FRAME) {
        let Ok(points_by_joint) = (unsafe {
            obs.recognizedPointsForJointsGroupName_error(VNHumanBodyPoseObservationJointsGroupNameAll)
        }) else {
            continue;
        };
        if points_by_joint.len() > MAX_POSE_JOINTS {
            continue;
        }
        let (joint_names, points) = points_by_joint.to_vecs();
        let mut joints = Vec::with_capacity(points.len());
        let mut min_x = f32::INFINITY;
        let mut min_y = f32::INFINITY;
        let mut max_x = f32::NEG_INFINITY;
        let mut max_y = f32::NEG_INFINITY;
        for (joint_name, point) in joint_names.into_iter().zip(points) {
            let Some(name) = ffi_nsstring_to_smolstr(&joint_name) else {
                continue;
            };
            if name.is_empty() {
                continue;
            }
            let Some((x, y)) = vision_point_to_schema(unsafe { point.x() }, unsafe { point.y() })
            else {
                continue;
            };
            let Some(confidence) =
                sanitize_confidence(unsafe { point.confidence() }, min_joint_confidence)
            else {
                continue;
            };
            let Ok(joint) = BodyPoseJoint::try_new(name, x, y, confidence) else {
                continue;
            };
            // Widen the bounds only after the joint has been accepted, so a rejected joint
            // cannot enlarge the pose box.
            min_x = min_x.min(x);
            min_y = min_y.min(y);
            max_x = max_x.max(x);
            max_y = max_y.max(y);
            joints.push(joint);
        }
        if joints.is_empty() {
            continue;
        }
        let Some(bbox) = pose_bbox_from_joint_bounds(min_x, min_y, max_x, max_y) else {
            continue;
        };
        let Some(pose_confidence) = sanitize_confidence(unsafe { obs.confidence() }, 0.0) else {
            continue;
        };
        // Sort by joint name so the output order does not depend on dictionary iteration.
        joints.sort_by(|lhs, rhs| lhs.name().cmp(rhs.name()));
        if let Ok(pose) = BodyPoseDetection::try_new(bbox, pose_confidence, joints) {
            body_poses.push(pose);
        }
    }
    body_poses
}
/// Collects 3D human body poses from the 3D body-pose request.
///
/// The whole extraction runs under `catch_unwind`; if it panics, an empty vector is
/// returned (and a warning is logged when the `tracing` feature is enabled).
///
/// A pose is dropped when it reports more than `MAX_POSE_JOINTS` joints, keeps no joints,
/// has an unusable overall confidence, or is rejected by `BodyPose3DDetection::try_new`.
fn extract_body_poses_3d(&self) -> Vec<BodyPose3DDetection> {
// NOTE(review): `AssertUnwindSafe` is asserted, not proven, here; the closure only reads
// through `&self` in the visible code.
catch_unwind(AssertUnwindSafe(|| {
let Some(results) = (unsafe { self.requests.body_pose_3d.results() }) else {
return Vec::new();
};
// The joints-group constant is optional in the bindings; without it nothing can be queried.
let Some(group_name) = (unsafe { VNHumanBodyPose3DObservationJointsGroupNameAll }) else {
return Vec::new();
};
let mut body_poses = Vec::with_capacity(results.len().min(MAX_VISION_RESULTS_PER_FRAME));
for obs in results.iter().take(MAX_VISION_RESULTS_PER_FRAME) {
let Ok(points_by_joint) =
(unsafe { obs.recognizedPointsForJointsGroupName_error(group_name) })
else {
continue;
};
if points_by_joint.len() > MAX_POSE_JOINTS {
continue;
}
let (joint_names, points) = points_by_joint.to_vecs();
let mut joints = Vec::with_capacity(points.len());
for (joint_name, point) in joint_names.into_iter().zip(points) {
let Some(name) = ffi_nsstring_to_smolstr(&joint_name) else {
continue;
};
if name.is_empty() {
continue;
}
let Some((x, y, z)) = extract_body_pose_3d_coordinates(&point) else {
continue;
};
// The confidence is read with a raw message send rather than a generated binding.
// NOTE(review): this assumes the receiver responds to `confidence` with a float — confirm.
let raw_confidence: f32 = unsafe { objc2::msg_send![&*point, confidence] };
let Some(confidence) = sanitize_confidence(
raw_confidence,
self.opts.body_pose_3d().min_joint_confidence(),
) else {
continue;
};
let Ok(joint) = BodyPose3DJoint::try_new(name, x, y, z, confidence) else {
continue;
};
joints.push(joint);
}
if joints.is_empty() {
continue;
}
let Some(pose_confidence) = sanitize_confidence(unsafe { obs.confidence() }, 0.0) else {
continue;
};
// Sort by joint name so the output order does not depend on dictionary iteration.
joints.sort_by(|lhs, rhs| lhs.name().cmp(rhs.name()));
let mapped_estimation =
map_body_pose_3d_height_estimation(unsafe { obs.heightEstimation() });
// A non-finite height is replaced by 0.0 with an `Unknown` estimation kind.
let (body_height, height_estimation) =
sanitize_body_height_pair(unsafe { obs.bodyHeight() }, mapped_estimation);
if let Ok(pose) =
BodyPose3DDetection::try_new(pose_confidence, body_height, height_estimation, joints)
{
body_poses.push(pose);
}
}
body_poses
}))
.unwrap_or_else(|_| {
#[cfg(feature = "tracing")]
tracing::warn!("caught panic while extracting human body pose 3D; returning empty result");
Vec::new()
})
}
/// Collects hand poses from the hand-pose request.
///
/// For each observation, every recognized joint is converted to schema coordinates and
/// filtered by the configured minimum joint confidence. The pose bounding box is the
/// extent of the joints that were actually kept. A pose is dropped when it reports more
/// than `MAX_POSE_JOINTS` joints, keeps no joints, has a degenerate bounding box, has an
/// unusable overall confidence, or is rejected by `HandPoseDetection::try_new`.
fn extract_hand_poses(&self) -> Vec<HandPoseDetection> {
    let Some(results) = (unsafe { self.requests.hand_pose.results() }) else {
        return Vec::new();
    };
    // Loop-invariant: read the threshold once instead of once per joint.
    let min_joint_confidence = self.opts.hand_pose().min_joint_confidence();
    let mut hand_poses = Vec::with_capacity(results.len().min(MAX_VISION_RESULTS_PER_FRAME));
    for obs in results.iter().take(MAX_VISION_RESULTS_PER_FRAME) {
        let Ok(points_by_joint) = (unsafe {
            obs.recognizedPointsForJointsGroupName_error(VNHumanHandPoseObservationJointsGroupNameAll)
        }) else {
            continue;
        };
        if points_by_joint.len() > MAX_POSE_JOINTS {
            continue;
        }
        let (joint_names, points) = points_by_joint.to_vecs();
        let mut joints = Vec::with_capacity(points.len());
        let mut min_x = f32::INFINITY;
        let mut min_y = f32::INFINITY;
        let mut max_x = f32::NEG_INFINITY;
        let mut max_y = f32::NEG_INFINITY;
        for (joint_name, point) in joint_names.into_iter().zip(points) {
            let Some(name) = ffi_nsstring_to_smolstr(&joint_name) else {
                continue;
            };
            if name.is_empty() {
                continue;
            }
            let Some((x, y)) = vision_point_to_schema(unsafe { point.x() }, unsafe { point.y() })
            else {
                continue;
            };
            let Some(confidence) =
                sanitize_confidence(unsafe { point.confidence() }, min_joint_confidence)
            else {
                continue;
            };
            let Ok(joint) = BodyPoseJoint::try_new(name, x, y, confidence) else {
                continue;
            };
            // Widen the bounds only after the joint has been accepted, so a rejected joint
            // cannot enlarge the pose box.
            min_x = min_x.min(x);
            min_y = min_y.min(y);
            max_x = max_x.max(x);
            max_y = max_y.max(y);
            joints.push(joint);
        }
        if joints.is_empty() {
            continue;
        }
        let Some(bbox) = pose_bbox_from_joint_bounds(min_x, min_y, max_x, max_y) else {
            continue;
        };
        let Some(pose_confidence) = sanitize_confidence(unsafe { obs.confidence() }, 0.0) else {
            continue;
        };
        // Sort by joint name so the output order does not depend on dictionary iteration.
        joints.sort_by(|lhs, rhs| lhs.name().cmp(rhs.name()));
        if let Ok(pose) = HandPoseDetection::try_new(
            bbox,
            pose_confidence,
            map_hand_chirality(unsafe { obs.chirality() }),
            joints,
        ) {
            hand_poses.push(pose);
        }
    }
    hand_poses
}
/// Generates one mask per person instance from the person-instance-mask request.
///
/// The three counters are per-frame budgets owned by the caller. `total_mask_bytes` and
/// `total_mask_count` grow only when a mask is kept. `total_mask_attempts` grows for every
/// mask generation that is attempted. Extraction stops once any budget is exhausted.
fn extract_person_instance_masks(
    &self,
    total_mask_bytes: &mut usize,
    total_mask_count: &mut usize,
    total_mask_attempts: &mut usize,
) -> Vec<PersonInstanceMaskDetection> {
    let Some(results) = (unsafe { self.requests.person_instance_mask.results() }) else {
        return Vec::new();
    };
    let opts = self.opts.person_instance_masks();
    let mut masks = Vec::new();
    'outer: for observation in results.iter().take(MAX_VISION_RESULTS_PER_FRAME) {
        let kept_budget_spent = *total_mask_count >= MAX_TOTAL_MASKS_PER_FRAME
            || *total_mask_bytes >= MAX_TOTAL_MASK_BYTES_PER_FRAME;
        if kept_budget_spent {
            break;
        }
        let Some(confidence) =
            sanitize_confidence(unsafe { observation.confidence() }, opts.min_confidence())
        else {
            continue;
        };
        let instances = unsafe { observation.allInstances() };
        let mut instance_index = instances.firstIndex();
        let mut visited = 0usize;
        let inner_cap = opts
            .max_instances_per_observation()
            .min(MAX_NESTED_INSTANCES_PER_OBSERVATION);
        while instance_index != NSNotFound as usize && visited < inner_cap {
            visited += 1;
            let frame_budget_spent = *total_mask_count >= MAX_TOTAL_MASKS_PER_FRAME
                || *total_mask_bytes >= MAX_TOTAL_MASK_BYTES_PER_FRAME
                || *total_mask_attempts >= MAX_TOTAL_MASK_ATTEMPTS_PER_FRAME;
            if frame_budget_spent {
                break 'outer;
            }
            // Every early exit from this block falls through to the single index advance
            // below, so a skipped instance can never stall the loop.
            'attempt: {
                let Ok(wire_instance_index) = u32::try_from(instance_index) else {
                    break 'attempt;
                };
                *total_mask_attempts = total_mask_attempts.saturating_add(1);
                let selected_instances = NSIndexSet::indexSetWithIndex(instance_index);
                let Ok(mask_buffer) =
                    (unsafe { observation.generateMaskForInstances_error(&selected_instances) })
                else {
                    break 'attempt;
                };
                let remaining_budget =
                    MAX_TOTAL_MASK_BYTES_PER_FRAME.saturating_sub(*total_mask_bytes);
                let Some((bbox, dimensions, data)) =
                    copy_instance_mask_buffer(&mask_buffer, remaining_budget)
                else {
                    break 'attempt;
                };
                let data_len = data.len();
                // A mask rejected by the schema constructor is dropped without charging
                // the byte or count budgets.
                if let Ok(mask) = PersonInstanceMaskDetection::try_new(
                    bbox,
                    confidence,
                    wire_instance_index,
                    dimensions,
                    data,
                ) {
                    *total_mask_bytes = total_mask_bytes.saturating_add(data_len);
                    *total_mask_count = total_mask_count.saturating_add(1);
                    masks.push(mask);
                }
            }
            instance_index = instances.indexGreaterThanIndex(instance_index);
        }
    }
    masks
}
/// Copies person segmentation masks from the person-segmentation request.
///
/// The three counters are per-frame budgets owned by the caller and shared with instance
/// mask extraction. Byte and count budgets grow only when a mask is kept. The attempt
/// budget grows for every observation whose pixel buffer is read.
fn extract_person_segmentation_masks(
    &self,
    total_mask_bytes: &mut usize,
    total_mask_count: &mut usize,
    total_mask_attempts: &mut usize,
) -> Vec<PersonSegmentationMask> {
    let Some(results) = (unsafe { self.requests.person_segmentation.results() }) else {
        return Vec::new();
    };
    let opts = self.opts.person_segmentation_masks();
    let mut masks = Vec::new();
    for observation in results.iter().take(MAX_VISION_RESULTS_PER_FRAME) {
        let frame_budget_spent = *total_mask_count >= MAX_TOTAL_MASKS_PER_FRAME
            || *total_mask_bytes >= MAX_TOTAL_MASK_BYTES_PER_FRAME
            || *total_mask_attempts >= MAX_TOTAL_MASK_ATTEMPTS_PER_FRAME;
        if frame_budget_spent {
            break;
        }
        let confidence =
            match sanitize_confidence(unsafe { observation.confidence() }, opts.min_confidence()) {
                Some(score) => score,
                None => continue,
            };
        *total_mask_attempts = total_mask_attempts.saturating_add(1);
        let pixel_buffer = unsafe { observation.pixelBuffer() };
        let remaining_budget = MAX_TOTAL_MASK_BYTES_PER_FRAME.saturating_sub(*total_mask_bytes);
        let Some((bbox, dimensions, data)) =
            copy_instance_mask_buffer(&pixel_buffer, remaining_budget)
        else {
            continue;
        };
        let payload_len = data.len();
        let Ok(mask) = PersonSegmentationMask::try_new(bbox, confidence, dimensions, data) else {
            continue;
        };
        *total_mask_bytes = total_mask_bytes.saturating_add(payload_len);
        *total_mask_count = total_mask_count.saturating_add(1);
        masks.push(mask);
    }
    masks
}
/// Converts the animal-recognition request results into subject detections.
///
/// Every label of an observation that passes the configured minimum confidence and
/// has a non-empty identifier yields one `SubjectDetection` carrying the observation's
/// bounding box. At most `MAX_TOTAL_ANIMAL_SUBJECTS_PER_FRAME` detections are returned.
fn extract_animal_subjects(&self) -> Vec<SubjectDetection> {
  unsafe {
    let Some(observations) = self.requests.animals.results() else {
      return Vec::new();
    };
    let mut subjects = Vec::with_capacity(MAX_TOTAL_ANIMAL_SUBJECTS_PER_FRAME);
    'observations: for observation in observations.iter().take(MAX_VISION_RESULTS_PER_FRAME) {
      if subjects.len() >= MAX_TOTAL_ANIMAL_SUBJECTS_PER_FRAME {
        break;
      }
      let labels = observation.labels();
      for label in labels.iter().take(MAX_NESTED_LABELS_PER_OBSERVATION) {
        if subjects.len() >= MAX_TOTAL_ANIMAL_SUBJECTS_PER_FRAME {
          break 'observations;
        }
        let raw_confidence = label.confidence();
        let Some(confidence) =
          sanitize_confidence(raw_confidence, self.opts.animals().min_confidence())
        else {
          continue;
        };
        let identifier = label.identifier();
        let Some(id) = ffi_nsstring_to_smolstr(&identifier) else {
          continue;
        };
        if id.is_empty() {
          continue;
        }
        let Some(bbox) = vision_bbox_to_schema(observation.boundingBox().standardize()) else {
          continue;
        };
        let Ok(detection) = Detection::try_new(id, confidence) else {
          continue;
        };
        subjects.push(SubjectDetection::new(detection, bbox));
      }
    }
    subjects
  }
}
/// Converts the animal body-pose request results into `BodyPoseDetection`s.
///
/// For each observation the joints of the "all" joints group are read, filtered by
/// name, coordinate validity and `min_joint_confidence`, and converted to
/// `BodyPoseJoint`s. The pose bounding box is derived from the extremes of the joints
/// that were actually emitted; a joint rejected by `BodyPoseJoint::try_new` no longer
/// widens the box. Observations with no usable joints, a degenerate box or an invalid
/// confidence are dropped.
fn extract_animal_body_poses(&self) -> Vec<BodyPoseDetection> {
  let Some(results) = (unsafe { self.requests.animal_body_pose.results() }) else {
    return Vec::new();
  };
  let Some(group_name) = (unsafe { VNAnimalBodyPoseObservationJointsGroupNameAll }) else {
    return Vec::new();
  };
  let mut body_poses = Vec::with_capacity(results.len().min(MAX_VISION_RESULTS_PER_FRAME));
  for obs in results.iter().take(MAX_VISION_RESULTS_PER_FRAME) {
    let Ok(points_by_joint) =
      (unsafe { obs.recognizedPointsForJointsGroupName_error(group_name) })
    else {
      continue;
    };
    // Oversized joint dictionaries are skipped wholesale rather than truncated.
    if points_by_joint.len() > MAX_POSE_JOINTS {
      continue;
    }
    let (joint_names, points) = points_by_joint.to_vecs();
    let mut joints = Vec::with_capacity(points.len());
    let mut min_x = f32::INFINITY;
    let mut min_y = f32::INFINITY;
    let mut max_x = f32::NEG_INFINITY;
    let mut max_y = f32::NEG_INFINITY;
    for (joint_name, point) in joint_names.into_iter().zip(points) {
      let Some(name) = ffi_nsstring_to_smolstr(&joint_name) else {
        continue;
      };
      if name.is_empty() {
        continue;
      }
      let Some((x, y)) = vision_point_to_schema(unsafe { point.x() }, unsafe { point.y() })
      else {
        continue;
      };
      let Some(confidence) = sanitize_confidence(
        unsafe { point.confidence() },
        self.opts.animal_pose().min_joint_confidence(),
      ) else {
        continue;
      };
      let Ok(joint) = BodyPoseJoint::try_new(name, x, y, confidence) else {
        continue;
      };
      // Only accepted joints contribute to the bounds, so the box always encloses
      // exactly the joints present in the emitted pose.
      min_x = min_x.min(x);
      min_y = min_y.min(y);
      max_x = max_x.max(x);
      max_y = max_y.max(y);
      joints.push(joint);
    }
    if joints.is_empty() {
      continue;
    }
    let Some(bbox) = pose_bbox_from_joint_bounds(min_x, min_y, max_x, max_y) else {
      continue;
    };
    // No minimum threshold is applied to the observation-level confidence here.
    let Some(pose_confidence) = sanitize_confidence(unsafe { obs.confidence() }, 0.0) else {
      continue;
    };
    // Joints are ordered by name before emission (presumably for stable output order).
    joints.sort_by(|lhs, rhs| lhs.name().cmp(rhs.name()));
    if let Ok(pose) = BodyPoseDetection::try_new(bbox, pose_confidence, joints) {
      body_poses.push(pose);
    }
  }
  body_poses
}
/// Converts the text-recognition request results into `TextDetection`s.
///
/// Up to the configured (and hard-capped) number of top candidates is read per
/// observation. Candidates shorter than `min_text_len` bytes, or with an unusable
/// confidence or bounding box, are skipped. At most
/// `MAX_TOTAL_TEXT_DETECTIONS_PER_FRAME` detections are returned.
fn extract_text_detections(&self) -> Vec<TextDetection> {
  let Some(observations) = self.requests.text.results() else {
    return Vec::new();
  };
  // The per-observation candidate cap does not depend on the observation.
  let per_observation_cap = self
    .opts
    .text()
    .max_candidates_per_observation()
    .min(MAX_TEXT_CANDIDATES_PER_OBSERVATION);
  let mut detections = Vec::with_capacity(MAX_TOTAL_TEXT_DETECTIONS_PER_FRAME);
  'observations: for observation in observations.iter().take(MAX_VISION_RESULTS_PER_FRAME) {
    if detections.len() >= MAX_TOTAL_TEXT_DETECTIONS_PER_FRAME {
      break;
    }
    let candidates = observation.topCandidates(per_observation_cap);
    for candidate in candidates.iter().take(per_observation_cap) {
      if detections.len() >= MAX_TOTAL_TEXT_DETECTIONS_PER_FRAME {
        break 'observations;
      }
      let raw_string = candidate.string();
      let Some(text) = ffi_nsstring_to_smolstr(&raw_string) else {
        continue;
      };
      if text.len() < self.opts.text().min_text_len() {
        continue;
      }
      let Some(confidence) = sanitize_confidence(candidate.confidence(), 0.0) else {
        continue;
      };
      let Some(bbox) = vision_bbox_to_schema(unsafe { observation.boundingBox() }.standardize())
      else {
        continue;
      };
      let Ok(detection) = TextDetection::try_new(text, confidence, bbox) else {
        continue;
      };
      detections.push(detection);
    }
  }
  detections
}
/// Converts the barcode request results into `BarcodeDetection`s.
///
/// An observation is emitted only when its confidence passes the configured minimum,
/// it has a string payload of at least `min_payload_len` bytes, its bounding box
/// converts to schema space and its symbology string is readable.
fn extract_barcodes(&self) -> Vec<BarcodeDetection> {
  let Some(observations) = (unsafe { self.requests.barcodes.results() }) else {
    return Vec::new();
  };
  let barcode_opts = self.opts.barcodes();
  let mut detections =
    Vec::with_capacity(observations.len().min(MAX_VISION_RESULTS_PER_FRAME));
  for observation in observations.iter().take(MAX_VISION_RESULTS_PER_FRAME) {
    let raw_confidence = unsafe { observation.confidence() };
    let Some(confidence) = sanitize_confidence(raw_confidence, barcode_opts.min_confidence())
    else {
      continue;
    };
    let Some(raw_payload) = (unsafe { observation.payloadStringValue() }) else {
      continue;
    };
    let Some(payload) = ffi_nsstring_to_smolstr(&raw_payload) else {
      continue;
    };
    if payload.len() < barcode_opts.min_payload_len() {
      continue;
    }
    let Some(bbox) = vision_bbox_to_schema(unsafe { observation.boundingBox() }.standardize())
    else {
      continue;
    };
    let raw_symbology = unsafe { observation.symbology() };
    let Some(symbology) = ffi_nsstring_to_smolstr(&raw_symbology) else {
      continue;
    };
    let Ok(barcode) = BarcodeDetection::try_new(payload, symbology, confidence, bbox) else {
      continue;
    };
    detections.push(barcode);
  }
  detections
}
/// Extracts saliency regions from the attention-saliency request, filtered with the
/// attention-saliency options.
fn extract_attention_saliency(&self) -> Vec<SaliencyRegion> {
  let observations = unsafe { self.requests.attention_saliency.results() };
  let saliency_opts = self.opts.attention_saliency();
  self.extract_saliency_regions(observations, saliency_opts)
}
/// Extracts saliency regions from the objectness-saliency request, filtered with the
/// objectness-saliency options.
fn extract_objectness_saliency(&self) -> Vec<SaliencyRegion> {
  let observations = unsafe { self.requests.objectness_saliency.results() };
  let saliency_opts = self.opts.objectness_saliency();
  self.extract_saliency_regions(observations, saliency_opts)
}
/// Shared conversion of saliency observations into `SaliencyRegion`s.
///
/// `observations` is the optional result array of a saliency request; `None` yields an
/// empty vector. The number of returned regions is bounded by
/// `opts.max_regions()` and `MAX_SALIENCY_REGIONS_PER_FRAME`, whichever is smaller.
/// Salient objects failing the confidence filter, bounding-box conversion or
/// `SaliencyRegion::try_new` are skipped.
fn extract_saliency_regions(
  &self,
  observations: Option<Retained<NSArray<VNSaliencyImageObservation>>>,
  opts: AppleVisionSaliencyOptions,
) -> Vec<SaliencyRegion> {
  let Some(observations) = observations else {
    return Vec::new();
  };
  let region_cap = opts.max_regions().min(MAX_SALIENCY_REGIONS_PER_FRAME);
  let mut regions = Vec::with_capacity(region_cap);
  for observation in observations.iter().take(MAX_VISION_RESULTS_PER_FRAME) {
    if regions.len() >= region_cap {
      break;
    }
    let Some(objects) = (unsafe { observation.salientObjects() }) else {
      continue;
    };
    // Only as many objects as there are free slots are inspected, so the total can
    // never exceed `region_cap` even if every inspected object is accepted.
    let slots_left = region_cap - regions.len();
    let accepted = objects.iter().take(slots_left).filter_map(|object| {
      let confidence =
        sanitize_confidence(unsafe { object.confidence() }, opts.min_confidence())?;
      let bbox = vision_bbox_to_schema(unsafe { object.boundingBox() }.standardize())?;
      SaliencyRegion::try_new(bbox, confidence).ok()
    });
    regions.extend(accepted);
  }
  regions
}
/// Reads the first horizon observation and converts it to `HorizonInfo`.
///
/// Falls back to a zero-angle, zero-confidence `HorizonInfo` when there are no
/// results, the confidence is filtered out, the angle is not finite, or
/// `HorizonInfo::try_new` rejects the values.
fn extract_horizon(&self) -> HorizonInfo {
  let fallback =
    HorizonInfo::try_new(0.0, 0.0).expect("zero confidence + zero angle is in range");
  let detected = (|| -> Option<HorizonInfo> {
    let results = unsafe { self.requests.horizon.results() }?;
    let observation = results.iter().next()?;
    let confidence = sanitize_confidence(
      unsafe { observation.confidence() },
      self.opts.horizon().min_confidence(),
    )?;
    let angle = finite_f32(unsafe { observation.angle() } as f32)?;
    HorizonInfo::try_new(angle, confidence).ok()
  })();
  detected.unwrap_or(fallback)
}
/// Converts document-segmentation observations into `DocumentSegment` quads.
///
/// At most `effective_results_cap(opts.max_segments())` observations are inspected.
/// An observation is dropped when its confidence is filtered out, any corner fails
/// `vision_point_to_schema`, or `DocumentSegment::try_new` rejects the quad.
fn extract_document_segments(&self) -> Vec<DocumentSegment> {
  let Some(observations) = (unsafe { self.requests.document_segments.results() }) else {
    return Vec::new();
  };
  let segment_opts = self.opts.document_segments();
  let cap = effective_results_cap(segment_opts.max_segments());
  let mut segments = Vec::with_capacity(cap);
  // `take(cap)` already bounds the number of pushes, so no extra length check is needed.
  for observation in observations.iter().take(cap) {
    let raw_confidence = unsafe { observation.confidence() };
    let Some(confidence) = sanitize_confidence(raw_confidence, segment_opts.min_confidence())
    else {
      continue;
    };
    // Each corner is read once and converted on its own; all four are evaluated
    // before the quad is accepted or dropped.
    let top_left = {
      let corner = unsafe { observation.topLeft() };
      vision_point_to_schema(corner.x, corner.y)
    };
    let top_right = {
      let corner = unsafe { observation.topRight() };
      vision_point_to_schema(corner.x, corner.y)
    };
    let bottom_left = {
      let corner = unsafe { observation.bottomLeft() };
      vision_point_to_schema(corner.x, corner.y)
    };
    let bottom_right = {
      let corner = unsafe { observation.bottomRight() };
      vision_point_to_schema(corner.x, corner.y)
    };
    let (Some(top_left), Some(top_right), Some(bottom_left), Some(bottom_right)) =
      (top_left, top_right, bottom_left, bottom_right)
    else {
      continue;
    };
    // Constructor order is top-left, top-right, bottom-right, bottom-left.
    let Ok(segment) =
      DocumentSegment::try_new(top_left, top_right, bottom_right, bottom_left, confidence)
    else {
      continue;
    };
    segments.push(segment);
  }
  segments
}
/// Reads the first aesthetics observation and converts it to `Aesthetics`.
///
/// Falls back to `Aesthetics::new(0.0, false)` when there are no results, the overall
/// score is not finite, or the score is below the configured `min_overall_score`.
fn extract_aesthetics(&self) -> Aesthetics {
  let fallback = Aesthetics::new(0.0, false);
  let scored = (|| -> Option<Aesthetics> {
    let results = unsafe { self.requests.aesthetics.results() }?;
    let observation = results.iter().next()?;
    let overall_score = finite_f32(unsafe { observation.overallScore() })?;
    if overall_score < self.opts.aesthetics().min_overall_score() {
      return None;
    }
    Some(Aesthetics::new(overall_score, unsafe { observation.isUtility() }))
  })();
  scored.unwrap_or(fallback)
}
}
/// Normalizes a classification label by trimming surrounding whitespace and
/// lower-casing its ASCII letters.
#[cfg(target_vendor = "apple")]
fn normalize_classification_label(label: SmolStr) -> SmolStr {
  let trimmed: &str = label.trim();
  trimmed.to_ascii_lowercase_smolstr()
}
/// Reads the `position` matrix of a 3D body point and returns the first three
/// components of its fourth column as `(x, y, z)`.
///
/// Returns `None` when any of the three components is not finite.
#[cfg(target_vendor = "apple")]
fn extract_body_pose_3d_coordinates(
  point: &VNHumanBodyRecognizedPoint3D,
) -> Option<(f32, f32, f32)> {
  let transform: SimdFloat4x4 = unsafe { objc2::msg_send![point, position] };
  // Column index 3 is used as the translation (assumes column-major simd layout — TODO confirm).
  let translation = transform.columns.get(3)?;
  let [x, y, z, _] = translation.0;
  let all_finite = [x, y, z].iter().all(|component| component.is_finite());
  all_finite.then_some((x, y, z))
}
/// Maps a Vision chirality value onto the schema's `HandChirality`; any value other
/// than left or right becomes `HandChirality::Unknown`.
#[cfg(target_vendor = "apple")]
fn map_hand_chirality(chirality: VNChirality) -> HandChirality {
  if chirality == VNChirality::Left {
    HandChirality::Left
  } else if chirality == VNChirality::Right {
    HandChirality::Right
  } else {
    HandChirality::Unknown
  }
}
/// Collects the named landmark regions of one face.
///
/// The thirteen region accessors of `VNFaceLandmarks2D` are read up front, then handed
/// one by one to `push_face_landmark_region` together with the face's bounding box.
/// Processing stops early once `total_points_remaining` hits zero or
/// `total_landmark_attempts` reaches `MAX_FACE_LANDMARK_ATTEMPTS_PER_FRAME`; both
/// counters are caller-owned and updated by the helper.
#[cfg(target_vendor = "apple")]
fn extract_face_landmark_regions(
  landmarks: &VNFaceLandmarks2D,
  face_bbox_vision: CGRect,
  total_points_remaining: &mut usize,
  total_landmark_attempts: &mut usize,
) -> Vec<FaceLandmarkRegion> {
  // Region names mirror the accessor they were read from.
  let named_regions = [
    ("allPoints", unsafe { landmarks.allPoints() }),
    ("faceContour", unsafe { landmarks.faceContour() }),
    ("leftEye", unsafe { landmarks.leftEye() }),
    ("rightEye", unsafe { landmarks.rightEye() }),
    ("leftEyebrow", unsafe { landmarks.leftEyebrow() }),
    ("rightEyebrow", unsafe { landmarks.rightEyebrow() }),
    ("nose", unsafe { landmarks.nose() }),
    ("noseCrest", unsafe { landmarks.noseCrest() }),
    ("medianLine", unsafe { landmarks.medianLine() }),
    ("outerLips", unsafe { landmarks.outerLips() }),
    ("innerLips", unsafe { landmarks.innerLips() }),
    ("leftPupil", unsafe { landmarks.leftPupil() }),
    ("rightPupil", unsafe { landmarks.rightPupil() }),
  ];
  let mut collected = Vec::new();
  for (name, region) in named_regions {
    let out_of_budget = *total_points_remaining == 0
      || *total_landmark_attempts >= MAX_FACE_LANDMARK_ATTEMPTS_PER_FRAME;
    if out_of_budget {
      break;
    }
    push_face_landmark_region(
      &mut collected,
      name,
      region,
      face_bbox_vision,
      total_points_remaining,
      total_landmark_attempts,
    );
  }
  collected
}
/// Converts one optional Vision landmark region into a `FaceLandmarkRegion` and appends
/// it to `regions`.
///
/// Nothing is appended when the budgets are exhausted, the region is absent or empty,
/// its point count fails `validate_raw_slice_elems`, its point pointer is null, no
/// point survives conversion, or `FaceLandmarkRegion::try_new` rejects the result.
///
/// `total_landmark_attempts` is charged for every point that is inspected;
/// `total_points_remaining` is reduced only by the points of a region that was
/// actually appended.
#[cfg(target_vendor = "apple")]
fn push_face_landmark_region(
  regions: &mut Vec<FaceLandmarkRegion>,
  name: &'static str,
  region: Option<Retained<VNFaceLandmarkRegion2D>>,
  face_bbox_vision: CGRect,
  total_points_remaining: &mut usize,
  total_landmark_attempts: &mut usize,
) {
  let budget_exhausted = *total_points_remaining == 0
    || *total_landmark_attempts >= MAX_FACE_LANDMARK_ATTEMPTS_PER_FRAME;
  if budget_exhausted {
    return;
  }
  let Some(region) = region else {
    return;
  };
  let point_count = unsafe { region.pointCount() };
  if point_count == 0 {
    return;
  }
  if validate_raw_slice_elems::<CGPoint>(point_count, MAX_LANDMARK_POINTS).is_none() {
    return;
  }
  let points_ptr = unsafe { region.normalizedPoints() };
  if points_ptr.is_null() {
    return;
  }
  // Read no more points than the region holds, the point budget allows, or the
  // attempt budget allows.
  let attempts_left =
    MAX_FACE_LANDMARK_ATTEMPTS_PER_FRAME.saturating_sub(*total_landmark_attempts);
  let region_cap = point_count
    .min(*total_points_remaining)
    .min(attempts_left);
  if region_cap == 0 {
    return;
  }
  *total_landmark_attempts = total_landmark_attempts.saturating_add(region_cap);
  // SAFETY: `points_ptr` is non-null and `region_cap <= point_count`, which passed
  // `validate_raw_slice_elems`. Assumes Vision keeps `pointCount` points readable
  // behind `normalizedPoints` while `region` is retained — TODO confirm.
  let raw_points = unsafe { std::slice::from_raw_parts(points_ptr, region_cap) };
  let mut emitted_points: Vec<(f32, f32)> = Vec::with_capacity(region_cap);
  emitted_points.extend(raw_points.iter().filter_map(|point| {
    let projected = project_landmark_to_image(*point, face_bbox_vision);
    vision_point_to_schema(projected.x, projected.y)
  }));
  if emitted_points.is_empty() {
    return;
  }
  let emitted_len = emitted_points.len();
  let Ok(landmark_region) = FaceLandmarkRegion::try_new(name, emitted_points) else {
    return;
  };
  *total_points_remaining = total_points_remaining.saturating_sub(emitted_len);
  regions.push(landmark_region);
}
/// Maps Vision's 3D body-height estimation kind onto the schema's
/// `BodyPose3DHeightEstimation`; anything other than measured or reference becomes
/// `Unknown`.
#[cfg(target_vendor = "apple")]
fn map_body_pose_3d_height_estimation(
  estimation: VNHumanBodyPose3DObservationHeightEstimation,
) -> BodyPose3DHeightEstimation {
  if estimation == VNHumanBodyPose3DObservationHeightEstimation::Measured {
    return BodyPose3DHeightEstimation::Measured;
  }
  if estimation == VNHumanBodyPose3DObservationHeightEstimation::Reference {
    return BodyPose3DHeightEstimation::Reference;
  }
  BodyPose3DHeightEstimation::Unknown
}
/// Locks `pixel_buffer` read-only and copies its mask contents out through
/// `copy_instance_mask_buffer_locked`.
///
/// Returns `None` when the lock cannot be taken or the locked copy fails (for example
/// because the mask would exceed `remaining_byte_budget`).
#[cfg(target_vendor = "apple")]
fn copy_instance_mask_buffer(
  pixel_buffer: &CVPixelBuffer,
  remaining_byte_budget: usize,
) -> Option<(BoundingBox, Dimensions, Bytes)> {
  // The guard lives for the whole copy and is dropped when the closure returns
  // (presumably releasing the base-address lock — confirm against the guard's `Drop`).
  CVPixelBufferLockGuard::lock(pixel_buffer, CVPixelBufferLockFlags::ReadOnly)
    .and_then(|guard| copy_instance_mask_buffer_locked(guard.buffer(), remaining_byte_budget))
}
/// Validates the geometry and pixel format of a mask pixel buffer and converts it into
/// a tightly packed one-byte-per-pixel payload plus the bounding box of its foreground.
///
/// The caller is expected to hold the buffer's base-address lock for the duration of
/// the call (`copy_instance_mask_buffer` does so through its lock guard).
///
/// Returns `None` when the buffer is empty, the packed output (`width * height` bytes)
/// would exceed `remaining_byte_budget`, the base address or stride is unusable, the
/// pixel format is neither one-component 8-bit nor one-component 32-bit float, the
/// computed source length fails validation or exceeds the buffer's reported data size,
/// the dimensions do not fit in `u32`, or the per-format conversion yields no mask.
#[cfg(target_vendor = "apple")]
// The `kCVPixelFormatType_*` constants are used as match patterns below
// (presumably the reason for this allow).
#[allow(non_upper_case_globals)]
fn copy_instance_mask_buffer_locked(
pixel_buffer: &CVPixelBuffer,
remaining_byte_budget: usize,
) -> Option<(BoundingBox, Dimensions, Bytes)> {
let width = CVPixelBufferGetWidth(pixel_buffer);
let height = CVPixelBufferGetHeight(pixel_buffer);
if width == 0 || height == 0 {
return None;
}
// The packed output is one byte per pixel regardless of the source format, so the
// budget check can run before any pixel data is touched.
let output_payload = width.checked_mul(height)?;
if output_payload > remaining_byte_budget {
return None;
}
let pixel_format = CVPixelBufferGetPixelFormatType(pixel_buffer);
let bytes_per_row = CVPixelBufferGetBytesPerRow(pixel_buffer);
let base_address = CVPixelBufferGetBaseAddress(pixel_buffer) as *const u8;
if base_address.is_null() || bytes_per_row == 0 {
return None;
}
// Only single-channel float and single-channel byte masks are supported.
let bytes_per_pixel: usize = match pixel_format {
kCVPixelFormatType_OneComponent32Float => core::mem::size_of::<f32>(),
kCVPixelFormatType_OneComponent8 => 1,
_ => return None,
};
// The stride may include row padding but must never be shorter than one row of pixels.
let row_pixel_bytes = width.checked_mul(bytes_per_pixel)?;
if bytes_per_row < row_pixel_bytes {
return None;
}
let total_src_len = bytes_per_row.checked_mul(height)?;
validate_mask_dims_for_slice(width, height, total_src_len)?;
// Never read past what CoreVideo reports as the buffer's data size.
let data_size: usize = CVPixelBufferGetDataSize(pixel_buffer);
if total_src_len > data_size {
return None;
}
// SAFETY: `base_address` is non-null and `total_src_len` was checked against the
// reported data size above. Assumes the base-address lock is held by the caller so
// the memory stays valid for the lifetime of `src` — TODO confirm at call sites.
let src = unsafe { std::slice::from_raw_parts(base_address, total_src_len) };
let dim_width = u32::try_from(width).ok()?;
let dim_height = u32::try_from(height).ok()?;
let (bbox, packed) = match pixel_format {
kCVPixelFormatType_OneComponent32Float => {
process_mask_bytes_f32(width, height, bytes_per_row, src)?
}
kCVPixelFormatType_OneComponent8 => process_mask_bytes_u8(width, height, bytes_per_row, src)?,
_ => return None,
};
Some((
bbox,
Dimensions::new(dim_width, dim_height),
Bytes::from(packed),
))
}
/// Quantises a strided single-channel `f32` mask into a packed one-byte-per-pixel
/// buffer and computes the normalized bounding box of its foreground.
///
/// * `width`, `height` — mask size in pixels.
/// * `bytes_per_row` — source stride in bytes; only the first `width * 4` bytes of each
///   row are read.
/// * `src` — source bytes, interpreted as little-endian `f32` samples.
///
/// Finite samples are clamped to `[0, 1]`, scaled by 255 and rounded; non-finite
/// samples become 0. Returns `None` if any size computation overflows, the output
/// cannot be allocated, `src` is too short, no sample quantises above zero, or the
/// bounding box is rejected.
#[cfg(target_vendor = "apple")]
fn process_mask_bytes_f32(
  width: usize,
  height: usize,
  bytes_per_row: usize,
  src: &[u8],
) -> Option<(BoundingBox, Vec<u8>)> {
  const SAMPLE_BYTES: usize = core::mem::size_of::<f32>();
  let row_payload_bytes = width.checked_mul(SAMPLE_BYTES)?;
  let packed_len = width.checked_mul(height)?;
  let mut packed = try_alloc_packed_mask(packed_len)?;
  // (min_x, min_y, max_x, max_y) of all foreground pixels seen so far.
  let mut bounds: Option<(usize, usize, usize, usize)> = None;
  for row in 0..height {
    let src_start = row.checked_mul(bytes_per_row)?;
    let src_end = src_start.checked_add(row_payload_bytes)?;
    let src_row = src.get(src_start..src_end)?;
    let dst_start = row.checked_mul(width)?;
    let dst_end = dst_start.checked_add(width)?;
    let dst_row = packed.get_mut(dst_start..dst_end)?;
    // `src_row` holds exactly `width` samples and `dst_row` exactly `width` bytes.
    let samples = src_row.chunks_exact(SAMPLE_BYTES);
    for (col, (sample, out)) in samples.zip(dst_row.iter_mut()).enumerate() {
      let value = f32::from_le_bytes(sample.try_into().ok()?);
      let quantised: u8 = if value.is_finite() {
        (value.clamp(0.0, 1.0) * 255.0).round() as u8
      } else {
        0
      };
      *out = quantised;
      if quantised > 0 {
        bounds = Some(match bounds {
          None => (col, row, col, row),
          Some((left, top, right, bottom)) => {
            (left.min(col), top.min(row), right.max(col), bottom.max(row))
          }
        });
      }
    }
  }
  // No foreground pixel at all means there is no mask to report.
  let (min_x, min_y, max_x, max_y) = bounds?;
  let bbox = normalized_bbox_from_pixel_bounds(min_x, min_y, max_x, max_y, width, height)?;
  Some((bbox, packed))
}
/// Packs a strided single-channel 8-bit mask into a tight `width * height` buffer and
/// computes the normalized bounding box of its non-zero pixels.
///
/// * `width`, `height` — mask size in pixels.
/// * `bytes_per_row` — source stride in bytes; only the first `width` bytes of each row
///   are copied.
/// * `src` — source bytes.
///
/// Returns `None` if any size computation overflows, the output cannot be allocated,
/// `src` is too short, every pixel is zero, or the bounding box is rejected.
#[cfg(target_vendor = "apple")]
fn process_mask_bytes_u8(
  width: usize,
  height: usize,
  bytes_per_row: usize,
  src: &[u8],
) -> Option<(BoundingBox, Vec<u8>)> {
  let packed_len = width.checked_mul(height)?;
  let mut packed = try_alloc_packed_mask(packed_len)?;
  // (min_x, min_y, max_x, max_y) of all foreground pixels seen so far.
  let mut bounds: Option<(usize, usize, usize, usize)> = None;
  for row in 0..height {
    let src_start = row.checked_mul(bytes_per_row)?;
    let src_end = src_start.checked_add(width)?;
    let src_row = src.get(src_start..src_end)?;
    let dst_start = row.checked_mul(width)?;
    let dst_end = dst_start.checked_add(width)?;
    packed.get_mut(dst_start..dst_end)?.copy_from_slice(src_row);
    // The first and last non-zero columns of a row are all the bounds need.
    let Some(first_col) = src_row.iter().position(|&value| value > 0) else {
      continue;
    };
    let last_col = src_row
      .iter()
      .rposition(|&value| value > 0)
      .unwrap_or(first_col);
    bounds = Some(match bounds {
      None => (first_col, row, last_col, row),
      Some((left, top, right, bottom)) => (
        left.min(first_col),
        top.min(row),
        right.max(last_col),
        bottom.max(row),
      ),
    });
  }
  // No foreground pixel at all means there is no mask to report.
  let (min_x, min_y, max_x, max_y) = bounds?;
  let bbox = normalized_bbox_from_pixel_bounds(min_x, min_y, max_x, max_y, width, height)?;
  Some((bbox, packed))
}
/// Converts inclusive pixel bounds into a normalized `BoundingBox`.
///
/// `min_*`/`max_*` are inclusive pixel coordinates inside a `width` x `height` image;
/// the box spans from `min` to `max + 1` on each axis, divided by the image extent.
/// No vertical flip is applied.
///
/// Returns `None` for an empty image, inverted or out-of-image bounds, a box whose
/// origin is not below 1.0 or whose size is not positive after conversion to `f32`, or
/// when `BoundingBox::try_new` rejects the values.
#[cfg(target_vendor = "apple")]
fn normalized_bbox_from_pixel_bounds(
  min_x: usize,
  min_y: usize,
  max_x: usize,
  max_y: usize,
  width: usize,
  height: usize,
) -> Option<BoundingBox> {
  if width == 0 || height == 0 {
    return None;
  }
  // Exclusive right/bottom edges of the inclusive pixel range.
  let right_edge = max_x.checked_add(1)?;
  let bottom_edge = max_y.checked_add(1)?;
  let inside_image = right_edge <= width && bottom_edge <= height;
  let bounds_ordered = min_x <= max_x && min_y <= max_y;
  if !(inside_image && bounds_ordered) {
    return None;
  }
  // Divide in f64 and narrow once, so large extents lose as little precision as possible.
  let normalize = |pixel: usize, extent: usize| (pixel as f64 / extent as f64) as f32;
  let left = normalize(min_x, width);
  let top = normalize(min_y, height);
  let right = normalize(right_edge, width);
  let bottom = normalize(bottom_edge, height);
  let box_width = right - left;
  let box_height = bottom - top;
  let origin_in_image = left < 1.0 && top < 1.0;
  let has_area = box_width > 0.0 && box_height > 0.0;
  if !(origin_in_image && has_area) {
    return None;
  }
  BoundingBox::try_new(left, top, box_width, box_height).ok()
}
/// Stand-in analyzer compiled on targets whose vendor is not Apple.
///
/// It keeps the same type name as the Apple implementation so dependants compile
/// everywhere, but `analyze_keyframe` always fails.
#[cfg(not(target_vendor = "apple"))]
#[derive(Debug)]
pub struct VisionAnalyzer {
// Stored at construction but never read by this stub, hence the allow.
#[allow(dead_code)]
opts: ServiceOptions,
}
#[cfg(not(target_vendor = "apple"))]
impl VisionAnalyzer {
/// Creates a stub analyzer that stores `opts` without using them.
#[cfg_attr(not(tarpaulin), inline(always))]
pub fn new(opts: ServiceOptions) -> Self {
Self { opts }
}
/// Stub counterpart of keyframe analysis; every argument is ignored.
///
/// # Errors
///
/// Always returns an `ErrorInfo` built with `ErrorCode::AppleVisionFailed`, because
/// Vision.framework is not available on this target.
pub fn analyze_keyframe(
&self,
_scene_id: Uuid7,
_keyframe_id: Uuid7,
_pts: Timestamp,
_dimensions: Dimensions,
_extractor: KeyframeExtractor,
_jpeg_data: &[u8],
) -> Result<Keyframe, ErrorInfo> {
Err(apple_vision_error(
ErrorCode::AppleVisionFailed,
"Apple Vision.framework is only available on macOS",
))
}
}
// Platform-independent tests, plus a test of the non-Apple stub.
#[cfg(test)]
mod tests {
use mediaschema::domain::aggregates::video::{
BodyPose3DDetection, BodyPose3DHeightEstimation, HumanAnalysis,
};
// A 3D body pose attached through `with_body_poses_3d` must be visible through
// `body_poses_3d_slice`.
#[test]
fn body_poses_3d_survives_through_human_analysis() {
let pose =
BodyPose3DDetection::try_new(0.5, 0.0, BodyPose3DHeightEstimation::Unknown, Vec::new())
.expect("validating ctor on canonical inputs");
let analysis = HumanAnalysis::new().with_body_poses_3d(vec![pose]);
assert_eq!(analysis.body_poses_3d_slice().len(), 1);
}
// On non-Apple targets the stub analyzer must fail with `AppleVisionFailed`,
// even for an empty JPEG payload.
#[cfg(not(target_vendor = "apple"))]
#[test]
fn non_macos_stub_reports_unavailable() {
use super::*;
use core::num::NonZeroU32;
use mediatime::Timebase;
let analyzer = VisionAnalyzer::new(ServiceOptions::new());
let tb = Timebase::new(1, NonZeroU32::new(1000).expect("nonzero den"));
let err = analyzer
.analyze_keyframe(
Uuid7::new(),
Uuid7::new(),
Timestamp::new(0, tb),
Dimensions::new(320, 180),
KeyframeExtractor::Manual,
&[],
)
.expect_err("stub must return Err");
assert_eq!(err.code(), ErrorCode::AppleVisionFailed);
}
}
#[cfg(all(test, target_vendor = "apple"))]
mod macos_tests {
use super::*;
use mediaschema::domain::aggregates::video::BoundingBox as DomainBoundingBox;
use objc2_core_foundation::{CGPoint, CGRect, CGSize};
#[test]
fn vision_bbox_to_schema_flips_y() {
let rect = CGRect::new(CGPoint::new(0.1, 0.2), CGSize::new(0.3, 0.4));
let bbox = vision_bbox_to_schema(rect).expect("in-range rect must clamp to itself");
assert!((bbox.x() - 0.1).abs() < 1e-6, "x: {}", bbox.x());
assert!((bbox.y() - 0.4).abs() < 1e-6, "y: {}", bbox.y());
assert!((bbox.width() - 0.3).abs() < 1e-6, "w: {}", bbox.width());
assert!((bbox.height() - 0.4).abs() < 1e-6, "h: {}", bbox.height());
}
#[test]
fn vision_bbox_to_schema_full_image_round_trip() {
let rect = CGRect::new(CGPoint::new(0.0, 0.0), CGSize::new(1.0, 1.0));
let bbox = vision_bbox_to_schema(rect).expect("unit rect must clamp to itself");
assert_eq!(bbox.x(), 0.0);
assert_eq!(bbox.y(), 0.0);
assert_eq!(bbox.width(), 1.0);
assert_eq!(bbox.height(), 1.0);
DomainBoundingBox::try_new(bbox.x(), bbox.y(), bbox.width(), bbox.height())
.expect("full-image bbox stays valid after flip");
}
#[test]
fn vision_bbox_clamps_right_spill() {
let rect = CGRect::new(CGPoint::new(0.8, 0.4), CGSize::new(0.5, 0.2));
let bbox = vision_bbox_to_schema(rect).expect("partial overlap must produce a bbox");
assert!((bbox.x() - 0.8).abs() < 1e-6, "x: {}", bbox.x());
assert!((bbox.width() - 0.2).abs() < 1e-6, "w: {}", bbox.width());
assert!((bbox.y() - 0.4).abs() < 1e-6, "y: {}", bbox.y());
assert!((bbox.height() - 0.2).abs() < 1e-6, "h: {}", bbox.height());
DomainBoundingBox::try_new(bbox.x(), bbox.y(), bbox.width(), bbox.height())
.expect("clamped bbox satisfies the [0,1] invariant");
}
#[test]
fn vision_bbox_clamps_bottom_spill() {
let rect = CGRect::new(CGPoint::new(0.1, -0.1), CGSize::new(0.3, 0.4));
let bbox = vision_bbox_to_schema(rect).expect("partial overlap must produce a bbox");
assert!((bbox.x() - 0.1).abs() < 1e-6, "x: {}", bbox.x());
assert!((bbox.y() - 0.7).abs() < 1e-6, "y: {}", bbox.y());
assert!((bbox.width() - 0.3).abs() < 1e-6, "w: {}", bbox.width());
assert!((bbox.height() - 0.3).abs() < 1e-6, "h: {}", bbox.height());
DomainBoundingBox::try_new(bbox.x(), bbox.y(), bbox.width(), bbox.height())
.expect("clamped bbox satisfies the [0,1] invariant");
}
#[test]
fn vision_bbox_fully_offscreen_yields_none() {
let rect = CGRect::new(CGPoint::new(1.5, 0.5), CGSize::new(0.3, 0.4));
assert!(vision_bbox_to_schema(rect).is_none());
}
#[test]
fn vision_bbox_edge_only_yields_none() {
let rect = CGRect::new(CGPoint::new(1.0, 0.5), CGSize::new(0.0, 0.4));
assert!(vision_bbox_to_schema(rect).is_none());
}
#[test]
fn vision_bbox_handles_nan_origin() {
let rect = CGRect::new(CGPoint::new(f64::NAN, 0.0), CGSize::new(0.3, 0.4));
assert!(vision_bbox_to_schema(rect).is_none());
}
#[test]
fn vision_bbox_handles_nan_y_origin() {
let rect = CGRect::new(CGPoint::new(0.1, f64::NAN), CGSize::new(0.3, 0.4));
assert!(vision_bbox_to_schema(rect).is_none());
}
#[test]
fn vision_point_to_schema_flips_y_only() {
let (x, y) = vision_point_to_schema(0.25, 0.75).expect("finite point");
assert!((x - 0.25).abs() < 1e-6);
assert!((y - 0.25).abs() < 1e-6);
}
#[test]
fn vision_point_to_schema_clamps_out_of_range() {
let (x, y) = vision_point_to_schema(1.2, -0.3).expect("finite point");
assert_eq!(x, 1.0);
assert_eq!(y, 1.0);
}
#[test]
fn vision_point_to_schema_rejects_non_finite() {
assert!(vision_point_to_schema(f64::NAN, 0.5).is_none());
assert!(vision_point_to_schema(0.5, f64::NAN).is_none());
assert!(vision_point_to_schema(f64::INFINITY, 0.5).is_none());
assert!(vision_point_to_schema(0.5, f64::INFINITY).is_none());
assert!(vision_point_to_schema(f64::NEG_INFINITY, 0.5).is_none());
assert!(vision_point_to_schema(0.5, f64::NEG_INFINITY).is_none());
assert!(vision_point_to_schema(0.1, 0.2).is_some());
}
#[test]
fn document_quad_with_non_finite_corner_is_dropped() {
let good = (0.1_f64, 0.1_f64);
let bad = (f64::NAN, 0.5_f64);
for (tl, tr, bl, br) in [
(bad, good, good, good),
(good, bad, good, good),
(good, good, bad, good),
(good, good, good, bad),
] {
let result = (
vision_point_to_schema(tl.0, tl.1),
vision_point_to_schema(tr.0, tr.1),
vision_point_to_schema(bl.0, bl.1),
vision_point_to_schema(br.0, br.1),
);
assert!(
!matches!(result, (Some(_), Some(_), Some(_), Some(_))),
"quad with non-finite corner survived: {result:?}",
);
}
}
#[test]
fn pixel_bounds_to_normalized_bbox_does_not_flip() {
let bbox = normalized_bbox_from_pixel_bounds(5, 10, 24, 29, 100, 100).expect("valid bbox");
assert!((bbox.x() - 0.05).abs() < 1e-6);
assert!((bbox.y() - 0.10).abs() < 1e-6);
assert!((bbox.width() - 0.20).abs() < 1e-6);
assert!((bbox.height() - 0.20).abs() < 1e-6);
}
#[test]
fn empty_8bit_mask_yields_none() {
let src = vec![0u8; 4 * 4]; assert!(process_mask_bytes_u8(4, 4, 4, &src).is_none());
}
#[test]
fn empty_32fp_mask_yields_none() {
let src = vec![0u8; 4 * 4 * 4]; assert!(process_mask_bytes_f32(4, 4, 16, &src).is_none());
}
#[test]
fn single_pixel_8bit_mask_round_trip() {
let mut src = vec![0u8; 16];
src[6] = 0xFF;
let (bbox, packed) = process_mask_bytes_u8(4, 4, 4, &src).expect("foreground produces Some");
assert!((bbox.x() - 0.5).abs() < 1e-6, "x: {}", bbox.x());
assert!((bbox.y() - 0.25).abs() < 1e-6, "y: {}", bbox.y());
assert!((bbox.width() - 0.25).abs() < 1e-6, "w: {}", bbox.width());
assert!((bbox.height() - 0.25).abs() < 1e-6, "h: {}", bbox.height());
assert_eq!(packed, src);
}
#[test]
fn single_pixel_32fp_mask_round_trip() {
let mut src = vec![0u8; 4 * 4 * 4];
let value: f32 = 0.75;
let bytes = value.to_le_bytes();
let src_offset = 16 + 8;
src[src_offset..src_offset + 4].copy_from_slice(&bytes);
let (bbox, packed) = process_mask_bytes_f32(4, 4, 16, &src).expect("foreground produces Some");
assert!((bbox.x() - 0.5).abs() < 1e-6, "x: {}", bbox.x());
assert!((bbox.y() - 0.25).abs() < 1e-6, "y: {}", bbox.y());
assert_eq!(packed.len(), 4 * 4);
let dst_offset = 4 + 2;
assert_eq!(packed[dst_offset], 191, "0.75 → 191 after u8 quantisation");
for (idx, &b) in packed.iter().enumerate() {
if idx != dst_offset {
assert_eq!(b, 0, "background pixel {idx} must be 0");
}
}
}
#[test]
fn f32_mask_quantises_canonical_values_and_nan() {
let mut src = vec![0u8; 4 * 4];
src[0..4].copy_from_slice(&0.0_f32.to_le_bytes());
src[4..8].copy_from_slice(&0.5_f32.to_le_bytes());
src[8..12].copy_from_slice(&1.0_f32.to_le_bytes());
src[12..16].copy_from_slice(&f32::NAN.to_le_bytes());
let (_, packed) = process_mask_bytes_f32(4, 1, 16, &src).expect("foreground present");
assert_eq!(packed.len(), 4, "canonical 8-bit-per-pixel payload");
assert_eq!(packed[0], 0, "0.0 → 0");
assert_eq!(packed[1], 128, "0.5 → 128");
assert_eq!(packed[2], 255, "1.0 → 255");
assert_eq!(packed[3], 0, "NaN → 0 (background)");
}
#[test]
fn f32_mask_quantises_out_of_range_and_infinity() {
let mut src = vec![0u8; 4 * 4];
src[0..4].copy_from_slice(&(-0.5_f32).to_le_bytes());
src[4..8].copy_from_slice(&1.5_f32.to_le_bytes());
src[8..12].copy_from_slice(&f32::INFINITY.to_le_bytes());
src[12..16].copy_from_slice(&f32::NEG_INFINITY.to_le_bytes());
let (_, packed) = process_mask_bytes_f32(4, 1, 16, &src).expect("foreground at col 1");
assert_eq!(packed[0], 0, "-0.5 clamps to 0");
assert_eq!(packed[1], 255, "1.5 clamps to 255");
assert_eq!(packed[2], 0, "+Inf → 0 (background)");
assert_eq!(packed[3], 0, "-Inf → 0 (background)");
}
#[test]
fn padded_stride_8bit_mask_packs_correctly() {
let mut src = vec![0u8; 16];
src[0] = 1; src[10] = 1; let (bbox, packed) = process_mask_bytes_u8(3, 2, 8, &src).expect("foreground produces Some");
assert_eq!(packed.len(), 3 * 2);
assert_eq!(packed, [1, 0, 0, 0, 0, 1]);
assert!((bbox.x() - 0.0).abs() < 1e-6);
assert!((bbox.y() - 0.0).abs() < 1e-6);
assert!((bbox.width() - 1.0).abs() < 1e-6);
assert!((bbox.height() - 1.0).abs() < 1e-6);
}
#[test]
fn pose_bbox_from_single_joint_yields_none() {
assert!(pose_bbox_from_joint_bounds(0.5, 0.5, 0.5, 0.5).is_none());
}
#[test]
fn pose_bbox_from_vertical_joints_yields_none() {
assert!(pose_bbox_from_joint_bounds(0.5, 0.1, 0.5, 0.9).is_none());
}
#[test]
fn pose_bbox_from_horizontal_joints_yields_none() {
assert!(pose_bbox_from_joint_bounds(0.1, 0.5, 0.9, 0.5).is_none());
}
#[test]
fn pose_bbox_from_diagonal_joints_is_valid() {
let bbox =
pose_bbox_from_joint_bounds(0.1, 0.2, 0.4, 0.6).expect("non-degenerate joints yield Some");
assert!((bbox.x() - 0.1).abs() < 1e-6);
assert!((bbox.y() - 0.2).abs() < 1e-6);
assert!((bbox.width() - 0.3).abs() < 1e-6);
assert!((bbox.height() - 0.4).abs() < 1e-6);
mediaschema::domain::aggregates::video::BoundingBox::try_new(
bbox.x(),
bbox.y(),
bbox.width(),
bbox.height(),
)
.expect("pose-derived bbox satisfies domain invariants");
}
#[test]
fn pose_bbox_from_nan_joints_yields_none() {
assert!(pose_bbox_from_joint_bounds(f32::NAN, 0.5, 0.5, 0.5).is_none());
assert!(pose_bbox_from_joint_bounds(0.1, 0.1, f32::INFINITY, 0.5).is_none());
}
#[test]
fn document_quad_with_collapsed_corners_is_rejected_by_domain() {
let p = (0.0_f32, 0.0_f32);
assert!(
mediaschema::domain::aggregates::video::DocumentSegment::try_new(p, p, p, p, 0.9).is_err()
);
}
#[test]
fn document_quad_bowtie_is_rejected_by_domain() {
let tl = (0.1_f32, 0.1_f32);
let tr = (0.9_f32, 0.1_f32);
let br = (0.1_f32, 0.9_f32);
let bl = (0.9_f32, 0.9_f32);
assert!(
mediaschema::domain::aggregates::video::DocumentSegment::try_new(tl, tr, br, bl, 0.9)
.is_err()
);
}
#[test]
fn document_quad_well_formed_is_accepted_by_domain() {
let tl = (0.1_f32, 0.1_f32);
let tr = (0.9_f32, 0.1_f32);
let br = (0.9_f32, 0.9_f32);
let bl = (0.1_f32, 0.9_f32);
mediaschema::domain::aggregates::video::DocumentSegment::try_new(tl, tr, br, bl, 0.9)
.expect("well-formed unit quad is valid");
}
#[test]
fn finite_f32_rejects_non_finite() {
assert_eq!(finite_f32(0.0), Some(0.0));
assert_eq!(finite_f32(-1.5), Some(-1.5));
assert_eq!(finite_f32(1.0), Some(1.0));
assert_eq!(finite_f32(f32::NAN), None);
assert_eq!(finite_f32(f32::INFINITY), None);
assert_eq!(finite_f32(f32::NEG_INFINITY), None);
}
#[test]
fn try_alloc_packed_mask_rejects_oversize() {
assert!(try_alloc_packed_mask(MAX_MASK_BYTES).is_some());
assert!(try_alloc_packed_mask(MAX_MASK_BYTES + 1).is_none());
}
#[test]
fn try_alloc_packed_mask_zero_inits_at_requested_length() {
let buf = try_alloc_packed_mask(64).expect("64 byte allocation");
assert_eq!(buf.len(), 64);
assert!(buf.iter().all(|&b| b == 0));
}
#[test]
fn process_mask_bytes_u8_caps_allocation() {
let width = MAX_MASK_BYTES + 1;
let height = 1;
assert!(process_mask_bytes_u8(width, height, width, &[]).is_none());
}
#[test]
fn project_landmark_to_image_centres_landmark() {
let face = CGRect::new(CGPoint::new(0.2, 0.3), CGSize::new(0.4, 0.2));
let projected = project_landmark_to_image(CGPoint::new(0.5, 0.5), face);
assert!((projected.x - 0.4).abs() < 1e-9);
assert!((projected.y - 0.4).abs() < 1e-9);
}
/// Projecting the landmark (0, 0) and then converting with `vision_point_to_schema` must give
/// (0.2, 0.7): x stays at the rectangle origin and y comes out as 1 - 0.3.
#[test]
fn project_landmark_then_schema_flip_matches_face_corner() {
  let face_box = CGRect::new(CGPoint::new(0.2, 0.3), CGSize::new(0.4, 0.2));
  let corner = project_landmark_to_image(CGPoint::new(0.0, 0.0), face_box);

  let converted = vision_point_to_schema(corner.x, corner.y);
  let (sx, sy) = converted.expect("projected lower-left is finite");

  assert!((sx - 0.2).abs() < 1e-6, "schema-x: {sx}");
  assert!((sy - 0.7).abs() < 1e-6, "schema-y: {sy}");
}
/// A landmark with a NaN x coordinate must not survive projection followed by
/// `vision_point_to_schema`.
#[test]
fn projected_non_finite_landmark_is_rejected() {
  let face_box = CGRect::new(CGPoint::new(0.2, 0.3), CGSize::new(0.4, 0.2));
  let nan_landmark = CGPoint::new(f64::NAN, 0.5);

  let projected = project_landmark_to_image(nan_landmark, face_box);
  let converted = vision_point_to_schema(projected.x, projected.y);

  assert!(converted.is_none());
}
/// A missing capture quality (`None`) must be reported as `Some(0.0)`.
#[test]
fn sanitize_capture_quality_absent_maps_to_zero() {
  let sanitized = sanitize_capture_quality(None);
  assert_eq!(sanitized, Some(0.0));
}
/// Finite capture-quality values, including both ends of 0.0..=1.0, must come back unchanged.
#[test]
fn sanitize_capture_quality_finite_passes_through() {
  // Each input paired with the value expected back.
  let cases = [
    (Some(0.75), Some(0.75)),
    (Some(0.0), Some(0.0)),
    (Some(1.0), Some(1.0)),
  ];
  for (input, expected) in cases {
    assert_eq!(sanitize_capture_quality(input), expected);
  }
}
/// NaN and both infinities must make `sanitize_capture_quality` return `None`.
#[test]
fn sanitize_capture_quality_non_finite_returns_none() {
  for non_finite in [f32::NAN, f32::INFINITY, f32::NEG_INFINITY] {
    assert_eq!(sanitize_capture_quality(Some(non_finite)), None);
  }
}
/// For a finite height, both the height (within 1e-6) and the estimation tag must be returned
/// as given.
#[test]
fn sanitize_body_height_pair_finite_preserves_estimation() {
  let cases = [
    (1.75, BodyPose3DHeightEstimation::Measured),
    (0.42, BodyPose3DHeightEstimation::Reference),
  ];
  for (raw_height, estimation) in cases {
    let (height, tag) = sanitize_body_height_pair(raw_height, estimation);
    assert!((height - raw_height).abs() < 1e-6);
    assert_eq!(tag, estimation);
  }
}
/// For NaN or infinite heights the result must be `(0.0, Unknown)`, whichever estimation tag
/// was passed in.
#[test]
fn sanitize_body_height_pair_non_finite_forces_unknown() {
  for raw in [f32::NAN, f32::INFINITY, f32::NEG_INFINITY] {
    // Starting from `Measured`.
    let from_measured = sanitize_body_height_pair(raw, BodyPose3DHeightEstimation::Measured);
    assert_eq!(
      from_measured.0, 0.0,
      "non-finite must collapse to 0.0 (raw = {raw:?})"
    );
    assert_eq!(
      from_measured.1,
      BodyPose3DHeightEstimation::Unknown,
      "non-finite must force UNKNOWN (raw = {raw:?})",
    );

    // Starting from `Reference`.
    let from_reference = sanitize_body_height_pair(raw, BodyPose3DHeightEstimation::Reference);
    assert_eq!(from_reference.0, 0.0);
    assert_eq!(from_reference.1, BodyPose3DHeightEstimation::Unknown);
  }
}
/// With a height of 1, a width of exactly `MAX_MASK_BYTES` must pass validation and one more
/// must fail.
#[test]
fn validate_mask_dims_rejects_oversize_output() {
  let at_cap = validate_mask_dims_for_slice(MAX_MASK_BYTES, 1, 0);
  assert!(at_cap.is_some());

  let over_cap = validate_mask_dims_for_slice(MAX_MASK_BYTES + 1, 1, 0);
  assert!(over_cap.is_none());
}
/// With 1x1 dimensions, a third argument of `isize::MAX` must be accepted and the next larger
/// value must be rejected.
#[test]
fn validate_mask_dims_rejects_isize_overflow_source() {
  let isize_limit = isize::MAX as usize;
  assert!(validate_mask_dims_for_slice(1, 1, isize_limit).is_some());

  let past_limit = isize_limit.wrapping_add(1);
  assert!(validate_mask_dims_for_slice(1, 1, past_limit).is_none());
}
/// Dimensions of `usize::MAX` by 2, whose product cannot fit in `usize`, must be rejected.
#[test]
fn validate_mask_dims_rejects_dim_overflow() {
  let outcome = validate_mask_dims_for_slice(usize::MAX, 2, 0);
  assert!(outcome.is_none());
}
/// Lengths up to and including the cap must validate; one past the cap must not.
#[test]
fn validate_raw_slice_bytes_rejects_over_cap() {
  let cap = MAX_MASK_BYTES;
  for within_cap in [0, cap] {
    assert!(validate_raw_slice_bytes(within_cap, cap).is_some());
  }
  assert!(validate_raw_slice_bytes(cap + 1, cap).is_none());
}
/// Even with the cap set to `usize::MAX`, a length above `isize::MAX` must be rejected while
/// `isize::MAX` itself is accepted.
#[test]
fn validate_raw_slice_bytes_rejects_isize_overflow() {
  let isize_limit = isize::MAX as usize;
  assert!(validate_raw_slice_bytes(isize_limit, usize::MAX).is_some());

  let past_limit = isize_limit.wrapping_add(1);
  assert!(validate_raw_slice_bytes(past_limit, usize::MAX).is_none());
}
/// An element count equal to the cap must validate for `CGPoint`; one more element must not.
#[test]
fn validate_raw_slice_elems_rejects_over_cap() {
  let cap = MAX_LANDMARK_POINTS;

  let at_cap = validate_raw_slice_elems::<CGPoint>(cap, cap);
  assert!(at_cap.is_some());

  let over_cap = validate_raw_slice_elems::<CGPoint>(cap + 1, cap);
  assert!(over_cap.is_none());
}
/// `usize::MAX` elements of `CGPoint` must be rejected even when the element cap is also
/// `usize::MAX`; going by the test name, the byte length is what overflows.
#[test]
fn validate_raw_slice_elems_rejects_byte_overflow() {
  let outcome = validate_raw_slice_elems::<CGPoint>(usize::MAX, usize::MAX);
  assert!(outcome.is_none());
}
/// A one-pixel box in the last column of a single-row image 2^24 + 1 pixels wide must still
/// normalise to x < 1, a positive width, and a right edge within the image (tolerance 1e-6).
#[test]
fn normalized_bbox_handles_2pow24_plus_one_width() {
  let image_width = (1_usize << 24) + 1;
  let image_height = 1_usize;
  let last_col = image_width - 1;

  let bbox =
    normalized_bbox_from_pixel_bounds(last_col, 0, last_col, 0, image_width, image_height)
      .expect("valid bbox at right edge");

  let left = bbox.x();
  assert!(left < 1.0, "x must remain strictly less than 1.0: {}", left);

  let span = bbox.width();
  assert!(span > 0.0, "positive foreground width: {}", span);

  let right_edge = bbox.x() + bbox.width();
  assert!(
    right_edge <= 1.0 + 1e-6,
    "right edge exceeds image: {right_edge}"
  );
}
/// Argument sets with a zero image dimension, or with the first bound larger than the third,
/// must yield `None`.
#[test]
fn normalized_bbox_rejects_degenerate_input() {
  // Tuples are in the argument order of `normalized_bbox_from_pixel_bounds`.
  let degenerate_inputs = [
    (0, 0, 10, 10, 0, 100),    // fifth argument is zero
    (0, 0, 10, 10, 100, 0),    // sixth argument is zero
    (20, 0, 10, 10, 100, 100), // first argument exceeds the third
  ];
  for (a, b, c, d, e, f) in degenerate_inputs {
    assert!(normalized_bbox_from_pixel_bounds(a, b, c, d, e, f).is_none());
  }
}
/// For widths of 2^24 + 1 and 2^25 + 1, a one-pixel box in the last column may be rejected, but
/// any box that is returned must keep x < 1, width > 0 and the right edge within the image.
#[test]
fn normalized_bbox_handles_mantissa_exhaustion_boundaries() {
  for shift in 24u32..=25 {
    let image_width = (1_usize << shift) + 1;
    let image_height = 1_usize;
    let last_col = image_width - 1;

    // `None` is an accepted outcome here; only a returned box is checked.
    if let Some(bbox) =
      normalized_bbox_from_pixel_bounds(last_col, 0, last_col, 0, image_width, image_height)
    {
      let left = bbox.x();
      assert!(left < 1.0, "shift={shift}: x must be < 1.0, got {}", left);

      let span = bbox.width();
      assert!(span > 0.0, "shift={shift}: width must be > 0.0, got {}", span);

      let right_edge = bbox.x() + bbox.width();
      assert!(
        right_edge <= 1.0 + 1e-6,
        "shift={shift}: right edge exceeds image: {right_edge}",
      );
    }
  }
}
/// At a width of `MAX_MASK_BYTES` and a height of 1, a one-pixel box in the last column may be
/// rejected, but a returned box must keep x < 1, width > 0 and the right edge within the image.
#[test]
fn normalized_bbox_handles_max_mask_bytes_boundary() {
  let image_width = MAX_MASK_BYTES;
  let image_height = 1usize;
  let last_col = image_width - 1;

  // A `None` result is tolerated: there is nothing further to check in that case.
  let Some(bbox) =
    normalized_bbox_from_pixel_bounds(last_col, 0, last_col, 0, image_width, image_height)
  else {
    return;
  };

  let left = bbox.x();
  assert!(left < 1.0, "x must remain strictly less than 1.0: {}", left);

  let span = bbox.width();
  assert!(span > 0.0, "positive foreground width: {}", span);

  let right_edge = bbox.x() + bbox.width();
  assert!(
    right_edge <= 1.0 + 1e-6,
    "right edge exceeds image: {right_edge}"
  );
}
/// A bound equal to the matching image dimension (100 against a 100-wide or 100-tall image)
/// must be rejected.
#[test]
fn normalized_bbox_rejects_max_above_dimensions() {
  // Third argument equals the fifth (100 vs. 100).
  let third_hits_dimension = normalized_bbox_from_pixel_bounds(0, 0, 100, 0, 100, 1);
  assert!(third_hits_dimension.is_none());

  // Fourth argument equals the sixth (100 vs. 100).
  let fourth_hits_dimension = normalized_bbox_from_pixel_bounds(0, 0, 0, 100, 1, 100);
  assert!(fourth_hits_dimension.is_none());
}
/// Pins the string form of the hand-written encodings: empty for `SimdFloat4` and `{?=[4]}`
/// for `SimdFloat4x4`.
#[test]
fn simd_float4x4_encoding_matches_clang_at_encode() {
  let vector_encoding = SimdFloat4::ENCODING.to_string();
  assert_eq!(vector_encoding, "");

  let matrix_encoding = SimdFloat4x4::ENCODING.to_string();
  assert_eq!(matrix_encoding, "{?=[4]}");
}
}