apple-vision 0.16.5

Safe Rust bindings for Apple's Vision framework — OCR, object detection, face landmarks on macOS
Documentation
// Additional text/saliency/mask bridges added in the v0.13 coverage sweep.

import AppKit
import CoreGraphics
import CoreImage
import CoreML
import CoreVideo
import Foundation
import ImageIO
import Vision

// MARK: - Text rectangles (region-only, no OCR)

@_cdecl("vn_detect_text_rectangles_in_path")
public func vn_detect_text_rectangles_in_path(
    _ path: UnsafePointer<CChar>,
    _ reports_character_boxes: Bool,
    _ out_rects_raw: UnsafeMutableRawPointer,
    _ out_count: UnsafeMutablePointer<Int>,
    _ outErrorMessage: UnsafeMutablePointer<UnsafeMutablePointer<CChar>?>?
) -> Int32 {
    let out_rects = out_rects_raw.assumingMemoryBound(to: UnsafeMutablePointer<VNSimpleRectRaw>?.self)
    out_rects.pointee = nil
    out_count.pointee = 0
    let p = String(cString: path)
    guard let img = loadCGImage(path: p) else {
        outErrorMessage?.pointee = ffiString("could not load image at \(p)")
        return VN_IMAGE_LOAD_FAILED
    }
    let req = VNDetectTextRectanglesRequest()
    req.reportCharacterBoxes = reports_character_boxes
    let handler = VNImageRequestHandler(cgImage: img, options: [:])
    do { try handler.perform([req]) } catch {
        outErrorMessage?.pointee = ffiString("text rectangles request failed: \(error.localizedDescription)")
        return VN_REQUEST_FAILED
    }
    guard let obs = req.results else { return VN_OK }
    var rects = obs.map { mkRect($0.boundingBox, $0.confidence) }
    if rects.isEmpty { return VN_OK }
    let buf = UnsafeMutablePointer<VNSimpleRectRaw>.allocate(capacity: rects.count)
    buf.initialize(from: &rects, count: rects.count)
    out_rects.pointee = buf
    out_count.pointee = rects.count
    return VN_OK
}

@frozen
public struct VNTextObservationRaw {
    public var bbox_x: Double
    public var bbox_y: Double
    public var bbox_w: Double
    public var bbox_h: Double
    public var confidence: Float
    public var _pad: Float
    public var character_boxes: UnsafeMutablePointer<VNSimpleRectRaw>?
    public var character_box_count: Int
}

@_cdecl("vn_detect_text_observations_in_path")
public func vn_detect_text_observations_in_path(
    _ path: UnsafePointer<CChar>,
    _ reports_character_boxes: Bool,
    _ roiX: Double,
    _ roiY: Double,
    _ roiW: Double,
    _ roiH: Double,
    _ hasRegionOfInterest: Bool,
    _ preferBackgroundProcessing: Bool,
    _ usesCPUOnly: Bool,
    _ revision: Int,
    _ hasRevision: Bool,
    _ out_observations_raw: UnsafeMutableRawPointer,
    _ out_count: UnsafeMutablePointer<Int>,
    _ outErrorMessage: UnsafeMutablePointer<UnsafeMutablePointer<CChar>?>?
) -> Int32 {
    let outObservations = out_observations_raw.assumingMemoryBound(to: UnsafeMutablePointer<VNTextObservationRaw>?.self)
    outObservations.pointee = nil
    out_count.pointee = 0
    let p = String(cString: path)
    guard let img = loadCGImage(path: p) else {
        outErrorMessage?.pointee = ffiString("could not load image at \(p)")
        return VN_IMAGE_LOAD_FAILED
    }
    let req = VNDetectTextRectanglesRequest()
    req.reportCharacterBoxes = reports_character_boxes
    applyImageBasedRequestConfig(
        req,
        roiX: roiX,
        roiY: roiY,
        roiW: roiW,
        roiH: roiH,
        hasRegionOfInterest: hasRegionOfInterest,
        preferBackgroundProcessing: preferBackgroundProcessing,
        usesCPUOnly: usesCPUOnly,
        revision: revision,
        hasRevision: hasRevision
    )
    let handler = VNImageRequestHandler(cgImage: img, options: [:])
    do { try handler.perform([req]) } catch {
        outErrorMessage?.pointee = ffiString("text observations request failed: \(error.localizedDescription)")
        return VN_REQUEST_FAILED
    }
    guard let observations = req.results, !observations.isEmpty else { return VN_OK }
    let buffer = UnsafeMutablePointer<VNTextObservationRaw>.allocate(capacity: observations.count)
    for (index, observation) in observations.enumerated() {
        let characterBoxes = observation.characterBoxes ?? []
        let characterBuffer: UnsafeMutablePointer<VNSimpleRectRaw>?
        if characterBoxes.isEmpty {
            characterBuffer = nil
        } else {
            let allocated = UnsafeMutablePointer<VNSimpleRectRaw>.allocate(capacity: characterBoxes.count)
            var raws = characterBoxes.map { mkRect($0.boundingBox, $0.confidence) }
            allocated.initialize(from: &raws, count: raws.count)
            characterBuffer = allocated
        }
        let bbox = observation.boundingBox
        buffer.advanced(by: index).initialize(to: VNTextObservationRaw(
            bbox_x: Double(bbox.origin.x),
            bbox_y: Double(bbox.origin.y),
            bbox_w: Double(bbox.size.width),
            bbox_h: Double(bbox.size.height),
            confidence: observation.confidence,
            _pad: 0,
            character_boxes: characterBuffer,
            character_box_count: characterBoxes.count
        ))
    }
    outObservations.pointee = buffer
    out_count.pointee = observations.count
    return VN_OK
}

@_cdecl("vn_text_observations_free")
public func vn_text_observations_free(_ ptr: UnsafeMutableRawPointer?, _ count: Int) {
    guard let ptr else { return }
    let typed = ptr.assumingMemoryBound(to: VNTextObservationRaw.self)
    for index in 0..<count {
        typed[index].character_boxes?.deallocate()
    }
    typed.deinitialize(count: count)
    typed.deallocate()
}

// MARK: - Objectness-based saliency

@_cdecl("vn_objectness_saliency_in_path")
public func vn_objectness_saliency_in_path(
    _ path: UnsafePointer<CChar>,
    _ out_rects_raw: UnsafeMutableRawPointer,
    _ out_count: UnsafeMutablePointer<Int>,
    _ outErrorMessage: UnsafeMutablePointer<UnsafeMutablePointer<CChar>?>?
) -> Int32 {
    let out_rects = out_rects_raw.assumingMemoryBound(to: UnsafeMutablePointer<VNSimpleRectRaw>?.self)
    out_rects.pointee = nil
    out_count.pointee = 0
    let p = String(cString: path)
    guard let img = loadCGImage(path: p) else {
        outErrorMessage?.pointee = ffiString("could not load image at \(p)")
        return VN_IMAGE_LOAD_FAILED
    }
    let req = VNGenerateObjectnessBasedSaliencyImageRequest()
    let handler = VNImageRequestHandler(cgImage: img, options: [:])
    do { try handler.perform([req]) } catch {
        outErrorMessage?.pointee = ffiString("objectness saliency request failed: \(error.localizedDescription)")
        return VN_REQUEST_FAILED
    }
    guard let obs = req.results?.first, let regions = obs.salientObjects else { return VN_OK }
    var rects = regions.map { mkRect($0.boundingBox, $0.confidence) }
    if rects.isEmpty { return VN_OK }
    let buf = UnsafeMutablePointer<VNSimpleRectRaw>.allocate(capacity: rects.count)
    buf.initialize(from: &rects, count: rects.count)
    out_rects.pointee = buf
    out_count.pointee = rects.count
    return VN_OK
}

// MARK: - Person instance mask (macOS 14+)

/// Writes a packed 8-bit per-pixel mask. Caller must `free()` the buffer.
@_cdecl("vn_person_instance_mask_in_path")
public func vn_person_instance_mask_in_path(
    _ path: UnsafePointer<CChar>,
    _ out_width: UnsafeMutablePointer<Int>,
    _ out_height: UnsafeMutablePointer<Int>,
    _ out_bytes_per_row: UnsafeMutablePointer<Int>,
    _ out_data: UnsafeMutablePointer<UnsafeMutablePointer<UInt8>?>,
    _ outErrorMessage: UnsafeMutablePointer<UnsafeMutablePointer<CChar>?>?
) -> Int32 {
    out_width.pointee = 0
    out_height.pointee = 0
    out_bytes_per_row.pointee = 0
    out_data.pointee = nil
    if #available(macOS 14.0, *) {
        let p = String(cString: path)
        guard let img = loadCGImage(path: p) else {
            outErrorMessage?.pointee = ffiString("could not load image at \(p)")
            return VN_IMAGE_LOAD_FAILED
        }
        let req = VNGeneratePersonInstanceMaskRequest()
        let handler = VNImageRequestHandler(cgImage: img, options: [:])
        do { try handler.perform([req]) } catch {
            outErrorMessage?.pointee = ffiString("person instance mask request failed: \(error.localizedDescription)")
            return VN_REQUEST_FAILED
        }
        guard let obs = req.results?.first else { return VN_OK }
        let allInstances = obs.allInstances
        if allInstances.isEmpty { return VN_OK }
        guard let pb = try? obs.generateScaledMaskForImage(forInstances: allInstances, from: handler) else {
            return VN_OK
        }
        CVPixelBufferLockBaseAddress(pb, .readOnly)
        defer { CVPixelBufferUnlockBaseAddress(pb, .readOnly) }
        let w = CVPixelBufferGetWidth(pb)
        let h = CVPixelBufferGetHeight(pb)
        let bpr = CVPixelBufferGetBytesPerRow(pb)
        guard let base = CVPixelBufferGetBaseAddress(pb) else { return VN_OK }
        let size = bpr * h
        let data = UnsafeMutablePointer<UInt8>.allocate(capacity: size)
        data.initialize(from: base.assumingMemoryBound(to: UInt8.self), count: size)
        out_width.pointee = w
        out_height.pointee = h
        out_bytes_per_row.pointee = bpr
        out_data.pointee = data
        return VN_OK
    }
    return VN_REQUEST_FAILED
}

@_cdecl("vn_mask_buffer_free")
public func vn_mask_buffer_free(_ ptr: UnsafeMutablePointer<UInt8>?, _ size: Int) {
    guard let ptr = ptr else { return }
    ptr.deinitialize(count: size)
    ptr.deallocate()
}