use crate::{Result, VirtualProductionError};
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum MarkerDictionary {
Aruco4x4_50,
Aruco5x5_100,
Aruco6x6_250,
AprilTag36h11,
}
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct Point2f {
pub x: f32,
pub y: f32,
}
impl Point2f {
#[must_use]
pub const fn new(x: f32, y: f32) -> Self {
Self { x, y }
}
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MarkerPose {
pub id: u32,
pub translation: [f64; 3],
pub rotation: [[f64; 3]; 3],
pub reprojection_error: f64,
pub corners: [Point2f; 4],
}
impl MarkerPose {
#[must_use]
pub fn new(id: u32, corners: [Point2f; 4]) -> Self {
Self {
id,
translation: [0.0; 3],
rotation: [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]],
reprojection_error: 0.0,
corners,
}
}
#[must_use]
pub fn centroid(&self) -> Point2f {
let x = self.corners.iter().map(|c| c.x).sum::<f32>() / 4.0;
let y = self.corners.iter().map(|c| c.y).sum::<f32>() / 4.0;
Point2f::new(x, y)
}
#[must_use]
pub fn pixel_size(&self) -> f32 {
let [c0, c1, c2, c3] = self.corners;
let d01 = ((c1.x - c0.x).powi(2) + (c1.y - c0.y).powi(2)).sqrt();
let d12 = ((c2.x - c1.x).powi(2) + (c2.y - c1.y).powi(2)).sqrt();
let d23 = ((c3.x - c2.x).powi(2) + (c3.y - c2.y).powi(2)).sqrt();
let d30 = ((c0.x - c3.x).powi(2) + (c0.y - c3.y).powi(2)).sqrt();
(d01 + d12 + d23 + d30) / 4.0
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnchoredObject {
pub name: String,
pub marker_id: u32,
pub offset: [f64; 3],
pub scale: f64,
pub visible: bool,
pub label: Option<String>,
}
impl AnchoredObject {
#[must_use]
pub fn new(name: &str, marker_id: u32) -> Self {
Self {
name: name.to_string(),
marker_id,
offset: [0.0; 3],
scale: 1.0,
visible: true,
label: None,
}
}
#[must_use]
pub fn with_offset(mut self, dx: f64, dy: f64, dz: f64) -> Self {
self.offset = [dx, dy, dz];
self
}
#[must_use]
pub fn with_scale(mut self, scale: f64) -> Self {
self.scale = scale;
self
}
#[must_use]
pub fn with_visible(mut self, visible: bool) -> Self {
self.visible = visible;
self
}
#[must_use]
pub fn world_position(&self, pose: &MarkerPose) -> [f64; 3] {
let r = &pose.rotation;
let o = &self.offset;
[
r[0][0] * o[0] + r[0][1] * o[1] + r[0][2] * o[2] + pose.translation[0],
r[1][0] * o[0] + r[1][1] * o[1] + r[1][2] * o[2] + pose.translation[1],
r[2][0] * o[0] + r[2][1] * o[1] + r[2][2] * o[2] + pose.translation[2],
]
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArOverlayConfig {
pub dictionary: MarkerDictionary,
pub marker_size_m: f64,
pub min_marker_area_px: f32,
pub max_markers: usize,
pub overlay_alpha: f32,
}
impl Default for ArOverlayConfig {
fn default() -> Self {
Self {
dictionary: MarkerDictionary::Aruco4x4_50,
marker_size_m: 0.15,
min_marker_area_px: 100.0,
max_markers: 16,
overlay_alpha: 1.0,
}
}
}
struct MarkerDetector {
config: ArOverlayConfig,
}
impl MarkerDetector {
fn new(config: ArOverlayConfig) -> Self {
Self { config }
}
fn detect(&self, image: &[u8], width: usize, height: usize) -> Vec<(u32, [Point2f; 4])> {
if image.len() != width * height * 3 {
return Vec::new();
}
let gray: Vec<u8> = image
.chunks_exact(3)
.map(|c| {
let r = c[0] as u32;
let g = c[1] as u32;
let b = c[2] as u32;
((r * 77 + g * 150 + b * 29) >> 8) as u8
})
.collect();
self.find_quads(&gray, width, height)
}
fn find_quads(&self, gray: &[u8], width: usize, height: usize) -> Vec<(u32, [Point2f; 4])> {
let threshold = 128u8;
let min_side = (self.config.min_marker_area_px.sqrt() as usize).max(4);
let mut results = Vec::new();
let mut visited = vec![false; width * height];
let mut id_counter = 0u32;
let mut y = 1;
while y + 1 < height {
let mut x = 1;
while x + 1 < width {
let idx = y * width + x;
if visited[idx] || gray[idx] < threshold {
x += 1;
continue;
}
let (min_x, min_y, max_x, max_y, count) =
Self::flood_region(gray, &mut visited, x, y, width, height, threshold);
let w = max_x.saturating_sub(min_x);
let h = max_y.saturating_sub(min_y);
let area = (w * h) as f32;
if w >= min_side && h >= min_side && area >= self.config.min_marker_area_px {
let fill_ratio = count as f32 / area.max(1.0);
if fill_ratio > 0.5 {
let corners = [
Point2f::new(min_x as f32, min_y as f32),
Point2f::new(max_x as f32, min_y as f32),
Point2f::new(max_x as f32, max_y as f32),
Point2f::new(min_x as f32, max_y as f32),
];
results.push((id_counter % 50, corners));
id_counter += 1;
if results.len() >= self.config.max_markers {
return results;
}
}
}
x += 1;
}
y += 1;
}
results
}
fn flood_region(
gray: &[u8],
visited: &mut [bool],
start_x: usize,
start_y: usize,
width: usize,
height: usize,
threshold: u8,
) -> (usize, usize, usize, usize, usize) {
let mut stack = Vec::new();
stack.push((start_x, start_y));
let mut min_x = start_x;
let mut min_y = start_y;
let mut max_x = start_x;
let mut max_y = start_y;
let mut count = 0usize;
while let Some((cx, cy)) = stack.pop() {
let idx = cy * width + cx;
if visited[idx] {
continue;
}
visited[idx] = true;
if gray[idx] < threshold {
continue;
}
count += 1;
min_x = min_x.min(cx);
min_y = min_y.min(cy);
max_x = max_x.max(cx);
max_y = max_y.max(cy);
if cx + 1 < width {
stack.push((cx + 1, cy));
}
if cx > 0 {
stack.push((cx - 1, cy));
}
if cy + 1 < height {
stack.push((cx, cy + 1));
}
if cy > 0 {
stack.push((cx, cy - 1));
}
}
(min_x, min_y, max_x, max_y, count)
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CameraIntrinsics {
pub fx: f64,
pub fy: f64,
pub cx: f64,
pub cy: f64,
}
impl CameraIntrinsics {
#[must_use]
pub fn new(fx: f64, fy: f64, cx: f64, cy: f64) -> Self {
Self { fx, fy, cx, cy }
}
#[must_use]
pub fn from_image_size(width: usize, height: usize) -> Self {
let cx = width as f64 / 2.0;
let cy = height as f64 / 2.0;
let fx = cx / (35.0_f64.to_radians().tan());
let fy = fx;
Self { fx, fy, cx, cy }
}
}
struct PoseEstimator {
intrinsics: CameraIntrinsics,
marker_size_m: f64,
}
impl PoseEstimator {
fn new(intrinsics: CameraIntrinsics, marker_size_m: f64) -> Self {
Self {
intrinsics,
marker_size_m,
}
}
fn estimate_pose(&self, corners: &[Point2f; 4]) -> ([f64; 3], [[f64; 3]; 3]) {
let half = self.marker_size_m / 2.0;
let obj_pts = [
[-half, half, 0.0],
[half, half, 0.0],
[half, -half, 0.0],
[-half, -half, 0.0],
];
let h = self.dlt_homography(corners, &obj_pts);
self.decompose_homography(&h)
}
fn dlt_homography(&self, img_pts: &[Point2f; 4], obj_pts: &[[f64; 3]; 4]) -> [[f64; 9]; 1] {
let fx = self.intrinsics.fx;
let fy = self.intrinsics.fy;
let cx = self.intrinsics.cx;
let cy = self.intrinsics.cy;
let mut a = [[0.0f64; 9]; 8];
for (i, (img, obj)) in img_pts.iter().zip(obj_pts.iter()).enumerate() {
let xn = (img.x as f64 - cx) / fx;
let yn = (img.y as f64 - cy) / fy;
let ox = obj[0];
let oy = obj[1];
a[2 * i] = [ox, oy, 1.0, 0.0, 0.0, 0.0, -xn * ox, -xn * oy, -xn];
a[2 * i + 1] = [0.0, 0.0, 0.0, ox, oy, 1.0, -yn * ox, -yn * oy, -yn];
}
let h = Self::solve_homogeneous_8x9(&a);
[h]
}
fn solve_homogeneous_8x9(a: &[[f64; 9]; 8]) -> [f64; 9] {
let mut ata = [[0.0f64; 9]; 9];
for i in 0..9 {
for j in 0..9 {
for k in 0..8 {
ata[i][j] += a[k][i] * a[k][j];
}
}
}
let mut v = [1.0f64; 9];
let norm = v.iter().map(|x| x * x).sum::<f64>().sqrt().max(1e-15);
for x in &mut v {
*x /= norm;
}
for _ in 0..50 {
let mut new_v = [0.0f64; 9];
for i in 0..9 {
for j in 0..9 {
new_v[i] += ata[i][j] * v[j];
}
}
let shift = new_v.iter().zip(v.iter()).map(|(a, b)| a * b).sum::<f64>();
for i in 0..9 {
new_v[i] -= shift * v[i];
}
let new_norm = new_v.iter().map(|x| x * x).sum::<f64>().sqrt().max(1e-15);
for i in 0..9 {
v[i] = new_v[i] / new_norm;
}
}
v
}
fn decompose_homography(&self, h: &[[f64; 9]; 1]) -> ([f64; 3], [[f64; 3]; 3]) {
let hv = &h[0];
let r1 = [hv[0], hv[3], hv[6]];
let r2 = [hv[1], hv[4], hv[7]];
let t = [hv[2], hv[5], hv[8]];
let norm1 = (r1[0] * r1[0] + r1[1] * r1[1] + r1[2] * r1[2])
.sqrt()
.max(1e-15);
let norm2 = (r2[0] * r2[0] + r2[1] * r2[1] + r2[2] * r2[2])
.sqrt()
.max(1e-15);
let scale = (norm1 + norm2) / 2.0;
let r1n = [r1[0] / norm1, r1[1] / norm1, r1[2] / norm1];
let r2n = [r2[0] / norm2, r2[1] / norm2, r2[2] / norm2];
let r3n = [
r1n[1] * r2n[2] - r1n[2] * r2n[1],
r1n[2] * r2n[0] - r1n[0] * r2n[2],
r1n[0] * r2n[1] - r1n[1] * r2n[0],
];
let rotation = [r1n, r2n, r3n];
let translation = [t[0] / scale, t[1] / scale, t[2] / scale];
(translation, rotation)
}
}
pub struct ArOverlay {
config: ArOverlayConfig,
detector: MarkerDetector,
anchored_objects: Vec<AnchoredObject>,
last_poses: Vec<MarkerPose>,
frame_count: u64,
}
impl ArOverlay {
pub fn new() -> Result<Self> {
Self::with_config(ArOverlayConfig::default())
}
pub fn with_config(config: ArOverlayConfig) -> Result<Self> {
let detector = MarkerDetector::new(config.clone());
Ok(Self {
config,
detector,
anchored_objects: Vec::new(),
last_poses: Vec::new(),
frame_count: 0,
})
}
pub fn add_object(&mut self, obj: AnchoredObject) {
self.anchored_objects.push(obj);
}
pub fn remove_object(&mut self, name: &str) {
self.anchored_objects.retain(|o| o.name != name);
}
pub fn process_frame(
&mut self,
image: &[u8],
width: usize,
height: usize,
intrinsics: &CameraIntrinsics,
) -> Result<Vec<MarkerPose>> {
if image.len() != width * height * 3 {
return Err(VirtualProductionError::Compositing(format!(
"Image size mismatch: expected {}, got {}",
width * height * 3,
image.len()
)));
}
let detections = self.detector.detect(image, width, height);
let estimator = PoseEstimator::new(intrinsics.clone(), self.config.marker_size_m);
let mut poses: Vec<MarkerPose> = detections
.into_iter()
.map(|(id, corners)| {
let (translation, rotation) = estimator.estimate_pose(&corners);
let mut pose = MarkerPose::new(id, corners);
pose.translation = translation;
pose.rotation = rotation;
pose
})
.collect();
poses.sort_by_key(|p| p.id);
self.last_poses = poses.clone();
self.frame_count += 1;
Ok(poses)
}
#[must_use]
pub fn visible_object_positions(&self) -> Vec<(&AnchoredObject, [f64; 3])> {
let mut result = Vec::new();
for obj in &self.anchored_objects {
if !obj.visible {
continue;
}
if let Some(pose) = self.last_poses.iter().find(|p| p.id == obj.marker_id) {
let pos = obj.world_position(pose);
result.push((obj, pos));
}
}
result
}
pub fn composite_overlay(
&self,
camera_rgb: &[u8],
width: usize,
height: usize,
) -> Result<Vec<u8>> {
if camera_rgb.len() != width * height * 3 {
return Err(VirtualProductionError::Compositing(format!(
"Frame size mismatch: expected {}, got {}",
width * height * 3,
camera_rgb.len()
)));
}
let mut rgba = Vec::with_capacity(width * height * 4);
for chunk in camera_rgb.chunks_exact(3) {
rgba.push(chunk[0]);
rgba.push(chunk[1]);
rgba.push(chunk[2]);
rgba.push(255);
}
let alpha = (self.config.overlay_alpha * 255.0) as u8;
for pose in &self.last_poses {
let corners = &pose.corners;
for i in 0..4 {
let c0 = corners[i];
let c1 = corners[(i + 1) % 4];
self.draw_line_rgba(&mut rgba, width, height, c0, c1, [0, 255, 0, alpha]);
}
}
Ok(rgba)
}
fn draw_line_rgba(
&self,
rgba: &mut [u8],
width: usize,
height: usize,
p0: Point2f,
p1: Point2f,
color: [u8; 4],
) {
let mut x0 = p0.x as i32;
let mut y0 = p0.y as i32;
let x1 = p1.x as i32;
let y1 = p1.y as i32;
let dx = (x1 - x0).abs();
let dy = -(y1 - y0).abs();
let sx: i32 = if x0 < x1 { 1 } else { -1 };
let sy: i32 = if y0 < y1 { 1 } else { -1 };
let mut err = dx + dy;
loop {
if x0 >= 0 && y0 >= 0 && (x0 as usize) < width && (y0 as usize) < height {
let idx = (y0 as usize * width + x0 as usize) * 4;
rgba[idx] = color[0];
rgba[idx + 1] = color[1];
rgba[idx + 2] = color[2];
rgba[idx + 3] = color[3];
}
if x0 == x1 && y0 == y1 {
break;
}
let e2 = 2 * err;
if e2 >= dy {
err += dy;
x0 += sx;
}
if e2 <= dx {
err += dx;
y0 += sy;
}
}
}
#[must_use]
pub fn config(&self) -> &ArOverlayConfig {
&self.config
}
#[must_use]
pub fn frame_count(&self) -> u64 {
self.frame_count
}
#[must_use]
pub fn objects(&self) -> &[AnchoredObject] {
&self.anchored_objects
}
#[must_use]
pub fn last_poses(&self) -> &[MarkerPose] {
&self.last_poses
}
}
#[cfg(test)]
mod tests {
use super::*;
fn make_test_image(w: usize, h: usize) -> Vec<u8> {
let mut img = vec![0u8; w * h * 3];
for y in 10..30 {
for x in 10..30 {
if x < w && y < h {
let idx = (y * w + x) * 3;
img[idx] = 255;
img[idx + 1] = 255;
img[idx + 2] = 255;
}
}
}
img
}
#[test]
fn test_ar_overlay_creation() {
let ar = ArOverlay::new();
assert!(ar.is_ok());
}
#[test]
fn test_ar_overlay_with_config() {
let config = ArOverlayConfig {
dictionary: MarkerDictionary::Aruco6x6_250,
marker_size_m: 0.2,
min_marker_area_px: 200.0,
max_markers: 8,
overlay_alpha: 0.8,
};
let ar = ArOverlay::with_config(config);
assert!(ar.is_ok());
}
#[test]
fn test_add_and_remove_object() {
let mut ar = ArOverlay::new().expect("should create");
ar.add_object(AnchoredObject::new("cube", 0));
ar.add_object(AnchoredObject::new("sphere", 1));
assert_eq!(ar.objects().len(), 2);
ar.remove_object("cube");
assert_eq!(ar.objects().len(), 1);
assert_eq!(ar.objects()[0].name, "sphere");
}
#[test]
fn test_process_frame_empty_image() {
let mut ar = ArOverlay::new().expect("should create");
let intrinsics = CameraIntrinsics::from_image_size(64, 64);
let img = vec![0u8; 64 * 64 * 3];
let poses = ar.process_frame(&img, 64, 64, &intrinsics);
assert!(poses.is_ok());
assert_eq!(poses.expect("ok").len(), 0);
}
#[test]
fn test_process_frame_with_marker() {
let mut ar = ArOverlay::new().expect("should create");
let intrinsics = CameraIntrinsics::from_image_size(64, 64);
let img = make_test_image(64, 64);
let poses = ar.process_frame(&img, 64, 64, &intrinsics);
assert!(poses.is_ok());
let poses = poses.expect("ok");
assert!(!poses.is_empty(), "should detect the white square marker");
}
#[test]
fn test_process_frame_size_mismatch() {
let mut ar = ArOverlay::new().expect("should create");
let intrinsics = CameraIntrinsics::from_image_size(64, 64);
let bad_img = vec![0u8; 10]; let result = ar.process_frame(&bad_img, 64, 64, &intrinsics);
assert!(result.is_err());
}
#[test]
fn test_frame_count_increments() {
let mut ar = ArOverlay::new().expect("should create");
let intrinsics = CameraIntrinsics::from_image_size(32, 32);
let img = vec![0u8; 32 * 32 * 3];
assert_eq!(ar.frame_count(), 0);
ar.process_frame(&img, 32, 32, &intrinsics)
.expect("should succeed");
ar.process_frame(&img, 32, 32, &intrinsics)
.expect("should succeed");
assert_eq!(ar.frame_count(), 2);
}
#[test]
fn test_composite_overlay() {
let mut ar = ArOverlay::new().expect("should create");
let intrinsics = CameraIntrinsics::from_image_size(64, 64);
let img = make_test_image(64, 64);
ar.process_frame(&img, 64, 64, &intrinsics)
.expect("should succeed");
let result = ar.composite_overlay(&img, 64, 64);
assert!(result.is_ok());
let rgba = result.expect("ok");
assert_eq!(rgba.len(), 64 * 64 * 4);
}
#[test]
fn test_composite_overlay_size_mismatch() {
let ar = ArOverlay::new().expect("should create");
let bad_img = vec![0u8; 10];
let result = ar.composite_overlay(&bad_img, 64, 64);
assert!(result.is_err());
}
#[test]
fn test_marker_pose_centroid() {
let corners = [
Point2f::new(0.0, 0.0),
Point2f::new(10.0, 0.0),
Point2f::new(10.0, 10.0),
Point2f::new(0.0, 10.0),
];
let pose = MarkerPose::new(0, corners);
let centroid = pose.centroid();
assert!((centroid.x - 5.0).abs() < 1e-5);
assert!((centroid.y - 5.0).abs() < 1e-5);
}
#[test]
fn test_marker_pose_pixel_size() {
let corners = [
Point2f::new(0.0, 0.0),
Point2f::new(10.0, 0.0),
Point2f::new(10.0, 10.0),
Point2f::new(0.0, 10.0),
];
let pose = MarkerPose::new(0, corners);
let size = pose.pixel_size();
assert!(
(size - 10.0).abs() < 1e-4,
"pixel size should be 10: {size}"
);
}
#[test]
fn test_anchored_object_world_position_identity() {
let obj = AnchoredObject::new("cube", 0).with_offset(1.0, 0.0, 0.0);
let pose = MarkerPose::new(
0,
[
Point2f::new(0.0, 0.0),
Point2f::new(1.0, 0.0),
Point2f::new(1.0, 1.0),
Point2f::new(0.0, 1.0),
],
);
let mut p = pose;
p.translation = [2.0, 3.0, 4.0];
let world = obj.world_position(&p);
assert!((world[0] - 3.0).abs() < 1e-9);
assert!((world[1] - 3.0).abs() < 1e-9);
assert!((world[2] - 4.0).abs() < 1e-9);
}
#[test]
fn test_visible_object_positions_no_poses() {
let mut ar = ArOverlay::new().expect("should create");
ar.add_object(AnchoredObject::new("cube", 42));
let positions = ar.visible_object_positions();
assert!(positions.is_empty());
}
#[test]
fn test_visible_object_positions_with_detected_marker() {
let mut ar = ArOverlay::new().expect("should create");
let intrinsics = CameraIntrinsics::from_image_size(64, 64);
let img = make_test_image(64, 64);
let poses = ar
.process_frame(&img, 64, 64, &intrinsics)
.expect("should succeed");
if let Some(first_pose) = poses.first() {
ar.add_object(AnchoredObject::new("cube", first_pose.id));
let positions = ar.visible_object_positions();
assert!(!positions.is_empty());
}
}
#[test]
fn test_camera_intrinsics_from_image_size() {
let intr = CameraIntrinsics::from_image_size(1920, 1080);
assert!((intr.cx - 960.0).abs() < 1e-6);
assert!((intr.cy - 540.0).abs() < 1e-6);
assert!(intr.fx > 0.0);
}
#[test]
fn test_anchored_object_builder() {
let obj = AnchoredObject::new("box", 5)
.with_offset(0.1, 0.2, 0.3)
.with_scale(2.0)
.with_visible(false);
assert_eq!(obj.marker_id, 5);
assert!((obj.offset[0] - 0.1).abs() < 1e-9);
assert!((obj.scale - 2.0).abs() < 1e-9);
assert!(!obj.visible);
}
#[test]
fn test_multiple_markers_detected() {
let w = 128usize;
let h = 64usize;
let mut img = vec![0u8; w * h * 3];
for y in 5..25 {
for x in 5..25 {
let idx = (y * w + x) * 3;
img[idx] = 255;
img[idx + 1] = 255;
img[idx + 2] = 255;
}
}
for y in 5..25 {
for x in 80..100 {
let idx = (y * w + x) * 3;
img[idx] = 255;
img[idx + 1] = 255;
img[idx + 2] = 255;
}
}
let mut ar = ArOverlay::new().expect("should create");
let intrinsics = CameraIntrinsics::from_image_size(w, h);
let poses = ar
.process_frame(&img, w, h, &intrinsics)
.expect("should succeed");
assert!(poses.len() >= 2, "expected ≥2 markers, got {}", poses.len());
}
}