use crate::results::Results;
use crate::visualizer::color::{COLORS, POSE_COLORS};
use crate::visualizer::skeleton::{KPT_COLOR_INDICES, LIMB_COLOR_INDICES, SKELETON};
use ab_glyph::{Font, FontRef, PxScale, ScaleFont};
use image::{DynamicImage, Rgb};
use imageproc::drawing::{draw_filled_rect_mut, draw_hollow_rect_mut, draw_text_mut};
use imageproc::rect::Rect;
use std::fs::{self, File};
use std::io::{self, BufReader, Read};
use std::path::{Path, PathBuf};
/// Base URL of the release assets; `check_font` appends `/<font file name>` to it
/// to build the download URL for a missing font.
const ASSETS_URL: &str = "https://github.com/ultralytics/assets/releases/download/v0.0.0";
/// Returns the palette color used for `class_id`.
///
/// The id is reduced modulo the length of `COLORS`, so ids larger than the
/// palette wrap around instead of indexing out of bounds.
#[must_use]
pub const fn get_class_color(class_id: usize) -> Rgb<u8> {
    let palette_index = class_id % COLORS.len();
    Rgb(COLORS[palette_index])
}
/// Returns the first run-directory path under `base` that does not exist yet.
///
/// Candidates are tried in order: `base/prefix`, then `base/prefix2`,
/// `base/prefix3`, and so on. Nothing is created on disk; the chosen path is
/// returned as a (lossily converted) string.
#[must_use]
pub fn find_next_run_dir(base: &str, prefix: &str) -> String {
    let root = Path::new(base);

    // The un-numbered directory is preferred when it is still free.
    let unnumbered = root.join(prefix);
    if !unnumbered.exists() {
        return unnumbered.to_string_lossy().into_owned();
    }

    // Otherwise probe numbered siblings, starting at 2.
    let mut suffix = 2;
    loop {
        let candidate = root.join(format!("{prefix}{suffix}"));
        if !candidate.exists() {
            return candidate.to_string_lossy().into_owned();
        }
        suffix += 1;
    }
}
/// Loads the image stored at `path`.
///
/// Files whose extension is `jpg` or `jpeg` (compared case-insensitively) are
/// first decoded with `jpeg_decoder`. That result is used only when the file
/// opens, decodes, reports its metadata, has an `RGB24` or `L8` pixel format,
/// and the decoded buffer fits the reported dimensions. In every other case
/// the function falls back to `image::open`.
///
/// # Errors
///
/// Returns whatever error `image::open` reports when the fallback path fails.
#[allow(clippy::missing_errors_doc)]
pub fn load_image(path: &str) -> image::ImageResult<DynamicImage> {
    let extension = Path::new(path)
        .extension()
        .and_then(|ext| ext.to_str())
        .map(str::to_lowercase);
    let looks_like_jpeg = matches!(extension.as_deref(), Some("jpg" | "jpeg"));

    // Fast path: `None` means "could not use it", never an error.
    let decode_jpeg = || -> Option<DynamicImage> {
        let file = File::open(path).ok()?;
        let mut decoder = jpeg_decoder::Decoder::new(BufReader::new(file));
        let pixels = decoder.decode().ok()?;
        let info = decoder.info()?;
        let (width, height) = (u32::from(info.width), u32::from(info.height));
        match info.pixel_format {
            jpeg_decoder::PixelFormat::RGB24 => {
                image::ImageBuffer::from_raw(width, height, pixels).map(DynamicImage::ImageRgb8)
            }
            jpeg_decoder::PixelFormat::L8 => {
                image::ImageBuffer::from_raw(width, height, pixels).map(DynamicImage::ImageLuma8)
            }
            _ => None,
        }
    };

    looks_like_jpeg
        .then(decode_jpeg)
        .flatten()
        .map_or_else(|| image::open(path), Ok)
}
/// Resolves `font` to a file inside the user's `Ultralytics` config directory,
/// downloading it from `ASSETS_URL` when it is not there yet.
///
/// Only the file-name component of `font` is used. Returns `None` when the
/// name or the config directory cannot be determined, the directory cannot be
/// created, or the download / file write fails (each failure is reported on
/// stderr). A partially written file is removed after a failed copy.
#[must_use]
pub fn check_font(font: &str) -> Option<PathBuf> {
    let font_name = Path::new(font).file_name()?.to_string_lossy();
    let config_dir = dirs::config_dir()?.join("Ultralytics");
    let font_path = config_dir.join(font_name.as_ref());

    // Already cached: nothing to download.
    if font_path.exists() {
        return Some(font_path);
    }

    if let Err(e) = fs::create_dir_all(&config_dir) {
        eprintln!("Failed to create config directory: {e}");
        return None;
    }

    let url = format!("{ASSETS_URL}/{font_name}");
    println!("Downloading {url} to {}", font_path.display());

    let response = match ureq::get(&url).call() {
        Ok(response) => response,
        Err(e) => {
            eprintln!("Failed to download font from {url}: {e}");
            return None;
        }
    };

    // The destination file is only created once the request has succeeded.
    let mut file = match File::create(&font_path) {
        Ok(f) => f,
        Err(e) => {
            eprintln!("Failed to create font file: {e}");
            return None;
        }
    };

    let mut reader = response.into_body().into_reader();
    match io::copy(&mut reader, &mut file) {
        Ok(_) => Some(font_path),
        Err(e) => {
            eprintln!("Failed to download font: {e}");
            // Best effort: do not leave a truncated font behind.
            let _ = fs::remove_file(&font_path);
            None
        }
    }
}
/// Draws every annotation carried by `result` onto a copy of `image`.
///
/// The input is converted to RGB8 and then passed, in order, through
/// `draw_detection`, `draw_pose`, `draw_obb` and `draw_classification`.
/// `top_k` limits the number of classification entries and defaults to 5.
///
/// The font is obtained through `check_font`: `Arial.Unicode.ttf` when boxes
/// are present and at least one class name is non-ASCII, `Arial.ttf`
/// otherwise. If the font cannot be obtained or parsed, the drawing helpers
/// are called without a font.
#[must_use]
pub fn annotate_image(
    image: &DynamicImage,
    result: &Results,
    top_k: Option<usize>,
) -> DynamicImage {
    let mut canvas = image.to_rgb8();

    let needs_unicode = result.boxes.is_some() && result.names.values().any(|n| !n.is_ascii());
    let font_name = if needs_unicode {
        "Arial.Unicode.ttf"
    } else {
        "Arial.ttf"
    };

    // The raw bytes must outlive the `FontRef` that borrows them.
    let font_bytes = check_font(font_name).and_then(|path| {
        let mut bytes = Vec::new();
        File::open(path).ok()?.read_to_end(&mut bytes).ok()?;
        Some(bytes)
    });
    let parsed_font = font_bytes
        .as_deref()
        .and_then(|bytes| FontRef::try_from_slice(bytes).ok());
    let font = parsed_font.as_ref();

    draw_detection(&mut canvas, result, font);
    draw_pose(&mut canvas, result, None, None, None);
    draw_obb(&mut canvas, result, font);
    draw_classification(&mut canvas, result, font, top_k.unwrap_or(5));

    DynamicImage::ImageRgb8(canvas)
}
/// Draws a straight line from `(x1, y1)` to `(x2, y2)` with a square brush.
///
/// The line is traced with Bresenham-style error accumulation on float
/// coordinates, advancing one pixel per step. At every step a square of
/// `thickness / 2` pixels around the current point is painted; a `thickness`
/// of 0 or 1 paints a single pixel. Brush pixels that fall outside the image
/// are clamped onto the nearest border pixel.
///
/// Nothing is drawn when the image has a zero dimension or when any endpoint
/// coordinate is NaN or infinite.
#[allow(
    clippy::cast_sign_loss,
    clippy::cast_possible_truncation,
    clippy::cast_possible_wrap
)]
fn draw_line_segment(
    img: &mut image::RgbImage,
    x1: f32,
    y1: f32,
    x2: f32,
    y2: f32,
    color: Rgb<u8>,
    thickness: i32,
) {
    let (width, height) = img.dimensions();
    // An empty image has no valid pixel to clamp to.
    if width == 0 || height == 0 {
        return;
    }
    // Non-finite endpoints can never satisfy the termination test below and
    // would make the loop spin forever.
    if !(x1.is_finite() && y1.is_finite() && x2.is_finite() && y2.is_finite()) {
        return;
    }

    let max_x = width as i32 - 1;
    let max_y = height as i32 - 1;
    let half_t = thickness / 2;

    let dx = (x2 - x1).abs();
    let dy = (y2 - y1).abs();
    let sx = if x1 < x2 { 1.0 } else { -1.0 };
    let sy = if y1 < y2 { 1.0 } else { -1.0 };
    let mut err = dx - dy;
    let mut x = x1;
    let mut y = y1;
    loop {
        // Stamp the square brush centred on the current point.
        for tx in -half_t..=half_t {
            for ty in -half_t..=half_t {
                let px = (x as i32).saturating_add(tx).clamp(0, max_x) as u32;
                let py = (y as i32).saturating_add(ty).clamp(0, max_y) as u32;
                img.put_pixel(px, py, color);
            }
        }
        // Stop once the current point is within one pixel of the end point.
        if (x - x2).abs() < 1.0 && (y - y2).abs() < 1.0 {
            break;
        }
        let e2 = 2.0 * err;
        if e2 > -dy {
            err -= dy;
            x += sx;
        }
        if e2 < dx {
            err += dx;
            y += sy;
        }
    }
}
/// Fills a disc of the given `radius` centred on `(cx, cy)`.
///
/// Every pixel whose squared distance to the centre is at most `radius²` is
/// painted with `color`; pixels outside the image are skipped.
#[allow(
    clippy::cast_sign_loss,
    clippy::cast_possible_truncation,
    clippy::cast_possible_wrap
)]
fn draw_filled_circle(img: &mut image::RgbImage, cx: i32, cy: i32, radius: i32, color: Rgb<u8>) {
    let (img_w, img_h) = img.dimensions();
    let (img_w, img_h) = (img_w as i32, img_h as i32);

    // Walk the bounding square of the disc as offsets from the centre.
    for off_y in -radius..=radius {
        for off_x in -radius..=radius {
            if off_x * off_x + off_y * off_y > radius * radius {
                continue;
            }
            let (x, y) = (cx + off_x, cy + off_y);
            if x < 0 || y < 0 || x >= img_w || y >= img_h {
                continue;
            }
            img.put_pixel(x as u32, y as u32, color);
        }
    }
}
/// Draws detection results (masks, boxes and labels) onto `img`.
///
/// Does nothing when `result.boxes` is `None`. Otherwise:
///
/// 1. If masks are present, every mask pixel above 0.5 inside its box is
///    painted in the class color on an overlay copy, and the overlay is
///    blended over the whole image with an alpha of 0.3. Mask reads are
///    limited to the mask's own height and width.
/// 2. Every box is clamped to the image, skipped when degenerate, and drawn
///    as `thickness` nested hollow rectangles.
/// 3. When a font is available, a `" name conf "` label is placed above the
///    box (or inside it when there is no room). The label is moved down, and
///    then right in 10-pixel steps, for up to 10 attempts to avoid overlapping
///    earlier labels. It is drawn only when it fits fully inside the image.
///
/// Line thickness and font size scale with the larger image dimension relative
/// to 640 pixels.
#[allow(
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::cast_precision_loss,
    clippy::cast_possible_wrap,
    clippy::manual_clamp,
    clippy::too_many_lines
)]
fn draw_detection(img: &mut image::RgbImage, result: &Results, font: Option<&FontRef>) {
    let (width, height) = img.dimensions();
    let max_dim = width.max(height) as f32;
    let scale_factor = (max_dim / 640.0).max(1.0);
    let thickness = (1.0 * scale_factor).round().max(1.0) as i32;
    let font_scale = (11.0 * scale_factor).max(10.0);
    if let Some(ref boxes) = result.boxes {
        let xyxy = boxes.xyxy();
        let conf = boxes.conf();
        let cls = boxes.cls();

        if let Some(ref masks) = result.masks {
            let (mask_n, mask_h, mask_w) = masks.data.dim();
            // Only boxes that have a matching mask take part in the overlay.
            let masked = boxes.len().min(mask_n);
            if masked > 0 {
                // The image copy is only needed when something is painted.
                let mut overlay = img.clone();
                let mask_w = u32::try_from(mask_w).unwrap_or(u32::MAX);
                let mask_h = u32::try_from(mask_h).unwrap_or(u32::MAX);
                for i in 0..masked {
                    let color = get_class_color(cls[i] as usize);
                    let x1 = xyxy[[i, 0]].max(0.0).min(width as f32) as u32;
                    let y1 = xyxy[[i, 1]].max(0.0).min(height as f32) as u32;
                    // Never read past the mask itself, even if it is smaller
                    // than the image.
                    let x2 = (xyxy[[i, 2]].max(0.0).min(width as f32) as u32).min(mask_w);
                    let y2 = (xyxy[[i, 3]].max(0.0).min(height as f32) as u32).min(mask_h);
                    for y in y1..y2 {
                        for x in x1..x2 {
                            if masks.data[[i, y as usize, x as usize]] > 0.5 {
                                overlay.put_pixel(x, y, color);
                            }
                        }
                    }
                }

                // Blend the painted overlay over the original pixels.
                let alpha = 0.3_f32;
                for y in 0..height {
                    for x in 0..width {
                        let p_overlay = *overlay.get_pixel(x, y);
                        let p_img = img.get_pixel_mut(x, y);
                        for channel in 0..3 {
                            p_img.0[channel] = f32::from(p_overlay.0[channel])
                                .mul_add(alpha, f32::from(p_img.0[channel]) * (1.0 - alpha))
                                as u8;
                        }
                    }
                }
            }
        }

        // Rectangles of labels placed so far, used to avoid overlaps.
        let mut labels_rects: Vec<Rect> = Vec::new();
        for i in 0..boxes.len() {
            let class_id = cls[i] as usize;
            let confidence = conf[i];
            let mut x1 = xyxy[[i, 0]].round() as i32;
            let mut y1 = xyxy[[i, 1]].round() as i32;
            let mut x2 = xyxy[[i, 2]].round() as i32;
            let mut y2 = xyxy[[i, 3]].round() as i32;
            // Normalise corner order before clamping.
            if x1 > x2 {
                std::mem::swap(&mut x1, &mut x2);
            }
            if y1 > y2 {
                std::mem::swap(&mut y1, &mut y2);
            }
            x1 = x1.max(0).min(width as i32 - 1);
            y1 = y1.max(0).min(height as i32 - 1);
            x2 = x2.max(0).min(width as i32 - 1);
            y2 = y2.max(0).min(height as i32 - 1);
            if x2 <= x1 || y2 <= y1 {
                continue;
            }
            let color = get_class_color(class_id);

            // A thick outline is a stack of hollow rectangles shrinking inwards.
            for t in 0..thickness {
                let tx1 = (x1 + t).min(x2);
                let ty1 = (y1 + t).min(y2);
                let tx2 = (x2 - t).max(tx1);
                let ty2 = (y2 - t).max(ty1);
                if tx2 > tx1 && ty2 > ty1 {
                    let rect = Rect::at(tx1, ty1).of_size((tx2 - tx1) as u32, (ty2 - ty1) as u32);
                    draw_hollow_rect_mut(img, rect, color);
                }
            }

            let class_name = result.names.get(&class_id).map_or("object", String::as_str);
            let label = format!(" {class_name} {confidence:.2} ");
            if let Some(f) = font {
                let scale = PxScale::from(font_scale);
                let scaled_font = f.as_scaled(scale);
                // Label width is the sum of the glyph advances.
                let text_w: f32 = label
                    .chars()
                    .map(|c| scaled_font.h_advance(scaled_font.glyph_id(c)))
                    .sum();
                let text_w = text_w.ceil() as i32;
                let text_h = scale.y.ceil() as i32;

                // Preferred spot: directly above the box, else at its top edge.
                let mut text_x = x1;
                let mut text_y = y1 - text_h;
                if text_y < 0 {
                    text_y = y1;
                }
                if text_x < 0 {
                    text_x = 0;
                }
                if text_x + text_w >= width as i32 {
                    text_x = width as i32 - text_w - 1;
                }
                if text_y + text_h >= height as i32 {
                    text_y = height as i32 - text_h - 1;
                }

                let mut attempts = 0;
                let max_attempts = 10;
                let mut current_rect =
                    Rect::at(text_x, text_y).of_size(text_w as u32, text_h as u32);
                'placement: while attempts < max_attempts {
                    if !labels_rects
                        .iter()
                        .any(|existing| rect_intersect(&current_rect, existing))
                    {
                        break 'placement;
                    }
                    // Try one label height lower; wrap back to the top and
                    // shift right when the bottom of the image is reached.
                    text_y += text_h;
                    if text_y + text_h >= height as i32 {
                        text_y = y1 - text_h;
                        if text_y < 0 {
                            text_y = y1;
                        }
                        text_x += 10;
                        if text_x + text_w >= width as i32 {
                            break 'placement;
                        }
                    }
                    current_rect = Rect::at(text_x, text_y).of_size(text_w as u32, text_h as u32);
                    attempts += 1;
                }
                labels_rects.push(current_rect);

                // Only draw labels that fit completely inside the image.
                if text_x >= 0
                    && text_y >= 0
                    && text_x + text_w < width as i32
                    && text_y + text_h < height as i32
                {
                    draw_filled_rect_mut(img, current_rect, color);
                    let text_color = get_text_color(color);
                    draw_text_mut(img, text_color, text_x, text_y, scale, f, &label);
                }
            }
        }
    }
}
/// Picks black or white text for readability on the background color `bg`.
///
/// A weighted luminance (0.299 R + 0.587 G + 0.114 B) is computed; black is
/// returned when it exceeds 150, white otherwise.
fn get_text_color(bg: Rgb<u8>) -> Rgb<u8> {
    let [r, g, b] = bg.0.map(f32::from);
    let luminance = 0.114_f32.mul_add(b, 0.299_f32.mul_add(r, 0.587 * g));
    let channel = if luminance > 150.0 { 0 } else { 255 };
    Rgb([channel; 3])
}
/// Returns `true` when `r1` and `r2` share at least one pixel.
///
/// `Rect::right()` and `Rect::bottom()` report the greatest coordinate the
/// rectangle reaches, so they are inclusive. Two rectangles are therefore
/// separated only when one starts strictly past the other's last row or
/// column; rectangles that merely touch side by side do not intersect.
fn rect_intersect(r1: &Rect, r2: &Rect) -> bool {
    let separated = r2.left() > r1.right()
        || r2.right() < r1.left()
        || r2.top() > r1.bottom()
        || r2.bottom() < r1.top();
    !separated
}
/// Draws pose skeletons and keypoints for every person in `result.keypoints`.
///
/// Does nothing when `result.keypoints` is `None`. `skeleton`, `limb_colors`
/// and `kpt_colors` fall back to `SKELETON`, `LIMB_COLOR_INDICES` and
/// `KPT_COLOR_INDICES` when not supplied; the color slices hold indices into
/// `POSE_COLORS` and are cycled when shorter than needed.
///
/// A limb is drawn when both of its keypoints have confidence above 0.5. A
/// keypoint is drawn as a filled circle when its confidence is above 0.5 and
/// it lies inside the image. Line thickness and circle radius scale with the
/// larger image dimension relative to 640 pixels.
#[allow(
    clippy::doc_overindented_list_items,
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::cast_precision_loss,
    clippy::cast_possible_wrap
)]
fn draw_pose(
    img: &mut image::RgbImage,
    result: &Results,
    skeleton: Option<&[[usize; 2]]>,
    limb_colors: Option<&[usize]>,
    kpt_colors: Option<&[usize]>,
) {
    let Some(keypoints) = &result.keypoints else {
        return;
    };

    let (width, height) = img.dimensions();
    let scale_factor = (width.max(height) as f32 / 640.0).max(1.0);
    let thickness = (1.0 * scale_factor).round().max(1.0) as i32;
    let radius = (3.0 * scale_factor).round() as i32;

    let skeleton = skeleton.unwrap_or(&SKELETON);
    let limb_colors = limb_colors.unwrap_or(&LIMB_COLOR_INDICES);
    let kpt_colors = kpt_colors.unwrap_or(&KPT_COLOR_INDICES);

    let kpt_data = &keypoints.data;
    let n_persons = kpt_data.shape()[0];
    let n_kpts = kpt_data.shape()[1];

    // Reads one keypoint as an (x, y, confidence) triple.
    let keypoint = |person: usize, kpt: usize| {
        (
            kpt_data[[person, kpt, 0]],
            kpt_data[[person, kpt, 1]],
            kpt_data[[person, kpt, 2]],
        )
    };

    for person in 0..n_persons {
        // Limbs first, so the keypoint circles end up on top of the lines.
        for (limb_idx, &[kpt_a, kpt_b]) in skeleton.iter().enumerate() {
            // Skeleton entries that refer to keypoints this model lacks are ignored.
            if kpt_a >= n_kpts || kpt_b >= n_kpts {
                continue;
            }
            let (ax, ay, a_conf) = keypoint(person, kpt_a);
            let (bx, by, b_conf) = keypoint(person, kpt_b);
            if a_conf > 0.5 && b_conf > 0.5 {
                let palette_idx = limb_colors[limb_idx % limb_colors.len()];
                draw_line_segment(img, ax, ay, bx, by, Rgb(POSE_COLORS[palette_idx]), thickness);
            }
        }

        for kpt in 0..n_kpts {
            let (x, y, conf) = keypoint(person, kpt);
            let inside = x >= 0.0 && y >= 0.0 && x < width as f32 && y < height as f32;
            if conf > 0.5 && inside {
                let palette_idx = kpt_colors[kpt % kpt_colors.len()];
                draw_filled_circle(img, x as i32, y as i32, radius, Rgb(POSE_COLORS[palette_idx]));
            }
        }
    }
}
/// Draws oriented bounding boxes and their labels onto `img`.
///
/// Does nothing when `result.obb` is `None`. Each box is drawn as four line
/// segments joining its corners in order. When a font is available, a
/// `" name conf "` label is anchored at the first corner, above it when there
/// is room. The label is moved down, and then right in 10-pixel steps, for up
/// to 10 attempts to avoid overlapping earlier labels. It is drawn only when
/// it fits fully inside the image.
///
/// Line thickness and font size scale with the larger image dimension relative
/// to 640 pixels.
#[allow(
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::cast_precision_loss,
    clippy::cast_possible_wrap,
    clippy::manual_clamp
)]
fn draw_obb(img: &mut image::RgbImage, result: &Results, font: Option<&FontRef>) {
    let (width, height) = img.dimensions();
    let max_dim = width.max(height) as f32;
    let scale_factor = (max_dim / 640.0).max(1.0);
    let thickness = (1.0 * scale_factor).round().max(1.0) as i32;
    let font_scale = (11.0 * scale_factor).max(10.0);
    if let Some(ref obb) = result.obb {
        let corners = obb.xyxyxyxy();
        let conf = obb.conf();
        let cls = obb.cls();
        // Rectangles of labels placed so far, used to avoid overlaps.
        let mut labels_rects: Vec<Rect> = Vec::new();
        for i in 0..obb.len() {
            let class_id = cls[i] as usize;
            let confidence = conf[i];
            let color = get_class_color(class_id);

            // Join corner j to corner j+1, wrapping back to corner 0.
            for j in 0..4 {
                let next_j = (j + 1) % 4;
                let x1 = corners[[i, j, 0]];
                let y1 = corners[[i, j, 1]];
                let x2 = corners[[i, next_j, 0]];
                let y2 = corners[[i, next_j, 1]];
                draw_line_segment(img, x1, y1, x2, y2, color, thickness);
            }

            let class_name = result.names.get(&class_id).map_or("object", String::as_str);
            let label = format!(" {class_name} {confidence:.2} ");
            if let Some(f) = font {
                let scale = PxScale::from(font_scale);
                let scaled_font = f.as_scaled(scale);
                // Label width is the sum of the glyph advances.
                let text_w: f32 = label
                    .chars()
                    .map(|c| scaled_font.h_advance(scaled_font.glyph_id(c)))
                    .sum();
                let text_w = text_w.ceil() as i32;
                let text_h = scale.y.ceil() as i32;

                // Preferred spot: just above the first corner.
                let mut text_x = corners[[i, 0, 0]] as i32;
                let mut text_y = (corners[[i, 0, 1]] as i32 - text_h).max(0);
                if text_x < 0 {
                    text_x = 0;
                }
                if text_x + text_w >= width as i32 {
                    text_x = width as i32 - text_w - 1;
                }
                if text_y + text_h >= height as i32 {
                    text_y = height as i32 - text_h - 1;
                }

                let mut attempts = 0;
                let max_attempts = 10;
                let mut current_rect =
                    Rect::at(text_x, text_y).of_size(text_w as u32, text_h as u32);
                'placement: while attempts < max_attempts {
                    if !labels_rects
                        .iter()
                        .any(|existing| rect_intersect(&current_rect, existing))
                    {
                        break 'placement;
                    }
                    // Try one label height lower; wrap back to the top and
                    // shift right when the bottom of the image is reached.
                    text_y += text_h;
                    if text_y + text_h >= height as i32 {
                        text_y = (corners[[i, 0, 1]] as i32 - text_h).max(0);
                        text_x += 10;
                        if text_x + text_w >= width as i32 {
                            break 'placement;
                        }
                    }
                    current_rect = Rect::at(text_x, text_y).of_size(text_w as u32, text_h as u32);
                    attempts += 1;
                }
                labels_rects.push(current_rect);

                // Only draw labels that fit completely inside the image.
                if text_x >= 0
                    && text_y >= 0
                    && text_x + text_w < width as i32
                    && text_y + text_h < height as i32
                {
                    draw_filled_rect_mut(img, current_rect, color);
                    let text_color = get_text_color(color);
                    draw_text_mut(img, text_color, text_x, text_y, scale, f, &label);
                }
            }
        }
    }
}
/// Blends a `w` × `h` rectangle of `color` over `img`, with its top-left
/// corner at `(x, y)`.
///
/// `alpha` is limited to `[0, 1]`; 0 leaves the image untouched and 1 replaces
/// the pixels with `color`. Parts of the rectangle outside the image are
/// skipped.
#[allow(
    clippy::many_single_char_names,
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::cast_precision_loss,
    clippy::cast_possible_wrap,
    clippy::manual_clamp
)]
fn draw_transparent_rect(
    img: &mut image::RgbImage,
    x: i32,
    y: i32,
    w: u32,
    h: u32,
    color: Rgb<u8>,
    alpha: f32,
) {
    let (width, height) = img.dimensions();
    let alpha = alpha.max(0.0).min(1.0);
    let inv_alpha = 1.0 - alpha;
    let [r, g, b] = color.0.map(f32::from);

    // Mixes one existing channel value with the matching tint channel.
    let blend = |existing: u8, tint: f32| f32::from(existing).mul_add(inv_alpha, tint * alpha) as u8;

    let rows = (0..h)
        .map(|dy| y + dy as i32)
        .filter(|py| (0..height as i32).contains(py));
    for py in rows {
        let cols = (0..w)
            .map(|dx| x + dx as i32)
            .filter(|px| (0..width as i32).contains(px));
        for px in cols {
            let pixel = img.get_pixel_mut(px as u32, py as u32);
            let [cur_r, cur_g, cur_b] = pixel.0;
            *pixel = Rgb([blend(cur_r, r), blend(cur_g, g), blend(cur_b, b)]);
        }
    }
}
/// Draws the top-`top_k` classification results as a text list in the
/// top-left corner of `img`.
///
/// Does nothing when `result.probs` is `None` or no font is available.
/// Entries with a score below 0.01 are omitted. Each entry is rendered as
/// `"name score"` in white on a semi-transparent black background. The
/// background width is measured from the font's glyph advances, so it also
/// fits non-ASCII class names. Text size scales with the image width relative
/// to 600 pixels, limited to the range 0.6–2.0.
#[allow(
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::cast_precision_loss,
    clippy::cast_possible_wrap,
    clippy::manual_clamp
)]
fn draw_classification(
    img: &mut image::RgbImage,
    result: &Results,
    font: Option<&FontRef>,
    top_k: usize,
) {
    let Some(ref probs) = result.probs else {
        return;
    };
    let top_indices = probs.top_k(top_k);
    let Some(f) = font else {
        return;
    };

    let (width, _height) = img.dimensions();
    let scale_factor = (width as f32 / 600.0).max(0.6).min(2.0);
    let base_size = 30.0;
    let scale = PxScale::from(base_size * scale_factor);
    let line_height = (scale.y * 1.2) as i32;
    let x_pos = (20.0 * scale_factor) as i32;
    let mut y_pos = (20.0 * scale_factor) as i32;

    // Collect the labels that are worth showing.
    let mut entries = Vec::new();
    for &class_id in &top_indices {
        let score = probs.data[class_id];
        if score < 0.01 {
            continue;
        }
        let class_name = result.names.get(&class_id).map_or("class", String::as_str);
        entries.push(format!("{class_name} {score:.2}"));
    }
    if entries.is_empty() {
        return;
    }

    // Measure the widest label with the actual glyph advances rather than
    // guessing from the UTF-8 byte length.
    let scaled_font = f.as_scaled(scale);
    let max_width = entries
        .iter()
        .map(|label| {
            let label_w: f32 = label
                .chars()
                .map(|c| scaled_font.h_advance(scaled_font.glyph_id(c)))
                .sum();
            label_w.ceil() as u32
        })
        .max()
        .unwrap_or(0);

    let box_height = (entries.len() as i32 * line_height) + 10;
    let box_width = max_width + 20;
    draw_transparent_rect(
        img,
        x_pos - 5,
        y_pos - 5,
        box_width,
        box_height as u32,
        Rgb([0, 0, 0]),
        0.4,
    );

    for label in entries {
        draw_text_mut(img, Rgb([255, 255, 255]), x_pos, y_pos, scale, f, &label);
        y_pos += line_height;
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::results::Speed;
    use ndarray::Array3;
    use std::collections::HashMap;

    /// Class id 0 maps to the first palette entry.
    #[test]
    fn test_get_class_color() {
        assert_eq!(get_class_color(0), Rgb([4, 42, 255]));
    }

    /// A horizontal line colors pixels on its row and leaves other rows untouched.
    #[test]
    fn test_draw_line_segment() {
        let red = Rgb([255, 0, 0]);
        let black = Rgb([0, 0, 0]);
        let mut canvas = image::RgbImage::new(100, 100);

        draw_line_segment(&mut canvas, 10.0, 10.0, 50.0, 10.0, red, 1);

        assert_eq!(*canvas.get_pixel(30, 10), red);
        assert_eq!(*canvas.get_pixel(30, 20), black);
    }

    /// Annotating with an empty `Results` keeps the image dimensions.
    ///
    /// NOTE: `annotate_image` goes through `check_font`, which may attempt a
    /// download when the font is not cached.
    #[test]
    fn test_annotate_image_empty() {
        let blank = DynamicImage::new_rgb8(100, 100);
        let results = Results::new(
            Array3::<u8>::zeros((100, 100, 3)),
            "test.jpg".to_string(),
            HashMap::new(),
            Speed::new(0.0, 0.0, 0.0),
            (640, 640),
        );

        let annotated = annotate_image(&blank, &results, None);

        assert_eq!(annotated.width(), 100);
        assert_eq!(annotated.height(), 100);
    }
}