pub mod bbox;
pub mod color;
pub mod component;
pub mod config;
pub mod download;
pub mod detection;
pub mod element;
pub mod export;
pub mod icon_classifier;
pub mod merger;
pub mod object_detector;
pub mod preprocess;
pub mod text_detection;
pub use bbox::Bbox;
pub use color::{
detect_background_color, detect_colors, detect_dominant_color, detect_element_color,
};
pub use component::Component;
pub use config::Config;
pub use element::{
compute_prominence, prominence_label, AiOutput, CompactElement, Element, ElementPosition,
OutputResult, TreeNode, TreeOutput,
};
pub use export::{
draw_elements, draw_object_detections, save_ai_json, save_compact_json,
save_detection_tree_json, save_detection_tree_text, save_json,
save_object_detection_visualization, save_text_summary, save_tree_json, save_tree_text,
save_visualization,
};
pub use icon_classifier::IconClassifier;
pub use merger::{
check_containment, merge, merge_text_lines, reassign_ids, refine_elements, refine_texts,
remove_bottom_bar, remove_top_bar, synthesize_orphan_text_regions,
};
pub use object_detector::{build_detection_tree, Detection, DetectionBbox, DetectionNode};
pub use text_detection::{detect_text, TextResult};
use std::sync::atomic::{AtomicBool, Ordering};
/// Process-wide flag recording whether `init_models` has completed successfully.
///
/// `init_models` sets it to `true` only after every initializer has succeeded;
/// `clean_models` resets it to `false`.
static MODELS_INITIALIZED: AtomicBool = AtomicBool::new(false);
/// Help text appended to model-initialization errors, describing the expected
/// on-disk layout of the models directory.
///
/// The literal `{models_dir}` is a placeholder: `init_models` substitutes the
/// caller-supplied directory with `str::replace` before attaching the text to
/// an error message.
const MODEL_DIR_HELP: &str = "\n\
Expected directory structure:\n\
{models_dir}/\n\
object-detection/\n\
yoloe-26n-seg.onnx\n\
yoloe-26n_classes.txt\n\
icon-classifier/\n\
icon_classifier.onnx\n\
labels.json\n\
ocr-models/\n\
ppocrv5_mobile_det.onnx\n\
ppocrv5_mobile_rec.onnx\n\
ppocrv5_dict.txt";
pub fn init_models(models_dir: &str) -> anyhow::Result<()> {
if MODELS_INITIALIZED.load(Ordering::Relaxed) {
println!("[init_models] Already initialized, skipping");
return Ok(());
}
let models_path = std::path::Path::new(models_dir);
println!("[init_models] Loading models from: {}", models_dir);
if !download::all_models_exist(models_path) {
println!("[init_models] Some model files are missing, downloading from Hugging Face...");
println!(" Repo: https://huggingface.co/chenjian-wei/quasivision-models");
download::download_missing(models_path)
.map_err(|e| anyhow::anyhow!("[init_models] Download failed: {e}"))?;
} else {
println!(" [download] All model files already exist");
}
text_detection::init_ocr()
.map_err(|e| anyhow::anyhow!("[init_models] OCR init failed: {}\n{}", e, MODEL_DIR_HELP.replace("{models_dir}", models_dir)))?;
icon_classifier::init_global(std::path::Path::new(models_dir))
.map_err(|e| anyhow::anyhow!("[init_models] Icon classifier init failed: {}\n{}", e, MODEL_DIR_HELP.replace("{models_dir}", models_dir)))?;
object_detector::init_global(models_dir)
.map_err(|e| anyhow::anyhow!("[init_models] Object detector init failed: {}\n{}", e, MODEL_DIR_HELP.replace("{models_dir}", models_dir)))?;
MODELS_INITIALIZED.store(true, Ordering::Relaxed);
println!("[init_models] All models loaded successfully");
Ok(())
}
/// Tears down the global model state created by `init_models`.
///
/// The initialized flag is cleared first, so a later `init_models` call will
/// run the full initialization again. The OCR, icon-classifier and
/// object-detector cleanup hooks are then invoked in that order, and a
/// confirmation line is printed.
// NOTE(review): what each `clean_*` hook actually releases is defined in the
// respective module and is not visible from here.
pub fn clean_models() {
MODELS_INITIALIZED.store(false, Ordering::Relaxed);
text_detection::clean_ocr();
icon_classifier::clean_global();
object_detector::clean_global();
println!("[clean_models] All models cleaned up");
}
/// Serializes `elements` together with `img_shape` as pretty-printed JSON.
///
/// The elements are cloned into an `OutputResult` before serialization. If
/// serialization fails, the error is discarded and an empty string is
/// returned.
pub fn to_json_string(elements: &[Element], img_shape: (u32, u32)) -> String {
    let comps = elements.to_vec();
    let payload = OutputResult { comps, img_shape };
    match serde_json::to_string_pretty(&payload) {
        Ok(json) => json,
        Err(_) => String::new(),
    }
}
/// Serializes `elements` as a pretty-printed JSON array of `CompactElement`s.
///
/// `_img_shape` is accepted for signature parity with the other serializers
/// but is not used. If serialization fails, an empty string is returned.
pub fn to_compact_string(elements: &[Element], _img_shape: (u32, u32)) -> String {
    let mut compact: Vec<CompactElement> = Vec::with_capacity(elements.len());
    for element in elements {
        compact.push(CompactElement::from(element));
    }
    match serde_json::to_string_pretty(&compact) {
        Ok(json) => json,
        Err(_) => String::new(),
    }
}
/// Builds an `AiOutput` from `elements` and `img_shape` and serializes it as
/// pretty-printed JSON.
///
/// If serialization fails, an empty string is returned.
pub fn to_ai_json_string(elements: &[Element], img_shape: (u32, u32)) -> String {
    let payload = AiOutput::from_elements(elements, img_shape);
    match serde_json::to_string_pretty(&payload) {
        Ok(json) => json,
        Err(_) => String::new(),
    }
}
/// Builds a `TreeOutput` from `elements` and `img_shape` and serializes it as
/// pretty-printed JSON.
///
/// If serialization fails, an empty string is returned.
pub fn to_tree_json_string(elements: &[Element], img_shape: (u32, u32)) -> String {
    let hierarchy = TreeOutput::from_elements(elements, img_shape);
    match serde_json::to_string_pretty(&hierarchy) {
        Ok(json) => json,
        Err(_) => String::new(),
    }
}
/// Builds a `TreeOutput` from `elements` and `img_shape` and returns its
/// text rendering (`TreeOutput::to_text`).
pub fn to_tree_text_string(elements: &[Element], img_shape: (u32, u32)) -> String {
    let hierarchy = TreeOutput::from_elements(elements, img_shape);
    hierarchy.to_text()
}
/// Returns the plain-text summary of `elements` produced by
/// `element::elements_to_text` for the given `img_shape`.
pub fn to_text_summary(elements: &[Element], img_shape: (u32, u32)) -> String {
    let summary: String = element::elements_to_text(elements, img_shape);
    summary
}
/// Serializes a detection forest as a pretty-printed JSON object.
///
/// The object has three keys: `img_shape` (the two components of the
/// `img_shape` argument in swapped order), `count` (total number of nodes in
/// the forest, descendants included) and `objects` (the root nodes). If
/// serialization fails, an empty string is returned.
pub fn object_detection_to_json_string(roots: &[DetectionNode], img_shape: (u32, u32)) -> String {
    // `count_all` already counts each node plus all of its descendants.
    let count: usize = count_all(roots);
    let (first, second) = img_shape;
    let value = serde_json::json!({
        "img_shape": [second, first],
        "count": count,
        "objects": roots,
    });
    match serde_json::to_string_pretty(&value) {
        Ok(json) => json,
        Err(_) => String::new(),
    }
}
/// Counts every node in `nodes`, including all nested children.
fn count_all(nodes: &[DetectionNode]) -> usize {
    let mut total = 0;
    for node in nodes {
        // One for the node itself plus everything beneath it.
        total += 1 + count_all(&node.children);
    }
    total
}
/// Renders a detection forest as an indented text tree.
///
/// The first line is a header containing the two components of `img_shape`
/// (second component first) and the total node count. Each following line
/// describes one node: box-drawing connector, rounded origin and size of its
/// bounding box, class name and confidence as a whole percentage. For an
/// empty forest a two-line "none detected" message is returned instead.
pub fn object_detection_to_tree_text(roots: &[DetectionNode], img_shape: (u32, u32)) -> String {
    /// Appends the line for `node` and, recursively, the lines of its children.
    fn emit(node: &DetectionNode, indent: &str, last: bool, out: &mut Vec<String>) {
        let bbox = &node.bbox;
        let left = bbox.x_min.round() as i32;
        let top = bbox.y_min.round() as i32;
        let width = (bbox.x_max - bbox.x_min).round() as i32;
        let height = (bbox.y_max - bbox.y_min).round() as i32;
        let percent = (node.confidence * 100.0).round() as u32;
        let branch = if last { "└─ " } else { "├─ " };
        out.push(format!(
            "{}{}[{:>3},{:>3} {:>3}×{:>3}] {} ({}%)",
            indent, branch, left, top, width, height, node.class_name, percent
        ));

        // Children continue the vertical rule only while siblings remain below.
        let continuation = if last { " " } else { "│ " };
        let nested_indent = format!("{}{}", indent, continuation);
        let child_total = node.children.len();
        for (idx, child) in node.children.iter().enumerate() {
            emit(child, &nested_indent, idx + 1 == child_total, out);
        }
    }

    if roots.is_empty() {
        return format!(
            "Objects ({}×{}):\n (none detected)",
            img_shape.1, img_shape.0
        );
    }

    let total = count_all(roots);
    let mut out = vec![format!(
        "Objects ({}×{}) — {} found:",
        img_shape.1, img_shape.0, total
    )];

    let root_total = roots.len();
    for (idx, root) in roots.iter().enumerate() {
        emit(root, "", idx + 1 == root_total, &mut out);
    }
    out.join("\n")
}
pub mod pipeline {
use std::path::Path;
use std::thread;
use anyhow::{Context, Result};
use image::DynamicImage;
use crate::component::Component;
use crate::config::Config;
use crate::detection::*;
use crate::element::Element;
use crate::icon_classifier::IconClassifier;
use crate::merger::*;
use crate::object_detector::{self, Detection};
use crate::preprocess;
use crate::text_detection::{self, TextResult};
/// Settings for one run of the detection pipeline.
///
/// Construct with `PipelineConfig::new` and adjust with the `with_*` builder
/// methods.
#[derive(Debug, Clone)]
pub struct PipelineConfig {
/// Low-level detection parameters passed to the binarization, line-removal
/// and component-detection routines.
pub ui_config: Config,
/// Root models directory; used to build a fallback `IconClassifier` when
/// the global classifier call fails.
pub models_dir: String,
/// Forwarded to `merger::merge` as its paragraph flag.
pub paragraph: bool,
/// Forwarded to `merger::merge` as its remove-bar flag.
pub remove_bar: bool,
/// When `true`, `detect_components` runs the extra sub-component pass.
pub sub_component: bool,
/// When `true`, `merge` calls `synthesize_orphan_text_regions` after merging.
pub synthesize_text: bool,
/// Path of the object-detection model passed to `run_object_detection`.
pub detect_model_path: String,
/// Path of the class-labels file passed to `run_object_detection`.
pub detect_labels_path: String,
/// Confidence value passed to `run_object_detection`.
pub detect_conf: f32,
}
impl PipelineConfig {
pub fn new(models_dir: &str) -> Self {
Self {
ui_config: Config::default(),
models_dir: models_dir.to_string(),
paragraph: false,
remove_bar: true,
sub_component: true,
synthesize_text: true,
detect_model_path: format!(
"{}/object-detection/yoloe-26n-seg.onnx",
models_dir.trim_end_matches('/')
),
detect_labels_path: format!(
"{}/object-detection/yoloe-26n_classes.txt",
models_dir.trim_end_matches('/')
),
detect_conf: 0.01,
}
}
pub fn with_ui_config(mut self, config: Config) -> Self {
self.ui_config = config;
self
}
pub fn with_paragraph(mut self, enabled: bool) -> Self {
self.paragraph = enabled;
self
}
pub fn with_remove_bar(mut self, enabled: bool) -> Self {
self.remove_bar = enabled;
self
}
pub fn with_sub_component(mut self, enabled: bool) -> Self {
self.sub_component = enabled;
self
}
pub fn with_synthesize_text(mut self, enabled: bool) -> Self {
self.synthesize_text = enabled;
self
}
pub fn with_detect_model(mut self, model_path: &str, labels_path: &str) -> Self {
self.detect_model_path = model_path.to_string();
self.detect_labels_path = labels_path.to_string();
self
}
pub fn with_detect_conf(mut self, conf: f32) -> Self {
self.detect_conf = conf;
self
}
pub fn read_image(&self, path: &str) -> Result<(DynamicImage, image::GrayImage)> {
preprocess::read_image(path, None, None)
.with_context(|| format!("Failed to read image: {path}"))
}
pub fn read_image_from_bytes(
&self,
data: &[u8],
) -> Result<(DynamicImage, image::GrayImage)> {
preprocess::read_image_from_bytes(data)
}
pub fn binarize(&self, img: &DynamicImage) -> image::GrayImage {
preprocess::binarization_color(img, self.ui_config.gradient_threshold)
}
pub fn remove_lines(&self, binary: &mut image::GrayImage) {
crate::detection::remove_lines(
binary,
self.ui_config.line_thickness,
self.ui_config.line_min_length_ratio,
);
}
pub fn detect_components(&self, img: &DynamicImage) -> Result<Vec<Component>> {
let gray = img.to_luma8();
let mut binary = preprocess::binarization_color(img, self.ui_config.gradient_threshold);
remove_lines(
&mut binary,
self.ui_config.line_thickness,
self.ui_config.line_min_length_ratio,
);
let (mut comps, _) = component_detection(&binary, &self.ui_config, true);
comps = merge_intersected(&comps, false, (0, 0));
comps = component_filter(
&comps,
self.ui_config.obj_min_area as i64,
(img.height(), img.width()),
);
block_recognition(&binary, &mut comps, self.ui_config.block_side_length);
let nested = nested_components_detection(
&gray,
&self.ui_config,
self.ui_config.block_gradient_threshold,
);
for mut nc in nested {
let mut is_new = true;
for c in &comps {
let rel = nc.bbox.relation(&c.bbox);
if rel == -1 || rel == 2 {
is_new = false;
break;
}
}
if is_new {
nc.category = "Block".to_string();
comps.push(nc);
}
}
classify_by_geometry(&mut comps, (img.height(), img.width()));
let rgb = img.to_rgb8();
let extra_icons = icon_color_detection(&rgb, &comps, &self.ui_config);
for mut ic in extra_icons {
ic.category = "Icon".to_string();
comps.push(ic);
}
if self.sub_component {
let sub_comps = detect_sub_components(&comps, &binary, &self.ui_config);
for mut sub in sub_comps {
let mut overlap = false;
for c in &comps {
let rel = sub.bbox.relation_with_bias(&c.bbox, (2, 2));
if rel != 0 {
overlap = true;
break;
}
}
if !overlap {
if sub.category == "Compo" {
sub.category = "Button".to_string();
}
comps.push(sub);
}
}
}
Ok(comps)
}
pub fn run_ocr(&self, img: &DynamicImage) -> Result<TextResult> {
Ok(text_detection::detect_text(img))
}
pub fn merge(
&self,
img: &DynamicImage,
comps: &[Component],
text_result: &TextResult,
) -> Result<Vec<Element>> {
let mut elements = merge(
img,
comps,
&text_result.texts,
&self.ui_config,
self.paragraph,
self.remove_bar,
);
if self.synthesize_text {
synthesize_orphan_text_regions(&mut elements, &text_result.texts, 0.3, 12);
}
let rgb = img.to_rgb8();
crate::color::detect_colors(&rgb, &mut elements);
Ok(elements)
}
pub fn classify_icons(&self, img: &DynamicImage, elements: &mut [Element]) -> Result<()> {
let icon_count = elements.iter().filter(|e| e.class == "Icon").count();
if icon_count == 0 {
return Ok(());
}
if crate::icon_classifier::classify_all_icons_global(img, elements).is_ok() {
return Ok(());
}
let models_root = Path::new(&self.models_dir);
let mut classifier =
IconClassifier::new(models_root).context("Failed to initialize IconClassifier")?;
crate::icon_classifier::classify_all_icons(&mut classifier, img, elements);
Ok(())
}
pub fn detect_objects(&self, img: &DynamicImage) -> Vec<Detection> {
object_detector::run_object_detection(
img,
&self.detect_model_path,
&self.detect_labels_path,
self.detect_conf,
)
}
pub fn run_full(&self, img_path: &str) -> Result<PipelineResult> {
let (_img, _gray) = self.read_image(img_path)?;
let ocr_handle = thread::spawn({
let img = _img.clone();
move || text_detection::detect_text(&img)
});
let od_handle = thread::spawn({
let img = _img.clone();
let model = self.detect_model_path.clone();
let labels = self.detect_labels_path.clone();
let conf = self.detect_conf;
move || object_detector::run_object_detection(&img, &model, &labels, conf)
});
let comps = self.detect_components(&_img)?;
let text_result = ocr_handle.join().expect("OCR thread panicked");
let mut elements = self.merge(&_img, &comps, &text_result)?;
self.classify_icons(&_img, &mut elements)?;
let object_detections = od_handle.join().expect("Object detection thread panicked");
let img_shape = (_img.height(), _img.width());
Ok(PipelineResult {
elements,
object_detections,
img_shape,
_img,
})
}
}
/// Everything produced by `PipelineConfig::run_full` for one image.
pub struct PipelineResult {
/// Merged UI elements after colour detection and icon classification.
pub elements: Vec<Element>,
/// Raw detections returned by the object detector.
pub object_detections: Vec<Detection>,
/// Image dimensions as `(height, width)`.
pub img_shape: (u32, u32),
/// The decoded input image the results refer to.
pub _img: DynamicImage,
}
}