1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
//! Pure Rust vision processing module for multimodal models.
//!
//! This module provides image preprocessing pipelines that match HuggingFace processor
//! outputs without requiring Python dependencies.
//!
//! # Architecture
//!
//! The vision module is structured as follows:
//!
//! - `transforms`: Core image transformations (resize, normalize, crop, etc.)
//! - `preprocessor_config`: HuggingFace config parsing
//! - `image_processor`: Trait and output types for processors
//! - `processors`: Model-specific implementations (LLaVA, Qwen-VL, etc.)
//!
//! # Usage
//!
//! ```rust,ignore
//! use smg::multimodal::vision::{
//!     PreProcessorConfig,
//!     processors::LlavaProcessor,
//!     ImagePreProcessor,
//! };
//!
//! // Parse a HuggingFace preprocessor config from its JSON
//! let config = PreProcessorConfig::from_json(config_json)?;
//!
//! // Create processor and preprocess images
//! let processor = LlavaProcessor::new();
//! let result = processor.preprocess(&images, &config)?;
//! ```
// Re-export commonly used types
pub use ;
pub use PreProcessorConfig;
pub use ;
pub use TransformError;