oxify_connect_vision/
lib.rs

1//! # oxify-connect-vision
2//!
3//! Vision/OCR connector for OxiFY workflows.
4//!
5//! This crate provides OCR (Optical Character Recognition) capabilities
6//! for extracting text from images and documents.
7//!
8//! ## Features
9//!
10//! - **Mock provider**: For testing and development
11//! - **Tesseract**: Traditional OCR engine (requires system installation)
12//! - **Surya**: Deep learning OCR with layout analysis (ONNX)
13//! - **PaddleOCR**: High-quality multilingual OCR (ONNX)
14//! - **Google Cloud Vision**: Cloud-based OCR with Google's Vision API
15//!
16//! ## Usage
17//!
18//! ```rust,ignore
19//! use oxify_connect_vision::{providers, VisionProvider};
20//!
21//! // Create a mock provider for testing
22//! let provider = providers::MockVisionProvider::new();
23//!
24//! // Process an image
25//! let result = provider.process_image(&image_bytes).await?;
26//!
27//! println!("Extracted text: {}", result.text);
28//! println!("Markdown: {}", result.markdown);
29//! ```
30//!
31//! ## Feature Flags
32//!
33//! - `mock` (default): Enable mock provider
34//! - `tesseract`: Enable Tesseract OCR support
35//! - `surya`: Enable Surya OCR (requires ONNX models)
36//! - `paddle`: Enable PaddleOCR (requires ONNX models)
37//! - `google-vision`: Enable Google Cloud Vision API support
38//! - `cuda`: Enable CUDA GPU acceleration
39//! - `coreml`: Enable CoreML GPU acceleration (macOS)
40//! - `all-providers`: Enable all OCR providers
41
42pub mod access_control;
43pub mod audit;
44pub mod batch;
45pub mod benchmark;
46pub mod cache;
47pub mod config;
48pub mod diagnostics;
49pub mod downloader;
50pub mod encryption;
51pub mod errors;
52pub mod form_detection;
53pub mod gpu;
54pub mod logging;
55pub mod metrics;
56pub mod model_loading;
57pub mod otel;
58pub mod pdf_processing;
59pub mod persistent_cache;
60pub mod preprocessing;
61pub mod profiling;
62pub mod providers;
63pub mod quantization;
64pub mod simd;
65pub mod streaming;
66pub mod table_extraction;
67pub mod types;
68pub mod validation;
69
70// Re-exports for convenience
71pub use access_control::{
72    AccessController, AccessError, ApiKey, Permission, UsageSnapshot, UsageStats,
73};
74pub use audit::{
75    AuditEvent, AuditEventType, AuditLogger, AuditResult, AuditSeverity, AuditStats,
76    RetentionPolicy,
77};
78pub use batch::{
79    process_batch_simple, BatchConfig, BatchItemResult, BatchProcessor, BatchProgress, BatchResult,
80};
81pub use benchmark::{BenchmarkResult, BenchmarkRunner, ComparisonReport, MemoryProfile};
82pub use cache::{CacheKey, CacheStats, VisionCache};
83pub use config::{
84    BatchConfig as ConfigBatch, CacheConfig as ConfigCache, ConfigWatcher,
85    DownloaderConfig as ConfigDownloader, PreprocessingConfig as ConfigPreprocessing,
86    ProviderConfig as ConfigProvider, VisionConfig,
87};
88pub use diagnostics::{ErrorCategory, ErrorDiagnostic, SystemDiagnostics};
89pub use downloader::{
90    compute_checksum, default_cache_dir, DownloadProgress, DownloaderConfig, ModelDownloader,
91    ModelInfo,
92};
93pub use encryption::{
94    EncryptedData, EncryptionAlgorithm, EncryptionConfig, EncryptionError, EncryptionProvider,
95    EncryptionStats, KeyDerivationFunction,
96};
97pub use errors::{Result, VisionError};
98pub use form_detection::{
99    Checkbox, FieldType, FormDetectionConfig, FormDetectionResult, FormDetector, FormField,
100    RadioButton, RadioGroup, Signature,
101};
102pub use gpu::{GpuConfig, GpuInfo, GpuProvider};
103pub use logging::{LogEntry, LogLevel, LogSampler, SamplingConfig, StructuredLogger};
104pub use metrics::{Counter, Gauge, Histogram, MetricsSummary, OcrMetrics, Timer};
105pub use model_loading::{
106    LoadingStrategy, MemoryStats, ModelHandle, ModelLoader, ModelLoadingConfig,
107};
108pub use otel::{
109    OtelConfig, OtelError, Span, SpanAttributes, SpanContext, SpanData, SpanEvent, SpanStatus,
110    TracingProvider, TracingStats,
111};
112pub use pdf_processing::{
113    PdfDocumentResult, PdfMetadata, PdfPage, PdfProcessingConfig, PdfProcessor, SearchResult,
114    TocEntry,
115};
116pub use persistent_cache::{
117    CacheBackend, CacheStats as PersistentCacheStats, EvictionPolicy, PersistentCache, RedisConfig,
118    SqliteConfig,
119};
120pub use preprocessing::{ImagePreprocessor, PreprocessConfig};
121pub use profiling::{
122    BottleneckInfo, CallTreeNode, MemorySnapshot, ProfileEntry, Profiler, ProfilerConfig,
123    ProfilingError, ProfilingReport, ReportStats,
124};
125pub use providers::{create_provider, ProviderCapabilities, VisionProvider, VisionProviderConfig};
126
127#[cfg(feature = "google-vision")]
128pub use providers::{CostStats, GoogleVisionConfig, GoogleVisionProvider};
129pub use quantization::{
130    ModelQuantizer, QuantizationBenefits, QuantizationConfig, QuantizationMethod,
131    QuantizationPrecision, QuantizedModelInfo,
132};
133pub use simd::{SimdConfig, SimdError, SimdInstructionSet, SimdProcessor, SimdStats};
134pub use streaming::{
135    AsyncFrameStream, FrameMetadata, SamplingStrategy, StreamConfig, StreamError, StreamProcessor,
136    StreamStats,
137};
138pub use table_extraction::{Table, TableCell, TableExtractionConfig, TableExtractor};
139pub use types::{BlockRole, ImageInput, OcrMetadata, OcrResult, OutputFormat, TextBlock};
140pub use validation::{validate_image_bytes, validate_image_file, ImageValidator, ValidationConfig};
141
142/// Prelude module for common imports.
143pub mod prelude {
144    pub use crate::errors::{Result, VisionError};
145    pub use crate::providers::{VisionProvider, VisionProviderConfig};
146    pub use crate::types::{BlockRole, OcrResult, TextBlock};
147}
148
#[cfg(test)]
mod tests {
    use super::*;

    /// The provider factory, given a mock configuration, yields a provider
    /// that reports itself as `"mock"`.
    #[test]
    fn test_create_mock_provider() {
        let mock_config = VisionProviderConfig::mock();
        let mock = create_provider(&mock_config).unwrap();
        assert_eq!(mock.provider_name(), "mock");
    }

    /// Running the mock provider over arbitrary bytes produces non-empty text
    /// and markdown, and tags the metadata with the provider name.
    #[tokio::test]
    async fn test_mock_provider_process() {
        let ocr = providers::MockVisionProvider::new()
            .process_image(b"fake image data")
            .await
            .unwrap();

        assert!(!ocr.text.is_empty());
        assert!(!ocr.markdown.is_empty());
        assert_eq!(ocr.metadata.provider, "mock");
    }

    /// An `OcrResult` survives a JSON round trip with its text intact.
    #[test]
    fn test_ocr_result_serialization() {
        let original = OcrResult::from_text("Hello, World!");
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded = serde_json::from_str::<OcrResult>(&encoded).unwrap();
        assert_eq!(decoded.text, original.text);
    }

    /// The `with_*` builder methods on `TextBlock` retain the supplied values.
    #[test]
    fn test_text_block_builder() {
        let built = TextBlock::new("Test")
            .with_bbox([0.1, 0.2, 0.3, 0.4])
            .with_confidence(0.95)
            .with_role(BlockRole::Header)
            .with_order(1);

        assert_eq!(built.text, "Test");
        assert_eq!(built.confidence, 0.95);
        assert_eq!(built.role, BlockRole::Header);
    }

    /// A value stored in `VisionCache` can be read back under the same key.
    #[test]
    fn test_cache_operations() {
        let store = VisionCache::new();
        let lookup_key = CacheKey::new(b"test", "mock", "markdown", None);

        store.set(lookup_key.clone(), OcrResult::from_text("Cached result"));

        let hit = store.get(&lookup_key).unwrap();
        assert_eq!(hit.text, "Cached result");
    }
}