oar_ocr/core/
macros.rs

1//! Macros for the OCR pipeline.
2//!
3//! This module provides utility macros to reduce code duplication across
4//! the OCR pipeline, particularly for builder patterns and metrics collection.
5
6/// Central task registry macro that defines all tasks in a single location.
7///
8/// This macro uses the "callback pattern" - it takes a callback macro name and
9/// invokes it with the task registry data. Different consumers can process
10/// the same data differently.
11///
12/// # Task Entry Format
13///
14/// Each task is defined as:
15/// ```text
16/// TaskName {
17///     output: OutputType,           // fully qualified: $crate::domain::tasks::*
18///     adapter: AdapterType,         // fully qualified: $crate::domain::adapters::*
19///     input: image,
20///     constructor: constructor_name,
21///     conversion: into_method_name,
22///     name: "snake_case_name",
23///     doc: "Documentation string",
24/// }
25/// ```
26///
27/// # Benefits of Fully Qualified Paths
28///
29/// Using `$crate::` paths means consumer modules don't need to import
30/// adapter/output types - the macro expansion includes the full paths.
31#[macro_export]
32macro_rules! with_task_registry {
33    ($callback:path) => {
34        $callback! {
35            TextDetection {
36                output: $crate::domain::tasks::TextDetectionOutput,
37                adapter: $crate::domain::adapters::TextDetectionAdapter,
38                input: image,
39                constructor: text_detection,
40                conversion: into_text_detection,
41                name: "text_detection",
42                doc: "Text detection - locating text regions in images",
43            },
44            TextRecognition {
45                output: $crate::domain::tasks::TextRecognitionOutput,
46                adapter: $crate::domain::adapters::TextRecognitionAdapter,
47                input: image,
48                constructor: text_recognition,
49                conversion: into_text_recognition,
50                name: "text_recognition",
51                doc: "Text recognition - converting text regions to strings",
52            },
53            DocumentOrientation {
54                output: $crate::domain::tasks::DocumentOrientationOutput,
55                adapter: $crate::domain::adapters::DocumentOrientationAdapter,
56                input: image,
57                constructor: document_orientation,
58                conversion: into_document_orientation,
59                name: "document_orientation",
60                doc: "Document orientation classification",
61            },
62            TextLineOrientation {
63                output: $crate::domain::tasks::TextLineOrientationOutput,
64                adapter: $crate::domain::adapters::TextLineOrientationAdapter,
65                input: image,
66                constructor: text_line_orientation,
67                conversion: into_text_line_orientation,
68                name: "text_line_orientation",
69                doc: "Text line orientation classification",
70            },
71            DocumentRectification {
72                output: $crate::domain::tasks::DocumentRectificationOutput,
73                adapter: $crate::domain::adapters::UVDocRectifierAdapter,
74                input: image,
75                constructor: document_rectification,
76                conversion: into_document_rectification,
77                name: "document_rectification",
78                doc: "Document rectification/unwarp",
79            },
80            LayoutDetection {
81                output: $crate::domain::tasks::LayoutDetectionOutput,
82                adapter: $crate::domain::adapters::LayoutDetectionAdapter,
83                input: image,
84                constructor: layout_detection,
85                conversion: into_layout_detection,
86                name: "layout_detection",
87                doc: "Layout detection/analysis",
88            },
89            TableCellDetection {
90                output: $crate::domain::tasks::TableCellDetectionOutput,
91                adapter: $crate::domain::adapters::TableCellDetectionAdapter,
92                input: image,
93                constructor: table_cell_detection,
94                conversion: into_table_cell_detection,
95                name: "table_cell_detection",
96                doc: "Table cell detection - locating cells within table regions",
97            },
98            FormulaRecognition {
99                output: $crate::domain::tasks::FormulaRecognitionOutput,
100                adapter: $crate::domain::adapters::FormulaRecognitionAdapter,
101                input: image,
102                constructor: formula_recognition,
103                conversion: into_formula_recognition,
104                name: "formula_recognition",
105                doc: "Formula recognition - converting mathematical formulas to LaTeX",
106            },
107            SealTextDetection {
108                output: $crate::domain::tasks::SealTextDetectionOutput,
109                adapter: $crate::domain::adapters::SealTextDetectionAdapter,
110                input: image,
111                constructor: seal_text_detection,
112                conversion: into_seal_text_detection,
113                name: "seal_text_detection",
114                doc: "Seal text detection - locating text regions in seal/stamp images",
115            },
116            TableClassification {
117                output: $crate::domain::tasks::TableClassificationOutput,
118                adapter: $crate::domain::adapters::TableClassificationAdapter,
119                input: image,
120                constructor: table_classification,
121                conversion: into_table_classification,
122                name: "table_classification",
123                doc: "Table classification - classifying table images as wired or wireless",
124            },
125            TableStructureRecognition {
126                output: $crate::domain::tasks::TableStructureRecognitionOutput,
127                adapter: $crate::domain::adapters::TableStructureRecognitionAdapter,
128                input: image,
129                constructor: table_structure_recognition,
130                conversion: into_table_structure_recognition,
131                name: "table_structure_recognition",
132                doc: "Table structure recognition - recognizing table structure as HTML with bboxes",
133            }
134        }
135    };
136}
137
/// Builds the `TaskType` enum out of the task registry entries.
///
/// One unit variant is emitted per entry, documented with the entry's `doc`
/// string, together with a `name()` accessor that returns the entry's `name`
/// literal. The `output`, `adapter`, `input`, `constructor` and `conversion`
/// parts of each entry are parsed but not used by this macro.
#[macro_export]
macro_rules! impl_task_type_enum {
    ($(
        $variant:ident {
            output: $_output:ty,
            adapter: $_adapter:ty,
            input: $_input_kind:ident,
            constructor: $_constructor:ident,
            conversion: $_conversion:ident,
            name: $label:literal,
            doc: $summary:literal,
        }
    ),* $(,)?) => {
        /// Represents the type of OCR task being performed.
        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
        pub enum TaskType {
            $(
                #[doc = $summary]
                $variant,
            )*
        }

        impl TaskType {
            /// Returns a human-readable name for the task type.
            pub fn name(&self) -> &'static str {
                match self {
                    $(Self::$variant => $label,)*
                }
            }
        }
    };
}
171
/// Generates the `DynTaskOutput` enum and its methods from the task registry.
///
/// For every registry entry this emits:
/// - a tuple variant named after the task that wraps the entry's `output` type,
/// - a consuming conversion method (named by the entry's `conversion` field)
///   that returns the wrapped output or an `OCRError::InvalidInput`,
///
/// plus `task_type()` and `empty_for()`, which map between variants and
/// `$crate::core::traits::task::TaskType`.
///
/// The `adapter`, `input`, `constructor` and `name` parts of each entry are
/// parsed but not used here. `empty_for()` calls `<output>::empty()`, so every
/// output type must provide an `empty()` associated function.
#[macro_export]
macro_rules! impl_dyn_task_output {
    ($(
        $task:ident {
            output: $output:ty,
            adapter: $adapter:ty,
            input: $input_kind:ident,
            constructor: $constructor:ident,
            conversion: $conversion:ident,
            name: $name:literal,
            doc: $doc:literal,
        }
    ),* $(,)?) => {
        /// Type-erased output from dynamic adapter execution.
        ///
        /// This enum wraps all possible task output types to enable dynamic dispatch.
        #[derive(Debug, Clone)]
        pub enum DynTaskOutput {
            $(
                #[doc = $doc]
                $task($output),
            )*
        }

        impl DynTaskOutput {
            $(
                #[doc = concat!("Extracts ", stringify!($output), " if this is a ", stringify!($task), " variant.")]
                ///
                /// # Errors
                ///
                /// Returns `OCRError::InvalidInput` naming the task type that was
                /// actually stored when this is a different variant.
                pub fn $conversion(self) -> Result<$output, $crate::core::OCRError> {
                    match self {
                        Self::$task(output) => Ok(output),
                        // Report the real variant; a raw `mem::Discriminant`
                        // only prints as an opaque `Discriminant(..)` value.
                        other => Err($crate::core::OCRError::InvalidInput {
                            message: format!(
                                concat!("Expected ", stringify!($task), " output, got {:?}"),
                                other.task_type()
                            ),
                        }),
                    }
                }
            )*

            /// Returns the underlying task type for this output.
            pub fn task_type(&self) -> $crate::core::traits::task::TaskType {
                match self {
                    $(Self::$task(_) => $crate::core::traits::task::TaskType::$task,)*
                }
            }

            /// Creates an empty `DynTaskOutput` variant for the given task type.
            ///
            /// This is intended for registry wiring completeness checks and test scaffolding.
            pub fn empty_for(task_type: $crate::core::traits::task::TaskType) -> Self {
                match task_type {
                    $($crate::core::traits::task::TaskType::$task => {
                        Self::$task(<$output>::empty())
                    })*
                }
            }
        }
    };
}
233
/// Generates the `TaskAdapter` enum together with its `DynModelAdapter` impl.
///
/// Every adapter is stored behind a `Box` so that all variants can be produced
/// by the same macro arm. The cost of boxing is considered negligible because:
/// - Adapter creation happens once at build time
/// - Adapters are long-lived
/// - Inference time dominates any pointer overhead
/// - Most adapter internals are already indirect (Arc, Vec, etc.)
///
/// The expansion refers to `DynModelAdapter`, `AdapterInfo`, `TaskType`,
/// `DynTaskInput`, `DynTaskOutput` and `OCRError` by their bare names, so
/// those items must be in scope where the macro is invoked.
#[macro_export]
macro_rules! impl_task_adapter {
    // Internal rule: run an adapter whose registry entry declares `input: image`.
    // Unwraps the image payload, executes the adapter without a per-call config
    // (`None`) and wraps the result in the matching `DynTaskOutput` variant.
    (@execute image, $runner:ident, $payload:ident, $variant:ident) => {{
        let DynTaskInput::Image(image) = $payload;
        let produced = $runner.execute(image, None)?;
        Ok(DynTaskOutput::$variant(produced))
    }};

    // Public rule: one registry entry per task.
    ($(
        $variant:ident {
            output: $_output:ty,
            adapter: $concrete:ty,
            input: $input_kind:ident,
            constructor: $ctor:ident,
            conversion: $_conversion:ident,
            name: $_name:literal,
            doc: $summary:literal,
        }
    ),* $(,)?) => {
        /// Task-specific adapter enum with uniform Box storage.
        ///
        /// This enum directly holds concrete adapter types (boxed for uniform layout),
        /// avoiding the need for runtime downcast. Each variant corresponds to a
        /// specific task type and its adapter.
        ///
        /// # Benefits
        ///
        /// - **No runtime downcast**: Direct pattern matching on enum variants
        /// - **Compile-time exhaustiveness**: Adding a new task type requires handling it explicitly
        /// - **Type safety**: Each variant holds the exact adapter type for that task
        /// - **Uniform memory layout**: All variants are pointer-sized
        ///
        /// # Custom Adapters
        ///
        /// For testing or extension, use the `Custom` variant which wraps a `Box<dyn DynModelAdapter>`.
        #[derive(Debug)]
        pub enum TaskAdapter {
            $(
                #[doc = $summary]
                $variant(Box<$concrete>),
            )*
            /// Custom adapter for testing or extension (wraps DynModelAdapter)
            Custom(Box<dyn DynModelAdapter>),
        }

        impl TaskAdapter {
            $(
                #[doc = concat!("Creates a TaskAdapter from a ", stringify!($concrete), ".")]
                pub fn $ctor(adapter: $concrete) -> Self {
                    let boxed = Box::new(adapter);
                    Self::$variant(boxed)
                }
            )*

            /// Creates a TaskAdapter from a custom DynModelAdapter.
            pub fn custom<A: DynModelAdapter + 'static>(adapter: A) -> Self {
                let boxed: Box<dyn DynModelAdapter> = Box::new(adapter);
                Self::Custom(boxed)
            }
        }

        impl DynModelAdapter for TaskAdapter {
            fn info(&self) -> AdapterInfo {
                match self {
                    $(Self::$variant(inner) => inner.info(),)*
                    Self::Custom(inner) => inner.info(),
                }
            }

            fn task_type(&self) -> TaskType {
                match self {
                    $(Self::$variant(_) => TaskType::$variant,)*
                    Self::Custom(inner) => inner.task_type(),
                }
            }

            fn supports_batching(&self) -> bool {
                match self {
                    $(Self::$variant(inner) => inner.supports_batching(),)*
                    Self::Custom(inner) => inner.supports_batching(),
                }
            }

            fn recommended_batch_size(&self) -> usize {
                match self {
                    $(Self::$variant(inner) => inner.recommended_batch_size(),)*
                    Self::Custom(inner) => inner.recommended_batch_size(),
                }
            }

            fn execute_dyn(&self, input: DynTaskInput) -> Result<DynTaskOutput, OCRError> {
                match self {
                    $(
                        Self::$variant(adapter) => {
                            $crate::impl_task_adapter!(@execute $input_kind, adapter, input, $variant)
                        }
                    )*
                    Self::Custom(adapter) => adapter.execute_dyn(input),
                }
            }
        }
    };
}
346
/// Macro to handle optional nested config initialization in builders.
///
/// Given an `Option<T>` place expression, this makes sure it holds a value
/// (creating one with `T::new()` when it is `None`), binds a mutable reference
/// to that value under the supplied name and runs the supplied block.
///
/// The field expression is expanded - and therefore evaluated - exactly once.
///
/// This macro eliminates the repeated pattern of:
/// ```rust,no_run
/// // if self.config.field.is_none() {
/// //     self.config.field = Some(Type::new());
/// // }
/// ```
///
/// # Usage
///
/// ```rust,no_run
/// // Instead of:
/// // if self.config.orientation.is_none() {
/// //     self.config.orientation = Some(DocOrientationClassifierConfig::new());
/// // }
/// // if let Some(ref mut config) = self.config.orientation {
/// //     config.confidence_threshold = Some(threshold);
/// // }
///
/// // Use:
/// // with_nested!(self.config.orientation, DocOrientationClassifierConfig, config => {
/// //     config.confidence_threshold = Some(threshold);
/// // });
/// ```
#[macro_export]
macro_rules! with_nested {
    ($field:expr, $type:ty, $var:ident => $body:block) => {{
        // `get_or_insert_with` fills the slot only when it is `None` and hands
        // back `&mut T`, so `$field` does not need to be expanded again.
        let $var = $field.get_or_insert_with(|| <$type>::new());
        $body
    }};
}
383
/// Macro to create pre-populated StageMetrics with common patterns.
///
/// This macro reduces duplication in metrics construction across stages.
/// It builds a `StageMetrics` from a success and a failure count, optionally
/// records `start_time.elapsed()` as the processing time, and then attaches
/// every `key = value` pair as an info entry. The key is stored as the
/// identifier's text (`stringify!`), the value via `.to_string()`.
///
/// The list of `key = value` pairs may end with a trailing comma.
///
/// # Usage
///
/// ```rust,no_run
/// // Instead of:
/// // StageMetrics::new(success_count, failure_count)
/// //     .with_processing_time(start_time.elapsed())
/// //     .with_info("stage", "cropping")
/// //     .with_info("batch_size", batch_size.to_string())
/// //     .with_info("parallel", parallel.to_string())
///
/// // Use:
/// // metrics!(success_count, failure_count, start_time; stage = "cropping", batch_size = batch_size, parallel = parallel)
/// // Or without timing:
/// // metrics!(success_count, failure_count; stage = "cropping", batch_size = batch_size)
/// ```
#[macro_export]
macro_rules! metrics {
    // With timing
    ($success:expr, $failure:expr, $start_time:expr; $($key:ident = $value:expr),* $(,)?) => {
        {
            let mut metrics = $crate::pipeline::stages::StageMetrics::new($success, $failure);
            // Elapsed time is sampled before the info values are evaluated.
            metrics = metrics.with_processing_time($start_time.elapsed());
            $(
                metrics = metrics.with_info(stringify!($key), $value.to_string());
            )*
            metrics
        }
    };
    // Without timing
    ($success:expr, $failure:expr; $($key:ident = $value:expr),* $(,)?) => {
        {
            let mut metrics = $crate::pipeline::stages::StageMetrics::new($success, $failure);
            $(
                metrics = metrics.with_info(stringify!($key), $value.to_string());
            )*
            metrics
        }
    };
}
427
/// Comprehensive builder macro for generating common builder method patterns.
///
/// This macro generates up to three kinds of builder methods inside a single
/// `impl` block for the given builder type:
/// 1. Simple setters: `fn field(mut self, value: T) -> Self` that store
///    `Some(value)` in `self.<config_field>.<field>`
/// 2. Nested config setters: same signature, but the value is stored in a field
///    of an optional nested config, which is created through `with_nested!`
///    (and therefore `Type::new()`) when it is still `None`
/// 3. Enable methods: `fn method(mut self) -> Self` that store
///    `Some(Type::default())` in the named config field
///
/// Each section is optional. Sections that are present must appear in the
/// order `simple_setters`, `nested_setters`, `enable_methods`, separated by
/// commas; a trailing comma after the last section is accepted. Invoking the
/// macro once per section (the original form) keeps working.
///
/// # Usage
///
/// ```rust,no_run
/// // impl_complete_builder! {
/// //     builder: MyBuilder,
/// //     config_field: config,
///
/// //     // Simple setters
/// //     simple_setters: {
/// //         field_name: FieldType => "Documentation for the setter",
/// //     },
///
/// //     // Nested config setters
/// //     nested_setters: {
/// //         config_path: ConfigType => {
/// //             field_name: FieldType => "Documentation",
/// //         },
/// //     },
///
/// //     // Enable/disable methods
/// //     enable_methods: {
/// //         method_name => config_field: DefaultType => "Documentation",
/// //     },
/// // }
/// ```
#[macro_export]
macro_rules! impl_complete_builder {
    (
        builder: $builder:ident,
        config_field: $config_field:ident
        $(, simple_setters: {
            $($simple_field:ident: $simple_type:ty => $simple_doc:literal),* $(,)?
        })?
        $(, nested_setters: {
            $($nested_path:ident: $nested_type:ty => {
                $($nested_field:ident: $nested_field_type:ty => $nested_doc:literal),* $(,)?
            }),* $(,)?
        })?
        $(, enable_methods: {
            $($enable_method:ident => $enable_field:ident: $enable_type:ty => $enable_doc:literal),* $(,)?
        })?
        $(,)?
    ) => {
        impl $builder {
            // Simple setters: direct `Some(value)` assignment.
            $($(
                #[doc = $simple_doc]
                pub fn $simple_field(mut self, value: $simple_type) -> Self {
                    self.$config_field.$simple_field = Some(value);
                    self
                }
            )*)?

            // Nested setters: lazily create the nested config, then assign.
            $($($(
                #[doc = $nested_doc]
                pub fn $nested_field(mut self, value: $nested_field_type) -> Self {
                    $crate::with_nested!(self.$config_field.$nested_path, $nested_type, config => {
                        config.$nested_field = Some(value);
                    });
                    self
                }
            )*)*)?

            // Enable methods: switch a feature on with its default configuration.
            $($(
                #[doc = $enable_doc]
                pub fn $enable_method(mut self) -> Self {
                    self.$config_field.$enable_field = Some(<$enable_type>::default());
                    self
                }
            )*)?
        }
    };
}
524
/// Implements `new()` and `with_common()` for a config struct that has a
/// `common` field plus module-specific fields.
///
/// - `with_common(common)` stores the supplied common configuration and fills
///   every listed field with its default expression.
/// - `new()` builds the common configuration with
///   `ModelInferenceConfig::with_defaults(model_name, batch_size)` from the
///   `common_defaults` pair and fills the listed fields the same way.
#[macro_export]
macro_rules! impl_config_new_and_with_common {
    (
        $Config:ident,
        common_defaults: ($model_name_opt:expr, $batch_size_opt:expr),
        fields: { $( $field:ident : $default_expr:expr ),* $(,)? }
    ) => {
        impl $Config {
            /// Creates a new config instance with default values
            pub fn new() -> Self {
                // The common defaults are built first; the per-field defaults
                // are then filled in by `with_common`.
                Self::with_common(
                    $crate::core::config::builder::ModelInferenceConfig::with_defaults(
                        $model_name_opt,
                        $batch_size_opt,
                    ),
                )
            }

            /// Creates a new config instance using provided common configuration
            pub fn with_common(common: $crate::core::config::builder::ModelInferenceConfig) -> Self {
                Self {
                    common,
                    $( $field: $default_expr ),*
                }
            }
        }
    };
}
553
/// Macro to implement common builder methods for structs with a `ModelInferenceConfig` field.
///
/// Emits a dedicated `impl $Builder { ... }` block containing the setters
/// `model_path`, `model_name`, `batch_size`, `enable_logging` and
/// `ort_session`. Each setter forwards its argument to the same-named method
/// on the `$common_field` field and stores the result back into that field.
///
/// The method bodies come from `common_builder_methods!`, so both macros
/// always generate the same set of setters.
#[macro_export]
macro_rules! impl_common_builder_methods {
    ($Builder:ident, $common_field:ident) => {
        impl $Builder {
            $crate::common_builder_methods! { $common_field }
        }
    };
}
590
/// Injects the common builder setters into an `impl` block that already exists.
///
/// Invoke it inside `impl YourBuilder { ... }` and pass the name of the field
/// that holds the `ModelInferenceConfig` (e.g., `common`). Every generated
/// setter takes the builder by value, calls the same-named method on that
/// field, writes the result back and returns the builder.
#[macro_export]
macro_rules! common_builder_methods {
    ($common_field:ident) => {
        /// Sets the model path
        pub fn model_path(mut self, model_path: impl Into<std::path::PathBuf>) -> Self {
            let updated = self.$common_field.model_path(model_path);
            self.$common_field = updated;
            self
        }

        /// Sets the model name
        pub fn model_name(mut self, model_name: impl Into<String>) -> Self {
            let updated = self.$common_field.model_name(model_name);
            self.$common_field = updated;
            self
        }

        /// Sets the batch size
        pub fn batch_size(mut self, batch_size: usize) -> Self {
            let updated = self.$common_field.batch_size(batch_size);
            self.$common_field = updated;
            self
        }

        /// Enables or disables logging
        pub fn enable_logging(mut self, enable: bool) -> Self {
            let updated = self.$common_field.enable_logging(enable);
            self.$common_field = updated;
            self
        }

        /// Sets the ONNX Runtime session configuration
        pub fn ort_session(mut self, config: $crate::core::config::onnx::OrtSessionConfig) -> Self {
            let updated = self.$common_field.ort_session(config);
            self.$common_field = updated;
            self
        }
    };
}
624
/// Applies an optional `OrtSessionConfig` to any builder exposing `with_ort_config`.
///
/// The builder expression is evaluated first. If the config expression is
/// `Some(cfg)`, the result is `builder.with_ort_config(cfg)`; otherwise the
/// builder is returned unchanged.
///
/// It replaces this recurring pattern:
/// ```rust,no_run
/// // let mut builder = SomeBuilder::new();
/// // if let Some(ort_config) = ort_config {
/// //     builder = builder.with_ort_config(ort_config);
/// // }
/// ```
///
/// with a single expression:
/// ```rust,no_run
/// // let builder = apply_ort_config!(SomeBuilder::new(), ort_config);
/// ```
///
/// # Usage
///
/// ```rust,no_run
/// // Works with any builder that has a `with_ort_config` method:
/// // let builder = apply_ort_config!(
/// //     DBModelBuilder::new()
/// //         .preprocess_config(config),
/// //     ort_config
/// // );
/// ```
#[macro_export]
macro_rules! apply_ort_config {
    ($builder:expr, $ort_config:expr) => {{
        let base = $builder;
        match $ort_config {
            Some(session_config) => base.with_ort_config(session_config),
            None => base,
        }
    }};
}
661
#[cfg(test)]
mod tests {

    // ---- Fixtures -------------------------------------------------------

    /// Nested configuration that the nested setter creates on demand.
    #[derive(Debug, Default)]
    struct NestedConfig {
        nested_field: Option<i32>,
    }

    impl NestedConfig {
        fn new() -> Self {
            Self::default()
        }
    }

    /// Feature marker stored by the generated enable method.
    #[derive(Debug, Default)]
    struct EnabledFeature {
        _enabled: bool,
    }

    /// Top-level configuration with one field per generated method kind.
    #[derive(Debug, Default)]
    struct TestConfig {
        simple_field: Option<String>,
        nested_config: Option<NestedConfig>,
        enable_field: Option<EnabledFeature>,
    }

    /// Builder whose methods are generated by `impl_complete_builder!`.
    #[derive(Debug)]
    struct TestBuilder {
        config: TestConfig,
    }

    impl TestBuilder {
        fn new() -> Self {
            let config = TestConfig::default();
            Self { config }
        }

        fn get_config(&self) -> &TestConfig {
            &self.config
        }
    }

    // ---- Generated builder methods (one invocation per section) ---------

    impl_complete_builder! {
        builder: TestBuilder,
        config_field: config,
        simple_setters: {
            simple_field: String => "Sets a simple field value",
        }
    }

    impl_complete_builder! {
        builder: TestBuilder,
        config_field: config,
        nested_setters: {
            nested_config: NestedConfig => {
                nested_field: i32 => "Sets a nested field value",
            },
        }
    }

    impl_complete_builder! {
        builder: TestBuilder,
        config_field: config,
        enable_methods: {
            enable_feature => enable_field: EnabledFeature => "Enables a feature with default configuration",
        }
    }

    // ---- Tests ------------------------------------------------------------

    #[test]
    fn test_impl_complete_builder_nested_setter() {
        let builder = TestBuilder::new().nested_field(42);
        let nested = &builder.get_config().nested_config;

        assert!(nested.is_some());
        assert_eq!(nested.as_ref().unwrap().nested_field, Some(42));
    }

    #[test]
    fn test_impl_complete_builder_enable_method() {
        let builder = TestBuilder::new().enable_feature();
        let config = builder.get_config();

        assert!(config.enable_field.is_some());
    }

    #[test]
    fn test_impl_complete_builder_chaining() {
        let builder = TestBuilder::new()
            .simple_field("test".to_string())
            .nested_field(123)
            .enable_feature();
        let config = builder.get_config();

        assert_eq!(config.simple_field, Some("test".to_string()));

        let nested = &config.nested_config;
        assert!(nested.is_some());
        assert_eq!(nested.as_ref().unwrap().nested_field, Some(123));

        assert!(config.enable_field.is_some());
    }
}