1use crate::core::OCRError;
8use crate::processors::types::{ColorOrder, TensorLayout};
9use image::DynamicImage;
10use rayon::prelude::*;
11
12#[derive(Debug)]
18pub struct NormalizeImage {
19 pub alpha: Vec<f32>,
21 pub beta: Vec<f32>,
23 pub order: TensorLayout,
25 pub color_order: ColorOrder,
27}
28
29impl NormalizeImage {
30 pub fn new(
51 scale: Option<f32>,
52 mean: Option<Vec<f32>>,
53 std: Option<Vec<f32>>,
54 order: Option<TensorLayout>,
55 color_order: Option<ColorOrder>,
56 ) -> Result<Self, OCRError> {
57 Self::with_color_order(scale, mean, std, order, color_order)
58 }
59
60 pub fn with_color_order(
85 scale: Option<f32>,
86 mean: Option<Vec<f32>>,
87 std: Option<Vec<f32>>,
88 order: Option<TensorLayout>,
89 color_order: Option<ColorOrder>,
90 ) -> Result<Self, OCRError> {
91 let scale = scale.unwrap_or(1.0 / 255.0);
92 let mean = mean.unwrap_or_else(|| vec![0.485, 0.456, 0.406]);
93 let std = std.unwrap_or_else(|| vec![0.229, 0.224, 0.225]);
94 let order = order.unwrap_or(TensorLayout::CHW);
95 let color_order = color_order.unwrap_or_default();
96
97 if scale <= 0.0 {
98 return Err(OCRError::ConfigError {
99 message: "Scale must be greater than 0".to_string(),
100 });
101 }
102
103 if mean.len() != 3 {
104 return Err(OCRError::ConfigError {
105 message: "Mean must have exactly 3 elements (3-channel normalization)".to_string(),
106 });
107 }
108
109 if std.len() != 3 {
110 return Err(OCRError::ConfigError {
111 message: "Std must have exactly 3 elements (3-channel normalization)".to_string(),
112 });
113 }
114
115 for (i, &s) in std.iter().enumerate() {
116 if s <= 0.0 {
117 return Err(OCRError::ConfigError {
118 message: format!(
119 "Standard deviation at index {i} must be greater than 0, got {s}"
120 ),
121 });
122 }
123 }
124
125 let alpha: Vec<f32> = std.iter().map(|s| scale / s).collect();
126 let beta: Vec<f32> = mean.iter().zip(&std).map(|(m, s)| -m / s).collect();
127
128 Ok(Self {
129 alpha,
130 beta,
131 order,
132 color_order,
133 })
134 }
135
136 pub fn validate_config(&self) -> Result<(), OCRError> {
148 if self.alpha.len() != 3 || self.beta.len() != 3 {
149 return Err(OCRError::ConfigError {
150 message: "Alpha and beta must have exactly 3 elements (3-channel normalization)"
151 .to_string(),
152 });
153 }
154
155 for (i, &alpha) in self.alpha.iter().enumerate() {
156 if !alpha.is_finite() {
157 return Err(OCRError::ConfigError {
158 message: format!("Alpha value at index {i} is not finite: {alpha}"),
159 });
160 }
161 }
162
163 for (i, &beta) in self.beta.iter().enumerate() {
164 if !beta.is_finite() {
165 return Err(OCRError::ConfigError {
166 message: format!("Beta value at index {i} is not finite: {beta}"),
167 });
168 }
169 }
170
171 Ok(())
172 }
173
174 pub fn for_ocr_recognition() -> Result<Self, OCRError> {
186 Self::new(
187 Some(2.0 / 255.0),
188 Some(vec![1.0, 1.0, 1.0]),
189 Some(vec![1.0, 1.0, 1.0]),
190 Some(TensorLayout::CHW),
191 Some(ColorOrder::BGR),
192 )
193 }
194
195 pub fn imagenet_rgb() -> Result<Self, OCRError> {
197 Self::with_color_order(
198 None,
199 Some(vec![0.485, 0.456, 0.406]),
200 Some(vec![0.229, 0.224, 0.225]),
201 Some(TensorLayout::CHW),
202 Some(ColorOrder::RGB),
203 )
204 }
205
206 pub fn imagenet_bgr_from_rgb_stats() -> Result<Self, OCRError> {
211 Self::with_color_order(
212 None,
213 Some(vec![0.406, 0.456, 0.485]),
214 Some(vec![0.225, 0.224, 0.229]),
215 Some(TensorLayout::CHW),
216 Some(ColorOrder::BGR),
217 )
218 }
219
220 pub fn with_color_order_from_rgb_stats(
225 scale: Option<f32>,
226 mean_rgb: Vec<f32>,
227 std_rgb: Vec<f32>,
228 order: Option<TensorLayout>,
229 output_color_order: ColorOrder,
230 ) -> Result<Self, OCRError> {
231 if mean_rgb.len() != 3 || std_rgb.len() != 3 {
232 return Err(OCRError::ConfigError {
233 message: format!(
234 "mean/std must have exactly 3 elements (got mean={}, std={})",
235 mean_rgb.len(),
236 std_rgb.len()
237 ),
238 });
239 }
240
241 let (mean, std) = match output_color_order {
242 ColorOrder::RGB => (mean_rgb, std_rgb),
243 ColorOrder::BGR => (
244 vec![mean_rgb[2], mean_rgb[1], mean_rgb[0]],
245 vec![std_rgb[2], std_rgb[1], std_rgb[0]],
246 ),
247 };
248
249 Self::with_color_order(
250 scale,
251 Some(mean),
252 Some(std),
253 order,
254 Some(output_color_order),
255 )
256 }
257
258 pub fn apply(&self, imgs: Vec<DynamicImage>) -> Vec<Vec<f32>> {
268 imgs.into_iter().map(|img| self.normalize(img)).collect()
269 }
270
271 fn validate_batch_inputs(
283 &self,
284 imgs_len: usize,
285 shapes: &[(usize, usize, usize)],
286 batch_tensor: &[f32],
287 ) -> Result<(usize, usize, usize, usize), OCRError> {
288 if imgs_len != shapes.len() {
289 return Err(OCRError::InvalidInput {
290 message: format!(
291 "Images and shapes length mismatch: {} images vs {} shapes",
292 imgs_len,
293 shapes.len()
294 ),
295 });
296 }
297
298 let batch_size = imgs_len;
299 if batch_size == 0 {
300 return Ok((0, 0, 0, 0));
301 }
302
303 let max_width = shapes.iter().map(|(_, _, w)| *w).max().unwrap_or(0);
304 let channels = shapes.first().map(|(c, _, _)| *c).unwrap_or(0);
305 let height = shapes.first().map(|(_, h, _)| *h).unwrap_or(0);
306 let img_size = channels * height * max_width;
307
308 if batch_tensor.len() < batch_size * img_size {
309 return Err(OCRError::BufferTooSmall {
310 expected: batch_size * img_size,
311 actual: batch_tensor.len(),
312 });
313 }
314
315 Ok((batch_size, channels, height, max_width))
316 }
317
318 pub fn apply_to_batch(
330 &self,
331 imgs: Vec<DynamicImage>,
332 batch_tensor: &mut [f32],
333 shapes: &[(usize, usize, usize)],
334 ) -> Result<(), OCRError> {
335 let (batch_size, channels, height, max_width) =
336 self.validate_batch_inputs(imgs.len(), shapes, batch_tensor)?;
337
338 if batch_size == 0 {
339 return Ok(());
340 }
341
342 let img_size = channels * height * max_width;
343
344 for (batch_idx, (img, &(_c, h, w))) in imgs.into_iter().zip(shapes.iter()).enumerate() {
345 let normalized_img = self.normalize(img);
346
347 let batch_offset = batch_idx * img_size;
348
349 for ch in 0.._c {
350 for y in 0..h {
351 for x in 0..w {
352 let src_idx = ch * h * w + y * w + x;
353 let dst_idx = batch_offset + ch * height * max_width + y * max_width + x;
354 if src_idx < normalized_img.len() && dst_idx < batch_tensor.len() {
355 batch_tensor[dst_idx] = normalized_img[src_idx];
356 }
357 }
358 }
359 }
360 }
361
362 Ok(())
363 }
364
365 pub fn normalize_streaming_to_batch(
378 &self,
379 imgs: Vec<DynamicImage>,
380 batch_tensor: &mut [f32],
381 shapes: &[(usize, usize, usize)],
382 ) -> Result<(), OCRError> {
383 let (batch_size, channels, height, max_width) =
384 self.validate_batch_inputs(imgs.len(), shapes, batch_tensor)?;
385
386 if batch_size == 0 {
387 return Ok(());
388 }
389
390 let img_size = channels * height * max_width;
391 batch_tensor.fill(0.0);
392
393 let src_channels: [usize; 3] = match self.color_order {
395 ColorOrder::RGB => [0, 1, 2],
396 ColorOrder::BGR => [2, 1, 0],
397 };
398
399 for (batch_idx, (img, &(_c, h, w))) in imgs.into_iter().zip(shapes.iter()).enumerate() {
400 let rgb_img = img.to_rgb8();
401 let (width, height_img) = rgb_img.dimensions();
402 let batch_offset = batch_idx * img_size;
403
404 match self.order {
405 TensorLayout::CHW => {
406 for (c, &src_c) in src_channels.iter().enumerate().take(channels.min(3)) {
407 for y in 0..h.min(height_img as usize) {
408 for x in 0..w.min(width as usize) {
409 let pixel = rgb_img.get_pixel(x as u32, y as u32);
410 let channel_value = pixel[src_c] as f32;
411 let dst_idx =
412 batch_offset + c * height * max_width + y * max_width + x;
413 if dst_idx < batch_tensor.len() {
414 batch_tensor[dst_idx] =
415 channel_value * self.alpha[c] + self.beta[c];
416 }
417 }
418 }
419 }
420 }
421 TensorLayout::HWC => {
422 for y in 0..h.min(height_img as usize) {
423 for x in 0..w.min(width as usize) {
424 let pixel = rgb_img.get_pixel(x as u32, y as u32);
425 for (c, &src_c) in src_channels.iter().enumerate().take(channels.min(3))
426 {
427 let channel_value = pixel[src_c] as f32;
428 let dst_idx =
429 batch_offset + y * max_width * channels + x * channels + c;
430 if dst_idx < batch_tensor.len() {
431 batch_tensor[dst_idx] =
432 channel_value * self.alpha[c] + self.beta[c];
433 }
434 }
435 }
436 }
437 }
438 }
439 }
440
441 Ok(())
442 }
443
444 fn normalize(&self, img: DynamicImage) -> Vec<f32> {
454 let rgb_img = img.to_rgb8();
455 let (width, height) = rgb_img.dimensions();
456 let channels = 3;
457
458 let map_channel = |c: u32| -> usize {
462 match self.color_order {
463 ColorOrder::RGB => c as usize,
464 ColorOrder::BGR => match c {
465 0 => 2, 1 => 1, 2 => 0, _ => c as usize,
469 },
470 }
471 };
472
473 match self.order {
474 TensorLayout::CHW => {
475 let mut result = vec![0.0f32; (channels * height * width) as usize];
476
477 for c in 0..channels {
478 let src_c = map_channel(c);
479 for y in 0..height {
480 for x in 0..width {
481 let pixel = rgb_img.get_pixel(x, y);
482 let channel_value = pixel[src_c] as f32;
483 let dst_idx = (c * height * width + y * width + x) as usize;
484
485 result[dst_idx] =
486 channel_value * self.alpha[c as usize] + self.beta[c as usize];
487 }
488 }
489 }
490 result
491 }
492 TensorLayout::HWC => {
493 let mut result = vec![0.0f32; (height * width * channels) as usize];
494
495 for y in 0..height {
496 for x in 0..width {
497 let pixel = rgb_img.get_pixel(x, y);
498 for c in 0..channels {
499 let src_c = map_channel(c);
500 let channel_value = pixel[src_c] as f32;
501 let dst_idx = (y * width * channels + x * channels + c) as usize;
502
503 result[dst_idx] =
504 channel_value * self.alpha[c as usize] + self.beta[c as usize];
505 }
506 }
507 }
508 result
509 }
510 }
511 }
512
513 pub fn normalize_to(&self, img: DynamicImage) -> Result<crate::core::Tensor4D, OCRError> {
523 let rgb_img = img.to_rgb8();
524 let (width, height) = rgb_img.dimensions();
525 let channels = 3;
526
527 let map_channel = |c: u32| -> usize {
529 match self.color_order {
530 ColorOrder::RGB => c as usize,
531 ColorOrder::BGR => match c {
532 0 => 2,
533 1 => 1,
534 2 => 0,
535 _ => c as usize,
536 },
537 }
538 };
539
540 match self.order {
541 TensorLayout::CHW => {
542 let mut result = vec![0.0f32; (channels * height * width) as usize];
543
544 for c in 0..channels {
545 let src_c = map_channel(c);
546 for y in 0..height {
547 for x in 0..width {
548 let pixel = rgb_img.get_pixel(x, y);
549 let channel_value = pixel[src_c] as f32;
550 let dst_idx = (c * height * width + y * width + x) as usize;
551
552 result[dst_idx] =
553 channel_value * self.alpha[c as usize] + self.beta[c as usize];
554 }
555 }
556 }
557
558 ndarray::Array4::from_shape_vec(
559 (1, channels as usize, height as usize, width as usize),
560 result,
561 )
562 .map_err(|e| {
563 OCRError::tensor_operation_error(
564 "normalization_tensor_creation_chw",
565 &[1, channels as usize, height as usize, width as usize],
566 &[(channels * height * width) as usize],
567 &format!("Failed to create CHW normalization tensor for {}x{} image with {} channels",
568 width, height, channels),
569 e,
570 )
571 })
572 }
573 TensorLayout::HWC => {
574 let mut result = vec![0.0f32; (height * width * channels) as usize];
575
576 for y in 0..height {
577 for x in 0..width {
578 let pixel = rgb_img.get_pixel(x, y);
579 for c in 0..channels {
580 let src_c = map_channel(c);
581 let channel_value = pixel[src_c] as f32;
582 let dst_idx = (y * width * channels + x * channels + c) as usize;
583
584 result[dst_idx] =
585 channel_value * self.alpha[c as usize] + self.beta[c as usize];
586 }
587 }
588 }
589
590 ndarray::Array4::from_shape_vec(
591 (1, height as usize, width as usize, channels as usize),
592 result,
593 )
594 .map_err(|e| {
595 OCRError::tensor_operation_error(
596 "normalization_tensor_creation_hwc",
597 &[1, height as usize, width as usize, channels as usize],
598 &[(height * width * channels) as usize],
599 &format!("Failed to create HWC normalization tensor for {}x{} image with {} channels",
600 width, height, channels),
601 e,
602 )
603 })
604 }
605 }
606 }
607
608 pub fn normalize_batch_to(
623 &self,
624 imgs: Vec<DynamicImage>,
625 ) -> Result<crate::core::Tensor4D, OCRError> {
626 if imgs.is_empty() {
627 return Ok(ndarray::Array4::zeros((0, 0, 0, 0)));
628 }
629
630 let batch_size = imgs.len();
631
632 let rgb_imgs: Vec<_> = imgs.into_iter().map(|img| img.to_rgb8()).collect();
633 let dimensions: Vec<_> = rgb_imgs.iter().map(|img| img.dimensions()).collect();
634
635 let (first_width, first_height) = dimensions.first().copied().unwrap_or((0, 0));
636 for (i, &(width, height)) in dimensions.iter().enumerate() {
637 if width != first_width || height != first_height {
638 return Err(OCRError::InvalidInput {
639 message: format!(
640 "All images in batch must have the same dimensions. Image 0: {first_width}x{first_height}, Image {i}: {width}x{height}"
641 ),
642 });
643 }
644 }
645
646 let (width, height) = (first_width, first_height);
647 let channels = 3u32;
648
649 let src_channels: [usize; 3] = match self.color_order {
652 ColorOrder::RGB => [0, 1, 2],
653 ColorOrder::BGR => [2, 1, 0], };
655
656 let alpha = self.alpha.clone();
658 let beta = self.beta.clone();
659
660 match self.order {
661 TensorLayout::CHW => {
662 let mut result = vec![0.0f32; batch_size * (channels * height * width) as usize];
663
664 let img_size = (channels * height * width) as usize;
665 if batch_size == 1 {
666 let rgb_img = &rgb_imgs[0];
668 let batch_slice = &mut result[0..img_size];
669 for c in 0..channels {
670 let src_c = src_channels[c as usize];
671 for y in 0..height {
672 for x in 0..width {
673 let pixel = rgb_img.get_pixel(x, y);
674 let channel_value = pixel[src_c] as f32;
675 let dst_idx = (c * height * width + y * width + x) as usize;
676 batch_slice[dst_idx] =
677 channel_value * alpha[c as usize] + beta[c as usize];
678 }
679 }
680 }
681 } else {
682 result.par_chunks_mut(img_size).enumerate().for_each(
683 |(batch_idx, batch_slice)| {
684 let rgb_img = &rgb_imgs[batch_idx];
685 for c in 0..channels {
686 let src_c = src_channels[c as usize];
687 for y in 0..height {
688 for x in 0..width {
689 let pixel = rgb_img.get_pixel(x, y);
690 let channel_value = pixel[src_c] as f32;
691 let dst_idx = (c * height * width + y * width + x) as usize;
692 batch_slice[dst_idx] =
693 channel_value * alpha[c as usize] + beta[c as usize];
694 }
695 }
696 }
697 },
698 );
699 }
700
701 ndarray::Array4::from_shape_vec(
702 (
703 batch_size,
704 channels as usize,
705 height as usize,
706 width as usize,
707 ),
708 result,
709 )
710 .map_err(|e| {
711 OCRError::tensor_operation(
712 "Failed to create batch normalization tensor in CHW format",
713 e,
714 )
715 })
716 }
717 TensorLayout::HWC => {
718 let mut result = vec![0.0f32; batch_size * (height * width * channels) as usize];
719
720 let img_size = (height * width * channels) as usize;
721 if batch_size == 1 {
722 let rgb_img = &rgb_imgs[0];
724 let batch_slice = &mut result[0..img_size];
725 for y in 0..height {
726 for x in 0..width {
727 let pixel = rgb_img.get_pixel(x, y);
728 for c in 0..channels {
729 let src_c = src_channels[c as usize];
730 let channel_value = pixel[src_c] as f32;
731 let dst_idx = (y * width * channels + x * channels + c) as usize;
732 batch_slice[dst_idx] =
733 channel_value * alpha[c as usize] + beta[c as usize];
734 }
735 }
736 }
737 } else {
738 result.par_chunks_mut(img_size).enumerate().for_each(
739 |(batch_idx, batch_slice)| {
740 let rgb_img = &rgb_imgs[batch_idx];
741 for y in 0..height {
742 for x in 0..width {
743 let pixel = rgb_img.get_pixel(x, y);
744 for c in 0..channels {
745 let src_c = src_channels[c as usize];
746 let channel_value = pixel[src_c] as f32;
747 let dst_idx =
748 (y * width * channels + x * channels + c) as usize;
749 batch_slice[dst_idx] =
750 channel_value * alpha[c as usize] + beta[c as usize];
751 }
752 }
753 }
754 },
755 );
756 }
757
758 ndarray::Array4::from_shape_vec(
759 (
760 batch_size,
761 height as usize,
762 width as usize,
763 channels as usize,
764 ),
765 result,
766 )
767 .map_err(|e| {
768 OCRError::tensor_operation(
769 "Failed to create batch normalization tensor in HWC format",
770 e,
771 )
772 })
773 }
774 }
775 }
776}
777
778#[cfg(test)]
779mod tests {
780 use super::*;
781 use image::{Rgb, RgbImage};
782
783 #[test]
784 fn test_normalize_image_color_order_rgb_vs_bgr_chw() -> Result<(), OCRError> {
785 let mut img = RgbImage::new(1, 1);
786 img.put_pixel(0, 0, Rgb([10, 20, 30])); let rgb = NormalizeImage::with_color_order(
789 Some(1.0),
790 Some(vec![0.0, 0.0, 0.0]),
791 Some(vec![1.0, 1.0, 1.0]),
792 Some(TensorLayout::CHW),
793 Some(ColorOrder::RGB),
794 )?;
795 let bgr = NormalizeImage::with_color_order(
796 Some(1.0),
797 Some(vec![0.0, 0.0, 0.0]),
798 Some(vec![1.0, 1.0, 1.0]),
799 Some(TensorLayout::CHW),
800 Some(ColorOrder::BGR),
801 )?;
802
803 let rgb_out = rgb.apply(vec![DynamicImage::ImageRgb8(img.clone())]);
804 let bgr_out = bgr.apply(vec![DynamicImage::ImageRgb8(img)]);
805
806 assert_eq!(rgb_out.len(), 1);
807 assert_eq!(bgr_out.len(), 1);
808 assert_eq!(rgb_out[0], vec![10.0, 20.0, 30.0]);
809 assert_eq!(bgr_out[0], vec![30.0, 20.0, 10.0]);
810 Ok(())
811 }
812
813 #[test]
814 fn test_normalize_image_mean_std_applied_in_output_channel_order() -> Result<(), OCRError> {
815 let mut img = RgbImage::new(1, 1);
816 img.put_pixel(0, 0, Rgb([11, 22, 33])); let rgb = NormalizeImage::with_color_order(
819 Some(1.0),
820 Some(vec![1.0, 2.0, 3.0]), Some(vec![2.0, 4.0, 5.0]), Some(TensorLayout::CHW),
823 Some(ColorOrder::RGB),
824 )?;
825 let bgr = NormalizeImage::with_color_order(
826 Some(1.0),
827 Some(vec![3.0, 2.0, 1.0]), Some(vec![5.0, 4.0, 2.0]), Some(TensorLayout::CHW),
830 Some(ColorOrder::BGR),
831 )?;
832
833 let rgb_out = rgb.apply(vec![DynamicImage::ImageRgb8(img.clone())]);
834 let bgr_out = bgr.apply(vec![DynamicImage::ImageRgb8(img)]);
835
836 assert_eq!(rgb_out[0], vec![5.0, 5.0, 6.0]); assert_eq!(bgr_out[0], vec![6.0, 5.0, 5.0]); Ok(())
839 }
840}