Skip to main content

fpo_rust/
process.rs

1//! Image pre-processing and model-output post-processing.
2
3use crate::config::{ImageColorMode, ImageInterpolation, PaddingColor, PlateConfig};
4use anyhow::{bail, Context};
5use image::{
6    imageops::{self, FilterType},
7    DynamicImage, GrayImage, ImageBuffer, Luma, Rgb, RgbImage,
8};
9use std::path::Path;
10
11// ---------------------------------------------------------------------------
12// Image reading
13// ---------------------------------------------------------------------------
14
15/// Read an image from disk in the colour mode specified by `config`.
16///
17/// Returns a `DynamicImage` whose colour space matches the config:
18/// - `Grayscale` → `DynamicImage::ImageLuma8`
19/// - `Rgb`       → `DynamicImage::ImageRgb8`
20pub fn read_plate_image(
21    path: impl AsRef<Path>,
22    color_mode: &ImageColorMode,
23) -> anyhow::Result<DynamicImage> {
24    let img = image::open(path.as_ref())
25        .with_context(|| format!("Cannot open image: {}", path.as_ref().display()))?;
26
27    let out = match color_mode {
28        ImageColorMode::Grayscale => DynamicImage::ImageLuma8(img.to_luma8()),
29        ImageColorMode::Rgb => DynamicImage::ImageRgb8(img.to_rgb8()),
30    };
31    Ok(out)
32}
33
34// ---------------------------------------------------------------------------
35// Interpolation helpers
36// ---------------------------------------------------------------------------
37
38fn to_filter(interp: &ImageInterpolation) -> FilterType {
39    match interp {
40        ImageInterpolation::Nearest => FilterType::Nearest,
41        ImageInterpolation::Linear => FilterType::Triangle,
42        ImageInterpolation::Cubic => FilterType::CatmullRom,
43        ImageInterpolation::Area => FilterType::Lanczos3, // no "area" in `image`
44        ImageInterpolation::Lanczos4 => FilterType::Lanczos3,
45    }
46}
47
48// ---------------------------------------------------------------------------
49// Resizing
50// ---------------------------------------------------------------------------
51
52/// Resize a dynamic image to `(target_w, target_h)`, honouring the config options.
53///
54/// When `keep_aspect_ratio` is true the image is letter-boxed with `padding_color`.
55/// The output always has the colour space implied by `color_mode`.
56pub fn resize_image(
57    img: DynamicImage,
58    target_h: u32,
59    target_w: u32,
60    color_mode: &ImageColorMode,
61    keep_aspect_ratio: bool,
62    interp: &ImageInterpolation,
63    padding_color: &PaddingColor,
64) -> anyhow::Result<DynamicImage> {
65    let filter = to_filter(interp);
66
67    if !keep_aspect_ratio {
68        let resized = img.resize_exact(target_w, target_h, filter);
69        return Ok(match color_mode {
70            ImageColorMode::Grayscale => DynamicImage::ImageLuma8(resized.to_luma8()),
71            ImageColorMode::Rgb => DynamicImage::ImageRgb8(resized.to_rgb8()),
72        });
73    }
74
75    // --- letter-box ---
76    let (orig_w, orig_h) = (img.width(), img.height());
77    let scale = (target_w as f64 / orig_w as f64).min(target_h as f64 / orig_h as f64);
78    let new_w = (orig_w as f64 * scale).round() as u32;
79    let new_h = (orig_h as f64 * scale).round() as u32;
80
81    let resized = img.resize_exact(new_w, new_h, filter);
82
83    let pad_left = ((target_w - new_w) as f64 / 2.0 - 0.1).round() as u32;
84    let pad_top = ((target_h - new_h) as f64 / 2.0 - 0.1).round() as u32;
85
86    match color_mode {
87        ImageColorMode::Grayscale => {
88            let fill = Luma([padding_color.as_gray()]);
89            let mut canvas: GrayImage = ImageBuffer::from_pixel(target_w, target_h, fill);
90            imageops::overlay(&mut canvas, &resized.to_luma8(), pad_left as i64, pad_top as i64);
91            Ok(DynamicImage::ImageLuma8(canvas))
92        }
93        ImageColorMode::Rgb => {
94            let [r, g, b] = padding_color.as_rgb();
95            let fill = Rgb([r, g, b]);
96            let mut canvas: RgbImage = ImageBuffer::from_pixel(target_w, target_h, fill);
97            imageops::overlay(&mut canvas, &resized.to_rgb8(), pad_left as i64, pad_top as i64);
98            Ok(DynamicImage::ImageRgb8(canvas))
99        }
100    }
101}
102
103/// Convenience wrapper: read an image from disk and resize it.
104pub fn read_and_resize_plate_image(
105    path: impl AsRef<Path>,
106    cfg: &PlateConfig,
107) -> anyhow::Result<DynamicImage> {
108    let img = read_plate_image(path, &cfg.image_color_mode)?;
109    resize_image(
110        img,
111        cfg.img_height,
112        cfg.img_width,
113        &cfg.image_color_mode,
114        cfg.keep_aspect_ratio,
115        &cfg.interpolation,
116        &cfg.padding_color,
117    )
118}
119
120// ---------------------------------------------------------------------------
121// Pre-processing: image(s) → flat u8 tensor (N, H, W, C)
122// ---------------------------------------------------------------------------
123
124/// Convert a `DynamicImage` into a flat `Vec<u8>` in `(H, W, C)` order.
125pub fn image_to_hwc(img: &DynamicImage, color_mode: &ImageColorMode) -> Vec<u8> {
126    match color_mode {
127        ImageColorMode::Grayscale => img.to_luma8().into_raw(),
128        ImageColorMode::Rgb => img.to_rgb8().into_raw(),
129    }
130}
131
132/// Build the ONNX-ready `u8` tensor `(N, H, W, C)` from a batch of images.
133///
134/// Each image must already have the correct `(H, W)` dimensions.
135pub fn images_to_batch(imgs: &[DynamicImage], cfg: &PlateConfig) -> Vec<u8> {
136    imgs.iter()
137        .flat_map(|img| image_to_hwc(img, &cfg.image_color_mode))
138        .collect()
139}
140
141// ---------------------------------------------------------------------------
142// Post-processing: raw model output → predictions
143// ---------------------------------------------------------------------------
144
/// Result of decoding the model output for a single image.
#[derive(Debug, Clone)]
pub struct PlatePrediction {
    /// The recognised license-plate text.
    pub plate: String,
    /// One confidence score per character slot; `Some` only when
    /// `return_confidence = true`.
    pub char_probs: Option<Vec<f32>>,
    /// Region / country label chosen by the region head; `None` when the model has no
    /// region head.
    pub region: Option<String>,
    /// Score of the chosen region; `Some` only when a region was decoded and
    /// `return_confidence = true`.
    pub region_prob: Option<f32>,
}
158
159/// Decode the raw plate-head output tensor into `PlatePrediction` values.
160///
161/// # Parameters
162/// * `model_output`     – flat f32 slice of shape `(N * max_plate_slots * vocab_size)`.
163/// * `n`                – batch size.
164/// * `max_plate_slots`  – number of character positions.
165/// * `alphabet`         – the model's character set.
166/// * `pad_char`         – padding character to strip from the right.
167/// * `remove_pad_char`  – whether to strip trailing `pad_char`.
168/// * `return_confidence`– include per-character probabilities.
169/// * `region_output`    – optional flat f32 slice `(N * num_regions)` (already softmaxed).
170/// * `region_labels`    – label list for the region head.
171pub fn postprocess_output(
172    model_output: &[f32],
173    n: usize,
174    max_plate_slots: usize,
175    alphabet: &str,
176    pad_char: char,
177    remove_pad_char: bool,
178    return_confidence: bool,
179    region_output: Option<&[f32]>,
180    region_labels: Option<&[String]>,
181) -> anyhow::Result<Vec<PlatePrediction>> {
182    let vocab_size = alphabet.chars().count();
183    if model_output.len() != n * max_plate_slots * vocab_size {
184        bail!(
185            "Unexpected model output length: got {}, expected {} (n={n}, slots={max_plate_slots}, vocab={vocab_size})",
186            model_output.len(),
187            n * max_plate_slots * vocab_size
188        );
189    }
190
191    let chars: Vec<char> = alphabet.chars().collect();
192    let mut results = Vec::with_capacity(n);
193
194    for i in 0..n {
195        let sample = &model_output[i * max_plate_slots * vocab_size..(i + 1) * max_plate_slots * vocab_size];
196
197        let mut plate = String::with_capacity(max_plate_slots);
198        let mut probs = if return_confidence {
199            Some(Vec::with_capacity(max_plate_slots))
200        } else {
201            None
202        };
203
204        for slot in 0..max_plate_slots {
205            let logits = &sample[slot * vocab_size..(slot + 1) * vocab_size];
206            let (best_idx, &best_val) = logits
207                .iter()
208                .enumerate()
209                .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
210                .unwrap();
211            plate.push(chars[best_idx]);
212            if let Some(ref mut p) = probs {
213                p.push(best_val);
214            }
215        }
216
217        if remove_pad_char {
218            while plate.ends_with(pad_char) {
219                plate.pop();
220            }
221        }
222
223        // Region
224        let (region, region_prob) = match (region_output, region_labels) {
225            (Some(ro), Some(rl)) => {
226                let num_regions = rl.len();
227                if num_regions == 0 {
228                    (None, None)
229                } else {
230                    let rsample = &ro[i * num_regions..(i + 1) * num_regions];
231                    let (ridx, &rval) = rsample
232                        .iter()
233                        .enumerate()
234                        .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
235                        .unwrap();
236                    let label = rl.get(ridx).map(|s| s.clone());
237                    let prob = if return_confidence { Some(rval) } else { None };
238                    (label, prob)
239                }
240            }
241            _ => (None, None),
242        };
243
244        results.push(PlatePrediction {
245            plate,
246            char_probs: probs,
247            region,
248            region_prob,
249        });
250    }
251
252    Ok(results)
253}