leptonica 0.1.0

Rust port of Leptonica image processing library
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
//! Text line detection for dewarping
//!
//! This module provides functionality to detect text line centers
//! in a binary image. These centers are used to build the disparity model.

use crate::core::{Pix, PixelDepth};
use crate::morph::{close_brick, erode_brick, open_brick};
use crate::recog::{RecogError, RecogResult};
use crate::region::{ConnectivityType, find_connected_components};

use super::types::TextLine;

/// Find the centers of text lines in a binary image.
///
/// The image is filtered morphologically so that each text line is
/// solidified within its x-height region, components containing long
/// vertical runs (e.g. embedded images) are removed, and for every remaining
/// connected component that is large enough, the mean row of the foreground
/// pixels is collected column by column.
/// These points are used to build the vertical disparity model.
///
/// # Arguments
///
/// * `pix` - Input binary image (1 bpp)
///
/// # Returns
///
/// A vector of `TextLine` objects, each containing points along the center
/// of a text line. The vector is empty when no suitable component is found.
///
/// # Errors
///
/// Returns `RecogError::UnsupportedDepth` if `pix` is not 1 bpp, and
/// propagates any error reported by the morphology, seed-fill, XOR or
/// connected-component steps.
pub fn find_textline_centers(pix: &Pix) -> RecogResult<Vec<TextLine>> {
    // Lower bounds for the horizontal closing widths (used on narrow images).
    const MIN_LETTER_CLOSING: u32 = 15;
    const MIN_WORD_CLOSING: u32 = 40;
    // Components narrower or flatter than this are not treated as text lines.
    const MIN_COMPONENT_WIDTH: u32 = 100;
    const MIN_COMPONENT_HEIGHT: u32 = 4;

    if pix.depth() != PixelDepth::Bit1 {
        return Err(RecogError::UnsupportedDepth {
            expected: "1 bpp",
            actual: pix.depth().bits(),
        });
    }

    let w = pix.width();

    // Filter to solidify text lines within the x-height region and to
    // remove ascenders/descenders.
    // Step 1: small vertical opening to remove noise.
    let denoised = open_brick(pix, 1, 3)?;

    // Step 2: horizontal closing to bridge gaps between letters.
    let csize1 = (w / 80).max(MIN_LETTER_CLOSING);
    let letters_joined = close_brick(&denoised, csize1, 1)?;

    // Step 3: opening with the same width to remove thin connections.
    let thinned = open_brick(&letters_joined, csize1, 1)?;

    // Step 4: larger closing to bridge gaps between words.
    let csize2 = (w / 30).max(MIN_WORD_CLOSING);
    let line_blobs = close_brick(&thinned, csize2, 1)?;

    // Remove tall components (embedded images): a 1x50 erosion leaves seeds
    // only where there are long vertical runs, the seed fill grows those
    // seeds back to their full components, and the XOR removes them.
    let seed = erode_brick(&line_blobs, 1, 50)?;
    let tall_components = seed_fill_binary(&seed, &line_blobs)?;
    let filtered = xor_pix(&line_blobs, &tall_components)?;

    let components = find_connected_components(&filtered, ConnectivityType::EightWay)?;

    let mut text_lines = Vec::new();
    for comp in components.iter() {
        let bw = comp.bounds.w as u32;
        let bh = comp.bounds.h as u32;
        if bw < MIN_COMPONENT_WIDTH || bh < MIN_COMPONENT_HEIGHT {
            continue;
        }

        // Per-column mean row of the foreground inside the bounding box.
        // NOTE(review): the scan reads the whole filtered image inside the
        // box, so pixels of a neighbouring component whose box overlaps this
        // one would be averaged in as well - confirm whether the component's
        // own mask should be used instead.
        let centers =
            get_mean_verticals_from_box(&filtered, comp.bounds.x, comp.bounds.y, bw, bh);
        if !centers.is_empty() {
            text_lines.push(TextLine::new(centers));
        }
    }

    Ok(text_lines)
}

/// Get the mean foreground row of each column within a bounding box.
///
/// For each x-coordinate of the box that lies inside the image, computes the
/// mean y-coordinate of all non-zero pixels of that column that lie inside
/// the box. Columns without any foreground pixel produce no entry.
///
/// # Arguments
///
/// * `pix` - Image to scan; any non-zero pixel counts as foreground
/// * `bx`, `by` - Top-left corner of the box (may lie outside the image)
/// * `bw`, `bh` - Width and height of the box
///
/// # Returns
///
/// `(x, mean_y)` pairs in increasing `x` order.
fn get_mean_verticals_from_box(pix: &Pix, bx: i32, by: i32, bw: u32, bh: u32) -> Vec<(f32, f32)> {
    // Clip one axis of the box to `[0, limit)`. The arithmetic is done in
    // i64 so that `origin + len` cannot overflow or wrap around.
    let clip = |origin: i32, len: u32, limit: u32| -> (u32, u32) {
        let limit = i64::from(limit);
        let lo = i64::from(origin).clamp(0, limit);
        let hi = (i64::from(origin) + i64::from(len)).clamp(lo, limit);
        (lo as u32, hi as u32)
    };

    let (x_lo, x_hi) = clip(bx, bw, pix.width());
    let (y_lo, y_hi) = clip(by, bh, pix.height());

    let mut centers = Vec::with_capacity((x_hi - x_lo) as usize);

    for x in x_lo..x_hi {
        // 64-bit accumulators: a sum of row indices can exceed u32::MAX on
        // very tall columns.
        let mut sum_y = 0u64;
        let mut count = 0u64;

        for y in y_lo..y_hi {
            // x and y were clipped to the image above.
            if pix.get_pixel_unchecked(x, y) != 0 {
                sum_y += u64::from(y);
                count += 1;
            }
        }

        if count > 0 {
            let mean_y = (sum_y as f64 / count as f64) as f32;
            centers.push((x as f32, mean_y));
        }
    }

    centers
}

/// Perform seed fill (binary reconstruction).
///
/// Starting from a copy of `seed`, repeatedly turns on every pixel that is
/// off, is set in `mask`, and has at least one 8-connected neighbour that is
/// already on, until nothing more can be added.
///
/// The fill is driven by a work list rather than by repeated full-image
/// sweeps, so each pixel is expanded at most once and the result is always
/// the complete fixed point, however convoluted the mask is.
///
/// Seed pixels are kept as they are, even where `mask` is off.
///
/// # Errors
///
/// Returns `RecogError::InvalidParameter` if `seed` and `mask` differ in
/// width or height.
fn seed_fill_binary(seed: &Pix, mask: &Pix) -> RecogResult<Pix> {
    let w = seed.width();
    let h = seed.height();

    if w != mask.width() || h != mask.height() {
        return Err(RecogError::InvalidParameter(
            "seed and mask must have same dimensions".to_string(),
        ));
    }

    // Start from a private copy of the seed.
    let mut filled = seed
        .deep_clone()
        .try_into_mut()
        .expect("deep-cloned Pix should be uniquely owned");

    // Every pixel that is on still has to offer itself to its neighbours.
    let mut frontier: Vec<(u32, u32)> = Vec::new();
    for y in 0..h {
        for x in 0..w {
            if filled.get_pixel_unchecked(x, y) != 0 {
                frontier.push((x, y));
            }
        }
    }

    while let Some((x, y)) = frontier.pop() {
        // 3x3 neighbourhood clipped to the image; w and h are at least 1
        // here because the work list is non-empty.
        let x_lo = x.saturating_sub(1);
        let y_lo = y.saturating_sub(1);
        let x_hi = (x + 1).min(w - 1);
        let y_hi = (y + 1).min(h - 1);

        for ny in y_lo..=y_hi {
            for nx in x_lo..=x_hi {
                // The centre pixel is already on, so it is skipped here too.
                if filled.get_pixel_unchecked(nx, ny) == 0
                    && mask.get_pixel_unchecked(nx, ny) != 0
                {
                    filled.set_pixel_unchecked(nx, ny, 1);
                    frontier.push((nx, ny));
                }
            }
        }
    }

    Ok(filled.into())
}

/// Pixel-wise XOR of two binary images.
///
/// Produces a new 1 bpp image whose pixel at each position is the XOR of the
/// corresponding pixels of `pix1` and `pix2`.
///
/// # Errors
///
/// Returns `RecogError::InvalidParameter` if the two images differ in width
/// or height, and propagates any error from allocating the result image.
fn xor_pix(pix1: &Pix, pix2: &Pix) -> RecogResult<Pix> {
    let (width, height) = (pix1.width(), pix1.height());

    let same_size = width == pix2.width() && height == pix2.height();
    if !same_size {
        return Err(RecogError::InvalidParameter(
            "images must have same dimensions".to_string(),
        ));
    }

    let mut out = Pix::new(width, height, PixelDepth::Bit1)?
        .try_into_mut()
        .unwrap();

    for row in 0..height {
        for col in 0..width {
            let bit = pix1.get_pixel_unchecked(col, row) ^ pix2.get_pixel_unchecked(col, row);
            out.set_pixel_unchecked(col, row, bit);
        }
    }

    Ok(out.into())
}

/// Drop the lines that are short compared to the longest one.
///
/// A line is kept when its horizontal extent is at least `min_fraction`
/// times the largest horizontal extent found in `lines`. The relative order
/// of the surviving lines is preserved.
///
/// # Arguments
///
/// * `lines` - Vector of text lines
/// * `min_fraction` - Minimum fraction of longest line (typically 0.8)
///
/// # Returns
///
/// Filtered vector of text lines
pub fn remove_short_lines(lines: Vec<TextLine>, min_fraction: f32) -> Vec<TextLine> {
    if lines.is_empty() {
        return lines;
    }

    // Largest extent of any line, never below zero.
    let mut longest = 0.0f32;
    for line in &lines {
        longest = longest.max(line.horizontal_extent());
    }

    let threshold = longest * min_fraction;

    let mut kept = lines;
    kept.retain(|line| line.horizontal_extent() >= threshold);
    kept
}

/// Check if lines have valid coverage of the image height.
///
/// Each line that reports a `mid_y` is assigned to the top half
/// (`mid_y < image_height / 2`) or to the bottom half (everything else).
/// The midpoint is computed in floating point, so odd image heights are
/// split exactly in the middle.
///
/// # Arguments
///
/// * `lines` - Detected text lines
/// * `image_height` - Height of the image the lines were found in
/// * `min_lines` - Minimum total number of lines required
///
/// # Returns
///
/// `true` if there are at least `min_lines` lines in total and at least
/// 3 lines in each half; `false` otherwise.
pub fn is_line_coverage_valid(lines: &[TextLine], image_height: u32, min_lines: u32) -> bool {
    // Each half of the page must contain at least this many lines.
    const MIN_LINES_PER_HALF: usize = 3;

    if lines.len() < min_lines as usize {
        return false;
    }

    // Integer division would move the midpoint up by half a pixel for odd
    // heights, so divide in floating point.
    let mid_y = image_height as f32 / 2.0;

    let (n_top, n_bot) = lines
        .iter()
        .filter_map(|line| line.mid_y())
        .fold((0usize, 0usize), |(top, bot), y| {
            if y < mid_y {
                (top + 1, bot)
            } else {
                (top, bot + 1)
            }
        });

    n_top >= MIN_LINES_PER_HALF && n_bot >= MIN_LINES_PER_HALF
}

/// Sort lines by their vertical position (top to bottom)
pub fn sort_lines_by_y(lines: &mut [TextLine]) {
    lines.sort_by(|a, b| {
        let ya = a.mid_y().unwrap_or(0.0);
        let yb = b.mid_y().unwrap_or(0.0);
        ya.partial_cmp(&yb).unwrap_or(std::cmp::Ordering::Equal)
    });
}

/// Estimate the predominant text-line flow direction of an image.
///
/// Detects text lines with [`find_textline_centers`] and performs a pooled
/// least-squares fit of the slope *along* the lines: the center points of
/// every line are taken relative to that line's own centroid, so the
/// offsets between different lines (which describe the page layout, not the
/// direction of the text) do not influence the result. Lines with more
/// horizontal spread carry more weight.
///
/// # Arguments
///
/// * `pix` - Input binary image (1 bpp)
///
/// # Returns
///
/// `atan(dy/dx)` in radians, where `x` and `y` are the coordinates stored in
/// the detected center points (0.0 = lines run exactly along the x axis).
///
/// NOTE(review): if y grows downward, as is usual for images, a positive
/// value means the lines descend toward the right - confirm which sign
/// convention callers expect.
///
/// Returns 0.0 if fewer than two text lines are found or if the center
/// points have no horizontal spread.
///
/// # Errors
///
/// Returns `RecogError::UnsupportedDepth` if `pix` is not 1 bpp, and
/// propagates any error from [`find_textline_centers`].
pub fn pix_find_textline_flow_direction(pix: &Pix) -> RecogResult<f32> {
    if pix.depth() != PixelDepth::Bit1 {
        return Err(RecogError::UnsupportedDepth {
            expected: "1 bpp",
            actual: pix.depth().bits(),
        });
    }

    let lines = find_textline_centers(pix)?;
    if lines.len() < 2 {
        return Ok(0.0);
    }

    // Pooled within-line sums: sum of dx*dx and of dx*dy, with dx and dy
    // measured from each line's own centroid.
    let mut sxx = 0.0f64;
    let mut sxy = 0.0f64;

    for line in lines.iter() {
        let count = line.points.len();
        if count < 2 {
            // A single point carries no direction information.
            continue;
        }

        let n = count as f64;
        let mean_x = line.points.iter().map(|p| p.0 as f64).sum::<f64>() / n;
        let mean_y = line.points.iter().map(|p| p.1 as f64).sum::<f64>() / n;

        for p in line.points.iter() {
            let dx = p.0 as f64 - mean_x;
            let dy = p.1 as f64 - mean_y;
            sxx += dx * dx;
            sxy += dx * dy;
        }
    }

    // No horizontal spread at all: the slope is undefined.
    if sxx < 1e-10 {
        return Ok(0.0);
    }

    Ok((sxy / sxx).atan() as f32)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Build a horizontal line at height `y`, sampled every 10 units from
    /// `x_start` up to and including `x_end`.
    fn create_test_line(y: f32, x_start: f32, x_end: f32) -> TextLine {
        let samples: Vec<(f32, f32)> =
            std::iter::successors(Some(x_start), |x| Some(x + 10.0))
                .take_while(|x| *x <= x_end)
                .map(|x| (x, y))
                .collect();
        TextLine::new(samples)
    }

    /// Build one line spanning x = 0..=100 for every given height.
    fn lines_at(heights: &[f32]) -> Vec<TextLine> {
        heights
            .iter()
            .map(|&y| create_test_line(y, 0.0, 100.0))
            .collect()
    }

    #[test]
    fn test_remove_short_lines() {
        // Extents are 100, 50, 90 and 200; the longest one is 200.
        let input = vec![
            create_test_line(10.0, 0.0, 100.0),
            create_test_line(30.0, 0.0, 50.0),
            create_test_line(50.0, 0.0, 90.0),
            create_test_line(70.0, 0.0, 200.0),
        ];

        // With a fraction of 0.5 the threshold is 100, so only the lines
        // with extents 100 and 200 survive.
        let survivors = remove_short_lines(input, 0.5);
        assert_eq!(survivors.len(), 2);
    }

    #[test]
    fn test_remove_short_lines_empty() {
        let survivors = remove_short_lines(Vec::<TextLine>::new(), 0.8);
        assert!(survivors.is_empty());
    }

    #[test]
    fn test_is_line_coverage_valid() {
        // Image height 500 puts the midpoint at 250:
        //   top half    (y <  250): 50, 100, 150, 200
        //   bottom half (y >= 250): 250, 300, 350, 400
        let page = lines_at(&[50.0, 100.0, 150.0, 200.0, 250.0, 300.0, 350.0, 400.0]);
        assert!(is_line_coverage_valid(&page, 500, 6));
    }

    #[test]
    fn test_is_line_coverage_invalid_not_enough_lines() {
        // Two lines cannot satisfy a minimum of six.
        let page = lines_at(&[50.0, 100.0]);
        assert!(!is_line_coverage_valid(&page, 500, 6));
    }

    #[test]
    fn test_pix_find_textline_flow_direction_empty() {
        let blank = Pix::new(100, 100, PixelDepth::Bit1).unwrap();
        let outcome = pix_find_textline_flow_direction(&blank);
        assert!(outcome.is_ok());
        // Empty image with no text lines → fallback to 0.0 radians (horizontal)
        let angle = outcome.unwrap();
        assert!((angle - 0.0).abs() < 0.1);
    }

    #[test]
    fn test_sort_lines_by_y() {
        let mut page = lines_at(&[100.0, 50.0, 200.0, 75.0]);

        sort_lines_by_y(&mut page);

        assert_eq!(page.len(), 4);
        for (line, expected) in page.iter().zip([50.0, 75.0, 100.0, 200.0]) {
            assert_eq!(line.mid_y(), Some(expected));
        }
    }
}