ruvector-scipix 2.0.4

Rust OCR engine for scientific documents - extract LaTeX, MathML from math equations, research papers, and technical diagrams with ONNX GPU acceleration
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
//! Text region detection and segmentation

use super::{RegionType, Result, TextRegion};
use image::GrayImage;
use std::collections::{HashMap, HashSet};

/// Detect and classify text regions in a grayscale (or binarized) image.
///
/// The pipeline runs in five steps: label connected foreground components,
/// compute one bounding box per component, drop boxes whose area is below
/// `min_region_size`, merge boxes that overlap or lie within 10 pixels of
/// each other, group the survivors into text lines, and finally assign each
/// box a [`RegionType`] using simple geometric heuristics.
///
/// # Arguments
/// * `image` - Input grayscale or binary image
/// * `min_region_size` - Minimum bounding-box area (in pixels) a component
///   must have to be kept
///
/// # Returns
/// `Ok` with one [`TextRegion`] per detected region; this function itself
/// never produces an `Err`.
///
/// # Example
/// ```no_run
/// use ruvector_scipix::preprocess::segmentation::find_text_regions;
/// # use image::GrayImage;
/// # let image = GrayImage::new(100, 100);
/// let regions = find_text_regions(&image, 100).unwrap();
/// println!("Found {} regions", regions.len());
/// ```
pub fn find_text_regions(image: &GrayImage, min_region_size: u32) -> Result<Vec<TextRegion>> {
    // Label every connected foreground blob, then reduce each to its bounding box.
    let label_grid = connected_components(image);
    let component_boxes = extract_bounding_boxes(&label_grid);

    // Discard tiny boxes, then coalesce neighbours within the fixed 10-pixel margin.
    let large_enough = filter_by_size(component_boxes, min_region_size);
    let coalesced = merge_overlapping_regions(large_enough, 10);

    // Group boxes into lines and attach a region type to each one.
    let lines = find_text_lines(image, &coalesced);
    Ok(classify_regions(image, lines))
}

/// Label connected foreground components with an iterative flood fill.
///
/// A pixel counts as foreground when its luma value is below 128. The result
/// is a row-major grid (`grid[y][x]`) with the same dimensions as `image`:
/// background cells hold `0`, and every 4-connected foreground component
/// holds its own label, assigned from `1` upwards in scan order.
fn connected_components(image: &GrayImage) -> Vec<Vec<u32>> {
    let (cols, rows) = image.dimensions();
    let mut label_grid = vec![vec![0u32; cols as usize]; rows as usize];
    let mut next_label = 1u32;

    for row in 0..rows {
        for col in 0..cols {
            // Skip pixels that already belong to a component or are background.
            if label_grid[row as usize][col as usize] != 0
                || image.get_pixel(col, row)[0] >= 128
            {
                continue;
            }

            // Fresh foreground pixel: paint its whole component with a new label.
            flood_fill(image, &mut label_grid, col, row, next_label);
            next_label += 1;
        }
    }

    label_grid
}

/// Paint the 4-connected foreground component containing the seed pixel.
///
/// Starting at (`start_x`, `start_y`), every reachable pixel that is still
/// unlabeled (`0` in `labels`) and dark (luma below 128) is assigned `label`.
/// An explicit stack is used instead of recursion.
fn flood_fill(image: &GrayImage, labels: &mut [Vec<u32>], start_x: u32, start_y: u32, label: u32) {
    let (width, height) = image.dimensions();
    let mut pending = vec![(start_x, start_y)];

    while let Some((cx, cy)) = pending.pop() {
        let inside = cx < width && cy < height;
        if !inside {
            continue;
        }

        let cell = &mut labels[cy as usize][cx as usize];
        let already_labeled = *cell != 0;
        if already_labeled || image.get_pixel(cx, cy)[0] >= 128 {
            continue;
        }
        *cell = label;

        // Queue the left, right, upper and lower neighbours that lie inside the image.
        if let Some(left) = cx.checked_sub(1) {
            pending.push((left, cy));
        }
        if cx + 1 < width {
            pending.push((cx + 1, cy));
        }
        if let Some(up) = cy.checked_sub(1) {
            pending.push((cx, up));
        }
        if cy + 1 < height {
            pending.push((cx, cy + 1));
        }
    }
}

/// Compute the bounding box of every labeled component.
///
/// `labels` is a row-major grid (`labels[y][x]`); cells equal to `0` are
/// ignored. The returned map goes from label to `(x, y, width, height)`,
/// where `width` and `height` include both extreme pixels.
fn extract_bounding_boxes(labels: &[Vec<u32>]) -> HashMap<u32, (u32, u32, u32, u32)> {
    // First pass: track inclusive extents as (min_x, min_y, max_x, max_y).
    let mut extents: HashMap<u32, (u32, u32, u32, u32)> = HashMap::new();

    for (row_idx, row) in labels.iter().enumerate() {
        let py = row_idx as u32;
        for (col_idx, &id) in row.iter().enumerate().filter(|&(_, &id)| id != 0) {
            let px = col_idx as u32;
            extents
                .entry(id)
                .and_modify(|extent| {
                    extent.0 = extent.0.min(px);
                    extent.1 = extent.1.min(py);
                    extent.2 = extent.2.max(px);
                    extent.3 = extent.3.max(py);
                })
                .or_insert((px, py, px, py));
        }
    }

    // Second pass: turn inclusive extents into origin + size.
    let mut boxes = HashMap::with_capacity(extents.len());
    for (id, (left, top, right, bottom)) in extents {
        boxes.insert(id, (left, top, right - left + 1, bottom - top + 1));
    }
    boxes
}

/// Keep only the bounding boxes whose area is at least `min_size` pixels.
///
/// # Arguments
/// * `bboxes` - Map from component label to `(x, y, width, height)`
/// * `min_size` - Minimum `width * height` a box must have to be kept
///
/// # Returns
/// The surviving boxes, without their labels. The order follows the map's
/// iteration order and is therefore unspecified.
fn filter_by_size(
    bboxes: HashMap<u32, (u32, u32, u32, u32)>,
    min_size: u32,
) -> Vec<(u32, u32, u32, u32)> {
    let threshold = u64::from(min_size);
    bboxes
        .into_values()
        // The area is computed in u64: the product of two u32 values can
        // exceed u32::MAX (e.g. a 70_000 x 70_000 box), which would panic in
        // debug builds or wrap around and wrongly drop the box in release.
        .filter(|&(_, _, w, h)| u64::from(w) * u64::from(h) >= threshold)
        .collect()
}

/// Greedily merge bounding boxes that overlap or lie close to each other.
///
/// Each not-yet-absorbed box is used as a seed. The seed repeatedly swallows
/// every later box that overlaps it or lies within `merge_distance` of it,
/// growing as it goes, until a full pass over the later boxes adds nothing.
///
/// # Arguments
/// * `regions` - Vector of bounding boxes (x, y, width, height)
/// * `merge_distance` - Maximum gap, in pixels, at which two boxes are merged
///
/// # Returns
/// The merged boxes, in the order of their seeds in `regions`.
pub fn merge_overlapping_regions(
    regions: Vec<(u32, u32, u32, u32)>,
    merge_distance: u32,
) -> Vec<(u32, u32, u32, u32)> {
    let mut absorbed: HashSet<usize> = HashSet::new();
    let mut result = Vec::new();

    for (seed_idx, &seed) in regions.iter().enumerate() {
        if absorbed.contains(&seed_idx) {
            continue;
        }

        let mut grown = seed;
        loop {
            let mut grew = false;

            // Only boxes after the seed are candidates; earlier ones were
            // either seeds themselves or already absorbed.
            for (idx, candidate) in regions.iter().enumerate().skip(seed_idx + 1) {
                if absorbed.contains(&idx)
                    || !boxes_overlap_or_close(&grown, candidate, merge_distance)
                {
                    continue;
                }
                grown = merge_boxes(&grown, candidate);
                absorbed.insert(idx);
                grew = true;
            }

            // A pass that absorbed nothing means the box has stopped growing.
            if !grew {
                break;
            }
        }

        result.push(grown);
        absorbed.insert(seed_idx);
    }

    result
}

/// Check whether two bounding boxes overlap or lie within `distance` of each other.
///
/// Boxes are `(x, y, width, height)`. Two boxes qualify when, on both axes,
/// each box starts no later than the other box's end (`start + size`) plus
/// `distance`. The comparisons are inclusive, so boxes that merely touch
/// count as close even with `distance == 0`.
///
/// # Arguments
/// * `box1`, `box2` - The boxes to compare
/// * `distance` - Maximum allowed gap in pixels
fn boxes_overlap_or_close(
    box1: &(u32, u32, u32, u32),
    box2: &(u32, u32, u32, u32),
    distance: u32,
) -> bool {
    let (x1, y1, w1, h1) = *box1;
    let (x2, y2, w2, h2) = *box2;

    // Widen to u64 before adding: `start + size + distance` can exceed
    // u32::MAX for boxes near the top of the coordinate range or for a large
    // `distance`, which would panic in debug builds and wrap in release.
    let reach = |start: u32, size: u32| u64::from(start) + u64::from(size) + u64::from(distance);

    let x_overlap = u64::from(x1) <= reach(x2, w2) && u64::from(x2) <= reach(x1, w1);
    let y_overlap = u64::from(y1) <= reach(y2, h2) && u64::from(y2) <= reach(y1, h1);

    x_overlap && y_overlap
}

/// Return the smallest bounding box that contains both inputs.
///
/// Boxes are `(x, y, width, height)`; the result uses the same layout.
fn merge_boxes(box1: &(u32, u32, u32, u32), box2: &(u32, u32, u32, u32)) -> (u32, u32, u32, u32) {
    let &(ax, ay, aw, ah) = box1;
    let &(bx, by, bw, bh) = box2;

    // Union of the two rectangles expressed as edges (right/bottom exclusive).
    let left = u32::min(ax, bx);
    let top = u32::min(ay, by);
    let right = u32::max(ax + aw, bx + bw);
    let bottom = u32::max(ay + ah, by + bh);

    (left, top, right - left, bottom - top)
}

/// Group bounding boxes into text lines by vertical position.
///
/// The boxes are sorted by their top edge (`y`). Walking the sorted list,
/// a box joins the current line when the difference between its top edge and
/// the previous box's top edge is less than half of the taller of the two
/// boxes; otherwise it starts a new line.
///
/// # Arguments
/// * `_image` - Currently unused
/// * `regions` - Bounding boxes as `(x, y, width, height)`
///
/// # Returns
/// One vector of boxes per line, ordered top to bottom; empty when
/// `regions` is empty.
pub fn find_text_lines(
    _image: &GrayImage,
    regions: &[(u32, u32, u32, u32)],
) -> Vec<Vec<(u32, u32, u32, u32)>> {
    let mut ordered = regions.to_vec();
    ordered.sort_by_key(|&(_, top, _, _)| top);

    let mut remaining = ordered.into_iter();
    let first = match remaining.next() {
        Some(region) => region,
        None => return Vec::new(),
    };

    let mut lines = Vec::new();
    let mut open_line = vec![first];
    // The most recently placed box; it is always the last entry of `open_line`.
    let mut previous = first;

    for region in remaining {
        let (_, top, _, height) = region;
        let (_, prev_top, _, prev_height) = previous;

        let tallest = prev_height.max(height);
        let offset = top.max(prev_top) - top.min(prev_top);

        if offset < tallest / 2 {
            open_line.push(region);
        } else {
            // Close the current line and open a new one seeded with this box.
            lines.push(std::mem::replace(&mut open_line, vec![region]));
        }
        previous = region;
    }

    lines.push(open_line);
    lines
}

/// Turn grouped bounding boxes into classified [`TextRegion`] values.
///
/// The line grouping is flattened; each box is classified on its own using
/// its aspect ratio (`width / height`), its height, and its foreground pixel
/// density. The first matching rule wins:
///
/// 1. aspect ratio above 10 -> `Table`
/// 2. aspect ratio below 0.5 and height above 50 -> `Figure`
/// 3. density above 0.3 and height below 30 -> `Math`
/// 4. anything else -> `Text`
///
/// Every region gets a fixed confidence of 0.8, a `text_height` equal to the
/// box height, and a baseline angle of 0.
fn classify_regions(
    image: &GrayImage,
    text_lines: Vec<Vec<(u32, u32, u32, u32)>>,
) -> Vec<TextRegion> {
    text_lines
        .into_iter()
        .flatten()
        .map(|bbox| {
            let (_, _, width, height) = bbox;

            // Features driving the heuristic below.
            let aspect_ratio = width as f32 / height as f32;
            let density = calculate_density(image, bbox);

            let is_very_wide = aspect_ratio > 10.0;
            let is_tall_and_narrow = aspect_ratio < 0.5 && height > 50;
            let is_small_and_dense = density > 0.3 && height < 30;

            let region_type = if is_very_wide {
                RegionType::Table
            } else if is_tall_and_narrow {
                RegionType::Figure
            } else if is_small_and_dense {
                RegionType::Math
            } else {
                RegionType::Text
            };

            TextRegion {
                region_type,
                bbox,
                confidence: 0.8, // placeholder: the heuristic yields no real score
                text_height: height as f32,
                baseline_angle: 0.0,
            }
        })
        .collect()
}

/// Fraction of foreground pixels inside a bounding box.
///
/// A pixel is foreground when its luma value is below 128. `bbox` is
/// `(x, y, width, height)` and must lie entirely inside `image`. Returns a
/// value in `0.0..=1.0`, or `0.0` for a box with zero area.
fn calculate_density(image: &GrayImage, bbox: (u32, u32, u32, u32)) -> f32 {
    let (x, y, width, height) = bbox;

    let area = (width * height) as f32;
    if area == 0.0 {
        return 0.0;
    }

    // Visit the box row by row and count the dark pixels.
    let dark_pixels: i32 = (y..(y + height))
        .flat_map(|row| (x..(x + width)).map(move |col| (col, row)))
        .map(|(col, row)| i32::from(image.get_pixel(col, row)[0] < 128))
        .sum();

    dark_pixels as f32 / area
}

// Unit tests for the segmentation helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;
    use image::Luma;

    // Build a 200x200 white image with three black rectangles stacked
    // vertically: 80x20 at (20, 20), 100x20 at (20, 60) and 60x20 at (20, 100).
    // The rectangles are 20 pixels apart vertically, which is more than the
    // merge distance of 10 used by `find_text_regions`.
    fn create_test_image_with_rectangles() -> GrayImage {
        let mut img = GrayImage::new(200, 200);

        // Fill with white
        for pixel in img.pixels_mut() {
            *pixel = Luma([255]);
        }

        // Draw some black rectangles (simulating text regions)
        for y in 20..40 {
            for x in 20..100 {
                img.put_pixel(x, y, Luma([0]));
            }
        }

        for y in 60..80 {
            for x in 20..120 {
                img.put_pixel(x, y, Luma([0]));
            }
        }

        for y in 100..120 {
            for x in 20..80 {
                img.put_pixel(x, y, Luma([0]));
            }
        }

        img
    }

    // End-to-end check: the full pipeline reports a region for each of the
    // three rectangles in the synthetic image.
    #[test]
    fn test_find_text_regions() {
        let img = create_test_image_with_rectangles();
        let regions = find_text_regions(&img, 100);

        assert!(regions.is_ok());
        let r = regions.unwrap();

        // Should find at least 3 regions
        assert!(r.len() >= 3);

        // Printed for manual inspection; visible with `cargo test -- --nocapture`.
        for region in r {
            println!("Region: {:?} at {:?}", region.region_type, region.bbox);
        }
    }

    // Labeling an image that contains foreground pixels must yield at least
    // one non-zero label.
    #[test]
    fn test_connected_components() {
        let img = create_test_image_with_rectangles();
        let components = connected_components(&img);

        // Check that we have non-zero labels
        let max_label = components
            .iter()
            .flat_map(|row| row.iter())
            .max()
            .unwrap_or(&0);

        assert!(*max_label > 0);
    }

    // The first two boxes overlap horizontally (x ranges 10..60 and 40..90)
    // and share the same rows; the third box is far away from both.
    #[test]
    fn test_merge_overlapping_regions() {
        let regions = vec![(10, 10, 50, 20), (40, 10, 50, 20), (100, 100, 30, 30)];

        let merged = merge_overlapping_regions(regions, 10);

        // First two should merge, third stays separate
        assert_eq!(merged.len(), 2);
    }

    // The union keeps the smaller origin and is at least as large as either
    // input. Only lower bounds on the size are asserted here.
    #[test]
    fn test_merge_boxes() {
        let box1 = (10, 10, 50, 20);
        let box2 = (40, 15, 30, 25);

        let merged = merge_boxes(&box1, &box2);

        assert_eq!(merged.0, 10); // min x
        assert_eq!(merged.1, 10); // min y
        assert!(merged.2 >= 50); // width
        assert!(merged.3 >= 25); // height
    }

    // Boxes that truly overlap are reported as close for any distance,
    // including zero.
    #[test]
    fn test_boxes_overlap() {
        let box1 = (10, 10, 50, 20);
        let box2 = (40, 10, 50, 20);

        assert!(boxes_overlap_or_close(&box1, &box2, 0));
        assert!(boxes_overlap_or_close(&box1, &box2, 10));
    }

    // Boxes separated on both axes are not close when no slack is allowed.
    #[test]
    fn test_boxes_dont_overlap() {
        let box1 = (10, 10, 20, 20);
        let box2 = (100, 100, 20, 20);

        assert!(!boxes_overlap_or_close(&box1, &box2, 0));
    }

    // Two rows of two boxes each: within a row the top edges differ by 2
    // pixels (below half the 20-pixel box height), while the rows are 38
    // pixels apart, so the grouping must produce exactly two lines.
    #[test]
    fn test_find_text_lines() {
        let regions = vec![
            (10, 10, 50, 20),
            (70, 12, 50, 20),
            (10, 50, 50, 20),
            (70, 52, 50, 20),
        ];

        // The image argument is not read by `find_text_lines`; any image works.
        let img = GrayImage::new(200, 100);
        let lines = find_text_lines(&img, &regions);

        // Should find 2 lines
        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0].len(), 2);
        assert_eq!(lines[1].len(), 2);
    }

    // A checkerboard pattern written over the whole 20x20 box gives a
    // foreground density of one half.
    #[test]
    fn test_calculate_density() {
        let mut img = GrayImage::new(100, 100);

        // Fill region with 50% black pixels
        for y in 10..30 {
            for x in 10..30 {
                let val = if (x + y) % 2 == 0 { 0 } else { 255 };
                img.put_pixel(x, y, Luma([val]));
            }
        }

        let density = calculate_density(&img, (10, 10, 20, 20));
        assert!((density - 0.5).abs() < 0.1);
    }

    // Only boxes whose area reaches the threshold of 500 pixels survive;
    // region 2 (100 pixels) is dropped.
    #[test]
    fn test_filter_by_size() {
        let mut bboxes = HashMap::new();
        bboxes.insert(1, (10, 10, 50, 50)); // 2500 pixels
        bboxes.insert(2, (100, 100, 10, 10)); // 100 pixels
        bboxes.insert(3, (200, 200, 30, 30)); // 900 pixels

        let filtered = filter_by_size(bboxes, 500);

        // Should keep regions 1 and 3
        assert_eq!(filtered.len(), 2);
    }
}