Skip to main content

superbook_pdf/margin/
group.rs

1//! Group Crop Region Analysis
2//!
3//! Implements Tukey fence outlier removal for unified crop regions
4//! across multiple pages.
5
6use super::types::{ContentRect, MarginError, Result};
7use rayon::prelude::*;
8use std::path::{Path, PathBuf};
9
10// ============================================================
11// Constants
12// ============================================================
13
/// Tukey fence multiplier `k`: an edge value counts as an outlier when it lies
/// more than `k * IQR` below Q1 or above Q3.
const TUKEY_K: f64 = 1.5;

/// Smallest fraction of the valid pages that must survive outlier removal;
/// below this the analyzer falls back to using every valid page.
const MIN_INLIER_RATIO: f64 = 0.5;

/// Smallest number of inlier pages required; with fewer the analyzer falls
/// back to using every valid page.
const MIN_INLIER_COUNT: usize = 3;
22
23// ============================================================
24// Data Structures
25// ============================================================
26
27/// Bounding box with page information
28#[derive(Debug, Clone)]
29pub struct PageBoundingBox {
30    /// Page number (1-indexed)
31    pub page_number: usize,
32    /// Bounding box rectangle
33    pub bounding_box: ContentRect,
34    /// Whether this is an odd page
35    pub is_odd: bool,
36}
37
38impl PageBoundingBox {
39    /// Create new page bounding box
40    pub fn new(page_number: usize, bounding_box: ContentRect) -> Self {
41        Self {
42            page_number,
43            bounding_box,
44            is_odd: page_number % 2 == 1,
45        }
46    }
47
48    /// Check if bounding box is valid (non-zero area)
49    pub fn is_valid(&self) -> bool {
50        self.bounding_box.width > 0 && self.bounding_box.height > 0
51    }
52
53    /// Get the right edge coordinate
54    pub fn right(&self) -> u32 {
55        self.bounding_box.x + self.bounding_box.width
56    }
57
58    /// Get the bottom edge coordinate
59    pub fn bottom(&self) -> u32 {
60        self.bounding_box.y + self.bounding_box.height
61    }
62}
63
64/// Group crop region result
65#[derive(Debug, Clone, Default)]
66pub struct GroupCropRegion {
67    /// Left edge
68    pub left: u32,
69    /// Top edge
70    pub top: u32,
71    /// Width
72    pub width: u32,
73    /// Height
74    pub height: u32,
75    /// Number of inlier pages used
76    pub inlier_count: usize,
77    /// Total pages in group
78    pub total_count: usize,
79}
80
81impl GroupCropRegion {
82    /// Check if the region is valid
83    pub fn is_valid(&self) -> bool {
84        self.width > 0 && self.height > 0
85    }
86
87    /// Get right edge
88    pub fn right(&self) -> u32 {
89        self.left + self.width
90    }
91
92    /// Get bottom edge
93    pub fn bottom(&self) -> u32 {
94        self.top + self.height
95    }
96
97    /// Convert to ContentRect
98    pub fn to_content_rect(&self) -> ContentRect {
99        ContentRect {
100            x: self.left,
101            y: self.top,
102            width: self.width,
103            height: self.height,
104        }
105    }
106}
107
108/// Unified crop regions for odd and even pages
109#[derive(Debug, Clone)]
110pub struct UnifiedCropRegions {
111    /// Crop region for odd pages
112    pub odd_region: GroupCropRegion,
113    /// Crop region for even pages
114    pub even_region: GroupCropRegion,
115}
116
117// ============================================================
118// Group Crop Analyzer
119// ============================================================
120
121/// Group crop region analyzer using Tukey fence for outlier removal
122pub struct GroupCropAnalyzer;
123
124impl GroupCropAnalyzer {
125    /// Decide the optimal crop region for a group of pages using Tukey fence
126    ///
127    /// Algorithm:
128    /// 1. Collect bounding boxes from all pages
129    /// 2. Calculate Q1, Q3, IQR for each edge (left, top, right, bottom)
130    /// 3. Apply Tukey fence (k=1.5) to identify outliers
131    /// 4. Remove pages where ANY edge is an outlier
132    /// 5. Calculate median of inliers for final crop region
133    pub fn decide_group_crop_region(bounding_boxes: &[PageBoundingBox]) -> GroupCropRegion {
134        // Validation
135        if bounding_boxes.is_empty() {
136            return GroupCropRegion::default();
137        }
138
139        // Filter out invalid bounding boxes (zero area)
140        let valid: Vec<&PageBoundingBox> = bounding_boxes.iter().filter(|b| b.is_valid()).collect();
141
142        if valid.is_empty() {
143            return GroupCropRegion::default();
144        }
145
146        // Extract and sort edge values
147        let mut lefts: Vec<u32> = valid.iter().map(|b| b.bounding_box.x).collect();
148        let mut tops: Vec<u32> = valid.iter().map(|b| b.bounding_box.y).collect();
149        let mut rights: Vec<u32> = valid.iter().map(|b| b.right()).collect();
150        let mut bottoms: Vec<u32> = valid.iter().map(|b| b.bottom()).collect();
151
152        lefts.sort_unstable();
153        tops.sort_unstable();
154        rights.sort_unstable();
155        bottoms.sort_unstable();
156
157        // Calculate quartiles and IQR for each edge
158        let (q1_l, q3_l, iqr_l) = Self::calculate_iqr(&lefts);
159        let (q1_t, q3_t, iqr_t) = Self::calculate_iqr(&tops);
160        let (q1_r, q3_r, iqr_r) = Self::calculate_iqr(&rights);
161        let (q1_b, q3_b, iqr_b) = Self::calculate_iqr(&bottoms);
162
163        // Identify inliers (pages where no edge is an outlier)
164        let inliers: Vec<&PageBoundingBox> = valid
165            .iter()
166            .filter(|b| {
167                !Self::is_outlier(b.bounding_box.x, q1_l, q3_l, iqr_l)
168                    && !Self::is_outlier(b.bounding_box.y, q1_t, q3_t, iqr_t)
169                    && !Self::is_outlier(b.right(), q1_r, q3_r, iqr_r)
170                    && !Self::is_outlier(b.bottom(), q1_b, q3_b, iqr_b)
171            })
172            .copied()
173            .collect();
174
175        // If too few inliers, fall back to using all valid data
176        let use_inliers = if inliers.len() >= MIN_INLIER_COUNT
177            && inliers.len() as f64 >= valid.len() as f64 * MIN_INLIER_RATIO
178        {
179            inliers
180        } else {
181            valid
182        };
183
184        // Calculate median for final crop region
185        let lefts: Vec<u32> = use_inliers.iter().map(|b| b.bounding_box.x).collect();
186        let tops: Vec<u32> = use_inliers.iter().map(|b| b.bounding_box.y).collect();
187        let rights: Vec<u32> = use_inliers.iter().map(|b| b.right()).collect();
188        let bottoms: Vec<u32> = use_inliers.iter().map(|b| b.bottom()).collect();
189
190        let left = Self::median_u32(&lefts);
191        let top = Self::median_u32(&tops);
192        let right = Self::median_u32(&rights);
193        let bottom = Self::median_u32(&bottoms);
194
195        // Calculate width and height
196        let width = right.saturating_sub(left);
197        let height = bottom.saturating_sub(top);
198
199        GroupCropRegion {
200            left,
201            top,
202            width,
203            height,
204            inlier_count: use_inliers.len(),
205            total_count: bounding_boxes.len(),
206        }
207    }
208
209    /// Unify crop regions for odd and even page groups
210    pub fn unify_odd_even_regions(bounding_boxes: &[PageBoundingBox]) -> UnifiedCropRegions {
211        Self::unify_and_expand_regions(bounding_boxes, 0, 0, 0)
212    }
213
214    /// Unify crop regions with Y coordinate unification, margin expansion, and size limits
215    ///
216    /// This function implements the full C# algorithm:
217    /// 1. Calculate separate crop regions for odd/even pages
218    /// 2. Unify Y coordinates (use min top, max bottom)
219    /// 3. Expand width/height by margin_percent
220    /// 4. Center the expansion
221    /// 5. Clamp to image bounds
222    pub fn unify_and_expand_regions(
223        bounding_boxes: &[PageBoundingBox],
224        margin_percent: u32,
225        max_width: u32,
226        max_height: u32,
227    ) -> UnifiedCropRegions {
228        // Split into odd and even groups
229        let odd_boxes: Vec<PageBoundingBox> = bounding_boxes
230            .iter()
231            .filter(|b| b.is_odd)
232            .cloned()
233            .collect();
234        let even_boxes: Vec<PageBoundingBox> = bounding_boxes
235            .iter()
236            .filter(|b| !b.is_odd)
237            .cloned()
238            .collect();
239
240        // Calculate crop region for each group
241        let mut odd_region = Self::decide_group_crop_region(&odd_boxes);
242        let mut even_region = Self::decide_group_crop_region(&even_boxes);
243
244        // Unify Y coordinates (min top, max bottom) for consistent vertical positioning
245        if odd_region.is_valid() && even_region.is_valid() {
246            let unified_top = odd_region.top.min(even_region.top);
247            let unified_bottom = odd_region.bottom().max(even_region.bottom());
248
249            odd_region.top = unified_top;
250            odd_region.height = unified_bottom.saturating_sub(unified_top);
251
252            even_region.top = unified_top;
253            even_region.height = unified_bottom.saturating_sub(unified_top);
254        }
255
256        // Expand both regions to match the larger one and add margin
257        if odd_region.is_valid() && even_region.is_valid() {
258            let target_width = odd_region.width.max(even_region.width);
259            let target_height = odd_region.height.max(even_region.height);
260
261            // Add margin percent
262            let expanded_width = target_width + target_width * margin_percent / 100;
263            let expanded_height = target_height + target_height * margin_percent / 100;
264
265            // Apply max bounds if specified
266            let final_width = if max_width > 0 {
267                expanded_width.min(max_width)
268            } else {
269                expanded_width
270            };
271            let final_height = if max_height > 0 {
272                expanded_height.min(max_height)
273            } else {
274                expanded_height
275            };
276
277            // Center the expansion for odd region
278            Self::expand_region_centered(&mut odd_region, final_width, final_height, max_width, max_height);
279
280            // Center the expansion for even region
281            Self::expand_region_centered(&mut even_region, final_width, final_height, max_width, max_height);
282        }
283
284        UnifiedCropRegions {
285            odd_region,
286            even_region,
287        }
288    }
289
290    /// Expand a region to target size, centering the expansion
291    fn expand_region_centered(
292        region: &mut GroupCropRegion,
293        target_width: u32,
294        target_height: u32,
295        max_width: u32,
296        max_height: u32,
297    ) {
298        if region.width < target_width {
299            let dw = target_width - region.width;
300            let new_left = region.left.saturating_sub(dw / 2);
301
302            // Clamp to image bounds
303            let clamped_left = if max_width > 0 {
304                new_left.min(max_width.saturating_sub(target_width))
305            } else {
306                new_left
307            };
308
309            region.left = clamped_left;
310            region.width = target_width;
311        }
312
313        if region.height < target_height {
314            let dh = target_height - region.height;
315            let new_top = region.top.saturating_sub(dh / 2);
316
317            // Clamp to image bounds
318            let clamped_top = if max_height > 0 {
319                new_top.min(max_height.saturating_sub(target_height))
320            } else {
321                new_top
322            };
323
324            region.top = clamped_top;
325            region.height = target_height;
326        }
327    }
328
329    /// Calculate IQR (Interquartile Range)
330    /// Returns (Q1, Q3, IQR) with IQR minimum of 1 to avoid division issues
331    fn calculate_iqr(sorted_values: &[u32]) -> (f64, f64, f64) {
332        if sorted_values.is_empty() {
333            return (0.0, 0.0, 1.0);
334        }
335
336        let q1 = Self::percentile(sorted_values, 0.25);
337        let q3 = Self::percentile(sorted_values, 0.75);
338        let iqr = (q3 - q1).max(1.0); // Guard against IQR == 0
339
340        (q1, q3, iqr)
341    }
342
343    /// Check if a value is an outlier using Tukey fence
344    fn is_outlier(value: u32, q1: f64, q3: f64, iqr: f64) -> bool {
345        let v = value as f64;
346        v < q1 - TUKEY_K * iqr || v > q3 + TUKEY_K * iqr
347    }
348
349    /// Calculate percentile with linear interpolation
350    /// Input must be sorted in ascending order
351    fn percentile(sorted_values: &[u32], p: f64) -> f64 {
352        if sorted_values.is_empty() {
353            return 0.0;
354        }
355        if sorted_values.len() == 1 {
356            return sorted_values[0] as f64;
357        }
358
359        let idx = p * (sorted_values.len() - 1) as f64;
360        let lo = idx.floor() as usize;
361        let hi = idx.ceil() as usize;
362
363        if lo == hi {
364            sorted_values[lo] as f64
365        } else {
366            let frac = idx - lo as f64;
367            sorted_values[lo] as f64 + (sorted_values[hi] as f64 - sorted_values[lo] as f64) * frac
368        }
369    }
370
371    /// Calculate median of u32 values
372    fn median_u32(values: &[u32]) -> u32 {
373        if values.is_empty() {
374            return 0;
375        }
376
377        let mut sorted = values.to_vec();
378        sorted.sort_unstable();
379
380        let n = sorted.len();
381        if n % 2 == 1 {
382            sorted[n / 2]
383        } else {
384            (sorted[n / 2 - 1] + sorted[n / 2]) / 2
385        }
386    }
387
388    /// Detect text bounding box from image using edge detection
389    ///
390    /// This function detects the content area by finding
391    /// non-background pixels and returning the minimal bounding box.
392    pub fn detect_text_bounding_box(
393        image_path: &Path,
394        background_threshold: u8,
395    ) -> Result<ContentRect> {
396        if !image_path.exists() {
397            return Err(MarginError::ImageNotFound(image_path.to_path_buf()));
398        }
399
400        let img = image::open(image_path).map_err(|e| MarginError::InvalidImage(e.to_string()))?;
401        let gray = img.to_luma8();
402        let (width, height) = gray.dimensions();
403
404        let mut min_x = width;
405        let mut max_x = 0u32;
406        let mut min_y = height;
407        let mut max_y = 0u32;
408
409        // Scan for content pixels
410        for y in 0..height {
411            for x in 0..width {
412                let pixel = gray.get_pixel(x, y);
413                if pixel.0[0] < background_threshold {
414                    min_x = min_x.min(x);
415                    max_x = max_x.max(x);
416                    min_y = min_y.min(y);
417                    max_y = max_y.max(y);
418                }
419            }
420        }
421
422        // Check if any content was found
423        if min_x > max_x || min_y > max_y {
424            return Err(MarginError::NoContentDetected);
425        }
426
427        Ok(ContentRect {
428            x: min_x,
429            y: min_y,
430            width: max_x - min_x + 1,
431            height: max_y - min_y + 1,
432        })
433    }
434
435    /// Detect bounding boxes for all pages in parallel
436    pub fn detect_all_bounding_boxes(
437        image_paths: &[PathBuf],
438        background_threshold: u8,
439    ) -> Vec<PageBoundingBox> {
440        image_paths
441            .par_iter()
442            .enumerate()
443            .filter_map(|(idx, path)| {
444                match Self::detect_text_bounding_box(path, background_threshold) {
445                    Ok(bbox) => Some(PageBoundingBox::new(idx + 1, bbox)),
446                    Err(_) => None,
447                }
448            })
449            .collect()
450    }
451}
452
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a page bounding box from raw coordinates.
    fn page(number: usize, x: u32, y: u32, width: u32, height: u32) -> PageBoundingBox {
        PageBoundingBox::new(
            number,
            ContentRect {
                x,
                y,
                width,
                height,
            },
        )
    }

    #[test]
    fn test_page_bounding_box_creation() {
        let bbox = page(1, 100, 50, 800, 1200);
        assert_eq!(bbox.page_number, 1);
        assert!(bbox.is_odd);
        assert!(bbox.is_valid());
        assert_eq!(bbox.right(), 900);
        assert_eq!(bbox.bottom(), 1250);
    }

    #[test]
    fn test_page_bounding_box_even_page() {
        let bbox = page(2, 100, 50, 800, 1200);
        assert_eq!(bbox.page_number, 2);
        assert!(!bbox.is_odd);
    }

    #[test]
    fn test_group_crop_region_valid() {
        let region = GroupCropRegion {
            left: 100,
            top: 50,
            width: 800,
            height: 1200,
            inlier_count: 10,
            total_count: 12,
        };
        assert!(region.is_valid());
        assert_eq!(region.right(), 900);
        assert_eq!(region.bottom(), 1250);
    }

    #[test]
    fn test_group_crop_region_invalid() {
        let region = GroupCropRegion {
            left: 100,
            top: 50,
            width: 0,
            height: 1200,
            inlier_count: 0,
            total_count: 0,
        };
        assert!(!region.is_valid());
    }

    #[test]
    fn test_decide_group_crop_empty() {
        let result = GroupCropAnalyzer::decide_group_crop_region(&[]);
        assert!(!result.is_valid());
        assert_eq!(result.inlier_count, 0);
    }

    #[test]
    fn test_decide_group_crop_single_page() {
        let boxes = vec![page(1, 100, 50, 800, 1200)];
        let result = GroupCropAnalyzer::decide_group_crop_region(&boxes);
        assert!(result.is_valid());
        assert_eq!(result.left, 100);
        assert_eq!(result.top, 50);
        assert_eq!(result.width, 800);
        assert_eq!(result.height, 1200);
    }

    #[test]
    fn test_decide_group_crop_multiple_pages() {
        let boxes = vec![
            page(1, 100, 50, 800, 1200),
            page(2, 105, 55, 790, 1190),
            page(3, 95, 45, 810, 1210),
        ];
        let result = GroupCropAnalyzer::decide_group_crop_region(&boxes);
        assert!(result.is_valid());
        assert_eq!(result.inlier_count, 3);
        // Median should be close to 100, 50
        assert!((result.left as i32 - 100).abs() <= 5);
        assert!((result.top as i32 - 50).abs() <= 5);
    }

    #[test]
    fn test_decide_group_crop_with_outlier() {
        let boxes = vec![
            page(1, 100, 50, 800, 1200),
            page(2, 105, 55, 790, 1190),
            page(3, 95, 45, 810, 1210),
            page(4, 100, 50, 800, 1200),
            page(5, 500, 500, 200, 200), // Outlier
        ];
        let result = GroupCropAnalyzer::decide_group_crop_region(&boxes);
        assert!(result.is_valid());
        // Page 5 lies outside the fence on every edge and must be excluded.
        assert_eq!(result.inlier_count, 4);
        assert_eq!(result.total_count, 5);
        // The region is the median of the four remaining pages.
        assert_eq!(result.left, 100);
        assert_eq!(result.top, 50);
        assert_eq!(result.width, 800);
        assert_eq!(result.height, 1200);
    }

    #[test]
    fn test_unify_odd_even_regions() {
        let boxes = vec![
            page(1, 100, 50, 800, 1200),
            page(2, 150, 60, 750, 1180),
            page(3, 105, 55, 795, 1195),
            page(4, 155, 65, 745, 1175),
        ];
        let result = GroupCropAnalyzer::unify_odd_even_regions(&boxes);

        // Odd pages (1, 3) should be grouped
        assert!(result.odd_region.is_valid());
        assert_eq!(result.odd_region.total_count, 2);

        // Even pages (2, 4) should be grouped
        assert!(result.even_region.is_valid());
        assert_eq!(result.even_region.total_count, 2);

        // Both regions share the vertical extent (min top, max bottom) ...
        assert_eq!(result.odd_region.top, 52);
        assert_eq!(result.even_region.top, 52);
        assert_eq!(result.odd_region.bottom(), 1250);
        assert_eq!(result.even_region.bottom(), 1250);
        // ... and the narrower region is widened to match the wider one.
        assert_eq!(result.odd_region.width, result.even_region.width);
    }

    #[test]
    fn test_group_crop_region_to_content_rect() {
        let region = GroupCropRegion {
            left: 100,
            top: 50,
            width: 800,
            height: 1200,
            inlier_count: 5,
            total_count: 5,
        };
        let rect = region.to_content_rect();
        assert_eq!(rect.x, 100);
        assert_eq!(rect.y, 50);
        assert_eq!(rect.width, 800);
        assert_eq!(rect.height, 1200);
    }

    // ============================================================
    // TC-MARGIN Spec Tests
    // ============================================================

    // TC-MARGIN-001: Uniform margins - exact detection
    #[test]
    fn test_tc_margin_001_uniform_margins_detected() {
        // Create pages with identical margins (uniform)
        let boxes: Vec<PageBoundingBox> = (1..=4)
            .map(|number| page(number, 100, 100, 800, 1000))
            .collect();

        let result = GroupCropAnalyzer::decide_group_crop_region(&boxes);

        // All pages have same margins, so result should be exact
        assert!(result.is_valid());
        assert_eq!(result.left, 100);
        assert_eq!(result.top, 100);
        assert_eq!(result.width, 800);
        assert_eq!(result.height, 1000);
        assert_eq!(result.inlier_count, 4);
    }

    // TC-MARGIN-002: Non-uniform margins - unified calculation
    #[test]
    fn test_tc_margin_002_nonuniform_margins_unified() {
        // Create pages with varying margins
        let boxes = vec![
            page(1, 100, 90, 800, 1000),
            page(2, 110, 100, 790, 990),
            page(3, 95, 95, 805, 1005),
            page(4, 105, 105, 795, 995),
        ];

        let result = GroupCropAnalyzer::decide_group_crop_region(&boxes);

        // The unified region must lie within the range spanned by the pages
        assert!(result.is_valid());
        assert!(result.left >= 95 && result.left <= 110);
        assert!(result.top >= 90 && result.top <= 105);
    }

    // TC-MARGIN-003: No margins - zero margin
    #[test]
    fn test_tc_margin_003_no_margins() {
        // Content fills entire page (no margins)
        let boxes = vec![page(1, 0, 0, 1000, 1200), page(2, 0, 0, 1000, 1200)];

        let result = GroupCropAnalyzer::decide_group_crop_region(&boxes);

        assert!(result.is_valid());
        assert_eq!(result.left, 0);
        assert_eq!(result.top, 0);
        assert_eq!(result.width, 1000);
        assert_eq!(result.height, 1200);
    }

    // TC-MARGIN-004: Outlier page - Tukey exclusion
    #[test]
    fn test_tc_margin_004_outlier_exclusion_tukey() {
        // Create pages with one outlier
        let boxes = vec![
            page(1, 100, 100, 800, 1000),
            page(2, 102, 98, 798, 1002),
            page(3, 101, 101, 799, 999),
            page(4, 99, 99, 801, 1001),
            // Outlier page with very different margins
            page(5, 300, 300, 400, 600),
        ];

        let result = GroupCropAnalyzer::decide_group_crop_region(&boxes);

        // Exactly the outlier page is excluded
        assert!(result.is_valid());
        assert_eq!(result.inlier_count, 4);
        assert_eq!(result.total_count, 5);
        // Result is the median of the normal pages, not influenced by the
        // outlier's x = 300 / y = 300
        assert_eq!(result.left, 100);
        assert_eq!(result.top, 99);
        assert_eq!(result.width, 800);
        assert_eq!(result.height, 1001);
    }

    // TC-MARGIN-005: Odd/even page difference - separate regions
    #[test]
    fn test_tc_margin_005_odd_even_separate_regions() {
        // Odd pages have different margins than even pages (typical for books)
        let boxes = vec![
            page(1, 120, 100, 780, 1000), // Odd
            page(2, 100, 100, 780, 1000), // Even
            page(3, 122, 102, 778, 998),  // Odd
            page(4, 98, 98, 782, 1002),   // Even
        ];

        let result = GroupCropAnalyzer::unify_odd_even_regions(&boxes);

        // Both regions should be valid
        assert!(result.odd_region.is_valid());
        assert!(result.even_region.is_valid());

        // Odd region should have larger left margin
        assert!(result.odd_region.left >= result.even_region.left);

        // Each region should have 2 pages
        assert_eq!(result.odd_region.total_count, 2);
        assert_eq!(result.even_region.total_count, 2);
    }
}