1pub mod core;
3pub mod edge;
4pub mod error;
5pub mod geom;
6pub mod prelude;
7pub mod topology;
8
9pub use crate::core::buffer::DocBuffer;
10use crate::edge::EdgeDetector;
11pub use crate::error::DocQuadError;
12use crate::geom::Quadrilateral;
13use std::time::Instant;
14
/// Pixel count above which `find_document` shrinks the input before running
/// the detection pipeline.
const DOWNSAMPLE_THRESHOLD_PIXELS: u32 = 1_024 * 768;

/// Long-edge length (in pixels) used as the numerator of the downsample
/// scale factor in `find_document`.
const TARGET_LONG_EDGE: u32 = 1_024;

/// Minimum contour perimeter, expressed as a fraction of the processed
/// image's long edge; shorter contours are dropped by the pre-filter.
const MIN_PERIMETER_RATIO: f32 = 0.03;

/// Minimum contour bounding-box area, expressed as a fraction of the
/// processed image area; smaller contours are dropped by the pre-filter.
const MIN_BBOX_AREA_RATIO: f32 = 0.005;

/// Maximum contour bounding-box area, expressed as a fraction of the
/// processed image area; larger contours are dropped by the pre-filter.
const MAX_BBOX_AREA_RATIO: f32 = 0.99;

/// Minimum validated quadrilateral area, expressed as a fraction of the
/// processed image area, for a candidate to be accepted.
const MIN_DOC_AREA_RATIO: f32 = 0.01;
36
37pub fn find_document(buffer: &DocBuffer<'_>) -> Result<Option<Quadrilateral>, DocQuadError> {
39 let start_total = Instant::now();
40
41 let total_pixels = buffer.width * buffer.height;
42 log::info!(
43 "[Lib::find_document] - START: input={}x{}, total_pixels={}, stride={}",
44 buffer.width,
45 buffer.height,
46 total_pixels,
47 buffer.stride
48 );
49
50 let scale = if total_pixels > DOWNSAMPLE_THRESHOLD_PIXELS {
52 let long_edge = buffer.width.max(buffer.height);
53 TARGET_LONG_EDGE as f32 / long_edge as f32
54 } else {
55 1.0
56 };
57
58 log::debug!(
59 "[Lib::find_document] - Downsample decision: total_pixels={}, threshold={}, scale={:.4}",
60 total_pixels,
61 DOWNSAMPLE_THRESHOLD_PIXELS,
62 scale
63 );
64
65 let (proc_width, proc_height, proc_data) = if scale < 1.0 {
66 let w = ((buffer.width as f32 * scale) as u32).max(3);
67 let h = ((buffer.height as f32 * scale) as u32).max(3);
68 let data = downsample_nearest(buffer, w, h);
69 log::info!(
70 "[Lib::find_document] - Downsampled {}x{} -> {}x{} (scale={:.4})",
71 buffer.width,
72 buffer.height,
73 w,
74 h,
75 scale
76 );
77 (w, h, data)
78 } else {
79 let data = if buffer.stride == buffer.width {
81 log::debug!("[Lib::find_document] - Contiguous memory, direct copy.");
82 buffer.data[..(buffer.width * buffer.height) as usize].to_vec()
83 } else {
84 log::debug!(
85 "[Lib::find_document] - Strided memory (stride={} > width={}), compacting rows.",
86 buffer.stride,
87 buffer.width
88 );
89 let view = buffer.as_array_view()?;
90 let mut compact = Vec::with_capacity((buffer.width * buffer.height) as usize);
91 for row in view.rows() {
92 compact.extend(row.iter().copied());
93 }
94 compact
95 };
96 (buffer.width, buffer.height, data)
97 };
98
99 log::info!(
100 "[Lib::find_document] - Processing resolution: {}x{}, proc_data_len={}",
101 proc_width,
102 proc_height,
103 proc_data.len()
104 );
105
106 let proc_buffer = DocBuffer::new(&proc_data, proc_width, proc_height, proc_width)?;
108
109 log::info!("[Lib::find_document] - Stage 3: Edge detection (Canny + morphological close).");
111 let mut detector = EdgeDetector::new(proc_width as usize, proc_height as usize)?;
112 let edges = detector.detect(&proc_buffer)?;
113
114 let edge_pixel_count = edges.iter().filter(|&&v| v == 255).count();
116 let edge_density = edge_pixel_count as f32 / (proc_width * proc_height) as f32 * 100.0;
117 log::info!(
118 "[Lib::find_document] - Stage 3 result: edge_pixels={}, density={:.2}%",
119 edge_pixel_count,
120 edge_density
121 );
122
123 if edge_density > 20.0 {
124 log::warn!(
125 "[Lib::find_document] - Edge density {:.2}% is very high (>20%). \
126 Canny thresholds may be too low or morphological close over-connected noise.",
127 edge_density
128 );
129 } else if edge_density < 0.1 {
130 log::warn!(
131 "[Lib::find_document] - Edge density {:.2}% is very low (<0.1%). \
132 Canny thresholds may be too high, document edges may be missed.",
133 edge_density
134 );
135 }
136
137 log::info!("[Lib::find_document] - Stage 4: Contour extraction.");
139 let raw_contours =
140 crate::topology::contour::ContourExtractor::extract(&edges, proc_width, proc_height);
141
142 let raw_count = raw_contours.len();
143 log::info!(
144 "[Lib::find_document] - Stage 4 result: {} raw contours extracted.",
145 raw_count
146 );
147
148 if raw_count == 0 {
149 log::warn!(
150 "[Lib::find_document] - No contours extracted. \
151 Edge image may be empty or all edges are on image boundary."
152 );
153 log::info!(
154 "[Lib::find_document] - Detection finished. Found=false. Total Elapsed: {}ms",
155 start_total.elapsed().as_millis()
156 );
157 return Ok(None);
158 }
159
160 let proc_area = (proc_width * proc_height) as f32;
162 let long_edge_px = proc_width.max(proc_height) as f32;
163
164 let min_perimeter = long_edge_px * MIN_PERIMETER_RATIO;
165 let min_bbox_area = proc_area * MIN_BBOX_AREA_RATIO;
166 let max_bbox_area = proc_area * MAX_BBOX_AREA_RATIO;
167
168 log::info!(
169 "[Lib::find_document] - Stage 5: Pre-filter params: \
170 min_perimeter={:.1}px (ratio={:.2}), \
171 min_bbox_area={:.0}px² (ratio={:.3}), \
172 max_bbox_area={:.0}px² (ratio={:.2})",
173 min_perimeter,
174 MIN_PERIMETER_RATIO,
175 min_bbox_area,
176 MIN_BBOX_AREA_RATIO,
177 max_bbox_area,
178 MAX_BBOX_AREA_RATIO
179 );
180
181 let mut rejected_too_short = 0usize;
182 let mut rejected_too_small_bbox = 0usize;
183 let mut rejected_too_large_bbox = 0usize;
184
185 let mut perimeter_histogram = [0usize; 10]; let filtered_contours: Vec<_> = raw_contours
189 .into_iter()
190 .filter(|contour| {
191 if contour.len() < 4 {
193 rejected_too_short += 1;
194 return false;
195 }
196
197 let perimeter = contour.len() as f32;
198
199 let bucket = ((perimeter / long_edge_px * 20.0) as usize).min(9);
201 perimeter_histogram[bucket] += 1;
202
203 if perimeter < min_perimeter {
205 rejected_too_short += 1;
206 return false;
207 }
208
209 let (min_x, max_x, min_y, max_y) = contour.iter().fold(
211 (f32::MAX, f32::MIN, f32::MAX, f32::MIN),
212 |(mnx, mxx, mny, mxy), c| {
213 (mnx.min(c.x), mxx.max(c.x), mny.min(c.y), mxy.max(c.y))
214 },
215 );
216 let bbox_area = (max_x - min_x) * (max_y - min_y);
217
218 if bbox_area < min_bbox_area {
219 rejected_too_small_bbox += 1;
220 return false;
221 }
222 if bbox_area > max_bbox_area {
223 rejected_too_large_bbox += 1;
224 return false;
225 }
226 true
227 })
228 .collect();
229
230 log::debug!(
232 "[Lib::find_document] - Contour perimeter distribution \
233 (bucket_width={:.1}px, 0~{:.1}px+):",
234 long_edge_px * 0.05,
235 long_edge_px * 0.5
236 );
237 for (i, &count) in perimeter_histogram.iter().enumerate() {
238 if count > 0 {
239 log::debug!(
240 "[Lib::find_document] - [{:.1}~{:.1}px]: {} contours",
241 long_edge_px * 0.05 * i as f32,
242 long_edge_px * 0.05 * (i + 1) as f32,
243 count
244 );
245 }
246 }
247
248 log::info!(
249 "[Lib::find_document] - Stage 5 result: {}/{} contours remain. \
250 Rejected: too_short/small_perimeter={}, too_small_bbox={}, too_large_bbox={}",
251 filtered_contours.len(),
252 raw_count,
253 rejected_too_short,
254 rejected_too_small_bbox,
255 rejected_too_large_bbox
256 );
257
258 if filtered_contours.is_empty() {
259 log::warn!(
260 "[Lib::find_document] - No contours survived pre-filter. \
261 Current thresholds: min_perimeter={:.1}px, min_bbox_area={:.0}px². \
262 All {} raw contours were too short/small. \
263 This typically means edge continuity is poor — \
264 check Canny output or increase morphological close radius.",
265 min_perimeter,
266 min_bbox_area,
267 raw_count
268 );
269 log::info!(
270 "[Lib::find_document] - Detection finished. Found=false. Total Elapsed: {}ms",
271 start_total.elapsed().as_millis()
272 );
273 return Ok(None);
274 }
275
276 let min_area = proc_area * MIN_DOC_AREA_RATIO;
278
279 log::info!(
280 "[Lib::find_document] - Stage 6: Geometry analysis on {} contours. \
281 min_area={:.0}px² (ratio={:.2}, proc_area={:.0}px²)",
282 filtered_contours.len(),
283 min_area,
284 MIN_DOC_AREA_RATIO,
285 proc_area
286 );
287
288 let mut candidates = Vec::new();
289 let mut geom_rejected_simplify = 0usize;
290 let mut geom_rejected_validate = 0usize;
291 let mut geom_rejected_area = 0usize;
292
293 for (idx, contour) in filtered_contours.into_iter().enumerate() {
294 let contour_len = contour.len();
295
296 let (min_x, max_x, min_y, max_y) = contour.iter().fold(
298 (f32::MAX, f32::MIN, f32::MAX, f32::MIN),
299 |(mnx, mxx, mny, mxy), c| (mnx.min(c.x), mxx.max(c.x), mny.min(c.y), mxy.max(c.y)),
300 );
301 let bbox_w = max_x - min_x;
302 let bbox_h = max_y - min_y;
303
304 log::debug!(
305 "[Lib::find_document] - Geometry[{}]: contour_len={}, \
306 bbox=[({:.0},{:.0})-({:.0},{:.0})] {:.0}x{:.0}px, \
307 attempting simplify_to_quad.",
308 idx,
309 contour_len,
310 min_x,
311 min_y,
312 max_x,
313 max_y,
314 bbox_w,
315 bbox_h
316 );
317
318 let Some(simplified) =
319 crate::geom::simplify::GeometrySimplifier::simplify_to_quad(contour)
320 else {
321 geom_rejected_simplify += 1;
322 log::debug!(
323 "[Lib::find_document] - Geometry[{}]: simplify_to_quad returned None \
324 (contour_len={}, bbox={:.0}x{:.0}px).",
325 idx,
326 contour_len,
327 bbox_w,
328 bbox_h
329 );
330 continue;
331 };
332
333 log::debug!(
334 "[Lib::find_document] - Geometry[{}]: simplified to {} points, \
335 attempting validate_and_score.",
336 idx,
337 simplified.0.len()
338 );
339
340 let Some((area, pts)) =
341 crate::geom::validate::GeometryValidator::validate_and_score(&simplified)
342 else {
343 geom_rejected_validate += 1;
344 log::debug!(
345 "[Lib::find_document] - Geometry[{}]: validate_and_score returned None \
346 (contour_len={}).",
347 idx,
348 contour_len
349 );
350 continue;
351 };
352
353 let original_area = area / (scale * scale);
355
356 log::debug!(
357 "[Lib::find_document] - Geometry[{}]: proc_area={:.0}px², \
358 original_area={:.0}px², min_area={:.0}px², scale={:.4}",
359 idx,
360 area,
361 original_area,
362 min_area,
363 scale
364 );
365
366 if area > min_area {
367 let sorted_pts = crate::geom::transform::Transformer::sort_points(
368 pts.map(|p| glam::Vec2::new(p.x / scale, p.y / scale)),
369 );
370 log::info!(
371 "[Lib::find_document] - Geometry[{}]: ACCEPTED quad. \
372 original_area={:.0}px², \
373 points=[TL({:.1},{:.1}), TR({:.1},{:.1}), BR({:.1},{:.1}), BL({:.1},{:.1})]",
374 idx,
375 original_area,
376 sorted_pts[0].x,
377 sorted_pts[0].y,
378 sorted_pts[1].x,
379 sorted_pts[1].y,
380 sorted_pts[2].x,
381 sorted_pts[2].y,
382 sorted_pts[3].x,
383 sorted_pts[3].y,
384 );
385 candidates.push(Quadrilateral {
386 points: sorted_pts,
387 area: original_area,
388 score: 1.0,
389 });
390 } else {
391 geom_rejected_area += 1;
392 log::debug!(
393 "[Lib::find_document] - Geometry[{}]: rejected by area filter \
394 (proc_area={:.0} < min_area={:.0}).",
395 idx,
396 area,
397 min_area
398 );
399 }
400 }
401
402 log::info!(
403 "[Lib::find_document] - Stage 6 result: {} candidates accepted. \
404 Rejected: simplify={}, validate={}, area_too_small={}",
405 candidates.len(),
406 geom_rejected_simplify,
407 geom_rejected_validate,
408 geom_rejected_area
409 );
410
411 let result = candidates.into_iter().max_by(|a, b| {
413 a.area
414 .partial_cmp(&b.area)
415 .unwrap_or(std::cmp::Ordering::Equal)
416 });
417
418 log::info!(
419 "[Lib::find_document] - Detection finished. Found={}. Total Elapsed: {}ms",
420 result.is_some(),
421 start_total.elapsed().as_millis()
422 );
423
424 Ok(result)
425}
426
427fn downsample_nearest(buffer: &DocBuffer<'_>, target_w: u32, target_h: u32) -> Vec<u8> {
429 let mut out = Vec::with_capacity((target_w * target_h) as usize);
430 let src_w = buffer.width;
431 let src_h = buffer.height;
432 let stride = buffer.stride as usize;
433
434 for ty in 0..target_h {
435 let sy = ((ty as f32 + 0.5) * src_h as f32 / target_h as f32) as usize;
437 let sy = sy.min(src_h as usize - 1);
438 let row_offset = sy * stride;
439
440 for tx in 0..target_w {
441 let sx = ((tx as f32 + 0.5) * src_w as f32 / target_w as f32) as usize;
442 let sx = sx.min(src_w as usize - 1);
443 out.push(buffer.data[row_offset + sx]);
444 }
445 }
446 out
447}