1pub mod core;
3pub mod edge;
4pub mod error;
5pub mod geom;
6pub mod prelude;
7pub mod topology;
8
9pub use crate::core::buffer::DocBuffer;
10use crate::edge::EdgeDetector;
11pub use crate::error::DocQuadError;
12use crate::geom::Quadrilateral;
13use std::time::Instant;
14
/// Total-pixel threshold: inputs with more pixels than this are candidates for
/// downsampling before edge detection.
const DOWNSAMPLE_THRESHOLD_PIXELS: u32 = 1024 * 768;
/// Long-edge length, in pixels, that the downsample scale factor is computed against.
const TARGET_LONG_EDGE: u32 = 1024;

/// Minimum contour perimeter as a fraction of the processing image's long edge.
/// The perimeter is approximated by the contour's point count.
const MIN_PERIMETER_RATIO: f32 = 0.03;

/// Minimum contour bounding-box area as a fraction of the processing image area.
const MIN_BBOX_AREA_RATIO: f32 = 0.005;

/// Maximum contour bounding-box area as a fraction of the processing image area.
const MAX_BBOX_AREA_RATIO: f32 = 0.99;

/// Minimum validated quad area as a fraction of the processing image area.
const MIN_DOC_AREA_RATIO: f32 = 0.01;
36
37pub fn find_document(buffer: &DocBuffer<'_>) -> Result<Option<Quadrilateral>, DocQuadError> {
39 let start_total = Instant::now();
40
41 let total_pixels = buffer.width * buffer.height;
42 log::info!(
43 "[Lib::find_document] - START: input={}x{}, total_pixels={}, stride={}",
44 buffer.width,
45 buffer.height,
46 total_pixels,
47 buffer.stride
48 );
49
50 let scale = if total_pixels > DOWNSAMPLE_THRESHOLD_PIXELS {
52 let long_edge = buffer.width.max(buffer.height);
53 TARGET_LONG_EDGE as f32 / long_edge as f32
54 } else {
55 1.0
56 };
57
58 log::debug!(
59 "[Lib::find_document] - Downsample decision: total_pixels={}, threshold={}, scale={:.4}",
60 total_pixels,
61 DOWNSAMPLE_THRESHOLD_PIXELS,
62 scale
63 );
64
65 let (proc_width, proc_height, proc_data) = if scale < 1.0 {
66 let w = ((buffer.width as f32 * scale) as u32).max(3);
67 let h = ((buffer.height as f32 * scale) as u32).max(3);
68
69 let data = downsample_bilinear(buffer, w, h);
71
72 log::info!(
73 "[Lib::find_document] - Downsampled {}x{} -> {}x{} (scale={:.4}) using bilinear interp",
74 buffer.width,
75 buffer.height,
76 w,
77 h,
78 scale
79 );
80 (w, h, data)
81 } else {
82 let data = if buffer.stride == buffer.width {
84 log::debug!("[Lib::find_document] - Contiguous memory, direct copy.");
85 buffer.data[..(buffer.width * buffer.height) as usize].to_vec()
86 } else {
87 log::debug!(
88 "[Lib::find_document] - Strided memory (stride={} > width={}), compacting rows.",
89 buffer.stride,
90 buffer.width
91 );
92 let view = buffer.as_array_view()?;
93 let mut compact = Vec::with_capacity((buffer.width * buffer.height) as usize);
94 for row in view.rows() {
95 compact.extend(row.iter().copied());
96 }
97 compact
98 };
99 (buffer.width, buffer.height, data)
100 };
101
102 log::info!(
103 "[Lib::find_document] - Processing resolution: {}x{}, proc_data_len={}",
104 proc_width,
105 proc_height,
106 proc_data.len()
107 );
108
109 let proc_buffer = DocBuffer::new(&proc_data, proc_width, proc_height, proc_width)?;
111
112 log::info!("[Lib::find_document] - Stage 3: Edge detection (Canny + morphological close).");
114 let mut detector = EdgeDetector::new(proc_width as usize, proc_height as usize)?;
115 let edges = detector.detect(&proc_buffer)?;
116
117 let edge_pixel_count = edges.iter().filter(|&&v| v == 255).count();
119 let edge_density = edge_pixel_count as f32 / (proc_width * proc_height) as f32 * 100.0;
120 log::info!(
121 "[Lib::find_document] - Stage 3 result: edge_pixels={}, density={:.2}%",
122 edge_pixel_count,
123 edge_density
124 );
125
126 if edge_density > 20.0 {
127 log::warn!(
128 "[Lib::find_document] - Edge density {:.2}% is very high (>20%). \
129 Canny thresholds may be too low or morphological close over-connected noise.",
130 edge_density
131 );
132 } else if edge_density < 0.1 {
133 log::warn!(
134 "[Lib::find_document] - Edge density {:.2}% is very low (<0.1%). \
135 Canny thresholds may be too high, document edges may be missed.",
136 edge_density
137 );
138 }
139
140 log::info!("[Lib::find_document] - Stage 4: Contour extraction.");
142 let raw_contours =
143 crate::topology::contour::ContourExtractor::extract(&edges, proc_width, proc_height);
144
145 let raw_count = raw_contours.len();
146 log::info!(
147 "[Lib::find_document] - Stage 4 result: {} raw contours extracted.",
148 raw_count
149 );
150
151 if raw_count == 0 {
152 log::warn!(
153 "[Lib::find_document] - No contours extracted. \
154 Edge image may be empty or all edges are on image boundary."
155 );
156 log::info!(
157 "[Lib::find_document] - Detection finished. Found=false. Total Elapsed: {}ms",
158 start_total.elapsed().as_millis()
159 );
160 return Ok(None);
161 }
162
163 let proc_area = (proc_width * proc_height) as f32;
165 let long_edge_px = proc_width.max(proc_height) as f32;
166
167 let min_perimeter = long_edge_px * MIN_PERIMETER_RATIO;
168 let min_bbox_area = proc_area * MIN_BBOX_AREA_RATIO;
169 let max_bbox_area = proc_area * MAX_BBOX_AREA_RATIO;
170
171 log::info!(
172 "[Lib::find_document] - Stage 5: Pre-filter params: \
173 min_perimeter={:.1}px (ratio={:.2}), \
174 min_bbox_area={:.0}px² (ratio={:.3}), \
175 max_bbox_area={:.0}px² (ratio={:.2})",
176 min_perimeter,
177 MIN_PERIMETER_RATIO,
178 min_bbox_area,
179 MIN_BBOX_AREA_RATIO,
180 max_bbox_area,
181 MAX_BBOX_AREA_RATIO
182 );
183
184 let mut rejected_too_short = 0usize;
185 let mut rejected_too_small_bbox = 0usize;
186 let mut rejected_too_large_bbox = 0usize;
187
188 let mut perimeter_histogram = [0usize; 10]; let filtered_contours: Vec<_> = raw_contours
192 .into_iter()
193 .filter(|contour| {
194 if contour.len() < 4 {
196 rejected_too_short += 1;
197 return false;
198 }
199
200 let perimeter = contour.len() as f32;
201
202 let bucket = ((perimeter / long_edge_px * 20.0) as usize).min(9);
204 perimeter_histogram[bucket] += 1;
205
206 if perimeter < min_perimeter {
208 rejected_too_short += 1;
209 return false;
210 }
211
212 let (min_x, max_x, min_y, max_y) = contour.iter().fold(
214 (f32::MAX, f32::MIN, f32::MAX, f32::MIN),
215 |(mnx, mxx, mny, mxy), c| {
216 (mnx.min(c.x), mxx.max(c.x), mny.min(c.y), mxy.max(c.y))
217 },
218 );
219 let bbox_area = (max_x - min_x) * (max_y - min_y);
220
221 if bbox_area < min_bbox_area {
222 rejected_too_small_bbox += 1;
223 return false;
224 }
225 if bbox_area > max_bbox_area {
226 rejected_too_large_bbox += 1;
227 return false;
228 }
229 true
230 })
231 .collect();
232
233 log::debug!(
235 "[Lib::find_document] - Contour perimeter distribution \
236 (bucket_width={:.1}px, 0~{:.1}px+):",
237 long_edge_px * 0.05,
238 long_edge_px * 0.5
239 );
240 for (i, &count) in perimeter_histogram.iter().enumerate() {
241 if count > 0 {
242 log::debug!(
243 "[Lib::find_document] - [{:.1}~{:.1}px]: {} contours",
244 long_edge_px * 0.05 * i as f32,
245 long_edge_px * 0.05 * (i + 1) as f32,
246 count
247 );
248 }
249 }
250
251 log::info!(
252 "[Lib::find_document] - Stage 5 result: {}/{} contours remain. \
253 Rejected: too_short/small_perimeter={}, too_small_bbox={}, too_large_bbox={}",
254 filtered_contours.len(),
255 raw_count,
256 rejected_too_short,
257 rejected_too_small_bbox,
258 rejected_too_large_bbox
259 );
260
261 if filtered_contours.is_empty() {
262 log::warn!(
263 "[Lib::find_document] - No contours survived pre-filter. \
264 Current thresholds: min_perimeter={:.1}px, min_bbox_area={:.0}px². \
265 All {} raw contours were too short/small. \
266 This typically means edge continuity is poor — \
267 check Canny output or increase morphological close radius.",
268 min_perimeter,
269 min_bbox_area,
270 raw_count
271 );
272 log::info!(
273 "[Lib::find_document] - Detection finished. Found=false. Total Elapsed: {}ms",
274 start_total.elapsed().as_millis()
275 );
276 return Ok(None);
277 }
278
279 let min_area = proc_area * MIN_DOC_AREA_RATIO;
281
282 log::info!(
283 "[Lib::find_document] - Stage 6: Geometry analysis on {} contours. \
284 min_area={:.0}px² (ratio={:.2}, proc_area={:.0}px²)",
285 filtered_contours.len(),
286 min_area,
287 MIN_DOC_AREA_RATIO,
288 proc_area
289 );
290
291 let mut candidates = Vec::new();
292 let mut geom_rejected_simplify = 0usize;
293 let mut geom_rejected_validate = 0usize;
294 let mut geom_rejected_area = 0usize;
295
296 for (idx, contour) in filtered_contours.into_iter().enumerate() {
297 let contour_len = contour.len();
298
299 let (min_x, max_x, min_y, max_y) = contour.iter().fold(
301 (f32::MAX, f32::MIN, f32::MAX, f32::MIN),
302 |(mnx, mxx, mny, mxy), c| (mnx.min(c.x), mxx.max(c.x), mny.min(c.y), mxy.max(c.y)),
303 );
304 let bbox_w = max_x - min_x;
305 let bbox_h = max_y - min_y;
306
307 log::debug!(
308 "[Lib::find_document] - Geometry[{}]: contour_len={}, \
309 bbox=[({:.0},{:.0})-({:.0},{:.0})] {:.0}x{:.0}px, \
310 attempting simplify_to_quad.",
311 idx,
312 contour_len,
313 min_x,
314 min_y,
315 max_x,
316 max_y,
317 bbox_w,
318 bbox_h
319 );
320
321 let Some(simplified) =
322 crate::geom::simplify::GeometrySimplifier::simplify_to_quad(&contour)
323 else {
324 geom_rejected_simplify += 1;
325 log::debug!(
326 "[Lib::find_document] - Geometry[{}]: simplify_to_quad returned None \
327 (contour_len={}, bbox={:.0}x{:.0}px).",
328 idx,
329 contour_len,
330 bbox_w,
331 bbox_h
332 );
333 continue;
334 };
335
336 log::debug!(
337 "[Lib::find_document] - Geometry[{}]: simplified to {} points, \
338 attempting validate_and_score.",
339 idx,
340 simplified.0.len()
341 );
342
343 let Some((area, pts)) =
344 crate::geom::validate::GeometryValidator::validate_and_score(&simplified)
345 else {
346 geom_rejected_validate += 1;
347 log::debug!(
348 "[Lib::find_document] - Geometry[{}]: validate_and_score returned None \
349 (contour_len={}).",
350 idx,
351 contour_len
352 );
353 continue;
354 };
355
356 let original_area = area / (scale * scale);
358
359 log::debug!(
360 "[Lib::find_document] - Geometry[{}]: proc_area={:.0}px², \
361 original_area={:.0}px², min_area={:.0}px², scale={:.4}",
362 idx,
363 area,
364 original_area,
365 min_area,
366 scale
367 );
368
369 if area > min_area {
370 let sorted_pts = crate::geom::transform::Transformer::sort_points(
371 pts.map(|p| glam::Vec2::new(p.x / scale, p.y / scale)),
372 );
373 log::info!(
374 "[Lib::find_document] - Geometry[{}]: ACCEPTED quad. \
375 original_area={:.0}px², \
376 points=[TL({:.1},{:.1}), TR({:.1},{:.1}), BR({:.1},{:.1}), BL({:.1},{:.1})]",
377 idx,
378 original_area,
379 sorted_pts[0].x,
380 sorted_pts[0].y,
381 sorted_pts[1].x,
382 sorted_pts[1].y,
383 sorted_pts[2].x,
384 sorted_pts[2].y,
385 sorted_pts[3].x,
386 sorted_pts[3].y,
387 );
388 candidates.push(Quadrilateral {
389 points: sorted_pts,
390 area: original_area,
391 score: 1.0,
392 });
393 } else {
394 geom_rejected_area += 1;
395 log::debug!(
396 "[Lib::find_document] - Geometry[{}]: rejected by area filter \
397 (proc_area={:.0} < min_area={:.0}).",
398 idx,
399 area,
400 min_area
401 );
402 }
403 }
404
405 log::info!(
406 "[Lib::find_document] - Stage 6 result: {} candidates accepted. \
407 Rejected: simplify={}, validate={}, area_too_small={}",
408 candidates.len(),
409 geom_rejected_simplify,
410 geom_rejected_validate,
411 geom_rejected_area
412 );
413
414 let result = candidates.into_iter().max_by(|a, b| {
416 a.area
417 .partial_cmp(&b.area)
418 .unwrap_or(std::cmp::Ordering::Equal)
419 });
420
421 log::info!(
422 "[Lib::find_document] - Detection finished. Found={}. Total Elapsed: {}ms",
423 result.is_some(),
424 start_total.elapsed().as_millis()
425 );
426
427 Ok(result)
428}
429
430fn downsample_bilinear(buffer: &DocBuffer<'_>, target_w: u32, target_h: u32) -> Vec<u8> {
435 let mut out = Vec::with_capacity((target_w * target_h) as usize);
436 let src_w = buffer.width;
437 let src_h = buffer.height;
438 let stride = buffer.stride as usize;
439 let data = buffer.data;
440
441 let x_ratio = (src_w.saturating_sub(1)) as f32 / (target_w.max(2) - 1) as f32;
443 let y_ratio = (src_h.saturating_sub(1)) as f32 / (target_h.max(2) - 1) as f32;
444
445 for ty in 0..target_h {
446 let gy = (ty as f32) * y_ratio;
447 let y_floor = gy.floor() as usize;
448 let y_ceil = (y_floor + 1).min(src_h as usize - 1);
449 let y_weight = gy - y_floor as f32;
450 let y_weight_inv = 1.0 - y_weight;
451
452 let row_floor_offset = y_floor * stride;
453 let row_ceil_offset = y_ceil * stride;
454
455 for tx in 0..target_w {
456 let gx = (tx as f32) * x_ratio;
457 let x_floor = gx.floor() as usize;
458 let x_ceil = (x_floor + 1).min(src_w as usize - 1);
459 let x_weight = gx - x_floor as f32;
460 let x_weight_inv = 1.0 - x_weight;
461
462 let tl = data[row_floor_offset + x_floor] as f32;
464 let tr = data[row_floor_offset + x_ceil] as f32;
465 let bl = data[row_ceil_offset + x_floor] as f32;
466 let br = data[row_ceil_offset + x_ceil] as f32;
467
468 let top = tl * x_weight_inv + tr * x_weight;
470 let bottom = bl * x_weight_inv + br * x_weight;
471
472 let val = top * y_weight_inv + bottom * y_weight;
474
475 out.push(val.round() as u8);
476 }
477 }
478 out
479}