1use serde::Serialize;
2use serde_json::{json, Value};
3use std::cmp::Ordering;
4use std::collections::{BTreeMap, HashMap};
5use std::path::PathBuf;
6
7use crate::cli::args::SeverityThreshold;
8use crate::cli::common::{file_format, format_range};
9use crate::cli::envelope;
10use crate::cli::error::{AppError, EXIT_CHECK_FINDINGS, EXIT_SUCCESS};
11use crate::cli::sheet_query::{cell_at, cell_has_formula, cell_is_present, header_value};
12use crate::excel::{open_workbook, Cell, CellType, Sheet, Workbook};
13use crate::utils::{cell_reference, index_to_col_name};
14
15const RULES: [CheckRuleId; 8] = [
16 CheckRuleId::BlankHeaders,
17 CheckRuleId::DuplicateHeaders,
18 CheckRuleId::BlankRows,
19 CheckRuleId::BlankColumns,
20 CheckRuleId::NullRatio,
21 CheckRuleId::DuplicateValues,
22 CheckRuleId::TypeDrift,
23 CheckRuleId::FormulaPresence,
24];
25
26pub fn handle(
27 file: PathBuf,
28 sheet: Option<String>,
29 rules: Option<String>,
30 severity_threshold: SeverityThreshold,
31) -> Result<(Value, i32), AppError> {
32 let format_str = file_format(&file);
33 let path_str = file.to_string_lossy().to_string();
34
35 let mut workbook =
36 open_workbook(&file, false).map_err(crate::cli::error::anyhow_to_app_error)?;
37 let report = run_check_report(
38 &mut workbook,
39 sheet.as_deref(),
40 rules.as_deref(),
41 severity_threshold,
42 )?;
43
44 let data = json!({
45 "summary": report.summary,
46 "stats": report.stats,
47 "findings": report.findings,
48 });
49
50 let target = if let Some(sheet_name) = sheet {
51 let sheet_index =
52 workbook
53 .resolve_sheet_by_name(&sheet_name)
54 .map_err(|e| AppError::TargetNotFound {
55 message: e.to_string(),
56 })?;
57 envelope::target_sheet(&sheet_name, sheet_index)
58 } else {
59 envelope::target_workbook()
60 };
61
62 let exit_code = exit_code_for_findings(
63 data["summary"]["finding_count"]
64 .as_u64()
65 .unwrap_or_default() as usize,
66 );
67
68 Ok((
69 envelope::success_envelope(
70 "check",
71 &path_str,
72 &format_str,
73 target,
74 json!({}),
75 data,
76 vec![],
77 ),
78 exit_code,
79 ))
80}
81
82pub(crate) fn run_check_report(
83 workbook: &mut Workbook,
84 sheet: Option<&str>,
85 rules: Option<&str>,
86 severity_threshold: SeverityThreshold,
87) -> Result<CheckReport, AppError> {
88 let selected_rules = parse_rules(rules)?;
89 let threshold = Severity::from_threshold(severity_threshold);
90 let checked_sheet_indices = resolve_checked_sheets(workbook, sheet)?;
91
92 for index in &checked_sheet_indices {
93 let sheet_name = workbook.get_sheet_names()[*index].clone();
94 workbook
95 .ensure_sheet_loaded(*index, &sheet_name)
96 .map_err(crate::cli::error::anyhow_to_app_error)?;
97 }
98
99 let sheet_names = workbook.get_sheet_names();
100 let mut findings = run_rules(workbook, &selected_rules, &checked_sheet_indices)?;
101 let finding_count_before_threshold = findings.len();
102 findings.retain(|finding| finding.severity >= threshold);
103 sort_findings(&mut findings, &sheet_names);
104
105 Ok(CheckReport {
106 summary: summarize_findings(&findings),
107 stats: build_stats(
108 workbook,
109 &checked_sheet_indices,
110 &selected_rules,
111 severity_threshold,
112 finding_count_before_threshold,
113 )?,
114 findings,
115 })
116}
117
118fn parse_rules(value: Option<&str>) -> Result<Vec<CheckRuleId>, AppError> {
119 let Some(value) = value else {
120 return Ok(RULES.to_vec());
121 };
122
123 let mut requested = Vec::new();
124 for raw in value.split(',') {
125 let id = raw.trim();
126 if id.is_empty() {
127 continue;
128 }
129 let rule = CheckRuleId::parse(id).ok_or_else(|| AppError::InvalidQuery {
130 message: format!(
131 "Unknown check rule '{}'. Supported rules: {}",
132 id,
133 RULES
134 .iter()
135 .map(CheckRuleId::as_str)
136 .collect::<Vec<_>>()
137 .join(", ")
138 ),
139 })?;
140 if !requested.contains(&rule) {
141 requested.push(rule);
142 }
143 }
144
145 if requested.is_empty() {
146 return Err(AppError::InvalidQuery {
147 message: "--rules must include at least one rule id".to_string(),
148 });
149 }
150
151 Ok(RULES
152 .iter()
153 .copied()
154 .filter(|rule| requested.contains(rule))
155 .collect())
156}
157
158fn resolve_checked_sheets(
159 workbook: &Workbook,
160 sheet: Option<&str>,
161) -> Result<Vec<usize>, AppError> {
162 if let Some(name) = sheet {
163 workbook
164 .resolve_sheet_by_name(name)
165 .map(|index| vec![index])
166 .map_err(|e| AppError::TargetNotFound {
167 message: e.to_string(),
168 })
169 } else {
170 Ok((0..workbook.get_sheet_names().len()).collect())
171 }
172}
173
174fn run_rules(
175 workbook: &Workbook,
176 rules: &[CheckRuleId],
177 sheet_indices: &[usize],
178) -> Result<Vec<CheckFinding>, AppError> {
179 let mut findings = Vec::new();
180
181 for sheet_index in sheet_indices {
182 let context = SheetCheckContext::new(workbook, *sheet_index)?;
183 for rule in rules {
184 match rule {
185 CheckRuleId::BlankHeaders => findings.extend(find_blank_headers(&context)),
186 CheckRuleId::DuplicateHeaders => findings.extend(find_duplicate_headers(&context)),
187 CheckRuleId::BlankRows => findings.extend(find_blank_rows(&context)),
188 CheckRuleId::BlankColumns => findings.extend(find_blank_columns(&context)),
189 CheckRuleId::NullRatio => findings.extend(check_null_ratio(&context)),
190 CheckRuleId::DuplicateValues => findings.extend(check_duplicate_values(&context)),
191 CheckRuleId::TypeDrift => findings.extend(check_type_drift(&context)),
192 CheckRuleId::FormulaPresence => findings.extend(check_formula_presence(&context)),
193 }
194 }
195 }
196
197 Ok(findings)
198}
199
200struct SheetCheckContext<'a> {
201 sheet: &'a Sheet,
202 header_row: Option<usize>,
203 used_range: String,
204 data_start_row: usize,
205 data_row_count: usize,
206 facts: SheetFacts,
207}
208
209struct SheetFacts {
210 row_has_present: Vec<bool>,
211 column_has_present: Vec<bool>,
212 data_column_has_present: Vec<bool>,
213 data_column_null_rows: Vec<Vec<usize>>,
214 data_column_type_counts: Vec<BTreeMap<&'static str, usize>>,
215 data_column_cells_by_type: Vec<BTreeMap<&'static str, Vec<String>>>,
216 formula_cells: Vec<FormulaFact>,
217 formula_bounds: Option<(usize, usize, usize, usize)>,
218}
219
/// A single formula cell recorded while scanning a sheet.
struct FormulaFact {
    /// Cell reference produced by `cell_reference` for the formula's position.
    cell: String,
    /// The cell's formula text, or its value when no formula text is stored.
    formula: String,
}
224
225impl SheetFacts {
226 fn new(sheet: &Sheet, data_start_row: usize, data_row_count: usize) -> Self {
227 let mut facts = Self {
228 row_has_present: vec![false; sheet.max_rows + 1],
229 column_has_present: vec![false; sheet.max_cols + 1],
230 data_column_has_present: vec![false; sheet.max_cols + 1],
231 data_column_null_rows: vec![Vec::new(); sheet.max_cols + 1],
232 data_column_type_counts: vec![BTreeMap::new(); sheet.max_cols + 1],
233 data_column_cells_by_type: vec![BTreeMap::new(); sheet.max_cols + 1],
234 formula_cells: Vec::new(),
235 formula_bounds: None,
236 };
237
238 for row in 1..=sheet.max_rows {
239 for col in 1..=sheet.max_cols {
240 let cell = cell_at(sheet, row, col);
241 let present = cell_is_present(cell);
242
243 facts.row_has_present[row] |= present;
244 facts.column_has_present[col] |= present;
245
246 if data_row_count == 0 || row < data_start_row {
247 continue;
248 }
249
250 facts.data_column_has_present[col] |= present;
251 if !present {
252 facts.data_column_null_rows[col].push(row);
253 }
254
255 let Some(cell) = cell else {
256 continue;
257 };
258
259 if let Some(kind) = cell_kind(cell) {
260 *facts.data_column_type_counts[col].entry(kind).or_default() += 1;
261 facts.data_column_cells_by_type[col]
262 .entry(kind)
263 .or_default()
264 .push(cell_reference((row, col)));
265 }
266
267 if cell_has_formula(cell) {
268 facts.add_formula(row, col, cell);
269 }
270 }
271 }
272
273 facts
274 }
275
276 fn add_formula(&mut self, row: usize, col: usize, cell: &Cell) {
277 self.formula_bounds = Some(match self.formula_bounds {
278 Some((min_row, min_col, max_row, max_col)) => (
279 min_row.min(row),
280 min_col.min(col),
281 max_row.max(row),
282 max_col.max(col),
283 ),
284 None => (row, col, row, col),
285 });
286 self.formula_cells.push(FormulaFact {
287 cell: cell_reference((row, col)),
288 formula: cell.formula.clone().unwrap_or_else(|| cell.value.clone()),
289 });
290 }
291}
292
293impl<'a> SheetCheckContext<'a> {
294 fn new(workbook: &'a Workbook, sheet_index: usize) -> Result<Self, AppError> {
295 let sheet =
296 workbook
297 .get_sheet_by_index(sheet_index)
298 .ok_or_else(|| AppError::TargetNotFound {
299 message: format!("Sheet index {} not found", sheet_index),
300 })?;
301 let used_range = workbook
302 .get_used_range(sheet_index)
303 .map_err(crate::cli::error::anyhow_to_app_error)?;
304 let (_, header_row) = workbook
305 .find_header_candidates(sheet_index)
306 .map_err(crate::cli::error::anyhow_to_app_error)?;
307 let data_start_row = header_row.map_or(1, |row| row.saturating_add(1));
308 let data_row_count = if sheet.max_rows >= data_start_row {
309 sheet.max_rows - data_start_row + 1
310 } else {
311 0
312 };
313 let facts = SheetFacts::new(sheet, data_start_row, data_row_count);
314
315 Ok(Self {
316 sheet,
317 header_row,
318 used_range,
319 data_start_row,
320 data_row_count,
321 facts,
322 })
323 }
324
325 fn column_name(&self, col: usize) -> String {
326 self.header_row
327 .and_then(|row| cell_at(self.sheet, row, col))
328 .map(|cell| cell.value.trim())
329 .filter(|value| !value.is_empty())
330 .map(ToOwned::to_owned)
331 .unwrap_or_else(|| format!("col_{}", index_to_col_name(col)))
332 }
333
334 fn data_column_range(&self, col: usize) -> Option<String> {
335 if self.data_row_count == 0 {
336 None
337 } else {
338 Some(format_range(
339 self.data_start_row,
340 col,
341 self.sheet.max_rows,
342 col,
343 ))
344 }
345 }
346}
347
348fn find_blank_headers(context: &SheetCheckContext<'_>) -> Vec<CheckFinding> {
349 let Some(header_row) = context.header_row else {
350 return Vec::new();
351 };
352
353 (1..=context.sheet.max_cols)
354 .filter(|col| is_blank_cell(cell_at(context.sheet, header_row, *col)))
355 .map(|col| {
356 let column_label = index_to_col_name(col);
357 let range = cell_reference((header_row, col));
358 CheckFinding {
359 rule_id: CheckRuleId::BlankHeaders,
360 severity: Severity::Warning,
361 sheet: context.sheet.name.clone(),
362 row: Some(header_row),
363 column: Some(col),
364 range: Some(range.clone()),
365 message: format!("Blank header at {range}."),
366 details: json!({
367 "header_row": header_row,
368 "column_label": column_label,
369 "reason": "blank_header",
370 }),
371 }
372 })
373 .collect()
374}
375
376fn find_duplicate_headers(context: &SheetCheckContext<'_>) -> Vec<CheckFinding> {
377 let Some(header_row) = context.header_row else {
378 return Vec::new();
379 };
380
381 let mut counts: HashMap<String, usize> = HashMap::new();
382 let mut first_locations: HashMap<String, (usize, String)> = HashMap::new();
383 let headers: Vec<_> = (1..=context.sheet.max_cols)
384 .map(|col| {
385 let header = header_value(context.sheet, header_row, col);
386 if !header.is_empty() {
387 *counts.entry(header.clone()).or_insert(0) += 1;
388 first_locations
389 .entry(header.clone())
390 .or_insert_with(|| (col, cell_reference((header_row, col))));
391 }
392 header
393 })
394 .collect();
395
396 let mut seen: HashMap<String, usize> = HashMap::new();
397 let mut findings = Vec::new();
398 for (offset, header) in headers.into_iter().enumerate() {
399 if header.is_empty() {
400 continue;
401 }
402
403 let occurrence = seen.entry(header.clone()).or_insert(0);
404 *occurrence += 1;
405 if *occurrence == 1 {
406 continue;
407 }
408
409 let col = offset + 1;
410 let range = cell_reference((header_row, col));
411 let (first_column, first_range) = first_locations
412 .get(&header)
413 .cloned()
414 .unwrap_or_else(|| (col, range.clone()));
415 findings.push(CheckFinding {
416 rule_id: CheckRuleId::DuplicateHeaders,
417 severity: Severity::Warning,
418 sheet: context.sheet.name.clone(),
419 row: Some(header_row),
420 column: Some(col),
421 range: Some(range.clone()),
422 message: format!("Duplicate header '{header}' at {range}."),
423 details: json!({
424 "header": header,
425 "normalized_header": header,
426 "first_column": first_column,
427 "first_range": first_range,
428 "duplicate_count": counts.get(&header).copied().unwrap_or(0),
429 }),
430 });
431 }
432
433 findings
434}
435
436fn find_blank_rows(context: &SheetCheckContext<'_>) -> Vec<CheckFinding> {
437 if context.used_range.is_empty() || context.sheet.max_rows == 0 || context.sheet.max_cols == 0 {
438 return Vec::new();
439 }
440
441 (1..=context.sheet.max_rows)
442 .filter(|row| !context.facts.row_has_present[*row])
443 .map(|row| {
444 let range = format_range(row, 1, row, context.sheet.max_cols);
445 CheckFinding {
446 rule_id: CheckRuleId::BlankRows,
447 severity: Severity::Warning,
448 sheet: context.sheet.name.clone(),
449 row: Some(row),
450 column: None,
451 range: Some(range),
452 message: format!("Blank row {row} in used range {}.", context.used_range),
453 details: json!({
454 "used_range": context.used_range,
455 "max_columns": context.sheet.max_cols,
456 "reason": "blank_row",
457 }),
458 }
459 })
460 .collect()
461}
462
463fn find_blank_columns(context: &SheetCheckContext<'_>) -> Vec<CheckFinding> {
464 if context.used_range.is_empty() || context.sheet.max_rows == 0 || context.sheet.max_cols == 0 {
465 return Vec::new();
466 }
467
468 (1..=context.sheet.max_cols)
469 .filter(|col| !context.facts.column_has_present[*col])
470 .map(|col| {
471 let column_label = index_to_col_name(col);
472 let range = format_range(1, col, context.sheet.max_rows, col);
473 CheckFinding {
474 rule_id: CheckRuleId::BlankColumns,
475 severity: Severity::Warning,
476 sheet: context.sheet.name.clone(),
477 row: None,
478 column: Some(col),
479 range: Some(range),
480 message: format!(
481 "Blank column {column_label} in used range {}.",
482 context.used_range
483 ),
484 details: json!({
485 "used_range": context.used_range,
486 "column_label": column_label,
487 "max_rows": context.sheet.max_rows,
488 "reason": "blank_column",
489 }),
490 }
491 })
492 .collect()
493}
494
495fn check_null_ratio(context: &SheetCheckContext<'_>) -> Vec<CheckFinding> {
496 if context.data_row_count == 0 {
497 return Vec::new();
498 }
499
500 let mut findings = Vec::new();
501 for col in 1..=context.sheet.max_cols {
502 let null_rows = &context.facts.data_column_null_rows[col];
503 if null_rows.is_empty() {
504 continue;
505 }
506
507 let null_count = null_rows.len();
508 let null_ratio = rounded_ratio(null_count, context.data_row_count);
509 let severity = if null_count == context.data_row_count {
510 Severity::Error
511 } else if null_ratio >= 0.5 {
512 Severity::Warning
513 } else {
514 Severity::Info
515 };
516 let column_name = context.column_name(col);
517 let first_null_row = null_rows[0];
518 let first_null_cell = cell_reference((first_null_row, col));
519
520 findings.push(CheckFinding {
521 rule_id: CheckRuleId::NullRatio,
522 severity,
523 sheet: context.sheet.name.clone(),
524 row: Some(first_null_row),
525 column: Some(col),
526 range: context.data_column_range(col),
527 message: format!(
528 "Column '{}' has blank values in {} of {} data rows.",
529 column_name, null_count, context.data_row_count
530 ),
531 details: json!({
532 "column_name": column_name,
533 "data_row_count": context.data_row_count,
534 "first_null_cell": first_null_cell,
535 "null_count": null_count,
536 "null_ratio": null_ratio,
537 "severity_threshold": {
538 "info": "> 0 and < 0.5",
539 "warning": ">= 0.5 and < 1.0",
540 "error": "1.0"
541 }
542 }),
543 });
544 }
545
546 findings
547}
548
549fn check_duplicate_values(context: &SheetCheckContext<'_>) -> Vec<CheckFinding> {
550 let Some((candidate_col, selection)) = default_duplicate_candidate(context) else {
551 return Vec::new();
552 };
553
554 let mut values: BTreeMap<String, Vec<usize>> = BTreeMap::new();
555 for row in context.data_start_row..=context.sheet.max_rows {
556 if let Some(cell) = cell_at(context.sheet, row, candidate_col) {
557 let value = cell.value.trim();
558 if !value.is_empty() {
559 values.entry(value.to_string()).or_default().push(row);
560 }
561 }
562 }
563
564 let column_name = context.column_name(candidate_col);
565 values
566 .into_iter()
567 .filter(|(_, rows)| rows.len() > 1)
568 .map(|(duplicate_value, rows)| {
569 let cells: Vec<String> = rows
570 .iter()
571 .map(|row| cell_reference((*row, candidate_col)))
572 .collect();
573
574 CheckFinding {
575 rule_id: CheckRuleId::DuplicateValues,
576 severity: Severity::Warning,
577 sheet: context.sheet.name.clone(),
578 row: rows.first().copied(),
579 column: Some(candidate_col),
580 range: context.data_column_range(candidate_col),
581 message: format!(
582 "Column '{}' has duplicate value '{}' in {} rows.",
583 column_name,
584 duplicate_value,
585 rows.len()
586 ),
587 details: json!({
588 "candidate_column": {
589 "column": candidate_col,
590 "column_name": column_name,
591 "selection": selection
592 },
593 "duplicate_value": duplicate_value,
594 "occurrence_count": rows.len(),
595 "rows": rows,
596 "cells": cells
597 }),
598 }
599 })
600 .collect()
601}
602
603fn default_duplicate_candidate(context: &SheetCheckContext<'_>) -> Option<(usize, &'static str)> {
604 if context.data_row_count == 0 {
605 return None;
606 }
607
608 if let Some(header_row) = context.header_row {
609 for col in 1..=context.sheet.max_cols {
610 let has_header = cell_at(context.sheet, header_row, col)
611 .map(|cell| !cell.value.trim().is_empty())
612 .unwrap_or(false);
613 if has_header && context.facts.data_column_has_present[col] {
614 return Some((col, "first non-empty header data column"));
615 }
616 }
617 }
618
619 (1..=context.sheet.max_cols)
620 .find(|col| context.facts.data_column_has_present[*col])
621 .map(|col| (col, "first data column with values"))
622}
623
624fn check_type_drift(context: &SheetCheckContext<'_>) -> Vec<CheckFinding> {
625 if context.data_row_count == 0 {
626 return Vec::new();
627 }
628
629 let mut findings = Vec::new();
630 for col in 1..=context.sheet.max_cols {
631 let type_counts = &context.facts.data_column_type_counts[col];
632 let cells_by_type = &context.facts.data_column_cells_by_type[col];
633 if type_counts.len() < 2 {
634 continue;
635 }
636
637 let dominant_type = dominant_type(type_counts);
638 let Some((drift_type, drift_count)) = first_drift_type(type_counts, dominant_type) else {
639 continue;
640 };
641 let Some(first_drift_cell) = cells_by_type
642 .get(drift_type)
643 .and_then(|cells| cells.first())
644 .cloned()
645 else {
646 continue;
647 };
648 let Some((first_drift_row, _)) = crate::utils::parse_cell_reference(&first_drift_cell)
649 else {
650 continue;
651 };
652 let column_name = context.column_name(col);
653 let sample_drift_cells: Vec<String> = cells_by_type
654 .get(drift_type)
655 .into_iter()
656 .flat_map(|cells| cells.iter().take(5).cloned())
657 .collect();
658
659 findings.push(CheckFinding {
660 rule_id: CheckRuleId::TypeDrift,
661 severity: Severity::Warning,
662 sheet: context.sheet.name.clone(),
663 row: Some(first_drift_row),
664 column: Some(col),
665 range: context.data_column_range(col),
666 message: format!(
667 "Column '{}' mixes {} values with dominant {} values.",
668 column_name, drift_type, dominant_type
669 ),
670 details: json!({
671 "column_name": column_name,
672 "dominant_type": dominant_type,
673 "drift_type": drift_type,
674 "drift_count": drift_count,
675 "type_counts": type_counts,
676 "sample_drift_cells": sample_drift_cells
677 }),
678 });
679 }
680
681 findings
682}
683
684fn check_formula_presence(context: &SheetCheckContext<'_>) -> Vec<CheckFinding> {
685 if context.data_row_count == 0 {
686 return Vec::new();
687 }
688
689 if context.facts.formula_cells.is_empty() {
690 return Vec::new();
691 }
692
693 let formula_count = context.facts.formula_cells.len();
694 let formula_ratio = rounded_ratio(formula_count, context.data_row_count);
695 let formulas: Vec<Value> = context
696 .facts
697 .formula_cells
698 .iter()
699 .take(5)
700 .map(|formula| {
701 json!({
702 "cell": formula.cell.clone(),
703 "formula": formula.formula.clone(),
704 })
705 })
706 .collect();
707 let Some((min_row, min_col, max_row, max_col)) = context.facts.formula_bounds else {
708 return Vec::new();
709 };
710
711 vec![CheckFinding {
712 rule_id: CheckRuleId::FormulaPresence,
713 severity: Severity::Info,
714 sheet: context.sheet.name.clone(),
715 row: Some(min_row),
716 column: Some(min_col),
717 range: Some(format_range(min_row, min_col, max_row, max_col)),
718 message: format!(
719 "Sheet '{}' contains {} formula cells.",
720 context.sheet.name, formula_count
721 ),
722 details: json!({
723 "data_row_count": context.data_row_count,
724 "formula_count": formula_count,
725 "formula_ratio": formula_ratio,
726 "sample_formula_cells": formulas
727 }),
728 }]
729}
730
731fn is_blank_cell(cell: Option<&Cell>) -> bool {
732 cell.map(|cell| !cell_has_formula(cell) && cell.value.trim().is_empty())
733 .unwrap_or(true)
734}
735
736fn cell_kind(cell: &Cell) -> Option<&'static str> {
737 if !cell_is_present(Some(cell)) {
738 return None;
739 }
740
741 match cell.cell_type {
742 CellType::Text => Some("string"),
743 CellType::Number => Some("number"),
744 CellType::Date => Some("date"),
745 CellType::Boolean => Some("boolean"),
746 CellType::Empty => None,
747 }
748}
749
/// Returns the kind with the highest count.
///
/// Ties go to the alphabetically first kind. An empty map yields `"string"`.
fn dominant_type(type_counts: &BTreeMap<&'static str, usize>) -> &'static str {
    let mut best: Option<(&'static str, usize)> = None;
    for (&kind, &count) in type_counts {
        // Keys arrive in ascending order, so replacing only on a strictly
        // larger count leaves the alphabetically first kind in place on ties.
        match best {
            Some((_, top)) if count <= top => {}
            _ => best = Some((kind, count)),
        }
    }
    best.map_or("string", |(kind, _)| kind)
}
761
/// Returns the least frequent kind other than `dominant_type`, with its count.
///
/// Ties go to the alphabetically first kind. Returns `None` when the map holds
/// no kind other than the dominant one.
fn first_drift_type(
    type_counts: &BTreeMap<&'static str, usize>,
    dominant_type: &'static str,
) -> Option<(&'static str, usize)> {
    let mut rarest: Option<(&'static str, usize)> = None;
    for (&kind, &count) in type_counts {
        if kind == dominant_type {
            continue;
        }
        // Ascending key order plus a strict `<` keeps the first name on ties.
        let is_rarer = match rarest {
            Some((_, fewest)) => count < fewest,
            None => true,
        };
        if is_rarer {
            rarest = Some((kind, count));
        }
    }
    rarest
}
776
/// Returns `numerator / denominator` rounded to four decimal places.
///
/// A zero denominator yields `0.0` instead of a division by zero.
fn rounded_ratio(numerator: usize, denominator: usize) -> f64 {
    const SCALE: f64 = 10_000.0;

    match denominator {
        0 => 0.0,
        _ => {
            let ratio = numerator as f64 / denominator as f64;
            (ratio * SCALE).round() / SCALE
        }
    }
}
784
785fn summarize_findings(findings: &[CheckFinding]) -> Value {
786 let error_count = findings
787 .iter()
788 .filter(|finding| finding.severity == Severity::Error)
789 .count();
790 let warning_count = findings
791 .iter()
792 .filter(|finding| finding.severity == Severity::Warning)
793 .count();
794 let info_count = findings
795 .iter()
796 .filter(|finding| finding.severity == Severity::Info)
797 .count();
798 let finding_count = findings.len();
799
800 json!({
801 "status": if finding_count == 0 { "pass" } else { "fail" },
802 "finding_count": finding_count,
803 "error_count": error_count,
804 "warning_count": warning_count,
805 "info_count": info_count,
806 })
807}
808
809fn build_stats(
810 workbook: &Workbook,
811 checked_sheet_indices: &[usize],
812 rules: &[CheckRuleId],
813 severity_threshold: SeverityThreshold,
814 finding_count_before_threshold: usize,
815) -> Result<Value, AppError> {
816 let checked_sheets: Result<Vec<_>, AppError> = checked_sheet_indices
817 .iter()
818 .map(|index| {
819 let sheet =
820 workbook
821 .get_sheet_by_index(*index)
822 .ok_or_else(|| AppError::TargetNotFound {
823 message: format!("Sheet index {} not found", index),
824 })?;
825 let used_range = workbook
826 .get_used_range(*index)
827 .map_err(crate::cli::error::anyhow_to_app_error)?;
828
829 Ok(json!({
830 "name": sheet.name,
831 "index": index,
832 "used_range": used_range,
833 "max_rows": sheet.max_rows,
834 "max_cols": sheet.max_cols,
835 }))
836 })
837 .collect();
838
839 Ok(json!({
840 "sheet_count": workbook.get_sheet_names().len(),
841 "checked_sheet_count": checked_sheet_indices.len(),
842 "checked_sheets": checked_sheets?,
843 "rules_run": rules.iter().map(CheckRuleId::as_str).collect::<Vec<_>>(),
844 "severity_threshold": severity_threshold.as_str(),
845 "finding_count_before_threshold": finding_count_before_threshold,
846 }))
847}
848
849fn exit_code_for_findings(finding_count: usize) -> i32 {
850 if finding_count == 0 {
851 EXIT_SUCCESS
852 } else {
853 EXIT_CHECK_FINDINGS
854 }
855}
856
857fn sort_findings(findings: &mut [CheckFinding], sheet_names: &[String]) {
858 let sheet_order: HashMap<&str, usize> = sheet_names
859 .iter()
860 .enumerate()
861 .map(|(index, name)| (name.as_str(), index))
862 .collect();
863
864 findings.sort_by(|left, right| {
865 compare_usize(
866 sheet_order.get(left.sheet.as_str()).copied(),
867 sheet_order.get(right.sheet.as_str()).copied(),
868 )
869 .then_with(|| left.rule_id.order().cmp(&right.rule_id.order()))
870 .then_with(|| compare_location(left.row, right.row))
871 .then_with(|| compare_location(left.column, right.column))
872 .then_with(|| left.range.cmp(&right.range))
873 .then_with(|| left.message.cmp(&right.message))
874 .then_with(|| left.details.to_string().cmp(&right.details.to_string()))
875 });
876}
877
/// Orders two optional locations ascending, with `None` sorting after any
/// `Some` value.
fn compare_location(left: Option<usize>, right: Option<usize>) -> Ordering {
    // `false < true`, so a missing side sorts last; when both sides agree on
    // presence, the derived `Option` ordering compares the inner values.
    left.is_none()
        .cmp(&right.is_none())
        .then_with(|| left.cmp(&right))
}
886
/// Orders two optional indices ascending, with `None` sorting after any
/// `Some` value.
fn compare_usize(left: Option<usize>, right: Option<usize>) -> Ordering {
    let Some(lhs) = left else {
        // Left is missing: it ties with a missing right, otherwise sorts last.
        return if right.is_some() {
            Ordering::Greater
        } else {
            Ordering::Equal
        };
    };
    right.map_or(Ordering::Less, |rhs| lhs.cmp(&rhs))
}
895
896#[derive(Clone, Debug)]
897pub(crate) struct CheckReport {
898 pub(crate) summary: Value,
899 pub(crate) stats: Value,
900 pub(crate) findings: Vec<CheckFinding>,
901}
902
903#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize)]
904#[serde(rename_all = "snake_case")]
905pub(crate) enum CheckRuleId {
906 BlankHeaders,
907 DuplicateHeaders,
908 BlankRows,
909 BlankColumns,
910 NullRatio,
911 DuplicateValues,
912 TypeDrift,
913 FormulaPresence,
914}
915
916impl CheckRuleId {
917 fn parse(value: &str) -> Option<Self> {
918 RULES.iter().copied().find(|rule| rule.as_str() == value)
919 }
920
921 pub(crate) fn as_str(&self) -> &'static str {
922 match self {
923 CheckRuleId::BlankHeaders => "blank_headers",
924 CheckRuleId::DuplicateHeaders => "duplicate_headers",
925 CheckRuleId::BlankRows => "blank_rows",
926 CheckRuleId::BlankColumns => "blank_columns",
927 CheckRuleId::NullRatio => "null_ratio",
928 CheckRuleId::DuplicateValues => "duplicate_values",
929 CheckRuleId::TypeDrift => "type_drift",
930 CheckRuleId::FormulaPresence => "formula_presence",
931 }
932 }
933
934 fn order(&self) -> usize {
935 RULES
936 .iter()
937 .position(|rule| rule == self)
938 .unwrap_or(usize::MAX)
939 }
940}
941
942#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize)]
943#[serde(rename_all = "lowercase")]
944pub(crate) enum Severity {
945 Info,
946 Warning,
947 Error,
948}
949
950impl Severity {
951 fn from_threshold(threshold: SeverityThreshold) -> Self {
952 match threshold {
953 SeverityThreshold::Info => Severity::Info,
954 SeverityThreshold::Warning => Severity::Warning,
955 SeverityThreshold::Error => Severity::Error,
956 }
957 }
958}
959
960#[derive(Clone, Debug, Serialize)]
961pub(crate) struct CheckFinding {
962 pub(crate) rule_id: CheckRuleId,
963 pub(crate) severity: Severity,
964 pub(crate) sheet: String,
965 pub(crate) row: Option<usize>,
966 pub(crate) column: Option<usize>,
967 pub(crate) range: Option<String>,
968 pub(crate) message: String,
969 pub(crate) details: Value,
970}
971
#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;
    use crate::cli::error::{EXIT_CHECK_FINDINGS, EXIT_SUCCESS};
    use crate::excel::{Cell, Sheet, Workbook};

    /// Builds a warning-level finding with the given rule, sheet and location.
    fn warning_finding(
        rule_id: CheckRuleId,
        sheet: &str,
        row: Option<usize>,
        column: Option<usize>,
        range: Option<&str>,
        message: &str,
        details: Value,
    ) -> CheckFinding {
        CheckFinding {
            rule_id,
            severity: Severity::Warning,
            sheet: sheet.to_string(),
            row,
            column,
            range: range.map(str::to_string),
            message: message.to_string(),
            details,
        }
    }

    /// Builds a loaded sheet from rows of string values; `values[0][0]` lands
    /// in row 1, column 1, and index 0 of each dimension is left empty.
    fn sheet_with_values(name: &str, values: &[&[&str]]) -> Sheet {
        let max_rows = values.len();
        let max_cols = values.iter().map(|row| row.len()).max().unwrap_or(0);
        let mut data = vec![vec![Cell::empty(); max_cols + 1]; max_rows + 1];

        for (slots, row) in data.iter_mut().skip(1).zip(values.iter()) {
            for (slot, text) in slots.iter_mut().skip(1).zip(row.iter()) {
                *slot = Cell::new((*text).to_string(), false);
            }
        }

        Sheet {
            name: name.to_string(),
            data,
            max_rows,
            max_cols,
            is_loaded: true,
            freeze_panes: crate::excel::FreezePanes::none(),
        }
    }

    #[test]
    fn exit_code_uses_one_for_successful_reports_with_findings() {
        assert_eq!(exit_code_for_findings(0), EXIT_SUCCESS);
        assert_eq!(exit_code_for_findings(2), EXIT_CHECK_FINDINGS);
    }

    #[test]
    fn findings_sort_by_sheet_rule_position_then_location() {
        let mut findings = vec![
            warning_finding(
                CheckRuleId::DuplicateHeaders,
                "Orders",
                Some(3),
                Some(2),
                Some("B3"),
                "later",
                json!({"field": "customer"}),
            ),
            warning_finding(
                CheckRuleId::BlankHeaders,
                "Summary",
                None,
                None,
                None,
                "workbook-level",
                json!({}),
            ),
            warning_finding(
                CheckRuleId::BlankHeaders,
                "Orders",
                Some(2),
                Some(1),
                Some("A2"),
                "earlier",
                json!({}),
            ),
        ];

        let sheet_names = ["Summary".to_string(), "Orders".to_string()];
        sort_findings(&mut findings, &sheet_names);

        assert_eq!(findings[0].sheet, "Summary");
        assert_eq!(findings[1].rule_id, CheckRuleId::BlankHeaders);
        assert_eq!(findings[1].row, Some(2));
        assert_eq!(findings[2].rule_id, CheckRuleId::DuplicateHeaders);
    }

    #[test]
    fn run_check_report_reuses_rule_pipeline_for_structured_findings() {
        let sheet = sheet_with_values("Data", &[&["Name", "Name"], &["Ada", ""], &["", ""]]);
        let mut workbook = Workbook::from_sheets_for_test(vec![sheet]);

        let report = run_check_report(&mut workbook, None, None, SeverityThreshold::Info).unwrap();

        assert_eq!(report.summary["status"], "fail");
        assert_eq!(report.stats["checked_sheet_count"], 1);
        assert!(!report.findings.is_empty());
        let has_duplicate_header = report
            .findings
            .iter()
            .any(|finding| finding.rule_id == CheckRuleId::DuplicateHeaders);
        assert!(has_duplicate_header);
    }
}