excel_cli/cli/
check.rs

1use serde::Serialize;
2use serde_json::{json, Value};
3use std::cmp::Ordering;
4use std::collections::{BTreeMap, HashMap};
5use std::path::PathBuf;
6
7use crate::cli::args::SeverityThreshold;
8use crate::cli::common::{file_format, format_range};
9use crate::cli::envelope;
10use crate::cli::error::{AppError, EXIT_CHECK_FINDINGS, EXIT_SUCCESS};
11use crate::cli::sheet_query::{cell_at, cell_has_formula, cell_is_present, header_value};
12use crate::excel::{open_workbook, Cell, CellType, Sheet, Workbook};
13use crate::utils::{cell_reference, index_to_col_name};
14
/// Every supported check rule, listed in canonical order.
///
/// `parse_rules` returns the selected rules in this order and
/// `CheckRuleId::order` uses a rule's position here as its sort key.
const RULES: [CheckRuleId; 8] = [
    CheckRuleId::BlankHeaders,
    CheckRuleId::DuplicateHeaders,
    CheckRuleId::BlankRows,
    CheckRuleId::BlankColumns,
    CheckRuleId::NullRatio,
    CheckRuleId::DuplicateValues,
    CheckRuleId::TypeDrift,
    CheckRuleId::FormulaPresence,
];
25
26pub fn handle(
27    file: PathBuf,
28    sheet: Option<String>,
29    rules: Option<String>,
30    severity_threshold: SeverityThreshold,
31) -> Result<(Value, i32), AppError> {
32    let format_str = file_format(&file);
33    let path_str = file.to_string_lossy().to_string();
34
35    let mut workbook =
36        open_workbook(&file, false).map_err(crate::cli::error::anyhow_to_app_error)?;
37    let report = run_check_report(
38        &mut workbook,
39        sheet.as_deref(),
40        rules.as_deref(),
41        severity_threshold,
42    )?;
43
44    let data = json!({
45        "summary": report.summary,
46        "stats": report.stats,
47        "findings": report.findings,
48    });
49
50    let target = if let Some(sheet_name) = sheet {
51        let sheet_index =
52            workbook
53                .resolve_sheet_by_name(&sheet_name)
54                .map_err(|e| AppError::TargetNotFound {
55                    message: e.to_string(),
56                })?;
57        envelope::target_sheet(&sheet_name, sheet_index)
58    } else {
59        envelope::target_workbook()
60    };
61
62    let exit_code = exit_code_for_findings(
63        data["summary"]["finding_count"]
64            .as_u64()
65            .unwrap_or_default() as usize,
66    );
67
68    Ok((
69        envelope::success_envelope(
70            "check",
71            &path_str,
72            &format_str,
73            target,
74            json!({}),
75            data,
76            vec![],
77        ),
78        exit_code,
79    ))
80}
81
82pub(crate) fn run_check_report(
83    workbook: &mut Workbook,
84    sheet: Option<&str>,
85    rules: Option<&str>,
86    severity_threshold: SeverityThreshold,
87) -> Result<CheckReport, AppError> {
88    let selected_rules = parse_rules(rules)?;
89    let threshold = Severity::from_threshold(severity_threshold);
90    let checked_sheet_indices = resolve_checked_sheets(workbook, sheet)?;
91
92    for index in &checked_sheet_indices {
93        let sheet_name = workbook.get_sheet_names()[*index].clone();
94        workbook
95            .ensure_sheet_loaded(*index, &sheet_name)
96            .map_err(crate::cli::error::anyhow_to_app_error)?;
97    }
98
99    let sheet_names = workbook.get_sheet_names();
100    let mut findings = run_rules(workbook, &selected_rules, &checked_sheet_indices)?;
101    let finding_count_before_threshold = findings.len();
102    findings.retain(|finding| finding.severity >= threshold);
103    sort_findings(&mut findings, &sheet_names);
104
105    Ok(CheckReport {
106        summary: summarize_findings(&findings),
107        stats: build_stats(
108            workbook,
109            &checked_sheet_indices,
110            &selected_rules,
111            severity_threshold,
112            finding_count_before_threshold,
113        )?,
114        findings,
115    })
116}
117
118fn parse_rules(value: Option<&str>) -> Result<Vec<CheckRuleId>, AppError> {
119    let Some(value) = value else {
120        return Ok(RULES.to_vec());
121    };
122
123    let mut requested = Vec::new();
124    for raw in value.split(',') {
125        let id = raw.trim();
126        if id.is_empty() {
127            continue;
128        }
129        let rule = CheckRuleId::parse(id).ok_or_else(|| AppError::InvalidQuery {
130            message: format!(
131                "Unknown check rule '{}'. Supported rules: {}",
132                id,
133                RULES
134                    .iter()
135                    .map(CheckRuleId::as_str)
136                    .collect::<Vec<_>>()
137                    .join(", ")
138            ),
139        })?;
140        if !requested.contains(&rule) {
141            requested.push(rule);
142        }
143    }
144
145    if requested.is_empty() {
146        return Err(AppError::InvalidQuery {
147            message: "--rules must include at least one rule id".to_string(),
148        });
149    }
150
151    Ok(RULES
152        .iter()
153        .copied()
154        .filter(|rule| requested.contains(rule))
155        .collect())
156}
157
158fn resolve_checked_sheets(
159    workbook: &Workbook,
160    sheet: Option<&str>,
161) -> Result<Vec<usize>, AppError> {
162    if let Some(name) = sheet {
163        workbook
164            .resolve_sheet_by_name(name)
165            .map(|index| vec![index])
166            .map_err(|e| AppError::TargetNotFound {
167                message: e.to_string(),
168            })
169    } else {
170        Ok((0..workbook.get_sheet_names().len()).collect())
171    }
172}
173
174fn run_rules(
175    workbook: &Workbook,
176    rules: &[CheckRuleId],
177    sheet_indices: &[usize],
178) -> Result<Vec<CheckFinding>, AppError> {
179    let mut findings = Vec::new();
180
181    for sheet_index in sheet_indices {
182        let context = SheetCheckContext::new(workbook, *sheet_index)?;
183        for rule in rules {
184            match rule {
185                CheckRuleId::BlankHeaders => findings.extend(find_blank_headers(&context)),
186                CheckRuleId::DuplicateHeaders => findings.extend(find_duplicate_headers(&context)),
187                CheckRuleId::BlankRows => findings.extend(find_blank_rows(&context)),
188                CheckRuleId::BlankColumns => findings.extend(find_blank_columns(&context)),
189                CheckRuleId::NullRatio => findings.extend(check_null_ratio(&context)),
190                CheckRuleId::DuplicateValues => findings.extend(check_duplicate_values(&context)),
191                CheckRuleId::TypeDrift => findings.extend(check_type_drift(&context)),
192                CheckRuleId::FormulaPresence => findings.extend(check_formula_presence(&context)),
193            }
194        }
195    }
196
197    Ok(findings)
198}
199
/// Everything the individual rules need to know about one sheet.
struct SheetCheckContext<'a> {
    /// The sheet being checked.
    sheet: &'a Sheet,
    /// Header row as reported by `find_header_candidates`, if any.
    header_row: Option<usize>,
    /// Used range as reported by `get_used_range`.
    used_range: String,
    /// First data row: the row after the header, or row 1 when there is no header.
    data_start_row: usize,
    /// Number of rows from `data_start_row` through `sheet.max_rows` (0 when none).
    data_row_count: usize,
    /// Per-row / per-column facts gathered in a single scan of the sheet.
    facts: SheetFacts,
}
208
/// Facts collected by scanning rows `1..=max_rows` and columns `1..=max_cols`.
///
/// Every vector is indexed directly by the 1-based row or column number, so
/// each has one extra slot and index 0 is never written.
struct SheetFacts {
    /// `true` for rows containing at least one cell that `cell_is_present` accepts.
    row_has_present: Vec<bool>,
    /// `true` for columns containing at least one present cell in any row.
    column_has_present: Vec<bool>,
    /// Like `column_has_present`, but only counting rows in the data area.
    data_column_has_present: Vec<bool>,
    /// Per column: the data rows whose cell is not present.
    data_column_null_rows: Vec<Vec<usize>>,
    /// Per column: how many data cells have each `cell_kind` label.
    data_column_type_counts: Vec<BTreeMap<&'static str, usize>>,
    /// Per column: the cell references of the data cells with each label.
    data_column_cells_by_type: Vec<BTreeMap<&'static str, Vec<String>>>,
    /// Formula cells found in the data area, in scan order.
    formula_cells: Vec<FormulaFact>,
    /// Bounding box of `formula_cells` as `(min_row, min_col, max_row, max_col)`.
    formula_bounds: Option<(usize, usize, usize, usize)>,
}
219
/// One formula cell recorded while scanning a sheet.
struct FormulaFact {
    /// Cell reference produced by `cell_reference`.
    cell: String,
    /// The cell's formula text, or its value when no formula text is stored.
    formula: String,
}
224
225impl SheetFacts {
226    fn new(sheet: &Sheet, data_start_row: usize, data_row_count: usize) -> Self {
227        let mut facts = Self {
228            row_has_present: vec![false; sheet.max_rows + 1],
229            column_has_present: vec![false; sheet.max_cols + 1],
230            data_column_has_present: vec![false; sheet.max_cols + 1],
231            data_column_null_rows: vec![Vec::new(); sheet.max_cols + 1],
232            data_column_type_counts: vec![BTreeMap::new(); sheet.max_cols + 1],
233            data_column_cells_by_type: vec![BTreeMap::new(); sheet.max_cols + 1],
234            formula_cells: Vec::new(),
235            formula_bounds: None,
236        };
237
238        for row in 1..=sheet.max_rows {
239            for col in 1..=sheet.max_cols {
240                let cell = cell_at(sheet, row, col);
241                let present = cell_is_present(cell);
242
243                facts.row_has_present[row] |= present;
244                facts.column_has_present[col] |= present;
245
246                if data_row_count == 0 || row < data_start_row {
247                    continue;
248                }
249
250                facts.data_column_has_present[col] |= present;
251                if !present {
252                    facts.data_column_null_rows[col].push(row);
253                }
254
255                let Some(cell) = cell else {
256                    continue;
257                };
258
259                if let Some(kind) = cell_kind(cell) {
260                    *facts.data_column_type_counts[col].entry(kind).or_default() += 1;
261                    facts.data_column_cells_by_type[col]
262                        .entry(kind)
263                        .or_default()
264                        .push(cell_reference((row, col)));
265                }
266
267                if cell_has_formula(cell) {
268                    facts.add_formula(row, col, cell);
269                }
270            }
271        }
272
273        facts
274    }
275
276    fn add_formula(&mut self, row: usize, col: usize, cell: &Cell) {
277        self.formula_bounds = Some(match self.formula_bounds {
278            Some((min_row, min_col, max_row, max_col)) => (
279                min_row.min(row),
280                min_col.min(col),
281                max_row.max(row),
282                max_col.max(col),
283            ),
284            None => (row, col, row, col),
285        });
286        self.formula_cells.push(FormulaFact {
287            cell: cell_reference((row, col)),
288            formula: cell.formula.clone().unwrap_or_else(|| cell.value.clone()),
289        });
290    }
291}
292
293impl<'a> SheetCheckContext<'a> {
294    fn new(workbook: &'a Workbook, sheet_index: usize) -> Result<Self, AppError> {
295        let sheet =
296            workbook
297                .get_sheet_by_index(sheet_index)
298                .ok_or_else(|| AppError::TargetNotFound {
299                    message: format!("Sheet index {} not found", sheet_index),
300                })?;
301        let used_range = workbook
302            .get_used_range(sheet_index)
303            .map_err(crate::cli::error::anyhow_to_app_error)?;
304        let (_, header_row) = workbook
305            .find_header_candidates(sheet_index)
306            .map_err(crate::cli::error::anyhow_to_app_error)?;
307        let data_start_row = header_row.map_or(1, |row| row.saturating_add(1));
308        let data_row_count = if sheet.max_rows >= data_start_row {
309            sheet.max_rows - data_start_row + 1
310        } else {
311            0
312        };
313        let facts = SheetFacts::new(sheet, data_start_row, data_row_count);
314
315        Ok(Self {
316            sheet,
317            header_row,
318            used_range,
319            data_start_row,
320            data_row_count,
321            facts,
322        })
323    }
324
325    fn column_name(&self, col: usize) -> String {
326        self.header_row
327            .and_then(|row| cell_at(self.sheet, row, col))
328            .map(|cell| cell.value.trim())
329            .filter(|value| !value.is_empty())
330            .map(ToOwned::to_owned)
331            .unwrap_or_else(|| format!("col_{}", index_to_col_name(col)))
332    }
333
334    fn data_column_range(&self, col: usize) -> Option<String> {
335        if self.data_row_count == 0 {
336            None
337        } else {
338            Some(format_range(
339                self.data_start_row,
340                col,
341                self.sheet.max_rows,
342                col,
343            ))
344        }
345    }
346}
347
348fn find_blank_headers(context: &SheetCheckContext<'_>) -> Vec<CheckFinding> {
349    let Some(header_row) = context.header_row else {
350        return Vec::new();
351    };
352
353    (1..=context.sheet.max_cols)
354        .filter(|col| is_blank_cell(cell_at(context.sheet, header_row, *col)))
355        .map(|col| {
356            let column_label = index_to_col_name(col);
357            let range = cell_reference((header_row, col));
358            CheckFinding {
359                rule_id: CheckRuleId::BlankHeaders,
360                severity: Severity::Warning,
361                sheet: context.sheet.name.clone(),
362                row: Some(header_row),
363                column: Some(col),
364                range: Some(range.clone()),
365                message: format!("Blank header at {range}."),
366                details: json!({
367                    "header_row": header_row,
368                    "column_label": column_label,
369                    "reason": "blank_header",
370                }),
371            }
372        })
373        .collect()
374}
375
376fn find_duplicate_headers(context: &SheetCheckContext<'_>) -> Vec<CheckFinding> {
377    let Some(header_row) = context.header_row else {
378        return Vec::new();
379    };
380
381    let mut counts: HashMap<String, usize> = HashMap::new();
382    let mut first_locations: HashMap<String, (usize, String)> = HashMap::new();
383    let headers: Vec<_> = (1..=context.sheet.max_cols)
384        .map(|col| {
385            let header = header_value(context.sheet, header_row, col);
386            if !header.is_empty() {
387                *counts.entry(header.clone()).or_insert(0) += 1;
388                first_locations
389                    .entry(header.clone())
390                    .or_insert_with(|| (col, cell_reference((header_row, col))));
391            }
392            header
393        })
394        .collect();
395
396    let mut seen: HashMap<String, usize> = HashMap::new();
397    let mut findings = Vec::new();
398    for (offset, header) in headers.into_iter().enumerate() {
399        if header.is_empty() {
400            continue;
401        }
402
403        let occurrence = seen.entry(header.clone()).or_insert(0);
404        *occurrence += 1;
405        if *occurrence == 1 {
406            continue;
407        }
408
409        let col = offset + 1;
410        let range = cell_reference((header_row, col));
411        let (first_column, first_range) = first_locations
412            .get(&header)
413            .cloned()
414            .unwrap_or_else(|| (col, range.clone()));
415        findings.push(CheckFinding {
416            rule_id: CheckRuleId::DuplicateHeaders,
417            severity: Severity::Warning,
418            sheet: context.sheet.name.clone(),
419            row: Some(header_row),
420            column: Some(col),
421            range: Some(range.clone()),
422            message: format!("Duplicate header '{header}' at {range}."),
423            details: json!({
424                "header": header,
425                "normalized_header": header,
426                "first_column": first_column,
427                "first_range": first_range,
428                "duplicate_count": counts.get(&header).copied().unwrap_or(0),
429            }),
430        });
431    }
432
433    findings
434}
435
436fn find_blank_rows(context: &SheetCheckContext<'_>) -> Vec<CheckFinding> {
437    if context.used_range.is_empty() || context.sheet.max_rows == 0 || context.sheet.max_cols == 0 {
438        return Vec::new();
439    }
440
441    (1..=context.sheet.max_rows)
442        .filter(|row| !context.facts.row_has_present[*row])
443        .map(|row| {
444            let range = format_range(row, 1, row, context.sheet.max_cols);
445            CheckFinding {
446                rule_id: CheckRuleId::BlankRows,
447                severity: Severity::Warning,
448                sheet: context.sheet.name.clone(),
449                row: Some(row),
450                column: None,
451                range: Some(range),
452                message: format!("Blank row {row} in used range {}.", context.used_range),
453                details: json!({
454                    "used_range": context.used_range,
455                    "max_columns": context.sheet.max_cols,
456                    "reason": "blank_row",
457                }),
458            }
459        })
460        .collect()
461}
462
463fn find_blank_columns(context: &SheetCheckContext<'_>) -> Vec<CheckFinding> {
464    if context.used_range.is_empty() || context.sheet.max_rows == 0 || context.sheet.max_cols == 0 {
465        return Vec::new();
466    }
467
468    (1..=context.sheet.max_cols)
469        .filter(|col| !context.facts.column_has_present[*col])
470        .map(|col| {
471            let column_label = index_to_col_name(col);
472            let range = format_range(1, col, context.sheet.max_rows, col);
473            CheckFinding {
474                rule_id: CheckRuleId::BlankColumns,
475                severity: Severity::Warning,
476                sheet: context.sheet.name.clone(),
477                row: None,
478                column: Some(col),
479                range: Some(range),
480                message: format!(
481                    "Blank column {column_label} in used range {}.",
482                    context.used_range
483                ),
484                details: json!({
485                    "used_range": context.used_range,
486                    "column_label": column_label,
487                    "max_rows": context.sheet.max_rows,
488                    "reason": "blank_column",
489                }),
490            }
491        })
492        .collect()
493}
494
495fn check_null_ratio(context: &SheetCheckContext<'_>) -> Vec<CheckFinding> {
496    if context.data_row_count == 0 {
497        return Vec::new();
498    }
499
500    let mut findings = Vec::new();
501    for col in 1..=context.sheet.max_cols {
502        let null_rows = &context.facts.data_column_null_rows[col];
503        if null_rows.is_empty() {
504            continue;
505        }
506
507        let null_count = null_rows.len();
508        let null_ratio = rounded_ratio(null_count, context.data_row_count);
509        let severity = if null_count == context.data_row_count {
510            Severity::Error
511        } else if null_ratio >= 0.5 {
512            Severity::Warning
513        } else {
514            Severity::Info
515        };
516        let column_name = context.column_name(col);
517        let first_null_row = null_rows[0];
518        let first_null_cell = cell_reference((first_null_row, col));
519
520        findings.push(CheckFinding {
521            rule_id: CheckRuleId::NullRatio,
522            severity,
523            sheet: context.sheet.name.clone(),
524            row: Some(first_null_row),
525            column: Some(col),
526            range: context.data_column_range(col),
527            message: format!(
528                "Column '{}' has blank values in {} of {} data rows.",
529                column_name, null_count, context.data_row_count
530            ),
531            details: json!({
532                "column_name": column_name,
533                "data_row_count": context.data_row_count,
534                "first_null_cell": first_null_cell,
535                "null_count": null_count,
536                "null_ratio": null_ratio,
537                "severity_threshold": {
538                    "info": "> 0 and < 0.5",
539                    "warning": ">= 0.5 and < 1.0",
540                    "error": "1.0"
541                }
542            }),
543        });
544    }
545
546    findings
547}
548
549fn check_duplicate_values(context: &SheetCheckContext<'_>) -> Vec<CheckFinding> {
550    let Some((candidate_col, selection)) = default_duplicate_candidate(context) else {
551        return Vec::new();
552    };
553
554    let mut values: BTreeMap<String, Vec<usize>> = BTreeMap::new();
555    for row in context.data_start_row..=context.sheet.max_rows {
556        if let Some(cell) = cell_at(context.sheet, row, candidate_col) {
557            let value = cell.value.trim();
558            if !value.is_empty() {
559                values.entry(value.to_string()).or_default().push(row);
560            }
561        }
562    }
563
564    let column_name = context.column_name(candidate_col);
565    values
566        .into_iter()
567        .filter(|(_, rows)| rows.len() > 1)
568        .map(|(duplicate_value, rows)| {
569            let cells: Vec<String> = rows
570                .iter()
571                .map(|row| cell_reference((*row, candidate_col)))
572                .collect();
573
574            CheckFinding {
575                rule_id: CheckRuleId::DuplicateValues,
576                severity: Severity::Warning,
577                sheet: context.sheet.name.clone(),
578                row: rows.first().copied(),
579                column: Some(candidate_col),
580                range: context.data_column_range(candidate_col),
581                message: format!(
582                    "Column '{}' has duplicate value '{}' in {} rows.",
583                    column_name,
584                    duplicate_value,
585                    rows.len()
586                ),
587                details: json!({
588                    "candidate_column": {
589                        "column": candidate_col,
590                        "column_name": column_name,
591                        "selection": selection
592                    },
593                    "duplicate_value": duplicate_value,
594                    "occurrence_count": rows.len(),
595                    "rows": rows,
596                    "cells": cells
597                }),
598            }
599        })
600        .collect()
601}
602
603fn default_duplicate_candidate(context: &SheetCheckContext<'_>) -> Option<(usize, &'static str)> {
604    if context.data_row_count == 0 {
605        return None;
606    }
607
608    if let Some(header_row) = context.header_row {
609        for col in 1..=context.sheet.max_cols {
610            let has_header = cell_at(context.sheet, header_row, col)
611                .map(|cell| !cell.value.trim().is_empty())
612                .unwrap_or(false);
613            if has_header && context.facts.data_column_has_present[col] {
614                return Some((col, "first non-empty header data column"));
615            }
616        }
617    }
618
619    (1..=context.sheet.max_cols)
620        .find(|col| context.facts.data_column_has_present[*col])
621        .map(|col| (col, "first data column with values"))
622}
623
624fn check_type_drift(context: &SheetCheckContext<'_>) -> Vec<CheckFinding> {
625    if context.data_row_count == 0 {
626        return Vec::new();
627    }
628
629    let mut findings = Vec::new();
630    for col in 1..=context.sheet.max_cols {
631        let type_counts = &context.facts.data_column_type_counts[col];
632        let cells_by_type = &context.facts.data_column_cells_by_type[col];
633        if type_counts.len() < 2 {
634            continue;
635        }
636
637        let dominant_type = dominant_type(type_counts);
638        let Some((drift_type, drift_count)) = first_drift_type(type_counts, dominant_type) else {
639            continue;
640        };
641        let Some(first_drift_cell) = cells_by_type
642            .get(drift_type)
643            .and_then(|cells| cells.first())
644            .cloned()
645        else {
646            continue;
647        };
648        let Some((first_drift_row, _)) = crate::utils::parse_cell_reference(&first_drift_cell)
649        else {
650            continue;
651        };
652        let column_name = context.column_name(col);
653        let sample_drift_cells: Vec<String> = cells_by_type
654            .get(drift_type)
655            .into_iter()
656            .flat_map(|cells| cells.iter().take(5).cloned())
657            .collect();
658
659        findings.push(CheckFinding {
660            rule_id: CheckRuleId::TypeDrift,
661            severity: Severity::Warning,
662            sheet: context.sheet.name.clone(),
663            row: Some(first_drift_row),
664            column: Some(col),
665            range: context.data_column_range(col),
666            message: format!(
667                "Column '{}' mixes {} values with dominant {} values.",
668                column_name, drift_type, dominant_type
669            ),
670            details: json!({
671                "column_name": column_name,
672                "dominant_type": dominant_type,
673                "drift_type": drift_type,
674                "drift_count": drift_count,
675                "type_counts": type_counts,
676                "sample_drift_cells": sample_drift_cells
677            }),
678        });
679    }
680
681    findings
682}
683
684fn check_formula_presence(context: &SheetCheckContext<'_>) -> Vec<CheckFinding> {
685    if context.data_row_count == 0 {
686        return Vec::new();
687    }
688
689    if context.facts.formula_cells.is_empty() {
690        return Vec::new();
691    }
692
693    let formula_count = context.facts.formula_cells.len();
694    let formula_ratio = rounded_ratio(formula_count, context.data_row_count);
695    let formulas: Vec<Value> = context
696        .facts
697        .formula_cells
698        .iter()
699        .take(5)
700        .map(|formula| {
701            json!({
702                "cell": formula.cell.clone(),
703                "formula": formula.formula.clone(),
704            })
705        })
706        .collect();
707    let Some((min_row, min_col, max_row, max_col)) = context.facts.formula_bounds else {
708        return Vec::new();
709    };
710
711    vec![CheckFinding {
712        rule_id: CheckRuleId::FormulaPresence,
713        severity: Severity::Info,
714        sheet: context.sheet.name.clone(),
715        row: Some(min_row),
716        column: Some(min_col),
717        range: Some(format_range(min_row, min_col, max_row, max_col)),
718        message: format!(
719            "Sheet '{}' contains {} formula cells.",
720            context.sheet.name, formula_count
721        ),
722        details: json!({
723            "data_row_count": context.data_row_count,
724            "formula_count": formula_count,
725            "formula_ratio": formula_ratio,
726            "sample_formula_cells": formulas
727        }),
728    }]
729}
730
731fn is_blank_cell(cell: Option<&Cell>) -> bool {
732    cell.map(|cell| !cell_has_formula(cell) && cell.value.trim().is_empty())
733        .unwrap_or(true)
734}
735
736fn cell_kind(cell: &Cell) -> Option<&'static str> {
737    if !cell_is_present(Some(cell)) {
738        return None;
739    }
740
741    match cell.cell_type {
742        CellType::Text => Some("string"),
743        CellType::Number => Some("number"),
744        CellType::Date => Some("date"),
745        CellType::Boolean => Some("boolean"),
746        CellType::Empty => None,
747    }
748}
749
/// Returns the type label with the highest count.
///
/// Ties go to the alphabetically smallest label. An empty map yields
/// `"string"`.
fn dominant_type(type_counts: &BTreeMap<&'static str, usize>) -> &'static str {
    let mut best: Option<(&'static str, usize)> = None;

    // Labels arrive in ascending order, so replacing only on a strictly larger
    // count keeps the smallest label among equals.
    for (&kind, &count) in type_counts {
        match best {
            Some((_, best_count)) if count <= best_count => {}
            _ => best = Some((kind, count)),
        }
    }

    match best {
        Some((kind, _)) => kind,
        None => "string",
    }
}
761
/// Returns the non-dominant type label with the lowest count, with that count.
///
/// Ties go to the alphabetically smallest label. Returns `None` when the map
/// holds no label other than `dominant_type`.
fn first_drift_type(
    type_counts: &BTreeMap<&'static str, usize>,
    dominant_type: &'static str,
) -> Option<(&'static str, usize)> {
    let mut rarest: Option<(&'static str, usize)> = None;

    // Labels arrive in ascending order, so replacing only on a strictly smaller
    // count keeps the smallest label among equals.
    for (&kind, &count) in type_counts {
        if kind == dominant_type {
            continue;
        }
        let replace = match rarest {
            Some((_, rarest_count)) => count < rarest_count,
            None => true,
        };
        if replace {
            rarest = Some((kind, count));
        }
    }

    rarest
}
776
/// Returns `numerator / denominator` rounded to four decimal places, or `0.0`
/// when `denominator` is zero.
fn rounded_ratio(numerator: usize, denominator: usize) -> f64 {
    const SCALE: f64 = 10_000.0;

    if denominator == 0 {
        return 0.0;
    }

    let ratio = numerator as f64 / denominator as f64;
    (ratio * SCALE).round() / SCALE
}
784
785fn summarize_findings(findings: &[CheckFinding]) -> Value {
786    let error_count = findings
787        .iter()
788        .filter(|finding| finding.severity == Severity::Error)
789        .count();
790    let warning_count = findings
791        .iter()
792        .filter(|finding| finding.severity == Severity::Warning)
793        .count();
794    let info_count = findings
795        .iter()
796        .filter(|finding| finding.severity == Severity::Info)
797        .count();
798    let finding_count = findings.len();
799
800    json!({
801        "status": if finding_count == 0 { "pass" } else { "fail" },
802        "finding_count": finding_count,
803        "error_count": error_count,
804        "warning_count": warning_count,
805        "info_count": info_count,
806    })
807}
808
809fn build_stats(
810    workbook: &Workbook,
811    checked_sheet_indices: &[usize],
812    rules: &[CheckRuleId],
813    severity_threshold: SeverityThreshold,
814    finding_count_before_threshold: usize,
815) -> Result<Value, AppError> {
816    let checked_sheets: Result<Vec<_>, AppError> = checked_sheet_indices
817        .iter()
818        .map(|index| {
819            let sheet =
820                workbook
821                    .get_sheet_by_index(*index)
822                    .ok_or_else(|| AppError::TargetNotFound {
823                        message: format!("Sheet index {} not found", index),
824                    })?;
825            let used_range = workbook
826                .get_used_range(*index)
827                .map_err(crate::cli::error::anyhow_to_app_error)?;
828
829            Ok(json!({
830                "name": sheet.name,
831                "index": index,
832                "used_range": used_range,
833                "max_rows": sheet.max_rows,
834                "max_cols": sheet.max_cols,
835            }))
836        })
837        .collect();
838
839    Ok(json!({
840        "sheet_count": workbook.get_sheet_names().len(),
841        "checked_sheet_count": checked_sheet_indices.len(),
842        "checked_sheets": checked_sheets?,
843        "rules_run": rules.iter().map(CheckRuleId::as_str).collect::<Vec<_>>(),
844        "severity_threshold": severity_threshold.as_str(),
845        "finding_count_before_threshold": finding_count_before_threshold,
846    }))
847}
848
849fn exit_code_for_findings(finding_count: usize) -> i32 {
850    if finding_count == 0 {
851        EXIT_SUCCESS
852    } else {
853        EXIT_CHECK_FINDINGS
854    }
855}
856
857fn sort_findings(findings: &mut [CheckFinding], sheet_names: &[String]) {
858    let sheet_order: HashMap<&str, usize> = sheet_names
859        .iter()
860        .enumerate()
861        .map(|(index, name)| (name.as_str(), index))
862        .collect();
863
864    findings.sort_by(|left, right| {
865        compare_usize(
866            sheet_order.get(left.sheet.as_str()).copied(),
867            sheet_order.get(right.sheet.as_str()).copied(),
868        )
869        .then_with(|| left.rule_id.order().cmp(&right.rule_id.order()))
870        .then_with(|| compare_location(left.row, right.row))
871        .then_with(|| compare_location(left.column, right.column))
872        .then_with(|| left.range.cmp(&right.range))
873        .then_with(|| left.message.cmp(&right.message))
874        .then_with(|| left.details.to_string().cmp(&right.details.to_string()))
875    });
876}
877
/// Orders two optional positions so that any `Some` sorts before `None`, and
/// two `Some` values sort by their numbers.
fn compare_location(left: Option<usize>, right: Option<usize>) -> Ordering {
    // `false < true`, so a present value (is_none == false) comes first.
    left.is_none()
        .cmp(&right.is_none())
        .then_with(|| left.cmp(&right))
}
886
/// Orders two optional indices so that any `Some` sorts before `None`, and two
/// `Some` values sort by their numbers.
fn compare_usize(left: Option<usize>, right: Option<usize>) -> Ordering {
    match left.zip(right) {
        Some((a, b)) => a.cmp(&b),
        // At least one side is missing: the side that is present sorts first.
        None => right.is_some().cmp(&left.is_some()),
    }
}
895
896#[derive(Clone, Debug)]
897pub(crate) struct CheckReport {
898    pub(crate) summary: Value,
899    pub(crate) stats: Value,
900    pub(crate) findings: Vec<CheckFinding>,
901}
902
903#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize)]
904#[serde(rename_all = "snake_case")]
905pub(crate) enum CheckRuleId {
906    BlankHeaders,
907    DuplicateHeaders,
908    BlankRows,
909    BlankColumns,
910    NullRatio,
911    DuplicateValues,
912    TypeDrift,
913    FormulaPresence,
914}
915
916impl CheckRuleId {
917    fn parse(value: &str) -> Option<Self> {
918        RULES.iter().copied().find(|rule| rule.as_str() == value)
919    }
920
921    pub(crate) fn as_str(&self) -> &'static str {
922        match self {
923            CheckRuleId::BlankHeaders => "blank_headers",
924            CheckRuleId::DuplicateHeaders => "duplicate_headers",
925            CheckRuleId::BlankRows => "blank_rows",
926            CheckRuleId::BlankColumns => "blank_columns",
927            CheckRuleId::NullRatio => "null_ratio",
928            CheckRuleId::DuplicateValues => "duplicate_values",
929            CheckRuleId::TypeDrift => "type_drift",
930            CheckRuleId::FormulaPresence => "formula_presence",
931        }
932    }
933
934    fn order(&self) -> usize {
935        RULES
936            .iter()
937            .position(|rule| rule == self)
938            .unwrap_or(usize::MAX)
939    }
940}
941
942#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize)]
943#[serde(rename_all = "lowercase")]
944pub(crate) enum Severity {
945    Info,
946    Warning,
947    Error,
948}
949
950impl Severity {
951    fn from_threshold(threshold: SeverityThreshold) -> Self {
952        match threshold {
953            SeverityThreshold::Info => Severity::Info,
954            SeverityThreshold::Warning => Severity::Warning,
955            SeverityThreshold::Error => Severity::Error,
956        }
957    }
958}
959
960#[derive(Clone, Debug, Serialize)]
961pub(crate) struct CheckFinding {
962    pub(crate) rule_id: CheckRuleId,
963    pub(crate) severity: Severity,
964    pub(crate) sheet: String,
965    pub(crate) row: Option<usize>,
966    pub(crate) column: Option<usize>,
967    pub(crate) range: Option<String>,
968    pub(crate) message: String,
969    pub(crate) details: Value,
970}
971
#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;
    use crate::cli::error::{EXIT_CHECK_FINDINGS, EXIT_SUCCESS};
    use crate::excel::{Cell, Sheet, Workbook};

    /// Builds a loaded in-memory sheet from a grid of string values.
    ///
    /// The backing grid has one extra leading row and column, so
    /// `values[r][c]` is stored at `data[r + 1][c + 1]`.
    fn sheet_with_values(name: &str, values: &[&[&str]]) -> Sheet {
        let row_count = values.len();
        let col_count = values
            .iter()
            .map(|cells| cells.len())
            .max()
            .unwrap_or(0);

        let mut grid = vec![vec![Cell::empty(); col_count + 1]; row_count + 1];
        for (r, cells) in values.iter().enumerate() {
            for (c, text) in cells.iter().enumerate() {
                grid[r + 1][c + 1] = Cell::new(String::from(*text), false);
            }
        }

        Sheet {
            name: String::from(name),
            data: grid,
            max_rows: row_count,
            max_cols: col_count,
            is_loaded: true,
            freeze_panes: crate::excel::FreezePanes::none(),
        }
    }

    #[test]
    fn exit_code_uses_one_for_successful_reports_with_findings() {
        let clean = exit_code_for_findings(0);
        let with_findings = exit_code_for_findings(2);

        assert_eq!(clean, EXIT_SUCCESS);
        assert_eq!(with_findings, EXIT_CHECK_FINDINGS);
    }

    #[test]
    fn findings_sort_by_sheet_rule_position_then_location() {
        // Deliberately listed out of order so the sort has work to do.
        let orders_duplicate = CheckFinding {
            rule_id: CheckRuleId::DuplicateHeaders,
            severity: Severity::Warning,
            sheet: String::from("Orders"),
            row: Some(3),
            column: Some(2),
            range: Some(String::from("B3")),
            message: String::from("later"),
            details: json!({"field": "customer"}),
        };
        let summary_blank = CheckFinding {
            rule_id: CheckRuleId::BlankHeaders,
            severity: Severity::Warning,
            sheet: String::from("Summary"),
            row: None,
            column: None,
            range: None,
            message: String::from("workbook-level"),
            details: json!({}),
        };
        let orders_blank = CheckFinding {
            rule_id: CheckRuleId::BlankHeaders,
            severity: Severity::Warning,
            sheet: String::from("Orders"),
            row: Some(2),
            column: Some(1),
            range: Some(String::from("A2")),
            message: String::from("earlier"),
            details: json!({}),
        };
        let mut findings = vec![orders_duplicate, summary_blank, orders_blank];

        let sheet_names = [String::from("Summary"), String::from("Orders")];
        sort_findings(&mut findings, &sheet_names);

        // "Summary" is first in the sheet list, so its finding leads.
        assert_eq!(findings[0].sheet, "Summary");
        // Within "Orders", the blank-headers rule precedes duplicate-headers.
        assert_eq!(findings[1].rule_id, CheckRuleId::BlankHeaders);
        assert_eq!(findings[1].row, Some(2));
        assert_eq!(findings[2].rule_id, CheckRuleId::DuplicateHeaders);
    }

    #[test]
    fn run_check_report_reuses_rule_pipeline_for_structured_findings() {
        let rows: &[&[&str]] = &[&["Name", "Name"], &["Ada", ""], &["", ""]];
        let data_sheet = sheet_with_values("Data", rows);
        let mut workbook = Workbook::from_sheets_for_test(vec![data_sheet]);

        let report = run_check_report(&mut workbook, None, None, SeverityThreshold::Info).unwrap();

        assert_eq!(report.summary["status"], "fail");
        assert_eq!(report.stats["checked_sheet_count"], 1);
        assert!(!report.findings.is_empty());

        let has_duplicate_headers = report
            .findings
            .iter()
            .any(|finding| finding.rule_id == CheckRuleId::DuplicateHeaders);
        assert!(has_duplicate_headers);
    }
}
excel_cli/cli/check.rs

excel_cli/cli/
check.rs