spreadsheet_mcp/analysis/
classification.rs

1use crate::model::{RegionKind, SheetClassification, SheetRegion};
2use crate::utils::column_number_to_name;
3use crate::workbook::{SheetMetrics, StyleUsage};
4use std::collections::HashMap;
5
6pub fn classify(
7    non_empty: u32,
8    formulas: u32,
9    rows: u32,
10    columns: u32,
11    comments: u32,
12    _styles: &HashMap<String, StyleUsage>,
13) -> SheetClassification {
14    if non_empty == 0 {
15        return SheetClassification::Empty;
16    }
17    let formula_ratio = if non_empty == 0 {
18        0.0
19    } else {
20        formulas as f32 / non_empty as f32
21    };
22    if formula_ratio > 0.7 {
23        SheetClassification::Calculator
24    } else if formula_ratio > 0.2 {
25        SheetClassification::Mixed
26    } else if rows < 5 || columns < 3 || comments > 10 {
27        SheetClassification::Metadata
28    } else {
29        SheetClassification::Data
30    }
31}
32
33pub fn narrative(metrics: &SheetMetrics) -> String {
34    let formula_ratio = if metrics.non_empty_cells == 0 {
35        0.0
36    } else {
37        metrics.formula_cells as f32 / metrics.non_empty_cells as f32
38    };
39
40    format!(
41        "{} sheet with {} rows, {} columns, {:.0}% formulas, {} style clusters",
42        match metrics.classification {
43            SheetClassification::Data => "Data-centric",
44            SheetClassification::Calculator => "Calculator",
45            SheetClassification::Mixed => "Mixed-use",
46            SheetClassification::Metadata => "Metadata",
47            SheetClassification::Empty => "Empty",
48        },
49        metrics.row_count,
50        metrics.column_count,
51        formula_ratio * 100.0,
52        metrics.style_map.len()
53    )
54}
55
56pub fn regions(metrics: &SheetMetrics) -> Vec<SheetRegion> {
57    if metrics.non_empty_cells == 0 {
58        return vec![];
59    }
60    let mut regions = Vec::new();
61    let end_col = column_number_to_name(metrics.column_count.max(1));
62    let end_cell = format!("{}{}", end_col, metrics.row_count.max(1));
63
64    let kind = match metrics.classification {
65        SheetClassification::Calculator => RegionKind::Calculator,
66        SheetClassification::Metadata => RegionKind::Metadata,
67        _ => RegionKind::Data,
68    };
69
70    regions.push(SheetRegion {
71        kind,
72        address: format!("A1:{}", end_cell),
73        description: format!(
74            "Primary region covering {:.0}% of sheet cells",
75            density(metrics) * 100.0
76        ),
77    });
78    regions
79}
80
81pub fn key_ranges(metrics: &SheetMetrics) -> Vec<String> {
82    if metrics.non_empty_cells == 0 {
83        return vec![];
84    }
85
86    let mut ranges = Vec::new();
87    ranges.push("Header band likely in row 1".to_string());
88    if matches!(metrics.classification, SheetClassification::Calculator) {
89        ranges.push("Check final output cells near bottom rows".to_string());
90    }
91    ranges
92}
93
94fn density(metrics: &SheetMetrics) -> f32 {
95    let total = (metrics.row_count.max(1) * metrics.column_count.max(1)) as f32;
96    if total == 0.0 {
97        0.0
98    } else {
99        metrics.non_empty_cells as f32 / total
100    }
101}