spreadsheet_read_mcp/tools/
mod.rs

1pub mod filters;
2
3use crate::analysis::{formula::FormulaGraph, stats};
4use crate::model::*;
5use crate::state::AppState;
6use crate::workbook::{WorkbookContext, cell_to_value};
7use anyhow::{Result, anyhow};
8use regex::Regex;
9use schemars::JsonSchema;
10use serde::Deserialize;
11use std::cmp::Ordering;
12use std::collections::{BTreeMap, HashMap, HashSet};
13use std::sync::Arc;
14
15const DEFAULT_TRACE_PAGE_SIZE: usize = 20;
16const TRACE_PAGE_MIN: usize = 5;
17const TRACE_PAGE_MAX: usize = 200;
18const TRACE_RANGE_THRESHOLD: usize = 4;
19const TRACE_RANGE_HIGHLIGHT_LIMIT: usize = 3;
20const TRACE_GROUP_HIGHLIGHT_LIMIT: usize = 3;
21const TRACE_CELL_HIGHLIGHT_LIMIT: usize = 5;
22const TRACE_RANGE_VALUE_SAMPLES: usize = 3;
23const TRACE_RANGE_FORMULA_SAMPLES: usize = 2;
24const TRACE_GROUP_SAMPLE_LIMIT: usize = 5;
25
26pub async fn list_workbooks(
27    state: Arc<AppState>,
28    params: ListWorkbooksParams,
29) -> Result<WorkbookListResponse> {
30    let filter = params.into_filter()?;
31    state.list_workbooks(filter)
32}
33
34pub async fn describe_workbook(
35    state: Arc<AppState>,
36    params: DescribeWorkbookParams,
37) -> Result<WorkbookDescription> {
38    let workbook = state.open_workbook(&params.workbook_id).await?;
39    let desc = workbook.describe();
40    Ok(desc)
41}
42
43#[derive(Debug, Deserialize, JsonSchema)]
44pub struct ListWorkbooksParams {
45    pub slug_prefix: Option<String>,
46    pub folder: Option<String>,
47    pub path_glob: Option<String>,
48}
49
50impl ListWorkbooksParams {
51    fn into_filter(self) -> Result<filters::WorkbookFilter> {
52        filters::WorkbookFilter::new(self.slug_prefix, self.folder, self.path_glob)
53    }
54}
55
56#[derive(Debug, Deserialize, JsonSchema)]
57pub struct DescribeWorkbookParams {
58    pub workbook_id: WorkbookId,
59}
60
61#[derive(Debug, Deserialize, JsonSchema)]
62pub struct ListSheetsParams {
63    pub workbook_id: WorkbookId,
64}
65
66pub async fn list_sheets(
67    state: Arc<AppState>,
68    params: ListSheetsParams,
69) -> Result<SheetListResponse> {
70    let workbook = state.open_workbook(&params.workbook_id).await?;
71    let summaries = workbook.list_summaries()?;
72    let response = SheetListResponse {
73        workbook_id: workbook.id.clone(),
74        workbook_short_id: workbook.short_id.clone(),
75        sheets: summaries,
76    };
77    Ok(response)
78}
79
80#[derive(Debug, Deserialize, JsonSchema)]
81pub struct SheetOverviewParams {
82    pub workbook_id: WorkbookId,
83    pub sheet_name: String,
84}
85
86#[derive(Debug, Deserialize, JsonSchema)]
87pub struct WorkbookSummaryParams {
88    pub workbook_id: WorkbookId,
89}
90
91pub async fn workbook_summary(
92    state: Arc<AppState>,
93    params: WorkbookSummaryParams,
94) -> Result<WorkbookSummaryResponse> {
95    let workbook = state.open_workbook(&params.workbook_id).await?;
96    let sheet_names = workbook.sheet_names();
97
98    let mut total_cells: u64 = 0;
99    let mut total_formulas: u64 = 0;
100    let mut breakdown = WorkbookBreakdown::default();
101    let mut region_counts = RegionCountSummary::default();
102    let mut entry_points: Vec<EntryPoint> = Vec::new();
103    let mut key_named_ranges: Vec<NamedRangeDescriptor> = Vec::new();
104
105    for sheet_name in &sheet_names {
106        let entry = workbook.get_sheet_metrics(sheet_name)?;
107        total_cells += (entry.metrics.row_count as u64) * (entry.metrics.column_count as u64);
108        total_formulas += entry.metrics.formula_cells as u64;
109        match entry.metrics.classification {
110            SheetClassification::Calculator => breakdown.calculator_sheets += 1,
111            SheetClassification::Metadata => breakdown.metadata_sheets += 1,
112            SheetClassification::Empty => {}
113            _ => breakdown.data_sheets += 1,
114        }
115
116        for region in &entry.detected_regions {
117            match region
118                .region_kind
119                .clone()
120                .unwrap_or(region.classification.clone())
121            {
122                RegionKind::Calculator => region_counts.calculator += 1,
123                RegionKind::Metadata => region_counts.metadata += 1,
124                RegionKind::Parameters => region_counts.parameters += 1,
125                RegionKind::Outputs => region_counts.outputs += 1,
126                RegionKind::Data | RegionKind::Table => region_counts.data += 1,
127                _ => region_counts.other += 1,
128            }
129            if region.confidence >= 0.3 {
130                let kind = region
131                    .region_kind
132                    .as_ref()
133                    .unwrap_or(&region.classification);
134                let priority = match kind {
135                    RegionKind::Parameters => 0,
136                    RegionKind::Data | RegionKind::Table => 1,
137                    RegionKind::Outputs => 2,
138                    RegionKind::Calculator => 3,
139                    RegionKind::Metadata => 4,
140                    _ => 5,
141                };
142                entry_points.push(EntryPoint {
143                    sheet_name: sheet_name.clone(),
144                    region_id: Some(region.id),
145                    bounds: Some(region.bounds.clone()),
146                    rationale: format!(
147                        "{:?} region ({} rows, {:.0}% conf, p{})",
148                        kind,
149                        region.row_count,
150                        region.confidence * 100.0,
151                        priority
152                    ),
153                });
154            }
155        }
156
157        if entry.detected_regions.is_empty() && entry.metrics.non_empty_cells > 0 {
158            entry_points.push(EntryPoint {
159                sheet_name: sheet_name.clone(),
160                region_id: None,
161                bounds: None,
162                rationale: "Whole sheet is non-empty; start at top-left".to_string(),
163            });
164        }
165    }
166
167    entry_points.sort_by(|a, b| {
168        let pa = priority_from_rationale(&a.rationale);
169        let pb = priority_from_rationale(&b.rationale);
170        pa.cmp(&pb)
171            .then_with(|| {
172                a.bounds
173                    .as_ref()
174                    .map(|_| 1)
175                    .cmp(&b.bounds.as_ref().map(|_| 1))
176            })
177            .then_with(|| a.sheet_name.cmp(&b.sheet_name))
178    });
179    entry_points.truncate(5);
180
181    let mut seen_ranges = std::collections::HashSet::new();
182    for item in workbook.named_items()? {
183        if item.kind != NamedItemKind::NamedRange && item.kind != NamedItemKind::Table {
184            continue;
185        }
186        if !seen_ranges.insert(item.refers_to.clone()) {
187            continue;
188        }
189        key_named_ranges.push(item);
190        if key_named_ranges.len() >= 10 {
191            break;
192        }
193    }
194
195    Ok(WorkbookSummaryResponse {
196        workbook_id: workbook.id.clone(),
197        workbook_short_id: workbook.short_id.clone(),
198        slug: workbook.slug.clone(),
199        sheet_count: sheet_names.len(),
200        total_cells,
201        total_formulas,
202        breakdown,
203        region_counts,
204        key_named_ranges,
205        suggested_entry_points: entry_points,
206    })
207}
208
209fn priority_from_rationale(rationale: &str) -> u32 {
210    if rationale.contains("p0") {
211        0
212    } else if rationale.contains("p1") {
213        1
214    } else if rationale.contains("p2") {
215        2
216    } else if rationale.contains("p3") {
217        3
218    } else if rationale.contains("p4") {
219        4
220    } else {
221        5
222    }
223}
224
225pub async fn sheet_overview(
226    state: Arc<AppState>,
227    params: SheetOverviewParams,
228) -> Result<SheetOverviewResponse> {
229    let workbook = state.open_workbook(&params.workbook_id).await?;
230    let overview = workbook.sheet_overview(&params.sheet_name)?;
231    Ok(overview)
232}
233
234fn default_start_row() -> u32 {
235    1
236}
237
238fn default_page_size() -> u32 {
239    50
240}
241
242fn default_include_formulas() -> bool {
243    true
244}
245
246fn default_include_header() -> bool {
247    true
248}
249
250#[derive(Debug, Deserialize, JsonSchema)]
251pub struct SheetPageParams {
252    pub workbook_id: WorkbookId,
253    pub sheet_name: String,
254    #[serde(default = "default_start_row")]
255    pub start_row: u32,
256    #[serde(default = "default_page_size")]
257    pub page_size: u32,
258    #[serde(default)]
259    pub columns: Option<Vec<String>>,
260    #[serde(default)]
261    pub columns_by_header: Option<Vec<String>>,
262    #[serde(default = "default_include_formulas")]
263    pub include_formulas: bool,
264    #[serde(default)]
265    pub include_styles: bool,
266    #[serde(default = "default_include_header")]
267    pub include_header: bool,
268    #[serde(default)]
269    pub format: Option<SheetPageFormat>,
270}
271
272impl Default for SheetPageParams {
273    fn default() -> Self {
274        SheetPageParams {
275            workbook_id: WorkbookId(String::new()),
276            sheet_name: String::new(),
277            start_row: default_start_row(),
278            page_size: default_page_size(),
279            columns: None,
280            columns_by_header: None,
281            include_formulas: default_include_formulas(),
282            include_styles: false,
283            include_header: default_include_header(),
284            format: None,
285        }
286    }
287}
288
289fn default_find_limit() -> u32 {
290    50
291}
292
293#[derive(Debug, Deserialize, JsonSchema)]
294pub struct FindValueParams {
295    pub workbook_id: WorkbookId,
296    pub query: String,
297    #[serde(default)]
298    pub label: Option<String>,
299    #[serde(default)]
300    pub mode: Option<FindMode>,
301    #[serde(default)]
302    pub match_mode: Option<String>,
303    #[serde(default)]
304    pub case_sensitive: bool,
305    #[serde(default)]
306    pub sheet_name: Option<String>,
307    #[serde(default)]
308    pub region_id: Option<u32>,
309    #[serde(default)]
310    pub table_name: Option<String>,
311    #[serde(default)]
312    pub value_types: Option<Vec<String>>,
313    #[serde(default)]
314    pub search_headers_only: bool,
315    #[serde(default)]
316    pub direction: Option<LabelDirection>,
317    #[serde(default = "default_find_limit")]
318    pub limit: u32,
319}
320
321impl Default for FindValueParams {
322    fn default() -> Self {
323        Self {
324            workbook_id: WorkbookId(String::new()),
325            query: String::new(),
326            label: None,
327            mode: None,
328            match_mode: None,
329            case_sensitive: false,
330            sheet_name: None,
331            region_id: None,
332            table_name: None,
333            value_types: None,
334            search_headers_only: false,
335            direction: None,
336            limit: default_find_limit(),
337        }
338    }
339}
340
341#[derive(Debug, Deserialize, JsonSchema, Default)]
342pub struct ReadTableParams {
343    pub workbook_id: WorkbookId,
344    #[serde(default)]
345    pub sheet_name: Option<String>,
346    #[serde(default)]
347    pub table_name: Option<String>,
348    #[serde(default)]
349    pub region_id: Option<u32>,
350    #[serde(default)]
351    pub range: Option<String>,
352    #[serde(default)]
353    pub header_row: Option<u32>,
354    #[serde(default)]
355    pub header_rows: Option<u32>,
356    #[serde(default)]
357    pub columns: Option<Vec<String>>,
358    #[serde(default)]
359    pub filters: Option<Vec<TableFilter>>,
360    #[serde(default)]
361    pub sample_mode: Option<String>,
362    #[serde(default)]
363    pub limit: Option<u32>,
364    #[serde(default)]
365    pub offset: Option<u32>,
366}
367
368#[derive(Debug, Deserialize, JsonSchema, Clone)]
369pub struct TableFilter {
370    pub column: String,
371    pub op: String,
372    pub value: serde_json::Value,
373}
374
375#[derive(Debug, Deserialize, JsonSchema, Default)]
376pub struct TableProfileParams {
377    pub workbook_id: WorkbookId,
378    #[serde(default)]
379    pub sheet_name: Option<String>,
380    #[serde(default)]
381    pub region_id: Option<u32>,
382    #[serde(default)]
383    pub table_name: Option<String>,
384    #[serde(default)]
385    pub sample_mode: Option<String>,
386    #[serde(default)]
387    pub sample_size: Option<u32>,
388}
389
390#[derive(Debug, Deserialize, JsonSchema)]
391pub struct RangeValuesParams {
392    pub workbook_id: WorkbookId,
393    pub sheet_name: String,
394    pub ranges: Vec<String>,
395    #[serde(default)]
396    pub include_headers: Option<bool>,
397}
398
399pub async fn sheet_page(
400    state: Arc<AppState>,
401    params: SheetPageParams,
402) -> Result<SheetPageResponse> {
403    if params.page_size == 0 {
404        return Err(anyhow!("page_size must be greater than zero"));
405    }
406
407    let workbook = state.open_workbook(&params.workbook_id).await?;
408    let metrics = workbook.get_sheet_metrics(&params.sheet_name)?;
409    let format = params.format.unwrap_or_default();
410
411    let start_row = params.start_row.max(1);
412    let page_size = params.page_size.min(500);
413    let include_formulas = params.include_formulas;
414    let include_styles = params.include_styles;
415    let columns = params.columns.clone();
416    let columns_by_header = params.columns_by_header.clone();
417    let include_header = params.include_header;
418
419    let page = workbook.with_sheet(&params.sheet_name, |sheet| {
420        build_page(
421            sheet,
422            start_row,
423            page_size,
424            columns.clone(),
425            columns_by_header.clone(),
426            include_formulas,
427            include_styles,
428            include_header,
429        )
430    })?;
431
432    let has_more = page.end_row < metrics.metrics.row_count;
433    let next_start_row = if has_more {
434        Some(page.end_row + 1)
435    } else {
436        None
437    };
438
439    let compact_payload = if matches!(format, SheetPageFormat::Compact) {
440        Some(build_compact_payload(
441            &page.header,
442            &page.rows,
443            include_header,
444        ))
445    } else {
446        None
447    };
448
449    let values_only_payload = if matches!(format, SheetPageFormat::ValuesOnly) {
450        Some(build_values_only_payload(
451            &page.header,
452            &page.rows,
453            include_header,
454        ))
455    } else {
456        None
457    };
458
459    let response = SheetPageResponse {
460        workbook_id: workbook.id.clone(),
461        workbook_short_id: workbook.short_id.clone(),
462        sheet_name: params.sheet_name,
463        rows: if matches!(format, SheetPageFormat::Full) {
464            page.rows
465        } else {
466            Vec::new()
467        },
468        has_more,
469        next_start_row,
470        header_row: if include_header { page.header } else { None },
471        compact: compact_payload,
472        values_only: values_only_payload,
473        format,
474    };
475    Ok(response)
476}
477
478#[derive(Debug, Deserialize, JsonSchema)]
479pub struct SheetFormulaMapParams {
480    pub workbook_id: WorkbookId,
481    pub sheet_name: String,
482    pub range: Option<String>,
483    #[serde(default)]
484    pub expand: bool,
485}
486
487pub async fn sheet_formula_map(
488    state: Arc<AppState>,
489    params: SheetFormulaMapParams,
490) -> Result<SheetFormulaMapResponse> {
491    let workbook = state.open_workbook(&params.workbook_id).await?;
492    let graph = workbook.formula_graph(&params.sheet_name)?;
493    let mut groups = Vec::new();
494    let mut truncated = false;
495
496    for mut group in graph.groups() {
497        if let Some(range) = &params.range {
498            group.addresses.retain(|addr| address_in_range(addr, range));
499            if group.addresses.is_empty() {
500                continue;
501            }
502        }
503        if !params.expand && group.addresses.len() > 15 {
504            group.addresses.truncate(15);
505            truncated = true;
506        }
507        groups.push(group);
508    }
509
510    let response = SheetFormulaMapResponse {
511        workbook_id: workbook.id.clone(),
512        workbook_short_id: workbook.short_id.clone(),
513        sheet_name: params.sheet_name.clone(),
514        groups,
515        truncated,
516    };
517    Ok(response)
518}
519
520#[derive(Debug, Deserialize, JsonSchema)]
521pub struct FormulaTraceParams {
522    pub workbook_id: WorkbookId,
523    pub sheet_name: String,
524    pub cell_address: String,
525    pub direction: TraceDirection,
526    pub depth: Option<u32>,
527    pub limit: Option<u32>,
528    #[serde(default)]
529    pub page_size: Option<usize>,
530    #[serde(default)]
531    pub cursor: Option<TraceCursor>,
532}
533
534pub async fn formula_trace(
535    state: Arc<AppState>,
536    params: FormulaTraceParams,
537) -> Result<FormulaTraceResponse> {
538    let workbook = state.open_workbook(&params.workbook_id).await?;
539    let graph = workbook.formula_graph(&params.sheet_name)?;
540    let formula_lookup = build_formula_lookup(&graph);
541    let depth = params.depth.unwrap_or(3).clamp(1, 5);
542    let page_size = params
543        .page_size
544        .or_else(|| params.limit.map(|v| v as usize))
545        .unwrap_or(DEFAULT_TRACE_PAGE_SIZE)
546        .clamp(TRACE_PAGE_MIN, TRACE_PAGE_MAX);
547
548    let origin = params.cell_address.to_uppercase();
549    let config = TraceConfig {
550        direction: &params.direction,
551        origin: &origin,
552        sheet_name: &params.sheet_name,
553        depth_limit: depth,
554        page_size,
555    };
556    let (layers, next_cursor, notes) = build_trace_layers(
557        &workbook,
558        &graph,
559        &formula_lookup,
560        &config,
561        params.cursor.clone(),
562    )?;
563
564    let response = FormulaTraceResponse {
565        workbook_id: workbook.id.clone(),
566        workbook_short_id: workbook.short_id.clone(),
567        sheet_name: params.sheet_name.clone(),
568        origin,
569        direction: params.direction.clone(),
570        layers,
571        next_cursor,
572        notes,
573    };
574    Ok(response)
575}
576
577#[derive(Debug, Deserialize, JsonSchema)]
578pub struct NamedRangesParams {
579    pub workbook_id: WorkbookId,
580    pub sheet_name: Option<String>,
581    pub name_prefix: Option<String>,
582}
583
584pub async fn named_ranges(
585    state: Arc<AppState>,
586    params: NamedRangesParams,
587) -> Result<NamedRangesResponse> {
588    let workbook = state.open_workbook(&params.workbook_id).await?;
589    let mut items = workbook.named_items()?;
590
591    if let Some(sheet_filter) = &params.sheet_name {
592        items.retain(|item| {
593            item.sheet_name
594                .as_ref()
595                .map(|name| name.eq_ignore_ascii_case(sheet_filter))
596                .unwrap_or(false)
597        });
598    }
599    if let Some(prefix) = &params.name_prefix {
600        let prefix_lower = prefix.to_ascii_lowercase();
601        items.retain(|item| item.name.to_ascii_lowercase().starts_with(&prefix_lower));
602    }
603
604    let response = NamedRangesResponse {
605        workbook_id: workbook.id.clone(),
606        workbook_short_id: workbook.short_id.clone(),
607        items,
608    };
609    Ok(response)
610}
611
612struct PageBuildResult {
613    rows: Vec<RowSnapshot>,
614    header: Option<RowSnapshot>,
615    end_row: u32,
616}
617
618#[allow(clippy::too_many_arguments)]
619fn build_page(
620    sheet: &umya_spreadsheet::Worksheet,
621    start_row: u32,
622    page_size: u32,
623    columns: Option<Vec<String>>,
624    columns_by_header: Option<Vec<String>>,
625    include_formulas: bool,
626    include_styles: bool,
627    include_header: bool,
628) -> PageBuildResult {
629    let max_col = sheet.get_highest_column();
630    let end_row = (start_row + page_size - 1).min(sheet.get_highest_row().max(start_row));
631    let column_indices =
632        resolve_columns_with_headers(sheet, columns.as_ref(), columns_by_header.as_ref(), max_col);
633
634    let header = if include_header {
635        Some(build_row_snapshot(
636            sheet,
637            1,
638            &column_indices,
639            include_formulas,
640            include_styles,
641        ))
642    } else {
643        None
644    };
645
646    let mut rows = Vec::new();
647    for row_idx in start_row..=end_row {
648        rows.push(build_row_snapshot(
649            sheet,
650            row_idx,
651            &column_indices,
652            include_formulas,
653            include_styles,
654        ));
655    }
656
657    PageBuildResult {
658        rows,
659        header,
660        end_row,
661    }
662}
663
664fn build_row_snapshot(
665    sheet: &umya_spreadsheet::Worksheet,
666    row_index: u32,
667    columns: &[u32],
668    include_formulas: bool,
669    include_styles: bool,
670) -> RowSnapshot {
671    let mut cells = Vec::new();
672    for &col in columns {
673        if let Some(cell) = sheet.get_cell((col, row_index)) {
674            cells.push(build_cell_snapshot(cell, include_formulas, include_styles));
675        } else {
676            let address = crate::utils::cell_address(col, row_index);
677            cells.push(CellSnapshot {
678                address,
679                value: None,
680                formula: None,
681                cached_value: None,
682                number_format: None,
683                style_tags: Vec::new(),
684                notes: Vec::new(),
685            });
686        }
687    }
688
689    RowSnapshot { row_index, cells }
690}
691
692fn build_cell_snapshot(
693    cell: &umya_spreadsheet::Cell,
694    include_formulas: bool,
695    include_styles: bool,
696) -> CellSnapshot {
697    let address = cell.get_coordinate().get_coordinate();
698    let value = crate::workbook::cell_to_value(cell);
699    let formula = if include_formulas && cell.is_formula() {
700        Some(cell.get_formula().to_string())
701    } else {
702        None
703    };
704    let cached_value = if cell.is_formula() {
705        value.clone()
706    } else {
707        None
708    };
709    let number_format = if include_styles {
710        cell.get_style()
711            .get_number_format()
712            .map(|fmt| fmt.get_format_code().to_string())
713    } else {
714        None
715    };
716    let style_tags = if include_styles {
717        crate::analysis::style::tag_cell(cell)
718            .map(|(_, tagging)| tagging.tags)
719            .unwrap_or_default()
720    } else {
721        Vec::new()
722    };
723
724    CellSnapshot {
725        address,
726        value,
727        formula,
728        cached_value,
729        number_format,
730        style_tags,
731        notes: Vec::new(),
732    }
733}
734
735fn resolve_columns(columns: Option<&Vec<String>>, max_column: u32) -> Vec<u32> {
736    use std::collections::BTreeSet;
737    use umya_spreadsheet::helper::coordinate::column_index_from_string;
738
739    let mut indices = BTreeSet::new();
740    if let Some(specs) = columns {
741        for spec in specs {
742            if let Some((start, end)) = spec.split_once(':') {
743                let start_idx = column_index_from_string(start);
744                let end_idx = column_index_from_string(end);
745                let (min_idx, max_idx) = if start_idx <= end_idx {
746                    (start_idx, end_idx)
747                } else {
748                    (end_idx, start_idx)
749                };
750                for idx in min_idx..=max_idx {
751                    indices.insert(idx);
752                }
753            } else {
754                indices.insert(column_index_from_string(spec));
755            }
756        }
757    } else {
758        for idx in 1..=max_column.max(1) {
759            indices.insert(idx);
760        }
761    }
762
763    indices.into_iter().collect()
764}
765
766fn resolve_columns_with_headers(
767    sheet: &umya_spreadsheet::Worksheet,
768    columns: Option<&Vec<String>>,
769    columns_by_header: Option<&Vec<String>>,
770    max_column: u32,
771) -> Vec<u32> {
772    if columns_by_header.is_none() {
773        return resolve_columns(columns, max_column);
774    }
775
776    let mut selected = Vec::new();
777    let header_targets: Vec<String> = columns_by_header
778        .unwrap()
779        .iter()
780        .map(|h| h.trim().to_ascii_lowercase())
781        .collect();
782
783    for col_idx in 1..=max_column.max(1) {
784        let header_cell = sheet.get_cell((col_idx, 1u32));
785        let header_value = header_cell
786            .and_then(cell_to_value)
787            .map(cell_value_to_string_lower);
788        if let Some(hval) = header_value
789            && header_targets.iter().any(|target| target == &hval)
790        {
791            selected.push(col_idx);
792        }
793    }
794
795    if selected.is_empty() {
796        resolve_columns(columns, max_column)
797    } else {
798        selected
799    }
800}
801
802fn cell_value_to_string_lower(value: CellValue) -> String {
803    match value {
804        CellValue::Text(s) => s.to_ascii_lowercase(),
805        CellValue::Number(n) => n.to_string().to_ascii_lowercase(),
806        CellValue::Bool(b) => b.to_string(),
807        CellValue::Error(e) => e.to_ascii_lowercase(),
808        CellValue::Date(d) => d.to_ascii_lowercase(),
809    }
810}
811
812fn build_compact_payload(
813    header: &Option<RowSnapshot>,
814    rows: &[RowSnapshot],
815    include_header: bool,
816) -> SheetPageCompact {
817    let headers = derive_headers(header, rows);
818    let header_row = if include_header {
819        header
820            .as_ref()
821            .map(|h| h.cells.iter().map(|c| c.value.clone()).collect())
822            .unwrap_or_default()
823    } else {
824        Vec::new()
825    };
826    let data_rows = rows
827        .iter()
828        .map(|row| {
829            let mut vals: Vec<Option<CellValue>> = Vec::new();
830            vals.push(Some(CellValue::Number(row.row_index as f64)));
831            vals.extend(row.cells.iter().map(|c| c.value.clone()));
832            vals
833        })
834        .collect();
835
836    SheetPageCompact {
837        headers,
838        header_row,
839        rows: data_rows,
840    }
841}
842
843fn build_values_only_payload(
844    header: &Option<RowSnapshot>,
845    rows: &[RowSnapshot],
846    include_header: bool,
847) -> SheetPageValues {
848    let mut data = Vec::new();
849    if include_header && let Some(h) = header {
850        data.push(h.cells.iter().map(|c| c.value.clone()).collect());
851    }
852    for row in rows {
853        data.push(row.cells.iter().map(|c| c.value.clone()).collect());
854    }
855
856    SheetPageValues { rows: data }
857}
858
859fn derive_headers(header: &Option<RowSnapshot>, rows: &[RowSnapshot]) -> Vec<String> {
860    if let Some(h) = header {
861        let mut headers: Vec<String> = h
862            .cells
863            .iter()
864            .map(|c| match &c.value {
865                Some(CellValue::Text(t)) => t.clone(),
866                Some(CellValue::Number(n)) => n.to_string(),
867                Some(CellValue::Bool(b)) => b.to_string(),
868                Some(CellValue::Date(d)) => d.clone(),
869                Some(CellValue::Error(e)) => e.clone(),
870                None => c.address.clone(),
871            })
872            .collect();
873        headers.insert(0, "Row".to_string());
874        headers
875    } else if let Some(first) = rows.first() {
876        let mut headers = Vec::new();
877        headers.push("Row".to_string());
878        for cell in &first.cells {
879            headers.push(cell.address.clone());
880        }
881        headers
882    } else {
883        vec![]
884    }
885}
886fn default_stats_sample() -> usize {
887    500
888}
889
890#[derive(Debug, Deserialize, JsonSchema)]
891pub struct SheetStatisticsParams {
892    pub workbook_id: WorkbookId,
893    pub sheet_name: String,
894    #[serde(default)]
895    pub sample_rows: Option<usize>,
896}
897
898pub async fn sheet_statistics(
899    state: Arc<AppState>,
900    params: SheetStatisticsParams,
901) -> Result<SheetStatisticsResponse> {
902    let workbook = state.open_workbook(&params.workbook_id).await?;
903    let sheet_metrics = workbook.get_sheet_metrics(&params.sheet_name)?;
904    let sample_rows = params.sample_rows.unwrap_or_else(default_stats_sample);
905    let stats = workbook.with_sheet(&params.sheet_name, |sheet| {
906        stats::compute_sheet_statistics(sheet, sample_rows)
907    })?;
908    let response = SheetStatisticsResponse {
909        workbook_id: workbook.id.clone(),
910        workbook_short_id: workbook.short_id.clone(),
911        sheet_name: params.sheet_name,
912        row_count: sheet_metrics.metrics.row_count,
913        column_count: sheet_metrics.metrics.column_count,
914        density: stats.density,
915        numeric_columns: stats.numeric_columns,
916        text_columns: stats.text_columns,
917        null_counts: stats.null_counts,
918        duplicate_warnings: stats.duplicate_warnings,
919    };
920    Ok(response)
921}
922
923fn address_in_range(address: &str, range: &str) -> bool {
924    parse_range(range).is_none_or(|((start_col, start_row), (end_col, end_row))| {
925        if let Some((col, row)) = parse_address(address) {
926            col >= start_col && col <= end_col && row >= start_row && row <= end_row
927        } else {
928            false
929        }
930    })
931}
932
933fn parse_range(range: &str) -> Option<((u32, u32), (u32, u32))> {
934    let mut parts = range.split(':');
935    let start = parts.next()?;
936    let end = parts.next().unwrap_or(start);
937    let start_idx = parse_address(start)?;
938    let end_idx = parse_address(end)?;
939    Some((
940        (start_idx.0.min(end_idx.0), start_idx.1.min(end_idx.1)),
941        (start_idx.0.max(end_idx.0), start_idx.1.max(end_idx.1)),
942    ))
943}
944
945fn parse_address(address: &str) -> Option<(u32, u32)> {
946    use umya_spreadsheet::helper::coordinate::index_from_coordinate;
947    let (col, row, _, _) = index_from_coordinate(address);
948    match (col, row) {
949        (Some(c), Some(r)) => Some((c, r)),
950        _ => None,
951    }
952}
953
954#[derive(Clone)]
955struct TableTarget {
956    sheet_name: String,
957    table_name: Option<String>,
958    range: ((u32, u32), (u32, u32)),
959    header_hint: Option<u32>,
960}
961
962fn resolve_table_target(
963    workbook: &WorkbookContext,
964    params: &ReadTableParams,
965) -> Result<TableTarget> {
966    if let Some(region_id) = params.region_id
967        && let Some(sheet) = &params.sheet_name
968        && let Ok(region) = workbook.detected_region(sheet, region_id)
969    {
970        return Ok(TableTarget {
971            sheet_name: sheet.clone(),
972            table_name: None,
973            range: parse_range(&region.bounds).unwrap_or(((1, 1), (1, 1))),
974            header_hint: region.header_row,
975        });
976    }
977
978    if let Some(table_name) = &params.table_name {
979        let items = workbook.named_items()?;
980        for item in items {
981            if item.name.eq_ignore_ascii_case(table_name)
982                || item
983                    .name
984                    .to_ascii_lowercase()
985                    .contains(&table_name.to_ascii_lowercase())
986            {
987                let mut sheet_name = item
988                    .sheet_name
989                    .clone()
990                    .or_else(|| params.sheet_name.clone())
991                    .unwrap_or_else(|| workbook.sheet_names().first().cloned().unwrap_or_default());
992                let refers_to = item.refers_to.trim_start_matches('=');
993                let mut range_part = refers_to;
994                if let Some((sheet_part, rest)) = refers_to.split_once('!') {
995                    sheet_name = sheet_part.trim_matches('\'').to_string();
996                    range_part = rest;
997                }
998                if let Some(range) = parse_range(range_part) {
999                    return Ok(TableTarget {
1000                        sheet_name,
1001                        table_name: Some(item.name.clone()),
1002                        range,
1003                        header_hint: if item.kind == NamedItemKind::Table {
1004                            Some(range.0.1)
1005                        } else {
1006                            None
1007                        },
1008                    });
1009                }
1010            }
1011        }
1012    }
1013
1014    let sheet_name = params
1015        .sheet_name
1016        .clone()
1017        .unwrap_or_else(|| workbook.sheet_names().first().cloned().unwrap_or_default());
1018
1019    if let Some(rng) = &params.range
1020        && let Some(range) = parse_range(rng)
1021    {
1022        return Ok(TableTarget {
1023            sheet_name,
1024            table_name: None,
1025            range,
1026            header_hint: None,
1027        });
1028    }
1029
1030    let metrics = workbook.get_sheet_metrics(&sheet_name)?;
1031    let end_col = metrics.metrics.column_count.max(1);
1032    let end_row = metrics.metrics.row_count.max(1);
1033    Ok(TableTarget {
1034        sheet_name,
1035        table_name: None,
1036        range: ((1, 1), (end_col, end_row)),
1037        header_hint: None,
1038    })
1039}
1040
1041#[allow(clippy::too_many_arguments)]
1042fn extract_table_rows(
1043    sheet: &umya_spreadsheet::Worksheet,
1044    target: &TableTarget,
1045    header_row: Option<u32>,
1046    header_rows: Option<u32>,
1047    columns: Option<Vec<String>>,
1048    filters: Option<Vec<TableFilter>>,
1049    limit: usize,
1050    offset: usize,
1051    sample_mode: &str,
1052) -> Result<(Vec<String>, Vec<TableRow>, u32)> {
1053    let ((start_col, start_row), (end_col, end_row)) = target.range;
1054    let mut header_start = header_row.or(target.header_hint).unwrap_or(start_row);
1055    if header_start < start_row {
1056        header_start = start_row;
1057    }
1058    if header_start > end_row {
1059        header_start = start_row;
1060    }
1061    let header_rows_count = header_rows.unwrap_or(1).max(1);
1062    let data_start_row = (header_start + header_rows_count).max(start_row + header_rows_count);
1063    let column_indices: Vec<u32> = if let Some(cols) = columns.as_ref() {
1064        resolve_columns(Some(cols), end_col).into_iter().collect()
1065    } else {
1066        (start_col..=end_col).collect()
1067    };
1068
1069    let headers = build_headers(sheet, &column_indices, header_start, header_rows_count);
1070    let mut all_rows: Vec<TableRow> = Vec::new();
1071    let mut total_rows: u32 = 0;
1072
1073    for row_idx in data_start_row..=end_row {
1074        let mut row = BTreeMap::new();
1075        for (i, col_idx) in column_indices.iter().enumerate() {
1076            let header = headers
1077                .get(i)
1078                .cloned()
1079                .unwrap_or_else(|| format!("Col{col_idx}"));
1080            let value = sheet.get_cell((*col_idx, row_idx)).and_then(cell_to_value);
1081            row.insert(header, value);
1082        }
1083        if !row_passes_filters(&row, filters.as_ref()) {
1084            continue;
1085        }
1086        total_rows += 1;
1087        if matches!(sample_mode, "first" | "all") && total_rows as usize > offset + limit {
1088            continue;
1089        }
1090        all_rows.push(row);
1091    }
1092
1093    let rows = sample_rows(all_rows, limit, offset, sample_mode);
1094
1095    Ok((headers, rows, total_rows))
1096}
1097
1098fn build_headers(
1099    sheet: &umya_spreadsheet::Worksheet,
1100    columns: &[u32],
1101    header_start: u32,
1102    header_rows: u32,
1103) -> Vec<String> {
1104    let mut headers = Vec::new();
1105    for col_idx in columns {
1106        let mut parts = Vec::new();
1107        for h in header_start..(header_start + header_rows) {
1108            let (origin_col, origin_row) = sheet.map_merged_cell((*col_idx, h));
1109            if let Some(value) = sheet
1110                .get_cell((origin_col, origin_row))
1111                .and_then(cell_to_value)
1112            {
1113                match value {
1114                    CellValue::Text(ref s) if s.trim().is_empty() => {}
1115                    CellValue::Text(s) => parts.push(s),
1116                    CellValue::Number(n) => parts.push(n.to_string()),
1117                    CellValue::Bool(b) => parts.push(b.to_string()),
1118                    CellValue::Error(e) => parts.push(e),
1119                    CellValue::Date(d) => parts.push(d),
1120                }
1121            }
1122        }
1123        if parts.is_empty() {
1124            headers.push(crate::utils::column_number_to_name(*col_idx));
1125        } else {
1126            headers.push(parts.join(" / "));
1127        }
1128    }
1129
1130    if headers.iter().all(|h| h.trim().is_empty()) {
1131        return columns
1132            .iter()
1133            .map(|c| crate::utils::column_number_to_name(*c))
1134            .collect();
1135    }
1136
1137    dedupe_headers(headers)
1138}
1139
1140fn dedupe_headers(mut headers: Vec<String>) -> Vec<String> {
1141    let mut seen: HashMap<String, u32> = HashMap::new();
1142    for h in headers.iter_mut() {
1143        let key = h.clone();
1144        if key.trim().is_empty() {
1145            continue;
1146        }
1147        let count = seen.entry(key.clone()).or_insert(0);
1148        if *count > 0 {
1149            h.push_str(&format!("_{}", *count + 1));
1150        }
1151        *count += 1;
1152    }
1153    headers
1154}
1155
1156fn row_passes_filters(row: &TableRow, filters: Option<&Vec<TableFilter>>) -> bool {
1157    if let Some(filters) = filters {
1158        for filter in filters {
1159            if let Some(value) = row.get(&filter.column) {
1160                match filter.op.as_str() {
1161                    "eq" => {
1162                        if !value_eq(value, &filter.value) {
1163                            return false;
1164                        }
1165                    }
1166                    "neq" => {
1167                        if value_eq(value, &filter.value) {
1168                            return false;
1169                        }
1170                    }
1171                    "contains" => {
1172                        if !value_contains(value, &filter.value) {
1173                            return false;
1174                        }
1175                    }
1176                    "gt" => {
1177                        if !value_gt(value, &filter.value) {
1178                            return false;
1179                        }
1180                    }
1181                    "lt" => {
1182                        if !value_lt(value, &filter.value) {
1183                            return false;
1184                        }
1185                    }
1186                    "in" => {
1187                        let list = filter
1188                            .value
1189                            .as_array()
1190                            .cloned()
1191                            .unwrap_or_else(|| vec![filter.value.clone()]);
1192                        if !list.iter().any(|cmp| value_eq(value, cmp)) {
1193                            return false;
1194                        }
1195                    }
1196                    _ => {}
1197                }
1198            }
1199        }
1200    }
1201    true
1202}
1203
1204fn value_eq(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1205    match (cell, cmp) {
1206        (Some(CellValue::Text(s)), serde_json::Value::String(t)) => s == t,
1207        (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
1208            v.as_f64().is_some_and(|f| (*n - f).abs() < f64::EPSILON)
1209        }
1210        (Some(CellValue::Number(n)), serde_json::Value::String(t)) => t
1211            .parse::<f64>()
1212            .map(|f| (*n - f).abs() < f64::EPSILON)
1213            .unwrap_or(false),
1214        (Some(CellValue::Bool(b)), serde_json::Value::Bool(v)) => b == v,
1215        (Some(CellValue::Bool(b)), serde_json::Value::String(t)) => {
1216            t.eq_ignore_ascii_case("true") == *b
1217        }
1218        (Some(CellValue::Date(d)), serde_json::Value::String(t)) => d == t,
1219        (None, serde_json::Value::Null) => true,
1220        _ => false,
1221    }
1222}
1223
1224fn value_contains(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1225    if let (Some(CellValue::Text(s)), serde_json::Value::String(t)) = (cell, cmp) {
1226        return s.to_ascii_lowercase().contains(&t.to_ascii_lowercase());
1227    }
1228    false
1229}
1230
1231fn value_gt(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1232    match (cell, cmp) {
1233        (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
1234            v.as_f64().is_some_and(|f| *n > f)
1235        }
1236        _ => false,
1237    }
1238}
1239
1240fn value_lt(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1241    match (cell, cmp) {
1242        (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
1243            v.as_f64().is_some_and(|f| *n < f)
1244        }
1245        _ => false,
1246    }
1247}
1248
1249fn sample_rows(rows: Vec<TableRow>, limit: usize, offset: usize, mode: &str) -> Vec<TableRow> {
1250    if rows.is_empty() {
1251        return rows;
1252    }
1253
1254    match mode {
1255        "distributed" => {
1256            if limit == 0 {
1257                return Vec::new();
1258            }
1259            let mut indices = Vec::new();
1260            let span = rows.len().saturating_sub(1);
1261            let step = std::cmp::max(1, span / std::cmp::max(1, limit.saturating_sub(1)));
1262            let mut idx = offset;
1263            while idx < rows.len() && indices.len() < limit {
1264                indices.push(idx);
1265                idx = idx.saturating_add(step);
1266                if idx == indices.last().copied().unwrap_or(0) {
1267                    idx += 1;
1268                }
1269            }
1270            if indices.len() < limit {
1271                let last_idx = rows.len().saturating_sub(1);
1272                if !indices.contains(&last_idx) {
1273                    indices.push(last_idx);
1274                }
1275            }
1276            indices
1277                .into_iter()
1278                .filter_map(|i| rows.get(i).cloned())
1279                .collect()
1280        }
1281        "last" => {
1282            let start = rows.len().saturating_sub(limit + offset);
1283            rows.into_iter().skip(start + offset).take(limit).collect()
1284        }
1285        _ => rows.into_iter().skip(offset).take(limit).collect(),
1286    }
1287}
1288
1289fn summarize_columns(headers: &[String], rows: &[TableRow]) -> Vec<ColumnTypeSummary> {
1290    let mut summaries = Vec::new();
1291    for header in headers {
1292        let mut nulls = 0u32;
1293        let mut distinct_set: HashSet<String> = HashSet::new();
1294        let mut values: Vec<f64> = Vec::new();
1295        let mut top_counts: HashMap<String, u32> = HashMap::new();
1296
1297        for row in rows {
1298            match row.get(header) {
1299                Some(Some(CellValue::Number(n))) => {
1300                    values.push(*n);
1301                    let key = n.to_string();
1302                    *top_counts.entry(key).or_default() += 1;
1303                }
1304                Some(Some(CellValue::Text(s))) => {
1305                    distinct_set.insert(s.clone());
1306                    *top_counts.entry(s.clone()).or_default() += 1;
1307                }
1308                Some(Some(CellValue::Bool(b))) => {
1309                    let key = b.to_string();
1310                    distinct_set.insert(key.clone());
1311                    *top_counts.entry(key).or_default() += 1;
1312                }
1313                Some(Some(CellValue::Date(d))) => {
1314                    distinct_set.insert(d.clone());
1315                    *top_counts.entry(d.clone()).or_default() += 1;
1316                }
1317                Some(Some(CellValue::Error(e))) => {
1318                    distinct_set.insert(e.clone());
1319                    *top_counts.entry(e.clone()).or_default() += 1;
1320                }
1321                _ => {
1322                    nulls += 1;
1323                }
1324            }
1325        }
1326
1327        let inferred_type = if !values.is_empty() {
1328            "number"
1329        } else if !distinct_set.is_empty() {
1330            "text"
1331        } else {
1332            "unknown"
1333        }
1334        .to_string();
1335
1336        let min = values.iter().cloned().reduce(f64::min);
1337        let max = values.iter().cloned().reduce(f64::max);
1338        let mean = if values.is_empty() {
1339            None
1340        } else {
1341            Some(values.iter().sum::<f64>() / values.len() as f64)
1342        };
1343
1344        let mut top_values: Vec<(String, u32)> = top_counts.into_iter().collect();
1345        top_values.sort_by(|a, b| b.1.cmp(&a.1));
1346        let top_values = top_values.into_iter().take(3).map(|(v, _)| v).collect();
1347
1348        summaries.push(ColumnTypeSummary {
1349            name: header.clone(),
1350            inferred_type,
1351            nulls,
1352            distinct: distinct_set.len() as u32,
1353            top_values,
1354            min,
1355            max,
1356            mean,
1357        });
1358    }
1359    summaries
1360}
1361
1362#[allow(clippy::too_many_arguments)]
1363fn collect_value_matches(
1364    sheet: &umya_spreadsheet::Worksheet,
1365    sheet_name: &str,
1366    mode: &FindMode,
1367    match_mode: &str,
1368    direction: &LabelDirection,
1369    params: &FindValueParams,
1370    region: Option<&DetectedRegion>,
1371    default_bounds: ((u32, u32), (u32, u32)),
1372) -> Result<Vec<FindValueMatch>> {
1373    let mut results = Vec::new();
1374    let regex = if match_mode == "regex" {
1375        Regex::new(&params.query).ok()
1376    } else {
1377        None
1378    };
1379    let bounds = region
1380        .as_ref()
1381        .and_then(|r| parse_range(&r.bounds))
1382        .unwrap_or(default_bounds);
1383
1384    let header_row = region.and_then(|r| r.header_row).unwrap_or(1);
1385
1386    for cell in sheet.get_cell_collection() {
1387        let coord = cell.get_coordinate();
1388        let col = *coord.get_col_num();
1389        let row = *coord.get_row_num();
1390        if col < bounds.0.0 || col > bounds.1.0 || row < bounds.0.1 || row > bounds.1.1 {
1391            continue;
1392        }
1393        if params.search_headers_only && row != header_row {
1394            continue;
1395        }
1396
1397        let value = cell_to_value(cell);
1398        if let Some(ref allowed) = params.value_types
1399            && !value_type_matches(&value, allowed)
1400        {
1401            continue;
1402        }
1403        if matches!(mode, FindMode::Value) {
1404            if !value_matches(
1405                &value,
1406                &params.query,
1407                match_mode,
1408                params.case_sensitive,
1409                &regex,
1410            ) {
1411                continue;
1412            }
1413        } else if let Some(label) = &params.label {
1414            if !label_matches(cell, label, match_mode, params.case_sensitive, &regex) {
1415                continue;
1416            }
1417        } else {
1418            continue;
1419        }
1420
1421        let neighbors = collect_neighbors(sheet, row, col);
1422        let (label_hit, match_value) = if matches!(mode, FindMode::Label) {
1423            let target_value = match direction {
1424                LabelDirection::Right => sheet.get_cell((col + 1, row)),
1425                LabelDirection::Below => sheet.get_cell((col, row + 1)),
1426                LabelDirection::Any => sheet
1427                    .get_cell((col + 1, row))
1428                    .or_else(|| sheet.get_cell((col, row + 1))),
1429            }
1430            .and_then(cell_to_value);
1431            if target_value.is_none() {
1432                continue;
1433            }
1434            (
1435                Some(LabelHit {
1436                    label_address: coord.get_coordinate(),
1437                    label: label_from_cell(cell),
1438                }),
1439                target_value,
1440            )
1441        } else {
1442            (None, value.clone())
1443        };
1444
1445        let row_context = build_row_context(sheet, row, col);
1446
1447        results.push(FindValueMatch {
1448            address: coord.get_coordinate(),
1449            sheet_name: sheet_name.to_string(),
1450            value: match_value,
1451            row_context,
1452            neighbors,
1453            label_hit,
1454        });
1455    }
1456
1457    Ok(results)
1458}
1459
1460fn label_from_cell(cell: &umya_spreadsheet::Cell) -> String {
1461    cell_to_value(cell)
1462        .map(|v| match v {
1463            CellValue::Text(s) => s,
1464            CellValue::Number(n) => n.to_string(),
1465            CellValue::Bool(b) => b.to_string(),
1466            CellValue::Date(d) => d,
1467            CellValue::Error(e) => e,
1468        })
1469        .unwrap_or_else(|| cell.get_value().to_string())
1470}
1471
1472fn value_matches(
1473    value: &Option<CellValue>,
1474    query: &str,
1475    mode: &str,
1476    case_sensitive: bool,
1477    regex: &Option<Regex>,
1478) -> bool {
1479    if value.is_none() {
1480        return false;
1481    }
1482    let haystack = cell_value_to_string_lower(value.clone().unwrap());
1483    let needle = if case_sensitive {
1484        query.to_string()
1485    } else {
1486        query.to_ascii_lowercase()
1487    };
1488
1489    match mode {
1490        "exact" => haystack == needle,
1491        "regex" => regex
1492            .as_ref()
1493            .map(|re| re.is_match(&haystack))
1494            .unwrap_or(false),
1495        _ => haystack.contains(&needle),
1496    }
1497}
1498
1499fn label_matches(
1500    cell: &umya_spreadsheet::Cell,
1501    label: &str,
1502    mode: &str,
1503    case_sensitive: bool,
1504    regex: &Option<Regex>,
1505) -> bool {
1506    let value = cell_to_value(cell);
1507    if value.is_none() {
1508        return false;
1509    }
1510    let haystack = cell_value_to_string_lower(value.unwrap());
1511    let needle = if case_sensitive {
1512        label.to_string()
1513    } else {
1514        label.to_ascii_lowercase()
1515    };
1516    match mode {
1517        "exact" => haystack == needle,
1518        "regex" => regex
1519            .as_ref()
1520            .map(|re| re.is_match(&haystack))
1521            .unwrap_or(false),
1522        _ => haystack.contains(&needle),
1523    }
1524}
1525
1526fn value_type_matches(value: &Option<CellValue>, allowed: &[String]) -> bool {
1527    if value.is_none() {
1528        return allowed.iter().any(|v| v == "null");
1529    }
1530    match value.as_ref().unwrap() {
1531        CellValue::Text(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("text")),
1532        CellValue::Number(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("number")),
1533        CellValue::Bool(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("bool")),
1534        CellValue::Date(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("date")),
1535        CellValue::Error(_) => true,
1536    }
1537}
1538
1539fn collect_neighbors(
1540    sheet: &umya_spreadsheet::Worksheet,
1541    row: u32,
1542    col: u32,
1543) -> Option<NeighborValues> {
1544    Some(NeighborValues {
1545        left: if col > 1 {
1546            sheet.get_cell((col - 1, row)).and_then(cell_to_value)
1547        } else {
1548            None
1549        },
1550        right: sheet.get_cell((col + 1, row)).and_then(cell_to_value),
1551        up: if row > 1 {
1552            sheet.get_cell((col, row - 1)).and_then(cell_to_value)
1553        } else {
1554            None
1555        },
1556        down: sheet.get_cell((col, row + 1)).and_then(cell_to_value),
1557    })
1558}
1559
1560fn build_row_context(
1561    sheet: &umya_spreadsheet::Worksheet,
1562    row: u32,
1563    col: u32,
1564) -> Option<RowContext> {
1565    let header_value = sheet
1566        .get_cell((col, 1u32))
1567        .and_then(cell_to_value)
1568        .map(|v| match v {
1569            CellValue::Text(s) => s,
1570            CellValue::Number(n) => n.to_string(),
1571            CellValue::Bool(b) => b.to_string(),
1572            CellValue::Date(d) => d,
1573            CellValue::Error(e) => e,
1574        })
1575        .unwrap_or_else(|| format!("Col{}", col));
1576    let value = sheet.get_cell((col, row)).and_then(cell_to_value);
1577    Some(RowContext {
1578        headers: vec![header_value],
1579        values: vec![value],
1580    })
1581}
1582
1583#[derive(Debug, Deserialize, JsonSchema)]
1584pub struct FindFormulaParams {
1585    pub workbook_id: WorkbookId,
1586    pub query: String,
1587    pub sheet_name: Option<String>,
1588    #[serde(default)]
1589    pub case_sensitive: bool,
1590}
1591
1592pub async fn find_formula(
1593    state: Arc<AppState>,
1594    params: FindFormulaParams,
1595) -> Result<FindFormulaResponse> {
1596    let workbook = state.open_workbook(&params.workbook_id).await?;
1597    let query = if params.case_sensitive {
1598        params.query.clone()
1599    } else {
1600        params.query.to_ascii_lowercase()
1601    };
1602    let mut matches = Vec::new();
1603
1604    let sheet_names: Vec<String> = if let Some(sheet) = &params.sheet_name {
1605        vec![sheet.clone()]
1606    } else {
1607        workbook.sheet_names()
1608    };
1609
1610    for sheet_name in sheet_names {
1611        let sheet_matches = workbook.with_sheet(&sheet_name, |sheet| {
1612            collect_formula_matches(sheet, &sheet_name, &query, params.case_sensitive)
1613        })?;
1614        matches.extend(sheet_matches);
1615    }
1616
1617    let response = FindFormulaResponse {
1618        workbook_id: workbook.id.clone(),
1619        workbook_short_id: workbook.short_id.clone(),
1620        matches,
1621    };
1622    Ok(response)
1623}
1624
1625#[derive(Debug, Deserialize, JsonSchema)]
1626pub struct ScanVolatilesParams {
1627    pub workbook_id: WorkbookId,
1628    pub sheet_name: Option<String>,
1629}
1630
1631pub async fn scan_volatiles(
1632    state: Arc<AppState>,
1633    params: ScanVolatilesParams,
1634) -> Result<VolatileScanResponse> {
1635    let workbook = state.open_workbook(&params.workbook_id).await?;
1636    let target_sheets: Vec<String> = if let Some(sheet) = &params.sheet_name {
1637        vec![sheet.clone()]
1638    } else {
1639        workbook.sheet_names()
1640    };
1641
1642    let mut items = Vec::new();
1643    let mut truncated = false;
1644
1645    for sheet_name in target_sheets {
1646        let graph = workbook.formula_graph(&sheet_name)?;
1647        for group in graph.groups() {
1648            if !group.is_volatile {
1649                continue;
1650            }
1651            for address in group.addresses.iter().take(50) {
1652                items.push(VolatileScanEntry {
1653                    address: address.clone(),
1654                    sheet_name: sheet_name.clone(),
1655                    function: "volatile".to_string(),
1656                    note: Some(group.formula.clone()),
1657                });
1658            }
1659            if group.addresses.len() > 50 {
1660                truncated = true;
1661            }
1662        }
1663    }
1664
1665    let response = VolatileScanResponse {
1666        workbook_id: workbook.id.clone(),
1667        workbook_short_id: workbook.short_id.clone(),
1668        items,
1669        truncated,
1670    };
1671    Ok(response)
1672}
1673
1674#[derive(Debug, Deserialize, JsonSchema)]
1675pub struct SheetStylesParams {
1676    pub workbook_id: WorkbookId,
1677    pub sheet_name: String,
1678}
1679
1680pub async fn sheet_styles(
1681    state: Arc<AppState>,
1682    params: SheetStylesParams,
1683) -> Result<SheetStylesResponse> {
1684    let workbook = state.open_workbook(&params.workbook_id).await?;
1685    let entry = workbook.get_sheet_metrics(&params.sheet_name)?;
1686
1687    let styles = entry
1688        .metrics
1689        .style_map
1690        .iter()
1691        .map(|(style_id, usage)| StyleSummary {
1692            style_id: style_id.clone(),
1693            occurrences: usage.occurrences,
1694            tags: usage.tags.clone(),
1695            example_cells: usage.example_cells.clone(),
1696        })
1697        .collect();
1698
1699    let response = SheetStylesResponse {
1700        workbook_id: workbook.id.clone(),
1701        workbook_short_id: workbook.short_id.clone(),
1702        sheet_name: params.sheet_name.clone(),
1703        styles,
1704        conditional_rules: Vec::new(),
1705    };
1706    Ok(response)
1707}
1708
1709pub async fn range_values(
1710    state: Arc<AppState>,
1711    params: RangeValuesParams,
1712) -> Result<RangeValuesResponse> {
1713    let workbook = state.open_workbook(&params.workbook_id).await?;
1714    let include_headers = params.include_headers.unwrap_or(false);
1715    let values = workbook.with_sheet(&params.sheet_name, |sheet| {
1716        params
1717            .ranges
1718            .iter()
1719            .filter_map(|range| {
1720                parse_range(range).map(|((start_col, start_row), (end_col, end_row))| {
1721                    let mut rows = Vec::new();
1722                    for r in start_row..=end_row {
1723                        let mut row_vals = Vec::new();
1724                        for c in start_col..=end_col {
1725                            if include_headers && r == start_row && start_row == 1 {
1726                                row_vals.push(sheet.get_cell((c, 1u32)).and_then(cell_to_value));
1727                            } else {
1728                                row_vals.push(sheet.get_cell((c, r)).and_then(cell_to_value));
1729                            }
1730                        }
1731                        rows.push(row_vals);
1732                    }
1733                    RangeValuesEntry {
1734                        range: range.clone(),
1735                        rows,
1736                    }
1737                })
1738            })
1739            .collect()
1740    })?;
1741
1742    Ok(RangeValuesResponse {
1743        workbook_id: workbook.id.clone(),
1744        workbook_short_id: workbook.short_id.clone(),
1745        sheet_name: params.sheet_name,
1746        values,
1747    })
1748}
1749
1750pub async fn find_value(
1751    state: Arc<AppState>,
1752    params: FindValueParams,
1753) -> Result<FindValueResponse> {
1754    let workbook = state.open_workbook(&params.workbook_id).await?;
1755    let mut matches = Vec::new();
1756    let mut truncated = false;
1757    let mode = params.mode.clone().unwrap_or_else(|| {
1758        if params.label.is_some() {
1759            FindMode::Label
1760        } else {
1761            FindMode::Value
1762        }
1763    });
1764    let match_mode = params
1765        .match_mode
1766        .as_deref()
1767        .unwrap_or("contains")
1768        .to_ascii_lowercase();
1769    let direction = params.direction.clone().unwrap_or(LabelDirection::Any);
1770
1771    let target_sheets: Vec<String> = if let Some(sheet) = &params.sheet_name {
1772        vec![sheet.clone()]
1773    } else {
1774        workbook.sheet_names()
1775    };
1776
1777    for sheet_name in target_sheets {
1778        let metrics_entry = workbook.get_sheet_metrics(&sheet_name)?;
1779        let default_bounds = (
1780            (1, 1),
1781            (
1782                metrics_entry.metrics.column_count.max(1),
1783                metrics_entry.metrics.row_count.max(1),
1784            ),
1785        );
1786        let region_bounds = params
1787            .region_id
1788            .and_then(|id| workbook.detected_region(&sheet_name, id).ok());
1789        let sheet_matches = workbook.with_sheet(&sheet_name, |sheet| {
1790            collect_value_matches(
1791                sheet,
1792                &sheet_name,
1793                &mode,
1794                &match_mode,
1795                &direction,
1796                &params,
1797                region_bounds.as_ref(),
1798                default_bounds,
1799            )
1800        })??;
1801        matches.extend(sheet_matches);
1802        if matches.len() as u32 >= params.limit {
1803            truncated = true;
1804            matches.truncate(params.limit as usize);
1805            break;
1806        }
1807    }
1808
1809    Ok(FindValueResponse {
1810        workbook_id: workbook.id.clone(),
1811        workbook_short_id: workbook.short_id.clone(),
1812        matches,
1813        truncated,
1814    })
1815}
1816
1817pub async fn read_table(
1818    state: Arc<AppState>,
1819    params: ReadTableParams,
1820) -> Result<ReadTableResponse> {
1821    let workbook = state.open_workbook(&params.workbook_id).await?;
1822    let resolved = resolve_table_target(&workbook, &params)?;
1823    let limit = params.limit.unwrap_or(100) as usize;
1824    let offset = params.offset.unwrap_or(0) as usize;
1825    let sample_mode = params
1826        .sample_mode
1827        .clone()
1828        .unwrap_or_else(|| "first".to_string());
1829
1830    let (headers, rows, total_rows) = workbook.with_sheet(&resolved.sheet_name, |sheet| {
1831        extract_table_rows(
1832            sheet,
1833            &resolved,
1834            params.header_row,
1835            params.header_rows,
1836            params.columns.clone(),
1837            params.filters.clone(),
1838            limit,
1839            offset,
1840            &sample_mode,
1841        )
1842    })??;
1843
1844    let has_more = offset + rows.len() < total_rows as usize;
1845
1846    Ok(ReadTableResponse {
1847        workbook_id: workbook.id.clone(),
1848        workbook_short_id: workbook.short_id.clone(),
1849        sheet_name: resolved.sheet_name,
1850        table_name: resolved.table_name,
1851        headers,
1852        rows,
1853        total_rows,
1854        has_more,
1855    })
1856}
1857
1858pub async fn table_profile(
1859    state: Arc<AppState>,
1860    params: TableProfileParams,
1861) -> Result<TableProfileResponse> {
1862    let workbook = state.open_workbook(&params.workbook_id).await?;
1863    let resolved = resolve_table_target(
1864        &workbook,
1865        &ReadTableParams {
1866            workbook_id: params.workbook_id.clone(),
1867            sheet_name: params.sheet_name.clone(),
1868            table_name: params.table_name.clone(),
1869            region_id: params.region_id,
1870            range: None,
1871            header_row: None,
1872            header_rows: None,
1873            columns: None,
1874            filters: None,
1875            sample_mode: params.sample_mode.clone(),
1876            limit: params.sample_size,
1877            offset: Some(0),
1878        },
1879    )?;
1880
1881    let sample_size = params.sample_size.unwrap_or(10) as usize;
1882    let sample_mode = params
1883        .sample_mode
1884        .clone()
1885        .unwrap_or_else(|| "distributed".to_string());
1886
1887    let (headers, rows, total_rows) = workbook.with_sheet(&resolved.sheet_name, |sheet| {
1888        extract_table_rows(
1889            sheet,
1890            &resolved,
1891            None,
1892            None,
1893            None,
1894            None,
1895            sample_size,
1896            0,
1897            &sample_mode,
1898        )
1899    })??;
1900
1901    let column_types = summarize_columns(&headers, &rows);
1902
1903    Ok(TableProfileResponse {
1904        workbook_id: workbook.id.clone(),
1905        workbook_short_id: workbook.short_id.clone(),
1906        sheet_name: resolved.sheet_name,
1907        table_name: resolved.table_name,
1908        headers,
1909        column_types,
1910        row_count: total_rows,
1911        samples: rows,
1912        notes: Vec::new(),
1913    })
1914}
1915
1916#[derive(Debug, Deserialize, JsonSchema)]
1917pub struct ManifestStubParams {
1918    pub workbook_id: WorkbookId,
1919    pub sheet_filter: Option<String>,
1920}
1921
1922pub async fn get_manifest_stub(
1923    state: Arc<AppState>,
1924    params: ManifestStubParams,
1925) -> Result<ManifestStubResponse> {
1926    let workbook = state.open_workbook(&params.workbook_id).await?;
1927    let mut summaries = workbook.list_summaries()?;
1928
1929    if let Some(filter) = &params.sheet_filter {
1930        summaries.retain(|summary| summary.name.eq_ignore_ascii_case(filter));
1931    }
1932
1933    let sheets = summaries
1934        .into_iter()
1935        .map(|summary| ManifestSheetStub {
1936            sheet_name: summary.name.clone(),
1937            classification: summary.classification.clone(),
1938            candidate_expectations: vec![format!(
1939                "Review {} sheet for expectation candidates",
1940                format!("{:?}", summary.classification).to_ascii_lowercase()
1941            )],
1942            notes: summary.style_tags,
1943        })
1944        .collect();
1945
1946    let response = ManifestStubResponse {
1947        workbook_id: workbook.id.clone(),
1948        workbook_short_id: workbook.short_id.clone(),
1949        slug: workbook.slug.clone(),
1950        sheets,
1951    };
1952    Ok(response)
1953}
1954
1955#[derive(Debug, Deserialize, JsonSchema)]
1956pub struct CloseWorkbookParams {
1957    pub workbook_id: WorkbookId,
1958}
1959
1960pub async fn close_workbook(
1961    state: Arc<AppState>,
1962    params: CloseWorkbookParams,
1963) -> Result<CloseWorkbookResponse> {
1964    state.close_workbook(&params.workbook_id)?;
1965    Ok(CloseWorkbookResponse {
1966        workbook_id: params.workbook_id.clone(),
1967        message: format!("workbook {} evicted", params.workbook_id.as_str()),
1968    })
1969}
1970fn collect_formula_matches(
1971    sheet: &umya_spreadsheet::Worksheet,
1972    sheet_name: &str,
1973    query: &str,
1974    case_sensitive: bool,
1975) -> Vec<FindFormulaMatch> {
1976    use crate::workbook::cell_to_value;
1977
1978    let mut results = Vec::new();
1979    for cell in sheet.get_cell_collection() {
1980        if !cell.is_formula() {
1981            continue;
1982        }
1983        let formula = cell.get_formula();
1984        let haystack = if case_sensitive {
1985            formula.to_string()
1986        } else {
1987            formula.to_ascii_lowercase()
1988        };
1989        if !haystack.contains(query) {
1990            continue;
1991        }
1992        let coord = cell.get_coordinate();
1993        let column = *coord.get_col_num();
1994        let row = *coord.get_row_num();
1995        let columns = vec![column];
1996        let context_row = build_row_snapshot(sheet, row, &columns, true, false);
1997        let header_row = build_row_snapshot(sheet, 1, &columns, false, false);
1998
1999        results.push(FindFormulaMatch {
2000            address: coord.get_coordinate(),
2001            sheet_name: sheet_name.to_string(),
2002            formula: formula.to_string(),
2003            cached_value: if cell.is_formula() {
2004                cell_to_value(cell)
2005            } else {
2006                None
2007            },
2008            context: vec![header_row, context_row],
2009        });
2010    }
2011    results
2012}
2013
2014#[derive(Clone)]
2015struct TraceFormulaInfo {
2016    fingerprint: String,
2017    formula: String,
2018}
2019
2020#[derive(Clone)]
2021struct TraceEdgeRaw {
2022    from: String,
2023    to: String,
2024    neighbor: String,
2025}
2026
2027#[derive(Clone)]
2028struct LayerLinks {
2029    depth: u32,
2030    edges: Vec<TraceEdgeRaw>,
2031}
2032
2033#[derive(Clone)]
2034struct NeighborDetail {
2035    address: String,
2036    column: Option<u32>,
2037    row: Option<u32>,
2038    kind: TraceCellKind,
2039    value: Option<CellValue>,
2040    formula: Option<String>,
2041    fingerprint: Option<String>,
2042    external: bool,
2043}
2044
2045fn build_formula_lookup(graph: &FormulaGraph) -> HashMap<String, TraceFormulaInfo> {
2046    let mut map = HashMap::new();
2047    for group in graph.groups() {
2048        for address in group.addresses.clone() {
2049            map.insert(
2050                address.to_ascii_uppercase(),
2051                TraceFormulaInfo {
2052                    fingerprint: group.fingerprint.clone(),
2053                    formula: group.formula.clone(),
2054                },
2055            );
2056        }
2057    }
2058    map
2059}
2060
2061struct TraceConfig<'a> {
2062    direction: &'a TraceDirection,
2063    origin: &'a str,
2064    sheet_name: &'a str,
2065    depth_limit: u32,
2066    page_size: usize,
2067}
2068
2069fn build_trace_layers(
2070    workbook: &WorkbookContext,
2071    graph: &FormulaGraph,
2072    formula_lookup: &HashMap<String, TraceFormulaInfo>,
2073    config: &TraceConfig<'_>,
2074    cursor: Option<TraceCursor>,
2075) -> Result<(Vec<TraceLayer>, Option<TraceCursor>, Vec<String>)> {
2076    let layer_links =
2077        collect_layer_links(graph, config.direction, config.origin, config.depth_limit);
2078    let mut layers = Vec::new();
2079    let mut next_cursor = None;
2080    let mut notes = Vec::new();
2081    let focus_depth = cursor.as_ref().map(|c| c.depth);
2082
2083    for layer in layer_links {
2084        let produce_edges = focus_depth.is_none_or(|depth| depth == layer.depth);
2085        let offset = cursor
2086            .as_ref()
2087            .filter(|c| c.depth == layer.depth)
2088            .map(|c| c.offset)
2089            .unwrap_or(0);
2090
2091        let mut node_set: HashSet<String> = HashSet::new();
2092        for edge in &layer.edges {
2093            node_set.insert(edge.neighbor.clone());
2094        }
2095        let mut nodes: Vec<String> = node_set.into_iter().collect();
2096        nodes.sort_by(|a, b| compare_addresses(a, b));
2097
2098        let details = workbook.with_sheet(config.sheet_name, |sheet| {
2099            collect_neighbor_details(sheet, config.sheet_name, &nodes, formula_lookup)
2100        })?;
2101        let total_nodes = details.len();
2102        let start = offset.min(total_nodes);
2103        let end = if produce_edges {
2104            (start + config.page_size).min(total_nodes)
2105        } else {
2106            start
2107        };
2108        let selected_slice = if produce_edges {
2109            &details[start..end]
2110        } else {
2111            &details[0..0]
2112        };
2113        let selected_addresses: HashSet<String> = selected_slice
2114            .iter()
2115            .map(|detail| detail.address.clone())
2116            .collect();
2117
2118        let summary = build_layer_summary(&details);
2119        let range_highlights = build_range_highlights(&details);
2120        let group_highlights = build_formula_group_highlights(&details);
2121        let notable_cells = build_notable_cells(&details, &range_highlights, &group_highlights);
2122
2123        let highlights = TraceLayerHighlights {
2124            top_ranges: range_highlights.clone(),
2125            top_formula_groups: group_highlights.clone(),
2126            notable_cells,
2127        };
2128
2129        let edges = if produce_edges {
2130            build_edges_for_layer(&layer.edges, &selected_addresses, formula_lookup)
2131        } else {
2132            Vec::new()
2133        };
2134
2135        let has_more = produce_edges && end < total_nodes;
2136        if has_more && next_cursor.is_none() {
2137            next_cursor = Some(TraceCursor {
2138                depth: layer.depth,
2139                offset: end,
2140            });
2141        }
2142        if has_more {
2143            notes.push(format!(
2144                "Layer {} truncated at {} of {} nodes; supply cursor.depth={} and cursor.offset={} to continue",
2145                layer.depth, end, total_nodes, layer.depth, end
2146            ));
2147        }
2148
2149        layers.push(TraceLayer {
2150            depth: layer.depth,
2151            summary,
2152            highlights,
2153            edges,
2154            has_more,
2155        });
2156    }
2157
2158    Ok((layers, next_cursor, notes))
2159}
2160
2161fn collect_layer_links(
2162    graph: &FormulaGraph,
2163    direction: &TraceDirection,
2164    origin: &str,
2165    depth_limit: u32,
2166) -> Vec<LayerLinks> {
2167    let mut visited: HashSet<String> = HashSet::new();
2168    visited.insert(origin.to_string());
2169    let mut frontier = vec![origin.to_string()];
2170    let mut layers = Vec::new();
2171
2172    for depth in 1..=depth_limit {
2173        let mut next_frontier_set: HashSet<String> = HashSet::new();
2174        let mut edges = Vec::new();
2175
2176        for cell in &frontier {
2177            let neighbors = match direction {
2178                TraceDirection::Precedents => graph.precedents(cell),
2179                TraceDirection::Dependents => graph.dependents(cell),
2180            };
2181
2182            for neighbor in neighbors {
2183                let neighbor_upper = neighbor.to_ascii_uppercase();
2184                let edge = match direction {
2185                    TraceDirection::Precedents => TraceEdgeRaw {
2186                        from: cell.clone(),
2187                        to: neighbor_upper.clone(),
2188                        neighbor: neighbor_upper.clone(),
2189                    },
2190                    TraceDirection::Dependents => TraceEdgeRaw {
2191                        from: neighbor_upper.clone(),
2192                        to: cell.clone(),
2193                        neighbor: neighbor_upper.clone(),
2194                    },
2195                };
2196                edges.push(edge);
2197                if visited.insert(neighbor_upper.clone()) {
2198                    next_frontier_set.insert(neighbor_upper);
2199                }
2200            }
2201        }
2202
2203        if edges.is_empty() {
2204            break;
2205        }
2206
2207        layers.push(LayerLinks { depth, edges });
2208        if next_frontier_set.is_empty() {
2209            break;
2210        }
2211        let mut next_frontier: Vec<String> = next_frontier_set.into_iter().collect();
2212        next_frontier.sort();
2213        frontier = next_frontier;
2214    }
2215
2216    layers
2217}
2218
2219fn collect_neighbor_details(
2220    sheet: &umya_spreadsheet::Worksheet,
2221    current_sheet: &str,
2222    addresses: &[String],
2223    formula_lookup: &HashMap<String, TraceFormulaInfo>,
2224) -> Vec<NeighborDetail> {
2225    let mut details = Vec::new();
2226    for address in addresses {
2227        let (sheet_part, cell_part) = split_sheet_and_cell(address);
2228        let normalized_sheet = sheet_part
2229            .as_ref()
2230            .map(|s| clean_sheet_name(s).to_ascii_lowercase());
2231        let is_external = normalized_sheet
2232            .as_ref()
2233            .map(|s| !s.eq_ignore_ascii_case(current_sheet))
2234            .unwrap_or(false);
2235
2236        let Some(cell_ref) = cell_part else {
2237            details.push(NeighborDetail {
2238                address: address.clone(),
2239                column: None,
2240                row: None,
2241                kind: TraceCellKind::External,
2242                value: None,
2243                formula: None,
2244                fingerprint: None,
2245                external: true,
2246            });
2247            continue;
2248        };
2249
2250        let cell_ref_upper = cell_ref.to_ascii_uppercase();
2251
2252        if is_external {
2253            let formula_info = lookup_formula_info(formula_lookup, &cell_ref_upper, address);
2254            details.push(NeighborDetail {
2255                address: address.clone(),
2256                column: None,
2257                row: None,
2258                kind: TraceCellKind::External,
2259                value: None,
2260                formula: formula_info.map(|info| info.formula.clone()),
2261                fingerprint: formula_info.map(|info| info.fingerprint.clone()),
2262                external: true,
2263            });
2264            continue;
2265        }
2266
2267        let Some((col, row)) = parse_address(&cell_ref_upper) else {
2268            details.push(NeighborDetail {
2269                address: address.clone(),
2270                column: None,
2271                row: None,
2272                kind: TraceCellKind::External,
2273                value: None,
2274                formula: None,
2275                fingerprint: None,
2276                external: true,
2277            });
2278            continue;
2279        };
2280
2281        let cell_opt = sheet.get_cell((&col, &row));
2282        let formula_info = lookup_formula_info(formula_lookup, &cell_ref_upper, address);
2283        if let Some(cell) = cell_opt {
2284            let value = cell_to_value(cell);
2285            let kind = if cell.is_formula() {
2286                TraceCellKind::Formula
2287            } else if value.is_some() {
2288                TraceCellKind::Literal
2289            } else {
2290                TraceCellKind::Blank
2291            };
2292            details.push(NeighborDetail {
2293                address: address.clone(),
2294                column: Some(col),
2295                row: Some(row),
2296                kind,
2297                value,
2298                formula: formula_info.map(|info| info.formula.clone()),
2299                fingerprint: formula_info.map(|info| info.fingerprint.clone()),
2300                external: false,
2301            });
2302        } else {
2303            details.push(NeighborDetail {
2304                address: address.clone(),
2305                column: Some(col),
2306                row: Some(row),
2307                kind: TraceCellKind::Blank,
2308                value: None,
2309                formula: formula_info.map(|info| info.formula.clone()),
2310                fingerprint: formula_info.map(|info| info.fingerprint.clone()),
2311                external: false,
2312            });
2313        }
2314    }
2315    details
2316}
2317
2318fn build_layer_summary(details: &[NeighborDetail]) -> TraceLayerSummary {
2319    let mut summary = TraceLayerSummary {
2320        total_nodes: details.len(),
2321        formula_nodes: 0,
2322        value_nodes: 0,
2323        blank_nodes: 0,
2324        external_nodes: 0,
2325        unique_formula_groups: 0,
2326    };
2327
2328    let mut fingerprints: HashSet<String> = HashSet::new();
2329
2330    for detail in details {
2331        match detail.kind {
2332            TraceCellKind::Formula => {
2333                summary.formula_nodes += 1;
2334                if let Some(fp) = &detail.fingerprint {
2335                    fingerprints.insert(fp.clone());
2336                }
2337            }
2338            TraceCellKind::Literal => summary.value_nodes += 1,
2339            TraceCellKind::Blank => summary.blank_nodes += 1,
2340            TraceCellKind::External => summary.external_nodes += 1,
2341        }
2342    }
2343
2344    summary.unique_formula_groups = fingerprints.len();
2345    summary
2346}
2347
2348fn build_formula_group_highlights(details: &[NeighborDetail]) -> Vec<TraceFormulaGroupHighlight> {
2349    let mut aggregates: HashMap<String, (String, usize, Vec<String>)> = HashMap::new();
2350    for detail in details {
2351        if let (Some(fingerprint), Some(formula)) = (&detail.fingerprint, &detail.formula) {
2352            let entry = aggregates
2353                .entry(fingerprint.clone())
2354                .or_insert_with(|| (formula.clone(), 0, Vec::new()));
2355            entry.1 += 1;
2356            if entry.2.len() < TRACE_GROUP_SAMPLE_LIMIT {
2357                entry.2.push(detail.address.clone());
2358            }
2359        }
2360    }
2361
2362    let mut highlights: Vec<TraceFormulaGroupHighlight> = aggregates
2363        .into_iter()
2364        .map(
2365            |(fingerprint, (formula, count, sample_addresses))| TraceFormulaGroupHighlight {
2366                fingerprint,
2367                formula,
2368                count,
2369                sample_addresses,
2370            },
2371        )
2372        .collect();
2373
2374    highlights.sort_by(|a, b| b.count.cmp(&a.count));
2375    highlights.truncate(TRACE_GROUP_HIGHLIGHT_LIMIT);
2376    highlights
2377}
2378
2379fn build_range_highlights(details: &[NeighborDetail]) -> Vec<TraceRangeHighlight> {
2380    let mut by_column: HashMap<u32, Vec<&NeighborDetail>> = HashMap::new();
2381    for detail in details {
2382        if let (Some(col), Some(_row)) = (detail.column, detail.row)
2383            && !detail.external
2384        {
2385            by_column.entry(col).or_default().push(detail);
2386        }
2387    }
2388
2389    for column_entries in by_column.values_mut() {
2390        column_entries.sort_by(|a, b| a.row.cmp(&b.row));
2391    }
2392
2393    let mut ranges = Vec::new();
2394    for entries in by_column.values() {
2395        let mut current: Vec<&NeighborDetail> = Vec::new();
2396        for detail in entries {
2397            if current.is_empty() {
2398                current.push(detail);
2399                continue;
2400            }
2401            let prev_row = current.last().and_then(|d| d.row).unwrap_or(0);
2402            if detail.row.unwrap_or(0) == prev_row + 1 {
2403                current.push(detail);
2404            } else {
2405                if current.len() >= TRACE_RANGE_THRESHOLD {
2406                    ranges.push(make_range_highlight(&current));
2407                }
2408                current.clear();
2409                current.push(detail);
2410            }
2411        }
2412        if current.len() >= TRACE_RANGE_THRESHOLD {
2413            ranges.push(make_range_highlight(&current));
2414        }
2415    }
2416
2417    ranges.sort_by(|a, b| b.count.cmp(&a.count));
2418    ranges.truncate(TRACE_RANGE_HIGHLIGHT_LIMIT);
2419    ranges
2420}
2421
2422fn make_range_highlight(details: &[&NeighborDetail]) -> TraceRangeHighlight {
2423    let mut literals = 0usize;
2424    let mut formulas = 0usize;
2425    let mut blanks = 0usize;
2426    let mut sample_values = Vec::new();
2427    let mut sample_formulas = Vec::new();
2428    let mut sample_addresses = Vec::new();
2429
2430    for detail in details {
2431        match detail.kind {
2432            TraceCellKind::Formula => {
2433                formulas += 1;
2434                if let Some(formula) = &detail.formula
2435                    && sample_formulas.len() < TRACE_RANGE_FORMULA_SAMPLES
2436                    && !sample_formulas.contains(formula)
2437                {
2438                    sample_formulas.push(formula.clone());
2439                }
2440            }
2441            TraceCellKind::Literal => {
2442                literals += 1;
2443                if let Some(value) = &detail.value
2444                    && sample_values.len() < TRACE_RANGE_VALUE_SAMPLES
2445                {
2446                    sample_values.push(value.clone());
2447                }
2448            }
2449            TraceCellKind::Blank => blanks += 1,
2450            TraceCellKind::External => {}
2451        }
2452        if sample_addresses.len() < TRACE_RANGE_VALUE_SAMPLES {
2453            sample_addresses.push(detail.address.clone());
2454        }
2455    }
2456
2457    TraceRangeHighlight {
2458        start: details
2459            .first()
2460            .map(|d| d.address.clone())
2461            .unwrap_or_default(),
2462        end: details
2463            .last()
2464            .map(|d| d.address.clone())
2465            .unwrap_or_default(),
2466        count: details.len(),
2467        literals,
2468        formulas,
2469        blanks,
2470        sample_values,
2471        sample_formulas,
2472        sample_addresses,
2473    }
2474}
2475
2476fn build_notable_cells(
2477    details: &[NeighborDetail],
2478    ranges: &[TraceRangeHighlight],
2479    groups: &[TraceFormulaGroupHighlight],
2480) -> Vec<TraceCellHighlight> {
2481    let mut exclude: HashSet<String> = HashSet::new();
2482    for range in ranges {
2483        exclude.insert(range.start.clone());
2484        exclude.insert(range.end.clone());
2485        for addr in &range.sample_addresses {
2486            exclude.insert(addr.clone());
2487        }
2488    }
2489    for group in groups {
2490        for addr in &group.sample_addresses {
2491            exclude.insert(addr.clone());
2492        }
2493    }
2494
2495    let mut highlights = Vec::new();
2496    let mut kind_counts: HashMap<TraceCellKind, usize> = HashMap::new();
2497
2498    for detail in details {
2499        if highlights.len() >= TRACE_CELL_HIGHLIGHT_LIMIT {
2500            break;
2501        }
2502        if exclude.contains(&detail.address) {
2503            continue;
2504        }
2505        let counter = kind_counts.entry(detail.kind.clone()).or_insert(0);
2506        if *counter >= 2 && detail.kind != TraceCellKind::External {
2507            continue;
2508        }
2509        highlights.push(TraceCellHighlight {
2510            address: detail.address.clone(),
2511            kind: detail.kind.clone(),
2512            value: detail.value.clone(),
2513            formula: detail.formula.clone(),
2514        });
2515        *counter += 1;
2516    }
2517
2518    highlights
2519}
2520
2521fn build_edges_for_layer(
2522    raw_edges: &[TraceEdgeRaw],
2523    selected: &HashSet<String>,
2524    formula_lookup: &HashMap<String, TraceFormulaInfo>,
2525) -> Vec<FormulaTraceEdge> {
2526    let mut edges = Vec::new();
2527    for edge in raw_edges {
2528        if selected.contains(&edge.neighbor) {
2529            let formula = lookup_formula_info(formula_lookup, &edge.neighbor, &edge.neighbor)
2530                .map(|info| info.formula.clone());
2531            edges.push(FormulaTraceEdge {
2532                from: edge.from.clone(),
2533                to: edge.to.clone(),
2534                formula,
2535                note: None,
2536            });
2537        }
2538    }
2539    edges.sort_by(|a, b| compare_addresses(&a.to, &b.to));
2540    edges
2541}
2542
2543fn lookup_formula_info<'a>(
2544    lookup: &'a HashMap<String, TraceFormulaInfo>,
2545    cell_ref: &str,
2546    original: &str,
2547) -> Option<&'a TraceFormulaInfo> {
2548    if let Some(info) = lookup.get(cell_ref) {
2549        return Some(info);
2550    }
2551    if let (Some(_sheet), Some(cell)) = split_sheet_and_cell(original) {
2552        let upper = cell.to_ascii_uppercase();
2553        return lookup.get(&upper);
2554    }
2555    None
2556}
2557
2558fn compare_addresses(left: &str, right: &str) -> Ordering {
2559    let (sheet_left, cell_left) = split_sheet_and_cell(left);
2560    let (sheet_right, cell_right) = split_sheet_and_cell(right);
2561
2562    let sheet_left_key = sheet_left
2563        .as_ref()
2564        .map(|s| clean_sheet_name(s).to_ascii_uppercase())
2565        .unwrap_or_default();
2566    let sheet_right_key = sheet_right
2567        .as_ref()
2568        .map(|s| clean_sheet_name(s).to_ascii_uppercase())
2569        .unwrap_or_default();
2570
2571    match sheet_left_key.cmp(&sheet_right_key) {
2572        Ordering::Equal => {
2573            let left_core = cell_left.unwrap_or_else(|| left.to_string());
2574            let right_core = cell_right.unwrap_or_else(|| right.to_string());
2575            let left_coords = parse_address(&left_core.to_ascii_uppercase());
2576            let right_coords = parse_address(&right_core.to_ascii_uppercase());
2577            match (left_coords, right_coords) {
2578                (Some((lc, lr)), Some((rc, rr))) => lc
2579                    .cmp(&rc)
2580                    .then_with(|| lr.cmp(&rr))
2581                    .then_with(|| left_core.cmp(&right_core)),
2582                _ => left_core.cmp(&right_core),
2583            }
2584        }
2585        other => other,
2586    }
2587}
2588
2589fn split_sheet_and_cell(address: &str) -> (Option<String>, Option<String>) {
2590    if let Some(idx) = address.rfind('!') {
2591        let sheet = address[..idx].to_string();
2592        let cell = address[idx + 1..].to_string();
2593        (Some(sheet), Some(cell))
2594    } else {
2595        (None, Some(address.to_string()))
2596    }
2597}
2598
2599fn clean_sheet_name(sheet: &str) -> String {
2600    let trimmed = sheet.trim_matches(|c| c == '\'' || c == '"');
2601    let after_bracket = trimmed.rsplit(']').next().unwrap_or(trimmed);
2602    after_bracket
2603        .trim_matches(|c| c == '\'' || c == '"')
2604        .to_string()
2605}