spreadsheet_mcp/tools/
mod.rs

1pub mod filters;
2#[cfg(feature = "recalc")]
3pub mod fork;
4
5use crate::analysis::{formula::FormulaGraph, stats};
6use crate::model::*;
7use crate::state::AppState;
8use crate::workbook::{WorkbookContext, cell_to_value};
9use anyhow::{Result, anyhow};
10use regex::Regex;
11use schemars::JsonSchema;
12use serde::Deserialize;
13use std::cmp::Ordering;
14use std::collections::{BTreeMap, HashMap, HashSet};
15use std::sync::Arc;
16
17const DEFAULT_TRACE_PAGE_SIZE: usize = 20;
18const TRACE_PAGE_MIN: usize = 5;
19const TRACE_PAGE_MAX: usize = 200;
20const TRACE_RANGE_THRESHOLD: usize = 4;
21const TRACE_RANGE_HIGHLIGHT_LIMIT: usize = 3;
22const TRACE_GROUP_HIGHLIGHT_LIMIT: usize = 3;
23const TRACE_CELL_HIGHLIGHT_LIMIT: usize = 5;
24const TRACE_RANGE_VALUE_SAMPLES: usize = 3;
25const TRACE_RANGE_FORMULA_SAMPLES: usize = 2;
26const TRACE_GROUP_SAMPLE_LIMIT: usize = 5;
27const TRACE_DEPENDENTS_PER_CELL_LIMIT: usize = 500;
28
29pub async fn list_workbooks(
30    state: Arc<AppState>,
31    params: ListWorkbooksParams,
32) -> Result<WorkbookListResponse> {
33    let filter = params.into_filter()?;
34    state.list_workbooks(filter)
35}
36
37pub async fn describe_workbook(
38    state: Arc<AppState>,
39    params: DescribeWorkbookParams,
40) -> Result<WorkbookDescription> {
41    let workbook = state.open_workbook(&params.workbook_id).await?;
42    let desc = workbook.describe();
43    Ok(desc)
44}
45
46#[derive(Debug, Deserialize, JsonSchema)]
47pub struct ListWorkbooksParams {
48    pub slug_prefix: Option<String>,
49    pub folder: Option<String>,
50    pub path_glob: Option<String>,
51}
52
53impl ListWorkbooksParams {
54    fn into_filter(self) -> Result<filters::WorkbookFilter> {
55        filters::WorkbookFilter::new(self.slug_prefix, self.folder, self.path_glob)
56    }
57}
58
59#[derive(Debug, Deserialize, JsonSchema)]
60pub struct DescribeWorkbookParams {
61    pub workbook_id: WorkbookId,
62}
63
64#[derive(Debug, Deserialize, JsonSchema)]
65pub struct ListSheetsParams {
66    pub workbook_id: WorkbookId,
67}
68
69pub async fn list_sheets(
70    state: Arc<AppState>,
71    params: ListSheetsParams,
72) -> Result<SheetListResponse> {
73    let workbook = state.open_workbook(&params.workbook_id).await?;
74    let summaries = workbook.list_summaries()?;
75    let response = SheetListResponse {
76        workbook_id: workbook.id.clone(),
77        workbook_short_id: workbook.short_id.clone(),
78        sheets: summaries,
79    };
80    Ok(response)
81}
82
83#[derive(Debug, Deserialize, JsonSchema)]
84pub struct SheetOverviewParams {
85    pub workbook_id: WorkbookId,
86    pub sheet_name: String,
87}
88
89#[derive(Debug, Deserialize, JsonSchema)]
90pub struct WorkbookSummaryParams {
91    pub workbook_id: WorkbookId,
92}
93
94pub async fn workbook_summary(
95    state: Arc<AppState>,
96    params: WorkbookSummaryParams,
97) -> Result<WorkbookSummaryResponse> {
98    let workbook = state.open_workbook(&params.workbook_id).await?;
99    let sheet_names = workbook.sheet_names();
100
101    let mut total_cells: u64 = 0;
102    let mut total_formulas: u64 = 0;
103    let mut breakdown = WorkbookBreakdown::default();
104    let mut region_counts = RegionCountSummary::default();
105    let mut entry_points: Vec<EntryPoint> = Vec::new();
106    let mut key_named_ranges: Vec<NamedRangeDescriptor> = Vec::new();
107
108    for sheet_name in &sheet_names {
109        let entry = workbook.get_sheet_metrics(sheet_name)?;
110        total_cells += (entry.metrics.row_count as u64) * (entry.metrics.column_count as u64);
111        total_formulas += entry.metrics.formula_cells as u64;
112        match entry.metrics.classification {
113            SheetClassification::Calculator => breakdown.calculator_sheets += 1,
114            SheetClassification::Metadata => breakdown.metadata_sheets += 1,
115            SheetClassification::Empty => {}
116            _ => breakdown.data_sheets += 1,
117        }
118
119        for region in &entry.detected_regions {
120            match region
121                .region_kind
122                .clone()
123                .unwrap_or(region.classification.clone())
124            {
125                RegionKind::Calculator => region_counts.calculator += 1,
126                RegionKind::Metadata => region_counts.metadata += 1,
127                RegionKind::Parameters => region_counts.parameters += 1,
128                RegionKind::Outputs => region_counts.outputs += 1,
129                RegionKind::Data | RegionKind::Table => region_counts.data += 1,
130                _ => region_counts.other += 1,
131            }
132            if region.confidence >= 0.3 {
133                let kind = region
134                    .region_kind
135                    .as_ref()
136                    .unwrap_or(&region.classification);
137                let priority = match kind {
138                    RegionKind::Parameters => 0,
139                    RegionKind::Data | RegionKind::Table => 1,
140                    RegionKind::Outputs => 2,
141                    RegionKind::Calculator => 3,
142                    RegionKind::Metadata => 4,
143                    _ => 5,
144                };
145                entry_points.push(EntryPoint {
146                    sheet_name: sheet_name.clone(),
147                    region_id: Some(region.id),
148                    bounds: Some(region.bounds.clone()),
149                    rationale: format!(
150                        "{:?} region ({} rows, {:.0}% conf, p{})",
151                        kind,
152                        region.row_count,
153                        region.confidence * 100.0,
154                        priority
155                    ),
156                });
157            }
158        }
159
160        if entry.detected_regions.is_empty() && entry.metrics.non_empty_cells > 0 {
161            entry_points.push(EntryPoint {
162                sheet_name: sheet_name.clone(),
163                region_id: None,
164                bounds: None,
165                rationale: "Whole sheet is non-empty; start at top-left".to_string(),
166            });
167        }
168    }
169
170    entry_points.sort_by(|a, b| {
171        let pa = priority_from_rationale(&a.rationale);
172        let pb = priority_from_rationale(&b.rationale);
173        pa.cmp(&pb)
174            .then_with(|| {
175                a.bounds
176                    .as_ref()
177                    .map(|_| 1)
178                    .cmp(&b.bounds.as_ref().map(|_| 1))
179            })
180            .then_with(|| a.sheet_name.cmp(&b.sheet_name))
181    });
182    entry_points.truncate(5);
183
184    let mut seen_ranges = std::collections::HashSet::new();
185    for item in workbook.named_items()? {
186        if item.kind != NamedItemKind::NamedRange && item.kind != NamedItemKind::Table {
187            continue;
188        }
189        if !seen_ranges.insert(item.refers_to.clone()) {
190            continue;
191        }
192        key_named_ranges.push(item);
193        if key_named_ranges.len() >= 10 {
194            break;
195        }
196    }
197
198    Ok(WorkbookSummaryResponse {
199        workbook_id: workbook.id.clone(),
200        workbook_short_id: workbook.short_id.clone(),
201        slug: workbook.slug.clone(),
202        sheet_count: sheet_names.len(),
203        total_cells,
204        total_formulas,
205        breakdown,
206        region_counts,
207        key_named_ranges,
208        suggested_entry_points: entry_points,
209    })
210}
211
212fn priority_from_rationale(rationale: &str) -> u32 {
213    if rationale.contains("p0") {
214        0
215    } else if rationale.contains("p1") {
216        1
217    } else if rationale.contains("p2") {
218        2
219    } else if rationale.contains("p3") {
220        3
221    } else if rationale.contains("p4") {
222        4
223    } else {
224        5
225    }
226}
227
228pub async fn sheet_overview(
229    state: Arc<AppState>,
230    params: SheetOverviewParams,
231) -> Result<SheetOverviewResponse> {
232    let workbook = state.open_workbook(&params.workbook_id).await?;
233    let overview = workbook.sheet_overview(&params.sheet_name)?;
234    Ok(overview)
235}
236
237fn default_start_row() -> u32 {
238    1
239}
240
241fn default_page_size() -> u32 {
242    50
243}
244
245fn default_include_formulas() -> bool {
246    true
247}
248
249fn default_include_header() -> bool {
250    true
251}
252
253#[derive(Debug, Deserialize, JsonSchema)]
254pub struct SheetPageParams {
255    pub workbook_id: WorkbookId,
256    pub sheet_name: String,
257    #[serde(default = "default_start_row")]
258    pub start_row: u32,
259    #[serde(default = "default_page_size")]
260    pub page_size: u32,
261    #[serde(default)]
262    pub columns: Option<Vec<String>>,
263    #[serde(default)]
264    pub columns_by_header: Option<Vec<String>>,
265    #[serde(default = "default_include_formulas")]
266    pub include_formulas: bool,
267    #[serde(default)]
268    pub include_styles: bool,
269    #[serde(default = "default_include_header")]
270    pub include_header: bool,
271    #[serde(default)]
272    pub format: Option<SheetPageFormat>,
273}
274
275impl Default for SheetPageParams {
276    fn default() -> Self {
277        SheetPageParams {
278            workbook_id: WorkbookId(String::new()),
279            sheet_name: String::new(),
280            start_row: default_start_row(),
281            page_size: default_page_size(),
282            columns: None,
283            columns_by_header: None,
284            include_formulas: default_include_formulas(),
285            include_styles: false,
286            include_header: default_include_header(),
287            format: None,
288        }
289    }
290}
291
292fn default_find_limit() -> u32 {
293    50
294}
295
296#[derive(Debug, Deserialize, JsonSchema)]
297pub struct FindValueParams {
298    pub workbook_id: WorkbookId,
299    pub query: String,
300    #[serde(default)]
301    pub label: Option<String>,
302    #[serde(default)]
303    pub mode: Option<FindMode>,
304    #[serde(default)]
305    pub match_mode: Option<String>,
306    #[serde(default)]
307    pub case_sensitive: bool,
308    #[serde(default)]
309    pub sheet_name: Option<String>,
310    #[serde(default)]
311    pub region_id: Option<u32>,
312    #[serde(default)]
313    pub table_name: Option<String>,
314    #[serde(default)]
315    pub value_types: Option<Vec<String>>,
316    #[serde(default)]
317    pub search_headers_only: bool,
318    #[serde(default)]
319    pub direction: Option<LabelDirection>,
320    #[serde(default = "default_find_limit")]
321    pub limit: u32,
322}
323
324impl Default for FindValueParams {
325    fn default() -> Self {
326        Self {
327            workbook_id: WorkbookId(String::new()),
328            query: String::new(),
329            label: None,
330            mode: None,
331            match_mode: None,
332            case_sensitive: false,
333            sheet_name: None,
334            region_id: None,
335            table_name: None,
336            value_types: None,
337            search_headers_only: false,
338            direction: None,
339            limit: default_find_limit(),
340        }
341    }
342}
343
344#[derive(Debug, Deserialize, JsonSchema, Default)]
345pub struct ReadTableParams {
346    pub workbook_id: WorkbookId,
347    #[serde(default)]
348    pub sheet_name: Option<String>,
349    #[serde(default)]
350    pub table_name: Option<String>,
351    #[serde(default)]
352    pub region_id: Option<u32>,
353    #[serde(default)]
354    pub range: Option<String>,
355    #[serde(default)]
356    pub header_row: Option<u32>,
357    #[serde(default)]
358    pub header_rows: Option<u32>,
359    #[serde(default)]
360    pub columns: Option<Vec<String>>,
361    #[serde(default)]
362    pub filters: Option<Vec<TableFilter>>,
363    #[serde(default)]
364    pub sample_mode: Option<String>,
365    #[serde(default)]
366    pub limit: Option<u32>,
367    #[serde(default)]
368    pub offset: Option<u32>,
369}
370
371#[derive(Debug, Deserialize, JsonSchema, Clone)]
372pub struct TableFilter {
373    pub column: String,
374    pub op: String,
375    pub value: serde_json::Value,
376}
377
378#[derive(Debug, Deserialize, JsonSchema, Default)]
379pub struct TableProfileParams {
380    pub workbook_id: WorkbookId,
381    #[serde(default)]
382    pub sheet_name: Option<String>,
383    #[serde(default)]
384    pub region_id: Option<u32>,
385    #[serde(default)]
386    pub table_name: Option<String>,
387    #[serde(default)]
388    pub sample_mode: Option<String>,
389    #[serde(default)]
390    pub sample_size: Option<u32>,
391}
392
393#[derive(Debug, Deserialize, JsonSchema)]
394pub struct RangeValuesParams {
395    pub workbook_id: WorkbookId,
396    pub sheet_name: String,
397    pub ranges: Vec<String>,
398    #[serde(default)]
399    pub include_headers: Option<bool>,
400}
401
402pub async fn sheet_page(
403    state: Arc<AppState>,
404    params: SheetPageParams,
405) -> Result<SheetPageResponse> {
406    if params.page_size == 0 {
407        return Err(anyhow!("page_size must be greater than zero"));
408    }
409
410    let workbook = state.open_workbook(&params.workbook_id).await?;
411    let metrics = workbook.get_sheet_metrics(&params.sheet_name)?;
412    let format = params.format.unwrap_or_default();
413
414    let start_row = params.start_row.max(1);
415    let page_size = params.page_size.min(500);
416    let include_formulas = params.include_formulas;
417    let include_styles = params.include_styles;
418    let columns = params.columns.clone();
419    let columns_by_header = params.columns_by_header.clone();
420    let include_header = params.include_header;
421
422    let page = workbook.with_sheet(&params.sheet_name, |sheet| {
423        build_page(
424            sheet,
425            start_row,
426            page_size,
427            columns.clone(),
428            columns_by_header.clone(),
429            include_formulas,
430            include_styles,
431            include_header,
432        )
433    })?;
434
435    let has_more = page.end_row < metrics.metrics.row_count;
436    let next_start_row = if has_more {
437        Some(page.end_row + 1)
438    } else {
439        None
440    };
441
442    let compact_payload = if matches!(format, SheetPageFormat::Compact) {
443        Some(build_compact_payload(
444            &page.header,
445            &page.rows,
446            include_header,
447        ))
448    } else {
449        None
450    };
451
452    let values_only_payload = if matches!(format, SheetPageFormat::ValuesOnly) {
453        Some(build_values_only_payload(
454            &page.header,
455            &page.rows,
456            include_header,
457        ))
458    } else {
459        None
460    };
461
462    let response = SheetPageResponse {
463        workbook_id: workbook.id.clone(),
464        workbook_short_id: workbook.short_id.clone(),
465        sheet_name: params.sheet_name,
466        rows: if matches!(format, SheetPageFormat::Full) {
467            page.rows
468        } else {
469            Vec::new()
470        },
471        has_more,
472        next_start_row,
473        header_row: if include_header { page.header } else { None },
474        compact: compact_payload,
475        values_only: values_only_payload,
476        format,
477    };
478    Ok(response)
479}
480
481#[derive(Debug, Deserialize, JsonSchema)]
482pub struct SheetFormulaMapParams {
483    pub workbook_id: WorkbookId,
484    pub sheet_name: String,
485    pub range: Option<String>,
486    #[serde(default)]
487    pub expand: bool,
488    #[serde(default)]
489    pub limit: Option<u32>,
490    #[serde(default)]
491    pub sort_by: Option<FormulaSortBy>,
492}
493
494#[derive(Debug, Clone, Copy, Default, Deserialize, JsonSchema)]
495#[serde(rename_all = "snake_case")]
496pub enum FormulaSortBy {
497    #[default]
498    Address,
499    Complexity,
500    Count,
501}
502
503pub async fn sheet_formula_map(
504    state: Arc<AppState>,
505    params: SheetFormulaMapParams,
506) -> Result<SheetFormulaMapResponse> {
507    let workbook = state.open_workbook(&params.workbook_id).await?;
508    let graph = workbook.formula_graph(&params.sheet_name)?;
509    let mut groups = Vec::new();
510    let mut truncated = false;
511
512    for mut group in graph.groups() {
513        if let Some(range) = &params.range {
514            group.addresses.retain(|addr| address_in_range(addr, range));
515            if group.addresses.is_empty() {
516                continue;
517            }
518        }
519        if !params.expand && group.addresses.len() > 15 {
520            group.addresses.truncate(15);
521            truncated = true;
522        }
523        groups.push(group);
524    }
525
526    let sort_by = params.sort_by.unwrap_or_default();
527    match sort_by {
528        FormulaSortBy::Address => {
529            groups.sort_by(|a, b| a.addresses.first().cmp(&b.addresses.first()));
530        }
531        FormulaSortBy::Complexity => {
532            groups.sort_by(|a, b| b.formula.len().cmp(&a.formula.len()));
533        }
534        FormulaSortBy::Count => {
535            groups.sort_by(|a, b| b.addresses.len().cmp(&a.addresses.len()));
536        }
537    }
538
539    if let Some(limit) = params.limit
540        && groups.len() > limit as usize
541    {
542        groups.truncate(limit as usize);
543        truncated = true;
544    }
545
546    let response = SheetFormulaMapResponse {
547        workbook_id: workbook.id.clone(),
548        workbook_short_id: workbook.short_id.clone(),
549        sheet_name: params.sheet_name.clone(),
550        groups,
551        truncated,
552    };
553    Ok(response)
554}
555
556#[derive(Debug, Deserialize, JsonSchema)]
557pub struct FormulaTraceParams {
558    pub workbook_id: WorkbookId,
559    pub sheet_name: String,
560    pub cell_address: String,
561    pub direction: TraceDirection,
562    pub depth: Option<u32>,
563    pub limit: Option<u32>,
564    #[serde(default)]
565    pub page_size: Option<usize>,
566    #[serde(default)]
567    pub cursor: Option<TraceCursor>,
568}
569
570pub async fn formula_trace(
571    state: Arc<AppState>,
572    params: FormulaTraceParams,
573) -> Result<FormulaTraceResponse> {
574    let workbook = state.open_workbook(&params.workbook_id).await?;
575    let graph = workbook.formula_graph(&params.sheet_name)?;
576    let formula_lookup = build_formula_lookup(&graph);
577    let depth = params.depth.unwrap_or(3).clamp(1, 5);
578    let page_size = params
579        .page_size
580        .or_else(|| params.limit.map(|v| v as usize))
581        .unwrap_or(DEFAULT_TRACE_PAGE_SIZE)
582        .clamp(TRACE_PAGE_MIN, TRACE_PAGE_MAX);
583
584    let origin = params.cell_address.to_uppercase();
585    let config = TraceConfig {
586        direction: &params.direction,
587        origin: &origin,
588        sheet_name: &params.sheet_name,
589        depth_limit: depth,
590        page_size,
591    };
592    let (layers, next_cursor, notes) = build_trace_layers(
593        &workbook,
594        &graph,
595        &formula_lookup,
596        &config,
597        params.cursor.clone(),
598    )?;
599
600    let response = FormulaTraceResponse {
601        workbook_id: workbook.id.clone(),
602        workbook_short_id: workbook.short_id.clone(),
603        sheet_name: params.sheet_name.clone(),
604        origin,
605        direction: params.direction.clone(),
606        layers,
607        next_cursor,
608        notes,
609    };
610    Ok(response)
611}
612
613#[derive(Debug, Deserialize, JsonSchema)]
614pub struct NamedRangesParams {
615    pub workbook_id: WorkbookId,
616    pub sheet_name: Option<String>,
617    pub name_prefix: Option<String>,
618}
619
620pub async fn named_ranges(
621    state: Arc<AppState>,
622    params: NamedRangesParams,
623) -> Result<NamedRangesResponse> {
624    let workbook = state.open_workbook(&params.workbook_id).await?;
625    let mut items = workbook.named_items()?;
626
627    if let Some(sheet_filter) = &params.sheet_name {
628        items.retain(|item| {
629            item.sheet_name
630                .as_ref()
631                .map(|name| name.eq_ignore_ascii_case(sheet_filter))
632                .unwrap_or(false)
633        });
634    }
635    if let Some(prefix) = &params.name_prefix {
636        let prefix_lower = prefix.to_ascii_lowercase();
637        items.retain(|item| item.name.to_ascii_lowercase().starts_with(&prefix_lower));
638    }
639
640    let response = NamedRangesResponse {
641        workbook_id: workbook.id.clone(),
642        workbook_short_id: workbook.short_id.clone(),
643        items,
644    };
645    Ok(response)
646}
647
648struct PageBuildResult {
649    rows: Vec<RowSnapshot>,
650    header: Option<RowSnapshot>,
651    end_row: u32,
652}
653
654#[allow(clippy::too_many_arguments)]
655fn build_page(
656    sheet: &umya_spreadsheet::Worksheet,
657    start_row: u32,
658    page_size: u32,
659    columns: Option<Vec<String>>,
660    columns_by_header: Option<Vec<String>>,
661    include_formulas: bool,
662    include_styles: bool,
663    include_header: bool,
664) -> PageBuildResult {
665    let max_col = sheet.get_highest_column();
666    let end_row = (start_row + page_size - 1).min(sheet.get_highest_row().max(start_row));
667    let column_indices =
668        resolve_columns_with_headers(sheet, columns.as_ref(), columns_by_header.as_ref(), max_col);
669
670    let header = if include_header {
671        Some(build_row_snapshot(
672            sheet,
673            1,
674            &column_indices,
675            include_formulas,
676            include_styles,
677        ))
678    } else {
679        None
680    };
681
682    let mut rows = Vec::new();
683    for row_idx in start_row..=end_row {
684        rows.push(build_row_snapshot(
685            sheet,
686            row_idx,
687            &column_indices,
688            include_formulas,
689            include_styles,
690        ));
691    }
692
693    PageBuildResult {
694        rows,
695        header,
696        end_row,
697    }
698}
699
700fn build_row_snapshot(
701    sheet: &umya_spreadsheet::Worksheet,
702    row_index: u32,
703    columns: &[u32],
704    include_formulas: bool,
705    include_styles: bool,
706) -> RowSnapshot {
707    let mut cells = Vec::new();
708    for &col in columns {
709        if let Some(cell) = sheet.get_cell((col, row_index)) {
710            cells.push(build_cell_snapshot(cell, include_formulas, include_styles));
711        } else {
712            let address = crate::utils::cell_address(col, row_index);
713            cells.push(CellSnapshot {
714                address,
715                value: None,
716                formula: None,
717                cached_value: None,
718                number_format: None,
719                style_tags: Vec::new(),
720                notes: Vec::new(),
721            });
722        }
723    }
724
725    RowSnapshot { row_index, cells }
726}
727
728fn build_cell_snapshot(
729    cell: &umya_spreadsheet::Cell,
730    include_formulas: bool,
731    include_styles: bool,
732) -> CellSnapshot {
733    let address = cell.get_coordinate().get_coordinate();
734    let value = crate::workbook::cell_to_value(cell);
735    let formula = if include_formulas && cell.is_formula() {
736        Some(cell.get_formula().to_string())
737    } else {
738        None
739    };
740    let cached_value = if cell.is_formula() {
741        value.clone()
742    } else {
743        None
744    };
745    let number_format = if include_styles {
746        cell.get_style()
747            .get_number_format()
748            .map(|fmt| fmt.get_format_code().to_string())
749    } else {
750        None
751    };
752    let style_tags = if include_styles {
753        crate::analysis::style::tag_cell(cell)
754            .map(|(_, tagging)| tagging.tags)
755            .unwrap_or_default()
756    } else {
757        Vec::new()
758    };
759
760    CellSnapshot {
761        address,
762        value,
763        formula,
764        cached_value,
765        number_format,
766        style_tags,
767        notes: Vec::new(),
768    }
769}
770
771fn resolve_columns(columns: Option<&Vec<String>>, max_column: u32) -> Vec<u32> {
772    use std::collections::BTreeSet;
773    use umya_spreadsheet::helper::coordinate::column_index_from_string;
774
775    let mut indices = BTreeSet::new();
776    if let Some(specs) = columns {
777        for spec in specs {
778            if let Some((start, end)) = spec.split_once(':') {
779                let start_idx = column_index_from_string(start);
780                let end_idx = column_index_from_string(end);
781                let (min_idx, max_idx) = if start_idx <= end_idx {
782                    (start_idx, end_idx)
783                } else {
784                    (end_idx, start_idx)
785                };
786                for idx in min_idx..=max_idx {
787                    indices.insert(idx);
788                }
789            } else {
790                indices.insert(column_index_from_string(spec));
791            }
792        }
793    } else {
794        for idx in 1..=max_column.max(1) {
795            indices.insert(idx);
796        }
797    }
798
799    indices.into_iter().collect()
800}
801
802fn resolve_columns_with_headers(
803    sheet: &umya_spreadsheet::Worksheet,
804    columns: Option<&Vec<String>>,
805    columns_by_header: Option<&Vec<String>>,
806    max_column: u32,
807) -> Vec<u32> {
808    if columns_by_header.is_none() {
809        return resolve_columns(columns, max_column);
810    }
811
812    let mut selected = Vec::new();
813    let header_targets: Vec<String> = columns_by_header
814        .unwrap()
815        .iter()
816        .map(|h| h.trim().to_ascii_lowercase())
817        .collect();
818
819    for col_idx in 1..=max_column.max(1) {
820        let header_cell = sheet.get_cell((col_idx, 1u32));
821        let header_value = header_cell
822            .and_then(cell_to_value)
823            .map(cell_value_to_string_lower);
824        if let Some(hval) = header_value
825            && header_targets.iter().any(|target| target == &hval)
826        {
827            selected.push(col_idx);
828        }
829    }
830
831    if selected.is_empty() {
832        resolve_columns(columns, max_column)
833    } else {
834        selected
835    }
836}
837
838fn cell_value_to_string_lower(value: CellValue) -> String {
839    match value {
840        CellValue::Text(s) => s.to_ascii_lowercase(),
841        CellValue::Number(n) => n.to_string().to_ascii_lowercase(),
842        CellValue::Bool(b) => b.to_string(),
843        CellValue::Error(e) => e.to_ascii_lowercase(),
844        CellValue::Date(d) => d.to_ascii_lowercase(),
845    }
846}
847
848fn build_compact_payload(
849    header: &Option<RowSnapshot>,
850    rows: &[RowSnapshot],
851    include_header: bool,
852) -> SheetPageCompact {
853    let headers = derive_headers(header, rows);
854    let header_row = if include_header {
855        header
856            .as_ref()
857            .map(|h| h.cells.iter().map(|c| c.value.clone()).collect())
858            .unwrap_or_default()
859    } else {
860        Vec::new()
861    };
862    let data_rows = rows
863        .iter()
864        .map(|row| {
865            let mut vals: Vec<Option<CellValue>> = Vec::new();
866            vals.push(Some(CellValue::Number(row.row_index as f64)));
867            vals.extend(row.cells.iter().map(|c| c.value.clone()));
868            vals
869        })
870        .collect();
871
872    SheetPageCompact {
873        headers,
874        header_row,
875        rows: data_rows,
876    }
877}
878
879fn build_values_only_payload(
880    header: &Option<RowSnapshot>,
881    rows: &[RowSnapshot],
882    include_header: bool,
883) -> SheetPageValues {
884    let mut data = Vec::new();
885    if include_header && let Some(h) = header {
886        data.push(h.cells.iter().map(|c| c.value.clone()).collect());
887    }
888    for row in rows {
889        data.push(row.cells.iter().map(|c| c.value.clone()).collect());
890    }
891
892    SheetPageValues { rows: data }
893}
894
895fn derive_headers(header: &Option<RowSnapshot>, rows: &[RowSnapshot]) -> Vec<String> {
896    if let Some(h) = header {
897        let mut headers: Vec<String> = h
898            .cells
899            .iter()
900            .map(|c| match &c.value {
901                Some(CellValue::Text(t)) => t.clone(),
902                Some(CellValue::Number(n)) => n.to_string(),
903                Some(CellValue::Bool(b)) => b.to_string(),
904                Some(CellValue::Date(d)) => d.clone(),
905                Some(CellValue::Error(e)) => e.clone(),
906                None => c.address.clone(),
907            })
908            .collect();
909        headers.insert(0, "Row".to_string());
910        headers
911    } else if let Some(first) = rows.first() {
912        let mut headers = Vec::new();
913        headers.push("Row".to_string());
914        for cell in &first.cells {
915            headers.push(cell.address.clone());
916        }
917        headers
918    } else {
919        vec![]
920    }
921}
922fn default_stats_sample() -> usize {
923    500
924}
925
926#[derive(Debug, Deserialize, JsonSchema)]
927pub struct SheetStatisticsParams {
928    pub workbook_id: WorkbookId,
929    pub sheet_name: String,
930    #[serde(default)]
931    pub sample_rows: Option<usize>,
932}
933
934pub async fn sheet_statistics(
935    state: Arc<AppState>,
936    params: SheetStatisticsParams,
937) -> Result<SheetStatisticsResponse> {
938    let workbook = state.open_workbook(&params.workbook_id).await?;
939    let sheet_metrics = workbook.get_sheet_metrics(&params.sheet_name)?;
940    let sample_rows = params.sample_rows.unwrap_or_else(default_stats_sample);
941    let stats = workbook.with_sheet(&params.sheet_name, |sheet| {
942        stats::compute_sheet_statistics(sheet, sample_rows)
943    })?;
944    let response = SheetStatisticsResponse {
945        workbook_id: workbook.id.clone(),
946        workbook_short_id: workbook.short_id.clone(),
947        sheet_name: params.sheet_name,
948        row_count: sheet_metrics.metrics.row_count,
949        column_count: sheet_metrics.metrics.column_count,
950        density: stats.density,
951        numeric_columns: stats.numeric_columns,
952        text_columns: stats.text_columns,
953        null_counts: stats.null_counts,
954        duplicate_warnings: stats.duplicate_warnings,
955    };
956    Ok(response)
957}
958
959fn address_in_range(address: &str, range: &str) -> bool {
960    parse_range(range).is_none_or(|((start_col, start_row), (end_col, end_row))| {
961        if let Some((col, row)) = parse_address(address) {
962            col >= start_col && col <= end_col && row >= start_row && row <= end_row
963        } else {
964            false
965        }
966    })
967}
968
969fn parse_range(range: &str) -> Option<((u32, u32), (u32, u32))> {
970    let mut parts = range.split(':');
971    let start = parts.next()?;
972    let end = parts.next().unwrap_or(start);
973    let start_idx = parse_address(start)?;
974    let end_idx = parse_address(end)?;
975    Some((
976        (start_idx.0.min(end_idx.0), start_idx.1.min(end_idx.1)),
977        (start_idx.0.max(end_idx.0), start_idx.1.max(end_idx.1)),
978    ))
979}
980
981fn parse_address(address: &str) -> Option<(u32, u32)> {
982    use umya_spreadsheet::helper::coordinate::index_from_coordinate;
983    let (col, row, _, _) = index_from_coordinate(address);
984    match (col, row) {
985        (Some(c), Some(r)) => Some((c, r)),
986        _ => None,
987    }
988}
989
990#[derive(Clone)]
991struct TableTarget {
992    sheet_name: String,
993    table_name: Option<String>,
994    range: ((u32, u32), (u32, u32)),
995    header_hint: Option<u32>,
996}
997
998fn resolve_table_target(
999    workbook: &WorkbookContext,
1000    params: &ReadTableParams,
1001) -> Result<TableTarget> {
1002    if let Some(region_id) = params.region_id
1003        && let Some(sheet) = &params.sheet_name
1004        && let Ok(region) = workbook.detected_region(sheet, region_id)
1005    {
1006        return Ok(TableTarget {
1007            sheet_name: sheet.clone(),
1008            table_name: None,
1009            range: parse_range(&region.bounds).unwrap_or(((1, 1), (1, 1))),
1010            header_hint: region.header_row,
1011        });
1012    }
1013
1014    if let Some(table_name) = &params.table_name {
1015        let items = workbook.named_items()?;
1016        for item in items {
1017            if item.name.eq_ignore_ascii_case(table_name)
1018                || item
1019                    .name
1020                    .to_ascii_lowercase()
1021                    .contains(&table_name.to_ascii_lowercase())
1022            {
1023                let mut sheet_name = item
1024                    .sheet_name
1025                    .clone()
1026                    .or_else(|| params.sheet_name.clone())
1027                    .unwrap_or_else(|| workbook.sheet_names().first().cloned().unwrap_or_default());
1028                let refers_to = item.refers_to.trim_start_matches('=');
1029                let mut range_part = refers_to;
1030                if let Some((sheet_part, rest)) = refers_to.split_once('!') {
1031                    sheet_name = sheet_part.trim_matches('\'').to_string();
1032                    range_part = rest;
1033                }
1034                if let Some(range) = parse_range(range_part) {
1035                    return Ok(TableTarget {
1036                        sheet_name,
1037                        table_name: Some(item.name.clone()),
1038                        range,
1039                        header_hint: if item.kind == NamedItemKind::Table {
1040                            Some(range.0.1)
1041                        } else {
1042                            None
1043                        },
1044                    });
1045                }
1046            }
1047        }
1048    }
1049
1050    let sheet_name = params
1051        .sheet_name
1052        .clone()
1053        .unwrap_or_else(|| workbook.sheet_names().first().cloned().unwrap_or_default());
1054
1055    if let Some(rng) = &params.range
1056        && let Some(range) = parse_range(rng)
1057    {
1058        return Ok(TableTarget {
1059            sheet_name,
1060            table_name: None,
1061            range,
1062            header_hint: None,
1063        });
1064    }
1065
1066    let metrics = workbook.get_sheet_metrics(&sheet_name)?;
1067    let end_col = metrics.metrics.column_count.max(1);
1068    let end_row = metrics.metrics.row_count.max(1);
1069    Ok(TableTarget {
1070        sheet_name,
1071        table_name: None,
1072        range: ((1, 1), (end_col, end_row)),
1073        header_hint: None,
1074    })
1075}
1076
1077#[allow(clippy::too_many_arguments)]
1078fn extract_table_rows(
1079    sheet: &umya_spreadsheet::Worksheet,
1080    target: &TableTarget,
1081    header_row: Option<u32>,
1082    header_rows: Option<u32>,
1083    columns: Option<Vec<String>>,
1084    filters: Option<Vec<TableFilter>>,
1085    limit: usize,
1086    offset: usize,
1087    sample_mode: &str,
1088) -> Result<(Vec<String>, Vec<TableRow>, u32)> {
1089    let ((start_col, start_row), (end_col, end_row)) = target.range;
1090    let mut header_start = header_row.or(target.header_hint).unwrap_or(start_row);
1091    if header_start < start_row {
1092        header_start = start_row;
1093    }
1094    if header_start > end_row {
1095        header_start = start_row;
1096    }
1097    let header_rows_count = header_rows.unwrap_or(1).max(1);
1098    let data_start_row = (header_start + header_rows_count).max(start_row + header_rows_count);
1099    let column_indices: Vec<u32> = if let Some(cols) = columns.as_ref() {
1100        resolve_columns(Some(cols), end_col).into_iter().collect()
1101    } else {
1102        (start_col..=end_col).collect()
1103    };
1104
1105    let headers = build_headers(sheet, &column_indices, header_start, header_rows_count);
1106    let mut all_rows: Vec<TableRow> = Vec::new();
1107    let mut total_rows: u32 = 0;
1108
1109    for row_idx in data_start_row..=end_row {
1110        let mut row = BTreeMap::new();
1111        for (i, col_idx) in column_indices.iter().enumerate() {
1112            let header = headers
1113                .get(i)
1114                .cloned()
1115                .unwrap_or_else(|| format!("Col{col_idx}"));
1116            let value = sheet.get_cell((*col_idx, row_idx)).and_then(cell_to_value);
1117            row.insert(header, value);
1118        }
1119        if !row_passes_filters(&row, filters.as_ref()) {
1120            continue;
1121        }
1122        total_rows += 1;
1123        if matches!(sample_mode, "first" | "all") && total_rows as usize > offset + limit {
1124            continue;
1125        }
1126        all_rows.push(row);
1127    }
1128
1129    let rows = sample_rows(all_rows, limit, offset, sample_mode);
1130
1131    Ok((headers, rows, total_rows))
1132}
1133
1134fn build_headers(
1135    sheet: &umya_spreadsheet::Worksheet,
1136    columns: &[u32],
1137    header_start: u32,
1138    header_rows: u32,
1139) -> Vec<String> {
1140    let mut headers = Vec::new();
1141    for col_idx in columns {
1142        let mut parts = Vec::new();
1143        for h in header_start..(header_start + header_rows) {
1144            let (origin_col, origin_row) = sheet.map_merged_cell((*col_idx, h));
1145            if let Some(value) = sheet
1146                .get_cell((origin_col, origin_row))
1147                .and_then(cell_to_value)
1148            {
1149                match value {
1150                    CellValue::Text(ref s) if s.trim().is_empty() => {}
1151                    CellValue::Text(s) => parts.push(s),
1152                    CellValue::Number(n) => parts.push(n.to_string()),
1153                    CellValue::Bool(b) => parts.push(b.to_string()),
1154                    CellValue::Error(e) => parts.push(e),
1155                    CellValue::Date(d) => parts.push(d),
1156                }
1157            }
1158        }
1159        if parts.is_empty() {
1160            headers.push(crate::utils::column_number_to_name(*col_idx));
1161        } else {
1162            headers.push(parts.join(" / "));
1163        }
1164    }
1165
1166    if headers.iter().all(|h| h.trim().is_empty()) {
1167        return columns
1168            .iter()
1169            .map(|c| crate::utils::column_number_to_name(*c))
1170            .collect();
1171    }
1172
1173    dedupe_headers(headers)
1174}
1175
1176fn dedupe_headers(mut headers: Vec<String>) -> Vec<String> {
1177    let mut seen: HashMap<String, u32> = HashMap::new();
1178    for h in headers.iter_mut() {
1179        let key = h.clone();
1180        if key.trim().is_empty() {
1181            continue;
1182        }
1183        let count = seen.entry(key.clone()).or_insert(0);
1184        if *count > 0 {
1185            h.push_str(&format!("_{}", *count + 1));
1186        }
1187        *count += 1;
1188    }
1189    headers
1190}
1191
1192fn row_passes_filters(row: &TableRow, filters: Option<&Vec<TableFilter>>) -> bool {
1193    if let Some(filters) = filters {
1194        for filter in filters {
1195            if let Some(value) = row.get(&filter.column) {
1196                match filter.op.as_str() {
1197                    "eq" => {
1198                        if !value_eq(value, &filter.value) {
1199                            return false;
1200                        }
1201                    }
1202                    "neq" => {
1203                        if value_eq(value, &filter.value) {
1204                            return false;
1205                        }
1206                    }
1207                    "contains" => {
1208                        if !value_contains(value, &filter.value) {
1209                            return false;
1210                        }
1211                    }
1212                    "gt" => {
1213                        if !value_gt(value, &filter.value) {
1214                            return false;
1215                        }
1216                    }
1217                    "lt" => {
1218                        if !value_lt(value, &filter.value) {
1219                            return false;
1220                        }
1221                    }
1222                    "in" => {
1223                        let list = filter
1224                            .value
1225                            .as_array()
1226                            .cloned()
1227                            .unwrap_or_else(|| vec![filter.value.clone()]);
1228                        if !list.iter().any(|cmp| value_eq(value, cmp)) {
1229                            return false;
1230                        }
1231                    }
1232                    _ => {}
1233                }
1234            }
1235        }
1236    }
1237    true
1238}
1239
1240fn value_eq(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1241    match (cell, cmp) {
1242        (Some(CellValue::Text(s)), serde_json::Value::String(t)) => s == t,
1243        (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
1244            v.as_f64().is_some_and(|f| (*n - f).abs() < f64::EPSILON)
1245        }
1246        (Some(CellValue::Number(n)), serde_json::Value::String(t)) => t
1247            .parse::<f64>()
1248            .map(|f| (*n - f).abs() < f64::EPSILON)
1249            .unwrap_or(false),
1250        (Some(CellValue::Bool(b)), serde_json::Value::Bool(v)) => b == v,
1251        (Some(CellValue::Bool(b)), serde_json::Value::String(t)) => {
1252            t.eq_ignore_ascii_case("true") == *b
1253        }
1254        (Some(CellValue::Date(d)), serde_json::Value::String(t)) => d == t,
1255        (None, serde_json::Value::Null) => true,
1256        _ => false,
1257    }
1258}
1259
1260fn value_contains(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1261    if let (Some(CellValue::Text(s)), serde_json::Value::String(t)) = (cell, cmp) {
1262        return s.to_ascii_lowercase().contains(&t.to_ascii_lowercase());
1263    }
1264    false
1265}
1266
1267fn value_gt(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1268    match (cell, cmp) {
1269        (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
1270            v.as_f64().is_some_and(|f| *n > f)
1271        }
1272        _ => false,
1273    }
1274}
1275
1276fn value_lt(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1277    match (cell, cmp) {
1278        (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
1279            v.as_f64().is_some_and(|f| *n < f)
1280        }
1281        _ => false,
1282    }
1283}
1284
1285fn sample_rows(rows: Vec<TableRow>, limit: usize, offset: usize, mode: &str) -> Vec<TableRow> {
1286    if rows.is_empty() {
1287        return rows;
1288    }
1289
1290    match mode {
1291        "distributed" => {
1292            if limit == 0 {
1293                return Vec::new();
1294            }
1295            let mut indices = Vec::new();
1296            let span = rows.len().saturating_sub(1);
1297            let step = std::cmp::max(1, span / std::cmp::max(1, limit.saturating_sub(1)));
1298            let mut idx = offset;
1299            while idx < rows.len() && indices.len() < limit {
1300                indices.push(idx);
1301                idx = idx.saturating_add(step);
1302                if idx == indices.last().copied().unwrap_or(0) {
1303                    idx += 1;
1304                }
1305            }
1306            if indices.len() < limit {
1307                let last_idx = rows.len().saturating_sub(1);
1308                if !indices.contains(&last_idx) {
1309                    indices.push(last_idx);
1310                }
1311            }
1312            indices
1313                .into_iter()
1314                .filter_map(|i| rows.get(i).cloned())
1315                .collect()
1316        }
1317        "last" => {
1318            let start = rows.len().saturating_sub(limit + offset);
1319            rows.into_iter().skip(start + offset).take(limit).collect()
1320        }
1321        _ => rows.into_iter().skip(offset).take(limit).collect(),
1322    }
1323}
1324
1325fn summarize_columns(headers: &[String], rows: &[TableRow]) -> Vec<ColumnTypeSummary> {
1326    let mut summaries = Vec::new();
1327    for header in headers {
1328        let mut nulls = 0u32;
1329        let mut distinct_set: HashSet<String> = HashSet::new();
1330        let mut values: Vec<f64> = Vec::new();
1331        let mut top_counts: HashMap<String, u32> = HashMap::new();
1332
1333        for row in rows {
1334            match row.get(header) {
1335                Some(Some(CellValue::Number(n))) => {
1336                    values.push(*n);
1337                    let key = n.to_string();
1338                    *top_counts.entry(key).or_default() += 1;
1339                }
1340                Some(Some(CellValue::Text(s))) => {
1341                    distinct_set.insert(s.clone());
1342                    *top_counts.entry(s.clone()).or_default() += 1;
1343                }
1344                Some(Some(CellValue::Bool(b))) => {
1345                    let key = b.to_string();
1346                    distinct_set.insert(key.clone());
1347                    *top_counts.entry(key).or_default() += 1;
1348                }
1349                Some(Some(CellValue::Date(d))) => {
1350                    distinct_set.insert(d.clone());
1351                    *top_counts.entry(d.clone()).or_default() += 1;
1352                }
1353                Some(Some(CellValue::Error(e))) => {
1354                    distinct_set.insert(e.clone());
1355                    *top_counts.entry(e.clone()).or_default() += 1;
1356                }
1357                _ => {
1358                    nulls += 1;
1359                }
1360            }
1361        }
1362
1363        let inferred_type = if !values.is_empty() {
1364            "number"
1365        } else if !distinct_set.is_empty() {
1366            "text"
1367        } else {
1368            "unknown"
1369        }
1370        .to_string();
1371
1372        let min = values.iter().cloned().reduce(f64::min);
1373        let max = values.iter().cloned().reduce(f64::max);
1374        let mean = if values.is_empty() {
1375            None
1376        } else {
1377            Some(values.iter().sum::<f64>() / values.len() as f64)
1378        };
1379
1380        let mut top_values: Vec<(String, u32)> = top_counts.into_iter().collect();
1381        top_values.sort_by(|a, b| b.1.cmp(&a.1));
1382        let top_values = top_values.into_iter().take(3).map(|(v, _)| v).collect();
1383
1384        summaries.push(ColumnTypeSummary {
1385            name: header.clone(),
1386            inferred_type,
1387            nulls,
1388            distinct: distinct_set.len() as u32,
1389            top_values,
1390            min,
1391            max,
1392            mean,
1393        });
1394    }
1395    summaries
1396}
1397
1398#[allow(clippy::too_many_arguments)]
1399fn collect_value_matches(
1400    sheet: &umya_spreadsheet::Worksheet,
1401    sheet_name: &str,
1402    mode: &FindMode,
1403    match_mode: &str,
1404    direction: &LabelDirection,
1405    params: &FindValueParams,
1406    region: Option<&DetectedRegion>,
1407    default_bounds: ((u32, u32), (u32, u32)),
1408) -> Result<Vec<FindValueMatch>> {
1409    let mut results = Vec::new();
1410    let regex = if match_mode == "regex" {
1411        Regex::new(&params.query).ok()
1412    } else {
1413        None
1414    };
1415    let bounds = region
1416        .as_ref()
1417        .and_then(|r| parse_range(&r.bounds))
1418        .unwrap_or(default_bounds);
1419
1420    let header_row = region.and_then(|r| r.header_row).unwrap_or(1);
1421
1422    for cell in sheet.get_cell_collection() {
1423        let coord = cell.get_coordinate();
1424        let col = *coord.get_col_num();
1425        let row = *coord.get_row_num();
1426        if col < bounds.0.0 || col > bounds.1.0 || row < bounds.0.1 || row > bounds.1.1 {
1427            continue;
1428        }
1429        if params.search_headers_only && row != header_row {
1430            continue;
1431        }
1432
1433        let value = cell_to_value(cell);
1434        if let Some(ref allowed) = params.value_types
1435            && !value_type_matches(&value, allowed)
1436        {
1437            continue;
1438        }
1439        if matches!(mode, FindMode::Value) {
1440            if !value_matches(
1441                &value,
1442                &params.query,
1443                match_mode,
1444                params.case_sensitive,
1445                &regex,
1446            ) {
1447                continue;
1448            }
1449        } else if let Some(label) = &params.label {
1450            if !label_matches(cell, label, match_mode, params.case_sensitive, &regex) {
1451                continue;
1452            }
1453        } else {
1454            continue;
1455        }
1456
1457        let neighbors = collect_neighbors(sheet, row, col);
1458        let (label_hit, match_value) = if matches!(mode, FindMode::Label) {
1459            let target_value = match direction {
1460                LabelDirection::Right => sheet.get_cell((col + 1, row)),
1461                LabelDirection::Below => sheet.get_cell((col, row + 1)),
1462                LabelDirection::Any => sheet
1463                    .get_cell((col + 1, row))
1464                    .or_else(|| sheet.get_cell((col, row + 1))),
1465            }
1466            .and_then(cell_to_value);
1467            if target_value.is_none() {
1468                continue;
1469            }
1470            (
1471                Some(LabelHit {
1472                    label_address: coord.get_coordinate(),
1473                    label: label_from_cell(cell),
1474                }),
1475                target_value,
1476            )
1477        } else {
1478            (None, value.clone())
1479        };
1480
1481        let row_context = build_row_context(sheet, row, col);
1482
1483        results.push(FindValueMatch {
1484            address: coord.get_coordinate(),
1485            sheet_name: sheet_name.to_string(),
1486            value: match_value,
1487            row_context,
1488            neighbors,
1489            label_hit,
1490        });
1491    }
1492
1493    Ok(results)
1494}
1495
1496fn label_from_cell(cell: &umya_spreadsheet::Cell) -> String {
1497    cell_to_value(cell)
1498        .map(|v| match v {
1499            CellValue::Text(s) => s,
1500            CellValue::Number(n) => n.to_string(),
1501            CellValue::Bool(b) => b.to_string(),
1502            CellValue::Date(d) => d,
1503            CellValue::Error(e) => e,
1504        })
1505        .unwrap_or_else(|| cell.get_value().to_string())
1506}
1507
1508fn value_matches(
1509    value: &Option<CellValue>,
1510    query: &str,
1511    mode: &str,
1512    case_sensitive: bool,
1513    regex: &Option<Regex>,
1514) -> bool {
1515    if value.is_none() {
1516        return false;
1517    }
1518    let haystack = cell_value_to_string_lower(value.clone().unwrap());
1519    let needle = if case_sensitive {
1520        query.to_string()
1521    } else {
1522        query.to_ascii_lowercase()
1523    };
1524
1525    match mode {
1526        "exact" => haystack == needle,
1527        "regex" => regex
1528            .as_ref()
1529            .map(|re| re.is_match(&haystack))
1530            .unwrap_or(false),
1531        _ => haystack.contains(&needle),
1532    }
1533}
1534
1535fn label_matches(
1536    cell: &umya_spreadsheet::Cell,
1537    label: &str,
1538    mode: &str,
1539    case_sensitive: bool,
1540    regex: &Option<Regex>,
1541) -> bool {
1542    let value = cell_to_value(cell);
1543    if value.is_none() {
1544        return false;
1545    }
1546    let haystack = cell_value_to_string_lower(value.unwrap());
1547    let needle = if case_sensitive {
1548        label.to_string()
1549    } else {
1550        label.to_ascii_lowercase()
1551    };
1552    match mode {
1553        "exact" => haystack == needle,
1554        "regex" => regex
1555            .as_ref()
1556            .map(|re| re.is_match(&haystack))
1557            .unwrap_or(false),
1558        _ => haystack.contains(&needle),
1559    }
1560}
1561
1562fn value_type_matches(value: &Option<CellValue>, allowed: &[String]) -> bool {
1563    if value.is_none() {
1564        return allowed.iter().any(|v| v == "null");
1565    }
1566    match value.as_ref().unwrap() {
1567        CellValue::Text(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("text")),
1568        CellValue::Number(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("number")),
1569        CellValue::Bool(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("bool")),
1570        CellValue::Date(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("date")),
1571        CellValue::Error(_) => true,
1572    }
1573}
1574
1575fn collect_neighbors(
1576    sheet: &umya_spreadsheet::Worksheet,
1577    row: u32,
1578    col: u32,
1579) -> Option<NeighborValues> {
1580    Some(NeighborValues {
1581        left: if col > 1 {
1582            sheet.get_cell((col - 1, row)).and_then(cell_to_value)
1583        } else {
1584            None
1585        },
1586        right: sheet.get_cell((col + 1, row)).and_then(cell_to_value),
1587        up: if row > 1 {
1588            sheet.get_cell((col, row - 1)).and_then(cell_to_value)
1589        } else {
1590            None
1591        },
1592        down: sheet.get_cell((col, row + 1)).and_then(cell_to_value),
1593    })
1594}
1595
1596fn build_row_context(
1597    sheet: &umya_spreadsheet::Worksheet,
1598    row: u32,
1599    col: u32,
1600) -> Option<RowContext> {
1601    let header_value = sheet
1602        .get_cell((col, 1u32))
1603        .and_then(cell_to_value)
1604        .map(|v| match v {
1605            CellValue::Text(s) => s,
1606            CellValue::Number(n) => n.to_string(),
1607            CellValue::Bool(b) => b.to_string(),
1608            CellValue::Date(d) => d,
1609            CellValue::Error(e) => e,
1610        })
1611        .unwrap_or_else(|| format!("Col{}", col));
1612    let value = sheet.get_cell((col, row)).and_then(cell_to_value);
1613    Some(RowContext {
1614        headers: vec![header_value],
1615        values: vec![value],
1616    })
1617}
1618
1619#[derive(Debug, Deserialize, JsonSchema)]
1620pub struct FindFormulaParams {
1621    pub workbook_id: WorkbookId,
1622    pub query: String,
1623    pub sheet_name: Option<String>,
1624    #[serde(default)]
1625    pub case_sensitive: bool,
1626}
1627
1628pub async fn find_formula(
1629    state: Arc<AppState>,
1630    params: FindFormulaParams,
1631) -> Result<FindFormulaResponse> {
1632    let workbook = state.open_workbook(&params.workbook_id).await?;
1633    let query = if params.case_sensitive {
1634        params.query.clone()
1635    } else {
1636        params.query.to_ascii_lowercase()
1637    };
1638    let mut matches = Vec::new();
1639
1640    let sheet_names: Vec<String> = if let Some(sheet) = &params.sheet_name {
1641        vec![sheet.clone()]
1642    } else {
1643        workbook.sheet_names()
1644    };
1645
1646    for sheet_name in sheet_names {
1647        let sheet_matches = workbook.with_sheet(&sheet_name, |sheet| {
1648            collect_formula_matches(sheet, &sheet_name, &query, params.case_sensitive)
1649        })?;
1650        matches.extend(sheet_matches);
1651    }
1652
1653    let response = FindFormulaResponse {
1654        workbook_id: workbook.id.clone(),
1655        workbook_short_id: workbook.short_id.clone(),
1656        matches,
1657    };
1658    Ok(response)
1659}
1660
1661#[derive(Debug, Deserialize, JsonSchema)]
1662pub struct ScanVolatilesParams {
1663    pub workbook_id: WorkbookId,
1664    pub sheet_name: Option<String>,
1665}
1666
1667pub async fn scan_volatiles(
1668    state: Arc<AppState>,
1669    params: ScanVolatilesParams,
1670) -> Result<VolatileScanResponse> {
1671    let workbook = state.open_workbook(&params.workbook_id).await?;
1672    let target_sheets: Vec<String> = if let Some(sheet) = &params.sheet_name {
1673        vec![sheet.clone()]
1674    } else {
1675        workbook.sheet_names()
1676    };
1677
1678    let mut items = Vec::new();
1679    let mut truncated = false;
1680
1681    for sheet_name in target_sheets {
1682        let graph = workbook.formula_graph(&sheet_name)?;
1683        for group in graph.groups() {
1684            if !group.is_volatile {
1685                continue;
1686            }
1687            for address in group.addresses.iter().take(50) {
1688                items.push(VolatileScanEntry {
1689                    address: address.clone(),
1690                    sheet_name: sheet_name.clone(),
1691                    function: "volatile".to_string(),
1692                    note: Some(group.formula.clone()),
1693                });
1694            }
1695            if group.addresses.len() > 50 {
1696                truncated = true;
1697            }
1698        }
1699    }
1700
1701    let response = VolatileScanResponse {
1702        workbook_id: workbook.id.clone(),
1703        workbook_short_id: workbook.short_id.clone(),
1704        items,
1705        truncated,
1706    };
1707    Ok(response)
1708}
1709
1710#[derive(Debug, Deserialize, JsonSchema)]
1711pub struct SheetStylesParams {
1712    pub workbook_id: WorkbookId,
1713    pub sheet_name: String,
1714}
1715
1716pub async fn sheet_styles(
1717    state: Arc<AppState>,
1718    params: SheetStylesParams,
1719) -> Result<SheetStylesResponse> {
1720    let workbook = state.open_workbook(&params.workbook_id).await?;
1721    let entry = workbook.get_sheet_metrics(&params.sheet_name)?;
1722
1723    let styles = entry
1724        .metrics
1725        .style_map
1726        .iter()
1727        .map(|(style_id, usage)| StyleSummary {
1728            style_id: style_id.clone(),
1729            occurrences: usage.occurrences,
1730            tags: usage.tags.clone(),
1731            example_cells: usage.example_cells.clone(),
1732        })
1733        .collect();
1734
1735    let response = SheetStylesResponse {
1736        workbook_id: workbook.id.clone(),
1737        workbook_short_id: workbook.short_id.clone(),
1738        sheet_name: params.sheet_name.clone(),
1739        styles,
1740        conditional_rules: Vec::new(),
1741    };
1742    Ok(response)
1743}
1744
1745pub async fn range_values(
1746    state: Arc<AppState>,
1747    params: RangeValuesParams,
1748) -> Result<RangeValuesResponse> {
1749    let workbook = state.open_workbook(&params.workbook_id).await?;
1750    let include_headers = params.include_headers.unwrap_or(false);
1751    let values = workbook.with_sheet(&params.sheet_name, |sheet| {
1752        params
1753            .ranges
1754            .iter()
1755            .filter_map(|range| {
1756                parse_range(range).map(|((start_col, start_row), (end_col, end_row))| {
1757                    let mut rows = Vec::new();
1758                    for r in start_row..=end_row {
1759                        let mut row_vals = Vec::new();
1760                        for c in start_col..=end_col {
1761                            if include_headers && r == start_row && start_row == 1 {
1762                                row_vals.push(sheet.get_cell((c, 1u32)).and_then(cell_to_value));
1763                            } else {
1764                                row_vals.push(sheet.get_cell((c, r)).and_then(cell_to_value));
1765                            }
1766                        }
1767                        rows.push(row_vals);
1768                    }
1769                    RangeValuesEntry {
1770                        range: range.clone(),
1771                        rows,
1772                    }
1773                })
1774            })
1775            .collect()
1776    })?;
1777
1778    Ok(RangeValuesResponse {
1779        workbook_id: workbook.id.clone(),
1780        workbook_short_id: workbook.short_id.clone(),
1781        sheet_name: params.sheet_name,
1782        values,
1783    })
1784}
1785
1786pub async fn find_value(
1787    state: Arc<AppState>,
1788    params: FindValueParams,
1789) -> Result<FindValueResponse> {
1790    let workbook = state.open_workbook(&params.workbook_id).await?;
1791    let mut matches = Vec::new();
1792    let mut truncated = false;
1793    let mode = params.mode.clone().unwrap_or_else(|| {
1794        if params.label.is_some() {
1795            FindMode::Label
1796        } else {
1797            FindMode::Value
1798        }
1799    });
1800    let match_mode = params
1801        .match_mode
1802        .as_deref()
1803        .unwrap_or("contains")
1804        .to_ascii_lowercase();
1805    let direction = params.direction.clone().unwrap_or(LabelDirection::Any);
1806
1807    let target_sheets: Vec<String> = if let Some(sheet) = &params.sheet_name {
1808        vec![sheet.clone()]
1809    } else {
1810        workbook.sheet_names()
1811    };
1812
1813    for sheet_name in target_sheets {
1814        let metrics_entry = workbook.get_sheet_metrics(&sheet_name)?;
1815        let default_bounds = (
1816            (1, 1),
1817            (
1818                metrics_entry.metrics.column_count.max(1),
1819                metrics_entry.metrics.row_count.max(1),
1820            ),
1821        );
1822        let region_bounds = params
1823            .region_id
1824            .and_then(|id| workbook.detected_region(&sheet_name, id).ok());
1825        let sheet_matches = workbook.with_sheet(&sheet_name, |sheet| {
1826            collect_value_matches(
1827                sheet,
1828                &sheet_name,
1829                &mode,
1830                &match_mode,
1831                &direction,
1832                &params,
1833                region_bounds.as_ref(),
1834                default_bounds,
1835            )
1836        })??;
1837        matches.extend(sheet_matches);
1838        if matches.len() as u32 >= params.limit {
1839            truncated = true;
1840            matches.truncate(params.limit as usize);
1841            break;
1842        }
1843    }
1844
1845    Ok(FindValueResponse {
1846        workbook_id: workbook.id.clone(),
1847        workbook_short_id: workbook.short_id.clone(),
1848        matches,
1849        truncated,
1850    })
1851}
1852
1853pub async fn read_table(
1854    state: Arc<AppState>,
1855    params: ReadTableParams,
1856) -> Result<ReadTableResponse> {
1857    let workbook = state.open_workbook(&params.workbook_id).await?;
1858    let resolved = resolve_table_target(&workbook, &params)?;
1859    let limit = params.limit.unwrap_or(100) as usize;
1860    let offset = params.offset.unwrap_or(0) as usize;
1861    let sample_mode = params
1862        .sample_mode
1863        .clone()
1864        .unwrap_or_else(|| "first".to_string());
1865
1866    let (headers, rows, total_rows) = workbook.with_sheet(&resolved.sheet_name, |sheet| {
1867        extract_table_rows(
1868            sheet,
1869            &resolved,
1870            params.header_row,
1871            params.header_rows,
1872            params.columns.clone(),
1873            params.filters.clone(),
1874            limit,
1875            offset,
1876            &sample_mode,
1877        )
1878    })??;
1879
1880    let has_more = offset + rows.len() < total_rows as usize;
1881
1882    Ok(ReadTableResponse {
1883        workbook_id: workbook.id.clone(),
1884        workbook_short_id: workbook.short_id.clone(),
1885        sheet_name: resolved.sheet_name,
1886        table_name: resolved.table_name,
1887        headers,
1888        rows,
1889        total_rows,
1890        has_more,
1891    })
1892}
1893
1894pub async fn table_profile(
1895    state: Arc<AppState>,
1896    params: TableProfileParams,
1897) -> Result<TableProfileResponse> {
1898    let workbook = state.open_workbook(&params.workbook_id).await?;
1899    let resolved = resolve_table_target(
1900        &workbook,
1901        &ReadTableParams {
1902            workbook_id: params.workbook_id.clone(),
1903            sheet_name: params.sheet_name.clone(),
1904            table_name: params.table_name.clone(),
1905            region_id: params.region_id,
1906            range: None,
1907            header_row: None,
1908            header_rows: None,
1909            columns: None,
1910            filters: None,
1911            sample_mode: params.sample_mode.clone(),
1912            limit: params.sample_size,
1913            offset: Some(0),
1914        },
1915    )?;
1916
1917    let sample_size = params.sample_size.unwrap_or(10) as usize;
1918    let sample_mode = params
1919        .sample_mode
1920        .clone()
1921        .unwrap_or_else(|| "distributed".to_string());
1922
1923    let (headers, rows, total_rows) = workbook.with_sheet(&resolved.sheet_name, |sheet| {
1924        extract_table_rows(
1925            sheet,
1926            &resolved,
1927            None,
1928            None,
1929            None,
1930            None,
1931            sample_size,
1932            0,
1933            &sample_mode,
1934        )
1935    })??;
1936
1937    let column_types = summarize_columns(&headers, &rows);
1938
1939    Ok(TableProfileResponse {
1940        workbook_id: workbook.id.clone(),
1941        workbook_short_id: workbook.short_id.clone(),
1942        sheet_name: resolved.sheet_name,
1943        table_name: resolved.table_name,
1944        headers,
1945        column_types,
1946        row_count: total_rows,
1947        samples: rows,
1948        notes: Vec::new(),
1949    })
1950}
1951
1952#[derive(Debug, Deserialize, JsonSchema)]
1953pub struct ManifestStubParams {
1954    pub workbook_id: WorkbookId,
1955    pub sheet_filter: Option<String>,
1956}
1957
1958pub async fn get_manifest_stub(
1959    state: Arc<AppState>,
1960    params: ManifestStubParams,
1961) -> Result<ManifestStubResponse> {
1962    let workbook = state.open_workbook(&params.workbook_id).await?;
1963    let mut summaries = workbook.list_summaries()?;
1964
1965    if let Some(filter) = &params.sheet_filter {
1966        summaries.retain(|summary| summary.name.eq_ignore_ascii_case(filter));
1967    }
1968
1969    let sheets = summaries
1970        .into_iter()
1971        .map(|summary| ManifestSheetStub {
1972            sheet_name: summary.name.clone(),
1973            classification: summary.classification.clone(),
1974            candidate_expectations: vec![format!(
1975                "Review {} sheet for expectation candidates",
1976                format!("{:?}", summary.classification).to_ascii_lowercase()
1977            )],
1978            notes: summary.style_tags,
1979        })
1980        .collect();
1981
1982    let response = ManifestStubResponse {
1983        workbook_id: workbook.id.clone(),
1984        workbook_short_id: workbook.short_id.clone(),
1985        slug: workbook.slug.clone(),
1986        sheets,
1987    };
1988    Ok(response)
1989}
1990
1991#[derive(Debug, Deserialize, JsonSchema)]
1992pub struct CloseWorkbookParams {
1993    pub workbook_id: WorkbookId,
1994}
1995
1996pub async fn close_workbook(
1997    state: Arc<AppState>,
1998    params: CloseWorkbookParams,
1999) -> Result<CloseWorkbookResponse> {
2000    state.close_workbook(&params.workbook_id)?;
2001    Ok(CloseWorkbookResponse {
2002        workbook_id: params.workbook_id.clone(),
2003        message: format!("workbook {} evicted", params.workbook_id.as_str()),
2004    })
2005}
2006fn collect_formula_matches(
2007    sheet: &umya_spreadsheet::Worksheet,
2008    sheet_name: &str,
2009    query: &str,
2010    case_sensitive: bool,
2011) -> Vec<FindFormulaMatch> {
2012    use crate::workbook::cell_to_value;
2013
2014    let mut results = Vec::new();
2015    for cell in sheet.get_cell_collection() {
2016        if !cell.is_formula() {
2017            continue;
2018        }
2019        let formula = cell.get_formula();
2020        let haystack = if case_sensitive {
2021            formula.to_string()
2022        } else {
2023            formula.to_ascii_lowercase()
2024        };
2025        if !haystack.contains(query) {
2026            continue;
2027        }
2028        let coord = cell.get_coordinate();
2029        let column = *coord.get_col_num();
2030        let row = *coord.get_row_num();
2031        let columns = vec![column];
2032        let context_row = build_row_snapshot(sheet, row, &columns, true, false);
2033        let header_row = build_row_snapshot(sheet, 1, &columns, false, false);
2034
2035        results.push(FindFormulaMatch {
2036            address: coord.get_coordinate(),
2037            sheet_name: sheet_name.to_string(),
2038            formula: formula.to_string(),
2039            cached_value: if cell.is_formula() {
2040                cell_to_value(cell)
2041            } else {
2042                None
2043            },
2044            context: vec![header_row, context_row],
2045        });
2046    }
2047    results
2048}
2049
2050#[derive(Clone)]
2051struct TraceFormulaInfo {
2052    fingerprint: String,
2053    formula: String,
2054}
2055
2056#[derive(Clone)]
2057struct TraceEdgeRaw {
2058    from: String,
2059    to: String,
2060    neighbor: String,
2061}
2062
2063#[derive(Clone)]
2064struct LayerLinks {
2065    depth: u32,
2066    edges: Vec<TraceEdgeRaw>,
2067    truncated_cells: Vec<String>,
2068}
2069
2070#[derive(Clone)]
2071struct NeighborDetail {
2072    address: String,
2073    column: Option<u32>,
2074    row: Option<u32>,
2075    kind: TraceCellKind,
2076    value: Option<CellValue>,
2077    formula: Option<String>,
2078    fingerprint: Option<String>,
2079    external: bool,
2080}
2081
2082fn build_formula_lookup(graph: &FormulaGraph) -> HashMap<String, TraceFormulaInfo> {
2083    let mut map = HashMap::new();
2084    for group in graph.groups() {
2085        for address in group.addresses.clone() {
2086            map.insert(
2087                address.to_ascii_uppercase(),
2088                TraceFormulaInfo {
2089                    fingerprint: group.fingerprint.clone(),
2090                    formula: group.formula.clone(),
2091                },
2092            );
2093        }
2094    }
2095    map
2096}
2097
2098struct TraceConfig<'a> {
2099    direction: &'a TraceDirection,
2100    origin: &'a str,
2101    sheet_name: &'a str,
2102    depth_limit: u32,
2103    page_size: usize,
2104}
2105
2106fn build_trace_layers(
2107    workbook: &WorkbookContext,
2108    graph: &FormulaGraph,
2109    formula_lookup: &HashMap<String, TraceFormulaInfo>,
2110    config: &TraceConfig<'_>,
2111    cursor: Option<TraceCursor>,
2112) -> Result<(Vec<TraceLayer>, Option<TraceCursor>, Vec<String>)> {
2113    let layer_links =
2114        collect_layer_links(graph, config.direction, config.origin, config.depth_limit);
2115    let mut layers = Vec::new();
2116    let mut next_cursor = None;
2117    let mut notes = Vec::new();
2118    let focus_depth = cursor.as_ref().map(|c| c.depth);
2119
2120    for layer in layer_links {
2121        let produce_edges = focus_depth.is_none_or(|depth| depth == layer.depth);
2122        let offset = cursor
2123            .as_ref()
2124            .filter(|c| c.depth == layer.depth)
2125            .map(|c| c.offset)
2126            .unwrap_or(0);
2127
2128        let mut node_set: HashSet<String> = HashSet::new();
2129        for edge in &layer.edges {
2130            node_set.insert(edge.neighbor.clone());
2131        }
2132        let mut nodes: Vec<String> = node_set.into_iter().collect();
2133        nodes.sort_by(|a, b| compare_addresses(a, b));
2134
2135        let details = workbook.with_sheet(config.sheet_name, |sheet| {
2136            collect_neighbor_details(sheet, config.sheet_name, &nodes, formula_lookup)
2137        })?;
2138        let total_nodes = details.len();
2139        let start = offset.min(total_nodes);
2140        let end = if produce_edges {
2141            (start + config.page_size).min(total_nodes)
2142        } else {
2143            start
2144        };
2145        let selected_slice = if produce_edges {
2146            &details[start..end]
2147        } else {
2148            &details[0..0]
2149        };
2150        let selected_addresses: HashSet<String> = selected_slice
2151            .iter()
2152            .map(|detail| detail.address.clone())
2153            .collect();
2154
2155        let summary = build_layer_summary(&details);
2156        let range_highlights = build_range_highlights(&details);
2157        let group_highlights = build_formula_group_highlights(&details);
2158        let notable_cells = build_notable_cells(&details, &range_highlights, &group_highlights);
2159
2160        let highlights = TraceLayerHighlights {
2161            top_ranges: range_highlights.clone(),
2162            top_formula_groups: group_highlights.clone(),
2163            notable_cells,
2164        };
2165
2166        let edges = if produce_edges {
2167            build_edges_for_layer(&layer.edges, &selected_addresses, formula_lookup)
2168        } else {
2169            Vec::new()
2170        };
2171
2172        let has_more = produce_edges && end < total_nodes;
2173        if has_more && next_cursor.is_none() {
2174            next_cursor = Some(TraceCursor {
2175                depth: layer.depth,
2176                offset: end,
2177            });
2178        }
2179        if has_more {
2180            notes.push(format!(
2181                "Layer {} truncated at {} of {} nodes; supply cursor.depth={} and cursor.offset={} to continue",
2182                layer.depth, end, total_nodes, layer.depth, end
2183            ));
2184        }
2185
2186        if !layer.truncated_cells.is_empty() {
2187            let cell_list = if layer.truncated_cells.len() <= 3 {
2188                layer.truncated_cells.join(", ")
2189            } else {
2190                format!(
2191                    "{}, ... ({} more)",
2192                    layer.truncated_cells[..3].join(", "),
2193                    layer.truncated_cells.len() - 3
2194                )
2195            };
2196            notes.push(format!(
2197                "Layer {}: dependents truncated at {} per cell for: {}",
2198                layer.depth, TRACE_DEPENDENTS_PER_CELL_LIMIT, cell_list
2199            ));
2200        }
2201
2202        layers.push(TraceLayer {
2203            depth: layer.depth,
2204            summary,
2205            highlights,
2206            edges,
2207            has_more,
2208        });
2209    }
2210
2211    Ok((layers, next_cursor, notes))
2212}
2213
2214fn collect_layer_links(
2215    graph: &FormulaGraph,
2216    direction: &TraceDirection,
2217    origin: &str,
2218    depth_limit: u32,
2219) -> Vec<LayerLinks> {
2220    let mut visited: HashSet<String> = HashSet::new();
2221    visited.insert(origin.to_string());
2222    let mut frontier = vec![origin.to_string()];
2223    let mut layers = Vec::new();
2224
2225    for depth in 1..=depth_limit {
2226        let mut next_frontier_set: HashSet<String> = HashSet::new();
2227        let mut edges = Vec::new();
2228        let mut truncated_cells = Vec::new();
2229
2230        for cell in &frontier {
2231            let (neighbors, was_truncated) = match direction {
2232                TraceDirection::Precedents => (graph.precedents(cell), false),
2233                TraceDirection::Dependents => {
2234                    graph.dependents_limited(cell, Some(TRACE_DEPENDENTS_PER_CELL_LIMIT))
2235                }
2236            };
2237
2238            if was_truncated {
2239                truncated_cells.push(cell.clone());
2240            }
2241
2242            for neighbor in neighbors {
2243                let neighbor_upper = neighbor.to_ascii_uppercase();
2244                let edge = match direction {
2245                    TraceDirection::Precedents => TraceEdgeRaw {
2246                        from: cell.clone(),
2247                        to: neighbor_upper.clone(),
2248                        neighbor: neighbor_upper.clone(),
2249                    },
2250                    TraceDirection::Dependents => TraceEdgeRaw {
2251                        from: neighbor_upper.clone(),
2252                        to: cell.clone(),
2253                        neighbor: neighbor_upper.clone(),
2254                    },
2255                };
2256                edges.push(edge);
2257                if visited.insert(neighbor_upper.clone()) {
2258                    next_frontier_set.insert(neighbor_upper);
2259                }
2260            }
2261        }
2262
2263        if edges.is_empty() {
2264            break;
2265        }
2266
2267        layers.push(LayerLinks {
2268            depth,
2269            edges,
2270            truncated_cells,
2271        });
2272        if next_frontier_set.is_empty() {
2273            break;
2274        }
2275        let mut next_frontier: Vec<String> = next_frontier_set.into_iter().collect();
2276        next_frontier.sort();
2277        frontier = next_frontier;
2278    }
2279
2280    layers
2281}
2282
2283fn collect_neighbor_details(
2284    sheet: &umya_spreadsheet::Worksheet,
2285    current_sheet: &str,
2286    addresses: &[String],
2287    formula_lookup: &HashMap<String, TraceFormulaInfo>,
2288) -> Vec<NeighborDetail> {
2289    let mut details = Vec::new();
2290    for address in addresses {
2291        let (sheet_part, cell_part) = split_sheet_and_cell(address);
2292        let normalized_sheet = sheet_part
2293            .as_ref()
2294            .map(|s| clean_sheet_name(s).to_ascii_lowercase());
2295        let is_external = normalized_sheet
2296            .as_ref()
2297            .map(|s| !s.eq_ignore_ascii_case(current_sheet))
2298            .unwrap_or(false);
2299
2300        let Some(cell_ref) = cell_part else {
2301            details.push(NeighborDetail {
2302                address: address.clone(),
2303                column: None,
2304                row: None,
2305                kind: TraceCellKind::External,
2306                value: None,
2307                formula: None,
2308                fingerprint: None,
2309                external: true,
2310            });
2311            continue;
2312        };
2313
2314        let cell_ref_upper = cell_ref.to_ascii_uppercase();
2315
2316        if is_external {
2317            let formula_info = lookup_formula_info(formula_lookup, &cell_ref_upper, address);
2318            details.push(NeighborDetail {
2319                address: address.clone(),
2320                column: None,
2321                row: None,
2322                kind: TraceCellKind::External,
2323                value: None,
2324                formula: formula_info.map(|info| info.formula.clone()),
2325                fingerprint: formula_info.map(|info| info.fingerprint.clone()),
2326                external: true,
2327            });
2328            continue;
2329        }
2330
2331        let Some((col, row)) = parse_address(&cell_ref_upper) else {
2332            details.push(NeighborDetail {
2333                address: address.clone(),
2334                column: None,
2335                row: None,
2336                kind: TraceCellKind::External,
2337                value: None,
2338                formula: None,
2339                fingerprint: None,
2340                external: true,
2341            });
2342            continue;
2343        };
2344
2345        let cell_opt = sheet.get_cell((&col, &row));
2346        let formula_info = lookup_formula_info(formula_lookup, &cell_ref_upper, address);
2347        if let Some(cell) = cell_opt {
2348            let value = cell_to_value(cell);
2349            let kind = if cell.is_formula() {
2350                TraceCellKind::Formula
2351            } else if value.is_some() {
2352                TraceCellKind::Literal
2353            } else {
2354                TraceCellKind::Blank
2355            };
2356            details.push(NeighborDetail {
2357                address: address.clone(),
2358                column: Some(col),
2359                row: Some(row),
2360                kind,
2361                value,
2362                formula: formula_info.map(|info| info.formula.clone()),
2363                fingerprint: formula_info.map(|info| info.fingerprint.clone()),
2364                external: false,
2365            });
2366        } else {
2367            details.push(NeighborDetail {
2368                address: address.clone(),
2369                column: Some(col),
2370                row: Some(row),
2371                kind: TraceCellKind::Blank,
2372                value: None,
2373                formula: formula_info.map(|info| info.formula.clone()),
2374                fingerprint: formula_info.map(|info| info.fingerprint.clone()),
2375                external: false,
2376            });
2377        }
2378    }
2379    details
2380}
2381
2382fn build_layer_summary(details: &[NeighborDetail]) -> TraceLayerSummary {
2383    let mut summary = TraceLayerSummary {
2384        total_nodes: details.len(),
2385        formula_nodes: 0,
2386        value_nodes: 0,
2387        blank_nodes: 0,
2388        external_nodes: 0,
2389        unique_formula_groups: 0,
2390    };
2391
2392    let mut fingerprints: HashSet<String> = HashSet::new();
2393
2394    for detail in details {
2395        match detail.kind {
2396            TraceCellKind::Formula => {
2397                summary.formula_nodes += 1;
2398                if let Some(fp) = &detail.fingerprint {
2399                    fingerprints.insert(fp.clone());
2400                }
2401            }
2402            TraceCellKind::Literal => summary.value_nodes += 1,
2403            TraceCellKind::Blank => summary.blank_nodes += 1,
2404            TraceCellKind::External => summary.external_nodes += 1,
2405        }
2406    }
2407
2408    summary.unique_formula_groups = fingerprints.len();
2409    summary
2410}
2411
2412fn build_formula_group_highlights(details: &[NeighborDetail]) -> Vec<TraceFormulaGroupHighlight> {
2413    let mut aggregates: HashMap<String, (String, usize, Vec<String>)> = HashMap::new();
2414    for detail in details {
2415        if let (Some(fingerprint), Some(formula)) = (&detail.fingerprint, &detail.formula) {
2416            let entry = aggregates
2417                .entry(fingerprint.clone())
2418                .or_insert_with(|| (formula.clone(), 0, Vec::new()));
2419            entry.1 += 1;
2420            if entry.2.len() < TRACE_GROUP_SAMPLE_LIMIT {
2421                entry.2.push(detail.address.clone());
2422            }
2423        }
2424    }
2425
2426    let mut highlights: Vec<TraceFormulaGroupHighlight> = aggregates
2427        .into_iter()
2428        .map(
2429            |(fingerprint, (formula, count, sample_addresses))| TraceFormulaGroupHighlight {
2430                fingerprint,
2431                formula,
2432                count,
2433                sample_addresses,
2434            },
2435        )
2436        .collect();
2437
2438    highlights.sort_by(|a, b| b.count.cmp(&a.count));
2439    highlights.truncate(TRACE_GROUP_HIGHLIGHT_LIMIT);
2440    highlights
2441}
2442
2443fn build_range_highlights(details: &[NeighborDetail]) -> Vec<TraceRangeHighlight> {
2444    let mut by_column: HashMap<u32, Vec<&NeighborDetail>> = HashMap::new();
2445    for detail in details {
2446        if let (Some(col), Some(_row)) = (detail.column, detail.row)
2447            && !detail.external
2448        {
2449            by_column.entry(col).or_default().push(detail);
2450        }
2451    }
2452
2453    for column_entries in by_column.values_mut() {
2454        column_entries.sort_by(|a, b| a.row.cmp(&b.row));
2455    }
2456
2457    let mut ranges = Vec::new();
2458    for entries in by_column.values() {
2459        let mut current: Vec<&NeighborDetail> = Vec::new();
2460        for detail in entries {
2461            if current.is_empty() {
2462                current.push(detail);
2463                continue;
2464            }
2465            let prev_row = current.last().and_then(|d| d.row).unwrap_or(0);
2466            if detail.row.unwrap_or(0) == prev_row + 1 {
2467                current.push(detail);
2468            } else {
2469                if current.len() >= TRACE_RANGE_THRESHOLD {
2470                    ranges.push(make_range_highlight(&current));
2471                }
2472                current.clear();
2473                current.push(detail);
2474            }
2475        }
2476        if current.len() >= TRACE_RANGE_THRESHOLD {
2477            ranges.push(make_range_highlight(&current));
2478        }
2479    }
2480
2481    ranges.sort_by(|a, b| b.count.cmp(&a.count));
2482    ranges.truncate(TRACE_RANGE_HIGHLIGHT_LIMIT);
2483    ranges
2484}
2485
2486fn make_range_highlight(details: &[&NeighborDetail]) -> TraceRangeHighlight {
2487    let mut literals = 0usize;
2488    let mut formulas = 0usize;
2489    let mut blanks = 0usize;
2490    let mut sample_values = Vec::new();
2491    let mut sample_formulas = Vec::new();
2492    let mut sample_addresses = Vec::new();
2493
2494    for detail in details {
2495        match detail.kind {
2496            TraceCellKind::Formula => {
2497                formulas += 1;
2498                if let Some(formula) = &detail.formula
2499                    && sample_formulas.len() < TRACE_RANGE_FORMULA_SAMPLES
2500                    && !sample_formulas.contains(formula)
2501                {
2502                    sample_formulas.push(formula.clone());
2503                }
2504            }
2505            TraceCellKind::Literal => {
2506                literals += 1;
2507                if let Some(value) = &detail.value
2508                    && sample_values.len() < TRACE_RANGE_VALUE_SAMPLES
2509                {
2510                    sample_values.push(value.clone());
2511                }
2512            }
2513            TraceCellKind::Blank => blanks += 1,
2514            TraceCellKind::External => {}
2515        }
2516        if sample_addresses.len() < TRACE_RANGE_VALUE_SAMPLES {
2517            sample_addresses.push(detail.address.clone());
2518        }
2519    }
2520
2521    TraceRangeHighlight {
2522        start: details
2523            .first()
2524            .map(|d| d.address.clone())
2525            .unwrap_or_default(),
2526        end: details
2527            .last()
2528            .map(|d| d.address.clone())
2529            .unwrap_or_default(),
2530        count: details.len(),
2531        literals,
2532        formulas,
2533        blanks,
2534        sample_values,
2535        sample_formulas,
2536        sample_addresses,
2537    }
2538}
2539
2540fn build_notable_cells(
2541    details: &[NeighborDetail],
2542    ranges: &[TraceRangeHighlight],
2543    groups: &[TraceFormulaGroupHighlight],
2544) -> Vec<TraceCellHighlight> {
2545    let mut exclude: HashSet<String> = HashSet::new();
2546    for range in ranges {
2547        exclude.insert(range.start.clone());
2548        exclude.insert(range.end.clone());
2549        for addr in &range.sample_addresses {
2550            exclude.insert(addr.clone());
2551        }
2552    }
2553    for group in groups {
2554        for addr in &group.sample_addresses {
2555            exclude.insert(addr.clone());
2556        }
2557    }
2558
2559    let mut highlights = Vec::new();
2560    let mut kind_counts: HashMap<TraceCellKind, usize> = HashMap::new();
2561
2562    for detail in details {
2563        if highlights.len() >= TRACE_CELL_HIGHLIGHT_LIMIT {
2564            break;
2565        }
2566        if exclude.contains(&detail.address) {
2567            continue;
2568        }
2569        let counter = kind_counts.entry(detail.kind.clone()).or_insert(0);
2570        if *counter >= 2 && detail.kind != TraceCellKind::External {
2571            continue;
2572        }
2573        highlights.push(TraceCellHighlight {
2574            address: detail.address.clone(),
2575            kind: detail.kind.clone(),
2576            value: detail.value.clone(),
2577            formula: detail.formula.clone(),
2578        });
2579        *counter += 1;
2580    }
2581
2582    highlights
2583}
2584
2585fn build_edges_for_layer(
2586    raw_edges: &[TraceEdgeRaw],
2587    selected: &HashSet<String>,
2588    formula_lookup: &HashMap<String, TraceFormulaInfo>,
2589) -> Vec<FormulaTraceEdge> {
2590    let mut edges = Vec::new();
2591    for edge in raw_edges {
2592        if selected.contains(&edge.neighbor) {
2593            let formula = lookup_formula_info(formula_lookup, &edge.neighbor, &edge.neighbor)
2594                .map(|info| info.formula.clone());
2595            edges.push(FormulaTraceEdge {
2596                from: edge.from.clone(),
2597                to: edge.to.clone(),
2598                formula,
2599                note: None,
2600            });
2601        }
2602    }
2603    edges.sort_by(|a, b| compare_addresses(&a.to, &b.to));
2604    edges
2605}
2606
2607fn lookup_formula_info<'a>(
2608    lookup: &'a HashMap<String, TraceFormulaInfo>,
2609    cell_ref: &str,
2610    original: &str,
2611) -> Option<&'a TraceFormulaInfo> {
2612    if let Some(info) = lookup.get(cell_ref) {
2613        return Some(info);
2614    }
2615    if let (Some(_sheet), Some(cell)) = split_sheet_and_cell(original) {
2616        let upper = cell.to_ascii_uppercase();
2617        return lookup.get(&upper);
2618    }
2619    None
2620}
2621
2622fn compare_addresses(left: &str, right: &str) -> Ordering {
2623    let (sheet_left, cell_left) = split_sheet_and_cell(left);
2624    let (sheet_right, cell_right) = split_sheet_and_cell(right);
2625
2626    let sheet_left_key = sheet_left
2627        .as_ref()
2628        .map(|s| clean_sheet_name(s).to_ascii_uppercase())
2629        .unwrap_or_default();
2630    let sheet_right_key = sheet_right
2631        .as_ref()
2632        .map(|s| clean_sheet_name(s).to_ascii_uppercase())
2633        .unwrap_or_default();
2634
2635    match sheet_left_key.cmp(&sheet_right_key) {
2636        Ordering::Equal => {
2637            let left_core = cell_left.unwrap_or_else(|| left.to_string());
2638            let right_core = cell_right.unwrap_or_else(|| right.to_string());
2639            let left_coords = parse_address(&left_core.to_ascii_uppercase());
2640            let right_coords = parse_address(&right_core.to_ascii_uppercase());
2641            match (left_coords, right_coords) {
2642                (Some((lc, lr)), Some((rc, rr))) => lc
2643                    .cmp(&rc)
2644                    .then_with(|| lr.cmp(&rr))
2645                    .then_with(|| left_core.cmp(&right_core)),
2646                _ => left_core.cmp(&right_core),
2647            }
2648        }
2649        other => other,
2650    }
2651}
2652
2653fn split_sheet_and_cell(address: &str) -> (Option<String>, Option<String>) {
2654    if let Some(idx) = address.rfind('!') {
2655        let sheet = address[..idx].to_string();
2656        let cell = address[idx + 1..].to_string();
2657        (Some(sheet), Some(cell))
2658    } else {
2659        (None, Some(address.to_string()))
2660    }
2661}
2662
2663fn clean_sheet_name(sheet: &str) -> String {
2664    let trimmed = sheet.trim_matches(|c| c == '\'' || c == '"');
2665    let after_bracket = trimmed.rsplit(']').next().unwrap_or(trimmed);
2666    after_bracket
2667        .trim_matches(|c| c == '\'' || c == '"')
2668        .to_string()
2669}