spreadsheet_mcp/tools/
mod.rs

1pub mod filters;
2#[cfg(feature = "recalc")]
3pub mod fork;
4
5use crate::analysis::{formula::FormulaGraph, stats};
6use crate::model::*;
7use crate::state::AppState;
8use crate::workbook::{WorkbookContext, cell_to_value};
9use anyhow::{Result, anyhow};
10use regex::Regex;
11use schemars::JsonSchema;
12use serde::Deserialize;
13use std::cmp::Ordering;
14use std::collections::{BTreeMap, HashMap, HashSet};
15use std::sync::Arc;
16
17const DEFAULT_TRACE_PAGE_SIZE: usize = 20;
18const TRACE_PAGE_MIN: usize = 5;
19const TRACE_PAGE_MAX: usize = 200;
20const TRACE_RANGE_THRESHOLD: usize = 4;
21const TRACE_RANGE_HIGHLIGHT_LIMIT: usize = 3;
22const TRACE_GROUP_HIGHLIGHT_LIMIT: usize = 3;
23const TRACE_CELL_HIGHLIGHT_LIMIT: usize = 5;
24const TRACE_RANGE_VALUE_SAMPLES: usize = 3;
25const TRACE_RANGE_FORMULA_SAMPLES: usize = 2;
26const TRACE_GROUP_SAMPLE_LIMIT: usize = 5;
27const TRACE_DEPENDENTS_PER_CELL_LIMIT: usize = 500;
28
29pub async fn list_workbooks(
30    state: Arc<AppState>,
31    params: ListWorkbooksParams,
32) -> Result<WorkbookListResponse> {
33    let filter = params.into_filter()?;
34    state.list_workbooks(filter)
35}
36
37pub async fn describe_workbook(
38    state: Arc<AppState>,
39    params: DescribeWorkbookParams,
40) -> Result<WorkbookDescription> {
41    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
42    let desc = workbook.describe();
43    Ok(desc)
44}
45
46#[derive(Debug, Deserialize, JsonSchema)]
47pub struct ListWorkbooksParams {
48    pub slug_prefix: Option<String>,
49    pub folder: Option<String>,
50    pub path_glob: Option<String>,
51}
52
53impl ListWorkbooksParams {
54    fn into_filter(self) -> Result<filters::WorkbookFilter> {
55        filters::WorkbookFilter::new(self.slug_prefix, self.folder, self.path_glob)
56    }
57}
58
59#[derive(Debug, Deserialize, JsonSchema)]
60pub struct DescribeWorkbookParams {
61    #[serde(alias = "workbook_id")]
62    pub workbook_or_fork_id: WorkbookId,
63}
64
65#[derive(Debug, Deserialize, JsonSchema)]
66pub struct ListSheetsParams {
67    #[serde(alias = "workbook_id")]
68    pub workbook_or_fork_id: WorkbookId,
69}
70
71pub async fn list_sheets(
72    state: Arc<AppState>,
73    params: ListSheetsParams,
74) -> Result<SheetListResponse> {
75    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
76    let summaries = workbook.list_summaries()?;
77    let response = SheetListResponse {
78        workbook_id: workbook.id.clone(),
79        workbook_short_id: workbook.short_id.clone(),
80        sheets: summaries,
81    };
82    Ok(response)
83}
84
85#[derive(Debug, Deserialize, JsonSchema)]
86pub struct SheetOverviewParams {
87    #[serde(alias = "workbook_id")]
88    pub workbook_or_fork_id: WorkbookId,
89    pub sheet_name: String,
90}
91
92#[derive(Debug, Deserialize, JsonSchema)]
93pub struct WorkbookSummaryParams {
94    #[serde(alias = "workbook_id")]
95    pub workbook_or_fork_id: WorkbookId,
96}
97
98pub async fn workbook_summary(
99    state: Arc<AppState>,
100    params: WorkbookSummaryParams,
101) -> Result<WorkbookSummaryResponse> {
102    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
103    let sheet_names = workbook.sheet_names();
104
105    let mut total_cells: u64 = 0;
106    let mut total_formulas: u64 = 0;
107    let mut breakdown = WorkbookBreakdown::default();
108    let mut region_counts = RegionCountSummary::default();
109    let mut entry_points: Vec<EntryPoint> = Vec::new();
110    let mut key_named_ranges: Vec<NamedRangeDescriptor> = Vec::new();
111
112    for sheet_name in &sheet_names {
113        let entry = workbook.get_sheet_metrics(sheet_name)?;
114        total_cells += (entry.metrics.row_count as u64) * (entry.metrics.column_count as u64);
115        total_formulas += entry.metrics.formula_cells as u64;
116        match entry.metrics.classification {
117            SheetClassification::Calculator => breakdown.calculator_sheets += 1,
118            SheetClassification::Metadata => breakdown.metadata_sheets += 1,
119            SheetClassification::Empty => {}
120            _ => breakdown.data_sheets += 1,
121        }
122
123        for region in &entry.detected_regions {
124            match region
125                .region_kind
126                .clone()
127                .unwrap_or(region.classification.clone())
128            {
129                RegionKind::Calculator => region_counts.calculator += 1,
130                RegionKind::Metadata => region_counts.metadata += 1,
131                RegionKind::Parameters => region_counts.parameters += 1,
132                RegionKind::Outputs => region_counts.outputs += 1,
133                RegionKind::Data | RegionKind::Table => region_counts.data += 1,
134                _ => region_counts.other += 1,
135            }
136            if region.confidence >= 0.3 {
137                let kind = region
138                    .region_kind
139                    .as_ref()
140                    .unwrap_or(&region.classification);
141                let priority = match kind {
142                    RegionKind::Parameters => 0,
143                    RegionKind::Data | RegionKind::Table => 1,
144                    RegionKind::Outputs => 2,
145                    RegionKind::Calculator => 3,
146                    RegionKind::Metadata => 4,
147                    _ => 5,
148                };
149                entry_points.push(EntryPoint {
150                    sheet_name: sheet_name.clone(),
151                    region_id: Some(region.id),
152                    bounds: Some(region.bounds.clone()),
153                    rationale: format!(
154                        "{:?} region ({} rows, {:.0}% conf, p{})",
155                        kind,
156                        region.row_count,
157                        region.confidence * 100.0,
158                        priority
159                    ),
160                });
161            }
162        }
163
164        if entry.detected_regions.is_empty() && entry.metrics.non_empty_cells > 0 {
165            entry_points.push(EntryPoint {
166                sheet_name: sheet_name.clone(),
167                region_id: None,
168                bounds: None,
169                rationale: "Whole sheet is non-empty; start at top-left".to_string(),
170            });
171        }
172    }
173
174    entry_points.sort_by(|a, b| {
175        let pa = priority_from_rationale(&a.rationale);
176        let pb = priority_from_rationale(&b.rationale);
177        pa.cmp(&pb)
178            .then_with(|| {
179                a.bounds
180                    .as_ref()
181                    .map(|_| 1)
182                    .cmp(&b.bounds.as_ref().map(|_| 1))
183            })
184            .then_with(|| a.sheet_name.cmp(&b.sheet_name))
185    });
186    entry_points.truncate(5);
187
188    let mut seen_ranges = std::collections::HashSet::new();
189    for item in workbook.named_items()? {
190        if item.kind != NamedItemKind::NamedRange && item.kind != NamedItemKind::Table {
191            continue;
192        }
193        if !seen_ranges.insert(item.refers_to.clone()) {
194            continue;
195        }
196        key_named_ranges.push(item);
197        if key_named_ranges.len() >= 10 {
198            break;
199        }
200    }
201
202    Ok(WorkbookSummaryResponse {
203        workbook_id: workbook.id.clone(),
204        workbook_short_id: workbook.short_id.clone(),
205        slug: workbook.slug.clone(),
206        sheet_count: sheet_names.len(),
207        total_cells,
208        total_formulas,
209        breakdown,
210        region_counts,
211        key_named_ranges,
212        suggested_entry_points: entry_points,
213    })
214}
215
216fn priority_from_rationale(rationale: &str) -> u32 {
217    if rationale.contains("p0") {
218        0
219    } else if rationale.contains("p1") {
220        1
221    } else if rationale.contains("p2") {
222        2
223    } else if rationale.contains("p3") {
224        3
225    } else if rationale.contains("p4") {
226        4
227    } else {
228        5
229    }
230}
231
232pub async fn sheet_overview(
233    state: Arc<AppState>,
234    params: SheetOverviewParams,
235) -> Result<SheetOverviewResponse> {
236    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
237    let overview = workbook.sheet_overview(&params.sheet_name)?;
238    Ok(overview)
239}
240
241fn default_start_row() -> u32 {
242    1
243}
244
245fn default_page_size() -> u32 {
246    50
247}
248
249fn default_include_formulas() -> bool {
250    true
251}
252
253fn default_include_header() -> bool {
254    true
255}
256
257#[derive(Debug, Deserialize, JsonSchema)]
258pub struct SheetPageParams {
259    #[serde(alias = "workbook_id")]
260    pub workbook_or_fork_id: WorkbookId,
261    pub sheet_name: String,
262    #[serde(default = "default_start_row")]
263    pub start_row: u32,
264    #[serde(default = "default_page_size")]
265    pub page_size: u32,
266    #[serde(default)]
267    pub columns: Option<Vec<String>>,
268    #[serde(default)]
269    pub columns_by_header: Option<Vec<String>>,
270    #[serde(default = "default_include_formulas")]
271    pub include_formulas: bool,
272    #[serde(default)]
273    pub include_styles: bool,
274    #[serde(default = "default_include_header")]
275    pub include_header: bool,
276    #[serde(default)]
277    pub format: Option<SheetPageFormat>,
278}
279
280impl Default for SheetPageParams {
281    fn default() -> Self {
282        SheetPageParams {
283            workbook_or_fork_id: WorkbookId(String::new()),
284            sheet_name: String::new(),
285            start_row: default_start_row(),
286            page_size: default_page_size(),
287            columns: None,
288            columns_by_header: None,
289            include_formulas: default_include_formulas(),
290            include_styles: false,
291            include_header: default_include_header(),
292            format: None,
293        }
294    }
295}
296
297fn default_find_limit() -> u32 {
298    50
299}
300
301#[derive(Debug, Deserialize, JsonSchema)]
302pub struct FindValueParams {
303    #[serde(alias = "workbook_id")]
304    pub workbook_or_fork_id: WorkbookId,
305    pub query: String,
306    #[serde(default)]
307    pub label: Option<String>,
308    #[serde(default)]
309    pub mode: Option<FindMode>,
310    #[serde(default)]
311    pub match_mode: Option<String>,
312    #[serde(default)]
313    pub case_sensitive: bool,
314    #[serde(default)]
315    pub sheet_name: Option<String>,
316    #[serde(default)]
317    pub region_id: Option<u32>,
318    #[serde(default)]
319    pub table_name: Option<String>,
320    #[serde(default)]
321    pub value_types: Option<Vec<String>>,
322    #[serde(default)]
323    pub search_headers_only: bool,
324    #[serde(default)]
325    pub direction: Option<LabelDirection>,
326    #[serde(default = "default_find_limit")]
327    pub limit: u32,
328}
329
330impl Default for FindValueParams {
331    fn default() -> Self {
332        Self {
333            workbook_or_fork_id: WorkbookId(String::new()),
334            query: String::new(),
335            label: None,
336            mode: None,
337            match_mode: None,
338            case_sensitive: false,
339            sheet_name: None,
340            region_id: None,
341            table_name: None,
342            value_types: None,
343            search_headers_only: false,
344            direction: None,
345            limit: default_find_limit(),
346        }
347    }
348}
349
350#[derive(Debug, Deserialize, JsonSchema, Default)]
351pub struct ReadTableParams {
352    #[serde(alias = "workbook_id")]
353    pub workbook_or_fork_id: WorkbookId,
354    #[serde(default)]
355    pub sheet_name: Option<String>,
356    #[serde(default)]
357    pub table_name: Option<String>,
358    #[serde(default)]
359    pub region_id: Option<u32>,
360    #[serde(default)]
361    pub range: Option<String>,
362    #[serde(default)]
363    pub header_row: Option<u32>,
364    #[serde(default)]
365    pub header_rows: Option<u32>,
366    #[serde(default)]
367    pub columns: Option<Vec<String>>,
368    #[serde(default)]
369    pub filters: Option<Vec<TableFilter>>,
370    #[serde(default)]
371    pub sample_mode: Option<String>,
372    #[serde(default)]
373    pub limit: Option<u32>,
374    #[serde(default)]
375    pub offset: Option<u32>,
376}
377
378#[derive(Debug, Deserialize, JsonSchema, Clone)]
379pub struct TableFilter {
380    pub column: String,
381    pub op: String,
382    pub value: serde_json::Value,
383}
384
385#[derive(Debug, Deserialize, JsonSchema, Default)]
386pub struct TableProfileParams {
387    #[serde(alias = "workbook_id")]
388    pub workbook_or_fork_id: WorkbookId,
389    #[serde(default)]
390    pub sheet_name: Option<String>,
391    #[serde(default)]
392    pub region_id: Option<u32>,
393    #[serde(default)]
394    pub table_name: Option<String>,
395    #[serde(default)]
396    pub sample_mode: Option<String>,
397    #[serde(default)]
398    pub sample_size: Option<u32>,
399}
400
401#[derive(Debug, Deserialize, JsonSchema)]
402pub struct RangeValuesParams {
403    #[serde(alias = "workbook_id")]
404    pub workbook_or_fork_id: WorkbookId,
405    pub sheet_name: String,
406    pub ranges: Vec<String>,
407    #[serde(default)]
408    pub include_headers: Option<bool>,
409}
410
411pub async fn sheet_page(
412    state: Arc<AppState>,
413    params: SheetPageParams,
414) -> Result<SheetPageResponse> {
415    if params.page_size == 0 {
416        return Err(anyhow!("page_size must be greater than zero"));
417    }
418
419    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
420    let metrics = workbook.get_sheet_metrics(&params.sheet_name)?;
421    let format = params.format.unwrap_or_default();
422
423    let start_row = params.start_row.max(1);
424    let page_size = params.page_size.min(500);
425    let include_formulas = params.include_formulas;
426    let include_styles = params.include_styles;
427    let columns = params.columns.clone();
428    let columns_by_header = params.columns_by_header.clone();
429    let include_header = params.include_header;
430
431    let page = workbook.with_sheet(&params.sheet_name, |sheet| {
432        build_page(
433            sheet,
434            start_row,
435            page_size,
436            columns.clone(),
437            columns_by_header.clone(),
438            include_formulas,
439            include_styles,
440            include_header,
441        )
442    })?;
443
444    let has_more = page.end_row < metrics.metrics.row_count;
445    let next_start_row = if has_more {
446        Some(page.end_row + 1)
447    } else {
448        None
449    };
450
451    let compact_payload = if matches!(format, SheetPageFormat::Compact) {
452        Some(build_compact_payload(
453            &page.header,
454            &page.rows,
455            include_header,
456        ))
457    } else {
458        None
459    };
460
461    let values_only_payload = if matches!(format, SheetPageFormat::ValuesOnly) {
462        Some(build_values_only_payload(
463            &page.header,
464            &page.rows,
465            include_header,
466        ))
467    } else {
468        None
469    };
470
471    let response = SheetPageResponse {
472        workbook_id: workbook.id.clone(),
473        workbook_short_id: workbook.short_id.clone(),
474        sheet_name: params.sheet_name,
475        rows: if matches!(format, SheetPageFormat::Full) {
476            page.rows
477        } else {
478            Vec::new()
479        },
480        has_more,
481        next_start_row,
482        header_row: if include_header { page.header } else { None },
483        compact: compact_payload,
484        values_only: values_only_payload,
485        format,
486    };
487    Ok(response)
488}
489
490#[derive(Debug, Deserialize, JsonSchema)]
491pub struct SheetFormulaMapParams {
492    #[serde(alias = "workbook_id")]
493    pub workbook_or_fork_id: WorkbookId,
494    pub sheet_name: String,
495    pub range: Option<String>,
496    #[serde(default)]
497    pub expand: bool,
498    #[serde(default)]
499    pub limit: Option<u32>,
500    #[serde(default)]
501    pub sort_by: Option<FormulaSortBy>,
502}
503
504#[derive(Debug, Clone, Copy, Default, Deserialize, JsonSchema)]
505#[serde(rename_all = "snake_case")]
506pub enum FormulaSortBy {
507    #[default]
508    Address,
509    Complexity,
510    Count,
511}
512
513pub async fn sheet_formula_map(
514    state: Arc<AppState>,
515    params: SheetFormulaMapParams,
516) -> Result<SheetFormulaMapResponse> {
517    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
518    let graph = workbook.formula_graph(&params.sheet_name)?;
519    let mut groups = Vec::new();
520    let mut truncated = false;
521
522    for mut group in graph.groups() {
523        if let Some(range) = &params.range {
524            group.addresses.retain(|addr| address_in_range(addr, range));
525            if group.addresses.is_empty() {
526                continue;
527            }
528        }
529        if !params.expand && group.addresses.len() > 15 {
530            group.addresses.truncate(15);
531            truncated = true;
532        }
533        groups.push(group);
534    }
535
536    let sort_by = params.sort_by.unwrap_or_default();
537    match sort_by {
538        FormulaSortBy::Address => {
539            groups.sort_by(|a, b| a.addresses.first().cmp(&b.addresses.first()));
540        }
541        FormulaSortBy::Complexity => {
542            groups.sort_by(|a, b| b.formula.len().cmp(&a.formula.len()));
543        }
544        FormulaSortBy::Count => {
545            groups.sort_by(|a, b| b.addresses.len().cmp(&a.addresses.len()));
546        }
547    }
548
549    if let Some(limit) = params.limit
550        && groups.len() > limit as usize
551    {
552        groups.truncate(limit as usize);
553        truncated = true;
554    }
555
556    let response = SheetFormulaMapResponse {
557        workbook_id: workbook.id.clone(),
558        workbook_short_id: workbook.short_id.clone(),
559        sheet_name: params.sheet_name.clone(),
560        groups,
561        truncated,
562    };
563    Ok(response)
564}
565
566#[derive(Debug, Deserialize, JsonSchema)]
567pub struct FormulaTraceParams {
568    #[serde(alias = "workbook_id")]
569    pub workbook_or_fork_id: WorkbookId,
570    pub sheet_name: String,
571    pub cell_address: String,
572    pub direction: TraceDirection,
573    pub depth: Option<u32>,
574    pub limit: Option<u32>,
575    #[serde(default)]
576    pub page_size: Option<usize>,
577    #[serde(default)]
578    pub cursor: Option<TraceCursor>,
579}
580
581pub async fn formula_trace(
582    state: Arc<AppState>,
583    params: FormulaTraceParams,
584) -> Result<FormulaTraceResponse> {
585    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
586    let graph = workbook.formula_graph(&params.sheet_name)?;
587    let formula_lookup = build_formula_lookup(&graph);
588    let depth = params.depth.unwrap_or(3).clamp(1, 5);
589    let page_size = params
590        .page_size
591        .or_else(|| params.limit.map(|v| v as usize))
592        .unwrap_or(DEFAULT_TRACE_PAGE_SIZE)
593        .clamp(TRACE_PAGE_MIN, TRACE_PAGE_MAX);
594
595    let origin = params.cell_address.to_uppercase();
596    let config = TraceConfig {
597        direction: &params.direction,
598        origin: &origin,
599        sheet_name: &params.sheet_name,
600        depth_limit: depth,
601        page_size,
602    };
603    let (layers, next_cursor, notes) = build_trace_layers(
604        &workbook,
605        &graph,
606        &formula_lookup,
607        &config,
608        params.cursor.clone(),
609    )?;
610
611    let response = FormulaTraceResponse {
612        workbook_id: workbook.id.clone(),
613        workbook_short_id: workbook.short_id.clone(),
614        sheet_name: params.sheet_name.clone(),
615        origin,
616        direction: params.direction.clone(),
617        layers,
618        next_cursor,
619        notes,
620    };
621    Ok(response)
622}
623
624#[derive(Debug, Deserialize, JsonSchema)]
625pub struct NamedRangesParams {
626    #[serde(alias = "workbook_id")]
627    pub workbook_or_fork_id: WorkbookId,
628    pub sheet_name: Option<String>,
629    pub name_prefix: Option<String>,
630}
631
632pub async fn named_ranges(
633    state: Arc<AppState>,
634    params: NamedRangesParams,
635) -> Result<NamedRangesResponse> {
636    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
637    let mut items = workbook.named_items()?;
638
639    if let Some(sheet_filter) = &params.sheet_name {
640        items.retain(|item| {
641            item.sheet_name
642                .as_ref()
643                .map(|name| name.eq_ignore_ascii_case(sheet_filter))
644                .unwrap_or(false)
645        });
646    }
647    if let Some(prefix) = &params.name_prefix {
648        let prefix_lower = prefix.to_ascii_lowercase();
649        items.retain(|item| item.name.to_ascii_lowercase().starts_with(&prefix_lower));
650    }
651
652    let response = NamedRangesResponse {
653        workbook_id: workbook.id.clone(),
654        workbook_short_id: workbook.short_id.clone(),
655        items,
656    };
657    Ok(response)
658}
659
660struct PageBuildResult {
661    rows: Vec<RowSnapshot>,
662    header: Option<RowSnapshot>,
663    end_row: u32,
664}
665
666#[allow(clippy::too_many_arguments)]
667fn build_page(
668    sheet: &umya_spreadsheet::Worksheet,
669    start_row: u32,
670    page_size: u32,
671    columns: Option<Vec<String>>,
672    columns_by_header: Option<Vec<String>>,
673    include_formulas: bool,
674    include_styles: bool,
675    include_header: bool,
676) -> PageBuildResult {
677    let max_col = sheet.get_highest_column();
678    let end_row = (start_row + page_size - 1).min(sheet.get_highest_row().max(start_row));
679    let column_indices =
680        resolve_columns_with_headers(sheet, columns.as_ref(), columns_by_header.as_ref(), max_col);
681
682    let header = if include_header {
683        Some(build_row_snapshot(
684            sheet,
685            1,
686            &column_indices,
687            include_formulas,
688            include_styles,
689        ))
690    } else {
691        None
692    };
693
694    let mut rows = Vec::new();
695    for row_idx in start_row..=end_row {
696        rows.push(build_row_snapshot(
697            sheet,
698            row_idx,
699            &column_indices,
700            include_formulas,
701            include_styles,
702        ));
703    }
704
705    PageBuildResult {
706        rows,
707        header,
708        end_row,
709    }
710}
711
712fn build_row_snapshot(
713    sheet: &umya_spreadsheet::Worksheet,
714    row_index: u32,
715    columns: &[u32],
716    include_formulas: bool,
717    include_styles: bool,
718) -> RowSnapshot {
719    let mut cells = Vec::new();
720    for &col in columns {
721        if let Some(cell) = sheet.get_cell((col, row_index)) {
722            cells.push(build_cell_snapshot(cell, include_formulas, include_styles));
723        } else {
724            let address = crate::utils::cell_address(col, row_index);
725            cells.push(CellSnapshot {
726                address,
727                value: None,
728                formula: None,
729                cached_value: None,
730                number_format: None,
731                style_tags: Vec::new(),
732                notes: Vec::new(),
733            });
734        }
735    }
736
737    RowSnapshot { row_index, cells }
738}
739
740fn build_cell_snapshot(
741    cell: &umya_spreadsheet::Cell,
742    include_formulas: bool,
743    include_styles: bool,
744) -> CellSnapshot {
745    let address = cell.get_coordinate().get_coordinate();
746    let value = crate::workbook::cell_to_value(cell);
747    let formula = if include_formulas && cell.is_formula() {
748        Some(cell.get_formula().to_string())
749    } else {
750        None
751    };
752    let cached_value = if cell.is_formula() {
753        value.clone()
754    } else {
755        None
756    };
757    let number_format = if include_styles {
758        cell.get_style()
759            .get_number_format()
760            .map(|fmt| fmt.get_format_code().to_string())
761    } else {
762        None
763    };
764    let style_tags = if include_styles {
765        crate::analysis::style::tag_cell(cell)
766            .map(|(_, tagging)| tagging.tags)
767            .unwrap_or_default()
768    } else {
769        Vec::new()
770    };
771
772    CellSnapshot {
773        address,
774        value,
775        formula,
776        cached_value,
777        number_format,
778        style_tags,
779        notes: Vec::new(),
780    }
781}
782
783fn resolve_columns(columns: Option<&Vec<String>>, max_column: u32) -> Vec<u32> {
784    use std::collections::BTreeSet;
785    use umya_spreadsheet::helper::coordinate::column_index_from_string;
786
787    let mut indices = BTreeSet::new();
788    if let Some(specs) = columns {
789        for spec in specs {
790            if let Some((start, end)) = spec.split_once(':') {
791                let start_idx = column_index_from_string(start);
792                let end_idx = column_index_from_string(end);
793                let (min_idx, max_idx) = if start_idx <= end_idx {
794                    (start_idx, end_idx)
795                } else {
796                    (end_idx, start_idx)
797                };
798                for idx in min_idx..=max_idx {
799                    indices.insert(idx);
800                }
801            } else {
802                indices.insert(column_index_from_string(spec));
803            }
804        }
805    } else {
806        for idx in 1..=max_column.max(1) {
807            indices.insert(idx);
808        }
809    }
810
811    indices.into_iter().collect()
812}
813
814fn resolve_columns_with_headers(
815    sheet: &umya_spreadsheet::Worksheet,
816    columns: Option<&Vec<String>>,
817    columns_by_header: Option<&Vec<String>>,
818    max_column: u32,
819) -> Vec<u32> {
820    if columns_by_header.is_none() {
821        return resolve_columns(columns, max_column);
822    }
823
824    let mut selected = Vec::new();
825    let header_targets: Vec<String> = columns_by_header
826        .unwrap()
827        .iter()
828        .map(|h| h.trim().to_ascii_lowercase())
829        .collect();
830
831    for col_idx in 1..=max_column.max(1) {
832        let header_cell = sheet.get_cell((col_idx, 1u32));
833        let header_value = header_cell
834            .and_then(cell_to_value)
835            .map(cell_value_to_string_lower);
836        if let Some(hval) = header_value
837            && header_targets.iter().any(|target| target == &hval)
838        {
839            selected.push(col_idx);
840        }
841    }
842
843    if selected.is_empty() {
844        resolve_columns(columns, max_column)
845    } else {
846        selected
847    }
848}
849
850fn cell_value_to_string_lower(value: CellValue) -> String {
851    match value {
852        CellValue::Text(s) => s.to_ascii_lowercase(),
853        CellValue::Number(n) => n.to_string().to_ascii_lowercase(),
854        CellValue::Bool(b) => b.to_string(),
855        CellValue::Error(e) => e.to_ascii_lowercase(),
856        CellValue::Date(d) => d.to_ascii_lowercase(),
857    }
858}
859
860fn build_compact_payload(
861    header: &Option<RowSnapshot>,
862    rows: &[RowSnapshot],
863    include_header: bool,
864) -> SheetPageCompact {
865    let headers = derive_headers(header, rows);
866    let header_row = if include_header {
867        header
868            .as_ref()
869            .map(|h| h.cells.iter().map(|c| c.value.clone()).collect())
870            .unwrap_or_default()
871    } else {
872        Vec::new()
873    };
874    let data_rows = rows
875        .iter()
876        .map(|row| {
877            let mut vals: Vec<Option<CellValue>> = Vec::new();
878            vals.push(Some(CellValue::Number(row.row_index as f64)));
879            vals.extend(row.cells.iter().map(|c| c.value.clone()));
880            vals
881        })
882        .collect();
883
884    SheetPageCompact {
885        headers,
886        header_row,
887        rows: data_rows,
888    }
889}
890
891fn build_values_only_payload(
892    header: &Option<RowSnapshot>,
893    rows: &[RowSnapshot],
894    include_header: bool,
895) -> SheetPageValues {
896    let mut data = Vec::new();
897    if include_header && let Some(h) = header {
898        data.push(h.cells.iter().map(|c| c.value.clone()).collect());
899    }
900    for row in rows {
901        data.push(row.cells.iter().map(|c| c.value.clone()).collect());
902    }
903
904    SheetPageValues { rows: data }
905}
906
907fn derive_headers(header: &Option<RowSnapshot>, rows: &[RowSnapshot]) -> Vec<String> {
908    if let Some(h) = header {
909        let mut headers: Vec<String> = h
910            .cells
911            .iter()
912            .map(|c| match &c.value {
913                Some(CellValue::Text(t)) => t.clone(),
914                Some(CellValue::Number(n)) => n.to_string(),
915                Some(CellValue::Bool(b)) => b.to_string(),
916                Some(CellValue::Date(d)) => d.clone(),
917                Some(CellValue::Error(e)) => e.clone(),
918                None => c.address.clone(),
919            })
920            .collect();
921        headers.insert(0, "Row".to_string());
922        headers
923    } else if let Some(first) = rows.first() {
924        let mut headers = Vec::new();
925        headers.push("Row".to_string());
926        for cell in &first.cells {
927            headers.push(cell.address.clone());
928        }
929        headers
930    } else {
931        vec![]
932    }
933}
934fn default_stats_sample() -> usize {
935    500
936}
937
938#[derive(Debug, Deserialize, JsonSchema)]
939pub struct SheetStatisticsParams {
940    #[serde(alias = "workbook_id")]
941    pub workbook_or_fork_id: WorkbookId,
942    pub sheet_name: String,
943    #[serde(default)]
944    pub sample_rows: Option<usize>,
945}
946
947pub async fn sheet_statistics(
948    state: Arc<AppState>,
949    params: SheetStatisticsParams,
950) -> Result<SheetStatisticsResponse> {
951    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
952    let sheet_metrics = workbook.get_sheet_metrics(&params.sheet_name)?;
953    let sample_rows = params.sample_rows.unwrap_or_else(default_stats_sample);
954    let stats = workbook.with_sheet(&params.sheet_name, |sheet| {
955        stats::compute_sheet_statistics(sheet, sample_rows)
956    })?;
957    let response = SheetStatisticsResponse {
958        workbook_id: workbook.id.clone(),
959        workbook_short_id: workbook.short_id.clone(),
960        sheet_name: params.sheet_name,
961        row_count: sheet_metrics.metrics.row_count,
962        column_count: sheet_metrics.metrics.column_count,
963        density: stats.density,
964        numeric_columns: stats.numeric_columns,
965        text_columns: stats.text_columns,
966        null_counts: stats.null_counts,
967        duplicate_warnings: stats.duplicate_warnings,
968    };
969    Ok(response)
970}
971
972fn address_in_range(address: &str, range: &str) -> bool {
973    parse_range(range).is_none_or(|((start_col, start_row), (end_col, end_row))| {
974        if let Some((col, row)) = parse_address(address) {
975            col >= start_col && col <= end_col && row >= start_row && row <= end_row
976        } else {
977            false
978        }
979    })
980}
981
982fn parse_range(range: &str) -> Option<((u32, u32), (u32, u32))> {
983    let mut parts = range.split(':');
984    let start = parts.next()?;
985    let end = parts.next().unwrap_or(start);
986    let start_idx = parse_address(start)?;
987    let end_idx = parse_address(end)?;
988    Some((
989        (start_idx.0.min(end_idx.0), start_idx.1.min(end_idx.1)),
990        (start_idx.0.max(end_idx.0), start_idx.1.max(end_idx.1)),
991    ))
992}
993
994fn parse_address(address: &str) -> Option<(u32, u32)> {
995    use umya_spreadsheet::helper::coordinate::index_from_coordinate;
996    let (col, row, _, _) = index_from_coordinate(address);
997    match (col, row) {
998        (Some(c), Some(r)) => Some((c, r)),
999        _ => None,
1000    }
1001}
1002
1003#[derive(Clone)]
1004struct TableTarget {
1005    sheet_name: String,
1006    table_name: Option<String>,
1007    range: ((u32, u32), (u32, u32)),
1008    header_hint: Option<u32>,
1009}
1010
1011fn resolve_table_target(
1012    workbook: &WorkbookContext,
1013    params: &ReadTableParams,
1014) -> Result<TableTarget> {
1015    if let Some(region_id) = params.region_id
1016        && let Some(sheet) = &params.sheet_name
1017        && let Ok(region) = workbook.detected_region(sheet, region_id)
1018    {
1019        return Ok(TableTarget {
1020            sheet_name: sheet.clone(),
1021            table_name: None,
1022            range: parse_range(&region.bounds).unwrap_or(((1, 1), (1, 1))),
1023            header_hint: region.header_row,
1024        });
1025    }
1026
1027    if let Some(table_name) = &params.table_name {
1028        let items = workbook.named_items()?;
1029        for item in items {
1030            if item.name.eq_ignore_ascii_case(table_name)
1031                || item
1032                    .name
1033                    .to_ascii_lowercase()
1034                    .contains(&table_name.to_ascii_lowercase())
1035            {
1036                let mut sheet_name = item
1037                    .sheet_name
1038                    .clone()
1039                    .or_else(|| params.sheet_name.clone())
1040                    .unwrap_or_else(|| workbook.sheet_names().first().cloned().unwrap_or_default());
1041                let refers_to = item.refers_to.trim_start_matches('=');
1042                let mut range_part = refers_to;
1043                if let Some((sheet_part, rest)) = refers_to.split_once('!') {
1044                    sheet_name = sheet_part.trim_matches('\'').to_string();
1045                    range_part = rest;
1046                }
1047                if let Some(range) = parse_range(range_part) {
1048                    return Ok(TableTarget {
1049                        sheet_name,
1050                        table_name: Some(item.name.clone()),
1051                        range,
1052                        header_hint: if item.kind == NamedItemKind::Table {
1053                            Some(range.0.1)
1054                        } else {
1055                            None
1056                        },
1057                    });
1058                }
1059            }
1060        }
1061    }
1062
1063    let sheet_name = params
1064        .sheet_name
1065        .clone()
1066        .unwrap_or_else(|| workbook.sheet_names().first().cloned().unwrap_or_default());
1067
1068    if let Some(rng) = &params.range
1069        && let Some(range) = parse_range(rng)
1070    {
1071        return Ok(TableTarget {
1072            sheet_name,
1073            table_name: None,
1074            range,
1075            header_hint: None,
1076        });
1077    }
1078
1079    let metrics = workbook.get_sheet_metrics(&sheet_name)?;
1080    let end_col = metrics.metrics.column_count.max(1);
1081    let end_row = metrics.metrics.row_count.max(1);
1082    Ok(TableTarget {
1083        sheet_name,
1084        table_name: None,
1085        range: ((1, 1), (end_col, end_row)),
1086        header_hint: None,
1087    })
1088}
1089
1090#[allow(clippy::too_many_arguments)]
1091fn extract_table_rows(
1092    sheet: &umya_spreadsheet::Worksheet,
1093    target: &TableTarget,
1094    header_row: Option<u32>,
1095    header_rows: Option<u32>,
1096    columns: Option<Vec<String>>,
1097    filters: Option<Vec<TableFilter>>,
1098    limit: usize,
1099    offset: usize,
1100    sample_mode: &str,
1101) -> Result<(Vec<String>, Vec<TableRow>, u32)> {
1102    let ((start_col, start_row), (end_col, end_row)) = target.range;
1103    let mut header_start = header_row.or(target.header_hint).unwrap_or(start_row);
1104    if header_start < start_row {
1105        header_start = start_row;
1106    }
1107    if header_start > end_row {
1108        header_start = start_row;
1109    }
1110    let header_rows_count = header_rows.unwrap_or(1).max(1);
1111    let data_start_row = (header_start + header_rows_count).max(start_row + header_rows_count);
1112    let column_indices: Vec<u32> = if let Some(cols) = columns.as_ref() {
1113        resolve_columns(Some(cols), end_col).into_iter().collect()
1114    } else {
1115        (start_col..=end_col).collect()
1116    };
1117
1118    let headers = build_headers(sheet, &column_indices, header_start, header_rows_count);
1119    let mut all_rows: Vec<TableRow> = Vec::new();
1120    let mut total_rows: u32 = 0;
1121
1122    for row_idx in data_start_row..=end_row {
1123        let mut row = BTreeMap::new();
1124        for (i, col_idx) in column_indices.iter().enumerate() {
1125            let header = headers
1126                .get(i)
1127                .cloned()
1128                .unwrap_or_else(|| format!("Col{col_idx}"));
1129            let value = sheet.get_cell((*col_idx, row_idx)).and_then(cell_to_value);
1130            row.insert(header, value);
1131        }
1132        if !row_passes_filters(&row, filters.as_ref()) {
1133            continue;
1134        }
1135        total_rows += 1;
1136        if matches!(sample_mode, "first" | "all") && total_rows as usize > offset + limit {
1137            continue;
1138        }
1139        all_rows.push(row);
1140    }
1141
1142    let rows = sample_rows(all_rows, limit, offset, sample_mode);
1143
1144    Ok((headers, rows, total_rows))
1145}
1146
1147fn build_headers(
1148    sheet: &umya_spreadsheet::Worksheet,
1149    columns: &[u32],
1150    header_start: u32,
1151    header_rows: u32,
1152) -> Vec<String> {
1153    let mut headers = Vec::new();
1154    for col_idx in columns {
1155        let mut parts = Vec::new();
1156        for h in header_start..(header_start + header_rows) {
1157            let (origin_col, origin_row) = sheet.map_merged_cell((*col_idx, h));
1158            if let Some(value) = sheet
1159                .get_cell((origin_col, origin_row))
1160                .and_then(cell_to_value)
1161            {
1162                match value {
1163                    CellValue::Text(ref s) if s.trim().is_empty() => {}
1164                    CellValue::Text(s) => parts.push(s),
1165                    CellValue::Number(n) => parts.push(n.to_string()),
1166                    CellValue::Bool(b) => parts.push(b.to_string()),
1167                    CellValue::Error(e) => parts.push(e),
1168                    CellValue::Date(d) => parts.push(d),
1169                }
1170            }
1171        }
1172        if parts.is_empty() {
1173            headers.push(crate::utils::column_number_to_name(*col_idx));
1174        } else {
1175            headers.push(parts.join(" / "));
1176        }
1177    }
1178
1179    if headers.iter().all(|h| h.trim().is_empty()) {
1180        return columns
1181            .iter()
1182            .map(|c| crate::utils::column_number_to_name(*c))
1183            .collect();
1184    }
1185
1186    dedupe_headers(headers)
1187}
1188
1189fn dedupe_headers(mut headers: Vec<String>) -> Vec<String> {
1190    let mut seen: HashMap<String, u32> = HashMap::new();
1191    for h in headers.iter_mut() {
1192        let key = h.clone();
1193        if key.trim().is_empty() {
1194            continue;
1195        }
1196        let count = seen.entry(key.clone()).or_insert(0);
1197        if *count > 0 {
1198            h.push_str(&format!("_{}", *count + 1));
1199        }
1200        *count += 1;
1201    }
1202    headers
1203}
1204
1205fn row_passes_filters(row: &TableRow, filters: Option<&Vec<TableFilter>>) -> bool {
1206    if let Some(filters) = filters {
1207        for filter in filters {
1208            if let Some(value) = row.get(&filter.column) {
1209                match filter.op.as_str() {
1210                    "eq" => {
1211                        if !value_eq(value, &filter.value) {
1212                            return false;
1213                        }
1214                    }
1215                    "neq" => {
1216                        if value_eq(value, &filter.value) {
1217                            return false;
1218                        }
1219                    }
1220                    "contains" => {
1221                        if !value_contains(value, &filter.value) {
1222                            return false;
1223                        }
1224                    }
1225                    "gt" => {
1226                        if !value_gt(value, &filter.value) {
1227                            return false;
1228                        }
1229                    }
1230                    "lt" => {
1231                        if !value_lt(value, &filter.value) {
1232                            return false;
1233                        }
1234                    }
1235                    "in" => {
1236                        let list = filter
1237                            .value
1238                            .as_array()
1239                            .cloned()
1240                            .unwrap_or_else(|| vec![filter.value.clone()]);
1241                        if !list.iter().any(|cmp| value_eq(value, cmp)) {
1242                            return false;
1243                        }
1244                    }
1245                    _ => {}
1246                }
1247            }
1248        }
1249    }
1250    true
1251}
1252
1253fn value_eq(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1254    match (cell, cmp) {
1255        (Some(CellValue::Text(s)), serde_json::Value::String(t)) => s == t,
1256        (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
1257            v.as_f64().is_some_and(|f| (*n - f).abs() < f64::EPSILON)
1258        }
1259        (Some(CellValue::Number(n)), serde_json::Value::String(t)) => t
1260            .parse::<f64>()
1261            .map(|f| (*n - f).abs() < f64::EPSILON)
1262            .unwrap_or(false),
1263        (Some(CellValue::Bool(b)), serde_json::Value::Bool(v)) => b == v,
1264        (Some(CellValue::Bool(b)), serde_json::Value::String(t)) => {
1265            t.eq_ignore_ascii_case("true") == *b
1266        }
1267        (Some(CellValue::Date(d)), serde_json::Value::String(t)) => d == t,
1268        (None, serde_json::Value::Null) => true,
1269        _ => false,
1270    }
1271}
1272
1273fn value_contains(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1274    if let (Some(CellValue::Text(s)), serde_json::Value::String(t)) = (cell, cmp) {
1275        return s.to_ascii_lowercase().contains(&t.to_ascii_lowercase());
1276    }
1277    false
1278}
1279
1280fn value_gt(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1281    match (cell, cmp) {
1282        (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
1283            v.as_f64().is_some_and(|f| *n > f)
1284        }
1285        _ => false,
1286    }
1287}
1288
1289fn value_lt(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
1290    match (cell, cmp) {
1291        (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
1292            v.as_f64().is_some_and(|f| *n < f)
1293        }
1294        _ => false,
1295    }
1296}
1297
1298fn sample_rows(rows: Vec<TableRow>, limit: usize, offset: usize, mode: &str) -> Vec<TableRow> {
1299    if rows.is_empty() {
1300        return rows;
1301    }
1302
1303    match mode {
1304        "distributed" => {
1305            if limit == 0 {
1306                return Vec::new();
1307            }
1308            let mut indices = Vec::new();
1309            let span = rows.len().saturating_sub(1);
1310            let step = std::cmp::max(1, span / std::cmp::max(1, limit.saturating_sub(1)));
1311            let mut idx = offset;
1312            while idx < rows.len() && indices.len() < limit {
1313                indices.push(idx);
1314                idx = idx.saturating_add(step);
1315                if idx == indices.last().copied().unwrap_or(0) {
1316                    idx += 1;
1317                }
1318            }
1319            if indices.len() < limit {
1320                let last_idx = rows.len().saturating_sub(1);
1321                if !indices.contains(&last_idx) {
1322                    indices.push(last_idx);
1323                }
1324            }
1325            indices
1326                .into_iter()
1327                .filter_map(|i| rows.get(i).cloned())
1328                .collect()
1329        }
1330        "last" => {
1331            let start = rows.len().saturating_sub(limit + offset);
1332            rows.into_iter().skip(start + offset).take(limit).collect()
1333        }
1334        _ => rows.into_iter().skip(offset).take(limit).collect(),
1335    }
1336}
1337
1338fn summarize_columns(headers: &[String], rows: &[TableRow]) -> Vec<ColumnTypeSummary> {
1339    let mut summaries = Vec::new();
1340    for header in headers {
1341        let mut nulls = 0u32;
1342        let mut distinct_set: HashSet<String> = HashSet::new();
1343        let mut values: Vec<f64> = Vec::new();
1344        let mut top_counts: HashMap<String, u32> = HashMap::new();
1345
1346        for row in rows {
1347            match row.get(header) {
1348                Some(Some(CellValue::Number(n))) => {
1349                    values.push(*n);
1350                    let key = n.to_string();
1351                    *top_counts.entry(key).or_default() += 1;
1352                }
1353                Some(Some(CellValue::Text(s))) => {
1354                    distinct_set.insert(s.clone());
1355                    *top_counts.entry(s.clone()).or_default() += 1;
1356                }
1357                Some(Some(CellValue::Bool(b))) => {
1358                    let key = b.to_string();
1359                    distinct_set.insert(key.clone());
1360                    *top_counts.entry(key).or_default() += 1;
1361                }
1362                Some(Some(CellValue::Date(d))) => {
1363                    distinct_set.insert(d.clone());
1364                    *top_counts.entry(d.clone()).or_default() += 1;
1365                }
1366                Some(Some(CellValue::Error(e))) => {
1367                    distinct_set.insert(e.clone());
1368                    *top_counts.entry(e.clone()).or_default() += 1;
1369                }
1370                _ => {
1371                    nulls += 1;
1372                }
1373            }
1374        }
1375
1376        let inferred_type = if !values.is_empty() {
1377            "number"
1378        } else if !distinct_set.is_empty() {
1379            "text"
1380        } else {
1381            "unknown"
1382        }
1383        .to_string();
1384
1385        let min = values.iter().cloned().reduce(f64::min);
1386        let max = values.iter().cloned().reduce(f64::max);
1387        let mean = if values.is_empty() {
1388            None
1389        } else {
1390            Some(values.iter().sum::<f64>() / values.len() as f64)
1391        };
1392
1393        let mut top_values: Vec<(String, u32)> = top_counts.into_iter().collect();
1394        top_values.sort_by(|a, b| b.1.cmp(&a.1));
1395        let top_values = top_values.into_iter().take(3).map(|(v, _)| v).collect();
1396
1397        summaries.push(ColumnTypeSummary {
1398            name: header.clone(),
1399            inferred_type,
1400            nulls,
1401            distinct: distinct_set.len() as u32,
1402            top_values,
1403            min,
1404            max,
1405            mean,
1406        });
1407    }
1408    summaries
1409}
1410
1411#[allow(clippy::too_many_arguments)]
1412fn collect_value_matches(
1413    sheet: &umya_spreadsheet::Worksheet,
1414    sheet_name: &str,
1415    mode: &FindMode,
1416    match_mode: &str,
1417    direction: &LabelDirection,
1418    params: &FindValueParams,
1419    region: Option<&DetectedRegion>,
1420    default_bounds: ((u32, u32), (u32, u32)),
1421) -> Result<Vec<FindValueMatch>> {
1422    let mut results = Vec::new();
1423    let regex = if match_mode == "regex" {
1424        Regex::new(&params.query).ok()
1425    } else {
1426        None
1427    };
1428    let bounds = region
1429        .as_ref()
1430        .and_then(|r| parse_range(&r.bounds))
1431        .unwrap_or(default_bounds);
1432
1433    let header_row = region.and_then(|r| r.header_row).unwrap_or(1);
1434
1435    for cell in sheet.get_cell_collection() {
1436        let coord = cell.get_coordinate();
1437        let col = *coord.get_col_num();
1438        let row = *coord.get_row_num();
1439        if col < bounds.0.0 || col > bounds.1.0 || row < bounds.0.1 || row > bounds.1.1 {
1440            continue;
1441        }
1442        if params.search_headers_only && row != header_row {
1443            continue;
1444        }
1445
1446        let value = cell_to_value(cell);
1447        if let Some(ref allowed) = params.value_types
1448            && !value_type_matches(&value, allowed)
1449        {
1450            continue;
1451        }
1452        if matches!(mode, FindMode::Value) {
1453            if !value_matches(
1454                &value,
1455                &params.query,
1456                match_mode,
1457                params.case_sensitive,
1458                &regex,
1459            ) {
1460                continue;
1461            }
1462        } else if let Some(label) = &params.label {
1463            if !label_matches(cell, label, match_mode, params.case_sensitive, &regex) {
1464                continue;
1465            }
1466        } else {
1467            continue;
1468        }
1469
1470        let neighbors = collect_neighbors(sheet, row, col);
1471        let (label_hit, match_value) = if matches!(mode, FindMode::Label) {
1472            let target_value = match direction {
1473                LabelDirection::Right => sheet.get_cell((col + 1, row)),
1474                LabelDirection::Below => sheet.get_cell((col, row + 1)),
1475                LabelDirection::Any => sheet
1476                    .get_cell((col + 1, row))
1477                    .or_else(|| sheet.get_cell((col, row + 1))),
1478            }
1479            .and_then(cell_to_value);
1480            if target_value.is_none() {
1481                continue;
1482            }
1483            (
1484                Some(LabelHit {
1485                    label_address: coord.get_coordinate(),
1486                    label: label_from_cell(cell),
1487                }),
1488                target_value,
1489            )
1490        } else {
1491            (None, value.clone())
1492        };
1493
1494        let row_context = build_row_context(sheet, row, col);
1495
1496        results.push(FindValueMatch {
1497            address: coord.get_coordinate(),
1498            sheet_name: sheet_name.to_string(),
1499            value: match_value,
1500            row_context,
1501            neighbors,
1502            label_hit,
1503        });
1504    }
1505
1506    Ok(results)
1507}
1508
1509fn label_from_cell(cell: &umya_spreadsheet::Cell) -> String {
1510    cell_to_value(cell)
1511        .map(|v| match v {
1512            CellValue::Text(s) => s,
1513            CellValue::Number(n) => n.to_string(),
1514            CellValue::Bool(b) => b.to_string(),
1515            CellValue::Date(d) => d,
1516            CellValue::Error(e) => e,
1517        })
1518        .unwrap_or_else(|| cell.get_value().to_string())
1519}
1520
1521fn value_matches(
1522    value: &Option<CellValue>,
1523    query: &str,
1524    mode: &str,
1525    case_sensitive: bool,
1526    regex: &Option<Regex>,
1527) -> bool {
1528    if value.is_none() {
1529        return false;
1530    }
1531    let haystack = cell_value_to_string_lower(value.clone().unwrap());
1532    let needle = if case_sensitive {
1533        query.to_string()
1534    } else {
1535        query.to_ascii_lowercase()
1536    };
1537
1538    match mode {
1539        "exact" => haystack == needle,
1540        "regex" => regex
1541            .as_ref()
1542            .map(|re| re.is_match(&haystack))
1543            .unwrap_or(false),
1544        _ => haystack.contains(&needle),
1545    }
1546}
1547
1548fn label_matches(
1549    cell: &umya_spreadsheet::Cell,
1550    label: &str,
1551    mode: &str,
1552    case_sensitive: bool,
1553    regex: &Option<Regex>,
1554) -> bool {
1555    let value = cell_to_value(cell);
1556    if value.is_none() {
1557        return false;
1558    }
1559    let haystack = cell_value_to_string_lower(value.unwrap());
1560    let needle = if case_sensitive {
1561        label.to_string()
1562    } else {
1563        label.to_ascii_lowercase()
1564    };
1565    match mode {
1566        "exact" => haystack == needle,
1567        "regex" => regex
1568            .as_ref()
1569            .map(|re| re.is_match(&haystack))
1570            .unwrap_or(false),
1571        _ => haystack.contains(&needle),
1572    }
1573}
1574
1575fn value_type_matches(value: &Option<CellValue>, allowed: &[String]) -> bool {
1576    if value.is_none() {
1577        return allowed.iter().any(|v| v == "null");
1578    }
1579    match value.as_ref().unwrap() {
1580        CellValue::Text(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("text")),
1581        CellValue::Number(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("number")),
1582        CellValue::Bool(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("bool")),
1583        CellValue::Date(_) => allowed.iter().any(|v| v.eq_ignore_ascii_case("date")),
1584        CellValue::Error(_) => true,
1585    }
1586}
1587
1588fn collect_neighbors(
1589    sheet: &umya_spreadsheet::Worksheet,
1590    row: u32,
1591    col: u32,
1592) -> Option<NeighborValues> {
1593    Some(NeighborValues {
1594        left: if col > 1 {
1595            sheet.get_cell((col - 1, row)).and_then(cell_to_value)
1596        } else {
1597            None
1598        },
1599        right: sheet.get_cell((col + 1, row)).and_then(cell_to_value),
1600        up: if row > 1 {
1601            sheet.get_cell((col, row - 1)).and_then(cell_to_value)
1602        } else {
1603            None
1604        },
1605        down: sheet.get_cell((col, row + 1)).and_then(cell_to_value),
1606    })
1607}
1608
1609fn build_row_context(
1610    sheet: &umya_spreadsheet::Worksheet,
1611    row: u32,
1612    col: u32,
1613) -> Option<RowContext> {
1614    let header_value = sheet
1615        .get_cell((col, 1u32))
1616        .and_then(cell_to_value)
1617        .map(|v| match v {
1618            CellValue::Text(s) => s,
1619            CellValue::Number(n) => n.to_string(),
1620            CellValue::Bool(b) => b.to_string(),
1621            CellValue::Date(d) => d,
1622            CellValue::Error(e) => e,
1623        })
1624        .unwrap_or_else(|| format!("Col{}", col));
1625    let value = sheet.get_cell((col, row)).and_then(cell_to_value);
1626    Some(RowContext {
1627        headers: vec![header_value],
1628        values: vec![value],
1629    })
1630}
1631
1632fn default_find_formula_limit() -> u32 {
1633    50
1634}
1635
1636#[derive(Debug, Deserialize, JsonSchema, Default)]
1637pub struct FindFormulaParams {
1638    #[serde(alias = "workbook_id")]
1639    pub workbook_or_fork_id: WorkbookId,
1640    pub query: String,
1641    pub sheet_name: Option<String>,
1642    #[serde(default)]
1643    pub case_sensitive: bool,
1644    #[serde(default)]
1645    pub include_context: bool,
1646    #[serde(default = "default_find_formula_limit")]
1647    pub limit: u32,
1648    #[serde(default)]
1649    pub offset: u32,
1650}
1651
1652pub async fn find_formula(
1653    state: Arc<AppState>,
1654    params: FindFormulaParams,
1655) -> Result<FindFormulaResponse> {
1656    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
1657    let query = if params.case_sensitive {
1658        params.query.clone()
1659    } else {
1660        params.query.to_ascii_lowercase()
1661    };
1662
1663    let sheet_names: Vec<String> = if let Some(sheet) = &params.sheet_name {
1664        vec![sheet.clone()]
1665    } else {
1666        workbook.sheet_names()
1667    };
1668
1669    let limit = params.limit.clamp(1, 500);
1670    let offset = params.offset;
1671
1672    let mut matches = Vec::new();
1673    let mut seen: u32 = 0;
1674    let mut truncated = false;
1675
1676    for sheet_name in sheet_names {
1677        let (sheet_matches, sheet_seen, sheet_truncated) =
1678            workbook.with_sheet(&sheet_name, |sheet| {
1679                collect_formula_matches(
1680                    sheet,
1681                    &sheet_name,
1682                    &query,
1683                    params.case_sensitive,
1684                    params.include_context,
1685                    offset,
1686                    limit,
1687                    seen,
1688                )
1689            })?;
1690
1691        seen = sheet_seen;
1692        truncated |= sheet_truncated;
1693        matches.extend(sheet_matches);
1694
1695        if truncated {
1696            break;
1697        }
1698    }
1699
1700    let response = FindFormulaResponse {
1701        workbook_id: workbook.id.clone(),
1702        workbook_short_id: workbook.short_id.clone(),
1703        matches,
1704        truncated,
1705        next_offset: truncated.then(|| offset.saturating_add(limit)),
1706    };
1707    Ok(response)
1708}
1709
1710#[derive(Debug, Deserialize, JsonSchema)]
1711pub struct ScanVolatilesParams {
1712    #[serde(alias = "workbook_id")]
1713    pub workbook_or_fork_id: WorkbookId,
1714    pub sheet_name: Option<String>,
1715}
1716
1717pub async fn scan_volatiles(
1718    state: Arc<AppState>,
1719    params: ScanVolatilesParams,
1720) -> Result<VolatileScanResponse> {
1721    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
1722    let target_sheets: Vec<String> = if let Some(sheet) = &params.sheet_name {
1723        vec![sheet.clone()]
1724    } else {
1725        workbook.sheet_names()
1726    };
1727
1728    let mut items = Vec::new();
1729    let mut truncated = false;
1730
1731    for sheet_name in target_sheets {
1732        let graph = workbook.formula_graph(&sheet_name)?;
1733        for group in graph.groups() {
1734            if !group.is_volatile {
1735                continue;
1736            }
1737            for address in group.addresses.iter().take(50) {
1738                items.push(VolatileScanEntry {
1739                    address: address.clone(),
1740                    sheet_name: sheet_name.clone(),
1741                    function: "volatile".to_string(),
1742                    note: Some(group.formula.clone()),
1743                });
1744            }
1745            if group.addresses.len() > 50 {
1746                truncated = true;
1747            }
1748        }
1749    }
1750
1751    let response = VolatileScanResponse {
1752        workbook_id: workbook.id.clone(),
1753        workbook_short_id: workbook.short_id.clone(),
1754        items,
1755        truncated,
1756    };
1757    Ok(response)
1758}
1759
1760#[derive(Debug, Deserialize, JsonSchema)]
1761pub struct WorkbookStyleSummaryParams {
1762    #[serde(alias = "workbook_id")]
1763    pub workbook_or_fork_id: WorkbookId,
1764    pub max_styles: Option<u32>,
1765    pub max_conditional_formats: Option<u32>,
1766    pub max_cells_scan: Option<u32>,
1767}
1768
1769#[derive(Debug)]
1770struct WorkbookStyleAccum {
1771    descriptor: StyleDescriptor,
1772    occurrences: u32,
1773    tags: HashSet<String>,
1774    example_cells: Vec<String>,
1775}
1776
1777impl WorkbookStyleAccum {
1778    fn new(descriptor: StyleDescriptor) -> Self {
1779        Self {
1780            descriptor,
1781            occurrences: 0,
1782            tags: HashSet::new(),
1783            example_cells: Vec::new(),
1784        }
1785    }
1786}
1787
1788pub async fn workbook_style_summary(
1789    state: Arc<AppState>,
1790    params: WorkbookStyleSummaryParams,
1791) -> Result<WorkbookStyleSummaryResponse> {
1792    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
1793    let sheet_names = workbook.sheet_names();
1794
1795    const STYLE_EXAMPLE_LIMIT: usize = 5;
1796    const STYLE_LIMIT_DEFAULT: usize = 200;
1797    const CF_LIMIT_DEFAULT: usize = 200;
1798    const CELL_SCAN_LIMIT_DEFAULT: usize = 500_000;
1799
1800    let style_limit = params
1801        .max_styles
1802        .map(|v| v as usize)
1803        .unwrap_or(STYLE_LIMIT_DEFAULT);
1804    let cf_limit = params
1805        .max_conditional_formats
1806        .map(|v| v as usize)
1807        .unwrap_or(CF_LIMIT_DEFAULT);
1808    let cell_scan_limit = params
1809        .max_cells_scan
1810        .map(|v| v as usize)
1811        .unwrap_or(CELL_SCAN_LIMIT_DEFAULT);
1812
1813    let mut acc: HashMap<String, WorkbookStyleAccum> = HashMap::new();
1814    let mut scanned_cells: usize = 0;
1815    let mut scan_truncated = false;
1816
1817    for sheet_name in &sheet_names {
1818        if scan_truncated {
1819            break;
1820        }
1821        workbook.with_sheet(sheet_name, |sheet| {
1822            for cell in sheet.get_cell_collection() {
1823                if scanned_cells >= cell_scan_limit {
1824                    scan_truncated = true;
1825                    break;
1826                }
1827                scanned_cells += 1;
1828
1829                let address = cell.get_coordinate().get_coordinate().to_string();
1830                let descriptor = crate::styles::descriptor_from_style(cell.get_style());
1831                let style_id = crate::styles::stable_style_id(&descriptor);
1832
1833                let entry = acc
1834                    .entry(style_id.clone())
1835                    .or_insert_with(|| WorkbookStyleAccum::new(descriptor.clone()));
1836                entry.occurrences += 1;
1837                if entry.example_cells.len() < STYLE_EXAMPLE_LIMIT {
1838                    entry.example_cells.push(format!("{sheet_name}!{address}"));
1839                }
1840
1841                if let Some((_, tagging)) = crate::analysis::style::tag_cell(cell) {
1842                    for tag in tagging.tags {
1843                        entry.tags.insert(tag);
1844                    }
1845                }
1846            }
1847        })?;
1848    }
1849
1850    let total_styles = acc.len() as u32;
1851    let mut styles: Vec<WorkbookStyleUsage> = acc
1852        .into_iter()
1853        .map(|(style_id, entry)| {
1854            let mut tags: Vec<String> = entry.tags.into_iter().collect();
1855            tags.sort();
1856            WorkbookStyleUsage {
1857                style_id,
1858                occurrences: entry.occurrences,
1859                tags,
1860                example_cells: entry.example_cells,
1861                descriptor: Some(entry.descriptor),
1862            }
1863        })
1864        .collect();
1865
1866    styles.sort_by(|a, b| {
1867        b.occurrences
1868            .cmp(&a.occurrences)
1869            .then_with(|| a.style_id.cmp(&b.style_id))
1870    });
1871
1872    let inferred_default_style_id = styles.first().map(|s| s.style_id.clone());
1873    let mut inferred_default_font = styles
1874        .first()
1875        .and_then(|s| s.descriptor.as_ref().and_then(|d| d.font.clone()));
1876
1877    let styles_truncated = if styles.len() > style_limit {
1878        styles.truncate(style_limit);
1879        true
1880    } else {
1881        false
1882    };
1883
1884    let theme = workbook.with_spreadsheet(|book| {
1885        let theme = book.get_theme();
1886        let elements = theme.get_theme_elements();
1887        let scheme = elements.get_color_scheme();
1888        let mut colors = BTreeMap::new();
1889
1890        let mut insert_color = |name: &str, value: String| {
1891            if !value.trim().is_empty() {
1892                colors.insert(name.to_string(), value);
1893            }
1894        };
1895
1896        insert_color("dk1", scheme.get_dk1().get_val());
1897        insert_color("lt1", scheme.get_lt1().get_val());
1898        insert_color("dk2", scheme.get_dk2().get_val());
1899        insert_color("lt2", scheme.get_lt2().get_val());
1900        insert_color("accent1", scheme.get_accent1().get_val());
1901        insert_color("accent2", scheme.get_accent2().get_val());
1902        insert_color("accent3", scheme.get_accent3().get_val());
1903        insert_color("accent4", scheme.get_accent4().get_val());
1904        insert_color("accent5", scheme.get_accent5().get_val());
1905        insert_color("accent6", scheme.get_accent6().get_val());
1906        insert_color("hlink", scheme.get_hlink().get_val());
1907        insert_color("fol_hlink", scheme.get_fol_hlink().get_val());
1908
1909        let font_scheme = elements.get_font_scheme();
1910        let major = font_scheme.get_major_font();
1911        let minor = font_scheme.get_minor_font();
1912        let font_scheme_summary = ThemeFontSchemeSummary {
1913            major_latin: Some(major.get_latin_font().get_typeface().to_string())
1914                .filter(|s| !s.trim().is_empty()),
1915            major_east_asian: Some(major.get_east_asian_font().get_typeface().to_string())
1916                .filter(|s| !s.trim().is_empty()),
1917            major_complex_script: Some(major.get_complex_script_font().get_typeface().to_string())
1918                .filter(|s| !s.trim().is_empty()),
1919            minor_latin: Some(minor.get_latin_font().get_typeface().to_string())
1920                .filter(|s| !s.trim().is_empty()),
1921            minor_east_asian: Some(minor.get_east_asian_font().get_typeface().to_string())
1922                .filter(|s| !s.trim().is_empty()),
1923            minor_complex_script: Some(minor.get_complex_script_font().get_typeface().to_string())
1924                .filter(|s| !s.trim().is_empty()),
1925        };
1926
1927        ThemeSummary {
1928            name: Some(theme.get_name().to_string()).filter(|s| !s.trim().is_empty()),
1929            colors,
1930            font_scheme: font_scheme_summary,
1931        }
1932    })?;
1933
1934    if inferred_default_font.is_none()
1935        && let Some(name) = theme
1936            .font_scheme
1937            .minor_latin
1938            .clone()
1939            .or_else(|| theme.font_scheme.major_latin.clone())
1940    {
1941        inferred_default_font = Some(FontDescriptor {
1942            name: Some(name),
1943            size: None,
1944            bold: None,
1945            italic: None,
1946            underline: None,
1947            strikethrough: None,
1948            color: None,
1949        });
1950    }
1951
1952    let mut conditional_formats: Vec<ConditionalFormatSummary> = Vec::new();
1953    let mut conditional_formats_truncated = false;
1954    {
1955        use umya_spreadsheet::structs::EnumTrait;
1956        for sheet_name in &sheet_names {
1957            if conditional_formats_truncated {
1958                break;
1959            }
1960            workbook.with_sheet(sheet_name, |sheet| {
1961                for cf in sheet.get_conditional_formatting_collection() {
1962                    if conditional_formats.len() >= cf_limit {
1963                        conditional_formats_truncated = true;
1964                        break;
1965                    }
1966                    let range = cf.get_sequence_of_references().get_sqref().to_string();
1967                    let mut types: HashSet<String> = HashSet::new();
1968                    for rule in cf.get_conditional_collection() {
1969                        types.insert(rule.get_type().get_value_string().to_string());
1970                    }
1971                    let mut rule_types: Vec<String> = types.into_iter().collect();
1972                    rule_types.sort();
1973                    conditional_formats.push(ConditionalFormatSummary {
1974                        sheet_name: sheet_name.clone(),
1975                        range,
1976                        rule_types,
1977                        rule_count: cf.get_conditional_collection().len() as u32,
1978                    });
1979                }
1980            })?;
1981        }
1982    }
1983
1984    let mut notes: Vec<String> = Vec::new();
1985    if scan_truncated {
1986        notes.push(format!(
1987            "Stopped scanning after {cell_scan_limit} cells; style counts may be incomplete."
1988        ));
1989    }
1990    notes.push(
1991        "Named styles are not directly exposed by umya-spreadsheet; styles here are inferred from cell formatting."
1992            .to_string(),
1993    );
1994
1995    Ok(WorkbookStyleSummaryResponse {
1996        workbook_id: workbook.id.clone(),
1997        workbook_short_id: workbook.short_id.clone(),
1998        theme: Some(theme),
1999        inferred_default_style_id,
2000        inferred_default_font,
2001        styles,
2002        total_styles,
2003        styles_truncated,
2004        conditional_formats,
2005        conditional_formats_truncated,
2006        scan_truncated,
2007        notes,
2008    })
2009}
2010
2011#[derive(Debug, Deserialize, JsonSchema)]
2012pub struct SheetStylesParams {
2013    #[serde(alias = "workbook_id")]
2014    pub workbook_or_fork_id: WorkbookId,
2015    pub sheet_name: String,
2016    #[serde(default)]
2017    pub scope: Option<SheetStylesScope>,
2018    #[serde(default)]
2019    pub granularity: Option<String>,
2020    #[serde(default)]
2021    pub max_items: Option<usize>,
2022}
2023
2024#[derive(Debug, Deserialize, JsonSchema)]
2025#[serde(tag = "kind", rename_all = "snake_case")]
2026pub enum SheetStylesScope {
2027    Range { range: String },
2028    Region { region_id: u32 },
2029}
2030
2031#[derive(Debug)]
2032struct StyleAccum {
2033    descriptor: StyleDescriptor,
2034    occurrences: u32,
2035    tags: HashSet<String>,
2036    example_cells: Vec<String>,
2037    positions: Vec<(u32, u32)>,
2038}
2039
2040impl StyleAccum {
2041    fn new(descriptor: StyleDescriptor) -> Self {
2042        Self {
2043            descriptor,
2044            occurrences: 0,
2045            tags: HashSet::new(),
2046            example_cells: Vec::new(),
2047            positions: Vec::new(),
2048        }
2049    }
2050}
2051
2052pub async fn sheet_styles(
2053    state: Arc<AppState>,
2054    params: SheetStylesParams,
2055) -> Result<SheetStylesResponse> {
2056    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
2057    const STYLE_EXAMPLE_LIMIT: usize = 5;
2058    const STYLE_RANGE_LIMIT: usize = 50;
2059    const STYLE_LIMIT: usize = 200;
2060    const MAX_MAX_ITEMS: usize = 5000;
2061
2062    let metrics = workbook.get_sheet_metrics(&params.sheet_name)?;
2063    let full_bounds = (
2064        (1, 1),
2065        (
2066            metrics.metrics.column_count.max(1),
2067            metrics.metrics.row_count.max(1),
2068        ),
2069    );
2070
2071    let bounds = match &params.scope {
2072        Some(SheetStylesScope::Range { range }) => {
2073            parse_range(range).ok_or_else(|| anyhow!("invalid range: {}", range))?
2074        }
2075        Some(SheetStylesScope::Region { region_id }) => {
2076            let region = workbook.detected_region(&params.sheet_name, *region_id)?;
2077            parse_range(&region.bounds)
2078                .ok_or_else(|| anyhow!("invalid region bounds: {}", region.bounds))?
2079        }
2080        None => full_bounds,
2081    };
2082
2083    let granularity = params
2084        .granularity
2085        .as_deref()
2086        .unwrap_or("runs")
2087        .to_ascii_lowercase();
2088    if granularity != "runs" && granularity != "cells" {
2089        return Err(anyhow!(
2090            "invalid granularity: {} (expected runs|cells)",
2091            granularity
2092        ));
2093    }
2094
2095    let max_items = params
2096        .max_items
2097        .unwrap_or(STYLE_RANGE_LIMIT)
2098        .clamp(1, MAX_MAX_ITEMS);
2099
2100    let (styles, total_styles, styles_truncated) =
2101        workbook.with_sheet(&params.sheet_name, |sheet| {
2102            let mut acc: HashMap<String, StyleAccum> = HashMap::new();
2103
2104            for cell in sheet.get_cell_collection() {
2105                let address = cell.get_coordinate().get_coordinate().to_string();
2106                let Some((col, row)) = parse_address(&address) else {
2107                    continue;
2108                };
2109                if col < bounds.0.0 || col > bounds.1.0 || row < bounds.0.1 || row > bounds.1.1 {
2110                    continue;
2111                }
2112
2113                let descriptor = crate::styles::descriptor_from_style(cell.get_style());
2114                let style_id = crate::styles::stable_style_id(&descriptor);
2115
2116                let entry = acc
2117                    .entry(style_id.clone())
2118                    .or_insert_with(|| StyleAccum::new(descriptor.clone()));
2119                entry.occurrences += 1;
2120                if entry.example_cells.len() < STYLE_EXAMPLE_LIMIT {
2121                    entry.example_cells.push(address.clone());
2122                }
2123
2124                if let Some((_, tagging)) = crate::analysis::style::tag_cell(cell) {
2125                    for tag in tagging.tags {
2126                        entry.tags.insert(tag);
2127                    }
2128                }
2129
2130                entry.positions.push((row, col));
2131            }
2132
2133            let mut summaries: Vec<StyleSummary> = acc
2134                .into_iter()
2135                .map(|(style_id, mut entry)| {
2136                    entry.positions.sort_unstable();
2137                    entry.positions.dedup();
2138
2139                    let (cell_ranges, ranges_truncated) = if granularity == "cells" {
2140                        let mut out = Vec::new();
2141                        for (row, col) in entry.positions.iter().take(max_items) {
2142                            out.push(crate::utils::cell_address(*col, *row));
2143                        }
2144                        (out, entry.positions.len() > max_items)
2145                    } else {
2146                        crate::styles::compress_positions_to_ranges(&entry.positions, max_items)
2147                    };
2148
2149                    StyleSummary {
2150                        style_id,
2151                        occurrences: entry.occurrences,
2152                        tags: entry.tags.into_iter().collect(),
2153                        example_cells: entry.example_cells,
2154                        descriptor: Some(entry.descriptor),
2155                        cell_ranges,
2156                        ranges_truncated,
2157                    }
2158                })
2159                .collect();
2160
2161            summaries.sort_by(|a, b| {
2162                b.occurrences
2163                    .cmp(&a.occurrences)
2164                    .then_with(|| a.style_id.cmp(&b.style_id))
2165            });
2166
2167            let total = summaries.len() as u32;
2168            let truncated = if summaries.len() > STYLE_LIMIT {
2169                summaries.truncate(STYLE_LIMIT);
2170                true
2171            } else {
2172                false
2173            };
2174
2175            Ok::<_, anyhow::Error>((summaries, total, truncated))
2176        })??;
2177
2178    Ok(SheetStylesResponse {
2179        workbook_id: workbook.id.clone(),
2180        workbook_short_id: workbook.short_id.clone(),
2181        sheet_name: params.sheet_name.clone(),
2182        styles,
2183        conditional_rules: Vec::new(),
2184        total_styles,
2185        styles_truncated,
2186    })
2187}
2188
2189pub async fn range_values(
2190    state: Arc<AppState>,
2191    params: RangeValuesParams,
2192) -> Result<RangeValuesResponse> {
2193    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
2194    let include_headers = params.include_headers.unwrap_or(false);
2195    let values = workbook.with_sheet(&params.sheet_name, |sheet| {
2196        params
2197            .ranges
2198            .iter()
2199            .filter_map(|range| {
2200                parse_range(range).map(|((start_col, start_row), (end_col, end_row))| {
2201                    let mut rows = Vec::new();
2202                    for r in start_row..=end_row {
2203                        let mut row_vals = Vec::new();
2204                        for c in start_col..=end_col {
2205                            if include_headers && r == start_row && start_row == 1 {
2206                                row_vals.push(sheet.get_cell((c, 1u32)).and_then(cell_to_value));
2207                            } else {
2208                                row_vals.push(sheet.get_cell((c, r)).and_then(cell_to_value));
2209                            }
2210                        }
2211                        rows.push(row_vals);
2212                    }
2213                    RangeValuesEntry {
2214                        range: range.clone(),
2215                        rows,
2216                    }
2217                })
2218            })
2219            .collect()
2220    })?;
2221
2222    Ok(RangeValuesResponse {
2223        workbook_id: workbook.id.clone(),
2224        workbook_short_id: workbook.short_id.clone(),
2225        sheet_name: params.sheet_name,
2226        values,
2227    })
2228}
2229
2230pub async fn find_value(
2231    state: Arc<AppState>,
2232    params: FindValueParams,
2233) -> Result<FindValueResponse> {
2234    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
2235    let mut matches = Vec::new();
2236    let mut truncated = false;
2237    let mode = params.mode.clone().unwrap_or_else(|| {
2238        if params.label.is_some() {
2239            FindMode::Label
2240        } else {
2241            FindMode::Value
2242        }
2243    });
2244    let match_mode = params
2245        .match_mode
2246        .as_deref()
2247        .unwrap_or("contains")
2248        .to_ascii_lowercase();
2249    let direction = params.direction.clone().unwrap_or(LabelDirection::Any);
2250
2251    let target_sheets: Vec<String> = if let Some(sheet) = &params.sheet_name {
2252        vec![sheet.clone()]
2253    } else {
2254        workbook.sheet_names()
2255    };
2256
2257    for sheet_name in target_sheets {
2258        let metrics_entry = workbook.get_sheet_metrics(&sheet_name)?;
2259        let default_bounds = (
2260            (1, 1),
2261            (
2262                metrics_entry.metrics.column_count.max(1),
2263                metrics_entry.metrics.row_count.max(1),
2264            ),
2265        );
2266        let region_bounds = params
2267            .region_id
2268            .and_then(|id| workbook.detected_region(&sheet_name, id).ok());
2269        let sheet_matches = workbook.with_sheet(&sheet_name, |sheet| {
2270            collect_value_matches(
2271                sheet,
2272                &sheet_name,
2273                &mode,
2274                &match_mode,
2275                &direction,
2276                &params,
2277                region_bounds.as_ref(),
2278                default_bounds,
2279            )
2280        })??;
2281        matches.extend(sheet_matches);
2282        if matches.len() as u32 >= params.limit {
2283            truncated = true;
2284            matches.truncate(params.limit as usize);
2285            break;
2286        }
2287    }
2288
2289    Ok(FindValueResponse {
2290        workbook_id: workbook.id.clone(),
2291        workbook_short_id: workbook.short_id.clone(),
2292        matches,
2293        truncated,
2294    })
2295}
2296
2297pub async fn read_table(
2298    state: Arc<AppState>,
2299    params: ReadTableParams,
2300) -> Result<ReadTableResponse> {
2301    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
2302    let resolved = resolve_table_target(&workbook, &params)?;
2303    let limit = params.limit.unwrap_or(100) as usize;
2304    let offset = params.offset.unwrap_or(0) as usize;
2305    let sample_mode = params
2306        .sample_mode
2307        .clone()
2308        .unwrap_or_else(|| "first".to_string());
2309
2310    let (headers, rows, total_rows) = workbook.with_sheet(&resolved.sheet_name, |sheet| {
2311        extract_table_rows(
2312            sheet,
2313            &resolved,
2314            params.header_row,
2315            params.header_rows,
2316            params.columns.clone(),
2317            params.filters.clone(),
2318            limit,
2319            offset,
2320            &sample_mode,
2321        )
2322    })??;
2323
2324    let has_more = offset + rows.len() < total_rows as usize;
2325
2326    Ok(ReadTableResponse {
2327        workbook_id: workbook.id.clone(),
2328        workbook_short_id: workbook.short_id.clone(),
2329        sheet_name: resolved.sheet_name,
2330        table_name: resolved.table_name,
2331        headers,
2332        rows,
2333        total_rows,
2334        has_more,
2335    })
2336}
2337
2338pub async fn table_profile(
2339    state: Arc<AppState>,
2340    params: TableProfileParams,
2341) -> Result<TableProfileResponse> {
2342    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
2343    let resolved = resolve_table_target(
2344        &workbook,
2345        &ReadTableParams {
2346            workbook_or_fork_id: params.workbook_or_fork_id.clone(),
2347            sheet_name: params.sheet_name.clone(),
2348            table_name: params.table_name.clone(),
2349            region_id: params.region_id,
2350            range: None,
2351            header_row: None,
2352            header_rows: None,
2353            columns: None,
2354            filters: None,
2355            sample_mode: params.sample_mode.clone(),
2356            limit: params.sample_size,
2357            offset: Some(0),
2358        },
2359    )?;
2360
2361    let sample_size = params.sample_size.unwrap_or(10) as usize;
2362    let sample_mode = params
2363        .sample_mode
2364        .clone()
2365        .unwrap_or_else(|| "distributed".to_string());
2366
2367    let (headers, rows, total_rows) = workbook.with_sheet(&resolved.sheet_name, |sheet| {
2368        extract_table_rows(
2369            sheet,
2370            &resolved,
2371            None,
2372            None,
2373            None,
2374            None,
2375            sample_size,
2376            0,
2377            &sample_mode,
2378        )
2379    })??;
2380
2381    let column_types = summarize_columns(&headers, &rows);
2382
2383    Ok(TableProfileResponse {
2384        workbook_id: workbook.id.clone(),
2385        workbook_short_id: workbook.short_id.clone(),
2386        sheet_name: resolved.sheet_name,
2387        table_name: resolved.table_name,
2388        headers,
2389        column_types,
2390        row_count: total_rows,
2391        samples: rows,
2392        notes: Vec::new(),
2393    })
2394}
2395
2396#[derive(Debug, Deserialize, JsonSchema)]
2397pub struct ManifestStubParams {
2398    #[serde(alias = "workbook_id")]
2399    pub workbook_or_fork_id: WorkbookId,
2400    pub sheet_filter: Option<String>,
2401}
2402
2403pub async fn get_manifest_stub(
2404    state: Arc<AppState>,
2405    params: ManifestStubParams,
2406) -> Result<ManifestStubResponse> {
2407    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
2408    let mut summaries = workbook.list_summaries()?;
2409
2410    if let Some(filter) = &params.sheet_filter {
2411        summaries.retain(|summary| summary.name.eq_ignore_ascii_case(filter));
2412    }
2413
2414    let sheets = summaries
2415        .into_iter()
2416        .map(|summary| ManifestSheetStub {
2417            sheet_name: summary.name.clone(),
2418            classification: summary.classification.clone(),
2419            candidate_expectations: vec![format!(
2420                "Review {} sheet for expectation candidates",
2421                format!("{:?}", summary.classification).to_ascii_lowercase()
2422            )],
2423            notes: summary.style_tags,
2424        })
2425        .collect();
2426
2427    let response = ManifestStubResponse {
2428        workbook_id: workbook.id.clone(),
2429        workbook_short_id: workbook.short_id.clone(),
2430        slug: workbook.slug.clone(),
2431        sheets,
2432    };
2433    Ok(response)
2434}
2435
2436#[derive(Debug, Deserialize, JsonSchema)]
2437pub struct CloseWorkbookParams {
2438    #[serde(alias = "workbook_id")]
2439    pub workbook_or_fork_id: WorkbookId,
2440}
2441
2442pub async fn close_workbook(
2443    state: Arc<AppState>,
2444    params: CloseWorkbookParams,
2445) -> Result<CloseWorkbookResponse> {
2446    state.close_workbook(&params.workbook_or_fork_id)?;
2447    Ok(CloseWorkbookResponse {
2448        workbook_id: params.workbook_or_fork_id.clone(),
2449        message: format!("workbook {} evicted", params.workbook_or_fork_id.as_str()),
2450    })
2451}
2452#[allow(clippy::too_many_arguments)]
2453fn collect_formula_matches(
2454    sheet: &umya_spreadsheet::Worksheet,
2455    sheet_name: &str,
2456    query: &str,
2457    case_sensitive: bool,
2458    include_context: bool,
2459    offset: u32,
2460    limit: u32,
2461    seen_so_far: u32,
2462) -> (Vec<FindFormulaMatch>, u32, bool) {
2463    use crate::workbook::cell_to_value;
2464
2465    let mut results = Vec::new();
2466    let mut seen = seen_so_far;
2467
2468    for cell in sheet.get_cell_collection() {
2469        if !cell.is_formula() {
2470            continue;
2471        }
2472        let formula = cell.get_formula();
2473        let haystack = if case_sensitive {
2474            formula.to_string()
2475        } else {
2476            formula.to_ascii_lowercase()
2477        };
2478        if !haystack.contains(query) {
2479            continue;
2480        }
2481
2482        if seen < offset {
2483            seen += 1;
2484            continue;
2485        }
2486
2487        if results.len() as u32 >= limit {
2488            return (results, seen, true);
2489        }
2490
2491        let coord = cell.get_coordinate();
2492        let column = *coord.get_col_num();
2493        let row = *coord.get_row_num();
2494
2495        let context = if include_context {
2496            let columns = vec![column];
2497            let context_row = build_row_snapshot(sheet, row, &columns, true, false);
2498            let header_row = build_row_snapshot(sheet, 1, &columns, false, false);
2499            vec![header_row, context_row]
2500        } else {
2501            Vec::new()
2502        };
2503
2504        results.push(FindFormulaMatch {
2505            address: coord.get_coordinate(),
2506            sheet_name: sheet_name.to_string(),
2507            formula: formula.to_string(),
2508            cached_value: cell_to_value(cell),
2509            context,
2510        });
2511
2512        seen += 1;
2513    }
2514
2515    (results, seen, false)
2516}
2517
2518#[derive(Clone)]
2519struct TraceFormulaInfo {
2520    fingerprint: String,
2521    formula: String,
2522}
2523
2524#[derive(Clone)]
2525struct TraceEdgeRaw {
2526    from: String,
2527    to: String,
2528    neighbor: String,
2529}
2530
2531#[derive(Clone)]
2532struct LayerLinks {
2533    depth: u32,
2534    edges: Vec<TraceEdgeRaw>,
2535    truncated_cells: Vec<String>,
2536}
2537
2538#[derive(Clone)]
2539struct NeighborDetail {
2540    address: String,
2541    column: Option<u32>,
2542    row: Option<u32>,
2543    kind: TraceCellKind,
2544    value: Option<CellValue>,
2545    formula: Option<String>,
2546    fingerprint: Option<String>,
2547    external: bool,
2548}
2549
2550fn build_formula_lookup(graph: &FormulaGraph) -> HashMap<String, TraceFormulaInfo> {
2551    let mut map = HashMap::new();
2552    for group in graph.groups() {
2553        for address in group.addresses.clone() {
2554            map.insert(
2555                address.to_ascii_uppercase(),
2556                TraceFormulaInfo {
2557                    fingerprint: group.fingerprint.clone(),
2558                    formula: group.formula.clone(),
2559                },
2560            );
2561        }
2562    }
2563    map
2564}
2565
2566struct TraceConfig<'a> {
2567    direction: &'a TraceDirection,
2568    origin: &'a str,
2569    sheet_name: &'a str,
2570    depth_limit: u32,
2571    page_size: usize,
2572}
2573
2574fn build_trace_layers(
2575    workbook: &WorkbookContext,
2576    graph: &FormulaGraph,
2577    formula_lookup: &HashMap<String, TraceFormulaInfo>,
2578    config: &TraceConfig<'_>,
2579    cursor: Option<TraceCursor>,
2580) -> Result<(Vec<TraceLayer>, Option<TraceCursor>, Vec<String>)> {
2581    let layer_links =
2582        collect_layer_links(graph, config.direction, config.origin, config.depth_limit);
2583    let mut layers = Vec::new();
2584    let mut next_cursor = None;
2585    let mut notes = Vec::new();
2586    let focus_depth = cursor.as_ref().map(|c| c.depth);
2587
2588    for layer in layer_links {
2589        let produce_edges = focus_depth.is_none_or(|depth| depth == layer.depth);
2590        let offset = cursor
2591            .as_ref()
2592            .filter(|c| c.depth == layer.depth)
2593            .map(|c| c.offset)
2594            .unwrap_or(0);
2595
2596        let mut node_set: HashSet<String> = HashSet::new();
2597        for edge in &layer.edges {
2598            node_set.insert(edge.neighbor.clone());
2599        }
2600        let mut nodes: Vec<String> = node_set.into_iter().collect();
2601        nodes.sort_by(|a, b| compare_addresses(a, b));
2602
2603        let details = workbook.with_sheet(config.sheet_name, |sheet| {
2604            collect_neighbor_details(sheet, config.sheet_name, &nodes, formula_lookup)
2605        })?;
2606        let total_nodes = details.len();
2607        let start = offset.min(total_nodes);
2608        let end = if produce_edges {
2609            (start + config.page_size).min(total_nodes)
2610        } else {
2611            start
2612        };
2613        let selected_slice = if produce_edges {
2614            &details[start..end]
2615        } else {
2616            &details[0..0]
2617        };
2618        let selected_addresses: HashSet<String> = selected_slice
2619            .iter()
2620            .map(|detail| detail.address.clone())
2621            .collect();
2622
2623        let summary = build_layer_summary(&details);
2624        let range_highlights = build_range_highlights(&details);
2625        let group_highlights = build_formula_group_highlights(&details);
2626        let notable_cells = build_notable_cells(&details, &range_highlights, &group_highlights);
2627
2628        let highlights = TraceLayerHighlights {
2629            top_ranges: range_highlights.clone(),
2630            top_formula_groups: group_highlights.clone(),
2631            notable_cells,
2632        };
2633
2634        let edges = if produce_edges {
2635            build_edges_for_layer(&layer.edges, &selected_addresses, formula_lookup)
2636        } else {
2637            Vec::new()
2638        };
2639
2640        let has_more = produce_edges && end < total_nodes;
2641        if has_more && next_cursor.is_none() {
2642            next_cursor = Some(TraceCursor {
2643                depth: layer.depth,
2644                offset: end,
2645            });
2646        }
2647        if has_more {
2648            notes.push(format!(
2649                "Layer {} truncated at {} of {} nodes; supply cursor.depth={} and cursor.offset={} to continue",
2650                layer.depth, end, total_nodes, layer.depth, end
2651            ));
2652        }
2653
2654        if !layer.truncated_cells.is_empty() {
2655            let cell_list = if layer.truncated_cells.len() <= 3 {
2656                layer.truncated_cells.join(", ")
2657            } else {
2658                format!(
2659                    "{}, ... ({} more)",
2660                    layer.truncated_cells[..3].join(", "),
2661                    layer.truncated_cells.len() - 3
2662                )
2663            };
2664            notes.push(format!(
2665                "Layer {}: dependents truncated at {} per cell for: {}",
2666                layer.depth, TRACE_DEPENDENTS_PER_CELL_LIMIT, cell_list
2667            ));
2668        }
2669
2670        layers.push(TraceLayer {
2671            depth: layer.depth,
2672            summary,
2673            highlights,
2674            edges,
2675            has_more,
2676        });
2677    }
2678
2679    Ok((layers, next_cursor, notes))
2680}
2681
2682fn collect_layer_links(
2683    graph: &FormulaGraph,
2684    direction: &TraceDirection,
2685    origin: &str,
2686    depth_limit: u32,
2687) -> Vec<LayerLinks> {
2688    let mut visited: HashSet<String> = HashSet::new();
2689    visited.insert(origin.to_string());
2690    let mut frontier = vec![origin.to_string()];
2691    let mut layers = Vec::new();
2692
2693    for depth in 1..=depth_limit {
2694        let mut next_frontier_set: HashSet<String> = HashSet::new();
2695        let mut edges = Vec::new();
2696        let mut truncated_cells = Vec::new();
2697
2698        for cell in &frontier {
2699            let (neighbors, was_truncated) = match direction {
2700                TraceDirection::Precedents => (graph.precedents(cell), false),
2701                TraceDirection::Dependents => {
2702                    graph.dependents_limited(cell, Some(TRACE_DEPENDENTS_PER_CELL_LIMIT))
2703                }
2704            };
2705
2706            if was_truncated {
2707                truncated_cells.push(cell.clone());
2708            }
2709
2710            for neighbor in neighbors {
2711                let neighbor_upper = neighbor.to_ascii_uppercase();
2712                let edge = match direction {
2713                    TraceDirection::Precedents => TraceEdgeRaw {
2714                        from: cell.clone(),
2715                        to: neighbor_upper.clone(),
2716                        neighbor: neighbor_upper.clone(),
2717                    },
2718                    TraceDirection::Dependents => TraceEdgeRaw {
2719                        from: neighbor_upper.clone(),
2720                        to: cell.clone(),
2721                        neighbor: neighbor_upper.clone(),
2722                    },
2723                };
2724                edges.push(edge);
2725                if visited.insert(neighbor_upper.clone()) {
2726                    next_frontier_set.insert(neighbor_upper);
2727                }
2728            }
2729        }
2730
2731        if edges.is_empty() {
2732            break;
2733        }
2734
2735        layers.push(LayerLinks {
2736            depth,
2737            edges,
2738            truncated_cells,
2739        });
2740        if next_frontier_set.is_empty() {
2741            break;
2742        }
2743        let mut next_frontier: Vec<String> = next_frontier_set.into_iter().collect();
2744        next_frontier.sort();
2745        frontier = next_frontier;
2746    }
2747
2748    layers
2749}
2750
2751fn collect_neighbor_details(
2752    sheet: &umya_spreadsheet::Worksheet,
2753    current_sheet: &str,
2754    addresses: &[String],
2755    formula_lookup: &HashMap<String, TraceFormulaInfo>,
2756) -> Vec<NeighborDetail> {
2757    let mut details = Vec::new();
2758    for address in addresses {
2759        let (sheet_part, cell_part) = split_sheet_and_cell(address);
2760        let normalized_sheet = sheet_part
2761            .as_ref()
2762            .map(|s| clean_sheet_name(s).to_ascii_lowercase());
2763        let is_external = normalized_sheet
2764            .as_ref()
2765            .map(|s| !s.eq_ignore_ascii_case(current_sheet))
2766            .unwrap_or(false);
2767
2768        let Some(cell_ref) = cell_part else {
2769            details.push(NeighborDetail {
2770                address: address.clone(),
2771                column: None,
2772                row: None,
2773                kind: TraceCellKind::External,
2774                value: None,
2775                formula: None,
2776                fingerprint: None,
2777                external: true,
2778            });
2779            continue;
2780        };
2781
2782        let cell_ref_upper = cell_ref.to_ascii_uppercase();
2783
2784        if is_external {
2785            let formula_info = lookup_formula_info(formula_lookup, &cell_ref_upper, address);
2786            details.push(NeighborDetail {
2787                address: address.clone(),
2788                column: None,
2789                row: None,
2790                kind: TraceCellKind::External,
2791                value: None,
2792                formula: formula_info.map(|info| info.formula.clone()),
2793                fingerprint: formula_info.map(|info| info.fingerprint.clone()),
2794                external: true,
2795            });
2796            continue;
2797        }
2798
2799        let Some((col, row)) = parse_address(&cell_ref_upper) else {
2800            details.push(NeighborDetail {
2801                address: address.clone(),
2802                column: None,
2803                row: None,
2804                kind: TraceCellKind::External,
2805                value: None,
2806                formula: None,
2807                fingerprint: None,
2808                external: true,
2809            });
2810            continue;
2811        };
2812
2813        let cell_opt = sheet.get_cell((&col, &row));
2814        let formula_info = lookup_formula_info(formula_lookup, &cell_ref_upper, address);
2815        if let Some(cell) = cell_opt {
2816            let value = cell_to_value(cell);
2817            let kind = if cell.is_formula() {
2818                TraceCellKind::Formula
2819            } else if value.is_some() {
2820                TraceCellKind::Literal
2821            } else {
2822                TraceCellKind::Blank
2823            };
2824            details.push(NeighborDetail {
2825                address: address.clone(),
2826                column: Some(col),
2827                row: Some(row),
2828                kind,
2829                value,
2830                formula: formula_info.map(|info| info.formula.clone()),
2831                fingerprint: formula_info.map(|info| info.fingerprint.clone()),
2832                external: false,
2833            });
2834        } else {
2835            details.push(NeighborDetail {
2836                address: address.clone(),
2837                column: Some(col),
2838                row: Some(row),
2839                kind: TraceCellKind::Blank,
2840                value: None,
2841                formula: formula_info.map(|info| info.formula.clone()),
2842                fingerprint: formula_info.map(|info| info.fingerprint.clone()),
2843                external: false,
2844            });
2845        }
2846    }
2847    details
2848}
2849
2850fn build_layer_summary(details: &[NeighborDetail]) -> TraceLayerSummary {
2851    let mut summary = TraceLayerSummary {
2852        total_nodes: details.len(),
2853        formula_nodes: 0,
2854        value_nodes: 0,
2855        blank_nodes: 0,
2856        external_nodes: 0,
2857        unique_formula_groups: 0,
2858    };
2859
2860    let mut fingerprints: HashSet<String> = HashSet::new();
2861
2862    for detail in details {
2863        match detail.kind {
2864            TraceCellKind::Formula => {
2865                summary.formula_nodes += 1;
2866                if let Some(fp) = &detail.fingerprint {
2867                    fingerprints.insert(fp.clone());
2868                }
2869            }
2870            TraceCellKind::Literal => summary.value_nodes += 1,
2871            TraceCellKind::Blank => summary.blank_nodes += 1,
2872            TraceCellKind::External => summary.external_nodes += 1,
2873        }
2874    }
2875
2876    summary.unique_formula_groups = fingerprints.len();
2877    summary
2878}
2879
2880fn build_formula_group_highlights(details: &[NeighborDetail]) -> Vec<TraceFormulaGroupHighlight> {
2881    let mut aggregates: HashMap<String, (String, usize, Vec<String>)> = HashMap::new();
2882    for detail in details {
2883        if let (Some(fingerprint), Some(formula)) = (&detail.fingerprint, &detail.formula) {
2884            let entry = aggregates
2885                .entry(fingerprint.clone())
2886                .or_insert_with(|| (formula.clone(), 0, Vec::new()));
2887            entry.1 += 1;
2888            if entry.2.len() < TRACE_GROUP_SAMPLE_LIMIT {
2889                entry.2.push(detail.address.clone());
2890            }
2891        }
2892    }
2893
2894    let mut highlights: Vec<TraceFormulaGroupHighlight> = aggregates
2895        .into_iter()
2896        .map(
2897            |(fingerprint, (formula, count, sample_addresses))| TraceFormulaGroupHighlight {
2898                fingerprint,
2899                formula,
2900                count,
2901                sample_addresses,
2902            },
2903        )
2904        .collect();
2905
2906    highlights.sort_by(|a, b| b.count.cmp(&a.count));
2907    highlights.truncate(TRACE_GROUP_HIGHLIGHT_LIMIT);
2908    highlights
2909}
2910
2911fn build_range_highlights(details: &[NeighborDetail]) -> Vec<TraceRangeHighlight> {
2912    let mut by_column: HashMap<u32, Vec<&NeighborDetail>> = HashMap::new();
2913    for detail in details {
2914        if let (Some(col), Some(_row)) = (detail.column, detail.row)
2915            && !detail.external
2916        {
2917            by_column.entry(col).or_default().push(detail);
2918        }
2919    }
2920
2921    for column_entries in by_column.values_mut() {
2922        column_entries.sort_by(|a, b| a.row.cmp(&b.row));
2923    }
2924
2925    let mut ranges = Vec::new();
2926    for entries in by_column.values() {
2927        let mut current: Vec<&NeighborDetail> = Vec::new();
2928        for detail in entries {
2929            if current.is_empty() {
2930                current.push(detail);
2931                continue;
2932            }
2933            let prev_row = current.last().and_then(|d| d.row).unwrap_or(0);
2934            if detail.row.unwrap_or(0) == prev_row + 1 {
2935                current.push(detail);
2936            } else {
2937                if current.len() >= TRACE_RANGE_THRESHOLD {
2938                    ranges.push(make_range_highlight(&current));
2939                }
2940                current.clear();
2941                current.push(detail);
2942            }
2943        }
2944        if current.len() >= TRACE_RANGE_THRESHOLD {
2945            ranges.push(make_range_highlight(&current));
2946        }
2947    }
2948
2949    ranges.sort_by(|a, b| b.count.cmp(&a.count));
2950    ranges.truncate(TRACE_RANGE_HIGHLIGHT_LIMIT);
2951    ranges
2952}
2953
2954fn make_range_highlight(details: &[&NeighborDetail]) -> TraceRangeHighlight {
2955    let mut literals = 0usize;
2956    let mut formulas = 0usize;
2957    let mut blanks = 0usize;
2958    let mut sample_values = Vec::new();
2959    let mut sample_formulas = Vec::new();
2960    let mut sample_addresses = Vec::new();
2961
2962    for detail in details {
2963        match detail.kind {
2964            TraceCellKind::Formula => {
2965                formulas += 1;
2966                if let Some(formula) = &detail.formula
2967                    && sample_formulas.len() < TRACE_RANGE_FORMULA_SAMPLES
2968                    && !sample_formulas.contains(formula)
2969                {
2970                    sample_formulas.push(formula.clone());
2971                }
2972            }
2973            TraceCellKind::Literal => {
2974                literals += 1;
2975                if let Some(value) = &detail.value
2976                    && sample_values.len() < TRACE_RANGE_VALUE_SAMPLES
2977                {
2978                    sample_values.push(value.clone());
2979                }
2980            }
2981            TraceCellKind::Blank => blanks += 1,
2982            TraceCellKind::External => {}
2983        }
2984        if sample_addresses.len() < TRACE_RANGE_VALUE_SAMPLES {
2985            sample_addresses.push(detail.address.clone());
2986        }
2987    }
2988
2989    TraceRangeHighlight {
2990        start: details
2991            .first()
2992            .map(|d| d.address.clone())
2993            .unwrap_or_default(),
2994        end: details
2995            .last()
2996            .map(|d| d.address.clone())
2997            .unwrap_or_default(),
2998        count: details.len(),
2999        literals,
3000        formulas,
3001        blanks,
3002        sample_values,
3003        sample_formulas,
3004        sample_addresses,
3005    }
3006}
3007
3008fn build_notable_cells(
3009    details: &[NeighborDetail],
3010    ranges: &[TraceRangeHighlight],
3011    groups: &[TraceFormulaGroupHighlight],
3012) -> Vec<TraceCellHighlight> {
3013    let mut exclude: HashSet<String> = HashSet::new();
3014    for range in ranges {
3015        exclude.insert(range.start.clone());
3016        exclude.insert(range.end.clone());
3017        for addr in &range.sample_addresses {
3018            exclude.insert(addr.clone());
3019        }
3020    }
3021    for group in groups {
3022        for addr in &group.sample_addresses {
3023            exclude.insert(addr.clone());
3024        }
3025    }
3026
3027    let mut highlights = Vec::new();
3028    let mut kind_counts: HashMap<TraceCellKind, usize> = HashMap::new();
3029
3030    for detail in details {
3031        if highlights.len() >= TRACE_CELL_HIGHLIGHT_LIMIT {
3032            break;
3033        }
3034        if exclude.contains(&detail.address) {
3035            continue;
3036        }
3037        let counter = kind_counts.entry(detail.kind.clone()).or_insert(0);
3038        if *counter >= 2 && detail.kind != TraceCellKind::External {
3039            continue;
3040        }
3041        highlights.push(TraceCellHighlight {
3042            address: detail.address.clone(),
3043            kind: detail.kind.clone(),
3044            value: detail.value.clone(),
3045            formula: detail.formula.clone(),
3046        });
3047        *counter += 1;
3048    }
3049
3050    highlights
3051}
3052
3053fn build_edges_for_layer(
3054    raw_edges: &[TraceEdgeRaw],
3055    selected: &HashSet<String>,
3056    formula_lookup: &HashMap<String, TraceFormulaInfo>,
3057) -> Vec<FormulaTraceEdge> {
3058    let mut edges = Vec::new();
3059    for edge in raw_edges {
3060        if selected.contains(&edge.neighbor) {
3061            let formula = lookup_formula_info(formula_lookup, &edge.neighbor, &edge.neighbor)
3062                .map(|info| info.formula.clone());
3063            edges.push(FormulaTraceEdge {
3064                from: edge.from.clone(),
3065                to: edge.to.clone(),
3066                formula,
3067                note: None,
3068            });
3069        }
3070    }
3071    edges.sort_by(|a, b| compare_addresses(&a.to, &b.to));
3072    edges
3073}
3074
3075fn lookup_formula_info<'a>(
3076    lookup: &'a HashMap<String, TraceFormulaInfo>,
3077    cell_ref: &str,
3078    original: &str,
3079) -> Option<&'a TraceFormulaInfo> {
3080    if let Some(info) = lookup.get(cell_ref) {
3081        return Some(info);
3082    }
3083    if let (Some(_sheet), Some(cell)) = split_sheet_and_cell(original) {
3084        let upper = cell.to_ascii_uppercase();
3085        return lookup.get(&upper);
3086    }
3087    None
3088}
3089
3090fn compare_addresses(left: &str, right: &str) -> Ordering {
3091    let (sheet_left, cell_left) = split_sheet_and_cell(left);
3092    let (sheet_right, cell_right) = split_sheet_and_cell(right);
3093
3094    let sheet_left_key = sheet_left
3095        .as_ref()
3096        .map(|s| clean_sheet_name(s).to_ascii_uppercase())
3097        .unwrap_or_default();
3098    let sheet_right_key = sheet_right
3099        .as_ref()
3100        .map(|s| clean_sheet_name(s).to_ascii_uppercase())
3101        .unwrap_or_default();
3102
3103    match sheet_left_key.cmp(&sheet_right_key) {
3104        Ordering::Equal => {
3105            let left_core = cell_left.unwrap_or_else(|| left.to_string());
3106            let right_core = cell_right.unwrap_or_else(|| right.to_string());
3107            let left_coords = parse_address(&left_core.to_ascii_uppercase());
3108            let right_coords = parse_address(&right_core.to_ascii_uppercase());
3109            match (left_coords, right_coords) {
3110                (Some((lc, lr)), Some((rc, rr))) => lc
3111                    .cmp(&rc)
3112                    .then_with(|| lr.cmp(&rr))
3113                    .then_with(|| left_core.cmp(&right_core)),
3114                _ => left_core.cmp(&right_core),
3115            }
3116        }
3117        other => other,
3118    }
3119}
3120
3121fn split_sheet_and_cell(address: &str) -> (Option<String>, Option<String>) {
3122    if let Some(idx) = address.rfind('!') {
3123        let sheet = address[..idx].to_string();
3124        let cell = address[idx + 1..].to_string();
3125        (Some(sheet), Some(cell))
3126    } else {
3127        (None, Some(address.to_string()))
3128    }
3129}
3130
3131fn clean_sheet_name(sheet: &str) -> String {
3132    let trimmed = sheet.trim_matches(|c| c == '\'' || c == '"');
3133    let after_bracket = trimmed.rsplit(']').next().unwrap_or(trimmed);
3134    after_bracket
3135        .trim_matches(|c| c == '\'' || c == '"')
3136        .to_string()
3137}