Skip to main content

spreadsheet_mcp/tools/
mod.rs

1pub mod filters;
2#[cfg(feature = "recalc")]
3pub mod fork;
4pub mod param_enums;
5#[cfg(feature = "recalc")]
6pub mod rules_batch;
7#[cfg(feature = "recalc")]
8pub mod sheet_layout;
9pub mod vba;
10#[cfg(feature = "recalc")]
11pub mod write_normalize;
12
13use crate::analysis::{formula::FormulaGraph, stats};
14use crate::config::OutputProfile;
15use crate::model::*;
16use crate::state::AppState;
17use crate::utils::column_number_to_name;
18use crate::workbook::{WorkbookContext, cell_to_value};
19use anyhow::{Result, anyhow};
20use regex::Regex;
21use schemars::JsonSchema;
22use serde::Deserialize;
23use std::cmp::Ordering;
24use std::collections::{BTreeMap, HashMap, HashSet};
25use std::path::PathBuf;
26use std::sync::Arc;
27
28#[cfg(feature = "recalc")]
29fn fork_recalc_needed(state: &AppState, workbook_or_fork_id: &WorkbookId) -> bool {
30    state
31        .fork_registry()
32        .and_then(|registry| registry.get_fork(workbook_or_fork_id.as_str()).ok())
33        .is_some_and(|ctx| ctx.recalc_needed)
34}
35
36#[cfg(feature = "recalc")]
37fn sheet_has_formula_in_bounds(
38    sheet: &umya_spreadsheet::Worksheet,
39    bounds: &[((u32, u32), (u32, u32))],
40) -> bool {
41    if bounds.is_empty() {
42        return false;
43    }
44    for cell in sheet.get_cell_collection() {
45        if !cell.is_formula() {
46            continue;
47        }
48        let address = cell.get_coordinate().get_coordinate().to_string();
49        let Some((col, row)) = parse_address(&address) else {
50            continue;
51        };
52        if bounds
53            .iter()
54            .any(|b| col >= b.0.0 && col <= b.1.0 && row >= b.0.1 && row <= b.1.1)
55        {
56            return true;
57        }
58    }
59    false
60}
61
62const DEFAULT_TRACE_PAGE_SIZE: usize = 20;
63const TRACE_PAGE_MIN: usize = 5;
64const TRACE_PAGE_MAX: usize = 200;
65const TRACE_RANGE_THRESHOLD: usize = 4;
66const TRACE_RANGE_HIGHLIGHT_LIMIT: usize = 3;
67const TRACE_GROUP_HIGHLIGHT_LIMIT: usize = 3;
68const TRACE_CELL_HIGHLIGHT_LIMIT: usize = 5;
69const TRACE_RANGE_VALUE_SAMPLES: usize = 3;
70const TRACE_RANGE_FORMULA_SAMPLES: usize = 2;
71const TRACE_GROUP_SAMPLE_LIMIT: usize = 5;
72const TRACE_DEPENDENTS_PER_CELL_LIMIT: usize = 500;
73
74const DEFAULT_OVERVIEW_MAX_REGIONS: u32 = 25;
75const DEFAULT_OVERVIEW_MAX_HEADERS: u32 = 50;
76const DEFAULT_OVERVIEW_INCLUDE_HEADERS: bool = true;
77
78const ENTRY_POINT_MAX_ROWS: u32 = 10_000;
79const ENTRY_POINT_MAX_COLS: u32 = 200;
80
81pub async fn list_workbooks(
82    state: Arc<AppState>,
83    params: ListWorkbooksParams,
84) -> Result<WorkbookListResponse> {
85    let config = state.config();
86    let output_profile = config.output_profile();
87    let include_paths = params
88        .include_paths
89        .unwrap_or(!matches!(output_profile, OutputProfile::TokenDense));
90
91    let offset = params.offset.unwrap_or(0) as usize;
92    let limit = params.limit.unwrap_or(100) as usize;
93    let filter = params.into_filter()?;
94    let mut response = state.list_workbooks(filter)?;
95    let total_count = response.workbooks.len();
96
97    if offset < total_count {
98        let end = (offset + limit).min(total_count);
99        response.workbooks = response
100            .workbooks
101            .into_iter()
102            .skip(offset)
103            .take(end - offset)
104            .collect();
105    } else {
106        response.workbooks.clear();
107    }
108
109    // Apply include_paths toggle
110    if !include_paths {
111        for wb in &mut response.workbooks {
112            wb.path = None;
113            wb.caps = None;
114            wb.client_path = None;
115        }
116    } else if !config.path_mappings.is_empty() {
117        for wb in &mut response.workbooks {
118            if let Some(p) = wb.path.as_ref() {
119                let abs = config.resolve_path(PathBuf::from(p));
120                wb.client_path = config
121                    .map_path_for_client(&abs)
122                    .map(|mapped| mapped.display().to_string());
123            }
124        }
125    }
126
127    // Set next_offset if more data exists
128    response.next_offset = if offset + response.workbooks.len() < total_count {
129        Some((offset + response.workbooks.len()) as u32)
130    } else {
131        None
132    };
133
134    Ok(response)
135}
136
137pub async fn describe_workbook(
138    state: Arc<AppState>,
139    params: DescribeWorkbookParams,
140) -> Result<WorkbookDescription> {
141    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
142    let mut desc = workbook.describe();
143    let config = state.config();
144    if !config.path_mappings.is_empty() {
145        let internal = PathBuf::from(&desc.path);
146        desc.client_path = config
147            .map_path_for_client(&internal)
148            .map(|mapped| mapped.display().to_string());
149    }
150    Ok(desc)
151}
152
153#[derive(Debug, Deserialize, JsonSchema)]
154pub struct ListWorkbooksParams {
155    /// Filter by workbook slug prefix
156    pub slug_prefix: Option<String>,
157    /// Filter by folder path
158    pub folder: Option<String>,
159    /// Filter by glob pattern (e.g., "**/*.xlsx")
160    pub path_glob: Option<String>,
161    /// Maximum number of workbooks to return (default: 100)
162    #[serde(default)]
163    pub limit: Option<u32>,
164    /// Offset for pagination; use next_offset from previous response
165    #[serde(default)]
166    pub offset: Option<u32>,
167    /// Include file paths and capabilities (default: false in token_dense profile)
168    #[serde(default)]
169    pub include_paths: Option<bool>,
170}
171
172impl ListWorkbooksParams {
173    fn into_filter(self) -> Result<filters::WorkbookFilter> {
174        filters::WorkbookFilter::new(self.slug_prefix, self.folder, self.path_glob)
175    }
176}
177
178#[derive(Debug, Deserialize, JsonSchema)]
179pub struct DescribeWorkbookParams {
180    #[serde(alias = "workbook_id")]
181    pub workbook_or_fork_id: WorkbookId,
182}
183
184#[derive(Debug, Deserialize, JsonSchema)]
185pub struct ListSheetsParams {
186    /// Workbook ID or fork ID
187    #[serde(alias = "workbook_id")]
188    pub workbook_or_fork_id: WorkbookId,
189    /// Maximum number of sheets to return (default: 100)
190    #[serde(default)]
191    pub limit: Option<u32>,
192    /// Offset for pagination; use next_offset from previous response
193    #[serde(default)]
194    pub offset: Option<u32>,
195    /// Include row/column counts and metrics (default: false in token_dense profile)
196    #[serde(default)]
197    pub include_bounds: Option<bool>,
198}
199
200pub async fn list_sheets(
201    state: Arc<AppState>,
202    params: ListSheetsParams,
203) -> Result<SheetListResponse> {
204    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
205    let config = state.config();
206    let output_profile = config.output_profile();
207    let include_bounds = params
208        .include_bounds
209        .unwrap_or(!matches!(output_profile, OutputProfile::TokenDense));
210    let mut summaries = workbook.list_summaries(include_bounds)?;
211
212    let total_count = summaries.len();
213    let offset = params.offset.unwrap_or(0) as usize;
214    let limit = params.limit.unwrap_or(100) as usize;
215
216    if offset < total_count {
217        summaries = summaries.into_iter().skip(offset).take(limit).collect();
218    } else {
219        summaries.clear();
220    }
221
222    let next_offset = if offset + summaries.len() < total_count {
223        Some((offset + summaries.len()) as u32)
224    } else {
225        None
226    };
227
228    let response = SheetListResponse {
229        workbook_id: workbook.id.clone(),
230        workbook_short_id: workbook.short_id.clone(),
231        sheets: summaries,
232        next_offset,
233    };
234    Ok(response)
235}
236
237#[derive(Debug, Deserialize, JsonSchema)]
238pub struct SheetOverviewParams {
239    /// Workbook ID or fork ID
240    #[serde(alias = "workbook_id")]
241    pub workbook_or_fork_id: WorkbookId,
242    /// Sheet name
243    pub sheet_name: String,
244    /// Maximum detected regions to return (default: 25)
245    #[serde(default)]
246    pub max_regions: Option<u32>,
247    /// Maximum headers per region (default: 50)
248    #[serde(default)]
249    pub max_headers: Option<u32>,
250    /// Include headers in region info (default: true)
251    #[serde(default)]
252    pub include_headers: Option<bool>,
253}
254
255#[derive(Debug, Deserialize, JsonSchema)]
256pub struct WorkbookSummaryParams {
257    /// Workbook ID or fork ID
258    #[serde(alias = "workbook_id")]
259    pub workbook_or_fork_id: WorkbookId,
260    /// Return minimal summary without entry points or named ranges (default: true in token_dense profile)
261    #[serde(default)]
262    pub summary_only: Option<bool>,
263    /// Include suggested entry points for exploration (default: !summary_only)
264    #[serde(default)]
265    pub include_entry_points: Option<bool>,
266    /// Include key named ranges and tables (default: !summary_only)
267    #[serde(default)]
268    pub include_named_ranges: Option<bool>,
269}
270
271pub async fn workbook_summary(
272    state: Arc<AppState>,
273    params: WorkbookSummaryParams,
274) -> Result<WorkbookSummaryResponse> {
275    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
276    let config = state.config();
277    let output_profile = config.output_profile();
278    let summary_only = params
279        .summary_only
280        .unwrap_or(matches!(output_profile, OutputProfile::TokenDense));
281    let include_entry_points = params.include_entry_points.unwrap_or(!summary_only);
282    let include_named_ranges = params.include_named_ranges.unwrap_or(!summary_only);
283
284    tokio::task::spawn_blocking(move || {
285        build_workbook_summary(workbook, include_entry_points, include_named_ranges)
286    })
287    .await?
288}
289
290fn build_workbook_summary(
291    workbook: Arc<WorkbookContext>,
292    include_entry_points: bool,
293    include_named_ranges: bool,
294) -> Result<WorkbookSummaryResponse> {
295    let sheet_names = workbook.sheet_names();
296
297    let mut total_cells: u64 = 0;
298    let mut total_formulas: u64 = 0;
299    let mut breakdown = WorkbookBreakdown::default();
300    let mut region_counts = RegionCountSummary::default();
301    let mut entry_points: Vec<EntryPoint> = Vec::new();
302    let mut key_named_ranges: Vec<NamedRangeDescriptor> = Vec::new();
303    let mut notes: Vec<String> = Vec::new();
304
305    for sheet_name in &sheet_names {
306        let entry = workbook.get_sheet_metrics_fast(sheet_name)?;
307        total_cells += (entry.metrics.row_count as u64) * (entry.metrics.column_count as u64);
308        total_formulas += entry.metrics.formula_cells as u64;
309
310        match entry.metrics.classification {
311            SheetClassification::Calculator => breakdown.calculator_sheets += 1,
312            SheetClassification::Metadata => breakdown.metadata_sheets += 1,
313            SheetClassification::Empty => {}
314            _ => breakdown.data_sheets += 1,
315        }
316
317        if entry.metrics.non_empty_cells == 0 {
318            continue;
319        }
320
321        match entry.metrics.classification {
322            SheetClassification::Calculator => region_counts.calculator += 1,
323            SheetClassification::Metadata => region_counts.metadata += 1,
324            SheetClassification::Empty => {}
325            _ => region_counts.data += 1,
326        }
327
328        if include_entry_points {
329            let priority = entry_point_priority(&entry.metrics.classification);
330            entry_points.push(EntryPoint {
331                sheet_name: sheet_name.clone(),
332                region_id: None,
333                bounds: entry_point_bounds(&entry.metrics),
334                rationale: format!(
335                    "Fast summary p{}: {:?} sheet",
336                    priority, entry.metrics.classification
337                ),
338            });
339        }
340    }
341
342    if include_entry_points {
343        entry_points.sort_by(|a, b| {
344            let pa = priority_from_rationale(&a.rationale);
345            let pb = priority_from_rationale(&b.rationale);
346            pa.cmp(&pb)
347                .then_with(|| {
348                    a.bounds
349                        .as_ref()
350                        .map(|_| 1)
351                        .cmp(&b.bounds.as_ref().map(|_| 1))
352                })
353                .then_with(|| a.sheet_name.cmp(&b.sheet_name))
354        });
355        entry_points.truncate(5);
356    }
357
358    if include_named_ranges {
359        let mut seen_ranges = std::collections::HashSet::new();
360        for item in workbook.named_items()? {
361            if item.kind != NamedItemKind::NamedRange && item.kind != NamedItemKind::Table {
362                continue;
363            }
364            if !seen_ranges.insert(item.refers_to.clone()) {
365                continue;
366            }
367            key_named_ranges.push(item);
368            if key_named_ranges.len() >= 10 {
369                break;
370            }
371        }
372    }
373
374    notes.push("Region counts and entry points are inferred from sheet metrics; use sheet_overview for full region detection.".to_string());
375
376    Ok(WorkbookSummaryResponse {
377        workbook_id: workbook.id.clone(),
378        workbook_short_id: workbook.short_id.clone(),
379        slug: workbook.slug.clone(),
380        sheet_count: sheet_names.len(),
381        total_cells,
382        total_formulas,
383        breakdown,
384        region_counts,
385        key_named_ranges,
386        suggested_entry_points: entry_points,
387        notes,
388    })
389}
390
391fn entry_point_priority(classification: &SheetClassification) -> u32 {
392    match classification {
393        SheetClassification::Data => 1,
394        SheetClassification::Mixed => 2,
395        SheetClassification::Calculator => 3,
396        SheetClassification::Metadata => 4,
397        SheetClassification::Empty => 5,
398    }
399}
400
401fn entry_point_bounds(metrics: &crate::workbook::SheetMetrics) -> Option<String> {
402    if metrics.row_count == 0 || metrics.column_count == 0 {
403        return None;
404    }
405    if metrics.row_count > ENTRY_POINT_MAX_ROWS || metrics.column_count > ENTRY_POINT_MAX_COLS {
406        return None;
407    }
408    let end_col = column_number_to_name(metrics.column_count.max(1));
409    let end_cell = format!("{}{}", end_col, metrics.row_count.max(1));
410    Some(format!("A1:{}", end_cell))
411}
412
413fn priority_from_rationale(rationale: &str) -> u32 {
414    if rationale.contains("p0") {
415        0
416    } else if rationale.contains("p1") {
417        1
418    } else if rationale.contains("p2") {
419        2
420    } else if rationale.contains("p3") {
421        3
422    } else if rationale.contains("p4") {
423        4
424    } else {
425        5
426    }
427}
428
429pub async fn sheet_overview(
430    state: Arc<AppState>,
431    params: SheetOverviewParams,
432) -> Result<SheetOverviewResponse> {
433    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
434    let sheet_name = params.sheet_name.clone();
435    let mut overview =
436        tokio::task::spawn_blocking(move || workbook.sheet_overview(&sheet_name)).await??;
437
438    let max_regions = params
439        .max_regions
440        .unwrap_or(DEFAULT_OVERVIEW_MAX_REGIONS)
441        .max(1);
442    let max_headers = params
443        .max_headers
444        .unwrap_or(DEFAULT_OVERVIEW_MAX_HEADERS)
445        .max(1);
446    let include_headers = params
447        .include_headers
448        .unwrap_or(DEFAULT_OVERVIEW_INCLUDE_HEADERS);
449
450    let region_limit = if params.max_regions == Some(0) {
451        usize::MAX
452    } else {
453        max_regions as usize
454    };
455    let header_limit = if params.max_headers == Some(0) {
456        usize::MAX
457    } else {
458        max_headers as usize
459    };
460
461    let total_regions = overview.detected_regions.len() as u32;
462    let mut headers_truncated = false;
463
464    for region in &mut overview.detected_regions {
465        let header_count = region.header_count.max(region.headers.len() as u32);
466        region.header_count = header_count;
467        if !include_headers {
468            region.headers.clear();
469        } else if region.headers.len() > header_limit {
470            region.headers.truncate(header_limit);
471        }
472        region.headers_truncated = region.headers.len() as u32 != header_count;
473        headers_truncated |= region.headers_truncated;
474    }
475
476    let regions_truncated = if overview.detected_regions.len() > region_limit {
477        overview.detected_regions.truncate(region_limit);
478        true
479    } else {
480        false
481    };
482
483    overview.detected_region_count = total_regions;
484    overview.detected_regions_truncated = regions_truncated;
485
486    if regions_truncated {
487        overview.notes.push(format!(
488            "Detected regions truncated to {} ({} total).",
489            region_limit, total_regions
490        ));
491    }
492    if headers_truncated {
493        overview.notes.push(format!(
494            "Region headers truncated to {} columns.",
495            header_limit
496        ));
497    }
498
499    Ok(overview)
500}
501
502fn default_start_row() -> u32 {
503    1
504}
505
506fn default_page_size() -> u32 {
507    50
508}
509
510fn default_include_formulas() -> bool {
511    true
512}
513
514fn default_include_header() -> bool {
515    true
516}
517
518#[derive(Debug, Deserialize, JsonSchema)]
519pub struct SheetPageParams {
520    /// Workbook ID or fork ID
521    #[serde(alias = "workbook_id")]
522    pub workbook_or_fork_id: WorkbookId,
523    /// Sheet name
524    pub sheet_name: String,
525    /// 1-based starting row (default: 1)
526    #[serde(default = "default_start_row")]
527    pub start_row: u32,
528    /// Number of rows per page (default: 50, max: 500)
529    #[serde(default = "default_page_size")]
530    pub page_size: u32,
531    /// Limit to specific columns by letter (e.g., ["A", "C", "D"])
532    #[serde(default)]
533    pub columns: Option<Vec<String>>,
534    /// Limit to columns by header text (matched case-insensitively)
535    #[serde(default)]
536    pub columns_by_header: Option<Vec<String>>,
537    /// Include formulas (default: false in token_dense profile)
538    #[serde(default = "default_include_formulas")]
539    pub include_formulas: bool,
540    /// Include style information (default: false)
541    #[serde(default)]
542    pub include_styles: bool,
543    /// Include header row in response (default: true)
544    #[serde(default = "default_include_header")]
545    pub include_header: bool,
546    /// Output format: "compact" (default in token_dense) or "full" (per-cell objects)
547    #[serde(default)]
548    pub format: Option<SheetPageFormat>,
549}
550
551impl Default for SheetPageParams {
552    fn default() -> Self {
553        SheetPageParams {
554            workbook_or_fork_id: WorkbookId(String::new()),
555            sheet_name: String::new(),
556            start_row: default_start_row(),
557            page_size: default_page_size(),
558            columns: None,
559            columns_by_header: None,
560            include_formulas: default_include_formulas(),
561            include_styles: false,
562            include_header: default_include_header(),
563            format: None,
564        }
565    }
566}
567
568fn default_find_limit() -> u32 {
569    50
570}
571
572#[derive(Debug, Deserialize, JsonSchema)]
573pub struct FindValueParams {
574    /// Workbook ID or fork ID
575    #[serde(alias = "workbook_id")]
576    pub workbook_or_fork_id: WorkbookId,
577    /// Value or pattern to search for
578    pub query: String,
579    /// For label mode: find cells near this label text
580    #[serde(default)]
581    pub label: Option<String>,
582    /// Search mode: "value" (default) or "label" for key-value lookups
583    #[serde(default)]
584    pub mode: Option<FindMode>,
585    /// Match mode for text comparison
586    #[serde(default)]
587    pub match_mode: Option<MatchMode>,
588    /// Case-sensitive matching (default: false)
589    #[serde(default)]
590    pub case_sensitive: bool,
591    /// Limit search to specific sheet
592    #[serde(default)]
593    pub sheet_name: Option<String>,
594    /// Limit search to specific detected region
595    #[serde(default)]
596    pub region_id: Option<u32>,
597    /// Limit search to specific named table
598    #[serde(default)]
599    pub table_name: Option<String>,
600    /// Filter by value types
601    #[serde(default)]
602    pub value_types: Option<Vec<ValueTypeFilter>>,
603    /// Only search in header rows (default: false)
604    #[serde(default)]
605    pub search_headers_only: bool,
606    /// For label mode: direction to look for value
607    #[serde(default)]
608    pub direction: Option<LabelDirection>,
609    /// Maximum matches to return (default: 50)
610    #[serde(default = "default_find_limit")]
611    pub limit: u32,
612    /// Offset for pagination
613    #[serde(default)]
614    pub offset: Option<u32>,
615    /// Context to include with matches
616    #[serde(default)]
617    pub context: Option<FindContext>,
618    /// Number of cells in each direction for context (default: 3)
619    #[serde(default)]
620    pub context_width: Option<u32>,
621}
622
623impl Default for FindValueParams {
624    fn default() -> Self {
625        Self {
626            workbook_or_fork_id: WorkbookId(String::new()),
627            query: String::new(),
628            label: None,
629            mode: None,
630            match_mode: None,
631            case_sensitive: false,
632            sheet_name: None,
633            region_id: None,
634            table_name: None,
635            value_types: None,
636            search_headers_only: false,
637            direction: None,
638            limit: default_find_limit(),
639            offset: None,
640            context: None,
641            context_width: None,
642        }
643    }
644}
645
646#[derive(Debug, Deserialize, JsonSchema, Default)]
647pub struct ReadTableParams {
648    /// Workbook ID or fork ID
649    #[serde(alias = "workbook_id")]
650    pub workbook_or_fork_id: WorkbookId,
651    /// Sheet name (uses first sheet if omitted)
652    #[serde(default)]
653    pub sheet_name: Option<String>,
654    /// Read from a named Excel table
655    #[serde(default)]
656    pub table_name: Option<String>,
657    /// Read from a detected region by ID (from sheet_overview)
658    #[serde(default)]
659    pub region_id: Option<u32>,
660    /// A1-style range (e.g., "A1:D100")
661    #[serde(default)]
662    pub range: Option<String>,
663    /// 1-based row number for headers (auto-detected if omitted)
664    #[serde(default)]
665    pub header_row: Option<u32>,
666    /// Number of header rows for multi-row headers (default: 1)
667    #[serde(default)]
668    pub header_rows: Option<u32>,
669    /// Limit to specific columns by letter (e.g., ["A", "C", "D"])
670    #[serde(default)]
671    pub columns: Option<Vec<String>>,
672    /// Row filters to apply
673    #[serde(default)]
674    pub filters: Option<Vec<TableFilter>>,
675    /// Sampling mode for selecting rows
676    #[serde(default)]
677    pub sample_mode: Option<SampleMode>,
678    /// Maximum rows to return
679    #[serde(default)]
680    pub limit: Option<u32>,
681    /// Offset for pagination; use next_offset from previous response
682    #[serde(default)]
683    pub offset: Option<u32>,
684    /// Output format: "csv" (default), "values" (arrays), or "json" (typed CellValue)
685    #[serde(default)]
686    pub format: Option<TableOutputFormat>,
687    /// Include header row in output (default: true for csv)
688    #[serde(default)]
689    pub include_headers: Option<bool>,
690    /// Include column type information (default: false)
691    #[serde(default)]
692    pub include_types: Option<bool>,
693}
694
695#[derive(Debug, Deserialize, JsonSchema, Clone)]
696pub struct TableFilter {
697    /// Column letter or header name
698    pub column: String,
699    /// Comparison operator
700    pub op: FilterOp,
701    /// Value to compare against
702    pub value: serde_json::Value,
703}
704
705#[derive(Debug, Deserialize, JsonSchema, Default)]
706pub struct TableProfileParams {
707    /// Workbook ID or fork ID
708    #[serde(alias = "workbook_id")]
709    pub workbook_or_fork_id: WorkbookId,
710    /// Sheet name (uses first sheet if omitted)
711    #[serde(default)]
712    pub sheet_name: Option<String>,
713    /// Profile a detected region by ID
714    #[serde(default)]
715    pub region_id: Option<u32>,
716    /// Profile a named Excel table
717    #[serde(default)]
718    pub table_name: Option<String>,
719    /// Sampling mode for selecting sample rows
720    #[serde(default)]
721    pub sample_mode: Option<SampleMode>,
722    /// Number of sample rows to include (default: 5)
723    #[serde(default)]
724    pub sample_size: Option<u32>,
725    /// Return only column types without samples (default: true in token_dense profile)
726    #[serde(default)]
727    pub summary_only: Option<bool>,
728}
729
730#[derive(Debug, Deserialize, JsonSchema)]
731pub struct RangeValuesParams {
732    /// Workbook ID or fork ID
733    #[serde(alias = "workbook_id")]
734    pub workbook_or_fork_id: WorkbookId,
735    /// Sheet name
736    pub sheet_name: String,
737    /// A1-style ranges to read (e.g., ["A1:C10", "E1:E10"])
738    pub ranges: Vec<String>,
739    /// Include detected header row (default: true)
740    #[serde(default)]
741    pub include_headers: Option<bool>,
742    /// Output format: "values" (default), "csv", or "json"
743    #[serde(default)]
744    pub format: Option<TableOutputFormat>,
745    /// Maximum rows per range before pagination
746    #[serde(default)]
747    pub page_size: Option<u32>,
748}
749
750pub async fn sheet_page(
751    state: Arc<AppState>,
752    params: SheetPageParams,
753) -> Result<SheetPageResponse> {
754    if params.page_size == 0 {
755        return Err(anyhow!("page_size must be greater than zero"));
756    }
757
758    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
759    let metrics = workbook.get_sheet_metrics_fast(&params.sheet_name)?;
760    let config = state.config();
761    let output_profile = config.output_profile();
762    let format = params.format.unwrap_or(match output_profile {
763        OutputProfile::TokenDense => SheetPageFormat::Compact,
764        OutputProfile::Verbose => SheetPageFormat::Full,
765    });
766
767    let start_row = params.start_row.max(1);
768    let page_size = params.page_size.min(500);
769    let include_formulas =
770        if params.format.is_none() && matches!(output_profile, OutputProfile::TokenDense) {
771            false
772        } else {
773            params.include_formulas
774        };
775    let include_styles =
776        if params.format.is_none() && matches!(output_profile, OutputProfile::TokenDense) {
777            false
778        } else {
779            params.include_styles
780        };
781    let columns = params.columns.clone();
782    let columns_by_header = params.columns_by_header.clone();
783    let include_header = params.include_header;
784
785    let mut page = workbook.with_sheet(&params.sheet_name, |sheet| {
786        build_page(
787            sheet,
788            start_row,
789            page_size,
790            columns.clone(),
791            columns_by_header.clone(),
792            include_formulas,
793            include_styles,
794            include_header,
795        )
796    })?;
797
798    let max_cells = config.max_cells();
799    let max_payload_bytes = config.max_payload_bytes();
800    let cells_per_row = page.rows.first().map(|row| row.cells.len()).unwrap_or(0);
801    let mut row_limit = cap_rows_by_cells(page.rows.len(), cells_per_row, max_cells);
802
803    if row_limit > 0 {
804        row_limit = cap_rows_by_payload_bytes(row_limit, max_payload_bytes, |count| {
805            let response = build_sheet_page_response(
806                &workbook,
807                &params.sheet_name,
808                format,
809                include_header,
810                &page.header,
811                &page.rows[..count],
812                None,
813            );
814            serde_json::to_vec(&response)
815                .map(|payload| payload.len())
816                .unwrap_or(usize::MAX)
817        });
818    }
819
820    if row_limit < page.rows.len() {
821        page.rows.truncate(row_limit);
822    }
823
824    let last_row_index = page
825        .rows
826        .last()
827        .map(|row| row.row_index)
828        .unwrap_or(start_row.saturating_sub(1));
829    let next_start_row = if last_row_index < metrics.metrics.row_count {
830        Some(last_row_index + 1)
831    } else {
832        None
833    };
834
835    Ok(build_sheet_page_response(
836        &workbook,
837        &params.sheet_name,
838        format,
839        include_header,
840        &page.header,
841        &page.rows,
842        next_start_row,
843    ))
844}
845
846#[derive(Debug, Deserialize, JsonSchema)]
847pub struct SheetFormulaMapParams {
848    /// Workbook ID or fork ID
849    #[serde(alias = "workbook_id")]
850    pub workbook_or_fork_id: WorkbookId,
851    /// Sheet name
852    pub sheet_name: String,
853    /// Limit to A1-style range (e.g., "D2:D100")
854    pub range: Option<String>,
855    /// Expand range references in formulas (default: false)
856    #[serde(default)]
857    pub expand: bool,
858    /// Maximum formula groups to return
859    #[serde(default)]
860    pub limit: Option<u32>,
861    /// Sort by: "address" (default), "complexity" (longest formulas first), "count" (most repeated first)
862    #[serde(default)]
863    pub sort_by: Option<FormulaSortBy>,
864    /// Return only formula text and count without addresses (default: true in token_dense profile)
865    #[serde(default)]
866    pub summary_only: Option<bool>,
867    /// Include cell addresses for each formula group (default: !summary_only)
868    #[serde(default)]
869    pub include_addresses: Option<bool>,
870    /// Maximum addresses to include per formula group (default: 15)
871    #[serde(default)]
872    pub addresses_limit: Option<u32>,
873}
874
875#[derive(Debug, Clone, Copy, Default, Deserialize, JsonSchema)]
876#[serde(rename_all = "snake_case")]
877pub enum FormulaSortBy {
878    #[default]
879    Address,
880    Complexity,
881    Count,
882}
883
884/// Match mode for text searches
885#[derive(Debug, Clone, Copy, Default, Deserialize, JsonSchema, PartialEq, Eq)]
886#[serde(rename_all = "snake_case")]
887pub enum MatchMode {
888    /// Substring match (default)
889    #[default]
890    Contains,
891    /// Exact match
892    Exact,
893    /// Prefix match
894    Prefix,
895    /// Regular expression match
896    Regex,
897}
898
899/// Context to include with find_value matches
900#[derive(Debug, Clone, Copy, Default, Deserialize, JsonSchema, PartialEq, Eq)]
901#[serde(rename_all = "snake_case")]
902pub enum FindContext {
903    /// No context (default)
904    #[default]
905    None,
906    /// Include neighboring cells
907    Neighbors,
908    /// Include full row context
909    Row,
910    /// Include both neighbors and row context
911    Both,
912}
913
914/// Sampling mode for table reads
915#[derive(Debug, Clone, Copy, Default, Deserialize, JsonSchema, PartialEq, Eq)]
916#[serde(rename_all = "snake_case")]
917pub enum SampleMode {
918    /// First N rows (default)
919    #[default]
920    First,
921    /// Last N rows
922    Last,
923    /// Evenly distributed sample
924    Distributed,
925}
926
927/// Granularity for style analysis
928#[derive(Debug, Clone, Copy, Default, Deserialize, JsonSchema, PartialEq, Eq)]
929#[serde(rename_all = "snake_case")]
930pub enum StyleGranularity {
931    /// Group contiguous cells with same style (default)
932    #[default]
933    Runs,
934    /// Report each cell individually
935    Cells,
936}
937
938/// Filter operators for table queries
939#[derive(Debug, Clone, Copy, Deserialize, JsonSchema, PartialEq, Eq)]
940#[serde(rename_all = "snake_case")]
941pub enum FilterOp {
942    /// Equal
943    Eq,
944    /// Not equal
945    #[serde(alias = "ne")]
946    Neq,
947    /// Greater than
948    Gt,
949    /// Less than
950    Lt,
951    /// Greater than or equal
952    Gte,
953    /// Less than or equal
954    Lte,
955    /// Contains substring (text only)
956    Contains,
957    /// Starts with prefix (text only)
958    StartsWith,
959    /// Ends with suffix (text only)
960    EndsWith,
961    /// Value is in list
962    In,
963}
964
965/// Cell value types for filtering
966#[derive(Debug, Clone, Copy, Deserialize, JsonSchema, PartialEq, Eq)]
967#[serde(rename_all = "snake_case")]
968pub enum ValueTypeFilter {
969    Text,
970    Number,
971    Bool,
972    Date,
973    Null,
974}
975
976pub async fn sheet_formula_map(
977    state: Arc<AppState>,
978    params: SheetFormulaMapParams,
979) -> Result<SheetFormulaMapResponse> {
980    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
981    let config = state.config();
982    let output_profile = config.output_profile();
983    let summary_only = params
984        .summary_only
985        .unwrap_or(matches!(output_profile, OutputProfile::TokenDense));
986    let include_addresses = params.include_addresses.unwrap_or(!summary_only);
987    let addresses_limit = params.addresses_limit.unwrap_or(15);
988    let max_items = config.max_items();
989    let max_payload_bytes = config.max_payload_bytes();
990
991    let graph = workbook.formula_graph(&params.sheet_name)?;
992    let all_groups = graph.groups();
993    let mut groups = Vec::new();
994
995    for mut group in all_groups {
996        if let Some(range) = &params.range {
997            group.addresses.retain(|addr| address_in_range(addr, range));
998            if group.addresses.is_empty() {
999                continue;
1000            }
1001        }
1002
1003        let address_count = group.addresses.len();
1004
1005        if summary_only || !include_addresses {
1006            group.addresses.clear();
1007        } else if !params.expand && address_count > addresses_limit as usize {
1008            group.addresses.truncate(addresses_limit as usize);
1009        }
1010
1011        groups.push(group);
1012    }
1013
1014    let total_groups = groups.len();
1015
1016    let sort_by = params.sort_by.unwrap_or_default();
1017    match sort_by {
1018        FormulaSortBy::Address => {
1019            groups.sort_by(|a, b| a.fingerprint.cmp(&b.fingerprint));
1020        }
1021        FormulaSortBy::Complexity => {
1022            groups.sort_by(|a, b| b.formula.len().cmp(&a.formula.len()));
1023        }
1024        FormulaSortBy::Count => {
1025            groups.sort_by(|a, b| {
1026                let count_a = a.count.unwrap_or(a.addresses.len() as u32);
1027                let count_b = b.count.unwrap_or(b.addresses.len() as u32);
1028                count_b.cmp(&count_a)
1029            });
1030        }
1031    }
1032
1033    if let Some(limit) = params.limit
1034        && groups.len() > limit as usize
1035    {
1036        groups.truncate(limit as usize);
1037    }
1038
1039    if let Some(max_items) = max_items
1040        && groups.len() > max_items
1041    {
1042        groups.truncate(max_items);
1043    }
1044
1045    if let Some(max_bytes) = max_payload_bytes {
1046        let group_limit = cap_rows_by_payload_bytes(groups.len(), Some(max_bytes), |count| {
1047            let response = SheetFormulaMapResponse {
1048                workbook_id: workbook.id.clone(),
1049                workbook_short_id: workbook.short_id.clone(),
1050                sheet_name: params.sheet_name.clone(),
1051                groups: groups[..count].to_vec(),
1052                next_offset: None,
1053            };
1054            serde_json::to_vec(&response)
1055                .map(|payload| payload.len())
1056                .unwrap_or(usize::MAX)
1057        });
1058
1059        if group_limit < groups.len() {
1060            groups.truncate(group_limit);
1061        }
1062    }
1063
1064    let next_offset = if groups.len() < total_groups {
1065        Some(groups.len() as u32)
1066    } else {
1067        None
1068    };
1069
1070    let response = SheetFormulaMapResponse {
1071        workbook_id: workbook.id.clone(),
1072        workbook_short_id: workbook.short_id.clone(),
1073        sheet_name: params.sheet_name.clone(),
1074        groups,
1075        next_offset,
1076    };
1077    Ok(response)
1078}
1079
1080#[derive(Debug, Deserialize, JsonSchema)]
1081pub struct FormulaTraceParams {
1082    #[serde(alias = "workbook_id")]
1083    pub workbook_or_fork_id: WorkbookId,
1084    pub sheet_name: String,
1085    pub cell_address: String,
1086    pub direction: TraceDirection,
1087    pub depth: Option<u32>,
1088    pub limit: Option<u32>,
1089    #[serde(default)]
1090    pub page_size: Option<usize>,
1091    #[serde(default)]
1092    pub cursor: Option<TraceCursor>,
1093}
1094
1095pub async fn formula_trace(
1096    state: Arc<AppState>,
1097    params: FormulaTraceParams,
1098) -> Result<FormulaTraceResponse> {
1099    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
1100    let graph = workbook.formula_graph(&params.sheet_name)?;
1101    let formula_lookup = build_formula_lookup(&graph);
1102    let depth = params.depth.unwrap_or(3).clamp(1, 5);
1103    let page_size = params
1104        .page_size
1105        .or_else(|| params.limit.map(|v| v as usize))
1106        .unwrap_or(DEFAULT_TRACE_PAGE_SIZE)
1107        .clamp(TRACE_PAGE_MIN, TRACE_PAGE_MAX);
1108
1109    let origin = params.cell_address.to_uppercase();
1110    let config = TraceConfig {
1111        direction: &params.direction,
1112        origin: &origin,
1113        sheet_name: &params.sheet_name,
1114        depth_limit: depth,
1115        page_size,
1116    };
1117    let (layers, next_cursor, notes) = build_trace_layers(
1118        &workbook,
1119        &graph,
1120        &formula_lookup,
1121        &config,
1122        params.cursor.clone(),
1123    )?;
1124
1125    let response = FormulaTraceResponse {
1126        workbook_id: workbook.id.clone(),
1127        workbook_short_id: workbook.short_id.clone(),
1128        sheet_name: params.sheet_name.clone(),
1129        origin,
1130        direction: params.direction.clone(),
1131        layers,
1132        next_cursor,
1133        notes,
1134    };
1135    Ok(response)
1136}
1137
1138#[derive(Debug, Deserialize, JsonSchema)]
1139pub struct NamedRangesParams {
1140    #[serde(alias = "workbook_id")]
1141    pub workbook_or_fork_id: WorkbookId,
1142    pub sheet_name: Option<String>,
1143    pub name_prefix: Option<String>,
1144}
1145
1146pub async fn named_ranges(
1147    state: Arc<AppState>,
1148    params: NamedRangesParams,
1149) -> Result<NamedRangesResponse> {
1150    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
1151    let mut items = workbook.named_items()?;
1152
1153    if let Some(sheet_filter) = &params.sheet_name {
1154        items.retain(|item| {
1155            item.sheet_name
1156                .as_ref()
1157                .map(|name| name.eq_ignore_ascii_case(sheet_filter))
1158                .unwrap_or(false)
1159        });
1160    }
1161    if let Some(prefix) = &params.name_prefix {
1162        let prefix_lower = prefix.to_ascii_lowercase();
1163        items.retain(|item| item.name.to_ascii_lowercase().starts_with(&prefix_lower));
1164    }
1165
1166    let response = NamedRangesResponse {
1167        workbook_id: workbook.id.clone(),
1168        workbook_short_id: workbook.short_id.clone(),
1169        items,
1170    };
1171    Ok(response)
1172}
1173
1174struct PageBuildResult {
1175    rows: Vec<RowSnapshot>,
1176    header: Option<RowSnapshot>,
1177}
1178
1179#[allow(clippy::too_many_arguments)]
1180fn build_page(
1181    sheet: &umya_spreadsheet::Worksheet,
1182    start_row: u32,
1183    page_size: u32,
1184    columns: Option<Vec<String>>,
1185    columns_by_header: Option<Vec<String>>,
1186    include_formulas: bool,
1187    include_styles: bool,
1188    include_header: bool,
1189) -> PageBuildResult {
1190    let max_col = sheet.get_highest_column();
1191    let end_row = (start_row + page_size - 1).min(sheet.get_highest_row().max(start_row));
1192    let column_indices =
1193        resolve_columns_with_headers(sheet, columns.as_ref(), columns_by_header.as_ref(), max_col);
1194
1195    let header = if include_header {
1196        Some(build_row_snapshot(
1197            sheet,
1198            1,
1199            &column_indices,
1200            include_formulas,
1201            include_styles,
1202        ))
1203    } else {
1204        None
1205    };
1206
1207    let mut rows = Vec::new();
1208    for row_idx in start_row..=end_row {
1209        rows.push(build_row_snapshot(
1210            sheet,
1211            row_idx,
1212            &column_indices,
1213            include_formulas,
1214            include_styles,
1215        ));
1216    }
1217
1218    PageBuildResult { rows, header }
1219}
1220
1221fn build_row_snapshot(
1222    sheet: &umya_spreadsheet::Worksheet,
1223    row_index: u32,
1224    columns: &[u32],
1225    include_formulas: bool,
1226    include_styles: bool,
1227) -> RowSnapshot {
1228    let mut cells = Vec::new();
1229    for &col in columns {
1230        if let Some(cell) = sheet.get_cell((col, row_index)) {
1231            cells.push(build_cell_snapshot(cell, include_formulas, include_styles));
1232        } else {
1233            let address = crate::utils::cell_address(col, row_index);
1234            cells.push(CellSnapshot {
1235                address,
1236                value: None,
1237                formula: None,
1238                cached_value: None,
1239                number_format: None,
1240                style_tags: Vec::new(),
1241                notes: Vec::new(),
1242            });
1243        }
1244    }
1245
1246    RowSnapshot { row_index, cells }
1247}
1248
1249fn build_cell_snapshot(
1250    cell: &umya_spreadsheet::Cell,
1251    include_formulas: bool,
1252    include_styles: bool,
1253) -> CellSnapshot {
1254    let address = cell.get_coordinate().get_coordinate();
1255    let value = crate::workbook::cell_to_value(cell);
1256    let formula = if include_formulas && cell.is_formula() {
1257        Some(cell.get_formula().to_string())
1258    } else {
1259        None
1260    };
1261    let cached_value = if cell.is_formula() {
1262        value.clone()
1263    } else {
1264        None
1265    };
1266    let number_format = if include_styles {
1267        cell.get_style()
1268            .get_number_format()
1269            .map(|fmt| fmt.get_format_code().to_string())
1270    } else {
1271        None
1272    };
1273    let style_tags = if include_styles {
1274        crate::analysis::style::tag_cell(cell)
1275            .map(|(_, tagging)| tagging.tags)
1276            .unwrap_or_default()
1277    } else {
1278        Vec::new()
1279    };
1280
1281    CellSnapshot {
1282        address,
1283        value,
1284        formula,
1285        cached_value,
1286        number_format,
1287        style_tags,
1288        notes: Vec::new(),
1289    }
1290}
1291
1292fn resolve_columns(columns: Option<&Vec<String>>, max_column: u32) -> Vec<u32> {
1293    use std::collections::BTreeSet;
1294    use umya_spreadsheet::helper::coordinate::column_index_from_string;
1295
1296    let mut indices = BTreeSet::new();
1297    if let Some(specs) = columns {
1298        for spec in specs {
1299            if let Some((start, end)) = spec.split_once(':') {
1300                let start_idx = column_index_from_string(start);
1301                let end_idx = column_index_from_string(end);
1302                let (min_idx, max_idx) = if start_idx <= end_idx {
1303                    (start_idx, end_idx)
1304                } else {
1305                    (end_idx, start_idx)
1306                };
1307                for idx in min_idx..=max_idx {
1308                    indices.insert(idx);
1309                }
1310            } else {
1311                indices.insert(column_index_from_string(spec));
1312            }
1313        }
1314    } else {
1315        for idx in 1..=max_column.max(1) {
1316            indices.insert(idx);
1317        }
1318    }
1319
1320    indices.into_iter().collect()
1321}
1322
1323fn resolve_columns_with_headers(
1324    sheet: &umya_spreadsheet::Worksheet,
1325    columns: Option<&Vec<String>>,
1326    columns_by_header: Option<&Vec<String>>,
1327    max_column: u32,
1328) -> Vec<u32> {
1329    if columns_by_header.is_none() {
1330        return resolve_columns(columns, max_column);
1331    }
1332
1333    let mut selected = Vec::new();
1334    let header_targets: Vec<String> = columns_by_header
1335        .unwrap()
1336        .iter()
1337        .map(|h| h.trim().to_ascii_lowercase())
1338        .collect();
1339
1340    for col_idx in 1..=max_column.max(1) {
1341        let header_cell = sheet.get_cell((col_idx, 1u32));
1342        let header_value = header_cell
1343            .and_then(cell_to_value)
1344            .map(cell_value_to_string_lower);
1345        if let Some(hval) = header_value
1346            && header_targets.iter().any(|target| target == &hval)
1347        {
1348            selected.push(col_idx);
1349        }
1350    }
1351
1352    if selected.is_empty() {
1353        resolve_columns(columns, max_column)
1354    } else {
1355        selected
1356    }
1357}
1358
1359fn cell_value_to_string_lower(value: CellValue) -> String {
1360    match value {
1361        CellValue::Text(s) => s.to_ascii_lowercase(),
1362        CellValue::Number(n) => n.to_string().to_ascii_lowercase(),
1363        CellValue::Bool(b) => b.to_string(),
1364        CellValue::Error(e) => e.to_ascii_lowercase(),
1365        CellValue::Date(d) => d.to_ascii_lowercase(),
1366    }
1367}
1368
1369fn cell_value_to_plain_string(value: &CellValue) -> String {
1370    match value {
1371        CellValue::Text(s) => s.clone(),
1372        CellValue::Number(n) => n.to_string(),
1373        CellValue::Bool(b) => b.to_string(),
1374        CellValue::Error(e) => e.clone(),
1375        CellValue::Date(d) => d.clone(),
1376    }
1377}
1378
1379fn cell_value_to_kind(value: &CellValue) -> CellValueKind {
1380    match value {
1381        CellValue::Text(_) => CellValueKind::Text,
1382        CellValue::Number(_) => CellValueKind::Number,
1383        CellValue::Bool(_) => CellValueKind::Bool,
1384        CellValue::Error(_) => CellValueKind::Error,
1385        CellValue::Date(_) => CellValueKind::Date,
1386    }
1387}
1388
1389fn cell_value_to_primitive(value: &CellValue) -> CellValuePrimitive {
1390    match value {
1391        CellValue::Text(s) => CellValuePrimitive::Text(s.clone()),
1392        CellValue::Number(n) => CellValuePrimitive::Number(*n),
1393        CellValue::Bool(b) => CellValuePrimitive::Bool(*b),
1394        CellValue::Error(e) => CellValuePrimitive::Text(e.clone()),
1395        CellValue::Date(d) => CellValuePrimitive::Text(d.clone()),
1396    }
1397}
1398
1399fn csv_escape_field(field: &str) -> String {
1400    if field.contains(',') || field.contains('"') || field.contains('\n') || field.contains('\r') {
1401        let mut escaped = String::with_capacity(field.len() + 2);
1402        escaped.push('"');
1403        for ch in field.chars() {
1404            if ch == '"' {
1405                escaped.push('"');
1406            }
1407            escaped.push(ch);
1408        }
1409        escaped.push('"');
1410        escaped
1411    } else {
1412        field.to_string()
1413    }
1414}
1415
1416fn push_csv_row<I>(buffer: &mut String, fields: I)
1417where
1418    I: IntoIterator<Item = String>,
1419{
1420    let mut first = true;
1421    for field in fields {
1422        if !first {
1423            buffer.push(',');
1424        }
1425        first = false;
1426        let escaped = csv_escape_field(&field);
1427        buffer.push_str(&escaped);
1428    }
1429    buffer.push('\n');
1430}
1431
1432fn table_rows_to_values(
1433    headers: &[String],
1434    rows: &[TableRow],
1435) -> Vec<Vec<Option<CellValuePrimitive>>> {
1436    let mut out = Vec::with_capacity(rows.len());
1437    for row in rows {
1438        let mut vals = Vec::with_capacity(headers.len());
1439        for header in headers {
1440            let value = row
1441                .get(header)
1442                .and_then(|cell| cell.as_ref())
1443                .map(cell_value_to_primitive);
1444            vals.push(value);
1445        }
1446        out.push(vals);
1447    }
1448    out
1449}
1450
1451fn table_rows_to_types(headers: &[String], rows: &[TableRow]) -> Vec<Vec<Option<CellValueKind>>> {
1452    let mut out = Vec::with_capacity(rows.len());
1453    for row in rows {
1454        let mut kinds = Vec::with_capacity(headers.len());
1455        for header in headers {
1456            let kind = row
1457                .get(header)
1458                .and_then(|cell| cell.as_ref())
1459                .map(cell_value_to_kind);
1460            kinds.push(kind);
1461        }
1462        out.push(kinds);
1463    }
1464    out
1465}
1466
1467fn table_rows_to_csv(headers: &[String], rows: &[TableRow], include_headers: bool) -> String {
1468    let mut csv = String::new();
1469    if include_headers {
1470        push_csv_row(&mut csv, headers.iter().cloned());
1471    }
1472    for row in rows {
1473        let values = headers.iter().map(|header| {
1474            row.get(header)
1475                .and_then(|cell| cell.as_ref())
1476                .map(cell_value_to_plain_string)
1477                .unwrap_or_default()
1478        });
1479        push_csv_row(&mut csv, values);
1480    }
1481    csv
1482}
1483
1484fn filter_table_row(row: &TableRow, headers: &[String]) -> TableRow {
1485    let mut filtered = TableRow::new();
1486    for header in headers {
1487        if let Some(value) = row.get(header) {
1488            filtered.insert(header.clone(), value.clone());
1489        }
1490    }
1491    filtered
1492}
1493
1494type ReadTablePayload = (
1495    Vec<String>,
1496    Vec<TableRow>,
1497    Option<Vec<Vec<Option<CellValuePrimitive>>>>,
1498    Option<Vec<Vec<Option<CellValueKind>>>>,
1499    Option<String>,
1500);
1501
1502fn build_read_table_payload(
1503    format: TableOutputFormat,
1504    headers: &[String],
1505    rows: &[TableRow],
1506    include_headers: bool,
1507    include_types: bool,
1508) -> ReadTablePayload {
1509    let headers_out = if include_headers {
1510        headers.to_vec()
1511    } else {
1512        Vec::new()
1513    };
1514
1515    let types_out = if include_types {
1516        Some(table_rows_to_types(headers, rows))
1517    } else {
1518        None
1519    };
1520
1521    match format {
1522        TableOutputFormat::Json => (headers_out, rows.to_vec(), None, types_out, None),
1523        TableOutputFormat::Values => (
1524            headers_out,
1525            Vec::new(),
1526            Some(table_rows_to_values(headers, rows)),
1527            types_out,
1528            None,
1529        ),
1530        TableOutputFormat::Csv => (
1531            Vec::new(),
1532            Vec::new(),
1533            None,
1534            types_out,
1535            Some(table_rows_to_csv(headers, rows, include_headers)),
1536        ),
1537    }
1538}
1539
1540fn cell_matrix_to_values(rows: &[Vec<Option<CellValue>>]) -> Vec<Vec<Option<CellValuePrimitive>>> {
1541    let mut out = Vec::with_capacity(rows.len());
1542    for row in rows {
1543        let mut vals = Vec::with_capacity(row.len());
1544        for cell in row {
1545            vals.push(cell.as_ref().map(cell_value_to_primitive));
1546        }
1547        out.push(vals);
1548    }
1549    out
1550}
1551
1552fn cell_matrix_to_csv(rows: &[Vec<Option<CellValue>>]) -> String {
1553    let mut csv = String::new();
1554    for row in rows {
1555        let values = row.iter().map(|cell| {
1556            cell.as_ref()
1557                .map(cell_value_to_plain_string)
1558                .unwrap_or_default()
1559        });
1560        push_csv_row(&mut csv, values);
1561    }
1562    csv
1563}
1564
1565fn build_range_values_entry(
1566    format: TableOutputFormat,
1567    range: &str,
1568    rows: &[Vec<Option<CellValue>>],
1569    next_start_row: Option<u32>,
1570) -> RangeValuesEntry {
1571    match format {
1572        TableOutputFormat::Json => RangeValuesEntry {
1573            range: range.to_string(),
1574            rows: Some(rows.to_vec()),
1575            values: None,
1576            csv: None,
1577            next_start_row,
1578        },
1579        TableOutputFormat::Values => RangeValuesEntry {
1580            range: range.to_string(),
1581            rows: None,
1582            values: Some(cell_matrix_to_values(rows)),
1583            csv: None,
1584            next_start_row,
1585        },
1586        TableOutputFormat::Csv => RangeValuesEntry {
1587            range: range.to_string(),
1588            rows: None,
1589            values: None,
1590            csv: Some(cell_matrix_to_csv(rows)),
1591            next_start_row,
1592        },
1593    }
1594}
1595
1596fn cap_rows_by_cells(row_count: usize, cells_per_row: usize, max_cells: Option<usize>) -> usize {
1597    let Some(max_cells) = max_cells else {
1598        return row_count;
1599    };
1600    if cells_per_row == 0 {
1601        return row_count;
1602    }
1603    let allowed = max_cells / cells_per_row;
1604    row_count.min(allowed)
1605}
1606
1607fn cap_rows_by_payload_bytes<F>(
1608    row_count: usize,
1609    max_bytes: Option<usize>,
1610    mut size_for_rows: F,
1611) -> usize
1612where
1613    F: FnMut(usize) -> usize,
1614{
1615    let Some(max_bytes) = max_bytes else {
1616        return row_count;
1617    };
1618    if row_count == 0 {
1619        return 0;
1620    }
1621    let mut low = 0usize;
1622    let mut high = row_count;
1623    while low < high {
1624        let mid = (low + high).div_ceil(2);
1625        if size_for_rows(mid) <= max_bytes {
1626            low = mid;
1627        } else {
1628            high = mid - 1;
1629        }
1630    }
1631    low
1632}
1633
1634fn build_compact_payload(
1635    header: &Option<RowSnapshot>,
1636    rows: &[RowSnapshot],
1637    include_header: bool,
1638) -> SheetPageCompact {
1639    let headers = derive_headers(header, rows);
1640    let header_row = if include_header {
1641        header
1642            .as_ref()
1643            .map(|h| h.cells.iter().map(|c| c.value.clone()).collect())
1644            .unwrap_or_default()
1645    } else {
1646        Vec::new()
1647    };
1648    let data_rows = rows
1649        .iter()
1650        .map(|row| {
1651            let mut vals: Vec<Option<CellValue>> = Vec::new();
1652            vals.push(Some(CellValue::Number(row.row_index as f64)));
1653            vals.extend(row.cells.iter().map(|c| c.value.clone()));
1654            vals
1655        })
1656        .collect();
1657
1658    SheetPageCompact {
1659        headers,
1660        header_row,
1661        rows: data_rows,
1662    }
1663}
1664
1665fn build_values_only_payload(
1666    header: &Option<RowSnapshot>,
1667    rows: &[RowSnapshot],
1668    include_header: bool,
1669) -> SheetPageValues {
1670    let mut data = Vec::new();
1671    if include_header && let Some(h) = header {
1672        data.push(h.cells.iter().map(|c| c.value.clone()).collect());
1673    }
1674    for row in rows {
1675        data.push(row.cells.iter().map(|c| c.value.clone()).collect());
1676    }
1677
1678    SheetPageValues { rows: data }
1679}
1680
1681fn build_sheet_page_response(
1682    workbook: &WorkbookContext,
1683    sheet_name: &str,
1684    format: SheetPageFormat,
1685    include_header: bool,
1686    header: &Option<RowSnapshot>,
1687    rows: &[RowSnapshot],
1688    next_start_row: Option<u32>,
1689) -> SheetPageResponse {
1690    let compact_payload = if matches!(format, SheetPageFormat::Compact) {
1691        Some(build_compact_payload(header, rows, include_header))
1692    } else {
1693        None
1694    };
1695
1696    let values_only_payload = if matches!(format, SheetPageFormat::ValuesOnly) {
1697        Some(build_values_only_payload(header, rows, include_header))
1698    } else {
1699        None
1700    };
1701
1702    let rows_payload = if matches!(format, SheetPageFormat::Full) {
1703        rows.to_vec()
1704    } else {
1705        Vec::new()
1706    };
1707
1708    let header_row = if include_header && matches!(format, SheetPageFormat::Full) {
1709        header.clone()
1710    } else {
1711        None
1712    };
1713
1714    SheetPageResponse {
1715        workbook_id: workbook.id.clone(),
1716        workbook_short_id: workbook.short_id.clone(),
1717        sheet_name: sheet_name.to_string(),
1718        rows: rows_payload,
1719        next_start_row,
1720        header_row,
1721        compact: compact_payload,
1722        values_only: values_only_payload,
1723        format,
1724    }
1725}
1726
1727fn derive_headers(header: &Option<RowSnapshot>, rows: &[RowSnapshot]) -> Vec<String> {
1728    if let Some(h) = header {
1729        let mut headers: Vec<String> = h
1730            .cells
1731            .iter()
1732            .map(|c| match &c.value {
1733                Some(CellValue::Text(t)) => t.clone(),
1734                Some(CellValue::Number(n)) => n.to_string(),
1735                Some(CellValue::Bool(b)) => b.to_string(),
1736                Some(CellValue::Date(d)) => d.clone(),
1737                Some(CellValue::Error(e)) => e.clone(),
1738                None => c.address.clone(),
1739            })
1740            .collect();
1741        headers.insert(0, "Row".to_string());
1742        headers
1743    } else if let Some(first) = rows.first() {
1744        let mut headers = Vec::new();
1745        headers.push("Row".to_string());
1746        for cell in &first.cells {
1747            headers.push(cell.address.clone());
1748        }
1749        headers
1750    } else {
1751        vec![]
1752    }
1753}
1754fn default_stats_sample() -> usize {
1755    500
1756}
1757
1758#[derive(Debug, Deserialize, JsonSchema)]
1759pub struct SheetStatisticsParams {
1760    /// Workbook ID or fork ID
1761    #[serde(alias = "workbook_id")]
1762    pub workbook_or_fork_id: WorkbookId,
1763    /// Sheet name
1764    pub sheet_name: String,
1765    /// Number of rows to sample for statistics (default: 500)
1766    #[serde(default)]
1767    pub sample_rows: Option<usize>,
1768    /// Return stats without sample values (default: true in token_dense profile)
1769    #[serde(default)]
1770    pub summary_only: Option<bool>,
1771}
1772
1773pub async fn sheet_statistics(
1774    state: Arc<AppState>,
1775    params: SheetStatisticsParams,
1776) -> Result<SheetStatisticsResponse> {
1777    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
1778    let config = state.config();
1779    let output_profile = config.output_profile();
1780    let summary_only = params
1781        .summary_only
1782        .unwrap_or(matches!(output_profile, OutputProfile::TokenDense));
1783    let sheet_metrics = workbook.get_sheet_metrics_fast(&params.sheet_name)?;
1784    let sample_rows = params.sample_rows.unwrap_or_else(default_stats_sample);
1785    let stats = workbook.with_sheet(&params.sheet_name, |sheet| {
1786        stats::compute_sheet_statistics(sheet, sample_rows)
1787    })?;
1788    let mut numeric_columns = stats.numeric_columns;
1789    let mut text_columns = stats.text_columns;
1790
1791    if summary_only {
1792        for column in &mut numeric_columns {
1793            column.samples.clear();
1794        }
1795        for column in &mut text_columns {
1796            column.samples.clear();
1797        }
1798    }
1799
1800    let max_items = config.max_items();
1801    let max_payload_bytes = config.max_payload_bytes();
1802
1803    if let Some(max_items) = max_items {
1804        if numeric_columns.len() > max_items {
1805            numeric_columns.truncate(max_items);
1806        }
1807        if text_columns.len() > max_items {
1808            text_columns.truncate(max_items);
1809        }
1810    }
1811
1812    if let Some(max_bytes) = max_payload_bytes {
1813        let response_size = |numeric: &Vec<ColumnSummary>, text: &Vec<ColumnSummary>| {
1814            let response = SheetStatisticsResponse {
1815                workbook_id: workbook.id.clone(),
1816                workbook_short_id: workbook.short_id.clone(),
1817                sheet_name: params.sheet_name.clone(),
1818                row_count: sheet_metrics.metrics.row_count,
1819                column_count: sheet_metrics.metrics.column_count,
1820                density: stats.density,
1821                numeric_columns: numeric.clone(),
1822                text_columns: text.clone(),
1823                null_counts: stats.null_counts.clone(),
1824                duplicate_warnings: stats.duplicate_warnings.clone(),
1825            };
1826            serde_json::to_vec(&response)
1827                .map(|payload| payload.len())
1828                .unwrap_or(usize::MAX)
1829        };
1830
1831        let mut current_size = response_size(&numeric_columns, &text_columns);
1832        if current_size > max_bytes && !summary_only {
1833            for column in &mut numeric_columns {
1834                column.samples.clear();
1835            }
1836            for column in &mut text_columns {
1837                column.samples.clear();
1838            }
1839            current_size = response_size(&numeric_columns, &text_columns);
1840        }
1841
1842        while current_size > max_bytes && (!text_columns.is_empty() || !numeric_columns.is_empty())
1843        {
1844            if !text_columns.is_empty() {
1845                text_columns.pop();
1846            } else if !numeric_columns.is_empty() {
1847                numeric_columns.pop();
1848            }
1849            current_size = response_size(&numeric_columns, &text_columns);
1850        }
1851    }
1852
1853    Ok(SheetStatisticsResponse {
1854        workbook_id: workbook.id.clone(),
1855        workbook_short_id: workbook.short_id.clone(),
1856        sheet_name: params.sheet_name,
1857        row_count: sheet_metrics.metrics.row_count,
1858        column_count: sheet_metrics.metrics.column_count,
1859        density: stats.density,
1860        numeric_columns,
1861        text_columns,
1862        null_counts: stats.null_counts,
1863        duplicate_warnings: stats.duplicate_warnings,
1864    })
1865}
1866
1867fn address_in_range(address: &str, range: &str) -> bool {
1868    parse_range(range).is_none_or(|((start_col, start_row), (end_col, end_row))| {
1869        if let Some((col, row)) = parse_address(address) {
1870            col >= start_col && col <= end_col && row >= start_row && row <= end_row
1871        } else {
1872            false
1873        }
1874    })
1875}
1876
1877fn parse_range(range: &str) -> Option<((u32, u32), (u32, u32))> {
1878    let mut parts = range.split(':');
1879    let start = parts.next()?;
1880    let end = parts.next().unwrap_or(start);
1881    let start_idx = parse_address(start)?;
1882    let end_idx = parse_address(end)?;
1883    Some((
1884        (start_idx.0.min(end_idx.0), start_idx.1.min(end_idx.1)),
1885        (start_idx.0.max(end_idx.0), start_idx.1.max(end_idx.1)),
1886    ))
1887}
1888
1889fn parse_address(address: &str) -> Option<(u32, u32)> {
1890    use umya_spreadsheet::helper::coordinate::index_from_coordinate;
1891    let (col, row, _, _) = index_from_coordinate(address);
1892    match (col, row) {
1893        (Some(c), Some(r)) => Some((c, r)),
1894        _ => None,
1895    }
1896}
1897
1898#[derive(Clone)]
1899struct TableTarget {
1900    sheet_name: String,
1901    table_name: Option<String>,
1902    range: ((u32, u32), (u32, u32)),
1903    header_hint: Option<u32>,
1904}
1905
1906fn resolve_table_target(
1907    workbook: &WorkbookContext,
1908    params: &ReadTableParams,
1909) -> Result<TableTarget> {
1910    if let Some(region_id) = params.region_id
1911        && let Some(sheet) = &params.sheet_name
1912        && let Ok(region) = workbook.detected_region(sheet, region_id)
1913    {
1914        return Ok(TableTarget {
1915            sheet_name: sheet.clone(),
1916            table_name: None,
1917            range: parse_range(&region.bounds).unwrap_or(((1, 1), (1, 1))),
1918            header_hint: region.header_row,
1919        });
1920    }
1921
1922    if let Some(table_name) = &params.table_name {
1923        let items = workbook.named_items()?;
1924        for item in items {
1925            if item.name.eq_ignore_ascii_case(table_name)
1926                || item
1927                    .name
1928                    .to_ascii_lowercase()
1929                    .contains(&table_name.to_ascii_lowercase())
1930            {
1931                let mut sheet_name = item
1932                    .sheet_name
1933                    .clone()
1934                    .or_else(|| params.sheet_name.clone())
1935                    .unwrap_or_else(|| workbook.sheet_names().first().cloned().unwrap_or_default());
1936                let refers_to = item.refers_to.trim_start_matches('=');
1937                let mut range_part = refers_to;
1938                if let Some((sheet_part, rest)) = refers_to.split_once('!') {
1939                    sheet_name = sheet_part.trim_matches('\'').to_string();
1940                    range_part = rest;
1941                }
1942                if let Some(range) = parse_range(range_part) {
1943                    return Ok(TableTarget {
1944                        sheet_name,
1945                        table_name: Some(item.name.clone()),
1946                        range,
1947                        header_hint: if item.kind == NamedItemKind::Table {
1948                            Some(range.0.1)
1949                        } else {
1950                            None
1951                        },
1952                    });
1953                }
1954            }
1955        }
1956    }
1957
1958    let sheet_name = params
1959        .sheet_name
1960        .clone()
1961        .unwrap_or_else(|| workbook.sheet_names().first().cloned().unwrap_or_default());
1962
1963    if let Some(rng) = &params.range
1964        && let Some(range) = parse_range(rng)
1965    {
1966        return Ok(TableTarget {
1967            sheet_name,
1968            table_name: None,
1969            range,
1970            header_hint: None,
1971        });
1972    }
1973
1974    let metrics = workbook.get_sheet_metrics_fast(&sheet_name)?;
1975    let end_col = metrics.metrics.column_count.max(1);
1976    let end_row = metrics.metrics.row_count.max(1);
1977    Ok(TableTarget {
1978        sheet_name,
1979        table_name: None,
1980        range: ((1, 1), (end_col, end_row)),
1981        header_hint: None,
1982    })
1983}
1984
1985#[allow(clippy::too_many_arguments)]
1986fn extract_table_rows(
1987    sheet: &umya_spreadsheet::Worksheet,
1988    target: &TableTarget,
1989    header_row: Option<u32>,
1990    header_rows: Option<u32>,
1991    columns: Option<Vec<String>>,
1992    filters: Option<Vec<TableFilter>>,
1993    limit: usize,
1994    offset: usize,
1995    sample_mode: SampleMode,
1996) -> Result<(Vec<String>, Vec<TableRow>, u32)> {
1997    let ((start_col, start_row), (end_col, end_row)) = target.range;
1998    let mut header_start = header_row.or(target.header_hint).unwrap_or(start_row);
1999    if header_start < start_row {
2000        header_start = start_row;
2001    }
2002    if header_start > end_row {
2003        header_start = start_row;
2004    }
2005    let header_rows_count = header_rows.unwrap_or(1).max(1);
2006    let data_start_row = (header_start + header_rows_count).max(start_row + header_rows_count);
2007    let column_indices: Vec<u32> = if let Some(cols) = columns.as_ref() {
2008        resolve_columns(Some(cols), end_col).into_iter().collect()
2009    } else {
2010        (start_col..=end_col).collect()
2011    };
2012
2013    let headers = build_headers(sheet, &column_indices, header_start, header_rows_count);
2014    let mut all_rows: Vec<TableRow> = Vec::new();
2015    let mut total_rows: u32 = 0;
2016
2017    for row_idx in data_start_row..=end_row {
2018        let mut row = BTreeMap::new();
2019        for (i, col_idx) in column_indices.iter().enumerate() {
2020            let header = headers
2021                .get(i)
2022                .cloned()
2023                .unwrap_or_else(|| format!("Col{col_idx}"));
2024            let value = sheet.get_cell((*col_idx, row_idx)).and_then(cell_to_value);
2025            row.insert(header, value);
2026        }
2027        if !row_passes_filters(&row, filters.as_ref()) {
2028            continue;
2029        }
2030        total_rows += 1;
2031        if matches!(sample_mode, SampleMode::First) && total_rows as usize > offset + limit {
2032            continue;
2033        }
2034        all_rows.push(row);
2035    }
2036
2037    let rows = sample_rows(all_rows, limit, offset, sample_mode);
2038
2039    Ok((headers, rows, total_rows))
2040}
2041
2042fn build_headers(
2043    sheet: &umya_spreadsheet::Worksheet,
2044    columns: &[u32],
2045    header_start: u32,
2046    header_rows: u32,
2047) -> Vec<String> {
2048    let mut headers = Vec::new();
2049    for col_idx in columns {
2050        let mut parts = Vec::new();
2051        for h in header_start..(header_start + header_rows) {
2052            let (origin_col, origin_row) = sheet.map_merged_cell((*col_idx, h));
2053            if let Some(value) = sheet
2054                .get_cell((origin_col, origin_row))
2055                .and_then(cell_to_value)
2056            {
2057                match value {
2058                    CellValue::Text(ref s) if s.trim().is_empty() => {}
2059                    CellValue::Text(s) => parts.push(s),
2060                    CellValue::Number(n) => parts.push(n.to_string()),
2061                    CellValue::Bool(b) => parts.push(b.to_string()),
2062                    CellValue::Error(e) => parts.push(e),
2063                    CellValue::Date(d) => parts.push(d),
2064                }
2065            }
2066        }
2067        if parts.is_empty() {
2068            headers.push(crate::utils::column_number_to_name(*col_idx));
2069        } else {
2070            headers.push(parts.join(" / "));
2071        }
2072    }
2073
2074    if headers.iter().all(|h| h.trim().is_empty()) {
2075        return columns
2076            .iter()
2077            .map(|c| crate::utils::column_number_to_name(*c))
2078            .collect();
2079    }
2080
2081    dedupe_headers(headers)
2082}
2083
2084fn dedupe_headers(mut headers: Vec<String>) -> Vec<String> {
2085    let mut seen: HashMap<String, u32> = HashMap::new();
2086    for h in headers.iter_mut() {
2087        let key = h.clone();
2088        if key.trim().is_empty() {
2089            continue;
2090        }
2091        let count = seen.entry(key.clone()).or_insert(0);
2092        if *count > 0 {
2093            h.push_str(&format!("_{}", *count + 1));
2094        }
2095        *count += 1;
2096    }
2097    headers
2098}
2099
2100fn row_passes_filters(row: &TableRow, filters: Option<&Vec<TableFilter>>) -> bool {
2101    if let Some(filters) = filters {
2102        for filter in filters {
2103            if let Some(value) = row.get(&filter.column) {
2104                match filter.op {
2105                    FilterOp::Eq => {
2106                        if !value_eq(value, &filter.value) {
2107                            return false;
2108                        }
2109                    }
2110                    FilterOp::Neq => {
2111                        if value_eq(value, &filter.value) {
2112                            return false;
2113                        }
2114                    }
2115                    FilterOp::Contains => {
2116                        if !value_contains(value, &filter.value) {
2117                            return false;
2118                        }
2119                    }
2120                    FilterOp::Gt => {
2121                        if !value_gt(value, &filter.value) {
2122                            return false;
2123                        }
2124                    }
2125                    FilterOp::Lt => {
2126                        if !value_lt(value, &filter.value) {
2127                            return false;
2128                        }
2129                    }
2130                    FilterOp::Gte => {
2131                        if !value_gte(value, &filter.value) {
2132                            return false;
2133                        }
2134                    }
2135                    FilterOp::Lte => {
2136                        if !value_lte(value, &filter.value) {
2137                            return false;
2138                        }
2139                    }
2140                    FilterOp::StartsWith => {
2141                        if !value_starts_with(value, &filter.value) {
2142                            return false;
2143                        }
2144                    }
2145                    FilterOp::EndsWith => {
2146                        if !value_ends_with(value, &filter.value) {
2147                            return false;
2148                        }
2149                    }
2150                    FilterOp::In => {
2151                        let list = filter
2152                            .value
2153                            .as_array()
2154                            .cloned()
2155                            .unwrap_or_else(|| vec![filter.value.clone()]);
2156                        if !list.iter().any(|cmp| value_eq(value, cmp)) {
2157                            return false;
2158                        }
2159                    }
2160                }
2161            }
2162        }
2163    }
2164    true
2165}
2166
2167fn value_eq(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
2168    match (cell, cmp) {
2169        (Some(CellValue::Text(s)), serde_json::Value::String(t)) => s == t,
2170        (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
2171            v.as_f64().is_some_and(|f| (*n - f).abs() < f64::EPSILON)
2172        }
2173        (Some(CellValue::Number(n)), serde_json::Value::String(t)) => t
2174            .parse::<f64>()
2175            .map(|f| (*n - f).abs() < f64::EPSILON)
2176            .unwrap_or(false),
2177        (Some(CellValue::Bool(b)), serde_json::Value::Bool(v)) => b == v,
2178        (Some(CellValue::Bool(b)), serde_json::Value::String(t)) => {
2179            t.eq_ignore_ascii_case("true") == *b
2180        }
2181        (Some(CellValue::Date(d)), serde_json::Value::String(t)) => d == t,
2182        (None, serde_json::Value::Null) => true,
2183        _ => false,
2184    }
2185}
2186
2187fn value_contains(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
2188    if let (Some(CellValue::Text(s)), serde_json::Value::String(t)) = (cell, cmp) {
2189        return s.to_ascii_lowercase().contains(&t.to_ascii_lowercase());
2190    }
2191    false
2192}
2193
2194fn value_gt(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
2195    match (cell, cmp) {
2196        (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
2197            v.as_f64().is_some_and(|f| *n > f)
2198        }
2199        _ => false,
2200    }
2201}
2202
2203fn value_lt(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
2204    match (cell, cmp) {
2205        (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
2206            v.as_f64().is_some_and(|f| *n < f)
2207        }
2208        _ => false,
2209    }
2210}
2211
2212fn value_gte(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
2213    match (cell, cmp) {
2214        (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
2215            v.as_f64().is_some_and(|f| *n >= f)
2216        }
2217        _ => false,
2218    }
2219}
2220
2221fn value_lte(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
2222    match (cell, cmp) {
2223        (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
2224            v.as_f64().is_some_and(|f| *n <= f)
2225        }
2226        _ => false,
2227    }
2228}
2229
2230fn value_starts_with(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
2231    if let (Some(CellValue::Text(s)), serde_json::Value::String(t)) = (cell, cmp) {
2232        return s.to_ascii_lowercase().starts_with(&t.to_ascii_lowercase());
2233    }
2234    false
2235}
2236
2237fn value_ends_with(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
2238    if let (Some(CellValue::Text(s)), serde_json::Value::String(t)) = (cell, cmp) {
2239        return s.to_ascii_lowercase().ends_with(&t.to_ascii_lowercase());
2240    }
2241    false
2242}
2243
2244fn sample_rows(
2245    rows: Vec<TableRow>,
2246    limit: usize,
2247    offset: usize,
2248    mode: SampleMode,
2249) -> Vec<TableRow> {
2250    if rows.is_empty() {
2251        return rows;
2252    }
2253
2254    match mode {
2255        SampleMode::Distributed => {
2256            if limit == 0 {
2257                return Vec::new();
2258            }
2259            let mut indices = Vec::new();
2260            let span = rows.len().saturating_sub(1);
2261            let step = std::cmp::max(1, span / std::cmp::max(1, limit.saturating_sub(1)));
2262            let mut idx = offset;
2263            while idx < rows.len() && indices.len() < limit {
2264                indices.push(idx);
2265                idx = idx.saturating_add(step);
2266                if idx == indices.last().copied().unwrap_or(0) {
2267                    idx += 1;
2268                }
2269            }
2270            if indices.len() < limit {
2271                let last_idx = rows.len().saturating_sub(1);
2272                if !indices.contains(&last_idx) {
2273                    indices.push(last_idx);
2274                }
2275            }
2276            indices
2277                .into_iter()
2278                .filter_map(|i| rows.get(i).cloned())
2279                .collect()
2280        }
2281        SampleMode::Last => {
2282            let start = rows.len().saturating_sub(limit + offset);
2283            rows.into_iter().skip(start + offset).take(limit).collect()
2284        }
2285        SampleMode::First => rows.into_iter().skip(offset).take(limit).collect(),
2286    }
2287}
2288
2289fn summarize_columns(headers: &[String], rows: &[TableRow]) -> Vec<ColumnTypeSummary> {
2290    let mut summaries = Vec::new();
2291    for header in headers {
2292        let mut nulls = 0u32;
2293        let mut distinct_set: HashSet<String> = HashSet::new();
2294        let mut values: Vec<f64> = Vec::new();
2295        let mut top_counts: HashMap<String, u32> = HashMap::new();
2296
2297        for row in rows {
2298            match row.get(header) {
2299                Some(Some(CellValue::Number(n))) => {
2300                    values.push(*n);
2301                    let key = n.to_string();
2302                    *top_counts.entry(key).or_default() += 1;
2303                }
2304                Some(Some(CellValue::Text(s))) => {
2305                    distinct_set.insert(s.clone());
2306                    *top_counts.entry(s.clone()).or_default() += 1;
2307                }
2308                Some(Some(CellValue::Bool(b))) => {
2309                    let key = b.to_string();
2310                    distinct_set.insert(key.clone());
2311                    *top_counts.entry(key).or_default() += 1;
2312                }
2313                Some(Some(CellValue::Date(d))) => {
2314                    distinct_set.insert(d.clone());
2315                    *top_counts.entry(d.clone()).or_default() += 1;
2316                }
2317                Some(Some(CellValue::Error(e))) => {
2318                    distinct_set.insert(e.clone());
2319                    *top_counts.entry(e.clone()).or_default() += 1;
2320                }
2321                _ => {
2322                    nulls += 1;
2323                }
2324            }
2325        }
2326
2327        let inferred_type = if !values.is_empty() {
2328            "number"
2329        } else if !distinct_set.is_empty() {
2330            "text"
2331        } else {
2332            "unknown"
2333        }
2334        .to_string();
2335
2336        let min = values.iter().cloned().reduce(f64::min);
2337        let max = values.iter().cloned().reduce(f64::max);
2338        let mean = if values.is_empty() {
2339            None
2340        } else {
2341            Some(values.iter().sum::<f64>() / values.len() as f64)
2342        };
2343
2344        let mut top_values: Vec<(String, u32)> = top_counts.into_iter().collect();
2345        top_values.sort_by(|a, b| b.1.cmp(&a.1));
2346        let top_values = top_values.into_iter().take(3).map(|(v, _)| v).collect();
2347
2348        summaries.push(ColumnTypeSummary {
2349            name: header.clone(),
2350            inferred_type,
2351            nulls,
2352            distinct: distinct_set.len() as u32,
2353            top_values,
2354            min,
2355            max,
2356            mean,
2357        });
2358    }
2359    summaries
2360}
2361
2362#[allow(clippy::too_many_arguments)]
2363fn collect_value_matches(
2364    sheet: &umya_spreadsheet::Worksheet,
2365    sheet_name: &str,
2366    mode: &FindMode,
2367    match_mode: MatchMode,
2368    direction: &LabelDirection,
2369    params: &FindValueParams,
2370    region: Option<&DetectedRegion>,
2371    default_bounds: ((u32, u32), (u32, u32)),
2372    offset: u32,
2373    limit: u32,
2374    seen_so_far: u32,
2375) -> Result<(Vec<FindValueMatch>, u32, bool)> {
2376    let mut results = Vec::new();
2377    let mut seen = seen_so_far;
2378    let regex = if match_mode == MatchMode::Regex {
2379        Regex::new(&params.query).ok()
2380    } else {
2381        None
2382    };
2383    let bounds = region
2384        .as_ref()
2385        .and_then(|r| parse_range(&r.bounds))
2386        .unwrap_or(default_bounds);
2387
2388    let header_row = region.and_then(|r| r.header_row).unwrap_or(1);
2389
2390    let context_mode = params.context.unwrap_or_default();
2391    let include_neighbors = matches!(context_mode, FindContext::Neighbors | FindContext::Both);
2392    let include_row_context = matches!(context_mode, FindContext::Row | FindContext::Both);
2393    let context_width = params.context_width.unwrap_or(3).max(1);
2394
2395    for cell in sheet.get_cell_collection() {
2396        let coord = cell.get_coordinate();
2397        let col = *coord.get_col_num();
2398        let row = *coord.get_row_num();
2399        if col < bounds.0.0 || col > bounds.1.0 || row < bounds.0.1 || row > bounds.1.1 {
2400            continue;
2401        }
2402        if params.search_headers_only && row != header_row {
2403            continue;
2404        }
2405
2406        let value = cell_to_value(cell);
2407        if let Some(ref allowed) = params.value_types
2408            && !value_type_matches(&value, allowed)
2409        {
2410            continue;
2411        }
2412        if matches!(mode, FindMode::Value) {
2413            if !value_matches(
2414                &value,
2415                &params.query,
2416                match_mode,
2417                params.case_sensitive,
2418                &regex,
2419            ) {
2420                continue;
2421            }
2422        } else if let Some(label) = &params.label {
2423            if !label_matches(cell, label, match_mode, params.case_sensitive, &regex) {
2424                continue;
2425            }
2426        } else {
2427            continue;
2428        }
2429
2430        if seen < offset {
2431            seen += 1;
2432            continue;
2433        }
2434
2435        if results.len() as u32 >= limit {
2436            return Ok((results, seen, true));
2437        }
2438
2439        let neighbors = if include_neighbors {
2440            collect_neighbors(sheet, row, col)
2441        } else {
2442            None
2443        };
2444        let (label_hit, match_value) = if matches!(mode, FindMode::Label) {
2445            let target_value = match direction {
2446                LabelDirection::Right => sheet.get_cell((col + 1, row)),
2447                LabelDirection::Below => sheet.get_cell((col, row + 1)),
2448                LabelDirection::Any => sheet
2449                    .get_cell((col + 1, row))
2450                    .or_else(|| sheet.get_cell((col, row + 1))),
2451            }
2452            .and_then(cell_to_value);
2453            if target_value.is_none() {
2454                continue;
2455            }
2456            (
2457                Some(LabelHit {
2458                    label_address: coord.get_coordinate(),
2459                    label: label_from_cell(cell),
2460                }),
2461                target_value,
2462            )
2463        } else {
2464            (None, value.clone())
2465        };
2466
2467        let row_context = if include_row_context {
2468            build_row_context(sheet, row, col, context_width)
2469        } else {
2470            None
2471        };
2472
2473        results.push(FindValueMatch {
2474            address: coord.get_coordinate(),
2475            sheet_name: sheet_name.to_string(),
2476            value: match_value,
2477            row_context,
2478            neighbors,
2479            label_hit,
2480        });
2481        seen += 1;
2482    }
2483
2484    Ok((results, seen, false))
2485}
2486
2487fn label_from_cell(cell: &umya_spreadsheet::Cell) -> String {
2488    cell_to_value(cell)
2489        .map(|v| match v {
2490            CellValue::Text(s) => s,
2491            CellValue::Number(n) => n.to_string(),
2492            CellValue::Bool(b) => b.to_string(),
2493            CellValue::Date(d) => d,
2494            CellValue::Error(e) => e,
2495        })
2496        .unwrap_or_else(|| cell.get_value().to_string())
2497}
2498
2499fn value_matches(
2500    value: &Option<CellValue>,
2501    query: &str,
2502    mode: MatchMode,
2503    case_sensitive: bool,
2504    regex: &Option<Regex>,
2505) -> bool {
2506    if value.is_none() {
2507        return false;
2508    }
2509    let haystack = cell_value_to_string_lower(value.clone().unwrap());
2510    let needle = if case_sensitive {
2511        query.to_string()
2512    } else {
2513        query.to_ascii_lowercase()
2514    };
2515
2516    match mode {
2517        MatchMode::Exact => haystack == needle,
2518        MatchMode::Prefix => haystack.starts_with(&needle),
2519        MatchMode::Regex => regex
2520            .as_ref()
2521            .map(|re| re.is_match(&haystack))
2522            .unwrap_or(false),
2523        MatchMode::Contains => haystack.contains(&needle),
2524    }
2525}
2526
2527fn label_matches(
2528    cell: &umya_spreadsheet::Cell,
2529    label: &str,
2530    mode: MatchMode,
2531    case_sensitive: bool,
2532    regex: &Option<Regex>,
2533) -> bool {
2534    let value = cell_to_value(cell);
2535    if value.is_none() {
2536        return false;
2537    }
2538    let haystack = cell_value_to_string_lower(value.unwrap());
2539    let needle = if case_sensitive {
2540        label.to_string()
2541    } else {
2542        label.to_ascii_lowercase()
2543    };
2544    match mode {
2545        MatchMode::Exact => haystack == needle,
2546        MatchMode::Prefix => haystack.starts_with(&needle),
2547        MatchMode::Regex => regex
2548            .as_ref()
2549            .map(|re| re.is_match(&haystack))
2550            .unwrap_or(false),
2551        MatchMode::Contains => haystack.contains(&needle),
2552    }
2553}
2554
2555fn value_type_matches(value: &Option<CellValue>, allowed: &[ValueTypeFilter]) -> bool {
2556    if value.is_none() {
2557        return allowed.contains(&ValueTypeFilter::Null);
2558    }
2559    match value.as_ref().unwrap() {
2560        CellValue::Text(_) => allowed.contains(&ValueTypeFilter::Text),
2561        CellValue::Number(_) => allowed.contains(&ValueTypeFilter::Number),
2562        CellValue::Bool(_) => allowed.contains(&ValueTypeFilter::Bool),
2563        CellValue::Date(_) => allowed.contains(&ValueTypeFilter::Date),
2564        CellValue::Error(_) => true,
2565    }
2566}
2567
2568fn collect_neighbors(
2569    sheet: &umya_spreadsheet::Worksheet,
2570    row: u32,
2571    col: u32,
2572) -> Option<NeighborValues> {
2573    Some(NeighborValues {
2574        left: if col > 1 {
2575            sheet.get_cell((col - 1, row)).and_then(cell_to_value)
2576        } else {
2577            None
2578        },
2579        right: sheet.get_cell((col + 1, row)).and_then(cell_to_value),
2580        up: if row > 1 {
2581            sheet.get_cell((col, row - 1)).and_then(cell_to_value)
2582        } else {
2583            None
2584        },
2585        down: sheet.get_cell((col, row + 1)).and_then(cell_to_value),
2586    })
2587}
2588
2589fn build_row_context(
2590    sheet: &umya_spreadsheet::Worksheet,
2591    row: u32,
2592    col: u32,
2593    width: u32,
2594) -> Option<RowContext> {
2595    let width = width.max(1);
2596    let half = width / 2;
2597    let max_col = sheet.get_highest_column().max(1);
2598    let start_col = col.saturating_sub(half).max(1);
2599    let end_col = (col + half).min(max_col);
2600
2601    let mut headers = Vec::new();
2602    let mut values = Vec::new();
2603
2604    for current_col in start_col..=end_col {
2605        let header_value = sheet
2606            .get_cell((current_col, 1u32))
2607            .and_then(cell_to_value)
2608            .map(|v| match v {
2609                CellValue::Text(s) => s,
2610                CellValue::Number(n) => n.to_string(),
2611                CellValue::Bool(b) => b.to_string(),
2612                CellValue::Date(d) => d,
2613                CellValue::Error(e) => e,
2614            })
2615            .unwrap_or_else(|| format!("Col{}", current_col));
2616        let value = sheet.get_cell((current_col, row)).and_then(cell_to_value);
2617        headers.push(header_value);
2618        values.push(value);
2619    }
2620
2621    Some(RowContext { headers, values })
2622}
2623
2624fn default_find_formula_limit() -> u32 {
2625    50
2626}
2627
2628#[derive(Debug, Deserialize, JsonSchema, Default)]
2629pub struct FindFormulaParams {
2630    /// Workbook ID or fork ID
2631    #[serde(alias = "workbook_id")]
2632    pub workbook_or_fork_id: WorkbookId,
2633    /// Text to search for in formulas (e.g., "SUM(", "VLOOKUP")
2634    pub query: String,
2635    /// Limit to specific sheet (searches all if omitted)
2636    pub sheet_name: Option<String>,
2637    /// Case-sensitive matching (default: false)
2638    #[serde(default)]
2639    pub case_sensitive: bool,
2640    /// Include header row and cell context (default: false)
2641    #[serde(default)]
2642    pub include_context: bool,
2643    /// Maximum matches to return (default: 50)
2644    #[serde(default = "default_find_formula_limit")]
2645    pub limit: u32,
2646    /// Offset for pagination; use next_offset from previous response
2647    #[serde(default)]
2648    pub offset: u32,
2649    /// Rows of context to include above/below (requires include_context=true)
2650    #[serde(default)]
2651    pub context_rows: Option<u32>,
2652    /// Columns of context to include left/right (requires include_context=true)
2653    #[serde(default)]
2654    pub context_cols: Option<u32>,
2655}
2656
2657pub async fn find_formula(
2658    state: Arc<AppState>,
2659    params: FindFormulaParams,
2660) -> Result<FindFormulaResponse> {
2661    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
2662    let query = if params.case_sensitive {
2663        params.query.clone()
2664    } else {
2665        params.query.to_ascii_lowercase()
2666    };
2667
2668    let sheet_names: Vec<String> = if let Some(sheet) = &params.sheet_name {
2669        vec![sheet.clone()]
2670    } else {
2671        workbook.sheet_names()
2672    };
2673
2674    let limit = params.limit.clamp(1, 500);
2675    let offset = params.offset;
2676    let context_rows = params.context_rows.unwrap_or(1);
2677    let context_cols = params.context_cols.unwrap_or(1);
2678
2679    let mut matches = Vec::new();
2680    let mut seen: u32 = 0;
2681    let mut truncated = false;
2682
2683    for sheet_name in sheet_names {
2684        let (sheet_matches, sheet_seen, sheet_truncated) =
2685            workbook.with_sheet(&sheet_name, |sheet| {
2686                collect_formula_matches(
2687                    sheet,
2688                    &sheet_name,
2689                    &query,
2690                    params.case_sensitive,
2691                    params.include_context,
2692                    context_rows,
2693                    context_cols,
2694                    offset,
2695                    limit,
2696                    seen,
2697                )
2698            })?;
2699
2700        seen = sheet_seen;
2701        truncated |= sheet_truncated;
2702        matches.extend(sheet_matches);
2703
2704        if truncated {
2705            break;
2706        }
2707    }
2708
2709    let next_offset = if truncated {
2710        Some(offset.saturating_add(matches.len() as u32))
2711    } else {
2712        None
2713    };
2714
2715    let response = FindFormulaResponse {
2716        workbook_id: workbook.id.clone(),
2717        workbook_short_id: workbook.short_id.clone(),
2718        matches,
2719        next_offset,
2720    };
2721    Ok(response)
2722}
2723
2724#[derive(Debug, Deserialize, JsonSchema)]
2725pub struct ScanVolatilesParams {
2726    /// Workbook ID or fork ID
2727    #[serde(alias = "workbook_id")]
2728    pub workbook_or_fork_id: WorkbookId,
2729    /// Limit to specific sheet (scans all if omitted)
2730    pub sheet_name: Option<String>,
2731    /// Return counts only without addresses (default: true in token_dense profile)
2732    #[serde(default)]
2733    pub summary_only: Option<bool>,
2734    /// Include cell addresses for each volatile (default: !summary_only)
2735    #[serde(default)]
2736    pub include_addresses: Option<bool>,
2737    /// Maximum addresses to include per volatile function (default: 15)
2738    #[serde(default)]
2739    pub addresses_limit: Option<u32>,
2740}
2741
2742pub async fn scan_volatiles(
2743    state: Arc<AppState>,
2744    params: ScanVolatilesParams,
2745) -> Result<VolatileScanResponse> {
2746    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
2747    let config = state.config();
2748    let output_profile = config.output_profile();
2749    let summary_only = params
2750        .summary_only
2751        .unwrap_or(matches!(output_profile, OutputProfile::TokenDense));
2752    let include_addresses = params.include_addresses.unwrap_or(!summary_only);
2753    let addresses_limit = params.addresses_limit.unwrap_or(15);
2754    let max_items = config.max_items();
2755    let max_payload_bytes = config.max_payload_bytes();
2756
2757    let target_sheets: Vec<String> = if let Some(sheet) = &params.sheet_name {
2758        vec![sheet.clone()]
2759    } else {
2760        workbook.sheet_names()
2761    };
2762
2763    let mut items = Vec::new();
2764    for sheet_name in target_sheets {
2765        let graph = workbook.formula_graph(&sheet_name)?;
2766        for group in graph.groups() {
2767            if !group.is_volatile {
2768                continue;
2769            }
2770
2771            if summary_only {
2772                items.push(VolatileScanEntry {
2773                    address: String::new(),
2774                    sheet_name: sheet_name.clone(),
2775                    function: "volatile".to_string(),
2776                    note: Some(format!("Count: {}", group.addresses.len())),
2777                });
2778            } else if include_addresses {
2779                for address in group.addresses.iter().take(addresses_limit as usize) {
2780                    items.push(VolatileScanEntry {
2781                        address: address.clone(),
2782                        sheet_name: sheet_name.clone(),
2783                        function: "volatile".to_string(),
2784                        note: Some(group.formula.clone()),
2785                    });
2786                }
2787            } else {
2788                items.push(VolatileScanEntry {
2789                    address: String::new(),
2790                    sheet_name: sheet_name.clone(),
2791                    function: "volatile".to_string(),
2792                    note: Some(group.formula.clone()),
2793                });
2794            }
2795        }
2796    }
2797
2798    let total_items = items.len();
2799
2800    if let Some(max_items) = max_items
2801        && items.len() > max_items
2802    {
2803        items.truncate(max_items);
2804    }
2805
2806    if let Some(max_bytes) = max_payload_bytes {
2807        let item_limit = cap_rows_by_payload_bytes(items.len(), Some(max_bytes), |count| {
2808            let response = VolatileScanResponse {
2809                workbook_id: workbook.id.clone(),
2810                workbook_short_id: workbook.short_id.clone(),
2811                items: items[..count].to_vec(),
2812                next_offset: None,
2813            };
2814            serde_json::to_vec(&response)
2815                .map(|payload| payload.len())
2816                .unwrap_or(usize::MAX)
2817        });
2818
2819        if item_limit < items.len() {
2820            items.truncate(item_limit);
2821        }
2822    }
2823
2824    let next_offset = if items.len() < total_items {
2825        Some(items.len() as u32)
2826    } else {
2827        None
2828    };
2829
2830    let response = VolatileScanResponse {
2831        workbook_id: workbook.id.clone(),
2832        workbook_short_id: workbook.short_id.clone(),
2833        items,
2834        next_offset,
2835    };
2836    Ok(response)
2837}
2838
2839#[derive(Debug, Deserialize, JsonSchema)]
2840pub struct WorkbookStyleSummaryParams {
2841    /// Workbook ID or fork ID
2842    #[serde(alias = "workbook_id")]
2843    pub workbook_or_fork_id: WorkbookId,
2844    /// Maximum distinct styles to return (default: 50)
2845    pub max_styles: Option<u32>,
2846    /// Maximum conditional format rules to return (default: 20)
2847    pub max_conditional_formats: Option<u32>,
2848    /// Maximum cells to scan per sheet (default: 10000)
2849    pub max_cells_scan: Option<u32>,
2850    /// Return counts and tags only, no descriptors (default: true in token_dense profile)
2851    #[serde(default)]
2852    pub summary_only: Option<bool>,
2853    /// Include full style descriptors (fonts, fills, borders)
2854    #[serde(default)]
2855    pub include_descriptor: Option<bool>,
2856    /// Include example cell addresses for each style
2857    #[serde(default)]
2858    pub include_example_cells: Option<bool>,
2859    /// Include workbook theme colors
2860    #[serde(default)]
2861    pub include_theme: Option<bool>,
2862    /// Include conditional formatting rules
2863    #[serde(default)]
2864    pub include_conditional_formats: Option<bool>,
2865}
2866
2867#[derive(Debug)]
2868struct WorkbookStyleAccum {
2869    descriptor: StyleDescriptor,
2870    occurrences: u32,
2871    tags: HashSet<String>,
2872    example_cells: Vec<String>,
2873}
2874
2875impl WorkbookStyleAccum {
2876    fn new(descriptor: StyleDescriptor) -> Self {
2877        Self {
2878            descriptor,
2879            occurrences: 0,
2880            tags: HashSet::new(),
2881            example_cells: Vec::new(),
2882        }
2883    }
2884}
2885
2886pub async fn workbook_style_summary(
2887    state: Arc<AppState>,
2888    params: WorkbookStyleSummaryParams,
2889) -> Result<WorkbookStyleSummaryResponse> {
2890    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
2891    let config = state.config();
2892    let output_profile = config.output_profile();
2893    let summary_only = params
2894        .summary_only
2895        .unwrap_or(matches!(output_profile, OutputProfile::TokenDense));
2896    let include_descriptor = params.include_descriptor.unwrap_or(!summary_only);
2897    let include_example_cells = params.include_example_cells.unwrap_or(!summary_only);
2898    let include_theme = params.include_theme.unwrap_or(!summary_only);
2899    let include_conditional_formats = params.include_conditional_formats.unwrap_or(!summary_only);
2900    let max_payload_bytes = config.max_payload_bytes();
2901    let sheet_names = workbook.sheet_names();
2902
2903    const STYLE_EXAMPLE_LIMIT: usize = 5;
2904    const STYLE_LIMIT_DEFAULT: usize = 200;
2905    const CF_LIMIT_DEFAULT: usize = 200;
2906    const CELL_SCAN_LIMIT_DEFAULT: usize = 500_000;
2907
2908    let style_limit = params
2909        .max_styles
2910        .map(|v| v as usize)
2911        .unwrap_or(STYLE_LIMIT_DEFAULT);
2912    let style_limit = config
2913        .max_items()
2914        .map(|limit| style_limit.min(limit))
2915        .unwrap_or(style_limit);
2916    let cf_limit = params
2917        .max_conditional_formats
2918        .map(|v| v as usize)
2919        .unwrap_or(CF_LIMIT_DEFAULT);
2920    let cf_limit = config
2921        .max_items()
2922        .map(|limit| cf_limit.min(limit))
2923        .unwrap_or(cf_limit);
2924    let cell_scan_limit = params
2925        .max_cells_scan
2926        .map(|v| v as usize)
2927        .unwrap_or(CELL_SCAN_LIMIT_DEFAULT);
2928
2929    let mut acc: HashMap<String, WorkbookStyleAccum> = HashMap::new();
2930    let mut scanned_cells: usize = 0;
2931    let mut scan_truncated = false;
2932
2933    for sheet_name in &sheet_names {
2934        if scan_truncated {
2935            break;
2936        }
2937        workbook.with_sheet(sheet_name, |sheet| {
2938            for cell in sheet.get_cell_collection() {
2939                if scanned_cells >= cell_scan_limit {
2940                    scan_truncated = true;
2941                    break;
2942                }
2943                scanned_cells += 1;
2944
2945                let address = cell.get_coordinate().get_coordinate().to_string();
2946                let descriptor = crate::styles::descriptor_from_style(cell.get_style());
2947                let style_id = crate::styles::stable_style_id(&descriptor);
2948
2949                let entry = acc
2950                    .entry(style_id.clone())
2951                    .or_insert_with(|| WorkbookStyleAccum::new(descriptor.clone()));
2952                entry.occurrences += 1;
2953                if entry.example_cells.len() < STYLE_EXAMPLE_LIMIT {
2954                    entry.example_cells.push(format!("{sheet_name}!{address}"));
2955                }
2956
2957                if let Some((_, tagging)) = crate::analysis::style::tag_cell(cell) {
2958                    for tag in tagging.tags {
2959                        entry.tags.insert(tag);
2960                    }
2961                }
2962            }
2963        })?;
2964    }
2965
2966    let total_styles = acc.len() as u32;
2967    let mut styles: Vec<WorkbookStyleUsage> = acc
2968        .into_iter()
2969        .map(|(style_id, entry)| {
2970            let mut tags: Vec<String> = entry.tags.into_iter().collect();
2971            tags.sort();
2972            WorkbookStyleUsage {
2973                style_id,
2974                occurrences: entry.occurrences,
2975                tags,
2976                example_cells: if include_example_cells {
2977                    entry.example_cells
2978                } else {
2979                    Vec::new()
2980                },
2981                descriptor: if include_descriptor {
2982                    Some(entry.descriptor)
2983                } else {
2984                    None
2985                },
2986            }
2987        })
2988        .collect();
2989
2990    styles.sort_by(|a, b| {
2991        b.occurrences
2992            .cmp(&a.occurrences)
2993            .then_with(|| a.style_id.cmp(&b.style_id))
2994    });
2995
2996    let inferred_default_style_id = styles.first().map(|s| s.style_id.clone());
2997    let mut inferred_default_font = styles
2998        .first()
2999        .and_then(|s| s.descriptor.as_ref().and_then(|d| d.font.clone()));
3000
3001    let mut styles_truncated = if styles.len() > style_limit {
3002        styles.truncate(style_limit);
3003        true
3004    } else {
3005        false
3006    };
3007
3008    let theme = workbook.with_spreadsheet(|book| {
3009        let theme = book.get_theme();
3010        let elements = theme.get_theme_elements();
3011        let scheme = elements.get_color_scheme();
3012        let mut colors = BTreeMap::new();
3013
3014        let mut insert_color = |name: &str, value: String| {
3015            if !value.trim().is_empty() {
3016                colors.insert(name.to_string(), value);
3017            }
3018        };
3019
3020        insert_color("dk1", scheme.get_dk1().get_val());
3021        insert_color("lt1", scheme.get_lt1().get_val());
3022        insert_color("dk2", scheme.get_dk2().get_val());
3023        insert_color("lt2", scheme.get_lt2().get_val());
3024        insert_color("accent1", scheme.get_accent1().get_val());
3025        insert_color("accent2", scheme.get_accent2().get_val());
3026        insert_color("accent3", scheme.get_accent3().get_val());
3027        insert_color("accent4", scheme.get_accent4().get_val());
3028        insert_color("accent5", scheme.get_accent5().get_val());
3029        insert_color("accent6", scheme.get_accent6().get_val());
3030        insert_color("hlink", scheme.get_hlink().get_val());
3031        insert_color("fol_hlink", scheme.get_fol_hlink().get_val());
3032
3033        let font_scheme = elements.get_font_scheme();
3034        let major = font_scheme.get_major_font();
3035        let minor = font_scheme.get_minor_font();
3036        let font_scheme_summary = ThemeFontSchemeSummary {
3037            major_latin: Some(major.get_latin_font().get_typeface().to_string())
3038                .filter(|s| !s.trim().is_empty()),
3039            major_east_asian: Some(major.get_east_asian_font().get_typeface().to_string())
3040                .filter(|s| !s.trim().is_empty()),
3041            major_complex_script: Some(major.get_complex_script_font().get_typeface().to_string())
3042                .filter(|s| !s.trim().is_empty()),
3043            minor_latin: Some(minor.get_latin_font().get_typeface().to_string())
3044                .filter(|s| !s.trim().is_empty()),
3045            minor_east_asian: Some(minor.get_east_asian_font().get_typeface().to_string())
3046                .filter(|s| !s.trim().is_empty()),
3047            minor_complex_script: Some(minor.get_complex_script_font().get_typeface().to_string())
3048                .filter(|s| !s.trim().is_empty()),
3049        };
3050
3051        ThemeSummary {
3052            name: Some(theme.get_name().to_string()).filter(|s| !s.trim().is_empty()),
3053            colors,
3054            font_scheme: font_scheme_summary,
3055        }
3056    })?;
3057
3058    if inferred_default_font.is_none()
3059        && let Some(name) = theme
3060            .font_scheme
3061            .minor_latin
3062            .clone()
3063            .or_else(|| theme.font_scheme.major_latin.clone())
3064    {
3065        inferred_default_font = Some(FontDescriptor {
3066            name: Some(name),
3067            size: None,
3068            bold: None,
3069            italic: None,
3070            underline: None,
3071            strikethrough: None,
3072            color: None,
3073        });
3074    }
3075
3076    let theme = if include_theme { Some(theme) } else { None };
3077
3078    let mut conditional_formats: Vec<ConditionalFormatSummary> = Vec::new();
3079    let mut conditional_formats_truncated = false;
3080    if include_conditional_formats {
3081        use umya_spreadsheet::structs::EnumTrait;
3082        for sheet_name in &sheet_names {
3083            if conditional_formats_truncated {
3084                break;
3085            }
3086            workbook.with_sheet(sheet_name, |sheet| {
3087                for cf in sheet.get_conditional_formatting_collection() {
3088                    if conditional_formats.len() >= cf_limit {
3089                        conditional_formats_truncated = true;
3090                        break;
3091                    }
3092                    let range = cf.get_sequence_of_references().get_sqref().to_string();
3093                    let mut types: HashSet<String> = HashSet::new();
3094                    for rule in cf.get_conditional_collection() {
3095                        types.insert(rule.get_type().get_value_string().to_string());
3096                    }
3097                    let mut rule_types: Vec<String> = types.into_iter().collect();
3098                    rule_types.sort();
3099                    conditional_formats.push(ConditionalFormatSummary {
3100                        sheet_name: sheet_name.clone(),
3101                        range,
3102                        rule_types,
3103                        rule_count: cf.get_conditional_collection().len() as u32,
3104                    });
3105                }
3106            })?;
3107        }
3108    }
3109
3110    let mut notes: Vec<String> = Vec::new();
3111    if scan_truncated {
3112        notes.push(format!(
3113            "Stopped scanning after {cell_scan_limit} cells; style counts may be incomplete."
3114        ));
3115    }
3116    notes.push(
3117        "Named styles are not directly exposed by umya-spreadsheet; styles here are inferred from cell formatting."
3118            .to_string(),
3119    );
3120
3121    if let Some(max_bytes) = max_payload_bytes {
3122        let style_limit = cap_rows_by_payload_bytes(styles.len(), Some(max_bytes), |count| {
3123            let response = WorkbookStyleSummaryResponse {
3124                workbook_id: workbook.id.clone(),
3125                workbook_short_id: workbook.short_id.clone(),
3126                theme: theme.clone(),
3127                inferred_default_style_id: inferred_default_style_id.clone(),
3128                inferred_default_font: inferred_default_font.clone(),
3129                styles: styles[..count].to_vec(),
3130                total_styles,
3131                styles_truncated: false,
3132                conditional_formats: conditional_formats.clone(),
3133                conditional_formats_truncated,
3134                scan_truncated,
3135                notes: notes.clone(),
3136            };
3137            serde_json::to_vec(&response)
3138                .map(|payload| payload.len())
3139                .unwrap_or(usize::MAX)
3140        });
3141
3142        if style_limit < styles.len() {
3143            styles.truncate(style_limit);
3144            styles_truncated = true;
3145        }
3146
3147        if !conditional_formats.is_empty() {
3148            let cf_limit =
3149                cap_rows_by_payload_bytes(conditional_formats.len(), Some(max_bytes), |count| {
3150                    let response = WorkbookStyleSummaryResponse {
3151                        workbook_id: workbook.id.clone(),
3152                        workbook_short_id: workbook.short_id.clone(),
3153                        theme: theme.clone(),
3154                        inferred_default_style_id: inferred_default_style_id.clone(),
3155                        inferred_default_font: inferred_default_font.clone(),
3156                        styles: styles.clone(),
3157                        total_styles,
3158                        styles_truncated,
3159                        conditional_formats: conditional_formats[..count].to_vec(),
3160                        conditional_formats_truncated: false,
3161                        scan_truncated,
3162                        notes: notes.clone(),
3163                    };
3164                    serde_json::to_vec(&response)
3165                        .map(|payload| payload.len())
3166                        .unwrap_or(usize::MAX)
3167                });
3168
3169            if cf_limit < conditional_formats.len() {
3170                conditional_formats.truncate(cf_limit);
3171                conditional_formats_truncated = true;
3172            }
3173        }
3174    }
3175
3176    Ok(WorkbookStyleSummaryResponse {
3177        workbook_id: workbook.id.clone(),
3178        workbook_short_id: workbook.short_id.clone(),
3179        theme,
3180        inferred_default_style_id,
3181        inferred_default_font,
3182        styles,
3183        total_styles,
3184        styles_truncated,
3185        conditional_formats,
3186        conditional_formats_truncated,
3187        scan_truncated,
3188        notes,
3189    })
3190}
3191
3192#[derive(Debug, Deserialize, JsonSchema)]
3193pub struct SheetStylesParams {
3194    /// Workbook ID or fork ID
3195    #[serde(alias = "workbook_id")]
3196    pub workbook_or_fork_id: WorkbookId,
3197    /// Sheet name
3198    pub sheet_name: String,
3199    /// Limit scope: use range (e.g., "A1:D100") or region_id
3200    #[serde(default)]
3201    pub scope: Option<SheetStylesScope>,
3202    /// Granularity for style grouping
3203    #[serde(default)]
3204    pub granularity: Option<StyleGranularity>,
3205    /// Maximum style entries to return (default: 100)
3206    #[serde(default)]
3207    pub max_items: Option<usize>,
3208    /// Return counts and tags only (default: true in token_dense profile)
3209    #[serde(default)]
3210    pub summary_only: Option<bool>,
3211    /// Include full style descriptors (fonts, fills, borders)
3212    #[serde(default)]
3213    pub include_descriptor: Option<bool>,
3214    /// Include cell ranges for each style
3215    #[serde(default)]
3216    pub include_ranges: Option<bool>,
3217    /// Include example cell addresses
3218    #[serde(default)]
3219    pub include_example_cells: Option<bool>,
3220}
3221
3222#[derive(Debug, Deserialize, JsonSchema)]
3223#[serde(tag = "kind", rename_all = "snake_case")]
3224pub enum SheetStylesScope {
3225    Range { range: String },
3226    Region { region_id: u32 },
3227}
3228
3229#[derive(Debug)]
3230struct StyleAccum {
3231    descriptor: StyleDescriptor,
3232    occurrences: u32,
3233    tags: HashSet<String>,
3234    example_cells: Vec<String>,
3235    positions: Vec<(u32, u32)>,
3236}
3237
3238impl StyleAccum {
3239    fn new(descriptor: StyleDescriptor) -> Self {
3240        Self {
3241            descriptor,
3242            occurrences: 0,
3243            tags: HashSet::new(),
3244            example_cells: Vec::new(),
3245            positions: Vec::new(),
3246        }
3247    }
3248}
3249
3250pub async fn sheet_styles(
3251    state: Arc<AppState>,
3252    params: SheetStylesParams,
3253) -> Result<SheetStylesResponse> {
3254    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
3255    let config = state.config();
3256    let output_profile = config.output_profile();
3257    let summary_only = params
3258        .summary_only
3259        .unwrap_or(matches!(output_profile, OutputProfile::TokenDense));
3260    let include_descriptor = params.include_descriptor.unwrap_or(!summary_only);
3261    let include_ranges = params.include_ranges.unwrap_or(!summary_only);
3262    let include_example_cells = params.include_example_cells.unwrap_or(!summary_only);
3263    const STYLE_EXAMPLE_LIMIT: usize = 5;
3264    const STYLE_RANGE_LIMIT: usize = 50;
3265    const STYLE_LIMIT: usize = 200;
3266    const MAX_MAX_ITEMS: usize = 5000;
3267
3268    let max_payload_bytes = config.max_payload_bytes();
3269    let style_limit = config
3270        .max_items()
3271        .map(|limit| STYLE_LIMIT.min(limit))
3272        .unwrap_or(STYLE_LIMIT);
3273
3274    let metrics = workbook.get_sheet_metrics_fast(&params.sheet_name)?;
3275    let full_bounds = (
3276        (1, 1),
3277        (
3278            metrics.metrics.column_count.max(1),
3279            metrics.metrics.row_count.max(1),
3280        ),
3281    );
3282
3283    let bounds = match &params.scope {
3284        Some(SheetStylesScope::Range { range }) => {
3285            parse_range(range).ok_or_else(|| anyhow!("invalid range: {}", range))?
3286        }
3287        Some(SheetStylesScope::Region { region_id }) => {
3288            let region = workbook.detected_region(&params.sheet_name, *region_id)?;
3289            parse_range(&region.bounds)
3290                .ok_or_else(|| anyhow!("invalid region bounds: {}", region.bounds))?
3291        }
3292        None => full_bounds,
3293    };
3294
3295    let granularity = params.granularity.unwrap_or_default();
3296
3297    let max_items = params
3298        .max_items
3299        .unwrap_or(STYLE_RANGE_LIMIT)
3300        .clamp(1, MAX_MAX_ITEMS);
3301
3302    let (mut styles, total_styles, mut styles_truncated) =
3303        workbook.with_sheet(&params.sheet_name, |sheet| {
3304            let mut acc: HashMap<String, StyleAccum> = HashMap::new();
3305
3306            for cell in sheet.get_cell_collection() {
3307                let address = cell.get_coordinate().get_coordinate().to_string();
3308                let Some((col, row)) = parse_address(&address) else {
3309                    continue;
3310                };
3311                if col < bounds.0.0 || col > bounds.1.0 || row < bounds.0.1 || row > bounds.1.1 {
3312                    continue;
3313                }
3314
3315                let descriptor = crate::styles::descriptor_from_style(cell.get_style());
3316                let style_id = crate::styles::stable_style_id(&descriptor);
3317
3318                let entry = acc
3319                    .entry(style_id.clone())
3320                    .or_insert_with(|| StyleAccum::new(descriptor.clone()));
3321                entry.occurrences += 1;
3322                if entry.example_cells.len() < STYLE_EXAMPLE_LIMIT {
3323                    entry.example_cells.push(address.clone());
3324                }
3325
3326                if let Some((_, tagging)) = crate::analysis::style::tag_cell(cell) {
3327                    for tag in tagging.tags {
3328                        entry.tags.insert(tag);
3329                    }
3330                }
3331
3332                entry.positions.push((row, col));
3333            }
3334
3335            let mut summaries: Vec<StyleSummary> = acc
3336                .into_iter()
3337                .map(|(style_id, mut entry)| {
3338                    entry.positions.sort_unstable();
3339                    entry.positions.dedup();
3340
3341                    let (cell_ranges, ranges_truncated) = if include_ranges {
3342                        if granularity == StyleGranularity::Cells {
3343                            let mut out = Vec::new();
3344                            for (row, col) in entry.positions.iter().take(max_items) {
3345                                out.push(crate::utils::cell_address(*col, *row));
3346                            }
3347                            (out, entry.positions.len() > max_items)
3348                        } else {
3349                            crate::styles::compress_positions_to_ranges(&entry.positions, max_items)
3350                        }
3351                    } else {
3352                        (Vec::new(), false)
3353                    };
3354
3355                    StyleSummary {
3356                        style_id,
3357                        occurrences: entry.occurrences,
3358                        tags: entry.tags.into_iter().collect(),
3359                        example_cells: if include_example_cells {
3360                            entry.example_cells
3361                        } else {
3362                            Vec::new()
3363                        },
3364                        descriptor: if include_descriptor {
3365                            Some(entry.descriptor)
3366                        } else {
3367                            None
3368                        },
3369                        cell_ranges,
3370                        ranges_truncated,
3371                    }
3372                })
3373                .collect();
3374
3375            summaries.sort_by(|a, b| {
3376                b.occurrences
3377                    .cmp(&a.occurrences)
3378                    .then_with(|| a.style_id.cmp(&b.style_id))
3379            });
3380
3381            let total = summaries.len() as u32;
3382            let truncated = if summaries.len() > style_limit {
3383                summaries.truncate(style_limit);
3384                true
3385            } else {
3386                false
3387            };
3388
3389            Ok::<_, anyhow::Error>((summaries, total, truncated))
3390        })??;
3391
3392    if let Some(max_bytes) = max_payload_bytes {
3393        let row_limit = cap_rows_by_payload_bytes(styles.len(), Some(max_bytes), |count| {
3394            let response = SheetStylesResponse {
3395                workbook_id: workbook.id.clone(),
3396                workbook_short_id: workbook.short_id.clone(),
3397                sheet_name: params.sheet_name.clone(),
3398                styles: styles[..count].to_vec(),
3399                conditional_rules: Vec::new(),
3400                total_styles,
3401                styles_truncated: false,
3402            };
3403            serde_json::to_vec(&response)
3404                .map(|payload| payload.len())
3405                .unwrap_or(usize::MAX)
3406        });
3407        if row_limit < styles.len() {
3408            styles.truncate(row_limit);
3409            styles_truncated = true;
3410        }
3411    }
3412
3413    Ok(SheetStylesResponse {
3414        workbook_id: workbook.id.clone(),
3415        workbook_short_id: workbook.short_id.clone(),
3416        sheet_name: params.sheet_name.clone(),
3417        styles,
3418        conditional_rules: Vec::new(),
3419        total_styles,
3420        styles_truncated,
3421    })
3422}
3423
3424pub async fn range_values(
3425    state: Arc<AppState>,
3426    params: RangeValuesParams,
3427) -> Result<RangeValuesResponse> {
3428    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
3429    let config = state.config();
3430    let output_profile = config.output_profile();
3431    let format = params.format.unwrap_or(match output_profile {
3432        OutputProfile::TokenDense => TableOutputFormat::Values,
3433        OutputProfile::Verbose => TableOutputFormat::Json,
3434    });
3435    let include_headers = params.include_headers.unwrap_or(false);
3436    if let Some(page_size) = params.page_size
3437        && page_size == 0
3438    {
3439        return Err(anyhow!("page_size must be greater than zero"));
3440    }
3441    let max_cells = config.max_cells();
3442    let max_payload_bytes = config.max_payload_bytes();
3443    #[cfg(feature = "recalc")]
3444    let requested_bounds: Vec<((u32, u32), (u32, u32))> = params
3445        .ranges
3446        .iter()
3447        .filter_map(|r| parse_range(r))
3448        .collect();
3449
3450    #[cfg(feature = "recalc")]
3451    let (values, has_formula_in_target) = workbook.with_sheet(&params.sheet_name, |sheet| {
3452        let has_formula_in_target = sheet_has_formula_in_bounds(sheet, &requested_bounds);
3453        let values = params
3454            .ranges
3455            .iter()
3456            .filter_map(|range| {
3457                parse_range(range).map(|((start_col, start_row), (end_col, end_row))| {
3458                    let total_rows = (end_row - start_row + 1) as usize;
3459                    let total_cols = (end_col - start_col + 1) as usize;
3460                    let mut row_limit = total_rows;
3461                    if let Some(page_size) = params.page_size {
3462                        row_limit = row_limit.min(page_size as usize);
3463                    }
3464
3465                    let mut rows = Vec::new();
3466                    for r in start_row..=end_row {
3467                        if rows.len() >= row_limit {
3468                            break;
3469                        }
3470                        let mut row_vals = Vec::new();
3471                        for c in start_col..=end_col {
3472                            if include_headers && r == start_row && start_row == 1 {
3473                                row_vals.push(sheet.get_cell((c, 1u32)).and_then(cell_to_value));
3474                            } else {
3475                                row_vals.push(sheet.get_cell((c, r)).and_then(cell_to_value));
3476                            }
3477                        }
3478                        rows.push(row_vals);
3479                    }
3480
3481                    let mut row_limit = cap_rows_by_cells(rows.len(), total_cols, max_cells);
3482                    if row_limit > 0 {
3483                        row_limit =
3484                            cap_rows_by_payload_bytes(row_limit, max_payload_bytes, |count| {
3485                                let entry =
3486                                    build_range_values_entry(format, range, &rows[..count], None);
3487                                serde_json::to_vec(&entry)
3488                                    .map(|payload| payload.len())
3489                                    .unwrap_or(usize::MAX)
3490                            });
3491                    }
3492
3493                    if row_limit < rows.len() {
3494                        rows.truncate(row_limit);
3495                    }
3496
3497                    let next_start_row = if rows.len() < total_rows {
3498                        Some(start_row + rows.len() as u32)
3499                    } else {
3500                        None
3501                    };
3502
3503                    build_range_values_entry(format, range, &rows, next_start_row)
3504                })
3505            })
3506            .collect();
3507
3508        Ok::<_, anyhow::Error>((values, has_formula_in_target))
3509    })??;
3510
3511    #[cfg(not(feature = "recalc"))]
3512    let values = workbook.with_sheet(&params.sheet_name, |sheet| {
3513        let values = params
3514            .ranges
3515            .iter()
3516            .filter_map(|range| {
3517                parse_range(range).map(|((start_col, start_row), (end_col, end_row))| {
3518                    let total_rows = (end_row - start_row + 1) as usize;
3519                    let total_cols = (end_col - start_col + 1) as usize;
3520                    let mut row_limit = total_rows;
3521                    if let Some(page_size) = params.page_size {
3522                        row_limit = row_limit.min(page_size as usize);
3523                    }
3524
3525                    let mut rows = Vec::new();
3526                    for r in start_row..=end_row {
3527                        if rows.len() >= row_limit {
3528                            break;
3529                        }
3530                        let mut row_vals = Vec::new();
3531                        for c in start_col..=end_col {
3532                            if include_headers && r == start_row && start_row == 1 {
3533                                row_vals.push(sheet.get_cell((c, 1u32)).and_then(cell_to_value));
3534                            } else {
3535                                row_vals.push(sheet.get_cell((c, r)).and_then(cell_to_value));
3536                            }
3537                        }
3538                        rows.push(row_vals);
3539                    }
3540
3541                    let mut row_limit = cap_rows_by_cells(rows.len(), total_cols, max_cells);
3542                    if row_limit > 0 {
3543                        row_limit =
3544                            cap_rows_by_payload_bytes(row_limit, max_payload_bytes, |count| {
3545                                let entry =
3546                                    build_range_values_entry(format, range, &rows[..count], None);
3547                                serde_json::to_vec(&entry)
3548                                    .map(|payload| payload.len())
3549                                    .unwrap_or(usize::MAX)
3550                            });
3551                    }
3552
3553                    if row_limit < rows.len() {
3554                        rows.truncate(row_limit);
3555                    }
3556
3557                    let next_start_row = if rows.len() < total_rows {
3558                        Some(start_row + rows.len() as u32)
3559                    } else {
3560                        None
3561                    };
3562
3563                    build_range_values_entry(format, range, &rows, next_start_row)
3564                })
3565            })
3566            .collect();
3567
3568        Ok::<_, anyhow::Error>(values)
3569    })??;
3570
3571    #[cfg(feature = "recalc")]
3572    let warnings: Vec<Warning> = {
3573        if fork_recalc_needed(&state, &params.workbook_or_fork_id) && has_formula_in_target {
3574            vec![Warning {
3575                code: "WARN_STALE_FORMULAS".to_string(),
3576                message: "Fork has pending edits and may contain stale formula results; call recalculate on the fork for fresh values.".to_string(),
3577            }]
3578        } else {
3579            Vec::new()
3580        }
3581    };
3582
3583    #[cfg(not(feature = "recalc"))]
3584    let warnings: Vec<Warning> = Vec::new();
3585
3586    Ok(RangeValuesResponse {
3587        workbook_id: workbook.id.clone(),
3588        workbook_short_id: workbook.short_id.clone(),
3589        sheet_name: params.sheet_name,
3590        warnings,
3591        values,
3592    })
3593}
3594
3595pub async fn find_value(
3596    state: Arc<AppState>,
3597    params: FindValueParams,
3598) -> Result<FindValueResponse> {
3599    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
3600    let mut matches = Vec::new();
3601    let mut truncated = false;
3602    let mut seen: u32 = 0;
3603    let offset = params.offset.unwrap_or(0);
3604    let limit = params.limit;
3605    let mode = params.mode.clone().unwrap_or_else(|| {
3606        if params.label.is_some() {
3607            FindMode::Label
3608        } else {
3609            FindMode::Value
3610        }
3611    });
3612    let match_mode = params.match_mode.unwrap_or_default();
3613    let direction = params.direction.clone().unwrap_or(LabelDirection::Any);
3614
3615    let target_sheets: Vec<String> = if let Some(sheet) = &params.sheet_name {
3616        vec![sheet.clone()]
3617    } else {
3618        workbook.sheet_names()
3619    };
3620
3621    for sheet_name in target_sheets {
3622        let metrics_entry = workbook.get_sheet_metrics_fast(&sheet_name)?;
3623        let default_bounds = (
3624            (1, 1),
3625            (
3626                metrics_entry.metrics.column_count.max(1),
3627                metrics_entry.metrics.row_count.max(1),
3628            ),
3629        );
3630        let region_bounds = params
3631            .region_id
3632            .and_then(|id| workbook.detected_region(&sheet_name, id).ok());
3633        let (sheet_matches, sheet_seen, sheet_truncated) =
3634            workbook.with_sheet(&sheet_name, |sheet| {
3635                collect_value_matches(
3636                    sheet,
3637                    &sheet_name,
3638                    &mode,
3639                    match_mode,
3640                    &direction,
3641                    &params,
3642                    region_bounds.as_ref(),
3643                    default_bounds,
3644                    offset,
3645                    limit,
3646                    seen,
3647                )
3648            })??;
3649        seen = sheet_seen;
3650        matches.extend(sheet_matches);
3651        if sheet_truncated {
3652            truncated = true;
3653            break;
3654        }
3655    }
3656
3657    let next_offset = if truncated {
3658        Some(offset.saturating_add(matches.len() as u32))
3659    } else {
3660        None
3661    };
3662
3663    Ok(FindValueResponse {
3664        workbook_id: workbook.id.clone(),
3665        workbook_short_id: workbook.short_id.clone(),
3666        matches,
3667        next_offset,
3668    })
3669}
3670
3671pub async fn read_table(
3672    state: Arc<AppState>,
3673    params: ReadTableParams,
3674) -> Result<ReadTableResponse> {
3675    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
3676    let config = state.config();
3677    let output_profile = config.output_profile();
3678    let format = params.format.unwrap_or(match output_profile {
3679        OutputProfile::TokenDense => TableOutputFormat::Csv,
3680        OutputProfile::Verbose => TableOutputFormat::Json,
3681    });
3682    let include_headers = params.include_headers.unwrap_or(true);
3683    let include_types = params.include_types.unwrap_or(false);
3684    let resolved = resolve_table_target(&workbook, &params)?;
3685    let limit = params.limit.unwrap_or(100) as usize;
3686    let offset = params.offset.unwrap_or(0) as usize;
3687    let sample_mode = params.sample_mode.unwrap_or_default();
3688
3689    #[cfg(feature = "recalc")]
3690    let (headers, rows, total_rows, has_formula_in_target) =
3691        workbook.with_sheet(&resolved.sheet_name, |sheet| {
3692            let has_formula_in_target = sheet_has_formula_in_bounds(sheet, &[resolved.range]);
3693            let (headers, rows, total_rows) = extract_table_rows(
3694                sheet,
3695                &resolved,
3696                params.header_row,
3697                params.header_rows,
3698                params.columns.clone(),
3699                params.filters.clone(),
3700                limit,
3701                offset,
3702                sample_mode,
3703            )?;
3704            Ok::<_, anyhow::Error>((headers, rows, total_rows, has_formula_in_target))
3705        })??;
3706
3707    #[cfg(not(feature = "recalc"))]
3708    let (headers, rows, total_rows) = workbook.with_sheet(&resolved.sheet_name, |sheet| {
3709        let (headers, rows, total_rows) = extract_table_rows(
3710            sheet,
3711            &resolved,
3712            params.header_row,
3713            params.header_rows,
3714            params.columns.clone(),
3715            params.filters.clone(),
3716            limit,
3717            offset,
3718            sample_mode,
3719        )?;
3720        Ok::<_, anyhow::Error>((headers, rows, total_rows))
3721    })??;
3722
3723    #[cfg(feature = "recalc")]
3724    let warnings: Vec<Warning> = {
3725        if fork_recalc_needed(&state, &params.workbook_or_fork_id) && has_formula_in_target {
3726            vec![Warning {
3727                code: "WARN_STALE_FORMULAS".to_string(),
3728                message: "Fork has pending edits and may contain stale formula results; call recalculate on the fork for fresh values.".to_string(),
3729            }]
3730        } else {
3731            Vec::new()
3732        }
3733    };
3734
3735    #[cfg(not(feature = "recalc"))]
3736    let warnings: Vec<Warning> = Vec::new();
3737
3738    let max_cells = config.max_cells();
3739    let max_payload_bytes = config.max_payload_bytes();
3740    let mut row_limit = cap_rows_by_cells(rows.len(), headers.len().max(1), max_cells);
3741    if row_limit > 0 {
3742        row_limit = cap_rows_by_payload_bytes(row_limit, max_payload_bytes, |count| {
3743            let (headers_out, rows_out, values_out, types_out, csv_out) = build_read_table_payload(
3744                format,
3745                &headers,
3746                &rows[..count],
3747                include_headers,
3748                include_types,
3749            );
3750            let response = ReadTableResponse {
3751                workbook_id: workbook.id.clone(),
3752                workbook_short_id: workbook.short_id.clone(),
3753                sheet_name: resolved.sheet_name.clone(),
3754                table_name: resolved.table_name.clone(),
3755                warnings: warnings.clone(),
3756                headers: headers_out,
3757                rows: rows_out,
3758                values: values_out,
3759                types: types_out,
3760                csv: csv_out,
3761                total_rows,
3762                next_offset: None,
3763            };
3764            serde_json::to_vec(&response)
3765                .map(|payload| payload.len())
3766                .unwrap_or(usize::MAX)
3767        });
3768    }
3769
3770    let rows = rows.into_iter().take(row_limit).collect::<Vec<_>>();
3771    let next_offset = if offset + rows.len() < total_rows as usize {
3772        Some((offset + rows.len()) as u32)
3773    } else {
3774        None
3775    };
3776    let (headers_out, rows_out, values_out, types_out, csv_out) =
3777        build_read_table_payload(format, &headers, &rows, include_headers, include_types);
3778
3779    Ok(ReadTableResponse {
3780        workbook_id: workbook.id.clone(),
3781        workbook_short_id: workbook.short_id.clone(),
3782        sheet_name: resolved.sheet_name,
3783        table_name: resolved.table_name,
3784        warnings,
3785        headers: headers_out,
3786        rows: rows_out,
3787        values: values_out,
3788        types: types_out,
3789        csv: csv_out,
3790        total_rows,
3791        next_offset,
3792    })
3793}
3794
3795pub async fn table_profile(
3796    state: Arc<AppState>,
3797    params: TableProfileParams,
3798) -> Result<TableProfileResponse> {
3799    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
3800    let config = state.config();
3801    let output_profile = config.output_profile();
3802    let summary_only = params
3803        .summary_only
3804        .unwrap_or(matches!(output_profile, OutputProfile::TokenDense));
3805    let resolved = resolve_table_target(
3806        &workbook,
3807        &ReadTableParams {
3808            workbook_or_fork_id: params.workbook_or_fork_id.clone(),
3809            sheet_name: params.sheet_name.clone(),
3810            table_name: params.table_name.clone(),
3811            region_id: params.region_id,
3812            range: None,
3813            header_row: None,
3814            header_rows: None,
3815            columns: None,
3816            filters: None,
3817            sample_mode: params.sample_mode,
3818            limit: params.sample_size,
3819            offset: Some(0),
3820            format: Some(TableOutputFormat::Json),
3821            include_headers: None,
3822            include_types: None,
3823        },
3824    )?;
3825
3826    let sample_size = params.sample_size.unwrap_or(10) as usize;
3827    let sample_mode = params.sample_mode.unwrap_or(SampleMode::Distributed);
3828
3829    let (mut headers, rows, total_rows) =
3830        workbook.with_sheet(&resolved.sheet_name, |sheet| {
3831            extract_table_rows(
3832                sheet,
3833                &resolved,
3834                None,
3835                None,
3836                None,
3837                None,
3838                sample_size,
3839                0,
3840                sample_mode,
3841            )
3842        })??;
3843
3844    let max_items = config.max_items();
3845    let max_payload_bytes = config.max_payload_bytes();
3846
3847    if let Some(max_items) = max_items
3848        && headers.len() > max_items
3849    {
3850        headers.truncate(max_items);
3851    }
3852
3853    let mut column_types = summarize_columns(&headers, &rows);
3854
3855    let mut samples: Vec<TableRow> = if summary_only {
3856        Vec::new()
3857    } else {
3858        rows.into_iter()
3859            .map(|row| filter_table_row(&row, &headers))
3860            .collect()
3861    };
3862
3863    if !summary_only {
3864        if let Some(max_items) = max_items
3865            && samples.len() > max_items
3866        {
3867            samples.truncate(max_items);
3868        }
3869
3870        if let Some(max_bytes) = max_payload_bytes {
3871            let sample_limit = cap_rows_by_payload_bytes(samples.len(), Some(max_bytes), |count| {
3872                let response = TableProfileResponse {
3873                    workbook_id: workbook.id.clone(),
3874                    workbook_short_id: workbook.short_id.clone(),
3875                    sheet_name: resolved.sheet_name.clone(),
3876                    table_name: resolved.table_name.clone(),
3877                    headers: headers.clone(),
3878                    column_types: column_types.clone(),
3879                    row_count: total_rows,
3880                    samples: samples[..count].to_vec(),
3881                    notes: Vec::new(),
3882                };
3883                serde_json::to_vec(&response)
3884                    .map(|payload| payload.len())
3885                    .unwrap_or(usize::MAX)
3886            });
3887            if sample_limit < samples.len() {
3888                samples.truncate(sample_limit);
3889            }
3890
3891            let response = TableProfileResponse {
3892                workbook_id: workbook.id.clone(),
3893                workbook_short_id: workbook.short_id.clone(),
3894                sheet_name: resolved.sheet_name.clone(),
3895                table_name: resolved.table_name.clone(),
3896                headers: headers.clone(),
3897                column_types: column_types.clone(),
3898                row_count: total_rows,
3899                samples: samples.clone(),
3900                notes: Vec::new(),
3901            };
3902            if serde_json::to_vec(&response)
3903                .map(|payload| payload.len() > max_bytes)
3904                .unwrap_or(false)
3905                && !headers.is_empty()
3906            {
3907                let header_limit =
3908                    cap_rows_by_payload_bytes(headers.len(), Some(max_bytes), |count| {
3909                        let headers_slice = headers[..count].to_vec();
3910                        let column_slice = column_types[..count.min(column_types.len())].to_vec();
3911                        let samples_slice = samples
3912                            .iter()
3913                            .map(|row| filter_table_row(row, &headers_slice))
3914                            .collect::<Vec<_>>();
3915                        let response = TableProfileResponse {
3916                            workbook_id: workbook.id.clone(),
3917                            workbook_short_id: workbook.short_id.clone(),
3918                            sheet_name: resolved.sheet_name.clone(),
3919                            table_name: resolved.table_name.clone(),
3920                            headers: headers_slice,
3921                            column_types: column_slice,
3922                            row_count: total_rows,
3923                            samples: samples_slice,
3924                            notes: Vec::new(),
3925                        };
3926                        serde_json::to_vec(&response)
3927                            .map(|payload| payload.len())
3928                            .unwrap_or(usize::MAX)
3929                    });
3930
3931                if header_limit < headers.len() {
3932                    headers.truncate(header_limit);
3933                    column_types.truncate(header_limit.min(column_types.len()));
3934                    samples = samples
3935                        .into_iter()
3936                        .map(|row| filter_table_row(&row, &headers))
3937                        .collect();
3938                }
3939            }
3940        }
3941    }
3942
3943    Ok(TableProfileResponse {
3944        workbook_id: workbook.id.clone(),
3945        workbook_short_id: workbook.short_id.clone(),
3946        sheet_name: resolved.sheet_name,
3947        table_name: resolved.table_name,
3948        headers,
3949        column_types,
3950        row_count: total_rows,
3951        samples,
3952        notes: Vec::new(),
3953    })
3954}
3955
3956#[derive(Debug, Deserialize, JsonSchema)]
3957pub struct ManifestStubParams {
3958    #[serde(alias = "workbook_id")]
3959    pub workbook_or_fork_id: WorkbookId,
3960    pub sheet_filter: Option<String>,
3961}
3962
3963pub async fn get_manifest_stub(
3964    state: Arc<AppState>,
3965    params: ManifestStubParams,
3966) -> Result<ManifestStubResponse> {
3967    let workbook = state.open_workbook(&params.workbook_or_fork_id).await?;
3968    let mut summaries = workbook.list_summaries(true)?;
3969
3970    if let Some(filter) = &params.sheet_filter {
3971        summaries.retain(|summary| summary.name.eq_ignore_ascii_case(filter));
3972    }
3973
3974    let sheets = summaries
3975        .into_iter()
3976        .map(|summary| ManifestSheetStub {
3977            sheet_name: summary.name.clone(),
3978            classification: summary.classification.clone(),
3979            candidate_expectations: vec![format!(
3980                "Review {} sheet for expectation candidates",
3981                format!("{:?}", summary.classification).to_ascii_lowercase()
3982            )],
3983            notes: summary.style_tags,
3984        })
3985        .collect();
3986
3987    let response = ManifestStubResponse {
3988        workbook_id: workbook.id.clone(),
3989        workbook_short_id: workbook.short_id.clone(),
3990        slug: workbook.slug.clone(),
3991        sheets,
3992    };
3993    Ok(response)
3994}
3995
3996#[derive(Debug, Deserialize, JsonSchema)]
3997pub struct CloseWorkbookParams {
3998    #[serde(alias = "workbook_id")]
3999    pub workbook_or_fork_id: WorkbookId,
4000}
4001
4002pub async fn close_workbook(
4003    state: Arc<AppState>,
4004    params: CloseWorkbookParams,
4005) -> Result<CloseWorkbookResponse> {
4006    state.close_workbook(&params.workbook_or_fork_id)?;
4007    Ok(CloseWorkbookResponse {
4008        workbook_id: params.workbook_or_fork_id.clone(),
4009        message: format!("workbook {} evicted", params.workbook_or_fork_id.as_str()),
4010    })
4011}
4012#[allow(clippy::too_many_arguments)]
4013fn collect_formula_matches(
4014    sheet: &umya_spreadsheet::Worksheet,
4015    sheet_name: &str,
4016    query: &str,
4017    case_sensitive: bool,
4018    include_context: bool,
4019    context_rows: u32,
4020    context_cols: u32,
4021    offset: u32,
4022    limit: u32,
4023    seen_so_far: u32,
4024) -> (Vec<FindFormulaMatch>, u32, bool) {
4025    use crate::workbook::cell_to_value;
4026
4027    let mut results = Vec::new();
4028    let mut seen = seen_so_far;
4029
4030    for cell in sheet.get_cell_collection() {
4031        if !cell.is_formula() {
4032            continue;
4033        }
4034        let formula = cell.get_formula();
4035        let haystack = if case_sensitive {
4036            formula.to_string()
4037        } else {
4038            formula.to_ascii_lowercase()
4039        };
4040        if !haystack.contains(query) {
4041            continue;
4042        }
4043
4044        if seen < offset {
4045            seen += 1;
4046            continue;
4047        }
4048
4049        if results.len() as u32 >= limit {
4050            return (results, seen, true);
4051        }
4052
4053        let coord = cell.get_coordinate();
4054        let column = *coord.get_col_num();
4055        let row = *coord.get_row_num();
4056
4057        let context = if include_context {
4058            let col_start = column.saturating_sub(context_cols / 2).max(1);
4059            let col_end = column + context_cols / 2;
4060            let columns: Vec<u32> = (col_start..=col_end).collect();
4061
4062            let mut context_rows_vec = Vec::new();
4063
4064            if context_rows > 0 {
4065                let header_row = build_row_snapshot(sheet, 1, &columns, false, false);
4066                context_rows_vec.push(header_row);
4067            }
4068
4069            let row_start = row.saturating_sub(context_rows / 2).max(1);
4070            let row_end = (row + context_rows / 2).min(sheet.get_highest_row());
4071
4072            for ctx_row in row_start..=row_end {
4073                let ctx_row_snapshot = build_row_snapshot(sheet, ctx_row, &columns, true, false);
4074                context_rows_vec.push(ctx_row_snapshot);
4075            }
4076
4077            context_rows_vec
4078        } else {
4079            Vec::new()
4080        };
4081
4082        results.push(FindFormulaMatch {
4083            address: coord.get_coordinate(),
4084            sheet_name: sheet_name.to_string(),
4085            formula: formula.to_string(),
4086            cached_value: cell_to_value(cell),
4087            context,
4088        });
4089
4090        seen += 1;
4091    }
4092
4093    (results, seen, false)
4094}
4095
4096#[derive(Clone)]
4097struct TraceFormulaInfo {
4098    fingerprint: String,
4099    formula: String,
4100}
4101
4102#[derive(Clone)]
4103struct TraceEdgeRaw {
4104    from: String,
4105    to: String,
4106    neighbor: String,
4107}
4108
4109#[derive(Clone)]
4110struct LayerLinks {
4111    depth: u32,
4112    edges: Vec<TraceEdgeRaw>,
4113    truncated_cells: Vec<String>,
4114}
4115
4116#[derive(Clone)]
4117struct NeighborDetail {
4118    address: String,
4119    column: Option<u32>,
4120    row: Option<u32>,
4121    kind: TraceCellKind,
4122    value: Option<CellValue>,
4123    formula: Option<String>,
4124    fingerprint: Option<String>,
4125    external: bool,
4126}
4127
4128fn build_formula_lookup(graph: &FormulaGraph) -> HashMap<String, TraceFormulaInfo> {
4129    let mut map = HashMap::new();
4130    for group in graph.groups() {
4131        for address in group.addresses.clone() {
4132            map.insert(
4133                address.to_ascii_uppercase(),
4134                TraceFormulaInfo {
4135                    fingerprint: group.fingerprint.clone(),
4136                    formula: group.formula.clone(),
4137                },
4138            );
4139        }
4140    }
4141    map
4142}
4143
4144struct TraceConfig<'a> {
4145    direction: &'a TraceDirection,
4146    origin: &'a str,
4147    sheet_name: &'a str,
4148    depth_limit: u32,
4149    page_size: usize,
4150}
4151
4152fn build_trace_layers(
4153    workbook: &WorkbookContext,
4154    graph: &FormulaGraph,
4155    formula_lookup: &HashMap<String, TraceFormulaInfo>,
4156    config: &TraceConfig<'_>,
4157    cursor: Option<TraceCursor>,
4158) -> Result<(Vec<TraceLayer>, Option<TraceCursor>, Vec<String>)> {
4159    let layer_links =
4160        collect_layer_links(graph, config.direction, config.origin, config.depth_limit);
4161    let mut layers = Vec::new();
4162    let mut next_cursor = None;
4163    let mut notes = Vec::new();
4164    let focus_depth = cursor.as_ref().map(|c| c.depth);
4165
4166    for layer in layer_links {
4167        let produce_edges = focus_depth.is_none_or(|depth| depth == layer.depth);
4168        let offset = cursor
4169            .as_ref()
4170            .filter(|c| c.depth == layer.depth)
4171            .map(|c| c.offset)
4172            .unwrap_or(0);
4173
4174        let mut node_set: HashSet<String> = HashSet::new();
4175        for edge in &layer.edges {
4176            node_set.insert(edge.neighbor.clone());
4177        }
4178        let mut nodes: Vec<String> = node_set.into_iter().collect();
4179        nodes.sort_by(|a, b| compare_addresses(a, b));
4180
4181        let details = workbook.with_sheet(config.sheet_name, |sheet| {
4182            collect_neighbor_details(sheet, config.sheet_name, &nodes, formula_lookup)
4183        })?;
4184        let total_nodes = details.len();
4185        let start = offset.min(total_nodes);
4186        let end = if produce_edges {
4187            (start + config.page_size).min(total_nodes)
4188        } else {
4189            start
4190        };
4191        let selected_slice = if produce_edges {
4192            &details[start..end]
4193        } else {
4194            &details[0..0]
4195        };
4196        let selected_addresses: HashSet<String> = selected_slice
4197            .iter()
4198            .map(|detail| detail.address.clone())
4199            .collect();
4200
4201        let summary = build_layer_summary(&details);
4202        let range_highlights = build_range_highlights(&details);
4203        let group_highlights = build_formula_group_highlights(&details);
4204        let notable_cells = build_notable_cells(&details, &range_highlights, &group_highlights);
4205
4206        let highlights = TraceLayerHighlights {
4207            top_ranges: range_highlights.clone(),
4208            top_formula_groups: group_highlights.clone(),
4209            notable_cells,
4210        };
4211
4212        let edges = if produce_edges {
4213            build_edges_for_layer(&layer.edges, &selected_addresses, formula_lookup)
4214        } else {
4215            Vec::new()
4216        };
4217
4218        let has_more = produce_edges && end < total_nodes;
4219        if has_more && next_cursor.is_none() {
4220            next_cursor = Some(TraceCursor {
4221                depth: layer.depth,
4222                offset: end,
4223            });
4224        }
4225        if has_more {
4226            notes.push(format!(
4227                "Layer {} truncated at {} of {} nodes; supply cursor.depth={} and cursor.offset={} to continue",
4228                layer.depth, end, total_nodes, layer.depth, end
4229            ));
4230        }
4231
4232        if !layer.truncated_cells.is_empty() {
4233            let cell_list = if layer.truncated_cells.len() <= 3 {
4234                layer.truncated_cells.join(", ")
4235            } else {
4236                format!(
4237                    "{}, ... ({} more)",
4238                    layer.truncated_cells[..3].join(", "),
4239                    layer.truncated_cells.len() - 3
4240                )
4241            };
4242            notes.push(format!(
4243                "Layer {}: dependents truncated at {} per cell for: {}",
4244                layer.depth, TRACE_DEPENDENTS_PER_CELL_LIMIT, cell_list
4245            ));
4246        }
4247
4248        layers.push(TraceLayer {
4249            depth: layer.depth,
4250            summary,
4251            highlights,
4252            edges,
4253            has_more,
4254        });
4255    }
4256
4257    Ok((layers, next_cursor, notes))
4258}
4259
4260fn collect_layer_links(
4261    graph: &FormulaGraph,
4262    direction: &TraceDirection,
4263    origin: &str,
4264    depth_limit: u32,
4265) -> Vec<LayerLinks> {
4266    let mut visited: HashSet<String> = HashSet::new();
4267    visited.insert(origin.to_string());
4268    let mut frontier = vec![origin.to_string()];
4269    let mut layers = Vec::new();
4270
4271    for depth in 1..=depth_limit {
4272        let mut next_frontier_set: HashSet<String> = HashSet::new();
4273        let mut edges = Vec::new();
4274        let mut truncated_cells = Vec::new();
4275
4276        for cell in &frontier {
4277            let (neighbors, was_truncated) = match direction {
4278                TraceDirection::Precedents => (graph.precedents(cell), false),
4279                TraceDirection::Dependents => {
4280                    graph.dependents_limited(cell, Some(TRACE_DEPENDENTS_PER_CELL_LIMIT))
4281                }
4282            };
4283
4284            if was_truncated {
4285                truncated_cells.push(cell.clone());
4286            }
4287
4288            for neighbor in neighbors {
4289                let neighbor_upper = neighbor.to_ascii_uppercase();
4290                let edge = match direction {
4291                    TraceDirection::Precedents => TraceEdgeRaw {
4292                        from: cell.clone(),
4293                        to: neighbor_upper.clone(),
4294                        neighbor: neighbor_upper.clone(),
4295                    },
4296                    TraceDirection::Dependents => TraceEdgeRaw {
4297                        from: neighbor_upper.clone(),
4298                        to: cell.clone(),
4299                        neighbor: neighbor_upper.clone(),
4300                    },
4301                };
4302                edges.push(edge);
4303                if visited.insert(neighbor_upper.clone()) {
4304                    next_frontier_set.insert(neighbor_upper);
4305                }
4306            }
4307        }
4308
4309        if edges.is_empty() {
4310            break;
4311        }
4312
4313        layers.push(LayerLinks {
4314            depth,
4315            edges,
4316            truncated_cells,
4317        });
4318        if next_frontier_set.is_empty() {
4319            break;
4320        }
4321        let mut next_frontier: Vec<String> = next_frontier_set.into_iter().collect();
4322        next_frontier.sort();
4323        frontier = next_frontier;
4324    }
4325
4326    layers
4327}
4328
4329fn collect_neighbor_details(
4330    sheet: &umya_spreadsheet::Worksheet,
4331    current_sheet: &str,
4332    addresses: &[String],
4333    formula_lookup: &HashMap<String, TraceFormulaInfo>,
4334) -> Vec<NeighborDetail> {
4335    let mut details = Vec::new();
4336    for address in addresses {
4337        let (sheet_part, cell_part) = split_sheet_and_cell(address);
4338        let normalized_sheet = sheet_part
4339            .as_ref()
4340            .map(|s| clean_sheet_name(s).to_ascii_lowercase());
4341        let is_external = normalized_sheet
4342            .as_ref()
4343            .map(|s| !s.eq_ignore_ascii_case(current_sheet))
4344            .unwrap_or(false);
4345
4346        let Some(cell_ref) = cell_part else {
4347            details.push(NeighborDetail {
4348                address: address.clone(),
4349                column: None,
4350                row: None,
4351                kind: TraceCellKind::External,
4352                value: None,
4353                formula: None,
4354                fingerprint: None,
4355                external: true,
4356            });
4357            continue;
4358        };
4359
4360        let cell_ref_upper = cell_ref.to_ascii_uppercase();
4361
4362        if is_external {
4363            let formula_info = lookup_formula_info(formula_lookup, &cell_ref_upper, address);
4364            details.push(NeighborDetail {
4365                address: address.clone(),
4366                column: None,
4367                row: None,
4368                kind: TraceCellKind::External,
4369                value: None,
4370                formula: formula_info.map(|info| info.formula.clone()),
4371                fingerprint: formula_info.map(|info| info.fingerprint.clone()),
4372                external: true,
4373            });
4374            continue;
4375        }
4376
4377        let Some((col, row)) = parse_address(&cell_ref_upper) else {
4378            details.push(NeighborDetail {
4379                address: address.clone(),
4380                column: None,
4381                row: None,
4382                kind: TraceCellKind::External,
4383                value: None,
4384                formula: None,
4385                fingerprint: None,
4386                external: true,
4387            });
4388            continue;
4389        };
4390
4391        let cell_opt = sheet.get_cell((&col, &row));
4392        let formula_info = lookup_formula_info(formula_lookup, &cell_ref_upper, address);
4393        if let Some(cell) = cell_opt {
4394            let value = cell_to_value(cell);
4395            let kind = if cell.is_formula() {
4396                TraceCellKind::Formula
4397            } else if value.is_some() {
4398                TraceCellKind::Literal
4399            } else {
4400                TraceCellKind::Blank
4401            };
4402            details.push(NeighborDetail {
4403                address: address.clone(),
4404                column: Some(col),
4405                row: Some(row),
4406                kind,
4407                value,
4408                formula: formula_info.map(|info| info.formula.clone()),
4409                fingerprint: formula_info.map(|info| info.fingerprint.clone()),
4410                external: false,
4411            });
4412        } else {
4413            details.push(NeighborDetail {
4414                address: address.clone(),
4415                column: Some(col),
4416                row: Some(row),
4417                kind: TraceCellKind::Blank,
4418                value: None,
4419                formula: formula_info.map(|info| info.formula.clone()),
4420                fingerprint: formula_info.map(|info| info.fingerprint.clone()),
4421                external: false,
4422            });
4423        }
4424    }
4425    details
4426}
4427
4428fn build_layer_summary(details: &[NeighborDetail]) -> TraceLayerSummary {
4429    let mut summary = TraceLayerSummary {
4430        total_nodes: details.len(),
4431        formula_nodes: 0,
4432        value_nodes: 0,
4433        blank_nodes: 0,
4434        external_nodes: 0,
4435        unique_formula_groups: 0,
4436    };
4437
4438    let mut fingerprints: HashSet<String> = HashSet::new();
4439
4440    for detail in details {
4441        match detail.kind {
4442            TraceCellKind::Formula => {
4443                summary.formula_nodes += 1;
4444                if let Some(fp) = &detail.fingerprint {
4445                    fingerprints.insert(fp.clone());
4446                }
4447            }
4448            TraceCellKind::Literal => summary.value_nodes += 1,
4449            TraceCellKind::Blank => summary.blank_nodes += 1,
4450            TraceCellKind::External => summary.external_nodes += 1,
4451        }
4452    }
4453
4454    summary.unique_formula_groups = fingerprints.len();
4455    summary
4456}
4457
4458fn build_formula_group_highlights(details: &[NeighborDetail]) -> Vec<TraceFormulaGroupHighlight> {
4459    let mut aggregates: HashMap<String, (String, usize, Vec<String>)> = HashMap::new();
4460    for detail in details {
4461        if let (Some(fingerprint), Some(formula)) = (&detail.fingerprint, &detail.formula) {
4462            let entry = aggregates
4463                .entry(fingerprint.clone())
4464                .or_insert_with(|| (formula.clone(), 0, Vec::new()));
4465            entry.1 += 1;
4466            if entry.2.len() < TRACE_GROUP_SAMPLE_LIMIT {
4467                entry.2.push(detail.address.clone());
4468            }
4469        }
4470    }
4471
4472    let mut highlights: Vec<TraceFormulaGroupHighlight> = aggregates
4473        .into_iter()
4474        .map(
4475            |(fingerprint, (formula, count, sample_addresses))| TraceFormulaGroupHighlight {
4476                fingerprint,
4477                formula,
4478                count,
4479                sample_addresses,
4480            },
4481        )
4482        .collect();
4483
4484    highlights.sort_by(|a, b| b.count.cmp(&a.count));
4485    highlights.truncate(TRACE_GROUP_HIGHLIGHT_LIMIT);
4486    highlights
4487}
4488
4489fn build_range_highlights(details: &[NeighborDetail]) -> Vec<TraceRangeHighlight> {
4490    let mut by_column: HashMap<u32, Vec<&NeighborDetail>> = HashMap::new();
4491    for detail in details {
4492        if let (Some(col), Some(_row)) = (detail.column, detail.row)
4493            && !detail.external
4494        {
4495            by_column.entry(col).or_default().push(detail);
4496        }
4497    }
4498
4499    for column_entries in by_column.values_mut() {
4500        column_entries.sort_by(|a, b| a.row.cmp(&b.row));
4501    }
4502
4503    let mut ranges = Vec::new();
4504    for entries in by_column.values() {
4505        let mut current: Vec<&NeighborDetail> = Vec::new();
4506        for detail in entries {
4507            if current.is_empty() {
4508                current.push(detail);
4509                continue;
4510            }
4511            let prev_row = current.last().and_then(|d| d.row).unwrap_or(0);
4512            if detail.row.unwrap_or(0) == prev_row + 1 {
4513                current.push(detail);
4514            } else {
4515                if current.len() >= TRACE_RANGE_THRESHOLD {
4516                    ranges.push(make_range_highlight(&current));
4517                }
4518                current.clear();
4519                current.push(detail);
4520            }
4521        }
4522        if current.len() >= TRACE_RANGE_THRESHOLD {
4523            ranges.push(make_range_highlight(&current));
4524        }
4525    }
4526
4527    ranges.sort_by(|a, b| b.count.cmp(&a.count));
4528    ranges.truncate(TRACE_RANGE_HIGHLIGHT_LIMIT);
4529    ranges
4530}
4531
4532fn make_range_highlight(details: &[&NeighborDetail]) -> TraceRangeHighlight {
4533    let mut literals = 0usize;
4534    let mut formulas = 0usize;
4535    let mut blanks = 0usize;
4536    let mut sample_values = Vec::new();
4537    let mut sample_formulas = Vec::new();
4538    let mut sample_addresses = Vec::new();
4539
4540    for detail in details {
4541        match detail.kind {
4542            TraceCellKind::Formula => {
4543                formulas += 1;
4544                if let Some(formula) = &detail.formula
4545                    && sample_formulas.len() < TRACE_RANGE_FORMULA_SAMPLES
4546                    && !sample_formulas.contains(formula)
4547                {
4548                    sample_formulas.push(formula.clone());
4549                }
4550            }
4551            TraceCellKind::Literal => {
4552                literals += 1;
4553                if let Some(value) = &detail.value
4554                    && sample_values.len() < TRACE_RANGE_VALUE_SAMPLES
4555                {
4556                    sample_values.push(value.clone());
4557                }
4558            }
4559            TraceCellKind::Blank => blanks += 1,
4560            TraceCellKind::External => {}
4561        }
4562        if sample_addresses.len() < TRACE_RANGE_VALUE_SAMPLES {
4563            sample_addresses.push(detail.address.clone());
4564        }
4565    }
4566
4567    TraceRangeHighlight {
4568        start: details
4569            .first()
4570            .map(|d| d.address.clone())
4571            .unwrap_or_default(),
4572        end: details
4573            .last()
4574            .map(|d| d.address.clone())
4575            .unwrap_or_default(),
4576        count: details.len(),
4577        literals,
4578        formulas,
4579        blanks,
4580        sample_values,
4581        sample_formulas,
4582        sample_addresses,
4583    }
4584}
4585
4586fn build_notable_cells(
4587    details: &[NeighborDetail],
4588    ranges: &[TraceRangeHighlight],
4589    groups: &[TraceFormulaGroupHighlight],
4590) -> Vec<TraceCellHighlight> {
4591    let mut exclude: HashSet<String> = HashSet::new();
4592    for range in ranges {
4593        exclude.insert(range.start.clone());
4594        exclude.insert(range.end.clone());
4595        for addr in &range.sample_addresses {
4596            exclude.insert(addr.clone());
4597        }
4598    }
4599    for group in groups {
4600        for addr in &group.sample_addresses {
4601            exclude.insert(addr.clone());
4602        }
4603    }
4604
4605    let mut highlights = Vec::new();
4606    let mut kind_counts: HashMap<TraceCellKind, usize> = HashMap::new();
4607
4608    for detail in details {
4609        if highlights.len() >= TRACE_CELL_HIGHLIGHT_LIMIT {
4610            break;
4611        }
4612        if exclude.contains(&detail.address) {
4613            continue;
4614        }
4615        let counter = kind_counts.entry(detail.kind.clone()).or_insert(0);
4616        if *counter >= 2 && detail.kind != TraceCellKind::External {
4617            continue;
4618        }
4619        highlights.push(TraceCellHighlight {
4620            address: detail.address.clone(),
4621            kind: detail.kind.clone(),
4622            value: detail.value.clone(),
4623            formula: detail.formula.clone(),
4624        });
4625        *counter += 1;
4626    }
4627
4628    highlights
4629}
4630
4631fn build_edges_for_layer(
4632    raw_edges: &[TraceEdgeRaw],
4633    selected: &HashSet<String>,
4634    formula_lookup: &HashMap<String, TraceFormulaInfo>,
4635) -> Vec<FormulaTraceEdge> {
4636    let mut edges = Vec::new();
4637    for edge in raw_edges {
4638        if selected.contains(&edge.neighbor) {
4639            let formula = lookup_formula_info(formula_lookup, &edge.neighbor, &edge.neighbor)
4640                .map(|info| info.formula.clone());
4641            edges.push(FormulaTraceEdge {
4642                from: edge.from.clone(),
4643                to: edge.to.clone(),
4644                formula,
4645                note: None,
4646            });
4647        }
4648    }
4649    edges.sort_by(|a, b| compare_addresses(&a.to, &b.to));
4650    edges
4651}
4652
4653fn lookup_formula_info<'a>(
4654    lookup: &'a HashMap<String, TraceFormulaInfo>,
4655    cell_ref: &str,
4656    original: &str,
4657) -> Option<&'a TraceFormulaInfo> {
4658    if let Some(info) = lookup.get(cell_ref) {
4659        return Some(info);
4660    }
4661    if let (Some(_sheet), Some(cell)) = split_sheet_and_cell(original) {
4662        let upper = cell.to_ascii_uppercase();
4663        return lookup.get(&upper);
4664    }
4665    None
4666}
4667
4668fn compare_addresses(left: &str, right: &str) -> Ordering {
4669    let (sheet_left, cell_left) = split_sheet_and_cell(left);
4670    let (sheet_right, cell_right) = split_sheet_and_cell(right);
4671
4672    let sheet_left_key = sheet_left
4673        .as_ref()
4674        .map(|s| clean_sheet_name(s).to_ascii_uppercase())
4675        .unwrap_or_default();
4676    let sheet_right_key = sheet_right
4677        .as_ref()
4678        .map(|s| clean_sheet_name(s).to_ascii_uppercase())
4679        .unwrap_or_default();
4680
4681    match sheet_left_key.cmp(&sheet_right_key) {
4682        Ordering::Equal => {
4683            let left_core = cell_left.unwrap_or_else(|| left.to_string());
4684            let right_core = cell_right.unwrap_or_else(|| right.to_string());
4685            let left_coords = parse_address(&left_core.to_ascii_uppercase());
4686            let right_coords = parse_address(&right_core.to_ascii_uppercase());
4687            match (left_coords, right_coords) {
4688                (Some((lc, lr)), Some((rc, rr))) => lc
4689                    .cmp(&rc)
4690                    .then_with(|| lr.cmp(&rr))
4691                    .then_with(|| left_core.cmp(&right_core)),
4692                _ => left_core.cmp(&right_core),
4693            }
4694        }
4695        other => other,
4696    }
4697}
4698
4699fn split_sheet_and_cell(address: &str) -> (Option<String>, Option<String>) {
4700    if let Some(idx) = address.rfind('!') {
4701        let sheet = address[..idx].to_string();
4702        let cell = address[idx + 1..].to_string();
4703        (Some(sheet), Some(cell))
4704    } else {
4705        (None, Some(address.to_string()))
4706    }
4707}
4708
4709fn clean_sheet_name(sheet: &str) -> String {
4710    let trimmed = sheet.trim_matches(|c| c == '\'' || c == '"');
4711    let after_bracket = trimmed.rsplit(']').next().unwrap_or(trimmed);
4712    after_bracket
4713        .trim_matches(|c| c == '\'' || c == '"')
4714        .to_string()
4715}