1pub mod filters;
2#[cfg(feature = "recalc")]
3pub mod fork;
4pub mod param_enums;
5#[cfg(feature = "recalc")]
6pub mod rules_batch;
7#[cfg(feature = "recalc")]
8pub mod sheet_layout;
9pub mod vba;
10#[cfg(feature = "recalc")]
11pub mod write_normalize;
12
13use crate::analysis::{formula::FormulaGraph, stats};
14use crate::config::OutputProfile;
15use crate::model::*;
16use crate::state::AppState;
17use crate::utils::column_number_to_name;
18use crate::workbook::{WorkbookContext, cell_to_value};
19use anyhow::{Result, anyhow};
20use regex::Regex;
21use schemars::JsonSchema;
22use serde::Deserialize;
23use std::cmp::Ordering;
24use std::collections::{BTreeMap, HashMap, HashSet};
25use std::path::PathBuf;
26use std::sync::Arc;
27
28#[cfg(feature = "recalc")]
29fn fork_recalc_needed(state: &AppState, workbook_or_fork_id: &WorkbookId) -> bool {
30 state
31 .fork_registry()
32 .and_then(|registry| registry.get_fork(workbook_or_fork_id.as_str()).ok())
33 .is_some_and(|ctx| ctx.recalc_needed)
34}
35
36#[cfg(feature = "recalc")]
37fn sheet_has_formula_in_bounds(
38 sheet: &umya_spreadsheet::Worksheet,
39 bounds: &[((u32, u32), (u32, u32))],
40) -> bool {
41 if bounds.is_empty() {
42 return false;
43 }
44 for cell in sheet.get_cell_collection() {
45 if !cell.is_formula() {
46 continue;
47 }
48 let address = cell.get_coordinate().get_coordinate().to_string();
49 let Some((col, row)) = parse_address(&address) else {
50 continue;
51 };
52 if bounds
53 .iter()
54 .any(|b| col >= b.0.0 && col <= b.1.0 && row >= b.0.1 && row <= b.1.1)
55 {
56 return true;
57 }
58 }
59 false
60}
61
62const DEFAULT_TRACE_PAGE_SIZE: usize = 20;
63const TRACE_PAGE_MIN: usize = 5;
64const TRACE_PAGE_MAX: usize = 200;
65const TRACE_RANGE_THRESHOLD: usize = 4;
66const TRACE_RANGE_HIGHLIGHT_LIMIT: usize = 3;
67const TRACE_GROUP_HIGHLIGHT_LIMIT: usize = 3;
68const TRACE_CELL_HIGHLIGHT_LIMIT: usize = 5;
69const TRACE_RANGE_VALUE_SAMPLES: usize = 3;
70const TRACE_RANGE_FORMULA_SAMPLES: usize = 2;
71const TRACE_GROUP_SAMPLE_LIMIT: usize = 5;
72const TRACE_DEPENDENTS_PER_CELL_LIMIT: usize = 500;
73
74const DEFAULT_OVERVIEW_MAX_REGIONS: u32 = 25;
75const DEFAULT_OVERVIEW_MAX_HEADERS: u32 = 50;
76const DEFAULT_OVERVIEW_INCLUDE_HEADERS: bool = true;
77
78const ENTRY_POINT_MAX_ROWS: u32 = 10_000;
79const ENTRY_POINT_MAX_COLS: u32 = 200;
80
81pub async fn list_workbooks(
82 state: Arc<AppState>,
83 params: ListWorkbooksParams,
84) -> Result<WorkbookListResponse> {
85 let config = state.config();
86 let output_profile = config.output_profile();
87 let include_paths = params
88 .include_paths
89 .unwrap_or(!matches!(output_profile, OutputProfile::TokenDense));
90
91 let offset = params.offset.unwrap_or(0) as usize;
92 let limit = params.limit.unwrap_or(100) as usize;
93 let filter = params.into_filter()?;
94 let mut response = state.list_workbooks(filter)?;
95 let total_count = response.workbooks.len();
96
97 if offset < total_count {
98 let end = (offset + limit).min(total_count);
99 response.workbooks = response
100 .workbooks
101 .into_iter()
102 .skip(offset)
103 .take(end - offset)
104 .collect();
105 } else {
106 response.workbooks.clear();
107 }
108
109 if !include_paths {
111 for wb in &mut response.workbooks {
112 wb.path = None;
113 wb.caps = None;
114 wb.client_path = None;
115 }
116 } else if !config.path_mappings.is_empty() {
117 for wb in &mut response.workbooks {
118 if let Some(p) = wb.path.as_ref() {
119 let abs = config.resolve_path(PathBuf::from(p));
120 wb.client_path = config
121 .map_path_for_client(&abs)
122 .map(|mapped| mapped.display().to_string());
123 }
124 }
125 }
126
127 response.next_offset = if offset + response.workbooks.len() < total_count {
129 Some((offset + response.workbooks.len()) as u32)
130 } else {
131 None
132 };
133
134 Ok(response)
135}
136
137pub async fn describe_workbook(
138 state: Arc<AppState>,
139 params: DescribeWorkbookParams,
140) -> Result<WorkbookDescription> {
141 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
142 let mut desc = workbook.describe();
143 let config = state.config();
144 if !config.path_mappings.is_empty() {
145 let internal = PathBuf::from(&desc.path);
146 desc.client_path = config
147 .map_path_for_client(&internal)
148 .map(|mapped| mapped.display().to_string());
149 }
150 Ok(desc)
151}
152
153#[derive(Debug, Deserialize, JsonSchema)]
154pub struct ListWorkbooksParams {
155 pub slug_prefix: Option<String>,
157 pub folder: Option<String>,
159 pub path_glob: Option<String>,
161 #[serde(default)]
163 pub limit: Option<u32>,
164 #[serde(default)]
166 pub offset: Option<u32>,
167 #[serde(default)]
169 pub include_paths: Option<bool>,
170}
171
172impl ListWorkbooksParams {
173 fn into_filter(self) -> Result<filters::WorkbookFilter> {
174 filters::WorkbookFilter::new(self.slug_prefix, self.folder, self.path_glob)
175 }
176}
177
178#[derive(Debug, Deserialize, JsonSchema)]
179pub struct DescribeWorkbookParams {
180 #[serde(alias = "workbook_id")]
181 pub workbook_or_fork_id: WorkbookId,
182}
183
184#[derive(Debug, Deserialize, JsonSchema)]
185pub struct ListSheetsParams {
186 #[serde(alias = "workbook_id")]
188 pub workbook_or_fork_id: WorkbookId,
189 #[serde(default)]
191 pub limit: Option<u32>,
192 #[serde(default)]
194 pub offset: Option<u32>,
195 #[serde(default)]
197 pub include_bounds: Option<bool>,
198}
199
200pub async fn list_sheets(
201 state: Arc<AppState>,
202 params: ListSheetsParams,
203) -> Result<SheetListResponse> {
204 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
205 let config = state.config();
206 let output_profile = config.output_profile();
207 let include_bounds = params
208 .include_bounds
209 .unwrap_or(!matches!(output_profile, OutputProfile::TokenDense));
210 let mut summaries = workbook.list_summaries(include_bounds)?;
211
212 let total_count = summaries.len();
213 let offset = params.offset.unwrap_or(0) as usize;
214 let limit = params.limit.unwrap_or(100) as usize;
215
216 if offset < total_count {
217 summaries = summaries.into_iter().skip(offset).take(limit).collect();
218 } else {
219 summaries.clear();
220 }
221
222 let next_offset = if offset + summaries.len() < total_count {
223 Some((offset + summaries.len()) as u32)
224 } else {
225 None
226 };
227
228 let response = SheetListResponse {
229 workbook_id: workbook.id.clone(),
230 workbook_short_id: workbook.short_id.clone(),
231 sheets: summaries,
232 next_offset,
233 };
234 Ok(response)
235}
236
237#[derive(Debug, Deserialize, JsonSchema)]
238pub struct SheetOverviewParams {
239 #[serde(alias = "workbook_id")]
241 pub workbook_or_fork_id: WorkbookId,
242 pub sheet_name: String,
244 #[serde(default)]
246 pub max_regions: Option<u32>,
247 #[serde(default)]
249 pub max_headers: Option<u32>,
250 #[serde(default)]
252 pub include_headers: Option<bool>,
253}
254
255#[derive(Debug, Deserialize, JsonSchema)]
256pub struct WorkbookSummaryParams {
257 #[serde(alias = "workbook_id")]
259 pub workbook_or_fork_id: WorkbookId,
260 #[serde(default)]
262 pub summary_only: Option<bool>,
263 #[serde(default)]
265 pub include_entry_points: Option<bool>,
266 #[serde(default)]
268 pub include_named_ranges: Option<bool>,
269}
270
271pub async fn workbook_summary(
272 state: Arc<AppState>,
273 params: WorkbookSummaryParams,
274) -> Result<WorkbookSummaryResponse> {
275 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
276 let config = state.config();
277 let output_profile = config.output_profile();
278 let summary_only = params
279 .summary_only
280 .unwrap_or(matches!(output_profile, OutputProfile::TokenDense));
281 let include_entry_points = params.include_entry_points.unwrap_or(!summary_only);
282 let include_named_ranges = params.include_named_ranges.unwrap_or(!summary_only);
283
284 tokio::task::spawn_blocking(move || {
285 build_workbook_summary(workbook, include_entry_points, include_named_ranges)
286 })
287 .await?
288}
289
290fn build_workbook_summary(
291 workbook: Arc<WorkbookContext>,
292 include_entry_points: bool,
293 include_named_ranges: bool,
294) -> Result<WorkbookSummaryResponse> {
295 let sheet_names = workbook.sheet_names();
296
297 let mut total_cells: u64 = 0;
298 let mut total_formulas: u64 = 0;
299 let mut breakdown = WorkbookBreakdown::default();
300 let mut region_counts = RegionCountSummary::default();
301 let mut entry_points: Vec<EntryPoint> = Vec::new();
302 let mut key_named_ranges: Vec<NamedRangeDescriptor> = Vec::new();
303 let mut notes: Vec<String> = Vec::new();
304
305 for sheet_name in &sheet_names {
306 let entry = workbook.get_sheet_metrics_fast(sheet_name)?;
307 total_cells += (entry.metrics.row_count as u64) * (entry.metrics.column_count as u64);
308 total_formulas += entry.metrics.formula_cells as u64;
309
310 match entry.metrics.classification {
311 SheetClassification::Calculator => breakdown.calculator_sheets += 1,
312 SheetClassification::Metadata => breakdown.metadata_sheets += 1,
313 SheetClassification::Empty => {}
314 _ => breakdown.data_sheets += 1,
315 }
316
317 if entry.metrics.non_empty_cells == 0 {
318 continue;
319 }
320
321 match entry.metrics.classification {
322 SheetClassification::Calculator => region_counts.calculator += 1,
323 SheetClassification::Metadata => region_counts.metadata += 1,
324 SheetClassification::Empty => {}
325 _ => region_counts.data += 1,
326 }
327
328 if include_entry_points {
329 let priority = entry_point_priority(&entry.metrics.classification);
330 entry_points.push(EntryPoint {
331 sheet_name: sheet_name.clone(),
332 region_id: None,
333 bounds: entry_point_bounds(&entry.metrics),
334 rationale: format!(
335 "Fast summary p{}: {:?} sheet",
336 priority, entry.metrics.classification
337 ),
338 });
339 }
340 }
341
342 if include_entry_points {
343 entry_points.sort_by(|a, b| {
344 let pa = priority_from_rationale(&a.rationale);
345 let pb = priority_from_rationale(&b.rationale);
346 pa.cmp(&pb)
347 .then_with(|| {
348 a.bounds
349 .as_ref()
350 .map(|_| 1)
351 .cmp(&b.bounds.as_ref().map(|_| 1))
352 })
353 .then_with(|| a.sheet_name.cmp(&b.sheet_name))
354 });
355 entry_points.truncate(5);
356 }
357
358 if include_named_ranges {
359 let mut seen_ranges = std::collections::HashSet::new();
360 for item in workbook.named_items()? {
361 if item.kind != NamedItemKind::NamedRange && item.kind != NamedItemKind::Table {
362 continue;
363 }
364 if !seen_ranges.insert(item.refers_to.clone()) {
365 continue;
366 }
367 key_named_ranges.push(item);
368 if key_named_ranges.len() >= 10 {
369 break;
370 }
371 }
372 }
373
374 notes.push("Region counts and entry points are inferred from sheet metrics; use sheet_overview for full region detection.".to_string());
375
376 Ok(WorkbookSummaryResponse {
377 workbook_id: workbook.id.clone(),
378 workbook_short_id: workbook.short_id.clone(),
379 slug: workbook.slug.clone(),
380 sheet_count: sheet_names.len(),
381 total_cells,
382 total_formulas,
383 breakdown,
384 region_counts,
385 key_named_ranges,
386 suggested_entry_points: entry_points,
387 notes,
388 })
389}
390
391fn entry_point_priority(classification: &SheetClassification) -> u32 {
392 match classification {
393 SheetClassification::Data => 1,
394 SheetClassification::Mixed => 2,
395 SheetClassification::Calculator => 3,
396 SheetClassification::Metadata => 4,
397 SheetClassification::Empty => 5,
398 }
399}
400
401fn entry_point_bounds(metrics: &crate::workbook::SheetMetrics) -> Option<String> {
402 if metrics.row_count == 0 || metrics.column_count == 0 {
403 return None;
404 }
405 if metrics.row_count > ENTRY_POINT_MAX_ROWS || metrics.column_count > ENTRY_POINT_MAX_COLS {
406 return None;
407 }
408 let end_col = column_number_to_name(metrics.column_count.max(1));
409 let end_cell = format!("{}{}", end_col, metrics.row_count.max(1));
410 Some(format!("A1:{}", end_cell))
411}
412
413fn priority_from_rationale(rationale: &str) -> u32 {
414 if rationale.contains("p0") {
415 0
416 } else if rationale.contains("p1") {
417 1
418 } else if rationale.contains("p2") {
419 2
420 } else if rationale.contains("p3") {
421 3
422 } else if rationale.contains("p4") {
423 4
424 } else {
425 5
426 }
427}
428
429pub async fn sheet_overview(
430 state: Arc<AppState>,
431 params: SheetOverviewParams,
432) -> Result<SheetOverviewResponse> {
433 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
434 let sheet_name = params.sheet_name.clone();
435 let mut overview =
436 tokio::task::spawn_blocking(move || workbook.sheet_overview(&sheet_name)).await??;
437
438 let max_regions = params
439 .max_regions
440 .unwrap_or(DEFAULT_OVERVIEW_MAX_REGIONS)
441 .max(1);
442 let max_headers = params
443 .max_headers
444 .unwrap_or(DEFAULT_OVERVIEW_MAX_HEADERS)
445 .max(1);
446 let include_headers = params
447 .include_headers
448 .unwrap_or(DEFAULT_OVERVIEW_INCLUDE_HEADERS);
449
450 let region_limit = if params.max_regions == Some(0) {
451 usize::MAX
452 } else {
453 max_regions as usize
454 };
455 let header_limit = if params.max_headers == Some(0) {
456 usize::MAX
457 } else {
458 max_headers as usize
459 };
460
461 let total_regions = overview.detected_regions.len() as u32;
462 let mut headers_truncated = false;
463
464 for region in &mut overview.detected_regions {
465 let header_count = region.header_count.max(region.headers.len() as u32);
466 region.header_count = header_count;
467 if !include_headers {
468 region.headers.clear();
469 } else if region.headers.len() > header_limit {
470 region.headers.truncate(header_limit);
471 }
472 region.headers_truncated = region.headers.len() as u32 != header_count;
473 headers_truncated |= region.headers_truncated;
474 }
475
476 let regions_truncated = if overview.detected_regions.len() > region_limit {
477 overview.detected_regions.truncate(region_limit);
478 true
479 } else {
480 false
481 };
482
483 overview.detected_region_count = total_regions;
484 overview.detected_regions_truncated = regions_truncated;
485
486 if regions_truncated {
487 overview.notes.push(format!(
488 "Detected regions truncated to {} ({} total).",
489 region_limit, total_regions
490 ));
491 }
492 if headers_truncated {
493 overview.notes.push(format!(
494 "Region headers truncated to {} columns.",
495 header_limit
496 ));
497 }
498
499 Ok(overview)
500}
501
502fn default_start_row() -> u32 {
503 1
504}
505
506fn default_page_size() -> u32 {
507 50
508}
509
510fn default_include_formulas() -> bool {
511 true
512}
513
514fn default_include_header() -> bool {
515 true
516}
517
518#[derive(Debug, Deserialize, JsonSchema)]
519pub struct SheetPageParams {
520 #[serde(alias = "workbook_id")]
522 pub workbook_or_fork_id: WorkbookId,
523 pub sheet_name: String,
525 #[serde(default = "default_start_row")]
527 pub start_row: u32,
528 #[serde(default = "default_page_size")]
530 pub page_size: u32,
531 #[serde(default)]
533 pub columns: Option<Vec<String>>,
534 #[serde(default)]
536 pub columns_by_header: Option<Vec<String>>,
537 #[serde(default = "default_include_formulas")]
539 pub include_formulas: bool,
540 #[serde(default)]
542 pub include_styles: bool,
543 #[serde(default = "default_include_header")]
545 pub include_header: bool,
546 #[serde(default)]
548 pub format: Option<SheetPageFormat>,
549}
550
551impl Default for SheetPageParams {
552 fn default() -> Self {
553 SheetPageParams {
554 workbook_or_fork_id: WorkbookId(String::new()),
555 sheet_name: String::new(),
556 start_row: default_start_row(),
557 page_size: default_page_size(),
558 columns: None,
559 columns_by_header: None,
560 include_formulas: default_include_formulas(),
561 include_styles: false,
562 include_header: default_include_header(),
563 format: None,
564 }
565 }
566}
567
568fn default_find_limit() -> u32 {
569 50
570}
571
572#[derive(Debug, Deserialize, JsonSchema)]
573pub struct FindValueParams {
574 #[serde(alias = "workbook_id")]
576 pub workbook_or_fork_id: WorkbookId,
577 pub query: String,
579 #[serde(default)]
581 pub label: Option<String>,
582 #[serde(default)]
584 pub mode: Option<FindMode>,
585 #[serde(default)]
587 pub match_mode: Option<MatchMode>,
588 #[serde(default)]
590 pub case_sensitive: bool,
591 #[serde(default)]
593 pub sheet_name: Option<String>,
594 #[serde(default)]
596 pub region_id: Option<u32>,
597 #[serde(default)]
599 pub table_name: Option<String>,
600 #[serde(default)]
602 pub value_types: Option<Vec<ValueTypeFilter>>,
603 #[serde(default)]
605 pub search_headers_only: bool,
606 #[serde(default)]
608 pub direction: Option<LabelDirection>,
609 #[serde(default = "default_find_limit")]
611 pub limit: u32,
612 #[serde(default)]
614 pub offset: Option<u32>,
615 #[serde(default)]
617 pub context: Option<FindContext>,
618 #[serde(default)]
620 pub context_width: Option<u32>,
621}
622
623impl Default for FindValueParams {
624 fn default() -> Self {
625 Self {
626 workbook_or_fork_id: WorkbookId(String::new()),
627 query: String::new(),
628 label: None,
629 mode: None,
630 match_mode: None,
631 case_sensitive: false,
632 sheet_name: None,
633 region_id: None,
634 table_name: None,
635 value_types: None,
636 search_headers_only: false,
637 direction: None,
638 limit: default_find_limit(),
639 offset: None,
640 context: None,
641 context_width: None,
642 }
643 }
644}
645
646#[derive(Debug, Deserialize, JsonSchema, Default)]
647pub struct ReadTableParams {
648 #[serde(alias = "workbook_id")]
650 pub workbook_or_fork_id: WorkbookId,
651 #[serde(default)]
653 pub sheet_name: Option<String>,
654 #[serde(default)]
656 pub table_name: Option<String>,
657 #[serde(default)]
659 pub region_id: Option<u32>,
660 #[serde(default)]
662 pub range: Option<String>,
663 #[serde(default)]
665 pub header_row: Option<u32>,
666 #[serde(default)]
668 pub header_rows: Option<u32>,
669 #[serde(default)]
671 pub columns: Option<Vec<String>>,
672 #[serde(default)]
674 pub filters: Option<Vec<TableFilter>>,
675 #[serde(default)]
677 pub sample_mode: Option<SampleMode>,
678 #[serde(default)]
680 pub limit: Option<u32>,
681 #[serde(default)]
683 pub offset: Option<u32>,
684 #[serde(default)]
686 pub format: Option<TableOutputFormat>,
687 #[serde(default)]
689 pub include_headers: Option<bool>,
690 #[serde(default)]
692 pub include_types: Option<bool>,
693}
694
695#[derive(Debug, Deserialize, JsonSchema, Clone)]
696pub struct TableFilter {
697 pub column: String,
699 pub op: FilterOp,
701 pub value: serde_json::Value,
703}
704
705#[derive(Debug, Deserialize, JsonSchema, Default)]
706pub struct TableProfileParams {
707 #[serde(alias = "workbook_id")]
709 pub workbook_or_fork_id: WorkbookId,
710 #[serde(default)]
712 pub sheet_name: Option<String>,
713 #[serde(default)]
715 pub region_id: Option<u32>,
716 #[serde(default)]
718 pub table_name: Option<String>,
719 #[serde(default)]
721 pub sample_mode: Option<SampleMode>,
722 #[serde(default)]
724 pub sample_size: Option<u32>,
725 #[serde(default)]
727 pub summary_only: Option<bool>,
728}
729
730#[derive(Debug, Deserialize, JsonSchema)]
731pub struct RangeValuesParams {
732 #[serde(alias = "workbook_id")]
734 pub workbook_or_fork_id: WorkbookId,
735 pub sheet_name: String,
737 pub ranges: Vec<String>,
739 #[serde(default)]
741 pub include_headers: Option<bool>,
742 #[serde(default)]
744 pub format: Option<TableOutputFormat>,
745 #[serde(default)]
747 pub page_size: Option<u32>,
748}
749
750pub async fn sheet_page(
751 state: Arc<AppState>,
752 params: SheetPageParams,
753) -> Result<SheetPageResponse> {
754 if params.page_size == 0 {
755 return Err(anyhow!("page_size must be greater than zero"));
756 }
757
758 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
759 let metrics = workbook.get_sheet_metrics_fast(¶ms.sheet_name)?;
760 let config = state.config();
761 let output_profile = config.output_profile();
762 let format = params.format.unwrap_or(match output_profile {
763 OutputProfile::TokenDense => SheetPageFormat::Compact,
764 OutputProfile::Verbose => SheetPageFormat::Full,
765 });
766
767 let start_row = params.start_row.max(1);
768 let page_size = params.page_size.min(500);
769 let include_formulas =
770 if params.format.is_none() && matches!(output_profile, OutputProfile::TokenDense) {
771 false
772 } else {
773 params.include_formulas
774 };
775 let include_styles =
776 if params.format.is_none() && matches!(output_profile, OutputProfile::TokenDense) {
777 false
778 } else {
779 params.include_styles
780 };
781 let columns = params.columns.clone();
782 let columns_by_header = params.columns_by_header.clone();
783 let include_header = params.include_header;
784
785 let mut page = workbook.with_sheet(¶ms.sheet_name, |sheet| {
786 build_page(
787 sheet,
788 start_row,
789 page_size,
790 columns.clone(),
791 columns_by_header.clone(),
792 include_formulas,
793 include_styles,
794 include_header,
795 )
796 })?;
797
798 let max_cells = config.max_cells();
799 let max_payload_bytes = config.max_payload_bytes();
800 let cells_per_row = page.rows.first().map(|row| row.cells.len()).unwrap_or(0);
801 let mut row_limit = cap_rows_by_cells(page.rows.len(), cells_per_row, max_cells);
802
803 if row_limit > 0 {
804 row_limit = cap_rows_by_payload_bytes(row_limit, max_payload_bytes, |count| {
805 let response = build_sheet_page_response(
806 &workbook,
807 ¶ms.sheet_name,
808 format,
809 include_header,
810 &page.header,
811 &page.rows[..count],
812 None,
813 );
814 serde_json::to_vec(&response)
815 .map(|payload| payload.len())
816 .unwrap_or(usize::MAX)
817 });
818 }
819
820 if row_limit < page.rows.len() {
821 page.rows.truncate(row_limit);
822 }
823
824 let last_row_index = page
825 .rows
826 .last()
827 .map(|row| row.row_index)
828 .unwrap_or(start_row.saturating_sub(1));
829 let next_start_row = if last_row_index < metrics.metrics.row_count {
830 Some(last_row_index + 1)
831 } else {
832 None
833 };
834
835 Ok(build_sheet_page_response(
836 &workbook,
837 ¶ms.sheet_name,
838 format,
839 include_header,
840 &page.header,
841 &page.rows,
842 next_start_row,
843 ))
844}
845
846#[derive(Debug, Deserialize, JsonSchema)]
847pub struct SheetFormulaMapParams {
848 #[serde(alias = "workbook_id")]
850 pub workbook_or_fork_id: WorkbookId,
851 pub sheet_name: String,
853 pub range: Option<String>,
855 #[serde(default)]
857 pub expand: bool,
858 #[serde(default)]
860 pub limit: Option<u32>,
861 #[serde(default)]
863 pub sort_by: Option<FormulaSortBy>,
864 #[serde(default)]
866 pub summary_only: Option<bool>,
867 #[serde(default)]
869 pub include_addresses: Option<bool>,
870 #[serde(default)]
872 pub addresses_limit: Option<u32>,
873}
874
875#[derive(Debug, Clone, Copy, Default, Deserialize, JsonSchema)]
876#[serde(rename_all = "snake_case")]
877pub enum FormulaSortBy {
878 #[default]
879 Address,
880 Complexity,
881 Count,
882}
883
884#[derive(Debug, Clone, Copy, Default, Deserialize, JsonSchema, PartialEq, Eq)]
886#[serde(rename_all = "snake_case")]
887pub enum MatchMode {
888 #[default]
890 Contains,
891 Exact,
893 Prefix,
895 Regex,
897}
898
899#[derive(Debug, Clone, Copy, Default, Deserialize, JsonSchema, PartialEq, Eq)]
901#[serde(rename_all = "snake_case")]
902pub enum FindContext {
903 #[default]
905 None,
906 Neighbors,
908 Row,
910 Both,
912}
913
914#[derive(Debug, Clone, Copy, Default, Deserialize, JsonSchema, PartialEq, Eq)]
916#[serde(rename_all = "snake_case")]
917pub enum SampleMode {
918 #[default]
920 First,
921 Last,
923 Distributed,
925}
926
927#[derive(Debug, Clone, Copy, Default, Deserialize, JsonSchema, PartialEq, Eq)]
929#[serde(rename_all = "snake_case")]
930pub enum StyleGranularity {
931 #[default]
933 Runs,
934 Cells,
936}
937
938#[derive(Debug, Clone, Copy, Deserialize, JsonSchema, PartialEq, Eq)]
940#[serde(rename_all = "snake_case")]
941pub enum FilterOp {
942 Eq,
944 #[serde(alias = "ne")]
946 Neq,
947 Gt,
949 Lt,
951 Gte,
953 Lte,
955 Contains,
957 StartsWith,
959 EndsWith,
961 In,
963}
964
965#[derive(Debug, Clone, Copy, Deserialize, JsonSchema, PartialEq, Eq)]
967#[serde(rename_all = "snake_case")]
968pub enum ValueTypeFilter {
969 Text,
970 Number,
971 Bool,
972 Date,
973 Null,
974}
975
976pub async fn sheet_formula_map(
977 state: Arc<AppState>,
978 params: SheetFormulaMapParams,
979) -> Result<SheetFormulaMapResponse> {
980 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
981 let config = state.config();
982 let output_profile = config.output_profile();
983 let summary_only = params
984 .summary_only
985 .unwrap_or(matches!(output_profile, OutputProfile::TokenDense));
986 let include_addresses = params.include_addresses.unwrap_or(!summary_only);
987 let addresses_limit = params.addresses_limit.unwrap_or(15);
988 let max_items = config.max_items();
989 let max_payload_bytes = config.max_payload_bytes();
990
991 let graph = workbook.formula_graph(¶ms.sheet_name)?;
992 let all_groups = graph.groups();
993 let mut groups = Vec::new();
994
995 for mut group in all_groups {
996 if let Some(range) = ¶ms.range {
997 group.addresses.retain(|addr| address_in_range(addr, range));
998 if group.addresses.is_empty() {
999 continue;
1000 }
1001 }
1002
1003 let address_count = group.addresses.len();
1004
1005 if summary_only || !include_addresses {
1006 group.addresses.clear();
1007 } else if !params.expand && address_count > addresses_limit as usize {
1008 group.addresses.truncate(addresses_limit as usize);
1009 }
1010
1011 groups.push(group);
1012 }
1013
1014 let total_groups = groups.len();
1015
1016 let sort_by = params.sort_by.unwrap_or_default();
1017 match sort_by {
1018 FormulaSortBy::Address => {
1019 groups.sort_by(|a, b| a.fingerprint.cmp(&b.fingerprint));
1020 }
1021 FormulaSortBy::Complexity => {
1022 groups.sort_by(|a, b| b.formula.len().cmp(&a.formula.len()));
1023 }
1024 FormulaSortBy::Count => {
1025 groups.sort_by(|a, b| {
1026 let count_a = a.count.unwrap_or(a.addresses.len() as u32);
1027 let count_b = b.count.unwrap_or(b.addresses.len() as u32);
1028 count_b.cmp(&count_a)
1029 });
1030 }
1031 }
1032
1033 if let Some(limit) = params.limit
1034 && groups.len() > limit as usize
1035 {
1036 groups.truncate(limit as usize);
1037 }
1038
1039 if let Some(max_items) = max_items
1040 && groups.len() > max_items
1041 {
1042 groups.truncate(max_items);
1043 }
1044
1045 if let Some(max_bytes) = max_payload_bytes {
1046 let group_limit = cap_rows_by_payload_bytes(groups.len(), Some(max_bytes), |count| {
1047 let response = SheetFormulaMapResponse {
1048 workbook_id: workbook.id.clone(),
1049 workbook_short_id: workbook.short_id.clone(),
1050 sheet_name: params.sheet_name.clone(),
1051 groups: groups[..count].to_vec(),
1052 next_offset: None,
1053 };
1054 serde_json::to_vec(&response)
1055 .map(|payload| payload.len())
1056 .unwrap_or(usize::MAX)
1057 });
1058
1059 if group_limit < groups.len() {
1060 groups.truncate(group_limit);
1061 }
1062 }
1063
1064 let next_offset = if groups.len() < total_groups {
1065 Some(groups.len() as u32)
1066 } else {
1067 None
1068 };
1069
1070 let response = SheetFormulaMapResponse {
1071 workbook_id: workbook.id.clone(),
1072 workbook_short_id: workbook.short_id.clone(),
1073 sheet_name: params.sheet_name.clone(),
1074 groups,
1075 next_offset,
1076 };
1077 Ok(response)
1078}
1079
1080#[derive(Debug, Deserialize, JsonSchema)]
1081pub struct FormulaTraceParams {
1082 #[serde(alias = "workbook_id")]
1083 pub workbook_or_fork_id: WorkbookId,
1084 pub sheet_name: String,
1085 pub cell_address: String,
1086 pub direction: TraceDirection,
1087 pub depth: Option<u32>,
1088 pub limit: Option<u32>,
1089 #[serde(default)]
1090 pub page_size: Option<usize>,
1091 #[serde(default)]
1092 pub cursor: Option<TraceCursor>,
1093}
1094
1095pub async fn formula_trace(
1096 state: Arc<AppState>,
1097 params: FormulaTraceParams,
1098) -> Result<FormulaTraceResponse> {
1099 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
1100 let graph = workbook.formula_graph(¶ms.sheet_name)?;
1101 let formula_lookup = build_formula_lookup(&graph);
1102 let depth = params.depth.unwrap_or(3).clamp(1, 5);
1103 let page_size = params
1104 .page_size
1105 .or_else(|| params.limit.map(|v| v as usize))
1106 .unwrap_or(DEFAULT_TRACE_PAGE_SIZE)
1107 .clamp(TRACE_PAGE_MIN, TRACE_PAGE_MAX);
1108
1109 let origin = params.cell_address.to_uppercase();
1110 let config = TraceConfig {
1111 direction: ¶ms.direction,
1112 origin: &origin,
1113 sheet_name: ¶ms.sheet_name,
1114 depth_limit: depth,
1115 page_size,
1116 };
1117 let (layers, next_cursor, notes) = build_trace_layers(
1118 &workbook,
1119 &graph,
1120 &formula_lookup,
1121 &config,
1122 params.cursor.clone(),
1123 )?;
1124
1125 let response = FormulaTraceResponse {
1126 workbook_id: workbook.id.clone(),
1127 workbook_short_id: workbook.short_id.clone(),
1128 sheet_name: params.sheet_name.clone(),
1129 origin,
1130 direction: params.direction.clone(),
1131 layers,
1132 next_cursor,
1133 notes,
1134 };
1135 Ok(response)
1136}
1137
1138#[derive(Debug, Deserialize, JsonSchema)]
1139pub struct NamedRangesParams {
1140 #[serde(alias = "workbook_id")]
1141 pub workbook_or_fork_id: WorkbookId,
1142 pub sheet_name: Option<String>,
1143 pub name_prefix: Option<String>,
1144}
1145
1146pub async fn named_ranges(
1147 state: Arc<AppState>,
1148 params: NamedRangesParams,
1149) -> Result<NamedRangesResponse> {
1150 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
1151 let mut items = workbook.named_items()?;
1152
1153 if let Some(sheet_filter) = ¶ms.sheet_name {
1154 items.retain(|item| {
1155 item.sheet_name
1156 .as_ref()
1157 .map(|name| name.eq_ignore_ascii_case(sheet_filter))
1158 .unwrap_or(false)
1159 });
1160 }
1161 if let Some(prefix) = ¶ms.name_prefix {
1162 let prefix_lower = prefix.to_ascii_lowercase();
1163 items.retain(|item| item.name.to_ascii_lowercase().starts_with(&prefix_lower));
1164 }
1165
1166 let response = NamedRangesResponse {
1167 workbook_id: workbook.id.clone(),
1168 workbook_short_id: workbook.short_id.clone(),
1169 items,
1170 };
1171 Ok(response)
1172}
1173
1174struct PageBuildResult {
1175 rows: Vec<RowSnapshot>,
1176 header: Option<RowSnapshot>,
1177}
1178
1179#[allow(clippy::too_many_arguments)]
1180fn build_page(
1181 sheet: &umya_spreadsheet::Worksheet,
1182 start_row: u32,
1183 page_size: u32,
1184 columns: Option<Vec<String>>,
1185 columns_by_header: Option<Vec<String>>,
1186 include_formulas: bool,
1187 include_styles: bool,
1188 include_header: bool,
1189) -> PageBuildResult {
1190 let max_col = sheet.get_highest_column();
1191 let end_row = (start_row + page_size - 1).min(sheet.get_highest_row().max(start_row));
1192 let column_indices =
1193 resolve_columns_with_headers(sheet, columns.as_ref(), columns_by_header.as_ref(), max_col);
1194
1195 let header = if include_header {
1196 Some(build_row_snapshot(
1197 sheet,
1198 1,
1199 &column_indices,
1200 include_formulas,
1201 include_styles,
1202 ))
1203 } else {
1204 None
1205 };
1206
1207 let mut rows = Vec::new();
1208 for row_idx in start_row..=end_row {
1209 rows.push(build_row_snapshot(
1210 sheet,
1211 row_idx,
1212 &column_indices,
1213 include_formulas,
1214 include_styles,
1215 ));
1216 }
1217
1218 PageBuildResult { rows, header }
1219}
1220
1221fn build_row_snapshot(
1222 sheet: &umya_spreadsheet::Worksheet,
1223 row_index: u32,
1224 columns: &[u32],
1225 include_formulas: bool,
1226 include_styles: bool,
1227) -> RowSnapshot {
1228 let mut cells = Vec::new();
1229 for &col in columns {
1230 if let Some(cell) = sheet.get_cell((col, row_index)) {
1231 cells.push(build_cell_snapshot(cell, include_formulas, include_styles));
1232 } else {
1233 let address = crate::utils::cell_address(col, row_index);
1234 cells.push(CellSnapshot {
1235 address,
1236 value: None,
1237 formula: None,
1238 cached_value: None,
1239 number_format: None,
1240 style_tags: Vec::new(),
1241 notes: Vec::new(),
1242 });
1243 }
1244 }
1245
1246 RowSnapshot { row_index, cells }
1247}
1248
1249fn build_cell_snapshot(
1250 cell: &umya_spreadsheet::Cell,
1251 include_formulas: bool,
1252 include_styles: bool,
1253) -> CellSnapshot {
1254 let address = cell.get_coordinate().get_coordinate();
1255 let value = crate::workbook::cell_to_value(cell);
1256 let formula = if include_formulas && cell.is_formula() {
1257 Some(cell.get_formula().to_string())
1258 } else {
1259 None
1260 };
1261 let cached_value = if cell.is_formula() {
1262 value.clone()
1263 } else {
1264 None
1265 };
1266 let number_format = if include_styles {
1267 cell.get_style()
1268 .get_number_format()
1269 .map(|fmt| fmt.get_format_code().to_string())
1270 } else {
1271 None
1272 };
1273 let style_tags = if include_styles {
1274 crate::analysis::style::tag_cell(cell)
1275 .map(|(_, tagging)| tagging.tags)
1276 .unwrap_or_default()
1277 } else {
1278 Vec::new()
1279 };
1280
1281 CellSnapshot {
1282 address,
1283 value,
1284 formula,
1285 cached_value,
1286 number_format,
1287 style_tags,
1288 notes: Vec::new(),
1289 }
1290}
1291
1292fn resolve_columns(columns: Option<&Vec<String>>, max_column: u32) -> Vec<u32> {
1293 use std::collections::BTreeSet;
1294 use umya_spreadsheet::helper::coordinate::column_index_from_string;
1295
1296 let mut indices = BTreeSet::new();
1297 if let Some(specs) = columns {
1298 for spec in specs {
1299 if let Some((start, end)) = spec.split_once(':') {
1300 let start_idx = column_index_from_string(start);
1301 let end_idx = column_index_from_string(end);
1302 let (min_idx, max_idx) = if start_idx <= end_idx {
1303 (start_idx, end_idx)
1304 } else {
1305 (end_idx, start_idx)
1306 };
1307 for idx in min_idx..=max_idx {
1308 indices.insert(idx);
1309 }
1310 } else {
1311 indices.insert(column_index_from_string(spec));
1312 }
1313 }
1314 } else {
1315 for idx in 1..=max_column.max(1) {
1316 indices.insert(idx);
1317 }
1318 }
1319
1320 indices.into_iter().collect()
1321}
1322
1323fn resolve_columns_with_headers(
1324 sheet: &umya_spreadsheet::Worksheet,
1325 columns: Option<&Vec<String>>,
1326 columns_by_header: Option<&Vec<String>>,
1327 max_column: u32,
1328) -> Vec<u32> {
1329 if columns_by_header.is_none() {
1330 return resolve_columns(columns, max_column);
1331 }
1332
1333 let mut selected = Vec::new();
1334 let header_targets: Vec<String> = columns_by_header
1335 .unwrap()
1336 .iter()
1337 .map(|h| h.trim().to_ascii_lowercase())
1338 .collect();
1339
1340 for col_idx in 1..=max_column.max(1) {
1341 let header_cell = sheet.get_cell((col_idx, 1u32));
1342 let header_value = header_cell
1343 .and_then(cell_to_value)
1344 .map(cell_value_to_string_lower);
1345 if let Some(hval) = header_value
1346 && header_targets.iter().any(|target| target == &hval)
1347 {
1348 selected.push(col_idx);
1349 }
1350 }
1351
1352 if selected.is_empty() {
1353 resolve_columns(columns, max_column)
1354 } else {
1355 selected
1356 }
1357}
1358
1359fn cell_value_to_string_lower(value: CellValue) -> String {
1360 match value {
1361 CellValue::Text(s) => s.to_ascii_lowercase(),
1362 CellValue::Number(n) => n.to_string().to_ascii_lowercase(),
1363 CellValue::Bool(b) => b.to_string(),
1364 CellValue::Error(e) => e.to_ascii_lowercase(),
1365 CellValue::Date(d) => d.to_ascii_lowercase(),
1366 }
1367}
1368
1369fn cell_value_to_plain_string(value: &CellValue) -> String {
1370 match value {
1371 CellValue::Text(s) => s.clone(),
1372 CellValue::Number(n) => n.to_string(),
1373 CellValue::Bool(b) => b.to_string(),
1374 CellValue::Error(e) => e.clone(),
1375 CellValue::Date(d) => d.clone(),
1376 }
1377}
1378
1379fn cell_value_to_kind(value: &CellValue) -> CellValueKind {
1380 match value {
1381 CellValue::Text(_) => CellValueKind::Text,
1382 CellValue::Number(_) => CellValueKind::Number,
1383 CellValue::Bool(_) => CellValueKind::Bool,
1384 CellValue::Error(_) => CellValueKind::Error,
1385 CellValue::Date(_) => CellValueKind::Date,
1386 }
1387}
1388
1389fn cell_value_to_primitive(value: &CellValue) -> CellValuePrimitive {
1390 match value {
1391 CellValue::Text(s) => CellValuePrimitive::Text(s.clone()),
1392 CellValue::Number(n) => CellValuePrimitive::Number(*n),
1393 CellValue::Bool(b) => CellValuePrimitive::Bool(*b),
1394 CellValue::Error(e) => CellValuePrimitive::Text(e.clone()),
1395 CellValue::Date(d) => CellValuePrimitive::Text(d.clone()),
1396 }
1397}
1398
1399fn csv_escape_field(field: &str) -> String {
1400 if field.contains(',') || field.contains('"') || field.contains('\n') || field.contains('\r') {
1401 let mut escaped = String::with_capacity(field.len() + 2);
1402 escaped.push('"');
1403 for ch in field.chars() {
1404 if ch == '"' {
1405 escaped.push('"');
1406 }
1407 escaped.push(ch);
1408 }
1409 escaped.push('"');
1410 escaped
1411 } else {
1412 field.to_string()
1413 }
1414}
1415
1416fn push_csv_row<I>(buffer: &mut String, fields: I)
1417where
1418 I: IntoIterator<Item = String>,
1419{
1420 let mut first = true;
1421 for field in fields {
1422 if !first {
1423 buffer.push(',');
1424 }
1425 first = false;
1426 let escaped = csv_escape_field(&field);
1427 buffer.push_str(&escaped);
1428 }
1429 buffer.push('\n');
1430}
1431
1432fn table_rows_to_values(
1433 headers: &[String],
1434 rows: &[TableRow],
1435) -> Vec<Vec<Option<CellValuePrimitive>>> {
1436 let mut out = Vec::with_capacity(rows.len());
1437 for row in rows {
1438 let mut vals = Vec::with_capacity(headers.len());
1439 for header in headers {
1440 let value = row
1441 .get(header)
1442 .and_then(|cell| cell.as_ref())
1443 .map(cell_value_to_primitive);
1444 vals.push(value);
1445 }
1446 out.push(vals);
1447 }
1448 out
1449}
1450
1451fn table_rows_to_types(headers: &[String], rows: &[TableRow]) -> Vec<Vec<Option<CellValueKind>>> {
1452 let mut out = Vec::with_capacity(rows.len());
1453 for row in rows {
1454 let mut kinds = Vec::with_capacity(headers.len());
1455 for header in headers {
1456 let kind = row
1457 .get(header)
1458 .and_then(|cell| cell.as_ref())
1459 .map(cell_value_to_kind);
1460 kinds.push(kind);
1461 }
1462 out.push(kinds);
1463 }
1464 out
1465}
1466
1467fn table_rows_to_csv(headers: &[String], rows: &[TableRow], include_headers: bool) -> String {
1468 let mut csv = String::new();
1469 if include_headers {
1470 push_csv_row(&mut csv, headers.iter().cloned());
1471 }
1472 for row in rows {
1473 let values = headers.iter().map(|header| {
1474 row.get(header)
1475 .and_then(|cell| cell.as_ref())
1476 .map(cell_value_to_plain_string)
1477 .unwrap_or_default()
1478 });
1479 push_csv_row(&mut csv, values);
1480 }
1481 csv
1482}
1483
1484fn filter_table_row(row: &TableRow, headers: &[String]) -> TableRow {
1485 let mut filtered = TableRow::new();
1486 for header in headers {
1487 if let Some(value) = row.get(header) {
1488 filtered.insert(header.clone(), value.clone());
1489 }
1490 }
1491 filtered
1492}
1493
1494type ReadTablePayload = (
1495 Vec<String>,
1496 Vec<TableRow>,
1497 Option<Vec<Vec<Option<CellValuePrimitive>>>>,
1498 Option<Vec<Vec<Option<CellValueKind>>>>,
1499 Option<String>,
1500);
1501
1502fn build_read_table_payload(
1503 format: TableOutputFormat,
1504 headers: &[String],
1505 rows: &[TableRow],
1506 include_headers: bool,
1507 include_types: bool,
1508) -> ReadTablePayload {
1509 let headers_out = if include_headers {
1510 headers.to_vec()
1511 } else {
1512 Vec::new()
1513 };
1514
1515 let types_out = if include_types {
1516 Some(table_rows_to_types(headers, rows))
1517 } else {
1518 None
1519 };
1520
1521 match format {
1522 TableOutputFormat::Json => (headers_out, rows.to_vec(), None, types_out, None),
1523 TableOutputFormat::Values => (
1524 headers_out,
1525 Vec::new(),
1526 Some(table_rows_to_values(headers, rows)),
1527 types_out,
1528 None,
1529 ),
1530 TableOutputFormat::Csv => (
1531 Vec::new(),
1532 Vec::new(),
1533 None,
1534 types_out,
1535 Some(table_rows_to_csv(headers, rows, include_headers)),
1536 ),
1537 }
1538}
1539
1540fn cell_matrix_to_values(rows: &[Vec<Option<CellValue>>]) -> Vec<Vec<Option<CellValuePrimitive>>> {
1541 let mut out = Vec::with_capacity(rows.len());
1542 for row in rows {
1543 let mut vals = Vec::with_capacity(row.len());
1544 for cell in row {
1545 vals.push(cell.as_ref().map(cell_value_to_primitive));
1546 }
1547 out.push(vals);
1548 }
1549 out
1550}
1551
1552fn cell_matrix_to_csv(rows: &[Vec<Option<CellValue>>]) -> String {
1553 let mut csv = String::new();
1554 for row in rows {
1555 let values = row.iter().map(|cell| {
1556 cell.as_ref()
1557 .map(cell_value_to_plain_string)
1558 .unwrap_or_default()
1559 });
1560 push_csv_row(&mut csv, values);
1561 }
1562 csv
1563}
1564
1565fn build_range_values_entry(
1566 format: TableOutputFormat,
1567 range: &str,
1568 rows: &[Vec<Option<CellValue>>],
1569 next_start_row: Option<u32>,
1570) -> RangeValuesEntry {
1571 match format {
1572 TableOutputFormat::Json => RangeValuesEntry {
1573 range: range.to_string(),
1574 rows: Some(rows.to_vec()),
1575 values: None,
1576 csv: None,
1577 next_start_row,
1578 },
1579 TableOutputFormat::Values => RangeValuesEntry {
1580 range: range.to_string(),
1581 rows: None,
1582 values: Some(cell_matrix_to_values(rows)),
1583 csv: None,
1584 next_start_row,
1585 },
1586 TableOutputFormat::Csv => RangeValuesEntry {
1587 range: range.to_string(),
1588 rows: None,
1589 values: None,
1590 csv: Some(cell_matrix_to_csv(rows)),
1591 next_start_row,
1592 },
1593 }
1594}
1595
1596fn cap_rows_by_cells(row_count: usize, cells_per_row: usize, max_cells: Option<usize>) -> usize {
1597 let Some(max_cells) = max_cells else {
1598 return row_count;
1599 };
1600 if cells_per_row == 0 {
1601 return row_count;
1602 }
1603 let allowed = max_cells / cells_per_row;
1604 row_count.min(allowed)
1605}
1606
1607fn cap_rows_by_payload_bytes<F>(
1608 row_count: usize,
1609 max_bytes: Option<usize>,
1610 mut size_for_rows: F,
1611) -> usize
1612where
1613 F: FnMut(usize) -> usize,
1614{
1615 let Some(max_bytes) = max_bytes else {
1616 return row_count;
1617 };
1618 if row_count == 0 {
1619 return 0;
1620 }
1621 let mut low = 0usize;
1622 let mut high = row_count;
1623 while low < high {
1624 let mid = (low + high).div_ceil(2);
1625 if size_for_rows(mid) <= max_bytes {
1626 low = mid;
1627 } else {
1628 high = mid - 1;
1629 }
1630 }
1631 low
1632}
1633
1634fn build_compact_payload(
1635 header: &Option<RowSnapshot>,
1636 rows: &[RowSnapshot],
1637 include_header: bool,
1638) -> SheetPageCompact {
1639 let headers = derive_headers(header, rows);
1640 let header_row = if include_header {
1641 header
1642 .as_ref()
1643 .map(|h| h.cells.iter().map(|c| c.value.clone()).collect())
1644 .unwrap_or_default()
1645 } else {
1646 Vec::new()
1647 };
1648 let data_rows = rows
1649 .iter()
1650 .map(|row| {
1651 let mut vals: Vec<Option<CellValue>> = Vec::new();
1652 vals.push(Some(CellValue::Number(row.row_index as f64)));
1653 vals.extend(row.cells.iter().map(|c| c.value.clone()));
1654 vals
1655 })
1656 .collect();
1657
1658 SheetPageCompact {
1659 headers,
1660 header_row,
1661 rows: data_rows,
1662 }
1663}
1664
1665fn build_values_only_payload(
1666 header: &Option<RowSnapshot>,
1667 rows: &[RowSnapshot],
1668 include_header: bool,
1669) -> SheetPageValues {
1670 let mut data = Vec::new();
1671 if include_header && let Some(h) = header {
1672 data.push(h.cells.iter().map(|c| c.value.clone()).collect());
1673 }
1674 for row in rows {
1675 data.push(row.cells.iter().map(|c| c.value.clone()).collect());
1676 }
1677
1678 SheetPageValues { rows: data }
1679}
1680
1681fn build_sheet_page_response(
1682 workbook: &WorkbookContext,
1683 sheet_name: &str,
1684 format: SheetPageFormat,
1685 include_header: bool,
1686 header: &Option<RowSnapshot>,
1687 rows: &[RowSnapshot],
1688 next_start_row: Option<u32>,
1689) -> SheetPageResponse {
1690 let compact_payload = if matches!(format, SheetPageFormat::Compact) {
1691 Some(build_compact_payload(header, rows, include_header))
1692 } else {
1693 None
1694 };
1695
1696 let values_only_payload = if matches!(format, SheetPageFormat::ValuesOnly) {
1697 Some(build_values_only_payload(header, rows, include_header))
1698 } else {
1699 None
1700 };
1701
1702 let rows_payload = if matches!(format, SheetPageFormat::Full) {
1703 rows.to_vec()
1704 } else {
1705 Vec::new()
1706 };
1707
1708 let header_row = if include_header && matches!(format, SheetPageFormat::Full) {
1709 header.clone()
1710 } else {
1711 None
1712 };
1713
1714 SheetPageResponse {
1715 workbook_id: workbook.id.clone(),
1716 workbook_short_id: workbook.short_id.clone(),
1717 sheet_name: sheet_name.to_string(),
1718 rows: rows_payload,
1719 next_start_row,
1720 header_row,
1721 compact: compact_payload,
1722 values_only: values_only_payload,
1723 format,
1724 }
1725}
1726
1727fn derive_headers(header: &Option<RowSnapshot>, rows: &[RowSnapshot]) -> Vec<String> {
1728 if let Some(h) = header {
1729 let mut headers: Vec<String> = h
1730 .cells
1731 .iter()
1732 .map(|c| match &c.value {
1733 Some(CellValue::Text(t)) => t.clone(),
1734 Some(CellValue::Number(n)) => n.to_string(),
1735 Some(CellValue::Bool(b)) => b.to_string(),
1736 Some(CellValue::Date(d)) => d.clone(),
1737 Some(CellValue::Error(e)) => e.clone(),
1738 None => c.address.clone(),
1739 })
1740 .collect();
1741 headers.insert(0, "Row".to_string());
1742 headers
1743 } else if let Some(first) = rows.first() {
1744 let mut headers = Vec::new();
1745 headers.push("Row".to_string());
1746 for cell in &first.cells {
1747 headers.push(cell.address.clone());
1748 }
1749 headers
1750 } else {
1751 vec![]
1752 }
1753}
1754fn default_stats_sample() -> usize {
1755 500
1756}
1757
1758#[derive(Debug, Deserialize, JsonSchema)]
1759pub struct SheetStatisticsParams {
1760 #[serde(alias = "workbook_id")]
1762 pub workbook_or_fork_id: WorkbookId,
1763 pub sheet_name: String,
1765 #[serde(default)]
1767 pub sample_rows: Option<usize>,
1768 #[serde(default)]
1770 pub summary_only: Option<bool>,
1771}
1772
1773pub async fn sheet_statistics(
1774 state: Arc<AppState>,
1775 params: SheetStatisticsParams,
1776) -> Result<SheetStatisticsResponse> {
1777 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
1778 let config = state.config();
1779 let output_profile = config.output_profile();
1780 let summary_only = params
1781 .summary_only
1782 .unwrap_or(matches!(output_profile, OutputProfile::TokenDense));
1783 let sheet_metrics = workbook.get_sheet_metrics_fast(¶ms.sheet_name)?;
1784 let sample_rows = params.sample_rows.unwrap_or_else(default_stats_sample);
1785 let stats = workbook.with_sheet(¶ms.sheet_name, |sheet| {
1786 stats::compute_sheet_statistics(sheet, sample_rows)
1787 })?;
1788 let mut numeric_columns = stats.numeric_columns;
1789 let mut text_columns = stats.text_columns;
1790
1791 if summary_only {
1792 for column in &mut numeric_columns {
1793 column.samples.clear();
1794 }
1795 for column in &mut text_columns {
1796 column.samples.clear();
1797 }
1798 }
1799
1800 let max_items = config.max_items();
1801 let max_payload_bytes = config.max_payload_bytes();
1802
1803 if let Some(max_items) = max_items {
1804 if numeric_columns.len() > max_items {
1805 numeric_columns.truncate(max_items);
1806 }
1807 if text_columns.len() > max_items {
1808 text_columns.truncate(max_items);
1809 }
1810 }
1811
1812 if let Some(max_bytes) = max_payload_bytes {
1813 let response_size = |numeric: &Vec<ColumnSummary>, text: &Vec<ColumnSummary>| {
1814 let response = SheetStatisticsResponse {
1815 workbook_id: workbook.id.clone(),
1816 workbook_short_id: workbook.short_id.clone(),
1817 sheet_name: params.sheet_name.clone(),
1818 row_count: sheet_metrics.metrics.row_count,
1819 column_count: sheet_metrics.metrics.column_count,
1820 density: stats.density,
1821 numeric_columns: numeric.clone(),
1822 text_columns: text.clone(),
1823 null_counts: stats.null_counts.clone(),
1824 duplicate_warnings: stats.duplicate_warnings.clone(),
1825 };
1826 serde_json::to_vec(&response)
1827 .map(|payload| payload.len())
1828 .unwrap_or(usize::MAX)
1829 };
1830
1831 let mut current_size = response_size(&numeric_columns, &text_columns);
1832 if current_size > max_bytes && !summary_only {
1833 for column in &mut numeric_columns {
1834 column.samples.clear();
1835 }
1836 for column in &mut text_columns {
1837 column.samples.clear();
1838 }
1839 current_size = response_size(&numeric_columns, &text_columns);
1840 }
1841
1842 while current_size > max_bytes && (!text_columns.is_empty() || !numeric_columns.is_empty())
1843 {
1844 if !text_columns.is_empty() {
1845 text_columns.pop();
1846 } else if !numeric_columns.is_empty() {
1847 numeric_columns.pop();
1848 }
1849 current_size = response_size(&numeric_columns, &text_columns);
1850 }
1851 }
1852
1853 Ok(SheetStatisticsResponse {
1854 workbook_id: workbook.id.clone(),
1855 workbook_short_id: workbook.short_id.clone(),
1856 sheet_name: params.sheet_name,
1857 row_count: sheet_metrics.metrics.row_count,
1858 column_count: sheet_metrics.metrics.column_count,
1859 density: stats.density,
1860 numeric_columns,
1861 text_columns,
1862 null_counts: stats.null_counts,
1863 duplicate_warnings: stats.duplicate_warnings,
1864 })
1865}
1866
1867fn address_in_range(address: &str, range: &str) -> bool {
1868 parse_range(range).is_none_or(|((start_col, start_row), (end_col, end_row))| {
1869 if let Some((col, row)) = parse_address(address) {
1870 col >= start_col && col <= end_col && row >= start_row && row <= end_row
1871 } else {
1872 false
1873 }
1874 })
1875}
1876
1877fn parse_range(range: &str) -> Option<((u32, u32), (u32, u32))> {
1878 let mut parts = range.split(':');
1879 let start = parts.next()?;
1880 let end = parts.next().unwrap_or(start);
1881 let start_idx = parse_address(start)?;
1882 let end_idx = parse_address(end)?;
1883 Some((
1884 (start_idx.0.min(end_idx.0), start_idx.1.min(end_idx.1)),
1885 (start_idx.0.max(end_idx.0), start_idx.1.max(end_idx.1)),
1886 ))
1887}
1888
1889fn parse_address(address: &str) -> Option<(u32, u32)> {
1890 use umya_spreadsheet::helper::coordinate::index_from_coordinate;
1891 let (col, row, _, _) = index_from_coordinate(address);
1892 match (col, row) {
1893 (Some(c), Some(r)) => Some((c, r)),
1894 _ => None,
1895 }
1896}
1897
1898#[derive(Clone)]
1899struct TableTarget {
1900 sheet_name: String,
1901 table_name: Option<String>,
1902 range: ((u32, u32), (u32, u32)),
1903 header_hint: Option<u32>,
1904}
1905
1906fn resolve_table_target(
1907 workbook: &WorkbookContext,
1908 params: &ReadTableParams,
1909) -> Result<TableTarget> {
1910 if let Some(region_id) = params.region_id
1911 && let Some(sheet) = ¶ms.sheet_name
1912 && let Ok(region) = workbook.detected_region(sheet, region_id)
1913 {
1914 return Ok(TableTarget {
1915 sheet_name: sheet.clone(),
1916 table_name: None,
1917 range: parse_range(®ion.bounds).unwrap_or(((1, 1), (1, 1))),
1918 header_hint: region.header_row,
1919 });
1920 }
1921
1922 if let Some(table_name) = ¶ms.table_name {
1923 let items = workbook.named_items()?;
1924 for item in items {
1925 if item.name.eq_ignore_ascii_case(table_name)
1926 || item
1927 .name
1928 .to_ascii_lowercase()
1929 .contains(&table_name.to_ascii_lowercase())
1930 {
1931 let mut sheet_name = item
1932 .sheet_name
1933 .clone()
1934 .or_else(|| params.sheet_name.clone())
1935 .unwrap_or_else(|| workbook.sheet_names().first().cloned().unwrap_or_default());
1936 let refers_to = item.refers_to.trim_start_matches('=');
1937 let mut range_part = refers_to;
1938 if let Some((sheet_part, rest)) = refers_to.split_once('!') {
1939 sheet_name = sheet_part.trim_matches('\'').to_string();
1940 range_part = rest;
1941 }
1942 if let Some(range) = parse_range(range_part) {
1943 return Ok(TableTarget {
1944 sheet_name,
1945 table_name: Some(item.name.clone()),
1946 range,
1947 header_hint: if item.kind == NamedItemKind::Table {
1948 Some(range.0.1)
1949 } else {
1950 None
1951 },
1952 });
1953 }
1954 }
1955 }
1956 }
1957
1958 let sheet_name = params
1959 .sheet_name
1960 .clone()
1961 .unwrap_or_else(|| workbook.sheet_names().first().cloned().unwrap_or_default());
1962
1963 if let Some(rng) = ¶ms.range
1964 && let Some(range) = parse_range(rng)
1965 {
1966 return Ok(TableTarget {
1967 sheet_name,
1968 table_name: None,
1969 range,
1970 header_hint: None,
1971 });
1972 }
1973
1974 let metrics = workbook.get_sheet_metrics_fast(&sheet_name)?;
1975 let end_col = metrics.metrics.column_count.max(1);
1976 let end_row = metrics.metrics.row_count.max(1);
1977 Ok(TableTarget {
1978 sheet_name,
1979 table_name: None,
1980 range: ((1, 1), (end_col, end_row)),
1981 header_hint: None,
1982 })
1983}
1984
1985#[allow(clippy::too_many_arguments)]
1986fn extract_table_rows(
1987 sheet: &umya_spreadsheet::Worksheet,
1988 target: &TableTarget,
1989 header_row: Option<u32>,
1990 header_rows: Option<u32>,
1991 columns: Option<Vec<String>>,
1992 filters: Option<Vec<TableFilter>>,
1993 limit: usize,
1994 offset: usize,
1995 sample_mode: SampleMode,
1996) -> Result<(Vec<String>, Vec<TableRow>, u32)> {
1997 let ((start_col, start_row), (end_col, end_row)) = target.range;
1998 let mut header_start = header_row.or(target.header_hint).unwrap_or(start_row);
1999 if header_start < start_row {
2000 header_start = start_row;
2001 }
2002 if header_start > end_row {
2003 header_start = start_row;
2004 }
2005 let header_rows_count = header_rows.unwrap_or(1).max(1);
2006 let data_start_row = (header_start + header_rows_count).max(start_row + header_rows_count);
2007 let column_indices: Vec<u32> = if let Some(cols) = columns.as_ref() {
2008 resolve_columns(Some(cols), end_col).into_iter().collect()
2009 } else {
2010 (start_col..=end_col).collect()
2011 };
2012
2013 let headers = build_headers(sheet, &column_indices, header_start, header_rows_count);
2014 let mut all_rows: Vec<TableRow> = Vec::new();
2015 let mut total_rows: u32 = 0;
2016
2017 for row_idx in data_start_row..=end_row {
2018 let mut row = BTreeMap::new();
2019 for (i, col_idx) in column_indices.iter().enumerate() {
2020 let header = headers
2021 .get(i)
2022 .cloned()
2023 .unwrap_or_else(|| format!("Col{col_idx}"));
2024 let value = sheet.get_cell((*col_idx, row_idx)).and_then(cell_to_value);
2025 row.insert(header, value);
2026 }
2027 if !row_passes_filters(&row, filters.as_ref()) {
2028 continue;
2029 }
2030 total_rows += 1;
2031 if matches!(sample_mode, SampleMode::First) && total_rows as usize > offset + limit {
2032 continue;
2033 }
2034 all_rows.push(row);
2035 }
2036
2037 let rows = sample_rows(all_rows, limit, offset, sample_mode);
2038
2039 Ok((headers, rows, total_rows))
2040}
2041
2042fn build_headers(
2043 sheet: &umya_spreadsheet::Worksheet,
2044 columns: &[u32],
2045 header_start: u32,
2046 header_rows: u32,
2047) -> Vec<String> {
2048 let mut headers = Vec::new();
2049 for col_idx in columns {
2050 let mut parts = Vec::new();
2051 for h in header_start..(header_start + header_rows) {
2052 let (origin_col, origin_row) = sheet.map_merged_cell((*col_idx, h));
2053 if let Some(value) = sheet
2054 .get_cell((origin_col, origin_row))
2055 .and_then(cell_to_value)
2056 {
2057 match value {
2058 CellValue::Text(ref s) if s.trim().is_empty() => {}
2059 CellValue::Text(s) => parts.push(s),
2060 CellValue::Number(n) => parts.push(n.to_string()),
2061 CellValue::Bool(b) => parts.push(b.to_string()),
2062 CellValue::Error(e) => parts.push(e),
2063 CellValue::Date(d) => parts.push(d),
2064 }
2065 }
2066 }
2067 if parts.is_empty() {
2068 headers.push(crate::utils::column_number_to_name(*col_idx));
2069 } else {
2070 headers.push(parts.join(" / "));
2071 }
2072 }
2073
2074 if headers.iter().all(|h| h.trim().is_empty()) {
2075 return columns
2076 .iter()
2077 .map(|c| crate::utils::column_number_to_name(*c))
2078 .collect();
2079 }
2080
2081 dedupe_headers(headers)
2082}
2083
2084fn dedupe_headers(mut headers: Vec<String>) -> Vec<String> {
2085 let mut seen: HashMap<String, u32> = HashMap::new();
2086 for h in headers.iter_mut() {
2087 let key = h.clone();
2088 if key.trim().is_empty() {
2089 continue;
2090 }
2091 let count = seen.entry(key.clone()).or_insert(0);
2092 if *count > 0 {
2093 h.push_str(&format!("_{}", *count + 1));
2094 }
2095 *count += 1;
2096 }
2097 headers
2098}
2099
2100fn row_passes_filters(row: &TableRow, filters: Option<&Vec<TableFilter>>) -> bool {
2101 if let Some(filters) = filters {
2102 for filter in filters {
2103 if let Some(value) = row.get(&filter.column) {
2104 match filter.op {
2105 FilterOp::Eq => {
2106 if !value_eq(value, &filter.value) {
2107 return false;
2108 }
2109 }
2110 FilterOp::Neq => {
2111 if value_eq(value, &filter.value) {
2112 return false;
2113 }
2114 }
2115 FilterOp::Contains => {
2116 if !value_contains(value, &filter.value) {
2117 return false;
2118 }
2119 }
2120 FilterOp::Gt => {
2121 if !value_gt(value, &filter.value) {
2122 return false;
2123 }
2124 }
2125 FilterOp::Lt => {
2126 if !value_lt(value, &filter.value) {
2127 return false;
2128 }
2129 }
2130 FilterOp::Gte => {
2131 if !value_gte(value, &filter.value) {
2132 return false;
2133 }
2134 }
2135 FilterOp::Lte => {
2136 if !value_lte(value, &filter.value) {
2137 return false;
2138 }
2139 }
2140 FilterOp::StartsWith => {
2141 if !value_starts_with(value, &filter.value) {
2142 return false;
2143 }
2144 }
2145 FilterOp::EndsWith => {
2146 if !value_ends_with(value, &filter.value) {
2147 return false;
2148 }
2149 }
2150 FilterOp::In => {
2151 let list = filter
2152 .value
2153 .as_array()
2154 .cloned()
2155 .unwrap_or_else(|| vec![filter.value.clone()]);
2156 if !list.iter().any(|cmp| value_eq(value, cmp)) {
2157 return false;
2158 }
2159 }
2160 }
2161 }
2162 }
2163 }
2164 true
2165}
2166
2167fn value_eq(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
2168 match (cell, cmp) {
2169 (Some(CellValue::Text(s)), serde_json::Value::String(t)) => s == t,
2170 (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
2171 v.as_f64().is_some_and(|f| (*n - f).abs() < f64::EPSILON)
2172 }
2173 (Some(CellValue::Number(n)), serde_json::Value::String(t)) => t
2174 .parse::<f64>()
2175 .map(|f| (*n - f).abs() < f64::EPSILON)
2176 .unwrap_or(false),
2177 (Some(CellValue::Bool(b)), serde_json::Value::Bool(v)) => b == v,
2178 (Some(CellValue::Bool(b)), serde_json::Value::String(t)) => {
2179 t.eq_ignore_ascii_case("true") == *b
2180 }
2181 (Some(CellValue::Date(d)), serde_json::Value::String(t)) => d == t,
2182 (None, serde_json::Value::Null) => true,
2183 _ => false,
2184 }
2185}
2186
2187fn value_contains(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
2188 if let (Some(CellValue::Text(s)), serde_json::Value::String(t)) = (cell, cmp) {
2189 return s.to_ascii_lowercase().contains(&t.to_ascii_lowercase());
2190 }
2191 false
2192}
2193
2194fn value_gt(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
2195 match (cell, cmp) {
2196 (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
2197 v.as_f64().is_some_and(|f| *n > f)
2198 }
2199 _ => false,
2200 }
2201}
2202
2203fn value_lt(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
2204 match (cell, cmp) {
2205 (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
2206 v.as_f64().is_some_and(|f| *n < f)
2207 }
2208 _ => false,
2209 }
2210}
2211
2212fn value_gte(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
2213 match (cell, cmp) {
2214 (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
2215 v.as_f64().is_some_and(|f| *n >= f)
2216 }
2217 _ => false,
2218 }
2219}
2220
2221fn value_lte(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
2222 match (cell, cmp) {
2223 (Some(CellValue::Number(n)), serde_json::Value::Number(v)) => {
2224 v.as_f64().is_some_and(|f| *n <= f)
2225 }
2226 _ => false,
2227 }
2228}
2229
2230fn value_starts_with(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
2231 if let (Some(CellValue::Text(s)), serde_json::Value::String(t)) = (cell, cmp) {
2232 return s.to_ascii_lowercase().starts_with(&t.to_ascii_lowercase());
2233 }
2234 false
2235}
2236
2237fn value_ends_with(cell: &Option<CellValue>, cmp: &serde_json::Value) -> bool {
2238 if let (Some(CellValue::Text(s)), serde_json::Value::String(t)) = (cell, cmp) {
2239 return s.to_ascii_lowercase().ends_with(&t.to_ascii_lowercase());
2240 }
2241 false
2242}
2243
2244fn sample_rows(
2245 rows: Vec<TableRow>,
2246 limit: usize,
2247 offset: usize,
2248 mode: SampleMode,
2249) -> Vec<TableRow> {
2250 if rows.is_empty() {
2251 return rows;
2252 }
2253
2254 match mode {
2255 SampleMode::Distributed => {
2256 if limit == 0 {
2257 return Vec::new();
2258 }
2259 let mut indices = Vec::new();
2260 let span = rows.len().saturating_sub(1);
2261 let step = std::cmp::max(1, span / std::cmp::max(1, limit.saturating_sub(1)));
2262 let mut idx = offset;
2263 while idx < rows.len() && indices.len() < limit {
2264 indices.push(idx);
2265 idx = idx.saturating_add(step);
2266 if idx == indices.last().copied().unwrap_or(0) {
2267 idx += 1;
2268 }
2269 }
2270 if indices.len() < limit {
2271 let last_idx = rows.len().saturating_sub(1);
2272 if !indices.contains(&last_idx) {
2273 indices.push(last_idx);
2274 }
2275 }
2276 indices
2277 .into_iter()
2278 .filter_map(|i| rows.get(i).cloned())
2279 .collect()
2280 }
2281 SampleMode::Last => {
2282 let start = rows.len().saturating_sub(limit + offset);
2283 rows.into_iter().skip(start + offset).take(limit).collect()
2284 }
2285 SampleMode::First => rows.into_iter().skip(offset).take(limit).collect(),
2286 }
2287}
2288
2289fn summarize_columns(headers: &[String], rows: &[TableRow]) -> Vec<ColumnTypeSummary> {
2290 let mut summaries = Vec::new();
2291 for header in headers {
2292 let mut nulls = 0u32;
2293 let mut distinct_set: HashSet<String> = HashSet::new();
2294 let mut values: Vec<f64> = Vec::new();
2295 let mut top_counts: HashMap<String, u32> = HashMap::new();
2296
2297 for row in rows {
2298 match row.get(header) {
2299 Some(Some(CellValue::Number(n))) => {
2300 values.push(*n);
2301 let key = n.to_string();
2302 *top_counts.entry(key).or_default() += 1;
2303 }
2304 Some(Some(CellValue::Text(s))) => {
2305 distinct_set.insert(s.clone());
2306 *top_counts.entry(s.clone()).or_default() += 1;
2307 }
2308 Some(Some(CellValue::Bool(b))) => {
2309 let key = b.to_string();
2310 distinct_set.insert(key.clone());
2311 *top_counts.entry(key).or_default() += 1;
2312 }
2313 Some(Some(CellValue::Date(d))) => {
2314 distinct_set.insert(d.clone());
2315 *top_counts.entry(d.clone()).or_default() += 1;
2316 }
2317 Some(Some(CellValue::Error(e))) => {
2318 distinct_set.insert(e.clone());
2319 *top_counts.entry(e.clone()).or_default() += 1;
2320 }
2321 _ => {
2322 nulls += 1;
2323 }
2324 }
2325 }
2326
2327 let inferred_type = if !values.is_empty() {
2328 "number"
2329 } else if !distinct_set.is_empty() {
2330 "text"
2331 } else {
2332 "unknown"
2333 }
2334 .to_string();
2335
2336 let min = values.iter().cloned().reduce(f64::min);
2337 let max = values.iter().cloned().reduce(f64::max);
2338 let mean = if values.is_empty() {
2339 None
2340 } else {
2341 Some(values.iter().sum::<f64>() / values.len() as f64)
2342 };
2343
2344 let mut top_values: Vec<(String, u32)> = top_counts.into_iter().collect();
2345 top_values.sort_by(|a, b| b.1.cmp(&a.1));
2346 let top_values = top_values.into_iter().take(3).map(|(v, _)| v).collect();
2347
2348 summaries.push(ColumnTypeSummary {
2349 name: header.clone(),
2350 inferred_type,
2351 nulls,
2352 distinct: distinct_set.len() as u32,
2353 top_values,
2354 min,
2355 max,
2356 mean,
2357 });
2358 }
2359 summaries
2360}
2361
2362#[allow(clippy::too_many_arguments)]
2363fn collect_value_matches(
2364 sheet: &umya_spreadsheet::Worksheet,
2365 sheet_name: &str,
2366 mode: &FindMode,
2367 match_mode: MatchMode,
2368 direction: &LabelDirection,
2369 params: &FindValueParams,
2370 region: Option<&DetectedRegion>,
2371 default_bounds: ((u32, u32), (u32, u32)),
2372 offset: u32,
2373 limit: u32,
2374 seen_so_far: u32,
2375) -> Result<(Vec<FindValueMatch>, u32, bool)> {
2376 let mut results = Vec::new();
2377 let mut seen = seen_so_far;
2378 let regex = if match_mode == MatchMode::Regex {
2379 Regex::new(¶ms.query).ok()
2380 } else {
2381 None
2382 };
2383 let bounds = region
2384 .as_ref()
2385 .and_then(|r| parse_range(&r.bounds))
2386 .unwrap_or(default_bounds);
2387
2388 let header_row = region.and_then(|r| r.header_row).unwrap_or(1);
2389
2390 let context_mode = params.context.unwrap_or_default();
2391 let include_neighbors = matches!(context_mode, FindContext::Neighbors | FindContext::Both);
2392 let include_row_context = matches!(context_mode, FindContext::Row | FindContext::Both);
2393 let context_width = params.context_width.unwrap_or(3).max(1);
2394
2395 for cell in sheet.get_cell_collection() {
2396 let coord = cell.get_coordinate();
2397 let col = *coord.get_col_num();
2398 let row = *coord.get_row_num();
2399 if col < bounds.0.0 || col > bounds.1.0 || row < bounds.0.1 || row > bounds.1.1 {
2400 continue;
2401 }
2402 if params.search_headers_only && row != header_row {
2403 continue;
2404 }
2405
2406 let value = cell_to_value(cell);
2407 if let Some(ref allowed) = params.value_types
2408 && !value_type_matches(&value, allowed)
2409 {
2410 continue;
2411 }
2412 if matches!(mode, FindMode::Value) {
2413 if !value_matches(
2414 &value,
2415 ¶ms.query,
2416 match_mode,
2417 params.case_sensitive,
2418 ®ex,
2419 ) {
2420 continue;
2421 }
2422 } else if let Some(label) = ¶ms.label {
2423 if !label_matches(cell, label, match_mode, params.case_sensitive, ®ex) {
2424 continue;
2425 }
2426 } else {
2427 continue;
2428 }
2429
2430 if seen < offset {
2431 seen += 1;
2432 continue;
2433 }
2434
2435 if results.len() as u32 >= limit {
2436 return Ok((results, seen, true));
2437 }
2438
2439 let neighbors = if include_neighbors {
2440 collect_neighbors(sheet, row, col)
2441 } else {
2442 None
2443 };
2444 let (label_hit, match_value) = if matches!(mode, FindMode::Label) {
2445 let target_value = match direction {
2446 LabelDirection::Right => sheet.get_cell((col + 1, row)),
2447 LabelDirection::Below => sheet.get_cell((col, row + 1)),
2448 LabelDirection::Any => sheet
2449 .get_cell((col + 1, row))
2450 .or_else(|| sheet.get_cell((col, row + 1))),
2451 }
2452 .and_then(cell_to_value);
2453 if target_value.is_none() {
2454 continue;
2455 }
2456 (
2457 Some(LabelHit {
2458 label_address: coord.get_coordinate(),
2459 label: label_from_cell(cell),
2460 }),
2461 target_value,
2462 )
2463 } else {
2464 (None, value.clone())
2465 };
2466
2467 let row_context = if include_row_context {
2468 build_row_context(sheet, row, col, context_width)
2469 } else {
2470 None
2471 };
2472
2473 results.push(FindValueMatch {
2474 address: coord.get_coordinate(),
2475 sheet_name: sheet_name.to_string(),
2476 value: match_value,
2477 row_context,
2478 neighbors,
2479 label_hit,
2480 });
2481 seen += 1;
2482 }
2483
2484 Ok((results, seen, false))
2485}
2486
2487fn label_from_cell(cell: &umya_spreadsheet::Cell) -> String {
2488 cell_to_value(cell)
2489 .map(|v| match v {
2490 CellValue::Text(s) => s,
2491 CellValue::Number(n) => n.to_string(),
2492 CellValue::Bool(b) => b.to_string(),
2493 CellValue::Date(d) => d,
2494 CellValue::Error(e) => e,
2495 })
2496 .unwrap_or_else(|| cell.get_value().to_string())
2497}
2498
2499fn value_matches(
2500 value: &Option<CellValue>,
2501 query: &str,
2502 mode: MatchMode,
2503 case_sensitive: bool,
2504 regex: &Option<Regex>,
2505) -> bool {
2506 if value.is_none() {
2507 return false;
2508 }
2509 let haystack = cell_value_to_string_lower(value.clone().unwrap());
2510 let needle = if case_sensitive {
2511 query.to_string()
2512 } else {
2513 query.to_ascii_lowercase()
2514 };
2515
2516 match mode {
2517 MatchMode::Exact => haystack == needle,
2518 MatchMode::Prefix => haystack.starts_with(&needle),
2519 MatchMode::Regex => regex
2520 .as_ref()
2521 .map(|re| re.is_match(&haystack))
2522 .unwrap_or(false),
2523 MatchMode::Contains => haystack.contains(&needle),
2524 }
2525}
2526
2527fn label_matches(
2528 cell: &umya_spreadsheet::Cell,
2529 label: &str,
2530 mode: MatchMode,
2531 case_sensitive: bool,
2532 regex: &Option<Regex>,
2533) -> bool {
2534 let value = cell_to_value(cell);
2535 if value.is_none() {
2536 return false;
2537 }
2538 let haystack = cell_value_to_string_lower(value.unwrap());
2539 let needle = if case_sensitive {
2540 label.to_string()
2541 } else {
2542 label.to_ascii_lowercase()
2543 };
2544 match mode {
2545 MatchMode::Exact => haystack == needle,
2546 MatchMode::Prefix => haystack.starts_with(&needle),
2547 MatchMode::Regex => regex
2548 .as_ref()
2549 .map(|re| re.is_match(&haystack))
2550 .unwrap_or(false),
2551 MatchMode::Contains => haystack.contains(&needle),
2552 }
2553}
2554
2555fn value_type_matches(value: &Option<CellValue>, allowed: &[ValueTypeFilter]) -> bool {
2556 if value.is_none() {
2557 return allowed.contains(&ValueTypeFilter::Null);
2558 }
2559 match value.as_ref().unwrap() {
2560 CellValue::Text(_) => allowed.contains(&ValueTypeFilter::Text),
2561 CellValue::Number(_) => allowed.contains(&ValueTypeFilter::Number),
2562 CellValue::Bool(_) => allowed.contains(&ValueTypeFilter::Bool),
2563 CellValue::Date(_) => allowed.contains(&ValueTypeFilter::Date),
2564 CellValue::Error(_) => true,
2565 }
2566}
2567
2568fn collect_neighbors(
2569 sheet: &umya_spreadsheet::Worksheet,
2570 row: u32,
2571 col: u32,
2572) -> Option<NeighborValues> {
2573 Some(NeighborValues {
2574 left: if col > 1 {
2575 sheet.get_cell((col - 1, row)).and_then(cell_to_value)
2576 } else {
2577 None
2578 },
2579 right: sheet.get_cell((col + 1, row)).and_then(cell_to_value),
2580 up: if row > 1 {
2581 sheet.get_cell((col, row - 1)).and_then(cell_to_value)
2582 } else {
2583 None
2584 },
2585 down: sheet.get_cell((col, row + 1)).and_then(cell_to_value),
2586 })
2587}
2588
2589fn build_row_context(
2590 sheet: &umya_spreadsheet::Worksheet,
2591 row: u32,
2592 col: u32,
2593 width: u32,
2594) -> Option<RowContext> {
2595 let width = width.max(1);
2596 let half = width / 2;
2597 let max_col = sheet.get_highest_column().max(1);
2598 let start_col = col.saturating_sub(half).max(1);
2599 let end_col = (col + half).min(max_col);
2600
2601 let mut headers = Vec::new();
2602 let mut values = Vec::new();
2603
2604 for current_col in start_col..=end_col {
2605 let header_value = sheet
2606 .get_cell((current_col, 1u32))
2607 .and_then(cell_to_value)
2608 .map(|v| match v {
2609 CellValue::Text(s) => s,
2610 CellValue::Number(n) => n.to_string(),
2611 CellValue::Bool(b) => b.to_string(),
2612 CellValue::Date(d) => d,
2613 CellValue::Error(e) => e,
2614 })
2615 .unwrap_or_else(|| format!("Col{}", current_col));
2616 let value = sheet.get_cell((current_col, row)).and_then(cell_to_value);
2617 headers.push(header_value);
2618 values.push(value);
2619 }
2620
2621 Some(RowContext { headers, values })
2622}
2623
2624fn default_find_formula_limit() -> u32 {
2625 50
2626}
2627
2628#[derive(Debug, Deserialize, JsonSchema, Default)]
2629pub struct FindFormulaParams {
2630 #[serde(alias = "workbook_id")]
2632 pub workbook_or_fork_id: WorkbookId,
2633 pub query: String,
2635 pub sheet_name: Option<String>,
2637 #[serde(default)]
2639 pub case_sensitive: bool,
2640 #[serde(default)]
2642 pub include_context: bool,
2643 #[serde(default = "default_find_formula_limit")]
2645 pub limit: u32,
2646 #[serde(default)]
2648 pub offset: u32,
2649 #[serde(default)]
2651 pub context_rows: Option<u32>,
2652 #[serde(default)]
2654 pub context_cols: Option<u32>,
2655}
2656
2657pub async fn find_formula(
2658 state: Arc<AppState>,
2659 params: FindFormulaParams,
2660) -> Result<FindFormulaResponse> {
2661 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
2662 let query = if params.case_sensitive {
2663 params.query.clone()
2664 } else {
2665 params.query.to_ascii_lowercase()
2666 };
2667
2668 let sheet_names: Vec<String> = if let Some(sheet) = ¶ms.sheet_name {
2669 vec![sheet.clone()]
2670 } else {
2671 workbook.sheet_names()
2672 };
2673
2674 let limit = params.limit.clamp(1, 500);
2675 let offset = params.offset;
2676 let context_rows = params.context_rows.unwrap_or(1);
2677 let context_cols = params.context_cols.unwrap_or(1);
2678
2679 let mut matches = Vec::new();
2680 let mut seen: u32 = 0;
2681 let mut truncated = false;
2682
2683 for sheet_name in sheet_names {
2684 let (sheet_matches, sheet_seen, sheet_truncated) =
2685 workbook.with_sheet(&sheet_name, |sheet| {
2686 collect_formula_matches(
2687 sheet,
2688 &sheet_name,
2689 &query,
2690 params.case_sensitive,
2691 params.include_context,
2692 context_rows,
2693 context_cols,
2694 offset,
2695 limit,
2696 seen,
2697 )
2698 })?;
2699
2700 seen = sheet_seen;
2701 truncated |= sheet_truncated;
2702 matches.extend(sheet_matches);
2703
2704 if truncated {
2705 break;
2706 }
2707 }
2708
2709 let next_offset = if truncated {
2710 Some(offset.saturating_add(matches.len() as u32))
2711 } else {
2712 None
2713 };
2714
2715 let response = FindFormulaResponse {
2716 workbook_id: workbook.id.clone(),
2717 workbook_short_id: workbook.short_id.clone(),
2718 matches,
2719 next_offset,
2720 };
2721 Ok(response)
2722}
2723
2724#[derive(Debug, Deserialize, JsonSchema)]
2725pub struct ScanVolatilesParams {
2726 #[serde(alias = "workbook_id")]
2728 pub workbook_or_fork_id: WorkbookId,
2729 pub sheet_name: Option<String>,
2731 #[serde(default)]
2733 pub summary_only: Option<bool>,
2734 #[serde(default)]
2736 pub include_addresses: Option<bool>,
2737 #[serde(default)]
2739 pub addresses_limit: Option<u32>,
2740}
2741
2742pub async fn scan_volatiles(
2743 state: Arc<AppState>,
2744 params: ScanVolatilesParams,
2745) -> Result<VolatileScanResponse> {
2746 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
2747 let config = state.config();
2748 let output_profile = config.output_profile();
2749 let summary_only = params
2750 .summary_only
2751 .unwrap_or(matches!(output_profile, OutputProfile::TokenDense));
2752 let include_addresses = params.include_addresses.unwrap_or(!summary_only);
2753 let addresses_limit = params.addresses_limit.unwrap_or(15);
2754 let max_items = config.max_items();
2755 let max_payload_bytes = config.max_payload_bytes();
2756
2757 let target_sheets: Vec<String> = if let Some(sheet) = ¶ms.sheet_name {
2758 vec![sheet.clone()]
2759 } else {
2760 workbook.sheet_names()
2761 };
2762
2763 let mut items = Vec::new();
2764 for sheet_name in target_sheets {
2765 let graph = workbook.formula_graph(&sheet_name)?;
2766 for group in graph.groups() {
2767 if !group.is_volatile {
2768 continue;
2769 }
2770
2771 if summary_only {
2772 items.push(VolatileScanEntry {
2773 address: String::new(),
2774 sheet_name: sheet_name.clone(),
2775 function: "volatile".to_string(),
2776 note: Some(format!("Count: {}", group.addresses.len())),
2777 });
2778 } else if include_addresses {
2779 for address in group.addresses.iter().take(addresses_limit as usize) {
2780 items.push(VolatileScanEntry {
2781 address: address.clone(),
2782 sheet_name: sheet_name.clone(),
2783 function: "volatile".to_string(),
2784 note: Some(group.formula.clone()),
2785 });
2786 }
2787 } else {
2788 items.push(VolatileScanEntry {
2789 address: String::new(),
2790 sheet_name: sheet_name.clone(),
2791 function: "volatile".to_string(),
2792 note: Some(group.formula.clone()),
2793 });
2794 }
2795 }
2796 }
2797
2798 let total_items = items.len();
2799
2800 if let Some(max_items) = max_items
2801 && items.len() > max_items
2802 {
2803 items.truncate(max_items);
2804 }
2805
2806 if let Some(max_bytes) = max_payload_bytes {
2807 let item_limit = cap_rows_by_payload_bytes(items.len(), Some(max_bytes), |count| {
2808 let response = VolatileScanResponse {
2809 workbook_id: workbook.id.clone(),
2810 workbook_short_id: workbook.short_id.clone(),
2811 items: items[..count].to_vec(),
2812 next_offset: None,
2813 };
2814 serde_json::to_vec(&response)
2815 .map(|payload| payload.len())
2816 .unwrap_or(usize::MAX)
2817 });
2818
2819 if item_limit < items.len() {
2820 items.truncate(item_limit);
2821 }
2822 }
2823
2824 let next_offset = if items.len() < total_items {
2825 Some(items.len() as u32)
2826 } else {
2827 None
2828 };
2829
2830 let response = VolatileScanResponse {
2831 workbook_id: workbook.id.clone(),
2832 workbook_short_id: workbook.short_id.clone(),
2833 items,
2834 next_offset,
2835 };
2836 Ok(response)
2837}
2838
2839#[derive(Debug, Deserialize, JsonSchema)]
2840pub struct WorkbookStyleSummaryParams {
2841 #[serde(alias = "workbook_id")]
2843 pub workbook_or_fork_id: WorkbookId,
2844 pub max_styles: Option<u32>,
2846 pub max_conditional_formats: Option<u32>,
2848 pub max_cells_scan: Option<u32>,
2850 #[serde(default)]
2852 pub summary_only: Option<bool>,
2853 #[serde(default)]
2855 pub include_descriptor: Option<bool>,
2856 #[serde(default)]
2858 pub include_example_cells: Option<bool>,
2859 #[serde(default)]
2861 pub include_theme: Option<bool>,
2862 #[serde(default)]
2864 pub include_conditional_formats: Option<bool>,
2865}
2866
2867#[derive(Debug)]
2868struct WorkbookStyleAccum {
2869 descriptor: StyleDescriptor,
2870 occurrences: u32,
2871 tags: HashSet<String>,
2872 example_cells: Vec<String>,
2873}
2874
2875impl WorkbookStyleAccum {
2876 fn new(descriptor: StyleDescriptor) -> Self {
2877 Self {
2878 descriptor,
2879 occurrences: 0,
2880 tags: HashSet::new(),
2881 example_cells: Vec::new(),
2882 }
2883 }
2884}
2885
2886pub async fn workbook_style_summary(
2887 state: Arc<AppState>,
2888 params: WorkbookStyleSummaryParams,
2889) -> Result<WorkbookStyleSummaryResponse> {
2890 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
2891 let config = state.config();
2892 let output_profile = config.output_profile();
2893 let summary_only = params
2894 .summary_only
2895 .unwrap_or(matches!(output_profile, OutputProfile::TokenDense));
2896 let include_descriptor = params.include_descriptor.unwrap_or(!summary_only);
2897 let include_example_cells = params.include_example_cells.unwrap_or(!summary_only);
2898 let include_theme = params.include_theme.unwrap_or(!summary_only);
2899 let include_conditional_formats = params.include_conditional_formats.unwrap_or(!summary_only);
2900 let max_payload_bytes = config.max_payload_bytes();
2901 let sheet_names = workbook.sheet_names();
2902
2903 const STYLE_EXAMPLE_LIMIT: usize = 5;
2904 const STYLE_LIMIT_DEFAULT: usize = 200;
2905 const CF_LIMIT_DEFAULT: usize = 200;
2906 const CELL_SCAN_LIMIT_DEFAULT: usize = 500_000;
2907
2908 let style_limit = params
2909 .max_styles
2910 .map(|v| v as usize)
2911 .unwrap_or(STYLE_LIMIT_DEFAULT);
2912 let style_limit = config
2913 .max_items()
2914 .map(|limit| style_limit.min(limit))
2915 .unwrap_or(style_limit);
2916 let cf_limit = params
2917 .max_conditional_formats
2918 .map(|v| v as usize)
2919 .unwrap_or(CF_LIMIT_DEFAULT);
2920 let cf_limit = config
2921 .max_items()
2922 .map(|limit| cf_limit.min(limit))
2923 .unwrap_or(cf_limit);
2924 let cell_scan_limit = params
2925 .max_cells_scan
2926 .map(|v| v as usize)
2927 .unwrap_or(CELL_SCAN_LIMIT_DEFAULT);
2928
2929 let mut acc: HashMap<String, WorkbookStyleAccum> = HashMap::new();
2930 let mut scanned_cells: usize = 0;
2931 let mut scan_truncated = false;
2932
2933 for sheet_name in &sheet_names {
2934 if scan_truncated {
2935 break;
2936 }
2937 workbook.with_sheet(sheet_name, |sheet| {
2938 for cell in sheet.get_cell_collection() {
2939 if scanned_cells >= cell_scan_limit {
2940 scan_truncated = true;
2941 break;
2942 }
2943 scanned_cells += 1;
2944
2945 let address = cell.get_coordinate().get_coordinate().to_string();
2946 let descriptor = crate::styles::descriptor_from_style(cell.get_style());
2947 let style_id = crate::styles::stable_style_id(&descriptor);
2948
2949 let entry = acc
2950 .entry(style_id.clone())
2951 .or_insert_with(|| WorkbookStyleAccum::new(descriptor.clone()));
2952 entry.occurrences += 1;
2953 if entry.example_cells.len() < STYLE_EXAMPLE_LIMIT {
2954 entry.example_cells.push(format!("{sheet_name}!{address}"));
2955 }
2956
2957 if let Some((_, tagging)) = crate::analysis::style::tag_cell(cell) {
2958 for tag in tagging.tags {
2959 entry.tags.insert(tag);
2960 }
2961 }
2962 }
2963 })?;
2964 }
2965
2966 let total_styles = acc.len() as u32;
2967 let mut styles: Vec<WorkbookStyleUsage> = acc
2968 .into_iter()
2969 .map(|(style_id, entry)| {
2970 let mut tags: Vec<String> = entry.tags.into_iter().collect();
2971 tags.sort();
2972 WorkbookStyleUsage {
2973 style_id,
2974 occurrences: entry.occurrences,
2975 tags,
2976 example_cells: if include_example_cells {
2977 entry.example_cells
2978 } else {
2979 Vec::new()
2980 },
2981 descriptor: if include_descriptor {
2982 Some(entry.descriptor)
2983 } else {
2984 None
2985 },
2986 }
2987 })
2988 .collect();
2989
2990 styles.sort_by(|a, b| {
2991 b.occurrences
2992 .cmp(&a.occurrences)
2993 .then_with(|| a.style_id.cmp(&b.style_id))
2994 });
2995
2996 let inferred_default_style_id = styles.first().map(|s| s.style_id.clone());
2997 let mut inferred_default_font = styles
2998 .first()
2999 .and_then(|s| s.descriptor.as_ref().and_then(|d| d.font.clone()));
3000
3001 let mut styles_truncated = if styles.len() > style_limit {
3002 styles.truncate(style_limit);
3003 true
3004 } else {
3005 false
3006 };
3007
3008 let theme = workbook.with_spreadsheet(|book| {
3009 let theme = book.get_theme();
3010 let elements = theme.get_theme_elements();
3011 let scheme = elements.get_color_scheme();
3012 let mut colors = BTreeMap::new();
3013
3014 let mut insert_color = |name: &str, value: String| {
3015 if !value.trim().is_empty() {
3016 colors.insert(name.to_string(), value);
3017 }
3018 };
3019
3020 insert_color("dk1", scheme.get_dk1().get_val());
3021 insert_color("lt1", scheme.get_lt1().get_val());
3022 insert_color("dk2", scheme.get_dk2().get_val());
3023 insert_color("lt2", scheme.get_lt2().get_val());
3024 insert_color("accent1", scheme.get_accent1().get_val());
3025 insert_color("accent2", scheme.get_accent2().get_val());
3026 insert_color("accent3", scheme.get_accent3().get_val());
3027 insert_color("accent4", scheme.get_accent4().get_val());
3028 insert_color("accent5", scheme.get_accent5().get_val());
3029 insert_color("accent6", scheme.get_accent6().get_val());
3030 insert_color("hlink", scheme.get_hlink().get_val());
3031 insert_color("fol_hlink", scheme.get_fol_hlink().get_val());
3032
3033 let font_scheme = elements.get_font_scheme();
3034 let major = font_scheme.get_major_font();
3035 let minor = font_scheme.get_minor_font();
3036 let font_scheme_summary = ThemeFontSchemeSummary {
3037 major_latin: Some(major.get_latin_font().get_typeface().to_string())
3038 .filter(|s| !s.trim().is_empty()),
3039 major_east_asian: Some(major.get_east_asian_font().get_typeface().to_string())
3040 .filter(|s| !s.trim().is_empty()),
3041 major_complex_script: Some(major.get_complex_script_font().get_typeface().to_string())
3042 .filter(|s| !s.trim().is_empty()),
3043 minor_latin: Some(minor.get_latin_font().get_typeface().to_string())
3044 .filter(|s| !s.trim().is_empty()),
3045 minor_east_asian: Some(minor.get_east_asian_font().get_typeface().to_string())
3046 .filter(|s| !s.trim().is_empty()),
3047 minor_complex_script: Some(minor.get_complex_script_font().get_typeface().to_string())
3048 .filter(|s| !s.trim().is_empty()),
3049 };
3050
3051 ThemeSummary {
3052 name: Some(theme.get_name().to_string()).filter(|s| !s.trim().is_empty()),
3053 colors,
3054 font_scheme: font_scheme_summary,
3055 }
3056 })?;
3057
3058 if inferred_default_font.is_none()
3059 && let Some(name) = theme
3060 .font_scheme
3061 .minor_latin
3062 .clone()
3063 .or_else(|| theme.font_scheme.major_latin.clone())
3064 {
3065 inferred_default_font = Some(FontDescriptor {
3066 name: Some(name),
3067 size: None,
3068 bold: None,
3069 italic: None,
3070 underline: None,
3071 strikethrough: None,
3072 color: None,
3073 });
3074 }
3075
3076 let theme = if include_theme { Some(theme) } else { None };
3077
3078 let mut conditional_formats: Vec<ConditionalFormatSummary> = Vec::new();
3079 let mut conditional_formats_truncated = false;
3080 if include_conditional_formats {
3081 use umya_spreadsheet::structs::EnumTrait;
3082 for sheet_name in &sheet_names {
3083 if conditional_formats_truncated {
3084 break;
3085 }
3086 workbook.with_sheet(sheet_name, |sheet| {
3087 for cf in sheet.get_conditional_formatting_collection() {
3088 if conditional_formats.len() >= cf_limit {
3089 conditional_formats_truncated = true;
3090 break;
3091 }
3092 let range = cf.get_sequence_of_references().get_sqref().to_string();
3093 let mut types: HashSet<String> = HashSet::new();
3094 for rule in cf.get_conditional_collection() {
3095 types.insert(rule.get_type().get_value_string().to_string());
3096 }
3097 let mut rule_types: Vec<String> = types.into_iter().collect();
3098 rule_types.sort();
3099 conditional_formats.push(ConditionalFormatSummary {
3100 sheet_name: sheet_name.clone(),
3101 range,
3102 rule_types,
3103 rule_count: cf.get_conditional_collection().len() as u32,
3104 });
3105 }
3106 })?;
3107 }
3108 }
3109
3110 let mut notes: Vec<String> = Vec::new();
3111 if scan_truncated {
3112 notes.push(format!(
3113 "Stopped scanning after {cell_scan_limit} cells; style counts may be incomplete."
3114 ));
3115 }
3116 notes.push(
3117 "Named styles are not directly exposed by umya-spreadsheet; styles here are inferred from cell formatting."
3118 .to_string(),
3119 );
3120
3121 if let Some(max_bytes) = max_payload_bytes {
3122 let style_limit = cap_rows_by_payload_bytes(styles.len(), Some(max_bytes), |count| {
3123 let response = WorkbookStyleSummaryResponse {
3124 workbook_id: workbook.id.clone(),
3125 workbook_short_id: workbook.short_id.clone(),
3126 theme: theme.clone(),
3127 inferred_default_style_id: inferred_default_style_id.clone(),
3128 inferred_default_font: inferred_default_font.clone(),
3129 styles: styles[..count].to_vec(),
3130 total_styles,
3131 styles_truncated: false,
3132 conditional_formats: conditional_formats.clone(),
3133 conditional_formats_truncated,
3134 scan_truncated,
3135 notes: notes.clone(),
3136 };
3137 serde_json::to_vec(&response)
3138 .map(|payload| payload.len())
3139 .unwrap_or(usize::MAX)
3140 });
3141
3142 if style_limit < styles.len() {
3143 styles.truncate(style_limit);
3144 styles_truncated = true;
3145 }
3146
3147 if !conditional_formats.is_empty() {
3148 let cf_limit =
3149 cap_rows_by_payload_bytes(conditional_formats.len(), Some(max_bytes), |count| {
3150 let response = WorkbookStyleSummaryResponse {
3151 workbook_id: workbook.id.clone(),
3152 workbook_short_id: workbook.short_id.clone(),
3153 theme: theme.clone(),
3154 inferred_default_style_id: inferred_default_style_id.clone(),
3155 inferred_default_font: inferred_default_font.clone(),
3156 styles: styles.clone(),
3157 total_styles,
3158 styles_truncated,
3159 conditional_formats: conditional_formats[..count].to_vec(),
3160 conditional_formats_truncated: false,
3161 scan_truncated,
3162 notes: notes.clone(),
3163 };
3164 serde_json::to_vec(&response)
3165 .map(|payload| payload.len())
3166 .unwrap_or(usize::MAX)
3167 });
3168
3169 if cf_limit < conditional_formats.len() {
3170 conditional_formats.truncate(cf_limit);
3171 conditional_formats_truncated = true;
3172 }
3173 }
3174 }
3175
3176 Ok(WorkbookStyleSummaryResponse {
3177 workbook_id: workbook.id.clone(),
3178 workbook_short_id: workbook.short_id.clone(),
3179 theme,
3180 inferred_default_style_id,
3181 inferred_default_font,
3182 styles,
3183 total_styles,
3184 styles_truncated,
3185 conditional_formats,
3186 conditional_formats_truncated,
3187 scan_truncated,
3188 notes,
3189 })
3190}
3191
3192#[derive(Debug, Deserialize, JsonSchema)]
3193pub struct SheetStylesParams {
3194 #[serde(alias = "workbook_id")]
3196 pub workbook_or_fork_id: WorkbookId,
3197 pub sheet_name: String,
3199 #[serde(default)]
3201 pub scope: Option<SheetStylesScope>,
3202 #[serde(default)]
3204 pub granularity: Option<StyleGranularity>,
3205 #[serde(default)]
3207 pub max_items: Option<usize>,
3208 #[serde(default)]
3210 pub summary_only: Option<bool>,
3211 #[serde(default)]
3213 pub include_descriptor: Option<bool>,
3214 #[serde(default)]
3216 pub include_ranges: Option<bool>,
3217 #[serde(default)]
3219 pub include_example_cells: Option<bool>,
3220}
3221
3222#[derive(Debug, Deserialize, JsonSchema)]
3223#[serde(tag = "kind", rename_all = "snake_case")]
3224pub enum SheetStylesScope {
3225 Range { range: String },
3226 Region { region_id: u32 },
3227}
3228
3229#[derive(Debug)]
3230struct StyleAccum {
3231 descriptor: StyleDescriptor,
3232 occurrences: u32,
3233 tags: HashSet<String>,
3234 example_cells: Vec<String>,
3235 positions: Vec<(u32, u32)>,
3236}
3237
3238impl StyleAccum {
3239 fn new(descriptor: StyleDescriptor) -> Self {
3240 Self {
3241 descriptor,
3242 occurrences: 0,
3243 tags: HashSet::new(),
3244 example_cells: Vec::new(),
3245 positions: Vec::new(),
3246 }
3247 }
3248}
3249
3250pub async fn sheet_styles(
3251 state: Arc<AppState>,
3252 params: SheetStylesParams,
3253) -> Result<SheetStylesResponse> {
3254 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
3255 let config = state.config();
3256 let output_profile = config.output_profile();
3257 let summary_only = params
3258 .summary_only
3259 .unwrap_or(matches!(output_profile, OutputProfile::TokenDense));
3260 let include_descriptor = params.include_descriptor.unwrap_or(!summary_only);
3261 let include_ranges = params.include_ranges.unwrap_or(!summary_only);
3262 let include_example_cells = params.include_example_cells.unwrap_or(!summary_only);
3263 const STYLE_EXAMPLE_LIMIT: usize = 5;
3264 const STYLE_RANGE_LIMIT: usize = 50;
3265 const STYLE_LIMIT: usize = 200;
3266 const MAX_MAX_ITEMS: usize = 5000;
3267
3268 let max_payload_bytes = config.max_payload_bytes();
3269 let style_limit = config
3270 .max_items()
3271 .map(|limit| STYLE_LIMIT.min(limit))
3272 .unwrap_or(STYLE_LIMIT);
3273
3274 let metrics = workbook.get_sheet_metrics_fast(¶ms.sheet_name)?;
3275 let full_bounds = (
3276 (1, 1),
3277 (
3278 metrics.metrics.column_count.max(1),
3279 metrics.metrics.row_count.max(1),
3280 ),
3281 );
3282
3283 let bounds = match ¶ms.scope {
3284 Some(SheetStylesScope::Range { range }) => {
3285 parse_range(range).ok_or_else(|| anyhow!("invalid range: {}", range))?
3286 }
3287 Some(SheetStylesScope::Region { region_id }) => {
3288 let region = workbook.detected_region(¶ms.sheet_name, *region_id)?;
3289 parse_range(®ion.bounds)
3290 .ok_or_else(|| anyhow!("invalid region bounds: {}", region.bounds))?
3291 }
3292 None => full_bounds,
3293 };
3294
3295 let granularity = params.granularity.unwrap_or_default();
3296
3297 let max_items = params
3298 .max_items
3299 .unwrap_or(STYLE_RANGE_LIMIT)
3300 .clamp(1, MAX_MAX_ITEMS);
3301
3302 let (mut styles, total_styles, mut styles_truncated) =
3303 workbook.with_sheet(¶ms.sheet_name, |sheet| {
3304 let mut acc: HashMap<String, StyleAccum> = HashMap::new();
3305
3306 for cell in sheet.get_cell_collection() {
3307 let address = cell.get_coordinate().get_coordinate().to_string();
3308 let Some((col, row)) = parse_address(&address) else {
3309 continue;
3310 };
3311 if col < bounds.0.0 || col > bounds.1.0 || row < bounds.0.1 || row > bounds.1.1 {
3312 continue;
3313 }
3314
3315 let descriptor = crate::styles::descriptor_from_style(cell.get_style());
3316 let style_id = crate::styles::stable_style_id(&descriptor);
3317
3318 let entry = acc
3319 .entry(style_id.clone())
3320 .or_insert_with(|| StyleAccum::new(descriptor.clone()));
3321 entry.occurrences += 1;
3322 if entry.example_cells.len() < STYLE_EXAMPLE_LIMIT {
3323 entry.example_cells.push(address.clone());
3324 }
3325
3326 if let Some((_, tagging)) = crate::analysis::style::tag_cell(cell) {
3327 for tag in tagging.tags {
3328 entry.tags.insert(tag);
3329 }
3330 }
3331
3332 entry.positions.push((row, col));
3333 }
3334
3335 let mut summaries: Vec<StyleSummary> = acc
3336 .into_iter()
3337 .map(|(style_id, mut entry)| {
3338 entry.positions.sort_unstable();
3339 entry.positions.dedup();
3340
3341 let (cell_ranges, ranges_truncated) = if include_ranges {
3342 if granularity == StyleGranularity::Cells {
3343 let mut out = Vec::new();
3344 for (row, col) in entry.positions.iter().take(max_items) {
3345 out.push(crate::utils::cell_address(*col, *row));
3346 }
3347 (out, entry.positions.len() > max_items)
3348 } else {
3349 crate::styles::compress_positions_to_ranges(&entry.positions, max_items)
3350 }
3351 } else {
3352 (Vec::new(), false)
3353 };
3354
3355 StyleSummary {
3356 style_id,
3357 occurrences: entry.occurrences,
3358 tags: entry.tags.into_iter().collect(),
3359 example_cells: if include_example_cells {
3360 entry.example_cells
3361 } else {
3362 Vec::new()
3363 },
3364 descriptor: if include_descriptor {
3365 Some(entry.descriptor)
3366 } else {
3367 None
3368 },
3369 cell_ranges,
3370 ranges_truncated,
3371 }
3372 })
3373 .collect();
3374
3375 summaries.sort_by(|a, b| {
3376 b.occurrences
3377 .cmp(&a.occurrences)
3378 .then_with(|| a.style_id.cmp(&b.style_id))
3379 });
3380
3381 let total = summaries.len() as u32;
3382 let truncated = if summaries.len() > style_limit {
3383 summaries.truncate(style_limit);
3384 true
3385 } else {
3386 false
3387 };
3388
3389 Ok::<_, anyhow::Error>((summaries, total, truncated))
3390 })??;
3391
3392 if let Some(max_bytes) = max_payload_bytes {
3393 let row_limit = cap_rows_by_payload_bytes(styles.len(), Some(max_bytes), |count| {
3394 let response = SheetStylesResponse {
3395 workbook_id: workbook.id.clone(),
3396 workbook_short_id: workbook.short_id.clone(),
3397 sheet_name: params.sheet_name.clone(),
3398 styles: styles[..count].to_vec(),
3399 conditional_rules: Vec::new(),
3400 total_styles,
3401 styles_truncated: false,
3402 };
3403 serde_json::to_vec(&response)
3404 .map(|payload| payload.len())
3405 .unwrap_or(usize::MAX)
3406 });
3407 if row_limit < styles.len() {
3408 styles.truncate(row_limit);
3409 styles_truncated = true;
3410 }
3411 }
3412
3413 Ok(SheetStylesResponse {
3414 workbook_id: workbook.id.clone(),
3415 workbook_short_id: workbook.short_id.clone(),
3416 sheet_name: params.sheet_name.clone(),
3417 styles,
3418 conditional_rules: Vec::new(),
3419 total_styles,
3420 styles_truncated,
3421 })
3422}
3423
3424pub async fn range_values(
3425 state: Arc<AppState>,
3426 params: RangeValuesParams,
3427) -> Result<RangeValuesResponse> {
3428 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
3429 let config = state.config();
3430 let output_profile = config.output_profile();
3431 let format = params.format.unwrap_or(match output_profile {
3432 OutputProfile::TokenDense => TableOutputFormat::Values,
3433 OutputProfile::Verbose => TableOutputFormat::Json,
3434 });
3435 let include_headers = params.include_headers.unwrap_or(false);
3436 if let Some(page_size) = params.page_size
3437 && page_size == 0
3438 {
3439 return Err(anyhow!("page_size must be greater than zero"));
3440 }
3441 let max_cells = config.max_cells();
3442 let max_payload_bytes = config.max_payload_bytes();
3443 #[cfg(feature = "recalc")]
3444 let requested_bounds: Vec<((u32, u32), (u32, u32))> = params
3445 .ranges
3446 .iter()
3447 .filter_map(|r| parse_range(r))
3448 .collect();
3449
3450 #[cfg(feature = "recalc")]
3451 let (values, has_formula_in_target) = workbook.with_sheet(¶ms.sheet_name, |sheet| {
3452 let has_formula_in_target = sheet_has_formula_in_bounds(sheet, &requested_bounds);
3453 let values = params
3454 .ranges
3455 .iter()
3456 .filter_map(|range| {
3457 parse_range(range).map(|((start_col, start_row), (end_col, end_row))| {
3458 let total_rows = (end_row - start_row + 1) as usize;
3459 let total_cols = (end_col - start_col + 1) as usize;
3460 let mut row_limit = total_rows;
3461 if let Some(page_size) = params.page_size {
3462 row_limit = row_limit.min(page_size as usize);
3463 }
3464
3465 let mut rows = Vec::new();
3466 for r in start_row..=end_row {
3467 if rows.len() >= row_limit {
3468 break;
3469 }
3470 let mut row_vals = Vec::new();
3471 for c in start_col..=end_col {
3472 if include_headers && r == start_row && start_row == 1 {
3473 row_vals.push(sheet.get_cell((c, 1u32)).and_then(cell_to_value));
3474 } else {
3475 row_vals.push(sheet.get_cell((c, r)).and_then(cell_to_value));
3476 }
3477 }
3478 rows.push(row_vals);
3479 }
3480
3481 let mut row_limit = cap_rows_by_cells(rows.len(), total_cols, max_cells);
3482 if row_limit > 0 {
3483 row_limit =
3484 cap_rows_by_payload_bytes(row_limit, max_payload_bytes, |count| {
3485 let entry =
3486 build_range_values_entry(format, range, &rows[..count], None);
3487 serde_json::to_vec(&entry)
3488 .map(|payload| payload.len())
3489 .unwrap_or(usize::MAX)
3490 });
3491 }
3492
3493 if row_limit < rows.len() {
3494 rows.truncate(row_limit);
3495 }
3496
3497 let next_start_row = if rows.len() < total_rows {
3498 Some(start_row + rows.len() as u32)
3499 } else {
3500 None
3501 };
3502
3503 build_range_values_entry(format, range, &rows, next_start_row)
3504 })
3505 })
3506 .collect();
3507
3508 Ok::<_, anyhow::Error>((values, has_formula_in_target))
3509 })??;
3510
3511 #[cfg(not(feature = "recalc"))]
3512 let values = workbook.with_sheet(¶ms.sheet_name, |sheet| {
3513 let values = params
3514 .ranges
3515 .iter()
3516 .filter_map(|range| {
3517 parse_range(range).map(|((start_col, start_row), (end_col, end_row))| {
3518 let total_rows = (end_row - start_row + 1) as usize;
3519 let total_cols = (end_col - start_col + 1) as usize;
3520 let mut row_limit = total_rows;
3521 if let Some(page_size) = params.page_size {
3522 row_limit = row_limit.min(page_size as usize);
3523 }
3524
3525 let mut rows = Vec::new();
3526 for r in start_row..=end_row {
3527 if rows.len() >= row_limit {
3528 break;
3529 }
3530 let mut row_vals = Vec::new();
3531 for c in start_col..=end_col {
3532 if include_headers && r == start_row && start_row == 1 {
3533 row_vals.push(sheet.get_cell((c, 1u32)).and_then(cell_to_value));
3534 } else {
3535 row_vals.push(sheet.get_cell((c, r)).and_then(cell_to_value));
3536 }
3537 }
3538 rows.push(row_vals);
3539 }
3540
3541 let mut row_limit = cap_rows_by_cells(rows.len(), total_cols, max_cells);
3542 if row_limit > 0 {
3543 row_limit =
3544 cap_rows_by_payload_bytes(row_limit, max_payload_bytes, |count| {
3545 let entry =
3546 build_range_values_entry(format, range, &rows[..count], None);
3547 serde_json::to_vec(&entry)
3548 .map(|payload| payload.len())
3549 .unwrap_or(usize::MAX)
3550 });
3551 }
3552
3553 if row_limit < rows.len() {
3554 rows.truncate(row_limit);
3555 }
3556
3557 let next_start_row = if rows.len() < total_rows {
3558 Some(start_row + rows.len() as u32)
3559 } else {
3560 None
3561 };
3562
3563 build_range_values_entry(format, range, &rows, next_start_row)
3564 })
3565 })
3566 .collect();
3567
3568 Ok::<_, anyhow::Error>(values)
3569 })??;
3570
3571 #[cfg(feature = "recalc")]
3572 let warnings: Vec<Warning> = {
3573 if fork_recalc_needed(&state, ¶ms.workbook_or_fork_id) && has_formula_in_target {
3574 vec![Warning {
3575 code: "WARN_STALE_FORMULAS".to_string(),
3576 message: "Fork has pending edits and may contain stale formula results; call recalculate on the fork for fresh values.".to_string(),
3577 }]
3578 } else {
3579 Vec::new()
3580 }
3581 };
3582
3583 #[cfg(not(feature = "recalc"))]
3584 let warnings: Vec<Warning> = Vec::new();
3585
3586 Ok(RangeValuesResponse {
3587 workbook_id: workbook.id.clone(),
3588 workbook_short_id: workbook.short_id.clone(),
3589 sheet_name: params.sheet_name,
3590 warnings,
3591 values,
3592 })
3593}
3594
3595pub async fn find_value(
3596 state: Arc<AppState>,
3597 params: FindValueParams,
3598) -> Result<FindValueResponse> {
3599 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
3600 let mut matches = Vec::new();
3601 let mut truncated = false;
3602 let mut seen: u32 = 0;
3603 let offset = params.offset.unwrap_or(0);
3604 let limit = params.limit;
3605 let mode = params.mode.clone().unwrap_or_else(|| {
3606 if params.label.is_some() {
3607 FindMode::Label
3608 } else {
3609 FindMode::Value
3610 }
3611 });
3612 let match_mode = params.match_mode.unwrap_or_default();
3613 let direction = params.direction.clone().unwrap_or(LabelDirection::Any);
3614
3615 let target_sheets: Vec<String> = if let Some(sheet) = ¶ms.sheet_name {
3616 vec![sheet.clone()]
3617 } else {
3618 workbook.sheet_names()
3619 };
3620
3621 for sheet_name in target_sheets {
3622 let metrics_entry = workbook.get_sheet_metrics_fast(&sheet_name)?;
3623 let default_bounds = (
3624 (1, 1),
3625 (
3626 metrics_entry.metrics.column_count.max(1),
3627 metrics_entry.metrics.row_count.max(1),
3628 ),
3629 );
3630 let region_bounds = params
3631 .region_id
3632 .and_then(|id| workbook.detected_region(&sheet_name, id).ok());
3633 let (sheet_matches, sheet_seen, sheet_truncated) =
3634 workbook.with_sheet(&sheet_name, |sheet| {
3635 collect_value_matches(
3636 sheet,
3637 &sheet_name,
3638 &mode,
3639 match_mode,
3640 &direction,
3641 ¶ms,
3642 region_bounds.as_ref(),
3643 default_bounds,
3644 offset,
3645 limit,
3646 seen,
3647 )
3648 })??;
3649 seen = sheet_seen;
3650 matches.extend(sheet_matches);
3651 if sheet_truncated {
3652 truncated = true;
3653 break;
3654 }
3655 }
3656
3657 let next_offset = if truncated {
3658 Some(offset.saturating_add(matches.len() as u32))
3659 } else {
3660 None
3661 };
3662
3663 Ok(FindValueResponse {
3664 workbook_id: workbook.id.clone(),
3665 workbook_short_id: workbook.short_id.clone(),
3666 matches,
3667 next_offset,
3668 })
3669}
3670
3671pub async fn read_table(
3672 state: Arc<AppState>,
3673 params: ReadTableParams,
3674) -> Result<ReadTableResponse> {
3675 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
3676 let config = state.config();
3677 let output_profile = config.output_profile();
3678 let format = params.format.unwrap_or(match output_profile {
3679 OutputProfile::TokenDense => TableOutputFormat::Csv,
3680 OutputProfile::Verbose => TableOutputFormat::Json,
3681 });
3682 let include_headers = params.include_headers.unwrap_or(true);
3683 let include_types = params.include_types.unwrap_or(false);
3684 let resolved = resolve_table_target(&workbook, ¶ms)?;
3685 let limit = params.limit.unwrap_or(100) as usize;
3686 let offset = params.offset.unwrap_or(0) as usize;
3687 let sample_mode = params.sample_mode.unwrap_or_default();
3688
3689 #[cfg(feature = "recalc")]
3690 let (headers, rows, total_rows, has_formula_in_target) =
3691 workbook.with_sheet(&resolved.sheet_name, |sheet| {
3692 let has_formula_in_target = sheet_has_formula_in_bounds(sheet, &[resolved.range]);
3693 let (headers, rows, total_rows) = extract_table_rows(
3694 sheet,
3695 &resolved,
3696 params.header_row,
3697 params.header_rows,
3698 params.columns.clone(),
3699 params.filters.clone(),
3700 limit,
3701 offset,
3702 sample_mode,
3703 )?;
3704 Ok::<_, anyhow::Error>((headers, rows, total_rows, has_formula_in_target))
3705 })??;
3706
3707 #[cfg(not(feature = "recalc"))]
3708 let (headers, rows, total_rows) = workbook.with_sheet(&resolved.sheet_name, |sheet| {
3709 let (headers, rows, total_rows) = extract_table_rows(
3710 sheet,
3711 &resolved,
3712 params.header_row,
3713 params.header_rows,
3714 params.columns.clone(),
3715 params.filters.clone(),
3716 limit,
3717 offset,
3718 sample_mode,
3719 )?;
3720 Ok::<_, anyhow::Error>((headers, rows, total_rows))
3721 })??;
3722
3723 #[cfg(feature = "recalc")]
3724 let warnings: Vec<Warning> = {
3725 if fork_recalc_needed(&state, ¶ms.workbook_or_fork_id) && has_formula_in_target {
3726 vec![Warning {
3727 code: "WARN_STALE_FORMULAS".to_string(),
3728 message: "Fork has pending edits and may contain stale formula results; call recalculate on the fork for fresh values.".to_string(),
3729 }]
3730 } else {
3731 Vec::new()
3732 }
3733 };
3734
3735 #[cfg(not(feature = "recalc"))]
3736 let warnings: Vec<Warning> = Vec::new();
3737
3738 let max_cells = config.max_cells();
3739 let max_payload_bytes = config.max_payload_bytes();
3740 let mut row_limit = cap_rows_by_cells(rows.len(), headers.len().max(1), max_cells);
3741 if row_limit > 0 {
3742 row_limit = cap_rows_by_payload_bytes(row_limit, max_payload_bytes, |count| {
3743 let (headers_out, rows_out, values_out, types_out, csv_out) = build_read_table_payload(
3744 format,
3745 &headers,
3746 &rows[..count],
3747 include_headers,
3748 include_types,
3749 );
3750 let response = ReadTableResponse {
3751 workbook_id: workbook.id.clone(),
3752 workbook_short_id: workbook.short_id.clone(),
3753 sheet_name: resolved.sheet_name.clone(),
3754 table_name: resolved.table_name.clone(),
3755 warnings: warnings.clone(),
3756 headers: headers_out,
3757 rows: rows_out,
3758 values: values_out,
3759 types: types_out,
3760 csv: csv_out,
3761 total_rows,
3762 next_offset: None,
3763 };
3764 serde_json::to_vec(&response)
3765 .map(|payload| payload.len())
3766 .unwrap_or(usize::MAX)
3767 });
3768 }
3769
3770 let rows = rows.into_iter().take(row_limit).collect::<Vec<_>>();
3771 let next_offset = if offset + rows.len() < total_rows as usize {
3772 Some((offset + rows.len()) as u32)
3773 } else {
3774 None
3775 };
3776 let (headers_out, rows_out, values_out, types_out, csv_out) =
3777 build_read_table_payload(format, &headers, &rows, include_headers, include_types);
3778
3779 Ok(ReadTableResponse {
3780 workbook_id: workbook.id.clone(),
3781 workbook_short_id: workbook.short_id.clone(),
3782 sheet_name: resolved.sheet_name,
3783 table_name: resolved.table_name,
3784 warnings,
3785 headers: headers_out,
3786 rows: rows_out,
3787 values: values_out,
3788 types: types_out,
3789 csv: csv_out,
3790 total_rows,
3791 next_offset,
3792 })
3793}
3794
3795pub async fn table_profile(
3796 state: Arc<AppState>,
3797 params: TableProfileParams,
3798) -> Result<TableProfileResponse> {
3799 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
3800 let config = state.config();
3801 let output_profile = config.output_profile();
3802 let summary_only = params
3803 .summary_only
3804 .unwrap_or(matches!(output_profile, OutputProfile::TokenDense));
3805 let resolved = resolve_table_target(
3806 &workbook,
3807 &ReadTableParams {
3808 workbook_or_fork_id: params.workbook_or_fork_id.clone(),
3809 sheet_name: params.sheet_name.clone(),
3810 table_name: params.table_name.clone(),
3811 region_id: params.region_id,
3812 range: None,
3813 header_row: None,
3814 header_rows: None,
3815 columns: None,
3816 filters: None,
3817 sample_mode: params.sample_mode,
3818 limit: params.sample_size,
3819 offset: Some(0),
3820 format: Some(TableOutputFormat::Json),
3821 include_headers: None,
3822 include_types: None,
3823 },
3824 )?;
3825
3826 let sample_size = params.sample_size.unwrap_or(10) as usize;
3827 let sample_mode = params.sample_mode.unwrap_or(SampleMode::Distributed);
3828
3829 let (mut headers, rows, total_rows) =
3830 workbook.with_sheet(&resolved.sheet_name, |sheet| {
3831 extract_table_rows(
3832 sheet,
3833 &resolved,
3834 None,
3835 None,
3836 None,
3837 None,
3838 sample_size,
3839 0,
3840 sample_mode,
3841 )
3842 })??;
3843
3844 let max_items = config.max_items();
3845 let max_payload_bytes = config.max_payload_bytes();
3846
3847 if let Some(max_items) = max_items
3848 && headers.len() > max_items
3849 {
3850 headers.truncate(max_items);
3851 }
3852
3853 let mut column_types = summarize_columns(&headers, &rows);
3854
3855 let mut samples: Vec<TableRow> = if summary_only {
3856 Vec::new()
3857 } else {
3858 rows.into_iter()
3859 .map(|row| filter_table_row(&row, &headers))
3860 .collect()
3861 };
3862
3863 if !summary_only {
3864 if let Some(max_items) = max_items
3865 && samples.len() > max_items
3866 {
3867 samples.truncate(max_items);
3868 }
3869
3870 if let Some(max_bytes) = max_payload_bytes {
3871 let sample_limit = cap_rows_by_payload_bytes(samples.len(), Some(max_bytes), |count| {
3872 let response = TableProfileResponse {
3873 workbook_id: workbook.id.clone(),
3874 workbook_short_id: workbook.short_id.clone(),
3875 sheet_name: resolved.sheet_name.clone(),
3876 table_name: resolved.table_name.clone(),
3877 headers: headers.clone(),
3878 column_types: column_types.clone(),
3879 row_count: total_rows,
3880 samples: samples[..count].to_vec(),
3881 notes: Vec::new(),
3882 };
3883 serde_json::to_vec(&response)
3884 .map(|payload| payload.len())
3885 .unwrap_or(usize::MAX)
3886 });
3887 if sample_limit < samples.len() {
3888 samples.truncate(sample_limit);
3889 }
3890
3891 let response = TableProfileResponse {
3892 workbook_id: workbook.id.clone(),
3893 workbook_short_id: workbook.short_id.clone(),
3894 sheet_name: resolved.sheet_name.clone(),
3895 table_name: resolved.table_name.clone(),
3896 headers: headers.clone(),
3897 column_types: column_types.clone(),
3898 row_count: total_rows,
3899 samples: samples.clone(),
3900 notes: Vec::new(),
3901 };
3902 if serde_json::to_vec(&response)
3903 .map(|payload| payload.len() > max_bytes)
3904 .unwrap_or(false)
3905 && !headers.is_empty()
3906 {
3907 let header_limit =
3908 cap_rows_by_payload_bytes(headers.len(), Some(max_bytes), |count| {
3909 let headers_slice = headers[..count].to_vec();
3910 let column_slice = column_types[..count.min(column_types.len())].to_vec();
3911 let samples_slice = samples
3912 .iter()
3913 .map(|row| filter_table_row(row, &headers_slice))
3914 .collect::<Vec<_>>();
3915 let response = TableProfileResponse {
3916 workbook_id: workbook.id.clone(),
3917 workbook_short_id: workbook.short_id.clone(),
3918 sheet_name: resolved.sheet_name.clone(),
3919 table_name: resolved.table_name.clone(),
3920 headers: headers_slice,
3921 column_types: column_slice,
3922 row_count: total_rows,
3923 samples: samples_slice,
3924 notes: Vec::new(),
3925 };
3926 serde_json::to_vec(&response)
3927 .map(|payload| payload.len())
3928 .unwrap_or(usize::MAX)
3929 });
3930
3931 if header_limit < headers.len() {
3932 headers.truncate(header_limit);
3933 column_types.truncate(header_limit.min(column_types.len()));
3934 samples = samples
3935 .into_iter()
3936 .map(|row| filter_table_row(&row, &headers))
3937 .collect();
3938 }
3939 }
3940 }
3941 }
3942
3943 Ok(TableProfileResponse {
3944 workbook_id: workbook.id.clone(),
3945 workbook_short_id: workbook.short_id.clone(),
3946 sheet_name: resolved.sheet_name,
3947 table_name: resolved.table_name,
3948 headers,
3949 column_types,
3950 row_count: total_rows,
3951 samples,
3952 notes: Vec::new(),
3953 })
3954}
3955
3956#[derive(Debug, Deserialize, JsonSchema)]
3957pub struct ManifestStubParams {
3958 #[serde(alias = "workbook_id")]
3959 pub workbook_or_fork_id: WorkbookId,
3960 pub sheet_filter: Option<String>,
3961}
3962
3963pub async fn get_manifest_stub(
3964 state: Arc<AppState>,
3965 params: ManifestStubParams,
3966) -> Result<ManifestStubResponse> {
3967 let workbook = state.open_workbook(¶ms.workbook_or_fork_id).await?;
3968 let mut summaries = workbook.list_summaries(true)?;
3969
3970 if let Some(filter) = ¶ms.sheet_filter {
3971 summaries.retain(|summary| summary.name.eq_ignore_ascii_case(filter));
3972 }
3973
3974 let sheets = summaries
3975 .into_iter()
3976 .map(|summary| ManifestSheetStub {
3977 sheet_name: summary.name.clone(),
3978 classification: summary.classification.clone(),
3979 candidate_expectations: vec![format!(
3980 "Review {} sheet for expectation candidates",
3981 format!("{:?}", summary.classification).to_ascii_lowercase()
3982 )],
3983 notes: summary.style_tags,
3984 })
3985 .collect();
3986
3987 let response = ManifestStubResponse {
3988 workbook_id: workbook.id.clone(),
3989 workbook_short_id: workbook.short_id.clone(),
3990 slug: workbook.slug.clone(),
3991 sheets,
3992 };
3993 Ok(response)
3994}
3995
3996#[derive(Debug, Deserialize, JsonSchema)]
3997pub struct CloseWorkbookParams {
3998 #[serde(alias = "workbook_id")]
3999 pub workbook_or_fork_id: WorkbookId,
4000}
4001
4002pub async fn close_workbook(
4003 state: Arc<AppState>,
4004 params: CloseWorkbookParams,
4005) -> Result<CloseWorkbookResponse> {
4006 state.close_workbook(¶ms.workbook_or_fork_id)?;
4007 Ok(CloseWorkbookResponse {
4008 workbook_id: params.workbook_or_fork_id.clone(),
4009 message: format!("workbook {} evicted", params.workbook_or_fork_id.as_str()),
4010 })
4011}
4012#[allow(clippy::too_many_arguments)]
4013fn collect_formula_matches(
4014 sheet: &umya_spreadsheet::Worksheet,
4015 sheet_name: &str,
4016 query: &str,
4017 case_sensitive: bool,
4018 include_context: bool,
4019 context_rows: u32,
4020 context_cols: u32,
4021 offset: u32,
4022 limit: u32,
4023 seen_so_far: u32,
4024) -> (Vec<FindFormulaMatch>, u32, bool) {
4025 use crate::workbook::cell_to_value;
4026
4027 let mut results = Vec::new();
4028 let mut seen = seen_so_far;
4029
4030 for cell in sheet.get_cell_collection() {
4031 if !cell.is_formula() {
4032 continue;
4033 }
4034 let formula = cell.get_formula();
4035 let haystack = if case_sensitive {
4036 formula.to_string()
4037 } else {
4038 formula.to_ascii_lowercase()
4039 };
4040 if !haystack.contains(query) {
4041 continue;
4042 }
4043
4044 if seen < offset {
4045 seen += 1;
4046 continue;
4047 }
4048
4049 if results.len() as u32 >= limit {
4050 return (results, seen, true);
4051 }
4052
4053 let coord = cell.get_coordinate();
4054 let column = *coord.get_col_num();
4055 let row = *coord.get_row_num();
4056
4057 let context = if include_context {
4058 let col_start = column.saturating_sub(context_cols / 2).max(1);
4059 let col_end = column + context_cols / 2;
4060 let columns: Vec<u32> = (col_start..=col_end).collect();
4061
4062 let mut context_rows_vec = Vec::new();
4063
4064 if context_rows > 0 {
4065 let header_row = build_row_snapshot(sheet, 1, &columns, false, false);
4066 context_rows_vec.push(header_row);
4067 }
4068
4069 let row_start = row.saturating_sub(context_rows / 2).max(1);
4070 let row_end = (row + context_rows / 2).min(sheet.get_highest_row());
4071
4072 for ctx_row in row_start..=row_end {
4073 let ctx_row_snapshot = build_row_snapshot(sheet, ctx_row, &columns, true, false);
4074 context_rows_vec.push(ctx_row_snapshot);
4075 }
4076
4077 context_rows_vec
4078 } else {
4079 Vec::new()
4080 };
4081
4082 results.push(FindFormulaMatch {
4083 address: coord.get_coordinate(),
4084 sheet_name: sheet_name.to_string(),
4085 formula: formula.to_string(),
4086 cached_value: cell_to_value(cell),
4087 context,
4088 });
4089
4090 seen += 1;
4091 }
4092
4093 (results, seen, false)
4094}
4095
4096#[derive(Clone)]
4097struct TraceFormulaInfo {
4098 fingerprint: String,
4099 formula: String,
4100}
4101
4102#[derive(Clone)]
4103struct TraceEdgeRaw {
4104 from: String,
4105 to: String,
4106 neighbor: String,
4107}
4108
4109#[derive(Clone)]
4110struct LayerLinks {
4111 depth: u32,
4112 edges: Vec<TraceEdgeRaw>,
4113 truncated_cells: Vec<String>,
4114}
4115
4116#[derive(Clone)]
4117struct NeighborDetail {
4118 address: String,
4119 column: Option<u32>,
4120 row: Option<u32>,
4121 kind: TraceCellKind,
4122 value: Option<CellValue>,
4123 formula: Option<String>,
4124 fingerprint: Option<String>,
4125 external: bool,
4126}
4127
4128fn build_formula_lookup(graph: &FormulaGraph) -> HashMap<String, TraceFormulaInfo> {
4129 let mut map = HashMap::new();
4130 for group in graph.groups() {
4131 for address in group.addresses.clone() {
4132 map.insert(
4133 address.to_ascii_uppercase(),
4134 TraceFormulaInfo {
4135 fingerprint: group.fingerprint.clone(),
4136 formula: group.formula.clone(),
4137 },
4138 );
4139 }
4140 }
4141 map
4142}
4143
4144struct TraceConfig<'a> {
4145 direction: &'a TraceDirection,
4146 origin: &'a str,
4147 sheet_name: &'a str,
4148 depth_limit: u32,
4149 page_size: usize,
4150}
4151
4152fn build_trace_layers(
4153 workbook: &WorkbookContext,
4154 graph: &FormulaGraph,
4155 formula_lookup: &HashMap<String, TraceFormulaInfo>,
4156 config: &TraceConfig<'_>,
4157 cursor: Option<TraceCursor>,
4158) -> Result<(Vec<TraceLayer>, Option<TraceCursor>, Vec<String>)> {
4159 let layer_links =
4160 collect_layer_links(graph, config.direction, config.origin, config.depth_limit);
4161 let mut layers = Vec::new();
4162 let mut next_cursor = None;
4163 let mut notes = Vec::new();
4164 let focus_depth = cursor.as_ref().map(|c| c.depth);
4165
4166 for layer in layer_links {
4167 let produce_edges = focus_depth.is_none_or(|depth| depth == layer.depth);
4168 let offset = cursor
4169 .as_ref()
4170 .filter(|c| c.depth == layer.depth)
4171 .map(|c| c.offset)
4172 .unwrap_or(0);
4173
4174 let mut node_set: HashSet<String> = HashSet::new();
4175 for edge in &layer.edges {
4176 node_set.insert(edge.neighbor.clone());
4177 }
4178 let mut nodes: Vec<String> = node_set.into_iter().collect();
4179 nodes.sort_by(|a, b| compare_addresses(a, b));
4180
4181 let details = workbook.with_sheet(config.sheet_name, |sheet| {
4182 collect_neighbor_details(sheet, config.sheet_name, &nodes, formula_lookup)
4183 })?;
4184 let total_nodes = details.len();
4185 let start = offset.min(total_nodes);
4186 let end = if produce_edges {
4187 (start + config.page_size).min(total_nodes)
4188 } else {
4189 start
4190 };
4191 let selected_slice = if produce_edges {
4192 &details[start..end]
4193 } else {
4194 &details[0..0]
4195 };
4196 let selected_addresses: HashSet<String> = selected_slice
4197 .iter()
4198 .map(|detail| detail.address.clone())
4199 .collect();
4200
4201 let summary = build_layer_summary(&details);
4202 let range_highlights = build_range_highlights(&details);
4203 let group_highlights = build_formula_group_highlights(&details);
4204 let notable_cells = build_notable_cells(&details, &range_highlights, &group_highlights);
4205
4206 let highlights = TraceLayerHighlights {
4207 top_ranges: range_highlights.clone(),
4208 top_formula_groups: group_highlights.clone(),
4209 notable_cells,
4210 };
4211
4212 let edges = if produce_edges {
4213 build_edges_for_layer(&layer.edges, &selected_addresses, formula_lookup)
4214 } else {
4215 Vec::new()
4216 };
4217
4218 let has_more = produce_edges && end < total_nodes;
4219 if has_more && next_cursor.is_none() {
4220 next_cursor = Some(TraceCursor {
4221 depth: layer.depth,
4222 offset: end,
4223 });
4224 }
4225 if has_more {
4226 notes.push(format!(
4227 "Layer {} truncated at {} of {} nodes; supply cursor.depth={} and cursor.offset={} to continue",
4228 layer.depth, end, total_nodes, layer.depth, end
4229 ));
4230 }
4231
4232 if !layer.truncated_cells.is_empty() {
4233 let cell_list = if layer.truncated_cells.len() <= 3 {
4234 layer.truncated_cells.join(", ")
4235 } else {
4236 format!(
4237 "{}, ... ({} more)",
4238 layer.truncated_cells[..3].join(", "),
4239 layer.truncated_cells.len() - 3
4240 )
4241 };
4242 notes.push(format!(
4243 "Layer {}: dependents truncated at {} per cell for: {}",
4244 layer.depth, TRACE_DEPENDENTS_PER_CELL_LIMIT, cell_list
4245 ));
4246 }
4247
4248 layers.push(TraceLayer {
4249 depth: layer.depth,
4250 summary,
4251 highlights,
4252 edges,
4253 has_more,
4254 });
4255 }
4256
4257 Ok((layers, next_cursor, notes))
4258}
4259
4260fn collect_layer_links(
4261 graph: &FormulaGraph,
4262 direction: &TraceDirection,
4263 origin: &str,
4264 depth_limit: u32,
4265) -> Vec<LayerLinks> {
4266 let mut visited: HashSet<String> = HashSet::new();
4267 visited.insert(origin.to_string());
4268 let mut frontier = vec![origin.to_string()];
4269 let mut layers = Vec::new();
4270
4271 for depth in 1..=depth_limit {
4272 let mut next_frontier_set: HashSet<String> = HashSet::new();
4273 let mut edges = Vec::new();
4274 let mut truncated_cells = Vec::new();
4275
4276 for cell in &frontier {
4277 let (neighbors, was_truncated) = match direction {
4278 TraceDirection::Precedents => (graph.precedents(cell), false),
4279 TraceDirection::Dependents => {
4280 graph.dependents_limited(cell, Some(TRACE_DEPENDENTS_PER_CELL_LIMIT))
4281 }
4282 };
4283
4284 if was_truncated {
4285 truncated_cells.push(cell.clone());
4286 }
4287
4288 for neighbor in neighbors {
4289 let neighbor_upper = neighbor.to_ascii_uppercase();
4290 let edge = match direction {
4291 TraceDirection::Precedents => TraceEdgeRaw {
4292 from: cell.clone(),
4293 to: neighbor_upper.clone(),
4294 neighbor: neighbor_upper.clone(),
4295 },
4296 TraceDirection::Dependents => TraceEdgeRaw {
4297 from: neighbor_upper.clone(),
4298 to: cell.clone(),
4299 neighbor: neighbor_upper.clone(),
4300 },
4301 };
4302 edges.push(edge);
4303 if visited.insert(neighbor_upper.clone()) {
4304 next_frontier_set.insert(neighbor_upper);
4305 }
4306 }
4307 }
4308
4309 if edges.is_empty() {
4310 break;
4311 }
4312
4313 layers.push(LayerLinks {
4314 depth,
4315 edges,
4316 truncated_cells,
4317 });
4318 if next_frontier_set.is_empty() {
4319 break;
4320 }
4321 let mut next_frontier: Vec<String> = next_frontier_set.into_iter().collect();
4322 next_frontier.sort();
4323 frontier = next_frontier;
4324 }
4325
4326 layers
4327}
4328
4329fn collect_neighbor_details(
4330 sheet: &umya_spreadsheet::Worksheet,
4331 current_sheet: &str,
4332 addresses: &[String],
4333 formula_lookup: &HashMap<String, TraceFormulaInfo>,
4334) -> Vec<NeighborDetail> {
4335 let mut details = Vec::new();
4336 for address in addresses {
4337 let (sheet_part, cell_part) = split_sheet_and_cell(address);
4338 let normalized_sheet = sheet_part
4339 .as_ref()
4340 .map(|s| clean_sheet_name(s).to_ascii_lowercase());
4341 let is_external = normalized_sheet
4342 .as_ref()
4343 .map(|s| !s.eq_ignore_ascii_case(current_sheet))
4344 .unwrap_or(false);
4345
4346 let Some(cell_ref) = cell_part else {
4347 details.push(NeighborDetail {
4348 address: address.clone(),
4349 column: None,
4350 row: None,
4351 kind: TraceCellKind::External,
4352 value: None,
4353 formula: None,
4354 fingerprint: None,
4355 external: true,
4356 });
4357 continue;
4358 };
4359
4360 let cell_ref_upper = cell_ref.to_ascii_uppercase();
4361
4362 if is_external {
4363 let formula_info = lookup_formula_info(formula_lookup, &cell_ref_upper, address);
4364 details.push(NeighborDetail {
4365 address: address.clone(),
4366 column: None,
4367 row: None,
4368 kind: TraceCellKind::External,
4369 value: None,
4370 formula: formula_info.map(|info| info.formula.clone()),
4371 fingerprint: formula_info.map(|info| info.fingerprint.clone()),
4372 external: true,
4373 });
4374 continue;
4375 }
4376
4377 let Some((col, row)) = parse_address(&cell_ref_upper) else {
4378 details.push(NeighborDetail {
4379 address: address.clone(),
4380 column: None,
4381 row: None,
4382 kind: TraceCellKind::External,
4383 value: None,
4384 formula: None,
4385 fingerprint: None,
4386 external: true,
4387 });
4388 continue;
4389 };
4390
4391 let cell_opt = sheet.get_cell((&col, &row));
4392 let formula_info = lookup_formula_info(formula_lookup, &cell_ref_upper, address);
4393 if let Some(cell) = cell_opt {
4394 let value = cell_to_value(cell);
4395 let kind = if cell.is_formula() {
4396 TraceCellKind::Formula
4397 } else if value.is_some() {
4398 TraceCellKind::Literal
4399 } else {
4400 TraceCellKind::Blank
4401 };
4402 details.push(NeighborDetail {
4403 address: address.clone(),
4404 column: Some(col),
4405 row: Some(row),
4406 kind,
4407 value,
4408 formula: formula_info.map(|info| info.formula.clone()),
4409 fingerprint: formula_info.map(|info| info.fingerprint.clone()),
4410 external: false,
4411 });
4412 } else {
4413 details.push(NeighborDetail {
4414 address: address.clone(),
4415 column: Some(col),
4416 row: Some(row),
4417 kind: TraceCellKind::Blank,
4418 value: None,
4419 formula: formula_info.map(|info| info.formula.clone()),
4420 fingerprint: formula_info.map(|info| info.fingerprint.clone()),
4421 external: false,
4422 });
4423 }
4424 }
4425 details
4426}
4427
4428fn build_layer_summary(details: &[NeighborDetail]) -> TraceLayerSummary {
4429 let mut summary = TraceLayerSummary {
4430 total_nodes: details.len(),
4431 formula_nodes: 0,
4432 value_nodes: 0,
4433 blank_nodes: 0,
4434 external_nodes: 0,
4435 unique_formula_groups: 0,
4436 };
4437
4438 let mut fingerprints: HashSet<String> = HashSet::new();
4439
4440 for detail in details {
4441 match detail.kind {
4442 TraceCellKind::Formula => {
4443 summary.formula_nodes += 1;
4444 if let Some(fp) = &detail.fingerprint {
4445 fingerprints.insert(fp.clone());
4446 }
4447 }
4448 TraceCellKind::Literal => summary.value_nodes += 1,
4449 TraceCellKind::Blank => summary.blank_nodes += 1,
4450 TraceCellKind::External => summary.external_nodes += 1,
4451 }
4452 }
4453
4454 summary.unique_formula_groups = fingerprints.len();
4455 summary
4456}
4457
4458fn build_formula_group_highlights(details: &[NeighborDetail]) -> Vec<TraceFormulaGroupHighlight> {
4459 let mut aggregates: HashMap<String, (String, usize, Vec<String>)> = HashMap::new();
4460 for detail in details {
4461 if let (Some(fingerprint), Some(formula)) = (&detail.fingerprint, &detail.formula) {
4462 let entry = aggregates
4463 .entry(fingerprint.clone())
4464 .or_insert_with(|| (formula.clone(), 0, Vec::new()));
4465 entry.1 += 1;
4466 if entry.2.len() < TRACE_GROUP_SAMPLE_LIMIT {
4467 entry.2.push(detail.address.clone());
4468 }
4469 }
4470 }
4471
4472 let mut highlights: Vec<TraceFormulaGroupHighlight> = aggregates
4473 .into_iter()
4474 .map(
4475 |(fingerprint, (formula, count, sample_addresses))| TraceFormulaGroupHighlight {
4476 fingerprint,
4477 formula,
4478 count,
4479 sample_addresses,
4480 },
4481 )
4482 .collect();
4483
4484 highlights.sort_by(|a, b| b.count.cmp(&a.count));
4485 highlights.truncate(TRACE_GROUP_HIGHLIGHT_LIMIT);
4486 highlights
4487}
4488
4489fn build_range_highlights(details: &[NeighborDetail]) -> Vec<TraceRangeHighlight> {
4490 let mut by_column: HashMap<u32, Vec<&NeighborDetail>> = HashMap::new();
4491 for detail in details {
4492 if let (Some(col), Some(_row)) = (detail.column, detail.row)
4493 && !detail.external
4494 {
4495 by_column.entry(col).or_default().push(detail);
4496 }
4497 }
4498
4499 for column_entries in by_column.values_mut() {
4500 column_entries.sort_by(|a, b| a.row.cmp(&b.row));
4501 }
4502
4503 let mut ranges = Vec::new();
4504 for entries in by_column.values() {
4505 let mut current: Vec<&NeighborDetail> = Vec::new();
4506 for detail in entries {
4507 if current.is_empty() {
4508 current.push(detail);
4509 continue;
4510 }
4511 let prev_row = current.last().and_then(|d| d.row).unwrap_or(0);
4512 if detail.row.unwrap_or(0) == prev_row + 1 {
4513 current.push(detail);
4514 } else {
4515 if current.len() >= TRACE_RANGE_THRESHOLD {
4516 ranges.push(make_range_highlight(¤t));
4517 }
4518 current.clear();
4519 current.push(detail);
4520 }
4521 }
4522 if current.len() >= TRACE_RANGE_THRESHOLD {
4523 ranges.push(make_range_highlight(¤t));
4524 }
4525 }
4526
4527 ranges.sort_by(|a, b| b.count.cmp(&a.count));
4528 ranges.truncate(TRACE_RANGE_HIGHLIGHT_LIMIT);
4529 ranges
4530}
4531
4532fn make_range_highlight(details: &[&NeighborDetail]) -> TraceRangeHighlight {
4533 let mut literals = 0usize;
4534 let mut formulas = 0usize;
4535 let mut blanks = 0usize;
4536 let mut sample_values = Vec::new();
4537 let mut sample_formulas = Vec::new();
4538 let mut sample_addresses = Vec::new();
4539
4540 for detail in details {
4541 match detail.kind {
4542 TraceCellKind::Formula => {
4543 formulas += 1;
4544 if let Some(formula) = &detail.formula
4545 && sample_formulas.len() < TRACE_RANGE_FORMULA_SAMPLES
4546 && !sample_formulas.contains(formula)
4547 {
4548 sample_formulas.push(formula.clone());
4549 }
4550 }
4551 TraceCellKind::Literal => {
4552 literals += 1;
4553 if let Some(value) = &detail.value
4554 && sample_values.len() < TRACE_RANGE_VALUE_SAMPLES
4555 {
4556 sample_values.push(value.clone());
4557 }
4558 }
4559 TraceCellKind::Blank => blanks += 1,
4560 TraceCellKind::External => {}
4561 }
4562 if sample_addresses.len() < TRACE_RANGE_VALUE_SAMPLES {
4563 sample_addresses.push(detail.address.clone());
4564 }
4565 }
4566
4567 TraceRangeHighlight {
4568 start: details
4569 .first()
4570 .map(|d| d.address.clone())
4571 .unwrap_or_default(),
4572 end: details
4573 .last()
4574 .map(|d| d.address.clone())
4575 .unwrap_or_default(),
4576 count: details.len(),
4577 literals,
4578 formulas,
4579 blanks,
4580 sample_values,
4581 sample_formulas,
4582 sample_addresses,
4583 }
4584}
4585
4586fn build_notable_cells(
4587 details: &[NeighborDetail],
4588 ranges: &[TraceRangeHighlight],
4589 groups: &[TraceFormulaGroupHighlight],
4590) -> Vec<TraceCellHighlight> {
4591 let mut exclude: HashSet<String> = HashSet::new();
4592 for range in ranges {
4593 exclude.insert(range.start.clone());
4594 exclude.insert(range.end.clone());
4595 for addr in &range.sample_addresses {
4596 exclude.insert(addr.clone());
4597 }
4598 }
4599 for group in groups {
4600 for addr in &group.sample_addresses {
4601 exclude.insert(addr.clone());
4602 }
4603 }
4604
4605 let mut highlights = Vec::new();
4606 let mut kind_counts: HashMap<TraceCellKind, usize> = HashMap::new();
4607
4608 for detail in details {
4609 if highlights.len() >= TRACE_CELL_HIGHLIGHT_LIMIT {
4610 break;
4611 }
4612 if exclude.contains(&detail.address) {
4613 continue;
4614 }
4615 let counter = kind_counts.entry(detail.kind.clone()).or_insert(0);
4616 if *counter >= 2 && detail.kind != TraceCellKind::External {
4617 continue;
4618 }
4619 highlights.push(TraceCellHighlight {
4620 address: detail.address.clone(),
4621 kind: detail.kind.clone(),
4622 value: detail.value.clone(),
4623 formula: detail.formula.clone(),
4624 });
4625 *counter += 1;
4626 }
4627
4628 highlights
4629}
4630
4631fn build_edges_for_layer(
4632 raw_edges: &[TraceEdgeRaw],
4633 selected: &HashSet<String>,
4634 formula_lookup: &HashMap<String, TraceFormulaInfo>,
4635) -> Vec<FormulaTraceEdge> {
4636 let mut edges = Vec::new();
4637 for edge in raw_edges {
4638 if selected.contains(&edge.neighbor) {
4639 let formula = lookup_formula_info(formula_lookup, &edge.neighbor, &edge.neighbor)
4640 .map(|info| info.formula.clone());
4641 edges.push(FormulaTraceEdge {
4642 from: edge.from.clone(),
4643 to: edge.to.clone(),
4644 formula,
4645 note: None,
4646 });
4647 }
4648 }
4649 edges.sort_by(|a, b| compare_addresses(&a.to, &b.to));
4650 edges
4651}
4652
4653fn lookup_formula_info<'a>(
4654 lookup: &'a HashMap<String, TraceFormulaInfo>,
4655 cell_ref: &str,
4656 original: &str,
4657) -> Option<&'a TraceFormulaInfo> {
4658 if let Some(info) = lookup.get(cell_ref) {
4659 return Some(info);
4660 }
4661 if let (Some(_sheet), Some(cell)) = split_sheet_and_cell(original) {
4662 let upper = cell.to_ascii_uppercase();
4663 return lookup.get(&upper);
4664 }
4665 None
4666}
4667
4668fn compare_addresses(left: &str, right: &str) -> Ordering {
4669 let (sheet_left, cell_left) = split_sheet_and_cell(left);
4670 let (sheet_right, cell_right) = split_sheet_and_cell(right);
4671
4672 let sheet_left_key = sheet_left
4673 .as_ref()
4674 .map(|s| clean_sheet_name(s).to_ascii_uppercase())
4675 .unwrap_or_default();
4676 let sheet_right_key = sheet_right
4677 .as_ref()
4678 .map(|s| clean_sheet_name(s).to_ascii_uppercase())
4679 .unwrap_or_default();
4680
4681 match sheet_left_key.cmp(&sheet_right_key) {
4682 Ordering::Equal => {
4683 let left_core = cell_left.unwrap_or_else(|| left.to_string());
4684 let right_core = cell_right.unwrap_or_else(|| right.to_string());
4685 let left_coords = parse_address(&left_core.to_ascii_uppercase());
4686 let right_coords = parse_address(&right_core.to_ascii_uppercase());
4687 match (left_coords, right_coords) {
4688 (Some((lc, lr)), Some((rc, rr))) => lc
4689 .cmp(&rc)
4690 .then_with(|| lr.cmp(&rr))
4691 .then_with(|| left_core.cmp(&right_core)),
4692 _ => left_core.cmp(&right_core),
4693 }
4694 }
4695 other => other,
4696 }
4697}
4698
4699fn split_sheet_and_cell(address: &str) -> (Option<String>, Option<String>) {
4700 if let Some(idx) = address.rfind('!') {
4701 let sheet = address[..idx].to_string();
4702 let cell = address[idx + 1..].to_string();
4703 (Some(sheet), Some(cell))
4704 } else {
4705 (None, Some(address.to_string()))
4706 }
4707}
4708
4709fn clean_sheet_name(sheet: &str) -> String {
4710 let trimmed = sheet.trim_matches(|c| c == '\'' || c == '"');
4711 let after_bracket = trimmed.rsplit(']').next().unwrap_or(trimmed);
4712 after_bracket
4713 .trim_matches(|c| c == '\'' || c == '"')
4714 .to_string()
4715}