1use regex::Regex;
2use serde_json::{json, Value};
3use std::path::PathBuf;
4
5use crate::cli::args::{OutputFormat, OutputShape, ReadCommands};
6use crate::cli::common::{file_format, format_bounds, sheet_by_index};
7use crate::cli::envelope;
8use crate::cli::error::AppError;
9use crate::cli::sheet_query::{
10 load_target_sheet, read_header_values, resolve_bounds, resolve_optional_header_row,
11 stable_record_keys,
12};
13use crate::excel::{open_workbook, CellType, Sheet};
14use crate::utils::{index_to_col_name, parse_cell_reference};
15
16pub fn handle(cmd: ReadCommands) -> Result<Value, AppError> {
17 match cmd {
18 ReadCommands::Cell {
19 file,
20 sheet,
21 sheet_index,
22 cell,
23 format: _,
24 } => read_cell(file, sheet, sheet_index, cell),
25 ReadCommands::Range {
26 file,
27 sheet,
28 sheet_index,
29 range,
30 format: _,
31 } => read_range(file, sheet, sheet_index, range),
32 ReadCommands::Rows {
33 file,
34 sheet,
35 sheet_index,
36 range,
37 header_row,
38 select,
39 filters,
40 limit,
41 offset,
42 non_empty,
43 output_shape,
44 format,
45 } => read_rows(
46 "read.rows",
47 false,
48 RowReadRequest {
49 file,
50 sheet,
51 sheet_index,
52 range,
53 header_row,
54 select,
55 filters,
56 limit,
57 offset,
58 non_empty,
59 output_shape,
60 format,
61 },
62 ),
63 ReadCommands::Records {
64 file,
65 sheet,
66 sheet_index,
67 range,
68 header_row,
69 select,
70 filters,
71 limit,
72 offset,
73 non_empty,
74 output_shape,
75 format,
76 } => read_rows(
77 "read.records",
78 true,
79 RowReadRequest {
80 file,
81 sheet,
82 sheet_index,
83 range,
84 header_row,
85 select,
86 filters,
87 limit,
88 offset,
89 non_empty,
90 output_shape,
91 format,
92 },
93 ),
94 }
95}
96
97fn read_cell(
98 file: std::path::PathBuf,
99 sheet: Option<String>,
100 sheet_index: Option<usize>,
101 cell: String,
102) -> Result<Value, AppError> {
103 let format_str = file_format(&file);
104 let path_str = file.to_string_lossy().to_string();
105
106 let (row, col) = parse_cell_reference(&cell).ok_or_else(|| AppError::InvalidQuery {
107 message: format!("Invalid cell reference: {}", cell),
108 })?;
109
110 let mut workbook =
111 open_workbook(&file, false).map_err(crate::cli::error::anyhow_to_app_error)?;
112
113 let resolved_sheet = load_target_sheet(&workbook, &sheet, &sheet_index)?;
114
115 workbook
116 .ensure_sheet_loaded(resolved_sheet.index, &resolved_sheet.name)
117 .map_err(crate::cli::error::anyhow_to_app_error)?;
118
119 let sheet_obj = sheet_by_index(&workbook, resolved_sheet.index, &resolved_sheet.name)?;
120
121 let cell_ref = cell.to_ascii_uppercase();
122 let in_bounds = row < sheet_obj.data.len() && col < sheet_obj.data[row].len();
123 let (value, cell_type, formula) = if in_bounds {
124 let c = &sheet_obj.data[row][col];
125 let formula =
126 workbook.formula_for_cell(resolved_sheet.index, &resolved_sheet.name, &cell_ref);
127 let type_str = if c.is_formula || formula.is_some() {
128 "formula"
129 } else {
130 match c.cell_type {
131 CellType::Text => "text",
132 CellType::Number => "number",
133 CellType::Date => "date",
134 CellType::Boolean => "boolean",
135 CellType::Empty => "empty",
136 }
137 };
138 (crate::json_export::process_cell_value(c), type_str, formula)
139 } else {
140 (Value::Null, "empty", None)
141 };
142
143 let mut data = serde_json::Map::new();
144 data.insert("cell".to_string(), json!(cell_ref));
145 data.insert("value".to_string(), value);
146 data.insert("type".to_string(), json!(cell_type));
147 if let Some(formula) = formula {
148 data.insert("formula".to_string(), json!(formula));
149 }
150
151 Ok(envelope::success_envelope(
152 "read.cell",
153 &path_str,
154 &format_str,
155 envelope::target_cell(&resolved_sheet.name, resolved_sheet.index, &cell_ref),
156 json!({}),
157 Value::Object(data),
158 vec![],
159 ))
160}
161
162#[derive(Clone, Copy)]
163enum FilterOp {
164 Eq,
165 Ne,
166 Gt,
167 Gte,
168 Lt,
169 Lte,
170 Contains,
171 Regex,
172 IsNull,
173 NotNull,
174}
175
176struct FilterSpec {
177 raw: String,
178 col_idx: usize,
179 op: FilterOp,
180 value: String,
181 numeric_value: Option<f64>,
182 regex: Option<Regex>,
183}
184
185struct RowReadRequest {
186 file: PathBuf,
187 sheet: Option<String>,
188 sheet_index: Option<usize>,
189 range: Option<String>,
190 header_row: String,
191 select: Option<String>,
192 filters: Vec<String>,
193 limit: Option<usize>,
194 offset: Option<usize>,
195 non_empty: bool,
196 output_shape: OutputShape,
197 format: OutputFormat,
198}
199
200#[derive(Clone, Copy)]
201struct RowBounds {
202 start_row: usize,
203 end_row: usize,
204 start_col: usize,
205 end_col: usize,
206}
207
208struct RowOutput {
209 values: Vec<Value>,
210 row_count: usize,
211 truncated: bool,
212}
213
214#[derive(Clone, Copy)]
215struct RowOutputFormat<'a> {
216 selected_indices: &'a [usize],
217 columns: &'a [String],
218 output_shape: OutputShape,
219}
220
221struct RowCollectRequest<'a> {
222 sheet: &'a Sheet,
223 bounds: RowBounds,
224 output_format: RowOutputFormat<'a>,
225 filters: &'a [FilterSpec],
226 non_empty: bool,
227 offset: usize,
228 limit: Option<usize>,
229}
230
231fn invalid_query(message: impl Into<String>) -> AppError {
232 AppError::InvalidQuery {
233 message: message.into(),
234 }
235}
236
237fn sheet_row_values(sheet: &Sheet, row: usize, bounds: RowBounds) -> Option<Vec<Value>> {
238 if row >= sheet.data.len() {
239 return None;
240 }
241
242 let values = (bounds.start_col..=bounds.end_col)
243 .map(|col| {
244 if col < sheet.data[row].len() {
245 crate::json_export::process_cell_value(&sheet.data[row][col])
246 } else {
247 Value::Null
248 }
249 })
250 .collect();
251
252 Some(values)
253}
254
255fn row_passes_filters(row: &[Value], filters: &[FilterSpec], non_empty: bool) -> bool {
256 if non_empty && row.iter().all(is_empty_cell) {
257 return false;
258 }
259
260 filters.iter().all(|filter| filter_matches(row, filter))
261}
262
263fn output_row(row: &[Value], output_format: RowOutputFormat<'_>) -> Value {
264 if matches!(
265 output_format.output_shape,
266 OutputShape::Records | OutputShape::Jsonl
267 ) {
268 let mut record = serde_json::Map::new();
269 for idx in output_format.selected_indices {
270 let value = row.get(*idx).cloned().unwrap_or(Value::Null);
271 record.insert(output_format.columns[*idx].clone(), value);
272 }
273 return Value::Object(record);
274 }
275
276 Value::Array(
277 output_format
278 .selected_indices
279 .iter()
280 .map(|idx| row.get(*idx).cloned().unwrap_or(Value::Null))
281 .collect(),
282 )
283}
284
285fn collect_row_output(request: RowCollectRequest<'_>) -> RowOutput {
286 let mut values = Vec::new();
287 let mut skipped = 0usize;
288 let mut truncated = false;
289
290 for row_idx in request.bounds.start_row..=request.bounds.end_row {
291 let Some(row) = sheet_row_values(request.sheet, row_idx, request.bounds) else {
292 break;
293 };
294 if !row_passes_filters(&row, request.filters, request.non_empty) {
295 continue;
296 }
297 if skipped < request.offset {
298 skipped += 1;
299 continue;
300 }
301 if request.limit.is_some_and(|size| values.len() >= size) {
302 truncated = true;
303 break;
304 }
305
306 values.push(output_row(&row, request.output_format));
307 }
308
309 let row_count = values.len();
310 RowOutput {
311 values,
312 row_count,
313 truncated,
314 }
315}
316
317fn parse_selected_columns(
318 select: Option<String>,
319 columns: &[String],
320) -> Result<Vec<usize>, AppError> {
321 let Some(select) = select else {
322 return Ok((0..columns.len()).collect());
323 };
324
325 let mut selected = Vec::new();
326 for field in select.split(',').map(str::trim) {
327 if field.is_empty() {
328 return Err(invalid_query("Selected column names cannot be empty"));
329 }
330 let col_idx = columns
331 .iter()
332 .position(|column| column == field)
333 .ok_or_else(|| invalid_query(format!("Unknown selected column: {field}")))?;
334 selected.push(col_idx);
335 }
336
337 Ok(selected)
338}
339
340fn parse_filters(filters: Vec<String>, columns: &[String]) -> Result<Vec<FilterSpec>, AppError> {
341 filters
342 .into_iter()
343 .map(|raw| {
344 let mut parts = raw.splitn(3, ':');
345 let field = parts.next().unwrap_or_default().trim();
346 let op = parts.next().unwrap_or_default().trim();
347 let value = parts.next().ok_or_else(|| {
348 invalid_query(format!("Invalid filter '{raw}'; expected field:op:value"))
349 })?;
350 let value = value.to_string();
351
352 if field.is_empty() {
353 return Err(invalid_query(format!(
354 "Invalid filter '{raw}'; field is empty"
355 )));
356 }
357
358 let col_idx = columns
359 .iter()
360 .position(|column| column == field)
361 .ok_or_else(|| invalid_query(format!("Unknown filter column: {field}")))?;
362
363 let op = match op {
364 "eq" => FilterOp::Eq,
365 "ne" => FilterOp::Ne,
366 "gt" => FilterOp::Gt,
367 "gte" => FilterOp::Gte,
368 "lt" => FilterOp::Lt,
369 "lte" => FilterOp::Lte,
370 "contains" => FilterOp::Contains,
371 "regex" => FilterOp::Regex,
372 "isnull" => FilterOp::IsNull,
373 "notnull" => FilterOp::NotNull,
374 "" => {
375 return Err(invalid_query(format!(
376 "Invalid filter '{raw}'; operator is empty"
377 )))
378 }
379 _ => return Err(invalid_query(format!("Unknown filter operator: {op}"))),
380 };
381
382 let numeric_value = if matches!(
383 op,
384 FilterOp::Gt | FilterOp::Gte | FilterOp::Lt | FilterOp::Lte
385 ) {
386 Some(value.trim().parse::<f64>().map_err(|_| {
387 invalid_query(format!("Numeric filter value is invalid in '{raw}'"))
388 })?)
389 } else {
390 None
391 };
392
393 let regex = if matches!(op, FilterOp::Regex) {
394 Some(
395 Regex::new(&value)
396 .map_err(|err| invalid_query(format!("Invalid regex filter: {err}")))?,
397 )
398 } else {
399 None
400 };
401
402 Ok(FilterSpec {
403 raw,
404 col_idx,
405 op,
406 value,
407 numeric_value,
408 regex,
409 })
410 })
411 .collect()
412}
413
414fn value_as_filter_text(value: &Value) -> String {
415 match value {
416 Value::Null => String::new(),
417 Value::String(value) => value.clone(),
418 Value::Number(value) => value.to_string(),
419 Value::Bool(value) => value.to_string(),
420 other => other.to_string(),
421 }
422}
423
424fn value_as_number(value: &Value) -> Option<f64> {
425 match value {
426 Value::Number(number) => number.as_f64(),
427 Value::String(value) => value.trim().parse::<f64>().ok(),
428 _ => None,
429 }
430}
431
432fn is_empty_cell(value: &Value) -> bool {
433 match value {
434 Value::Null => true,
435 Value::String(value) => value.trim().is_empty(),
436 _ => false,
437 }
438}
439
440fn compare_numeric<F>(cell: &Value, filter_value: f64, compare: F) -> bool
441where
442 F: Fn(f64, f64) -> bool,
443{
444 let Some(left) = value_as_number(cell) else {
445 return false;
446 };
447 compare(left, filter_value)
448}
449
450fn filter_matches(row: &[Value], filter: &FilterSpec) -> bool {
451 let Some(cell) = row.get(filter.col_idx) else {
452 return false;
453 };
454
455 match filter.op {
456 FilterOp::Eq => {
457 if let (Some(left), Ok(right)) =
458 (value_as_number(cell), filter.value.trim().parse::<f64>())
459 {
460 (left - right).abs() < f64::EPSILON
461 } else {
462 value_as_filter_text(cell) == filter.value
463 }
464 }
465 FilterOp::Ne => {
466 if let (Some(left), Ok(right)) =
467 (value_as_number(cell), filter.value.trim().parse::<f64>())
468 {
469 (left - right).abs() >= f64::EPSILON
470 } else {
471 value_as_filter_text(cell) != filter.value
472 }
473 }
474 FilterOp::Gt => {
475 compare_numeric(cell, filter.numeric_value.unwrap_or_default(), |a, b| a > b)
476 }
477 FilterOp::Gte => compare_numeric(cell, filter.numeric_value.unwrap_or_default(), |a, b| {
478 a >= b
479 }),
480 FilterOp::Lt => {
481 compare_numeric(cell, filter.numeric_value.unwrap_or_default(), |a, b| a < b)
482 }
483 FilterOp::Lte => compare_numeric(cell, filter.numeric_value.unwrap_or_default(), |a, b| {
484 a <= b
485 }),
486 FilterOp::Contains => value_as_filter_text(cell).contains(&filter.value),
487 FilterOp::Regex => filter
488 .regex
489 .as_ref()
490 .is_some_and(|regex| regex.is_match(&value_as_filter_text(cell))),
491 FilterOp::IsNull => is_empty_cell(cell),
492 FilterOp::NotNull => !is_empty_cell(cell),
493 }
494}
495
496fn read_range(
497 file: std::path::PathBuf,
498 sheet: Option<String>,
499 sheet_index: Option<usize>,
500 range: String,
501) -> Result<Value, AppError> {
502 let format_str = file_format(&file);
503 let path_str = file.to_string_lossy().to_string();
504
505 let mut workbook =
506 open_workbook(&file, false).map_err(crate::cli::error::anyhow_to_app_error)?;
507
508 let resolved_sheet = load_target_sheet(&workbook, &sheet, &sheet_index)?;
509
510 workbook
511 .ensure_sheet_loaded(resolved_sheet.index, &resolved_sheet.name)
512 .map_err(crate::cli::error::anyhow_to_app_error)?;
513
514 let sheet_obj = sheet_by_index(&workbook, resolved_sheet.index, &resolved_sheet.name)?;
515 let bounds = resolve_bounds(&workbook, sheet_obj, resolved_sheet.index, Some(&range))?;
516
517 let mut rows = Vec::new();
518 for row in bounds.start_row..=bounds.end_row {
519 let mut cols = Vec::new();
520 for col in bounds.start_col..=bounds.end_col {
521 let value = if row < sheet_obj.data.len() && col < sheet_obj.data[row].len() {
522 crate::json_export::process_cell_value(&sheet_obj.data[row][col])
523 } else {
524 Value::Null
525 };
526 cols.push(value);
527 }
528 rows.push(Value::Array(cols));
529 }
530
531 let range_str = format_bounds(bounds);
532
533 let data = json!({
534 "range": range_str,
535 "rows": rows,
536 });
537
538 Ok(envelope::success_envelope(
539 "read.range",
540 &path_str,
541 &format_str,
542 envelope::target_range(&resolved_sheet.name, resolved_sheet.index, &range_str),
543 json!({}),
544 data,
545 vec![],
546 ))
547}
548
549fn read_rows(
550 command: &'static str,
551 command_requires_header: bool,
552 request: RowReadRequest,
553) -> Result<Value, AppError> {
554 let RowReadRequest {
555 file,
556 sheet,
557 sheet_index,
558 range,
559 header_row,
560 select,
561 filters,
562 limit,
563 offset,
564 non_empty,
565 output_shape,
566 format,
567 } = request;
568
569 if output_shape == OutputShape::Jsonl && matches!(format, OutputFormat::Text) {
570 return Err(AppError::InvalidArgs {
571 message: "--output-shape jsonl cannot be combined with --format text".to_string(),
572 });
573 }
574
575 let format_str = file_format(&file);
576 let path_str = file.to_string_lossy().to_string();
577
578 let mut workbook =
579 open_workbook(&file, false).map_err(crate::cli::error::anyhow_to_app_error)?;
580
581 let resolved_sheet = load_target_sheet(&workbook, &sheet, &sheet_index)?;
582
583 workbook
584 .ensure_sheet_loaded(resolved_sheet.index, &resolved_sheet.name)
585 .map_err(crate::cli::error::anyhow_to_app_error)?;
586
587 let sheet_obj = sheet_by_index(&workbook, resolved_sheet.index, &resolved_sheet.name)?;
588 let requested_bounds =
589 resolve_bounds(&workbook, sheet_obj, resolved_sheet.index, range.as_deref())?;
590 let resolved_header =
591 resolve_optional_header_row(&workbook, sheet_obj, resolved_sheet.index, &header_row)?;
592
593 let range_str = format_bounds(requested_bounds);
594
595 if resolved_header.is_none()
596 && (command_requires_header
597 || matches!(output_shape, OutputShape::Records | OutputShape::Jsonl))
598 {
599 return Err(invalid_query(
600 "A resolved header row is required for records or jsonl output",
601 ));
602 }
603
604 let (has_header, columns, data_start_row) = if let Some(header_row_idx) = resolved_header {
605 let headers = read_header_values(sheet_obj, header_row_idx, requested_bounds);
606 let columns = stable_record_keys(&headers, requested_bounds.start_col);
607 let data_start_row = requested_bounds
608 .start_row
609 .max(header_row_idx.saturating_add(1));
610 (true, columns, data_start_row)
611 } else {
612 let columns: Vec<String> = (requested_bounds.start_col..=requested_bounds.end_col)
613 .map(|col| format!("col_{}", index_to_col_name(col)))
614 .collect();
615 (false, columns, requested_bounds.start_row)
616 };
617
618 let selected_indices = parse_selected_columns(select, &columns)?;
619 let parsed_filters = parse_filters(filters, &columns)?;
620 let applied_filters: Vec<String> = parsed_filters
621 .iter()
622 .map(|filter| filter.raw.clone())
623 .collect();
624 let selected_columns: Vec<String> = selected_indices
625 .iter()
626 .map(|idx| columns[*idx].clone())
627 .collect();
628
629 let row_output = collect_row_output(RowCollectRequest {
630 sheet: sheet_obj,
631 bounds: RowBounds {
632 start_row: data_start_row,
633 end_row: requested_bounds.end_row,
634 start_col: requested_bounds.start_col,
635 end_col: requested_bounds.end_col,
636 },
637 output_format: RowOutputFormat {
638 selected_indices: &selected_indices,
639 columns: &columns,
640 output_shape,
641 },
642 filters: &parsed_filters,
643 non_empty,
644 offset: offset.unwrap_or(0),
645 limit,
646 });
647
648 let row_count = row_output.row_count;
649 let truncated = row_output.truncated;
650
651 let data = if matches!(output_shape, OutputShape::Records | OutputShape::Jsonl) {
652 json!({
653 "resolved_header_row": resolved_header.unwrap(),
654 "mode": output_shape.as_str(),
655 "records": row_output.values,
656 })
657 } else {
658 json!({
659 "resolved_header_row": if has_header {
660 resolved_header.map(Value::from).unwrap_or(Value::Null)
661 } else {
662 Value::Null
663 },
664 "mode": "rows",
665 "rows": row_output.values,
666 })
667 };
668
669 let meta = json!({
670 "applied_filters": applied_filters,
671 "selected_columns": selected_columns,
672 "row_count": row_count,
673 "truncated": truncated,
674 "output_shape": output_shape.as_str(),
675 });
676
677 Ok(envelope::success_envelope(
678 command,
679 &path_str,
680 &format_str,
681 envelope::target_range(&resolved_sheet.name, resolved_sheet.index, &range_str),
682 meta,
683 data,
684 vec![],
685 ))
686}