Skip to main content

excel_cli/cli/
read.rs

1use regex::Regex;
2use serde_json::{json, Value};
3use std::path::PathBuf;
4
5use crate::cli::args::{OutputFormat, OutputShape, ReadCommands};
6use crate::cli::common::{file_format, format_bounds, sheet_by_index};
7use crate::cli::envelope;
8use crate::cli::error::AppError;
9use crate::cli::sheet_query::{
10    load_target_sheet, read_header_values, resolve_bounds, resolve_optional_header_row,
11    stable_record_keys,
12};
13use crate::excel::{open_workbook, CellType, Sheet};
14use crate::utils::{index_to_col_name, parse_cell_reference};
15
16pub fn handle(cmd: ReadCommands) -> Result<Value, AppError> {
17    match cmd {
18        ReadCommands::Cell {
19            file,
20            sheet,
21            sheet_index,
22            cell,
23            format: _,
24        } => read_cell(file, sheet, sheet_index, cell),
25        ReadCommands::Range {
26            file,
27            sheet,
28            sheet_index,
29            range,
30            format: _,
31        } => read_range(file, sheet, sheet_index, range),
32        ReadCommands::Rows {
33            file,
34            sheet,
35            sheet_index,
36            range,
37            header_row,
38            select,
39            filters,
40            limit,
41            offset,
42            non_empty,
43            output_shape,
44            format,
45        } => read_rows(
46            "read.rows",
47            false,
48            RowReadRequest {
49                file,
50                sheet,
51                sheet_index,
52                range,
53                header_row,
54                select,
55                filters,
56                limit,
57                offset,
58                non_empty,
59                output_shape,
60                format,
61            },
62        ),
63        ReadCommands::Records {
64            file,
65            sheet,
66            sheet_index,
67            range,
68            header_row,
69            select,
70            filters,
71            limit,
72            offset,
73            non_empty,
74            output_shape,
75            format,
76        } => read_rows(
77            "read.records",
78            true,
79            RowReadRequest {
80                file,
81                sheet,
82                sheet_index,
83                range,
84                header_row,
85                select,
86                filters,
87                limit,
88                offset,
89                non_empty,
90                output_shape,
91                format,
92            },
93        ),
94    }
95}
96
97fn read_cell(
98    file: std::path::PathBuf,
99    sheet: Option<String>,
100    sheet_index: Option<usize>,
101    cell: String,
102) -> Result<Value, AppError> {
103    let format_str = file_format(&file);
104    let path_str = file.to_string_lossy().to_string();
105
106    let (row, col) = parse_cell_reference(&cell).ok_or_else(|| AppError::InvalidQuery {
107        message: format!("Invalid cell reference: {}", cell),
108    })?;
109
110    let mut workbook =
111        open_workbook(&file, false).map_err(crate::cli::error::anyhow_to_app_error)?;
112
113    let resolved_sheet = load_target_sheet(&workbook, &sheet, &sheet_index)?;
114
115    workbook
116        .ensure_sheet_loaded(resolved_sheet.index, &resolved_sheet.name)
117        .map_err(crate::cli::error::anyhow_to_app_error)?;
118
119    let sheet_obj = sheet_by_index(&workbook, resolved_sheet.index, &resolved_sheet.name)?;
120
121    let cell_ref = cell.to_ascii_uppercase();
122    let in_bounds = row < sheet_obj.data.len() && col < sheet_obj.data[row].len();
123    let (value, cell_type, formula) = if in_bounds {
124        let c = &sheet_obj.data[row][col];
125        let formula =
126            workbook.formula_for_cell(resolved_sheet.index, &resolved_sheet.name, &cell_ref);
127        let type_str = if c.is_formula || formula.is_some() {
128            "formula"
129        } else {
130            match c.cell_type {
131                CellType::Text => "text",
132                CellType::Number => "number",
133                CellType::Date => "date",
134                CellType::Boolean => "boolean",
135                CellType::Empty => "empty",
136            }
137        };
138        (crate::json_export::process_cell_value(c), type_str, formula)
139    } else {
140        (Value::Null, "empty", None)
141    };
142
143    let mut data = serde_json::Map::new();
144    data.insert("cell".to_string(), json!(cell_ref));
145    data.insert("value".to_string(), value);
146    data.insert("type".to_string(), json!(cell_type));
147    if let Some(formula) = formula {
148        data.insert("formula".to_string(), json!(formula));
149    }
150
151    Ok(envelope::success_envelope(
152        "read.cell",
153        &path_str,
154        &format_str,
155        envelope::target_cell(&resolved_sheet.name, resolved_sheet.index, &cell_ref),
156        json!({}),
157        Value::Object(data),
158        vec![],
159    ))
160}
161
162#[derive(Clone, Copy)]
163enum FilterOp {
164    Eq,
165    Ne,
166    Gt,
167    Gte,
168    Lt,
169    Lte,
170    Contains,
171    Regex,
172    IsNull,
173    NotNull,
174}
175
176struct FilterSpec {
177    raw: String,
178    col_idx: usize,
179    op: FilterOp,
180    value: String,
181    numeric_value: Option<f64>,
182    regex: Option<Regex>,
183}
184
185struct RowReadRequest {
186    file: PathBuf,
187    sheet: Option<String>,
188    sheet_index: Option<usize>,
189    range: Option<String>,
190    header_row: String,
191    select: Option<String>,
192    filters: Vec<String>,
193    limit: Option<usize>,
194    offset: Option<usize>,
195    non_empty: bool,
196    output_shape: OutputShape,
197    format: OutputFormat,
198}
199
200#[derive(Clone, Copy)]
201struct RowBounds {
202    start_row: usize,
203    end_row: usize,
204    start_col: usize,
205    end_col: usize,
206}
207
208struct RowOutput {
209    values: Vec<Value>,
210    row_count: usize,
211    truncated: bool,
212}
213
214#[derive(Clone, Copy)]
215struct RowOutputFormat<'a> {
216    selected_indices: &'a [usize],
217    columns: &'a [String],
218    output_shape: OutputShape,
219}
220
221struct RowCollectRequest<'a> {
222    sheet: &'a Sheet,
223    bounds: RowBounds,
224    output_format: RowOutputFormat<'a>,
225    filters: &'a [FilterSpec],
226    non_empty: bool,
227    offset: usize,
228    limit: Option<usize>,
229}
230
231fn invalid_query(message: impl Into<String>) -> AppError {
232    AppError::InvalidQuery {
233        message: message.into(),
234    }
235}
236
237fn sheet_row_values(sheet: &Sheet, row: usize, bounds: RowBounds) -> Option<Vec<Value>> {
238    if row >= sheet.data.len() {
239        return None;
240    }
241
242    let values = (bounds.start_col..=bounds.end_col)
243        .map(|col| {
244            if col < sheet.data[row].len() {
245                crate::json_export::process_cell_value(&sheet.data[row][col])
246            } else {
247                Value::Null
248            }
249        })
250        .collect();
251
252    Some(values)
253}
254
255fn row_passes_filters(row: &[Value], filters: &[FilterSpec], non_empty: bool) -> bool {
256    if non_empty && row.iter().all(is_empty_cell) {
257        return false;
258    }
259
260    filters.iter().all(|filter| filter_matches(row, filter))
261}
262
263fn output_row(row: &[Value], output_format: RowOutputFormat<'_>) -> Value {
264    if matches!(
265        output_format.output_shape,
266        OutputShape::Records | OutputShape::Jsonl
267    ) {
268        let mut record = serde_json::Map::new();
269        for idx in output_format.selected_indices {
270            let value = row.get(*idx).cloned().unwrap_or(Value::Null);
271            record.insert(output_format.columns[*idx].clone(), value);
272        }
273        return Value::Object(record);
274    }
275
276    Value::Array(
277        output_format
278            .selected_indices
279            .iter()
280            .map(|idx| row.get(*idx).cloned().unwrap_or(Value::Null))
281            .collect(),
282    )
283}
284
285fn collect_row_output(request: RowCollectRequest<'_>) -> RowOutput {
286    let mut values = Vec::new();
287    let mut skipped = 0usize;
288    let mut truncated = false;
289
290    for row_idx in request.bounds.start_row..=request.bounds.end_row {
291        let Some(row) = sheet_row_values(request.sheet, row_idx, request.bounds) else {
292            break;
293        };
294        if !row_passes_filters(&row, request.filters, request.non_empty) {
295            continue;
296        }
297        if skipped < request.offset {
298            skipped += 1;
299            continue;
300        }
301        if request.limit.is_some_and(|size| values.len() >= size) {
302            truncated = true;
303            break;
304        }
305
306        values.push(output_row(&row, request.output_format));
307    }
308
309    let row_count = values.len();
310    RowOutput {
311        values,
312        row_count,
313        truncated,
314    }
315}
316
317fn parse_selected_columns(
318    select: Option<String>,
319    columns: &[String],
320) -> Result<Vec<usize>, AppError> {
321    let Some(select) = select else {
322        return Ok((0..columns.len()).collect());
323    };
324
325    let mut selected = Vec::new();
326    for field in select.split(',').map(str::trim) {
327        if field.is_empty() {
328            return Err(invalid_query("Selected column names cannot be empty"));
329        }
330        let col_idx = columns
331            .iter()
332            .position(|column| column == field)
333            .ok_or_else(|| invalid_query(format!("Unknown selected column: {field}")))?;
334        selected.push(col_idx);
335    }
336
337    Ok(selected)
338}
339
340fn parse_filters(filters: Vec<String>, columns: &[String]) -> Result<Vec<FilterSpec>, AppError> {
341    filters
342        .into_iter()
343        .map(|raw| {
344            let mut parts = raw.splitn(3, ':');
345            let field = parts.next().unwrap_or_default().trim();
346            let op = parts.next().unwrap_or_default().trim();
347            let value = parts.next().ok_or_else(|| {
348                invalid_query(format!("Invalid filter '{raw}'; expected field:op:value"))
349            })?;
350            let value = value.to_string();
351
352            if field.is_empty() {
353                return Err(invalid_query(format!(
354                    "Invalid filter '{raw}'; field is empty"
355                )));
356            }
357
358            let col_idx = columns
359                .iter()
360                .position(|column| column == field)
361                .ok_or_else(|| invalid_query(format!("Unknown filter column: {field}")))?;
362
363            let op = match op {
364                "eq" => FilterOp::Eq,
365                "ne" => FilterOp::Ne,
366                "gt" => FilterOp::Gt,
367                "gte" => FilterOp::Gte,
368                "lt" => FilterOp::Lt,
369                "lte" => FilterOp::Lte,
370                "contains" => FilterOp::Contains,
371                "regex" => FilterOp::Regex,
372                "isnull" => FilterOp::IsNull,
373                "notnull" => FilterOp::NotNull,
374                "" => {
375                    return Err(invalid_query(format!(
376                        "Invalid filter '{raw}'; operator is empty"
377                    )))
378                }
379                _ => return Err(invalid_query(format!("Unknown filter operator: {op}"))),
380            };
381
382            let numeric_value = if matches!(
383                op,
384                FilterOp::Gt | FilterOp::Gte | FilterOp::Lt | FilterOp::Lte
385            ) {
386                Some(value.trim().parse::<f64>().map_err(|_| {
387                    invalid_query(format!("Numeric filter value is invalid in '{raw}'"))
388                })?)
389            } else {
390                None
391            };
392
393            let regex = if matches!(op, FilterOp::Regex) {
394                Some(
395                    Regex::new(&value)
396                        .map_err(|err| invalid_query(format!("Invalid regex filter: {err}")))?,
397                )
398            } else {
399                None
400            };
401
402            Ok(FilterSpec {
403                raw,
404                col_idx,
405                op,
406                value,
407                numeric_value,
408                regex,
409            })
410        })
411        .collect()
412}
413
414fn value_as_filter_text(value: &Value) -> String {
415    match value {
416        Value::Null => String::new(),
417        Value::String(value) => value.clone(),
418        Value::Number(value) => value.to_string(),
419        Value::Bool(value) => value.to_string(),
420        other => other.to_string(),
421    }
422}
423
424fn value_as_number(value: &Value) -> Option<f64> {
425    match value {
426        Value::Number(number) => number.as_f64(),
427        Value::String(value) => value.trim().parse::<f64>().ok(),
428        _ => None,
429    }
430}
431
432fn is_empty_cell(value: &Value) -> bool {
433    match value {
434        Value::Null => true,
435        Value::String(value) => value.trim().is_empty(),
436        _ => false,
437    }
438}
439
440fn compare_numeric<F>(cell: &Value, filter_value: f64, compare: F) -> bool
441where
442    F: Fn(f64, f64) -> bool,
443{
444    let Some(left) = value_as_number(cell) else {
445        return false;
446    };
447    compare(left, filter_value)
448}
449
450fn filter_matches(row: &[Value], filter: &FilterSpec) -> bool {
451    let Some(cell) = row.get(filter.col_idx) else {
452        return false;
453    };
454
455    match filter.op {
456        FilterOp::Eq => {
457            if let (Some(left), Ok(right)) =
458                (value_as_number(cell), filter.value.trim().parse::<f64>())
459            {
460                (left - right).abs() < f64::EPSILON
461            } else {
462                value_as_filter_text(cell) == filter.value
463            }
464        }
465        FilterOp::Ne => {
466            if let (Some(left), Ok(right)) =
467                (value_as_number(cell), filter.value.trim().parse::<f64>())
468            {
469                (left - right).abs() >= f64::EPSILON
470            } else {
471                value_as_filter_text(cell) != filter.value
472            }
473        }
474        FilterOp::Gt => {
475            compare_numeric(cell, filter.numeric_value.unwrap_or_default(), |a, b| a > b)
476        }
477        FilterOp::Gte => compare_numeric(cell, filter.numeric_value.unwrap_or_default(), |a, b| {
478            a >= b
479        }),
480        FilterOp::Lt => {
481            compare_numeric(cell, filter.numeric_value.unwrap_or_default(), |a, b| a < b)
482        }
483        FilterOp::Lte => compare_numeric(cell, filter.numeric_value.unwrap_or_default(), |a, b| {
484            a <= b
485        }),
486        FilterOp::Contains => value_as_filter_text(cell).contains(&filter.value),
487        FilterOp::Regex => filter
488            .regex
489            .as_ref()
490            .is_some_and(|regex| regex.is_match(&value_as_filter_text(cell))),
491        FilterOp::IsNull => is_empty_cell(cell),
492        FilterOp::NotNull => !is_empty_cell(cell),
493    }
494}
495
496fn read_range(
497    file: std::path::PathBuf,
498    sheet: Option<String>,
499    sheet_index: Option<usize>,
500    range: String,
501) -> Result<Value, AppError> {
502    let format_str = file_format(&file);
503    let path_str = file.to_string_lossy().to_string();
504
505    let mut workbook =
506        open_workbook(&file, false).map_err(crate::cli::error::anyhow_to_app_error)?;
507
508    let resolved_sheet = load_target_sheet(&workbook, &sheet, &sheet_index)?;
509
510    workbook
511        .ensure_sheet_loaded(resolved_sheet.index, &resolved_sheet.name)
512        .map_err(crate::cli::error::anyhow_to_app_error)?;
513
514    let sheet_obj = sheet_by_index(&workbook, resolved_sheet.index, &resolved_sheet.name)?;
515    let bounds = resolve_bounds(&workbook, sheet_obj, resolved_sheet.index, Some(&range))?;
516
517    let mut rows = Vec::new();
518    for row in bounds.start_row..=bounds.end_row {
519        let mut cols = Vec::new();
520        for col in bounds.start_col..=bounds.end_col {
521            let value = if row < sheet_obj.data.len() && col < sheet_obj.data[row].len() {
522                crate::json_export::process_cell_value(&sheet_obj.data[row][col])
523            } else {
524                Value::Null
525            };
526            cols.push(value);
527        }
528        rows.push(Value::Array(cols));
529    }
530
531    let range_str = format_bounds(bounds);
532
533    let data = json!({
534        "range": range_str,
535        "rows": rows,
536    });
537
538    Ok(envelope::success_envelope(
539        "read.range",
540        &path_str,
541        &format_str,
542        envelope::target_range(&resolved_sheet.name, resolved_sheet.index, &range_str),
543        json!({}),
544        data,
545        vec![],
546    ))
547}
548
549fn read_rows(
550    command: &'static str,
551    command_requires_header: bool,
552    request: RowReadRequest,
553) -> Result<Value, AppError> {
554    let RowReadRequest {
555        file,
556        sheet,
557        sheet_index,
558        range,
559        header_row,
560        select,
561        filters,
562        limit,
563        offset,
564        non_empty,
565        output_shape,
566        format,
567    } = request;
568
569    if output_shape == OutputShape::Jsonl && matches!(format, OutputFormat::Text) {
570        return Err(AppError::InvalidArgs {
571            message: "--output-shape jsonl cannot be combined with --format text".to_string(),
572        });
573    }
574
575    let format_str = file_format(&file);
576    let path_str = file.to_string_lossy().to_string();
577
578    let mut workbook =
579        open_workbook(&file, false).map_err(crate::cli::error::anyhow_to_app_error)?;
580
581    let resolved_sheet = load_target_sheet(&workbook, &sheet, &sheet_index)?;
582
583    workbook
584        .ensure_sheet_loaded(resolved_sheet.index, &resolved_sheet.name)
585        .map_err(crate::cli::error::anyhow_to_app_error)?;
586
587    let sheet_obj = sheet_by_index(&workbook, resolved_sheet.index, &resolved_sheet.name)?;
588    let requested_bounds =
589        resolve_bounds(&workbook, sheet_obj, resolved_sheet.index, range.as_deref())?;
590    let resolved_header =
591        resolve_optional_header_row(&workbook, sheet_obj, resolved_sheet.index, &header_row)?;
592
593    let range_str = format_bounds(requested_bounds);
594
595    if resolved_header.is_none()
596        && (command_requires_header
597            || matches!(output_shape, OutputShape::Records | OutputShape::Jsonl))
598    {
599        return Err(invalid_query(
600            "A resolved header row is required for records or jsonl output",
601        ));
602    }
603
604    let (has_header, columns, data_start_row) = if let Some(header_row_idx) = resolved_header {
605        let headers = read_header_values(sheet_obj, header_row_idx, requested_bounds);
606        let columns = stable_record_keys(&headers, requested_bounds.start_col);
607        let data_start_row = requested_bounds
608            .start_row
609            .max(header_row_idx.saturating_add(1));
610        (true, columns, data_start_row)
611    } else {
612        let columns: Vec<String> = (requested_bounds.start_col..=requested_bounds.end_col)
613            .map(|col| format!("col_{}", index_to_col_name(col)))
614            .collect();
615        (false, columns, requested_bounds.start_row)
616    };
617
618    let selected_indices = parse_selected_columns(select, &columns)?;
619    let parsed_filters = parse_filters(filters, &columns)?;
620    let applied_filters: Vec<String> = parsed_filters
621        .iter()
622        .map(|filter| filter.raw.clone())
623        .collect();
624    let selected_columns: Vec<String> = selected_indices
625        .iter()
626        .map(|idx| columns[*idx].clone())
627        .collect();
628
629    let row_output = collect_row_output(RowCollectRequest {
630        sheet: sheet_obj,
631        bounds: RowBounds {
632            start_row: data_start_row,
633            end_row: requested_bounds.end_row,
634            start_col: requested_bounds.start_col,
635            end_col: requested_bounds.end_col,
636        },
637        output_format: RowOutputFormat {
638            selected_indices: &selected_indices,
639            columns: &columns,
640            output_shape,
641        },
642        filters: &parsed_filters,
643        non_empty,
644        offset: offset.unwrap_or(0),
645        limit,
646    });
647
648    let row_count = row_output.row_count;
649    let truncated = row_output.truncated;
650
651    let data = if matches!(output_shape, OutputShape::Records | OutputShape::Jsonl) {
652        json!({
653            "resolved_header_row": resolved_header.unwrap(),
654            "mode": output_shape.as_str(),
655            "records": row_output.values,
656        })
657    } else {
658        json!({
659            "resolved_header_row": if has_header {
660                resolved_header.map(Value::from).unwrap_or(Value::Null)
661            } else {
662                Value::Null
663            },
664            "mode": "rows",
665            "rows": row_output.values,
666        })
667    };
668
669    let meta = json!({
670        "applied_filters": applied_filters,
671        "selected_columns": selected_columns,
672        "row_count": row_count,
673        "truncated": truncated,
674        "output_shape": output_shape.as_str(),
675    });
676
677    Ok(envelope::success_envelope(
678        command,
679        &path_str,
680        &format_str,
681        envelope::target_range(&resolved_sheet.name, resolved_sheet.index, &range_str),
682        meta,
683        data,
684        vec![],
685    ))
686}