Skip to main content

xls_rs/operations/
core.rs

1//! Core data operations struct and basic methods
2
3use super::types::SortOrder;
4use crate::traits::{
5    DataOperator, FilterCondition, FilterOperator, SortOperator, TransformOperation,
6    TransformOperator,
7};
8use anyhow::Result;
9use rayon::prelude::*;
10
11/// Data operations for spreadsheet manipulation
12pub struct DataOperations;
13
14impl DataOperations {
15    pub fn new() -> Self {
16        Self
17    }
18}
19
20// Trait implementations for better SOC
21impl SortOperator for DataOperations {
22    fn sort(&self, data: &mut Vec<Vec<String>>, column: usize, ascending: bool) -> Result<()> {
23        let order = if ascending {
24            SortOrder::Ascending
25        } else {
26            SortOrder::Descending
27        };
28        self.sort_by_column(data, column, order)
29    }
30}
31
32impl DataOperations {
33    /// Sort rows by a specific column (public for backward compatibility)
34    pub fn sort_by_column(
35        &self,
36        data: &mut Vec<Vec<String>>,
37        column: usize,
38        order: SortOrder,
39    ) -> Result<()> {
40        if data.is_empty() {
41            return Ok(());
42        }
43
44        let max_cols = data.iter().map(|r| r.len()).max().unwrap_or(0);
45        if column >= max_cols {
46            anyhow::bail!(
47                "Column index {} out of range (max: {})",
48                column,
49                max_cols - 1
50            );
51        }
52
53        // Use parallel sort for better performance on large datasets
54        data.par_sort_by(|a, b| {
55            let val_a = a.get(column).map(|s| s.as_str()).unwrap_or("");
56            let val_b = b.get(column).map(|s| s.as_str()).unwrap_or("");
57
58            let cmp = match (val_a.parse::<f64>(), val_b.parse::<f64>()) {
59                (Ok(num_a), Ok(num_b)) => num_a
60                    .partial_cmp(&num_b)
61                    .unwrap_or(std::cmp::Ordering::Equal),
62                _ => val_a.cmp(val_b),
63            };
64
65            match order {
66                SortOrder::Ascending => cmp,
67                SortOrder::Descending => cmp.reverse(),
68            }
69        });
70
71        Ok(())
72    }
73}
74
75impl FilterOperator for DataOperations {
76    fn filter(
77        &self,
78        data: &[Vec<String>],
79        column: usize,
80        condition: FilterCondition,
81    ) -> Result<Vec<Vec<String>>> {
82        if data.is_empty() {
83            return Ok(Vec::new());
84        }
85
86        let max_cols = data.iter().map(|r| r.len()).max().unwrap_or(0);
87        if column >= max_cols {
88            anyhow::bail!(
89                "Column index {} out of range (max: {})",
90                column,
91                max_cols.saturating_sub(1)
92            );
93        }
94
95        // Pre-filter indices in parallel to avoid cloning until final collection
96        let indices: Vec<usize> = data
97            .par_iter()
98            .enumerate()
99            .filter(|(_idx, row)| {
100                let cell_value = row.get(column).map(|s| s.as_str()).unwrap_or("");
101                self.evaluate_condition(cell_value, &condition)
102                    .unwrap_or(false)
103            })
104            .map(|(idx, _)| idx)
105            .collect();
106
107        // Collect only filtered rows to minimize allocations
108        let filtered: Vec<Vec<String>> = indices.into_iter().map(|idx| data[idx].clone()).collect();
109
110        Ok(filtered)
111    }
112}
113
114impl DataOperations {
115    /// Filter rows by a condition on a column (legacy method for compatibility)
116    pub fn filter_rows(
117        &self,
118        data: &[Vec<String>],
119        column: usize,
120        operator: &str,
121        value: &str,
122    ) -> Result<Vec<Vec<String>>> {
123        let condition = self.parse_filter_condition(operator, value)?;
124        <Self as FilterOperator>::filter(self, data, column, condition)
125    }
126
127    fn parse_filter_condition(&self, operator: &str, value: &str) -> Result<FilterCondition> {
128        Ok(match operator {
129            "=" | "==" => FilterCondition::Equals(value.to_string()),
130            "!=" | "<>" => FilterCondition::NotEquals(value.to_string()),
131            ">" => FilterCondition::GreaterThan(value.to_string()),
132            ">=" => FilterCondition::GreaterThanOrEqual(value.to_string()),
133            "<" => FilterCondition::LessThan(value.to_string()),
134            "<=" => FilterCondition::LessThanOrEqual(value.to_string()),
135            "contains" => FilterCondition::Contains(value.to_string()),
136            "starts_with" => FilterCondition::StartsWith(value.to_string()),
137            "ends_with" => FilterCondition::EndsWith(value.to_string()),
138            _ => anyhow::bail!("Unknown operator: {}", operator),
139        })
140    }
141
142    fn evaluate_condition(&self, cell_value: &str, condition: &FilterCondition) -> Result<bool> {
143        Ok(match condition {
144            FilterCondition::Equals(v) => cell_value == v,
145            FilterCondition::NotEquals(v) => cell_value != v,
146            FilterCondition::GreaterThan(v) => {
147                match (cell_value.parse::<f64>(), v.parse::<f64>()) {
148                    (Ok(a), Ok(b)) => a > b,
149                    _ => cell_value > v.as_str(),
150                }
151            }
152            FilterCondition::GreaterThanOrEqual(v) => {
153                match (cell_value.parse::<f64>(), v.parse::<f64>()) {
154                    (Ok(a), Ok(b)) => a >= b,
155                    _ => cell_value >= v.as_str(),
156                }
157            }
158            FilterCondition::LessThan(v) => match (cell_value.parse::<f64>(), v.parse::<f64>()) {
159                (Ok(a), Ok(b)) => a < b,
160                _ => cell_value < v.as_str(),
161            },
162            FilterCondition::LessThanOrEqual(v) => {
163                match (cell_value.parse::<f64>(), v.parse::<f64>()) {
164                    (Ok(a), Ok(b)) => a <= b,
165                    _ => cell_value <= v.as_str(),
166                }
167            }
168            FilterCondition::Contains(v) => cell_value.contains(v),
169            FilterCondition::StartsWith(v) => cell_value.starts_with(v),
170            FilterCondition::EndsWith(v) => cell_value.ends_with(v),
171            FilterCondition::Regex(pattern) => {
172                use regex::Regex;
173                let re = Regex::new(pattern)?;
174                re.is_match(cell_value)
175            }
176        })
177    }
178
179    /// Evaluate a filter condition (legacy method for compatibility)
180    pub fn evaluate_filter_condition(
181        &self,
182        cell_value: &str,
183        operator: &str,
184        value: &str,
185    ) -> Result<bool> {
186        let condition = self.parse_filter_condition(operator, value)?;
187        self.evaluate_condition(cell_value, &condition)
188    }
189
190    /// Replace values in a column
191    pub fn replace(
192        &self,
193        data: &mut Vec<Vec<String>>,
194        column: usize,
195        find: &str,
196        replace_with: &str,
197    ) -> usize {
198        let mut count = 0;
199        for row in data.iter_mut() {
200            if let Some(cell) = row.get_mut(column) {
201                if cell.contains(find) {
202                    *cell = cell.replace(find, replace_with);
203                    count += 1;
204                }
205            }
206        }
207        count
208    }
209
210    /// Find and replace across all columns
211    pub fn find_replace(
212        &self,
213        data: &mut Vec<Vec<String>>,
214        find: &str,
215        replace_with: &str,
216        _column: Option<usize>,
217    ) -> Result<usize> {
218        let mut count = 0;
219        for row in data.iter_mut() {
220            for cell in row.iter_mut() {
221                if cell.contains(find) {
222                    *cell = cell.replace(find, replace_with);
223                    count += 1;
224                }
225            }
226        }
227        Ok(count)
228    }
229
230    /// Remove duplicate rows (returns new vec)
231    pub fn deduplicate(&self, data: &[Vec<String>]) -> Vec<Vec<String>> {
232        use std::collections::HashSet;
233        let mut seen: HashSet<Vec<String>> = HashSet::with_capacity(data.len());
234        data.iter()
235            .filter(|row| seen.insert((*row).clone()))
236            .cloned()
237            .collect()
238    }
239
240    /// Remove duplicate rows in place
241    pub fn deduplicate_mut(&self, data: &mut Vec<Vec<String>>) -> usize {
242        use std::collections::HashSet;
243        let original_len = data.len();
244        let mut seen: HashSet<Vec<String>> = HashSet::with_capacity(data.len());
245        data.retain(|row| seen.insert(row.clone()));
246        original_len - data.len()
247    }
248
249    /// Transpose data (rows to columns)
250    pub fn transpose(&self, data: &[Vec<String>]) -> Vec<Vec<String>> {
251        if data.is_empty() {
252            return Vec::new();
253        }
254
255        let max_cols = data.iter().map(|r| r.len()).max().unwrap_or(0);
256        let mut result: Vec<Vec<String>> = Vec::with_capacity(max_cols);
257
258        for col_idx in 0..max_cols {
259            let mut new_row = Vec::with_capacity(data.len());
260            for row in data.iter() {
261                if col_idx < row.len() {
262                    new_row.push(row[col_idx].clone());
263                } else {
264                    new_row.push(String::new());
265                }
266            }
267            result.push(new_row);
268        }
269
270        result
271    }
272
273    /// Format data as markdown table
274    pub fn to_markdown(&self, data: &[Vec<String>]) -> String {
275        if data.is_empty() {
276            return String::new();
277        }
278
279        let mut output = String::with_capacity(data.len() * data[0].len() * 20);
280
281        // Header row
282        if let Some(header) = data.first() {
283            output.push_str("| ");
284            output.push_str(&header.join(" | "));
285            output.push_str(" |\n");
286
287            // Separator
288            output.push_str("| ");
289            let sep: String = header.iter().map(|_| "---").collect::<Vec<_>>().join(" | ");
290            output.push_str(&sep);
291            output.push_str(" |\n");
292        }
293
294        // Data rows
295        for row in data.iter().skip(1) {
296            output.push_str("| ");
297            output.push_str(&row.join(" | "));
298            output.push_str(" |\n");
299        }
300
301        output
302    }
303
304    /// Insert a row at a specific index
305    pub fn insert_row(&self, data: &mut Vec<Vec<String>>, index: usize, row: Vec<String>) {
306        if index <= data.len() {
307            data.insert(index, row);
308        }
309    }
310
311    /// Delete a row at a specific index
312    pub fn delete_row(&self, data: &mut Vec<Vec<String>>, index: usize) -> Option<Vec<String>> {
313        if index < data.len() {
314            Some(data.remove(index))
315        } else {
316            None
317        }
318    }
319
320    /// Insert a column at a specific index
321    pub fn insert_column(&self, data: &mut Vec<Vec<String>>, index: usize, values: Vec<String>) {
322        for (row_idx, row) in data.iter_mut().enumerate() {
323            let value = values.get(row_idx).cloned().unwrap_or_default();
324            if index <= row.len() {
325                row.insert(index, value);
326            } else {
327                row.push(value);
328            }
329        }
330    }
331
332    /// Delete a column at a specific index
333    pub fn delete_column(&self, data: &mut Vec<Vec<String>>, index: usize) {
334        for row in data.iter_mut() {
335            if index < row.len() {
336                row.remove(index);
337            }
338        }
339    }
340}
341
342impl TransformOperator for DataOperations {
343    fn transform(&self, data: &mut Vec<Vec<String>>, operation: TransformOperation) -> Result<()> {
344        match operation {
345            TransformOperation::RenameColumn { from, to } => {
346                if let Some(row) = data.first_mut() {
347                    if from < row.len() {
348                        row[from] = to;
349                    }
350                }
351            }
352            TransformOperation::DropColumn(col_idx) => {
353                for row in data.iter_mut() {
354                    if col_idx < row.len() {
355                        row.remove(col_idx);
356                    }
357                }
358            }
359            TransformOperation::AddColumn { name, formula } => {
360                if let Some(formula_str) = formula {
361                    // Use formula evaluator to compute the column value
362                    use crate::formula::FormulaEvaluator;
363                    let evaluator = FormulaEvaluator::new();
364
365                    // Check if formula contains cell references that might be row-relative
366                    // Cell references like A1, B2, C10, etc. suggest per-row evaluation
367                    let has_cell_refs = formula_str.chars().any(|c: char| c.is_ascii_uppercase())
368                        && formula_str.contains(|c: char| c.is_ascii_digit());
369
370                    if has_cell_refs {
371                        // Per-row formula evaluation: evaluate formula for each row
372                        // with row-specific cell references (A1 for row 0, A2 for row 1, etc.)
373                        // Clone data first to avoid borrow issues
374                        let data_clone = data.clone();
375                        for (row_idx, row) in data.iter_mut().enumerate() {
376                            // Replace row number in cell references with current row index
377                            // e.g., A1 -> A{row_idx+1}, B2 -> B{row_idx+1}
378                            let row_formula = adjust_cell_references_for_row(&formula_str, row_idx);
379
380                            match evaluator.evaluate_formula_full(&row_formula, &data_clone) {
381                                Ok(result) => {
382                                    let value = match result {
383                                        crate::formula::FormulaResult::Number(n) => n.to_string(),
384                                        crate::formula::FormulaResult::Text(s) => s,
385                                    };
386                                    row.push(value);
387                                }
388                                Err(_) => {
389                                    row.push(format!("#ERROR: {}", name));
390                                }
391                            }
392                        }
393                    } else {
394                        // Aggregate formula: evaluate once for all rows (SUM, AVERAGE, etc.)
395                        match evaluator.evaluate_formula_full(&formula_str, data) {
396                            Ok(result) => {
397                                let value = match result {
398                                    crate::formula::FormulaResult::Number(n) => n.to_string(),
399                                    crate::formula::FormulaResult::Text(s) => s,
400                                };
401                                for row in data.iter_mut() {
402                                    row.push(value.clone());
403                                }
404                            }
405                            Err(_) => {
406                                for row in data.iter_mut() {
407                                    row.push(format!("#ERROR: {}", name));
408                                }
409                            }
410                        }
411                    }
412                } else {
413                    // No formula, just add the column name
414                    for row in data.iter_mut() {
415                        row.push(name.clone());
416                    }
417                }
418            }
419            TransformOperation::FillNa { column, value } => {
420                if value.is_empty() {
421                    return Ok(());
422                }
423                for row in data.iter_mut() {
424                    if column < row.len() && row[column].is_empty() {
425                        row[column] = value.clone();
426                    }
427                }
428            }
429        }
430        Ok(())
431    }
432}
433
434impl DataOperator for DataOperations {}
435
436/// Adjust cell references in a formula to be row-specific
437///
438/// This function takes a formula like "A1*2" and adjusts it to use the
439/// current row index, e.g., for row 0 it becomes "A1*2", for row 1 it becomes "A2*2", etc.
440///
441/// # Arguments
442/// * `formula` - The formula string potentially containing cell references
443/// * `row_idx` - The zero-based row index
444///
445/// # Returns
446/// A formula string with adjusted cell references
447fn adjust_cell_references_for_row(formula: &str, row_idx: usize) -> String {
448    use crate::regex_cache::cell_reference_regex;
449
450    let row_num = row_idx + 1;
451    let re = cell_reference_regex();
452
453    let result = re.replace_all(formula, |caps: &regex::Captures| {
454        let column = &caps[1]; // e.g., "A", "B", "AA"
455        let _old_row = &caps[2]; // e.g., "1", "2", "100"
456        format!("{}{}", column, row_num)
457    });
458
459    result.to_string()
460}
461
462#[cfg(test)]
463mod tests {
464    use super::*;
465
466    #[test]
467    fn test_adjust_cell_references_for_row() {
468        // Single column references
469        assert_eq!(adjust_cell_references_for_row("A1", 0), "A1");
470        assert_eq!(adjust_cell_references_for_row("A1", 1), "A2");
471        assert_eq!(adjust_cell_references_for_row("A1", 9), "A10");
472
473        // Multiple column references
474        assert_eq!(adjust_cell_references_for_row("A1+B2", 0), "A1+B1");
475        assert_eq!(adjust_cell_references_for_row("A1+B2", 1), "A2+B2");
476        assert_eq!(adjust_cell_references_for_row("A1+B2", 2), "A3+B3");
477
478        // Complex formulas
479        assert_eq!(
480            adjust_cell_references_for_row("SUM(A1:B10)", 0),
481            "SUM(A1:B1)"
482        );
483        assert_eq!(adjust_cell_references_for_row("A1*2+B1", 5), "A6*2+B6");
484
485        // Double-letter columns
486        assert_eq!(adjust_cell_references_for_row("AA1", 2), "AA3");
487        assert_eq!(adjust_cell_references_for_row("AB12+CD3", 1), "AB2+CD2");
488
489        // Mixed case
490        assert_eq!(adjust_cell_references_for_row("a1+b2", 0), "a1+b1");
491    }
492}