1use super::types::SortOrder;
4use crate::traits::{
5 DataOperator, FilterCondition, FilterOperator, SortOperator, TransformOperation,
6 TransformOperator,
7};
8use anyhow::Result;
9use rayon::prelude::*;
10
11pub struct DataOperations;
13
14impl DataOperations {
15 pub fn new() -> Self {
16 Self
17 }
18}
19
20impl SortOperator for DataOperations {
22 fn sort(&self, data: &mut Vec<Vec<String>>, column: usize, ascending: bool) -> Result<()> {
23 let order = if ascending {
24 SortOrder::Ascending
25 } else {
26 SortOrder::Descending
27 };
28 self.sort_by_column(data, column, order)
29 }
30}
31
32impl DataOperations {
33 pub fn sort_by_column(
35 &self,
36 data: &mut Vec<Vec<String>>,
37 column: usize,
38 order: SortOrder,
39 ) -> Result<()> {
40 if data.is_empty() {
41 return Ok(());
42 }
43
44 let max_cols = data.iter().map(|r| r.len()).max().unwrap_or(0);
45 if column >= max_cols {
46 anyhow::bail!(
47 "Column index {} out of range (max: {})",
48 column,
49 max_cols - 1
50 );
51 }
52
53 data.par_sort_by(|a, b| {
55 let val_a = a.get(column).map(|s| s.as_str()).unwrap_or("");
56 let val_b = b.get(column).map(|s| s.as_str()).unwrap_or("");
57
58 let cmp = match (val_a.parse::<f64>(), val_b.parse::<f64>()) {
59 (Ok(num_a), Ok(num_b)) => num_a
60 .partial_cmp(&num_b)
61 .unwrap_or(std::cmp::Ordering::Equal),
62 _ => val_a.cmp(val_b),
63 };
64
65 match order {
66 SortOrder::Ascending => cmp,
67 SortOrder::Descending => cmp.reverse(),
68 }
69 });
70
71 Ok(())
72 }
73}
74
75impl FilterOperator for DataOperations {
76 fn filter(
77 &self,
78 data: &[Vec<String>],
79 column: usize,
80 condition: FilterCondition,
81 ) -> Result<Vec<Vec<String>>> {
82 if data.is_empty() {
83 return Ok(Vec::new());
84 }
85
86 let max_cols = data.iter().map(|r| r.len()).max().unwrap_or(0);
87 if column >= max_cols {
88 anyhow::bail!(
89 "Column index {} out of range (max: {})",
90 column,
91 max_cols.saturating_sub(1)
92 );
93 }
94
95 let indices: Vec<usize> = data
97 .par_iter()
98 .enumerate()
99 .filter(|(_idx, row)| {
100 let cell_value = row.get(column).map(|s| s.as_str()).unwrap_or("");
101 self.evaluate_condition(cell_value, &condition)
102 .unwrap_or(false)
103 })
104 .map(|(idx, _)| idx)
105 .collect();
106
107 let filtered: Vec<Vec<String>> = indices.into_iter().map(|idx| data[idx].clone()).collect();
109
110 Ok(filtered)
111 }
112}
113
114impl DataOperations {
115 pub fn filter_rows(
117 &self,
118 data: &[Vec<String>],
119 column: usize,
120 operator: &str,
121 value: &str,
122 ) -> Result<Vec<Vec<String>>> {
123 let condition = self.parse_filter_condition(operator, value)?;
124 <Self as FilterOperator>::filter(self, data, column, condition)
125 }
126
127 fn parse_filter_condition(&self, operator: &str, value: &str) -> Result<FilterCondition> {
128 Ok(match operator {
129 "=" | "==" => FilterCondition::Equals(value.to_string()),
130 "!=" | "<>" => FilterCondition::NotEquals(value.to_string()),
131 ">" => FilterCondition::GreaterThan(value.to_string()),
132 ">=" => FilterCondition::GreaterThanOrEqual(value.to_string()),
133 "<" => FilterCondition::LessThan(value.to_string()),
134 "<=" => FilterCondition::LessThanOrEqual(value.to_string()),
135 "contains" => FilterCondition::Contains(value.to_string()),
136 "starts_with" => FilterCondition::StartsWith(value.to_string()),
137 "ends_with" => FilterCondition::EndsWith(value.to_string()),
138 _ => anyhow::bail!("Unknown operator: {}", operator),
139 })
140 }
141
142 fn evaluate_condition(&self, cell_value: &str, condition: &FilterCondition) -> Result<bool> {
143 Ok(match condition {
144 FilterCondition::Equals(v) => cell_value == v,
145 FilterCondition::NotEquals(v) => cell_value != v,
146 FilterCondition::GreaterThan(v) => {
147 match (cell_value.parse::<f64>(), v.parse::<f64>()) {
148 (Ok(a), Ok(b)) => a > b,
149 _ => cell_value > v.as_str(),
150 }
151 }
152 FilterCondition::GreaterThanOrEqual(v) => {
153 match (cell_value.parse::<f64>(), v.parse::<f64>()) {
154 (Ok(a), Ok(b)) => a >= b,
155 _ => cell_value >= v.as_str(),
156 }
157 }
158 FilterCondition::LessThan(v) => match (cell_value.parse::<f64>(), v.parse::<f64>()) {
159 (Ok(a), Ok(b)) => a < b,
160 _ => cell_value < v.as_str(),
161 },
162 FilterCondition::LessThanOrEqual(v) => {
163 match (cell_value.parse::<f64>(), v.parse::<f64>()) {
164 (Ok(a), Ok(b)) => a <= b,
165 _ => cell_value <= v.as_str(),
166 }
167 }
168 FilterCondition::Contains(v) => cell_value.contains(v),
169 FilterCondition::StartsWith(v) => cell_value.starts_with(v),
170 FilterCondition::EndsWith(v) => cell_value.ends_with(v),
171 FilterCondition::Regex(pattern) => {
172 use regex::Regex;
173 let re = Regex::new(pattern)?;
174 re.is_match(cell_value)
175 }
176 })
177 }
178
179 pub fn evaluate_filter_condition(
181 &self,
182 cell_value: &str,
183 operator: &str,
184 value: &str,
185 ) -> Result<bool> {
186 let condition = self.parse_filter_condition(operator, value)?;
187 self.evaluate_condition(cell_value, &condition)
188 }
189
190 pub fn replace(
192 &self,
193 data: &mut Vec<Vec<String>>,
194 column: usize,
195 find: &str,
196 replace_with: &str,
197 ) -> usize {
198 let mut count = 0;
199 for row in data.iter_mut() {
200 if let Some(cell) = row.get_mut(column) {
201 if cell.contains(find) {
202 *cell = cell.replace(find, replace_with);
203 count += 1;
204 }
205 }
206 }
207 count
208 }
209
210 pub fn find_replace(
212 &self,
213 data: &mut Vec<Vec<String>>,
214 find: &str,
215 replace_with: &str,
216 _column: Option<usize>,
217 ) -> Result<usize> {
218 let mut count = 0;
219 for row in data.iter_mut() {
220 for cell in row.iter_mut() {
221 if cell.contains(find) {
222 *cell = cell.replace(find, replace_with);
223 count += 1;
224 }
225 }
226 }
227 Ok(count)
228 }
229
230 pub fn deduplicate(&self, data: &[Vec<String>]) -> Vec<Vec<String>> {
232 use std::collections::HashSet;
233 let mut seen: HashSet<Vec<String>> = HashSet::with_capacity(data.len());
234 data.iter()
235 .filter(|row| seen.insert((*row).clone()))
236 .cloned()
237 .collect()
238 }
239
240 pub fn deduplicate_mut(&self, data: &mut Vec<Vec<String>>) -> usize {
242 use std::collections::HashSet;
243 let original_len = data.len();
244 let mut seen: HashSet<Vec<String>> = HashSet::with_capacity(data.len());
245 data.retain(|row| seen.insert(row.clone()));
246 original_len - data.len()
247 }
248
249 pub fn transpose(&self, data: &[Vec<String>]) -> Vec<Vec<String>> {
251 if data.is_empty() {
252 return Vec::new();
253 }
254
255 let max_cols = data.iter().map(|r| r.len()).max().unwrap_or(0);
256 let mut result: Vec<Vec<String>> = Vec::with_capacity(max_cols);
257
258 for col_idx in 0..max_cols {
259 let mut new_row = Vec::with_capacity(data.len());
260 for row in data.iter() {
261 if col_idx < row.len() {
262 new_row.push(row[col_idx].clone());
263 } else {
264 new_row.push(String::new());
265 }
266 }
267 result.push(new_row);
268 }
269
270 result
271 }
272
273 pub fn to_markdown(&self, data: &[Vec<String>]) -> String {
275 if data.is_empty() {
276 return String::new();
277 }
278
279 let mut output = String::with_capacity(data.len() * data[0].len() * 20);
280
281 if let Some(header) = data.first() {
283 output.push_str("| ");
284 output.push_str(&header.join(" | "));
285 output.push_str(" |\n");
286
287 output.push_str("| ");
289 let sep: String = header.iter().map(|_| "---").collect::<Vec<_>>().join(" | ");
290 output.push_str(&sep);
291 output.push_str(" |\n");
292 }
293
294 for row in data.iter().skip(1) {
296 output.push_str("| ");
297 output.push_str(&row.join(" | "));
298 output.push_str(" |\n");
299 }
300
301 output
302 }
303
304 pub fn insert_row(&self, data: &mut Vec<Vec<String>>, index: usize, row: Vec<String>) {
306 if index <= data.len() {
307 data.insert(index, row);
308 }
309 }
310
311 pub fn delete_row(&self, data: &mut Vec<Vec<String>>, index: usize) -> Option<Vec<String>> {
313 if index < data.len() {
314 Some(data.remove(index))
315 } else {
316 None
317 }
318 }
319
320 pub fn insert_column(&self, data: &mut Vec<Vec<String>>, index: usize, values: Vec<String>) {
322 for (row_idx, row) in data.iter_mut().enumerate() {
323 let value = values.get(row_idx).cloned().unwrap_or_default();
324 if index <= row.len() {
325 row.insert(index, value);
326 } else {
327 row.push(value);
328 }
329 }
330 }
331
332 pub fn delete_column(&self, data: &mut Vec<Vec<String>>, index: usize) {
334 for row in data.iter_mut() {
335 if index < row.len() {
336 row.remove(index);
337 }
338 }
339 }
340}
341
342impl TransformOperator for DataOperations {
343 fn transform(&self, data: &mut Vec<Vec<String>>, operation: TransformOperation) -> Result<()> {
344 match operation {
345 TransformOperation::RenameColumn { from, to } => {
346 if let Some(row) = data.first_mut() {
347 if from < row.len() {
348 row[from] = to;
349 }
350 }
351 }
352 TransformOperation::DropColumn(col_idx) => {
353 for row in data.iter_mut() {
354 if col_idx < row.len() {
355 row.remove(col_idx);
356 }
357 }
358 }
359 TransformOperation::AddColumn { name, formula } => {
360 if let Some(formula_str) = formula {
361 use crate::formula::FormulaEvaluator;
363 let evaluator = FormulaEvaluator::new();
364
365 let has_cell_refs = formula_str.chars().any(|c: char| c.is_ascii_uppercase())
368 && formula_str.contains(|c: char| c.is_ascii_digit());
369
370 if has_cell_refs {
371 let data_clone = data.clone();
375 for (row_idx, row) in data.iter_mut().enumerate() {
376 let row_formula = adjust_cell_references_for_row(&formula_str, row_idx);
379
380 match evaluator.evaluate_formula_full(&row_formula, &data_clone) {
381 Ok(result) => {
382 let value = match result {
383 crate::formula::FormulaResult::Number(n) => n.to_string(),
384 crate::formula::FormulaResult::Text(s) => s,
385 };
386 row.push(value);
387 }
388 Err(_) => {
389 row.push(format!("#ERROR: {}", name));
390 }
391 }
392 }
393 } else {
394 match evaluator.evaluate_formula_full(&formula_str, data) {
396 Ok(result) => {
397 let value = match result {
398 crate::formula::FormulaResult::Number(n) => n.to_string(),
399 crate::formula::FormulaResult::Text(s) => s,
400 };
401 for row in data.iter_mut() {
402 row.push(value.clone());
403 }
404 }
405 Err(_) => {
406 for row in data.iter_mut() {
407 row.push(format!("#ERROR: {}", name));
408 }
409 }
410 }
411 }
412 } else {
413 for row in data.iter_mut() {
415 row.push(name.clone());
416 }
417 }
418 }
419 TransformOperation::FillNa { column, value } => {
420 if value.is_empty() {
421 return Ok(());
422 }
423 for row in data.iter_mut() {
424 if column < row.len() && row[column].is_empty() {
425 row[column] = value.clone();
426 }
427 }
428 }
429 }
430 Ok(())
431 }
432}
433
434impl DataOperator for DataOperations {}
435
436fn adjust_cell_references_for_row(formula: &str, row_idx: usize) -> String {
448 use crate::regex_cache::cell_reference_regex;
449
450 let row_num = row_idx + 1;
451 let re = cell_reference_regex();
452
453 let result = re.replace_all(formula, |caps: ®ex::Captures| {
454 let column = &caps[1]; let _old_row = &caps[2]; format!("{}{}", column, row_num)
457 });
458
459 result.to_string()
460}
461
462#[cfg(test)]
463mod tests {
464 use super::*;
465
466 #[test]
467 fn test_adjust_cell_references_for_row() {
468 assert_eq!(adjust_cell_references_for_row("A1", 0), "A1");
470 assert_eq!(adjust_cell_references_for_row("A1", 1), "A2");
471 assert_eq!(adjust_cell_references_for_row("A1", 9), "A10");
472
473 assert_eq!(adjust_cell_references_for_row("A1+B2", 0), "A1+B1");
475 assert_eq!(adjust_cell_references_for_row("A1+B2", 1), "A2+B2");
476 assert_eq!(adjust_cell_references_for_row("A1+B2", 2), "A3+B3");
477
478 assert_eq!(
480 adjust_cell_references_for_row("SUM(A1:B10)", 0),
481 "SUM(A1:B1)"
482 );
483 assert_eq!(adjust_cell_references_for_row("A1*2+B1", 5), "A6*2+B6");
484
485 assert_eq!(adjust_cell_references_for_row("AA1", 2), "AA3");
487 assert_eq!(adjust_cell_references_for_row("AB12+CD3", 1), "AB2+CD2");
488
489 assert_eq!(adjust_cell_references_for_row("a1+b2", 0), "a1+b1");
491 }
492}