1use crate::traits::{
2 CellRangeProvider, DataReader, DataWriteOptions, DataWriter, FileHandler, SchemaProvider,
3};
4use anyhow::{Context, Result};
5use csv::{ReaderBuilder, WriterBuilder};
6use std::fs::File;
7use std::io::{BufReader, BufWriter, Read};
8
9#[derive(Debug, Clone)]
11pub struct CellRange {
12 pub start_row: usize,
13 pub start_col: usize,
14 pub end_row: usize,
15 pub end_col: usize,
16}
17
18impl CellRange {
19 pub fn parse(range_str: &str) -> Result<Self> {
21 let range_str = range_str.trim().to_uppercase();
22
23 if let Some(colon_pos) = range_str.find(':') {
24 let start = &range_str[..colon_pos];
25 let end = &range_str[colon_pos + 1..];
26 let (start_row, start_col) = Self::parse_cell(start)?;
27 let (end_row, end_col) = Self::parse_cell(end)?;
28 Ok(Self {
29 start_row,
30 start_col,
31 end_row,
32 end_col,
33 })
34 } else {
35 let (row, col) = Self::parse_cell(&range_str)?;
36 Ok(Self {
37 start_row: row,
38 start_col: col,
39 end_row: row,
40 end_col: col,
41 })
42 }
43 }
44
45 fn parse_cell(cell: &str) -> Result<(usize, usize)> {
46 let mut col_str = String::new();
47 let mut row_str = String::new();
48
49 for ch in cell.chars() {
50 if ch.is_alphabetic() {
51 col_str.push(ch);
52 } else if ch.is_ascii_digit() {
53 row_str.push(ch);
54 }
55 }
56
57 let col = Self::column_to_index(&col_str)?;
58 let row = row_str
59 .parse::<usize>()
60 .with_context(|| format!("Invalid row in cell: {cell}"))?;
61
62 Ok((row.saturating_sub(1), col)) }
64
65 fn column_to_index(col: &str) -> Result<usize> {
66 if col.is_empty() {
67 anyhow::bail!("Empty column reference");
68 }
69 let mut index = 0usize;
70 for ch in col.chars() {
71 index = index * 26 + (ch.to_ascii_uppercase() as usize - b'A' as usize + 1);
72 }
73 Ok(index - 1)
74 }
75}
76
77pub struct CsvHandler;
78
79impl CsvHandler {
80 pub fn new() -> Self {
81 Self
82 }
83
84 pub fn read(&self, path: &str) -> Result<String> {
85 let mut file =
86 File::open(path).with_context(|| format!("Failed to open CSV file: {path}"))?;
87
88 let mut contents = String::new();
89 file.read_to_string(&mut contents)?;
90
91 Ok(contents)
92 }
93
94 pub fn write_from_csv(&self, input_path: &str, output_path: &str) -> Result<()> {
95 let mut reader = ReaderBuilder::new()
96 .has_headers(false)
97 .from_path(input_path)
98 .with_context(|| format!("Failed to open CSV file: {input_path}"))?;
99
100 let mut writer = WriterBuilder::new()
101 .has_headers(false)
102 .from_path(output_path)
103 .with_context(|| format!("Failed to create CSV file: {}", output_path))?;
104
105 for result in reader.records() {
106 let record = result?;
107 let row: Vec<String> = record.iter().map(|s| s.to_string()).collect();
108 writer.write_record(sanitize_csv_row(&row))?;
109 }
110
111 writer.flush()?;
112 Ok(())
113 }
114
115 pub fn write_records(&self, path: &str, records: Vec<Vec<String>>) -> Result<()> {
116 let mut writer = WriterBuilder::new()
117 .has_headers(false)
118 .from_path(path)
119 .with_context(|| format!("Failed to create CSV file: {path}"))?;
120
121 for record in records {
122 writer.write_record(&record)?;
123 }
124
125 writer.flush()?;
126 Ok(())
127 }
128
129 pub fn read_range(&self, path: &str, range: &CellRange) -> Result<Vec<Vec<String>>> {
131 let mut reader = ReaderBuilder::new()
132 .has_headers(false)
133 .flexible(true)
134 .from_path(path)
135 .with_context(|| format!("Failed to open CSV file: {path}"))?;
136
137 let estimated_rows = range.end_row.saturating_sub(range.start_row) + 1;
138 let _estimated_cols = range.end_col.saturating_sub(range.start_col) + 1;
139 let mut result = Vec::with_capacity(estimated_rows.min(1024));
140
141 for (row_idx, record) in reader.records().enumerate() {
142 if row_idx < range.start_row {
143 continue;
144 }
145 if row_idx > range.end_row {
146 break;
147 }
148
149 let record = record?;
150 let num_cols = (range.end_col.saturating_sub(range.start_col) + 1)
152 .min(record.len().saturating_sub(range.start_col));
153 let mut row = Vec::with_capacity(num_cols);
154 for col_idx in range.start_col..=range.end_col {
155 if let Some(val) = record.get(col_idx) {
156 row.push(String::from(val));
157 }
158 }
159 result.push(row);
160 }
161
162 Ok(result)
163 }
164
165 pub fn read_as_json(&self, path: &str) -> Result<String> {
167 let mut reader = ReaderBuilder::new()
168 .has_headers(false)
169 .flexible(true)
170 .from_path(path)
171 .with_context(|| format!("Failed to open CSV file: {path}"))?;
172
173 let mut rows: Vec<Vec<String>> = Vec::with_capacity(1024);
174 for record in reader.records() {
175 let record = record?;
176 let mut row = Vec::with_capacity(record.len());
178 for val in record.iter() {
179 row.push(String::from(val));
180 }
181 rows.push(row);
182 }
183
184 serde_json::to_string_pretty(&rows).with_context(|| "Failed to serialize to JSON")
185 }
186
187 pub fn append_records(&self, path: &str, records: &[Vec<String>]) -> Result<()> {
189 use std::fs::OpenOptions;
190
191 let file = OpenOptions::new()
192 .create(true)
193 .append(true)
194 .open(path)
195 .with_context(|| format!("Failed to open CSV file for append: {path}"))?;
196
197 let mut writer = csv::WriterBuilder::new()
198 .has_headers(false)
199 .from_writer(file);
200
201 for record in records {
202 writer.write_record(record)?;
203 }
204
205 writer.flush()?;
206 Ok(())
207 }
208
209 pub fn write_range(
211 &self,
212 path: &str,
213 data: &[Vec<String>],
214 start_row: usize,
215 start_col: usize,
216 ) -> Result<()> {
217 let mut existing: Vec<Vec<String>> = if std::path::Path::new(path).exists() {
219 let mut reader = ReaderBuilder::new()
220 .has_headers(false)
221 .flexible(true)
222 .from_path(path)?;
223 reader
224 .records()
225 .map(|r| {
226 let record =
227 r.with_context(|| format!("Failed to read CSV record from: {path}"))?;
228 Ok(record.iter().map(|s| s.to_string()).collect())
229 })
230 .collect::<Result<Vec<Vec<String>>>>()?
231 } else {
232 Vec::with_capacity(start_row + data.len())
233 };
234
235 let needed_rows = start_row + data.len();
237 if existing.len() < needed_rows {
238 existing.resize_with(needed_rows, Vec::new);
239 }
240
241 for (row_idx, row) in data.iter().enumerate() {
243 let target_row = start_row + row_idx;
244 let needed_cols = start_col + row.len();
245
246 if existing[target_row].len() < needed_cols {
247 existing[target_row].resize(needed_cols, String::new());
248 }
249
250 for (col_idx, value) in row.iter().enumerate() {
252 existing[target_row][start_col + col_idx] = value.clone();
253 }
254 }
255
256 self.write_records_safe(path, existing)
257 }
258}
259
260const CSV_INJECTION_CHARS: &[char] = &['=', '+', '-', '@', '\t', '\r', '\n'];
262
263pub fn sanitize_csv_value(value: &str) -> String {
267 if value.is_empty() {
268 return value.to_string();
269 }
270 let first = value.chars().next().unwrap();
271 if CSV_INJECTION_CHARS.contains(&first) {
272 format!("'{}", value)
273 } else {
274 value.to_string()
275 }
276}
277
278pub fn sanitize_csv_row(row: &[String]) -> Vec<String> {
280 row.iter().map(|v| sanitize_csv_value(v)).collect()
281}
282
283impl CsvHandler {
284 pub fn write_records_safe(&self, path: &str, records: Vec<Vec<String>>) -> Result<()> {
286 let sanitized: Vec<Vec<String>> = records.iter().map(|row| sanitize_csv_row(row)).collect();
287 self.write_records(path, sanitized)
288 }
289
290 pub fn append_records_safe(&self, path: &str, records: &[Vec<String>]) -> Result<()> {
292 let sanitized: Vec<Vec<String>> = records.iter().map(|row| sanitize_csv_row(row)).collect();
293 self.append_records(path, &sanitized)
294 }
295}
296
297pub struct StreamingCsvReader {
299 reader: csv::Reader<BufReader<File>>,
300 current_row: usize,
301}
302
303impl StreamingCsvReader {
304 pub fn open(path: &str) -> Result<Self> {
305 let file = File::open(path).with_context(|| format!("Failed to open CSV file: {path}"))?;
306 let buf_reader = BufReader::with_capacity(64 * 1024, file); let reader = ReaderBuilder::new()
309 .has_headers(false)
310 .flexible(true)
311 .from_reader(buf_reader);
312
313 Ok(Self {
314 reader,
315 current_row: 0,
316 })
317 }
318
319 pub fn current_row(&self) -> usize {
320 self.current_row
321 }
322}
323
324impl Iterator for StreamingCsvReader {
325 type Item = Result<Vec<String>>;
326
327 fn next(&mut self) -> Option<Self::Item> {
328 match self.reader.records().next() {
329 Some(Ok(record)) => {
330 self.current_row += 1;
331 let mut row = Vec::with_capacity(record.len());
333 for val in record.iter() {
334 row.push(String::from(val));
335 }
336 Some(Ok(row))
337 }
338 Some(Err(e)) => Some(Err(anyhow::anyhow!("CSV read error: {}", e))),
339 None => None,
340 }
341 }
342}
343
344pub struct StreamingCsvWriter {
346 writer: csv::Writer<BufWriter<File>>,
347 rows_written: usize,
348}
349
350impl StreamingCsvWriter {
351 pub fn create(path: &str) -> Result<Self> {
352 let file =
353 File::create(path).with_context(|| format!("Failed to create CSV file: {path}"))?;
354 let buf_writer = BufWriter::with_capacity(64 * 1024, file);
355
356 let writer = WriterBuilder::new()
357 .has_headers(false)
358 .from_writer(buf_writer);
359
360 Ok(Self {
361 writer,
362 rows_written: 0,
363 })
364 }
365
366 pub fn write_row(&mut self, row: &[String]) -> Result<()> {
367 let safe = sanitize_csv_row(row);
368 self.writer.write_record(&safe)?;
369 self.rows_written += 1;
370 Ok(())
371 }
372
373 pub fn rows_written(&self) -> usize {
374 self.rows_written
375 }
376
377 pub fn flush(&mut self) -> Result<()> {
378 self.writer.flush()?;
379 Ok(())
380 }
381}
382
383impl Drop for StreamingCsvWriter {
384 fn drop(&mut self) {
385 let _ = self.writer.flush();
386 }
387}
388
389impl DataReader for CsvHandler {
392 fn read(&self, path: &str) -> Result<Vec<Vec<String>>> {
393 let mut reader = ReaderBuilder::new()
394 .has_headers(false)
395 .flexible(true)
396 .from_path(path)
397 .with_context(|| format!("Failed to open CSV file: {path}"))?;
398
399 let mut rows = Vec::with_capacity(1024);
401 for record in reader.records() {
402 let record = record?;
403 let mut row = Vec::with_capacity(record.len());
404 for val in record.iter() {
405 row.push(String::from(val));
406 }
407 rows.push(row);
408 }
409 Ok(rows)
410 }
411
412 fn read_with_headers(&self, path: &str) -> Result<Vec<Vec<String>>> {
413 <Self as DataReader>::read(self, path)
415 }
416
417 fn read_range(&self, path: &str, range: &CellRange) -> Result<Vec<Vec<String>>> {
418 CsvHandler::read_range(self, path, range)
419 }
420
421 fn read_as_json(&self, path: &str) -> Result<String> {
422 CsvHandler::read_as_json(self, path)
423 }
424
425 fn supports_format(&self, path: &str) -> bool {
426 path.to_lowercase().ends_with(".csv")
427 }
428}
429
430impl DataWriter for CsvHandler {
431 fn write(&self, path: &str, data: &[Vec<String>], _options: DataWriteOptions) -> Result<()> {
432 self.write_records_safe(path, data.to_vec())
433 }
434
435 fn write_range(
436 &self,
437 path: &str,
438 data: &[Vec<String>],
439 start_row: usize,
440 start_col: usize,
441 ) -> Result<()> {
442 self.write_range(path, data, start_row, start_col)
443 }
444
445 fn append(&self, path: &str, data: &[Vec<String>]) -> Result<()> {
446 self.append_records_safe(path, data)
447 }
448
449 fn supports_format(&self, path: &str) -> bool {
450 path.to_lowercase().ends_with(".csv")
451 }
452}
453
454impl FileHandler for CsvHandler {
455 fn format_name(&self) -> &'static str {
456 "csv"
457 }
458
459 fn supported_extensions(&self) -> &'static [&'static str] {
460 &["csv"]
461 }
462}
463
464impl SchemaProvider for CsvHandler {
465 fn get_schema(&self, path: &str) -> Result<Vec<(String, String)>> {
466 let data = <Self as DataReader>::read(self, path)?;
467 if data.is_empty() {
468 return Ok(Vec::new());
469 }
470
471 let num_cols = data[0].len();
472 Ok((0..num_cols)
473 .map(|i| (format!("col_{}", i), "string".to_string()))
474 .collect())
475 }
476
477 fn get_column_names(&self, path: &str) -> Result<Vec<String>> {
478 let data = <Self as DataReader>::read(self, path)?;
479 if data.is_empty() {
480 return Ok(Vec::new());
481 }
482
483 Ok(data[0].clone())
484 }
485
486 fn get_row_count(&self, path: &str) -> Result<usize> {
487 let data = <Self as DataReader>::read(self, path)?;
488 Ok(data.len())
489 }
490
491 fn get_column_count(&self, path: &str) -> Result<usize> {
492 let data = <Self as DataReader>::read(self, path)?;
493 Ok(data.first().map(|r| r.len()).unwrap_or(0))
494 }
495}
496
497pub struct CellRangeHelper;
499
500impl CellRangeProvider for CellRangeHelper {
501 fn parse_range(&self, range_str: &str) -> Result<CellRange> {
502 CellRange::parse(range_str)
503 }
504
505 fn to_cell_reference(&self, row: usize, col: usize) -> String {
506 CellRange::index_to_column(col, row)
507 }
508
509 fn from_cell_reference(&self, cell: &str) -> Result<(usize, usize)> {
510 CellRange::parse_cell(cell)
511 }
512}
513
514impl CellRange {
515 pub fn index_to_column(col: usize, row: usize) -> String {
517 let mut index = col;
518 index += 1; let mut result = String::new();
520 while index > 0 {
521 index -= 1;
522 result.push((b'A' + (index % 26) as u8) as char);
523 index /= 26;
524 }
525 let col_str: String = result.chars().rev().collect();
526 format!("{}{}", col_str, row + 1)
527 }
528}