Skip to main content

xls_rs/
csv_handler.rs

1use crate::traits::{
2    CellRangeProvider, DataReader, DataWriteOptions, DataWriter, FileHandler, SchemaProvider,
3};
4use anyhow::{Context, Result};
5use csv::{ReaderBuilder, WriterBuilder};
6use std::fs::File;
7use std::io::{BufReader, BufWriter, Read};
8
9/// Represents a cell range like A1:B3
10#[derive(Debug, Clone)]
11pub struct CellRange {
12    pub start_row: usize,
13    pub start_col: usize,
14    pub end_row: usize,
15    pub end_col: usize,
16}
17
18impl CellRange {
19    /// Parse a range string like "A1:B3" or "A1"
20    pub fn parse(range_str: &str) -> Result<Self> {
21        let range_str = range_str.trim().to_uppercase();
22
23        if let Some(colon_pos) = range_str.find(':') {
24            let start = &range_str[..colon_pos];
25            let end = &range_str[colon_pos + 1..];
26            let (start_row, start_col) = Self::parse_cell(start)?;
27            let (end_row, end_col) = Self::parse_cell(end)?;
28            Ok(Self {
29                start_row,
30                start_col,
31                end_row,
32                end_col,
33            })
34        } else {
35            let (row, col) = Self::parse_cell(&range_str)?;
36            Ok(Self {
37                start_row: row,
38                start_col: col,
39                end_row: row,
40                end_col: col,
41            })
42        }
43    }
44
45    fn parse_cell(cell: &str) -> Result<(usize, usize)> {
46        let mut col_str = String::new();
47        let mut row_str = String::new();
48
49        for ch in cell.chars() {
50            if ch.is_alphabetic() {
51                col_str.push(ch);
52            } else if ch.is_ascii_digit() {
53                row_str.push(ch);
54            }
55        }
56
57        let col = Self::column_to_index(&col_str)?;
58        let row = row_str
59            .parse::<usize>()
60            .with_context(|| format!("Invalid row in cell: {cell}"))?;
61
62        Ok((row.saturating_sub(1), col)) // Convert to 0-indexed
63    }
64
65    fn column_to_index(col: &str) -> Result<usize> {
66        if col.is_empty() {
67            anyhow::bail!("Empty column reference");
68        }
69        let mut index = 0usize;
70        for ch in col.chars() {
71            index = index * 26 + (ch.to_ascii_uppercase() as usize - b'A' as usize + 1);
72        }
73        Ok(index - 1)
74    }
75}
76
77pub struct CsvHandler;
78
79impl CsvHandler {
80    pub fn new() -> Self {
81        Self
82    }
83
84    pub fn read(&self, path: &str) -> Result<String> {
85        let mut file =
86            File::open(path).with_context(|| format!("Failed to open CSV file: {path}"))?;
87
88        let mut contents = String::new();
89        file.read_to_string(&mut contents)?;
90
91        Ok(contents)
92    }
93
94    pub fn write_from_csv(&self, input_path: &str, output_path: &str) -> Result<()> {
95        let mut reader = ReaderBuilder::new()
96            .has_headers(false)
97            .from_path(input_path)
98            .with_context(|| format!("Failed to open CSV file: {input_path}"))?;
99
100        let mut writer = WriterBuilder::new()
101            .has_headers(false)
102            .from_path(output_path)
103            .with_context(|| format!("Failed to create CSV file: {}", output_path))?;
104
105        for result in reader.records() {
106            let record = result?;
107            let row: Vec<String> = record.iter().map(|s| s.to_string()).collect();
108            writer.write_record(sanitize_csv_row(&row))?;
109        }
110
111        writer.flush()?;
112        Ok(())
113    }
114
115    pub fn write_records(&self, path: &str, records: Vec<Vec<String>>) -> Result<()> {
116        let mut writer = WriterBuilder::new()
117            .has_headers(false)
118            .from_path(path)
119            .with_context(|| format!("Failed to create CSV file: {path}"))?;
120
121        for record in records {
122            writer.write_record(&record)?;
123        }
124
125        writer.flush()?;
126        Ok(())
127    }
128
129    /// Read a specific range from CSV file
130    pub fn read_range(&self, path: &str, range: &CellRange) -> Result<Vec<Vec<String>>> {
131        let mut reader = ReaderBuilder::new()
132            .has_headers(false)
133            .flexible(true)
134            .from_path(path)
135            .with_context(|| format!("Failed to open CSV file: {path}"))?;
136
137        let estimated_rows = range.end_row.saturating_sub(range.start_row) + 1;
138        let _estimated_cols = range.end_col.saturating_sub(range.start_col) + 1;
139        let mut result = Vec::with_capacity(estimated_rows.min(1024));
140
141        for (row_idx, record) in reader.records().enumerate() {
142            if row_idx < range.start_row {
143                continue;
144            }
145            if row_idx > range.end_row {
146                break;
147            }
148
149            let record = record?;
150            // Pre-allocate with exact capacity to avoid reallocations
151            let num_cols = (range.end_col.saturating_sub(range.start_col) + 1)
152                .min(record.len().saturating_sub(range.start_col));
153            let mut row = Vec::with_capacity(num_cols);
154            for col_idx in range.start_col..=range.end_col {
155                if let Some(val) = record.get(col_idx) {
156                    row.push(String::from(val));
157                }
158            }
159            result.push(row);
160        }
161
162        Ok(result)
163    }
164
165    /// Read CSV and return as JSON array
166    pub fn read_as_json(&self, path: &str) -> Result<String> {
167        let mut reader = ReaderBuilder::new()
168            .has_headers(false)
169            .flexible(true)
170            .from_path(path)
171            .with_context(|| format!("Failed to open CSV file: {path}"))?;
172
173        let mut rows: Vec<Vec<String>> = Vec::with_capacity(1024);
174        for record in reader.records() {
175            let record = record?;
176            // Pre-allocate based on record length
177            let mut row = Vec::with_capacity(record.len());
178            for val in record.iter() {
179                row.push(String::from(val));
180            }
181            rows.push(row);
182        }
183
184        serde_json::to_string_pretty(&rows).with_context(|| "Failed to serialize to JSON")
185    }
186
187    /// Append records to an existing CSV file (or create if doesn't exist)
188    pub fn append_records(&self, path: &str, records: &[Vec<String>]) -> Result<()> {
189        use std::fs::OpenOptions;
190
191        let file = OpenOptions::new()
192            .create(true)
193            .append(true)
194            .open(path)
195            .with_context(|| format!("Failed to open CSV file for append: {path}"))?;
196
197        let mut writer = csv::WriterBuilder::new()
198            .has_headers(false)
199            .from_writer(file);
200
201        for record in records {
202            writer.write_record(record)?;
203        }
204
205        writer.flush()?;
206        Ok(())
207    }
208
209    /// Write data to a specific cell range in CSV
210    pub fn write_range(
211        &self,
212        path: &str,
213        data: &[Vec<String>],
214        start_row: usize,
215        start_col: usize,
216    ) -> Result<()> {
217        // Read existing data if file exists
218        let mut existing: Vec<Vec<String>> = if std::path::Path::new(path).exists() {
219            let mut reader = ReaderBuilder::new()
220                .has_headers(false)
221                .flexible(true)
222                .from_path(path)?;
223            reader
224                .records()
225                .map(|r| {
226                    let record =
227                        r.with_context(|| format!("Failed to read CSV record from: {path}"))?;
228                    Ok(record.iter().map(|s| s.to_string()).collect())
229                })
230                .collect::<Result<Vec<Vec<String>>>>()?
231        } else {
232            Vec::with_capacity(start_row + data.len())
233        };
234
235        // Expand existing data if needed
236        let needed_rows = start_row + data.len();
237        if existing.len() < needed_rows {
238            existing.resize_with(needed_rows, Vec::new);
239        }
240
241        // Write data to range
242        for (row_idx, row) in data.iter().enumerate() {
243            let target_row = start_row + row_idx;
244            let needed_cols = start_col + row.len();
245
246            if existing[target_row].len() < needed_cols {
247                existing[target_row].resize(needed_cols, String::new());
248            }
249
250            // Direct assignment instead of clone when possible
251            for (col_idx, value) in row.iter().enumerate() {
252                existing[target_row][start_col + col_idx] = value.clone();
253            }
254        }
255
256        self.write_records_safe(path, existing)
257    }
258}
259
260/// Characters that can trigger formula injection in spreadsheet applications
261const CSV_INJECTION_CHARS: &[char] = &['=', '+', '-', '@', '\t', '\r', '\n'];
262
263/// Sanitize a cell value to prevent CSV delimiter injection.
264/// Prefixes dangerous leading characters with a single quote to neutralize them.
265/// Also handles embedded newlines by quoting the value.
266pub fn sanitize_csv_value(value: &str) -> String {
267    if value.is_empty() {
268        return value.to_string();
269    }
270    let first = value.chars().next().unwrap();
271    if CSV_INJECTION_CHARS.contains(&first) {
272        format!("'{}", value)
273    } else {
274        value.to_string()
275    }
276}
277
278/// Sanitize an entire row of CSV values
279pub fn sanitize_csv_row(row: &[String]) -> Vec<String> {
280    row.iter().map(|v| sanitize_csv_value(v)).collect()
281}
282
283impl CsvHandler {
284    /// Write records with CSV injection protection
285    pub fn write_records_safe(&self, path: &str, records: Vec<Vec<String>>) -> Result<()> {
286        let sanitized: Vec<Vec<String>> = records.iter().map(|row| sanitize_csv_row(row)).collect();
287        self.write_records(path, sanitized)
288    }
289
290    /// Append records with CSV injection protection
291    pub fn append_records_safe(&self, path: &str, records: &[Vec<String>]) -> Result<()> {
292        let sanitized: Vec<Vec<String>> = records.iter().map(|row| sanitize_csv_row(row)).collect();
293        self.append_records(path, &sanitized)
294    }
295}
296
297/// Streaming CSV reader for large files - processes rows one at a time
298pub struct StreamingCsvReader {
299    reader: csv::Reader<BufReader<File>>,
300    current_row: usize,
301}
302
303impl StreamingCsvReader {
304    pub fn open(path: &str) -> Result<Self> {
305        let file = File::open(path).with_context(|| format!("Failed to open CSV file: {path}"))?;
306        let buf_reader = BufReader::with_capacity(64 * 1024, file); // 64KB buffer
307
308        let reader = ReaderBuilder::new()
309            .has_headers(false)
310            .flexible(true)
311            .from_reader(buf_reader);
312
313        Ok(Self {
314            reader,
315            current_row: 0,
316        })
317    }
318
319    pub fn current_row(&self) -> usize {
320        self.current_row
321    }
322}
323
324impl Iterator for StreamingCsvReader {
325    type Item = Result<Vec<String>>;
326
327    fn next(&mut self) -> Option<Self::Item> {
328        match self.reader.records().next() {
329            Some(Ok(record)) => {
330                self.current_row += 1;
331                // Pre-allocate capacity to avoid reallocations
332                let mut row = Vec::with_capacity(record.len());
333                for val in record.iter() {
334                    row.push(String::from(val));
335                }
336                Some(Ok(row))
337            }
338            Some(Err(e)) => Some(Err(anyhow::anyhow!("CSV read error: {}", e))),
339            None => None,
340        }
341    }
342}
343
344/// Streaming CSV writer for large files
345pub struct StreamingCsvWriter {
346    writer: csv::Writer<BufWriter<File>>,
347    rows_written: usize,
348}
349
350impl StreamingCsvWriter {
351    pub fn create(path: &str) -> Result<Self> {
352        let file =
353            File::create(path).with_context(|| format!("Failed to create CSV file: {path}"))?;
354        let buf_writer = BufWriter::with_capacity(64 * 1024, file);
355
356        let writer = WriterBuilder::new()
357            .has_headers(false)
358            .from_writer(buf_writer);
359
360        Ok(Self {
361            writer,
362            rows_written: 0,
363        })
364    }
365
366    pub fn write_row(&mut self, row: &[String]) -> Result<()> {
367        let safe = sanitize_csv_row(row);
368        self.writer.write_record(&safe)?;
369        self.rows_written += 1;
370        Ok(())
371    }
372
373    pub fn rows_written(&self) -> usize {
374        self.rows_written
375    }
376
377    pub fn flush(&mut self) -> Result<()> {
378        self.writer.flush()?;
379        Ok(())
380    }
381}
382
383impl Drop for StreamingCsvWriter {
384    fn drop(&mut self) {
385        let _ = self.writer.flush();
386    }
387}
388
389// Trait implementations for CsvHandler
390
391impl DataReader for CsvHandler {
392    fn read(&self, path: &str) -> Result<Vec<Vec<String>>> {
393        let mut reader = ReaderBuilder::new()
394            .has_headers(false)
395            .flexible(true)
396            .from_path(path)
397            .with_context(|| format!("Failed to open CSV file: {path}"))?;
398
399        // Pre-allocate with capacity hint for better performance
400        let mut rows = Vec::with_capacity(1024);
401        for record in reader.records() {
402            let record = record?;
403            let mut row = Vec::with_capacity(record.len());
404            for val in record.iter() {
405                row.push(String::from(val));
406            }
407            rows.push(row);
408        }
409        Ok(rows)
410    }
411
412    fn read_with_headers(&self, path: &str) -> Result<Vec<Vec<String>>> {
413        // For CSV, headers are just the first row
414        <Self as DataReader>::read(self, path)
415    }
416
417    fn read_range(&self, path: &str, range: &CellRange) -> Result<Vec<Vec<String>>> {
418        CsvHandler::read_range(self, path, range)
419    }
420
421    fn read_as_json(&self, path: &str) -> Result<String> {
422        CsvHandler::read_as_json(self, path)
423    }
424
425    fn supports_format(&self, path: &str) -> bool {
426        path.to_lowercase().ends_with(".csv")
427    }
428}
429
430impl DataWriter for CsvHandler {
431    fn write(&self, path: &str, data: &[Vec<String>], _options: DataWriteOptions) -> Result<()> {
432        self.write_records_safe(path, data.to_vec())
433    }
434
435    fn write_range(
436        &self,
437        path: &str,
438        data: &[Vec<String>],
439        start_row: usize,
440        start_col: usize,
441    ) -> Result<()> {
442        self.write_range(path, data, start_row, start_col)
443    }
444
445    fn append(&self, path: &str, data: &[Vec<String>]) -> Result<()> {
446        self.append_records_safe(path, data)
447    }
448
449    fn supports_format(&self, path: &str) -> bool {
450        path.to_lowercase().ends_with(".csv")
451    }
452}
453
454impl FileHandler for CsvHandler {
455    fn format_name(&self) -> &'static str {
456        "csv"
457    }
458
459    fn supported_extensions(&self) -> &'static [&'static str] {
460        &["csv"]
461    }
462}
463
464impl SchemaProvider for CsvHandler {
465    fn get_schema(&self, path: &str) -> Result<Vec<(String, String)>> {
466        let data = <Self as DataReader>::read(self, path)?;
467        if data.is_empty() {
468            return Ok(Vec::new());
469        }
470
471        let num_cols = data[0].len();
472        Ok((0..num_cols)
473            .map(|i| (format!("col_{}", i), "string".to_string()))
474            .collect())
475    }
476
477    fn get_column_names(&self, path: &str) -> Result<Vec<String>> {
478        let data = <Self as DataReader>::read(self, path)?;
479        if data.is_empty() {
480            return Ok(Vec::new());
481        }
482
483        Ok(data[0].clone())
484    }
485
486    fn get_row_count(&self, path: &str) -> Result<usize> {
487        let data = <Self as DataReader>::read(self, path)?;
488        Ok(data.len())
489    }
490
491    fn get_column_count(&self, path: &str) -> Result<usize> {
492        let data = <Self as DataReader>::read(self, path)?;
493        Ok(data.first().map(|r| r.len()).unwrap_or(0))
494    }
495}
496
497/// Helper struct for CellRangeProvider implementation
498pub struct CellRangeHelper;
499
500impl CellRangeProvider for CellRangeHelper {
501    fn parse_range(&self, range_str: &str) -> Result<CellRange> {
502        CellRange::parse(range_str)
503    }
504
505    fn to_cell_reference(&self, row: usize, col: usize) -> String {
506        CellRange::index_to_column(col, row)
507    }
508
509    fn from_cell_reference(&self, cell: &str) -> Result<(usize, usize)> {
510        CellRange::parse_cell(cell)
511    }
512}
513
514impl CellRange {
515    /// Convert column index and row to cell reference (e.g., (0, 0) -> "A1")
516    pub fn index_to_column(col: usize, row: usize) -> String {
517        let mut index = col;
518        index += 1; // Convert to 1-based
519        let mut result = String::new();
520        while index > 0 {
521            index -= 1;
522            result.push((b'A' + (index % 26) as u8) as char);
523            index /= 26;
524        }
525        let col_str: String = result.chars().rev().collect();
526        format!("{}{}", col_str, row + 1)
527    }
528}