1#![allow(clippy::should_implement_trait)]
6#[allow(unused_imports)]
7use super::functions::*;
8use crate::{Error, Result};
9use std::fs::{File, OpenOptions};
10use std::io::{BufRead, BufReader, BufWriter, Write};
11use std::path::Path;
12
/// Formatting options for buffered CSV output.
///
/// Within this file the options are consumed by `ConfigurableCsvWriter`.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct CsvWriterConfig {
    /// Character placed between adjacent fields of a line.
    pub delimiter: char,
    /// Text appended after every written line (for example `"\n"` or `"\r\n"`).
    pub line_ending: String,
    /// When `true`, header and string cells are always wrapped in double quotes.
    pub quote_all: bool,
    /// Number of digits after the decimal point used for `f64` cells.
    pub precision: usize,
}
/// CSV parser over a borrowed string: the text to parse plus the field
/// delimiter and an optional comment prefix.
#[allow(dead_code)]
pub struct CsvParser<'a> {
    /// Complete CSV text to be parsed.
    pub(super) input: &'a str,
    /// Character that separates fields outside of quotes.
    pub(super) delimiter: char,
    /// When `Some`, records whose first field starts with this character
    /// (after leading whitespace) are treated as comments and skipped.
    pub(super) comment_prefix: Option<char>,
}
53#[allow(dead_code)]
54impl<'a> CsvParser<'a> {
55 pub fn new(input: &'a str, delimiter: char) -> Self {
57 Self {
58 input,
59 delimiter,
60 comment_prefix: None,
61 }
62 }
63 pub fn with_comment_prefix(mut self, prefix: char) -> Self {
65 self.comment_prefix = Some(prefix);
66 self
67 }
68 pub fn parse_all(self) -> std::result::Result<Vec<CsvRecord>, Error> {
72 let mut records = Vec::new();
73 let mut chars = self.input.chars().peekable();
74 'outer: loop {
75 if chars.peek().is_none() {
76 break;
77 }
78 let mut fields: Vec<String> = Vec::new();
79 let mut field = String::new();
80 let mut in_quotes = false;
81 loop {
82 match chars.next() {
83 None => {
84 if in_quotes {
85 return Err(Error::Parse(
86 "unterminated quoted field at EOF".to_string(),
87 ));
88 }
89 fields.push(field);
90 break;
91 }
92 Some('"') if !in_quotes => {
93 in_quotes = true;
94 }
95 Some('"') if in_quotes => {
96 if chars.peek() == Some(&'"') {
97 chars.next();
98 field.push('"');
99 } else {
100 in_quotes = false;
101 }
102 }
103 Some('\\') if in_quotes => match chars.next() {
104 Some('n') => field.push('\n'),
105 Some('t') => field.push('\t'),
106 Some('r') => field.push('\r'),
107 Some('"') => field.push('"'),
108 Some('\\') => field.push('\\'),
109 Some(c) => {
110 field.push('\\');
111 field.push(c);
112 }
113 None => {
114 return Err(Error::Parse("trailing backslash at EOF".to_string()));
115 }
116 },
117 Some('\r') if !in_quotes => {
118 if chars.peek() == Some(&'\n') {
119 chars.next();
120 }
121 fields.push(field);
122 break;
123 }
124 Some('\n') if !in_quotes => {
125 fields.push(field);
126 break;
127 }
128 Some(c) if c == self.delimiter && !in_quotes => {
129 fields.push(field.clone());
130 field = String::new();
131 }
132 Some(c) => {
133 field.push(c);
134 }
135 #[allow(unreachable_patterns)]
136 _ => {}
137 }
138 }
139 if let Some(prefix) = self.comment_prefix
140 && fields
141 .first()
142 .map(|f| f.trim_start().starts_with(prefix))
143 .unwrap_or(false)
144 {
145 continue 'outer;
146 }
147 if fields.len() == 1 && fields[0].trim().is_empty() {
148 continue 'outer;
149 }
150 records.push(CsvRecord { fields });
151 }
152 Ok(records)
153 }
154}
/// One parsed CSV record: its field values in column order.
#[derive(Debug, Clone, PartialEq)]
pub struct CsvRecord {
    /// Field values, left to right.
    pub fields: Vec<String>,
}

impl CsvRecord {
    /// Returns how many fields the record holds.
    pub fn len(&self) -> usize {
        self.fields.len()
    }

    /// Returns `true` when the record holds no fields at all.
    pub fn is_empty(&self) -> bool {
        self.fields.is_empty()
    }

    /// Returns the field at `index`, or the empty string when `index` is out
    /// of range.
    pub fn get(&self, index: usize) -> &str {
        match self.fields.get(index) {
            Some(value) => value,
            None => "",
        }
    }
}
/// Selector for an aggregation function.
///
/// NOTE(review): the name suggests it is consumed by a pivot operation; that
/// consumer is not visible in this part of the file, so the exact semantics of
/// each variant should be confirmed there.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PivotAgg {
    /// Presumably the sum of the aggregated values.
    Sum,
    /// Presumably the arithmetic mean of the aggregated values.
    Mean,
    /// Presumably the number of aggregated values.
    Count,
    /// Presumably the smallest aggregated value.
    Min,
    /// Presumably the largest aggregated value.
    Max,
}
190#[allow(dead_code)]
195#[derive(Debug, Clone)]
196pub struct CsvTable {
197 pub headers: Vec<String>,
199 pub rows: Vec<Vec<String>>,
201}
202#[allow(dead_code)]
203impl CsvTable {
204 pub fn new(headers: Vec<String>) -> Self {
206 Self {
207 headers,
208 rows: Vec::new(),
209 }
210 }
211 pub fn from_str(data: &str, delimiter: char) -> std::result::Result<Self, Error> {
213 let parser = CsvParser::new(data, delimiter).with_comment_prefix('#');
214 let mut records = parser.parse_all()?;
215 if records.is_empty() {
216 return Err(Error::Parse("CSV table is empty".to_string()));
217 }
218 let header_rec = records.remove(0);
219 let headers: Vec<String> = header_rec
220 .fields
221 .iter()
222 .map(|s| s.trim().to_string())
223 .collect();
224 let ncols = headers.len();
225 let mut rows = Vec::new();
226 for rec in records {
227 let mut row: Vec<String> = rec
228 .fields
229 .into_iter()
230 .map(|s| s.trim().to_string())
231 .collect();
232 while row.len() < ncols {
233 row.push(String::new());
234 }
235 rows.push(row);
236 }
237 Ok(Self { headers, rows })
238 }
239 pub fn to_csv_string(&self, delimiter: char) -> String {
241 let mut out = String::new();
242 out.push_str(&self.headers.join(&delimiter.to_string()));
243 out.push('\n');
244 for row in &self.rows {
245 let line: Vec<String> = row.iter().map(|f| quote_field(f, delimiter)).collect();
246 out.push_str(&line.join(&delimiter.to_string()));
247 out.push('\n');
248 }
249 out
250 }
251 pub fn column_index(&self, name: &str) -> std::result::Result<usize, Error> {
253 self.headers
254 .iter()
255 .position(|h| h == name)
256 .ok_or_else(|| Error::Parse(format!("column '{}' not found", name)))
257 }
258 pub fn column_values(&self, name: &str) -> std::result::Result<Vec<&str>, Error> {
260 let idx = self.column_index(name)?;
261 Ok(self.rows.iter().map(|r| r[idx].as_str()).collect())
262 }
263 pub fn column_f64(&self, name: &str) -> std::result::Result<Vec<f64>, Error> {
265 let idx = self.column_index(name)?;
266 self.rows
267 .iter()
268 .enumerate()
269 .map(|(i, r)| {
270 let s = r[idx].trim();
271 if s.is_empty() {
272 Ok(f64::NAN)
273 } else {
274 s.parse::<f64>().map_err(|_| {
275 Error::Parse(format!("row {}: cannot parse '{}' as f64", i, s))
276 })
277 }
278 })
279 .collect()
280 }
281 pub fn row_count(&self) -> usize {
283 self.rows.len()
284 }
285 pub fn col_count(&self) -> usize {
287 self.headers.len()
288 }
289}
/// Formats rows of `f64` values as delimited text without touching the
/// file system.
#[allow(dead_code)]
pub struct InMemoryCsvWriter {
    /// Column names; their count is the required length of every formatted row.
    pub(super) columns: Vec<String>,
    /// Character placed between adjacent values.
    pub(super) delimiter: char,
    /// Number of digits after the decimal point used when formatting values.
    pub(super) precision: usize,
    /// Rows stored through `add_row`.
    pub(super) rows: Vec<Vec<f64>>,
}
298#[allow(dead_code)]
299impl InMemoryCsvWriter {
300 pub fn new(columns: &[&str], delimiter: char) -> Self {
302 Self {
303 columns: columns.iter().map(|s| s.to_string()).collect(),
304 delimiter,
305 precision: 6,
306 rows: Vec::new(),
307 }
308 }
309 pub fn with_precision(mut self, precision: usize) -> Self {
311 self.precision = precision;
312 self
313 }
314 pub fn write_row(&self, values: &[f64]) -> std::result::Result<String, Error> {
316 if values.len() != self.columns.len() {
317 return Err(Error::Parse(format!(
318 "expected {} values, got {}",
319 self.columns.len(),
320 values.len()
321 )));
322 }
323 let parts: Vec<String> = values
324 .iter()
325 .map(|v| format!("{:.prec$}", v, prec = self.precision))
326 .collect();
327 Ok(parts.join(&self.delimiter.to_string()))
328 }
329 pub fn write_header(&self) -> String {
331 self.columns.join(&self.delimiter.to_string())
332 }
333 pub fn add_row(&mut self, values: Vec<f64>) {
335 self.rows.push(values);
336 }
337 pub fn write_all(&self, rows: &[Vec<f64>]) -> String {
339 let mut out = self.write_header();
340 out.push('\n');
341 for row in rows {
342 if let Ok(line) = self.write_row(row) {
343 out.push_str(&line);
344 out.push('\n');
345 }
346 }
347 out
348 }
349}
350pub struct CsvReader;
352impl CsvReader {
353 pub fn read(path: &str) -> Result<(Vec<String>, Vec<Vec<f64>>)> {
357 let file = File::open(Path::new(path))?;
358 let reader = BufReader::new(file);
359 let mut lines = reader.lines();
360 let header_line = lines
361 .next()
362 .ok_or_else(|| Error::Parse("empty CSV file".to_string()))??;
363 let headers: Vec<String> = header_line
364 .split(',')
365 .map(|s| s.trim().to_string())
366 .collect();
367 let mut rows = Vec::new();
368 for line in lines {
369 let line = line?;
370 let trimmed = line.trim();
371 if trimmed.is_empty() {
372 continue;
373 }
374 let row: std::result::Result<Vec<f64>, _> = trimmed
375 .split(',')
376 .map(|s| s.trim().parse::<f64>())
377 .collect();
378 let row = row.map_err(|e| Error::Parse(e.to_string()))?;
379 rows.push(row);
380 }
381 Ok((headers, rows))
382 }
383}
/// Data type assigned to a CSV column.
///
/// Within this file it is the result of `TypedCsvReader::column_type`, which
/// delegates to `infer_column_type`. NOTE(review): the inference rules live in
/// that helper, which is not visible here, so the variant descriptions below
/// are based on the names only.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ColumnType {
    /// Presumably: every value is an integer.
    Integer,
    /// Presumably: every value is a floating-point number.
    Float,
    /// Presumably: every value is a boolean literal.
    Boolean,
    /// Presumably: free-form text.
    Text,
    /// Presumably: the column holds no values.
    Empty,
}
/// Record-at-a-time CSV reader over a file.
#[allow(dead_code)]
pub struct CsvStreamParser {
    /// Buffered handle to the file being read; positioned after the header
    /// line once the parser has been opened.
    pub(super) reader: BufReader<File>,
    /// Field separator handed to `split_csv_line`.
    pub(super) delimiter: char,
    /// Trimmed column names taken from the first line of the file.
    pub(super) headers: Vec<String>,
    /// Raw text of the record collected so far; holds more than one physical
    /// line while a quoted field is still open.
    pub(super) pending: String,
    /// `true` while the number of `"` characters seen in the current record
    /// is odd, i.e. a quoted field has not been closed yet.
    pub(super) open_quotes: bool,
}
426#[allow(dead_code)]
427impl CsvStreamParser {
428 pub fn open(path: &str, delimiter: char) -> std::result::Result<Self, Error> {
430 let file = File::open(Path::new(path))?;
431 let mut reader = BufReader::new(file);
432 let mut header_line = String::new();
433 reader.read_line(&mut header_line)?;
434 let headers = split_csv_line(
435 header_line.trim_end_matches('\n').trim_end_matches('\r'),
436 delimiter,
437 )
438 .into_iter()
439 .map(|s| s.trim().to_string())
440 .collect();
441 Ok(Self {
442 reader,
443 delimiter,
444 headers,
445 pending: String::new(),
446 open_quotes: false,
447 })
448 }
449 pub fn headers(&self) -> &[String] {
451 &self.headers
452 }
453 pub fn next_record(&mut self) -> std::result::Result<Option<CsvRecord>, Error> {
457 loop {
458 let mut line = String::new();
459 let bytes_read = self.reader.read_line(&mut line)?;
460 if bytes_read == 0 {
461 if self.pending.is_empty() {
462 return Ok(None);
463 }
464 if self.open_quotes {
465 return Err(Error::Parse("unterminated quoted field at EOF".to_string()));
466 }
467 let record = self.flush_pending()?;
468 return Ok(Some(record));
469 }
470 for ch in line.chars() {
471 if ch == '"' {
472 self.open_quotes = !self.open_quotes;
473 }
474 }
475 self.pending.push_str(&line);
476 if !self.open_quotes {
477 let record = self.flush_pending()?;
478 if record.fields.len() == 1 && record.fields[0].trim().is_empty() {
479 continue;
480 }
481 return Ok(Some(record));
482 }
483 }
484 }
485 fn flush_pending(&mut self) -> std::result::Result<CsvRecord, Error> {
486 let line = std::mem::take(&mut self.pending);
487 let trimmed = line.trim_end_matches('\n').trim_end_matches('\r');
488 let fields = split_csv_line(trimmed, self.delimiter);
489 Ok(CsvRecord {
490 fields: fields.into_iter().map(|s| s.trim().to_string()).collect(),
491 })
492 }
493}
/// Numeric CSV data parsed from a string and held in memory.
#[allow(dead_code)]
pub struct InMemoryCsvReader {
    /// Trimmed column names taken from the first non-blank, non-comment line.
    pub(super) headers: Vec<String>,
    /// One entry per data line; a cell is `Some(value)` when its text parsed
    /// as `f64` and `None` when it was empty or not numeric.
    pub(super) rows: Vec<Vec<Option<f64>>>,
}
506#[allow(dead_code)]
507impl InMemoryCsvReader {
508 pub fn from_str(data: &str) -> std::result::Result<Self, Error> {
510 Self::parse_with_delimiter(data, ',')
511 }
512 pub fn parse_with_delimiter(data: &str, delim: char) -> std::result::Result<Self, Error> {
514 let mut non_empty_lines: Vec<&str> = data
515 .lines()
516 .filter(|l| {
517 let t = l.trim();
518 !t.is_empty() && !t.starts_with('#')
519 })
520 .collect();
521 if non_empty_lines.is_empty() {
522 return Err(Error::Parse("CSV input is empty".to_string()));
523 }
524 let header_line = non_empty_lines.remove(0);
525 let headers: Vec<String> = split_csv_line(header_line, delim)
526 .into_iter()
527 .map(|s| s.trim().to_string())
528 .collect();
529 if headers.is_empty() {
530 return Err(Error::Parse("no headers found".to_string()));
531 }
532 let mut rows: Vec<Vec<Option<f64>>> = Vec::new();
533 for line in &non_empty_lines {
534 let fields = split_csv_line(line, delim);
535 let parsed: Vec<Option<f64>> = fields
536 .iter()
537 .map(|f| {
538 let t = f.trim();
539 if t.is_empty() {
540 None
541 } else {
542 t.parse::<f64>().ok()
543 }
544 })
545 .collect();
546 rows.push(parsed);
547 }
548 Ok(Self { headers, rows })
549 }
550 pub fn get_column_f64(&self, name: &str) -> std::result::Result<Vec<f64>, Error> {
554 let idx = self
555 .headers
556 .iter()
557 .position(|h| h == name)
558 .ok_or_else(|| Error::Parse(format!("column '{}' not found", name)))?;
559 let col: Vec<f64> = self
560 .rows
561 .iter()
562 .map(|row| row.get(idx).copied().flatten().unwrap_or(f64::NAN))
563 .collect();
564 Ok(col)
565 }
566 pub fn get_row_count(&self) -> usize {
568 self.rows.len()
569 }
570 pub fn headers(&self) -> &[String] {
572 &self.headers
573 }
574 pub fn column_stats(&self, name: &str) -> std::result::Result<(f64, f64, f64, f64), Error> {
576 let col = self.get_column_f64(name)?;
577 let valid: Vec<f64> = col.into_iter().filter(|v| !v.is_nan()).collect();
578 if valid.is_empty() {
579 return Err(Error::Parse(format!(
580 "column '{}' has no valid numeric data",
581 name
582 )));
583 }
584 let n = valid.len() as f64;
585 let min = valid.iter().cloned().fold(f64::INFINITY, f64::min);
586 let max = valid.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
587 let mean = valid.iter().sum::<f64>() / n;
588 let variance = valid.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / n;
589 let std = variance.sqrt();
590 Ok((min, max, mean, std))
591 }
592}
/// Typed accessors (integer, float, boolean, inferred type) over a parsed
/// [`CsvTable`].
#[allow(dead_code)]
pub struct TypedCsvReader {
    /// The parsed table all accessors read from.
    pub(super) table: CsvTable,
}
613#[allow(dead_code)]
614impl TypedCsvReader {
615 pub fn from_str(data: &str) -> std::result::Result<Self, Error> {
617 let table = CsvTable::from_str(data, ',')?;
618 Ok(Self { table })
619 }
620 pub fn with_delimiter(data: &str, delimiter: char) -> std::result::Result<Self, Error> {
622 let table = CsvTable::from_str(data, delimiter)?;
623 Ok(Self { table })
624 }
625 pub fn column_type(&self, name: &str) -> std::result::Result<ColumnType, Error> {
627 let idx = self.table.column_index(name)?;
628 let values: Vec<&str> = self.table.rows.iter().map(|r| r[idx].as_str()).collect();
629 Ok(infer_column_type(&values))
630 }
631 pub fn column_as_i64(&self, name: &str) -> std::result::Result<Vec<i64>, Error> {
633 let idx = self.table.column_index(name)?;
634 self.table
635 .rows
636 .iter()
637 .enumerate()
638 .map(|(i, r)| {
639 let s = r[idx].trim();
640 s.parse::<i64>()
641 .map_err(|_| Error::Parse(format!("row {}: cannot parse '{}' as i64", i, s)))
642 })
643 .collect()
644 }
645 pub fn column_as_f64(&self, name: &str) -> std::result::Result<Vec<f64>, Error> {
647 self.table.column_f64(name)
648 }
649 pub fn column_as_bool(&self, name: &str) -> std::result::Result<Vec<bool>, Error> {
653 let idx = self.table.column_index(name)?;
654 self.table
655 .rows
656 .iter()
657 .enumerate()
658 .map(|(i, r)| {
659 let s = r[idx].trim().to_lowercase();
660 match s.as_str() {
661 "true" | "1" | "yes" => Ok(true),
662 "false" | "0" | "no" => Ok(false),
663 other => Err(Error::Parse(format!(
664 "row {}: cannot parse '{}' as bool",
665 i, other
666 ))),
667 }
668 })
669 .collect()
670 }
671 pub fn table(&self) -> &CsvTable {
673 &self.table
674 }
675 pub fn headers(&self) -> &[String] {
677 &self.table.headers
678 }
679 pub fn row_count(&self) -> usize {
681 self.table.row_count()
682 }
683}
/// Result of comparing two CSV tables.
///
/// NOTE(review): the code that fills this structure is not visible in this
/// part of the file; the field descriptions follow the field names and should
/// be confirmed against the producer.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct CsvDiff {
    /// Presumably rows present only in the first ("before") table.
    pub removed: Vec<Vec<String>>,
    /// Presumably rows present only in the second ("after") table.
    pub added: Vec<Vec<String>>,
    /// Rows reported as changed, each with its key and both versions.
    pub changed: Vec<CsvChangedRow>,
}
/// File-backed writer for comma-separated rows of `f64` values.
pub struct CsvWriter {
    /// Buffered handle to the output file.
    pub(super) writer: BufWriter<File>,
}
699impl CsvWriter {
700 pub fn new(path: &str, headers: &[&str]) -> Result<Self> {
702 let file = File::create(Path::new(path))?;
703 let mut writer = BufWriter::new(file);
704 writeln!(writer, "{}", headers.join(","))?;
705 writer.flush()?;
706 let file = OpenOptions::new().append(true).open(Path::new(path))?;
707 let writer = BufWriter::new(file);
708 Ok(Self { writer })
709 }
710 pub fn write_row(&mut self, values: &[f64]) -> Result<()> {
712 let row: Vec<String> = values.iter().map(|v| v.to_string()).collect();
713 writeln!(self.writer, "{}", row.join(","))?;
714 self.writer.flush()?;
715 Ok(())
716 }
717}
/// One row reported as changed in a [`CsvDiff`].
///
/// NOTE(review): the code that produces these values is not visible in this
/// part of the file; the field descriptions follow the field names.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct CsvChangedRow {
    /// Presumably the key value that identifies the row in both tables.
    pub key: String,
    /// Presumably the row's cells in the first ("before") table.
    pub before: Vec<String>,
    /// Presumably the row's cells in the second ("after") table.
    pub after: Vec<String>,
}
/// CSV writer that formats lines according to a [`CsvWriterConfig`] and
/// collects them in an in-memory string.
#[allow(dead_code)]
pub struct ConfigurableCsvWriter {
    /// Delimiter, line ending, quoting and precision settings.
    pub(super) config: CsvWriterConfig,
    /// All output written so far.
    pub(super) buffer: String,
}
749#[allow(dead_code)]
750impl ConfigurableCsvWriter {
751 pub fn new(config: CsvWriterConfig) -> Self {
753 Self {
754 config,
755 buffer: String::new(),
756 }
757 }
758 pub fn write_header(&mut self, headers: &[&str]) {
760 let line = if self.config.quote_all {
761 headers
762 .iter()
763 .map(|h| format!("\"{}\"", h.replace('"', "\"\"")))
764 .collect::<Vec<_>>()
765 .join(&self.config.delimiter.to_string())
766 } else {
767 headers
768 .iter()
769 .map(|h| quote_field(h, self.config.delimiter))
770 .collect::<Vec<_>>()
771 .join(&self.config.delimiter.to_string())
772 };
773 self.buffer.push_str(&line);
774 self.buffer.push_str(&self.config.line_ending);
775 }
776 pub fn write_f64_row(&mut self, values: &[f64]) {
778 let prec = self.config.precision;
779 let line: Vec<String> = values
780 .iter()
781 .map(|v| format!("{:.prec$}", v, prec = prec))
782 .collect();
783 self.buffer
784 .push_str(&line.join(&self.config.delimiter.to_string()));
785 self.buffer.push_str(&self.config.line_ending);
786 }
787 pub fn write_str_row(&mut self, values: &[&str]) {
789 let delim = self.config.delimiter;
790 let line: Vec<String> = values
791 .iter()
792 .map(|v| {
793 if self.config.quote_all {
794 format!("\"{}\"", v.replace('"', "\"\""))
795 } else {
796 quote_field(v, delim)
797 }
798 })
799 .collect();
800 self.buffer
801 .push_str(&line.join(&self.config.delimiter.to_string()));
802 self.buffer.push_str(&self.config.line_ending);
803 }
804 pub fn finish(self) -> String {
806 self.buffer
807 }
808}