scirs2_io/csv/
mod.rs

1//! CSV file format support
2//!
3//! This module provides functionality for reading and writing CSV (Comma-Separated Values)
4//! files, commonly used for storing tabular data.
5//!
6//! Features:
7//! - Reading and writing CSV files with various configuration options
8//! - Support for custom delimiters, quotes, and line endings
9//! - Handling of missing values and type conversions
10//! - Memory-efficient processing of large files
11//! - Column-based I/O operations
12
13use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
14use ndarray::{Array1, Array2};
15use num_complex::Complex64;
16use std::fs::File;
17use std::io::{BufRead, BufReader, BufWriter, Write};
18use std::path::Path;
19
20use crate::error::{IoError, Result};
21
/// CSV reader configuration
///
/// Controls how the readers in this module split lines into fields and which
/// lines they consume. Every field has a default (see the `Default` impl), so
/// callers typically override only what they need with struct-update syntax.
#[derive(Debug, Clone)]
pub struct CsvReaderConfig {
    /// Delimiter character separating fields outside quotes (default: ',')
    pub delimiter: char,
    /// Quote character (default: '"'); a doubled quote inside a quoted field
    /// is read as one literal quote character
    pub quote_char: char,
    /// Whether to trim whitespace from fields (default: false)
    pub trim: bool,
    /// Whether the file has a header row (default: true)
    pub has_header: bool,
    /// Comment character; data lines whose first non-whitespace character is
    /// this one are ignored (default: None)
    pub comment_char: Option<char>,
    /// Number of lines to skip at the beginning of the file, before the
    /// header row if there is one (default: 0)
    pub skip_rows: usize,
    /// Maximum number of data rows to read (default: None = all rows)
    pub max_rows: Option<usize>,
}
40
41impl Default for CsvReaderConfig {
42    fn default() -> Self {
43        Self {
44            delimiter: ',',
45            quote_char: '"',
46            trim: false,
47            has_header: true,
48            comment_char: None,
49            skip_rows: 0,
50            max_rows: None,
51        }
52    }
53}
54
55/// Read a CSV file into a 2D array of strings
56///
57/// # Arguments
58///
59/// * `path` - Path to the CSV file
60/// * `config` - Optional CSV reader configuration
61///
62/// # Returns
63///
64/// * `Result<(Vec<String>, Array2<String>)>` - Header labels and data as strings
65///
66/// # Examples
67///
68/// ```no_run
69/// use scirs2_io::csv::{read_csv, CsvReaderConfig};
70///
71/// // Read with default configuration
72/// let (headers, data) = read_csv("data.csv", None).unwrap();
73/// println!("Headers: {:?}", headers);
74/// println!("Data shape: {:?}", data.shape());
75///
76/// // Read with custom configuration
77/// let config = CsvReaderConfig {
78///     delimiter: ';',
79///     has_header: false,
80///     ..Default::default()
81/// };
82/// let (_, data) = read_csv("data.csv", Some(config)).unwrap();
83/// ```
84pub fn read_csv<P: AsRef<Path>>(
85    path: P,
86    config: Option<CsvReaderConfig>,
87) -> Result<(Vec<String>, Array2<String>)> {
88    let config = config.unwrap_or_default();
89
90    let file = File::open(path).map_err(|e| IoError::FileError(e.to_string()))?;
91    let reader = BufReader::new(file);
92
93    let mut lines = reader.lines();
94    let mut rows = Vec::new();
95
96    // Skip rows if needed
97    for _ in 0..config.skip_rows {
98        if lines.next().is_none() {
99            return Err(IoError::FormatError("Not enough rows in file".to_string()));
100        }
101    }
102
103    // Read header if present
104    let headers = if config.has_header {
105        match lines.next() {
106            Some(Ok(line)) => parse_csv_line(&line, &config),
107            Some(Err(e)) => return Err(IoError::FileError(e.to_string())),
108            None => return Err(IoError::FormatError("Empty file".to_string())),
109        }
110    } else {
111        Vec::new()
112    };
113
114    // Read data
115    let mut row_count = 0;
116    for line_result in lines {
117        // Break if we've read enough rows
118        if let Some(max) = config.max_rows {
119            if row_count >= max {
120                break;
121            }
122        }
123
124        let line = line_result.map_err(|e| IoError::FileError(e.to_string()))?;
125
126        // Skip comment lines
127        if let Some(comment_char) = config.comment_char {
128            if line.trim().starts_with(comment_char) {
129                continue;
130            }
131        }
132
133        // Skip empty lines
134        if line.trim().is_empty() {
135            continue;
136        }
137
138        let row = parse_csv_line(&line, &config);
139        rows.push(row);
140        row_count += 1;
141    }
142
143    // Check if we have any data
144    if rows.is_empty() {
145        return Err(IoError::FormatError("No data rows in file".to_string()));
146    }
147
148    // Determine the number of columns from the first row
149    let num_cols = rows[0].len();
150
151    // Ensure all rows have the same number of columns
152    for (i, row) in rows.iter().enumerate() {
153        if row.len() != num_cols {
154            return Err(IoError::FormatError(format!(
155                "Inconsistent number of columns: row {} has {} columns, expected {}",
156                i + 1,
157                row.len(),
158                num_cols
159            )));
160        }
161    }
162
163    // Convert to Array2
164    let num_rows = rows.len();
165    let mut data = Array2::from_elem((num_rows, num_cols), String::new());
166
167    for (i, row) in rows.iter().enumerate() {
168        for (j, value) in row.iter().enumerate() {
169            data[[i, j]] = value.clone();
170        }
171    }
172
173    Ok((headers, data))
174}
175
176/// Parse a CSV line into fields
177fn parse_csv_line(line: &str, config: &CsvReaderConfig) -> Vec<String> {
178    let mut fields = Vec::new();
179    let mut field = String::new();
180    let mut in_quotes = false;
181    let mut chars = line.chars().peekable();
182
183    while let Some(c) = chars.next() {
184        // Handle quotes
185        if c == config.quote_char {
186            // Check for escaped quotes (double quotes)
187            if in_quotes && chars.peek() == Some(&config.quote_char) {
188                chars.next(); // Consume the second quote
189                field.push(config.quote_char);
190            } else {
191                in_quotes = !in_quotes;
192            }
193        }
194        // Handle delimiters
195        else if c == config.delimiter && !in_quotes {
196            let processed_field = if config.trim {
197                field.trim().to_string()
198            } else {
199                field
200            };
201            fields.push(processed_field);
202            field = String::new();
203        }
204        // Handle regular characters
205        else {
206            field.push(c);
207        }
208    }
209
210    // Add the last field
211    let processed_field = if config.trim {
212        field.trim().to_string()
213    } else {
214        field
215    };
216    fields.push(processed_field);
217
218    fields
219}
220
221/// Read a CSV file and convert to numeric arrays
222///
223/// # Arguments
224///
225/// * `path` - Path to the CSV file
226/// * `config` - Optional CSV reader configuration
227///
228/// # Returns
229///
230/// * `Result<(Vec<String>, Array2<f64>)>` - Header labels and data as f64 values
231///
232/// # Examples
233///
234/// ```no_run
235/// use scirs2_io::csv::{read_csv_numeric, CsvReaderConfig};
236///
237/// let (headers, data) = read_csv_numeric("data.csv", None).unwrap();
238/// println!("Numeric data shape: {:?}", data.shape());
239/// ```
240pub fn read_csv_numeric<P: AsRef<Path>>(
241    path: P,
242    config: Option<CsvReaderConfig>,
243) -> Result<(Vec<String>, Array2<f64>)> {
244    let (headers, string_data) = read_csv(path, config)?;
245
246    let shape = string_data.shape();
247    let mut numeric_data = Array2::<f64>::zeros((shape[0], shape[1]));
248
249    for i in 0..shape[0] {
250        for j in 0..shape[1] {
251            let value = string_data[[i, j]].parse::<f64>().map_err(|_| {
252                IoError::FormatError(format!(
253                    "Could not convert value '{}' at position [{}, {}] to number",
254                    string_data[[i, j]],
255                    i,
256                    j
257                ))
258            })?;
259            numeric_data[[i, j]] = value;
260        }
261    }
262
263    Ok((headers, numeric_data))
264}
265
/// CSV writer configuration
///
/// Controls field separation, quoting, header output and line termination
/// when writing CSV files. Every field has a default (see the `Default`
/// impl).
#[derive(Debug, Clone)]
pub struct CsvWriterConfig {
    /// Delimiter character placed between fields (default: ',')
    pub delimiter: char,
    /// Quote character (default: '"'); occurrences inside a quoted field are
    /// written doubled
    pub quote_char: char,
    /// Always quote fields (default: false)
    pub always_quote: bool,
    /// Quote fields containing special characters, i.e. the delimiter, the
    /// quote character, `\n` or `\r` (default: true)
    pub quote_special: bool,
    /// Write header row (default: true); a header is only written when the
    /// caller also supplies one
    pub write_header: bool,
    /// Line ending (default: LF)
    pub line_ending: LineEnding,
}
282
283impl Default for CsvWriterConfig {
284    fn default() -> Self {
285        Self {
286            delimiter: ',',
287            quote_char: '"',
288            always_quote: false,
289            quote_special: true,
290            write_header: true,
291            line_ending: LineEnding::default(),
292        }
293    }
294}
295
/// Line ending options for CSV files
///
/// Selects the terminator written after each line; `LF` is the default
/// variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum LineEnding {
    /// LF (Unix) line ending: \n
    #[default]
    LF,
    /// CRLF (Windows) line ending: \r\n
    CRLF,
}
305
306impl LineEnding {
307    fn as_str(&self) -> &'static str {
308        match self {
309            LineEnding::LF => "\n",
310            LineEnding::CRLF => "\r\n",
311        }
312    }
313}
314
/// Types of missing values that can be recognized in CSV files
///
/// Used during typed conversion: a cell whose trimmed text equals one of
/// `values` (compared ASCII case-insensitively) is treated as missing.
#[derive(Debug, Clone)]
pub struct MissingValueOptions {
    /// Strings to interpret as missing values (default: NA, N/A, NaN, null, "")
    pub values: Vec<String>,
    /// Replace missing values with a default value (default: None).
    /// The replacement is applied only to `Float` columns; missing cells in
    /// other column types stay missing.
    pub fill_value: Option<f64>,
}
323
324impl Default for MissingValueOptions {
325    fn default() -> Self {
326        Self {
327            values: vec![
328                "NA".to_string(),
329                "N/A".to_string(),
330                "NaN".to_string(),
331                "null".to_string(),
332                "".to_string(),
333            ],
334            fill_value: None,
335        }
336    }
337}
338
/// Column data type specification for type conversion
///
/// Names the target type a column's cells are converted to when reading
/// typed data, and the result of automatic type detection.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ColumnType {
    /// String type (default)
    String,
    /// Integer type (i64)
    Integer,
    /// Float type (f64)
    Float,
    /// Boolean type (true/false, yes/no, 1/0; letter case is ignored)
    Boolean,
    /// Date type (YYYY-MM-DD)
    Date,
    /// Time type (HH:MM:SS, optionally with fractional seconds)
    Time,
    /// DateTime type (YYYY-MM-DDThh:mm:ss; a space is also accepted in place
    /// of `T`, and fractional seconds are optional)
    DateTime,
    /// Complex number (real+imagi, or the coordinate form `(real,imag)`)
    Complex,
}
359
/// Column specification for CSV reading
///
/// Bundles a column's position, optional name, target type and optional
/// per-column missing-value rules.
// NOTE(review): no code in the visible part of this file consumes `ColumnSpec`;
// confirm where it is used before relying on any field semantics.
#[derive(Debug, Clone)]
pub struct ColumnSpec {
    /// Column index
    pub index: usize,
    /// Column name (optional)
    pub name: Option<String>,
    /// Column data type
    pub dtype: ColumnType,
    /// Custom missing values for this column
    pub missing_values: Option<MissingValueOptions>,
}
372
/// Data value type for mixed type columns
///
/// One converted CSV cell. Each variant other than `Missing` corresponds to
/// the `ColumnType` of the same name.
#[derive(Debug, Clone)]
pub enum DataValue {
    /// String value
    String(String),
    /// Integer value
    Integer(i64),
    /// Float value
    Float(f64),
    /// Boolean value
    Boolean(bool),
    /// Date value (year, month, day)
    Date(NaiveDate),
    /// Time value (hour, minute, second)
    Time(NaiveTime),
    /// DateTime value
    DateTime(NaiveDateTime),
    /// Complex number value (real, imaginary)
    Complex(Complex64),
    /// Missing value: the cell matched a missing-value marker or was empty
    /// after trimming
    Missing,
}
395
396impl std::fmt::Display for DataValue {
397    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
398        match self {
399            DataValue::String(s) => write!(f, "{}", s),
400            DataValue::Integer(i) => write!(f, "{}", i),
401            DataValue::Float(v) => write!(f, "{}", v),
402            DataValue::Boolean(b) => write!(f, "{}", b),
403            DataValue::Date(d) => write!(f, "{}", d.format("%Y-%m-%d")),
404            DataValue::Time(t) => write!(f, "{}", t.format("%H:%M:%S%.f")),
405            DataValue::DateTime(dt) => write!(f, "{}", dt.format("%Y-%m-%dT%H:%M:%S%.f")),
406            DataValue::Complex(c) => {
407                if c.im >= 0.0 {
408                    write!(f, "{}+{}i", c.re, c.im)
409                } else {
410                    write!(f, "{}{}i", c.re, c.im)
411                }
412            }
413            DataValue::Missing => write!(f, "NA"),
414        }
415    }
416}
417
418/// Automatically detect column types from data
419pub fn detect_column_types(data: &Array2<String>) -> Vec<ColumnType> {
420    let (rows, cols) = (data.shape()[0], data.shape()[1]);
421
422    // Default to String if we can't determine type
423    if rows == 0 {
424        return vec![ColumnType::String; cols];
425    }
426
427    let mut col_types = vec![ColumnType::String; cols];
428
429    for col in 0..cols {
430        let mut is_int = true;
431        let mut is_float = true;
432        let mut is_bool = true;
433        let mut is_date = true;
434        let mut is_time = true;
435        let mut is_datetime = true;
436        let mut is_complex = true;
437        let mut non_empty_rows = 0;
438
439        for row in 0..rows {
440            let val = data[[row, col]].trim();
441
442            // Skip empty values for type detection
443            if val.is_empty() {
444                continue;
445            }
446
447            non_empty_rows += 1;
448
449            // Check if could be a boolean
450            let lower_val = val.to_lowercase();
451            let is_valid_bool =
452                ["true", "false", "yes", "no", "1", "0"].contains(&lower_val.as_str());
453            if !is_valid_bool {
454                is_bool = false;
455            }
456
457            // Check if could be an integer
458            if is_int && val.parse::<i64>().is_err() {
459                is_int = false;
460            }
461
462            // Check if could be a float
463            if is_float && val.parse::<f64>().is_err() {
464                is_float = false;
465            }
466
467            // Check if could be a date (YYYY-MM-DD)
468            if is_date && NaiveDate::parse_from_str(val, "%Y-%m-%d").is_err() {
469                is_date = false;
470            }
471
472            // Check if could be a time (HH:MM:SS)
473            if is_time
474                && NaiveTime::parse_from_str(val, "%H:%M:%S").is_err()
475                && NaiveTime::parse_from_str(val, "%H:%M:%S%.f").is_err()
476            {
477                is_time = false;
478            }
479
480            // Check if could be a datetime (YYYY-MM-DDThh:mm:ss)
481            if is_datetime
482                && NaiveDateTime::parse_from_str(val, "%Y-%m-%dT%H:%M:%S").is_err()
483                && NaiveDateTime::parse_from_str(val, "%Y-%m-%d %H:%M:%S").is_err()
484                && NaiveDateTime::parse_from_str(val, "%Y-%m-%dT%H:%M:%S%.f").is_err()
485                && NaiveDateTime::parse_from_str(val, "%Y-%m-%d %H:%M:%S%.f").is_err()
486            {
487                is_datetime = false;
488            }
489
490            // Check if could be a complex number
491            if is_complex {
492                // Try to parse as complex number with patterns like "3+4i", "3-4i", etc.
493                is_complex = parse_complex(val).is_some();
494            }
495        }
496
497        // Don't auto-detect special types if we have too few samples
498        if non_empty_rows < 2 {
499            is_date = false;
500            is_time = false;
501            is_datetime = false;
502            is_complex = false;
503        }
504
505        // Assign most specific type, with priority
506        if is_bool {
507            col_types[col] = ColumnType::Boolean;
508        } else if is_int {
509            col_types[col] = ColumnType::Integer;
510        } else if is_float {
511            col_types[col] = ColumnType::Float;
512        } else if is_date {
513            col_types[col] = ColumnType::Date;
514        } else if is_time {
515            col_types[col] = ColumnType::Time;
516        } else if is_datetime {
517            col_types[col] = ColumnType::DateTime;
518        } else if is_complex {
519            col_types[col] = ColumnType::Complex;
520        }
521    }
522
523    col_types
524}
525
526/// Parse a complex number from string like "3+4i", "-1-2i"
527fn parse_complex(s: &str) -> Option<Complex64> {
528    // Common complex number formats:
529    // 1. "a+bi" or "a-bi" - standard form
530    // 2. "(a,b)" - coordinate form
531
532    if s.contains('i') {
533        // Handle standard form
534        let s = s.trim().replace(" ", "");
535
536        // Remove trailing 'i'
537        let s = if s.ends_with('i') {
538            &s[0..s.len() - 1]
539        } else {
540            return None;
541        };
542
543        // Find the position of + or - that isn't at the start
544        let mut split_pos = None;
545        let mut in_first_number = true;
546
547        for (i, c) in s.chars().enumerate() {
548            if i == 0 {
549                continue; // Skip first character which might be a sign
550            }
551
552            if c == '+' || c == '-' {
553                split_pos = Some((i, c));
554                break;
555            }
556
557            if !c.is_ascii_digit()
558                && c != '.'
559                && c != 'e'
560                && c != 'E'
561                && !(c == '-' && (s.as_bytes()[i - 1] == b'e' || s.as_bytes()[i - 1] == b'E'))
562            {
563                in_first_number = false;
564            }
565        }
566
567        if let Some((pos, sign)) = split_pos {
568            let real_part = s[0..pos].parse::<f64>().ok()?;
569            let imag_part = if sign == '+' {
570                s[pos + 1..].parse::<f64>().ok()?
571            } else {
572                -s[pos + 1..].parse::<f64>().ok()?
573            };
574
575            Some(Complex64::new(real_part, imag_part))
576        } else if in_first_number {
577            // Form like "0i" (just imaginary part)
578            Some(Complex64::new(0.0, s.parse::<f64>().ok()?))
579        } else {
580            None
581        }
582    } else if s.starts_with('(') && s.ends_with(')') && s.contains(',') {
583        // Handle coordinate form (a,b)
584        let contents = &s[1..s.len() - 1];
585        let parts: Vec<&str> = contents.split(',').collect();
586
587        if parts.len() == 2 {
588            let real = parts[0].trim().parse::<f64>().ok()?;
589            let imag = parts[1].trim().parse::<f64>().ok()?;
590            Some(Complex64::new(real, imag))
591        } else {
592            None
593        }
594    } else {
595        None
596    }
597}
598
599/// Convert a string to a specified type with missing value handling
600fn convert_value(
601    value: &str,
602    col_type: ColumnType,
603    missing_values: &MissingValueOptions,
604) -> Result<DataValue> {
605    let trimmed = value.trim();
606
607    // Check for missing values
608    if missing_values
609        .values
610        .iter()
611        .any(|mv| mv.eq_ignore_ascii_case(trimmed))
612    {
613        if let (Some(fill), ColumnType::Float) = (missing_values.fill_value, col_type) {
614            return Ok(DataValue::Float(fill));
615        }
616        return Ok(DataValue::Missing);
617    }
618
619    // Empty string check
620    if trimmed.is_empty() {
621        return Ok(DataValue::Missing);
622    }
623
624    // Type conversion
625    match col_type {
626        ColumnType::String => Ok(DataValue::String(trimmed.to_string())),
627        ColumnType::Integer => match trimmed.parse::<i64>() {
628            Ok(val) => Ok(DataValue::Integer(val)),
629            Err(_) => Err(IoError::FormatError(format!(
630                "Cannot convert '{}' to integer",
631                value
632            ))),
633        },
634        ColumnType::Float => match trimmed.parse::<f64>() {
635            Ok(val) => Ok(DataValue::Float(val)),
636            Err(_) => Err(IoError::FormatError(format!(
637                "Cannot convert '{}' to float",
638                value
639            ))),
640        },
641        ColumnType::Boolean => {
642            let lower = trimmed.to_lowercase();
643            match lower.as_str() {
644                "true" | "yes" | "1" => Ok(DataValue::Boolean(true)),
645                "false" | "no" | "0" => Ok(DataValue::Boolean(false)),
646                _ => Err(IoError::FormatError(format!(
647                    "Cannot convert '{}' to boolean",
648                    value
649                ))),
650            }
651        }
652        ColumnType::Date => match NaiveDate::parse_from_str(trimmed, "%Y-%m-%d") {
653            Ok(date) => Ok(DataValue::Date(date)),
654            Err(_) => Err(IoError::FormatError(format!(
655                "Cannot convert '{}' to date (expected YYYY-MM-DD)",
656                value
657            ))),
658        },
659        ColumnType::Time => {
660            let result = NaiveTime::parse_from_str(trimmed, "%H:%M:%S")
661                .or_else(|_| NaiveTime::parse_from_str(trimmed, "%H:%M:%S%.f"));
662
663            match result {
664                Ok(time) => Ok(DataValue::Time(time)),
665                Err(_) => Err(IoError::FormatError(format!(
666                    "Cannot convert '{}' to time (expected HH:MM:SS[.f])",
667                    value
668                ))),
669            }
670        }
671        ColumnType::DateTime => {
672            let result = NaiveDateTime::parse_from_str(trimmed, "%Y-%m-%dT%H:%M:%S")
673                .or_else(|_| NaiveDateTime::parse_from_str(trimmed, "%Y-%m-%d %H:%M:%S"))
674                .or_else(|_| NaiveDateTime::parse_from_str(trimmed, "%Y-%m-%dT%H:%M:%S%.f"))
675                .or_else(|_| NaiveDateTime::parse_from_str(trimmed, "%Y-%m-%d %H:%M:%S%.f"));
676
677            match result {
678                Ok(dt) => Ok(DataValue::DateTime(dt)),
679                Err(_) => Err(IoError::FormatError(format!(
680                    "Cannot convert '{}' to datetime (expected YYYY-MM-DD[T ]HH:MM:SS[.f])",
681                    value
682                ))),
683            }
684        }
685        ColumnType::Complex => match parse_complex(trimmed) {
686            Some(complex) => Ok(DataValue::Complex(complex)),
687            None => Err(IoError::FormatError(format!(
688                "Cannot convert '{}' to complex number (expected a+bi or (a,b))",
689                value
690            ))),
691        },
692    }
693}
694
695/// Write a 2D array to a CSV file
696///
697/// # Arguments
698///
699/// * `path` - Path to the output CSV file
700/// * `data` - 2D array to write
701/// * `headers` - Optional column headers
702/// * `config` - Optional CSV writer configuration
703///
704/// # Returns
705///
706/// * `Result<()>` - Success or error
707///
708/// # Examples
709///
710/// ```no_run
711/// use ndarray::array;
712/// use scirs2_io::csv::{write_csv, CsvWriterConfig};
713///
714/// let data = array![[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]];
715/// let headers = vec!["A".to_string(), "B".to_string(), "C".to_string()];
716///
717/// // Write with default configuration
718/// write_csv("output.csv", &data, Some(&headers), None).unwrap();
719///
720/// // Write with custom configuration
721/// let config = CsvWriterConfig {
722///     delimiter: ';',
723///     always_quote: true,
724///     ..Default::default()
725/// };
726/// write_csv("output_custom.csv", &data, Some(&headers), Some(config)).unwrap();
727/// ```
728pub fn write_csv<P: AsRef<Path>, T: std::fmt::Display>(
729    path: P,
730    data: &Array2<T>,
731    headers: Option<&Vec<String>>,
732    config: Option<CsvWriterConfig>,
733) -> Result<()> {
734    let config = config.unwrap_or_default();
735
736    // Get data shape
737    let shape = data.shape();
738    let (rows, cols) = (shape[0], shape[1]);
739
740    // Check headers match data width
741    if let Some(hdrs) = headers {
742        if hdrs.len() != cols && config.write_header {
743            return Err(IoError::FormatError(format!(
744                "Header length ({}) does not match data width ({})",
745                hdrs.len(),
746                cols
747            )));
748        }
749    }
750
751    // Open file for writing
752    let mut file = File::create(path).map_err(|e| IoError::FileError(e.to_string()))?;
753
754    // Write headers if provided and enabled
755    if let Some(hdrs) = headers {
756        if config.write_header {
757            let header_line = format_csv_line(hdrs, &config);
758            file.write_all(header_line.as_bytes())
759                .map_err(|e| IoError::FileError(e.to_string()))?;
760            file.write_all(config.line_ending.as_str().as_bytes())
761                .map_err(|e| IoError::FileError(e.to_string()))?;
762        }
763    }
764
765    // Write data rows
766    for i in 0..rows {
767        let row: Vec<String> = (0..cols).map(|j| data[[i, j]].to_string()).collect();
768
769        let line = format_csv_line(&row, &config);
770        file.write_all(line.as_bytes())
771            .map_err(|e| IoError::FileError(e.to_string()))?;
772
773        if i < rows - 1 || config.line_ending == LineEnding::CRLF {
774            file.write_all(config.line_ending.as_str().as_bytes())
775                .map_err(|e| IoError::FileError(e.to_string()))?;
776        } else {
777            // For LF, ensure file ends with a newline but avoid extra newline
778            file.write_all(b"\n")
779                .map_err(|e| IoError::FileError(e.to_string()))?;
780        }
781    }
782
783    Ok(())
784}
785
786/// Format a row as a CSV line
787fn format_csv_line(fields: &[String], config: &CsvWriterConfig) -> String {
788    let mut result = String::new();
789
790    for (i, field) in fields.iter().enumerate() {
791        let need_quotes = config.always_quote
792            || (config.quote_special
793                && (field.contains(config.delimiter)
794                    || field.contains(config.quote_char)
795                    || field.contains('\n')
796                    || field.contains('\r')));
797
798        if need_quotes {
799            // Add quote character
800            result.push(config.quote_char);
801
802            // Add the field with escaped quotes
803            let escaped = field.replace(
804                config.quote_char,
805                &format!("{}{}", config.quote_char, config.quote_char),
806            );
807            result.push_str(&escaped);
808
809            // Close quotes
810            result.push(config.quote_char);
811        } else {
812            result.push_str(field);
813        }
814
815        // Add delimiter if not the last field
816        if i < fields.len() - 1 {
817            result.push(config.delimiter);
818        }
819    }
820
821    result
822}
823
824/// Read a CSV file with type conversion and missing value handling
825///
826/// # Arguments
827///
828/// * `path` - Path to the CSV file
829/// * `config` - Optional CSV reader configuration
830/// * `col_types` - Optional column data types
831/// * `missing_values` - Optional missing value handling options
832///
833/// # Returns
834///
835/// * `Result<(Vec<String>, Vec<Vec<DataValue>>)>` - Headers and typed data values
836///
837/// # Examples
838///
839/// ```no_run
840/// use scirs2_io::csv::{read_csv_typed, ColumnType, CsvReaderConfig, MissingValueOptions};
841///
842/// // Read with automatic type detection
843/// let (headers, data) = read_csv_typed("data.csv", None, None, None).unwrap();
844///
845/// // Read with specified column types
846/// let col_types = vec![
847///     ColumnType::String,
848///     ColumnType::Integer,
849///     ColumnType::Float,
850///     ColumnType::Boolean,
851/// ];
852/// let (headers, data) = read_csv_typed("data.csv", None, Some(&col_types), None).unwrap();
853///
854/// // Read with custom missing value handling
855/// let missing_opts = MissingValueOptions {
856///     values: vec!["missing".to_string(), "unknown".to_string()],
857///     fill_value: Some(0.0),
858/// };
859/// let (headers, data) = read_csv_typed("data.csv", None, None, Some(missing_opts)).unwrap();
860/// ```
861pub fn read_csv_typed<P: AsRef<Path>>(
862    path: P,
863    config: Option<CsvReaderConfig>,
864    col_types: Option<&[ColumnType]>,
865    missing_values: Option<MissingValueOptions>,
866) -> Result<(Vec<String>, Vec<Vec<DataValue>>)> {
867    // Get string data first
868    let (headers, string_data) = read_csv(path, config)?;
869
870    // If no data, return early
871    if string_data.shape()[0] == 0 || string_data.shape()[1] == 0 {
872        return Ok((headers, Vec::new()));
873    }
874
875    // Determine column types if not provided
876    let types = match col_types {
877        Some(types) => {
878            if types.len() != string_data.shape()[1] {
879                return Err(IoError::FormatError(format!(
880                    "Number of column types ({}) does not match data width ({})",
881                    types.len(),
882                    string_data.shape()[1]
883                )));
884            }
885            types.to_vec()
886        }
887        None => detect_column_types(&string_data),
888    };
889
890    let missing_opts = missing_values.unwrap_or_default();
891
892    // Convert data
893    let mut typed_data = Vec::with_capacity(string_data.shape()[0]);
894
895    for i in 0..string_data.shape()[0] {
896        let mut row = Vec::with_capacity(string_data.shape()[1]);
897
898        for j in 0..string_data.shape()[1] {
899            let value = convert_value(&string_data[[i, j]], types[j], &missing_opts)?;
900            row.push(value);
901        }
902
903        typed_data.push(row);
904    }
905
906    Ok((headers, typed_data))
907}
908
909/// Read a CSV file in chunks to process large files memory-efficiently
910///
911/// # Arguments
912///
913/// * `path` - Path to the CSV file
914/// * `config` - Optional CSV reader configuration
915/// * `chunk_size` - Number of rows to read in each chunk
916/// * `callback` - Function to process each chunk
917///
918/// # Returns
919///
920/// * `Result<()>` - Success or error
921///
922/// # Examples
923///
924/// ```no_run
925/// use scirs2_io::csv::{read_csv_chunked, CsvReaderConfig};
926/// use ndarray::Array2;
927///
928/// let config = CsvReaderConfig::default();
929/// let mut total_rows = 0;
930///
931/// read_csv_chunked("large_data.csv", Some(config), 1000, |headers, chunk| {
932///     println!("Processing chunk with {} rows", chunk.shape()[0]);
933///     total_rows += chunk.shape()[0];
934///     true // continue processing
935/// }).unwrap();
936///
937/// println!("Total rows processed: {}", total_rows);
938/// ```
939pub fn read_csv_chunked<P, F>(
940    path: P,
941    config: Option<CsvReaderConfig>,
942    chunk_size: usize,
943    mut callback: F,
944) -> Result<()>
945where
946    P: AsRef<Path>,
947    F: FnMut(&[String], &Array2<String>) -> bool,
948{
949    let config = config.unwrap_or_default();
950
951    let file = File::open(path).map_err(|e| IoError::FileError(e.to_string()))?;
952    let reader = BufReader::new(file);
953    let mut lines = reader.lines();
954
955    // Skip rows if needed
956    for _ in 0..config.skip_rows {
957        if lines.next().is_none() {
958            return Err(IoError::FormatError("Not enough rows in file".to_string()));
959        }
960    }
961
962    // Read header if present
963    let headers = if config.has_header {
964        match lines.next() {
965            Some(Ok(line)) => parse_csv_line(&line, &config),
966            Some(Err(e)) => return Err(IoError::FileError(e.to_string())),
967            None => return Err(IoError::FormatError("Empty file".to_string())),
968        }
969    } else {
970        Vec::new()
971    };
972
973    let mut buffer = Vec::with_capacity(chunk_size);
974    let mut num_cols = 0;
975
976    // Process file in chunks
977    for line_result in lines {
978        // Process comment lines, empty lines
979        let line = line_result.map_err(|e| IoError::FileError(e.to_string()))?;
980
981        if let Some(comment_char) = config.comment_char {
982            if line.trim().starts_with(comment_char) {
983                continue;
984            }
985        }
986
987        if line.trim().is_empty() {
988            continue;
989        }
990
991        // Parse the line
992        let row = parse_csv_line(&line, &config);
993
994        // Determine number of columns from first data row
995        if buffer.is_empty() {
996            num_cols = row.len();
997        } else if row.len() != num_cols {
998            return Err(IoError::FormatError(format!(
999                "Inconsistent number of columns: got {}, expected {}",
1000                row.len(),
1001                num_cols
1002            )));
1003        }
1004
1005        buffer.push(row);
1006
1007        // Process chunk when we've reached chunk_size
1008        if buffer.len() >= chunk_size
1009            && !process_chunk(&headers, &mut buffer, num_cols, &mut callback)?
1010        {
1011            return Ok(()); // Callback returned false, stop processing
1012        }
1013    }
1014
1015    // Process remaining rows
1016    if !buffer.is_empty() {
1017        process_chunk(&headers, &mut buffer, num_cols, &mut callback)?;
1018    }
1019
1020    Ok(())
1021}
1022
1023/// Helper function to process a chunk of data
1024fn process_chunk<F>(
1025    headers: &[String],
1026    buffer: &mut Vec<Vec<String>>,
1027    num_cols: usize,
1028    callback: &mut F,
1029) -> Result<bool>
1030where
1031    F: FnMut(&[String], &Array2<String>) -> bool,
1032{
1033    let num_rows = buffer.len();
1034    let mut data = Array2::<String>::from_elem((num_rows, num_cols), String::new());
1035
1036    for (i, row) in buffer.iter().enumerate() {
1037        for (j, value) in row.iter().enumerate() {
1038            data[[i, j]] = value.clone();
1039        }
1040    }
1041
1042    buffer.clear();
1043
1044    Ok(callback(headers, &data))
1045}
1046
1047/// Write typed data to a CSV file
1048///
1049/// # Arguments
1050///
1051/// * `path` - Path to the output CSV file
1052/// * `data` - Vector of vectors containing typed data values
1053/// * `headers` - Optional column headers
1054/// * `config` - Optional CSV writer configuration
1055///
1056/// # Returns
1057///
1058/// * `Result<()>` - Success or error
1059///
1060/// # Examples
1061///
1062/// ```no_run
1063/// use scirs2_io::csv::{write_csv_typed, DataValue, CsvWriterConfig};
1064///
1065/// // Create mixed-type data
1066/// let row1 = vec![
1067///     DataValue::String("Alice".to_string()),
1068///     DataValue::Integer(25),
1069///     DataValue::Float(168.5),
1070///     DataValue::Boolean(true),
1071/// ];
1072/// let row2 = vec![
1073///     DataValue::String("Bob".to_string()),
1074///     DataValue::Integer(32),
1075///     DataValue::Float(175.0),
1076///     DataValue::Boolean(false),
1077/// ];
1078///
1079/// let data = vec![row1, row2];
1080/// let headers = vec![
1081///     "Name".to_string(),
1082///     "Age".to_string(),
1083///     "Height".to_string(),
1084///     "Active".to_string(),
1085/// ];
1086///
1087/// write_csv_typed("typed_data.csv", &data, Some(&headers), None).unwrap();
1088/// ```
1089pub fn write_csv_typed<P: AsRef<Path>>(
1090    path: P,
1091    data: &[Vec<DataValue>],
1092    headers: Option<&Vec<String>>,
1093    config: Option<CsvWriterConfig>,
1094) -> Result<()> {
1095    let config = config.unwrap_or_default();
1096
1097    if data.is_empty() {
1098        return Err(IoError::FormatError("No data provided".to_string()));
1099    }
1100
1101    // Check all rows have the same length
1102    let num_cols = data[0].len();
1103    for (i, row) in data.iter().enumerate().skip(1) {
1104        if row.len() != num_cols {
1105            return Err(IoError::FormatError(format!(
1106                "Row {} has {} columns, expected {}",
1107                i,
1108                row.len(),
1109                num_cols
1110            )));
1111        }
1112    }
1113
1114    // Check headers match data width
1115    if let Some(hdrs) = headers {
1116        if hdrs.len() != num_cols && config.write_header {
1117            return Err(IoError::FormatError(format!(
1118                "Header length ({}) does not match data width ({})",
1119                hdrs.len(),
1120                num_cols
1121            )));
1122        }
1123    }
1124
1125    // Open file for writing with buffering for better performance
1126    let file = File::create(path).map_err(|e| IoError::FileError(e.to_string()))?;
1127    let mut writer = BufWriter::new(file);
1128
1129    // Write headers if provided and enabled
1130    if let Some(hdrs) = headers {
1131        if config.write_header {
1132            let header_line = format_csv_line(hdrs, &config);
1133            writer
1134                .write_all(header_line.as_bytes())
1135                .map_err(|e| IoError::FileError(e.to_string()))?;
1136            writer
1137                .write_all(config.line_ending.as_str().as_bytes())
1138                .map_err(|e| IoError::FileError(e.to_string()))?;
1139        }
1140    }
1141
1142    // Write data rows
1143    for (i, row) in data.iter().enumerate() {
1144        let string_row: Vec<String> = row.iter().map(|val| val.to_string()).collect();
1145
1146        let line = format_csv_line(&string_row, &config);
1147        writer
1148            .write_all(line.as_bytes())
1149            .map_err(|e| IoError::FileError(e.to_string()))?;
1150
1151        if i < data.len() - 1 || config.line_ending == LineEnding::CRLF {
1152            writer
1153                .write_all(config.line_ending.as_str().as_bytes())
1154                .map_err(|e| IoError::FileError(e.to_string()))?;
1155        } else {
1156            // For LF, ensure file ends with a newline but avoid extra newline
1157            writer
1158                .write_all(b"\n")
1159                .map_err(|e| IoError::FileError(e.to_string()))?;
1160        }
1161    }
1162
1163    // Ensure data is written to disk
1164    writer
1165        .flush()
1166        .map_err(|e| IoError::FileError(e.to_string()))?;
1167
1168    Ok(())
1169}
1170
1171/// Write multiple 1D arrays to a CSV file as columns
1172///
1173/// # Arguments
1174///
1175/// * `path` - Path to the output CSV file
1176/// * `columns` - Vector of 1D arrays to write as columns
1177/// * `headers` - Optional column headers
1178/// * `config` - Optional CSV writer configuration
1179///
1180/// # Returns
1181///
1182/// * `Result<()>` - Success or error
1183///
1184/// # Examples
1185///
1186/// ```no_run
1187/// use ndarray::{Array1, array};
1188/// use scirs2_io::csv::{write_csv_columns, CsvWriterConfig};
1189///
1190/// let col1 = array![1.0, 2.0, 3.0];
1191/// let col2 = array![4.0, 5.0, 6.0];
1192/// let columns = vec![col1, col2];
1193/// let headers = vec!["X".to_string(), "Y".to_string()];
1194///
1195/// write_csv_columns("columns.csv", &columns, Some(&headers), None).unwrap();
1196/// ```
1197pub fn write_csv_columns<P: AsRef<Path>, T: std::fmt::Display + Clone>(
1198    path: P,
1199    columns: &[Array1<T>],
1200    headers: Option<&Vec<String>>,
1201    config: Option<CsvWriterConfig>,
1202) -> Result<()> {
1203    if columns.is_empty() {
1204        return Err(IoError::FormatError("No columns provided".to_string()));
1205    }
1206
1207    // Check all columns have the same length
1208    let num_rows = columns[0].len();
1209    for (i, col) in columns.iter().enumerate().skip(1) {
1210        if col.len() != num_rows {
1211            return Err(IoError::FormatError(format!(
1212                "Column {} has length {}, expected {}",
1213                i,
1214                col.len(),
1215                num_rows
1216            )));
1217        }
1218    }
1219
1220    // Check headers match column count
1221    if let Some(hdrs) = headers {
1222        if hdrs.len() != columns.len() {
1223            return Err(IoError::FormatError(format!(
1224                "Header length ({}) does not match column count ({})",
1225                hdrs.len(),
1226                columns.len()
1227            )));
1228        }
1229    }
1230
1231    // Convert to Array2
1232    let num_cols = columns.len();
1233    let mut data = Array2::<String>::from_elem((num_rows, num_cols), String::new());
1234
1235    for (j, col) in columns.iter().enumerate() {
1236        for (i, val) in col.iter().enumerate() {
1237            data[[i, j]] = val.to_string();
1238        }
1239    }
1240
1241    // Write to CSV
1242    write_csv(path, &data, headers, config)
1243}