Skip to main content

ferray_io/text/
mod.rs

1// ferray-io: Text I/O
2//
3// REQ-7: savetxt(path, &array, delimiter, fmt) writes 2D array as delimited text
4// REQ-8: loadtxt::<T>(path, delimiter, skiprows) reads delimited text into 2D array
5// REQ-9: genfromtxt(path, delimiter, filling_values) reads text with missing value handling
6
7pub mod parser;
8
9use std::fmt::Display;
10use std::fs;
11use std::io::Write;
12use std::path::Path;
13use std::str::FromStr;
14
15use ferray_core::Array;
16use ferray_core::dimension::Ix2;
17use ferray_core::dtype::Element;
18use ferray_core::error::{FerrayError, FerrayResult};
19
20use self::parser::{TextParseOptions, parse_text_grid, parse_text_grid_with_missing};
21
/// Settings that control how [`savetxt`] / [`savetxt_to_writer`] lay out a file.
#[derive(Debug, Clone)]
pub struct SaveTxtOptions {
    /// Character placed between adjacent columns (default: ',').
    pub delimiter: char,
    /// Per-element format spec, either NumPy printf-style (e.g. `"%.6e"`)
    /// or Rust-style (e.g. `"{:.6}"`).
    /// `None` means every element is written with its `Display` impl.
    pub fmt: Option<String>,
    /// Line emitted once before the first data row, if present.
    pub header: Option<String>,
    /// Line emitted once after the last data row, if present.
    pub footer: Option<String>,
    /// Terminator appended to the header, each row, and the footer
    /// (default: "\n").
    pub newline: String,
}

impl Default for SaveTxtOptions {
    /// Comma-separated output, `Display` formatting, no header or footer,
    /// and `"\n"` line endings.
    fn default() -> Self {
        let newline = String::from("\n");
        Self {
            delimiter: ',',
            fmt: None,
            header: None,
            footer: None,
            newline,
        }
    }
}
49
/// Field layout shared by the printf-style and Rust-style spec parsers.
struct FieldSpec {
    /// Explicit alignment (`'<'`, `'>'` or `'^'`). `None` right-aligns
    /// numbers and left-aligns everything else.
    align: Option<char>,
    /// Prefix non-negative numbers with `+`.
    plus: bool,
    /// Pad finite numbers with zeros after the sign instead of spaces.
    zero: bool,
    /// Minimum field width in characters.
    width: Option<usize>,
    /// Number of digits after the decimal point.
    precision: Option<usize>,
    /// `'e'` / `'E'` for scientific notation, `'f'` for fixed/default.
    mode: char,
}

/// Parse a run of ASCII digits into a bounded count.
///
/// Signs and empty strings are rejected (plain `str::parse` would accept
/// `"+5"`). Values above `u16::MAX` are treated as unrecognized so a
/// mistyped spec cannot request an enormous field.
fn parse_count(digits: &str) -> Option<usize> {
    if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }
    digits
        .parse::<usize>()
        .ok()
        .filter(|&n| n <= usize::from(u16::MAX))
}

/// Split `"<width>.<precision>"` (both parts optional) into its numbers.
fn parse_width_precision(body: &str) -> Option<(Option<usize>, Option<usize>)> {
    let (width_part, precision_part) = match body.split_once('.') {
        Some((w, p)) => (w, Some(p)),
        None => (body, None),
    };
    let width = if width_part.is_empty() {
        None
    } else {
        Some(parse_count(width_part)?)
    };
    let precision = match precision_part {
        Some(p) => Some(parse_count(p)?),
        None => None,
    };
    Some((width, precision))
}

/// Parse the inside of a Rust-style `{:...}` spec.
///
/// Handles the subset `[align]['+']['0'][width]['.' precision]['e'|'E']`.
/// Anything else (custom fill characters, `#`, `?`, ...) yields `None`.
fn parse_rust_spec(spec: &str) -> Option<FieldSpec> {
    let mut rest = spec;

    let mut align = None;
    if let Some(c) = rest.chars().next() {
        if matches!(c, '<' | '>' | '^') {
            align = Some(c);
            rest = &rest[1..];
        }
    }

    let mut plus = false;
    if let Some(r) = rest.strip_prefix('+') {
        plus = true;
        rest = r;
    }

    // A leading zero is the zero-padding flag, not part of the width.
    let mut zero = false;
    if let Some(r) = rest.strip_prefix('0') {
        zero = true;
        rest = r;
    }

    let mode = if let Some(r) = rest.strip_suffix('e') {
        rest = r;
        'e'
    } else if let Some(r) = rest.strip_suffix('E') {
        rest = r;
        'E'
    } else {
        'f'
    };

    let (width, precision) = parse_width_precision(rest)?;
    Some(FieldSpec {
        align,
        plus,
        zero,
        width,
        precision,
        mode,
    })
}

/// Parse the part of a printf-style spec that follows the `%`.
///
/// Handles `[flags][width]['.' precision]` followed by one of
/// `e`, `E`, `f`, `g`, `d`, `i`. Other conversions yield `None`.
fn parse_printf_spec(spec: &str) -> Option<FieldSpec> {
    let conv = spec.chars().last()?;
    let (mode, integer) = match conv {
        'e' | 'E' => (conv, false),
        // `%g` is approximated by fixed notation, as before.
        'f' | 'g' => ('f', false),
        'd' | 'i' => ('f', true),
        _ => return None,
    };
    let mut rest = &spec[..spec.len() - conv.len_utf8()];

    let (mut left, mut plus, mut zero) = (false, false, false);
    loop {
        match rest.chars().next() {
            Some('-') => left = true,
            Some('+') => plus = true,
            Some('0') => zero = true,
            // Accepted for compatibility but have no effect here.
            Some(' ') | Some('#') => {}
            _ => break,
        }
        rest = &rest[1..];
    }

    let (width, precision) = parse_width_precision(rest)?;
    Some(FieldSpec {
        // printf right-aligns by default; `-` switches to left.
        align: Some(if left { '<' } else { '>' }),
        plus,
        // As in C, `-` overrides `0`.
        zero: zero && !left,
        width,
        // Integer conversions keep the value's `Display` digits.
        precision: if integer { None } else { precision },
        mode,
    })
}

/// Render `plain` (the value's `Display` text) according to `spec`.
fn render_field(plain: &str, spec: &FieldSpec) -> String {
    // Precision and exponent only make sense for numeric text; anything
    // else keeps its `Display` form and is merely padded.
    let number = plain.parse::<f64>().ok();

    let mut text = match (number, spec.mode, spec.precision) {
        (Some(v), 'e', Some(p)) => format!("{v:.p$e}"),
        (Some(v), 'e', None) => format!("{v:e}"),
        (Some(v), 'E', Some(p)) => format!("{v:.p$E}"),
        (Some(v), 'E', None) => format!("{v:E}"),
        (Some(v), _, Some(p)) => format!("{v:.p$}"),
        // No precision requested: the original text is already exact.
        _ => plain.to_string(),
    };

    if spec.plus {
        if let Some(v) = number {
            if !v.is_nan() && !text.starts_with('-') {
                text.insert(0, '+');
            }
        }
    }

    let width = match spec.width {
        Some(w) => w,
        None => return text,
    };
    let len = text.chars().count();
    if len >= width {
        return text;
    }

    let finite = number.map_or(false, f64::is_finite);
    if spec.zero && finite {
        // Zeros go between the sign and the digits: "-002.500".
        let sign_len = usize::from(text.starts_with('-') || text.starts_with('+'));
        text.insert_str(sign_len, &"0".repeat(width - len));
        return text;
    }

    let default_align = if number.is_some() { '>' } else { '<' };
    match spec.align.unwrap_or(default_align) {
        '<' => format!("{text:<width$}"),
        '^' => format!("{text:^width$}"),
        _ => format!("{text:>width$}"),
    }
}

/// Format a single value using a format string.
///
/// Supports:
/// - NumPy printf-style: `"%.6e"`, `"%.18e"`, `"%10.5f"`, `"%-8.2f"`,
///   `"%08.3f"`, `"%+.1f"`, `"%5d"`, `"%d"`
/// - Rust-style with braces: `"{:.6}"`, `"{:.6e}"`, `"{:.6E}"`, `"{:>10.5}"`
/// - Templates containing a bare `"{}"`, which is replaced by the value's
///   `Display` text (so plain `"{}"` is default `Display`)
///
/// Numeric formatting goes through `f64`: the value's `Display` text is
/// parsed as `f64` whenever a precision or exponent is requested. Exponents
/// use Rust's notation (`1.5e0`), not C's (`1.5e+00`). `%g` is rendered as
/// fixed notation.
///
/// Unrecognized patterns fall back to default `Display` formatting; the
/// value is never dropped from the output.
fn format_value<T: Display>(val: &T, fmt_str: &str) -> String {
    let plain = val.to_string();

    // Rust-style: anything containing a brace.
    if fmt_str.contains('{') {
        let styled = fmt_str
            .strip_prefix("{:")
            .and_then(|s| s.strip_suffix('}'))
            .and_then(parse_rust_spec)
            .map(|spec| render_field(&plain, &spec));
        if let Some(out) = styled {
            return out;
        }
        // Template with a bare placeholder, e.g. "v={}".
        if fmt_str.contains("{}") {
            return fmt_str.replace("{}", &plain);
        }
        // Unknown brace spec: emit the value rather than the spec text.
        return plain;
    }

    // NumPy printf-style: starts with "%".
    if let Some(spec) = fmt_str.strip_prefix('%').and_then(parse_printf_spec) {
        return render_field(&plain, &spec);
    }

    // Unrecognized format: default Display.
    plain
}
124
125/// Save a 2D array as delimited text.
126///
127/// The `fmt` field in [`SaveTxtOptions`] supports:
128/// - NumPy printf-style: `"%.6e"`, `"%.18e"`, `"%10.5f"`, `"%d"`
129/// - Rust-style: `"{:.6}"`, `"{:.6e}"`
130/// - Default: `None` uses standard `Display` formatting.
131///
132/// # Errors
133/// Returns `FerrayError::IoError` on file write failures.
134/// Returns `FerrayError::IoError` if the array is not contiguous.
135pub fn savetxt<T: Element + Display, P: AsRef<Path>>(
136    path: P,
137    array: &Array<T, Ix2>,
138    opts: &SaveTxtOptions,
139) -> FerrayResult<()> {
140    let mut file = std::fs::File::create(path.as_ref()).map_err(|e| {
141        FerrayError::io_error(format!(
142            "failed to create file '{}': {e}",
143            path.as_ref().display()
144        ))
145    })?;
146
147    savetxt_to_writer(&mut file, array, opts)
148}
149
150/// Save a 2D array as delimited text to a writer.
151pub fn savetxt_to_writer<T: Element + Display, W: Write>(
152    writer: &mut W,
153    array: &Array<T, Ix2>,
154    opts: &SaveTxtOptions,
155) -> FerrayResult<()> {
156    let shape = array.shape();
157    let nrows = shape[0];
158    let ncols = shape[1];
159
160    if let Some(ref header) = opts.header {
161        write!(writer, "{header}").map_err(|e| FerrayError::io_error(e.to_string()))?;
162        writer
163            .write_all(opts.newline.as_bytes())
164            .map_err(|e| FerrayError::io_error(e.to_string()))?;
165    }
166
167    let slice = array
168        .as_slice()
169        .ok_or_else(|| FerrayError::io_error("cannot save non-contiguous array as text"))?;
170
171    for row in 0..nrows {
172        for col in 0..ncols {
173            if col > 0 {
174                write!(writer, "{}", opts.delimiter)
175                    .map_err(|e| FerrayError::io_error(e.to_string()))?;
176            }
177            let val = &slice[row * ncols + col];
178            if let Some(ref fmt_str) = opts.fmt {
179                // Format string support:
180                // - NumPy printf-style: "%.6e", "%.18e", "%10.5f", "%d"
181                // - Rust-style: "{:.6}", "{:.6e}", "{:>10.5}"
182                // We convert common printf patterns to Rust format, then
183                // fall back to string substitution.
184                let formatted = format_value(val, fmt_str);
185                write!(writer, "{formatted}").map_err(|e| FerrayError::io_error(e.to_string()))?;
186            } else {
187                write!(writer, "{val}").map_err(|e| FerrayError::io_error(e.to_string()))?;
188            }
189        }
190        writer
191            .write_all(opts.newline.as_bytes())
192            .map_err(|e| FerrayError::io_error(e.to_string()))?;
193    }
194
195    if let Some(ref footer) = opts.footer {
196        write!(writer, "{footer}").map_err(|e| FerrayError::io_error(e.to_string()))?;
197        writer
198            .write_all(opts.newline.as_bytes())
199            .map_err(|e| FerrayError::io_error(e.to_string()))?;
200    }
201
202    writer
203        .flush()
204        .map_err(|e| FerrayError::io_error(e.to_string()))?;
205    Ok(())
206}
207
208/// Load a delimited text file into a 2D array.
209///
210/// Each row of the text file becomes a row in the array. All rows must
211/// have the same number of columns.
212///
213/// # Type Parameters
214/// - `T`: Element type to parse each cell into. Must implement `FromStr`.
215///
216/// # Errors
217/// - Returns `FerrayError::IoError` on file read or parse failures.
218pub fn loadtxt<T, P>(path: P, delimiter: char, skiprows: usize) -> FerrayResult<Array<T, Ix2>>
219where
220    T: Element + FromStr,
221    T::Err: Display,
222    P: AsRef<Path>,
223{
224    let content = fs::read_to_string(path.as_ref()).map_err(|e| {
225        FerrayError::io_error(format!(
226            "failed to read file '{}': {e}",
227            path.as_ref().display()
228        ))
229    })?;
230
231    loadtxt_from_str(&content, delimiter, skiprows)
232}
233
234/// Load delimited text from a string into a 2D array.
235pub fn loadtxt_from_str<T>(
236    content: &str,
237    delimiter: char,
238    skiprows: usize,
239) -> FerrayResult<Array<T, Ix2>>
240where
241    T: Element + FromStr,
242    T::Err: Display,
243{
244    let opts = TextParseOptions {
245        delimiter,
246        skiprows,
247        ..Default::default()
248    };
249
250    let (cells, nrows, ncols) = parse_text_grid(content, &opts)?;
251
252    if nrows == 0 {
253        return Array::from_vec(Ix2::new([0, 0]), vec![]);
254    }
255
256    let data: FerrayResult<Vec<T>> = cells
257        .iter()
258        .enumerate()
259        .map(|(i, cell)| {
260            cell.parse::<T>().map_err(|e| {
261                let row = i / ncols;
262                let col = i % ncols;
263                FerrayError::io_error(format!(
264                    "failed to parse value '{cell}' at row {row}, col {col}: {e}"
265                ))
266            })
267        })
268        .collect();
269
270    let data = data?;
271    Array::from_vec(Ix2::new([nrows, ncols]), data)
272}
273
274/// Load a delimited text file with missing value handling.
275///
276/// Missing values (empty cells or cells matching common missing indicators)
277/// are replaced with `filling_values`. This is analogous to NumPy's `genfromtxt`.
278///
279/// Returns a 2D `f64` array where missing values are replaced with `filling_value`
280/// (typically `f64::NAN`).
281///
282/// # Errors
283/// Returns `FerrayError::IoError` on file read or parse failures.
284pub fn genfromtxt<P: AsRef<Path>>(
285    path: P,
286    delimiter: char,
287    filling_value: f64,
288    skiprows: usize,
289    missing_values: &[&str],
290) -> FerrayResult<Array<f64, Ix2>> {
291    let content = fs::read_to_string(path.as_ref()).map_err(|e| {
292        FerrayError::io_error(format!(
293            "failed to read file '{}': {e}",
294            path.as_ref().display()
295        ))
296    })?;
297
298    genfromtxt_from_str(&content, delimiter, filling_value, skiprows, missing_values)
299}
300
301/// Load delimited text from a string with missing value handling.
302pub fn genfromtxt_from_str(
303    content: &str,
304    delimiter: char,
305    filling_value: f64,
306    skiprows: usize,
307    missing_values: &[&str],
308) -> FerrayResult<Array<f64, Ix2>> {
309    let opts = TextParseOptions {
310        delimiter,
311        skiprows,
312        ..Default::default()
313    };
314
315    // Default missing markers
316    let mut all_missing: Vec<&str> = vec!["", "NA", "N/A", "nan", "NaN", "NAN", "--", "null"];
317    for mv in missing_values {
318        if !all_missing.contains(mv) {
319            all_missing.push(mv);
320        }
321    }
322
323    let (cells, nrows, ncols) = parse_text_grid_with_missing(content, &opts, &all_missing)?;
324
325    if nrows == 0 {
326        return Array::from_vec(Ix2::new([0, 0]), vec![]);
327    }
328
329    let data: FerrayResult<Vec<f64>> = cells
330        .iter()
331        .enumerate()
332        .map(|(i, cell)| match cell {
333            None => Ok(filling_value),
334            Some(s) => s.parse::<f64>().map_err(|e| {
335                let row = i / ncols;
336                let col = i % ncols;
337                FerrayError::io_error(format!(
338                    "failed to parse value '{s}' at row {row}, col {col}: {e}"
339                ))
340            }),
341        })
342        .collect();
343
344    let data = data?;
345    Array::from_vec(Ix2::new([nrows, ncols]), data)
346}
347
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a 2D `f64` array of the given shape from row-major values.
    fn grid(rows: usize, cols: usize, values: &[f64]) -> Array<f64, Ix2> {
        Array::<f64, Ix2>::from_vec(Ix2::new([rows, cols]), values.to_vec()).unwrap()
    }

    /// Path of `file_name` inside this process's scratch directory,
    /// creating the directory on a best-effort basis.
    fn scratch_file(file_name: &str) -> std::path::PathBuf {
        let dir = std::env::temp_dir().join(format!("ferray_io_text_{}", std::process::id()));
        let _ = std::fs::create_dir_all(&dir);
        dir.join(file_name)
    }

    /// Serialize `arr` through `savetxt_to_writer` and return the text.
    fn to_text(arr: &Array<f64, Ix2>, opts: &SaveTxtOptions) -> String {
        let mut bytes = Vec::new();
        savetxt_to_writer(&mut bytes, arr, opts).unwrap();
        String::from_utf8(bytes).unwrap()
    }

    #[test]
    fn loadtxt_simple_csv() {
        let parsed: Array<f64, Ix2> =
            loadtxt_from_str("1.0,2.0,3.0\n4.0,5.0,6.0\n", ',', 0).unwrap();
        assert_eq!(parsed.shape(), &[2, 3]);
        assert_eq!(parsed.as_slice().unwrap(), &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
    }

    #[test]
    fn loadtxt_with_skiprows() {
        let text = "# header\nname,value\n1.0,10.0\n2.0,20.0\n";
        let parsed: Array<f64, Ix2> = loadtxt_from_str(text, ',', 1).unwrap();
        assert_eq!(parsed.shape(), &[2, 2]);
        assert_eq!(parsed.as_slice().unwrap()[0], 1.0);
    }

    #[test]
    fn loadtxt_tab_delimited() {
        let parsed: Array<i32, Ix2> = loadtxt_from_str("1\t2\t3\n4\t5\t6\n", '\t', 0).unwrap();
        assert_eq!(parsed.shape(), &[2, 3]);
        assert_eq!(parsed.as_slice().unwrap(), &[1, 2, 3, 4, 5, 6]);
    }

    #[test]
    fn loadtxt_integers() {
        let parsed: Array<i64, Ix2> = loadtxt_from_str("10,20\n30,40\n", ',', 0).unwrap();
        assert_eq!(parsed.as_slice().unwrap(), &[10i64, 20, 30, 40]);
    }

    #[test]
    fn loadtxt_file_roundtrip() {
        let values = vec![1.0f64, 2.0, 3.0, 4.0, 5.0, 6.0];
        let original = grid(2, 3, &values);
        let file = scratch_file("test.csv");

        savetxt(&file, &original, &SaveTxtOptions::default()).unwrap();
        let reloaded: Array<f64, Ix2> = loadtxt(&file, ',', 0).unwrap();

        assert_eq!(reloaded.shape(), &[2, 3]);
        assert_eq!(reloaded.as_slice().unwrap(), &values[..]);
        let _ = std::fs::remove_file(&file);
    }

    #[test]
    fn savetxt_custom_delimiter() {
        let arr = grid(2, 2, &[1.0, 2.0, 3.0, 4.0]);
        let opts = SaveTxtOptions {
            delimiter: '\t',
            ..Default::default()
        };

        let text = to_text(&arr, &opts);
        assert!(text.contains('\t'));
        assert!(!text.contains(','));
    }

    #[test]
    fn savetxt_with_header_footer() {
        let arr = grid(1, 2, &[1.0, 2.0]);
        let opts = SaveTxtOptions {
            header: Some("# my header".to_string()),
            footer: Some("# end".to_string()),
            ..Default::default()
        };

        let text = to_text(&arr, &opts);
        assert!(text.starts_with("# my header\n"));
        assert!(text.ends_with("# end\n"));
    }

    #[test]
    fn genfromtxt_missing_nan() {
        let text = "1.0,2.0,3.0\n4.0,,6.0\n7.0,8.0,\n";
        let parsed = genfromtxt_from_str(text, ',', f64::NAN, 0, &[]).unwrap();
        assert_eq!(parsed.shape(), &[3, 3]);

        let flat = parsed.as_slice().unwrap();
        assert_eq!(flat[0], 1.0);
        // Empty cell in the middle of a row becomes the fill value.
        assert!(flat[4].is_nan());
        // So does an empty cell at the end of a row.
        assert!(flat[8].is_nan());
    }

    #[test]
    fn genfromtxt_na_marker() {
        let text = "1.0,NA,3.0\n4.0,5.0,NA\n";
        let parsed = genfromtxt_from_str(text, ',', -999.0, 0, &["NA"]).unwrap();
        assert_eq!(parsed.shape(), &[2, 3]);

        let flat = parsed.as_slice().unwrap();
        assert_eq!(flat[1], -999.0);
        assert_eq!(flat[5], -999.0);
    }

    #[test]
    fn genfromtxt_with_skiprows() {
        let text = "col1,col2\n1.0,2.0\n3.0,4.0\n";
        let parsed = genfromtxt_from_str(text, ',', f64::NAN, 1, &[]).unwrap();
        assert_eq!(parsed.shape(), &[2, 2]);
        assert_eq!(parsed.as_slice().unwrap()[0], 1.0);
    }

    #[test]
    fn genfromtxt_file() {
        let file = scratch_file("genfromtxt_test.csv");
        std::fs::write(&file, "1.0,2.0\n,4.0\n").unwrap();

        let parsed = genfromtxt(&file, ',', f64::NAN, 0, &[]).unwrap();
        assert_eq!(parsed.shape(), &[2, 2]);
        assert!(parsed.as_slice().unwrap()[2].is_nan());
        let _ = std::fs::remove_file(&file);
    }

    #[test]
    fn loadtxt_empty() {
        let parsed: Array<f64, Ix2> = loadtxt_from_str("", ',', 0).unwrap();
        assert_eq!(parsed.shape(), &[0, 0]);
    }
}