// ferray_io/text/mod.rs

// ferray-io: Text I/O
//
// REQ-7: savetxt(path, &array, &opts) writes a 2D array as delimited text
//        (delimiter and fmt are carried by SaveTxtOptions)
// REQ-8: loadtxt::<T>(path, delimiter, skiprows) reads delimited text into a 2D array
// REQ-9: genfromtxt(path, delimiter, filling_value, skiprows, missing_values) reads
//        text with missing-value handling
6
7pub mod parser;
8
use std::fmt::Display;
use std::fs;
use std::io::{BufWriter, Write};
use std::path::Path;
use std::str::FromStr;

use ferray_core::Array;
use ferray_core::dimension::Ix2;
use ferray_core::dtype::Element;
use ferray_core::error::{FerrayError, FerrayResult};

use self::parser::{TextParseOptions, parse_text_grid, parse_text_grid_with_missing};
21
/// Configuration for writing an array as delimited text.
#[derive(Clone, Debug)]
pub struct SaveTxtOptions {
    /// Character placed between adjacent columns (default: ',').
    pub delimiter: char,
    /// Per-element template: every `{}` in it is replaced with the element's
    /// `Display` output. When `None`, the element is written with plain
    /// `Display` formatting.
    pub fmt: Option<String>,
    /// Line emitted once before the data rows, if set.
    pub header: Option<String>,
    /// Line emitted once after the data rows, if set.
    pub footer: Option<String>,
    /// Terminator appended to every emitted line (default: "\n").
    pub newline: String,
}

impl Default for SaveTxtOptions {
    /// Comma-delimited output with `\n` line endings and no custom format,
    /// header, or footer.
    fn default() -> Self {
        let delimiter = ',';
        let newline = String::from("\n");
        Self {
            delimiter,
            newline,
            fmt: None,
            header: None,
            footer: None,
        }
    }
}
49
50/// Save a 2D array as delimited text.
51///
52/// # Errors
53/// Returns `FerrayError::IoError` on file write failures.
54/// Returns `FerrayError::IoError` if the array is not contiguous.
55pub fn savetxt<T: Element + Display, P: AsRef<Path>>(
56    path: P,
57    array: &Array<T, Ix2>,
58    opts: &SaveTxtOptions,
59) -> FerrayResult<()> {
60    let mut file = std::fs::File::create(path.as_ref()).map_err(|e| {
61        FerrayError::io_error(format!(
62            "failed to create file '{}': {e}",
63            path.as_ref().display()
64        ))
65    })?;
66
67    savetxt_to_writer(&mut file, array, opts)
68}
69
70/// Save a 2D array as delimited text to a writer.
71pub fn savetxt_to_writer<T: Element + Display, W: Write>(
72    writer: &mut W,
73    array: &Array<T, Ix2>,
74    opts: &SaveTxtOptions,
75) -> FerrayResult<()> {
76    let shape = array.shape();
77    let nrows = shape[0];
78    let ncols = shape[1];
79
80    if let Some(ref header) = opts.header {
81        write!(writer, "{header}").map_err(|e| FerrayError::io_error(e.to_string()))?;
82        writer
83            .write_all(opts.newline.as_bytes())
84            .map_err(|e| FerrayError::io_error(e.to_string()))?;
85    }
86
87    let slice = array
88        .as_slice()
89        .ok_or_else(|| FerrayError::io_error("cannot save non-contiguous array as text"))?;
90
91    for row in 0..nrows {
92        for col in 0..ncols {
93            if col > 0 {
94                write!(writer, "{}", opts.delimiter)
95                    .map_err(|e| FerrayError::io_error(e.to_string()))?;
96            }
97            let val = &slice[row * ncols + col];
98            if let Some(ref fmt_str) = opts.fmt {
99                // Use the format string with the value
100                // We support a simple subset: if fmt contains "{}", use it directly
101                let formatted = fmt_str.replace("{}", &val.to_string());
102                write!(writer, "{formatted}").map_err(|e| FerrayError::io_error(e.to_string()))?;
103            } else {
104                write!(writer, "{val}").map_err(|e| FerrayError::io_error(e.to_string()))?;
105            }
106        }
107        writer
108            .write_all(opts.newline.as_bytes())
109            .map_err(|e| FerrayError::io_error(e.to_string()))?;
110    }
111
112    if let Some(ref footer) = opts.footer {
113        write!(writer, "{footer}").map_err(|e| FerrayError::io_error(e.to_string()))?;
114        writer
115            .write_all(opts.newline.as_bytes())
116            .map_err(|e| FerrayError::io_error(e.to_string()))?;
117    }
118
119    writer
120        .flush()
121        .map_err(|e| FerrayError::io_error(e.to_string()))?;
122    Ok(())
123}
124
125/// Load a delimited text file into a 2D array.
126///
127/// Each row of the text file becomes a row in the array. All rows must
128/// have the same number of columns.
129///
130/// # Type Parameters
131/// - `T`: Element type to parse each cell into. Must implement `FromStr`.
132///
133/// # Errors
134/// - Returns `FerrayError::IoError` on file read or parse failures.
135pub fn loadtxt<T, P>(path: P, delimiter: char, skiprows: usize) -> FerrayResult<Array<T, Ix2>>
136where
137    T: Element + FromStr,
138    T::Err: Display,
139    P: AsRef<Path>,
140{
141    let content = fs::read_to_string(path.as_ref()).map_err(|e| {
142        FerrayError::io_error(format!(
143            "failed to read file '{}': {e}",
144            path.as_ref().display()
145        ))
146    })?;
147
148    loadtxt_from_str(&content, delimiter, skiprows)
149}
150
151/// Load delimited text from a string into a 2D array.
152pub fn loadtxt_from_str<T>(
153    content: &str,
154    delimiter: char,
155    skiprows: usize,
156) -> FerrayResult<Array<T, Ix2>>
157where
158    T: Element + FromStr,
159    T::Err: Display,
160{
161    let opts = TextParseOptions {
162        delimiter,
163        skiprows,
164        ..Default::default()
165    };
166
167    let (cells, nrows, ncols) = parse_text_grid(content, &opts)?;
168
169    if nrows == 0 {
170        return Array::from_vec(Ix2::new([0, 0]), vec![]);
171    }
172
173    let data: FerrayResult<Vec<T>> = cells
174        .iter()
175        .enumerate()
176        .map(|(i, cell)| {
177            cell.parse::<T>().map_err(|e| {
178                let row = i / ncols;
179                let col = i % ncols;
180                FerrayError::io_error(format!(
181                    "failed to parse value '{cell}' at row {row}, col {col}: {e}"
182                ))
183            })
184        })
185        .collect();
186
187    let data = data?;
188    Array::from_vec(Ix2::new([nrows, ncols]), data)
189}
190
191/// Load a delimited text file with missing value handling.
192///
193/// Missing values (empty cells or cells matching common missing indicators)
194/// are replaced with `filling_values`. This is analogous to NumPy's `genfromtxt`.
195///
196/// Returns a 2D `f64` array where missing values are replaced with `filling_value`
197/// (typically `f64::NAN`).
198///
199/// # Errors
200/// Returns `FerrayError::IoError` on file read or parse failures.
201pub fn genfromtxt<P: AsRef<Path>>(
202    path: P,
203    delimiter: char,
204    filling_value: f64,
205    skiprows: usize,
206    missing_values: &[&str],
207) -> FerrayResult<Array<f64, Ix2>> {
208    let content = fs::read_to_string(path.as_ref()).map_err(|e| {
209        FerrayError::io_error(format!(
210            "failed to read file '{}': {e}",
211            path.as_ref().display()
212        ))
213    })?;
214
215    genfromtxt_from_str(&content, delimiter, filling_value, skiprows, missing_values)
216}
217
218/// Load delimited text from a string with missing value handling.
219pub fn genfromtxt_from_str(
220    content: &str,
221    delimiter: char,
222    filling_value: f64,
223    skiprows: usize,
224    missing_values: &[&str],
225) -> FerrayResult<Array<f64, Ix2>> {
226    let opts = TextParseOptions {
227        delimiter,
228        skiprows,
229        ..Default::default()
230    };
231
232    // Default missing markers
233    let mut all_missing: Vec<&str> = vec!["", "NA", "N/A", "nan", "NaN", "NAN", "--", "null"];
234    for mv in missing_values {
235        if !all_missing.contains(mv) {
236            all_missing.push(mv);
237        }
238    }
239
240    let (cells, nrows, ncols) = parse_text_grid_with_missing(content, &opts, &all_missing)?;
241
242    if nrows == 0 {
243        return Array::from_vec(Ix2::new([0, 0]), vec![]);
244    }
245
246    let data: FerrayResult<Vec<f64>> = cells
247        .iter()
248        .enumerate()
249        .map(|(i, cell)| match cell {
250            None => Ok(filling_value),
251            Some(s) => s.parse::<f64>().map_err(|e| {
252                let row = i / ncols;
253                let col = i % ncols;
254                FerrayError::io_error(format!(
255                    "failed to parse value '{s}' at row {row}, col {col}: {e}"
256                ))
257            }),
258        })
259        .collect();
260
261    let data = data?;
262    Array::from_vec(Ix2::new([nrows, ncols]), data)
263}
264
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a row-major `f64` matrix fixture of the given shape.
    fn matrix(rows: usize, cols: usize, values: &[f64]) -> Array<f64, Ix2> {
        Array::<f64, Ix2>::from_vec(Ix2::new([rows, cols]), values.to_vec()).unwrap()
    }

    /// Path of a scratch file inside this process's temp sub-directory,
    /// creating the directory on a best-effort basis.
    fn scratch_file(name: &str) -> std::path::PathBuf {
        let dir = std::env::temp_dir().join(format!("ferray_io_text_{}", std::process::id()));
        let _ = std::fs::create_dir_all(&dir);
        dir.join(name)
    }

    /// Render `arr` through `savetxt_to_writer` into an in-memory string.
    fn render(arr: &Array<f64, Ix2>, opts: &SaveTxtOptions) -> String {
        let mut buf: Vec<u8> = Vec::new();
        savetxt_to_writer(&mut buf, arr, opts).unwrap();
        String::from_utf8(buf).unwrap()
    }

    // --- loadtxt ---------------------------------------------------------

    #[test]
    fn loadtxt_simple_csv() {
        let arr = loadtxt_from_str::<f64>("1.0,2.0,3.0\n4.0,5.0,6.0\n", ',', 0).unwrap();
        assert_eq!(arr.shape(), &[2, 3]);
        assert_eq!(arr.as_slice().unwrap(), &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
    }

    #[test]
    fn loadtxt_with_skiprows() {
        let text = "# header\nname,value\n1.0,10.0\n2.0,20.0\n";
        let arr = loadtxt_from_str::<f64>(text, ',', 1).unwrap();
        assert_eq!(arr.shape(), &[2, 2]);
        assert_eq!(arr.as_slice().unwrap()[0], 1.0);
    }

    #[test]
    fn loadtxt_tab_delimited() {
        let arr = loadtxt_from_str::<i32>("1\t2\t3\n4\t5\t6\n", '\t', 0).unwrap();
        assert_eq!(arr.shape(), &[2, 3]);
        assert_eq!(arr.as_slice().unwrap(), &[1, 2, 3, 4, 5, 6]);
    }

    #[test]
    fn loadtxt_integers() {
        let arr = loadtxt_from_str::<i64>("10,20\n30,40\n", ',', 0).unwrap();
        assert_eq!(arr.as_slice().unwrap(), &[10i64, 20, 30, 40]);
    }

    #[test]
    fn loadtxt_file_roundtrip() {
        let expected = [1.0f64, 2.0, 3.0, 4.0, 5.0, 6.0];
        let arr = matrix(2, 3, &expected);
        let path = scratch_file("test.csv");

        savetxt(&path, &arr, &SaveTxtOptions::default()).unwrap();
        let loaded = loadtxt::<f64, _>(&path, ',', 0).unwrap();

        assert_eq!(loaded.shape(), &[2, 3]);
        assert_eq!(loaded.as_slice().unwrap(), &expected[..]);
        let _ = std::fs::remove_file(&path);
    }

    #[test]
    fn loadtxt_empty() {
        let arr = loadtxt_from_str::<f64>("", ',', 0).unwrap();
        assert_eq!(arr.shape(), &[0, 0]);
    }

    // --- savetxt ---------------------------------------------------------

    #[test]
    fn savetxt_custom_delimiter() {
        let arr = matrix(2, 2, &[1.0, 2.0, 3.0, 4.0]);
        let opts = SaveTxtOptions {
            delimiter: '\t',
            ..Default::default()
        };

        let output = render(&arr, &opts);
        assert!(output.contains('\t'));
        assert!(!output.contains(','));
    }

    #[test]
    fn savetxt_with_header_footer() {
        let arr = matrix(1, 2, &[1.0, 2.0]);
        let opts = SaveTxtOptions {
            header: Some("# my header".to_string()),
            footer: Some("# end".to_string()),
            ..Default::default()
        };

        let output = render(&arr, &opts);
        assert!(output.starts_with("# my header\n"));
        assert!(output.ends_with("# end\n"));
    }

    // --- genfromtxt ------------------------------------------------------

    #[test]
    fn genfromtxt_missing_nan() {
        let text = "1.0,2.0,3.0\n4.0,,6.0\n7.0,8.0,\n";
        let arr = genfromtxt_from_str(text, ',', f64::NAN, 0, &[]).unwrap();
        assert_eq!(arr.shape(), &[3, 3]);

        let values = arr.as_slice().unwrap();
        assert_eq!(values[0], 1.0);
        assert!(values[4].is_nan()); // empty middle cell becomes the fill value
        assert!(values[8].is_nan()); // trailing empty cell becomes the fill value
    }

    #[test]
    fn genfromtxt_na_marker() {
        let text = "1.0,NA,3.0\n4.0,5.0,NA\n";
        let arr = genfromtxt_from_str(text, ',', -999.0, 0, &["NA"]).unwrap();
        assert_eq!(arr.shape(), &[2, 3]);

        let values = arr.as_slice().unwrap();
        assert_eq!(values[1], -999.0);
        assert_eq!(values[5], -999.0);
    }

    #[test]
    fn genfromtxt_with_skiprows() {
        let text = "col1,col2\n1.0,2.0\n3.0,4.0\n";
        let arr = genfromtxt_from_str(text, ',', f64::NAN, 1, &[]).unwrap();
        assert_eq!(arr.shape(), &[2, 2]);
        assert_eq!(arr.as_slice().unwrap()[0], 1.0);
    }

    #[test]
    fn genfromtxt_file() {
        let path = scratch_file("genfromtxt_test.csv");
        std::fs::write(&path, "1.0,2.0\n,4.0\n").unwrap();

        let arr = genfromtxt(&path, ',', f64::NAN, 0, &[]).unwrap();
        assert_eq!(arr.shape(), &[2, 2]);
        assert!(arr.as_slice().unwrap()[2].is_nan());
        let _ = std::fs::remove_file(&path);
    }
}