Skip to main content

ferray_io/text/
mod.rs

1// ferray-io: Text I/O
2//
3// REQ-7: savetxt(path, &array, delimiter, fmt) writes 2D array as delimited text
4// REQ-8: loadtxt::<T>(path, delimiter, skiprows) reads delimited text into 2D array
5// REQ-9: genfromtxt(path, delimiter, filling_values) reads text with missing value handling
6
7pub mod parser;
8
9use std::fmt::Display;
10use std::fs;
11use std::io::Write;
12use std::path::Path;
13use std::str::FromStr;
14
15use ferray_core::Array;
16use ferray_core::dimension::{Ix1, Ix2};
17use ferray_core::dtype::Element;
18use ferray_core::error::{FerrayError, FerrayResult};
19
20use self::parser::{TextParseOptions, parse_text_grid, parse_text_grid_with_missing};
21
/// Configuration shared by the text writers in this module.
#[derive(Debug, Clone)]
pub struct SaveTxtOptions {
    /// Character written between the columns of a row (default: `','`).
    pub delimiter: char,
    /// Per-element format string, either printf style (e.g. `"%.6e"`) or
    /// Rust brace style (e.g. `"{:.6}"`).
    /// `None` selects the element's plain `Display` output.
    pub fmt: Option<String>,
    /// Text written once, followed by `newline`, before any data.
    pub header: Option<String>,
    /// Text written once, followed by `newline`, after all data.
    pub footer: Option<String>,
    /// Line terminator appended to every emitted line (default: `"\n"`).
    pub newline: String,
}

impl Default for SaveTxtOptions {
    /// Comma-delimited output with `"\n"` line endings and no format string,
    /// header, or footer.
    fn default() -> Self {
        let newline = String::from("\n");
        Self {
            delimiter: ',',
            fmt: None,
            header: None,
            footer: None,
            newline,
        }
    }
}
49
/// Horizontal placement of a formatted value inside its field width.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FieldAlign {
    Left,
    Right,
    Center,
}

/// Numeric layout shared by both format dialects understood by [`format_value`].
#[derive(Debug, Clone, Copy)]
struct FieldSpec {
    /// Character used to pad up to `width`.
    fill: char,
    /// Side(s) on which padding is inserted.
    align: FieldAlign,
    /// Minimum field width in characters; `0` means "no padding".
    width: usize,
    /// Digits after the decimal point, when requested.
    precision: Option<usize>,
    /// `'e'` / `'E'` select scientific notation; any other value means plain/fixed.
    mode: char,
}

/// Map a Rust alignment flag (`<`, `>`, `^`) to its [`FieldAlign`].
fn align_from_char(c: char) -> Option<FieldAlign> {
    match c {
        '<' => Some(FieldAlign::Left),
        '>' => Some(FieldAlign::Right),
        '^' => Some(FieldAlign::Center),
        _ => None,
    }
}

/// Parse a field width. Empty means `0`; anything that is not a plain decimal
/// number without a leading zero (i.e. a flag such as `0` or `+`) yields `None`.
fn parse_width(digits: &str) -> Option<usize> {
    if digits.is_empty() {
        return Some(0);
    }
    if digits.starts_with('0') || !digits.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }
    digits.parse().ok()
}

/// Split `"<width>.<precision>"` at the first dot. Returns `None` when a
/// precision is present but is not a valid number.
fn split_width_precision(body: &str) -> Option<(&str, Option<usize>)> {
    match body.split_once('.') {
        Some((width, precision)) => Some((width, Some(precision.parse().ok()?))),
        None => Some((body, None)),
    }
}

/// Parse the inside of a Rust-style `{:...}` spec:
/// `[[fill]align][width][.precision][e|E]`.
fn parse_rust_spec(spec: &str) -> Option<FieldSpec> {
    let mut chars = spec.chars();
    let first = chars.next();
    let second_align = chars.next().and_then(align_from_char);
    // As in `std::fmt`, a fill character is only recognized when it is
    // immediately followed by an alignment flag.
    let (fill, align, rest) = match (first, second_align) {
        (Some(fill), Some(align)) => (fill, align, &spec[fill.len_utf8() + 1..]),
        (Some(c), None) => match align_from_char(c) {
            Some(align) => (' ', align, &spec[1..]),
            None => (' ', FieldAlign::Right, spec),
        },
        _ => (' ', FieldAlign::Right, spec),
    };
    let (rest, mode) = if let Some(body) = rest.strip_suffix('e') {
        (body, 'e')
    } else if let Some(body) = rest.strip_suffix('E') {
        (body, 'E')
    } else {
        (rest, 'f')
    };
    let (width_part, precision) = split_width_precision(rest)?;
    let width = parse_width(width_part)?;
    Some(FieldSpec {
        fill,
        align,
        width,
        precision,
        mode,
    })
}

/// Parse the part of a printf-style spec after `%`:
/// `[-][width][.precision](e|E|f|g)`.
fn parse_printf_spec(spec: &str) -> Option<FieldSpec> {
    let (body, mode) = if let Some(rest) = spec.strip_suffix('e') {
        (rest, 'e')
    } else if let Some(rest) = spec.strip_suffix('E') {
        (rest, 'E')
    } else if let Some(rest) = spec.strip_suffix('f') {
        (rest, 'f')
    } else if let Some(rest) = spec.strip_suffix('g') {
        (rest, 'g')
    } else {
        // %d, %i, or unrecognized: the caller falls back to Display.
        return None;
    };
    let (body, align) = match body.strip_prefix('-') {
        Some(rest) => (rest, FieldAlign::Left),
        None => (body, FieldAlign::Right),
    };
    let (width_part, precision) = split_width_precision(body)?;
    // Unsupported flags in the width position (e.g. the `0` flag) are ignored
    // rather than rejected, so a valid precision is still honored.
    let width = parse_width(width_part).unwrap_or(0);
    Some(FieldSpec {
        fill: ' ',
        align,
        width,
        precision,
        mode,
    })
}

/// Pad `text` with the spec's fill character up to the spec's width.
fn pad_field(text: String, spec: &FieldSpec) -> String {
    let missing = spec.width.saturating_sub(text.chars().count());
    if missing == 0 {
        return text;
    }
    let left = match spec.align {
        FieldAlign::Left => 0,
        FieldAlign::Right => missing,
        // Same split as `std::fmt`: the extra fill character goes on the right.
        FieldAlign::Center => missing / 2,
    };
    let mut padded = String::with_capacity(text.len() + missing * spec.fill.len_utf8());
    padded.extend(std::iter::repeat(spec.fill).take(left));
    padded.push_str(&text);
    padded.extend(std::iter::repeat(spec.fill).take(missing - left));
    padded
}

/// Render `val` according to `spec`. Returns `None` when the spec needs a
/// numeric value but `val`'s `Display` output does not parse as `f64`.
fn render_field<T: Display>(val: &T, spec: FieldSpec) -> Option<String> {
    let plain = val.to_string();
    let scientific = matches!(spec.mode, 'e' | 'E');
    let text = if !scientific && spec.precision.is_none() {
        // No numeric reformatting requested: keep the exact Display text so
        // large integers are not rounded through f64.
        plain
    } else {
        let v: f64 = plain.parse().ok()?;
        match (spec.mode, spec.precision) {
            ('e', Some(p)) => format!("{v:.p$e}"),
            ('e', None) => format!("{v:e}"),
            ('E', Some(p)) => format!("{v:.p$E}"),
            ('E', None) => format!("{v:E}"),
            (_, Some(p)) => format!("{v:.p$}"),
            (_, None) => format!("{v}"),
        }
    };
    Some(pad_field(text, &spec))
}

/// Format a single value using a format string.
///
/// Supports:
/// - `NumPy` printf-style: `"%.6e"`, `"%.18e"`, `"%10.5f"`, `"%-10.4f"`, `"%d"`
/// - Rust-style with braces: `"{:.6}"`, `"{:.6e}"`, `"{:>10.5}"`, `"{:*^12.3}"`
/// - Templates containing `"{}"` (e.g. `"[{}]"`): every `"{}"` is replaced by
///   the value's `Display` output
///
/// Width and alignment are honored in both dialects. Scientific notation uses
/// Rust's exponent form (`1.5e0`), not C's (`1.5e+00`). `%g` is approximated:
/// with a precision it is rendered as fixed-point, without one it is the
/// default `Display` output. printf flags other than `-` are ignored.
///
/// Unrecognized patterns, and numeric patterns applied to a value whose
/// `Display` output is not a number, fall back to default `Display`
/// formatting; the format string itself is never emitted as data.
fn format_value<T: Display>(val: &T, fmt_str: &str) -> String {
    let rendered = if fmt_str.contains('{') {
        fmt_str
            .strip_prefix("{:")
            .and_then(|s| s.strip_suffix('}'))
            .and_then(parse_rust_spec)
            .and_then(|spec| render_field(val, spec))
            .or_else(|| {
                // Plain substitution template such as "{}" or "[{}]".
                fmt_str
                    .contains("{}")
                    .then(|| fmt_str.replace("{}", &val.to_string()))
            })
    } else {
        fmt_str
            .strip_prefix('%')
            .and_then(parse_printf_spec)
            .and_then(|spec| render_field(val, spec))
    };
    rendered.unwrap_or_else(|| val.to_string())
}
124
125/// Save a 2D array as delimited text.
126///
127/// The `fmt` field in [`SaveTxtOptions`] supports:
128/// - `NumPy` printf-style: `"%.6e"`, `"%.18e"`, `"%10.5f"`, `"%d"`
129/// - Rust-style: `"{:.6}"`, `"{:.6e}"`
130/// - Default: `None` uses standard `Display` formatting.
131///
132/// # Errors
133/// Returns `FerrayError::IoError` on file write failures.
134/// Returns `FerrayError::IoError` if the array is not contiguous.
135pub fn savetxt<T: Element + Display, P: AsRef<Path>>(
136    path: P,
137    array: &Array<T, Ix2>,
138    opts: &SaveTxtOptions,
139) -> FerrayResult<()> {
140    let mut file = std::fs::File::create(path.as_ref()).map_err(|e| {
141        FerrayError::io_error(format!(
142            "failed to create file '{}': {e}",
143            path.as_ref().display()
144        ))
145    })?;
146
147    savetxt_to_writer(&mut file, array, opts)
148}
149
150/// 1-D variant of [`savetxt`] (#494).
151///
152/// Equivalent to `numpy.savetxt(path, array)` for a 1-D input —
153/// each element is written on its own line. Mirrors numpy's
154/// behavior of treating a 1-D array as a single-column 2-D array.
155///
156/// # Errors
157/// Same as [`savetxt`]; additionally if the array is non-contiguous.
158pub fn savetxt_1d<T: Element + Display, P: AsRef<Path>>(
159    path: P,
160    array: &Array<T, Ix1>,
161    opts: &SaveTxtOptions,
162) -> FerrayResult<()> {
163    let mut file = std::fs::File::create(path.as_ref()).map_err(|e| {
164        FerrayError::io_error(format!(
165            "failed to create file '{}': {e}",
166            path.as_ref().display()
167        ))
168    })?;
169    savetxt_1d_to_writer(&mut file, array, opts)
170}
171
172/// 1-D variant of [`savetxt_to_writer`].
173pub fn savetxt_1d_to_writer<T: Element + Display, W: Write>(
174    writer: &mut W,
175    array: &Array<T, Ix1>,
176    opts: &SaveTxtOptions,
177) -> FerrayResult<()> {
178    if let Some(ref header) = opts.header {
179        write!(writer, "{header}").map_err(|e| FerrayError::io_error(e.to_string()))?;
180        writer
181            .write_all(opts.newline.as_bytes())
182            .map_err(|e| FerrayError::io_error(e.to_string()))?;
183    }
184    let slice = array
185        .as_slice()
186        .ok_or_else(|| FerrayError::io_error("cannot save non-contiguous array as text"))?;
187    for val in slice {
188        let formatted = if let Some(ref fmt_str) = opts.fmt {
189            format_value(val, fmt_str)
190        } else {
191            format!("{val}")
192        };
193        writer
194            .write_all(formatted.as_bytes())
195            .map_err(|e| FerrayError::io_error(e.to_string()))?;
196        writer
197            .write_all(opts.newline.as_bytes())
198            .map_err(|e| FerrayError::io_error(e.to_string()))?;
199    }
200    if let Some(ref footer) = opts.footer {
201        write!(writer, "{footer}").map_err(|e| FerrayError::io_error(e.to_string()))?;
202        writer
203            .write_all(opts.newline.as_bytes())
204            .map_err(|e| FerrayError::io_error(e.to_string()))?;
205    }
206    writer
207        .flush()
208        .map_err(|e| FerrayError::io_error(e.to_string()))?;
209    Ok(())
210}
211
212/// 1-D counterpart of [`loadtxt`] (#494).
213///
214/// Reads a single-column delimited text file (one value per line)
215/// into an `Array<T, Ix1>`. Equivalent to NumPy's
216/// `numpy.loadtxt(path, ndmin=1)` when the file has one column.
217///
218/// # Errors
219/// Same as [`loadtxt`].
220pub fn loadtxt_1d<T, P>(path: P, delimiter: char, skiprows: usize) -> FerrayResult<Array<T, Ix1>>
221where
222    T: Element + FromStr,
223    T::Err: Display,
224    P: AsRef<Path>,
225{
226    let arr2 = loadtxt::<T, _>(path, delimiter, skiprows)?;
227    let shape = arr2.shape();
228    let n = shape[0] * shape[1];
229    let data: Vec<T> = arr2.iter().cloned().collect();
230    Array::<T, Ix1>::from_vec(Ix1::new([n]), data)
231}
232
233/// Save a 2D array as delimited text to a writer.
234pub fn savetxt_to_writer<T: Element + Display, W: Write>(
235    writer: &mut W,
236    array: &Array<T, Ix2>,
237    opts: &SaveTxtOptions,
238) -> FerrayResult<()> {
239    let shape = array.shape();
240    let nrows = shape[0];
241    let ncols = shape[1];
242
243    if let Some(ref header) = opts.header {
244        write!(writer, "{header}").map_err(|e| FerrayError::io_error(e.to_string()))?;
245        writer
246            .write_all(opts.newline.as_bytes())
247            .map_err(|e| FerrayError::io_error(e.to_string()))?;
248    }
249
250    let slice = array
251        .as_slice()
252        .ok_or_else(|| FerrayError::io_error("cannot save non-contiguous array as text"))?;
253
254    for row in 0..nrows {
255        for col in 0..ncols {
256            if col > 0 {
257                write!(writer, "{}", opts.delimiter)
258                    .map_err(|e| FerrayError::io_error(e.to_string()))?;
259            }
260            let val = &slice[row * ncols + col];
261            if let Some(ref fmt_str) = opts.fmt {
262                // Format string support:
263                // - NumPy printf-style: "%.6e", "%.18e", "%10.5f", "%d"
264                // - Rust-style: "{:.6}", "{:.6e}", "{:>10.5}"
265                // We convert common printf patterns to Rust format, then
266                // fall back to string substitution.
267                let formatted = format_value(val, fmt_str);
268                write!(writer, "{formatted}").map_err(|e| FerrayError::io_error(e.to_string()))?;
269            } else {
270                write!(writer, "{val}").map_err(|e| FerrayError::io_error(e.to_string()))?;
271            }
272        }
273        writer
274            .write_all(opts.newline.as_bytes())
275            .map_err(|e| FerrayError::io_error(e.to_string()))?;
276    }
277
278    if let Some(ref footer) = opts.footer {
279        write!(writer, "{footer}").map_err(|e| FerrayError::io_error(e.to_string()))?;
280        writer
281            .write_all(opts.newline.as_bytes())
282            .map_err(|e| FerrayError::io_error(e.to_string()))?;
283    }
284
285    writer
286        .flush()
287        .map_err(|e| FerrayError::io_error(e.to_string()))?;
288    Ok(())
289}
290
291/// Load a delimited text file into a 2D array.
292///
293/// Each row of the text file becomes a row in the array. All rows must
294/// have the same number of columns.
295///
296/// # Type Parameters
297/// - `T`: Element type to parse each cell into. Must implement `FromStr`.
298///
299/// # Errors
300/// - Returns `FerrayError::IoError` on file read or parse failures.
301pub fn loadtxt<T, P>(path: P, delimiter: char, skiprows: usize) -> FerrayResult<Array<T, Ix2>>
302where
303    T: Element + FromStr,
304    T::Err: Display,
305    P: AsRef<Path>,
306{
307    let content = fs::read_to_string(path.as_ref()).map_err(|e| {
308        FerrayError::io_error(format!(
309            "failed to read file '{}': {e}",
310            path.as_ref().display()
311        ))
312    })?;
313
314    loadtxt_from_str(&content, delimiter, skiprows)
315}
316
317/// Load delimited text from a string into a 2D array.
318pub fn loadtxt_from_str<T>(
319    content: &str,
320    delimiter: char,
321    skiprows: usize,
322) -> FerrayResult<Array<T, Ix2>>
323where
324    T: Element + FromStr,
325    T::Err: Display,
326{
327    let opts = TextParseOptions {
328        delimiter,
329        skiprows,
330        ..Default::default()
331    };
332
333    let (cells, nrows, ncols) = parse_text_grid(content, &opts)?;
334
335    if nrows == 0 {
336        return Array::from_vec(Ix2::new([0, 0]), vec![]);
337    }
338
339    let data: FerrayResult<Vec<T>> = cells
340        .iter()
341        .enumerate()
342        .map(|(i, cell)| {
343            cell.parse::<T>().map_err(|e| {
344                let row = i / ncols;
345                let col = i % ncols;
346                FerrayError::io_error(format!(
347                    "failed to parse value '{cell}' at row {row}, col {col}: {e}"
348                ))
349            })
350        })
351        .collect();
352
353    let data = data?;
354    Array::from_vec(Ix2::new([nrows, ncols]), data)
355}
356
357/// Load a delimited text file with missing value handling.
358///
359/// Missing values (empty cells or cells matching common missing indicators)
360/// are replaced with `filling_values`. This is analogous to `NumPy`'s `genfromtxt`.
361///
362/// Returns a 2D `f64` array where missing values are replaced with `filling_value`
363/// (typically `f64::NAN`).
364///
365/// # Errors
366/// Returns `FerrayError::IoError` on file read or parse failures.
367pub fn genfromtxt<P: AsRef<Path>>(
368    path: P,
369    delimiter: char,
370    filling_value: f64,
371    skiprows: usize,
372    missing_values: &[&str],
373) -> FerrayResult<Array<f64, Ix2>> {
374    let content = fs::read_to_string(path.as_ref()).map_err(|e| {
375        FerrayError::io_error(format!(
376            "failed to read file '{}': {e}",
377            path.as_ref().display()
378        ))
379    })?;
380
381    genfromtxt_from_str(&content, delimiter, filling_value, skiprows, missing_values)
382}
383
384/// Load delimited text from a string with missing value handling.
385pub fn genfromtxt_from_str(
386    content: &str,
387    delimiter: char,
388    filling_value: f64,
389    skiprows: usize,
390    missing_values: &[&str],
391) -> FerrayResult<Array<f64, Ix2>> {
392    let opts = TextParseOptions {
393        delimiter,
394        skiprows,
395        ..Default::default()
396    };
397
398    // Default missing markers
399    let mut all_missing: Vec<&str> = vec!["", "NA", "N/A", "nan", "NaN", "NAN", "--", "null"];
400    for mv in missing_values {
401        if !all_missing.contains(mv) {
402            all_missing.push(mv);
403        }
404    }
405
406    let (cells, nrows, ncols) = parse_text_grid_with_missing(content, &opts, &all_missing)?;
407
408    if nrows == 0 {
409        return Array::from_vec(Ix2::new([0, 0]), vec![]);
410    }
411
412    let data: FerrayResult<Vec<f64>> = cells
413        .iter()
414        .enumerate()
415        .map(|(i, cell)| match cell {
416            None => Ok(filling_value),
417            Some(s) => s.parse::<f64>().map_err(|e| {
418                let row = i / ncols;
419                let col = i % ncols;
420                FerrayError::io_error(format!(
421                    "failed to parse value '{s}' at row {row}, col {col}: {e}"
422                ))
423            }),
424        })
425        .collect();
426
427    let data = data?;
428    Array::from_vec(Ix2::new([nrows, ncols]), data)
429}
430
431// ---------------------------------------------------------------------------
432// fromregex
433// ---------------------------------------------------------------------------
434
435/// Read text using a regular expression to extract structured groups.
436///
437/// `regex` must contain at least one capturing group. For every line in
438/// `content`, the regex is matched against the full line; matches where every
439/// capture is parsed successfully via `T::from_str` produce one row of the
440/// output. Lines that do not match (or that contain unparseable captures)
441/// are skipped.
442///
443/// The result is a 2-D `Array<T, Ix2>` of shape `(rows, captures)`.
444///
445/// Analogous to `numpy.fromregex`. NumPy's structured-dtype support is not
446/// modeled here — every capture group must parse to the same `T`; for mixed
447/// dtypes use one call per column or the structured-record API in
448/// `ferray-core::record`.
449///
450/// # Errors
451/// - `FerrayError::InvalidValue` if the regex cannot be compiled or contains
452///   no capture groups.
453pub fn fromregex<T>(content: &str, regex: &str) -> FerrayResult<Array<T, Ix2>>
454where
455    T: Element + FromStr,
456    T::Err: Display,
457{
458    let re = regex::Regex::new(regex)
459        .map_err(|e| FerrayError::invalid_value(format!("fromregex: invalid regex: {e}")))?;
460    let n_groups = re.captures_len().saturating_sub(1);
461    if n_groups == 0 {
462        return Err(FerrayError::invalid_value(
463            "fromregex: regex must contain at least one capture group",
464        ));
465    }
466    let mut data: Vec<T> = Vec::new();
467    let mut nrows = 0usize;
468    'lines: for line in content.lines() {
469        if let Some(caps) = re.captures(line) {
470            // Try to parse every capture group into T. If any fails, skip this row.
471            let start = data.len();
472            for g in 1..=n_groups {
473                let m = caps.get(g).map_or("", |m| m.as_str());
474                match m.parse::<T>() {
475                    Ok(v) => data.push(v),
476                    Err(_) => {
477                        // Roll back this row's pushes, then continue with next line.
478                        data.truncate(start);
479                        continue 'lines;
480                    }
481                }
482            }
483            nrows += 1;
484        }
485    }
486    Array::from_vec(Ix2::new([nrows, n_groups]), data)
487}
488
489/// Read regex-extracted rows from a file, parsing every capture group as `T`.
490///
491/// Convenience wrapper that reads `path` to a string and calls [`fromregex`].
492///
493/// # Errors
494/// - `FerrayError::IoError` if the file cannot be read.
495/// - Errors from [`fromregex`] (regex compile / no groups).
496pub fn fromregex_from_file<T, P>(path: P, regex: &str) -> FerrayResult<Array<T, Ix2>>
497where
498    T: Element + FromStr,
499    T::Err: Display,
500    P: AsRef<Path>,
501{
502    let content = fs::read_to_string(path.as_ref()).map_err(|e| {
503        FerrayError::io_error(format!(
504            "fromregex: failed to read file '{}': {e}",
505            path.as_ref().display()
506        ))
507    })?;
508    fromregex::<T>(&content, regex)
509}
510
#[cfg(test)]
#[allow(clippy::float_cmp)] // Roundtrip tests compare exactly against hand-picked text values.
mod tests {
    use super::*;

    /// Render a 2-D array through `savetxt_to_writer` into an in-memory string.
    fn render_2d(arr: &Array<f64, Ix2>, opts: &SaveTxtOptions) -> String {
        let mut sink: Vec<u8> = Vec::new();
        savetxt_to_writer(&mut sink, arr, opts).unwrap();
        String::from_utf8(sink).unwrap()
    }

    /// Build a 2-D `f64` array of the given shape from row-major data.
    fn matrix(rows: usize, cols: usize, data: Vec<f64>) -> Array<f64, Ix2> {
        Array::<f64, Ix2>::from_vec(Ix2::new([rows, cols]), data).unwrap()
    }

    // ---- 1-D variants (#494) -------------------------------------------

    #[test]
    fn savetxt_1d_writes_one_value_per_line() {
        let values = vec![1.5, 2.5, 3.0, 4.0];
        let arr = Array::<f64, Ix1>::from_vec(Ix1::new([4]), values).unwrap();
        let mut sink: Vec<u8> = Vec::new();
        savetxt_1d_to_writer(&mut sink, &arr, &SaveTxtOptions::default()).unwrap();
        let text = String::from_utf8(sink).unwrap();
        assert_eq!(text, "1.5\n2.5\n3\n4\n");
    }

    #[test]
    fn savetxt_1d_then_loadtxt_1d_roundtrip() {
        let values = vec![1.0, -2.5, 3.5, 0.0, 7.25];
        let original = Array::<f64, Ix1>::from_vec(Ix1::new([5]), values).unwrap();
        let tmp = tempfile::tempdir().unwrap();
        let file = tmp.path().join("vec.txt");
        savetxt_1d(&file, &original, &SaveTxtOptions::default()).unwrap();
        let restored: Array<f64, Ix1> = loadtxt_1d(&file, ',', 0).unwrap();
        assert_eq!(restored.shape(), &[5]);
        assert_eq!(restored.as_slice().unwrap(), original.as_slice().unwrap());
    }

    #[test]
    fn loadtxt_1d_flattens_multicolumn_input() {
        // A two-column file must come back flattened in row-major order.
        let tmp = tempfile::tempdir().unwrap();
        let file = tmp.path().join("rect.txt");
        std::fs::write(&file, "1,2\n3,4\n5,6\n").unwrap();
        let flat: Array<i64, Ix1> = loadtxt_1d(&file, ',', 0).unwrap();
        assert_eq!(flat.as_slice().unwrap(), &[1, 2, 3, 4, 5, 6]);
    }

    // ---- loadtxt / savetxt ----------------------------------------------

    #[test]
    fn loadtxt_simple_csv() {
        let parsed: Array<f64, Ix2> =
            loadtxt_from_str("1.0,2.0,3.0\n4.0,5.0,6.0\n", ',', 0).unwrap();
        assert_eq!(parsed.shape(), &[2, 3]);
        assert_eq!(parsed.as_slice().unwrap(), &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
    }

    #[test]
    fn loadtxt_with_skiprows() {
        let text = "# header\nname,value\n1.0,10.0\n2.0,20.0\n";
        let parsed: Array<f64, Ix2> = loadtxt_from_str(text, ',', 1).unwrap();
        assert_eq!(parsed.shape(), &[2, 2]);
        assert_eq!(parsed.as_slice().unwrap()[0], 1.0);
    }

    #[test]
    fn loadtxt_tab_delimited() {
        let parsed: Array<i32, Ix2> = loadtxt_from_str("1\t2\t3\n4\t5\t6\n", '\t', 0).unwrap();
        assert_eq!(parsed.shape(), &[2, 3]);
        assert_eq!(parsed.as_slice().unwrap(), &[1, 2, 3, 4, 5, 6]);
    }

    #[test]
    fn loadtxt_integers() {
        let parsed: Array<i64, Ix2> = loadtxt_from_str("10,20\n30,40\n", ',', 0).unwrap();
        assert_eq!(parsed.as_slice().unwrap(), &[10i64, 20, 30, 40]);
    }

    #[test]
    fn loadtxt_file_roundtrip() {
        let expected = vec![1.0f64, 2.0, 3.0, 4.0, 5.0, 6.0];
        let arr = matrix(2, 3, expected.clone());

        let tmp = tempfile::TempDir::new().unwrap();
        let file = tmp.path().join("test.csv");

        savetxt(&file, &arr, &SaveTxtOptions::default()).unwrap();
        let restored: Array<f64, Ix2> = loadtxt(&file, ',', 0).unwrap();

        assert_eq!(restored.shape(), &[2, 3]);
        assert_eq!(restored.as_slice().unwrap(), &expected[..]);
    }

    #[test]
    fn savetxt_custom_delimiter() {
        let arr = matrix(2, 2, vec![1.0f64, 2.0, 3.0, 4.0]);
        let opts = SaveTxtOptions {
            delimiter: '\t',
            ..Default::default()
        };
        let text = render_2d(&arr, &opts);
        assert!(text.contains('\t'));
        assert!(!text.contains(','));
    }

    #[test]
    fn savetxt_with_header_footer() {
        let arr = matrix(1, 2, vec![1.0f64, 2.0]);
        let opts = SaveTxtOptions {
            header: Some("# my header".to_string()),
            footer: Some("# end".to_string()),
            ..Default::default()
        };
        let text = render_2d(&arr, &opts);
        assert!(text.starts_with("# my header\n"));
        assert!(text.ends_with("# end\n"));
    }

    // ---- genfromtxt -----------------------------------------------------

    #[test]
    fn genfromtxt_missing_nan() {
        let text = "1.0,2.0,3.0\n4.0,,6.0\n7.0,8.0,\n";
        let arr = genfromtxt_from_str(text, ',', f64::NAN, 0, &[]).unwrap();
        assert_eq!(arr.shape(), &[3, 3]);
        let flat = arr.as_slice().unwrap();
        assert_eq!(flat[0], 1.0);
        assert!(flat[4].is_nan()); // empty middle cell filled with NaN
        assert!(flat[8].is_nan()); // trailing empty cell filled with NaN
    }

    #[test]
    fn genfromtxt_na_marker() {
        let text = "1.0,NA,3.0\n4.0,5.0,NA\n";
        let arr = genfromtxt_from_str(text, ',', -999.0, 0, &["NA"]).unwrap();
        assert_eq!(arr.shape(), &[2, 3]);
        let flat = arr.as_slice().unwrap();
        assert_eq!(flat[1], -999.0);
        assert_eq!(flat[5], -999.0);
    }

    #[test]
    fn genfromtxt_with_skiprows() {
        let text = "col1,col2\n1.0,2.0\n3.0,4.0\n";
        let arr = genfromtxt_from_str(text, ',', f64::NAN, 1, &[]).unwrap();
        assert_eq!(arr.shape(), &[2, 2]);
        assert_eq!(arr.as_slice().unwrap()[0], 1.0);
    }

    #[test]
    fn genfromtxt_file() {
        let tmp = tempfile::TempDir::new().unwrap();
        let file = tmp.path().join("genfromtxt_test.csv");
        std::fs::write(&file, "1.0,2.0\n,4.0\n").unwrap();

        let arr = genfromtxt(&file, ',', f64::NAN, 0, &[]).unwrap();
        assert_eq!(arr.shape(), &[2, 2]);
        assert!(arr.as_slice().unwrap()[2].is_nan());
    }

    #[test]
    fn loadtxt_empty() {
        let parsed: Array<f64, Ix2> = loadtxt_from_str("", ',', 0).unwrap();
        assert_eq!(parsed.shape(), &[0, 0]);
    }

    // ---- fromregex ------------------------------------------------------

    #[test]
    fn fromregex_basic_one_group() {
        // Integers are pulled out of "value=NN" lines; other lines are ignored.
        let text = "value=10\nvalue=20\nirrelevant\nvalue=30\n";
        let arr: Array<i32, Ix2> = fromregex(text, r"^value=(\d+)$").unwrap();
        assert_eq!(arr.shape(), &[3, 1]);
        assert_eq!(arr.as_slice().unwrap(), &[10, 20, 30]);
    }

    #[test]
    fn fromregex_multiple_groups() {
        // Two captures per row give shape (n, 2).
        let arr: Array<f64, Ix2> = fromregex("1,2\n3,4\n5,6\n", r"^([\d.]+),([\d.]+)$").unwrap();
        assert_eq!(arr.shape(), &[3, 2]);
        assert_eq!(arr.as_slice().unwrap(), &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
    }

    #[test]
    fn fromregex_no_groups_errs() {
        let outcome: FerrayResult<Array<i32, Ix2>> = fromregex("a\nb\n", r"^[ab]$");
        assert!(outcome.is_err());
    }

    #[test]
    fn fromregex_invalid_regex_errs() {
        let outcome: FerrayResult<Array<i32, Ix2>> = fromregex("", r"(unclosed");
        assert!(outcome.is_err());
    }

    #[test]
    fn fromregex_skips_unparseable_rows() {
        // The middle row captures a non-numeric token and must be dropped silently.
        let arr: Array<i32, Ix2> = fromregex("v=10\nv=foo\nv=20\n", r"^v=(\S+)$").unwrap();
        assert_eq!(arr.shape(), &[2, 1]);
        assert_eq!(arr.as_slice().unwrap(), &[10, 20]);
    }

    #[test]
    fn fromregex_from_file_roundtrip() {
        let tmp = tempfile::TempDir::new().unwrap();
        let file = tmp.path().join("regex_test.txt");
        std::fs::write(&file, "x=1\nx=2\nx=3\n").unwrap();
        let arr: Array<i32, Ix2> = fromregex_from_file(&file, r"^x=(\d+)$").unwrap();
        assert_eq!(arr.shape(), &[3, 1]);
        assert_eq!(arr.as_slice().unwrap(), &[1, 2, 3]);
    }
}