#![allow(dead_code)]
use std::fs::File;
use std::path::Path;
/// A single data record, stored as one optional string per column.
///
/// Rows yielded by [`CsvStream`] use `None` for cells that were empty or
/// absent in the underlying record.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Row {
    /// Cell contents in column order; `None` marks a cell without a value.
    pub values: Vec<Option<String>>,
}

impl Row {
    /// Returns the text stored at column `idx`.
    ///
    /// Yields `None` both when `idx` is past the end of the row and when the
    /// cell at `idx` holds no value.
    pub fn get(&self, idx: usize) -> Option<&str> {
        match self.values.get(idx) {
            Some(Some(text)) => Some(text.as_str()),
            _ => None,
        }
    }

    /// Number of cells in the row, counting the ones that are `None`.
    pub fn len(&self) -> usize {
        let Self { values } = self;
        values.len()
    }

    /// `true` when the row has no cells at all.
    pub fn is_empty(&self) -> bool {
        let Self { values } = self;
        values.is_empty()
    }
}
/// Row-at-a-time reader over a CSV file.
///
/// Holds the column names taken from the header record together with the
/// underlying `csv` reader; iterate over it to obtain [`Row`] values.
pub struct CsvStream {
    /// Column names, in file order.
    headers: Vec<String>,
    /// Underlying reader positioned after the header record.
    reader: csv::Reader<File>,
    /// Number of header columns; every yielded row has exactly this many cells.
    n_cols: usize,
}

impl CsvStream {
    /// Column names in file order.
    pub fn headers(&self) -> &[String] {
        self.headers.as_slice()
    }

    /// Zero-based position of the first column whose header equals `name`
    /// exactly, or `None` when no header matches.
    pub fn col_index(&self, name: &str) -> Option<usize> {
        for (idx, header) in self.headers.iter().enumerate() {
            if header.as_str() == name {
                return Some(idx);
            }
        }
        None
    }

    /// Number of columns declared by the header record.
    pub fn n_columns(&self) -> usize {
        self.n_cols
    }
}
impl Iterator for CsvStream {
    type Item = Result<Row, String>;

    /// Reads the next record and converts it into a [`Row`].
    ///
    /// The row always has exactly `n_cols` cells: a field that is empty or
    /// missing from the record becomes `None`, and fields past `n_cols` are
    /// not copied. A record the reader fails on is reported as
    /// `Some(Err(..))` with a `"CSV row: "` prefix. Returns `None` once the
    /// reader has no more records.
    fn next(&mut self) -> Option<Self::Item> {
        let outcome = self.reader.records().next()?;
        let width = self.n_cols;
        Some(
            outcome
                .map_err(|e| format!("CSV row: {e}"))
                .map(|record| Row {
                    values: (0..width)
                        .map(|col| {
                            record
                                .get(col)
                                .filter(|field| !field.is_empty())
                                .map(String::from)
                        })
                        .collect(),
                }),
        )
    }
}
/// Opens the CSV file at `path` and prepares it for row-by-row iteration.
///
/// The reader is built with `has_headers(true)` and `flexible(true)`; the
/// header record is read immediately and its length fixes the column count
/// of the returned stream.
///
/// # Errors
///
/// Returns a message starting with `"CSV open <path>: "` when the file cannot
/// be opened, or `"CSV header <path>: "` when the header record cannot be
/// read.
pub fn open_csv_stream(path: &Path) -> Result<CsvStream, String> {
    let mut builder = csv::ReaderBuilder::new();
    builder.has_headers(true).flexible(true);

    let mut reader = match builder.from_path(path) {
        Ok(opened) => opened,
        Err(e) => return Err(format!("CSV open {}: {e}", path.display())),
    };

    let header_record = match reader.headers() {
        Ok(record) => record,
        Err(e) => return Err(format!("CSV header {}: {e}", path.display())),
    };
    // Copy the names out so the borrow of `reader` ends before it is moved.
    let headers: Vec<String> = header_record.iter().map(String::from).collect();

    Ok(CsvStream {
        n_cols: headers.len(),
        headers,
        reader,
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// Creates a temporary file containing `content` and returns its handle.
    fn write_csv(content: &str) -> tempfile::NamedTempFile {
        let mut tmp = tempfile::NamedTempFile::new().unwrap();
        tmp.write_all(content.as_bytes()).unwrap();
        tmp
    }

    /// Shorthand for a populated cell in an expected row.
    fn cell(text: &str) -> Option<String> {
        Some(text.to_string())
    }

    #[test]
    fn reads_header_and_rows() {
        let file = write_csv("a,b,c\n1,2,3\n4,5,6\n");
        let mut stream = open_csv_stream(file.path()).unwrap();

        // Header metadata.
        assert_eq!(stream.headers(), &["a", "b", "c"]);
        assert_eq!(stream.n_columns(), 3);
        assert_eq!(stream.col_index("b"), Some(1));
        assert_eq!(stream.col_index("missing"), None);

        // First data record.
        let first = stream.next().unwrap().unwrap();
        assert_eq!(first.values, vec![cell("1"), cell("2"), cell("3")]);
        assert_eq!(first.get(0), Some("1"));
        assert_eq!(first.get(2), Some("3"));

        // Second data record, then end of stream.
        let second = stream.next().unwrap().unwrap();
        assert_eq!(second.values, vec![cell("4"), cell("5"), cell("6")]);
        assert!(stream.next().is_none());
    }

    #[test]
    fn empty_cells_become_none() {
        let file = write_csv("a,b,c\n1,,3\n,,\n");
        let mut stream = open_csv_stream(file.path()).unwrap();

        let partly_filled = stream.next().unwrap().unwrap();
        assert_eq!(partly_filled.values, vec![cell("1"), None, cell("3")]);

        let all_blank = stream.next().unwrap().unwrap();
        assert_eq!(all_blank.values, vec![None, None, None]);
    }

    #[test]
    fn header_only_yields_zero_rows() {
        let file = write_csv("only,header\n");
        let mut stream = open_csv_stream(file.path()).unwrap();

        assert_eq!(stream.headers(), &["only", "header"]);
        assert!(stream.next().is_none());
    }

    #[test]
    fn missing_file_yields_error() {
        let outcome = open_csv_stream(Path::new("/nonexistent/file.csv"));
        assert!(outcome.is_err());
    }

    #[test]
    fn row_get_out_of_bounds_returns_none() {
        let file = write_csv("a,b\nx,y\n");
        let mut stream = open_csv_stream(file.path()).unwrap();
        let row = stream.next().unwrap().unwrap();

        assert_eq!(row.get(0), Some("x"));
        assert_eq!(row.get(1), Some("y"));
        assert_eq!(row.get(5), None);
        assert_eq!(row.len(), 2);
        assert!(!row.is_empty());
    }

    #[test]
    fn streams_independent_of_file_size_one_row_in_ram() {
        // Header line followed by 10,000 five-column data lines.
        let body: String = (0..10_000)
            .map(|i| format!("{i},{i},{i},{i},{i}\n"))
            .collect();
        let content = format!("c0,c1,c2,c3,c4\n{body}");

        let file = write_csv(&content);
        let stream = open_csv_stream(file.path()).unwrap();

        let ok_rows = stream.filter(|item| item.is_ok()).count();
        assert_eq!(ok_rows, 10_000);
    }
}