use std::fs::File;
use std::io::{self, Write};
use std::path::Path;
use minarrow::{SuperTable, Table};
use crate::models::encoders::csv::{CsvEncodeOptions, encode_supertable_csv, encode_table_csv};
/// CSV writer that pairs an output sink with the options used to encode into it.
///
/// `W` is any [`Write`] implementor (an in-memory `Vec<u8>`, a `File`, ...).
pub struct CsvWriter<W>
where
    W: Write,
{
    /// Destination that receives the encoded CSV bytes.
    writer: W,
    /// Encoding options handed to the encoder on every write call.
    options: CsvEncodeOptions,
}
impl CsvWriter<Vec<u8>> {
pub fn new_vec() -> Self {
Self::with_options(Vec::new(), CsvEncodeOptions::default())
}
pub fn into_inner(self) -> Vec<u8> {
self.writer
}
}
impl<W: Write> CsvWriter<W> {
    /// Wraps `writer`, using the default [`CsvEncodeOptions`].
    pub fn new(writer: W) -> Self {
        Self {
            writer,
            options: CsvEncodeOptions::default(),
        }
    }

    /// Wraps `writer`, encoding with the caller-supplied `options`.
    pub fn with_options(writer: W, options: CsvEncodeOptions) -> Self {
        Self { writer, options }
    }

    /// Encodes `table` as CSV into the underlying writer.
    ///
    /// Delegates to `encode_table_csv` with this writer's options. This
    /// method does not call [`flush`](Self::flush) itself.
    ///
    /// # Errors
    /// Returns whatever `io::Error` the encoder reports.
    pub fn write_table(&mut self, table: &Table) -> io::Result<()> {
        let sink = &mut self.writer;
        let opts = &self.options;
        encode_table_csv(table, sink, opts)
    }

    /// Encodes the super-table `st` as CSV into the underlying writer.
    ///
    /// Delegates to `encode_supertable_csv` with this writer's options. This
    /// method does not call [`flush`](Self::flush) itself.
    ///
    /// # Errors
    /// Returns whatever `io::Error` the encoder reports.
    pub fn write_supertable(&mut self, st: &SuperTable) -> io::Result<()> {
        let sink = &mut self.writer;
        let opts = &self.options;
        encode_supertable_csv(st, sink, opts)
    }

    /// Flushes the underlying writer.
    ///
    /// # Errors
    /// Propagates the error returned by the writer's own `flush`.
    pub fn flush(&mut self) -> io::Result<()> {
        Write::flush(&mut self.writer)
    }
}
impl CsvWriter<File> {
    /// Creates the file at `path` via [`File::create`] and wraps it with the
    /// default [`CsvEncodeOptions`].
    ///
    /// The `File` is used directly, with no buffering layer added here.
    ///
    /// # Errors
    /// Returns the `io::Error` from [`File::create`] if the file cannot be created.
    pub fn to_path<P: AsRef<Path>>(path: P) -> io::Result<Self> {
        File::create(path).map(Self::new)
    }

    /// Creates the file at `path` via [`File::create`] and wraps it with the
    /// supplied `options`.
    ///
    /// The `File` is used directly, with no buffering layer added here.
    ///
    /// # Errors
    /// Returns the `io::Error` from [`File::create`] if the file cannot be created.
    pub fn to_path_with_options<P: AsRef<Path>>(
        path: P,
        options: CsvEncodeOptions,
    ) -> io::Result<Self> {
        File::create(path).map(|file| Self::with_options(file, options))
    }
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use super::*;
use crate::models::encoders::csv::CsvEncodeOptions;
use minarrow::{
Array, Bitmask, Buffer, Field, FieldArray, NumericArray, Table, TextArray, vec64,
};
/// Builds the 3-row fixture table shared by the tests in this module.
///
/// Columns:
/// - `ints`: non-nullable Int32 holding `[1, 2, 3]`
/// - `strs`: nullable String holding `["foo", null, "barbaz"]`
fn make_test_table() -> Table {
    let int_col = FieldArray {
        field: Field {
            name: "ints".to_string(),
            dtype: minarrow::ArrowType::Int32,
            nullable: false,
            metadata: Default::default(),
        }
        .into(),
        array: Array::NumericArray(NumericArray::Int32(
            minarrow::IntegerArray {
                data: Buffer::from(vec64![1, 2, 3]),
                null_mask: None,
            }
            .into(),
        )),
        null_count: 0,
    };
    let str_col = FieldArray {
        field: Field {
            name: "strs".to_string(),
            dtype: minarrow::ArrowType::String,
            nullable: true,
            metadata: Default::default(),
        }
        .into(),
        array: Array::TextArray(TextArray::String32(
            minarrow::StringArray {
                // Row i spans data[offsets[i]..offsets[i + 1]]:
                //   row 0 -> 0..3 = "foo"
                //   row 1 -> 3..3 = ""       (masked out as null)
                //   row 2 -> 3..9 = "barbaz"
                // The previous fixture (b"foo\0barbaz" with a final offset
                // of 7) made row 2 the bytes "\0bar", which contradicts the
                // "3,barbaz" line the tests expect.
                offsets: Buffer::from(vec64![0u32, 3, 3, 9]),
                data: Buffer::from_vec64(b"foobarbaz".to_vec().into()),
                null_mask: Some(Bitmask::from_bools(&[true, false, true])),
            }
            .into(),
        )),
        null_count: 1,
    };
    Table {
        name: "test".to_string(),
        cols: vec![int_col, str_col],
        n_rows: 3,
    }
}
/// Writes the fixture table with default options into an in-memory buffer
/// and checks the header plus all three data rows.
#[test]
fn test_csv_writer_table_default() {
    let mut writer = CsvWriter::new_vec();
    writer.write_table(&make_test_table()).unwrap();
    writer.flush().unwrap();

    let text = String::from_utf8(writer.into_inner()).unwrap();
    let rows: Vec<&str> = text.lines().collect();
    // A single comparison covers both the row count and every row's content.
    assert_eq!(rows, ["ints,strs", "1,foo", "2,", "3,barbaz"]);
}
/// Writes the fixture table with a `;` delimiter and `"NULL"` as the null
/// representation, then checks that both settings appear in the output.
#[test]
fn test_csv_writer_table_custom_options() {
    let table = make_test_table();
    let mut opts = CsvEncodeOptions::default();
    opts.delimiter = b';';
    opts.null_repr = "NULL";
    // `opts` is not used again, so it is moved into the writer rather than cloned.
    let mut w = CsvWriter::with_options(Vec::new(), opts);
    w.write_table(&table).unwrap();
    let out = w.into_inner();
    let csv = String::from_utf8(out).unwrap();
    assert!(csv.starts_with("ints;strs\n"));
    assert!(csv.contains("2;NULL"));
}
/// Writes a super-table made of two copies of the fixture table and checks
/// that the output has one header line followed by the rows of both batches.
#[test]
fn test_csv_writer_supertable() {
    // Each batch is built fresh and moved straight into its `Arc`; no clones needed.
    let batches = vec![Arc::new(make_test_table()), Arc::new(make_test_table())];
    let supertbl = SuperTable::from_batches(batches, None);
    let mut writer = CsvWriter::new_vec();
    writer.write_supertable(&supertbl).unwrap();
    let csv = String::from_utf8(writer.into_inner()).unwrap();
    let lines: Vec<_> = csv.lines().collect();
    // 1 header line + 3 rows from each of the two batches.
    assert_eq!(lines.len(), 1 + 3 + 3);
    assert_eq!(lines[0], "ints,strs");
    // Index 4 is the first data row of the second batch.
    assert_eq!(lines[4], "1,foo");
}
/// Writes the fixture table to a temporary file through `CsvWriter::to_path`
/// and checks that the header line is present in the file contents.
#[test]
fn test_csv_writer_to_path() {
    let tmp = tempfile::NamedTempFile::new().unwrap();
    let target = tmp.path();
    {
        // Scoped so the writer (and its file handle) is dropped before reading back.
        let mut w = CsvWriter::to_path(target).unwrap();
        w.write_table(&make_test_table()).unwrap();
        w.flush().unwrap();
    }
    let written = std::fs::read_to_string(target).unwrap();
    assert!(written.contains("ints,strs"));
}
}