use crate::edb::io::{Reader, Writer};
use crate::edb::{AttributeKind, Constant, Fact};
use crate::error::{fact_does_not_correspond_to_schema, Error, Result};
use crate::syntax::{ANONYMOUS_COLUMN_NAME, COMMA_SEPARATOR};
use crate::{Collection, Labeled, Relation};
use csv::{ReaderBuilder, Trim, WriterBuilder};
use std::fs::File;
use std::io::{BufReader, Write};
use std::path::Path;
/// Settings shared by the delimited-text reader and writer.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Options {
    /// The single byte that separates fields within a line.
    pub delimiter: u8,
    /// Whether the first row holds column names rather than data.
    pub has_headers: bool,
}

/// Reader/writer for relations stored as delimiter-separated lines of text.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct DelimitedLines {}

/// The identifier for this file format.
pub const PRAGMA_ID: &str = "csv";

impl Default for Options {
    /// The default is comma-separated with no header row.
    fn default() -> Self {
        Self::comma_separated()
    }
}

impl Options {
    /// Comma-separated fields, no header row.
    pub fn comma_separated() -> Self {
        Self {
            delimiter: b',',
            has_headers: false,
        }
    }

    /// Tab-separated fields, no header row.
    pub fn tab_separated() -> Self {
        Self::comma_separated().with_delimiter(b'\t')
    }

    /// Returns a copy of these options using `delimiter` as the field separator.
    pub fn with_delimiter(self, delimiter: u8) -> Self {
        Self { delimiter, ..self }
    }

    /// Returns a copy of these options with the header row enabled.
    pub fn has_headers(self) -> Self {
        Self {
            has_headers: true,
            ..self
        }
    }

    /// Returns a copy of these options with the header row disabled.
    pub fn no_headers(self) -> Self {
        Self {
            has_headers: false,
            ..self
        }
    }
}
impl Reader for DelimitedLines {
type Options = Options;
fn read_from_with_options(
&self,
file_name: &Path,
as_relation: &Relation,
options: &Self::Options,
) -> Result<Relation> {
let file = File::open(file_name)?;
let reader = BufReader::new(file);
let mut reader = ReaderBuilder::new()
.delimiter(options.delimiter)
.has_headers(options.has_headers)
.comment(b'#'.into())
.double_quote(true)
.quoting(true)
.trim(Trim::All)
.from_reader(reader);
let mut new_relation = as_relation.clone_with_schema_only();
let attribute_types: Vec<AttributeKind> = as_relation
.schema()
.iter()
.map(|a| a.kind().unwrap_or(AttributeKind::String))
.collect();
let arity = attribute_types.len();
for result in reader.records() {
let record = result.map_err(|e| Error::Serialization(Box::new(e)))?;
if record.len() == arity {
let values: Vec<Constant> = record
.iter()
.enumerate()
.map(|(i, s)| match attribute_types.get(i) {
Some(AttributeKind::String) => Constant::String(s.to_string()),
Some(AttributeKind::Integer) => Constant::String(s.to_string()),
Some(AttributeKind::Boolean) => Constant::String(s.to_string()),
_ => unreachable!(),
})
.collect();
new_relation.add(Fact::new(new_relation.label_ref(), values)?)?;
} else {
return Err(fact_does_not_correspond_to_schema(
new_relation.label_ref(),
record
.iter()
.map(str::to_string)
.collect::<Vec<String>>()
.join(COMMA_SEPARATOR),
));
}
}
Ok(new_relation)
}
}
/// Stores relations as delimiter-separated text.
impl Writer for DelimitedLines {
    type Options = Options;

    /// Writes `from_relation` to the file at `file_name`, creating or truncating it.
    ///
    /// `options` supplies the field delimiter and whether a header row is emitted.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be created or if any record cannot be written.
    fn write_to_with_options(
        &self,
        file_name: &Path,
        from_relation: &Relation,
        options: &Self::Options,
    ) -> Result<()> {
        let target = File::create(file_name)?;

        let mut builder = WriterBuilder::new();
        builder
            .delimiter(options.delimiter)
            .has_headers(options.has_headers)
            .double_quote(true);
        let mut csv_writer = builder.from_writer(target);

        write(&mut csv_writer, from_relation, options.has_headers)
    }

    /// Writes `relation` to standard output using the same format as the file writer.
    ///
    /// # Errors
    ///
    /// Fails if any record cannot be written.
    fn print_with_options(&self, relation: &Relation, options: &Self::Options) -> Result<()> {
        let mut builder = WriterBuilder::new();
        builder
            .delimiter(options.delimiter)
            .has_headers(options.has_headers);
        let mut csv_writer = builder.from_writer(std::io::stdout());

        write(&mut csv_writer, relation, options.has_headers)
    }
}
/// Serializes `relation` through the CSV writer `w`, one record per fact.
///
/// When `has_headers` is true a header row is written first; it holds each
/// attribute's label, or `ANONYMOUS_COLUMN_NAME` for attributes without one.
/// Every constant is rendered with its `to_string` representation.
///
/// The writer is flushed before returning so that a failure to deliver the
/// final buffered bytes is reported to the caller instead of being silently
/// discarded when the writer is dropped.
///
/// # Errors
///
/// Fails if a record cannot be written or if the final flush fails.
fn write<W: Write>(w: &mut csv::Writer<W>, relation: &Relation, has_headers: bool) -> Result<()> {
    if has_headers {
        let headers: Vec<String> = relation
            .schema()
            .iter()
            .map(|a| {
                a.label()
                    .map(|p| p.to_string())
                    .unwrap_or_else(|| ANONYMOUS_COLUMN_NAME.to_string())
            })
            .collect();
        w.write_record(&headers)
            .map_err(|e| Error::Serialization(Box::new(e)))?;
    }
    for fact in relation.iter() {
        // Stream the fields straight into the writer; no per-fact Vec is needed.
        w.write_record(fact.iter().map(|c| c.to_string()))
            .map_err(|e| Error::Serialization(Box::new(e)))?;
    }
    // Dropping a csv::Writer flushes too, but ignores any error; flush
    // explicitly so I/O failures on the last buffer are not lost.
    w.flush()?;
    Ok(())
}