//! c0sv 0.2.0
//!
//! Binary CSV, using C0 ASCII control codes.
#![no_std]

extern crate alloc;

#[cfg(feature="std")]
extern crate std;

#[cfg(feature = "std")]
use std::io::Read;

#[cfg(feature = "std")]
use std::collections::HashMap;

use nom::Finish;
use alloc::vec::Vec;

mod parser;
pub use parser::Unit;
pub use parser::Units;
pub use parser::Heading;
pub use parser::Document;

/// Everything that can go wrong while reading a document.
///
/// `Debug` is derived so that results carrying this error can be unwrapped, logged and used in
/// test assertions.
#[derive(Debug)]
pub enum Error {
    /// The data is incomplete, need more.
    Incomplete,

    /// A character was encountered at the wrong time.
    Invalid,

    /// The parser rejected the input; produced when a `nom` parser error is converted into this
    /// type.
    Parse,

    /// Reading from the underlying reader failed. Only available with the `std` feature.
    #[cfg(feature = "std")]
    IO(std::io::Error),
}

impl From<nom::error::Error<&[u8]>> for Error {
    fn from(_error: nom::error::Error<&[u8]>) -> Self {
        Error::Parse
    }
}

#[cfg(feature = "std")]
impl From<std::io::Error> for Error {
    /// Wrap an I/O failure in [`Error::IO`], keeping the original error intact.
    fn from(source: std::io::Error) -> Self {
        Self::IO(source)
    }
}

/// Try to read a whole document contained in the byte array.
/// The document may have any amount of trailing data, which will be ignored.
pub fn read_document(source: &[u8]) -> Result<Document<'_>, Error> {
    match parser::parse_document(source) {
        Err(nom::Err::Incomplete(_)) => Err(Error::Incomplete),
        value => Ok(value.finish()?.1),
    }
}

#[cfg(feature = "std")]
/// One row yielded by [`DocumentIterator`].
pub enum DocumentIteratorRow {
    /// Row of a document that has a heading: each heading unit is paired, by position, with the
    /// unit of this record and stored as `heading -> value`.
    WithHeading(HashMap<Vec<u8>, Vec<u8>>),
    /// Row of a document without a heading: the units of the record in the order they were read.
    WithoutHeading(Vec<Vec<u8>>),
}

/// Streaming iterator over the rows of a single document.
///
/// Input is requested from the reader in exactly the amounts the parser asks for, which is very
/// often a single byte, so that nothing beyond the document is consumed. Wrapping the source in a
/// `BufReader` (or another buffered reader) is therefore strongly recommended.
#[cfg(feature = "std")]
pub struct DocumentIterator<R: Read> {
    /// Source of the raw document bytes.
    reader: R,
    /// Becomes `true` on the first call to `next`, which is the call that looks for a heading.
    started: bool,
    /// Becomes `true` once the document has ended or an error was reported; `next` then only
    /// returns `None`.
    done: bool,
    /// Heading units of the document, when a heading was parsed.
    heading: Option<Vec<Vec<u8>>>,
    /// Bytes that were read from `reader` but not yet turned into a row.
    buffer: Vec<u8>,
}

#[cfg(feature = "std")]
impl<R: Read> DocumentIterator<R> {
    /// Create an iterator over the document provided by `reader`.
    ///
    /// Nothing is read here; the reader is first touched when the iterator is advanced.
    pub fn new(reader: R) -> Self {
        Self {
            buffer: Vec::new(),
            heading: None,
            done: false,
            started: false,
            reader,
        }
    }
}

#[cfg(feature = "std")]
impl<R> DocumentIterator<R> where R: Read {
    /// Read the amount of input the parser asked for and append it to the buffer.
    ///
    /// `Needed::Unknown` is treated as a request for a single byte. The bytes are read directly
    /// into the tail of the buffer; when the read fails the buffer is cut back to its previous
    /// length and the failure is returned as [`Error::IO`].
    fn fill_buffer(&mut self, needed: nom::Needed) -> Result<(), Error> {
        let wanted = match needed {
            nom::Needed::Unknown => 1,
            nom::Needed::Size(size) => size.get(),
        };
        let filled = self.buffer.len();
        self.buffer.resize(filled + wanted, 0);
        if let Err(error) = self.reader.read_exact(&mut self.buffer[filled..]) {
            // `read_exact` leaves the target in an unspecified state on failure.
            self.buffer.truncate(filled);
            return Err(error.into());
        }
        Ok(())
    }
}

#[cfg(feature = "std")]
impl<R> Iterator for DocumentIterator<R> where R: Read {
    type Item = Result<DocumentIteratorRow, Error>;

    /// Read and return the next record of the document.
    ///
    /// The first call additionally tries to parse a heading; when one is found, every row is
    /// returned as [`DocumentIteratorRow::WithHeading`], otherwise as
    /// [`DocumentIteratorRow::WithoutHeading`].
    ///
    /// Returns `None` once the document has ended or after an error has been yielded. Errors are
    /// yielded once: [`Error::IO`] when the reader fails (including running out of data),
    /// [`Error::Invalid`] when an unexpected control code follows a record, and the converted
    /// parser error otherwise.
    fn next(&mut self) -> Option<Self::Item> {
        if self.done {
            return None;
        }

        if self.started {
            // A previous record was returned; the byte that terminated it is still at the front
            // of the buffer and decides whether another record follows.
            match self.buffer.first() {
                Some(&parser::RECORD_SEPARATOR) => (),
                Some(&parser::END_OF_TEXT) => {
                    self.done = true;
                    return None;
                }
                _ => {
                    self.done = true;
                    return Some(Err(Error::Invalid));
                }
            }
        } else {
            self.started = true;
            loop {
                let parsed = parser::parse_heading(&self.buffer);
                match parsed {
                    Err(nom::Err::Incomplete(needed)) => {
                        if let Err(error) = self.fill_buffer(needed) {
                            self.done = true;
                            return Some(Err(error));
                        }
                    }
                    // Any other error is taken to mean that the document has no heading. The
                    // bytes read so far stay in the buffer and are parsed as the first record.
                    Err(_) => break,
                    Ok((left, heading)) => {
                        let heading: Vec<_> = heading
                            .0
                            .units
                            .into_iter()
                            .map(|unit| unit.0.into_owned())
                            .collect();
                        self.heading = Some(heading);
                        self.buffer = Vec::from(left);
                        break;
                    }
                }
            }
        }

        loop {
            // The record always starts with either a start-of-text or a record separator, which
            // is skipped here. `get` keeps an empty buffer from panicking: it is then parsed as
            // empty input, which makes the parser ask for more data.
            let unread = self.buffer.get(1..).unwrap_or_default();
            let parsed = parser::parse_units(unread);
            match parsed {
                Err(nom::Err::Incomplete(needed)) => {
                    if let Err(error) = self.fill_buffer(needed) {
                        self.done = true;
                        return Some(Err(error));
                    }
                }
                other => {
                    let (left, record) = match other.finish() {
                        Ok(parsed) => parsed,
                        Err(error) => {
                            self.done = true;
                            return Some(Err(error.into()));
                        }
                    };
                    match record.end_type {
                        // A heading terminator is not allowed at the end of a record.
                        parser::EndType::EndOfHeader => {
                            self.done = true;
                            return Some(Err(Error::Invalid));
                        }
                        // Last record: hand it out now, report the end on the next call.
                        parser::EndType::EndOfDocument => self.done = true,
                        parser::EndType::EndOfRecord => (),
                    }
                    let record: Vec<_> = record
                        .units
                        .into_iter()
                        .map(|unit| unit.0.into_owned())
                        .collect();
                    // Keep the unparsed remainder for the next call, which inspects its first
                    // byte to decide how to continue.
                    self.buffer = Vec::from(left);
                    let row = match self.heading {
                        Some(ref heading) => DocumentIteratorRow::WithHeading(
                            heading.iter().cloned().zip(record).collect(),
                        ),
                        None => DocumentIteratorRow::WithoutHeading(record),
                    };
                    return Some(Ok(row));
                }
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A document with a heading yields rows keyed by the heading units.
    #[cfg(feature = "std")]
    #[test]
    fn document_iterator_with_header() {
        // Heading `alpha, beta, gamma`, followed by two records. The doubled `\x1B` in the first
        // record is expected to come out as a single `\x1B` byte.
        let input: &[u8] = b"\x01alpha\x1Fbeta\x1Fgamma\x02test_unit\x1Fsecond_test_unit\x1Fowned\x1B\x1Btext\x1Erecord_2_unit\x1F\x1F\x03";
        let rows: Vec<_> = DocumentIterator::new(input).collect();

        match &rows[0] {
            Ok(DocumentIteratorRow::WithHeading(record)) => {
                assert_eq!(record.get(&b"alpha"[..]).unwrap(), b"test_unit");
                assert_eq!(record.get(&b"beta"[..]).unwrap(), b"second_test_unit");
                assert_eq!(record.get(&b"gamma"[..]).unwrap(), b"owned\x1Btext");
            }
            _ => panic!(),
        }

        match &rows[1] {
            Ok(DocumentIteratorRow::WithHeading(record)) => {
                assert_eq!(record.get(&b"alpha"[..]).unwrap(), b"record_2_unit");
                assert_eq!(record.get(&b"beta"[..]).unwrap(), b"");
                assert_eq!(record.get(&b"gamma"[..]).unwrap(), b"");
            }
            _ => panic!(),
        }
    }

    /// A document without a heading yields rows as plain lists of units.
    #[cfg(feature = "std")]
    #[test]
    fn document_iterator_without_header() {
        // Same two records as above, but the document starts directly with start-of-text.
        let input: &[u8] = b"\x02test_unit\x1Fsecond_test_unit\x1Fowned\x1B\x1Btext\x1Erecord_2_unit\x1F\x1F\x03";
        let rows: Vec<_> = DocumentIterator::new(input).collect();

        match &rows[0] {
            Ok(DocumentIteratorRow::WithoutHeading(record)) => {
                assert_eq!(record[0], b"test_unit");
                assert_eq!(record[1], b"second_test_unit");
                assert_eq!(record[2], b"owned\x1Btext");
            }
            _ => panic!(),
        }

        match &rows[1] {
            Ok(DocumentIteratorRow::WithoutHeading(record)) => {
                assert_eq!(record[0], b"record_2_unit");
                assert_eq!(record[1], b"");
                assert_eq!(record[2], b"");
            }
            _ => panic!(),
        }
    }
}