#![no_std]
extern crate alloc;
#[cfg(feature="std")]
extern crate std;
#[cfg(feature = "std")]
use std::io::Read;
#[cfg(feature = "std")]
use std::collections::HashMap;
use nom::Finish;
use alloc::vec::Vec;
mod parser;
pub use parser::Unit;
pub use parser::Units;
pub use parser::Heading;
pub use parser::Document;
/// Errors reported while reading a document.
#[derive(Debug)]
pub enum Error {
    /// The parser needed more input than was supplied.
    Incomplete,
    /// The input was structurally invalid, e.g. a record was followed by an unexpected byte.
    Invalid,
    /// The underlying `nom` parser rejected the input.
    Parse,
    /// Reading from the underlying reader failed.
    #[cfg(feature = "std")]
    IO(std::io::Error),
}

impl core::fmt::Display for Error {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Error::Incomplete => f.write_str("incomplete input: more data is required"),
            Error::Invalid => f.write_str("invalid document structure"),
            Error::Parse => f.write_str("failed to parse input"),
            #[cfg(feature = "std")]
            Error::IO(error) => write!(f, "I/O error: {}", error),
        }
    }
}

#[cfg(feature = "std")]
impl std::error::Error for Error {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            // Only the I/O variant wraps an underlying error.
            Error::IO(error) => Some(error),
            Error::Incomplete | Error::Invalid | Error::Parse => None,
        }
    }
}
impl From<nom::error::Error<&[u8]>> for Error {
fn from(_error: nom::error::Error<&[u8]>) -> Self {
Error::Parse
}
}
#[cfg(feature = "std")]
/// Wraps an I/O failure in [`Error::IO`], keeping the original error.
impl From<std::io::Error> for Error {
    fn from(source: std::io::Error) -> Self {
        Self::IO(source)
    }
}
pub fn read_document(source: &[u8]) -> Result<Document<'_>, Error> {
match parser::parse_document(source) {
Err(nom::Err::Incomplete(_)) => Err(Error::Incomplete),
value => Ok(value.finish()?.1),
}
}
#[cfg(feature = "std")]
/// A single record produced by [`DocumentIterator`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DocumentIteratorRow {
    /// Units of the record keyed by the heading unit at the same position.
    WithHeading(HashMap<Vec<u8>, Vec<u8>>),
    /// Units of the record in order; used when no heading was parsed.
    WithoutHeading(Vec<Vec<u8>>),
}
#[cfg(feature = "std")]
/// Iterator that reads a document from `reader` and yields one row per record.
///
/// The `Read` bound lives on the impl blocks that need it rather than on the
/// struct itself.
#[derive(Debug)]
pub struct DocumentIterator<R> {
    /// Source of the raw document bytes.
    reader: R,
    /// Set by the first call to `next`, which is the one that looks for a heading.
    started: bool,
    /// Set once iteration has finished or failed; `next` then returns `None`.
    done: bool,
    /// Heading units, when a heading was parsed at the start of the input.
    heading: Option<Vec<Vec<u8>>>,
    /// Bytes read from `reader` that have not yet been consumed as a record.
    buffer: Vec<u8>,
}
#[cfg(feature = "std")]
impl<R: Read> DocumentIterator<R> {
    /// Creates an iterator over the document read from `reader`.
    ///
    /// Nothing is read here; the buffer starts empty and no heading is known
    /// until the iterator is first advanced.
    pub fn new(reader: R) -> Self {
        let buffer = Vec::new();
        Self {
            heading: None,
            started: false,
            done: false,
            buffer,
            reader,
        }
    }
}
#[cfg(feature = "std")]
impl<R> DocumentIterator<R>
where
    R: Read,
{
    /// Reads the number of bytes the parser asked for and appends them to `buffer`.
    ///
    /// When the parser cannot say how many bytes it needs, a single byte is read.
    /// On a read failure the iterator is marked as done and the I/O error is returned.
    fn read_needed(&mut self, needed: nom::Needed) -> Result<(), Error> {
        let count = match needed {
            nom::Needed::Unknown => 1,
            nom::Needed::Size(size) => size.get(),
        };
        let filled = self.buffer.len();
        // Read straight into the tail of the buffer instead of a scratch vector.
        self.buffer.resize(filled + count, 0);
        if let Err(error) = self.reader.read_exact(&mut self.buffer[filled..]) {
            // The tail contents are unspecified after a failed `read_exact`; drop them.
            self.buffer.truncate(filled);
            self.done = true;
            return Err(error.into());
        }
        Ok(())
    }
}

#[cfg(feature = "std")]
impl<R> Iterator for DocumentIterator<R>
where
    R: Read,
{
    type Item = Result<DocumentIteratorRow, Error>;

    /// Yields the next record of the document.
    ///
    /// The first call tries to parse a heading; if that fails with a parse error
    /// the document is treated as having no heading. Later calls expect the
    /// buffer to start with a record separator (another record follows) or the
    /// end-of-text byte (iteration ends). Any error ends the iteration.
    fn next(&mut self) -> Option<Self::Item> {
        if self.done {
            return None;
        }
        if self.started {
            match self.buffer.first() {
                Some(&parser::RECORD_SEPARATOR) => (),
                Some(&parser::END_OF_TEXT) => {
                    self.done = true;
                    return None;
                }
                _ => {
                    self.done = true;
                    return Some(Err(Error::Invalid));
                }
            }
        } else {
            self.started = true;
            loop {
                let heading = parser::parse_heading(&self.buffer);
                match heading {
                    Err(nom::Err::Incomplete(needed)) => {
                        if let Err(error) = self.read_needed(needed) {
                            return Some(Err(error));
                        }
                    }
                    // Not a heading: keep the bytes read so far and parse them as records.
                    Err(_) => break,
                    Ok((left, heading)) => {
                        let heading: Vec<_> = heading
                            .0
                            .units
                            .into_iter()
                            .map(|unit| unit.0.into_owned())
                            .collect();
                        self.heading = Some(heading);
                        self.buffer = Vec::from(left);
                        break;
                    }
                }
            }
        }
        loop {
            // The first buffered byte is the delimiter preceding the record (verified
            // above to be a record separator on later calls); units start after it.
            // NOTE(review): on the first call this byte is skipped without being
            // validated - confirm whether it should be checked.
            let body = match self.buffer.get(1..) {
                Some(body) => body,
                None => {
                    // No delimiter byte at all: report it instead of panicking on the slice.
                    self.done = true;
                    return Some(Err(Error::Invalid));
                }
            };
            let record = parser::parse_units(body);
            match record {
                Err(nom::Err::Incomplete(needed)) => {
                    if let Err(error) = self.read_needed(needed) {
                        return Some(Err(error));
                    }
                }
                outcome => {
                    let (left, record) = match outcome.finish() {
                        Ok(parsed) => parsed,
                        Err(error) => {
                            self.done = true;
                            return Some(Err(error.into()));
                        }
                    };
                    match record.end_type {
                        // A heading terminator is not valid at the end of a record.
                        parser::EndType::EndOfHeader => {
                            self.done = true;
                            return Some(Err(Error::Invalid));
                        }
                        // Last record: return it now, stop on the following call.
                        parser::EndType::EndOfDocument => self.done = true,
                        parser::EndType::EndOfRecord => (),
                    }
                    let units: Vec<_> = record
                        .units
                        .into_iter()
                        .map(|unit| unit.0.into_owned())
                        .collect();
                    self.buffer = Vec::from(left);
                    let row = match self.heading {
                        // NOTE(review): `zip` stops at the shorter side, so surplus units or
                        // heading names are silently dropped - confirm this is intended.
                        Some(ref heading) => DocumentIteratorRow::WithHeading(
                            heading.iter().cloned().zip(units).collect(),
                        ),
                        None => DocumentIteratorRow::WithoutHeading(units),
                    };
                    return Some(Ok(row));
                }
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A document with a three-unit heading followed by two records.
    #[cfg(feature = "std")]
    #[test]
    fn document_iterator_with_header() {
        let iterator = DocumentIterator::new(&b"\x01alpha\x1Fbeta\x1Fgamma\x02test_unit\x1Fsecond_test_unit\x1Fowned\x1B\x1Btext\x1Erecord_2_unit\x1F\x1F\x03"[..]);
        let rows: Vec<_> = iterator.collect();
        // The iterator must stop after the last record.
        assert_eq!(rows.len(), 2);
        match &rows[0] {
            Ok(DocumentIteratorRow::WithHeading(record)) => {
                assert_eq!(record.get(&b"alpha"[..]).unwrap(), b"test_unit");
                assert_eq!(record.get(&b"beta"[..]).unwrap(), b"second_test_unit");
                assert_eq!(record.get(&b"gamma"[..]).unwrap(), b"owned\x1Btext");
            }
            _ => panic!("first row should be a successfully parsed record with a heading"),
        }
        match &rows[1] {
            Ok(DocumentIteratorRow::WithHeading(record)) => {
                assert_eq!(record.get(&b"alpha"[..]).unwrap(), b"record_2_unit");
                assert_eq!(record.get(&b"beta"[..]).unwrap(), b"");
                assert_eq!(record.get(&b"gamma"[..]).unwrap(), b"");
            }
            _ => panic!("second row should be a successfully parsed record with a heading"),
        }
    }

    /// The same two records without a heading in front of them.
    #[cfg(feature = "std")]
    #[test]
    fn document_iterator_without_header() {
        let iterator = DocumentIterator::new(&b"\x02test_unit\x1Fsecond_test_unit\x1Fowned\x1B\x1Btext\x1Erecord_2_unit\x1F\x1F\x03"[..]);
        let rows: Vec<_> = iterator.collect();
        // The iterator must stop after the last record.
        assert_eq!(rows.len(), 2);
        match &rows[0] {
            Ok(DocumentIteratorRow::WithoutHeading(record)) => {
                assert_eq!(record[0], b"test_unit");
                assert_eq!(record[1], b"second_test_unit");
                assert_eq!(record[2], b"owned\x1Btext");
            }
            _ => panic!("first row should be a successfully parsed record without a heading"),
        }
        match &rows[1] {
            Ok(DocumentIteratorRow::WithoutHeading(record)) => {
                assert_eq!(record[0], b"record_2_unit");
                assert_eq!(record[1], b"");
                assert_eq!(record[2], b"");
            }
            _ => panic!("second row should be a successfully parsed record without a heading"),
        }
    }
}