use alloc::borrow::Cow;
use alloc::vec::Vec;
use core::slice;
use nom::bytes::streaming::{tag, take_while, take};
use nom::combinator::{peek, opt};
use nom::branch::alt;
use nom::IResult;
/// ASCII SOH (`0x01`): opens the optional heading at the start of a document.
pub const START_OF_HEADING: u8 = 0x01;
/// ASCII STX (`0x02`): opens the record section; it is also the byte that ends a heading.
pub const START_OF_TEXT: u8 = 0x02;
/// ASCII ETX (`0x03`): ends the last record and therefore the document.
pub const END_OF_TEXT: u8 = 0x03;
/// ASCII ESC (`0x1B`): makes the byte that follows it part of the unit's data.
pub const ESCAPE: u8 = 0x1B;
/// ASCII RS (`0x1E`): ends a record that is followed by another record.
pub const RECORD_SEPARATOR: u8 = 0x1E;
/// ASCII US (`0x1F`): separates adjacent units inside a heading or record.
pub const UNIT_SEPARATOR: u8 = 0x1F;
/// Which control byte ended a run of units.
///
/// [`parse_units`] stops at that byte without consuming it and records its kind here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EndType {
    /// The run stopped at `START_OF_TEXT`.
    EndOfHeader,
    /// The run stopped at `RECORD_SEPARATOR`.
    EndOfRecord,
    /// The run stopped at `END_OF_TEXT`.
    EndOfDocument,
}

/// The bytes of a single unit, with escape sequences already resolved.
///
/// [`parse_unit`] produces a borrowed slice of the input when the unit contained no
/// `ESCAPE` byte and an owned buffer otherwise. Equality compares the bytes only, so a
/// borrowed and an owned unit with the same content are equal.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Unit<'a>(pub Cow<'a, [u8]>);

/// A run of units together with the kind of control byte that ended it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Units<'a> {
    /// Kind of control byte found directly after the last unit.
    pub end_type: EndType,
    /// The units in input order; always at least one when produced by [`parse_units`].
    pub units: Vec<Unit<'a>>,
}

/// The units that follow a `START_OF_HEADING` byte.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Heading<'a>(pub Units<'a>);

/// A fully parsed document: an optional heading followed by its records.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Document<'a> {
    /// The heading, or `None` when the input did not start with `START_OF_HEADING`.
    pub heading: Option<Heading<'a>>,
    /// Every record in input order.
    pub records: Vec<Units<'a>>,
}
pub fn is_control(input: u8) -> bool {
matches!(
input,
START_OF_HEADING | START_OF_TEXT | END_OF_TEXT | ESCAPE | RECORD_SEPARATOR | UNIT_SEPARATOR
)
}
/// Parses one unit: the bytes up to, but not including, the next unescaped control byte.
///
/// An `ESCAPE` byte is dropped and the byte after it is copied into the unit verbatim,
/// even when that byte is itself a control byte. The returned remaining input starts at
/// the control byte that ended the unit; that byte is only peeked, never consumed.
///
/// The unit borrows from `input` when no escape was present and owns its bytes otherwise.
///
/// # Errors
///
/// The nom `streaming` combinators are used, so input that ends before a terminating
/// control byte, or directly after an `ESCAPE`, is reported as incomplete rather than
/// as a finished unit.
pub fn parse_unit(input: &[u8]) -> IResult<&[u8], Unit<'_>> {
    let (mut rest, plain) = take_while(|byte| !is_control(byte))(input)?;
    let (_, mut next) = peek(take(1u8))(rest)?;
    if next[0] != ESCAPE {
        // Nothing to unescape: hand back a slice of the original input.
        return Ok((rest, Unit(plain.into())));
    }

    let mut unescaped: Vec<u8> = Vec::from(plain);
    while next[0] == ESCAPE {
        // Step over the escape byte and keep whatever follows it as literal data.
        let (after_literal, literal) = take(1u8)(&rest[1..])?;
        unescaped.push(literal[0]);
        let (after_run, run) = take_while(|byte| !is_control(byte))(after_literal)?;
        unescaped.extend_from_slice(run);
        rest = after_run;
        next = peek(take(1u8))(rest)?.1;
    }
    Ok((rest, Unit(unescaped.into())))
}
/// Parses consecutive units until one is followed by `RECORD_SEPARATOR`,
/// `START_OF_TEXT` or `END_OF_TEXT`.
///
/// The terminating byte is left at the front of the returned input and its kind is stored
/// in [`Units::end_type`]. Any other control byte after a unit is skipped as a separator
/// and parsing continues with the next unit.
///
/// # Errors
///
/// Propagates every error from [`parse_unit`], including incomplete input.
pub fn parse_units(mut input: &[u8]) -> IResult<&[u8], Units<'_>> {
    let mut units = Vec::new();
    let (rest, end_type) = loop {
        let (after_unit, unit) = parse_unit(input)?;
        units.push(unit);
        // `parse_unit` only succeeds after peeking a control byte, so index 0 exists.
        let end_type = match after_unit[0] {
            RECORD_SEPARATOR => EndType::EndOfRecord,
            START_OF_TEXT => EndType::EndOfHeader,
            END_OF_TEXT => EndType::EndOfDocument,
            _ => {
                // Plain separator: consume it and parse the next unit.
                input = &after_unit[1..];
                continue;
            }
        };
        break (after_unit, end_type);
    };
    Ok((rest, Units { end_type, units }))
}
/// Parses a heading: a `START_OF_HEADING` byte followed by a run of units.
///
/// The returned remaining input starts at the control byte that ended the run; that byte
/// is not consumed.
///
/// # Errors
///
/// Fails when `input` does not begin with `START_OF_HEADING`, and propagates every error
/// from [`parse_units`].
pub fn parse_heading(input: &[u8]) -> IResult<&[u8], Heading<'_>> {
    let (after_marker, _) = tag(slice::from_ref(&START_OF_HEADING))(input)?;
    parse_units(after_marker).map(|(rest, units)| (rest, Heading(units)))
}
/// Parses the record section: `START_OF_TEXT`, then records separated by
/// `RECORD_SEPARATOR`, closed by `END_OF_TEXT`.
///
/// Both the separators and the closing `END_OF_TEXT` are consumed, so the returned
/// remaining input starts after the end of the document.
///
/// # Errors
///
/// Fails when `input` does not begin with `START_OF_TEXT` or when a record is ended by a
/// byte other than `RECORD_SEPARATOR` or `END_OF_TEXT`. Errors from [`parse_units`] are
/// propagated.
pub fn parse_records(input: &[u8]) -> IResult<&[u8], Vec<Units<'_>>> {
    let (mut cursor, _) = tag(slice::from_ref(&START_OF_TEXT))(input)?;
    let mut records = Vec::new();
    loop {
        let (after_units, record) = parse_units(cursor)?;
        records.push(record);
        let (rest, terminator) = alt((
            tag(slice::from_ref(&RECORD_SEPARATOR)),
            tag(slice::from_ref(&END_OF_TEXT)),
        ))(after_units)?;
        match terminator[0] {
            END_OF_TEXT => return Ok((rest, records)),
            // Record separator: another record follows.
            _ => cursor = rest,
        }
    }
}
/// Parses a whole document: an optional heading followed by the record section.
///
/// `heading` is `None` when [`parse_heading`] fails with a recoverable error; the record
/// section is then parsed from the start of `input`.
///
/// # Errors
///
/// Propagates the errors of [`parse_records`], and any error from [`parse_heading`] that
/// `opt` does not turn into `None`.
pub fn parse_document(input: &[u8]) -> IResult<&[u8], Document<'_>> {
    let (after_heading, heading) = opt(parse_heading)(input)?;
    parse_records(after_heading).map(|(rest, records)| (rest, Document { heading, records }))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `unit` holds `expected` and still borrows from the parser input.
    fn assert_borrowed(unit: &Unit<'_>, expected: &[u8]) {
        assert_eq!(unit.0, expected);
        assert!(matches!(unit.0, Cow::Borrowed(_)), "borrowed when no escapes");
    }

    /// Asserts that `unit` holds `expected` in an owned buffer, i.e. escapes were resolved.
    fn assert_owned(unit: &Unit<'_>, expected: &[u8]) {
        assert_eq!(unit.0, expected);
        assert!(matches!(unit.0, Cow::Owned(_)), "owned when escapes");
    }

    /// Checks the three units produced by
    /// `test_unit<US>second_test_unit<US>owned<ESC><ESC>text`.
    fn assert_first_record(record: &Units<'_>) {
        assert_eq!(record.units.len(), 3);
        assert_borrowed(&record.units[0], b"test_unit");
        assert_borrowed(&record.units[1], b"second_test_unit");
        assert_owned(&record.units[2], b"owned\x1Btext");
    }

    /// Checks the three units produced by
    /// `record_2_unit<US>2_second_test_unit<US>owned<ESC><ESC>text_2`.
    fn assert_second_record(record: &Units<'_>) {
        assert_eq!(record.units.len(), 3);
        assert_borrowed(&record.units[0], b"record_2_unit");
        assert_borrowed(&record.units[1], b"2_second_test_unit");
        assert_owned(&record.units[2], b"owned\x1Btext_2");
    }

    #[test]
    fn unit() {
        let (rest, parsed) = parse_unit(b"test_unit\x1F").unwrap();
        assert_eq!(rest, b"\x1F");
        assert_borrowed(&parsed, b"test_unit");

        let (rest, parsed) = parse_unit(b"test_unit\x1B\x1Frest_of_unit\x1F").unwrap();
        assert_eq!(rest, b"\x1F");
        assert_owned(&parsed, b"test_unit\x1Frest_of_unit");
    }

    #[test]
    fn truncated_input_is_incomplete() {
        // No terminating control byte has arrived yet.
        assert!(matches!(parse_unit(b"test_unit"), Err(e) if e.is_incomplete()));
        // An escape with nothing after it cannot be resolved yet.
        assert!(matches!(parse_unit(b"test_unit\x1B"), Err(e) if e.is_incomplete()));
        // The document is cut off before its END_OF_TEXT.
        assert!(matches!(parse_document(b"\x02test_unit\x1F"), Err(e) if e.is_incomplete()));
    }

    #[test]
    fn units() {
        let (rest, parsed) =
            parse_units(b"test_unit\x1Fsecond_test_unit\x1Fowned\x1B\x1Btext\x02").unwrap();
        assert_eq!(rest, b"\x02");
        assert!(matches!(parsed.end_type, EndType::EndOfHeader));
        assert_first_record(&parsed);
    }

    #[test]
    fn heading() {
        let (rest, parsed) =
            parse_heading(b"\x01test_unit\x1Fsecond_test_unit\x1Fowned\x1B\x1Btext\x02").unwrap();
        assert_eq!(rest, b"\x02");
        assert!(matches!(parsed.0.end_type, EndType::EndOfHeader));
        assert_first_record(&parsed.0);
    }

    #[test]
    fn records() {
        let (rest, parsed) = parse_records(b"\x02test_unit\x1Fsecond_test_unit\x1Fowned\x1B\x1Btext\x1Erecord_2_unit\x1F2_second_test_unit\x1Fowned\x1B\x1Btext_2\x03").unwrap();
        assert_eq!(rest, b"");
        assert_eq!(parsed.len(), 2);
        assert!(matches!(parsed[0].end_type, EndType::EndOfRecord));
        assert_first_record(&parsed[0]);
        assert!(matches!(parsed[1].end_type, EndType::EndOfDocument));
        assert_second_record(&parsed[1]);
    }

    #[test]
    fn document_headingless() {
        let (rest, parsed) = parse_document(b"\x02test_unit\x1Fsecond_test_unit\x1Fowned\x1B\x1Btext\x1Erecord_2_unit\x1F2_second_test_unit\x1Fowned\x1B\x1Btext_2\x03").unwrap();
        assert_eq!(rest, b"");
        assert!(parsed.heading.is_none());
        assert_eq!(parsed.records.len(), 2);
        assert!(matches!(parsed.records[0].end_type, EndType::EndOfRecord));
        assert_first_record(&parsed.records[0]);
        assert!(matches!(parsed.records[1].end_type, EndType::EndOfDocument));
        assert_second_record(&parsed.records[1]);
    }

    #[test]
    fn document() {
        let (rest, parsed) = parse_document(b"\x01alpha\x1Fbeta\x1Fgamma\x02test_unit\x1Fsecond_test_unit\x1Fowned\x1B\x1Btext\x1Erecord_2_unit\x1F\x1F\x03").unwrap();
        assert_eq!(rest, b"");

        let heading = parsed.heading.unwrap();
        assert!(matches!(heading.0.end_type, EndType::EndOfHeader));
        assert_eq!(heading.0.units.len(), 3);
        assert_borrowed(&heading.0.units[0], b"alpha");
        assert_borrowed(&heading.0.units[1], b"beta");
        assert_borrowed(&heading.0.units[2], b"gamma");

        assert_eq!(parsed.records.len(), 2);
        assert!(matches!(parsed.records[0].end_type, EndType::EndOfRecord));
        assert_first_record(&parsed.records[0]);

        // Two adjacent unit separators yield empty units, which still borrow from the input.
        let last = &parsed.records[1];
        assert!(matches!(last.end_type, EndType::EndOfDocument));
        assert_eq!(last.units.len(), 3);
        assert_borrowed(&last.units[0], b"record_2_unit");
        assert_borrowed(&last.units[1], b"");
        assert_borrowed(&last.units[2], b"");
    }
}