use std::fmt;
use std::num::NonZeroU8;
use nom::{
Compare, CompareResult, ExtendInto, IResult, Input, Needed, Offset, Parser,
error::{Error as NomError, ErrorKind},
};
use nom_locate::LocatedSpan;
use serde::{Serialize, Serializer, ser::SerializeSeq};
use smallvec::SmallVec;
use thiserror::Error;
/// Opaque error type returned by the public parsing functions in this file.
///
/// `#[error(transparent)]` delegates `Display` and `source()` to the wrapped
/// `InternalError`, so callers see its message without seeing the type itself.
#[derive(Error, Debug)]
#[error(transparent)]
pub struct Error {
// `#[from]` generates `From<InternalError> for Error`, which the `.into()` calls in the
// verification helpers rely on.
#[from]
internal: InternalError,
}
impl From<nom::Err<NomError<Span<'_>>>> for Error {
fn from(err: nom::Err<NomError<Span<'_>>>) -> Error {
Error {
internal: match err {
nom::Err::Incomplete(_) => unreachable!(),
nom::Err::Failure(e) => {
InternalError::Nom(e.code, e.input.location_line(), e.input.get_utf8_column())
}
nom::Err::Error(e) => {
InternalError::Nom(e.code, e.input.location_line(), e.input.get_utf8_column())
}
},
}
}
}
/// Private error detail wrapped by `Error`.
#[derive(Error, Debug)]
enum InternalError {
/// A structural rule was violated: the rule description and the line number shown in the message.
#[error("verification error: '{0}' at line {1}")]
Verify(&'static str, usize),
/// A `nom` parser failed: the error kind, then the line and UTF-8 column of the failing input.
#[error("nom error kind: {}, line: {}:{}", .0.description(), .1, .2)]
Nom(ErrorKind, u32, usize),
}
/// Parser input: a `&str` wrapped in `nom_locate`'s `LocatedSpan`, which is queried for the
/// line and column when a parse error is converted into `Error`.
type Span<'a> = LocatedSpan<&'a str>;
/// One fragment of a line value.
#[derive(Debug)]
enum TextEsc<'a> {
/// Literal text.
Text(&'a str),
/// Contents of an escape sequence; the `line` parser takes it from between `@#` and the
/// closing `@`.
Esc(&'a str),
}
impl<'a> ExtendInto for TextEsc<'a> {
type Item = char;
type Extender = ItemsInner<'a>;
fn new_builder(&self) -> Self::Extender {
ItemsInner {
data: SmallVec::new(),
}
}
fn extend_into(&self, acc: &mut Self::Extender) {
acc.data.push(match self {
TextEsc::Text(t) => TextEsc::Text(t),
TextEsc::Esc(e) => TextEsc::Esc(e),
});
}
}
/// Public wrapper around the parsed fragments of a line value.
///
/// Equality and serialization are those of the wrapped `ItemsInner`.
#[derive(Debug, Eq, PartialEq, Serialize)]
pub struct Item<'a>(ItemsInner<'a>);
/// Ordered list of the text and escape fragments that make up one value.
#[derive(Debug, Default)]
struct ItemsInner<'a> {
// Inline capacity of one fragment before `SmallVec` spills to the heap.
data: SmallVec<[TextEsc<'a>; 1]>,
}
/// Turns one fragment into the pieces consumed by [`Bytes`]: an optional leading marker,
/// the fragment's bytes, and an optional trailing marker.
///
/// Text fragments get no markers. Escape fragments are bracketed by `0xFF` on both sides, so
/// an escape never compares equal to text with the same characters (`0xFF` cannot appear in
/// the UTF-8 bytes of a `str`).
fn map_item_iter<'a>(
    item: &TextEsc<'a>,
) -> (
    Option<NonZeroU8>,
    std::slice::Iter<'a, u8>,
    Option<NonZeroU8>,
) {
    let (marker, text) = match *item {
        TextEsc::Text(text) => (None, text),
        TextEsc::Esc(text) => (NonZeroU8::new(0xFF), text),
    };
    (marker, text.as_bytes().iter(), marker)
}
impl ItemsInner<'_> {
    /// Returns an iterator over the bytes of all fragments, in order.
    ///
    /// Escape fragments are bracketed by a `0xFF` marker byte on each side (see
    /// `map_item_iter`); text fragments are yielded unchanged.
    fn bytes(&self) -> Bytes<'_> {
        let mut item_iter = self.data.iter();
        let str_iter = item_iter.next().map(map_item_iter);
        Bytes {
            item_iter,
            str_iter,
        }
    }

    /// Returns the number of bytes this value occupied in its source line, not counting the
    /// doubling of literal `@` characters (`Line::len` adds those separately).
    ///
    /// An escape is written as `@#` + text + `@`. The `line` parser only accepts an escape that
    /// is followed by a single space or by the line terminator, and it consumes that space. So
    /// every fragment that follows an escape, text or another escape, accounts for one extra
    /// separator byte. Previously the separator was only counted before text, which made
    /// `@#A@ @#B@` one byte too short.
    fn len(&self) -> usize {
        let mut sum = 0;
        let mut after_escape = false;
        for item in &self.data {
            if after_escape {
                // The space the parser consumed after the previous escape.
                sum += 1;
            }
            match item {
                TextEsc::Text(t) => {
                    after_escape = false;
                    sum += t.len();
                }
                TextEsc::Esc(t) => {
                    after_escape = true;
                    sum += 2 + t.len() + 1;
                }
            }
        }
        sum
    }
}
/// Byte iterator over the fragments of an `ItemsInner`, created by `ItemsInner::bytes`.
struct Bytes<'a> {
// State for the fragment currently being emitted: leading marker, its bytes, trailing marker
// (as produced by `map_item_iter`). `None` once every fragment has been exhausted.
str_iter: Option<(
Option<NonZeroU8>,
std::slice::Iter<'a, u8>,
Option<NonZeroU8>,
)>,
// Fragments that have not been started yet.
item_iter: std::slice::Iter<'a, TextEsc<'a>>,
}
impl Iterator for Bytes<'_> {
    type Item = u8;

    /// Yields the next byte: the current fragment's leading marker (if any), then its bytes,
    /// then its trailing marker (if any), before moving on to the next fragment.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // `?` ends iteration once there is no current fragment left.
            let (leading, body, trailing) = self.str_iter.as_mut()?;
            if let Some(marker) = leading.take() {
                return Some(marker.get());
            }
            if let Some(&byte) = body.next() {
                return Some(byte);
            }
            if let Some(marker) = trailing.take() {
                return Some(marker.get());
            }
            // Current fragment is exhausted; advance (possibly to `None`).
            self.str_iter = self.item_iter.next().map(map_item_iter);
        }
    }
}
// Marker impl: the hand-written `PartialEq` compares byte sequences, which is a total equality.
impl Eq for ItemsInner<'_> {}
impl<'a> From<&'a str> for ItemsInner<'a> {
fn from(s: &'a str) -> ItemsInner<'a> {
let mut data = SmallVec::new();
data.push(TextEsc::Text(s));
ItemsInner { data }
}
}
/// Two values are equal when their marker-delimited byte streams are equal, regardless of how
/// the text is split into fragments.
impl PartialEq for ItemsInner<'_> {
    fn eq(&self, other: &Self) -> bool {
        let mine = self.bytes();
        let theirs = other.bytes();
        Iterator::eq(mine, theirs)
    }
}
/// A run of consecutive fragments that `Display` and `Serialize` treat as one string; the
/// `Display` impl panics on an `Esc` fragment, so only text fragments may be passed in.
struct TextSlice<'a>(&'a [TextEsc<'a>]);
/// Writes the text fragments back to back.
///
/// # Panics
/// Panics (`unreachable!`) if the slice contains an escape fragment.
impl fmt::Display for TextSlice<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.0.iter().try_for_each(|fragment| match fragment {
            TextEsc::Text(text) => f.write_str(text),
            TextEsc::Esc(_) => unreachable!(),
        })
    }
}
/// Serializes the run as a single string, using the `Display` impl.
impl Serialize for TextSlice<'_> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        serializer.collect_str(self)
    }
}
/// Serializes the value as a sequence of strings: each maximal run of text fragments becomes
/// one joined string, and each escape becomes its own element.
impl Serialize for ItemsInner<'_> {
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let mut seq = serializer.serialize_seq(None)?;
        // Index of the first fragment of the text run that has not been emitted yet.
        let mut run_start = 0;
        for (idx, fragment) in self.data.iter().enumerate() {
            let TextEsc::Esc(escape) = fragment else {
                continue;
            };
            // Flush the text that precedes this escape, if any.
            if run_start < idx {
                seq.serialize_element(&TextSlice(&self.data[run_start..idx]))?;
            }
            seq.serialize_element(escape)?;
            run_start = idx + 1;
        }
        // Trailing text after the last escape (or the whole value when there is no escape).
        if run_start < self.data.len() {
            seq.serialize_element(&TextSlice(&self.data[run_start..]))?;
        }
        seq.end()
    }
}
/// Newtype around `Span` with its own `ExtendInto` impl, so `escaped_transform` accumulates
/// fragments into an `ItemsInner`.
#[derive(Clone, Copy)]
struct Str<'a>(Span<'a>);
/// Comparison against anything string-like, delegated to the wrapped span.
impl<S: AsRef<str>> Compare<S> for Str<'_> {
    fn compare(&self, s: S) -> CompareResult {
        let needle: &str = s.as_ref();
        self.0.compare(needle)
    }

    fn compare_no_case(&self, s: S) -> CompareResult {
        let needle: &str = s.as_ref();
        self.0.compare_no_case(needle)
    }
}
impl<'a> ExtendInto for Str<'a> {
type Item = char;
type Extender = ItemsInner<'a>;
fn new_builder(&self) -> Self::Extender {
ItemsInner {
data: SmallVec::new(),
}
}
fn extend_into(&self, acc: &mut Self::Extender) {
acc.data.push(TextEsc::Text(*self.0));
}
}
/// `nom` input implementation: every method forwards to the wrapped span and, where a span is
/// returned, wraps it again.
impl<'a> Input for Str<'a> {
    type Item = char;
    type Iter = std::str::Chars<'a>;
    type IterIndices = std::str::CharIndices<'a>;

    fn input_len(&self) -> usize {
        self.0.input_len()
    }

    fn take(&self, count: usize) -> Self {
        Self(self.0.take(count))
    }

    fn take_from(&self, index: usize) -> Self {
        Self(self.0.take_from(index))
    }

    fn take_split(&self, count: usize) -> (Self, Self) {
        // Keep the order in which the inner implementation returns the two halves.
        let (first, second) = self.0.take_split(count);
        (Self(first), Self(second))
    }

    fn position<P: Fn(Self::Item) -> bool>(&self, predicate: P) -> Option<usize> {
        self.0.position(predicate)
    }

    fn iter_elements(&self) -> Self::Iter {
        self.0.iter_elements()
    }

    fn iter_indices(&self) -> Self::IterIndices {
        self.0.iter_indices()
    }

    fn slice_index(&self, count: usize) -> Result<usize, Needed> {
        self.0.slice_index(count)
    }
}
impl Offset for Str<'_> {
fn offset(&self, second: &Self) -> usize {
self.0.offset(&second.0)
}
}
/// Adapts `nom::bytes::escaped_transform` so that it takes and returns plain [`Span`]s while
/// running internally on [`Str`], whose `ExtendInto` impl collects into an [`ItemsInner`].
///
/// `normal` matches unescaped input, `control_char` introduces an escape, and `transform`
/// parses what follows the control character.
fn escaped_transform_<'a, F, G>(
    normal: F,
    control_char: char,
    transform: G,
) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, ItemsInner<'a>> + 'a
where
    F: Parser<Str<'a>, Output = Str<'a>, Error = NomError<Str<'a>>> + 'a,
    G: Parser<Str<'a>, Output = TextEsc<'a>, Error = NomError<Str<'a>>> + 'a,
{
    let mut inner = nom::bytes::escaped_transform(normal, control_char, transform);
    move |input: Span<'a>| match inner.parse_complete(Str(input)) {
        // Unwrap the remaining input back into a `Span`.
        Ok((rest, items)) => Ok((rest.0, items)),
        // Do the same for the input carried inside the error.
        Err(err) => Err(err.map_input(|wrapped| wrapped.0)),
    }
}
/// The value part of a line.
#[derive(Debug, Eq, PartialEq, Serialize)]
pub enum Value<'a> {
/// A pointer to another record: the identifier between the two `@` signs.
Pointer(&'a str),
/// Any other value: text and escape fragments.
Item(Item<'a>),
}
/// One parsed line: `level [@xref@] tag [value]`.
#[derive(Debug, Eq, PartialEq, Serialize)]
pub struct Line<'a> {
// Nesting level; the parser only accepts values below 100.
level: u8,
// Cross-reference identifier without the surrounding `@` signs, if present.
xref: Option<&'a str>,
// Tag text.
tag: &'a str,
// Value after the tag, if any.
value: Option<Value<'a>>,
}
impl<'a> Line<'a> {
pub fn level(&self) -> u8 {
self.level
}
pub fn xref(&self) -> Option<&'a str> {
self.xref
}
pub fn tag(&self) -> &'a str {
self.tag
}
pub fn value(&self) -> Option<&Value<'a>> {
self.value.as_ref()
}
fn len(&self) -> usize {
let level_len = if self.level < 10 { 1 } else { 2 };
let xref_len = if let Some(xref) = self.xref {
1 + 2 + xref.len()
} else {
0
};
let value_len = match self.value {
Some(Value::Pointer(p)) => 1 + 2 + p.len(),
Some(Value::Item(ref text)) if text.0.len() == 0 => unreachable!(),
Some(Value::Item(ref text)) => {
1 + text.0.len() + text.0.bytes().filter(|&c| c == b'@').count()
}
None => 0,
};
level_len + xref_len + 1 + self.tag.len() + value_len
}
}
/// Builds a parser for one line that must end with `terminator`.
///
/// Accepted shape: `level [" @xref@"] " " tag [" " [value]] terminator`, where
/// * `level` is `0` or a number without a leading zero, below 100;
/// * `xref` and pointer values are 1 to 20 alphanumeric characters between `@` signs;
/// * `tag` is an optional `_` followed by alphanumerics, at most 31 bytes in total;
/// * `value` is either a pointer or text in which `@@` stands for a literal `@` and
///   `@#...@` is an escape that must be followed by a space or the terminator.
///
/// After parsing, the line is rejected with `ErrorKind::Verify` unless it fits in 255 bytes
/// (as computed by `Line::len`, plus the terminator) and carries a cross-reference identifier
/// only at level 0.
fn line<'a>(terminator: Span<'a>) -> impl Fn(Span<'a>) -> IResult<Span<'a>, Line<'a>> {
    move |input: Span<'a>| {
        use nom::ParseTo;
        use nom::branch::alt;
        use nom::bytes::{tag, take_while, take_while1};
        use nom::character::complete::alphanumeric1;
        use nom::character::one_of;
        use nom::combinator::{cut, opt, peek, recognize, verify};
        use nom::sequence::{delimited, preceded, terminated};

        let delim_ = tag(" ");
        let digit_ = take_while1(|ch: char| ch.is_ascii_digit());
        // Either a lone "0" or digits that do not start with "0".
        let l_digit_ = alt((tag("0"), preceded(peek(one_of("123456789")), digit_)));
        let level_ = verify(l_digit_.map_opt(|i: Span<'_>| i.parse_to()), |&o| o < 100);
        let identifier_string_ =
            || verify::<Span<'a>, _, _, _, _>(alphanumeric1, |o: &Span<'_>| o.len() <= 20);
        // Once the opening `@` and identifier matched, a missing closing `@` is a hard failure.
        let pointer_ = || delimited(tag("@"), identifier_string_(), cut(tag("@")));
        let tag_ = verify(recognize((opt(tag("_")), alphanumeric1)), |o: &Span<'_>| {
            o.len() <= 31
        });
        let escape_text_plus_space_ =
            take_while(|ch: char| ch.is_ascii_alphanumeric() || ch == ' ');
        let escape_text_ = recognize((alphanumeric1, escape_text_plus_space_));
        let escape_ = delimited(tag("#"), cut(escape_text_), cut(tag("@")))
            .map(|o: Str<'_>| TextEsc::Esc(*o.0));
        let line_text_ = escaped_transform_(
            // Ordinary text: everything except `@`, DEL and the C0 controls other than TAB.
            take_while1(|ch: char| {
                !matches!(ch,
                    '\u{0000}'..='\u{0008}' |
                    '\u{000A}'..='\u{001F}' |
                    '@' |
                    '\u{007F}'
                )
            }),
            '@',
            alt((
                // "@@" is a literal at-sign.
                tag("@").map(|o: Str<'_>| TextEsc::Text(*o.0)),
                // "@#...@" is an escape; the space after it is consumed, the terminator is not.
                terminated(escape_, alt((tag(" "), peek(tag(*terminator))))),
            )),
        );
        let line_item_ = line_text_.map(|t| Value::Item(Item(t)));
        let line_value_ = alt((pointer_().map(|p| Value::Pointer(*p)), line_item_));
        let terminator_ = tag(*terminator);
        let opt_pointer_ = opt(preceded(tag(" "), pointer_().map(|s| *s)));
        // A single space after the tag with nothing behind it is accepted and yields no value.
        let opt_line_value = opt(preceded(tag(" "), opt(line_value_)));

        verify(
            (
                level_,
                opt_pointer_,
                delim_,
                tag_,
                opt_line_value,
                terminator_,
            )
                .map(|(l, x, _, t, v, _)| Line {
                    level: l,
                    xref: x,
                    tag: *t,
                    value: v.flatten(),
                }),
            // `&&` binds tighter than `||`, so the parentheses are required: without them any
            // line that has no xref would skip the length limit entirely.
            |l| l.len() + terminator.len() <= 255 && (l.level == 0 || l.xref.is_none()),
        )
        .parse_complete(input)
    }
}
fn verify_lines<'a>(
(input, (_, ls)): (Span<'a>, (Span<'a>, Vec<Line<'a>>)),
) -> Result<Vec<Line<'a>>, Error> {
fn v<'b>(s: &'static str, l: usize) -> Result<Vec<Line<'b>>, Error> {
Err(InternalError::Verify(s, l + 1).into())
}
if !input.is_empty() {
return v("not all input consumed", 0);
}
let mut records = std::collections::BTreeSet::new();
let mut last: Option<&Line<'_>> = None;
for (i, l) in ls.iter().enumerate() {
let last_plus_1 = last.map(|r| r.level + 1).unwrap_or(0);
if l.level > last_plus_1 {
return v("level increase too great", i);
}
if l.level == last_plus_1
&& last
.map(|r| r.tag == "CONT" || r.tag == "CONC")
.unwrap_or(false)
{
return v("CONT/CONC cannot have a subrecord", i);
}
if l.tag == "CONT" || l.tag == "CONC" {
if l.level == 0 {
return v("CONT/CONC cannot be a top level record", i);
}
if !l
.value
.as_ref()
.map(|v| matches!(v, Value::Item(_)))
.unwrap_or(true)
{
return v("CONT/CONC cannot have a cross reference value", i);
}
if l.level != last_plus_1
&& !last
.map(|r| r.tag == "CONT" || r.tag == "CONC")
.unwrap_or(false)
{
return v(
"CONT/CONC have to be a direct subrecord or sibling record of CONT/CONC",
i,
);
}
if l.level != last_plus_1 && l.level != last_plus_1 - 1 {
return v(
"CONT/CONC can only be a subrecord or sibling of the last record",
i,
);
}
if l.level != 0
&& !last
.and_then(|r| r.value.as_ref())
.map(|v| matches!(v, Value::Item(_)))
.unwrap_or(true)
{
return v("CONT/CONC cannot follow a cross reference value", i);
}
}
if l.level != last_plus_1
&& !last
.map(|r| r.tag == "CONT" || r.tag == "TRLR" || r.value.is_some())
.unwrap_or(true)
{
return v(
"CONT/TRLR are the only records allowed to have no subrecords or value",
i - 1,
);
}
if let Some(xref) = l.xref
&& !records.insert(xref)
{
return v("duplicate cross reference", i);
}
last = Some(l);
}
for (i, l) in ls.iter().enumerate() {
if let Some(Value::Pointer(p)) = l.value
&& !records.contains(p)
{
return v("missing cross reference", i);
}
}
Ok(ls)
}
pub fn lines(input: &str) -> Result<Vec<Line<'_>>, Error> {
use nom::branch::alt;
use nom::bytes::{tag, take_till};
use nom::combinator::{all_consuming, opt, peek, recognize};
use nom::multi::many1;
use nom::sequence::preceded;
let terminator_ = alt((recognize((tag("\r"), opt(tag("\n")))), tag("\n")));
let not_line_ending_ = take_till(|ch: char| ch == '\r' || ch == '\n');
let find_terminator_ = peek(preceded(not_line_ending_, terminator_));
all_consuming(preceded(
tag("\u{FEFF}"),
find_terminator_.flat_map(|i| many1(line(i)).map(move |o| (i, o))),
))
.parse_complete(Span::new(input))
.map_err(|e| e.into())
.and_then(verify_lines)
}
/// A line together with the lines nested below it.
// NOTE(review): the reason for allowing `single_use_lifetimes` is not visible here; presumably
// it silences a warning triggered by the `Serialize` derive — confirm.
#[allow(single_use_lifetimes)]
#[derive(Debug, Eq, PartialEq, Serialize)]
pub struct Record<'a> {
// Copied from the originating `Line`.
level: u8,
xref: Option<&'a str>,
tag: &'a str,
// `records` appends the values of following CONT/CONC lines to this value.
value: Option<Value<'a>>,
// 1-based number of the originating line (`records` stores `index + 1`).
line: usize,
// Direct children, in input order.
subrecords: Vec<Record<'a>>,
}
impl<'a> From<(usize, Line<'a>)> for Record<'a> {
fn from((i, l): (usize, Line<'a>)) -> Record<'a> {
Record {
level: l.level,
xref: l.xref,
tag: l.tag,
value: l.value,
line: i,
subrecords: Vec::new(),
}
}
}
fn verify_records<'a>(records: Vec<Record<'a>>) -> Result<Vec<Record<'a>>, Error> {
fn v<'b>(s: &'static str, l: usize) -> Result<Vec<Record<'b>>, Error> {
Err(InternalError::Verify(s, l + 1).into())
}
match records.first() {
None => unreachable!(), Some(head) if head.level != 0 => unreachable!(), Some(head) if head.tag != "HEAD" => return v("HEAD must be the first record", 0),
Some(head) if head.xref.is_some() => {
return v("HEAD must not have a cross-reference identifier", 0);
}
Some(head) if head.value.is_some() => return v("HEAD must not have a value", 0),
_ => {}
}
match records.last() {
None => return v("TRLR record is required", 0),
Some(trlr) if trlr.tag != "TRLR" => {
return v("TRLR must be the last record", trlr.line);
}
Some(trlr) if trlr.level != 0 => {
return v("TRLR must be a level 0 record", trlr.line);
}
Some(trlr) if trlr.xref.is_some() => {
return v("TRLR must not have a cross-reference identifier", trlr.line);
}
Some(trlr) if trlr.value.is_some() => {
return v("TRLR must not have a value", trlr.line);
}
_ => {}
}
if let Some(head) = records.first() {
match head.subrecords.first() {
None => unreachable!(), Some(gedc) if gedc.tag != "GEDC" => {
return v("GEDC must be the first subrecord of HEAD", gedc.line);
}
Some(gedc) if gedc.value.is_some() => {
return v("GEDC must not have a value", gedc.line);
}
Some(gedc) => {
match gedc.subrecords.first() {
None => unreachable!(), Some(vers) if vers.tag != "VERS" => {
return v("VERS must be the first subrecord of GEDC", vers.line);
}
Some(vers) => {
let expected = Some(Value::Item(Item(ItemsInner::from("5.5.5"))));
if vers.value != expected {
return v("GEDC.VERS must have value 5.5.5", vers.line);
}
}
}
match gedc.subrecords.get(1) {
None => {
return v("GEDC must have a FORM subrecord", gedc.line);
}
Some(form) if form.tag != "FORM" => {
return v("FORM must be the second subrecord of GEDC", form.line);
}
Some(form) if form.value.is_none() => {
return v("GEDC.FORM must have a value", form.line);
}
Some(form) => {
match form.subrecords.first() {
None => {
return v("GEDC.FORM must have a VERS subrecord", form.line);
}
Some(vers) if vers.tag != "VERS" => {
return v("VERS must be the first subrecord of FORM", vers.line);
}
Some(vers) => {
let expected = Some(Value::Item(Item(ItemsInner::from("5.5.5"))));
if vers.value != expected {
return v("GEDC.FORM.VERS must have value 5.5.5", vers.line);
}
}
}
}
}
}
}
match head.subrecords.get(1) {
None => {
return v("HEAD must have a CHAR subrecord", head.line);
}
Some(char) if char.tag != "CHAR" => {
return v("CHAR must be the second subrecord of HEAD", char.line);
}
_ => {}
}
}
Ok(records)
}
pub fn records(input: &str) -> Result<Vec<Record<'_>>, Error> {
lines(input).and_then(|ls| {
fn v<'b>(s: &'static str, l: usize) -> Result<Vec<Record<'b>>, Error> {
Err(InternalError::Verify(s, l + 1).into())
}
let mut recs = Record {
level: 0,
xref: None,
tag: "",
value: None,
line: 0,
subrecords: Vec::new(),
};
let mut stack: Vec<usize> = Vec::new();
for (i, l) in ls.into_iter().enumerate() {
let lvl = l.level;
stack.truncate(lvl.into());
let append = if lvl == 0 {
&mut recs
} else {
stack
.iter()
.fold(&mut recs, |acc, &x| &mut acc.subrecords[x])
};
fn cont_conc<'a>(r: &mut Record<'a>, l: Line<'a>, cont: bool) {
let mut v = match r.value.take() {
Some(Value::Item(Item(v))) => v,
Some(Value::Pointer(_)) | None => Default::default(),
};
if cont {
v.data.push(TextEsc::Text("\n"));
}
if let Some(Value::Item(Item(i))) = l.value {
v.data.extend(i.data.into_iter());
}
r.value = Some(Value::Item(Item(v)));
}
if l.tag == "CONT" || l.tag == "CONC" {
if i < 6 {
return v("CONT/CONC not supported as basic form HEAD subrecords", i);
}
let cont = l.tag == "CONT";
cont_conc(append, l, cont);
} else {
stack.push(append.subrecords.len());
append.subrecords.push((i + 1, l).into());
}
}
verify_records(recs.subrecords)
})
}
#[cfg(test)]
mod tests {
use super::*;
/// A minimal line parses with each of the three supported terminators.
#[test]
fn terminators() {
    let cases = [("\r", "0 HEAD\r"), ("\n", "0 HEAD\n"), ("\r\n", "0 HEAD\r\n")];
    for (terminator, input) in cases {
        let expected_line = Line {
            level: 0,
            xref: None,
            tag: "HEAD",
            value: None,
        };
        let (remaining, result) = line(terminator.into())(input.into()).unwrap();
        assert_eq!("", *remaining);
        assert_eq!(expected_line, result);
    }
}
#[track_caller]
fn valid_case<'a>(input: &'a str, l: u8, x: Option<&'a str>, t: &'a str, v: Option<Value<'a>>) {
let expected_line = Line {
level: l,
xref: x,
tag: t,
value: v,
};
let (remaining, result) = line("\r\n".into())(input.into()).unwrap();
assert_eq!("", *remaining);
assert_eq!(expected_line, result);
eprintln!("{}", input);
let c = 2 + if input.ends_with(" \r\n") { 1 } else { 0 };
assert_eq!(
input.len(),
line("\r\n".into())(input.into()).unwrap().1.len() + c
);
}
/// Asserts that parsing `input` as one `"\r\n"`-terminated line fails, that the error's
/// remaining fragment is `len` bytes long, and that its `Debug` output equals `error`.
///
/// An unexpected success or `Incomplete` result panics with the offending value in the
/// message instead of a bare `assert!(false)`.
#[track_caller]
fn invalid_case(input: &str, len: usize, error: &str) {
    match line("\r\n".into())(input.into()) {
        Ok(v) => panic!("expected a parse error, but parsing succeeded: {:?}", v),
        Err(nom::Err::Incomplete(e)) => {
            panic!("expected a parse error, but got Incomplete({:?})", e)
        }
        Err(nom::Err::Failure(e)) | Err(nom::Err::Error(e)) => {
            assert_eq!(len, e.input.fragment().len());
            assert_eq!(error, format!("{:?}", e));
        }
    }
}
/// Asserts that `lines(input)` fails with an error whose `Display` output equals `error`.
///
/// An unexpected success panics with the parsed lines in the message instead of a bare
/// `assert!(false)`.
#[track_caller]
fn invalid_lines(input: &str, error: &str) {
    match lines(input) {
        Ok(v) => panic!("expected an error, but parsing succeeded: {:?}", v),
        Err(e) => assert_eq!(error, e.to_string()),
    }
}
/// Asserts that `records(input)` fails with an error whose `Display` output equals `error`.
///
/// An unexpected success panics with the parsed records in the message instead of a bare
/// `assert!(false)`.
#[track_caller]
fn invalid_records(input: &str, error: &str) {
    match records(input) {
        Ok(v) => panic!("expected an error, but parsing succeeded: {:?}", v),
        Err(e) => assert_eq!(error, e.to_string()),
    }
}
/// Accepted tag shapes: upper case, lower case, leading underscore, trailing space, and the
/// longest accepted tag (31 bytes) with and without a 20-character identifier.
#[test]
fn tags() {
valid_case("0 HEAD\r\n", 0, None, "HEAD", None);
let upper = "0 ABCDEFGHIJKLMNOPQRSTUVWXYZ\r\n";
valid_case(upper, 0, None, &upper[2..28], None);
let lower = "0 abcdefghijklmnopqrstuvwxyz\r\n";
valid_case(lower, 0, None, &lower[2..28], None);
valid_case("0 _0123456789\r\n", 0, None, "_0123456789", None);
// A single space after the tag is accepted and yields no value.
valid_case("0 ADDR \r\n", 0, None, "ADDR", None);
let max_level = "99 ABCDEFGHIJKLMNOPQRSTUVWXYZ01234 \r\n";
valid_case(max_level, 99, None, &max_level[3..34], None);
let max = "0 @N1234567890123456789@ ABCDEFGHIJKLMNOPQRSTUVWXYZ01234 \r\n";
valid_case(max, 0, Some(&max[3..23]), &max[25..56], None);
}
/// Every level from 0 to 99 is accepted.
#[test]
fn levels() {
    for level in 0u8..100 {
        let text = format!("{} HEAD\r\n", level);
        valid_case(&text, level, None, "HEAD", None);
    }
}
/// A plain text value is parsed as a single text fragment.
#[test]
fn simple_value() {
let v = Some(Value::Item(Item("UTF-8".into())));
valid_case("1 CHAR UTF-8\r\n", 1, None, "CHAR", v);
}
/// A level-0 line with a 20-character cross-reference identifier and no value.
#[test]
fn simple_xref() {
valid_case(
"0 @N1234567890123456789@ NOTE\r\n",
0,
Some("N1234567890123456789"),
"NOTE",
None,
);
}
/// A value of the form `@id@` is parsed as a pointer, without the `@` signs.
#[test]
fn simple_pointer() {
let v = Some(Value::Pointer("N1234567890123456789"));
valid_case("1 NOTE @N1234567890123456789@\r\n", 1, None, "NOTE", v);
}
/// A line carrying both a cross-reference identifier and a text value.
#[test]
fn simple_note() {
let v = Some(Value::Item(Item("foo".into())));
valid_case("0 @N1@ NOTE foo\r\n", 0, Some("N1"), "NOTE", v);
}
/// Non-ASCII text is accepted in values; `valid_case` also checks that the byte length
/// computed by `Line::len` matches the multi-byte input.
#[test]
fn unicode_values() {
valid_case(
"1 NOTE こんにちは\r\n",
1,
None,
"NOTE",
Some(Value::Item(Item("こんにちは".into()))),
);
valid_case(
"1 NOTE Señor Ñoño\r\n",
1,
None,
"NOTE",
Some(Value::Item(Item("Señor Ñoño".into()))),
);
valid_case(
"1 NOTE Hello 👋 World 🌍\r\n",
1,
None,
"NOTE",
Some(Value::Item(Item("Hello 👋 World 🌍".into()))),
);
valid_case(
"1 NOTE 你好世界\r\n",
1,
None,
"NOTE",
Some(Value::Item(Item("你好世界".into()))),
);
}
/// `@#...@` escapes inside values: in the middle, at the start, twice in one value, and as
/// the whole value. The space that follows an escape is not part of the next text fragment.
#[test]
fn escape_line_value() {
let mut items = ItemsInner {
data: SmallVec::new(),
};
Str("ABT ".into()).extend_into(&mut items);
TextEsc::Esc("DFRENCH R").extend_into(&mut items);
Str("11 NIVO 6".into()).extend_into(&mut items);
let v = Some(Value::Item(Item(items)));
valid_case("1 DATE ABT @#DFRENCH R@ 11 NIVO 6\r\n", 1, None, "DATE", v);
let mut items = ItemsInner {
data: SmallVec::new(),
};
TextEsc::Esc("DHEBREW").extend_into(&mut items);
Str("5765".into()).extend_into(&mut items);
let v = Some(Value::Item(Item(items)));
valid_case("1 DATE @#DHEBREW@ 5765\r\n", 1, None, "DATE", v);
let mut items = ItemsInner {
data: SmallVec::new(),
};
TextEsc::Esc("DJULIAN").extend_into(&mut items);
Str("1 JAN 1700".into()).extend_into(&mut items);
let v = Some(Value::Item(Item(items)));
valid_case("1 DATE @#DJULIAN@ 1 JAN 1700\r\n", 1, None, "DATE", v);
let mut items = ItemsInner {
data: SmallVec::new(),
};
TextEsc::Esc("DHEBREW").extend_into(&mut items);
Str("to ".into()).extend_into(&mut items);
TextEsc::Esc("DGREGORIAN").extend_into(&mut items);
let v = Some(Value::Item(Item(items)));
valid_case("1 DATE @#DHEBREW@ to @#DGREGORIAN@\r\n", 1, None, "DATE", v);
let mut items = ItemsInner {
data: SmallVec::new(),
};
TextEsc::Esc("DROMAN").extend_into(&mut items);
let v = Some(Value::Item(Item(items)));
valid_case("1 DATE @#DROMAN@\r\n", 1, None, "DATE", v);
}
/// `@@` in the input stands for one literal `@` in the value.
#[test]
fn escape_at() {
let v = Some(Value::Item(Item("foo@example.com".into())));
valid_case("1 EMAIL foo@@example.com\r\n", 1, None, "EMAIL", v);
let v = Some(Value::Item(Item("@foo".into())));
valid_case("1 NOTE @@foo\r\n", 1, None, "NOTE", v);
valid_case(
"1 NOTE @@\r\n",
1,
None,
"NOTE",
Some(Value::Item(Item("@".into()))),
);
valid_case(
"1 NOTE @@@@\r\n",
1,
None,
"NOTE",
Some(Value::Item(Item("@@".into()))),
);
valid_case(
"1 NOTE @@hello@@\r\n",
1,
None,
"NOTE",
Some(Value::Item(Item("@hello@".into()))),
);
}
/// Malformed escapes are rejected: an empty escape, and an escape that is not followed by a
/// space or the terminator.
#[test]
fn escape_text() {
invalid_case(
"1 DATE @#@\r\n",
3,
r#"Error { input: LocatedSpan { offset: 9, line: 1, fragment: "@\r\n", extra: () }, code: AlphaNumeric }"#,
);
invalid_case(
"1 DATE @#DHEBREW@5765\r\n",
16,
r#"Error { input: LocatedSpan { offset: 7, line: 1, fragment: "@#DHEBREW@5765\r\n", extra: () }, code: Tag }"#,
);
}
/// Rejected tags: a double leading underscore and a tag of 32 bytes.
#[test]
fn invalid_tags() {
invalid_case(
"0 __HEAD\r\n",
7,
r#"Error { input: LocatedSpan { offset: 3, line: 1, fragment: "_HEAD\r\n", extra: () }, code: AlphaNumeric }"#,
);
invalid_case(
"0 ABCDEFGHIJKLMNOPQRSTUVWXYZ012345\r\n",
34,
r#"Error { input: LocatedSpan { offset: 2, line: 1, fragment: "ABCDEFGHIJKLMNOPQRSTUVWXYZ012345\r\n", extra: () }, code: Verify }"#,
);
}
/// Rejected levels: a leading zero, a level of 100, a first line that is not level 0, and a
/// jump of more than one level.
#[test]
fn invalid_levels() {
invalid_case(
"01 HEAD\r\n",
8,
r#"Error { input: LocatedSpan { offset: 1, line: 1, fragment: "1 HEAD\r\n", extra: () }, code: Tag }"#,
);
invalid_case(
"100 HEAD\r\n",
10,
r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: "100 HEAD\r\n", extra: () }, code: Verify }"#,
);
invalid_lines(
"\u{FEFF}1 HEAD\r\n",
"verification error: 'level increase too great' at line 1",
);
invalid_lines(
"\u{FEFF}0 HEAD\r\n2 VERS 5.5.5\r\n",
"verification error: 'level increase too great' at line 2",
);
}
/// Rejected identifiers: 21 characters as an xref, 21 characters as a pointer value, and an
/// xref on a line that is not level 0.
#[test]
fn invalid_pointer() {
invalid_case(
"0 @N01234567890123456789@ NOTE foo\r\n",
34,
r#"Error { input: LocatedSpan { offset: 2, line: 1, fragment: "@N01234567890123456789@ NOTE foo\r\n", extra: () }, code: AlphaNumeric }"#,
);
invalid_case(
"0 NOTE @N01234567890123456789@\r\n",
25,
r#"Error { input: LocatedSpan { offset: 7, line: 1, fragment: "@N01234567890123456789@\r\n", extra: () }, code: Tag }"#,
);
invalid_case(
"1 @N1@ NOTE foo\r\n",
17,
r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: "1 @N1@ NOTE foo\r\n", extra: () }, code: Verify }"#,
);
}
/// A line of exactly 255 bytes including its `"\r\n"` terminator (7 + 246 + 2) is accepted.
///
/// NOTE(review): the over-long input below ends in `"\n"`, while `invalid_case` parses with a
/// `"\r\n"` terminator. The expected `Tag` error at the trailing `"\n"` therefore comes from
/// the terminator mismatch, and the 255-byte check is never reached by this case. Consider
/// terminating the input with `"\r\n"` and expecting `Verify` so the limit itself is covered.
#[test]
fn line_length_limit() {
let max_value = "x".repeat(246);
let valid_line = format!("0 NOTE {}\r\n", max_value);
valid_case(
&valid_line,
0,
None,
"NOTE",
Some(Value::Item(Item(max_value.as_str().into()))),
);
let over_value = "x".repeat(247);
let invalid_line = format!("0 NOTE {}\n", over_value);
invalid_case(
&invalid_line,
1,
"Error { input: LocatedSpan { offset: 254, line: 1, fragment: \"\\n\", extra: () }, code: Tag }",
);
}
/// Input left after the terminator is returned untouched, and anything before the level
/// (space, tab, or a line break) makes the line invalid.
#[test]
fn leading_whitespace() {
let expected_line = Line {
level: 0,
xref: None,
tag: "HEAD",
value: None,
};
let (remaining, result) = line("\n".into())("0 HEAD\n\r".into()).unwrap();
assert_eq!("\r", *remaining);
assert_eq!(expected_line, result);
invalid_case(
" 0 HEAD\r\n",
9,
r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: " 0 HEAD\r\n", extra: () }, code: OneOf }"#,
);
invalid_case(
"\t0 HEAD\r\n",
9,
r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: "\t0 HEAD\r\n", extra: () }, code: OneOf }"#,
);
invalid_case(
"\r\n0 HEAD\r\n",
10,
r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: "\r\n0 HEAD\r\n", extra: () }, code: OneOf }"#,
);
invalid_case(
"\n0 HEAD\r\n",
9,
r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: "\n0 HEAD\r\n", extra: () }, code: OneOf }"#,
);
invalid_case(
"\r0 HEAD\r\n",
9,
r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: "\r0 HEAD\r\n", extra: () }, code: OneOf }"#,
);
invalid_case(
"\n\r0 HEAD\r\n",
10,
r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: "\n\r0 HEAD\r\n", extra: () }, code: OneOf }"#,
);
}
/// The same four-line input parses identically with `"\r"`, `"\n"` and `"\r\n"` terminators.
#[test]
fn multiple_lines() {
let expected_lines = vec![
Line {
level: 0,
xref: None,
tag: "HEAD",
value: None,
},
Line {
level: 1,
xref: None,
tag: "GEDC",
value: None,
},
Line {
level: 2,
xref: None,
tag: "VERS",
value: Some(Value::Item(Item("5.5.5".into()))),
},
Line {
level: 0,
xref: None,
tag: "TRLR",
value: None,
},
];
let cr = "\u{FEFF}0 HEAD\r1 GEDC\r2 VERS 5.5.5\r0 TRLR\r";
assert_eq!(expected_lines, lines(cr).unwrap());
let lf = "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n0 TRLR\n";
assert_eq!(expected_lines, lines(lf).unwrap());
let crlf = "\u{FEFF}0 HEAD\r\n1 GEDC\r\n2 VERS 5.5.5\r\n0 TRLR\r\n";
assert_eq!(expected_lines, lines(crlf).unwrap());
}
/// Input without the leading byte-order mark is rejected at line 1, column 1.
#[test]
fn invalid_bom() {
invalid_lines("0 HEAD\n0 TRLR\n", "nom error kind: Tag, line: 1:1");
}
/// A pointer to an unknown identifier and a repeated identifier are both rejected.
#[test]
fn invalid_xrefs() {
invalid_lines(
"\u{FEFF}0 @N1@ NOTE Test\n1 NOTE @N2@\n",
"verification error: 'missing cross reference' at line 2",
);
invalid_lines(
"\u{FEFF}0 @N1@ NOTE Test\n0 @N1@ NOTE Test\n",
"verification error: 'duplicate cross reference' at line 2",
);
}
/// Lines must all use the terminator detected on the first line; mixing terminators leaves
/// unparsed input and fails.
#[test]
fn invalid_terminators() {
invalid_lines(
"\u{FEFF}0 HEAD\r0 TRLR\n",
"nom error kind: End of file, line: 1:9",
);
invalid_lines(
"\u{FEFF}0 HEAD\n\r0 TRLR\n\r",
"nom error kind: End of file, line: 2:1",
);
}
/// One case per CONT/CONC placement rule enforced by `verify_lines`, each with the exact
/// message and line number it must produce.
#[test]
fn invalid_cont_conc() {
invalid_lines(
"\u{FEFF}0 HEAD\n0 CONC t\n0 TRLR\n",
"verification error: 'CONT/CONC cannot be a top level record' at line 2",
);
invalid_lines(
"\u{FEFF}0 HEAD\n0 CONT t\n0 TRLR\n",
"verification error: 'CONT/CONC cannot be a top level record' at line 2",
);
invalid_lines(
"\u{FEFF}0 HEAD\n1 TEXT t\n2 CONC\n0 TRLR\n",
"verification error: 'CONT/TRLR are the only records allowed to have no subrecords or value' at line 3",
);
invalid_lines(
"\u{FEFF}0 HEAD\n1 TEXT t\n1 CONT\n0 TRLR\n",
"verification error: 'CONT/CONC have to be a direct subrecord or sibling record of CONT/CONC' at line 3",
);
invalid_lines(
"\u{FEFF}0 HEAD\n1 TEXT t\n2 CONT\n1 CONT\n0 TRLR\n",
"verification error: 'CONT/CONC can only be a subrecord or sibling of the last record' at line 4",
);
invalid_lines(
"\u{FEFF}0 HEAD\n1 TEXT t\n2 CONC t\n1 CONC t\n0 TRLR\n",
"verification error: 'CONT/CONC can only be a subrecord or sibling of the last record' at line 4",
);
invalid_lines(
"\u{FEFF}0 HEAD\n1 TEXT t\n2 CONC t\n3 CONC t\n0 TRLR\n",
"verification error: 'CONT/CONC cannot have a subrecord' at line 4",
);
invalid_lines(
"\u{FEFF}0 HEAD\n1 TEXT t\n2 CONC t\n3 TEXT t\n0 TRLR\n",
"verification error: 'CONT/CONC cannot have a subrecord' at line 4",
);
invalid_lines(
"\u{FEFF}0 @N1@ NOTE Test\n1 CONT @N1@\n",
"verification error: 'CONT/CONC cannot have a cross reference value' at line 2",
);
invalid_lines(
"\u{FEFF}0 @N1@ NOTE Test\n1 CONC @N1@\n",
"verification error: 'CONT/CONC cannot have a cross reference value' at line 2",
);
invalid_lines(
"\u{FEFF}0 @N1@ NOTE Test\n0 @N2@ NOTE @N1@\n1 CONT more\n",
"verification error: 'CONT/CONC cannot follow a cross reference value' at line 3",
);
invalid_lines(
"\u{FEFF}0 @N1@ NOTE Test\n0 @N2@ NOTE @N1@\n1 CONC more\n",
"verification error: 'CONT/CONC cannot follow a cross reference value' at line 3",
);
}
/// Asserts the length, the marker-delimited byte stream and the JSON form of `items`.
#[track_caller]
fn valid_items(items: &ItemsInner<'_>, len: usize, bytes: &[u8], json: &str) {
    assert_eq!(len, items.len());

    let actual_bytes: Vec<u8> = items.bytes().collect();
    assert_eq!(bytes, actual_bytes.as_slice());

    let actual_json = serde_json::to_string(items).unwrap();
    assert_eq!(json, actual_json.as_str());
}
/// Length, byte stream and JSON output of `ItemsInner` for text-only, escape-only and mixed
/// values. Adjacent text fragments serialize as one joined string.
#[test]
fn items() {
let mut items = ItemsInner {
data: SmallVec::new(),
};
Str("hello".into()).extend_into(&mut items);
Str(" ".into()).extend_into(&mut items);
Str("world".into()).extend_into(&mut items);
Str("!".into()).extend_into(&mut items);
valid_items(&items, 12, b"hello world!", r#"["hello world!"]"#);
items.data.clear();
TextEsc::Esc("hello").extend_into(&mut items);
valid_items(&items, 8, b"\xFFhello\xFF", r#"["hello"]"#);
items.data.clear();
Str("ABT ".into()).extend_into(&mut items);
TextEsc::Esc("DFRENCH R").extend_into(&mut items);
Str("11 NIVO 6".into()).extend_into(&mut items);
valid_items(
&items,
26,
b"ABT \xFFDFRENCH R\xFF11 NIVO 6",
r#"["ABT ","DFRENCH R","11 NIVO 6"]"#,
);
items.data.clear();
TextEsc::Esc("DFRENCH R").extend_into(&mut items);
Str("11 NIVO 6".into()).extend_into(&mut items);
valid_items(
&items,
22,
b"\xFFDFRENCH R\xFF11 NIVO 6",
r#"["DFRENCH R","11 NIVO 6"]"#,
);
}
/// A tab is allowed inside a value; the other control characters tried here (and DEL) end
/// the value, so the line fails at the terminator check.
#[test]
fn control_characters() {
valid_case(
"1 NOTE hello\tworld\r\n",
1,
None,
"NOTE",
Some(Value::Item(Item("hello\tworld".into()))),
);
invalid_case(
"1 NOTE hello\x00world\r\n",
8,
r#"Error { input: LocatedSpan { offset: 12, line: 1, fragment: "\0world\r\n", extra: () }, code: Tag }"#,
);
invalid_case(
"1 NOTE hello\x07world\r\n",
8,
r#"Error { input: LocatedSpan { offset: 12, line: 1, fragment: "\u{7}world\r\n", extra: () }, code: Tag }"#,
);
invalid_case(
"1 NOTE hello\x08world\r\n",
8,
r#"Error { input: LocatedSpan { offset: 12, line: 1, fragment: "\u{8}world\r\n", extra: () }, code: Tag }"#,
);
invalid_case(
"1 NOTE hello\x0Bworld\r\n",
8,
r#"Error { input: LocatedSpan { offset: 12, line: 1, fragment: "\u{b}world\r\n", extra: () }, code: Tag }"#,
);
invalid_case(
"1 NOTE hello\x0Cworld\r\n",
8,
r#"Error { input: LocatedSpan { offset: 12, line: 1, fragment: "\u{c}world\r\n", extra: () }, code: Tag }"#,
);
invalid_case(
"1 NOTE hello\x1Bworld\r\n",
8,
r#"Error { input: LocatedSpan { offset: 12, line: 1, fragment: "\u{1b}world\r\n", extra: () }, code: Tag }"#,
);
invalid_case(
"1 NOTE hello\u{007F}world\r\n",
8,
r#"Error { input: LocatedSpan { offset: 12, line: 1, fragment: "\u{7f}world\r\n", extra: () }, code: Tag }"#,
);
}
/// The minimal accepted file (HEAD with GEDC/VERS/FORM/VERS and CHAR, then TRLR) is turned
/// into the expected record tree, with 1-based `line` numbers.
#[test]
fn valid_records() {
let expected = vec![
Record {
level: 0,
xref: None,
tag: "HEAD",
value: None,
line: 1,
subrecords: vec![
Record {
level: 1,
xref: None,
tag: "GEDC",
value: None,
line: 2,
subrecords: vec![
Record {
level: 2,
xref: None,
tag: "VERS",
value: Some(Value::Item(Item(ItemsInner {
data: smallvec::smallvec![TextEsc::Text("5.5.5")],
}))),
line: 3,
subrecords: vec![],
},
Record {
level: 2,
xref: None,
tag: "FORM",
value: Some(Value::Item(Item(ItemsInner {
data: smallvec::smallvec![TextEsc::Text("TEST-FORM")],
}))),
line: 4,
subrecords: vec![Record {
level: 3,
xref: None,
tag: "VERS",
value: Some(Value::Item(Item(ItemsInner {
data: smallvec::smallvec![TextEsc::Text("5.5.5")],
}))),
line: 5,
subrecords: vec![],
}],
},
],
},
Record {
level: 1,
xref: None,
tag: "CHAR",
value: Some(Value::Item(Item(ItemsInner {
data: smallvec::smallvec![TextEsc::Text("UTF-8")],
}))),
line: 6,
subrecords: vec![],
},
],
},
Record {
level: 0,
xref: None,
tag: "TRLR",
value: None,
line: 7,
subrecords: vec![],
},
];
let r = records(
"\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 VERS 5.5.5\n1 CHAR UTF-8\n0 TRLR\n",
)
.unwrap();
assert_eq!(expected, r);
}
/// CONC and CONT lines are folded into the value of the record above them: CONC appends its
/// text, CONT appends a `"\n"` fragment followed by its text. The folded lines produce no
/// records of their own, but still count for the `line` numbers of later records.
#[test]
fn cont_conc() {
let expected = vec![
Record {
level: 0,
xref: None,
tag: "HEAD",
value: None,
line: 1,
subrecords: vec![
Record {
level: 1,
xref: None,
tag: "GEDC",
value: None,
line: 2,
subrecords: vec![
Record {
level: 2,
xref: None,
tag: "VERS",
value: Some(Value::Item(Item(ItemsInner {
data: smallvec::smallvec![TextEsc::Text("5.5.5")],
}))),
line: 3,
subrecords: vec![],
},
Record {
level: 2,
xref: None,
tag: "FORM",
value: Some(Value::Item(Item(ItemsInner {
data: smallvec::smallvec![TextEsc::Text("TEST-FORM")],
}))),
line: 4,
subrecords: vec![Record {
level: 3,
xref: None,
tag: "VERS",
value: Some(Value::Item(Item(ItemsInner {
data: smallvec::smallvec![TextEsc::Text("5.5.5")],
}))),
line: 5,
subrecords: vec![],
}],
},
],
},
Record {
level: 1,
xref: None,
tag: "CHAR",
value: Some(Value::Item(Item(ItemsInner {
data: smallvec::smallvec![TextEsc::Text("UTF-8")],
}))),
line: 6,
subrecords: vec![],
},
Record {
level: 1,
xref: None,
tag: "TEXT",
value: Some(Value::Item(Item(ItemsInner {
data: smallvec::smallvec![
TextEsc::Text("fir"),
TextEsc::Text("st"),
TextEsc::Text("\n"),
TextEsc::Text("sec"),
TextEsc::Text("ond"),
],
}))),
line: 7,
subrecords: vec![],
},
],
},
Record {
level: 0,
xref: None,
tag: "TRLR",
value: None,
line: 11,
subrecords: vec![],
},
];
let r = records(
"\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 VERS 5.5.5\n1 CHAR UTF-8\n1 TEXT fir\n2 CONC st\n2 CONT sec\n2 CONC ond\n0 TRLR\n",
)
.unwrap();
assert_eq!(expected, r);
}
/// Feeds a series of documents with a malformed or misplaced HEAD record to
/// `invalid_records`, each paired with the error text expected for it.
///
/// The cases run in table order; `invalid_records` is defined elsewhere in
/// this module and presumably asserts that parsing fails with that message.
#[test]
fn head_record() {
    // (input document, expected error text)
    let cases: [(&str, &str); 21] = [
        ("\u{FEFF}\n", "nom error kind: OneOf, line: 1:2"),
        // Placement, cross-reference and value of HEAD itself.
        (
            "\u{FEFF}0 @I1@ INDI\n1 NAME Test\n0 HEAD\n1 GEDC\n2 VERS 5.5.5\n0 TRLR\n",
            "verification error: 'HEAD must be the first record' at line 1",
        ),
        (
            "\u{FEFF}0 @H1@ HEAD\n1 GEDC\n2 VERS 5.5.5\n0 TRLR\n",
            "verification error: 'HEAD must not have a cross-reference identifier' at line 1",
        ),
        (
            "\u{FEFF}0 HEAD something\n0 TRLR\n",
            "verification error: 'HEAD must not have a value' at line 1",
        ),
        // CONT/CONC at increasing nesting depths below HEAD.
        (
            "\u{FEFF}0 HEAD\n1 CONC t\n0 TRLR\n",
            "verification error: 'CONT/CONC not supported as basic form HEAD subrecords' at line 2",
        ),
        (
            "\u{FEFF}0 HEAD\n1 CONT t\n0 TRLR\n",
            "verification error: 'CONT/CONC not supported as basic form HEAD subrecords' at line 2",
        ),
        (
            "\u{FEFF}0 HEAD\n1 GEDC\n2 CONC t\n0 TRLR\n",
            "verification error: 'CONT/CONC not supported as basic form HEAD subrecords' at line 3",
        ),
        (
            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n3 CONC t\n0 TRLR\n",
            "verification error: 'CONT/CONC not supported as basic form HEAD subrecords' at line 4",
        ),
        (
            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 CONC t\n0 TRLR\n",
            "verification error: 'CONT/CONC not supported as basic form HEAD subrecords' at line 5",
        ),
        (
            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 VERS 5.5.5\n4 CONC t\n0 TRLR\n",
            "verification error: 'CONT/CONC not supported as basic form HEAD subrecords' at line 6",
        ),
        // Structure of the GEDC subrecord and its VERS/FORM children.
        (
            "\u{FEFF}0 HEAD\n1 NOTE test\n0 TRLR\n",
            "verification error: 'GEDC must be the first subrecord of HEAD' at line 3",
        ),
        (
            "\u{FEFF}0 HEAD\n0 TRLR\n",
            "verification error: 'CONT/TRLR are the only records allowed to have no subrecords or value' at line 1",
        ),
        (
            "\u{FEFF}0 HEAD\n1 GEDC something\n2 VERS 5.5.5\n0 TRLR\n",
            "verification error: 'GEDC must not have a value' at line 3",
        ),
        (
            "\u{FEFF}0 HEAD\n1 GEDC\n2 FORM TEST-FORM\n0 TRLR\n",
            "verification error: 'VERS must be the first subrecord of GEDC' at line 4",
        ),
        (
            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.1\n2 FORM TEST-FORM\n0 TRLR\n",
            "verification error: 'GEDC.VERS must have value 5.5.5' at line 4",
        ),
        (
            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n0 TRLR\n",
            "verification error: 'GEDC must have a FORM subrecord' at line 3",
        ),
        (
            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 NOTE test\n0 TRLR\n",
            "verification error: 'FORM must be the second subrecord of GEDC' at line 5",
        ),
        (
            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM\n3 VERS 5.5.5\n0 TRLR\n",
            "verification error: 'GEDC.FORM must have a value' at line 5",
        ),
        (
            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n0 TRLR\n",
            "verification error: 'GEDC.FORM must have a VERS subrecord' at line 5",
        ),
        (
            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 NOTE test\n0 TRLR\n",
            "verification error: 'VERS must be the first subrecord of FORM' at line 6",
        ),
        (
            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 VERS 5.5.1\n0 TRLR\n",
            "verification error: 'GEDC.FORM.VERS must have value 5.5.5' at line 6",
        ),
    ];

    for &(input, expected) in cases.iter() {
        invalid_records(input, expected);
    }
}
/// Feeds documents with a missing, misplaced or malformed TRLR record to
/// `invalid_records`, each paired with the error text expected for it.
///
/// The cases run in table order; `invalid_records` is defined elsewhere in
/// this module and presumably asserts that parsing fails with that message.
#[test]
fn trlr_record() {
    // (input document, expected error text)
    let cases: [(&str, &str); 4] = [
        // No TRLR at all.
        (
            "\u{FEFF}0 HEAD\n",
            "verification error: 'TRLR must be the last record' at line 2",
        ),
        // A record follows TRLR.
        (
            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 VERS 5.5.5\n0 TRLR\n0 @I1@ INDI\n1 NAME Test\n",
            "verification error: 'TRLR must be the last record' at line 8",
        ),
        // TRLR carries a cross-reference identifier.
        (
            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 VERS 5.5.5\n0 @T1@ TRLR\n",
            "verification error: 'TRLR must not have a cross-reference identifier' at line 7",
        ),
        // TRLR carries a value.
        (
            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 VERS 5.5.5\n0 TRLR something\n",
            "verification error: 'TRLR must not have a value' at line 7",
        ),
    ];

    for &(input, expected) in cases.iter() {
        invalid_records(input, expected);
    }
}
}