gedcom_core/
data.rs

1// Copyright 2021 Ahmed Charles <acharles@outlook.com>
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! The core GEDCOM data representation language may be used to represent
16//! any form of structured information, not just genealogical data, using
17//! a sequential stream of characters.
18
19use std::fmt;
20use std::num::{NonZeroU8, NonZeroUsize};
21use std::ops::{RangeFrom, RangeTo};
22
23use nom::{
24    error::{make_error, ErrorKind, ParseError},
25    IResult, Needed,
26};
27use nom::{
28    AsChar, Compare, CompareResult, ExtendInto, InputIter, InputLength, InputTake,
29    InputTakeAtPosition, Offset, Slice,
30};
31use serde::{ser::SerializeSeq, Serialize, Serializer};
32use smallvec::SmallVec;
33
/// One borrowed fragment of a line value.
///
/// A value is stored as a sequence of these fragments (see `ItemsInner`).
#[derive(Debug)]
enum TextEsc<'a> {
    /// A run of ordinary text.
    Text(&'a str),
    /// The text of an escape sequence; the `line` parser stores it without
    /// the surrounding `@#` and `@` delimiters.
    Esc(&'a str),
}
39
40impl<'a> ExtendInto for TextEsc<'a> {
41    type Item = char;
42    type Extender = ItemsInner<'a>;
43    fn new_builder(&self) -> Self::Extender {
44        ItemsInner {
45            data: SmallVec::new(),
46        }
47    }
48    fn extend_into(&self, acc: &mut Self::Extender) {
49        acc.data.push(match self {
50            TextEsc::Text(t) => TextEsc::Text(t),
51            TextEsc::Esc(e) => TextEsc::Esc(e),
52        });
53    }
54}
55
/// Represents an efficient, extendable string.
///
/// This is the public wrapper around the private `ItemsInner` fragment list;
/// equality and serialization are derived and therefore delegate to it.
// NOTE(review): the reason for allowing `single_use_lifetimes` is not visible
// here — presumably the lint fires on the derive expansions; confirm.
#[allow(single_use_lifetimes)]
#[derive(Debug, Eq, PartialEq, Serialize)]
pub struct Item<'a>(ItemsInner<'a>);
60
/// Ordered list of text and escape fragments that make up one value.
#[derive(Debug)]
struct ItemsInner<'a> {
    /// The fragments, in input order; a single fragment is stored inline.
    data: SmallVec<[TextEsc<'a>; 1]>,
}
65
66fn map_item_iter<'a>(
67    item: &TextEsc<'a>,
68) -> (
69    Option<NonZeroU8>,
70    std::slice::Iter<'a, u8>,
71    Option<NonZeroU8>,
72) {
73    match item {
74        TextEsc::Text(t) => (None, t.as_bytes().iter(), None),
75        TextEsc::Esc(t) => (
76            NonZeroU8::new(0xFF),
77            t.as_bytes().iter(),
78            NonZeroU8::new(0xFF),
79        ),
80    }
81}
82
83impl ItemsInner<'_> {
84    fn bytes(&self) -> Bytes<'_> {
85        let mut item_iter = self.data.iter();
86        let str_iter = item_iter.next().map(map_item_iter);
87        Bytes {
88            item_iter,
89            str_iter,
90        }
91    }
92    fn len(&self) -> usize {
93        let mut sum = 0;
94        let mut esc = false;
95        for item in &self.data {
96            match item {
97                TextEsc::Text(t) => {
98                    if esc {
99                        sum += 1;
100                        esc = false;
101                    }
102                    sum += t.len()
103                }
104                TextEsc::Esc(t) => {
105                    esc = true;
106                    sum += 2 + t.len() + 1
107                }
108            }
109        }
110        sum
111    }
112}
113
/// Iterator state for walking the flattened bytes of an `ItemsInner`.
struct Bytes<'a> {
    /// Progress through the current fragment, as produced by `map_item_iter`:
    /// pending leading marker, remaining text bytes, pending trailing marker.
    /// `None` once every fragment has been consumed.
    str_iter: Option<(
        Option<NonZeroU8>,
        std::slice::Iter<'a, u8>,
        Option<NonZeroU8>,
    )>,
    /// Fragments that have not been started yet.
    item_iter: std::slice::Iter<'a, TextEsc<'a>>,
}
122
123impl Iterator for Bytes<'_> {
124    type Item = u8;
125    fn next(&mut self) -> Option<Self::Item> {
126        while let Some(ref mut str_iter) = self.str_iter {
127            if let Some(b) = str_iter.0.take() {
128                return Some(b.into());
129            }
130            if let Some(b) = str_iter.1.next() {
131                return Some(*b);
132            }
133            if let Some(b) = str_iter.2.take() {
134                return Some(b.into());
135            }
136            self.str_iter = self.item_iter.next().map(map_item_iter);
137        }
138        None
139    }
140}
141
// Marker impl: equality itself is defined by the `PartialEq` impl, which
// compares the flattened byte streams.
impl Eq for ItemsInner<'_> {}
143
144impl<'a> From<&'a str> for ItemsInner<'a> {
145    fn from(s: &'a str) -> ItemsInner<'a> {
146        let mut data = SmallVec::new();
147        data.push(TextEsc::Text(s));
148        ItemsInner { data }
149    }
150}
151
152impl PartialEq for ItemsInner<'_> {
153    fn eq(&self, other: &Self) -> bool {
154        self.bytes().eq(other.bytes())
155    }
156}
157
/// A run of consecutive fragments that is displayed and serialized as one
/// string; its `Display` impl treats an `Esc` fragment as unreachable.
struct TextSlice<'a>(&'a [TextEsc<'a>]);
159
160impl fmt::Display for TextSlice<'_> {
161    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
162        for item in self.0 {
163            match item {
164                TextEsc::Text(t) => f.write_str(t)?,
165                TextEsc::Esc(_) => unreachable!(),
166            }
167        }
168        Ok(())
169    }
170}
171
172impl Serialize for TextSlice<'_> {
173    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
174        serializer.collect_str(self)
175    }
176}
177
178impl Serialize for ItemsInner<'_> {
179    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
180        let mut seq = serializer.serialize_seq(None)?;
181        let mut from = 0;
182        for (i, item) in self.data.iter().enumerate() {
183            if let TextEsc::Esc(t) = item {
184                if from != i {
185                    seq.serialize_element(&TextSlice(&self.data[from..i]))?;
186                }
187                seq.serialize_element(t)?;
188                from = i + 1;
189            }
190        }
191        if from != self.data.len() {
192            seq.serialize_element(&TextSlice(&self.data[from..]))?;
193        }
194        seq.end()
195    }
196}
197
/// Newtype over `&str` used as parser input; its `ExtendInto` impl
/// accumulates into `ItemsInner` by pushing borrowed text fragments.
#[derive(Clone, Copy)]
struct Str<'a>(&'a str);
200
201impl<'a> Compare<&'a str> for Str<'_> {
202    fn compare(&self, t: &'a str) -> CompareResult {
203        self.0.compare(t)
204    }
205    fn compare_no_case(&self, t: &'a str) -> CompareResult {
206        self.0.compare_no_case(t)
207    }
208}
209
210impl<'a> ExtendInto for Str<'a> {
211    type Item = char;
212    type Extender = ItemsInner<'a>;
213    fn new_builder(&self) -> Self::Extender {
214        ItemsInner {
215            data: SmallVec::new(),
216        }
217    }
218    fn extend_into(&self, acc: &mut Self::Extender) {
219        acc.data.push(TextEsc::Text(self.0));
220    }
221}
222
223impl<'a> InputIter for Str<'a> {
224    type Item = char;
225    type Iter = std::str::CharIndices<'a>;
226    type IterElem = std::str::Chars<'a>;
227    fn iter_indices(&self) -> Self::Iter {
228        self.0.iter_indices()
229    }
230    fn iter_elements(&self) -> Self::IterElem {
231        self.0.iter_elements()
232    }
233    fn position<P>(&self, predicate: P) -> Option<usize>
234    where
235        P: Fn(Self::Item) -> bool,
236    {
237        self.0.position(predicate)
238    }
239    fn slice_index(&self, count: usize) -> Result<usize, Needed> {
240        self.0.slice_index(count)
241    }
242}
243
244impl InputLength for Str<'_> {
245    fn input_len(&self) -> usize {
246        self.0.input_len()
247    }
248}
249
250impl InputTake for Str<'_> {
251    fn take(&self, count: usize) -> Self {
252        Str(self.0.take(count))
253    }
254    fn take_split(&self, count: usize) -> (Self, Self) {
255        let (a, b) = self.0.take_split(count);
256        (Str(a), Str(b))
257    }
258}
259
260impl InputTakeAtPosition for Str<'_> {
261    type Item = char;
262    fn split_at_position<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E>
263    where
264        P: Fn(Self::Item) -> bool,
265    {
266        match self.0.find(predicate) {
267            Some(i) => Ok((Str(&self.0[i..]), Str(&self.0[..i]))),
268            None => Err(nom::Err::Incomplete(nom::Needed::Size(
269                NonZeroUsize::new(1).unwrap(),
270            ))),
271        }
272    }
273    fn split_at_position1<P, E: ParseError<Self>>(
274        &self,
275        predicate: P,
276        e: ErrorKind,
277    ) -> IResult<Self, Self, E>
278    where
279        P: Fn(Self::Item) -> bool,
280    {
281        match self.0.find(predicate) {
282            Some(0) => Err(nom::Err::Error(E::from_error_kind(*self, e))),
283            Some(i) => Ok((Str(&self.0[i..]), Str(&self.0[..i]))),
284            None => Err(nom::Err::Incomplete(nom::Needed::Size(
285                NonZeroUsize::new(1).unwrap(),
286            ))),
287        }
288    }
289    fn split_at_position_complete<P, E: ParseError<Self>>(
290        &self,
291        predicate: P,
292    ) -> IResult<Self, Self, E>
293    where
294        P: Fn(Self::Item) -> bool,
295    {
296        match self.0.find(predicate) {
297            Some(i) => Ok((Str(&self.0[i..]), Str(&self.0[..i]))),
298            None => Ok(self.take_split(self.input_len())),
299        }
300    }
301    fn split_at_position1_complete<P, E: ParseError<Self>>(
302        &self,
303        predicate: P,
304        e: ErrorKind,
305    ) -> IResult<Self, Self, E>
306    where
307        P: Fn(Self::Item) -> bool,
308    {
309        match self.0.find(predicate) {
310            Some(0) => Err(nom::Err::Error(E::from_error_kind(*self, e))),
311            Some(i) => Ok((Str(&self.0[i..]), Str(&self.0[..i]))),
312            None => {
313                if self.0.is_empty() {
314                    Err(nom::Err::Error(E::from_error_kind(*self, e)))
315                } else {
316                    Ok(self.take_split(self.input_len()))
317                }
318            }
319        }
320    }
321}
322
323impl Offset for Str<'_> {
324    fn offset(&self, second: &Self) -> usize {
325        self.0.offset(second.0)
326    }
327}
328
329impl Slice<RangeFrom<usize>> for Str<'_> {
330    fn slice(&self, range: RangeFrom<usize>) -> Self {
331        Str(self.0.slice(range))
332    }
333}
334
335impl Slice<RangeTo<usize>> for Str<'_> {
336    fn slice(&self, range: RangeTo<usize>) -> Self {
337        Str(self.0.slice(range))
338    }
339}
340
341fn escaped_transform_<'a, F: 'a, G: 'a>(
342    normal: F,
343    control_char: char,
344    transform: G,
345) -> impl FnMut(&'a str) -> IResult<&'a str, ItemsInner<'a>> + 'a
346where
347    F: FnMut(Str<'a>) -> IResult<Str<'a>, Str<'a>>,
348    G: FnMut(Str<'a>) -> IResult<Str<'a>, TextEsc<'a>>,
349{
350    let mut e = nom::bytes::complete::escaped_transform(normal, control_char, transform);
351    move |i: &str| {
352        e(Str(i))
353            .map(|(i, o)| (i.0, o))
354            .map_err(|e| e.map_input(|i| i.0))
355    }
356}
357
358fn one_of_<I, F, E: ParseError<I>>(f: F) -> impl Fn(I) -> IResult<I, I, E>
359where
360    I: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>> + InputIter,
361    <I as InputIter>::Item: AsChar + Copy,
362    F: Fn(<I as InputIter>::Item) -> bool,
363{
364    move |i: I| match (i).iter_elements().next().map(|c| (c, f(c))) {
365        Some((c, true)) => Ok((i.slice(c.len()..), i.slice(..c.len()))),
366        _ => Err(nom::Err::Error(E::from_error_kind(i, ErrorKind::OneOf))),
367    }
368}
369
/// Represents a line value, either a pointer or item.
// NOTE(review): the reason for allowing `single_use_lifetimes` is not visible
// here — presumably the lint fires on the derive expansions; confirm.
#[allow(single_use_lifetimes)]
#[derive(Debug, Eq, PartialEq, Serialize)]
pub enum Value<'a> {
    /// Represents a pointer to another record; the `line` parser stores the
    /// identifier without the surrounding `@` characters.
    Pointer(&'a str),
    /// Represents an actual value.
    Item(Item<'a>),
}
379
/// Represents an entire line or record in the GEDCOM data format.
// NOTE(review): the reason for allowing `single_use_lifetimes` is not visible
// here — presumably the lint fires on the derive expansions; confirm.
#[allow(single_use_lifetimes)]
#[derive(Debug, Eq, PartialEq, Serialize)]
pub struct Line<'a> {
    // Level number; the `line` parser only accepts values below 100.
    level: u8,
    // Optional cross-reference identifier, without the surrounding `@`.
    xref: Option<&'a str>,
    // The tag, borrowed from the input.
    tag: &'a str,
    // Optional value: a pointer or an item.
    value: Option<Value<'a>>,
}
389
390impl<'a> Line<'a> {
391    /// The level of this record.
392    pub fn level(&self) -> u8 {
393        self.level
394    }
395    /// The optional cross-reference identifier for this record.
396    pub fn xref(&self) -> Option<&'a str> {
397        self.xref
398    }
399    /// The tag for this record.
400    pub fn tag(&self) -> &'a str {
401        self.tag
402    }
403    /// The optional value for this record.
404    pub fn value(&self) -> Option<&Value<'a>> {
405        self.value.as_ref()
406    }
407    fn len(&self) -> usize {
408        let level_len = if self.level < 10 { 1 } else { 2 };
409        let xref_len = if let Some(xref) = self.xref {
410            1 + 2 + xref.len()
411        } else {
412            0
413        };
414        let value_len = match self.value {
415            Some(Value::Pointer(p)) => 1 + 2 + p.len(),
416            Some(Value::Item(ref text)) if text.0.len() == 0 => unreachable!(),
417            Some(Value::Item(ref text)) => {
418                1 + text.0.len() + text.0.bytes().filter(|&c| c == b'@').count()
419            }
420            None => 0,
421        };
422        level_len + xref_len + 1 + self.tag.len() + value_len
423    }
424}
425
/// Builds a parser for a single GEDCOM line that ends with `terminator`.
///
/// The returned closure parses
/// `level + [ delim + xref_ID ] + delim + tag + [ delim + line_value ] + terminator`
/// from the start of its input and yields the remaining input together with
/// the parsed `Line`. The grammar comments inside mirror the productions each
/// sub-parser implements.
fn line<'a>(terminator: &'a str) -> impl Fn(&'a str) -> IResult<&'a str, Line<'a>> {
    move |input: &str| {
        use nom::branch::alt;
        use nom::bytes::complete::{tag, take_while, take_while1};
        use nom::character::complete::{alphanumeric1, one_of};
        use nom::combinator::{map, map_opt, opt, peek, recognize, verify};
        use nom::sequence::{delimited, preceded, terminated, tuple};
        use nom::ParseTo;

        // Building blocks. The ones written as closures are used more than
        // once, so each call produces a fresh parser.
        let delim_ = tag(" ");
        let alphanum_ = || one_of_(|ch: char| ch.is_ascii_alphanumeric());
        let alphanum_space_ = || take_while(|ch: char| ch.is_ascii_alphanumeric() || ch == ' ');
        let digit_ = take_while1(|ch: char| ch.is_ascii_digit());

        // [ digit | non_zero_digit + digit ]
        // Either a lone "0" or digits that do not start with 0; the parsed
        // number must additionally be below 100.
        let l_digit_ = alt((tag("0"), preceded(peek(one_of("123456789")), digit_)));
        let level_ = verify(map_opt(l_digit_, |i: &str| i.parse_to()), |&o| o < 100);

        // [ alphanum | alphanum + identifier_string ]
        // At most 20 bytes long.
        let identifier_string_ = || verify(alphanumeric1, |o: &str| o.len() <= 20);

        // U+0040 + identifier_string + U+0040
        let pointer_ = || delimited(tag("@"), identifier_string_(), tag("@"));

        // [ [ U+005F ] + alphanum | tag + alphanum ]
        // At most 31 bytes long, including the optional leading underscore.
        let tag_ = verify(
            recognize(preceded(opt(tag("_")), alphanumeric1)),
            |o: &str| o.len() <= 31,
        );

        // [ alphanum | escape_text + alphanum | escape_text + space ]
        let escape_text_ = || recognize(preceded(alphanum_(), alphanum_space_()));

        // U+0040 + U+0023 + escape_text + U+0040
        // The leading `@` is consumed as the control character of `line_text_`,
        // so this parser starts at `#`. One space directly after the closing
        // `@` is consumed too and is not part of the stored escape text.
        let escape_ = || {
            map(
                delimited(
                    tag("#"),
                    escape_text_(),
                    terminated(tag("@"), opt(tag(" "))),
                ),
                |o: Str<'_>| TextEsc::Esc(o.0),
            )
        };

        // [ line_char | line_text + line_char ]
        // Ordinary characters are taken one at a time; `@` introduces either
        // a second `@` (kept as a one-character text fragment) or an escape.
        let line_text_ = || {
            escaped_transform_(
                one_of_(|ch: char| {
                    !matches!(ch,
                        // disallowed: U+0000 - U+001F, except U+0009 = most C0 control characters
                        '\u{0000}'..='\u{0008}' |
                        '\u{000A}'..='\u{001F}' |
                        // special: U+0040 + U+0040 = @@
                        '@' |
                        // disallowed: U+00FF = Delete character
                        // NOTE(review): U+00FF is LATIN SMALL LETTER Y WITH DIAERESIS;
                        // the Delete control character is U+007F. Confirm against the
                        // GEDCOM grammar which code point is meant to be rejected.
                        '\u{00FF}'
                    )
                }),
                '@',
                alt((
                    map(one_of_(|ch: char| ch == '@'), |o: Str<'_>| {
                        TextEsc::Text(o.0)
                    }),
                    escape_(),
                )),
            )
        };

        // [ escape | line_text | escape + delim + line_text ]
        // Note: this is inaccurate, because dates allow text before escapes,
        // e.g. ABT @#FRENCH R@ 11 NIVO 6
        let line_item_ = || map(line_text_(), |t| Value::Item(Item(t)));

        // [ pointer | line_item ]
        // A pointer is tried first; anything else is parsed as an item.
        let line_value_ = alt((map(pointer_(), Value::Pointer), line_item_()));

        // use the detected ending
        let terminator_ = tag(terminator);

        // level + [ delim + xref_ID ] + delim + tag + [ delim + line_value ] + terminator
        let opt_pointer_ = opt(preceded(tag(" "), pointer_()));
        // The value is optional even after its delimiter, so a tag followed
        // by a single trailing space still parses (with no value).
        let opt_line_value = opt(preceded(tag(" "), opt(line_value_)));
        let (input, (l, x, _, t, v, _)) = tuple((
            level_,
            opt_pointer_,
            delim_,
            tag_,
            opt_line_value,
            terminator_,
        ))(input)?;
        Ok((
            input,
            Line {
                level: l,
                xref: x,
                tag: t,
                // Collapse "delimiter present but no value" into `None`.
                value: v.flatten(),
            },
        ))
    }
}
528
529fn verify_lines<'a>(
530    (i, (terminator, ls)): (&'a str, (&'a str, Vec<Line<'a>>)),
531) -> IResult<&'a str, Vec<Line<'a>>> {
532    let mut records = std::collections::BTreeSet::new();
533    let mut last: Option<&Line<'_>> = None;
534    for l in &ls {
535        if l.len() + terminator.len() > 255 {
536            return Err(nom::Err::Failure(make_error(i, ErrorKind::Verify)));
537        }
538        let last_plus_1 = last.map(|r| r.level + 1).unwrap_or(0);
539        if l.level > last_plus_1 {
540            return Err(nom::Err::Failure(make_error(i, ErrorKind::Verify)));
541        }
542        let subrecord = l.level != 0 && l.level == last_plus_1;
543        if !subrecord
544            && !last
545                .map(|r| r.tag == "CONT" || r.value.is_some())
546                .unwrap_or(true)
547        {
548            return Err(nom::Err::Failure(make_error(i, ErrorKind::Verify)));
549        }
550        if let Some(xref) = l.xref {
551            if !records.insert(xref) {
552                return Err(nom::Err::Failure(make_error(i, ErrorKind::Verify)));
553            }
554        }
555        last = Some(l);
556    }
557    for l in &ls {
558        if let Some(Value::Pointer(p)) = l.value {
559            if !records.contains(p) {
560                return Err(nom::Err::Failure(make_error(i, ErrorKind::Verify)));
561            }
562        }
563    }
564    Ok((i, ls))
565}
566
567/// Parses a string (GEDCOM file content) into a sequence of `Line`s.
568pub fn lines(input: &str) -> IResult<&'_ str, Vec<Line<'_>>> {
569    use nom::branch::alt;
570    use nom::bytes::complete::{tag, take_while};
571    use nom::combinator::{all_consuming, flat_map, map, opt, peek, recognize};
572    use nom::multi::many1;
573    use nom::sequence::preceded;
574
575    // [ carriage_return | line_feed | carriage_return + line_feed ]
576    let terminator_ = alt((recognize(preceded(tag("\r"), opt(tag("\n")))), tag("\n")));
577
578    let not_line_ending_ = take_while(|ch: char| ch != '\r' && ch != '\n');
579    let find_terminator_ = peek(preceded(not_line_ending_, terminator_));
580    all_consuming(preceded(
581        tag("\u{FEFF}"),
582        flat_map(find_terminator_, |i| map(many1(line(i)), move |o| (i, o))),
583    ))(input)
584    .and_then(verify_lines)
585}
586
#[cfg(test)]
mod tests {
    use super::*;

    /// Each of the three terminators is accepted when it is the expected one.
    #[test]
    fn terminators() {
        let expected_line = Line {
            level: 0,
            xref: None,
            tag: "HEAD",
            value: None,
        };
        let expected = ("", expected_line);
        assert_eq!(expected, line("\r")("0 HEAD\r").unwrap());
        assert_eq!(expected, line("\n")("0 HEAD\n").unwrap());
        assert_eq!(expected, line("\r\n")("0 HEAD\r\n").unwrap());
    }

    /// Asserts that `input` (terminated by `\r\n`) parses completely into the
    /// line described by `l`, `x`, `t` and `v`, and that `Line::len` agrees
    /// with the length of the input.
    fn valid_case<'a>(input: &'a str, l: u8, x: Option<&'a str>, t: &'a str, v: Option<Value<'a>>) {
        let expected_line = Line {
            level: l,
            xref: x,
            tag: t,
            value: v,
        };
        let expected = ("", expected_line);
        assert_eq!(expected, line("\r\n")(input).unwrap());
        eprintln!("{}", input);
        // `Line::len` excludes the two terminator bytes, and a trailing
        // delimiter without a value is not represented in the parsed line.
        let c = 2 + if input.ends_with(" \r\n") { 1 } else { 0 };
        assert_eq!(input.len(), line("\r\n")(input).unwrap().1.len() + c);
    }

    /// Asserts that parsing `input` fails with a recoverable `Error` whose
    /// remaining input is `len` bytes long.
    fn invalid_case(input: &str, len: usize) {
        match line("\r\n")(input) {
            Err(nom::Err::Error(e)) => {
                eprintln!("{:?}", e);
                assert_eq!(len, e.input.len());
            }
            Ok(v) => panic!("expected a recoverable error, but parsed {:?}", v),
            Err(other) => panic!("expected a recoverable error, but got {:?}", other),
        }
    }

    #[test]
    fn tags() {
        valid_case("0 HEAD\r\n", 0, None, "HEAD", None);
        let upper = "0 ABCDEFGHIJKLMNOPQRSTUVWXYZ\r\n";
        valid_case(upper, 0, None, &upper[2..28], None);
        let lower = "0 abcdefghijklmnopqrstuvwxyz\r\n";
        valid_case(lower, 0, None, &lower[2..28], None);
        valid_case("0 _0123456789\r\n", 0, None, "_0123456789", None);
        valid_case("0 ADDR \r\n", 0, None, "ADDR", None);
        let max = "99 @N1234567890123456789@ ABCDEFGHIJKLMNOPQRSTUVWXYZ01234 \r\n";
        valid_case(max, 99, Some(&max[4..24]), &max[26..57], None);
    }

    #[test]
    fn levels() {
        for i in 0..100 {
            let l = format!("{} HEAD\r\n", i);
            valid_case(&l, i, None, "HEAD", None);
        }
    }

    #[test]
    fn simple_value() {
        let v = Some(Value::Item(Item("UTF-8".into())));
        valid_case("1 CHAR UTF-8\r\n", 1, None, "CHAR", v);
    }

    #[test]
    fn simple_xref() {
        valid_case("0 @N1@ NOTE\r\n", 0, Some("N1"), "NOTE", None);
    }

    #[test]
    fn simple_pointer() {
        let v = Some(Value::Pointer("N1234567890123456789"));
        valid_case("1 NOTE @N1234567890123456789@\r\n", 1, None, "NOTE", v);
    }

    #[test]
    fn simple_note() {
        let v = Some(Value::Item(Item("foo".into())));
        valid_case("0 @N1@ NOTE foo\r\n", 0, Some("N1"), "NOTE", v);
    }

    #[test]
    fn escape_line_value() {
        let mut items = ItemsInner {
            data: SmallVec::new(),
        };
        Str("ABT ").extend_into(&mut items);
        TextEsc::Esc("DFRENCH R").extend_into(&mut items);
        Str("11 NIVO 6").extend_into(&mut items);
        let v = Some(Value::Item(Item(items)));
        valid_case("1 DATE ABT @#DFRENCH R@ 11 NIVO 6\r\n", 1, None, "DATE", v);
    }

    #[test]
    fn escape_at() {
        let v = Some(Value::Item(Item("foo@example.com".into())));
        valid_case("1 EMAIL foo@@example.com\r\n", 1, None, "EMAIL", v);
        let v = Some(Value::Item(Item("@foo".into())));
        valid_case("1 NOTE @@foo\r\n", 1, None, "NOTE", v);
    }

    #[test]
    fn invalid_tags() {
        invalid_case("0 __HEAD\r\n", 7);
        invalid_case("0 ABCDEFGHIJKLMNOPQRSTUVWXYZ012345\r\n", 34);
    }

    #[test]
    fn invalid_levels() {
        invalid_case("01 HEAD\r\n", 8);
        invalid_case("100 HEAD\r\n", 10);

        assert!(lines("\u{FEFF}1 HEAD\r\n").is_err());
        assert!(lines("\u{FEFF}0 HEAD\r\n2 VERS 5.5.5\r\n").is_err());
    }

    #[test]
    fn invalid_pointer() {
        invalid_case("0 @N01234567890123456789@ NOTE foo\r\n", 34);
        invalid_case("0 NOTE @N01234567890123456789@\r\n", 25);
    }

    #[test]
    fn leading_whitespace() {
        let expected_line = Line {
            level: 0,
            xref: None,
            tag: "HEAD",
            value: None,
        };
        let expected = ("\r", expected_line);
        assert_eq!(expected, line("\n")("0 HEAD\n\r").unwrap());

        invalid_case(" 0 HEAD\r\n", 9);
        invalid_case("\t0 HEAD\r\n", 9);
        invalid_case("\r\n0 HEAD\r\n", 10);
        invalid_case("\n0 HEAD\r\n", 9);
        invalid_case("\r0 HEAD\r\n", 9);
        invalid_case("\n\r0 HEAD\r\n", 10);
    }

    /// The same four-line document parses identically with each terminator.
    #[test]
    fn multiple_lines() {
        let expected_lines = vec![
            Line {
                level: 0,
                xref: None,
                tag: "HEAD",
                value: None,
            },
            Line {
                level: 1,
                xref: None,
                tag: "GEDC",
                value: None,
            },
            Line {
                level: 2,
                xref: None,
                tag: "VERS",
                value: Some(Value::Item(Item("5.5.5".into()))),
            },
            Line {
                level: 0,
                xref: None,
                tag: "TRLR",
                value: None,
            },
        ];
        let cr = "\u{FEFF}0 HEAD\r1 GEDC\r2 VERS 5.5.5\r0 TRLR\r";
        assert_eq!(expected_lines, lines(cr).unwrap().1);
        let lf = "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n0 TRLR\n";
        assert_eq!(expected_lines, lines(lf).unwrap().1);
        let crlf = "\u{FEFF}0 HEAD\r\n1 GEDC\r\n2 VERS 5.5.5\r\n0 TRLR\r\n";
        assert_eq!(expected_lines, lines(crlf).unwrap().1);
    }

    /// A pointer to an undefined identifier and a duplicate identifier are
    /// both rejected.
    #[test]
    fn invalid_xrefs() {
        assert!(lines("\u{FEFF}0 @N1@ NOTE\n0 NOTE @N2@\n").is_err());
        assert!(lines("\u{FEFF}0 @N1@ NOTE\n0 @N1@ NOTE\n").is_err());
    }

    #[test]
    fn invalid_terminators() {
        assert!(lines("\u{FEFF}0 HEAD\r0 TRLR\n").is_err());
        assert!(lines("\u{FEFF}0 HEAD\n\r0 TRLR\n\r").is_err());
    }

    /// Asserts the computed length, flattened bytes and JSON form of `items`.
    fn valid_items(items: &ItemsInner<'_>, len: usize, bytes: &[u8], json: &str) {
        assert_eq!(len, items.len());
        assert_eq!(bytes, &*items.bytes().collect::<Vec<_>>());
        assert_eq!(json, &serde_json::to_string(items).unwrap());
    }

    #[test]
    fn items() {
        let mut items = ItemsInner {
            data: SmallVec::new(),
        };
        Str("hello").extend_into(&mut items);
        Str(" ").extend_into(&mut items);
        Str("world").extend_into(&mut items);
        Str("!").extend_into(&mut items);
        valid_items(&items, 12, b"hello world!", r#"["hello world!"]"#);
        items.data.clear();
        TextEsc::Esc("hello").extend_into(&mut items);
        valid_items(&items, 8, b"\xFFhello\xFF", r#"["hello"]"#);
        items.data.clear();
        Str("ABT ").extend_into(&mut items);
        TextEsc::Esc("DFRENCH R").extend_into(&mut items);
        Str("11 NIVO 6").extend_into(&mut items);
        valid_items(
            &items,
            26,
            b"ABT \xFFDFRENCH R\xFF11 NIVO 6",
            r#"["ABT ","DFRENCH R","11 NIVO 6"]"#,
        );
        items.data.clear();
        TextEsc::Esc("DFRENCH R").extend_into(&mut items);
        Str("11 NIVO 6").extend_into(&mut items);
        valid_items(
            &items,
            22,
            b"\xFFDFRENCH R\xFF11 NIVO 6",
            r#"["DFRENCH R","11 NIVO 6"]"#,
        );
    }
}