Skip to main content

gedcom_core/
data.rs

1// Copyright 2021-2026 Ahmed Charles <me@ahmedcharles.com>
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! The core GEDCOM data representation language may be used to represent
16//! any form of structured information, not just genealogical data, using
17//! a sequential stream of characters.
18
19use std::fmt;
20use std::num::NonZeroU8;
21
22use nom::{
23    Compare, CompareResult, ExtendInto, IResult, Input, Needed, Offset, Parser,
24    error::{Error as NomError, ErrorKind},
25};
26use nom_locate::LocatedSpan;
27use serde::{Serialize, Serializer, ser::SerializeSeq};
28use smallvec::SmallVec;
29use thiserror::Error;
30
31/// Represents an error with reading a GEDCOM file.
32#[derive(Error, Debug)]
33#[error(transparent)]
34pub struct Error {
35    #[from]
36    internal: InternalError,
37}
38
39impl From<nom::Err<NomError<Span<'_>>>> for Error {
40    fn from(err: nom::Err<NomError<Span<'_>>>) -> Error {
41        Error {
42            internal: match err {
43                nom::Err::Incomplete(_) => unreachable!(),
44                nom::Err::Failure(e) => {
45                    InternalError::Nom(e.code, e.input.location_line(), e.input.get_utf8_column())
46                }
47                nom::Err::Error(e) => {
48                    InternalError::Nom(e.code, e.input.location_line(), e.input.get_utf8_column())
49                }
50            },
51        }
52    }
53}
54
55#[derive(Error, Debug)]
56enum InternalError {
57    #[error("verification error: '{0}' at line {1}")]
58    Verify(&'static str, usize),
59    #[error("nom error kind: {}, line: {}:{}", .0.description(), .1, .2)]
60    Nom(ErrorKind, u32, usize),
61}
62
63type Span<'a> = LocatedSpan<&'a str>;
64
65#[derive(Debug)]
66enum TextEsc<'a> {
67    Text(&'a str),
68    Esc(&'a str),
69}
70
71impl<'a> ExtendInto for TextEsc<'a> {
72    type Item = char;
73    type Extender = ItemsInner<'a>;
74    fn new_builder(&self) -> Self::Extender {
75        ItemsInner {
76            data: SmallVec::new(),
77        }
78    }
79    fn extend_into(&self, acc: &mut Self::Extender) {
80        acc.data.push(match self {
81            TextEsc::Text(t) => TextEsc::Text(t),
82            TextEsc::Esc(e) => TextEsc::Esc(e),
83        });
84    }
85}
86
87/// Represents an efficient, extendable string.
88#[derive(Debug, Eq, PartialEq, Serialize)]
89pub struct Item<'a>(ItemsInner<'a>);
90
91#[derive(Debug, Default)]
92struct ItemsInner<'a> {
93    data: SmallVec<[TextEsc<'a>; 1]>,
94}
95
96fn map_item_iter<'a>(
97    item: &TextEsc<'a>,
98) -> (
99    Option<NonZeroU8>,
100    std::slice::Iter<'a, u8>,
101    Option<NonZeroU8>,
102) {
103    match item {
104        TextEsc::Text(t) => (None, t.as_bytes().iter(), None),
105        TextEsc::Esc(t) => (
106            NonZeroU8::new(0xFF),
107            t.as_bytes().iter(),
108            NonZeroU8::new(0xFF),
109        ),
110    }
111}
112
113impl ItemsInner<'_> {
114    fn bytes(&self) -> Bytes<'_> {
115        let mut item_iter = self.data.iter();
116        let str_iter = item_iter.next().map(map_item_iter);
117        Bytes {
118            item_iter,
119            str_iter,
120        }
121    }
122    fn len(&self) -> usize {
123        let mut sum = 0;
124        let mut esc = false;
125        for item in &self.data {
126            match item {
127                TextEsc::Text(t) => {
128                    if esc {
129                        sum += 1;
130                        esc = false;
131                    }
132                    sum += t.len()
133                }
134                TextEsc::Esc(t) => {
135                    esc = true;
136                    sum += 2 + t.len() + 1
137                }
138            }
139        }
140        sum
141    }
142}
143
144struct Bytes<'a> {
145    str_iter: Option<(
146        Option<NonZeroU8>,
147        std::slice::Iter<'a, u8>,
148        Option<NonZeroU8>,
149    )>,
150    item_iter: std::slice::Iter<'a, TextEsc<'a>>,
151}
152
153impl Iterator for Bytes<'_> {
154    type Item = u8;
155    fn next(&mut self) -> Option<Self::Item> {
156        while let Some(ref mut str_iter) = self.str_iter {
157            if let Some(b) = str_iter.0.take() {
158                return Some(b.into());
159            }
160            if let Some(b) = str_iter.1.next() {
161                return Some(*b);
162            }
163            if let Some(b) = str_iter.2.take() {
164                return Some(b.into());
165            }
166            self.str_iter = self.item_iter.next().map(map_item_iter);
167        }
168        None
169    }
170}
171
172impl Eq for ItemsInner<'_> {}
173
174impl<'a> From<&'a str> for ItemsInner<'a> {
175    fn from(s: &'a str) -> ItemsInner<'a> {
176        let mut data = SmallVec::new();
177        data.push(TextEsc::Text(s));
178        ItemsInner { data }
179    }
180}
181
182impl PartialEq for ItemsInner<'_> {
183    fn eq(&self, other: &Self) -> bool {
184        self.bytes().eq(other.bytes())
185    }
186}
187
188struct TextSlice<'a>(&'a [TextEsc<'a>]);
189
190impl fmt::Display for TextSlice<'_> {
191    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
192        for item in self.0 {
193            match item {
194                TextEsc::Text(t) => f.write_str(t)?,
195                TextEsc::Esc(_) => unreachable!(),
196            }
197        }
198        Ok(())
199    }
200}
201
202impl Serialize for TextSlice<'_> {
203    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
204        serializer.collect_str(self)
205    }
206}
207
208impl Serialize for ItemsInner<'_> {
209    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
210        let mut seq = serializer.serialize_seq(None)?;
211        let mut from = 0;
212        for (i, item) in self.data.iter().enumerate() {
213            if let TextEsc::Esc(t) = item {
214                if from != i {
215                    seq.serialize_element(&TextSlice(&self.data[from..i]))?;
216                }
217                seq.serialize_element(t)?;
218                from = i + 1;
219            }
220        }
221        if from != self.data.len() {
222            seq.serialize_element(&TextSlice(&self.data[from..]))?;
223        }
224        seq.end()
225    }
226}
227
228#[derive(Clone, Copy)]
229struct Str<'a>(Span<'a>);
230
231impl<S: AsRef<str>> Compare<S> for Str<'_> {
232    fn compare(&self, s: S) -> CompareResult {
233        self.0.compare(s.as_ref())
234    }
235    fn compare_no_case(&self, s: S) -> CompareResult {
236        self.0.compare_no_case(s.as_ref())
237    }
238}
239
240impl<'a> ExtendInto for Str<'a> {
241    type Item = char;
242    type Extender = ItemsInner<'a>;
243    fn new_builder(&self) -> Self::Extender {
244        ItemsInner {
245            data: SmallVec::new(),
246        }
247    }
248    fn extend_into(&self, acc: &mut Self::Extender) {
249        acc.data.push(TextEsc::Text(*self.0));
250    }
251}
252
253impl<'a> Input for Str<'a> {
254    type Item = char;
255    type Iter = std::str::Chars<'a>;
256    type IterIndices = std::str::CharIndices<'a>;
257
258    fn input_len(&self) -> usize {
259        self.0.input_len()
260    }
261
262    fn take(&self, count: usize) -> Self {
263        Str(self.0.take(count))
264    }
265
266    fn take_from(&self, index: usize) -> Self {
267        Self(self.0.take_from(index))
268    }
269
270    fn take_split(&self, count: usize) -> (Self, Self) {
271        let (a, b) = self.0.take_split(count);
272        (Str(a), Str(b))
273    }
274
275    fn position<P: Fn(Self::Item) -> bool>(&self, predicate: P) -> Option<usize> {
276        self.0.position(predicate)
277    }
278
279    fn iter_elements(&self) -> Self::Iter {
280        self.0.iter_elements()
281    }
282
283    fn iter_indices(&self) -> Self::IterIndices {
284        self.0.iter_indices()
285    }
286
287    fn slice_index(&self, count: usize) -> Result<usize, Needed> {
288        self.0.slice_index(count)
289    }
290}
291
292impl Offset for Str<'_> {
293    fn offset(&self, second: &Self) -> usize {
294        self.0.offset(&second.0)
295    }
296}
297
298fn escaped_transform_<'a, F, G>(
299    normal: F,
300    control_char: char,
301    transform: G,
302) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, ItemsInner<'a>> + 'a
303where
304    F: Parser<Str<'a>, Output = Str<'a>, Error = NomError<Str<'a>>> + 'a,
305    G: Parser<Str<'a>, Output = TextEsc<'a>, Error = NomError<Str<'a>>> + 'a,
306{
307    let mut e = nom::bytes::escaped_transform(normal, control_char, transform);
308    move |i: Span<'a>| {
309        e.parse_complete(Str(i))
310            .map(|(i, o)| (i.0, o))
311            .map_err(|e| e.map_input(|i| i.0))
312    }
313}
314
315/// Represents a line value, either a pointer or item.
316#[derive(Debug, Eq, PartialEq, Serialize)]
317pub enum Value<'a> {
318    /// Represents a pointer to another record.
319    Pointer(&'a str),
320    /// Represents an actual value.
321    Item(Item<'a>),
322}
323
324/// Represents an entire line or record in the GEDCOM data format.
325#[derive(Debug, Eq, PartialEq, Serialize)]
326pub struct Line<'a> {
327    level: u8,
328    xref: Option<&'a str>,
329    tag: &'a str,
330    value: Option<Value<'a>>,
331}
332
333impl<'a> Line<'a> {
334    /// The level of this record.
335    pub fn level(&self) -> u8 {
336        self.level
337    }
338    /// The optional cross-reference identifier for this record.
339    pub fn xref(&self) -> Option<&'a str> {
340        self.xref
341    }
342    /// The tag for this record.
343    pub fn tag(&self) -> &'a str {
344        self.tag
345    }
346    /// The optional value for this record.
347    pub fn value(&self) -> Option<&Value<'a>> {
348        self.value.as_ref()
349    }
350    fn len(&self) -> usize {
351        let level_len = if self.level < 10 { 1 } else { 2 };
352        let xref_len = if let Some(xref) = self.xref {
353            1 + 2 + xref.len()
354        } else {
355            0
356        };
357        let value_len = match self.value {
358            Some(Value::Pointer(p)) => 1 + 2 + p.len(),
359            Some(Value::Item(ref text)) if text.0.len() == 0 => unreachable!(),
360            Some(Value::Item(ref text)) => {
361                1 + text.0.len() + text.0.bytes().filter(|&c| c == b'@').count()
362            }
363            None => 0,
364        };
365        level_len + xref_len + 1 + self.tag.len() + value_len
366    }
367}
368
369fn line<'a>(terminator: Span<'a>) -> impl Fn(Span<'a>) -> IResult<Span<'a>, Line<'a>> {
370    move |input: Span<'a>| {
371        use nom::ParseTo;
372        use nom::branch::alt;
373        use nom::bytes::{tag, take_while, take_while1};
374        use nom::character::complete::alphanumeric1;
375        use nom::character::one_of;
376        use nom::combinator::{cut, opt, peek, recognize, verify};
377        use nom::sequence::{delimited, preceded, terminated};
378
379        // delim = space
380        let delim_ = tag(" ");
381
382        // digit = U+0030 - U+0039
383        let digit_ = take_while1(|ch: char| ch.is_ascii_digit());
384
385        // level = [ digit | non_zero_digit + digit ]
386        let l_digit_ = alt((tag("0"), preceded(peek(one_of("123456789")), digit_)));
387        let level_ = verify(l_digit_.map_opt(|i: Span<'_>| i.parse_to()), |&o| o < 100);
388
389        // identifier_string = [ alphanum | alphanum + identifier_string ]
390        let identifier_string_ =
391            || verify::<Span<'a>, _, _, _, _>(alphanumeric1, |o: &Span<'_>| o.len() <= 20);
392
393        // pointer = U+0040 + identifier_string + U+0040
394        let pointer_ = || delimited(tag("@"), identifier_string_(), cut(tag("@")));
395
396        // tag = [ [ U+005F ] + alphanum | tag + alphanum ]
397        let tag_ = verify(recognize((opt(tag("_")), alphanumeric1)), |o: &Span<'_>| {
398            o.len() <= 31
399        });
400
401        // escape_text = [ alphanum | escape_text + alphanum | escape_text + space ]
402        let escape_text_plus_space_ =
403            take_while(|ch: char| ch.is_ascii_alphanumeric() || ch == ' ');
404        let escape_text_ = recognize((alphanumeric1, escape_text_plus_space_));
405
406        // escape = U+0040 + U+0023 + escape_text + U+0040
407        let escape_ = delimited(tag("#"), cut(escape_text_), cut(tag("@")))
408            .map(|o: Str<'_>| TextEsc::Esc(*o.0));
409
410        // line_text = [ line_char | line_text + line_char ]
411        let line_text_ = escaped_transform_(
412            take_while1(|ch: char| {
413                !matches!(ch,
414                    // disallowed: U+0000 - U+001F, except U+0009 = most C0 control characters
415                    '\u{0000}'..='\u{0008}' |
416                    '\u{000A}'..='\u{001F}' |
417                    // special: U+0040 + U+0040 = @@
418                    '@' |
419                    // disallowed: U+007F = DEL character
420                    '\u{007F}'
421                )
422            }),
423            '@',
424            alt((
425                tag("@").map(|o: Str<'_>| TextEsc::Text(*o.0)),
426                // An escape sequence must be followed by either a delim (space) or terminator
427                terminated(escape_, alt((tag(" "), peek(tag(*terminator))))),
428            )),
429        );
430
431        // line_item = [ escape | line_text | escape + delim + line_text ]
432        // Note: this is inaccurate, because dates allow text before escapes,
433        // e.g. ABT @#FRENCH R@ 11 NIVO 6
434        let line_item_ = line_text_.map(|t| Value::Item(Item(t)));
435
436        // line_value = [ pointer | line_item ]
437        let line_value_ = alt((pointer_().map(|p| Value::Pointer(*p)), line_item_));
438
439        // terminator = [ carriage_return | line_feed | carriage_return + line_feed ]
440        // use the detected ending
441        let terminator_ = tag(*terminator);
442
443        // line = level + [ delim + xref_ID ] + delim + tag + [ delim + line_value ] + terminator
444        let opt_pointer_ = opt(preceded(tag(" "), pointer_().map(|s| *s)));
445        let opt_line_value = opt(preceded(tag(" "), opt(line_value_)));
446        verify(
447            (
448                level_,
449                opt_pointer_,
450                delim_,
451                tag_,
452                opt_line_value,
453                terminator_,
454            )
455                .map(|(l, x, _, t, v, _)| Line {
456                    level: l,
457                    xref: x,
458                    tag: *t,
459                    value: v.flatten(),
460                }),
461            |l| l.len() + terminator.len() <= 255 && l.level == 0 || l.xref.is_none(),
462        )
463        .parse_complete(input)
464    }
465}
466
467fn verify_lines<'a>(
468    (input, (_, ls)): (Span<'a>, (Span<'a>, Vec<Line<'a>>)),
469) -> Result<Vec<Line<'a>>, Error> {
470    fn v<'b>(s: &'static str, l: usize) -> Result<Vec<Line<'b>>, Error> {
471        Err(InternalError::Verify(s, l + 1).into())
472    }
473    if !input.is_empty() {
474        return v("not all input consumed", 0);
475    }
476    let mut records = std::collections::BTreeSet::new();
477    let mut last: Option<&Line<'_>> = None;
478    for (i, l) in ls.iter().enumerate() {
479        let last_plus_1 = last.map(|r| r.level + 1).unwrap_or(0);
480        if l.level > last_plus_1 {
481            return v("level increase too great", i);
482        }
483        if l.level == last_plus_1
484            && last
485                .map(|r| r.tag == "CONT" || r.tag == "CONC")
486                .unwrap_or(false)
487        {
488            return v("CONT/CONC cannot have a subrecord", i);
489        }
490        if l.tag == "CONT" || l.tag == "CONC" {
491            if l.level == 0 {
492                return v("CONT/CONC cannot be a top level record", i);
493            }
494            if !l
495                .value
496                .as_ref()
497                .map(|v| matches!(v, Value::Item(_)))
498                .unwrap_or(true)
499            {
500                return v("CONT/CONC cannot have a cross reference value", i);
501            }
502            if l.level != last_plus_1
503                && !last
504                    .map(|r| r.tag == "CONT" || r.tag == "CONC")
505                    .unwrap_or(false)
506            {
507                return v(
508                    "CONT/CONC have to be a direct subrecord or sibling record of CONT/CONC",
509                    i,
510                );
511            }
512            if l.level != last_plus_1 && l.level != last_plus_1 - 1 {
513                return v(
514                    "CONT/CONC can only be a subrecord or sibling of the last record",
515                    i,
516                );
517            }
518            if l.level != 0
519                && !last
520                    .and_then(|r| r.value.as_ref())
521                    .map(|v| matches!(v, Value::Item(_)))
522                    .unwrap_or(true)
523            {
524                return v("CONT/CONC cannot follow a cross reference value", i);
525            }
526        }
527        if l.level != last_plus_1
528            && !last
529                .map(|r| r.tag == "CONT" || r.tag == "TRLR" || r.value.is_some())
530                .unwrap_or(true)
531        {
532            return v(
533                "CONT/TRLR are the only records allowed to have no subrecords or value",
534                i - 1,
535            );
536        }
537        if let Some(xref) = l.xref
538            && !records.insert(xref)
539        {
540            return v("duplicate cross reference", i);
541        }
542        last = Some(l);
543    }
544    for (i, l) in ls.iter().enumerate() {
545        if let Some(Value::Pointer(p)) = l.value
546            && !records.contains(p)
547        {
548            return v("missing cross reference", i);
549        }
550    }
551    Ok(ls)
552}
553
554/// Parses a string (GEDCOM file content) into a sequence of `Line`s.
555pub fn lines(input: &str) -> Result<Vec<Line<'_>>, Error> {
556    use nom::branch::alt;
557    use nom::bytes::{tag, take_till};
558    use nom::combinator::{all_consuming, opt, peek, recognize};
559    use nom::multi::many1;
560    use nom::sequence::preceded;
561
562    // [ carriage_return | line_feed | carriage_return + line_feed ]
563    let terminator_ = alt((recognize((tag("\r"), opt(tag("\n")))), tag("\n")));
564
565    let not_line_ending_ = take_till(|ch: char| ch == '\r' || ch == '\n');
566    let find_terminator_ = peek(preceded(not_line_ending_, terminator_));
567    all_consuming(preceded(
568        tag("\u{FEFF}"),
569        find_terminator_.flat_map(|i| many1(line(i)).map(move |o| (i, o))),
570    ))
571    .parse_complete(Span::new(input))
572    .map_err(|e| e.into())
573    .and_then(verify_lines)
574}
575
576/// Represents a logical record in the GEDCOM data format.
577#[allow(single_use_lifetimes)]
578#[derive(Debug, Eq, PartialEq, Serialize)]
579pub struct Record<'a> {
580    level: u8,
581    xref: Option<&'a str>,
582    tag: &'a str,
583    value: Option<Value<'a>>,
584    line: usize,
585    subrecords: Vec<Record<'a>>,
586}
587
588impl<'a> From<(usize, Line<'a>)> for Record<'a> {
589    fn from((i, l): (usize, Line<'a>)) -> Record<'a> {
590        Record {
591            level: l.level,
592            xref: l.xref,
593            tag: l.tag,
594            value: l.value,
595            line: i,
596            subrecords: Vec::new(),
597        }
598    }
599}
600
601fn verify_records<'a>(records: Vec<Record<'a>>) -> Result<Vec<Record<'a>>, Error> {
602    fn v<'b>(s: &'static str, l: usize) -> Result<Vec<Record<'b>>, Error> {
603        Err(InternalError::Verify(s, l + 1).into())
604    }
605    // HEAD must be the first record
606    match records.first() {
607        None => unreachable!(), // The parser requires 1 or more lines.
608        Some(head) if head.level != 0 => unreachable!(), // The first record is always level 0.
609        Some(head) if head.tag != "HEAD" => return v("HEAD must be the first record", 0),
610        Some(head) if head.xref.is_some() => {
611            return v("HEAD must not have a cross-reference identifier", 0);
612        }
613        Some(head) if head.value.is_some() => return v("HEAD must not have a value", 0),
614        _ => {}
615    }
616    // TRLR must be the last record
617    match records.last() {
618        None => return v("TRLR record is required", 0),
619        Some(trlr) if trlr.tag != "TRLR" => {
620            return v("TRLR must be the last record", trlr.line);
621        }
622        Some(trlr) if trlr.level != 0 => {
623            return v("TRLR must be a level 0 record", trlr.line);
624        }
625        Some(trlr) if trlr.xref.is_some() => {
626            return v("TRLR must not have a cross-reference identifier", trlr.line);
627        }
628        Some(trlr) if trlr.value.is_some() => {
629            return v("TRLR must not have a value", trlr.line);
630        }
631        _ => {}
632    }
633    // GEDC must be the first subrecord of HEAD
634    if let Some(head) = records.first() {
635        match head.subrecords.first() {
636            None => unreachable!(), // Already checked due to line value check above.
637            Some(gedc) if gedc.tag != "GEDC" => {
638                return v("GEDC must be the first subrecord of HEAD", gedc.line);
639            }
640            Some(gedc) if gedc.value.is_some() => {
641                return v("GEDC must not have a value", gedc.line);
642            }
643            Some(gedc) => {
644                // VERS must be the first subrecord of GEDC with value "5.5.5"
645                match gedc.subrecords.first() {
646                    None => unreachable!(), // Already checked due to line value check above.
647                    Some(vers) if vers.tag != "VERS" => {
648                        return v("VERS must be the first subrecord of GEDC", vers.line);
649                    }
650                    Some(vers) => {
651                        let expected = Some(Value::Item(Item(ItemsInner::from("5.5.5"))));
652                        if vers.value != expected {
653                            return v("GEDC.VERS must have value 5.5.5", vers.line);
654                        }
655                    }
656                }
657                // FORM must be the second subrecord of GEDC
658                match gedc.subrecords.get(1) {
659                    None => {
660                        return v("GEDC must have a FORM subrecord", gedc.line);
661                    }
662                    Some(form) if form.tag != "FORM" => {
663                        return v("FORM must be the second subrecord of GEDC", form.line);
664                    }
665                    Some(form) if form.value.is_none() => {
666                        return v("GEDC.FORM must have a value", form.line);
667                    }
668                    Some(form) => {
669                        // VERS must be the first subrecord of FORM with value "5.5.5"
670                        match form.subrecords.first() {
671                            None => {
672                                return v("GEDC.FORM must have a VERS subrecord", form.line);
673                            }
674                            Some(vers) if vers.tag != "VERS" => {
675                                return v("VERS must be the first subrecord of FORM", vers.line);
676                            }
677                            Some(vers) => {
678                                let expected = Some(Value::Item(Item(ItemsInner::from("5.5.5"))));
679                                if vers.value != expected {
680                                    return v("GEDC.FORM.VERS must have value 5.5.5", vers.line);
681                                }
682                            }
683                        }
684                    }
685                }
686            }
687        }
688        match head.subrecords.get(1) {
689            None => {
690                return v("HEAD must have a CHAR subrecord", head.line);
691            }
692            Some(char) if char.tag != "CHAR" => {
693                return v("CHAR must be the second subrecord of HEAD", char.line);
694            }
695            _ => {}
696        }
697    }
698    Ok(records)
699}
700
701/// Parses a string (GEDCOM file content) into a sequence of `Record`s.
702pub fn records(input: &str) -> Result<Vec<Record<'_>>, Error> {
703    lines(input).and_then(|ls| {
704        fn v<'b>(s: &'static str, l: usize) -> Result<Vec<Record<'b>>, Error> {
705            Err(InternalError::Verify(s, l + 1).into())
706        }
707        let mut recs = Record {
708            level: 0,
709            xref: None,
710            tag: "",
711            value: None,
712            line: 0,
713            subrecords: Vec::new(),
714        };
715        let mut stack: Vec<usize> = Vec::new();
716        for (i, l) in ls.into_iter().enumerate() {
717            let lvl = l.level;
718            stack.truncate(lvl.into());
719            let append = if lvl == 0 {
720                &mut recs
721            } else {
722                stack
723                    .iter()
724                    .fold(&mut recs, |acc, &x| &mut acc.subrecords[x])
725            };
726            fn cont_conc<'a>(r: &mut Record<'a>, l: Line<'a>, cont: bool) {
727                let mut v = match r.value.take() {
728                    Some(Value::Item(Item(v))) => v,
729                    Some(Value::Pointer(_)) | None => Default::default(),
730                };
731                if cont {
732                    v.data.push(TextEsc::Text("\n"));
733                }
734                if let Some(Value::Item(Item(i))) = l.value {
735                    v.data.extend(i.data.into_iter());
736                }
737                r.value = Some(Value::Item(Item(v)));
738            }
739            if l.tag == "CONT" || l.tag == "CONC" {
740                // The first 6 lines are fixed as part of the basic form header.
741                if i < 6 {
742                    return v("CONT/CONC not supported as basic form HEAD subrecords", i);
743                }
744                let cont = l.tag == "CONT";
745                cont_conc(append, l, cont);
746            } else {
747                stack.push(append.subrecords.len());
748                append.subrecords.push((i + 1, l).into());
749            }
750        }
751        verify_records(recs.subrecords)
752    })
753}
754
755#[cfg(test)]
756mod tests {
757    use super::*;
758
759    #[test]
760    fn terminators() {
761        let expected_line = Line {
762            level: 0,
763            xref: None,
764            tag: "HEAD",
765            value: None,
766        };
767        let (remaining, result) = line("\r".into())("0 HEAD\r".into()).unwrap();
768        assert_eq!("", *remaining);
769        assert_eq!(expected_line, result);
770        let (remaining, result) = line("\n".into())("0 HEAD\n".into()).unwrap();
771        assert_eq!("", *remaining);
772        assert_eq!(expected_line, result);
773        let (remaining, result) = line("\r\n".into())("0 HEAD\r\n".into()).unwrap();
774        assert_eq!("", *remaining);
775        assert_eq!(expected_line, result);
776    }
777
778    #[track_caller]
779    fn valid_case<'a>(input: &'a str, l: u8, x: Option<&'a str>, t: &'a str, v: Option<Value<'a>>) {
780        let expected_line = Line {
781            level: l,
782            xref: x,
783            tag: t,
784            value: v,
785        };
786        let (remaining, result) = line("\r\n".into())(input.into()).unwrap();
787        assert_eq!("", *remaining);
788        assert_eq!(expected_line, result);
789        eprintln!("{}", input);
790        let c = 2 + if input.ends_with(" \r\n") { 1 } else { 0 };
791        assert_eq!(
792            input.len(),
793            line("\r\n".into())(input.into()).unwrap().1.len() + c
794        );
795    }
796
797    #[track_caller]
798    fn invalid_case(input: &str, len: usize, error: &str) {
799        let l = line("\r\n".into())(input.into());
800        match l {
801            Ok(v) => {
802                eprintln!("{:?}", v);
803                assert!(false);
804            }
805            Err(nom::Err::Incomplete(e)) => {
806                eprintln!("{:?}", e);
807                assert!(false);
808            }
809            Err(nom::Err::Failure(e)) | Err(nom::Err::Error(e)) => {
810                assert_eq!(len, e.input.fragment().len());
811                assert_eq!(error, &format!("{:?}", e));
812            }
813        }
814    }
815
816    #[track_caller]
817    fn invalid_lines(input: &str, error: &str) {
818        match lines(input) {
819            Ok(v) => {
820                eprintln!("{:?}", v);
821                assert!(false);
822            }
823            Err(e) => {
824                assert_eq!(error, &format!("{}", e));
825            }
826        }
827    }
828
829    #[track_caller]
830    fn invalid_records(input: &str, error: &str) {
831        match records(input) {
832            Ok(v) => {
833                eprintln!("{:?}", v);
834                assert!(false);
835            }
836            Err(e) => {
837                assert_eq!(error, &format!("{}", e));
838            }
839        }
840    }
841
842    #[test]
843    fn tags() {
844        valid_case("0 HEAD\r\n", 0, None, "HEAD", None);
845        let upper = "0 ABCDEFGHIJKLMNOPQRSTUVWXYZ\r\n";
846        valid_case(upper, 0, None, &upper[2..28], None);
847        let lower = "0 abcdefghijklmnopqrstuvwxyz\r\n";
848        valid_case(lower, 0, None, &lower[2..28], None);
849        valid_case("0 _0123456789\r\n", 0, None, "_0123456789", None);
850        valid_case("0 ADDR \r\n", 0, None, "ADDR", None);
851        let max_level = "99 ABCDEFGHIJKLMNOPQRSTUVWXYZ01234 \r\n";
852        valid_case(max_level, 99, None, &max_level[3..34], None);
853        let max = "0 @N1234567890123456789@ ABCDEFGHIJKLMNOPQRSTUVWXYZ01234 \r\n";
854        valid_case(max, 0, Some(&max[3..23]), &max[25..56], None);
855    }
856
857    #[test]
858    fn levels() {
859        for i in 0..100 {
860            let l = format!("{} HEAD\r\n", i);
861            valid_case(&l, i, None, "HEAD", None);
862        }
863    }
864
865    #[test]
866    fn simple_value() {
867        let v = Some(Value::Item(Item("UTF-8".into())));
868        valid_case("1 CHAR UTF-8\r\n", 1, None, "CHAR", v);
869    }
870
871    #[test]
872    fn simple_xref() {
873        // Exactly 20 characters in xref - should be valid
874        valid_case(
875            "0 @N1234567890123456789@ NOTE\r\n",
876            0,
877            Some("N1234567890123456789"),
878            "NOTE",
879            None,
880        );
881    }
882
883    #[test]
884    fn simple_pointer() {
885        // Pointer value with exactly 20 characters
886        let v = Some(Value::Pointer("N1234567890123456789"));
887        valid_case("1 NOTE @N1234567890123456789@\r\n", 1, None, "NOTE", v);
888    }
889
890    #[test]
891    fn simple_note() {
892        let v = Some(Value::Item(Item("foo".into())));
893        valid_case("0 @N1@ NOTE foo\r\n", 0, Some("N1"), "NOTE", v);
894    }
895
896    #[test]
897    fn unicode_values() {
898        // Non-ASCII UTF-8 characters should be allowed in values
899        // Japanese
900        valid_case(
901            "1 NOTE こんにちは\r\n",
902            1,
903            None,
904            "NOTE",
905            Some(Value::Item(Item("こんにちは".into()))),
906        );
907
908        // Spanish with accents
909        valid_case(
910            "1 NOTE Señor Ñoño\r\n",
911            1,
912            None,
913            "NOTE",
914            Some(Value::Item(Item("Señor Ñoño".into()))),
915        );
916
917        // Emojis
918        valid_case(
919            "1 NOTE Hello 👋 World 🌍\r\n",
920            1,
921            None,
922            "NOTE",
923            Some(Value::Item(Item("Hello 👋 World 🌍".into()))),
924        );
925
926        // Chinese
927        valid_case(
928            "1 NOTE 你好世界\r\n",
929            1,
930            None,
931            "NOTE",
932            Some(Value::Item(Item("你好世界".into()))),
933        );
934    }
935
936    #[test]
937    fn escape_line_value() {
938        // Note: The optional space after an escape sequence is consumed by the parser
939
940        // French calendar
941        let mut items = ItemsInner {
942            data: SmallVec::new(),
943        };
944        Str("ABT ".into()).extend_into(&mut items);
945        TextEsc::Esc("DFRENCH R").extend_into(&mut items);
946        Str("11 NIVO 6".into()).extend_into(&mut items);
947        let v = Some(Value::Item(Item(items)));
948        valid_case("1 DATE ABT @#DFRENCH R@ 11 NIVO 6\r\n", 1, None, "DATE", v);
949
950        // Hebrew calendar - space after escape is consumed
951        let mut items = ItemsInner {
952            data: SmallVec::new(),
953        };
954        TextEsc::Esc("DHEBREW").extend_into(&mut items);
955        Str("5765".into()).extend_into(&mut items);
956        let v = Some(Value::Item(Item(items)));
957        valid_case("1 DATE @#DHEBREW@ 5765\r\n", 1, None, "DATE", v);
958
959        // Julian calendar
960        let mut items = ItemsInner {
961            data: SmallVec::new(),
962        };
963        TextEsc::Esc("DJULIAN").extend_into(&mut items);
964        Str("1 JAN 1700".into()).extend_into(&mut items);
965        let v = Some(Value::Item(Item(items)));
966        valid_case("1 DATE @#DJULIAN@ 1 JAN 1700\r\n", 1, None, "DATE", v);
967
968        // Multiple escape sequences in one value
969        let mut items = ItemsInner {
970            data: SmallVec::new(),
971        };
972        TextEsc::Esc("DHEBREW").extend_into(&mut items);
973        Str("to ".into()).extend_into(&mut items);
974        TextEsc::Esc("DGREGORIAN").extend_into(&mut items);
975        let v = Some(Value::Item(Item(items)));
976        valid_case("1 DATE @#DHEBREW@ to @#DGREGORIAN@\r\n", 1, None, "DATE", v);
977
978        // Escape at start of value with no trailing text
979        let mut items = ItemsInner {
980            data: SmallVec::new(),
981        };
982        TextEsc::Esc("DROMAN").extend_into(&mut items);
983        let v = Some(Value::Item(Item(items)));
984        valid_case("1 DATE @#DROMAN@\r\n", 1, None, "DATE", v);
985    }
986
987    #[test]
988    fn escape_at() {
989        let v = Some(Value::Item(Item("foo@example.com".into())));
990        valid_case("1 EMAIL foo@@example.com\r\n", 1, None, "EMAIL", v);
991        let v = Some(Value::Item(Item("@foo".into())));
992        valid_case("1 NOTE @@foo\r\n", 1, None, "NOTE", v);
993
994        // @@ in a value should parse as a single @ character
995        valid_case(
996            "1 NOTE @@\r\n",
997            1,
998            None,
999            "NOTE",
1000            Some(Value::Item(Item("@".into()))),
1001        );
1002
1003        // Multiple @@ should parse as multiple @ characters
1004        valid_case(
1005            "1 NOTE @@@@\r\n",
1006            1,
1007            None,
1008            "NOTE",
1009            Some(Value::Item(Item("@@".into()))),
1010        );
1011
1012        // @@ at start and end
1013        valid_case(
1014            "1 NOTE @@hello@@\r\n",
1015            1,
1016            None,
1017            "NOTE",
1018            Some(Value::Item(Item("@hello@".into()))),
1019        );
1020    }
1021
1022    #[test]
1023    fn escape_text() {
1024        // Empty escape @#@ should fail - escape_text requires at least one alphanum
1025        invalid_case(
1026            "1 DATE @#@\r\n",
1027            3,
1028            r#"Error { input: LocatedSpan { offset: 9, line: 1, fragment: "@\r\n", extra: () }, code: AlphaNumeric }"#,
1029        );
1030
1031        // An escape sequence must be followed by either a delim (space) or terminator
1032        invalid_case(
1033            "1 DATE @#DHEBREW@5765\r\n",
1034            16,
1035            r#"Error { input: LocatedSpan { offset: 7, line: 1, fragment: "@#DHEBREW@5765\r\n", extra: () }, code: Tag }"#,
1036        );
1037    }
1038
1039    #[test]
1040    fn invalid_tags() {
1041        invalid_case(
1042            "0 __HEAD\r\n",
1043            7,
1044            r#"Error { input: LocatedSpan { offset: 3, line: 1, fragment: "_HEAD\r\n", extra: () }, code: AlphaNumeric }"#,
1045        );
1046        invalid_case(
1047            "0 ABCDEFGHIJKLMNOPQRSTUVWXYZ012345\r\n",
1048            34,
1049            r#"Error { input: LocatedSpan { offset: 2, line: 1, fragment: "ABCDEFGHIJKLMNOPQRSTUVWXYZ012345\r\n", extra: () }, code: Verify }"#,
1050        );
1051    }
1052
1053    #[test]
1054    fn invalid_levels() {
1055        invalid_case(
1056            "01 HEAD\r\n",
1057            8,
1058            r#"Error { input: LocatedSpan { offset: 1, line: 1, fragment: "1 HEAD\r\n", extra: () }, code: Tag }"#,
1059        );
1060        invalid_case(
1061            "100 HEAD\r\n",
1062            10,
1063            r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: "100 HEAD\r\n", extra: () }, code: Verify }"#,
1064        );
1065
1066        invalid_lines(
1067            "\u{FEFF}1 HEAD\r\n",
1068            "verification error: 'level increase too great' at line 1",
1069        );
1070        invalid_lines(
1071            "\u{FEFF}0 HEAD\r\n2 VERS 5.5.5\r\n",
1072            "verification error: 'level increase too great' at line 2",
1073        );
1074    }
1075
1076    #[test]
1077    fn invalid_pointer() {
1078        // 21 characters in xref - should fail (exceeds 20 char limit)
1079        invalid_case(
1080            "0 @N01234567890123456789@ NOTE foo\r\n",
1081            34,
1082            r#"Error { input: LocatedSpan { offset: 2, line: 1, fragment: "@N01234567890123456789@ NOTE foo\r\n", extra: () }, code: AlphaNumeric }"#,
1083        );
1084
1085        // Pointer value with 21 characters - should fail
1086        invalid_case(
1087            "0 NOTE @N01234567890123456789@\r\n",
1088            25,
1089            r#"Error { input: LocatedSpan { offset: 7, line: 1, fragment: "@N01234567890123456789@\r\n", extra: () }, code: Tag }"#,
1090        );
1091
1092        invalid_case(
1093            "1 @N1@ NOTE foo\r\n",
1094            17,
1095            r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: "1 @N1@ NOTE foo\r\n", extra: () }, code: Verify }"#,
1096        );
1097    }
1098
1099    #[test]
1100    fn line_length_limit() {
1101        // Line length includes level, spaces, tag, value, and terminator
1102        // Maximum is 255 bytes. Test with a line that exceeds this.
1103        // "0 NOTE " = 7 bytes, terminator = 1 byte, so value can be 247 bytes max
1104        let max_value = "x".repeat(246);
1105        let valid_line = format!("0 NOTE {}\r\n", max_value);
1106        valid_case(
1107            &valid_line,
1108            0,
1109            None,
1110            "NOTE",
1111            Some(Value::Item(Item(max_value.as_str().into()))),
1112        );
1113
1114        // One byte over the limit.
1115        let over_value = "x".repeat(247);
1116        let invalid_line = format!("0 NOTE {}\n", over_value);
1117        invalid_case(
1118            &invalid_line,
1119            1,
1120            "Error { input: LocatedSpan { offset: 254, line: 1, fragment: \"\\n\", extra: () }, code: Tag }",
1121        );
1122    }
1123
1124    #[test]
1125    fn leading_whitespace() {
1126        let expected_line = Line {
1127            level: 0,
1128            xref: None,
1129            tag: "HEAD",
1130            value: None,
1131        };
1132        let (remaining, result) = line("\n".into())("0 HEAD\n\r".into()).unwrap();
1133        assert_eq!("\r", *remaining);
1134        assert_eq!(expected_line, result);
1135
1136        invalid_case(
1137            " 0 HEAD\r\n",
1138            9,
1139            r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: " 0 HEAD\r\n", extra: () }, code: OneOf }"#,
1140        );
1141        invalid_case(
1142            "\t0 HEAD\r\n",
1143            9,
1144            r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: "\t0 HEAD\r\n", extra: () }, code: OneOf }"#,
1145        );
1146        invalid_case(
1147            "\r\n0 HEAD\r\n",
1148            10,
1149            r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: "\r\n0 HEAD\r\n", extra: () }, code: OneOf }"#,
1150        );
1151        invalid_case(
1152            "\n0 HEAD\r\n",
1153            9,
1154            r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: "\n0 HEAD\r\n", extra: () }, code: OneOf }"#,
1155        );
1156        invalid_case(
1157            "\r0 HEAD\r\n",
1158            9,
1159            r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: "\r0 HEAD\r\n", extra: () }, code: OneOf }"#,
1160        );
1161        invalid_case(
1162            "\n\r0 HEAD\r\n",
1163            10,
1164            r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: "\n\r0 HEAD\r\n", extra: () }, code: OneOf }"#,
1165        );
1166    }
1167
1168    #[test]
1169    fn multiple_lines() {
1170        let expected_lines = vec![
1171            Line {
1172                level: 0,
1173                xref: None,
1174                tag: "HEAD",
1175                value: None,
1176            },
1177            Line {
1178                level: 1,
1179                xref: None,
1180                tag: "GEDC",
1181                value: None,
1182            },
1183            Line {
1184                level: 2,
1185                xref: None,
1186                tag: "VERS",
1187                value: Some(Value::Item(Item("5.5.5".into()))),
1188            },
1189            Line {
1190                level: 0,
1191                xref: None,
1192                tag: "TRLR",
1193                value: None,
1194            },
1195        ];
1196        let cr = "\u{FEFF}0 HEAD\r1 GEDC\r2 VERS 5.5.5\r0 TRLR\r";
1197        assert_eq!(expected_lines, lines(cr).unwrap());
1198        let lf = "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n0 TRLR\n";
1199        assert_eq!(expected_lines, lines(lf).unwrap());
1200        let crlf = "\u{FEFF}0 HEAD\r\n1 GEDC\r\n2 VERS 5.5.5\r\n0 TRLR\r\n";
1201        assert_eq!(expected_lines, lines(crlf).unwrap());
1202    }
1203
1204    #[test]
1205    fn invalid_bom() {
1206        // Files must start with UTF-8 BOM (\u{FEFF})
1207        invalid_lines("0 HEAD\n0 TRLR\n", "nom error kind: Tag, line: 1:1");
1208    }
1209
1210    #[test]
1211    fn invalid_xrefs() {
1212        invalid_lines(
1213            "\u{FEFF}0 @N1@ NOTE Test\n1 NOTE @N2@\n",
1214            "verification error: 'missing cross reference' at line 2",
1215        );
1216        invalid_lines(
1217            "\u{FEFF}0 @N1@ NOTE Test\n0 @N1@ NOTE Test\n",
1218            "verification error: 'duplicate cross reference' at line 2",
1219        );
1220    }
1221
1222    #[test]
1223    fn invalid_terminators() {
1224        invalid_lines(
1225            "\u{FEFF}0 HEAD\r0 TRLR\n",
1226            "nom error kind: End of file, line: 1:9",
1227        );
1228        invalid_lines(
1229            "\u{FEFF}0 HEAD\n\r0 TRLR\n\r",
1230            "nom error kind: End of file, line: 2:1",
1231        );
1232    }
1233
1234    #[test]
1235    fn invalid_cont_conc() {
1236        invalid_lines(
1237            "\u{FEFF}0 HEAD\n0 CONC t\n0 TRLR\n",
1238            "verification error: 'CONT/CONC cannot be a top level record' at line 2",
1239        );
1240        invalid_lines(
1241            "\u{FEFF}0 HEAD\n0 CONT t\n0 TRLR\n",
1242            "verification error: 'CONT/CONC cannot be a top level record' at line 2",
1243        );
1244        invalid_lines(
1245            "\u{FEFF}0 HEAD\n1 TEXT t\n2 CONC\n0 TRLR\n",
1246            "verification error: 'CONT/TRLR are the only records allowed to have no subrecords or value' at line 3",
1247        );
1248        invalid_lines(
1249            "\u{FEFF}0 HEAD\n1 TEXT t\n1 CONT\n0 TRLR\n",
1250            "verification error: 'CONT/CONC have to be a direct subrecord or sibling record of CONT/CONC' at line 3",
1251        );
1252        invalid_lines(
1253            "\u{FEFF}0 HEAD\n1 TEXT t\n2 CONT\n1 CONT\n0 TRLR\n",
1254            "verification error: 'CONT/CONC can only be a subrecord or sibling of the last record' at line 4",
1255        );
1256        invalid_lines(
1257            "\u{FEFF}0 HEAD\n1 TEXT t\n2 CONC t\n1 CONC t\n0 TRLR\n",
1258            "verification error: 'CONT/CONC can only be a subrecord or sibling of the last record' at line 4",
1259        );
1260        invalid_lines(
1261            "\u{FEFF}0 HEAD\n1 TEXT t\n2 CONC t\n3 CONC t\n0 TRLR\n",
1262            "verification error: 'CONT/CONC cannot have a subrecord' at line 4",
1263        );
1264        invalid_lines(
1265            "\u{FEFF}0 HEAD\n1 TEXT t\n2 CONC t\n3 TEXT t\n0 TRLR\n",
1266            "verification error: 'CONT/CONC cannot have a subrecord' at line 4",
1267        );
1268        invalid_lines(
1269            "\u{FEFF}0 @N1@ NOTE Test\n1 CONT @N1@\n",
1270            "verification error: 'CONT/CONC cannot have a cross reference value' at line 2",
1271        );
1272        invalid_lines(
1273            "\u{FEFF}0 @N1@ NOTE Test\n1 CONC @N1@\n",
1274            "verification error: 'CONT/CONC cannot have a cross reference value' at line 2",
1275        );
1276        invalid_lines(
1277            "\u{FEFF}0 @N1@ NOTE Test\n0 @N2@ NOTE @N1@\n1 CONT more\n",
1278            "verification error: 'CONT/CONC cannot follow a cross reference value' at line 3",
1279        );
1280        invalid_lines(
1281            "\u{FEFF}0 @N1@ NOTE Test\n0 @N2@ NOTE @N1@\n1 CONC more\n",
1282            "verification error: 'CONT/CONC cannot follow a cross reference value' at line 3",
1283        );
1284    }
1285
1286    #[track_caller]
1287    fn valid_items(items: &ItemsInner<'_>, len: usize, bytes: &[u8], json: &str) {
1288        assert_eq!(len, items.len());
1289        assert_eq!(bytes, &*items.bytes().collect::<Vec<_>>());
1290        assert_eq!(json, &serde_json::to_string(items).unwrap());
1291    }
1292
1293    #[test]
1294    fn items() {
1295        let mut items = ItemsInner {
1296            data: SmallVec::new(),
1297        };
1298        Str("hello".into()).extend_into(&mut items);
1299        Str(" ".into()).extend_into(&mut items);
1300        Str("world".into()).extend_into(&mut items);
1301        Str("!".into()).extend_into(&mut items);
1302        valid_items(&items, 12, b"hello world!", r#"["hello world!"]"#);
1303        items.data.clear();
1304        TextEsc::Esc("hello").extend_into(&mut items);
1305        valid_items(&items, 8, b"\xFFhello\xFF", r#"["hello"]"#);
1306        items.data.clear();
1307        Str("ABT ".into()).extend_into(&mut items);
1308        TextEsc::Esc("DFRENCH R").extend_into(&mut items);
1309        Str("11 NIVO 6".into()).extend_into(&mut items);
1310        valid_items(
1311            &items,
1312            26,
1313            b"ABT \xFFDFRENCH R\xFF11 NIVO 6",
1314            r#"["ABT ","DFRENCH R","11 NIVO 6"]"#,
1315        );
1316        items.data.clear();
1317        TextEsc::Esc("DFRENCH R").extend_into(&mut items);
1318        Str("11 NIVO 6".into()).extend_into(&mut items);
1319        valid_items(
1320            &items,
1321            22,
1322            b"\xFFDFRENCH R\xFF11 NIVO 6",
1323            r#"["DFRENCH R","11 NIVO 6"]"#,
1324        );
1325    }
1326
1327    #[test]
1328    fn control_characters() {
1329        // Tab (U+0009) is allowed
1330        valid_case(
1331            "1 NOTE hello\tworld\r\n",
1332            1,
1333            None,
1334            "NOTE",
1335            Some(Value::Item(Item("hello\tworld".into()))),
1336        );
1337
1338        // NUL (U+0000) is disallowed - parsing stops at the control character
1339        invalid_case(
1340            "1 NOTE hello\x00world\r\n",
1341            8,
1342            r#"Error { input: LocatedSpan { offset: 12, line: 1, fragment: "\0world\r\n", extra: () }, code: Tag }"#,
1343        );
1344
1345        // Bell (U+0007) is disallowed
1346        invalid_case(
1347            "1 NOTE hello\x07world\r\n",
1348            8,
1349            r#"Error { input: LocatedSpan { offset: 12, line: 1, fragment: "\u{7}world\r\n", extra: () }, code: Tag }"#,
1350        );
1351
1352        // Backspace (U+0008) is disallowed
1353        invalid_case(
1354            "1 NOTE hello\x08world\r\n",
1355            8,
1356            r#"Error { input: LocatedSpan { offset: 12, line: 1, fragment: "\u{8}world\r\n", extra: () }, code: Tag }"#,
1357        );
1358
1359        // Vertical tab (U+000B) is disallowed
1360        invalid_case(
1361            "1 NOTE hello\x0Bworld\r\n",
1362            8,
1363            r#"Error { input: LocatedSpan { offset: 12, line: 1, fragment: "\u{b}world\r\n", extra: () }, code: Tag }"#,
1364        );
1365
1366        // Form feed (U+000C) is disallowed
1367        invalid_case(
1368            "1 NOTE hello\x0Cworld\r\n",
1369            8,
1370            r#"Error { input: LocatedSpan { offset: 12, line: 1, fragment: "\u{c}world\r\n", extra: () }, code: Tag }"#,
1371        );
1372
1373        // Escape (U+001B) is disallowed
1374        invalid_case(
1375            "1 NOTE hello\x1Bworld\r\n",
1376            8,
1377            r#"Error { input: LocatedSpan { offset: 12, line: 1, fragment: "\u{1b}world\r\n", extra: () }, code: Tag }"#,
1378        );
1379
1380        // DEL (U+007F) is disallowed
1381        invalid_case(
1382            "1 NOTE hello\u{007F}world\r\n",
1383            8,
1384            r#"Error { input: LocatedSpan { offset: 12, line: 1, fragment: "\u{7f}world\r\n", extra: () }, code: Tag }"#,
1385        );
1386    }
1387
1388    #[test]
1389    fn valid_records() {
1390        let expected = vec![
1391            Record {
1392                level: 0,
1393                xref: None,
1394                tag: "HEAD",
1395                value: None,
1396                line: 1,
1397                subrecords: vec![
1398                    Record {
1399                        level: 1,
1400                        xref: None,
1401                        tag: "GEDC",
1402                        value: None,
1403                        line: 2,
1404                        subrecords: vec![
1405                            Record {
1406                                level: 2,
1407                                xref: None,
1408                                tag: "VERS",
1409                                value: Some(Value::Item(Item(ItemsInner {
1410                                    data: smallvec::smallvec![TextEsc::Text("5.5.5")],
1411                                }))),
1412                                line: 3,
1413                                subrecords: vec![],
1414                            },
1415                            Record {
1416                                level: 2,
1417                                xref: None,
1418                                tag: "FORM",
1419                                value: Some(Value::Item(Item(ItemsInner {
1420                                    data: smallvec::smallvec![TextEsc::Text("TEST-FORM")],
1421                                }))),
1422                                line: 4,
1423                                subrecords: vec![Record {
1424                                    level: 3,
1425                                    xref: None,
1426                                    tag: "VERS",
1427                                    value: Some(Value::Item(Item(ItemsInner {
1428                                        data: smallvec::smallvec![TextEsc::Text("5.5.5")],
1429                                    }))),
1430                                    line: 5,
1431                                    subrecords: vec![],
1432                                }],
1433                            },
1434                        ],
1435                    },
1436                    Record {
1437                        level: 1,
1438                        xref: None,
1439                        tag: "CHAR",
1440                        value: Some(Value::Item(Item(ItemsInner {
1441                            data: smallvec::smallvec![TextEsc::Text("UTF-8")],
1442                        }))),
1443                        line: 6,
1444                        subrecords: vec![],
1445                    },
1446                ],
1447            },
1448            Record {
1449                level: 0,
1450                xref: None,
1451                tag: "TRLR",
1452                value: None,
1453                line: 7,
1454                subrecords: vec![],
1455            },
1456        ];
1457        let r = records(
1458            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 VERS 5.5.5\n1 CHAR UTF-8\n0 TRLR\n",
1459        )
1460        .unwrap();
1461        assert_eq!(expected, r);
1462    }
1463
1464    #[test]
1465    fn cont_conc() {
1466        let expected = vec![
1467            Record {
1468                level: 0,
1469                xref: None,
1470                tag: "HEAD",
1471                value: None,
1472                line: 1,
1473                subrecords: vec![
1474                    Record {
1475                        level: 1,
1476                        xref: None,
1477                        tag: "GEDC",
1478                        value: None,
1479                        line: 2,
1480                        subrecords: vec![
1481                            Record {
1482                                level: 2,
1483                                xref: None,
1484                                tag: "VERS",
1485                                value: Some(Value::Item(Item(ItemsInner {
1486                                    data: smallvec::smallvec![TextEsc::Text("5.5.5")],
1487                                }))),
1488                                line: 3,
1489                                subrecords: vec![],
1490                            },
1491                            Record {
1492                                level: 2,
1493                                xref: None,
1494                                tag: "FORM",
1495                                value: Some(Value::Item(Item(ItemsInner {
1496                                    data: smallvec::smallvec![TextEsc::Text("TEST-FORM")],
1497                                }))),
1498                                line: 4,
1499                                subrecords: vec![Record {
1500                                    level: 3,
1501                                    xref: None,
1502                                    tag: "VERS",
1503                                    value: Some(Value::Item(Item(ItemsInner {
1504                                        data: smallvec::smallvec![TextEsc::Text("5.5.5")],
1505                                    }))),
1506                                    line: 5,
1507                                    subrecords: vec![],
1508                                }],
1509                            },
1510                        ],
1511                    },
1512                    Record {
1513                        level: 1,
1514                        xref: None,
1515                        tag: "CHAR",
1516                        value: Some(Value::Item(Item(ItemsInner {
1517                            data: smallvec::smallvec![TextEsc::Text("UTF-8")],
1518                        }))),
1519                        line: 6,
1520                        subrecords: vec![],
1521                    },
1522                    Record {
1523                        level: 1,
1524                        xref: None,
1525                        tag: "TEXT",
1526                        value: Some(Value::Item(Item(ItemsInner {
1527                            data: smallvec::smallvec![
1528                                TextEsc::Text("fir"),
1529                                TextEsc::Text("st"),
1530                                TextEsc::Text("\n"),
1531                                TextEsc::Text("sec"),
1532                                TextEsc::Text("ond"),
1533                            ],
1534                        }))),
1535                        line: 7,
1536                        subrecords: vec![],
1537                    },
1538                ],
1539            },
1540            Record {
1541                level: 0,
1542                xref: None,
1543                tag: "TRLR",
1544                value: None,
1545                line: 11,
1546                subrecords: vec![],
1547            },
1548        ];
1549        let r = records(
1550            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 VERS 5.5.5\n1 CHAR UTF-8\n1 TEXT fir\n2 CONC st\n2 CONT sec\n2 CONC ond\n0 TRLR\n",
1551        )
1552        .unwrap();
1553        assert_eq!(expected, r);
1554    }
1555
1556    #[test]
1557    fn head_record() {
1558        invalid_records("\u{FEFF}\n", "nom error kind: OneOf, line: 1:2");
1559        invalid_records(
1560            "\u{FEFF}0 @I1@ INDI\n1 NAME Test\n0 HEAD\n1 GEDC\n2 VERS 5.5.5\n0 TRLR\n",
1561            "verification error: 'HEAD must be the first record' at line 1",
1562        );
1563        invalid_records(
1564            "\u{FEFF}0 @H1@ HEAD\n1 GEDC\n2 VERS 5.5.5\n0 TRLR\n",
1565            "verification error: 'HEAD must not have a cross-reference identifier' at line 1",
1566        );
1567        invalid_records(
1568            "\u{FEFF}0 HEAD something\n0 TRLR\n",
1569            "verification error: 'HEAD must not have a value' at line 1",
1570        );
1571        // CONT/CONC not supported in basic HEAD subrecords (lines 1-6)
1572        invalid_records(
1573            "\u{FEFF}0 HEAD\n1 CONC t\n0 TRLR\n",
1574            "verification error: 'CONT/CONC not supported as basic form HEAD subrecords' at line 2",
1575        );
1576        invalid_records(
1577            "\u{FEFF}0 HEAD\n1 CONT t\n0 TRLR\n",
1578            "verification error: 'CONT/CONC not supported as basic form HEAD subrecords' at line 2",
1579        );
1580        invalid_records(
1581            "\u{FEFF}0 HEAD\n1 GEDC\n2 CONC t\n0 TRLR\n",
1582            "verification error: 'CONT/CONC not supported as basic form HEAD subrecords' at line 3",
1583        );
1584        invalid_records(
1585            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n3 CONC t\n0 TRLR\n",
1586            "verification error: 'CONT/CONC not supported as basic form HEAD subrecords' at line 4",
1587        );
1588        invalid_records(
1589            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 CONC t\n0 TRLR\n",
1590            "verification error: 'CONT/CONC not supported as basic form HEAD subrecords' at line 5",
1591        );
1592        invalid_records(
1593            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 VERS 5.5.5\n4 CONC t\n0 TRLR\n",
1594            "verification error: 'CONT/CONC not supported as basic form HEAD subrecords' at line 6",
1595        );
1596        // GEDC must be the first subrecord of HEAD
1597        invalid_records(
1598            "\u{FEFF}0 HEAD\n1 NOTE test\n0 TRLR\n",
1599            "verification error: 'GEDC must be the first subrecord of HEAD' at line 3",
1600        );
1601        // HEAD with no subrecords is caught by an earlier check
1602        invalid_records(
1603            "\u{FEFF}0 HEAD\n0 TRLR\n",
1604            "verification error: 'CONT/TRLR are the only records allowed to have no subrecords or value' at line 1",
1605        );
1606        invalid_records(
1607            "\u{FEFF}0 HEAD\n1 GEDC something\n2 VERS 5.5.5\n0 TRLR\n",
1608            "verification error: 'GEDC must not have a value' at line 3",
1609        );
1610        // VERS must be the first subrecord of GEDC with value 5.5.5
1611        invalid_records(
1612            "\u{FEFF}0 HEAD\n1 GEDC\n2 FORM TEST-FORM\n0 TRLR\n",
1613            "verification error: 'VERS must be the first subrecord of GEDC' at line 4",
1614        );
1615        invalid_records(
1616            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.1\n2 FORM TEST-FORM\n0 TRLR\n",
1617            "verification error: 'GEDC.VERS must have value 5.5.5' at line 4",
1618        );
1619        // FORM must be the second subrecord of GEDC
1620        invalid_records(
1621            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n0 TRLR\n",
1622            "verification error: 'GEDC must have a FORM subrecord' at line 3",
1623        );
1624        invalid_records(
1625            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 NOTE test\n0 TRLR\n",
1626            "verification error: 'FORM must be the second subrecord of GEDC' at line 5",
1627        );
1628        invalid_records(
1629            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM\n3 VERS 5.5.5\n0 TRLR\n",
1630            "verification error: 'GEDC.FORM must have a value' at line 5",
1631        );
1632        // VERS must be the first subrecord of FORM with value 5.5.5
1633        invalid_records(
1634            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n0 TRLR\n",
1635            "verification error: 'GEDC.FORM must have a VERS subrecord' at line 5",
1636        );
1637        invalid_records(
1638            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 NOTE test\n0 TRLR\n",
1639            "verification error: 'VERS must be the first subrecord of FORM' at line 6",
1640        );
1641        invalid_records(
1642            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 VERS 5.5.1\n0 TRLR\n",
1643            "verification error: 'GEDC.FORM.VERS must have value 5.5.5' at line 6",
1644        );
1645    }
1646
1647    #[test]
1648    fn trlr_record() {
1649        invalid_records(
1650            "\u{FEFF}0 HEAD\n",
1651            "verification error: 'TRLR must be the last record' at line 2",
1652        );
1653        invalid_records(
1654            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 VERS 5.5.5\n0 TRLR\n0 @I1@ INDI\n1 NAME Test\n",
1655            "verification error: 'TRLR must be the last record' at line 8",
1656        );
1657        invalid_records(
1658            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 VERS 5.5.5\n0 @T1@ TRLR\n",
1659            "verification error: 'TRLR must not have a cross-reference identifier' at line 7",
1660        );
1661        invalid_records(
1662            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 VERS 5.5.5\n0 TRLR something\n",
1663            "verification error: 'TRLR must not have a value' at line 7",
1664        );
1665    }
1666}