gedcom-core 0.0.2

// Copyright 2021-2026 Ahmed Charles <me@ahmedcharles.com>
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! The core GEDCOM data representation language may be used to represent
//! any form of structured information, not just genealogical data, using
//! a sequential stream of characters.

use std::fmt;
use std::num::NonZeroU8;

use nom::{
    Compare, CompareResult, ExtendInto, IResult, Input, Needed, Offset, Parser,
    error::{Error as NomError, ErrorKind},
};
use nom_locate::LocatedSpan;
use serde::{Serialize, Serializer, ser::SerializeSeq};
use smallvec::SmallVec;
use thiserror::Error;

/// Represents an error with reading a GEDCOM file.
///
/// This is an opaque wrapper around the private `InternalError`; its
/// `Display` output is forwarded to the wrapped value via
/// `#[error(transparent)]`.
#[derive(Error, Debug)]
#[error(transparent)]
pub struct Error {
    // `#[from]` generates `From<InternalError> for Error`.
    #[from]
    internal: InternalError,
}

impl From<nom::Err<NomError<Span<'_>>>> for Error {
    fn from(err: nom::Err<NomError<Span<'_>>>) -> Error {
        Error {
            internal: match err {
                nom::Err::Incomplete(_) => unreachable!(),
                nom::Err::Failure(e) => {
                    InternalError::Nom(e.code, e.input.location_line(), e.input.get_utf8_column())
                }
                nom::Err::Error(e) => {
                    InternalError::Nom(e.code, e.input.location_line(), e.input.get_utf8_column())
                }
            },
        }
    }
}

/// The concrete failure kinds hidden behind the public `Error` wrapper.
#[derive(Error, Debug)]
enum InternalError {
    /// A structural rule was violated: the rule's message and the line
    /// number that is shown to the user.
    #[error("verification error: '{0}' at line {1}")]
    Verify(&'static str, usize),
    /// A nom parser failed: the nom error kind, the line, and the UTF-8
    /// column of the input at the point of failure.
    #[error("nom error kind: {}, line: {}:{}", .0.description(), .1, .2)]
    Nom(ErrorKind, u32, usize),
}

/// Parser input: a `&str` fragment wrapped in `LocatedSpan` so that errors
/// can report the line and column at which they occurred.
type Span<'a> = LocatedSpan<&'a str>;

/// One chunk of a line value, borrowed from the input.
#[derive(Debug)]
enum TextEsc<'a> {
    /// Plain text.
    Text(&'a str),
    /// The text of an escape sequence, without the surrounding `@#` and `@`.
    Esc(&'a str),
}

impl<'a> ExtendInto for TextEsc<'a> {
    type Item = char;
    type Extender = ItemsInner<'a>;
    fn new_builder(&self) -> Self::Extender {
        ItemsInner {
            data: SmallVec::new(),
        }
    }
    fn extend_into(&self, acc: &mut Self::Extender) {
        acc.data.push(match self {
            TextEsc::Text(t) => TextEsc::Text(t),
            TextEsc::Esc(e) => TextEsc::Esc(e),
        });
    }
}

/// Represents an efficient, extendable string.
///
/// The content is kept as a list of chunks borrowed from the input rather
/// than being copied into a single buffer.
#[derive(Debug, Eq, PartialEq, Serialize)]
pub struct Item<'a>(ItemsInner<'a>);

/// The chunk list backing an `Item`.
#[derive(Debug, Default)]
struct ItemsInner<'a> {
    // Chunks in input order; the inline capacity is one chunk.
    data: SmallVec<[TextEsc<'a>; 1]>,
}

/// Turns a chunk into the three-part state consumed by [`Bytes`]: an
/// optional leading marker, the chunk's bytes, and an optional trailing
/// marker.
///
/// Escape chunks are bracketed by `0xFF` on both sides; plain text has no
/// markers.
fn map_item_iter<'a>(
    item: &TextEsc<'a>,
) -> (
    Option<NonZeroU8>,
    std::slice::Iter<'a, u8>,
    Option<NonZeroU8>,
) {
    let (marker, text) = match *item {
        TextEsc::Text(text) => (None, text),
        TextEsc::Esc(text) => (NonZeroU8::new(0xFF), text),
    };
    (marker, text.as_bytes().iter(), marker)
}

impl ItemsInner<'_> {
    /// Iterates over the bytes of every chunk in order, with escape chunks
    /// bracketed by marker bytes (see `map_item_iter`).
    fn bytes(&self) -> Bytes<'_> {
        let mut remaining = self.data.iter();
        let current = remaining.next().map(map_item_iter);
        Bytes {
            str_iter: current,
            item_iter: remaining,
        }
    }
    /// Computes a length in bytes for the value, not counting the doubling
    /// of `@` characters.
    ///
    /// An escape chunk counts as `@#` + text + `@`; a text chunk that
    /// directly follows an escape counts one extra byte.
    fn len(&self) -> usize {
        let (total, _) = self
            .data
            .iter()
            .fold((0usize, false), |(total, after_esc), item| match item {
                TextEsc::Text(t) => (total + usize::from(after_esc) + t.len(), false),
                TextEsc::Esc(t) => (total + 2 + t.len() + 1, true),
            });
        total
    }
}

/// Byte iterator over the chunks of an `ItemsInner`.
struct Bytes<'a> {
    // State for the chunk currently being emitted: pending leading marker,
    // remaining bytes, pending trailing marker. `None` once all chunks are
    // exhausted.
    str_iter: Option<(
        Option<NonZeroU8>,
        std::slice::Iter<'a, u8>,
        Option<NonZeroU8>,
    )>,
    // Chunks that have not been started yet.
    item_iter: std::slice::Iter<'a, TextEsc<'a>>,
}

impl Iterator for Bytes<'_> {
    type Item = u8;
    /// Yields the next byte: the current chunk's leading marker, then its
    /// bytes, then its trailing marker, before moving on to the next chunk.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // No current chunk means everything has been emitted.
            let (leading, body, trailing) = self.str_iter.as_mut()?;
            if let Some(marker) = leading.take() {
                return Some(marker.get());
            }
            if let Some(&byte) = body.next() {
                return Some(byte);
            }
            if let Some(marker) = trailing.take() {
                return Some(marker.get());
            }
            // Current chunk is finished; advance to the next one (if any).
            self.str_iter = self.item_iter.next().map(map_item_iter);
        }
    }
}

// Equality is byte-stream comparison (see the `PartialEq` impl), which is a
// full equivalence relation.
impl Eq for ItemsInner<'_> {}

/// Wraps a string slice as a value consisting of a single plain-text chunk.
impl<'a> From<&'a str> for ItemsInner<'a> {
    fn from(s: &'a str) -> ItemsInner<'a> {
        ItemsInner {
            data: std::iter::once(TextEsc::Text(s)).collect(),
        }
    }
}

/// Two values are equal when their byte streams are equal, so how the text
/// is split into chunks does not matter.
impl PartialEq for ItemsInner<'_> {
    fn eq(&self, other: &Self) -> bool {
        Iterator::eq(self.bytes(), other.bytes())
    }
}

/// A run of consecutive chunks that is displayed and serialized as a single
/// string. The `Display` impl expects every chunk to be `TextEsc::Text`.
struct TextSlice<'a>(&'a [TextEsc<'a>]);

/// Writes the text chunks back to back with nothing in between.
impl fmt::Display for TextSlice<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.0.iter().try_for_each(|chunk| match chunk {
            TextEsc::Text(text) => f.write_str(text),
            // Callers only build a `TextSlice` from escape-free runs.
            TextEsc::Esc(_) => unreachable!(),
        })
    }
}

/// Serializes the run as one string, produced through its `Display` impl.
impl Serialize for TextSlice<'_> {
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        serializer.collect_str(self)
    }
}

/// Serializes the value as a sequence: each maximal run of text chunks
/// becomes one string element, and each escape chunk becomes its own string
/// element, in input order.
impl Serialize for ItemsInner<'_> {
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let mut seq = serializer.serialize_seq(None)?;
        // Index of the first chunk of the text run not yet emitted.
        let mut run_start = 0;
        for (idx, chunk) in self.data.iter().enumerate() {
            let TextEsc::Esc(esc) = chunk else {
                continue;
            };
            // Flush any text that precedes this escape.
            if run_start < idx {
                seq.serialize_element(&TextSlice(&self.data[run_start..idx]))?;
            }
            seq.serialize_element(esc)?;
            run_start = idx + 1;
        }
        // Flush trailing text after the last escape.
        if run_start < self.data.len() {
            seq.serialize_element(&TextSlice(&self.data[run_start..]))?;
        }
        seq.end()
    }
}

/// Newtype over `Span` so that this file can implement nom's `ExtendInto`
/// with `ItemsInner` as the accumulator type.
#[derive(Clone, Copy)]
struct Str<'a>(Span<'a>);

/// Comparison is delegated to the wrapped `Span`.
impl<S: AsRef<str>> Compare<S> for Str<'_> {
    fn compare(&self, s: S) -> CompareResult {
        self.0.compare(s.as_ref())
    }
    fn compare_no_case(&self, s: S) -> CompareResult {
        self.0.compare_no_case(s.as_ref())
    }
}

impl<'a> ExtendInto for Str<'a> {
    type Item = char;
    type Extender = ItemsInner<'a>;
    fn new_builder(&self) -> Self::Extender {
        ItemsInner {
            data: SmallVec::new(),
        }
    }
    fn extend_into(&self, acc: &mut Self::Extender) {
        acc.data.push(TextEsc::Text(*self.0));
    }
}

/// nom input implementation; every method is delegated to the wrapped
/// `Span`, re-wrapping any returned spans in `Str`.
impl<'a> Input for Str<'a> {
    type Item = char;
    type Iter = std::str::Chars<'a>;
    type IterIndices = std::str::CharIndices<'a>;

    fn input_len(&self) -> usize {
        self.0.input_len()
    }

    fn take(&self, count: usize) -> Self {
        Str(self.0.take(count))
    }

    fn take_from(&self, index: usize) -> Self {
        Self(self.0.take_from(index))
    }

    fn take_split(&self, count: usize) -> (Self, Self) {
        // The pair is passed through in the order the inner span returns it.
        let (a, b) = self.0.take_split(count);
        (Str(a), Str(b))
    }

    fn position<P: Fn(Self::Item) -> bool>(&self, predicate: P) -> Option<usize> {
        self.0.position(predicate)
    }

    fn iter_elements(&self) -> Self::Iter {
        self.0.iter_elements()
    }

    fn iter_indices(&self) -> Self::IterIndices {
        self.0.iter_indices()
    }

    fn slice_index(&self, count: usize) -> Result<usize, Needed> {
        self.0.slice_index(count)
    }
}

/// Offset computation is delegated to the wrapped `Span`.
impl Offset for Str<'_> {
    fn offset(&self, second: &Self) -> usize {
        self.0.offset(&second.0)
    }
}

/// Adapts `nom::bytes::escaped_transform` so that it accepts and returns
/// `Span` while accumulating its output into an `ItemsInner`.
///
/// * `normal` matches a stretch of unescaped text.
/// * `control_char` is the character that introduces an escape.
/// * `transform` parses what follows the control character and produces the
///   chunk to append.
///
/// The input is wrapped in `Str` before parsing; the remaining input and any
/// error input are unwrapped back to `Span` afterwards.
fn escaped_transform_<'a, F, G>(
    normal: F,
    control_char: char,
    transform: G,
) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, ItemsInner<'a>> + 'a
where
    F: Parser<Str<'a>, Output = Str<'a>, Error = NomError<Str<'a>>> + 'a,
    G: Parser<Str<'a>, Output = TextEsc<'a>, Error = NomError<Str<'a>>> + 'a,
{
    let mut e = nom::bytes::escaped_transform(normal, control_char, transform);
    move |i: Span<'a>| {
        e.parse_complete(Str(i))
            .map(|(i, o)| (i.0, o))
            .map_err(|e| e.map_input(|i| i.0))
    }
}

/// Represents a line value, either a pointer or item.
#[derive(Debug, Eq, PartialEq, Serialize)]
pub enum Value<'a> {
    /// Represents a pointer to another record: the identifier between the
    /// `@` delimiters.
    Pointer(&'a str),
    /// Represents an actual value (text, possibly containing escapes).
    Item(Item<'a>),
}

/// Represents an entire line or record in the GEDCOM data format.
#[derive(Debug, Eq, PartialEq, Serialize)]
pub struct Line<'a> {
    // Nesting level; the parser accepts 0 through 99.
    level: u8,
    // Cross-reference identifier without the surrounding `@` characters.
    xref: Option<&'a str>,
    // Tag: an optional leading `_` followed by alphanumerics.
    tag: &'a str,
    // Pointer or text value, if the line has one.
    value: Option<Value<'a>>,
}

impl<'a> Line<'a> {
    /// The level of this record.
    pub fn level(&self) -> u8 {
        self.level
    }
    /// The optional cross-reference identifier for this record.
    pub fn xref(&self) -> Option<&'a str> {
        self.xref
    }
    /// The tag for this record.
    pub fn tag(&self) -> &'a str {
        self.tag
    }
    /// The optional value for this record.
    pub fn value(&self) -> Option<&Value<'a>> {
        self.value.as_ref()
    }
    fn len(&self) -> usize {
        let level_len = if self.level < 10 { 1 } else { 2 };
        let xref_len = if let Some(xref) = self.xref {
            1 + 2 + xref.len()
        } else {
            0
        };
        let value_len = match self.value {
            Some(Value::Pointer(p)) => 1 + 2 + p.len(),
            Some(Value::Item(ref text)) if text.0.len() == 0 => unreachable!(),
            Some(Value::Item(ref text)) => {
                1 + text.0.len() + text.0.bytes().filter(|&c| c == b'@').count()
            }
            None => 0,
        };
        level_len + xref_len + 1 + self.tag.len() + value_len
    }
}

/// Builds a parser for a single GEDCOM line that ends with `terminator`.
///
/// The returned parser accepts
/// `level [delim xref] delim tag [delim line_value] terminator` and, in
/// addition to the grammar, enforces that:
///
/// * the level is below 100,
/// * identifiers (xref / pointer) are at most 20 characters,
/// * the tag is at most 31 characters,
/// * the whole line including its terminator is at most 255 bytes, and
/// * a cross-reference identifier only appears on a level 0 line.
fn line<'a>(terminator: Span<'a>) -> impl Fn(Span<'a>) -> IResult<Span<'a>, Line<'a>> {
    move |input: Span<'a>| {
        use nom::ParseTo;
        use nom::branch::alt;
        use nom::bytes::{tag, take_while, take_while1};
        use nom::character::complete::alphanumeric1;
        use nom::character::one_of;
        use nom::combinator::{cut, opt, peek, recognize, verify};
        use nom::sequence::{delimited, preceded, terminated};

        // delim = space
        let delim_ = tag(" ");

        // digit = U+0030 - U+0039
        let digit_ = take_while1(|ch: char| ch.is_ascii_digit());

        // level = [ digit | non_zero_digit + digit ]
        let l_digit_ = alt((tag("0"), preceded(peek(one_of("123456789")), digit_)));
        let level_ = verify(l_digit_.map_opt(|i: Span<'_>| i.parse_to()), |&o| o < 100);

        // identifier_string = [ alphanum | alphanum + identifier_string ]
        let identifier_string_ =
            || verify::<Span<'a>, _, _, _, _>(alphanumeric1, |o: &Span<'_>| o.len() <= 20);

        // pointer = U+0040 + identifier_string + U+0040
        let pointer_ = || delimited(tag("@"), identifier_string_(), cut(tag("@")));

        // tag = [ [ U+005F ] + alphanum | tag + alphanum ]
        let tag_ = verify(recognize((opt(tag("_")), alphanumeric1)), |o: &Span<'_>| {
            o.len() <= 31
        });

        // escape_text = [ alphanum | escape_text + alphanum | escape_text + space ]
        let escape_text_plus_space_ =
            take_while(|ch: char| ch.is_ascii_alphanumeric() || ch == ' ');
        let escape_text_ = recognize((alphanumeric1, escape_text_plus_space_));

        // escape = U+0040 + U+0023 + escape_text + U+0040
        // (the leading U+0040 is consumed as the control character below)
        let escape_ = delimited(tag("#"), cut(escape_text_), cut(tag("@")))
            .map(|o: Str<'_>| TextEsc::Esc(*o.0));

        // line_text = [ line_char | line_text + line_char ]
        let line_text_ = escaped_transform_(
            take_while1(|ch: char| {
                !matches!(ch,
                    // disallowed: U+0000 - U+001F, except U+0009 = most C0 control characters
                    '\u{0000}'..='\u{0008}' |
                    '\u{000A}'..='\u{001F}' |
                    // special: U+0040 + U+0040 = @@
                    '@' |
                    // disallowed: U+007F = DEL character
                    '\u{007F}'
                )
            }),
            '@',
            alt((
                tag("@").map(|o: Str<'_>| TextEsc::Text(*o.0)),
                // An escape sequence must be followed by either a delim (space) or terminator
                terminated(escape_, alt((tag(" "), peek(tag(*terminator))))),
            )),
        );

        // line_item = [ escape | line_text | escape + delim + line_text ]
        // Note: this is inaccurate, because dates allow text before escapes,
        // e.g. ABT @#FRENCH R@ 11 NIVO 6
        let line_item_ = line_text_.map(|t| Value::Item(Item(t)));

        // line_value = [ pointer | line_item ]
        let line_value_ = alt((pointer_().map(|p| Value::Pointer(*p)), line_item_));

        // terminator = [ carriage_return | line_feed | carriage_return + line_feed ]
        // use the detected ending
        let terminator_ = tag(*terminator);

        // line = level + [ delim + xref_ID ] + delim + tag + [ delim + line_value ] + terminator
        let opt_pointer_ = opt(preceded(tag(" "), pointer_().map(|s| *s)));
        let opt_line_value = opt(preceded(tag(" "), opt(line_value_)));
        verify(
            (
                level_,
                opt_pointer_,
                delim_,
                tag_,
                opt_line_value,
                terminator_,
            )
                .map(|(l, x, _, t, v, _)| Line {
                    level: l,
                    xref: x,
                    tag: *t,
                    value: v.flatten(),
                }),
            // Both rules must hold. The parentheses matter: `&&` binds tighter
            // than `||`, so without them the length limit would only apply
            // to level 0 lines and be skipped for any line without an xref.
            |l| l.len() + terminator.len() <= 255 && (l.level == 0 || l.xref.is_none()),
        )
        .parse_complete(input)
    }
}

/// Checks the cross-line rules on a parsed sequence of lines and returns the
/// lines unchanged when all of them hold.
///
/// The argument is the raw parser result: the remaining input, and a pair of
/// the detected terminator (ignored) and the parsed lines.
///
/// Checks are performed per line in the order written below, so the first
/// violated rule on the earliest offending line decides the error.
fn verify_lines<'a>(
    (input, (_, ls)): (Span<'a>, (Span<'a>, Vec<Line<'a>>)),
) -> Result<Vec<Line<'a>>, Error> {
    // Builds a verification error; `l` is a 0-based line index and is
    // reported 1-based.
    fn v<'b>(s: &'static str, l: usize) -> Result<Vec<Line<'b>>, Error> {
        Err(InternalError::Verify(s, l + 1).into())
    }
    if !input.is_empty() {
        return v("not all input consumed", 0);
    }
    // Every cross-reference identifier defined so far.
    let mut records = std::collections::BTreeSet::new();
    // The previous line, `None` while looking at the first line.
    let mut last: Option<&Line<'_>> = None;
    for (i, l) in ls.iter().enumerate() {
        // Deepest level this line may have: one below the previous line, or
        // 0 for the first line.
        let last_plus_1 = last.map(|r| r.level + 1).unwrap_or(0);
        if l.level > last_plus_1 {
            return v("level increase too great", i);
        }
        if l.level == last_plus_1
            && last
                .map(|r| r.tag == "CONT" || r.tag == "CONC")
                .unwrap_or(false)
        {
            return v("CONT/CONC cannot have a subrecord", i);
        }
        if l.tag == "CONT" || l.tag == "CONC" {
            if l.level == 0 {
                return v("CONT/CONC cannot be a top level record", i);
            }
            if !l
                .value
                .as_ref()
                .map(|v| matches!(v, Value::Item(_)))
                .unwrap_or(true)
            {
                return v("CONT/CONC cannot have a cross reference value", i);
            }
            if l.level != last_plus_1
                && !last
                    .map(|r| r.tag == "CONT" || r.tag == "CONC")
                    .unwrap_or(false)
            {
                return v(
                    "CONT/CONC have to be a direct subrecord or sibling record of CONT/CONC",
                    i,
                );
            }
            // `last_plus_1 - 1` cannot underflow here: on the first line
            // (`last_plus_1 == 0`) one of the checks above has already
            // returned for any CONT/CONC line.
            if l.level != last_plus_1 && l.level != last_plus_1 - 1 {
                return v(
                    "CONT/CONC can only be a subrecord or sibling of the last record",
                    i,
                );
            }
            if l.level != 0
                && !last
                    .and_then(|r| r.value.as_ref())
                    .map(|v| matches!(v, Value::Item(_)))
                    .unwrap_or(true)
            {
                return v("CONT/CONC cannot follow a cross reference value", i);
            }
        }
        // This line is not a subrecord of the previous one, so the previous
        // line has no subrecords; it must then carry a value unless it is
        // CONT or TRLR. The error is reported against the previous line.
        if l.level != last_plus_1
            && !last
                .map(|r| r.tag == "CONT" || r.tag == "TRLR" || r.value.is_some())
                .unwrap_or(true)
        {
            return v(
                "CONT/TRLR are the only records allowed to have no subrecords or value",
                i - 1,
            );
        }
        if let Some(xref) = l.xref
            && !records.insert(xref)
        {
            return v("duplicate cross reference", i);
        }
        last = Some(l);
    }
    // Second pass: every pointer must refer to an identifier defined
    // somewhere in the file (before or after the pointer).
    for (i, l) in ls.iter().enumerate() {
        if let Some(Value::Pointer(p)) = l.value
            && !records.contains(p)
        {
            return v("missing cross reference", i);
        }
    }
    Ok(ls)
}

/// Parses a string (GEDCOM file content) into a sequence of `Line`s.
pub fn lines(input: &str) -> Result<Vec<Line<'_>>, Error> {
    use nom::branch::alt;
    use nom::bytes::{tag, take_till};
    use nom::combinator::{all_consuming, opt, peek, recognize};
    use nom::multi::many1;
    use nom::sequence::preceded;

    // [ carriage_return | line_feed | carriage_return + line_feed ]
    let terminator_ = alt((recognize((tag("\r"), opt(tag("\n")))), tag("\n")));

    let not_line_ending_ = take_till(|ch: char| ch == '\r' || ch == '\n');
    let find_terminator_ = peek(preceded(not_line_ending_, terminator_));
    all_consuming(preceded(
        tag("\u{FEFF}"),
        find_terminator_.flat_map(|i| many1(line(i)).map(move |o| (i, o))),
    ))
    .parse_complete(Span::new(input))
    .map_err(|e| e.into())
    .and_then(verify_lines)
}

/// Represents a logical record in the GEDCOM data format.
///
/// Unlike a `Line`, a record owns its subrecords, and the content of any
/// CONT/CONC lines has been folded into `value`.
#[allow(single_use_lifetimes)]
#[derive(Debug, Eq, PartialEq, Serialize)]
pub struct Record<'a> {
    // Nesting level copied from the line.
    level: u8,
    // Cross-reference identifier copied from the line.
    xref: Option<&'a str>,
    // Tag copied from the line.
    tag: &'a str,
    // Value of the line, extended by any CONT/CONC lines.
    value: Option<Value<'a>>,
    // Line number supplied by whoever builds the record.
    line: usize,
    // Records nested directly below this one, in file order.
    subrecords: Vec<Record<'a>>,
}

impl<'a> From<(usize, Line<'a>)> for Record<'a> {
    fn from((i, l): (usize, Line<'a>)) -> Record<'a> {
        Record {
            level: l.level,
            xref: l.xref,
            tag: l.tag,
            value: l.value,
            line: i,
            subrecords: Vec::new(),
        }
    }
}

/// Checks the fixed header/trailer structure of a record tree and returns
/// the records unchanged when it is valid.
///
/// Required structure:
///
/// * the first record is `HEAD`, without identifier or value;
/// * the last record is `TRLR`, without identifier or value;
/// * `HEAD`'s first subrecord is `GEDC` (no value) containing `VERS 5.5.5`
///   followed by `FORM <value>`, which itself starts with `VERS 5.5.5`;
/// * `HEAD`'s second subrecord is `CHAR`.
///
/// Errors report the line of the offending record. `Record::line` is
/// already 1-based (`records()` stores `index + 1`), so it is reported
/// as-is rather than incremented again.
fn verify_records<'a>(records: Vec<Record<'a>>) -> Result<Vec<Record<'a>>, Error> {
    // Builds a verification error; `l` is the 1-based line number to report.
    fn v<'b>(s: &'static str, l: usize) -> Result<Vec<Record<'b>>, Error> {
        Err(InternalError::Verify(s, l).into())
    }
    // HEAD must be the first record
    match records.first() {
        None => unreachable!(), // The parser requires 1 or more lines.
        Some(head) if head.level != 0 => unreachable!(), // The first record is always level 0.
        Some(head) if head.tag != "HEAD" => {
            return v("HEAD must be the first record", head.line);
        }
        Some(head) if head.xref.is_some() => {
            return v("HEAD must not have a cross-reference identifier", head.line);
        }
        Some(head) if head.value.is_some() => {
            return v("HEAD must not have a value", head.line);
        }
        _ => {}
    }
    // TRLR must be the last record
    match records.last() {
        None => return v("TRLR record is required", 1),
        Some(trlr) if trlr.tag != "TRLR" => {
            return v("TRLR must be the last record", trlr.line);
        }
        Some(trlr) if trlr.level != 0 => {
            return v("TRLR must be a level 0 record", trlr.line);
        }
        Some(trlr) if trlr.xref.is_some() => {
            return v("TRLR must not have a cross-reference identifier", trlr.line);
        }
        Some(trlr) if trlr.value.is_some() => {
            return v("TRLR must not have a value", trlr.line);
        }
        _ => {}
    }
    // GEDC must be the first subrecord of HEAD
    if let Some(head) = records.first() {
        match head.subrecords.first() {
            None => unreachable!(), // Already checked due to line value check above.
            Some(gedc) if gedc.tag != "GEDC" => {
                return v("GEDC must be the first subrecord of HEAD", gedc.line);
            }
            Some(gedc) if gedc.value.is_some() => {
                return v("GEDC must not have a value", gedc.line);
            }
            Some(gedc) => {
                // VERS must be the first subrecord of GEDC with value "5.5.5"
                match gedc.subrecords.first() {
                    None => unreachable!(), // Already checked due to line value check above.
                    Some(vers) if vers.tag != "VERS" => {
                        return v("VERS must be the first subrecord of GEDC", vers.line);
                    }
                    Some(vers) => {
                        let expected = Some(Value::Item(Item(ItemsInner::from("5.5.5"))));
                        if vers.value != expected {
                            return v("GEDC.VERS must have value 5.5.5", vers.line);
                        }
                    }
                }
                // FORM must be the second subrecord of GEDC
                match gedc.subrecords.get(1) {
                    None => {
                        return v("GEDC must have a FORM subrecord", gedc.line);
                    }
                    Some(form) if form.tag != "FORM" => {
                        return v("FORM must be the second subrecord of GEDC", form.line);
                    }
                    Some(form) if form.value.is_none() => {
                        return v("GEDC.FORM must have a value", form.line);
                    }
                    Some(form) => {
                        // VERS must be the first subrecord of FORM with value "5.5.5"
                        match form.subrecords.first() {
                            None => {
                                return v("GEDC.FORM must have a VERS subrecord", form.line);
                            }
                            Some(vers) if vers.tag != "VERS" => {
                                return v("VERS must be the first subrecord of FORM", vers.line);
                            }
                            Some(vers) => {
                                let expected = Some(Value::Item(Item(ItemsInner::from("5.5.5"))));
                                if vers.value != expected {
                                    return v("GEDC.FORM.VERS must have value 5.5.5", vers.line);
                                }
                            }
                        }
                    }
                }
            }
        }
        // CHAR must be the second subrecord of HEAD
        match head.subrecords.get(1) {
            None => {
                return v("HEAD must have a CHAR subrecord", head.line);
            }
            Some(char) if char.tag != "CHAR" => {
                return v("CHAR must be the second subrecord of HEAD", char.line);
            }
            _ => {}
        }
    }
    Ok(records)
}

/// Parses a string (GEDCOM file content) into a sequence of `Record`s.
///
/// The input is first parsed and checked by `lines`. The lines are then
/// assembled into a tree by level, CONT/CONC lines are merged into the value
/// of the record they continue, and the result is checked by
/// `verify_records`.
///
/// # Errors
///
/// Returns an [`Error`] if line parsing or verification fails, if a
/// CONT/CONC line appears within the first six lines, or if the record
/// structure is invalid.
pub fn records(input: &str) -> Result<Vec<Record<'_>>, Error> {
    lines(input).and_then(|ls| {
        // Builds a verification error; `l` is a 0-based line index and is
        // reported 1-based.
        fn v<'b>(s: &'static str, l: usize) -> Result<Vec<Record<'b>>, Error> {
            Err(InternalError::Verify(s, l + 1).into())
        }
        // Synthetic root whose subrecords are the top-level records.
        let mut recs = Record {
            level: 0,
            xref: None,
            tag: "",
            value: None,
            line: 0,
            subrecords: Vec::new(),
        };
        // stack[k] is the index, within its parent's `subrecords`, of the
        // most recently added record at level k.
        let mut stack: Vec<usize> = Vec::new();
        for (i, l) in ls.into_iter().enumerate() {
            let lvl = l.level;
            // Drop path entries at this level and deeper.
            stack.truncate(lvl.into());
            // Walk the path from the root to the parent of this line.
            let append = if lvl == 0 {
                &mut recs
            } else {
                stack
                    .iter()
                    .fold(&mut recs, |acc, &x| &mut acc.subrecords[x])
            };
            // Appends the value of CONT/CONC line `l` to the value of `r`;
            // CONT (`cont == true`) inserts a newline first. A missing or
            // pointer value on `r` is replaced by an empty text value.
            fn cont_conc<'a>(r: &mut Record<'a>, l: Line<'a>, cont: bool) {
                let mut v = match r.value.take() {
                    Some(Value::Item(Item(v))) => v,
                    Some(Value::Pointer(_)) | None => Default::default(),
                };
                if cont {
                    v.data.push(TextEsc::Text("\n"));
                }
                if let Some(Value::Item(Item(i))) = l.value {
                    v.data.extend(i.data.into_iter());
                }
                r.value = Some(Value::Item(Item(v)));
            }
            if l.tag == "CONT" || l.tag == "CONC" {
                // The first 6 lines are fixed as part of the basic form header.
                if i < 6 {
                    return v("CONT/CONC not supported as basic form HEAD subrecords", i);
                }
                let cont = l.tag == "CONT";
                cont_conc(append, l, cont);
            } else {
                // Remember where the new record lives so deeper lines can
                // find it, then add it with its 1-based line number.
                stack.push(append.subrecords.len());
                append.subrecords.push((i + 1, l).into());
            }
        }
        verify_records(recs.subrecords)
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn terminators() {
        let expected_line = Line {
            level: 0,
            xref: None,
            tag: "HEAD",
            value: None,
        };
        let (remaining, result) = line("\r".into())("0 HEAD\r".into()).unwrap();
        assert_eq!("", *remaining);
        assert_eq!(expected_line, result);
        let (remaining, result) = line("\n".into())("0 HEAD\n".into()).unwrap();
        assert_eq!("", *remaining);
        assert_eq!(expected_line, result);
        let (remaining, result) = line("\r\n".into())("0 HEAD\r\n".into()).unwrap();
        assert_eq!("", *remaining);
        assert_eq!(expected_line, result);
    }

    #[track_caller]
    fn valid_case<'a>(input: &'a str, l: u8, x: Option<&'a str>, t: &'a str, v: Option<Value<'a>>) {
        let expected_line = Line {
            level: l,
            xref: x,
            tag: t,
            value: v,
        };
        let (remaining, result) = line("\r\n".into())(input.into()).unwrap();
        assert_eq!("", *remaining);
        assert_eq!(expected_line, result);
        eprintln!("{}", input);
        let c = 2 + if input.ends_with(" \r\n") { 1 } else { 0 };
        assert_eq!(
            input.len(),
            line("\r\n".into())(input.into()).unwrap().1.len() + c
        );
    }

    #[track_caller]
    fn invalid_case(input: &str, len: usize, error: &str) {
        let l = line("\r\n".into())(input.into());
        match l {
            Ok(v) => {
                eprintln!("{:?}", v);
                assert!(false);
            }
            Err(nom::Err::Incomplete(e)) => {
                eprintln!("{:?}", e);
                assert!(false);
            }
            Err(nom::Err::Failure(e)) | Err(nom::Err::Error(e)) => {
                assert_eq!(len, e.input.fragment().len());
                assert_eq!(error, &format!("{:?}", e));
            }
        }
    }

    #[track_caller]
    fn invalid_lines(input: &str, error: &str) {
        match lines(input) {
            Ok(v) => {
                eprintln!("{:?}", v);
                assert!(false);
            }
            Err(e) => {
                assert_eq!(error, &format!("{}", e));
            }
        }
    }

    #[track_caller]
    fn invalid_records(input: &str, error: &str) {
        match records(input) {
            Ok(v) => {
                eprintln!("{:?}", v);
                assert!(false);
            }
            Err(e) => {
                assert_eq!(error, &format!("{}", e));
            }
        }
    }

    #[test]
    fn tags() {
        valid_case("0 HEAD\r\n", 0, None, "HEAD", None);
        let upper = "0 ABCDEFGHIJKLMNOPQRSTUVWXYZ\r\n";
        valid_case(upper, 0, None, &upper[2..28], None);
        let lower = "0 abcdefghijklmnopqrstuvwxyz\r\n";
        valid_case(lower, 0, None, &lower[2..28], None);
        valid_case("0 _0123456789\r\n", 0, None, "_0123456789", None);
        valid_case("0 ADDR \r\n", 0, None, "ADDR", None);
        let max_level = "99 ABCDEFGHIJKLMNOPQRSTUVWXYZ01234 \r\n";
        valid_case(max_level, 99, None, &max_level[3..34], None);
        let max = "0 @N1234567890123456789@ ABCDEFGHIJKLMNOPQRSTUVWXYZ01234 \r\n";
        valid_case(max, 0, Some(&max[3..23]), &max[25..56], None);
    }

    #[test]
    fn levels() {
        for i in 0..100 {
            let l = format!("{} HEAD\r\n", i);
            valid_case(&l, i, None, "HEAD", None);
        }
    }

    #[test]
    fn simple_value() {
        let v = Some(Value::Item(Item("UTF-8".into())));
        valid_case("1 CHAR UTF-8\r\n", 1, None, "CHAR", v);
    }

    #[test]
    fn simple_xref() {
        // Exactly 20 characters in xref - should be valid
        valid_case(
            "0 @N1234567890123456789@ NOTE\r\n",
            0,
            Some("N1234567890123456789"),
            "NOTE",
            None,
        );
    }

    #[test]
    fn simple_pointer() {
        // Pointer value with exactly 20 characters
        let v = Some(Value::Pointer("N1234567890123456789"));
        valid_case("1 NOTE @N1234567890123456789@\r\n", 1, None, "NOTE", v);
    }

    #[test]
    fn simple_note() {
        let v = Some(Value::Item(Item("foo".into())));
        valid_case("0 @N1@ NOTE foo\r\n", 0, Some("N1"), "NOTE", v);
    }

    #[test]
    fn unicode_values() {
        // Non-ASCII UTF-8 characters should be allowed in values
        // Japanese
        valid_case(
            "1 NOTE こんにちは\r\n",
            1,
            None,
            "NOTE",
            Some(Value::Item(Item("こんにちは".into()))),
        );

        // Spanish with accents
        valid_case(
            "1 NOTE Señor Ñoño\r\n",
            1,
            None,
            "NOTE",
            Some(Value::Item(Item("Señor Ñoño".into()))),
        );

        // Emojis
        valid_case(
            "1 NOTE Hello 👋 World 🌍\r\n",
            1,
            None,
            "NOTE",
            Some(Value::Item(Item("Hello 👋 World 🌍".into()))),
        );

        // Chinese
        valid_case(
            "1 NOTE 你好世界\r\n",
            1,
            None,
            "NOTE",
            Some(Value::Item(Item("你好世界".into()))),
        );
    }

    #[test]
    fn escape_line_value() {
        // Every case assembles the expected item sequence by hand and checks
        // that the DATE line parses to it. Note that no expected text piece
        // starts with the space that follows an escape in the input line.

        // French calendar: text, escape, text.
        let french = {
            let mut parts = ItemsInner {
                data: SmallVec::new(),
            };
            Str("ABT ".into()).extend_into(&mut parts);
            TextEsc::Esc("DFRENCH R").extend_into(&mut parts);
            Str("11 NIVO 6".into()).extend_into(&mut parts);
            Value::Item(Item(parts))
        };
        valid_case(
            "1 DATE ABT @#DFRENCH R@ 11 NIVO 6\r\n",
            1,
            None,
            "DATE",
            Some(french),
        );

        // Hebrew calendar: escape first, then text without the separating space.
        let hebrew = {
            let mut parts = ItemsInner {
                data: SmallVec::new(),
            };
            TextEsc::Esc("DHEBREW").extend_into(&mut parts);
            Str("5765".into()).extend_into(&mut parts);
            Value::Item(Item(parts))
        };
        valid_case("1 DATE @#DHEBREW@ 5765\r\n", 1, None, "DATE", Some(hebrew));

        // Julian calendar: the text after the escape contains spaces of its own.
        let julian = {
            let mut parts = ItemsInner {
                data: SmallVec::new(),
            };
            TextEsc::Esc("DJULIAN").extend_into(&mut parts);
            Str("1 JAN 1700".into()).extend_into(&mut parts);
            Value::Item(Item(parts))
        };
        valid_case(
            "1 DATE @#DJULIAN@ 1 JAN 1700\r\n",
            1,
            None,
            "DATE",
            Some(julian),
        );

        // Two escapes in one value; the space before the second escape stays
        // in the text piece ("to ").
        let two_escapes = {
            let mut parts = ItemsInner {
                data: SmallVec::new(),
            };
            TextEsc::Esc("DHEBREW").extend_into(&mut parts);
            Str("to ".into()).extend_into(&mut parts);
            TextEsc::Esc("DGREGORIAN").extend_into(&mut parts);
            Value::Item(Item(parts))
        };
        valid_case(
            "1 DATE @#DHEBREW@ to @#DGREGORIAN@\r\n",
            1,
            None,
            "DATE",
            Some(two_escapes),
        );

        // A value that consists of a single escape and nothing else.
        let lone_escape = {
            let mut parts = ItemsInner {
                data: SmallVec::new(),
            };
            TextEsc::Esc("DROMAN").extend_into(&mut parts);
            Value::Item(Item(parts))
        };
        valid_case("1 DATE @#DROMAN@\r\n", 1, None, "DATE", Some(lone_escape));
    }

    #[test]
    fn escape_at() {
        /// Expects `input` to parse as a level-1 line without an xref whose
        /// value is the single text item `text`.
        #[track_caller]
        fn parses_to(input: &'static str, tag: &'static str, text: &'static str) {
            valid_case(input, 1, None, tag, Some(Value::Item(Item(text.into()))));
        }

        // A doubled `@` inside or at the start of a value yields one `@`.
        parses_to("1 EMAIL foo@@example.com\r\n", "EMAIL", "foo@example.com");
        parses_to("1 NOTE @@foo\r\n", "NOTE", "@foo");

        // A value made only of `@@` is a single `@` character.
        parses_to("1 NOTE @@\r\n", "NOTE", "@");

        // Each `@@` pair is unescaped independently.
        parses_to("1 NOTE @@@@\r\n", "NOTE", "@@");

        // Pairs at both ends of the value.
        parses_to("1 NOTE @@hello@@\r\n", "NOTE", "@hello@");
    }

    #[test]
    fn escape_text() {
        // `@#@` has nothing between the markers: the expected failure is an
        // AlphaNumeric error located on the closing `@`.
        let empty_escape = "1 DATE @#@\r\n";
        let empty_escape_err = r#"Error { input: LocatedSpan { offset: 9, line: 1, fragment: "@\r\n", extra: () }, code: AlphaNumeric }"#;
        invalid_case(empty_escape, 3, empty_escape_err);

        // Text glued directly to the closing `@` (no space, no terminator) is
        // rejected; the expected error points at the start of the escape.
        let glued_text = "1 DATE @#DHEBREW@5765\r\n";
        let glued_text_err = r#"Error { input: LocatedSpan { offset: 7, line: 1, fragment: "@#DHEBREW@5765\r\n", extra: () }, code: Tag }"#;
        invalid_case(glued_text, 16, glued_text_err);
    }

    #[test]
    fn invalid_tags() {
        // A tag starting with two underscores: the expected error sits on the
        // second underscore (offset 3).
        const DOUBLE_UNDERSCORE_ERR: &str = r#"Error { input: LocatedSpan { offset: 3, line: 1, fragment: "_HEAD\r\n", extra: () }, code: AlphaNumeric }"#;
        invalid_case("0 __HEAD\r\n", 7, DOUBLE_UNDERSCORE_ERR);

        // A 32-character tag: the expected error is a Verify failure covering
        // the whole tag.
        const LONG_TAG_ERR: &str = r#"Error { input: LocatedSpan { offset: 2, line: 1, fragment: "ABCDEFGHIJKLMNOPQRSTUVWXYZ012345\r\n", extra: () }, code: Verify }"#;
        invalid_case("0 ABCDEFGHIJKLMNOPQRSTUVWXYZ012345\r\n", 34, LONG_TAG_ERR);
    }

    #[test]
    fn invalid_levels() {
        // Single-line checks: a level with a leading zero, then a
        // three-digit level.
        let leading_zero_err = r#"Error { input: LocatedSpan { offset: 1, line: 1, fragment: "1 HEAD\r\n", extra: () }, code: Tag }"#;
        invalid_case("01 HEAD\r\n", 8, leading_zero_err);

        let three_digit_err = r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: "100 HEAD\r\n", extra: () }, code: Verify }"#;
        invalid_case("100 HEAD\r\n", 10, three_digit_err);

        // Whole-input checks: the first line is at level 1, then a jump from
        // level 0 straight to level 2.
        let starts_at_one = "\u{FEFF}1 HEAD\r\n";
        invalid_lines(
            starts_at_one,
            "verification error: 'level increase too great' at line 1",
        );

        let skips_a_level = "\u{FEFF}0 HEAD\r\n2 VERS 5.5.5\r\n";
        invalid_lines(
            skips_a_level,
            "verification error: 'level increase too great' at line 2",
        );
    }

    #[test]
    fn invalid_pointer() {
        // Cross-reference identifier of 21 characters ("N" plus 20 digits) in
        // the xref position.
        let (input, remaining, expected) = (
            "0 @N01234567890123456789@ NOTE foo\r\n",
            34,
            r#"Error { input: LocatedSpan { offset: 2, line: 1, fragment: "@N01234567890123456789@ NOTE foo\r\n", extra: () }, code: AlphaNumeric }"#,
        );
        invalid_case(input, remaining, expected);

        // The same 21-character identifier used as a pointer value.
        let (input, remaining, expected) = (
            "0 NOTE @N01234567890123456789@\r\n",
            25,
            r#"Error { input: LocatedSpan { offset: 7, line: 1, fragment: "@N01234567890123456789@\r\n", extra: () }, code: Tag }"#,
        );
        invalid_case(input, remaining, expected);

        // An xref on a level-1 line: the expected error is a Verify failure
        // covering the whole line.
        let (input, remaining, expected) = (
            "1 @N1@ NOTE foo\r\n",
            17,
            r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: "1 @N1@ NOTE foo\r\n", extra: () }, code: Verify }"#,
        );
        invalid_case(input, remaining, expected);
    }

    #[test]
    fn line_length_limit() {
        // Accepted line: "0 NOTE " (7 bytes) + 246 value bytes + "\r\n"
        // (2 bytes) = 255 bytes in total.
        let fitting = "x".repeat(246);
        let accepted = format!("0 NOTE {}\r\n", fitting);
        let expected = Value::Item(Item(fitting.as_str().into()));
        valid_case(&accepted, 0, None, "NOTE", Some(expected));

        // Rejected line: 7 bytes + 247 value bytes followed by a bare "\n".
        // The expected error is a Tag failure at offset 254, i.e. on the
        // terminator, after all 247 value bytes.
        // NOTE(review): this line is also 255 bytes in total and ends in "\n"
        // rather than "\r\n" - confirm whether the rejection is caused by the
        // length limit or by the terminator.
        let too_long = "x".repeat(247);
        let rejected = format!("0 NOTE {}\n", too_long);
        invalid_case(
            &rejected,
            1,
            "Error { input: LocatedSpan { offset: 254, line: 1, fragment: \"\\n\", extra: () }, code: Tag }",
        );
    }

    #[test]
    fn leading_whitespace() {
        // With "\n" as the terminator, the "\r" after it is left unconsumed
        // and returned as the remaining input.
        let (rest, parsed) = line("\n".into())("0 HEAD\n\r".into()).unwrap();
        assert_eq!("\r", *rest);
        assert_eq!(
            Line {
                level: 0,
                xref: None,
                tag: "HEAD",
                value: None,
            },
            parsed
        );

        // Any whitespace or terminator character before the level is rejected
        // at offset 0 with a OneOf error. Each entry is
        // (input, numeric argument for invalid_case, expected error text).
        for (input, remaining, expected) in [
            (
                " 0 HEAD\r\n",
                9,
                r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: " 0 HEAD\r\n", extra: () }, code: OneOf }"#,
            ),
            (
                "\t0 HEAD\r\n",
                9,
                r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: "\t0 HEAD\r\n", extra: () }, code: OneOf }"#,
            ),
            (
                "\r\n0 HEAD\r\n",
                10,
                r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: "\r\n0 HEAD\r\n", extra: () }, code: OneOf }"#,
            ),
            (
                "\n0 HEAD\r\n",
                9,
                r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: "\n0 HEAD\r\n", extra: () }, code: OneOf }"#,
            ),
            (
                "\r0 HEAD\r\n",
                9,
                r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: "\r0 HEAD\r\n", extra: () }, code: OneOf }"#,
            ),
            (
                "\n\r0 HEAD\r\n",
                10,
                r#"Error { input: LocatedSpan { offset: 0, line: 1, fragment: "\n\r0 HEAD\r\n", extra: () }, code: OneOf }"#,
            ),
        ] {
            invalid_case(input, remaining, expected);
        }
    }

    #[test]
    fn multiple_lines() {
        // The same four lines are expected whichever terminator the input uses.
        let head = Line {
            level: 0,
            xref: None,
            tag: "HEAD",
            value: None,
        };
        let gedc = Line {
            level: 1,
            xref: None,
            tag: "GEDC",
            value: None,
        };
        let vers = Line {
            level: 2,
            xref: None,
            tag: "VERS",
            value: Some(Value::Item(Item("5.5.5".into()))),
        };
        let trlr = Line {
            level: 0,
            xref: None,
            tag: "TRLR",
            value: None,
        };
        let expected = vec![head, gedc, vers, trlr];

        // Carriage return only.
        assert_eq!(
            expected,
            lines("\u{FEFF}0 HEAD\r1 GEDC\r2 VERS 5.5.5\r0 TRLR\r").unwrap()
        );
        // Line feed only.
        assert_eq!(
            expected,
            lines("\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n0 TRLR\n").unwrap()
        );
        // Carriage return followed by line feed.
        assert_eq!(
            expected,
            lines("\u{FEFF}0 HEAD\r\n1 GEDC\r\n2 VERS 5.5.5\r\n0 TRLR\r\n").unwrap()
        );
    }

    #[test]
    fn invalid_bom() {
        // The input lacks the leading byte order mark (\u{FEFF}); the expected
        // failure is a Tag error at line 1, column 1.
        let without_bom = "0 HEAD\n0 TRLR\n";
        let expected = "nom error kind: Tag, line: 1:1";
        invalid_lines(without_bom, expected);
    }

    #[test]
    fn invalid_xrefs() {
        // Line 2 points at @N2@, which no line defines.
        let dangling = "\u{FEFF}0 @N1@ NOTE Test\n1 NOTE @N2@\n";
        let dangling_err = "verification error: 'missing cross reference' at line 2";
        invalid_lines(dangling, dangling_err);

        // Line 2 defines @N1@ a second time.
        let repeated = "\u{FEFF}0 @N1@ NOTE Test\n0 @N1@ NOTE Test\n";
        let repeated_err = "verification error: 'duplicate cross reference' at line 2";
        invalid_lines(repeated, repeated_err);
    }

    #[test]
    fn invalid_terminators() {
        // First line ends in "\r", second in "\n": the terminators disagree.
        let mixed = "\u{FEFF}0 HEAD\r0 TRLR\n";
        invalid_lines(mixed, "nom error kind: End of file, line: 1:9");

        // "\n\r" (line feed before carriage return) after each line.
        let reversed = "\u{FEFF}0 HEAD\n\r0 TRLR\n\r";
        invalid_lines(reversed, "nom error kind: End of file, line: 2:1");
    }

    #[test]
    fn invalid_cont_conc() {
        // Expected messages that are shared by a CONC and a CONT variant (or
        // by two related inputs) are named once here.
        const TOP_LEVEL: &str =
            "verification error: 'CONT/CONC cannot be a top level record' at line 2";
        const NOT_LAST_RECORD: &str = "verification error: 'CONT/CONC can only be a subrecord or sibling of the last record' at line 4";
        const HAS_SUBRECORD: &str =
            "verification error: 'CONT/CONC cannot have a subrecord' at line 4";
        const XREF_VALUE: &str =
            "verification error: 'CONT/CONC cannot have a cross reference value' at line 2";
        const AFTER_XREF_VALUE: &str =
            "verification error: 'CONT/CONC cannot follow a cross reference value' at line 3";

        // CONC / CONT at level 0.
        invalid_lines("\u{FEFF}0 HEAD\n0 CONC t\n0 TRLR\n", TOP_LEVEL);
        invalid_lines("\u{FEFF}0 HEAD\n0 CONT t\n0 TRLR\n", TOP_LEVEL);

        // CONC without a value.
        invalid_lines(
            "\u{FEFF}0 HEAD\n1 TEXT t\n2 CONC\n0 TRLR\n",
            "verification error: 'CONT/TRLR are the only records allowed to have no subrecords or value' at line 3",
        );

        // CONT at the same level as the record it would continue.
        invalid_lines(
            "\u{FEFF}0 HEAD\n1 TEXT t\n1 CONT\n0 TRLR\n",
            "verification error: 'CONT/CONC have to be a direct subrecord or sibling record of CONT/CONC' at line 3",
        );

        // A level-1 CONT / CONC following a level-2 CONT / CONC.
        invalid_lines(
            "\u{FEFF}0 HEAD\n1 TEXT t\n2 CONT\n1 CONT\n0 TRLR\n",
            NOT_LAST_RECORD,
        );
        invalid_lines(
            "\u{FEFF}0 HEAD\n1 TEXT t\n2 CONC t\n1 CONC t\n0 TRLR\n",
            NOT_LAST_RECORD,
        );

        // Anything nested below a CONC.
        invalid_lines(
            "\u{FEFF}0 HEAD\n1 TEXT t\n2 CONC t\n3 CONC t\n0 TRLR\n",
            HAS_SUBRECORD,
        );
        invalid_lines(
            "\u{FEFF}0 HEAD\n1 TEXT t\n2 CONC t\n3 TEXT t\n0 TRLR\n",
            HAS_SUBRECORD,
        );

        // CONT / CONC whose own value is a cross reference.
        invalid_lines("\u{FEFF}0 @N1@ NOTE Test\n1 CONT @N1@\n", XREF_VALUE);
        invalid_lines("\u{FEFF}0 @N1@ NOTE Test\n1 CONC @N1@\n", XREF_VALUE);

        // CONT / CONC continuing a record whose value is a cross reference.
        invalid_lines(
            "\u{FEFF}0 @N1@ NOTE Test\n0 @N2@ NOTE @N1@\n1 CONT more\n",
            AFTER_XREF_VALUE,
        );
        invalid_lines(
            "\u{FEFF}0 @N1@ NOTE Test\n0 @N2@ NOTE @N1@\n1 CONC more\n",
            AFTER_XREF_VALUE,
        );
    }

    /// Asserts three views of `items`, in this order: its reported length,
    /// the bytes produced by `bytes()`, and its JSON serialization.
    #[track_caller]
    fn valid_items(items: &ItemsInner<'_>, len: usize, bytes: &[u8], json: &str) {
        assert_eq!(len, items.len());

        let produced = items.bytes().collect::<Vec<_>>();
        assert_eq!(bytes, produced.as_slice());

        let serialized = serde_json::to_string(items).unwrap();
        assert_eq!(json, serialized.as_str());
    }

    #[test]
    fn items() {
        // In the expectations below an escape is wrapped in two 0xFF bytes in
        // the byte stream, while the expected length is three more than the
        // length of the escape text.

        // Text pieces only: they serialize as one JSON string.
        {
            let mut plain = ItemsInner {
                data: SmallVec::new(),
            };
            for piece in ["hello", " ", "world", "!"] {
                Str(piece.into()).extend_into(&mut plain);
            }
            valid_items(&plain, 12, b"hello world!", r#"["hello world!"]"#);
        }

        // A single escape.
        {
            let mut escape_only = ItemsInner {
                data: SmallVec::new(),
            };
            TextEsc::Esc("hello").extend_into(&mut escape_only);
            valid_items(&escape_only, 8, b"\xFFhello\xFF", r#"["hello"]"#);
        }

        // Text, escape, text.
        {
            let mut surrounded = ItemsInner {
                data: SmallVec::new(),
            };
            Str("ABT ".into()).extend_into(&mut surrounded);
            TextEsc::Esc("DFRENCH R").extend_into(&mut surrounded);
            Str("11 NIVO 6".into()).extend_into(&mut surrounded);
            valid_items(
                &surrounded,
                26,
                b"ABT \xFFDFRENCH R\xFF11 NIVO 6",
                r#"["ABT ","DFRENCH R","11 NIVO 6"]"#,
            );
        }

        // Escape followed by text.
        {
            let mut leading_escape = ItemsInner {
                data: SmallVec::new(),
            };
            TextEsc::Esc("DFRENCH R").extend_into(&mut leading_escape);
            Str("11 NIVO 6".into()).extend_into(&mut leading_escape);
            valid_items(
                &leading_escape,
                22,
                b"\xFFDFRENCH R\xFF11 NIVO 6",
                r#"["DFRENCH R","11 NIVO 6"]"#,
            );
        }
    }

    #[test]
    fn control_characters() {
        /// Expects `input` to be rejected with `expected`; every input here
        /// has 8 bytes left at the position the expected error points to.
        #[track_caller]
        fn rejected(input: &'static str, expected: &'static str) {
            invalid_case(input, 8, expected);
        }

        // Tab (U+0009) is the one control character accepted inside a value.
        let with_tab = Value::Item(Item("hello\tworld".into()));
        valid_case("1 NOTE hello\tworld\r\n", 1, None, "NOTE", Some(with_tab));

        // For every other control character below, the expected error is a
        // Tag failure located on the control character itself (offset 12).

        // NUL (U+0000)
        rejected(
            "1 NOTE hello\x00world\r\n",
            r#"Error { input: LocatedSpan { offset: 12, line: 1, fragment: "\0world\r\n", extra: () }, code: Tag }"#,
        );

        // Bell (U+0007)
        rejected(
            "1 NOTE hello\x07world\r\n",
            r#"Error { input: LocatedSpan { offset: 12, line: 1, fragment: "\u{7}world\r\n", extra: () }, code: Tag }"#,
        );

        // Backspace (U+0008)
        rejected(
            "1 NOTE hello\x08world\r\n",
            r#"Error { input: LocatedSpan { offset: 12, line: 1, fragment: "\u{8}world\r\n", extra: () }, code: Tag }"#,
        );

        // Vertical tab (U+000B)
        rejected(
            "1 NOTE hello\x0Bworld\r\n",
            r#"Error { input: LocatedSpan { offset: 12, line: 1, fragment: "\u{b}world\r\n", extra: () }, code: Tag }"#,
        );

        // Form feed (U+000C)
        rejected(
            "1 NOTE hello\x0Cworld\r\n",
            r#"Error { input: LocatedSpan { offset: 12, line: 1, fragment: "\u{c}world\r\n", extra: () }, code: Tag }"#,
        );

        // Escape (U+001B)
        rejected(
            "1 NOTE hello\x1Bworld\r\n",
            r#"Error { input: LocatedSpan { offset: 12, line: 1, fragment: "\u{1b}world\r\n", extra: () }, code: Tag }"#,
        );

        // DEL (U+007F)
        rejected(
            "1 NOTE hello\u{007F}world\r\n",
            r#"Error { input: LocatedSpan { offset: 12, line: 1, fragment: "\u{7f}world\r\n", extra: () }, code: Tag }"#,
        );
    }

    #[test]
    fn valid_records() {
        // Builds a record value holding exactly one text item.
        let text = |s| {
            Some(Value::Item(Item(ItemsInner {
                data: smallvec::smallvec![TextEsc::Text(s)],
            })))
        };

        // The expected tree is assembled from the leaves up; `line` is the
        // 1-based input line each record comes from.
        let gedc_vers = Record {
            level: 2,
            xref: None,
            tag: "VERS",
            value: text("5.5.5"),
            line: 3,
            subrecords: vec![],
        };
        let form_vers = Record {
            level: 3,
            xref: None,
            tag: "VERS",
            value: text("5.5.5"),
            line: 5,
            subrecords: vec![],
        };
        let form = Record {
            level: 2,
            xref: None,
            tag: "FORM",
            value: text("TEST-FORM"),
            line: 4,
            subrecords: vec![form_vers],
        };
        let gedc = Record {
            level: 1,
            xref: None,
            tag: "GEDC",
            value: None,
            line: 2,
            subrecords: vec![gedc_vers, form],
        };
        let charset = Record {
            level: 1,
            xref: None,
            tag: "CHAR",
            value: text("UTF-8"),
            line: 6,
            subrecords: vec![],
        };
        let head = Record {
            level: 0,
            xref: None,
            tag: "HEAD",
            value: None,
            line: 1,
            subrecords: vec![gedc, charset],
        };
        let trlr = Record {
            level: 0,
            xref: None,
            tag: "TRLR",
            value: None,
            line: 7,
            subrecords: vec![],
        };

        let parsed = records(
            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 VERS 5.5.5\n1 CHAR UTF-8\n0 TRLR\n",
        )
        .unwrap();
        assert_eq!(vec![head, trlr], parsed);
    }

    #[test]
    fn cont_conc() {
        // Builds a record value holding exactly one text item.
        let text = |s| {
            Some(Value::Item(Item(ItemsInner {
                data: smallvec::smallvec![TextEsc::Text(s)],
            })))
        };

        // The expected tree is assembled from the leaves up; `line` is the
        // 1-based input line each record comes from.
        let gedc_vers = Record {
            level: 2,
            xref: None,
            tag: "VERS",
            value: text("5.5.5"),
            line: 3,
            subrecords: vec![],
        };
        let form_vers = Record {
            level: 3,
            xref: None,
            tag: "VERS",
            value: text("5.5.5"),
            line: 5,
            subrecords: vec![],
        };
        let form = Record {
            level: 2,
            xref: None,
            tag: "FORM",
            value: text("TEST-FORM"),
            line: 4,
            subrecords: vec![form_vers],
        };
        let gedc = Record {
            level: 1,
            xref: None,
            tag: "GEDC",
            value: None,
            line: 2,
            subrecords: vec![gedc_vers, form],
        };
        let charset = Record {
            level: 1,
            xref: None,
            tag: "CHAR",
            value: text("UTF-8"),
            line: 6,
            subrecords: vec![],
        };
        // The CONC / CONT lines (input lines 8-10) do not show up as
        // subrecords: their text is expected inside the TEXT value, with a
        // "\n" item where the CONT line starts.
        let folded_text = Record {
            level: 1,
            xref: None,
            tag: "TEXT",
            value: Some(Value::Item(Item(ItemsInner {
                data: smallvec::smallvec![
                    TextEsc::Text("fir"),
                    TextEsc::Text("st"),
                    TextEsc::Text("\n"),
                    TextEsc::Text("sec"),
                    TextEsc::Text("ond"),
                ],
            }))),
            line: 7,
            subrecords: vec![],
        };
        let head = Record {
            level: 0,
            xref: None,
            tag: "HEAD",
            value: None,
            line: 1,
            subrecords: vec![gedc, charset, folded_text],
        };
        let trlr = Record {
            level: 0,
            xref: None,
            tag: "TRLR",
            value: None,
            line: 11,
            subrecords: vec![],
        };

        let parsed = records(
            "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 VERS 5.5.5\n1 CHAR UTF-8\n1 TEXT fir\n2 CONC st\n2 CONT sec\n2 CONC ond\n0 TRLR\n",
        )
        .unwrap();
        assert_eq!(vec![head, trlr], parsed);
    }

    #[test]
    fn head_record() {
        // Each entry pairs an input with the error text it must produce; the
        // entries are checked in order.
        for (input, expected) in [
            ("\u{FEFF}\n", "nom error kind: OneOf, line: 1:2"),
            // Checks on the HEAD line itself.
            (
                "\u{FEFF}0 @I1@ INDI\n1 NAME Test\n0 HEAD\n1 GEDC\n2 VERS 5.5.5\n0 TRLR\n",
                "verification error: 'HEAD must be the first record' at line 1",
            ),
            (
                "\u{FEFF}0 @H1@ HEAD\n1 GEDC\n2 VERS 5.5.5\n0 TRLR\n",
                "verification error: 'HEAD must not have a cross-reference identifier' at line 1",
            ),
            (
                "\u{FEFF}0 HEAD something\n0 TRLR\n",
                "verification error: 'HEAD must not have a value' at line 1",
            ),
            // CONT/CONC not supported in basic HEAD subrecords (lines 1-6)
            (
                "\u{FEFF}0 HEAD\n1 CONC t\n0 TRLR\n",
                "verification error: 'CONT/CONC not supported as basic form HEAD subrecords' at line 2",
            ),
            (
                "\u{FEFF}0 HEAD\n1 CONT t\n0 TRLR\n",
                "verification error: 'CONT/CONC not supported as basic form HEAD subrecords' at line 2",
            ),
            (
                "\u{FEFF}0 HEAD\n1 GEDC\n2 CONC t\n0 TRLR\n",
                "verification error: 'CONT/CONC not supported as basic form HEAD subrecords' at line 3",
            ),
            (
                "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n3 CONC t\n0 TRLR\n",
                "verification error: 'CONT/CONC not supported as basic form HEAD subrecords' at line 4",
            ),
            (
                "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 CONC t\n0 TRLR\n",
                "verification error: 'CONT/CONC not supported as basic form HEAD subrecords' at line 5",
            ),
            (
                "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 VERS 5.5.5\n4 CONC t\n0 TRLR\n",
                "verification error: 'CONT/CONC not supported as basic form HEAD subrecords' at line 6",
            ),
            // GEDC must be the first subrecord of HEAD
            (
                "\u{FEFF}0 HEAD\n1 NOTE test\n0 TRLR\n",
                "verification error: 'GEDC must be the first subrecord of HEAD' at line 3",
            ),
            // HEAD with no subrecords is caught by an earlier check
            (
                "\u{FEFF}0 HEAD\n0 TRLR\n",
                "verification error: 'CONT/TRLR are the only records allowed to have no subrecords or value' at line 1",
            ),
            (
                "\u{FEFF}0 HEAD\n1 GEDC something\n2 VERS 5.5.5\n0 TRLR\n",
                "verification error: 'GEDC must not have a value' at line 3",
            ),
            // VERS must be the first subrecord of GEDC with value 5.5.5
            (
                "\u{FEFF}0 HEAD\n1 GEDC\n2 FORM TEST-FORM\n0 TRLR\n",
                "verification error: 'VERS must be the first subrecord of GEDC' at line 4",
            ),
            (
                "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.1\n2 FORM TEST-FORM\n0 TRLR\n",
                "verification error: 'GEDC.VERS must have value 5.5.5' at line 4",
            ),
            // FORM must be the second subrecord of GEDC
            (
                "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n0 TRLR\n",
                "verification error: 'GEDC must have a FORM subrecord' at line 3",
            ),
            (
                "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 NOTE test\n0 TRLR\n",
                "verification error: 'FORM must be the second subrecord of GEDC' at line 5",
            ),
            (
                "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM\n3 VERS 5.5.5\n0 TRLR\n",
                "verification error: 'GEDC.FORM must have a value' at line 5",
            ),
            // VERS must be the first subrecord of FORM with value 5.5.5
            (
                "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n0 TRLR\n",
                "verification error: 'GEDC.FORM must have a VERS subrecord' at line 5",
            ),
            (
                "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 NOTE test\n0 TRLR\n",
                "verification error: 'VERS must be the first subrecord of FORM' at line 6",
            ),
            (
                "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 VERS 5.5.1\n0 TRLR\n",
                "verification error: 'GEDC.FORM.VERS must have value 5.5.5' at line 6",
            ),
        ] {
            invalid_records(input, expected);
        }
    }

    #[test]
    fn trlr_record() {
        // Input that ends after HEAD, with no TRLR line at all.
        let no_trailer = "\u{FEFF}0 HEAD\n";
        invalid_records(
            no_trailer,
            "verification error: 'TRLR must be the last record' at line 2",
        );

        // A record that follows TRLR.
        let record_after_trailer = "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 VERS 5.5.5\n0 TRLR\n0 @I1@ INDI\n1 NAME Test\n";
        invalid_records(
            record_after_trailer,
            "verification error: 'TRLR must be the last record' at line 8",
        );

        // TRLR carrying a cross-reference identifier.
        let trailer_with_xref = "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 VERS 5.5.5\n0 @T1@ TRLR\n";
        invalid_records(
            trailer_with_xref,
            "verification error: 'TRLR must not have a cross-reference identifier' at line 7",
        );

        // TRLR carrying a value.
        let trailer_with_value = "\u{FEFF}0 HEAD\n1 GEDC\n2 VERS 5.5.5\n2 FORM TEST-FORM\n3 VERS 5.5.5\n0 TRLR something\n";
        invalid_records(
            trailer_with_value,
            "verification error: 'TRLR must not have a value' at line 7",
        );
    }
}