ged_io 0.14.0

A parser for GEDCOM files
Documentation
pub mod citation;
pub mod data;
pub mod quay;
pub mod text;

use crate::{
    parser::{parse_subset, Parser},
    tokenizer::{Token, Tokenizer},
    types::{
        custom::UserDefinedTag, date::change_date::ChangeDate, event::detail::Detail,
        multimedia::Multimedia, note::Note, repository::citation::Citation, source::data::Data,
        Xref,
    },
    GedcomError,
};

#[cfg(feature = "json")]
use serde::{Deserialize, Serialize};

/// Source for genealogy facts
///
/// A source record is a place where you describe the source material
/// from which you have obtained your genealogical information.
///
/// See <https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#SOURCE_RECORD>
#[derive(Clone, Debug, Default, PartialEq)]
#[cfg_attr(feature = "json", derive(Serialize, Deserialize))]
pub struct Source {
    /// Cross-reference identifier handed to the record when it is constructed,
    /// if any.
    pub xref: Option<String>,
    /// Contents of the `DATA` substructure: recorded events (tag: EVEN) and
    /// the responsible agency (tag: AGNC).
    pub data: Data,
    /// Abbreviation (tag: ABBR), including any continuation lines.
    pub abbreviation: Option<String>,
    /// Title (tag: TITL), including any continuation lines.
    pub title: Option<String>,
    /// Author (tag: AUTH), including any continuation lines.
    pub author: Option<String>,
    /// Publication facts (tag: PUBL), including any continuation lines.
    pub publication_facts: Option<String>,
    /// Text taken from the source (tag: TEXT), including any continuation lines.
    pub citation_from_source: Option<String>,
    /// Change date (tag: CHAN).
    pub change_date: Option<Box<ChangeDate>>,
    /// Multimedia entries (tag: OBJE), in the order they were added.
    pub multimedia: Vec<Multimedia>,
    /// Notes (tag: NOTE), in the order they were added.
    pub notes: Vec<Note>,
    /// Repository citations (tag: REPO), in the order they were added.
    pub repo_citations: Vec<Citation>,
    /// handles "RFN" tag; found in Ancestry.com export
    pub submitter_registered_rfn: Option<String>,
    /// User-defined tags returned by the subset parser for this record.
    pub custom_data: Vec<Box<UserDefinedTag>>,
    /// Unique identifier (tag: UID, GEDCOM 7.0).
    ///
    /// A globally unique identifier for this record. In GEDCOM 7.0, this is
    /// a URI that uniquely identifies the record across all datasets.
    ///
    /// See <https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#UID>
    pub uid: Option<String>,
    /// User reference number (tag: REFN).
    ///
    /// A user-defined number or text that the submitter uses to identify
    /// this record. Not guaranteed to be unique.
    pub user_reference_number: Option<String>,
    /// User reference type (tag: TYPE under REFN).
    ///
    /// A user-defined type for the reference number.
    pub user_reference_type: Option<String>,
    /// Automated record ID (tag: RIN).
    ///
    /// A unique record identification number assigned to the record by
    /// the source system. Used for reconciling differences between systems.
    pub automated_record_id: Option<String>,
    /// External identifiers (tag: EXID, GEDCOM 7.0).
    ///
    /// Identifiers maintained by external authorities that apply to this source.
    pub external_ids: Vec<String>,
}

impl Source {
    #[must_use]
    fn with_xref(xref: Option<Xref>) -> Self {
        Self {
            xref,
            ..Default::default()
        }
    }

    /// Creates a new `Source` from a `Tokenizer`.
    ///
    /// # Errors
    ///
    /// This function will return an error if parsing fails.
    #[allow(clippy::double_must_use)]
    pub fn new(
        tokenizer: &mut Tokenizer<'_>,
        level: u8,
        xref: Option<String>,
    ) -> Result<Source, GedcomError> {
        let mut sour = Source::with_xref(xref);
        sour.parse(tokenizer, level)?;
        Ok(sour)
    }

    pub fn add_multimedia(&mut self, media: Multimedia) {
        self.multimedia.push(media);
    }

    pub fn add_note(&mut self, note: Note) {
        self.notes.push(note);
    }

    pub fn add_repo_citation(&mut self, citation: Citation) {
        self.repo_citations.push(citation);
    }
}

impl Parser for Source {
    /// Parses the substructures of a `SOUR` record into `self`.
    ///
    /// The current (`SOUR`) token is skipped, after which `parse_subset`
    /// feeds each standard tag it encounters to one handler. That handler is
    /// also invoked for tags nested one level deeper: `DATA` carries no value
    /// of its own, so only its tag is consumed and its `EVEN` / `AGNC`
    /// children are handled as they arrive.
    ///
    /// A `TYPE` tag is stored in `user_reference_type` only when it is the
    /// first standard tag handled after a `REFN`; any other `TYPE` (for
    /// example one following `EXID`) is skipped like an unknown tag.
    ///
    /// User-defined tags returned by `parse_subset` are stored in
    /// `custom_data`.
    ///
    /// # Errors
    ///
    /// Propagates any `GedcomError` raised by the tokenizer or by the parsers
    /// of nested structures.
    fn parse(&mut self, tokenizer: &mut Tokenizer<'_>, level: u8) -> Result<(), GedcomError> {
        // skip SOUR tag
        tokenizer.next_token()?;

        // Set while the most recently handled standard tag was `REFN`, so the
        // `TYPE` that follows it can be attributed to the reference number
        // instead of being discarded by the catch-all arm.
        let mut previous_was_refn = false;

        let handle_subset = |tag: &str, tokenizer: &mut Tokenizer<'_>| -> Result<(), GedcomError> {
            let type_belongs_to_refn = previous_was_refn;
            previous_was_refn = tag == "REFN";

            let mut pointer: Option<String> = None;
            if let Token::Pointer(xref) = &tokenizer.current_token {
                pointer = Some(xref.to_string());
                tokenizer.next_token()?;
            }
            match tag {
                // DATA has no line value; step past the tag so its children
                // are dispatched to this handler next.
                "DATA" => tokenizer.next_token()?,
                "EVEN" => {
                    // EVEN sits under DATA, hence two levels below the record.
                    let events_recorded = tokenizer.take_line_value()?;
                    let mut event = Detail::new(tokenizer, level + 2, "OTHER")?;
                    event.with_source_data(events_recorded);
                    self.data.add_event(event);
                }
                "AGNC" => self.data.agency = Some(tokenizer.take_line_value()?),
                "ABBR" => self.abbreviation = Some(tokenizer.take_continued_text(level + 1)?),
                "CHAN" => self.change_date = Some(Box::new(ChangeDate::new(tokenizer, level + 1)?)),
                "TITL" => self.title = Some(tokenizer.take_continued_text(level + 1)?),
                "AUTH" => self.author = Some(tokenizer.take_continued_text(level + 1)?),
                "PUBL" => self.publication_facts = Some(tokenizer.take_continued_text(level + 1)?),
                "TEXT" => {
                    self.citation_from_source = Some(tokenizer.take_continued_text(level + 1)?);
                }
                "OBJE" => self.add_multimedia(Multimedia::new(tokenizer, level + 1, pointer)?),
                "NOTE" => self.add_note(Note::new(tokenizer, level + 1)?),
                "REPO" => self.add_repo_citation(Citation::new(tokenizer, level + 1)?),
                "RFN" => self.submitter_registered_rfn = Some(tokenizer.take_line_value()?),
                // Unique identifier (GEDCOM 7.0)
                "UID" => self.uid = Some(tokenizer.take_line_value()?),
                // User reference number
                "REFN" => self.user_reference_number = Some(tokenizer.take_line_value()?),
                // User reference type: the TYPE substructure of the REFN just read
                "TYPE" if type_belongs_to_refn => {
                    self.user_reference_type = Some(tokenizer.take_line_value()?);
                }
                // Automated record ID
                "RIN" => self.automated_record_id = Some(tokenizer.take_line_value()?),
                // External identifier (GEDCOM 7.0)
                "EXID" => self.external_ids.push(tokenizer.take_line_value()?),
                _ => {
                    // Gracefully skip unknown tags
                    tokenizer.take_line_value()?;
                }
            }

            Ok(())
        };

        self.custom_data = parse_subset(tokenizer, level, handle_subset)?;

        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use crate::Gedcom;

    // A source citation under an individual keeps its pointer and PAGE value.
    #[test]
    fn test_parse_source_citation_record() {
        let gedcom_text = concat!(
            "0 HEAD\n",
            "1 GEDC\n",
            "2 VERS 5.5\n",
            "2 FORM LINEAGE-LINKED\n",
            "0 @PERSON1@ INDI\n",
            "1 SOUR @SOURCE1@\n",
            "2 PAGE 42\n",
            "0 TRLR",
        );

        let mut document = Gedcom::new(gedcom_text.chars()).unwrap();
        let parsed = document.parse_data().unwrap();
        let citation = &parsed.individuals[0].source[0];

        assert_eq!(citation.xref, "@SOURCE1@");
        assert_eq!(citation.page.as_ref().unwrap(), "42");
    }

    // The DATE nested under a citation's DATA structure is captured verbatim.
    #[test]
    fn test_parse_source_citation_data_record() {
        let gedcom_text = concat!(
            "0 HEAD\n",
            "1 GEDC\n",
            "2 VERS 5.5\n",
            "2 FORM LINEAGE-LINKED\n",
            "0 @PERSON1@ INDI\n",
            "1 SOUR @SOURCE1@\n",
            "2 PAGE 42\n",
            "2 DATA\n",
            "3 DATE BEF 1 JAN 1900\n",
            "0 TRLR",
        );

        let mut document = Gedcom::new(gedcom_text.chars()).unwrap();
        let parsed = document.parse_data().unwrap();
        let citation = &parsed.individuals[0].source[0];
        let citation_data = citation.data.as_ref().unwrap();

        assert_eq!(
            citation_data.date.as_ref().unwrap().value.as_ref().unwrap(),
            "BEF 1 JAN 1900"
        );
    }

    // CONT adds a line break and CONC joins without one, so "TE" + "ST"
    // must come back as a single word.
    #[test]
    fn test_parse_text_from_source_record() {
        let gedcom_text = concat!(
            "0 HEAD\n",
            "1 GEDC\n",
            "2 VERS 5.5\n",
            "2 FORM LINEAGE-LINKED\n",
            "0 @PERSON1@ INDI\n",
            "1 SOUR @SOURCE1@\n",
            "2 PAGE 42\n",
            "2 DATA\n",
            "3 DATE BEF 1 JAN 1900\n",
            "3 TEXT a sample text\n",
            "4 CONT Sample text continued here. The word TE\n",
            "4 CONC ST should not be broken!\n",
            "0 TRLR",
        );

        let mut document = Gedcom::new(gedcom_text.chars()).unwrap();
        let parsed = document.parse_data().unwrap();
        let citation = &parsed.individuals[0].source[0];
        let citation_data = citation.data.as_ref().unwrap();

        assert_eq!(
            citation_data.text.as_ref().unwrap().value.as_ref().unwrap(),
            "a sample text\nSample text continued here. The word TEST should not be broken!"
        );
    }

    // QUAY under a citation is exposed as an integer certainty assessment.
    #[test]
    fn test_parse_certainty_assessment_record() {
        let gedcom_text = concat!(
            "0 HEAD\n",
            "1 GEDC\n",
            "2 VERS 5.5\n",
            "2 FORM LINEAGE-LINKED\n",
            "0 @PERSON1@ INDI\n",
            "1 SOUR @SOURCE1@\n",
            "2 PAGE 42\n",
            "2 QUAY 1\n",
            "0 TRLR",
        );

        let mut document = Gedcom::new(gedcom_text.chars()).unwrap();
        let parsed = document.parse_data().unwrap();
        let citation = &parsed.individuals[0].source[0];
        let quay = citation.certainty_assessment.as_ref().unwrap();

        assert_eq!(quay.get_int().unwrap(), 1);
    }

    // An unrecognised standard tag inside a SOUR record must not disturb the
    // fields parsed before and after it.
    #[test]
    fn test_unknown_stdtag_inside_record_is_skipped() {
        let gedcom_text = concat!(
            "0 HEAD\n",
            "1 GEDC\n",
            "2 VERS 5.5\n",
            "0 @S1@ SOUR\n",
            "1 TITL Real title\n",
            "1 BLAH unknown subtag value\n",
            "1 AUTH Real author\n",
            "0 TRLR",
        );

        let mut document = Gedcom::new(gedcom_text.chars()).unwrap();
        let parsed = document.parse_data().unwrap();
        let record = &parsed.sources[0];

        assert_eq!(record.title.as_deref(), Some("Real title"));
        assert_eq!(record.author.as_deref(), Some("Real author"));
    }
}