#![deny(clippy::pedantic)]
#![warn(missing_docs)]
use std::str::Chars;
#[cfg(feature = "json")]
use serde::{Deserialize, Serialize};
#[macro_use]
mod util;
pub mod tokenizer;
use tokenizer::{Token, Tokenizer};
pub mod types;
use types::{
Family, Header, Individual, MultimediaRecord, Repository, Source, Submission, Submitter,
UserDefinedTag,
};
/// A parser that turns a stream of GEDCOM characters into an in-memory
/// [`GedcomData`] tree. Construct with [`GedcomDocument::new`], then call
/// [`GedcomDocument::parse_document`].
pub struct GedcomDocument<'a> {
    // Lexer over the raw character stream; primed with its first token in
    // `new`, so `current_token` is always valid while parsing.
    tokenizer: Tokenizer<'a>,
}
impl<'a> GedcomDocument<'a> {
    /// Creates a document parser over the characters of a GEDCOM file,
    /// priming the tokenizer so the first token is ready to be consumed.
    #[must_use]
    pub fn new(chars: Chars<'a>) -> GedcomDocument<'a> {
        let mut lexer = Tokenizer::new(chars);
        // Advance once so `current_token` holds the first real token.
        lexer.next_token();
        GedcomDocument { tokenizer: lexer }
    }

    /// Consumes the token stream and returns the fully parsed data tree.
    pub fn parse_document(&mut self) -> GedcomData {
        // A whole document always starts at nesting level 0.
        let root_level = 0;
        GedcomData::new(&mut self.tokenizer, root_level)
    }
}
/// Implemented by every GEDCOM structure that can populate itself from the
/// token stream.
pub trait Parser {
    /// Reads tokens starting at nesting `level`, mutating `self` with the
    /// parsed values; implementations advance `tokenizer` past their record.
    fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8);
}
/// Convenience entry point: parses GEDCOM text into a [`GedcomData`] tree in
/// a single call.
#[must_use]
pub fn parse_ged(content: std::str::Chars) -> GedcomData {
    // The temporary parser lives until the end of this expression, which is
    // long enough to drive the whole parse.
    GedcomDocument::new(content).parse_document()
}
/// Walks every token nested deeper than `level`, dispatching standard tags to
/// `tag_handler` and collecting non-standard (custom) tags into the returned
/// list. Stops as soon as a level token at or above `level` is seen.
///
/// The handler is expected to advance the tokenizer past the tag it is given.
///
/// # Panics
///
/// Panics when a token other than a tag, custom tag, or level marker is
/// encountered inside the subset.
pub fn parse_subset<F>(
    tokenizer: &mut Tokenizer,
    level: u8,
    mut tag_handler: F,
) -> Vec<Box<UserDefinedTag>>
where
    F: FnMut(&str, &mut Tokenizer),
{
    let mut custom_entries = Vec::new();
    // Keep consuming until we bubble back up to (or above) the caller's level.
    while !matches!(tokenizer.current_token, Token::Level(lvl) if lvl <= level) {
        match &tokenizer.current_token {
            Token::Tag(tag) => {
                // Clone the tag so the handler may freely advance the tokenizer.
                let owned_tag = tag.clone();
                tag_handler(owned_tag.as_str(), tokenizer);
            }
            Token::CustomTag(tag) => {
                let owned_tag = tag.clone();
                custom_entries.push(Box::new(UserDefinedTag::new(
                    tokenizer,
                    level + 1,
                    &owned_tag,
                )));
            }
            Token::Level(_) => tokenizer.next_token(),
            _ => panic!(
                "{}, Unhandled Token: {:?}",
                tokenizer.debug(),
                tokenizer.current_token
            ),
        }
    }
    custom_entries
}
/// The root of a parsed GEDCOM tree: every record found in the file, grouped
/// by record type.
#[derive(Debug, Default)]
#[cfg_attr(feature = "json", derive(Serialize, Deserialize))]
pub struct GedcomData {
    /// The file header (`HEAD` record), if present.
    pub header: Option<Header>,
    /// Submitter records (`SUBM`).
    pub submitters: Vec<Submitter>,
    /// Submission records (`SUBN`).
    pub submissions: Vec<Submission>,
    /// Individual (person) records (`INDI`).
    pub individuals: Vec<Individual>,
    /// Family records (`FAM`).
    pub families: Vec<Family>,
    /// Repository records (`REPO`).
    pub repositories: Vec<Repository>,
    /// Source records (`SOUR`).
    pub sources: Vec<Source>,
    /// Multimedia records (`OBJE`).
    pub multimedia: Vec<MultimediaRecord>,
    /// Top-level non-standard, vendor-defined tags (e.g. `_MYTAG`).
    pub custom_data: Vec<Box<UserDefinedTag>>,
}
impl GedcomData {
    /// Builds a data tree by consuming tokens from `tokenizer`, starting at
    /// the given nesting `level` (0 for a whole document).
    #[must_use]
    pub fn new(tokenizer: &mut Tokenizer, level: u8) -> GedcomData {
        let mut tree = GedcomData::default();
        tree.parse(tokenizer, level);
        tree
    }

    /// Appends a family (`FAM`) record.
    pub fn add_family(&mut self, family: Family) {
        self.families.push(family);
    }

    /// Appends an individual (`INDI`) record.
    pub fn add_individual(&mut self, individual: Individual) {
        self.individuals.push(individual);
    }

    /// Appends a repository (`REPO`) record.
    pub fn add_repository(&mut self, repo: Repository) {
        self.repositories.push(repo);
    }

    /// Appends a source (`SOUR`) record.
    pub fn add_source(&mut self, source: Source) {
        self.sources.push(source);
    }

    /// Appends a submission (`SUBN`) record.
    pub fn add_submission(&mut self, submission: Submission) {
        self.submissions.push(submission);
    }

    /// Appends a submitter (`SUBM`) record.
    pub fn add_submitter(&mut self, submitter: Submitter) {
        self.submitters.push(submitter);
    }

    /// Appends a multimedia (`OBJE`) record.
    pub fn add_multimedia(&mut self, multimedia: MultimediaRecord) {
        self.multimedia.push(multimedia);
    }

    /// Appends a non-standard, vendor-defined tag.
    pub fn add_custom_data(&mut self, non_standard_data: UserDefinedTag) {
        self.custom_data.push(Box::new(non_standard_data));
    }

    /// Prints a summary of record counts to stdout.
    pub fn stats(&self) {
        let rule = "----------------------";
        println!("{}", rule);
        println!("| Gedcom Data Stats: |");
        println!("{}", rule);
        // Data-driven so each record type is reported in one uniform line.
        for (label, count) in [
            ("submissions", self.submissions.len()),
            ("submitters", self.submitters.len()),
            ("individuals", self.individuals.len()),
            ("families", self.families.len()),
            ("repositories", self.repositories.len()),
            ("sources", self.sources.len()),
            ("multimedia", self.multimedia.len()),
        ] {
            println!("   {}: {}", label, count);
        }
        println!("{}", rule);
    }
}
impl Parser for GedcomData {
    /// Parses top-level records, appending each one to the matching
    /// collection, until the trailer (`TRLR`) tag ends the loop.
    fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) {
        loop {
            // Every GEDCOM line begins with its nesting level; anything else
            // here means the token stream is out of sync with the grammar.
            let current_level = match tokenizer.current_token {
                Token::Level(n) => n,
                _ => panic!(
                    "{} Expected Level, found {:?}",
                    tokenizer.debug(),
                    tokenizer.current_token
                ),
            };
            tokenizer.next_token();
            // An optional cross-reference id (e.g. `@PERSON1@`) precedes the tag.
            let mut pointer: Option<String> = None;
            if let Token::Pointer(xref) = &tokenizer.current_token {
                pointer = Some(xref.to_string());
                tokenizer.next_token();
            }
            if let Token::Tag(tag) = &tokenizer.current_token {
                // NOTE(review): some arms pass `level` and others pass
                // `current_level` to the record constructors. The two values
                // coincide for well-formed files (records sit at the level
                // this parser was invoked with), but the mix looks
                // accidental — confirm which is intended before unifying.
                match tag.as_str() {
                    "HEAD" => self.header = Some(Header::new(tokenizer, level)),
                    "FAM" => self.add_family(Family::new(tokenizer, level, pointer)),
                    "INDI" => {
                        self.add_individual(Individual::new(tokenizer, current_level, pointer))
                    }
                    "REPO" => {
                        self.add_repository(Repository::new(tokenizer, current_level, pointer))
                    }
                    "SOUR" => self.add_source(Source::new(tokenizer, current_level, pointer)),
                    "SUBN" => self.add_submission(Submission::new(tokenizer, level, pointer)),
                    "SUBM" => self.add_submitter(Submitter::new(tokenizer, level, pointer)),
                    "OBJE" => self.add_multimedia(MultimediaRecord::new(tokenizer, level, pointer)),
                    // Trailer record: end of the document.
                    "TRLR" => break,
                    _ => {
                        // Unknown standard tag: report it and skip the token.
                        println!("{} Unhandled tag {}", tokenizer.debug(), tag);
                        tokenizer.next_token();
                    }
                };
            } else if let Token::CustomTag(tag) = &tokenizer.current_token {
                let tag_clone = tag.clone();
                self.add_custom_data(UserDefinedTag::new(tokenizer, level + 1, &tag_clone));
                // Skip the remainder of the custom record until we are back
                // at this parser's level.
                // NOTE(review): no end-of-input check here, so a file that is
                // truncated inside a custom record could loop forever —
                // confirm the tokenizer emits a terminating Level token.
                while tokenizer.current_token != Token::Level(level) {
                    tokenizer.next_token();
                }
            } else {
                // Anything else is unexpected at record position: report and skip.
                println!(
                    "{} Unhandled token {:?}",
                    tokenizer.debug(),
                    tokenizer.current_token
                );
                tokenizer.next_token();
            };
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a GEDCOM string end-to-end and returns the resulting tree.
    fn parse_text(text: &str) -> GedcomData {
        GedcomDocument::new(text.chars()).parse_document()
    }

    #[test]
    fn test_parse_minimal_document() {
        let data = parse_text("0 HEAD\n1 GEDC\n2 VERS 5.5\n0 TRLR");
        let gedc = data.header.unwrap().gedcom.unwrap();
        assert_eq!(gedc.version.unwrap(), "5.5");
    }

    #[test]
    fn test_parse_all_record_types() {
        let text = "0 HEAD\n\
                    1 GEDC\n\
                    2 VERS 5.5\n\
                    0 @SUBMITTER@ SUBM\n\
                    0 @PERSON1@ INDI\n\
                    0 @FAMILY1@ FAM\n\
                    0 @R1@ REPO\n\
                    0 @SOURCE1@ SOUR\n\
                    0 @MEDIA1@ OBJE\n\
                    0 _MYOWNTAG This is a non-standard tag. Not recommended but allowed\n\
                    0 TRLR";
        let data = parse_text(text);
        assert_eq!(data.submitters.len(), 1);
        assert_eq!(data.submitters[0].xref.as_ref().unwrap(), "@SUBMITTER@");
        assert_eq!(data.individuals.len(), 1);
        assert_eq!(data.individuals[0].xref.as_ref().unwrap(), "@PERSON1@");
        assert_eq!(data.families.len(), 1);
        assert_eq!(data.families[0].xref.as_ref().unwrap(), "@FAMILY1@");
        assert_eq!(data.repositories.len(), 1);
        assert_eq!(data.repositories[0].xref.as_ref().unwrap(), "@R1@");
        assert_eq!(data.sources.len(), 1);
        assert_eq!(data.sources[0].xref.as_ref().unwrap(), "@SOURCE1@");
    }
}