use crate::{
encoding::{decode_gedcom_bytes, GedcomEncoding},
tokenizer::Tokenizer,
types::GedcomData,
GedcomError,
};
use std::str::Chars;
/// Settings carried by [`GedcomBuilder`] and consulted when it parses input.
///
/// Within this file only `validate_references` and `max_file_size` are read
/// by the builder; the remaining flags are stored and exposed through
/// [`GedcomBuilder::config`].
// Each flag is an independent on/off option, so plain `bool` fields are kept
// and the pedantic "too many bools" lint is silenced for this struct.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone)]
pub struct ParserConfig {
    /// Strict-mode switch.
    /// NOTE(review): not read by the builder code in this file — confirm where it takes effect.
    pub strict_mode: bool,
    /// When `true`, `GedcomBuilder::build` runs its cross-reference check
    /// after parsing and fails on a dangling reference.
    pub validate_references: bool,
    /// Unknown-tag switch.
    /// NOTE(review): not read by the builder code in this file — confirm where it takes effect.
    pub ignore_unknown_tags: bool,
    /// Encoding-detection switch.
    /// NOTE(review): not read by the builder code in this file — confirm where it takes effect.
    pub encoding_detection: bool,
    /// Date-validation switch.
    /// NOTE(review): not read by the builder code in this file — confirm where it takes effect.
    pub date_validation: bool,
    /// Optional upper bound on the input length (`len()` of the byte slice or
    /// string) enforced by the `build_from_*` entry points; `None` means no limit.
    pub max_file_size: Option<usize>,
    /// Formatting-preservation switch.
    /// NOTE(review): not read by the builder code in this file — confirm where it takes effect.
    pub preserve_formatting: bool,
}

impl Default for ParserConfig {
    /// Returns the baseline configuration: no size limit, formatting
    /// preservation on, and every other flag off.
    fn default() -> Self {
        Self {
            // The only flag that starts enabled.
            preserve_formatting: true,
            // No size limit unless the caller asks for one.
            max_file_size: None,
            // All remaining switches are opt-in.
            strict_mode: false,
            validate_references: false,
            ignore_unknown_tags: false,
            encoding_detection: false,
            date_validation: false,
        }
    }
}
/// Fluent builder that accumulates a [`ParserConfig`] and then parses GEDCOM
/// input into `GedcomData` through one of its `build*` methods.
///
/// The derived `Default` produces the same state as [`GedcomBuilder::new`]:
/// a builder holding `ParserConfig::default()`.
#[derive(Debug, Clone, Default)]
pub struct GedcomBuilder {
    /// Settings written by the setter methods and read by the `build*` methods.
    config: ParserConfig,
}
impl GedcomBuilder {
#[must_use]
pub fn new() -> Self {
Self {
config: ParserConfig::default(),
}
}
#[must_use]
pub fn strict_mode(mut self, enabled: bool) -> Self {
self.config.strict_mode = enabled;
self
}
#[must_use]
pub fn validate_references(mut self, enabled: bool) -> Self {
self.config.validate_references = enabled;
self
}
#[must_use]
pub fn ignore_unknown_tags(mut self, enabled: bool) -> Self {
self.config.ignore_unknown_tags = enabled;
self
}
#[must_use]
pub fn encoding_detection(mut self, enabled: bool) -> Self {
self.config.encoding_detection = enabled;
self
}
#[must_use]
pub fn date_validation(mut self, enabled: bool) -> Self {
self.config.date_validation = enabled;
self
}
#[must_use]
pub fn max_file_size(mut self, size: usize) -> Self {
self.config.max_file_size = Some(size);
self
}
#[must_use]
pub fn preserve_formatting(mut self, enabled: bool) -> Self {
self.config.preserve_formatting = enabled;
self
}
#[must_use]
pub fn config(&self) -> &ParserConfig {
&self.config
}
pub fn build(self, chars: Chars<'_>) -> Result<GedcomData, GedcomError> {
let mut tokenizer = Tokenizer::new(chars);
tokenizer.next_token()?;
let data = GedcomData::new(&mut tokenizer, 0)?;
if self.config.validate_references {
self.validate_references_internal(&data)?;
}
Ok(data)
}
pub fn build_from_bytes(self, bytes: &[u8]) -> Result<GedcomData, GedcomError> {
if let Some(max_size) = self.config.max_file_size {
let size = bytes.len();
if size > max_size {
return Err(GedcomError::FileSizeLimitExceeded { size, max_size });
}
}
let (content, _encoding) = decode_gedcom_bytes(bytes)?;
self.build(content.chars())
}
pub fn build_from_bytes_with_encoding(
self,
bytes: &[u8],
encoding: GedcomEncoding,
) -> Result<GedcomData, GedcomError> {
if let Some(max_size) = self.config.max_file_size {
let size = bytes.len();
if size > max_size {
return Err(GedcomError::FileSizeLimitExceeded { size, max_size });
}
}
let (content, _) = crate::encoding::decode_with_encoding(bytes, encoding)?;
self.build(content.chars())
}
pub fn build_from_str(self, content: &str) -> Result<GedcomData, GedcomError> {
if let Some(max_size) = self.config.max_file_size {
let size = content.len();
if size > max_size {
return Err(GedcomError::FileSizeLimitExceeded { size, max_size });
}
}
self.build(content.chars())
}
#[cfg(feature = "gedzip")]
pub fn build_from_gedzip(self, bytes: &[u8]) -> Result<GedcomData, GedcomError> {
use crate::gedzip::GedzipReader;
let cursor = std::io::Cursor::new(bytes);
let mut reader = GedzipReader::new(cursor)
.map_err(|e| GedcomError::InvalidFormat(format!("Invalid GEDZIP archive: {e}")))?;
let gedcom_bytes = reader
.read_gedcom_bytes()
.map_err(|e| GedcomError::InvalidFormat(format!("Failed to read gedcom.ged: {e}")))?;
if let Some(max_size) = self.config.max_file_size {
let size = gedcom_bytes.len();
if size > max_size {
return Err(GedcomError::FileSizeLimitExceeded { size, max_size });
}
}
self.build_from_bytes(&gedcom_bytes)
}
#[allow(clippy::unused_self)]
fn validate_references_internal(&self, data: &GedcomData) -> Result<(), GedcomError> {
use std::collections::HashSet;
let mut xrefs: HashSet<&str> = HashSet::new();
for individual in &data.individuals {
if let Some(ref xref) = individual.xref {
xrefs.insert(xref.as_str());
}
}
for family in &data.families {
if let Some(ref xref) = family.xref {
xrefs.insert(xref.as_str());
}
}
for source in &data.sources {
if let Some(ref xref) = source.xref {
xrefs.insert(xref.as_str());
}
}
for repo in &data.repositories {
if let Some(ref xref) = repo.xref {
xrefs.insert(xref.as_str());
}
}
for submitter in &data.submitters {
if let Some(ref xref) = submitter.xref {
xrefs.insert(xref.as_str());
}
}
for multimedia in &data.multimedia {
if let Some(ref xref) = multimedia.xref {
xrefs.insert(xref.as_str());
}
}
for family in &data.families {
if let Some(ref husb) = family.individual1 {
if !xrefs.contains(husb.as_str()) {
return Err(GedcomError::InvalidFormat(format!(
"Family references non-existent individual: {husb}"
)));
}
}
if let Some(ref wife) = family.individual2 {
if !xrefs.contains(wife.as_str()) {
return Err(GedcomError::InvalidFormat(format!(
"Family references non-existent individual: {wife}"
)));
}
}
for child in &family.children {
if !xrefs.contains(child.as_str()) {
return Err(GedcomError::InvalidFormat(format!(
"Family references non-existent child: {child}"
)));
}
}
}
for individual in &data.individuals {
for family_link in &individual.families {
if !xrefs.contains(family_link.xref.as_str()) {
return Err(GedcomError::InvalidFormat(format!(
"Individual references non-existent family: {}",
family_link.xref
)));
}
}
}
Ok(())
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh builder exposes the default configuration.
    #[test]
    fn test_builder_default() {
        let builder = GedcomBuilder::new();
        let config = builder.config();
        assert!(!config.strict_mode);
        assert!(!config.validate_references);
        assert!(!config.ignore_unknown_tags);
        assert!(!config.encoding_detection);
        assert!(!config.date_validation);
        assert!(config.max_file_size.is_none());
        assert!(config.preserve_formatting);
    }

    /// Every setter writes through to the matching configuration field.
    #[test]
    fn test_builder_fluent_api() {
        let builder = GedcomBuilder::new()
            .strict_mode(true)
            .validate_references(true)
            .ignore_unknown_tags(true)
            .encoding_detection(true)
            .date_validation(true)
            .max_file_size(1_000_000)
            .preserve_formatting(false);
        let config = builder.config();
        assert!(config.strict_mode);
        assert!(config.validate_references);
        assert!(config.ignore_unknown_tags);
        assert!(config.encoding_detection);
        assert!(config.date_validation);
        assert_eq!(config.max_file_size, Some(1_000_000));
        assert!(!config.preserve_formatting);
    }

    /// A header-plus-trailer document parses successfully.
    #[test]
    fn test_builder_build_minimal() {
        let gedcom = "0 HEAD\n1 GEDC\n2 VERS 5.5\n0 TRLR";
        let outcome = GedcomBuilder::new().build_from_str(gedcom);
        assert!(outcome.is_ok());
    }

    /// A single INDI record ends up in `individuals`.
    #[test]
    fn test_builder_with_individuals() {
        let gedcom = "\
            0 HEAD\n\
            1 GEDC\n\
            2 VERS 5.5\n\
            0 @I1@ INDI\n\
            1 NAME John /Doe/\n\
            0 TRLR";
        let parsed = GedcomBuilder::new().build_from_str(gedcom).unwrap();
        assert_eq!(parsed.individuals.len(), 1);
    }

    /// A family pointing at a missing individual is rejected when validation is on.
    #[test]
    fn test_builder_validate_references_error() {
        let gedcom = "\
            0 HEAD\n\
            1 GEDC\n\
            2 VERS 5.5\n\
            0 @F1@ FAM\n\
            1 HUSB @I_NONEXISTENT@\n\
            0 TRLR";
        let outcome = GedcomBuilder::new()
            .validate_references(true)
            .build_from_str(gedcom);
        assert!(outcome.is_err());
    }

    /// A family pointing at an existing individual passes validation.
    #[test]
    fn test_builder_validate_references_success() {
        let gedcom = "\
            0 HEAD\n\
            1 GEDC\n\
            2 VERS 5.5\n\
            0 @I1@ INDI\n\
            1 NAME John /Doe/\n\
            0 @F1@ FAM\n\
            1 HUSB @I1@\n\
            0 TRLR";
        let outcome = GedcomBuilder::new()
            .validate_references(true)
            .build_from_str(gedcom);
        assert!(outcome.is_ok());
    }

    /// Cloning a configuration keeps the compared fields equal.
    #[test]
    fn test_parser_config_clone() {
        let original = ParserConfig {
            strict_mode: true,
            validate_references: true,
            ignore_unknown_tags: true,
            encoding_detection: true,
            date_validation: true,
            max_file_size: Some(1000),
            preserve_formatting: false,
        };
        let copy = original.clone();
        assert_eq!(original.strict_mode, copy.strict_mode);
        assert_eq!(original.validate_references, copy.validate_references);
        assert_eq!(original.date_validation, copy.date_validation);
        assert_eq!(original.max_file_size, copy.max_file_size);
        assert_eq!(original.preserve_formatting, copy.preserve_formatting);
    }

    /// Input longer than the configured limit yields `FileSizeLimitExceeded`.
    #[test]
    fn test_builder_max_file_size_exceeded() {
        // Header, one hundred identical individual records, then the trailer.
        let large_content = format!(
            "0 HEAD\n1 GEDC\n2 VERS 5.5\n{}0 TRLR",
            "0 @I1@ INDI\n1 NAME Test /Person/\n".repeat(100)
        );
        let outcome = GedcomBuilder::new()
            .max_file_size(100)
            .build_from_str(&large_content);
        match outcome {
            Err(GedcomError::FileSizeLimitExceeded { size, max_size }) => {
                assert!(size > 100);
                assert_eq!(max_size, 100);
            }
            _ => panic!("Expected FileSizeLimitExceeded error"),
        }
    }

    /// A cloned builder carries the same configuration.
    #[test]
    fn test_builder_clone() {
        let original = GedcomBuilder::new().strict_mode(true);
        let copy = original.clone();
        assert!(copy.config().strict_mode);
    }
}