ged_io/
lib.rs

1/*!
2`ged_io` is a Rust crate for parsing GEDCOM formatted text.
3
4The library works with GEDCOM (Genealogical Data Communication), a text-based format widely
5supported by genealogy software for storing and exchanging family history data. `ged_io` transforms
6this text format into workable Rust data structures.
7
8Basic example:
9
10```rust
11use ged_io::Gedcom;
12use ged_io::GedcomError;
13
14use std::error::Error;
15use std::fs;
16
17// Parse a GEDCOM file
18fn main() -> Result<(), Box<dyn Error>> {
19    let source = fs::read_to_string("./tests/fixtures/sample.ged")?;
20    let mut gedcom = Gedcom::new(source.chars())?;
21    let gedcom_data = gedcom.parse_data()?;
22
23    // Display file statistics
24    gedcom_data.stats();
25    Ok(())
26}
27```
28
29This crate contains an optional `"json"` feature that implements serialization and deserialization to JSON with [`serde`](https://serde.rs).
30
31JSON serialization example:
32
33```rust
34#[cfg(feature = "json")]
35use ged_io::Gedcom;
36#[cfg(feature = "json")]
37use ged_io::GedcomError;
38# #[cfg(feature = "json")]
39# fn main() -> Result<(), Box<dyn std::error::Error>> {
40
41// Parse a GEDCOM file
42let source = std::fs::read_to_string("./tests/fixtures/sample.ged")?;
43let mut gedcom = Gedcom::new(source.chars())?;
44let gedcom_data = gedcom.parse_data()?;
45
46// Serialize to JSON
47let json_output = serde_json::to_string_pretty(&gedcom_data)?;
48println!("{}", json_output);
49
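// Or save to file (create the output directory first, since `std::fs::write`
// does not create missing parent directories)
std::fs::create_dir_all("./target/tmp")?;
std::fs::write("./target/tmp/family.json", json_output)?;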
52# Ok(())
53# }
54# #[cfg(not(feature = "json"))]
55# fn main() {}
56```
57
58## Error Handling Example
59
60This example demonstrates how to handle `GedcomError` when parsing a malformed GEDCOM string.
61
62```rust
63use ged_io::Gedcom;
64use ged_io::GedcomError;
65use std::error::Error;
66
67fn main() -> Result<(), Box<dyn Error>> {
68    let malformed_gedcom = "0 HEAD\n1 GEDC\n2 VERS 5.5\n1 INVALID_TAG\n0 TRLR";
69    let mut gedcom = Gedcom::new(malformed_gedcom.chars())?;
70
71    match gedcom.parse_data() {
72        Ok(_) => println!("Parsing successful!"),
73        Err(e) => {
74            eprintln!("Error parsing GEDCOM: {}", e);
75            match e {
76                GedcomError::ParseError { line, message } => {
77                    eprintln!("Specific Parse Error at line {}: {}", line, message);
78                }
79                GedcomError::InvalidFormat(msg) => {
80                    eprintln!("Specific Invalid Format Error: {}", msg);
81                }
82                GedcomError::EncodingError(msg) => {
83                    eprintln!("Specific Encoding Error: {}", msg);
84                }
85            }
86        }
87    }
88    Ok(())
89}
90```
91*/
92
93#![deny(clippy::pedantic)]
94#![warn(missing_docs)]
95
96#[macro_use]
97mod util;
98/// Error types for the `ged_io` crate.
99pub mod error;
100pub mod parser;
101pub mod tokenizer;
102pub mod types;
103pub use error::GedcomError;
104
105use crate::{tokenizer::Tokenizer, types::GedcomData};
106use std::str::Chars;
107
108/// The main interface for parsing GEDCOM files into structured Rust data types.
109pub struct Gedcom<'a> {
110    tokenizer: Tokenizer<'a>,
111}
112
113impl<'a> Gedcom<'a> {
114    /// Creates a new `Gedcom` parser from a character iterator.
115    ///
116    /// # Errors
117    ///
118    /// Returns an error if the GEDCOM data is malformed.
119    pub fn new(chars: Chars<'a>) -> Result<Gedcom<'a>, GedcomError> {
120        let mut tokenizer = Tokenizer::new(chars);
121        tokenizer.next_token()?;
122        Ok(Gedcom { tokenizer })
123    }
124
125    /// Processes the character data to produce a [`GedcomData`] object containing the parsed
126    /// genealogical information.
127    ///
128    /// # Errors
129    ///
130    /// Returns an error if the GEDCOM data is malformed.
131    pub fn parse_data(&mut self) -> Result<GedcomData, GedcomError> {
132        GedcomData::new(&mut self.tokenizer, 0)
133    }
134}
135
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that a record collection contains exactly one entry and that the entry's
    /// `xref` is present and equal to the expected identifier.
    macro_rules! assert_sole_xref {
        ($records:expr, $expected:expr) => {{
            let records = &$records;
            assert_eq!(records.len(), 1);
            assert_eq!(records[0].xref.as_ref().unwrap(), $expected);
        }};
    }

    /// A document consisting only of a header and trailer exposes its GEDCOM version.
    #[test]
    fn test_parse_minimal_document() {
        let input = concat!(
            "0 HEAD\n",
            "1 GEDC\n",
            "2 VERS 5.5\n",
            "0 TRLR",
        );

        let mut parser = Gedcom::new(input.chars()).unwrap();
        let parsed = parser.parse_data().unwrap();

        // Walk header -> GEDC -> VERS; each level must be present.
        let version = parsed.header.unwrap().gedcom.unwrap().version.unwrap();
        assert_eq!(version, "5.5");
    }

    /// One record of each top-level kind is parsed into its own collection.
    ///
    /// The `OBJE` and `_MYOWNTAG` records are part of the input but are not asserted on here.
    #[test]
    fn test_parse_all_record_types() {
        let input = concat!(
            "0 HEAD\n",
            "1 GEDC\n",
            "2 VERS 5.5\n",
            "0 @SUBMITTER@ SUBM\n",
            "0 @PERSON1@ INDI\n",
            "0 @FAMILY1@ FAM\n",
            "0 @R1@ REPO\n",
            "0 @SOURCE1@ SOUR\n",
            "0 @MEDIA1@ OBJE\n",
            "0 _MYOWNTAG This is a non-standard tag. Not recommended but allowed\n",
            "0 TRLR",
        );

        let mut parser = Gedcom::new(input.chars()).unwrap();
        let parsed = parser.parse_data().unwrap();

        assert_sole_xref!(parsed.submitters, "@SUBMITTER@");
        assert_sole_xref!(parsed.individuals, "@PERSON1@");
        assert_sole_xref!(parsed.families, "@FAMILY1@");
        assert_sole_xref!(parsed.repositories, "@R1@");
        assert_sole_xref!(parsed.sources, "@SOURCE1@");
    }
}