ged_io/lib.rs
1/*!
`ged_io` is a Rust crate for parsing GEDCOM-formatted text.
3
4The library works with GEDCOM (Genealogical Data Communication), a text-based format widely
5supported by genealogy software for storing and exchanging family history data. `ged_io` transforms
6this text format into workable Rust data structures.
7
8Basic example:
9
10```rust
11use ged_io::Gedcom;
12use ged_io::GedcomError;
13
14use std::error::Error;
15use std::fs;
16
17// Parse a GEDCOM file
18fn main() -> Result<(), Box<dyn Error>> {
19 let source = fs::read_to_string("./tests/fixtures/sample.ged")?;
20 let mut gedcom = Gedcom::new(source.chars())?;
21 let gedcom_data = gedcom.parse_data()?;
22
23 // Display file statistics
24 gedcom_data.stats();
25 Ok(())
26}
27```
28
29This crate contains an optional `"json"` feature that implements serialization and deserialization to JSON with [`serde`](https://serde.rs).
30
31JSON serialization example:
32
33```rust
34#[cfg(feature = "json")]
35use ged_io::Gedcom;
36#[cfg(feature = "json")]
37use ged_io::GedcomError;
38# #[cfg(feature = "json")]
39# fn main() -> Result<(), Box<dyn std::error::Error>> {
40
41// Parse a GEDCOM file
42let source = std::fs::read_to_string("./tests/fixtures/sample.ged")?;
43let mut gedcom = Gedcom::new(source.chars())?;
44let gedcom_data = gedcom.parse_data()?;
45
46// Serialize to JSON
47let json_output = serde_json::to_string_pretty(&gedcom_data)?;
48println!("{}", json_output);
49
50// Or save to file
51std::fs::write("./target/tmp/family.json", json_output)?;
52# Ok(())
53# }
54# #[cfg(not(feature = "json"))]
55# fn main() {}
56```
57
58## Error Handling Example
59
60This example demonstrates how to handle `GedcomError` when parsing a malformed GEDCOM string.
61
62```rust
63use ged_io::Gedcom;
64use ged_io::GedcomError;
65use std::error::Error;
66
67fn main() -> Result<(), Box<dyn Error>> {
68 let malformed_gedcom = "0 HEAD\n1 GEDC\n2 VERS 5.5\n1 INVALID_TAG\n0 TRLR";
69 let mut gedcom = Gedcom::new(malformed_gedcom.chars())?;
70
71 match gedcom.parse_data() {
72 Ok(_) => println!("Parsing successful!"),
73 Err(e) => {
74 eprintln!("Error parsing GEDCOM: {}", e);
75 match e {
76 GedcomError::ParseError { line, message } => {
77 eprintln!("Specific Parse Error at line {}: {}", line, message);
78 }
79 GedcomError::InvalidFormat(msg) => {
80 eprintln!("Specific Invalid Format Error: {}", msg);
81 }
82 GedcomError::EncodingError(msg) => {
83 eprintln!("Specific Encoding Error: {}", msg);
84 }
85 }
86 }
87 }
88 Ok(())
89}
90```
91*/
92
93#![deny(clippy::pedantic)]
94#![warn(missing_docs)]
95
96#[macro_use]
97mod util;
98/// Error types for the `ged_io` crate.
99pub mod error;
100pub mod parser;
101pub mod tokenizer;
102pub mod types;
103pub use error::GedcomError;
104
105use crate::{tokenizer::Tokenizer, types::GedcomData};
106use std::str::Chars;
107
108/// The main interface for parsing GEDCOM files into structured Rust data types.
109pub struct Gedcom<'a> {
110 tokenizer: Tokenizer<'a>,
111}
112
113impl<'a> Gedcom<'a> {
114 /// Creates a new `Gedcom` parser from a character iterator.
115 ///
116 /// # Errors
117 ///
118 /// Returns an error if the GEDCOM data is malformed.
119 pub fn new(chars: Chars<'a>) -> Result<Gedcom<'a>, GedcomError> {
120 let mut tokenizer = Tokenizer::new(chars);
121 tokenizer.next_token()?;
122 Ok(Gedcom { tokenizer })
123 }
124
125 /// Processes the character data to produce a [`GedcomData`] object containing the parsed
126 /// genealogical information.
127 ///
128 /// # Errors
129 ///
130 /// Returns an error if the GEDCOM data is malformed.
131 pub fn parse_data(&mut self) -> Result<GedcomData, GedcomError> {
132 GedcomData::new(&mut self.tokenizer, 0)
133 }
134}
135
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `$records` holds exactly one record and that its `xref` equals `$xref`.
    macro_rules! assert_single_xref {
        ($records:expr, $xref:literal) => {
            assert_eq!($records.len(), 1);
            assert_eq!($records[0].xref.as_ref().unwrap(), $xref);
        };
    }

    /// Runs `input` through `Gedcom::new` and `parse_data`, panicking if either step fails.
    fn parse(input: &str) -> crate::types::GedcomData {
        Gedcom::new(input.chars()).unwrap().parse_data().unwrap()
    }

    /// A header-only document must expose the GEDCOM version declared under `HEAD`/`GEDC`.
    #[test]
    fn test_parse_minimal_document() {
        let parsed = parse(concat!(
            "0 HEAD\n",
            "1 GEDC\n",
            "2 VERS 5.5\n",
            "0 TRLR",
        ));

        let version = parsed.header.unwrap().gedcom.unwrap().version.unwrap();
        assert_eq!(version, "5.5");
    }

    /// One record of each top-level kind must end up in its matching collection, keyed by
    /// the cross-reference id given in the input.
    #[test]
    fn test_parse_all_record_types() {
        let parsed = parse(concat!(
            "0 HEAD\n",
            "1 GEDC\n",
            "2 VERS 5.5\n",
            "0 @SUBMITTER@ SUBM\n",
            "0 @PERSON1@ INDI\n",
            "0 @FAMILY1@ FAM\n",
            "0 @R1@ REPO\n",
            "0 @SOURCE1@ SOUR\n",
            "0 @MEDIA1@ OBJE\n",
            "0 _MYOWNTAG This is a non-standard tag. Not recommended but allowed\n",
            "0 TRLR",
        ));

        assert_single_xref!(parsed.submitters, "@SUBMITTER@");
        assert_single_xref!(parsed.individuals, "@PERSON1@");
        assert_single_xref!(parsed.families, "@FAMILY1@");
        assert_single_xref!(parsed.repositories, "@R1@");
        assert_single_xref!(parsed.sources, "@SOURCE1@");
    }
}