sgf_parse/
parser.rs

1use std::ptr::NonNull;
2
3use crate::go;
4use crate::lexer::{tokenize, LexerError, Token};
5use crate::unknown_game;
6use crate::{GameTree, GameType, SgfNode, SgfProp};
7
8/// Returns the [`GameTree`] values parsed from the provided text using default parsing options.
9///
10/// This function will attempt to convert non-FF\[4\] files to FF\[4\] if possible. Check out
11/// [`parse_with_options`] if you want to change the default behavior.
12///
13/// # Errors
14/// If the text can't be parsed as an SGF FF\[4\] collection, then an error is returned.
15///
16/// # Examples
17/// ```
18/// use sgf_parse::{parse, GameType};
19///
20/// let sgf = "(;SZ[9]C[Some comment];B[de];W[fe])(;B[de];W[ff])";
21/// let gametrees = parse(sgf).unwrap();
22/// assert!(gametrees.len() == 2);
23/// assert!(gametrees.iter().all(|gametree| gametree.gametype() == GameType::Go));
24/// ```
25pub fn parse(text: &str) -> Result<Vec<GameTree>, SgfParseError> {
26    parse_with_options(text, &ParseOptions::default())
27}
28
29/// Returns the [`GameTree`] values parsed from the provided text.
30///
31/// # Errors
32/// If the text can't be parsed as an SGF FF\[4\] collection, then an error is returned.
33///
34/// # Examples
35/// ```
36/// use sgf_parse::{parse_with_options, ParseOptions, GameType, SgfParseError};
37///
38/// // Default options
39/// let sgf = "(;SZ[9]C[Some comment];B[de];W[fe])(;B[de];W[ff])";
40/// let gametrees = parse_with_options(sgf, &ParseOptions::default()).unwrap();
41/// assert!(gametrees.len() == 2);
42/// assert!(gametrees.iter().all(|gametree| gametree.gametype() == GameType::Go));
43///
44/// // Strict FF[4] identifiers
45/// let sgf = "(;SZ[9]CoPyright[Julian Andrews 2025];B[de];W[fe])(;B[de];W[ff])";
46/// let parse_options = ParseOptions {
47///     convert_mixed_case_identifiers: false,
48///     ..ParseOptions::default()
49/// };
50/// let result = parse_with_options(sgf, &parse_options);
51/// assert_eq!(result, Err(SgfParseError::InvalidFF4Property));
52/// ```
53pub fn parse_with_options(
54    text: &str,
55    options: &ParseOptions,
56) -> Result<Vec<GameTree>, SgfParseError> {
57    let text = text.trim();
58    let tokens = tokenize(text)
59        .map(|result| match result {
60            Err(e) => Err(SgfParseError::LexerError(e)),
61            Ok((token, _span)) => Ok(token),
62        })
63        .collect::<Result<Vec<_>, _>>()?;
64    split_by_gametree(&tokens, options.lenient)?
65        .into_iter()
66        .map(|tokens| match find_gametype(tokens)? {
67            GameType::Go => parse_gametree::<go::Prop>(tokens, options),
68            GameType::Unknown => parse_gametree::<unknown_game::Prop>(tokens, options),
69        })
70        .collect::<Result<_, _>>()
71}
72
73/// Options for parsing SGF files.
74///
75/// # Examples
76/// See [`parse_with_options`] for usage examples.
77pub struct ParseOptions {
78    /// Whether to allow conversion of FF\[3\] mixed case identifiers to FF\[4\].
79    ///
80    /// All lower case letters are dropped.
81    /// This should allow parsing any older files which are valid, but not valid FF\[4\].
82    pub convert_mixed_case_identifiers: bool,
83    /// Whether to use lenient parsing.
84    ///
85    /// In lenient mode, the parser should never return an error, but will instead parse the SGF
86    /// until it hits an error, and then return whatever it's managed to parse.
87    pub lenient: bool,
88}
89
90impl Default for ParseOptions {
91    fn default() -> Self {
92        ParseOptions {
93            convert_mixed_case_identifiers: true,
94            lenient: false,
95        }
96    }
97}
98
99/// Error type for failures parsing sgf from text.
100#[derive(Debug, Clone, Copy, PartialEq, Eq)]
101pub enum SgfParseError {
102    LexerError(LexerError),
103    UnexpectedGameTreeStart,
104    UnexpectedGameTreeEnd,
105    UnexpectedProperty,
106    UnexpectedEndOfData,
107    UnexpectedGameType,
108    InvalidFF4Property,
109}
110
111impl From<LexerError> for SgfParseError {
112    fn from(error: LexerError) -> Self {
113        Self::LexerError(error)
114    }
115}
116
117impl std::fmt::Display for SgfParseError {
118    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
119        match self {
120            SgfParseError::LexerError(e) => write!(f, "Error tokenizing: {e}"),
121            SgfParseError::UnexpectedGameTreeStart => write!(f, "Unexpected start of game tree"),
122            SgfParseError::UnexpectedGameTreeEnd => write!(f, "Unexpected end of game tree"),
123            SgfParseError::UnexpectedProperty => write!(f, "Unexpected property"),
124            SgfParseError::UnexpectedEndOfData => write!(f, "Unexpected end of data"),
125            SgfParseError::UnexpectedGameType => write!(f, "Unexpected game type"),
126            SgfParseError::InvalidFF4Property => {
127                write!(
128                    f,
129                    "Invalid FF[4] property without `convert_mixed_case_identifiers`"
130                )
131            }
132        }
133    }
134}
135
136impl std::error::Error for SgfParseError {}
137
138// Split the tokens up into individual gametrees.
139//
140// This will let us easily scan each gametree for GM properties.
141// Only considers StartGameTree/EndGameTree tokens.
142fn split_by_gametree(tokens: &[Token], lenient: bool) -> Result<Vec<&[Token]>, SgfParseError> {
143    let mut gametrees = vec![];
144    let mut gametree_depth: u64 = 0;
145    let mut slice_start = 0;
146    for (i, token) in tokens.iter().enumerate() {
147        match token {
148            Token::StartGameTree => gametree_depth += 1,
149            Token::EndGameTree => {
150                if gametree_depth == 0 {
151                    if lenient {
152                        break;
153                    } else {
154                        return Err(SgfParseError::UnexpectedGameTreeEnd);
155                    }
156                }
157                gametree_depth -= 1;
158                if gametree_depth == 0 {
159                    gametrees.push(&tokens[slice_start..=i]);
160                    slice_start = i + 1;
161                }
162            }
163            _ => {}
164        }
165    }
166    if gametree_depth != 0 {
167        if lenient {
168            // For lenient parsing assume all remaining tokens are part of the last gametree.
169            gametrees.push(&tokens[slice_start..]);
170        } else {
171            return Err(SgfParseError::UnexpectedEndOfData);
172        }
173    }
174
175    Ok(gametrees)
176}
177
178// Parse a single gametree of a known type.
179fn parse_gametree<Prop: SgfProp>(
180    tokens: &[Token],
181    options: &ParseOptions,
182) -> Result<GameTree, SgfParseError>
183where
184    SgfNode<Prop>: std::convert::Into<GameTree>,
185{
186    // TODO: Rewrite this without `unsafe`
187    let mut collection: Vec<SgfNode<Prop>> = vec![];
188    // //// Pointer to the `Vec` of children we're currently building.
189    let mut current_node_list_ptr = NonNull::new(&mut collection).unwrap();
190    // Stack of pointers to incomplete `Vec`s of children.
191    let mut incomplete_child_lists: Vec<NonNull<Vec<SgfNode<Prop>>>> = vec![];
192    //// Using pointers involves some unsafe calls, but should be ok here.
193    //// Since pointers are always initialized from real structs, and those structs
194    //// live for the whole function body, our only safety concern is dangling pointers.
195    ////
196    //// Since we build the tree traversing depth-first those structs shouldn't be
197    //// modified while the pointer is live. Heap-allocated contents of their
198    //// `children` may be modified, but that shouldn't change anything.
199
200    let mut tokens = tokens.iter().peekable();
201    while let Some(token) = tokens.next() {
202        match token {
203            Token::StartGameTree => {
204                // SGF game trees must have a root node.
205                if let Some(node_list_ptr) = incomplete_child_lists.last() {
206                    let node_list = unsafe { node_list_ptr.as_ref() };
207                    if node_list.is_empty() {
208                        if options.lenient {
209                            break;
210                        } else {
211                            return Err(SgfParseError::UnexpectedGameTreeStart);
212                        }
213                    }
214                }
215                incomplete_child_lists.push(current_node_list_ptr);
216            }
217            Token::EndGameTree => match incomplete_child_lists.pop() {
218                Some(node_list) => current_node_list_ptr = node_list,
219                None => {
220                    if options.lenient {
221                        break;
222                    } else {
223                        return Err(SgfParseError::UnexpectedGameTreeEnd);
224                    }
225                }
226            },
227            Token::StartNode => {
228                let mut new_node = SgfNode::default();
229                let mut prop_tokens = vec![];
230                while let Some(Token::Property(_)) = tokens.peek() {
231                    prop_tokens.push(tokens.next().unwrap());
232                }
233                for token in prop_tokens {
234                    match token {
235                        // TODO: Consider refactoring to consume tokens and clone of values.
236                        Token::Property((identifier, values)) => {
237                            let identifier = {
238                                if identifier.chars().all(|c| c.is_ascii_uppercase()) {
239                                    identifier.clone()
240                                } else if options.convert_mixed_case_identifiers {
241                                    identifier
242                                        .chars()
243                                        .filter(|c| c.is_ascii_uppercase())
244                                        .collect()
245                                } else if options.lenient {
246                                    break;
247                                } else {
248                                    return Err(SgfParseError::InvalidFF4Property);
249                                }
250                            };
251                            new_node
252                                .properties
253                                .push(Prop::new(identifier, values.clone()))
254                        }
255                        _ => unreachable!(),
256                    }
257                }
258                let node_list = unsafe { current_node_list_ptr.as_mut() };
259                node_list.push(new_node);
260                current_node_list_ptr =
261                    NonNull::new(&mut node_list.last_mut().unwrap().children).unwrap();
262            }
263            Token::Property(_) => {
264                if options.lenient {
265                    break;
266                } else {
267                    return Err(SgfParseError::UnexpectedProperty);
268                }
269            }
270        }
271    }
272
273    if !options.lenient && (!incomplete_child_lists.is_empty() || collection.len() != 1) {
274        return Err(SgfParseError::UnexpectedEndOfData);
275    }
276    let mut root_node = collection
277        .into_iter()
278        .next()
279        .ok_or(SgfParseError::UnexpectedEndOfData)?;
280    root_node.is_root = true;
281    Ok(root_node.into())
282}
283
284// Figure out which game to parse from a slice of tokens.
285//
286// This function is necessary because we need to know the game before we can do the parsing.
287fn find_gametype(tokens: &[Token]) -> Result<GameType, SgfParseError> {
288    match find_gametree_root_prop_values("GM", tokens)? {
289        None => Ok(GameType::Go),
290        Some(values) => {
291            if values.len() != 1 {
292                return Ok(GameType::Unknown);
293            }
294            match values[0].as_str() {
295                "1" => Ok(GameType::Go),
296                _ => Ok(GameType::Unknown),
297            }
298        }
299    }
300}
301
302// Find the property values for a given identifier in the root node from the gametree's tokens.
303//
304// We use this to determine key root properties (like GM and FF) before parsing.
305// Returns an error if there's more than one match.
306fn find_gametree_root_prop_values<'a>(
307    prop_ident: &'a str,
308    tokens: &'a [Token],
309) -> Result<Option<&'a Vec<String>>, SgfParseError> {
310    // Find the matching property values in the first node.
311    // Skip the initial StartGameTree, StartNode tokens; we'll handle any errors later.
312    let matching_tokens: Vec<&Vec<String>> = tokens
313        .iter()
314        .skip(2)
315        .take_while(|&token| matches!(token, Token::Property(_)))
316        .filter_map(move |token| match token {
317            Token::Property((ident, values)) if ident == prop_ident => Some(values),
318            _ => None,
319        })
320        .collect();
321
322    match matching_tokens.len() {
323        0 => Ok(None),
324        1 => Ok(Some(matching_tokens[0])),
325        _ => Err(SgfParseError::UnexpectedProperty),
326    }
327}
328
329#[cfg(test)]
330mod test {
331    use super::*;
332    use crate::{go, serialize};
333
334    fn load_test_sgf() -> Result<String, Box<dyn std::error::Error>> {
335        // See https://www.red-bean.com/sgf/examples/
336        let mut sgf_path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
337        sgf_path.push("resources/test/ff4_ex.sgf");
338        let data = std::fs::read_to_string(sgf_path)?;
339
340        Ok(data)
341    }
342
343    fn get_go_nodes() -> Result<Vec<SgfNode<go::Prop>>, Box<dyn std::error::Error>> {
344        let data = load_test_sgf()?;
345
346        Ok(go::parse(&data)?)
347    }
348
349    fn node_depth(mut sgf_node: &SgfNode<go::Prop>) -> u64 {
350        let mut depth = 1;
351        while sgf_node.children().count() > 0 {
352            depth += 1;
353            sgf_node = sgf_node.children().next().unwrap();
354        }
355        depth
356    }
357
358    #[test]
359    fn sgf_has_two_gametrees() {
360        let sgf_nodes = get_go_nodes().unwrap();
361        assert_eq!(sgf_nodes.len(), 2);
362    }
363
364    #[test]
365    fn gametree_one_has_five_variations() {
366        let sgf_nodes = get_go_nodes().unwrap();
367        let sgf_node = &sgf_nodes[0];
368        assert_eq!(sgf_node.children().count(), 5);
369    }
370
371    #[test]
372    fn gametree_one_has_size_19() {
373        let sgf_nodes = get_go_nodes().unwrap();
374        let sgf_node = &sgf_nodes[0];
375        match sgf_node.get_property("SZ") {
376            Some(go::Prop::SZ(size)) => assert_eq!(size, &(19, 19)),
377            _ => unreachable!("Expected size property"),
378        }
379    }
380
381    #[test]
382    fn gametree_variation_depths() {
383        let sgf_nodes = get_go_nodes().unwrap();
384        let sgf_node = &sgf_nodes[0];
385        let children: Vec<_> = sgf_node.children().collect();
386        assert_eq!(node_depth(children[0]), 13);
387        assert_eq!(node_depth(children[1]), 4);
388        assert_eq!(node_depth(children[2]), 4);
389    }
390
391    #[test]
392    fn gametree_two_has_one_variation() {
393        let sgf_nodes = get_go_nodes().unwrap();
394        let sgf_node = &sgf_nodes[1];
395        assert_eq!(sgf_node.children().count(), 1);
396    }
397
398    #[test]
399    fn serialize_then_parse() {
400        let data = load_test_sgf().unwrap();
401        let gametrees = parse(&data).unwrap();
402        let text = serialize(&gametrees);
403        assert_eq!(gametrees, parse(&text).unwrap());
404    }
405
406    #[test]
407    fn invalid_property() {
408        let input = "(;GM[1]W[rp.pmonpoqprpsornqmpm])";
409        let sgf_nodes = go::parse(input).unwrap();
410        let expected = vec![
411            go::Prop::GM(1),
412            go::Prop::Invalid("W".to_string(), vec!["rp.pmonpoqprpsornqmpm".to_string()]),
413        ];
414
415        assert_eq!(sgf_nodes.len(), 1);
416        let sgf_node = &sgf_nodes[0];
417        assert_eq!(sgf_node.properties().cloned().collect::<Vec<_>>(), expected);
418    }
419
420    #[test]
421    fn unknown_game() {
422        let input = "(;GM[37]W[rp.pmonpoqprpsornqmpm])";
423        let gametrees = parse(input).unwrap();
424        assert_eq!(gametrees.len(), 1);
425        assert_eq!(gametrees[0].gametype(), GameType::Unknown);
426        let sgf_node = match &gametrees[0] {
427            GameTree::Unknown(node) => node,
428            _ => panic!("Unexpected game type"),
429        };
430        let expected = vec![
431            unknown_game::Prop::GM(37),
432            unknown_game::Prop::W("rp.pmonpoqprpsornqmpm".into()),
433        ];
434
435        assert_eq!(sgf_node.properties().cloned().collect::<Vec<_>>(), expected);
436    }
437
438    #[test]
439    fn mixed_games() {
440        let input = "(;GM[1];W[dd])(;GM[37]W[rp.pmonpoqprpsornqmpm])";
441        let gametrees = parse(input).unwrap();
442        assert_eq!(gametrees.len(), 2);
443        assert_eq!(gametrees[0].gametype(), GameType::Go);
444        assert_eq!(gametrees[1].gametype(), GameType::Unknown);
445    }
446
447    #[test]
448    fn stack_overflow() {
449        // This input generated a stack overflow with the old code
450        let input = "(;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;)";
451        let result = parse(&input);
452        assert!(result.is_ok());
453    }
454
455    #[test]
456    fn converts_up_ff3_property() {
457        let input = "(;GM[1]FF[3]CoPyright[test])";
458        let expected = vec![
459            go::Prop::GM(1),
460            go::Prop::FF(3),
461            go::Prop::CP("test".into()),
462        ];
463
464        let sgf_nodes = go::parse(input).unwrap();
465
466        assert_eq!(sgf_nodes.len(), 1);
467        let properties = sgf_nodes[0].properties().cloned().collect::<Vec<_>>();
468        assert_eq!(properties, expected);
469    }
470
471    #[test]
472    fn doesnt_convert_if_not_allowed() {
473        let input = "(;GM[1]FF[3]CoPyright[test])";
474        let parse_options = ParseOptions {
475            convert_mixed_case_identifiers: false,
476            ..ParseOptions::default()
477        };
478        let result = parse_with_options(input, &parse_options);
479        assert_eq!(result, Err(SgfParseError::InvalidFF4Property));
480    }
481
482    #[test]
483    fn compressed_list_for_unknown_game() {
484        let input = "(;GM[]MA[a:b])";
485        let gametree = parse(&input).unwrap().pop().unwrap();
486        let node = match gametree {
487            GameTree::Unknown(node) => node,
488            _ => panic!("Expected Unknown Game type"),
489        };
490        match node.get_property("MA") {
491            Some(unknown_game::Prop::MA(values)) => {
492                assert_eq!(values.len(), 1);
493                assert!(values.contains("a:b"));
494            }
495            _ => panic!("MA prop not found"),
496        }
497    }
498
499    #[test]
500    fn strips_whitespace() {
501        let input = "\n(;GM[1];B[cc])";
502        let sgf_nodes = go::parse(&input).unwrap();
503        assert_eq!(sgf_nodes.len(), 1);
504    }
505
506    #[test]
507    fn lenient_parsing_unclosed_parens_ok() {
508        let input = "\n(;GM[1];B[cc]";
509        let parse_options = ParseOptions {
510            lenient: true,
511            ..ParseOptions::default()
512        };
513        let sgf_nodes = parse_with_options(input, &parse_options).unwrap();
514        assert_eq!(sgf_nodes.len(), 1);
515    }
516
517    #[test]
518    fn lenient_parsing_ignores_trailing_garbage() {
519        let input = "\n(;GM[1];B[cc]))";
520        let parse_options = ParseOptions {
521            lenient: true,
522            ..ParseOptions::default()
523        };
524        let sgf_nodes = parse_with_options(input, &parse_options).unwrap();
525        assert_eq!(sgf_nodes.len(), 1);
526    }
527}