sgf_parse/
parser.rs

1use std::ptr::NonNull;
2
3use crate::go;
4use crate::lexer::{tokenize, LexerError, Token};
5use crate::unknown_game;
6use crate::{GameTree, GameType, SgfNode, SgfProp};
7
8/// Returns the [`GameTree`] values parsed from the provided text using default parsing options.
9///
10/// This function will attempt to convert non-FF\[4\] files to FF\[4\] if possible. Check out
11/// [`parse_with_options`] if you want to change the default behavior.
12///
13/// # Errors
14/// If the text can't be parsed as an SGF FF\[4\] collection, then an error is returned.
15///
16/// # Examples
17/// ```
18/// use sgf_parse::{parse, GameType};
19///
20/// let sgf = "(;SZ[9]C[Some comment];B[de];W[fe])(;B[de];W[ff])";
21/// let gametrees = parse(sgf).unwrap();
22/// assert!(gametrees.len() == 2);
23/// assert!(gametrees.iter().all(|gametree| gametree.gametype() == GameType::Go));
24/// ```
25pub fn parse(text: &str) -> Result<Vec<GameTree>, SgfParseError> {
26    parse_with_options(text, &ParseOptions::default())
27}
28
29/// Returns the [`GameTree`] values parsed from the provided text.
30///
31/// # Errors
32/// If the text can't be parsed as an SGF FF\[4\] collection, then an error is returned.
33///
34/// # Examples
35/// ```
36/// use sgf_parse::{parse_with_options, ParseOptions, GameType, SgfParseError};
37///
38/// // Default options
39/// let sgf = "(;SZ[9]C[Some comment];B[de];W[fe])(;B[de];W[ff])";
40/// let gametrees = parse_with_options(sgf, &ParseOptions::default()).unwrap();
41/// assert!(gametrees.len() == 2);
42/// assert!(gametrees.iter().all(|gametree| gametree.gametype() == GameType::Go));
43///
44/// // Strict FF[4] identifiers
45/// let sgf = "(;SZ[9]CoPyright[Julian Andrews 2025];B[de];W[fe])(;B[de];W[ff])";
46/// let parse_options = ParseOptions {
47///     convert_mixed_case_identifiers: false,
48///     ..ParseOptions::default()
49/// };
50/// let result = parse_with_options(sgf, &parse_options);
51/// assert_eq!(result, Err(SgfParseError::InvalidFF4Property));
52/// ```
53pub fn parse_with_options(
54    text: &str,
55    options: &ParseOptions,
56) -> Result<Vec<GameTree>, SgfParseError> {
57    let text = text.trim();
58    let tokens = if options.lenient {
59        tokenize(text)
60            .take_while(Result::is_ok)
61            .map(|result| result.unwrap().0)
62            .collect()
63    } else {
64        tokenize(text)
65            .map(|result| match result {
66                Err(e) => Err(SgfParseError::LexerError(e)),
67                Ok((token, _span)) => Ok(token),
68            })
69            .collect::<Result<Vec<_>, _>>()?
70    };
71    split_by_gametree(&tokens, options.lenient)?
72        .into_iter()
73        .map(|tokens| {
74            let gametype = if options.lenient {
75                find_gametype(tokens).unwrap_or(GameType::Go)
76            } else {
77                find_gametype(tokens)?
78            };
79            match gametype {
80                GameType::Go => parse_gametree::<go::Prop>(tokens, options),
81                GameType::Unknown => parse_gametree::<unknown_game::Prop>(tokens, options),
82            }
83        })
84        .collect::<Result<_, _>>()
85}
86
87/// Options for parsing SGF files.
88///
89/// # Examples
90/// See [`parse_with_options`] for usage examples.
91pub struct ParseOptions {
92    /// Whether to allow conversion of FF\[3\] mixed case identifiers to FF\[4\].
93    ///
94    /// All lower case letters are dropped.
95    /// This should allow parsing any older files which are valid, but not valid FF\[4\].
96    pub convert_mixed_case_identifiers: bool,
97    /// Whether to use lenient parsing.
98    ///
99    /// In lenient mode, the parser should never return an error, but will instead parse the SGF
100    /// until it hits an error, and then return whatever it's managed to parse.
101    pub lenient: bool,
102}
103
104impl Default for ParseOptions {
105    fn default() -> Self {
106        ParseOptions {
107            convert_mixed_case_identifiers: true,
108            lenient: false,
109        }
110    }
111}
112
113/// Error type for failures parsing sgf from text.
114#[derive(Debug, Clone, Copy, PartialEq, Eq)]
115pub enum SgfParseError {
116    LexerError(LexerError),
117    UnexpectedGameTreeStart,
118    UnexpectedGameTreeEnd,
119    UnexpectedProperty,
120    UnexpectedEndOfData,
121    UnexpectedGameType,
122    InvalidFF4Property,
123    InvalidGameType,
124    NoGameTrees,
125}
126
127impl From<LexerError> for SgfParseError {
128    fn from(error: LexerError) -> Self {
129        Self::LexerError(error)
130    }
131}
132
133impl std::fmt::Display for SgfParseError {
134    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
135        match self {
136            SgfParseError::LexerError(e) => write!(f, "Error tokenizing: {e}"),
137            SgfParseError::UnexpectedGameTreeStart => write!(f, "Unexpected start of game tree"),
138            SgfParseError::UnexpectedGameTreeEnd => write!(f, "Unexpected end of game tree"),
139            SgfParseError::UnexpectedProperty => write!(f, "Unexpected property"),
140            SgfParseError::UnexpectedEndOfData => write!(f, "Unexpected end of data"),
141            SgfParseError::UnexpectedGameType => write!(f, "Unexpected game type"),
142            SgfParseError::InvalidFF4Property => {
143                write!(
144                    f,
145                    "Invalid FF[4] property without `convert_mixed_case_identifiers`"
146                )
147            }
148            SgfParseError::InvalidGameType => write!(f, "Invalid game type"),
149            SgfParseError::NoGameTrees => write!(f, "No game trees found"),
150        }
151    }
152}
153
154impl std::error::Error for SgfParseError {}
155
156// Split the tokens up into individual gametrees.
157//
158// This will let us easily scan each gametree for GM properties.
159// Only considers StartGameTree/EndGameTree tokens.
160fn split_by_gametree(tokens: &[Token], lenient: bool) -> Result<Vec<&[Token]>, SgfParseError> {
161    let mut gametrees = vec![];
162    let mut gametree_depth: u64 = 0;
163    let mut slice_start = 0;
164    for (i, token) in tokens.iter().enumerate() {
165        match token {
166            Token::StartGameTree => gametree_depth += 1,
167            Token::EndGameTree => {
168                if gametree_depth == 0 {
169                    if lenient {
170                        break;
171                    } else {
172                        return Err(SgfParseError::UnexpectedGameTreeEnd);
173                    }
174                }
175                gametree_depth -= 1;
176                if gametree_depth == 0 {
177                    gametrees.push(&tokens[slice_start..=i]);
178                    slice_start = i + 1;
179                }
180            }
181            _ => {}
182        }
183    }
184    if gametree_depth != 0 {
185        if lenient {
186            // For lenient parsing assume all remaining tokens are part of the last gametree.
187            gametrees.push(&tokens[slice_start..]);
188        } else {
189            return Err(SgfParseError::UnexpectedEndOfData);
190        }
191    }
192    if gametrees.is_empty() {
193        if lenient {
194            // For a valid SGF there needs to be at least one gametree. If we didn't fine one but we're
195            // using lenient mode, just populate an empty gametree.
196            gametrees.push(&[]);
197        } else {
198            return Err(SgfParseError::NoGameTrees);
199        }
200    }
201
202    Ok(gametrees)
203}
204
205// Parse a single gametree of a known type.
206fn parse_gametree<Prop: SgfProp>(
207    tokens: &[Token],
208    options: &ParseOptions,
209) -> Result<GameTree, SgfParseError>
210where
211    SgfNode<Prop>: std::convert::Into<GameTree>,
212{
213    // TODO: Rewrite this without `unsafe`
214    let mut collection: Vec<SgfNode<Prop>> = vec![];
215    // //// Pointer to the `Vec` of children we're currently building.
216    let mut current_node_list_ptr = NonNull::new(&mut collection).unwrap();
217    // Stack of pointers to incomplete `Vec`s of children.
218    let mut incomplete_child_lists: Vec<NonNull<Vec<SgfNode<Prop>>>> = vec![];
219    //// Using pointers involves some unsafe calls, but should be ok here.
220    //// Since pointers are always initialized from real structs, and those structs
221    //// live for the whole function body, our only safety concern is dangling pointers.
222    ////
223    //// Since we build the tree traversing depth-first those structs shouldn't be
224    //// modified while the pointer is live. Heap-allocated contents of their
225    //// `children` may be modified, but that shouldn't change anything.
226
227    let mut tokens = tokens.iter().peekable();
228    while let Some(token) = tokens.next() {
229        match token {
230            Token::StartGameTree => {
231                // SGF game trees must have a root node.
232                if let Some(node_list_ptr) = incomplete_child_lists.last() {
233                    let node_list = unsafe { node_list_ptr.as_ref() };
234                    if node_list.is_empty() {
235                        if options.lenient {
236                            break;
237                        } else {
238                            return Err(SgfParseError::UnexpectedGameTreeStart);
239                        }
240                    }
241                }
242                incomplete_child_lists.push(current_node_list_ptr);
243            }
244            Token::EndGameTree => match incomplete_child_lists.pop() {
245                Some(node_list) => current_node_list_ptr = node_list,
246                None => {
247                    if options.lenient {
248                        break;
249                    } else {
250                        return Err(SgfParseError::UnexpectedGameTreeEnd);
251                    }
252                }
253            },
254            Token::StartNode => {
255                let mut new_node = SgfNode::default();
256                let mut prop_tokens = vec![];
257                while let Some(Token::Property(_)) = tokens.peek() {
258                    prop_tokens.push(tokens.next().unwrap());
259                }
260                for token in prop_tokens {
261                    match token {
262                        // TODO: Consider refactoring to consume tokens and clone of values.
263                        Token::Property((identifier, values)) => {
264                            let identifier = {
265                                if identifier.chars().all(|c| c.is_ascii_uppercase()) {
266                                    identifier.clone()
267                                } else if options.convert_mixed_case_identifiers {
268                                    identifier
269                                        .chars()
270                                        .filter(|c| c.is_ascii_uppercase())
271                                        .collect()
272                                } else if options.lenient {
273                                    break;
274                                } else {
275                                    return Err(SgfParseError::InvalidFF4Property);
276                                }
277                            };
278                            new_node
279                                .properties
280                                .push(Prop::new(identifier, values.clone()))
281                        }
282                        _ => unreachable!(),
283                    }
284                }
285                let node_list = unsafe { current_node_list_ptr.as_mut() };
286                node_list.push(new_node);
287                current_node_list_ptr =
288                    NonNull::new(&mut node_list.last_mut().unwrap().children).unwrap();
289            }
290            Token::Property(_) => {
291                if options.lenient {
292                    break;
293                } else {
294                    return Err(SgfParseError::UnexpectedProperty);
295                }
296            }
297        }
298    }
299
300    if !options.lenient && (!incomplete_child_lists.is_empty() || collection.len() != 1) {
301        return Err(SgfParseError::UnexpectedEndOfData);
302    }
303    let mut root_node = if options.lenient {
304        // A valid game tree must have at least a single (empty) node. So make one!
305        collection.into_iter().next().unwrap_or_default()
306    } else {
307        collection
308            .into_iter()
309            .next()
310            .ok_or(SgfParseError::UnexpectedEndOfData)?
311    };
312    root_node.is_root = true;
313    Ok(root_node.into())
314}
315
316// Figure out which game to parse from a slice of tokens.
317//
318// This function is necessary because we need to know the game before we can do the parsing.
319fn find_gametype(tokens: &[Token]) -> Result<GameType, SgfParseError> {
320    match find_gametree_root_prop_values("GM", tokens)?.map(|v| v.as_slice()) {
321        None => Ok(GameType::Go),
322        Some([value]) if value.parse::<i32>().is_ok() => match value.as_str() {
323            "1" => Ok(GameType::Go),
324            _ => Ok(GameType::Unknown),
325        },
326        _ => Err(SgfParseError::InvalidGameType),
327    }
328}
329
330// Find the property values for a given identifier in the root node from the gametree's tokens.
331//
332// We use this to determine key root properties (like GM and FF) before parsing.
333// Returns an error if there's more than one match.
334fn find_gametree_root_prop_values<'a>(
335    prop_ident: &'a str,
336    tokens: &'a [Token],
337) -> Result<Option<&'a Vec<String>>, SgfParseError> {
338    // Find the matching property values in the first node.
339    // Skip the initial StartGameTree, StartNode tokens; we'll handle any errors later.
340    let matching_tokens: Vec<&Vec<String>> = tokens
341        .iter()
342        .skip(2)
343        .take_while(|&token| matches!(token, Token::Property(_)))
344        .filter_map(move |token| match token {
345            Token::Property((ident, values)) if ident == prop_ident => Some(values),
346            _ => None,
347        })
348        .collect();
349
350    match matching_tokens.len() {
351        0 => Ok(None),
352        1 => Ok(Some(matching_tokens[0])),
353        _ => Err(SgfParseError::UnexpectedProperty),
354    }
355}
356
357#[cfg(test)]
358mod test {
359    use super::*;
360    use crate::{go, serialize};
361
362    fn load_test_sgf() -> Result<String, Box<dyn std::error::Error>> {
363        // See https://www.red-bean.com/sgf/examples/
364        let mut sgf_path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
365        sgf_path.push("resources/test/ff4_ex.sgf");
366        let data = std::fs::read_to_string(sgf_path)?;
367
368        Ok(data)
369    }
370
371    fn get_go_nodes() -> Result<Vec<SgfNode<go::Prop>>, Box<dyn std::error::Error>> {
372        let data = load_test_sgf()?;
373
374        Ok(go::parse(&data)?)
375    }
376
377    fn node_depth(mut sgf_node: &SgfNode<go::Prop>) -> u64 {
378        let mut depth = 1;
379        while sgf_node.children().count() > 0 {
380            depth += 1;
381            sgf_node = sgf_node.children().next().unwrap();
382        }
383        depth
384    }
385
386    #[test]
387    fn sgf_has_two_gametrees() {
388        let sgf_nodes = get_go_nodes().unwrap();
389        assert_eq!(sgf_nodes.len(), 2);
390    }
391
392    #[test]
393    fn gametree_one_has_five_variations() {
394        let sgf_nodes = get_go_nodes().unwrap();
395        let sgf_node = &sgf_nodes[0];
396        assert_eq!(sgf_node.children().count(), 5);
397    }
398
399    #[test]
400    fn gametree_one_has_size_19() {
401        let sgf_nodes = get_go_nodes().unwrap();
402        let sgf_node = &sgf_nodes[0];
403        match sgf_node.get_property("SZ") {
404            Some(go::Prop::SZ(size)) => assert_eq!(size, &(19, 19)),
405            _ => unreachable!("Expected size property"),
406        }
407    }
408
409    #[test]
410    fn gametree_variation_depths() {
411        let sgf_nodes = get_go_nodes().unwrap();
412        let sgf_node = &sgf_nodes[0];
413        let children: Vec<_> = sgf_node.children().collect();
414        assert_eq!(node_depth(children[0]), 13);
415        assert_eq!(node_depth(children[1]), 4);
416        assert_eq!(node_depth(children[2]), 4);
417    }
418
419    #[test]
420    fn gametree_two_has_one_variation() {
421        let sgf_nodes = get_go_nodes().unwrap();
422        let sgf_node = &sgf_nodes[1];
423        assert_eq!(sgf_node.children().count(), 1);
424    }
425
426    #[test]
427    fn serialize_then_parse() {
428        let data = load_test_sgf().unwrap();
429        let gametrees = parse(&data).unwrap();
430        let text = serialize(&gametrees);
431        assert_eq!(gametrees, parse(&text).unwrap());
432    }
433
434    #[test]
435    fn invalid_property() {
436        let input = "(;GM[1]W[rp.pmonpoqprpsornqmpm])";
437        let sgf_nodes = go::parse(input).unwrap();
438        let expected = vec![
439            go::Prop::GM(1),
440            go::Prop::Invalid("W".to_string(), vec!["rp.pmonpoqprpsornqmpm".to_string()]),
441        ];
442
443        assert_eq!(sgf_nodes.len(), 1);
444        let sgf_node = &sgf_nodes[0];
445        assert_eq!(sgf_node.properties().cloned().collect::<Vec<_>>(), expected);
446    }
447
448    #[test]
449    fn unknown_game() {
450        let input = "(;GM[37]W[rp.pmonpoqprpsornqmpm])";
451        let gametrees = parse(input).unwrap();
452        assert_eq!(gametrees.len(), 1);
453        assert_eq!(gametrees[0].gametype(), GameType::Unknown);
454        let sgf_node = match &gametrees[0] {
455            GameTree::Unknown(node) => node,
456            _ => panic!("Unexpected game type"),
457        };
458        let expected = vec![
459            unknown_game::Prop::GM(37),
460            unknown_game::Prop::W("rp.pmonpoqprpsornqmpm".into()),
461        ];
462
463        assert_eq!(sgf_node.properties().cloned().collect::<Vec<_>>(), expected);
464    }
465
466    #[test]
467    fn mixed_games() {
468        let input = "(;GM[1];W[dd])(;GM[37]W[rp.pmonpoqprpsornqmpm])";
469        let gametrees = parse(input).unwrap();
470        assert_eq!(gametrees.len(), 2);
471        assert_eq!(gametrees[0].gametype(), GameType::Go);
472        assert_eq!(gametrees[1].gametype(), GameType::Unknown);
473    }
474
475    #[test]
476    fn stack_overflow() {
477        // This input generated a stack overflow with the old code
478        let input = "(;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;)";
479        let result = parse(&input);
480        assert!(result.is_ok());
481    }
482
483    #[test]
484    fn converts_up_ff3_property() {
485        let input = "(;GM[1]FF[3]CoPyright[test])";
486        let expected = vec![
487            go::Prop::GM(1),
488            go::Prop::FF(3),
489            go::Prop::CP("test".into()),
490        ];
491
492        let sgf_nodes = go::parse(input).unwrap();
493
494        assert_eq!(sgf_nodes.len(), 1);
495        let properties = sgf_nodes[0].properties().cloned().collect::<Vec<_>>();
496        assert_eq!(properties, expected);
497    }
498
499    #[test]
500    fn doesnt_convert_if_not_allowed() {
501        let input = "(;GM[1]FF[3]CoPyright[test])";
502        let parse_options = ParseOptions {
503            convert_mixed_case_identifiers: false,
504            ..ParseOptions::default()
505        };
506        let result = parse_with_options(input, &parse_options);
507        assert_eq!(result, Err(SgfParseError::InvalidFF4Property));
508    }
509
510    #[test]
511    fn compressed_list_for_unknown_game() {
512        let input = "(;GM[127]MA[a:b])";
513        let gametree = parse(&input).unwrap().pop().unwrap();
514        let node = match gametree {
515            GameTree::Unknown(node) => node,
516            _ => panic!("Expected Unknown Game type"),
517        };
518        match node.get_property("MA") {
519            Some(unknown_game::Prop::MA(values)) => {
520                assert_eq!(values.len(), 1);
521                assert!(values.contains("a:b"));
522            }
523            _ => panic!("MA prop not found"),
524        }
525    }
526
527    #[test]
528    fn strips_whitespace() {
529        let input = "\n(;GM[1];B[cc])";
530        let sgf_nodes = go::parse(&input).unwrap();
531        assert_eq!(sgf_nodes.len(), 1);
532    }
533
534    #[test]
535    fn lenient_parsing_unclosed_parens_ok() {
536        let input = "\n(;GM[1];B[cc]";
537        let parse_options = ParseOptions {
538            lenient: true,
539            ..ParseOptions::default()
540        };
541        let game_trees = parse_with_options(input, &parse_options).unwrap();
542        assert_eq!(game_trees.len(), 1);
543    }
544
545    #[test]
546    fn lenient_parsing_ignores_trailing_garbage() {
547        let input = "\n(;GM[1];B[cc]))";
548        let parse_options = ParseOptions {
549            lenient: true,
550            ..ParseOptions::default()
551        };
552        let game_trees = parse_with_options(input, &parse_options).unwrap();
553        assert_eq!(game_trees.len(), 1);
554    }
555
556    #[test]
557    fn lenient_parsing_handles_unescaped_property_end() {
558        let input = "(;B[cc];W[dd];C[username [12k]: foo])";
559        let parse_options = ParseOptions {
560            lenient: true,
561            ..ParseOptions::default()
562        };
563        let game_trees = parse_with_options(input, &parse_options).unwrap();
564        assert_eq!(game_trees.len(), 1);
565        let sgf_node = game_trees[0].as_go_node().unwrap();
566        // Should parse up through "[12k]" successfully
567        assert_eq!(sgf_node.main_variation().count(), 3);
568    }
569
570    #[test]
571    fn lenient_parsing_handles_unclosed_property_value() {
572        let input = "(;B[cc];W[dd];B[ee";
573        let parse_options = ParseOptions {
574            lenient: true,
575            ..ParseOptions::default()
576        };
577        let game_trees = parse_with_options(input, &parse_options).unwrap();
578        assert_eq!(game_trees.len(), 1);
579        let sgf_node = game_trees[0].as_go_node().unwrap();
580        // Should find 3 nodes. The last unfinished node has no properties since "B[ee" is unclosed.
581        assert_eq!(sgf_node.main_variation().count(), 3);
582        assert_eq!(
583            sgf_node.main_variation().last().unwrap().properties.len(),
584            0
585        );
586    }
587
588    #[test]
589    fn lenient_parsing_handles_missing_property_value() {
590        let input = "(;B[cc];W[dd];B";
591        let parse_options = ParseOptions {
592            lenient: true,
593            ..ParseOptions::default()
594        };
595        let game_trees = parse_with_options(input, &parse_options).unwrap();
596        assert_eq!(game_trees.len(), 1);
597        let sgf_node = game_trees[0].as_go_node().unwrap();
598        // Should find 3 nodes. The last node has no properties since "B" is missing its value
599        assert_eq!(sgf_node.main_variation().count(), 3);
600        assert_eq!(
601            sgf_node.main_variation().last().unwrap().properties.len(),
602            0
603        );
604    }
605
606    #[test]
607    fn lenient_parsing_handles_missing_first_node_start() {
608        let input = "(B[cc])";
609        let parse_options = ParseOptions {
610            lenient: true,
611            ..ParseOptions::default()
612        };
613        let game_trees = parse_with_options(input, &parse_options).unwrap();
614        assert_eq!(game_trees.len(), 1);
615        let sgf_node = game_trees[0].as_go_node().unwrap();
616        // A single empty node.
617        assert_eq!(sgf_node.main_variation().count(), 1);
618        assert_eq!(
619            sgf_node.main_variation().last().unwrap().properties.len(),
620            0
621        );
622    }
623}