sgf_parse/
parser.rs

1use std::ptr::NonNull;
2
3use crate::go;
4use crate::lexer::{tokenize, LexerError, Token};
5use crate::unknown_game;
6use crate::{GameTree, GameType, SgfNode, SgfProp};
7
8/// Returns the [`GameTree`] values parsed from the provided text using default parsing options.
9///
10/// This function will attempt to convert non-FF\[4\] files to FF\[4\] if possible. Check out
11/// [`parse_with_options`] if you want to change the default behavior.
12///
13/// # Errors
14/// If the text can't be parsed as an SGF FF\[4\] collection, then an error is returned.
15///
16/// # Examples
17/// ```
18/// use sgf_parse::{parse, GameType};
19///
20/// let sgf = "(;SZ[9]C[Some comment];B[de];W[fe])(;B[de];W[ff])";
21/// let gametrees = parse(sgf).unwrap();
22/// assert!(gametrees.len() == 2);
23/// assert!(gametrees.iter().all(|gametree| gametree.gametype() == GameType::Go));
24/// ```
25pub fn parse(text: &str) -> Result<Vec<GameTree>, SgfParseError> {
26    parse_with_options(text, &ParseOptions::default())
27}
28
29/// Returns the [`GameTree`] values parsed from the provided text.
30///
31/// # Errors
32/// If the text can't be parsed as an SGF FF\[4\] collection, then an error is returned.
33///
34/// # Examples
35/// ```
36/// use sgf_parse::{parse_with_options, ParseOptions, GameType, SgfParseError};
37///
38/// // Default options
39/// let sgf = "(;SZ[9]C[Some comment];B[de];W[fe])(;B[de];W[ff])";
40/// let gametrees = parse_with_options(sgf, &ParseOptions::default()).unwrap();
41/// assert!(gametrees.len() == 2);
42/// assert!(gametrees.iter().all(|gametree| gametree.gametype() == GameType::Go));
43///
44/// // Strict FF[4] identifiers
45/// let sgf = "(;SZ[9]CoPyright[Julian Andrews 2025];B[de];W[fe])(;B[de];W[ff])";
46/// let parse_options = ParseOptions { convert_mixed_case_identifiers: false };
47/// let result = parse_with_options(sgf, &parse_options);
48/// assert_eq!(result, Err(SgfParseError::InvalidFF4Property));
49/// ```
50pub fn parse_with_options(
51    text: &str,
52    options: &ParseOptions,
53) -> Result<Vec<GameTree>, SgfParseError> {
54    let tokens = tokenize(text)
55        .map(|result| match result {
56            Err(e) => Err(SgfParseError::LexerError(e)),
57            Ok((token, _span)) => Ok(token),
58        })
59        .collect::<Result<Vec<_>, _>>()?;
60    split_by_gametree(&tokens)?
61        .into_iter()
62        .map(|tokens| match find_gametype(tokens)? {
63            GameType::Go => parse_gametree::<go::Prop>(tokens, options),
64            GameType::Unknown => parse_gametree::<unknown_game::Prop>(tokens, options),
65        })
66        .collect::<Result<_, _>>()
67}
68
69/// Options for parsing SGF files.
70///
71/// # Examples
72/// See [`parse_with_options`] for usage examples.
73pub struct ParseOptions {
74    /// Whether to allow conversion of FF\[3\] mixed case identifiers to FF\[4\].
75    ///
76    /// All lower case letters are dropped.
77    /// This should allow parsing any older files which are valid, but not valid FF\[4\].
78    pub convert_mixed_case_identifiers: bool,
79}
80
81impl Default for ParseOptions {
82    fn default() -> Self {
83        ParseOptions {
84            convert_mixed_case_identifiers: true,
85        }
86    }
87}
88
89/// Error type for failures parsing sgf from text.
90#[derive(Debug, Clone, Copy, PartialEq, Eq)]
91pub enum SgfParseError {
92    LexerError(LexerError),
93    UnexpectedGameTreeStart,
94    UnexpectedGameTreeEnd,
95    UnexpectedProperty,
96    UnexpectedEndOfData,
97    UnexpectedGameType,
98    InvalidFF4Property,
99}
100
101impl From<LexerError> for SgfParseError {
102    fn from(error: LexerError) -> Self {
103        Self::LexerError(error)
104    }
105}
106
107impl std::fmt::Display for SgfParseError {
108    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
109        match self {
110            SgfParseError::LexerError(e) => write!(f, "Error tokenizing: {e}"),
111            SgfParseError::UnexpectedGameTreeStart => write!(f, "Unexpected start of game tree"),
112            SgfParseError::UnexpectedGameTreeEnd => write!(f, "Unexpected end of game tree"),
113            SgfParseError::UnexpectedProperty => write!(f, "Unexpected property"),
114            SgfParseError::UnexpectedEndOfData => write!(f, "Unexpected end of data"),
115            SgfParseError::UnexpectedGameType => write!(f, "Unexpected game type"),
116            SgfParseError::InvalidFF4Property => {
117                write!(
118                    f,
119                    "Invalid FF[4] property without `convert_mixed_case_identifiers`"
120                )
121            }
122        }
123    }
124}
125
126impl std::error::Error for SgfParseError {}
127
128// Split the tokens up into individual gametrees.
129//
130// This will let us easily scan each gametree for GM properties.
131// Only considers StartGameTree/EndGameTree tokens.
132fn split_by_gametree(tokens: &[Token]) -> Result<Vec<&[Token]>, SgfParseError> {
133    let mut gametrees = vec![];
134    let mut gametree_depth: u64 = 0;
135    let mut slice_start = 0;
136    for (i, token) in tokens.iter().enumerate() {
137        match token {
138            Token::StartGameTree => gametree_depth += 1,
139            Token::EndGameTree => {
140                if gametree_depth == 0 {
141                    return Err(SgfParseError::UnexpectedGameTreeEnd);
142                }
143                gametree_depth -= 1;
144                if gametree_depth == 0 {
145                    gametrees.push(&tokens[slice_start..=i]);
146                    slice_start = i + 1;
147                }
148            }
149            _ => {}
150        }
151    }
152    if gametree_depth != 0 {
153        return Err(SgfParseError::UnexpectedEndOfData);
154    }
155
156    Ok(gametrees)
157}
158
159// Parse a single gametree of a known type.
160fn parse_gametree<Prop: SgfProp>(
161    tokens: &[Token],
162    options: &ParseOptions,
163) -> Result<GameTree, SgfParseError>
164where
165    SgfNode<Prop>: std::convert::Into<GameTree>,
166{
167    // TODO: Rewrite this without `unsafe`
168    let mut collection: Vec<SgfNode<Prop>> = vec![];
169    // //// Pointer to the `Vec` of children we're currently building.
170    let mut current_node_list_ptr = NonNull::new(&mut collection).unwrap();
171    // Stack of pointers to incomplete `Vec`s of children.
172    let mut incomplete_child_lists: Vec<NonNull<Vec<SgfNode<Prop>>>> = vec![];
173    //// Using pointers involves some unsafe calls, but should be ok here.
174    //// Since pointers are always initialized from real structs, and those structs
175    //// live for the whole function body, our only safety concern is dangling pointers.
176    ////
177    //// Since we build the tree traversing depth-first those structs shouldn't be
178    //// modified while the pointer is live. Heap-allocated contents of their
179    //// `children` may be modified, but that shouldn't change anything.
180
181    let mut tokens = tokens.iter().peekable();
182    while let Some(token) = tokens.next() {
183        match token {
184            Token::StartGameTree => {
185                // SGF game trees must have a root node.
186                if let Some(node_list_ptr) = incomplete_child_lists.last() {
187                    let node_list = unsafe { node_list_ptr.as_ref() };
188                    if node_list.is_empty() {
189                        return Err(SgfParseError::UnexpectedGameTreeStart);
190                    }
191                }
192                incomplete_child_lists.push(current_node_list_ptr);
193            }
194            Token::EndGameTree => match incomplete_child_lists.pop() {
195                Some(node_list) => current_node_list_ptr = node_list,
196                None => return Err(SgfParseError::UnexpectedGameTreeEnd),
197            },
198            Token::StartNode => {
199                let mut new_node = SgfNode::default();
200                let mut prop_tokens = vec![];
201                while let Some(Token::Property(_)) = tokens.peek() {
202                    prop_tokens.push(tokens.next().unwrap());
203                }
204                for token in prop_tokens {
205                    match token {
206                        // TODO: Consider refactoring to consume tokens and clone of values.
207                        Token::Property((identifier, values)) => {
208                            let identifier = {
209                                if identifier.chars().all(|c| c.is_ascii_uppercase()) {
210                                    identifier.clone()
211                                } else if options.convert_mixed_case_identifiers {
212                                    identifier
213                                        .chars()
214                                        .filter(|c| c.is_ascii_uppercase())
215                                        .collect()
216                                } else {
217                                    return Err(SgfParseError::InvalidFF4Property);
218                                }
219                            };
220                            new_node
221                                .properties
222                                .push(Prop::new(identifier, values.clone()))
223                        }
224                        _ => unreachable!(),
225                    }
226                }
227                let node_list = unsafe { current_node_list_ptr.as_mut() };
228                node_list.push(new_node);
229                current_node_list_ptr =
230                    NonNull::new(&mut node_list.last_mut().unwrap().children).unwrap();
231            }
232            Token::Property(_) => return Err(SgfParseError::UnexpectedProperty),
233        }
234    }
235
236    if !incomplete_child_lists.is_empty() || collection.len() != 1 {
237        return Err(SgfParseError::UnexpectedEndOfData);
238    }
239    let mut root_node = collection.into_iter().next().unwrap();
240    root_node.is_root = true;
241    Ok(root_node.into())
242}
243
244// Figure out which game to parse from a slice of tokens.
245//
246// This function is necessary because we need to know the game before we can do the parsing.
247fn find_gametype(tokens: &[Token]) -> Result<GameType, SgfParseError> {
248    match find_gametree_root_prop_values("GM", tokens)? {
249        None => Ok(GameType::Go),
250        Some(values) => {
251            if values.len() != 1 {
252                return Ok(GameType::Unknown);
253            }
254            match values[0].as_str() {
255                "1" => Ok(GameType::Go),
256                _ => Ok(GameType::Unknown),
257            }
258        }
259    }
260}
261
262// Find the property values for a given identifier in the root node from the gametree's tokens.
263//
264// We use this to determine key root properties (like GM and FF) before parsing.
265// Returns an error if there's more than one match.
266fn find_gametree_root_prop_values<'a>(
267    prop_ident: &'a str,
268    tokens: &'a [Token],
269) -> Result<Option<&'a Vec<String>>, SgfParseError> {
270    // Find the matching property values in the first node.
271    // Skip the initial StartGameTree, StartNode tokens; we'll handle any errors later.
272    let matching_tokens: Vec<&Vec<String>> = tokens
273        .iter()
274        .skip(2)
275        .take_while(|&token| matches!(token, Token::Property(_)))
276        .filter_map(move |token| match token {
277            Token::Property((ident, values)) if ident == prop_ident => Some(values),
278            _ => None,
279        })
280        .collect();
281
282    match matching_tokens.len() {
283        0 => Ok(None),
284        1 => Ok(Some(matching_tokens[0])),
285        _ => Err(SgfParseError::UnexpectedProperty),
286    }
287}
288
289#[cfg(test)]
290mod test {
291    use super::*;
292    use crate::{go, serialize};
293
294    fn load_test_sgf() -> Result<String, Box<dyn std::error::Error>> {
295        // See https://www.red-bean.com/sgf/examples/
296        let mut sgf_path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
297        sgf_path.push("resources/test/ff4_ex.sgf");
298        let data = std::fs::read_to_string(sgf_path)?;
299
300        Ok(data)
301    }
302
303    fn get_go_nodes() -> Result<Vec<SgfNode<go::Prop>>, Box<dyn std::error::Error>> {
304        let data = load_test_sgf()?;
305
306        Ok(go::parse(&data)?)
307    }
308
309    fn node_depth(mut sgf_node: &SgfNode<go::Prop>) -> u64 {
310        let mut depth = 1;
311        while sgf_node.children().count() > 0 {
312            depth += 1;
313            sgf_node = sgf_node.children().next().unwrap();
314        }
315        depth
316    }
317
318    #[test]
319    fn sgf_has_two_gametrees() {
320        let sgf_nodes = get_go_nodes().unwrap();
321        assert_eq!(sgf_nodes.len(), 2);
322    }
323
324    #[test]
325    fn gametree_one_has_five_variations() {
326        let sgf_nodes = get_go_nodes().unwrap();
327        let sgf_node = &sgf_nodes[0];
328        assert_eq!(sgf_node.children().count(), 5);
329    }
330
331    #[test]
332    fn gametree_one_has_size_19() {
333        let sgf_nodes = get_go_nodes().unwrap();
334        let sgf_node = &sgf_nodes[0];
335        match sgf_node.get_property("SZ") {
336            Some(go::Prop::SZ(size)) => assert_eq!(size, &(19, 19)),
337            _ => unreachable!("Expected size property"),
338        }
339    }
340
341    #[test]
342    fn gametree_variation_depths() {
343        let sgf_nodes = get_go_nodes().unwrap();
344        let sgf_node = &sgf_nodes[0];
345        let children: Vec<_> = sgf_node.children().collect();
346        assert_eq!(node_depth(children[0]), 13);
347        assert_eq!(node_depth(children[1]), 4);
348        assert_eq!(node_depth(children[2]), 4);
349    }
350
351    #[test]
352    fn gametree_two_has_one_variation() {
353        let sgf_nodes = get_go_nodes().unwrap();
354        let sgf_node = &sgf_nodes[1];
355        assert_eq!(sgf_node.children().count(), 1);
356    }
357
358    #[test]
359    fn serialize_then_parse() {
360        let data = load_test_sgf().unwrap();
361        let gametrees = parse(&data).unwrap();
362        let text = serialize(&gametrees);
363        assert_eq!(gametrees, parse(&text).unwrap());
364    }
365
366    #[test]
367    fn invalid_property() {
368        let input = "(;GM[1]W[rp.pmonpoqprpsornqmpm])";
369        let sgf_nodes = go::parse(input).unwrap();
370        let expected = vec![
371            go::Prop::GM(1),
372            go::Prop::Invalid("W".to_string(), vec!["rp.pmonpoqprpsornqmpm".to_string()]),
373        ];
374
375        assert_eq!(sgf_nodes.len(), 1);
376        let sgf_node = &sgf_nodes[0];
377        assert_eq!(sgf_node.properties().cloned().collect::<Vec<_>>(), expected);
378    }
379
380    #[test]
381    fn unknown_game() {
382        let input = "(;GM[37]W[rp.pmonpoqprpsornqmpm])";
383        let gametrees = parse(input).unwrap();
384        assert_eq!(gametrees.len(), 1);
385        assert_eq!(gametrees[0].gametype(), GameType::Unknown);
386        let sgf_node = match &gametrees[0] {
387            GameTree::Unknown(node) => node,
388            _ => panic!("Unexpected game type"),
389        };
390        let expected = vec![
391            unknown_game::Prop::GM(37),
392            unknown_game::Prop::W("rp.pmonpoqprpsornqmpm".into()),
393        ];
394
395        assert_eq!(sgf_node.properties().cloned().collect::<Vec<_>>(), expected);
396    }
397
398    #[test]
399    fn mixed_games() {
400        let input = "(;GM[1];W[dd])(;GM[37]W[rp.pmonpoqprpsornqmpm])";
401        let gametrees = parse(input).unwrap();
402        assert_eq!(gametrees.len(), 2);
403        assert_eq!(gametrees[0].gametype(), GameType::Go);
404        assert_eq!(gametrees[1].gametype(), GameType::Unknown);
405    }
406
407    #[test]
408    fn stack_overflow() {
409        // This input generated a stack overflow with the old code
410        let input = "(;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;)";
411        let result = parse(&input);
412        assert!(result.is_ok());
413    }
414
415    #[test]
416    fn converts_up_ff3_property() {
417        let input = "(;GM[1]FF[3]CoPyright[test])";
418        let expected = vec![
419            go::Prop::GM(1),
420            go::Prop::FF(3),
421            go::Prop::CP("test".into()),
422        ];
423
424        let sgf_nodes = go::parse(input).unwrap();
425
426        assert_eq!(sgf_nodes.len(), 1);
427        let properties = sgf_nodes[0].properties().cloned().collect::<Vec<_>>();
428        assert_eq!(properties, expected);
429    }
430
431    #[test]
432    fn doesnt_convert_if_not_allowed() {
433        let input = "(;GM[1]FF[3]CoPyright[test])";
434        let parse_options = ParseOptions {
435            convert_mixed_case_identifiers: false,
436            ..ParseOptions::default()
437        };
438        let result = parse_with_options(input, &parse_options);
439        assert_eq!(result, Err(SgfParseError::InvalidFF4Property));
440    }
441
442    #[test]
443    fn compressed_list_for_unknown_game() {
444        let input = "(;GM[]MA[a:b])";
445        let gametree = parse(&input).unwrap().pop().unwrap();
446        let node = match gametree {
447            GameTree::Unknown(node) => node,
448            _ => panic!("Expected Unknown Game type"),
449        };
450        match node.get_property("MA") {
451            Some(unknown_game::Prop::MA(values)) => {
452                assert_eq!(values.len(), 1);
453                assert!(values.contains("a:b"));
454            }
455            _ => panic!("MA prop not found"),
456        }
457    }
458}