1use std::ptr::NonNull;
2
3use crate::go;
4use crate::lexer::{tokenize, LexerError, Token};
5use crate::unknown_game;
6use crate::{GameTree, GameType, SgfNode, SgfProp};
7
8pub fn parse(text: &str) -> Result<Vec<GameTree>, SgfParseError> {
26 parse_with_options(text, &ParseOptions::default())
27}
28
29pub fn parse_with_options(
54 text: &str,
55 options: &ParseOptions,
56) -> Result<Vec<GameTree>, SgfParseError> {
57 let text = text.trim();
58 let tokens = tokenize(text)
59 .map(|result| match result {
60 Err(e) => Err(SgfParseError::LexerError(e)),
61 Ok((token, _span)) => Ok(token),
62 })
63 .collect::<Result<Vec<_>, _>>()?;
64 split_by_gametree(&tokens, options.lenient)?
65 .into_iter()
66 .map(|tokens| match find_gametype(tokens)? {
67 GameType::Go => parse_gametree::<go::Prop>(tokens, options),
68 GameType::Unknown => parse_gametree::<unknown_game::Prop>(tokens, options),
69 })
70 .collect::<Result<_, _>>()
71}
72
73pub struct ParseOptions {
78 pub convert_mixed_case_identifiers: bool,
83 pub lenient: bool,
88}
89
90impl Default for ParseOptions {
91 fn default() -> Self {
92 ParseOptions {
93 convert_mixed_case_identifiers: true,
94 lenient: false,
95 }
96 }
97}
98
99#[derive(Debug, Clone, Copy, PartialEq, Eq)]
101pub enum SgfParseError {
102 LexerError(LexerError),
103 UnexpectedGameTreeStart,
104 UnexpectedGameTreeEnd,
105 UnexpectedProperty,
106 UnexpectedEndOfData,
107 UnexpectedGameType,
108 InvalidFF4Property,
109}
110
111impl From<LexerError> for SgfParseError {
112 fn from(error: LexerError) -> Self {
113 Self::LexerError(error)
114 }
115}
116
117impl std::fmt::Display for SgfParseError {
118 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
119 match self {
120 SgfParseError::LexerError(e) => write!(f, "Error tokenizing: {e}"),
121 SgfParseError::UnexpectedGameTreeStart => write!(f, "Unexpected start of game tree"),
122 SgfParseError::UnexpectedGameTreeEnd => write!(f, "Unexpected end of game tree"),
123 SgfParseError::UnexpectedProperty => write!(f, "Unexpected property"),
124 SgfParseError::UnexpectedEndOfData => write!(f, "Unexpected end of data"),
125 SgfParseError::UnexpectedGameType => write!(f, "Unexpected game type"),
126 SgfParseError::InvalidFF4Property => {
127 write!(
128 f,
129 "Invalid FF[4] property without `convert_mixed_case_identifiers`"
130 )
131 }
132 }
133 }
134}
135
136impl std::error::Error for SgfParseError {}
137
138fn split_by_gametree(tokens: &[Token], lenient: bool) -> Result<Vec<&[Token]>, SgfParseError> {
143 let mut gametrees = vec![];
144 let mut gametree_depth: u64 = 0;
145 let mut slice_start = 0;
146 for (i, token) in tokens.iter().enumerate() {
147 match token {
148 Token::StartGameTree => gametree_depth += 1,
149 Token::EndGameTree => {
150 if gametree_depth == 0 {
151 if lenient {
152 break;
153 } else {
154 return Err(SgfParseError::UnexpectedGameTreeEnd);
155 }
156 }
157 gametree_depth -= 1;
158 if gametree_depth == 0 {
159 gametrees.push(&tokens[slice_start..=i]);
160 slice_start = i + 1;
161 }
162 }
163 _ => {}
164 }
165 }
166 if gametree_depth != 0 {
167 if lenient {
168 gametrees.push(&tokens[slice_start..]);
170 } else {
171 return Err(SgfParseError::UnexpectedEndOfData);
172 }
173 }
174
175 Ok(gametrees)
176}
177
178fn parse_gametree<Prop: SgfProp>(
180 tokens: &[Token],
181 options: &ParseOptions,
182) -> Result<GameTree, SgfParseError>
183where
184 SgfNode<Prop>: std::convert::Into<GameTree>,
185{
186 let mut collection: Vec<SgfNode<Prop>> = vec![];
188 let mut current_node_list_ptr = NonNull::new(&mut collection).unwrap();
190 let mut incomplete_child_lists: Vec<NonNull<Vec<SgfNode<Prop>>>> = vec![];
192 let mut tokens = tokens.iter().peekable();
201 while let Some(token) = tokens.next() {
202 match token {
203 Token::StartGameTree => {
204 if let Some(node_list_ptr) = incomplete_child_lists.last() {
206 let node_list = unsafe { node_list_ptr.as_ref() };
207 if node_list.is_empty() {
208 if options.lenient {
209 break;
210 } else {
211 return Err(SgfParseError::UnexpectedGameTreeStart);
212 }
213 }
214 }
215 incomplete_child_lists.push(current_node_list_ptr);
216 }
217 Token::EndGameTree => match incomplete_child_lists.pop() {
218 Some(node_list) => current_node_list_ptr = node_list,
219 None => {
220 if options.lenient {
221 break;
222 } else {
223 return Err(SgfParseError::UnexpectedGameTreeEnd);
224 }
225 }
226 },
227 Token::StartNode => {
228 let mut new_node = SgfNode::default();
229 let mut prop_tokens = vec![];
230 while let Some(Token::Property(_)) = tokens.peek() {
231 prop_tokens.push(tokens.next().unwrap());
232 }
233 for token in prop_tokens {
234 match token {
235 Token::Property((identifier, values)) => {
237 let identifier = {
238 if identifier.chars().all(|c| c.is_ascii_uppercase()) {
239 identifier.clone()
240 } else if options.convert_mixed_case_identifiers {
241 identifier
242 .chars()
243 .filter(|c| c.is_ascii_uppercase())
244 .collect()
245 } else if options.lenient {
246 break;
247 } else {
248 return Err(SgfParseError::InvalidFF4Property);
249 }
250 };
251 new_node
252 .properties
253 .push(Prop::new(identifier, values.clone()))
254 }
255 _ => unreachable!(),
256 }
257 }
258 let node_list = unsafe { current_node_list_ptr.as_mut() };
259 node_list.push(new_node);
260 current_node_list_ptr =
261 NonNull::new(&mut node_list.last_mut().unwrap().children).unwrap();
262 }
263 Token::Property(_) => {
264 if options.lenient {
265 break;
266 } else {
267 return Err(SgfParseError::UnexpectedProperty);
268 }
269 }
270 }
271 }
272
273 if !options.lenient && (!incomplete_child_lists.is_empty() || collection.len() != 1) {
274 return Err(SgfParseError::UnexpectedEndOfData);
275 }
276 let mut root_node = collection
277 .into_iter()
278 .next()
279 .ok_or(SgfParseError::UnexpectedEndOfData)?;
280 root_node.is_root = true;
281 Ok(root_node.into())
282}
283
284fn find_gametype(tokens: &[Token]) -> Result<GameType, SgfParseError> {
288 match find_gametree_root_prop_values("GM", tokens)? {
289 None => Ok(GameType::Go),
290 Some(values) => {
291 if values.len() != 1 {
292 return Ok(GameType::Unknown);
293 }
294 match values[0].as_str() {
295 "1" => Ok(GameType::Go),
296 _ => Ok(GameType::Unknown),
297 }
298 }
299 }
300}
301
302fn find_gametree_root_prop_values<'a>(
307 prop_ident: &'a str,
308 tokens: &'a [Token],
309) -> Result<Option<&'a Vec<String>>, SgfParseError> {
310 let matching_tokens: Vec<&Vec<String>> = tokens
313 .iter()
314 .skip(2)
315 .take_while(|&token| matches!(token, Token::Property(_)))
316 .filter_map(move |token| match token {
317 Token::Property((ident, values)) if ident == prop_ident => Some(values),
318 _ => None,
319 })
320 .collect();
321
322 match matching_tokens.len() {
323 0 => Ok(None),
324 1 => Ok(Some(matching_tokens[0])),
325 _ => Err(SgfParseError::UnexpectedProperty),
326 }
327}
328
329#[cfg(test)]
330mod test {
331 use super::*;
332 use crate::{go, serialize};
333
334 fn load_test_sgf() -> Result<String, Box<dyn std::error::Error>> {
335 let mut sgf_path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
337 sgf_path.push("resources/test/ff4_ex.sgf");
338 let data = std::fs::read_to_string(sgf_path)?;
339
340 Ok(data)
341 }
342
343 fn get_go_nodes() -> Result<Vec<SgfNode<go::Prop>>, Box<dyn std::error::Error>> {
344 let data = load_test_sgf()?;
345
346 Ok(go::parse(&data)?)
347 }
348
349 fn node_depth(mut sgf_node: &SgfNode<go::Prop>) -> u64 {
350 let mut depth = 1;
351 while sgf_node.children().count() > 0 {
352 depth += 1;
353 sgf_node = sgf_node.children().next().unwrap();
354 }
355 depth
356 }
357
358 #[test]
359 fn sgf_has_two_gametrees() {
360 let sgf_nodes = get_go_nodes().unwrap();
361 assert_eq!(sgf_nodes.len(), 2);
362 }
363
364 #[test]
365 fn gametree_one_has_five_variations() {
366 let sgf_nodes = get_go_nodes().unwrap();
367 let sgf_node = &sgf_nodes[0];
368 assert_eq!(sgf_node.children().count(), 5);
369 }
370
371 #[test]
372 fn gametree_one_has_size_19() {
373 let sgf_nodes = get_go_nodes().unwrap();
374 let sgf_node = &sgf_nodes[0];
375 match sgf_node.get_property("SZ") {
376 Some(go::Prop::SZ(size)) => assert_eq!(size, &(19, 19)),
377 _ => unreachable!("Expected size property"),
378 }
379 }
380
381 #[test]
382 fn gametree_variation_depths() {
383 let sgf_nodes = get_go_nodes().unwrap();
384 let sgf_node = &sgf_nodes[0];
385 let children: Vec<_> = sgf_node.children().collect();
386 assert_eq!(node_depth(children[0]), 13);
387 assert_eq!(node_depth(children[1]), 4);
388 assert_eq!(node_depth(children[2]), 4);
389 }
390
391 #[test]
392 fn gametree_two_has_one_variation() {
393 let sgf_nodes = get_go_nodes().unwrap();
394 let sgf_node = &sgf_nodes[1];
395 assert_eq!(sgf_node.children().count(), 1);
396 }
397
398 #[test]
399 fn serialize_then_parse() {
400 let data = load_test_sgf().unwrap();
401 let gametrees = parse(&data).unwrap();
402 let text = serialize(&gametrees);
403 assert_eq!(gametrees, parse(&text).unwrap());
404 }
405
406 #[test]
407 fn invalid_property() {
408 let input = "(;GM[1]W[rp.pmonpoqprpsornqmpm])";
409 let sgf_nodes = go::parse(input).unwrap();
410 let expected = vec![
411 go::Prop::GM(1),
412 go::Prop::Invalid("W".to_string(), vec!["rp.pmonpoqprpsornqmpm".to_string()]),
413 ];
414
415 assert_eq!(sgf_nodes.len(), 1);
416 let sgf_node = &sgf_nodes[0];
417 assert_eq!(sgf_node.properties().cloned().collect::<Vec<_>>(), expected);
418 }
419
420 #[test]
421 fn unknown_game() {
422 let input = "(;GM[37]W[rp.pmonpoqprpsornqmpm])";
423 let gametrees = parse(input).unwrap();
424 assert_eq!(gametrees.len(), 1);
425 assert_eq!(gametrees[0].gametype(), GameType::Unknown);
426 let sgf_node = match &gametrees[0] {
427 GameTree::Unknown(node) => node,
428 _ => panic!("Unexpected game type"),
429 };
430 let expected = vec![
431 unknown_game::Prop::GM(37),
432 unknown_game::Prop::W("rp.pmonpoqprpsornqmpm".into()),
433 ];
434
435 assert_eq!(sgf_node.properties().cloned().collect::<Vec<_>>(), expected);
436 }
437
438 #[test]
439 fn mixed_games() {
440 let input = "(;GM[1];W[dd])(;GM[37]W[rp.pmonpoqprpsornqmpm])";
441 let gametrees = parse(input).unwrap();
442 assert_eq!(gametrees.len(), 2);
443 assert_eq!(gametrees[0].gametype(), GameType::Go);
444 assert_eq!(gametrees[1].gametype(), GameType::Unknown);
445 }
446
447 #[test]
448 fn stack_overflow() {
449 let input = "(;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;)";
451 let result = parse(&input);
452 assert!(result.is_ok());
453 }
454
455 #[test]
456 fn converts_up_ff3_property() {
457 let input = "(;GM[1]FF[3]CoPyright[test])";
458 let expected = vec![
459 go::Prop::GM(1),
460 go::Prop::FF(3),
461 go::Prop::CP("test".into()),
462 ];
463
464 let sgf_nodes = go::parse(input).unwrap();
465
466 assert_eq!(sgf_nodes.len(), 1);
467 let properties = sgf_nodes[0].properties().cloned().collect::<Vec<_>>();
468 assert_eq!(properties, expected);
469 }
470
471 #[test]
472 fn doesnt_convert_if_not_allowed() {
473 let input = "(;GM[1]FF[3]CoPyright[test])";
474 let parse_options = ParseOptions {
475 convert_mixed_case_identifiers: false,
476 ..ParseOptions::default()
477 };
478 let result = parse_with_options(input, &parse_options);
479 assert_eq!(result, Err(SgfParseError::InvalidFF4Property));
480 }
481
482 #[test]
483 fn compressed_list_for_unknown_game() {
484 let input = "(;GM[]MA[a:b])";
485 let gametree = parse(&input).unwrap().pop().unwrap();
486 let node = match gametree {
487 GameTree::Unknown(node) => node,
488 _ => panic!("Expected Unknown Game type"),
489 };
490 match node.get_property("MA") {
491 Some(unknown_game::Prop::MA(values)) => {
492 assert_eq!(values.len(), 1);
493 assert!(values.contains("a:b"));
494 }
495 _ => panic!("MA prop not found"),
496 }
497 }
498
499 #[test]
500 fn strips_whitespace() {
501 let input = "\n(;GM[1];B[cc])";
502 let sgf_nodes = go::parse(&input).unwrap();
503 assert_eq!(sgf_nodes.len(), 1);
504 }
505
506 #[test]
507 fn lenient_parsing_unclosed_parens_ok() {
508 let input = "\n(;GM[1];B[cc]";
509 let parse_options = ParseOptions {
510 lenient: true,
511 ..ParseOptions::default()
512 };
513 let sgf_nodes = parse_with_options(input, &parse_options).unwrap();
514 assert_eq!(sgf_nodes.len(), 1);
515 }
516
517 #[test]
518 fn lenient_parsing_ignores_trailing_garbage() {
519 let input = "\n(;GM[1];B[cc]))";
520 let parse_options = ParseOptions {
521 lenient: true,
522 ..ParseOptions::default()
523 };
524 let sgf_nodes = parse_with_options(input, &parse_options).unwrap();
525 assert_eq!(sgf_nodes.len(), 1);
526 }
527}