1use std::ptr::NonNull;
2
3use crate::go;
4use crate::lexer::{tokenize, LexerError, Token};
5use crate::unknown_game;
6use crate::{GameTree, GameType, SgfNode, SgfProp};
7
8pub fn parse(text: &str) -> Result<Vec<GameTree>, SgfParseError> {
26 parse_with_options(text, &ParseOptions::default())
27}
28
29pub fn parse_with_options(
54 text: &str,
55 options: &ParseOptions,
56) -> Result<Vec<GameTree>, SgfParseError> {
57 let text = text.trim();
58 let tokens = if options.lenient {
59 tokenize(text)
60 .take_while(Result::is_ok)
61 .map(|result| result.unwrap().0)
62 .collect()
63 } else {
64 tokenize(text)
65 .map(|result| match result {
66 Err(e) => Err(SgfParseError::LexerError(e)),
67 Ok((token, _span)) => Ok(token),
68 })
69 .collect::<Result<Vec<_>, _>>()?
70 };
71 split_by_gametree(&tokens, options.lenient)?
72 .into_iter()
73 .map(|tokens| {
74 let gametype = if options.lenient {
75 find_gametype(tokens).unwrap_or(GameType::Go)
76 } else {
77 find_gametype(tokens)?
78 };
79 match gametype {
80 GameType::Go => parse_gametree::<go::Prop>(tokens, options),
81 GameType::Unknown => parse_gametree::<unknown_game::Prop>(tokens, options),
82 }
83 })
84 .collect::<Result<_, _>>()
85}
86
87pub struct ParseOptions {
92 pub convert_mixed_case_identifiers: bool,
97 pub lenient: bool,
102}
103
104impl Default for ParseOptions {
105 fn default() -> Self {
106 ParseOptions {
107 convert_mixed_case_identifiers: true,
108 lenient: false,
109 }
110 }
111}
112
113#[derive(Debug, Clone, Copy, PartialEq, Eq)]
115pub enum SgfParseError {
116 LexerError(LexerError),
117 UnexpectedGameTreeStart,
118 UnexpectedGameTreeEnd,
119 UnexpectedProperty,
120 UnexpectedEndOfData,
121 UnexpectedGameType,
122 InvalidFF4Property,
123 InvalidGameType,
124 NoGameTrees,
125}
126
127impl From<LexerError> for SgfParseError {
128 fn from(error: LexerError) -> Self {
129 Self::LexerError(error)
130 }
131}
132
133impl std::fmt::Display for SgfParseError {
134 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
135 match self {
136 SgfParseError::LexerError(e) => write!(f, "Error tokenizing: {e}"),
137 SgfParseError::UnexpectedGameTreeStart => write!(f, "Unexpected start of game tree"),
138 SgfParseError::UnexpectedGameTreeEnd => write!(f, "Unexpected end of game tree"),
139 SgfParseError::UnexpectedProperty => write!(f, "Unexpected property"),
140 SgfParseError::UnexpectedEndOfData => write!(f, "Unexpected end of data"),
141 SgfParseError::UnexpectedGameType => write!(f, "Unexpected game type"),
142 SgfParseError::InvalidFF4Property => {
143 write!(
144 f,
145 "Invalid FF[4] property without `convert_mixed_case_identifiers`"
146 )
147 }
148 SgfParseError::InvalidGameType => write!(f, "Invalid game type"),
149 SgfParseError::NoGameTrees => write!(f, "No game trees found"),
150 }
151 }
152}
153
154impl std::error::Error for SgfParseError {}
155
156fn split_by_gametree(tokens: &[Token], lenient: bool) -> Result<Vec<&[Token]>, SgfParseError> {
161 let mut gametrees = vec![];
162 let mut gametree_depth: u64 = 0;
163 let mut slice_start = 0;
164 for (i, token) in tokens.iter().enumerate() {
165 match token {
166 Token::StartGameTree => gametree_depth += 1,
167 Token::EndGameTree => {
168 if gametree_depth == 0 {
169 if lenient {
170 break;
171 } else {
172 return Err(SgfParseError::UnexpectedGameTreeEnd);
173 }
174 }
175 gametree_depth -= 1;
176 if gametree_depth == 0 {
177 gametrees.push(&tokens[slice_start..=i]);
178 slice_start = i + 1;
179 }
180 }
181 _ => {}
182 }
183 }
184 if gametree_depth != 0 {
185 if lenient {
186 gametrees.push(&tokens[slice_start..]);
188 } else {
189 return Err(SgfParseError::UnexpectedEndOfData);
190 }
191 }
192 if gametrees.is_empty() {
193 if lenient {
194 gametrees.push(&[]);
197 } else {
198 return Err(SgfParseError::NoGameTrees);
199 }
200 }
201
202 Ok(gametrees)
203}
204
205fn parse_gametree<Prop: SgfProp>(
207 tokens: &[Token],
208 options: &ParseOptions,
209) -> Result<GameTree, SgfParseError>
210where
211 SgfNode<Prop>: std::convert::Into<GameTree>,
212{
213 let mut collection: Vec<SgfNode<Prop>> = vec![];
215 let mut current_node_list_ptr = NonNull::new(&mut collection).unwrap();
217 let mut incomplete_child_lists: Vec<NonNull<Vec<SgfNode<Prop>>>> = vec![];
219 let mut tokens = tokens.iter().peekable();
228 while let Some(token) = tokens.next() {
229 match token {
230 Token::StartGameTree => {
231 if let Some(node_list_ptr) = incomplete_child_lists.last() {
233 let node_list = unsafe { node_list_ptr.as_ref() };
234 if node_list.is_empty() {
235 if options.lenient {
236 break;
237 } else {
238 return Err(SgfParseError::UnexpectedGameTreeStart);
239 }
240 }
241 }
242 incomplete_child_lists.push(current_node_list_ptr);
243 }
244 Token::EndGameTree => match incomplete_child_lists.pop() {
245 Some(node_list) => current_node_list_ptr = node_list,
246 None => {
247 if options.lenient {
248 break;
249 } else {
250 return Err(SgfParseError::UnexpectedGameTreeEnd);
251 }
252 }
253 },
254 Token::StartNode => {
255 let mut new_node = SgfNode::default();
256 let mut prop_tokens = vec![];
257 while let Some(Token::Property(_)) = tokens.peek() {
258 prop_tokens.push(tokens.next().unwrap());
259 }
260 for token in prop_tokens {
261 match token {
262 Token::Property((identifier, values)) => {
264 let identifier = {
265 if identifier.chars().all(|c| c.is_ascii_uppercase()) {
266 identifier.clone()
267 } else if options.convert_mixed_case_identifiers {
268 identifier
269 .chars()
270 .filter(|c| c.is_ascii_uppercase())
271 .collect()
272 } else if options.lenient {
273 break;
274 } else {
275 return Err(SgfParseError::InvalidFF4Property);
276 }
277 };
278 new_node
279 .properties
280 .push(Prop::new(identifier, values.clone()))
281 }
282 _ => unreachable!(),
283 }
284 }
285 let node_list = unsafe { current_node_list_ptr.as_mut() };
286 node_list.push(new_node);
287 current_node_list_ptr =
288 NonNull::new(&mut node_list.last_mut().unwrap().children).unwrap();
289 }
290 Token::Property(_) => {
291 if options.lenient {
292 break;
293 } else {
294 return Err(SgfParseError::UnexpectedProperty);
295 }
296 }
297 }
298 }
299
300 if !options.lenient && (!incomplete_child_lists.is_empty() || collection.len() != 1) {
301 return Err(SgfParseError::UnexpectedEndOfData);
302 }
303 let mut root_node = if options.lenient {
304 collection.into_iter().next().unwrap_or_default()
306 } else {
307 collection
308 .into_iter()
309 .next()
310 .ok_or(SgfParseError::UnexpectedEndOfData)?
311 };
312 root_node.is_root = true;
313 Ok(root_node.into())
314}
315
316fn find_gametype(tokens: &[Token]) -> Result<GameType, SgfParseError> {
320 match find_gametree_root_prop_values("GM", tokens)?.map(|v| v.as_slice()) {
321 None => Ok(GameType::Go),
322 Some([value]) if value.parse::<i32>().is_ok() => match value.as_str() {
323 "1" => Ok(GameType::Go),
324 _ => Ok(GameType::Unknown),
325 },
326 _ => Err(SgfParseError::InvalidGameType),
327 }
328}
329
330fn find_gametree_root_prop_values<'a>(
335 prop_ident: &'a str,
336 tokens: &'a [Token],
337) -> Result<Option<&'a Vec<String>>, SgfParseError> {
338 let matching_tokens: Vec<&Vec<String>> = tokens
341 .iter()
342 .skip(2)
343 .take_while(|&token| matches!(token, Token::Property(_)))
344 .filter_map(move |token| match token {
345 Token::Property((ident, values)) if ident == prop_ident => Some(values),
346 _ => None,
347 })
348 .collect();
349
350 match matching_tokens.len() {
351 0 => Ok(None),
352 1 => Ok(Some(matching_tokens[0])),
353 _ => Err(SgfParseError::UnexpectedProperty),
354 }
355}
356
357#[cfg(test)]
358mod test {
359 use super::*;
360 use crate::{go, serialize};
361
362 fn load_test_sgf() -> Result<String, Box<dyn std::error::Error>> {
363 let mut sgf_path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
365 sgf_path.push("resources/test/ff4_ex.sgf");
366 let data = std::fs::read_to_string(sgf_path)?;
367
368 Ok(data)
369 }
370
371 fn get_go_nodes() -> Result<Vec<SgfNode<go::Prop>>, Box<dyn std::error::Error>> {
372 let data = load_test_sgf()?;
373
374 Ok(go::parse(&data)?)
375 }
376
377 fn node_depth(mut sgf_node: &SgfNode<go::Prop>) -> u64 {
378 let mut depth = 1;
379 while sgf_node.children().count() > 0 {
380 depth += 1;
381 sgf_node = sgf_node.children().next().unwrap();
382 }
383 depth
384 }
385
386 #[test]
387 fn sgf_has_two_gametrees() {
388 let sgf_nodes = get_go_nodes().unwrap();
389 assert_eq!(sgf_nodes.len(), 2);
390 }
391
392 #[test]
393 fn gametree_one_has_five_variations() {
394 let sgf_nodes = get_go_nodes().unwrap();
395 let sgf_node = &sgf_nodes[0];
396 assert_eq!(sgf_node.children().count(), 5);
397 }
398
399 #[test]
400 fn gametree_one_has_size_19() {
401 let sgf_nodes = get_go_nodes().unwrap();
402 let sgf_node = &sgf_nodes[0];
403 match sgf_node.get_property("SZ") {
404 Some(go::Prop::SZ(size)) => assert_eq!(size, &(19, 19)),
405 _ => unreachable!("Expected size property"),
406 }
407 }
408
409 #[test]
410 fn gametree_variation_depths() {
411 let sgf_nodes = get_go_nodes().unwrap();
412 let sgf_node = &sgf_nodes[0];
413 let children: Vec<_> = sgf_node.children().collect();
414 assert_eq!(node_depth(children[0]), 13);
415 assert_eq!(node_depth(children[1]), 4);
416 assert_eq!(node_depth(children[2]), 4);
417 }
418
419 #[test]
420 fn gametree_two_has_one_variation() {
421 let sgf_nodes = get_go_nodes().unwrap();
422 let sgf_node = &sgf_nodes[1];
423 assert_eq!(sgf_node.children().count(), 1);
424 }
425
426 #[test]
427 fn serialize_then_parse() {
428 let data = load_test_sgf().unwrap();
429 let gametrees = parse(&data).unwrap();
430 let text = serialize(&gametrees);
431 assert_eq!(gametrees, parse(&text).unwrap());
432 }
433
434 #[test]
435 fn invalid_property() {
436 let input = "(;GM[1]W[rp.pmonpoqprpsornqmpm])";
437 let sgf_nodes = go::parse(input).unwrap();
438 let expected = vec![
439 go::Prop::GM(1),
440 go::Prop::Invalid("W".to_string(), vec!["rp.pmonpoqprpsornqmpm".to_string()]),
441 ];
442
443 assert_eq!(sgf_nodes.len(), 1);
444 let sgf_node = &sgf_nodes[0];
445 assert_eq!(sgf_node.properties().cloned().collect::<Vec<_>>(), expected);
446 }
447
448 #[test]
449 fn unknown_game() {
450 let input = "(;GM[37]W[rp.pmonpoqprpsornqmpm])";
451 let gametrees = parse(input).unwrap();
452 assert_eq!(gametrees.len(), 1);
453 assert_eq!(gametrees[0].gametype(), GameType::Unknown);
454 let sgf_node = match &gametrees[0] {
455 GameTree::Unknown(node) => node,
456 _ => panic!("Unexpected game type"),
457 };
458 let expected = vec![
459 unknown_game::Prop::GM(37),
460 unknown_game::Prop::W("rp.pmonpoqprpsornqmpm".into()),
461 ];
462
463 assert_eq!(sgf_node.properties().cloned().collect::<Vec<_>>(), expected);
464 }
465
466 #[test]
467 fn mixed_games() {
468 let input = "(;GM[1];W[dd])(;GM[37]W[rp.pmonpoqprpsornqmpm])";
469 let gametrees = parse(input).unwrap();
470 assert_eq!(gametrees.len(), 2);
471 assert_eq!(gametrees[0].gametype(), GameType::Go);
472 assert_eq!(gametrees[1].gametype(), GameType::Unknown);
473 }
474
475 #[test]
476 fn stack_overflow() {
477 let input = "(;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;)";
479 let result = parse(&input);
480 assert!(result.is_ok());
481 }
482
483 #[test]
484 fn converts_up_ff3_property() {
485 let input = "(;GM[1]FF[3]CoPyright[test])";
486 let expected = vec![
487 go::Prop::GM(1),
488 go::Prop::FF(3),
489 go::Prop::CP("test".into()),
490 ];
491
492 let sgf_nodes = go::parse(input).unwrap();
493
494 assert_eq!(sgf_nodes.len(), 1);
495 let properties = sgf_nodes[0].properties().cloned().collect::<Vec<_>>();
496 assert_eq!(properties, expected);
497 }
498
499 #[test]
500 fn doesnt_convert_if_not_allowed() {
501 let input = "(;GM[1]FF[3]CoPyright[test])";
502 let parse_options = ParseOptions {
503 convert_mixed_case_identifiers: false,
504 ..ParseOptions::default()
505 };
506 let result = parse_with_options(input, &parse_options);
507 assert_eq!(result, Err(SgfParseError::InvalidFF4Property));
508 }
509
510 #[test]
511 fn compressed_list_for_unknown_game() {
512 let input = "(;GM[127]MA[a:b])";
513 let gametree = parse(&input).unwrap().pop().unwrap();
514 let node = match gametree {
515 GameTree::Unknown(node) => node,
516 _ => panic!("Expected Unknown Game type"),
517 };
518 match node.get_property("MA") {
519 Some(unknown_game::Prop::MA(values)) => {
520 assert_eq!(values.len(), 1);
521 assert!(values.contains("a:b"));
522 }
523 _ => panic!("MA prop not found"),
524 }
525 }
526
527 #[test]
528 fn strips_whitespace() {
529 let input = "\n(;GM[1];B[cc])";
530 let sgf_nodes = go::parse(&input).unwrap();
531 assert_eq!(sgf_nodes.len(), 1);
532 }
533
534 #[test]
535 fn lenient_parsing_unclosed_parens_ok() {
536 let input = "\n(;GM[1];B[cc]";
537 let parse_options = ParseOptions {
538 lenient: true,
539 ..ParseOptions::default()
540 };
541 let game_trees = parse_with_options(input, &parse_options).unwrap();
542 assert_eq!(game_trees.len(), 1);
543 }
544
545 #[test]
546 fn lenient_parsing_ignores_trailing_garbage() {
547 let input = "\n(;GM[1];B[cc]))";
548 let parse_options = ParseOptions {
549 lenient: true,
550 ..ParseOptions::default()
551 };
552 let game_trees = parse_with_options(input, &parse_options).unwrap();
553 assert_eq!(game_trees.len(), 1);
554 }
555
556 #[test]
557 fn lenient_parsing_handles_unescaped_property_end() {
558 let input = "(;B[cc];W[dd];C[username [12k]: foo])";
559 let parse_options = ParseOptions {
560 lenient: true,
561 ..ParseOptions::default()
562 };
563 let game_trees = parse_with_options(input, &parse_options).unwrap();
564 assert_eq!(game_trees.len(), 1);
565 let sgf_node = game_trees[0].as_go_node().unwrap();
566 assert_eq!(sgf_node.main_variation().count(), 3);
568 }
569
570 #[test]
571 fn lenient_parsing_handles_unclosed_property_value() {
572 let input = "(;B[cc];W[dd];B[ee";
573 let parse_options = ParseOptions {
574 lenient: true,
575 ..ParseOptions::default()
576 };
577 let game_trees = parse_with_options(input, &parse_options).unwrap();
578 assert_eq!(game_trees.len(), 1);
579 let sgf_node = game_trees[0].as_go_node().unwrap();
580 assert_eq!(sgf_node.main_variation().count(), 3);
582 assert_eq!(
583 sgf_node.main_variation().last().unwrap().properties.len(),
584 0
585 );
586 }
587
588 #[test]
589 fn lenient_parsing_handles_missing_property_value() {
590 let input = "(;B[cc];W[dd];B";
591 let parse_options = ParseOptions {
592 lenient: true,
593 ..ParseOptions::default()
594 };
595 let game_trees = parse_with_options(input, &parse_options).unwrap();
596 assert_eq!(game_trees.len(), 1);
597 let sgf_node = game_trees[0].as_go_node().unwrap();
598 assert_eq!(sgf_node.main_variation().count(), 3);
600 assert_eq!(
601 sgf_node.main_variation().last().unwrap().properties.len(),
602 0
603 );
604 }
605
606 #[test]
607 fn lenient_parsing_handles_missing_first_node_start() {
608 let input = "(B[cc])";
609 let parse_options = ParseOptions {
610 lenient: true,
611 ..ParseOptions::default()
612 };
613 let game_trees = parse_with_options(input, &parse_options).unwrap();
614 assert_eq!(game_trees.len(), 1);
615 let sgf_node = game_trees[0].as_go_node().unwrap();
616 assert_eq!(sgf_node.main_variation().count(), 1);
618 assert_eq!(
619 sgf_node.main_variation().last().unwrap().properties.len(),
620 0
621 );
622 }
623}