pgn_reader/
reader.rs

1use std::{
2    cmp::{max, min},
3    convert::Infallible,
4    io::{self, Read, Seek, SeekFrom},
5    mem,
6    ops::ControlFlow,
7};
8
9use shakmaty::{
10    CastlingSide, Color, KnownOutcome, Outcome,
11    san::{San, SanPlus, Suffix},
12};
13
14use crate::{RawTag, Skip, Visitor, buffer::Buffer, comment::RawComment, nag::Nag};
15
/// Build a [`Reader`] with custom settings.
///
/// Obtained from [`ReaderBuilder::new()`] or [`Reader::build()`] and turned
/// into a [`Reader`] with [`ReaderBuilder::finish()`].
#[derive(Debug, Clone)]
pub struct ReaderBuilder<R> {
    // Underlying source; handed over to the `Reader` by `finish()`.
    reader: R,
    // Tag line length (in bytes) the buffer has to support. Starts at 255 and
    // the setter never lets it drop below that.
    tag_line_bytes: usize,
    // Movetext token length (in bytes) the buffer has to support. Starts at
    // 255; the comment-length setter stores the requested length plus two
    // bytes for the braces.
    movetext_token_bytes: usize,
}
23
24impl<R: Read> ReaderBuilder<R> {
25    /// Create a [`ReaderBuilder`] with default settings based on the PGN
26    /// standard.
27    pub fn new(reader: R) -> Self {
28        ReaderBuilder {
29            reader,
30            tag_line_bytes: 255,
31            movetext_token_bytes: 255,
32        }
33    }
34
35    /// Configure the buffer to support *at least* the given tag line length.
36    ///
37    /// Defaults to `255` bytes.
38    pub fn set_supported_tag_line_length(mut self, bytes: usize) -> Self {
39        self.tag_line_bytes = max(255, bytes);
40        self
41    }
42
43    /// Configure the buffer to support *at least* the given comment length.
44    ///
45    /// Longer comments will be split into one or more
46    /// calls to [`Visitor::partial_comment()`] followed by a final call to
47    /// [`Visitor::comment()`].
48    ///
49    /// Defaults to `255` bytes.
50    pub fn set_supported_comment_length(mut self, bytes: usize) -> Self {
51        self.movetext_token_bytes = max(255, bytes) + 2; // Plus '{' and '}'
52        self
53    }
54
55    /// Finalize and create a [`Reader`].
56    pub fn finish(self) -> Reader<R> {
57        Reader {
58            reader: self.reader,
59            tag_line_bytes: self.tag_line_bytes,
60            movetext_token_bytes: self.movetext_token_bytes,
61            buffer: Buffer::with_capacity(max(
62                1 << 14,
63                max(self.tag_line_bytes, self.movetext_token_bytes).next_power_of_two() * 2,
64            )),
65            pending_skip_tags: false,
66            pending_skip_movetext: false,
67        }
68    }
69}
70
/// Reads a PGN and calls [`Visitor`] methods.
///
/// Buffers the underlying reader with an appropriate strategy, so it's *not*
/// recommended to add an additional layer of buffering like
/// [`BufReader`](std::io::BufReader).
#[derive(Debug, Clone)]
pub struct Reader<R> {
    // Read-ahead buffer that all parsing operates on.
    buffer: Buffer,
    // Underlying source the buffer is refilled from.
    reader: R,
    // Number of bytes requested from the buffer before parsing a tag line.
    tag_line_bytes: usize,
    // Number of bytes requested from the buffer before parsing a movetext
    // token.
    movetext_token_bytes: usize,
    // Set when a visitor stopped a game before its tags were fully read; the
    // remaining tags are skipped before the next game starts.
    pending_skip_tags: bool,
    // Set when a visitor stopped a game before its movetext was fully read;
    // the remaining movetext is skipped before the next game starts.
    pending_skip_movetext: bool,
}
85
86impl<R: Read> Reader<R> {
87    /// Create a reader with default settings based on the PGN standard.
88    pub fn new(reader: R) -> Reader<R> {
89        ReaderBuilder::new(reader).finish()
90    }
91
    /// Build a reader with custom settings.
    ///
    /// Returns a [`ReaderBuilder`] wrapping `reader`. Adjust the limits on
    /// the builder and call [`ReaderBuilder::finish()`] to obtain the
    /// [`Reader`].
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use pgn_reader::Reader;
    ///
    /// let reader = Reader::build(File::open("example.pgn")?)
    ///     .set_supported_tag_line_length(1000)
    ///     .set_supported_comment_length(4000)
    ///     .finish();
    /// # Ok::<_, std::io::Error>(())
    /// ```
    pub fn build(reader: R) -> ReaderBuilder<R> {
        ReaderBuilder::new(reader)
    }
107
108    fn skip_bom(&mut self) -> io::Result<()> {
109        if self
110            .buffer
111            .ensure_bytes(3, &mut self.reader)?
112            .starts_with(b"\xef\xbb\xbf")
113        {
114            self.buffer.consume(3);
115        }
116        Ok(())
117    }
118
119    fn skip_until(&mut self, needle: u8) -> io::Result<()> {
120        while !self.buffer.ensure_bytes(1, &mut self.reader)?.is_empty() {
121            if let Some(pos) = memchr::memchr(needle, self.buffer.data()) {
122                self.buffer.consume(pos);
123                return Ok(());
124            } else {
125                self.buffer.clear();
126            }
127        }
128        Ok(())
129    }
130
131    fn skip_until_after(&mut self, needle: u8) -> io::Result<()> {
132        while !self.buffer.ensure_bytes(1, &mut self.reader)?.is_empty() {
133            if let Some(pos) = memchr::memchr(needle, self.buffer.data()) {
134                self.buffer.consume(pos + 1);
135                return Ok(());
136            } else {
137                self.buffer.clear();
138            }
139        }
140        Ok(())
141    }
142
143    fn skip_whitespace(&mut self, mut is_new_line: bool) -> io::Result<()> {
144        while let &[ch, ..] = self.buffer.ensure_bytes(1, &mut self.reader)? {
145            match ch {
146                b' ' | b'\t' | b'\r' => {
147                    self.buffer.bump();
148                }
149                b'\n' => {
150                    is_new_line = true;
151                    self.buffer.bump();
152                }
153                b'%' if is_new_line => {
154                    self.buffer.bump();
155                    self.skip_until_after(b'\n')?;
156                }
157                _ => return Ok(()),
158            }
159        }
160        Ok(())
161    }
162
163    fn skip_ket(&mut self) -> io::Result<()> {
164        while let &[ch, ..] = self.buffer.ensure_bytes(1, &mut self.reader)? {
165            match ch {
166                b' ' | b'\t' | b'\r' => {
167                    self.buffer.bump();
168                }
169                b']' => {
170                    self.buffer.bump();
171                    return self.skip_whitespace(false);
172                }
173                b'\n' => {
174                    self.buffer.bump();
175                    return self.skip_whitespace(true);
176                }
177                _ => {
178                    return Ok(());
179                }
180            }
181        }
182        Ok(())
183    }
184
185    fn skip_token(&mut self) -> io::Result<()> {
186        while !self.buffer.ensure_bytes(1, &mut self.reader)?.is_empty() {
187            if let Some(end) = self.find_token_end() {
188                self.buffer.consume(end);
189                break;
190            } else {
191                self.buffer.clear();
192            }
193        }
194        Ok(())
195    }
196
197    fn find_token_end(&self) -> Option<usize> {
198        self.buffer.data().iter().copied().position(is_token_end)
199    }
200
201    fn eat_dash(&mut self) -> bool {
202        for dash in [b"-", "–".as_bytes(), "—".as_bytes()] {
203            if self.eat(dash) {
204                return true;
205            }
206        }
207        false
208    }
209
210    #[inline]
211    fn eat(&mut self, bytes: &[u8]) -> bool {
212        if self.buffer.data().starts_with(bytes) {
213            self.buffer.consume(bytes.len());
214            return true;
215        }
216        false
217    }
218
219    fn skip_tags(&mut self) -> io::Result<()> {
220        struct IgnoreTagsVisitor;
221
222        impl Visitor for IgnoreTagsVisitor {
223            type Tags = ();
224            type Movetext = Infallible;
225            type Output = Infallible;
226
227            fn begin_tags(&mut self) -> ControlFlow<Self::Output, Self::Tags> {
228                ControlFlow::Continue(())
229            }
230
231            fn begin_movetext(
232                &mut self,
233                _tags: Self::Tags,
234            ) -> ControlFlow<Self::Output, Self::Movetext> {
235                unreachable!()
236            }
237
238            fn end_game(&mut self, movetext: Self::Movetext) -> Self::Output {
239                match movetext {}
240            }
241        }
242
243        let _ = self.read_tags(&mut IgnoreTagsVisitor, &mut ())?;
244        Ok(())
245    }
246
    /// Parses consecutive tag lines and reports each name/value pair to
    /// `visitor.tag()`.
    ///
    /// Parsing continues for as long as the next buffered byte is `[`. A
    /// bracketed line without a quoted value is skipped silently.
    ///
    /// Returns `ControlFlow::Break` as soon as the visitor breaks (the
    /// offending tag line has been consumed by then), otherwise
    /// `ControlFlow::Continue(())` once the tag section ends.
    ///
    /// # Errors
    ///
    /// * I/O error from the underlying reader.
    /// * `InvalidData` ("unterminated tag") when neither a quote, a line
    ///   feed nor a `]` is found in the buffered data, or when an opened
    ///   value is not closed by a quote or line feed within the buffered
    ///   data. The rest of the line is skipped before the error is returned.
    fn read_tags<V: Visitor>(
        &mut self,
        visitor: &mut V,
        tags: &mut V::Tags,
    ) -> io::Result<ControlFlow<V::Output>> {
        while let &[b'[', ..] = self
            .buffer
            .ensure_bytes(self.tag_line_bytes, &mut self.reader)?
        {
            // Consume the opening '['.
            self.buffer.bump();

            // Locate the opening quote of the value. Hitting a line feed or
            // ']' first means the line carries no value.
            let left_quote = match memchr::memchr3(b'"', b'\n', b']', self.buffer.data()) {
                Some(left_quote) if self.buffer.data()[left_quote] == b'"' => left_quote,
                Some(eol) => {
                    self.buffer.consume(eol + 1);
                    self.skip_ket()?;
                    continue;
                }
                None => {
                    self.buffer.clear();
                    self.skip_until_after(b'\n')?;
                    return Err(io::Error::new(
                        io::ErrorKind::InvalidData,
                        "unterminated tag",
                    ));
                }
            };

            // End of the tag name: drop a single space directly in front of
            // the opening quote, if present.
            let space = if left_quote > 0 && self.buffer.data()[left_quote - 1] == b' ' {
                left_quote - 1
            } else {
                left_quote
            };

            let value_start = left_quote + 1;
            let mut right_quote = value_start;
            // Find the end of the value. `consumed` is the number of bytes to
            // drop afterwards: a closing quote is consumed, a line feed is
            // left in place.
            let consumed = loop {
                match memchr::memchr3(b'\\', b'"', b'\n', &self.buffer.data()[right_quote..]) {
                    Some(delta) if self.buffer.data()[right_quote + delta] == b'"' => {
                        right_quote += delta;
                        break right_quote + 1;
                    }
                    Some(delta) if self.buffer.data()[right_quote + delta] == b'\n' => {
                        right_quote += delta;
                        break right_quote;
                    }
                    Some(delta) => {
                        // Skip escaped character.
                        // Clamped so the next search never starts beyond the
                        // buffered data.
                        right_quote = min(right_quote + delta + 2, self.buffer.len());
                    }
                    None => {
                        self.buffer.clear();
                        self.skip_until_after(b'\n')?;
                        return Err(io::Error::new(
                            io::ErrorKind::InvalidData,
                            "unterminated tag",
                        ));
                    }
                }
            };

            // The visitor sees slices of the buffer, so the bytes are only
            // consumed after the call.
            let cf = visitor.tag(
                tags,
                &self.buffer.data()[..space],
                RawTag(&self.buffer.data()[value_start..right_quote]),
            );
            self.buffer.consume(consumed);
            self.skip_ket()?;
            if cf.is_break() {
                return Ok(cf);
            }
        }
        Ok(ControlFlow::Continue(()))
    }
321
322    fn skip_movetext(&mut self) -> io::Result<()> {
323        while let &[ch, ..] = self.buffer.ensure_bytes(3, &mut self.reader)? {
324            self.buffer.bump();
325
326            match ch {
327                b'{' => {
328                    self.skip_until_after(b'}')?;
329                }
330                b';' => {
331                    self.skip_until(b'\n')?;
332                }
333                b'\n' => match self.buffer.peek() {
334                    Some(b'%') => {
335                        self.buffer.bump();
336                        self.skip_until(b'\n')?;
337                    }
338                    Some(b'\n' | b'[') => break,
339                    Some(b'\r') => {
340                        self.buffer.bump();
341                        if let Some(b'\n') = self.buffer.peek() {
342                            break;
343                        }
344                    }
345                    _ => continue,
346                },
347                _ => {
348                    if let Some(consumed) = memchr::memchr3(b'\n', b'{', b';', self.buffer.data()) {
349                        self.buffer.consume(consumed);
350                    } else {
351                        self.buffer.clear();
352                    }
353                }
354            }
355        }
356
357        Ok(())
358    }
359
    /// Tokenizes the movetext of one game and reports each recognized token
    /// to `visitor`.
    ///
    /// Dispatches on the first buffered byte: comments, game outcomes,
    /// zero-style castling, move numbers, variations, NAGs, move annotation
    /// glyphs and SAN moves. Unrecognized tokens are skipped.
    ///
    /// Ends at the end of the input, or after a line feed that is followed
    /// by another line feed (optionally preceded by a carriage return) or by
    /// `[`. Returns `ControlFlow::Break` as soon as a visitor callback
    /// breaks, otherwise `ControlFlow::Continue(())`.
    ///
    /// # Errors
    ///
    /// * I/O error from the underlying reader.
    /// * `InvalidData` ("unterminated comment") when the input ends inside a
    ///   brace comment.
    fn read_movetext<V: Visitor>(
        &mut self,
        visitor: &mut V,
        movetext: &mut V::Movetext,
    ) -> io::Result<ControlFlow<V::Output>> {
        while let &[ch, ..] = self
            .buffer
            .ensure_bytes(self.movetext_token_bytes, &mut self.reader)?
        {
            match ch {
                // Brace comment, possibly longer than the buffered data.
                b'{' => {
                    self.buffer.bump();
                    loop {
                        if self.buffer.len() == 0 {
                            return Err(io::Error::new(
                                io::ErrorKind::InvalidData,
                                "unterminated comment",
                            ));
                        } else if let Some(right_brace) = memchr::memchr(b'}', self.buffer.data()) {
                            let cf = visitor
                                .comment(movetext, RawComment(&self.buffer.data()[..right_brace]));
                            self.buffer.consume(right_brace + 1);
                            if cf.is_break() {
                                return Ok(cf);
                            }
                            break;
                        } else {
                            // No closing brace buffered yet: hand out what is
                            // there, cut so that a multibyte UTF-8 sequence
                            // is not split, and refill.
                            let trimmed = trim_partial_utf8(self.buffer.data());
                            let cf = visitor.partial_comment(movetext, RawComment(trimmed));
                            if cf.is_break() {
                                self.skip_until_after(b'}')?;
                                return Ok(cf);
                            }
                            self.buffer.consume(trimmed.len());
                            self.buffer
                                .ensure_bytes(self.movetext_token_bytes, &mut self.reader)?;
                        }
                    }
                }
                // Line feed: look at what starts the next line.
                b'\n' => {
                    self.buffer.bump();

                    match self.buffer.peek() {
                        // Escape line.
                        Some(b'%') => {
                            self.buffer.bump();
                            self.skip_until(b'\n')?;
                        }
                        // Blank line or next tag section ends the movetext.
                        Some(b'[' | b'\n') => {
                            break;
                        }
                        Some(b'\r') => {
                            self.buffer.bump();
                            if self.buffer.peek() == Some(b'\n') {
                                break;
                            }
                        }
                        _ => continue,
                    }
                }
                // Rest-of-line comment; the line feed itself is left for the
                // arm above.
                b';' => {
                    self.buffer.bump();
                    self.skip_until(b'\n')?;
                }
                // "0-1" outcome, or castling written with zeros.
                b'0' => {
                    self.buffer.bump();
                    if self.eat_dash() {
                        if self.eat(b"1") {
                            let cf = visitor.outcome(
                                movetext,
                                Outcome::Known(KnownOutcome::Decisive {
                                    winner: Color::Black,
                                }),
                            );
                            if cf.is_break() {
                                return Ok(cf);
                            }
                        } else if self.eat(b"0") {
                            // Castling notation with zeros.
                            let side = if self.eat_dash() && self.eat(b"0") {
                                CastlingSide::QueenSide
                            } else {
                                CastlingSide::KingSide
                            };
                            let suffix = match self.buffer.peek() {
                                Some(b'+') => {
                                    self.buffer.bump();
                                    Some(Suffix::Check)
                                }
                                Some(b'#') => {
                                    self.buffer.bump();
                                    Some(Suffix::Checkmate)
                                }
                                _ => None,
                            };
                            let cf = visitor.san(
                                movetext,
                                SanPlus {
                                    san: San::Castle(side),
                                    suffix,
                                },
                            );
                            if cf.is_break() {
                                return Ok(cf);
                            }
                        }
                    }
                }
                // "1-0" or "1/2-1/2" outcome, or a move number starting
                // with 1.
                b'1' => {
                    self.buffer.bump();
                    if self.eat_dash() {
                        if self.eat(b"0") {
                            let cf = visitor.outcome(
                                movetext,
                                Outcome::Known(KnownOutcome::Decisive {
                                    winner: Color::White,
                                }),
                            );
                            if cf.is_break() {
                                return Ok(cf);
                            }
                        }
                    } else if self.eat(b"/2") && self.eat_dash() && self.eat(b"1/2") {
                        let cf = visitor.outcome(movetext, Outcome::Known(KnownOutcome::Draw));
                        if cf.is_break() {
                            return Ok(cf);
                        }
                    } else {
                        // Move number: skip remaining digits, then dots and
                        // spaces.
                        while let Some(b'0'..=b'9') = self.buffer.peek() {
                            self.buffer.bump();
                        }
                        while let Some(b'.' | b' ') = self.buffer.peek() {
                            self.buffer.bump();
                        }
                    }
                }
                // First byte of a UTF-8 encoded '½'.
                b'\xc2' => {
                    // ½-½
                    self.buffer.bump();
                    if self.eat(b"\xbd") && self.eat_dash() && self.eat("½".as_bytes()) {
                        let cf = visitor.outcome(movetext, Outcome::Known(KnownOutcome::Draw));
                        if cf.is_break() {
                            return Ok(cf);
                        }
                    }
                }
                // Move number: skip digits, then dots and spaces.
                b'2'..=b'9' => {
                    self.buffer.bump();
                    while let Some(b'0'..=b'9') = self.buffer.peek() {
                        self.buffer.bump();
                    }
                    while let Some(b'.' | b' ') = self.buffer.peek() {
                        self.buffer.bump();
                    }
                }
                // Start of a variation; the visitor may ask to skip it.
                b'(' => {
                    self.buffer.bump();
                    match visitor.begin_variation(movetext) {
                        ControlFlow::Continue(Skip(true)) => self.skip_variation()?,
                        ControlFlow::Continue(Skip(false)) => (),
                        ControlFlow::Break(output) => {
                            return Ok(ControlFlow::Break(output));
                        }
                    }
                }
                b')' => {
                    self.buffer.bump();
                    let cf = visitor.end_variation(movetext);
                    if cf.is_break() {
                        return Ok(cf);
                    }
                }
                // Numeric annotation glyph such as "$1". Tokens that do not
                // parse as an unsigned number are dropped silently.
                b'$' => {
                    self.buffer.bump();
                    if let Some(token_end) = self.find_token_end() {
                        if let Ok(nag) = btoi::btou(&self.buffer.data()[..token_end]) {
                            let cf = visitor.nag(movetext, Nag(nag));
                            if cf.is_break() {
                                return Ok(cf);
                            }
                        }
                        self.buffer.consume(token_end);
                    } else {
                        // NOTE(review): no terminator in the buffered data.
                        // This looks like it also drops a NAG that is the
                        // very last token of the input (no trailing
                        // whitespace), unlike the SAN arm below which accepts
                        // end of input as a token end — confirm whether that
                        // is intended.
                        self.buffer.clear();
                        self.skip_token()?;
                    }
                }
                // "!", "!!" and "!?" annotations.
                b'!' => {
                    self.buffer.bump();
                    let cf = match self.buffer.peek() {
                        Some(b'!') => {
                            self.buffer.bump();
                            visitor.nag(movetext, Nag::BRILLIANT_MOVE)
                        }
                        Some(b'?') => {
                            self.buffer.bump();
                            visitor.nag(movetext, Nag::SPECULATIVE_MOVE)
                        }
                        _ => visitor.nag(movetext, Nag::GOOD_MOVE),
                    };
                    if cf.is_break() {
                        return Ok(cf);
                    }
                }
                // "?", "?!" and "??" annotations.
                b'?' => {
                    self.buffer.bump();
                    let cf = match self.buffer.peek() {
                        Some(b'!') => {
                            self.buffer.bump();
                            visitor.nag(movetext, Nag::DUBIOUS_MOVE)
                        }
                        Some(b'?') => {
                            self.buffer.bump();
                            visitor.nag(movetext, Nag::BLUNDER)
                        }
                        _ => visitor.nag(movetext, Nag::MISTAKE),
                    };
                    if cf.is_break() {
                        return Ok(cf);
                    }
                }
                // Unknown outcome marker.
                b'*' => {
                    self.buffer.bump();
                    let cf = visitor.outcome(movetext, Outcome::Unknown);
                    if cf.is_break() {
                        return Ok(cf);
                    }
                }
                // Separators between tokens.
                b' ' | b'\t' | b'\r' | b'.' => {
                    self.buffer.bump();
                }
                // Anything else: try to parse a SAN move.
                _ => {
                    if let Ok((san, bytes)) = SanPlus::from_ascii_prefix(self.buffer.data()) {
                        self.buffer.consume(bytes);
                        // Only report the move if the token ends right after
                        // it; the parsed prefix is consumed either way.
                        if self.buffer.peek().is_none_or(is_token_end) {
                            let cf = visitor.san(movetext, san);
                            if cf.is_break() {
                                return Ok(cf);
                            }
                        }
                    } else {
                        // Not a move: drop the whole token.
                        self.buffer.bump();
                        self.skip_token()?;
                    }
                }
            }
        }

        Ok(ControlFlow::Continue(()))
    }
609
610    fn skip_variation(&mut self) -> io::Result<()> {
611        let mut depth = 0usize;
612
613        while let &[ch, ..] = self.buffer.ensure_bytes(3, &mut self.reader)? {
614            match ch {
615                b'(' => {
616                    self.buffer.bump();
617                    depth += 1;
618                }
619                b')' => {
620                    if let Some(d) = depth.checked_sub(1) {
621                        self.buffer.bump();
622                        depth = d;
623                    } else {
624                        break;
625                    }
626                }
627                b'{' => {
628                    self.buffer.bump();
629                    self.skip_until_after(b'}')?;
630                }
631                b';' => {
632                    self.buffer.bump();
633                    self.skip_until(b'\n')?;
634                }
635                b'\n' => {
636                    match self.buffer.data().get(1).copied() {
637                        Some(b'%') => {
638                            self.buffer.consume(2);
639                            self.skip_until(b'\n')?;
640                        }
641                        Some(b'[' | b'\n') => {
642                            // Do not consume the first or second line break.
643                            break;
644                        }
645                        Some(b'\r') if self.buffer.data().get(2).copied() == Some(b'\n') => {
646                            // Do not consume the first or second line break.
647                            break;
648                        }
649                        _ => {
650                            self.buffer.bump();
651                        }
652                    }
653                }
654                _ => {
655                    self.buffer.bump();
656                }
657            }
658        }
659
660        Ok(())
661    }
662
663    fn before_game(&mut self) -> io::Result<()> {
664        if mem::take(&mut self.pending_skip_tags) {
665            self.skip_tags()?;
666        }
667        if mem::take(&mut self.pending_skip_movetext) {
668            self.skip_movetext()?;
669        }
670        self.skip_bom()?;
671        self.skip_whitespace(true)
672    }
673
674    /// Read a single game, if any, and return the result produced by the
675    /// visitor.
676    ///
677    /// Returns `Ok(None)` if the underlying reader is empty.
678    ///
679    /// # Errors
680    ///
681    /// * I/O error from the underlying reader.
682    /// * Irrecoverable parser errors.
683    pub fn read_game<V: Visitor>(&mut self, visitor: &mut V) -> io::Result<Option<V::Output>> {
684        self.before_game()?;
685
686        if self.buffer.ensure_bytes(1, &mut self.reader)?.is_empty() {
687            return Ok(None);
688        }
689
690        let mut tags = match visitor.begin_tags() {
691            ControlFlow::Break(output) => {
692                self.pending_skip_tags = true;
693                self.pending_skip_movetext = true;
694                return Ok(Some(output));
695            }
696            ControlFlow::Continue(tags) => tags,
697        };
698        if let ControlFlow::Break(output) = self.read_tags(visitor, &mut tags)? {
699            self.pending_skip_tags = true;
700            self.pending_skip_movetext = true;
701            return Ok(Some(output));
702        }
703        let mut movetext = match visitor.begin_movetext(tags) {
704            ControlFlow::Break(output) => {
705                self.pending_skip_movetext = true;
706                return Ok(Some(output));
707            }
708            ControlFlow::Continue(movetext) => movetext,
709        };
710        if let ControlFlow::Break(output) = self.read_movetext(visitor, &mut movetext)? {
711            self.pending_skip_movetext = true;
712            return Ok(Some(output));
713        };
714        Ok(Some(visitor.end_game(movetext)))
715    }
716
717    /// Returns whether the reader has another game to parse, but does not
718    /// actually parse it.
719    ///
720    /// # Errors
721    ///
722    /// * I/O error from the underlying reader.
723    /// * Irrecoverable parser errors (while trying to read previous
724    ///   unfinished game to completion).
725    pub fn has_more(&mut self) -> io::Result<bool> {
726        self.before_game()?;
727        Ok(!self.buffer.ensure_bytes(1, &mut self.reader)?.is_empty())
728    }
729
730    /// Skip a single game, if any.
731    ///
732    /// Returns `Ok(true)` if a game found and skipped.
733    ///
734    /// # Errors
735    ///
736    /// * I/O error from the underlying reader.
737    /// * Irrecoverable parser errors.
738    pub fn skip_game(&mut self) -> io::Result<bool> {
739        let has_more = self.has_more()?;
740        self.skip_tags()?;
741        self.skip_movetext()?;
742        Ok(has_more)
743    }
744
745    /// Iterate over all games, yielding the visitor outputs.
746    #[must_use = "iterator is lazy"]
747    pub fn read_games<'a, V: Visitor>(&'a mut self, visitor: &'a mut V) -> ReadGames<'a, R, V> {
748        ReadGames {
749            reader: self,
750            visitor,
751        }
752    }
753
754    /// Visit all games, ignoring the visitor outputs.
755    ///
756    /// # Errors
757    ///
758    /// * I/O error from the underlying reader.
759    /// * Irrecoverable parser errors.
760    pub fn visit_all_games<V: Visitor>(&mut self, visitor: &mut V) -> io::Result<()> {
761        while self.read_game(visitor)?.is_some() {}
762        Ok(())
763    }
764
    /// The currently buffered bytes.
    ///
    /// These are bytes that are held in the internal buffer and have not
    /// been consumed by the parser yet.
    pub fn buffer(&self) -> &[u8] {
        self.buffer.data()
    }
769
    /// Discard the remaining bytes in the buffer ([`Reader::buffer()`]) and
    /// get the underlying reader.
    ///
    /// Inspect [`Reader::buffer()`] first if the buffered bytes are still
    /// needed; they cannot be recovered from the returned reader.
    pub fn into_inner(self) -> R {
        self.reader
    }
775}
776
/// Iterator returned by [`Reader::read_games()`].
///
/// Each item is the result of reading one game with the borrowed visitor.
#[derive(Debug)]
#[must_use]
pub struct ReadGames<'a, R, V> {
    // Reader the games are pulled from.
    reader: &'a mut Reader<R>,
    // Visitor that is run on every game.
    visitor: &'a mut V,
}
784
785impl<R: Read, V: Visitor> Iterator for ReadGames<'_, R, V> {
786    type Item = Result<V::Output, io::Error>;
787
788    fn next(&mut self) -> Option<Self::Item> {
789        match self.reader.read_game(self.visitor) {
790            Ok(Some(result)) => Some(Ok(result)),
791            Ok(None) => None,
792            Err(err) => Some(Err(err)),
793        }
794    }
795}
796
/// Whether `byte` terminates a movetext token: whitespace, braces,
/// parentheses, `!`, `?`, `$`, `;`, `.` or `*`.
#[inline]
fn is_token_end(byte: u8) -> bool {
    const TOKEN_TERMINATORS: &[u8] = b" \t\n\r{}()!?$;.*";

    TOKEN_TERMINATORS.contains(&byte)
}
816
817impl<R: Seek> Seek for Reader<R> {
818    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
819        let result = if let SeekFrom::Current(offset) = pos {
820            let buffered = self.buffer.len() as i64;
821            if let Some(offset) = offset.checked_sub(buffered) {
822                self.reader.seek(SeekFrom::Current(offset))?
823            } else {
824                self.reader.seek_relative(-buffered)?;
825                self.buffer.clear();
826                self.reader.seek(SeekFrom::Current(offset))?
827            }
828        } else {
829            self.reader.seek(pos)?
830        };
831        self.buffer.clear();
832        Ok(result)
833    }
834
835    fn seek_relative(&mut self, offset: i64) -> io::Result<()> {
836        let buffered = self.buffer.len() as i64;
837        if let Some(offset) = offset.checked_sub(buffered) {
838            self.reader.seek_relative(offset)?;
839            self.buffer.clear();
840            Ok(())
841        } else {
842            self.reader.seek_relative(-buffered)?;
843            self.buffer.clear();
844            self.reader.seek_relative(offset)
845        }
846    }
847
848    fn stream_position(&mut self) -> io::Result<u64> {
849        let buffered = self.buffer.len() as u64;
850        self.reader.stream_position().map(|pos| {
851            pos.checked_sub(buffered)
852                .expect("consistent stream position")
853        })
854    }
855}
856
857// Helper function for handling comments that don't fit in the movetext token
858// buffer. When we encounter these, we'll call `Vistor::comment` several times
859// with roughly buffer-sized chunks, but we want to avoid putting chunk
860// boundaries in the middle of a multibyte UTF-8 sequence. We don't assume or
861// require that the PGN is UTF-8 encoded at all. To prevent an infinite loop,
862// we avoid ever turning a non-empty slice into an empty one.
863fn trim_partial_utf8(buf: &[u8]) -> &[u8] {
864    let scan_start = buf.len().saturating_sub(4);
865    let Some(last_start_byte) = (scan_start..buf.len())
866        .rev()
867        .find(|b| !is_utf8_continuation(buf[*b]))
868    else {
869        return buf;
870    };
871
872    if last_start_byte == 0 || buf.len() - last_start_byte >= utf8_start_width(buf[last_start_byte])
873    {
874        buf
875    } else {
876        &buf[..last_start_byte]
877    }
878}
879
// Returns `true` for bytes of the form `0b10xx_xxxx` (`0x80..=0xBF`), i.e.
// UTF-8 continuation bytes.
#[inline]
fn is_utf8_continuation(b: u8) -> bool {
    matches!(b, 0x80..=0xBF)
}
884
// Returns the total length in bytes of the UTF-8 sequence introduced by lead
// byte `b`, or 0 if `b` cannot start a sequence (continuation bytes, the
// overlong leads 0xC0/0xC1, and anything above 0xF4).
#[inline]
fn utf8_start_width(b: u8) -> usize {
    if b <= 0x7F {
        1 // ASCII
    } else if (0xC2..=0xDF).contains(&b) {
        2
    } else if (0xE0..=0xEF).contains(&b) {
        3
    } else if (0xF0..=0xF4).contains(&b) {
        4
    } else {
        0
    }
}
895
896#[cfg(test)]
897mod tests {
898    use super::*;
899    use crate::shakmaty::{Role, Square};
900
    // Never instantiated. It only compiles while `Box<Reader<R>>` is a
    // well-formed type for an unconstrained `R`; the leading underscore keeps
    // the unused type from triggering a dead-code warning.
    // NOTE(review): the name suggests an object-safety check, but `Reader` is
    // used here as a concrete type rather than as `dyn Trait` — confirm the
    // intent.
    struct _AssertObjectSafe<R>(Box<Reader<R>>);
902
903    #[derive(Debug, Eq, PartialEq)]
904    enum Token {
905        BeginTags,
906        Tag(Vec<u8>, Vec<u8>),
907        BeginMovetext,
908        San(SanPlus),
909        Nag(Nag),
910        Comment(Vec<u8>),
911        BeginVariation,
912        EndVariation,
913        Outcome(Outcome),
914        EndGame,
915    }
916
917    struct CollectTokens;
918
919    impl Visitor for CollectTokens {
920        type Tags = Vec<Token>;
921        type Movetext = Vec<Token>;
922        type Output = Vec<Token>;
923
924        fn begin_tags(&mut self) -> ControlFlow<Self::Output, Self::Tags> {
925            ControlFlow::Continue(vec![Token::BeginTags])
926        }
927
928        fn tag(
929            &mut self,
930            tags: &mut Self::Tags,
931            name: &[u8],
932            value: RawTag<'_>,
933        ) -> ControlFlow<Self::Output> {
934            tags.push(Token::Tag(name.to_owned(), value.decode().into_owned()));
935            ControlFlow::Continue(())
936        }
937
938        fn begin_movetext(
939            &mut self,
940            mut tags: Self::Tags,
941        ) -> ControlFlow<Self::Output, Self::Movetext> {
942            tags.push(Token::BeginMovetext);
943            ControlFlow::Continue(tags)
944        }
945
946        fn san(
947            &mut self,
948            movetext: &mut Self::Movetext,
949            san_plus: SanPlus,
950        ) -> ControlFlow<Self::Output> {
951            movetext.push(Token::San(san_plus));
952            ControlFlow::Continue(())
953        }
954
955        fn nag(&mut self, movetext: &mut Self::Movetext, nag: Nag) -> ControlFlow<Self::Output> {
956            movetext.push(Token::Nag(nag));
957            ControlFlow::Continue(())
958        }
959
960        fn comment(
961            &mut self,
962            movetext: &mut Self::Movetext,
963            comment: RawComment<'_>,
964        ) -> ControlFlow<Self::Output> {
965            movetext.push(Token::Comment(comment.as_bytes().to_owned()));
966            ControlFlow::Continue(())
967        }
968
969        fn begin_variation(
970            &mut self,
971            movetext: &mut Self::Movetext,
972        ) -> ControlFlow<Self::Output, Skip> {
973            movetext.push(Token::BeginVariation);
974            ControlFlow::Continue(Skip(false))
975        }
976
977        fn end_variation(&mut self, movetext: &mut Self::Movetext) -> ControlFlow<Self::Output> {
978            movetext.push(Token::EndVariation);
979            ControlFlow::Continue(())
980        }
981
982        fn outcome(
983            &mut self,
984            movetext: &mut Self::Movetext,
985            outcome: Outcome,
986        ) -> ControlFlow<Self::Output> {
987            movetext.push(Token::Outcome(outcome));
988            ControlFlow::Continue(())
989        }
990
991        fn end_game(&mut self, mut movetext: Self::Movetext) -> Self::Output {
992            movetext.push(Token::EndGame);
993            movetext
994        }
995    }
996
997    #[test]
998    fn test_empty() -> io::Result<()> {
999        let pgn = b"";
1000
1001        assert!(
1002            Reader::new(io::Cursor::new(pgn))
1003                .read_game(&mut CollectTokens)?
1004                .is_none()
1005        );
1006
1007        assert!(!Reader::new(io::Cursor::new(pgn)).skip_game()?);
1008
1009        Ok(())
1010    }
1011
1012    #[test]
1013    fn test_whitespace() -> io::Result<()> {
1014        let pgn = b"\xef\xbb\xbf  \n\r\t \n\n ";
1015
1016        assert!(
1017            Reader::new(io::Cursor::new(pgn))
1018                .read_game(&mut CollectTokens)?
1019                .is_none()
1020        );
1021
1022        assert!(!Reader::new(io::Cursor::new(pgn)).skip_game()?);
1023
1024        Ok(())
1025    }
1026
1027    #[test]
1028    fn test_trailing_space() -> io::Result<()> {
1029        let pgn = b"1. e4 1-0\n\n\n\n\n  \n";
1030
1031        let mut reader = Reader::new(io::Cursor::new(pgn));
1032        assert!(reader.read_game(&mut CollectTokens)?.is_some());
1033        assert!(reader.read_game(&mut CollectTokens)?.is_none());
1034
1035        let mut reader = Reader::new(io::Cursor::new(pgn));
1036        assert!(reader.skip_game()?);
1037        assert!(!reader.skip_game()?);
1038
1039        Ok(())
1040    }
1041
1042    #[test]
1043    fn test_rest_of_line_comment() -> io::Result<()> {
1044        let pgn = b";\n%\n\nX";
1045
1046        let mut reader = Reader::new(io::Cursor::new(pgn));
1047        let mut skip_reader = Reader::new(io::Cursor::new(pgn));
1048
1049        assert!(reader.read_game(&mut CollectTokens)?.is_some());
1050        assert!(skip_reader.skip_game()?);
1051        assert_eq!(reader.stream_position()?, skip_reader.stream_position()?);
1052
1053        assert!(reader.read_game(&mut CollectTokens)?.is_some());
1054        assert!(skip_reader.skip_game()?);
1055        assert_eq!(reader.stream_position()?, skip_reader.stream_position()?);
1056
1057        assert!(reader.read_game(&mut CollectTokens)?.is_none());
1058        assert!(!skip_reader.skip_game()?);
1059
1060        Ok(())
1061    }
1062
1063    #[test]
1064    fn test_long_comment() -> io::Result<()> {
1065        // This repeats a four-byte sequence 257 times, and the buffer is 255+2
1066        // bytes. Since 4 is relatively prime to 257, this ensures that we test
1067        // splitting the sequence no matter where the beginning of the comment
1068        // occurs.
1069        let crabs: String = std::iter::repeat_n('🦀', 257).collect();
1070        let pgn = format!("1. e4 {{{}}}", &crabs).into_bytes();
1071        let mut reader = Reader::new(io::Cursor::new(pgn.as_slice()));
1072        let tokens = reader.read_game(&mut CollectTokens)?.expect("found game");
1073        let mut rebuilt = String::new();
1074        for token in tokens {
1075            if let Token::Comment(bytes) = token {
1076                let s = core::str::from_utf8(bytes.as_slice())
1077                    .expect("reader preserves UTF-8 validity");
1078                rebuilt.push_str(s);
1079            }
1080        }
1081        assert_eq!(rebuilt, crabs);
1082        Ok(())
1083    }
1084
1085    #[test]
1086    fn test_tag_movetext_transition() -> io::Result<()> {
1087        let pgn = br#"
1088            [One "1"] [Two "2"]
1089%[Three "%"]
1090[Three "3"]
1091
1092
1093            [Four "4"]*
1094
1095            [Five "5"]"#;
1096
1097        let mut reader = Reader::new(io::Cursor::new(pgn));
1098
1099        let game = reader.read_game(&mut CollectTokens)?.expect("found game");
1100        assert_eq!(
1101            game,
1102            &[
1103                Token::BeginTags,
1104                Token::Tag(b"One".into(), b"1".into()),
1105                Token::Tag(b"Two".into(), b"2".into()),
1106                Token::Tag(b"Three".into(), b"3".into()),
1107                Token::Tag(b"Four".into(), b"4".into()),
1108                Token::BeginMovetext,
1109                Token::Outcome(Outcome::Unknown),
1110                Token::EndGame,
1111            ]
1112        );
1113
1114        let game = reader.read_game(&mut CollectTokens)?.expect("found game");
1115        assert_eq!(
1116            game,
1117            &[
1118                Token::BeginTags,
1119                Token::Tag(b"Five".into(), b"5".into()),
1120                Token::BeginMovetext,
1121                Token::EndGame,
1122            ]
1123        );
1124
1125        Ok(())
1126    }
1127
1128    #[test]
1129    fn test_movetext() -> io::Result<()> {
1130        let pgn =
1131            br#"[White "hello\" "][Black "world"]1.f3! e5$71 2.g4 ?? (-- {}) O-O O-O-O#!?0-1"#;
1132
1133        let game = Reader::new(io::Cursor::new(pgn))
1134            .read_game(&mut CollectTokens)?
1135            .expect("found game");
1136
1137        assert_eq!(
1138            game,
1139            &[
1140                Token::BeginTags,
1141                Token::Tag(b"White".into(), b"hello\" ".into()),
1142                Token::Tag(b"Black".into(), b"world".into()),
1143                Token::BeginMovetext,
1144                Token::San(SanPlus {
1145                    san: San::Normal {
1146                        role: Role::Pawn,
1147                        file: None,
1148                        rank: None,
1149                        capture: false,
1150                        to: Square::F3,
1151                        promotion: None
1152                    },
1153                    suffix: None
1154                }),
1155                Token::Nag(Nag::GOOD_MOVE),
1156                Token::San(SanPlus {
1157                    san: San::Normal {
1158                        role: Role::Pawn,
1159                        file: None,
1160                        rank: None,
1161                        capture: false,
1162                        to: Square::E5,
1163                        promotion: None
1164                    },
1165                    suffix: None
1166                }),
1167                Token::Nag(Nag(71)),
1168                Token::San(SanPlus {
1169                    san: San::Normal {
1170                        role: Role::Pawn,
1171                        file: None,
1172                        rank: None,
1173                        capture: false,
1174                        to: Square::G4,
1175                        promotion: None
1176                    },
1177                    suffix: None
1178                }),
1179                Token::Nag(Nag::BLUNDER),
1180                Token::BeginVariation,
1181                Token::San(SanPlus {
1182                    san: San::Null,
1183                    suffix: None
1184                }),
1185                Token::Comment(vec![]),
1186                Token::EndVariation,
1187                Token::San(SanPlus {
1188                    san: San::Castle(CastlingSide::KingSide),
1189                    suffix: None,
1190                }),
1191                Token::San(SanPlus {
1192                    san: San::Castle(CastlingSide::QueenSide),
1193                    suffix: Some(Suffix::Checkmate),
1194                }),
1195                Token::Nag(Nag::SPECULATIVE_MOVE),
1196                Token::Outcome(Outcome::Known(KnownOutcome::Decisive {
1197                    winner: Color::Black
1198                })),
1199                Token::EndGame,
1200            ]
1201        );
1202
1203        Ok(())
1204    }
1205
1206    #[test]
1207    fn test_outcomes() -> io::Result<()> {
1208        let pgn = "1-0 0-1 1/2-1/2 1–0 0–1 ½–½".as_bytes();
1209
1210        let game = Reader::new(io::Cursor::new(pgn))
1211            .read_game(&mut CollectTokens)?
1212            .expect("found game");
1213
1214        assert_eq!(
1215            game,
1216            &[
1217                Token::BeginTags,
1218                Token::BeginMovetext,
1219                Token::Outcome(Outcome::Known(KnownOutcome::Decisive {
1220                    winner: Color::White
1221                })),
1222                Token::Outcome(Outcome::Known(KnownOutcome::Decisive {
1223                    winner: Color::Black
1224                })),
1225                Token::Outcome(Outcome::Known(KnownOutcome::Draw)),
1226                Token::Outcome(Outcome::Known(KnownOutcome::Decisive {
1227                    winner: Color::White
1228                })),
1229                Token::Outcome(Outcome::Known(KnownOutcome::Decisive {
1230                    winner: Color::Black
1231                })),
1232                Token::Outcome(Outcome::Known(KnownOutcome::Draw)),
1233                Token::EndGame,
1234            ]
1235        );
1236
1237        Ok(())
1238    }
1239
1240    #[test]
1241    fn test_null_moves() -> io::Result<()> {
1242        let pgn = "-- Z0 --+ Z0+ --# Z0#".as_bytes();
1243
1244        let game = Reader::new(io::Cursor::new(pgn))
1245            .read_game(&mut CollectTokens)?
1246            .expect("found game");
1247
1248        assert_eq!(
1249            game,
1250            &[
1251                Token::BeginTags,
1252                Token::BeginMovetext,
1253                Token::San(SanPlus {
1254                    san: San::Null,
1255                    suffix: None
1256                }),
1257                Token::San(SanPlus {
1258                    san: San::Null,
1259                    suffix: None
1260                }),
1261                Token::San(SanPlus {
1262                    san: San::Null,
1263                    suffix: Some(Suffix::Check),
1264                }),
1265                Token::San(SanPlus {
1266                    san: San::Null,
1267                    suffix: Some(Suffix::Check),
1268                }),
1269                Token::San(SanPlus {
1270                    san: San::Null,
1271                    suffix: Some(Suffix::Checkmate),
1272                }),
1273                Token::San(SanPlus {
1274                    san: San::Null,
1275                    suffix: Some(Suffix::Checkmate),
1276                }),
1277                Token::EndGame,
1278            ]
1279        );
1280
1281        Ok(())
1282    }
1283}