pgn_reader/
tag.rs

1use std::{
2    borrow::Cow,
3    fmt,
4    str::{self, Utf8Error},
5};
6
/// The raw bytes of a tag value, exactly as they were given to the wrapper.
///
/// The bytes may still contain [backslash
/// escapes](http://www.saremba.de/chessgml/standards/pgn/pgn-complete.htm#c7);
/// the inherent methods on this type offer helpers to decode them.
///
/// > A quote inside a string is represented by the backslash immediately
/// > followed by a quote. A backslash inside a string is represented by
/// > two adjacent backslashes.
///
/// The wrapper only borrows the bytes, so copying it is cheap. Equality and
/// hashing are derived and therefore operate on the raw, still-escaped bytes.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct RawTag<'a>(pub &'a [u8]);
18
19impl<'a> RawTag<'a> {
20    /// Returns the raw byte representation of the tag value.
21    pub fn as_bytes(&self) -> &[u8] {
22        self.0
23    }
24
25    /// Decodes escaped quotes and backslashes into bytes. Allocates only when
26    /// the value actually contains escape sequences.
27    pub fn decode(&self) -> Cow<'a, [u8]> {
28        let mut head = 0;
29        let mut decoded: Vec<u8> = Vec::new();
30        for escape in memchr::memchr_iter(b'\\', self.0) {
31            match self.0.get(escape + 1).cloned() {
32                Some(ch) if ch == b'\\' || ch == b'"' => {
33                    decoded.extend_from_slice(&self.0[head..escape]);
34                    head = escape + 1;
35                }
36                _ => (),
37            }
38        }
39        if head == 0 {
40            Cow::Borrowed(self.0)
41        } else {
42            decoded.extend_from_slice(&self.0[head..]);
43            Cow::Owned(decoded)
44        }
45    }
46
47    /// Tries to decode the tag as UTF-8. This is guaranteed to succeed on
48    /// valid PGNs.
49    ///
50    /// # Errors
51    ///
52    /// Errors if the tag contains an invalid UTF-8 byte sequence.
53    pub fn decode_utf8(&self) -> Result<Cow<'a, str>, Utf8Error> {
54        Ok(match self.decode() {
55            Cow::Borrowed(borrowed) => Cow::Borrowed(str::from_utf8(borrowed)?),
56            Cow::Owned(owned) => Cow::Owned(String::from_utf8(owned).map_err(|e| e.utf8_error())?),
57        })
58    }
59
60    /// Decodes the tag as UTF-8, replacing any invalid byte sequences with
61    /// the placeholder � U+FFFD.
62    pub fn decode_utf8_lossy(&self) -> Cow<'a, str> {
63        match self.decode() {
64            Cow::Borrowed(borrowed) => String::from_utf8_lossy(borrowed),
65            Cow::Owned(owned) => Cow::Owned(String::from_utf8_lossy(&owned).into_owned()),
66        }
67    }
68}
69
70impl<'a> fmt::Debug for RawTag<'a> {
71    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
72        write!(f, "{:?}", self.decode_utf8_lossy())
73    }
74}
75
#[cfg(test)]
mod tests {
    use super::RawTag;

    /// Asserts that decoding `raw` yields exactly `expected`.
    fn assert_decodes(raw: &[u8], expected: &[u8]) {
        assert_eq!(&*RawTag(raw).decode(), expected);
    }

    #[test]
    fn test_raw_tag() {
        // No backslashes at all: the value is returned unchanged.
        assert_decodes(b"Hello world", b"Hello world");

        // Backslashes that do not escape a quote or a backslash are kept.
        assert_decodes(b"Hello \\world\\", b"Hello \\world\\");

        // Escaped quote and escaped backslash mixed with a plain backslash.
        assert_decodes(b"\\Hello \\\"world\\\\", b"\\Hello \"world\\");
    }
}
91}