gdb_protocol/
parser.rs

1use super::{
2    packet::{Kind, UncheckedPacket},
3    Error,
4};
5
6use std::{cmp, iter, mem};
7
/// Position of the parser within the packet that is currently being read.
enum State {
    /// Waiting for the packet start marker: `%` or `$`.
    Type,
    /// Reading packet data, up to the terminating `#`.
    Data,
    /// The previous byte was `}`: the next byte `x` is stored as `x ^ 0x20`.
    Escape,
    /// The previous byte was `*`: the next byte `y` appends `y - 29` further
    /// copies of the last data byte.
    Repeat,
    /// Reading the checksum; the payload is the number of checksum bytes
    /// stored so far.
    Checksum(u8),
}
15
/// Number of checksum bytes that follow the `#` terminating a packet's data.
pub const CHECKSUM_LEN: u8 = 2;
17
18pub struct Parser {
19    state: State,
20    kind: Kind,
21    data: Vec<u8>,
22    checksum: [u8; CHECKSUM_LEN as usize],
23}
24impl Default for Parser {
25    fn default() -> Self {
26        Self {
27            state: State::Type,
28
29            // placeholders:
30            kind: Kind::Notification,
31            data: Vec::new(),
32            checksum: [0; CHECKSUM_LEN as usize],
33        }
34    }
35}
36impl Parser {
37    /// Parse as much of `input` as possible into a packet. Returns
38    /// the number of bytes read (the rest will need to be re-fed),
39    /// and maybe a packet which will need handling.
40    ///
41    /// ```rust
42    /// # use gdb_protocol::{Error, packet::{Kind, UncheckedPacket}, parser::Parser};
43    /// # let mut parser = Parser::default();
44    /// assert_eq!(
45    ///     parser.feed(b"$hello#14").unwrap(),
46    ///     (9, Some(UncheckedPacket {
47    ///         kind: Kind::Packet,
48    ///         data: b"hello".to_vec(),
49    ///         checksum: *b"14",
50    ///     }))
51    /// );
52    /// ```
53    ///
54    /// Apart from splitting the input up and expanding the data,
55    /// nothing else is done. No checksums are compared, no data is
56    /// handled. This is just the most basic building block used to
57    /// supply data that can be further validated and interpreted.
58    ///
59    /// ```rust
60    /// # use gdb_protocol::{Error, packet::{Kind, UncheckedPacket}, parser::Parser};
61    /// # let mut parser = Parser::default();
62    /// assert_eq!(
63    ///     parser.feed(b"$in:valid}]}}Hello* }]*!CHECKS#UM").unwrap(),
64    ///     (33, Some(UncheckedPacket {
65    ///         kind: Kind::Packet,
66    ///         data: b"in:valid}]Helloooo}}}}}CHECKS".to_vec(),
67    ///         checksum: *b"UM",
68    ///     }))
69    /// );
70    /// ```
71    ///
72    /// Note that although the GDB protocol mostly only uses 7 bits,
73    /// this will *not* work without the 8th bit clear. This is to
74    /// make the overhead of updating each element in the list
75    /// optional. Although that's simple: *Every* element's 8th bit
76    /// can be cleared so just do that before passing it to the
77    /// parser.
78    ///
79    /// ```rust
80    /// # use gdb_protocol::{Error, packet::{Kind, UncheckedPacket}, parser::Parser};
81    /// # let mut parser = Parser::default();
82    /// assert_eq!(
83    ///     parser.feed(&[b'%', 1, 2, 99, 255, 128, 0, 200, b'#', 0, 0]).unwrap(),
84    ///     (11, Some(UncheckedPacket {
85    ///         kind: Kind::Notification,
86    ///         data: vec![1, 2, 99, 255, 128, 0, 200],
87    ///         checksum: [0, 0],
88    ///     }))
89    /// );
90    /// ```
91    ///
92    /// This is a state machine: You may input half a packet now and
93    /// half in a later invocation.
94    ///
95    /// ```rust
96    /// # use gdb_protocol::{Error, parser::Parser};
97    /// #
98    /// # let full_input = b"$hello#14";
99    /// # #[allow(non_snake_case)]
100    /// # fn getRandomNumber() -> usize {
101    /// #     return 4; // chosen by a fair dice roll.
102    /// #               // guaranteed to be random.
103    /// # }
104    /// # let random_index = getRandomNumber();
105    /// let mut parser1 = Parser::default();
106    /// let (full_len, full_packet) = parser1.feed(full_input)?;
107    ///
108    /// let mut parser2 = Parser::default();
109    /// let (start_input, end_input) = full_input.split_at(random_index);
110    /// let (start_len, start_packet) = parser2.feed(start_input)?;
111    /// let (end_len, end_packet) = parser2.feed(end_input)?;
112    ///
113    /// assert_eq!(start_len + end_len, full_len, "The total consumed lengths must be equal");
114    /// assert_eq!(start_packet.or(end_packet), full_packet, "The end packets must be equal");
115    /// # Ok::<(), Error>(())
116    /// ```
117    pub fn feed(&mut self, input: &[u8]) -> Result<(usize, Option<UncheckedPacket>), Error> {
118        let mut read = 0;
119        loop {
120            let (partial, packet) = self.feed_one(&input[read..])?;
121            read += partial;
122            debug_assert!(read <= input.len());
123
124            if read == input.len() || packet.is_some() {
125                return Ok((read, packet));
126            }
127        }
128    }
129    fn feed_one(&mut self, input: &[u8]) -> Result<(usize, Option<UncheckedPacket>), Error> {
130        let first = match input.first() {
131            Some(b) => *b,
132            None => return Ok((0, None)),
133        };
134
135        match self.state {
136            State::Type => {
137                let start = memchr::memchr2(b'%', b'$', input);
138
139                match start.map(|pos| input[pos]) {
140                    Some(b'%') => self.kind = Kind::Notification,
141                    Some(b'$') => self.kind = Kind::Packet,
142                    Some(_) => unreachable!("did memchr just lie to me?!"),
143                    None => (),
144                }
145
146                if start.is_some() {
147                    self.state = State::Data;
148                }
149
150                Ok((start.map(|n| n + 1).unwrap_or_else(|| input.len()), None))
151            }
152            State::Data => {
153                let end = memchr::memchr3(b'#', b'}', b'*', input);
154
155                match end.map(|pos| input[pos]) {
156                    Some(b'#') => self.state = State::Checksum(0),
157                    Some(b'}') => self.state = State::Escape,
158                    Some(b'*') => self.state = State::Repeat,
159                    Some(_) => unreachable!("did memchr just lie to me?!"),
160                    None => (),
161                }
162
163                self.data
164                    .extend_from_slice(&input[..end.unwrap_or_else(|| input.len())]);
165                Ok((end.map(|n| n + 1).unwrap_or_else(|| input.len()), None))
166            }
167            State::Escape => {
168                self.data.push(first ^ 0x20);
169                self.state = State::Data;
170                Ok((1, None))
171            }
172            State::Repeat => {
173                let c = *self
174                    .data
175                    .last()
176                    .expect("State::Repeat must only be used once data has been inserted");
177                let count = first.saturating_sub(29);
178                self.data.extend(iter::repeat(c).take(count.into()));
179                self.state = State::Data;
180                Ok((1, None))
181            }
182            State::Checksum(mut i) => {
183                let read = cmp::min((CHECKSUM_LEN - i) as usize, input.len());
184
185                self.checksum[i as usize..].copy_from_slice(&input[..read]);
186                i += read as u8; // read <= CHECKSUM_LEN
187
188                if i < CHECKSUM_LEN {
189                    self.state = State::Checksum(i);
190                    Ok((read, None))
191                } else {
192                    self.state = State::Type;
193
194                    Ok((
195                        read,
196                        Some(UncheckedPacket {
197                            kind: self.kind,
198                            data: mem::replace(&mut self.data, Vec::new()),
199                            checksum: self.checksum,
200                        }),
201                    ))
202                }
203            }
204        }
205    }
206}