Skip to main content

sciforge_parser/toml/
parser.rs

1use super::error::{TomlError, TomlErrorKind};
2use super::lexer::{Cursor, Token};
3use super::value::TomlValue;
4
/// Nesting depth allowed when no explicit limit is configured.
///
/// Used as the `max_depth` field of the default limit set.
pub const DEFAULT_MAX_TOML_DEPTH: usize = 64;
6
/// Resource limits enforced while parsing a TOML document.
///
/// Every limit is inclusive: a quantity equal to the limit is accepted and
/// only a strictly larger one is rejected.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TomlLimits {
    /// Deepest nesting level a key/value section or value may reach.
    pub max_depth: usize,
    /// Longest key or string value, measured by the token's `len()`.
    pub max_string_len: usize,
    /// Largest number of elements in a single inline array.
    pub max_array_len: usize,
    /// Largest number of key/value pairs in a single key/value section.
    pub max_table_len: usize,
    /// Largest total number of nodes (table headers, key/value pairs and
    /// array elements) counted across the whole document.
    pub max_node_count: usize,
}
15
16pub const DEFAULT_TOML_LIMITS: TomlLimits = TomlLimits {
17    max_depth: DEFAULT_MAX_TOML_DEPTH,
18    max_string_len: 64 * 1024,
19    max_array_len: 16 * 1024,
20    max_table_len: 16 * 1024,
21    max_node_count: 128 * 1024,
22};
23
24pub struct TomlParser<'a> {
25    cursor: Cursor<'a>,
26    limits: TomlLimits,
27    nodes_seen: usize,
28}
29
30impl<'a> TomlParser<'a> {
31    pub const fn new(bytes: &'a [u8]) -> Self {
32        Self {
33            cursor: Cursor::new(bytes),
34            limits: DEFAULT_TOML_LIMITS,
35            nodes_seen: 0,
36        }
37    }
38
39    pub const fn with_max_depth(mut self, max_depth: usize) -> Self {
40        self.limits.max_depth = max_depth;
41        self
42    }
43
44    pub const fn with_limits(mut self, limits: TomlLimits) -> Self {
45        self.limits = limits;
46        self
47    }
48
49    pub fn parse(mut self) -> Result<TomlValue<'a>, TomlError> {
50        self.parse_document()
51    }
52
53    pub fn validate(mut self) -> Result<(), TomlError> {
54        self.parse_document()?;
55        Ok(())
56    }
57
58    fn parse_document(&mut self) -> Result<TomlValue<'a>, TomlError> {
59        self.skip_newlines()?;
60        self.parse_key_values(0)?;
61
62        loop {
63            self.skip_newlines()?;
64            let Some(tok) = self.cursor.next_token()? else {
65                break;
66            };
67            match tok {
68                Token::OpenDoubleBracket => {
69                    self.parse_array_of_tables_header()?;
70                    self.skip_newlines()?;
71                    self.parse_key_values(1)?;
72                }
73                Token::OpenBracket => {
74                    self.parse_table_header()?;
75                    self.skip_newlines()?;
76                    self.parse_key_values(1)?;
77                }
78                Token::Newline => continue,
79                _ => {
80                    return Err(TomlError::new(
81                        TomlErrorKind::UnexpectedToken,
82                        self.cursor.position(),
83                    ));
84                }
85            }
86        }
87
88        Ok(TomlValue::Table)
89    }
90
91    fn skip_newlines(&mut self) -> Result<(), TomlError> {
92        loop {
93            let pos = self.cursor.position();
94            match self.cursor.next_token()? {
95                Some(Token::Newline) => continue,
96                None => break,
97                Some(_) => {
98                    self.cursor.set_position(pos);
99                    break;
100                }
101            }
102        }
103        Ok(())
104    }
105
106    fn parse_table_header(&mut self) -> Result<(), TomlError> {
107        self.bump_node()?;
108        self.parse_key_path()?;
109        self.expect_close_bracket()?;
110        Ok(())
111    }
112
113    fn parse_array_of_tables_header(&mut self) -> Result<(), TomlError> {
114        self.bump_node()?;
115        self.parse_key_path()?;
116        self.expect_close_double_bracket()?;
117        Ok(())
118    }
119
120    fn parse_key_path(&mut self) -> Result<(), TomlError> {
121        self.parse_simple_key()?;
122        loop {
123            let pos = self.cursor.position();
124            match self.cursor.next_token()? {
125                Some(Token::Dot) => {
126                    self.parse_simple_key()?;
127                }
128                Some(_) => {
129                    self.cursor.set_position(pos);
130                    break;
131                }
132                None => break,
133            }
134        }
135        Ok(())
136    }
137
138    fn parse_simple_key(&mut self) -> Result<(), TomlError> {
139        let tok = self
140            .cursor
141            .next_token()?
142            .ok_or(TomlError::new(TomlErrorKind::Eof, self.cursor.position()))?;
143        match tok {
144            Token::BareKey(k) => {
145                if k.len() > self.limits.max_string_len {
146                    return Err(TomlError::new(
147                        TomlErrorKind::MaxStringLengthExceeded,
148                        self.cursor.position(),
149                    ));
150                }
151            }
152            Token::BasicString(s) | Token::LiteralString(s) => {
153                if s.len() > self.limits.max_string_len {
154                    return Err(TomlError::new(
155                        TomlErrorKind::MaxStringLengthExceeded,
156                        self.cursor.position(),
157                    ));
158                }
159            }
160            _ => {
161                return Err(TomlError::new(
162                    TomlErrorKind::InvalidKey,
163                    self.cursor.position(),
164                ));
165            }
166        }
167        Ok(())
168    }
169
170    fn parse_key_values(&mut self, depth: usize) -> Result<(), TomlError> {
171        if depth > self.limits.max_depth {
172            return Err(TomlError::new(
173                TomlErrorKind::MaxDepthExceeded,
174                self.cursor.position(),
175            ));
176        }
177
178        let mut count = 0usize;
179        loop {
180            let pos = self.cursor.position();
181            let Some(tok) = self.cursor.next_token()? else {
182                break;
183            };
184            match tok {
185                Token::BareKey(_) | Token::BasicString(_) | Token::LiteralString(_) => {
186                    self.cursor.set_position(pos);
187                    self.parse_key_path()?;
188                    self.expect_equals()?;
189                    self.parse_value(depth + 1)?;
190                    self.bump_node()?;
191                    count = count.saturating_add(1);
192                    if count > self.limits.max_table_len {
193                        return Err(TomlError::new(
194                            TomlErrorKind::MaxTableLengthExceeded,
195                            self.cursor.position(),
196                        ));
197                    }
198                    self.skip_newlines()?;
199                }
200                Token::Newline => continue,
201                _ => {
202                    self.cursor.set_position(pos);
203                    break;
204                }
205            }
206        }
207        Ok(())
208    }
209
210    fn parse_value(&mut self, depth: usize) -> Result<TomlValue<'a>, TomlError> {
211        if depth > self.limits.max_depth {
212            return Err(TomlError::new(
213                TomlErrorKind::MaxDepthExceeded,
214                self.cursor.position(),
215            ));
216        }
217
218        let tok = self
219            .cursor
220            .next_token()?
221            .ok_or(TomlError::new(TomlErrorKind::Eof, self.cursor.position()))?;
222
223        match tok {
224            Token::BasicString(s) => {
225                if s.len() > self.limits.max_string_len {
226                    return Err(TomlError::new(
227                        TomlErrorKind::MaxStringLengthExceeded,
228                        self.cursor.position(),
229                    ));
230                }
231                Ok(TomlValue::String(s))
232            }
233            Token::LiteralString(s) => {
234                if s.len() > self.limits.max_string_len {
235                    return Err(TomlError::new(
236                        TomlErrorKind::MaxStringLengthExceeded,
237                        self.cursor.position(),
238                    ));
239                }
240                Ok(TomlValue::String(s))
241            }
242            Token::Integer(v) => Ok(TomlValue::Integer(v)),
243            Token::Float(v) => Ok(TomlValue::Float(v)),
244            Token::Bool(v) => Ok(TomlValue::Bool(v)),
245            Token::OpenBracket => self.parse_inline_array(depth),
246            _ => Err(TomlError::new(
247                TomlErrorKind::UnexpectedToken,
248                self.cursor.position(),
249            )),
250        }
251    }
252
253    fn parse_inline_array(&mut self, depth: usize) -> Result<TomlValue<'a>, TomlError> {
254        let mut count = 0usize;
255        loop {
256            let pos = self.cursor.position();
257            let Some(tok) = self.cursor.next_token()? else {
258                return Err(TomlError::new(TomlErrorKind::Eof, pos));
259            };
260            match tok {
261                Token::CloseBracket => return Ok(TomlValue::Array),
262                Token::Comma | Token::Newline => continue,
263                _ => {
264                    self.cursor.set_position(pos);
265                    self.parse_value(depth + 1)?;
266                    self.bump_node()?;
267                    count = count.saturating_add(1);
268                    if count > self.limits.max_array_len {
269                        return Err(TomlError::new(
270                            TomlErrorKind::MaxArrayLengthExceeded,
271                            self.cursor.position(),
272                        ));
273                    }
274                }
275            }
276        }
277    }
278
279    fn bump_node(&mut self) -> Result<(), TomlError> {
280        self.nodes_seen = self.nodes_seen.saturating_add(1);
281        if self.nodes_seen > self.limits.max_node_count {
282            return Err(TomlError::new(
283                TomlErrorKind::MaxNodeCountExceeded,
284                self.cursor.position(),
285            ));
286        }
287        Ok(())
288    }
289
290    fn expect_equals(&mut self) -> Result<(), TomlError> {
291        let tok = self
292            .cursor
293            .next_token()?
294            .ok_or(TomlError::new(TomlErrorKind::Eof, self.cursor.position()))?;
295        if !matches!(tok, Token::Equals) {
296            return Err(TomlError::new(
297                TomlErrorKind::UnexpectedToken,
298                self.cursor.position(),
299            ));
300        }
301        Ok(())
302    }
303
304    fn expect_close_bracket(&mut self) -> Result<(), TomlError> {
305        let tok = self
306            .cursor
307            .next_token()?
308            .ok_or(TomlError::new(TomlErrorKind::Eof, self.cursor.position()))?;
309        if !matches!(tok, Token::CloseBracket) {
310            return Err(TomlError::new(
311                TomlErrorKind::UnexpectedToken,
312                self.cursor.position(),
313            ));
314        }
315        Ok(())
316    }
317
318    fn expect_close_double_bracket(&mut self) -> Result<(), TomlError> {
319        let tok = self
320            .cursor
321            .next_token()?
322            .ok_or(TomlError::new(TomlErrorKind::Eof, self.cursor.position()))?;
323        if !matches!(tok, Token::CloseDoubleBracket) {
324            return Err(TomlError::new(
325                TomlErrorKind::UnexpectedToken,
326                self.cursor.position(),
327            ));
328        }
329        Ok(())
330    }
331}
332
333pub fn parse_toml(bytes: &[u8]) -> Result<TomlValue<'_>, TomlError> {
334    TomlParser::new(bytes).parse()
335}
336
337pub fn parse_toml_with_max_depth(
338    bytes: &[u8],
339    max_depth: usize,
340) -> Result<TomlValue<'_>, TomlError> {
341    TomlParser::new(bytes).with_max_depth(max_depth).parse()
342}
343
344pub fn parse_toml_with_limits(
345    bytes: &[u8],
346    limits: TomlLimits,
347) -> Result<TomlValue<'_>, TomlError> {
348    TomlParser::new(bytes).with_limits(limits).parse()
349}
350
351pub fn validate_toml(bytes: &[u8]) -> Result<(), TomlError> {
352    TomlParser::new(bytes).validate()
353}