1use super::error::{YamlError, YamlErrorKind};
2use super::lexer::{LineCursor, YamlLine};
3use super::scalar::parse_scalar;
4use super::value::YamlValue;
5
/// Default nesting-depth limit; used as `max_depth` in [`DEFAULT_YAML_LIMITS`].
pub const DEFAULT_MAX_YAML_DEPTH: usize = 64;
7
/// Resource limits enforced by [`YamlParser`] while it walks a document.
///
/// Each limit is inclusive: the parser reports an error only once the measured
/// quantity becomes strictly greater than the configured value.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct YamlLimits {
    /// Largest nesting depth a node may be parsed at.
    pub max_depth: usize,
    /// Largest length, in bytes, of a scalar or mapping-key text.
    pub max_scalar_len: usize,
    /// Largest number of items collected into a single sequence.
    pub max_sequence_len: usize,
    /// Largest number of entries collected into a single mapping.
    pub max_mapping_len: usize,
    /// Largest number of nodes the parser may visit in one document.
    pub max_node_count: usize,
}
16
/// Limits installed by [`YamlParser::new`] unless the caller overrides them.
pub const DEFAULT_YAML_LIMITS: YamlLimits = YamlLimits {
    max_depth: DEFAULT_MAX_YAML_DEPTH,
    // 64 KiB of text per scalar.
    max_scalar_len: 64 * 1024,
    // 16 Ki items per sequence.
    max_sequence_len: 16 * 1024,
    // 16 Ki entries per mapping.
    max_mapping_len: 16 * 1024,
    // 128 Ki visited nodes per document.
    max_node_count: 128 * 1024,
};
24
/// Line-oriented parser that turns input bytes into a [`YamlValue`] tree.
pub struct YamlParser<'a> {
    /// Line source the parser peeks at and advances through.
    cursor: LineCursor<'a>,
    /// Limits checked while parsing.
    limits: YamlLimits,
    /// Running count of nodes visited so far, compared against `limits.max_node_count`.
    nodes_seen: usize,
}
30
31impl<'a> YamlParser<'a> {
32 pub const fn new(bytes: &'a [u8]) -> Self {
33 Self {
34 cursor: LineCursor::new(bytes),
35 limits: DEFAULT_YAML_LIMITS,
36 nodes_seen: 0,
37 }
38 }
39
40 pub const fn with_max_depth(mut self, max_depth: usize) -> Self {
41 self.limits.max_depth = max_depth;
42 self
43 }
44
45 pub const fn with_limits(mut self, limits: YamlLimits) -> Self {
46 self.limits = limits;
47 self
48 }
49
50 pub fn parse(mut self) -> Result<YamlValue<'a>, YamlError> {
51 let first = self
52 .cursor
53 .peek()?
54 .ok_or(YamlError::new(YamlErrorKind::Eof, self.cursor.position()))?;
55 self.parse_node(first.indent, 0)
56 }
57
58 pub fn validate(mut self) -> Result<(), YamlError> {
59 let first = self
60 .cursor
61 .peek()?
62 .ok_or(YamlError::new(YamlErrorKind::Eof, self.cursor.position()))?;
63 self.parse_node(first.indent, 0)?;
64 if self.cursor.peek()?.is_some() {
65 let line = self.cursor.peek()?.expect("peek checked is_some");
66 return Err(YamlError::new(YamlErrorKind::UnexpectedToken, line.offset));
67 }
68 Ok(())
69 }
70
71 fn parse_node(&mut self, base_indent: usize, depth: usize) -> Result<YamlValue<'a>, YamlError> {
72 if depth > self.limits.max_depth {
73 return Err(YamlError::new(
74 YamlErrorKind::MaxDepthExceeded,
75 self.cursor.position(),
76 ));
77 }
78
79 self.nodes_seen = self.nodes_seen.saturating_add(1);
80 if self.nodes_seen > self.limits.max_node_count {
81 return Err(YamlError::new(
82 YamlErrorKind::MaxNodeCountExceeded,
83 self.cursor.position(),
84 ));
85 }
86
87 let line = self
88 .cursor
89 .peek()?
90 .ok_or(YamlError::new(YamlErrorKind::Eof, self.cursor.position()))?;
91
92 if line.indent < base_indent {
93 return Err(YamlError::new(
94 YamlErrorKind::InvalidIndentation,
95 line.offset,
96 ));
97 }
98 if line.indent > base_indent {
99 return Err(YamlError::new(
100 YamlErrorKind::InvalidIndentation,
101 line.offset,
102 ));
103 }
104
105 if is_sequence_entry(line.content) {
106 self.parse_sequence(base_indent, depth + 1)
107 } else if has_mapping_separator(line.content) {
108 self.parse_mapping(base_indent, depth + 1)
109 } else {
110 self.parse_scalar_line(line)
111 }
112 }
113
114 fn parse_scalar_line(&mut self, line: YamlLine<'a>) -> Result<YamlValue<'a>, YamlError> {
115 if line.content.len() > self.limits.max_scalar_len {
116 return Err(YamlError::new(
117 YamlErrorKind::MaxScalarLengthExceeded,
118 line.offset,
119 ));
120 }
121 self.cursor.next()?;
122 parse_scalar(line.content, line.offset)
123 }
124
125 fn parse_sequence(
126 &mut self,
127 base_indent: usize,
128 depth: usize,
129 ) -> Result<YamlValue<'a>, YamlError> {
130 let mut items = Vec::new();
131
132 loop {
133 let Some(line) = self.cursor.peek()? else {
134 break;
135 };
136 if line.indent < base_indent {
137 break;
138 }
139 if line.indent > base_indent {
140 return Err(YamlError::new(
141 YamlErrorKind::InvalidIndentation,
142 line.offset,
143 ));
144 }
145 if !is_sequence_entry(line.content) {
146 break;
147 }
148
149 let item_text = line.content[1..].trim_start();
150 self.cursor.next()?;
151
152 if !item_text.is_empty() {
153 if item_text.len() > self.limits.max_scalar_len {
154 return Err(YamlError::new(
155 YamlErrorKind::MaxScalarLengthExceeded,
156 line.offset,
157 ));
158 }
159 if has_mapping_separator(item_text) {
160 let (key, value_part) = split_mapping_entry(item_text).unwrap();
161 let mut entries = Vec::new();
162 let val = if value_part.is_empty() {
163 let nested = self.cursor.peek()?;
164 if let Some(nl) = nested
165 && nl.indent > base_indent
166 {
167 self.parse_node(nl.indent, depth)?
168 } else {
169 YamlValue::Null
170 }
171 } else if value_part == "[]" {
172 YamlValue::Sequence(Vec::new())
173 } else {
174 parse_scalar(value_part, line.offset)?
175 };
176 entries.push((key, val));
177 loop {
178 let Some(next) = self.cursor.peek()? else {
179 break;
180 };
181 if next.indent <= base_indent || is_sequence_entry(next.content) {
182 break;
183 }
184 if !has_mapping_separator(next.content) {
185 break;
186 }
187 let (nk, nv) = split_mapping_entry(next.content).unwrap();
188 self.cursor.next()?;
189 let val = if nv.is_empty() {
190 let nested = self.cursor.peek()?;
191 if let Some(nl) = nested
192 && nl.indent > next.indent
193 {
194 self.parse_node(nl.indent, depth)?
195 } else {
196 YamlValue::Null
197 }
198 } else if nv == "[]" {
199 YamlValue::Sequence(Vec::new())
200 } else {
201 parse_scalar(nv, next.offset)?
202 };
203 entries.push((nk, val));
204 }
205 items.push(YamlValue::Mapping(entries));
206 } else {
207 items.push(parse_scalar(item_text, line.offset)?);
208 }
209 } else {
210 let nested = self
211 .cursor
212 .peek()?
213 .ok_or(YamlError::new(YamlErrorKind::Eof, line.offset))?;
214 if nested.indent <= base_indent {
215 return Err(YamlError::new(
216 YamlErrorKind::InvalidIndentation,
217 nested.offset,
218 ));
219 }
220 items.push(self.parse_node(nested.indent, depth)?);
221 }
222
223 if items.len() > self.limits.max_sequence_len {
224 return Err(YamlError::new(
225 YamlErrorKind::MaxSequenceLengthExceeded,
226 line.offset,
227 ));
228 }
229 }
230
231 if items.is_empty() {
232 return Err(YamlError::new(
233 YamlErrorKind::UnexpectedToken,
234 self.cursor.position(),
235 ));
236 }
237
238 Ok(YamlValue::Sequence(items))
239 }
240
241 fn parse_mapping(
242 &mut self,
243 base_indent: usize,
244 depth: usize,
245 ) -> Result<YamlValue<'a>, YamlError> {
246 let mut entries = Vec::new();
247
248 loop {
249 let Some(line) = self.cursor.peek()? else {
250 break;
251 };
252 if line.indent < base_indent {
253 break;
254 }
255 if line.indent > base_indent {
256 return Err(YamlError::new(
257 YamlErrorKind::InvalidIndentation,
258 line.offset,
259 ));
260 }
261
262 let Some((key, value_part)) = split_mapping_entry(line.content) else {
263 break;
264 };
265
266 if key.is_empty() {
267 return Err(YamlError::new(
268 YamlErrorKind::InvalidMappingKey,
269 line.offset,
270 ));
271 }
272 if key.len() > self.limits.max_scalar_len {
273 return Err(YamlError::new(
274 YamlErrorKind::MaxScalarLengthExceeded,
275 line.offset,
276 ));
277 }
278
279 self.cursor.next()?;
280
281 let val = if !value_part.is_empty() {
282 if value_part.len() > self.limits.max_scalar_len {
283 return Err(YamlError::new(
284 YamlErrorKind::MaxScalarLengthExceeded,
285 line.offset,
286 ));
287 }
288 if value_part == "[]" {
289 YamlValue::Sequence(Vec::new())
290 } else {
291 parse_scalar(value_part, line.offset)?
292 }
293 } else {
294 let next = self.cursor.peek()?;
295 if let Some(next_line) = next
296 && next_line.indent > base_indent
297 {
298 self.parse_node(next_line.indent, depth)?
299 } else {
300 YamlValue::Null
301 }
302 };
303
304 entries.push((key, val));
305
306 if entries.len() > self.limits.max_mapping_len {
307 return Err(YamlError::new(
308 YamlErrorKind::MaxMappingLengthExceeded,
309 line.offset,
310 ));
311 }
312 }
313
314 if entries.is_empty() {
315 return Err(YamlError::new(
316 YamlErrorKind::UnexpectedToken,
317 self.cursor.position(),
318 ));
319 }
320
321 Ok(YamlValue::Mapping(entries))
322 }
323}
324
/// Returns `true` when `content` is a lone `-` or begins with `-` followed by a space.
fn is_sequence_entry(content: &str) -> bool {
    matches!(content.as_bytes(), [b'-'] | [b'-', b' ', ..])
}
328
329fn has_mapping_separator(content: &str) -> bool {
330 split_mapping_entry(content).is_some()
331}
332
/// Splits `content` into a trimmed key and a left-trimmed value at the first
/// mapping separator.
///
/// A `:` only counts as a separator when it is followed by a space, a tab, or
/// the end of the text, so plain scalars such as `http://example.com` or
/// `12:30` are not mistaken for mapping entries. When `content` starts with a
/// terminated quoted scalar, colons inside the quotes are ignored.
///
/// Returns `None` when no separator exists or when the key in front of the
/// first separator is empty. Quotes are not removed from the returned key.
fn split_mapping_entry(content: &str) -> Option<(&str, &str)> {
    let bytes = content.as_bytes();
    let lead = content.len() - content.trim_start().len();
    let scan_from = lead + quoted_prefix_len(&bytes[lead..]);

    let sep = (scan_from..bytes.len()).find(|&idx| {
        bytes[idx] == b':'
            && matches!(bytes.get(idx + 1).copied(), None | Some(b' ' | b'\t'))
    })?;

    // `sep` indexes an ASCII byte, so both slices fall on char boundaries.
    let key = content[..sep].trim();
    if key.is_empty() {
        return None;
    }
    Some((key, content[sep + 1..].trim_start()))
}

/// Returns the byte length of a leading quoted scalar, both quotes included,
/// or 0 when `bytes` does not start with a quote or the quote is never closed.
///
/// Inside double quotes a backslash escapes the next byte; inside single
/// quotes a doubled `''` is an escaped quote.
fn quoted_prefix_len(bytes: &[u8]) -> usize {
    let quote = match bytes.first().copied() {
        Some(q) if q == b'"' || q == b'\'' => q,
        _ => return 0,
    };

    let mut idx = 1;
    while idx < bytes.len() {
        let byte = bytes[idx];
        if quote == b'"' && byte == b'\\' {
            idx += 2;
        } else if byte == quote {
            if quote == b'\'' && bytes.get(idx + 1) == Some(&b'\'') {
                idx += 2;
            } else {
                return idx + 1;
            }
        } else {
            idx += 1;
        }
    }
    0
}
349
350pub fn parse_yaml(bytes: &[u8]) -> Result<YamlValue<'_>, YamlError> {
351 YamlParser::new(bytes).parse()
352}
353
354pub fn parse_yaml_with_max_depth(
355 bytes: &[u8],
356 max_depth: usize,
357) -> Result<YamlValue<'_>, YamlError> {
358 YamlParser::new(bytes).with_max_depth(max_depth).parse()
359}
360
361pub fn parse_yaml_with_limits(
362 bytes: &[u8],
363 limits: YamlLimits,
364) -> Result<YamlValue<'_>, YamlError> {
365 YamlParser::new(bytes).with_limits(limits).parse()
366}
367
368pub fn validate_yaml(bytes: &[u8]) -> Result<(), YamlError> {
369 YamlParser::new(bytes).validate()
370}