1mod context;
68mod line_parsing;
69mod options;
70mod utils;
71
72pub use options::{ParseOptions, ParseOptionsBuilder};
74
75use crate::block_string::{try_start_block_string, BlockStringResult, BlockStringState};
76use crate::document::{Document, Item};
77use crate::error::{HedlError, HedlResult};
78use crate::header::parse_header;
79use crate::lex::calculate_indent;
80use crate::limits::{Limits, TimeoutCheckExt, TimeoutContext};
81use crate::preprocess::{is_blank_line, is_comment_line, preprocess};
82use crate::reference::{resolve_references, TypeRegistry};
83use crate::value::Value;
84use context::{pop_frames, Frame};
85use line_parsing::{
86 is_expanded_child_list, is_inline_child_list, parse_expanded_child_list,
87 parse_inline_child_list, parse_matrix_row, parse_non_matrix_line, MatrixParseParams,
88};
89use std::collections::BTreeMap;
90use utils::{check_duplicate_key, finalize_stack, insert_into_current, validate_indent_for_child};
91
// NOTE(review): not referenced anywhere in the visible part of this file; the
// leading underscore keeps the compiler from warning about it being unused.
// Presumably a style-guide threshold for how many children an inline child
// list should carry -- confirm the intended use before relying on it.
const _STYLE_INLINE_CHILDREN_LIMIT: usize = 10;
96
97pub fn parse(input: &[u8]) -> HedlResult<Document> {
99 parse_with_limits(input, ParseOptions::default())
100}
101
102pub fn parse_with_limits(input: &[u8], options: ParseOptions) -> HedlResult<Document> {
104 let timeout_ctx = TimeoutContext::new(options.limits.timeout);
106
107 let preprocessed = preprocess(input, &options.limits)?;
109
110 let lines: Vec<(usize, &str)> = preprocessed.lines().collect();
112
113 let (header, body_start_idx) = parse_header(&lines, &options.limits, &timeout_ctx)?;
115
116 let body_lines = &lines[body_start_idx..];
118 let mut type_registries = TypeRegistry::new();
119 let root = parse_body(
120 body_lines,
121 &header,
122 &options.limits,
123 &mut type_registries,
124 &timeout_ctx,
125 )?;
126
127 let mut doc = Document::new(header.version);
129 doc.aliases = header.aliases;
130 doc.structs = header.structs;
131 doc.nests = header.nests;
132 doc.root = root;
133
134 timeout_ctx.check_timeout(0)?;
136 resolve_references(&doc, options.reference_mode)?;
137
138 Ok(doc)
139}
140
/// Borrowed state that `parse_body` builds once and then reads from while
/// dispatching matrix rows and inline child lists.
///
/// `header` and `limits` are shared borrows; `type_registries` and
/// `node_count` are exclusive borrows so the callee can update them.
struct ParseContext<'a> {
    /// Parsed header; copied into each `MatrixParseParams` that `parse_body` builds.
    header: &'a crate::header::Header,
    /// Parsing limits; forwarded the same way as `header`.
    limits: &'a Limits,
    /// Type registry handed mutably to `parse_matrix_row` and
    /// `parse_inline_child_list`.
    type_registries: &'a mut TypeRegistry,
    /// Counter that `parse_body` initialises to zero and hands mutably to the
    /// same two parsers (presumably incremented there -- confirm in `line_parsing`).
    node_count: &'a mut usize,
}
148
149fn parse_body(
150 lines: &[(usize, &str)],
151 header: &crate::header::Header,
152 limits: &Limits,
153 type_registries: &mut TypeRegistry,
154 timeout_ctx: &TimeoutContext,
155) -> HedlResult<BTreeMap<String, Item>> {
156 let mut stack: Vec<Frame> = vec![Frame::Root {
157 object: BTreeMap::new(),
158 }];
159 let mut node_count = 0usize;
160 let mut total_keys = 0usize;
161 let mut block_string: Option<BlockStringState> = None;
162
163 let ctx = ParseContext {
165 header,
166 limits,
167 type_registries,
168 node_count: &mut node_count,
169 };
170
171 for result in lines.iter().copied().with_timeout_check(timeout_ctx) {
173 let (line_num, line) = result?;
174 if let Some(ref mut state) = block_string {
176 if let Some(full_content) = state.process_line(line, line_num, limits)? {
178 let value = Value::String(full_content.into());
180 pop_frames(&mut stack, state.indent);
181 insert_into_current(&mut stack, state.key.clone(), Item::Scalar(value));
182 block_string = None;
183 }
184 continue;
185 }
186
187 if is_blank_line(line) || is_comment_line(line) {
189 continue;
190 }
191
192 let indent_info = calculate_indent(line, line_num as u32)
194 .map_err(|e| HedlError::syntax(e.to_string(), line_num))?;
195
196 let indent_info = match indent_info {
197 Some(info) => info,
198 None => continue, };
200
201 if indent_info.level > limits.max_indent_depth {
202 return Err(HedlError::security(
203 format!(
204 "indent depth {} exceeds limit {}",
205 indent_info.level, limits.max_indent_depth
206 ),
207 line_num,
208 ));
209 }
210
211 let indent = indent_info.level;
212 let content = &line[indent_info.spaces..];
213
214 pop_frames(&mut stack, indent);
216
217 if content.starts_with('|') {
219 let params = MatrixParseParams {
220 content,
221 indent,
222 line_num,
223 header: ctx.header,
224 limits: ctx.limits,
225 };
226 parse_matrix_row(&mut stack, ¶ms, ctx.type_registries, ctx.node_count)?;
227 } else if content.starts_with('@') && is_inline_child_list(content) {
228 let params = MatrixParseParams {
230 content,
231 indent,
232 line_num,
233 header: ctx.header,
234 limits: ctx.limits,
235 };
236 parse_inline_child_list(&mut stack, ¶ms, ctx.type_registries, ctx.node_count)?;
237 } else if content.starts_with('@') && is_expanded_child_list(content) {
238 parse_expanded_child_list(
240 &mut stack, content, indent, line_num, ctx.header, ctx.limits,
241 )?;
242 } else {
243 match try_start_block_string(content, indent, line_num)? {
245 BlockStringResult::MultiLineStarted(state) => {
246 validate_indent_for_child(&stack, indent, line_num)?;
248 check_duplicate_key(&stack, &state.key, line_num, limits, &mut total_keys)?;
249 block_string = Some(state);
250 }
251 BlockStringResult::NotBlockString => {
252 parse_non_matrix_line(
253 &mut stack,
254 content,
255 indent,
256 line_num,
257 header,
258 limits,
259 &mut total_keys,
260 )?;
261 }
262 }
263 }
264 }
265
266 if let Some(state) = block_string {
268 return Err(HedlError::syntax(
269 format!(
270 "unclosed block string starting at line {}",
271 state.start_line
272 ),
273 state.start_line,
274 ));
275 }
276
277 finalize_stack(stack)
279}
280
#[cfg(test)]
mod tests {
    //! Unit tests for the `ParseOptions` builder and for timeout handling in
    //! `parse_with_limits`.

    use super::*;
    use crate::reference::ReferenceMode;

    // ---- Builder construction ----

    #[test]
    fn test_builder_new_creates_default_options() {
        let built = ParseOptionsBuilder::new().build();

        assert_eq!(built.reference_mode, ReferenceMode::Strict);
        assert_eq!(built.limits.max_indent_depth, 50);
        assert_eq!(built.limits.max_nodes, 10_000_000);
    }

    #[test]
    fn test_builder_default_trait() {
        let from_new = ParseOptionsBuilder::new().build();
        let from_default = ParseOptionsBuilder::default().build();

        assert_eq!(from_new.reference_mode, from_default.reference_mode);
        assert_eq!(
            from_new.limits.max_indent_depth,
            from_default.limits.max_indent_depth
        );
    }

    #[test]
    fn test_parse_options_builder_method() {
        let built = ParseOptions::builder().build();
        assert_eq!(built.reference_mode, ReferenceMode::Strict);
    }

    // ---- Individual setters ----

    #[test]
    fn test_builder_max_depth() {
        let built = ParseOptions::builder().max_depth(100).build();

        assert_eq!(built.limits.max_indent_depth, 100);
    }

    #[test]
    fn test_builder_max_array_length() {
        let built = ParseOptions::builder().max_array_length(5000).build();

        assert_eq!(built.limits.max_nodes, 5000);
    }

    #[test]
    fn test_builder_strict_true() {
        let built = ParseOptions::builder().strict(true).build();

        assert_eq!(built.reference_mode, ReferenceMode::Strict);
    }

    #[test]
    fn test_builder_strict_false() {
        let built = ParseOptions::builder().strict(false).build();

        assert_eq!(built.reference_mode, ReferenceMode::Lenient);
    }

    #[test]
    fn test_builder_max_file_size() {
        let limit_bytes = 500 * 1024 * 1024;
        let built = ParseOptions::builder().max_file_size(limit_bytes).build();

        assert_eq!(built.limits.max_file_size, limit_bytes);
    }

    #[test]
    fn test_builder_max_line_length() {
        let limit_bytes = 512 * 1024;
        let built = ParseOptions::builder()
            .max_line_length(limit_bytes)
            .build();

        assert_eq!(built.limits.max_line_length, limit_bytes);
    }

    #[test]
    fn test_builder_max_aliases() {
        let built = ParseOptions::builder().max_aliases(5000).build();

        assert_eq!(built.limits.max_aliases, 5000);
    }

    #[test]
    fn test_builder_max_columns() {
        let built = ParseOptions::builder().max_columns(50).build();

        assert_eq!(built.limits.max_columns, 50);
    }

    #[test]
    fn test_builder_max_nest_depth() {
        let built = ParseOptions::builder().max_nest_depth(50).build();

        assert_eq!(built.limits.max_nest_depth, 50);
    }

    #[test]
    fn test_builder_max_block_string_size() {
        let limit_bytes = 5 * 1024 * 1024;
        let built = ParseOptions::builder()
            .max_block_string_size(limit_bytes)
            .build();

        assert_eq!(built.limits.max_block_string_size, limit_bytes);
    }

    #[test]
    fn test_builder_max_object_keys() {
        let built = ParseOptions::builder().max_object_keys(5000).build();

        assert_eq!(built.limits.max_object_keys, 5000);
    }

    #[test]
    fn test_builder_max_total_keys() {
        let built = ParseOptions::builder().max_total_keys(5_000_000).build();

        assert_eq!(built.limits.max_total_keys, 5_000_000);
    }

    // ---- Chaining ----

    #[test]
    fn test_builder_multiple_chains() {
        let built = ParseOptions::builder()
            .max_depth(100)
            .max_array_length(5000)
            .strict(false)
            .build();
        let limits = &built.limits;

        assert_eq!(limits.max_indent_depth, 100);
        assert_eq!(limits.max_nodes, 5000);
        assert_eq!(built.reference_mode, ReferenceMode::Lenient);
    }

    #[test]
    fn test_builder_all_options_chained() {
        let built = ParseOptions::builder()
            .max_depth(75)
            .max_array_length(2000)
            .strict(false)
            .max_file_size(100 * 1024 * 1024)
            .max_line_length(256 * 1024)
            .max_aliases(1000)
            .max_columns(25)
            .max_nest_depth(30)
            .max_block_string_size(1024 * 1024)
            .max_object_keys(1000)
            .max_total_keys(1_000_000)
            .build();
        let limits = &built.limits;

        assert_eq!(limits.max_indent_depth, 75);
        assert_eq!(limits.max_nodes, 2000);
        assert_eq!(built.reference_mode, ReferenceMode::Lenient);
        assert_eq!(limits.max_file_size, 100 * 1024 * 1024);
        assert_eq!(limits.max_line_length, 256 * 1024);
        assert_eq!(limits.max_aliases, 1000);
        assert_eq!(limits.max_columns, 25);
        assert_eq!(limits.max_nest_depth, 30);
        assert_eq!(limits.max_block_string_size, 1024 * 1024);
        assert_eq!(limits.max_object_keys, 1000);
        assert_eq!(limits.max_total_keys, 1_000_000);
    }

    // ---- Overriding earlier values ----

    #[test]
    fn test_builder_override_previous_value() {
        let built = ParseOptions::builder()
            .max_depth(50)
            .max_depth(100)
            .build();

        assert_eq!(built.limits.max_indent_depth, 100);
    }

    #[test]
    fn test_builder_override_multiple_times() {
        let built = ParseOptions::builder()
            .max_array_length(1000)
            .max_array_length(2000)
            .max_array_length(3000)
            .build();

        assert_eq!(built.limits.max_nodes, 3000);
    }

    // ---- Defaults are preserved ----

    #[test]
    fn test_builder_default_keeps_other_defaults() {
        let built = ParseOptions::builder().max_depth(100).build();
        let limits = &built.limits;

        assert_eq!(limits.max_indent_depth, 100);
        assert_eq!(limits.max_file_size, 1024 * 1024 * 1024);
        assert_eq!(limits.max_line_length, 1024 * 1024);
        assert_eq!(limits.max_nodes, 10_000_000);
        assert_eq!(built.reference_mode, ReferenceMode::Strict);
    }

    // ---- Boundary values ----

    #[test]
    fn test_builder_zero_values() {
        let built = ParseOptions::builder()
            .max_depth(0)
            .max_array_length(0)
            .max_aliases(0)
            .build();
        let limits = &built.limits;

        assert_eq!(limits.max_indent_depth, 0);
        assert_eq!(limits.max_nodes, 0);
        assert_eq!(limits.max_aliases, 0);
    }

    #[test]
    fn test_builder_max_values() {
        let built = ParseOptions::builder()
            .max_depth(usize::MAX)
            .max_array_length(usize::MAX)
            .max_file_size(usize::MAX)
            .build();
        let limits = &built.limits;

        assert_eq!(limits.max_indent_depth, usize::MAX);
        assert_eq!(limits.max_nodes, usize::MAX);
        assert_eq!(limits.max_file_size, usize::MAX);
    }

    // ---- Equivalence and independence ----

    #[test]
    fn test_builder_build_equivalent_to_default() {
        let via_builder = ParseOptions::builder().build();
        let via_default = ParseOptions::default();

        assert_eq!(via_builder.reference_mode, via_default.reference_mode);
        assert_eq!(
            via_builder.limits.max_indent_depth,
            via_default.limits.max_indent_depth
        );
        assert_eq!(via_builder.limits.max_nodes, via_default.limits.max_nodes);
        assert_eq!(
            via_builder.limits.max_file_size,
            via_default.limits.max_file_size
        );
    }

    #[test]
    fn test_builder_clone_independent() {
        let base = ParseOptions::builder().max_depth(100);
        let derived = base.clone().max_depth(200);

        let base_opts = base.build();
        let derived_opts = derived.build();

        assert_eq!(base_opts.limits.max_indent_depth, 100);
        assert_eq!(derived_opts.limits.max_indent_depth, 200);
    }

    // ---- Usage patterns ----

    #[test]
    fn test_builder_typical_usage_pattern() {
        let built = ParseOptions::builder()
            .max_depth(100)
            .strict(true)
            .build();

        assert_eq!(built.reference_mode, ReferenceMode::Strict);
        assert_eq!(built.limits.max_indent_depth, 100);
    }

    #[test]
    fn test_builder_lenient_parsing_pattern() {
        let built = ParseOptions::builder()
            .max_array_length(50_000)
            .strict(false)
            .max_block_string_size(50 * 1024 * 1024)
            .build();

        assert_eq!(built.reference_mode, ReferenceMode::Lenient);
        assert_eq!(built.limits.max_nodes, 50_000);
        assert_eq!(built.limits.max_block_string_size, 50 * 1024 * 1024);
    }

    #[test]
    fn test_builder_restricted_parsing_pattern() {
        let built = ParseOptions::builder()
            .max_file_size(10 * 1024 * 1024)
            .max_line_length(64 * 1024)
            .max_depth(20)
            .max_array_length(1000)
            .strict(true)
            .build();
        let limits = &built.limits;

        assert_eq!(limits.max_file_size, 10 * 1024 * 1024);
        assert_eq!(limits.max_line_length, 64 * 1024);
        assert_eq!(limits.max_indent_depth, 20);
        assert_eq!(limits.max_nodes, 1000);
        assert_eq!(built.reference_mode, ReferenceMode::Strict);
    }

    // ---- Timeout handling ----

    #[test]
    fn test_parse_with_generous_timeout_succeeds() {
        let source = b"%V:2.0\n%NULL:~\n%QUOTE:\"\n---\nkey: value\n";
        let ten_seconds = std::time::Duration::from_secs(10);

        let mut options = ParseOptions::default();
        options.limits.timeout = Some(ten_seconds);

        let outcome = parse_with_limits(source, options);
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_parse_with_no_timeout_succeeds() {
        let source = b"%V:2.0\n%NULL:~\n%QUOTE:\"\n---\nkey: value\n";

        let mut options = ParseOptions::default();
        options.limits.timeout = None;

        let outcome = parse_with_limits(source, options);
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_parse_with_very_short_timeout_fails() {
        // Build a large document so parsing cannot finish within the timeout.
        let mut source = String::from("%V:2.0\n%NULL:~\n%QUOTE:\"\n---\ndata:\n");
        source.extend((0..100_000).map(|i| format!(" key{}: value{}\n", i, i)));

        let mut options = ParseOptions::default();
        options.limits.timeout = Some(std::time::Duration::from_micros(1));

        let outcome = parse_with_limits(source.as_bytes(), options);
        assert!(outcome.is_err());

        if let Err(err) = outcome {
            let text = err.to_string();
            assert!(text.contains("timeout") || text.contains("Timeout"));
        }
    }

    #[test]
    fn test_default_timeout_is_reasonable() {
        let defaults = ParseOptions::default();
        let thirty_seconds = std::time::Duration::from_secs(30);

        assert_eq!(defaults.limits.timeout, Some(thirty_seconds));
    }

    #[test]
    fn test_unlimited_has_no_timeout() {
        let unlimited = Limits::unlimited();
        assert_eq!(unlimited.timeout, None);
    }
}