1use crate::block_string::{try_start_block_string, BlockStringResult, BlockStringState};
68use crate::document::{Document, Item, MatrixList, Node};
69use crate::error::{HedlError, HedlResult};
70use crate::header::parse_header;
71use crate::inference::{infer_quoted_value, infer_value, InferenceContext};
72use crate::lex::row::parse_csv_row;
73use crate::lex::{calculate_indent, is_valid_key_token, is_valid_type_name, strip_comment};
74use crate::limits::{Limits, TimeoutCheckExt, TimeoutContext};
75use crate::preprocess::{is_blank_line, is_comment_line, preprocess};
76use crate::reference::{register_node, resolve_references, ReferenceMode, TypeRegistry};
77use crate::value::Value;
78use std::collections::BTreeMap;
79
/// Options controlling a single parse run.
///
/// Build one with [`ParseOptions::default`] or fluently through
/// [`ParseOptions::builder`].
#[derive(Debug, Clone)]
pub struct ParseOptions {
    /// Limits handed to the preprocessing, header and body stages of the parser.
    pub limits: Limits,
    /// Mode handed to reference resolution once the document tree has been built.
    pub reference_mode: ReferenceMode,
}
167
168impl Default for ParseOptions {
169 fn default() -> Self {
170 Self {
171 limits: Limits::default(),
172 reference_mode: ReferenceMode::Strict,
173 }
174 }
175}
176
177impl ParseOptions {
178 pub fn builder() -> ParseOptionsBuilder {
189 ParseOptionsBuilder::new()
190 }
191}
192
/// Fluent builder for [`ParseOptions`].
///
/// Each setter consumes the builder and returns it, so calls can be chained
/// and finished with `build()`.
#[derive(Debug, Clone)]
pub struct ParseOptionsBuilder {
    /// Limits accumulated so far; copied into the built options.
    limits: Limits,
    /// Reference mode selected so far; copied into the built options.
    reference_mode: ReferenceMode,
}
215
216impl ParseOptionsBuilder {
217 pub fn new() -> Self {
219 Self {
220 limits: Limits::default(),
221 reference_mode: ReferenceMode::Strict,
222 }
223 }
224
225 pub fn max_depth(mut self, depth: usize) -> Self {
237 self.limits.max_indent_depth = depth;
238 self
239 }
240
241 pub fn max_array_length(mut self, length: usize) -> Self {
253 self.limits.max_nodes = length;
254 self
255 }
256 pub fn reference_mode(mut self, mode: ReferenceMode) -> Self {
271 self.reference_mode = mode;
272 self
273 }
274
275 pub fn strict_refs(mut self) -> Self {
287 self.reference_mode = ReferenceMode::Strict;
288 self
289 }
290
291 pub fn lenient_refs(mut self) -> Self {
303 self.reference_mode = ReferenceMode::Lenient;
304 self
305 }
306
307 pub fn strict(mut self, strict: bool) -> Self {
310 self.reference_mode = ReferenceMode::from(strict);
311 self
312 }
313
314 pub fn max_file_size(mut self, size: usize) -> Self {
326 self.limits.max_file_size = size;
327 self
328 }
329
330 pub fn max_line_length(mut self, length: usize) -> Self {
342 self.limits.max_line_length = length;
343 self
344 }
345
346 pub fn max_aliases(mut self, count: usize) -> Self {
358 self.limits.max_aliases = count;
359 self
360 }
361
362 pub fn max_columns(mut self, count: usize) -> Self {
374 self.limits.max_columns = count;
375 self
376 }
377
378 pub fn max_nest_depth(mut self, depth: usize) -> Self {
390 self.limits.max_nest_depth = depth;
391 self
392 }
393
394 pub fn max_block_string_size(mut self, size: usize) -> Self {
406 self.limits.max_block_string_size = size;
407 self
408 }
409
410 pub fn max_object_keys(mut self, count: usize) -> Self {
422 self.limits.max_object_keys = count;
423 self
424 }
425
426 pub fn max_total_keys(mut self, count: usize) -> Self {
440 self.limits.max_total_keys = count;
441 self
442 }
443
444 pub fn build(self) -> ParseOptions {
446 ParseOptions {
447 limits: self.limits,
448 reference_mode: self.reference_mode,
449 }
450 }
451}
452
453impl Default for ParseOptionsBuilder {
454 fn default() -> Self {
455 Self::new()
456 }
457}
458
459pub fn parse(input: &[u8]) -> HedlResult<Document> {
461 parse_with_limits(input, ParseOptions::default())
462}
463
464pub fn parse_with_limits(input: &[u8], options: ParseOptions) -> HedlResult<Document> {
466 let timeout_ctx = TimeoutContext::new(options.limits.timeout);
468
469 let preprocessed = preprocess(input, &options.limits)?;
471
472 let lines: Vec<(usize, &str)> = preprocessed.lines().collect();
474
475 let (header, body_start_idx) = parse_header(&lines, &options.limits, &timeout_ctx)?;
477
478 let body_lines = &lines[body_start_idx..];
480 let mut type_registries = TypeRegistry::new();
481 let root = parse_body(
482 body_lines,
483 &header,
484 &options.limits,
485 &mut type_registries,
486 &timeout_ctx,
487 )?;
488
489 let mut doc = Document::new(header.version);
491 doc.aliases = header.aliases;
492 doc.structs = header.structs;
493 doc.nests = header.nests;
494 doc.root = root;
495
496 timeout_ctx.check_timeout(0)?;
498 resolve_references(&doc, options.reference_mode)?;
499
500 Ok(doc)
501}
502
/// One entry of the body parser's nesting stack.
///
/// The stack always starts with a single `Root` frame; `Object` and `List`
/// frames are pushed as declarations are read and folded back into their
/// parent when the indentation drops.
#[derive(Debug)]
enum Frame {
    /// Bottom of the stack: collects the document's top-level items.
    Root {
        /// Top-level key/item map.
        object: BTreeMap<String, Item>,
    },
    /// An object opened by a `key:` line with nothing after the colon.
    Object {
        /// Indent level of the line that declared the object.
        indent: usize,
        /// Key under which the object is inserted into its parent.
        key: String,
        /// Items collected for this object so far.
        object: BTreeMap<String, Item>,
    },
    /// A matrix list that is currently receiving rows.
    List {
        /// Indent level of the declaring line (one less than `row_indent`).
        #[allow(dead_code)]
        list_start_indent: usize,
        /// Indent level at which this list's rows must appear.
        row_indent: usize,
        /// Type name of the rows.
        type_name: String,
        /// Column names; each row must supply exactly this many fields.
        schema: Vec<String>,
        /// Values of the most recently parsed row, made available to value
        /// inference for the next row.
        last_row_values: Option<Vec<Value>>,
        /// Rows parsed so far.
        list: Vec<Node>,
        /// Key under which the list is inserted into its parent.
        key: String,
        /// Optional count taken from a `key(N)` declaration.
        count_hint: Option<usize>,
    },
}
527
/// Borrowed state bundled together for the duration of body parsing.
struct ParseContext<'a> {
    /// Parsed header (aliases, struct schemas, nest rules).
    header: &'a crate::header::Header,
    /// Limits enforced while parsing.
    limits: &'a Limits,
    /// Registry in which every parsed row is registered by type and id.
    type_registries: &'a mut TypeRegistry,
    /// Running count of matrix rows parsed so far.
    node_count: &'a mut usize,
}
537
/// Parses the body lines into the root key/item map.
///
/// Maintains a stack of [`Frame`]s that mirrors the indentation structure.
/// Each non-blank, non-comment line is classified as a matrix row (starts
/// with `|`), the start of a multi-line block string, or an ordinary
/// `key: ...` line.
///
/// # Errors
///
/// Propagates timeout, indentation, block-string and line-parsing errors,
/// returns a security error when the indent level exceeds
/// `limits.max_indent_depth`, and a syntax error when a block string is still
/// open at the end of input.
fn parse_body(
    lines: &[(usize, &str)],
    header: &crate::header::Header,
    limits: &Limits,
    type_registries: &mut TypeRegistry,
    timeout_ctx: &TimeoutContext,
) -> HedlResult<BTreeMap<String, Item>> {
    let mut stack: Vec<Frame> = vec![Frame::Root {
        object: BTreeMap::new(),
    }];
    let mut node_count = 0usize;
    let mut total_keys = 0usize;
    // `Some` while the lines being read belong to a multi-line block string.
    let mut block_string: Option<BlockStringState> = None;

    // Bundle of the borrows that matrix-row parsing needs.
    let ctx = ParseContext {
        header,
        limits,
        type_registries,
        node_count: &mut node_count,
    };

    for result in lines.iter().copied().with_timeout_check(timeout_ctx) {
        // The timeout-checking adapter yields a Result per line.
        let (line_num, line) = result?;
        if let Some(ref mut state) = block_string {
            // Inside a block string every line goes to the block-string state;
            // a returned value means the string is complete.
            if let Some(full_content) = state.process_line(line, line_num, limits)? {
                let value = Value::String(full_content.into());
                pop_frames(&mut stack, state.indent);
                insert_into_current(&mut stack, state.key.clone(), Item::Scalar(value));
                block_string = None;
            }
            continue;
        }

        if is_blank_line(line) || is_comment_line(line) {
            continue;
        }

        let indent_info = calculate_indent(line, line_num as u32)
            .map_err(|e| HedlError::syntax(e.to_string(), line_num))?;

        // No indent information means there is nothing to parse on this line.
        let indent_info = match indent_info {
            Some(info) => info,
            None => continue,
        };

        if indent_info.level > limits.max_indent_depth {
            return Err(HedlError::security(
                format!(
                    "indent depth {} exceeds limit {}",
                    indent_info.level, limits.max_indent_depth
                ),
                line_num,
            ));
        }

        let indent = indent_info.level;
        let content = &line[indent_info.spaces..];

        // Close every frame that this line's indentation has left.
        pop_frames(&mut stack, indent);

        if content.starts_with('|') {
            parse_matrix_row(
                &mut stack,
                content,
                indent,
                line_num,
                ctx.header,
                ctx.limits,
                ctx.type_registries,
                ctx.node_count,
            )?;
        } else {
            match try_start_block_string(content, indent, line_num)? {
                BlockStringResult::MultiLineStarted(state) => {
                    // The key is validated now; the value is inserted once the
                    // block string has been closed.
                    validate_indent_for_child(&stack, indent, line_num)?;
                    check_duplicate_key(&stack, &state.key, line_num, limits, &mut total_keys)?;
                    block_string = Some(state);
                }
                BlockStringResult::NotBlockString => {
                    parse_non_matrix_line(
                        &mut stack,
                        content,
                        indent,
                        line_num,
                        header,
                        limits,
                        &mut total_keys,
                    )?;
                }
            }
        }
    }

    // Input ended while a block string was still open.
    if let Some(state) = block_string {
        return Err(HedlError::syntax(
            format!(
                "unclosed block string starting at line {}",
                state.start_line
            ),
            state.start_line,
        ));
    }

    finalize_stack(stack)
}
656
657fn pop_frames(stack: &mut Vec<Frame>, current_indent: usize) {
658 while stack.len() > 1 {
659 let should_pop = match stack.last().unwrap() {
660 Frame::Root { .. } => false,
661 Frame::Object { indent, .. } => current_indent <= *indent,
662 Frame::List { row_indent, .. } => current_indent < *row_indent,
663 };
664
665 if should_pop {
666 let frame = stack.pop().unwrap();
667 attach_frame_to_parent(stack, frame);
668 } else {
669 break;
670 }
671 }
672}
673
674fn attach_frame_to_parent(stack: &mut [Frame], frame: Frame) {
675 match frame {
676 Frame::Object { key, object, .. } => {
677 let item = Item::Object(object);
678 insert_into_parent(stack, key, item);
679 }
680 Frame::List {
681 key,
682 type_name,
683 schema,
684 list,
685 count_hint,
686 ..
687 } => {
688 let mut matrix_list = if let Some(count) = count_hint {
689 MatrixList::with_count_hint(type_name, schema, count)
690 } else {
691 MatrixList::new(type_name, schema)
692 };
693 matrix_list.rows = list;
694 insert_into_parent(stack, key, Item::List(matrix_list));
695 }
696 Frame::Root { .. } => {}
697 }
698}
699
700fn insert_into_parent(stack: &mut [Frame], key: String, item: Item) {
701 if let Some(parent) = stack.last_mut() {
702 match parent {
703 Frame::Root { object } | Frame::Object { object, .. } => {
704 object.insert(key, item);
707 }
708 Frame::List { list, .. } => {
709 if let Some(parent_node) = list.last_mut() {
711 if let Item::List(child_list) = item {
712 let children = parent_node
713 .children
714 .get_or_insert_with(|| Box::new(BTreeMap::new()));
715 children
716 .entry(child_list.type_name.clone())
717 .or_default()
718 .extend(child_list.rows);
719 }
720 }
721 }
722 }
723 }
724}
725
726fn parse_non_matrix_line(
727 stack: &mut Vec<Frame>,
728 content: &str,
729 indent: usize,
730 line_num: usize,
731 header: &crate::header::Header,
732 limits: &Limits,
733 total_keys: &mut usize,
734) -> HedlResult<()> {
735 let content = strip_comment(content);
736
737 let colon_pos = content
739 .find(':')
740 .ok_or_else(|| HedlError::syntax("expected ':' in line", line_num))?;
741
742 let key_with_hint = content[..colon_pos].trim();
743 let after_colon = &content[colon_pos + 1..];
744
745 let (key, count_hint) = parse_key_with_count_hint(key_with_hint, line_num)?;
747
748 if !is_valid_key_token(&key) {
750 return Err(HedlError::syntax(format!("invalid key: {}", key), line_num));
751 }
752
753 check_duplicate_key(stack, &key, line_num, limits, total_keys)?;
755
756 let after_colon_trimmed = after_colon.trim();
758
759 if after_colon_trimmed.is_empty() {
760 if count_hint.is_some() {
762 return Err(HedlError::syntax(
763 "count hint not allowed on object declarations",
764 line_num,
765 ));
766 }
767 validate_indent_for_child(stack, indent, line_num)?;
768 stack.push(Frame::Object {
769 indent,
770 key: key.to_string(),
771 object: BTreeMap::new(),
772 });
773 } else if after_colon_trimmed.starts_with('@') && is_list_start(after_colon_trimmed) {
774 if !after_colon.starts_with(' ') {
776 return Err(HedlError::syntax(
777 "space required after ':' before '@'",
778 line_num,
779 ));
780 }
781
782 let parent_list_idx = validate_nested_list_indent(stack, indent, line_num)?;
784
785 let (type_name, schema) = parse_list_start(after_colon_trimmed, line_num, header, limits)?;
786
787 if let Some(_parent_idx) = parent_list_idx {
788 stack.push(Frame::List {
791 list_start_indent: indent,
792 row_indent: indent + 1,
793 type_name,
794 schema,
795 last_row_values: None,
796 list: Vec::new(),
797 key: key.to_string(),
798 count_hint,
799 });
800 } else {
801 stack.push(Frame::List {
803 list_start_indent: indent,
804 row_indent: indent + 1,
805 type_name,
806 schema,
807 last_row_values: None,
808 list: Vec::new(),
809 key: key.to_string(),
810 count_hint,
811 });
812 }
813 } else {
814 if count_hint.is_some() {
816 return Err(HedlError::syntax(
817 "count hint not allowed on scalar values",
818 line_num,
819 ));
820 }
821 if !after_colon.starts_with(' ') {
822 return Err(HedlError::syntax(
823 "space required after ':' in key-value",
824 line_num,
825 ));
826 }
827 validate_indent_for_child(stack, indent, line_num)?;
828 let value_str = after_colon.trim();
829 let ctx = InferenceContext::for_key_value(&header.aliases);
830 let value = if value_str.starts_with('"') {
831 let inner = parse_quoted_string(value_str, line_num)?;
833 infer_quoted_value(&inner)
834 } else {
835 infer_value(value_str, &ctx, line_num)?
836 };
837 insert_into_current(stack, key.to_string(), Item::Scalar(value));
838 }
839
840 Ok(())
841}
842
843fn parse_key_with_count_hint(key: &str, line_num: usize) -> HedlResult<(String, Option<usize>)> {
850 if let Some(paren_pos) = key.find('(') {
851 let key_part = &key[..paren_pos];
853
854 if !key.ends_with(')') {
856 return Err(HedlError::syntax(
857 "unclosed count hint parenthesis",
858 line_num,
859 ));
860 }
861
862 let count_str = &key[paren_pos + 1..key.len() - 1];
863
864 let count = count_str.parse::<usize>().map_err(|_| {
866 HedlError::syntax(format!("invalid count hint: '{}'", count_str), line_num)
867 })?;
868
869 if count == 0 {
870 return Err(HedlError::syntax(
871 "count hint must be greater than zero",
872 line_num,
873 ));
874 }
875
876 Ok((key_part.to_string(), Some(count)))
877 } else {
878 Ok((key.to_string(), None))
879 }
880}
881
882fn is_list_start(s: &str) -> bool {
883 let s = s.trim();
885 if !s.starts_with('@') {
886 return false;
887 }
888 let rest = &s[1..];
889 let type_end = rest
891 .find(|c: char| c == '[' || c.is_whitespace())
892 .unwrap_or(rest.len());
893 let type_name = &rest[..type_end];
894 is_valid_type_name(type_name)
895}
896
897fn parse_list_start(
898 s: &str,
899 line_num: usize,
900 header: &crate::header::Header,
901 limits: &Limits,
902) -> HedlResult<(String, Vec<String>)> {
903 let s = s.trim();
904 let rest = &s[1..]; if let Some(bracket_pos) = rest.find('[') {
907 let type_name = &rest[..bracket_pos];
909 if !is_valid_type_name(type_name) {
910 return Err(HedlError::syntax(
911 format!("invalid type name: {}", type_name),
912 line_num,
913 ));
914 }
915
916 let schema_str = &rest[bracket_pos..];
917 let schema = parse_inline_schema(schema_str, line_num, limits)?;
918
919 if let Some(declared) = header.structs.get(type_name) {
921 if declared != &schema {
922 return Err(HedlError::schema(
923 format!(
924 "inline schema for '{}' doesn't match declared schema",
925 type_name
926 ),
927 line_num,
928 ));
929 }
930 }
931
932 Ok((type_name.to_string(), schema))
933 } else {
934 let type_name = rest.trim();
936 if !is_valid_type_name(type_name) {
937 return Err(HedlError::syntax(
938 format!("invalid type name: {}", type_name),
939 line_num,
940 ));
941 }
942
943 let schema = header
944 .structs
945 .get(type_name)
946 .ok_or_else(|| HedlError::schema(format!("undefined type: {}", type_name), line_num))?;
947
948 Ok((type_name.to_string(), schema.clone()))
949 }
950}
951
952fn parse_inline_schema(s: &str, line_num: usize, limits: &Limits) -> HedlResult<Vec<String>> {
953 if !s.starts_with('[') || !s.ends_with(']') {
954 return Err(HedlError::syntax("invalid inline schema format", line_num));
955 }
956
957 let inner = &s[1..s.len() - 1];
958 let mut columns = Vec::new();
959
960 for part in inner.split(',') {
961 let col = part.trim();
962 if col.is_empty() {
963 continue;
964 }
965 if !is_valid_key_token(col) {
966 return Err(HedlError::syntax(
967 format!("invalid column name: {}", col),
968 line_num,
969 ));
970 }
971 columns.push(col.to_string());
972 }
973
974 if columns.is_empty() {
975 return Err(HedlError::syntax("empty inline schema", line_num));
976 }
977
978 if columns.len() > limits.max_columns {
979 return Err(HedlError::security(
980 format!("too many columns: {}", columns.len()),
981 line_num,
982 ));
983 }
984
985 Ok(columns)
986}
987
988fn parse_row_prefix(content: &str, line_num: usize) -> HedlResult<(Option<usize>, &str)> {
993 if !content.starts_with('|') {
995 return Err(HedlError::syntax(
996 "matrix row must start with '|'",
997 line_num,
998 ));
999 }
1000
1001 let rest = &content[1..]; if rest.starts_with('[') {
1005 if let Some(bracket_end) = rest.find(']') {
1006 let count_str = &rest[1..bracket_end];
1007 if let Ok(count) = count_str.parse::<usize>() {
1008 let data = rest[bracket_end + 1..].trim_start();
1011 return Ok((Some(count), data));
1012 }
1013 }
1014 }
1015
1016 Ok((None, rest))
1018}
1019
1020#[allow(clippy::too_many_arguments)]
1021fn parse_matrix_row(
1022 stack: &mut Vec<Frame>,
1023 content: &str,
1024 indent: usize,
1025 line_num: usize,
1026 header: &crate::header::Header,
1027 limits: &Limits,
1028 type_registries: &mut TypeRegistry,
1029 node_count: &mut usize,
1030) -> HedlResult<()> {
1031 let list_frame_idx = find_list_frame(stack, indent, line_num, header, limits)?;
1033
1034 let (child_count, csv_content) = parse_row_prefix(content, line_num)?;
1036 let csv_content = strip_comment(csv_content).trim();
1037
1038 let (type_name, schema, prev_row) = {
1040 let frame = &stack[list_frame_idx];
1041 match frame {
1042 Frame::List {
1043 type_name,
1044 schema,
1045 last_row_values,
1046 ..
1047 } => (type_name.clone(), schema.clone(), last_row_values.clone()),
1048 _ => unreachable!(),
1049 }
1050 };
1051
1052 let fields =
1054 parse_csv_row(csv_content).map_err(|e| HedlError::syntax(e.to_string(), line_num))?;
1055
1056 if fields.len() != schema.len() {
1058 return Err(HedlError::shape(
1059 format!("expected {} columns, got {}", schema.len(), fields.len()),
1060 line_num,
1061 ));
1062 }
1063
1064 let mut values = Vec::with_capacity(fields.len());
1066 for (col_idx, field) in fields.iter().enumerate() {
1067 let ctx = InferenceContext::for_matrix_cell(
1068 &header.aliases,
1069 col_idx,
1070 prev_row.as_deref(),
1071 &type_name,
1072 );
1073
1074 let value = if field.is_quoted {
1075 infer_quoted_value(&field.value)
1076 } else {
1077 infer_value(&field.value, &ctx, line_num)?
1078 };
1079
1080 values.push(value);
1081 }
1082
1083 let id = match &values[0] {
1085 Value::String(s) => s.clone(),
1086 _ => {
1087 return Err(HedlError::semantic("ID column must be a string", line_num));
1088 }
1089 };
1090
1091 register_node(type_registries, &type_name, &id, line_num, limits)?;
1093
1094 *node_count = node_count
1096 .checked_add(1)
1097 .ok_or_else(|| HedlError::security("node count overflow", line_num))?;
1098 if *node_count > limits.max_nodes {
1099 return Err(HedlError::security(
1100 format!("too many nodes: exceeds limit of {}", limits.max_nodes),
1101 line_num,
1102 ));
1103 }
1104
1105 if let Frame::List {
1107 last_row_values,
1108 list,
1109 ..
1110 } = &mut stack[list_frame_idx]
1111 {
1112 *last_row_values = Some(values.clone());
1114 let mut node = Node::new(&type_name, &*id, values);
1116
1117 if let Some(count) = child_count {
1119 node.set_child_count(count);
1120 }
1121
1122 list.push(node);
1123 }
1124
1125 Ok(())
1126}
1127
/// Finds the stack index of the list frame that should receive a matrix row
/// at `indent`, creating a child list frame when the row is nested.
///
/// Frames are searched from the top of the stack downwards. A list whose
/// `row_indent` equals `indent` is returned directly. A list whose
/// `row_indent` is exactly one less means the row is a child of that list's
/// latest row: the child type is looked up in `header.nests`, its schema in
/// `header.structs`, and a new list frame keyed by the child type is pushed.
///
/// # Errors
///
/// Returns an orphan-row error when a child row has no parent row or the
/// parent type has no NEST rule, a schema error when the child type is
/// undefined, a security error when the number of list frames has reached
/// `limits.max_nest_depth`, and a syntax error when no list frame matches.
fn find_list_frame(
    stack: &mut Vec<Frame>,
    indent: usize,
    line_num: usize,
    header: &crate::header::Header,
    limits: &Limits,
) -> HedlResult<usize> {
    for (idx, frame) in stack.iter().enumerate().rev() {
        if let Frame::List {
            row_indent,
            type_name,
            list,
            ..
        } = frame
        {
            if indent == *row_indent {
                // Row belongs directly to this list.
                return Ok(idx);
            } else if indent == *row_indent + 1 {
                // Row is one level deeper: it is a child of this list's last row.
                if list.is_empty() {
                    return Err(HedlError::orphan_row(
                        "child row has no parent row",
                        line_num,
                    ));
                }

                let child_type = header.nests.get(type_name).ok_or_else(|| {
                    HedlError::orphan_row(
                        format!("no NEST rule for parent type '{}'", type_name),
                        line_num,
                    )
                })?;

                let child_schema = header.structs.get(child_type).ok_or_else(|| {
                    HedlError::schema(format!("child type '{}' not defined", child_type), line_num)
                })?;

                // Nesting depth is measured as the number of list frames
                // currently on the stack.
                let current_depth = stack
                    .iter()
                    .filter(|f| matches!(f, Frame::List { .. }))
                    .count();

                if current_depth >= limits.max_nest_depth {
                    return Err(HedlError::security(
                        format!(
                            "NEST hierarchy depth {} exceeds maximum allowed depth {}",
                            current_depth + 1,
                            limits.max_nest_depth
                        ),
                        line_num,
                    ));
                }

                // The function returns right after this push, so the stack
                // iterator above is never used again.
                stack.push(Frame::List {
                    list_start_indent: indent - 1,
                    row_indent: indent,
                    type_name: child_type.clone(),
                    schema: child_schema.clone(),
                    last_row_values: None,
                    list: Vec::new(),
                    key: child_type.clone(),
                    count_hint: None,
                });

                return Ok(stack.len() - 1);
            }
        }
    }

    Err(HedlError::syntax(
        "matrix row outside of list context",
        line_num,
    ))
}
1253
1254fn validate_indent_for_child(stack: &[Frame], indent: usize, line_num: usize) -> HedlResult<()> {
1255 let expected = match stack.last() {
1256 Some(Frame::Root { .. }) => 0,
1257 Some(Frame::Object {
1258 indent: parent_indent,
1259 ..
1260 }) => parent_indent + 1,
1261 Some(Frame::List { row_indent: _, .. }) => {
1262 return Err(HedlError::syntax(
1263 "cannot add key-value inside list context",
1264 line_num,
1265 ));
1266 }
1267 None => 0,
1268 };
1269
1270 if indent != expected {
1271 return Err(HedlError::syntax(
1272 format!("expected indent level {}, got {}", expected, indent),
1273 line_num,
1274 ));
1275 }
1276
1277 Ok(())
1278}
1279
1280fn validate_nested_list_indent(
1284 stack: &[Frame],
1285 indent: usize,
1286 line_num: usize,
1287) -> HedlResult<Option<usize>> {
1288 for (idx, frame) in stack.iter().enumerate().rev() {
1290 match frame {
1291 Frame::List {
1292 row_indent, list, ..
1293 } => {
1294 if indent == *row_indent + 1 {
1296 if list.is_empty() {
1298 return Err(HedlError::orphan_row(
1299 "nested list declaration has no parent row",
1300 line_num,
1301 ));
1302 }
1303 return Ok(Some(idx));
1304 }
1305 }
1306 Frame::Root { .. } => {
1307 if indent == 0 {
1308 return Ok(None); }
1310 }
1311 Frame::Object {
1312 indent: obj_indent, ..
1313 } => {
1314 if indent == obj_indent + 1 {
1315 return Ok(None); }
1317 }
1318 }
1319 }
1320
1321 Err(HedlError::syntax(
1322 format!(
1323 "invalid indent level {} for nested list declaration",
1324 indent
1325 ),
1326 line_num,
1327 ))
1328}
1329
1330fn check_duplicate_key(
1343 stack: &[Frame],
1344 key: &str,
1345 line_num: usize,
1346 limits: &Limits,
1347 total_keys: &mut usize,
1348) -> HedlResult<()> {
1349 let object_opt = match stack.last() {
1350 Some(Frame::Root { object }) | Some(Frame::Object { object, .. }) => Some(object),
1351 _ => None,
1352 };
1353
1354 if let Some(object) = object_opt {
1355 if object.contains_key(key) {
1357 return Err(HedlError::semantic(
1358 format!("duplicate key: {}", key),
1359 line_num,
1360 ));
1361 }
1362
1363 if object.len() >= limits.max_object_keys {
1365 return Err(HedlError::security(
1366 format!(
1367 "object has too many keys: {} (max: {})",
1368 object.len() + 1,
1369 limits.max_object_keys
1370 ),
1371 line_num,
1372 ));
1373 }
1374
1375 *total_keys = total_keys
1377 .checked_add(1)
1378 .ok_or_else(|| HedlError::security("total key count overflow", line_num))?;
1379
1380 if *total_keys > limits.max_total_keys {
1381 return Err(HedlError::security(
1382 format!(
1383 "too many total keys: {} exceeds limit {}",
1384 *total_keys, limits.max_total_keys
1385 ),
1386 line_num,
1387 ));
1388 }
1389 }
1390
1391 Ok(())
1392}
1393
1394fn insert_into_current(stack: &mut [Frame], key: String, item: Item) {
1395 if let Some(Frame::Root { object } | Frame::Object { object, .. }) = stack.last_mut() {
1396 object.insert(key, item);
1397 }
1398}
1399
1400fn parse_quoted_string(s: &str, line_num: usize) -> HedlResult<String> {
1401 if !s.starts_with('"') {
1402 return Err(HedlError::syntax("expected quoted string", line_num));
1403 }
1404
1405 let mut result = String::new();
1406 let mut chars = s[1..].chars().peekable();
1407
1408 while let Some(ch) = chars.next() {
1409 if ch == '"' {
1410 if chars.peek() == Some(&'"') {
1411 chars.next();
1413 result.push('"');
1414 } else {
1415 return Ok(result);
1417 }
1418 } else {
1419 result.push(ch);
1420 }
1421 }
1422
1423 Err(HedlError::syntax("unclosed quoted string", line_num))
1424}
1425
1426fn finalize_stack(mut stack: Vec<Frame>) -> HedlResult<BTreeMap<String, Item>> {
1427 if stack.len() > 1 {
1433 if let Some(Frame::Object { key, object, .. }) = stack.last() {
1434 if object.is_empty() {
1435 return Err(HedlError::syntax(
1436 format!("truncated input: object '{}' has no children", key),
1437 0,
1438 ));
1439 }
1440 }
1441 }
1442
1443 while stack.len() > 1 {
1445 let frame = stack.pop().unwrap();
1446 attach_frame_to_parent(&mut stack, frame);
1447 }
1448
1449 match stack.pop() {
1451 Some(Frame::Root { object }) => Ok(object),
1452 _ => Ok(BTreeMap::new()),
1453 }
1454}
1455
// Unit tests for the `ParseOptions` builder and for timeout handling in
// `parse_with_limits`.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Builder construction ----

    // A fresh builder yields strict references and the default limits.
    #[test]
    fn test_builder_new_creates_default_options() {
        let builder = ParseOptionsBuilder::new();
        let opts = builder.build();

        assert_eq!(opts.reference_mode, ReferenceMode::Strict);
        assert_eq!(opts.limits.max_indent_depth, 50);
        assert_eq!(opts.limits.max_nodes, 10_000_000);
    }

    // `Default::default()` and `new()` must agree.
    #[test]
    fn test_builder_default_trait() {
        let builder1 = ParseOptionsBuilder::new();
        let builder2 = ParseOptionsBuilder::default();
        let opts1 = builder1.build();
        let opts2 = builder2.build();

        assert_eq!(opts1.reference_mode, opts2.reference_mode);
        assert_eq!(opts1.limits.max_indent_depth, opts2.limits.max_indent_depth);
    }

    // `ParseOptions::builder()` is an entry point to the same builder.
    #[test]
    fn test_parse_options_builder_method() {
        let opts = ParseOptions::builder().build();
        assert_eq!(opts.reference_mode, ReferenceMode::Strict);
    }

    // ---- Individual setters ----

    #[test]
    fn test_builder_max_depth() {
        let opts = ParseOptions::builder().max_depth(100).build();

        assert_eq!(opts.limits.max_indent_depth, 100);
    }

    // `max_array_length` writes the `max_nodes` limit.
    #[test]
    fn test_builder_max_array_length() {
        let opts = ParseOptions::builder().max_array_length(5000).build();

        assert_eq!(opts.limits.max_nodes, 5000);
    }

    #[test]
    fn test_builder_strict_true() {
        let opts = ParseOptions::builder().strict(true).build();

        assert_eq!(opts.reference_mode, ReferenceMode::Strict);
    }

    #[test]
    fn test_builder_strict_false() {
        let opts = ParseOptions::builder().strict(false).build();

        assert_eq!(opts.reference_mode, ReferenceMode::Lenient);
    }

    #[test]
    fn test_builder_max_file_size() {
        let size = 500 * 1024 * 1024;
        let opts = ParseOptions::builder().max_file_size(size).build();

        assert_eq!(opts.limits.max_file_size, size);
    }

    #[test]
    fn test_builder_max_line_length() {
        let length = 512 * 1024;
        let opts = ParseOptions::builder().max_line_length(length).build();

        assert_eq!(opts.limits.max_line_length, length);
    }

    #[test]
    fn test_builder_max_aliases() {
        let opts = ParseOptions::builder().max_aliases(5000).build();

        assert_eq!(opts.limits.max_aliases, 5000);
    }

    #[test]
    fn test_builder_max_columns() {
        let opts = ParseOptions::builder().max_columns(50).build();

        assert_eq!(opts.limits.max_columns, 50);
    }

    #[test]
    fn test_builder_max_nest_depth() {
        let opts = ParseOptions::builder().max_nest_depth(50).build();

        assert_eq!(opts.limits.max_nest_depth, 50);
    }

    #[test]
    fn test_builder_max_block_string_size() {
        let size = 5 * 1024 * 1024;
        let opts = ParseOptions::builder().max_block_string_size(size).build();

        assert_eq!(opts.limits.max_block_string_size, size);
    }

    #[test]
    fn test_builder_max_object_keys() {
        let opts = ParseOptions::builder().max_object_keys(5000).build();

        assert_eq!(opts.limits.max_object_keys, 5000);
    }

    #[test]
    fn test_builder_max_total_keys() {
        let opts = ParseOptions::builder().max_total_keys(5_000_000).build();

        assert_eq!(opts.limits.max_total_keys, 5_000_000);
    }

    // ---- Chaining ----

    #[test]
    fn test_builder_multiple_chains() {
        let opts = ParseOptions::builder()
            .max_depth(100)
            .max_array_length(5000)
            .strict(false)
            .build();

        assert_eq!(opts.limits.max_indent_depth, 100);
        assert_eq!(opts.limits.max_nodes, 5000);
        assert_eq!(opts.reference_mode, ReferenceMode::Lenient);
    }

    // Every setter in one chain; each value must land in its own field.
    #[test]
    fn test_builder_all_options_chained() {
        let opts = ParseOptions::builder()
            .max_depth(75)
            .max_array_length(2000)
            .strict(false)
            .max_file_size(100 * 1024 * 1024)
            .max_line_length(256 * 1024)
            .max_aliases(1000)
            .max_columns(25)
            .max_nest_depth(30)
            .max_block_string_size(1024 * 1024)
            .max_object_keys(1000)
            .max_total_keys(1_000_000)
            .build();

        assert_eq!(opts.limits.max_indent_depth, 75);
        assert_eq!(opts.limits.max_nodes, 2000);
        assert_eq!(opts.reference_mode, ReferenceMode::Lenient);
        assert_eq!(opts.limits.max_file_size, 100 * 1024 * 1024);
        assert_eq!(opts.limits.max_line_length, 256 * 1024);
        assert_eq!(opts.limits.max_aliases, 1000);
        assert_eq!(opts.limits.max_columns, 25);
        assert_eq!(opts.limits.max_nest_depth, 30);
        assert_eq!(opts.limits.max_block_string_size, 1024 * 1024);
        assert_eq!(opts.limits.max_object_keys, 1000);
        assert_eq!(opts.limits.max_total_keys, 1_000_000);
    }

    // ---- Overriding: the last call wins ----

    #[test]
    fn test_builder_override_previous_value() {
        let opts = ParseOptions::builder().max_depth(50).max_depth(100).build();

        assert_eq!(opts.limits.max_indent_depth, 100);
    }

    #[test]
    fn test_builder_override_multiple_times() {
        let opts = ParseOptions::builder()
            .max_array_length(1000)
            .max_array_length(2000)
            .max_array_length(3000)
            .build();

        assert_eq!(opts.limits.max_nodes, 3000);
    }

    // ---- Defaults are preserved for untouched fields ----

    #[test]
    fn test_builder_default_keeps_other_defaults() {
        let opts = ParseOptions::builder().max_depth(100).build();

        assert_eq!(opts.limits.max_indent_depth, 100);
        assert_eq!(opts.limits.max_file_size, 1024 * 1024 * 1024);
        assert_eq!(opts.limits.max_line_length, 1024 * 1024);
        assert_eq!(opts.limits.max_nodes, 10_000_000);
        assert_eq!(opts.reference_mode, ReferenceMode::Strict);
    }

    // ---- Boundary values are stored as given ----

    #[test]
    fn test_builder_zero_values() {
        let opts = ParseOptions::builder()
            .max_depth(0)
            .max_array_length(0)
            .max_aliases(0)
            .build();

        assert_eq!(opts.limits.max_indent_depth, 0);
        assert_eq!(opts.limits.max_nodes, 0);
        assert_eq!(opts.limits.max_aliases, 0);
    }

    #[test]
    fn test_builder_max_values() {
        let opts = ParseOptions::builder()
            .max_depth(usize::MAX)
            .max_array_length(usize::MAX)
            .max_file_size(usize::MAX)
            .build();

        assert_eq!(opts.limits.max_indent_depth, usize::MAX);
        assert_eq!(opts.limits.max_nodes, usize::MAX);
        assert_eq!(opts.limits.max_file_size, usize::MAX);
    }

    // ---- Equivalence and independence ----

    // An untouched builder must produce the same values as `ParseOptions::default()`.
    #[test]
    fn test_builder_build_equivalent_to_default() {
        let builder_opts = ParseOptions::builder().build();
        let default_opts = ParseOptions::default();

        assert_eq!(builder_opts.reference_mode, default_opts.reference_mode);
        assert_eq!(
            builder_opts.limits.max_indent_depth,
            default_opts.limits.max_indent_depth
        );
        assert_eq!(builder_opts.limits.max_nodes, default_opts.limits.max_nodes);
        assert_eq!(
            builder_opts.limits.max_file_size,
            default_opts.limits.max_file_size
        );
    }

    // Changing a cloned builder must not affect the builder it was cloned from.
    #[test]
    fn test_builder_clone_independent() {
        let builder1 = ParseOptions::builder().max_depth(100);
        let builder2 = builder1.clone().max_depth(200);

        let opts1 = builder1.build();
        let opts2 = builder2.build();

        assert_eq!(opts1.limits.max_indent_depth, 100);
        assert_eq!(opts2.limits.max_indent_depth, 200);
    }

    // ---- Representative usage patterns ----

    #[test]
    fn test_builder_typical_usage_pattern() {
        let opts = ParseOptions::builder().max_depth(100).strict(true).build();

        assert_eq!(opts.reference_mode, ReferenceMode::Strict);
        assert_eq!(opts.limits.max_indent_depth, 100);
    }

    #[test]
    fn test_builder_lenient_parsing_pattern() {
        let opts = ParseOptions::builder()
            .max_array_length(50_000)
            .strict(false)
            .max_block_string_size(50 * 1024 * 1024)
            .build();

        assert_eq!(opts.reference_mode, ReferenceMode::Lenient);
        assert_eq!(opts.limits.max_nodes, 50_000);
        assert_eq!(opts.limits.max_block_string_size, 50 * 1024 * 1024);
    }

    #[test]
    fn test_builder_restricted_parsing_pattern() {
        let opts = ParseOptions::builder()
            .max_file_size(10 * 1024 * 1024)
            .max_line_length(64 * 1024)
            .max_depth(20)
            .max_array_length(1000)
            .strict(true)
            .build();

        assert_eq!(opts.limits.max_file_size, 10 * 1024 * 1024);
        assert_eq!(opts.limits.max_line_length, 64 * 1024);
        assert_eq!(opts.limits.max_indent_depth, 20);
        assert_eq!(opts.limits.max_nodes, 1000);
        assert_eq!(opts.reference_mode, ReferenceMode::Strict);
    }

    // ---- Timeout handling ----

    #[test]
    fn test_parse_with_generous_timeout_succeeds() {
        let doc = b"%VERSION: 1.0\n---\nkey: value\n";
        let mut opts = ParseOptions::default();
        opts.limits.timeout = Some(std::time::Duration::from_secs(10));
        let result = parse_with_limits(doc, opts);
        assert!(result.is_ok());
    }

    #[test]
    fn test_parse_with_no_timeout_succeeds() {
        let doc = b"%VERSION: 1.0\n---\nkey: value\n";
        let mut opts = ParseOptions::default();
        opts.limits.timeout = None;
        let result = parse_with_limits(doc, opts);
        assert!(result.is_ok());
    }

    // A 1 microsecond budget against a 100,000-line document is expected to
    // trip the timeout check.
    #[test]
    fn test_parse_with_very_short_timeout_fails() {
        let mut doc = String::from("%VERSION: 1.0\n---\ndata:\n");
        for i in 0..100_000 {
            doc.push_str(&format!(" key{}: value{}\n", i, i));
        }

        let mut opts = ParseOptions::default();
        opts.limits.timeout = Some(std::time::Duration::from_micros(1));

        let result = parse_with_limits(doc.as_bytes(), opts);
        assert!(result.is_err());

        if let Err(e) = result {
            let msg = e.to_string();
            assert!(msg.contains("timeout") || msg.contains("Timeout"));
        }
    }

    #[test]
    fn test_default_timeout_is_reasonable() {
        let opts = ParseOptions::default();
        assert_eq!(
            opts.limits.timeout,
            Some(std::time::Duration::from_secs(30))
        );
    }

    #[test]
    fn test_unlimited_has_no_timeout() {
        let limits = Limits::unlimited();
        assert_eq!(limits.timeout, None);
    }
}