1use std::collections::HashMap;
50
51use crate::value::QuillValue;
52
/// Name of the field under which body text is stored.
///
/// The parser writes the document body (and the body of every `SCOPE` item)
/// under this key, and rejects it as a `SCOPE` value.
pub const BODY_FIELD: &str = "body";

/// The literal tag name `"quill"`.
// NOTE(review): not referenced in the visible part of this file; presumably
// consumed by other modules — confirm before changing.
pub const QUILL_TAG: &str = "quill";
58
59#[derive(Debug, Clone)]
61pub struct ParsedDocument {
62 fields: HashMap<String, QuillValue>,
63 quill_tag: String,
64}
65
66impl ParsedDocument {
67 pub fn new(fields: HashMap<String, QuillValue>) -> Self {
69 Self {
70 fields,
71 quill_tag: "__default__".to_string(),
72 }
73 }
74
75 pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: String) -> Self {
77 Self { fields, quill_tag }
78 }
79
80 pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
82 decompose(markdown).map_err(crate::error::ParseError::from)
83 }
84
85 pub fn quill_tag(&self) -> &str {
87 &self.quill_tag
88 }
89
90 pub fn body(&self) -> Option<&str> {
92 self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
93 }
94
95 pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
97 self.fields.get(name)
98 }
99
100 pub fn fields(&self) -> &HashMap<String, QuillValue> {
102 &self.fields
103 }
104
105 pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
119 let mut fields = self.fields.clone();
120
121 for (field_name, default_value) in defaults {
122 if !fields.contains_key(field_name) {
124 fields.insert(field_name.clone(), default_value.clone());
125 }
126 }
127
128 Self {
129 fields,
130 quill_tag: self.quill_tag.clone(),
131 }
132 }
133
134 pub fn with_coercion(&self, schema: &QuillValue) -> Self {
152 use crate::schema::coerce_document;
153
154 let coerced_fields = coerce_document(schema, &self.fields);
155
156 Self {
157 fields: coerced_fields,
158 quill_tag: self.quill_tag.clone(),
159 }
160 }
161}
162
163#[derive(Debug)]
164struct MetadataBlock {
165 start: usize, end: usize, yaml_value: Option<serde_yaml::Value>, tag: Option<String>, quill_name: Option<String>, }
171
/// Returns `true` when `name` matches `[a-z_][a-z0-9_]*`.
///
/// The empty string is rejected; only ASCII lowercase letters, ASCII digits
/// and `_` are accepted, and the first character may not be a digit.
fn is_valid_tag_name(name: &str) -> bool {
    let mut remaining = name.chars();
    match remaining.next() {
        Some(head) if head.is_ascii_lowercase() || head == '_' => {
            remaining.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
        }
        _ => false,
    }
}
193
194fn find_metadata_blocks(
196 markdown: &str,
197) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
198 let mut blocks = Vec::new();
199 let mut pos = 0;
200
201 while pos < markdown.len() {
202 let search_str = &markdown[pos..];
204 let delimiter_result = search_str
205 .find("---\n")
206 .map(|p| (p, 4, "\n"))
207 .or_else(|| search_str.find("---\r\n").map(|p| (p, 5, "\r\n")));
208
209 if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
210 let abs_pos = pos + delimiter_pos;
211
212 let is_start_of_line = if abs_pos == 0 {
214 true
215 } else {
216 let char_before = markdown.as_bytes()[abs_pos - 1];
217 char_before == b'\n' || char_before == b'\r'
218 };
219
220 if !is_start_of_line {
221 pos = abs_pos + 1;
222 continue;
223 }
224
225 let content_start = abs_pos + delimiter_len; let preceded_by_blank = if abs_pos > 0 {
229 let before = &markdown[..abs_pos];
231 before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
232 } else {
233 false
234 };
235
236 let followed_by_blank = if content_start < markdown.len() {
237 markdown[content_start..].starts_with('\n')
238 || markdown[content_start..].starts_with("\r\n")
239 } else {
240 false
241 };
242
243 if preceded_by_blank && followed_by_blank {
245 pos = abs_pos + 3; continue;
248 }
249
250 if followed_by_blank {
253 pos = abs_pos + 3;
256 continue;
257 }
258
259 let rest = &markdown[content_start..];
262
263 let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
265 let closing_with_newline = closing_patterns
266 .iter()
267 .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
268 .min_by_key(|(p, _)| *p);
269
270 let closing_at_eof = ["\n---", "\r\n---"]
272 .iter()
273 .filter_map(|delim| {
274 rest.find(delim).and_then(|p| {
275 if p + delim.len() == rest.len() {
276 Some((p, delim.len()))
277 } else {
278 None
279 }
280 })
281 })
282 .min_by_key(|(p, _)| *p);
283
284 let closing_result = match (closing_with_newline, closing_at_eof) {
285 (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
286 (Some(_), Some(_)) => closing_with_newline,
287 (Some(_), None) => closing_with_newline,
288 (None, Some(_)) => closing_at_eof,
289 (None, None) => None,
290 };
291
292 if let Some((closing_pos, closing_len)) = closing_result {
293 let abs_closing_pos = content_start + closing_pos;
294 let content = &markdown[content_start..abs_closing_pos];
295
296 if content.len() > crate::error::MAX_YAML_SIZE {
298 return Err(format!(
299 "YAML block too large: {} bytes (max: {} bytes)",
300 content.len(),
301 crate::error::MAX_YAML_SIZE
302 )
303 .into());
304 }
305
306 let (tag, quill_name, yaml_value) = if !content.is_empty() {
309 match serde_yaml::from_str::<serde_yaml::Value>(content) {
311 Ok(parsed_yaml) => {
312 if let Some(mapping) = parsed_yaml.as_mapping() {
313 let quill_key = serde_yaml::Value::String("QUILL".to_string());
314 let scope_key = serde_yaml::Value::String("SCOPE".to_string());
315
316 let has_quill = mapping.contains_key(&quill_key);
317 let has_scope = mapping.contains_key(&scope_key);
318
319 if has_quill && has_scope {
320 return Err(
321 "Cannot specify both QUILL and SCOPE in the same block"
322 .into(),
323 );
324 }
325
326 if has_quill {
327 let quill_value = mapping.get(&quill_key).unwrap();
329 let quill_name_str = quill_value
330 .as_str()
331 .ok_or("QUILL value must be a string")?;
332
333 if !is_valid_tag_name(quill_name_str) {
334 return Err(format!(
335 "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
336 quill_name_str
337 )
338 .into());
339 }
340
341 let mut new_mapping = mapping.clone();
343 new_mapping.remove(&quill_key);
344 let new_value = if new_mapping.is_empty() {
345 None
346 } else {
347 Some(serde_yaml::Value::Mapping(new_mapping))
348 };
349
350 (None, Some(quill_name_str.to_string()), new_value)
351 } else if has_scope {
352 let scope_value = mapping.get(&scope_key).unwrap();
354 let field_name = scope_value
355 .as_str()
356 .ok_or("SCOPE value must be a string")?;
357
358 if !is_valid_tag_name(field_name) {
359 return Err(format!(
360 "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
361 field_name
362 )
363 .into());
364 }
365
366 if field_name == BODY_FIELD {
367 return Err(format!(
368 "Cannot use reserved field name '{}' as SCOPE value",
369 BODY_FIELD
370 )
371 .into());
372 }
373
374 let mut new_mapping = mapping.clone();
376 new_mapping.remove(&scope_key);
377 let new_value = if new_mapping.is_empty() {
378 None
379 } else {
380 Some(serde_yaml::Value::Mapping(new_mapping))
381 };
382
383 (Some(field_name.to_string()), None, new_value)
384 } else {
385 (None, None, Some(parsed_yaml))
387 }
388 } else {
389 (None, None, Some(parsed_yaml))
391 }
392 }
393 Err(e) => {
394 return Err(format!("Invalid YAML frontmatter: {}", e).into());
396 }
397 }
398 } else {
399 (None, None, None)
401 };
402
403 blocks.push(MetadataBlock {
404 start: abs_pos,
405 end: abs_closing_pos + closing_len, yaml_value,
407 tag,
408 quill_name,
409 });
410
411 pos = abs_closing_pos + closing_len;
412 } else if abs_pos == 0 {
413 return Err("Frontmatter started but not closed with ---".into());
415 } else {
416 pos = abs_pos + 3;
418 }
419 } else {
420 break;
421 }
422 }
423
424 Ok(blocks)
425}
426
427fn decompose(markdown: &str) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
429 if markdown.len() > crate::error::MAX_INPUT_SIZE {
431 return Err(format!(
432 "Input too large: {} bytes (max: {} bytes)",
433 markdown.len(),
434 crate::error::MAX_INPUT_SIZE
435 )
436 .into());
437 }
438
439 let mut fields = HashMap::new();
440
441 let blocks = find_metadata_blocks(markdown)?;
443
444 if blocks.is_empty() {
445 fields.insert(
447 BODY_FIELD.to_string(),
448 QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
449 );
450 return Ok(ParsedDocument::new(fields));
451 }
452
453 let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
455 let mut has_global_frontmatter = false;
456 let mut global_frontmatter_index: Option<usize> = None;
457 let mut quill_name: Option<String> = None;
458
459 for (idx, block) in blocks.iter().enumerate() {
461 if let Some(ref name) = block.quill_name {
463 if quill_name.is_some() {
464 return Err("Multiple quill directives found: only one allowed".into());
465 }
466 quill_name = Some(name.clone());
467 }
468
469 if block.tag.is_none() && block.quill_name.is_none() {
471 if has_global_frontmatter {
472 return Err(
473 "Multiple global frontmatter blocks found: only one untagged block allowed"
474 .into(),
475 );
476 }
477 has_global_frontmatter = true;
478 global_frontmatter_index = Some(idx);
479 }
480 }
481
482 if let Some(idx) = global_frontmatter_index {
484 let block = &blocks[idx];
485
486 let yaml_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
488 Some(serde_yaml::Value::Mapping(mapping)) => mapping
489 .iter()
490 .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
491 .collect(),
492 Some(serde_yaml::Value::Null) => {
493 HashMap::new()
495 }
496 Some(_) => {
497 return Err("Invalid YAML frontmatter: expected a mapping".into());
499 }
500 None => HashMap::new(),
501 };
502
503 for other_block in &blocks {
506 if let Some(ref tag) = other_block.tag {
507 if let Some(global_value) = yaml_fields.get(tag) {
508 if global_value.as_sequence().is_none() {
510 return Err(format!(
511 "Name collision: global field '{}' conflicts with tagged attribute",
512 tag
513 )
514 .into());
515 }
516 }
517 }
518 }
519
520 for (key, value) in yaml_fields {
522 fields.insert(key, QuillValue::from_yaml(value)?);
523 }
524 }
525
526 for block in &blocks {
528 if block.quill_name.is_some() {
529 if let Some(ref yaml_val) = block.yaml_value {
531 let yaml_fields: HashMap<String, serde_yaml::Value> = match yaml_val {
532 serde_yaml::Value::Mapping(mapping) => mapping
533 .iter()
534 .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
535 .collect(),
536 serde_yaml::Value::Null => {
537 HashMap::new()
539 }
540 _ => {
541 return Err("Invalid YAML in quill block: expected a mapping".into());
542 }
543 };
544
545 for key in yaml_fields.keys() {
547 if fields.contains_key(key) {
548 return Err(format!(
549 "Name collision: quill block field '{}' conflicts with existing field",
550 key
551 )
552 .into());
553 }
554 }
555
556 for (key, value) in yaml_fields {
558 fields.insert(key, QuillValue::from_yaml(value)?);
559 }
560 }
561 }
562 }
563
564 for (idx, block) in blocks.iter().enumerate() {
566 if let Some(ref tag_name) = block.tag {
567 if let Some(existing_value) = fields.get(tag_name) {
570 if existing_value.as_array().is_none() {
571 return Err(format!(
572 "Name collision: tagged attribute '{}' conflicts with global field",
573 tag_name
574 )
575 .into());
576 }
577 }
578
579 let mut item_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
581 Some(serde_yaml::Value::Mapping(mapping)) => mapping
582 .iter()
583 .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
584 .collect(),
585 Some(serde_yaml::Value::Null) => {
586 HashMap::new()
588 }
589 Some(_) => {
590 return Err(format!(
591 "Invalid YAML in tagged block '{}': expected a mapping",
592 tag_name
593 )
594 .into());
595 }
596 None => HashMap::new(),
597 };
598
599 let body_start = block.end;
601 let body_end = if idx + 1 < blocks.len() {
602 blocks[idx + 1].start
603 } else {
604 markdown.len()
605 };
606 let body = &markdown[body_start..body_end];
607
608 item_fields.insert(
610 BODY_FIELD.to_string(),
611 serde_yaml::Value::String(body.to_string()),
612 );
613
614 let item_value = serde_yaml::to_value(item_fields)?;
616
617 tagged_attributes
619 .entry(tag_name.clone())
620 .or_default()
621 .push(item_value);
622 }
623 }
624
625 let first_non_scope_block_idx = blocks
629 .iter()
630 .position(|b| b.tag.is_none() && b.quill_name.is_none())
631 .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));
632
633 let (body_start, body_end) = if let Some(idx) = first_non_scope_block_idx {
634 let start = blocks[idx].end;
636
637 let end = blocks
639 .iter()
640 .skip(idx + 1)
641 .find(|b| b.tag.is_some())
642 .map(|b| b.start)
643 .unwrap_or(markdown.len());
644
645 (start, end)
646 } else {
647 let end = blocks
649 .iter()
650 .find(|b| b.tag.is_some())
651 .map(|b| b.start)
652 .unwrap_or(0);
653
654 (0, end)
655 };
656
657 let global_body = &markdown[body_start..body_end];
658
659 fields.insert(
660 BODY_FIELD.to_string(),
661 QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
662 );
663
664 for (tag_name, items) in tagged_attributes {
667 if let Some(existing_value) = fields.get(&tag_name) {
668 if let Some(existing_array) = existing_value.as_array() {
670 let new_items_json: Vec<serde_json::Value> = items
672 .into_iter()
673 .map(|yaml_val| {
674 serde_json::to_value(&yaml_val)
675 .map_err(|e| format!("Failed to convert YAML to JSON: {}", e))
676 })
677 .collect::<Result<Vec<_>, _>>()?;
678
679 let mut merged_array = existing_array.clone();
681 merged_array.extend(new_items_json);
682
683 let quill_value = QuillValue::from_json(serde_json::Value::Array(merged_array));
685 fields.insert(tag_name, quill_value);
686 } else {
687 return Err(format!(
689 "Internal error: field '{}' exists but is not an array",
690 tag_name
691 )
692 .into());
693 }
694 } else {
695 let quill_value = QuillValue::from_yaml(serde_yaml::Value::Sequence(items))?;
698 fields.insert(tag_name, quill_value);
699 }
700 }
701
702 let quill_tag = quill_name.unwrap_or_else(|| "__default__".to_string());
703 let parsed = ParsedDocument::with_quill_tag(fields, quill_tag);
704
705 Ok(parsed)
706}
707
708#[cfg(test)]
709mod tests {
710 use super::*;
711
712 #[test]
713 fn test_no_frontmatter() {
714 let markdown = "# Hello World\n\nThis is a test.";
715 let doc = decompose(markdown).unwrap();
716
717 assert_eq!(doc.body(), Some(markdown));
718 assert_eq!(doc.fields().len(), 1);
719 assert_eq!(doc.quill_tag(), "__default__");
721 }
722
723 #[test]
724 fn test_with_frontmatter() {
725 let markdown = r#"---
726title: Test Document
727author: Test Author
728---
729
730# Hello World
731
732This is the body."#;
733
734 let doc = decompose(markdown).unwrap();
735
736 assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
737 assert_eq!(
738 doc.get_field("title").unwrap().as_str().unwrap(),
739 "Test Document"
740 );
741 assert_eq!(
742 doc.get_field("author").unwrap().as_str().unwrap(),
743 "Test Author"
744 );
745 assert_eq!(doc.fields().len(), 3); assert_eq!(doc.quill_tag(), "__default__");
748 }
749
750 #[test]
751 fn test_complex_yaml_frontmatter() {
752 let markdown = r#"---
753title: Complex Document
754tags:
755 - test
756 - yaml
757metadata:
758 version: 1.0
759 nested:
760 field: value
761---
762
763Content here."#;
764
765 let doc = decompose(markdown).unwrap();
766
767 assert_eq!(doc.body(), Some("\nContent here."));
768 assert_eq!(
769 doc.get_field("title").unwrap().as_str().unwrap(),
770 "Complex Document"
771 );
772
773 let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
774 assert_eq!(tags.len(), 2);
775 assert_eq!(tags[0].as_str().unwrap(), "test");
776 assert_eq!(tags[1].as_str().unwrap(), "yaml");
777 }
778
779 #[test]
780 fn test_with_defaults_empty_document() {
781 use std::collections::HashMap;
782
783 let mut defaults = HashMap::new();
784 defaults.insert(
785 "status".to_string(),
786 QuillValue::from_json(serde_json::json!("draft")),
787 );
788 defaults.insert(
789 "version".to_string(),
790 QuillValue::from_json(serde_json::json!(1)),
791 );
792
793 let doc = ParsedDocument::new(HashMap::new());
795 let doc_with_defaults = doc.with_defaults(&defaults);
796
797 assert_eq!(
799 doc_with_defaults
800 .get_field("status")
801 .unwrap()
802 .as_str()
803 .unwrap(),
804 "draft"
805 );
806 assert_eq!(
807 doc_with_defaults
808 .get_field("version")
809 .unwrap()
810 .as_number()
811 .unwrap()
812 .as_i64()
813 .unwrap(),
814 1
815 );
816 }
817
818 #[test]
819 fn test_with_defaults_preserves_existing_values() {
820 use std::collections::HashMap;
821
822 let mut defaults = HashMap::new();
823 defaults.insert(
824 "status".to_string(),
825 QuillValue::from_json(serde_json::json!("draft")),
826 );
827
828 let mut fields = HashMap::new();
830 fields.insert(
831 "status".to_string(),
832 QuillValue::from_json(serde_json::json!("published")),
833 );
834 let doc = ParsedDocument::new(fields);
835
836 let doc_with_defaults = doc.with_defaults(&defaults);
837
838 assert_eq!(
840 doc_with_defaults
841 .get_field("status")
842 .unwrap()
843 .as_str()
844 .unwrap(),
845 "published"
846 );
847 }
848
849 #[test]
850 fn test_with_defaults_partial_application() {
851 use std::collections::HashMap;
852
853 let mut defaults = HashMap::new();
854 defaults.insert(
855 "status".to_string(),
856 QuillValue::from_json(serde_json::json!("draft")),
857 );
858 defaults.insert(
859 "version".to_string(),
860 QuillValue::from_json(serde_json::json!(1)),
861 );
862
863 let mut fields = HashMap::new();
865 fields.insert(
866 "status".to_string(),
867 QuillValue::from_json(serde_json::json!("published")),
868 );
869 let doc = ParsedDocument::new(fields);
870
871 let doc_with_defaults = doc.with_defaults(&defaults);
872
873 assert_eq!(
875 doc_with_defaults
876 .get_field("status")
877 .unwrap()
878 .as_str()
879 .unwrap(),
880 "published"
881 );
882 assert_eq!(
883 doc_with_defaults
884 .get_field("version")
885 .unwrap()
886 .as_number()
887 .unwrap()
888 .as_i64()
889 .unwrap(),
890 1
891 );
892 }
893
894 #[test]
895 fn test_with_defaults_no_defaults() {
896 use std::collections::HashMap;
897
898 let defaults = HashMap::new(); let doc = ParsedDocument::new(HashMap::new());
901 let doc_with_defaults = doc.with_defaults(&defaults);
902
903 assert!(doc_with_defaults.fields().is_empty());
905 }
906
907 #[test]
908 fn test_with_defaults_complex_types() {
909 use std::collections::HashMap;
910
911 let mut defaults = HashMap::new();
912 defaults.insert(
913 "tags".to_string(),
914 QuillValue::from_json(serde_json::json!(["default", "tag"])),
915 );
916
917 let doc = ParsedDocument::new(HashMap::new());
918 let doc_with_defaults = doc.with_defaults(&defaults);
919
920 let tags = doc_with_defaults
922 .get_field("tags")
923 .unwrap()
924 .as_sequence()
925 .unwrap();
926 assert_eq!(tags.len(), 2);
927 assert_eq!(tags[0].as_str().unwrap(), "default");
928 assert_eq!(tags[1].as_str().unwrap(), "tag");
929 }
930
931 #[test]
932 fn test_with_coercion_singular_to_array() {
933 use std::collections::HashMap;
934
935 let schema = QuillValue::from_json(serde_json::json!({
936 "$schema": "https://json-schema.org/draft/2019-09/schema",
937 "type": "object",
938 "properties": {
939 "tags": {"type": "array"}
940 }
941 }));
942
943 let mut fields = HashMap::new();
944 fields.insert(
945 "tags".to_string(),
946 QuillValue::from_json(serde_json::json!("single-tag")),
947 );
948 let doc = ParsedDocument::new(fields);
949
950 let coerced_doc = doc.with_coercion(&schema);
951
952 let tags = coerced_doc.get_field("tags").unwrap();
953 assert!(tags.as_array().is_some());
954 let tags_array = tags.as_array().unwrap();
955 assert_eq!(tags_array.len(), 1);
956 assert_eq!(tags_array[0].as_str().unwrap(), "single-tag");
957 }
958
959 #[test]
960 fn test_with_coercion_string_to_boolean() {
961 use std::collections::HashMap;
962
963 let schema = QuillValue::from_json(serde_json::json!({
964 "$schema": "https://json-schema.org/draft/2019-09/schema",
965 "type": "object",
966 "properties": {
967 "active": {"type": "boolean"}
968 }
969 }));
970
971 let mut fields = HashMap::new();
972 fields.insert(
973 "active".to_string(),
974 QuillValue::from_json(serde_json::json!("true")),
975 );
976 let doc = ParsedDocument::new(fields);
977
978 let coerced_doc = doc.with_coercion(&schema);
979
980 assert_eq!(
981 coerced_doc.get_field("active").unwrap().as_bool().unwrap(),
982 true
983 );
984 }
985
986 #[test]
987 fn test_with_coercion_string_to_number() {
988 use std::collections::HashMap;
989
990 let schema = QuillValue::from_json(serde_json::json!({
991 "$schema": "https://json-schema.org/draft/2019-09/schema",
992 "type": "object",
993 "properties": {
994 "count": {"type": "number"}
995 }
996 }));
997
998 let mut fields = HashMap::new();
999 fields.insert(
1000 "count".to_string(),
1001 QuillValue::from_json(serde_json::json!("42")),
1002 );
1003 let doc = ParsedDocument::new(fields);
1004
1005 let coerced_doc = doc.with_coercion(&schema);
1006
1007 assert_eq!(
1008 coerced_doc.get_field("count").unwrap().as_i64().unwrap(),
1009 42
1010 );
1011 }
1012
1013 #[test]
1014 fn test_invalid_yaml() {
1015 let markdown = r#"---
1016title: [invalid yaml
1017author: missing close bracket
1018---
1019
1020Content here."#;
1021
1022 let result = decompose(markdown);
1023 assert!(result.is_err());
1024 assert!(result
1025 .unwrap_err()
1026 .to_string()
1027 .contains("Invalid YAML frontmatter"));
1028 }
1029
1030 #[test]
1031 fn test_unclosed_frontmatter() {
1032 let markdown = r#"---
1033title: Test
1034author: Test Author
1035
1036Content without closing ---"#;
1037
1038 let result = decompose(markdown);
1039 assert!(result.is_err());
1040 assert!(result.unwrap_err().to_string().contains("not closed"));
1041 }
1042
1043 #[test]
1046 fn test_basic_tagged_block() {
1047 let markdown = r#"---
1048title: Main Document
1049---
1050
1051Main body content.
1052
1053---
1054SCOPE: items
1055name: Item 1
1056---
1057
1058Body of item 1."#;
1059
1060 let doc = decompose(markdown).unwrap();
1061
1062 assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
1063 assert_eq!(
1064 doc.get_field("title").unwrap().as_str().unwrap(),
1065 "Main Document"
1066 );
1067
1068 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1069 assert_eq!(items.len(), 1);
1070
1071 let item = items[0].as_object().unwrap();
1072 assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1073 assert_eq!(
1074 item.get("body").unwrap().as_str().unwrap(),
1075 "\nBody of item 1."
1076 );
1077 }
1078
1079 #[test]
1080 fn test_multiple_tagged_blocks() {
1081 let markdown = r#"---
1082SCOPE: items
1083name: Item 1
1084tags: [a, b]
1085---
1086
1087First item body.
1088
1089---
1090SCOPE: items
1091name: Item 2
1092tags: [c, d]
1093---
1094
1095Second item body."#;
1096
1097 let doc = decompose(markdown).unwrap();
1098
1099 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1100 assert_eq!(items.len(), 2);
1101
1102 let item1 = items[0].as_object().unwrap();
1103 assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");
1104
1105 let item2 = items[1].as_object().unwrap();
1106 assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
1107 }
1108
1109 #[test]
1110 fn test_mixed_global_and_tagged() {
1111 let markdown = r#"---
1112title: Global
1113author: John Doe
1114---
1115
1116Global body.
1117
1118---
1119SCOPE: sections
1120title: Section 1
1121---
1122
1123Section 1 content.
1124
1125---
1126SCOPE: sections
1127title: Section 2
1128---
1129
1130Section 2 content."#;
1131
1132 let doc = decompose(markdown).unwrap();
1133
1134 assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
1135 assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
1136
1137 let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1138 assert_eq!(sections.len(), 2);
1139 }
1140
1141 #[test]
1142 fn test_empty_tagged_metadata() {
1143 let markdown = r#"---
1144SCOPE: items
1145---
1146
1147Body without metadata."#;
1148
1149 let doc = decompose(markdown).unwrap();
1150
1151 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1152 assert_eq!(items.len(), 1);
1153
1154 let item = items[0].as_object().unwrap();
1155 assert_eq!(
1156 item.get("body").unwrap().as_str().unwrap(),
1157 "\nBody without metadata."
1158 );
1159 }
1160
1161 #[test]
1162 fn test_tagged_block_without_body() {
1163 let markdown = r#"---
1164SCOPE: items
1165name: Item
1166---"#;
1167
1168 let doc = decompose(markdown).unwrap();
1169
1170 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1171 assert_eq!(items.len(), 1);
1172
1173 let item = items[0].as_object().unwrap();
1174 assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
1175 }
1176
1177 #[test]
1178 fn test_name_collision_global_and_tagged() {
1179 let markdown = r#"---
1180items: "global value"
1181---
1182
1183Body
1184
1185---
1186SCOPE: items
1187name: Item
1188---
1189
1190Item body"#;
1191
1192 let result = decompose(markdown);
1193 assert!(result.is_err());
1194 assert!(result.unwrap_err().to_string().contains("collision"));
1195 }
1196
1197 #[test]
1198 fn test_global_array_merged_with_scope() {
1199 let markdown = r#"---
1202items:
1203 - name: Global Item 1
1204 value: 100
1205 - name: Global Item 2
1206 value: 200
1207---
1208
1209Global body
1210
1211---
1212SCOPE: items
1213name: Scope Item 1
1214value: 300
1215---
1216
1217Scope item 1 body
1218
1219---
1220SCOPE: items
1221name: Scope Item 2
1222value: 400
1223---
1224
1225Scope item 2 body"#;
1226
1227 let doc = decompose(markdown).unwrap();
1228
1229 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1231 assert_eq!(items.len(), 4);
1232
1233 let item1 = items[0].as_object().unwrap();
1235 assert_eq!(
1236 item1.get("name").unwrap().as_str().unwrap(),
1237 "Global Item 1"
1238 );
1239 assert_eq!(item1.get("value").unwrap().as_i64().unwrap(), 100);
1240
1241 let item2 = items[1].as_object().unwrap();
1242 assert_eq!(
1243 item2.get("name").unwrap().as_str().unwrap(),
1244 "Global Item 2"
1245 );
1246 assert_eq!(item2.get("value").unwrap().as_i64().unwrap(), 200);
1247
1248 let item3 = items[2].as_object().unwrap();
1250 assert_eq!(item3.get("name").unwrap().as_str().unwrap(), "Scope Item 1");
1251 assert_eq!(item3.get("value").unwrap().as_i64().unwrap(), 300);
1252 assert_eq!(
1253 item3.get("body").unwrap().as_str().unwrap(),
1254 "\nScope item 1 body\n\n"
1255 );
1256
1257 let item4 = items[3].as_object().unwrap();
1258 assert_eq!(item4.get("name").unwrap().as_str().unwrap(), "Scope Item 2");
1259 assert_eq!(item4.get("value").unwrap().as_i64().unwrap(), 400);
1260 assert_eq!(
1261 item4.get("body").unwrap().as_str().unwrap(),
1262 "\nScope item 2 body"
1263 );
1264 }
1265
1266 #[test]
1267 fn test_empty_global_array_with_scope() {
1268 let markdown = r#"---
1270items: []
1271---
1272
1273Global body
1274
1275---
1276SCOPE: items
1277name: Item 1
1278---
1279
1280Item 1 body"#;
1281
1282 let doc = decompose(markdown).unwrap();
1283
1284 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1285 assert_eq!(items.len(), 1);
1286
1287 let item = items[0].as_object().unwrap();
1288 assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1289 }
1290
1291 #[test]
1292 fn test_reserved_field_name() {
1293 let markdown = r#"---
1294SCOPE: body
1295content: Test
1296---"#;
1297
1298 let result = decompose(markdown);
1299 assert!(result.is_err());
1300 assert!(result.unwrap_err().to_string().contains("reserved"));
1301 }
1302
1303 #[test]
1304 fn test_invalid_tag_syntax() {
1305 let markdown = r#"---
1306SCOPE: Invalid-Name
1307title: Test
1308---"#;
1309
1310 let result = decompose(markdown);
1311 assert!(result.is_err());
1312 assert!(result
1313 .unwrap_err()
1314 .to_string()
1315 .contains("Invalid field name"));
1316 }
1317
1318 #[test]
1319 fn test_multiple_global_frontmatter_blocks() {
1320 let markdown = r#"---
1321title: First
1322---
1323
1324Body
1325
1326---
1327author: Second
1328---
1329
1330More body"#;
1331
1332 let result = decompose(markdown);
1333 assert!(result.is_err());
1334 assert!(result
1335 .unwrap_err()
1336 .to_string()
1337 .contains("Multiple global frontmatter"));
1338 }
1339
1340 #[test]
1341 fn test_adjacent_blocks_different_tags() {
1342 let markdown = r#"---
1343SCOPE: items
1344name: Item 1
1345---
1346
1347Item 1 body
1348
1349---
1350SCOPE: sections
1351title: Section 1
1352---
1353
1354Section 1 body"#;
1355
1356 let doc = decompose(markdown).unwrap();
1357
1358 assert!(doc.get_field("items").is_some());
1359 assert!(doc.get_field("sections").is_some());
1360
1361 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1362 assert_eq!(items.len(), 1);
1363
1364 let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1365 assert_eq!(sections.len(), 1);
1366 }
1367
1368 #[test]
1369 fn test_order_preservation() {
1370 let markdown = r#"---
1371SCOPE: items
1372id: 1
1373---
1374
1375First
1376
1377---
1378SCOPE: items
1379id: 2
1380---
1381
1382Second
1383
1384---
1385SCOPE: items
1386id: 3
1387---
1388
1389Third"#;
1390
1391 let doc = decompose(markdown).unwrap();
1392
1393 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1394 assert_eq!(items.len(), 3);
1395
1396 for (i, item) in items.iter().enumerate() {
1397 let mapping = item.as_object().unwrap();
1398 let id = mapping.get("id").unwrap().as_i64().unwrap();
1399 assert_eq!(id, (i + 1) as i64);
1400 }
1401 }
1402
1403 #[test]
1404 fn test_product_catalog_integration() {
1405 let markdown = r#"---
1406title: Product Catalog
1407author: John Doe
1408date: 2024-01-01
1409---
1410
1411This is the main catalog description.
1412
1413---
1414SCOPE: products
1415name: Widget A
1416price: 19.99
1417sku: WID-001
1418---
1419
1420The **Widget A** is our most popular product.
1421
1422---
1423SCOPE: products
1424name: Gadget B
1425price: 29.99
1426sku: GAD-002
1427---
1428
1429The **Gadget B** is perfect for professionals.
1430
1431---
1432SCOPE: reviews
1433product: Widget A
1434rating: 5
1435---
1436
1437"Excellent product! Highly recommended."
1438
1439---
1440SCOPE: reviews
1441product: Gadget B
1442rating: 4
1443---
1444
1445"Very good, but a bit pricey.""#;
1446
1447 let doc = decompose(markdown).unwrap();
1448
1449 assert_eq!(
1451 doc.get_field("title").unwrap().as_str().unwrap(),
1452 "Product Catalog"
1453 );
1454 assert_eq!(
1455 doc.get_field("author").unwrap().as_str().unwrap(),
1456 "John Doe"
1457 );
1458 assert_eq!(
1459 doc.get_field("date").unwrap().as_str().unwrap(),
1460 "2024-01-01"
1461 );
1462
1463 assert!(doc.body().unwrap().contains("main catalog description"));
1465
1466 let products = doc.get_field("products").unwrap().as_sequence().unwrap();
1468 assert_eq!(products.len(), 2);
1469
1470 let product1 = products[0].as_object().unwrap();
1471 assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
1472 assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);
1473
1474 let reviews = doc.get_field("reviews").unwrap().as_sequence().unwrap();
1476 assert_eq!(reviews.len(), 2);
1477
1478 let review1 = reviews[0].as_object().unwrap();
1479 assert_eq!(
1480 review1.get("product").unwrap().as_str().unwrap(),
1481 "Widget A"
1482 );
1483 assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);
1484
1485 assert_eq!(doc.fields().len(), 6);
1487 }
1488
1489 #[test]
1490 fn taro_quill_directive() {
1491 let markdown = r#"---
1492QUILL: usaf_memo
1493memo_for: [ORG/SYMBOL]
1494memo_from: [ORG/SYMBOL]
1495---
1496
1497This is the memo body."#;
1498
1499 let doc = decompose(markdown).unwrap();
1500
1501 assert_eq!(doc.quill_tag(), "usaf_memo");
1503
1504 assert_eq!(
1506 doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1507 .as_str()
1508 .unwrap(),
1509 "ORG/SYMBOL"
1510 );
1511
1512 assert_eq!(doc.body(), Some("\nThis is the memo body."));
1514 }
1515
1516 #[test]
1517 fn test_quill_with_scope_blocks() {
1518 let markdown = r#"---
1519QUILL: document
1520title: Test Document
1521---
1522
1523Main body.
1524
1525---
1526SCOPE: sections
1527name: Section 1
1528---
1529
1530Section 1 body."#;
1531
1532 let doc = decompose(markdown).unwrap();
1533
1534 assert_eq!(doc.quill_tag(), "document");
1536
1537 assert_eq!(
1539 doc.get_field("title").unwrap().as_str().unwrap(),
1540 "Test Document"
1541 );
1542
1543 let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1545 assert_eq!(sections.len(), 1);
1546
1547 assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1549 }
1550
1551 #[test]
1552 fn test_multiple_quill_directives_error() {
1553 let markdown = r#"---
1554QUILL: first
1555---
1556
1557---
1558QUILL: second
1559---"#;
1560
1561 let result = decompose(markdown);
1562 assert!(result.is_err());
1563 assert!(result
1564 .unwrap_err()
1565 .to_string()
1566 .contains("Multiple quill directives"));
1567 }
1568
1569 #[test]
1570 fn test_invalid_quill_name() {
1571 let markdown = r#"---
1572QUILL: Invalid-Name
1573---"#;
1574
1575 let result = decompose(markdown);
1576 assert!(result.is_err());
1577 assert!(result
1578 .unwrap_err()
1579 .to_string()
1580 .contains("Invalid quill name"));
1581 }
1582
1583 #[test]
1584 fn test_quill_wrong_value_type() {
1585 let markdown = r#"---
1586QUILL: 123
1587---"#;
1588
1589 let result = decompose(markdown);
1590 assert!(result.is_err());
1591 assert!(result
1592 .unwrap_err()
1593 .to_string()
1594 .contains("QUILL value must be a string"));
1595 }
1596
1597 #[test]
1598 fn test_scope_wrong_value_type() {
1599 let markdown = r#"---
1600SCOPE: 123
1601---"#;
1602
1603 let result = decompose(markdown);
1604 assert!(result.is_err());
1605 assert!(result
1606 .unwrap_err()
1607 .to_string()
1608 .contains("SCOPE value must be a string"));
1609 }
1610
1611 #[test]
1612 fn test_both_quill_and_scope_error() {
1613 let markdown = r#"---
1614QUILL: test
1615SCOPE: items
1616---"#;
1617
1618 let result = decompose(markdown);
1619 assert!(result.is_err());
1620 assert!(result
1621 .unwrap_err()
1622 .to_string()
1623 .contains("Cannot specify both QUILL and SCOPE"));
1624 }
1625
1626 #[test]
1627 fn test_blank_lines_in_frontmatter() {
1628 let markdown = r#"---
1630title: Test Document
1631author: Test Author
1632
1633description: This has a blank line above it
1634tags:
1635 - one
1636 - two
1637---
1638
1639# Hello World
1640
1641This is the body."#;
1642
1643 let doc = decompose(markdown).unwrap();
1644
1645 assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
1646 assert_eq!(
1647 doc.get_field("title").unwrap().as_str().unwrap(),
1648 "Test Document"
1649 );
1650 assert_eq!(
1651 doc.get_field("author").unwrap().as_str().unwrap(),
1652 "Test Author"
1653 );
1654 assert_eq!(
1655 doc.get_field("description").unwrap().as_str().unwrap(),
1656 "This has a blank line above it"
1657 );
1658
1659 let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
1660 assert_eq!(tags.len(), 2);
1661 }
1662
1663 #[test]
1664 fn test_blank_lines_in_scope_blocks() {
1665 let markdown = r#"---
1667SCOPE: items
1668name: Item 1
1669
1670price: 19.99
1671
1672tags:
1673 - electronics
1674 - gadgets
1675---
1676
1677Body of item 1."#;
1678
1679 let doc = decompose(markdown).unwrap();
1680
1681 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1682 assert_eq!(items.len(), 1);
1683
1684 let item = items[0].as_object().unwrap();
1685 assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1686 assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);
1687
1688 let tags = item.get("tags").unwrap().as_array().unwrap();
1689 assert_eq!(tags.len(), 2);
1690 }
1691
1692 #[test]
1693 fn test_horizontal_rule_with_blank_lines_above_and_below() {
1694 let markdown = r#"---
1696title: Test
1697---
1698
1699First paragraph.
1700
1701---
1702
1703Second paragraph."#;
1704
1705 let doc = decompose(markdown).unwrap();
1706
1707 assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1708
1709 let body = doc.body().unwrap();
1711 assert!(body.contains("First paragraph."));
1712 assert!(body.contains("---"));
1713 assert!(body.contains("Second paragraph."));
1714 }
1715
1716 #[test]
1717 fn test_horizontal_rule_not_preceded_by_blank() {
1718 let markdown = r#"---
1721title: Test
1722---
1723
1724First paragraph.
1725---
1726
1727Second paragraph."#;
1728
1729 let doc = decompose(markdown).unwrap();
1730
1731 let body = doc.body().unwrap();
1732 assert!(body.contains("---"));
1734 }
1735
1736 #[test]
1737 fn test_multiple_blank_lines_in_yaml() {
1738 let markdown = r#"---
1740title: Test
1741
1742
1743author: John Doe
1744
1745
1746version: 1.0
1747---
1748
1749Body content."#;
1750
1751 let doc = decompose(markdown).unwrap();
1752
1753 assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1754 assert_eq!(
1755 doc.get_field("author").unwrap().as_str().unwrap(),
1756 "John Doe"
1757 );
1758 assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1759 }
1760
1761 #[test]
1762 fn test_html_comment_interaction() {
1763 let markdown = r#"<!---
1764---> the rest of the page content
1765
1766---
1767key: value
1768---
1769"#;
1770 let doc = decompose(markdown).unwrap();
1771
1772 let key = doc.get_field("key").and_then(|v| v.as_str());
1775 assert_eq!(key, Some("value"));
1776 }
1777}
/// Tests driven by the bundled demo fixture, plus checks of the input/YAML
/// size limits and checks that `<<...>>` chevron text passes through
/// `decompose` unchanged.
#[cfg(test)]
mod demo_file_test {
    use super::*;

    /// Parses the extended-metadata demo fixture and spot-checks its global
    /// fields, its body text, and the `features` / `use_cases` lists.
    #[test]
    fn test_extended_metadata_demo_file() {
        let markdown = include_str!("../../fixtures/resources/extended_metadata_demo.md");
        let doc = decompose(markdown).unwrap();

        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Extended Metadata Demo"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Quillmark Team"
        );
        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);

        assert!(doc
            .body()
            .unwrap()
            .contains("extended YAML metadata standard"));

        let features = doc.get_field("features").unwrap().as_sequence().unwrap();
        assert_eq!(features.len(), 3);

        let use_cases = doc.get_field("use_cases").unwrap().as_sequence().unwrap();
        assert_eq!(use_cases.len(), 2);

        let feature1 = features[0].as_object().unwrap();
        assert_eq!(
            feature1.get("name").unwrap().as_str().unwrap(),
            "Tag Directives"
        );
    }

    /// Input one byte larger than `MAX_INPUT_SIZE` is rejected with an
    /// "Input too large" error.
    #[test]
    fn test_input_size_limit() {
        let size = crate::error::MAX_INPUT_SIZE + 1;
        let large_markdown = "a".repeat(size);

        let result = decompose(&large_markdown);
        assert!(result.is_err());

        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("Input too large"));
    }

    /// A frontmatter block holding a quoted string longer than `MAX_YAML_SIZE`
    /// is rejected with a "YAML block too large" error.
    #[test]
    fn test_yaml_size_limit() {
        let mut markdown = String::from("---\n");

        let size = crate::error::MAX_YAML_SIZE + 1;
        markdown.push_str("data: \"");
        markdown.push_str(&"x".repeat(size));
        markdown.push_str("\"\n---\n\nBody");

        let result = decompose(&markdown);
        assert!(result.is_err());

        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("YAML block too large"));
    }

    /// A document with a 1000-character body parses successfully.
    #[test]
    fn test_input_within_size_limit() {
        let size = 1000;
        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));

        let result = decompose(&markdown);
        assert!(result.is_ok());
    }

    /// A small, ordinary frontmatter block parses successfully.
    #[test]
    fn test_yaml_within_size_limit() {
        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";

        // `markdown` is already a `&str`, so it is passed directly.
        let result = decompose(markdown);
        assert!(result.is_ok());
    }

    /// Without frontmatter, a body containing `<<...>>` is returned verbatim.
    #[test]
    fn test_chevrons_preserved_in_body_no_frontmatter() {
        let markdown = "Use <<raw content>> here.";
        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("Use <<raw content>> here."));
    }

    /// With frontmatter, a body containing `<<...>>` is returned verbatim
    /// (including the leading newline after the closing `---`).
    #[test]
    fn test_chevrons_preserved_in_body_with_frontmatter() {
        let markdown = r#"---
title: Test
---

Use <<raw content>> here."#;
        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\nUse <<raw content>> here."));
    }

    /// Chevrons inside an unquoted YAML string value are kept as-is.
    #[test]
    fn test_chevrons_preserved_in_yaml_string() {
        let markdown = r#"---
title: Test <<with chevrons>>
---

Body content."#;
        let doc = decompose(markdown).unwrap();

        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Test <<with chevrons>>"
        );
    }

    /// Chevrons inside the string elements of a YAML list are kept as-is.
    #[test]
    fn test_chevrons_preserved_in_yaml_array() {
        let markdown = r#"---
items:
  - "<<first>>"
  - "<<second>>"
---

Body."#;
        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items[0].as_str().unwrap(), "<<first>>");
        assert_eq!(items[1].as_str().unwrap(), "<<second>>");
    }

    /// Chevrons inside a string nested in a YAML mapping are kept as-is.
    #[test]
    fn test_chevrons_preserved_in_yaml_nested() {
        let markdown = r#"---
metadata:
  description: "<<nested value>>"
---

Body."#;
        let doc = decompose(markdown).unwrap();

        let metadata = doc.get_field("metadata").unwrap().as_object().unwrap();
        assert_eq!(
            metadata.get("description").unwrap().as_str().unwrap(),
            "<<nested value>>"
        );
    }

    /// Chevrons both inside and outside a fenced code block remain in the body.
    #[test]
    fn test_chevrons_preserved_in_code_blocks() {
        let markdown = r#"```
<<in code block>>
```

<<outside code block>>"#;
        let doc = decompose(markdown).unwrap();

        let body = doc.body().unwrap();
        assert!(body.contains("<<in code block>>"));
        assert!(body.contains("<<outside code block>>"));
    }

    /// Chevrons both inside and outside an inline code span remain in the body.
    #[test]
    fn test_chevrons_preserved_in_inline_code() {
        let markdown = "`<<in inline code>>` and <<outside inline code>>";
        let doc = decompose(markdown).unwrap();

        let body = doc.body().unwrap();
        assert!(body.contains("`<<in inline code>>`"));
        assert!(body.contains("<<outside inline code>>"));
    }

    /// Chevrons in the body that follows a `SCOPE` block are kept in that
    /// item's `body` entry.
    #[test]
    fn test_chevrons_preserved_in_tagged_block_body() {
        let markdown = r#"---
title: Main
---

Main body.

---
SCOPE: items
name: Item 1
---

Use <<raw>> here."#;
        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        let item = items[0].as_object().unwrap();
        let item_body = item.get("body").unwrap().as_str().unwrap();
        assert!(item_body.contains("<<raw>>"));
    }

    /// Chevrons in a YAML string inside a `SCOPE` block are kept as-is.
    #[test]
    fn test_chevrons_preserved_in_tagged_block_yaml() {
        let markdown = r#"---
title: Main
---

Main body.

---
SCOPE: items
description: "<<tagged yaml>>"
---

Item body."#;
        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        let item = items[0].as_object().unwrap();
        assert_eq!(
            item.get("description").unwrap().as_str().unwrap(),
            "<<tagged yaml>>"
        );
    }

    /// An integer YAML value is read back as an integer.
    #[test]
    fn test_yaml_numbers_not_affected() {
        let markdown = r#"---
count: 42
---

Body."#;
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
    }

    /// A boolean YAML value is read back as a boolean.
    #[test]
    fn test_yaml_booleans_not_affected() {
        let markdown = r#"---
active: true
---

Body."#;
        let doc = decompose(markdown).unwrap();
        assert!(doc.get_field("active").unwrap().as_bool().unwrap());
    }

    /// A chevron pair that spans a line break keeps both halves in the body.
    #[test]
    fn test_multiline_chevrons_preserved() {
        let markdown = "<<text\nacross lines>>";
        let doc = decompose(markdown).unwrap();

        let body = doc.body().unwrap();
        assert!(body.contains("<<text"));
        assert!(body.contains("across lines>>"));
    }

    /// An opening `<<` without a closing `>>` is returned unchanged.
    #[test]
    fn test_unmatched_chevrons_preserved() {
        let markdown = "<<unmatched";
        let doc = decompose(markdown).unwrap();

        let body = doc.body().unwrap();
        assert_eq!(body, "<<unmatched");
    }
}
2064
/// Edge-case tests for `decompose` and `is_valid_tag_name`: degenerate
/// documents, line endings, Unicode, YAML value kinds, `SCOPE`/`QUILL`
/// handling, name validation and body boundaries.
#[cfg(test)]
mod robustness_tests {
    use super::*;

    // --- Degenerate documents and delimiter look-alikes ---

    /// An empty input yields an empty body and the `__default__` quill tag.
    #[test]
    fn test_empty_document() {
        let doc = decompose("").unwrap();
        assert_eq!(doc.body(), Some(""));
        assert_eq!(doc.quill_tag(), "__default__");
    }

    /// Whitespace-only input is returned verbatim as the body.
    #[test]
    fn test_only_whitespace() {
        let doc = decompose(" \n\n \t").unwrap();
        assert_eq!(doc.body(), Some(" \n\n \t"));
    }

    /// A lone `---` parses successfully and is returned as the body.
    #[test]
    fn test_only_dashes() {
        // `unwrap` already fails the test on `Err`, so no separate `is_ok` check.
        let doc = decompose("---").unwrap();
        assert_eq!(doc.body(), Some("---"));
    }

    /// `---` in the middle of a line is plain body text.
    #[test]
    fn test_dashes_in_middle_of_line() {
        let markdown = "some text --- more text";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.body(), Some("some text --- more text"));
    }

    /// With `----` (four dashes) around the YAML-looking text, the `----`
    /// is still present in the body.
    #[test]
    fn test_four_dashes() {
        let markdown = "----\ntitle: Test\n----\n\nBody";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("----"));
    }

    // --- Line endings and end-of-file handling ---

    /// Frontmatter and body are parsed when every line ends in CRLF.
    #[test]
    fn test_crlf_line_endings() {
        let markdown = "---\r\ntitle: Test\r\n---\r\n\r\nBody content.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert!(doc.body().unwrap().contains("Body content."));
    }

    /// Frontmatter is parsed when LF and CRLF line endings are mixed.
    #[test]
    fn test_mixed_line_endings() {
        let markdown = "---\ntitle: Test\r\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
    }

    /// A closing `---` at end of input (no trailing newline) still closes the
    /// frontmatter; the body is empty.
    #[test]
    fn test_frontmatter_at_eof_no_trailing_newline() {
        let markdown = "---\ntitle: Test\n---";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert_eq!(doc.body(), Some(""));
    }

    /// A frontmatter block containing only a whitespace line contributes no
    /// fields: the body is the single field present.
    #[test]
    fn test_empty_frontmatter() {
        let markdown = "---\n \n---\n\nBody content.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("Body content."));
        assert_eq!(doc.fields().len(), 1);
    }

    /// A frontmatter block made of several whitespace-only lines still parses.
    #[test]
    fn test_whitespace_only_frontmatter() {
        let markdown = "---\n \n\n \n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("Body."));
    }

    // --- Unicode ---

    /// Non-ASCII YAML keys can be looked up by their exact text.
    #[test]
    fn test_unicode_in_yaml_keys() {
        let markdown = "---\ntitre: Bonjour\nタイトル: こんにちは\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("titre").unwrap().as_str().unwrap(), "Bonjour");
        assert_eq!(
            doc.get_field("タイトル").unwrap().as_str().unwrap(),
            "こんにちは"
        );
    }

    /// Non-ASCII text (including an emoji) in a YAML value is kept intact.
    #[test]
    fn test_unicode_in_yaml_values() {
        let markdown = "---\ntitle: 你好世界 🎉\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "你好世界 🎉"
        );
    }

    /// Non-ASCII text (including an emoji) in the body is kept intact.
    #[test]
    fn test_unicode_in_body() {
        let markdown = "---\ntitle: Test\n---\n\n日本語テキスト with emoji 🚀";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("日本語テキスト"));
        assert!(doc.body().unwrap().contains("🚀"));
    }

    // --- YAML value kinds ---

    /// A literal block scalar (`|`) keeps its newlines.
    #[test]
    fn test_yaml_multiline_string() {
        let markdown = r#"---
description: |
  This is a
  multiline string
  with preserved newlines.
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let desc = doc.get_field("description").unwrap().as_str().unwrap();
        assert!(desc.contains("multiline string"));
        assert!(desc.contains('\n'));
    }

    /// A folded block scalar (`>`) is readable as a string.
    #[test]
    fn test_yaml_folded_string() {
        let markdown = r#"---
description: >
  This is a folded
  string that becomes
  a single line.
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let desc = doc.get_field("description").unwrap().as_str().unwrap();
        assert!(desc.contains("folded"));
    }

    /// An explicit YAML `null` is stored as a present field whose value is null.
    #[test]
    fn test_yaml_null_value() {
        let markdown = "---\noptional: null\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.get_field("optional").unwrap().is_null());
    }

    /// A quoted empty string is stored as an empty string.
    #[test]
    fn test_yaml_empty_string_value() {
        let markdown = "---\nempty: \"\"\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("empty").unwrap().as_str().unwrap(), "");
    }

    /// Colons, commas and brackets inside a quoted string are kept literally.
    #[test]
    fn test_yaml_special_characters_in_string() {
        let markdown = "---\nspecial: \"colon: here, and [brackets]\"\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(
            doc.get_field("special").unwrap().as_str().unwrap(),
            "colon: here, and [brackets]"
        );
    }

    /// Nested YAML mappings are reachable level by level.
    #[test]
    fn test_yaml_nested_objects() {
        let markdown = r#"---
config:
  database:
    host: localhost
    port: 5432
  cache:
    enabled: true
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let config = doc.get_field("config").unwrap().as_object().unwrap();
        let db = config.get("database").unwrap().as_object().unwrap();
        assert_eq!(db.get("host").unwrap().as_str().unwrap(), "localhost");
        assert_eq!(db.get("port").unwrap().as_i64().unwrap(), 5432);
    }

    // --- SCOPE blocks ---

    /// A `SCOPE` block with nothing after it produces an item whose `body`
    /// entry is the empty string.
    #[test]
    fn test_scope_with_empty_body() {
        let markdown = r#"---
SCOPE: items
name: Item
---"#;
        let doc = decompose(markdown).unwrap();
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);
        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
    }

    /// Two back-to-back `SCOPE` blocks with the same name produce two items.
    #[test]
    fn test_scope_consecutive_blocks() {
        let markdown = r#"---
SCOPE: a
id: 1
---
---
SCOPE: a
id: 2
---"#;
        let doc = decompose(markdown).unwrap();
        let items = doc.get_field("a").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 2);
    }

    /// `---` in the middle of a line of a scoped item's body stays in that body.
    #[test]
    fn test_scope_with_body_containing_dashes() {
        let markdown = r#"---
SCOPE: items
name: Item
---

Some text with --- dashes in it."#;
        let doc = decompose(markdown).unwrap();
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        let item = items[0].as_object().unwrap();
        let body = item.get("body").unwrap().as_str().unwrap();
        assert!(body.contains("--- dashes"));
    }

    // --- QUILL directive ---

    /// A quill name may start with an underscore.
    #[test]
    fn test_quill_with_underscore_prefix() {
        let markdown = "---\nQUILL: _internal\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "_internal");
    }

    /// A quill name may contain digits after the first character.
    #[test]
    fn test_quill_with_numbers() {
        let markdown = "---\nQUILL: form_8_v2\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "form_8_v2");
    }

    /// Other keys in the same block as `QUILL` are still exposed as fields.
    #[test]
    fn test_quill_with_additional_fields() {
        let markdown = r#"---
QUILL: my_quill
title: Document Title
author: John Doe
---

Body content."#;
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "my_quill");
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Document Title"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "John Doe"
        );
    }

    // --- Invalid names and YAML syntax errors ---

    /// An all-uppercase scope name is rejected with "Invalid field name".
    #[test]
    fn test_invalid_scope_name_uppercase() {
        let markdown = "---\nSCOPE: ITEMS\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid field name"));
    }

    /// A scope name that starts with a digit is rejected.
    #[test]
    fn test_invalid_scope_name_starts_with_number() {
        let markdown = "---\nSCOPE: 123items\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    /// A scope name containing a hyphen is rejected.
    #[test]
    fn test_invalid_scope_name_with_hyphen() {
        let markdown = "---\nSCOPE: my-items\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    /// A quill name containing uppercase letters is rejected.
    #[test]
    fn test_invalid_quill_name_uppercase() {
        let markdown = "---\nQUILL: MyQuill\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    /// A frontmatter line without a colon makes parsing fail.
    #[test]
    fn test_yaml_syntax_error_missing_colon() {
        let markdown = "---\ntitle Test\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    /// Inconsistently indented sequence items: no outcome is asserted here.
    /// The test only requires that `decompose` returns (`Ok` or `Err`)
    /// without panicking.
    #[test]
    fn test_yaml_syntax_error_bad_indentation() {
        let markdown = "---\nitems:\n- one\n - two\n---\n\nBody.";
        let result = decompose(markdown);
        // Deliberately discarded: either result is acceptable.
        let _ = result;
    }

    // --- Body boundaries ---

    /// Extra blank lines after the frontmatter are kept at the start of the body.
    #[test]
    fn test_body_with_leading_newlines() {
        let markdown = "---\ntitle: Test\n---\n\n\n\nBody with leading newlines.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().starts_with('\n'));
    }

    /// Trailing newlines at the end of the document are kept in the body.
    #[test]
    fn test_body_with_trailing_newlines() {
        let markdown = "---\ntitle: Test\n---\n\nBody.\n\n\n";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().ends_with('\n'));
    }

    /// Frontmatter with nothing after it yields an empty (not missing) body.
    #[test]
    fn test_no_body_after_frontmatter() {
        let markdown = "---\ntitle: Test\n---";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.body(), Some(""));
    }

    // --- `is_valid_tag_name` ---

    /// A single underscore is a valid tag name.
    #[test]
    fn test_valid_tag_name_single_underscore() {
        assert!(is_valid_tag_name("_"));
    }

    /// A name with a leading underscore is valid.
    #[test]
    fn test_valid_tag_name_underscore_prefix() {
        assert!(is_valid_tag_name("_private"));
    }

    /// Digits are allowed after the first character.
    #[test]
    fn test_valid_tag_name_with_numbers() {
        assert!(is_valid_tag_name("item1"));
        assert!(is_valid_tag_name("item_2"));
    }

    /// The empty string is not a valid tag name.
    #[test]
    fn test_invalid_tag_name_empty() {
        assert!(!is_valid_tag_name(""));
    }

    /// A name may not start with a digit.
    #[test]
    fn test_invalid_tag_name_starts_with_number() {
        assert!(!is_valid_tag_name("1item"));
    }

    /// Uppercase letters are not allowed anywhere in the name.
    #[test]
    fn test_invalid_tag_name_uppercase() {
        assert!(!is_valid_tag_name("Items"));
        assert!(!is_valid_tag_name("ITEMS"));
    }

    /// Hyphens, dots and spaces are not allowed in the name.
    #[test]
    fn test_invalid_tag_name_special_chars() {
        assert!(!is_valid_tag_name("my-items"));
        assert!(!is_valid_tag_name("my.items"));
        assert!(!is_valid_tag_name("my items"));
    }

    // --- Non-string values and pre-existing guillemets ---

    /// Integer, float and boolean YAML values keep their types, both as
    /// top-level fields and as elements of a mixed-type list.
    #[test]
    fn test_guillemet_in_yaml_preserves_non_strings() {
        let markdown = r#"---
count: 42
price: 19.99
active: true
items:
  - first
  - 100
  - true
---

Body."#;
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
        assert_eq!(doc.get_field("price").unwrap().as_f64().unwrap(), 19.99);
        assert!(doc.get_field("active").unwrap().as_bool().unwrap());

        // The list elements must keep their individual types as well.
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 3);
        assert_eq!(items[0].as_str().unwrap(), "first");
        assert_eq!(items[1].as_i64().unwrap(), 100);
        assert!(items[2].as_bool().unwrap());
    }

    /// Guillemets (`«…»`) already present in a YAML value are left unchanged.
    #[test]
    fn test_guillemet_double_conversion_prevention() {
        let markdown = "---\ntitle: Already «converted»\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Already «converted»"
        );
    }
}