//! Decomposition of Quillmark markdown documents: extraction of YAML
//! frontmatter, `SCOPE` blocks, and the `QUILL` directive into a
//! [`ParsedDocument`].

use std::collections::HashMap;

use crate::value::QuillValue;

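/// Reserved field name under which the markdown body is stored, both for the
/// document as a whole and for each SCOPE block item.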
pub const BODY_FIELD: &str = "body";

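/// Formats a `serde_yaml::Error` with added context, appending line and column
/// information when the error carries a location.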
fn yaml_error_to_string(e: serde_yaml::Error, context: &str) -> String {
    let mut msg = format!("{}: {}", context, e);

    if let Some(loc) = e.location() {
        msg.push_str(&format!(" at line {}, column {}", loc.line(), loc.column()));
    }

    msg
}

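/// Name of the quill tag ("quill").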
pub const QUILL_TAG: &str = "quill";

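/// A markdown document decomposed into metadata fields (including the body
/// under [`BODY_FIELD`]) and an optional quill tag taken from a `QUILL:`
/// directive.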
#[derive(Debug, Clone)]
pub struct ParsedDocument {
    fields: HashMap<String, QuillValue>,
    quill_tag: Option<String>,
}

impl ParsedDocument {
    /// Creates a document from a field map, with no quill tag.
    pub fn new(fields: HashMap<String, QuillValue>) -> Self {
        Self {
            fields,
            quill_tag: None,
        }
    }

    /// Creates a document from a field map and an optional quill tag.
    pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: Option<String>) -> Self {
        Self { fields, quill_tag }
    }

    /// Parses markdown into a document, converting any decomposition error
    /// into a [`crate::error::ParseError`].
    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
        decompose(markdown).map_err(|e| crate::error::ParseError::from(e))
    }

    /// The quill name declared by a `QUILL:` directive, if any.
    pub fn quill_tag(&self) -> Option<&str> {
        self.quill_tag.as_deref()
    }

    /// The markdown body stored under [`BODY_FIELD`], if present and a string.
    pub fn body(&self) -> Option<&str> {
        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
    }

    /// Looks up a metadata field by name.
    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
        self.fields.get(name)
    }

    /// All metadata fields, including the body.
    pub fn fields(&self) -> &HashMap<String, QuillValue> {
        &self.fields
    }

    /// Returns a copy of the document with `defaults` filled in for any field
    /// that is not already present; existing values are preserved.
    pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
        let mut fields = self.fields.clone();

        for (field_name, default_value) in defaults {
            if !fields.contains_key(field_name) {
                fields.insert(field_name.clone(), default_value.clone());
            }
        }

        Self {
            fields,
            quill_tag: self.quill_tag.clone(),
        }
    }
}

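/// A raw metadata block located in the source text.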
#[derive(Debug)]
struct MetadataBlock {
    /// Byte offset where the opening `---` begins.
    start: usize,
    /// Byte offset just past the closing delimiter.
    end: usize,
    /// YAML text inside the block, with any QUILL/SCOPE key removed.
    yaml_content: String,
    /// SCOPE tag name, if the block carried one.
    tag: Option<String>,
    /// QUILL name, if the block carried one.
    quill_name: Option<String>,
}

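/// Returns true if `name` matches `[a-z_][a-z0-9_]*`, the pattern required for
/// SCOPE field names and QUILL names.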
fn is_valid_tag_name(name: &str) -> bool {
    if name.is_empty() {
        return false;
    }

    let mut chars = name.chars();
    let first = chars.next().unwrap();

    if !first.is_ascii_lowercase() && first != '_' {
        return false;
    }

    for ch in chars {
        if !ch.is_ascii_lowercase() && !ch.is_ascii_digit() && ch != '_' {
            return false;
        }
    }

    true
}

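/// Scans the markdown for `---`-delimited metadata blocks. A `---` line that is
/// followed by a blank line is treated as a horizontal rule and skipped; for
/// each real block, the YAML content is captured along with any SCOPE tag or
/// QUILL name it declares.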
fn find_metadata_blocks(
    markdown: &str,
) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
    let mut blocks = Vec::new();
    let mut pos = 0;

    while pos < markdown.len() {
        let search_str = &markdown[pos..];

        // Find the next `---` delimiter line (LF or CRLF).
        let delimiter_result = if let Some(p) = search_str.find("---\n") {
            Some((p, 4, "\n"))
        } else if let Some(p) = search_str.find("---\r\n") {
            Some((p, 5, "\r\n"))
        } else {
            None
        };

        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
            let abs_pos = pos + delimiter_pos;
            let content_start = abs_pos + delimiter_len;

            let preceded_by_blank = if abs_pos > 0 {
                let before = &markdown[..abs_pos];
                before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
            } else {
                false
            };

            let followed_by_blank = if content_start < markdown.len() {
                markdown[content_start..].starts_with('\n')
                    || markdown[content_start..].starts_with("\r\n")
            } else {
                false
            };

            // A `---` surrounded by blank lines is a horizontal rule, not a
            // metadata delimiter.
            if preceded_by_blank && followed_by_blank {
                pos = abs_pos + 3;
                continue;
            }

            // A `---` followed by a blank line cannot open a metadata block:
            // the YAML must start on the very next line.
            if followed_by_blank {
                pos = abs_pos + 3;
                continue;
            }

            let rest = &markdown[content_start..];

            // The closing delimiter is either followed by a newline...
            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
            let closing_with_newline = closing_patterns
                .iter()
                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
                .min_by_key(|(p, _)| *p);

            // ...or sits at the very end of the input.
            let closing_at_eof = ["\n---", "\r\n---"]
                .iter()
                .filter_map(|delim| {
                    rest.find(delim).and_then(|p| {
                        if p + delim.len() == rest.len() {
                            Some((p, delim.len()))
                        } else {
                            None
                        }
                    })
                })
                .min_by_key(|(p, _)| *p);

            let closing_result = match (closing_with_newline, closing_at_eof) {
                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
                (Some(_), Some(_)) => closing_with_newline,
                (Some(_), None) => closing_with_newline,
                (None, Some(_)) => closing_at_eof,
                (None, None) => None,
            };

            if let Some((closing_pos, closing_len)) = closing_result {
                let abs_closing_pos = content_start + closing_pos;
                let content = &markdown[content_start..abs_closing_pos];

                if content.len() > crate::error::MAX_YAML_SIZE {
                    return Err(format!(
                        "YAML block too large: {} bytes (max: {} bytes)",
                        content.len(),
                        crate::error::MAX_YAML_SIZE
                    )
                    .into());
                }

                let (tag, quill_name, yaml_content) = if !content.is_empty() {
                    match serde_yaml::from_str::<serde_yaml::Value>(content) {
                        Ok(yaml_value) => {
                            if let Some(mapping) = yaml_value.as_mapping() {
                                let quill_key = serde_yaml::Value::String("QUILL".to_string());
                                let scope_key = serde_yaml::Value::String("SCOPE".to_string());

                                let has_quill = mapping.contains_key(&quill_key);
                                let has_scope = mapping.contains_key(&scope_key);

                                if has_quill && has_scope {
                                    return Err(
                                        "Cannot specify both QUILL and SCOPE in the same block"
                                            .into(),
                                    );
                                }

                                if has_quill {
                                    let quill_value = mapping.get(&quill_key).unwrap();
                                    let quill_name_str = quill_value
                                        .as_str()
                                        .ok_or_else(|| "QUILL value must be a string")?;

                                    if !is_valid_tag_name(quill_name_str) {
                                        return Err(format!(
                                            "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
                                            quill_name_str
                                        )
                                        .into());
                                    }

                                    // Strip the QUILL key and keep the rest of the YAML.
                                    let mut new_mapping = mapping.clone();
                                    new_mapping.remove(&quill_key);
                                    let new_yaml = serde_yaml::to_string(&new_mapping)
                                        .map_err(|e| format!("Failed to serialize YAML: {}", e))?;

                                    (None, Some(quill_name_str.to_string()), new_yaml)
                                } else if has_scope {
                                    let scope_value = mapping.get(&scope_key).unwrap();
                                    let field_name = scope_value
                                        .as_str()
                                        .ok_or_else(|| "SCOPE value must be a string")?;

                                    if !is_valid_tag_name(field_name) {
                                        return Err(format!(
                                            "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
                                            field_name
                                        )
                                        .into());
                                    }

                                    if field_name == BODY_FIELD {
                                        return Err(format!(
                                            "Cannot use reserved field name '{}' as SCOPE value",
                                            BODY_FIELD
                                        )
                                        .into());
                                    }

                                    // Strip the SCOPE key and keep the rest of the YAML.
                                    let mut new_mapping = mapping.clone();
                                    new_mapping.remove(&scope_key);
                                    let new_yaml = serde_yaml::to_string(&new_mapping)
                                        .map_err(|e| format!("Failed to serialize YAML: {}", e))?;

                                    (Some(field_name.to_string()), None, new_yaml)
                                } else {
                                    (None, None, content.to_string())
                                }
                            } else {
                                (None, None, content.to_string())
                            }
                        }
                        Err(_) => {
                            // Keep invalid YAML as-is; it is reported with
                            // better context when the block is parsed later.
                            (None, None, content.to_string())
                        }
                    }
                } else {
                    (None, None, content.to_string())
                };

                blocks.push(MetadataBlock {
                    start: abs_pos,
                    end: abs_closing_pos + closing_len,
                    yaml_content,
                    tag,
                    quill_name,
                });

                pos = abs_closing_pos + closing_len;
            } else if abs_pos == 0 {
                return Err("Frontmatter started but not closed with ---".into());
            } else {
                // No closing delimiter for a non-frontmatter `---`: treat it
                // as plain text and keep scanning.
                pos = abs_pos + 3;
            }
        } else {
            break;
        }
    }

    Ok(blocks)
}

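/// Decomposes markdown into a [`ParsedDocument`]: global frontmatter becomes
/// top-level fields, each SCOPE block becomes one element of an array field
/// named after its tag (with the text that follows it as the item's body), and
/// the remaining text becomes the document body.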
fn decompose(markdown: &str) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
    if markdown.len() > crate::error::MAX_INPUT_SIZE {
        return Err(format!(
            "Input too large: {} bytes (max: {} bytes)",
            markdown.len(),
            crate::error::MAX_INPUT_SIZE
        )
        .into());
    }

    let mut fields = HashMap::new();

    let blocks = find_metadata_blocks(markdown)?;

    // No metadata blocks: the whole input is the body.
    if blocks.is_empty() {
        fields.insert(
            BODY_FIELD.to_string(),
            QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
        );
        return Ok(ParsedDocument::new(fields));
    }

    let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
    let mut has_global_frontmatter = false;
    let mut global_frontmatter_index: Option<usize> = None;
    let mut quill_name: Option<String> = None;

    for (idx, block) in blocks.iter().enumerate() {
        if let Some(ref name) = block.quill_name {
            if quill_name.is_some() {
                return Err("Multiple quill directives found: only one allowed".into());
            }
            quill_name = Some(name.clone());
        }

        if block.tag.is_none() && block.quill_name.is_none() {
            if has_global_frontmatter {
                return Err(
                    "Multiple global frontmatter blocks found: only one untagged block allowed"
                        .into(),
                );
            }
            has_global_frontmatter = true;
            global_frontmatter_index = Some(idx);
        }
    }

    if let Some(idx) = global_frontmatter_index {
        let block = &blocks[idx];

        let yaml_fields: HashMap<String, serde_yaml::Value> = if block.yaml_content.is_empty() {
            HashMap::new()
        } else {
            serde_yaml::from_str(&block.yaml_content)
                .map_err(|e| yaml_error_to_string(e, "Invalid YAML frontmatter"))?
        };

        // A global field may share a name with a SCOPE tag only if it is a
        // sequence (the tagged items are appended to it later).
        for other_block in &blocks {
            if let Some(ref tag) = other_block.tag {
                if let Some(global_value) = yaml_fields.get(tag) {
                    if global_value.as_sequence().is_none() {
                        return Err(format!(
                            "Name collision: global field '{}' conflicts with tagged attribute",
                            tag
                        )
                        .into());
                    }
                }
            }
        }

        for (key, value) in yaml_fields {
            fields.insert(key, QuillValue::from_yaml(value)?);
        }
    }

    // Fields declared in the QUILL block are merged in alongside the global ones.
    for block in &blocks {
        if block.quill_name.is_some() {
            if !block.yaml_content.is_empty() {
                let yaml_fields: HashMap<String, serde_yaml::Value> =
                    serde_yaml::from_str(&block.yaml_content)
                        .map_err(|e| yaml_error_to_string(e, "Invalid YAML in quill block"))?;

                for key in yaml_fields.keys() {
                    if fields.contains_key(key) {
                        return Err(format!(
                            "Name collision: quill block field '{}' conflicts with existing field",
                            key
                        )
                        .into());
                    }
                }

                for (key, value) in yaml_fields {
                    fields.insert(key, QuillValue::from_yaml(value)?);
                }
            }
        }
    }

    // Each SCOPE block becomes one item: its YAML fields plus the text that
    // follows it (up to the next block) stored under the body field.
    for (idx, block) in blocks.iter().enumerate() {
        if let Some(ref tag_name) = block.tag {
            if let Some(existing_value) = fields.get(tag_name) {
                if existing_value.as_array().is_none() {
                    return Err(format!(
                        "Name collision: tagged attribute '{}' conflicts with global field",
                        tag_name
                    )
                    .into());
                }
            }

            let mut item_fields: HashMap<String, serde_yaml::Value> = if block
                .yaml_content
                .is_empty()
            {
                HashMap::new()
            } else {
                serde_yaml::from_str(&block.yaml_content).map_err(|e| {
                    yaml_error_to_string(e, &format!("Invalid YAML in tagged block '{}'", tag_name))
                })?
            };

            let body_start = block.end;
            let body_end = if idx + 1 < blocks.len() {
                blocks[idx + 1].start
            } else {
                markdown.len()
            };
            let body = &markdown[body_start..body_end];

            item_fields.insert(
                BODY_FIELD.to_string(),
                serde_yaml::Value::String(body.to_string()),
            );

            let item_value = serde_yaml::to_value(item_fields)?;

            tagged_attributes
                .entry(tag_name.clone())
                .or_insert_with(Vec::new)
                .push(item_value);
        }
    }

    // The global body runs from the end of the first untagged (or QUILL) block
    // to the first tagged block after it; if every block is tagged, it is
    // whatever text precedes the first tagged block.
    let first_non_scope_block_idx = blocks
        .iter()
        .position(|b| b.tag.is_none() && b.quill_name.is_none())
        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));

    let (body_start, body_end) = if let Some(idx) = first_non_scope_block_idx {
        let start = blocks[idx].end;

        let end = blocks
            .iter()
            .skip(idx + 1)
            .find(|b| b.tag.is_some())
            .map(|b| b.start)
            .unwrap_or(markdown.len());

        (start, end)
    } else {
        let end = blocks
            .iter()
            .find(|b| b.tag.is_some())
            .map(|b| b.start)
            .unwrap_or(0);

        (0, end)
    };

    let global_body = &markdown[body_start..body_end];

    fields.insert(
        BODY_FIELD.to_string(),
        QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
    );

    // Append SCOPE items to an existing global array with the same name, or
    // create a new array field.
    for (tag_name, items) in tagged_attributes {
        if let Some(existing_value) = fields.get(&tag_name) {
            if let Some(existing_array) = existing_value.as_array() {
                let new_items_json: Vec<serde_json::Value> = items
                    .into_iter()
                    .map(|yaml_val| {
                        serde_json::to_value(&yaml_val)
                            .map_err(|e| format!("Failed to convert YAML to JSON: {}", e))
                    })
                    .collect::<Result<Vec<_>, _>>()?;

                let mut merged_array = existing_array.clone();
                merged_array.extend(new_items_json);

                let quill_value = QuillValue::from_json(serde_json::Value::Array(merged_array));
                fields.insert(tag_name, quill_value);
            } else {
                return Err(format!(
                    "Internal error: field '{}' exists but is not an array",
                    tag_name
                )
                .into());
            }
        } else {
            let quill_value = QuillValue::from_yaml(serde_yaml::Value::Sequence(items))?;
            fields.insert(tag_name, quill_value);
        }
    }

    let mut parsed = ParsedDocument::new(fields);

    if let Some(name) = quill_name {
        parsed.quill_tag = Some(name);
    }

    Ok(parsed)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_no_frontmatter() {
        let markdown = "# Hello World\n\nThis is a test.";
        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some(markdown));
        assert_eq!(doc.fields().len(), 1);
    }

    #[test]
    fn test_with_frontmatter() {
        let markdown = r#"---
title: Test Document
author: Test Author
---

# Hello World

This is the body."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Test Document"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Test Author"
        );
        assert_eq!(doc.fields().len(), 3);
    }

    #[test]
    fn test_complex_yaml_frontmatter() {
        let markdown = r#"---
title: Complex Document
tags:
  - test
  - yaml
metadata:
  version: 1.0
  nested:
    field: value
---

Content here."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\nContent here."));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Complex Document"
        );

        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
        assert_eq!(tags.len(), 2);
        assert_eq!(tags[0].as_str().unwrap(), "test");
        assert_eq!(tags[1].as_str().unwrap(), "yaml");
    }

    #[test]
    fn test_with_defaults_empty_document() {
        use std::collections::HashMap;

        let mut defaults = HashMap::new();
        defaults.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("draft")),
        );
        defaults.insert(
            "version".to_string(),
            QuillValue::from_json(serde_json::json!(1)),
        );

        let doc = ParsedDocument::new(HashMap::new());
        let doc_with_defaults = doc.with_defaults(&defaults);

        assert_eq!(
            doc_with_defaults
                .get_field("status")
                .unwrap()
                .as_str()
                .unwrap(),
            "draft"
        );
        assert_eq!(
            doc_with_defaults
                .get_field("version")
                .unwrap()
                .as_number()
                .unwrap()
                .as_i64()
                .unwrap(),
            1
        );
    }

    #[test]
    fn test_with_defaults_preserves_existing_values() {
        use std::collections::HashMap;

        let mut defaults = HashMap::new();
        defaults.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("draft")),
        );

        let mut fields = HashMap::new();
        fields.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("published")),
        );
        let doc = ParsedDocument::new(fields);

        let doc_with_defaults = doc.with_defaults(&defaults);

        assert_eq!(
            doc_with_defaults
                .get_field("status")
                .unwrap()
                .as_str()
                .unwrap(),
            "published"
        );
    }

    #[test]
    fn test_with_defaults_partial_application() {
        use std::collections::HashMap;

        let mut defaults = HashMap::new();
        defaults.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("draft")),
        );
        defaults.insert(
            "version".to_string(),
            QuillValue::from_json(serde_json::json!(1)),
        );

        let mut fields = HashMap::new();
        fields.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("published")),
        );
        let doc = ParsedDocument::new(fields);

        let doc_with_defaults = doc.with_defaults(&defaults);

        assert_eq!(
            doc_with_defaults
                .get_field("status")
                .unwrap()
                .as_str()
                .unwrap(),
            "published"
        );
        assert_eq!(
            doc_with_defaults
                .get_field("version")
                .unwrap()
                .as_number()
                .unwrap()
                .as_i64()
                .unwrap(),
            1
        );
    }

    #[test]
    fn test_with_defaults_no_defaults() {
        use std::collections::HashMap;

        let defaults = HashMap::new();
        let doc = ParsedDocument::new(HashMap::new());
        let doc_with_defaults = doc.with_defaults(&defaults);

        assert!(doc_with_defaults.fields().is_empty());
    }

    #[test]
    fn test_with_defaults_complex_types() {
        use std::collections::HashMap;

        let mut defaults = HashMap::new();
        defaults.insert(
            "tags".to_string(),
            QuillValue::from_json(serde_json::json!(["default", "tag"])),
        );

        let doc = ParsedDocument::new(HashMap::new());
        let doc_with_defaults = doc.with_defaults(&defaults);

        let tags = doc_with_defaults
            .get_field("tags")
            .unwrap()
            .as_sequence()
            .unwrap();
        assert_eq!(tags.len(), 2);
        assert_eq!(tags[0].as_str().unwrap(), "default");
        assert_eq!(tags[1].as_str().unwrap(), "tag");
    }

    #[test]
    fn test_invalid_yaml() {
        let markdown = r#"---
title: [invalid yaml
author: missing close bracket
---

Content here."#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid YAML frontmatter"));
    }

    #[test]
    fn test_unclosed_frontmatter() {
        let markdown = r#"---
title: Test
author: Test Author

Content without closing ---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("not closed"));
    }

    #[test]
    fn test_basic_tagged_block() {
        let markdown = r#"---
title: Main Document
---

Main body content.

---
SCOPE: items
name: Item 1
---

Body of item 1."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Main Document"
        );

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
        assert_eq!(
            item.get("body").unwrap().as_str().unwrap(),
            "\nBody of item 1."
        );
    }

    #[test]
    fn test_multiple_tagged_blocks() {
        let markdown = r#"---
SCOPE: items
name: Item 1
tags: [a, b]
---

First item body.

---
SCOPE: items
name: Item 2
tags: [c, d]
---

Second item body."#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 2);

        let item1 = items[0].as_object().unwrap();
        assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");

        let item2 = items[1].as_object().unwrap();
        assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
    }

    #[test]
    fn test_mixed_global_and_tagged() {
        let markdown = r#"---
title: Global
author: John Doe
---

Global body.

---
SCOPE: sections
title: Section 1
---

Section 1 content.

---
SCOPE: sections
title: Section 2
---

Section 2 content."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));

        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
        assert_eq!(sections.len(), 2);
    }

    #[test]
    fn test_empty_tagged_metadata() {
        let markdown = r#"---
SCOPE: items
---

Body without metadata."#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(
            item.get("body").unwrap().as_str().unwrap(),
            "\nBody without metadata."
        );
    }

    #[test]
    fn test_tagged_block_without_body() {
        let markdown = r#"---
SCOPE: items
name: Item
---"#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
    }

    #[test]
    fn test_name_collision_global_and_tagged() {
        let markdown = r#"---
items: "global value"
---

Body

---
SCOPE: items
name: Item
---

Item body"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("collision"));
    }

    #[test]
    fn test_global_array_merged_with_scope() {
        let markdown = r#"---
items:
  - name: Global Item 1
    value: 100
  - name: Global Item 2
    value: 200
---

Global body

---
SCOPE: items
name: Scope Item 1
value: 300
---

Scope item 1 body

---
SCOPE: items
name: Scope Item 2
value: 400
---

Scope item 2 body"#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 4);

        let item1 = items[0].as_object().unwrap();
        assert_eq!(
            item1.get("name").unwrap().as_str().unwrap(),
            "Global Item 1"
        );
        assert_eq!(item1.get("value").unwrap().as_i64().unwrap(), 100);

        let item2 = items[1].as_object().unwrap();
        assert_eq!(
            item2.get("name").unwrap().as_str().unwrap(),
            "Global Item 2"
        );
        assert_eq!(item2.get("value").unwrap().as_i64().unwrap(), 200);

        let item3 = items[2].as_object().unwrap();
        assert_eq!(item3.get("name").unwrap().as_str().unwrap(), "Scope Item 1");
        assert_eq!(item3.get("value").unwrap().as_i64().unwrap(), 300);
        assert_eq!(
            item3.get("body").unwrap().as_str().unwrap(),
            "\nScope item 1 body\n\n"
        );

        let item4 = items[3].as_object().unwrap();
        assert_eq!(item4.get("name").unwrap().as_str().unwrap(), "Scope Item 2");
        assert_eq!(item4.get("value").unwrap().as_i64().unwrap(), 400);
        assert_eq!(
            item4.get("body").unwrap().as_str().unwrap(),
            "\nScope item 2 body"
        );
    }

    #[test]
    fn test_empty_global_array_with_scope() {
        let markdown = r#"---
items: []
---

Global body

---
SCOPE: items
name: Item 1
---

Item 1 body"#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
    }

    #[test]
    fn test_reserved_field_name() {
        let markdown = r#"---
SCOPE: body
content: Test
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("reserved"));
    }

    #[test]
    fn test_invalid_tag_syntax() {
        let markdown = r#"---
SCOPE: Invalid-Name
title: Test
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid field name"));
    }

    #[test]
    fn test_multiple_global_frontmatter_blocks() {
        let markdown = r#"---
title: First
---

Body

---
author: Second
---

More body"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Multiple global frontmatter"));
    }

    #[test]
    fn test_adjacent_blocks_different_tags() {
        let markdown = r#"---
SCOPE: items
name: Item 1
---

Item 1 body

---
SCOPE: sections
title: Section 1
---

Section 1 body"#;

        let doc = decompose(markdown).unwrap();

        assert!(doc.get_field("items").is_some());
        assert!(doc.get_field("sections").is_some());

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
        assert_eq!(sections.len(), 1);
    }

    #[test]
    fn test_order_preservation() {
        let markdown = r#"---
SCOPE: items
id: 1
---

First

---
SCOPE: items
id: 2
---

Second

---
SCOPE: items
id: 3
---

Third"#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 3);

        for (i, item) in items.iter().enumerate() {
            let mapping = item.as_object().unwrap();
            let id = mapping.get("id").unwrap().as_i64().unwrap();
            assert_eq!(id, (i + 1) as i64);
        }
    }

    #[test]
    fn test_product_catalog_integration() {
        let markdown = r#"---
title: Product Catalog
author: John Doe
date: 2024-01-01
---

This is the main catalog description.

---
SCOPE: products
name: Widget A
price: 19.99
sku: WID-001
---

The **Widget A** is our most popular product.

---
SCOPE: products
name: Gadget B
price: 29.99
sku: GAD-002
---

The **Gadget B** is perfect for professionals.

---
SCOPE: reviews
product: Widget A
rating: 5
---

"Excellent product! Highly recommended."

---
SCOPE: reviews
product: Gadget B
rating: 4
---

"Very good, but a bit pricey.""#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Product Catalog"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "John Doe"
        );
        assert_eq!(
            doc.get_field("date").unwrap().as_str().unwrap(),
            "2024-01-01"
        );

        assert!(doc.body().unwrap().contains("main catalog description"));

        let products = doc.get_field("products").unwrap().as_sequence().unwrap();
        assert_eq!(products.len(), 2);

        let product1 = products[0].as_object().unwrap();
        assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
        assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);

        let reviews = doc.get_field("reviews").unwrap().as_sequence().unwrap();
        assert_eq!(reviews.len(), 2);

        let review1 = reviews[0].as_object().unwrap();
        assert_eq!(
            review1.get("product").unwrap().as_str().unwrap(),
            "Widget A"
        );
        assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);

        assert_eq!(doc.fields().len(), 6);
    }

    #[test]
    fn test_quill_directive() {
        let markdown = r#"---
QUILL: usaf_memo
memo_for: [ORG/SYMBOL]
memo_from: [ORG/SYMBOL]
---

This is the memo body."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.quill_tag(), Some("usaf_memo"));

        assert_eq!(
            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
                .as_str()
                .unwrap(),
            "ORG/SYMBOL"
        );

        assert_eq!(doc.body(), Some("\nThis is the memo body."));
    }

    #[test]
    fn test_quill_with_scope_blocks() {
        let markdown = r#"---
QUILL: document
title: Test Document
---

Main body.

---
SCOPE: sections
name: Section 1
---

Section 1 body."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.quill_tag(), Some("document"));

        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Test Document"
        );

        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
        assert_eq!(sections.len(), 1);

        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
    }

    #[test]
    fn test_multiple_quill_directives_error() {
        let markdown = r#"---
QUILL: first
---

---
QUILL: second
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Multiple quill directives"));
    }

    #[test]
    fn test_invalid_quill_name() {
        let markdown = r#"---
QUILL: Invalid-Name
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid quill name"));
    }

    #[test]
    fn test_quill_wrong_value_type() {
        let markdown = r#"---
QUILL: 123
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("QUILL value must be a string"));
    }

    #[test]
    fn test_scope_wrong_value_type() {
        let markdown = r#"---
SCOPE: 123
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("SCOPE value must be a string"));
    }

    #[test]
    fn test_both_quill_and_scope_error() {
        let markdown = r#"---
QUILL: test
SCOPE: items
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Cannot specify both QUILL and SCOPE"));
    }

    #[test]
    fn test_blank_lines_in_frontmatter() {
        let markdown = r#"---
title: Test Document
author: Test Author

description: This has a blank line above it
tags:
  - one
  - two
---

# Hello World

This is the body."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Test Document"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Test Author"
        );
        assert_eq!(
            doc.get_field("description").unwrap().as_str().unwrap(),
            "This has a blank line above it"
        );

        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
        assert_eq!(tags.len(), 2);
    }

    #[test]
    fn test_blank_lines_in_scope_blocks() {
        let markdown = r#"---
SCOPE: items
name: Item 1

price: 19.99

tags:
  - electronics
  - gadgets
---

Body of item 1."#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
        assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);

        let tags = item.get("tags").unwrap().as_array().unwrap();
        assert_eq!(tags.len(), 2);
    }

    #[test]
    fn test_horizontal_rule_with_blank_lines_above_and_below() {
        let markdown = r#"---
title: Test
---

First paragraph.

---

Second paragraph."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");

        let body = doc.body().unwrap();
        assert!(body.contains("First paragraph."));
        assert!(body.contains("---"));
        assert!(body.contains("Second paragraph."));
    }

    #[test]
    fn test_horizontal_rule_not_preceded_by_blank() {
        let markdown = r#"---
title: Test
---

First paragraph.
---

Second paragraph."#;

        let doc = decompose(markdown).unwrap();

        let body = doc.body().unwrap();
        assert!(body.contains("---"));
    }

    #[test]
    fn test_multiple_blank_lines_in_yaml() {
        let markdown = r#"---
title: Test


author: John Doe


version: 1.0
---

Body content."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "John Doe"
        );
        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
    }
}

#[cfg(test)]
mod demo_file_test {
    use super::*;

    #[test]
    fn test_extended_metadata_demo_file() {
        let markdown = include_str!("../../quillmark-fixtures/resources/extended_metadata_demo.md");
        let doc = decompose(markdown).unwrap();

        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Extended Metadata Demo"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Quillmark Team"
        );
        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);

        assert!(doc
            .body()
            .unwrap()
            .contains("extended YAML metadata standard"));

        let features = doc.get_field("features").unwrap().as_sequence().unwrap();
        assert_eq!(features.len(), 3);

        let use_cases = doc.get_field("use_cases").unwrap().as_sequence().unwrap();
        assert_eq!(use_cases.len(), 2);

        let feature1 = features[0].as_object().unwrap();
        assert_eq!(
            feature1.get("name").unwrap().as_str().unwrap(),
            "Tag Directives"
        );
    }

    #[test]
    fn test_input_size_limit() {
        let size = crate::error::MAX_INPUT_SIZE + 1;
        let large_markdown = "a".repeat(size);

        let result = decompose(&large_markdown);
        assert!(result.is_err());

        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("Input too large"));
    }

    #[test]
    fn test_yaml_size_limit() {
        let mut markdown = String::from("---\n");

        let size = crate::error::MAX_YAML_SIZE + 1;
        markdown.push_str("data: \"");
        markdown.push_str(&"x".repeat(size));
        markdown.push_str("\"\n---\n\nBody");

        let result = decompose(&markdown);
        assert!(result.is_err());

        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("YAML block too large"));
    }

    #[test]
    fn test_input_within_size_limit() {
        let size = 1000;
        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));

        let result = decompose(&markdown);
        assert!(result.is_ok());
    }

    #[test]
    fn test_yaml_within_size_limit() {
        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";

        let result = decompose(&markdown);
        assert!(result.is_ok());
    }
}