use std::collections::HashMap;

use crate::value::QuillValue;

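/// Reserved field name under which the markdown body is stored.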
pub const BODY_FIELD: &str = "body";

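/// Formats a `serde_yaml` error with the given context, appending the line and
/// column when the error provides a location.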
fn yaml_error_to_string(e: serde_yaml::Error, context: &str) -> String {
    let mut msg = format!("{}: {}", context, e);

    if let Some(loc) = e.location() {
        msg.push_str(&format!(" at line {}, column {}", loc.line(), loc.column()));
    }

    msg
}

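/// Name of the quill tag field.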
pub const QUILL_TAG: &str = "quill";

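/// A parsed Quillmark document: metadata fields collected from frontmatter,
/// `QUILL`, and `SCOPE` blocks, plus the markdown body stored under
/// [`BODY_FIELD`].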
#[derive(Debug, Clone)]
pub struct ParsedDocument {
    fields: HashMap<String, QuillValue>,
    quill_tag: Option<String>,
}

impl ParsedDocument {
    pub fn new(fields: HashMap<String, QuillValue>) -> Self {
        Self {
            fields,
            quill_tag: None,
        }
    }

    pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: Option<String>) -> Self {
        Self { fields, quill_tag }
    }

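    /// Parses markdown into a [`ParsedDocument`].
    ///
    /// A minimal illustrative sketch (marked `ignore` since it assumes the
    /// crate's public paths rather than being a verified doctest):
    ///
    /// ```ignore
    /// let doc = ParsedDocument::from_markdown("---\ntitle: Test\n---\n\nBody")?;
    /// assert_eq!(doc.get_field("title").unwrap().as_str(), Some("Test"));
    /// assert_eq!(doc.body(), Some("\nBody"));
    /// ```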
    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
        decompose(markdown).map_err(crate::error::ParseError::from)
    }

    pub fn quill_tag(&self) -> Option<&str> {
        self.quill_tag.as_deref()
    }

    pub fn body(&self) -> Option<&str> {
        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
    }

    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
        self.fields.get(name)
    }

    pub fn fields(&self) -> &HashMap<String, QuillValue> {
        &self.fields
    }
}

#[derive(Debug)]
struct MetadataBlock {
    start: usize,
    end: usize,
    yaml_content: String,
    tag: Option<String>,
    quill_name: Option<String>,
}

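/// Returns `true` if `name` matches the pattern `[a-z_][a-z0-9_]*`, the rule
/// enforced for both `SCOPE` field names and `QUILL` names.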
fn is_valid_tag_name(name: &str) -> bool {
    if name.is_empty() {
        return false;
    }

    let mut chars = name.chars();
    let first = chars.next().unwrap();

    if !first.is_ascii_lowercase() && first != '_' {
        return false;
    }

    for ch in chars {
        if !ch.is_ascii_lowercase() && !ch.is_ascii_digit() && ch != '_' {
            return false;
        }
    }

    true
}

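/// Scans the markdown for `---`-delimited metadata blocks, recording each
/// block's byte range, raw YAML content, and any `SCOPE` tag or `QUILL` name.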
fn find_metadata_blocks(
    markdown: &str,
) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
    let mut blocks = Vec::new();
    let mut pos = 0;

    while pos < markdown.len() {
        let search_str = &markdown[pos..];
        let delimiter_result = if let Some(p) = search_str.find("---\n") {
            Some((p, 4, "\n"))
        } else if let Some(p) = search_str.find("---\r\n") {
            Some((p, 5, "\r\n"))
        } else {
            None
        };

        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
            let abs_pos = pos + delimiter_pos;
            let content_start = abs_pos + delimiter_len;

            let preceded_by_blank = if abs_pos > 0 {
                let before = &markdown[..abs_pos];
                before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
            } else {
                false
            };

            let followed_by_blank = if content_start < markdown.len() {
                markdown[content_start..].starts_with('\n')
                    || markdown[content_start..].starts_with("\r\n")
            } else {
                false
            };

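            // A `---` followed by a blank line reads as a horizontal rule
            // (thematic break) rather than a metadata block opener, so skip
            // past it and keep scanning.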
            if preceded_by_blank && followed_by_blank {
                pos = abs_pos + 3;
                continue;
            }

            if followed_by_blank {
                pos = abs_pos + 3;
                continue;
            }

            let rest = &markdown[content_start..];

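            // Find the closing `---`: either one followed by a newline or one
            // sitting at the very end of the input, whichever comes first.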
            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
            let closing_with_newline = closing_patterns
                .iter()
                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
                .min_by_key(|(p, _)| *p);

            let closing_at_eof = ["\n---", "\r\n---"]
                .iter()
                .filter_map(|delim| {
                    rest.find(delim).and_then(|p| {
                        if p + delim.len() == rest.len() {
                            Some((p, delim.len()))
                        } else {
                            None
                        }
                    })
                })
                .min_by_key(|(p, _)| *p);

            let closing_result = match (closing_with_newline, closing_at_eof) {
                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
                (Some(_), Some(_)) => closing_with_newline,
                (Some(_), None) => closing_with_newline,
                (None, Some(_)) => closing_at_eof,
                (None, None) => None,
            };

            if let Some((closing_pos, closing_len)) = closing_result {
                let abs_closing_pos = content_start + closing_pos;
                let content = &markdown[content_start..abs_closing_pos];

                if content.len() > crate::error::MAX_YAML_SIZE {
                    return Err(format!(
                        "YAML block too large: {} bytes (max: {} bytes)",
                        content.len(),
                        crate::error::MAX_YAML_SIZE
                    )
                    .into());
                }

                let (tag, quill_name, yaml_content) = if !content.is_empty() {
                    match serde_yaml::from_str::<serde_yaml::Value>(content) {
                        Ok(yaml_value) => {
                            if let Some(mapping) = yaml_value.as_mapping() {
                                let quill_key = serde_yaml::Value::String("QUILL".to_string());
                                let scope_key = serde_yaml::Value::String("SCOPE".to_string());

                                let has_quill = mapping.contains_key(&quill_key);
                                let has_scope = mapping.contains_key(&scope_key);

                                if has_quill && has_scope {
                                    return Err(
                                        "Cannot specify both QUILL and SCOPE in the same block"
                                            .into(),
                                    );
                                }

                                if has_quill {
                                    let quill_value = mapping.get(&quill_key).unwrap();
                                    let quill_name_str = quill_value
                                        .as_str()
                                        .ok_or_else(|| "QUILL value must be a string")?;

                                    if !is_valid_tag_name(quill_name_str) {
                                        return Err(format!(
                                            "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
                                            quill_name_str
                                        )
                                        .into());
                                    }

                                    let mut new_mapping = mapping.clone();
                                    new_mapping.remove(&quill_key);
                                    let new_yaml = serde_yaml::to_string(&new_mapping)
                                        .map_err(|e| format!("Failed to serialize YAML: {}", e))?;

                                    (None, Some(quill_name_str.to_string()), new_yaml)
                                } else if has_scope {
                                    let scope_value = mapping.get(&scope_key).unwrap();
                                    let field_name = scope_value
                                        .as_str()
                                        .ok_or_else(|| "SCOPE value must be a string")?;

                                    if !is_valid_tag_name(field_name) {
                                        return Err(format!(
                                            "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
                                            field_name
                                        )
                                        .into());
                                    }

                                    if field_name == BODY_FIELD {
                                        return Err(format!(
                                            "Cannot use reserved field name '{}' as SCOPE value",
                                            BODY_FIELD
                                        )
                                        .into());
                                    }

                                    let mut new_mapping = mapping.clone();
                                    new_mapping.remove(&scope_key);
                                    let new_yaml = serde_yaml::to_string(&new_mapping)
                                        .map_err(|e| format!("Failed to serialize YAML: {}", e))?;

                                    (Some(field_name.to_string()), None, new_yaml)
                                } else {
                                    (None, None, content.to_string())
                                }
                            } else {
                                (None, None, content.to_string())
                            }
                        }
                        Err(_) => {
                            (None, None, content.to_string())
                        }
                    }
                } else {
                    (None, None, content.to_string())
                };

                blocks.push(MetadataBlock {
                    start: abs_pos,
                    end: abs_closing_pos + closing_len,
                    yaml_content,
                    tag,
                    quill_name,
                });

                pos = abs_closing_pos + closing_len;
            } else if abs_pos == 0 {
                return Err("Frontmatter started but not closed with ---".into());
            } else {
                pos = abs_pos + 3;
            }
        } else {
            break;
        }
    }

    Ok(blocks)
}

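/// Decomposes markdown into a [`ParsedDocument`]: global frontmatter becomes
/// top-level fields, a `QUILL` block sets the quill tag, `SCOPE` blocks are
/// collected into per-tag arrays, and the remaining text becomes the `body`
/// field.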
fn decompose(markdown: &str) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
    if markdown.len() > crate::error::MAX_INPUT_SIZE {
        return Err(format!(
            "Input too large: {} bytes (max: {} bytes)",
            markdown.len(),
            crate::error::MAX_INPUT_SIZE
        )
        .into());
    }

    let mut fields = HashMap::new();

    let blocks = find_metadata_blocks(markdown)?;

    if blocks.is_empty() {
        fields.insert(
            BODY_FIELD.to_string(),
            QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
        );
        return Ok(ParsedDocument::new(fields));
    }

    let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
    let mut has_global_frontmatter = false;
    let mut global_frontmatter_index: Option<usize> = None;
    let mut quill_name: Option<String> = None;

    for (idx, block) in blocks.iter().enumerate() {
        if let Some(ref name) = block.quill_name {
            if quill_name.is_some() {
                return Err("Multiple quill directives found: only one allowed".into());
            }
            quill_name = Some(name.clone());
        }

        if block.tag.is_none() && block.quill_name.is_none() {
            if has_global_frontmatter {
                return Err(
                    "Multiple global frontmatter blocks found: only one untagged block allowed"
                        .into(),
                );
            }
            has_global_frontmatter = true;
            global_frontmatter_index = Some(idx);
        }
    }

    if let Some(idx) = global_frontmatter_index {
        let block = &blocks[idx];

        let yaml_fields: HashMap<String, serde_yaml::Value> = if block.yaml_content.is_empty() {
            HashMap::new()
        } else {
            serde_yaml::from_str(&block.yaml_content)
                .map_err(|e| yaml_error_to_string(e, "Invalid YAML frontmatter"))?
        };

        for other_block in &blocks {
            if let Some(ref tag) = other_block.tag {
                if let Some(global_value) = yaml_fields.get(tag) {
                    if global_value.as_sequence().is_none() {
                        return Err(format!(
                            "Name collision: global field '{}' conflicts with tagged attribute",
                            tag
                        )
                        .into());
                    }
                }
            }
        }

        for (key, value) in yaml_fields {
            fields.insert(key, QuillValue::from_yaml(value)?);
        }
    }

    for block in &blocks {
        if block.quill_name.is_some() {
            if !block.yaml_content.is_empty() {
                let yaml_fields: HashMap<String, serde_yaml::Value> =
                    serde_yaml::from_str(&block.yaml_content)
                        .map_err(|e| yaml_error_to_string(e, "Invalid YAML in quill block"))?;

                for key in yaml_fields.keys() {
                    if fields.contains_key(key) {
                        return Err(format!(
                            "Name collision: quill block field '{}' conflicts with existing field",
                            key
                        )
                        .into());
                    }
                }

                for (key, value) in yaml_fields {
                    fields.insert(key, QuillValue::from_yaml(value)?);
                }
            }
        }
    }

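    // Each SCOPE block contributes its YAML fields plus the markdown that
    // follows it (up to the next block) as one item under its tag.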
    for (idx, block) in blocks.iter().enumerate() {
        if let Some(ref tag_name) = block.tag {
            if let Some(existing_value) = fields.get(tag_name) {
                if existing_value.as_array().is_none() {
                    return Err(format!(
                        "Name collision: tagged attribute '{}' conflicts with global field",
                        tag_name
                    )
                    .into());
                }
            }

            let mut item_fields: HashMap<String, serde_yaml::Value> = if block
                .yaml_content
                .is_empty()
            {
                HashMap::new()
            } else {
                serde_yaml::from_str(&block.yaml_content).map_err(|e| {
                    yaml_error_to_string(e, &format!("Invalid YAML in tagged block '{}'", tag_name))
                })?
            };

            let body_start = block.end;
            let body_end = if idx + 1 < blocks.len() {
                blocks[idx + 1].start
            } else {
                markdown.len()
            };
            let body = &markdown[body_start..body_end];

            item_fields.insert(
                BODY_FIELD.to_string(),
                serde_yaml::Value::String(body.to_string()),
            );

            let item_value = serde_yaml::to_value(item_fields)?;

            tagged_attributes
                .entry(tag_name.clone())
                .or_insert_with(Vec::new)
                .push(item_value);
        }
    }

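    // The global body runs from the end of the frontmatter (or quill) block to
    // the first tagged block; with neither present it starts at the beginning
    // of the document.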
    let first_non_scope_block_idx = blocks
        .iter()
        .position(|b| b.tag.is_none() && b.quill_name.is_none())
        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));

    let (body_start, body_end) = if let Some(idx) = first_non_scope_block_idx {
        let start = blocks[idx].end;

        let end = blocks
            .iter()
            .skip(idx + 1)
            .find(|b| b.tag.is_some())
            .map(|b| b.start)
            .unwrap_or(markdown.len());

        (start, end)
    } else {
        let end = blocks
            .iter()
            .find(|b| b.tag.is_some())
            .map(|b| b.start)
            .unwrap_or(0);

        (0, end)
    };

    let global_body = &markdown[body_start..body_end];

    fields.insert(
        BODY_FIELD.to_string(),
        QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
    );

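    // Merge SCOPE items into an existing global array for the same tag, or
    // create the array field if it does not exist yet.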
    for (tag_name, items) in tagged_attributes {
        if let Some(existing_value) = fields.get(&tag_name) {
            if let Some(existing_array) = existing_value.as_array() {
                let new_items_json: Vec<serde_json::Value> = items
                    .into_iter()
                    .map(|yaml_val| {
                        serde_json::to_value(&yaml_val)
                            .map_err(|e| format!("Failed to convert YAML to JSON: {}", e))
                    })
                    .collect::<Result<Vec<_>, _>>()?;

                let mut merged_array = existing_array.clone();
                merged_array.extend(new_items_json);

                let quill_value = QuillValue::from_json(serde_json::Value::Array(merged_array));
                fields.insert(tag_name, quill_value);
            } else {
                return Err(format!(
                    "Internal error: field '{}' exists but is not an array",
                    tag_name
                )
                .into());
            }
        } else {
            let quill_value = QuillValue::from_yaml(serde_yaml::Value::Sequence(items))?;
            fields.insert(tag_name, quill_value);
        }
    }

    let mut parsed = ParsedDocument::new(fields);

    if let Some(name) = quill_name {
        parsed.quill_tag = Some(name);
    }

    Ok(parsed)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_no_frontmatter() {
        let markdown = "# Hello World\n\nThis is a test.";
        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some(markdown));
        assert_eq!(doc.fields().len(), 1);
    }

    #[test]
    fn test_with_frontmatter() {
        let markdown = r#"---
title: Test Document
author: Test Author
---

# Hello World

This is the body."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Test Document"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Test Author"
        );
        assert_eq!(doc.fields().len(), 3);
    }

    #[test]
    fn test_complex_yaml_frontmatter() {
        let markdown = r#"---
title: Complex Document
tags:
  - test
  - yaml
metadata:
  version: 1.0
  nested:
    field: value
---

Content here."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\nContent here."));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Complex Document"
        );

        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
        assert_eq!(tags.len(), 2);
        assert_eq!(tags[0].as_str().unwrap(), "test");
        assert_eq!(tags[1].as_str().unwrap(), "yaml");
    }

    #[test]
    fn test_invalid_yaml() {
        let markdown = r#"---
title: [invalid yaml
author: missing close bracket
---

Content here."#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid YAML frontmatter"));
    }

    #[test]
    fn test_unclosed_frontmatter() {
        let markdown = r#"---
title: Test
author: Test Author

Content without closing ---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("not closed"));
    }

    #[test]
    fn test_basic_tagged_block() {
        let markdown = r#"---
title: Main Document
---

Main body content.

---
SCOPE: items
name: Item 1
---

Body of item 1."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Main Document"
        );

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
        assert_eq!(
            item.get("body").unwrap().as_str().unwrap(),
            "\nBody of item 1."
        );
    }

    #[test]
    fn test_multiple_tagged_blocks() {
        let markdown = r#"---
SCOPE: items
name: Item 1
tags: [a, b]
---

First item body.

---
SCOPE: items
name: Item 2
tags: [c, d]
---

Second item body."#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 2);

        let item1 = items[0].as_object().unwrap();
        assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");

        let item2 = items[1].as_object().unwrap();
        assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
    }

    #[test]
    fn test_mixed_global_and_tagged() {
        let markdown = r#"---
title: Global
author: John Doe
---

Global body.

---
SCOPE: sections
title: Section 1
---

Section 1 content.

---
SCOPE: sections
title: Section 2
---

Section 2 content."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));

        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
        assert_eq!(sections.len(), 2);
    }

    #[test]
    fn test_empty_tagged_metadata() {
        let markdown = r#"---
SCOPE: items
---

Body without metadata."#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(
            item.get("body").unwrap().as_str().unwrap(),
            "\nBody without metadata."
        );
    }

    #[test]
    fn test_tagged_block_without_body() {
        let markdown = r#"---
SCOPE: items
name: Item
---"#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
    }

    #[test]
    fn test_name_collision_global_and_tagged() {
        let markdown = r#"---
items: "global value"
---

Body

---
SCOPE: items
name: Item
---

Item body"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("collision"));
    }

    #[test]
    fn test_global_array_merged_with_scope() {
        let markdown = r#"---
items:
  - name: Global Item 1
    value: 100
  - name: Global Item 2
    value: 200
---

Global body

---
SCOPE: items
name: Scope Item 1
value: 300
---

Scope item 1 body

---
SCOPE: items
name: Scope Item 2
value: 400
---

Scope item 2 body"#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 4);

        let item1 = items[0].as_object().unwrap();
        assert_eq!(
            item1.get("name").unwrap().as_str().unwrap(),
            "Global Item 1"
        );
        assert_eq!(item1.get("value").unwrap().as_i64().unwrap(), 100);

        let item2 = items[1].as_object().unwrap();
        assert_eq!(
            item2.get("name").unwrap().as_str().unwrap(),
            "Global Item 2"
        );
        assert_eq!(item2.get("value").unwrap().as_i64().unwrap(), 200);

        let item3 = items[2].as_object().unwrap();
        assert_eq!(item3.get("name").unwrap().as_str().unwrap(), "Scope Item 1");
        assert_eq!(item3.get("value").unwrap().as_i64().unwrap(), 300);
        assert_eq!(
            item3.get("body").unwrap().as_str().unwrap(),
            "\nScope item 1 body\n\n"
        );

        let item4 = items[3].as_object().unwrap();
        assert_eq!(item4.get("name").unwrap().as_str().unwrap(), "Scope Item 2");
        assert_eq!(item4.get("value").unwrap().as_i64().unwrap(), 400);
        assert_eq!(
            item4.get("body").unwrap().as_str().unwrap(),
            "\nScope item 2 body"
        );
    }

    #[test]
    fn test_empty_global_array_with_scope() {
        let markdown = r#"---
items: []
---

Global body

---
SCOPE: items
name: Item 1
---

Item 1 body"#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
    }

    #[test]
    fn test_reserved_field_name() {
        let markdown = r#"---
SCOPE: body
content: Test
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("reserved"));
    }

    #[test]
    fn test_invalid_tag_syntax() {
        let markdown = r#"---
SCOPE: Invalid-Name
title: Test
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid field name"));
    }

    #[test]
    fn test_multiple_global_frontmatter_blocks() {
        let markdown = r#"---
title: First
---

Body

---
author: Second
---

More body"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Multiple global frontmatter"));
    }

    #[test]
    fn test_adjacent_blocks_different_tags() {
        let markdown = r#"---
SCOPE: items
name: Item 1
---

Item 1 body

---
SCOPE: sections
title: Section 1
---

Section 1 body"#;

        let doc = decompose(markdown).unwrap();

        assert!(doc.get_field("items").is_some());
        assert!(doc.get_field("sections").is_some());

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
        assert_eq!(sections.len(), 1);
    }

    #[test]
    fn test_order_preservation() {
        let markdown = r#"---
SCOPE: items
id: 1
---

First

---
SCOPE: items
id: 2
---

Second

---
SCOPE: items
id: 3
---

Third"#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 3);

        for (i, item) in items.iter().enumerate() {
            let mapping = item.as_object().unwrap();
            let id = mapping.get("id").unwrap().as_i64().unwrap();
            assert_eq!(id, (i + 1) as i64);
        }
    }

    #[test]
    fn test_product_catalog_integration() {
        let markdown = r#"---
title: Product Catalog
author: John Doe
date: 2024-01-01
---

This is the main catalog description.

---
SCOPE: products
name: Widget A
price: 19.99
sku: WID-001
---

The **Widget A** is our most popular product.

---
SCOPE: products
name: Gadget B
price: 29.99
sku: GAD-002
---

The **Gadget B** is perfect for professionals.

---
SCOPE: reviews
product: Widget A
rating: 5
---

"Excellent product! Highly recommended."

---
SCOPE: reviews
product: Gadget B
rating: 4
---

"Very good, but a bit pricey.""#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Product Catalog"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "John Doe"
        );
        assert_eq!(
            doc.get_field("date").unwrap().as_str().unwrap(),
            "2024-01-01"
        );

        assert!(doc.body().unwrap().contains("main catalog description"));

        let products = doc.get_field("products").unwrap().as_sequence().unwrap();
        assert_eq!(products.len(), 2);

        let product1 = products[0].as_object().unwrap();
        assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
        assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);

        let reviews = doc.get_field("reviews").unwrap().as_sequence().unwrap();
        assert_eq!(reviews.len(), 2);

        let review1 = reviews[0].as_object().unwrap();
        assert_eq!(
            review1.get("product").unwrap().as_str().unwrap(),
            "Widget A"
        );
        assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);

        assert_eq!(doc.fields().len(), 6);
    }

    #[test]
    fn test_quill_directive() {
        let markdown = r#"---
QUILL: usaf_memo
memo_for: [ORG/SYMBOL]
memo_from: [ORG/SYMBOL]
---

This is the memo body."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.quill_tag(), Some("usaf_memo"));

        assert_eq!(
            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
                .as_str()
                .unwrap(),
            "ORG/SYMBOL"
        );

        assert_eq!(doc.body(), Some("\nThis is the memo body."));
    }

    #[test]
    fn test_quill_with_scope_blocks() {
        let markdown = r#"---
QUILL: document
title: Test Document
---

Main body.

---
SCOPE: sections
name: Section 1
---

Section 1 body."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.quill_tag(), Some("document"));

        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Test Document"
        );

        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
        assert_eq!(sections.len(), 1);

        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
    }

    #[test]
    fn test_multiple_quill_directives_error() {
        let markdown = r#"---
QUILL: first
---

---
QUILL: second
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Multiple quill directives"));
    }

    #[test]
    fn test_invalid_quill_name() {
        let markdown = r#"---
QUILL: Invalid-Name
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid quill name"));
    }

    #[test]
    fn test_quill_wrong_value_type() {
        let markdown = r#"---
QUILL: 123
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("QUILL value must be a string"));
    }

    #[test]
    fn test_scope_wrong_value_type() {
        let markdown = r#"---
SCOPE: 123
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("SCOPE value must be a string"));
    }

    #[test]
    fn test_both_quill_and_scope_error() {
        let markdown = r#"---
QUILL: test
SCOPE: items
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Cannot specify both QUILL and SCOPE"));
    }

    #[test]
    fn test_blank_lines_in_frontmatter() {
        let markdown = r#"---
title: Test Document
author: Test Author

description: This has a blank line above it
tags:
  - one
  - two
---

# Hello World

This is the body."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Test Document"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Test Author"
        );
        assert_eq!(
            doc.get_field("description").unwrap().as_str().unwrap(),
            "This has a blank line above it"
        );

        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
        assert_eq!(tags.len(), 2);
    }

    #[test]
    fn test_blank_lines_in_scope_blocks() {
        let markdown = r#"---
SCOPE: items
name: Item 1

price: 19.99

tags:
  - electronics
  - gadgets
---

Body of item 1."#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
        assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);

        let tags = item.get("tags").unwrap().as_array().unwrap();
        assert_eq!(tags.len(), 2);
    }

    #[test]
    fn test_horizontal_rule_with_blank_lines_above_and_below() {
        let markdown = r#"---
title: Test
---

First paragraph.

---

Second paragraph."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");

        let body = doc.body().unwrap();
        assert!(body.contains("First paragraph."));
        assert!(body.contains("---"));
        assert!(body.contains("Second paragraph."));
    }

    #[test]
    fn test_horizontal_rule_not_preceded_by_blank() {
        let markdown = r#"---
title: Test
---

First paragraph.
---

Second paragraph."#;

        let doc = decompose(markdown).unwrap();

        let body = doc.body().unwrap();
        assert!(body.contains("---"));
    }

    #[test]
    fn test_multiple_blank_lines_in_yaml() {
        let markdown = r#"---
title: Test


author: John Doe


version: 1.0
---

Body content."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "John Doe"
        );
        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
    }
}

#[cfg(test)]
mod demo_file_test {
    use super::*;

    #[test]
    fn test_extended_metadata_demo_file() {
        let markdown = include_str!("../../quillmark-fixtures/resources/extended_metadata_demo.md");
        let doc = decompose(markdown).unwrap();

        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Extended Metadata Demo"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Quillmark Team"
        );
        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);

        assert!(doc
            .body()
            .unwrap()
            .contains("extended YAML metadata standard"));

        let features = doc.get_field("features").unwrap().as_sequence().unwrap();
        assert_eq!(features.len(), 3);

        let use_cases = doc.get_field("use_cases").unwrap().as_sequence().unwrap();
        assert_eq!(use_cases.len(), 2);

        let feature1 = features[0].as_object().unwrap();
        assert_eq!(
            feature1.get("name").unwrap().as_str().unwrap(),
            "Tag Directives"
        );
    }

    #[test]
    fn test_input_size_limit() {
        let size = crate::error::MAX_INPUT_SIZE + 1;
        let large_markdown = "a".repeat(size);

        let result = decompose(&large_markdown);
        assert!(result.is_err());

        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("Input too large"));
    }

    #[test]
    fn test_yaml_size_limit() {
        let mut markdown = String::from("---\n");

        let size = crate::error::MAX_YAML_SIZE + 1;
        markdown.push_str("data: \"");
        markdown.push_str(&"x".repeat(size));
        markdown.push_str("\"\n---\n\nBody");

        let result = decompose(&markdown);
        assert!(result.is_err());

        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("YAML block too large"));
    }

    #[test]
    fn test_input_within_size_limit() {
        let size = 1000;
        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));

        let result = decompose(&markdown);
        assert!(result.is_ok());
    }

    #[test]
    fn test_yaml_within_size_limit() {
        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";

        let result = decompose(&markdown);
        assert!(result.is_ok());
    }
1527}