1use std::collections::HashMap;
50
51use crate::value::QuillValue;
52
/// Reserved field name under which body text is stored, both for the document
/// as a whole and for each tagged (`SCOPE`) item.
pub const BODY_FIELD: &str = "body";
55
56fn yaml_error_to_string(e: serde_yaml::Error, context: &str) -> String {
58 let mut msg = format!("{}: {}", context, e);
59
60 if let Some(loc) = e.location() {
61 msg.push_str(&format!(" at line {}, column {}", loc.line(), loc.column()));
62 }
63
64 msg
65}
66
/// The string `"quill"`.
///
/// NOTE(review): nothing in the visible part of this file reads this constant;
/// presumably it is consumed by other modules — confirm before changing it.
pub const QUILL_TAG: &str = "quill";
69
70#[derive(Debug, Clone)]
72pub struct ParsedDocument {
73 fields: HashMap<String, QuillValue>,
74 quill_tag: Option<String>,
75}
76
77impl ParsedDocument {
78 pub fn new(fields: HashMap<String, QuillValue>) -> Self {
80 Self {
81 fields,
82 quill_tag: None,
83 }
84 }
85
86 pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: Option<String>) -> Self {
88 Self { fields, quill_tag }
89 }
90
91 pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
93 decompose(markdown).map_err(|e| crate::error::ParseError::from(e))
94 }
95
96 pub fn quill_tag(&self) -> Option<&str> {
98 self.quill_tag.as_deref()
99 }
100
101 pub fn body(&self) -> Option<&str> {
103 self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
104 }
105
106 pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
108 self.fields.get(name)
109 }
110
111 pub fn fields(&self) -> &HashMap<String, QuillValue> {
113 &self.fields
114 }
115
116 pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
130 let mut fields = self.fields.clone();
131
132 for (field_name, default_value) in defaults {
133 if !fields.contains_key(field_name) {
135 fields.insert(field_name.clone(), default_value.clone());
136 }
137 }
138
139 Self {
140 fields,
141 quill_tag: self.quill_tag.clone(),
142 }
143 }
144
145 pub fn with_coercion(&self, schema: &QuillValue) -> Self {
163 use crate::schema::coerce_document;
164
165 let coerced_fields = coerce_document(schema, &self.fields);
166
167 Self {
168 fields: coerced_fields,
169 quill_tag: self.quill_tag.clone(),
170 }
171 }
172}
173
/// One `---`-delimited metadata block located inside a markdown document.
#[derive(Debug)]
struct MetadataBlock {
    /// Byte offset of the opening `---`.
    start: usize,
    /// Byte offset just past the closing delimiter (including its line ending, if any).
    end: usize,
    /// YAML text of the block; when a `QUILL`/`SCOPE` key was present this is
    /// the re-serialized mapping with that key removed.
    yaml_content: String,
    /// Field name taken from a `SCOPE` key, if the block had one.
    tag: Option<String>,
    /// Quill name taken from a `QUILL` key, if the block had one.
    quill_name: Option<String>,
}
182
/// Returns `true` when `name` matches `[a-z_][a-z0-9_]*`.
///
/// The empty string is rejected, as is any non-ASCII or upper-case character.
fn is_valid_tag_name(name: &str) -> bool {
    let mut remaining = name.chars();

    match remaining.next() {
        // The leading character may not be a digit.
        Some(head) if head.is_ascii_lowercase() || head == '_' => {
            remaining.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
        }
        _ => false,
    }
}
204
205fn find_metadata_blocks(
207 markdown: &str,
208) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
209 let mut blocks = Vec::new();
210 let mut pos = 0;
211
212 while pos < markdown.len() {
213 let search_str = &markdown[pos..];
215 let delimiter_result = if let Some(p) = search_str.find("---\n") {
216 Some((p, 4, "\n"))
217 } else if let Some(p) = search_str.find("---\r\n") {
218 Some((p, 5, "\r\n"))
219 } else {
220 None
221 };
222
223 if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
224 let abs_pos = pos + delimiter_pos;
225 let content_start = abs_pos + delimiter_len; let preceded_by_blank = if abs_pos > 0 {
229 let before = &markdown[..abs_pos];
231 before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
232 } else {
233 false
234 };
235
236 let followed_by_blank = if content_start < markdown.len() {
237 markdown[content_start..].starts_with('\n')
238 || markdown[content_start..].starts_with("\r\n")
239 } else {
240 false
241 };
242
243 if preceded_by_blank && followed_by_blank {
245 pos = abs_pos + 3; continue;
248 }
249
250 if followed_by_blank {
253 pos = abs_pos + 3;
256 continue;
257 }
258
259 let rest = &markdown[content_start..];
262
263 let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
265 let closing_with_newline = closing_patterns
266 .iter()
267 .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
268 .min_by_key(|(p, _)| *p);
269
270 let closing_at_eof = ["\n---", "\r\n---"]
272 .iter()
273 .filter_map(|delim| {
274 rest.find(delim).and_then(|p| {
275 if p + delim.len() == rest.len() {
276 Some((p, delim.len()))
277 } else {
278 None
279 }
280 })
281 })
282 .min_by_key(|(p, _)| *p);
283
284 let closing_result = match (closing_with_newline, closing_at_eof) {
285 (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
286 (Some(_), Some(_)) => closing_with_newline,
287 (Some(_), None) => closing_with_newline,
288 (None, Some(_)) => closing_at_eof,
289 (None, None) => None,
290 };
291
292 if let Some((closing_pos, closing_len)) = closing_result {
293 let abs_closing_pos = content_start + closing_pos;
294 let content = &markdown[content_start..abs_closing_pos];
295
296 if content.len() > crate::error::MAX_YAML_SIZE {
298 return Err(format!(
299 "YAML block too large: {} bytes (max: {} bytes)",
300 content.len(),
301 crate::error::MAX_YAML_SIZE
302 )
303 .into());
304 }
305
306 let (tag, quill_name, yaml_content) = if !content.is_empty() {
309 match serde_yaml::from_str::<serde_yaml::Value>(content) {
311 Ok(yaml_value) => {
312 if let Some(mapping) = yaml_value.as_mapping() {
313 let quill_key = serde_yaml::Value::String("QUILL".to_string());
314 let scope_key = serde_yaml::Value::String("SCOPE".to_string());
315
316 let has_quill = mapping.contains_key(&quill_key);
317 let has_scope = mapping.contains_key(&scope_key);
318
319 if has_quill && has_scope {
320 return Err(
321 "Cannot specify both QUILL and SCOPE in the same block"
322 .into(),
323 );
324 }
325
326 if has_quill {
327 let quill_value = mapping.get(&quill_key).unwrap();
329 let quill_name_str = quill_value
330 .as_str()
331 .ok_or_else(|| "QUILL value must be a string")?;
332
333 if !is_valid_tag_name(quill_name_str) {
334 return Err(format!(
335 "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
336 quill_name_str
337 )
338 .into());
339 }
340
341 let mut new_mapping = mapping.clone();
343 new_mapping.remove(&quill_key);
344 let new_yaml = serde_yaml::to_string(&new_mapping)
345 .map_err(|e| format!("Failed to serialize YAML: {}", e))?;
346
347 (None, Some(quill_name_str.to_string()), new_yaml)
348 } else if has_scope {
349 let scope_value = mapping.get(&scope_key).unwrap();
351 let field_name = scope_value
352 .as_str()
353 .ok_or_else(|| "SCOPE value must be a string")?;
354
355 if !is_valid_tag_name(field_name) {
356 return Err(format!(
357 "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
358 field_name
359 )
360 .into());
361 }
362
363 if field_name == BODY_FIELD {
364 return Err(format!(
365 "Cannot use reserved field name '{}' as SCOPE value",
366 BODY_FIELD
367 )
368 .into());
369 }
370
371 let mut new_mapping = mapping.clone();
373 new_mapping.remove(&scope_key);
374 let new_yaml = serde_yaml::to_string(&new_mapping)
375 .map_err(|e| format!("Failed to serialize YAML: {}", e))?;
376
377 (Some(field_name.to_string()), None, new_yaml)
378 } else {
379 (None, None, content.to_string())
381 }
382 } else {
383 (None, None, content.to_string())
385 }
386 }
387 Err(_) => {
388 (None, None, content.to_string())
390 }
391 }
392 } else {
393 (None, None, content.to_string())
394 };
395
396 blocks.push(MetadataBlock {
397 start: abs_pos,
398 end: abs_closing_pos + closing_len, yaml_content,
400 tag,
401 quill_name,
402 });
403
404 pos = abs_closing_pos + closing_len;
405 } else if abs_pos == 0 {
406 return Err("Frontmatter started but not closed with ---".into());
408 } else {
409 pos = abs_pos + 3;
411 }
412 } else {
413 break;
414 }
415 }
416
417 Ok(blocks)
418}
419
420fn decompose(markdown: &str) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
422 if markdown.len() > crate::error::MAX_INPUT_SIZE {
424 return Err(format!(
425 "Input too large: {} bytes (max: {} bytes)",
426 markdown.len(),
427 crate::error::MAX_INPUT_SIZE
428 )
429 .into());
430 }
431
432 let mut fields = HashMap::new();
433
434 let blocks = find_metadata_blocks(markdown)?;
436
437 if blocks.is_empty() {
438 fields.insert(
440 BODY_FIELD.to_string(),
441 QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
442 );
443 return Ok(ParsedDocument::new(fields));
444 }
445
446 let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
448 let mut has_global_frontmatter = false;
449 let mut global_frontmatter_index: Option<usize> = None;
450 let mut quill_name: Option<String> = None;
451
452 for (idx, block) in blocks.iter().enumerate() {
454 if let Some(ref name) = block.quill_name {
456 if quill_name.is_some() {
457 return Err("Multiple quill directives found: only one allowed".into());
458 }
459 quill_name = Some(name.clone());
460 }
461
462 if block.tag.is_none() && block.quill_name.is_none() {
464 if has_global_frontmatter {
465 return Err(
466 "Multiple global frontmatter blocks found: only one untagged block allowed"
467 .into(),
468 );
469 }
470 has_global_frontmatter = true;
471 global_frontmatter_index = Some(idx);
472 }
473 }
474
475 if let Some(idx) = global_frontmatter_index {
477 let block = &blocks[idx];
478
479 let yaml_fields: HashMap<String, serde_yaml::Value> = if block.yaml_content.is_empty() {
481 HashMap::new()
482 } else {
483 serde_yaml::from_str(&block.yaml_content)
484 .map_err(|e| yaml_error_to_string(e, "Invalid YAML frontmatter"))?
485 };
486
487 for other_block in &blocks {
490 if let Some(ref tag) = other_block.tag {
491 if let Some(global_value) = yaml_fields.get(tag) {
492 if global_value.as_sequence().is_none() {
494 return Err(format!(
495 "Name collision: global field '{}' conflicts with tagged attribute",
496 tag
497 )
498 .into());
499 }
500 }
501 }
502 }
503
504 for (key, value) in yaml_fields {
506 fields.insert(key, QuillValue::from_yaml(value)?);
507 }
508 }
509
510 for block in &blocks {
512 if block.quill_name.is_some() {
513 if !block.yaml_content.is_empty() {
515 let yaml_fields: HashMap<String, serde_yaml::Value> =
516 serde_yaml::from_str(&block.yaml_content)
517 .map_err(|e| yaml_error_to_string(e, "Invalid YAML in quill block"))?;
518
519 for key in yaml_fields.keys() {
521 if fields.contains_key(key) {
522 return Err(format!(
523 "Name collision: quill block field '{}' conflicts with existing field",
524 key
525 )
526 .into());
527 }
528 }
529
530 for (key, value) in yaml_fields {
532 fields.insert(key, QuillValue::from_yaml(value)?);
533 }
534 }
535 }
536 }
537
538 for (idx, block) in blocks.iter().enumerate() {
540 if let Some(ref tag_name) = block.tag {
541 if let Some(existing_value) = fields.get(tag_name) {
544 if existing_value.as_array().is_none() {
545 return Err(format!(
546 "Name collision: tagged attribute '{}' conflicts with global field",
547 tag_name
548 )
549 .into());
550 }
551 }
552
553 let mut item_fields: HashMap<String, serde_yaml::Value> = if block
555 .yaml_content
556 .is_empty()
557 {
558 HashMap::new()
559 } else {
560 serde_yaml::from_str(&block.yaml_content).map_err(|e| {
561 yaml_error_to_string(e, &format!("Invalid YAML in tagged block '{}'", tag_name))
562 })?
563 };
564
565 let body_start = block.end;
567 let body_end = if idx + 1 < blocks.len() {
568 blocks[idx + 1].start
569 } else {
570 markdown.len()
571 };
572 let body = &markdown[body_start..body_end];
573
574 item_fields.insert(
576 BODY_FIELD.to_string(),
577 serde_yaml::Value::String(body.to_string()),
578 );
579
580 let item_value = serde_yaml::to_value(item_fields)?;
582
583 tagged_attributes
585 .entry(tag_name.clone())
586 .or_insert_with(Vec::new)
587 .push(item_value);
588 }
589 }
590
591 let first_non_scope_block_idx = blocks
595 .iter()
596 .position(|b| b.tag.is_none() && b.quill_name.is_none())
597 .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));
598
599 let (body_start, body_end) = if let Some(idx) = first_non_scope_block_idx {
600 let start = blocks[idx].end;
602
603 let end = blocks
605 .iter()
606 .skip(idx + 1)
607 .find(|b| b.tag.is_some())
608 .map(|b| b.start)
609 .unwrap_or(markdown.len());
610
611 (start, end)
612 } else {
613 let end = blocks
615 .iter()
616 .find(|b| b.tag.is_some())
617 .map(|b| b.start)
618 .unwrap_or(0);
619
620 (0, end)
621 };
622
623 let global_body = &markdown[body_start..body_end];
624
625 fields.insert(
626 BODY_FIELD.to_string(),
627 QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
628 );
629
630 for (tag_name, items) in tagged_attributes {
633 if let Some(existing_value) = fields.get(&tag_name) {
634 if let Some(existing_array) = existing_value.as_array() {
636 let new_items_json: Vec<serde_json::Value> = items
638 .into_iter()
639 .map(|yaml_val| {
640 serde_json::to_value(&yaml_val)
641 .map_err(|e| format!("Failed to convert YAML to JSON: {}", e))
642 })
643 .collect::<Result<Vec<_>, _>>()?;
644
645 let mut merged_array = existing_array.clone();
647 merged_array.extend(new_items_json);
648
649 let quill_value = QuillValue::from_json(serde_json::Value::Array(merged_array));
651 fields.insert(tag_name, quill_value);
652 } else {
653 return Err(format!(
655 "Internal error: field '{}' exists but is not an array",
656 tag_name
657 )
658 .into());
659 }
660 } else {
661 let quill_value = QuillValue::from_yaml(serde_yaml::Value::Sequence(items))?;
663 fields.insert(tag_name, quill_value);
664 }
665 }
666
667 let mut parsed = ParsedDocument::new(fields);
668
669 if let Some(name) = quill_name {
671 parsed.quill_tag = Some(name);
672 }
673
674 Ok(parsed)
675}
676
677#[cfg(test)]
678mod tests {
679 use super::*;
680
681 #[test]
682 fn test_no_frontmatter() {
683 let markdown = "# Hello World\n\nThis is a test.";
684 let doc = decompose(markdown).unwrap();
685
686 assert_eq!(doc.body(), Some(markdown));
687 assert_eq!(doc.fields().len(), 1);
688 }
689
690 #[test]
691 fn test_with_frontmatter() {
692 let markdown = r#"---
693title: Test Document
694author: Test Author
695---
696
697# Hello World
698
699This is the body."#;
700
701 let doc = decompose(markdown).unwrap();
702
703 assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
704 assert_eq!(
705 doc.get_field("title").unwrap().as_str().unwrap(),
706 "Test Document"
707 );
708 assert_eq!(
709 doc.get_field("author").unwrap().as_str().unwrap(),
710 "Test Author"
711 );
712 assert_eq!(doc.fields().len(), 3); }
714
715 #[test]
716 fn test_complex_yaml_frontmatter() {
717 let markdown = r#"---
718title: Complex Document
719tags:
720 - test
721 - yaml
722metadata:
723 version: 1.0
724 nested:
725 field: value
726---
727
728Content here."#;
729
730 let doc = decompose(markdown).unwrap();
731
732 assert_eq!(doc.body(), Some("\nContent here."));
733 assert_eq!(
734 doc.get_field("title").unwrap().as_str().unwrap(),
735 "Complex Document"
736 );
737
738 let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
739 assert_eq!(tags.len(), 2);
740 assert_eq!(tags[0].as_str().unwrap(), "test");
741 assert_eq!(tags[1].as_str().unwrap(), "yaml");
742 }
743
744 #[test]
745 fn test_with_defaults_empty_document() {
746 use std::collections::HashMap;
747
748 let mut defaults = HashMap::new();
749 defaults.insert(
750 "status".to_string(),
751 QuillValue::from_json(serde_json::json!("draft")),
752 );
753 defaults.insert(
754 "version".to_string(),
755 QuillValue::from_json(serde_json::json!(1)),
756 );
757
758 let doc = ParsedDocument::new(HashMap::new());
760 let doc_with_defaults = doc.with_defaults(&defaults);
761
762 assert_eq!(
764 doc_with_defaults
765 .get_field("status")
766 .unwrap()
767 .as_str()
768 .unwrap(),
769 "draft"
770 );
771 assert_eq!(
772 doc_with_defaults
773 .get_field("version")
774 .unwrap()
775 .as_number()
776 .unwrap()
777 .as_i64()
778 .unwrap(),
779 1
780 );
781 }
782
783 #[test]
784 fn test_with_defaults_preserves_existing_values() {
785 use std::collections::HashMap;
786
787 let mut defaults = HashMap::new();
788 defaults.insert(
789 "status".to_string(),
790 QuillValue::from_json(serde_json::json!("draft")),
791 );
792
793 let mut fields = HashMap::new();
795 fields.insert(
796 "status".to_string(),
797 QuillValue::from_json(serde_json::json!("published")),
798 );
799 let doc = ParsedDocument::new(fields);
800
801 let doc_with_defaults = doc.with_defaults(&defaults);
802
803 assert_eq!(
805 doc_with_defaults
806 .get_field("status")
807 .unwrap()
808 .as_str()
809 .unwrap(),
810 "published"
811 );
812 }
813
814 #[test]
815 fn test_with_defaults_partial_application() {
816 use std::collections::HashMap;
817
818 let mut defaults = HashMap::new();
819 defaults.insert(
820 "status".to_string(),
821 QuillValue::from_json(serde_json::json!("draft")),
822 );
823 defaults.insert(
824 "version".to_string(),
825 QuillValue::from_json(serde_json::json!(1)),
826 );
827
828 let mut fields = HashMap::new();
830 fields.insert(
831 "status".to_string(),
832 QuillValue::from_json(serde_json::json!("published")),
833 );
834 let doc = ParsedDocument::new(fields);
835
836 let doc_with_defaults = doc.with_defaults(&defaults);
837
838 assert_eq!(
840 doc_with_defaults
841 .get_field("status")
842 .unwrap()
843 .as_str()
844 .unwrap(),
845 "published"
846 );
847 assert_eq!(
848 doc_with_defaults
849 .get_field("version")
850 .unwrap()
851 .as_number()
852 .unwrap()
853 .as_i64()
854 .unwrap(),
855 1
856 );
857 }
858
859 #[test]
860 fn test_with_defaults_no_defaults() {
861 use std::collections::HashMap;
862
863 let defaults = HashMap::new(); let doc = ParsedDocument::new(HashMap::new());
866 let doc_with_defaults = doc.with_defaults(&defaults);
867
868 assert!(doc_with_defaults.fields().is_empty());
870 }
871
872 #[test]
873 fn test_with_defaults_complex_types() {
874 use std::collections::HashMap;
875
876 let mut defaults = HashMap::new();
877 defaults.insert(
878 "tags".to_string(),
879 QuillValue::from_json(serde_json::json!(["default", "tag"])),
880 );
881
882 let doc = ParsedDocument::new(HashMap::new());
883 let doc_with_defaults = doc.with_defaults(&defaults);
884
885 let tags = doc_with_defaults
887 .get_field("tags")
888 .unwrap()
889 .as_sequence()
890 .unwrap();
891 assert_eq!(tags.len(), 2);
892 assert_eq!(tags[0].as_str().unwrap(), "default");
893 assert_eq!(tags[1].as_str().unwrap(), "tag");
894 }
895
896 #[test]
897 fn test_with_coercion_singular_to_array() {
898 use std::collections::HashMap;
899
900 let schema = QuillValue::from_json(serde_json::json!({
901 "$schema": "https://json-schema.org/draft/2019-09/schema",
902 "type": "object",
903 "properties": {
904 "tags": {"type": "array"}
905 }
906 }));
907
908 let mut fields = HashMap::new();
909 fields.insert(
910 "tags".to_string(),
911 QuillValue::from_json(serde_json::json!("single-tag")),
912 );
913 let doc = ParsedDocument::new(fields);
914
915 let coerced_doc = doc.with_coercion(&schema);
916
917 let tags = coerced_doc.get_field("tags").unwrap();
918 assert!(tags.as_array().is_some());
919 let tags_array = tags.as_array().unwrap();
920 assert_eq!(tags_array.len(), 1);
921 assert_eq!(tags_array[0].as_str().unwrap(), "single-tag");
922 }
923
924 #[test]
925 fn test_with_coercion_string_to_boolean() {
926 use std::collections::HashMap;
927
928 let schema = QuillValue::from_json(serde_json::json!({
929 "$schema": "https://json-schema.org/draft/2019-09/schema",
930 "type": "object",
931 "properties": {
932 "active": {"type": "boolean"}
933 }
934 }));
935
936 let mut fields = HashMap::new();
937 fields.insert(
938 "active".to_string(),
939 QuillValue::from_json(serde_json::json!("true")),
940 );
941 let doc = ParsedDocument::new(fields);
942
943 let coerced_doc = doc.with_coercion(&schema);
944
945 assert_eq!(
946 coerced_doc.get_field("active").unwrap().as_bool().unwrap(),
947 true
948 );
949 }
950
951 #[test]
952 fn test_with_coercion_string_to_number() {
953 use std::collections::HashMap;
954
955 let schema = QuillValue::from_json(serde_json::json!({
956 "$schema": "https://json-schema.org/draft/2019-09/schema",
957 "type": "object",
958 "properties": {
959 "count": {"type": "number"}
960 }
961 }));
962
963 let mut fields = HashMap::new();
964 fields.insert(
965 "count".to_string(),
966 QuillValue::from_json(serde_json::json!("42")),
967 );
968 let doc = ParsedDocument::new(fields);
969
970 let coerced_doc = doc.with_coercion(&schema);
971
972 assert_eq!(
973 coerced_doc.get_field("count").unwrap().as_i64().unwrap(),
974 42
975 );
976 }
977
978 #[test]
979 fn test_invalid_yaml() {
980 let markdown = r#"---
981title: [invalid yaml
982author: missing close bracket
983---
984
985Content here."#;
986
987 let result = decompose(markdown);
988 assert!(result.is_err());
989 assert!(result
990 .unwrap_err()
991 .to_string()
992 .contains("Invalid YAML frontmatter"));
993 }
994
995 #[test]
996 fn test_unclosed_frontmatter() {
997 let markdown = r#"---
998title: Test
999author: Test Author
1000
1001Content without closing ---"#;
1002
1003 let result = decompose(markdown);
1004 assert!(result.is_err());
1005 assert!(result.unwrap_err().to_string().contains("not closed"));
1006 }
1007
1008 #[test]
1011 fn test_basic_tagged_block() {
1012 let markdown = r#"---
1013title: Main Document
1014---
1015
1016Main body content.
1017
1018---
1019SCOPE: items
1020name: Item 1
1021---
1022
1023Body of item 1."#;
1024
1025 let doc = decompose(markdown).unwrap();
1026
1027 assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
1028 assert_eq!(
1029 doc.get_field("title").unwrap().as_str().unwrap(),
1030 "Main Document"
1031 );
1032
1033 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1034 assert_eq!(items.len(), 1);
1035
1036 let item = items[0].as_object().unwrap();
1037 assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1038 assert_eq!(
1039 item.get("body").unwrap().as_str().unwrap(),
1040 "\nBody of item 1."
1041 );
1042 }
1043
1044 #[test]
1045 fn test_multiple_tagged_blocks() {
1046 let markdown = r#"---
1047SCOPE: items
1048name: Item 1
1049tags: [a, b]
1050---
1051
1052First item body.
1053
1054---
1055SCOPE: items
1056name: Item 2
1057tags: [c, d]
1058---
1059
1060Second item body."#;
1061
1062 let doc = decompose(markdown).unwrap();
1063
1064 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1065 assert_eq!(items.len(), 2);
1066
1067 let item1 = items[0].as_object().unwrap();
1068 assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");
1069
1070 let item2 = items[1].as_object().unwrap();
1071 assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
1072 }
1073
1074 #[test]
1075 fn test_mixed_global_and_tagged() {
1076 let markdown = r#"---
1077title: Global
1078author: John Doe
1079---
1080
1081Global body.
1082
1083---
1084SCOPE: sections
1085title: Section 1
1086---
1087
1088Section 1 content.
1089
1090---
1091SCOPE: sections
1092title: Section 2
1093---
1094
1095Section 2 content."#;
1096
1097 let doc = decompose(markdown).unwrap();
1098
1099 assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
1100 assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
1101
1102 let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1103 assert_eq!(sections.len(), 2);
1104 }
1105
1106 #[test]
1107 fn test_empty_tagged_metadata() {
1108 let markdown = r#"---
1109SCOPE: items
1110---
1111
1112Body without metadata."#;
1113
1114 let doc = decompose(markdown).unwrap();
1115
1116 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1117 assert_eq!(items.len(), 1);
1118
1119 let item = items[0].as_object().unwrap();
1120 assert_eq!(
1121 item.get("body").unwrap().as_str().unwrap(),
1122 "\nBody without metadata."
1123 );
1124 }
1125
1126 #[test]
1127 fn test_tagged_block_without_body() {
1128 let markdown = r#"---
1129SCOPE: items
1130name: Item
1131---"#;
1132
1133 let doc = decompose(markdown).unwrap();
1134
1135 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1136 assert_eq!(items.len(), 1);
1137
1138 let item = items[0].as_object().unwrap();
1139 assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
1140 }
1141
1142 #[test]
1143 fn test_name_collision_global_and_tagged() {
1144 let markdown = r#"---
1145items: "global value"
1146---
1147
1148Body
1149
1150---
1151SCOPE: items
1152name: Item
1153---
1154
1155Item body"#;
1156
1157 let result = decompose(markdown);
1158 assert!(result.is_err());
1159 assert!(result.unwrap_err().to_string().contains("collision"));
1160 }
1161
1162 #[test]
1163 fn test_global_array_merged_with_scope() {
1164 let markdown = r#"---
1167items:
1168 - name: Global Item 1
1169 value: 100
1170 - name: Global Item 2
1171 value: 200
1172---
1173
1174Global body
1175
1176---
1177SCOPE: items
1178name: Scope Item 1
1179value: 300
1180---
1181
1182Scope item 1 body
1183
1184---
1185SCOPE: items
1186name: Scope Item 2
1187value: 400
1188---
1189
1190Scope item 2 body"#;
1191
1192 let doc = decompose(markdown).unwrap();
1193
1194 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1196 assert_eq!(items.len(), 4);
1197
1198 let item1 = items[0].as_object().unwrap();
1200 assert_eq!(
1201 item1.get("name").unwrap().as_str().unwrap(),
1202 "Global Item 1"
1203 );
1204 assert_eq!(item1.get("value").unwrap().as_i64().unwrap(), 100);
1205
1206 let item2 = items[1].as_object().unwrap();
1207 assert_eq!(
1208 item2.get("name").unwrap().as_str().unwrap(),
1209 "Global Item 2"
1210 );
1211 assert_eq!(item2.get("value").unwrap().as_i64().unwrap(), 200);
1212
1213 let item3 = items[2].as_object().unwrap();
1215 assert_eq!(item3.get("name").unwrap().as_str().unwrap(), "Scope Item 1");
1216 assert_eq!(item3.get("value").unwrap().as_i64().unwrap(), 300);
1217 assert_eq!(
1218 item3.get("body").unwrap().as_str().unwrap(),
1219 "\nScope item 1 body\n\n"
1220 );
1221
1222 let item4 = items[3].as_object().unwrap();
1223 assert_eq!(item4.get("name").unwrap().as_str().unwrap(), "Scope Item 2");
1224 assert_eq!(item4.get("value").unwrap().as_i64().unwrap(), 400);
1225 assert_eq!(
1226 item4.get("body").unwrap().as_str().unwrap(),
1227 "\nScope item 2 body"
1228 );
1229 }
1230
1231 #[test]
1232 fn test_empty_global_array_with_scope() {
1233 let markdown = r#"---
1235items: []
1236---
1237
1238Global body
1239
1240---
1241SCOPE: items
1242name: Item 1
1243---
1244
1245Item 1 body"#;
1246
1247 let doc = decompose(markdown).unwrap();
1248
1249 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1250 assert_eq!(items.len(), 1);
1251
1252 let item = items[0].as_object().unwrap();
1253 assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1254 }
1255
1256 #[test]
1257 fn test_reserved_field_name() {
1258 let markdown = r#"---
1259SCOPE: body
1260content: Test
1261---"#;
1262
1263 let result = decompose(markdown);
1264 assert!(result.is_err());
1265 assert!(result.unwrap_err().to_string().contains("reserved"));
1266 }
1267
1268 #[test]
1269 fn test_invalid_tag_syntax() {
1270 let markdown = r#"---
1271SCOPE: Invalid-Name
1272title: Test
1273---"#;
1274
1275 let result = decompose(markdown);
1276 assert!(result.is_err());
1277 assert!(result
1278 .unwrap_err()
1279 .to_string()
1280 .contains("Invalid field name"));
1281 }
1282
1283 #[test]
1284 fn test_multiple_global_frontmatter_blocks() {
1285 let markdown = r#"---
1286title: First
1287---
1288
1289Body
1290
1291---
1292author: Second
1293---
1294
1295More body"#;
1296
1297 let result = decompose(markdown);
1298 assert!(result.is_err());
1299 assert!(result
1300 .unwrap_err()
1301 .to_string()
1302 .contains("Multiple global frontmatter"));
1303 }
1304
1305 #[test]
1306 fn test_adjacent_blocks_different_tags() {
1307 let markdown = r#"---
1308SCOPE: items
1309name: Item 1
1310---
1311
1312Item 1 body
1313
1314---
1315SCOPE: sections
1316title: Section 1
1317---
1318
1319Section 1 body"#;
1320
1321 let doc = decompose(markdown).unwrap();
1322
1323 assert!(doc.get_field("items").is_some());
1324 assert!(doc.get_field("sections").is_some());
1325
1326 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1327 assert_eq!(items.len(), 1);
1328
1329 let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1330 assert_eq!(sections.len(), 1);
1331 }
1332
1333 #[test]
1334 fn test_order_preservation() {
1335 let markdown = r#"---
1336SCOPE: items
1337id: 1
1338---
1339
1340First
1341
1342---
1343SCOPE: items
1344id: 2
1345---
1346
1347Second
1348
1349---
1350SCOPE: items
1351id: 3
1352---
1353
1354Third"#;
1355
1356 let doc = decompose(markdown).unwrap();
1357
1358 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1359 assert_eq!(items.len(), 3);
1360
1361 for (i, item) in items.iter().enumerate() {
1362 let mapping = item.as_object().unwrap();
1363 let id = mapping.get("id").unwrap().as_i64().unwrap();
1364 assert_eq!(id, (i + 1) as i64);
1365 }
1366 }
1367
1368 #[test]
1369 fn test_product_catalog_integration() {
1370 let markdown = r#"---
1371title: Product Catalog
1372author: John Doe
1373date: 2024-01-01
1374---
1375
1376This is the main catalog description.
1377
1378---
1379SCOPE: products
1380name: Widget A
1381price: 19.99
1382sku: WID-001
1383---
1384
1385The **Widget A** is our most popular product.
1386
1387---
1388SCOPE: products
1389name: Gadget B
1390price: 29.99
1391sku: GAD-002
1392---
1393
1394The **Gadget B** is perfect for professionals.
1395
1396---
1397SCOPE: reviews
1398product: Widget A
1399rating: 5
1400---
1401
1402"Excellent product! Highly recommended."
1403
1404---
1405SCOPE: reviews
1406product: Gadget B
1407rating: 4
1408---
1409
1410"Very good, but a bit pricey.""#;
1411
1412 let doc = decompose(markdown).unwrap();
1413
1414 assert_eq!(
1416 doc.get_field("title").unwrap().as_str().unwrap(),
1417 "Product Catalog"
1418 );
1419 assert_eq!(
1420 doc.get_field("author").unwrap().as_str().unwrap(),
1421 "John Doe"
1422 );
1423 assert_eq!(
1424 doc.get_field("date").unwrap().as_str().unwrap(),
1425 "2024-01-01"
1426 );
1427
1428 assert!(doc.body().unwrap().contains("main catalog description"));
1430
1431 let products = doc.get_field("products").unwrap().as_sequence().unwrap();
1433 assert_eq!(products.len(), 2);
1434
1435 let product1 = products[0].as_object().unwrap();
1436 assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
1437 assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);
1438
1439 let reviews = doc.get_field("reviews").unwrap().as_sequence().unwrap();
1441 assert_eq!(reviews.len(), 2);
1442
1443 let review1 = reviews[0].as_object().unwrap();
1444 assert_eq!(
1445 review1.get("product").unwrap().as_str().unwrap(),
1446 "Widget A"
1447 );
1448 assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);
1449
1450 assert_eq!(doc.fields().len(), 6);
1452 }
1453
1454 #[test]
1455 fn taro_quill_directive() {
1456 let markdown = r#"---
1457QUILL: usaf_memo
1458memo_for: [ORG/SYMBOL]
1459memo_from: [ORG/SYMBOL]
1460---
1461
1462This is the memo body."#;
1463
1464 let doc = decompose(markdown).unwrap();
1465
1466 assert_eq!(doc.quill_tag(), Some("usaf_memo"));
1468
1469 assert_eq!(
1471 doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1472 .as_str()
1473 .unwrap(),
1474 "ORG/SYMBOL"
1475 );
1476
1477 assert_eq!(doc.body(), Some("\nThis is the memo body."));
1479 }
1480
1481 #[test]
1482 fn test_quill_with_scope_blocks() {
1483 let markdown = r#"---
1484QUILL: document
1485title: Test Document
1486---
1487
1488Main body.
1489
1490---
1491SCOPE: sections
1492name: Section 1
1493---
1494
1495Section 1 body."#;
1496
1497 let doc = decompose(markdown).unwrap();
1498
1499 assert_eq!(doc.quill_tag(), Some("document"));
1501
1502 assert_eq!(
1504 doc.get_field("title").unwrap().as_str().unwrap(),
1505 "Test Document"
1506 );
1507
1508 let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1510 assert_eq!(sections.len(), 1);
1511
1512 assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1514 }
1515
1516 #[test]
1517 fn test_multiple_quill_directives_error() {
1518 let markdown = r#"---
1519QUILL: first
1520---
1521
1522---
1523QUILL: second
1524---"#;
1525
1526 let result = decompose(markdown);
1527 assert!(result.is_err());
1528 assert!(result
1529 .unwrap_err()
1530 .to_string()
1531 .contains("Multiple quill directives"));
1532 }
1533
1534 #[test]
1535 fn test_invalid_quill_name() {
1536 let markdown = r#"---
1537QUILL: Invalid-Name
1538---"#;
1539
1540 let result = decompose(markdown);
1541 assert!(result.is_err());
1542 assert!(result
1543 .unwrap_err()
1544 .to_string()
1545 .contains("Invalid quill name"));
1546 }
1547
1548 #[test]
1549 fn test_quill_wrong_value_type() {
1550 let markdown = r#"---
1551QUILL: 123
1552---"#;
1553
1554 let result = decompose(markdown);
1555 assert!(result.is_err());
1556 assert!(result
1557 .unwrap_err()
1558 .to_string()
1559 .contains("QUILL value must be a string"));
1560 }
1561
1562 #[test]
1563 fn test_scope_wrong_value_type() {
1564 let markdown = r#"---
1565SCOPE: 123
1566---"#;
1567
1568 let result = decompose(markdown);
1569 assert!(result.is_err());
1570 assert!(result
1571 .unwrap_err()
1572 .to_string()
1573 .contains("SCOPE value must be a string"));
1574 }
1575
1576 #[test]
1577 fn test_both_quill_and_scope_error() {
1578 let markdown = r#"---
1579QUILL: test
1580SCOPE: items
1581---"#;
1582
1583 let result = decompose(markdown);
1584 assert!(result.is_err());
1585 assert!(result
1586 .unwrap_err()
1587 .to_string()
1588 .contains("Cannot specify both QUILL and SCOPE"));
1589 }
1590
1591 #[test]
1592 fn test_blank_lines_in_frontmatter() {
1593 let markdown = r#"---
1595title: Test Document
1596author: Test Author
1597
1598description: This has a blank line above it
1599tags:
1600 - one
1601 - two
1602---
1603
1604# Hello World
1605
1606This is the body."#;
1607
1608 let doc = decompose(markdown).unwrap();
1609
1610 assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
1611 assert_eq!(
1612 doc.get_field("title").unwrap().as_str().unwrap(),
1613 "Test Document"
1614 );
1615 assert_eq!(
1616 doc.get_field("author").unwrap().as_str().unwrap(),
1617 "Test Author"
1618 );
1619 assert_eq!(
1620 doc.get_field("description").unwrap().as_str().unwrap(),
1621 "This has a blank line above it"
1622 );
1623
1624 let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
1625 assert_eq!(tags.len(), 2);
1626 }
1627
1628 #[test]
1629 fn test_blank_lines_in_scope_blocks() {
1630 let markdown = r#"---
1632SCOPE: items
1633name: Item 1
1634
1635price: 19.99
1636
1637tags:
1638 - electronics
1639 - gadgets
1640---
1641
1642Body of item 1."#;
1643
1644 let doc = decompose(markdown).unwrap();
1645
1646 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1647 assert_eq!(items.len(), 1);
1648
1649 let item = items[0].as_object().unwrap();
1650 assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1651 assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);
1652
1653 let tags = item.get("tags").unwrap().as_array().unwrap();
1654 assert_eq!(tags.len(), 2);
1655 }
1656
1657 #[test]
1658 fn test_horizontal_rule_with_blank_lines_above_and_below() {
1659 let markdown = r#"---
1661title: Test
1662---
1663
1664First paragraph.
1665
1666---
1667
1668Second paragraph."#;
1669
1670 let doc = decompose(markdown).unwrap();
1671
1672 assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1673
1674 let body = doc.body().unwrap();
1676 assert!(body.contains("First paragraph."));
1677 assert!(body.contains("---"));
1678 assert!(body.contains("Second paragraph."));
1679 }
1680
1681 #[test]
1682 fn test_horizontal_rule_not_preceded_by_blank() {
1683 let markdown = r#"---
1686title: Test
1687---
1688
1689First paragraph.
1690---
1691
1692Second paragraph."#;
1693
1694 let doc = decompose(markdown).unwrap();
1695
1696 let body = doc.body().unwrap();
1697 assert!(body.contains("---"));
1699 }
1700
1701 #[test]
1702 fn test_multiple_blank_lines_in_yaml() {
1703 let markdown = r#"---
1705title: Test
1706
1707
1708author: John Doe
1709
1710
1711version: 1.0
1712---
1713
1714Body content."#;
1715
1716 let doc = decompose(markdown).unwrap();
1717
1718 assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1719 assert_eq!(
1720 doc.get_field("author").unwrap().as_str().unwrap(),
1721 "John Doe"
1722 );
1723 assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1724 }
1725}
/// Tests that parse the bundled extended-metadata demo fixture and that check
/// how `decompose` reacts to inputs around the `MAX_INPUT_SIZE` and
/// `MAX_YAML_SIZE` limits.
#[cfg(test)]
mod demo_file_test {
    use super::*;

    /// Parses the demo fixture and spot-checks its global fields, its body and
    /// the two collections it is expected to define.
    #[test]
    fn test_extended_metadata_demo_file() {
        let markdown = include_str!("../../quillmark-fixtures/resources/extended_metadata_demo.md");
        let doc = decompose(markdown).unwrap();

        // Global scalar fields.
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Extended Metadata Demo"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Quillmark Team"
        );
        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);

        // Global body text.
        assert!(doc
            .body()
            .unwrap()
            .contains("extended YAML metadata standard"));

        // Collections.
        let features = doc.get_field("features").unwrap().as_sequence().unwrap();
        assert_eq!(features.len(), 3);

        let use_cases = doc.get_field("use_cases").unwrap().as_sequence().unwrap();
        assert_eq!(use_cases.len(), 2);

        // First entry of the `features` collection.
        let feature1 = features[0].as_object().unwrap();
        assert_eq!(
            feature1.get("name").unwrap().as_str().unwrap(),
            "Tag Directives"
        );
    }

    /// An input one byte longer than `MAX_INPUT_SIZE` must be rejected with an
    /// "Input too large" error.
    #[test]
    fn test_input_size_limit() {
        let size = crate::error::MAX_INPUT_SIZE + 1;
        let large_markdown = "a".repeat(size);

        let result = decompose(&large_markdown);
        assert!(result.is_err(), "oversized input was accepted");

        let err_msg = result.unwrap_err().to_string();
        assert!(
            err_msg.contains("Input too large"),
            "unexpected error message: {}",
            err_msg
        );
    }

    /// A metadata block whose single value is longer than `MAX_YAML_SIZE` must
    /// be rejected with a "YAML block too large" error.
    #[test]
    fn test_yaml_size_limit() {
        let size = crate::error::MAX_YAML_SIZE + 1;
        // One quoted scalar of `size` characters between the two fences.
        let markdown = format!("---\ndata: \"{}\"\n---\n\nBody", "x".repeat(size));

        let result = decompose(&markdown);
        assert!(result.is_err(), "oversized YAML block was accepted");

        let err_msg = result.unwrap_err().to_string();
        assert!(
            err_msg.contains("YAML block too large"),
            "unexpected error message: {}",
            err_msg
        );
    }

    /// A small document with a 1000-character body must parse successfully.
    #[test]
    fn test_input_within_size_limit() {
        let size = 1000;
        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));

        let result = decompose(&markdown);
        assert!(
            result.is_ok(),
            "expected successful parse, got {:?}",
            result.as_ref().err()
        );
    }

    /// A small metadata block must parse successfully.
    #[test]
    fn test_yaml_within_size_limit() {
        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";

        // `markdown` is already a `&str`; pass it directly instead of `&markdown`.
        let result = decompose(markdown);
        assert!(
            result.is_ok(),
            "expected successful parse, got {:?}",
            result.as_ref().err()
        );
    }
}