use std::collections::HashMap;

use crate::value::QuillValue;

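/// Name of the reserved field that holds the markdown body.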
pub const BODY_FIELD: &str = "body";

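/// Formats a `serde_yaml` error with the given context, appending the
/// line and column when they are available.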
fn yaml_error_to_string(e: serde_yaml::Error, context: &str) -> String {
    let mut msg = format!("{}: {}", context, e);

    if let Some(loc) = e.location() {
        msg.push_str(&format!(" at line {}, column {}", loc.line(), loc.column()));
    }

    msg
}

pub const QUILL_TAG: &str = "quill";

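/// A markdown document decomposed into metadata fields plus the body text,
/// as produced by [`ParsedDocument::from_markdown`].
///
/// Minimal usage sketch (the `quillmark_core` import path below is an
/// assumption for illustration; use this crate's actual name):
///
/// ```ignore
/// use quillmark_core::ParsedDocument;
///
/// let doc = ParsedDocument::from_markdown("---\ntitle: Hi\n---\n\nBody text.")?;
/// assert_eq!(doc.get_field("title").unwrap().as_str(), Some("Hi"));
/// assert_eq!(doc.body(), Some("\nBody text."));
/// ```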
#[derive(Debug, Clone)]
pub struct ParsedDocument {
    fields: HashMap<String, QuillValue>,
    quill_tag: Option<String>,
}

impl ParsedDocument {
    pub fn new(fields: HashMap<String, QuillValue>) -> Self {
        Self {
            fields,
            quill_tag: None,
        }
    }

    pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: Option<String>) -> Self {
        Self { fields, quill_tag }
    }

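    /// Parses markdown with optional YAML frontmatter, `QUILL:` and `SCOPE:`
    /// blocks into a `ParsedDocument`.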
    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
        decompose(markdown).map_err(|e| crate::error::ParseError::from(e))
    }

    pub fn quill_tag(&self) -> Option<&str> {
        self.quill_tag.as_deref()
    }

    pub fn body(&self) -> Option<&str> {
        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
    }

    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
        self.fields.get(name)
    }

    pub fn fields(&self) -> &HashMap<String, QuillValue> {
        &self.fields
    }

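    /// Returns a copy of this document with schema defaults filled in for any
    /// missing fields; fields that are already present are left untouched.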
    pub fn with_defaults(
        &self,
        field_schemas: &HashMap<String, crate::quill::FieldSchema>,
    ) -> Self {
        let mut fields = self.fields.clone();

        for (field_name, schema) in field_schemas {
            if !fields.contains_key(field_name) {
                if let Some(ref default_value) = schema.default {
                    fields.insert(field_name.clone(), default_value.clone());
                }
            }
        }

        Self {
            fields,
            quill_tag: self.quill_tag.clone(),
        }
    }
}

#[derive(Debug)]
struct MetadataBlock {
    start: usize,
    end: usize,
    yaml_content: String,
    tag: Option<String>,
    quill_name: Option<String>,
}

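/// Returns `true` if `name` matches the pattern `[a-z_][a-z0-9_]*`.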
fn is_valid_tag_name(name: &str) -> bool {
    if name.is_empty() {
        return false;
    }

    let mut chars = name.chars();
    let first = chars.next().unwrap();

    if !first.is_ascii_lowercase() && first != '_' {
        return false;
    }

    for ch in chars {
        if !ch.is_ascii_lowercase() && !ch.is_ascii_digit() && ch != '_' {
            return false;
        }
    }

    true
}

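/// Scans `markdown` for `---`-delimited metadata blocks.
///
/// A `---` line that is followed by a blank line is treated as a horizontal
/// rule or plain content and skipped. For real blocks, a `QUILL:` or `SCOPE:`
/// key is validated, removed from the YAML, and recorded on the returned
/// `MetadataBlock`; specifying both in one block is an error.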
fn find_metadata_blocks(
    markdown: &str,
) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
    let mut blocks = Vec::new();
    let mut pos = 0;

    while pos < markdown.len() {
        let search_str = &markdown[pos..];
        let delimiter_result = if let Some(p) = search_str.find("---\n") {
            Some((p, 4, "\n"))
        } else if let Some(p) = search_str.find("---\r\n") {
            Some((p, 5, "\r\n"))
        } else {
            None
        };

        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
            let abs_pos = pos + delimiter_pos;
            let content_start = abs_pos + delimiter_len;

            let preceded_by_blank = if abs_pos > 0 {
                let before = &markdown[..abs_pos];
                before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
            } else {
                false
            };

            let followed_by_blank = if content_start < markdown.len() {
                markdown[content_start..].starts_with('\n')
                    || markdown[content_start..].starts_with("\r\n")
            } else {
                false
            };

            if preceded_by_blank && followed_by_blank {
                pos = abs_pos + 3;
                continue;
            }

            if followed_by_blank {
                pos = abs_pos + 3;
                continue;
            }

            let rest = &markdown[content_start..];

            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
            let closing_with_newline = closing_patterns
                .iter()
                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
                .min_by_key(|(p, _)| *p);

            let closing_at_eof = ["\n---", "\r\n---"]
                .iter()
                .filter_map(|delim| {
                    rest.find(delim).and_then(|p| {
                        if p + delim.len() == rest.len() {
                            Some((p, delim.len()))
                        } else {
                            None
                        }
                    })
                })
                .min_by_key(|(p, _)| *p);

            let closing_result = match (closing_with_newline, closing_at_eof) {
                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
                (Some(_), Some(_)) => closing_with_newline,
                (Some(_), None) => closing_with_newline,
                (None, Some(_)) => closing_at_eof,
                (None, None) => None,
            };

            if let Some((closing_pos, closing_len)) = closing_result {
                let abs_closing_pos = content_start + closing_pos;
                let content = &markdown[content_start..abs_closing_pos];

                if content.len() > crate::error::MAX_YAML_SIZE {
                    return Err(format!(
                        "YAML block too large: {} bytes (max: {} bytes)",
                        content.len(),
                        crate::error::MAX_YAML_SIZE
                    )
                    .into());
                }

                let (tag, quill_name, yaml_content) = if !content.is_empty() {
                    match serde_yaml::from_str::<serde_yaml::Value>(content) {
                        Ok(yaml_value) => {
                            if let Some(mapping) = yaml_value.as_mapping() {
                                let quill_key = serde_yaml::Value::String("QUILL".to_string());
                                let scope_key = serde_yaml::Value::String("SCOPE".to_string());

                                let has_quill = mapping.contains_key(&quill_key);
                                let has_scope = mapping.contains_key(&scope_key);

                                if has_quill && has_scope {
                                    return Err(
                                        "Cannot specify both QUILL and SCOPE in the same block"
                                            .into(),
                                    );
                                }

                                if has_quill {
                                    let quill_value = mapping.get(&quill_key).unwrap();
                                    let quill_name_str = quill_value
                                        .as_str()
                                        .ok_or_else(|| "QUILL value must be a string")?;

                                    if !is_valid_tag_name(quill_name_str) {
                                        return Err(format!(
                                            "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
                                            quill_name_str
                                        )
                                        .into());
                                    }

                                    let mut new_mapping = mapping.clone();
                                    new_mapping.remove(&quill_key);
                                    let new_yaml = serde_yaml::to_string(&new_mapping)
                                        .map_err(|e| format!("Failed to serialize YAML: {}", e))?;

                                    (None, Some(quill_name_str.to_string()), new_yaml)
                                } else if has_scope {
                                    let scope_value = mapping.get(&scope_key).unwrap();
                                    let field_name = scope_value
                                        .as_str()
                                        .ok_or_else(|| "SCOPE value must be a string")?;

                                    if !is_valid_tag_name(field_name) {
                                        return Err(format!(
                                            "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
                                            field_name
                                        )
                                        .into());
                                    }

                                    if field_name == BODY_FIELD {
                                        return Err(format!(
                                            "Cannot use reserved field name '{}' as SCOPE value",
                                            BODY_FIELD
                                        )
                                        .into());
                                    }

                                    let mut new_mapping = mapping.clone();
                                    new_mapping.remove(&scope_key);
                                    let new_yaml = serde_yaml::to_string(&new_mapping)
                                        .map_err(|e| format!("Failed to serialize YAML: {}", e))?;

                                    (Some(field_name.to_string()), None, new_yaml)
                                } else {
                                    (None, None, content.to_string())
                                }
                            } else {
                                (None, None, content.to_string())
                            }
                        }
                        Err(_) => {
                            (None, None, content.to_string())
                        }
                    }
                } else {
                    (None, None, content.to_string())
                };

                blocks.push(MetadataBlock {
                    start: abs_pos,
                    end: abs_closing_pos + closing_len,
                    yaml_content,
                    tag,
                    quill_name,
                });

                pos = abs_closing_pos + closing_len;
            } else if abs_pos == 0 {
                return Err("Frontmatter started but not closed with ---".into());
            } else {
                pos = abs_pos + 3;
            }
        } else {
            break;
        }
    }

    Ok(blocks)
}

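/// Decomposes `markdown` into a `ParsedDocument`: global frontmatter fields,
/// an optional QUILL directive, SCOPE-tagged blocks collected into per-tag
/// arrays (each item carrying its trailing text under `BODY_FIELD`), and the
/// remaining text stored as the document body.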
fn decompose(markdown: &str) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
    if markdown.len() > crate::error::MAX_INPUT_SIZE {
        return Err(format!(
            "Input too large: {} bytes (max: {} bytes)",
            markdown.len(),
            crate::error::MAX_INPUT_SIZE
        )
        .into());
    }

    let mut fields = HashMap::new();

    let blocks = find_metadata_blocks(markdown)?;

    if blocks.is_empty() {
        fields.insert(
            BODY_FIELD.to_string(),
            QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
        );
        return Ok(ParsedDocument::new(fields));
    }

    let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
    let mut has_global_frontmatter = false;
    let mut global_frontmatter_index: Option<usize> = None;
    let mut quill_name: Option<String> = None;

    for (idx, block) in blocks.iter().enumerate() {
        if let Some(ref name) = block.quill_name {
            if quill_name.is_some() {
                return Err("Multiple quill directives found: only one allowed".into());
            }
            quill_name = Some(name.clone());
        }

        if block.tag.is_none() && block.quill_name.is_none() {
            if has_global_frontmatter {
                return Err(
                    "Multiple global frontmatter blocks found: only one untagged block allowed"
                        .into(),
                );
            }
            has_global_frontmatter = true;
            global_frontmatter_index = Some(idx);
        }
    }

    if let Some(idx) = global_frontmatter_index {
        let block = &blocks[idx];

        let yaml_fields: HashMap<String, serde_yaml::Value> = if block.yaml_content.is_empty() {
            HashMap::new()
        } else {
            serde_yaml::from_str(&block.yaml_content)
                .map_err(|e| yaml_error_to_string(e, "Invalid YAML frontmatter"))?
        };

        for other_block in &blocks {
            if let Some(ref tag) = other_block.tag {
                if let Some(global_value) = yaml_fields.get(tag) {
                    if global_value.as_sequence().is_none() {
                        return Err(format!(
                            "Name collision: global field '{}' conflicts with tagged attribute",
                            tag
                        )
                        .into());
                    }
                }
            }
        }

        for (key, value) in yaml_fields {
            fields.insert(key, QuillValue::from_yaml(value)?);
        }
    }

    for block in &blocks {
        if block.quill_name.is_some() {
            if !block.yaml_content.is_empty() {
                let yaml_fields: HashMap<String, serde_yaml::Value> =
                    serde_yaml::from_str(&block.yaml_content)
                        .map_err(|e| yaml_error_to_string(e, "Invalid YAML in quill block"))?;

                for key in yaml_fields.keys() {
                    if fields.contains_key(key) {
                        return Err(format!(
                            "Name collision: quill block field '{}' conflicts with existing field",
                            key
                        )
                        .into());
                    }
                }

                for (key, value) in yaml_fields {
                    fields.insert(key, QuillValue::from_yaml(value)?);
                }
            }
        }
    }

    for (idx, block) in blocks.iter().enumerate() {
        if let Some(ref tag_name) = block.tag {
            if let Some(existing_value) = fields.get(tag_name) {
                if existing_value.as_array().is_none() {
                    return Err(format!(
                        "Name collision: tagged attribute '{}' conflicts with global field",
                        tag_name
                    )
                    .into());
                }
            }

            let mut item_fields: HashMap<String, serde_yaml::Value> = if block
                .yaml_content
                .is_empty()
            {
                HashMap::new()
            } else {
                serde_yaml::from_str(&block.yaml_content).map_err(|e| {
                    yaml_error_to_string(e, &format!("Invalid YAML in tagged block '{}'", tag_name))
                })?
            };

            let body_start = block.end;
            let body_end = if idx + 1 < blocks.len() {
                blocks[idx + 1].start
            } else {
                markdown.len()
            };
            let body = &markdown[body_start..body_end];

            item_fields.insert(
                BODY_FIELD.to_string(),
                serde_yaml::Value::String(body.to_string()),
            );

            let item_value = serde_yaml::to_value(item_fields)?;

            tagged_attributes
                .entry(tag_name.clone())
                .or_insert_with(Vec::new)
                .push(item_value);
        }
    }

    let first_non_scope_block_idx = blocks
        .iter()
        .position(|b| b.tag.is_none() && b.quill_name.is_none())
        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));

    let (body_start, body_end) = if let Some(idx) = first_non_scope_block_idx {
        let start = blocks[idx].end;

        let end = blocks
            .iter()
            .skip(idx + 1)
            .find(|b| b.tag.is_some())
            .map(|b| b.start)
            .unwrap_or(markdown.len());

        (start, end)
    } else {
        let end = blocks
            .iter()
            .find(|b| b.tag.is_some())
            .map(|b| b.start)
            .unwrap_or(0);

        (0, end)
    };

    let global_body = &markdown[body_start..body_end];

    fields.insert(
        BODY_FIELD.to_string(),
        QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
    );

    for (tag_name, items) in tagged_attributes {
        if let Some(existing_value) = fields.get(&tag_name) {
            if let Some(existing_array) = existing_value.as_array() {
                let new_items_json: Vec<serde_json::Value> = items
                    .into_iter()
                    .map(|yaml_val| {
                        serde_json::to_value(&yaml_val)
                            .map_err(|e| format!("Failed to convert YAML to JSON: {}", e))
                    })
                    .collect::<Result<Vec<_>, _>>()?;

                let mut merged_array = existing_array.clone();
                merged_array.extend(new_items_json);

                let quill_value = QuillValue::from_json(serde_json::Value::Array(merged_array));
                fields.insert(tag_name, quill_value);
            } else {
                return Err(format!(
                    "Internal error: field '{}' exists but is not an array",
                    tag_name
                )
                .into());
            }
        } else {
            let quill_value = QuillValue::from_yaml(serde_yaml::Value::Sequence(items))?;
            fields.insert(tag_name, quill_value);
        }
    }

    let mut parsed = ParsedDocument::new(fields);

    if let Some(name) = quill_name {
        parsed.quill_tag = Some(name);
    }

    Ok(parsed)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_no_frontmatter() {
        let markdown = "# Hello World\n\nThis is a test.";
        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some(markdown));
        assert_eq!(doc.fields().len(), 1);
    }

    #[test]
    fn test_with_frontmatter() {
        let markdown = r#"---
title: Test Document
author: Test Author
---

# Hello World

This is the body."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Test Document"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Test Author"
        );
        assert_eq!(doc.fields().len(), 3);
    }

    #[test]
    fn test_complex_yaml_frontmatter() {
        let markdown = r#"---
title: Complex Document
tags:
  - test
  - yaml
metadata:
  version: 1.0
  nested:
    field: value
---

Content here."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\nContent here."));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Complex Document"
        );

        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
        assert_eq!(tags.len(), 2);
        assert_eq!(tags[0].as_str().unwrap(), "test");
        assert_eq!(tags[1].as_str().unwrap(), "yaml");
    }

    #[test]
    fn test_with_defaults_empty_document() {
        use crate::quill::FieldSchema;
        use std::collections::HashMap;

        let mut field_schemas = HashMap::new();
        let mut schema1 = FieldSchema::new("status".to_string(), "Document status".to_string());
        schema1.default = Some(QuillValue::from_json(serde_json::json!("draft")));
        field_schemas.insert("status".to_string(), schema1);

        let mut schema2 = FieldSchema::new("version".to_string(), "Version number".to_string());
        schema2.default = Some(QuillValue::from_json(serde_json::json!(1)));
        field_schemas.insert("version".to_string(), schema2);

        let doc = ParsedDocument::new(HashMap::new());
        let doc_with_defaults = doc.with_defaults(&field_schemas);

        assert_eq!(
            doc_with_defaults
                .get_field("status")
                .unwrap()
                .as_str()
                .unwrap(),
            "draft"
        );
        assert_eq!(
            doc_with_defaults
                .get_field("version")
                .unwrap()
                .as_number()
                .unwrap()
                .as_i64()
                .unwrap(),
            1
        );
    }

    #[test]
    fn test_with_defaults_preserves_existing_values() {
        use crate::quill::FieldSchema;
        use std::collections::HashMap;

        let mut field_schemas = HashMap::new();
        let mut schema = FieldSchema::new("status".to_string(), "Document status".to_string());
        schema.default = Some(QuillValue::from_json(serde_json::json!("draft")));
        field_schemas.insert("status".to_string(), schema);

        let mut fields = HashMap::new();
        fields.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("published")),
        );
        let doc = ParsedDocument::new(fields);

        let doc_with_defaults = doc.with_defaults(&field_schemas);

        assert_eq!(
            doc_with_defaults
                .get_field("status")
                .unwrap()
                .as_str()
                .unwrap(),
            "published"
        );
    }

    #[test]
    fn test_with_defaults_partial_application() {
        use crate::quill::FieldSchema;
        use std::collections::HashMap;

        let mut field_schemas = HashMap::new();

        let mut schema1 = FieldSchema::new("status".to_string(), "Document status".to_string());
        schema1.default = Some(QuillValue::from_json(serde_json::json!("draft")));
        field_schemas.insert("status".to_string(), schema1);

        let mut schema2 = FieldSchema::new("version".to_string(), "Version number".to_string());
        schema2.default = Some(QuillValue::from_json(serde_json::json!(1)));
        field_schemas.insert("version".to_string(), schema2);

        let mut fields = HashMap::new();
        fields.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("published")),
        );
        let doc = ParsedDocument::new(fields);

        let doc_with_defaults = doc.with_defaults(&field_schemas);

        assert_eq!(
            doc_with_defaults
                .get_field("status")
                .unwrap()
                .as_str()
                .unwrap(),
            "published"
        );
        assert_eq!(
            doc_with_defaults
                .get_field("version")
                .unwrap()
                .as_number()
                .unwrap()
                .as_i64()
                .unwrap(),
            1
        );
    }

    #[test]
    fn test_with_defaults_no_defaults() {
        use crate::quill::FieldSchema;
        use std::collections::HashMap;

        let mut field_schemas = HashMap::new();

        let schema = FieldSchema::new("title".to_string(), "Document title".to_string());
        field_schemas.insert("title".to_string(), schema);

        let doc = ParsedDocument::new(HashMap::new());
        let doc_with_defaults = doc.with_defaults(&field_schemas);

        assert!(doc_with_defaults.get_field("title").is_none());
    }

    #[test]
    fn test_with_defaults_complex_types() {
        use crate::quill::FieldSchema;
        use std::collections::HashMap;

        let mut field_schemas = HashMap::new();

        let mut schema = FieldSchema::new("tags".to_string(), "Document tags".to_string());
        schema.default = Some(QuillValue::from_json(serde_json::json!(["default", "tag"])));
        field_schemas.insert("tags".to_string(), schema);

        let doc = ParsedDocument::new(HashMap::new());
        let doc_with_defaults = doc.with_defaults(&field_schemas);

        let tags = doc_with_defaults
            .get_field("tags")
            .unwrap()
            .as_sequence()
            .unwrap();
        assert_eq!(tags.len(), 2);
        assert_eq!(tags[0].as_str().unwrap(), "default");
        assert_eq!(tags[1].as_str().unwrap(), "tag");
    }

    #[test]
    fn test_invalid_yaml() {
        let markdown = r#"---
title: [invalid yaml
author: missing close bracket
---

Content here."#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid YAML frontmatter"));
    }

    #[test]
    fn test_unclosed_frontmatter() {
        let markdown = r#"---
title: Test
author: Test Author

Content without closing ---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("not closed"));
    }

    #[test]
    fn test_basic_tagged_block() {
        let markdown = r#"---
title: Main Document
---

Main body content.

---
SCOPE: items
name: Item 1
---

Body of item 1."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Main Document"
        );

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
        assert_eq!(
            item.get("body").unwrap().as_str().unwrap(),
            "\nBody of item 1."
        );
    }

    #[test]
    fn test_multiple_tagged_blocks() {
        let markdown = r#"---
SCOPE: items
name: Item 1
tags: [a, b]
---

First item body.

---
SCOPE: items
name: Item 2
tags: [c, d]
---

Second item body."#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 2);

        let item1 = items[0].as_object().unwrap();
        assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");

        let item2 = items[1].as_object().unwrap();
        assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
    }

    #[test]
    fn test_mixed_global_and_tagged() {
        let markdown = r#"---
title: Global
author: John Doe
---

Global body.

---
SCOPE: sections
title: Section 1
---

Section 1 content.

---
SCOPE: sections
title: Section 2
---

Section 2 content."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));

        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
        assert_eq!(sections.len(), 2);
    }

    #[test]
    fn test_empty_tagged_metadata() {
        let markdown = r#"---
SCOPE: items
---

Body without metadata."#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(
            item.get("body").unwrap().as_str().unwrap(),
            "\nBody without metadata."
        );
    }

    #[test]
    fn test_tagged_block_without_body() {
        let markdown = r#"---
SCOPE: items
name: Item
---"#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
    }

    #[test]
    fn test_name_collision_global_and_tagged() {
        let markdown = r#"---
items: "global value"
---

Body

---
SCOPE: items
name: Item
---

Item body"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("collision"));
    }

    #[test]
    fn test_global_array_merged_with_scope() {
        let markdown = r#"---
items:
  - name: Global Item 1
    value: 100
  - name: Global Item 2
    value: 200
---

Global body

---
SCOPE: items
name: Scope Item 1
value: 300
---

Scope item 1 body

---
SCOPE: items
name: Scope Item 2
value: 400
---

Scope item 2 body"#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 4);

        let item1 = items[0].as_object().unwrap();
        assert_eq!(
            item1.get("name").unwrap().as_str().unwrap(),
            "Global Item 1"
        );
        assert_eq!(item1.get("value").unwrap().as_i64().unwrap(), 100);

        let item2 = items[1].as_object().unwrap();
        assert_eq!(
            item2.get("name").unwrap().as_str().unwrap(),
            "Global Item 2"
        );
        assert_eq!(item2.get("value").unwrap().as_i64().unwrap(), 200);

        let item3 = items[2].as_object().unwrap();
        assert_eq!(item3.get("name").unwrap().as_str().unwrap(), "Scope Item 1");
        assert_eq!(item3.get("value").unwrap().as_i64().unwrap(), 300);
        assert_eq!(
            item3.get("body").unwrap().as_str().unwrap(),
            "\nScope item 1 body\n\n"
        );

        let item4 = items[3].as_object().unwrap();
        assert_eq!(item4.get("name").unwrap().as_str().unwrap(), "Scope Item 2");
        assert_eq!(item4.get("value").unwrap().as_i64().unwrap(), 400);
        assert_eq!(
            item4.get("body").unwrap().as_str().unwrap(),
            "\nScope item 2 body"
        );
    }

    #[test]
    fn test_empty_global_array_with_scope() {
        let markdown = r#"---
items: []
---

Global body

---
SCOPE: items
name: Item 1
---

Item 1 body"#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
    }

    #[test]
    fn test_reserved_field_name() {
        let markdown = r#"---
SCOPE: body
content: Test
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("reserved"));
    }

    #[test]
    fn test_invalid_tag_syntax() {
        let markdown = r#"---
SCOPE: Invalid-Name
title: Test
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid field name"));
    }

    #[test]
    fn test_multiple_global_frontmatter_blocks() {
        let markdown = r#"---
title: First
---

Body

---
author: Second
---

More body"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Multiple global frontmatter"));
    }

    #[test]
    fn test_adjacent_blocks_different_tags() {
        let markdown = r#"---
SCOPE: items
name: Item 1
---

Item 1 body

---
SCOPE: sections
title: Section 1
---

Section 1 body"#;

        let doc = decompose(markdown).unwrap();

        assert!(doc.get_field("items").is_some());
        assert!(doc.get_field("sections").is_some());

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
        assert_eq!(sections.len(), 1);
    }

    #[test]
    fn test_order_preservation() {
        let markdown = r#"---
SCOPE: items
id: 1
---

First

---
SCOPE: items
id: 2
---

Second

---
SCOPE: items
id: 3
---

Third"#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 3);

        for (i, item) in items.iter().enumerate() {
            let mapping = item.as_object().unwrap();
            let id = mapping.get("id").unwrap().as_i64().unwrap();
            assert_eq!(id, (i + 1) as i64);
        }
    }

    #[test]
    fn test_product_catalog_integration() {
        let markdown = r#"---
title: Product Catalog
author: John Doe
date: 2024-01-01
---

This is the main catalog description.

---
SCOPE: products
name: Widget A
price: 19.99
sku: WID-001
---

The **Widget A** is our most popular product.

---
SCOPE: products
name: Gadget B
price: 29.99
sku: GAD-002
---

The **Gadget B** is perfect for professionals.

---
SCOPE: reviews
product: Widget A
rating: 5
---

"Excellent product! Highly recommended."

---
SCOPE: reviews
product: Gadget B
rating: 4
---

"Very good, but a bit pricey.""#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Product Catalog"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "John Doe"
        );
        assert_eq!(
            doc.get_field("date").unwrap().as_str().unwrap(),
            "2024-01-01"
        );

        assert!(doc.body().unwrap().contains("main catalog description"));

        let products = doc.get_field("products").unwrap().as_sequence().unwrap();
        assert_eq!(products.len(), 2);

        let product1 = products[0].as_object().unwrap();
        assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
        assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);

        let reviews = doc.get_field("reviews").unwrap().as_sequence().unwrap();
        assert_eq!(reviews.len(), 2);

        let review1 = reviews[0].as_object().unwrap();
        assert_eq!(
            review1.get("product").unwrap().as_str().unwrap(),
            "Widget A"
        );
        assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);

        assert_eq!(doc.fields().len(), 6);
    }

    #[test]
    fn test_quill_directive() {
        let markdown = r#"---
QUILL: usaf_memo
memo_for: [ORG/SYMBOL]
memo_from: [ORG/SYMBOL]
---

This is the memo body."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.quill_tag(), Some("usaf_memo"));

        assert_eq!(
            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
                .as_str()
                .unwrap(),
            "ORG/SYMBOL"
        );

        assert_eq!(doc.body(), Some("\nThis is the memo body."));
    }

    #[test]
    fn test_quill_with_scope_blocks() {
        let markdown = r#"---
QUILL: document
title: Test Document
---

Main body.

---
SCOPE: sections
name: Section 1
---

Section 1 body."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.quill_tag(), Some("document"));

        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Test Document"
        );

        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
        assert_eq!(sections.len(), 1);

        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
    }

    #[test]
    fn test_multiple_quill_directives_error() {
        let markdown = r#"---
QUILL: first
---

---
QUILL: second
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Multiple quill directives"));
    }

    #[test]
    fn test_invalid_quill_name() {
        let markdown = r#"---
QUILL: Invalid-Name
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid quill name"));
    }

    #[test]
    fn test_quill_wrong_value_type() {
        let markdown = r#"---
QUILL: 123
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("QUILL value must be a string"));
    }

    #[test]
    fn test_scope_wrong_value_type() {
        let markdown = r#"---
SCOPE: 123
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("SCOPE value must be a string"));
    }

    #[test]
    fn test_both_quill_and_scope_error() {
        let markdown = r#"---
QUILL: test
SCOPE: items
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Cannot specify both QUILL and SCOPE"));
    }

    #[test]
    fn test_blank_lines_in_frontmatter() {
        let markdown = r#"---
title: Test Document
author: Test Author

description: This has a blank line above it
tags:
  - one
  - two
---

# Hello World

This is the body."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Test Document"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Test Author"
        );
        assert_eq!(
            doc.get_field("description").unwrap().as_str().unwrap(),
            "This has a blank line above it"
        );

        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
        assert_eq!(tags.len(), 2);
    }

    #[test]
    fn test_blank_lines_in_scope_blocks() {
        let markdown = r#"---
SCOPE: items
name: Item 1

price: 19.99

tags:
  - electronics
  - gadgets
---

Body of item 1."#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
        assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);

        let tags = item.get("tags").unwrap().as_array().unwrap();
        assert_eq!(tags.len(), 2);
    }

    #[test]
    fn test_horizontal_rule_with_blank_lines_above_and_below() {
        let markdown = r#"---
title: Test
---

First paragraph.

---

Second paragraph."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");

        let body = doc.body().unwrap();
        assert!(body.contains("First paragraph."));
        assert!(body.contains("---"));
        assert!(body.contains("Second paragraph."));
    }

    #[test]
    fn test_horizontal_rule_not_preceded_by_blank() {
        let markdown = r#"---
title: Test
---

First paragraph.
---

Second paragraph."#;

        let doc = decompose(markdown).unwrap();

        let body = doc.body().unwrap();
        assert!(body.contains("---"));
    }

    #[test]
    fn test_multiple_blank_lines_in_yaml() {
        let markdown = r#"---
title: Test


author: John Doe


version: 1.0
---

Body content."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "John Doe"
        );
        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
    }
}

#[cfg(test)]
mod demo_file_test {
    use super::*;

    #[test]
    fn test_extended_metadata_demo_file() {
        let markdown = include_str!("../../quillmark-fixtures/resources/extended_metadata_demo.md");
        let doc = decompose(markdown).unwrap();

        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Extended Metadata Demo"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Quillmark Team"
        );
        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);

        assert!(doc
            .body()
            .unwrap()
            .contains("extended YAML metadata standard"));

        let features = doc.get_field("features").unwrap().as_sequence().unwrap();
        assert_eq!(features.len(), 3);

        let use_cases = doc.get_field("use_cases").unwrap().as_sequence().unwrap();
        assert_eq!(use_cases.len(), 2);

        let feature1 = features[0].as_object().unwrap();
        assert_eq!(
            feature1.get("name").unwrap().as_str().unwrap(),
            "Tag Directives"
        );
    }

    #[test]
    fn test_input_size_limit() {
        let size = crate::error::MAX_INPUT_SIZE + 1;
        let large_markdown = "a".repeat(size);

        let result = decompose(&large_markdown);
        assert!(result.is_err());

        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("Input too large"));
    }

    #[test]
    fn test_yaml_size_limit() {
        let mut markdown = String::from("---\n");

        let size = crate::error::MAX_YAML_SIZE + 1;
        markdown.push_str("data: \"");
        markdown.push_str(&"x".repeat(size));
        markdown.push_str("\"\n---\n\nBody");

        let result = decompose(&markdown);
        assert!(result.is_err());

        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("YAML block too large"));
    }

    #[test]
    fn test_input_within_size_limit() {
        let size = 1000;
        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));

        let result = decompose(&markdown);
        assert!(result.is_ok());
    }

    #[test]
    fn test_yaml_within_size_limit() {
        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";

        let result = decompose(&markdown);
        assert!(result.is_ok());
    }
}