1use std::collections::HashMap;
50
51use crate::value::QuillValue;
52
/// Field name under which body text is stored, both for the document as a whole
/// and for each tagged block item. It is rejected as a `SCOPE` value.
pub const BODY_FIELD: &str = "body";
55
56fn yaml_error_to_string(e: serde_yaml::Error, context: &str) -> String {
58 let mut msg = format!("{}: {}", context, e);
59
60 if let Some(loc) = e.location() {
61 msg.push_str(&format!(" at line {}, column {}", loc.line(), loc.column()));
62 }
63
64 msg
65}
66
/// The string `"quill"`.
// NOTE(review): not referenced in the visible part of this file; confirm where it is used.
pub const QUILL_TAG: &str = "quill";
69
70#[derive(Debug, Clone)]
72pub struct ParsedDocument {
73 fields: HashMap<String, QuillValue>,
74 quill_tag: String,
75}
76
77impl ParsedDocument {
78 pub fn new(fields: HashMap<String, QuillValue>) -> Self {
80 Self {
81 fields,
82 quill_tag: "__default__".to_string(),
83 }
84 }
85
86 pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: String) -> Self {
88 Self { fields, quill_tag }
89 }
90
91 pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
93 decompose(markdown).map_err(|e| crate::error::ParseError::from(e))
94 }
95
96 pub fn quill_tag(&self) -> &str {
98 &self.quill_tag
99 }
100
101 pub fn body(&self) -> Option<&str> {
103 self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
104 }
105
106 pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
108 self.fields.get(name)
109 }
110
111 pub fn fields(&self) -> &HashMap<String, QuillValue> {
113 &self.fields
114 }
115
116 pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
130 let mut fields = self.fields.clone();
131
132 for (field_name, default_value) in defaults {
133 if !fields.contains_key(field_name) {
135 fields.insert(field_name.clone(), default_value.clone());
136 }
137 }
138
139 Self {
140 fields,
141 quill_tag: self.quill_tag.clone(),
142 }
143 }
144
145 pub fn with_coercion(&self, schema: &QuillValue) -> Self {
163 use crate::schema::coerce_document;
164
165 let coerced_fields = coerce_document(schema, &self.fields);
166
167 Self {
168 fields: coerced_fields,
169 quill_tag: self.quill_tag.clone(),
170 }
171 }
172}
173
/// One `---`-delimited metadata block located inside the markdown source.
#[derive(Debug)]
struct MetadataBlock {
    /// Byte offset of the opening `---` delimiter.
    start: usize,
    /// Byte offset just past the closing delimiter.
    end: usize,
    /// YAML text of the block; when the block carries a `QUILL` or `SCOPE` key
    /// this is the mapping re-serialized with that key removed.
    yaml_content: String,
    /// Value of the block's `SCOPE` key, if any.
    tag: Option<String>,
    /// Value of the block's `QUILL` key, if any.
    quill_name: Option<String>,
}
182
/// Returns `true` when `name` matches `[a-z_][a-z0-9_]*`.
///
/// The empty string is rejected; only ASCII lowercase letters, ASCII digits
/// and `_` are accepted, and the first character may not be a digit.
fn is_valid_tag_name(name: &str) -> bool {
    let mut remaining = name.chars();

    match remaining.next() {
        Some(head) if head.is_ascii_lowercase() || head == '_' => {
            remaining.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
        }
        _ => false,
    }
}
204
205fn find_metadata_blocks(
207 markdown: &str,
208) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
209 let mut blocks = Vec::new();
210 let mut pos = 0;
211
212 while pos < markdown.len() {
213 let search_str = &markdown[pos..];
215 let delimiter_result = if let Some(p) = search_str.find("---\n") {
216 Some((p, 4, "\n"))
217 } else if let Some(p) = search_str.find("---\r\n") {
218 Some((p, 5, "\r\n"))
219 } else {
220 None
221 };
222
223 if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
224 let abs_pos = pos + delimiter_pos;
225
226 let is_start_of_line = if abs_pos == 0 {
228 true
229 } else {
230 let char_before = markdown.as_bytes()[abs_pos - 1];
231 char_before == b'\n' || char_before == b'\r'
232 };
233
234 if !is_start_of_line {
235 pos = abs_pos + 1;
236 continue;
237 }
238
239 let content_start = abs_pos + delimiter_len; let preceded_by_blank = if abs_pos > 0 {
243 let before = &markdown[..abs_pos];
245 before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
246 } else {
247 false
248 };
249
250 let followed_by_blank = if content_start < markdown.len() {
251 markdown[content_start..].starts_with('\n')
252 || markdown[content_start..].starts_with("\r\n")
253 } else {
254 false
255 };
256
257 if preceded_by_blank && followed_by_blank {
259 pos = abs_pos + 3; continue;
262 }
263
264 if followed_by_blank {
267 pos = abs_pos + 3;
270 continue;
271 }
272
273 let rest = &markdown[content_start..];
276
277 let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
279 let closing_with_newline = closing_patterns
280 .iter()
281 .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
282 .min_by_key(|(p, _)| *p);
283
284 let closing_at_eof = ["\n---", "\r\n---"]
286 .iter()
287 .filter_map(|delim| {
288 rest.find(delim).and_then(|p| {
289 if p + delim.len() == rest.len() {
290 Some((p, delim.len()))
291 } else {
292 None
293 }
294 })
295 })
296 .min_by_key(|(p, _)| *p);
297
298 let closing_result = match (closing_with_newline, closing_at_eof) {
299 (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
300 (Some(_), Some(_)) => closing_with_newline,
301 (Some(_), None) => closing_with_newline,
302 (None, Some(_)) => closing_at_eof,
303 (None, None) => None,
304 };
305
306 if let Some((closing_pos, closing_len)) = closing_result {
307 let abs_closing_pos = content_start + closing_pos;
308 let content = &markdown[content_start..abs_closing_pos];
309
310 if content.len() > crate::error::MAX_YAML_SIZE {
312 return Err(format!(
313 "YAML block too large: {} bytes (max: {} bytes)",
314 content.len(),
315 crate::error::MAX_YAML_SIZE
316 )
317 .into());
318 }
319
320 let (tag, quill_name, yaml_content) = if !content.is_empty() {
323 match serde_yaml::from_str::<serde_yaml::Value>(content) {
325 Ok(yaml_value) => {
326 if let Some(mapping) = yaml_value.as_mapping() {
327 let quill_key = serde_yaml::Value::String("QUILL".to_string());
328 let scope_key = serde_yaml::Value::String("SCOPE".to_string());
329
330 let has_quill = mapping.contains_key(&quill_key);
331 let has_scope = mapping.contains_key(&scope_key);
332
333 if has_quill && has_scope {
334 return Err(
335 "Cannot specify both QUILL and SCOPE in the same block"
336 .into(),
337 );
338 }
339
340 if has_quill {
341 let quill_value = mapping.get(&quill_key).unwrap();
343 let quill_name_str = quill_value
344 .as_str()
345 .ok_or_else(|| "QUILL value must be a string")?;
346
347 if !is_valid_tag_name(quill_name_str) {
348 return Err(format!(
349 "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
350 quill_name_str
351 )
352 .into());
353 }
354
355 let mut new_mapping = mapping.clone();
357 new_mapping.remove(&quill_key);
358 let new_yaml = serde_yaml::to_string(&new_mapping)
359 .map_err(|e| format!("Failed to serialize YAML: {}", e))?;
360
361 (None, Some(quill_name_str.to_string()), new_yaml)
362 } else if has_scope {
363 let scope_value = mapping.get(&scope_key).unwrap();
365 let field_name = scope_value
366 .as_str()
367 .ok_or_else(|| "SCOPE value must be a string")?;
368
369 if !is_valid_tag_name(field_name) {
370 return Err(format!(
371 "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
372 field_name
373 )
374 .into());
375 }
376
377 if field_name == BODY_FIELD {
378 return Err(format!(
379 "Cannot use reserved field name '{}' as SCOPE value",
380 BODY_FIELD
381 )
382 .into());
383 }
384
385 let mut new_mapping = mapping.clone();
387 new_mapping.remove(&scope_key);
388 let new_yaml = serde_yaml::to_string(&new_mapping)
389 .map_err(|e| format!("Failed to serialize YAML: {}", e))?;
390
391 (Some(field_name.to_string()), None, new_yaml)
392 } else {
393 (None, None, content.to_string())
395 }
396 } else {
397 (None, None, content.to_string())
399 }
400 }
401 Err(_) => {
402 (None, None, content.to_string())
404 }
405 }
406 } else {
407 (None, None, content.to_string())
408 };
409
410 blocks.push(MetadataBlock {
411 start: abs_pos,
412 end: abs_closing_pos + closing_len, yaml_content,
414 tag,
415 quill_name,
416 });
417
418 pos = abs_closing_pos + closing_len;
419 } else if abs_pos == 0 {
420 return Err("Frontmatter started but not closed with ---".into());
422 } else {
423 pos = abs_pos + 3;
425 }
426 } else {
427 break;
428 }
429 }
430
431 Ok(blocks)
432}
433
434fn decompose(markdown: &str) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
436 if markdown.len() > crate::error::MAX_INPUT_SIZE {
438 return Err(format!(
439 "Input too large: {} bytes (max: {} bytes)",
440 markdown.len(),
441 crate::error::MAX_INPUT_SIZE
442 )
443 .into());
444 }
445
446 let mut fields = HashMap::new();
447
448 let blocks = find_metadata_blocks(markdown)?;
450
451 if blocks.is_empty() {
452 fields.insert(
454 BODY_FIELD.to_string(),
455 QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
456 );
457 return Ok(ParsedDocument::new(fields));
458 }
459
460 let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
462 let mut has_global_frontmatter = false;
463 let mut global_frontmatter_index: Option<usize> = None;
464 let mut quill_name: Option<String> = None;
465
466 for (idx, block) in blocks.iter().enumerate() {
468 if let Some(ref name) = block.quill_name {
470 if quill_name.is_some() {
471 return Err("Multiple quill directives found: only one allowed".into());
472 }
473 quill_name = Some(name.clone());
474 }
475
476 if block.tag.is_none() && block.quill_name.is_none() {
478 if has_global_frontmatter {
479 return Err(
480 "Multiple global frontmatter blocks found: only one untagged block allowed"
481 .into(),
482 );
483 }
484 has_global_frontmatter = true;
485 global_frontmatter_index = Some(idx);
486 }
487 }
488
489 if let Some(idx) = global_frontmatter_index {
491 let block = &blocks[idx];
492
493 let yaml_fields: HashMap<String, serde_yaml::Value> = if block.yaml_content.is_empty() {
495 HashMap::new()
496 } else {
497 serde_yaml::from_str(&block.yaml_content)
498 .map_err(|e| yaml_error_to_string(e, "Invalid YAML frontmatter"))?
499 };
500
501 for other_block in &blocks {
504 if let Some(ref tag) = other_block.tag {
505 if let Some(global_value) = yaml_fields.get(tag) {
506 if global_value.as_sequence().is_none() {
508 return Err(format!(
509 "Name collision: global field '{}' conflicts with tagged attribute",
510 tag
511 )
512 .into());
513 }
514 }
515 }
516 }
517
518 for (key, value) in yaml_fields {
520 fields.insert(key, QuillValue::from_yaml(value)?);
521 }
522 }
523
524 for block in &blocks {
526 if block.quill_name.is_some() {
527 if !block.yaml_content.is_empty() {
529 let yaml_fields: HashMap<String, serde_yaml::Value> =
530 serde_yaml::from_str(&block.yaml_content)
531 .map_err(|e| yaml_error_to_string(e, "Invalid YAML in quill block"))?;
532
533 for key in yaml_fields.keys() {
535 if fields.contains_key(key) {
536 return Err(format!(
537 "Name collision: quill block field '{}' conflicts with existing field",
538 key
539 )
540 .into());
541 }
542 }
543
544 for (key, value) in yaml_fields {
546 fields.insert(key, QuillValue::from_yaml(value)?);
547 }
548 }
549 }
550 }
551
552 for (idx, block) in blocks.iter().enumerate() {
554 if let Some(ref tag_name) = block.tag {
555 if let Some(existing_value) = fields.get(tag_name) {
558 if existing_value.as_array().is_none() {
559 return Err(format!(
560 "Name collision: tagged attribute '{}' conflicts with global field",
561 tag_name
562 )
563 .into());
564 }
565 }
566
567 let mut item_fields: HashMap<String, serde_yaml::Value> = if block
569 .yaml_content
570 .is_empty()
571 {
572 HashMap::new()
573 } else {
574 serde_yaml::from_str(&block.yaml_content).map_err(|e| {
575 yaml_error_to_string(e, &format!("Invalid YAML in tagged block '{}'", tag_name))
576 })?
577 };
578
579 let body_start = block.end;
581 let body_end = if idx + 1 < blocks.len() {
582 blocks[idx + 1].start
583 } else {
584 markdown.len()
585 };
586 let body = &markdown[body_start..body_end];
587
588 item_fields.insert(
590 BODY_FIELD.to_string(),
591 serde_yaml::Value::String(body.to_string()),
592 );
593
594 let item_value = serde_yaml::to_value(item_fields)?;
596
597 tagged_attributes
599 .entry(tag_name.clone())
600 .or_insert_with(Vec::new)
601 .push(item_value);
602 }
603 }
604
605 let first_non_scope_block_idx = blocks
609 .iter()
610 .position(|b| b.tag.is_none() && b.quill_name.is_none())
611 .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));
612
613 let (body_start, body_end) = if let Some(idx) = first_non_scope_block_idx {
614 let start = blocks[idx].end;
616
617 let end = blocks
619 .iter()
620 .skip(idx + 1)
621 .find(|b| b.tag.is_some())
622 .map(|b| b.start)
623 .unwrap_or(markdown.len());
624
625 (start, end)
626 } else {
627 let end = blocks
629 .iter()
630 .find(|b| b.tag.is_some())
631 .map(|b| b.start)
632 .unwrap_or(0);
633
634 (0, end)
635 };
636
637 let global_body = &markdown[body_start..body_end];
638
639 fields.insert(
640 BODY_FIELD.to_string(),
641 QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
642 );
643
644 for (tag_name, items) in tagged_attributes {
647 if let Some(existing_value) = fields.get(&tag_name) {
648 if let Some(existing_array) = existing_value.as_array() {
650 let new_items_json: Vec<serde_json::Value> = items
652 .into_iter()
653 .map(|yaml_val| {
654 serde_json::to_value(&yaml_val)
655 .map_err(|e| format!("Failed to convert YAML to JSON: {}", e))
656 })
657 .collect::<Result<Vec<_>, _>>()?;
658
659 let mut merged_array = existing_array.clone();
661 merged_array.extend(new_items_json);
662
663 let quill_value = QuillValue::from_json(serde_json::Value::Array(merged_array));
665 fields.insert(tag_name, quill_value);
666 } else {
667 return Err(format!(
669 "Internal error: field '{}' exists but is not an array",
670 tag_name
671 )
672 .into());
673 }
674 } else {
675 let quill_value = QuillValue::from_yaml(serde_yaml::Value::Sequence(items))?;
677 fields.insert(tag_name, quill_value);
678 }
679 }
680
681 let quill_tag = quill_name.unwrap_or_else(|| "__default__".to_string());
682 let parsed = ParsedDocument::with_quill_tag(fields, quill_tag);
683
684 Ok(parsed)
685}
686
687#[cfg(test)]
688mod tests {
689 use super::*;
690
691 #[test]
692 fn test_no_frontmatter() {
693 let markdown = "# Hello World\n\nThis is a test.";
694 let doc = decompose(markdown).unwrap();
695
696 assert_eq!(doc.body(), Some(markdown));
697 assert_eq!(doc.fields().len(), 1);
698 assert_eq!(doc.quill_tag(), "__default__");
700 }
701
702 #[test]
703 fn test_with_frontmatter() {
704 let markdown = r#"---
705title: Test Document
706author: Test Author
707---
708
709# Hello World
710
711This is the body."#;
712
713 let doc = decompose(markdown).unwrap();
714
715 assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
716 assert_eq!(
717 doc.get_field("title").unwrap().as_str().unwrap(),
718 "Test Document"
719 );
720 assert_eq!(
721 doc.get_field("author").unwrap().as_str().unwrap(),
722 "Test Author"
723 );
724 assert_eq!(doc.fields().len(), 3); assert_eq!(doc.quill_tag(), "__default__");
727 }
728
729 #[test]
730 fn test_complex_yaml_frontmatter() {
731 let markdown = r#"---
732title: Complex Document
733tags:
734 - test
735 - yaml
736metadata:
737 version: 1.0
738 nested:
739 field: value
740---
741
742Content here."#;
743
744 let doc = decompose(markdown).unwrap();
745
746 assert_eq!(doc.body(), Some("\nContent here."));
747 assert_eq!(
748 doc.get_field("title").unwrap().as_str().unwrap(),
749 "Complex Document"
750 );
751
752 let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
753 assert_eq!(tags.len(), 2);
754 assert_eq!(tags[0].as_str().unwrap(), "test");
755 assert_eq!(tags[1].as_str().unwrap(), "yaml");
756 }
757
758 #[test]
759 fn test_with_defaults_empty_document() {
760 use std::collections::HashMap;
761
762 let mut defaults = HashMap::new();
763 defaults.insert(
764 "status".to_string(),
765 QuillValue::from_json(serde_json::json!("draft")),
766 );
767 defaults.insert(
768 "version".to_string(),
769 QuillValue::from_json(serde_json::json!(1)),
770 );
771
772 let doc = ParsedDocument::new(HashMap::new());
774 let doc_with_defaults = doc.with_defaults(&defaults);
775
776 assert_eq!(
778 doc_with_defaults
779 .get_field("status")
780 .unwrap()
781 .as_str()
782 .unwrap(),
783 "draft"
784 );
785 assert_eq!(
786 doc_with_defaults
787 .get_field("version")
788 .unwrap()
789 .as_number()
790 .unwrap()
791 .as_i64()
792 .unwrap(),
793 1
794 );
795 }
796
797 #[test]
798 fn test_with_defaults_preserves_existing_values() {
799 use std::collections::HashMap;
800
801 let mut defaults = HashMap::new();
802 defaults.insert(
803 "status".to_string(),
804 QuillValue::from_json(serde_json::json!("draft")),
805 );
806
807 let mut fields = HashMap::new();
809 fields.insert(
810 "status".to_string(),
811 QuillValue::from_json(serde_json::json!("published")),
812 );
813 let doc = ParsedDocument::new(fields);
814
815 let doc_with_defaults = doc.with_defaults(&defaults);
816
817 assert_eq!(
819 doc_with_defaults
820 .get_field("status")
821 .unwrap()
822 .as_str()
823 .unwrap(),
824 "published"
825 );
826 }
827
828 #[test]
829 fn test_with_defaults_partial_application() {
830 use std::collections::HashMap;
831
832 let mut defaults = HashMap::new();
833 defaults.insert(
834 "status".to_string(),
835 QuillValue::from_json(serde_json::json!("draft")),
836 );
837 defaults.insert(
838 "version".to_string(),
839 QuillValue::from_json(serde_json::json!(1)),
840 );
841
842 let mut fields = HashMap::new();
844 fields.insert(
845 "status".to_string(),
846 QuillValue::from_json(serde_json::json!("published")),
847 );
848 let doc = ParsedDocument::new(fields);
849
850 let doc_with_defaults = doc.with_defaults(&defaults);
851
852 assert_eq!(
854 doc_with_defaults
855 .get_field("status")
856 .unwrap()
857 .as_str()
858 .unwrap(),
859 "published"
860 );
861 assert_eq!(
862 doc_with_defaults
863 .get_field("version")
864 .unwrap()
865 .as_number()
866 .unwrap()
867 .as_i64()
868 .unwrap(),
869 1
870 );
871 }
872
873 #[test]
874 fn test_with_defaults_no_defaults() {
875 use std::collections::HashMap;
876
877 let defaults = HashMap::new(); let doc = ParsedDocument::new(HashMap::new());
880 let doc_with_defaults = doc.with_defaults(&defaults);
881
882 assert!(doc_with_defaults.fields().is_empty());
884 }
885
886 #[test]
887 fn test_with_defaults_complex_types() {
888 use std::collections::HashMap;
889
890 let mut defaults = HashMap::new();
891 defaults.insert(
892 "tags".to_string(),
893 QuillValue::from_json(serde_json::json!(["default", "tag"])),
894 );
895
896 let doc = ParsedDocument::new(HashMap::new());
897 let doc_with_defaults = doc.with_defaults(&defaults);
898
899 let tags = doc_with_defaults
901 .get_field("tags")
902 .unwrap()
903 .as_sequence()
904 .unwrap();
905 assert_eq!(tags.len(), 2);
906 assert_eq!(tags[0].as_str().unwrap(), "default");
907 assert_eq!(tags[1].as_str().unwrap(), "tag");
908 }
909
910 #[test]
911 fn test_with_coercion_singular_to_array() {
912 use std::collections::HashMap;
913
914 let schema = QuillValue::from_json(serde_json::json!({
915 "$schema": "https://json-schema.org/draft/2019-09/schema",
916 "type": "object",
917 "properties": {
918 "tags": {"type": "array"}
919 }
920 }));
921
922 let mut fields = HashMap::new();
923 fields.insert(
924 "tags".to_string(),
925 QuillValue::from_json(serde_json::json!("single-tag")),
926 );
927 let doc = ParsedDocument::new(fields);
928
929 let coerced_doc = doc.with_coercion(&schema);
930
931 let tags = coerced_doc.get_field("tags").unwrap();
932 assert!(tags.as_array().is_some());
933 let tags_array = tags.as_array().unwrap();
934 assert_eq!(tags_array.len(), 1);
935 assert_eq!(tags_array[0].as_str().unwrap(), "single-tag");
936 }
937
938 #[test]
939 fn test_with_coercion_string_to_boolean() {
940 use std::collections::HashMap;
941
942 let schema = QuillValue::from_json(serde_json::json!({
943 "$schema": "https://json-schema.org/draft/2019-09/schema",
944 "type": "object",
945 "properties": {
946 "active": {"type": "boolean"}
947 }
948 }));
949
950 let mut fields = HashMap::new();
951 fields.insert(
952 "active".to_string(),
953 QuillValue::from_json(serde_json::json!("true")),
954 );
955 let doc = ParsedDocument::new(fields);
956
957 let coerced_doc = doc.with_coercion(&schema);
958
959 assert_eq!(
960 coerced_doc.get_field("active").unwrap().as_bool().unwrap(),
961 true
962 );
963 }
964
965 #[test]
966 fn test_with_coercion_string_to_number() {
967 use std::collections::HashMap;
968
969 let schema = QuillValue::from_json(serde_json::json!({
970 "$schema": "https://json-schema.org/draft/2019-09/schema",
971 "type": "object",
972 "properties": {
973 "count": {"type": "number"}
974 }
975 }));
976
977 let mut fields = HashMap::new();
978 fields.insert(
979 "count".to_string(),
980 QuillValue::from_json(serde_json::json!("42")),
981 );
982 let doc = ParsedDocument::new(fields);
983
984 let coerced_doc = doc.with_coercion(&schema);
985
986 assert_eq!(
987 coerced_doc.get_field("count").unwrap().as_i64().unwrap(),
988 42
989 );
990 }
991
992 #[test]
993 fn test_invalid_yaml() {
994 let markdown = r#"---
995title: [invalid yaml
996author: missing close bracket
997---
998
999Content here."#;
1000
1001 let result = decompose(markdown);
1002 assert!(result.is_err());
1003 assert!(result
1004 .unwrap_err()
1005 .to_string()
1006 .contains("Invalid YAML frontmatter"));
1007 }
1008
1009 #[test]
1010 fn test_unclosed_frontmatter() {
1011 let markdown = r#"---
1012title: Test
1013author: Test Author
1014
1015Content without closing ---"#;
1016
1017 let result = decompose(markdown);
1018 assert!(result.is_err());
1019 assert!(result.unwrap_err().to_string().contains("not closed"));
1020 }
1021
1022 #[test]
1025 fn test_basic_tagged_block() {
1026 let markdown = r#"---
1027title: Main Document
1028---
1029
1030Main body content.
1031
1032---
1033SCOPE: items
1034name: Item 1
1035---
1036
1037Body of item 1."#;
1038
1039 let doc = decompose(markdown).unwrap();
1040
1041 assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
1042 assert_eq!(
1043 doc.get_field("title").unwrap().as_str().unwrap(),
1044 "Main Document"
1045 );
1046
1047 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1048 assert_eq!(items.len(), 1);
1049
1050 let item = items[0].as_object().unwrap();
1051 assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1052 assert_eq!(
1053 item.get("body").unwrap().as_str().unwrap(),
1054 "\nBody of item 1."
1055 );
1056 }
1057
1058 #[test]
1059 fn test_multiple_tagged_blocks() {
1060 let markdown = r#"---
1061SCOPE: items
1062name: Item 1
1063tags: [a, b]
1064---
1065
1066First item body.
1067
1068---
1069SCOPE: items
1070name: Item 2
1071tags: [c, d]
1072---
1073
1074Second item body."#;
1075
1076 let doc = decompose(markdown).unwrap();
1077
1078 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1079 assert_eq!(items.len(), 2);
1080
1081 let item1 = items[0].as_object().unwrap();
1082 assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");
1083
1084 let item2 = items[1].as_object().unwrap();
1085 assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
1086 }
1087
1088 #[test]
1089 fn test_mixed_global_and_tagged() {
1090 let markdown = r#"---
1091title: Global
1092author: John Doe
1093---
1094
1095Global body.
1096
1097---
1098SCOPE: sections
1099title: Section 1
1100---
1101
1102Section 1 content.
1103
1104---
1105SCOPE: sections
1106title: Section 2
1107---
1108
1109Section 2 content."#;
1110
1111 let doc = decompose(markdown).unwrap();
1112
1113 assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
1114 assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
1115
1116 let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1117 assert_eq!(sections.len(), 2);
1118 }
1119
1120 #[test]
1121 fn test_empty_tagged_metadata() {
1122 let markdown = r#"---
1123SCOPE: items
1124---
1125
1126Body without metadata."#;
1127
1128 let doc = decompose(markdown).unwrap();
1129
1130 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1131 assert_eq!(items.len(), 1);
1132
1133 let item = items[0].as_object().unwrap();
1134 assert_eq!(
1135 item.get("body").unwrap().as_str().unwrap(),
1136 "\nBody without metadata."
1137 );
1138 }
1139
1140 #[test]
1141 fn test_tagged_block_without_body() {
1142 let markdown = r#"---
1143SCOPE: items
1144name: Item
1145---"#;
1146
1147 let doc = decompose(markdown).unwrap();
1148
1149 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1150 assert_eq!(items.len(), 1);
1151
1152 let item = items[0].as_object().unwrap();
1153 assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
1154 }
1155
1156 #[test]
1157 fn test_name_collision_global_and_tagged() {
1158 let markdown = r#"---
1159items: "global value"
1160---
1161
1162Body
1163
1164---
1165SCOPE: items
1166name: Item
1167---
1168
1169Item body"#;
1170
1171 let result = decompose(markdown);
1172 assert!(result.is_err());
1173 assert!(result.unwrap_err().to_string().contains("collision"));
1174 }
1175
1176 #[test]
1177 fn test_global_array_merged_with_scope() {
1178 let markdown = r#"---
1181items:
1182 - name: Global Item 1
1183 value: 100
1184 - name: Global Item 2
1185 value: 200
1186---
1187
1188Global body
1189
1190---
1191SCOPE: items
1192name: Scope Item 1
1193value: 300
1194---
1195
1196Scope item 1 body
1197
1198---
1199SCOPE: items
1200name: Scope Item 2
1201value: 400
1202---
1203
1204Scope item 2 body"#;
1205
1206 let doc = decompose(markdown).unwrap();
1207
1208 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1210 assert_eq!(items.len(), 4);
1211
1212 let item1 = items[0].as_object().unwrap();
1214 assert_eq!(
1215 item1.get("name").unwrap().as_str().unwrap(),
1216 "Global Item 1"
1217 );
1218 assert_eq!(item1.get("value").unwrap().as_i64().unwrap(), 100);
1219
1220 let item2 = items[1].as_object().unwrap();
1221 assert_eq!(
1222 item2.get("name").unwrap().as_str().unwrap(),
1223 "Global Item 2"
1224 );
1225 assert_eq!(item2.get("value").unwrap().as_i64().unwrap(), 200);
1226
1227 let item3 = items[2].as_object().unwrap();
1229 assert_eq!(item3.get("name").unwrap().as_str().unwrap(), "Scope Item 1");
1230 assert_eq!(item3.get("value").unwrap().as_i64().unwrap(), 300);
1231 assert_eq!(
1232 item3.get("body").unwrap().as_str().unwrap(),
1233 "\nScope item 1 body\n\n"
1234 );
1235
1236 let item4 = items[3].as_object().unwrap();
1237 assert_eq!(item4.get("name").unwrap().as_str().unwrap(), "Scope Item 2");
1238 assert_eq!(item4.get("value").unwrap().as_i64().unwrap(), 400);
1239 assert_eq!(
1240 item4.get("body").unwrap().as_str().unwrap(),
1241 "\nScope item 2 body"
1242 );
1243 }
1244
1245 #[test]
1246 fn test_empty_global_array_with_scope() {
1247 let markdown = r#"---
1249items: []
1250---
1251
1252Global body
1253
1254---
1255SCOPE: items
1256name: Item 1
1257---
1258
1259Item 1 body"#;
1260
1261 let doc = decompose(markdown).unwrap();
1262
1263 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1264 assert_eq!(items.len(), 1);
1265
1266 let item = items[0].as_object().unwrap();
1267 assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1268 }
1269
1270 #[test]
1271 fn test_reserved_field_name() {
1272 let markdown = r#"---
1273SCOPE: body
1274content: Test
1275---"#;
1276
1277 let result = decompose(markdown);
1278 assert!(result.is_err());
1279 assert!(result.unwrap_err().to_string().contains("reserved"));
1280 }
1281
1282 #[test]
1283 fn test_invalid_tag_syntax() {
1284 let markdown = r#"---
1285SCOPE: Invalid-Name
1286title: Test
1287---"#;
1288
1289 let result = decompose(markdown);
1290 assert!(result.is_err());
1291 assert!(result
1292 .unwrap_err()
1293 .to_string()
1294 .contains("Invalid field name"));
1295 }
1296
1297 #[test]
1298 fn test_multiple_global_frontmatter_blocks() {
1299 let markdown = r#"---
1300title: First
1301---
1302
1303Body
1304
1305---
1306author: Second
1307---
1308
1309More body"#;
1310
1311 let result = decompose(markdown);
1312 assert!(result.is_err());
1313 assert!(result
1314 .unwrap_err()
1315 .to_string()
1316 .contains("Multiple global frontmatter"));
1317 }
1318
1319 #[test]
1320 fn test_adjacent_blocks_different_tags() {
1321 let markdown = r#"---
1322SCOPE: items
1323name: Item 1
1324---
1325
1326Item 1 body
1327
1328---
1329SCOPE: sections
1330title: Section 1
1331---
1332
1333Section 1 body"#;
1334
1335 let doc = decompose(markdown).unwrap();
1336
1337 assert!(doc.get_field("items").is_some());
1338 assert!(doc.get_field("sections").is_some());
1339
1340 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1341 assert_eq!(items.len(), 1);
1342
1343 let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1344 assert_eq!(sections.len(), 1);
1345 }
1346
1347 #[test]
1348 fn test_order_preservation() {
1349 let markdown = r#"---
1350SCOPE: items
1351id: 1
1352---
1353
1354First
1355
1356---
1357SCOPE: items
1358id: 2
1359---
1360
1361Second
1362
1363---
1364SCOPE: items
1365id: 3
1366---
1367
1368Third"#;
1369
1370 let doc = decompose(markdown).unwrap();
1371
1372 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1373 assert_eq!(items.len(), 3);
1374
1375 for (i, item) in items.iter().enumerate() {
1376 let mapping = item.as_object().unwrap();
1377 let id = mapping.get("id").unwrap().as_i64().unwrap();
1378 assert_eq!(id, (i + 1) as i64);
1379 }
1380 }
1381
1382 #[test]
1383 fn test_product_catalog_integration() {
1384 let markdown = r#"---
1385title: Product Catalog
1386author: John Doe
1387date: 2024-01-01
1388---
1389
1390This is the main catalog description.
1391
1392---
1393SCOPE: products
1394name: Widget A
1395price: 19.99
1396sku: WID-001
1397---
1398
1399The **Widget A** is our most popular product.
1400
1401---
1402SCOPE: products
1403name: Gadget B
1404price: 29.99
1405sku: GAD-002
1406---
1407
1408The **Gadget B** is perfect for professionals.
1409
1410---
1411SCOPE: reviews
1412product: Widget A
1413rating: 5
1414---
1415
1416"Excellent product! Highly recommended."
1417
1418---
1419SCOPE: reviews
1420product: Gadget B
1421rating: 4
1422---
1423
1424"Very good, but a bit pricey.""#;
1425
1426 let doc = decompose(markdown).unwrap();
1427
1428 assert_eq!(
1430 doc.get_field("title").unwrap().as_str().unwrap(),
1431 "Product Catalog"
1432 );
1433 assert_eq!(
1434 doc.get_field("author").unwrap().as_str().unwrap(),
1435 "John Doe"
1436 );
1437 assert_eq!(
1438 doc.get_field("date").unwrap().as_str().unwrap(),
1439 "2024-01-01"
1440 );
1441
1442 assert!(doc.body().unwrap().contains("main catalog description"));
1444
1445 let products = doc.get_field("products").unwrap().as_sequence().unwrap();
1447 assert_eq!(products.len(), 2);
1448
1449 let product1 = products[0].as_object().unwrap();
1450 assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
1451 assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);
1452
1453 let reviews = doc.get_field("reviews").unwrap().as_sequence().unwrap();
1455 assert_eq!(reviews.len(), 2);
1456
1457 let review1 = reviews[0].as_object().unwrap();
1458 assert_eq!(
1459 review1.get("product").unwrap().as_str().unwrap(),
1460 "Widget A"
1461 );
1462 assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);
1463
1464 assert_eq!(doc.fields().len(), 6);
1466 }
1467
1468 #[test]
1469 fn taro_quill_directive() {
1470 let markdown = r#"---
1471QUILL: usaf_memo
1472memo_for: [ORG/SYMBOL]
1473memo_from: [ORG/SYMBOL]
1474---
1475
1476This is the memo body."#;
1477
1478 let doc = decompose(markdown).unwrap();
1479
1480 assert_eq!(doc.quill_tag(), "usaf_memo");
1482
1483 assert_eq!(
1485 doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1486 .as_str()
1487 .unwrap(),
1488 "ORG/SYMBOL"
1489 );
1490
1491 assert_eq!(doc.body(), Some("\nThis is the memo body."));
1493 }
1494
1495 #[test]
1496 fn test_quill_with_scope_blocks() {
1497 let markdown = r#"---
1498QUILL: document
1499title: Test Document
1500---
1501
1502Main body.
1503
1504---
1505SCOPE: sections
1506name: Section 1
1507---
1508
1509Section 1 body."#;
1510
1511 let doc = decompose(markdown).unwrap();
1512
1513 assert_eq!(doc.quill_tag(), "document");
1515
1516 assert_eq!(
1518 doc.get_field("title").unwrap().as_str().unwrap(),
1519 "Test Document"
1520 );
1521
1522 let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1524 assert_eq!(sections.len(), 1);
1525
1526 assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1528 }
1529
1530 #[test]
1531 fn test_multiple_quill_directives_error() {
1532 let markdown = r#"---
1533QUILL: first
1534---
1535
1536---
1537QUILL: second
1538---"#;
1539
1540 let result = decompose(markdown);
1541 assert!(result.is_err());
1542 assert!(result
1543 .unwrap_err()
1544 .to_string()
1545 .contains("Multiple quill directives"));
1546 }
1547
1548 #[test]
1549 fn test_invalid_quill_name() {
1550 let markdown = r#"---
1551QUILL: Invalid-Name
1552---"#;
1553
1554 let result = decompose(markdown);
1555 assert!(result.is_err());
1556 assert!(result
1557 .unwrap_err()
1558 .to_string()
1559 .contains("Invalid quill name"));
1560 }
1561
1562 #[test]
1563 fn test_quill_wrong_value_type() {
1564 let markdown = r#"---
1565QUILL: 123
1566---"#;
1567
1568 let result = decompose(markdown);
1569 assert!(result.is_err());
1570 assert!(result
1571 .unwrap_err()
1572 .to_string()
1573 .contains("QUILL value must be a string"));
1574 }
1575
1576 #[test]
1577 fn test_scope_wrong_value_type() {
1578 let markdown = r#"---
1579SCOPE: 123
1580---"#;
1581
1582 let result = decompose(markdown);
1583 assert!(result.is_err());
1584 assert!(result
1585 .unwrap_err()
1586 .to_string()
1587 .contains("SCOPE value must be a string"));
1588 }
1589
1590 #[test]
1591 fn test_both_quill_and_scope_error() {
1592 let markdown = r#"---
1593QUILL: test
1594SCOPE: items
1595---"#;
1596
1597 let result = decompose(markdown);
1598 assert!(result.is_err());
1599 assert!(result
1600 .unwrap_err()
1601 .to_string()
1602 .contains("Cannot specify both QUILL and SCOPE"));
1603 }
1604
1605 #[test]
1606 fn test_blank_lines_in_frontmatter() {
1607 let markdown = r#"---
1609title: Test Document
1610author: Test Author
1611
1612description: This has a blank line above it
1613tags:
1614 - one
1615 - two
1616---
1617
1618# Hello World
1619
1620This is the body."#;
1621
1622 let doc = decompose(markdown).unwrap();
1623
1624 assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
1625 assert_eq!(
1626 doc.get_field("title").unwrap().as_str().unwrap(),
1627 "Test Document"
1628 );
1629 assert_eq!(
1630 doc.get_field("author").unwrap().as_str().unwrap(),
1631 "Test Author"
1632 );
1633 assert_eq!(
1634 doc.get_field("description").unwrap().as_str().unwrap(),
1635 "This has a blank line above it"
1636 );
1637
1638 let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
1639 assert_eq!(tags.len(), 2);
1640 }
1641
1642 #[test]
1643 fn test_blank_lines_in_scope_blocks() {
1644 let markdown = r#"---
1646SCOPE: items
1647name: Item 1
1648
1649price: 19.99
1650
1651tags:
1652 - electronics
1653 - gadgets
1654---
1655
1656Body of item 1."#;
1657
1658 let doc = decompose(markdown).unwrap();
1659
1660 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1661 assert_eq!(items.len(), 1);
1662
1663 let item = items[0].as_object().unwrap();
1664 assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1665 assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);
1666
1667 let tags = item.get("tags").unwrap().as_array().unwrap();
1668 assert_eq!(tags.len(), 2);
1669 }
1670
1671 #[test]
1672 fn test_horizontal_rule_with_blank_lines_above_and_below() {
1673 let markdown = r#"---
1675title: Test
1676---
1677
1678First paragraph.
1679
1680---
1681
1682Second paragraph."#;
1683
1684 let doc = decompose(markdown).unwrap();
1685
1686 assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1687
1688 let body = doc.body().unwrap();
1690 assert!(body.contains("First paragraph."));
1691 assert!(body.contains("---"));
1692 assert!(body.contains("Second paragraph."));
1693 }
1694
1695 #[test]
1696 fn test_horizontal_rule_not_preceded_by_blank() {
1697 let markdown = r#"---
1700title: Test
1701---
1702
1703First paragraph.
1704---
1705
1706Second paragraph."#;
1707
1708 let doc = decompose(markdown).unwrap();
1709
1710 let body = doc.body().unwrap();
1711 assert!(body.contains("---"));
1713 }
1714
1715 #[test]
1716 fn test_multiple_blank_lines_in_yaml() {
1717 let markdown = r#"---
1719title: Test
1720
1721
1722author: John Doe
1723
1724
1725version: 1.0
1726---
1727
1728Body content."#;
1729
1730 let doc = decompose(markdown).unwrap();
1731
1732 assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1733 assert_eq!(
1734 doc.get_field("author").unwrap().as_str().unwrap(),
1735 "John Doe"
1736 );
1737 assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1738 }
1739
1740 #[test]
1741 fn test_html_comment_interaction() {
1742 let markdown = r#"<!---
1743---> the rest of the page content
1744
1745---
1746key: value
1747---
1748"#;
1749 let doc = decompose(markdown).unwrap();
1750
1751 let key = doc.get_field("key").and_then(|v| v.as_str());
1754 assert_eq!(key, Some("value"));
1755 }
1756}
// Tests that parse the bundled demo fixture, plus checks around the input and
// YAML size limits enforced by `decompose`.
#[cfg(test)]
mod demo_file_test {
    use super::*;

    #[test]
    fn test_extended_metadata_demo_file() {
        // The fixture is embedded at compile time, so the test needs no file I/O.
        let source = include_str!("../../fixtures/resources/extended_metadata_demo.md");
        let parsed = decompose(source).unwrap();

        // Top-level frontmatter fields.
        let title = parsed.get_field("title").unwrap().as_str().unwrap();
        assert_eq!(title, "Extended Metadata Demo");

        let author = parsed.get_field("author").unwrap().as_str().unwrap();
        assert_eq!(author, "Quillmark Team");

        let version = parsed.get_field("version").unwrap().as_f64().unwrap();
        assert_eq!(version, 1.0);

        // Global body text.
        let text = parsed.body().unwrap();
        assert!(text.contains("extended YAML metadata standard"));

        // Collections exposed as sequences.
        let features = parsed.get_field("features").unwrap().as_sequence().unwrap();
        assert_eq!(features.len(), 3);

        let use_cases = parsed.get_field("use_cases").unwrap().as_sequence().unwrap();
        assert_eq!(use_cases.len(), 2);

        // Spot-check the first feature entry.
        let first_feature = features[0].as_object().unwrap();
        let feature_name = first_feature.get("name").unwrap().as_str().unwrap();
        assert_eq!(feature_name, "Tag Directives");
    }

    #[test]
    fn test_input_size_limit() {
        // One byte more than the maximum accepted input.
        let oversized = "a".repeat(crate::error::MAX_INPUT_SIZE + 1);

        let message = decompose(&oversized)
            .expect_err("input above MAX_INPUT_SIZE should be rejected")
            .to_string();
        assert!(message.contains("Input too large"));
    }

    #[test]
    fn test_yaml_size_limit() {
        // The quoted value alone is one byte longer than the YAML limit.
        let filler = "x".repeat(crate::error::MAX_YAML_SIZE + 1);
        let source = format!("---\ndata: \"{}\"\n---\n\nBody", filler);

        let message = decompose(&source)
            .expect_err("a YAML block above MAX_YAML_SIZE should be rejected")
            .to_string();
        assert!(message.contains("YAML block too large"));
    }

    #[test]
    fn test_input_within_size_limit() {
        // A small document with a 1000-character body.
        let filler = "a".repeat(1000);
        let source = format!("---\ntitle: Test\n---\n\n{}", filler);

        assert!(decompose(&source).is_ok());
    }

    #[test]
    fn test_yaml_within_size_limit() {
        // A tiny frontmatter block.
        let source = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";

        assert!(decompose(source).is_ok());
    }
}