1use std::collections::HashMap;
50
51use crate::value::QuillValue;
52
/// Field name under which a document's (or a tagged item's) body text is stored.
pub const BODY_FIELD: &str = "body";

/// The literal `"quill"`.
///
/// NOTE(review): not referenced in the visible part of this file — presumably the
/// reserved key/tag used for quill selection elsewhere; confirm against callers.
pub const QUILL_TAG: &str = "quill";
58
59#[derive(Debug, Clone)]
61pub struct ParsedDocument {
62 fields: HashMap<String, QuillValue>,
63 quill_tag: String,
64}
65
66impl ParsedDocument {
67 pub fn new(fields: HashMap<String, QuillValue>) -> Self {
69 Self {
70 fields,
71 quill_tag: "__default__".to_string(),
72 }
73 }
74
75 pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: String) -> Self {
77 Self { fields, quill_tag }
78 }
79
80 pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
82 decompose(markdown).map_err(|e| crate::error::ParseError::from(e))
83 }
84
85 pub fn quill_tag(&self) -> &str {
87 &self.quill_tag
88 }
89
90 pub fn body(&self) -> Option<&str> {
92 self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
93 }
94
95 pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
97 self.fields.get(name)
98 }
99
100 pub fn fields(&self) -> &HashMap<String, QuillValue> {
102 &self.fields
103 }
104
105 pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
119 let mut fields = self.fields.clone();
120
121 for (field_name, default_value) in defaults {
122 if !fields.contains_key(field_name) {
124 fields.insert(field_name.clone(), default_value.clone());
125 }
126 }
127
128 Self {
129 fields,
130 quill_tag: self.quill_tag.clone(),
131 }
132 }
133
134 pub fn with_coercion(&self, schema: &QuillValue) -> Self {
152 use crate::schema::coerce_document;
153
154 let coerced_fields = coerce_document(schema, &self.fields);
155
156 Self {
157 fields: coerced_fields,
158 quill_tag: self.quill_tag.clone(),
159 }
160 }
161}
162
163#[derive(Debug)]
164struct MetadataBlock {
165 start: usize, end: usize, yaml_value: Option<serde_yaml::Value>, tag: Option<String>, quill_name: Option<String>, }
171
/// Returns `true` when `name` matches `[a-z_][a-z0-9_]*`.
///
/// The empty string is rejected, as is any non-ASCII or uppercase character.
fn is_valid_tag_name(name: &str) -> bool {
    let mut chars = name.chars();

    match chars.next() {
        // First character: lowercase ASCII letter or underscore.
        Some(first) if first.is_ascii_lowercase() || first == '_' => {
            // Remaining characters may additionally be ASCII digits.
            chars.all(|ch| ch.is_ascii_lowercase() || ch.is_ascii_digit() || ch == '_')
        }
        // Empty input or an invalid leading character.
        _ => false,
    }
}
193
194fn find_metadata_blocks(
196 markdown: &str,
197) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
198 let mut blocks = Vec::new();
199 let mut pos = 0;
200
201 while pos < markdown.len() {
202 let search_str = &markdown[pos..];
204 let delimiter_result = if let Some(p) = search_str.find("---\n") {
205 Some((p, 4, "\n"))
206 } else if let Some(p) = search_str.find("---\r\n") {
207 Some((p, 5, "\r\n"))
208 } else {
209 None
210 };
211
212 if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
213 let abs_pos = pos + delimiter_pos;
214
215 let is_start_of_line = if abs_pos == 0 {
217 true
218 } else {
219 let char_before = markdown.as_bytes()[abs_pos - 1];
220 char_before == b'\n' || char_before == b'\r'
221 };
222
223 if !is_start_of_line {
224 pos = abs_pos + 1;
225 continue;
226 }
227
228 let content_start = abs_pos + delimiter_len; let preceded_by_blank = if abs_pos > 0 {
232 let before = &markdown[..abs_pos];
234 before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
235 } else {
236 false
237 };
238
239 let followed_by_blank = if content_start < markdown.len() {
240 markdown[content_start..].starts_with('\n')
241 || markdown[content_start..].starts_with("\r\n")
242 } else {
243 false
244 };
245
246 if preceded_by_blank && followed_by_blank {
248 pos = abs_pos + 3; continue;
251 }
252
253 if followed_by_blank {
256 pos = abs_pos + 3;
259 continue;
260 }
261
262 let rest = &markdown[content_start..];
265
266 let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
268 let closing_with_newline = closing_patterns
269 .iter()
270 .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
271 .min_by_key(|(p, _)| *p);
272
273 let closing_at_eof = ["\n---", "\r\n---"]
275 .iter()
276 .filter_map(|delim| {
277 rest.find(delim).and_then(|p| {
278 if p + delim.len() == rest.len() {
279 Some((p, delim.len()))
280 } else {
281 None
282 }
283 })
284 })
285 .min_by_key(|(p, _)| *p);
286
287 let closing_result = match (closing_with_newline, closing_at_eof) {
288 (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
289 (Some(_), Some(_)) => closing_with_newline,
290 (Some(_), None) => closing_with_newline,
291 (None, Some(_)) => closing_at_eof,
292 (None, None) => None,
293 };
294
295 if let Some((closing_pos, closing_len)) = closing_result {
296 let abs_closing_pos = content_start + closing_pos;
297 let content = &markdown[content_start..abs_closing_pos];
298
299 if content.len() > crate::error::MAX_YAML_SIZE {
301 return Err(format!(
302 "YAML block too large: {} bytes (max: {} bytes)",
303 content.len(),
304 crate::error::MAX_YAML_SIZE
305 )
306 .into());
307 }
308
309 let (tag, quill_name, yaml_value) = if !content.is_empty() {
312 match serde_yaml::from_str::<serde_yaml::Value>(content) {
314 Ok(parsed_yaml) => {
315 if let Some(mapping) = parsed_yaml.as_mapping() {
316 let quill_key = serde_yaml::Value::String("QUILL".to_string());
317 let scope_key = serde_yaml::Value::String("SCOPE".to_string());
318
319 let has_quill = mapping.contains_key(&quill_key);
320 let has_scope = mapping.contains_key(&scope_key);
321
322 if has_quill && has_scope {
323 return Err(
324 "Cannot specify both QUILL and SCOPE in the same block"
325 .into(),
326 );
327 }
328
329 if has_quill {
330 let quill_value = mapping.get(&quill_key).unwrap();
332 let quill_name_str = quill_value
333 .as_str()
334 .ok_or_else(|| "QUILL value must be a string")?;
335
336 if !is_valid_tag_name(quill_name_str) {
337 return Err(format!(
338 "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
339 quill_name_str
340 )
341 .into());
342 }
343
344 let mut new_mapping = mapping.clone();
346 new_mapping.remove(&quill_key);
347 let new_value = if new_mapping.is_empty() {
348 None
349 } else {
350 Some(serde_yaml::Value::Mapping(new_mapping))
351 };
352
353 (None, Some(quill_name_str.to_string()), new_value)
354 } else if has_scope {
355 let scope_value = mapping.get(&scope_key).unwrap();
357 let field_name = scope_value
358 .as_str()
359 .ok_or_else(|| "SCOPE value must be a string")?;
360
361 if !is_valid_tag_name(field_name) {
362 return Err(format!(
363 "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
364 field_name
365 )
366 .into());
367 }
368
369 if field_name == BODY_FIELD {
370 return Err(format!(
371 "Cannot use reserved field name '{}' as SCOPE value",
372 BODY_FIELD
373 )
374 .into());
375 }
376
377 let mut new_mapping = mapping.clone();
379 new_mapping.remove(&scope_key);
380 let new_value = if new_mapping.is_empty() {
381 None
382 } else {
383 Some(serde_yaml::Value::Mapping(new_mapping))
384 };
385
386 (Some(field_name.to_string()), None, new_value)
387 } else {
388 (None, None, Some(parsed_yaml))
390 }
391 } else {
392 (None, None, Some(parsed_yaml))
394 }
395 }
396 Err(e) => {
397 return Err(format!("Invalid YAML frontmatter: {}", e).into());
399 }
400 }
401 } else {
402 (None, None, None)
404 };
405
406 blocks.push(MetadataBlock {
407 start: abs_pos,
408 end: abs_closing_pos + closing_len, yaml_value,
410 tag,
411 quill_name,
412 });
413
414 pos = abs_closing_pos + closing_len;
415 } else if abs_pos == 0 {
416 return Err("Frontmatter started but not closed with ---".into());
418 } else {
419 pos = abs_pos + 3;
421 }
422 } else {
423 break;
424 }
425 }
426
427 Ok(blocks)
428}
429
430fn decompose(markdown: &str) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
432 if markdown.len() > crate::error::MAX_INPUT_SIZE {
434 return Err(format!(
435 "Input too large: {} bytes (max: {} bytes)",
436 markdown.len(),
437 crate::error::MAX_INPUT_SIZE
438 )
439 .into());
440 }
441
442 let mut fields = HashMap::new();
443
444 let blocks = find_metadata_blocks(markdown)?;
446
447 if blocks.is_empty() {
448 fields.insert(
450 BODY_FIELD.to_string(),
451 QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
452 );
453 return Ok(ParsedDocument::new(fields));
454 }
455
456 let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
458 let mut has_global_frontmatter = false;
459 let mut global_frontmatter_index: Option<usize> = None;
460 let mut quill_name: Option<String> = None;
461
462 for (idx, block) in blocks.iter().enumerate() {
464 if let Some(ref name) = block.quill_name {
466 if quill_name.is_some() {
467 return Err("Multiple quill directives found: only one allowed".into());
468 }
469 quill_name = Some(name.clone());
470 }
471
472 if block.tag.is_none() && block.quill_name.is_none() {
474 if has_global_frontmatter {
475 return Err(
476 "Multiple global frontmatter blocks found: only one untagged block allowed"
477 .into(),
478 );
479 }
480 has_global_frontmatter = true;
481 global_frontmatter_index = Some(idx);
482 }
483 }
484
485 if let Some(idx) = global_frontmatter_index {
487 let block = &blocks[idx];
488
489 let yaml_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
491 Some(serde_yaml::Value::Mapping(mapping)) => mapping
492 .iter()
493 .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
494 .collect(),
495 Some(serde_yaml::Value::Null) => {
496 HashMap::new()
498 }
499 Some(_) => {
500 return Err("Invalid YAML frontmatter: expected a mapping".into());
502 }
503 None => HashMap::new(),
504 };
505
506 for other_block in &blocks {
509 if let Some(ref tag) = other_block.tag {
510 if let Some(global_value) = yaml_fields.get(tag) {
511 if global_value.as_sequence().is_none() {
513 return Err(format!(
514 "Name collision: global field '{}' conflicts with tagged attribute",
515 tag
516 )
517 .into());
518 }
519 }
520 }
521 }
522
523 for (key, value) in yaml_fields {
525 fields.insert(key, QuillValue::from_yaml(value)?);
526 }
527 }
528
529 for block in &blocks {
531 if block.quill_name.is_some() {
532 if let Some(ref yaml_val) = block.yaml_value {
534 let yaml_fields: HashMap<String, serde_yaml::Value> = match yaml_val {
535 serde_yaml::Value::Mapping(mapping) => mapping
536 .iter()
537 .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
538 .collect(),
539 serde_yaml::Value::Null => {
540 HashMap::new()
542 }
543 _ => {
544 return Err("Invalid YAML in quill block: expected a mapping".into());
545 }
546 };
547
548 for key in yaml_fields.keys() {
550 if fields.contains_key(key) {
551 return Err(format!(
552 "Name collision: quill block field '{}' conflicts with existing field",
553 key
554 )
555 .into());
556 }
557 }
558
559 for (key, value) in yaml_fields {
561 fields.insert(key, QuillValue::from_yaml(value)?);
562 }
563 }
564 }
565 }
566
567 for (idx, block) in blocks.iter().enumerate() {
569 if let Some(ref tag_name) = block.tag {
570 if let Some(existing_value) = fields.get(tag_name) {
573 if existing_value.as_array().is_none() {
574 return Err(format!(
575 "Name collision: tagged attribute '{}' conflicts with global field",
576 tag_name
577 )
578 .into());
579 }
580 }
581
582 let mut item_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
584 Some(serde_yaml::Value::Mapping(mapping)) => mapping
585 .iter()
586 .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
587 .collect(),
588 Some(serde_yaml::Value::Null) => {
589 HashMap::new()
591 }
592 Some(_) => {
593 return Err(format!(
594 "Invalid YAML in tagged block '{}': expected a mapping",
595 tag_name
596 )
597 .into());
598 }
599 None => HashMap::new(),
600 };
601
602 let body_start = block.end;
604 let body_end = if idx + 1 < blocks.len() {
605 blocks[idx + 1].start
606 } else {
607 markdown.len()
608 };
609 let body = &markdown[body_start..body_end];
610
611 item_fields.insert(
613 BODY_FIELD.to_string(),
614 serde_yaml::Value::String(body.to_string()),
615 );
616
617 let item_value = serde_yaml::to_value(item_fields)?;
619
620 tagged_attributes
622 .entry(tag_name.clone())
623 .or_insert_with(Vec::new)
624 .push(item_value);
625 }
626 }
627
628 let first_non_scope_block_idx = blocks
632 .iter()
633 .position(|b| b.tag.is_none() && b.quill_name.is_none())
634 .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));
635
636 let (body_start, body_end) = if let Some(idx) = first_non_scope_block_idx {
637 let start = blocks[idx].end;
639
640 let end = blocks
642 .iter()
643 .skip(idx + 1)
644 .find(|b| b.tag.is_some())
645 .map(|b| b.start)
646 .unwrap_or(markdown.len());
647
648 (start, end)
649 } else {
650 let end = blocks
652 .iter()
653 .find(|b| b.tag.is_some())
654 .map(|b| b.start)
655 .unwrap_or(0);
656
657 (0, end)
658 };
659
660 let global_body = &markdown[body_start..body_end];
661
662 fields.insert(
663 BODY_FIELD.to_string(),
664 QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
665 );
666
667 for (tag_name, items) in tagged_attributes {
670 if let Some(existing_value) = fields.get(&tag_name) {
671 if let Some(existing_array) = existing_value.as_array() {
673 let new_items_json: Vec<serde_json::Value> = items
675 .into_iter()
676 .map(|yaml_val| {
677 serde_json::to_value(&yaml_val)
678 .map_err(|e| format!("Failed to convert YAML to JSON: {}", e))
679 })
680 .collect::<Result<Vec<_>, _>>()?;
681
682 let mut merged_array = existing_array.clone();
684 merged_array.extend(new_items_json);
685
686 let quill_value = QuillValue::from_json(serde_json::Value::Array(merged_array));
688 fields.insert(tag_name, quill_value);
689 } else {
690 return Err(format!(
692 "Internal error: field '{}' exists but is not an array",
693 tag_name
694 )
695 .into());
696 }
697 } else {
698 let quill_value = QuillValue::from_yaml(serde_yaml::Value::Sequence(items))?;
701 fields.insert(tag_name, quill_value);
702 }
703 }
704
705 let quill_tag = quill_name.unwrap_or_else(|| "__default__".to_string());
706 let parsed = ParsedDocument::with_quill_tag(fields, quill_tag);
707
708 Ok(parsed)
709}
710
711#[cfg(test)]
712mod tests {
713 use super::*;
714
715 #[test]
716 fn test_no_frontmatter() {
717 let markdown = "# Hello World\n\nThis is a test.";
718 let doc = decompose(markdown).unwrap();
719
720 assert_eq!(doc.body(), Some(markdown));
721 assert_eq!(doc.fields().len(), 1);
722 assert_eq!(doc.quill_tag(), "__default__");
724 }
725
726 #[test]
727 fn test_with_frontmatter() {
728 let markdown = r#"---
729title: Test Document
730author: Test Author
731---
732
733# Hello World
734
735This is the body."#;
736
737 let doc = decompose(markdown).unwrap();
738
739 assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
740 assert_eq!(
741 doc.get_field("title").unwrap().as_str().unwrap(),
742 "Test Document"
743 );
744 assert_eq!(
745 doc.get_field("author").unwrap().as_str().unwrap(),
746 "Test Author"
747 );
748 assert_eq!(doc.fields().len(), 3); assert_eq!(doc.quill_tag(), "__default__");
751 }
752
753 #[test]
754 fn test_complex_yaml_frontmatter() {
755 let markdown = r#"---
756title: Complex Document
757tags:
758 - test
759 - yaml
760metadata:
761 version: 1.0
762 nested:
763 field: value
764---
765
766Content here."#;
767
768 let doc = decompose(markdown).unwrap();
769
770 assert_eq!(doc.body(), Some("\nContent here."));
771 assert_eq!(
772 doc.get_field("title").unwrap().as_str().unwrap(),
773 "Complex Document"
774 );
775
776 let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
777 assert_eq!(tags.len(), 2);
778 assert_eq!(tags[0].as_str().unwrap(), "test");
779 assert_eq!(tags[1].as_str().unwrap(), "yaml");
780 }
781
782 #[test]
783 fn test_with_defaults_empty_document() {
784 use std::collections::HashMap;
785
786 let mut defaults = HashMap::new();
787 defaults.insert(
788 "status".to_string(),
789 QuillValue::from_json(serde_json::json!("draft")),
790 );
791 defaults.insert(
792 "version".to_string(),
793 QuillValue::from_json(serde_json::json!(1)),
794 );
795
796 let doc = ParsedDocument::new(HashMap::new());
798 let doc_with_defaults = doc.with_defaults(&defaults);
799
800 assert_eq!(
802 doc_with_defaults
803 .get_field("status")
804 .unwrap()
805 .as_str()
806 .unwrap(),
807 "draft"
808 );
809 assert_eq!(
810 doc_with_defaults
811 .get_field("version")
812 .unwrap()
813 .as_number()
814 .unwrap()
815 .as_i64()
816 .unwrap(),
817 1
818 );
819 }
820
821 #[test]
822 fn test_with_defaults_preserves_existing_values() {
823 use std::collections::HashMap;
824
825 let mut defaults = HashMap::new();
826 defaults.insert(
827 "status".to_string(),
828 QuillValue::from_json(serde_json::json!("draft")),
829 );
830
831 let mut fields = HashMap::new();
833 fields.insert(
834 "status".to_string(),
835 QuillValue::from_json(serde_json::json!("published")),
836 );
837 let doc = ParsedDocument::new(fields);
838
839 let doc_with_defaults = doc.with_defaults(&defaults);
840
841 assert_eq!(
843 doc_with_defaults
844 .get_field("status")
845 .unwrap()
846 .as_str()
847 .unwrap(),
848 "published"
849 );
850 }
851
852 #[test]
853 fn test_with_defaults_partial_application() {
854 use std::collections::HashMap;
855
856 let mut defaults = HashMap::new();
857 defaults.insert(
858 "status".to_string(),
859 QuillValue::from_json(serde_json::json!("draft")),
860 );
861 defaults.insert(
862 "version".to_string(),
863 QuillValue::from_json(serde_json::json!(1)),
864 );
865
866 let mut fields = HashMap::new();
868 fields.insert(
869 "status".to_string(),
870 QuillValue::from_json(serde_json::json!("published")),
871 );
872 let doc = ParsedDocument::new(fields);
873
874 let doc_with_defaults = doc.with_defaults(&defaults);
875
876 assert_eq!(
878 doc_with_defaults
879 .get_field("status")
880 .unwrap()
881 .as_str()
882 .unwrap(),
883 "published"
884 );
885 assert_eq!(
886 doc_with_defaults
887 .get_field("version")
888 .unwrap()
889 .as_number()
890 .unwrap()
891 .as_i64()
892 .unwrap(),
893 1
894 );
895 }
896
897 #[test]
898 fn test_with_defaults_no_defaults() {
899 use std::collections::HashMap;
900
901 let defaults = HashMap::new(); let doc = ParsedDocument::new(HashMap::new());
904 let doc_with_defaults = doc.with_defaults(&defaults);
905
906 assert!(doc_with_defaults.fields().is_empty());
908 }
909
910 #[test]
911 fn test_with_defaults_complex_types() {
912 use std::collections::HashMap;
913
914 let mut defaults = HashMap::new();
915 defaults.insert(
916 "tags".to_string(),
917 QuillValue::from_json(serde_json::json!(["default", "tag"])),
918 );
919
920 let doc = ParsedDocument::new(HashMap::new());
921 let doc_with_defaults = doc.with_defaults(&defaults);
922
923 let tags = doc_with_defaults
925 .get_field("tags")
926 .unwrap()
927 .as_sequence()
928 .unwrap();
929 assert_eq!(tags.len(), 2);
930 assert_eq!(tags[0].as_str().unwrap(), "default");
931 assert_eq!(tags[1].as_str().unwrap(), "tag");
932 }
933
934 #[test]
935 fn test_with_coercion_singular_to_array() {
936 use std::collections::HashMap;
937
938 let schema = QuillValue::from_json(serde_json::json!({
939 "$schema": "https://json-schema.org/draft/2019-09/schema",
940 "type": "object",
941 "properties": {
942 "tags": {"type": "array"}
943 }
944 }));
945
946 let mut fields = HashMap::new();
947 fields.insert(
948 "tags".to_string(),
949 QuillValue::from_json(serde_json::json!("single-tag")),
950 );
951 let doc = ParsedDocument::new(fields);
952
953 let coerced_doc = doc.with_coercion(&schema);
954
955 let tags = coerced_doc.get_field("tags").unwrap();
956 assert!(tags.as_array().is_some());
957 let tags_array = tags.as_array().unwrap();
958 assert_eq!(tags_array.len(), 1);
959 assert_eq!(tags_array[0].as_str().unwrap(), "single-tag");
960 }
961
962 #[test]
963 fn test_with_coercion_string_to_boolean() {
964 use std::collections::HashMap;
965
966 let schema = QuillValue::from_json(serde_json::json!({
967 "$schema": "https://json-schema.org/draft/2019-09/schema",
968 "type": "object",
969 "properties": {
970 "active": {"type": "boolean"}
971 }
972 }));
973
974 let mut fields = HashMap::new();
975 fields.insert(
976 "active".to_string(),
977 QuillValue::from_json(serde_json::json!("true")),
978 );
979 let doc = ParsedDocument::new(fields);
980
981 let coerced_doc = doc.with_coercion(&schema);
982
983 assert_eq!(
984 coerced_doc.get_field("active").unwrap().as_bool().unwrap(),
985 true
986 );
987 }
988
989 #[test]
990 fn test_with_coercion_string_to_number() {
991 use std::collections::HashMap;
992
993 let schema = QuillValue::from_json(serde_json::json!({
994 "$schema": "https://json-schema.org/draft/2019-09/schema",
995 "type": "object",
996 "properties": {
997 "count": {"type": "number"}
998 }
999 }));
1000
1001 let mut fields = HashMap::new();
1002 fields.insert(
1003 "count".to_string(),
1004 QuillValue::from_json(serde_json::json!("42")),
1005 );
1006 let doc = ParsedDocument::new(fields);
1007
1008 let coerced_doc = doc.with_coercion(&schema);
1009
1010 assert_eq!(
1011 coerced_doc.get_field("count").unwrap().as_i64().unwrap(),
1012 42
1013 );
1014 }
1015
1016 #[test]
1017 fn test_invalid_yaml() {
1018 let markdown = r#"---
1019title: [invalid yaml
1020author: missing close bracket
1021---
1022
1023Content here."#;
1024
1025 let result = decompose(markdown);
1026 assert!(result.is_err());
1027 assert!(result
1028 .unwrap_err()
1029 .to_string()
1030 .contains("Invalid YAML frontmatter"));
1031 }
1032
1033 #[test]
1034 fn test_unclosed_frontmatter() {
1035 let markdown = r#"---
1036title: Test
1037author: Test Author
1038
1039Content without closing ---"#;
1040
1041 let result = decompose(markdown);
1042 assert!(result.is_err());
1043 assert!(result.unwrap_err().to_string().contains("not closed"));
1044 }
1045
1046 #[test]
1049 fn test_basic_tagged_block() {
1050 let markdown = r#"---
1051title: Main Document
1052---
1053
1054Main body content.
1055
1056---
1057SCOPE: items
1058name: Item 1
1059---
1060
1061Body of item 1."#;
1062
1063 let doc = decompose(markdown).unwrap();
1064
1065 assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
1066 assert_eq!(
1067 doc.get_field("title").unwrap().as_str().unwrap(),
1068 "Main Document"
1069 );
1070
1071 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1072 assert_eq!(items.len(), 1);
1073
1074 let item = items[0].as_object().unwrap();
1075 assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1076 assert_eq!(
1077 item.get("body").unwrap().as_str().unwrap(),
1078 "\nBody of item 1."
1079 );
1080 }
1081
1082 #[test]
1083 fn test_multiple_tagged_blocks() {
1084 let markdown = r#"---
1085SCOPE: items
1086name: Item 1
1087tags: [a, b]
1088---
1089
1090First item body.
1091
1092---
1093SCOPE: items
1094name: Item 2
1095tags: [c, d]
1096---
1097
1098Second item body."#;
1099
1100 let doc = decompose(markdown).unwrap();
1101
1102 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1103 assert_eq!(items.len(), 2);
1104
1105 let item1 = items[0].as_object().unwrap();
1106 assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");
1107
1108 let item2 = items[1].as_object().unwrap();
1109 assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
1110 }
1111
1112 #[test]
1113 fn test_mixed_global_and_tagged() {
1114 let markdown = r#"---
1115title: Global
1116author: John Doe
1117---
1118
1119Global body.
1120
1121---
1122SCOPE: sections
1123title: Section 1
1124---
1125
1126Section 1 content.
1127
1128---
1129SCOPE: sections
1130title: Section 2
1131---
1132
1133Section 2 content."#;
1134
1135 let doc = decompose(markdown).unwrap();
1136
1137 assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
1138 assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
1139
1140 let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1141 assert_eq!(sections.len(), 2);
1142 }
1143
1144 #[test]
1145 fn test_empty_tagged_metadata() {
1146 let markdown = r#"---
1147SCOPE: items
1148---
1149
1150Body without metadata."#;
1151
1152 let doc = decompose(markdown).unwrap();
1153
1154 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1155 assert_eq!(items.len(), 1);
1156
1157 let item = items[0].as_object().unwrap();
1158 assert_eq!(
1159 item.get("body").unwrap().as_str().unwrap(),
1160 "\nBody without metadata."
1161 );
1162 }
1163
1164 #[test]
1165 fn test_tagged_block_without_body() {
1166 let markdown = r#"---
1167SCOPE: items
1168name: Item
1169---"#;
1170
1171 let doc = decompose(markdown).unwrap();
1172
1173 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1174 assert_eq!(items.len(), 1);
1175
1176 let item = items[0].as_object().unwrap();
1177 assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
1178 }
1179
1180 #[test]
1181 fn test_name_collision_global_and_tagged() {
1182 let markdown = r#"---
1183items: "global value"
1184---
1185
1186Body
1187
1188---
1189SCOPE: items
1190name: Item
1191---
1192
1193Item body"#;
1194
1195 let result = decompose(markdown);
1196 assert!(result.is_err());
1197 assert!(result.unwrap_err().to_string().contains("collision"));
1198 }
1199
1200 #[test]
1201 fn test_global_array_merged_with_scope() {
1202 let markdown = r#"---
1205items:
1206 - name: Global Item 1
1207 value: 100
1208 - name: Global Item 2
1209 value: 200
1210---
1211
1212Global body
1213
1214---
1215SCOPE: items
1216name: Scope Item 1
1217value: 300
1218---
1219
1220Scope item 1 body
1221
1222---
1223SCOPE: items
1224name: Scope Item 2
1225value: 400
1226---
1227
1228Scope item 2 body"#;
1229
1230 let doc = decompose(markdown).unwrap();
1231
1232 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1234 assert_eq!(items.len(), 4);
1235
1236 let item1 = items[0].as_object().unwrap();
1238 assert_eq!(
1239 item1.get("name").unwrap().as_str().unwrap(),
1240 "Global Item 1"
1241 );
1242 assert_eq!(item1.get("value").unwrap().as_i64().unwrap(), 100);
1243
1244 let item2 = items[1].as_object().unwrap();
1245 assert_eq!(
1246 item2.get("name").unwrap().as_str().unwrap(),
1247 "Global Item 2"
1248 );
1249 assert_eq!(item2.get("value").unwrap().as_i64().unwrap(), 200);
1250
1251 let item3 = items[2].as_object().unwrap();
1253 assert_eq!(item3.get("name").unwrap().as_str().unwrap(), "Scope Item 1");
1254 assert_eq!(item3.get("value").unwrap().as_i64().unwrap(), 300);
1255 assert_eq!(
1256 item3.get("body").unwrap().as_str().unwrap(),
1257 "\nScope item 1 body\n\n"
1258 );
1259
1260 let item4 = items[3].as_object().unwrap();
1261 assert_eq!(item4.get("name").unwrap().as_str().unwrap(), "Scope Item 2");
1262 assert_eq!(item4.get("value").unwrap().as_i64().unwrap(), 400);
1263 assert_eq!(
1264 item4.get("body").unwrap().as_str().unwrap(),
1265 "\nScope item 2 body"
1266 );
1267 }
1268
1269 #[test]
1270 fn test_empty_global_array_with_scope() {
1271 let markdown = r#"---
1273items: []
1274---
1275
1276Global body
1277
1278---
1279SCOPE: items
1280name: Item 1
1281---
1282
1283Item 1 body"#;
1284
1285 let doc = decompose(markdown).unwrap();
1286
1287 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1288 assert_eq!(items.len(), 1);
1289
1290 let item = items[0].as_object().unwrap();
1291 assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1292 }
1293
1294 #[test]
1295 fn test_reserved_field_name() {
1296 let markdown = r#"---
1297SCOPE: body
1298content: Test
1299---"#;
1300
1301 let result = decompose(markdown);
1302 assert!(result.is_err());
1303 assert!(result.unwrap_err().to_string().contains("reserved"));
1304 }
1305
1306 #[test]
1307 fn test_invalid_tag_syntax() {
1308 let markdown = r#"---
1309SCOPE: Invalid-Name
1310title: Test
1311---"#;
1312
1313 let result = decompose(markdown);
1314 assert!(result.is_err());
1315 assert!(result
1316 .unwrap_err()
1317 .to_string()
1318 .contains("Invalid field name"));
1319 }
1320
1321 #[test]
1322 fn test_multiple_global_frontmatter_blocks() {
1323 let markdown = r#"---
1324title: First
1325---
1326
1327Body
1328
1329---
1330author: Second
1331---
1332
1333More body"#;
1334
1335 let result = decompose(markdown);
1336 assert!(result.is_err());
1337 assert!(result
1338 .unwrap_err()
1339 .to_string()
1340 .contains("Multiple global frontmatter"));
1341 }
1342
1343 #[test]
1344 fn test_adjacent_blocks_different_tags() {
1345 let markdown = r#"---
1346SCOPE: items
1347name: Item 1
1348---
1349
1350Item 1 body
1351
1352---
1353SCOPE: sections
1354title: Section 1
1355---
1356
1357Section 1 body"#;
1358
1359 let doc = decompose(markdown).unwrap();
1360
1361 assert!(doc.get_field("items").is_some());
1362 assert!(doc.get_field("sections").is_some());
1363
1364 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1365 assert_eq!(items.len(), 1);
1366
1367 let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1368 assert_eq!(sections.len(), 1);
1369 }
1370
1371 #[test]
1372 fn test_order_preservation() {
1373 let markdown = r#"---
1374SCOPE: items
1375id: 1
1376---
1377
1378First
1379
1380---
1381SCOPE: items
1382id: 2
1383---
1384
1385Second
1386
1387---
1388SCOPE: items
1389id: 3
1390---
1391
1392Third"#;
1393
1394 let doc = decompose(markdown).unwrap();
1395
1396 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1397 assert_eq!(items.len(), 3);
1398
1399 for (i, item) in items.iter().enumerate() {
1400 let mapping = item.as_object().unwrap();
1401 let id = mapping.get("id").unwrap().as_i64().unwrap();
1402 assert_eq!(id, (i + 1) as i64);
1403 }
1404 }
1405
1406 #[test]
1407 fn test_product_catalog_integration() {
1408 let markdown = r#"---
1409title: Product Catalog
1410author: John Doe
1411date: 2024-01-01
1412---
1413
1414This is the main catalog description.
1415
1416---
1417SCOPE: products
1418name: Widget A
1419price: 19.99
1420sku: WID-001
1421---
1422
1423The **Widget A** is our most popular product.
1424
1425---
1426SCOPE: products
1427name: Gadget B
1428price: 29.99
1429sku: GAD-002
1430---
1431
1432The **Gadget B** is perfect for professionals.
1433
1434---
1435SCOPE: reviews
1436product: Widget A
1437rating: 5
1438---
1439
1440"Excellent product! Highly recommended."
1441
1442---
1443SCOPE: reviews
1444product: Gadget B
1445rating: 4
1446---
1447
1448"Very good, but a bit pricey.""#;
1449
1450 let doc = decompose(markdown).unwrap();
1451
1452 assert_eq!(
1454 doc.get_field("title").unwrap().as_str().unwrap(),
1455 "Product Catalog"
1456 );
1457 assert_eq!(
1458 doc.get_field("author").unwrap().as_str().unwrap(),
1459 "John Doe"
1460 );
1461 assert_eq!(
1462 doc.get_field("date").unwrap().as_str().unwrap(),
1463 "2024-01-01"
1464 );
1465
1466 assert!(doc.body().unwrap().contains("main catalog description"));
1468
1469 let products = doc.get_field("products").unwrap().as_sequence().unwrap();
1471 assert_eq!(products.len(), 2);
1472
1473 let product1 = products[0].as_object().unwrap();
1474 assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
1475 assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);
1476
1477 let reviews = doc.get_field("reviews").unwrap().as_sequence().unwrap();
1479 assert_eq!(reviews.len(), 2);
1480
1481 let review1 = reviews[0].as_object().unwrap();
1482 assert_eq!(
1483 review1.get("product").unwrap().as_str().unwrap(),
1484 "Widget A"
1485 );
1486 assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);
1487
1488 assert_eq!(doc.fields().len(), 6);
1490 }
1491
1492 #[test]
1493 fn taro_quill_directive() {
1494 let markdown = r#"---
1495QUILL: usaf_memo
1496memo_for: [ORG/SYMBOL]
1497memo_from: [ORG/SYMBOL]
1498---
1499
1500This is the memo body."#;
1501
1502 let doc = decompose(markdown).unwrap();
1503
1504 assert_eq!(doc.quill_tag(), "usaf_memo");
1506
1507 assert_eq!(
1509 doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1510 .as_str()
1511 .unwrap(),
1512 "ORG/SYMBOL"
1513 );
1514
1515 assert_eq!(doc.body(), Some("\nThis is the memo body."));
1517 }
1518
1519 #[test]
1520 fn test_quill_with_scope_blocks() {
1521 let markdown = r#"---
1522QUILL: document
1523title: Test Document
1524---
1525
1526Main body.
1527
1528---
1529SCOPE: sections
1530name: Section 1
1531---
1532
1533Section 1 body."#;
1534
1535 let doc = decompose(markdown).unwrap();
1536
1537 assert_eq!(doc.quill_tag(), "document");
1539
1540 assert_eq!(
1542 doc.get_field("title").unwrap().as_str().unwrap(),
1543 "Test Document"
1544 );
1545
1546 let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1548 assert_eq!(sections.len(), 1);
1549
1550 assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1552 }
1553
1554 #[test]
1555 fn test_multiple_quill_directives_error() {
1556 let markdown = r#"---
1557QUILL: first
1558---
1559
1560---
1561QUILL: second
1562---"#;
1563
1564 let result = decompose(markdown);
1565 assert!(result.is_err());
1566 assert!(result
1567 .unwrap_err()
1568 .to_string()
1569 .contains("Multiple quill directives"));
1570 }
1571
1572 #[test]
1573 fn test_invalid_quill_name() {
1574 let markdown = r#"---
1575QUILL: Invalid-Name
1576---"#;
1577
1578 let result = decompose(markdown);
1579 assert!(result.is_err());
1580 assert!(result
1581 .unwrap_err()
1582 .to_string()
1583 .contains("Invalid quill name"));
1584 }
1585
1586 #[test]
1587 fn test_quill_wrong_value_type() {
1588 let markdown = r#"---
1589QUILL: 123
1590---"#;
1591
1592 let result = decompose(markdown);
1593 assert!(result.is_err());
1594 assert!(result
1595 .unwrap_err()
1596 .to_string()
1597 .contains("QUILL value must be a string"));
1598 }
1599
1600 #[test]
1601 fn test_scope_wrong_value_type() {
1602 let markdown = r#"---
1603SCOPE: 123
1604---"#;
1605
1606 let result = decompose(markdown);
1607 assert!(result.is_err());
1608 assert!(result
1609 .unwrap_err()
1610 .to_string()
1611 .contains("SCOPE value must be a string"));
1612 }
1613
1614 #[test]
1615 fn test_both_quill_and_scope_error() {
1616 let markdown = r#"---
1617QUILL: test
1618SCOPE: items
1619---"#;
1620
1621 let result = decompose(markdown);
1622 assert!(result.is_err());
1623 assert!(result
1624 .unwrap_err()
1625 .to_string()
1626 .contains("Cannot specify both QUILL and SCOPE"));
1627 }
1628
1629 #[test]
1630 fn test_blank_lines_in_frontmatter() {
1631 let markdown = r#"---
1633title: Test Document
1634author: Test Author
1635
1636description: This has a blank line above it
1637tags:
1638 - one
1639 - two
1640---
1641
1642# Hello World
1643
1644This is the body."#;
1645
1646 let doc = decompose(markdown).unwrap();
1647
1648 assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
1649 assert_eq!(
1650 doc.get_field("title").unwrap().as_str().unwrap(),
1651 "Test Document"
1652 );
1653 assert_eq!(
1654 doc.get_field("author").unwrap().as_str().unwrap(),
1655 "Test Author"
1656 );
1657 assert_eq!(
1658 doc.get_field("description").unwrap().as_str().unwrap(),
1659 "This has a blank line above it"
1660 );
1661
1662 let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
1663 assert_eq!(tags.len(), 2);
1664 }
1665
1666 #[test]
1667 fn test_blank_lines_in_scope_blocks() {
1668 let markdown = r#"---
1670SCOPE: items
1671name: Item 1
1672
1673price: 19.99
1674
1675tags:
1676 - electronics
1677 - gadgets
1678---
1679
1680Body of item 1."#;
1681
1682 let doc = decompose(markdown).unwrap();
1683
1684 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1685 assert_eq!(items.len(), 1);
1686
1687 let item = items[0].as_object().unwrap();
1688 assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1689 assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);
1690
1691 let tags = item.get("tags").unwrap().as_array().unwrap();
1692 assert_eq!(tags.len(), 2);
1693 }
1694
1695 #[test]
1696 fn test_horizontal_rule_with_blank_lines_above_and_below() {
1697 let markdown = r#"---
1699title: Test
1700---
1701
1702First paragraph.
1703
1704---
1705
1706Second paragraph."#;
1707
1708 let doc = decompose(markdown).unwrap();
1709
1710 assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1711
1712 let body = doc.body().unwrap();
1714 assert!(body.contains("First paragraph."));
1715 assert!(body.contains("---"));
1716 assert!(body.contains("Second paragraph."));
1717 }
1718
1719 #[test]
1720 fn test_horizontal_rule_not_preceded_by_blank() {
1721 let markdown = r#"---
1724title: Test
1725---
1726
1727First paragraph.
1728---
1729
1730Second paragraph."#;
1731
1732 let doc = decompose(markdown).unwrap();
1733
1734 let body = doc.body().unwrap();
1735 assert!(body.contains("---"));
1737 }
1738
1739 #[test]
1740 fn test_multiple_blank_lines_in_yaml() {
1741 let markdown = r#"---
1743title: Test
1744
1745
1746author: John Doe
1747
1748
1749version: 1.0
1750---
1751
1752Body content."#;
1753
1754 let doc = decompose(markdown).unwrap();
1755
1756 assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1757 assert_eq!(
1758 doc.get_field("author").unwrap().as_str().unwrap(),
1759 "John Doe"
1760 );
1761 assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1762 }
1763
1764 #[test]
1765 fn test_html_comment_interaction() {
1766 let markdown = r#"<!---
1767---> the rest of the page content
1768
1769---
1770key: value
1771---
1772"#;
1773 let doc = decompose(markdown).unwrap();
1774
1775 let key = doc.get_field("key").and_then(|v| v.as_str());
1778 assert_eq!(key, Some("value"));
1779 }
1780}
#[cfg(test)]
mod demo_file_test {
    // Tests in this module cover three areas of `decompose`:
    //   * the bundled extended-metadata demo fixture,
    //   * the input-size and YAML-size limits,
    //   * verbatim preservation of `<<...>>` sequences in bodies and YAML.
    use super::*;

    /// Parses the bundled demo fixture and spot-checks its top-level fields,
    /// its body text, and its two collections.
    #[test]
    fn test_extended_metadata_demo_file() {
        let markdown = include_str!("../../fixtures/resources/extended_metadata_demo.md");
        let doc = decompose(markdown).unwrap();

        // Top-level fields.
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Extended Metadata Demo"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Quillmark Team"
        );
        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);

        // Body text.
        assert!(doc
            .body()
            .unwrap()
            .contains("extended YAML metadata standard"));

        // Collections exposed as sequences.
        let features = doc.get_field("features").unwrap().as_sequence().unwrap();
        assert_eq!(features.len(), 3);

        let use_cases = doc.get_field("use_cases").unwrap().as_sequence().unwrap();
        assert_eq!(use_cases.len(), 2);

        // Each collection entry is an object with its own keys.
        let feature1 = features[0].as_object().unwrap();
        assert_eq!(
            feature1.get("name").unwrap().as_str().unwrap(),
            "Tag Directives"
        );
    }

    /// An input of `MAX_INPUT_SIZE + 1` characters is rejected with an
    /// "Input too large" error.
    #[test]
    fn test_input_size_limit() {
        let size = crate::error::MAX_INPUT_SIZE + 1;
        let large_markdown = "a".repeat(size);

        let result = decompose(&large_markdown);
        assert!(result.is_err());

        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("Input too large"));
    }

    /// A frontmatter block whose single value is `MAX_YAML_SIZE + 1`
    /// characters long is rejected with a "YAML block too large" error.
    #[test]
    fn test_yaml_size_limit() {
        let mut markdown = String::from("---\n");

        let size = crate::error::MAX_YAML_SIZE + 1;
        markdown.push_str("data: \"");
        markdown.push_str(&"x".repeat(size));
        markdown.push_str("\"\n---\n\nBody");

        let result = decompose(&markdown);
        assert!(result.is_err());

        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("YAML block too large"));
    }

    /// A small document (1000-character body) parses successfully.
    #[test]
    fn test_input_within_size_limit() {
        let size = 1000;
        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));

        let result = decompose(&markdown);
        assert!(result.is_ok());
    }

    /// A small frontmatter block parses successfully.
    #[test]
    fn test_yaml_within_size_limit() {
        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";

        // `markdown` is already a `&str`, so it is passed directly.
        let result = decompose(markdown);
        assert!(result.is_ok());
    }

    /// Without frontmatter, `<<...>>` in the input is returned unchanged as
    /// the body.
    #[test]
    fn test_chevrons_preserved_in_body_no_frontmatter() {
        let markdown = "Use <<raw content>> here.";
        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("Use <<raw content>> here."));
    }

    /// With frontmatter present, `<<...>>` in the body is still returned
    /// unchanged.
    #[test]
    fn test_chevrons_preserved_in_body_with_frontmatter() {
        let markdown = r#"---
title: Test
---

Use <<raw content>> here."#;
        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\nUse <<raw content>> here."));
    }

    /// `<<...>>` inside an unquoted YAML string value is kept verbatim.
    #[test]
    fn test_chevrons_preserved_in_yaml_string() {
        let markdown = r#"---
title: Test <<with chevrons>>
---

Body content."#;
        let doc = decompose(markdown).unwrap();

        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Test <<with chevrons>>"
        );
    }

    /// `<<...>>` inside YAML sequence entries is kept verbatim.
    #[test]
    fn test_chevrons_preserved_in_yaml_array() {
        let markdown = r#"---
items:
  - "<<first>>"
  - "<<second>>"
---

Body."#;
        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items[0].as_str().unwrap(), "<<first>>");
        assert_eq!(items[1].as_str().unwrap(), "<<second>>");
    }

    /// `<<...>>` inside a nested YAML mapping value is kept verbatim.
    #[test]
    fn test_chevrons_preserved_in_yaml_nested() {
        let markdown = r#"---
metadata:
  description: "<<nested value>>"
---

Body."#;
        let doc = decompose(markdown).unwrap();

        let metadata = doc.get_field("metadata").unwrap().as_object().unwrap();
        assert_eq!(
            metadata.get("description").unwrap().as_str().unwrap(),
            "<<nested value>>"
        );
    }

    /// `<<...>>` is kept verbatim both inside and outside a fenced code block.
    #[test]
    fn test_chevrons_preserved_in_code_blocks() {
        let markdown = r#"```
<<in code block>>
```

<<outside code block>>"#;
        let doc = decompose(markdown).unwrap();

        let body = doc.body().unwrap();
        assert!(body.contains("<<in code block>>"));
        assert!(body.contains("<<outside code block>>"));
    }

    /// `<<...>>` is kept verbatim both inside and outside inline code.
    #[test]
    fn test_chevrons_preserved_in_inline_code() {
        let markdown = "`<<in inline code>>` and <<outside inline code>>";
        let doc = decompose(markdown).unwrap();

        let body = doc.body().unwrap();
        assert!(body.contains("`<<in inline code>>`"));
        assert!(body.contains("<<outside inline code>>"));
    }

    /// `<<...>>` in the body that follows a `SCOPE` block is kept verbatim in
    /// that item's `body` entry.
    #[test]
    fn test_chevrons_preserved_in_tagged_block_body() {
        let markdown = r#"---
title: Main
---

Main body.

---
SCOPE: items
name: Item 1
---

Use <<raw>> here."#;
        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        let item = items[0].as_object().unwrap();
        let item_body = item.get("body").unwrap().as_str().unwrap();
        assert!(item_body.contains("<<raw>>"));
    }

    /// `<<...>>` in a YAML value of a `SCOPE` block is kept verbatim.
    #[test]
    fn test_chevrons_preserved_in_tagged_block_yaml() {
        let markdown = r#"---
title: Main
---

Main body.

---
SCOPE: items
description: "<<tagged yaml>>"
---

Item body."#;
        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        let item = items[0].as_object().unwrap();
        assert_eq!(
            item.get("description").unwrap().as_str().unwrap(),
            "<<tagged yaml>>"
        );
    }

    /// Integer YAML values are read back as integers.
    #[test]
    fn test_yaml_numbers_not_affected() {
        let markdown = r#"---
count: 42
---

Body."#;
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
    }

    /// Boolean YAML values are read back as booleans.
    #[test]
    fn test_yaml_booleans_not_affected() {
        let markdown = r#"---
active: true
---

Body."#;
        let doc = decompose(markdown).unwrap();
        // `assert!` on the bool itself instead of comparing against `true`.
        assert!(doc.get_field("active").unwrap().as_bool().unwrap());
    }

    /// A `<<...>>` pair that spans a line break is kept verbatim.
    #[test]
    fn test_multiline_chevrons_preserved() {
        let markdown = "<<text\nacross lines>>";
        let doc = decompose(markdown).unwrap();

        let body = doc.body().unwrap();
        assert!(body.contains("<<text"));
        assert!(body.contains("across lines>>"));
    }

    /// An opening `<<` with no closing `>>` is kept verbatim.
    #[test]
    fn test_unmatched_chevrons_preserved() {
        let markdown = "<<unmatched";
        let doc = decompose(markdown).unwrap();

        let body = doc.body().unwrap();
        assert_eq!(body, "<<unmatched");
    }
}
2067
#[cfg(test)]
mod robustness_tests {
    // Edge-case and robustness tests for `decompose` and `is_valid_tag_name`.
    use super::*;

    // --- Degenerate inputs and fence detection ---

    /// The empty string parses to an empty body with the default quill tag.
    #[test]
    fn test_empty_document() {
        let doc = decompose("").unwrap();
        assert_eq!(doc.body(), Some(""));
        assert_eq!(doc.quill_tag(), "__default__");
    }

    /// Whitespace-only input is returned unchanged as the body.
    #[test]
    fn test_only_whitespace() {
        let doc = decompose(" \n\n \t").unwrap();
        assert_eq!(doc.body(), Some(" \n\n \t"));
    }

    /// A lone `---` is not an error; it is returned as the body.
    #[test]
    fn test_only_dashes() {
        let result = decompose("---");
        assert!(result.is_ok());
        assert_eq!(result.unwrap().body(), Some("---"));
    }

    /// `---` in the middle of a line is plain body text.
    #[test]
    fn test_dashes_in_middle_of_line() {
        let markdown = "some text --- more text";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.body(), Some("some text --- more text"));
    }

    /// Lines of four dashes stay in the body text.
    #[test]
    fn test_four_dashes() {
        let markdown = "----\ntitle: Test\n----\n\nBody";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("----"));
    }

    /// Frontmatter and body are recognized with CRLF line endings.
    #[test]
    fn test_crlf_line_endings() {
        let markdown = "---\r\ntitle: Test\r\n---\r\n\r\nBody content.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert!(doc.body().unwrap().contains("Body content."));
    }

    /// Frontmatter is recognized when LF and CRLF line endings are mixed.
    #[test]
    fn test_mixed_line_endings() {
        let markdown = "---\ntitle: Test\r\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
    }

    /// A closing fence at end of input (no trailing newline) still closes the
    /// block; the body is empty.
    #[test]
    fn test_frontmatter_at_eof_no_trailing_newline() {
        let markdown = "---\ntitle: Test\n---";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert_eq!(doc.body(), Some(""));
    }

    /// Frontmatter holding only a whitespace line yields no metadata keys.
    #[test]
    fn test_empty_frontmatter() {
        let markdown = "---\n \n---\n\nBody content.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("Body content."));
        // Exactly one field is present; presumably just the body.
        assert_eq!(doc.fields().len(), 1);
    }

    /// Frontmatter holding several whitespace-only lines still parses, and
    /// the body is extracted.
    #[test]
    fn test_whitespace_only_frontmatter() {
        let markdown = "---\n \n\n \n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("Body."));
    }

    // --- Unicode handling ---

    /// Non-ASCII YAML keys can be looked up by their exact spelling.
    #[test]
    fn test_unicode_in_yaml_keys() {
        let markdown = "---\ntitre: Bonjour\nタイトル: こんにちは\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("titre").unwrap().as_str().unwrap(), "Bonjour");
        assert_eq!(
            doc.get_field("タイトル").unwrap().as_str().unwrap(),
            "こんにちは"
        );
    }

    /// Non-ASCII YAML values, emoji included, are read back unchanged.
    #[test]
    fn test_unicode_in_yaml_values() {
        let markdown = "---\ntitle: 你好世界 🎉\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "你好世界 🎉"
        );
    }

    /// Non-ASCII body text, emoji included, is kept in the body.
    #[test]
    fn test_unicode_in_body() {
        let markdown = "---\ntitle: Test\n---\n\n日本語テキスト with emoji 🚀";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("日本語テキスト"));
        assert!(doc.body().unwrap().contains("🚀"));
    }

    // --- YAML features ---

    /// A literal block scalar (`|`) keeps its line breaks.
    #[test]
    fn test_yaml_multiline_string() {
        let markdown = r#"---
description: |
  This is a
  multiline string
  with preserved newlines.
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let desc = doc.get_field("description").unwrap().as_str().unwrap();
        assert!(desc.contains("multiline string"));
        assert!(desc.contains('\n'));
    }

    /// A folded block scalar (`>`) is read back as a string.
    #[test]
    fn test_yaml_folded_string() {
        let markdown = r#"---
description: >
  This is a folded
  string that becomes
  a single line.
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let desc = doc.get_field("description").unwrap().as_str().unwrap();
        assert!(desc.contains("folded"));
    }

    /// An explicit YAML `null` is kept as a field whose value is null.
    #[test]
    fn test_yaml_null_value() {
        let markdown = "---\noptional: null\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.get_field("optional").unwrap().is_null());
    }

    /// A quoted empty string is read back as an empty string.
    #[test]
    fn test_yaml_empty_string_value() {
        let markdown = "---\nempty: \"\"\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("empty").unwrap().as_str().unwrap(), "");
    }

    /// YAML-significant characters inside a quoted string are plain text.
    #[test]
    fn test_yaml_special_characters_in_string() {
        let markdown = "---\nspecial: \"colon: here, and [brackets]\"\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(
            doc.get_field("special").unwrap().as_str().unwrap(),
            "colon: here, and [brackets]"
        );
    }

    /// Nested mappings are reachable through `as_object()` at each level.
    #[test]
    fn test_yaml_nested_objects() {
        let markdown = r#"---
config:
  database:
    host: localhost
    port: 5432
  cache:
    enabled: true
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let config = doc.get_field("config").unwrap().as_object().unwrap();
        let db = config.get("database").unwrap().as_object().unwrap();
        assert_eq!(db.get("host").unwrap().as_str().unwrap(), "localhost");
        assert_eq!(db.get("port").unwrap().as_i64().unwrap(), 5432);
    }

    // --- SCOPE blocks ---

    /// A `SCOPE` block with nothing after it gets an empty `body` entry.
    #[test]
    fn test_scope_with_empty_body() {
        let markdown = r#"---
SCOPE: items
name: Item
---"#;
        let doc = decompose(markdown).unwrap();
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);
        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
    }

    /// Two back-to-back `SCOPE` blocks with the same name are collected into
    /// one two-element sequence.
    #[test]
    fn test_scope_consecutive_blocks() {
        let markdown = r#"---
SCOPE: a
id: 1
---
---
SCOPE: a
id: 2
---"#;
        let doc = decompose(markdown).unwrap();
        let items = doc.get_field("a").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 2);
    }

    /// `---` in the middle of a line of a scoped body stays in that item's
    /// body text.
    #[test]
    fn test_scope_with_body_containing_dashes() {
        let markdown = r#"---
SCOPE: items
name: Item
---

Some text with --- dashes in it."#;
        let doc = decompose(markdown).unwrap();
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        let item = items[0].as_object().unwrap();
        let body = item.get("body").unwrap().as_str().unwrap();
        assert!(body.contains("--- dashes"));
    }

    // --- QUILL directive ---

    /// A quill name may start with an underscore.
    #[test]
    fn test_quill_with_underscore_prefix() {
        let markdown = "---\nQUILL: _internal\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "_internal");
    }

    /// A quill name may contain digits after its first character.
    #[test]
    fn test_quill_with_numbers() {
        let markdown = "---\nQUILL: form_8_v2\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "form_8_v2");
    }

    /// Keys next to the `QUILL` directive are parsed as regular fields.
    #[test]
    fn test_quill_with_additional_fields() {
        let markdown = r#"---
QUILL: my_quill
title: Document Title
author: John Doe
---

Body content."#;
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "my_quill");
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Document Title"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "John Doe"
        );
    }

    // --- Error handling ---

    /// An all-uppercase `SCOPE` name is rejected as an invalid field name.
    #[test]
    fn test_invalid_scope_name_uppercase() {
        let markdown = "---\nSCOPE: ITEMS\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid field name"));
    }

    /// A `SCOPE` name that starts with a digit is rejected.
    #[test]
    fn test_invalid_scope_name_starts_with_number() {
        let markdown = "---\nSCOPE: 123items\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    /// A `SCOPE` name containing a hyphen is rejected.
    #[test]
    fn test_invalid_scope_name_with_hyphen() {
        let markdown = "---\nSCOPE: my-items\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    /// A `QUILL` name containing uppercase letters is rejected.
    #[test]
    fn test_invalid_quill_name_uppercase() {
        let markdown = "---\nQUILL: MyQuill\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    /// Frontmatter consisting of a line with no `key: value` colon is an
    /// error.
    #[test]
    fn test_yaml_syntax_error_missing_colon() {
        let markdown = "---\ntitle Test\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    /// Inconsistent list indentation must not panic.
    ///
    /// The result is deliberately not asserted: this test pins neither
    /// success nor failure, so the only thing it can detect is a panic
    /// inside `decompose`.
    #[test]
    fn test_yaml_syntax_error_bad_indentation() {
        let markdown = "---\nitems:\n- one\n - two\n---\n\nBody.";
        let _ = decompose(markdown);
    }

    // --- Body extraction ---

    /// Extra blank lines after the closing fence are kept at the start of the
    /// body.
    #[test]
    fn test_body_with_leading_newlines() {
        let markdown = "---\ntitle: Test\n---\n\n\n\nBody with leading newlines.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().starts_with('\n'));
    }

    /// Trailing newlines at the end of the input are kept in the body.
    #[test]
    fn test_body_with_trailing_newlines() {
        let markdown = "---\ntitle: Test\n---\n\nBody.\n\n\n";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().ends_with('\n'));
    }

    /// With nothing after the frontmatter, the body is the empty string.
    #[test]
    fn test_no_body_after_frontmatter() {
        let markdown = "---\ntitle: Test\n---";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.body(), Some(""));
    }

    // --- Tag name validation ---

    /// A single underscore is a valid tag name.
    #[test]
    fn test_valid_tag_name_single_underscore() {
        assert!(is_valid_tag_name("_"));
    }

    /// A leading underscore is allowed.
    #[test]
    fn test_valid_tag_name_underscore_prefix() {
        assert!(is_valid_tag_name("_private"));
    }

    /// Digits are allowed after the first character.
    #[test]
    fn test_valid_tag_name_with_numbers() {
        assert!(is_valid_tag_name("item1"));
        assert!(is_valid_tag_name("item_2"));
    }

    /// The empty string is not a valid tag name.
    #[test]
    fn test_invalid_tag_name_empty() {
        assert!(!is_valid_tag_name(""));
    }

    /// A tag name may not start with a digit.
    #[test]
    fn test_invalid_tag_name_starts_with_number() {
        assert!(!is_valid_tag_name("1item"));
    }

    /// Uppercase letters are not allowed anywhere in a tag name.
    #[test]
    fn test_invalid_tag_name_uppercase() {
        assert!(!is_valid_tag_name("Items"));
        assert!(!is_valid_tag_name("ITEMS"));
    }

    /// Hyphens, dots, and spaces are not allowed in a tag name.
    #[test]
    fn test_invalid_tag_name_special_chars() {
        assert!(!is_valid_tag_name("my-items"));
        assert!(!is_valid_tag_name("my.items"));
        assert!(!is_valid_tag_name("my items"));
    }

    // --- Non-string values and guillemets ---

    /// Integer, float, and boolean scalars keep their YAML types.
    ///
    /// The `items` sequence is part of the input but is not asserted on.
    #[test]
    fn test_guillemet_in_yaml_preserves_non_strings() {
        let markdown = r#"---
count: 42
price: 19.99
active: true
items:
  - first
  - 100
  - true
---

Body."#;
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
        assert_eq!(doc.get_field("price").unwrap().as_f64().unwrap(), 19.99);
        // `assert!` on the bool itself instead of comparing against `true`.
        assert!(doc.get_field("active").unwrap().as_bool().unwrap());
    }

    /// Guillemets (`«…»`) already present in a YAML value are read back
    /// unchanged.
    #[test]
    fn test_guillemet_double_conversion_prevention() {
        let markdown = "---\ntitle: Already «converted»\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Already «converted»"
        );
    }
}