1use std::collections::HashMap;
50
51use crate::guillemet::{preprocess_guillemets, preprocess_markdown_guillemets};
52use crate::value::QuillValue;
53
/// Name of the field under which body text is stored, both for the document as a
/// whole and for every item produced by a `SCOPE` block. Because of that, `SCOPE`
/// blocks are not allowed to use this name as their value.
pub const BODY_FIELD: &str = "body";
56
57fn preprocess_yaml_guillemets(value: serde_yaml::Value) -> serde_yaml::Value {
63 match value {
64 serde_yaml::Value::String(s) => serde_yaml::Value::String(preprocess_guillemets(&s)),
65 serde_yaml::Value::Sequence(seq) => {
66 serde_yaml::Value::Sequence(seq.into_iter().map(preprocess_yaml_guillemets).collect())
67 }
68 serde_yaml::Value::Mapping(map) => {
69 let new_map: serde_yaml::Mapping = map
70 .into_iter()
71 .map(|(k, v)| (k, preprocess_yaml_guillemets(v)))
72 .collect();
73 serde_yaml::Value::Mapping(new_map)
74 }
75 other => other,
77 }
78}
79
/// The string `"quill"`.
///
/// NOTE(review): nothing in the visible part of this file reads this constant; the
/// frontmatter directive is matched against the literal `"QUILL"` instead. Confirm
/// how callers outside this file use it.
pub const QUILL_TAG: &str = "quill";
82
/// A parsed markdown document: a set of named field values plus the quill tag
/// the document selected.
#[derive(Debug, Clone)]
pub struct ParsedDocument {
    /// Field values keyed by field name. Documents built by `decompose` always
    /// contain the body text under `BODY_FIELD`.
    fields: HashMap<String, QuillValue>,
    /// Name taken from a `QUILL:` directive, or `"__default__"` when the document
    /// did not provide one.
    quill_tag: String,
}
89
90impl ParsedDocument {
91 pub fn new(fields: HashMap<String, QuillValue>) -> Self {
93 Self {
94 fields,
95 quill_tag: "__default__".to_string(),
96 }
97 }
98
99 pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: String) -> Self {
101 Self { fields, quill_tag }
102 }
103
104 pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
106 decompose(markdown).map_err(|e| crate::error::ParseError::from(e))
107 }
108
109 pub fn quill_tag(&self) -> &str {
111 &self.quill_tag
112 }
113
114 pub fn body(&self) -> Option<&str> {
116 self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
117 }
118
119 pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
121 self.fields.get(name)
122 }
123
124 pub fn fields(&self) -> &HashMap<String, QuillValue> {
126 &self.fields
127 }
128
129 pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
143 let mut fields = self.fields.clone();
144
145 for (field_name, default_value) in defaults {
146 if !fields.contains_key(field_name) {
148 fields.insert(field_name.clone(), default_value.clone());
149 }
150 }
151
152 Self {
153 fields,
154 quill_tag: self.quill_tag.clone(),
155 }
156 }
157
158 pub fn with_coercion(&self, schema: &QuillValue) -> Self {
176 use crate::schema::coerce_document;
177
178 let coerced_fields = coerce_document(schema, &self.fields);
179
180 Self {
181 fields: coerced_fields,
182 quill_tag: self.quill_tag.clone(),
183 }
184 }
185}
186
/// One `---`-delimited metadata block located by `find_metadata_blocks`.
///
/// * `start` - byte offset in the markdown of the opening `---` line.
/// * `end` - byte offset just past the closing delimiter.
/// * `yaml_value` - parsed YAML of the block with any `QUILL`/`SCOPE` key removed;
///   `None` when the block is empty or contained only the directive key.
/// * `tag` - the `SCOPE` value, when the block carried one.
/// * `quill_name` - the `QUILL` value, when the block carried one.
#[derive(Debug)]
struct MetadataBlock {
    start: usize, end: usize, yaml_value: Option<serde_yaml::Value>, tag: Option<String>, quill_name: Option<String>, }
195
/// Returns `true` when `name` matches the pattern `[a-z_][a-z0-9_]*`.
///
/// The empty string is rejected, the first character must be an ASCII lowercase
/// letter or `_`, and every following character must be an ASCII lowercase letter,
/// an ASCII digit or `_`.
fn is_valid_tag_name(name: &str) -> bool {
    let mut remaining = name.chars();

    match remaining.next() {
        Some(head) if head.is_ascii_lowercase() || head == '_' => {
            remaining.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
        }
        // Empty input or a disallowed leading character.
        _ => false,
    }
}
217
218fn find_metadata_blocks(
220 markdown: &str,
221) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
222 let mut blocks = Vec::new();
223 let mut pos = 0;
224
225 while pos < markdown.len() {
226 let search_str = &markdown[pos..];
228 let delimiter_result = if let Some(p) = search_str.find("---\n") {
229 Some((p, 4, "\n"))
230 } else if let Some(p) = search_str.find("---\r\n") {
231 Some((p, 5, "\r\n"))
232 } else {
233 None
234 };
235
236 if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
237 let abs_pos = pos + delimiter_pos;
238
239 let is_start_of_line = if abs_pos == 0 {
241 true
242 } else {
243 let char_before = markdown.as_bytes()[abs_pos - 1];
244 char_before == b'\n' || char_before == b'\r'
245 };
246
247 if !is_start_of_line {
248 pos = abs_pos + 1;
249 continue;
250 }
251
252 let content_start = abs_pos + delimiter_len; let preceded_by_blank = if abs_pos > 0 {
256 let before = &markdown[..abs_pos];
258 before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
259 } else {
260 false
261 };
262
263 let followed_by_blank = if content_start < markdown.len() {
264 markdown[content_start..].starts_with('\n')
265 || markdown[content_start..].starts_with("\r\n")
266 } else {
267 false
268 };
269
270 if preceded_by_blank && followed_by_blank {
272 pos = abs_pos + 3; continue;
275 }
276
277 if followed_by_blank {
280 pos = abs_pos + 3;
283 continue;
284 }
285
286 let rest = &markdown[content_start..];
289
290 let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
292 let closing_with_newline = closing_patterns
293 .iter()
294 .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
295 .min_by_key(|(p, _)| *p);
296
297 let closing_at_eof = ["\n---", "\r\n---"]
299 .iter()
300 .filter_map(|delim| {
301 rest.find(delim).and_then(|p| {
302 if p + delim.len() == rest.len() {
303 Some((p, delim.len()))
304 } else {
305 None
306 }
307 })
308 })
309 .min_by_key(|(p, _)| *p);
310
311 let closing_result = match (closing_with_newline, closing_at_eof) {
312 (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
313 (Some(_), Some(_)) => closing_with_newline,
314 (Some(_), None) => closing_with_newline,
315 (None, Some(_)) => closing_at_eof,
316 (None, None) => None,
317 };
318
319 if let Some((closing_pos, closing_len)) = closing_result {
320 let abs_closing_pos = content_start + closing_pos;
321 let content = &markdown[content_start..abs_closing_pos];
322
323 if content.len() > crate::error::MAX_YAML_SIZE {
325 return Err(format!(
326 "YAML block too large: {} bytes (max: {} bytes)",
327 content.len(),
328 crate::error::MAX_YAML_SIZE
329 )
330 .into());
331 }
332
333 let (tag, quill_name, yaml_value) = if !content.is_empty() {
336 match serde_yaml::from_str::<serde_yaml::Value>(content) {
338 Ok(parsed_yaml) => {
339 if let Some(mapping) = parsed_yaml.as_mapping() {
340 let quill_key = serde_yaml::Value::String("QUILL".to_string());
341 let scope_key = serde_yaml::Value::String("SCOPE".to_string());
342
343 let has_quill = mapping.contains_key(&quill_key);
344 let has_scope = mapping.contains_key(&scope_key);
345
346 if has_quill && has_scope {
347 return Err(
348 "Cannot specify both QUILL and SCOPE in the same block"
349 .into(),
350 );
351 }
352
353 if has_quill {
354 let quill_value = mapping.get(&quill_key).unwrap();
356 let quill_name_str = quill_value
357 .as_str()
358 .ok_or_else(|| "QUILL value must be a string")?;
359
360 if !is_valid_tag_name(quill_name_str) {
361 return Err(format!(
362 "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
363 quill_name_str
364 )
365 .into());
366 }
367
368 let mut new_mapping = mapping.clone();
370 new_mapping.remove(&quill_key);
371 let new_value = if new_mapping.is_empty() {
372 None
373 } else {
374 Some(serde_yaml::Value::Mapping(new_mapping))
375 };
376
377 (None, Some(quill_name_str.to_string()), new_value)
378 } else if has_scope {
379 let scope_value = mapping.get(&scope_key).unwrap();
381 let field_name = scope_value
382 .as_str()
383 .ok_or_else(|| "SCOPE value must be a string")?;
384
385 if !is_valid_tag_name(field_name) {
386 return Err(format!(
387 "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
388 field_name
389 )
390 .into());
391 }
392
393 if field_name == BODY_FIELD {
394 return Err(format!(
395 "Cannot use reserved field name '{}' as SCOPE value",
396 BODY_FIELD
397 )
398 .into());
399 }
400
401 let mut new_mapping = mapping.clone();
403 new_mapping.remove(&scope_key);
404 let new_value = if new_mapping.is_empty() {
405 None
406 } else {
407 Some(serde_yaml::Value::Mapping(new_mapping))
408 };
409
410 (Some(field_name.to_string()), None, new_value)
411 } else {
412 (None, None, Some(parsed_yaml))
414 }
415 } else {
416 (None, None, Some(parsed_yaml))
418 }
419 }
420 Err(e) => {
421 return Err(format!("Invalid YAML frontmatter: {}", e).into());
423 }
424 }
425 } else {
426 (None, None, None)
428 };
429
430 blocks.push(MetadataBlock {
431 start: abs_pos,
432 end: abs_closing_pos + closing_len, yaml_value,
434 tag,
435 quill_name,
436 });
437
438 pos = abs_closing_pos + closing_len;
439 } else if abs_pos == 0 {
440 return Err("Frontmatter started but not closed with ---".into());
442 } else {
443 pos = abs_pos + 3;
445 }
446 } else {
447 break;
448 }
449 }
450
451 Ok(blocks)
452}
453
454fn decompose(markdown: &str) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
456 if markdown.len() > crate::error::MAX_INPUT_SIZE {
458 return Err(format!(
459 "Input too large: {} bytes (max: {} bytes)",
460 markdown.len(),
461 crate::error::MAX_INPUT_SIZE
462 )
463 .into());
464 }
465
466 let mut fields = HashMap::new();
467
468 let blocks = find_metadata_blocks(markdown)?;
470
471 if blocks.is_empty() {
472 let preprocessed_body = preprocess_markdown_guillemets(markdown);
475 fields.insert(
476 BODY_FIELD.to_string(),
477 QuillValue::from_json(serde_json::Value::String(preprocessed_body)),
478 );
479 return Ok(ParsedDocument::new(fields));
480 }
481
482 let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
484 let mut has_global_frontmatter = false;
485 let mut global_frontmatter_index: Option<usize> = None;
486 let mut quill_name: Option<String> = None;
487
488 for (idx, block) in blocks.iter().enumerate() {
490 if let Some(ref name) = block.quill_name {
492 if quill_name.is_some() {
493 return Err("Multiple quill directives found: only one allowed".into());
494 }
495 quill_name = Some(name.clone());
496 }
497
498 if block.tag.is_none() && block.quill_name.is_none() {
500 if has_global_frontmatter {
501 return Err(
502 "Multiple global frontmatter blocks found: only one untagged block allowed"
503 .into(),
504 );
505 }
506 has_global_frontmatter = true;
507 global_frontmatter_index = Some(idx);
508 }
509 }
510
511 if let Some(idx) = global_frontmatter_index {
513 let block = &blocks[idx];
514
515 let yaml_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
517 Some(serde_yaml::Value::Mapping(mapping)) => mapping
518 .iter()
519 .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
520 .collect(),
521 Some(serde_yaml::Value::Null) => {
522 HashMap::new()
524 }
525 Some(_) => {
526 return Err("Invalid YAML frontmatter: expected a mapping".into());
528 }
529 None => HashMap::new(),
530 };
531
532 for other_block in &blocks {
535 if let Some(ref tag) = other_block.tag {
536 if let Some(global_value) = yaml_fields.get(tag) {
537 if global_value.as_sequence().is_none() {
539 return Err(format!(
540 "Name collision: global field '{}' conflicts with tagged attribute",
541 tag
542 )
543 .into());
544 }
545 }
546 }
547 }
548
549 for (key, value) in yaml_fields {
552 let preprocessed = preprocess_yaml_guillemets(value);
553 fields.insert(key, QuillValue::from_yaml(preprocessed)?);
554 }
555 }
556
557 for block in &blocks {
559 if block.quill_name.is_some() {
560 if let Some(ref yaml_val) = block.yaml_value {
562 let yaml_fields: HashMap<String, serde_yaml::Value> = match yaml_val {
563 serde_yaml::Value::Mapping(mapping) => mapping
564 .iter()
565 .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
566 .collect(),
567 serde_yaml::Value::Null => {
568 HashMap::new()
570 }
571 _ => {
572 return Err("Invalid YAML in quill block: expected a mapping".into());
573 }
574 };
575
576 for key in yaml_fields.keys() {
578 if fields.contains_key(key) {
579 return Err(format!(
580 "Name collision: quill block field '{}' conflicts with existing field",
581 key
582 )
583 .into());
584 }
585 }
586
587 for (key, value) in yaml_fields {
590 let preprocessed = preprocess_yaml_guillemets(value);
591 fields.insert(key, QuillValue::from_yaml(preprocessed)?);
592 }
593 }
594 }
595 }
596
597 for (idx, block) in blocks.iter().enumerate() {
599 if let Some(ref tag_name) = block.tag {
600 if let Some(existing_value) = fields.get(tag_name) {
603 if existing_value.as_array().is_none() {
604 return Err(format!(
605 "Name collision: tagged attribute '{}' conflicts with global field",
606 tag_name
607 )
608 .into());
609 }
610 }
611
612 let mut item_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
614 Some(serde_yaml::Value::Mapping(mapping)) => mapping
615 .iter()
616 .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
617 .collect(),
618 Some(serde_yaml::Value::Null) => {
619 HashMap::new()
621 }
622 Some(_) => {
623 return Err(format!(
624 "Invalid YAML in tagged block '{}': expected a mapping",
625 tag_name
626 )
627 .into());
628 }
629 None => HashMap::new(),
630 };
631
632 let body_start = block.end;
634 let body_end = if idx + 1 < blocks.len() {
635 blocks[idx + 1].start
636 } else {
637 markdown.len()
638 };
639 let body = &markdown[body_start..body_end];
640
641 let preprocessed_body = preprocess_markdown_guillemets(body);
643
644 item_fields.insert(
646 BODY_FIELD.to_string(),
647 serde_yaml::Value::String(preprocessed_body),
648 );
649
650 let preprocessed_fields: HashMap<String, serde_yaml::Value> = item_fields
652 .into_iter()
653 .map(|(k, v)| (k, preprocess_yaml_guillemets(v)))
654 .collect();
655
656 let item_value = serde_yaml::to_value(preprocessed_fields)?;
658
659 tagged_attributes
661 .entry(tag_name.clone())
662 .or_insert_with(Vec::new)
663 .push(item_value);
664 }
665 }
666
667 let first_non_scope_block_idx = blocks
671 .iter()
672 .position(|b| b.tag.is_none() && b.quill_name.is_none())
673 .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));
674
675 let (body_start, body_end) = if let Some(idx) = first_non_scope_block_idx {
676 let start = blocks[idx].end;
678
679 let end = blocks
681 .iter()
682 .skip(idx + 1)
683 .find(|b| b.tag.is_some())
684 .map(|b| b.start)
685 .unwrap_or(markdown.len());
686
687 (start, end)
688 } else {
689 let end = blocks
691 .iter()
692 .find(|b| b.tag.is_some())
693 .map(|b| b.start)
694 .unwrap_or(0);
695
696 (0, end)
697 };
698
699 let global_body = &markdown[body_start..body_end];
700
701 let preprocessed_global_body = preprocess_markdown_guillemets(global_body);
703
704 fields.insert(
705 BODY_FIELD.to_string(),
706 QuillValue::from_json(serde_json::Value::String(preprocessed_global_body)),
707 );
708
709 for (tag_name, items) in tagged_attributes {
712 if let Some(existing_value) = fields.get(&tag_name) {
713 if let Some(existing_array) = existing_value.as_array() {
715 let new_items_json: Vec<serde_json::Value> = items
718 .into_iter()
719 .map(|yaml_val| {
720 serde_json::to_value(&yaml_val)
721 .map_err(|e| format!("Failed to convert YAML to JSON: {}", e))
722 })
723 .collect::<Result<Vec<_>, _>>()?;
724
725 let mut merged_array = existing_array.clone();
727 merged_array.extend(new_items_json);
728
729 let quill_value = QuillValue::from_json(serde_json::Value::Array(merged_array));
731 fields.insert(tag_name, quill_value);
732 } else {
733 return Err(format!(
735 "Internal error: field '{}' exists but is not an array",
736 tag_name
737 )
738 .into());
739 }
740 } else {
741 let quill_value = QuillValue::from_yaml(serde_yaml::Value::Sequence(items))?;
744 fields.insert(tag_name, quill_value);
745 }
746 }
747
748 let quill_tag = quill_name.unwrap_or_else(|| "__default__".to_string());
749 let parsed = ParsedDocument::with_quill_tag(fields, quill_tag);
750
751 Ok(parsed)
752}
753
754#[cfg(test)]
755mod tests {
756 use super::*;
757
758 #[test]
759 fn test_no_frontmatter() {
760 let markdown = "# Hello World\n\nThis is a test.";
761 let doc = decompose(markdown).unwrap();
762
763 assert_eq!(doc.body(), Some(markdown));
764 assert_eq!(doc.fields().len(), 1);
765 assert_eq!(doc.quill_tag(), "__default__");
767 }
768
769 #[test]
770 fn test_with_frontmatter() {
771 let markdown = r#"---
772title: Test Document
773author: Test Author
774---
775
776# Hello World
777
778This is the body."#;
779
780 let doc = decompose(markdown).unwrap();
781
782 assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
783 assert_eq!(
784 doc.get_field("title").unwrap().as_str().unwrap(),
785 "Test Document"
786 );
787 assert_eq!(
788 doc.get_field("author").unwrap().as_str().unwrap(),
789 "Test Author"
790 );
791 assert_eq!(doc.fields().len(), 3); assert_eq!(doc.quill_tag(), "__default__");
794 }
795
796 #[test]
797 fn test_complex_yaml_frontmatter() {
798 let markdown = r#"---
799title: Complex Document
800tags:
801 - test
802 - yaml
803metadata:
804 version: 1.0
805 nested:
806 field: value
807---
808
809Content here."#;
810
811 let doc = decompose(markdown).unwrap();
812
813 assert_eq!(doc.body(), Some("\nContent here."));
814 assert_eq!(
815 doc.get_field("title").unwrap().as_str().unwrap(),
816 "Complex Document"
817 );
818
819 let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
820 assert_eq!(tags.len(), 2);
821 assert_eq!(tags[0].as_str().unwrap(), "test");
822 assert_eq!(tags[1].as_str().unwrap(), "yaml");
823 }
824
825 #[test]
826 fn test_with_defaults_empty_document() {
827 use std::collections::HashMap;
828
829 let mut defaults = HashMap::new();
830 defaults.insert(
831 "status".to_string(),
832 QuillValue::from_json(serde_json::json!("draft")),
833 );
834 defaults.insert(
835 "version".to_string(),
836 QuillValue::from_json(serde_json::json!(1)),
837 );
838
839 let doc = ParsedDocument::new(HashMap::new());
841 let doc_with_defaults = doc.with_defaults(&defaults);
842
843 assert_eq!(
845 doc_with_defaults
846 .get_field("status")
847 .unwrap()
848 .as_str()
849 .unwrap(),
850 "draft"
851 );
852 assert_eq!(
853 doc_with_defaults
854 .get_field("version")
855 .unwrap()
856 .as_number()
857 .unwrap()
858 .as_i64()
859 .unwrap(),
860 1
861 );
862 }
863
864 #[test]
865 fn test_with_defaults_preserves_existing_values() {
866 use std::collections::HashMap;
867
868 let mut defaults = HashMap::new();
869 defaults.insert(
870 "status".to_string(),
871 QuillValue::from_json(serde_json::json!("draft")),
872 );
873
874 let mut fields = HashMap::new();
876 fields.insert(
877 "status".to_string(),
878 QuillValue::from_json(serde_json::json!("published")),
879 );
880 let doc = ParsedDocument::new(fields);
881
882 let doc_with_defaults = doc.with_defaults(&defaults);
883
884 assert_eq!(
886 doc_with_defaults
887 .get_field("status")
888 .unwrap()
889 .as_str()
890 .unwrap(),
891 "published"
892 );
893 }
894
895 #[test]
896 fn test_with_defaults_partial_application() {
897 use std::collections::HashMap;
898
899 let mut defaults = HashMap::new();
900 defaults.insert(
901 "status".to_string(),
902 QuillValue::from_json(serde_json::json!("draft")),
903 );
904 defaults.insert(
905 "version".to_string(),
906 QuillValue::from_json(serde_json::json!(1)),
907 );
908
909 let mut fields = HashMap::new();
911 fields.insert(
912 "status".to_string(),
913 QuillValue::from_json(serde_json::json!("published")),
914 );
915 let doc = ParsedDocument::new(fields);
916
917 let doc_with_defaults = doc.with_defaults(&defaults);
918
919 assert_eq!(
921 doc_with_defaults
922 .get_field("status")
923 .unwrap()
924 .as_str()
925 .unwrap(),
926 "published"
927 );
928 assert_eq!(
929 doc_with_defaults
930 .get_field("version")
931 .unwrap()
932 .as_number()
933 .unwrap()
934 .as_i64()
935 .unwrap(),
936 1
937 );
938 }
939
940 #[test]
941 fn test_with_defaults_no_defaults() {
942 use std::collections::HashMap;
943
944 let defaults = HashMap::new(); let doc = ParsedDocument::new(HashMap::new());
947 let doc_with_defaults = doc.with_defaults(&defaults);
948
949 assert!(doc_with_defaults.fields().is_empty());
951 }
952
953 #[test]
954 fn test_with_defaults_complex_types() {
955 use std::collections::HashMap;
956
957 let mut defaults = HashMap::new();
958 defaults.insert(
959 "tags".to_string(),
960 QuillValue::from_json(serde_json::json!(["default", "tag"])),
961 );
962
963 let doc = ParsedDocument::new(HashMap::new());
964 let doc_with_defaults = doc.with_defaults(&defaults);
965
966 let tags = doc_with_defaults
968 .get_field("tags")
969 .unwrap()
970 .as_sequence()
971 .unwrap();
972 assert_eq!(tags.len(), 2);
973 assert_eq!(tags[0].as_str().unwrap(), "default");
974 assert_eq!(tags[1].as_str().unwrap(), "tag");
975 }
976
977 #[test]
978 fn test_with_coercion_singular_to_array() {
979 use std::collections::HashMap;
980
981 let schema = QuillValue::from_json(serde_json::json!({
982 "$schema": "https://json-schema.org/draft/2019-09/schema",
983 "type": "object",
984 "properties": {
985 "tags": {"type": "array"}
986 }
987 }));
988
989 let mut fields = HashMap::new();
990 fields.insert(
991 "tags".to_string(),
992 QuillValue::from_json(serde_json::json!("single-tag")),
993 );
994 let doc = ParsedDocument::new(fields);
995
996 let coerced_doc = doc.with_coercion(&schema);
997
998 let tags = coerced_doc.get_field("tags").unwrap();
999 assert!(tags.as_array().is_some());
1000 let tags_array = tags.as_array().unwrap();
1001 assert_eq!(tags_array.len(), 1);
1002 assert_eq!(tags_array[0].as_str().unwrap(), "single-tag");
1003 }
1004
1005 #[test]
1006 fn test_with_coercion_string_to_boolean() {
1007 use std::collections::HashMap;
1008
1009 let schema = QuillValue::from_json(serde_json::json!({
1010 "$schema": "https://json-schema.org/draft/2019-09/schema",
1011 "type": "object",
1012 "properties": {
1013 "active": {"type": "boolean"}
1014 }
1015 }));
1016
1017 let mut fields = HashMap::new();
1018 fields.insert(
1019 "active".to_string(),
1020 QuillValue::from_json(serde_json::json!("true")),
1021 );
1022 let doc = ParsedDocument::new(fields);
1023
1024 let coerced_doc = doc.with_coercion(&schema);
1025
1026 assert_eq!(
1027 coerced_doc.get_field("active").unwrap().as_bool().unwrap(),
1028 true
1029 );
1030 }
1031
1032 #[test]
1033 fn test_with_coercion_string_to_number() {
1034 use std::collections::HashMap;
1035
1036 let schema = QuillValue::from_json(serde_json::json!({
1037 "$schema": "https://json-schema.org/draft/2019-09/schema",
1038 "type": "object",
1039 "properties": {
1040 "count": {"type": "number"}
1041 }
1042 }));
1043
1044 let mut fields = HashMap::new();
1045 fields.insert(
1046 "count".to_string(),
1047 QuillValue::from_json(serde_json::json!("42")),
1048 );
1049 let doc = ParsedDocument::new(fields);
1050
1051 let coerced_doc = doc.with_coercion(&schema);
1052
1053 assert_eq!(
1054 coerced_doc.get_field("count").unwrap().as_i64().unwrap(),
1055 42
1056 );
1057 }
1058
1059 #[test]
1060 fn test_invalid_yaml() {
1061 let markdown = r#"---
1062title: [invalid yaml
1063author: missing close bracket
1064---
1065
1066Content here."#;
1067
1068 let result = decompose(markdown);
1069 assert!(result.is_err());
1070 assert!(result
1071 .unwrap_err()
1072 .to_string()
1073 .contains("Invalid YAML frontmatter"));
1074 }
1075
1076 #[test]
1077 fn test_unclosed_frontmatter() {
1078 let markdown = r#"---
1079title: Test
1080author: Test Author
1081
1082Content without closing ---"#;
1083
1084 let result = decompose(markdown);
1085 assert!(result.is_err());
1086 assert!(result.unwrap_err().to_string().contains("not closed"));
1087 }
1088
1089 #[test]
1092 fn test_basic_tagged_block() {
1093 let markdown = r#"---
1094title: Main Document
1095---
1096
1097Main body content.
1098
1099---
1100SCOPE: items
1101name: Item 1
1102---
1103
1104Body of item 1."#;
1105
1106 let doc = decompose(markdown).unwrap();
1107
1108 assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
1109 assert_eq!(
1110 doc.get_field("title").unwrap().as_str().unwrap(),
1111 "Main Document"
1112 );
1113
1114 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1115 assert_eq!(items.len(), 1);
1116
1117 let item = items[0].as_object().unwrap();
1118 assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1119 assert_eq!(
1120 item.get("body").unwrap().as_str().unwrap(),
1121 "\nBody of item 1."
1122 );
1123 }
1124
1125 #[test]
1126 fn test_multiple_tagged_blocks() {
1127 let markdown = r#"---
1128SCOPE: items
1129name: Item 1
1130tags: [a, b]
1131---
1132
1133First item body.
1134
1135---
1136SCOPE: items
1137name: Item 2
1138tags: [c, d]
1139---
1140
1141Second item body."#;
1142
1143 let doc = decompose(markdown).unwrap();
1144
1145 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1146 assert_eq!(items.len(), 2);
1147
1148 let item1 = items[0].as_object().unwrap();
1149 assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");
1150
1151 let item2 = items[1].as_object().unwrap();
1152 assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
1153 }
1154
1155 #[test]
1156 fn test_mixed_global_and_tagged() {
1157 let markdown = r#"---
1158title: Global
1159author: John Doe
1160---
1161
1162Global body.
1163
1164---
1165SCOPE: sections
1166title: Section 1
1167---
1168
1169Section 1 content.
1170
1171---
1172SCOPE: sections
1173title: Section 2
1174---
1175
1176Section 2 content."#;
1177
1178 let doc = decompose(markdown).unwrap();
1179
1180 assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
1181 assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
1182
1183 let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1184 assert_eq!(sections.len(), 2);
1185 }
1186
1187 #[test]
1188 fn test_empty_tagged_metadata() {
1189 let markdown = r#"---
1190SCOPE: items
1191---
1192
1193Body without metadata."#;
1194
1195 let doc = decompose(markdown).unwrap();
1196
1197 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1198 assert_eq!(items.len(), 1);
1199
1200 let item = items[0].as_object().unwrap();
1201 assert_eq!(
1202 item.get("body").unwrap().as_str().unwrap(),
1203 "\nBody without metadata."
1204 );
1205 }
1206
1207 #[test]
1208 fn test_tagged_block_without_body() {
1209 let markdown = r#"---
1210SCOPE: items
1211name: Item
1212---"#;
1213
1214 let doc = decompose(markdown).unwrap();
1215
1216 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1217 assert_eq!(items.len(), 1);
1218
1219 let item = items[0].as_object().unwrap();
1220 assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
1221 }
1222
1223 #[test]
1224 fn test_name_collision_global_and_tagged() {
1225 let markdown = r#"---
1226items: "global value"
1227---
1228
1229Body
1230
1231---
1232SCOPE: items
1233name: Item
1234---
1235
1236Item body"#;
1237
1238 let result = decompose(markdown);
1239 assert!(result.is_err());
1240 assert!(result.unwrap_err().to_string().contains("collision"));
1241 }
1242
1243 #[test]
1244 fn test_global_array_merged_with_scope() {
1245 let markdown = r#"---
1248items:
1249 - name: Global Item 1
1250 value: 100
1251 - name: Global Item 2
1252 value: 200
1253---
1254
1255Global body
1256
1257---
1258SCOPE: items
1259name: Scope Item 1
1260value: 300
1261---
1262
1263Scope item 1 body
1264
1265---
1266SCOPE: items
1267name: Scope Item 2
1268value: 400
1269---
1270
1271Scope item 2 body"#;
1272
1273 let doc = decompose(markdown).unwrap();
1274
1275 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1277 assert_eq!(items.len(), 4);
1278
1279 let item1 = items[0].as_object().unwrap();
1281 assert_eq!(
1282 item1.get("name").unwrap().as_str().unwrap(),
1283 "Global Item 1"
1284 );
1285 assert_eq!(item1.get("value").unwrap().as_i64().unwrap(), 100);
1286
1287 let item2 = items[1].as_object().unwrap();
1288 assert_eq!(
1289 item2.get("name").unwrap().as_str().unwrap(),
1290 "Global Item 2"
1291 );
1292 assert_eq!(item2.get("value").unwrap().as_i64().unwrap(), 200);
1293
1294 let item3 = items[2].as_object().unwrap();
1296 assert_eq!(item3.get("name").unwrap().as_str().unwrap(), "Scope Item 1");
1297 assert_eq!(item3.get("value").unwrap().as_i64().unwrap(), 300);
1298 assert_eq!(
1299 item3.get("body").unwrap().as_str().unwrap(),
1300 "\nScope item 1 body\n\n"
1301 );
1302
1303 let item4 = items[3].as_object().unwrap();
1304 assert_eq!(item4.get("name").unwrap().as_str().unwrap(), "Scope Item 2");
1305 assert_eq!(item4.get("value").unwrap().as_i64().unwrap(), 400);
1306 assert_eq!(
1307 item4.get("body").unwrap().as_str().unwrap(),
1308 "\nScope item 2 body"
1309 );
1310 }
1311
1312 #[test]
1313 fn test_empty_global_array_with_scope() {
1314 let markdown = r#"---
1316items: []
1317---
1318
1319Global body
1320
1321---
1322SCOPE: items
1323name: Item 1
1324---
1325
1326Item 1 body"#;
1327
1328 let doc = decompose(markdown).unwrap();
1329
1330 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1331 assert_eq!(items.len(), 1);
1332
1333 let item = items[0].as_object().unwrap();
1334 assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1335 }
1336
1337 #[test]
1338 fn test_reserved_field_name() {
1339 let markdown = r#"---
1340SCOPE: body
1341content: Test
1342---"#;
1343
1344 let result = decompose(markdown);
1345 assert!(result.is_err());
1346 assert!(result.unwrap_err().to_string().contains("reserved"));
1347 }
1348
1349 #[test]
1350 fn test_invalid_tag_syntax() {
1351 let markdown = r#"---
1352SCOPE: Invalid-Name
1353title: Test
1354---"#;
1355
1356 let result = decompose(markdown);
1357 assert!(result.is_err());
1358 assert!(result
1359 .unwrap_err()
1360 .to_string()
1361 .contains("Invalid field name"));
1362 }
1363
1364 #[test]
1365 fn test_multiple_global_frontmatter_blocks() {
1366 let markdown = r#"---
1367title: First
1368---
1369
1370Body
1371
1372---
1373author: Second
1374---
1375
1376More body"#;
1377
1378 let result = decompose(markdown);
1379 assert!(result.is_err());
1380 assert!(result
1381 .unwrap_err()
1382 .to_string()
1383 .contains("Multiple global frontmatter"));
1384 }
1385
1386 #[test]
1387 fn test_adjacent_blocks_different_tags() {
1388 let markdown = r#"---
1389SCOPE: items
1390name: Item 1
1391---
1392
1393Item 1 body
1394
1395---
1396SCOPE: sections
1397title: Section 1
1398---
1399
1400Section 1 body"#;
1401
1402 let doc = decompose(markdown).unwrap();
1403
1404 assert!(doc.get_field("items").is_some());
1405 assert!(doc.get_field("sections").is_some());
1406
1407 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1408 assert_eq!(items.len(), 1);
1409
1410 let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1411 assert_eq!(sections.len(), 1);
1412 }
1413
1414 #[test]
1415 fn test_order_preservation() {
1416 let markdown = r#"---
1417SCOPE: items
1418id: 1
1419---
1420
1421First
1422
1423---
1424SCOPE: items
1425id: 2
1426---
1427
1428Second
1429
1430---
1431SCOPE: items
1432id: 3
1433---
1434
1435Third"#;
1436
1437 let doc = decompose(markdown).unwrap();
1438
1439 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1440 assert_eq!(items.len(), 3);
1441
1442 for (i, item) in items.iter().enumerate() {
1443 let mapping = item.as_object().unwrap();
1444 let id = mapping.get("id").unwrap().as_i64().unwrap();
1445 assert_eq!(id, (i + 1) as i64);
1446 }
1447 }
1448
1449 #[test]
1450 fn test_product_catalog_integration() {
1451 let markdown = r#"---
1452title: Product Catalog
1453author: John Doe
1454date: 2024-01-01
1455---
1456
1457This is the main catalog description.
1458
1459---
1460SCOPE: products
1461name: Widget A
1462price: 19.99
1463sku: WID-001
1464---
1465
1466The **Widget A** is our most popular product.
1467
1468---
1469SCOPE: products
1470name: Gadget B
1471price: 29.99
1472sku: GAD-002
1473---
1474
1475The **Gadget B** is perfect for professionals.
1476
1477---
1478SCOPE: reviews
1479product: Widget A
1480rating: 5
1481---
1482
1483"Excellent product! Highly recommended."
1484
1485---
1486SCOPE: reviews
1487product: Gadget B
1488rating: 4
1489---
1490
1491"Very good, but a bit pricey.""#;
1492
1493 let doc = decompose(markdown).unwrap();
1494
1495 assert_eq!(
1497 doc.get_field("title").unwrap().as_str().unwrap(),
1498 "Product Catalog"
1499 );
1500 assert_eq!(
1501 doc.get_field("author").unwrap().as_str().unwrap(),
1502 "John Doe"
1503 );
1504 assert_eq!(
1505 doc.get_field("date").unwrap().as_str().unwrap(),
1506 "2024-01-01"
1507 );
1508
1509 assert!(doc.body().unwrap().contains("main catalog description"));
1511
1512 let products = doc.get_field("products").unwrap().as_sequence().unwrap();
1514 assert_eq!(products.len(), 2);
1515
1516 let product1 = products[0].as_object().unwrap();
1517 assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
1518 assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);
1519
1520 let reviews = doc.get_field("reviews").unwrap().as_sequence().unwrap();
1522 assert_eq!(reviews.len(), 2);
1523
1524 let review1 = reviews[0].as_object().unwrap();
1525 assert_eq!(
1526 review1.get("product").unwrap().as_str().unwrap(),
1527 "Widget A"
1528 );
1529 assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);
1530
1531 assert_eq!(doc.fields().len(), 6);
1533 }
1534
1535 #[test]
1536 fn taro_quill_directive() {
1537 let markdown = r#"---
1538QUILL: usaf_memo
1539memo_for: [ORG/SYMBOL]
1540memo_from: [ORG/SYMBOL]
1541---
1542
1543This is the memo body."#;
1544
1545 let doc = decompose(markdown).unwrap();
1546
1547 assert_eq!(doc.quill_tag(), "usaf_memo");
1549
1550 assert_eq!(
1552 doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1553 .as_str()
1554 .unwrap(),
1555 "ORG/SYMBOL"
1556 );
1557
1558 assert_eq!(doc.body(), Some("\nThis is the memo body."));
1560 }
1561
1562 #[test]
1563 fn test_quill_with_scope_blocks() {
1564 let markdown = r#"---
1565QUILL: document
1566title: Test Document
1567---
1568
1569Main body.
1570
1571---
1572SCOPE: sections
1573name: Section 1
1574---
1575
1576Section 1 body."#;
1577
1578 let doc = decompose(markdown).unwrap();
1579
1580 assert_eq!(doc.quill_tag(), "document");
1582
1583 assert_eq!(
1585 doc.get_field("title").unwrap().as_str().unwrap(),
1586 "Test Document"
1587 );
1588
1589 let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1591 assert_eq!(sections.len(), 1);
1592
1593 assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1595 }
1596
1597 #[test]
1598 fn test_multiple_quill_directives_error() {
1599 let markdown = r#"---
1600QUILL: first
1601---
1602
1603---
1604QUILL: second
1605---"#;
1606
1607 let result = decompose(markdown);
1608 assert!(result.is_err());
1609 assert!(result
1610 .unwrap_err()
1611 .to_string()
1612 .contains("Multiple quill directives"));
1613 }
1614
1615 #[test]
1616 fn test_invalid_quill_name() {
1617 let markdown = r#"---
1618QUILL: Invalid-Name
1619---"#;
1620
1621 let result = decompose(markdown);
1622 assert!(result.is_err());
1623 assert!(result
1624 .unwrap_err()
1625 .to_string()
1626 .contains("Invalid quill name"));
1627 }
1628
1629 #[test]
1630 fn test_quill_wrong_value_type() {
1631 let markdown = r#"---
1632QUILL: 123
1633---"#;
1634
1635 let result = decompose(markdown);
1636 assert!(result.is_err());
1637 assert!(result
1638 .unwrap_err()
1639 .to_string()
1640 .contains("QUILL value must be a string"));
1641 }
1642
1643 #[test]
1644 fn test_scope_wrong_value_type() {
1645 let markdown = r#"---
1646SCOPE: 123
1647---"#;
1648
1649 let result = decompose(markdown);
1650 assert!(result.is_err());
1651 assert!(result
1652 .unwrap_err()
1653 .to_string()
1654 .contains("SCOPE value must be a string"));
1655 }
1656
1657 #[test]
1658 fn test_both_quill_and_scope_error() {
1659 let markdown = r#"---
1660QUILL: test
1661SCOPE: items
1662---"#;
1663
1664 let result = decompose(markdown);
1665 assert!(result.is_err());
1666 assert!(result
1667 .unwrap_err()
1668 .to_string()
1669 .contains("Cannot specify both QUILL and SCOPE"));
1670 }
1671
1672 #[test]
1673 fn test_blank_lines_in_frontmatter() {
1674 let markdown = r#"---
1676title: Test Document
1677author: Test Author
1678
1679description: This has a blank line above it
1680tags:
1681 - one
1682 - two
1683---
1684
1685# Hello World
1686
1687This is the body."#;
1688
1689 let doc = decompose(markdown).unwrap();
1690
1691 assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
1692 assert_eq!(
1693 doc.get_field("title").unwrap().as_str().unwrap(),
1694 "Test Document"
1695 );
1696 assert_eq!(
1697 doc.get_field("author").unwrap().as_str().unwrap(),
1698 "Test Author"
1699 );
1700 assert_eq!(
1701 doc.get_field("description").unwrap().as_str().unwrap(),
1702 "This has a blank line above it"
1703 );
1704
1705 let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
1706 assert_eq!(tags.len(), 2);
1707 }
1708
1709 #[test]
1710 fn test_blank_lines_in_scope_blocks() {
1711 let markdown = r#"---
1713SCOPE: items
1714name: Item 1
1715
1716price: 19.99
1717
1718tags:
1719 - electronics
1720 - gadgets
1721---
1722
1723Body of item 1."#;
1724
1725 let doc = decompose(markdown).unwrap();
1726
1727 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1728 assert_eq!(items.len(), 1);
1729
1730 let item = items[0].as_object().unwrap();
1731 assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1732 assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);
1733
1734 let tags = item.get("tags").unwrap().as_array().unwrap();
1735 assert_eq!(tags.len(), 2);
1736 }
1737
1738 #[test]
1739 fn test_horizontal_rule_with_blank_lines_above_and_below() {
1740 let markdown = r#"---
1742title: Test
1743---
1744
1745First paragraph.
1746
1747---
1748
1749Second paragraph."#;
1750
1751 let doc = decompose(markdown).unwrap();
1752
1753 assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1754
1755 let body = doc.body().unwrap();
1757 assert!(body.contains("First paragraph."));
1758 assert!(body.contains("---"));
1759 assert!(body.contains("Second paragraph."));
1760 }
1761
1762 #[test]
1763 fn test_horizontal_rule_not_preceded_by_blank() {
1764 let markdown = r#"---
1767title: Test
1768---
1769
1770First paragraph.
1771---
1772
1773Second paragraph."#;
1774
1775 let doc = decompose(markdown).unwrap();
1776
1777 let body = doc.body().unwrap();
1778 assert!(body.contains("---"));
1780 }
1781
1782 #[test]
1783 fn test_multiple_blank_lines_in_yaml() {
1784 let markdown = r#"---
1786title: Test
1787
1788
1789author: John Doe
1790
1791
1792version: 1.0
1793---
1794
1795Body content."#;
1796
1797 let doc = decompose(markdown).unwrap();
1798
1799 assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1800 assert_eq!(
1801 doc.get_field("author").unwrap().as_str().unwrap(),
1802 "John Doe"
1803 );
1804 assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1805 }
1806
1807 #[test]
1808 fn test_html_comment_interaction() {
1809 let markdown = r#"<!---
1810---> the rest of the page content
1811
1812---
1813key: value
1814---
1815"#;
1816 let doc = decompose(markdown).unwrap();
1817
1818 let key = doc.get_field("key").and_then(|v| v.as_str());
1821 assert_eq!(key, Some("value"));
1822 }
1823}
1824#[cfg(test)]
1825mod demo_file_test {
1826 use super::*;
1827
1828 #[test]
1829 fn test_extended_metadata_demo_file() {
1830 let markdown = include_str!("../../fixtures/resources/extended_metadata_demo.md");
1831 let doc = decompose(markdown).unwrap();
1832
1833 assert_eq!(
1835 doc.get_field("title").unwrap().as_str().unwrap(),
1836 "Extended Metadata Demo"
1837 );
1838 assert_eq!(
1839 doc.get_field("author").unwrap().as_str().unwrap(),
1840 "Quillmark Team"
1841 );
1842 assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1844
1845 assert!(doc
1847 .body()
1848 .unwrap()
1849 .contains("extended YAML metadata standard"));
1850
1851 let features = doc.get_field("features").unwrap().as_sequence().unwrap();
1853 assert_eq!(features.len(), 3);
1854
1855 let use_cases = doc.get_field("use_cases").unwrap().as_sequence().unwrap();
1857 assert_eq!(use_cases.len(), 2);
1858
1859 let feature1 = features[0].as_object().unwrap();
1861 assert_eq!(
1862 feature1.get("name").unwrap().as_str().unwrap(),
1863 "Tag Directives"
1864 );
1865 }
1866
1867 #[test]
1868 fn test_input_size_limit() {
1869 let size = crate::error::MAX_INPUT_SIZE + 1;
1871 let large_markdown = "a".repeat(size);
1872
1873 let result = decompose(&large_markdown);
1874 assert!(result.is_err());
1875
1876 let err_msg = result.unwrap_err().to_string();
1877 assert!(err_msg.contains("Input too large"));
1878 }
1879
1880 #[test]
1881 fn test_yaml_size_limit() {
1882 let mut markdown = String::from("---\n");
1884
1885 let size = crate::error::MAX_YAML_SIZE + 1;
1887 markdown.push_str("data: \"");
1888 markdown.push_str(&"x".repeat(size));
1889 markdown.push_str("\"\n---\n\nBody");
1890
1891 let result = decompose(&markdown);
1892 assert!(result.is_err());
1893
1894 let err_msg = result.unwrap_err().to_string();
1895 assert!(err_msg.contains("YAML block too large"));
1896 }
1897
1898 #[test]
1899 fn test_input_within_size_limit() {
1900 let size = 1000; let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));
1903
1904 let result = decompose(&markdown);
1905 assert!(result.is_ok());
1906 }
1907
1908 #[test]
1909 fn test_yaml_within_size_limit() {
1910 let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";
1912
1913 let result = decompose(&markdown);
1914 assert!(result.is_ok());
1915 }
1916
1917 #[test]
1919 fn test_guillemet_in_body_no_frontmatter() {
1920 let markdown = "Use <<raw content>> here.";
1921 let doc = decompose(markdown).unwrap();
1922
1923 assert_eq!(doc.body(), Some("Use «raw content» here."));
1925 }
1926
1927 #[test]
1928 fn test_guillemet_in_body_with_frontmatter() {
1929 let markdown = r#"---
1930title: Test
1931---
1932
1933Use <<raw content>> here."#;
1934 let doc = decompose(markdown).unwrap();
1935
1936 assert_eq!(doc.body(), Some("\nUse «raw content» here."));
1938 }
1939
1940 #[test]
1941 fn test_guillemet_in_yaml_string() {
1942 let markdown = r#"---
1943title: Test <<with chevrons>>
1944---
1945
1946Body content."#;
1947 let doc = decompose(markdown).unwrap();
1948
1949 assert_eq!(
1951 doc.get_field("title").unwrap().as_str().unwrap(),
1952 "Test «with chevrons»"
1953 );
1954 }
1955
1956 #[test]
1957 fn test_guillemet_in_yaml_array() {
1958 let markdown = r#"---
1959items:
1960 - "<<first>>"
1961 - "<<second>>"
1962---
1963
1964Body."#;
1965 let doc = decompose(markdown).unwrap();
1966
1967 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1968 assert_eq!(items[0].as_str().unwrap(), "«first»");
1969 assert_eq!(items[1].as_str().unwrap(), "«second»");
1970 }
1971
1972 #[test]
1973 fn test_guillemet_in_yaml_nested() {
1974 let markdown = r#"---
1975metadata:
1976 description: "<<nested value>>"
1977---
1978
1979Body."#;
1980 let doc = decompose(markdown).unwrap();
1981
1982 let metadata = doc.get_field("metadata").unwrap().as_object().unwrap();
1983 assert_eq!(
1984 metadata.get("description").unwrap().as_str().unwrap(),
1985 "«nested value»"
1986 );
1987 }
1988
1989 #[test]
1990 fn test_guillemet_in_body_skips_code_blocks() {
1991 let markdown = r#"```
1992<<not converted>>
1993```
1994
1995<<converted>>"#;
1996 let doc = decompose(markdown).unwrap();
1997
1998 let body = doc.body().unwrap();
1999 assert!(body.contains("<<not converted>>"));
2001 assert!(body.contains("«converted»"));
2003 }
2004
2005 #[test]
2006 fn test_guillemet_in_body_skips_inline_code() {
2007 let markdown = "`<<not converted>>` and <<converted>>";
2008 let doc = decompose(markdown).unwrap();
2009
2010 let body = doc.body().unwrap();
2011 assert!(body.contains("`<<not converted>>`"));
2013 assert!(body.contains("«converted»"));
2015 }
2016
2017 #[test]
2018 fn test_guillemet_in_tagged_block_body() {
2019 let markdown = r#"---
2020title: Main
2021---
2022
2023Main body.
2024
2025---
2026SCOPE: items
2027name: Item 1
2028---
2029
2030Use <<raw>> here."#;
2031 let doc = decompose(markdown).unwrap();
2032
2033 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
2034 let item = items[0].as_object().unwrap();
2035 let item_body = item.get("body").unwrap().as_str().unwrap();
2036 assert!(item_body.contains("«raw»"));
2038 }
2039
2040 #[test]
2041 fn test_guillemet_in_tagged_block_yaml() {
2042 let markdown = r#"---
2043title: Main
2044---
2045
2046Main body.
2047
2048---
2049SCOPE: items
2050description: "<<tagged yaml>>"
2051---
2052
2053Item body."#;
2054 let doc = decompose(markdown).unwrap();
2055
2056 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
2057 let item = items[0].as_object().unwrap();
2058 assert_eq!(
2060 item.get("description").unwrap().as_str().unwrap(),
2061 "«tagged yaml»"
2062 );
2063 }
2064
2065 #[test]
2066 fn test_guillemet_not_converted_in_yaml_numbers() {
2067 let markdown = r#"---
2069count: 42
2070---
2071
2072Body."#;
2073 let doc = decompose(markdown).unwrap();
2074 assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
2075 }
2076
2077 #[test]
2078 fn test_guillemet_not_converted_in_yaml_booleans() {
2079 let markdown = r#"---
2081active: true
2082---
2083
2084Body."#;
2085 let doc = decompose(markdown).unwrap();
2086 assert_eq!(doc.get_field("active").unwrap().as_bool().unwrap(), true);
2087 }
2088
2089 #[test]
2090 fn test_guillemet_multiline_not_converted() {
2091 let markdown = "<<text\nacross lines>>";
2093 let doc = decompose(markdown).unwrap();
2094
2095 let body = doc.body().unwrap();
2096 assert!(!body.contains('«'));
2098 assert!(!body.contains('»'));
2099 }
2100
2101 #[test]
2102 fn test_guillemet_unmatched_not_converted() {
2103 let markdown = "<<unmatched";
2104 let doc = decompose(markdown).unwrap();
2105
2106 let body = doc.body().unwrap();
2107 assert_eq!(body, "<<unmatched");
2109 }
2110}
2111
2112#[cfg(test)]
2114mod robustness_tests {
2115 use super::*;
2116
2117 #[test]
2120 fn test_empty_document() {
2121 let doc = decompose("").unwrap();
2122 assert_eq!(doc.body(), Some(""));
2123 assert_eq!(doc.quill_tag(), "__default__");
2124 }
2125
2126 #[test]
2127 fn test_only_whitespace() {
2128 let doc = decompose(" \n\n \t").unwrap();
2129 assert_eq!(doc.body(), Some(" \n\n \t"));
2130 }
2131
2132 #[test]
2133 fn test_only_dashes() {
2134 let result = decompose("---");
2137 assert!(result.is_ok());
2139 assert_eq!(result.unwrap().body(), Some("---"));
2140 }
2141
2142 #[test]
2143 fn test_dashes_in_middle_of_line() {
2144 let markdown = "some text --- more text";
2146 let doc = decompose(markdown).unwrap();
2147 assert_eq!(doc.body(), Some("some text --- more text"));
2148 }
2149
2150 #[test]
2151 fn test_four_dashes() {
2152 let markdown = "----\ntitle: Test\n----\n\nBody";
2154 let doc = decompose(markdown).unwrap();
2155 assert!(doc.body().unwrap().contains("----"));
2157 }
2158
2159 #[test]
2160 fn test_crlf_line_endings() {
2161 let markdown = "---\r\ntitle: Test\r\n---\r\n\r\nBody content.";
2163 let doc = decompose(markdown).unwrap();
2164 assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
2165 assert!(doc.body().unwrap().contains("Body content."));
2166 }
2167
2168 #[test]
2169 fn test_mixed_line_endings() {
2170 let markdown = "---\ntitle: Test\r\n---\n\nBody.";
2172 let doc = decompose(markdown).unwrap();
2173 assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
2174 }
2175
2176 #[test]
2177 fn test_frontmatter_at_eof_no_trailing_newline() {
2178 let markdown = "---\ntitle: Test\n---";
2180 let doc = decompose(markdown).unwrap();
2181 assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
2182 assert_eq!(doc.body(), Some(""));
2183 }
2184
2185 #[test]
2186 fn test_empty_frontmatter() {
2187 let markdown = "---\n \n---\n\nBody content.";
2192 let doc = decompose(markdown).unwrap();
2193 assert!(doc.body().unwrap().contains("Body content."));
2194 assert_eq!(doc.fields().len(), 1);
2196 }
2197
2198 #[test]
2199 fn test_whitespace_only_frontmatter() {
2200 let markdown = "---\n \n\n \n---\n\nBody.";
2202 let doc = decompose(markdown).unwrap();
2203 assert!(doc.body().unwrap().contains("Body."));
2204 }
2205
2206 #[test]
2209 fn test_unicode_in_yaml_keys() {
2210 let markdown = "---\ntitre: Bonjour\nタイトル: こんにちは\n---\n\nBody.";
2211 let doc = decompose(markdown).unwrap();
2212 assert_eq!(doc.get_field("titre").unwrap().as_str().unwrap(), "Bonjour");
2213 assert_eq!(
2214 doc.get_field("タイトル").unwrap().as_str().unwrap(),
2215 "こんにちは"
2216 );
2217 }
2218
2219 #[test]
2220 fn test_unicode_in_yaml_values() {
2221 let markdown = "---\ntitle: 你好世界 🎉\n---\n\nBody.";
2222 let doc = decompose(markdown).unwrap();
2223 assert_eq!(
2224 doc.get_field("title").unwrap().as_str().unwrap(),
2225 "你好世界 🎉"
2226 );
2227 }
2228
2229 #[test]
2230 fn test_unicode_in_body() {
2231 let markdown = "---\ntitle: Test\n---\n\n日本語テキスト with emoji 🚀";
2232 let doc = decompose(markdown).unwrap();
2233 assert!(doc.body().unwrap().contains("日本語テキスト"));
2234 assert!(doc.body().unwrap().contains("🚀"));
2235 }
2236
2237 #[test]
2240 fn test_yaml_multiline_string() {
2241 let markdown = r#"---
2242description: |
2243 This is a
2244 multiline string
2245 with preserved newlines.
2246---
2247
2248Body."#;
2249 let doc = decompose(markdown).unwrap();
2250 let desc = doc.get_field("description").unwrap().as_str().unwrap();
2251 assert!(desc.contains("multiline string"));
2252 assert!(desc.contains('\n'));
2253 }
2254
2255 #[test]
2256 fn test_yaml_folded_string() {
2257 let markdown = r#"---
2258description: >
2259 This is a folded
2260 string that becomes
2261 a single line.
2262---
2263
2264Body."#;
2265 let doc = decompose(markdown).unwrap();
2266 let desc = doc.get_field("description").unwrap().as_str().unwrap();
2267 assert!(desc.contains("folded"));
2269 }
2270
2271 #[test]
2272 fn test_yaml_null_value() {
2273 let markdown = "---\noptional: null\n---\n\nBody.";
2274 let doc = decompose(markdown).unwrap();
2275 assert!(doc.get_field("optional").unwrap().is_null());
2276 }
2277
2278 #[test]
2279 fn test_yaml_empty_string_value() {
2280 let markdown = "---\nempty: \"\"\n---\n\nBody.";
2281 let doc = decompose(markdown).unwrap();
2282 assert_eq!(doc.get_field("empty").unwrap().as_str().unwrap(), "");
2283 }
2284
2285 #[test]
2286 fn test_yaml_special_characters_in_string() {
2287 let markdown = "---\nspecial: \"colon: here, and [brackets]\"\n---\n\nBody.";
2288 let doc = decompose(markdown).unwrap();
2289 assert_eq!(
2290 doc.get_field("special").unwrap().as_str().unwrap(),
2291 "colon: here, and [brackets]"
2292 );
2293 }
2294
2295 #[test]
2296 fn test_yaml_nested_objects() {
2297 let markdown = r#"---
2298config:
2299 database:
2300 host: localhost
2301 port: 5432
2302 cache:
2303 enabled: true
2304---
2305
2306Body."#;
2307 let doc = decompose(markdown).unwrap();
2308 let config = doc.get_field("config").unwrap().as_object().unwrap();
2309 let db = config.get("database").unwrap().as_object().unwrap();
2310 assert_eq!(db.get("host").unwrap().as_str().unwrap(), "localhost");
2311 assert_eq!(db.get("port").unwrap().as_i64().unwrap(), 5432);
2312 }
2313
2314 #[test]
2317 fn test_scope_with_empty_body() {
2318 let markdown = r#"---
2319SCOPE: items
2320name: Item
2321---"#;
2322 let doc = decompose(markdown).unwrap();
2323 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
2324 assert_eq!(items.len(), 1);
2325 let item = items[0].as_object().unwrap();
2326 assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
2327 }
2328
2329 #[test]
2330 fn test_scope_consecutive_blocks() {
2331 let markdown = r#"---
2332SCOPE: a
2333id: 1
2334---
2335---
2336SCOPE: a
2337id: 2
2338---"#;
2339 let doc = decompose(markdown).unwrap();
2340 let items = doc.get_field("a").unwrap().as_sequence().unwrap();
2341 assert_eq!(items.len(), 2);
2342 }
2343
2344 #[test]
2345 fn test_scope_with_body_containing_dashes() {
2346 let markdown = r#"---
2347SCOPE: items
2348name: Item
2349---
2350
2351Some text with --- dashes in it."#;
2352 let doc = decompose(markdown).unwrap();
2353 let items = doc.get_field("items").unwrap().as_sequence().unwrap();
2354 let item = items[0].as_object().unwrap();
2355 let body = item.get("body").unwrap().as_str().unwrap();
2356 assert!(body.contains("--- dashes"));
2357 }
2358
2359 #[test]
2362 fn test_quill_with_underscore_prefix() {
2363 let markdown = "---\nQUILL: _internal\n---\n\nBody.";
2364 let doc = decompose(markdown).unwrap();
2365 assert_eq!(doc.quill_tag(), "_internal");
2366 }
2367
2368 #[test]
2369 fn test_quill_with_numbers() {
2370 let markdown = "---\nQUILL: form_8_v2\n---\n\nBody.";
2371 let doc = decompose(markdown).unwrap();
2372 assert_eq!(doc.quill_tag(), "form_8_v2");
2373 }
2374
2375 #[test]
2376 fn test_quill_with_additional_fields() {
2377 let markdown = r#"---
2378QUILL: my_quill
2379title: Document Title
2380author: John Doe
2381---
2382
2383Body content."#;
2384 let doc = decompose(markdown).unwrap();
2385 assert_eq!(doc.quill_tag(), "my_quill");
2386 assert_eq!(
2387 doc.get_field("title").unwrap().as_str().unwrap(),
2388 "Document Title"
2389 );
2390 assert_eq!(
2391 doc.get_field("author").unwrap().as_str().unwrap(),
2392 "John Doe"
2393 );
2394 }
2395
2396 #[test]
2399 fn test_invalid_scope_name_uppercase() {
2400 let markdown = "---\nSCOPE: ITEMS\n---\n\nBody.";
2401 let result = decompose(markdown);
2402 assert!(result.is_err());
2403 assert!(result
2404 .unwrap_err()
2405 .to_string()
2406 .contains("Invalid field name"));
2407 }
2408
2409 #[test]
2410 fn test_invalid_scope_name_starts_with_number() {
2411 let markdown = "---\nSCOPE: 123items\n---\n\nBody.";
2412 let result = decompose(markdown);
2413 assert!(result.is_err());
2414 }
2415
2416 #[test]
2417 fn test_invalid_scope_name_with_hyphen() {
2418 let markdown = "---\nSCOPE: my-items\n---\n\nBody.";
2419 let result = decompose(markdown);
2420 assert!(result.is_err());
2421 }
2422
2423 #[test]
2424 fn test_invalid_quill_name_uppercase() {
2425 let markdown = "---\nQUILL: MyQuill\n---\n\nBody.";
2426 let result = decompose(markdown);
2427 assert!(result.is_err());
2428 }
2429
2430 #[test]
2431 fn test_yaml_syntax_error_missing_colon() {
2432 let markdown = "---\ntitle Test\n---\n\nBody.";
2433 let result = decompose(markdown);
2434 assert!(result.is_err());
2435 }
2436
2437 #[test]
2438 fn test_yaml_syntax_error_bad_indentation() {
2439 let markdown = "---\nitems:\n- one\n - two\n---\n\nBody.";
2440 let result = decompose(markdown);
2441 let _ = result;
2444 }
2445
2446 #[test]
2449 fn test_body_with_leading_newlines() {
2450 let markdown = "---\ntitle: Test\n---\n\n\n\nBody with leading newlines.";
2451 let doc = decompose(markdown).unwrap();
2452 assert!(doc.body().unwrap().starts_with('\n'));
2454 }
2455
2456 #[test]
2457 fn test_body_with_trailing_newlines() {
2458 let markdown = "---\ntitle: Test\n---\n\nBody.\n\n\n";
2459 let doc = decompose(markdown).unwrap();
2460 assert!(doc.body().unwrap().ends_with('\n'));
2462 }
2463
2464 #[test]
2465 fn test_no_body_after_frontmatter() {
2466 let markdown = "---\ntitle: Test\n---";
2467 let doc = decompose(markdown).unwrap();
2468 assert_eq!(doc.body(), Some(""));
2469 }
2470
2471 #[test]
2474 fn test_valid_tag_name_single_underscore() {
2475 assert!(is_valid_tag_name("_"));
2476 }
2477
2478 #[test]
2479 fn test_valid_tag_name_underscore_prefix() {
2480 assert!(is_valid_tag_name("_private"));
2481 }
2482
2483 #[test]
2484 fn test_valid_tag_name_with_numbers() {
2485 assert!(is_valid_tag_name("item1"));
2486 assert!(is_valid_tag_name("item_2"));
2487 }
2488
2489 #[test]
2490 fn test_invalid_tag_name_empty() {
2491 assert!(!is_valid_tag_name(""));
2492 }
2493
2494 #[test]
2495 fn test_invalid_tag_name_starts_with_number() {
2496 assert!(!is_valid_tag_name("1item"));
2497 }
2498
2499 #[test]
2500 fn test_invalid_tag_name_uppercase() {
2501 assert!(!is_valid_tag_name("Items"));
2502 assert!(!is_valid_tag_name("ITEMS"));
2503 }
2504
2505 #[test]
2506 fn test_invalid_tag_name_special_chars() {
2507 assert!(!is_valid_tag_name("my-items"));
2508 assert!(!is_valid_tag_name("my.items"));
2509 assert!(!is_valid_tag_name("my items"));
2510 }
2511
2512 #[test]
2515 fn test_guillemet_in_yaml_preserves_non_strings() {
2516 let markdown = r#"---
2517count: 42
2518price: 19.99
2519active: true
2520items:
2521 - first
2522 - 100
2523 - true
2524---
2525
2526Body."#;
2527 let doc = decompose(markdown).unwrap();
2528 assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
2529 assert_eq!(doc.get_field("price").unwrap().as_f64().unwrap(), 19.99);
2530 assert_eq!(doc.get_field("active").unwrap().as_bool().unwrap(), true);
2531 }
2532
2533 #[test]
2534 fn test_guillemet_double_conversion_prevention() {
2535 let markdown = "---\ntitle: Already «converted»\n---\n\nBody.";
2537 let doc = decompose(markdown).unwrap();
2538 assert_eq!(
2540 doc.get_field("title").unwrap().as_str().unwrap(),
2541 "Already «converted»"
2542 );
2543 }
2544}