1use std::collections::HashMap;
85
/// Key under which extracted body text is stored, both in the top-level
/// document fields and inside each tagged item. Because of that it is
/// rejected as a tag directive name.
pub const BODY_FIELD: &str = "body";
88
89#[derive(Debug, Clone)]
91pub struct ParsedDocument {
92 fields: HashMap<String, serde_yaml::Value>,
93}
94
95impl ParsedDocument {
96 pub fn new(fields: HashMap<String, serde_yaml::Value>) -> Self {
98 Self { fields }
99 }
100
101 pub fn body(&self) -> Option<&str> {
103 self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
104 }
105
106 pub fn get_field(&self, name: &str) -> Option<&serde_yaml::Value> {
108 self.fields.get(name)
109 }
110
111 pub fn fields(&self) -> &HashMap<String, serde_yaml::Value> {
113 &self.fields
114 }
115}
116
/// One fenced metadata block located in the source text.
#[derive(Debug)]
struct MetadataBlock {
    /// Byte offset of the opening `---` fence.
    start: usize,
    /// Byte offset just past the closing fence (including its line break, if any).
    end: usize,
    /// Text between the fences, with the `!tag` directive line removed when present.
    yaml_content: String,
    /// Tag directive name for tagged blocks; `None` for an untagged block.
    tag: Option<String>,
}
124
/// Reports whether `name` matches `[a-z_][a-z0-9_]*`.
///
/// The empty string is rejected; only ASCII lowercase letters, ASCII digits
/// and `_` are accepted, and the first character may not be a digit.
fn is_valid_tag_name(name: &str) -> bool {
    let mut remaining = name.chars();
    match remaining.next() {
        Some(head) if head.is_ascii_lowercase() || head == '_' => {
            remaining.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
        }
        // Either empty input or a disallowed leading character.
        _ => false,
    }
}
146
147fn find_metadata_blocks(
149 markdown: &str,
150) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
151 let mut blocks = Vec::new();
152 let mut pos = 0;
153
154 while pos < markdown.len() {
155 let search_str = &markdown[pos..];
157 let delimiter_result = if let Some(p) = search_str.find("---\n") {
158 Some((p, 4, "\n"))
159 } else if let Some(p) = search_str.find("---\r\n") {
160 Some((p, 5, "\r\n"))
161 } else {
162 None
163 };
164
165 if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
166 let abs_pos = pos + delimiter_pos;
167 let content_start = abs_pos + delimiter_len; let followed_by_blank = if content_start < markdown.len() {
171 markdown[content_start..].starts_with('\n')
172 || markdown[content_start..].starts_with("\r\n")
173 } else {
174 false
175 };
176
177 if followed_by_blank {
178 pos = abs_pos + 3; continue;
181 }
182
183 let rest = &markdown[content_start..];
186
187 let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
189 let closing_with_newline = closing_patterns
190 .iter()
191 .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
192 .min_by_key(|(p, _)| *p);
193
194 let closing_at_eof = ["\n---", "\r\n---"]
196 .iter()
197 .filter_map(|delim| {
198 rest.find(delim).and_then(|p| {
199 if p + delim.len() == rest.len() {
200 Some((p, delim.len()))
201 } else {
202 None
203 }
204 })
205 })
206 .min_by_key(|(p, _)| *p);
207
208 let closing_result = match (closing_with_newline, closing_at_eof) {
209 (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
210 (Some(_), Some(_)) => closing_with_newline,
211 (Some(_), None) => closing_with_newline,
212 (None, Some(_)) => closing_at_eof,
213 (None, None) => None,
214 };
215
216 if let Some((closing_pos, closing_len)) = closing_result {
217 let abs_closing_pos = content_start + closing_pos;
218 let content = &markdown[content_start..abs_closing_pos];
219
220 if content.contains("\n\n") || content.contains("\r\n\r\n") {
222 if abs_pos == 0 {
224 return Err("Frontmatter started but not closed with ---".into());
226 }
227 pos = abs_pos + 3;
229 continue;
230 }
231
232 let (tag, yaml_content) = if content.starts_with('!') {
234 if let Some(newline_pos) = content.find(|c| c == '\n' || c == '\r') {
235 let tag_line = &content[1..newline_pos];
236 let yaml_start = if content[newline_pos..].starts_with("\r\n") {
238 newline_pos + 2
239 } else {
240 newline_pos + 1
241 };
242 let yaml = if yaml_start < content.len() {
243 &content[yaml_start..]
244 } else {
245 ""
246 };
247 (Some(tag_line.trim().to_string()), yaml.to_string())
248 } else {
249 (Some(content[1..].trim().to_string()), String::new())
251 }
252 } else {
253 (None, content.to_string())
254 };
255
256 if let Some(ref tag_name) = tag {
258 if !is_valid_tag_name(tag_name) {
259 return Err(format!(
260 "Invalid tag name '{}': must match pattern [a-z_][a-z0-9_]*",
261 tag_name
262 )
263 .into());
264 }
265 if tag_name == BODY_FIELD {
266 return Err(format!(
267 "Cannot use reserved field name '{}' as tag directive",
268 BODY_FIELD
269 )
270 .into());
271 }
272 }
273
274 blocks.push(MetadataBlock {
275 start: abs_pos,
276 end: abs_closing_pos + closing_len, yaml_content,
278 tag,
279 });
280
281 pos = abs_closing_pos + closing_len;
282 } else if abs_pos == 0 {
283 return Err("Frontmatter started but not closed with ---".into());
285 } else {
286 pos = abs_pos + 3;
288 }
289 } else {
290 break;
291 }
292 }
293
294 Ok(blocks)
295}
296
297pub fn decompose(
299 markdown: &str,
300) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
301 let mut fields = HashMap::new();
302
303 let blocks = find_metadata_blocks(markdown)?;
305
306 if blocks.is_empty() {
307 fields.insert(
309 BODY_FIELD.to_string(),
310 serde_yaml::Value::String(markdown.to_string()),
311 );
312 return Ok(ParsedDocument::new(fields));
313 }
314
315 let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
317 let mut has_global_frontmatter = false;
318 let mut global_frontmatter_index: Option<usize> = None;
319
320 for (idx, block) in blocks.iter().enumerate() {
322 if block.tag.is_none() {
323 if has_global_frontmatter {
324 return Err(
325 "Multiple global frontmatter blocks found: only one untagged block allowed"
326 .into(),
327 );
328 }
329 has_global_frontmatter = true;
330 global_frontmatter_index = Some(idx);
331 }
332 }
333
334 if let Some(idx) = global_frontmatter_index {
336 let block = &blocks[idx];
337
338 let yaml_fields: HashMap<String, serde_yaml::Value> = if block.yaml_content.is_empty() {
340 HashMap::new()
341 } else {
342 serde_yaml::from_str(&block.yaml_content)
343 .map_err(|e| format!("Invalid YAML frontmatter: {}", e))?
344 };
345
346 for other_block in &blocks {
348 if let Some(ref tag) = other_block.tag {
349 if yaml_fields.contains_key(tag) {
350 return Err(format!(
351 "Name collision: global field '{}' conflicts with tagged attribute",
352 tag
353 )
354 .into());
355 }
356 }
357 }
358
359 fields.extend(yaml_fields);
360 }
361
362 for (idx, block) in blocks.iter().enumerate() {
364 if let Some(ref tag_name) = block.tag {
365 if fields.contains_key(tag_name) {
367 return Err(format!(
368 "Name collision: tagged attribute '{}' conflicts with global field",
369 tag_name
370 )
371 .into());
372 }
373
374 let mut item_fields: HashMap<String, serde_yaml::Value> =
376 if block.yaml_content.is_empty() {
377 HashMap::new()
378 } else {
379 serde_yaml::from_str(&block.yaml_content).map_err(|e| {
380 format!("Invalid YAML in tagged block '{}': {}", tag_name, e)
381 })?
382 };
383
384 let body_start = block.end;
386 let body_end = if idx + 1 < blocks.len() {
387 blocks[idx + 1].start
388 } else {
389 markdown.len()
390 };
391 let body = &markdown[body_start..body_end];
392
393 item_fields.insert(
395 BODY_FIELD.to_string(),
396 serde_yaml::Value::String(body.to_string()),
397 );
398
399 let item_value = serde_yaml::to_value(item_fields)?;
401
402 tagged_attributes
404 .entry(tag_name.clone())
405 .or_insert_with(Vec::new)
406 .push(item_value);
407 }
408 }
409
410 let (body_start, body_end) = if let Some(idx) = global_frontmatter_index {
412 let start = blocks[idx].end;
414
415 let end = blocks
417 .iter()
418 .skip(idx + 1)
419 .find(|b| b.tag.is_some())
420 .map(|b| b.start)
421 .unwrap_or(markdown.len());
422
423 (start, end)
424 } else {
425 let end = blocks
427 .iter()
428 .find(|b| b.tag.is_some())
429 .map(|b| b.start)
430 .unwrap_or(0);
431
432 (0, end)
433 };
434
435 let global_body = &markdown[body_start..body_end];
436
437 fields.insert(
438 BODY_FIELD.to_string(),
439 serde_yaml::Value::String(global_body.to_string()),
440 );
441
442 for (tag_name, items) in tagged_attributes {
444 fields.insert(tag_name, serde_yaml::Value::Sequence(items));
445 }
446
447 Ok(ParsedDocument::new(fields))
448}
449
#[cfg(test)]
mod tests {
    use super::*;

    /// Fetches `key` from a YAML mapping, panicking when it is absent.
    fn entry<'a>(mapping: &'a serde_yaml::Mapping, key: &str) -> &'a serde_yaml::Value {
        mapping
            .get(&serde_yaml::Value::String(key.to_string()))
            .unwrap()
    }

    /// Fetches `key` from a YAML mapping as a string.
    fn entry_str<'a>(mapping: &'a serde_yaml::Mapping, key: &str) -> &'a str {
        entry(mapping, key).as_str().unwrap()
    }

    /// Reads a top-level document field as a string.
    fn field_str<'a>(doc: &'a ParsedDocument, name: &str) -> &'a str {
        doc.get_field(name).unwrap().as_str().unwrap()
    }

    /// Reads a top-level document field as a sequence.
    fn field_seq<'a>(doc: &'a ParsedDocument, name: &str) -> &'a serde_yaml::Sequence {
        doc.get_field(name).unwrap().as_sequence().unwrap()
    }

    /// Parses `markdown`, expecting failure, and returns the error message.
    fn error_text(markdown: &str) -> String {
        let outcome = decompose(markdown);
        assert!(outcome.is_err());
        outcome.unwrap_err().to_string()
    }

    #[test]
    fn test_no_frontmatter() {
        let input = "# Hello World\n\nThis is a test.";
        let parsed = decompose(input).unwrap();

        assert_eq!(parsed.body(), Some(input));
        assert_eq!(parsed.fields().len(), 1);
    }

    #[test]
    fn test_with_frontmatter() {
        let input = r#"---
title: Test Document
author: Test Author
---

# Hello World

This is the body."#;

        let parsed = decompose(input).unwrap();

        assert_eq!(parsed.body(), Some("\n# Hello World\n\nThis is the body."));
        assert_eq!(field_str(&parsed, "title"), "Test Document");
        assert_eq!(field_str(&parsed, "author"), "Test Author");
        // title, author and body
        assert_eq!(parsed.fields().len(), 3);
    }

    #[test]
    fn test_complex_yaml_frontmatter() {
        let input = r#"---
title: Complex Document
tags:
 - test
 - yaml
metadata:
 version: 1.0
 nested:
 field: value
---

Content here."#;

        let parsed = decompose(input).unwrap();

        assert_eq!(parsed.body(), Some("\nContent here."));
        assert_eq!(field_str(&parsed, "title"), "Complex Document");

        let tags = field_seq(&parsed, "tags");
        assert_eq!(tags.len(), 2);
        assert_eq!(tags[0].as_str().unwrap(), "test");
        assert_eq!(tags[1].as_str().unwrap(), "yaml");
    }

    #[test]
    fn test_invalid_yaml() {
        let input = r#"---
title: [invalid yaml
author: missing close bracket
---

Content here."#;

        assert!(error_text(input).contains("Invalid YAML frontmatter"));
    }

    #[test]
    fn test_unclosed_frontmatter() {
        let input = r#"---
title: Test
author: Test Author

Content without closing ---"#;

        assert!(error_text(input).contains("not closed"));
    }

    #[test]
    fn test_basic_tagged_block() {
        let input = r#"---
title: Main Document
---

Main body content.

---
!items
name: Item 1
---

Body of item 1."#;

        let parsed = decompose(input).unwrap();

        assert_eq!(parsed.body(), Some("\nMain body content.\n\n"));
        assert_eq!(field_str(&parsed, "title"), "Main Document");

        let items = field_seq(&parsed, "items");
        assert_eq!(items.len(), 1);

        let only = items[0].as_mapping().unwrap();
        assert_eq!(entry_str(only, "name"), "Item 1");
        assert_eq!(entry_str(only, "body"), "\nBody of item 1.");
    }

    #[test]
    fn test_multiple_tagged_blocks() {
        let input = r#"---
!items
name: Item 1
tags: [a, b]
---

First item body.

---
!items
name: Item 2
tags: [c, d]
---

Second item body."#;

        let parsed = decompose(input).unwrap();

        let items = field_seq(&parsed, "items");
        assert_eq!(items.len(), 2);

        assert_eq!(entry_str(items[0].as_mapping().unwrap(), "name"), "Item 1");
        assert_eq!(entry_str(items[1].as_mapping().unwrap(), "name"), "Item 2");
    }

    #[test]
    fn test_mixed_global_and_tagged() {
        let input = r#"---
title: Global
author: John Doe
---

Global body.

---
!sections
title: Section 1
---

Section 1 content.

---
!sections
title: Section 2
---

Section 2 content."#;

        let parsed = decompose(input).unwrap();

        assert_eq!(field_str(&parsed, "title"), "Global");
        assert_eq!(parsed.body(), Some("\nGlobal body.\n\n"));
        assert_eq!(field_seq(&parsed, "sections").len(), 2);
    }

    #[test]
    fn test_empty_tagged_metadata() {
        let input = r#"---
!items
---

Body without metadata."#;

        let parsed = decompose(input).unwrap();

        let items = field_seq(&parsed, "items");
        assert_eq!(items.len(), 1);
        assert_eq!(
            entry_str(items[0].as_mapping().unwrap(), "body"),
            "\nBody without metadata."
        );
    }

    #[test]
    fn test_tagged_block_without_body() {
        let input = r#"---
!items
name: Item
---"#;

        let parsed = decompose(input).unwrap();

        let items = field_seq(&parsed, "items");
        assert_eq!(items.len(), 1);
        assert_eq!(entry_str(items[0].as_mapping().unwrap(), "body"), "");
    }

    #[test]
    fn test_name_collision_global_and_tagged() {
        let input = r#"---
items: "global value"
---

Body

---
!items
name: Item
---

Item body"#;

        assert!(error_text(input).contains("collision"));
    }

    #[test]
    fn test_reserved_field_name() {
        let input = r#"---
!body
content: Test
---"#;

        assert!(error_text(input).contains("reserved"));
    }

    #[test]
    fn test_invalid_tag_syntax() {
        let input = r#"---
!Invalid-Name
title: Test
---"#;

        assert!(error_text(input).contains("Invalid tag name"));
    }

    #[test]
    fn test_multiple_global_frontmatter_blocks() {
        let input = r#"---
title: First
---

Body

---
author: Second
---

More body"#;

        assert!(error_text(input).contains("Multiple global frontmatter"));
    }

    #[test]
    fn test_adjacent_blocks_different_tags() {
        let input = r#"---
!items
name: Item 1
---

Item 1 body

---
!sections
title: Section 1
---

Section 1 body"#;

        let parsed = decompose(input).unwrap();

        assert!(parsed.get_field("items").is_some());
        assert!(parsed.get_field("sections").is_some());

        assert_eq!(field_seq(&parsed, "items").len(), 1);
        assert_eq!(field_seq(&parsed, "sections").len(), 1);
    }

    #[test]
    fn test_order_preservation() {
        let input = r#"---
!items
id: 1
---

First

---
!items
id: 2
---

Second

---
!items
id: 3
---

Third"#;

        let parsed = decompose(input).unwrap();

        let items = field_seq(&parsed, "items");
        assert_eq!(items.len(), 3);

        // Items must come back in the order they appear in the document.
        let mut expected_id: i64 = 1;
        for item in items {
            let id = entry(item.as_mapping().unwrap(), "id").as_i64().unwrap();
            assert_eq!(id, expected_id);
            expected_id += 1;
        }
    }

    #[test]
    fn test_product_catalog_integration() {
        let input = r#"---
title: Product Catalog
author: John Doe
date: 2024-01-01
---

This is the main catalog description.

---
!products
name: Widget A
price: 19.99
sku: WID-001
---

The **Widget A** is our most popular product.

---
!products
name: Gadget B
price: 29.99
sku: GAD-002
---

The **Gadget B** is perfect for professionals.

---
!reviews
product: Widget A
rating: 5
---

"Excellent product! Highly recommended."

---
!reviews
product: Gadget B
rating: 4
---

"Very good, but a bit pricey.""#;

        let parsed = decompose(input).unwrap();

        // Global fields.
        assert_eq!(field_str(&parsed, "title"), "Product Catalog");
        assert_eq!(field_str(&parsed, "author"), "John Doe");
        assert_eq!(field_str(&parsed, "date"), "2024-01-01");

        // Global body.
        assert!(parsed.body().unwrap().contains("main catalog description"));

        // Products collection.
        let products = field_seq(&parsed, "products");
        assert_eq!(products.len(), 2);

        let first_product = products[0].as_mapping().unwrap();
        assert_eq!(entry_str(first_product, "name"), "Widget A");
        assert_eq!(entry(first_product, "price").as_f64().unwrap(), 19.99);

        // Reviews collection.
        let reviews = field_seq(&parsed, "reviews");
        assert_eq!(reviews.len(), 2);

        let first_review = reviews[0].as_mapping().unwrap();
        assert_eq!(entry_str(first_review, "product"), "Widget A");
        assert_eq!(entry(first_review, "rating").as_i64().unwrap(), 5);

        // title, author, date, body, products, reviews
        assert_eq!(parsed.fields().len(), 6);
    }
}
#[cfg(test)]
mod demo_file_test {
    use super::*;

    /// Parses the bundled demo fixture and spot-checks its global fields,
    /// body text and tagged collections.
    #[test]
    fn test_extended_metadata_demo_file() {
        let source = include_str!("../../quillmark-fixtures/resources/extended_metadata_demo.md");
        let parsed = decompose(source).unwrap();

        let text_of = |name: &str| parsed.get_field(name).unwrap().as_str().unwrap();
        let list_of = |name: &str| parsed.get_field(name).unwrap().as_sequence().unwrap();

        // Global fields.
        assert_eq!(text_of("title"), "Extended Metadata Demo");
        assert_eq!(text_of("author"), "Quillmark Team");
        assert_eq!(parsed.get_field("version").unwrap().as_f64().unwrap(), 1.0);

        // Body.
        assert!(parsed
            .body()
            .unwrap()
            .contains("extended YAML metadata standard"));

        // Tagged collections.
        let features = list_of("features");
        assert_eq!(features.len(), 3);

        let use_cases = list_of("use_cases");
        assert_eq!(use_cases.len(), 2);

        // First feature entry.
        let first_feature = features[0].as_mapping().unwrap();
        let feature_name = first_feature
            .get(&serde_yaml::Value::String("name".to_string()))
            .unwrap()
            .as_str()
            .unwrap();
        assert_eq!(feature_name, "Tag Directives");
    }
}