quillmark_core/
parse.rs

1//! # Parsing Module
2//!
3//! Parsing functionality for markdown documents with YAML frontmatter.
4//!
5//! ## Overview
6//!
7//! The `parse` module provides the [`decompose`] function for parsing markdown documents
8//! and the [`ParsedDocument`] type for accessing parsed content.
9//!
10//! ## Key Types
11//!
12//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
13//! - [`BODY_FIELD`]: Constant for the field name storing document body
14//!
15//! ## Examples
16//!
17//! ### Basic Parsing
18//!
19//! ```
20//! use quillmark_core::decompose;
21//!
22//! let markdown = r#"---
23//! title: My Document
24//! author: John Doe
25//! ---
26//!
27//! # Introduction
28//!
29//! Document content here.
30//! "#;
31//!
32//! let doc = decompose(markdown).unwrap();
33//! let title = doc.get_field("title")
34//!     .and_then(|v| v.as_str())
35//!     .unwrap_or("Untitled");
36//! ```
37//!
38//! ### Extended Metadata with Tags
39//!
40//! ```
41//! use quillmark_core::decompose;
42//!
43//! let markdown = r#"---
44//! catalog_title: Product Catalog
45//! ---
46//!
47//! # Products
48//!
49//! ---
50//! !products
51//! name: Widget
52//! price: 19.99
53//! ---
54//!
55//! A versatile widget for all occasions.
56//! "#;
57//!
58//! let doc = decompose(markdown).unwrap();
59//!
60//! // Access tagged collections
61//! if let Some(products) = doc.get_field("products")
62//!     .and_then(|v| v.as_sequence())
63//! {
64//!     for product in products {
65//!         let name = product.get("name").and_then(|v| v.as_str()).unwrap();
66//!         let price = product.get("price").and_then(|v| v.as_f64()).unwrap();
67//!         println!("{}: ${}", name, price);
68//!     }
69//! }
70//! ```
71//!
72//! ## Error Handling
73//!
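//! The [`decompose`] function returns errors for:
//! - Input, or a single YAML block, that exceeds the configured size limits
//! - Malformed YAML syntax
//! - Unclosed frontmatter blocks
//! - Multiple global (untagged) frontmatter blocks
//! - Invalid tag directive syntax
//! - Use of the reserved `body` name as a tag directive
//! - Name collisions between global fields and tagged attributes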
81//!
82//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/quillmark-core/docs/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.
83
84use std::collections::HashMap;
85
/// The field name used to store the document body.
///
/// The extracted body text is stored under this key both at the top level of
/// a parsed document and inside every tagged collection item. The name is
/// reserved: it is rejected when used as a tag directive.
pub const BODY_FIELD: &str = "body";
88
89/// A parsed markdown document with frontmatter
90#[derive(Debug, Clone)]
91pub struct ParsedDocument {
92    fields: HashMap<String, serde_yaml::Value>,
93}
94
95impl ParsedDocument {
96    /// Create a new ParsedDocument with the given fields
97    pub fn new(fields: HashMap<String, serde_yaml::Value>) -> Self {
98        Self { fields }
99    }
100
101    /// Get the document body
102    pub fn body(&self) -> Option<&str> {
103        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
104    }
105
106    /// Get a specific field
107    pub fn get_field(&self, name: &str) -> Option<&serde_yaml::Value> {
108        self.fields.get(name)
109    }
110
111    /// Get all fields (including body)
112    pub fn fields(&self) -> &HashMap<String, serde_yaml::Value> {
113        &self.fields
114    }
115}
116
/// A metadata block located in the source markdown.
///
/// `start` and `end` are byte offsets into the markdown string that was
/// scanned; the text between consecutive blocks is treated as body content.
#[derive(Debug)]
struct MetadataBlock {
    /// Byte offset of the opening `---`.
    start: usize,
    /// Byte offset just past the closing delimiter (including its line
    /// ending, when one is present).
    end: usize,
    /// The YAML text between the delimiters, with the tag directive line
    /// (if any) removed.
    yaml_content: String,
    /// Tag directive name without the leading `!`, if the block has one.
    tag: Option<String>,
}
124
/// Validate that a tag name follows the pattern `[a-z_][a-z0-9_]*`.
///
/// Returns `true` only when `name` is non-empty, starts with an ASCII
/// lowercase letter or `_`, and every following character is an ASCII
/// lowercase letter, an ASCII digit, or `_`.
fn is_valid_tag_name(name: &str) -> bool {
    let mut chars = name.chars();

    match chars.next() {
        // A valid leading character: the remaining characters decide.
        Some(first) if first.is_ascii_lowercase() || first == '_' => {
            chars.all(|ch| ch.is_ascii_lowercase() || ch.is_ascii_digit() || ch == '_')
        }
        // Empty name, or a leading character outside `[a-z_]`.
        _ => false,
    }
}
146
147/// Find all metadata blocks in the document
148fn find_metadata_blocks(
149    markdown: &str,
150) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
151    let mut blocks = Vec::new();
152    let mut pos = 0;
153
154    while pos < markdown.len() {
155        // Look for opening "---\n" or "---\r\n"
156        let search_str = &markdown[pos..];
157        let delimiter_result = if let Some(p) = search_str.find("---\n") {
158            Some((p, 4, "\n"))
159        } else if let Some(p) = search_str.find("---\r\n") {
160            Some((p, 5, "\r\n"))
161        } else {
162            None
163        };
164
165        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
166            let abs_pos = pos + delimiter_pos;
167            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"
168
169            // Check if opening --- is followed by a blank line (horizontal rule, not metadata)
170            let followed_by_blank = if content_start < markdown.len() {
171                markdown[content_start..].starts_with('\n')
172                    || markdown[content_start..].starts_with("\r\n")
173            } else {
174                false
175            };
176
177            if followed_by_blank {
178                // This is a horizontal rule in the body, skip it
179                pos = abs_pos + 3; // Skip past "---"
180                continue;
181            }
182
183            // Found potential metadata block opening
184            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
185            let rest = &markdown[content_start..];
186
187            // First try to find delimiters with trailing newlines
188            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
189            let closing_with_newline = closing_patterns
190                .iter()
191                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
192                .min_by_key(|(p, _)| *p);
193
194            // Also check for closing at end of document (no trailing newline)
195            let closing_at_eof = ["\n---", "\r\n---"]
196                .iter()
197                .filter_map(|delim| {
198                    rest.find(delim).and_then(|p| {
199                        if p + delim.len() == rest.len() {
200                            Some((p, delim.len()))
201                        } else {
202                            None
203                        }
204                    })
205                })
206                .min_by_key(|(p, _)| *p);
207
208            let closing_result = match (closing_with_newline, closing_at_eof) {
209                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
210                (Some(_), Some(_)) => closing_with_newline,
211                (Some(_), None) => closing_with_newline,
212                (None, Some(_)) => closing_at_eof,
213                (None, None) => None,
214            };
215
216            if let Some((closing_pos, closing_len)) = closing_result {
217                let abs_closing_pos = content_start + closing_pos;
218                let content = &markdown[content_start..abs_closing_pos];
219
220                // Check YAML size limit
221                if content.len() > crate::error::MAX_YAML_SIZE {
222                    return Err(format!(
223                        "YAML block too large: {} bytes (max: {} bytes)",
224                        content.len(),
225                        crate::error::MAX_YAML_SIZE
226                    )
227                    .into());
228                }
229
230                // Check if the block is contiguous (no blank lines in the YAML content)
231                if content.contains("\n\n") || content.contains("\r\n\r\n") {
232                    // Not a contiguous block
233                    if abs_pos == 0 {
234                        // Started at beginning but has blank lines - this is an error
235                        return Err("Frontmatter started but not closed with ---".into());
236                    }
237                    // Otherwise treat as horizontal rule in body
238                    pos = abs_pos + 3;
239                    continue;
240                }
241
242                // Extract tag directive if present
243                let (tag, yaml_content) = if content.starts_with('!') {
244                    if let Some(newline_pos) = content.find(|c| c == '\n' || c == '\r') {
245                        let tag_line = &content[1..newline_pos];
246                        // Skip newline(s) after tag
247                        let yaml_start = if content[newline_pos..].starts_with("\r\n") {
248                            newline_pos + 2
249                        } else {
250                            newline_pos + 1
251                        };
252                        let yaml = if yaml_start < content.len() {
253                            &content[yaml_start..]
254                        } else {
255                            ""
256                        };
257                        (Some(tag_line.trim().to_string()), yaml.to_string())
258                    } else {
259                        // Tag directive with no YAML content (entire content is just tag)
260                        (Some(content[1..].trim().to_string()), String::new())
261                    }
262                } else {
263                    (None, content.to_string())
264                };
265
266                // Validate tag name if present
267                if let Some(ref tag_name) = tag {
268                    if !is_valid_tag_name(tag_name) {
269                        return Err(format!(
270                            "Invalid tag name '{}': must match pattern [a-z_][a-z0-9_]*",
271                            tag_name
272                        )
273                        .into());
274                    }
275                    if tag_name == BODY_FIELD {
276                        return Err(format!(
277                            "Cannot use reserved field name '{}' as tag directive",
278                            BODY_FIELD
279                        )
280                        .into());
281                    }
282                }
283
284                blocks.push(MetadataBlock {
285                    start: abs_pos,
286                    end: abs_closing_pos + closing_len, // After closing delimiter
287                    yaml_content,
288                    tag,
289                });
290
291                pos = abs_closing_pos + closing_len;
292            } else if abs_pos == 0 {
293                // Frontmatter started but not closed
294                return Err("Frontmatter started but not closed with ---".into());
295            } else {
296                // Not a valid metadata block, skip this position
297                pos = abs_pos + 3;
298            }
299        } else {
300            break;
301        }
302    }
303
304    Ok(blocks)
305}
306
307/// Decompose markdown into frontmatter fields and body
308pub fn decompose(
309    markdown: &str,
310) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
311    // Check input size limit
312    if markdown.len() > crate::error::MAX_INPUT_SIZE {
313        return Err(format!(
314            "Input too large: {} bytes (max: {} bytes)",
315            markdown.len(),
316            crate::error::MAX_INPUT_SIZE
317        )
318        .into());
319    }
320
321    let mut fields = HashMap::new();
322
323    // Find all metadata blocks
324    let blocks = find_metadata_blocks(markdown)?;
325
326    if blocks.is_empty() {
327        // No metadata blocks, entire content is body
328        fields.insert(
329            BODY_FIELD.to_string(),
330            serde_yaml::Value::String(markdown.to_string()),
331        );
332        return Ok(ParsedDocument::new(fields));
333    }
334
335    // Track which attributes are used for tagged blocks
336    let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
337    let mut has_global_frontmatter = false;
338    let mut global_frontmatter_index: Option<usize> = None;
339
340    // First pass: identify global frontmatter and validate
341    for (idx, block) in blocks.iter().enumerate() {
342        if block.tag.is_none() {
343            if has_global_frontmatter {
344                return Err(
345                    "Multiple global frontmatter blocks found: only one untagged block allowed"
346                        .into(),
347                );
348            }
349            has_global_frontmatter = true;
350            global_frontmatter_index = Some(idx);
351        }
352    }
353
354    // Parse global frontmatter if present
355    if let Some(idx) = global_frontmatter_index {
356        let block = &blocks[idx];
357
358        // Parse YAML frontmatter
359        let yaml_fields: HashMap<String, serde_yaml::Value> = if block.yaml_content.is_empty() {
360            HashMap::new()
361        } else {
362            serde_yaml::from_str(&block.yaml_content)
363                .map_err(|e| format!("Invalid YAML frontmatter: {}", e))?
364        };
365
366        // Check that all tagged blocks don't conflict with global fields
367        for other_block in &blocks {
368            if let Some(ref tag) = other_block.tag {
369                if yaml_fields.contains_key(tag) {
370                    return Err(format!(
371                        "Name collision: global field '{}' conflicts with tagged attribute",
372                        tag
373                    )
374                    .into());
375                }
376            }
377        }
378
379        fields.extend(yaml_fields);
380    }
381
382    // Parse tagged blocks
383    for (idx, block) in blocks.iter().enumerate() {
384        if let Some(ref tag_name) = block.tag {
385            // Check if this conflicts with global fields
386            if fields.contains_key(tag_name) {
387                return Err(format!(
388                    "Name collision: tagged attribute '{}' conflicts with global field",
389                    tag_name
390                )
391                .into());
392            }
393
394            // Parse YAML metadata
395            let mut item_fields: HashMap<String, serde_yaml::Value> =
396                if block.yaml_content.is_empty() {
397                    HashMap::new()
398                } else {
399                    serde_yaml::from_str(&block.yaml_content).map_err(|e| {
400                        format!("Invalid YAML in tagged block '{}': {}", tag_name, e)
401                    })?
402                };
403
404            // Extract body for this tagged block
405            let body_start = block.end;
406            let body_end = if idx + 1 < blocks.len() {
407                blocks[idx + 1].start
408            } else {
409                markdown.len()
410            };
411            let body = &markdown[body_start..body_end];
412
413            // Add body to item fields
414            item_fields.insert(
415                BODY_FIELD.to_string(),
416                serde_yaml::Value::String(body.to_string()),
417            );
418
419            // Convert HashMap to serde_yaml::Value::Mapping
420            let item_value = serde_yaml::to_value(item_fields)?;
421
422            // Add to collection
423            tagged_attributes
424                .entry(tag_name.clone())
425                .or_insert_with(Vec::new)
426                .push(item_value);
427        }
428    }
429
430    // Extract global body
431    let (body_start, body_end) = if let Some(idx) = global_frontmatter_index {
432        // Global body starts after frontmatter
433        let start = blocks[idx].end;
434
435        // Global body ends at the first tagged block after the frontmatter, or EOF
436        let end = blocks
437            .iter()
438            .skip(idx + 1)
439            .find(|b| b.tag.is_some())
440            .map(|b| b.start)
441            .unwrap_or(markdown.len());
442
443        (start, end)
444    } else {
445        // No global frontmatter - body is everything before the first tagged block
446        let end = blocks
447            .iter()
448            .find(|b| b.tag.is_some())
449            .map(|b| b.start)
450            .unwrap_or(0);
451
452        (0, end)
453    };
454
455    let global_body = &markdown[body_start..body_end];
456
457    fields.insert(
458        BODY_FIELD.to_string(),
459        serde_yaml::Value::String(global_body.to_string()),
460    );
461
462    // Add all tagged collections to fields
463    for (tag_name, items) in tagged_attributes {
464        fields.insert(tag_name, serde_yaml::Value::Sequence(items));
465    }
466
467    Ok(ParsedDocument::new(fields))
468}
469
#[cfg(test)]
mod tests {
    use super::*;

    /// Top-level field `name` as a string; panics if absent or not a string.
    fn text<'a>(doc: &'a ParsedDocument, name: &str) -> &'a str {
        doc.get_field(name).unwrap().as_str().unwrap()
    }

    /// Tagged collection `name` as a slice of items; panics if absent or not a sequence.
    fn collection<'a>(doc: &'a ParsedDocument, name: &str) -> &'a [serde_yaml::Value] {
        doc.get_field(name).unwrap().as_sequence().unwrap()
    }

    /// Value stored under `name` inside a collection item; panics if the item
    /// is not a mapping or the key is missing.
    fn entry<'a>(item: &'a serde_yaml::Value, name: &str) -> &'a serde_yaml::Value {
        item.as_mapping()
            .unwrap()
            .get(&serde_yaml::Value::String(name.to_string()))
            .unwrap()
    }

    /// Error message produced by `decompose`; panics if parsing succeeds.
    fn failure(markdown: &str) -> String {
        decompose(markdown).unwrap_err().to_string()
    }

    #[test]
    fn test_no_frontmatter() {
        let markdown = "# Hello World\n\nThis is a test.";
        let doc = decompose(markdown).unwrap();

        // Without frontmatter the whole input is the body and the only field.
        assert_eq!(doc.body(), Some(markdown));
        assert_eq!(doc.fields().len(), 1);
    }

    #[test]
    fn test_with_frontmatter() {
        let markdown = r#"---
title: Test Document
author: Test Author
---

# Hello World

This is the body."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
        assert_eq!(text(&doc, "title"), "Test Document");
        assert_eq!(text(&doc, "author"), "Test Author");
        assert_eq!(doc.fields().len(), 3); // title, author, body
    }

    #[test]
    fn test_complex_yaml_frontmatter() {
        let markdown = r#"---
title: Complex Document
tags:
  - test
  - yaml
metadata:
  version: 1.0
  nested:
    field: value
---

Content here."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\nContent here."));
        assert_eq!(text(&doc, "title"), "Complex Document");

        let tags = collection(&doc, "tags");
        assert_eq!(tags.len(), 2);
        assert_eq!(tags[0].as_str().unwrap(), "test");
        assert_eq!(tags[1].as_str().unwrap(), "yaml");
    }

    #[test]
    fn test_invalid_yaml() {
        let markdown = r#"---
title: [invalid yaml
author: missing close bracket
---

Content here."#;

        assert!(failure(markdown).contains("Invalid YAML frontmatter"));
    }

    #[test]
    fn test_unclosed_frontmatter() {
        let markdown = r#"---
title: Test
author: Test Author

Content without closing ---"#;

        assert!(failure(markdown).contains("not closed"));
    }

    // Extended metadata tests

    #[test]
    fn test_basic_tagged_block() {
        let markdown = r#"---
title: Main Document
---

Main body content.

---
!items
name: Item 1
---

Body of item 1."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
        assert_eq!(text(&doc, "title"), "Main Document");

        let items = collection(&doc, "items");
        assert_eq!(items.len(), 1);

        let item = &items[0];
        assert_eq!(entry(item, "name").as_str().unwrap(), "Item 1");
        assert_eq!(entry(item, "body").as_str().unwrap(), "\nBody of item 1.");
    }

    #[test]
    fn test_multiple_tagged_blocks() {
        let markdown = r#"---
!items
name: Item 1
tags: [a, b]
---

First item body.

---
!items
name: Item 2
tags: [c, d]
---

Second item body."#;

        let doc = decompose(markdown).unwrap();

        let items = collection(&doc, "items");
        assert_eq!(items.len(), 2);

        assert_eq!(entry(&items[0], "name").as_str().unwrap(), "Item 1");
        assert_eq!(entry(&items[1], "name").as_str().unwrap(), "Item 2");
    }

    #[test]
    fn test_mixed_global_and_tagged() {
        let markdown = r#"---
title: Global
author: John Doe
---

Global body.

---
!sections
title: Section 1
---

Section 1 content.

---
!sections
title: Section 2
---

Section 2 content."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(text(&doc, "title"), "Global");
        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
        assert_eq!(collection(&doc, "sections").len(), 2);
    }

    #[test]
    fn test_empty_tagged_metadata() {
        let markdown = r#"---
!items
---

Body without metadata."#;

        let doc = decompose(markdown).unwrap();

        let items = collection(&doc, "items");
        assert_eq!(items.len(), 1);
        assert_eq!(
            entry(&items[0], "body").as_str().unwrap(),
            "\nBody without metadata."
        );
    }

    #[test]
    fn test_tagged_block_without_body() {
        let markdown = r#"---
!items
name: Item
---"#;

        let doc = decompose(markdown).unwrap();

        let items = collection(&doc, "items");
        assert_eq!(items.len(), 1);
        assert_eq!(entry(&items[0], "body").as_str().unwrap(), "");
    }

    #[test]
    fn test_name_collision_global_and_tagged() {
        let markdown = r#"---
items: "global value"
---

Body

---
!items
name: Item
---

Item body"#;

        assert!(failure(markdown).contains("collision"));
    }

    #[test]
    fn test_reserved_field_name() {
        let markdown = r#"---
!body
content: Test
---"#;

        assert!(failure(markdown).contains("reserved"));
    }

    #[test]
    fn test_invalid_tag_syntax() {
        let markdown = r#"---
!Invalid-Name
title: Test
---"#;

        assert!(failure(markdown).contains("Invalid tag name"));
    }

    #[test]
    fn test_multiple_global_frontmatter_blocks() {
        let markdown = r#"---
title: First
---

Body

---
author: Second
---

More body"#;

        assert!(failure(markdown).contains("Multiple global frontmatter"));
    }

    #[test]
    fn test_adjacent_blocks_different_tags() {
        let markdown = r#"---
!items
name: Item 1
---

Item 1 body

---
!sections
title: Section 1
---

Section 1 body"#;

        let doc = decompose(markdown).unwrap();

        assert!(doc.get_field("items").is_some());
        assert!(doc.get_field("sections").is_some());

        assert_eq!(collection(&doc, "items").len(), 1);
        assert_eq!(collection(&doc, "sections").len(), 1);
    }

    #[test]
    fn test_order_preservation() {
        let markdown = r#"---
!items
id: 1
---

First

---
!items
id: 2
---

Second

---
!items
id: 3
---

Third"#;

        let doc = decompose(markdown).unwrap();

        let items = collection(&doc, "items");
        assert_eq!(items.len(), 3);

        // Items must come back in the order their blocks appear in the source.
        let ids: Vec<i64> = items
            .iter()
            .map(|item| entry(item, "id").as_i64().unwrap())
            .collect();
        assert_eq!(ids, vec![1, 2, 3]);
    }

    #[test]
    fn test_product_catalog_integration() {
        let markdown = r#"---
title: Product Catalog
author: John Doe
date: 2024-01-01
---

This is the main catalog description.

---
!products
name: Widget A
price: 19.99
sku: WID-001
---

The **Widget A** is our most popular product.

---
!products
name: Gadget B
price: 29.99
sku: GAD-002
---

The **Gadget B** is perfect for professionals.

---
!reviews
product: Widget A
rating: 5
---

"Excellent product! Highly recommended."

---
!reviews
product: Gadget B
rating: 4
---

"Very good, but a bit pricey.""#;

        let doc = decompose(markdown).unwrap();

        // Verify global fields
        assert_eq!(text(&doc, "title"), "Product Catalog");
        assert_eq!(text(&doc, "author"), "John Doe");
        assert_eq!(text(&doc, "date"), "2024-01-01");

        // Verify global body
        assert!(doc.body().unwrap().contains("main catalog description"));

        // Verify products collection
        let products = collection(&doc, "products");
        assert_eq!(products.len(), 2);

        let first_product = &products[0];
        assert_eq!(entry(first_product, "name").as_str().unwrap(), "Widget A");
        assert_eq!(entry(first_product, "price").as_f64().unwrap(), 19.99);

        // Verify reviews collection
        let reviews = collection(&doc, "reviews");
        assert_eq!(reviews.len(), 2);

        let first_review = &reviews[0];
        assert_eq!(entry(first_review, "product").as_str().unwrap(), "Widget A");
        assert_eq!(entry(first_review, "rating").as_i64().unwrap(), 5);

        // Total fields: title, author, date, body, products, reviews = 6
        assert_eq!(doc.fields().len(), 6);
    }
}
#[cfg(test)]
mod demo_file_test {
    use super::*;

    /// Error message produced by `decompose`; panics if parsing succeeds.
    fn failure(markdown: &str) -> String {
        decompose(markdown).unwrap_err().to_string()
    }

    #[test]
    fn test_extended_metadata_demo_file() {
        let markdown = include_str!("../../quillmark-fixtures/resources/extended_metadata_demo.md");
        let doc = decompose(markdown).unwrap();

        // Verify global fields
        let title = doc.get_field("title").unwrap();
        assert_eq!(title.as_str().unwrap(), "Extended Metadata Demo");

        let author = doc.get_field("author").unwrap();
        assert_eq!(author.as_str().unwrap(), "Quillmark Team");

        // version is parsed as a number by YAML
        let version = doc.get_field("version").unwrap();
        assert_eq!(version.as_f64().unwrap(), 1.0);

        // Verify body
        let body = doc.body().unwrap();
        assert!(body.contains("extended YAML metadata standard"));

        // Verify features collection
        let features = doc.get_field("features").unwrap().as_sequence().unwrap();
        assert_eq!(features.len(), 3);

        // Verify use_cases collection
        let use_cases = doc.get_field("use_cases").unwrap().as_sequence().unwrap();
        assert_eq!(use_cases.len(), 2);

        // Check first feature
        let name_key = serde_yaml::Value::String("name".to_string());
        let first_feature = features[0].as_mapping().unwrap();
        let first_name = first_feature.get(&name_key).unwrap();
        assert_eq!(first_name.as_str().unwrap(), "Tag Directives");
    }

    #[test]
    fn test_input_size_limit() {
        // One byte over MAX_INPUT_SIZE must be rejected before any parsing.
        let large_markdown = "a".repeat(crate::error::MAX_INPUT_SIZE + 1);

        assert!(failure(&large_markdown).contains("Input too large"));
    }

    #[test]
    fn test_yaml_size_limit() {
        // A single quoted YAML value longer than MAX_YAML_SIZE pushes the
        // frontmatter block over the per-block limit.
        let oversized_value = "x".repeat(crate::error::MAX_YAML_SIZE + 1);
        let markdown = format!("---\ndata: \"{}\"\n---\n\nBody", oversized_value);

        assert!(failure(&markdown).contains("YAML block too large"));
    }

    #[test]
    fn test_input_within_size_limit() {
        // A body far smaller than the input limit parses successfully.
        let filler = "a".repeat(1000);
        let markdown = format!("---\ntitle: Test\n---\n\n{}", filler);

        assert!(decompose(&markdown).is_ok());
    }

    #[test]
    fn test_yaml_within_size_limit() {
        // A small frontmatter block is well within the YAML limit.
        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";

        assert!(decompose(markdown).is_ok());
    }
}