quillmark_core/
parse.rs

1//! # Parsing Module
2//!
3//! Parsing functionality for markdown documents with YAML frontmatter.
4//!
5//! ## Overview
6//!
7//! The `parse` module provides the [`decompose`] function for parsing markdown documents
8//! and the [`ParsedDocument`] type for accessing parsed content.
9//!
10//! ## Key Types
11//!
12//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
13//! - [`BODY_FIELD`]: Constant for the field name storing document body
14//!
15//! ## Examples
16//!
17//! ### Basic Parsing
18//!
19//! ```
20//! use quillmark_core::decompose;
21//!
22//! let markdown = r#"---
23//! title: My Document
24//! author: John Doe
25//! ---
26//!
27//! # Introduction
28//!
29//! Document content here.
30//! "#;
31//!
32//! let doc = decompose(markdown).unwrap();
33//! let title = doc.get_field("title")
34//!     .and_then(|v| v.as_str())
35//!     .unwrap_or("Untitled");
36//! ```
37//!
38//! ### Extended Metadata with Tags
39//!
40//! ```
41//! use quillmark_core::decompose;
42//!
43//! let markdown = r#"---
44//! catalog_title: Product Catalog
45//! ---
46//!
47//! # Products
48//!
49//! ---
50//! SCOPE: products
51//! name: Widget
52//! price: 19.99
53//! ---
54//!
55//! A versatile widget for all occasions.
56//! "#;
57//!
58//! let doc = decompose(markdown).unwrap();
59//!
60//! // Access tagged collections
61//! if let Some(products) = doc.get_field("products")
62//!     .and_then(|v| v.as_sequence())
63//! {
64//!     for product in products {
65//!         let name = product.get("name").and_then(|v| v.as_str()).unwrap();
66//!         let price = product.get("price").and_then(|v| v.as_f64()).unwrap();
67//!         println!("{}: ${}", name, price);
68//!     }
69//! }
70//! ```
71//!
72//! ## Error Handling
73//!
74//! The [`decompose`] function returns errors for:
75//! - Malformed YAML syntax
76//! - Unclosed frontmatter blocks
77//! - Multiple global frontmatter blocks
78//! - Both QUILL and SCOPE specified in the same block
79//! - Reserved field name usage
80//! - Name collisions
81//!
82//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.
83
84use std::collections::HashMap;
85
86use crate::value::QuillValue;
87
/// Name of the field under which document body text is stored.
///
/// [`decompose`] stores the global body under this key, and every tagged
/// (`SCOPE`) item carries its own body under the same key. The name is
/// rejected when used as a `SCOPE` value.
pub const BODY_FIELD: &str = "body";
90
/// Reserved tag name for the quill specification.
///
/// NOTE(review): nothing in the visible part of this module reads this
/// constant (the parser matches the upper-case `QUILL` YAML key directly);
/// confirm how it is used elsewhere before relying on it.
pub const QUILL_TAG: &str = "quill";
93
94/// A parsed markdown document with frontmatter
95#[derive(Debug, Clone)]
96pub struct ParsedDocument {
97    fields: HashMap<String, QuillValue>,
98    quill_tag: Option<String>,
99}
100
101impl ParsedDocument {
102    /// Create a new ParsedDocument with the given fields
103    pub fn new(fields: HashMap<String, QuillValue>) -> Self {
104        Self {
105            fields,
106            quill_tag: None,
107        }
108    }
109
110    /// Create a ParsedDocument from markdown string
111    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
112        decompose(markdown).map_err(|e| crate::error::ParseError::from(e))
113    }
114
115    /// Get the quill tag if specified (from QUILL key)
116    pub fn quill_tag(&self) -> Option<&str> {
117        self.quill_tag.as_deref()
118    }
119
120    /// Get the document body
121    pub fn body(&self) -> Option<&str> {
122        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
123    }
124
125    /// Get a specific field
126    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
127        self.fields.get(name)
128    }
129
130    /// Get all fields (including body)
131    pub fn fields(&self) -> &HashMap<String, QuillValue> {
132        &self.fields
133    }
134}
135
/// One `---`-delimited metadata block located inside the markdown source.
#[derive(Debug)]
struct MetadataBlock {
    /// Byte offset of the opening `---`.
    start: usize,
    /// Byte offset just past the closing delimiter (including its line ending
    /// when one is present).
    end: usize,
    /// YAML text of the block with any `QUILL` / `SCOPE` key removed.
    yaml_content: String,
    /// Collection field name taken from the `SCOPE` key.
    tag: Option<String>,
    /// Quill name taken from the `QUILL` key.
    quill_name: Option<String>,
}
144
/// Returns `true` when `name` matches the pattern `[a-z_][a-z0-9_]*`.
///
/// The empty string is rejected. Only ASCII lowercase letters, ASCII digits
/// (never in first position) and underscores are accepted.
fn is_valid_tag_name(name: &str) -> bool {
    let mut rest = name.chars();
    match rest.next() {
        // The first character may not be a digit.
        Some(head) if head == '_' || head.is_ascii_lowercase() => {
            rest.all(|c| c == '_' || c.is_ascii_lowercase() || c.is_ascii_digit())
        }
        _ => false,
    }
}
166
167/// Find all metadata blocks in the document
168fn find_metadata_blocks(
169    markdown: &str,
170) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
171    let mut blocks = Vec::new();
172    let mut pos = 0;
173
174    while pos < markdown.len() {
175        // Look for opening "---\n" or "---\r\n"
176        let search_str = &markdown[pos..];
177        let delimiter_result = if let Some(p) = search_str.find("---\n") {
178            Some((p, 4, "\n"))
179        } else if let Some(p) = search_str.find("---\r\n") {
180            Some((p, 5, "\r\n"))
181        } else {
182            None
183        };
184
185        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
186            let abs_pos = pos + delimiter_pos;
187            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"
188
189            // Check if opening --- is followed by a blank line (horizontal rule, not metadata)
190            let followed_by_blank = if content_start < markdown.len() {
191                markdown[content_start..].starts_with('\n')
192                    || markdown[content_start..].starts_with("\r\n")
193            } else {
194                false
195            };
196
197            if followed_by_blank {
198                // This is a horizontal rule in the body, skip it
199                pos = abs_pos + 3; // Skip past "---"
200                continue;
201            }
202
203            // Found potential metadata block opening
204            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
205            let rest = &markdown[content_start..];
206
207            // First try to find delimiters with trailing newlines
208            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
209            let closing_with_newline = closing_patterns
210                .iter()
211                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
212                .min_by_key(|(p, _)| *p);
213
214            // Also check for closing at end of document (no trailing newline)
215            let closing_at_eof = ["\n---", "\r\n---"]
216                .iter()
217                .filter_map(|delim| {
218                    rest.find(delim).and_then(|p| {
219                        if p + delim.len() == rest.len() {
220                            Some((p, delim.len()))
221                        } else {
222                            None
223                        }
224                    })
225                })
226                .min_by_key(|(p, _)| *p);
227
228            let closing_result = match (closing_with_newline, closing_at_eof) {
229                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
230                (Some(_), Some(_)) => closing_with_newline,
231                (Some(_), None) => closing_with_newline,
232                (None, Some(_)) => closing_at_eof,
233                (None, None) => None,
234            };
235
236            if let Some((closing_pos, closing_len)) = closing_result {
237                let abs_closing_pos = content_start + closing_pos;
238                let content = &markdown[content_start..abs_closing_pos];
239
240                // Check YAML size limit
241                if content.len() > crate::error::MAX_YAML_SIZE {
242                    return Err(format!(
243                        "YAML block too large: {} bytes (max: {} bytes)",
244                        content.len(),
245                        crate::error::MAX_YAML_SIZE
246                    )
247                    .into());
248                }
249
250                // Check if the block is contiguous (no blank lines in the YAML content)
251                if content.contains("\n\n") || content.contains("\r\n\r\n") {
252                    // Not a contiguous block
253                    if abs_pos == 0 {
254                        // Started at beginning but has blank lines - this is an error
255                        return Err("Frontmatter started but not closed with ---".into());
256                    }
257                    // Otherwise treat as horizontal rule in body
258                    pos = abs_pos + 3;
259                    continue;
260                }
261
262                // Parse YAML content to check for reserved keys (QUILL, SCOPE)
263                // First, try to parse as YAML
264                let (tag, quill_name, yaml_content) = if !content.is_empty() {
265                    // Try to parse the YAML to check for reserved keys
266                    match serde_yaml::from_str::<serde_yaml::Value>(content) {
267                        Ok(yaml_value) => {
268                            if let Some(mapping) = yaml_value.as_mapping() {
269                                let quill_key = serde_yaml::Value::String("QUILL".to_string());
270                                let scope_key = serde_yaml::Value::String("SCOPE".to_string());
271
272                                let has_quill = mapping.contains_key(&quill_key);
273                                let has_scope = mapping.contains_key(&scope_key);
274
275                                if has_quill && has_scope {
276                                    return Err(
277                                        "Cannot specify both QUILL and SCOPE in the same block"
278                                            .into(),
279                                    );
280                                }
281
282                                if has_quill {
283                                    // Extract quill name
284                                    let quill_value = mapping.get(&quill_key).unwrap();
285                                    let quill_name_str = quill_value
286                                        .as_str()
287                                        .ok_or_else(|| "QUILL value must be a string")?;
288
289                                    if !is_valid_tag_name(quill_name_str) {
290                                        return Err(format!(
291                                            "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
292                                            quill_name_str
293                                        )
294                                        .into());
295                                    }
296
297                                    // Remove QUILL from the YAML content for processing
298                                    let mut new_mapping = mapping.clone();
299                                    new_mapping.remove(&quill_key);
300                                    let new_yaml = serde_yaml::to_string(&new_mapping)
301                                        .map_err(|e| format!("Failed to serialize YAML: {}", e))?;
302
303                                    (None, Some(quill_name_str.to_string()), new_yaml)
304                                } else if has_scope {
305                                    // Extract scope field name
306                                    let scope_value = mapping.get(&scope_key).unwrap();
307                                    let field_name = scope_value
308                                        .as_str()
309                                        .ok_or_else(|| "SCOPE value must be a string")?;
310
311                                    if !is_valid_tag_name(field_name) {
312                                        return Err(format!(
313                                            "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
314                                            field_name
315                                        )
316                                        .into());
317                                    }
318
319                                    if field_name == BODY_FIELD {
320                                        return Err(format!(
321                                            "Cannot use reserved field name '{}' as SCOPE value",
322                                            BODY_FIELD
323                                        )
324                                        .into());
325                                    }
326
327                                    // Remove SCOPE from the YAML content for processing
328                                    let mut new_mapping = mapping.clone();
329                                    new_mapping.remove(&scope_key);
330                                    let new_yaml = serde_yaml::to_string(&new_mapping)
331                                        .map_err(|e| format!("Failed to serialize YAML: {}", e))?;
332
333                                    (Some(field_name.to_string()), None, new_yaml)
334                                } else {
335                                    // No reserved keys, treat as normal YAML
336                                    (None, None, content.to_string())
337                                }
338                            } else {
339                                // Not a mapping, treat as normal YAML
340                                (None, None, content.to_string())
341                            }
342                        }
343                        Err(_) => {
344                            // If YAML parsing fails here, we'll catch it later
345                            (None, None, content.to_string())
346                        }
347                    }
348                } else {
349                    (None, None, content.to_string())
350                };
351
352                blocks.push(MetadataBlock {
353                    start: abs_pos,
354                    end: abs_closing_pos + closing_len, // After closing delimiter
355                    yaml_content,
356                    tag,
357                    quill_name,
358                });
359
360                pos = abs_closing_pos + closing_len;
361            } else if abs_pos == 0 {
362                // Frontmatter started but not closed
363                return Err("Frontmatter started but not closed with ---".into());
364            } else {
365                // Not a valid metadata block, skip this position
366                pos = abs_pos + 3;
367            }
368        } else {
369            break;
370        }
371    }
372
373    Ok(blocks)
374}
375
376/// Decompose markdown into frontmatter fields and body
377pub fn decompose(
378    markdown: &str,
379) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
380    // Check input size limit
381    if markdown.len() > crate::error::MAX_INPUT_SIZE {
382        return Err(format!(
383            "Input too large: {} bytes (max: {} bytes)",
384            markdown.len(),
385            crate::error::MAX_INPUT_SIZE
386        )
387        .into());
388    }
389
390    let mut fields = HashMap::new();
391
392    // Find all metadata blocks
393    let blocks = find_metadata_blocks(markdown)?;
394
395    if blocks.is_empty() {
396        // No metadata blocks, entire content is body
397        fields.insert(
398            BODY_FIELD.to_string(),
399            QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
400        );
401        return Ok(ParsedDocument::new(fields));
402    }
403
404    // Track which attributes are used for tagged blocks
405    let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
406    let mut has_global_frontmatter = false;
407    let mut global_frontmatter_index: Option<usize> = None;
408    let mut quill_name: Option<String> = None;
409
410    // First pass: identify global frontmatter, quill directive, and validate
411    for (idx, block) in blocks.iter().enumerate() {
412        // Check for quill directive
413        if let Some(ref name) = block.quill_name {
414            if quill_name.is_some() {
415                return Err("Multiple quill directives found: only one allowed".into());
416            }
417            quill_name = Some(name.clone());
418        }
419
420        // Check for global frontmatter (no tag and no quill directive)
421        if block.tag.is_none() && block.quill_name.is_none() {
422            if has_global_frontmatter {
423                return Err(
424                    "Multiple global frontmatter blocks found: only one untagged block allowed"
425                        .into(),
426                );
427            }
428            has_global_frontmatter = true;
429            global_frontmatter_index = Some(idx);
430        }
431    }
432
433    // Parse global frontmatter if present
434    if let Some(idx) = global_frontmatter_index {
435        let block = &blocks[idx];
436
437        // Parse YAML frontmatter
438        let yaml_fields: HashMap<String, serde_yaml::Value> = if block.yaml_content.is_empty() {
439            HashMap::new()
440        } else {
441            serde_yaml::from_str(&block.yaml_content)
442                .map_err(|e| format!("Invalid YAML frontmatter: {}", e))?
443        };
444
445        // Check that all tagged blocks don't conflict with global fields
446        for other_block in &blocks {
447            if let Some(ref tag) = other_block.tag {
448                if yaml_fields.contains_key(tag) {
449                    return Err(format!(
450                        "Name collision: global field '{}' conflicts with tagged attribute",
451                        tag
452                    )
453                    .into());
454                }
455            }
456        }
457
458        // Convert YAML values to QuillValue at boundary
459        for (key, value) in yaml_fields {
460            fields.insert(key, QuillValue::from_yaml(value)?);
461        }
462    }
463
464    // Process blocks with quill directives
465    for block in &blocks {
466        if block.quill_name.is_some() {
467            // Quill directive blocks can have YAML content (becomes part of frontmatter)
468            if !block.yaml_content.is_empty() {
469                let yaml_fields: HashMap<String, serde_yaml::Value> =
470                    serde_yaml::from_str(&block.yaml_content)
471                        .map_err(|e| format!("Invalid YAML in quill block: {}", e))?;
472
473                // Check for conflicts with existing fields
474                for key in yaml_fields.keys() {
475                    if fields.contains_key(key) {
476                        return Err(format!(
477                            "Name collision: quill block field '{}' conflicts with existing field",
478                            key
479                        )
480                        .into());
481                    }
482                }
483
484                // Convert YAML values to QuillValue at boundary
485                for (key, value) in yaml_fields {
486                    fields.insert(key, QuillValue::from_yaml(value)?);
487                }
488            }
489        }
490    }
491
492    // Parse tagged blocks
493    for (idx, block) in blocks.iter().enumerate() {
494        if let Some(ref tag_name) = block.tag {
495            // Check if this conflicts with global fields
496            if fields.contains_key(tag_name) {
497                return Err(format!(
498                    "Name collision: tagged attribute '{}' conflicts with global field",
499                    tag_name
500                )
501                .into());
502            }
503
504            // Parse YAML metadata
505            let mut item_fields: HashMap<String, serde_yaml::Value> =
506                if block.yaml_content.is_empty() {
507                    HashMap::new()
508                } else {
509                    serde_yaml::from_str(&block.yaml_content).map_err(|e| {
510                        format!("Invalid YAML in tagged block '{}': {}", tag_name, e)
511                    })?
512                };
513
514            // Extract body for this tagged block
515            let body_start = block.end;
516            let body_end = if idx + 1 < blocks.len() {
517                blocks[idx + 1].start
518            } else {
519                markdown.len()
520            };
521            let body = &markdown[body_start..body_end];
522
523            // Add body to item fields
524            item_fields.insert(
525                BODY_FIELD.to_string(),
526                serde_yaml::Value::String(body.to_string()),
527            );
528
529            // Convert HashMap to serde_yaml::Value::Mapping
530            let item_value = serde_yaml::to_value(item_fields)?;
531
532            // Add to collection
533            tagged_attributes
534                .entry(tag_name.clone())
535                .or_insert_with(Vec::new)
536                .push(item_value);
537        }
538    }
539
540    // Extract global body
541    // Body starts after global frontmatter or quill block (whichever comes first)
542    // Body ends at the first scope block or EOF
543    let first_non_scope_block_idx = blocks
544        .iter()
545        .position(|b| b.tag.is_none() && b.quill_name.is_none())
546        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));
547
548    let (body_start, body_end) = if let Some(idx) = first_non_scope_block_idx {
549        // Body starts after the first non-scope block (global frontmatter or quill)
550        let start = blocks[idx].end;
551
552        // Body ends at the first scope block after this, or EOF
553        let end = blocks
554            .iter()
555            .skip(idx + 1)
556            .find(|b| b.tag.is_some())
557            .map(|b| b.start)
558            .unwrap_or(markdown.len());
559
560        (start, end)
561    } else {
562        // No global frontmatter or quill block - body is everything before the first scope block
563        let end = blocks
564            .iter()
565            .find(|b| b.tag.is_some())
566            .map(|b| b.start)
567            .unwrap_or(0);
568
569        (0, end)
570    };
571
572    let global_body = &markdown[body_start..body_end];
573
574    fields.insert(
575        BODY_FIELD.to_string(),
576        QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
577    );
578
579    // Add all tagged collections to fields (convert to QuillValue)
580    for (tag_name, items) in tagged_attributes {
581        let quill_value = QuillValue::from_yaml(serde_yaml::Value::Sequence(items))?;
582        fields.insert(tag_name, quill_value);
583    }
584
585    let mut parsed = ParsedDocument::new(fields);
586
587    // Set quill tag if present
588    if let Some(name) = quill_name {
589        parsed.quill_tag = Some(name);
590    }
591
592    Ok(parsed)
593}
594
595#[cfg(test)]
596mod tests {
597    use super::*;
598
599    #[test]
600    fn test_no_frontmatter() {
601        let markdown = "# Hello World\n\nThis is a test.";
602        let doc = decompose(markdown).unwrap();
603
604        assert_eq!(doc.body(), Some(markdown));
605        assert_eq!(doc.fields().len(), 1);
606    }
607
608    #[test]
609    fn test_with_frontmatter() {
610        let markdown = r#"---
611title: Test Document
612author: Test Author
613---
614
615# Hello World
616
617This is the body."#;
618
619        let doc = decompose(markdown).unwrap();
620
621        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
622        assert_eq!(
623            doc.get_field("title").unwrap().as_str().unwrap(),
624            "Test Document"
625        );
626        assert_eq!(
627            doc.get_field("author").unwrap().as_str().unwrap(),
628            "Test Author"
629        );
630        assert_eq!(doc.fields().len(), 3); // title, author, body
631    }
632
633    #[test]
634    fn test_complex_yaml_frontmatter() {
635        let markdown = r#"---
636title: Complex Document
637tags:
638  - test
639  - yaml
640metadata:
641  version: 1.0
642  nested:
643    field: value
644---
645
646Content here."#;
647
648        let doc = decompose(markdown).unwrap();
649
650        assert_eq!(doc.body(), Some("\nContent here."));
651        assert_eq!(
652            doc.get_field("title").unwrap().as_str().unwrap(),
653            "Complex Document"
654        );
655
656        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
657        assert_eq!(tags.len(), 2);
658        assert_eq!(tags[0].as_str().unwrap(), "test");
659        assert_eq!(tags[1].as_str().unwrap(), "yaml");
660    }
661
662    #[test]
663    fn test_invalid_yaml() {
664        let markdown = r#"---
665title: [invalid yaml
666author: missing close bracket
667---
668
669Content here."#;
670
671        let result = decompose(markdown);
672        assert!(result.is_err());
673        assert!(result
674            .unwrap_err()
675            .to_string()
676            .contains("Invalid YAML frontmatter"));
677    }
678
679    #[test]
680    fn test_unclosed_frontmatter() {
681        let markdown = r#"---
682title: Test
683author: Test Author
684
685Content without closing ---"#;
686
687        let result = decompose(markdown);
688        assert!(result.is_err());
689        assert!(result.unwrap_err().to_string().contains("not closed"));
690    }
691
692    // Extended metadata tests
693
694    #[test]
695    fn test_basic_tagged_block() {
696        let markdown = r#"---
697title: Main Document
698---
699
700Main body content.
701
702---
703SCOPE: items
704name: Item 1
705---
706
707Body of item 1."#;
708
709        let doc = decompose(markdown).unwrap();
710
711        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
712        assert_eq!(
713            doc.get_field("title").unwrap().as_str().unwrap(),
714            "Main Document"
715        );
716
717        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
718        assert_eq!(items.len(), 1);
719
720        let item = items[0].as_object().unwrap();
721        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
722        assert_eq!(
723            item.get("body").unwrap().as_str().unwrap(),
724            "\nBody of item 1."
725        );
726    }
727
728    #[test]
729    fn test_multiple_tagged_blocks() {
730        let markdown = r#"---
731SCOPE: items
732name: Item 1
733tags: [a, b]
734---
735
736First item body.
737
738---
739SCOPE: items
740name: Item 2
741tags: [c, d]
742---
743
744Second item body."#;
745
746        let doc = decompose(markdown).unwrap();
747
748        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
749        assert_eq!(items.len(), 2);
750
751        let item1 = items[0].as_object().unwrap();
752        assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");
753
754        let item2 = items[1].as_object().unwrap();
755        assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
756    }
757
758    #[test]
759    fn test_mixed_global_and_tagged() {
760        let markdown = r#"---
761title: Global
762author: John Doe
763---
764
765Global body.
766
767---
768SCOPE: sections
769title: Section 1
770---
771
772Section 1 content.
773
774---
775SCOPE: sections
776title: Section 2
777---
778
779Section 2 content."#;
780
781        let doc = decompose(markdown).unwrap();
782
783        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
784        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
785
786        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
787        assert_eq!(sections.len(), 2);
788    }
789
790    #[test]
791    fn test_empty_tagged_metadata() {
792        let markdown = r#"---
793SCOPE: items
794---
795
796Body without metadata."#;
797
798        let doc = decompose(markdown).unwrap();
799
800        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
801        assert_eq!(items.len(), 1);
802
803        let item = items[0].as_object().unwrap();
804        assert_eq!(
805            item.get("body").unwrap().as_str().unwrap(),
806            "\nBody without metadata."
807        );
808    }
809
810    #[test]
811    fn test_tagged_block_without_body() {
812        let markdown = r#"---
813SCOPE: items
814name: Item
815---"#;
816
817        let doc = decompose(markdown).unwrap();
818
819        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
820        assert_eq!(items.len(), 1);
821
822        let item = items[0].as_object().unwrap();
823        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
824    }
825
826    #[test]
827    fn test_name_collision_global_and_tagged() {
828        let markdown = r#"---
829items: "global value"
830---
831
832Body
833
834---
835SCOPE: items
836name: Item
837---
838
839Item body"#;
840
841        let result = decompose(markdown);
842        assert!(result.is_err());
843        assert!(result.unwrap_err().to_string().contains("collision"));
844    }
845
846    #[test]
847    fn test_reserved_field_name() {
848        let markdown = r#"---
849SCOPE: body
850content: Test
851---"#;
852
853        let result = decompose(markdown);
854        assert!(result.is_err());
855        assert!(result.unwrap_err().to_string().contains("reserved"));
856    }
857
858    #[test]
859    fn test_invalid_tag_syntax() {
860        let markdown = r#"---
861SCOPE: Invalid-Name
862title: Test
863---"#;
864
865        let result = decompose(markdown);
866        assert!(result.is_err());
867        assert!(result
868            .unwrap_err()
869            .to_string()
870            .contains("Invalid field name"));
871    }
872
873    #[test]
874    fn test_multiple_global_frontmatter_blocks() {
875        let markdown = r#"---
876title: First
877---
878
879Body
880
881---
882author: Second
883---
884
885More body"#;
886
887        let result = decompose(markdown);
888        assert!(result.is_err());
889        assert!(result
890            .unwrap_err()
891            .to_string()
892            .contains("Multiple global frontmatter"));
893    }
894
895    #[test]
896    fn test_adjacent_blocks_different_tags() {
897        let markdown = r#"---
898SCOPE: items
899name: Item 1
900---
901
902Item 1 body
903
904---
905SCOPE: sections
906title: Section 1
907---
908
909Section 1 body"#;
910
911        let doc = decompose(markdown).unwrap();
912
913        assert!(doc.get_field("items").is_some());
914        assert!(doc.get_field("sections").is_some());
915
916        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
917        assert_eq!(items.len(), 1);
918
919        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
920        assert_eq!(sections.len(), 1);
921    }
922
923    #[test]
924    fn test_order_preservation() {
925        let markdown = r#"---
926SCOPE: items
927id: 1
928---
929
930First
931
932---
933SCOPE: items
934id: 2
935---
936
937Second
938
939---
940SCOPE: items
941id: 3
942---
943
944Third"#;
945
946        let doc = decompose(markdown).unwrap();
947
948        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
949        assert_eq!(items.len(), 3);
950
951        for (i, item) in items.iter().enumerate() {
952            let mapping = item.as_object().unwrap();
953            let id = mapping.get("id").unwrap().as_i64().unwrap();
954            assert_eq!(id, (i + 1) as i64);
955        }
956    }
957
958    #[test]
959    fn test_product_catalog_integration() {
960        let markdown = r#"---
961title: Product Catalog
962author: John Doe
963date: 2024-01-01
964---
965
966This is the main catalog description.
967
968---
969SCOPE: products
970name: Widget A
971price: 19.99
972sku: WID-001
973---
974
975The **Widget A** is our most popular product.
976
977---
978SCOPE: products
979name: Gadget B
980price: 29.99
981sku: GAD-002
982---
983
984The **Gadget B** is perfect for professionals.
985
986---
987SCOPE: reviews
988product: Widget A
989rating: 5
990---
991
992"Excellent product! Highly recommended."
993
994---
995SCOPE: reviews
996product: Gadget B
997rating: 4
998---
999
1000"Very good, but a bit pricey.""#;
1001
1002        let doc = decompose(markdown).unwrap();
1003
1004        // Verify global fields
1005        assert_eq!(
1006            doc.get_field("title").unwrap().as_str().unwrap(),
1007            "Product Catalog"
1008        );
1009        assert_eq!(
1010            doc.get_field("author").unwrap().as_str().unwrap(),
1011            "John Doe"
1012        );
1013        assert_eq!(
1014            doc.get_field("date").unwrap().as_str().unwrap(),
1015            "2024-01-01"
1016        );
1017
1018        // Verify global body
1019        assert!(doc.body().unwrap().contains("main catalog description"));
1020
1021        // Verify products collection
1022        let products = doc.get_field("products").unwrap().as_sequence().unwrap();
1023        assert_eq!(products.len(), 2);
1024
1025        let product1 = products[0].as_object().unwrap();
1026        assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
1027        assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);
1028
1029        // Verify reviews collection
1030        let reviews = doc.get_field("reviews").unwrap().as_sequence().unwrap();
1031        assert_eq!(reviews.len(), 2);
1032
1033        let review1 = reviews[0].as_object().unwrap();
1034        assert_eq!(
1035            review1.get("product").unwrap().as_str().unwrap(),
1036            "Widget A"
1037        );
1038        assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);
1039
1040        // Total fields: title, author, date, body, products, reviews = 6
1041        assert_eq!(doc.fields().len(), 6);
1042    }
1043
1044    #[test]
1045    fn test_quill_directive() {
1046        let markdown = r#"---
1047QUILL: usaf_memo
1048memo_for: [ORG/SYMBOL]
1049memo_from: [ORG/SYMBOL]
1050---
1051
1052This is the memo body."#;
1053
1054        let doc = decompose(markdown).unwrap();
1055
1056        // Verify quill tag is set
1057        assert_eq!(doc.quill_tag(), Some("usaf_memo"));
1058
1059        // Verify fields from quill block become frontmatter
1060        assert_eq!(
1061            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1062                .as_str()
1063                .unwrap(),
1064            "ORG/SYMBOL"
1065        );
1066
1067        // Verify body
1068        assert_eq!(doc.body(), Some("\nThis is the memo body."));
1069    }
1070
1071    #[test]
1072    fn test_quill_with_scope_blocks() {
1073        let markdown = r#"---
1074QUILL: document
1075title: Test Document
1076---
1077
1078Main body.
1079
1080---
1081SCOPE: sections
1082name: Section 1
1083---
1084
1085Section 1 body."#;
1086
1087        let doc = decompose(markdown).unwrap();
1088
1089        // Verify quill tag
1090        assert_eq!(doc.quill_tag(), Some("document"));
1091
1092        // Verify global field from quill block
1093        assert_eq!(
1094            doc.get_field("title").unwrap().as_str().unwrap(),
1095            "Test Document"
1096        );
1097
1098        // Verify scope blocks work
1099        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1100        assert_eq!(sections.len(), 1);
1101
1102        // Verify body
1103        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1104    }
1105
1106    #[test]
1107    fn test_multiple_quill_directives_error() {
1108        let markdown = r#"---
1109QUILL: first
1110---
1111
1112---
1113QUILL: second
1114---"#;
1115
1116        let result = decompose(markdown);
1117        assert!(result.is_err());
1118        assert!(result
1119            .unwrap_err()
1120            .to_string()
1121            .contains("Multiple quill directives"));
1122    }
1123
1124    #[test]
1125    fn test_invalid_quill_name() {
1126        let markdown = r#"---
1127QUILL: Invalid-Name
1128---"#;
1129
1130        let result = decompose(markdown);
1131        assert!(result.is_err());
1132        assert!(result
1133            .unwrap_err()
1134            .to_string()
1135            .contains("Invalid quill name"));
1136    }
1137
1138    #[test]
1139    fn test_quill_wrong_value_type() {
1140        let markdown = r#"---
1141QUILL: 123
1142---"#;
1143
1144        let result = decompose(markdown);
1145        assert!(result.is_err());
1146        assert!(result
1147            .unwrap_err()
1148            .to_string()
1149            .contains("QUILL value must be a string"));
1150    }
1151
1152    #[test]
1153    fn test_scope_wrong_value_type() {
1154        let markdown = r#"---
1155SCOPE: 123
1156---"#;
1157
1158        let result = decompose(markdown);
1159        assert!(result.is_err());
1160        assert!(result
1161            .unwrap_err()
1162            .to_string()
1163            .contains("SCOPE value must be a string"));
1164    }
1165
1166    #[test]
1167    fn test_both_quill_and_scope_error() {
1168        let markdown = r#"---
1169QUILL: test
1170SCOPE: items
1171---"#;
1172
1173        let result = decompose(markdown);
1174        assert!(result.is_err());
1175        assert!(result
1176            .unwrap_err()
1177            .to_string()
1178            .contains("Cannot specify both QUILL and SCOPE"));
1179    }
1180}
/// Tests that run `decompose` against the bundled extended-metadata demo
/// fixture and exercise the input and YAML size limits.
#[cfg(test)]
mod demo_file_test {
    use super::*;

    /// The demo fixture parses into the expected global fields, body, and
    /// `features` / `use_cases` collections.
    #[test]
    fn test_extended_metadata_demo_file() {
        let markdown = include_str!("../../quillmark-fixtures/resources/extended_metadata_demo.md");
        let doc = decompose(markdown).unwrap();

        // Verify global fields
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Extended Metadata Demo"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Quillmark Team"
        );
        // version is parsed as a number by YAML
        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);

        // Verify body
        assert!(doc
            .body()
            .unwrap()
            .contains("extended YAML metadata standard"));

        // Verify features collection
        let features = doc.get_field("features").unwrap().as_sequence().unwrap();
        assert_eq!(features.len(), 3);

        // Verify use_cases collection
        let use_cases = doc.get_field("use_cases").unwrap().as_sequence().unwrap();
        assert_eq!(use_cases.len(), 2);

        // Check first feature
        let feature1 = features[0].as_object().unwrap();
        assert_eq!(
            feature1.get("name").unwrap().as_str().unwrap(),
            "Tag Directives"
        );
    }

    /// Input one byte longer than `MAX_INPUT_SIZE` is rejected with an
    /// "Input too large" error.
    #[test]
    fn test_input_size_limit() {
        // Create markdown larger than MAX_INPUT_SIZE (10 MB)
        let size = crate::error::MAX_INPUT_SIZE + 1;
        let large_markdown = "a".repeat(size);

        let result = decompose(&large_markdown);
        assert!(result.is_err());

        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("Input too large"));
    }

    /// A frontmatter block whose single value alone exceeds `MAX_YAML_SIZE`
    /// is rejected with a "YAML block too large" error.
    #[test]
    fn test_yaml_size_limit() {
        // Create YAML block larger than MAX_YAML_SIZE (1 MB)
        let mut markdown = String::from("---\n");

        // Create a very large YAML field
        let size = crate::error::MAX_YAML_SIZE + 1;
        markdown.push_str("data: \"");
        markdown.push_str(&"x".repeat(size));
        markdown.push_str("\"\n---\n\nBody");

        let result = decompose(&markdown);
        assert!(result.is_err());

        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("YAML block too large"));
    }

    /// A small document (1000-byte body) parses successfully.
    #[test]
    fn test_input_within_size_limit() {
        // A small document: 1000 bytes of body is far below the input limit,
        // so this is a sanity check rather than a boundary test.
        let size = 1000;
        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));

        let result = decompose(&markdown);
        assert!(result.is_ok());
    }

    /// A short frontmatter block parses successfully.
    #[test]
    fn test_yaml_within_size_limit() {
        // Create YAML block well within the limit
        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";

        // `markdown` is already a `&str`, so it is passed directly.
        let result = decompose(markdown);
        assert!(result.is_ok());
    }
}