quillmark_core/
parse.rs

1//! # Parsing Module
2//!
3//! Parsing functionality for markdown documents with YAML frontmatter.
4//!
5//! ## Overview
6//!
7//! The `parse` module provides the [`decompose`] function for parsing markdown documents
8//! and the [`ParsedDocument`] type for accessing parsed content.
9//!
10//! ## Key Types
11//!
12//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
13//! - [`BODY_FIELD`]: Constant for the field name storing document body
14//!
15//! ## Examples
16//!
17//! ### Basic Parsing
18//!
19//! ```
20//! use quillmark_core::decompose;
21//!
22//! let markdown = r#"---
23//! title: My Document
24//! author: John Doe
25//! ---
26//!
27//! # Introduction
28//!
29//! Document content here.
30//! "#;
31//!
32//! let doc = decompose(markdown).unwrap();
33//! let title = doc.get_field("title")
34//!     .and_then(|v| v.as_str())
35//!     .unwrap_or("Untitled");
36//! ```
37//!
38//! ### Extended Metadata with Tags
39//!
40//! ```
41//! use quillmark_core::decompose;
42//!
43//! let markdown = r#"---
44//! catalog_title: Product Catalog
45//! ---
46//!
47//! # Products
48//!
49//! ---
50//! !scope products
51//! name: Widget
52//! price: 19.99
53//! ---
54//!
55//! A versatile widget for all occasions.
56//! "#;
57//!
58//! let doc = decompose(markdown).unwrap();
59//!
60//! // Access tagged collections
61//! if let Some(products) = doc.get_field("products")
62//!     .and_then(|v| v.as_sequence())
63//! {
64//!     for product in products {
65//!         let name = product.get("name").and_then(|v| v.as_str()).unwrap();
66//!         let price = product.get("price").and_then(|v| v.as_f64()).unwrap();
67//!         println!("{}: ${}", name, price);
68//!     }
69//! }
70//! ```
71//!
72//! ## Error Handling
73//!
74//! The [`decompose`] function returns errors for:
75//! - Malformed YAML syntax
76//! - Unclosed frontmatter blocks
77//! - Multiple global frontmatter blocks
78//! - Invalid tag directive syntax
79//! - Reserved field name usage
80//! - Name collisions
81//!
82//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/quillmark-core/docs/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.
83
84use std::collections::HashMap;
85
/// The field name used to store the document body
///
/// The global body text is inserted into the parsed fields under this key, and each
/// tagged (`!scope`) item also carries its own body under the same key. Because of
/// that, this name is rejected when used as a `!scope` field name.
pub const BODY_FIELD: &str = "body";

/// Reserved tag name for quill specification
///
/// NOTE(review): the directive parser in this module matches the literal `"quill"`
/// rather than this constant — confirm where this constant is consumed.
pub const QUILL_TAG: &str = "quill";
91
/// A parsed markdown document with frontmatter
///
/// Stores every parsed field by name (the document body lives under [`BODY_FIELD`])
/// together with the optional quill name taken from a `!quill` directive.
#[derive(Debug, Clone)]
pub struct ParsedDocument {
    /// All document fields keyed by name, including the body under `BODY_FIELD`.
    fields: HashMap<String, serde_yaml::Value>,
    /// Name given by a `!quill` directive; `None` when no directive was present
    /// or when the document was built with `ParsedDocument::new`.
    quill_tag: Option<String>,
}
98
99impl ParsedDocument {
100    /// Create a new ParsedDocument with the given fields
101    pub fn new(fields: HashMap<String, serde_yaml::Value>) -> Self {
102        Self {
103            fields,
104            quill_tag: None,
105        }
106    }
107
108    /// Create a ParsedDocument from markdown string
109    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
110        decompose(markdown).map_err(|e| crate::error::ParseError::from(e))
111    }
112
113    /// Get the quill tag if specified (from !quill directive)
114    pub fn quill_tag(&self) -> Option<&str> {
115        self.quill_tag.as_deref()
116    }
117
118    /// Get the document body
119    pub fn body(&self) -> Option<&str> {
120        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
121    }
122
123    /// Get a specific field
124    pub fn get_field(&self, name: &str) -> Option<&serde_yaml::Value> {
125        self.fields.get(name)
126    }
127
128    /// Get all fields (including body)
129    pub fn fields(&self) -> &HashMap<String, serde_yaml::Value> {
130        &self.fields
131    }
132}
133
/// One `---`-delimited metadata block located by `find_metadata_blocks`.
///
/// `start` and `end` are byte offsets into the original markdown string; the text
/// between one block's `end` and the next block's `start` is body content.
#[derive(Debug)]
struct MetadataBlock {
    start: usize, // Byte offset of the opening "---"
    end: usize,   // Byte offset just past the closing delimiter (after "---\n", or "---" at end of input)
    yaml_content: String, // Text between the delimiters, with any leading "!" directive line removed
    tag: Option<String>, // Field name from a `!scope` directive, if present
    quill_name: Option<String>, // Quill name from a `!quill` directive, if present
}
142
/// Check that `name` follows the pattern `[a-z_][a-z0-9_]*`.
///
/// The first character must be an ASCII lowercase letter or `_`; every following
/// character must be an ASCII lowercase letter, an ASCII digit, or `_`. The empty
/// string is rejected.
fn is_valid_tag_name(name: &str) -> bool {
    let mut rest = name.chars();

    match rest.next() {
        // Leading character is acceptable: the remainder decides the outcome.
        Some(head) if head.is_ascii_lowercase() || head == '_' => {
            rest.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
        }
        // Empty input or an invalid leading character.
        _ => false,
    }
}
164
165/// Find all metadata blocks in the document
166fn find_metadata_blocks(
167    markdown: &str,
168) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
169    let mut blocks = Vec::new();
170    let mut pos = 0;
171
172    while pos < markdown.len() {
173        // Look for opening "---\n" or "---\r\n"
174        let search_str = &markdown[pos..];
175        let delimiter_result = if let Some(p) = search_str.find("---\n") {
176            Some((p, 4, "\n"))
177        } else if let Some(p) = search_str.find("---\r\n") {
178            Some((p, 5, "\r\n"))
179        } else {
180            None
181        };
182
183        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
184            let abs_pos = pos + delimiter_pos;
185            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"
186
187            // Check if opening --- is followed by a blank line (horizontal rule, not metadata)
188            let followed_by_blank = if content_start < markdown.len() {
189                markdown[content_start..].starts_with('\n')
190                    || markdown[content_start..].starts_with("\r\n")
191            } else {
192                false
193            };
194
195            if followed_by_blank {
196                // This is a horizontal rule in the body, skip it
197                pos = abs_pos + 3; // Skip past "---"
198                continue;
199            }
200
201            // Found potential metadata block opening
202            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
203            let rest = &markdown[content_start..];
204
205            // First try to find delimiters with trailing newlines
206            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
207            let closing_with_newline = closing_patterns
208                .iter()
209                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
210                .min_by_key(|(p, _)| *p);
211
212            // Also check for closing at end of document (no trailing newline)
213            let closing_at_eof = ["\n---", "\r\n---"]
214                .iter()
215                .filter_map(|delim| {
216                    rest.find(delim).and_then(|p| {
217                        if p + delim.len() == rest.len() {
218                            Some((p, delim.len()))
219                        } else {
220                            None
221                        }
222                    })
223                })
224                .min_by_key(|(p, _)| *p);
225
226            let closing_result = match (closing_with_newline, closing_at_eof) {
227                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
228                (Some(_), Some(_)) => closing_with_newline,
229                (Some(_), None) => closing_with_newline,
230                (None, Some(_)) => closing_at_eof,
231                (None, None) => None,
232            };
233
234            if let Some((closing_pos, closing_len)) = closing_result {
235                let abs_closing_pos = content_start + closing_pos;
236                let content = &markdown[content_start..abs_closing_pos];
237
238                // Check YAML size limit
239                if content.len() > crate::error::MAX_YAML_SIZE {
240                    return Err(format!(
241                        "YAML block too large: {} bytes (max: {} bytes)",
242                        content.len(),
243                        crate::error::MAX_YAML_SIZE
244                    )
245                    .into());
246                }
247
248                // Check if the block is contiguous (no blank lines in the YAML content)
249                if content.contains("\n\n") || content.contains("\r\n\r\n") {
250                    // Not a contiguous block
251                    if abs_pos == 0 {
252                        // Started at beginning but has blank lines - this is an error
253                        return Err("Frontmatter started but not closed with ---".into());
254                    }
255                    // Otherwise treat as horizontal rule in body
256                    pos = abs_pos + 3;
257                    continue;
258                }
259
260                // Extract tag directive if present
261                // New format: !scope {field} or !quill {quill_name}
262                let (tag, quill_name, yaml_content) = if content.starts_with('!') {
263                    if let Some(newline_pos) = content.find(|c| c == '\n' || c == '\r') {
264                        let directive_line = &content[1..newline_pos];
265                        // Skip newline(s) after directive
266                        let yaml_start = if content[newline_pos..].starts_with("\r\n") {
267                            newline_pos + 2
268                        } else {
269                            newline_pos + 1
270                        };
271                        let yaml = if yaml_start < content.len() {
272                            &content[yaml_start..]
273                        } else {
274                            ""
275                        };
276
277                        // Parse directive: "!scope field" or "!quill name"
278                        let directive_parts: Vec<&str> =
279                            directive_line.trim().split_whitespace().collect();
280
281                        if directive_parts.is_empty() {
282                            return Err("Empty tag directive".into());
283                        }
284
285                        match directive_parts[0] {
286                            "scope" => {
287                                // !scope field_name
288                                if directive_parts.len() != 2 {
289                                    return Err(format!(
290                                        "Invalid scope directive: expected '!scope field_name', got '!{}'",
291                                        directive_line
292                                    )
293                                    .into());
294                                }
295                                let field_name = directive_parts[1];
296                                if !is_valid_tag_name(field_name) {
297                                    return Err(format!(
298                                        "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
299                                        field_name
300                                    )
301                                    .into());
302                                }
303                                if field_name == BODY_FIELD {
304                                    return Err(format!(
305                                        "Cannot use reserved field name '{}' as scope directive",
306                                        BODY_FIELD
307                                    )
308                                    .into());
309                                }
310                                (Some(field_name.to_string()), None, yaml.to_string())
311                            }
312                            "quill" => {
313                                // !quill quill_name
314                                if directive_parts.len() != 2 {
315                                    return Err(format!(
316                                        "Invalid quill directive: expected '!quill quill_name', got '!{}'",
317                                        directive_line
318                                    )
319                                    .into());
320                                }
321                                let quill = directive_parts[1];
322                                if !is_valid_tag_name(quill) {
323                                    return Err(format!(
324                                        "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
325                                        quill
326                                    )
327                                    .into());
328                                }
329                                (None, Some(quill.to_string()), yaml.to_string())
330                            }
331                            _ => {
332                                return Err(format!(
333                                    "Invalid directive '{}': expected 'scope' or 'quill'",
334                                    directive_parts[0]
335                                )
336                                .into());
337                            }
338                        }
339                    } else {
340                        // Tag directive with no YAML content (entire content is just tag)
341                        let directive_line = content[1..].trim();
342                        let directive_parts: Vec<&str> =
343                            directive_line.split_whitespace().collect();
344
345                        if directive_parts.is_empty() {
346                            return Err("Empty tag directive".into());
347                        }
348
349                        match directive_parts[0] {
350                            "scope" => {
351                                if directive_parts.len() != 2 {
352                                    return Err(format!(
353                                        "Invalid scope directive: expected '!scope field_name', got '!{}'",
354                                        directive_line
355                                    )
356                                    .into());
357                                }
358                                let field_name = directive_parts[1];
359                                if !is_valid_tag_name(field_name) {
360                                    return Err(format!(
361                                        "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
362                                        field_name
363                                    )
364                                    .into());
365                                }
366                                if field_name == BODY_FIELD {
367                                    return Err(format!(
368                                        "Cannot use reserved field name '{}' as scope directive",
369                                        BODY_FIELD
370                                    )
371                                    .into());
372                                }
373                                (Some(field_name.to_string()), None, String::new())
374                            }
375                            "quill" => {
376                                if directive_parts.len() != 2 {
377                                    return Err(format!(
378                                        "Invalid quill directive: expected '!quill quill_name', got '!{}'",
379                                        directive_line
380                                    )
381                                    .into());
382                                }
383                                let quill = directive_parts[1];
384                                if !is_valid_tag_name(quill) {
385                                    return Err(format!(
386                                        "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
387                                        quill
388                                    )
389                                    .into());
390                                }
391                                (None, Some(quill.to_string()), String::new())
392                            }
393                            _ => {
394                                return Err(format!(
395                                    "Invalid directive '{}': expected 'scope' or 'quill'",
396                                    directive_parts[0]
397                                )
398                                .into());
399                            }
400                        }
401                    }
402                } else {
403                    (None, None, content.to_string())
404                };
405
406                blocks.push(MetadataBlock {
407                    start: abs_pos,
408                    end: abs_closing_pos + closing_len, // After closing delimiter
409                    yaml_content,
410                    tag,
411                    quill_name,
412                });
413
414                pos = abs_closing_pos + closing_len;
415            } else if abs_pos == 0 {
416                // Frontmatter started but not closed
417                return Err("Frontmatter started but not closed with ---".into());
418            } else {
419                // Not a valid metadata block, skip this position
420                pos = abs_pos + 3;
421            }
422        } else {
423            break;
424        }
425    }
426
427    Ok(blocks)
428}
429
430/// Decompose markdown into frontmatter fields and body
431pub fn decompose(
432    markdown: &str,
433) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
434    // Check input size limit
435    if markdown.len() > crate::error::MAX_INPUT_SIZE {
436        return Err(format!(
437            "Input too large: {} bytes (max: {} bytes)",
438            markdown.len(),
439            crate::error::MAX_INPUT_SIZE
440        )
441        .into());
442    }
443
444    let mut fields = HashMap::new();
445
446    // Find all metadata blocks
447    let blocks = find_metadata_blocks(markdown)?;
448
449    if blocks.is_empty() {
450        // No metadata blocks, entire content is body
451        fields.insert(
452            BODY_FIELD.to_string(),
453            serde_yaml::Value::String(markdown.to_string()),
454        );
455        return Ok(ParsedDocument::new(fields));
456    }
457
458    // Track which attributes are used for tagged blocks
459    let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
460    let mut has_global_frontmatter = false;
461    let mut global_frontmatter_index: Option<usize> = None;
462    let mut quill_name: Option<String> = None;
463
464    // First pass: identify global frontmatter, quill directive, and validate
465    for (idx, block) in blocks.iter().enumerate() {
466        // Check for quill directive
467        if let Some(ref name) = block.quill_name {
468            if quill_name.is_some() {
469                return Err("Multiple quill directives found: only one allowed".into());
470            }
471            quill_name = Some(name.clone());
472        }
473
474        // Check for global frontmatter (no tag and no quill directive)
475        if block.tag.is_none() && block.quill_name.is_none() {
476            if has_global_frontmatter {
477                return Err(
478                    "Multiple global frontmatter blocks found: only one untagged block allowed"
479                        .into(),
480                );
481            }
482            has_global_frontmatter = true;
483            global_frontmatter_index = Some(idx);
484        }
485    }
486
487    // Parse global frontmatter if present
488    if let Some(idx) = global_frontmatter_index {
489        let block = &blocks[idx];
490
491        // Parse YAML frontmatter
492        let yaml_fields: HashMap<String, serde_yaml::Value> = if block.yaml_content.is_empty() {
493            HashMap::new()
494        } else {
495            serde_yaml::from_str(&block.yaml_content)
496                .map_err(|e| format!("Invalid YAML frontmatter: {}", e))?
497        };
498
499        // Check that all tagged blocks don't conflict with global fields
500        for other_block in &blocks {
501            if let Some(ref tag) = other_block.tag {
502                if yaml_fields.contains_key(tag) {
503                    return Err(format!(
504                        "Name collision: global field '{}' conflicts with tagged attribute",
505                        tag
506                    )
507                    .into());
508                }
509            }
510        }
511
512        fields.extend(yaml_fields);
513    }
514
515    // Process blocks with quill directives
516    for block in &blocks {
517        if block.quill_name.is_some() {
518            // Quill directive blocks can have YAML content (becomes part of frontmatter)
519            if !block.yaml_content.is_empty() {
520                let yaml_fields: HashMap<String, serde_yaml::Value> =
521                    serde_yaml::from_str(&block.yaml_content)
522                        .map_err(|e| format!("Invalid YAML in quill block: {}", e))?;
523
524                // Check for conflicts with existing fields
525                for key in yaml_fields.keys() {
526                    if fields.contains_key(key) {
527                        return Err(format!(
528                            "Name collision: quill block field '{}' conflicts with existing field",
529                            key
530                        )
531                        .into());
532                    }
533                }
534
535                fields.extend(yaml_fields);
536            }
537        }
538    }
539
540    // Parse tagged blocks
541    for (idx, block) in blocks.iter().enumerate() {
542        if let Some(ref tag_name) = block.tag {
543            // Check if this conflicts with global fields
544            if fields.contains_key(tag_name) {
545                return Err(format!(
546                    "Name collision: tagged attribute '{}' conflicts with global field",
547                    tag_name
548                )
549                .into());
550            }
551
552            // Parse YAML metadata
553            let mut item_fields: HashMap<String, serde_yaml::Value> =
554                if block.yaml_content.is_empty() {
555                    HashMap::new()
556                } else {
557                    serde_yaml::from_str(&block.yaml_content).map_err(|e| {
558                        format!("Invalid YAML in tagged block '{}': {}", tag_name, e)
559                    })?
560                };
561
562            // Extract body for this tagged block
563            let body_start = block.end;
564            let body_end = if idx + 1 < blocks.len() {
565                blocks[idx + 1].start
566            } else {
567                markdown.len()
568            };
569            let body = &markdown[body_start..body_end];
570
571            // Add body to item fields
572            item_fields.insert(
573                BODY_FIELD.to_string(),
574                serde_yaml::Value::String(body.to_string()),
575            );
576
577            // Convert HashMap to serde_yaml::Value::Mapping
578            let item_value = serde_yaml::to_value(item_fields)?;
579
580            // Add to collection
581            tagged_attributes
582                .entry(tag_name.clone())
583                .or_insert_with(Vec::new)
584                .push(item_value);
585        }
586    }
587
588    // Extract global body
589    // Body starts after global frontmatter or quill block (whichever comes first)
590    // Body ends at the first scope block or EOF
591    let first_non_scope_block_idx = blocks
592        .iter()
593        .position(|b| b.tag.is_none() && b.quill_name.is_none())
594        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));
595
596    let (body_start, body_end) = if let Some(idx) = first_non_scope_block_idx {
597        // Body starts after the first non-scope block (global frontmatter or quill)
598        let start = blocks[idx].end;
599
600        // Body ends at the first scope block after this, or EOF
601        let end = blocks
602            .iter()
603            .skip(idx + 1)
604            .find(|b| b.tag.is_some())
605            .map(|b| b.start)
606            .unwrap_or(markdown.len());
607
608        (start, end)
609    } else {
610        // No global frontmatter or quill block - body is everything before the first scope block
611        let end = blocks
612            .iter()
613            .find(|b| b.tag.is_some())
614            .map(|b| b.start)
615            .unwrap_or(0);
616
617        (0, end)
618    };
619
620    let global_body = &markdown[body_start..body_end];
621
622    fields.insert(
623        BODY_FIELD.to_string(),
624        serde_yaml::Value::String(global_body.to_string()),
625    );
626
627    // Add all tagged collections to fields
628    for (tag_name, items) in tagged_attributes {
629        fields.insert(tag_name, serde_yaml::Value::Sequence(items));
630    }
631
632    let mut parsed = ParsedDocument::new(fields);
633
634    // Set quill tag if present
635    if let Some(name) = quill_name {
636        parsed.quill_tag = Some(name);
637    }
638
639    Ok(parsed)
640}
641
642#[cfg(test)]
643mod tests {
644    use super::*;
645
646    #[test]
647    fn test_no_frontmatter() {
648        let markdown = "# Hello World\n\nThis is a test.";
649        let doc = decompose(markdown).unwrap();
650
651        assert_eq!(doc.body(), Some(markdown));
652        assert_eq!(doc.fields().len(), 1);
653    }
654
655    #[test]
656    fn test_with_frontmatter() {
657        let markdown = r#"---
658title: Test Document
659author: Test Author
660---
661
662# Hello World
663
664This is the body."#;
665
666        let doc = decompose(markdown).unwrap();
667
668        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
669        assert_eq!(
670            doc.get_field("title").unwrap().as_str().unwrap(),
671            "Test Document"
672        );
673        assert_eq!(
674            doc.get_field("author").unwrap().as_str().unwrap(),
675            "Test Author"
676        );
677        assert_eq!(doc.fields().len(), 3); // title, author, body
678    }
679
680    #[test]
681    fn test_complex_yaml_frontmatter() {
682        let markdown = r#"---
683title: Complex Document
684tags:
685  - test
686  - yaml
687metadata:
688  version: 1.0
689  nested:
690    field: value
691---
692
693Content here."#;
694
695        let doc = decompose(markdown).unwrap();
696
697        assert_eq!(doc.body(), Some("\nContent here."));
698        assert_eq!(
699            doc.get_field("title").unwrap().as_str().unwrap(),
700            "Complex Document"
701        );
702
703        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
704        assert_eq!(tags.len(), 2);
705        assert_eq!(tags[0].as_str().unwrap(), "test");
706        assert_eq!(tags[1].as_str().unwrap(), "yaml");
707    }
708
709    #[test]
710    fn test_invalid_yaml() {
711        let markdown = r#"---
712title: [invalid yaml
713author: missing close bracket
714---
715
716Content here."#;
717
718        let result = decompose(markdown);
719        assert!(result.is_err());
720        assert!(result
721            .unwrap_err()
722            .to_string()
723            .contains("Invalid YAML frontmatter"));
724    }
725
726    #[test]
727    fn test_unclosed_frontmatter() {
728        let markdown = r#"---
729title: Test
730author: Test Author
731
732Content without closing ---"#;
733
734        let result = decompose(markdown);
735        assert!(result.is_err());
736        assert!(result.unwrap_err().to_string().contains("not closed"));
737    }
738
739    // Extended metadata tests
740
741    #[test]
742    fn test_basic_tagged_block() {
743        let markdown = r#"---
744title: Main Document
745---
746
747Main body content.
748
749---
750!scope items
751name: Item 1
752---
753
754Body of item 1."#;
755
756        let doc = decompose(markdown).unwrap();
757
758        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
759        assert_eq!(
760            doc.get_field("title").unwrap().as_str().unwrap(),
761            "Main Document"
762        );
763
764        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
765        assert_eq!(items.len(), 1);
766
767        let item = items[0].as_mapping().unwrap();
768        assert_eq!(
769            item.get(&serde_yaml::Value::String("name".to_string()))
770                .unwrap()
771                .as_str()
772                .unwrap(),
773            "Item 1"
774        );
775        assert_eq!(
776            item.get(&serde_yaml::Value::String("body".to_string()))
777                .unwrap()
778                .as_str()
779                .unwrap(),
780            "\nBody of item 1."
781        );
782    }
783
784    #[test]
785    fn test_multiple_tagged_blocks() {
786        let markdown = r#"---
787!scope items
788name: Item 1
789tags: [a, b]
790---
791
792First item body.
793
794---
795!scope items
796name: Item 2
797tags: [c, d]
798---
799
800Second item body."#;
801
802        let doc = decompose(markdown).unwrap();
803
804        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
805        assert_eq!(items.len(), 2);
806
807        let item1 = items[0].as_mapping().unwrap();
808        assert_eq!(
809            item1
810                .get(&serde_yaml::Value::String("name".to_string()))
811                .unwrap()
812                .as_str()
813                .unwrap(),
814            "Item 1"
815        );
816
817        let item2 = items[1].as_mapping().unwrap();
818        assert_eq!(
819            item2
820                .get(&serde_yaml::Value::String("name".to_string()))
821                .unwrap()
822                .as_str()
823                .unwrap(),
824            "Item 2"
825        );
826    }
827
828    #[test]
829    fn test_mixed_global_and_tagged() {
830        let markdown = r#"---
831title: Global
832author: John Doe
833---
834
835Global body.
836
837---
838!scope sections
839title: Section 1
840---
841
842Section 1 content.
843
844---
845!scope sections
846title: Section 2
847---
848
849Section 2 content."#;
850
851        let doc = decompose(markdown).unwrap();
852
853        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
854        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
855
856        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
857        assert_eq!(sections.len(), 2);
858    }
859
860    #[test]
861    fn test_empty_tagged_metadata() {
862        let markdown = r#"---
863!scope items
864---
865
866Body without metadata."#;
867
868        let doc = decompose(markdown).unwrap();
869
870        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
871        assert_eq!(items.len(), 1);
872
873        let item = items[0].as_mapping().unwrap();
874        assert_eq!(
875            item.get(&serde_yaml::Value::String("body".to_string()))
876                .unwrap()
877                .as_str()
878                .unwrap(),
879            "\nBody without metadata."
880        );
881    }
882
883    #[test]
884    fn test_tagged_block_without_body() {
885        let markdown = r#"---
886!scope items
887name: Item
888---"#;
889
890        let doc = decompose(markdown).unwrap();
891
892        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
893        assert_eq!(items.len(), 1);
894
895        let item = items[0].as_mapping().unwrap();
896        assert_eq!(
897            item.get(&serde_yaml::Value::String("body".to_string()))
898                .unwrap()
899                .as_str()
900                .unwrap(),
901            ""
902        );
903    }
904
905    #[test]
906    fn test_name_collision_global_and_tagged() {
907        let markdown = r#"---
908items: "global value"
909---
910
911Body
912
913---
914!scope items
915name: Item
916---
917
918Item body"#;
919
920        let result = decompose(markdown);
921        assert!(result.is_err());
922        assert!(result.unwrap_err().to_string().contains("collision"));
923    }
924
925    #[test]
926    fn test_reserved_field_name() {
927        let markdown = r#"---
928!scope body
929content: Test
930---"#;
931
932        let result = decompose(markdown);
933        assert!(result.is_err());
934        assert!(result.unwrap_err().to_string().contains("reserved"));
935    }
936
937    #[test]
938    fn test_invalid_tag_syntax() {
939        let markdown = r#"---
940!scope Invalid-Name
941title: Test
942---"#;
943
944        let result = decompose(markdown);
945        assert!(result.is_err());
946        assert!(result
947            .unwrap_err()
948            .to_string()
949            .contains("Invalid field name"));
950    }
951
952    #[test]
953    fn test_multiple_global_frontmatter_blocks() {
954        let markdown = r#"---
955title: First
956---
957
958Body
959
960---
961author: Second
962---
963
964More body"#;
965
966        let result = decompose(markdown);
967        assert!(result.is_err());
968        assert!(result
969            .unwrap_err()
970            .to_string()
971            .contains("Multiple global frontmatter"));
972    }
973
974    #[test]
975    fn test_adjacent_blocks_different_tags() {
976        let markdown = r#"---
977!scope items
978name: Item 1
979---
980
981Item 1 body
982
983---
984!scope sections
985title: Section 1
986---
987
988Section 1 body"#;
989
990        let doc = decompose(markdown).unwrap();
991
992        assert!(doc.get_field("items").is_some());
993        assert!(doc.get_field("sections").is_some());
994
995        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
996        assert_eq!(items.len(), 1);
997
998        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
999        assert_eq!(sections.len(), 1);
1000    }
1001
1002    #[test]
1003    fn test_order_preservation() {
1004        let markdown = r#"---
1005!scope items
1006id: 1
1007---
1008
1009First
1010
1011---
1012!scope items
1013id: 2
1014---
1015
1016Second
1017
1018---
1019!scope items
1020id: 3
1021---
1022
1023Third"#;
1024
1025        let doc = decompose(markdown).unwrap();
1026
1027        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1028        assert_eq!(items.len(), 3);
1029
1030        for (i, item) in items.iter().enumerate() {
1031            let mapping = item.as_mapping().unwrap();
1032            let id = mapping
1033                .get(&serde_yaml::Value::String("id".to_string()))
1034                .unwrap()
1035                .as_i64()
1036                .unwrap();
1037            assert_eq!(id, (i + 1) as i64);
1038        }
1039    }
1040
1041    #[test]
1042    fn test_product_catalog_integration() {
1043        let markdown = r#"---
1044title: Product Catalog
1045author: John Doe
1046date: 2024-01-01
1047---
1048
1049This is the main catalog description.
1050
1051---
1052!scope products
1053name: Widget A
1054price: 19.99
1055sku: WID-001
1056---
1057
1058The **Widget A** is our most popular product.
1059
1060---
1061!scope products
1062name: Gadget B
1063price: 29.99
1064sku: GAD-002
1065---
1066
1067The **Gadget B** is perfect for professionals.
1068
1069---
1070!scope reviews
1071product: Widget A
1072rating: 5
1073---
1074
1075"Excellent product! Highly recommended."
1076
1077---
1078!scope reviews
1079product: Gadget B
1080rating: 4
1081---
1082
1083"Very good, but a bit pricey.""#;
1084
1085        let doc = decompose(markdown).unwrap();
1086
1087        // Verify global fields
1088        assert_eq!(
1089            doc.get_field("title").unwrap().as_str().unwrap(),
1090            "Product Catalog"
1091        );
1092        assert_eq!(
1093            doc.get_field("author").unwrap().as_str().unwrap(),
1094            "John Doe"
1095        );
1096        assert_eq!(
1097            doc.get_field("date").unwrap().as_str().unwrap(),
1098            "2024-01-01"
1099        );
1100
1101        // Verify global body
1102        assert!(doc.body().unwrap().contains("main catalog description"));
1103
1104        // Verify products collection
1105        let products = doc.get_field("products").unwrap().as_sequence().unwrap();
1106        assert_eq!(products.len(), 2);
1107
1108        let product1 = products[0].as_mapping().unwrap();
1109        assert_eq!(
1110            product1
1111                .get(&serde_yaml::Value::String("name".to_string()))
1112                .unwrap()
1113                .as_str()
1114                .unwrap(),
1115            "Widget A"
1116        );
1117        assert_eq!(
1118            product1
1119                .get(&serde_yaml::Value::String("price".to_string()))
1120                .unwrap()
1121                .as_f64()
1122                .unwrap(),
1123            19.99
1124        );
1125
1126        // Verify reviews collection
1127        let reviews = doc.get_field("reviews").unwrap().as_sequence().unwrap();
1128        assert_eq!(reviews.len(), 2);
1129
1130        let review1 = reviews[0].as_mapping().unwrap();
1131        assert_eq!(
1132            review1
1133                .get(&serde_yaml::Value::String("product".to_string()))
1134                .unwrap()
1135                .as_str()
1136                .unwrap(),
1137            "Widget A"
1138        );
1139        assert_eq!(
1140            review1
1141                .get(&serde_yaml::Value::String("rating".to_string()))
1142                .unwrap()
1143                .as_i64()
1144                .unwrap(),
1145            5
1146        );
1147
1148        // Total fields: title, author, date, body, products, reviews = 6
1149        assert_eq!(doc.fields().len(), 6);
1150    }
1151
1152    #[test]
1153    fn test_quill_directive() {
1154        let markdown = r#"---
1155!quill usaf_memo
1156memo_for: [ORG/SYMBOL]
1157memo_from: [ORG/SYMBOL]
1158---
1159
1160This is the memo body."#;
1161
1162        let doc = decompose(markdown).unwrap();
1163
1164        // Verify quill tag is set
1165        assert_eq!(doc.quill_tag(), Some("usaf_memo"));
1166
1167        // Verify fields from quill block become frontmatter
1168        assert_eq!(
1169            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1170                .as_str()
1171                .unwrap(),
1172            "ORG/SYMBOL"
1173        );
1174
1175        // Verify body
1176        assert_eq!(doc.body(), Some("\nThis is the memo body."));
1177    }
1178
1179    #[test]
1180    fn test_quill_with_scope_blocks() {
1181        let markdown = r#"---
1182!quill document
1183title: Test Document
1184---
1185
1186Main body.
1187
1188---
1189!scope sections
1190name: Section 1
1191---
1192
1193Section 1 body."#;
1194
1195        let doc = decompose(markdown).unwrap();
1196
1197        // Verify quill tag
1198        assert_eq!(doc.quill_tag(), Some("document"));
1199
1200        // Verify global field from quill block
1201        assert_eq!(
1202            doc.get_field("title").unwrap().as_str().unwrap(),
1203            "Test Document"
1204        );
1205
1206        // Verify scope blocks work
1207        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1208        assert_eq!(sections.len(), 1);
1209
1210        // Verify body
1211        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1212    }
1213
1214    #[test]
1215    fn test_multiple_quill_directives_error() {
1216        let markdown = r#"---
1217!quill first
1218---
1219
1220---
1221!quill second
1222---"#;
1223
1224        let result = decompose(markdown);
1225        assert!(result.is_err());
1226        assert!(result
1227            .unwrap_err()
1228            .to_string()
1229            .contains("Multiple quill directives"));
1230    }
1231
1232    #[test]
1233    fn test_invalid_quill_name() {
1234        let markdown = r#"---
1235!quill Invalid-Name
1236---"#;
1237
1238        let result = decompose(markdown);
1239        assert!(result.is_err());
1240        assert!(result
1241            .unwrap_err()
1242            .to_string()
1243            .contains("Invalid quill name"));
1244    }
1245
1246    #[test]
1247    fn test_quill_directive_wrong_format() {
1248        let markdown = r#"---
1249!quill
1250---"#;
1251
1252        let result = decompose(markdown);
1253        assert!(result.is_err());
1254        assert!(result
1255            .unwrap_err()
1256            .to_string()
1257            .contains("Invalid quill directive"));
1258    }
1259
1260    #[test]
1261    fn test_scope_directive_wrong_format() {
1262        let markdown = r#"---
1263!scope
1264---"#;
1265
1266        let result = decompose(markdown);
1267        assert!(result.is_err());
1268        assert!(result
1269            .unwrap_err()
1270            .to_string()
1271            .contains("Invalid scope directive"));
1272    }
1273
1274    #[test]
1275    fn test_unknown_directive() {
1276        let markdown = r#"---
1277!unknown test
1278---"#;
1279
1280        let result = decompose(markdown);
1281        assert!(result.is_err());
1282        assert!(result
1283            .unwrap_err()
1284            .to_string()
1285            .contains("Invalid directive"));
1286    }
1287}
/// Tests that parse the bundled `extended_metadata_demo.md` fixture and that
/// exercise the input-size and YAML-size limits enforced by `decompose`.
#[cfg(test)]
mod demo_file_test {
    use super::*;

    /// Parses the demo fixture and checks its global fields, body text, and
    /// the sizes of the `features` and `use_cases` collections.
    #[test]
    fn test_extended_metadata_demo_file() {
        let markdown = include_str!("../../quillmark-fixtures/resources/extended_metadata_demo.md");
        let doc = decompose(markdown).unwrap();

        // Verify global fields
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Extended Metadata Demo"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Quillmark Team"
        );
        // version is parsed as a number by YAML
        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);

        // Verify body
        assert!(doc
            .body()
            .unwrap()
            .contains("extended YAML metadata standard"));

        // Verify features collection
        let features = doc.get_field("features").unwrap().as_sequence().unwrap();
        assert_eq!(features.len(), 3);

        // Verify use_cases collection
        let use_cases = doc.get_field("use_cases").unwrap().as_sequence().unwrap();
        assert_eq!(use_cases.len(), 2);

        // Check first feature
        let feature1 = features[0].as_mapping().unwrap();
        assert_eq!(
            feature1
                .get(&serde_yaml::Value::String("name".to_string()))
                .unwrap()
                .as_str()
                .unwrap(),
            "Tag Directives"
        );
    }

    /// Input one byte longer than `MAX_INPUT_SIZE` must be rejected with an
    /// "Input too large" error.
    #[test]
    fn test_input_size_limit() {
        // Create markdown larger than MAX_INPUT_SIZE (10 MB)
        let size = crate::error::MAX_INPUT_SIZE + 1;
        let large_markdown = "a".repeat(size);

        let result = decompose(&large_markdown);
        assert!(result.is_err());

        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("Input too large"));
    }

    /// A frontmatter block holding a string value longer than `MAX_YAML_SIZE`
    /// must be rejected with a "YAML block too large" error.
    #[test]
    fn test_yaml_size_limit() {
        // Create YAML block larger than MAX_YAML_SIZE (1 MB)
        let mut markdown = String::from("---\n");

        // Create a very large YAML field
        let size = crate::error::MAX_YAML_SIZE + 1;
        markdown.push_str("data: \"");
        markdown.push_str(&"x".repeat(size));
        markdown.push_str("\"\n---\n\nBody");

        let result = decompose(&markdown);
        assert!(result.is_err());

        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("YAML block too large"));
    }

    /// A small document (1000-byte body plus a short frontmatter block) must
    /// parse successfully.
    #[test]
    fn test_input_within_size_limit() {
        // A ~1 KB body: far below the input size limit, not a boundary case.
        let size = 1000;
        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));

        let result = decompose(&markdown);
        assert!(result.is_ok());
    }

    /// A document with a two-field frontmatter block must parse successfully.
    #[test]
    fn test_yaml_within_size_limit() {
        // Create YAML block well within the limit
        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";

        // `markdown` is already a `&str`; pass it directly instead of borrowing again.
        let result = decompose(markdown);
        assert!(result.is_ok());
    }
}