quillmark_core/
parse.rs

1//! # Parsing Module
2//!
3//! Parsing functionality for markdown documents with YAML frontmatter.
4//!
5//! ## Overview
6//!
7//! The `parse` module provides the [`decompose`] function for parsing markdown documents
8//! and the [`ParsedDocument`] type for accessing parsed content.
9//!
10//! ## Key Types
11//!
12//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
13//! - [`BODY_FIELD`]: Constant for the field name storing document body
14//!
15//! ## Examples
16//!
17//! ### Basic Parsing
18//!
19//! ```
20//! use quillmark_core::decompose;
21//!
22//! let markdown = r#"---
23//! title: My Document
24//! author: John Doe
25//! ---
26//!
27//! # Introduction
28//!
29//! Document content here.
30//! "#;
31//!
32//! let doc = decompose(markdown).unwrap();
33//! let title = doc.get_field("title")
34//!     .and_then(|v| v.as_str())
35//!     .unwrap_or("Untitled");
36//! ```
37//!
38//! ### Extended Metadata with Tags
39//!
40//! ```
41//! use quillmark_core::decompose;
42//!
43//! let markdown = r#"---
44//! catalog_title: Product Catalog
45//! ---
46//!
47//! # Products
48//!
49//! ---
50//! SCOPE: products
51//! name: Widget
52//! price: 19.99
53//! ---
54//!
55//! A versatile widget for all occasions.
56//! "#;
57//!
58//! let doc = decompose(markdown).unwrap();
59//!
60//! // Access tagged collections
61//! if let Some(products) = doc.get_field("products")
62//!     .and_then(|v| v.as_sequence())
63//! {
64//!     for product in products {
65//!         let name = product.get("name").and_then(|v| v.as_str()).unwrap();
66//!         let price = product.get("price").and_then(|v| v.as_f64()).unwrap();
67//!         println!("{}: ${}", name, price);
68//!     }
69//! }
70//! ```
71//!
72//! ## Error Handling
73//!
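//! The [`decompose`] function returns errors for:
//! - Malformed YAML syntax
//! - Unclosed frontmatter blocks
//! - Multiple global frontmatter blocks
//! - Multiple QUILL directives
//! - Both QUILL and SCOPE specified in the same block
//! - QUILL or SCOPE values that are not strings matching `[a-z_][a-z0-9_]*`
//! - Reserved field name usage (`body` as a SCOPE value)
//! - Name collisions between global fields and tagged collections
//! - Input or YAML blocks that exceed the configured size limits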
81//!
82//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/quillmark-core/docs/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.
83
84use std::collections::HashMap;
85
/// The field name used to store the document body.
///
/// [`decompose`] stores the global body under this key and also inserts a
/// `body` entry into every tagged (SCOPE) item. It is rejected as a SCOPE value.
pub const BODY_FIELD: &str = "body";

/// Reserved tag name for quill specification.
///
/// NOTE(review): this constant is not referenced by the parsing code visible in
/// this file — confirm where (or whether) the reservation is enforced.
pub const QUILL_TAG: &str = "quill";
91
/// A parsed markdown document with frontmatter.
///
/// Produced by [`decompose`]; holds every frontmatter field, the document body
/// (stored under [`BODY_FIELD`]) and any tagged collections in a single map.
#[derive(Debug, Clone)]
pub struct ParsedDocument {
    /// Field name -> YAML value, including the body under [`BODY_FIELD`].
    fields: HashMap<String, serde_yaml::Value>,
    /// Quill name taken from a block's `QUILL` key; `None` when no block declared one.
    quill_tag: Option<String>,
}
98
99impl ParsedDocument {
100    /// Create a new ParsedDocument with the given fields
101    pub fn new(fields: HashMap<String, serde_yaml::Value>) -> Self {
102        Self {
103            fields,
104            quill_tag: None,
105        }
106    }
107
108    /// Create a ParsedDocument from markdown string
109    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
110        decompose(markdown).map_err(|e| crate::error::ParseError::from(e))
111    }
112
113    /// Get the quill tag if specified (from QUILL key)
114    pub fn quill_tag(&self) -> Option<&str> {
115        self.quill_tag.as_deref()
116    }
117
118    /// Get the document body
119    pub fn body(&self) -> Option<&str> {
120        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
121    }
122
123    /// Get a specific field
124    pub fn get_field(&self, name: &str) -> Option<&serde_yaml::Value> {
125        self.fields.get(name)
126    }
127
128    /// Get all fields (including body)
129    pub fn fields(&self) -> &HashMap<String, serde_yaml::Value> {
130        &self.fields
131    }
132}
133
/// One `---`-delimited metadata block located in the source markdown.
///
/// Offsets are byte positions into the original markdown string.
#[derive(Debug)]
struct MetadataBlock {
    /// Position of opening "---"
    start: usize,
    /// Position after closing "---\n" (or after the closing "---" at end of input)
    end: usize,
    /// YAML text of the block with any reserved QUILL / SCOPE key removed
    yaml_content: String,
    /// Field name from SCOPE key
    tag: Option<String>,
    /// Quill name from QUILL key
    quill_name: Option<String>,
}
142
/// Check that `name` matches the pattern `[a-z_][a-z0-9_]*`.
///
/// The first character must be an ASCII lowercase letter or `_`; every
/// following character may additionally be an ASCII digit. The empty string
/// is rejected.
fn is_valid_tag_name(name: &str) -> bool {
    let mut rest = name.chars();
    match rest.next() {
        Some(head) if head == '_' || head.is_ascii_lowercase() => {
            rest.all(|c| c == '_' || c.is_ascii_lowercase() || c.is_ascii_digit())
        }
        // Empty input or an invalid leading character.
        _ => false,
    }
}
164
165/// Find all metadata blocks in the document
166fn find_metadata_blocks(
167    markdown: &str,
168) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
169    let mut blocks = Vec::new();
170    let mut pos = 0;
171
172    while pos < markdown.len() {
173        // Look for opening "---\n" or "---\r\n"
174        let search_str = &markdown[pos..];
175        let delimiter_result = if let Some(p) = search_str.find("---\n") {
176            Some((p, 4, "\n"))
177        } else if let Some(p) = search_str.find("---\r\n") {
178            Some((p, 5, "\r\n"))
179        } else {
180            None
181        };
182
183        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
184            let abs_pos = pos + delimiter_pos;
185            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"
186
187            // Check if opening --- is followed by a blank line (horizontal rule, not metadata)
188            let followed_by_blank = if content_start < markdown.len() {
189                markdown[content_start..].starts_with('\n')
190                    || markdown[content_start..].starts_with("\r\n")
191            } else {
192                false
193            };
194
195            if followed_by_blank {
196                // This is a horizontal rule in the body, skip it
197                pos = abs_pos + 3; // Skip past "---"
198                continue;
199            }
200
201            // Found potential metadata block opening
202            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
203            let rest = &markdown[content_start..];
204
205            // First try to find delimiters with trailing newlines
206            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
207            let closing_with_newline = closing_patterns
208                .iter()
209                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
210                .min_by_key(|(p, _)| *p);
211
212            // Also check for closing at end of document (no trailing newline)
213            let closing_at_eof = ["\n---", "\r\n---"]
214                .iter()
215                .filter_map(|delim| {
216                    rest.find(delim).and_then(|p| {
217                        if p + delim.len() == rest.len() {
218                            Some((p, delim.len()))
219                        } else {
220                            None
221                        }
222                    })
223                })
224                .min_by_key(|(p, _)| *p);
225
226            let closing_result = match (closing_with_newline, closing_at_eof) {
227                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
228                (Some(_), Some(_)) => closing_with_newline,
229                (Some(_), None) => closing_with_newline,
230                (None, Some(_)) => closing_at_eof,
231                (None, None) => None,
232            };
233
234            if let Some((closing_pos, closing_len)) = closing_result {
235                let abs_closing_pos = content_start + closing_pos;
236                let content = &markdown[content_start..abs_closing_pos];
237
238                // Check YAML size limit
239                if content.len() > crate::error::MAX_YAML_SIZE {
240                    return Err(format!(
241                        "YAML block too large: {} bytes (max: {} bytes)",
242                        content.len(),
243                        crate::error::MAX_YAML_SIZE
244                    )
245                    .into());
246                }
247
248                // Check if the block is contiguous (no blank lines in the YAML content)
249                if content.contains("\n\n") || content.contains("\r\n\r\n") {
250                    // Not a contiguous block
251                    if abs_pos == 0 {
252                        // Started at beginning but has blank lines - this is an error
253                        return Err("Frontmatter started but not closed with ---".into());
254                    }
255                    // Otherwise treat as horizontal rule in body
256                    pos = abs_pos + 3;
257                    continue;
258                }
259
260                // Parse YAML content to check for reserved keys (QUILL, SCOPE)
261                // First, try to parse as YAML
262                let (tag, quill_name, yaml_content) = if !content.is_empty() {
263                    // Try to parse the YAML to check for reserved keys
264                    match serde_yaml::from_str::<serde_yaml::Value>(content) {
265                        Ok(yaml_value) => {
266                            if let Some(mapping) = yaml_value.as_mapping() {
267                                let quill_key = serde_yaml::Value::String("QUILL".to_string());
268                                let scope_key = serde_yaml::Value::String("SCOPE".to_string());
269
270                                let has_quill = mapping.contains_key(&quill_key);
271                                let has_scope = mapping.contains_key(&scope_key);
272
273                                if has_quill && has_scope {
274                                    return Err(
275                                        "Cannot specify both QUILL and SCOPE in the same block"
276                                            .into(),
277                                    );
278                                }
279
280                                if has_quill {
281                                    // Extract quill name
282                                    let quill_value = mapping.get(&quill_key).unwrap();
283                                    let quill_name_str = quill_value
284                                        .as_str()
285                                        .ok_or_else(|| "QUILL value must be a string")?;
286
287                                    if !is_valid_tag_name(quill_name_str) {
288                                        return Err(format!(
289                                            "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
290                                            quill_name_str
291                                        )
292                                        .into());
293                                    }
294
295                                    // Remove QUILL from the YAML content for processing
296                                    let mut new_mapping = mapping.clone();
297                                    new_mapping.remove(&quill_key);
298                                    let new_yaml = serde_yaml::to_string(&new_mapping)
299                                        .map_err(|e| format!("Failed to serialize YAML: {}", e))?;
300
301                                    (None, Some(quill_name_str.to_string()), new_yaml)
302                                } else if has_scope {
303                                    // Extract scope field name
304                                    let scope_value = mapping.get(&scope_key).unwrap();
305                                    let field_name = scope_value
306                                        .as_str()
307                                        .ok_or_else(|| "SCOPE value must be a string")?;
308
309                                    if !is_valid_tag_name(field_name) {
310                                        return Err(format!(
311                                            "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
312                                            field_name
313                                        )
314                                        .into());
315                                    }
316
317                                    if field_name == BODY_FIELD {
318                                        return Err(format!(
319                                            "Cannot use reserved field name '{}' as SCOPE value",
320                                            BODY_FIELD
321                                        )
322                                        .into());
323                                    }
324
325                                    // Remove SCOPE from the YAML content for processing
326                                    let mut new_mapping = mapping.clone();
327                                    new_mapping.remove(&scope_key);
328                                    let new_yaml = serde_yaml::to_string(&new_mapping)
329                                        .map_err(|e| format!("Failed to serialize YAML: {}", e))?;
330
331                                    (Some(field_name.to_string()), None, new_yaml)
332                                } else {
333                                    // No reserved keys, treat as normal YAML
334                                    (None, None, content.to_string())
335                                }
336                            } else {
337                                // Not a mapping, treat as normal YAML
338                                (None, None, content.to_string())
339                            }
340                        }
341                        Err(_) => {
342                            // If YAML parsing fails here, we'll catch it later
343                            (None, None, content.to_string())
344                        }
345                    }
346                } else {
347                    (None, None, content.to_string())
348                };
349
350                blocks.push(MetadataBlock {
351                    start: abs_pos,
352                    end: abs_closing_pos + closing_len, // After closing delimiter
353                    yaml_content,
354                    tag,
355                    quill_name,
356                });
357
358                pos = abs_closing_pos + closing_len;
359            } else if abs_pos == 0 {
360                // Frontmatter started but not closed
361                return Err("Frontmatter started but not closed with ---".into());
362            } else {
363                // Not a valid metadata block, skip this position
364                pos = abs_pos + 3;
365            }
366        } else {
367            break;
368        }
369    }
370
371    Ok(blocks)
372}
373
374/// Decompose markdown into frontmatter fields and body
375pub fn decompose(
376    markdown: &str,
377) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
378    // Check input size limit
379    if markdown.len() > crate::error::MAX_INPUT_SIZE {
380        return Err(format!(
381            "Input too large: {} bytes (max: {} bytes)",
382            markdown.len(),
383            crate::error::MAX_INPUT_SIZE
384        )
385        .into());
386    }
387
388    let mut fields = HashMap::new();
389
390    // Find all metadata blocks
391    let blocks = find_metadata_blocks(markdown)?;
392
393    if blocks.is_empty() {
394        // No metadata blocks, entire content is body
395        fields.insert(
396            BODY_FIELD.to_string(),
397            serde_yaml::Value::String(markdown.to_string()),
398        );
399        return Ok(ParsedDocument::new(fields));
400    }
401
402    // Track which attributes are used for tagged blocks
403    let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
404    let mut has_global_frontmatter = false;
405    let mut global_frontmatter_index: Option<usize> = None;
406    let mut quill_name: Option<String> = None;
407
408    // First pass: identify global frontmatter, quill directive, and validate
409    for (idx, block) in blocks.iter().enumerate() {
410        // Check for quill directive
411        if let Some(ref name) = block.quill_name {
412            if quill_name.is_some() {
413                return Err("Multiple quill directives found: only one allowed".into());
414            }
415            quill_name = Some(name.clone());
416        }
417
418        // Check for global frontmatter (no tag and no quill directive)
419        if block.tag.is_none() && block.quill_name.is_none() {
420            if has_global_frontmatter {
421                return Err(
422                    "Multiple global frontmatter blocks found: only one untagged block allowed"
423                        .into(),
424                );
425            }
426            has_global_frontmatter = true;
427            global_frontmatter_index = Some(idx);
428        }
429    }
430
431    // Parse global frontmatter if present
432    if let Some(idx) = global_frontmatter_index {
433        let block = &blocks[idx];
434
435        // Parse YAML frontmatter
436        let yaml_fields: HashMap<String, serde_yaml::Value> = if block.yaml_content.is_empty() {
437            HashMap::new()
438        } else {
439            serde_yaml::from_str(&block.yaml_content)
440                .map_err(|e| format!("Invalid YAML frontmatter: {}", e))?
441        };
442
443        // Check that all tagged blocks don't conflict with global fields
444        for other_block in &blocks {
445            if let Some(ref tag) = other_block.tag {
446                if yaml_fields.contains_key(tag) {
447                    return Err(format!(
448                        "Name collision: global field '{}' conflicts with tagged attribute",
449                        tag
450                    )
451                    .into());
452                }
453            }
454        }
455
456        fields.extend(yaml_fields);
457    }
458
459    // Process blocks with quill directives
460    for block in &blocks {
461        if block.quill_name.is_some() {
462            // Quill directive blocks can have YAML content (becomes part of frontmatter)
463            if !block.yaml_content.is_empty() {
464                let yaml_fields: HashMap<String, serde_yaml::Value> =
465                    serde_yaml::from_str(&block.yaml_content)
466                        .map_err(|e| format!("Invalid YAML in quill block: {}", e))?;
467
468                // Check for conflicts with existing fields
469                for key in yaml_fields.keys() {
470                    if fields.contains_key(key) {
471                        return Err(format!(
472                            "Name collision: quill block field '{}' conflicts with existing field",
473                            key
474                        )
475                        .into());
476                    }
477                }
478
479                fields.extend(yaml_fields);
480            }
481        }
482    }
483
484    // Parse tagged blocks
485    for (idx, block) in blocks.iter().enumerate() {
486        if let Some(ref tag_name) = block.tag {
487            // Check if this conflicts with global fields
488            if fields.contains_key(tag_name) {
489                return Err(format!(
490                    "Name collision: tagged attribute '{}' conflicts with global field",
491                    tag_name
492                )
493                .into());
494            }
495
496            // Parse YAML metadata
497            let mut item_fields: HashMap<String, serde_yaml::Value> =
498                if block.yaml_content.is_empty() {
499                    HashMap::new()
500                } else {
501                    serde_yaml::from_str(&block.yaml_content).map_err(|e| {
502                        format!("Invalid YAML in tagged block '{}': {}", tag_name, e)
503                    })?
504                };
505
506            // Extract body for this tagged block
507            let body_start = block.end;
508            let body_end = if idx + 1 < blocks.len() {
509                blocks[idx + 1].start
510            } else {
511                markdown.len()
512            };
513            let body = &markdown[body_start..body_end];
514
515            // Add body to item fields
516            item_fields.insert(
517                BODY_FIELD.to_string(),
518                serde_yaml::Value::String(body.to_string()),
519            );
520
521            // Convert HashMap to serde_yaml::Value::Mapping
522            let item_value = serde_yaml::to_value(item_fields)?;
523
524            // Add to collection
525            tagged_attributes
526                .entry(tag_name.clone())
527                .or_insert_with(Vec::new)
528                .push(item_value);
529        }
530    }
531
532    // Extract global body
533    // Body starts after global frontmatter or quill block (whichever comes first)
534    // Body ends at the first scope block or EOF
535    let first_non_scope_block_idx = blocks
536        .iter()
537        .position(|b| b.tag.is_none() && b.quill_name.is_none())
538        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));
539
540    let (body_start, body_end) = if let Some(idx) = first_non_scope_block_idx {
541        // Body starts after the first non-scope block (global frontmatter or quill)
542        let start = blocks[idx].end;
543
544        // Body ends at the first scope block after this, or EOF
545        let end = blocks
546            .iter()
547            .skip(idx + 1)
548            .find(|b| b.tag.is_some())
549            .map(|b| b.start)
550            .unwrap_or(markdown.len());
551
552        (start, end)
553    } else {
554        // No global frontmatter or quill block - body is everything before the first scope block
555        let end = blocks
556            .iter()
557            .find(|b| b.tag.is_some())
558            .map(|b| b.start)
559            .unwrap_or(0);
560
561        (0, end)
562    };
563
564    let global_body = &markdown[body_start..body_end];
565
566    fields.insert(
567        BODY_FIELD.to_string(),
568        serde_yaml::Value::String(global_body.to_string()),
569    );
570
571    // Add all tagged collections to fields
572    for (tag_name, items) in tagged_attributes {
573        fields.insert(tag_name, serde_yaml::Value::Sequence(items));
574    }
575
576    let mut parsed = ParsedDocument::new(fields);
577
578    // Set quill tag if present
579    if let Some(name) = quill_name {
580        parsed.quill_tag = Some(name);
581    }
582
583    Ok(parsed)
584}
585
586#[cfg(test)]
587mod tests {
588    use super::*;
589
590    #[test]
591    fn test_no_frontmatter() {
592        let markdown = "# Hello World\n\nThis is a test.";
593        let doc = decompose(markdown).unwrap();
594
595        assert_eq!(doc.body(), Some(markdown));
596        assert_eq!(doc.fields().len(), 1);
597    }
598
599    #[test]
600    fn test_with_frontmatter() {
601        let markdown = r#"---
602title: Test Document
603author: Test Author
604---
605
606# Hello World
607
608This is the body."#;
609
610        let doc = decompose(markdown).unwrap();
611
612        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
613        assert_eq!(
614            doc.get_field("title").unwrap().as_str().unwrap(),
615            "Test Document"
616        );
617        assert_eq!(
618            doc.get_field("author").unwrap().as_str().unwrap(),
619            "Test Author"
620        );
621        assert_eq!(doc.fields().len(), 3); // title, author, body
622    }
623
624    #[test]
625    fn test_complex_yaml_frontmatter() {
626        let markdown = r#"---
627title: Complex Document
628tags:
629  - test
630  - yaml
631metadata:
632  version: 1.0
633  nested:
634    field: value
635---
636
637Content here."#;
638
639        let doc = decompose(markdown).unwrap();
640
641        assert_eq!(doc.body(), Some("\nContent here."));
642        assert_eq!(
643            doc.get_field("title").unwrap().as_str().unwrap(),
644            "Complex Document"
645        );
646
647        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
648        assert_eq!(tags.len(), 2);
649        assert_eq!(tags[0].as_str().unwrap(), "test");
650        assert_eq!(tags[1].as_str().unwrap(), "yaml");
651    }
652
653    #[test]
654    fn test_invalid_yaml() {
655        let markdown = r#"---
656title: [invalid yaml
657author: missing close bracket
658---
659
660Content here."#;
661
662        let result = decompose(markdown);
663        assert!(result.is_err());
664        assert!(result
665            .unwrap_err()
666            .to_string()
667            .contains("Invalid YAML frontmatter"));
668    }
669
670    #[test]
671    fn test_unclosed_frontmatter() {
672        let markdown = r#"---
673title: Test
674author: Test Author
675
676Content without closing ---"#;
677
678        let result = decompose(markdown);
679        assert!(result.is_err());
680        assert!(result.unwrap_err().to_string().contains("not closed"));
681    }
682
683    // Extended metadata tests
684
685    #[test]
686    fn test_basic_tagged_block() {
687        let markdown = r#"---
688title: Main Document
689---
690
691Main body content.
692
693---
694SCOPE: items
695name: Item 1
696---
697
698Body of item 1."#;
699
700        let doc = decompose(markdown).unwrap();
701
702        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
703        assert_eq!(
704            doc.get_field("title").unwrap().as_str().unwrap(),
705            "Main Document"
706        );
707
708        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
709        assert_eq!(items.len(), 1);
710
711        let item = items[0].as_mapping().unwrap();
712        assert_eq!(
713            item.get(&serde_yaml::Value::String("name".to_string()))
714                .unwrap()
715                .as_str()
716                .unwrap(),
717            "Item 1"
718        );
719        assert_eq!(
720            item.get(&serde_yaml::Value::String("body".to_string()))
721                .unwrap()
722                .as_str()
723                .unwrap(),
724            "\nBody of item 1."
725        );
726    }
727
728    #[test]
729    fn test_multiple_tagged_blocks() {
730        let markdown = r#"---
731SCOPE: items
732name: Item 1
733tags: [a, b]
734---
735
736First item body.
737
738---
739SCOPE: items
740name: Item 2
741tags: [c, d]
742---
743
744Second item body."#;
745
746        let doc = decompose(markdown).unwrap();
747
748        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
749        assert_eq!(items.len(), 2);
750
751        let item1 = items[0].as_mapping().unwrap();
752        assert_eq!(
753            item1
754                .get(&serde_yaml::Value::String("name".to_string()))
755                .unwrap()
756                .as_str()
757                .unwrap(),
758            "Item 1"
759        );
760
761        let item2 = items[1].as_mapping().unwrap();
762        assert_eq!(
763            item2
764                .get(&serde_yaml::Value::String("name".to_string()))
765                .unwrap()
766                .as_str()
767                .unwrap(),
768            "Item 2"
769        );
770    }
771
772    #[test]
773    fn test_mixed_global_and_tagged() {
774        let markdown = r#"---
775title: Global
776author: John Doe
777---
778
779Global body.
780
781---
782SCOPE: sections
783title: Section 1
784---
785
786Section 1 content.
787
788---
789SCOPE: sections
790title: Section 2
791---
792
793Section 2 content."#;
794
795        let doc = decompose(markdown).unwrap();
796
797        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
798        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
799
800        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
801        assert_eq!(sections.len(), 2);
802    }
803
804    #[test]
805    fn test_empty_tagged_metadata() {
806        let markdown = r#"---
807SCOPE: items
808---
809
810Body without metadata."#;
811
812        let doc = decompose(markdown).unwrap();
813
814        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
815        assert_eq!(items.len(), 1);
816
817        let item = items[0].as_mapping().unwrap();
818        assert_eq!(
819            item.get(&serde_yaml::Value::String("body".to_string()))
820                .unwrap()
821                .as_str()
822                .unwrap(),
823            "\nBody without metadata."
824        );
825    }
826
827    #[test]
828    fn test_tagged_block_without_body() {
829        let markdown = r#"---
830SCOPE: items
831name: Item
832---"#;
833
834        let doc = decompose(markdown).unwrap();
835
836        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
837        assert_eq!(items.len(), 1);
838
839        let item = items[0].as_mapping().unwrap();
840        assert_eq!(
841            item.get(&serde_yaml::Value::String("body".to_string()))
842                .unwrap()
843                .as_str()
844                .unwrap(),
845            ""
846        );
847    }
848
849    #[test]
850    fn test_name_collision_global_and_tagged() {
851        let markdown = r#"---
852items: "global value"
853---
854
855Body
856
857---
858SCOPE: items
859name: Item
860---
861
862Item body"#;
863
864        let result = decompose(markdown);
865        assert!(result.is_err());
866        assert!(result.unwrap_err().to_string().contains("collision"));
867    }
868
869    #[test]
870    fn test_reserved_field_name() {
871        let markdown = r#"---
872SCOPE: body
873content: Test
874---"#;
875
876        let result = decompose(markdown);
877        assert!(result.is_err());
878        assert!(result.unwrap_err().to_string().contains("reserved"));
879    }
880
881    #[test]
882    fn test_invalid_tag_syntax() {
883        let markdown = r#"---
884SCOPE: Invalid-Name
885title: Test
886---"#;
887
888        let result = decompose(markdown);
889        assert!(result.is_err());
890        assert!(result
891            .unwrap_err()
892            .to_string()
893            .contains("Invalid field name"));
894    }
895
896    #[test]
897    fn test_multiple_global_frontmatter_blocks() {
898        let markdown = r#"---
899title: First
900---
901
902Body
903
904---
905author: Second
906---
907
908More body"#;
909
910        let result = decompose(markdown);
911        assert!(result.is_err());
912        assert!(result
913            .unwrap_err()
914            .to_string()
915            .contains("Multiple global frontmatter"));
916    }
917
918    #[test]
919    fn test_adjacent_blocks_different_tags() {
920        let markdown = r#"---
921SCOPE: items
922name: Item 1
923---
924
925Item 1 body
926
927---
928SCOPE: sections
929title: Section 1
930---
931
932Section 1 body"#;
933
934        let doc = decompose(markdown).unwrap();
935
936        assert!(doc.get_field("items").is_some());
937        assert!(doc.get_field("sections").is_some());
938
939        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
940        assert_eq!(items.len(), 1);
941
942        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
943        assert_eq!(sections.len(), 1);
944    }
945
946    #[test]
947    fn test_order_preservation() {
948        let markdown = r#"---
949SCOPE: items
950id: 1
951---
952
953First
954
955---
956SCOPE: items
957id: 2
958---
959
960Second
961
962---
963SCOPE: items
964id: 3
965---
966
967Third"#;
968
969        let doc = decompose(markdown).unwrap();
970
971        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
972        assert_eq!(items.len(), 3);
973
974        for (i, item) in items.iter().enumerate() {
975            let mapping = item.as_mapping().unwrap();
976            let id = mapping
977                .get(&serde_yaml::Value::String("id".to_string()))
978                .unwrap()
979                .as_i64()
980                .unwrap();
981            assert_eq!(id, (i + 1) as i64);
982        }
983    }
984
985    #[test]
986    fn test_product_catalog_integration() {
987        let markdown = r#"---
988title: Product Catalog
989author: John Doe
990date: 2024-01-01
991---
992
993This is the main catalog description.
994
995---
996SCOPE: products
997name: Widget A
998price: 19.99
999sku: WID-001
1000---
1001
1002The **Widget A** is our most popular product.
1003
1004---
1005SCOPE: products
1006name: Gadget B
1007price: 29.99
1008sku: GAD-002
1009---
1010
1011The **Gadget B** is perfect for professionals.
1012
1013---
1014SCOPE: reviews
1015product: Widget A
1016rating: 5
1017---
1018
1019"Excellent product! Highly recommended."
1020
1021---
1022SCOPE: reviews
1023product: Gadget B
1024rating: 4
1025---
1026
1027"Very good, but a bit pricey.""#;
1028
1029        let doc = decompose(markdown).unwrap();
1030
1031        // Verify global fields
1032        assert_eq!(
1033            doc.get_field("title").unwrap().as_str().unwrap(),
1034            "Product Catalog"
1035        );
1036        assert_eq!(
1037            doc.get_field("author").unwrap().as_str().unwrap(),
1038            "John Doe"
1039        );
1040        assert_eq!(
1041            doc.get_field("date").unwrap().as_str().unwrap(),
1042            "2024-01-01"
1043        );
1044
1045        // Verify global body
1046        assert!(doc.body().unwrap().contains("main catalog description"));
1047
1048        // Verify products collection
1049        let products = doc.get_field("products").unwrap().as_sequence().unwrap();
1050        assert_eq!(products.len(), 2);
1051
1052        let product1 = products[0].as_mapping().unwrap();
1053        assert_eq!(
1054            product1
1055                .get(&serde_yaml::Value::String("name".to_string()))
1056                .unwrap()
1057                .as_str()
1058                .unwrap(),
1059            "Widget A"
1060        );
1061        assert_eq!(
1062            product1
1063                .get(&serde_yaml::Value::String("price".to_string()))
1064                .unwrap()
1065                .as_f64()
1066                .unwrap(),
1067            19.99
1068        );
1069
1070        // Verify reviews collection
1071        let reviews = doc.get_field("reviews").unwrap().as_sequence().unwrap();
1072        assert_eq!(reviews.len(), 2);
1073
1074        let review1 = reviews[0].as_mapping().unwrap();
1075        assert_eq!(
1076            review1
1077                .get(&serde_yaml::Value::String("product".to_string()))
1078                .unwrap()
1079                .as_str()
1080                .unwrap(),
1081            "Widget A"
1082        );
1083        assert_eq!(
1084            review1
1085                .get(&serde_yaml::Value::String("rating".to_string()))
1086                .unwrap()
1087                .as_i64()
1088                .unwrap(),
1089            5
1090        );
1091
1092        // Total fields: title, author, date, body, products, reviews = 6
1093        assert_eq!(doc.fields().len(), 6);
1094    }
1095
1096    #[test]
1097    fn test_quill_directive() {
1098        let markdown = r#"---
1099QUILL: usaf_memo
1100memo_for: [ORG/SYMBOL]
1101memo_from: [ORG/SYMBOL]
1102---
1103
1104This is the memo body."#;
1105
1106        let doc = decompose(markdown).unwrap();
1107
1108        // Verify quill tag is set
1109        assert_eq!(doc.quill_tag(), Some("usaf_memo"));
1110
1111        // Verify fields from quill block become frontmatter
1112        assert_eq!(
1113            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1114                .as_str()
1115                .unwrap(),
1116            "ORG/SYMBOL"
1117        );
1118
1119        // Verify body
1120        assert_eq!(doc.body(), Some("\nThis is the memo body."));
1121    }
1122
1123    #[test]
1124    fn test_quill_with_scope_blocks() {
1125        let markdown = r#"---
1126QUILL: document
1127title: Test Document
1128---
1129
1130Main body.
1131
1132---
1133SCOPE: sections
1134name: Section 1
1135---
1136
1137Section 1 body."#;
1138
1139        let doc = decompose(markdown).unwrap();
1140
1141        // Verify quill tag
1142        assert_eq!(doc.quill_tag(), Some("document"));
1143
1144        // Verify global field from quill block
1145        assert_eq!(
1146            doc.get_field("title").unwrap().as_str().unwrap(),
1147            "Test Document"
1148        );
1149
1150        // Verify scope blocks work
1151        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1152        assert_eq!(sections.len(), 1);
1153
1154        // Verify body
1155        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1156    }
1157
1158    #[test]
1159    fn test_multiple_quill_directives_error() {
1160        let markdown = r#"---
1161QUILL: first
1162---
1163
1164---
1165QUILL: second
1166---"#;
1167
1168        let result = decompose(markdown);
1169        assert!(result.is_err());
1170        assert!(result
1171            .unwrap_err()
1172            .to_string()
1173            .contains("Multiple quill directives"));
1174    }
1175
1176    #[test]
1177    fn test_invalid_quill_name() {
1178        let markdown = r#"---
1179QUILL: Invalid-Name
1180---"#;
1181
1182        let result = decompose(markdown);
1183        assert!(result.is_err());
1184        assert!(result
1185            .unwrap_err()
1186            .to_string()
1187            .contains("Invalid quill name"));
1188    }
1189
1190    #[test]
1191    fn test_quill_wrong_value_type() {
1192        let markdown = r#"---
1193QUILL: 123
1194---"#;
1195
1196        let result = decompose(markdown);
1197        assert!(result.is_err());
1198        assert!(result
1199            .unwrap_err()
1200            .to_string()
1201            .contains("QUILL value must be a string"));
1202    }
1203
1204    #[test]
1205    fn test_scope_wrong_value_type() {
1206        let markdown = r#"---
1207SCOPE: 123
1208---"#;
1209
1210        let result = decompose(markdown);
1211        assert!(result.is_err());
1212        assert!(result
1213            .unwrap_err()
1214            .to_string()
1215            .contains("SCOPE value must be a string"));
1216    }
1217
1218    #[test]
1219    fn test_both_quill_and_scope_error() {
1220        let markdown = r#"---
1221QUILL: test
1222SCOPE: items
1223---"#;
1224
1225        let result = decompose(markdown);
1226        assert!(result.is_err());
1227        assert!(result
1228            .unwrap_err()
1229            .to_string()
1230            .contains("Cannot specify both QUILL and SCOPE"));
1231    }
1232}
#[cfg(test)]
mod demo_file_test {
    use super::*;

    /// Parses the bundled extended-metadata demo fixture and checks its global
    /// fields, body, and tagged collections.
    #[test]
    fn test_extended_metadata_demo_file() {
        let markdown = include_str!("../../quillmark-fixtures/resources/extended_metadata_demo.md");
        let parsed = decompose(markdown).unwrap();

        // Global string fields.
        let title = parsed.get_field("title").unwrap().as_str().unwrap();
        assert_eq!(title, "Extended Metadata Demo");
        let author = parsed.get_field("author").unwrap().as_str().unwrap();
        assert_eq!(author, "Quillmark Team");

        // `version` is read back as a number rather than a string.
        let version = parsed.get_field("version").unwrap().as_f64().unwrap();
        assert_eq!(version, 1.0);

        // Body text.
        let body = parsed.body().unwrap();
        assert!(body.contains("extended YAML metadata standard"));

        // Tagged collections and their sizes.
        let features = parsed.get_field("features").unwrap().as_sequence().unwrap();
        assert_eq!(features.len(), 3);

        let use_cases = parsed.get_field("use_cases").unwrap().as_sequence().unwrap();
        assert_eq!(use_cases.len(), 2);

        // First entry of the features collection.
        let name_key = serde_yaml::Value::String("name".to_string());
        let first_feature = features[0].as_mapping().unwrap();
        let feature_name = first_feature.get(&name_key).unwrap().as_str().unwrap();
        assert_eq!(feature_name, "Tag Directives");
    }

    /// Input one byte longer than `MAX_INPUT_SIZE` is rejected as "Input too large".
    #[test]
    fn test_input_size_limit() {
        let oversized = "a".repeat(crate::error::MAX_INPUT_SIZE + 1);

        match decompose(&oversized) {
            Ok(_) => panic!("expected oversized input to be rejected"),
            Err(err) => assert!(err.to_string().contains("Input too large")),
        }
    }

    /// A frontmatter block whose single value exceeds `MAX_YAML_SIZE` is
    /// rejected as "YAML block too large".
    #[test]
    fn test_yaml_size_limit() {
        // One quoted scalar that is by itself longer than the YAML limit.
        let payload = "x".repeat(crate::error::MAX_YAML_SIZE + 1);
        let markdown = format!("---\ndata: \"{}\"\n---\n\nBody", payload);

        match decompose(&markdown) {
            Ok(_) => panic!("expected oversized YAML block to be rejected"),
            Err(err) => assert!(err.to_string().contains("YAML block too large")),
        }
    }

    /// A small document (1000-byte body) parses successfully.
    #[test]
    fn test_input_within_size_limit() {
        let body = "a".repeat(1000);
        let markdown = format!("---\ntitle: Test\n---\n\n{}", body);

        assert!(decompose(&markdown).is_ok());
    }

    /// A small frontmatter block parses successfully.
    #[test]
    fn test_yaml_within_size_limit() {
        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";

        assert!(decompose(markdown).is_ok());
    }
}