// quillmark_core/parse.rs

//! # Parsing Module
//!
//! Parsing functionality for markdown documents with YAML frontmatter.
//!
//! ## Overview
//!
//! The `parse` module provides the [`ParsedDocument::from_markdown`] function for parsing markdown documents with YAML frontmatter.
//!
//! ## Key Types
//!
//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
//! - [`BODY_FIELD`]: Constant for the field name storing document body
//!
//! ## Examples
//!
//! ### Basic Parsing
//!
//! ```
//! use quillmark_core::ParsedDocument;
//!
//! let markdown = r#"---
//! title: My Document
//! author: John Doe
//! ---
//!
//! # Introduction
//!
//! Document content here.
//! "#;
//!
//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
//! let title = doc.get_field("title")
//!     .and_then(|v| v.as_str())
//!     .unwrap_or("Untitled");
//! ```
//!
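//! ### Tagged Blocks (SCOPE)
//!
//! A minimal sketch of the extended metadata syntax, mirroring this module's
//! tests (and assuming `QuillValue::as_sequence` is available to callers): a
//! `SCOPE` block collects its fields and body into an array-valued field on the
//! document, while a `QUILL: name` block records the quill name retrievable via
//! [`ParsedDocument::quill_tag`].
//!
//! ```
//! use quillmark_core::ParsedDocument;
//!
//! let markdown = r#"---
//! title: Main Document
//! ---
//!
//! Main body content.
//!
//! ---
//! SCOPE: items
//! name: Item 1
//! ---
//!
//! Body of item 1."#;
//!
//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
//! assert_eq!(doc.get_field("title").and_then(|v| v.as_str()), Some("Main Document"));
//! let items = doc.get_field("items").unwrap().as_sequence().unwrap();
//! assert_eq!(items.len(), 1);
//! ```
//!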
//! ## Error Handling
//!
//! The [`ParsedDocument::from_markdown`] function returns errors for:
//! - Malformed YAML syntax
//! - Unclosed frontmatter blocks
//! - Multiple global frontmatter blocks
//! - Both QUILL and SCOPE specified in the same block
//! - Reserved field name usage
//! - Name collisions
//!
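//! For example (a minimal sketch; a frontmatter block that is never closed is
//! rejected):
//!
//! ```
//! use quillmark_core::ParsedDocument;
//!
//! let result = ParsedDocument::from_markdown("---\ntitle: Test\n\nNo closing delimiter");
//! assert!(result.is_err());
//! ```
//!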
//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.

use std::collections::HashMap;

use crate::value::QuillValue;

/// The field name used to store the document body
pub const BODY_FIELD: &str = "body";

/// Convert a `serde_yaml::Error` into a message string, appending the error location when available
fn yaml_error_to_string(e: serde_yaml::Error, context: &str) -> String {
    let mut msg = format!("{}: {}", context, e);

    if let Some(loc) = e.location() {
        msg.push_str(&format!(" at line {}, column {}", loc.line(), loc.column()));
    }

    msg
}

/// Reserved tag name for quill specification
pub const QUILL_TAG: &str = "quill";

/// A parsed markdown document with frontmatter
#[derive(Debug, Clone)]
pub struct ParsedDocument {
    fields: HashMap<String, QuillValue>,
    quill_tag: Option<String>,
}

impl ParsedDocument {
    /// Create a new ParsedDocument with the given fields
    pub fn new(fields: HashMap<String, QuillValue>) -> Self {
        Self {
            fields,
            quill_tag: None,
        }
    }

    /// Create a ParsedDocument from fields and optional quill tag
    pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: Option<String>) -> Self {
        Self { fields, quill_tag }
    }

    /// Create a ParsedDocument from markdown string
    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
        decompose(markdown).map_err(crate::error::ParseError::from)
    }

    /// Get the quill tag if specified (from QUILL key)
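    ///
    /// A sketch based on this module's tests:
    ///
    /// ```
    /// use quillmark_core::ParsedDocument;
    ///
    /// let doc = ParsedDocument::from_markdown("---\nQUILL: usaf_memo\n---\n\nBody").unwrap();
    /// assert_eq!(doc.quill_tag(), Some("usaf_memo"));
    /// ```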
    pub fn quill_tag(&self) -> Option<&str> {
        self.quill_tag.as_deref()
    }

    /// Get the document body
    pub fn body(&self) -> Option<&str> {
        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
    }

    /// Get a specific field
    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
        self.fields.get(name)
    }

    /// Get all fields (including body)
    pub fn fields(&self) -> &HashMap<String, QuillValue> {
        &self.fields
    }

    /// Create a new ParsedDocument with default values applied
    ///
    /// This method creates a new ParsedDocument with default values applied for any
    /// fields that are missing from the original document but have defaults specified.
    /// Existing fields are preserved and not overwritten.
    ///
    /// # Arguments
    ///
    /// * `defaults` - A HashMap of field names to their default QuillValues
    ///
    /// # Returns
    ///
    /// A new ParsedDocument with defaults applied for missing fields
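    ///
    /// # Examples
    ///
    /// A minimal sketch in which the defaults are borrowed from another parsed
    /// document's fields:
    ///
    /// ```
    /// use quillmark_core::ParsedDocument;
    ///
    /// let defaults_doc = ParsedDocument::from_markdown("---\nstatus: draft\n---\n\nunused").unwrap();
    /// let doc = ParsedDocument::from_markdown("---\ntitle: My Doc\n---\n\nBody").unwrap();
    ///
    /// let doc = doc.with_defaults(defaults_doc.fields());
    /// // The missing field picks up the default; existing fields are untouched.
    /// assert_eq!(doc.get_field("status").and_then(|v| v.as_str()), Some("draft"));
    /// assert_eq!(doc.get_field("title").and_then(|v| v.as_str()), Some("My Doc"));
    /// ```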
    pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
        let mut fields = self.fields.clone();

        for (field_name, default_value) in defaults {
            // Only apply default if field is missing
            if !fields.contains_key(field_name) {
                fields.insert(field_name.clone(), default_value.clone());
            }
        }

        Self {
            fields,
            quill_tag: self.quill_tag.clone(),
        }
    }
}

#[derive(Debug)]
struct MetadataBlock {
    start: usize, // Position of opening "---"
    end: usize,   // Position after closing "---\n"
    yaml_content: String,
    tag: Option<String>,        // Field name from SCOPE key
    quill_name: Option<String>, // Quill name from QUILL key
}

/// Validate tag name follows pattern [a-z_][a-z0-9_]*
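/// (for example, `usaf_memo` and `_draft` are valid, while `Invalid-Name` is not)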
fn is_valid_tag_name(name: &str) -> bool {
    if name.is_empty() {
        return false;
    }

    let mut chars = name.chars();
    let first = chars.next().unwrap();

    if !first.is_ascii_lowercase() && first != '_' {
        return false;
    }

    for ch in chars {
        if !ch.is_ascii_lowercase() && !ch.is_ascii_digit() && ch != '_' {
            return false;
        }
    }

    true
}

/// Find all metadata blocks in the document
fn find_metadata_blocks(
    markdown: &str,
) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
    let mut blocks = Vec::new();
    let mut pos = 0;

    while pos < markdown.len() {
        // Look for opening "---\n" or "---\r\n"
        let search_str = &markdown[pos..];
        let delimiter_result = if let Some(p) = search_str.find("---\n") {
            Some((p, 4, "\n"))
        } else if let Some(p) = search_str.find("---\r\n") {
            Some((p, 5, "\r\n"))
        } else {
            None
        };

        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
            let abs_pos = pos + delimiter_pos;
            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"

            // Check if this --- is a horizontal rule (blank lines above AND below)
            let preceded_by_blank = if abs_pos > 0 {
                // Check if there's a blank line before the ---
                let before = &markdown[..abs_pos];
                before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
            } else {
                false
            };

            let followed_by_blank = if content_start < markdown.len() {
                markdown[content_start..].starts_with('\n')
                    || markdown[content_start..].starts_with("\r\n")
            } else {
                false
            };

            // Horizontal rule: blank lines both above and below
            if preceded_by_blank && followed_by_blank {
                // This is a horizontal rule in the body, skip it
                pos = abs_pos + 3; // Skip past "---"
                continue;
            }

            // Check if followed by non-blank line (or if we're at document start)
            // This starts a metadata block
            if followed_by_blank {
                // --- followed by blank line but NOT preceded by blank line
                // This is NOT a metadata block opening, skip it
                pos = abs_pos + 3;
                continue;
            }

            // Found potential metadata block opening (followed by non-blank line)
            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
            let rest = &markdown[content_start..];

            // First try to find delimiters with trailing newlines
            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
            let closing_with_newline = closing_patterns
                .iter()
                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
                .min_by_key(|(p, _)| *p);

            // Also check for closing at end of document (no trailing newline)
            let closing_at_eof = ["\n---", "\r\n---"]
                .iter()
                .filter_map(|delim| {
                    rest.find(delim).and_then(|p| {
                        if p + delim.len() == rest.len() {
                            Some((p, delim.len()))
                        } else {
                            None
                        }
                    })
                })
                .min_by_key(|(p, _)| *p);

            let closing_result = match (closing_with_newline, closing_at_eof) {
                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
                (Some(_), Some(_)) => closing_with_newline,
                (Some(_), None) => closing_with_newline,
                (None, Some(_)) => closing_at_eof,
                (None, None) => None,
            };

            if let Some((closing_pos, closing_len)) = closing_result {
                let abs_closing_pos = content_start + closing_pos;
                let content = &markdown[content_start..abs_closing_pos];

                // Check YAML size limit
                if content.len() > crate::error::MAX_YAML_SIZE {
                    return Err(format!(
                        "YAML block too large: {} bytes (max: {} bytes)",
                        content.len(),
                        crate::error::MAX_YAML_SIZE
                    )
                    .into());
                }

                // Parse YAML content to check for reserved keys (QUILL, SCOPE)
                // First, try to parse as YAML
                let (tag, quill_name, yaml_content) = if !content.is_empty() {
                    // Try to parse the YAML to check for reserved keys
                    match serde_yaml::from_str::<serde_yaml::Value>(content) {
                        Ok(yaml_value) => {
                            if let Some(mapping) = yaml_value.as_mapping() {
                                let quill_key = serde_yaml::Value::String("QUILL".to_string());
                                let scope_key = serde_yaml::Value::String("SCOPE".to_string());

                                let has_quill = mapping.contains_key(&quill_key);
                                let has_scope = mapping.contains_key(&scope_key);

                                if has_quill && has_scope {
                                    return Err(
                                        "Cannot specify both QUILL and SCOPE in the same block"
                                            .into(),
                                    );
                                }

                                if has_quill {
                                    // Extract quill name
                                    let quill_value = mapping.get(&quill_key).unwrap();
                                    let quill_name_str = quill_value
                                        .as_str()
                                        .ok_or_else(|| "QUILL value must be a string")?;

                                    if !is_valid_tag_name(quill_name_str) {
                                        return Err(format!(
                                            "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
                                            quill_name_str
                                        )
                                        .into());
                                    }

                                    // Remove QUILL from the YAML content for processing
                                    let mut new_mapping = mapping.clone();
                                    new_mapping.remove(&quill_key);
                                    let new_yaml = serde_yaml::to_string(&new_mapping)
                                        .map_err(|e| format!("Failed to serialize YAML: {}", e))?;

                                    (None, Some(quill_name_str.to_string()), new_yaml)
                                } else if has_scope {
                                    // Extract scope field name
                                    let scope_value = mapping.get(&scope_key).unwrap();
                                    let field_name = scope_value
                                        .as_str()
                                        .ok_or_else(|| "SCOPE value must be a string")?;

                                    if !is_valid_tag_name(field_name) {
                                        return Err(format!(
                                            "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
                                            field_name
                                        )
                                        .into());
                                    }

                                    if field_name == BODY_FIELD {
                                        return Err(format!(
                                            "Cannot use reserved field name '{}' as SCOPE value",
                                            BODY_FIELD
                                        )
                                        .into());
                                    }

                                    // Remove SCOPE from the YAML content for processing
                                    let mut new_mapping = mapping.clone();
                                    new_mapping.remove(&scope_key);
                                    let new_yaml = serde_yaml::to_string(&new_mapping)
                                        .map_err(|e| format!("Failed to serialize YAML: {}", e))?;

                                    (Some(field_name.to_string()), None, new_yaml)
                                } else {
                                    // No reserved keys, treat as normal YAML
                                    (None, None, content.to_string())
                                }
                            } else {
                                // Not a mapping, treat as normal YAML
                                (None, None, content.to_string())
                            }
                        }
                        Err(_) => {
                            // If YAML parsing fails here, we'll catch it later
                            (None, None, content.to_string())
                        }
                    }
                } else {
                    (None, None, content.to_string())
                };

                blocks.push(MetadataBlock {
                    start: abs_pos,
                    end: abs_closing_pos + closing_len, // After closing delimiter
                    yaml_content,
                    tag,
                    quill_name,
                });

                pos = abs_closing_pos + closing_len;
            } else if abs_pos == 0 {
                // Frontmatter started but not closed
                return Err("Frontmatter started but not closed with ---".into());
            } else {
                // Not a valid metadata block, skip this position
                pos = abs_pos + 3;
            }
        } else {
            break;
        }
    }

    Ok(blocks)
}

/// Decompose markdown into frontmatter fields and body
fn decompose(markdown: &str) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
    // Check input size limit
    if markdown.len() > crate::error::MAX_INPUT_SIZE {
        return Err(format!(
            "Input too large: {} bytes (max: {} bytes)",
            markdown.len(),
            crate::error::MAX_INPUT_SIZE
        )
        .into());
    }

    let mut fields = HashMap::new();

    // Find all metadata blocks
    let blocks = find_metadata_blocks(markdown)?;

    if blocks.is_empty() {
        // No metadata blocks, entire content is body
        fields.insert(
            BODY_FIELD.to_string(),
            QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
        );
        return Ok(ParsedDocument::new(fields));
    }

    // Track which attributes are used for tagged blocks
    let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
    let mut has_global_frontmatter = false;
    let mut global_frontmatter_index: Option<usize> = None;
    let mut quill_name: Option<String> = None;

    // First pass: identify global frontmatter, quill directive, and validate
    for (idx, block) in blocks.iter().enumerate() {
        // Check for quill directive
        if let Some(ref name) = block.quill_name {
            if quill_name.is_some() {
                return Err("Multiple quill directives found: only one allowed".into());
            }
            quill_name = Some(name.clone());
        }

        // Check for global frontmatter (no tag and no quill directive)
        if block.tag.is_none() && block.quill_name.is_none() {
            if has_global_frontmatter {
                return Err(
                    "Multiple global frontmatter blocks found: only one untagged block allowed"
                        .into(),
                );
            }
            has_global_frontmatter = true;
            global_frontmatter_index = Some(idx);
        }
    }

    // Parse global frontmatter if present
    if let Some(idx) = global_frontmatter_index {
        let block = &blocks[idx];

        // Parse YAML frontmatter
        let yaml_fields: HashMap<String, serde_yaml::Value> = if block.yaml_content.is_empty() {
            HashMap::new()
        } else {
            serde_yaml::from_str(&block.yaml_content)
                .map_err(|e| yaml_error_to_string(e, "Invalid YAML frontmatter"))?
        };

        // Check that all tagged blocks don't conflict with global fields
        // Exception: if the global field is an array, allow it (we'll merge later)
        for other_block in &blocks {
            if let Some(ref tag) = other_block.tag {
                if let Some(global_value) = yaml_fields.get(tag) {
                    // Check if the global value is an array
                    if global_value.as_sequence().is_none() {
                        return Err(format!(
                            "Name collision: global field '{}' conflicts with tagged attribute",
                            tag
                        )
                        .into());
                    }
                }
            }
        }

        // Convert YAML values to QuillValue at boundary
        for (key, value) in yaml_fields {
            fields.insert(key, QuillValue::from_yaml(value)?);
        }
    }

    // Process blocks with quill directives
    for block in &blocks {
        if block.quill_name.is_some() {
            // Quill directive blocks can have YAML content (becomes part of frontmatter)
            if !block.yaml_content.is_empty() {
                let yaml_fields: HashMap<String, serde_yaml::Value> =
                    serde_yaml::from_str(&block.yaml_content)
                        .map_err(|e| yaml_error_to_string(e, "Invalid YAML in quill block"))?;

                // Check for conflicts with existing fields
                for key in yaml_fields.keys() {
                    if fields.contains_key(key) {
                        return Err(format!(
                            "Name collision: quill block field '{}' conflicts with existing field",
                            key
                        )
                        .into());
                    }
                }

                // Convert YAML values to QuillValue at boundary
                for (key, value) in yaml_fields {
                    fields.insert(key, QuillValue::from_yaml(value)?);
                }
            }
        }
    }

    // Parse tagged blocks
    for (idx, block) in blocks.iter().enumerate() {
        if let Some(ref tag_name) = block.tag {
            // Check if this conflicts with global fields
            // Exception: if the global field is an array, allow it (we'll merge later)
            if let Some(existing_value) = fields.get(tag_name) {
                if existing_value.as_array().is_none() {
                    return Err(format!(
                        "Name collision: tagged attribute '{}' conflicts with global field",
                        tag_name
                    )
                    .into());
                }
            }

            // Parse YAML metadata
            let mut item_fields: HashMap<String, serde_yaml::Value> = if block
                .yaml_content
                .is_empty()
            {
                HashMap::new()
            } else {
                serde_yaml::from_str(&block.yaml_content).map_err(|e| {
                    yaml_error_to_string(e, &format!("Invalid YAML in tagged block '{}'", tag_name))
                })?
            };

            // Extract body for this tagged block
            let body_start = block.end;
            let body_end = if idx + 1 < blocks.len() {
                blocks[idx + 1].start
            } else {
                markdown.len()
            };
            let body = &markdown[body_start..body_end];

            // Add body to item fields
            item_fields.insert(
                BODY_FIELD.to_string(),
                serde_yaml::Value::String(body.to_string()),
            );

            // Convert HashMap to serde_yaml::Value::Mapping
            let item_value = serde_yaml::to_value(item_fields)?;

            // Add to collection
            tagged_attributes
                .entry(tag_name.clone())
                .or_insert_with(Vec::new)
                .push(item_value);
        }
    }

    // Extract global body
    // Body starts after global frontmatter or quill block (whichever comes first)
    // Body ends at the first scope block or EOF
    let first_non_scope_block_idx = blocks
        .iter()
        .position(|b| b.tag.is_none() && b.quill_name.is_none())
        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));

    let (body_start, body_end) = if let Some(idx) = first_non_scope_block_idx {
        // Body starts after the first non-scope block (global frontmatter or quill)
        let start = blocks[idx].end;

        // Body ends at the first scope block after this, or EOF
        let end = blocks
            .iter()
            .skip(idx + 1)
            .find(|b| b.tag.is_some())
            .map(|b| b.start)
            .unwrap_or(markdown.len());

        (start, end)
    } else {
        // No global frontmatter or quill block - body is everything before the first scope block
        let end = blocks
            .iter()
            .find(|b| b.tag.is_some())
            .map(|b| b.start)
            .unwrap_or(0);

        (0, end)
    };

    let global_body = &markdown[body_start..body_end];

    fields.insert(
        BODY_FIELD.to_string(),
        QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
    );

    // Add all tagged collections to fields (convert to QuillValue)
    // If a field already exists and is an array, merge the new items into it
    for (tag_name, items) in tagged_attributes {
        if let Some(existing_value) = fields.get(&tag_name) {
            // The existing value must be an array (checked earlier)
            if let Some(existing_array) = existing_value.as_array() {
                // Convert new items from YAML to JSON
                let new_items_json: Vec<serde_json::Value> = items
                    .into_iter()
                    .map(|yaml_val| {
                        serde_json::to_value(&yaml_val)
                            .map_err(|e| format!("Failed to convert YAML to JSON: {}", e))
                    })
                    .collect::<Result<Vec<_>, _>>()?;

                // Combine existing and new items
                let mut merged_array = existing_array.clone();
                merged_array.extend(new_items_json);

                // Create QuillValue from merged JSON array
                let quill_value = QuillValue::from_json(serde_json::Value::Array(merged_array));
                fields.insert(tag_name, quill_value);
            } else {
                // This should not happen due to earlier validation, but handle it gracefully
                return Err(format!(
                    "Internal error: field '{}' exists but is not an array",
                    tag_name
                )
                .into());
            }
        } else {
            // No existing field, just create a new sequence
            let quill_value = QuillValue::from_yaml(serde_yaml::Value::Sequence(items))?;
            fields.insert(tag_name, quill_value);
        }
    }

    let mut parsed = ParsedDocument::new(fields);

    // Set quill tag if present
    if let Some(name) = quill_name {
        parsed.quill_tag = Some(name);
    }

    Ok(parsed)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_no_frontmatter() {
        let markdown = "# Hello World\n\nThis is a test.";
        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some(markdown));
        assert_eq!(doc.fields().len(), 1);
    }

    #[test]
    fn test_with_frontmatter() {
        let markdown = r#"---
title: Test Document
author: Test Author
---

# Hello World

This is the body."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Test Document"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Test Author"
        );
        assert_eq!(doc.fields().len(), 3); // title, author, body
    }

    #[test]
    fn test_complex_yaml_frontmatter() {
        let markdown = r#"---
title: Complex Document
tags:
  - test
  - yaml
metadata:
  version: 1.0
  nested:
    field: value
---

Content here."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\nContent here."));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Complex Document"
        );

        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
        assert_eq!(tags.len(), 2);
        assert_eq!(tags[0].as_str().unwrap(), "test");
        assert_eq!(tags[1].as_str().unwrap(), "yaml");
    }

    #[test]
    fn test_with_defaults_empty_document() {
        use std::collections::HashMap;

        let mut defaults = HashMap::new();
        defaults.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("draft")),
        );
        defaults.insert(
            "version".to_string(),
            QuillValue::from_json(serde_json::json!(1)),
        );

        // Create an empty parsed document
        let doc = ParsedDocument::new(HashMap::new());
        let doc_with_defaults = doc.with_defaults(&defaults);

        // Check that defaults were applied
        assert_eq!(
            doc_with_defaults
                .get_field("status")
                .unwrap()
                .as_str()
                .unwrap(),
            "draft"
        );
        assert_eq!(
            doc_with_defaults
                .get_field("version")
                .unwrap()
                .as_number()
                .unwrap()
                .as_i64()
                .unwrap(),
            1
        );
    }

    #[test]
    fn test_with_defaults_preserves_existing_values() {
        use std::collections::HashMap;

        let mut defaults = HashMap::new();
        defaults.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("draft")),
        );

        // Create document with existing status
        let mut fields = HashMap::new();
        fields.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("published")),
        );
        let doc = ParsedDocument::new(fields);

        let doc_with_defaults = doc.with_defaults(&defaults);

        // Existing value should be preserved
        assert_eq!(
            doc_with_defaults
                .get_field("status")
                .unwrap()
                .as_str()
                .unwrap(),
            "published"
        );
    }

    #[test]
    fn test_with_defaults_partial_application() {
        use std::collections::HashMap;

        let mut defaults = HashMap::new();
        defaults.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("draft")),
        );
        defaults.insert(
            "version".to_string(),
            QuillValue::from_json(serde_json::json!(1)),
        );

        // Create document with only one field
        let mut fields = HashMap::new();
        fields.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("published")),
        );
        let doc = ParsedDocument::new(fields);

        let doc_with_defaults = doc.with_defaults(&defaults);

        // Existing field preserved, missing field gets default
        assert_eq!(
            doc_with_defaults
                .get_field("status")
                .unwrap()
                .as_str()
                .unwrap(),
            "published"
        );
        assert_eq!(
            doc_with_defaults
                .get_field("version")
                .unwrap()
                .as_number()
                .unwrap()
                .as_i64()
                .unwrap(),
            1
        );
    }

    #[test]
    fn test_with_defaults_no_defaults() {
        use std::collections::HashMap;

        let defaults = HashMap::new(); // Empty defaults map

        let doc = ParsedDocument::new(HashMap::new());
        let doc_with_defaults = doc.with_defaults(&defaults);

        // No defaults should be applied
        assert!(doc_with_defaults.fields().is_empty());
    }

    #[test]
    fn test_with_defaults_complex_types() {
        use std::collections::HashMap;

        let mut defaults = HashMap::new();
        defaults.insert(
            "tags".to_string(),
            QuillValue::from_json(serde_json::json!(["default", "tag"])),
        );

        let doc = ParsedDocument::new(HashMap::new());
        let doc_with_defaults = doc.with_defaults(&defaults);

        // Complex default value should be applied
        let tags = doc_with_defaults
            .get_field("tags")
            .unwrap()
            .as_sequence()
            .unwrap();
        assert_eq!(tags.len(), 2);
        assert_eq!(tags[0].as_str().unwrap(), "default");
        assert_eq!(tags[1].as_str().unwrap(), "tag");
    }

    #[test]
    fn test_invalid_yaml() {
        let markdown = r#"---
title: [invalid yaml
author: missing close bracket
---

Content here."#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid YAML frontmatter"));
    }

    #[test]
    fn test_unclosed_frontmatter() {
        let markdown = r#"---
title: Test
author: Test Author

Content without closing ---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("not closed"));
    }

    // Extended metadata tests

    #[test]
    fn test_basic_tagged_block() {
        let markdown = r#"---
title: Main Document
---

Main body content.

---
SCOPE: items
name: Item 1
---

Body of item 1."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Main Document"
        );

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
        assert_eq!(
            item.get("body").unwrap().as_str().unwrap(),
            "\nBody of item 1."
        );
    }

    #[test]
    fn test_multiple_tagged_blocks() {
        let markdown = r#"---
SCOPE: items
name: Item 1
tags: [a, b]
---

First item body.

---
SCOPE: items
name: Item 2
tags: [c, d]
---

Second item body."#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 2);

        let item1 = items[0].as_object().unwrap();
        assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");

        let item2 = items[1].as_object().unwrap();
        assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
    }

    #[test]
    fn test_mixed_global_and_tagged() {
        let markdown = r#"---
title: Global
author: John Doe
---

Global body.

---
SCOPE: sections
title: Section 1
---

Section 1 content.

---
SCOPE: sections
title: Section 2
---

Section 2 content."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));

        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
        assert_eq!(sections.len(), 2);
    }

    #[test]
    fn test_empty_tagged_metadata() {
        let markdown = r#"---
SCOPE: items
---

Body without metadata."#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(
            item.get("body").unwrap().as_str().unwrap(),
            "\nBody without metadata."
        );
    }

    #[test]
    fn test_tagged_block_without_body() {
        let markdown = r#"---
SCOPE: items
name: Item
---"#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
    }

    #[test]
    fn test_name_collision_global_and_tagged() {
        let markdown = r#"---
items: "global value"
---

Body

---
SCOPE: items
name: Item
---

Item body"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("collision"));
    }

    #[test]
    fn test_global_array_merged_with_scope() {
        // When global frontmatter has an array field with the same name as a SCOPE,
        // the SCOPE items should be added to the array
        let markdown = r#"---
items:
  - name: Global Item 1
    value: 100
  - name: Global Item 2
    value: 200
---

Global body

---
SCOPE: items
name: Scope Item 1
value: 300
---

Scope item 1 body

---
SCOPE: items
name: Scope Item 2
value: 400
---

Scope item 2 body"#;

        let doc = decompose(markdown).unwrap();

        // Verify the items array has all 4 items (2 from global + 2 from SCOPE)
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 4);

        // Verify first two items (from global array)
        let item1 = items[0].as_object().unwrap();
        assert_eq!(
            item1.get("name").unwrap().as_str().unwrap(),
            "Global Item 1"
        );
        assert_eq!(item1.get("value").unwrap().as_i64().unwrap(), 100);

        let item2 = items[1].as_object().unwrap();
        assert_eq!(
            item2.get("name").unwrap().as_str().unwrap(),
            "Global Item 2"
        );
        assert_eq!(item2.get("value").unwrap().as_i64().unwrap(), 200);

        // Verify last two items (from SCOPE blocks)
        let item3 = items[2].as_object().unwrap();
        assert_eq!(item3.get("name").unwrap().as_str().unwrap(), "Scope Item 1");
        assert_eq!(item3.get("value").unwrap().as_i64().unwrap(), 300);
        assert_eq!(
            item3.get("body").unwrap().as_str().unwrap(),
            "\nScope item 1 body\n\n"
        );

        let item4 = items[3].as_object().unwrap();
        assert_eq!(item4.get("name").unwrap().as_str().unwrap(), "Scope Item 2");
        assert_eq!(item4.get("value").unwrap().as_i64().unwrap(), 400);
        assert_eq!(
            item4.get("body").unwrap().as_str().unwrap(),
            "\nScope item 2 body"
        );
    }

    #[test]
    fn test_empty_global_array_with_scope() {
        // Edge case: global frontmatter has an empty array
        let markdown = r#"---
items: []
---

Global body

---
SCOPE: items
name: Item 1
---

Item 1 body"#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
    }

    #[test]
    fn test_reserved_field_name() {
        let markdown = r#"---
SCOPE: body
content: Test
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("reserved"));
    }

    #[test]
    fn test_invalid_tag_syntax() {
        let markdown = r#"---
SCOPE: Invalid-Name
title: Test
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid field name"));
    }

    #[test]
    fn test_multiple_global_frontmatter_blocks() {
        let markdown = r#"---
title: First
---

Body

---
author: Second
---

More body"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Multiple global frontmatter"));
    }

    #[test]
    fn test_adjacent_blocks_different_tags() {
        let markdown = r#"---
SCOPE: items
name: Item 1
---

Item 1 body

---
SCOPE: sections
title: Section 1
---

Section 1 body"#;

        let doc = decompose(markdown).unwrap();

        assert!(doc.get_field("items").is_some());
        assert!(doc.get_field("sections").is_some());

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
        assert_eq!(sections.len(), 1);
    }

    #[test]
    fn test_order_preservation() {
        let markdown = r#"---
SCOPE: items
id: 1
---

First

---
SCOPE: items
id: 2
---

Second

---
SCOPE: items
id: 3
---

Third"#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 3);

        for (i, item) in items.iter().enumerate() {
            let mapping = item.as_object().unwrap();
            let id = mapping.get("id").unwrap().as_i64().unwrap();
            assert_eq!(id, (i + 1) as i64);
        }
    }

    #[test]
    fn test_product_catalog_integration() {
        let markdown = r#"---
title: Product Catalog
author: John Doe
date: 2024-01-01
---

This is the main catalog description.

---
SCOPE: products
name: Widget A
price: 19.99
sku: WID-001
---

The **Widget A** is our most popular product.

---
SCOPE: products
name: Gadget B
price: 29.99
sku: GAD-002
---

The **Gadget B** is perfect for professionals.

---
SCOPE: reviews
product: Widget A
rating: 5
---

"Excellent product! Highly recommended."

---
SCOPE: reviews
product: Gadget B
rating: 4
---

"Very good, but a bit pricey.""#;

        let doc = decompose(markdown).unwrap();

        // Verify global fields
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Product Catalog"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "John Doe"
        );
        assert_eq!(
            doc.get_field("date").unwrap().as_str().unwrap(),
            "2024-01-01"
        );

        // Verify global body
        assert!(doc.body().unwrap().contains("main catalog description"));

        // Verify products collection
        let products = doc.get_field("products").unwrap().as_sequence().unwrap();
        assert_eq!(products.len(), 2);

        let product1 = products[0].as_object().unwrap();
        assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
        assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);

        // Verify reviews collection
        let reviews = doc.get_field("reviews").unwrap().as_sequence().unwrap();
        assert_eq!(reviews.len(), 2);

        let review1 = reviews[0].as_object().unwrap();
        assert_eq!(
            review1.get("product").unwrap().as_str().unwrap(),
            "Widget A"
        );
        assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);

        // Total fields: title, author, date, body, products, reviews = 6
        assert_eq!(doc.fields().len(), 6);
    }

    #[test]
    fn test_quill_directive() {
        let markdown = r#"---
QUILL: usaf_memo
memo_for: [ORG/SYMBOL]
memo_from: [ORG/SYMBOL]
---

This is the memo body."#;

        let doc = decompose(markdown).unwrap();

        // Verify quill tag is set
        assert_eq!(doc.quill_tag(), Some("usaf_memo"));

        // Verify fields from quill block become frontmatter
        assert_eq!(
            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
                .as_str()
                .unwrap(),
            "ORG/SYMBOL"
        );

        // Verify body
        assert_eq!(doc.body(), Some("\nThis is the memo body."));
    }

    #[test]
    fn test_quill_with_scope_blocks() {
        let markdown = r#"---
QUILL: document
title: Test Document
---

Main body.

---
SCOPE: sections
name: Section 1
---

Section 1 body."#;

        let doc = decompose(markdown).unwrap();

        // Verify quill tag
        assert_eq!(doc.quill_tag(), Some("document"));

        // Verify global field from quill block
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Test Document"
        );

        // Verify scope blocks work
        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
        assert_eq!(sections.len(), 1);

        // Verify body
        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
    }

    #[test]
    fn test_multiple_quill_directives_error() {
        let markdown = r#"---
QUILL: first
---

---
QUILL: second
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Multiple quill directives"));
    }

    #[test]
    fn test_invalid_quill_name() {
        let markdown = r#"---
QUILL: Invalid-Name
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid quill name"));
    }

    #[test]
    fn test_quill_wrong_value_type() {
        let markdown = r#"---
QUILL: 123
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("QUILL value must be a string"));
    }

    #[test]
    fn test_scope_wrong_value_type() {
        let markdown = r#"---
SCOPE: 123
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("SCOPE value must be a string"));
    }

    #[test]
    fn test_both_quill_and_scope_error() {
        let markdown = r#"---
QUILL: test
SCOPE: items
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Cannot specify both QUILL and SCOPE"));
    }

    #[test]
    fn test_blank_lines_in_frontmatter() {
        // New parsing standard: blank lines are allowed within YAML blocks
        let markdown = r#"---
title: Test Document
author: Test Author

description: This has a blank line above it
tags:
  - one
  - two
---

# Hello World

This is the body."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Test Document"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Test Author"
        );
        assert_eq!(
            doc.get_field("description").unwrap().as_str().unwrap(),
            "This has a blank line above it"
        );

        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
        assert_eq!(tags.len(), 2);
    }

    #[test]
    fn test_blank_lines_in_scope_blocks() {
        // Blank lines should be allowed in SCOPE blocks too
        let markdown = r#"---
SCOPE: items
name: Item 1

price: 19.99

tags:
  - electronics
  - gadgets
---

Body of item 1."#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
        assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);

        let tags = item.get("tags").unwrap().as_array().unwrap();
        assert_eq!(tags.len(), 2);
    }

    #[test]
    fn test_horizontal_rule_with_blank_lines_above_and_below() {
        // Horizontal rule: blank lines both above AND below the ---
        let markdown = r#"---
title: Test
---

First paragraph.

---

Second paragraph."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");

        // The body should contain the horizontal rule (---) as part of the content
        let body = doc.body().unwrap();
        assert!(body.contains("First paragraph."));
        assert!(body.contains("---"));
        assert!(body.contains("Second paragraph."));
    }

    #[test]
    fn test_horizontal_rule_not_preceded_by_blank() {
        // --- not preceded by blank line but followed by blank line is NOT a horizontal rule
        // It's also NOT a valid metadata block opening (since it's followed by blank)
        let markdown = r#"---
title: Test
---

First paragraph.
---

Second paragraph."#;

        let doc = decompose(markdown).unwrap();

        let body = doc.body().unwrap();
        // The second --- should be in the body as text (not a horizontal rule since no blank above)
        assert!(body.contains("---"));
    }

    #[test]
    fn test_multiple_blank_lines_in_yaml() {
        // Multiple blank lines should also be allowed
        let markdown = r#"---
title: Test


author: John Doe


version: 1.0
---

Body content."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "John Doe"
        );
        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
    }
}

#[cfg(test)]
mod demo_file_test {
    use super::*;

    #[test]
    fn test_extended_metadata_demo_file() {
        let markdown = include_str!("../../quillmark-fixtures/resources/extended_metadata_demo.md");
        let doc = decompose(markdown).unwrap();

        // Verify global fields
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Extended Metadata Demo"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Quillmark Team"
        );
        // version is parsed as a number by YAML
        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);

        // Verify body
        assert!(doc
            .body()
            .unwrap()
            .contains("extended YAML metadata standard"));

        // Verify features collection
        let features = doc.get_field("features").unwrap().as_sequence().unwrap();
        assert_eq!(features.len(), 3);

        // Verify use_cases collection
        let use_cases = doc.get_field("use_cases").unwrap().as_sequence().unwrap();
        assert_eq!(use_cases.len(), 2);

        // Check first feature
        let feature1 = features[0].as_object().unwrap();
        assert_eq!(
            feature1.get("name").unwrap().as_str().unwrap(),
            "Tag Directives"
        );
    }

    #[test]
    fn test_input_size_limit() {
        // Create markdown larger than MAX_INPUT_SIZE (10 MB)
        let size = crate::error::MAX_INPUT_SIZE + 1;
        let large_markdown = "a".repeat(size);

        let result = decompose(&large_markdown);
        assert!(result.is_err());

        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("Input too large"));
    }

    #[test]
    fn test_yaml_size_limit() {
        // Create YAML block larger than MAX_YAML_SIZE (1 MB)
        let mut markdown = String::from("---\n");

        // Create a very large YAML field
        let size = crate::error::MAX_YAML_SIZE + 1;
        markdown.push_str("data: \"");
        markdown.push_str(&"x".repeat(size));
        markdown.push_str("\"\n---\n\nBody");

        let result = decompose(&markdown);
        assert!(result.is_err());

        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("YAML block too large"));
    }

    #[test]
    fn test_input_within_size_limit() {
        // Create markdown just under the limit
        let size = 1000; // Much smaller than limit
        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));

        let result = decompose(&markdown);
        assert!(result.is_ok());
    }

    #[test]
    fn test_yaml_within_size_limit() {
        // Create YAML block well within the limit
        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";

        let result = decompose(markdown);
        assert!(result.is_ok());
    }
}