quillmark_core/parse.rs

//! # Parsing Module
//!
//! Parsing functionality for markdown documents with YAML frontmatter.
//!
//! ## Overview
//!
//! The `parse` module provides the [`ParsedDocument::from_markdown`] function for parsing markdown documents with YAML frontmatter into a [`ParsedDocument`].
//!
//! ## Key Types
//!
//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
//! - [`BODY_FIELD`]: Constant for the field name storing document body
//!
//! ## Examples
//!
//! ### Basic Parsing
//!
//! ```
//! use quillmark_core::ParsedDocument;
//!
//! let markdown = r#"---
//! title: My Document
//! author: John Doe
//! ---
//!
//! # Introduction
//!
//! Document content here.
//! "#;
//!
//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
//! let title = doc.get_field("title")
//!     .and_then(|v| v.as_str())
//!     .unwrap_or("Untitled");
//! ```
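//!
//! ### Tagged Blocks with `SCOPE`
//!
//! A minimal sketch of a document that collects tagged sections into an array field via a
//! `SCOPE` block (see PARSE.md for the full Extended YAML Metadata Standard):
//!
//! ```
//! use quillmark_core::ParsedDocument;
//!
//! let markdown = r#"---
//! title: Catalog
//! ---
//!
//! Intro text.
//!
//! ---
//! SCOPE: items
//! name: Item 1
//! ---
//!
//! Item body."#;
//!
//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
//! let items = doc.get_field("items").unwrap().as_sequence().unwrap();
//! assert_eq!(items.len(), 1);
//! assert_eq!(items[0].as_object().unwrap().get("name").unwrap().as_str(), Some("Item 1"));
//! ```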
//!
//! ## Error Handling
//!
//! The [`ParsedDocument::from_markdown`] function returns errors for:
//! - Malformed YAML syntax
//! - Unclosed frontmatter blocks
//! - Multiple global frontmatter blocks
//! - Both QUILL and SCOPE specified in the same block
//! - Reserved field name usage
//! - Name collisions
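//!
//! For example, an unclosed frontmatter block is rejected:
//!
//! ```
//! use quillmark_core::ParsedDocument;
//!
//! let result = ParsedDocument::from_markdown("---\ntitle: Unclosed");
//! assert!(result.is_err());
//! ```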
//!
//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.

use std::collections::HashMap;

use crate::value::QuillValue;

/// The field name used to store the document body
pub const BODY_FIELD: &str = "body";

/// Convert a `serde_yaml::Error` into a message string, appending line/column location when available
fn yaml_error_to_string(e: serde_yaml::Error, context: &str) -> String {
    let mut msg = format!("{}: {}", context, e);

    if let Some(loc) = e.location() {
        msg.push_str(&format!(" at line {}, column {}", loc.line(), loc.column()));
    }

    msg
}

/// Reserved tag name for quill specification
pub const QUILL_TAG: &str = "quill";

/// A parsed markdown document with frontmatter
#[derive(Debug, Clone)]
pub struct ParsedDocument {
    fields: HashMap<String, QuillValue>,
    quill_tag: Option<String>,
}

impl ParsedDocument {
    /// Create a new ParsedDocument with the given fields
    pub fn new(fields: HashMap<String, QuillValue>) -> Self {
        Self {
            fields,
            quill_tag: None,
        }
    }

    /// Create a ParsedDocument from fields and optional quill tag
    pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: Option<String>) -> Self {
        Self { fields, quill_tag }
    }

    /// Create a ParsedDocument from a markdown string
    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
        decompose(markdown).map_err(crate::error::ParseError::from)
    }

    /// Get the quill tag if specified (from QUILL key)
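    ///
    /// A minimal sketch:
    ///
    /// ```
    /// use quillmark_core::ParsedDocument;
    ///
    /// let doc = ParsedDocument::from_markdown("---\nQUILL: usaf_memo\n---\nBody").unwrap();
    /// assert_eq!(doc.quill_tag(), Some("usaf_memo"));
    /// ```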
    pub fn quill_tag(&self) -> Option<&str> {
        self.quill_tag.as_deref()
    }

    /// Get the document body
    pub fn body(&self) -> Option<&str> {
        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
    }

    /// Get a specific field
    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
        self.fields.get(name)
    }

    /// Get all fields (including body)
    pub fn fields(&self) -> &HashMap<String, QuillValue> {
        &self.fields
    }

    /// Create a new ParsedDocument with default values applied
    ///
    /// This method creates a new ParsedDocument with default values applied for any
    /// fields that are missing from the original document but have defaults specified.
    /// Existing fields are preserved and not overwritten.
    ///
    /// # Arguments
    ///
    /// * `defaults` - A HashMap of field names to their default QuillValues
    ///
    /// # Returns
    ///
    /// A new ParsedDocument with defaults applied for missing fields
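    ///
    /// # Examples
    ///
    /// A minimal sketch; the defaults here are taken from another parsed document so that no
    /// `QuillValue` needs to be constructed directly:
    ///
    /// ```
    /// use quillmark_core::ParsedDocument;
    ///
    /// let defaults = ParsedDocument::from_markdown("---\nstatus: draft\n---\nBody")
    ///     .unwrap()
    ///     .fields()
    ///     .clone();
    ///
    /// let doc = ParsedDocument::from_markdown("# Content only").unwrap();
    /// let doc = doc.with_defaults(&defaults);
    ///
    /// // The missing `status` field is filled in; the existing body is preserved.
    /// assert_eq!(doc.get_field("status").and_then(|v| v.as_str()), Some("draft"));
    /// assert_eq!(doc.body(), Some("# Content only"));
    /// ```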
    pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
        let mut fields = self.fields.clone();

        for (field_name, default_value) in defaults {
            // Only apply default if field is missing
            if !fields.contains_key(field_name) {
                fields.insert(field_name.clone(), default_value.clone());
            }
        }

        Self {
            fields,
            quill_tag: self.quill_tag.clone(),
        }
    }

    /// Create a new ParsedDocument with coerced field values
    ///
    /// This method applies type coercions to field values based on the schema.
    /// Coercions include:
    /// - Singular values to arrays when schema expects array
    /// - String "true"/"false" to boolean
    /// - Numbers to boolean (0=false, non-zero=true)
    /// - String numbers to number type
    /// - Boolean to number (true=1, false=0)
    ///
    /// # Arguments
    ///
    /// * `schema` - A JSON Schema object defining expected field types
    ///
    /// # Returns
    ///
    /// A new ParsedDocument with coerced field values
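    ///
    /// # Examples
    ///
    /// A minimal sketch; the schema value is itself parsed from YAML frontmatter here so that
    /// the example stays within this module's API:
    ///
    /// ```
    /// use quillmark_core::ParsedDocument;
    ///
    /// let schema = ParsedDocument::from_markdown(r#"---
    /// schema:
    ///   "$schema": https://json-schema.org/draft/2019-09/schema
    ///   type: object
    ///   properties:
    ///     count:
    ///       type: number
    /// ---
    /// x"#)
    /// .unwrap()
    /// .get_field("schema")
    /// .unwrap()
    /// .clone();
    ///
    /// let doc = ParsedDocument::from_markdown("---\ncount: \"42\"\n---\nx").unwrap();
    /// let doc = doc.with_coercion(&schema);
    ///
    /// // The string "42" is coerced to a number per the schema.
    /// assert_eq!(doc.get_field("count").unwrap().as_i64(), Some(42));
    /// ```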
    pub fn with_coercion(&self, schema: &QuillValue) -> Self {
        use crate::schema::coerce_document;

        let coerced_fields = coerce_document(schema, &self.fields);

        Self {
            fields: coerced_fields,
            quill_tag: self.quill_tag.clone(),
        }
    }
}

#[derive(Debug)]
struct MetadataBlock {
    start: usize, // Position of opening "---"
    end: usize,   // Position after closing "---\n"
    yaml_content: String,
    tag: Option<String>,        // Field name from SCOPE key
    quill_name: Option<String>, // Quill name from QUILL key
}

/// Validate that a tag name matches the pattern `[a-z_][a-z0-9_]*`
fn is_valid_tag_name(name: &str) -> bool {
    if name.is_empty() {
        return false;
    }

    let mut chars = name.chars();
    let first = chars.next().unwrap();

    if !first.is_ascii_lowercase() && first != '_' {
        return false;
    }

    for ch in chars {
        if !ch.is_ascii_lowercase() && !ch.is_ascii_digit() && ch != '_' {
            return false;
        }
    }

    true
}

/// Find all metadata blocks in the document
fn find_metadata_blocks(
    markdown: &str,
) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
    let mut blocks = Vec::new();
    let mut pos = 0;

    while pos < markdown.len() {
        // Look for opening "---\n" or "---\r\n"
        let search_str = &markdown[pos..];
        let delimiter_result = if let Some(p) = search_str.find("---\n") {
            Some((p, 4, "\n"))
        } else if let Some(p) = search_str.find("---\r\n") {
            Some((p, 5, "\r\n"))
        } else {
            None
        };

        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
            let abs_pos = pos + delimiter_pos;
            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"

            // Check if this --- is a horizontal rule (blank lines above AND below)
            let preceded_by_blank = if abs_pos > 0 {
                // Check if there's a blank line before the ---
                let before = &markdown[..abs_pos];
                before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
            } else {
                false
            };

            let followed_by_blank = if content_start < markdown.len() {
                markdown[content_start..].starts_with('\n')
                    || markdown[content_start..].starts_with("\r\n")
            } else {
                false
            };

            // Horizontal rule: blank lines both above and below
            if preceded_by_blank && followed_by_blank {
                // This is a horizontal rule in the body, skip it
                pos = abs_pos + 3; // Skip past "---"
                continue;
            }

            // Check if followed by non-blank line (or if we're at document start)
            // This starts a metadata block
            if followed_by_blank {
                // --- followed by blank line but NOT preceded by blank line
                // This is NOT a metadata block opening, skip it
                pos = abs_pos + 3;
                continue;
            }

            // Found potential metadata block opening (followed by non-blank line)
            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
            let rest = &markdown[content_start..];

            // First try to find delimiters with trailing newlines
            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
            let closing_with_newline = closing_patterns
                .iter()
                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
                .min_by_key(|(p, _)| *p);

            // Also check for closing at end of document (no trailing newline)
            let closing_at_eof = ["\n---", "\r\n---"]
                .iter()
                .filter_map(|delim| {
                    rest.find(delim).and_then(|p| {
                        if p + delim.len() == rest.len() {
                            Some((p, delim.len()))
                        } else {
                            None
                        }
                    })
                })
                .min_by_key(|(p, _)| *p);

            let closing_result = match (closing_with_newline, closing_at_eof) {
                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
                (Some(_), Some(_)) => closing_with_newline,
                (Some(_), None) => closing_with_newline,
                (None, Some(_)) => closing_at_eof,
                (None, None) => None,
            };

            if let Some((closing_pos, closing_len)) = closing_result {
                let abs_closing_pos = content_start + closing_pos;
                let content = &markdown[content_start..abs_closing_pos];

                // Check YAML size limit
                if content.len() > crate::error::MAX_YAML_SIZE {
                    return Err(format!(
                        "YAML block too large: {} bytes (max: {} bytes)",
                        content.len(),
                        crate::error::MAX_YAML_SIZE
                    )
                    .into());
                }

                // Parse YAML content to check for reserved keys (QUILL, SCOPE)
                // First, try to parse as YAML
                let (tag, quill_name, yaml_content) = if !content.is_empty() {
                    // Try to parse the YAML to check for reserved keys
                    match serde_yaml::from_str::<serde_yaml::Value>(content) {
                        Ok(yaml_value) => {
                            if let Some(mapping) = yaml_value.as_mapping() {
                                let quill_key = serde_yaml::Value::String("QUILL".to_string());
                                let scope_key = serde_yaml::Value::String("SCOPE".to_string());

                                let has_quill = mapping.contains_key(&quill_key);
                                let has_scope = mapping.contains_key(&scope_key);

                                if has_quill && has_scope {
                                    return Err(
                                        "Cannot specify both QUILL and SCOPE in the same block"
                                            .into(),
                                    );
                                }

                                if has_quill {
                                    // Extract quill name
                                    let quill_value = mapping.get(&quill_key).unwrap();
                                    let quill_name_str = quill_value
                                        .as_str()
                                        .ok_or("QUILL value must be a string")?;

                                    if !is_valid_tag_name(quill_name_str) {
                                        return Err(format!(
                                            "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
                                            quill_name_str
                                        )
                                        .into());
                                    }

                                    // Remove QUILL from the YAML content for processing
                                    let mut new_mapping = mapping.clone();
                                    new_mapping.remove(&quill_key);
                                    let new_yaml = serde_yaml::to_string(&new_mapping)
                                        .map_err(|e| format!("Failed to serialize YAML: {}", e))?;

                                    (None, Some(quill_name_str.to_string()), new_yaml)
                                } else if has_scope {
                                    // Extract scope field name
                                    let scope_value = mapping.get(&scope_key).unwrap();
                                    let field_name = scope_value
                                        .as_str()
                                        .ok_or("SCOPE value must be a string")?;

                                    if !is_valid_tag_name(field_name) {
                                        return Err(format!(
                                            "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
                                            field_name
                                        )
                                        .into());
                                    }

                                    if field_name == BODY_FIELD {
                                        return Err(format!(
                                            "Cannot use reserved field name '{}' as SCOPE value",
                                            BODY_FIELD
                                        )
                                        .into());
                                    }

                                    // Remove SCOPE from the YAML content for processing
                                    let mut new_mapping = mapping.clone();
                                    new_mapping.remove(&scope_key);
                                    let new_yaml = serde_yaml::to_string(&new_mapping)
                                        .map_err(|e| format!("Failed to serialize YAML: {}", e))?;

                                    (Some(field_name.to_string()), None, new_yaml)
                                } else {
                                    // No reserved keys, treat as normal YAML
                                    (None, None, content.to_string())
                                }
                            } else {
                                // Not a mapping, treat as normal YAML
                                (None, None, content.to_string())
                            }
                        }
                        Err(_) => {
                            // If YAML parsing fails here, we'll catch it later
                            (None, None, content.to_string())
                        }
                    }
                } else {
                    (None, None, content.to_string())
                };

                blocks.push(MetadataBlock {
                    start: abs_pos,
                    end: abs_closing_pos + closing_len, // After closing delimiter
                    yaml_content,
                    tag,
                    quill_name,
                });

                pos = abs_closing_pos + closing_len;
            } else if abs_pos == 0 {
                // Frontmatter started but not closed
                return Err("Frontmatter started but not closed with ---".into());
            } else {
                // Not a valid metadata block, skip this position
                pos = abs_pos + 3;
            }
        } else {
            break;
        }
    }

    Ok(blocks)
}

/// Decompose markdown into frontmatter fields and body
fn decompose(markdown: &str) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
    // Check input size limit
    if markdown.len() > crate::error::MAX_INPUT_SIZE {
        return Err(format!(
            "Input too large: {} bytes (max: {} bytes)",
            markdown.len(),
            crate::error::MAX_INPUT_SIZE
        )
        .into());
    }

    let mut fields = HashMap::new();

    // Find all metadata blocks
    let blocks = find_metadata_blocks(markdown)?;

    if blocks.is_empty() {
        // No metadata blocks, entire content is body
        fields.insert(
            BODY_FIELD.to_string(),
            QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
        );
        return Ok(ParsedDocument::new(fields));
    }

    // Track which attributes are used for tagged blocks
    let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
    let mut has_global_frontmatter = false;
    let mut global_frontmatter_index: Option<usize> = None;
    let mut quill_name: Option<String> = None;

    // First pass: identify global frontmatter, quill directive, and validate
    for (idx, block) in blocks.iter().enumerate() {
        // Check for quill directive
        if let Some(ref name) = block.quill_name {
            if quill_name.is_some() {
                return Err("Multiple quill directives found: only one allowed".into());
            }
            quill_name = Some(name.clone());
        }

        // Check for global frontmatter (no tag and no quill directive)
        if block.tag.is_none() && block.quill_name.is_none() {
            if has_global_frontmatter {
                return Err(
                    "Multiple global frontmatter blocks found: only one untagged block allowed"
                        .into(),
                );
            }
            has_global_frontmatter = true;
            global_frontmatter_index = Some(idx);
        }
    }

    // Parse global frontmatter if present
    if let Some(idx) = global_frontmatter_index {
        let block = &blocks[idx];

        // Parse YAML frontmatter
        let yaml_fields: HashMap<String, serde_yaml::Value> = if block.yaml_content.is_empty() {
            HashMap::new()
        } else {
            serde_yaml::from_str(&block.yaml_content)
                .map_err(|e| yaml_error_to_string(e, "Invalid YAML frontmatter"))?
        };

        // Check that all tagged blocks don't conflict with global fields
        // Exception: if the global field is an array, allow it (we'll merge later)
        for other_block in &blocks {
            if let Some(ref tag) = other_block.tag {
                if let Some(global_value) = yaml_fields.get(tag) {
                    // Check if the global value is an array
                    if global_value.as_sequence().is_none() {
                        return Err(format!(
                            "Name collision: global field '{}' conflicts with tagged attribute",
                            tag
                        )
                        .into());
                    }
                }
            }
        }

        // Convert YAML values to QuillValue at boundary
        for (key, value) in yaml_fields {
            fields.insert(key, QuillValue::from_yaml(value)?);
        }
    }

    // Process blocks with quill directives
    for block in &blocks {
        if block.quill_name.is_some() {
            // Quill directive blocks can have YAML content (becomes part of frontmatter)
            if !block.yaml_content.is_empty() {
                let yaml_fields: HashMap<String, serde_yaml::Value> =
                    serde_yaml::from_str(&block.yaml_content)
                        .map_err(|e| yaml_error_to_string(e, "Invalid YAML in quill block"))?;

                // Check for conflicts with existing fields
                for key in yaml_fields.keys() {
                    if fields.contains_key(key) {
                        return Err(format!(
                            "Name collision: quill block field '{}' conflicts with existing field",
                            key
                        )
                        .into());
                    }
                }

                // Convert YAML values to QuillValue at boundary
                for (key, value) in yaml_fields {
                    fields.insert(key, QuillValue::from_yaml(value)?);
                }
            }
        }
    }

    // Parse tagged blocks
    for (idx, block) in blocks.iter().enumerate() {
        if let Some(ref tag_name) = block.tag {
            // Check if this conflicts with global fields
            // Exception: if the global field is an array, allow it (we'll merge later)
            if let Some(existing_value) = fields.get(tag_name) {
                if existing_value.as_array().is_none() {
                    return Err(format!(
                        "Name collision: tagged attribute '{}' conflicts with global field",
                        tag_name
                    )
                    .into());
                }
            }

            // Parse YAML metadata
            let mut item_fields: HashMap<String, serde_yaml::Value> = if block
                .yaml_content
                .is_empty()
            {
                HashMap::new()
            } else {
                serde_yaml::from_str(&block.yaml_content).map_err(|e| {
                    yaml_error_to_string(e, &format!("Invalid YAML in tagged block '{}'", tag_name))
                })?
            };

            // Extract body for this tagged block
            let body_start = block.end;
            let body_end = if idx + 1 < blocks.len() {
                blocks[idx + 1].start
            } else {
                markdown.len()
            };
            let body = &markdown[body_start..body_end];

            // Add body to item fields
            item_fields.insert(
                BODY_FIELD.to_string(),
                serde_yaml::Value::String(body.to_string()),
            );

            // Convert HashMap to serde_yaml::Value::Mapping
            let item_value = serde_yaml::to_value(item_fields)?;

            // Add to collection
            tagged_attributes
                .entry(tag_name.clone())
                .or_default()
                .push(item_value);
        }
    }

    // Extract global body
    // Body starts after global frontmatter or quill block (whichever comes first)
    // Body ends at the first scope block or EOF
    let first_non_scope_block_idx = blocks
        .iter()
        .position(|b| b.tag.is_none() && b.quill_name.is_none())
        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));

    let (body_start, body_end) = if let Some(idx) = first_non_scope_block_idx {
        // Body starts after the first non-scope block (global frontmatter or quill)
        let start = blocks[idx].end;

        // Body ends at the first scope block after this, or EOF
        let end = blocks
            .iter()
            .skip(idx + 1)
            .find(|b| b.tag.is_some())
            .map(|b| b.start)
            .unwrap_or(markdown.len());

        (start, end)
    } else {
        // No global frontmatter or quill block - body is everything before the first scope block
        let end = blocks
            .iter()
            .find(|b| b.tag.is_some())
            .map(|b| b.start)
            .unwrap_or(0);

        (0, end)
    };

    let global_body = &markdown[body_start..body_end];

    fields.insert(
        BODY_FIELD.to_string(),
        QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
    );

    // Add all tagged collections to fields (convert to QuillValue)
    // If a field already exists and is an array, merge the new items into it
    for (tag_name, items) in tagged_attributes {
        if let Some(existing_value) = fields.get(&tag_name) {
            // The existing value must be an array (checked earlier)
            if let Some(existing_array) = existing_value.as_array() {
                // Convert new items from YAML to JSON
                let new_items_json: Vec<serde_json::Value> = items
                    .into_iter()
                    .map(|yaml_val| {
                        serde_json::to_value(&yaml_val)
                            .map_err(|e| format!("Failed to convert YAML to JSON: {}", e))
                    })
                    .collect::<Result<Vec<_>, _>>()?;

                // Combine existing and new items
                let mut merged_array = existing_array.clone();
                merged_array.extend(new_items_json);

                // Create QuillValue from merged JSON array
                let quill_value = QuillValue::from_json(serde_json::Value::Array(merged_array));
                fields.insert(tag_name, quill_value);
            } else {
                // This should not happen due to earlier validation, but handle it gracefully
                return Err(format!(
                    "Internal error: field '{}' exists but is not an array",
                    tag_name
                )
                .into());
            }
        } else {
            // No existing field, just create a new sequence
            let quill_value = QuillValue::from_yaml(serde_yaml::Value::Sequence(items))?;
            fields.insert(tag_name, quill_value);
        }
    }

    let mut parsed = ParsedDocument::new(fields);

    // Set quill tag if present
    if let Some(name) = quill_name {
        parsed.quill_tag = Some(name);
    }

    Ok(parsed)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_no_frontmatter() {
        let markdown = "# Hello World\n\nThis is a test.";
        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some(markdown));
        assert_eq!(doc.fields().len(), 1);
    }

    #[test]
    fn test_with_frontmatter() {
        let markdown = r#"---
title: Test Document
author: Test Author
---

# Hello World

This is the body."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Test Document"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Test Author"
        );
        assert_eq!(doc.fields().len(), 3); // title, author, body
    }

    #[test]
    fn test_complex_yaml_frontmatter() {
        let markdown = r#"---
title: Complex Document
tags:
  - test
  - yaml
metadata:
  version: 1.0
  nested:
    field: value
---

Content here."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\nContent here."));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Complex Document"
        );

        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
        assert_eq!(tags.len(), 2);
        assert_eq!(tags[0].as_str().unwrap(), "test");
        assert_eq!(tags[1].as_str().unwrap(), "yaml");
    }

    #[test]
    fn test_with_defaults_empty_document() {
        use std::collections::HashMap;

        let mut defaults = HashMap::new();
        defaults.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("draft")),
        );
        defaults.insert(
            "version".to_string(),
            QuillValue::from_json(serde_json::json!(1)),
        );

        // Create an empty parsed document
        let doc = ParsedDocument::new(HashMap::new());
        let doc_with_defaults = doc.with_defaults(&defaults);

        // Check that defaults were applied
        assert_eq!(
            doc_with_defaults
                .get_field("status")
                .unwrap()
                .as_str()
                .unwrap(),
            "draft"
        );
        assert_eq!(
            doc_with_defaults
                .get_field("version")
                .unwrap()
                .as_number()
                .unwrap()
                .as_i64()
                .unwrap(),
            1
        );
    }

    #[test]
    fn test_with_defaults_preserves_existing_values() {
        use std::collections::HashMap;

        let mut defaults = HashMap::new();
        defaults.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("draft")),
        );

        // Create document with existing status
        let mut fields = HashMap::new();
        fields.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("published")),
        );
        let doc = ParsedDocument::new(fields);

        let doc_with_defaults = doc.with_defaults(&defaults);

        // Existing value should be preserved
        assert_eq!(
            doc_with_defaults
                .get_field("status")
                .unwrap()
                .as_str()
                .unwrap(),
            "published"
        );
    }

    #[test]
    fn test_with_defaults_partial_application() {
        use std::collections::HashMap;

        let mut defaults = HashMap::new();
        defaults.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("draft")),
        );
        defaults.insert(
            "version".to_string(),
            QuillValue::from_json(serde_json::json!(1)),
        );

        // Create document with only one field
        let mut fields = HashMap::new();
        fields.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("published")),
        );
        let doc = ParsedDocument::new(fields);

        let doc_with_defaults = doc.with_defaults(&defaults);

        // Existing field preserved, missing field gets default
        assert_eq!(
            doc_with_defaults
                .get_field("status")
                .unwrap()
                .as_str()
                .unwrap(),
            "published"
        );
        assert_eq!(
            doc_with_defaults
                .get_field("version")
                .unwrap()
                .as_number()
                .unwrap()
                .as_i64()
                .unwrap(),
            1
        );
    }

    #[test]
    fn test_with_defaults_no_defaults() {
        use std::collections::HashMap;

        let defaults = HashMap::new(); // Empty defaults map

        let doc = ParsedDocument::new(HashMap::new());
        let doc_with_defaults = doc.with_defaults(&defaults);

        // No defaults should be applied
        assert!(doc_with_defaults.fields().is_empty());
    }

    #[test]
    fn test_with_defaults_complex_types() {
        use std::collections::HashMap;

        let mut defaults = HashMap::new();
        defaults.insert(
            "tags".to_string(),
            QuillValue::from_json(serde_json::json!(["default", "tag"])),
        );

        let doc = ParsedDocument::new(HashMap::new());
        let doc_with_defaults = doc.with_defaults(&defaults);

        // Complex default value should be applied
        let tags = doc_with_defaults
            .get_field("tags")
            .unwrap()
            .as_sequence()
            .unwrap();
        assert_eq!(tags.len(), 2);
        assert_eq!(tags[0].as_str().unwrap(), "default");
        assert_eq!(tags[1].as_str().unwrap(), "tag");
    }

    #[test]
    fn test_with_coercion_singular_to_array() {
        use std::collections::HashMap;

        let schema = QuillValue::from_json(serde_json::json!({
            "$schema": "https://json-schema.org/draft/2019-09/schema",
            "type": "object",
            "properties": {
                "tags": {"type": "array"}
            }
        }));

        let mut fields = HashMap::new();
        fields.insert(
            "tags".to_string(),
            QuillValue::from_json(serde_json::json!("single-tag")),
        );
        let doc = ParsedDocument::new(fields);

        let coerced_doc = doc.with_coercion(&schema);

        let tags = coerced_doc.get_field("tags").unwrap();
        assert!(tags.as_array().is_some());
        let tags_array = tags.as_array().unwrap();
        assert_eq!(tags_array.len(), 1);
        assert_eq!(tags_array[0].as_str().unwrap(), "single-tag");
    }

    #[test]
    fn test_with_coercion_string_to_boolean() {
        use std::collections::HashMap;

        let schema = QuillValue::from_json(serde_json::json!({
            "$schema": "https://json-schema.org/draft/2019-09/schema",
            "type": "object",
            "properties": {
                "active": {"type": "boolean"}
            }
        }));

        let mut fields = HashMap::new();
        fields.insert(
            "active".to_string(),
            QuillValue::from_json(serde_json::json!("true")),
        );
        let doc = ParsedDocument::new(fields);

        let coerced_doc = doc.with_coercion(&schema);

        assert_eq!(
            coerced_doc.get_field("active").unwrap().as_bool().unwrap(),
            true
        );
    }

    #[test]
    fn test_with_coercion_string_to_number() {
        use std::collections::HashMap;

        let schema = QuillValue::from_json(serde_json::json!({
            "$schema": "https://json-schema.org/draft/2019-09/schema",
            "type": "object",
            "properties": {
                "count": {"type": "number"}
            }
        }));

        let mut fields = HashMap::new();
        fields.insert(
            "count".to_string(),
            QuillValue::from_json(serde_json::json!("42")),
        );
        let doc = ParsedDocument::new(fields);

        let coerced_doc = doc.with_coercion(&schema);

        assert_eq!(
            coerced_doc.get_field("count").unwrap().as_i64().unwrap(),
            42
        );
    }

    #[test]
    fn test_invalid_yaml() {
        let markdown = r#"---
title: [invalid yaml
author: missing close bracket
---

Content here."#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid YAML frontmatter"));
    }

    #[test]
    fn test_unclosed_frontmatter() {
        let markdown = r#"---
title: Test
author: Test Author

Content without closing ---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("not closed"));
    }

    // Extended metadata tests

    #[test]
    fn test_basic_tagged_block() {
        let markdown = r#"---
title: Main Document
---

Main body content.

---
SCOPE: items
name: Item 1
---

Body of item 1."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Main Document"
        );

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
        assert_eq!(
            item.get("body").unwrap().as_str().unwrap(),
            "\nBody of item 1."
        );
    }

    #[test]
    fn test_multiple_tagged_blocks() {
        let markdown = r#"---
SCOPE: items
name: Item 1
tags: [a, b]
---

First item body.

---
SCOPE: items
name: Item 2
tags: [c, d]
---

Second item body."#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 2);

        let item1 = items[0].as_object().unwrap();
        assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");

        let item2 = items[1].as_object().unwrap();
        assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
    }

    #[test]
    fn test_mixed_global_and_tagged() {
        let markdown = r#"---
title: Global
author: John Doe
---

Global body.

---
SCOPE: sections
title: Section 1
---

Section 1 content.

---
SCOPE: sections
title: Section 2
---

Section 2 content."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));

        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
        assert_eq!(sections.len(), 2);
    }

    #[test]
    fn test_empty_tagged_metadata() {
        let markdown = r#"---
SCOPE: items
---

Body without metadata."#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(
            item.get("body").unwrap().as_str().unwrap(),
            "\nBody without metadata."
        );
    }

    #[test]
    fn test_tagged_block_without_body() {
        let markdown = r#"---
SCOPE: items
name: Item
---"#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
    }

    #[test]
    fn test_name_collision_global_and_tagged() {
        let markdown = r#"---
items: "global value"
---

Body

---
SCOPE: items
name: Item
---

Item body"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("collision"));
    }

    #[test]
    fn test_global_array_merged_with_scope() {
        // When global frontmatter has an array field with the same name as a SCOPE,
        // the SCOPE items should be added to the array
        let markdown = r#"---
items:
  - name: Global Item 1
    value: 100
  - name: Global Item 2
    value: 200
---

Global body

---
SCOPE: items
name: Scope Item 1
value: 300
---

Scope item 1 body

---
SCOPE: items
name: Scope Item 2
value: 400
---

Scope item 2 body"#;

        let doc = decompose(markdown).unwrap();

        // Verify the items array has all 4 items (2 from global + 2 from SCOPE)
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 4);

        // Verify first two items (from global array)
        let item1 = items[0].as_object().unwrap();
        assert_eq!(
            item1.get("name").unwrap().as_str().unwrap(),
            "Global Item 1"
        );
        assert_eq!(item1.get("value").unwrap().as_i64().unwrap(), 100);

        let item2 = items[1].as_object().unwrap();
        assert_eq!(
            item2.get("name").unwrap().as_str().unwrap(),
            "Global Item 2"
        );
        assert_eq!(item2.get("value").unwrap().as_i64().unwrap(), 200);

        // Verify last two items (from SCOPE blocks)
        let item3 = items[2].as_object().unwrap();
        assert_eq!(item3.get("name").unwrap().as_str().unwrap(), "Scope Item 1");
        assert_eq!(item3.get("value").unwrap().as_i64().unwrap(), 300);
        assert_eq!(
            item3.get("body").unwrap().as_str().unwrap(),
            "\nScope item 1 body\n\n"
        );

        let item4 = items[3].as_object().unwrap();
        assert_eq!(item4.get("name").unwrap().as_str().unwrap(), "Scope Item 2");
        assert_eq!(item4.get("value").unwrap().as_i64().unwrap(), 400);
        assert_eq!(
            item4.get("body").unwrap().as_str().unwrap(),
            "\nScope item 2 body"
        );
    }

    #[test]
    fn test_empty_global_array_with_scope() {
        // Edge case: global frontmatter has an empty array
        let markdown = r#"---
items: []
---

Global body

---
SCOPE: items
name: Item 1
---

Item 1 body"#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
    }

    #[test]
    fn test_reserved_field_name() {
        let markdown = r#"---
SCOPE: body
content: Test
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("reserved"));
    }

    #[test]
    fn test_invalid_tag_syntax() {
        let markdown = r#"---
SCOPE: Invalid-Name
title: Test
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid field name"));
    }

    #[test]
    fn test_multiple_global_frontmatter_blocks() {
        let markdown = r#"---
title: First
---

Body

---
author: Second
---

More body"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Multiple global frontmatter"));
    }

    #[test]
    fn test_adjacent_blocks_different_tags() {
        let markdown = r#"---
SCOPE: items
name: Item 1
---

Item 1 body

---
SCOPE: sections
title: Section 1
---

Section 1 body"#;

        let doc = decompose(markdown).unwrap();

        assert!(doc.get_field("items").is_some());
        assert!(doc.get_field("sections").is_some());

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
        assert_eq!(sections.len(), 1);
    }

    #[test]
    fn test_order_preservation() {
        let markdown = r#"---
SCOPE: items
id: 1
---

First

---
SCOPE: items
id: 2
---

Second

---
SCOPE: items
id: 3
---

Third"#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 3);

        for (i, item) in items.iter().enumerate() {
            let mapping = item.as_object().unwrap();
            let id = mapping.get("id").unwrap().as_i64().unwrap();
            assert_eq!(id, (i + 1) as i64);
        }
    }

    #[test]
    fn test_product_catalog_integration() {
        let markdown = r#"---
title: Product Catalog
author: John Doe
date: 2024-01-01
---

This is the main catalog description.

---
SCOPE: products
name: Widget A
price: 19.99
sku: WID-001
---

The **Widget A** is our most popular product.

---
SCOPE: products
name: Gadget B
price: 29.99
sku: GAD-002
---

The **Gadget B** is perfect for professionals.

---
SCOPE: reviews
product: Widget A
rating: 5
---

"Excellent product! Highly recommended."

---
SCOPE: reviews
product: Gadget B
rating: 4
---

"Very good, but a bit pricey.""#;

        let doc = decompose(markdown).unwrap();

        // Verify global fields
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Product Catalog"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "John Doe"
        );
        assert_eq!(
            doc.get_field("date").unwrap().as_str().unwrap(),
            "2024-01-01"
        );

        // Verify global body
        assert!(doc.body().unwrap().contains("main catalog description"));

        // Verify products collection
        let products = doc.get_field("products").unwrap().as_sequence().unwrap();
        assert_eq!(products.len(), 2);

        let product1 = products[0].as_object().unwrap();
        assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
        assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);

        // Verify reviews collection
        let reviews = doc.get_field("reviews").unwrap().as_sequence().unwrap();
        assert_eq!(reviews.len(), 2);

        let review1 = reviews[0].as_object().unwrap();
        assert_eq!(
            review1.get("product").unwrap().as_str().unwrap(),
            "Widget A"
        );
        assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);

        // Total fields: title, author, date, body, products, reviews = 6
        assert_eq!(doc.fields().len(), 6);
    }

    #[test]
    fn test_quill_directive() {
        let markdown = r#"---
QUILL: usaf_memo
memo_for: [ORG/SYMBOL]
memo_from: [ORG/SYMBOL]
---

This is the memo body."#;

        let doc = decompose(markdown).unwrap();

        // Verify quill tag is set
        assert_eq!(doc.quill_tag(), Some("usaf_memo"));

        // Verify fields from quill block become frontmatter
        assert_eq!(
            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
                .as_str()
                .unwrap(),
            "ORG/SYMBOL"
        );

        // Verify body
        assert_eq!(doc.body(), Some("\nThis is the memo body."));
    }

    #[test]
    fn test_quill_with_scope_blocks() {
        let markdown = r#"---
QUILL: document
title: Test Document
---

Main body.

---
SCOPE: sections
name: Section 1
---

Section 1 body."#;

        let doc = decompose(markdown).unwrap();

        // Verify quill tag
        assert_eq!(doc.quill_tag(), Some("document"));

        // Verify global field from quill block
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Test Document"
        );

        // Verify scope blocks work
        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
        assert_eq!(sections.len(), 1);

        // Verify body
        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
    }

    #[test]
    fn test_multiple_quill_directives_error() {
        let markdown = r#"---
QUILL: first
---

---
QUILL: second
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Multiple quill directives"));
    }

    #[test]
    fn test_invalid_quill_name() {
        let markdown = r#"---
QUILL: Invalid-Name
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid quill name"));
    }

    #[test]
    fn test_quill_wrong_value_type() {
        let markdown = r#"---
QUILL: 123
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("QUILL value must be a string"));
    }

    #[test]
    fn test_scope_wrong_value_type() {
        let markdown = r#"---
SCOPE: 123
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("SCOPE value must be a string"));
    }

    #[test]
    fn test_both_quill_and_scope_error() {
        let markdown = r#"---
QUILL: test
SCOPE: items
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Cannot specify both QUILL and SCOPE"));
    }

    #[test]
    fn test_blank_lines_in_frontmatter() {
        // New parsing standard: blank lines are allowed within YAML blocks
        let markdown = r#"---
title: Test Document
author: Test Author

description: This has a blank line above it
tags:
  - one
  - two
---

# Hello World

This is the body."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Test Document"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Test Author"
        );
        assert_eq!(
            doc.get_field("description").unwrap().as_str().unwrap(),
            "This has a blank line above it"
        );

        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
        assert_eq!(tags.len(), 2);
    }

    #[test]
    fn test_blank_lines_in_scope_blocks() {
        // Blank lines should be allowed in SCOPE blocks too
        let markdown = r#"---
SCOPE: items
name: Item 1

price: 19.99

tags:
  - electronics
  - gadgets
---

Body of item 1."#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
        assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);

        let tags = item.get("tags").unwrap().as_array().unwrap();
        assert_eq!(tags.len(), 2);
    }

    #[test]
    fn test_horizontal_rule_with_blank_lines_above_and_below() {
        // Horizontal rule: blank lines both above AND below the ---
        let markdown = r#"---
title: Test
---

First paragraph.

---

Second paragraph."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");

        // The body should contain the horizontal rule (---) as part of the content
        let body = doc.body().unwrap();
        assert!(body.contains("First paragraph."));
        assert!(body.contains("---"));
        assert!(body.contains("Second paragraph."));
    }

    #[test]
    fn test_horizontal_rule_not_preceded_by_blank() {
        // --- not preceded by blank line but followed by blank line is NOT a horizontal rule
        // It's also NOT a valid metadata block opening (since it's followed by blank)
        let markdown = r#"---
title: Test
---

First paragraph.
---

Second paragraph."#;

        let doc = decompose(markdown).unwrap();

        let body = doc.body().unwrap();
        // The second --- should be in the body as text (not a horizontal rule since no blank above)
        assert!(body.contains("---"));
    }

    #[test]
    fn test_multiple_blank_lines_in_yaml() {
        // Multiple blank lines should also be allowed
        let markdown = r#"---
title: Test


author: John Doe


version: 1.0
---

Body content."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "John Doe"
        );
        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
    }
}

#[cfg(test)]
mod demo_file_test {
    use super::*;

    #[test]
    fn test_extended_metadata_demo_file() {
        let markdown = include_str!("../../quillmark-fixtures/resources/extended_metadata_demo.md");
        let doc = decompose(markdown).unwrap();

        // Verify global fields
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Extended Metadata Demo"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Quillmark Team"
        );
        // version is parsed as a number by YAML
        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);

        // Verify body
        assert!(doc
            .body()
            .unwrap()
            .contains("extended YAML metadata standard"));

        // Verify features collection
        let features = doc.get_field("features").unwrap().as_sequence().unwrap();
        assert_eq!(features.len(), 3);

        // Verify use_cases collection
        let use_cases = doc.get_field("use_cases").unwrap().as_sequence().unwrap();
        assert_eq!(use_cases.len(), 2);

        // Check first feature
        let feature1 = features[0].as_object().unwrap();
        assert_eq!(
            feature1.get("name").unwrap().as_str().unwrap(),
            "Tag Directives"
        );
    }

    #[test]
    fn test_input_size_limit() {
        // Create markdown larger than MAX_INPUT_SIZE (10 MB)
        let size = crate::error::MAX_INPUT_SIZE + 1;
        let large_markdown = "a".repeat(size);

        let result = decompose(&large_markdown);
        assert!(result.is_err());

        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("Input too large"));
    }

    #[test]
    fn test_yaml_size_limit() {
        // Create YAML block larger than MAX_YAML_SIZE (1 MB)
        let mut markdown = String::from("---\n");

        // Create a very large YAML field
        let size = crate::error::MAX_YAML_SIZE + 1;
        markdown.push_str("data: \"");
        markdown.push_str(&"x".repeat(size));
        markdown.push_str("\"\n---\n\nBody");

        let result = decompose(&markdown);
        assert!(result.is_err());

        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("YAML block too large"));
    }

    #[test]
    fn test_input_within_size_limit() {
        // Create markdown just under the limit
        let size = 1000; // Much smaller than limit
        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));

        let result = decompose(&markdown);
        assert!(result.is_ok());
    }

    #[test]
    fn test_yaml_within_size_limit() {
        // Create YAML block well within the limit
        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";

        let result = decompose(&markdown);
        assert!(result.is_ok());
    }
}