Skip to main content

quillmark_core/
parse.rs

//! # Parsing Module
//!
//! Parsing functionality for markdown documents with YAML frontmatter.
//!
//! ## Overview
//!
//! The `parse` module provides the [`ParsedDocument::from_markdown`] function for parsing
//! markdown documents into frontmatter fields, card blocks, and body content.
//!
//! ## Key Types
//!
//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
//! - [`BODY_FIELD`]: Constant for the field name storing document body
//!
//! ## Examples
//!
//! ### Basic Parsing
//!
//! ```
//! use quillmark_core::ParsedDocument;
//!
//! let markdown = r#"---
//! QUILL: my_quill
//! title: My Document
//! author: John Doe
//! ---
//!
//! # Introduction
//!
//! Document content here.
//! "#;
//!
//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
//! let title = doc.get_field("title")
//!     .and_then(|v| v.as_str())
//!     .unwrap_or("Untitled");
//! ```
//!
//! ## Error Handling
//!
//! The [`ParsedDocument::from_markdown`] function returns errors for:
//! - Malformed YAML syntax
//! - Unclosed frontmatter blocks
//! - Multiple global frontmatter blocks
//! - A missing `QUILL` field in the top-level frontmatter
//! - Both QUILL and CARD specified in the same block
//! - Reserved field name usage
//! - Name collisions
//!
//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.
49
50use std::collections::HashMap;
51use std::str::FromStr;
52
53use crate::error::ParseError;
54use crate::value::QuillValue;
55use crate::version::QuillReference;
56
/// The field name used to store the document body.
///
/// `decompose` stores the global body under this key in the top-level field map, and
/// stores each card's body under the same key inside that card's entry of `CARDS`.
pub const BODY_FIELD: &str = "BODY";
59
60/// A parsed markdown document with frontmatter
61#[derive(Debug, Clone)]
62pub struct ParsedDocument {
63    fields: HashMap<String, QuillValue>,
64    quill_ref: QuillReference,
65}
66
67impl ParsedDocument {
68    /// Create a ParsedDocument from fields and quill reference
69    pub fn new(fields: HashMap<String, QuillValue>, quill_ref: QuillReference) -> Self {
70        Self { fields, quill_ref }
71    }
72
73    /// Create a ParsedDocument from markdown string
74    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
75        decompose(markdown)
76    }
77
78    /// Get the quill reference (name + version selector)
79    pub fn quill_reference(&self) -> &QuillReference {
80        &self.quill_ref
81    }
82
83    /// Get the document body
84    pub fn body(&self) -> Option<&str> {
85        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
86    }
87
88    /// Get a specific field
89    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
90        self.fields.get(name)
91    }
92
93    /// Get all fields (including body)
94    pub fn fields(&self) -> &HashMap<String, QuillValue> {
95        &self.fields
96    }
97
98    /// Create a new ParsedDocument with default values applied
99    ///
100    /// This method creates a new ParsedDocument with default values applied for any
101    /// fields that are missing from the original document but have defaults specified.
102    /// Existing fields are preserved and not overwritten.
103    ///
104    /// # Arguments
105    ///
106    /// * `defaults` - A HashMap of field names to their default QuillValues
107    ///
108    /// # Returns
109    ///
110    /// A new ParsedDocument with defaults applied for missing fields
111    pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
112        let mut fields = self.fields.clone();
113
114        for (field_name, default_value) in defaults {
115            // Only apply default if field is missing
116            if !fields.contains_key(field_name) {
117                fields.insert(field_name.clone(), default_value.clone());
118            }
119        }
120
121        Self {
122            fields,
123            quill_ref: self.quill_ref.clone(),
124        }
125    }
126}
127
128#[derive(Debug)]
129struct MetadataBlock {
130    start: usize,                          // Position of opening "---"
131    end: usize,                            // Position after closing "---\n"
132    yaml_value: Option<serde_json::Value>, // Parsed YAML as JSON (None if empty or parse failed)
133    tag: Option<String>,                   // Field name from CARD key
134    quill_ref: Option<String>,             // Quill reference from QUILL key
135}
136
/// Validate that a tag name follows the pattern `[a-z_][a-z0-9_]*`.
///
/// Returns `false` for the empty string, for names whose first character is not an
/// ASCII lowercase letter or `_`, and for names containing any character other than
/// ASCII lowercase letters, ASCII digits, or `_`.
fn is_valid_tag_name(name: &str) -> bool {
    let mut chars = name.chars();
    match chars.next() {
        Some(first) if first.is_ascii_lowercase() || first == '_' => {
            chars.all(|ch| ch.is_ascii_lowercase() || ch.is_ascii_digit() || ch == '_')
        }
        // Empty name, or a first character outside `[a-z_]`.
        _ => false,
    }
}
158
159/// Check if a position is inside a fenced code block.
160///
161/// Uses CommonMark-style fenced block detection:
162/// - Backticks (```) and tildes (~~~) are supported
163/// - Opening fences are 3+ matching fence characters
164/// - Closing fences use the same fence character and length >= opening fence
165fn is_inside_fenced_block(markdown: &str, pos: usize) -> bool {
166    let before = &markdown[..pos];
167    let mut open_fence: Option<(u8, usize)> = None; // (fence char, opening run length)
168
169    // Check if document starts with a fence
170    if let Some((fence_char, fence_len, is_closing)) = fence_marker_at(before, 0, open_fence) {
171        if is_closing {
172            open_fence = None;
173        } else {
174            open_fence = Some((fence_char, fence_len));
175        }
176    }
177
178    // Scan line starts after newlines
179    for (i, _) in before.match_indices('\n') {
180        if let Some((fence_char, fence_len, is_closing)) =
181            fence_marker_at(before, i + 1, open_fence)
182        {
183            if is_closing {
184                open_fence = None;
185            } else {
186                open_fence = Some((fence_char, fence_len));
187            }
188        }
189    }
190
191    open_fence.is_some()
192}
193
/// Detects a CommonMark fence marker at a given line start position.
///
/// `pos` is the byte offset of the start of a line in `text`; `open_fence` is the
/// `(fence char, run length)` of the fence currently open, if any.
///
/// Returns `(fence_char, fence_len, is_closing)`:
/// - With no fence open, any line of up to 3 spaces of indentation followed by a run
///   of 3+ backticks or tildes is reported as an opening fence (`is_closing == false`).
/// - With a fence open, only a run of the same character, at least as long as the
///   opening run and followed by nothing but spaces/tabs, is reported (as closing).
///   Every other line yields `None`.
///
/// A trailing `\r` (CRLF line ending) is not treated as line content, so closing
/// fences are recognised in CRLF documents as well.
fn fence_marker_at(
    text: &str,
    pos: usize,
    open_fence: Option<(u8, usize)>,
) -> Option<(u8, usize, bool)> {
    if pos >= text.len() {
        return None;
    }

    // Extract the line starting at `pos`, without its line ending.
    let line = text[pos..].split('\n').next().unwrap_or("");
    // Without this, "```\r" would leave "\r" after the run and never count as closing.
    let line = line.strip_suffix('\r').unwrap_or(line);

    // Optional indentation is up to 3 spaces (CommonMark fence rule)
    let indent = line.bytes().take_while(|&b| b == b' ').count();
    if indent > 3 {
        return None;
    }

    let trimmed = &line[indent..];
    let first = *trimmed.as_bytes().first()?;
    if first != b'`' && first != b'~' {
        return None;
    }

    let run_len = trimmed.bytes().take_while(|&b| b == first).count();
    if run_len < 3 {
        return None;
    }

    match open_fence {
        None => Some((first, run_len, false)),
        Some((open_char, open_len)) => {
            // Closing fence line may only contain trailing spaces/tabs
            let rest = &trimmed[run_len..];
            let closes = first == open_char
                && run_len >= open_len
                && rest.chars().all(|c| c == ' ' || c == '\t');
            closes.then_some((first, run_len, true))
        }
    }
}
250
251/// Creates serde_saphyr Options with security budgets configured.
252///
253/// Uses MAX_YAML_DEPTH from error.rs to limit nesting depth at the parser level,
254/// which is more robust than heuristic-based pre-parse checks.
255fn yaml_parse_options() -> serde_saphyr::Options {
256    let budget = serde_saphyr::Budget {
257        max_depth: crate::error::MAX_YAML_DEPTH,
258        ..Default::default()
259    };
260    serde_saphyr::Options {
261        budget: Some(budget),
262        ..Default::default()
263    }
264}
265
266/// Find all metadata blocks in the document
267fn find_metadata_blocks(markdown: &str) -> Result<Vec<MetadataBlock>, crate::error::ParseError> {
268    let mut blocks = Vec::new();
269    let mut pos = 0;
270
271    while pos < markdown.len() {
272        // Look for opening "---\n" or "---\r\n"
273        let search_str = &markdown[pos..];
274        let delimiter_result = search_str
275            .find("---\n")
276            .map(|p| (p, 4, "\n"))
277            .or_else(|| search_str.find("---\r\n").map(|p| (p, 5, "\r\n")));
278
279        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
280            let abs_pos = pos + delimiter_pos;
281
282            // Check if the delimiter is at the start of a line
283            let is_start_of_line = if abs_pos == 0 {
284                true
285            } else {
286                let char_before = markdown.as_bytes()[abs_pos - 1];
287                char_before == b'\n' || char_before == b'\r'
288            };
289
290            if !is_start_of_line {
291                pos = abs_pos + 1;
292                continue;
293            }
294
295            // Skip if inside a fenced code block
296            if is_inside_fenced_block(markdown, abs_pos) {
297                pos = abs_pos + 3;
298                continue;
299            }
300
301            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"
302
303            // Triple dashes are always metadata block delimiters (never horizontal rules)
304
305            // Found potential metadata block opening
306            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
307            let rest = &markdown[content_start..];
308
309            // First try to find delimiters with trailing newlines
310            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
311            let closing_with_newline = closing_patterns
312                .iter()
313                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
314                .min_by_key(|(p, _)| *p);
315
316            // Also check for closing at end of document (no trailing newline)
317            let closing_at_eof = ["\n---", "\r\n---"]
318                .iter()
319                .filter_map(|delim| {
320                    rest.find(delim).and_then(|p| {
321                        if p + delim.len() == rest.len() {
322                            Some((p, delim.len()))
323                        } else {
324                            None
325                        }
326                    })
327                })
328                .min_by_key(|(p, _)| *p);
329
330            let closing_result = match (closing_with_newline, closing_at_eof) {
331                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
332                (Some(_), Some(_)) => closing_with_newline,
333                (Some(_), None) => closing_with_newline,
334                (None, Some(_)) => closing_at_eof,
335                (None, None) => None,
336            };
337
338            if let Some((closing_pos, closing_len)) = closing_result {
339                let abs_closing_pos = content_start + closing_pos;
340                let content = &markdown[content_start..abs_closing_pos];
341
342                // Check YAML size limit
343                if content.len() > crate::error::MAX_YAML_SIZE {
344                    return Err(crate::error::ParseError::InputTooLarge {
345                        size: content.len(),
346                        max: crate::error::MAX_YAML_SIZE,
347                    });
348                }
349
350                // Parse YAML content to check for reserved keys (QUILL, CARD)
351                // Uses configured budget to limit nesting depth (prevents stack overflow)
352                // Normalize: treat whitespace-only content as empty frontmatter
353                let content = content.trim();
354                let (tag, quill_ref, yaml_value) = if !content.is_empty() {
355                    // Try to parse the YAML with security budgets
356                    match serde_saphyr::from_str_with_options::<serde_json::Value>(
357                        content,
358                        yaml_parse_options(),
359                    ) {
360                        Ok(parsed_yaml) => {
361                            if let Some(mapping) = parsed_yaml.as_object() {
362                                let quill_key = "QUILL";
363                                let card_key = "CARD";
364
365                                let has_quill = mapping.contains_key(quill_key);
366                                let has_card = mapping.contains_key(card_key);
367
368                                if has_quill && has_card {
369                                    return Err(crate::error::ParseError::InvalidStructure(
370                                        "Cannot specify both QUILL and CARD in the same block"
371                                            .to_string(),
372                                    ));
373                                }
374
375                                // Check for reserved field names (BODY, CARDS)
376                                const RESERVED_FIELDS: &[&str] = &["BODY", "CARDS"];
377                                for reserved in RESERVED_FIELDS {
378                                    if mapping.contains_key(*reserved) {
379                                        return Err(crate::error::ParseError::InvalidStructure(
380                                            format!(
381                                                "Reserved field name '{}' cannot be used in YAML frontmatter",
382                                                reserved
383                                            ),
384                                        ));
385                                    }
386                                }
387
388                                if has_quill {
389                                    // Extract and parse quill reference
390                                    let quill_value = mapping.get(quill_key).unwrap();
391                                    let quill_ref_str = quill_value
392                                        .as_str()
393                                        .ok_or("QUILL value must be a string")?;
394
395                                    // Parse as QuillReference to validate name and version
396                                    let _quill_ref =
397                                        quill_ref_str.parse::<QuillReference>().map_err(|e| {
398                                            crate::error::ParseError::InvalidStructure(format!(
399                                                "Invalid QUILL reference '{}': {}",
400                                                quill_ref_str, e
401                                            ))
402                                        })?;
403
404                                    // Remove QUILL from the YAML value for processing
405                                    let mut new_mapping = mapping.clone();
406                                    new_mapping.remove(quill_key);
407                                    let new_value = if new_mapping.is_empty() {
408                                        None
409                                    } else {
410                                        Some(serde_json::Value::Object(new_mapping))
411                                    };
412
413                                    (None, Some(quill_ref_str.to_string()), new_value)
414                                } else if has_card {
415                                    // Extract card field name
416                                    let card_value = mapping.get(card_key).unwrap();
417                                    let field_name =
418                                        card_value.as_str().ok_or("CARD value must be a string")?;
419
420                                    if !is_valid_tag_name(field_name) {
421                                        return Err(crate::error::ParseError::InvalidStructure(format!(
422                                            "Invalid card field name '{}': must match pattern [a-z_][a-z0-9_]*",
423                                            field_name
424                                        )));
425                                    }
426
427                                    // Remove CARD from the YAML value for processing
428                                    let mut new_mapping = mapping.clone();
429                                    new_mapping.remove(card_key);
430                                    let new_value = if new_mapping.is_empty() {
431                                        None
432                                    } else {
433                                        Some(serde_json::Value::Object(new_mapping))
434                                    };
435
436                                    (Some(field_name.to_string()), None, new_value)
437                                } else {
438                                    // No reserved keys, keep the parsed YAML
439                                    (None, None, Some(parsed_yaml))
440                                }
441                            } else {
442                                // Not a mapping, keep the parsed YAML (could be null for whitespace)
443                                (None, None, Some(parsed_yaml))
444                            }
445                        }
446                        Err(e) => {
447                            // Calculate line number for the start of this block
448                            let block_start_line = markdown[..abs_pos].lines().count() + 1;
449                            return Err(crate::error::ParseError::YamlErrorWithLocation {
450                                message: e.to_string(),
451                                line: block_start_line,
452                                block_index: blocks.len(),
453                            });
454                        }
455                    }
456                } else {
457                    // Empty content
458                    (None, None, None)
459                };
460
461                blocks.push(MetadataBlock {
462                    start: abs_pos,
463                    end: abs_closing_pos + closing_len, // After closing delimiter
464                    yaml_value,
465                    tag,
466                    quill_ref,
467                });
468
469                // Check card count limit to prevent memory exhaustion
470                if blocks.len() > crate::error::MAX_CARD_COUNT {
471                    return Err(crate::error::ParseError::InputTooLarge {
472                        size: blocks.len(),
473                        max: crate::error::MAX_CARD_COUNT,
474                    });
475                }
476
477                pos = abs_closing_pos + closing_len;
478            } else {
479                // Metadata block started but not closed
480                return Err(crate::error::ParseError::InvalidStructure(
481                    "Metadata block started but not closed with ---".to_string(),
482                ));
483            }
484        } else {
485            break;
486        }
487    }
488
489    Ok(blocks)
490}
491
492/// Decompose markdown into frontmatter fields and body
493fn decompose(markdown: &str) -> Result<ParsedDocument, crate::error::ParseError> {
494    // Check input size limit
495    if markdown.len() > crate::error::MAX_INPUT_SIZE {
496        return Err(crate::error::ParseError::InputTooLarge {
497            size: markdown.len(),
498            max: crate::error::MAX_INPUT_SIZE,
499        });
500    }
501
502    let mut fields = HashMap::new();
503
504    // Find all metadata blocks
505    let blocks = find_metadata_blocks(markdown)?;
506
507    if blocks.is_empty() {
508        // No metadata blocks — entire content is body, but QUILL is required
509        return Err(crate::error::ParseError::InvalidStructure(
510            "Missing required QUILL field. Add `QUILL: <name>` to the frontmatter.".to_string(),
511        ));
512    }
513
514    // Collect all card items into unified CARDS array
515    let mut cards_array: Vec<serde_json::Value> = Vec::new();
516    let mut global_frontmatter_index: Option<usize> = None;
517    let mut quill_ref: Option<String> = None;
518
519    // First pass: identify global frontmatter, quill directive, and validate
520    for (idx, block) in blocks.iter().enumerate() {
521        if idx == 0 {
522            // Top-level frontmatter: can have QUILL or neither (not considered a card)
523            if let Some(ref name) = block.quill_ref {
524                quill_ref = Some(name.clone());
525            }
526            // If it has neither QUILL nor CARD, it's global frontmatter
527            if block.tag.is_none() && block.quill_ref.is_none() {
528                global_frontmatter_index = Some(idx);
529            }
530        } else {
531            // Inline blocks (idx > 0): MUST have CARD, cannot have QUILL
532            if block.quill_ref.is_some() {
533                return Err(crate::error::ParseError::InvalidStructure("QUILL directive can only appear in the top-level frontmatter, not in inline blocks. Use CARD instead.".to_string()));
534            }
535            if block.tag.is_none() {
536                // Inline block without CARD
537                return Err(crate::error::ParseError::missing_card_directive());
538            }
539        }
540    }
541
542    // Parse global frontmatter if present
543    if let Some(idx) = global_frontmatter_index {
544        let block = &blocks[idx];
545
546        // Get parsed JSON fields directly (already parsed in find_metadata_blocks)
547        let json_fields: HashMap<String, serde_json::Value> = match &block.yaml_value {
548            Some(serde_json::Value::Object(mapping)) => mapping
549                .iter()
550                .map(|(k, v)| (k.clone(), v.clone()))
551                .collect(),
552            Some(serde_json::Value::Null) => {
553                // Null value (from whitespace-only YAML) - treat as empty mapping
554                HashMap::new()
555            }
556            Some(_) => {
557                // Non-mapping, non-null YAML (e.g., scalar, sequence) - this is an error for frontmatter
558                return Err(crate::error::ParseError::InvalidStructure(
559                    "Invalid YAML frontmatter: expected a mapping".to_string(),
560                ));
561            }
562            None => HashMap::new(),
563        };
564
565        // Convert JSON values to QuillValue at boundary
566        for (key, value) in json_fields {
567            fields.insert(key, QuillValue::from_json(value));
568        }
569    }
570
571    // Process blocks with quill directives
572    for block in &blocks {
573        if block.quill_ref.is_some() {
574            // Quill directive blocks can have YAML content (becomes part of frontmatter)
575            if let Some(ref json_val) = block.yaml_value {
576                let json_fields: HashMap<String, serde_json::Value> = match json_val {
577                    serde_json::Value::Object(mapping) => mapping
578                        .iter()
579                        .map(|(k, v)| (k.clone(), v.clone()))
580                        .collect(),
581                    serde_json::Value::Null => {
582                        // Null value (from whitespace-only YAML) - treat as empty mapping
583                        HashMap::new()
584                    }
585                    _ => {
586                        return Err(crate::error::ParseError::InvalidStructure(
587                            "Invalid YAML in quill block: expected a mapping".to_string(),
588                        ));
589                    }
590                };
591
592                // Check for conflicts with existing fields
593                for key in json_fields.keys() {
594                    if fields.contains_key(key) {
595                        return Err(crate::error::ParseError::InvalidStructure(format!(
596                            "Name collision: quill block field '{}' conflicts with existing field",
597                            key
598                        )));
599                    }
600                }
601
602                // Convert JSON values to QuillValue at boundary
603                for (key, value) in json_fields {
604                    fields.insert(key, QuillValue::from_json(value));
605                }
606            }
607        }
608    }
609
610    // Parse tagged blocks (CARD blocks)
611    for (idx, block) in blocks.iter().enumerate() {
612        if let Some(ref tag_name) = block.tag {
613            // Get YAML metadata directly (already parsed in find_metadata_blocks)
614            // Get JSON metadata directly (already parsed in find_metadata_blocks)
615            let mut item_fields: serde_json::Map<String, serde_json::Value> =
616                match &block.yaml_value {
617                    Some(serde_json::Value::Object(mapping)) => mapping.clone(),
618                    Some(serde_json::Value::Null) => {
619                        // Null value (from whitespace-only YAML) - treat as empty mapping
620                        serde_json::Map::new()
621                    }
622                    Some(_) => {
623                        return Err(crate::error::ParseError::InvalidStructure(format!(
624                            "Invalid YAML in card block '{}': expected a mapping",
625                            tag_name
626                        )));
627                    }
628                    None => serde_json::Map::new(),
629                };
630
631            // Extract body for this card block
632            let body_start = block.end;
633            let body_end = if idx + 1 < blocks.len() {
634                blocks[idx + 1].start
635            } else {
636                markdown.len()
637            };
638            let body = &markdown[body_start..body_end];
639
640            // Add body to item fields
641            item_fields.insert(
642                BODY_FIELD.to_string(),
643                serde_json::Value::String(body.to_string()),
644            );
645
646            // Add CARD discriminator field
647            item_fields.insert(
648                "CARD".to_string(),
649                serde_json::Value::String(tag_name.clone()),
650            );
651
652            // Add to CARDS array
653            cards_array.push(serde_json::Value::Object(item_fields));
654        }
655    }
656
657    // Extract global body
658    // Body starts after global frontmatter or quill block (whichever comes first)
659    // Body ends at the first card block or EOF
660    let first_non_card_block_idx = blocks
661        .iter()
662        .position(|b| b.tag.is_none() && b.quill_ref.is_none())
663        .or_else(|| blocks.iter().position(|b| b.quill_ref.is_some()));
664
665    let (body_start, body_end) = if let Some(idx) = first_non_card_block_idx {
666        // Body starts after the first non-card block (global frontmatter or quill)
667        let start = blocks[idx].end;
668
669        // Body ends at the first card block after this, or EOF
670        let end = blocks
671            .iter()
672            .skip(idx + 1)
673            .find(|b| b.tag.is_some())
674            .map(|b| b.start)
675            .unwrap_or(markdown.len());
676
677        (start, end)
678    } else {
679        // No global frontmatter or quill block - body is everything before the first card block
680        let end = blocks
681            .iter()
682            .find(|b| b.tag.is_some())
683            .map(|b| b.start)
684            .unwrap_or(0);
685
686        (0, end)
687    };
688
689    let global_body = &markdown[body_start..body_end];
690
691    fields.insert(
692        BODY_FIELD.to_string(),
693        QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
694    );
695
696    // Always add CARDS array to fields (may be empty)
697    fields.insert(
698        "CARDS".to_string(),
699        QuillValue::from_json(serde_json::Value::Array(cards_array)),
700    );
701
702    // Check field count limit to prevent memory exhaustion
703    if fields.len() > crate::error::MAX_FIELD_COUNT {
704        return Err(crate::error::ParseError::InputTooLarge {
705            size: fields.len(),
706            max: crate::error::MAX_FIELD_COUNT,
707        });
708    }
709
710    let quill_tag = quill_ref.ok_or_else(|| {
711        ParseError::InvalidStructure(
712            "Missing required QUILL field. Add `QUILL: <name>` to the frontmatter.".to_string(),
713        )
714    })?;
715    let quill_ref = QuillReference::from_str(&quill_tag).map_err(|e| {
716        ParseError::InvalidStructure(format!("Invalid QUILL tag '{}': {}", quill_tag, e))
717    })?;
718    let parsed = ParsedDocument::new(fields, quill_ref);
719
720    Ok(parsed)
721}
722
723#[cfg(test)]
724mod tests {
725    use super::*;
726
727    #[test]
728    fn test_no_frontmatter() {
729        let markdown = "# Hello World\n\nThis is a test.";
730        let result = decompose(markdown);
731        assert!(result.is_err());
732        assert!(result
733            .unwrap_err()
734            .to_string()
735            .contains("Missing required QUILL field"));
736    }
737
738    #[test]
739    fn test_with_frontmatter() {
740        let markdown = r#"---
741QUILL: test_quill
742title: Test Document
743author: Test Author
744---
745
746# Hello World
747
748This is the body."#;
749
750        let doc = decompose(markdown).unwrap();
751
752        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
753        assert_eq!(
754            doc.get_field("title").unwrap().as_str().unwrap(),
755            "Test Document"
756        );
757        assert_eq!(
758            doc.get_field("author").unwrap().as_str().unwrap(),
759            "Test Author"
760        );
761        assert_eq!(doc.fields().len(), 4); // title, author, body, CARDS
762        assert_eq!(doc.quill_reference().name, "test_quill");
763    }
764
765    #[test]
766    fn test_whitespace_frontmatter() {
767        // Frontmatter with only whitespace has no QUILL → error
768        let markdown = "---\n   \n---\n\n# Hello";
769        let result = decompose(markdown);
770        assert!(result.is_err());
771        assert!(result
772            .unwrap_err()
773            .to_string()
774            .contains("Missing required QUILL field"));
775    }
776
/// Frontmatter containing sequences and nested mappings is parsed, and the
/// sequence is readable as an array of strings.
#[test]
fn test_complex_yaml_frontmatter() {
    let source = r#"---
QUILL: test_quill
title: Complex Document
tags:
  - test
  - yaml
metadata:
  version: 1.0
  nested:
    field: value
---

Content here."#;

    let parsed = decompose(source).unwrap();

    assert_eq!(parsed.body(), Some("\nContent here."));

    let title = parsed.get_field("title").unwrap().as_str().unwrap();
    assert_eq!(title, "Complex Document");

    // Check the sequence length first, then each entry in order.
    let tag_values = parsed.get_field("tags").unwrap().as_array().unwrap();
    assert_eq!(tag_values.len(), 2);
    for (position, expected) in ["test", "yaml"].into_iter().enumerate() {
        assert_eq!(tag_values[position].as_str().unwrap(), expected);
    }
}
806
/// Applying defaults to a document with no fields fills in every default.
#[test]
fn test_with_defaults_empty_document() {
    use std::collections::HashMap;

    let defaults = HashMap::from([
        (
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("draft")),
        ),
        (
            "version".to_string(),
            QuillValue::from_json(serde_json::json!(1)),
        ),
    ]);

    // Start from a document that has no fields at all.
    let blank = ParsedDocument::new(HashMap::new(), QuillReference::latest("test".to_string()));
    let filled = blank.with_defaults(&defaults);

    // Both defaults must now be readable from the document.
    let status = filled.get_field("status").unwrap().as_str().unwrap();
    assert_eq!(status, "draft");

    let version = filled
        .get_field("version")
        .unwrap()
        .as_number()
        .unwrap()
        .as_i64()
        .unwrap();
    assert_eq!(version, 1);
}
845
/// A field already present in the document is not overwritten by a default
/// with the same key.
#[test]
fn test_with_defaults_preserves_existing_values() {
    use std::collections::HashMap;

    let defaults = HashMap::from([(
        "status".to_string(),
        QuillValue::from_json(serde_json::json!("draft")),
    )]);

    // The document already carries its own `status`.
    let existing = HashMap::from([(
        "status".to_string(),
        QuillValue::from_json(serde_json::json!("published")),
    )]);
    let original = ParsedDocument::new(existing, QuillReference::latest("test".to_string()));

    let merged = original.with_defaults(&defaults);

    // The document's value wins over the default.
    let status = merged.get_field("status").unwrap().as_str().unwrap();
    assert_eq!(status, "published");
}
876
/// When only some defaulted keys are present, the present ones are kept and
/// the missing ones are filled from the defaults.
#[test]
fn test_with_defaults_partial_application() {
    use std::collections::HashMap;

    let defaults = HashMap::from([
        (
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("draft")),
        ),
        (
            "version".to_string(),
            QuillValue::from_json(serde_json::json!(1)),
        ),
    ]);

    // Only `status` is set on the document; `version` is absent.
    let existing = HashMap::from([(
        "status".to_string(),
        QuillValue::from_json(serde_json::json!("published")),
    )]);
    let original = ParsedDocument::new(existing, QuillReference::latest("test".to_string()));

    let merged = original.with_defaults(&defaults);

    // `status` keeps the document's value ...
    let status = merged.get_field("status").unwrap().as_str().unwrap();
    assert_eq!(status, "published");

    // ... while `version` comes from the defaults.
    let version = merged
        .get_field("version")
        .unwrap()
        .as_number()
        .unwrap()
        .as_i64()
        .unwrap();
    assert_eq!(version, 1);
}
921
/// An empty defaults map leaves an empty document empty.
#[test]
fn test_with_defaults_no_defaults() {
    use std::collections::HashMap;

    // Nothing to apply.
    let no_defaults = HashMap::new();

    let blank = ParsedDocument::new(HashMap::new(), QuillReference::latest("test".to_string()));
    let result_doc = blank.with_defaults(&no_defaults);

    // The field map must still contain nothing.
    assert!(result_doc.fields().is_empty());
}
934
/// Non-scalar defaults (here an array of strings) are applied intact.
#[test]
fn test_with_defaults_complex_types() {
    use std::collections::HashMap;

    let defaults = HashMap::from([(
        "tags".to_string(),
        QuillValue::from_json(serde_json::json!(["default", "tag"])),
    )]);

    let blank = ParsedDocument::new(HashMap::new(), QuillReference::latest("test".to_string()));
    let filled = blank.with_defaults(&defaults);

    // The array default must arrive with both entries, in order.
    let tag_values = filled.get_field("tags").unwrap().as_array().unwrap();
    assert_eq!(tag_values.len(), 2);
    for (position, expected) in ["default", "tag"].into_iter().enumerate() {
        assert_eq!(tag_values[position].as_str().unwrap(), expected);
    }
}
958
/// Malformed YAML in the frontmatter (an unclosed flow sequence) is reported
/// as a YAML error.
#[test]
fn test_invalid_yaml() {
    let source = r#"---
title: [invalid yaml
author: missing close bracket
---

Content here."#;

    let outcome = decompose(source);
    assert!(outcome.is_err());

    // Only the "YAML error" marker is checked; the rest of the message may
    // carry location context.
    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("YAML error"));
}
973
/// A frontmatter block that is opened but never closed is rejected.
#[test]
fn test_unclosed_frontmatter() {
    let source = r#"---
title: Test
author: Test Author

Content without closing ---"#;

    let outcome = decompose(source);
    assert!(outcome.is_err());

    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("not closed"));
}
986
987    // Extended metadata tests
988
/// A single CARD block after the global body ends up as one entry of the
/// `CARDS` array, carrying its own fields and body.
#[test]
fn test_basic_tagged_block() {
    let source = r#"---
QUILL: test_quill
title: Main Document
---

Main body content.

---
CARD: items
name: Item 1
---

Body of item 1."#;

    let parsed = decompose(source).unwrap();

    // The global body stops where the card block begins.
    assert_eq!(parsed.body(), Some("\nMain body content.\n\n"));

    let title = parsed.get_field("title").unwrap().as_str().unwrap();
    assert_eq!(title, "Main Document");

    // Exactly one card is expected, tagged through its CARD key.
    let card_list = parsed.get_field("CARDS").unwrap().as_array().unwrap();
    assert_eq!(card_list.len(), 1);

    let card = card_list[0].as_object().unwrap();
    assert_eq!(card.get("CARD").unwrap().as_str().unwrap(), "items");
    assert_eq!(card.get("name").unwrap().as_str().unwrap(), "Item 1");

    let card_body = card.get(BODY_FIELD).unwrap().as_str().unwrap();
    assert_eq!(card_body, "\nBody of item 1.");
}
1025
/// Two CARD blocks of the same type become two entries of `CARDS`, in
/// document order.
#[test]
fn test_multiple_tagged_blocks() {
    let source = r#"---
QUILL: test_quill
---

---
CARD: items
name: Item 1
tags: [a, b]
---

First item body.

---
CARD: items
name: Item 2
tags: [c, d]
---

Second item body."#;

    let parsed = decompose(source).unwrap();

    let card_list = parsed.get_field("CARDS").unwrap().as_array().unwrap();
    assert_eq!(card_list.len(), 2);

    // First card.
    let first = card_list[0].as_object().unwrap();
    assert_eq!(first.get("CARD").unwrap().as_str().unwrap(), "items");
    assert_eq!(first.get("name").unwrap().as_str().unwrap(), "Item 1");

    // Second card.
    let second = card_list[1].as_object().unwrap();
    assert_eq!(second.get("CARD").unwrap().as_str().unwrap(), "items");
    assert_eq!(second.get("name").unwrap().as_str().unwrap(), "Item 2");
}
1062
/// Global frontmatter fields and CARD blocks coexist: the global `title`
/// stays at the top level while the cards go into `CARDS`.
#[test]
fn test_mixed_global_and_tagged() {
    let source = r#"---
QUILL: test_quill
title: Global
author: John Doe
---

Global body.

---
CARD: sections
title: Section 1
---

Section 1 content.

---
CARD: sections
title: Section 2
---

Section 2 content."#;

    let parsed = decompose(source).unwrap();

    let global_title = parsed.get_field("title").unwrap().as_str().unwrap();
    assert_eq!(global_title, "Global");
    assert_eq!(parsed.body(), Some("\nGlobal body.\n\n"));

    // Both section cards are collected into the single CARDS array.
    let card_list = parsed.get_field("CARDS").unwrap().as_array().unwrap();
    assert_eq!(card_list.len(), 2);

    let first_kind = card_list[0]
        .as_object()
        .unwrap()
        .get("CARD")
        .unwrap()
        .as_str()
        .unwrap();
    assert_eq!(first_kind, "sections");
}
1106
/// A CARD block with no fields besides the CARD key still yields a card
/// with its body.
#[test]
fn test_empty_tagged_metadata() {
    let source = r#"---
QUILL: test_quill
---

---
CARD: items
---

Body without metadata."#;

    let parsed = decompose(source).unwrap();

    let card_list = parsed.get_field("CARDS").unwrap().as_array().unwrap();
    assert_eq!(card_list.len(), 1);

    let card = card_list[0].as_object().unwrap();
    assert_eq!(card.get("CARD").unwrap().as_str().unwrap(), "items");

    let card_body = card.get(BODY_FIELD).unwrap().as_str().unwrap();
    assert_eq!(card_body, "\nBody without metadata.");
}
1131
/// A CARD block that ends the document gets an empty-string body rather
/// than a missing one.
#[test]
fn test_tagged_block_without_body() {
    let source = r#"---
QUILL: test_quill
---

---
CARD: items
name: Item
---"#;

    let parsed = decompose(source).unwrap();

    let card_list = parsed.get_field("CARDS").unwrap().as_array().unwrap();
    assert_eq!(card_list.len(), 1);

    let card = card_list[0].as_object().unwrap();
    assert_eq!(card.get("CARD").unwrap().as_str().unwrap(), "items");

    let card_body = card.get(BODY_FIELD).unwrap().as_str().unwrap();
    assert_eq!(card_body, "");
}
1152
/// A CARD type named like a global scalar field (`items`) parses
/// successfully.
#[test]
fn test_name_collision_global_and_tagged() {
    let source = r#"---
QUILL: test_quill
items: "global value"
---

Body

---
CARD: items
name: Item
---

Item body"#;

    let outcome = decompose(source);
    assert!(outcome.is_ok(), "Name collision should be allowed now");
}
1172
/// A CARD type named like a global array field (`items`) parses
/// successfully.
#[test]
fn test_card_name_collision_with_array_field() {
    // The global `items` is a non-empty sequence of mappings.
    let source = r#"---
QUILL: test_quill
items:
  - name: Global Item 1
    value: 100
---

Global body

---
CARD: items
name: Scope Item 1
---

Scope item 1 body"#;

    let outcome = decompose(source);
    assert!(
        outcome.is_ok(),
        "Collision with array field should be allowed"
    );
}
1198
/// A CARD type named like a global field holding an empty array (`items: []`)
/// parses successfully.
#[test]
fn test_empty_global_array_with_card() {
    let source = r#"---
QUILL: test_quill
items: []
---

Global body

---
CARD: items
name: Item 1
---

Item 1 body"#;

    let outcome = decompose(source);
    assert!(
        outcome.is_ok(),
        "Collision with empty array field should be allowed"
    );
}
1222
/// Using `BODY` as a user field name is rejected as a reserved name.
#[test]
fn test_reserved_field_body_rejected() {
    let source = r#"---
CARD: section
BODY: Test
---"#;

    let outcome = decompose(source);
    assert!(outcome.is_err(), "BODY is a reserved field name");

    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("Reserved field name"));
}
1237
/// Using `CARDS` as a user field name is rejected as a reserved name.
#[test]
fn test_reserved_field_cards_rejected() {
    let source = r#"---
title: Test
CARDS: []
---"#;

    let outcome = decompose(source);
    assert!(outcome.is_err(), "CARDS is a reserved field name");

    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("Reserved field name"));
}
1252
/// `---` lines inside a triple-backtick fenced code block stay part of the
/// body and are not treated as a metadata block.
#[test]
fn test_delimiter_inside_fenced_code_block_backticks() {
    let source = r#"---
QUILL: test_quill
title: Test
---
Here is some code:

```yaml
---
fake: frontmatter
---
```

More content.
"#;

    let parsed = decompose(source).unwrap();

    // The fenced text is still in the body, and `fake` never became a field.
    let body_text = parsed.body().unwrap();
    assert!(body_text.contains("fake: frontmatter"));
    assert!(parsed.get_field("fake").is_none());
}
1275
/// Tilde fences (`~~~`) are code fences too (CommonMark), so a `---` block
/// inside them is body text, not metadata.
#[test]
fn test_tildes_are_fences() {
    let source = r#"---
QUILL: test_quill
title: Test
---
Here is some code:

~~~yaml
---
CARD: code_example
fake: frontmatter
---
~~~

More content.
"#;

    let parsed = decompose(source).unwrap();

    let body_text = parsed.body().unwrap();
    assert!(body_text.contains("fake: frontmatter"));
    assert!(parsed.get_field("fake").is_none());
}
1300
/// Fences of four or more backticks are code fences too (CommonMark), so a
/// `---` block inside them is body text, not metadata.
#[test]
fn test_four_backticks_are_fences() {
    let source = r#"---
QUILL: test_quill
title: Test
---
Here is some code:

````yaml
---
CARD: code_example
fake: frontmatter
---
````

More content.
"#;

    let parsed = decompose(source).unwrap();

    let body_text = parsed.body().unwrap();
    assert!(body_text.contains("fake: frontmatter"));
    assert!(parsed.get_field("fake").is_none());
}
1325
/// A CARD value such as `Invalid-Name` is rejected as an invalid card field
/// name.
#[test]
fn test_invalid_tag_syntax() {
    let source = r#"---
CARD: Invalid-Name
title: Test
---"#;

    let outcome = decompose(source);
    assert!(outcome.is_err());

    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("Invalid card field name"));
}
1340
/// A second untagged metadata block is an error, and the message points the
/// user at the missing CARD directive.
#[test]
fn test_multiple_global_frontmatter_blocks() {
    let source = r#"---
title: First
---

Body

---
author: Second
---

More body"#;

    let outcome = decompose(source);
    assert!(outcome.is_err());

    // The message must mention CARD and say that something is missing.
    let err_str = outcome.unwrap_err().to_string();
    assert!(
        err_str.contains("CARD"),
        "Error should mention CARD directive: {}",
        err_str
    );
    assert!(
        err_str.contains("missing"),
        "Error should indicate missing directive: {}",
        err_str
    );
}
1372
/// Cards of different types share the one `CARDS` array and keep their
/// document order.
#[test]
fn test_adjacent_blocks_different_tags() {
    let source = r#"---
QUILL: test_quill
---

---
CARD: items
name: Item 1
---

Item 1 body

---
CARD: sections
title: Section 1
---

Section 1 body"#;

    let parsed = decompose(source).unwrap();

    let card_list = parsed.get_field("CARDS").unwrap().as_array().unwrap();
    assert_eq!(card_list.len(), 2);

    // Position 0: the `items` card.
    let items_card = card_list[0].as_object().unwrap();
    assert_eq!(items_card.get("CARD").unwrap().as_str().unwrap(), "items");
    assert_eq!(items_card.get("name").unwrap().as_str().unwrap(), "Item 1");

    // Position 1: the `sections` card.
    let sections_card = card_list[1].as_object().unwrap();
    assert_eq!(
        sections_card.get("CARD").unwrap().as_str().unwrap(),
        "sections"
    );
    assert_eq!(
        sections_card.get("title").unwrap().as_str().unwrap(),
        "Section 1"
    );
}
1409
/// Cards appear in `CARDS` in the same order as in the document; checked
/// through their ascending `id` fields.
#[test]
fn test_order_preservation() {
    let source = r#"---
QUILL: test_quill
---

---
CARD: items
id: 1
---

First

---
CARD: items
id: 2
---

Second

---
CARD: items
id: 3
---

Third"#;

    let parsed = decompose(source).unwrap();

    let card_list = parsed.get_field("CARDS").unwrap().as_array().unwrap();
    assert_eq!(card_list.len(), 3);

    // Pair every card with the id its position implies (1, 2, 3).
    for (expected_id, card) in (1_i64..).zip(card_list.iter()) {
        let entry = card.as_object().unwrap();
        assert_eq!(entry.get("CARD").unwrap().as_str().unwrap(), "items");
        assert_eq!(entry.get("id").unwrap().as_i64().unwrap(), expected_id);
    }
}
1449
/// End-to-end scenario: global fields, a global body, and four cards of two
/// types (products and reviews) in one document.
#[test]
fn test_product_catalog_integration() {
    let source = r#"---
QUILL: test_quill
title: Product Catalog
author: John Doe
date: 2024-01-01
---

This is the main catalog description.

---
CARD: products
name: Widget A
price: 19.99
sku: WID-001
---

The **Widget A** is our most popular product.

---
CARD: products
name: Gadget B
price: 29.99
sku: GAD-002
---

The **Gadget B** is perfect for professionals.

---
CARD: reviews
product: Widget A
rating: 5
---

"Excellent product! Highly recommended."

---
CARD: reviews
product: Gadget B
rating: 4
---

"Very good, but a bit pricey.""#;

    let parsed = decompose(source).unwrap();

    // Global frontmatter fields.
    let title = parsed.get_field("title").unwrap().as_str().unwrap();
    assert_eq!(title, "Product Catalog");
    let author = parsed.get_field("author").unwrap().as_str().unwrap();
    assert_eq!(author, "John Doe");
    let date = parsed.get_field("date").unwrap().as_str().unwrap();
    assert_eq!(date, "2024-01-01");

    // Global body.
    assert!(parsed.body().unwrap().contains("main catalog description"));

    // Two products followed by two reviews, all in one array.
    let card_list = parsed.get_field("CARDS").unwrap().as_array().unwrap();
    assert_eq!(card_list.len(), 4);

    // Products occupy positions 0 and 1.
    let widget = card_list[0].as_object().unwrap();
    assert_eq!(widget.get("CARD").unwrap().as_str().unwrap(), "products");
    assert_eq!(widget.get("name").unwrap().as_str().unwrap(), "Widget A");
    assert_eq!(widget.get("price").unwrap().as_f64().unwrap(), 19.99);

    let gadget = card_list[1].as_object().unwrap();
    assert_eq!(gadget.get("CARD").unwrap().as_str().unwrap(), "products");
    assert_eq!(gadget.get("name").unwrap().as_str().unwrap(), "Gadget B");

    // Reviews follow; only the first one is inspected.
    let first_review = card_list[2].as_object().unwrap();
    assert_eq!(
        first_review.get("CARD").unwrap().as_str().unwrap(),
        "reviews"
    );
    let reviewed = first_review.get("product").unwrap().as_str().unwrap();
    assert_eq!(reviewed, "Widget A");
    assert_eq!(first_review.get("rating").unwrap().as_i64().unwrap(), 5);

    // Stored fields: title, author, date, body, CARDS.
    assert_eq!(parsed.fields().len(), 5);
}
1540
/// The QUILL directive sets the quill reference name, and the remaining keys
/// of that block become ordinary frontmatter fields.
#[test]
fn taro_quill_directive() {
    let source = r#"---
QUILL: usaf_memo
memo_for: [ORG/SYMBOL]
memo_from: [ORG/SYMBOL]
---

This is the memo body."#;

    let parsed = decompose(source).unwrap();

    // The quill name comes from the QUILL key.
    assert_eq!(parsed.quill_reference().name, "usaf_memo");

    // Other keys in the same block are readable as fields.
    let recipients = parsed.get_field("memo_for").unwrap().as_array().unwrap();
    assert_eq!(recipients[0].as_str().unwrap(), "ORG/SYMBOL");

    // The body follows the closing delimiter.
    assert_eq!(parsed.body(), Some("\nThis is the memo body."));
}
1567
/// A QUILL frontmatter block followed by a CARD block: quill name, global
/// field, card, and global body are all available.
#[test]
fn test_quill_with_card_blocks() {
    let source = r#"---
QUILL: document
title: Test Document
---

Main body.

---
CARD: sections
name: Section 1
---

Section 1 body."#;

    let parsed = decompose(source).unwrap();

    // Quill name.
    assert_eq!(parsed.quill_reference().name, "document");

    // Global field declared next to QUILL.
    let title = parsed.get_field("title").unwrap().as_str().unwrap();
    assert_eq!(title, "Test Document");

    // The card is reachable through the CARDS array.
    let card_list = parsed.get_field("CARDS").unwrap().as_array().unwrap();
    assert_eq!(card_list.len(), 1);
    let card_kind = card_list[0]
        .as_object()
        .unwrap()
        .get("CARD")
        .unwrap()
        .as_str()
        .unwrap();
    assert_eq!(card_kind, "sections");

    // Global body ends where the card block starts.
    assert_eq!(parsed.body(), Some("\nMain body.\n\n"));
}
1612
/// A QUILL directive in a later (inline) block is an error; the message
/// refers to the top-level frontmatter.
#[test]
fn test_multiple_quill_directives_error() {
    let source = r#"---
QUILL: first
---

---
QUILL: second
---"#;

    let outcome = decompose(source);
    assert!(outcome.is_err());

    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("top-level frontmatter"));
}
1631
/// A QUILL value such as `Invalid-Name` is rejected as an invalid QUILL
/// reference.
#[test]
fn test_invalid_quill_ref() {
    let source = r#"---
QUILL: Invalid-Name
---"#;

    let outcome = decompose(source);
    assert!(outcome.is_err());

    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("Invalid QUILL reference"));
}
1645
/// A non-string QUILL value (here the number 123) is rejected.
#[test]
fn test_quill_wrong_value_type() {
    let source = r#"---
QUILL: 123
---"#;

    let outcome = decompose(source);
    assert!(outcome.is_err());

    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("QUILL value must be a string"));
}
1659
/// A non-string CARD value (here the number 123) is rejected.
#[test]
fn test_card_wrong_value_type() {
    let source = r#"---
CARD: 123
---"#;

    let outcome = decompose(source);
    assert!(outcome.is_err());

    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("CARD value must be a string"));
}
1673
/// One block may not carry both QUILL and CARD.
#[test]
fn test_both_quill_and_card_error() {
    let source = r#"---
QUILL: test
CARD: items
---"#;

    let outcome = decompose(source);
    assert!(outcome.is_err());

    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("Cannot specify both QUILL and CARD"));
}
1688
/// A blank line inside the frontmatter YAML does not end the block; fields
/// on both sides of it are parsed.
#[test]
fn test_blank_lines_in_frontmatter() {
    let source = r#"---
QUILL: test_quill
title: Test Document
author: Test Author

description: This has a blank line above it
tags:
  - one
  - two
---

# Hello World

This is the body."#;

    let parsed = decompose(source).unwrap();

    assert_eq!(
        parsed.body(),
        Some("\n# Hello World\n\nThis is the body.")
    );

    // Fields before the blank line.
    let title = parsed.get_field("title").unwrap().as_str().unwrap();
    assert_eq!(title, "Test Document");
    let author = parsed.get_field("author").unwrap().as_str().unwrap();
    assert_eq!(author, "Test Author");

    // Fields after the blank line.
    let description = parsed.get_field("description").unwrap().as_str().unwrap();
    assert_eq!(description, "This has a blank line above it");

    let tag_values = parsed.get_field("tags").unwrap().as_array().unwrap();
    assert_eq!(tag_values.len(), 2);
}
1726
/// Blank lines inside a CARD block's YAML are accepted as well.
#[test]
fn test_blank_lines_in_scope_blocks() {
    let source = r#"---
QUILL: test_quill
---

---
CARD: items
name: Item 1

price: 19.99

tags:
  - electronics
  - gadgets
---

Body of item 1."#;

    let parsed = decompose(source).unwrap();

    let card_list = parsed.get_field("CARDS").unwrap().as_array().unwrap();
    assert_eq!(card_list.len(), 1);

    // Every field of the card survived the blank lines between them.
    let card = card_list[0].as_object().unwrap();
    assert_eq!(card.get("CARD").unwrap().as_str().unwrap(), "items");
    assert_eq!(card.get("name").unwrap().as_str().unwrap(), "Item 1");
    assert_eq!(card.get("price").unwrap().as_f64().unwrap(), 19.99);

    let tag_values = card.get("tags").unwrap().as_array().unwrap();
    assert_eq!(tag_values.len(), 2);
}
1761
/// A lone `---` in the body is read as the start of a metadata block (not a
/// horizontal rule), so with no closing `---` parsing fails with an
/// `InvalidStructure` error.
#[test]
fn test_triple_dash_in_body_is_parsed_as_inline_metadata_block() {
    let source = r#"---
QUILL: test_quill
title: Test
---

First paragraph.

---

Second paragraph."#;

    let failure = decompose(source).unwrap_err();

    let reports_unclosed_block = matches!(
        failure,
        ParseError::InvalidStructure(ref msg) if msg.contains("not closed with ---")
    );
    assert!(reports_unclosed_block);
}
1783
/// Same as the blank-line-separated case, but the `---` directly follows a
/// paragraph line: it is still read as an unclosed metadata block.
#[test]
fn test_triple_dash_with_single_surrounding_newline_is_also_metadata() {
    let source = r#"---
QUILL: test_quill
title: Test
---

First paragraph.
---

Second paragraph."#;

    let failure = decompose(source).unwrap_err();

    let reports_unclosed_block = matches!(
        failure,
        ParseError::InvalidStructure(ref msg) if msg.contains("not closed with ---")
    );
    assert!(reports_unclosed_block);
}
1804
/// Runs of several blank lines between YAML keys are accepted.
#[test]
fn test_multiple_blank_lines_in_yaml() {
    let source = r#"---
QUILL: test_quill
title: Test


author: John Doe


version: 1.0
---

Body content."#;

    let parsed = decompose(source).unwrap();

    let title = parsed.get_field("title").unwrap().as_str().unwrap();
    assert_eq!(title, "Test");

    let author = parsed.get_field("author").unwrap().as_str().unwrap();
    assert_eq!(author, "John Doe");

    let version = parsed.get_field("version").unwrap().as_f64().unwrap();
    assert_eq!(version, 1.0);
}
1830
/// An HTML comment whose delimiters contain `---` ahead of the frontmatter
/// must not stop the frontmatter from being parsed.
#[test]
fn test_html_comment_interaction() {
    let source = r#"<!---
---> the rest of the page content

---
QUILL: test_quill
key: value
---
"#;
    let parsed = decompose(source).unwrap();

    // Parsing succeeded, and the field from the real frontmatter is present.
    let value = parsed.get_field("key").and_then(|v| v.as_str());
    assert_eq!(value, Some("value"));
}
1848}
1849#[cfg(test)]
1850mod demo_file_test {
1851    use super::*;
1852
/// Parses the bundled `extended_metadata_demo.md` fixture and checks its
/// global fields, body, and the mix of card types.
#[test]
fn test_extended_metadata_demo_file() {
    let source = include_str!("../../fixtures/resources/extended_metadata_demo.md");
    let parsed = decompose(source).unwrap();

    // Global fields.
    let title = parsed.get_field("title").unwrap().as_str().unwrap();
    assert_eq!(title, "Extended Metadata Demo");
    let author = parsed.get_field("author").unwrap().as_str().unwrap();
    assert_eq!(author, "Quillmark Team");
    // `version` is read back as a number, not a string.
    let version = parsed.get_field("version").unwrap().as_f64().unwrap();
    assert_eq!(version, 1.0);

    // Global body.
    let body_text = parsed.body().unwrap();
    assert!(body_text.contains("extended YAML metadata standard"));

    // Five cards in total: 3 features + 2 use_cases.
    let card_list = parsed.get_field("CARDS").unwrap().as_array().unwrap();
    assert_eq!(card_list.len(), 5);

    // Tally the cards per CARD type.
    let mut features_count = 0;
    let mut use_cases_count = 0;
    for card in card_list.iter() {
        let kind = card
            .as_object()
            .unwrap()
            .get("CARD")
            .unwrap()
            .as_str()
            .unwrap();
        if kind == "features" {
            features_count += 1;
        }
        if kind == "use_cases" {
            use_cases_count += 1;
        }
    }
    assert_eq!(features_count, 3);
    assert_eq!(use_cases_count, 2);

    // The first card is the "Tag Directives" feature.
    let leading = card_list[0].as_object().unwrap();
    assert_eq!(leading.get("CARD").unwrap().as_str().unwrap(), "features");
    let leading_name = leading.get("name").unwrap().as_str().unwrap();
    assert_eq!(leading_name, "Tag Directives");
}
1916
/// Input one byte longer than `MAX_INPUT_SIZE` is rejected as too large.
#[test]
fn test_input_size_limit() {
    // One byte past the limit.
    let oversized = "a".repeat(crate::error::MAX_INPUT_SIZE + 1);

    let outcome = decompose(&oversized);
    assert!(outcome.is_err());

    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("Input too large"));
}
1929
/// A frontmatter block whose single quoted value is longer than
/// `MAX_YAML_SIZE` is rejected as too large.
#[test]
fn test_yaml_size_limit() {
    // The quoted payload alone already exceeds the YAML size limit.
    let payload = "x".repeat(crate::error::MAX_YAML_SIZE + 1);

    // Assemble: opening delimiter, one `data` field, closing delimiter, body.
    let document = ["---\n", "data: \"", payload.as_str(), "\"\n---\n\nBody"].concat();

    let outcome = decompose(&document);
    assert!(outcome.is_err());

    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("Input too large"));
}
1947
/// A small document (1000 bytes of body) parses without hitting any size
/// limit.
#[test]
fn test_input_within_size_limit() {
    // 1000 bytes of filler body text.
    let filler = "a".repeat(1000);
    let document = format!(
        "---\nQUILL: test_quill\ntitle: Test\n---\n\n{}",
        filler
    );

    assert!(decompose(&document).is_ok());
}
1960
1961    #[test]
1962    fn test_yaml_within_size_limit() {
1963        // Create YAML block well within the limit
1964        let markdown = "---\nQUILL: test_quill\ntitle: Test\nauthor: John Doe\n---\n\nBody content";
1965
1966        let result = decompose(markdown);
1967        assert!(result.is_ok());
1968    }
1969
1970    #[test]
1971    fn test_yaml_depth_limit() {
1972        // Create deeply nested YAML that exceeds MAX_YAML_DEPTH (100 levels)
1973        // This tests serde-saphyr's Budget.max_depth enforcement
1974        let mut yaml_content = String::new();
1975        for i in 0..110 {
1976            yaml_content.push_str(&"  ".repeat(i));
1977            yaml_content.push_str(&format!("level{}: value\n", i));
1978        }
1979
1980        let markdown = format!("---\n{}---\n\nBody", yaml_content);
1981        let result = decompose(&markdown);
1982
1983        assert!(result.is_err());
1984        let err_msg = result.unwrap_err().to_string();
1985        // serde-saphyr returns "budget exceeded" or similar for depth violations
1986        assert!(
1987            err_msg.to_lowercase().contains("budget")
1988                || err_msg.to_lowercase().contains("depth")
1989                || err_msg.contains("YAML"),
1990            "Expected depth/budget error, got: {}",
1991            err_msg
1992        );
1993    }
1994
1995    #[test]
1996    fn test_yaml_depth_within_limit() {
1997        // Create reasonably nested YAML (should succeed)
1998        let markdown = r#"---
1999QUILL: test_quill
2000level1:
2001  level2:
2002    level3:
2003      level4:
2004        value: test
2005---
2006
2007Body content"#;
2008
2009        let result = decompose(markdown);
2010        assert!(result.is_ok());
2011    }
2012
2013    // Tests for guillemet preservation in parsing (guillemets are NOT converted during parsing)
2014    // Guillemet conversion now happens in process_plate, not during parsing
2015    #[test]
2016    fn test_chevrons_preserved_in_body_no_frontmatter() {
2017        let markdown = "---\nQUILL: test_quill\n---\nUse <<raw content>> here.";
2018        let doc = decompose(markdown).unwrap();
2019
2020        // Body should preserve chevrons (conversion happens later in process_plate)
2021        assert_eq!(doc.body(), Some("Use <<raw content>> here."));
2022    }
2023
2024    #[test]
2025    fn test_chevrons_preserved_in_body_with_frontmatter() {
2026        let markdown = r#"---
2027QUILL: test_quill
2028title: Test
2029---
2030
2031Use <<raw content>> here."#;
2032        let doc = decompose(markdown).unwrap();
2033
2034        // Body should preserve chevrons
2035        assert_eq!(doc.body(), Some("\nUse <<raw content>> here."));
2036    }
2037
2038    #[test]
2039    fn test_chevrons_preserved_in_yaml_string() {
2040        let markdown = r#"---
2041QUILL: test_quill
2042title: Test <<with chevrons>>
2043---
2044
2045Body content."#;
2046        let doc = decompose(markdown).unwrap();
2047
2048        // YAML string values should preserve chevrons
2049        assert_eq!(
2050            doc.get_field("title").unwrap().as_str().unwrap(),
2051            "Test <<with chevrons>>"
2052        );
2053    }
2054
2055    #[test]
2056    fn test_chevrons_preserved_in_yaml_array() {
2057        let markdown = r#"---
2058QUILL: test_quill
2059items:
2060  - "<<first>>"
2061  - "<<second>>"
2062---
2063
2064Body."#;
2065        let doc = decompose(markdown).unwrap();
2066
2067        let items = doc.get_field("items").unwrap().as_array().unwrap();
2068        assert_eq!(items[0].as_str().unwrap(), "<<first>>");
2069        assert_eq!(items[1].as_str().unwrap(), "<<second>>");
2070    }
2071
2072    #[test]
2073    fn test_chevrons_preserved_in_yaml_nested() {
2074        let markdown = r#"---
2075QUILL: test_quill
2076metadata:
2077  description: "<<nested value>>"
2078---
2079
2080Body."#;
2081        let doc = decompose(markdown).unwrap();
2082
2083        let metadata = doc.get_field("metadata").unwrap().as_object().unwrap();
2084        assert_eq!(
2085            metadata.get("description").unwrap().as_str().unwrap(),
2086            "<<nested value>>"
2087        );
2088    }
2089
2090    #[test]
2091    fn test_chevrons_preserved_in_code_blocks() {
2092        let markdown =
2093            "---\nQUILL: test_quill\n---\n```\n<<in code block>>\n```\n\n<<outside code block>>";
2094        let doc = decompose(markdown).unwrap();
2095
2096        let body = doc.body().unwrap();
2097        // All chevrons should be preserved (no conversion during parsing)
2098        assert!(body.contains("<<in code block>>"));
2099        assert!(body.contains("<<outside code block>>"));
2100    }
2101
2102    #[test]
2103    fn test_chevrons_preserved_in_inline_code() {
2104        let markdown =
2105            "---\nQUILL: test_quill\n---\n`<<in inline code>>` and <<outside inline code>>";
2106        let doc = decompose(markdown).unwrap();
2107
2108        let body = doc.body().unwrap();
2109        // All chevrons should be preserved
2110        assert!(body.contains("`<<in inline code>>`"));
2111        assert!(body.contains("<<outside inline code>>"));
2112    }
2113
2114    #[test]
2115    fn test_chevrons_preserved_in_tagged_block_body() {
2116        let markdown = r#"---
2117QUILL: test_quill
2118title: Main
2119---
2120
2121Main body.
2122
2123---
2124CARD: items
2125name: Item 1
2126---
2127
2128Use <<raw>> here."#;
2129        let doc = decompose(markdown).unwrap();
2130
2131        let cards = doc.get_field("CARDS").unwrap().as_array().unwrap();
2132        let item = cards[0].as_object().unwrap();
2133        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
2134        let item_body = item.get(BODY_FIELD).unwrap().as_str().unwrap();
2135        // Tagged block body should preserve chevrons
2136        assert!(item_body.contains("<<raw>>"));
2137    }
2138
2139    #[test]
2140    fn test_chevrons_preserved_in_tagged_block_yaml() {
2141        let markdown = r#"---
2142QUILL: test_quill
2143title: Main
2144---
2145
2146Main body.
2147
2148---
2149CARD: items
2150description: "<<tagged yaml>>"
2151---
2152
2153Item body."#;
2154        let doc = decompose(markdown).unwrap();
2155
2156        let cards = doc.get_field("CARDS").unwrap().as_array().unwrap();
2157        let item = cards[0].as_object().unwrap();
2158        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
2159        // Tagged block YAML should preserve chevrons
2160        assert_eq!(
2161            item.get("description").unwrap().as_str().unwrap(),
2162            "<<tagged yaml>>"
2163        );
2164    }
2165
2166    #[test]
2167    fn test_yaml_numbers_not_affected() {
2168        // Numbers should not be affected
2169        let markdown = r#"---
2170QUILL: test_quill
2171count: 42
2172---
2173
2174Body."#;
2175        let doc = decompose(markdown).unwrap();
2176        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
2177    }
2178
2179    #[test]
2180    fn test_yaml_booleans_not_affected() {
2181        // Booleans should not be affected
2182        let markdown = r#"---
2183QUILL: test_quill
2184active: true
2185---
2186
2187Body."#;
2188        let doc = decompose(markdown).unwrap();
2189        assert!(doc.get_field("active").unwrap().as_bool().unwrap());
2190    }
2191
2192    #[test]
2193    fn test_multiline_chevrons_preserved() {
2194        // Multiline chevrons should be preserved as-is
2195        let markdown = "---\nQUILL: test_quill\n---\n<<text\nacross lines>>";
2196        let doc = decompose(markdown).unwrap();
2197
2198        let body = doc.body().unwrap();
2199        // Should contain the original chevrons
2200        assert!(body.contains("<<text"));
2201        assert!(body.contains("across lines>>"));
2202    }
2203
2204    #[test]
2205    fn test_unmatched_chevrons_preserved() {
2206        let markdown = "---\nQUILL: test_quill\n---\n<<unmatched";
2207        let doc = decompose(markdown).unwrap();
2208
2209        let body = doc.body().unwrap();
2210        // Unmatched should remain as-is
2211        assert_eq!(body, "<<unmatched");
2212    }
2213}
2214
// Robustness tests: delimiter, line-ending, Unicode, YAML, CARD and QUILL
// edge cases.
#[cfg(test)]
mod robustness_tests {
    use super::*;

    /// Asserts that parsing `markdown` fails and that the error text reports
    /// the missing QUILL field.
    fn assert_missing_quill(markdown: &str) {
        // `unwrap_err` doubles as the "must fail" assertion.
        let message = decompose(markdown).unwrap_err().to_string();
        assert!(message.contains("Missing required QUILL field"));
    }

    // ---- Delimiter handling ----

    /// Empty input is rejected for lacking QUILL.
    #[test]
    fn test_empty_document() {
        assert_missing_quill("");
    }

    /// Whitespace-only input is rejected for lacking QUILL.
    #[test]
    fn test_only_whitespace() {
        assert_missing_quill("   \n\n   \t");
    }

    /// A bare `---` with no newline is rejected for lacking QUILL.
    #[test]
    fn test_only_dashes() {
        assert_missing_quill("---");
    }

    /// `---` in the middle of a body line is ordinary text, not a delimiter.
    #[test]
    fn test_dashes_in_middle_of_line() {
        let source = concat!(
            "---\n",
            "QUILL: test_quill\n",
            "---\n",
            "some text --- more text",
        );
        let parsed = decompose(source).unwrap();

        assert_eq!(parsed.body(), Some("some text --- more text"));
    }

    /// Four dashes do not open a frontmatter block, so QUILL is reported
    /// missing.
    #[test]
    fn test_four_dashes() {
        assert_missing_quill("----\ntitle: Test\n----\n\nBody");
    }

    /// Windows-style (CRLF) line endings are accepted throughout.
    #[test]
    fn test_crlf_line_endings() {
        let source = "---\r\nQUILL: test_quill\r\ntitle: Test\r\n---\r\n\r\nBody content.";
        let parsed = decompose(source).unwrap();

        let title = parsed.get_field("title").and_then(|v| v.as_str());
        assert_eq!(title, Some("Test"));
        assert!(parsed.body().unwrap().contains("Body content."));
    }

    /// A mixture of `\n` and `\r\n` line endings is accepted.
    #[test]
    fn test_mixed_line_endings() {
        let source = "---\nQUILL: test_quill\r\ntitle: Test\r\n---\n\nBody.";
        let parsed = decompose(source).unwrap();

        let title = parsed.get_field("title").and_then(|v| v.as_str());
        assert_eq!(title, Some("Test"));
    }

    /// A closing delimiter at end of input, with no trailing newline, still
    /// closes the frontmatter and leaves an empty body.
    #[test]
    fn test_frontmatter_at_eof_no_trailing_newline() {
        let source = concat!(
            "---\n",
            "QUILL: test_quill\n",
            "title: Test\n",
            "---",
        );
        let parsed = decompose(source).unwrap();

        let title = parsed.get_field("title").and_then(|v| v.as_str());
        assert_eq!(title, Some("Test"));
        assert_eq!(parsed.body(), Some(""));
    }

    /// Frontmatter holding a single space has no QUILL and is rejected.
    #[test]
    fn test_empty_frontmatter() {
        assert_missing_quill("---\n \n---\n\nBody content.");
    }

    /// Frontmatter holding only spaces and blank lines has no QUILL and is
    /// rejected.
    #[test]
    fn test_whitespace_only_frontmatter() {
        assert_missing_quill("---\n   \n\n   \n---\n\nBody.");
    }

    // ---- Unicode handling ----

    /// Non-ASCII YAML keys can be looked up by their exact text.
    #[test]
    fn test_unicode_in_yaml_keys() {
        let source = concat!(
            "---\n",
            "QUILL: test_quill\n",
            "titre: Bonjour\n",
            "タイトル: こんにちは\n",
            "---\n",
            "\n",
            "Body.",
        );
        let parsed = decompose(source).unwrap();

        let expectations = [("titre", "Bonjour"), ("タイトル", "こんにちは")];
        for (key, expected) in expectations.iter() {
            assert_eq!(parsed.get_field(key).unwrap().as_str().unwrap(), *expected);
        }
    }

    /// Non-ASCII text and emoji in a YAML value are returned unchanged.
    #[test]
    fn test_unicode_in_yaml_values() {
        let source = concat!(
            "---\n",
            "QUILL: test_quill\n",
            "title: 你好世界 🎉\n",
            "---\n",
            "\n",
            "Body.",
        );
        let parsed = decompose(source).unwrap();

        let title = parsed.get_field("title").and_then(|v| v.as_str());
        assert_eq!(title, Some("你好世界 🎉"));
    }

    /// Non-ASCII text and emoji in the body are retained.
    #[test]
    fn test_unicode_in_body() {
        let source = concat!(
            "---\n",
            "QUILL: test_quill\n",
            "title: Test\n",
            "---\n",
            "\n",
            "日本語テキスト with emoji 🚀",
        );
        let parsed = decompose(source).unwrap();
        let body = parsed.body().unwrap();

        for needle in ["日本語テキスト", "🚀"].iter() {
            assert!(body.contains(*needle));
        }
    }

    // ---- YAML edge cases ----

    /// A literal block scalar (`|`) keeps its text and its newlines.
    #[test]
    fn test_yaml_multiline_string() {
        let source = concat!(
            "---\n",
            "QUILL: test_quill\n",
            "description: |\n",
            "  This is a\n",
            "  multiline string\n",
            "  with preserved newlines.\n",
            "---\n",
            "\n",
            "Body.",
        );
        let parsed = decompose(source).unwrap();

        let description = parsed.get_field("description").unwrap().as_str().unwrap();
        assert!(description.contains("multiline string"));
        assert!(description.contains('\n'));
    }

    /// A folded block scalar (`>`) is parsed to a string containing its text.
    #[test]
    fn test_yaml_folded_string() {
        let source = concat!(
            "---\n",
            "QUILL: test_quill\n",
            "description: >\n",
            "  This is a folded\n",
            "  string that becomes\n",
            "  a single line.\n",
            "---\n",
            "\n",
            "Body.",
        );
        let parsed = decompose(source).unwrap();

        // Only the presence of the text is checked, not how lines are joined.
        let description = parsed.get_field("description").unwrap().as_str().unwrap();
        assert!(description.contains("folded"));
    }

    /// An explicit YAML `null` is stored as a null value.
    #[test]
    fn test_yaml_null_value() {
        let source = "---\nQUILL: test_quill\noptional: null\n---\n\nBody.";
        let parsed = decompose(source).unwrap();

        let optional = parsed.get_field("optional").unwrap();
        assert!(optional.is_null());
    }

    /// A quoted empty string stays an empty string.
    #[test]
    fn test_yaml_empty_string_value() {
        let source = "---\nQUILL: test_quill\nempty: \"\"\n---\n\nBody.";
        let parsed = decompose(source).unwrap();

        let empty = parsed.get_field("empty").and_then(|v| v.as_str());
        assert_eq!(empty, Some(""));
    }

    /// YAML-significant characters inside a quoted string are plain text.
    #[test]
    fn test_yaml_special_characters_in_string() {
        let source = concat!(
            "---\n",
            "QUILL: test_quill\n",
            "special: \"colon: here, and [brackets]\"\n",
            "---\n",
            "\n",
            "Body.",
        );
        let parsed = decompose(source).unwrap();

        let special = parsed.get_field("special").and_then(|v| v.as_str());
        assert_eq!(special, Some("colon: here, and [brackets]"));
    }

    /// Nested mappings are reachable level by level with typed leaf values.
    #[test]
    fn test_yaml_nested_objects() {
        let source = concat!(
            "---\n",
            "QUILL: test_quill\n",
            "config:\n",
            "  database:\n",
            "    host: localhost\n",
            "    port: 5432\n",
            "  cache:\n",
            "    enabled: true\n",
            "---\n",
            "\n",
            "Body.",
        );
        let parsed = decompose(source).unwrap();

        let config = parsed.get_field("config").unwrap().as_object().unwrap();
        let database = config.get("database").unwrap().as_object().unwrap();
        assert_eq!(database.get("host").and_then(|v| v.as_str()), Some("localhost"));
        assert_eq!(database.get("port").and_then(|v| v.as_i64()), Some(5432));
    }

    // ---- CARD block edge cases ----

    /// A CARD block that ends the document gets an empty body.
    #[test]
    fn test_card_with_empty_body() {
        let source = concat!(
            "---\n",
            "QUILL: test_quill\n",
            "---\n",
            "\n",
            "---\n",
            "CARD: items\n",
            "name: Item\n",
            "---",
        );
        let parsed = decompose(source).unwrap();

        let cards = parsed.get_field("CARDS").unwrap().as_array().unwrap();
        assert_eq!(cards.len(), 1);

        let card = cards[0].as_object().unwrap();
        assert_eq!(card.get("CARD").and_then(|v| v.as_str()), Some("items"));
        assert_eq!(card.get(BODY_FIELD).and_then(|v| v.as_str()), Some(""));
    }

    /// Two CARD blocks placed back to back produce two cards.
    #[test]
    fn test_card_consecutive_blocks() {
        let source = concat!(
            "---\n",
            "QUILL: test_quill\n",
            "---\n",
            "\n",
            "---\n",
            "CARD: a\n",
            "id: 1\n",
            "---\n",
            "---\n",
            "CARD: a\n",
            "id: 2\n",
            "---",
        );
        let parsed = decompose(source).unwrap();

        let cards = parsed.get_field("CARDS").unwrap().as_array().unwrap();
        assert_eq!(cards.len(), 2);

        // The length check above guarantees this loop visits exactly two cards.
        for card in cards.iter() {
            let tag = card.as_object().unwrap().get("CARD").unwrap().as_str().unwrap();
            assert_eq!(tag, "a");
        }
    }

    /// `---` inside a line of a card body is kept as text.
    #[test]
    fn test_card_with_body_containing_dashes() {
        let source = concat!(
            "---\n",
            "QUILL: test_quill\n",
            "---\n",
            "\n",
            "---\n",
            "CARD: items\n",
            "name: Item\n",
            "---\n",
            "\n",
            "Some text with --- dashes in it.",
        );
        let parsed = decompose(source).unwrap();

        let cards = parsed.get_field("CARDS").unwrap().as_array().unwrap();
        let card = cards[0].as_object().unwrap();
        assert_eq!(card.get("CARD").and_then(|v| v.as_str()), Some("items"));

        let card_body = card.get(BODY_FIELD).unwrap().as_str().unwrap();
        assert!(card_body.contains("--- dashes"));
    }

    // ---- QUILL directive edge cases ----

    /// A quill name may begin with an underscore.
    #[test]
    fn test_quill_with_underscore_prefix() {
        let parsed = decompose("---\nQUILL: _internal\n---\n\nBody.").unwrap();

        let reference = parsed.quill_reference();
        assert_eq!(reference.name, "_internal");
    }

    /// A quill name may contain digits.
    #[test]
    fn test_quill_with_numbers() {
        let parsed = decompose("---\nQUILL: form_8_v2\n---\n\nBody.").unwrap();

        let reference = parsed.quill_reference();
        assert_eq!(reference.name, "form_8_v2");
    }

    /// Ordinary fields placed next to QUILL are parsed alongside it.
    #[test]
    fn test_quill_with_additional_fields() {
        let source = concat!(
            "---\n",
            "QUILL: my_quill\n",
            "title: Document Title\n",
            "author: John Doe\n",
            "---\n",
            "\n",
            "Body content.",
        );
        let parsed = decompose(source).unwrap();

        assert_eq!(parsed.quill_reference().name, "my_quill");

        let expectations = [("title", "Document Title"), ("author", "John Doe")];
        for (key, expected) in expectations.iter() {
            assert_eq!(parsed.get_field(key).unwrap().as_str().unwrap(), *expected);
        }
    }

    // ---- Error handling ----

    /// An uppercase CARD name is reported as an invalid card field name.
    #[test]
    fn test_invalid_scope_name_uppercase() {
        let message = decompose("---\nCARD: ITEMS\n---\n\nBody.")
            .unwrap_err()
            .to_string();
        assert!(message.contains("Invalid card field name"));
    }

    /// A CARD name that starts with a digit causes a parse error.
    ///
    /// NOTE(review): the input has no QUILL block and only `is_err()` is
    /// checked, so this does not pin down which error is raised.
    #[test]
    fn test_invalid_scope_name_starts_with_number() {
        let outcome = decompose("---\nCARD: 123items\n---\n\nBody.");
        assert!(outcome.is_err());
    }

    /// A CARD name containing a hyphen causes a parse error.
    ///
    /// NOTE(review): the input has no QUILL block and only `is_err()` is
    /// checked, so this does not pin down which error is raised.
    #[test]
    fn test_invalid_scope_name_with_hyphen() {
        let outcome = decompose("---\nCARD: my-items\n---\n\nBody.");
        assert!(outcome.is_err());
    }

    /// A QUILL reference containing uppercase letters is rejected.
    #[test]
    fn test_invalid_quill_ref_uppercase() {
        let outcome = decompose("---\nQUILL: MyQuill\n---\n\nBody.");
        assert!(outcome.is_err());
    }

    /// Frontmatter that is not a `key: value` mapping causes a parse error.
    ///
    /// NOTE(review): the input has no QUILL field and only `is_err()` is
    /// checked, so this does not pin down which error is raised.
    #[test]
    fn test_yaml_syntax_error_missing_colon() {
        let outcome = decompose("---\ntitle Test\n---\n\nBody.");
        assert!(outcome.is_err());
    }

    /// Inconsistent sequence indentation must not panic. Whether it is an
    /// error depends on the YAML parser, so the result is ignored on purpose.
    #[test]
    fn test_yaml_syntax_error_bad_indentation() {
        let _ = decompose("---\nitems:\n- one\n - two\n---\n\nBody.");
    }

    // ---- Body extraction edge cases ----

    /// Blank lines between the frontmatter and the text stay in the body.
    #[test]
    fn test_body_with_leading_newlines() {
        let source = "---\nQUILL: test_quill\ntitle: Test\n---\n\n\n\nBody with leading newlines.";
        let parsed = decompose(source).unwrap();

        let body = parsed.body().unwrap();
        assert!(body.starts_with('\n'));
    }

    /// Trailing newlines at the end of the document stay in the body.
    #[test]
    fn test_body_with_trailing_newlines() {
        let source = "---\nQUILL: test_quill\ntitle: Test\n---\n\nBody.\n\n\n";
        let parsed = decompose(source).unwrap();

        let body = parsed.body().unwrap();
        assert!(body.ends_with('\n'));
    }

    /// A document that ends at the closing delimiter has an empty body.
    #[test]
    fn test_no_body_after_frontmatter() {
        let source = concat!(
            "---\n",
            "QUILL: test_quill\n",
            "title: Test\n",
            "---",
        );
        let parsed = decompose(source).unwrap();

        assert_eq!(parsed.body(), Some(""));
    }

    // ---- Tag name validation ----

    /// A lone underscore is a valid tag name.
    #[test]
    fn test_valid_tag_name_single_underscore() {
        let name = "_";
        assert!(is_valid_tag_name(name));
    }

    /// A leading underscore is allowed.
    #[test]
    fn test_valid_tag_name_underscore_prefix() {
        let name = "_private";
        assert!(is_valid_tag_name(name));
    }

    /// Digits are allowed after the first character.
    #[test]
    fn test_valid_tag_name_with_numbers() {
        for name in ["item1", "item_2"].iter() {
            assert!(is_valid_tag_name(*name));
        }
    }

    /// The empty string is not a valid tag name.
    #[test]
    fn test_invalid_tag_name_empty() {
        let name = "";
        assert!(!is_valid_tag_name(name));
    }

    /// A tag name may not start with a digit.
    #[test]
    fn test_invalid_tag_name_starts_with_number() {
        let name = "1item";
        assert!(!is_valid_tag_name(name));
    }

    /// Uppercase letters are not allowed in tag names.
    #[test]
    fn test_invalid_tag_name_uppercase() {
        for name in ["Items", "ITEMS"].iter() {
            assert!(!is_valid_tag_name(*name));
        }
    }

    /// Hyphens, dots and spaces are not allowed in tag names.
    #[test]
    fn test_invalid_tag_name_special_chars() {
        for name in ["my-items", "my.items", "my items"].iter() {
            assert!(!is_valid_tag_name(*name));
        }
    }

    // ---- Guillemets and non-string YAML values ----

    /// Integer, float and boolean scalars keep their types after parsing.
    #[test]
    fn test_guillemet_in_yaml_preserves_non_strings() {
        let source = concat!(
            "---\n",
            "QUILL: test_quill\n",
            "count: 42\n",
            "price: 19.99\n",
            "active: true\n",
            "items:\n",
            "  - first\n",
            "  - 100\n",
            "  - true\n",
            "---\n",
            "\n",
            "Body.",
        );
        let parsed = decompose(source).unwrap();

        assert_eq!(parsed.get_field("count").and_then(|v| v.as_i64()), Some(42));
        assert_eq!(parsed.get_field("price").and_then(|v| v.as_f64()), Some(19.99));
        assert_eq!(parsed.get_field("active").and_then(|v| v.as_bool()), Some(true));
    }

    /// Guillemets already present in the input are returned unchanged.
    #[test]
    fn test_guillemet_double_conversion_prevention() {
        let source = "---\nQUILL: test_quill\ntitle: Already «converted»\n---\n\nBody.";
        let parsed = decompose(source).unwrap();

        let title = parsed.get_field("title").and_then(|v| v.as_str());
        assert_eq!(title, Some("Already «converted»"));
    }

    /// A global field and a CARD tag may share a name: the global value stays
    /// readable and the card is listed in CARDS.
    #[test]
    fn test_allowed_card_field_collision() {
        let source = concat!(
            "---\n",
            "QUILL: test_quill\n",
            "my_card: \"some global value\"\n",
            "---\n",
            "\n",
            "---\n",
            "CARD: my_card\n",
            "title: \"My Card\"\n",
            "---\n",
            "Body\n",
        );
        // Per the original note, the current PARSE.md allows this collision.
        let parsed = decompose(source).unwrap();

        // The global field is still present under its own name.
        let global = parsed.get_field("my_card").and_then(|v| v.as_str());
        assert_eq!(global, Some("some global value"));

        // The card is found in the CARDS array by its tag.
        let cards = parsed.get_field("CARDS").unwrap().as_array().unwrap();
        assert!(!cards.is_empty());
        let matching = cards
            .iter()
            .find(|candidate| candidate.get("CARD").and_then(|tag| tag.as_str()) == Some("my_card"))
            .expect("Card not found");
        assert_eq!(matching.get("title").unwrap().as_str().unwrap(), "My Card");
    }

    /// A user-defined YAML tag such as `!fill` is accepted; the tagged value
    /// is read as a plain string and neighbouring fields are unaffected.
    #[test]
    fn test_yaml_custom_tags_in_frontmatter() {
        let source = concat!(
            "---\n",
            "QUILL: test_quill\n",
            "memo_from: !fill 2d lt example\n",
            "regular_field: normal value\n",
            "---\n",
            "\n",
            "Body content.",
        );
        let parsed = decompose(source).unwrap();

        // The tag is dropped; only the scalar text remains.
        let tagged = parsed.get_field("memo_from").and_then(|v| v.as_str());
        assert_eq!(tagged, Some("2d lt example"));

        let regular = parsed.get_field("regular_field").and_then(|v| v.as_str());
        assert_eq!(regular, Some("normal value"));

        assert_eq!(parsed.body(), Some("\nBody content."));
    }

    /// Test the exact example from EXTENDED_MARKDOWN.md (lines 92-127)
    #[test]
    fn test_spec_example() {
        let source = concat!(
            "---\n",
            "title: My Document\n",
            "QUILL: blog_post\n",
            "---\n",
            "Main document body.\n",
            "\n",
            "***\n",
            "\n",
            "More content after horizontal rule.\n",
            "\n",
            "---\n",
            "CARD: section\n",
            "heading: Introduction\n",
            "---\n",
            "Introduction content.\n",
            "\n",
            "---\n",
            "CARD: section\n",
            "heading: Conclusion\n",
            "---\n",
            "Conclusion content.\n",
        );
        let parsed = decompose(source).unwrap();

        // Global fields.
        let title = parsed.get_field("title").and_then(|v| v.as_str());
        assert_eq!(title, Some("My Document"));
        assert_eq!(parsed.quill_reference().name, "blog_post");

        // The main body keeps the `***` horizontal rule and the text around it.
        let main_body = parsed.body().unwrap();
        let body_parts = [
            "Main document body.",
            "***",
            "More content after horizontal rule.",
        ];
        for needle in body_parts.iter() {
            assert!(main_body.contains(*needle));
        }

        // Two `section` cards, in document order.
        let cards = parsed.get_field("CARDS").unwrap().as_array().unwrap();
        assert_eq!(cards.len(), 2);

        let expected_cards = [
            ("Introduction", "Introduction content.\n\n"),
            ("Conclusion", "Conclusion content.\n"),
        ];
        for (card, (heading, card_body)) in cards.iter().zip(expected_cards.iter()) {
            let card = card.as_object().unwrap();
            assert_eq!(card.get("CARD").unwrap().as_str().unwrap(), "section");
            assert_eq!(card.get("heading").unwrap().as_str().unwrap(), *heading);
            assert_eq!(card.get("BODY").unwrap().as_str().unwrap(), *card_body);
        }
    }

    /// Frontmatter without a QUILL field is rejected.
    #[test]
    fn test_missing_quill_field_errors() {
        assert_missing_quill("---\ntitle: No quill here\n---\n# Body");
    }
}