Skip to main content

quillmark_core/
parse.rs

1//! # Parsing Module
2//!
3//! Parsing functionality for markdown documents with YAML frontmatter.
4//!
5//! ## Overview
6//!
//! The `parse` module provides the [`ParsedDocument::from_markdown`] function for parsing markdown documents
//! with YAML frontmatter into named fields and a body.
8//!
9//! ## Key Types
10//!
11//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
12//! - [`BODY_FIELD`]: Constant for the field name storing document body
13//!
14//! ## Examples
15//!
16//! ### Basic Parsing
17//!
18//! ```
19//! use quillmark_core::ParsedDocument;
20//!
21//! let markdown = r#"---
22//! QUILL: my_quill
23//! title: My Document
24//! author: John Doe
25//! ---
26//!
27//! # Introduction
28//!
29//! Document content here.
30//! "#;
31//!
32//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
33//! let title = doc.get_field("title")
34//!     .and_then(|v| v.as_str())
35//!     .unwrap_or("Untitled");
36//! ```
37//!
38//! ## Error Handling
39//!
40//! The [`ParsedDocument::from_markdown`] function returns errors for:
41//! - Malformed YAML syntax
42//! - Unclosed frontmatter blocks
43//! - Multiple global frontmatter blocks
44//! - Both QUILL and CARD specified in the same block
45//! - Reserved field name usage
46//! - Name collisions
47//!
48//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.
49
50use std::collections::HashMap;
51use std::str::FromStr;
52
53use crate::error::ParseError;
54use crate::value::QuillValue;
55use crate::version::QuillReference;
56
/// The field name used to store the document body.
///
/// [`ParsedDocument::body`] looks the body up under this key, and each card
/// object in the `CARDS` array stores its own body under the same key.
/// Frontmatter that defines a field with this name is rejected by the parser.
pub const BODY_FIELD: &str = "BODY";
59
60/// A parsed markdown document with frontmatter
61#[derive(Debug, Clone)]
62pub struct ParsedDocument {
63    fields: HashMap<String, QuillValue>,
64    quill_ref: QuillReference,
65}
66
67impl ParsedDocument {
68    /// Create a ParsedDocument from fields and quill reference
69    pub fn new(fields: HashMap<String, QuillValue>, quill_ref: QuillReference) -> Self {
70        Self { fields, quill_ref }
71    }
72
73    /// Create a ParsedDocument from markdown string
74    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
75        decompose(markdown)
76    }
77
78    /// Get the quill reference (name + version selector)
79    pub fn quill_reference(&self) -> &QuillReference {
80        &self.quill_ref
81    }
82
83    /// Get the document body
84    pub fn body(&self) -> Option<&str> {
85        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
86    }
87
88    /// Get a specific field
89    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
90        self.fields.get(name)
91    }
92
93    /// Get all fields (including body)
94    pub fn fields(&self) -> &HashMap<String, QuillValue> {
95        &self.fields
96    }
97
98    /// Create a new ParsedDocument with default values applied
99    ///
100    /// This method creates a new ParsedDocument with default values applied for any
101    /// fields that are missing from the original document but have defaults specified.
102    /// Existing fields are preserved and not overwritten.
103    ///
104    /// # Arguments
105    ///
106    /// * `defaults` - A HashMap of field names to their default QuillValues
107    ///
108    /// # Returns
109    ///
110    /// A new ParsedDocument with defaults applied for missing fields
111    pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
112        let mut fields = self.fields.clone();
113
114        for (field_name, default_value) in defaults {
115            // Only apply default if field is missing
116            if !fields.contains_key(field_name) {
117                fields.insert(field_name.clone(), default_value.clone());
118            }
119        }
120
121        Self {
122            fields,
123            quill_ref: self.quill_ref.clone(),
124        }
125    }
126}
127
/// One `---`-delimited metadata block located by `find_metadata_blocks`.
///
/// Positions are byte offsets into the markdown string that was scanned.
#[derive(Debug)]
struct MetadataBlock {
    /// Offset of the opening `---`.
    start: usize,
    /// Offset just past the closing delimiter (including its trailing line
    /// ending when there is one).
    end: usize,
    /// Parsed YAML converted to JSON, with the `QUILL` / `CARD` key removed.
    /// `None` when the block is empty or whitespace-only, or when nothing is
    /// left after removing that key. A YAML parse failure never produces a
    /// block; it is reported as an error instead.
    yaml_value: Option<serde_json::Value>,
    /// Value of the `CARD` key, if the block has one.
    tag: Option<String>,
    /// Raw string value of the `QUILL` key, if the block has one.
    quill_ref: Option<String>,
}
136
/// Check that a card tag name matches `[a-z_][a-z0-9_]*`.
///
/// The first character must be an ASCII lowercase letter or `_`; every
/// following character may additionally be an ASCII digit. The empty string
/// is rejected.
fn is_valid_tag_name(name: &str) -> bool {
    let mut rest = name.chars();
    match rest.next() {
        Some(head) if head == '_' || head.is_ascii_lowercase() => {
            rest.all(|c| c == '_' || c.is_ascii_lowercase() || c.is_ascii_digit())
        }
        // Empty name, or a first character outside `[a-z_]`.
        _ => false,
    }
}
158
159/// Check if a position is inside a fenced code block
160///
161/// This uses strict fence detection per EXTENDED_MARKDOWN.md specification:
162/// - Only exactly 3 backticks (```) are valid fences
163/// - Tildes (~~~) are NOT treated as fences
164/// - 4+ backticks are NOT treated as fences
165fn is_inside_fenced_block(markdown: &str, pos: usize) -> bool {
166    let before = &markdown[..pos];
167    let mut in_fence = false;
168
169    // Check if document starts with exactly ```
170    if is_exact_fence_at(before, 0) {
171        in_fence = !in_fence;
172    }
173
174    // Scan for fence toggles after newlines
175    for (i, _) in before.match_indices('\n') {
176        if is_exact_fence_at(before, i + 1) {
177            in_fence = !in_fence;
178        }
179    }
180
181    in_fence
182}
183
/// Check whether exactly 3 backticks (not 2, not 4+) start at byte offset `pos`.
///
/// Strict specification: only exactly ``` is a valid fence marker, so the
/// character following the three backticks must not be another backtick.
/// Anything else may follow (an info string, a line ending, or end of text).
///
/// Returns `false` rather than panicking when `pos` is past the end of
/// `text` or does not fall on a character boundary: no fence can start there.
fn is_exact_fence_at(text: &str, pos: usize) -> bool {
    // `str::get` yields `None` for out-of-range and mid-character offsets.
    match text.get(pos..).and_then(|rest| rest.strip_prefix("```")) {
        Some(after_fence) => !after_fence.starts_with('`'),
        None => false,
    }
}
198
199/// Creates serde_saphyr Options with security budgets configured.
200///
201/// Uses MAX_YAML_DEPTH from error.rs to limit nesting depth at the parser level,
202/// which is more robust than heuristic-based pre-parse checks.
203fn yaml_parse_options() -> serde_saphyr::Options {
204    let budget = serde_saphyr::Budget {
205        max_depth: crate::error::MAX_YAML_DEPTH,
206        ..Default::default()
207    };
208    serde_saphyr::Options {
209        budget: Some(budget),
210        ..Default::default()
211    }
212}
213
214/// Find all metadata blocks in the document
215fn find_metadata_blocks(markdown: &str) -> Result<Vec<MetadataBlock>, crate::error::ParseError> {
216    let mut blocks = Vec::new();
217    let mut pos = 0;
218
219    while pos < markdown.len() {
220        // Look for opening "---\n" or "---\r\n"
221        let search_str = &markdown[pos..];
222        let delimiter_result = search_str
223            .find("---\n")
224            .map(|p| (p, 4, "\n"))
225            .or_else(|| search_str.find("---\r\n").map(|p| (p, 5, "\r\n")));
226
227        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
228            let abs_pos = pos + delimiter_pos;
229
230            // Check if the delimiter is at the start of a line
231            let is_start_of_line = if abs_pos == 0 {
232                true
233            } else {
234                let char_before = markdown.as_bytes()[abs_pos - 1];
235                char_before == b'\n' || char_before == b'\r'
236            };
237
238            if !is_start_of_line {
239                pos = abs_pos + 1;
240                continue;
241            }
242
243            // Skip if inside a fenced code block
244            if is_inside_fenced_block(markdown, abs_pos) {
245                pos = abs_pos + 3;
246                continue;
247            }
248
249            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"
250
251            // Check if this --- is a horizontal rule (blank lines above AND below)
252            let preceded_by_blank = if abs_pos > 0 {
253                // Check if there's a blank line before the ---
254                let before = &markdown[..abs_pos];
255                before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
256            } else {
257                false
258            };
259
260            let followed_by_blank = if content_start < markdown.len() {
261                markdown[content_start..].starts_with('\n')
262                    || markdown[content_start..].starts_with("\r\n")
263            } else {
264                false
265            };
266
267            // Horizontal rule: blank lines both above and below
268            if preceded_by_blank && followed_by_blank {
269                // This is a horizontal rule in the body, skip it
270                pos = abs_pos + 3; // Skip past "---"
271                continue;
272            }
273
274            // Check if followed by non-blank line (or if we're at document start)
275            // This starts a metadata block
276            if followed_by_blank {
277                // --- followed by blank line but NOT preceded by blank line
278                // This is NOT a metadata block opening, skip it
279                pos = abs_pos + 3;
280                continue;
281            }
282
283            // Found potential metadata block opening (followed by non-blank line)
284            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
285            let rest = &markdown[content_start..];
286
287            // First try to find delimiters with trailing newlines
288            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
289            let closing_with_newline = closing_patterns
290                .iter()
291                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
292                .min_by_key(|(p, _)| *p);
293
294            // Also check for closing at end of document (no trailing newline)
295            let closing_at_eof = ["\n---", "\r\n---"]
296                .iter()
297                .filter_map(|delim| {
298                    rest.find(delim).and_then(|p| {
299                        if p + delim.len() == rest.len() {
300                            Some((p, delim.len()))
301                        } else {
302                            None
303                        }
304                    })
305                })
306                .min_by_key(|(p, _)| *p);
307
308            let closing_result = match (closing_with_newline, closing_at_eof) {
309                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
310                (Some(_), Some(_)) => closing_with_newline,
311                (Some(_), None) => closing_with_newline,
312                (None, Some(_)) => closing_at_eof,
313                (None, None) => None,
314            };
315
316            if let Some((closing_pos, closing_len)) = closing_result {
317                let abs_closing_pos = content_start + closing_pos;
318                let content = &markdown[content_start..abs_closing_pos];
319
320                // Check YAML size limit
321                if content.len() > crate::error::MAX_YAML_SIZE {
322                    return Err(crate::error::ParseError::InputTooLarge {
323                        size: content.len(),
324                        max: crate::error::MAX_YAML_SIZE,
325                    });
326                }
327
328                // Parse YAML content to check for reserved keys (QUILL, CARD)
329                // Uses configured budget to limit nesting depth (prevents stack overflow)
330                // Normalize: treat whitespace-only content as empty frontmatter
331                let content = content.trim();
332                let (tag, quill_ref, yaml_value) = if !content.is_empty() {
333                    // Try to parse the YAML with security budgets
334                    match serde_saphyr::from_str_with_options::<serde_json::Value>(
335                        content,
336                        yaml_parse_options(),
337                    ) {
338                        Ok(parsed_yaml) => {
339                            if let Some(mapping) = parsed_yaml.as_object() {
340                                let quill_key = "QUILL";
341                                let card_key = "CARD";
342
343                                let has_quill = mapping.contains_key(quill_key);
344                                let has_card = mapping.contains_key(card_key);
345
346                                if has_quill && has_card {
347                                    return Err(crate::error::ParseError::InvalidStructure(
348                                        "Cannot specify both QUILL and CARD in the same block"
349                                            .to_string(),
350                                    ));
351                                }
352
353                                // Check for reserved field names (BODY, CARDS)
354                                const RESERVED_FIELDS: &[&str] = &["BODY", "CARDS"];
355                                for reserved in RESERVED_FIELDS {
356                                    if mapping.contains_key(*reserved) {
357                                        return Err(crate::error::ParseError::InvalidStructure(
358                                            format!(
359                                                "Reserved field name '{}' cannot be used in YAML frontmatter",
360                                                reserved
361                                            ),
362                                        ));
363                                    }
364                                }
365
366                                if has_quill {
367                                    // Extract and parse quill reference
368                                    let quill_value = mapping.get(quill_key).unwrap();
369                                    let quill_ref_str = quill_value
370                                        .as_str()
371                                        .ok_or("QUILL value must be a string")?;
372
373                                    // Parse as QuillReference to validate name and version
374                                    let _quill_ref =
375                                        quill_ref_str.parse::<QuillReference>().map_err(|e| {
376                                            crate::error::ParseError::InvalidStructure(format!(
377                                                "Invalid QUILL reference '{}': {}",
378                                                quill_ref_str, e
379                                            ))
380                                        })?;
381
382                                    // Remove QUILL from the YAML value for processing
383                                    let mut new_mapping = mapping.clone();
384                                    new_mapping.remove(quill_key);
385                                    let new_value = if new_mapping.is_empty() {
386                                        None
387                                    } else {
388                                        Some(serde_json::Value::Object(new_mapping))
389                                    };
390
391                                    (None, Some(quill_ref_str.to_string()), new_value)
392                                } else if has_card {
393                                    // Extract card field name
394                                    let card_value = mapping.get(card_key).unwrap();
395                                    let field_name =
396                                        card_value.as_str().ok_or("CARD value must be a string")?;
397
398                                    if !is_valid_tag_name(field_name) {
399                                        return Err(crate::error::ParseError::InvalidStructure(format!(
400                                            "Invalid card field name '{}': must match pattern [a-z_][a-z0-9_]*",
401                                            field_name
402                                        )));
403                                    }
404
405                                    // Remove CARD from the YAML value for processing
406                                    let mut new_mapping = mapping.clone();
407                                    new_mapping.remove(card_key);
408                                    let new_value = if new_mapping.is_empty() {
409                                        None
410                                    } else {
411                                        Some(serde_json::Value::Object(new_mapping))
412                                    };
413
414                                    (Some(field_name.to_string()), None, new_value)
415                                } else {
416                                    // No reserved keys, keep the parsed YAML
417                                    (None, None, Some(parsed_yaml))
418                                }
419                            } else {
420                                // Not a mapping, keep the parsed YAML (could be null for whitespace)
421                                (None, None, Some(parsed_yaml))
422                            }
423                        }
424                        Err(e) => {
425                            // Calculate line number for the start of this block
426                            let block_start_line = markdown[..abs_pos].lines().count() + 1;
427                            return Err(crate::error::ParseError::YamlErrorWithLocation {
428                                message: e.to_string(),
429                                line: block_start_line,
430                                block_index: blocks.len(),
431                            });
432                        }
433                    }
434                } else {
435                    // Empty content
436                    (None, None, None)
437                };
438
439                blocks.push(MetadataBlock {
440                    start: abs_pos,
441                    end: abs_closing_pos + closing_len, // After closing delimiter
442                    yaml_value,
443                    tag,
444                    quill_ref,
445                });
446
447                // Check card count limit to prevent memory exhaustion
448                if blocks.len() > crate::error::MAX_CARD_COUNT {
449                    return Err(crate::error::ParseError::InputTooLarge {
450                        size: blocks.len(),
451                        max: crate::error::MAX_CARD_COUNT,
452                    });
453                }
454
455                pos = abs_closing_pos + closing_len;
456            } else if abs_pos == 0 {
457                // Frontmatter started but not closed
458                return Err(crate::error::ParseError::InvalidStructure(
459                    "Frontmatter started but not closed with ---".to_string(),
460                ));
461            } else {
462                // Not a valid metadata block, skip this position
463                pos = abs_pos + 3;
464            }
465        } else {
466            break;
467        }
468    }
469
470    Ok(blocks)
471}
472
473/// Decompose markdown into frontmatter fields and body
474fn decompose(markdown: &str) -> Result<ParsedDocument, crate::error::ParseError> {
475    // Check input size limit
476    if markdown.len() > crate::error::MAX_INPUT_SIZE {
477        return Err(crate::error::ParseError::InputTooLarge {
478            size: markdown.len(),
479            max: crate::error::MAX_INPUT_SIZE,
480        });
481    }
482
483    let mut fields = HashMap::new();
484
485    // Find all metadata blocks
486    let blocks = find_metadata_blocks(markdown)?;
487
488    if blocks.is_empty() {
489        // No metadata blocks — entire content is body, but QUILL is required
490        return Err(crate::error::ParseError::InvalidStructure(
491            "Missing required QUILL field. Add `QUILL: <name>` to the frontmatter.".to_string(),
492        ));
493    }
494
495    // Collect all card items into unified CARDS array
496    let mut cards_array: Vec<serde_json::Value> = Vec::new();
497    let mut global_frontmatter_index: Option<usize> = None;
498    let mut quill_ref: Option<String> = None;
499
500    // First pass: identify global frontmatter, quill directive, and validate
501    for (idx, block) in blocks.iter().enumerate() {
502        if idx == 0 {
503            // Top-level frontmatter: can have QUILL or neither (not considered a card)
504            if let Some(ref name) = block.quill_ref {
505                quill_ref = Some(name.clone());
506            }
507            // If it has neither QUILL nor CARD, it's global frontmatter
508            if block.tag.is_none() && block.quill_ref.is_none() {
509                global_frontmatter_index = Some(idx);
510            }
511        } else {
512            // Inline blocks (idx > 0): MUST have CARD, cannot have QUILL
513            if block.quill_ref.is_some() {
514                return Err(crate::error::ParseError::InvalidStructure("QUILL directive can only appear in the top-level frontmatter, not in inline blocks. Use CARD instead.".to_string()));
515            }
516            if block.tag.is_none() {
517                // Inline block without CARD
518                return Err(crate::error::ParseError::missing_card_directive());
519            }
520        }
521    }
522
523    // Parse global frontmatter if present
524    if let Some(idx) = global_frontmatter_index {
525        let block = &blocks[idx];
526
527        // Get parsed JSON fields directly (already parsed in find_metadata_blocks)
528        let json_fields: HashMap<String, serde_json::Value> = match &block.yaml_value {
529            Some(serde_json::Value::Object(mapping)) => mapping
530                .iter()
531                .map(|(k, v)| (k.clone(), v.clone()))
532                .collect(),
533            Some(serde_json::Value::Null) => {
534                // Null value (from whitespace-only YAML) - treat as empty mapping
535                HashMap::new()
536            }
537            Some(_) => {
538                // Non-mapping, non-null YAML (e.g., scalar, sequence) - this is an error for frontmatter
539                return Err(crate::error::ParseError::InvalidStructure(
540                    "Invalid YAML frontmatter: expected a mapping".to_string(),
541                ));
542            }
543            None => HashMap::new(),
544        };
545
546        // Convert JSON values to QuillValue at boundary
547        for (key, value) in json_fields {
548            fields.insert(key, QuillValue::from_json(value));
549        }
550    }
551
552    // Process blocks with quill directives
553    for block in &blocks {
554        if block.quill_ref.is_some() {
555            // Quill directive blocks can have YAML content (becomes part of frontmatter)
556            if let Some(ref json_val) = block.yaml_value {
557                let json_fields: HashMap<String, serde_json::Value> = match json_val {
558                    serde_json::Value::Object(mapping) => mapping
559                        .iter()
560                        .map(|(k, v)| (k.clone(), v.clone()))
561                        .collect(),
562                    serde_json::Value::Null => {
563                        // Null value (from whitespace-only YAML) - treat as empty mapping
564                        HashMap::new()
565                    }
566                    _ => {
567                        return Err(crate::error::ParseError::InvalidStructure(
568                            "Invalid YAML in quill block: expected a mapping".to_string(),
569                        ));
570                    }
571                };
572
573                // Check for conflicts with existing fields
574                for key in json_fields.keys() {
575                    if fields.contains_key(key) {
576                        return Err(crate::error::ParseError::InvalidStructure(format!(
577                            "Name collision: quill block field '{}' conflicts with existing field",
578                            key
579                        )));
580                    }
581                }
582
583                // Convert JSON values to QuillValue at boundary
584                for (key, value) in json_fields {
585                    fields.insert(key, QuillValue::from_json(value));
586                }
587            }
588        }
589    }
590
591    // Parse tagged blocks (CARD blocks)
592    for (idx, block) in blocks.iter().enumerate() {
593        if let Some(ref tag_name) = block.tag {
594            // Get YAML metadata directly (already parsed in find_metadata_blocks)
595            // Get JSON metadata directly (already parsed in find_metadata_blocks)
596            let mut item_fields: serde_json::Map<String, serde_json::Value> =
597                match &block.yaml_value {
598                    Some(serde_json::Value::Object(mapping)) => mapping.clone(),
599                    Some(serde_json::Value::Null) => {
600                        // Null value (from whitespace-only YAML) - treat as empty mapping
601                        serde_json::Map::new()
602                    }
603                    Some(_) => {
604                        return Err(crate::error::ParseError::InvalidStructure(format!(
605                            "Invalid YAML in card block '{}': expected a mapping",
606                            tag_name
607                        )));
608                    }
609                    None => serde_json::Map::new(),
610                };
611
612            // Extract body for this card block
613            let body_start = block.end;
614            let body_end = if idx + 1 < blocks.len() {
615                blocks[idx + 1].start
616            } else {
617                markdown.len()
618            };
619            let body = &markdown[body_start..body_end];
620
621            // Add body to item fields
622            item_fields.insert(
623                BODY_FIELD.to_string(),
624                serde_json::Value::String(body.to_string()),
625            );
626
627            // Add CARD discriminator field
628            item_fields.insert(
629                "CARD".to_string(),
630                serde_json::Value::String(tag_name.clone()),
631            );
632
633            // Add to CARDS array
634            cards_array.push(serde_json::Value::Object(item_fields));
635        }
636    }
637
638    // Extract global body
639    // Body starts after global frontmatter or quill block (whichever comes first)
640    // Body ends at the first card block or EOF
641    let first_non_card_block_idx = blocks
642        .iter()
643        .position(|b| b.tag.is_none() && b.quill_ref.is_none())
644        .or_else(|| blocks.iter().position(|b| b.quill_ref.is_some()));
645
646    let (body_start, body_end) = if let Some(idx) = first_non_card_block_idx {
647        // Body starts after the first non-card block (global frontmatter or quill)
648        let start = blocks[idx].end;
649
650        // Body ends at the first card block after this, or EOF
651        let end = blocks
652            .iter()
653            .skip(idx + 1)
654            .find(|b| b.tag.is_some())
655            .map(|b| b.start)
656            .unwrap_or(markdown.len());
657
658        (start, end)
659    } else {
660        // No global frontmatter or quill block - body is everything before the first card block
661        let end = blocks
662            .iter()
663            .find(|b| b.tag.is_some())
664            .map(|b| b.start)
665            .unwrap_or(0);
666
667        (0, end)
668    };
669
670    let global_body = &markdown[body_start..body_end];
671
672    fields.insert(
673        BODY_FIELD.to_string(),
674        QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
675    );
676
677    // Always add CARDS array to fields (may be empty)
678    fields.insert(
679        "CARDS".to_string(),
680        QuillValue::from_json(serde_json::Value::Array(cards_array)),
681    );
682
683    // Check field count limit to prevent memory exhaustion
684    if fields.len() > crate::error::MAX_FIELD_COUNT {
685        return Err(crate::error::ParseError::InputTooLarge {
686            size: fields.len(),
687            max: crate::error::MAX_FIELD_COUNT,
688        });
689    }
690
691    let quill_tag = quill_ref.ok_or_else(|| {
692        ParseError::InvalidStructure(
693            "Missing required QUILL field. Add `QUILL: <name>` to the frontmatter.".to_string(),
694        )
695    })?;
696    let quill_ref = QuillReference::from_str(&quill_tag).map_err(|e| {
697        ParseError::InvalidStructure(format!("Invalid QUILL tag '{}': {}", quill_tag, e))
698    })?;
699    let parsed = ParsedDocument::new(fields, quill_ref);
700
701    Ok(parsed)
702}
703
704#[cfg(test)]
705mod tests {
706    use super::*;
707
708    #[test]
709    fn test_no_frontmatter() {
710        let markdown = "# Hello World\n\nThis is a test.";
711        let result = decompose(markdown);
712        assert!(result.is_err());
713        assert!(result
714            .unwrap_err()
715            .to_string()
716            .contains("Missing required QUILL field"));
717    }
718
719    #[test]
720    fn test_with_frontmatter() {
721        let markdown = r#"---
722QUILL: test_quill
723title: Test Document
724author: Test Author
725---
726
727# Hello World
728
729This is the body."#;
730
731        let doc = decompose(markdown).unwrap();
732
733        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
734        assert_eq!(
735            doc.get_field("title").unwrap().as_str().unwrap(),
736            "Test Document"
737        );
738        assert_eq!(
739            doc.get_field("author").unwrap().as_str().unwrap(),
740            "Test Author"
741        );
742        assert_eq!(doc.fields().len(), 4); // title, author, body, CARDS
743        assert_eq!(doc.quill_reference().name, "test_quill");
744    }
745
746    #[test]
747    fn test_whitespace_frontmatter() {
748        // Frontmatter with only whitespace has no QUILL → error
749        let markdown = "---\n   \n---\n\n# Hello";
750        let result = decompose(markdown);
751        assert!(result.is_err());
752        assert!(result
753            .unwrap_err()
754            .to_string()
755            .contains("Missing required QUILL field"));
756    }
757
758    #[test]
759    fn test_complex_yaml_frontmatter() {
760        let markdown = r#"---
761QUILL: test_quill
762title: Complex Document
763tags:
764  - test
765  - yaml
766metadata:
767  version: 1.0
768  nested:
769    field: value
770---
771
772Content here."#;
773
774        let doc = decompose(markdown).unwrap();
775
776        assert_eq!(doc.body(), Some("\nContent here."));
777        assert_eq!(
778            doc.get_field("title").unwrap().as_str().unwrap(),
779            "Complex Document"
780        );
781
782        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
783        assert_eq!(tags.len(), 2);
784        assert_eq!(tags[0].as_str().unwrap(), "test");
785        assert_eq!(tags[1].as_str().unwrap(), "yaml");
786    }
787
788    #[test]
789    fn test_with_defaults_empty_document() {
790        use std::collections::HashMap;
791
792        let mut defaults = HashMap::new();
793        defaults.insert(
794            "status".to_string(),
795            QuillValue::from_json(serde_json::json!("draft")),
796        );
797        defaults.insert(
798            "version".to_string(),
799            QuillValue::from_json(serde_json::json!(1)),
800        );
801
802        // Create an empty parsed document
803        let doc = ParsedDocument::new(HashMap::new(), QuillReference::latest("test".to_string()));
804        let doc_with_defaults = doc.with_defaults(&defaults);
805
806        // Check that defaults were applied
807        assert_eq!(
808            doc_with_defaults
809                .get_field("status")
810                .unwrap()
811                .as_str()
812                .unwrap(),
813            "draft"
814        );
815        assert_eq!(
816            doc_with_defaults
817                .get_field("version")
818                .unwrap()
819                .as_number()
820                .unwrap()
821                .as_i64()
822                .unwrap(),
823            1
824        );
825    }
826
827    #[test]
828    fn test_with_defaults_preserves_existing_values() {
829        use std::collections::HashMap;
830
831        let mut defaults = HashMap::new();
832        defaults.insert(
833            "status".to_string(),
834            QuillValue::from_json(serde_json::json!("draft")),
835        );
836
837        // Create document with existing status
838        let mut fields = HashMap::new();
839        fields.insert(
840            "status".to_string(),
841            QuillValue::from_json(serde_json::json!("published")),
842        );
843        let doc = ParsedDocument::new(fields, QuillReference::latest("test".to_string()));
844
845        let doc_with_defaults = doc.with_defaults(&defaults);
846
847        // Existing value should be preserved
848        assert_eq!(
849            doc_with_defaults
850                .get_field("status")
851                .unwrap()
852                .as_str()
853                .unwrap(),
854            "published"
855        );
856    }
857
858    #[test]
859    fn test_with_defaults_partial_application() {
860        use std::collections::HashMap;
861
862        let mut defaults = HashMap::new();
863        defaults.insert(
864            "status".to_string(),
865            QuillValue::from_json(serde_json::json!("draft")),
866        );
867        defaults.insert(
868            "version".to_string(),
869            QuillValue::from_json(serde_json::json!(1)),
870        );
871
872        // Create document with only one field
873        let mut fields = HashMap::new();
874        fields.insert(
875            "status".to_string(),
876            QuillValue::from_json(serde_json::json!("published")),
877        );
878        let doc = ParsedDocument::new(fields, QuillReference::latest("test".to_string()));
879
880        let doc_with_defaults = doc.with_defaults(&defaults);
881
882        // Existing field preserved, missing field gets default
883        assert_eq!(
884            doc_with_defaults
885                .get_field("status")
886                .unwrap()
887                .as_str()
888                .unwrap(),
889            "published"
890        );
891        assert_eq!(
892            doc_with_defaults
893                .get_field("version")
894                .unwrap()
895                .as_number()
896                .unwrap()
897                .as_i64()
898                .unwrap(),
899            1
900        );
901    }
902
903    #[test]
904    fn test_with_defaults_no_defaults() {
905        use std::collections::HashMap;
906
907        let defaults = HashMap::new(); // Empty defaults map
908
909        let doc = ParsedDocument::new(HashMap::new(), QuillReference::latest("test".to_string()));
910        let doc_with_defaults = doc.with_defaults(&defaults);
911
912        // No defaults should be applied
913        assert!(doc_with_defaults.fields().is_empty());
914    }
915
916    #[test]
917    fn test_with_defaults_complex_types() {
918        use std::collections::HashMap;
919
920        let mut defaults = HashMap::new();
921        defaults.insert(
922            "tags".to_string(),
923            QuillValue::from_json(serde_json::json!(["default", "tag"])),
924        );
925
926        let doc = ParsedDocument::new(HashMap::new(), QuillReference::latest("test".to_string()));
927        let doc_with_defaults = doc.with_defaults(&defaults);
928
929        // Complex default value should be applied
930        let tags = doc_with_defaults
931            .get_field("tags")
932            .unwrap()
933            .as_sequence()
934            .unwrap();
935        assert_eq!(tags.len(), 2);
936        assert_eq!(tags[0].as_str().unwrap(), "default");
937        assert_eq!(tags[1].as_str().unwrap(), "tag");
938    }
939
940    #[test]
941    fn test_invalid_yaml() {
942        let markdown = r#"---
943title: [invalid yaml
944author: missing close bracket
945---
946
947Content here."#;
948
949        let result = decompose(markdown);
950        assert!(result.is_err());
951        // Error message now includes location context
952        assert!(result.unwrap_err().to_string().contains("YAML error"));
953    }
954
955    #[test]
956    fn test_unclosed_frontmatter() {
957        let markdown = r#"---
958title: Test
959author: Test Author
960
961Content without closing ---"#;
962
963        let result = decompose(markdown);
964        assert!(result.is_err());
965        assert!(result.unwrap_err().to_string().contains("not closed"));
966    }
967
968    // Extended metadata tests
969
970    #[test]
971    fn test_basic_tagged_block() {
972        let markdown = r#"---
973QUILL: test_quill
974title: Main Document
975---
976
977Main body content.
978
979---
980CARD: items
981name: Item 1
982---
983
984Body of item 1."#;
985
986        let doc = decompose(markdown).unwrap();
987
988        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
989        assert_eq!(
990            doc.get_field("title").unwrap().as_str().unwrap(),
991            "Main Document"
992        );
993
994        // Cards are now in CARDS array with CARD discriminator
995        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
996        assert_eq!(cards.len(), 1);
997
998        let item = cards[0].as_object().unwrap();
999        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1000        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1001        assert_eq!(
1002            item.get(BODY_FIELD).unwrap().as_str().unwrap(),
1003            "\nBody of item 1."
1004        );
1005    }
1006
1007    #[test]
1008    fn test_multiple_tagged_blocks() {
1009        let markdown = r#"---
1010QUILL: test_quill
1011---
1012
1013---
1014CARD: items
1015name: Item 1
1016tags: [a, b]
1017---
1018
1019First item body.
1020
1021---
1022CARD: items
1023name: Item 2
1024tags: [c, d]
1025---
1026
1027Second item body."#;
1028
1029        let doc = decompose(markdown).unwrap();
1030
1031        // Cards are in CARDS array
1032        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1033        assert_eq!(cards.len(), 2);
1034
1035        let item1 = cards[0].as_object().unwrap();
1036        assert_eq!(item1.get("CARD").unwrap().as_str().unwrap(), "items");
1037        assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");
1038
1039        let item2 = cards[1].as_object().unwrap();
1040        assert_eq!(item2.get("CARD").unwrap().as_str().unwrap(), "items");
1041        assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
1042    }
1043
1044    #[test]
1045    fn test_mixed_global_and_tagged() {
1046        let markdown = r#"---
1047QUILL: test_quill
1048title: Global
1049author: John Doe
1050---
1051
1052Global body.
1053
1054---
1055CARD: sections
1056title: Section 1
1057---
1058
1059Section 1 content.
1060
1061---
1062CARD: sections
1063title: Section 2
1064---
1065
1066Section 2 content."#;
1067
1068        let doc = decompose(markdown).unwrap();
1069
1070        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
1071        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
1072
1073        // Cards are in unified CARDS array
1074        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1075        assert_eq!(cards.len(), 2);
1076        assert_eq!(
1077            cards[0]
1078                .as_object()
1079                .unwrap()
1080                .get("CARD")
1081                .unwrap()
1082                .as_str()
1083                .unwrap(),
1084            "sections"
1085        );
1086    }
1087
1088    #[test]
1089    fn test_empty_tagged_metadata() {
1090        let markdown = r#"---
1091QUILL: test_quill
1092---
1093
1094---
1095CARD: items
1096---
1097
1098Body without metadata."#;
1099
1100        let doc = decompose(markdown).unwrap();
1101
1102        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1103        assert_eq!(cards.len(), 1);
1104
1105        let item = cards[0].as_object().unwrap();
1106        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1107        assert_eq!(
1108            item.get(BODY_FIELD).unwrap().as_str().unwrap(),
1109            "\nBody without metadata."
1110        );
1111    }
1112
1113    #[test]
1114    fn test_tagged_block_without_body() {
1115        let markdown = r#"---
1116QUILL: test_quill
1117---
1118
1119---
1120CARD: items
1121name: Item
1122---"#;
1123
1124        let doc = decompose(markdown).unwrap();
1125
1126        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1127        assert_eq!(cards.len(), 1);
1128
1129        let item = cards[0].as_object().unwrap();
1130        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1131        assert_eq!(item.get(BODY_FIELD).unwrap().as_str().unwrap(), "");
1132    }
1133
1134    #[test]
1135    fn test_name_collision_global_and_tagged() {
1136        let markdown = r#"---
1137QUILL: test_quill
1138items: "global value"
1139---
1140
1141Body
1142
1143---
1144CARD: items
1145name: Item
1146---
1147
1148Item body"#;
1149
1150        let result = decompose(markdown);
1151        assert!(result.is_ok(), "Name collision should be allowed now");
1152    }
1153
1154    #[test]
1155    fn test_card_name_collision_with_array_field() {
1156        // CARD type names CAN now conflict with frontmatter field names
1157        let markdown = r#"---
1158QUILL: test_quill
1159items:
1160  - name: Global Item 1
1161    value: 100
1162---
1163
1164Global body
1165
1166---
1167CARD: items
1168name: Scope Item 1
1169---
1170
1171Scope item 1 body"#;
1172
1173        let result = decompose(markdown);
1174        assert!(
1175            result.is_ok(),
1176            "Collision with array field should be allowed"
1177        );
1178    }
1179
1180    #[test]
1181    fn test_empty_global_array_with_card() {
1182        // CARD type names CAN now conflict with frontmatter field names
1183        let markdown = r#"---
1184QUILL: test_quill
1185items: []
1186---
1187
1188Global body
1189
1190---
1191CARD: items
1192name: Item 1
1193---
1194
1195Item 1 body"#;
1196
1197        let result = decompose(markdown);
1198        assert!(
1199            result.is_ok(),
1200            "Collision with empty array field should be allowed"
1201        );
1202    }
1203
1204    #[test]
1205    fn test_reserved_field_body_rejected() {
1206        let markdown = r#"---
1207CARD: section
1208BODY: Test
1209---"#;
1210
1211        let result = decompose(markdown);
1212        assert!(result.is_err(), "BODY is a reserved field name");
1213        assert!(result
1214            .unwrap_err()
1215            .to_string()
1216            .contains("Reserved field name"));
1217    }
1218
1219    #[test]
1220    fn test_reserved_field_cards_rejected() {
1221        let markdown = r#"---
1222title: Test
1223CARDS: []
1224---"#;
1225
1226        let result = decompose(markdown);
1227        assert!(result.is_err(), "CARDS is a reserved field name");
1228        assert!(result
1229            .unwrap_err()
1230            .to_string()
1231            .contains("Reserved field name"));
1232    }
1233
1234    #[test]
1235    fn test_delimiter_inside_fenced_code_block_backticks() {
1236        let markdown = r#"---
1237QUILL: test_quill
1238title: Test
1239---
1240Here is some code:
1241
1242```yaml
1243---
1244fake: frontmatter
1245---
1246```
1247
1248More content.
1249"#;
1250
1251        let doc = decompose(markdown).unwrap();
1252        // The --- inside the code block should NOT be parsed as metadata
1253        assert!(doc.body().unwrap().contains("fake: frontmatter"));
1254        assert!(doc.get_field("fake").is_none());
1255    }
1256
1257    #[test]
1258    fn test_tildes_are_not_fences() {
1259        // Per EXTENDED_MARKDOWN.md: tildes (~~~) are NOT treated as fences
1260        // So --- inside ~~~ WILL be parsed as a metadata block
1261        let markdown = r#"---
1262QUILL: test_quill
1263title: Test
1264---
1265Here is some code:
1266
1267~~~yaml
1268---
1269CARD: code_example
1270fake: frontmatter
1271---
1272~~~
1273
1274More content.
1275"#;
1276
1277        let doc = decompose(markdown).unwrap();
1278        // The --- should be parsed as a CARD block since tildes aren't fences
1279        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1280        assert_eq!(cards.len(), 1);
1281        assert_eq!(
1282            cards[0].get("fake").unwrap().as_str().unwrap(),
1283            "frontmatter"
1284        );
1285    }
1286
1287    #[test]
1288    fn test_four_backticks_are_not_fences() {
1289        // Per EXTENDED_MARKDOWN.md: only exactly 3 backticks are valid fences
1290        // 4+ backticks are NOT treated as fences
1291        let markdown = r#"---
1292QUILL: test_quill
1293title: Test
1294---
1295Here is some code:
1296
1297````yaml
1298---
1299CARD: code_example
1300fake: frontmatter
1301---
1302````
1303
1304More content.
1305"#;
1306
1307        let doc = decompose(markdown).unwrap();
1308        // The --- should be parsed as a CARD block since 4 backticks aren't a fence
1309        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1310        assert_eq!(cards.len(), 1);
1311        assert_eq!(
1312            cards[0].get("fake").unwrap().as_str().unwrap(),
1313            "frontmatter"
1314        );
1315    }
1316
1317    #[test]
1318    fn test_invalid_tag_syntax() {
1319        let markdown = r#"---
1320CARD: Invalid-Name
1321title: Test
1322---"#;
1323
1324        let result = decompose(markdown);
1325        assert!(result.is_err());
1326        assert!(result
1327            .unwrap_err()
1328            .to_string()
1329            .contains("Invalid card field name"));
1330    }
1331
1332    #[test]
1333    fn test_multiple_global_frontmatter_blocks() {
1334        let markdown = r#"---
1335title: First
1336---
1337
1338Body
1339
1340---
1341author: Second
1342---
1343
1344More body"#;
1345
1346        let result = decompose(markdown);
1347        assert!(result.is_err());
1348
1349        // Verify the error message contains CARD hint
1350        let err = result.unwrap_err();
1351        let err_str = err.to_string();
1352        assert!(
1353            err_str.contains("CARD"),
1354            "Error should mention CARD directive: {}",
1355            err_str
1356        );
1357        assert!(
1358            err_str.contains("missing"),
1359            "Error should indicate missing directive: {}",
1360            err_str
1361        );
1362    }
1363
1364    #[test]
1365    fn test_adjacent_blocks_different_tags() {
1366        let markdown = r#"---
1367QUILL: test_quill
1368---
1369
1370---
1371CARD: items
1372name: Item 1
1373---
1374
1375Item 1 body
1376
1377---
1378CARD: sections
1379title: Section 1
1380---
1381
1382Section 1 body"#;
1383
1384        let doc = decompose(markdown).unwrap();
1385
1386        // All cards in unified CARDS array
1387        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1388        assert_eq!(cards.len(), 2);
1389
1390        // First card is "items" type
1391        let item = cards[0].as_object().unwrap();
1392        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1393        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1394
1395        // Second card is "sections" type
1396        let section = cards[1].as_object().unwrap();
1397        assert_eq!(section.get("CARD").unwrap().as_str().unwrap(), "sections");
1398        assert_eq!(section.get("title").unwrap().as_str().unwrap(), "Section 1");
1399    }
1400
1401    #[test]
1402    fn test_order_preservation() {
1403        let markdown = r#"---
1404QUILL: test_quill
1405---
1406
1407---
1408CARD: items
1409id: 1
1410---
1411
1412First
1413
1414---
1415CARD: items
1416id: 2
1417---
1418
1419Second
1420
1421---
1422CARD: items
1423id: 3
1424---
1425
1426Third"#;
1427
1428        let doc = decompose(markdown).unwrap();
1429
1430        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1431        assert_eq!(cards.len(), 3);
1432
1433        for (i, card) in cards.iter().enumerate() {
1434            let mapping = card.as_object().unwrap();
1435            assert_eq!(mapping.get("CARD").unwrap().as_str().unwrap(), "items");
1436            let id = mapping.get("id").unwrap().as_i64().unwrap();
1437            assert_eq!(id, (i + 1) as i64);
1438        }
1439    }
1440
1441    #[test]
1442    fn test_product_catalog_integration() {
1443        let markdown = r#"---
1444QUILL: test_quill
1445title: Product Catalog
1446author: John Doe
1447date: 2024-01-01
1448---
1449
1450This is the main catalog description.
1451
1452---
1453CARD: products
1454name: Widget A
1455price: 19.99
1456sku: WID-001
1457---
1458
1459The **Widget A** is our most popular product.
1460
1461---
1462CARD: products
1463name: Gadget B
1464price: 29.99
1465sku: GAD-002
1466---
1467
1468The **Gadget B** is perfect for professionals.
1469
1470---
1471CARD: reviews
1472product: Widget A
1473rating: 5
1474---
1475
1476"Excellent product! Highly recommended."
1477
1478---
1479CARD: reviews
1480product: Gadget B
1481rating: 4
1482---
1483
1484"Very good, but a bit pricey.""#;
1485
1486        let doc = decompose(markdown).unwrap();
1487
1488        // Verify global fields
1489        assert_eq!(
1490            doc.get_field("title").unwrap().as_str().unwrap(),
1491            "Product Catalog"
1492        );
1493        assert_eq!(
1494            doc.get_field("author").unwrap().as_str().unwrap(),
1495            "John Doe"
1496        );
1497        assert_eq!(
1498            doc.get_field("date").unwrap().as_str().unwrap(),
1499            "2024-01-01"
1500        );
1501
1502        // Verify global body
1503        assert!(doc.body().unwrap().contains("main catalog description"));
1504
1505        // All cards in unified CARDS array
1506        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1507        assert_eq!(cards.len(), 4); // 2 products + 2 reviews
1508
1509        // First 2 are products
1510        let product1 = cards[0].as_object().unwrap();
1511        assert_eq!(product1.get("CARD").unwrap().as_str().unwrap(), "products");
1512        assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
1513        assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);
1514
1515        let product2 = cards[1].as_object().unwrap();
1516        assert_eq!(product2.get("CARD").unwrap().as_str().unwrap(), "products");
1517        assert_eq!(product2.get("name").unwrap().as_str().unwrap(), "Gadget B");
1518
1519        // Last 2 are reviews
1520        let review1 = cards[2].as_object().unwrap();
1521        assert_eq!(review1.get("CARD").unwrap().as_str().unwrap(), "reviews");
1522        assert_eq!(
1523            review1.get("product").unwrap().as_str().unwrap(),
1524            "Widget A"
1525        );
1526        assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);
1527
1528        // Total fields: title, author, date, body, CARDS = 5
1529        assert_eq!(doc.fields().len(), 5);
1530    }
1531
1532    #[test]
1533    fn taro_quill_directive() {
1534        let markdown = r#"---
1535QUILL: usaf_memo
1536memo_for: [ORG/SYMBOL]
1537memo_from: [ORG/SYMBOL]
1538---
1539
1540This is the memo body."#;
1541
1542        let doc = decompose(markdown).unwrap();
1543
1544        // Verify quill tag is set
1545        assert_eq!(doc.quill_reference().name, "usaf_memo");
1546
1547        // Verify fields from quill block become frontmatter
1548        assert_eq!(
1549            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1550                .as_str()
1551                .unwrap(),
1552            "ORG/SYMBOL"
1553        );
1554
1555        // Verify body
1556        assert_eq!(doc.body(), Some("\nThis is the memo body."));
1557    }
1558
1559    #[test]
1560    fn test_quill_with_card_blocks() {
1561        let markdown = r#"---
1562QUILL: document
1563title: Test Document
1564---
1565
1566Main body.
1567
1568---
1569CARD: sections
1570name: Section 1
1571---
1572
1573Section 1 body."#;
1574
1575        let doc = decompose(markdown).unwrap();
1576
1577        // Verify quill tag
1578        assert_eq!(doc.quill_reference().name, "document");
1579
1580        // Verify global field from quill block
1581        assert_eq!(
1582            doc.get_field("title").unwrap().as_str().unwrap(),
1583            "Test Document"
1584        );
1585
1586        // Verify card blocks work via CARDS array
1587        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1588        assert_eq!(cards.len(), 1);
1589        assert_eq!(
1590            cards[0]
1591                .as_object()
1592                .unwrap()
1593                .get("CARD")
1594                .unwrap()
1595                .as_str()
1596                .unwrap(),
1597            "sections"
1598        );
1599
1600        // Verify body
1601        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1602    }
1603
1604    #[test]
1605    fn test_multiple_quill_directives_error() {
1606        let markdown = r#"---
1607QUILL: first
1608---
1609
1610---
1611QUILL: second
1612---"#;
1613
1614        let result = decompose(markdown);
1615        assert!(result.is_err());
1616        // QUILL in inline block is now an error (must appear in top-level frontmatter only)
1617        assert!(result
1618            .unwrap_err()
1619            .to_string()
1620            .contains("top-level frontmatter"));
1621    }
1622
1623    #[test]
1624    fn test_invalid_quill_ref() {
1625        let markdown = r#"---
1626QUILL: Invalid-Name
1627---"#;
1628
1629        let result = decompose(markdown);
1630        assert!(result.is_err());
1631        assert!(result
1632            .unwrap_err()
1633            .to_string()
1634            .contains("Invalid QUILL reference"));
1635    }
1636
1637    #[test]
1638    fn test_quill_wrong_value_type() {
1639        let markdown = r#"---
1640QUILL: 123
1641---"#;
1642
1643        let result = decompose(markdown);
1644        assert!(result.is_err());
1645        assert!(result
1646            .unwrap_err()
1647            .to_string()
1648            .contains("QUILL value must be a string"));
1649    }
1650
1651    #[test]
1652    fn test_card_wrong_value_type() {
1653        let markdown = r#"---
1654CARD: 123
1655---"#;
1656
1657        let result = decompose(markdown);
1658        assert!(result.is_err());
1659        assert!(result
1660            .unwrap_err()
1661            .to_string()
1662            .contains("CARD value must be a string"));
1663    }
1664
1665    #[test]
1666    fn test_both_quill_and_card_error() {
1667        let markdown = r#"---
1668QUILL: test
1669CARD: items
1670---"#;
1671
1672        let result = decompose(markdown);
1673        assert!(result.is_err());
1674        assert!(result
1675            .unwrap_err()
1676            .to_string()
1677            .contains("Cannot specify both QUILL and CARD"));
1678    }
1679
1680    #[test]
1681    fn test_blank_lines_in_frontmatter() {
1682        // New parsing standard: blank lines are allowed within YAML blocks
1683        let markdown = r#"---
1684QUILL: test_quill
1685title: Test Document
1686author: Test Author
1687
1688description: This has a blank line above it
1689tags:
1690  - one
1691  - two
1692---
1693
1694# Hello World
1695
1696This is the body."#;
1697
1698        let doc = decompose(markdown).unwrap();
1699
1700        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
1701        assert_eq!(
1702            doc.get_field("title").unwrap().as_str().unwrap(),
1703            "Test Document"
1704        );
1705        assert_eq!(
1706            doc.get_field("author").unwrap().as_str().unwrap(),
1707            "Test Author"
1708        );
1709        assert_eq!(
1710            doc.get_field("description").unwrap().as_str().unwrap(),
1711            "This has a blank line above it"
1712        );
1713
1714        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
1715        assert_eq!(tags.len(), 2);
1716    }
1717
1718    #[test]
1719    fn test_blank_lines_in_scope_blocks() {
1720        // Blank lines should be allowed in CARD blocks too
1721        let markdown = r#"---
1722QUILL: test_quill
1723---
1724
1725---
1726CARD: items
1727name: Item 1
1728
1729price: 19.99
1730
1731tags:
1732  - electronics
1733  - gadgets
1734---
1735
1736Body of item 1."#;
1737
1738        let doc = decompose(markdown).unwrap();
1739
1740        // Cards are in CARDS array
1741        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1742        assert_eq!(cards.len(), 1);
1743
1744        let item = cards[0].as_object().unwrap();
1745        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1746        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1747        assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);
1748
1749        let tags = item.get("tags").unwrap().as_array().unwrap();
1750        assert_eq!(tags.len(), 2);
1751    }
1752
1753    #[test]
1754    fn test_horizontal_rule_with_blank_lines_above_and_below() {
1755        // Horizontal rule: blank lines both above AND below the ---
1756        let markdown = r#"---
1757QUILL: test_quill
1758title: Test
1759---
1760
1761First paragraph.
1762
1763---
1764
1765Second paragraph."#;
1766
1767        let doc = decompose(markdown).unwrap();
1768
1769        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1770
1771        // The body should contain the horizontal rule (---) as part of the content
1772        let body = doc.body().unwrap();
1773        assert!(body.contains("First paragraph."));
1774        assert!(body.contains("---"));
1775        assert!(body.contains("Second paragraph."));
1776    }
1777
1778    #[test]
1779    fn test_horizontal_rule_not_preceded_by_blank() {
1780        // --- not preceded by blank line but followed by blank line is NOT a horizontal rule
1781        // It's also NOT a valid metadata block opening (since it's followed by blank)
1782        let markdown = r#"---
1783QUILL: test_quill
1784title: Test
1785---
1786
1787First paragraph.
1788---
1789
1790Second paragraph."#;
1791
1792        let doc = decompose(markdown).unwrap();
1793
1794        let body = doc.body().unwrap();
1795        // The second --- should be in the body as text (not a horizontal rule since no blank above)
1796        assert!(body.contains("---"));
1797    }
1798
1799    #[test]
1800    fn test_multiple_blank_lines_in_yaml() {
1801        // Multiple blank lines should also be allowed
1802        let markdown = r#"---
1803QUILL: test_quill
1804title: Test
1805
1806
1807author: John Doe
1808
1809
1810version: 1.0
1811---
1812
1813Body content."#;
1814
1815        let doc = decompose(markdown).unwrap();
1816
1817        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1818        assert_eq!(
1819            doc.get_field("author").unwrap().as_str().unwrap(),
1820            "John Doe"
1821        );
1822        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1823    }
1824
1825    #[test]
1826    fn test_html_comment_interaction() {
1827        let markdown = r#"<!---
1828---> the rest of the page content
1829
1830---
1831QUILL: test_quill
1832key: value
1833---
1834"#;
1835        let doc = decompose(markdown).unwrap();
1836
1837        // The comment should be ignored (or at least not cause a parse error)
1838        // The frontmatter should be parsed
1839        let key = doc.get_field("key").and_then(|v| v.as_str());
1840        assert_eq!(key, Some("value"));
1841    }
1842}
1843#[cfg(test)]
1844mod demo_file_test {
1845    use super::*;
1846
1847    #[test]
1848    fn test_extended_metadata_demo_file() {
1849        let markdown = include_str!("../../fixtures/resources/extended_metadata_demo.md");
1850        let doc = decompose(markdown).unwrap();
1851
1852        // Verify global fields
1853        assert_eq!(
1854            doc.get_field("title").unwrap().as_str().unwrap(),
1855            "Extended Metadata Demo"
1856        );
1857        assert_eq!(
1858            doc.get_field("author").unwrap().as_str().unwrap(),
1859            "Quillmark Team"
1860        );
1861        // version is parsed as a number by YAML
1862        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1863
1864        // Verify body
1865        assert!(doc
1866            .body()
1867            .unwrap()
1868            .contains("extended YAML metadata standard"));
1869
1870        // All cards are now in unified CARDS array
1871        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1872        assert_eq!(cards.len(), 5); // 3 features + 2 use_cases
1873
1874        // Count features and use_cases cards
1875        let features_count = cards
1876            .iter()
1877            .filter(|c| {
1878                c.as_object()
1879                    .unwrap()
1880                    .get("CARD")
1881                    .unwrap()
1882                    .as_str()
1883                    .unwrap()
1884                    == "features"
1885            })
1886            .count();
1887        let use_cases_count = cards
1888            .iter()
1889            .filter(|c| {
1890                c.as_object()
1891                    .unwrap()
1892                    .get("CARD")
1893                    .unwrap()
1894                    .as_str()
1895                    .unwrap()
1896                    == "use_cases"
1897            })
1898            .count();
1899        assert_eq!(features_count, 3);
1900        assert_eq!(use_cases_count, 2);
1901
1902        // Check first card is a feature
1903        let feature1 = cards[0].as_object().unwrap();
1904        assert_eq!(feature1.get("CARD").unwrap().as_str().unwrap(), "features");
1905        assert_eq!(
1906            feature1.get("name").unwrap().as_str().unwrap(),
1907            "Tag Directives"
1908        );
1909    }
1910
1911    #[test]
1912    fn test_input_size_limit() {
1913        // Create markdown larger than MAX_INPUT_SIZE (10 MB)
1914        let size = crate::error::MAX_INPUT_SIZE + 1;
1915        let large_markdown = "a".repeat(size);
1916
1917        let result = decompose(&large_markdown);
1918        assert!(result.is_err());
1919
1920        let err_msg = result.unwrap_err().to_string();
1921        assert!(err_msg.contains("Input too large"));
1922    }
1923
1924    #[test]
1925    fn test_yaml_size_limit() {
1926        // Create YAML block larger than MAX_YAML_SIZE (1 MB)
1927        let mut markdown = String::from("---\n");
1928
1929        // Create a very large YAML field
1930        let size = crate::error::MAX_YAML_SIZE + 1;
1931        markdown.push_str("data: \"");
1932        markdown.push_str(&"x".repeat(size));
1933        markdown.push_str("\"\n---\n\nBody");
1934
1935        let result = decompose(&markdown);
1936        assert!(result.is_err());
1937
1938        let err_msg = result.unwrap_err().to_string();
1939        assert!(err_msg.contains("Input too large"));
1940    }
1941
1942    #[test]
1943    fn test_input_within_size_limit() {
1944        // Create markdown just under the limit
1945        let size = 1000; // Much smaller than limit
1946        let markdown = format!(
1947            "---\nQUILL: test_quill\ntitle: Test\n---\n\n{}",
1948            "a".repeat(size)
1949        );
1950
1951        let result = decompose(&markdown);
1952        assert!(result.is_ok());
1953    }
1954
1955    #[test]
1956    fn test_yaml_within_size_limit() {
1957        // Create YAML block well within the limit
1958        let markdown = "---\nQUILL: test_quill\ntitle: Test\nauthor: John Doe\n---\n\nBody content";
1959
1960        let result = decompose(markdown);
1961        assert!(result.is_ok());
1962    }
1963
1964    #[test]
1965    fn test_yaml_depth_limit() {
1966        // Create deeply nested YAML that exceeds MAX_YAML_DEPTH (100 levels)
1967        // This tests serde-saphyr's Budget.max_depth enforcement
1968        let mut yaml_content = String::new();
1969        for i in 0..110 {
1970            yaml_content.push_str(&"  ".repeat(i));
1971            yaml_content.push_str(&format!("level{}: value\n", i));
1972        }
1973
1974        let markdown = format!("---\n{}---\n\nBody", yaml_content);
1975        let result = decompose(&markdown);
1976
1977        assert!(result.is_err());
1978        let err_msg = result.unwrap_err().to_string();
1979        // serde-saphyr returns "budget exceeded" or similar for depth violations
1980        assert!(
1981            err_msg.to_lowercase().contains("budget")
1982                || err_msg.to_lowercase().contains("depth")
1983                || err_msg.contains("YAML"),
1984            "Expected depth/budget error, got: {}",
1985            err_msg
1986        );
1987    }
1988
1989    #[test]
1990    fn test_yaml_depth_within_limit() {
1991        // Create reasonably nested YAML (should succeed)
1992        let markdown = r#"---
1993QUILL: test_quill
1994level1:
1995  level2:
1996    level3:
1997      level4:
1998        value: test
1999---
2000
2001Body content"#;
2002
2003        let result = decompose(markdown);
2004        assert!(result.is_ok());
2005    }
2006
2007    // Tests for guillemet preservation in parsing (guillemets are NOT converted during parsing)
2008    // Guillemet conversion now happens in process_plate, not during parsing
2009    #[test]
2010    fn test_chevrons_preserved_in_body_no_frontmatter() {
2011        let markdown = "---\nQUILL: test_quill\n---\nUse <<raw content>> here.";
2012        let doc = decompose(markdown).unwrap();
2013
2014        // Body should preserve chevrons (conversion happens later in process_plate)
2015        assert_eq!(doc.body(), Some("Use <<raw content>> here."));
2016    }
2017
2018    #[test]
2019    fn test_chevrons_preserved_in_body_with_frontmatter() {
2020        let markdown = r#"---
2021QUILL: test_quill
2022title: Test
2023---
2024
2025Use <<raw content>> here."#;
2026        let doc = decompose(markdown).unwrap();
2027
2028        // Body should preserve chevrons
2029        assert_eq!(doc.body(), Some("\nUse <<raw content>> here."));
2030    }
2031
2032    #[test]
2033    fn test_chevrons_preserved_in_yaml_string() {
2034        let markdown = r#"---
2035QUILL: test_quill
2036title: Test <<with chevrons>>
2037---
2038
2039Body content."#;
2040        let doc = decompose(markdown).unwrap();
2041
2042        // YAML string values should preserve chevrons
2043        assert_eq!(
2044            doc.get_field("title").unwrap().as_str().unwrap(),
2045            "Test <<with chevrons>>"
2046        );
2047    }
2048
2049    #[test]
2050    fn test_chevrons_preserved_in_yaml_array() {
2051        let markdown = r#"---
2052QUILL: test_quill
2053items:
2054  - "<<first>>"
2055  - "<<second>>"
2056---
2057
2058Body."#;
2059        let doc = decompose(markdown).unwrap();
2060
2061        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
2062        assert_eq!(items[0].as_str().unwrap(), "<<first>>");
2063        assert_eq!(items[1].as_str().unwrap(), "<<second>>");
2064    }
2065
2066    #[test]
2067    fn test_chevrons_preserved_in_yaml_nested() {
2068        let markdown = r#"---
2069QUILL: test_quill
2070metadata:
2071  description: "<<nested value>>"
2072---
2073
2074Body."#;
2075        let doc = decompose(markdown).unwrap();
2076
2077        let metadata = doc.get_field("metadata").unwrap().as_object().unwrap();
2078        assert_eq!(
2079            metadata.get("description").unwrap().as_str().unwrap(),
2080            "<<nested value>>"
2081        );
2082    }
2083
2084    #[test]
2085    fn test_chevrons_preserved_in_code_blocks() {
2086        let markdown =
2087            "---\nQUILL: test_quill\n---\n```\n<<in code block>>\n```\n\n<<outside code block>>";
2088        let doc = decompose(markdown).unwrap();
2089
2090        let body = doc.body().unwrap();
2091        // All chevrons should be preserved (no conversion during parsing)
2092        assert!(body.contains("<<in code block>>"));
2093        assert!(body.contains("<<outside code block>>"));
2094    }
2095
2096    #[test]
2097    fn test_chevrons_preserved_in_inline_code() {
2098        let markdown =
2099            "---\nQUILL: test_quill\n---\n`<<in inline code>>` and <<outside inline code>>";
2100        let doc = decompose(markdown).unwrap();
2101
2102        let body = doc.body().unwrap();
2103        // All chevrons should be preserved
2104        assert!(body.contains("`<<in inline code>>`"));
2105        assert!(body.contains("<<outside inline code>>"));
2106    }
2107
2108    #[test]
2109    fn test_chevrons_preserved_in_tagged_block_body() {
2110        let markdown = r#"---
2111QUILL: test_quill
2112title: Main
2113---
2114
2115Main body.
2116
2117---
2118CARD: items
2119name: Item 1
2120---
2121
2122Use <<raw>> here."#;
2123        let doc = decompose(markdown).unwrap();
2124
2125        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2126        let item = cards[0].as_object().unwrap();
2127        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
2128        let item_body = item.get(BODY_FIELD).unwrap().as_str().unwrap();
2129        // Tagged block body should preserve chevrons
2130        assert!(item_body.contains("<<raw>>"));
2131    }
2132
2133    #[test]
2134    fn test_chevrons_preserved_in_tagged_block_yaml() {
2135        let markdown = r#"---
2136QUILL: test_quill
2137title: Main
2138---
2139
2140Main body.
2141
2142---
2143CARD: items
2144description: "<<tagged yaml>>"
2145---
2146
2147Item body."#;
2148        let doc = decompose(markdown).unwrap();
2149
2150        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2151        let item = cards[0].as_object().unwrap();
2152        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
2153        // Tagged block YAML should preserve chevrons
2154        assert_eq!(
2155            item.get("description").unwrap().as_str().unwrap(),
2156            "<<tagged yaml>>"
2157        );
2158    }
2159
2160    #[test]
2161    fn test_yaml_numbers_not_affected() {
2162        // Numbers should not be affected
2163        let markdown = r#"---
2164QUILL: test_quill
2165count: 42
2166---
2167
2168Body."#;
2169        let doc = decompose(markdown).unwrap();
2170        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
2171    }
2172
2173    #[test]
2174    fn test_yaml_booleans_not_affected() {
2175        // Booleans should not be affected
2176        let markdown = r#"---
2177QUILL: test_quill
2178active: true
2179---
2180
2181Body."#;
2182        let doc = decompose(markdown).unwrap();
2183        assert!(doc.get_field("active").unwrap().as_bool().unwrap());
2184    }
2185
2186    #[test]
2187    fn test_multiline_chevrons_preserved() {
2188        // Multiline chevrons should be preserved as-is
2189        let markdown = "---\nQUILL: test_quill\n---\n<<text\nacross lines>>";
2190        let doc = decompose(markdown).unwrap();
2191
2192        let body = doc.body().unwrap();
2193        // Should contain the original chevrons
2194        assert!(body.contains("<<text"));
2195        assert!(body.contains("across lines>>"));
2196    }
2197
2198    #[test]
2199    fn test_unmatched_chevrons_preserved() {
2200        let markdown = "---\nQUILL: test_quill\n---\n<<unmatched";
2201        let doc = decompose(markdown).unwrap();
2202
2203        let body = doc.body().unwrap();
2204        // Unmatched should remain as-is
2205        assert_eq!(body, "<<unmatched");
2206    }
2207}
2208
2209// Additional robustness tests
2210#[cfg(test)]
2211mod robustness_tests {
2212    use super::*;
2213
2214    // Edge cases for delimiter handling
2215
2216    #[test]
2217    fn test_empty_document() {
2218        let result = decompose("");
2219        assert!(result.is_err());
2220        assert!(result
2221            .unwrap_err()
2222            .to_string()
2223            .contains("Missing required QUILL field"));
2224    }
2225
2226    #[test]
2227    fn test_only_whitespace() {
2228        let result = decompose("   \n\n   \t");
2229        assert!(result.is_err());
2230        assert!(result
2231            .unwrap_err()
2232            .to_string()
2233            .contains("Missing required QUILL field"));
2234    }
2235
2236    #[test]
2237    fn test_only_dashes() {
2238        // "---" without newline is not a frontmatter delimiter → no blocks → QUILL error
2239        let result = decompose("---");
2240        assert!(result.is_err());
2241        assert!(result
2242            .unwrap_err()
2243            .to_string()
2244            .contains("Missing required QUILL field"));
2245    }
2246
2247    #[test]
2248    fn test_dashes_in_middle_of_line() {
2249        // --- not at start of line should not be treated as delimiter
2250        let markdown = "---\nQUILL: test_quill\n---\nsome text --- more text";
2251        let doc = decompose(markdown).unwrap();
2252        assert_eq!(doc.body(), Some("some text --- more text"));
2253    }
2254
2255    #[test]
2256    fn test_four_dashes() {
2257        // ---- is not a valid delimiter — QUILL required
2258        let result = decompose("----\ntitle: Test\n----\n\nBody");
2259        assert!(result.is_err());
2260        assert!(result
2261            .unwrap_err()
2262            .to_string()
2263            .contains("Missing required QUILL field"));
2264    }
2265
2266    #[test]
2267    fn test_crlf_line_endings() {
2268        // Windows-style line endings
2269        let markdown = "---\r\nQUILL: test_quill\r\ntitle: Test\r\n---\r\n\r\nBody content.";
2270        let doc = decompose(markdown).unwrap();
2271        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
2272        assert!(doc.body().unwrap().contains("Body content."));
2273    }
2274
2275    #[test]
2276    fn test_mixed_line_endings() {
2277        // Mix of \n and \r\n
2278        let markdown = "---\nQUILL: test_quill\r\ntitle: Test\r\n---\n\nBody.";
2279        let doc = decompose(markdown).unwrap();
2280        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
2281    }
2282
2283    #[test]
2284    fn test_frontmatter_at_eof_no_trailing_newline() {
2285        // Frontmatter closed at EOF without trailing newline
2286        let markdown = "---\nQUILL: test_quill\ntitle: Test\n---";
2287        let doc = decompose(markdown).unwrap();
2288        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
2289        assert_eq!(doc.body(), Some(""));
2290    }
2291
2292    #[test]
2293    fn test_empty_frontmatter() {
2294        // Empty/whitespace-only frontmatter has no QUILL → error
2295        let markdown = "---\n \n---\n\nBody content.";
2296        let result = decompose(markdown);
2297        assert!(result.is_err());
2298        assert!(result
2299            .unwrap_err()
2300            .to_string()
2301            .contains("Missing required QUILL field"));
2302    }
2303
2304    #[test]
2305    fn test_whitespace_only_frontmatter() {
2306        // Frontmatter with only whitespace → no QUILL → error
2307        let markdown = "---\n   \n\n   \n---\n\nBody.";
2308        let result = decompose(markdown);
2309        assert!(result.is_err());
2310        assert!(result
2311            .unwrap_err()
2312            .to_string()
2313            .contains("Missing required QUILL field"));
2314    }
2315
2316    // Unicode handling
2317
2318    #[test]
2319    fn test_unicode_in_yaml_keys() {
2320        let markdown = "---\nQUILL: test_quill\ntitre: Bonjour\nタイトル: こんにちは\n---\n\nBody.";
2321        let doc = decompose(markdown).unwrap();
2322        assert_eq!(doc.get_field("titre").unwrap().as_str().unwrap(), "Bonjour");
2323        assert_eq!(
2324            doc.get_field("タイトル").unwrap().as_str().unwrap(),
2325            "こんにちは"
2326        );
2327    }
2328
2329    #[test]
2330    fn test_unicode_in_yaml_values() {
2331        let markdown = "---\nQUILL: test_quill\ntitle: 你好世界 🎉\n---\n\nBody.";
2332        let doc = decompose(markdown).unwrap();
2333        assert_eq!(
2334            doc.get_field("title").unwrap().as_str().unwrap(),
2335            "你好世界 🎉"
2336        );
2337    }
2338
2339    #[test]
2340    fn test_unicode_in_body() {
2341        let markdown = "---\nQUILL: test_quill\ntitle: Test\n---\n\n日本語テキスト with emoji 🚀";
2342        let doc = decompose(markdown).unwrap();
2343        assert!(doc.body().unwrap().contains("日本語テキスト"));
2344        assert!(doc.body().unwrap().contains("🚀"));
2345    }
2346
2347    // YAML edge cases
2348
2349    #[test]
2350    fn test_yaml_multiline_string() {
2351        let markdown = r#"---
2352QUILL: test_quill
2353description: |
2354  This is a
2355  multiline string
2356  with preserved newlines.
2357---
2358
2359Body."#;
2360        let doc = decompose(markdown).unwrap();
2361        let desc = doc.get_field("description").unwrap().as_str().unwrap();
2362        assert!(desc.contains("multiline string"));
2363        assert!(desc.contains('\n'));
2364    }
2365
2366    #[test]
2367    fn test_yaml_folded_string() {
2368        let markdown = r#"---
2369QUILL: test_quill
2370description: >
2371  This is a folded
2372  string that becomes
2373  a single line.
2374---
2375
2376Body."#;
2377        let doc = decompose(markdown).unwrap();
2378        let desc = doc.get_field("description").unwrap().as_str().unwrap();
2379        // Folded strings join lines with spaces
2380        assert!(desc.contains("folded"));
2381    }
2382
2383    #[test]
2384    fn test_yaml_null_value() {
2385        let markdown = "---\nQUILL: test_quill\noptional: null\n---\n\nBody.";
2386        let doc = decompose(markdown).unwrap();
2387        assert!(doc.get_field("optional").unwrap().is_null());
2388    }
2389
2390    #[test]
2391    fn test_yaml_empty_string_value() {
2392        let markdown = "---\nQUILL: test_quill\nempty: \"\"\n---\n\nBody.";
2393        let doc = decompose(markdown).unwrap();
2394        assert_eq!(doc.get_field("empty").unwrap().as_str().unwrap(), "");
2395    }
2396
2397    #[test]
2398    fn test_yaml_special_characters_in_string() {
2399        let markdown =
2400            "---\nQUILL: test_quill\nspecial: \"colon: here, and [brackets]\"\n---\n\nBody.";
2401        let doc = decompose(markdown).unwrap();
2402        assert_eq!(
2403            doc.get_field("special").unwrap().as_str().unwrap(),
2404            "colon: here, and [brackets]"
2405        );
2406    }
2407
2408    #[test]
2409    fn test_yaml_nested_objects() {
2410        let markdown = r#"---
2411QUILL: test_quill
2412config:
2413  database:
2414    host: localhost
2415    port: 5432
2416  cache:
2417    enabled: true
2418---
2419
2420Body."#;
2421        let doc = decompose(markdown).unwrap();
2422        let config = doc.get_field("config").unwrap().as_object().unwrap();
2423        let db = config.get("database").unwrap().as_object().unwrap();
2424        assert_eq!(db.get("host").unwrap().as_str().unwrap(), "localhost");
2425        assert_eq!(db.get("port").unwrap().as_i64().unwrap(), 5432);
2426    }
2427
2428    // CARD block edge cases
2429
2430    #[test]
2431    fn test_card_with_empty_body() {
2432        let markdown = r#"---
2433QUILL: test_quill
2434---
2435
2436---
2437CARD: items
2438name: Item
2439---"#;
2440        let doc = decompose(markdown).unwrap();
2441        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2442        assert_eq!(cards.len(), 1);
2443        let item = cards[0].as_object().unwrap();
2444        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
2445        assert_eq!(item.get(BODY_FIELD).unwrap().as_str().unwrap(), "");
2446    }
2447
2448    #[test]
2449    fn test_card_consecutive_blocks() {
2450        let markdown = r#"---
2451QUILL: test_quill
2452---
2453
2454---
2455CARD: a
2456id: 1
2457---
2458---
2459CARD: a
2460id: 2
2461---"#;
2462        let doc = decompose(markdown).unwrap();
2463        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2464        assert_eq!(cards.len(), 2);
2465        assert_eq!(
2466            cards[0]
2467                .as_object()
2468                .unwrap()
2469                .get("CARD")
2470                .unwrap()
2471                .as_str()
2472                .unwrap(),
2473            "a"
2474        );
2475        assert_eq!(
2476            cards[1]
2477                .as_object()
2478                .unwrap()
2479                .get("CARD")
2480                .unwrap()
2481                .as_str()
2482                .unwrap(),
2483            "a"
2484        );
2485    }
2486
2487    #[test]
2488    fn test_card_with_body_containing_dashes() {
2489        let markdown = r#"---
2490QUILL: test_quill
2491---
2492
2493---
2494CARD: items
2495name: Item
2496---
2497
2498Some text with --- dashes in it."#;
2499        let doc = decompose(markdown).unwrap();
2500        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2501        let item = cards[0].as_object().unwrap();
2502        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
2503        let body = item.get(BODY_FIELD).unwrap().as_str().unwrap();
2504        assert!(body.contains("--- dashes"));
2505    }
2506
2507    // QUILL directive edge cases
2508
2509    #[test]
2510    fn test_quill_with_underscore_prefix() {
2511        let markdown = "---\nQUILL: _internal\n---\n\nBody.";
2512        let doc = decompose(markdown).unwrap();
2513        assert_eq!(doc.quill_reference().name, "_internal");
2514    }
2515
2516    #[test]
2517    fn test_quill_with_numbers() {
2518        let markdown = "---\nQUILL: form_8_v2\n---\n\nBody.";
2519        let doc = decompose(markdown).unwrap();
2520        assert_eq!(doc.quill_reference().name, "form_8_v2");
2521    }
2522
2523    #[test]
2524    fn test_quill_with_additional_fields() {
2525        let markdown = r#"---
2526QUILL: my_quill
2527title: Document Title
2528author: John Doe
2529---
2530
2531Body content."#;
2532        let doc = decompose(markdown).unwrap();
2533        assert_eq!(doc.quill_reference().name, "my_quill");
2534        assert_eq!(
2535            doc.get_field("title").unwrap().as_str().unwrap(),
2536            "Document Title"
2537        );
2538        assert_eq!(
2539            doc.get_field("author").unwrap().as_str().unwrap(),
2540            "John Doe"
2541        );
2542    }
2543
2544    // Error handling
2545
2546    #[test]
2547    fn test_invalid_scope_name_uppercase() {
2548        let markdown = "---\nCARD: ITEMS\n---\n\nBody.";
2549        let result = decompose(markdown);
2550        assert!(result.is_err());
2551        assert!(result
2552            .unwrap_err()
2553            .to_string()
2554            .contains("Invalid card field name"));
2555    }
2556
2557    #[test]
2558    fn test_invalid_scope_name_starts_with_number() {
2559        let markdown = "---\nCARD: 123items\n---\n\nBody.";
2560        let result = decompose(markdown);
2561        assert!(result.is_err());
2562    }
2563
2564    #[test]
2565    fn test_invalid_scope_name_with_hyphen() {
2566        let markdown = "---\nCARD: my-items\n---\n\nBody.";
2567        let result = decompose(markdown);
2568        assert!(result.is_err());
2569    }
2570
2571    #[test]
2572    fn test_invalid_quill_ref_uppercase() {
2573        let markdown = "---\nQUILL: MyQuill\n---\n\nBody.";
2574        let result = decompose(markdown);
2575        assert!(result.is_err());
2576    }
2577
2578    #[test]
2579    fn test_yaml_syntax_error_missing_colon() {
2580        let markdown = "---\ntitle Test\n---\n\nBody.";
2581        let result = decompose(markdown);
2582        assert!(result.is_err());
2583    }
2584
2585    #[test]
2586    fn test_yaml_syntax_error_bad_indentation() {
2587        let markdown = "---\nitems:\n- one\n - two\n---\n\nBody.";
2588        let result = decompose(markdown);
2589        // Bad indentation may or may not be an error depending on YAML parser
2590        // Just ensure it doesn't panic
2591        let _ = result;
2592    }
2593
2594    // Body extraction edge cases
2595
2596    #[test]
2597    fn test_body_with_leading_newlines() {
2598        let markdown =
2599            "---\nQUILL: test_quill\ntitle: Test\n---\n\n\n\nBody with leading newlines.";
2600        let doc = decompose(markdown).unwrap();
2601        // Body should preserve leading newlines after frontmatter
2602        assert!(doc.body().unwrap().starts_with('\n'));
2603    }
2604
2605    #[test]
2606    fn test_body_with_trailing_newlines() {
2607        let markdown = "---\nQUILL: test_quill\ntitle: Test\n---\n\nBody.\n\n\n";
2608        let doc = decompose(markdown).unwrap();
2609        // Body should preserve trailing newlines
2610        assert!(doc.body().unwrap().ends_with('\n'));
2611    }
2612
2613    #[test]
2614    fn test_no_body_after_frontmatter() {
2615        let markdown = "---\nQUILL: test_quill\ntitle: Test\n---";
2616        let doc = decompose(markdown).unwrap();
2617        assert_eq!(doc.body(), Some(""));
2618    }
2619
2620    // Tag name validation
2621
2622    #[test]
2623    fn test_valid_tag_name_single_underscore() {
2624        assert!(is_valid_tag_name("_"));
2625    }
2626
2627    #[test]
2628    fn test_valid_tag_name_underscore_prefix() {
2629        assert!(is_valid_tag_name("_private"));
2630    }
2631
2632    #[test]
2633    fn test_valid_tag_name_with_numbers() {
2634        assert!(is_valid_tag_name("item1"));
2635        assert!(is_valid_tag_name("item_2"));
2636    }
2637
2638    #[test]
2639    fn test_invalid_tag_name_empty() {
2640        assert!(!is_valid_tag_name(""));
2641    }
2642
2643    #[test]
2644    fn test_invalid_tag_name_starts_with_number() {
2645        assert!(!is_valid_tag_name("1item"));
2646    }
2647
2648    #[test]
2649    fn test_invalid_tag_name_uppercase() {
2650        assert!(!is_valid_tag_name("Items"));
2651        assert!(!is_valid_tag_name("ITEMS"));
2652    }
2653
2654    #[test]
2655    fn test_invalid_tag_name_special_chars() {
2656        assert!(!is_valid_tag_name("my-items"));
2657        assert!(!is_valid_tag_name("my.items"));
2658        assert!(!is_valid_tag_name("my items"));
2659    }
2660
2661    // Guillemet preprocessing in YAML
2662
2663    #[test]
2664    fn test_guillemet_in_yaml_preserves_non_strings() {
2665        let markdown = r#"---
2666QUILL: test_quill
2667count: 42
2668price: 19.99
2669active: true
2670items:
2671  - first
2672  - 100
2673  - true
2674---
2675
2676Body."#;
2677        let doc = decompose(markdown).unwrap();
2678        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
2679        assert_eq!(doc.get_field("price").unwrap().as_f64().unwrap(), 19.99);
2680        assert!(doc.get_field("active").unwrap().as_bool().unwrap());
2681    }
2682
2683    #[test]
2684    fn test_guillemet_double_conversion_prevention() {
2685        // Ensure «» in input doesn't get double-processed
2686        let markdown = "---\nQUILL: test_quill\ntitle: Already «converted»\n---\n\nBody.";
2687        let doc = decompose(markdown).unwrap();
2688        // Should remain as-is (not double-escaped)
2689        assert_eq!(
2690            doc.get_field("title").unwrap().as_str().unwrap(),
2691            "Already «converted»"
2692        );
2693    }
2694
2695    #[test]
2696    fn test_allowed_card_field_collision() {
2697        let markdown = r#"---
2698QUILL: test_quill
2699my_card: "some global value"
2700---
2701
2702---
2703CARD: my_card
2704title: "My Card"
2705---
2706Body
2707"#;
2708        // This should SUCCEED according to new PARSE.md
2709        let doc = decompose(markdown).unwrap();
2710
2711        // Verify global field exists
2712        assert_eq!(
2713            doc.get_field("my_card").unwrap().as_str().unwrap(),
2714            "some global value"
2715        );
2716
2717        // Verify Card exists in CARDS array
2718        let cards = doc.get_field("CARDS").unwrap().as_array().unwrap();
2719        assert!(!cards.is_empty());
2720        let card = cards
2721            .iter()
2722            .find(|v| v.get("CARD").and_then(|c| c.as_str()) == Some("my_card"))
2723            .expect("Card not found");
2724        assert_eq!(card.get("title").unwrap().as_str().unwrap(), "My Card");
2725    }
2726
2727    #[test]
2728    fn test_yaml_custom_tags_in_frontmatter() {
2729        // User-defined YAML tags like !fill should be accepted and ignored
2730        let markdown = r#"---
2731QUILL: test_quill
2732memo_from: !fill 2d lt example
2733regular_field: normal value
2734---
2735
2736Body content."#;
2737        let doc = decompose(markdown).unwrap();
2738
2739        // The tag !fill should be ignored, value parsed as string "2d lt example"
2740        assert_eq!(
2741            doc.get_field("memo_from").unwrap().as_str().unwrap(),
2742            "2d lt example"
2743        );
2744        // Regular fields should still work
2745        assert_eq!(
2746            doc.get_field("regular_field").unwrap().as_str().unwrap(),
2747            "normal value"
2748        );
2749        assert_eq!(doc.body(), Some("\nBody content."));
2750    }
2751
2752    /// Test the exact example from EXTENDED_MARKDOWN.md (lines 92-127)
2753    #[test]
2754    fn test_spec_example() {
2755        let markdown = r#"---
2756title: My Document
2757QUILL: blog_post
2758---
2759Main document body.
2760
2761***
2762
2763More content after horizontal rule.
2764
2765---
2766CARD: section
2767heading: Introduction
2768---
2769Introduction content.
2770
2771---
2772CARD: section
2773heading: Conclusion
2774---
2775Conclusion content.
2776"#;
2777
2778        let doc = decompose(markdown).unwrap();
2779
2780        // Verify global fields
2781        assert_eq!(
2782            doc.get_field("title").unwrap().as_str().unwrap(),
2783            "My Document"
2784        );
2785        assert_eq!(doc.quill_reference().name, "blog_post");
2786
2787        // Verify body contains horizontal rule (*** preserved)
2788        let body = doc.body().unwrap();
2789        assert!(body.contains("Main document body."));
2790        assert!(body.contains("***"));
2791        assert!(body.contains("More content after horizontal rule."));
2792
2793        // Verify CARDS array
2794        let cards = doc.get_field("CARDS").unwrap().as_array().unwrap();
2795        assert_eq!(cards.len(), 2);
2796
2797        // First card
2798        let card1 = cards[0].as_object().unwrap();
2799        assert_eq!(card1.get("CARD").unwrap().as_str().unwrap(), "section");
2800        assert_eq!(
2801            card1.get("heading").unwrap().as_str().unwrap(),
2802            "Introduction"
2803        );
2804        assert_eq!(
2805            card1.get("BODY").unwrap().as_str().unwrap(),
2806            "Introduction content.\n\n"
2807        );
2808
2809        // Second card
2810        let card2 = cards[1].as_object().unwrap();
2811        assert_eq!(card2.get("CARD").unwrap().as_str().unwrap(), "section");
2812        assert_eq!(
2813            card2.get("heading").unwrap().as_str().unwrap(),
2814            "Conclusion"
2815        );
2816        assert_eq!(
2817            card2.get("BODY").unwrap().as_str().unwrap(),
2818            "Conclusion content.\n"
2819        );
2820    }
2821
2822    #[test]
2823    fn test_missing_quill_field_errors() {
2824        let markdown = "---\ntitle: No quill here\n---\n# Body";
2825        let result = decompose(markdown);
2826        assert!(result.is_err());
2827        assert!(result
2828            .unwrap_err()
2829            .to_string()
2830            .contains("Missing required QUILL field"));
2831    }
2832}