quillmark_core/
parse.rs

1//! # Parsing Module
2//!
3//! Parsing functionality for markdown documents with YAML frontmatter.
4//!
5//! ## Overview
6//!
//! The `parse` module provides the [`ParsedDocument::from_markdown`] function for parsing markdown documents with YAML frontmatter into a [`ParsedDocument`].
8//!
9//! ## Key Types
10//!
11//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
12//! - [`BODY_FIELD`]: Constant for the field name storing document body
13//!
14//! ## Examples
15//!
16//! ### Basic Parsing
17//!
18//! ```
19//! use quillmark_core::ParsedDocument;
20//!
21//! let markdown = r#"---
22//! title: My Document
23//! author: John Doe
24//! ---
25//!
26//! # Introduction
27//!
28//! Document content here.
29//! "#;
30//!
31//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
32//! let title = doc.get_field("title")
33//!     .and_then(|v| v.as_str())
34//!     .unwrap_or("Untitled");
35//! ```
36//!
37//! ## Error Handling
38//!
39//! The [`ParsedDocument::from_markdown`] function returns errors for:
40//! - Malformed YAML syntax
41//! - Unclosed frontmatter blocks
42//! - Multiple global frontmatter blocks
43//! - Both QUILL and CARD specified in the same block
44//! - Reserved field name usage
45//! - Name collisions
46//!
47//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.
48
49use std::collections::HashMap;
50use std::str::FromStr;
51
52use crate::error::ParseError;
53use crate::value::QuillValue;
54use crate::version::QuillReference;
55
/// The field name used to store the document body
///
/// The parser writes the markdown body into the field map under this key (both for the
/// document itself and for each card item), and rejects frontmatter that tries to define
/// a key with this name.
pub const BODY_FIELD: &str = "BODY";
58
59/// A parsed markdown document with frontmatter
60#[derive(Debug, Clone)]
61pub struct ParsedDocument {
62    fields: HashMap<String, QuillValue>,
63    quill_ref: QuillReference,
64}
65
66impl ParsedDocument {
67    /// Create a new ParsedDocument with the given fields
68    pub fn new(fields: HashMap<String, QuillValue>) -> Self {
69        Self {
70            fields,
71            quill_ref: QuillReference::latest("__default__".to_string()),
72        }
73    }
74
75    /// Create a ParsedDocument from fields and quill reference
76    pub fn with_quill_ref(fields: HashMap<String, QuillValue>, quill_ref: QuillReference) -> Self {
77        Self { fields, quill_ref }
78    }
79
80    /// Create a ParsedDocument from markdown string
81    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
82        decompose(markdown)
83    }
84
85    /// Get the quill reference (name + version selector)
86    pub fn quill_reference(&self) -> &QuillReference {
87        &self.quill_ref
88    }
89
90    /// Get the document body
91    pub fn body(&self) -> Option<&str> {
92        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
93    }
94
95    /// Get a specific field
96    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
97        self.fields.get(name)
98    }
99
100    /// Get all fields (including body)
101    pub fn fields(&self) -> &HashMap<String, QuillValue> {
102        &self.fields
103    }
104
105    /// Create a new ParsedDocument with default values applied
106    ///
107    /// This method creates a new ParsedDocument with default values applied for any
108    /// fields that are missing from the original document but have defaults specified.
109    /// Existing fields are preserved and not overwritten.
110    ///
111    /// # Arguments
112    ///
113    /// * `defaults` - A HashMap of field names to their default QuillValues
114    ///
115    /// # Returns
116    ///
117    /// A new ParsedDocument with defaults applied for missing fields
118    pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
119        let mut fields = self.fields.clone();
120
121        for (field_name, default_value) in defaults {
122            // Only apply default if field is missing
123            if !fields.contains_key(field_name) {
124                fields.insert(field_name.clone(), default_value.clone());
125            }
126        }
127
128        Self {
129            fields,
130            quill_ref: self.quill_ref.clone(),
131        }
132    }
133
134    /// Create a new ParsedDocument with coerced field values
135    ///
136    /// This method applies type coercions to field values based on the schema.
137    /// Coercions include:
138    /// - Singular values to arrays when schema expects array
139    /// - String "true"/"false" to boolean
140    /// - Numbers to boolean (0=false, non-zero=true)
141    /// - String numbers to number type
142    /// - Boolean to number (true=1, false=0)
143    ///
144    /// # Arguments
145    ///
146    /// * `schema` - A JSON Schema object defining expected field types
147    ///
148    /// # Returns
149    ///
150    /// A new ParsedDocument with coerced field values
151    pub fn with_coercion(&self, schema: &QuillValue) -> Self {
152        use crate::schema::coerce_document;
153
154        let coerced_fields = coerce_document(schema, &self.fields);
155
156        Self {
157            fields: coerced_fields,
158            quill_ref: self.quill_ref.clone(),
159        }
160    }
161}
162
/// One `---`-delimited metadata block as located by `find_metadata_blocks`.
///
/// `start` and `end` are byte offsets into the markdown string that was scanned.
#[derive(Debug)]
struct MetadataBlock {
    /// Position of opening "---"
    start: usize,
    /// Position just past the closing delimiter (including its line ending when present)
    end: usize,
    /// Parsed YAML as JSON, with any QUILL/CARD key removed. `None` when the block content
    /// is empty/whitespace-only or when nothing is left after removing the QUILL/CARD key.
    yaml_value: Option<serde_json::Value>,
    /// Field name from CARD key
    tag: Option<String>,
    /// Quill name from QUILL key
    quill_name: Option<String>,
}
171
/// Check that a tag name matches the pattern `[a-z_][a-z0-9_]*`.
///
/// The first character must be an ASCII lowercase letter or `_`; every following
/// character must be an ASCII lowercase letter, an ASCII digit, or `_`. The empty
/// string is rejected.
fn is_valid_tag_name(name: &str) -> bool {
    let mut rest = name.chars();
    match rest.next() {
        Some(head) if head.is_ascii_lowercase() || head == '_' => {
            rest.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
        }
        // Empty name, or a first character outside [a-z_].
        _ => false,
    }
}
193
194/// Check if a position is inside a fenced code block
195///
196/// This uses strict fence detection per EXTENDED_MARKDOWN.md specification:
197/// - Only exactly 3 backticks (```) are valid fences
198/// - Tildes (~~~) are NOT treated as fences
199/// - 4+ backticks are NOT treated as fences
200fn is_inside_fenced_block(markdown: &str, pos: usize) -> bool {
201    let before = &markdown[..pos];
202    let mut in_fence = false;
203
204    // Check if document starts with exactly ```
205    if is_exact_fence_at(before, 0) {
206        in_fence = !in_fence;
207    }
208
209    // Scan for fence toggles after newlines
210    for (i, _) in before.match_indices('\n') {
211        if is_exact_fence_at(before, i + 1) {
212            in_fence = !in_fence;
213        }
214    }
215
216    in_fence
217}
218
/// Report whether `text` has a run of exactly 3 backticks starting at byte offset `pos`.
///
/// Strict specification: only exactly ``` is a valid fence marker, so a fourth backtick
/// right after the first three disqualifies the position. Offsets at or past the end of
/// `text` are never fences.
fn is_exact_fence_at(text: &str, pos: usize) -> bool {
    if pos >= text.len() {
        return false;
    }
    match text[pos..].strip_prefix("```") {
        // Three backticks found; reject if the run continues.
        Some(after) => !after.starts_with('`'),
        None => false,
    }
}
233
234/// Creates serde_saphyr Options with security budgets configured.
235///
236/// Uses MAX_YAML_DEPTH from error.rs to limit nesting depth at the parser level,
237/// which is more robust than heuristic-based pre-parse checks.
238fn yaml_parse_options() -> serde_saphyr::Options {
239    let budget = serde_saphyr::Budget {
240        max_depth: crate::error::MAX_YAML_DEPTH,
241        ..Default::default()
242    };
243    serde_saphyr::Options {
244        budget: Some(budget),
245        ..Default::default()
246    }
247}
248
249/// Find all metadata blocks in the document
250fn find_metadata_blocks(markdown: &str) -> Result<Vec<MetadataBlock>, crate::error::ParseError> {
251    let mut blocks = Vec::new();
252    let mut pos = 0;
253
254    while pos < markdown.len() {
255        // Look for opening "---\n" or "---\r\n"
256        let search_str = &markdown[pos..];
257        let delimiter_result = search_str
258            .find("---\n")
259            .map(|p| (p, 4, "\n"))
260            .or_else(|| search_str.find("---\r\n").map(|p| (p, 5, "\r\n")));
261
262        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
263            let abs_pos = pos + delimiter_pos;
264
265            // Check if the delimiter is at the start of a line
266            let is_start_of_line = if abs_pos == 0 {
267                true
268            } else {
269                let char_before = markdown.as_bytes()[abs_pos - 1];
270                char_before == b'\n' || char_before == b'\r'
271            };
272
273            if !is_start_of_line {
274                pos = abs_pos + 1;
275                continue;
276            }
277
278            // Skip if inside a fenced code block
279            if is_inside_fenced_block(markdown, abs_pos) {
280                pos = abs_pos + 3;
281                continue;
282            }
283
284            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"
285
286            // Check if this --- is a horizontal rule (blank lines above AND below)
287            let preceded_by_blank = if abs_pos > 0 {
288                // Check if there's a blank line before the ---
289                let before = &markdown[..abs_pos];
290                before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
291            } else {
292                false
293            };
294
295            let followed_by_blank = if content_start < markdown.len() {
296                markdown[content_start..].starts_with('\n')
297                    || markdown[content_start..].starts_with("\r\n")
298            } else {
299                false
300            };
301
302            // Horizontal rule: blank lines both above and below
303            if preceded_by_blank && followed_by_blank {
304                // This is a horizontal rule in the body, skip it
305                pos = abs_pos + 3; // Skip past "---"
306                continue;
307            }
308
309            // Check if followed by non-blank line (or if we're at document start)
310            // This starts a metadata block
311            if followed_by_blank {
312                // --- followed by blank line but NOT preceded by blank line
313                // This is NOT a metadata block opening, skip it
314                pos = abs_pos + 3;
315                continue;
316            }
317
318            // Found potential metadata block opening (followed by non-blank line)
319            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
320            let rest = &markdown[content_start..];
321
322            // First try to find delimiters with trailing newlines
323            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
324            let closing_with_newline = closing_patterns
325                .iter()
326                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
327                .min_by_key(|(p, _)| *p);
328
329            // Also check for closing at end of document (no trailing newline)
330            let closing_at_eof = ["\n---", "\r\n---"]
331                .iter()
332                .filter_map(|delim| {
333                    rest.find(delim).and_then(|p| {
334                        if p + delim.len() == rest.len() {
335                            Some((p, delim.len()))
336                        } else {
337                            None
338                        }
339                    })
340                })
341                .min_by_key(|(p, _)| *p);
342
343            let closing_result = match (closing_with_newline, closing_at_eof) {
344                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
345                (Some(_), Some(_)) => closing_with_newline,
346                (Some(_), None) => closing_with_newline,
347                (None, Some(_)) => closing_at_eof,
348                (None, None) => None,
349            };
350
351            if let Some((closing_pos, closing_len)) = closing_result {
352                let abs_closing_pos = content_start + closing_pos;
353                let content = &markdown[content_start..abs_closing_pos];
354
355                // Check YAML size limit
356                if content.len() > crate::error::MAX_YAML_SIZE {
357                    return Err(crate::error::ParseError::InputTooLarge {
358                        size: content.len(),
359                        max: crate::error::MAX_YAML_SIZE,
360                    });
361                }
362
363                // Parse YAML content to check for reserved keys (QUILL, CARD)
364                // Uses configured budget to limit nesting depth (prevents stack overflow)
365                // Normalize: treat whitespace-only content as empty frontmatter
366                let content = content.trim();
367                let (tag, quill_name, yaml_value) = if !content.is_empty() {
368                    // Try to parse the YAML with security budgets
369                    match serde_saphyr::from_str_with_options::<serde_json::Value>(
370                        content,
371                        yaml_parse_options(),
372                    ) {
373                        Ok(parsed_yaml) => {
374                            if let Some(mapping) = parsed_yaml.as_object() {
375                                let quill_key = "QUILL";
376                                let card_key = "CARD";
377
378                                let has_quill = mapping.contains_key(quill_key);
379                                let has_card = mapping.contains_key(card_key);
380
381                                if has_quill && has_card {
382                                    return Err(crate::error::ParseError::InvalidStructure(
383                                        "Cannot specify both QUILL and CARD in the same block"
384                                            .to_string(),
385                                    ));
386                                }
387
388                                // Check for reserved field names (BODY, CARDS)
389                                const RESERVED_FIELDS: &[&str] = &["BODY", "CARDS"];
390                                for reserved in RESERVED_FIELDS {
391                                    if mapping.contains_key(*reserved) {
392                                        return Err(crate::error::ParseError::InvalidStructure(
393                                            format!(
394                                                "Reserved field name '{}' cannot be used in YAML frontmatter",
395                                                reserved
396                                            ),
397                                        ));
398                                    }
399                                }
400
401                                if has_quill {
402                                    // Extract and parse quill reference
403                                    let quill_value = mapping.get(quill_key).unwrap();
404                                    let quill_ref_str = quill_value
405                                        .as_str()
406                                        .ok_or("QUILL value must be a string")?;
407
408                                    // Parse as QuillReference to validate name and version
409                                    let _quill_ref =
410                                        quill_ref_str.parse::<QuillReference>().map_err(|e| {
411                                            crate::error::ParseError::InvalidStructure(format!(
412                                                "Invalid QUILL reference '{}': {}",
413                                                quill_ref_str, e
414                                            ))
415                                        })?;
416
417                                    // Remove QUILL from the YAML value for processing
418                                    let mut new_mapping = mapping.clone();
419                                    new_mapping.remove(quill_key);
420                                    let new_value = if new_mapping.is_empty() {
421                                        None
422                                    } else {
423                                        Some(serde_json::Value::Object(new_mapping))
424                                    };
425
426                                    (None, Some(quill_ref_str.to_string()), new_value)
427                                } else if has_card {
428                                    // Extract card field name
429                                    let card_value = mapping.get(card_key).unwrap();
430                                    let field_name =
431                                        card_value.as_str().ok_or("CARD value must be a string")?;
432
433                                    if !is_valid_tag_name(field_name) {
434                                        return Err(crate::error::ParseError::InvalidStructure(format!(
435                                            "Invalid card field name '{}': must match pattern [a-z_][a-z0-9_]*",
436                                            field_name
437                                        )));
438                                    }
439
440                                    // Remove CARD from the YAML value for processing
441                                    let mut new_mapping = mapping.clone();
442                                    new_mapping.remove(card_key);
443                                    let new_value = if new_mapping.is_empty() {
444                                        None
445                                    } else {
446                                        Some(serde_json::Value::Object(new_mapping))
447                                    };
448
449                                    (Some(field_name.to_string()), None, new_value)
450                                } else {
451                                    // No reserved keys, keep the parsed YAML
452                                    (None, None, Some(parsed_yaml))
453                                }
454                            } else {
455                                // Not a mapping, keep the parsed YAML (could be null for whitespace)
456                                (None, None, Some(parsed_yaml))
457                            }
458                        }
459                        Err(e) => {
460                            // Calculate line number for the start of this block
461                            let block_start_line = markdown[..abs_pos].lines().count() + 1;
462                            return Err(crate::error::ParseError::YamlErrorWithLocation {
463                                message: e.to_string(),
464                                line: block_start_line,
465                                block_index: blocks.len(),
466                            });
467                        }
468                    }
469                } else {
470                    // Empty content
471                    (None, None, None)
472                };
473
474                blocks.push(MetadataBlock {
475                    start: abs_pos,
476                    end: abs_closing_pos + closing_len, // After closing delimiter
477                    yaml_value,
478                    tag,
479                    quill_name,
480                });
481
482                // Check card count limit to prevent memory exhaustion
483                if blocks.len() > crate::error::MAX_CARD_COUNT {
484                    return Err(crate::error::ParseError::InputTooLarge {
485                        size: blocks.len(),
486                        max: crate::error::MAX_CARD_COUNT,
487                    });
488                }
489
490                pos = abs_closing_pos + closing_len;
491            } else if abs_pos == 0 {
492                // Frontmatter started but not closed
493                return Err(crate::error::ParseError::InvalidStructure(
494                    "Frontmatter started but not closed with ---".to_string(),
495                ));
496            } else {
497                // Not a valid metadata block, skip this position
498                pos = abs_pos + 3;
499            }
500        } else {
501            break;
502        }
503    }
504
505    Ok(blocks)
506}
507
508/// Decompose markdown into frontmatter fields and body
509fn decompose(markdown: &str) -> Result<ParsedDocument, crate::error::ParseError> {
510    // Check input size limit
511    if markdown.len() > crate::error::MAX_INPUT_SIZE {
512        return Err(crate::error::ParseError::InputTooLarge {
513            size: markdown.len(),
514            max: crate::error::MAX_INPUT_SIZE,
515        });
516    }
517
518    let mut fields = HashMap::new();
519
520    // Find all metadata blocks
521    let blocks = find_metadata_blocks(markdown)?;
522
523    if blocks.is_empty() {
524        // No metadata blocks, entire content is body
525        fields.insert(
526            BODY_FIELD.to_string(),
527            QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
528        );
529        return Ok(ParsedDocument::new(fields));
530    }
531
532    // Collect all card items into unified CARDS array
533    let mut cards_array: Vec<serde_json::Value> = Vec::new();
534    let mut global_frontmatter_index: Option<usize> = None;
535    let mut quill_name: Option<String> = None;
536
537    // First pass: identify global frontmatter, quill directive, and validate
538    for (idx, block) in blocks.iter().enumerate() {
539        if idx == 0 {
540            // Top-level frontmatter: can have QUILL or neither (not considered a card)
541            if let Some(ref name) = block.quill_name {
542                quill_name = Some(name.clone());
543            }
544            // If it has neither QUILL nor CARD, it's global frontmatter
545            if block.tag.is_none() && block.quill_name.is_none() {
546                global_frontmatter_index = Some(idx);
547            }
548        } else {
549            // Inline blocks (idx > 0): MUST have CARD, cannot have QUILL
550            if block.quill_name.is_some() {
551                return Err(crate::error::ParseError::InvalidStructure("QUILL directive can only appear in the top-level frontmatter, not in inline blocks. Use CARD instead.".to_string()));
552            }
553            if block.tag.is_none() {
554                // Inline block without CARD
555                return Err(crate::error::ParseError::missing_card_directive());
556            }
557        }
558    }
559
560    // Parse global frontmatter if present
561    if let Some(idx) = global_frontmatter_index {
562        let block = &blocks[idx];
563
564        // Get parsed JSON fields directly (already parsed in find_metadata_blocks)
565        let json_fields: HashMap<String, serde_json::Value> = match &block.yaml_value {
566            Some(serde_json::Value::Object(mapping)) => mapping
567                .iter()
568                .map(|(k, v)| (k.clone(), v.clone()))
569                .collect(),
570            Some(serde_json::Value::Null) => {
571                // Null value (from whitespace-only YAML) - treat as empty mapping
572                HashMap::new()
573            }
574            Some(_) => {
575                // Non-mapping, non-null YAML (e.g., scalar, sequence) - this is an error for frontmatter
576                return Err(crate::error::ParseError::InvalidStructure(
577                    "Invalid YAML frontmatter: expected a mapping".to_string(),
578                ));
579            }
580            None => HashMap::new(),
581        };
582
583        // Convert JSON values to QuillValue at boundary
584        for (key, value) in json_fields {
585            fields.insert(key, QuillValue::from_json(value));
586        }
587    }
588
589    // Process blocks with quill directives
590    for block in &blocks {
591        if block.quill_name.is_some() {
592            // Quill directive blocks can have YAML content (becomes part of frontmatter)
593            if let Some(ref json_val) = block.yaml_value {
594                let json_fields: HashMap<String, serde_json::Value> = match json_val {
595                    serde_json::Value::Object(mapping) => mapping
596                        .iter()
597                        .map(|(k, v)| (k.clone(), v.clone()))
598                        .collect(),
599                    serde_json::Value::Null => {
600                        // Null value (from whitespace-only YAML) - treat as empty mapping
601                        HashMap::new()
602                    }
603                    _ => {
604                        return Err(crate::error::ParseError::InvalidStructure(
605                            "Invalid YAML in quill block: expected a mapping".to_string(),
606                        ));
607                    }
608                };
609
610                // Check for conflicts with existing fields
611                for key in json_fields.keys() {
612                    if fields.contains_key(key) {
613                        return Err(crate::error::ParseError::InvalidStructure(format!(
614                            "Name collision: quill block field '{}' conflicts with existing field",
615                            key
616                        )));
617                    }
618                }
619
620                // Convert JSON values to QuillValue at boundary
621                for (key, value) in json_fields {
622                    fields.insert(key, QuillValue::from_json(value));
623                }
624            }
625        }
626    }
627
628    // Parse tagged blocks (CARD blocks)
629    for (idx, block) in blocks.iter().enumerate() {
630        if let Some(ref tag_name) = block.tag {
631            // Get YAML metadata directly (already parsed in find_metadata_blocks)
632            // Get JSON metadata directly (already parsed in find_metadata_blocks)
633            let mut item_fields: serde_json::Map<String, serde_json::Value> =
634                match &block.yaml_value {
635                    Some(serde_json::Value::Object(mapping)) => mapping.clone(),
636                    Some(serde_json::Value::Null) => {
637                        // Null value (from whitespace-only YAML) - treat as empty mapping
638                        serde_json::Map::new()
639                    }
640                    Some(_) => {
641                        return Err(crate::error::ParseError::InvalidStructure(format!(
642                            "Invalid YAML in card block '{}': expected a mapping",
643                            tag_name
644                        )));
645                    }
646                    None => serde_json::Map::new(),
647                };
648
649            // Extract body for this card block
650            let body_start = block.end;
651            let body_end = if idx + 1 < blocks.len() {
652                blocks[idx + 1].start
653            } else {
654                markdown.len()
655            };
656            let body = &markdown[body_start..body_end];
657
658            // Add body to item fields
659            item_fields.insert(
660                BODY_FIELD.to_string(),
661                serde_json::Value::String(body.to_string()),
662            );
663
664            // Add CARD discriminator field
665            item_fields.insert(
666                "CARD".to_string(),
667                serde_json::Value::String(tag_name.clone()),
668            );
669
670            // Add to CARDS array
671            cards_array.push(serde_json::Value::Object(item_fields));
672        }
673    }
674
675    // Extract global body
676    // Body starts after global frontmatter or quill block (whichever comes first)
677    // Body ends at the first card block or EOF
678    let first_non_card_block_idx = blocks
679        .iter()
680        .position(|b| b.tag.is_none() && b.quill_name.is_none())
681        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));
682
683    let (body_start, body_end) = if let Some(idx) = first_non_card_block_idx {
684        // Body starts after the first non-card block (global frontmatter or quill)
685        let start = blocks[idx].end;
686
687        // Body ends at the first card block after this, or EOF
688        let end = blocks
689            .iter()
690            .skip(idx + 1)
691            .find(|b| b.tag.is_some())
692            .map(|b| b.start)
693            .unwrap_or(markdown.len());
694
695        (start, end)
696    } else {
697        // No global frontmatter or quill block - body is everything before the first card block
698        let end = blocks
699            .iter()
700            .find(|b| b.tag.is_some())
701            .map(|b| b.start)
702            .unwrap_or(0);
703
704        (0, end)
705    };
706
707    let global_body = &markdown[body_start..body_end];
708
709    fields.insert(
710        BODY_FIELD.to_string(),
711        QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
712    );
713
714    // Always add CARDS array to fields (may be empty)
715    fields.insert(
716        "CARDS".to_string(),
717        QuillValue::from_json(serde_json::Value::Array(cards_array)),
718    );
719
720    // Check field count limit to prevent memory exhaustion
721    if fields.len() > crate::error::MAX_FIELD_COUNT {
722        return Err(crate::error::ParseError::InputTooLarge {
723            size: fields.len(),
724            max: crate::error::MAX_FIELD_COUNT,
725        });
726    }
727
728    let quill_tag = quill_name.unwrap_or_else(|| "__default__".to_string());
729    let quill_ref = QuillReference::from_str(&quill_tag).map_err(|e| {
730        ParseError::InvalidStructure(format!("Invalid QUILL tag '{}': {}", quill_tag, e))
731    })?;
732    let parsed = ParsedDocument::with_quill_ref(fields, quill_ref);
733
734    Ok(parsed)
735}
736
737#[cfg(test)]
738mod tests {
739    use super::*;
740
741    #[test]
742    fn test_no_frontmatter() {
743        let markdown = "# Hello World\n\nThis is a test.";
744        let doc = decompose(markdown).unwrap();
745
746        assert_eq!(doc.body(), Some(markdown));
747        assert_eq!(doc.fields().len(), 1);
748        // Verify default quill tag is set
749        assert_eq!(doc.quill_reference().name, "__default__");
750    }
751
752    #[test]
753    fn test_with_frontmatter() {
754        let markdown = r#"---
755title: Test Document
756author: Test Author
757---
758
759# Hello World
760
761This is the body."#;
762
763        let doc = decompose(markdown).unwrap();
764
765        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
766        assert_eq!(
767            doc.get_field("title").unwrap().as_str().unwrap(),
768            "Test Document"
769        );
770        assert_eq!(
771            doc.get_field("author").unwrap().as_str().unwrap(),
772            "Test Author"
773        );
774        assert_eq!(doc.fields().len(), 4); // title, author, body, CARDS
775                                           // Verify default quill tag is set when no QUILL directive
776        assert_eq!(doc.quill_reference().name, "__default__");
777    }
778
779    #[test]
780    fn test_whitespace_frontmatter() {
781        // Frontmatter with only whitespace should be treated as empty/valid
782        // and not error out or be treated as null YAML
783        let markdown = "---\n   \n---\n\n# Hello";
784        let doc = decompose(markdown).unwrap();
785
786        assert_eq!(doc.body(), Some("\n# Hello"));
787        // Should have default fields (BODY + CARDS) but no others
788        // (unless defaults are applied later, but decompose returns basics)
789        assert!(doc.get_field("title").is_none());
790        assert_eq!(doc.fields().len(), 2); // BODY, CARDS
791    }
792
793    #[test]
794    fn test_complex_yaml_frontmatter() {
795        let markdown = r#"---
796title: Complex Document
797tags:
798  - test
799  - yaml
800metadata:
801  version: 1.0
802  nested:
803    field: value
804---
805
806Content here."#;
807
808        let doc = decompose(markdown).unwrap();
809
810        assert_eq!(doc.body(), Some("\nContent here."));
811        assert_eq!(
812            doc.get_field("title").unwrap().as_str().unwrap(),
813            "Complex Document"
814        );
815
816        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
817        assert_eq!(tags.len(), 2);
818        assert_eq!(tags[0].as_str().unwrap(), "test");
819        assert_eq!(tags[1].as_str().unwrap(), "yaml");
820    }
821
822    #[test]
823    fn test_with_defaults_empty_document() {
824        use std::collections::HashMap;
825
826        let mut defaults = HashMap::new();
827        defaults.insert(
828            "status".to_string(),
829            QuillValue::from_json(serde_json::json!("draft")),
830        );
831        defaults.insert(
832            "version".to_string(),
833            QuillValue::from_json(serde_json::json!(1)),
834        );
835
836        // Create an empty parsed document
837        let doc = ParsedDocument::new(HashMap::new());
838        let doc_with_defaults = doc.with_defaults(&defaults);
839
840        // Check that defaults were applied
841        assert_eq!(
842            doc_with_defaults
843                .get_field("status")
844                .unwrap()
845                .as_str()
846                .unwrap(),
847            "draft"
848        );
849        assert_eq!(
850            doc_with_defaults
851                .get_field("version")
852                .unwrap()
853                .as_number()
854                .unwrap()
855                .as_i64()
856                .unwrap(),
857            1
858        );
859    }
860
861    #[test]
862    fn test_with_defaults_preserves_existing_values() {
863        use std::collections::HashMap;
864
865        let mut defaults = HashMap::new();
866        defaults.insert(
867            "status".to_string(),
868            QuillValue::from_json(serde_json::json!("draft")),
869        );
870
871        // Create document with existing status
872        let mut fields = HashMap::new();
873        fields.insert(
874            "status".to_string(),
875            QuillValue::from_json(serde_json::json!("published")),
876        );
877        let doc = ParsedDocument::new(fields);
878
879        let doc_with_defaults = doc.with_defaults(&defaults);
880
881        // Existing value should be preserved
882        assert_eq!(
883            doc_with_defaults
884                .get_field("status")
885                .unwrap()
886                .as_str()
887                .unwrap(),
888            "published"
889        );
890    }
891
892    #[test]
893    fn test_with_defaults_partial_application() {
894        use std::collections::HashMap;
895
896        let mut defaults = HashMap::new();
897        defaults.insert(
898            "status".to_string(),
899            QuillValue::from_json(serde_json::json!("draft")),
900        );
901        defaults.insert(
902            "version".to_string(),
903            QuillValue::from_json(serde_json::json!(1)),
904        );
905
906        // Create document with only one field
907        let mut fields = HashMap::new();
908        fields.insert(
909            "status".to_string(),
910            QuillValue::from_json(serde_json::json!("published")),
911        );
912        let doc = ParsedDocument::new(fields);
913
914        let doc_with_defaults = doc.with_defaults(&defaults);
915
916        // Existing field preserved, missing field gets default
917        assert_eq!(
918            doc_with_defaults
919                .get_field("status")
920                .unwrap()
921                .as_str()
922                .unwrap(),
923            "published"
924        );
925        assert_eq!(
926            doc_with_defaults
927                .get_field("version")
928                .unwrap()
929                .as_number()
930                .unwrap()
931                .as_i64()
932                .unwrap(),
933            1
934        );
935    }
936
937    #[test]
938    fn test_with_defaults_no_defaults() {
939        use std::collections::HashMap;
940
941        let defaults = HashMap::new(); // Empty defaults map
942
943        let doc = ParsedDocument::new(HashMap::new());
944        let doc_with_defaults = doc.with_defaults(&defaults);
945
946        // No defaults should be applied
947        assert!(doc_with_defaults.fields().is_empty());
948    }
949
950    #[test]
951    fn test_with_defaults_complex_types() {
952        use std::collections::HashMap;
953
954        let mut defaults = HashMap::new();
955        defaults.insert(
956            "tags".to_string(),
957            QuillValue::from_json(serde_json::json!(["default", "tag"])),
958        );
959
960        let doc = ParsedDocument::new(HashMap::new());
961        let doc_with_defaults = doc.with_defaults(&defaults);
962
963        // Complex default value should be applied
964        let tags = doc_with_defaults
965            .get_field("tags")
966            .unwrap()
967            .as_sequence()
968            .unwrap();
969        assert_eq!(tags.len(), 2);
970        assert_eq!(tags[0].as_str().unwrap(), "default");
971        assert_eq!(tags[1].as_str().unwrap(), "tag");
972    }
973
974    #[test]
975    fn test_with_coercion_singular_to_array() {
976        use std::collections::HashMap;
977
978        let schema = QuillValue::from_json(serde_json::json!({
979            "$schema": "https://json-schema.org/draft/2019-09/schema",
980            "type": "object",
981            "properties": {
982                "tags": {"type": "array"}
983            }
984        }));
985
986        let mut fields = HashMap::new();
987        fields.insert(
988            "tags".to_string(),
989            QuillValue::from_json(serde_json::json!("single-tag")),
990        );
991        let doc = ParsedDocument::new(fields);
992
993        let coerced_doc = doc.with_coercion(&schema);
994
995        let tags = coerced_doc.get_field("tags").unwrap();
996        assert!(tags.as_array().is_some());
997        let tags_array = tags.as_array().unwrap();
998        assert_eq!(tags_array.len(), 1);
999        assert_eq!(tags_array[0].as_str().unwrap(), "single-tag");
1000    }
1001
1002    #[test]
1003    fn test_with_coercion_string_to_boolean() {
1004        use std::collections::HashMap;
1005
1006        let schema = QuillValue::from_json(serde_json::json!({
1007            "$schema": "https://json-schema.org/draft/2019-09/schema",
1008            "type": "object",
1009            "properties": {
1010                "active": {"type": "boolean"}
1011            }
1012        }));
1013
1014        let mut fields = HashMap::new();
1015        fields.insert(
1016            "active".to_string(),
1017            QuillValue::from_json(serde_json::json!("true")),
1018        );
1019        let doc = ParsedDocument::new(fields);
1020
1021        let coerced_doc = doc.with_coercion(&schema);
1022
1023        assert!(coerced_doc.get_field("active").unwrap().as_bool().unwrap());
1024    }
1025
1026    #[test]
1027    fn test_with_coercion_string_to_number() {
1028        use std::collections::HashMap;
1029
1030        let schema = QuillValue::from_json(serde_json::json!({
1031            "$schema": "https://json-schema.org/draft/2019-09/schema",
1032            "type": "object",
1033            "properties": {
1034                "count": {"type": "number"}
1035            }
1036        }));
1037
1038        let mut fields = HashMap::new();
1039        fields.insert(
1040            "count".to_string(),
1041            QuillValue::from_json(serde_json::json!("42")),
1042        );
1043        let doc = ParsedDocument::new(fields);
1044
1045        let coerced_doc = doc.with_coercion(&schema);
1046
1047        assert_eq!(
1048            coerced_doc.get_field("count").unwrap().as_i64().unwrap(),
1049            42
1050        );
1051    }
1052
1053    #[test]
1054    fn test_invalid_yaml() {
1055        let markdown = r#"---
1056title: [invalid yaml
1057author: missing close bracket
1058---
1059
1060Content here."#;
1061
1062        let result = decompose(markdown);
1063        assert!(result.is_err());
1064        // Error message now includes location context
1065        assert!(result.unwrap_err().to_string().contains("YAML error"));
1066    }
1067
1068    #[test]
1069    fn test_unclosed_frontmatter() {
1070        let markdown = r#"---
1071title: Test
1072author: Test Author
1073
1074Content without closing ---"#;
1075
1076        let result = decompose(markdown);
1077        assert!(result.is_err());
1078        assert!(result.unwrap_err().to_string().contains("not closed"));
1079    }
1080
1081    // Extended metadata tests
1082
1083    #[test]
1084    fn test_basic_tagged_block() {
1085        let markdown = r#"---
1086title: Main Document
1087---
1088
1089Main body content.
1090
1091---
1092CARD: items
1093name: Item 1
1094---
1095
1096Body of item 1."#;
1097
1098        let doc = decompose(markdown).unwrap();
1099
1100        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
1101        assert_eq!(
1102            doc.get_field("title").unwrap().as_str().unwrap(),
1103            "Main Document"
1104        );
1105
1106        // Cards are now in CARDS array with CARD discriminator
1107        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1108        assert_eq!(cards.len(), 1);
1109
1110        let item = cards[0].as_object().unwrap();
1111        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1112        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1113        assert_eq!(
1114            item.get(BODY_FIELD).unwrap().as_str().unwrap(),
1115            "\nBody of item 1."
1116        );
1117    }
1118
1119    #[test]
1120    fn test_multiple_tagged_blocks() {
1121        let markdown = r#"---
1122CARD: items
1123name: Item 1
1124tags: [a, b]
1125---
1126
1127First item body.
1128
1129---
1130CARD: items
1131name: Item 2
1132tags: [c, d]
1133---
1134
1135Second item body."#;
1136
1137        let doc = decompose(markdown).unwrap();
1138
1139        // Cards are in CARDS array
1140        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1141        assert_eq!(cards.len(), 2);
1142
1143        let item1 = cards[0].as_object().unwrap();
1144        assert_eq!(item1.get("CARD").unwrap().as_str().unwrap(), "items");
1145        assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");
1146
1147        let item2 = cards[1].as_object().unwrap();
1148        assert_eq!(item2.get("CARD").unwrap().as_str().unwrap(), "items");
1149        assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
1150    }
1151
1152    #[test]
1153    fn test_mixed_global_and_tagged() {
1154        let markdown = r#"---
1155title: Global
1156author: John Doe
1157---
1158
1159Global body.
1160
1161---
1162CARD: sections
1163title: Section 1
1164---
1165
1166Section 1 content.
1167
1168---
1169CARD: sections
1170title: Section 2
1171---
1172
1173Section 2 content."#;
1174
1175        let doc = decompose(markdown).unwrap();
1176
1177        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
1178        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
1179
1180        // Cards are in unified CARDS array
1181        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1182        assert_eq!(cards.len(), 2);
1183        assert_eq!(
1184            cards[0]
1185                .as_object()
1186                .unwrap()
1187                .get("CARD")
1188                .unwrap()
1189                .as_str()
1190                .unwrap(),
1191            "sections"
1192        );
1193    }
1194
1195    #[test]
1196    fn test_empty_tagged_metadata() {
1197        let markdown = r#"---
1198CARD: items
1199---
1200
1201Body without metadata."#;
1202
1203        let doc = decompose(markdown).unwrap();
1204
1205        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1206        assert_eq!(cards.len(), 1);
1207
1208        let item = cards[0].as_object().unwrap();
1209        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1210        assert_eq!(
1211            item.get(BODY_FIELD).unwrap().as_str().unwrap(),
1212            "\nBody without metadata."
1213        );
1214    }
1215
1216    #[test]
1217    fn test_tagged_block_without_body() {
1218        let markdown = r#"---
1219CARD: items
1220name: Item
1221---"#;
1222
1223        let doc = decompose(markdown).unwrap();
1224
1225        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1226        assert_eq!(cards.len(), 1);
1227
1228        let item = cards[0].as_object().unwrap();
1229        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1230        assert_eq!(item.get(BODY_FIELD).unwrap().as_str().unwrap(), "");
1231    }
1232
1233    #[test]
1234    fn test_name_collision_global_and_tagged() {
1235        let markdown = r#"---
1236items: "global value"
1237---
1238
1239Body
1240
1241---
1242CARD: items
1243name: Item
1244---
1245
1246Item body"#;
1247
1248        let result = decompose(markdown);
1249        assert!(result.is_ok(), "Name collision should be allowed now");
1250    }
1251
1252    #[test]
1253    fn test_card_name_collision_with_array_field() {
1254        // CARD type names CAN now conflict with frontmatter field names
1255        let markdown = r#"---
1256items:
1257  - name: Global Item 1
1258    value: 100
1259---
1260
1261Global body
1262
1263---
1264CARD: items
1265name: Scope Item 1
1266---
1267
1268Scope item 1 body"#;
1269
1270        let result = decompose(markdown);
1271        assert!(
1272            result.is_ok(),
1273            "Collision with array field should be allowed"
1274        );
1275    }
1276
1277    #[test]
1278    fn test_empty_global_array_with_card() {
1279        // CARD type names CAN now conflict with frontmatter field names
1280        let markdown = r#"---
1281items: []
1282---
1283
1284Global body
1285
1286---
1287CARD: items
1288name: Item 1
1289---
1290
1291Item 1 body"#;
1292
1293        let result = decompose(markdown);
1294        assert!(
1295            result.is_ok(),
1296            "Collision with empty array field should be allowed"
1297        );
1298    }
1299
1300    #[test]
1301    fn test_reserved_field_body_rejected() {
1302        let markdown = r#"---
1303CARD: section
1304BODY: Test
1305---"#;
1306
1307        let result = decompose(markdown);
1308        assert!(result.is_err(), "BODY is a reserved field name");
1309        assert!(result
1310            .unwrap_err()
1311            .to_string()
1312            .contains("Reserved field name"));
1313    }
1314
1315    #[test]
1316    fn test_reserved_field_cards_rejected() {
1317        let markdown = r#"---
1318title: Test
1319CARDS: []
1320---"#;
1321
1322        let result = decompose(markdown);
1323        assert!(result.is_err(), "CARDS is a reserved field name");
1324        assert!(result
1325            .unwrap_err()
1326            .to_string()
1327            .contains("Reserved field name"));
1328    }
1329
1330    #[test]
1331    fn test_delimiter_inside_fenced_code_block_backticks() {
1332        let markdown = r#"---
1333title: Test
1334---
1335Here is some code:
1336
1337```yaml
1338---
1339fake: frontmatter
1340---
1341```
1342
1343More content.
1344"#;
1345
1346        let doc = decompose(markdown).unwrap();
1347        // The --- inside the code block should NOT be parsed as metadata
1348        assert!(doc.body().unwrap().contains("fake: frontmatter"));
1349        assert!(doc.get_field("fake").is_none());
1350    }
1351
1352    #[test]
1353    fn test_tildes_are_not_fences() {
1354        // Per EXTENDED_MARKDOWN.md: tildes (~~~) are NOT treated as fences
1355        // So --- inside ~~~ WILL be parsed as a metadata block
1356        let markdown = r#"---
1357title: Test
1358---
1359Here is some code:
1360
1361~~~yaml
1362---
1363CARD: code_example
1364fake: frontmatter
1365---
1366~~~
1367
1368More content.
1369"#;
1370
1371        let doc = decompose(markdown).unwrap();
1372        // The --- should be parsed as a CARD block since tildes aren't fences
1373        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1374        assert_eq!(cards.len(), 1);
1375        assert_eq!(
1376            cards[0].get("fake").unwrap().as_str().unwrap(),
1377            "frontmatter"
1378        );
1379    }
1380
1381    #[test]
1382    fn test_four_backticks_are_not_fences() {
1383        // Per EXTENDED_MARKDOWN.md: only exactly 3 backticks are valid fences
1384        // 4+ backticks are NOT treated as fences
1385        let markdown = r#"---
1386title: Test
1387---
1388Here is some code:
1389
1390````yaml
1391---
1392CARD: code_example
1393fake: frontmatter
1394---
1395````
1396
1397More content.
1398"#;
1399
1400        let doc = decompose(markdown).unwrap();
1401        // The --- should be parsed as a CARD block since 4 backticks aren't a fence
1402        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1403        assert_eq!(cards.len(), 1);
1404        assert_eq!(
1405            cards[0].get("fake").unwrap().as_str().unwrap(),
1406            "frontmatter"
1407        );
1408    }
1409
1410    #[test]
1411    fn test_invalid_tag_syntax() {
1412        let markdown = r#"---
1413CARD: Invalid-Name
1414title: Test
1415---"#;
1416
1417        let result = decompose(markdown);
1418        assert!(result.is_err());
1419        assert!(result
1420            .unwrap_err()
1421            .to_string()
1422            .contains("Invalid card field name"));
1423    }
1424
1425    #[test]
1426    fn test_multiple_global_frontmatter_blocks() {
1427        let markdown = r#"---
1428title: First
1429---
1430
1431Body
1432
1433---
1434author: Second
1435---
1436
1437More body"#;
1438
1439        let result = decompose(markdown);
1440        assert!(result.is_err());
1441
1442        // Verify the error message contains CARD hint
1443        let err = result.unwrap_err();
1444        let err_str = err.to_string();
1445        assert!(
1446            err_str.contains("CARD"),
1447            "Error should mention CARD directive: {}",
1448            err_str
1449        );
1450        assert!(
1451            err_str.contains("missing"),
1452            "Error should indicate missing directive: {}",
1453            err_str
1454        );
1455    }
1456
1457    #[test]
1458    fn test_adjacent_blocks_different_tags() {
1459        let markdown = r#"---
1460CARD: items
1461name: Item 1
1462---
1463
1464Item 1 body
1465
1466---
1467CARD: sections
1468title: Section 1
1469---
1470
1471Section 1 body"#;
1472
1473        let doc = decompose(markdown).unwrap();
1474
1475        // All cards in unified CARDS array
1476        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1477        assert_eq!(cards.len(), 2);
1478
1479        // First card is "items" type
1480        let item = cards[0].as_object().unwrap();
1481        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1482        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1483
1484        // Second card is "sections" type
1485        let section = cards[1].as_object().unwrap();
1486        assert_eq!(section.get("CARD").unwrap().as_str().unwrap(), "sections");
1487        assert_eq!(section.get("title").unwrap().as_str().unwrap(), "Section 1");
1488    }
1489
1490    #[test]
1491    fn test_order_preservation() {
1492        let markdown = r#"---
1493CARD: items
1494id: 1
1495---
1496
1497First
1498
1499---
1500CARD: items
1501id: 2
1502---
1503
1504Second
1505
1506---
1507CARD: items
1508id: 3
1509---
1510
1511Third"#;
1512
1513        let doc = decompose(markdown).unwrap();
1514
1515        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1516        assert_eq!(cards.len(), 3);
1517
1518        for (i, card) in cards.iter().enumerate() {
1519            let mapping = card.as_object().unwrap();
1520            assert_eq!(mapping.get("CARD").unwrap().as_str().unwrap(), "items");
1521            let id = mapping.get("id").unwrap().as_i64().unwrap();
1522            assert_eq!(id, (i + 1) as i64);
1523        }
1524    }
1525
1526    #[test]
1527    fn test_product_catalog_integration() {
1528        let markdown = r#"---
1529title: Product Catalog
1530author: John Doe
1531date: 2024-01-01
1532---
1533
1534This is the main catalog description.
1535
1536---
1537CARD: products
1538name: Widget A
1539price: 19.99
1540sku: WID-001
1541---
1542
1543The **Widget A** is our most popular product.
1544
1545---
1546CARD: products
1547name: Gadget B
1548price: 29.99
1549sku: GAD-002
1550---
1551
1552The **Gadget B** is perfect for professionals.
1553
1554---
1555CARD: reviews
1556product: Widget A
1557rating: 5
1558---
1559
1560"Excellent product! Highly recommended."
1561
1562---
1563CARD: reviews
1564product: Gadget B
1565rating: 4
1566---
1567
1568"Very good, but a bit pricey.""#;
1569
1570        let doc = decompose(markdown).unwrap();
1571
1572        // Verify global fields
1573        assert_eq!(
1574            doc.get_field("title").unwrap().as_str().unwrap(),
1575            "Product Catalog"
1576        );
1577        assert_eq!(
1578            doc.get_field("author").unwrap().as_str().unwrap(),
1579            "John Doe"
1580        );
1581        assert_eq!(
1582            doc.get_field("date").unwrap().as_str().unwrap(),
1583            "2024-01-01"
1584        );
1585
1586        // Verify global body
1587        assert!(doc.body().unwrap().contains("main catalog description"));
1588
1589        // All cards in unified CARDS array
1590        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1591        assert_eq!(cards.len(), 4); // 2 products + 2 reviews
1592
1593        // First 2 are products
1594        let product1 = cards[0].as_object().unwrap();
1595        assert_eq!(product1.get("CARD").unwrap().as_str().unwrap(), "products");
1596        assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
1597        assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);
1598
1599        let product2 = cards[1].as_object().unwrap();
1600        assert_eq!(product2.get("CARD").unwrap().as_str().unwrap(), "products");
1601        assert_eq!(product2.get("name").unwrap().as_str().unwrap(), "Gadget B");
1602
1603        // Last 2 are reviews
1604        let review1 = cards[2].as_object().unwrap();
1605        assert_eq!(review1.get("CARD").unwrap().as_str().unwrap(), "reviews");
1606        assert_eq!(
1607            review1.get("product").unwrap().as_str().unwrap(),
1608            "Widget A"
1609        );
1610        assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);
1611
1612        // Total fields: title, author, date, body, CARDS = 5
1613        assert_eq!(doc.fields().len(), 5);
1614    }
1615
1616    #[test]
1617    fn taro_quill_directive() {
1618        let markdown = r#"---
1619QUILL: usaf_memo
1620memo_for: [ORG/SYMBOL]
1621memo_from: [ORG/SYMBOL]
1622---
1623
1624This is the memo body."#;
1625
1626        let doc = decompose(markdown).unwrap();
1627
1628        // Verify quill tag is set
1629        assert_eq!(doc.quill_reference().name, "usaf_memo");
1630
1631        // Verify fields from quill block become frontmatter
1632        assert_eq!(
1633            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1634                .as_str()
1635                .unwrap(),
1636            "ORG/SYMBOL"
1637        );
1638
1639        // Verify body
1640        assert_eq!(doc.body(), Some("\nThis is the memo body."));
1641    }
1642
1643    #[test]
1644    fn test_quill_with_card_blocks() {
1645        let markdown = r#"---
1646QUILL: document
1647title: Test Document
1648---
1649
1650Main body.
1651
1652---
1653CARD: sections
1654name: Section 1
1655---
1656
1657Section 1 body."#;
1658
1659        let doc = decompose(markdown).unwrap();
1660
1661        // Verify quill tag
1662        assert_eq!(doc.quill_reference().name, "document");
1663
1664        // Verify global field from quill block
1665        assert_eq!(
1666            doc.get_field("title").unwrap().as_str().unwrap(),
1667            "Test Document"
1668        );
1669
1670        // Verify card blocks work via CARDS array
1671        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1672        assert_eq!(cards.len(), 1);
1673        assert_eq!(
1674            cards[0]
1675                .as_object()
1676                .unwrap()
1677                .get("CARD")
1678                .unwrap()
1679                .as_str()
1680                .unwrap(),
1681            "sections"
1682        );
1683
1684        // Verify body
1685        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1686    }
1687
1688    #[test]
1689    fn test_multiple_quill_directives_error() {
1690        let markdown = r#"---
1691QUILL: first
1692---
1693
1694---
1695QUILL: second
1696---"#;
1697
1698        let result = decompose(markdown);
1699        assert!(result.is_err());
1700        // QUILL in inline block is now an error (must appear in top-level frontmatter only)
1701        assert!(result
1702            .unwrap_err()
1703            .to_string()
1704            .contains("top-level frontmatter"));
1705    }
1706
1707    #[test]
1708    fn test_invalid_quill_name() {
1709        let markdown = r#"---
1710QUILL: Invalid-Name
1711---"#;
1712
1713        let result = decompose(markdown);
1714        assert!(result.is_err());
1715        assert!(result
1716            .unwrap_err()
1717            .to_string()
1718            .contains("Invalid QUILL reference"));
1719    }
1720
1721    #[test]
1722    fn test_quill_wrong_value_type() {
1723        let markdown = r#"---
1724QUILL: 123
1725---"#;
1726
1727        let result = decompose(markdown);
1728        assert!(result.is_err());
1729        assert!(result
1730            .unwrap_err()
1731            .to_string()
1732            .contains("QUILL value must be a string"));
1733    }
1734
1735    #[test]
1736    fn test_card_wrong_value_type() {
1737        let markdown = r#"---
1738CARD: 123
1739---"#;
1740
1741        let result = decompose(markdown);
1742        assert!(result.is_err());
1743        assert!(result
1744            .unwrap_err()
1745            .to_string()
1746            .contains("CARD value must be a string"));
1747    }
1748
1749    #[test]
1750    fn test_both_quill_and_card_error() {
1751        let markdown = r#"---
1752QUILL: test
1753CARD: items
1754---"#;
1755
1756        let result = decompose(markdown);
1757        assert!(result.is_err());
1758        assert!(result
1759            .unwrap_err()
1760            .to_string()
1761            .contains("Cannot specify both QUILL and CARD"));
1762    }
1763
1764    #[test]
1765    fn test_blank_lines_in_frontmatter() {
1766        // New parsing standard: blank lines are allowed within YAML blocks
1767        let markdown = r#"---
1768title: Test Document
1769author: Test Author
1770
1771description: This has a blank line above it
1772tags:
1773  - one
1774  - two
1775---
1776
1777# Hello World
1778
1779This is the body."#;
1780
1781        let doc = decompose(markdown).unwrap();
1782
1783        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
1784        assert_eq!(
1785            doc.get_field("title").unwrap().as_str().unwrap(),
1786            "Test Document"
1787        );
1788        assert_eq!(
1789            doc.get_field("author").unwrap().as_str().unwrap(),
1790            "Test Author"
1791        );
1792        assert_eq!(
1793            doc.get_field("description").unwrap().as_str().unwrap(),
1794            "This has a blank line above it"
1795        );
1796
1797        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
1798        assert_eq!(tags.len(), 2);
1799    }
1800
1801    #[test]
1802    fn test_blank_lines_in_scope_blocks() {
1803        // Blank lines should be allowed in CARD blocks too
1804        let markdown = r#"---
1805CARD: items
1806name: Item 1
1807
1808price: 19.99
1809
1810tags:
1811  - electronics
1812  - gadgets
1813---
1814
1815Body of item 1."#;
1816
1817        let doc = decompose(markdown).unwrap();
1818
1819        // Cards are in CARDS array
1820        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1821        assert_eq!(cards.len(), 1);
1822
1823        let item = cards[0].as_object().unwrap();
1824        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1825        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1826        assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);
1827
1828        let tags = item.get("tags").unwrap().as_array().unwrap();
1829        assert_eq!(tags.len(), 2);
1830    }
1831
1832    #[test]
1833    fn test_horizontal_rule_with_blank_lines_above_and_below() {
1834        // Horizontal rule: blank lines both above AND below the ---
1835        let markdown = r#"---
1836title: Test
1837---
1838
1839First paragraph.
1840
1841---
1842
1843Second paragraph."#;
1844
1845        let doc = decompose(markdown).unwrap();
1846
1847        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1848
1849        // The body should contain the horizontal rule (---) as part of the content
1850        let body = doc.body().unwrap();
1851        assert!(body.contains("First paragraph."));
1852        assert!(body.contains("---"));
1853        assert!(body.contains("Second paragraph."));
1854    }
1855
1856    #[test]
1857    fn test_horizontal_rule_not_preceded_by_blank() {
1858        // --- not preceded by blank line but followed by blank line is NOT a horizontal rule
1859        // It's also NOT a valid metadata block opening (since it's followed by blank)
1860        let markdown = r#"---
1861title: Test
1862---
1863
1864First paragraph.
1865---
1866
1867Second paragraph."#;
1868
1869        let doc = decompose(markdown).unwrap();
1870
1871        let body = doc.body().unwrap();
1872        // The second --- should be in the body as text (not a horizontal rule since no blank above)
1873        assert!(body.contains("---"));
1874    }
1875
1876    #[test]
1877    fn test_multiple_blank_lines_in_yaml() {
1878        // Multiple blank lines should also be allowed
1879        let markdown = r#"---
1880title: Test
1881
1882
1883author: John Doe
1884
1885
1886version: 1.0
1887---
1888
1889Body content."#;
1890
1891        let doc = decompose(markdown).unwrap();
1892
1893        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1894        assert_eq!(
1895            doc.get_field("author").unwrap().as_str().unwrap(),
1896            "John Doe"
1897        );
1898        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1899    }
1900
1901    #[test]
1902    fn test_html_comment_interaction() {
1903        let markdown = r#"<!---
1904---> the rest of the page content
1905
1906---
1907key: value
1908---
1909"#;
1910        let doc = decompose(markdown).unwrap();
1911
1912        // The comment should be ignored (or at least not cause a parse error)
1913        // The frontmatter should be parsed
1914        let key = doc.get_field("key").and_then(|v| v.as_str());
1915        assert_eq!(key, Some("value"));
1916    }
1917}
1918#[cfg(test)]
1919mod demo_file_test {
1920    use super::*;
1921
1922    #[test]
1923    fn test_extended_metadata_demo_file() {
1924        let markdown = include_str!("../../fixtures/resources/extended_metadata_demo.md");
1925        let doc = decompose(markdown).unwrap();
1926
1927        // Verify global fields
1928        assert_eq!(
1929            doc.get_field("title").unwrap().as_str().unwrap(),
1930            "Extended Metadata Demo"
1931        );
1932        assert_eq!(
1933            doc.get_field("author").unwrap().as_str().unwrap(),
1934            "Quillmark Team"
1935        );
1936        // version is parsed as a number by YAML
1937        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1938
1939        // Verify body
1940        assert!(doc
1941            .body()
1942            .unwrap()
1943            .contains("extended YAML metadata standard"));
1944
1945        // All cards are now in unified CARDS array
1946        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1947        assert_eq!(cards.len(), 5); // 3 features + 2 use_cases
1948
1949        // Count features and use_cases cards
1950        let features_count = cards
1951            .iter()
1952            .filter(|c| {
1953                c.as_object()
1954                    .unwrap()
1955                    .get("CARD")
1956                    .unwrap()
1957                    .as_str()
1958                    .unwrap()
1959                    == "features"
1960            })
1961            .count();
1962        let use_cases_count = cards
1963            .iter()
1964            .filter(|c| {
1965                c.as_object()
1966                    .unwrap()
1967                    .get("CARD")
1968                    .unwrap()
1969                    .as_str()
1970                    .unwrap()
1971                    == "use_cases"
1972            })
1973            .count();
1974        assert_eq!(features_count, 3);
1975        assert_eq!(use_cases_count, 2);
1976
1977        // Check first card is a feature
1978        let feature1 = cards[0].as_object().unwrap();
1979        assert_eq!(feature1.get("CARD").unwrap().as_str().unwrap(), "features");
1980        assert_eq!(
1981            feature1.get("name").unwrap().as_str().unwrap(),
1982            "Tag Directives"
1983        );
1984    }
1985
1986    #[test]
1987    fn test_input_size_limit() {
1988        // Create markdown larger than MAX_INPUT_SIZE (10 MB)
1989        let size = crate::error::MAX_INPUT_SIZE + 1;
1990        let large_markdown = "a".repeat(size);
1991
1992        let result = decompose(&large_markdown);
1993        assert!(result.is_err());
1994
1995        let err_msg = result.unwrap_err().to_string();
1996        assert!(err_msg.contains("Input too large"));
1997    }
1998
1999    #[test]
2000    fn test_yaml_size_limit() {
2001        // Create YAML block larger than MAX_YAML_SIZE (1 MB)
2002        let mut markdown = String::from("---\n");
2003
2004        // Create a very large YAML field
2005        let size = crate::error::MAX_YAML_SIZE + 1;
2006        markdown.push_str("data: \"");
2007        markdown.push_str(&"x".repeat(size));
2008        markdown.push_str("\"\n---\n\nBody");
2009
2010        let result = decompose(&markdown);
2011        assert!(result.is_err());
2012
2013        let err_msg = result.unwrap_err().to_string();
2014        assert!(err_msg.contains("Input too large"));
2015    }
2016
2017    #[test]
2018    fn test_input_within_size_limit() {
2019        // Create markdown just under the limit
2020        let size = 1000; // Much smaller than limit
2021        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));
2022
2023        let result = decompose(&markdown);
2024        assert!(result.is_ok());
2025    }
2026
2027    #[test]
2028    fn test_yaml_within_size_limit() {
2029        // Create YAML block well within the limit
2030        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";
2031
2032        let result = decompose(markdown);
2033        assert!(result.is_ok());
2034    }
2035
2036    #[test]
2037    fn test_yaml_depth_limit() {
2038        // Create deeply nested YAML that exceeds MAX_YAML_DEPTH (100 levels)
2039        // This tests serde-saphyr's Budget.max_depth enforcement
2040        let mut yaml_content = String::new();
2041        for i in 0..110 {
2042            yaml_content.push_str(&"  ".repeat(i));
2043            yaml_content.push_str(&format!("level{}: value\n", i));
2044        }
2045
2046        let markdown = format!("---\n{}---\n\nBody", yaml_content);
2047        let result = decompose(&markdown);
2048
2049        assert!(result.is_err());
2050        let err_msg = result.unwrap_err().to_string();
2051        // serde-saphyr returns "budget exceeded" or similar for depth violations
2052        assert!(
2053            err_msg.to_lowercase().contains("budget")
2054                || err_msg.to_lowercase().contains("depth")
2055                || err_msg.contains("YAML"),
2056            "Expected depth/budget error, got: {}",
2057            err_msg
2058        );
2059    }
2060
2061    #[test]
2062    fn test_yaml_depth_within_limit() {
2063        // Create reasonably nested YAML (should succeed)
2064        let markdown = r#"---
2065level1:
2066  level2:
2067    level3:
2068      level4:
2069        value: test
2070---
2071
2072Body content"#;
2073
2074        let result = decompose(markdown);
2075        assert!(result.is_ok());
2076    }
2077
    // Tests for chevron (`<<...>>`) preservation: chevrons are NOT converted to guillemets
    // during parsing. That conversion now happens in process_plate, not during parsing.
2080    #[test]
2081    fn test_chevrons_preserved_in_body_no_frontmatter() {
2082        let markdown = "Use <<raw content>> here.";
2083        let doc = decompose(markdown).unwrap();
2084
2085        // Body should preserve chevrons (conversion happens later in process_plate)
2086        assert_eq!(doc.body(), Some("Use <<raw content>> here."));
2087    }
2088
2089    #[test]
2090    fn test_chevrons_preserved_in_body_with_frontmatter() {
2091        let markdown = r#"---
2092title: Test
2093---
2094
2095Use <<raw content>> here."#;
2096        let doc = decompose(markdown).unwrap();
2097
2098        // Body should preserve chevrons
2099        assert_eq!(doc.body(), Some("\nUse <<raw content>> here."));
2100    }
2101
2102    #[test]
2103    fn test_chevrons_preserved_in_yaml_string() {
2104        let markdown = r#"---
2105title: Test <<with chevrons>>
2106---
2107
2108Body content."#;
2109        let doc = decompose(markdown).unwrap();
2110
2111        // YAML string values should preserve chevrons
2112        assert_eq!(
2113            doc.get_field("title").unwrap().as_str().unwrap(),
2114            "Test <<with chevrons>>"
2115        );
2116    }
2117
2118    #[test]
2119    fn test_chevrons_preserved_in_yaml_array() {
2120        let markdown = r#"---
2121items:
2122  - "<<first>>"
2123  - "<<second>>"
2124---
2125
2126Body."#;
2127        let doc = decompose(markdown).unwrap();
2128
2129        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
2130        assert_eq!(items[0].as_str().unwrap(), "<<first>>");
2131        assert_eq!(items[1].as_str().unwrap(), "<<second>>");
2132    }
2133
2134    #[test]
2135    fn test_chevrons_preserved_in_yaml_nested() {
2136        let markdown = r#"---
2137metadata:
2138  description: "<<nested value>>"
2139---
2140
2141Body."#;
2142        let doc = decompose(markdown).unwrap();
2143
2144        let metadata = doc.get_field("metadata").unwrap().as_object().unwrap();
2145        assert_eq!(
2146            metadata.get("description").unwrap().as_str().unwrap(),
2147            "<<nested value>>"
2148        );
2149    }
2150
2151    #[test]
2152    fn test_chevrons_preserved_in_code_blocks() {
2153        let markdown = r#"```
2154<<in code block>>
2155```
2156
2157<<outside code block>>"#;
2158        let doc = decompose(markdown).unwrap();
2159
2160        let body = doc.body().unwrap();
2161        // All chevrons should be preserved (no conversion during parsing)
2162        assert!(body.contains("<<in code block>>"));
2163        assert!(body.contains("<<outside code block>>"));
2164    }
2165
2166    #[test]
2167    fn test_chevrons_preserved_in_inline_code() {
2168        let markdown = "`<<in inline code>>` and <<outside inline code>>";
2169        let doc = decompose(markdown).unwrap();
2170
2171        let body = doc.body().unwrap();
2172        // All chevrons should be preserved
2173        assert!(body.contains("`<<in inline code>>`"));
2174        assert!(body.contains("<<outside inline code>>"));
2175    }
2176
2177    #[test]
2178    fn test_chevrons_preserved_in_tagged_block_body() {
2179        let markdown = r#"---
2180title: Main
2181---
2182
2183Main body.
2184
2185---
2186CARD: items
2187name: Item 1
2188---
2189
2190Use <<raw>> here."#;
2191        let doc = decompose(markdown).unwrap();
2192
2193        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2194        let item = cards[0].as_object().unwrap();
2195        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
2196        let item_body = item.get(BODY_FIELD).unwrap().as_str().unwrap();
2197        // Tagged block body should preserve chevrons
2198        assert!(item_body.contains("<<raw>>"));
2199    }
2200
2201    #[test]
2202    fn test_chevrons_preserved_in_tagged_block_yaml() {
2203        let markdown = r#"---
2204title: Main
2205---
2206
2207Main body.
2208
2209---
2210CARD: items
2211description: "<<tagged yaml>>"
2212---
2213
2214Item body."#;
2215        let doc = decompose(markdown).unwrap();
2216
2217        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2218        let item = cards[0].as_object().unwrap();
2219        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
2220        // Tagged block YAML should preserve chevrons
2221        assert_eq!(
2222            item.get("description").unwrap().as_str().unwrap(),
2223            "<<tagged yaml>>"
2224        );
2225    }
2226
2227    #[test]
2228    fn test_yaml_numbers_not_affected() {
2229        // Numbers should not be affected
2230        let markdown = r#"---
2231count: 42
2232---
2233
2234Body."#;
2235        let doc = decompose(markdown).unwrap();
2236        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
2237    }
2238
2239    #[test]
2240    fn test_yaml_booleans_not_affected() {
2241        // Booleans should not be affected
2242        let markdown = r#"---
2243active: true
2244---
2245
2246Body."#;
2247        let doc = decompose(markdown).unwrap();
2248        assert!(doc.get_field("active").unwrap().as_bool().unwrap());
2249    }
2250
2251    #[test]
2252    fn test_multiline_chevrons_preserved() {
2253        // Multiline chevrons should be preserved as-is
2254        let markdown = "<<text\nacross lines>>";
2255        let doc = decompose(markdown).unwrap();
2256
2257        let body = doc.body().unwrap();
2258        // Should contain the original chevrons
2259        assert!(body.contains("<<text"));
2260        assert!(body.contains("across lines>>"));
2261    }
2262
2263    #[test]
2264    fn test_unmatched_chevrons_preserved() {
2265        let markdown = "<<unmatched";
2266        let doc = decompose(markdown).unwrap();
2267
2268        let body = doc.body().unwrap();
2269        // Unmatched should remain as-is
2270        assert_eq!(body, "<<unmatched");
2271    }
2272}
2273
2274// Additional robustness tests
2275#[cfg(test)]
2276mod robustness_tests {
2277    use super::*;
2278
2279    // Edge cases for delimiter handling
2280
2281    #[test]
2282    fn test_empty_document() {
2283        let doc = decompose("").unwrap();
2284        assert_eq!(doc.body(), Some(""));
2285        assert_eq!(doc.quill_reference().name, "__default__");
2286    }
2287
2288    #[test]
2289    fn test_only_whitespace() {
2290        let doc = decompose("   \n\n   \t").unwrap();
2291        assert_eq!(doc.body(), Some("   \n\n   \t"));
2292    }
2293
2294    #[test]
2295    fn test_only_dashes() {
2296        // Just "---" at document start without newline is not treated as frontmatter opener
2297        // (requires "---\n" to start a frontmatter block)
2298        let result = decompose("---");
2299        // This is NOT an error - "---" alone without newline is just body content
2300        assert!(result.is_ok());
2301        assert_eq!(result.unwrap().body(), Some("---"));
2302    }
2303
2304    #[test]
2305    fn test_dashes_in_middle_of_line() {
2306        // --- not at start of line should not be treated as delimiter
2307        let markdown = "some text --- more text";
2308        let doc = decompose(markdown).unwrap();
2309        assert_eq!(doc.body(), Some("some text --- more text"));
2310    }
2311
2312    #[test]
2313    fn test_four_dashes() {
2314        // ---- is not a valid delimiter
2315        let markdown = "----\ntitle: Test\n----\n\nBody";
2316        let doc = decompose(markdown).unwrap();
2317        // Should treat entire content as body
2318        assert!(doc.body().unwrap().contains("----"));
2319    }
2320
2321    #[test]
2322    fn test_crlf_line_endings() {
2323        // Windows-style line endings
2324        let markdown = "---\r\ntitle: Test\r\n---\r\n\r\nBody content.";
2325        let doc = decompose(markdown).unwrap();
2326        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
2327        assert!(doc.body().unwrap().contains("Body content."));
2328    }
2329
2330    #[test]
2331    fn test_mixed_line_endings() {
2332        // Mix of \n and \r\n
2333        let markdown = "---\ntitle: Test\r\n---\n\nBody.";
2334        let doc = decompose(markdown).unwrap();
2335        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
2336    }
2337
2338    #[test]
2339    fn test_frontmatter_at_eof_no_trailing_newline() {
2340        // Frontmatter closed at EOF without trailing newline
2341        let markdown = "---\ntitle: Test\n---";
2342        let doc = decompose(markdown).unwrap();
2343        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
2344        assert_eq!(doc.body(), Some(""));
2345    }
2346
2347    #[test]
2348    fn test_empty_frontmatter() {
2349        // Empty frontmatter block - requires content between delimiters
2350        // "---\n---" is not valid because --- followed by --- (blank line then ---)
2351        // is treated as horizontal rule logic, not empty frontmatter
2352        // A valid empty frontmatter would be "---\n \n---" (with whitespace content)
2353        let markdown = "---\n \n---\n\nBody content.";
2354        let doc = decompose(markdown).unwrap();
2355        assert!(doc.body().unwrap().contains("Body content."));
2356        // Should have body and CARDS fields
2357        assert_eq!(doc.fields().len(), 2);
2358    }
2359
2360    #[test]
2361    fn test_whitespace_only_frontmatter() {
2362        // Frontmatter with only whitespace
2363        let markdown = "---\n   \n\n   \n---\n\nBody.";
2364        let doc = decompose(markdown).unwrap();
2365        assert!(doc.body().unwrap().contains("Body."));
2366    }
2367
2368    // Unicode handling
2369
2370    #[test]
2371    fn test_unicode_in_yaml_keys() {
2372        let markdown = "---\ntitre: Bonjour\nタイトル: こんにちは\n---\n\nBody.";
2373        let doc = decompose(markdown).unwrap();
2374        assert_eq!(doc.get_field("titre").unwrap().as_str().unwrap(), "Bonjour");
2375        assert_eq!(
2376            doc.get_field("タイトル").unwrap().as_str().unwrap(),
2377            "こんにちは"
2378        );
2379    }
2380
2381    #[test]
2382    fn test_unicode_in_yaml_values() {
2383        let markdown = "---\ntitle: 你好世界 🎉\n---\n\nBody.";
2384        let doc = decompose(markdown).unwrap();
2385        assert_eq!(
2386            doc.get_field("title").unwrap().as_str().unwrap(),
2387            "你好世界 🎉"
2388        );
2389    }
2390
2391    #[test]
2392    fn test_unicode_in_body() {
2393        let markdown = "---\ntitle: Test\n---\n\n日本語テキスト with emoji 🚀";
2394        let doc = decompose(markdown).unwrap();
2395        assert!(doc.body().unwrap().contains("日本語テキスト"));
2396        assert!(doc.body().unwrap().contains("🚀"));
2397    }
2398
2399    // YAML edge cases
2400
2401    #[test]
2402    fn test_yaml_multiline_string() {
2403        let markdown = r#"---
2404description: |
2405  This is a
2406  multiline string
2407  with preserved newlines.
2408---
2409
2410Body."#;
2411        let doc = decompose(markdown).unwrap();
2412        let desc = doc.get_field("description").unwrap().as_str().unwrap();
2413        assert!(desc.contains("multiline string"));
2414        assert!(desc.contains('\n'));
2415    }
2416
2417    #[test]
2418    fn test_yaml_folded_string() {
2419        let markdown = r#"---
2420description: >
2421  This is a folded
2422  string that becomes
2423  a single line.
2424---
2425
2426Body."#;
2427        let doc = decompose(markdown).unwrap();
2428        let desc = doc.get_field("description").unwrap().as_str().unwrap();
2429        // Folded strings join lines with spaces
2430        assert!(desc.contains("folded"));
2431    }
2432
2433    #[test]
2434    fn test_yaml_null_value() {
2435        let markdown = "---\noptional: null\n---\n\nBody.";
2436        let doc = decompose(markdown).unwrap();
2437        assert!(doc.get_field("optional").unwrap().is_null());
2438    }
2439
2440    #[test]
2441    fn test_yaml_empty_string_value() {
2442        let markdown = "---\nempty: \"\"\n---\n\nBody.";
2443        let doc = decompose(markdown).unwrap();
2444        assert_eq!(doc.get_field("empty").unwrap().as_str().unwrap(), "");
2445    }
2446
2447    #[test]
2448    fn test_yaml_special_characters_in_string() {
2449        let markdown = "---\nspecial: \"colon: here, and [brackets]\"\n---\n\nBody.";
2450        let doc = decompose(markdown).unwrap();
2451        assert_eq!(
2452            doc.get_field("special").unwrap().as_str().unwrap(),
2453            "colon: here, and [brackets]"
2454        );
2455    }
2456
2457    #[test]
2458    fn test_yaml_nested_objects() {
2459        let markdown = r#"---
2460config:
2461  database:
2462    host: localhost
2463    port: 5432
2464  cache:
2465    enabled: true
2466---
2467
2468Body."#;
2469        let doc = decompose(markdown).unwrap();
2470        let config = doc.get_field("config").unwrap().as_object().unwrap();
2471        let db = config.get("database").unwrap().as_object().unwrap();
2472        assert_eq!(db.get("host").unwrap().as_str().unwrap(), "localhost");
2473        assert_eq!(db.get("port").unwrap().as_i64().unwrap(), 5432);
2474    }
2475
2476    // CARD block edge cases
2477
2478    #[test]
2479    fn test_card_with_empty_body() {
2480        let markdown = r#"---
2481CARD: items
2482name: Item
2483---"#;
2484        let doc = decompose(markdown).unwrap();
2485        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2486        assert_eq!(cards.len(), 1);
2487        let item = cards[0].as_object().unwrap();
2488        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
2489        assert_eq!(item.get(BODY_FIELD).unwrap().as_str().unwrap(), "");
2490    }
2491
2492    #[test]
2493    fn test_card_consecutive_blocks() {
2494        let markdown = r#"---
2495CARD: a
2496id: 1
2497---
2498---
2499CARD: a
2500id: 2
2501---"#;
2502        let doc = decompose(markdown).unwrap();
2503        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2504        assert_eq!(cards.len(), 2);
2505        assert_eq!(
2506            cards[0]
2507                .as_object()
2508                .unwrap()
2509                .get("CARD")
2510                .unwrap()
2511                .as_str()
2512                .unwrap(),
2513            "a"
2514        );
2515        assert_eq!(
2516            cards[1]
2517                .as_object()
2518                .unwrap()
2519                .get("CARD")
2520                .unwrap()
2521                .as_str()
2522                .unwrap(),
2523            "a"
2524        );
2525    }
2526
2527    #[test]
2528    fn test_card_with_body_containing_dashes() {
2529        let markdown = r#"---
2530CARD: items
2531name: Item
2532---
2533
2534Some text with --- dashes in it."#;
2535        let doc = decompose(markdown).unwrap();
2536        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2537        let item = cards[0].as_object().unwrap();
2538        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
2539        let body = item.get(BODY_FIELD).unwrap().as_str().unwrap();
2540        assert!(body.contains("--- dashes"));
2541    }
2542
2543    // QUILL directive edge cases
2544
2545    #[test]
2546    fn test_quill_with_underscore_prefix() {
2547        let markdown = "---\nQUILL: _internal\n---\n\nBody.";
2548        let doc = decompose(markdown).unwrap();
2549        assert_eq!(doc.quill_reference().name, "_internal");
2550    }
2551
2552    #[test]
2553    fn test_quill_with_numbers() {
2554        let markdown = "---\nQUILL: form_8_v2\n---\n\nBody.";
2555        let doc = decompose(markdown).unwrap();
2556        assert_eq!(doc.quill_reference().name, "form_8_v2");
2557    }
2558
2559    #[test]
2560    fn test_quill_with_additional_fields() {
2561        let markdown = r#"---
2562QUILL: my_quill
2563title: Document Title
2564author: John Doe
2565---
2566
2567Body content."#;
2568        let doc = decompose(markdown).unwrap();
2569        assert_eq!(doc.quill_reference().name, "my_quill");
2570        assert_eq!(
2571            doc.get_field("title").unwrap().as_str().unwrap(),
2572            "Document Title"
2573        );
2574        assert_eq!(
2575            doc.get_field("author").unwrap().as_str().unwrap(),
2576            "John Doe"
2577        );
2578    }
2579
2580    // Error handling
2581
2582    #[test]
2583    fn test_invalid_scope_name_uppercase() {
2584        let markdown = "---\nCARD: ITEMS\n---\n\nBody.";
2585        let result = decompose(markdown);
2586        assert!(result.is_err());
2587        assert!(result
2588            .unwrap_err()
2589            .to_string()
2590            .contains("Invalid card field name"));
2591    }
2592
2593    #[test]
2594    fn test_invalid_scope_name_starts_with_number() {
2595        let markdown = "---\nCARD: 123items\n---\n\nBody.";
2596        let result = decompose(markdown);
2597        assert!(result.is_err());
2598    }
2599
2600    #[test]
2601    fn test_invalid_scope_name_with_hyphen() {
2602        let markdown = "---\nCARD: my-items\n---\n\nBody.";
2603        let result = decompose(markdown);
2604        assert!(result.is_err());
2605    }
2606
2607    #[test]
2608    fn test_invalid_quill_name_uppercase() {
2609        let markdown = "---\nQUILL: MyQuill\n---\n\nBody.";
2610        let result = decompose(markdown);
2611        assert!(result.is_err());
2612    }
2613
2614    #[test]
2615    fn test_yaml_syntax_error_missing_colon() {
2616        let markdown = "---\ntitle Test\n---\n\nBody.";
2617        let result = decompose(markdown);
2618        assert!(result.is_err());
2619    }
2620
2621    #[test]
2622    fn test_yaml_syntax_error_bad_indentation() {
2623        let markdown = "---\nitems:\n- one\n - two\n---\n\nBody.";
2624        let result = decompose(markdown);
2625        // Bad indentation may or may not be an error depending on YAML parser
2626        // Just ensure it doesn't panic
2627        let _ = result;
2628    }
2629
2630    // Body extraction edge cases
2631
2632    #[test]
2633    fn test_body_with_leading_newlines() {
2634        let markdown = "---\ntitle: Test\n---\n\n\n\nBody with leading newlines.";
2635        let doc = decompose(markdown).unwrap();
2636        // Body should preserve leading newlines after frontmatter
2637        assert!(doc.body().unwrap().starts_with('\n'));
2638    }
2639
2640    #[test]
2641    fn test_body_with_trailing_newlines() {
2642        let markdown = "---\ntitle: Test\n---\n\nBody.\n\n\n";
2643        let doc = decompose(markdown).unwrap();
2644        // Body should preserve trailing newlines
2645        assert!(doc.body().unwrap().ends_with('\n'));
2646    }
2647
2648    #[test]
2649    fn test_no_body_after_frontmatter() {
2650        let markdown = "---\ntitle: Test\n---";
2651        let doc = decompose(markdown).unwrap();
2652        assert_eq!(doc.body(), Some(""));
2653    }
2654
2655    // Tag name validation
2656
2657    #[test]
2658    fn test_valid_tag_name_single_underscore() {
2659        assert!(is_valid_tag_name("_"));
2660    }
2661
2662    #[test]
2663    fn test_valid_tag_name_underscore_prefix() {
2664        assert!(is_valid_tag_name("_private"));
2665    }
2666
2667    #[test]
2668    fn test_valid_tag_name_with_numbers() {
2669        assert!(is_valid_tag_name("item1"));
2670        assert!(is_valid_tag_name("item_2"));
2671    }
2672
2673    #[test]
2674    fn test_invalid_tag_name_empty() {
2675        assert!(!is_valid_tag_name(""));
2676    }
2677
2678    #[test]
2679    fn test_invalid_tag_name_starts_with_number() {
2680        assert!(!is_valid_tag_name("1item"));
2681    }
2682
2683    #[test]
2684    fn test_invalid_tag_name_uppercase() {
2685        assert!(!is_valid_tag_name("Items"));
2686        assert!(!is_valid_tag_name("ITEMS"));
2687    }
2688
2689    #[test]
2690    fn test_invalid_tag_name_special_chars() {
2691        assert!(!is_valid_tag_name("my-items"));
2692        assert!(!is_valid_tag_name("my.items"));
2693        assert!(!is_valid_tag_name("my items"));
2694    }
2695
2696    // Guillemet preprocessing in YAML
2697
2698    #[test]
2699    fn test_guillemet_in_yaml_preserves_non_strings() {
2700        let markdown = r#"---
2701count: 42
2702price: 19.99
2703active: true
2704items:
2705  - first
2706  - 100
2707  - true
2708---
2709
2710Body."#;
2711        let doc = decompose(markdown).unwrap();
2712        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
2713        assert_eq!(doc.get_field("price").unwrap().as_f64().unwrap(), 19.99);
2714        assert!(doc.get_field("active").unwrap().as_bool().unwrap());
2715    }
2716
2717    #[test]
2718    fn test_guillemet_double_conversion_prevention() {
2719        // Ensure «» in input doesn't get double-processed
2720        let markdown = "---\ntitle: Already «converted»\n---\n\nBody.";
2721        let doc = decompose(markdown).unwrap();
2722        // Should remain as-is (not double-escaped)
2723        assert_eq!(
2724            doc.get_field("title").unwrap().as_str().unwrap(),
2725            "Already «converted»"
2726        );
2727    }
2728
2729    #[test]
2730    fn test_allowed_card_field_collision() {
2731        let markdown = r#"---
2732my_card: "some global value"
2733---
2734
2735---
2736CARD: my_card
2737title: "My Card"
2738---
2739Body
2740"#;
2741        // This should SUCCEED according to new PARSE.md
2742        let doc = decompose(markdown).unwrap();
2743
2744        // Verify global field exists
2745        assert_eq!(
2746            doc.get_field("my_card").unwrap().as_str().unwrap(),
2747            "some global value"
2748        );
2749
2750        // Verify Card exists in CARDS array
2751        let cards = doc.get_field("CARDS").unwrap().as_array().unwrap();
2752        assert!(!cards.is_empty());
2753        let card = cards
2754            .iter()
2755            .find(|v| v.get("CARD").and_then(|c| c.as_str()) == Some("my_card"))
2756            .expect("Card not found");
2757        assert_eq!(card.get("title").unwrap().as_str().unwrap(), "My Card");
2758    }
2759
2760    #[test]
2761    fn test_yaml_custom_tags_in_frontmatter() {
2762        // User-defined YAML tags like !fill should be accepted and ignored
2763        let markdown = r#"---
2764memo_from: !fill 2d lt example
2765regular_field: normal value
2766---
2767
2768Body content."#;
2769        let doc = decompose(markdown).unwrap();
2770
2771        // The tag !fill should be ignored, value parsed as string "2d lt example"
2772        assert_eq!(
2773            doc.get_field("memo_from").unwrap().as_str().unwrap(),
2774            "2d lt example"
2775        );
2776        // Regular fields should still work
2777        assert_eq!(
2778            doc.get_field("regular_field").unwrap().as_str().unwrap(),
2779            "normal value"
2780        );
2781        assert_eq!(doc.body(), Some("\nBody content."));
2782    }
2783
2784    /// Test the exact example from EXTENDED_MARKDOWN.md (lines 92-127)
2785    #[test]
2786    fn test_spec_example() {
2787        let markdown = r#"---
2788title: My Document
2789QUILL: blog_post
2790---
2791Main document body.
2792
2793***
2794
2795More content after horizontal rule.
2796
2797---
2798CARD: section
2799heading: Introduction
2800---
2801Introduction content.
2802
2803---
2804CARD: section
2805heading: Conclusion
2806---
2807Conclusion content.
2808"#;
2809
2810        let doc = decompose(markdown).unwrap();
2811
2812        // Verify global fields
2813        assert_eq!(
2814            doc.get_field("title").unwrap().as_str().unwrap(),
2815            "My Document"
2816        );
2817        assert_eq!(doc.quill_reference().name, "blog_post");
2818
2819        // Verify body contains horizontal rule (*** preserved)
2820        let body = doc.body().unwrap();
2821        assert!(body.contains("Main document body."));
2822        assert!(body.contains("***"));
2823        assert!(body.contains("More content after horizontal rule."));
2824
2825        // Verify CARDS array
2826        let cards = doc.get_field("CARDS").unwrap().as_array().unwrap();
2827        assert_eq!(cards.len(), 2);
2828
2829        // First card
2830        let card1 = cards[0].as_object().unwrap();
2831        assert_eq!(card1.get("CARD").unwrap().as_str().unwrap(), "section");
2832        assert_eq!(
2833            card1.get("heading").unwrap().as_str().unwrap(),
2834            "Introduction"
2835        );
2836        assert_eq!(
2837            card1.get("BODY").unwrap().as_str().unwrap(),
2838            "Introduction content.\n\n"
2839        );
2840
2841        // Second card
2842        let card2 = cards[1].as_object().unwrap();
2843        assert_eq!(card2.get("CARD").unwrap().as_str().unwrap(), "section");
2844        assert_eq!(
2845            card2.get("heading").unwrap().as_str().unwrap(),
2846            "Conclusion"
2847        );
2848        assert_eq!(
2849            card2.get("BODY").unwrap().as_str().unwrap(),
2850            "Conclusion content.\n"
2851        );
2852    }
2853}