quillmark_core/
parse.rs

1//! # Parsing Module
2//!
3//! Parsing functionality for markdown documents with YAML frontmatter.
4//!
5//! ## Overview
6//!
//! The `parse` module provides the [`ParsedDocument::from_markdown`] function for parsing markdown documents
//! with YAML frontmatter into a [`ParsedDocument`].
8//!
9//! ## Key Types
10//!
11//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
12//! - [`BODY_FIELD`]: Constant for the field name storing document body
13//!
14//! ## Examples
15//!
16//! ### Basic Parsing
17//!
18//! ```
19//! use quillmark_core::ParsedDocument;
20//!
21//! let markdown = r#"---
22//! title: My Document
23//! author: John Doe
24//! ---
25//!
26//! # Introduction
27//!
28//! Document content here.
29//! "#;
30//!
31//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
32//! let title = doc.get_field("title")
33//!     .and_then(|v| v.as_str())
34//!     .unwrap_or("Untitled");
35//! ```
36//!
37//! ## Error Handling
38//!
39//! The [`ParsedDocument::from_markdown`] function returns errors for:
40//! - Malformed YAML syntax
41//! - Unclosed frontmatter blocks
42//! - Multiple global frontmatter blocks
43//! - Both QUILL and CARD specified in the same block
44//! - Reserved field name usage
45//! - Name collisions
46//!
47//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.
48
49use std::collections::HashMap;
50
51use crate::value::QuillValue;
52
/// The field name used to store the document body.
///
/// The parser writes the global markdown body into the document's field map under
/// this key, and writes each card's body under the same key inside that card's object.
/// Frontmatter that defines this key itself is rejected as using a reserved field name.
pub const BODY_FIELD: &str = "BODY";
55
56/// A parsed markdown document with frontmatter
57#[derive(Debug, Clone)]
58pub struct ParsedDocument {
59    fields: HashMap<String, QuillValue>,
60    quill_tag: String,
61}
62
63impl ParsedDocument {
64    /// Create a new ParsedDocument with the given fields
65    pub fn new(fields: HashMap<String, QuillValue>) -> Self {
66        Self {
67            fields,
68            quill_tag: "__default__".to_string(),
69        }
70    }
71
72    /// Create a ParsedDocument from fields and quill tag
73    pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: String) -> Self {
74        Self { fields, quill_tag }
75    }
76
77    /// Create a ParsedDocument from markdown string
78    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
79        decompose(markdown)
80    }
81
82    /// Get the quill tag (from QUILL key, or "__default__" if not specified)
83    pub fn quill_tag(&self) -> &str {
84        &self.quill_tag
85    }
86
87    /// Get the document body
88    pub fn body(&self) -> Option<&str> {
89        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
90    }
91
92    /// Get a specific field
93    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
94        self.fields.get(name)
95    }
96
97    /// Get all fields (including body)
98    pub fn fields(&self) -> &HashMap<String, QuillValue> {
99        &self.fields
100    }
101
102    /// Create a new ParsedDocument with default values applied
103    ///
104    /// This method creates a new ParsedDocument with default values applied for any
105    /// fields that are missing from the original document but have defaults specified.
106    /// Existing fields are preserved and not overwritten.
107    ///
108    /// # Arguments
109    ///
110    /// * `defaults` - A HashMap of field names to their default QuillValues
111    ///
112    /// # Returns
113    ///
114    /// A new ParsedDocument with defaults applied for missing fields
115    pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
116        let mut fields = self.fields.clone();
117
118        for (field_name, default_value) in defaults {
119            // Only apply default if field is missing
120            if !fields.contains_key(field_name) {
121                fields.insert(field_name.clone(), default_value.clone());
122            }
123        }
124
125        Self {
126            fields,
127            quill_tag: self.quill_tag.clone(),
128        }
129    }
130
131    /// Create a new ParsedDocument with coerced field values
132    ///
133    /// This method applies type coercions to field values based on the schema.
134    /// Coercions include:
135    /// - Singular values to arrays when schema expects array
136    /// - String "true"/"false" to boolean
137    /// - Numbers to boolean (0=false, non-zero=true)
138    /// - String numbers to number type
139    /// - Boolean to number (true=1, false=0)
140    ///
141    /// # Arguments
142    ///
143    /// * `schema` - A JSON Schema object defining expected field types
144    ///
145    /// # Returns
146    ///
147    /// A new ParsedDocument with coerced field values
148    pub fn with_coercion(&self, schema: &QuillValue) -> Self {
149        use crate::schema::coerce_document;
150
151        let coerced_fields = coerce_document(schema, &self.fields);
152
153        Self {
154            fields: coerced_fields,
155            quill_tag: self.quill_tag.clone(),
156        }
157    }
158}
159
/// One `---`-delimited metadata block found in the markdown source, together with
/// its parsed YAML and any QUILL/CARD directive it carried.
#[derive(Debug)]
struct MetadataBlock {
    start: usize,                          // Byte offset of the opening "---"
    end: usize,                            // Byte offset just past the closing delimiter (and its line ending, if present)
    yaml_value: Option<serde_json::Value>, // Parsed YAML as JSON with QUILL/CARD removed (None if the block is empty or nothing else remains)
    tag: Option<String>,                   // Field name from CARD key
    quill_name: Option<String>,            // Quill name from QUILL key
}
168
/// Report whether `name` matches the pattern `[a-z_][a-z0-9_]*`.
///
/// The first character must be an ASCII lowercase letter or `_`; every following
/// character must be an ASCII lowercase letter, an ASCII digit, or `_`.
/// The empty string is not a valid name.
fn is_valid_tag_name(name: &str) -> bool {
    let mut rest = name.chars();
    match rest.next() {
        Some(head) if head.is_ascii_lowercase() || head == '_' => {
            rest.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
        }
        // Empty input or an illegal leading character.
        _ => false,
    }
}
190
191/// Check if a position is inside a fenced code block
192///
193/// This uses strict fence detection per EXTENDED_MARKDOWN.md specification:
194/// - Only exactly 3 backticks (```) are valid fences
195/// - Tildes (~~~) are NOT treated as fences
196/// - 4+ backticks are NOT treated as fences
197fn is_inside_fenced_block(markdown: &str, pos: usize) -> bool {
198    let before = &markdown[..pos];
199    let mut in_fence = false;
200
201    // Check if document starts with exactly ```
202    if is_exact_fence_at(before, 0) {
203        in_fence = !in_fence;
204    }
205
206    // Scan for fence toggles after newlines
207    for (i, _) in before.match_indices('\n') {
208        if is_exact_fence_at(before, i + 1) {
209            in_fence = !in_fence;
210        }
211    }
212
213    in_fence
214}
215
/// Report whether `text` has a run of exactly three backticks starting at `pos`.
///
/// Two backticks are too few, and a fourth backtick right after the run disqualifies it.
/// Any other character (or the end of the text) may follow the three backticks.
/// Returns `false` when `pos` is at or beyond the end of `text`.
fn is_exact_fence_at(text: &str, pos: usize) -> bool {
    if pos >= text.len() {
        return false;
    }

    match text[pos..].strip_prefix("```") {
        // Three backticks found: reject only if a fourth one follows.
        Some(after_fence) => !after_fence.starts_with('`'),
        None => false,
    }
}
230
231/// Creates serde_saphyr Options with security budgets configured.
232///
233/// Uses MAX_YAML_DEPTH from error.rs to limit nesting depth at the parser level,
234/// which is more robust than heuristic-based pre-parse checks.
235fn yaml_parse_options() -> serde_saphyr::Options {
236    let budget = serde_saphyr::Budget {
237        max_depth: crate::error::MAX_YAML_DEPTH,
238        ..Default::default()
239    };
240    serde_saphyr::Options {
241        budget: Some(budget),
242        ..Default::default()
243    }
244}
245
246/// Find all metadata blocks in the document
247fn find_metadata_blocks(markdown: &str) -> Result<Vec<MetadataBlock>, crate::error::ParseError> {
248    let mut blocks = Vec::new();
249    let mut pos = 0;
250
251    while pos < markdown.len() {
252        // Look for opening "---\n" or "---\r\n"
253        let search_str = &markdown[pos..];
254        let delimiter_result = search_str
255            .find("---\n")
256            .map(|p| (p, 4, "\n"))
257            .or_else(|| search_str.find("---\r\n").map(|p| (p, 5, "\r\n")));
258
259        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
260            let abs_pos = pos + delimiter_pos;
261
262            // Check if the delimiter is at the start of a line
263            let is_start_of_line = if abs_pos == 0 {
264                true
265            } else {
266                let char_before = markdown.as_bytes()[abs_pos - 1];
267                char_before == b'\n' || char_before == b'\r'
268            };
269
270            if !is_start_of_line {
271                pos = abs_pos + 1;
272                continue;
273            }
274
275            // Skip if inside a fenced code block
276            if is_inside_fenced_block(markdown, abs_pos) {
277                pos = abs_pos + 3;
278                continue;
279            }
280
281            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"
282
283            // Check if this --- is a horizontal rule (blank lines above AND below)
284            let preceded_by_blank = if abs_pos > 0 {
285                // Check if there's a blank line before the ---
286                let before = &markdown[..abs_pos];
287                before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
288            } else {
289                false
290            };
291
292            let followed_by_blank = if content_start < markdown.len() {
293                markdown[content_start..].starts_with('\n')
294                    || markdown[content_start..].starts_with("\r\n")
295            } else {
296                false
297            };
298
299            // Horizontal rule: blank lines both above and below
300            if preceded_by_blank && followed_by_blank {
301                // This is a horizontal rule in the body, skip it
302                pos = abs_pos + 3; // Skip past "---"
303                continue;
304            }
305
306            // Check if followed by non-blank line (or if we're at document start)
307            // This starts a metadata block
308            if followed_by_blank {
309                // --- followed by blank line but NOT preceded by blank line
310                // This is NOT a metadata block opening, skip it
311                pos = abs_pos + 3;
312                continue;
313            }
314
315            // Found potential metadata block opening (followed by non-blank line)
316            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
317            let rest = &markdown[content_start..];
318
319            // First try to find delimiters with trailing newlines
320            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
321            let closing_with_newline = closing_patterns
322                .iter()
323                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
324                .min_by_key(|(p, _)| *p);
325
326            // Also check for closing at end of document (no trailing newline)
327            let closing_at_eof = ["\n---", "\r\n---"]
328                .iter()
329                .filter_map(|delim| {
330                    rest.find(delim).and_then(|p| {
331                        if p + delim.len() == rest.len() {
332                            Some((p, delim.len()))
333                        } else {
334                            None
335                        }
336                    })
337                })
338                .min_by_key(|(p, _)| *p);
339
340            let closing_result = match (closing_with_newline, closing_at_eof) {
341                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
342                (Some(_), Some(_)) => closing_with_newline,
343                (Some(_), None) => closing_with_newline,
344                (None, Some(_)) => closing_at_eof,
345                (None, None) => None,
346            };
347
348            if let Some((closing_pos, closing_len)) = closing_result {
349                let abs_closing_pos = content_start + closing_pos;
350                let content = &markdown[content_start..abs_closing_pos];
351
352                // Check YAML size limit
353                if content.len() > crate::error::MAX_YAML_SIZE {
354                    return Err(crate::error::ParseError::InputTooLarge {
355                        size: content.len(),
356                        max: crate::error::MAX_YAML_SIZE,
357                    });
358                }
359
360                // Parse YAML content to check for reserved keys (QUILL, CARD)
361                // Uses configured budget to limit nesting depth (prevents stack overflow)
362                // Normalize: treat whitespace-only content as empty frontmatter
363                let content = content.trim();
364                let (tag, quill_name, yaml_value) = if !content.is_empty() {
365                    // Try to parse the YAML with security budgets
366                    match serde_saphyr::from_str_with_options::<serde_json::Value>(
367                        content,
368                        yaml_parse_options(),
369                    ) {
370                        Ok(parsed_yaml) => {
371                            if let Some(mapping) = parsed_yaml.as_object() {
372                                let quill_key = "QUILL";
373                                let card_key = "CARD";
374
375                                let has_quill = mapping.contains_key(quill_key);
376                                let has_card = mapping.contains_key(card_key);
377
378                                if has_quill && has_card {
379                                    return Err(crate::error::ParseError::InvalidStructure(
380                                        "Cannot specify both QUILL and CARD in the same block"
381                                            .to_string(),
382                                    ));
383                                }
384
385                                // Check for reserved field names (BODY, CARDS)
386                                const RESERVED_FIELDS: &[&str] = &["BODY", "CARDS"];
387                                for reserved in RESERVED_FIELDS {
388                                    if mapping.contains_key(*reserved) {
389                                        return Err(crate::error::ParseError::InvalidStructure(
390                                            format!(
391                                                "Reserved field name '{}' cannot be used in YAML frontmatter",
392                                                reserved
393                                            ),
394                                        ));
395                                    }
396                                }
397
398                                if has_quill {
399                                    // Extract quill name
400                                    let quill_value = mapping.get(quill_key).unwrap();
401                                    let quill_name_str = quill_value
402                                        .as_str()
403                                        .ok_or("QUILL value must be a string")?;
404
405                                    if !is_valid_tag_name(quill_name_str) {
406                                        return Err(crate::error::ParseError::InvalidStructure(format!(
407                                            "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
408                                            quill_name_str
409                                        )));
410                                    }
411
412                                    // Remove QUILL from the YAML value for processing
413                                    let mut new_mapping = mapping.clone();
414                                    new_mapping.remove(quill_key);
415                                    let new_value = if new_mapping.is_empty() {
416                                        None
417                                    } else {
418                                        Some(serde_json::Value::Object(new_mapping))
419                                    };
420
421                                    (None, Some(quill_name_str.to_string()), new_value)
422                                } else if has_card {
423                                    // Extract card field name
424                                    let card_value = mapping.get(card_key).unwrap();
425                                    let field_name =
426                                        card_value.as_str().ok_or("CARD value must be a string")?;
427
428                                    if !is_valid_tag_name(field_name) {
429                                        return Err(crate::error::ParseError::InvalidStructure(format!(
430                                            "Invalid card field name '{}': must match pattern [a-z_][a-z0-9_]*",
431                                            field_name
432                                        )));
433                                    }
434
435                                    // Remove CARD from the YAML value for processing
436                                    let mut new_mapping = mapping.clone();
437                                    new_mapping.remove(card_key);
438                                    let new_value = if new_mapping.is_empty() {
439                                        None
440                                    } else {
441                                        Some(serde_json::Value::Object(new_mapping))
442                                    };
443
444                                    (Some(field_name.to_string()), None, new_value)
445                                } else {
446                                    // No reserved keys, keep the parsed YAML
447                                    (None, None, Some(parsed_yaml))
448                                }
449                            } else {
450                                // Not a mapping, keep the parsed YAML (could be null for whitespace)
451                                (None, None, Some(parsed_yaml))
452                            }
453                        }
454                        Err(e) => {
455                            // Calculate line number for the start of this block
456                            let block_start_line = markdown[..abs_pos].lines().count() + 1;
457                            return Err(crate::error::ParseError::YamlErrorWithLocation {
458                                message: e.to_string(),
459                                line: block_start_line,
460                                block_index: blocks.len(),
461                            });
462                        }
463                    }
464                } else {
465                    // Empty content
466                    (None, None, None)
467                };
468
469                blocks.push(MetadataBlock {
470                    start: abs_pos,
471                    end: abs_closing_pos + closing_len, // After closing delimiter
472                    yaml_value,
473                    tag,
474                    quill_name,
475                });
476
477                // Check card count limit to prevent memory exhaustion
478                if blocks.len() > crate::error::MAX_CARD_COUNT {
479                    return Err(crate::error::ParseError::InputTooLarge {
480                        size: blocks.len(),
481                        max: crate::error::MAX_CARD_COUNT,
482                    });
483                }
484
485                pos = abs_closing_pos + closing_len;
486            } else if abs_pos == 0 {
487                // Frontmatter started but not closed
488                return Err(crate::error::ParseError::InvalidStructure(
489                    "Frontmatter started but not closed with ---".to_string(),
490                ));
491            } else {
492                // Not a valid metadata block, skip this position
493                pos = abs_pos + 3;
494            }
495        } else {
496            break;
497        }
498    }
499
500    Ok(blocks)
501}
502
503/// Decompose markdown into frontmatter fields and body
504fn decompose(markdown: &str) -> Result<ParsedDocument, crate::error::ParseError> {
505    // Check input size limit
506    if markdown.len() > crate::error::MAX_INPUT_SIZE {
507        return Err(crate::error::ParseError::InputTooLarge {
508            size: markdown.len(),
509            max: crate::error::MAX_INPUT_SIZE,
510        });
511    }
512
513    let mut fields = HashMap::new();
514
515    // Find all metadata blocks
516    let blocks = find_metadata_blocks(markdown)?;
517
518    if blocks.is_empty() {
519        // No metadata blocks, entire content is body
520        fields.insert(
521            BODY_FIELD.to_string(),
522            QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
523        );
524        return Ok(ParsedDocument::new(fields));
525    }
526
527    // Collect all card items into unified CARDS array
528    let mut cards_array: Vec<serde_json::Value> = Vec::new();
529    let mut global_frontmatter_index: Option<usize> = None;
530    let mut quill_name: Option<String> = None;
531
532    // First pass: identify global frontmatter, quill directive, and validate
533    for (idx, block) in blocks.iter().enumerate() {
534        if idx == 0 {
535            // Top-level frontmatter: can have QUILL or neither (not considered a card)
536            if let Some(ref name) = block.quill_name {
537                quill_name = Some(name.clone());
538            }
539            // If it has neither QUILL nor CARD, it's global frontmatter
540            if block.tag.is_none() && block.quill_name.is_none() {
541                global_frontmatter_index = Some(idx);
542            }
543        } else {
544            // Inline blocks (idx > 0): MUST have CARD, cannot have QUILL
545            if block.quill_name.is_some() {
546                return Err(crate::error::ParseError::InvalidStructure("QUILL directive can only appear in the top-level frontmatter, not in inline blocks. Use CARD instead.".to_string()));
547            }
548            if block.tag.is_none() {
549                // Inline block without CARD
550                return Err(crate::error::ParseError::missing_card_directive());
551            }
552        }
553    }
554
555    // Parse global frontmatter if present
556    if let Some(idx) = global_frontmatter_index {
557        let block = &blocks[idx];
558
559        // Get parsed JSON fields directly (already parsed in find_metadata_blocks)
560        let json_fields: HashMap<String, serde_json::Value> = match &block.yaml_value {
561            Some(serde_json::Value::Object(mapping)) => mapping
562                .iter()
563                .map(|(k, v)| (k.clone(), v.clone()))
564                .collect(),
565            Some(serde_json::Value::Null) => {
566                // Null value (from whitespace-only YAML) - treat as empty mapping
567                HashMap::new()
568            }
569            Some(_) => {
570                // Non-mapping, non-null YAML (e.g., scalar, sequence) - this is an error for frontmatter
571                return Err(crate::error::ParseError::InvalidStructure(
572                    "Invalid YAML frontmatter: expected a mapping".to_string(),
573                ));
574            }
575            None => HashMap::new(),
576        };
577
578        // Convert JSON values to QuillValue at boundary
579        for (key, value) in json_fields {
580            fields.insert(key, QuillValue::from_json(value));
581        }
582    }
583
584    // Process blocks with quill directives
585    for block in &blocks {
586        if block.quill_name.is_some() {
587            // Quill directive blocks can have YAML content (becomes part of frontmatter)
588            if let Some(ref json_val) = block.yaml_value {
589                let json_fields: HashMap<String, serde_json::Value> = match json_val {
590                    serde_json::Value::Object(mapping) => mapping
591                        .iter()
592                        .map(|(k, v)| (k.clone(), v.clone()))
593                        .collect(),
594                    serde_json::Value::Null => {
595                        // Null value (from whitespace-only YAML) - treat as empty mapping
596                        HashMap::new()
597                    }
598                    _ => {
599                        return Err(crate::error::ParseError::InvalidStructure(
600                            "Invalid YAML in quill block: expected a mapping".to_string(),
601                        ));
602                    }
603                };
604
605                // Check for conflicts with existing fields
606                for key in json_fields.keys() {
607                    if fields.contains_key(key) {
608                        return Err(crate::error::ParseError::InvalidStructure(format!(
609                            "Name collision: quill block field '{}' conflicts with existing field",
610                            key
611                        )));
612                    }
613                }
614
615                // Convert JSON values to QuillValue at boundary
616                for (key, value) in json_fields {
617                    fields.insert(key, QuillValue::from_json(value));
618                }
619            }
620        }
621    }
622
623    // Parse tagged blocks (CARD blocks)
624    for (idx, block) in blocks.iter().enumerate() {
625        if let Some(ref tag_name) = block.tag {
626            // Get YAML metadata directly (already parsed in find_metadata_blocks)
627            // Get JSON metadata directly (already parsed in find_metadata_blocks)
628            let mut item_fields: serde_json::Map<String, serde_json::Value> =
629                match &block.yaml_value {
630                    Some(serde_json::Value::Object(mapping)) => mapping.clone(),
631                    Some(serde_json::Value::Null) => {
632                        // Null value (from whitespace-only YAML) - treat as empty mapping
633                        serde_json::Map::new()
634                    }
635                    Some(_) => {
636                        return Err(crate::error::ParseError::InvalidStructure(format!(
637                            "Invalid YAML in card block '{}': expected a mapping",
638                            tag_name
639                        )));
640                    }
641                    None => serde_json::Map::new(),
642                };
643
644            // Extract body for this card block
645            let body_start = block.end;
646            let body_end = if idx + 1 < blocks.len() {
647                blocks[idx + 1].start
648            } else {
649                markdown.len()
650            };
651            let body = &markdown[body_start..body_end];
652
653            // Add body to item fields
654            item_fields.insert(
655                BODY_FIELD.to_string(),
656                serde_json::Value::String(body.to_string()),
657            );
658
659            // Add CARD discriminator field
660            item_fields.insert(
661                "CARD".to_string(),
662                serde_json::Value::String(tag_name.clone()),
663            );
664
665            // Add to CARDS array
666            cards_array.push(serde_json::Value::Object(item_fields));
667        }
668    }
669
670    // Extract global body
671    // Body starts after global frontmatter or quill block (whichever comes first)
672    // Body ends at the first card block or EOF
673    let first_non_card_block_idx = blocks
674        .iter()
675        .position(|b| b.tag.is_none() && b.quill_name.is_none())
676        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));
677
678    let (body_start, body_end) = if let Some(idx) = first_non_card_block_idx {
679        // Body starts after the first non-card block (global frontmatter or quill)
680        let start = blocks[idx].end;
681
682        // Body ends at the first card block after this, or EOF
683        let end = blocks
684            .iter()
685            .skip(idx + 1)
686            .find(|b| b.tag.is_some())
687            .map(|b| b.start)
688            .unwrap_or(markdown.len());
689
690        (start, end)
691    } else {
692        // No global frontmatter or quill block - body is everything before the first card block
693        let end = blocks
694            .iter()
695            .find(|b| b.tag.is_some())
696            .map(|b| b.start)
697            .unwrap_or(0);
698
699        (0, end)
700    };
701
702    let global_body = &markdown[body_start..body_end];
703
704    fields.insert(
705        BODY_FIELD.to_string(),
706        QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
707    );
708
709    // Always add CARDS array to fields (may be empty)
710    fields.insert(
711        "CARDS".to_string(),
712        QuillValue::from_json(serde_json::Value::Array(cards_array)),
713    );
714
715    // Check field count limit to prevent memory exhaustion
716    if fields.len() > crate::error::MAX_FIELD_COUNT {
717        return Err(crate::error::ParseError::InputTooLarge {
718            size: fields.len(),
719            max: crate::error::MAX_FIELD_COUNT,
720        });
721    }
722
723    let quill_tag = quill_name.unwrap_or_else(|| "__default__".to_string());
724    let parsed = ParsedDocument::with_quill_tag(fields, quill_tag);
725
726    Ok(parsed)
727}
728
729#[cfg(test)]
730mod tests {
731    use super::*;
732
733    #[test]
734    fn test_no_frontmatter() {
735        let markdown = "# Hello World\n\nThis is a test.";
736        let doc = decompose(markdown).unwrap();
737
738        assert_eq!(doc.body(), Some(markdown));
739        assert_eq!(doc.fields().len(), 1);
740        // Verify default quill tag is set
741        assert_eq!(doc.quill_tag(), "__default__");
742    }
743
744    #[test]
745    fn test_with_frontmatter() {
746        let markdown = r#"---
747title: Test Document
748author: Test Author
749---
750
751# Hello World
752
753This is the body."#;
754
755        let doc = decompose(markdown).unwrap();
756
757        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
758        assert_eq!(
759            doc.get_field("title").unwrap().as_str().unwrap(),
760            "Test Document"
761        );
762        assert_eq!(
763            doc.get_field("author").unwrap().as_str().unwrap(),
764            "Test Author"
765        );
766        assert_eq!(doc.fields().len(), 4); // title, author, body, CARDS
767                                           // Verify default quill tag is set when no QUILL directive
768        assert_eq!(doc.quill_tag(), "__default__");
769    }
770
771    #[test]
772    fn test_whitespace_frontmatter() {
773        // Frontmatter with only whitespace should be treated as empty/valid
774        // and not error out or be treated as null YAML
775        let markdown = "---\n   \n---\n\n# Hello";
776        let doc = decompose(markdown).unwrap();
777
778        assert_eq!(doc.body(), Some("\n# Hello"));
779        // Should have default fields (BODY + CARDS) but no others
780        // (unless defaults are applied later, but decompose returns basics)
781        assert!(doc.get_field("title").is_none());
782        assert_eq!(doc.fields().len(), 2); // BODY, CARDS
783    }
784
785    #[test]
786    fn test_complex_yaml_frontmatter() {
787        let markdown = r#"---
788title: Complex Document
789tags:
790  - test
791  - yaml
792metadata:
793  version: 1.0
794  nested:
795    field: value
796---
797
798Content here."#;
799
800        let doc = decompose(markdown).unwrap();
801
802        assert_eq!(doc.body(), Some("\nContent here."));
803        assert_eq!(
804            doc.get_field("title").unwrap().as_str().unwrap(),
805            "Complex Document"
806        );
807
808        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
809        assert_eq!(tags.len(), 2);
810        assert_eq!(tags[0].as_str().unwrap(), "test");
811        assert_eq!(tags[1].as_str().unwrap(), "yaml");
812    }
813
814    #[test]
815    fn test_with_defaults_empty_document() {
816        use std::collections::HashMap;
817
818        let mut defaults = HashMap::new();
819        defaults.insert(
820            "status".to_string(),
821            QuillValue::from_json(serde_json::json!("draft")),
822        );
823        defaults.insert(
824            "version".to_string(),
825            QuillValue::from_json(serde_json::json!(1)),
826        );
827
828        // Create an empty parsed document
829        let doc = ParsedDocument::new(HashMap::new());
830        let doc_with_defaults = doc.with_defaults(&defaults);
831
832        // Check that defaults were applied
833        assert_eq!(
834            doc_with_defaults
835                .get_field("status")
836                .unwrap()
837                .as_str()
838                .unwrap(),
839            "draft"
840        );
841        assert_eq!(
842            doc_with_defaults
843                .get_field("version")
844                .unwrap()
845                .as_number()
846                .unwrap()
847                .as_i64()
848                .unwrap(),
849            1
850        );
851    }
852
853    #[test]
854    fn test_with_defaults_preserves_existing_values() {
855        use std::collections::HashMap;
856
857        let mut defaults = HashMap::new();
858        defaults.insert(
859            "status".to_string(),
860            QuillValue::from_json(serde_json::json!("draft")),
861        );
862
863        // Create document with existing status
864        let mut fields = HashMap::new();
865        fields.insert(
866            "status".to_string(),
867            QuillValue::from_json(serde_json::json!("published")),
868        );
869        let doc = ParsedDocument::new(fields);
870
871        let doc_with_defaults = doc.with_defaults(&defaults);
872
873        // Existing value should be preserved
874        assert_eq!(
875            doc_with_defaults
876                .get_field("status")
877                .unwrap()
878                .as_str()
879                .unwrap(),
880            "published"
881        );
882    }
883
884    #[test]
885    fn test_with_defaults_partial_application() {
886        use std::collections::HashMap;
887
888        let mut defaults = HashMap::new();
889        defaults.insert(
890            "status".to_string(),
891            QuillValue::from_json(serde_json::json!("draft")),
892        );
893        defaults.insert(
894            "version".to_string(),
895            QuillValue::from_json(serde_json::json!(1)),
896        );
897
898        // Create document with only one field
899        let mut fields = HashMap::new();
900        fields.insert(
901            "status".to_string(),
902            QuillValue::from_json(serde_json::json!("published")),
903        );
904        let doc = ParsedDocument::new(fields);
905
906        let doc_with_defaults = doc.with_defaults(&defaults);
907
908        // Existing field preserved, missing field gets default
909        assert_eq!(
910            doc_with_defaults
911                .get_field("status")
912                .unwrap()
913                .as_str()
914                .unwrap(),
915            "published"
916        );
917        assert_eq!(
918            doc_with_defaults
919                .get_field("version")
920                .unwrap()
921                .as_number()
922                .unwrap()
923                .as_i64()
924                .unwrap(),
925            1
926        );
927    }
928
929    #[test]
930    fn test_with_defaults_no_defaults() {
931        use std::collections::HashMap;
932
933        let defaults = HashMap::new(); // Empty defaults map
934
935        let doc = ParsedDocument::new(HashMap::new());
936        let doc_with_defaults = doc.with_defaults(&defaults);
937
938        // No defaults should be applied
939        assert!(doc_with_defaults.fields().is_empty());
940    }
941
942    #[test]
943    fn test_with_defaults_complex_types() {
944        use std::collections::HashMap;
945
946        let mut defaults = HashMap::new();
947        defaults.insert(
948            "tags".to_string(),
949            QuillValue::from_json(serde_json::json!(["default", "tag"])),
950        );
951
952        let doc = ParsedDocument::new(HashMap::new());
953        let doc_with_defaults = doc.with_defaults(&defaults);
954
955        // Complex default value should be applied
956        let tags = doc_with_defaults
957            .get_field("tags")
958            .unwrap()
959            .as_sequence()
960            .unwrap();
961        assert_eq!(tags.len(), 2);
962        assert_eq!(tags[0].as_str().unwrap(), "default");
963        assert_eq!(tags[1].as_str().unwrap(), "tag");
964    }
965
966    #[test]
967    fn test_with_coercion_singular_to_array() {
968        use std::collections::HashMap;
969
970        let schema = QuillValue::from_json(serde_json::json!({
971            "$schema": "https://json-schema.org/draft/2019-09/schema",
972            "type": "object",
973            "properties": {
974                "tags": {"type": "array"}
975            }
976        }));
977
978        let mut fields = HashMap::new();
979        fields.insert(
980            "tags".to_string(),
981            QuillValue::from_json(serde_json::json!("single-tag")),
982        );
983        let doc = ParsedDocument::new(fields);
984
985        let coerced_doc = doc.with_coercion(&schema);
986
987        let tags = coerced_doc.get_field("tags").unwrap();
988        assert!(tags.as_array().is_some());
989        let tags_array = tags.as_array().unwrap();
990        assert_eq!(tags_array.len(), 1);
991        assert_eq!(tags_array[0].as_str().unwrap(), "single-tag");
992    }
993
994    #[test]
995    fn test_with_coercion_string_to_boolean() {
996        use std::collections::HashMap;
997
998        let schema = QuillValue::from_json(serde_json::json!({
999            "$schema": "https://json-schema.org/draft/2019-09/schema",
1000            "type": "object",
1001            "properties": {
1002                "active": {"type": "boolean"}
1003            }
1004        }));
1005
1006        let mut fields = HashMap::new();
1007        fields.insert(
1008            "active".to_string(),
1009            QuillValue::from_json(serde_json::json!("true")),
1010        );
1011        let doc = ParsedDocument::new(fields);
1012
1013        let coerced_doc = doc.with_coercion(&schema);
1014
1015        assert!(coerced_doc.get_field("active").unwrap().as_bool().unwrap());
1016    }
1017
1018    #[test]
1019    fn test_with_coercion_string_to_number() {
1020        use std::collections::HashMap;
1021
1022        let schema = QuillValue::from_json(serde_json::json!({
1023            "$schema": "https://json-schema.org/draft/2019-09/schema",
1024            "type": "object",
1025            "properties": {
1026                "count": {"type": "number"}
1027            }
1028        }));
1029
1030        let mut fields = HashMap::new();
1031        fields.insert(
1032            "count".to_string(),
1033            QuillValue::from_json(serde_json::json!("42")),
1034        );
1035        let doc = ParsedDocument::new(fields);
1036
1037        let coerced_doc = doc.with_coercion(&schema);
1038
1039        assert_eq!(
1040            coerced_doc.get_field("count").unwrap().as_i64().unwrap(),
1041            42
1042        );
1043    }
1044
1045    #[test]
1046    fn test_invalid_yaml() {
1047        let markdown = r#"---
1048title: [invalid yaml
1049author: missing close bracket
1050---
1051
1052Content here."#;
1053
1054        let result = decompose(markdown);
1055        assert!(result.is_err());
1056        // Error message now includes location context
1057        assert!(result.unwrap_err().to_string().contains("YAML error"));
1058    }
1059
1060    #[test]
1061    fn test_unclosed_frontmatter() {
1062        let markdown = r#"---
1063title: Test
1064author: Test Author
1065
1066Content without closing ---"#;
1067
1068        let result = decompose(markdown);
1069        assert!(result.is_err());
1070        assert!(result.unwrap_err().to_string().contains("not closed"));
1071    }
1072
1073    // Extended metadata tests
1074
1075    #[test]
1076    fn test_basic_tagged_block() {
1077        let markdown = r#"---
1078title: Main Document
1079---
1080
1081Main body content.
1082
1083---
1084CARD: items
1085name: Item 1
1086---
1087
1088Body of item 1."#;
1089
1090        let doc = decompose(markdown).unwrap();
1091
1092        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
1093        assert_eq!(
1094            doc.get_field("title").unwrap().as_str().unwrap(),
1095            "Main Document"
1096        );
1097
1098        // Cards are now in CARDS array with CARD discriminator
1099        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1100        assert_eq!(cards.len(), 1);
1101
1102        let item = cards[0].as_object().unwrap();
1103        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1104        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1105        assert_eq!(
1106            item.get(BODY_FIELD).unwrap().as_str().unwrap(),
1107            "\nBody of item 1."
1108        );
1109    }
1110
1111    #[test]
1112    fn test_multiple_tagged_blocks() {
1113        let markdown = r#"---
1114CARD: items
1115name: Item 1
1116tags: [a, b]
1117---
1118
1119First item body.
1120
1121---
1122CARD: items
1123name: Item 2
1124tags: [c, d]
1125---
1126
1127Second item body."#;
1128
1129        let doc = decompose(markdown).unwrap();
1130
1131        // Cards are in CARDS array
1132        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1133        assert_eq!(cards.len(), 2);
1134
1135        let item1 = cards[0].as_object().unwrap();
1136        assert_eq!(item1.get("CARD").unwrap().as_str().unwrap(), "items");
1137        assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");
1138
1139        let item2 = cards[1].as_object().unwrap();
1140        assert_eq!(item2.get("CARD").unwrap().as_str().unwrap(), "items");
1141        assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
1142    }
1143
1144    #[test]
1145    fn test_mixed_global_and_tagged() {
1146        let markdown = r#"---
1147title: Global
1148author: John Doe
1149---
1150
1151Global body.
1152
1153---
1154CARD: sections
1155title: Section 1
1156---
1157
1158Section 1 content.
1159
1160---
1161CARD: sections
1162title: Section 2
1163---
1164
1165Section 2 content."#;
1166
1167        let doc = decompose(markdown).unwrap();
1168
1169        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
1170        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
1171
1172        // Cards are in unified CARDS array
1173        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1174        assert_eq!(cards.len(), 2);
1175        assert_eq!(
1176            cards[0]
1177                .as_object()
1178                .unwrap()
1179                .get("CARD")
1180                .unwrap()
1181                .as_str()
1182                .unwrap(),
1183            "sections"
1184        );
1185    }
1186
1187    #[test]
1188    fn test_empty_tagged_metadata() {
1189        let markdown = r#"---
1190CARD: items
1191---
1192
1193Body without metadata."#;
1194
1195        let doc = decompose(markdown).unwrap();
1196
1197        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1198        assert_eq!(cards.len(), 1);
1199
1200        let item = cards[0].as_object().unwrap();
1201        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1202        assert_eq!(
1203            item.get(BODY_FIELD).unwrap().as_str().unwrap(),
1204            "\nBody without metadata."
1205        );
1206    }
1207
1208    #[test]
1209    fn test_tagged_block_without_body() {
1210        let markdown = r#"---
1211CARD: items
1212name: Item
1213---"#;
1214
1215        let doc = decompose(markdown).unwrap();
1216
1217        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1218        assert_eq!(cards.len(), 1);
1219
1220        let item = cards[0].as_object().unwrap();
1221        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1222        assert_eq!(item.get(BODY_FIELD).unwrap().as_str().unwrap(), "");
1223    }
1224
1225    #[test]
1226    fn test_name_collision_global_and_tagged() {
1227        let markdown = r#"---
1228items: "global value"
1229---
1230
1231Body
1232
1233---
1234CARD: items
1235name: Item
1236---
1237
1238Item body"#;
1239
1240        let result = decompose(markdown);
1241        assert!(result.is_ok(), "Name collision should be allowed now");
1242    }
1243
1244    #[test]
1245    fn test_card_name_collision_with_array_field() {
1246        // CARD type names CAN now conflict with frontmatter field names
1247        let markdown = r#"---
1248items:
1249  - name: Global Item 1
1250    value: 100
1251---
1252
1253Global body
1254
1255---
1256CARD: items
1257name: Scope Item 1
1258---
1259
1260Scope item 1 body"#;
1261
1262        let result = decompose(markdown);
1263        assert!(
1264            result.is_ok(),
1265            "Collision with array field should be allowed"
1266        );
1267    }
1268
1269    #[test]
1270    fn test_empty_global_array_with_card() {
1271        // CARD type names CAN now conflict with frontmatter field names
1272        let markdown = r#"---
1273items: []
1274---
1275
1276Global body
1277
1278---
1279CARD: items
1280name: Item 1
1281---
1282
1283Item 1 body"#;
1284
1285        let result = decompose(markdown);
1286        assert!(
1287            result.is_ok(),
1288            "Collision with empty array field should be allowed"
1289        );
1290    }
1291
1292    #[test]
1293    fn test_reserved_field_body_rejected() {
1294        let markdown = r#"---
1295CARD: section
1296BODY: Test
1297---"#;
1298
1299        let result = decompose(markdown);
1300        assert!(result.is_err(), "BODY is a reserved field name");
1301        assert!(result
1302            .unwrap_err()
1303            .to_string()
1304            .contains("Reserved field name"));
1305    }
1306
1307    #[test]
1308    fn test_reserved_field_cards_rejected() {
1309        let markdown = r#"---
1310title: Test
1311CARDS: []
1312---"#;
1313
1314        let result = decompose(markdown);
1315        assert!(result.is_err(), "CARDS is a reserved field name");
1316        assert!(result
1317            .unwrap_err()
1318            .to_string()
1319            .contains("Reserved field name"));
1320    }
1321
1322    #[test]
1323    fn test_delimiter_inside_fenced_code_block_backticks() {
1324        let markdown = r#"---
1325title: Test
1326---
1327Here is some code:
1328
1329```yaml
1330---
1331fake: frontmatter
1332---
1333```
1334
1335More content.
1336"#;
1337
1338        let doc = decompose(markdown).unwrap();
1339        // The --- inside the code block should NOT be parsed as metadata
1340        assert!(doc.body().unwrap().contains("fake: frontmatter"));
1341        assert!(doc.get_field("fake").is_none());
1342    }
1343
1344    #[test]
1345    fn test_tildes_are_not_fences() {
1346        // Per EXTENDED_MARKDOWN.md: tildes (~~~) are NOT treated as fences
1347        // So --- inside ~~~ WILL be parsed as a metadata block
1348        let markdown = r#"---
1349title: Test
1350---
1351Here is some code:
1352
1353~~~yaml
1354---
1355CARD: code_example
1356fake: frontmatter
1357---
1358~~~
1359
1360More content.
1361"#;
1362
1363        let doc = decompose(markdown).unwrap();
1364        // The --- should be parsed as a CARD block since tildes aren't fences
1365        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1366        assert_eq!(cards.len(), 1);
1367        assert_eq!(
1368            cards[0].get("fake").unwrap().as_str().unwrap(),
1369            "frontmatter"
1370        );
1371    }
1372
1373    #[test]
1374    fn test_four_backticks_are_not_fences() {
1375        // Per EXTENDED_MARKDOWN.md: only exactly 3 backticks are valid fences
1376        // 4+ backticks are NOT treated as fences
1377        let markdown = r#"---
1378title: Test
1379---
1380Here is some code:
1381
1382````yaml
1383---
1384CARD: code_example
1385fake: frontmatter
1386---
1387````
1388
1389More content.
1390"#;
1391
1392        let doc = decompose(markdown).unwrap();
1393        // The --- should be parsed as a CARD block since 4 backticks aren't a fence
1394        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1395        assert_eq!(cards.len(), 1);
1396        assert_eq!(
1397            cards[0].get("fake").unwrap().as_str().unwrap(),
1398            "frontmatter"
1399        );
1400    }
1401
1402    #[test]
1403    fn test_invalid_tag_syntax() {
1404        let markdown = r#"---
1405CARD: Invalid-Name
1406title: Test
1407---"#;
1408
1409        let result = decompose(markdown);
1410        assert!(result.is_err());
1411        assert!(result
1412            .unwrap_err()
1413            .to_string()
1414            .contains("Invalid card field name"));
1415    }
1416
1417    #[test]
1418    fn test_multiple_global_frontmatter_blocks() {
1419        let markdown = r#"---
1420title: First
1421---
1422
1423Body
1424
1425---
1426author: Second
1427---
1428
1429More body"#;
1430
1431        let result = decompose(markdown);
1432        assert!(result.is_err());
1433
1434        // Verify the error message contains CARD hint
1435        let err = result.unwrap_err();
1436        let err_str = err.to_string();
1437        assert!(
1438            err_str.contains("CARD"),
1439            "Error should mention CARD directive: {}",
1440            err_str
1441        );
1442        assert!(
1443            err_str.contains("missing"),
1444            "Error should indicate missing directive: {}",
1445            err_str
1446        );
1447    }
1448
1449    #[test]
1450    fn test_adjacent_blocks_different_tags() {
1451        let markdown = r#"---
1452CARD: items
1453name: Item 1
1454---
1455
1456Item 1 body
1457
1458---
1459CARD: sections
1460title: Section 1
1461---
1462
1463Section 1 body"#;
1464
1465        let doc = decompose(markdown).unwrap();
1466
1467        // All cards in unified CARDS array
1468        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1469        assert_eq!(cards.len(), 2);
1470
1471        // First card is "items" type
1472        let item = cards[0].as_object().unwrap();
1473        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1474        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1475
1476        // Second card is "sections" type
1477        let section = cards[1].as_object().unwrap();
1478        assert_eq!(section.get("CARD").unwrap().as_str().unwrap(), "sections");
1479        assert_eq!(section.get("title").unwrap().as_str().unwrap(), "Section 1");
1480    }
1481
1482    #[test]
1483    fn test_order_preservation() {
1484        let markdown = r#"---
1485CARD: items
1486id: 1
1487---
1488
1489First
1490
1491---
1492CARD: items
1493id: 2
1494---
1495
1496Second
1497
1498---
1499CARD: items
1500id: 3
1501---
1502
1503Third"#;
1504
1505        let doc = decompose(markdown).unwrap();
1506
1507        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1508        assert_eq!(cards.len(), 3);
1509
1510        for (i, card) in cards.iter().enumerate() {
1511            let mapping = card.as_object().unwrap();
1512            assert_eq!(mapping.get("CARD").unwrap().as_str().unwrap(), "items");
1513            let id = mapping.get("id").unwrap().as_i64().unwrap();
1514            assert_eq!(id, (i + 1) as i64);
1515        }
1516    }
1517
1518    #[test]
1519    fn test_product_catalog_integration() {
1520        let markdown = r#"---
1521title: Product Catalog
1522author: John Doe
1523date: 2024-01-01
1524---
1525
1526This is the main catalog description.
1527
1528---
1529CARD: products
1530name: Widget A
1531price: 19.99
1532sku: WID-001
1533---
1534
1535The **Widget A** is our most popular product.
1536
1537---
1538CARD: products
1539name: Gadget B
1540price: 29.99
1541sku: GAD-002
1542---
1543
1544The **Gadget B** is perfect for professionals.
1545
1546---
1547CARD: reviews
1548product: Widget A
1549rating: 5
1550---
1551
1552"Excellent product! Highly recommended."
1553
1554---
1555CARD: reviews
1556product: Gadget B
1557rating: 4
1558---
1559
1560"Very good, but a bit pricey.""#;
1561
1562        let doc = decompose(markdown).unwrap();
1563
1564        // Verify global fields
1565        assert_eq!(
1566            doc.get_field("title").unwrap().as_str().unwrap(),
1567            "Product Catalog"
1568        );
1569        assert_eq!(
1570            doc.get_field("author").unwrap().as_str().unwrap(),
1571            "John Doe"
1572        );
1573        assert_eq!(
1574            doc.get_field("date").unwrap().as_str().unwrap(),
1575            "2024-01-01"
1576        );
1577
1578        // Verify global body
1579        assert!(doc.body().unwrap().contains("main catalog description"));
1580
1581        // All cards in unified CARDS array
1582        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1583        assert_eq!(cards.len(), 4); // 2 products + 2 reviews
1584
1585        // First 2 are products
1586        let product1 = cards[0].as_object().unwrap();
1587        assert_eq!(product1.get("CARD").unwrap().as_str().unwrap(), "products");
1588        assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
1589        assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);
1590
1591        let product2 = cards[1].as_object().unwrap();
1592        assert_eq!(product2.get("CARD").unwrap().as_str().unwrap(), "products");
1593        assert_eq!(product2.get("name").unwrap().as_str().unwrap(), "Gadget B");
1594
1595        // Last 2 are reviews
1596        let review1 = cards[2].as_object().unwrap();
1597        assert_eq!(review1.get("CARD").unwrap().as_str().unwrap(), "reviews");
1598        assert_eq!(
1599            review1.get("product").unwrap().as_str().unwrap(),
1600            "Widget A"
1601        );
1602        assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);
1603
1604        // Total fields: title, author, date, body, CARDS = 5
1605        assert_eq!(doc.fields().len(), 5);
1606    }
1607
1608    #[test]
1609    fn taro_quill_directive() {
1610        let markdown = r#"---
1611QUILL: usaf_memo
1612memo_for: [ORG/SYMBOL]
1613memo_from: [ORG/SYMBOL]
1614---
1615
1616This is the memo body."#;
1617
1618        let doc = decompose(markdown).unwrap();
1619
1620        // Verify quill tag is set
1621        assert_eq!(doc.quill_tag(), "usaf_memo");
1622
1623        // Verify fields from quill block become frontmatter
1624        assert_eq!(
1625            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1626                .as_str()
1627                .unwrap(),
1628            "ORG/SYMBOL"
1629        );
1630
1631        // Verify body
1632        assert_eq!(doc.body(), Some("\nThis is the memo body."));
1633    }
1634
1635    #[test]
1636    fn test_quill_with_card_blocks() {
1637        let markdown = r#"---
1638QUILL: document
1639title: Test Document
1640---
1641
1642Main body.
1643
1644---
1645CARD: sections
1646name: Section 1
1647---
1648
1649Section 1 body."#;
1650
1651        let doc = decompose(markdown).unwrap();
1652
1653        // Verify quill tag
1654        assert_eq!(doc.quill_tag(), "document");
1655
1656        // Verify global field from quill block
1657        assert_eq!(
1658            doc.get_field("title").unwrap().as_str().unwrap(),
1659            "Test Document"
1660        );
1661
1662        // Verify card blocks work via CARDS array
1663        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1664        assert_eq!(cards.len(), 1);
1665        assert_eq!(
1666            cards[0]
1667                .as_object()
1668                .unwrap()
1669                .get("CARD")
1670                .unwrap()
1671                .as_str()
1672                .unwrap(),
1673            "sections"
1674        );
1675
1676        // Verify body
1677        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1678    }
1679
1680    #[test]
1681    fn test_multiple_quill_directives_error() {
1682        let markdown = r#"---
1683QUILL: first
1684---
1685
1686---
1687QUILL: second
1688---"#;
1689
1690        let result = decompose(markdown);
1691        assert!(result.is_err());
1692        // QUILL in inline block is now an error (must appear in top-level frontmatter only)
1693        assert!(result
1694            .unwrap_err()
1695            .to_string()
1696            .contains("top-level frontmatter"));
1697    }
1698
1699    #[test]
1700    fn test_invalid_quill_name() {
1701        let markdown = r#"---
1702QUILL: Invalid-Name
1703---"#;
1704
1705        let result = decompose(markdown);
1706        assert!(result.is_err());
1707        assert!(result
1708            .unwrap_err()
1709            .to_string()
1710            .contains("Invalid quill name"));
1711    }
1712
1713    #[test]
1714    fn test_quill_wrong_value_type() {
1715        let markdown = r#"---
1716QUILL: 123
1717---"#;
1718
1719        let result = decompose(markdown);
1720        assert!(result.is_err());
1721        assert!(result
1722            .unwrap_err()
1723            .to_string()
1724            .contains("QUILL value must be a string"));
1725    }
1726
1727    #[test]
1728    fn test_card_wrong_value_type() {
1729        let markdown = r#"---
1730CARD: 123
1731---"#;
1732
1733        let result = decompose(markdown);
1734        assert!(result.is_err());
1735        assert!(result
1736            .unwrap_err()
1737            .to_string()
1738            .contains("CARD value must be a string"));
1739    }
1740
1741    #[test]
1742    fn test_both_quill_and_card_error() {
1743        let markdown = r#"---
1744QUILL: test
1745CARD: items
1746---"#;
1747
1748        let result = decompose(markdown);
1749        assert!(result.is_err());
1750        assert!(result
1751            .unwrap_err()
1752            .to_string()
1753            .contains("Cannot specify both QUILL and CARD"));
1754    }
1755
1756    #[test]
1757    fn test_blank_lines_in_frontmatter() {
1758        // New parsing standard: blank lines are allowed within YAML blocks
1759        let markdown = r#"---
1760title: Test Document
1761author: Test Author
1762
1763description: This has a blank line above it
1764tags:
1765  - one
1766  - two
1767---
1768
1769# Hello World
1770
1771This is the body."#;
1772
1773        let doc = decompose(markdown).unwrap();
1774
1775        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
1776        assert_eq!(
1777            doc.get_field("title").unwrap().as_str().unwrap(),
1778            "Test Document"
1779        );
1780        assert_eq!(
1781            doc.get_field("author").unwrap().as_str().unwrap(),
1782            "Test Author"
1783        );
1784        assert_eq!(
1785            doc.get_field("description").unwrap().as_str().unwrap(),
1786            "This has a blank line above it"
1787        );
1788
1789        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
1790        assert_eq!(tags.len(), 2);
1791    }
1792
1793    #[test]
1794    fn test_blank_lines_in_scope_blocks() {
1795        // Blank lines should be allowed in CARD blocks too
1796        let markdown = r#"---
1797CARD: items
1798name: Item 1
1799
1800price: 19.99
1801
1802tags:
1803  - electronics
1804  - gadgets
1805---
1806
1807Body of item 1."#;
1808
1809        let doc = decompose(markdown).unwrap();
1810
1811        // Cards are in CARDS array
1812        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1813        assert_eq!(cards.len(), 1);
1814
1815        let item = cards[0].as_object().unwrap();
1816        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1817        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1818        assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);
1819
1820        let tags = item.get("tags").unwrap().as_array().unwrap();
1821        assert_eq!(tags.len(), 2);
1822    }
1823
1824    #[test]
1825    fn test_horizontal_rule_with_blank_lines_above_and_below() {
1826        // Horizontal rule: blank lines both above AND below the ---
1827        let markdown = r#"---
1828title: Test
1829---
1830
1831First paragraph.
1832
1833---
1834
1835Second paragraph."#;
1836
1837        let doc = decompose(markdown).unwrap();
1838
1839        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1840
1841        // The body should contain the horizontal rule (---) as part of the content
1842        let body = doc.body().unwrap();
1843        assert!(body.contains("First paragraph."));
1844        assert!(body.contains("---"));
1845        assert!(body.contains("Second paragraph."));
1846    }
1847
1848    #[test]
1849    fn test_horizontal_rule_not_preceded_by_blank() {
1850        // --- not preceded by blank line but followed by blank line is NOT a horizontal rule
1851        // It's also NOT a valid metadata block opening (since it's followed by blank)
1852        let markdown = r#"---
1853title: Test
1854---
1855
1856First paragraph.
1857---
1858
1859Second paragraph."#;
1860
1861        let doc = decompose(markdown).unwrap();
1862
1863        let body = doc.body().unwrap();
1864        // The second --- should be in the body as text (not a horizontal rule since no blank above)
1865        assert!(body.contains("---"));
1866    }
1867
1868    #[test]
1869    fn test_multiple_blank_lines_in_yaml() {
1870        // Multiple blank lines should also be allowed
1871        let markdown = r#"---
1872title: Test
1873
1874
1875author: John Doe
1876
1877
1878version: 1.0
1879---
1880
1881Body content."#;
1882
1883        let doc = decompose(markdown).unwrap();
1884
1885        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1886        assert_eq!(
1887            doc.get_field("author").unwrap().as_str().unwrap(),
1888            "John Doe"
1889        );
1890        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1891    }
1892
1893    #[test]
1894    fn test_html_comment_interaction() {
1895        let markdown = r#"<!---
1896---> the rest of the page content
1897
1898---
1899key: value
1900---
1901"#;
1902        let doc = decompose(markdown).unwrap();
1903
1904        // The comment should be ignored (or at least not cause a parse error)
1905        // The frontmatter should be parsed
1906        let key = doc.get_field("key").and_then(|v| v.as_str());
1907        assert_eq!(key, Some("value"));
1908    }
1909}
1910#[cfg(test)]
1911mod demo_file_test {
1912    use super::*;
1913
1914    #[test]
1915    fn test_extended_metadata_demo_file() {
1916        let markdown = include_str!("../../fixtures/resources/extended_metadata_demo.md");
1917        let doc = decompose(markdown).unwrap();
1918
1919        // Verify global fields
1920        assert_eq!(
1921            doc.get_field("title").unwrap().as_str().unwrap(),
1922            "Extended Metadata Demo"
1923        );
1924        assert_eq!(
1925            doc.get_field("author").unwrap().as_str().unwrap(),
1926            "Quillmark Team"
1927        );
1928        // version is parsed as a number by YAML
1929        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1930
1931        // Verify body
1932        assert!(doc
1933            .body()
1934            .unwrap()
1935            .contains("extended YAML metadata standard"));
1936
1937        // All cards are now in unified CARDS array
1938        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1939        assert_eq!(cards.len(), 5); // 3 features + 2 use_cases
1940
1941        // Count features and use_cases cards
1942        let features_count = cards
1943            .iter()
1944            .filter(|c| {
1945                c.as_object()
1946                    .unwrap()
1947                    .get("CARD")
1948                    .unwrap()
1949                    .as_str()
1950                    .unwrap()
1951                    == "features"
1952            })
1953            .count();
1954        let use_cases_count = cards
1955            .iter()
1956            .filter(|c| {
1957                c.as_object()
1958                    .unwrap()
1959                    .get("CARD")
1960                    .unwrap()
1961                    .as_str()
1962                    .unwrap()
1963                    == "use_cases"
1964            })
1965            .count();
1966        assert_eq!(features_count, 3);
1967        assert_eq!(use_cases_count, 2);
1968
1969        // Check first card is a feature
1970        let feature1 = cards[0].as_object().unwrap();
1971        assert_eq!(feature1.get("CARD").unwrap().as_str().unwrap(), "features");
1972        assert_eq!(
1973            feature1.get("name").unwrap().as_str().unwrap(),
1974            "Tag Directives"
1975        );
1976    }
1977
1978    #[test]
1979    fn test_input_size_limit() {
1980        // Create markdown larger than MAX_INPUT_SIZE (10 MB)
1981        let size = crate::error::MAX_INPUT_SIZE + 1;
1982        let large_markdown = "a".repeat(size);
1983
1984        let result = decompose(&large_markdown);
1985        assert!(result.is_err());
1986
1987        let err_msg = result.unwrap_err().to_string();
1988        assert!(err_msg.contains("Input too large"));
1989    }
1990
1991    #[test]
1992    fn test_yaml_size_limit() {
1993        // Create YAML block larger than MAX_YAML_SIZE (1 MB)
1994        let mut markdown = String::from("---\n");
1995
1996        // Create a very large YAML field
1997        let size = crate::error::MAX_YAML_SIZE + 1;
1998        markdown.push_str("data: \"");
1999        markdown.push_str(&"x".repeat(size));
2000        markdown.push_str("\"\n---\n\nBody");
2001
2002        let result = decompose(&markdown);
2003        assert!(result.is_err());
2004
2005        let err_msg = result.unwrap_err().to_string();
2006        assert!(err_msg.contains("Input too large"));
2007    }
2008
2009    #[test]
2010    fn test_input_within_size_limit() {
2011        // Create markdown just under the limit
2012        let size = 1000; // Much smaller than limit
2013        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));
2014
2015        let result = decompose(&markdown);
2016        assert!(result.is_ok());
2017    }
2018
2019    #[test]
2020    fn test_yaml_within_size_limit() {
2021        // Create YAML block well within the limit
2022        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";
2023
2024        let result = decompose(markdown);
2025        assert!(result.is_ok());
2026    }
2027
2028    #[test]
2029    fn test_yaml_depth_limit() {
2030        // Create deeply nested YAML that exceeds MAX_YAML_DEPTH (100 levels)
2031        // This tests serde-saphyr's Budget.max_depth enforcement
2032        let mut yaml_content = String::new();
2033        for i in 0..110 {
2034            yaml_content.push_str(&"  ".repeat(i));
2035            yaml_content.push_str(&format!("level{}: value\n", i));
2036        }
2037
2038        let markdown = format!("---\n{}---\n\nBody", yaml_content);
2039        let result = decompose(&markdown);
2040
2041        assert!(result.is_err());
2042        let err_msg = result.unwrap_err().to_string();
2043        // serde-saphyr returns "budget exceeded" or similar for depth violations
2044        assert!(
2045            err_msg.to_lowercase().contains("budget")
2046                || err_msg.to_lowercase().contains("depth")
2047                || err_msg.contains("YAML"),
2048            "Expected depth/budget error, got: {}",
2049            err_msg
2050        );
2051    }
2052
2053    #[test]
2054    fn test_yaml_depth_within_limit() {
2055        // Create reasonably nested YAML (should succeed)
2056        let markdown = r#"---
2057level1:
2058  level2:
2059    level3:
2060      level4:
2061        value: test
2062---
2063
2064Body content"#;
2065
2066        let result = decompose(markdown);
2067        assert!(result.is_ok());
2068    }
2069
    // Tests for chevron preservation in parsing: `<<...>>` is NOT converted to guillemets
    // while parsing. That conversion now happens in process_plate, not during parsing.
2072    #[test]
2073    fn test_chevrons_preserved_in_body_no_frontmatter() {
2074        let markdown = "Use <<raw content>> here.";
2075        let doc = decompose(markdown).unwrap();
2076
2077        // Body should preserve chevrons (conversion happens later in process_plate)
2078        assert_eq!(doc.body(), Some("Use <<raw content>> here."));
2079    }
2080
2081    #[test]
2082    fn test_chevrons_preserved_in_body_with_frontmatter() {
2083        let markdown = r#"---
2084title: Test
2085---
2086
2087Use <<raw content>> here."#;
2088        let doc = decompose(markdown).unwrap();
2089
2090        // Body should preserve chevrons
2091        assert_eq!(doc.body(), Some("\nUse <<raw content>> here."));
2092    }
2093
2094    #[test]
2095    fn test_chevrons_preserved_in_yaml_string() {
2096        let markdown = r#"---
2097title: Test <<with chevrons>>
2098---
2099
2100Body content."#;
2101        let doc = decompose(markdown).unwrap();
2102
2103        // YAML string values should preserve chevrons
2104        assert_eq!(
2105            doc.get_field("title").unwrap().as_str().unwrap(),
2106            "Test <<with chevrons>>"
2107        );
2108    }
2109
2110    #[test]
2111    fn test_chevrons_preserved_in_yaml_array() {
2112        let markdown = r#"---
2113items:
2114  - "<<first>>"
2115  - "<<second>>"
2116---
2117
2118Body."#;
2119        let doc = decompose(markdown).unwrap();
2120
2121        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
2122        assert_eq!(items[0].as_str().unwrap(), "<<first>>");
2123        assert_eq!(items[1].as_str().unwrap(), "<<second>>");
2124    }
2125
2126    #[test]
2127    fn test_chevrons_preserved_in_yaml_nested() {
2128        let markdown = r#"---
2129metadata:
2130  description: "<<nested value>>"
2131---
2132
2133Body."#;
2134        let doc = decompose(markdown).unwrap();
2135
2136        let metadata = doc.get_field("metadata").unwrap().as_object().unwrap();
2137        assert_eq!(
2138            metadata.get("description").unwrap().as_str().unwrap(),
2139            "<<nested value>>"
2140        );
2141    }
2142
2143    #[test]
2144    fn test_chevrons_preserved_in_code_blocks() {
2145        let markdown = r#"```
2146<<in code block>>
2147```
2148
2149<<outside code block>>"#;
2150        let doc = decompose(markdown).unwrap();
2151
2152        let body = doc.body().unwrap();
2153        // All chevrons should be preserved (no conversion during parsing)
2154        assert!(body.contains("<<in code block>>"));
2155        assert!(body.contains("<<outside code block>>"));
2156    }
2157
2158    #[test]
2159    fn test_chevrons_preserved_in_inline_code() {
2160        let markdown = "`<<in inline code>>` and <<outside inline code>>";
2161        let doc = decompose(markdown).unwrap();
2162
2163        let body = doc.body().unwrap();
2164        // All chevrons should be preserved
2165        assert!(body.contains("`<<in inline code>>`"));
2166        assert!(body.contains("<<outside inline code>>"));
2167    }
2168
2169    #[test]
2170    fn test_chevrons_preserved_in_tagged_block_body() {
2171        let markdown = r#"---
2172title: Main
2173---
2174
2175Main body.
2176
2177---
2178CARD: items
2179name: Item 1
2180---
2181
2182Use <<raw>> here."#;
2183        let doc = decompose(markdown).unwrap();
2184
2185        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2186        let item = cards[0].as_object().unwrap();
2187        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
2188        let item_body = item.get(BODY_FIELD).unwrap().as_str().unwrap();
2189        // Tagged block body should preserve chevrons
2190        assert!(item_body.contains("<<raw>>"));
2191    }
2192
2193    #[test]
2194    fn test_chevrons_preserved_in_tagged_block_yaml() {
2195        let markdown = r#"---
2196title: Main
2197---
2198
2199Main body.
2200
2201---
2202CARD: items
2203description: "<<tagged yaml>>"
2204---
2205
2206Item body."#;
2207        let doc = decompose(markdown).unwrap();
2208
2209        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2210        let item = cards[0].as_object().unwrap();
2211        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
2212        // Tagged block YAML should preserve chevrons
2213        assert_eq!(
2214            item.get("description").unwrap().as_str().unwrap(),
2215            "<<tagged yaml>>"
2216        );
2217    }
2218
2219    #[test]
2220    fn test_yaml_numbers_not_affected() {
2221        // Numbers should not be affected
2222        let markdown = r#"---
2223count: 42
2224---
2225
2226Body."#;
2227        let doc = decompose(markdown).unwrap();
2228        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
2229    }
2230
2231    #[test]
2232    fn test_yaml_booleans_not_affected() {
2233        // Booleans should not be affected
2234        let markdown = r#"---
2235active: true
2236---
2237
2238Body."#;
2239        let doc = decompose(markdown).unwrap();
2240        assert!(doc.get_field("active").unwrap().as_bool().unwrap());
2241    }
2242
2243    #[test]
2244    fn test_multiline_chevrons_preserved() {
2245        // Multiline chevrons should be preserved as-is
2246        let markdown = "<<text\nacross lines>>";
2247        let doc = decompose(markdown).unwrap();
2248
2249        let body = doc.body().unwrap();
2250        // Should contain the original chevrons
2251        assert!(body.contains("<<text"));
2252        assert!(body.contains("across lines>>"));
2253    }
2254
2255    #[test]
2256    fn test_unmatched_chevrons_preserved() {
2257        let markdown = "<<unmatched";
2258        let doc = decompose(markdown).unwrap();
2259
2260        let body = doc.body().unwrap();
2261        // Unmatched should remain as-is
2262        assert_eq!(body, "<<unmatched");
2263    }
2264}
2265
2266// Additional robustness tests
2267#[cfg(test)]
2268mod robustness_tests {
2269    use super::*;
2270
2271    // Edge cases for delimiter handling
2272
2273    #[test]
2274    fn test_empty_document() {
2275        let doc = decompose("").unwrap();
2276        assert_eq!(doc.body(), Some(""));
2277        assert_eq!(doc.quill_tag(), "__default__");
2278    }
2279
2280    #[test]
2281    fn test_only_whitespace() {
2282        let doc = decompose("   \n\n   \t").unwrap();
2283        assert_eq!(doc.body(), Some("   \n\n   \t"));
2284    }
2285
2286    #[test]
2287    fn test_only_dashes() {
2288        // Just "---" at document start without newline is not treated as frontmatter opener
2289        // (requires "---\n" to start a frontmatter block)
2290        let result = decompose("---");
2291        // This is NOT an error - "---" alone without newline is just body content
2292        assert!(result.is_ok());
2293        assert_eq!(result.unwrap().body(), Some("---"));
2294    }
2295
2296    #[test]
2297    fn test_dashes_in_middle_of_line() {
2298        // --- not at start of line should not be treated as delimiter
2299        let markdown = "some text --- more text";
2300        let doc = decompose(markdown).unwrap();
2301        assert_eq!(doc.body(), Some("some text --- more text"));
2302    }
2303
2304    #[test]
2305    fn test_four_dashes() {
2306        // ---- is not a valid delimiter
2307        let markdown = "----\ntitle: Test\n----\n\nBody";
2308        let doc = decompose(markdown).unwrap();
2309        // Should treat entire content as body
2310        assert!(doc.body().unwrap().contains("----"));
2311    }
2312
2313    #[test]
2314    fn test_crlf_line_endings() {
2315        // Windows-style line endings
2316        let markdown = "---\r\ntitle: Test\r\n---\r\n\r\nBody content.";
2317        let doc = decompose(markdown).unwrap();
2318        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
2319        assert!(doc.body().unwrap().contains("Body content."));
2320    }
2321
2322    #[test]
2323    fn test_mixed_line_endings() {
2324        // Mix of \n and \r\n
2325        let markdown = "---\ntitle: Test\r\n---\n\nBody.";
2326        let doc = decompose(markdown).unwrap();
2327        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
2328    }
2329
2330    #[test]
2331    fn test_frontmatter_at_eof_no_trailing_newline() {
2332        // Frontmatter closed at EOF without trailing newline
2333        let markdown = "---\ntitle: Test\n---";
2334        let doc = decompose(markdown).unwrap();
2335        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
2336        assert_eq!(doc.body(), Some(""));
2337    }
2338
2339    #[test]
2340    fn test_empty_frontmatter() {
2341        // Empty frontmatter block - requires content between delimiters
2342        // "---\n---" is not valid because --- followed by --- (blank line then ---)
2343        // is treated as horizontal rule logic, not empty frontmatter
2344        // A valid empty frontmatter would be "---\n \n---" (with whitespace content)
2345        let markdown = "---\n \n---\n\nBody content.";
2346        let doc = decompose(markdown).unwrap();
2347        assert!(doc.body().unwrap().contains("Body content."));
2348        // Should have body and CARDS fields
2349        assert_eq!(doc.fields().len(), 2);
2350    }
2351
2352    #[test]
2353    fn test_whitespace_only_frontmatter() {
2354        // Frontmatter with only whitespace
2355        let markdown = "---\n   \n\n   \n---\n\nBody.";
2356        let doc = decompose(markdown).unwrap();
2357        assert!(doc.body().unwrap().contains("Body."));
2358    }
2359
2360    // Unicode handling
2361
2362    #[test]
2363    fn test_unicode_in_yaml_keys() {
2364        let markdown = "---\ntitre: Bonjour\nタイトル: こんにちは\n---\n\nBody.";
2365        let doc = decompose(markdown).unwrap();
2366        assert_eq!(doc.get_field("titre").unwrap().as_str().unwrap(), "Bonjour");
2367        assert_eq!(
2368            doc.get_field("タイトル").unwrap().as_str().unwrap(),
2369            "こんにちは"
2370        );
2371    }
2372
2373    #[test]
2374    fn test_unicode_in_yaml_values() {
2375        let markdown = "---\ntitle: 你好世界 🎉\n---\n\nBody.";
2376        let doc = decompose(markdown).unwrap();
2377        assert_eq!(
2378            doc.get_field("title").unwrap().as_str().unwrap(),
2379            "你好世界 🎉"
2380        );
2381    }
2382
2383    #[test]
2384    fn test_unicode_in_body() {
2385        let markdown = "---\ntitle: Test\n---\n\n日本語テキスト with emoji 🚀";
2386        let doc = decompose(markdown).unwrap();
2387        assert!(doc.body().unwrap().contains("日本語テキスト"));
2388        assert!(doc.body().unwrap().contains("🚀"));
2389    }
2390
2391    // YAML edge cases
2392
2393    #[test]
2394    fn test_yaml_multiline_string() {
2395        let markdown = r#"---
2396description: |
2397  This is a
2398  multiline string
2399  with preserved newlines.
2400---
2401
2402Body."#;
2403        let doc = decompose(markdown).unwrap();
2404        let desc = doc.get_field("description").unwrap().as_str().unwrap();
2405        assert!(desc.contains("multiline string"));
2406        assert!(desc.contains('\n'));
2407    }
2408
2409    #[test]
2410    fn test_yaml_folded_string() {
2411        let markdown = r#"---
2412description: >
2413  This is a folded
2414  string that becomes
2415  a single line.
2416---
2417
2418Body."#;
2419        let doc = decompose(markdown).unwrap();
2420        let desc = doc.get_field("description").unwrap().as_str().unwrap();
2421        // Folded strings join lines with spaces
2422        assert!(desc.contains("folded"));
2423    }
2424
2425    #[test]
2426    fn test_yaml_null_value() {
2427        let markdown = "---\noptional: null\n---\n\nBody.";
2428        let doc = decompose(markdown).unwrap();
2429        assert!(doc.get_field("optional").unwrap().is_null());
2430    }
2431
2432    #[test]
2433    fn test_yaml_empty_string_value() {
2434        let markdown = "---\nempty: \"\"\n---\n\nBody.";
2435        let doc = decompose(markdown).unwrap();
2436        assert_eq!(doc.get_field("empty").unwrap().as_str().unwrap(), "");
2437    }
2438
2439    #[test]
2440    fn test_yaml_special_characters_in_string() {
2441        let markdown = "---\nspecial: \"colon: here, and [brackets]\"\n---\n\nBody.";
2442        let doc = decompose(markdown).unwrap();
2443        assert_eq!(
2444            doc.get_field("special").unwrap().as_str().unwrap(),
2445            "colon: here, and [brackets]"
2446        );
2447    }
2448
2449    #[test]
2450    fn test_yaml_nested_objects() {
2451        let markdown = r#"---
2452config:
2453  database:
2454    host: localhost
2455    port: 5432
2456  cache:
2457    enabled: true
2458---
2459
2460Body."#;
2461        let doc = decompose(markdown).unwrap();
2462        let config = doc.get_field("config").unwrap().as_object().unwrap();
2463        let db = config.get("database").unwrap().as_object().unwrap();
2464        assert_eq!(db.get("host").unwrap().as_str().unwrap(), "localhost");
2465        assert_eq!(db.get("port").unwrap().as_i64().unwrap(), 5432);
2466    }
2467
2468    // CARD block edge cases
2469
2470    #[test]
2471    fn test_card_with_empty_body() {
2472        let markdown = r#"---
2473CARD: items
2474name: Item
2475---"#;
2476        let doc = decompose(markdown).unwrap();
2477        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2478        assert_eq!(cards.len(), 1);
2479        let item = cards[0].as_object().unwrap();
2480        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
2481        assert_eq!(item.get(BODY_FIELD).unwrap().as_str().unwrap(), "");
2482    }
2483
2484    #[test]
2485    fn test_card_consecutive_blocks() {
2486        let markdown = r#"---
2487CARD: a
2488id: 1
2489---
2490---
2491CARD: a
2492id: 2
2493---"#;
2494        let doc = decompose(markdown).unwrap();
2495        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2496        assert_eq!(cards.len(), 2);
2497        assert_eq!(
2498            cards[0]
2499                .as_object()
2500                .unwrap()
2501                .get("CARD")
2502                .unwrap()
2503                .as_str()
2504                .unwrap(),
2505            "a"
2506        );
2507        assert_eq!(
2508            cards[1]
2509                .as_object()
2510                .unwrap()
2511                .get("CARD")
2512                .unwrap()
2513                .as_str()
2514                .unwrap(),
2515            "a"
2516        );
2517    }
2518
2519    #[test]
2520    fn test_card_with_body_containing_dashes() {
2521        let markdown = r#"---
2522CARD: items
2523name: Item
2524---
2525
2526Some text with --- dashes in it."#;
2527        let doc = decompose(markdown).unwrap();
2528        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2529        let item = cards[0].as_object().unwrap();
2530        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
2531        let body = item.get(BODY_FIELD).unwrap().as_str().unwrap();
2532        assert!(body.contains("--- dashes"));
2533    }
2534
2535    // QUILL directive edge cases
2536
2537    #[test]
2538    fn test_quill_with_underscore_prefix() {
2539        let markdown = "---\nQUILL: _internal\n---\n\nBody.";
2540        let doc = decompose(markdown).unwrap();
2541        assert_eq!(doc.quill_tag(), "_internal");
2542    }
2543
2544    #[test]
2545    fn test_quill_with_numbers() {
2546        let markdown = "---\nQUILL: form_8_v2\n---\n\nBody.";
2547        let doc = decompose(markdown).unwrap();
2548        assert_eq!(doc.quill_tag(), "form_8_v2");
2549    }
2550
2551    #[test]
2552    fn test_quill_with_additional_fields() {
2553        let markdown = r#"---
2554QUILL: my_quill
2555title: Document Title
2556author: John Doe
2557---
2558
2559Body content."#;
2560        let doc = decompose(markdown).unwrap();
2561        assert_eq!(doc.quill_tag(), "my_quill");
2562        assert_eq!(
2563            doc.get_field("title").unwrap().as_str().unwrap(),
2564            "Document Title"
2565        );
2566        assert_eq!(
2567            doc.get_field("author").unwrap().as_str().unwrap(),
2568            "John Doe"
2569        );
2570    }
2571
2572    // Error handling
2573
2574    #[test]
2575    fn test_invalid_scope_name_uppercase() {
2576        let markdown = "---\nCARD: ITEMS\n---\n\nBody.";
2577        let result = decompose(markdown);
2578        assert!(result.is_err());
2579        assert!(result
2580            .unwrap_err()
2581            .to_string()
2582            .contains("Invalid card field name"));
2583    }
2584
2585    #[test]
2586    fn test_invalid_scope_name_starts_with_number() {
2587        let markdown = "---\nCARD: 123items\n---\n\nBody.";
2588        let result = decompose(markdown);
2589        assert!(result.is_err());
2590    }
2591
2592    #[test]
2593    fn test_invalid_scope_name_with_hyphen() {
2594        let markdown = "---\nCARD: my-items\n---\n\nBody.";
2595        let result = decompose(markdown);
2596        assert!(result.is_err());
2597    }
2598
2599    #[test]
2600    fn test_invalid_quill_name_uppercase() {
2601        let markdown = "---\nQUILL: MyQuill\n---\n\nBody.";
2602        let result = decompose(markdown);
2603        assert!(result.is_err());
2604    }
2605
2606    #[test]
2607    fn test_yaml_syntax_error_missing_colon() {
2608        let markdown = "---\ntitle Test\n---\n\nBody.";
2609        let result = decompose(markdown);
2610        assert!(result.is_err());
2611    }
2612
2613    #[test]
2614    fn test_yaml_syntax_error_bad_indentation() {
2615        let markdown = "---\nitems:\n- one\n - two\n---\n\nBody.";
2616        let result = decompose(markdown);
2617        // Bad indentation may or may not be an error depending on YAML parser
2618        // Just ensure it doesn't panic
2619        let _ = result;
2620    }
2621
2622    // Body extraction edge cases
2623
2624    #[test]
2625    fn test_body_with_leading_newlines() {
2626        let markdown = "---\ntitle: Test\n---\n\n\n\nBody with leading newlines.";
2627        let doc = decompose(markdown).unwrap();
2628        // Body should preserve leading newlines after frontmatter
2629        assert!(doc.body().unwrap().starts_with('\n'));
2630    }
2631
2632    #[test]
2633    fn test_body_with_trailing_newlines() {
2634        let markdown = "---\ntitle: Test\n---\n\nBody.\n\n\n";
2635        let doc = decompose(markdown).unwrap();
2636        // Body should preserve trailing newlines
2637        assert!(doc.body().unwrap().ends_with('\n'));
2638    }
2639
2640    #[test]
2641    fn test_no_body_after_frontmatter() {
2642        let markdown = "---\ntitle: Test\n---";
2643        let doc = decompose(markdown).unwrap();
2644        assert_eq!(doc.body(), Some(""));
2645    }
2646
2647    // Tag name validation
2648
2649    #[test]
2650    fn test_valid_tag_name_single_underscore() {
2651        assert!(is_valid_tag_name("_"));
2652    }
2653
2654    #[test]
2655    fn test_valid_tag_name_underscore_prefix() {
2656        assert!(is_valid_tag_name("_private"));
2657    }
2658
2659    #[test]
2660    fn test_valid_tag_name_with_numbers() {
2661        assert!(is_valid_tag_name("item1"));
2662        assert!(is_valid_tag_name("item_2"));
2663    }
2664
2665    #[test]
2666    fn test_invalid_tag_name_empty() {
2667        assert!(!is_valid_tag_name(""));
2668    }
2669
2670    #[test]
2671    fn test_invalid_tag_name_starts_with_number() {
2672        assert!(!is_valid_tag_name("1item"));
2673    }
2674
2675    #[test]
2676    fn test_invalid_tag_name_uppercase() {
2677        assert!(!is_valid_tag_name("Items"));
2678        assert!(!is_valid_tag_name("ITEMS"));
2679    }
2680
2681    #[test]
2682    fn test_invalid_tag_name_special_chars() {
2683        assert!(!is_valid_tag_name("my-items"));
2684        assert!(!is_valid_tag_name("my.items"));
2685        assert!(!is_valid_tag_name("my items"));
2686    }
2687
2688    // Guillemet preprocessing in YAML
2689
2690    #[test]
2691    fn test_guillemet_in_yaml_preserves_non_strings() {
2692        let markdown = r#"---
2693count: 42
2694price: 19.99
2695active: true
2696items:
2697  - first
2698  - 100
2699  - true
2700---
2701
2702Body."#;
2703        let doc = decompose(markdown).unwrap();
2704        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
2705        assert_eq!(doc.get_field("price").unwrap().as_f64().unwrap(), 19.99);
2706        assert!(doc.get_field("active").unwrap().as_bool().unwrap());
2707    }
2708
2709    #[test]
2710    fn test_guillemet_double_conversion_prevention() {
2711        // Ensure «» in input doesn't get double-processed
2712        let markdown = "---\ntitle: Already «converted»\n---\n\nBody.";
2713        let doc = decompose(markdown).unwrap();
2714        // Should remain as-is (not double-escaped)
2715        assert_eq!(
2716            doc.get_field("title").unwrap().as_str().unwrap(),
2717            "Already «converted»"
2718        );
2719    }
2720
2721    #[test]
2722    fn test_allowed_card_field_collision() {
2723        let markdown = r#"---
2724my_card: "some global value"
2725---
2726
2727---
2728CARD: my_card
2729title: "My Card"
2730---
2731Body
2732"#;
2733        // This should SUCCEED according to new PARSE.md
2734        let doc = decompose(markdown).unwrap();
2735
2736        // Verify global field exists
2737        assert_eq!(
2738            doc.get_field("my_card").unwrap().as_str().unwrap(),
2739            "some global value"
2740        );
2741
2742        // Verify Card exists in CARDS array
2743        let cards = doc.get_field("CARDS").unwrap().as_array().unwrap();
2744        assert!(!cards.is_empty());
2745        let card = cards
2746            .iter()
2747            .find(|v| v.get("CARD").and_then(|c| c.as_str()) == Some("my_card"))
2748            .expect("Card not found");
2749        assert_eq!(card.get("title").unwrap().as_str().unwrap(), "My Card");
2750    }
2751
2752    #[test]
2753    fn test_yaml_custom_tags_in_frontmatter() {
2754        // User-defined YAML tags like !fill should be accepted and ignored
2755        let markdown = r#"---
2756memo_from: !fill 2d lt example
2757regular_field: normal value
2758---
2759
2760Body content."#;
2761        let doc = decompose(markdown).unwrap();
2762
2763        // The tag !fill should be ignored, value parsed as string "2d lt example"
2764        assert_eq!(
2765            doc.get_field("memo_from").unwrap().as_str().unwrap(),
2766            "2d lt example"
2767        );
2768        // Regular fields should still work
2769        assert_eq!(
2770            doc.get_field("regular_field").unwrap().as_str().unwrap(),
2771            "normal value"
2772        );
2773        assert_eq!(doc.body(), Some("\nBody content."));
2774    }
2775
2776    /// Test the exact example from EXTENDED_MARKDOWN.md (lines 92-127)
2777    #[test]
2778    fn test_spec_example() {
2779        let markdown = r#"---
2780title: My Document
2781QUILL: blog_post
2782---
2783Main document body.
2784
2785***
2786
2787More content after horizontal rule.
2788
2789---
2790CARD: section
2791heading: Introduction
2792---
2793Introduction content.
2794
2795---
2796CARD: section
2797heading: Conclusion
2798---
2799Conclusion content.
2800"#;
2801
2802        let doc = decompose(markdown).unwrap();
2803
2804        // Verify global fields
2805        assert_eq!(
2806            doc.get_field("title").unwrap().as_str().unwrap(),
2807            "My Document"
2808        );
2809        assert_eq!(doc.quill_tag(), "blog_post");
2810
2811        // Verify body contains horizontal rule (*** preserved)
2812        let body = doc.body().unwrap();
2813        assert!(body.contains("Main document body."));
2814        assert!(body.contains("***"));
2815        assert!(body.contains("More content after horizontal rule."));
2816
2817        // Verify CARDS array
2818        let cards = doc.get_field("CARDS").unwrap().as_array().unwrap();
2819        assert_eq!(cards.len(), 2);
2820
2821        // First card
2822        let card1 = cards[0].as_object().unwrap();
2823        assert_eq!(card1.get("CARD").unwrap().as_str().unwrap(), "section");
2824        assert_eq!(
2825            card1.get("heading").unwrap().as_str().unwrap(),
2826            "Introduction"
2827        );
2828        assert_eq!(
2829            card1.get("BODY").unwrap().as_str().unwrap(),
2830            "Introduction content.\n\n"
2831        );
2832
2833        // Second card
2834        let card2 = cards[1].as_object().unwrap();
2835        assert_eq!(card2.get("CARD").unwrap().as_str().unwrap(), "section");
2836        assert_eq!(
2837            card2.get("heading").unwrap().as_str().unwrap(),
2838            "Conclusion"
2839        );
2840        assert_eq!(
2841            card2.get("BODY").unwrap().as_str().unwrap(),
2842            "Conclusion content.\n"
2843        );
2844    }
2845}