quillmark_core/
parse.rs

1//! # Parsing Module
2//!
3//! Parsing functionality for markdown documents with YAML frontmatter.
4//!
5//! ## Overview
6//!
//! The `parse` module provides the [`ParsedDocument::from_markdown`] function for parsing markdown documents
//! into frontmatter fields, a body, and any inline card blocks.
8//!
9//! ## Key Types
10//!
11//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
12//! - [`BODY_FIELD`]: Constant for the field name storing document body
13//!
14//! ## Examples
15//!
16//! ### Basic Parsing
17//!
18//! ```
19//! use quillmark_core::ParsedDocument;
20//!
21//! let markdown = r#"---
22//! title: My Document
23//! author: John Doe
24//! ---
25//!
26//! # Introduction
27//!
28//! Document content here.
29//! "#;
30//!
31//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
32//! let title = doc.get_field("title")
33//!     .and_then(|v| v.as_str())
34//!     .unwrap_or("Untitled");
35//! ```
36//!
37//! ## Error Handling
38//!
39//! The [`ParsedDocument::from_markdown`] function returns errors for:
40//! - Malformed YAML syntax
41//! - Unclosed frontmatter blocks
42//! - Multiple global frontmatter blocks
//! - Both QUILL and CARD (or its alias SCOPE) specified in the same block
44//! - Reserved field name usage
45//! - Name collisions
46//!
47//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.
48
49use std::collections::HashMap;
50
51use crate::value::QuillValue;
52
/// The field name used to store the document body
///
/// The parser inserts the global body under this key in
/// [`ParsedDocument`]'s field map, and each card item stores its own body
/// under the same key.
pub const BODY_FIELD: &str = "BODY";
55
/// A parsed markdown document with frontmatter
///
/// Holds every field extracted from the document together with the quill tag
/// that was selected for it.
#[derive(Debug, Clone)]
pub struct ParsedDocument {
    /// Field name -> value. When produced by the parser this contains the
    /// frontmatter entries plus the body under [`BODY_FIELD`]; documents that
    /// contain at least one metadata block also get a `CARDS` array.
    fields: HashMap<String, QuillValue>,
    /// Name taken from the `QUILL` key, or `"__default__"` when none was given.
    quill_tag: String,
}
62
63impl ParsedDocument {
64    /// Create a new ParsedDocument with the given fields
65    pub fn new(fields: HashMap<String, QuillValue>) -> Self {
66        Self {
67            fields,
68            quill_tag: "__default__".to_string(),
69        }
70    }
71
72    /// Create a ParsedDocument from fields and quill tag
73    pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: String) -> Self {
74        Self { fields, quill_tag }
75    }
76
77    /// Create a ParsedDocument from markdown string
78    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
79        decompose(markdown)
80    }
81
82    /// Get the quill tag (from QUILL key, or "__default__" if not specified)
83    pub fn quill_tag(&self) -> &str {
84        &self.quill_tag
85    }
86
87    /// Get the document body
88    pub fn body(&self) -> Option<&str> {
89        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
90    }
91
92    /// Get a specific field
93    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
94        self.fields.get(name)
95    }
96
97    /// Get all fields (including body)
98    pub fn fields(&self) -> &HashMap<String, QuillValue> {
99        &self.fields
100    }
101
102    /// Create a new ParsedDocument with default values applied
103    ///
104    /// This method creates a new ParsedDocument with default values applied for any
105    /// fields that are missing from the original document but have defaults specified.
106    /// Existing fields are preserved and not overwritten.
107    ///
108    /// # Arguments
109    ///
110    /// * `defaults` - A HashMap of field names to their default QuillValues
111    ///
112    /// # Returns
113    ///
114    /// A new ParsedDocument with defaults applied for missing fields
115    pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
116        let mut fields = self.fields.clone();
117
118        for (field_name, default_value) in defaults {
119            // Only apply default if field is missing
120            if !fields.contains_key(field_name) {
121                fields.insert(field_name.clone(), default_value.clone());
122            }
123        }
124
125        Self {
126            fields,
127            quill_tag: self.quill_tag.clone(),
128        }
129    }
130
131    /// Create a new ParsedDocument with coerced field values
132    ///
133    /// This method applies type coercions to field values based on the schema.
134    /// Coercions include:
135    /// - Singular values to arrays when schema expects array
136    /// - String "true"/"false" to boolean
137    /// - Numbers to boolean (0=false, non-zero=true)
138    /// - String numbers to number type
139    /// - Boolean to number (true=1, false=0)
140    ///
141    /// # Arguments
142    ///
143    /// * `schema` - A JSON Schema object defining expected field types
144    ///
145    /// # Returns
146    ///
147    /// A new ParsedDocument with coerced field values
148    pub fn with_coercion(&self, schema: &QuillValue) -> Self {
149        use crate::schema::coerce_document;
150
151        let coerced_fields = coerce_document(schema, &self.fields);
152
153        Self {
154            fields: coerced_fields,
155            quill_tag: self.quill_tag.clone(),
156        }
157    }
158}
159
/// One `---`-delimited metadata block located in the markdown source.
///
/// Offsets are byte positions into the original markdown string.
#[derive(Debug)]
struct MetadataBlock {
    /// Position of opening "---"
    start: usize,
    /// Position just past the closing delimiter (including its trailing
    /// newline when the delimiter has one)
    end: usize,
    /// Parsed YAML with the reserved QUILL/CARD/SCOPE key removed. `None` when
    /// the block content is empty or nothing remains after removing the
    /// reserved key. (A YAML parse failure aborts parsing with an error rather
    /// than producing `None`.)
    yaml_value: Option<serde_yaml::Value>,
    /// Field name from CARD key (or its SCOPE alias)
    tag: Option<String>,
    /// Quill name from QUILL key
    quill_name: Option<String>,
}
168
/// Check that `name` matches the pattern `[a-z_][a-z0-9_]*`.
///
/// The first character must be an ASCII lowercase letter or `_`; every
/// following character must be an ASCII lowercase letter, an ASCII digit, or
/// `_`. The empty string is rejected.
fn is_valid_tag_name(name: &str) -> bool {
    let mut remaining = name.chars();

    match remaining.next() {
        Some(head) if head.is_ascii_lowercase() || head == '_' => {
            remaining.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
        }
        // Empty input, or a first character outside [a-z_].
        _ => false,
    }
}
190
191/// Find all metadata blocks in the document
192fn find_metadata_blocks(markdown: &str) -> Result<Vec<MetadataBlock>, crate::error::ParseError> {
193    let mut blocks = Vec::new();
194    let mut pos = 0;
195
196    while pos < markdown.len() {
197        // Look for opening "---\n" or "---\r\n"
198        let search_str = &markdown[pos..];
199        let delimiter_result = search_str
200            .find("---\n")
201            .map(|p| (p, 4, "\n"))
202            .or_else(|| search_str.find("---\r\n").map(|p| (p, 5, "\r\n")));
203
204        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
205            let abs_pos = pos + delimiter_pos;
206
207            // Check if the delimiter is at the start of a line
208            let is_start_of_line = if abs_pos == 0 {
209                true
210            } else {
211                let char_before = markdown.as_bytes()[abs_pos - 1];
212                char_before == b'\n' || char_before == b'\r'
213            };
214
215            if !is_start_of_line {
216                pos = abs_pos + 1;
217                continue;
218            }
219
220            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"
221
222            // Check if this --- is a horizontal rule (blank lines above AND below)
223            let preceded_by_blank = if abs_pos > 0 {
224                // Check if there's a blank line before the ---
225                let before = &markdown[..abs_pos];
226                before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
227            } else {
228                false
229            };
230
231            let followed_by_blank = if content_start < markdown.len() {
232                markdown[content_start..].starts_with('\n')
233                    || markdown[content_start..].starts_with("\r\n")
234            } else {
235                false
236            };
237
238            // Horizontal rule: blank lines both above and below
239            if preceded_by_blank && followed_by_blank {
240                // This is a horizontal rule in the body, skip it
241                pos = abs_pos + 3; // Skip past "---"
242                continue;
243            }
244
245            // Check if followed by non-blank line (or if we're at document start)
246            // This starts a metadata block
247            if followed_by_blank {
248                // --- followed by blank line but NOT preceded by blank line
249                // This is NOT a metadata block opening, skip it
250                pos = abs_pos + 3;
251                continue;
252            }
253
254            // Found potential metadata block opening (followed by non-blank line)
255            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
256            let rest = &markdown[content_start..];
257
258            // First try to find delimiters with trailing newlines
259            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
260            let closing_with_newline = closing_patterns
261                .iter()
262                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
263                .min_by_key(|(p, _)| *p);
264
265            // Also check for closing at end of document (no trailing newline)
266            let closing_at_eof = ["\n---", "\r\n---"]
267                .iter()
268                .filter_map(|delim| {
269                    rest.find(delim).and_then(|p| {
270                        if p + delim.len() == rest.len() {
271                            Some((p, delim.len()))
272                        } else {
273                            None
274                        }
275                    })
276                })
277                .min_by_key(|(p, _)| *p);
278
279            let closing_result = match (closing_with_newline, closing_at_eof) {
280                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
281                (Some(_), Some(_)) => closing_with_newline,
282                (Some(_), None) => closing_with_newline,
283                (None, Some(_)) => closing_at_eof,
284                (None, None) => None,
285            };
286
287            if let Some((closing_pos, closing_len)) = closing_result {
288                let abs_closing_pos = content_start + closing_pos;
289                let content = &markdown[content_start..abs_closing_pos];
290
291                // Check YAML size limit
292                if content.len() > crate::error::MAX_YAML_SIZE {
293                    return Err(crate::error::ParseError::InputTooLarge {
294                        size: content.len(),
295                        max: crate::error::MAX_YAML_SIZE,
296                    });
297                }
298
299                // Parse YAML content to check for reserved keys (QUILL, SCOPE)
300                // First, try to parse as YAML
301                let (tag, quill_name, yaml_value) = if !content.is_empty() {
302                    // Try to parse the YAML to check for reserved keys
303                    match serde_yaml::from_str::<serde_yaml::Value>(content) {
304                        Ok(parsed_yaml) => {
305                            if let Some(mapping) = parsed_yaml.as_mapping() {
306                                let quill_key = serde_yaml::Value::String("QUILL".to_string());
307                                let card_key = serde_yaml::Value::String("CARD".to_string());
308                                let scope_key = serde_yaml::Value::String("SCOPE".to_string()); // Backwards compatibility alias
309
310                                let has_quill = mapping.contains_key(&quill_key);
311                                let has_card = mapping.contains_key(&card_key);
312                                let has_scope = mapping.contains_key(&scope_key);
313
314                                // CARD and SCOPE are aliases - can't use both
315                                if has_card && has_scope {
316                                    return Err(crate::error::ParseError::InvalidStructure(
317                                        "Cannot specify both CARD and SCOPE in the same block (SCOPE is an alias for CARD)"
318                                            .to_string(),
319                                    ));
320                                }
321
322                                let effective_card_key = if has_card {
323                                    Some(&card_key)
324                                } else if has_scope {
325                                    Some(&scope_key)
326                                } else {
327                                    None
328                                };
329
330                                if has_quill && effective_card_key.is_some() {
331                                    return Err(crate::error::ParseError::InvalidStructure(
332                                        "Cannot specify both QUILL and CARD/SCOPE in the same block"
333                                            .to_string(),
334                                    ));
335                                }
336
337                                if has_quill {
338                                    // Extract quill name
339                                    let quill_value = mapping.get(&quill_key).unwrap();
340                                    let quill_name_str = quill_value
341                                        .as_str()
342                                        .ok_or("QUILL value must be a string")?;
343
344                                    if !is_valid_tag_name(quill_name_str) {
345                                        return Err(crate::error::ParseError::InvalidStructure(format!(
346                                            "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
347                                            quill_name_str
348                                        )));
349                                    }
350
351                                    // Remove QUILL from the YAML value for processing
352                                    let mut new_mapping = mapping.clone();
353                                    new_mapping.remove(&quill_key);
354                                    let new_value = if new_mapping.is_empty() {
355                                        None
356                                    } else {
357                                        Some(serde_yaml::Value::Mapping(new_mapping))
358                                    };
359
360                                    (None, Some(quill_name_str.to_string()), new_value)
361                                } else if let Some(card_key_used) = effective_card_key {
362                                    // Extract card field name (handles both CARD and SCOPE)
363                                    let card_value = mapping.get(card_key_used).unwrap();
364                                    let field_name = card_value
365                                        .as_str()
366                                        .ok_or("CARD/SCOPE value must be a string")?;
367
368                                    if !is_valid_tag_name(field_name) {
369                                        return Err(crate::error::ParseError::InvalidStructure(format!(
370                                            "Invalid card field name '{}': must match pattern [a-z_][a-z0-9_]*",
371                                            field_name
372                                        )));
373                                    }
374
375                                    // Remove CARD/SCOPE from the YAML value for processing
376                                    let mut new_mapping = mapping.clone();
377                                    new_mapping.remove(card_key_used);
378                                    let new_value = if new_mapping.is_empty() {
379                                        None
380                                    } else {
381                                        Some(serde_yaml::Value::Mapping(new_mapping))
382                                    };
383
384                                    (Some(field_name.to_string()), None, new_value)
385                                } else {
386                                    // No reserved keys, keep the parsed YAML
387                                    (None, None, Some(parsed_yaml))
388                                }
389                            } else {
390                                // Not a mapping, keep the parsed YAML (could be null for whitespace)
391                                (None, None, Some(parsed_yaml))
392                            }
393                        }
394                        Err(e) => {
395                            // YAML parsing failed - return error with context
396                            return Err(crate::error::ParseError::YamlError(e));
397                        }
398                    }
399                } else {
400                    // Empty content
401                    (None, None, None)
402                };
403
404                blocks.push(MetadataBlock {
405                    start: abs_pos,
406                    end: abs_closing_pos + closing_len, // After closing delimiter
407                    yaml_value,
408                    tag,
409                    quill_name,
410                });
411
412                pos = abs_closing_pos + closing_len;
413            } else if abs_pos == 0 {
414                // Frontmatter started but not closed
415                return Err(crate::error::ParseError::InvalidStructure(
416                    "Frontmatter started but not closed with ---".to_string(),
417                ));
418            } else {
419                // Not a valid metadata block, skip this position
420                pos = abs_pos + 3;
421            }
422        } else {
423            break;
424        }
425    }
426
427    Ok(blocks)
428}
429
430/// Decompose markdown into frontmatter fields and body
431fn decompose(markdown: &str) -> Result<ParsedDocument, crate::error::ParseError> {
432    // Check input size limit
433    if markdown.len() > crate::error::MAX_INPUT_SIZE {
434        return Err(crate::error::ParseError::InputTooLarge {
435            size: markdown.len(),
436            max: crate::error::MAX_INPUT_SIZE,
437        });
438    }
439
440    let mut fields = HashMap::new();
441
442    // Find all metadata blocks
443    let blocks = find_metadata_blocks(markdown)?;
444
445    if blocks.is_empty() {
446        // No metadata blocks, entire content is body
447        fields.insert(
448            BODY_FIELD.to_string(),
449            QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
450        );
451        return Ok(ParsedDocument::new(fields));
452    }
453
454    // Collect all card items into unified CARDS array
455    let mut cards_array: Vec<serde_json::Value> = Vec::new();
456    let mut global_frontmatter_index: Option<usize> = None;
457    let mut quill_name: Option<String> = None;
458
459    // First pass: identify global frontmatter, quill directive, and validate
460    for (idx, block) in blocks.iter().enumerate() {
461        if idx == 0 {
462            // Top-level frontmatter: can have QUILL or neither (not considered a card)
463            if let Some(ref name) = block.quill_name {
464                quill_name = Some(name.clone());
465            }
466            // If it has neither QUILL nor CARD, it's global frontmatter
467            if block.tag.is_none() && block.quill_name.is_none() {
468                global_frontmatter_index = Some(idx);
469            }
470        } else {
471            // Inline blocks (idx > 0): MUST have CARD, cannot have QUILL
472            if block.quill_name.is_some() {
473                return Err(crate::error::ParseError::InvalidStructure("QUILL directive can only appear in the top-level frontmatter, not in inline blocks. Use CARD instead.".to_string()));
474            }
475            if block.tag.is_none() {
476                // Inline block without CARD
477                return Err(crate::error::ParseError::missing_card_directive());
478            }
479        }
480    }
481
482    // Parse global frontmatter if present
483    if let Some(idx) = global_frontmatter_index {
484        let block = &blocks[idx];
485
486        // Get parsed YAML fields directly (already parsed in find_metadata_blocks)
487        let yaml_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
488            Some(serde_yaml::Value::Mapping(mapping)) => mapping
489                .iter()
490                .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
491                .collect(),
492            Some(serde_yaml::Value::Null) => {
493                // Null value (from whitespace-only YAML) - treat as empty mapping
494                HashMap::new()
495            }
496            Some(_) => {
497                // Non-mapping, non-null YAML (e.g., scalar, sequence) - this is an error for frontmatter
498                return Err(crate::error::ParseError::InvalidStructure(
499                    "Invalid YAML frontmatter: expected a mapping".to_string(),
500                ));
501            }
502            None => HashMap::new(),
503        };
504
505        // Convert YAML values to QuillValue at boundary
506        for (key, value) in yaml_fields {
507            fields.insert(key, QuillValue::from_yaml(value)?);
508        }
509    }
510
511    // Process blocks with quill directives
512    for block in &blocks {
513        if block.quill_name.is_some() {
514            // Quill directive blocks can have YAML content (becomes part of frontmatter)
515            if let Some(ref yaml_val) = block.yaml_value {
516                let yaml_fields: HashMap<String, serde_yaml::Value> = match yaml_val {
517                    serde_yaml::Value::Mapping(mapping) => mapping
518                        .iter()
519                        .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
520                        .collect(),
521                    serde_yaml::Value::Null => {
522                        // Null value (from whitespace-only YAML) - treat as empty mapping
523                        HashMap::new()
524                    }
525                    _ => {
526                        return Err(crate::error::ParseError::InvalidStructure(
527                            "Invalid YAML in quill block: expected a mapping".to_string(),
528                        ));
529                    }
530                };
531
532                // Check for conflicts with existing fields
533                for key in yaml_fields.keys() {
534                    if fields.contains_key(key) {
535                        return Err(crate::error::ParseError::InvalidStructure(format!(
536                            "Name collision: quill block field '{}' conflicts with existing field",
537                            key
538                        )));
539                    }
540                }
541
542                // Convert YAML values to QuillValue at boundary
543                for (key, value) in yaml_fields {
544                    fields.insert(key, QuillValue::from_yaml(value)?);
545                }
546            }
547        }
548    }
549
550    // Parse tagged blocks (CARD blocks)
551    for (idx, block) in blocks.iter().enumerate() {
552        if let Some(ref tag_name) = block.tag {
553            // Get YAML metadata directly (already parsed in find_metadata_blocks)
554            let mut item_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
555                Some(serde_yaml::Value::Mapping(mapping)) => mapping
556                    .iter()
557                    .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
558                    .collect(),
559                Some(serde_yaml::Value::Null) => {
560                    // Null value (from whitespace-only YAML) - treat as empty mapping
561                    HashMap::new()
562                }
563                Some(_) => {
564                    return Err(crate::error::ParseError::InvalidStructure(format!(
565                        "Invalid YAML in card block '{}': expected a mapping",
566                        tag_name
567                    )));
568                }
569                None => HashMap::new(),
570            };
571
572            // Extract body for this card block
573            let body_start = block.end;
574            let body_end = if idx + 1 < blocks.len() {
575                blocks[idx + 1].start
576            } else {
577                markdown.len()
578            };
579            let body = &markdown[body_start..body_end];
580
581            // Add body to item fields
582            item_fields.insert(
583                BODY_FIELD.to_string(),
584                serde_yaml::Value::String(body.to_string()),
585            );
586
587            // Add CARD discriminator field
588            item_fields.insert(
589                "CARD".to_string(),
590                serde_yaml::Value::String(tag_name.clone()),
591            );
592
593            // Convert to JSON and add to CARDS array
594            let item_json = serde_json::to_value(&item_fields)
595                .map_err(|e| format!("Failed to convert card to JSON: {}", e))?;
596            cards_array.push(item_json);
597        }
598    }
599
600    // Extract global body
601    // Body starts after global frontmatter or quill block (whichever comes first)
602    // Body ends at the first card block or EOF
603    let first_non_card_block_idx = blocks
604        .iter()
605        .position(|b| b.tag.is_none() && b.quill_name.is_none())
606        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));
607
608    let (body_start, body_end) = if let Some(idx) = first_non_card_block_idx {
609        // Body starts after the first non-card block (global frontmatter or quill)
610        let start = blocks[idx].end;
611
612        // Body ends at the first card block after this, or EOF
613        let end = blocks
614            .iter()
615            .skip(idx + 1)
616            .find(|b| b.tag.is_some())
617            .map(|b| b.start)
618            .unwrap_or(markdown.len());
619
620        (start, end)
621    } else {
622        // No global frontmatter or quill block - body is everything before the first card block
623        let end = blocks
624            .iter()
625            .find(|b| b.tag.is_some())
626            .map(|b| b.start)
627            .unwrap_or(0);
628
629        (0, end)
630    };
631
632    let global_body = &markdown[body_start..body_end];
633
634    fields.insert(
635        BODY_FIELD.to_string(),
636        QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
637    );
638
639    // Always add CARDS array to fields (may be empty)
640    fields.insert(
641        "CARDS".to_string(),
642        QuillValue::from_json(serde_json::Value::Array(cards_array)),
643    );
644
645    let quill_tag = quill_name.unwrap_or_else(|| "__default__".to_string());
646    let parsed = ParsedDocument::with_quill_tag(fields, quill_tag);
647
648    Ok(parsed)
649}
650
651#[cfg(test)]
652mod tests {
653    use super::*;
654
655    #[test]
656    fn test_no_frontmatter() {
657        let markdown = "# Hello World\n\nThis is a test.";
658        let doc = decompose(markdown).unwrap();
659
660        assert_eq!(doc.body(), Some(markdown));
661        assert_eq!(doc.fields().len(), 1);
662        // Verify default quill tag is set
663        assert_eq!(doc.quill_tag(), "__default__");
664    }
665
666    #[test]
667    fn test_with_frontmatter() {
668        let markdown = r#"---
669title: Test Document
670author: Test Author
671---
672
673# Hello World
674
675This is the body."#;
676
677        let doc = decompose(markdown).unwrap();
678
679        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
680        assert_eq!(
681            doc.get_field("title").unwrap().as_str().unwrap(),
682            "Test Document"
683        );
684        assert_eq!(
685            doc.get_field("author").unwrap().as_str().unwrap(),
686            "Test Author"
687        );
688        assert_eq!(doc.fields().len(), 4); // title, author, body, CARDS
689                                           // Verify default quill tag is set when no QUILL directive
690        assert_eq!(doc.quill_tag(), "__default__");
691    }
692
693    #[test]
694    fn test_complex_yaml_frontmatter() {
695        let markdown = r#"---
696title: Complex Document
697tags:
698  - test
699  - yaml
700metadata:
701  version: 1.0
702  nested:
703    field: value
704---
705
706Content here."#;
707
708        let doc = decompose(markdown).unwrap();
709
710        assert_eq!(doc.body(), Some("\nContent here."));
711        assert_eq!(
712            doc.get_field("title").unwrap().as_str().unwrap(),
713            "Complex Document"
714        );
715
716        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
717        assert_eq!(tags.len(), 2);
718        assert_eq!(tags[0].as_str().unwrap(), "test");
719        assert_eq!(tags[1].as_str().unwrap(), "yaml");
720    }
721
722    #[test]
723    fn test_with_defaults_empty_document() {
724        use std::collections::HashMap;
725
726        let mut defaults = HashMap::new();
727        defaults.insert(
728            "status".to_string(),
729            QuillValue::from_json(serde_json::json!("draft")),
730        );
731        defaults.insert(
732            "version".to_string(),
733            QuillValue::from_json(serde_json::json!(1)),
734        );
735
736        // Create an empty parsed document
737        let doc = ParsedDocument::new(HashMap::new());
738        let doc_with_defaults = doc.with_defaults(&defaults);
739
740        // Check that defaults were applied
741        assert_eq!(
742            doc_with_defaults
743                .get_field("status")
744                .unwrap()
745                .as_str()
746                .unwrap(),
747            "draft"
748        );
749        assert_eq!(
750            doc_with_defaults
751                .get_field("version")
752                .unwrap()
753                .as_number()
754                .unwrap()
755                .as_i64()
756                .unwrap(),
757            1
758        );
759    }
760
761    #[test]
762    fn test_with_defaults_preserves_existing_values() {
763        use std::collections::HashMap;
764
765        let mut defaults = HashMap::new();
766        defaults.insert(
767            "status".to_string(),
768            QuillValue::from_json(serde_json::json!("draft")),
769        );
770
771        // Create document with existing status
772        let mut fields = HashMap::new();
773        fields.insert(
774            "status".to_string(),
775            QuillValue::from_json(serde_json::json!("published")),
776        );
777        let doc = ParsedDocument::new(fields);
778
779        let doc_with_defaults = doc.with_defaults(&defaults);
780
781        // Existing value should be preserved
782        assert_eq!(
783            doc_with_defaults
784                .get_field("status")
785                .unwrap()
786                .as_str()
787                .unwrap(),
788            "published"
789        );
790    }
791
792    #[test]
793    fn test_with_defaults_partial_application() {
794        use std::collections::HashMap;
795
796        let mut defaults = HashMap::new();
797        defaults.insert(
798            "status".to_string(),
799            QuillValue::from_json(serde_json::json!("draft")),
800        );
801        defaults.insert(
802            "version".to_string(),
803            QuillValue::from_json(serde_json::json!(1)),
804        );
805
806        // Create document with only one field
807        let mut fields = HashMap::new();
808        fields.insert(
809            "status".to_string(),
810            QuillValue::from_json(serde_json::json!("published")),
811        );
812        let doc = ParsedDocument::new(fields);
813
814        let doc_with_defaults = doc.with_defaults(&defaults);
815
816        // Existing field preserved, missing field gets default
817        assert_eq!(
818            doc_with_defaults
819                .get_field("status")
820                .unwrap()
821                .as_str()
822                .unwrap(),
823            "published"
824        );
825        assert_eq!(
826            doc_with_defaults
827                .get_field("version")
828                .unwrap()
829                .as_number()
830                .unwrap()
831                .as_i64()
832                .unwrap(),
833            1
834        );
835    }
836
837    #[test]
838    fn test_with_defaults_no_defaults() {
839        use std::collections::HashMap;
840
841        let defaults = HashMap::new(); // Empty defaults map
842
843        let doc = ParsedDocument::new(HashMap::new());
844        let doc_with_defaults = doc.with_defaults(&defaults);
845
846        // No defaults should be applied
847        assert!(doc_with_defaults.fields().is_empty());
848    }
849
850    #[test]
851    fn test_with_defaults_complex_types() {
852        use std::collections::HashMap;
853
854        let mut defaults = HashMap::new();
855        defaults.insert(
856            "tags".to_string(),
857            QuillValue::from_json(serde_json::json!(["default", "tag"])),
858        );
859
860        let doc = ParsedDocument::new(HashMap::new());
861        let doc_with_defaults = doc.with_defaults(&defaults);
862
863        // Complex default value should be applied
864        let tags = doc_with_defaults
865            .get_field("tags")
866            .unwrap()
867            .as_sequence()
868            .unwrap();
869        assert_eq!(tags.len(), 2);
870        assert_eq!(tags[0].as_str().unwrap(), "default");
871        assert_eq!(tags[1].as_str().unwrap(), "tag");
872    }
873
874    #[test]
875    fn test_with_coercion_singular_to_array() {
876        use std::collections::HashMap;
877
878        let schema = QuillValue::from_json(serde_json::json!({
879            "$schema": "https://json-schema.org/draft/2019-09/schema",
880            "type": "object",
881            "properties": {
882                "tags": {"type": "array"}
883            }
884        }));
885
886        let mut fields = HashMap::new();
887        fields.insert(
888            "tags".to_string(),
889            QuillValue::from_json(serde_json::json!("single-tag")),
890        );
891        let doc = ParsedDocument::new(fields);
892
893        let coerced_doc = doc.with_coercion(&schema);
894
895        let tags = coerced_doc.get_field("tags").unwrap();
896        assert!(tags.as_array().is_some());
897        let tags_array = tags.as_array().unwrap();
898        assert_eq!(tags_array.len(), 1);
899        assert_eq!(tags_array[0].as_str().unwrap(), "single-tag");
900    }
901
902    #[test]
903    fn test_with_coercion_string_to_boolean() {
904        use std::collections::HashMap;
905
906        let schema = QuillValue::from_json(serde_json::json!({
907            "$schema": "https://json-schema.org/draft/2019-09/schema",
908            "type": "object",
909            "properties": {
910                "active": {"type": "boolean"}
911            }
912        }));
913
914        let mut fields = HashMap::new();
915        fields.insert(
916            "active".to_string(),
917            QuillValue::from_json(serde_json::json!("true")),
918        );
919        let doc = ParsedDocument::new(fields);
920
921        let coerced_doc = doc.with_coercion(&schema);
922
923        assert!(coerced_doc.get_field("active").unwrap().as_bool().unwrap());
924    }
925
926    #[test]
927    fn test_with_coercion_string_to_number() {
928        use std::collections::HashMap;
929
930        let schema = QuillValue::from_json(serde_json::json!({
931            "$schema": "https://json-schema.org/draft/2019-09/schema",
932            "type": "object",
933            "properties": {
934                "count": {"type": "number"}
935            }
936        }));
937
938        let mut fields = HashMap::new();
939        fields.insert(
940            "count".to_string(),
941            QuillValue::from_json(serde_json::json!("42")),
942        );
943        let doc = ParsedDocument::new(fields);
944
945        let coerced_doc = doc.with_coercion(&schema);
946
947        assert_eq!(
948            coerced_doc.get_field("count").unwrap().as_i64().unwrap(),
949            42
950        );
951    }
952
953    #[test]
954    fn test_invalid_yaml() {
955        let markdown = r#"---
956title: [invalid yaml
957author: missing close bracket
958---
959
960Content here."#;
961
962        let result = decompose(markdown);
963        assert!(result.is_err());
964        assert!(result
965            .unwrap_err()
966            .to_string()
967            .contains("YAML parsing error"));
968    }
969
970    #[test]
971    fn test_unclosed_frontmatter() {
972        let markdown = r#"---
973title: Test
974author: Test Author
975
976Content without closing ---"#;
977
978        let result = decompose(markdown);
979        assert!(result.is_err());
980        assert!(result.unwrap_err().to_string().contains("not closed"));
981    }
982
983    // Extended metadata tests
984
985    #[test]
986    fn test_basic_tagged_block() {
987        let markdown = r#"---
988title: Main Document
989---
990
991Main body content.
992
993---
994CARD: items
995name: Item 1
996---
997
998Body of item 1."#;
999
1000        let doc = decompose(markdown).unwrap();
1001
1002        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
1003        assert_eq!(
1004            doc.get_field("title").unwrap().as_str().unwrap(),
1005            "Main Document"
1006        );
1007
1008        // Cards are now in CARDS array with CARD discriminator
1009        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1010        assert_eq!(cards.len(), 1);
1011
1012        let item = cards[0].as_object().unwrap();
1013        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1014        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1015        assert_eq!(
1016            item.get(BODY_FIELD).unwrap().as_str().unwrap(),
1017            "\nBody of item 1."
1018        );
1019    }
1020
1021    #[test]
1022    fn test_multiple_tagged_blocks() {
1023        let markdown = r#"---
1024CARD: items
1025name: Item 1
1026tags: [a, b]
1027---
1028
1029First item body.
1030
1031---
1032CARD: items
1033name: Item 2
1034tags: [c, d]
1035---
1036
1037Second item body."#;
1038
1039        let doc = decompose(markdown).unwrap();
1040
1041        // Cards are in CARDS array
1042        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1043        assert_eq!(cards.len(), 2);
1044
1045        let item1 = cards[0].as_object().unwrap();
1046        assert_eq!(item1.get("CARD").unwrap().as_str().unwrap(), "items");
1047        assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");
1048
1049        let item2 = cards[1].as_object().unwrap();
1050        assert_eq!(item2.get("CARD").unwrap().as_str().unwrap(), "items");
1051        assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
1052    }
1053
1054    #[test]
1055    fn test_mixed_global_and_tagged() {
1056        let markdown = r#"---
1057title: Global
1058author: John Doe
1059---
1060
1061Global body.
1062
1063---
1064CARD: sections
1065title: Section 1
1066---
1067
1068Section 1 content.
1069
1070---
1071CARD: sections
1072title: Section 2
1073---
1074
1075Section 2 content."#;
1076
1077        let doc = decompose(markdown).unwrap();
1078
1079        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
1080        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
1081
1082        // Cards are in unified CARDS array
1083        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1084        assert_eq!(cards.len(), 2);
1085        assert_eq!(
1086            cards[0]
1087                .as_object()
1088                .unwrap()
1089                .get("CARD")
1090                .unwrap()
1091                .as_str()
1092                .unwrap(),
1093            "sections"
1094        );
1095    }
1096
1097    #[test]
1098    fn test_empty_tagged_metadata() {
1099        let markdown = r#"---
1100CARD: items
1101---
1102
1103Body without metadata."#;
1104
1105        let doc = decompose(markdown).unwrap();
1106
1107        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1108        assert_eq!(cards.len(), 1);
1109
1110        let item = cards[0].as_object().unwrap();
1111        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1112        assert_eq!(
1113            item.get(BODY_FIELD).unwrap().as_str().unwrap(),
1114            "\nBody without metadata."
1115        );
1116    }
1117
1118    #[test]
1119    fn test_tagged_block_without_body() {
1120        let markdown = r#"---
1121CARD: items
1122name: Item
1123---"#;
1124
1125        let doc = decompose(markdown).unwrap();
1126
1127        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1128        assert_eq!(cards.len(), 1);
1129
1130        let item = cards[0].as_object().unwrap();
1131        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1132        assert_eq!(item.get(BODY_FIELD).unwrap().as_str().unwrap(), "");
1133    }
1134
1135    #[test]
1136    fn test_name_collision_global_and_tagged() {
1137        let markdown = r#"---
1138items: "global value"
1139---
1140
1141Body
1142
1143---
1144CARD: items
1145name: Item
1146---
1147
1148Item body"#;
1149
1150        let result = decompose(markdown);
1151        assert!(result.is_ok(), "Name collision should be allowed now");
1152    }
1153
1154    #[test]
1155    fn test_card_name_collision_with_array_field() {
1156        // CARD type names CAN now conflict with frontmatter field names
1157        let markdown = r#"---
1158items:
1159  - name: Global Item 1
1160    value: 100
1161---
1162
1163Global body
1164
1165---
1166CARD: items
1167name: Scope Item 1
1168---
1169
1170Scope item 1 body"#;
1171
1172        let result = decompose(markdown);
1173        assert!(
1174            result.is_ok(),
1175            "Collision with array field should be allowed"
1176        );
1177    }
1178
1179    #[test]
1180    fn test_empty_global_array_with_card() {
1181        // CARD type names CAN now conflict with frontmatter field names
1182        let markdown = r#"---
1183items: []
1184---
1185
1186Global body
1187
1188---
1189CARD: items
1190name: Item 1
1191---
1192
1193Item 1 body"#;
1194
1195        let result = decompose(markdown);
1196        assert!(
1197            result.is_ok(),
1198            "Collision with empty array field should be allowed"
1199        );
1200    }
1201
1202    #[test]
1203    fn test_reserved_field_name() {
1204        let markdown = r#"---
1205CARD: body
1206BODY: Test
1207---"#;
1208
1209        let result = decompose(markdown);
1210        assert!(
1211            result.is_ok(),
1212            "Reserved field name should be allowed as card name"
1213        );
1214    }
1215
1216    #[test]
1217    fn test_invalid_tag_syntax() {
1218        let markdown = r#"---
1219CARD: Invalid-Name
1220title: Test
1221---"#;
1222
1223        let result = decompose(markdown);
1224        assert!(result.is_err());
1225        assert!(result
1226            .unwrap_err()
1227            .to_string()
1228            .contains("Invalid card field name"));
1229    }
1230
1231    #[test]
1232    fn test_multiple_global_frontmatter_blocks() {
1233        let markdown = r#"---
1234title: First
1235---
1236
1237Body
1238
1239---
1240author: Second
1241---
1242
1243More body"#;
1244
1245        let result = decompose(markdown);
1246        assert!(result.is_err());
1247
1248        // Verify the error message contains CARD hint
1249        let err = result.unwrap_err();
1250        let err_str = err.to_string();
1251        assert!(
1252            err_str.contains("CARD"),
1253            "Error should mention CARD directive: {}",
1254            err_str
1255        );
1256        assert!(
1257            err_str.contains("missing"),
1258            "Error should indicate missing directive: {}",
1259            err_str
1260        );
1261    }
1262
1263    #[test]
1264    fn test_adjacent_blocks_different_tags() {
1265        let markdown = r#"---
1266CARD: items
1267name: Item 1
1268---
1269
1270Item 1 body
1271
1272---
1273CARD: sections
1274title: Section 1
1275---
1276
1277Section 1 body"#;
1278
1279        let doc = decompose(markdown).unwrap();
1280
1281        // All cards in unified CARDS array
1282        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1283        assert_eq!(cards.len(), 2);
1284
1285        // First card is "items" type
1286        let item = cards[0].as_object().unwrap();
1287        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1288        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1289
1290        // Second card is "sections" type
1291        let section = cards[1].as_object().unwrap();
1292        assert_eq!(section.get("CARD").unwrap().as_str().unwrap(), "sections");
1293        assert_eq!(section.get("title").unwrap().as_str().unwrap(), "Section 1");
1294    }
1295
1296    #[test]
1297    fn test_order_preservation() {
1298        let markdown = r#"---
1299CARD: items
1300id: 1
1301---
1302
1303First
1304
1305---
1306CARD: items
1307id: 2
1308---
1309
1310Second
1311
1312---
1313CARD: items
1314id: 3
1315---
1316
1317Third"#;
1318
1319        let doc = decompose(markdown).unwrap();
1320
1321        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1322        assert_eq!(cards.len(), 3);
1323
1324        for (i, card) in cards.iter().enumerate() {
1325            let mapping = card.as_object().unwrap();
1326            assert_eq!(mapping.get("CARD").unwrap().as_str().unwrap(), "items");
1327            let id = mapping.get("id").unwrap().as_i64().unwrap();
1328            assert_eq!(id, (i + 1) as i64);
1329        }
1330    }
1331
1332    #[test]
1333    fn test_product_catalog_integration() {
1334        let markdown = r#"---
1335title: Product Catalog
1336author: John Doe
1337date: 2024-01-01
1338---
1339
1340This is the main catalog description.
1341
1342---
1343CARD: products
1344name: Widget A
1345price: 19.99
1346sku: WID-001
1347---
1348
1349The **Widget A** is our most popular product.
1350
1351---
1352CARD: products
1353name: Gadget B
1354price: 29.99
1355sku: GAD-002
1356---
1357
1358The **Gadget B** is perfect for professionals.
1359
1360---
1361CARD: reviews
1362product: Widget A
1363rating: 5
1364---
1365
1366"Excellent product! Highly recommended."
1367
1368---
1369CARD: reviews
1370product: Gadget B
1371rating: 4
1372---
1373
1374"Very good, but a bit pricey.""#;
1375
1376        let doc = decompose(markdown).unwrap();
1377
1378        // Verify global fields
1379        assert_eq!(
1380            doc.get_field("title").unwrap().as_str().unwrap(),
1381            "Product Catalog"
1382        );
1383        assert_eq!(
1384            doc.get_field("author").unwrap().as_str().unwrap(),
1385            "John Doe"
1386        );
1387        assert_eq!(
1388            doc.get_field("date").unwrap().as_str().unwrap(),
1389            "2024-01-01"
1390        );
1391
1392        // Verify global body
1393        assert!(doc.body().unwrap().contains("main catalog description"));
1394
1395        // All cards in unified CARDS array
1396        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1397        assert_eq!(cards.len(), 4); // 2 products + 2 reviews
1398
1399        // First 2 are products
1400        let product1 = cards[0].as_object().unwrap();
1401        assert_eq!(product1.get("CARD").unwrap().as_str().unwrap(), "products");
1402        assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
1403        assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);
1404
1405        let product2 = cards[1].as_object().unwrap();
1406        assert_eq!(product2.get("CARD").unwrap().as_str().unwrap(), "products");
1407        assert_eq!(product2.get("name").unwrap().as_str().unwrap(), "Gadget B");
1408
1409        // Last 2 are reviews
1410        let review1 = cards[2].as_object().unwrap();
1411        assert_eq!(review1.get("CARD").unwrap().as_str().unwrap(), "reviews");
1412        assert_eq!(
1413            review1.get("product").unwrap().as_str().unwrap(),
1414            "Widget A"
1415        );
1416        assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);
1417
1418        // Total fields: title, author, date, body, CARDS = 5
1419        assert_eq!(doc.fields().len(), 5);
1420    }
1421
1422    #[test]
1423    fn taro_quill_directive() {
1424        let markdown = r#"---
1425QUILL: usaf_memo
1426memo_for: [ORG/SYMBOL]
1427memo_from: [ORG/SYMBOL]
1428---
1429
1430This is the memo body."#;
1431
1432        let doc = decompose(markdown).unwrap();
1433
1434        // Verify quill tag is set
1435        assert_eq!(doc.quill_tag(), "usaf_memo");
1436
1437        // Verify fields from quill block become frontmatter
1438        assert_eq!(
1439            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1440                .as_str()
1441                .unwrap(),
1442            "ORG/SYMBOL"
1443        );
1444
1445        // Verify body
1446        assert_eq!(doc.body(), Some("\nThis is the memo body."));
1447    }
1448
1449    #[test]
1450    fn test_quill_with_card_blocks() {
1451        let markdown = r#"---
1452QUILL: document
1453title: Test Document
1454---
1455
1456Main body.
1457
1458---
1459CARD: sections
1460name: Section 1
1461---
1462
1463Section 1 body."#;
1464
1465        let doc = decompose(markdown).unwrap();
1466
1467        // Verify quill tag
1468        assert_eq!(doc.quill_tag(), "document");
1469
1470        // Verify global field from quill block
1471        assert_eq!(
1472            doc.get_field("title").unwrap().as_str().unwrap(),
1473            "Test Document"
1474        );
1475
1476        // Verify card blocks work via CARDS array
1477        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1478        assert_eq!(cards.len(), 1);
1479        assert_eq!(
1480            cards[0]
1481                .as_object()
1482                .unwrap()
1483                .get("CARD")
1484                .unwrap()
1485                .as_str()
1486                .unwrap(),
1487            "sections"
1488        );
1489
1490        // Verify body
1491        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1492    }
1493
1494    #[test]
1495    fn test_multiple_quill_directives_error() {
1496        let markdown = r#"---
1497QUILL: first
1498---
1499
1500---
1501QUILL: second
1502---"#;
1503
1504        let result = decompose(markdown);
1505        assert!(result.is_err());
1506        // QUILL in inline block is now an error (must appear in top-level frontmatter only)
1507        assert!(result
1508            .unwrap_err()
1509            .to_string()
1510            .contains("top-level frontmatter"));
1511    }
1512
1513    #[test]
1514    fn test_invalid_quill_name() {
1515        let markdown = r#"---
1516QUILL: Invalid-Name
1517---"#;
1518
1519        let result = decompose(markdown);
1520        assert!(result.is_err());
1521        assert!(result
1522            .unwrap_err()
1523            .to_string()
1524            .contains("Invalid quill name"));
1525    }
1526
1527    #[test]
1528    fn test_quill_wrong_value_type() {
1529        let markdown = r#"---
1530QUILL: 123
1531---"#;
1532
1533        let result = decompose(markdown);
1534        assert!(result.is_err());
1535        assert!(result
1536            .unwrap_err()
1537            .to_string()
1538            .contains("QUILL value must be a string"));
1539    }
1540
1541    #[test]
1542    fn test_card_wrong_value_type() {
1543        let markdown = r#"---
1544CARD: 123
1545---"#;
1546
1547        let result = decompose(markdown);
1548        assert!(result.is_err());
1549        assert!(result
1550            .unwrap_err()
1551            .to_string()
1552            .contains("CARD/SCOPE value must be a string"));
1553    }
1554
1555    #[test]
1556    fn test_both_quill_and_card_error() {
1557        let markdown = r#"---
1558QUILL: test
1559CARD: items
1560---"#;
1561
1562        let result = decompose(markdown);
1563        assert!(result.is_err());
1564        assert!(result
1565            .unwrap_err()
1566            .to_string()
1567            .contains("Cannot specify both QUILL and CARD"));
1568    }
1569
1570    #[test]
1571    fn test_blank_lines_in_frontmatter() {
1572        // New parsing standard: blank lines are allowed within YAML blocks
1573        let markdown = r#"---
1574title: Test Document
1575author: Test Author
1576
1577description: This has a blank line above it
1578tags:
1579  - one
1580  - two
1581---
1582
1583# Hello World
1584
1585This is the body."#;
1586
1587        let doc = decompose(markdown).unwrap();
1588
1589        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
1590        assert_eq!(
1591            doc.get_field("title").unwrap().as_str().unwrap(),
1592            "Test Document"
1593        );
1594        assert_eq!(
1595            doc.get_field("author").unwrap().as_str().unwrap(),
1596            "Test Author"
1597        );
1598        assert_eq!(
1599            doc.get_field("description").unwrap().as_str().unwrap(),
1600            "This has a blank line above it"
1601        );
1602
1603        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
1604        assert_eq!(tags.len(), 2);
1605    }
1606
1607    #[test]
1608    fn test_blank_lines_in_scope_blocks() {
1609        // Blank lines should be allowed in CARD blocks too
1610        let markdown = r#"---
1611CARD: items
1612name: Item 1
1613
1614price: 19.99
1615
1616tags:
1617  - electronics
1618  - gadgets
1619---
1620
1621Body of item 1."#;
1622
1623        let doc = decompose(markdown).unwrap();
1624
1625        // Cards are in CARDS array
1626        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1627        assert_eq!(cards.len(), 1);
1628
1629        let item = cards[0].as_object().unwrap();
1630        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1631        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1632        assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);
1633
1634        let tags = item.get("tags").unwrap().as_array().unwrap();
1635        assert_eq!(tags.len(), 2);
1636    }
1637
1638    #[test]
1639    fn test_horizontal_rule_with_blank_lines_above_and_below() {
1640        // Horizontal rule: blank lines both above AND below the ---
1641        let markdown = r#"---
1642title: Test
1643---
1644
1645First paragraph.
1646
1647---
1648
1649Second paragraph."#;
1650
1651        let doc = decompose(markdown).unwrap();
1652
1653        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1654
1655        // The body should contain the horizontal rule (---) as part of the content
1656        let body = doc.body().unwrap();
1657        assert!(body.contains("First paragraph."));
1658        assert!(body.contains("---"));
1659        assert!(body.contains("Second paragraph."));
1660    }
1661
1662    #[test]
1663    fn test_horizontal_rule_not_preceded_by_blank() {
1664        // --- not preceded by blank line but followed by blank line is NOT a horizontal rule
1665        // It's also NOT a valid metadata block opening (since it's followed by blank)
1666        let markdown = r#"---
1667title: Test
1668---
1669
1670First paragraph.
1671---
1672
1673Second paragraph."#;
1674
1675        let doc = decompose(markdown).unwrap();
1676
1677        let body = doc.body().unwrap();
1678        // The second --- should be in the body as text (not a horizontal rule since no blank above)
1679        assert!(body.contains("---"));
1680    }
1681
1682    #[test]
1683    fn test_multiple_blank_lines_in_yaml() {
1684        // Multiple blank lines should also be allowed
1685        let markdown = r#"---
1686title: Test
1687
1688
1689author: John Doe
1690
1691
1692version: 1.0
1693---
1694
1695Body content."#;
1696
1697        let doc = decompose(markdown).unwrap();
1698
1699        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1700        assert_eq!(
1701            doc.get_field("author").unwrap().as_str().unwrap(),
1702            "John Doe"
1703        );
1704        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1705    }
1706
1707    #[test]
1708    fn test_html_comment_interaction() {
1709        let markdown = r#"<!---
1710---> the rest of the page content
1711
1712---
1713key: value
1714---
1715"#;
1716        let doc = decompose(markdown).unwrap();
1717
1718        // The comment should be ignored (or at least not cause a parse error)
1719        // The frontmatter should be parsed
1720        let key = doc.get_field("key").and_then(|v| v.as_str());
1721        assert_eq!(key, Some("value"));
1722    }
1723}
1724#[cfg(test)]
1725mod demo_file_test {
1726    use super::*;
1727
1728    #[test]
1729    fn test_extended_metadata_demo_file() {
1730        let markdown = include_str!("../../fixtures/resources/extended_metadata_demo.md");
1731        let doc = decompose(markdown).unwrap();
1732
1733        // Verify global fields
1734        assert_eq!(
1735            doc.get_field("title").unwrap().as_str().unwrap(),
1736            "Extended Metadata Demo"
1737        );
1738        assert_eq!(
1739            doc.get_field("author").unwrap().as_str().unwrap(),
1740            "Quillmark Team"
1741        );
1742        // version is parsed as a number by YAML
1743        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1744
1745        // Verify body
1746        assert!(doc
1747            .body()
1748            .unwrap()
1749            .contains("extended YAML metadata standard"));
1750
1751        // All cards are now in unified CARDS array
1752        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1753        assert_eq!(cards.len(), 5); // 3 features + 2 use_cases
1754
1755        // Count features and use_cases cards
1756        let features_count = cards
1757            .iter()
1758            .filter(|c| {
1759                c.as_object()
1760                    .unwrap()
1761                    .get("CARD")
1762                    .unwrap()
1763                    .as_str()
1764                    .unwrap()
1765                    == "features"
1766            })
1767            .count();
1768        let use_cases_count = cards
1769            .iter()
1770            .filter(|c| {
1771                c.as_object()
1772                    .unwrap()
1773                    .get("CARD")
1774                    .unwrap()
1775                    .as_str()
1776                    .unwrap()
1777                    == "use_cases"
1778            })
1779            .count();
1780        assert_eq!(features_count, 3);
1781        assert_eq!(use_cases_count, 2);
1782
1783        // Check first card is a feature
1784        let feature1 = cards[0].as_object().unwrap();
1785        assert_eq!(feature1.get("CARD").unwrap().as_str().unwrap(), "features");
1786        assert_eq!(
1787            feature1.get("name").unwrap().as_str().unwrap(),
1788            "Tag Directives"
1789        );
1790    }
1791
1792    #[test]
1793    fn test_input_size_limit() {
1794        // Create markdown larger than MAX_INPUT_SIZE (10 MB)
1795        let size = crate::error::MAX_INPUT_SIZE + 1;
1796        let large_markdown = "a".repeat(size);
1797
1798        let result = decompose(&large_markdown);
1799        assert!(result.is_err());
1800
1801        let err_msg = result.unwrap_err().to_string();
1802        assert!(err_msg.contains("Input too large"));
1803    }
1804
1805    #[test]
1806    fn test_yaml_size_limit() {
1807        // Create YAML block larger than MAX_YAML_SIZE (1 MB)
1808        let mut markdown = String::from("---\n");
1809
1810        // Create a very large YAML field
1811        let size = crate::error::MAX_YAML_SIZE + 1;
1812        markdown.push_str("data: \"");
1813        markdown.push_str(&"x".repeat(size));
1814        markdown.push_str("\"\n---\n\nBody");
1815
1816        let result = decompose(&markdown);
1817        assert!(result.is_err());
1818
1819        let err_msg = result.unwrap_err().to_string();
1820        assert!(err_msg.contains("Input too large"));
1821    }
1822
1823    #[test]
1824    fn test_input_within_size_limit() {
1825        // Create markdown just under the limit
1826        let size = 1000; // Much smaller than limit
1827        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));
1828
1829        let result = decompose(&markdown);
1830        assert!(result.is_ok());
1831    }
1832
1833    #[test]
1834    fn test_yaml_within_size_limit() {
1835        // Create YAML block well within the limit
1836        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";
1837
1838        let result = decompose(markdown);
1839        assert!(result.is_ok());
1840    }
1841
1842    // Tests for guillemet preservation in parsing (guillemets are NOT converted during parsing)
1843    // Guillemet conversion now happens in process_plate, not during parsing
1844    #[test]
1845    fn test_chevrons_preserved_in_body_no_frontmatter() {
1846        let markdown = "Use <<raw content>> here.";
1847        let doc = decompose(markdown).unwrap();
1848
1849        // Body should preserve chevrons (conversion happens later in process_plate)
1850        assert_eq!(doc.body(), Some("Use <<raw content>> here."));
1851    }
1852
1853    #[test]
1854    fn test_chevrons_preserved_in_body_with_frontmatter() {
1855        let markdown = r#"---
1856title: Test
1857---
1858
1859Use <<raw content>> here."#;
1860        let doc = decompose(markdown).unwrap();
1861
1862        // Body should preserve chevrons
1863        assert_eq!(doc.body(), Some("\nUse <<raw content>> here."));
1864    }
1865
1866    #[test]
1867    fn test_chevrons_preserved_in_yaml_string() {
1868        let markdown = r#"---
1869title: Test <<with chevrons>>
1870---
1871
1872Body content."#;
1873        let doc = decompose(markdown).unwrap();
1874
1875        // YAML string values should preserve chevrons
1876        assert_eq!(
1877            doc.get_field("title").unwrap().as_str().unwrap(),
1878            "Test <<with chevrons>>"
1879        );
1880    }
1881
1882    #[test]
1883    fn test_chevrons_preserved_in_yaml_array() {
1884        let markdown = r#"---
1885items:
1886  - "<<first>>"
1887  - "<<second>>"
1888---
1889
1890Body."#;
1891        let doc = decompose(markdown).unwrap();
1892
1893        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1894        assert_eq!(items[0].as_str().unwrap(), "<<first>>");
1895        assert_eq!(items[1].as_str().unwrap(), "<<second>>");
1896    }
1897
1898    #[test]
1899    fn test_chevrons_preserved_in_yaml_nested() {
        // Fixture: a nested mapping (`metadata.description`) holding a chevron-wrapped string.
1900        let markdown = r#"---
1901metadata:
1902  description: "<<nested value>>"
1903---
1904
1905Body."#;
1906        let doc = decompose(markdown).unwrap();
1907
        // The nested value is read through the object view and must be unchanged.
1908        let metadata = doc.get_field("metadata").unwrap().as_object().unwrap();
1909        assert_eq!(
1910            metadata.get("description").unwrap().as_str().unwrap(),
1911            "<<nested value>>"
1912        );
1913    }
1914
1915    #[test]
1916    fn test_chevrons_preserved_in_code_blocks() {
        // Fixture (no frontmatter): a fenced code block followed by plain text,
        // each containing a chevron pair.
1917        let markdown = r#"```
1918<<in code block>>
1919```
1920
1921<<outside code block>>"#;
1922        let doc = decompose(markdown).unwrap();
1923
1924        let body = doc.body().unwrap();
1925        // All chevrons should be preserved (no conversion during parsing)
1926        assert!(body.contains("<<in code block>>"));
1927        assert!(body.contains("<<outside code block>>"));
1928    }
1929
1930    #[test]
1931    fn test_chevrons_preserved_in_inline_code() {
        // Fixture (no frontmatter): one chevron pair inside backticks and one outside.
1932        let markdown = "`<<in inline code>>` and <<outside inline code>>";
1933        let doc = decompose(markdown).unwrap();
1934
1935        let body = doc.body().unwrap();
1936        // All chevrons should be preserved
        // The first check includes the backticks, so the inline-code span itself must survive.
1937        assert!(body.contains("`<<in inline code>>`"));
1938        assert!(body.contains("<<outside inline code>>"));
1939    }
1940
1941    #[test]
1942    fn test_chevrons_preserved_in_tagged_block_body() {
        // Fixture: global frontmatter and main body, then a `CARD: items` block
        // whose own body contains a chevron pair.
1943        let markdown = r#"---
1944title: Main
1945---
1946
1947Main body.
1948
1949---
1950CARD: items
1951name: Item 1
1952---
1953
1954Use <<raw>> here."#;
1955        let doc = decompose(markdown).unwrap();
1956
        // The card is read as the first entry of the `CARDS` sequence; its `CARD`
        // key carries the card name and `BODY_FIELD` carries the card body.
1957        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1958        let item = cards[0].as_object().unwrap();
1959        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1960        let item_body = item.get(BODY_FIELD).unwrap().as_str().unwrap();
1961        // Tagged block body should preserve chevrons
1962        assert!(item_body.contains("<<raw>>"));
1963    }
1964
1965    #[test]
1966    fn test_chevrons_preserved_in_tagged_block_yaml() {
        // Fixture: global frontmatter and main body, then a `CARD: items` block
        // whose YAML `description` value is chevron-wrapped.
1967        let markdown = r#"---
1968title: Main
1969---
1970
1971Main body.
1972
1973---
1974CARD: items
1975description: "<<tagged yaml>>"
1976---
1977
1978Item body."#;
1979        let doc = decompose(markdown).unwrap();
1980
        // The card is read as the first entry of the `CARDS` sequence.
1981        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1982        let item = cards[0].as_object().unwrap();
1983        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1984        // Tagged block YAML should preserve chevrons
1985        assert_eq!(
1986            item.get("description").unwrap().as_str().unwrap(),
1987            "<<tagged yaml>>"
1988        );
1989    }
1990
1991    #[test]
1992    fn test_yaml_numbers_not_affected() {
1993        // Numbers should not be affected
        // `count: 42` must still be readable as an integer through `as_i64`.
1994        let markdown = r#"---
1995count: 42
1996---
1997
1998Body."#;
1999        let doc = decompose(markdown).unwrap();
2000        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
2001    }
2002
2003    #[test]
2004    fn test_yaml_booleans_not_affected() {
2005        // Booleans should not be affected
        // `active: true` must still be readable as a boolean through `as_bool`.
2006        let markdown = r#"---
2007active: true
2008---
2009
2010Body."#;
2011        let doc = decompose(markdown).unwrap();
2012        assert!(doc.get_field("active").unwrap().as_bool().unwrap());
2013    }
2014
2015    #[test]
2016    fn test_multiline_chevrons_preserved() {
2017        // Multiline chevrons should be preserved as-is
        // The opening `<<` and closing `>>` sit on different lines of the input.
2018        let markdown = "<<text\nacross lines>>";
2019        let doc = decompose(markdown).unwrap();
2020
2021        let body = doc.body().unwrap();
2022        // Should contain the original chevrons
        // Each half is checked separately with `contains`; the newline between them is not asserted.
2023        assert!(body.contains("<<text"));
2024        assert!(body.contains("across lines>>"));
2025    }
2026
2027    #[test]
2028    fn test_unmatched_chevrons_preserved() {
        // An opening `<<` with no closing `>>` must parse without error.
2029        let markdown = "<<unmatched";
2030        let doc = decompose(markdown).unwrap();
2031
2032        let body = doc.body().unwrap();
2033        // Unmatched should remain as-is
2034        assert_eq!(body, "<<unmatched");
2035    }
2036}
2037
2038// Additional robustness tests
2039#[cfg(test)]
2040mod robustness_tests {
    // Edge-case tests that drive `decompose` and `is_valid_tag_name` with unusual
    // delimiters, line endings, Unicode, YAML constructs, CARD/QUILL directives
    // and invalid names. Everything under test is reached through `use super::*`.
2041    use super::*;
2042
2043    // Edge cases for delimiter handling
2044
2045    #[test]
2046    fn test_empty_document() {
        // Empty input still parses: the body is "" and the quill tag is "__default__".
2047        let doc = decompose("").unwrap();
2048        assert_eq!(doc.body(), Some(""));
2049        assert_eq!(doc.quill_tag(), "__default__");
2050    }
2051
2052    #[test]
2053    fn test_only_whitespace() {
        // Whitespace-only input (spaces, newlines, a tab) comes back verbatim as the body.
2054        let doc = decompose("   \n\n   \t").unwrap();
2055        assert_eq!(doc.body(), Some("   \n\n   \t"));
2056    }
2057
2058    #[test]
2059    fn test_only_dashes() {
2060        // Just "---" at document start without newline is not treated as frontmatter opener
2061        // (requires "---\n" to start a frontmatter block)
2062        let result = decompose("---");
2063        // This is NOT an error - "---" alone without newline is just body content
2064        assert!(result.is_ok());
2065        assert_eq!(result.unwrap().body(), Some("---"));
2066    }
2067
2068    #[test]
2069    fn test_dashes_in_middle_of_line() {
2070        // --- not at start of line should not be treated as delimiter
2071        let markdown = "some text --- more text";
2072        let doc = decompose(markdown).unwrap();
2073        assert_eq!(doc.body(), Some("some text --- more text"));
2074    }
2075
2076    #[test]
2077    fn test_four_dashes() {
2078        // ---- is not a valid delimiter
2079        let markdown = "----\ntitle: Test\n----\n\nBody";
2080        let doc = decompose(markdown).unwrap();
2081        // Should treat entire content as body
        // NOTE: only the presence of "----" in the body is asserted; the test does not
        // check that `title` was left unparsed.
2082        assert!(doc.body().unwrap().contains("----"));
2083    }
2084
2085    #[test]
2086    fn test_crlf_line_endings() {
2087        // Windows-style line endings
2088        let markdown = "---\r\ntitle: Test\r\n---\r\n\r\nBody content.";
2089        let doc = decompose(markdown).unwrap();
2090        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        // `contains` is used, so the exact leading whitespace of the body is not pinned.
2091        assert!(doc.body().unwrap().contains("Body content."));
2092    }
2093
2094    #[test]
2095    fn test_mixed_line_endings() {
2096        // Mix of \n and \r\n
2097        let markdown = "---\ntitle: Test\r\n---\n\nBody.";
2098        let doc = decompose(markdown).unwrap();
        // Only the `title` field is checked; the body is not asserted here.
2099        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
2100    }
2101
2102    #[test]
2103    fn test_frontmatter_at_eof_no_trailing_newline() {
2104        // Frontmatter closed at EOF without trailing newline
2105        let markdown = "---\ntitle: Test\n---";
2106        let doc = decompose(markdown).unwrap();
2107        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        // With nothing after the closing delimiter the body is the empty string, not `None`.
2108        assert_eq!(doc.body(), Some(""));
2109    }
2110
2111    #[test]
2112    fn test_empty_frontmatter() {
2113        // Empty frontmatter block - requires content between delimiters
2114        // "---\n---" is not valid because --- followed by --- (blank line then ---)
2115        // is treated as horizontal rule logic, not empty frontmatter
2116        // A valid empty frontmatter would be "---\n \n---" (with whitespace content)
        // This test therefore uses the whitespace-content form.
2117        let markdown = "---\n \n---\n\nBody content.";
2118        let doc = decompose(markdown).unwrap();
2119        assert!(doc.body().unwrap().contains("Body content."));
2120        // Should have body and CARDS fields
        // i.e. exactly two fields, and nothing contributed by the frontmatter itself.
2121        assert_eq!(doc.fields().len(), 2);
2122    }
2123
2124    #[test]
2125    fn test_whitespace_only_frontmatter() {
2126        // Frontmatter with only whitespace
2127        let markdown = "---\n   \n\n   \n---\n\nBody.";
2128        let doc = decompose(markdown).unwrap();
2129        assert!(doc.body().unwrap().contains("Body."));
2130    }
2131
2132    // Unicode handling
2133
2134    #[test]
2135    fn test_unicode_in_yaml_keys() {
        // Frontmatter keys may be non-ASCII: one accented-Latin-style key and one Japanese key.
2136        let markdown = "---\ntitre: Bonjour\nタイトル: こんにちは\n---\n\nBody.";
2137        let doc = decompose(markdown).unwrap();
2138        assert_eq!(doc.get_field("titre").unwrap().as_str().unwrap(), "Bonjour");
2139        assert_eq!(
2140            doc.get_field("タイトル").unwrap().as_str().unwrap(),
2141            "こんにちは"
2142        );
2143    }
2144
2145    #[test]
2146    fn test_unicode_in_yaml_values() {
        // A value mixing CJK text and an emoji must round-trip exactly.
2147        let markdown = "---\ntitle: 你好世界 🎉\n---\n\nBody.";
2148        let doc = decompose(markdown).unwrap();
2149        assert_eq!(
2150            doc.get_field("title").unwrap().as_str().unwrap(),
2151            "你好世界 🎉"
2152        );
2153    }
2154
2155    #[test]
2156    fn test_unicode_in_body() {
        // Non-ASCII text and an emoji in the body must both still be present after parsing.
2157        let markdown = "---\ntitle: Test\n---\n\n日本語テキスト with emoji 🚀";
2158        let doc = decompose(markdown).unwrap();
2159        assert!(doc.body().unwrap().contains("日本語テキスト"));
2160        assert!(doc.body().unwrap().contains("🚀"));
2161    }
2162
2163    // YAML edge cases
2164
2165    #[test]
2166    fn test_yaml_multiline_string() {
        // Literal block scalar (`|`): the value must keep at least one newline.
2167        let markdown = r#"---
2168description: |
2169  This is a
2170  multiline string
2171  with preserved newlines.
2172---
2173
2174Body."#;
2175        let doc = decompose(markdown).unwrap();
2176        let desc = doc.get_field("description").unwrap().as_str().unwrap();
2177        assert!(desc.contains("multiline string"));
2178        assert!(desc.contains('\n'));
2179    }
2180
2181    #[test]
2182    fn test_yaml_folded_string() {
        // Folded block scalar (`>`).
2183        let markdown = r#"---
2184description: >
2185  This is a folded
2186  string that becomes
2187  a single line.
2188---
2189
2190Body."#;
2191        let doc = decompose(markdown).unwrap();
2192        let desc = doc.get_field("description").unwrap().as_str().unwrap();
2193        // Folded strings join lines with spaces
        // NOTE: the assertion only looks for the word "folded"; the line joining
        // itself is not verified by this test.
2194        assert!(desc.contains("folded"));
2195    }
2196
2197    #[test]
2198    fn test_yaml_null_value() {
        // An explicit YAML `null` yields a field that is present and reports `is_null()`.
2199        let markdown = "---\noptional: null\n---\n\nBody.";
2200        let doc = decompose(markdown).unwrap();
2201        assert!(doc.get_field("optional").unwrap().is_null());
2202    }
2203
2204    #[test]
2205    fn test_yaml_empty_string_value() {
        // A quoted empty string stays a string ("").
2206        let markdown = "---\nempty: \"\"\n---\n\nBody.";
2207        let doc = decompose(markdown).unwrap();
2208        assert_eq!(doc.get_field("empty").unwrap().as_str().unwrap(), "");
2209    }
2210
2211    #[test]
2212    fn test_yaml_special_characters_in_string() {
        // Inside a quoted scalar, `:`, `,` and `[...]` are plain text, not YAML syntax.
2213        let markdown = "---\nspecial: \"colon: here, and [brackets]\"\n---\n\nBody.";
2214        let doc = decompose(markdown).unwrap();
2215        assert_eq!(
2216            doc.get_field("special").unwrap().as_str().unwrap(),
2217            "colon: here, and [brackets]"
2218        );
2219    }
2220
2221    #[test]
2222    fn test_yaml_nested_objects() {
        // Two levels of nesting under `config`; only the `database` branch is asserted.
2223        let markdown = r#"---
2224config:
2225  database:
2226    host: localhost
2227    port: 5432
2228  cache:
2229    enabled: true
2230---
2231
2232Body."#;
2233        let doc = decompose(markdown).unwrap();
2234        let config = doc.get_field("config").unwrap().as_object().unwrap();
2235        let db = config.get("database").unwrap().as_object().unwrap();
2236        assert_eq!(db.get("host").unwrap().as_str().unwrap(), "localhost");
2237        assert_eq!(db.get("port").unwrap().as_i64().unwrap(), 5432);
2238    }
2239
2240    // CARD block edge cases
2241
2242    #[test]
2243    fn test_card_with_empty_body() {
        // A single CARD block closed at EOF with nothing after it.
2244        let markdown = r#"---
2245CARD: items
2246name: Item
2247---"#;
2248        let doc = decompose(markdown).unwrap();
2249        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2250        assert_eq!(cards.len(), 1);
2251        let item = cards[0].as_object().unwrap();
2252        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
        // The card body is present and empty.
2253        assert_eq!(item.get(BODY_FIELD).unwrap().as_str().unwrap(), "");
2254    }
2255
2256    #[test]
2257    fn test_card_consecutive_blocks() {
        // Two CARD blocks back-to-back with no body between them, both named `a`.
2258        let markdown = r#"---
2259CARD: a
2260id: 1
2261---
2262---
2263CARD: a
2264id: 2
2265---"#;
2266        let doc = decompose(markdown).unwrap();
2267        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2268        assert_eq!(cards.len(), 2);
        // NOTE: only the card count and each `CARD` name are asserted; the differing
        // `id` values (and therefore the order of the cards) are not checked.
2269        assert_eq!(
2270            cards[0]
2271                .as_object()
2272                .unwrap()
2273                .get("CARD")
2274                .unwrap()
2275                .as_str()
2276                .unwrap(),
2277            "a"
2278        );
2279        assert_eq!(
2280            cards[1]
2281                .as_object()
2282                .unwrap()
2283                .get("CARD")
2284                .unwrap()
2285                .as_str()
2286                .unwrap(),
2287            "a"
2288        );
2289    }
2290
2291    #[test]
2292    fn test_card_with_body_containing_dashes() {
        // A mid-line `---` inside a card body must stay in that body.
2293        let markdown = r#"---
2294CARD: items
2295name: Item
2296---
2297
2298Some text with --- dashes in it."#;
2299        let doc = decompose(markdown).unwrap();
2300        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2301        let item = cards[0].as_object().unwrap();
2302        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
2303        let body = item.get(BODY_FIELD).unwrap().as_str().unwrap();
2304        assert!(body.contains("--- dashes"));
2305    }
2306
2307    // QUILL directive edge cases
2308
2309    #[test]
2310    fn test_quill_with_underscore_prefix() {
        // A QUILL name starting with `_` is accepted and returned by `quill_tag()`.
2311        let markdown = "---\nQUILL: _internal\n---\n\nBody.";
2312        let doc = decompose(markdown).unwrap();
2313        assert_eq!(doc.quill_tag(), "_internal");
2314    }
2315
2316    #[test]
2317    fn test_quill_with_numbers() {
        // Digits are accepted after the first character of a QUILL name.
2318        let markdown = "---\nQUILL: form_8_v2\n---\n\nBody.";
2319        let doc = decompose(markdown).unwrap();
2320        assert_eq!(doc.quill_tag(), "form_8_v2");
2321    }
2322
2323    #[test]
2324    fn test_quill_with_additional_fields() {
        // A QUILL directive can share its block with ordinary fields; both are readable.
2325        let markdown = r#"---
2326QUILL: my_quill
2327title: Document Title
2328author: John Doe
2329---
2330
2331Body content."#;
2332        let doc = decompose(markdown).unwrap();
2333        assert_eq!(doc.quill_tag(), "my_quill");
2334        assert_eq!(
2335            doc.get_field("title").unwrap().as_str().unwrap(),
2336            "Document Title"
2337        );
2338        assert_eq!(
2339            doc.get_field("author").unwrap().as_str().unwrap(),
2340            "John Doe"
2341        );
2342    }
2343
2344    // Error handling
    // NOTE(review): "scope" in the next three test names appears to mean the CARD
    // name (the fixtures use `CARD:`) — confirm before renaming.
2345
2346    #[test]
2347    fn test_invalid_scope_name_uppercase() {
        // An all-uppercase CARD name is rejected, and the error text is pinned.
2348        let markdown = "---\nCARD: ITEMS\n---\n\nBody.";
2349        let result = decompose(markdown);
2350        assert!(result.is_err());
2351        assert!(result
2352            .unwrap_err()
2353            .to_string()
2354            .contains("Invalid card field name"));
2355    }
2356
2357    #[test]
2358    fn test_invalid_scope_name_starts_with_number() {
        // A CARD name beginning with a digit is rejected (error text not checked).
2359        let markdown = "---\nCARD: 123items\n---\n\nBody.";
2360        let result = decompose(markdown);
2361        assert!(result.is_err());
2362    }
2363
2364    #[test]
2365    fn test_invalid_scope_name_with_hyphen() {
        // A CARD name containing `-` is rejected (error text not checked).
2366        let markdown = "---\nCARD: my-items\n---\n\nBody.";
2367        let result = decompose(markdown);
2368        assert!(result.is_err());
2369    }
2370
2371    #[test]
2372    fn test_invalid_quill_name_uppercase() {
        // A QUILL name containing uppercase letters is rejected (error text not checked).
2373        let markdown = "---\nQUILL: MyQuill\n---\n\nBody.";
2374        let result = decompose(markdown);
2375        assert!(result.is_err());
2376    }
2377
2378    #[test]
2379    fn test_yaml_syntax_error_missing_colon() {
        // Frontmatter whose only line is `title Test` (no colon) must produce an error.
2380        let markdown = "---\ntitle Test\n---\n\nBody.";
2381        let result = decompose(markdown);
2382        assert!(result.is_err());
2383    }
2384
2385    #[test]
2386    fn test_yaml_syntax_error_bad_indentation() {
        // Second sequence item is indented one space further than the first.
2387        let markdown = "---\nitems:\n- one\n - two\n---\n\nBody.";
2388        let result = decompose(markdown);
2389        // Bad indentation may or may not be an error depending on YAML parser
2390        // Just ensure it doesn't panic
2391        let _ = result;
2392    }
2393
2394    // Body extraction edge cases
2395
2396    #[test]
2397    fn test_body_with_leading_newlines() {
        // Three blank lines separate the closing `---` from the text.
2398        let markdown = "---\ntitle: Test\n---\n\n\n\nBody with leading newlines.";
2399        let doc = decompose(markdown).unwrap();
2400        // Body should preserve leading newlines after frontmatter
        // (only the first character is checked, not the number of newlines)
2401        assert!(doc.body().unwrap().starts_with('\n'));
2402    }
2403
2404    #[test]
2405    fn test_body_with_trailing_newlines() {
        // The input ends with three newlines after "Body.".
2406        let markdown = "---\ntitle: Test\n---\n\nBody.\n\n\n";
2407        let doc = decompose(markdown).unwrap();
2408        // Body should preserve trailing newlines
        // (only the last character is checked, not the number of newlines)
2409        assert!(doc.body().unwrap().ends_with('\n'));
2410    }
2411
2412    #[test]
2413    fn test_no_body_after_frontmatter() {
        // NOTE: same input as `test_frontmatter_at_eof_no_trailing_newline`;
        // this test asserts only the body.
2414        let markdown = "---\ntitle: Test\n---";
2415        let doc = decompose(markdown).unwrap();
2416        assert_eq!(doc.body(), Some(""));
2417    }
2418
2419    // Tag name validation
    // The cases below call `is_valid_tag_name` directly instead of going through `decompose`.
2420
2421    #[test]
2422    fn test_valid_tag_name_single_underscore() {
        // A lone underscore is a valid name.
2423        assert!(is_valid_tag_name("_"));
2424    }
2425
2426    #[test]
2427    fn test_valid_tag_name_underscore_prefix() {
        // A leading underscore followed by lowercase letters is valid.
2428        assert!(is_valid_tag_name("_private"));
2429    }
2430
2431    #[test]
2432    fn test_valid_tag_name_with_numbers() {
        // Digits are valid when they are not the first character.
2433        assert!(is_valid_tag_name("item1"));
2434        assert!(is_valid_tag_name("item_2"));
2435    }
2436
2437    #[test]
2438    fn test_invalid_tag_name_empty() {
        // The empty string is not a valid name.
2439        assert!(!is_valid_tag_name(""));
2440    }
2441
2442    #[test]
2443    fn test_invalid_tag_name_starts_with_number() {
        // A leading digit makes the name invalid.
2444        assert!(!is_valid_tag_name("1item"));
2445    }
2446
2447    #[test]
2448    fn test_invalid_tag_name_uppercase() {
        // Any uppercase letter makes the name invalid, whether one or all.
2449        assert!(!is_valid_tag_name("Items"));
2450        assert!(!is_valid_tag_name("ITEMS"));
2451    }
2452
2453    #[test]
2454    fn test_invalid_tag_name_special_chars() {
        // Hyphen, dot and space are all rejected.
2455        assert!(!is_valid_tag_name("my-items"));
2456        assert!(!is_valid_tag_name("my.items"));
2457        assert!(!is_valid_tag_name("my items"));
2458    }
2459
2460    // Guillemet-related YAML checks (values must come through parsing unchanged)
2461
2462    #[test]
2463    fn test_guillemet_in_yaml_preserves_non_strings() {
        // Integer, float and boolean scalars keep their types.
        // NOTE: the `items` sequence is in the fixture but is not asserted.
2464        let markdown = r#"---
2465count: 42
2466price: 19.99
2467active: true
2468items:
2469  - first
2470  - 100
2471  - true
2472---
2473
2474Body."#;
2475        let doc = decompose(markdown).unwrap();
2476        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
2477        assert_eq!(doc.get_field("price").unwrap().as_f64().unwrap(), 19.99);
2478        assert!(doc.get_field("active").unwrap().as_bool().unwrap());
2479    }
2480
2481    #[test]
2482    fn test_guillemet_double_conversion_prevention() {
2483        // Ensure «» in input doesn't get double-processed
2484        let markdown = "---\ntitle: Already «converted»\n---\n\nBody.";
2485        let doc = decompose(markdown).unwrap();
2486        // Should remain as-is (not double-escaped)
2487        assert_eq!(
2488            doc.get_field("title").unwrap().as_str().unwrap(),
2489            "Already «converted»"
2490        );
2491    }
2492
2493    #[test]
2494    fn test_allowed_card_field_collision() {
        // A global field and a CARD block may share the name `my_card`.
2495        let markdown = r#"---
2496my_card: "some global value"
2497---
2498
2499---
2500CARD: my_card
2501title: "My Card"
2502---
2503Body
2504"#;
2505        // This should SUCCEED according to new PARSE.md
2506        let doc = decompose(markdown).unwrap();
2507
2508        // Verify global field exists
2509        assert_eq!(
2510            doc.get_field("my_card").unwrap().as_str().unwrap(),
2511            "some global value"
2512        );
2513
2514        // Verify Card exists in CARDS array
        // The card is located by its `CARD` value rather than by index.
2515        let cards = doc.get_field("CARDS").unwrap().as_array().unwrap();
2516        assert!(!cards.is_empty());
2517        let card = cards
2518            .iter()
2519            .find(|v| v.get("CARD").and_then(|c| c.as_str()) == Some("my_card"))
2520            .expect("Card not found");
2521        assert_eq!(card.get("title").unwrap().as_str().unwrap(), "My Card");
2522    }
2523}