quillmark_core/
parse.rs

1//! # Parsing Module
2//!
3//! Parsing functionality for markdown documents with YAML frontmatter.
4//!
5//! ## Overview
6//!
//! The `parse` module provides the [`ParsedDocument::from_markdown`] function for parsing markdown documents
//! with YAML frontmatter into a set of frontmatter fields plus the document body.
8//!
9//! ## Key Types
10//!
11//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
12//! - [`BODY_FIELD`]: Constant for the field name storing document body
13//!
14//! ## Examples
15//!
16//! ### Basic Parsing
17//!
18//! ```
19//! use quillmark_core::ParsedDocument;
20//!
21//! let markdown = r#"---
22//! title: My Document
23//! author: John Doe
24//! ---
25//!
26//! # Introduction
27//!
28//! Document content here.
29//! "#;
30//!
31//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
32//! let title = doc.get_field("title")
33//!     .and_then(|v| v.as_str())
34//!     .unwrap_or("Untitled");
35//! ```
36//!
37//! ## Error Handling
38//!
39//! The [`ParsedDocument::from_markdown`] function returns errors for:
40//! - Malformed YAML syntax
41//! - Unclosed frontmatter blocks
42//! - Multiple global frontmatter blocks
43//! - Both QUILL and CARD specified in the same block
44//! - Reserved field name usage
45//! - Name collisions
46//!
47//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.
48
49use std::collections::HashMap;
50
51use crate::value::QuillValue;
52
/// The field name used to store the document body
///
/// The body text is inserted into the parsed field map under this key, and
/// [`ParsedDocument::body`] reads it back. The name is also rejected as a
/// `CARD` value so a card collection can never shadow the body.
pub const BODY_FIELD: &str = "body";

/// Reserved tag name for quill specification
// NOTE(review): this constant is not referenced by the parsing code in this
// part of the file (the parser matches the upper-case `QUILL` key directly);
// presumably it is consumed elsewhere in the crate - confirm before removing.
pub const QUILL_TAG: &str = "quill";
58
/// A parsed markdown document with frontmatter
///
/// Produced by [`ParsedDocument::from_markdown`]; the struct is an immutable
/// snapshot, and the `with_*` methods return modified copies.
#[derive(Debug, Clone)]
pub struct ParsedDocument {
    /// Frontmatter fields keyed by name. The document body is stored alongside
    /// them under [`BODY_FIELD`].
    fields: HashMap<String, QuillValue>,
    /// Quill name taken from the `QUILL` key, or `"__default__"` when the
    /// document did not specify one.
    quill_tag: String,
}
65
66impl ParsedDocument {
67    /// Create a new ParsedDocument with the given fields
68    pub fn new(fields: HashMap<String, QuillValue>) -> Self {
69        Self {
70            fields,
71            quill_tag: "__default__".to_string(),
72        }
73    }
74
75    /// Create a ParsedDocument from fields and quill tag
76    pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: String) -> Self {
77        Self { fields, quill_tag }
78    }
79
80    /// Create a ParsedDocument from markdown string
81    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
82        decompose(markdown).map_err(crate::error::ParseError::from)
83    }
84
85    /// Get the quill tag (from QUILL key, or "__default__" if not specified)
86    pub fn quill_tag(&self) -> &str {
87        &self.quill_tag
88    }
89
90    /// Get the document body
91    pub fn body(&self) -> Option<&str> {
92        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
93    }
94
95    /// Get a specific field
96    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
97        self.fields.get(name)
98    }
99
100    /// Get all fields (including body)
101    pub fn fields(&self) -> &HashMap<String, QuillValue> {
102        &self.fields
103    }
104
105    /// Create a new ParsedDocument with default values applied
106    ///
107    /// This method creates a new ParsedDocument with default values applied for any
108    /// fields that are missing from the original document but have defaults specified.
109    /// Existing fields are preserved and not overwritten.
110    ///
111    /// # Arguments
112    ///
113    /// * `defaults` - A HashMap of field names to their default QuillValues
114    ///
115    /// # Returns
116    ///
117    /// A new ParsedDocument with defaults applied for missing fields
118    pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
119        let mut fields = self.fields.clone();
120
121        for (field_name, default_value) in defaults {
122            // Only apply default if field is missing
123            if !fields.contains_key(field_name) {
124                fields.insert(field_name.clone(), default_value.clone());
125            }
126        }
127
128        Self {
129            fields,
130            quill_tag: self.quill_tag.clone(),
131        }
132    }
133
134    /// Create a new ParsedDocument with coerced field values
135    ///
136    /// This method applies type coercions to field values based on the schema.
137    /// Coercions include:
138    /// - Singular values to arrays when schema expects array
139    /// - String "true"/"false" to boolean
140    /// - Numbers to boolean (0=false, non-zero=true)
141    /// - String numbers to number type
142    /// - Boolean to number (true=1, false=0)
143    ///
144    /// # Arguments
145    ///
146    /// * `schema` - A JSON Schema object defining expected field types
147    ///
148    /// # Returns
149    ///
150    /// A new ParsedDocument with coerced field values
151    pub fn with_coercion(&self, schema: &QuillValue) -> Self {
152        use crate::schema::coerce_document;
153
154        let coerced_fields = coerce_document(schema, &self.fields);
155
156        Self {
157            fields: coerced_fields,
158            quill_tag: self.quill_tag.clone(),
159        }
160    }
161}
162
/// One `---`-delimited metadata block located in the markdown source.
#[derive(Debug)]
struct MetadataBlock {
    /// Byte offset of the opening `---`.
    start: usize,
    /// Byte offset just past the closing delimiter (including its line ending, if any).
    end: usize,
    /// Parsed YAML with any `QUILL`/`CARD` key removed. `None` when the block
    /// content is empty or nothing is left once the reserved key is removed.
    /// (A YAML syntax error aborts parsing instead of producing `None`.)
    yaml_value: Option<serde_yaml::Value>,
    /// Card name taken from the `CARD` key, if present.
    tag: Option<String>,
    /// Quill name taken from the `QUILL` key, if present.
    quill_name: Option<String>,
}
171
/// Check that `name` matches the pattern `[a-z_][a-z0-9_]*`.
///
/// The first character must be an ASCII lowercase letter or `_`; every
/// following character may additionally be an ASCII digit. The empty string
/// is rejected.
fn is_valid_tag_name(name: &str) -> bool {
    let mut remaining = name.chars();

    match remaining.next() {
        Some(head) if head.is_ascii_lowercase() || head == '_' => {
            remaining.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
        }
        // Empty input, or a leading character outside `[a-z_]`.
        _ => false,
    }
}
193
194/// Find all metadata blocks in the document
195fn find_metadata_blocks(
196    markdown: &str,
197) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
198    let mut blocks = Vec::new();
199    let mut pos = 0;
200
201    while pos < markdown.len() {
202        // Look for opening "---\n" or "---\r\n"
203        let search_str = &markdown[pos..];
204        let delimiter_result = search_str
205            .find("---\n")
206            .map(|p| (p, 4, "\n"))
207            .or_else(|| search_str.find("---\r\n").map(|p| (p, 5, "\r\n")));
208
209        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
210            let abs_pos = pos + delimiter_pos;
211
212            // Check if the delimiter is at the start of a line
213            let is_start_of_line = if abs_pos == 0 {
214                true
215            } else {
216                let char_before = markdown.as_bytes()[abs_pos - 1];
217                char_before == b'\n' || char_before == b'\r'
218            };
219
220            if !is_start_of_line {
221                pos = abs_pos + 1;
222                continue;
223            }
224
225            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"
226
227            // Check if this --- is a horizontal rule (blank lines above AND below)
228            let preceded_by_blank = if abs_pos > 0 {
229                // Check if there's a blank line before the ---
230                let before = &markdown[..abs_pos];
231                before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
232            } else {
233                false
234            };
235
236            let followed_by_blank = if content_start < markdown.len() {
237                markdown[content_start..].starts_with('\n')
238                    || markdown[content_start..].starts_with("\r\n")
239            } else {
240                false
241            };
242
243            // Horizontal rule: blank lines both above and below
244            if preceded_by_blank && followed_by_blank {
245                // This is a horizontal rule in the body, skip it
246                pos = abs_pos + 3; // Skip past "---"
247                continue;
248            }
249
250            // Check if followed by non-blank line (or if we're at document start)
251            // This starts a metadata block
252            if followed_by_blank {
253                // --- followed by blank line but NOT preceded by blank line
254                // This is NOT a metadata block opening, skip it
255                pos = abs_pos + 3;
256                continue;
257            }
258
259            // Found potential metadata block opening (followed by non-blank line)
260            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
261            let rest = &markdown[content_start..];
262
263            // First try to find delimiters with trailing newlines
264            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
265            let closing_with_newline = closing_patterns
266                .iter()
267                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
268                .min_by_key(|(p, _)| *p);
269
270            // Also check for closing at end of document (no trailing newline)
271            let closing_at_eof = ["\n---", "\r\n---"]
272                .iter()
273                .filter_map(|delim| {
274                    rest.find(delim).and_then(|p| {
275                        if p + delim.len() == rest.len() {
276                            Some((p, delim.len()))
277                        } else {
278                            None
279                        }
280                    })
281                })
282                .min_by_key(|(p, _)| *p);
283
284            let closing_result = match (closing_with_newline, closing_at_eof) {
285                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
286                (Some(_), Some(_)) => closing_with_newline,
287                (Some(_), None) => closing_with_newline,
288                (None, Some(_)) => closing_at_eof,
289                (None, None) => None,
290            };
291
292            if let Some((closing_pos, closing_len)) = closing_result {
293                let abs_closing_pos = content_start + closing_pos;
294                let content = &markdown[content_start..abs_closing_pos];
295
296                // Check YAML size limit
297                if content.len() > crate::error::MAX_YAML_SIZE {
298                    return Err(format!(
299                        "YAML block too large: {} bytes (max: {} bytes)",
300                        content.len(),
301                        crate::error::MAX_YAML_SIZE
302                    )
303                    .into());
304                }
305
306                // Parse YAML content to check for reserved keys (QUILL, SCOPE)
307                // First, try to parse as YAML
308                let (tag, quill_name, yaml_value) = if !content.is_empty() {
309                    // Try to parse the YAML to check for reserved keys
310                    match serde_yaml::from_str::<serde_yaml::Value>(content) {
311                        Ok(parsed_yaml) => {
312                            if let Some(mapping) = parsed_yaml.as_mapping() {
313                                let quill_key = serde_yaml::Value::String("QUILL".to_string());
314                                let card_key = serde_yaml::Value::String("CARD".to_string());
315
316                                let has_quill = mapping.contains_key(&quill_key);
317                                let has_card = mapping.contains_key(&card_key);
318
319                                if has_quill && has_card {
320                                    return Err(
321                                        "Cannot specify both QUILL and CARD in the same block"
322                                            .into(),
323                                    );
324                                }
325
326                                if has_quill {
327                                    // Extract quill name
328                                    let quill_value = mapping.get(&quill_key).unwrap();
329                                    let quill_name_str = quill_value
330                                        .as_str()
331                                        .ok_or("QUILL value must be a string")?;
332
333                                    if !is_valid_tag_name(quill_name_str) {
334                                        return Err(format!(
335                                            "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
336                                            quill_name_str
337                                        )
338                                        .into());
339                                    }
340
341                                    // Remove QUILL from the YAML value for processing
342                                    let mut new_mapping = mapping.clone();
343                                    new_mapping.remove(&quill_key);
344                                    let new_value = if new_mapping.is_empty() {
345                                        None
346                                    } else {
347                                        Some(serde_yaml::Value::Mapping(new_mapping))
348                                    };
349
350                                    (None, Some(quill_name_str.to_string()), new_value)
351                                } else if has_card {
352                                    // Extract scope field name
353                                    let card_value = mapping.get(&card_key).unwrap();
354                                    let field_name =
355                                        card_value.as_str().ok_or("CARD value must be a string")?;
356
357                                    if !is_valid_tag_name(field_name) {
358                                        return Err(format!(
359                                            "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
360                                            field_name
361                                        )
362                                        .into());
363                                    }
364
365                                    if field_name == BODY_FIELD {
366                                        return Err(format!(
367                                            "Cannot use reserved field name '{}' as CARD value",
368                                            BODY_FIELD
369                                        )
370                                        .into());
371                                    }
372
373                                    // Remove CARD from the YAML value for processing
374                                    let mut new_mapping = mapping.clone();
375                                    new_mapping.remove(&card_key);
376                                    let new_value = if new_mapping.is_empty() {
377                                        None
378                                    } else {
379                                        Some(serde_yaml::Value::Mapping(new_mapping))
380                                    };
381
382                                    (Some(field_name.to_string()), None, new_value)
383                                } else {
384                                    // No reserved keys, keep the parsed YAML
385                                    (None, None, Some(parsed_yaml))
386                                }
387                            } else {
388                                // Not a mapping, keep the parsed YAML (could be null for whitespace)
389                                (None, None, Some(parsed_yaml))
390                            }
391                        }
392                        Err(e) => {
393                            // YAML parsing failed - return error with context
394                            return Err(format!("Invalid YAML frontmatter: {}", e).into());
395                        }
396                    }
397                } else {
398                    // Empty content
399                    (None, None, None)
400                };
401
402                blocks.push(MetadataBlock {
403                    start: abs_pos,
404                    end: abs_closing_pos + closing_len, // After closing delimiter
405                    yaml_value,
406                    tag,
407                    quill_name,
408                });
409
410                pos = abs_closing_pos + closing_len;
411            } else if abs_pos == 0 {
412                // Frontmatter started but not closed
413                return Err("Frontmatter started but not closed with ---".into());
414            } else {
415                // Not a valid metadata block, skip this position
416                pos = abs_pos + 3;
417            }
418        } else {
419            break;
420        }
421    }
422
423    Ok(blocks)
424}
425
426/// Decompose markdown into frontmatter fields and body
427fn decompose(markdown: &str) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
428    // Check input size limit
429    if markdown.len() > crate::error::MAX_INPUT_SIZE {
430        return Err(format!(
431            "Input too large: {} bytes (max: {} bytes)",
432            markdown.len(),
433            crate::error::MAX_INPUT_SIZE
434        )
435        .into());
436    }
437
438    let mut fields = HashMap::new();
439
440    // Find all metadata blocks
441    let blocks = find_metadata_blocks(markdown)?;
442
443    if blocks.is_empty() {
444        // No metadata blocks, entire content is body
445        fields.insert(
446            BODY_FIELD.to_string(),
447            QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
448        );
449        return Ok(ParsedDocument::new(fields));
450    }
451
452    // Collect all card items into unified CARDS array
453    let mut cards_array: Vec<serde_json::Value> = Vec::new();
454    let mut global_frontmatter_index: Option<usize> = None;
455    let mut quill_name: Option<String> = None;
456
457    // First pass: identify global frontmatter, quill directive, and validate
458    for (idx, block) in blocks.iter().enumerate() {
459        if idx == 0 {
460            // Top-level frontmatter: can have QUILL or neither (not considered a card)
461            if let Some(ref name) = block.quill_name {
462                quill_name = Some(name.clone());
463            }
464            // If it has neither QUILL nor CARD, it's global frontmatter
465            if block.tag.is_none() && block.quill_name.is_none() {
466                global_frontmatter_index = Some(idx);
467            }
468        } else {
469            // Inline blocks (idx > 0): MUST have CARD, cannot have QUILL
470            if block.quill_name.is_some() {
471                return Err("QUILL directive can only appear in the top-level frontmatter, not in inline blocks. Use CARD instead.".into());
472            }
473            if block.tag.is_none() {
474                // Inline block without CARD
475                return Err(Box::new(crate::error::ParseError::missing_card_directive()));
476            }
477        }
478    }
479
480    // Parse global frontmatter if present
481    if let Some(idx) = global_frontmatter_index {
482        let block = &blocks[idx];
483
484        // Get parsed YAML fields directly (already parsed in find_metadata_blocks)
485        let yaml_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
486            Some(serde_yaml::Value::Mapping(mapping)) => mapping
487                .iter()
488                .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
489                .collect(),
490            Some(serde_yaml::Value::Null) => {
491                // Null value (from whitespace-only YAML) - treat as empty mapping
492                HashMap::new()
493            }
494            Some(_) => {
495                // Non-mapping, non-null YAML (e.g., scalar, sequence) - this is an error for frontmatter
496                return Err("Invalid YAML frontmatter: expected a mapping".into());
497            }
498            None => HashMap::new(),
499        };
500
501        // Check that all tagged blocks don't conflict with global fields
502        // Exception: if the global field is an array, allow it (we'll merge later)
503        for other_block in &blocks {
504            if let Some(ref tag) = other_block.tag {
505                if let Some(global_value) = yaml_fields.get(tag) {
506                    // Check if the global value is an array
507                    if global_value.as_sequence().is_none() {
508                        return Err(format!(
509                            "Name collision: global field '{}' conflicts with tagged attribute",
510                            tag
511                        )
512                        .into());
513                    }
514                }
515            }
516        }
517
518        // Convert YAML values to QuillValue at boundary
519        for (key, value) in yaml_fields {
520            fields.insert(key, QuillValue::from_yaml(value)?);
521        }
522    }
523
524    // Process blocks with quill directives
525    for block in &blocks {
526        if block.quill_name.is_some() {
527            // Quill directive blocks can have YAML content (becomes part of frontmatter)
528            if let Some(ref yaml_val) = block.yaml_value {
529                let yaml_fields: HashMap<String, serde_yaml::Value> = match yaml_val {
530                    serde_yaml::Value::Mapping(mapping) => mapping
531                        .iter()
532                        .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
533                        .collect(),
534                    serde_yaml::Value::Null => {
535                        // Null value (from whitespace-only YAML) - treat as empty mapping
536                        HashMap::new()
537                    }
538                    _ => {
539                        return Err("Invalid YAML in quill block: expected a mapping".into());
540                    }
541                };
542
543                // Check for conflicts with existing fields
544                for key in yaml_fields.keys() {
545                    if fields.contains_key(key) {
546                        return Err(format!(
547                            "Name collision: quill block field '{}' conflicts with existing field",
548                            key
549                        )
550                        .into());
551                    }
552                }
553
554                // Convert YAML values to QuillValue at boundary
555                for (key, value) in yaml_fields {
556                    fields.insert(key, QuillValue::from_yaml(value)?);
557                }
558            }
559        }
560    }
561
562    // Parse tagged blocks (CARD blocks)
563    for (idx, block) in blocks.iter().enumerate() {
564        if let Some(ref tag_name) = block.tag {
565            // Card names cannot conflict with frontmatter field names
566            if fields.contains_key(tag_name) {
567                return Err(format!(
568                    "Name collision: CARD type '{}' conflicts with frontmatter field name",
569                    tag_name
570                )
571                .into());
572            }
573
574            // Get YAML metadata directly (already parsed in find_metadata_blocks)
575            let mut item_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
576                Some(serde_yaml::Value::Mapping(mapping)) => mapping
577                    .iter()
578                    .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
579                    .collect(),
580                Some(serde_yaml::Value::Null) => {
581                    // Null value (from whitespace-only YAML) - treat as empty mapping
582                    HashMap::new()
583                }
584                Some(_) => {
585                    return Err(format!(
586                        "Invalid YAML in card block '{}': expected a mapping",
587                        tag_name
588                    )
589                    .into());
590                }
591                None => HashMap::new(),
592            };
593
594            // Extract body for this card block
595            let body_start = block.end;
596            let body_end = if idx + 1 < blocks.len() {
597                blocks[idx + 1].start
598            } else {
599                markdown.len()
600            };
601            let body = &markdown[body_start..body_end];
602
603            // Add body to item fields
604            item_fields.insert(
605                BODY_FIELD.to_string(),
606                serde_yaml::Value::String(body.to_string()),
607            );
608
609            // Add CARD discriminator field
610            item_fields.insert(
611                "CARD".to_string(),
612                serde_yaml::Value::String(tag_name.clone()),
613            );
614
615            // Convert to JSON and add to CARDS array
616            let item_json = serde_json::to_value(&item_fields)
617                .map_err(|e| format!("Failed to convert card to JSON: {}", e))?;
618            cards_array.push(item_json);
619        }
620    }
621
622    // Extract global body
623    // Body starts after global frontmatter or quill block (whichever comes first)
624    // Body ends at the first card block or EOF
625    let first_non_card_block_idx = blocks
626        .iter()
627        .position(|b| b.tag.is_none() && b.quill_name.is_none())
628        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));
629
630    let (body_start, body_end) = if let Some(idx) = first_non_card_block_idx {
631        // Body starts after the first non-card block (global frontmatter or quill)
632        let start = blocks[idx].end;
633
634        // Body ends at the first card block after this, or EOF
635        let end = blocks
636            .iter()
637            .skip(idx + 1)
638            .find(|b| b.tag.is_some())
639            .map(|b| b.start)
640            .unwrap_or(markdown.len());
641
642        (start, end)
643    } else {
644        // No global frontmatter or quill block - body is everything before the first card block
645        let end = blocks
646            .iter()
647            .find(|b| b.tag.is_some())
648            .map(|b| b.start)
649            .unwrap_or(0);
650
651        (0, end)
652    };
653
654    let global_body = &markdown[body_start..body_end];
655
656    fields.insert(
657        BODY_FIELD.to_string(),
658        QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
659    );
660
661    // Always add CARDS array to fields (may be empty)
662    fields.insert(
663        "CARDS".to_string(),
664        QuillValue::from_json(serde_json::Value::Array(cards_array)),
665    );
666
667    let quill_tag = quill_name.unwrap_or_else(|| "__default__".to_string());
668    let parsed = ParsedDocument::with_quill_tag(fields, quill_tag);
669
670    Ok(parsed)
671}
672
673#[cfg(test)]
674mod tests {
675    use super::*;
676
677    #[test]
678    fn test_no_frontmatter() {
679        let markdown = "# Hello World\n\nThis is a test.";
680        let doc = decompose(markdown).unwrap();
681
682        assert_eq!(doc.body(), Some(markdown));
683        assert_eq!(doc.fields().len(), 1);
684        // Verify default quill tag is set
685        assert_eq!(doc.quill_tag(), "__default__");
686    }
687
688    #[test]
689    fn test_with_frontmatter() {
690        let markdown = r#"---
691title: Test Document
692author: Test Author
693---
694
695# Hello World
696
697This is the body."#;
698
699        let doc = decompose(markdown).unwrap();
700
701        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
702        assert_eq!(
703            doc.get_field("title").unwrap().as_str().unwrap(),
704            "Test Document"
705        );
706        assert_eq!(
707            doc.get_field("author").unwrap().as_str().unwrap(),
708            "Test Author"
709        );
710        assert_eq!(doc.fields().len(), 4); // title, author, body, CARDS
711                                           // Verify default quill tag is set when no QUILL directive
712        assert_eq!(doc.quill_tag(), "__default__");
713    }
714
715    #[test]
716    fn test_complex_yaml_frontmatter() {
717        let markdown = r#"---
718title: Complex Document
719tags:
720  - test
721  - yaml
722metadata:
723  version: 1.0
724  nested:
725    field: value
726---
727
728Content here."#;
729
730        let doc = decompose(markdown).unwrap();
731
732        assert_eq!(doc.body(), Some("\nContent here."));
733        assert_eq!(
734            doc.get_field("title").unwrap().as_str().unwrap(),
735            "Complex Document"
736        );
737
738        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
739        assert_eq!(tags.len(), 2);
740        assert_eq!(tags[0].as_str().unwrap(), "test");
741        assert_eq!(tags[1].as_str().unwrap(), "yaml");
742    }
743
744    #[test]
745    fn test_with_defaults_empty_document() {
746        use std::collections::HashMap;
747
748        let mut defaults = HashMap::new();
749        defaults.insert(
750            "status".to_string(),
751            QuillValue::from_json(serde_json::json!("draft")),
752        );
753        defaults.insert(
754            "version".to_string(),
755            QuillValue::from_json(serde_json::json!(1)),
756        );
757
758        // Create an empty parsed document
759        let doc = ParsedDocument::new(HashMap::new());
760        let doc_with_defaults = doc.with_defaults(&defaults);
761
762        // Check that defaults were applied
763        assert_eq!(
764            doc_with_defaults
765                .get_field("status")
766                .unwrap()
767                .as_str()
768                .unwrap(),
769            "draft"
770        );
771        assert_eq!(
772            doc_with_defaults
773                .get_field("version")
774                .unwrap()
775                .as_number()
776                .unwrap()
777                .as_i64()
778                .unwrap(),
779            1
780        );
781    }
782
783    #[test]
784    fn test_with_defaults_preserves_existing_values() {
785        use std::collections::HashMap;
786
787        let mut defaults = HashMap::new();
788        defaults.insert(
789            "status".to_string(),
790            QuillValue::from_json(serde_json::json!("draft")),
791        );
792
793        // Create document with existing status
794        let mut fields = HashMap::new();
795        fields.insert(
796            "status".to_string(),
797            QuillValue::from_json(serde_json::json!("published")),
798        );
799        let doc = ParsedDocument::new(fields);
800
801        let doc_with_defaults = doc.with_defaults(&defaults);
802
803        // Existing value should be preserved
804        assert_eq!(
805            doc_with_defaults
806                .get_field("status")
807                .unwrap()
808                .as_str()
809                .unwrap(),
810            "published"
811        );
812    }
813
814    #[test]
815    fn test_with_defaults_partial_application() {
816        use std::collections::HashMap;
817
818        let mut defaults = HashMap::new();
819        defaults.insert(
820            "status".to_string(),
821            QuillValue::from_json(serde_json::json!("draft")),
822        );
823        defaults.insert(
824            "version".to_string(),
825            QuillValue::from_json(serde_json::json!(1)),
826        );
827
828        // Create document with only one field
829        let mut fields = HashMap::new();
830        fields.insert(
831            "status".to_string(),
832            QuillValue::from_json(serde_json::json!("published")),
833        );
834        let doc = ParsedDocument::new(fields);
835
836        let doc_with_defaults = doc.with_defaults(&defaults);
837
838        // Existing field preserved, missing field gets default
839        assert_eq!(
840            doc_with_defaults
841                .get_field("status")
842                .unwrap()
843                .as_str()
844                .unwrap(),
845            "published"
846        );
847        assert_eq!(
848            doc_with_defaults
849                .get_field("version")
850                .unwrap()
851                .as_number()
852                .unwrap()
853                .as_i64()
854                .unwrap(),
855            1
856        );
857    }
858
859    #[test]
860    fn test_with_defaults_no_defaults() {
861        use std::collections::HashMap;
862
863        let defaults = HashMap::new(); // Empty defaults map
864
865        let doc = ParsedDocument::new(HashMap::new());
866        let doc_with_defaults = doc.with_defaults(&defaults);
867
868        // No defaults should be applied
869        assert!(doc_with_defaults.fields().is_empty());
870    }
871
872    #[test]
873    fn test_with_defaults_complex_types() {
874        use std::collections::HashMap;
875
876        let mut defaults = HashMap::new();
877        defaults.insert(
878            "tags".to_string(),
879            QuillValue::from_json(serde_json::json!(["default", "tag"])),
880        );
881
882        let doc = ParsedDocument::new(HashMap::new());
883        let doc_with_defaults = doc.with_defaults(&defaults);
884
885        // Complex default value should be applied
886        let tags = doc_with_defaults
887            .get_field("tags")
888            .unwrap()
889            .as_sequence()
890            .unwrap();
891        assert_eq!(tags.len(), 2);
892        assert_eq!(tags[0].as_str().unwrap(), "default");
893        assert_eq!(tags[1].as_str().unwrap(), "tag");
894    }
895
896    #[test]
897    fn test_with_coercion_singular_to_array() {
898        use std::collections::HashMap;
899
900        let schema = QuillValue::from_json(serde_json::json!({
901            "$schema": "https://json-schema.org/draft/2019-09/schema",
902            "type": "object",
903            "properties": {
904                "tags": {"type": "array"}
905            }
906        }));
907
908        let mut fields = HashMap::new();
909        fields.insert(
910            "tags".to_string(),
911            QuillValue::from_json(serde_json::json!("single-tag")),
912        );
913        let doc = ParsedDocument::new(fields);
914
915        let coerced_doc = doc.with_coercion(&schema);
916
917        let tags = coerced_doc.get_field("tags").unwrap();
918        assert!(tags.as_array().is_some());
919        let tags_array = tags.as_array().unwrap();
920        assert_eq!(tags_array.len(), 1);
921        assert_eq!(tags_array[0].as_str().unwrap(), "single-tag");
922    }
923
924    #[test]
925    fn test_with_coercion_string_to_boolean() {
926        use std::collections::HashMap;
927
928        let schema = QuillValue::from_json(serde_json::json!({
929            "$schema": "https://json-schema.org/draft/2019-09/schema",
930            "type": "object",
931            "properties": {
932                "active": {"type": "boolean"}
933            }
934        }));
935
936        let mut fields = HashMap::new();
937        fields.insert(
938            "active".to_string(),
939            QuillValue::from_json(serde_json::json!("true")),
940        );
941        let doc = ParsedDocument::new(fields);
942
943        let coerced_doc = doc.with_coercion(&schema);
944
945        assert_eq!(
946            coerced_doc.get_field("active").unwrap().as_bool().unwrap(),
947            true
948        );
949    }
950
951    #[test]
952    fn test_with_coercion_string_to_number() {
953        use std::collections::HashMap;
954
955        let schema = QuillValue::from_json(serde_json::json!({
956            "$schema": "https://json-schema.org/draft/2019-09/schema",
957            "type": "object",
958            "properties": {
959                "count": {"type": "number"}
960            }
961        }));
962
963        let mut fields = HashMap::new();
964        fields.insert(
965            "count".to_string(),
966            QuillValue::from_json(serde_json::json!("42")),
967        );
968        let doc = ParsedDocument::new(fields);
969
970        let coerced_doc = doc.with_coercion(&schema);
971
972        assert_eq!(
973            coerced_doc.get_field("count").unwrap().as_i64().unwrap(),
974            42
975        );
976    }
977
978    #[test]
979    fn test_invalid_yaml() {
980        let markdown = r#"---
981title: [invalid yaml
982author: missing close bracket
983---
984
985Content here."#;
986
987        let result = decompose(markdown);
988        assert!(result.is_err());
989        assert!(result
990            .unwrap_err()
991            .to_string()
992            .contains("Invalid YAML frontmatter"));
993    }
994
995    #[test]
996    fn test_unclosed_frontmatter() {
997        let markdown = r#"---
998title: Test
999author: Test Author
1000
1001Content without closing ---"#;
1002
1003        let result = decompose(markdown);
1004        assert!(result.is_err());
1005        assert!(result.unwrap_err().to_string().contains("not closed"));
1006    }
1007
1008    // Extended metadata tests
1009
1010    #[test]
1011    fn test_basic_tagged_block() {
1012        let markdown = r#"---
1013title: Main Document
1014---
1015
1016Main body content.
1017
1018---
1019CARD: items
1020name: Item 1
1021---
1022
1023Body of item 1."#;
1024
1025        let doc = decompose(markdown).unwrap();
1026
1027        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
1028        assert_eq!(
1029            doc.get_field("title").unwrap().as_str().unwrap(),
1030            "Main Document"
1031        );
1032
1033        // Cards are now in CARDS array with CARD discriminator
1034        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1035        assert_eq!(cards.len(), 1);
1036
1037        let item = cards[0].as_object().unwrap();
1038        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1039        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1040        assert_eq!(
1041            item.get("body").unwrap().as_str().unwrap(),
1042            "\nBody of item 1."
1043        );
1044    }
1045
1046    #[test]
1047    fn test_multiple_tagged_blocks() {
1048        let markdown = r#"---
1049CARD: items
1050name: Item 1
1051tags: [a, b]
1052---
1053
1054First item body.
1055
1056---
1057CARD: items
1058name: Item 2
1059tags: [c, d]
1060---
1061
1062Second item body."#;
1063
1064        let doc = decompose(markdown).unwrap();
1065
1066        // Cards are in CARDS array
1067        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1068        assert_eq!(cards.len(), 2);
1069
1070        let item1 = cards[0].as_object().unwrap();
1071        assert_eq!(item1.get("CARD").unwrap().as_str().unwrap(), "items");
1072        assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");
1073
1074        let item2 = cards[1].as_object().unwrap();
1075        assert_eq!(item2.get("CARD").unwrap().as_str().unwrap(), "items");
1076        assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
1077    }
1078
1079    #[test]
1080    fn test_mixed_global_and_tagged() {
1081        let markdown = r#"---
1082title: Global
1083author: John Doe
1084---
1085
1086Global body.
1087
1088---
1089CARD: sections
1090title: Section 1
1091---
1092
1093Section 1 content.
1094
1095---
1096CARD: sections
1097title: Section 2
1098---
1099
1100Section 2 content."#;
1101
1102        let doc = decompose(markdown).unwrap();
1103
1104        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
1105        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
1106
1107        // Cards are in unified CARDS array
1108        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1109        assert_eq!(cards.len(), 2);
1110        assert_eq!(
1111            cards[0]
1112                .as_object()
1113                .unwrap()
1114                .get("CARD")
1115                .unwrap()
1116                .as_str()
1117                .unwrap(),
1118            "sections"
1119        );
1120    }
1121
1122    #[test]
1123    fn test_empty_tagged_metadata() {
1124        let markdown = r#"---
1125CARD: items
1126---
1127
1128Body without metadata."#;
1129
1130        let doc = decompose(markdown).unwrap();
1131
1132        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1133        assert_eq!(cards.len(), 1);
1134
1135        let item = cards[0].as_object().unwrap();
1136        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1137        assert_eq!(
1138            item.get("body").unwrap().as_str().unwrap(),
1139            "\nBody without metadata."
1140        );
1141    }
1142
1143    #[test]
1144    fn test_tagged_block_without_body() {
1145        let markdown = r#"---
1146CARD: items
1147name: Item
1148---"#;
1149
1150        let doc = decompose(markdown).unwrap();
1151
1152        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1153        assert_eq!(cards.len(), 1);
1154
1155        let item = cards[0].as_object().unwrap();
1156        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1157        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
1158    }
1159
1160    #[test]
1161    fn test_name_collision_global_and_tagged() {
1162        let markdown = r#"---
1163items: "global value"
1164---
1165
1166Body
1167
1168---
1169CARD: items
1170name: Item
1171---
1172
1173Item body"#;
1174
1175        let result = decompose(markdown);
1176        assert!(result.is_err());
1177        assert!(result.unwrap_err().to_string().contains("collision"));
1178    }
1179
1180    #[test]
1181    fn test_card_name_collision_with_array_field() {
1182        // CARD type names cannot conflict with any frontmatter field names (including arrays)
1183        let markdown = r#"---
1184items:
1185  - name: Global Item 1
1186    value: 100
1187---
1188
1189Global body
1190
1191---
1192CARD: items
1193name: Scope Item 1
1194---
1195
1196Scope item 1 body"#;
1197
1198        let result = decompose(markdown);
1199        assert!(result.is_err());
1200        assert!(result.unwrap_err().to_string().contains("collision"));
1201    }
1202
1203    #[test]
1204    fn test_empty_global_array_with_card() {
1205        // CARD type names cannot conflict with any frontmatter field names (even empty arrays)
1206        let markdown = r#"---
1207items: []
1208---
1209
1210Global body
1211
1212---
1213CARD: items
1214name: Item 1
1215---
1216
1217Item 1 body"#;
1218
1219        let result = decompose(markdown);
1220        assert!(result.is_err());
1221        assert!(result.unwrap_err().to_string().contains("collision"));
1222    }
1223
1224    #[test]
1225    fn test_reserved_field_name() {
1226        let markdown = r#"---
1227CARD: body
1228content: Test
1229---"#;
1230
1231        let result = decompose(markdown);
1232        assert!(result.is_err());
1233        assert!(result.unwrap_err().to_string().contains("reserved"));
1234    }
1235
1236    #[test]
1237    fn test_invalid_tag_syntax() {
1238        let markdown = r#"---
1239CARD: Invalid-Name
1240title: Test
1241---"#;
1242
1243        let result = decompose(markdown);
1244        assert!(result.is_err());
1245        assert!(result
1246            .unwrap_err()
1247            .to_string()
1248            .contains("Invalid field name"));
1249    }
1250
1251    #[test]
1252    fn test_multiple_global_frontmatter_blocks() {
1253        let markdown = r#"---
1254title: First
1255---
1256
1257Body
1258
1259---
1260author: Second
1261---
1262
1263More body"#;
1264
1265        let result = decompose(markdown);
1266        assert!(result.is_err());
1267
1268        // Verify the error message contains CARD hint
1269        let err = result.unwrap_err();
1270        let err_str = err.to_string();
1271        assert!(
1272            err_str.contains("CARD"),
1273            "Error should mention CARD directive: {}",
1274            err_str
1275        );
1276        assert!(
1277            err_str.contains("missing"),
1278            "Error should indicate missing directive: {}",
1279            err_str
1280        );
1281    }
1282
1283    #[test]
1284    fn test_adjacent_blocks_different_tags() {
1285        let markdown = r#"---
1286CARD: items
1287name: Item 1
1288---
1289
1290Item 1 body
1291
1292---
1293CARD: sections
1294title: Section 1
1295---
1296
1297Section 1 body"#;
1298
1299        let doc = decompose(markdown).unwrap();
1300
1301        // All cards in unified CARDS array
1302        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1303        assert_eq!(cards.len(), 2);
1304
1305        // First card is "items" type
1306        let item = cards[0].as_object().unwrap();
1307        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1308        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1309
1310        // Second card is "sections" type
1311        let section = cards[1].as_object().unwrap();
1312        assert_eq!(section.get("CARD").unwrap().as_str().unwrap(), "sections");
1313        assert_eq!(section.get("title").unwrap().as_str().unwrap(), "Section 1");
1314    }
1315
1316    #[test]
1317    fn test_order_preservation() {
1318        let markdown = r#"---
1319CARD: items
1320id: 1
1321---
1322
1323First
1324
1325---
1326CARD: items
1327id: 2
1328---
1329
1330Second
1331
1332---
1333CARD: items
1334id: 3
1335---
1336
1337Third"#;
1338
1339        let doc = decompose(markdown).unwrap();
1340
1341        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1342        assert_eq!(cards.len(), 3);
1343
1344        for (i, card) in cards.iter().enumerate() {
1345            let mapping = card.as_object().unwrap();
1346            assert_eq!(mapping.get("CARD").unwrap().as_str().unwrap(), "items");
1347            let id = mapping.get("id").unwrap().as_i64().unwrap();
1348            assert_eq!(id, (i + 1) as i64);
1349        }
1350    }
1351
1352    #[test]
1353    fn test_product_catalog_integration() {
1354        let markdown = r#"---
1355title: Product Catalog
1356author: John Doe
1357date: 2024-01-01
1358---
1359
1360This is the main catalog description.
1361
1362---
1363CARD: products
1364name: Widget A
1365price: 19.99
1366sku: WID-001
1367---
1368
1369The **Widget A** is our most popular product.
1370
1371---
1372CARD: products
1373name: Gadget B
1374price: 29.99
1375sku: GAD-002
1376---
1377
1378The **Gadget B** is perfect for professionals.
1379
1380---
1381CARD: reviews
1382product: Widget A
1383rating: 5
1384---
1385
1386"Excellent product! Highly recommended."
1387
1388---
1389CARD: reviews
1390product: Gadget B
1391rating: 4
1392---
1393
1394"Very good, but a bit pricey.""#;
1395
1396        let doc = decompose(markdown).unwrap();
1397
1398        // Verify global fields
1399        assert_eq!(
1400            doc.get_field("title").unwrap().as_str().unwrap(),
1401            "Product Catalog"
1402        );
1403        assert_eq!(
1404            doc.get_field("author").unwrap().as_str().unwrap(),
1405            "John Doe"
1406        );
1407        assert_eq!(
1408            doc.get_field("date").unwrap().as_str().unwrap(),
1409            "2024-01-01"
1410        );
1411
1412        // Verify global body
1413        assert!(doc.body().unwrap().contains("main catalog description"));
1414
1415        // All cards in unified CARDS array
1416        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1417        assert_eq!(cards.len(), 4); // 2 products + 2 reviews
1418
1419        // First 2 are products
1420        let product1 = cards[0].as_object().unwrap();
1421        assert_eq!(product1.get("CARD").unwrap().as_str().unwrap(), "products");
1422        assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
1423        assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);
1424
1425        let product2 = cards[1].as_object().unwrap();
1426        assert_eq!(product2.get("CARD").unwrap().as_str().unwrap(), "products");
1427        assert_eq!(product2.get("name").unwrap().as_str().unwrap(), "Gadget B");
1428
1429        // Last 2 are reviews
1430        let review1 = cards[2].as_object().unwrap();
1431        assert_eq!(review1.get("CARD").unwrap().as_str().unwrap(), "reviews");
1432        assert_eq!(
1433            review1.get("product").unwrap().as_str().unwrap(),
1434            "Widget A"
1435        );
1436        assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);
1437
1438        // Total fields: title, author, date, body, CARDS = 5
1439        assert_eq!(doc.fields().len(), 5);
1440    }
1441
1442    #[test]
1443    fn taro_quill_directive() {
1444        let markdown = r#"---
1445QUILL: usaf_memo
1446memo_for: [ORG/SYMBOL]
1447memo_from: [ORG/SYMBOL]
1448---
1449
1450This is the memo body."#;
1451
1452        let doc = decompose(markdown).unwrap();
1453
1454        // Verify quill tag is set
1455        assert_eq!(doc.quill_tag(), "usaf_memo");
1456
1457        // Verify fields from quill block become frontmatter
1458        assert_eq!(
1459            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1460                .as_str()
1461                .unwrap(),
1462            "ORG/SYMBOL"
1463        );
1464
1465        // Verify body
1466        assert_eq!(doc.body(), Some("\nThis is the memo body."));
1467    }
1468
1469    #[test]
1470    fn test_quill_with_card_blocks() {
1471        let markdown = r#"---
1472QUILL: document
1473title: Test Document
1474---
1475
1476Main body.
1477
1478---
1479CARD: sections
1480name: Section 1
1481---
1482
1483Section 1 body."#;
1484
1485        let doc = decompose(markdown).unwrap();
1486
1487        // Verify quill tag
1488        assert_eq!(doc.quill_tag(), "document");
1489
1490        // Verify global field from quill block
1491        assert_eq!(
1492            doc.get_field("title").unwrap().as_str().unwrap(),
1493            "Test Document"
1494        );
1495
1496        // Verify card blocks work via CARDS array
1497        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1498        assert_eq!(cards.len(), 1);
1499        assert_eq!(
1500            cards[0]
1501                .as_object()
1502                .unwrap()
1503                .get("CARD")
1504                .unwrap()
1505                .as_str()
1506                .unwrap(),
1507            "sections"
1508        );
1509
1510        // Verify body
1511        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1512    }
1513
1514    #[test]
1515    fn test_multiple_quill_directives_error() {
1516        let markdown = r#"---
1517QUILL: first
1518---
1519
1520---
1521QUILL: second
1522---"#;
1523
1524        let result = decompose(markdown);
1525        assert!(result.is_err());
1526        // QUILL in inline block is now an error (must appear in top-level frontmatter only)
1527        assert!(result
1528            .unwrap_err()
1529            .to_string()
1530            .contains("top-level frontmatter"));
1531    }
1532
1533    #[test]
1534    fn test_invalid_quill_name() {
1535        let markdown = r#"---
1536QUILL: Invalid-Name
1537---"#;
1538
1539        let result = decompose(markdown);
1540        assert!(result.is_err());
1541        assert!(result
1542            .unwrap_err()
1543            .to_string()
1544            .contains("Invalid quill name"));
1545    }
1546
1547    #[test]
1548    fn test_quill_wrong_value_type() {
1549        let markdown = r#"---
1550QUILL: 123
1551---"#;
1552
1553        let result = decompose(markdown);
1554        assert!(result.is_err());
1555        assert!(result
1556            .unwrap_err()
1557            .to_string()
1558            .contains("QUILL value must be a string"));
1559    }
1560
1561    #[test]
1562    fn test_card_wrong_value_type() {
1563        let markdown = r#"---
1564CARD: 123
1565---"#;
1566
1567        let result = decompose(markdown);
1568        assert!(result.is_err());
1569        assert!(result
1570            .unwrap_err()
1571            .to_string()
1572            .contains("CARD value must be a string"));
1573    }
1574
1575    #[test]
1576    fn test_both_quill_and_card_error() {
1577        let markdown = r#"---
1578QUILL: test
1579CARD: items
1580---"#;
1581
1582        let result = decompose(markdown);
1583        assert!(result.is_err());
1584        assert!(result
1585            .unwrap_err()
1586            .to_string()
1587            .contains("Cannot specify both QUILL and CARD"));
1588    }
1589
1590    #[test]
1591    fn test_blank_lines_in_frontmatter() {
1592        // New parsing standard: blank lines are allowed within YAML blocks
1593        let markdown = r#"---
1594title: Test Document
1595author: Test Author
1596
1597description: This has a blank line above it
1598tags:
1599  - one
1600  - two
1601---
1602
1603# Hello World
1604
1605This is the body."#;
1606
1607        let doc = decompose(markdown).unwrap();
1608
1609        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
1610        assert_eq!(
1611            doc.get_field("title").unwrap().as_str().unwrap(),
1612            "Test Document"
1613        );
1614        assert_eq!(
1615            doc.get_field("author").unwrap().as_str().unwrap(),
1616            "Test Author"
1617        );
1618        assert_eq!(
1619            doc.get_field("description").unwrap().as_str().unwrap(),
1620            "This has a blank line above it"
1621        );
1622
1623        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
1624        assert_eq!(tags.len(), 2);
1625    }
1626
1627    #[test]
1628    fn test_blank_lines_in_scope_blocks() {
1629        // Blank lines should be allowed in CARD blocks too
1630        let markdown = r#"---
1631CARD: items
1632name: Item 1
1633
1634price: 19.99
1635
1636tags:
1637  - electronics
1638  - gadgets
1639---
1640
1641Body of item 1."#;
1642
1643        let doc = decompose(markdown).unwrap();
1644
1645        // Cards are in CARDS array
1646        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1647        assert_eq!(cards.len(), 1);
1648
1649        let item = cards[0].as_object().unwrap();
1650        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1651        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1652        assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);
1653
1654        let tags = item.get("tags").unwrap().as_array().unwrap();
1655        assert_eq!(tags.len(), 2);
1656    }
1657
1658    #[test]
1659    fn test_horizontal_rule_with_blank_lines_above_and_below() {
1660        // Horizontal rule: blank lines both above AND below the ---
1661        let markdown = r#"---
1662title: Test
1663---
1664
1665First paragraph.
1666
1667---
1668
1669Second paragraph."#;
1670
1671        let doc = decompose(markdown).unwrap();
1672
1673        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1674
1675        // The body should contain the horizontal rule (---) as part of the content
1676        let body = doc.body().unwrap();
1677        assert!(body.contains("First paragraph."));
1678        assert!(body.contains("---"));
1679        assert!(body.contains("Second paragraph."));
1680    }
1681
1682    #[test]
1683    fn test_horizontal_rule_not_preceded_by_blank() {
1684        // --- not preceded by blank line but followed by blank line is NOT a horizontal rule
1685        // It's also NOT a valid metadata block opening (since it's followed by blank)
1686        let markdown = r#"---
1687title: Test
1688---
1689
1690First paragraph.
1691---
1692
1693Second paragraph."#;
1694
1695        let doc = decompose(markdown).unwrap();
1696
1697        let body = doc.body().unwrap();
1698        // The second --- should be in the body as text (not a horizontal rule since no blank above)
1699        assert!(body.contains("---"));
1700    }
1701
1702    #[test]
1703    fn test_multiple_blank_lines_in_yaml() {
1704        // Multiple blank lines should also be allowed
1705        let markdown = r#"---
1706title: Test
1707
1708
1709author: John Doe
1710
1711
1712version: 1.0
1713---
1714
1715Body content."#;
1716
1717        let doc = decompose(markdown).unwrap();
1718
1719        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1720        assert_eq!(
1721            doc.get_field("author").unwrap().as_str().unwrap(),
1722            "John Doe"
1723        );
1724        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1725    }
1726
1727    #[test]
1728    fn test_html_comment_interaction() {
1729        let markdown = r#"<!---
1730---> the rest of the page content
1731
1732---
1733key: value
1734---
1735"#;
1736        let doc = decompose(markdown).unwrap();
1737
1738        // The comment should be ignored (or at least not cause a parse error)
1739        // The frontmatter should be parsed
1740        let key = doc.get_field("key").and_then(|v| v.as_str());
1741        assert_eq!(key, Some("value"));
1742    }
1743}
1744#[cfg(test)]
1745mod demo_file_test {
1746    use super::*;
1747
1748    #[test]
1749    fn test_extended_metadata_demo_file() {
1750        let markdown = include_str!("../../fixtures/resources/extended_metadata_demo.md");
1751        let doc = decompose(markdown).unwrap();
1752
1753        // Verify global fields
1754        assert_eq!(
1755            doc.get_field("title").unwrap().as_str().unwrap(),
1756            "Extended Metadata Demo"
1757        );
1758        assert_eq!(
1759            doc.get_field("author").unwrap().as_str().unwrap(),
1760            "Quillmark Team"
1761        );
1762        // version is parsed as a number by YAML
1763        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1764
1765        // Verify body
1766        assert!(doc
1767            .body()
1768            .unwrap()
1769            .contains("extended YAML metadata standard"));
1770
1771        // All cards are now in unified CARDS array
1772        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1773        assert_eq!(cards.len(), 5); // 3 features + 2 use_cases
1774
1775        // Count features and use_cases cards
1776        let features_count = cards
1777            .iter()
1778            .filter(|c| {
1779                c.as_object()
1780                    .unwrap()
1781                    .get("CARD")
1782                    .unwrap()
1783                    .as_str()
1784                    .unwrap()
1785                    == "features"
1786            })
1787            .count();
1788        let use_cases_count = cards
1789            .iter()
1790            .filter(|c| {
1791                c.as_object()
1792                    .unwrap()
1793                    .get("CARD")
1794                    .unwrap()
1795                    .as_str()
1796                    .unwrap()
1797                    == "use_cases"
1798            })
1799            .count();
1800        assert_eq!(features_count, 3);
1801        assert_eq!(use_cases_count, 2);
1802
1803        // Check first card is a feature
1804        let feature1 = cards[0].as_object().unwrap();
1805        assert_eq!(feature1.get("CARD").unwrap().as_str().unwrap(), "features");
1806        assert_eq!(
1807            feature1.get("name").unwrap().as_str().unwrap(),
1808            "Tag Directives"
1809        );
1810    }
1811
1812    #[test]
1813    fn test_input_size_limit() {
1814        // Create markdown larger than MAX_INPUT_SIZE (10 MB)
1815        let size = crate::error::MAX_INPUT_SIZE + 1;
1816        let large_markdown = "a".repeat(size);
1817
1818        let result = decompose(&large_markdown);
1819        assert!(result.is_err());
1820
1821        let err_msg = result.unwrap_err().to_string();
1822        assert!(err_msg.contains("Input too large"));
1823    }
1824
1825    #[test]
1826    fn test_yaml_size_limit() {
1827        // Create YAML block larger than MAX_YAML_SIZE (1 MB)
1828        let mut markdown = String::from("---\n");
1829
1830        // Create a very large YAML field
1831        let size = crate::error::MAX_YAML_SIZE + 1;
1832        markdown.push_str("data: \"");
1833        markdown.push_str(&"x".repeat(size));
1834        markdown.push_str("\"\n---\n\nBody");
1835
1836        let result = decompose(&markdown);
1837        assert!(result.is_err());
1838
1839        let err_msg = result.unwrap_err().to_string();
1840        assert!(err_msg.contains("YAML block too large"));
1841    }
1842
1843    #[test]
1844    fn test_input_within_size_limit() {
1845        // Create markdown just under the limit
1846        let size = 1000; // Much smaller than limit
1847        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));
1848
1849        let result = decompose(&markdown);
1850        assert!(result.is_ok());
1851    }
1852
1853    #[test]
1854    fn test_yaml_within_size_limit() {
1855        // Create YAML block well within the limit
1856        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";
1857
1858        let result = decompose(&markdown);
1859        assert!(result.is_ok());
1860    }
1861
1862    // Tests for guillemet preservation in parsing (guillemets are NOT converted during parsing)
1863    // Guillemet conversion now happens in process_plate, not during parsing
1864    #[test]
1865    fn test_chevrons_preserved_in_body_no_frontmatter() {
1866        let markdown = "Use <<raw content>> here.";
1867        let doc = decompose(markdown).unwrap();
1868
1869        // Body should preserve chevrons (conversion happens later in process_plate)
1870        assert_eq!(doc.body(), Some("Use <<raw content>> here."));
1871    }
1872
1873    #[test]
1874    fn test_chevrons_preserved_in_body_with_frontmatter() {
1875        let markdown = r#"---
1876title: Test
1877---
1878
1879Use <<raw content>> here."#;
1880        let doc = decompose(markdown).unwrap();
1881
1882        // Body should preserve chevrons
1883        assert_eq!(doc.body(), Some("\nUse <<raw content>> here."));
1884    }
1885
1886    #[test]
1887    fn test_chevrons_preserved_in_yaml_string() {
1888        let markdown = r#"---
1889title: Test <<with chevrons>>
1890---
1891
1892Body content."#;
1893        let doc = decompose(markdown).unwrap();
1894
1895        // YAML string values should preserve chevrons
1896        assert_eq!(
1897            doc.get_field("title").unwrap().as_str().unwrap(),
1898            "Test <<with chevrons>>"
1899        );
1900    }
1901
1902    #[test]
1903    fn test_chevrons_preserved_in_yaml_array() {
1904        let markdown = r#"---
1905items:
1906  - "<<first>>"
1907  - "<<second>>"
1908---
1909
1910Body."#;
1911        let doc = decompose(markdown).unwrap();
1912
1913        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1914        assert_eq!(items[0].as_str().unwrap(), "<<first>>");
1915        assert_eq!(items[1].as_str().unwrap(), "<<second>>");
1916    }
1917
1918    #[test]
1919    fn test_chevrons_preserved_in_yaml_nested() {
1920        let markdown = r#"---
1921metadata:
1922  description: "<<nested value>>"
1923---
1924
1925Body."#;
1926        let doc = decompose(markdown).unwrap();
1927
1928        let metadata = doc.get_field("metadata").unwrap().as_object().unwrap();
1929        assert_eq!(
1930            metadata.get("description").unwrap().as_str().unwrap(),
1931            "<<nested value>>"
1932        );
1933    }
1934
1935    #[test]
1936    fn test_chevrons_preserved_in_code_blocks() {
1937        let markdown = r#"```
1938<<in code block>>
1939```
1940
1941<<outside code block>>"#;
1942        let doc = decompose(markdown).unwrap();
1943
1944        let body = doc.body().unwrap();
1945        // All chevrons should be preserved (no conversion during parsing)
1946        assert!(body.contains("<<in code block>>"));
1947        assert!(body.contains("<<outside code block>>"));
1948    }
1949
1950    #[test]
1951    fn test_chevrons_preserved_in_inline_code() {
1952        let markdown = "`<<in inline code>>` and <<outside inline code>>";
1953        let doc = decompose(markdown).unwrap();
1954
1955        let body = doc.body().unwrap();
1956        // All chevrons should be preserved
1957        assert!(body.contains("`<<in inline code>>`"));
1958        assert!(body.contains("<<outside inline code>>"));
1959    }
1960
1961    #[test]
1962    fn test_chevrons_preserved_in_tagged_block_body() {
1963        let markdown = r#"---
1964title: Main
1965---
1966
1967Main body.
1968
1969---
1970CARD: items
1971name: Item 1
1972---
1973
1974Use <<raw>> here."#;
1975        let doc = decompose(markdown).unwrap();
1976
1977        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1978        let item = cards[0].as_object().unwrap();
1979        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1980        let item_body = item.get("body").unwrap().as_str().unwrap();
1981        // Tagged block body should preserve chevrons
1982        assert!(item_body.contains("<<raw>>"));
1983    }
1984
1985    #[test]
1986    fn test_chevrons_preserved_in_tagged_block_yaml() {
1987        let markdown = r#"---
1988title: Main
1989---
1990
1991Main body.
1992
1993---
1994CARD: items
1995description: "<<tagged yaml>>"
1996---
1997
1998Item body."#;
1999        let doc = decompose(markdown).unwrap();
2000
2001        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2002        let item = cards[0].as_object().unwrap();
2003        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
2004        // Tagged block YAML should preserve chevrons
2005        assert_eq!(
2006            item.get("description").unwrap().as_str().unwrap(),
2007            "<<tagged yaml>>"
2008        );
2009    }
2010
2011    #[test]
2012    fn test_yaml_numbers_not_affected() {
2013        // Numbers should not be affected
2014        let markdown = r#"---
2015count: 42
2016---
2017
2018Body."#;
2019        let doc = decompose(markdown).unwrap();
2020        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
2021    }
2022
2023    #[test]
2024    fn test_yaml_booleans_not_affected() {
2025        // Booleans should not be affected
2026        let markdown = r#"---
2027active: true
2028---
2029
2030Body."#;
2031        let doc = decompose(markdown).unwrap();
2032        assert_eq!(doc.get_field("active").unwrap().as_bool().unwrap(), true);
2033    }
2034
2035    #[test]
2036    fn test_multiline_chevrons_preserved() {
2037        // Multiline chevrons should be preserved as-is
2038        let markdown = "<<text\nacross lines>>";
2039        let doc = decompose(markdown).unwrap();
2040
2041        let body = doc.body().unwrap();
2042        // Should contain the original chevrons
2043        assert!(body.contains("<<text"));
2044        assert!(body.contains("across lines>>"));
2045    }
2046
2047    #[test]
2048    fn test_unmatched_chevrons_preserved() {
2049        let markdown = "<<unmatched";
2050        let doc = decompose(markdown).unwrap();
2051
2052        let body = doc.body().unwrap();
2053        // Unmatched should remain as-is
2054        assert_eq!(body, "<<unmatched");
2055    }
2056}
2057
// Additional robustness tests: delimiter edge cases, line endings, Unicode,
// YAML corner cases, CARD/QUILL directives, error paths and tag-name validation.
#[cfg(test)]
mod robustness_tests {
    use super::*;

    // Edge cases for delimiter handling

    /// Empty input parses to an empty body with the `__default__` quill tag.
    #[test]
    fn test_empty_document() {
        let parsed = decompose("").unwrap();
        assert_eq!(parsed.body(), Some(""));
        assert_eq!(parsed.quill_tag(), "__default__");
    }

    /// Whitespace-only input is returned unchanged as the body.
    #[test]
    fn test_only_whitespace() {
        let input = "   \n\n   \t";
        let parsed = decompose(input).unwrap();
        assert_eq!(parsed.body(), Some(input));
    }

    /// A bare `---` with no trailing newline is body text, not a frontmatter opener.
    #[test]
    fn test_only_dashes() {
        // The opener is "---\n"; without the newline this is NOT an error,
        // the three dashes are simply the body.
        let outcome = decompose("---");
        assert!(outcome.is_ok());
        let parsed = outcome.unwrap();
        assert_eq!(parsed.body(), Some("---"));
    }

    /// `---` that is not at the start of a line is never a delimiter.
    #[test]
    fn test_dashes_in_middle_of_line() {
        let input = "some text --- more text";
        let parsed = decompose(input).unwrap();
        assert_eq!(parsed.body(), Some(input));
    }

    /// Four dashes (`----`) are not a valid delimiter.
    #[test]
    fn test_four_dashes() {
        let input = "----\ntitle: Test\n----\n\nBody";
        let parsed = decompose(input).unwrap();
        // The content is treated as body, so the dashes must show up in it.
        let text = parsed.body().unwrap();
        assert!(text.contains("----"));
    }

    /// Windows-style (`\r\n`) line endings are accepted throughout.
    #[test]
    fn test_crlf_line_endings() {
        let input = "---\r\ntitle: Test\r\n---\r\n\r\nBody content.";
        let parsed = decompose(input).unwrap();
        let title = parsed.get_field("title").unwrap().as_str().unwrap();
        assert_eq!(title, "Test");
        assert!(parsed.body().unwrap().contains("Body content."));
    }

    /// A mix of `\n` and `\r\n` line endings is accepted.
    #[test]
    fn test_mixed_line_endings() {
        let input = "---\ntitle: Test\r\n---\n\nBody.";
        let parsed = decompose(input).unwrap();
        let title = parsed.get_field("title").unwrap().as_str().unwrap();
        assert_eq!(title, "Test");
    }

    /// Frontmatter may be closed by a `---` that ends the file without a newline.
    #[test]
    fn test_frontmatter_at_eof_no_trailing_newline() {
        let input = "---\ntitle: Test\n---";
        let parsed = decompose(input).unwrap();
        let title = parsed.get_field("title").unwrap().as_str().unwrap();
        assert_eq!(title, "Test");
        assert_eq!(parsed.body(), Some(""));
    }

    /// A frontmatter block with no fields yields only the built-in fields.
    #[test]
    fn test_empty_frontmatter() {
        // Back-to-back delimiters ("---\n---") are not read as empty frontmatter;
        // they fall under the horizontal-rule handling instead. A field-less block
        // therefore needs some (whitespace) content between the delimiters.
        let input = "---\n \n---\n\nBody content.";
        let parsed = decompose(input).unwrap();
        assert!(parsed.body().unwrap().contains("Body content."));
        // Only the body and CARDS fields should be present.
        assert_eq!(parsed.fields().len(), 2);
    }

    /// Frontmatter made up solely of whitespace lines still parses.
    #[test]
    fn test_whitespace_only_frontmatter() {
        let input = "---\n   \n\n   \n---\n\nBody.";
        let parsed = decompose(input).unwrap();
        assert!(parsed.body().unwrap().contains("Body."));
    }

    // Unicode handling

    /// Non-ASCII YAML keys can be looked up by name.
    #[test]
    fn test_unicode_in_yaml_keys() {
        let input = "---\ntitre: Bonjour\nタイトル: こんにちは\n---\n\nBody.";
        let parsed = decompose(input).unwrap();
        let french = parsed.get_field("titre").unwrap().as_str().unwrap();
        let japanese = parsed.get_field("タイトル").unwrap().as_str().unwrap();
        assert_eq!(french, "Bonjour");
        assert_eq!(japanese, "こんにちは");
    }

    /// Non-ASCII YAML values (including emoji) round-trip unchanged.
    #[test]
    fn test_unicode_in_yaml_values() {
        let input = "---\ntitle: 你好世界 🎉\n---\n\nBody.";
        let parsed = decompose(input).unwrap();
        let title = parsed.get_field("title").unwrap().as_str().unwrap();
        assert_eq!(title, "你好世界 🎉");
    }

    /// Non-ASCII body text (including emoji) is preserved.
    #[test]
    fn test_unicode_in_body() {
        let input = "---\ntitle: Test\n---\n\n日本語テキスト with emoji 🚀";
        let parsed = decompose(input).unwrap();
        let text = parsed.body().unwrap();
        assert!(text.contains("日本語テキスト"));
        assert!(text.contains("🚀"));
    }

    // YAML edge cases

    /// A literal block scalar (`|`) keeps its newlines.
    #[test]
    fn test_yaml_multiline_string() {
        let input = concat!(
            "---\n",
            "description: |\n",
            "  This is a\n",
            "  multiline string\n",
            "  with preserved newlines.\n",
            "---\n",
            "\n",
            "Body.",
        );
        let parsed = decompose(input).unwrap();
        let description = parsed.get_field("description").unwrap().as_str().unwrap();
        assert!(description.contains("multiline string"));
        assert!(description.contains('\n'));
    }

    /// A folded block scalar (`>`) is parsed as a string.
    #[test]
    fn test_yaml_folded_string() {
        let input = concat!(
            "---\n",
            "description: >\n",
            "  This is a folded\n",
            "  string that becomes\n",
            "  a single line.\n",
            "---\n",
            "\n",
            "Body.",
        );
        let parsed = decompose(input).unwrap();
        let description = parsed.get_field("description").unwrap().as_str().unwrap();
        // Folded strings join lines with spaces; only the content is checked here.
        assert!(description.contains("folded"));
    }

    /// An explicit YAML `null` is stored as a null value.
    #[test]
    fn test_yaml_null_value() {
        let parsed = decompose("---\noptional: null\n---\n\nBody.").unwrap();
        let optional = parsed.get_field("optional").unwrap();
        assert!(optional.is_null());
    }

    /// A quoted empty string stays an empty string (it is not turned into null).
    #[test]
    fn test_yaml_empty_string_value() {
        let parsed = decompose("---\nempty: \"\"\n---\n\nBody.").unwrap();
        let empty = parsed.get_field("empty").unwrap().as_str().unwrap();
        assert_eq!(empty, "");
    }

    /// YAML-significant characters inside a quoted string are taken literally.
    #[test]
    fn test_yaml_special_characters_in_string() {
        let input = "---\nspecial: \"colon: here, and [brackets]\"\n---\n\nBody.";
        let parsed = decompose(input).unwrap();
        let special = parsed.get_field("special").unwrap().as_str().unwrap();
        assert_eq!(special, "colon: here, and [brackets]");
    }

    /// Nested mappings are reachable level by level with their value types intact.
    #[test]
    fn test_yaml_nested_objects() {
        let input = concat!(
            "---\n",
            "config:\n",
            "  database:\n",
            "    host: localhost\n",
            "    port: 5432\n",
            "  cache:\n",
            "    enabled: true\n",
            "---\n",
            "\n",
            "Body.",
        );
        let parsed = decompose(input).unwrap();
        let config = parsed.get_field("config").unwrap().as_object().unwrap();
        let database = config.get("database").unwrap().as_object().unwrap();
        let host = database.get("host").unwrap().as_str().unwrap();
        let port = database.get("port").unwrap().as_i64().unwrap();
        assert_eq!(host, "localhost");
        assert_eq!(port, 5432);
    }

    // CARD block edge cases

    /// A CARD block that ends the document gets an empty body.
    #[test]
    fn test_card_with_empty_body() {
        let input = "---\nCARD: items\nname: Item\n---";
        let parsed = decompose(input).unwrap();
        let cards = parsed.get_field("CARDS").unwrap().as_sequence().unwrap();
        assert_eq!(cards.len(), 1);

        let card = cards[0].as_object().unwrap();
        let tag = card.get("CARD").unwrap().as_str().unwrap();
        let card_body = card.get("body").unwrap().as_str().unwrap();
        assert_eq!(tag, "items");
        assert_eq!(card_body, "");
    }

    /// Two CARD blocks placed back to back produce two separate cards, in order.
    #[test]
    fn test_card_consecutive_blocks() {
        let input = concat!(
            "---\n",
            "CARD: a\n",
            "id: 1\n",
            "---\n",
            "---\n",
            "CARD: a\n",
            "id: 2\n",
            "---",
        );
        let parsed = decompose(input).unwrap();
        let cards = parsed.get_field("CARDS").unwrap().as_sequence().unwrap();
        assert_eq!(cards.len(), 2);

        // Both cards share the tag "a", so the tag alone cannot tell them apart;
        // checking `id` as well proves they are distinct and in document order.
        for &(index, expected_id) in &[(0_usize, 1_i64), (1, 2)] {
            let card = cards[index].as_object().unwrap();
            assert_eq!(card.get("CARD").unwrap().as_str().unwrap(), "a");
            assert_eq!(card.get("id").unwrap().as_i64().unwrap(), expected_id);
        }
    }

    /// `---` in the middle of a card body line does not end or start a block.
    #[test]
    fn test_card_with_body_containing_dashes() {
        let input = concat!(
            "---\n",
            "CARD: items\n",
            "name: Item\n",
            "---\n",
            "\n",
            "Some text with --- dashes in it.",
        );
        let parsed = decompose(input).unwrap();
        let cards = parsed.get_field("CARDS").unwrap().as_sequence().unwrap();
        let card = cards[0].as_object().unwrap();
        let tag = card.get("CARD").unwrap().as_str().unwrap();
        assert_eq!(tag, "items");

        let card_body = card.get("body").unwrap().as_str().unwrap();
        assert!(card_body.contains("--- dashes"));
    }

    // QUILL directive edge cases

    /// A quill name may start with an underscore.
    #[test]
    fn test_quill_with_underscore_prefix() {
        let parsed = decompose("---\nQUILL: _internal\n---\n\nBody.").unwrap();
        assert_eq!(parsed.quill_tag(), "_internal");
    }

    /// A quill name may contain digits after the first character.
    #[test]
    fn test_quill_with_numbers() {
        let parsed = decompose("---\nQUILL: form_8_v2\n---\n\nBody.").unwrap();
        assert_eq!(parsed.quill_tag(), "form_8_v2");
    }

    /// Ordinary fields next to a QUILL directive are kept as document fields.
    #[test]
    fn test_quill_with_additional_fields() {
        let input = concat!(
            "---\n",
            "QUILL: my_quill\n",
            "title: Document Title\n",
            "author: John Doe\n",
            "---\n",
            "\n",
            "Body content.",
        );
        let parsed = decompose(input).unwrap();
        assert_eq!(parsed.quill_tag(), "my_quill");

        let title = parsed.get_field("title").unwrap().as_str().unwrap();
        let author = parsed.get_field("author").unwrap().as_str().unwrap();
        assert_eq!(title, "Document Title");
        assert_eq!(author, "John Doe");
    }

    // Error handling

    /// An upper-case CARD name is rejected with an "Invalid field name" error.
    #[test]
    fn test_invalid_scope_name_uppercase() {
        let outcome = decompose("---\nCARD: ITEMS\n---\n\nBody.");
        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("Invalid field name"));
    }

    /// A CARD name that starts with a digit is rejected.
    #[test]
    fn test_invalid_scope_name_starts_with_number() {
        let outcome = decompose("---\nCARD: 123items\n---\n\nBody.");
        assert!(outcome.is_err());
    }

    /// A CARD name containing a hyphen is rejected.
    #[test]
    fn test_invalid_scope_name_with_hyphen() {
        let outcome = decompose("---\nCARD: my-items\n---\n\nBody.");
        assert!(outcome.is_err());
    }

    /// A QUILL name containing upper-case letters is rejected.
    #[test]
    fn test_invalid_quill_name_uppercase() {
        let outcome = decompose("---\nQUILL: MyQuill\n---\n\nBody.");
        assert!(outcome.is_err());
    }

    /// A frontmatter line without a `key: value` colon is a YAML error.
    #[test]
    fn test_yaml_syntax_error_missing_colon() {
        let outcome = decompose("---\ntitle Test\n---\n\nBody.");
        assert!(outcome.is_err());
    }

    /// Inconsistent sequence indentation must never panic.
    #[test]
    fn test_yaml_syntax_error_bad_indentation() {
        let input = "---\nitems:\n- one\n - two\n---\n\nBody.";
        // Whether this is an error depends on the YAML parser, so the outcome is
        // deliberately ignored; the test only guards against a panic.
        let _ = decompose(input);
    }

    // Body extraction edge cases

    /// Blank lines between the frontmatter and the text stay in the body.
    #[test]
    fn test_body_with_leading_newlines() {
        let input = "---\ntitle: Test\n---\n\n\n\nBody with leading newlines.";
        let parsed = decompose(input).unwrap();
        let text = parsed.body().unwrap();
        assert!(text.starts_with('\n'));
    }

    /// Trailing newlines at the end of the document stay in the body.
    #[test]
    fn test_body_with_trailing_newlines() {
        let input = "---\ntitle: Test\n---\n\nBody.\n\n\n";
        let parsed = decompose(input).unwrap();
        let text = parsed.body().unwrap();
        assert!(text.ends_with('\n'));
    }

    /// A document that ends right after the frontmatter has an empty body.
    #[test]
    fn test_no_body_after_frontmatter() {
        let parsed = decompose("---\ntitle: Test\n---").unwrap();
        assert_eq!(parsed.body(), Some(""));
    }

    // Tag name validation

    /// A lone underscore is a valid tag name.
    #[test]
    fn test_valid_tag_name_single_underscore() {
        assert!(is_valid_tag_name("_"));
    }

    /// A leading underscore is allowed.
    #[test]
    fn test_valid_tag_name_underscore_prefix() {
        assert!(is_valid_tag_name("_private"));
    }

    /// Digits are allowed after the first character.
    #[test]
    fn test_valid_tag_name_with_numbers() {
        for &name in &["item1", "item_2"] {
            assert!(is_valid_tag_name(name), "{:?} should be accepted", name);
        }
    }

    /// The empty string is not a valid tag name.
    #[test]
    fn test_invalid_tag_name_empty() {
        assert!(!is_valid_tag_name(""));
    }

    /// A tag name must not start with a digit.
    #[test]
    fn test_invalid_tag_name_starts_with_number() {
        assert!(!is_valid_tag_name("1item"));
    }

    /// Upper-case letters are not allowed anywhere in a tag name.
    #[test]
    fn test_invalid_tag_name_uppercase() {
        for &name in &["Items", "ITEMS"] {
            assert!(!is_valid_tag_name(name), "{:?} should be rejected", name);
        }
    }

    /// Hyphens, dots and spaces are not allowed in a tag name.
    #[test]
    fn test_invalid_tag_name_special_chars() {
        for &name in &["my-items", "my.items", "my items"] {
            assert!(!is_valid_tag_name(name), "{:?} should be rejected", name);
        }
    }

    // Guillemet handling in YAML: values must pass through parsing unchanged

    /// Non-string scalars, at top level and inside a sequence, keep their types.
    #[test]
    fn test_guillemet_in_yaml_preserves_non_strings() {
        let input = concat!(
            "---\n",
            "count: 42\n",
            "price: 19.99\n",
            "active: true\n",
            "items:\n",
            "  - first\n",
            "  - 100\n",
            "  - true\n",
            "---\n",
            "\n",
            "Body.",
        );
        let parsed = decompose(input).unwrap();
        assert_eq!(parsed.get_field("count").unwrap().as_i64().unwrap(), 42);
        assert_eq!(parsed.get_field("price").unwrap().as_f64().unwrap(), 19.99);
        assert!(parsed.get_field("active").unwrap().as_bool().unwrap());

        // The mixed-type sequence is part of the fixture, so check it too:
        // each element must keep its own scalar type.
        let items = parsed.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 3);
        assert_eq!(items[0].as_str(), Some("first"));
        assert_eq!(items[1].as_i64(), Some(100));
        assert_eq!(items[2].as_bool(), Some(true));
    }

    /// Guillemets already present in the input are left exactly as written.
    #[test]
    fn test_guillemet_double_conversion_prevention() {
        // «» in the input must not be processed a second time or escaped.
        let input = "---\ntitle: Already «converted»\n---\n\nBody.";
        let parsed = decompose(input).unwrap();
        let title = parsed.get_field("title").unwrap().as_str().unwrap();
        assert_eq!(title, "Already «converted»");
    }
}