quillmark_core/
parse.rs

1//! # Parsing Module
2//!
3//! Parsing functionality for markdown documents with YAML frontmatter.
4//!
5//! ## Overview
6//!
7//! The `parse` module provides the [`ParsedDocument::from_markdown`] function for parsing markdown documents with YAML frontmatter into named fields and a body.
8//!
9//! ## Key Types
10//!
11//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
12//! - [`BODY_FIELD`]: Constant for the field name storing document body
13//!
14//! ## Examples
15//!
16//! ### Basic Parsing
17//!
18//! ```
19//! use quillmark_core::ParsedDocument;
20//!
21//! let markdown = r#"---
22//! title: My Document
23//! author: John Doe
24//! ---
25//!
26//! # Introduction
27//!
28//! Document content here.
29//! "#;
30//!
31//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
32//! let title = doc.get_field("title")
33//!     .and_then(|v| v.as_str())
34//!     .unwrap_or("Untitled");
35//! ```
36//!
37//! ## Error Handling
38//!
39//! The [`ParsedDocument::from_markdown`] function returns errors for:
40//! - Malformed YAML syntax
41//! - Unclosed frontmatter blocks
42//! - Multiple global frontmatter blocks
43//! - Both QUILL and CARD (or its alias SCOPE) specified in the same block
44//! - Reserved field name usage
45//! - Name collisions
46//!
47//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.
48
49use std::collections::HashMap;
50
51use crate::value::QuillValue;
52
53/// The field name used to store the document body
///
/// The parser stores the global body under this key, gives every card entry its
/// own value under the same key, and rejects it as a `CARD`/`SCOPE` value.
54pub const BODY_FIELD: &str = "body";
55
56/// Reserved tag name for quill specification
///
/// NOTE(review): nothing in the visible part of this module reads this constant;
/// confirm how callers use it before changing the value.
57pub const QUILL_TAG: &str = "quill";
58
59/// A parsed markdown document with frontmatter
///
/// Holds every parsed field in one map, together with the quill tag that was
/// selected for the document.
60#[derive(Debug, Clone)]
61pub struct ParsedDocument {
    // All fields by name. Documents produced by `decompose` always contain the
    // `BODY_FIELD` entry, and a `CARDS` array when at least one metadata block
    // was found.
62    fields: HashMap<String, QuillValue>,
    // Name taken from the `QUILL` key of the first metadata block, or
    // "__default__" when no such key is present.
63    quill_tag: String,
64}
65
66impl ParsedDocument {
67    /// Create a new ParsedDocument with the given fields
68    pub fn new(fields: HashMap<String, QuillValue>) -> Self {
69        Self {
70            fields,
71            quill_tag: "__default__".to_string(),
72        }
73    }
74
75    /// Create a ParsedDocument from fields and quill tag
76    pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: String) -> Self {
77        Self { fields, quill_tag }
78    }
79
80    /// Create a ParsedDocument from markdown string
81    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
82        decompose(markdown)
83    }
84
85    /// Get the quill tag (from QUILL key, or "__default__" if not specified)
86    pub fn quill_tag(&self) -> &str {
87        &self.quill_tag
88    }
89
90    /// Get the document body
91    pub fn body(&self) -> Option<&str> {
92        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
93    }
94
95    /// Get a specific field
96    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
97        self.fields.get(name)
98    }
99
100    /// Get all fields (including body)
101    pub fn fields(&self) -> &HashMap<String, QuillValue> {
102        &self.fields
103    }
104
105    /// Create a new ParsedDocument with default values applied
106    ///
107    /// This method creates a new ParsedDocument with default values applied for any
108    /// fields that are missing from the original document but have defaults specified.
109    /// Existing fields are preserved and not overwritten.
110    ///
111    /// # Arguments
112    ///
113    /// * `defaults` - A HashMap of field names to their default QuillValues
114    ///
115    /// # Returns
116    ///
117    /// A new ParsedDocument with defaults applied for missing fields
118    pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
119        let mut fields = self.fields.clone();
120
121        for (field_name, default_value) in defaults {
122            // Only apply default if field is missing
123            if !fields.contains_key(field_name) {
124                fields.insert(field_name.clone(), default_value.clone());
125            }
126        }
127
128        Self {
129            fields,
130            quill_tag: self.quill_tag.clone(),
131        }
132    }
133
134    /// Create a new ParsedDocument with coerced field values
135    ///
136    /// This method applies type coercions to field values based on the schema.
137    /// Coercions include:
138    /// - Singular values to arrays when schema expects array
139    /// - String "true"/"false" to boolean
140    /// - Numbers to boolean (0=false, non-zero=true)
141    /// - String numbers to number type
142    /// - Boolean to number (true=1, false=0)
143    ///
144    /// # Arguments
145    ///
146    /// * `schema` - A JSON Schema object defining expected field types
147    ///
148    /// # Returns
149    ///
150    /// A new ParsedDocument with coerced field values
151    pub fn with_coercion(&self, schema: &QuillValue) -> Self {
152        use crate::schema::coerce_document;
153
154        let coerced_fields = coerce_document(schema, &self.fields);
155
156        Self {
157            fields: coerced_fields,
158            quill_tag: self.quill_tag.clone(),
159        }
160    }
161}
162
/// One `---`-delimited metadata block located by `find_metadata_blocks`.
///
/// `start` and `end` are byte offsets into the original markdown string. At most
/// one of `tag` and `quill_name` is set, because a block naming both QUILL and
/// CARD/SCOPE is rejected while scanning.
163#[derive(Debug)]
164struct MetadataBlock {
165    start: usize,                          // Byte offset of the opening "---"
166    end: usize,                            // Byte offset just past the closing delimiter (including its line ending, if any)
167    yaml_value: Option<serde_yaml::Value>, // Parsed YAML with the QUILL/CARD/SCOPE key removed (None if nothing else remains)
168    tag: Option<String>,                   // Card name from the CARD key (or its SCOPE alias)
169    quill_name: Option<String>,            // Quill name from QUILL key
170}
171
/// Reports whether `name` matches `[a-z_][a-z0-9_]*`.
///
/// The first character must be an ASCII lowercase letter or `_`; every later
/// character may additionally be an ASCII digit. The empty string is rejected.
fn is_valid_tag_name(name: &str) -> bool {
    let mut rest = name.chars();

    match rest.next() {
        Some(head) if head == '_' || head.is_ascii_lowercase() => {
            rest.all(|c| c == '_' || c.is_ascii_lowercase() || c.is_ascii_digit())
        }
        // Empty input, or a first character outside `[a-z_]`.
        _ => false,
    }
}
193
194/// Find all metadata blocks in the document
195fn find_metadata_blocks(markdown: &str) -> Result<Vec<MetadataBlock>, crate::error::ParseError> {
196    let mut blocks = Vec::new();
197    let mut pos = 0;
198
199    while pos < markdown.len() {
200        // Look for opening "---\n" or "---\r\n"
201        let search_str = &markdown[pos..];
202        let delimiter_result = search_str
203            .find("---\n")
204            .map(|p| (p, 4, "\n"))
205            .or_else(|| search_str.find("---\r\n").map(|p| (p, 5, "\r\n")));
206
207        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
208            let abs_pos = pos + delimiter_pos;
209
210            // Check if the delimiter is at the start of a line
211            let is_start_of_line = if abs_pos == 0 {
212                true
213            } else {
214                let char_before = markdown.as_bytes()[abs_pos - 1];
215                char_before == b'\n' || char_before == b'\r'
216            };
217
218            if !is_start_of_line {
219                pos = abs_pos + 1;
220                continue;
221            }
222
223            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"
224
225            // Check if this --- is a horizontal rule (blank lines above AND below)
226            let preceded_by_blank = if abs_pos > 0 {
227                // Check if there's a blank line before the ---
228                let before = &markdown[..abs_pos];
229                before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
230            } else {
231                false
232            };
233
234            let followed_by_blank = if content_start < markdown.len() {
235                markdown[content_start..].starts_with('\n')
236                    || markdown[content_start..].starts_with("\r\n")
237            } else {
238                false
239            };
240
241            // Horizontal rule: blank lines both above and below
242            if preceded_by_blank && followed_by_blank {
243                // This is a horizontal rule in the body, skip it
244                pos = abs_pos + 3; // Skip past "---"
245                continue;
246            }
247
248            // Check if followed by non-blank line (or if we're at document start)
249            // This starts a metadata block
250            if followed_by_blank {
251                // --- followed by blank line but NOT preceded by blank line
252                // This is NOT a metadata block opening, skip it
253                pos = abs_pos + 3;
254                continue;
255            }
256
257            // Found potential metadata block opening (followed by non-blank line)
258            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
259            let rest = &markdown[content_start..];
260
261            // First try to find delimiters with trailing newlines
262            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
263            let closing_with_newline = closing_patterns
264                .iter()
265                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
266                .min_by_key(|(p, _)| *p);
267
268            // Also check for closing at end of document (no trailing newline)
269            let closing_at_eof = ["\n---", "\r\n---"]
270                .iter()
271                .filter_map(|delim| {
272                    rest.find(delim).and_then(|p| {
273                        if p + delim.len() == rest.len() {
274                            Some((p, delim.len()))
275                        } else {
276                            None
277                        }
278                    })
279                })
280                .min_by_key(|(p, _)| *p);
281
282            let closing_result = match (closing_with_newline, closing_at_eof) {
283                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
284                (Some(_), Some(_)) => closing_with_newline,
285                (Some(_), None) => closing_with_newline,
286                (None, Some(_)) => closing_at_eof,
287                (None, None) => None,
288            };
289
290            if let Some((closing_pos, closing_len)) = closing_result {
291                let abs_closing_pos = content_start + closing_pos;
292                let content = &markdown[content_start..abs_closing_pos];
293
294                // Check YAML size limit
295                if content.len() > crate::error::MAX_YAML_SIZE {
296                    return Err(crate::error::ParseError::InputTooLarge {
297                        size: content.len(),
298                        max: crate::error::MAX_YAML_SIZE,
299                    });
300                }
301
302                // Parse YAML content to check for reserved keys (QUILL, SCOPE)
303                // First, try to parse as YAML
304                let (tag, quill_name, yaml_value) = if !content.is_empty() {
305                    // Try to parse the YAML to check for reserved keys
306                    match serde_yaml::from_str::<serde_yaml::Value>(content) {
307                        Ok(parsed_yaml) => {
308                            if let Some(mapping) = parsed_yaml.as_mapping() {
309                                let quill_key = serde_yaml::Value::String("QUILL".to_string());
310                                let card_key = serde_yaml::Value::String("CARD".to_string());
311                                let scope_key = serde_yaml::Value::String("SCOPE".to_string()); // Backwards compatibility alias
312
313                                let has_quill = mapping.contains_key(&quill_key);
314                                let has_card = mapping.contains_key(&card_key);
315                                let has_scope = mapping.contains_key(&scope_key);
316
317                                // CARD and SCOPE are aliases - can't use both
318                                if has_card && has_scope {
319                                    return Err(crate::error::ParseError::InvalidStructure(
320                                        "Cannot specify both CARD and SCOPE in the same block (SCOPE is an alias for CARD)"
321                                            .to_string(),
322                                    ));
323                                }
324
325                                let effective_card_key = if has_card {
326                                    Some(&card_key)
327                                } else if has_scope {
328                                    Some(&scope_key)
329                                } else {
330                                    None
331                                };
332
333                                if has_quill && effective_card_key.is_some() {
334                                    return Err(crate::error::ParseError::InvalidStructure(
335                                        "Cannot specify both QUILL and CARD/SCOPE in the same block"
336                                            .to_string(),
337                                    ));
338                                }
339
340                                if has_quill {
341                                    // Extract quill name
342                                    let quill_value = mapping.get(&quill_key).unwrap();
343                                    let quill_name_str = quill_value
344                                        .as_str()
345                                        .ok_or("QUILL value must be a string")?;
346
347                                    if !is_valid_tag_name(quill_name_str) {
348                                        return Err(crate::error::ParseError::InvalidStructure(format!(
349                                            "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
350                                            quill_name_str
351                                        )));
352                                    }
353
354                                    // Remove QUILL from the YAML value for processing
355                                    let mut new_mapping = mapping.clone();
356                                    new_mapping.remove(&quill_key);
357                                    let new_value = if new_mapping.is_empty() {
358                                        None
359                                    } else {
360                                        Some(serde_yaml::Value::Mapping(new_mapping))
361                                    };
362
363                                    (None, Some(quill_name_str.to_string()), new_value)
364                                } else if let Some(card_key_used) = effective_card_key {
365                                    // Extract card field name (handles both CARD and SCOPE)
366                                    let card_value = mapping.get(card_key_used).unwrap();
367                                    let field_name = card_value
368                                        .as_str()
369                                        .ok_or("CARD/SCOPE value must be a string")?;
370
371                                    if !is_valid_tag_name(field_name) {
372                                        return Err(crate::error::ParseError::InvalidStructure(format!(
373                                            "Invalid card field name '{}': must match pattern [a-z_][a-z0-9_]*",
374                                            field_name
375                                        )));
376                                    }
377
378                                    if field_name == BODY_FIELD {
379                                        return Err(crate::error::ParseError::InvalidStructure(format!(
380                                            "Cannot use reserved field name '{}' as CARD/SCOPE value",
381                                            BODY_FIELD
382                                        )));
383                                    }
384
385                                    // Remove CARD/SCOPE from the YAML value for processing
386                                    let mut new_mapping = mapping.clone();
387                                    new_mapping.remove(card_key_used);
388                                    let new_value = if new_mapping.is_empty() {
389                                        None
390                                    } else {
391                                        Some(serde_yaml::Value::Mapping(new_mapping))
392                                    };
393
394                                    (Some(field_name.to_string()), None, new_value)
395                                } else {
396                                    // No reserved keys, keep the parsed YAML
397                                    (None, None, Some(parsed_yaml))
398                                }
399                            } else {
400                                // Not a mapping, keep the parsed YAML (could be null for whitespace)
401                                (None, None, Some(parsed_yaml))
402                            }
403                        }
404                        Err(e) => {
405                            // YAML parsing failed - return error with context
406                            return Err(crate::error::ParseError::YamlError(e));
407                        }
408                    }
409                } else {
410                    // Empty content
411                    (None, None, None)
412                };
413
414                blocks.push(MetadataBlock {
415                    start: abs_pos,
416                    end: abs_closing_pos + closing_len, // After closing delimiter
417                    yaml_value,
418                    tag,
419                    quill_name,
420                });
421
422                pos = abs_closing_pos + closing_len;
423            } else if abs_pos == 0 {
424                // Frontmatter started but not closed
425                return Err(crate::error::ParseError::InvalidStructure(
426                    "Frontmatter started but not closed with ---".to_string(),
427                ));
428            } else {
429                // Not a valid metadata block, skip this position
430                pos = abs_pos + 3;
431            }
432        } else {
433            break;
434        }
435    }
436
437    Ok(blocks)
438}
439
440/// Decompose markdown into frontmatter fields and body
441fn decompose(markdown: &str) -> Result<ParsedDocument, crate::error::ParseError> {
442    // Check input size limit
443    if markdown.len() > crate::error::MAX_INPUT_SIZE {
444        return Err(crate::error::ParseError::InputTooLarge {
445            size: markdown.len(),
446            max: crate::error::MAX_INPUT_SIZE,
447        });
448    }
449
450    let mut fields = HashMap::new();
451
452    // Find all metadata blocks
453    let blocks = find_metadata_blocks(markdown)?;
454
455    if blocks.is_empty() {
456        // No metadata blocks, entire content is body
457        fields.insert(
458            BODY_FIELD.to_string(),
459            QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
460        );
461        return Ok(ParsedDocument::new(fields));
462    }
463
464    // Collect all card items into unified CARDS array
465    let mut cards_array: Vec<serde_json::Value> = Vec::new();
466    let mut global_frontmatter_index: Option<usize> = None;
467    let mut quill_name: Option<String> = None;
468
469    // First pass: identify global frontmatter, quill directive, and validate
470    for (idx, block) in blocks.iter().enumerate() {
471        if idx == 0 {
472            // Top-level frontmatter: can have QUILL or neither (not considered a card)
473            if let Some(ref name) = block.quill_name {
474                quill_name = Some(name.clone());
475            }
476            // If it has neither QUILL nor CARD, it's global frontmatter
477            if block.tag.is_none() && block.quill_name.is_none() {
478                global_frontmatter_index = Some(idx);
479            }
480        } else {
481            // Inline blocks (idx > 0): MUST have CARD, cannot have QUILL
482            if block.quill_name.is_some() {
483                return Err(crate::error::ParseError::InvalidStructure("QUILL directive can only appear in the top-level frontmatter, not in inline blocks. Use CARD instead.".to_string()));
484            }
485            if block.tag.is_none() {
486                // Inline block without CARD
487                return Err(crate::error::ParseError::missing_card_directive());
488            }
489        }
490    }
491
492    // Parse global frontmatter if present
493    if let Some(idx) = global_frontmatter_index {
494        let block = &blocks[idx];
495
496        // Get parsed YAML fields directly (already parsed in find_metadata_blocks)
497        let yaml_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
498            Some(serde_yaml::Value::Mapping(mapping)) => mapping
499                .iter()
500                .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
501                .collect(),
502            Some(serde_yaml::Value::Null) => {
503                // Null value (from whitespace-only YAML) - treat as empty mapping
504                HashMap::new()
505            }
506            Some(_) => {
507                // Non-mapping, non-null YAML (e.g., scalar, sequence) - this is an error for frontmatter
508                return Err(crate::error::ParseError::InvalidStructure(
509                    "Invalid YAML frontmatter: expected a mapping".to_string(),
510                ));
511            }
512            None => HashMap::new(),
513        };
514
515        // Check that all tagged blocks don't conflict with global fields
516        // Exception: if the global field is an array, allow it (we'll merge later)
517        for other_block in &blocks {
518            if let Some(ref tag) = other_block.tag {
519                if let Some(global_value) = yaml_fields.get(tag) {
520                    // Check if the global value is an array
521                    if global_value.as_sequence().is_none() {
522                        return Err(crate::error::ParseError::InvalidStructure(format!(
523                            "Name collision: global field '{}' conflicts with tagged attribute",
524                            tag
525                        )));
526                    }
527                }
528            }
529        }
530
531        // Convert YAML values to QuillValue at boundary
532        for (key, value) in yaml_fields {
533            fields.insert(key, QuillValue::from_yaml(value)?);
534        }
535    }
536
537    // Process blocks with quill directives
538    for block in &blocks {
539        if block.quill_name.is_some() {
540            // Quill directive blocks can have YAML content (becomes part of frontmatter)
541            if let Some(ref yaml_val) = block.yaml_value {
542                let yaml_fields: HashMap<String, serde_yaml::Value> = match yaml_val {
543                    serde_yaml::Value::Mapping(mapping) => mapping
544                        .iter()
545                        .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
546                        .collect(),
547                    serde_yaml::Value::Null => {
548                        // Null value (from whitespace-only YAML) - treat as empty mapping
549                        HashMap::new()
550                    }
551                    _ => {
552                        return Err(crate::error::ParseError::InvalidStructure(
553                            "Invalid YAML in quill block: expected a mapping".to_string(),
554                        ));
555                    }
556                };
557
558                // Check for conflicts with existing fields
559                for key in yaml_fields.keys() {
560                    if fields.contains_key(key) {
561                        return Err(crate::error::ParseError::InvalidStructure(format!(
562                            "Name collision: quill block field '{}' conflicts with existing field",
563                            key
564                        )));
565                    }
566                }
567
568                // Convert YAML values to QuillValue at boundary
569                for (key, value) in yaml_fields {
570                    fields.insert(key, QuillValue::from_yaml(value)?);
571                }
572            }
573        }
574    }
575
576    // Parse tagged blocks (CARD blocks)
577    for (idx, block) in blocks.iter().enumerate() {
578        if let Some(ref tag_name) = block.tag {
579            // Card names cannot conflict with frontmatter field names
580            if fields.contains_key(tag_name) {
581                return Err(crate::error::ParseError::InvalidStructure(format!(
582                    "Name collision: CARD type '{}' conflicts with frontmatter field name",
583                    tag_name
584                )));
585            }
586
587            // Get YAML metadata directly (already parsed in find_metadata_blocks)
588            let mut item_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
589                Some(serde_yaml::Value::Mapping(mapping)) => mapping
590                    .iter()
591                    .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
592                    .collect(),
593                Some(serde_yaml::Value::Null) => {
594                    // Null value (from whitespace-only YAML) - treat as empty mapping
595                    HashMap::new()
596                }
597                Some(_) => {
598                    return Err(crate::error::ParseError::InvalidStructure(format!(
599                        "Invalid YAML in card block '{}': expected a mapping",
600                        tag_name
601                    )));
602                }
603                None => HashMap::new(),
604            };
605
606            // Extract body for this card block
607            let body_start = block.end;
608            let body_end = if idx + 1 < blocks.len() {
609                blocks[idx + 1].start
610            } else {
611                markdown.len()
612            };
613            let body = &markdown[body_start..body_end];
614
615            // Add body to item fields
616            item_fields.insert(
617                BODY_FIELD.to_string(),
618                serde_yaml::Value::String(body.to_string()),
619            );
620
621            // Add CARD discriminator field
622            item_fields.insert(
623                "CARD".to_string(),
624                serde_yaml::Value::String(tag_name.clone()),
625            );
626
627            // Convert to JSON and add to CARDS array
628            let item_json = serde_json::to_value(&item_fields)
629                .map_err(|e| format!("Failed to convert card to JSON: {}", e))?;
630            cards_array.push(item_json);
631        }
632    }
633
634    // Extract global body
635    // Body starts after global frontmatter or quill block (whichever comes first)
636    // Body ends at the first card block or EOF
637    let first_non_card_block_idx = blocks
638        .iter()
639        .position(|b| b.tag.is_none() && b.quill_name.is_none())
640        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));
641
642    let (body_start, body_end) = if let Some(idx) = first_non_card_block_idx {
643        // Body starts after the first non-card block (global frontmatter or quill)
644        let start = blocks[idx].end;
645
646        // Body ends at the first card block after this, or EOF
647        let end = blocks
648            .iter()
649            .skip(idx + 1)
650            .find(|b| b.tag.is_some())
651            .map(|b| b.start)
652            .unwrap_or(markdown.len());
653
654        (start, end)
655    } else {
656        // No global frontmatter or quill block - body is everything before the first card block
657        let end = blocks
658            .iter()
659            .find(|b| b.tag.is_some())
660            .map(|b| b.start)
661            .unwrap_or(0);
662
663        (0, end)
664    };
665
666    let global_body = &markdown[body_start..body_end];
667
668    fields.insert(
669        BODY_FIELD.to_string(),
670        QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
671    );
672
673    // Always add CARDS array to fields (may be empty)
674    fields.insert(
675        "CARDS".to_string(),
676        QuillValue::from_json(serde_json::Value::Array(cards_array)),
677    );
678
679    let quill_tag = quill_name.unwrap_or_else(|| "__default__".to_string());
680    let parsed = ParsedDocument::with_quill_tag(fields, quill_tag);
681
682    Ok(parsed)
683}
684
685#[cfg(test)]
686mod tests {
687    use super::*;
688
689    #[test]
690    fn test_no_frontmatter() {
691        let markdown = "# Hello World\n\nThis is a test.";
692        let doc = decompose(markdown).unwrap();
693
694        assert_eq!(doc.body(), Some(markdown));
695        assert_eq!(doc.fields().len(), 1);
696        // Verify default quill tag is set
697        assert_eq!(doc.quill_tag(), "__default__");
698    }
699
700    #[test]
701    fn test_with_frontmatter() {
702        let markdown = r#"---
703title: Test Document
704author: Test Author
705---
706
707# Hello World
708
709This is the body."#;
710
711        let doc = decompose(markdown).unwrap();
712
713        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
714        assert_eq!(
715            doc.get_field("title").unwrap().as_str().unwrap(),
716            "Test Document"
717        );
718        assert_eq!(
719            doc.get_field("author").unwrap().as_str().unwrap(),
720            "Test Author"
721        );
722        assert_eq!(doc.fields().len(), 4); // title, author, body, CARDS
723                                           // Verify default quill tag is set when no QUILL directive
724        assert_eq!(doc.quill_tag(), "__default__");
725    }
726
727    #[test]
728    fn test_complex_yaml_frontmatter() {
729        let markdown = r#"---
730title: Complex Document
731tags:
732  - test
733  - yaml
734metadata:
735  version: 1.0
736  nested:
737    field: value
738---
739
740Content here."#;
741
742        let doc = decompose(markdown).unwrap();
743
744        assert_eq!(doc.body(), Some("\nContent here."));
745        assert_eq!(
746            doc.get_field("title").unwrap().as_str().unwrap(),
747            "Complex Document"
748        );
749
750        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
751        assert_eq!(tags.len(), 2);
752        assert_eq!(tags[0].as_str().unwrap(), "test");
753        assert_eq!(tags[1].as_str().unwrap(), "yaml");
754    }
755
756    #[test]
757    fn test_with_defaults_empty_document() {
758        use std::collections::HashMap;
759
760        let mut defaults = HashMap::new();
761        defaults.insert(
762            "status".to_string(),
763            QuillValue::from_json(serde_json::json!("draft")),
764        );
765        defaults.insert(
766            "version".to_string(),
767            QuillValue::from_json(serde_json::json!(1)),
768        );
769
770        // Create an empty parsed document
771        let doc = ParsedDocument::new(HashMap::new());
772        let doc_with_defaults = doc.with_defaults(&defaults);
773
774        // Check that defaults were applied
775        assert_eq!(
776            doc_with_defaults
777                .get_field("status")
778                .unwrap()
779                .as_str()
780                .unwrap(),
781            "draft"
782        );
783        assert_eq!(
784            doc_with_defaults
785                .get_field("version")
786                .unwrap()
787                .as_number()
788                .unwrap()
789                .as_i64()
790                .unwrap(),
791            1
792        );
793    }
794
795    #[test]
796    fn test_with_defaults_preserves_existing_values() {
797        use std::collections::HashMap;
798
799        let mut defaults = HashMap::new();
800        defaults.insert(
801            "status".to_string(),
802            QuillValue::from_json(serde_json::json!("draft")),
803        );
804
805        // Create document with existing status
806        let mut fields = HashMap::new();
807        fields.insert(
808            "status".to_string(),
809            QuillValue::from_json(serde_json::json!("published")),
810        );
811        let doc = ParsedDocument::new(fields);
812
813        let doc_with_defaults = doc.with_defaults(&defaults);
814
815        // Existing value should be preserved
816        assert_eq!(
817            doc_with_defaults
818                .get_field("status")
819                .unwrap()
820                .as_str()
821                .unwrap(),
822            "published"
823        );
824    }
825
826    #[test]
827    fn test_with_defaults_partial_application() {
828        use std::collections::HashMap;
829
830        let mut defaults = HashMap::new();
831        defaults.insert(
832            "status".to_string(),
833            QuillValue::from_json(serde_json::json!("draft")),
834        );
835        defaults.insert(
836            "version".to_string(),
837            QuillValue::from_json(serde_json::json!(1)),
838        );
839
840        // Create document with only one field
841        let mut fields = HashMap::new();
842        fields.insert(
843            "status".to_string(),
844            QuillValue::from_json(serde_json::json!("published")),
845        );
846        let doc = ParsedDocument::new(fields);
847
848        let doc_with_defaults = doc.with_defaults(&defaults);
849
850        // Existing field preserved, missing field gets default
851        assert_eq!(
852            doc_with_defaults
853                .get_field("status")
854                .unwrap()
855                .as_str()
856                .unwrap(),
857            "published"
858        );
859        assert_eq!(
860            doc_with_defaults
861                .get_field("version")
862                .unwrap()
863                .as_number()
864                .unwrap()
865                .as_i64()
866                .unwrap(),
867            1
868        );
869    }
870
871    #[test]
872    fn test_with_defaults_no_defaults() {
873        use std::collections::HashMap;
874
875        let defaults = HashMap::new(); // Empty defaults map
876
877        let doc = ParsedDocument::new(HashMap::new());
878        let doc_with_defaults = doc.with_defaults(&defaults);
879
880        // No defaults should be applied
881        assert!(doc_with_defaults.fields().is_empty());
882    }
883
884    #[test]
885    fn test_with_defaults_complex_types() {
886        use std::collections::HashMap;
887
888        let mut defaults = HashMap::new();
889        defaults.insert(
890            "tags".to_string(),
891            QuillValue::from_json(serde_json::json!(["default", "tag"])),
892        );
893
894        let doc = ParsedDocument::new(HashMap::new());
895        let doc_with_defaults = doc.with_defaults(&defaults);
896
897        // Complex default value should be applied
898        let tags = doc_with_defaults
899            .get_field("tags")
900            .unwrap()
901            .as_sequence()
902            .unwrap();
903        assert_eq!(tags.len(), 2);
904        assert_eq!(tags[0].as_str().unwrap(), "default");
905        assert_eq!(tags[1].as_str().unwrap(), "tag");
906    }
907
908    #[test]
909    fn test_with_coercion_singular_to_array() {
910        use std::collections::HashMap;
911
912        let schema = QuillValue::from_json(serde_json::json!({
913            "$schema": "https://json-schema.org/draft/2019-09/schema",
914            "type": "object",
915            "properties": {
916                "tags": {"type": "array"}
917            }
918        }));
919
920        let mut fields = HashMap::new();
921        fields.insert(
922            "tags".to_string(),
923            QuillValue::from_json(serde_json::json!("single-tag")),
924        );
925        let doc = ParsedDocument::new(fields);
926
927        let coerced_doc = doc.with_coercion(&schema);
928
929        let tags = coerced_doc.get_field("tags").unwrap();
930        assert!(tags.as_array().is_some());
931        let tags_array = tags.as_array().unwrap();
932        assert_eq!(tags_array.len(), 1);
933        assert_eq!(tags_array[0].as_str().unwrap(), "single-tag");
934    }
935
936    #[test]
937    fn test_with_coercion_string_to_boolean() {
938        use std::collections::HashMap;
939
940        let schema = QuillValue::from_json(serde_json::json!({
941            "$schema": "https://json-schema.org/draft/2019-09/schema",
942            "type": "object",
943            "properties": {
944                "active": {"type": "boolean"}
945            }
946        }));
947
948        let mut fields = HashMap::new();
949        fields.insert(
950            "active".to_string(),
951            QuillValue::from_json(serde_json::json!("true")),
952        );
953        let doc = ParsedDocument::new(fields);
954
955        let coerced_doc = doc.with_coercion(&schema);
956
957        assert_eq!(
958            coerced_doc.get_field("active").unwrap().as_bool().unwrap(),
959            true
960        );
961    }
962
963    #[test]
964    fn test_with_coercion_string_to_number() {
965        use std::collections::HashMap;
966
967        let schema = QuillValue::from_json(serde_json::json!({
968            "$schema": "https://json-schema.org/draft/2019-09/schema",
969            "type": "object",
970            "properties": {
971                "count": {"type": "number"}
972            }
973        }));
974
975        let mut fields = HashMap::new();
976        fields.insert(
977            "count".to_string(),
978            QuillValue::from_json(serde_json::json!("42")),
979        );
980        let doc = ParsedDocument::new(fields);
981
982        let coerced_doc = doc.with_coercion(&schema);
983
984        assert_eq!(
985            coerced_doc.get_field("count").unwrap().as_i64().unwrap(),
986            42
987        );
988    }
989
990    #[test]
991    fn test_invalid_yaml() {
992        let markdown = r#"---
993title: [invalid yaml
994author: missing close bracket
995---
996
997Content here."#;
998
999        let result = decompose(markdown);
1000        assert!(result.is_err());
1001        assert!(result
1002            .unwrap_err()
1003            .to_string()
1004            .contains("YAML parsing error"));
1005    }
1006
1007    #[test]
1008    fn test_unclosed_frontmatter() {
1009        let markdown = r#"---
1010title: Test
1011author: Test Author
1012
1013Content without closing ---"#;
1014
1015        let result = decompose(markdown);
1016        assert!(result.is_err());
1017        assert!(result.unwrap_err().to_string().contains("not closed"));
1018    }
1019
1020    // Extended metadata tests
1021
1022    #[test]
1023    fn test_basic_tagged_block() {
1024        let markdown = r#"---
1025title: Main Document
1026---
1027
1028Main body content.
1029
1030---
1031CARD: items
1032name: Item 1
1033---
1034
1035Body of item 1."#;
1036
1037        let doc = decompose(markdown).unwrap();
1038
1039        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
1040        assert_eq!(
1041            doc.get_field("title").unwrap().as_str().unwrap(),
1042            "Main Document"
1043        );
1044
1045        // Cards are now in CARDS array with CARD discriminator
1046        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1047        assert_eq!(cards.len(), 1);
1048
1049        let item = cards[0].as_object().unwrap();
1050        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1051        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1052        assert_eq!(
1053            item.get("body").unwrap().as_str().unwrap(),
1054            "\nBody of item 1."
1055        );
1056    }
1057
1058    #[test]
1059    fn test_multiple_tagged_blocks() {
1060        let markdown = r#"---
1061CARD: items
1062name: Item 1
1063tags: [a, b]
1064---
1065
1066First item body.
1067
1068---
1069CARD: items
1070name: Item 2
1071tags: [c, d]
1072---
1073
1074Second item body."#;
1075
1076        let doc = decompose(markdown).unwrap();
1077
1078        // Cards are in CARDS array
1079        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1080        assert_eq!(cards.len(), 2);
1081
1082        let item1 = cards[0].as_object().unwrap();
1083        assert_eq!(item1.get("CARD").unwrap().as_str().unwrap(), "items");
1084        assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");
1085
1086        let item2 = cards[1].as_object().unwrap();
1087        assert_eq!(item2.get("CARD").unwrap().as_str().unwrap(), "items");
1088        assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
1089    }
1090
1091    #[test]
1092    fn test_mixed_global_and_tagged() {
1093        let markdown = r#"---
1094title: Global
1095author: John Doe
1096---
1097
1098Global body.
1099
1100---
1101CARD: sections
1102title: Section 1
1103---
1104
1105Section 1 content.
1106
1107---
1108CARD: sections
1109title: Section 2
1110---
1111
1112Section 2 content."#;
1113
1114        let doc = decompose(markdown).unwrap();
1115
1116        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
1117        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
1118
1119        // Cards are in unified CARDS array
1120        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1121        assert_eq!(cards.len(), 2);
1122        assert_eq!(
1123            cards[0]
1124                .as_object()
1125                .unwrap()
1126                .get("CARD")
1127                .unwrap()
1128                .as_str()
1129                .unwrap(),
1130            "sections"
1131        );
1132    }
1133
1134    #[test]
1135    fn test_empty_tagged_metadata() {
1136        let markdown = r#"---
1137CARD: items
1138---
1139
1140Body without metadata."#;
1141
1142        let doc = decompose(markdown).unwrap();
1143
1144        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1145        assert_eq!(cards.len(), 1);
1146
1147        let item = cards[0].as_object().unwrap();
1148        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1149        assert_eq!(
1150            item.get("body").unwrap().as_str().unwrap(),
1151            "\nBody without metadata."
1152        );
1153    }
1154
1155    #[test]
1156    fn test_tagged_block_without_body() {
1157        let markdown = r#"---
1158CARD: items
1159name: Item
1160---"#;
1161
1162        let doc = decompose(markdown).unwrap();
1163
1164        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1165        assert_eq!(cards.len(), 1);
1166
1167        let item = cards[0].as_object().unwrap();
1168        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1169        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
1170    }
1171
1172    #[test]
1173    fn test_name_collision_global_and_tagged() {
1174        let markdown = r#"---
1175items: "global value"
1176---
1177
1178Body
1179
1180---
1181CARD: items
1182name: Item
1183---
1184
1185Item body"#;
1186
1187        let result = decompose(markdown);
1188        assert!(result.is_err());
1189        assert!(result.unwrap_err().to_string().contains("collision"));
1190    }
1191
1192    #[test]
1193    fn test_card_name_collision_with_array_field() {
1194        // CARD type names cannot conflict with any frontmatter field names (including arrays)
1195        let markdown = r#"---
1196items:
1197  - name: Global Item 1
1198    value: 100
1199---
1200
1201Global body
1202
1203---
1204CARD: items
1205name: Scope Item 1
1206---
1207
1208Scope item 1 body"#;
1209
1210        let result = decompose(markdown);
1211        assert!(result.is_err());
1212        assert!(result.unwrap_err().to_string().contains("collision"));
1213    }
1214
1215    #[test]
1216    fn test_empty_global_array_with_card() {
1217        // CARD type names cannot conflict with any frontmatter field names (even empty arrays)
1218        let markdown = r#"---
1219items: []
1220---
1221
1222Global body
1223
1224---
1225CARD: items
1226name: Item 1
1227---
1228
1229Item 1 body"#;
1230
1231        let result = decompose(markdown);
1232        assert!(result.is_err());
1233        assert!(result.unwrap_err().to_string().contains("collision"));
1234    }
1235
1236    #[test]
1237    fn test_reserved_field_name() {
1238        let markdown = r#"---
1239CARD: body
1240content: Test
1241---"#;
1242
1243        let result = decompose(markdown);
1244        assert!(result.is_err());
1245        assert!(result.unwrap_err().to_string().contains("reserved"));
1246    }
1247
1248    #[test]
1249    fn test_invalid_tag_syntax() {
1250        let markdown = r#"---
1251CARD: Invalid-Name
1252title: Test
1253---"#;
1254
1255        let result = decompose(markdown);
1256        assert!(result.is_err());
1257        assert!(result
1258            .unwrap_err()
1259            .to_string()
1260            .contains("Invalid card field name"));
1261    }
1262
1263    #[test]
1264    fn test_multiple_global_frontmatter_blocks() {
1265        let markdown = r#"---
1266title: First
1267---
1268
1269Body
1270
1271---
1272author: Second
1273---
1274
1275More body"#;
1276
1277        let result = decompose(markdown);
1278        assert!(result.is_err());
1279
1280        // Verify the error message contains CARD hint
1281        let err = result.unwrap_err();
1282        let err_str = err.to_string();
1283        assert!(
1284            err_str.contains("CARD"),
1285            "Error should mention CARD directive: {}",
1286            err_str
1287        );
1288        assert!(
1289            err_str.contains("missing"),
1290            "Error should indicate missing directive: {}",
1291            err_str
1292        );
1293    }
1294
1295    #[test]
1296    fn test_adjacent_blocks_different_tags() {
1297        let markdown = r#"---
1298CARD: items
1299name: Item 1
1300---
1301
1302Item 1 body
1303
1304---
1305CARD: sections
1306title: Section 1
1307---
1308
1309Section 1 body"#;
1310
1311        let doc = decompose(markdown).unwrap();
1312
1313        // All cards in unified CARDS array
1314        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1315        assert_eq!(cards.len(), 2);
1316
1317        // First card is "items" type
1318        let item = cards[0].as_object().unwrap();
1319        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1320        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1321
1322        // Second card is "sections" type
1323        let section = cards[1].as_object().unwrap();
1324        assert_eq!(section.get("CARD").unwrap().as_str().unwrap(), "sections");
1325        assert_eq!(section.get("title").unwrap().as_str().unwrap(), "Section 1");
1326    }
1327
1328    #[test]
1329    fn test_order_preservation() {
1330        let markdown = r#"---
1331CARD: items
1332id: 1
1333---
1334
1335First
1336
1337---
1338CARD: items
1339id: 2
1340---
1341
1342Second
1343
1344---
1345CARD: items
1346id: 3
1347---
1348
1349Third"#;
1350
1351        let doc = decompose(markdown).unwrap();
1352
1353        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1354        assert_eq!(cards.len(), 3);
1355
1356        for (i, card) in cards.iter().enumerate() {
1357            let mapping = card.as_object().unwrap();
1358            assert_eq!(mapping.get("CARD").unwrap().as_str().unwrap(), "items");
1359            let id = mapping.get("id").unwrap().as_i64().unwrap();
1360            assert_eq!(id, (i + 1) as i64);
1361        }
1362    }
1363
1364    #[test]
1365    fn test_product_catalog_integration() {
1366        let markdown = r#"---
1367title: Product Catalog
1368author: John Doe
1369date: 2024-01-01
1370---
1371
1372This is the main catalog description.
1373
1374---
1375CARD: products
1376name: Widget A
1377price: 19.99
1378sku: WID-001
1379---
1380
1381The **Widget A** is our most popular product.
1382
1383---
1384CARD: products
1385name: Gadget B
1386price: 29.99
1387sku: GAD-002
1388---
1389
1390The **Gadget B** is perfect for professionals.
1391
1392---
1393CARD: reviews
1394product: Widget A
1395rating: 5
1396---
1397
1398"Excellent product! Highly recommended."
1399
1400---
1401CARD: reviews
1402product: Gadget B
1403rating: 4
1404---
1405
1406"Very good, but a bit pricey.""#;
1407
1408        let doc = decompose(markdown).unwrap();
1409
1410        // Verify global fields
1411        assert_eq!(
1412            doc.get_field("title").unwrap().as_str().unwrap(),
1413            "Product Catalog"
1414        );
1415        assert_eq!(
1416            doc.get_field("author").unwrap().as_str().unwrap(),
1417            "John Doe"
1418        );
1419        assert_eq!(
1420            doc.get_field("date").unwrap().as_str().unwrap(),
1421            "2024-01-01"
1422        );
1423
1424        // Verify global body
1425        assert!(doc.body().unwrap().contains("main catalog description"));
1426
1427        // All cards in unified CARDS array
1428        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1429        assert_eq!(cards.len(), 4); // 2 products + 2 reviews
1430
1431        // First 2 are products
1432        let product1 = cards[0].as_object().unwrap();
1433        assert_eq!(product1.get("CARD").unwrap().as_str().unwrap(), "products");
1434        assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
1435        assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);
1436
1437        let product2 = cards[1].as_object().unwrap();
1438        assert_eq!(product2.get("CARD").unwrap().as_str().unwrap(), "products");
1439        assert_eq!(product2.get("name").unwrap().as_str().unwrap(), "Gadget B");
1440
1441        // Last 2 are reviews
1442        let review1 = cards[2].as_object().unwrap();
1443        assert_eq!(review1.get("CARD").unwrap().as_str().unwrap(), "reviews");
1444        assert_eq!(
1445            review1.get("product").unwrap().as_str().unwrap(),
1446            "Widget A"
1447        );
1448        assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);
1449
1450        // Total fields: title, author, date, body, CARDS = 5
1451        assert_eq!(doc.fields().len(), 5);
1452    }
1453
1454    #[test]
1455    fn taro_quill_directive() {
1456        let markdown = r#"---
1457QUILL: usaf_memo
1458memo_for: [ORG/SYMBOL]
1459memo_from: [ORG/SYMBOL]
1460---
1461
1462This is the memo body."#;
1463
1464        let doc = decompose(markdown).unwrap();
1465
1466        // Verify quill tag is set
1467        assert_eq!(doc.quill_tag(), "usaf_memo");
1468
1469        // Verify fields from quill block become frontmatter
1470        assert_eq!(
1471            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1472                .as_str()
1473                .unwrap(),
1474            "ORG/SYMBOL"
1475        );
1476
1477        // Verify body
1478        assert_eq!(doc.body(), Some("\nThis is the memo body."));
1479    }
1480
1481    #[test]
1482    fn test_quill_with_card_blocks() {
1483        let markdown = r#"---
1484QUILL: document
1485title: Test Document
1486---
1487
1488Main body.
1489
1490---
1491CARD: sections
1492name: Section 1
1493---
1494
1495Section 1 body."#;
1496
1497        let doc = decompose(markdown).unwrap();
1498
1499        // Verify quill tag
1500        assert_eq!(doc.quill_tag(), "document");
1501
1502        // Verify global field from quill block
1503        assert_eq!(
1504            doc.get_field("title").unwrap().as_str().unwrap(),
1505            "Test Document"
1506        );
1507
1508        // Verify card blocks work via CARDS array
1509        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1510        assert_eq!(cards.len(), 1);
1511        assert_eq!(
1512            cards[0]
1513                .as_object()
1514                .unwrap()
1515                .get("CARD")
1516                .unwrap()
1517                .as_str()
1518                .unwrap(),
1519            "sections"
1520        );
1521
1522        // Verify body
1523        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1524    }
1525
1526    #[test]
1527    fn test_multiple_quill_directives_error() {
1528        let markdown = r#"---
1529QUILL: first
1530---
1531
1532---
1533QUILL: second
1534---"#;
1535
1536        let result = decompose(markdown);
1537        assert!(result.is_err());
1538        // QUILL in inline block is now an error (must appear in top-level frontmatter only)
1539        assert!(result
1540            .unwrap_err()
1541            .to_string()
1542            .contains("top-level frontmatter"));
1543    }
1544
1545    #[test]
1546    fn test_invalid_quill_name() {
1547        let markdown = r#"---
1548QUILL: Invalid-Name
1549---"#;
1550
1551        let result = decompose(markdown);
1552        assert!(result.is_err());
1553        assert!(result
1554            .unwrap_err()
1555            .to_string()
1556            .contains("Invalid quill name"));
1557    }
1558
1559    #[test]
1560    fn test_quill_wrong_value_type() {
1561        let markdown = r#"---
1562QUILL: 123
1563---"#;
1564
1565        let result = decompose(markdown);
1566        assert!(result.is_err());
1567        assert!(result
1568            .unwrap_err()
1569            .to_string()
1570            .contains("QUILL value must be a string"));
1571    }
1572
1573    #[test]
1574    fn test_card_wrong_value_type() {
1575        let markdown = r#"---
1576CARD: 123
1577---"#;
1578
1579        let result = decompose(markdown);
1580        assert!(result.is_err());
1581        assert!(result
1582            .unwrap_err()
1583            .to_string()
1584            .contains("CARD/SCOPE value must be a string"));
1585    }
1586
1587    #[test]
1588    fn test_both_quill_and_card_error() {
1589        let markdown = r#"---
1590QUILL: test
1591CARD: items
1592---"#;
1593
1594        let result = decompose(markdown);
1595        assert!(result.is_err());
1596        assert!(result
1597            .unwrap_err()
1598            .to_string()
1599            .contains("Cannot specify both QUILL and CARD"));
1600    }
1601
1602    #[test]
1603    fn test_blank_lines_in_frontmatter() {
1604        // New parsing standard: blank lines are allowed within YAML blocks
1605        let markdown = r#"---
1606title: Test Document
1607author: Test Author
1608
1609description: This has a blank line above it
1610tags:
1611  - one
1612  - two
1613---
1614
1615# Hello World
1616
1617This is the body."#;
1618
1619        let doc = decompose(markdown).unwrap();
1620
1621        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
1622        assert_eq!(
1623            doc.get_field("title").unwrap().as_str().unwrap(),
1624            "Test Document"
1625        );
1626        assert_eq!(
1627            doc.get_field("author").unwrap().as_str().unwrap(),
1628            "Test Author"
1629        );
1630        assert_eq!(
1631            doc.get_field("description").unwrap().as_str().unwrap(),
1632            "This has a blank line above it"
1633        );
1634
1635        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
1636        assert_eq!(tags.len(), 2);
1637    }
1638
1639    #[test]
1640    fn test_blank_lines_in_scope_blocks() {
1641        // Blank lines should be allowed in CARD blocks too
1642        let markdown = r#"---
1643CARD: items
1644name: Item 1
1645
1646price: 19.99
1647
1648tags:
1649  - electronics
1650  - gadgets
1651---
1652
1653Body of item 1."#;
1654
1655        let doc = decompose(markdown).unwrap();
1656
1657        // Cards are in CARDS array
1658        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1659        assert_eq!(cards.len(), 1);
1660
1661        let item = cards[0].as_object().unwrap();
1662        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1663        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1664        assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);
1665
1666        let tags = item.get("tags").unwrap().as_array().unwrap();
1667        assert_eq!(tags.len(), 2);
1668    }
1669
1670    #[test]
1671    fn test_horizontal_rule_with_blank_lines_above_and_below() {
1672        // Horizontal rule: blank lines both above AND below the ---
1673        let markdown = r#"---
1674title: Test
1675---
1676
1677First paragraph.
1678
1679---
1680
1681Second paragraph."#;
1682
1683        let doc = decompose(markdown).unwrap();
1684
1685        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1686
1687        // The body should contain the horizontal rule (---) as part of the content
1688        let body = doc.body().unwrap();
1689        assert!(body.contains("First paragraph."));
1690        assert!(body.contains("---"));
1691        assert!(body.contains("Second paragraph."));
1692    }
1693
1694    #[test]
1695    fn test_horizontal_rule_not_preceded_by_blank() {
1696        // --- not preceded by blank line but followed by blank line is NOT a horizontal rule
1697        // It's also NOT a valid metadata block opening (since it's followed by blank)
1698        let markdown = r#"---
1699title: Test
1700---
1701
1702First paragraph.
1703---
1704
1705Second paragraph."#;
1706
1707        let doc = decompose(markdown).unwrap();
1708
1709        let body = doc.body().unwrap();
1710        // The second --- should be in the body as text (not a horizontal rule since no blank above)
1711        assert!(body.contains("---"));
1712    }
1713
1714    #[test]
1715    fn test_multiple_blank_lines_in_yaml() {
1716        // Multiple blank lines should also be allowed
1717        let markdown = r#"---
1718title: Test
1719
1720
1721author: John Doe
1722
1723
1724version: 1.0
1725---
1726
1727Body content."#;
1728
1729        let doc = decompose(markdown).unwrap();
1730
1731        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1732        assert_eq!(
1733            doc.get_field("author").unwrap().as_str().unwrap(),
1734            "John Doe"
1735        );
1736        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1737    }
1738
1739    #[test]
1740    fn test_html_comment_interaction() {
1741        let markdown = r#"<!---
1742---> the rest of the page content
1743
1744---
1745key: value
1746---
1747"#;
1748        let doc = decompose(markdown).unwrap();
1749
1750        // The comment should be ignored (or at least not cause a parse error)
1751        // The frontmatter should be parsed
1752        let key = doc.get_field("key").and_then(|v| v.as_str());
1753        assert_eq!(key, Some("value"));
1754    }
1755}
1756#[cfg(test)]
1757mod demo_file_test {
1758    use super::*;
1759
1760    #[test]
1761    fn test_extended_metadata_demo_file() {
1762        let markdown = include_str!("../../fixtures/resources/extended_metadata_demo.md");
1763        let doc = decompose(markdown).unwrap();
1764
1765        // Verify global fields
1766        assert_eq!(
1767            doc.get_field("title").unwrap().as_str().unwrap(),
1768            "Extended Metadata Demo"
1769        );
1770        assert_eq!(
1771            doc.get_field("author").unwrap().as_str().unwrap(),
1772            "Quillmark Team"
1773        );
1774        // version is parsed as a number by YAML
1775        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1776
1777        // Verify body
1778        assert!(doc
1779            .body()
1780            .unwrap()
1781            .contains("extended YAML metadata standard"));
1782
1783        // All cards are now in unified CARDS array
1784        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1785        assert_eq!(cards.len(), 5); // 3 features + 2 use_cases
1786
1787        // Count features and use_cases cards
1788        let features_count = cards
1789            .iter()
1790            .filter(|c| {
1791                c.as_object()
1792                    .unwrap()
1793                    .get("CARD")
1794                    .unwrap()
1795                    .as_str()
1796                    .unwrap()
1797                    == "features"
1798            })
1799            .count();
1800        let use_cases_count = cards
1801            .iter()
1802            .filter(|c| {
1803                c.as_object()
1804                    .unwrap()
1805                    .get("CARD")
1806                    .unwrap()
1807                    .as_str()
1808                    .unwrap()
1809                    == "use_cases"
1810            })
1811            .count();
1812        assert_eq!(features_count, 3);
1813        assert_eq!(use_cases_count, 2);
1814
1815        // Check first card is a feature
1816        let feature1 = cards[0].as_object().unwrap();
1817        assert_eq!(feature1.get("CARD").unwrap().as_str().unwrap(), "features");
1818        assert_eq!(
1819            feature1.get("name").unwrap().as_str().unwrap(),
1820            "Tag Directives"
1821        );
1822    }
1823
1824    #[test]
1825    fn test_input_size_limit() {
1826        // Create markdown larger than MAX_INPUT_SIZE (10 MB)
1827        let size = crate::error::MAX_INPUT_SIZE + 1;
1828        let large_markdown = "a".repeat(size);
1829
1830        let result = decompose(&large_markdown);
1831        assert!(result.is_err());
1832
1833        let err_msg = result.unwrap_err().to_string();
1834        assert!(err_msg.contains("Input too large"));
1835    }
1836
1837    #[test]
1838    fn test_yaml_size_limit() {
1839        // Create YAML block larger than MAX_YAML_SIZE (1 MB)
1840        let mut markdown = String::from("---\n");
1841
1842        // Create a very large YAML field
1843        let size = crate::error::MAX_YAML_SIZE + 1;
1844        markdown.push_str("data: \"");
1845        markdown.push_str(&"x".repeat(size));
1846        markdown.push_str("\"\n---\n\nBody");
1847
1848        let result = decompose(&markdown);
1849        assert!(result.is_err());
1850
1851        let err_msg = result.unwrap_err().to_string();
1852        assert!(err_msg.contains("Input too large"));
1853    }
1854
1855    #[test]
1856    fn test_input_within_size_limit() {
1857        // Create markdown just under the limit
1858        let size = 1000; // Much smaller than limit
1859        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));
1860
1861        let result = decompose(&markdown);
1862        assert!(result.is_ok());
1863    }
1864
1865    #[test]
1866    fn test_yaml_within_size_limit() {
1867        // Create YAML block well within the limit
1868        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";
1869
1870        let result = decompose(&markdown);
1871        assert!(result.is_ok());
1872    }
1873
1874    // Tests for guillemet preservation in parsing (guillemets are NOT converted during parsing)
1875    // Guillemet conversion now happens in process_plate, not during parsing
#[test]
fn test_chevrons_preserved_in_body_no_frontmatter() {
    // Without frontmatter the whole input is the body, and it must come back
    // with its chevrons untouched (any conversion is left to process_plate).
    let input = "Use <<raw content>> here.";
    let parsed = decompose(input).unwrap();

    assert_eq!(parsed.body(), Some(input));
}
1884
#[test]
fn test_chevrons_preserved_in_body_with_frontmatter() {
    let input = "---\ntitle: Test\n---\n\nUse <<raw content>> here.";
    let parsed = decompose(input).unwrap();

    // The expected body begins with the blank line that follows the closing
    // delimiter; the chevrons must be left exactly as written.
    assert_eq!(parsed.body(), Some("\nUse <<raw content>> here."));
}
1897
#[test]
fn test_chevrons_preserved_in_yaml_string() {
    let input = "---\ntitle: Test <<with chevrons>>\n---\n\nBody content.";
    let parsed = decompose(input).unwrap();

    // A plain YAML string value keeps its chevrons.
    let title = parsed.get_field("title").unwrap().as_str().unwrap();
    assert_eq!(title, "Test <<with chevrons>>");
}
1913
#[test]
fn test_chevrons_preserved_in_yaml_array() {
    let input = "---\nitems:\n  - \"<<first>>\"\n  - \"<<second>>\"\n---\n\nBody.";
    let parsed = decompose(input).unwrap();

    // Each string element of the sequence keeps its chevrons.
    let entries = parsed.get_field("items").unwrap().as_sequence().unwrap();
    for (position, expected) in ["<<first>>", "<<second>>"].iter().enumerate() {
        assert_eq!(entries[position].as_str().unwrap(), *expected);
    }
}
1929
#[test]
fn test_chevrons_preserved_in_yaml_nested() {
    let input = "---\nmetadata:\n  description: \"<<nested value>>\"\n---\n\nBody.";
    let parsed = decompose(input).unwrap();

    // A string nested inside a mapping keeps its chevrons.
    let nested = parsed.get_field("metadata").unwrap().as_object().unwrap();
    let description = nested.get("description").unwrap().as_str().unwrap();
    assert_eq!(description, "<<nested value>>");
}
1946
#[test]
fn test_chevrons_preserved_in_code_blocks() {
    let input = "```\n<<in code block>>\n```\n\n<<outside code block>>";
    let parsed = decompose(input).unwrap();
    let text = parsed.body().unwrap();

    // Parsing performs no conversion, so chevrons survive both inside and
    // outside the fenced block.
    for fragment in ["<<in code block>>", "<<outside code block>>"].iter() {
        assert!(text.contains(*fragment));
    }
}
1961
#[test]
fn test_chevrons_preserved_in_inline_code() {
    let parsed = decompose("`<<in inline code>>` and <<outside inline code>>").unwrap();
    let text = parsed.body().unwrap();

    // Neither the backtick-quoted span nor the plain text may lose its chevrons.
    for fragment in ["`<<in inline code>>`", "<<outside inline code>>"].iter() {
        assert!(text.contains(*fragment));
    }
}
1972
#[test]
fn test_chevrons_preserved_in_tagged_block_body() {
    // Global frontmatter and body, followed by one CARD block whose body
    // contains chevrons.
    let input = concat!(
        "---\n",
        "title: Main\n",
        "---\n",
        "\n",
        "Main body.\n",
        "\n",
        "---\n",
        "CARD: items\n",
        "name: Item 1\n",
        "---\n",
        "\n",
        "Use <<raw>> here.",
    );
    let parsed = decompose(input).unwrap();

    let card_list = parsed.get_field("CARDS").unwrap().as_sequence().unwrap();
    let first_card = card_list[0].as_object().unwrap();
    assert_eq!(first_card.get("CARD").unwrap().as_str().unwrap(), "items");

    // The card's own body keeps its chevrons.
    let card_body = first_card.get("body").unwrap().as_str().unwrap();
    assert!(card_body.contains("<<raw>>"));
}
1996
#[test]
fn test_chevrons_preserved_in_tagged_block_yaml() {
    // Global frontmatter and body, followed by one CARD block whose YAML
    // carries a string value with chevrons.
    let input = concat!(
        "---\n",
        "title: Main\n",
        "---\n",
        "\n",
        "Main body.\n",
        "\n",
        "---\n",
        "CARD: items\n",
        "description: \"<<tagged yaml>>\"\n",
        "---\n",
        "\n",
        "Item body.",
    );
    let parsed = decompose(input).unwrap();

    let card_list = parsed.get_field("CARDS").unwrap().as_sequence().unwrap();
    let first_card = card_list[0].as_object().unwrap();
    assert_eq!(first_card.get("CARD").unwrap().as_str().unwrap(), "items");

    // The YAML value inside the card keeps its chevrons.
    let description = first_card.get("description").unwrap().as_str().unwrap();
    assert_eq!(description, "<<tagged yaml>>");
}
2022
#[test]
fn test_yaml_numbers_not_affected() {
    // An integer field must still be readable as an integer after parsing.
    let parsed = decompose("---\ncount: 42\n---\n\nBody.").unwrap();

    let count = parsed.get_field("count").unwrap().as_i64().unwrap();
    assert_eq!(count, 42);
}
2034
#[test]
fn test_yaml_booleans_not_affected() {
    // A boolean field must still be readable as a bool after parsing.
    let markdown = r#"---
active: true
---

Body."#;
    let doc = decompose(markdown).unwrap();
    // Assert on the bool itself; comparing against the literal `true` with
    // `assert_eq!` adds nothing.
    assert!(doc.get_field("active").unwrap().as_bool().unwrap());
}
2046
#[test]
fn test_multiline_chevrons_preserved() {
    // A chevron pair split across two lines is kept exactly as written.
    let parsed = decompose("<<text\nacross lines>>").unwrap();
    let text = parsed.body().unwrap();

    // Both the opening and the closing half must still be present.
    assert!(text.contains("<<text"));
    assert!(text.contains("across lines>>"));
}
2058
#[test]
fn test_unmatched_chevrons_preserved() {
    // An opening chevron with no closing partner is passed through unchanged.
    let parsed = decompose("<<unmatched").unwrap();

    assert_eq!(parsed.body().unwrap(), "<<unmatched");
}
2068}
2069
2070// Additional robustness tests
#[cfg(test)]
mod robustness_tests {
    use super::*;

    // Edge cases for delimiter handling

    #[test]
    fn test_empty_document() {
        // An empty input yields an empty body and the default quill tag.
        let doc = decompose("").unwrap();
        assert_eq!(doc.body(), Some(""));
        assert_eq!(doc.quill_tag(), "__default__");
    }

    #[test]
    fn test_only_whitespace() {
        // Whitespace-only input is returned verbatim as the body.
        let doc = decompose("   \n\n   \t").unwrap();
        assert_eq!(doc.body(), Some("   \n\n   \t"));
    }

    #[test]
    fn test_only_dashes() {
        // Just "---" at document start without newline is not treated as frontmatter opener
        // (requires "---\n" to start a frontmatter block)
        let result = decompose("---");
        // This is NOT an error - "---" alone without newline is just body content
        assert!(result.is_ok());
        assert_eq!(result.unwrap().body(), Some("---"));
    }

    #[test]
    fn test_dashes_in_middle_of_line() {
        // --- not at start of line should not be treated as delimiter
        let markdown = "some text --- more text";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.body(), Some("some text --- more text"));
    }

    #[test]
    fn test_four_dashes() {
        // ---- is not a valid delimiter
        let markdown = "----\ntitle: Test\n----\n\nBody";
        let doc = decompose(markdown).unwrap();
        // Should treat entire content as body
        assert!(doc.body().unwrap().contains("----"));
    }

    #[test]
    fn test_crlf_line_endings() {
        // Windows-style line endings
        let markdown = "---\r\ntitle: Test\r\n---\r\n\r\nBody content.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert!(doc.body().unwrap().contains("Body content."));
    }

    #[test]
    fn test_mixed_line_endings() {
        // Mix of \n and \r\n
        let markdown = "---\ntitle: Test\r\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
    }

    #[test]
    fn test_frontmatter_at_eof_no_trailing_newline() {
        // Frontmatter closed at EOF without trailing newline
        let markdown = "---\ntitle: Test\n---";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert_eq!(doc.body(), Some(""));
    }

    #[test]
    fn test_empty_frontmatter() {
        // A frontmatter block whose only content is whitespace ("---\n \n---").
        // The fully empty form "---\n---" is deliberately not used here: a "---"
        // directly followed by another "---" is handled by the horizontal-rule
        // logic rather than being treated as empty frontmatter.
        let markdown = "---\n \n---\n\nBody content.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("Body content."));
        // Should have body and CARDS fields
        assert_eq!(doc.fields().len(), 2);
    }

    #[test]
    fn test_whitespace_only_frontmatter() {
        // Frontmatter with only whitespace
        let markdown = "---\n   \n\n   \n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("Body."));
    }

    // Unicode handling

    #[test]
    fn test_unicode_in_yaml_keys() {
        // Non-ASCII keys (Latin and Japanese) are usable as field names.
        let markdown = "---\ntitre: Bonjour\nタイトル: こんにちは\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("titre").unwrap().as_str().unwrap(), "Bonjour");
        assert_eq!(
            doc.get_field("タイトル").unwrap().as_str().unwrap(),
            "こんにちは"
        );
    }

    #[test]
    fn test_unicode_in_yaml_values() {
        // Multi-byte characters and emoji in a value are returned unchanged.
        let markdown = "---\ntitle: 你好世界 🎉\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "你好世界 🎉"
        );
    }

    #[test]
    fn test_unicode_in_body() {
        // Multi-byte characters and emoji in the body are returned unchanged.
        let markdown = "---\ntitle: Test\n---\n\n日本語テキスト with emoji 🚀";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("日本語テキスト"));
        assert!(doc.body().unwrap().contains("🚀"));
    }

    // YAML edge cases

    #[test]
    fn test_yaml_multiline_string() {
        // Literal block scalar (`|`): line breaks inside the value are kept.
        let markdown = r#"---
description: |
  This is a
  multiline string
  with preserved newlines.
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let desc = doc.get_field("description").unwrap().as_str().unwrap();
        assert!(desc.contains("multiline string"));
        assert!(desc.contains('\n'));
    }

    #[test]
    fn test_yaml_folded_string() {
        // Folded block scalar (`>`): the three source lines become one line.
        let markdown = r#"---
description: >
  This is a folded
  string that becomes
  a single line.
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let desc = doc.get_field("description").unwrap().as_str().unwrap();
        // Folded strings join lines with spaces
        assert!(desc.contains("folded"));
        // Checking text that spans the original line breaks proves the folding
        // actually happened, not merely that the value was read.
        assert!(desc.contains("This is a folded string that becomes a single line."));
    }

    #[test]
    fn test_yaml_null_value() {
        // An explicit `null` is stored as a null value, not dropped.
        let markdown = "---\noptional: null\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.get_field("optional").unwrap().is_null());
    }

    #[test]
    fn test_yaml_empty_string_value() {
        // A quoted empty string stays an empty string.
        let markdown = "---\nempty: \"\"\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("empty").unwrap().as_str().unwrap(), "");
    }

    #[test]
    fn test_yaml_special_characters_in_string() {
        // YAML-significant characters inside a quoted string are plain text.
        let markdown = "---\nspecial: \"colon: here, and [brackets]\"\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(
            doc.get_field("special").unwrap().as_str().unwrap(),
            "colon: here, and [brackets]"
        );
    }

    #[test]
    fn test_yaml_nested_objects() {
        // Two levels of nested mappings are reachable through `as_object`.
        let markdown = r#"---
config:
  database:
    host: localhost
    port: 5432
  cache:
    enabled: true
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let config = doc.get_field("config").unwrap().as_object().unwrap();
        let db = config.get("database").unwrap().as_object().unwrap();
        assert_eq!(db.get("host").unwrap().as_str().unwrap(), "localhost");
        assert_eq!(db.get("port").unwrap().as_i64().unwrap(), 5432);
    }

    // CARD block edge cases

    #[test]
    fn test_card_with_empty_body() {
        // A CARD block closed at EOF gets an empty body string.
        let markdown = r#"---
CARD: items
name: Item
---"#;
        let doc = decompose(markdown).unwrap();
        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
        assert_eq!(cards.len(), 1);
        let item = cards[0].as_object().unwrap();
        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
    }

    #[test]
    fn test_card_consecutive_blocks() {
        // Two CARD blocks back to back, with no body text between them.
        let markdown = r#"---
CARD: a
id: 1
---
---
CARD: a
id: 2
---"#;
        let doc = decompose(markdown).unwrap();
        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
        assert_eq!(cards.len(), 2);

        // Both cards share the same CARD name, so `id` is the only field that
        // shows two distinct blocks were parsed and kept in document order.
        for (index, expected_id) in [1_i64, 2].iter().enumerate() {
            let card = cards[index].as_object().unwrap();
            assert_eq!(card.get("CARD").unwrap().as_str().unwrap(), "a");
            assert_eq!(card.get("id").unwrap().as_i64().unwrap(), *expected_id);
        }
    }

    #[test]
    fn test_card_with_body_containing_dashes() {
        // "---" in the middle of a body line must not end the card body.
        let markdown = r#"---
CARD: items
name: Item
---

Some text with --- dashes in it."#;
        let doc = decompose(markdown).unwrap();
        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
        let item = cards[0].as_object().unwrap();
        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
        let body = item.get("body").unwrap().as_str().unwrap();
        assert!(body.contains("--- dashes"));
    }

    // QUILL directive edge cases

    #[test]
    fn test_quill_with_underscore_prefix() {
        // A quill name may start with an underscore.
        let markdown = "---\nQUILL: _internal\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "_internal");
    }

    #[test]
    fn test_quill_with_numbers() {
        // Digits are allowed after the first character of a quill name.
        let markdown = "---\nQUILL: form_8_v2\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "form_8_v2");
    }

    #[test]
    fn test_quill_with_additional_fields() {
        // Ordinary fields in the same block as QUILL are still parsed.
        let markdown = r#"---
QUILL: my_quill
title: Document Title
author: John Doe
---

Body content."#;
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "my_quill");
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Document Title"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "John Doe"
        );
    }

    // Error handling

    #[test]
    fn test_invalid_scope_name_uppercase() {
        // Uppercase card names are rejected with a descriptive message.
        let markdown = "---\nCARD: ITEMS\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid card field name"));
    }

    #[test]
    fn test_invalid_scope_name_starts_with_number() {
        // A card name may not begin with a digit.
        let markdown = "---\nCARD: 123items\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_invalid_scope_name_with_hyphen() {
        // Hyphens are not allowed in card names.
        let markdown = "---\nCARD: my-items\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_invalid_quill_name_uppercase() {
        // Quill names with uppercase letters are rejected.
        let markdown = "---\nQUILL: MyQuill\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_yaml_syntax_error_missing_colon() {
        // "title Test" has no key/value separator, so parsing must fail.
        let markdown = "---\ntitle Test\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_yaml_syntax_error_bad_indentation() {
        let markdown = "---\nitems:\n- one\n - two\n---\n\nBody.";
        let result = decompose(markdown);
        // Bad indentation may or may not be an error depending on YAML parser
        // Just ensure it doesn't panic
        let _ = result;
    }

    // Body extraction edge cases

    #[test]
    fn test_body_with_leading_newlines() {
        let markdown = "---\ntitle: Test\n---\n\n\n\nBody with leading newlines.";
        let doc = decompose(markdown).unwrap();
        // Body should preserve leading newlines after frontmatter
        assert!(doc.body().unwrap().starts_with('\n'));
    }

    #[test]
    fn test_body_with_trailing_newlines() {
        let markdown = "---\ntitle: Test\n---\n\nBody.\n\n\n";
        let doc = decompose(markdown).unwrap();
        // Body should preserve trailing newlines
        assert!(doc.body().unwrap().ends_with('\n'));
    }

    #[test]
    fn test_no_body_after_frontmatter() {
        // Frontmatter with nothing after it produces an empty (not missing) body.
        let markdown = "---\ntitle: Test\n---";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.body(), Some(""));
    }

    // Tag name validation

    #[test]
    fn test_valid_tag_name_single_underscore() {
        assert!(is_valid_tag_name("_"));
    }

    #[test]
    fn test_valid_tag_name_underscore_prefix() {
        assert!(is_valid_tag_name("_private"));
    }

    #[test]
    fn test_valid_tag_name_with_numbers() {
        assert!(is_valid_tag_name("item1"));
        assert!(is_valid_tag_name("item_2"));
    }

    #[test]
    fn test_invalid_tag_name_empty() {
        assert!(!is_valid_tag_name(""));
    }

    #[test]
    fn test_invalid_tag_name_starts_with_number() {
        assert!(!is_valid_tag_name("1item"));
    }

    #[test]
    fn test_invalid_tag_name_uppercase() {
        assert!(!is_valid_tag_name("Items"));
        assert!(!is_valid_tag_name("ITEMS"));
    }

    #[test]
    fn test_invalid_tag_name_special_chars() {
        assert!(!is_valid_tag_name("my-items"));
        assert!(!is_valid_tag_name("my.items"));
        assert!(!is_valid_tag_name("my items"));
    }

    // Guillemet preprocessing in YAML

    #[test]
    fn test_guillemet_in_yaml_preserves_non_strings() {
        // Non-string scalars, at top level and inside a sequence, keep their types.
        let markdown = r#"---
count: 42
price: 19.99
active: true
items:
  - first
  - 100
  - true
---

Body."#;
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
        assert_eq!(doc.get_field("price").unwrap().as_f64().unwrap(), 19.99);
        assert!(doc.get_field("active").unwrap().as_bool().unwrap());

        // The mixed-type sequence in the fixture must keep each element's type too.
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 3);
        assert_eq!(items[0].as_str().unwrap(), "first");
        assert_eq!(items[1].as_i64().unwrap(), 100);
        assert!(items[2].as_bool().unwrap());
    }

    #[test]
    fn test_guillemet_double_conversion_prevention() {
        // Ensure «» in input doesn't get double-processed
        let markdown = "---\ntitle: Already «converted»\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        // Should remain as-is (not double-escaped)
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Already «converted»"
        );
    }
}
2524}