quillmark_core/
parse.rs

1//! # Parsing Module
2//!
3//! Parsing functionality for markdown documents with YAML frontmatter.
4//!
5//! ## Overview
6//!
//! The `parse` module provides the [`ParsedDocument::from_markdown`] function for parsing markdown documents
//! with YAML frontmatter into a [`ParsedDocument`].
8//!
9//! ## Key Types
10//!
11//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
12//! - [`BODY_FIELD`]: Constant for the field name storing document body
13//!
14//! ## Examples
15//!
16//! ### Basic Parsing
17//!
18//! ```
19//! use quillmark_core::ParsedDocument;
20//!
21//! let markdown = r#"---
22//! title: My Document
23//! author: John Doe
24//! ---
25//!
26//! # Introduction
27//!
28//! Document content here.
29//! "#;
30//!
31//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
32//! let title = doc.get_field("title")
33//!     .and_then(|v| v.as_str())
34//!     .unwrap_or("Untitled");
35//! ```
36//!
37//! ## Error Handling
38//!
39//! The [`ParsedDocument::from_markdown`] function returns errors for:
40//! - Malformed YAML syntax
41//! - Unclosed frontmatter blocks
42//! - Multiple global frontmatter blocks
//! - Both QUILL and CARD (or its alias SCOPE) specified in the same block, or both CARD and SCOPE together
44//! - Reserved field name usage
45//! - Name collisions
46//!
47//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.
48
49use std::collections::HashMap;
50
51use crate::value::QuillValue;
52
/// The field name used to store the document body.
///
/// `decompose` stores the global body text in the document's field map under
/// this key, and every card item also carries its own body under the same key.
/// It is rejected as a CARD/SCOPE value when metadata blocks are scanned.
pub const BODY_FIELD: &str = "body";

/// Reserved tag name for quill specification.
///
/// NOTE(review): the visible parsing code matches the upper-case `QUILL` key
/// literally and does not read this constant — confirm where it is consumed.
pub const QUILL_TAG: &str = "quill";
58
/// A parsed markdown document with frontmatter.
///
/// Holds the parsed fields together with the quill tag selected for the
/// document. Instances are created with [`ParsedDocument::new`],
/// [`ParsedDocument::with_quill_tag`] or [`ParsedDocument::from_markdown`].
#[derive(Debug, Clone)]
pub struct ParsedDocument {
    /// Field name → value. When produced by `from_markdown` this includes the
    /// body under [`BODY_FIELD`].
    fields: HashMap<String, QuillValue>,
    /// Quill name taken from the `QUILL` key, or `"__default__"` when none was given.
    quill_tag: String,
}
65
66impl ParsedDocument {
67    /// Create a new ParsedDocument with the given fields
68    pub fn new(fields: HashMap<String, QuillValue>) -> Self {
69        Self {
70            fields,
71            quill_tag: "__default__".to_string(),
72        }
73    }
74
75    /// Create a ParsedDocument from fields and quill tag
76    pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: String) -> Self {
77        Self { fields, quill_tag }
78    }
79
80    /// Create a ParsedDocument from markdown string
81    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
82        decompose(markdown).map_err(crate::error::ParseError::from)
83    }
84
85    /// Get the quill tag (from QUILL key, or "__default__" if not specified)
86    pub fn quill_tag(&self) -> &str {
87        &self.quill_tag
88    }
89
90    /// Get the document body
91    pub fn body(&self) -> Option<&str> {
92        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
93    }
94
95    /// Get a specific field
96    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
97        self.fields.get(name)
98    }
99
100    /// Get all fields (including body)
101    pub fn fields(&self) -> &HashMap<String, QuillValue> {
102        &self.fields
103    }
104
105    /// Create a new ParsedDocument with default values applied
106    ///
107    /// This method creates a new ParsedDocument with default values applied for any
108    /// fields that are missing from the original document but have defaults specified.
109    /// Existing fields are preserved and not overwritten.
110    ///
111    /// # Arguments
112    ///
113    /// * `defaults` - A HashMap of field names to their default QuillValues
114    ///
115    /// # Returns
116    ///
117    /// A new ParsedDocument with defaults applied for missing fields
118    pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
119        let mut fields = self.fields.clone();
120
121        for (field_name, default_value) in defaults {
122            // Only apply default if field is missing
123            if !fields.contains_key(field_name) {
124                fields.insert(field_name.clone(), default_value.clone());
125            }
126        }
127
128        Self {
129            fields,
130            quill_tag: self.quill_tag.clone(),
131        }
132    }
133
134    /// Create a new ParsedDocument with coerced field values
135    ///
136    /// This method applies type coercions to field values based on the schema.
137    /// Coercions include:
138    /// - Singular values to arrays when schema expects array
139    /// - String "true"/"false" to boolean
140    /// - Numbers to boolean (0=false, non-zero=true)
141    /// - String numbers to number type
142    /// - Boolean to number (true=1, false=0)
143    ///
144    /// # Arguments
145    ///
146    /// * `schema` - A JSON Schema object defining expected field types
147    ///
148    /// # Returns
149    ///
150    /// A new ParsedDocument with coerced field values
151    pub fn with_coercion(&self, schema: &QuillValue) -> Self {
152        use crate::schema::coerce_document;
153
154        let coerced_fields = coerce_document(schema, &self.fields);
155
156        Self {
157            fields: coerced_fields,
158            quill_tag: self.quill_tag.clone(),
159        }
160    }
161}
162
/// One `---`-delimited metadata block located by `find_metadata_blocks`.
#[derive(Debug)]
struct MetadataBlock {
    /// Byte offset of the opening "---".
    start: usize,
    /// Byte offset just past the closing delimiter (including its trailing
    /// line ending when there is one).
    end: usize,
    /// Parsed YAML with any QUILL/CARD/SCOPE key removed. `None` when the block
    /// content is empty or nothing remains after removing that key. A YAML
    /// parse failure is reported as an error instead of being stored here.
    yaml_value: Option<serde_yaml::Value>,
    /// Card field name from the CARD key (or its SCOPE alias).
    tag: Option<String>,
    /// Quill name from the QUILL key.
    quill_name: Option<String>,
}
171
/// Check that `name` matches the pattern `[a-z_][a-z0-9_]*`.
///
/// The empty string is rejected. Only ASCII lowercase letters, ASCII digits
/// and `_` are accepted, and the first character must not be a digit.
fn is_valid_tag_name(name: &str) -> bool {
    let mut remaining = name.chars();

    match remaining.next() {
        // Leading character: lowercase letter or underscore, then the tail
        // may additionally contain digits.
        Some(head) if head == '_' || head.is_ascii_lowercase() => {
            remaining.all(|c| c == '_' || c.is_ascii_lowercase() || c.is_ascii_digit())
        }
        // Empty input or an invalid leading character.
        _ => false,
    }
}
193
194/// Find all metadata blocks in the document
195fn find_metadata_blocks(
196    markdown: &str,
197) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
198    let mut blocks = Vec::new();
199    let mut pos = 0;
200
201    while pos < markdown.len() {
202        // Look for opening "---\n" or "---\r\n"
203        let search_str = &markdown[pos..];
204        let delimiter_result = search_str
205            .find("---\n")
206            .map(|p| (p, 4, "\n"))
207            .or_else(|| search_str.find("---\r\n").map(|p| (p, 5, "\r\n")));
208
209        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
210            let abs_pos = pos + delimiter_pos;
211
212            // Check if the delimiter is at the start of a line
213            let is_start_of_line = if abs_pos == 0 {
214                true
215            } else {
216                let char_before = markdown.as_bytes()[abs_pos - 1];
217                char_before == b'\n' || char_before == b'\r'
218            };
219
220            if !is_start_of_line {
221                pos = abs_pos + 1;
222                continue;
223            }
224
225            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"
226
227            // Check if this --- is a horizontal rule (blank lines above AND below)
228            let preceded_by_blank = if abs_pos > 0 {
229                // Check if there's a blank line before the ---
230                let before = &markdown[..abs_pos];
231                before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
232            } else {
233                false
234            };
235
236            let followed_by_blank = if content_start < markdown.len() {
237                markdown[content_start..].starts_with('\n')
238                    || markdown[content_start..].starts_with("\r\n")
239            } else {
240                false
241            };
242
243            // Horizontal rule: blank lines both above and below
244            if preceded_by_blank && followed_by_blank {
245                // This is a horizontal rule in the body, skip it
246                pos = abs_pos + 3; // Skip past "---"
247                continue;
248            }
249
250            // Check if followed by non-blank line (or if we're at document start)
251            // This starts a metadata block
252            if followed_by_blank {
253                // --- followed by blank line but NOT preceded by blank line
254                // This is NOT a metadata block opening, skip it
255                pos = abs_pos + 3;
256                continue;
257            }
258
259            // Found potential metadata block opening (followed by non-blank line)
260            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
261            let rest = &markdown[content_start..];
262
263            // First try to find delimiters with trailing newlines
264            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
265            let closing_with_newline = closing_patterns
266                .iter()
267                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
268                .min_by_key(|(p, _)| *p);
269
270            // Also check for closing at end of document (no trailing newline)
271            let closing_at_eof = ["\n---", "\r\n---"]
272                .iter()
273                .filter_map(|delim| {
274                    rest.find(delim).and_then(|p| {
275                        if p + delim.len() == rest.len() {
276                            Some((p, delim.len()))
277                        } else {
278                            None
279                        }
280                    })
281                })
282                .min_by_key(|(p, _)| *p);
283
284            let closing_result = match (closing_with_newline, closing_at_eof) {
285                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
286                (Some(_), Some(_)) => closing_with_newline,
287                (Some(_), None) => closing_with_newline,
288                (None, Some(_)) => closing_at_eof,
289                (None, None) => None,
290            };
291
292            if let Some((closing_pos, closing_len)) = closing_result {
293                let abs_closing_pos = content_start + closing_pos;
294                let content = &markdown[content_start..abs_closing_pos];
295
296                // Check YAML size limit
297                if content.len() > crate::error::MAX_YAML_SIZE {
298                    return Err(format!(
299                        "YAML block too large: {} bytes (max: {} bytes)",
300                        content.len(),
301                        crate::error::MAX_YAML_SIZE
302                    )
303                    .into());
304                }
305
306                // Parse YAML content to check for reserved keys (QUILL, SCOPE)
307                // First, try to parse as YAML
308                let (tag, quill_name, yaml_value) = if !content.is_empty() {
309                    // Try to parse the YAML to check for reserved keys
310                    match serde_yaml::from_str::<serde_yaml::Value>(content) {
311                        Ok(parsed_yaml) => {
312                            if let Some(mapping) = parsed_yaml.as_mapping() {
313                                let quill_key = serde_yaml::Value::String("QUILL".to_string());
314                                let card_key = serde_yaml::Value::String("CARD".to_string());
315                                let scope_key = serde_yaml::Value::String("SCOPE".to_string()); // Backwards compatibility alias
316
317                                let has_quill = mapping.contains_key(&quill_key);
318                                let has_card = mapping.contains_key(&card_key);
319                                let has_scope = mapping.contains_key(&scope_key);
320
321                                // CARD and SCOPE are aliases - can't use both
322                                if has_card && has_scope {
323                                    return Err(
324                                        "Cannot specify both CARD and SCOPE in the same block (SCOPE is an alias for CARD)"
325                                            .into(),
326                                    );
327                                }
328
329                                let effective_card_key = if has_card {
330                                    Some(&card_key)
331                                } else if has_scope {
332                                    Some(&scope_key)
333                                } else {
334                                    None
335                                };
336
337                                if has_quill && effective_card_key.is_some() {
338                                    return Err(
339                                        "Cannot specify both QUILL and CARD/SCOPE in the same block"
340                                            .into(),
341                                    );
342                                }
343
344                                if has_quill {
345                                    // Extract quill name
346                                    let quill_value = mapping.get(&quill_key).unwrap();
347                                    let quill_name_str = quill_value
348                                        .as_str()
349                                        .ok_or("QUILL value must be a string")?;
350
351                                    if !is_valid_tag_name(quill_name_str) {
352                                        return Err(format!(
353                                            "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
354                                            quill_name_str
355                                        )
356                                        .into());
357                                    }
358
359                                    // Remove QUILL from the YAML value for processing
360                                    let mut new_mapping = mapping.clone();
361                                    new_mapping.remove(&quill_key);
362                                    let new_value = if new_mapping.is_empty() {
363                                        None
364                                    } else {
365                                        Some(serde_yaml::Value::Mapping(new_mapping))
366                                    };
367
368                                    (None, Some(quill_name_str.to_string()), new_value)
369                                } else if let Some(card_key_used) = effective_card_key {
370                                    // Extract card field name (handles both CARD and SCOPE)
371                                    let card_value = mapping.get(card_key_used).unwrap();
372                                    let field_name = card_value
373                                        .as_str()
374                                        .ok_or("CARD/SCOPE value must be a string")?;
375
376                                    if !is_valid_tag_name(field_name) {
377                                        return Err(format!(
378                                            "Invalid card field name '{}': must match pattern [a-z_][a-z0-9_]*",
379                                            field_name
380                                        )
381                                        .into());
382                                    }
383
384                                    if field_name == BODY_FIELD {
385                                        return Err(format!(
386                                            "Cannot use reserved field name '{}' as CARD/SCOPE value",
387                                            BODY_FIELD
388                                        )
389                                        .into());
390                                    }
391
392                                    // Remove CARD/SCOPE from the YAML value for processing
393                                    let mut new_mapping = mapping.clone();
394                                    new_mapping.remove(card_key_used);
395                                    let new_value = if new_mapping.is_empty() {
396                                        None
397                                    } else {
398                                        Some(serde_yaml::Value::Mapping(new_mapping))
399                                    };
400
401                                    (Some(field_name.to_string()), None, new_value)
402                                } else {
403                                    // No reserved keys, keep the parsed YAML
404                                    (None, None, Some(parsed_yaml))
405                                }
406                            } else {
407                                // Not a mapping, keep the parsed YAML (could be null for whitespace)
408                                (None, None, Some(parsed_yaml))
409                            }
410                        }
411                        Err(e) => {
412                            // YAML parsing failed - return error with context
413                            return Err(format!("Invalid YAML frontmatter: {}", e).into());
414                        }
415                    }
416                } else {
417                    // Empty content
418                    (None, None, None)
419                };
420
421                blocks.push(MetadataBlock {
422                    start: abs_pos,
423                    end: abs_closing_pos + closing_len, // After closing delimiter
424                    yaml_value,
425                    tag,
426                    quill_name,
427                });
428
429                pos = abs_closing_pos + closing_len;
430            } else if abs_pos == 0 {
431                // Frontmatter started but not closed
432                return Err("Frontmatter started but not closed with ---".into());
433            } else {
434                // Not a valid metadata block, skip this position
435                pos = abs_pos + 3;
436            }
437        } else {
438            break;
439        }
440    }
441
442    Ok(blocks)
443}
444
445/// Decompose markdown into frontmatter fields and body
446fn decompose(markdown: &str) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
447    // Check input size limit
448    if markdown.len() > crate::error::MAX_INPUT_SIZE {
449        return Err(format!(
450            "Input too large: {} bytes (max: {} bytes)",
451            markdown.len(),
452            crate::error::MAX_INPUT_SIZE
453        )
454        .into());
455    }
456
457    let mut fields = HashMap::new();
458
459    // Find all metadata blocks
460    let blocks = find_metadata_blocks(markdown)?;
461
462    if blocks.is_empty() {
463        // No metadata blocks, entire content is body
464        fields.insert(
465            BODY_FIELD.to_string(),
466            QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
467        );
468        return Ok(ParsedDocument::new(fields));
469    }
470
471    // Collect all card items into unified CARDS array
472    let mut cards_array: Vec<serde_json::Value> = Vec::new();
473    let mut global_frontmatter_index: Option<usize> = None;
474    let mut quill_name: Option<String> = None;
475
476    // First pass: identify global frontmatter, quill directive, and validate
477    for (idx, block) in blocks.iter().enumerate() {
478        if idx == 0 {
479            // Top-level frontmatter: can have QUILL or neither (not considered a card)
480            if let Some(ref name) = block.quill_name {
481                quill_name = Some(name.clone());
482            }
483            // If it has neither QUILL nor CARD, it's global frontmatter
484            if block.tag.is_none() && block.quill_name.is_none() {
485                global_frontmatter_index = Some(idx);
486            }
487        } else {
488            // Inline blocks (idx > 0): MUST have CARD, cannot have QUILL
489            if block.quill_name.is_some() {
490                return Err("QUILL directive can only appear in the top-level frontmatter, not in inline blocks. Use CARD instead.".into());
491            }
492            if block.tag.is_none() {
493                // Inline block without CARD
494                return Err(Box::new(crate::error::ParseError::missing_card_directive()));
495            }
496        }
497    }
498
499    // Parse global frontmatter if present
500    if let Some(idx) = global_frontmatter_index {
501        let block = &blocks[idx];
502
503        // Get parsed YAML fields directly (already parsed in find_metadata_blocks)
504        let yaml_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
505            Some(serde_yaml::Value::Mapping(mapping)) => mapping
506                .iter()
507                .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
508                .collect(),
509            Some(serde_yaml::Value::Null) => {
510                // Null value (from whitespace-only YAML) - treat as empty mapping
511                HashMap::new()
512            }
513            Some(_) => {
514                // Non-mapping, non-null YAML (e.g., scalar, sequence) - this is an error for frontmatter
515                return Err("Invalid YAML frontmatter: expected a mapping".into());
516            }
517            None => HashMap::new(),
518        };
519
520        // Check that all tagged blocks don't conflict with global fields
521        // Exception: if the global field is an array, allow it (we'll merge later)
522        for other_block in &blocks {
523            if let Some(ref tag) = other_block.tag {
524                if let Some(global_value) = yaml_fields.get(tag) {
525                    // Check if the global value is an array
526                    if global_value.as_sequence().is_none() {
527                        return Err(format!(
528                            "Name collision: global field '{}' conflicts with tagged attribute",
529                            tag
530                        )
531                        .into());
532                    }
533                }
534            }
535        }
536
537        // Convert YAML values to QuillValue at boundary
538        for (key, value) in yaml_fields {
539            fields.insert(key, QuillValue::from_yaml(value)?);
540        }
541    }
542
543    // Process blocks with quill directives
544    for block in &blocks {
545        if block.quill_name.is_some() {
546            // Quill directive blocks can have YAML content (becomes part of frontmatter)
547            if let Some(ref yaml_val) = block.yaml_value {
548                let yaml_fields: HashMap<String, serde_yaml::Value> = match yaml_val {
549                    serde_yaml::Value::Mapping(mapping) => mapping
550                        .iter()
551                        .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
552                        .collect(),
553                    serde_yaml::Value::Null => {
554                        // Null value (from whitespace-only YAML) - treat as empty mapping
555                        HashMap::new()
556                    }
557                    _ => {
558                        return Err("Invalid YAML in quill block: expected a mapping".into());
559                    }
560                };
561
562                // Check for conflicts with existing fields
563                for key in yaml_fields.keys() {
564                    if fields.contains_key(key) {
565                        return Err(format!(
566                            "Name collision: quill block field '{}' conflicts with existing field",
567                            key
568                        )
569                        .into());
570                    }
571                }
572
573                // Convert YAML values to QuillValue at boundary
574                for (key, value) in yaml_fields {
575                    fields.insert(key, QuillValue::from_yaml(value)?);
576                }
577            }
578        }
579    }
580
581    // Parse tagged blocks (CARD blocks)
582    for (idx, block) in blocks.iter().enumerate() {
583        if let Some(ref tag_name) = block.tag {
584            // Card names cannot conflict with frontmatter field names
585            if fields.contains_key(tag_name) {
586                return Err(format!(
587                    "Name collision: CARD type '{}' conflicts with frontmatter field name",
588                    tag_name
589                )
590                .into());
591            }
592
593            // Get YAML metadata directly (already parsed in find_metadata_blocks)
594            let mut item_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
595                Some(serde_yaml::Value::Mapping(mapping)) => mapping
596                    .iter()
597                    .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
598                    .collect(),
599                Some(serde_yaml::Value::Null) => {
600                    // Null value (from whitespace-only YAML) - treat as empty mapping
601                    HashMap::new()
602                }
603                Some(_) => {
604                    return Err(format!(
605                        "Invalid YAML in card block '{}': expected a mapping",
606                        tag_name
607                    )
608                    .into());
609                }
610                None => HashMap::new(),
611            };
612
613            // Extract body for this card block
614            let body_start = block.end;
615            let body_end = if idx + 1 < blocks.len() {
616                blocks[idx + 1].start
617            } else {
618                markdown.len()
619            };
620            let body = &markdown[body_start..body_end];
621
622            // Add body to item fields
623            item_fields.insert(
624                BODY_FIELD.to_string(),
625                serde_yaml::Value::String(body.to_string()),
626            );
627
628            // Add CARD discriminator field
629            item_fields.insert(
630                "CARD".to_string(),
631                serde_yaml::Value::String(tag_name.clone()),
632            );
633
634            // Convert to JSON and add to CARDS array
635            let item_json = serde_json::to_value(&item_fields)
636                .map_err(|e| format!("Failed to convert card to JSON: {}", e))?;
637            cards_array.push(item_json);
638        }
639    }
640
641    // Extract global body
642    // Body starts after global frontmatter or quill block (whichever comes first)
643    // Body ends at the first card block or EOF
644    let first_non_card_block_idx = blocks
645        .iter()
646        .position(|b| b.tag.is_none() && b.quill_name.is_none())
647        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));
648
649    let (body_start, body_end) = if let Some(idx) = first_non_card_block_idx {
650        // Body starts after the first non-card block (global frontmatter or quill)
651        let start = blocks[idx].end;
652
653        // Body ends at the first card block after this, or EOF
654        let end = blocks
655            .iter()
656            .skip(idx + 1)
657            .find(|b| b.tag.is_some())
658            .map(|b| b.start)
659            .unwrap_or(markdown.len());
660
661        (start, end)
662    } else {
663        // No global frontmatter or quill block - body is everything before the first card block
664        let end = blocks
665            .iter()
666            .find(|b| b.tag.is_some())
667            .map(|b| b.start)
668            .unwrap_or(0);
669
670        (0, end)
671    };
672
673    let global_body = &markdown[body_start..body_end];
674
675    fields.insert(
676        BODY_FIELD.to_string(),
677        QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
678    );
679
680    // Always add CARDS array to fields (may be empty)
681    fields.insert(
682        "CARDS".to_string(),
683        QuillValue::from_json(serde_json::Value::Array(cards_array)),
684    );
685
686    let quill_tag = quill_name.unwrap_or_else(|| "__default__".to_string());
687    let parsed = ParsedDocument::with_quill_tag(fields, quill_tag);
688
689    Ok(parsed)
690}
691
692#[cfg(test)]
693mod tests {
694    use super::*;
695
696    #[test]
697    fn test_no_frontmatter() {
698        let markdown = "# Hello World\n\nThis is a test.";
699        let doc = decompose(markdown).unwrap();
700
701        assert_eq!(doc.body(), Some(markdown));
702        assert_eq!(doc.fields().len(), 1);
703        // Verify default quill tag is set
704        assert_eq!(doc.quill_tag(), "__default__");
705    }
706
707    #[test]
708    fn test_with_frontmatter() {
709        let markdown = r#"---
710title: Test Document
711author: Test Author
712---
713
714# Hello World
715
716This is the body."#;
717
718        let doc = decompose(markdown).unwrap();
719
720        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
721        assert_eq!(
722            doc.get_field("title").unwrap().as_str().unwrap(),
723            "Test Document"
724        );
725        assert_eq!(
726            doc.get_field("author").unwrap().as_str().unwrap(),
727            "Test Author"
728        );
729        assert_eq!(doc.fields().len(), 4); // title, author, body, CARDS
730                                           // Verify default quill tag is set when no QUILL directive
731        assert_eq!(doc.quill_tag(), "__default__");
732    }
733
734    #[test]
735    fn test_complex_yaml_frontmatter() {
736        let markdown = r#"---
737title: Complex Document
738tags:
739  - test
740  - yaml
741metadata:
742  version: 1.0
743  nested:
744    field: value
745---
746
747Content here."#;
748
749        let doc = decompose(markdown).unwrap();
750
751        assert_eq!(doc.body(), Some("\nContent here."));
752        assert_eq!(
753            doc.get_field("title").unwrap().as_str().unwrap(),
754            "Complex Document"
755        );
756
757        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
758        assert_eq!(tags.len(), 2);
759        assert_eq!(tags[0].as_str().unwrap(), "test");
760        assert_eq!(tags[1].as_str().unwrap(), "yaml");
761    }
762
763    #[test]
764    fn test_with_defaults_empty_document() {
765        use std::collections::HashMap;
766
767        let mut defaults = HashMap::new();
768        defaults.insert(
769            "status".to_string(),
770            QuillValue::from_json(serde_json::json!("draft")),
771        );
772        defaults.insert(
773            "version".to_string(),
774            QuillValue::from_json(serde_json::json!(1)),
775        );
776
777        // Create an empty parsed document
778        let doc = ParsedDocument::new(HashMap::new());
779        let doc_with_defaults = doc.with_defaults(&defaults);
780
781        // Check that defaults were applied
782        assert_eq!(
783            doc_with_defaults
784                .get_field("status")
785                .unwrap()
786                .as_str()
787                .unwrap(),
788            "draft"
789        );
790        assert_eq!(
791            doc_with_defaults
792                .get_field("version")
793                .unwrap()
794                .as_number()
795                .unwrap()
796                .as_i64()
797                .unwrap(),
798            1
799        );
800    }
801
802    #[test]
803    fn test_with_defaults_preserves_existing_values() {
804        use std::collections::HashMap;
805
806        let mut defaults = HashMap::new();
807        defaults.insert(
808            "status".to_string(),
809            QuillValue::from_json(serde_json::json!("draft")),
810        );
811
812        // Create document with existing status
813        let mut fields = HashMap::new();
814        fields.insert(
815            "status".to_string(),
816            QuillValue::from_json(serde_json::json!("published")),
817        );
818        let doc = ParsedDocument::new(fields);
819
820        let doc_with_defaults = doc.with_defaults(&defaults);
821
822        // Existing value should be preserved
823        assert_eq!(
824            doc_with_defaults
825                .get_field("status")
826                .unwrap()
827                .as_str()
828                .unwrap(),
829            "published"
830        );
831    }
832
833    #[test]
834    fn test_with_defaults_partial_application() {
835        use std::collections::HashMap;
836
837        let mut defaults = HashMap::new();
838        defaults.insert(
839            "status".to_string(),
840            QuillValue::from_json(serde_json::json!("draft")),
841        );
842        defaults.insert(
843            "version".to_string(),
844            QuillValue::from_json(serde_json::json!(1)),
845        );
846
847        // Create document with only one field
848        let mut fields = HashMap::new();
849        fields.insert(
850            "status".to_string(),
851            QuillValue::from_json(serde_json::json!("published")),
852        );
853        let doc = ParsedDocument::new(fields);
854
855        let doc_with_defaults = doc.with_defaults(&defaults);
856
857        // Existing field preserved, missing field gets default
858        assert_eq!(
859            doc_with_defaults
860                .get_field("status")
861                .unwrap()
862                .as_str()
863                .unwrap(),
864            "published"
865        );
866        assert_eq!(
867            doc_with_defaults
868                .get_field("version")
869                .unwrap()
870                .as_number()
871                .unwrap()
872                .as_i64()
873                .unwrap(),
874            1
875        );
876    }
877
878    #[test]
879    fn test_with_defaults_no_defaults() {
880        use std::collections::HashMap;
881
882        let defaults = HashMap::new(); // Empty defaults map
883
884        let doc = ParsedDocument::new(HashMap::new());
885        let doc_with_defaults = doc.with_defaults(&defaults);
886
887        // No defaults should be applied
888        assert!(doc_with_defaults.fields().is_empty());
889    }
890
891    #[test]
892    fn test_with_defaults_complex_types() {
893        use std::collections::HashMap;
894
895        let mut defaults = HashMap::new();
896        defaults.insert(
897            "tags".to_string(),
898            QuillValue::from_json(serde_json::json!(["default", "tag"])),
899        );
900
901        let doc = ParsedDocument::new(HashMap::new());
902        let doc_with_defaults = doc.with_defaults(&defaults);
903
904        // Complex default value should be applied
905        let tags = doc_with_defaults
906            .get_field("tags")
907            .unwrap()
908            .as_sequence()
909            .unwrap();
910        assert_eq!(tags.len(), 2);
911        assert_eq!(tags[0].as_str().unwrap(), "default");
912        assert_eq!(tags[1].as_str().unwrap(), "tag");
913    }
914
915    #[test]
916    fn test_with_coercion_singular_to_array() {
917        use std::collections::HashMap;
918
919        let schema = QuillValue::from_json(serde_json::json!({
920            "$schema": "https://json-schema.org/draft/2019-09/schema",
921            "type": "object",
922            "properties": {
923                "tags": {"type": "array"}
924            }
925        }));
926
927        let mut fields = HashMap::new();
928        fields.insert(
929            "tags".to_string(),
930            QuillValue::from_json(serde_json::json!("single-tag")),
931        );
932        let doc = ParsedDocument::new(fields);
933
934        let coerced_doc = doc.with_coercion(&schema);
935
936        let tags = coerced_doc.get_field("tags").unwrap();
937        assert!(tags.as_array().is_some());
938        let tags_array = tags.as_array().unwrap();
939        assert_eq!(tags_array.len(), 1);
940        assert_eq!(tags_array[0].as_str().unwrap(), "single-tag");
941    }
942
943    #[test]
944    fn test_with_coercion_string_to_boolean() {
945        use std::collections::HashMap;
946
947        let schema = QuillValue::from_json(serde_json::json!({
948            "$schema": "https://json-schema.org/draft/2019-09/schema",
949            "type": "object",
950            "properties": {
951                "active": {"type": "boolean"}
952            }
953        }));
954
955        let mut fields = HashMap::new();
956        fields.insert(
957            "active".to_string(),
958            QuillValue::from_json(serde_json::json!("true")),
959        );
960        let doc = ParsedDocument::new(fields);
961
962        let coerced_doc = doc.with_coercion(&schema);
963
964        assert_eq!(
965            coerced_doc.get_field("active").unwrap().as_bool().unwrap(),
966            true
967        );
968    }
969
970    #[test]
971    fn test_with_coercion_string_to_number() {
972        use std::collections::HashMap;
973
974        let schema = QuillValue::from_json(serde_json::json!({
975            "$schema": "https://json-schema.org/draft/2019-09/schema",
976            "type": "object",
977            "properties": {
978                "count": {"type": "number"}
979            }
980        }));
981
982        let mut fields = HashMap::new();
983        fields.insert(
984            "count".to_string(),
985            QuillValue::from_json(serde_json::json!("42")),
986        );
987        let doc = ParsedDocument::new(fields);
988
989        let coerced_doc = doc.with_coercion(&schema);
990
991        assert_eq!(
992            coerced_doc.get_field("count").unwrap().as_i64().unwrap(),
993            42
994        );
995    }
996
997    #[test]
998    fn test_invalid_yaml() {
999        let markdown = r#"---
1000title: [invalid yaml
1001author: missing close bracket
1002---
1003
1004Content here."#;
1005
1006        let result = decompose(markdown);
1007        assert!(result.is_err());
1008        assert!(result
1009            .unwrap_err()
1010            .to_string()
1011            .contains("Invalid YAML frontmatter"));
1012    }
1013
1014    #[test]
1015    fn test_unclosed_frontmatter() {
1016        let markdown = r#"---
1017title: Test
1018author: Test Author
1019
1020Content without closing ---"#;
1021
1022        let result = decompose(markdown);
1023        assert!(result.is_err());
1024        assert!(result.unwrap_err().to_string().contains("not closed"));
1025    }
1026
1027    // Extended metadata tests
1028
1029    #[test]
1030    fn test_basic_tagged_block() {
1031        let markdown = r#"---
1032title: Main Document
1033---
1034
1035Main body content.
1036
1037---
1038CARD: items
1039name: Item 1
1040---
1041
1042Body of item 1."#;
1043
1044        let doc = decompose(markdown).unwrap();
1045
1046        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
1047        assert_eq!(
1048            doc.get_field("title").unwrap().as_str().unwrap(),
1049            "Main Document"
1050        );
1051
1052        // Cards are now in CARDS array with CARD discriminator
1053        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1054        assert_eq!(cards.len(), 1);
1055
1056        let item = cards[0].as_object().unwrap();
1057        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1058        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1059        assert_eq!(
1060            item.get("body").unwrap().as_str().unwrap(),
1061            "\nBody of item 1."
1062        );
1063    }
1064
1065    #[test]
1066    fn test_multiple_tagged_blocks() {
1067        let markdown = r#"---
1068CARD: items
1069name: Item 1
1070tags: [a, b]
1071---
1072
1073First item body.
1074
1075---
1076CARD: items
1077name: Item 2
1078tags: [c, d]
1079---
1080
1081Second item body."#;
1082
1083        let doc = decompose(markdown).unwrap();
1084
1085        // Cards are in CARDS array
1086        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1087        assert_eq!(cards.len(), 2);
1088
1089        let item1 = cards[0].as_object().unwrap();
1090        assert_eq!(item1.get("CARD").unwrap().as_str().unwrap(), "items");
1091        assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");
1092
1093        let item2 = cards[1].as_object().unwrap();
1094        assert_eq!(item2.get("CARD").unwrap().as_str().unwrap(), "items");
1095        assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
1096    }
1097
1098    #[test]
1099    fn test_mixed_global_and_tagged() {
1100        let markdown = r#"---
1101title: Global
1102author: John Doe
1103---
1104
1105Global body.
1106
1107---
1108CARD: sections
1109title: Section 1
1110---
1111
1112Section 1 content.
1113
1114---
1115CARD: sections
1116title: Section 2
1117---
1118
1119Section 2 content."#;
1120
1121        let doc = decompose(markdown).unwrap();
1122
1123        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
1124        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
1125
1126        // Cards are in unified CARDS array
1127        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1128        assert_eq!(cards.len(), 2);
1129        assert_eq!(
1130            cards[0]
1131                .as_object()
1132                .unwrap()
1133                .get("CARD")
1134                .unwrap()
1135                .as_str()
1136                .unwrap(),
1137            "sections"
1138        );
1139    }
1140
1141    #[test]
1142    fn test_empty_tagged_metadata() {
1143        let markdown = r#"---
1144CARD: items
1145---
1146
1147Body without metadata."#;
1148
1149        let doc = decompose(markdown).unwrap();
1150
1151        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1152        assert_eq!(cards.len(), 1);
1153
1154        let item = cards[0].as_object().unwrap();
1155        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1156        assert_eq!(
1157            item.get("body").unwrap().as_str().unwrap(),
1158            "\nBody without metadata."
1159        );
1160    }
1161
1162    #[test]
1163    fn test_tagged_block_without_body() {
1164        let markdown = r#"---
1165CARD: items
1166name: Item
1167---"#;
1168
1169        let doc = decompose(markdown).unwrap();
1170
1171        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1172        assert_eq!(cards.len(), 1);
1173
1174        let item = cards[0].as_object().unwrap();
1175        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1176        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
1177    }
1178
1179    #[test]
1180    fn test_name_collision_global_and_tagged() {
1181        let markdown = r#"---
1182items: "global value"
1183---
1184
1185Body
1186
1187---
1188CARD: items
1189name: Item
1190---
1191
1192Item body"#;
1193
1194        let result = decompose(markdown);
1195        assert!(result.is_err());
1196        assert!(result.unwrap_err().to_string().contains("collision"));
1197    }
1198
1199    #[test]
1200    fn test_card_name_collision_with_array_field() {
1201        // CARD type names cannot conflict with any frontmatter field names (including arrays)
1202        let markdown = r#"---
1203items:
1204  - name: Global Item 1
1205    value: 100
1206---
1207
1208Global body
1209
1210---
1211CARD: items
1212name: Scope Item 1
1213---
1214
1215Scope item 1 body"#;
1216
1217        let result = decompose(markdown);
1218        assert!(result.is_err());
1219        assert!(result.unwrap_err().to_string().contains("collision"));
1220    }
1221
1222    #[test]
1223    fn test_empty_global_array_with_card() {
1224        // CARD type names cannot conflict with any frontmatter field names (even empty arrays)
1225        let markdown = r#"---
1226items: []
1227---
1228
1229Global body
1230
1231---
1232CARD: items
1233name: Item 1
1234---
1235
1236Item 1 body"#;
1237
1238        let result = decompose(markdown);
1239        assert!(result.is_err());
1240        assert!(result.unwrap_err().to_string().contains("collision"));
1241    }
1242
1243    #[test]
1244    fn test_reserved_field_name() {
1245        let markdown = r#"---
1246CARD: body
1247content: Test
1248---"#;
1249
1250        let result = decompose(markdown);
1251        assert!(result.is_err());
1252        assert!(result.unwrap_err().to_string().contains("reserved"));
1253    }
1254
1255    #[test]
1256    fn test_invalid_tag_syntax() {
1257        let markdown = r#"---
1258CARD: Invalid-Name
1259title: Test
1260---"#;
1261
1262        let result = decompose(markdown);
1263        assert!(result.is_err());
1264        assert!(result
1265            .unwrap_err()
1266            .to_string()
1267            .contains("Invalid card field name"));
1268    }
1269
1270    #[test]
1271    fn test_multiple_global_frontmatter_blocks() {
1272        let markdown = r#"---
1273title: First
1274---
1275
1276Body
1277
1278---
1279author: Second
1280---
1281
1282More body"#;
1283
1284        let result = decompose(markdown);
1285        assert!(result.is_err());
1286
1287        // Verify the error message contains CARD hint
1288        let err = result.unwrap_err();
1289        let err_str = err.to_string();
1290        assert!(
1291            err_str.contains("CARD"),
1292            "Error should mention CARD directive: {}",
1293            err_str
1294        );
1295        assert!(
1296            err_str.contains("missing"),
1297            "Error should indicate missing directive: {}",
1298            err_str
1299        );
1300    }
1301
1302    #[test]
1303    fn test_adjacent_blocks_different_tags() {
1304        let markdown = r#"---
1305CARD: items
1306name: Item 1
1307---
1308
1309Item 1 body
1310
1311---
1312CARD: sections
1313title: Section 1
1314---
1315
1316Section 1 body"#;
1317
1318        let doc = decompose(markdown).unwrap();
1319
1320        // All cards in unified CARDS array
1321        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1322        assert_eq!(cards.len(), 2);
1323
1324        // First card is "items" type
1325        let item = cards[0].as_object().unwrap();
1326        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1327        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1328
1329        // Second card is "sections" type
1330        let section = cards[1].as_object().unwrap();
1331        assert_eq!(section.get("CARD").unwrap().as_str().unwrap(), "sections");
1332        assert_eq!(section.get("title").unwrap().as_str().unwrap(), "Section 1");
1333    }
1334
1335    #[test]
1336    fn test_order_preservation() {
1337        let markdown = r#"---
1338CARD: items
1339id: 1
1340---
1341
1342First
1343
1344---
1345CARD: items
1346id: 2
1347---
1348
1349Second
1350
1351---
1352CARD: items
1353id: 3
1354---
1355
1356Third"#;
1357
1358        let doc = decompose(markdown).unwrap();
1359
1360        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1361        assert_eq!(cards.len(), 3);
1362
1363        for (i, card) in cards.iter().enumerate() {
1364            let mapping = card.as_object().unwrap();
1365            assert_eq!(mapping.get("CARD").unwrap().as_str().unwrap(), "items");
1366            let id = mapping.get("id").unwrap().as_i64().unwrap();
1367            assert_eq!(id, (i + 1) as i64);
1368        }
1369    }
1370
1371    #[test]
1372    fn test_product_catalog_integration() {
1373        let markdown = r#"---
1374title: Product Catalog
1375author: John Doe
1376date: 2024-01-01
1377---
1378
1379This is the main catalog description.
1380
1381---
1382CARD: products
1383name: Widget A
1384price: 19.99
1385sku: WID-001
1386---
1387
1388The **Widget A** is our most popular product.
1389
1390---
1391CARD: products
1392name: Gadget B
1393price: 29.99
1394sku: GAD-002
1395---
1396
1397The **Gadget B** is perfect for professionals.
1398
1399---
1400CARD: reviews
1401product: Widget A
1402rating: 5
1403---
1404
1405"Excellent product! Highly recommended."
1406
1407---
1408CARD: reviews
1409product: Gadget B
1410rating: 4
1411---
1412
1413"Very good, but a bit pricey.""#;
1414
1415        let doc = decompose(markdown).unwrap();
1416
1417        // Verify global fields
1418        assert_eq!(
1419            doc.get_field("title").unwrap().as_str().unwrap(),
1420            "Product Catalog"
1421        );
1422        assert_eq!(
1423            doc.get_field("author").unwrap().as_str().unwrap(),
1424            "John Doe"
1425        );
1426        assert_eq!(
1427            doc.get_field("date").unwrap().as_str().unwrap(),
1428            "2024-01-01"
1429        );
1430
1431        // Verify global body
1432        assert!(doc.body().unwrap().contains("main catalog description"));
1433
1434        // All cards in unified CARDS array
1435        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1436        assert_eq!(cards.len(), 4); // 2 products + 2 reviews
1437
1438        // First 2 are products
1439        let product1 = cards[0].as_object().unwrap();
1440        assert_eq!(product1.get("CARD").unwrap().as_str().unwrap(), "products");
1441        assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
1442        assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);
1443
1444        let product2 = cards[1].as_object().unwrap();
1445        assert_eq!(product2.get("CARD").unwrap().as_str().unwrap(), "products");
1446        assert_eq!(product2.get("name").unwrap().as_str().unwrap(), "Gadget B");
1447
1448        // Last 2 are reviews
1449        let review1 = cards[2].as_object().unwrap();
1450        assert_eq!(review1.get("CARD").unwrap().as_str().unwrap(), "reviews");
1451        assert_eq!(
1452            review1.get("product").unwrap().as_str().unwrap(),
1453            "Widget A"
1454        );
1455        assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);
1456
1457        // Total fields: title, author, date, body, CARDS = 5
1458        assert_eq!(doc.fields().len(), 5);
1459    }
1460
1461    #[test]
1462    fn taro_quill_directive() {
1463        let markdown = r#"---
1464QUILL: usaf_memo
1465memo_for: [ORG/SYMBOL]
1466memo_from: [ORG/SYMBOL]
1467---
1468
1469This is the memo body."#;
1470
1471        let doc = decompose(markdown).unwrap();
1472
1473        // Verify quill tag is set
1474        assert_eq!(doc.quill_tag(), "usaf_memo");
1475
1476        // Verify fields from quill block become frontmatter
1477        assert_eq!(
1478            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1479                .as_str()
1480                .unwrap(),
1481            "ORG/SYMBOL"
1482        );
1483
1484        // Verify body
1485        assert_eq!(doc.body(), Some("\nThis is the memo body."));
1486    }
1487
1488    #[test]
1489    fn test_quill_with_card_blocks() {
1490        let markdown = r#"---
1491QUILL: document
1492title: Test Document
1493---
1494
1495Main body.
1496
1497---
1498CARD: sections
1499name: Section 1
1500---
1501
1502Section 1 body."#;
1503
1504        let doc = decompose(markdown).unwrap();
1505
1506        // Verify quill tag
1507        assert_eq!(doc.quill_tag(), "document");
1508
1509        // Verify global field from quill block
1510        assert_eq!(
1511            doc.get_field("title").unwrap().as_str().unwrap(),
1512            "Test Document"
1513        );
1514
1515        // Verify card blocks work via CARDS array
1516        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1517        assert_eq!(cards.len(), 1);
1518        assert_eq!(
1519            cards[0]
1520                .as_object()
1521                .unwrap()
1522                .get("CARD")
1523                .unwrap()
1524                .as_str()
1525                .unwrap(),
1526            "sections"
1527        );
1528
1529        // Verify body
1530        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1531    }
1532
1533    #[test]
1534    fn test_multiple_quill_directives_error() {
1535        let markdown = r#"---
1536QUILL: first
1537---
1538
1539---
1540QUILL: second
1541---"#;
1542
1543        let result = decompose(markdown);
1544        assert!(result.is_err());
1545        // QUILL in inline block is now an error (must appear in top-level frontmatter only)
1546        assert!(result
1547            .unwrap_err()
1548            .to_string()
1549            .contains("top-level frontmatter"));
1550    }
1551
1552    #[test]
1553    fn test_invalid_quill_name() {
1554        let markdown = r#"---
1555QUILL: Invalid-Name
1556---"#;
1557
1558        let result = decompose(markdown);
1559        assert!(result.is_err());
1560        assert!(result
1561            .unwrap_err()
1562            .to_string()
1563            .contains("Invalid quill name"));
1564    }
1565
1566    #[test]
1567    fn test_quill_wrong_value_type() {
1568        let markdown = r#"---
1569QUILL: 123
1570---"#;
1571
1572        let result = decompose(markdown);
1573        assert!(result.is_err());
1574        assert!(result
1575            .unwrap_err()
1576            .to_string()
1577            .contains("QUILL value must be a string"));
1578    }
1579
1580    #[test]
1581    fn test_card_wrong_value_type() {
1582        let markdown = r#"---
1583CARD: 123
1584---"#;
1585
1586        let result = decompose(markdown);
1587        assert!(result.is_err());
1588        assert!(result
1589            .unwrap_err()
1590            .to_string()
1591            .contains("CARD/SCOPE value must be a string"));
1592    }
1593
1594    #[test]
1595    fn test_both_quill_and_card_error() {
1596        let markdown = r#"---
1597QUILL: test
1598CARD: items
1599---"#;
1600
1601        let result = decompose(markdown);
1602        assert!(result.is_err());
1603        assert!(result
1604            .unwrap_err()
1605            .to_string()
1606            .contains("Cannot specify both QUILL and CARD"));
1607    }
1608
1609    #[test]
1610    fn test_blank_lines_in_frontmatter() {
1611        // New parsing standard: blank lines are allowed within YAML blocks
1612        let markdown = r#"---
1613title: Test Document
1614author: Test Author
1615
1616description: This has a blank line above it
1617tags:
1618  - one
1619  - two
1620---
1621
1622# Hello World
1623
1624This is the body."#;
1625
1626        let doc = decompose(markdown).unwrap();
1627
1628        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
1629        assert_eq!(
1630            doc.get_field("title").unwrap().as_str().unwrap(),
1631            "Test Document"
1632        );
1633        assert_eq!(
1634            doc.get_field("author").unwrap().as_str().unwrap(),
1635            "Test Author"
1636        );
1637        assert_eq!(
1638            doc.get_field("description").unwrap().as_str().unwrap(),
1639            "This has a blank line above it"
1640        );
1641
1642        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
1643        assert_eq!(tags.len(), 2);
1644    }
1645
1646    #[test]
1647    fn test_blank_lines_in_scope_blocks() {
1648        // Blank lines should be allowed in CARD blocks too
1649        let markdown = r#"---
1650CARD: items
1651name: Item 1
1652
1653price: 19.99
1654
1655tags:
1656  - electronics
1657  - gadgets
1658---
1659
1660Body of item 1."#;
1661
1662        let doc = decompose(markdown).unwrap();
1663
1664        // Cards are in CARDS array
1665        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1666        assert_eq!(cards.len(), 1);
1667
1668        let item = cards[0].as_object().unwrap();
1669        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1670        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1671        assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);
1672
1673        let tags = item.get("tags").unwrap().as_array().unwrap();
1674        assert_eq!(tags.len(), 2);
1675    }
1676
1677    #[test]
1678    fn test_horizontal_rule_with_blank_lines_above_and_below() {
1679        // Horizontal rule: blank lines both above AND below the ---
1680        let markdown = r#"---
1681title: Test
1682---
1683
1684First paragraph.
1685
1686---
1687
1688Second paragraph."#;
1689
1690        let doc = decompose(markdown).unwrap();
1691
1692        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1693
1694        // The body should contain the horizontal rule (---) as part of the content
1695        let body = doc.body().unwrap();
1696        assert!(body.contains("First paragraph."));
1697        assert!(body.contains("---"));
1698        assert!(body.contains("Second paragraph."));
1699    }
1700
1701    #[test]
1702    fn test_horizontal_rule_not_preceded_by_blank() {
1703        // --- not preceded by blank line but followed by blank line is NOT a horizontal rule
1704        // It's also NOT a valid metadata block opening (since it's followed by blank)
1705        let markdown = r#"---
1706title: Test
1707---
1708
1709First paragraph.
1710---
1711
1712Second paragraph."#;
1713
1714        let doc = decompose(markdown).unwrap();
1715
1716        let body = doc.body().unwrap();
1717        // The second --- should be in the body as text (not a horizontal rule since no blank above)
1718        assert!(body.contains("---"));
1719    }
1720
1721    #[test]
1722    fn test_multiple_blank_lines_in_yaml() {
1723        // Multiple blank lines should also be allowed
1724        let markdown = r#"---
1725title: Test
1726
1727
1728author: John Doe
1729
1730
1731version: 1.0
1732---
1733
1734Body content."#;
1735
1736        let doc = decompose(markdown).unwrap();
1737
1738        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1739        assert_eq!(
1740            doc.get_field("author").unwrap().as_str().unwrap(),
1741            "John Doe"
1742        );
1743        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1744    }
1745
1746    #[test]
1747    fn test_html_comment_interaction() {
1748        let markdown = r#"<!---
1749---> the rest of the page content
1750
1751---
1752key: value
1753---
1754"#;
1755        let doc = decompose(markdown).unwrap();
1756
1757        // The comment should be ignored (or at least not cause a parse error)
1758        // The frontmatter should be parsed
1759        let key = doc.get_field("key").and_then(|v| v.as_str());
1760        assert_eq!(key, Some("value"));
1761    }
1762}
1763#[cfg(test)]
1764mod demo_file_test {
1765    use super::*;
1766
1767    #[test]
1768    fn test_extended_metadata_demo_file() {
1769        let markdown = include_str!("../../fixtures/resources/extended_metadata_demo.md");
1770        let doc = decompose(markdown).unwrap();
1771
1772        // Verify global fields
1773        assert_eq!(
1774            doc.get_field("title").unwrap().as_str().unwrap(),
1775            "Extended Metadata Demo"
1776        );
1777        assert_eq!(
1778            doc.get_field("author").unwrap().as_str().unwrap(),
1779            "Quillmark Team"
1780        );
1781        // version is parsed as a number by YAML
1782        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1783
1784        // Verify body
1785        assert!(doc
1786            .body()
1787            .unwrap()
1788            .contains("extended YAML metadata standard"));
1789
1790        // All cards are now in unified CARDS array
1791        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1792        assert_eq!(cards.len(), 5); // 3 features + 2 use_cases
1793
1794        // Count features and use_cases cards
1795        let features_count = cards
1796            .iter()
1797            .filter(|c| {
1798                c.as_object()
1799                    .unwrap()
1800                    .get("CARD")
1801                    .unwrap()
1802                    .as_str()
1803                    .unwrap()
1804                    == "features"
1805            })
1806            .count();
1807        let use_cases_count = cards
1808            .iter()
1809            .filter(|c| {
1810                c.as_object()
1811                    .unwrap()
1812                    .get("CARD")
1813                    .unwrap()
1814                    .as_str()
1815                    .unwrap()
1816                    == "use_cases"
1817            })
1818            .count();
1819        assert_eq!(features_count, 3);
1820        assert_eq!(use_cases_count, 2);
1821
1822        // Check first card is a feature
1823        let feature1 = cards[0].as_object().unwrap();
1824        assert_eq!(feature1.get("CARD").unwrap().as_str().unwrap(), "features");
1825        assert_eq!(
1826            feature1.get("name").unwrap().as_str().unwrap(),
1827            "Tag Directives"
1828        );
1829    }
1830
1831    #[test]
1832    fn test_input_size_limit() {
1833        // Create markdown larger than MAX_INPUT_SIZE (10 MB)
1834        let size = crate::error::MAX_INPUT_SIZE + 1;
1835        let large_markdown = "a".repeat(size);
1836
1837        let result = decompose(&large_markdown);
1838        assert!(result.is_err());
1839
1840        let err_msg = result.unwrap_err().to_string();
1841        assert!(err_msg.contains("Input too large"));
1842    }
1843
1844    #[test]
1845    fn test_yaml_size_limit() {
1846        // Create YAML block larger than MAX_YAML_SIZE (1 MB)
1847        let mut markdown = String::from("---\n");
1848
1849        // Create a very large YAML field
1850        let size = crate::error::MAX_YAML_SIZE + 1;
1851        markdown.push_str("data: \"");
1852        markdown.push_str(&"x".repeat(size));
1853        markdown.push_str("\"\n---\n\nBody");
1854
1855        let result = decompose(&markdown);
1856        assert!(result.is_err());
1857
1858        let err_msg = result.unwrap_err().to_string();
1859        assert!(err_msg.contains("YAML block too large"));
1860    }
1861
1862    #[test]
1863    fn test_input_within_size_limit() {
1864        // Create markdown just under the limit
1865        let size = 1000; // Much smaller than limit
1866        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));
1867
1868        let result = decompose(&markdown);
1869        assert!(result.is_ok());
1870    }
1871
1872    #[test]
1873    fn test_yaml_within_size_limit() {
1874        // Create YAML block well within the limit
1875        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";
1876
1877        let result = decompose(&markdown);
1878        assert!(result.is_ok());
1879    }
1880
1881    // Tests for guillemet preservation in parsing (guillemets are NOT converted during parsing)
1882    // Guillemet conversion now happens in process_plate, not during parsing
1883    #[test]
1884    fn test_chevrons_preserved_in_body_no_frontmatter() {
1885        let markdown = "Use <<raw content>> here.";
1886        let doc = decompose(markdown).unwrap();
1887
1888        // Body should preserve chevrons (conversion happens later in process_plate)
1889        assert_eq!(doc.body(), Some("Use <<raw content>> here."));
1890    }
1891
1892    #[test]
1893    fn test_chevrons_preserved_in_body_with_frontmatter() {
1894        let markdown = r#"---
1895title: Test
1896---
1897
1898Use <<raw content>> here."#;
1899        let doc = decompose(markdown).unwrap();
1900
1901        // Body should preserve chevrons
1902        assert_eq!(doc.body(), Some("\nUse <<raw content>> here."));
1903    }
1904
1905    #[test]
1906    fn test_chevrons_preserved_in_yaml_string() {
1907        let markdown = r#"---
1908title: Test <<with chevrons>>
1909---
1910
1911Body content."#;
1912        let doc = decompose(markdown).unwrap();
1913
1914        // YAML string values should preserve chevrons
1915        assert_eq!(
1916            doc.get_field("title").unwrap().as_str().unwrap(),
1917            "Test <<with chevrons>>"
1918        );
1919    }
1920
1921    #[test]
1922    fn test_chevrons_preserved_in_yaml_array() {
1923        let markdown = r#"---
1924items:
1925  - "<<first>>"
1926  - "<<second>>"
1927---
1928
1929Body."#;
1930        let doc = decompose(markdown).unwrap();
1931
1932        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1933        assert_eq!(items[0].as_str().unwrap(), "<<first>>");
1934        assert_eq!(items[1].as_str().unwrap(), "<<second>>");
1935    }
1936
1937    #[test]
1938    fn test_chevrons_preserved_in_yaml_nested() {
1939        let markdown = r#"---
1940metadata:
1941  description: "<<nested value>>"
1942---
1943
1944Body."#;
1945        let doc = decompose(markdown).unwrap();
1946
1947        let metadata = doc.get_field("metadata").unwrap().as_object().unwrap();
1948        assert_eq!(
1949            metadata.get("description").unwrap().as_str().unwrap(),
1950            "<<nested value>>"
1951        );
1952    }
1953
1954    #[test]
1955    fn test_chevrons_preserved_in_code_blocks() {
1956        let markdown = r#"```
1957<<in code block>>
1958```
1959
1960<<outside code block>>"#;
1961        let doc = decompose(markdown).unwrap();
1962
1963        let body = doc.body().unwrap();
1964        // All chevrons should be preserved (no conversion during parsing)
1965        assert!(body.contains("<<in code block>>"));
1966        assert!(body.contains("<<outside code block>>"));
1967    }
1968
1969    #[test]
1970    fn test_chevrons_preserved_in_inline_code() {
1971        let markdown = "`<<in inline code>>` and <<outside inline code>>";
1972        let doc = decompose(markdown).unwrap();
1973
1974        let body = doc.body().unwrap();
1975        // All chevrons should be preserved
1976        assert!(body.contains("`<<in inline code>>`"));
1977        assert!(body.contains("<<outside inline code>>"));
1978    }
1979
1980    #[test]
1981    fn test_chevrons_preserved_in_tagged_block_body() {
1982        let markdown = r#"---
1983title: Main
1984---
1985
1986Main body.
1987
1988---
1989CARD: items
1990name: Item 1
1991---
1992
1993Use <<raw>> here."#;
1994        let doc = decompose(markdown).unwrap();
1995
1996        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1997        let item = cards[0].as_object().unwrap();
1998        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1999        let item_body = item.get("body").unwrap().as_str().unwrap();
2000        // Tagged block body should preserve chevrons
2001        assert!(item_body.contains("<<raw>>"));
2002    }
2003
2004    #[test]
2005    fn test_chevrons_preserved_in_tagged_block_yaml() {
2006        let markdown = r#"---
2007title: Main
2008---
2009
2010Main body.
2011
2012---
2013CARD: items
2014description: "<<tagged yaml>>"
2015---
2016
2017Item body."#;
2018        let doc = decompose(markdown).unwrap();
2019
2020        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2021        let item = cards[0].as_object().unwrap();
2022        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
2023        // Tagged block YAML should preserve chevrons
2024        assert_eq!(
2025            item.get("description").unwrap().as_str().unwrap(),
2026            "<<tagged yaml>>"
2027        );
2028    }
2029
2030    #[test]
2031    fn test_yaml_numbers_not_affected() {
2032        // Numbers should not be affected
2033        let markdown = r#"---
2034count: 42
2035---
2036
2037Body."#;
2038        let doc = decompose(markdown).unwrap();
2039        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
2040    }
2041
2042    #[test]
2043    fn test_yaml_booleans_not_affected() {
2044        // Booleans should not be affected
2045        let markdown = r#"---
2046active: true
2047---
2048
2049Body."#;
2050        let doc = decompose(markdown).unwrap();
2051        assert_eq!(doc.get_field("active").unwrap().as_bool().unwrap(), true);
2052    }
2053
2054    #[test]
2055    fn test_multiline_chevrons_preserved() {
2056        // Multiline chevrons should be preserved as-is
2057        let markdown = "<<text\nacross lines>>";
2058        let doc = decompose(markdown).unwrap();
2059
2060        let body = doc.body().unwrap();
2061        // Should contain the original chevrons
2062        assert!(body.contains("<<text"));
2063        assert!(body.contains("across lines>>"));
2064    }
2065
2066    #[test]
2067    fn test_unmatched_chevrons_preserved() {
2068        let markdown = "<<unmatched";
2069        let doc = decompose(markdown).unwrap();
2070
2071        let body = doc.body().unwrap();
2072        // Unmatched should remain as-is
2073        assert_eq!(body, "<<unmatched");
2074    }
2075}
2076
2077// Additional robustness tests
#[cfg(test)]
mod robustness_tests {
    //! Edge-case tests for `decompose` and `is_valid_tag_name`: delimiter
    //! handling, Unicode, YAML value kinds, CARD/QUILL directives, error paths,
    //! body extraction and tag-name validation.

    use super::*;

    // Edge cases for delimiter handling

    #[test]
    fn test_empty_document() {
        // Empty input yields an empty body and the default quill tag.
        let doc = decompose("").unwrap();
        assert_eq!(doc.body(), Some(""));
        assert_eq!(doc.quill_tag(), "__default__");
    }

    #[test]
    fn test_only_whitespace() {
        // Whitespace-only input is returned verbatim as the body.
        let doc = decompose("   \n\n   \t").unwrap();
        assert_eq!(doc.body(), Some("   \n\n   \t"));
    }

    #[test]
    fn test_only_dashes() {
        // Just "---" at document start without newline is not treated as frontmatter opener
        // (requires "---\n" to start a frontmatter block)
        let result = decompose("---");
        // This is NOT an error - "---" alone without newline is just body content
        assert!(result.is_ok());
        assert_eq!(result.unwrap().body(), Some("---"));
    }

    #[test]
    fn test_dashes_in_middle_of_line() {
        // --- not at start of line should not be treated as delimiter
        let markdown = "some text --- more text";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.body(), Some("some text --- more text"));
    }

    #[test]
    fn test_four_dashes() {
        // ---- is not a valid delimiter
        let markdown = "----\ntitle: Test\n----\n\nBody";
        let doc = decompose(markdown).unwrap();
        // Should treat entire content as body
        assert!(doc.body().unwrap().contains("----"));
    }

    #[test]
    fn test_crlf_line_endings() {
        // Windows-style line endings
        let markdown = "---\r\ntitle: Test\r\n---\r\n\r\nBody content.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert!(doc.body().unwrap().contains("Body content."));
    }

    #[test]
    fn test_mixed_line_endings() {
        // Mix of \n and \r\n
        let markdown = "---\ntitle: Test\r\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
    }

    #[test]
    fn test_frontmatter_at_eof_no_trailing_newline() {
        // Frontmatter closed at EOF without trailing newline
        let markdown = "---\ntitle: Test\n---";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert_eq!(doc.body(), Some(""));
    }

    #[test]
    fn test_empty_frontmatter() {
        // A frontmatter block needs some content between its delimiters:
        // "---\n---" is handled by the horizontal-rule logic rather than being
        // read as empty frontmatter. A block whose only content is whitespace
        // ("---\n \n---") is the valid way to express "no fields".
        let markdown = "---\n \n---\n\nBody content.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("Body content."));
        // Should have body and CARDS fields
        assert_eq!(doc.fields().len(), 2);
    }

    #[test]
    fn test_whitespace_only_frontmatter() {
        // Frontmatter with only whitespace
        let markdown = "---\n   \n\n   \n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("Body."));
    }

    // Unicode handling

    #[test]
    fn test_unicode_in_yaml_keys() {
        // Non-ASCII keys (accent-free French, Japanese) must be usable for lookup.
        let markdown = "---\ntitre: Bonjour\nタイトル: こんにちは\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("titre").unwrap().as_str().unwrap(), "Bonjour");
        assert_eq!(
            doc.get_field("タイトル").unwrap().as_str().unwrap(),
            "こんにちは"
        );
    }

    #[test]
    fn test_unicode_in_yaml_values() {
        // CJK text and an emoji in a value must come back unchanged.
        let markdown = "---\ntitle: 你好世界 🎉\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "你好世界 🎉"
        );
    }

    #[test]
    fn test_unicode_in_body() {
        // Multi-byte characters in the body must survive body extraction.
        let markdown = "---\ntitle: Test\n---\n\n日本語テキスト with emoji 🚀";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("日本語テキスト"));
        assert!(doc.body().unwrap().contains("🚀"));
    }

    // YAML edge cases

    #[test]
    fn test_yaml_multiline_string() {
        // Literal block scalar (`|`): line breaks are kept in the value.
        let markdown = r#"---
description: |
  This is a
  multiline string
  with preserved newlines.
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let desc = doc.get_field("description").unwrap().as_str().unwrap();
        assert!(desc.contains("multiline string"));
        assert!(desc.contains('\n'));
    }

    #[test]
    fn test_yaml_folded_string() {
        // Folded block scalar (`>`).
        let markdown = r#"---
description: >
  This is a folded
  string that becomes
  a single line.
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let desc = doc.get_field("description").unwrap().as_str().unwrap();
        // Folded strings join lines with spaces
        assert!(desc.contains("folded"));
    }

    #[test]
    fn test_yaml_null_value() {
        // An explicit `null` is kept as a field whose value is null.
        let markdown = "---\noptional: null\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.get_field("optional").unwrap().is_null());
    }

    #[test]
    fn test_yaml_empty_string_value() {
        // A quoted empty string is an empty string, not null.
        let markdown = "---\nempty: \"\"\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("empty").unwrap().as_str().unwrap(), "");
    }

    #[test]
    fn test_yaml_special_characters_in_string() {
        // YAML-significant characters inside quotes are plain text.
        let markdown = "---\nspecial: \"colon: here, and [brackets]\"\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(
            doc.get_field("special").unwrap().as_str().unwrap(),
            "colon: here, and [brackets]"
        );
    }

    #[test]
    fn test_yaml_nested_objects() {
        // Two levels of nesting: config -> database -> host/port.
        let markdown = r#"---
config:
  database:
    host: localhost
    port: 5432
  cache:
    enabled: true
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let config = doc.get_field("config").unwrap().as_object().unwrap();
        let db = config.get("database").unwrap().as_object().unwrap();
        assert_eq!(db.get("host").unwrap().as_str().unwrap(), "localhost");
        assert_eq!(db.get("port").unwrap().as_i64().unwrap(), 5432);
    }

    // CARD block edge cases

    #[test]
    fn test_card_with_empty_body() {
        // A CARD block that ends the document gets an empty body.
        let markdown = r#"---
CARD: items
name: Item
---"#;
        let doc = decompose(markdown).unwrap();
        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
        assert_eq!(cards.len(), 1);
        let item = cards[0].as_object().unwrap();
        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
    }

    #[test]
    fn test_card_consecutive_blocks() {
        // Two CARD blocks back to back, with no body text between them.
        let markdown = r#"---
CARD: a
id: 1
---
---
CARD: a
id: 2
---"#;
        let doc = decompose(markdown).unwrap();
        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
        assert_eq!(cards.len(), 2);
        // Both cards must carry the same card name.
        for card in cards.iter() {
            assert_eq!(
                card.as_object()
                    .unwrap()
                    .get("CARD")
                    .unwrap()
                    .as_str()
                    .unwrap(),
                "a"
            );
        }
    }

    #[test]
    fn test_card_with_body_containing_dashes() {
        // "---" in the middle of a body line must not end the card body.
        let markdown = r#"---
CARD: items
name: Item
---

Some text with --- dashes in it."#;
        let doc = decompose(markdown).unwrap();
        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
        let item = cards[0].as_object().unwrap();
        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
        let body = item.get("body").unwrap().as_str().unwrap();
        assert!(body.contains("--- dashes"));
    }

    // QUILL directive edge cases

    #[test]
    fn test_quill_with_underscore_prefix() {
        let markdown = "---\nQUILL: _internal\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "_internal");
    }

    #[test]
    fn test_quill_with_numbers() {
        let markdown = "---\nQUILL: form_8_v2\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "form_8_v2");
    }

    #[test]
    fn test_quill_with_additional_fields() {
        // QUILL may share its block with ordinary fields.
        let markdown = r#"---
QUILL: my_quill
title: Document Title
author: John Doe
---

Body content."#;
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "my_quill");
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Document Title"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "John Doe"
        );
    }

    // Error handling

    #[test]
    fn test_invalid_scope_name_uppercase() {
        // An all-uppercase card name is rejected with a specific message.
        let markdown = "---\nCARD: ITEMS\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid card field name"));
    }

    #[test]
    fn test_invalid_scope_name_starts_with_number() {
        let markdown = "---\nCARD: 123items\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_invalid_scope_name_with_hyphen() {
        let markdown = "---\nCARD: my-items\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_invalid_quill_name_uppercase() {
        let markdown = "---\nQUILL: MyQuill\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_yaml_syntax_error_missing_colon() {
        let markdown = "---\ntitle Test\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_yaml_syntax_error_bad_indentation() {
        let markdown = "---\nitems:\n- one\n - two\n---\n\nBody.";
        let result = decompose(markdown);
        // Bad indentation may or may not be an error depending on YAML parser
        // Just ensure it doesn't panic
        let _ = result;
    }

    // Body extraction edge cases

    #[test]
    fn test_body_with_leading_newlines() {
        let markdown = "---\ntitle: Test\n---\n\n\n\nBody with leading newlines.";
        let doc = decompose(markdown).unwrap();
        // Body should preserve leading newlines after frontmatter
        assert!(doc.body().unwrap().starts_with('\n'));
    }

    #[test]
    fn test_body_with_trailing_newlines() {
        let markdown = "---\ntitle: Test\n---\n\nBody.\n\n\n";
        let doc = decompose(markdown).unwrap();
        // Body should preserve trailing newlines
        assert!(doc.body().unwrap().ends_with('\n'));
    }

    #[test]
    fn test_no_body_after_frontmatter() {
        // Nothing after the closing delimiter gives an empty (not missing) body.
        let markdown = "---\ntitle: Test\n---";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.body(), Some(""));
    }

    // Tag name validation

    #[test]
    fn test_valid_tag_name_single_underscore() {
        assert!(is_valid_tag_name("_"));
    }

    #[test]
    fn test_valid_tag_name_underscore_prefix() {
        assert!(is_valid_tag_name("_private"));
    }

    #[test]
    fn test_valid_tag_name_with_numbers() {
        assert!(is_valid_tag_name("item1"));
        assert!(is_valid_tag_name("item_2"));
    }

    #[test]
    fn test_invalid_tag_name_empty() {
        assert!(!is_valid_tag_name(""));
    }

    #[test]
    fn test_invalid_tag_name_starts_with_number() {
        assert!(!is_valid_tag_name("1item"));
    }

    #[test]
    fn test_invalid_tag_name_uppercase() {
        assert!(!is_valid_tag_name("Items"));
        assert!(!is_valid_tag_name("ITEMS"));
    }

    #[test]
    fn test_invalid_tag_name_special_chars() {
        assert!(!is_valid_tag_name("my-items"));
        assert!(!is_valid_tag_name("my.items"));
        assert!(!is_valid_tag_name("my items"));
    }

    // Guillemet preprocessing in YAML

    #[test]
    fn test_guillemet_in_yaml_preserves_non_strings() {
        // Integer, float and boolean scalars must keep their types.
        let markdown = r#"---
count: 42
price: 19.99
active: true
items:
  - first
  - 100
  - true
---

Body."#;
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
        assert_eq!(doc.get_field("price").unwrap().as_f64().unwrap(), 19.99);
        // Assert the bool directly instead of comparing it against `true`.
        assert!(doc.get_field("active").unwrap().as_bool().unwrap());
    }

    #[test]
    fn test_guillemet_double_conversion_prevention() {
        // Ensure «» in input doesn't get double-processed
        let markdown = "---\ntitle: Already «converted»\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        // Should remain as-is (not double-escaped)
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Already «converted»"
        );
    }
}