quillmark_core/
parse.rs

1//! # Parsing Module
2//!
3//! Parsing functionality for markdown documents with YAML frontmatter.
4//!
5//! ## Overview
6//!
//! The `parse` module provides the [`ParsedDocument::from_markdown`] function for parsing markdown documents
//! with YAML frontmatter into a [`ParsedDocument`].
8//!
9//! ## Key Types
10//!
11//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
12//! - [`BODY_FIELD`]: Constant for the field name storing document body
13//!
14//! ## Examples
15//!
16//! ### Basic Parsing
17//!
18//! ```
19//! use quillmark_core::ParsedDocument;
20//!
21//! let markdown = r#"---
22//! title: My Document
23//! author: John Doe
24//! ---
25//!
26//! # Introduction
27//!
28//! Document content here.
29//! "#;
30//!
31//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
32//! let title = doc.get_field("title")
33//!     .and_then(|v| v.as_str())
34//!     .unwrap_or("Untitled");
35//! ```
36//!
37//! ## Error Handling
38//!
39//! The [`ParsedDocument::from_markdown`] function returns errors for:
40//! - Malformed YAML syntax
41//! - Unclosed frontmatter blocks
42//! - Multiple global frontmatter blocks
43//! - Both QUILL and CARD specified in the same block
44//! - Reserved field name usage
45//! - Name collisions
46//!
47//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.
48
49use std::collections::HashMap;
50
51use crate::value::QuillValue;
52
/// The field name used to store the document body.
///
/// The parser stores the markdown body under this key, both at the top level
/// of a parsed document and inside every CARD item. It is rejected as a CARD
/// value so that it cannot be shadowed by a tagged collection.
pub const BODY_FIELD: &str = "body";

/// Reserved tag name for quill specification.
// NOTE(review): not referenced by the parsing code in this module, which
// matches the upper-case `QUILL` frontmatter key directly — confirm usage
// elsewhere in the crate.
pub const QUILL_TAG: &str = "quill";
58
/// A parsed markdown document with frontmatter.
///
/// Instances are normally produced by [`ParsedDocument::from_markdown`], which
/// stores the frontmatter fields, the tagged (CARD) collections and the
/// document body (under [`BODY_FIELD`]) in a single map.
#[derive(Debug, Clone)]
pub struct ParsedDocument {
    /// Field name -> value. When produced by the parser this includes the body
    /// under [`BODY_FIELD`].
    fields: HashMap<String, QuillValue>,
    /// Name taken from the `QUILL` key, or `"__default__"` when none was given.
    quill_tag: String,
}
65
66impl ParsedDocument {
67    /// Create a new ParsedDocument with the given fields
68    pub fn new(fields: HashMap<String, QuillValue>) -> Self {
69        Self {
70            fields,
71            quill_tag: "__default__".to_string(),
72        }
73    }
74
75    /// Create a ParsedDocument from fields and quill tag
76    pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: String) -> Self {
77        Self { fields, quill_tag }
78    }
79
80    /// Create a ParsedDocument from markdown string
81    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
82        decompose(markdown).map_err(crate::error::ParseError::from)
83    }
84
85    /// Get the quill tag (from QUILL key, or "__default__" if not specified)
86    pub fn quill_tag(&self) -> &str {
87        &self.quill_tag
88    }
89
90    /// Get the document body
91    pub fn body(&self) -> Option<&str> {
92        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
93    }
94
95    /// Get a specific field
96    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
97        self.fields.get(name)
98    }
99
100    /// Get all fields (including body)
101    pub fn fields(&self) -> &HashMap<String, QuillValue> {
102        &self.fields
103    }
104
105    /// Create a new ParsedDocument with default values applied
106    ///
107    /// This method creates a new ParsedDocument with default values applied for any
108    /// fields that are missing from the original document but have defaults specified.
109    /// Existing fields are preserved and not overwritten.
110    ///
111    /// # Arguments
112    ///
113    /// * `defaults` - A HashMap of field names to their default QuillValues
114    ///
115    /// # Returns
116    ///
117    /// A new ParsedDocument with defaults applied for missing fields
118    pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
119        let mut fields = self.fields.clone();
120
121        for (field_name, default_value) in defaults {
122            // Only apply default if field is missing
123            if !fields.contains_key(field_name) {
124                fields.insert(field_name.clone(), default_value.clone());
125            }
126        }
127
128        Self {
129            fields,
130            quill_tag: self.quill_tag.clone(),
131        }
132    }
133
134    /// Create a new ParsedDocument with coerced field values
135    ///
136    /// This method applies type coercions to field values based on the schema.
137    /// Coercions include:
138    /// - Singular values to arrays when schema expects array
139    /// - String "true"/"false" to boolean
140    /// - Numbers to boolean (0=false, non-zero=true)
141    /// - String numbers to number type
142    /// - Boolean to number (true=1, false=0)
143    ///
144    /// # Arguments
145    ///
146    /// * `schema` - A JSON Schema object defining expected field types
147    ///
148    /// # Returns
149    ///
150    /// A new ParsedDocument with coerced field values
151    pub fn with_coercion(&self, schema: &QuillValue) -> Self {
152        use crate::schema::coerce_document;
153
154        let coerced_fields = coerce_document(schema, &self.fields);
155
156        Self {
157            fields: coerced_fields,
158            quill_tag: self.quill_tag.clone(),
159        }
160    }
161}
162
/// One `---` delimited metadata block located in the markdown source.
///
/// Offsets are byte positions into the original markdown string.
#[derive(Debug)]
struct MetadataBlock {
    /// Position of the opening "---".
    start: usize,
    /// Position just past the closing delimiter (including its line ending
    /// when the delimiter is not at the very end of the document).
    end: usize,
    /// Parsed YAML with any `QUILL`/`CARD` key removed. `None` when the block
    /// content is empty or when nothing remains after removing that key.
    /// Invalid YAML is reported as an error and never stored here.
    yaml_value: Option<serde_yaml::Value>,
    /// Field name taken from the `CARD` key, if present.
    tag: Option<String>,
    /// Quill name taken from the `QUILL` key, if present.
    quill_name: Option<String>,
}
171
/// Check that `name` matches the pattern `[a-z_][a-z0-9_]*`.
///
/// The first character must be an ASCII lowercase letter or an underscore;
/// every following character may additionally be an ASCII digit. The empty
/// string is rejected.
fn is_valid_tag_name(name: &str) -> bool {
    let mut rest = name.chars();

    match rest.next() {
        Some(head) if head == '_' || head.is_ascii_lowercase() => {
            rest.all(|c| c == '_' || c.is_ascii_lowercase() || c.is_ascii_digit())
        }
        // Empty input, or a leading character outside `[a-z_]`.
        _ => false,
    }
}
193
194/// Find all metadata blocks in the document
195fn find_metadata_blocks(
196    markdown: &str,
197) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
198    let mut blocks = Vec::new();
199    let mut pos = 0;
200
201    while pos < markdown.len() {
202        // Look for opening "---\n" or "---\r\n"
203        let search_str = &markdown[pos..];
204        let delimiter_result = search_str
205            .find("---\n")
206            .map(|p| (p, 4, "\n"))
207            .or_else(|| search_str.find("---\r\n").map(|p| (p, 5, "\r\n")));
208
209        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
210            let abs_pos = pos + delimiter_pos;
211
212            // Check if the delimiter is at the start of a line
213            let is_start_of_line = if abs_pos == 0 {
214                true
215            } else {
216                let char_before = markdown.as_bytes()[abs_pos - 1];
217                char_before == b'\n' || char_before == b'\r'
218            };
219
220            if !is_start_of_line {
221                pos = abs_pos + 1;
222                continue;
223            }
224
225            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"
226
227            // Check if this --- is a horizontal rule (blank lines above AND below)
228            let preceded_by_blank = if abs_pos > 0 {
229                // Check if there's a blank line before the ---
230                let before = &markdown[..abs_pos];
231                before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
232            } else {
233                false
234            };
235
236            let followed_by_blank = if content_start < markdown.len() {
237                markdown[content_start..].starts_with('\n')
238                    || markdown[content_start..].starts_with("\r\n")
239            } else {
240                false
241            };
242
243            // Horizontal rule: blank lines both above and below
244            if preceded_by_blank && followed_by_blank {
245                // This is a horizontal rule in the body, skip it
246                pos = abs_pos + 3; // Skip past "---"
247                continue;
248            }
249
250            // Check if followed by non-blank line (or if we're at document start)
251            // This starts a metadata block
252            if followed_by_blank {
253                // --- followed by blank line but NOT preceded by blank line
254                // This is NOT a metadata block opening, skip it
255                pos = abs_pos + 3;
256                continue;
257            }
258
259            // Found potential metadata block opening (followed by non-blank line)
260            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
261            let rest = &markdown[content_start..];
262
263            // First try to find delimiters with trailing newlines
264            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
265            let closing_with_newline = closing_patterns
266                .iter()
267                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
268                .min_by_key(|(p, _)| *p);
269
270            // Also check for closing at end of document (no trailing newline)
271            let closing_at_eof = ["\n---", "\r\n---"]
272                .iter()
273                .filter_map(|delim| {
274                    rest.find(delim).and_then(|p| {
275                        if p + delim.len() == rest.len() {
276                            Some((p, delim.len()))
277                        } else {
278                            None
279                        }
280                    })
281                })
282                .min_by_key(|(p, _)| *p);
283
284            let closing_result = match (closing_with_newline, closing_at_eof) {
285                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
286                (Some(_), Some(_)) => closing_with_newline,
287                (Some(_), None) => closing_with_newline,
288                (None, Some(_)) => closing_at_eof,
289                (None, None) => None,
290            };
291
292            if let Some((closing_pos, closing_len)) = closing_result {
293                let abs_closing_pos = content_start + closing_pos;
294                let content = &markdown[content_start..abs_closing_pos];
295
296                // Check YAML size limit
297                if content.len() > crate::error::MAX_YAML_SIZE {
298                    return Err(format!(
299                        "YAML block too large: {} bytes (max: {} bytes)",
300                        content.len(),
301                        crate::error::MAX_YAML_SIZE
302                    )
303                    .into());
304                }
305
306                // Parse YAML content to check for reserved keys (QUILL, SCOPE)
307                // First, try to parse as YAML
308                let (tag, quill_name, yaml_value) = if !content.is_empty() {
309                    // Try to parse the YAML to check for reserved keys
310                    match serde_yaml::from_str::<serde_yaml::Value>(content) {
311                        Ok(parsed_yaml) => {
312                            if let Some(mapping) = parsed_yaml.as_mapping() {
313                                let quill_key = serde_yaml::Value::String("QUILL".to_string());
314                                let card_key = serde_yaml::Value::String("CARD".to_string());
315
316                                let has_quill = mapping.contains_key(&quill_key);
317                                let has_card = mapping.contains_key(&card_key);
318
319                                if has_quill && has_card {
320                                    return Err(
321                                        "Cannot specify both QUILL and CARD in the same block"
322                                            .into(),
323                                    );
324                                }
325
326                                if has_quill {
327                                    // Extract quill name
328                                    let quill_value = mapping.get(&quill_key).unwrap();
329                                    let quill_name_str = quill_value
330                                        .as_str()
331                                        .ok_or("QUILL value must be a string")?;
332
333                                    if !is_valid_tag_name(quill_name_str) {
334                                        return Err(format!(
335                                            "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
336                                            quill_name_str
337                                        )
338                                        .into());
339                                    }
340
341                                    // Remove QUILL from the YAML value for processing
342                                    let mut new_mapping = mapping.clone();
343                                    new_mapping.remove(&quill_key);
344                                    let new_value = if new_mapping.is_empty() {
345                                        None
346                                    } else {
347                                        Some(serde_yaml::Value::Mapping(new_mapping))
348                                    };
349
350                                    (None, Some(quill_name_str.to_string()), new_value)
351                                } else if has_card {
352                                    // Extract scope field name
353                                    let card_value = mapping.get(&card_key).unwrap();
354                                    let field_name =
355                                        card_value.as_str().ok_or("CARD value must be a string")?;
356
357                                    if !is_valid_tag_name(field_name) {
358                                        return Err(format!(
359                                            "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
360                                            field_name
361                                        )
362                                        .into());
363                                    }
364
365                                    if field_name == BODY_FIELD {
366                                        return Err(format!(
367                                            "Cannot use reserved field name '{}' as CARD value",
368                                            BODY_FIELD
369                                        )
370                                        .into());
371                                    }
372
373                                    // Remove CARD from the YAML value for processing
374                                    let mut new_mapping = mapping.clone();
375                                    new_mapping.remove(&card_key);
376                                    let new_value = if new_mapping.is_empty() {
377                                        None
378                                    } else {
379                                        Some(serde_yaml::Value::Mapping(new_mapping))
380                                    };
381
382                                    (Some(field_name.to_string()), None, new_value)
383                                } else {
384                                    // No reserved keys, keep the parsed YAML
385                                    (None, None, Some(parsed_yaml))
386                                }
387                            } else {
388                                // Not a mapping, keep the parsed YAML (could be null for whitespace)
389                                (None, None, Some(parsed_yaml))
390                            }
391                        }
392                        Err(e) => {
393                            // YAML parsing failed - return error with context
394                            return Err(format!("Invalid YAML frontmatter: {}", e).into());
395                        }
396                    }
397                } else {
398                    // Empty content
399                    (None, None, None)
400                };
401
402                blocks.push(MetadataBlock {
403                    start: abs_pos,
404                    end: abs_closing_pos + closing_len, // After closing delimiter
405                    yaml_value,
406                    tag,
407                    quill_name,
408                });
409
410                pos = abs_closing_pos + closing_len;
411            } else if abs_pos == 0 {
412                // Frontmatter started but not closed
413                return Err("Frontmatter started but not closed with ---".into());
414            } else {
415                // Not a valid metadata block, skip this position
416                pos = abs_pos + 3;
417            }
418        } else {
419            break;
420        }
421    }
422
423    Ok(blocks)
424}
425
426/// Decompose markdown into frontmatter fields and body
427fn decompose(markdown: &str) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
428    // Check input size limit
429    if markdown.len() > crate::error::MAX_INPUT_SIZE {
430        return Err(format!(
431            "Input too large: {} bytes (max: {} bytes)",
432            markdown.len(),
433            crate::error::MAX_INPUT_SIZE
434        )
435        .into());
436    }
437
438    let mut fields = HashMap::new();
439
440    // Find all metadata blocks
441    let blocks = find_metadata_blocks(markdown)?;
442
443    if blocks.is_empty() {
444        // No metadata blocks, entire content is body
445        fields.insert(
446            BODY_FIELD.to_string(),
447            QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
448        );
449        return Ok(ParsedDocument::new(fields));
450    }
451
452    // Track which attributes are used for tagged blocks
453    let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
454    let mut has_global_frontmatter = false;
455    let mut global_frontmatter_index: Option<usize> = None;
456    let mut quill_name: Option<String> = None;
457
458    // First pass: identify global frontmatter, quill directive, and validate
459    for (idx, block) in blocks.iter().enumerate() {
460        // Check for quill directive
461        if let Some(ref name) = block.quill_name {
462            if quill_name.is_some() {
463                return Err("Multiple quill directives found: only one allowed".into());
464            }
465            quill_name = Some(name.clone());
466        }
467
468        // Check for global frontmatter (no tag and no quill directive)
469        if block.tag.is_none() && block.quill_name.is_none() {
470            if has_global_frontmatter {
471                return Err(Box::new(crate::error::ParseError::missing_card_directive()));
472            }
473            has_global_frontmatter = true;
474            global_frontmatter_index = Some(idx);
475        }
476    }
477
478    // Parse global frontmatter if present
479    if let Some(idx) = global_frontmatter_index {
480        let block = &blocks[idx];
481
482        // Get parsed YAML fields directly (already parsed in find_metadata_blocks)
483        let yaml_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
484            Some(serde_yaml::Value::Mapping(mapping)) => mapping
485                .iter()
486                .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
487                .collect(),
488            Some(serde_yaml::Value::Null) => {
489                // Null value (from whitespace-only YAML) - treat as empty mapping
490                HashMap::new()
491            }
492            Some(_) => {
493                // Non-mapping, non-null YAML (e.g., scalar, sequence) - this is an error for frontmatter
494                return Err("Invalid YAML frontmatter: expected a mapping".into());
495            }
496            None => HashMap::new(),
497        };
498
499        // Check that all tagged blocks don't conflict with global fields
500        // Exception: if the global field is an array, allow it (we'll merge later)
501        for other_block in &blocks {
502            if let Some(ref tag) = other_block.tag {
503                if let Some(global_value) = yaml_fields.get(tag) {
504                    // Check if the global value is an array
505                    if global_value.as_sequence().is_none() {
506                        return Err(format!(
507                            "Name collision: global field '{}' conflicts with tagged attribute",
508                            tag
509                        )
510                        .into());
511                    }
512                }
513            }
514        }
515
516        // Convert YAML values to QuillValue at boundary
517        for (key, value) in yaml_fields {
518            fields.insert(key, QuillValue::from_yaml(value)?);
519        }
520    }
521
522    // Process blocks with quill directives
523    for block in &blocks {
524        if block.quill_name.is_some() {
525            // Quill directive blocks can have YAML content (becomes part of frontmatter)
526            if let Some(ref yaml_val) = block.yaml_value {
527                let yaml_fields: HashMap<String, serde_yaml::Value> = match yaml_val {
528                    serde_yaml::Value::Mapping(mapping) => mapping
529                        .iter()
530                        .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
531                        .collect(),
532                    serde_yaml::Value::Null => {
533                        // Null value (from whitespace-only YAML) - treat as empty mapping
534                        HashMap::new()
535                    }
536                    _ => {
537                        return Err("Invalid YAML in quill block: expected a mapping".into());
538                    }
539                };
540
541                // Check for conflicts with existing fields
542                for key in yaml_fields.keys() {
543                    if fields.contains_key(key) {
544                        return Err(format!(
545                            "Name collision: quill block field '{}' conflicts with existing field",
546                            key
547                        )
548                        .into());
549                    }
550                }
551
552                // Convert YAML values to QuillValue at boundary
553                for (key, value) in yaml_fields {
554                    fields.insert(key, QuillValue::from_yaml(value)?);
555                }
556            }
557        }
558    }
559
560    // Parse tagged blocks
561    for (idx, block) in blocks.iter().enumerate() {
562        if let Some(ref tag_name) = block.tag {
563            // Check if this conflicts with global fields
564            // Exception: if the global field is an array, allow it (we'll merge later)
565            if let Some(existing_value) = fields.get(tag_name) {
566                if existing_value.as_array().is_none() {
567                    return Err(format!(
568                        "Name collision: tagged attribute '{}' conflicts with global field",
569                        tag_name
570                    )
571                    .into());
572                }
573            }
574
575            // Get YAML metadata directly (already parsed in find_metadata_blocks)
576            let mut item_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
577                Some(serde_yaml::Value::Mapping(mapping)) => mapping
578                    .iter()
579                    .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
580                    .collect(),
581                Some(serde_yaml::Value::Null) => {
582                    // Null value (from whitespace-only YAML) - treat as empty mapping
583                    HashMap::new()
584                }
585                Some(_) => {
586                    return Err(format!(
587                        "Invalid YAML in tagged block '{}': expected a mapping",
588                        tag_name
589                    )
590                    .into());
591                }
592                None => HashMap::new(),
593            };
594
595            // Extract body for this tagged block
596            let body_start = block.end;
597            let body_end = if idx + 1 < blocks.len() {
598                blocks[idx + 1].start
599            } else {
600                markdown.len()
601            };
602            let body = &markdown[body_start..body_end];
603
604            // Add body to item fields
605            item_fields.insert(
606                BODY_FIELD.to_string(),
607                serde_yaml::Value::String(body.to_string()),
608            );
609
610            // Convert HashMap to serde_yaml::Value::Mapping
611            let item_value = serde_yaml::to_value(item_fields)?;
612
613            // Add to collection
614            tagged_attributes
615                .entry(tag_name.clone())
616                .or_default()
617                .push(item_value);
618        }
619    }
620
621    // Extract global body
622    // Body starts after global frontmatter or quill block (whichever comes first)
623    // Body ends at the first card block or EOF
624    let first_non_card_block_idx = blocks
625        .iter()
626        .position(|b| b.tag.is_none() && b.quill_name.is_none())
627        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));
628
629    let (body_start, body_end) = if let Some(idx) = first_non_card_block_idx {
630        // Body starts after the first non-card block (global frontmatter or quill)
631        let start = blocks[idx].end;
632
633        // Body ends at the first card block after this, or EOF
634        let end = blocks
635            .iter()
636            .skip(idx + 1)
637            .find(|b| b.tag.is_some())
638            .map(|b| b.start)
639            .unwrap_or(markdown.len());
640
641        (start, end)
642    } else {
643        // No global frontmatter or quill block - body is everything before the first card block
644        let end = blocks
645            .iter()
646            .find(|b| b.tag.is_some())
647            .map(|b| b.start)
648            .unwrap_or(0);
649
650        (0, end)
651    };
652
653    let global_body = &markdown[body_start..body_end];
654
655    fields.insert(
656        BODY_FIELD.to_string(),
657        QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
658    );
659
660    // Add all tagged collections to fields (convert to QuillValue)
661    // If a field already exists and is an array, merge the new items into it
662    for (tag_name, items) in tagged_attributes {
663        if let Some(existing_value) = fields.get(&tag_name) {
664            // The existing value must be an array (checked earlier)
665            if let Some(existing_array) = existing_value.as_array() {
666                // Convert new items from YAML to JSON
667                let new_items_json: Vec<serde_json::Value> = items
668                    .into_iter()
669                    .map(|yaml_val| {
670                        serde_json::to_value(&yaml_val)
671                            .map_err(|e| format!("Failed to convert YAML to JSON: {}", e))
672                    })
673                    .collect::<Result<Vec<_>, _>>()?;
674
675                // Combine existing and new items
676                let mut merged_array = existing_array.clone();
677                merged_array.extend(new_items_json);
678
679                // Create QuillValue from merged JSON array
680                let quill_value = QuillValue::from_json(serde_json::Value::Array(merged_array));
681                fields.insert(tag_name, quill_value);
682            } else {
683                // This should not happen due to earlier validation, but handle it gracefully
684                return Err(format!(
685                    "Internal error: field '{}' exists but is not an array",
686                    tag_name
687                )
688                .into());
689            }
690        } else {
691            // No existing field, just create a new sequence
692            // Note: guillemets in items were already preprocessed when the items were created
693            let quill_value = QuillValue::from_yaml(serde_yaml::Value::Sequence(items))?;
694            fields.insert(tag_name, quill_value);
695        }
696    }
697
698    let quill_tag = quill_name.unwrap_or_else(|| "__default__".to_string());
699    let parsed = ParsedDocument::with_quill_tag(fields, quill_tag);
700
701    Ok(parsed)
702}
703
704#[cfg(test)]
705mod tests {
706    use super::*;
707
708    #[test]
709    fn test_no_frontmatter() {
710        let markdown = "# Hello World\n\nThis is a test.";
711        let doc = decompose(markdown).unwrap();
712
713        assert_eq!(doc.body(), Some(markdown));
714        assert_eq!(doc.fields().len(), 1);
715        // Verify default quill tag is set
716        assert_eq!(doc.quill_tag(), "__default__");
717    }
718
719    #[test]
720    fn test_with_frontmatter() {
721        let markdown = r#"---
722title: Test Document
723author: Test Author
724---
725
726# Hello World
727
728This is the body."#;
729
730        let doc = decompose(markdown).unwrap();
731
732        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
733        assert_eq!(
734            doc.get_field("title").unwrap().as_str().unwrap(),
735            "Test Document"
736        );
737        assert_eq!(
738            doc.get_field("author").unwrap().as_str().unwrap(),
739            "Test Author"
740        );
741        assert_eq!(doc.fields().len(), 3); // title, author, body
742                                           // Verify default quill tag is set when no QUILL directive
743        assert_eq!(doc.quill_tag(), "__default__");
744    }
745
746    #[test]
747    fn test_complex_yaml_frontmatter() {
748        let markdown = r#"---
749title: Complex Document
750tags:
751  - test
752  - yaml
753metadata:
754  version: 1.0
755  nested:
756    field: value
757---
758
759Content here."#;
760
761        let doc = decompose(markdown).unwrap();
762
763        assert_eq!(doc.body(), Some("\nContent here."));
764        assert_eq!(
765            doc.get_field("title").unwrap().as_str().unwrap(),
766            "Complex Document"
767        );
768
769        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
770        assert_eq!(tags.len(), 2);
771        assert_eq!(tags[0].as_str().unwrap(), "test");
772        assert_eq!(tags[1].as_str().unwrap(), "yaml");
773    }
774
775    #[test]
776    fn test_with_defaults_empty_document() {
777        use std::collections::HashMap;
778
779        let mut defaults = HashMap::new();
780        defaults.insert(
781            "status".to_string(),
782            QuillValue::from_json(serde_json::json!("draft")),
783        );
784        defaults.insert(
785            "version".to_string(),
786            QuillValue::from_json(serde_json::json!(1)),
787        );
788
789        // Create an empty parsed document
790        let doc = ParsedDocument::new(HashMap::new());
791        let doc_with_defaults = doc.with_defaults(&defaults);
792
793        // Check that defaults were applied
794        assert_eq!(
795            doc_with_defaults
796                .get_field("status")
797                .unwrap()
798                .as_str()
799                .unwrap(),
800            "draft"
801        );
802        assert_eq!(
803            doc_with_defaults
804                .get_field("version")
805                .unwrap()
806                .as_number()
807                .unwrap()
808                .as_i64()
809                .unwrap(),
810            1
811        );
812    }
813
814    #[test]
815    fn test_with_defaults_preserves_existing_values() {
816        use std::collections::HashMap;
817
818        let mut defaults = HashMap::new();
819        defaults.insert(
820            "status".to_string(),
821            QuillValue::from_json(serde_json::json!("draft")),
822        );
823
824        // Create document with existing status
825        let mut fields = HashMap::new();
826        fields.insert(
827            "status".to_string(),
828            QuillValue::from_json(serde_json::json!("published")),
829        );
830        let doc = ParsedDocument::new(fields);
831
832        let doc_with_defaults = doc.with_defaults(&defaults);
833
834        // Existing value should be preserved
835        assert_eq!(
836            doc_with_defaults
837                .get_field("status")
838                .unwrap()
839                .as_str()
840                .unwrap(),
841            "published"
842        );
843    }
844
845    #[test]
846    fn test_with_defaults_partial_application() {
847        use std::collections::HashMap;
848
849        let mut defaults = HashMap::new();
850        defaults.insert(
851            "status".to_string(),
852            QuillValue::from_json(serde_json::json!("draft")),
853        );
854        defaults.insert(
855            "version".to_string(),
856            QuillValue::from_json(serde_json::json!(1)),
857        );
858
859        // Create document with only one field
860        let mut fields = HashMap::new();
861        fields.insert(
862            "status".to_string(),
863            QuillValue::from_json(serde_json::json!("published")),
864        );
865        let doc = ParsedDocument::new(fields);
866
867        let doc_with_defaults = doc.with_defaults(&defaults);
868
869        // Existing field preserved, missing field gets default
870        assert_eq!(
871            doc_with_defaults
872                .get_field("status")
873                .unwrap()
874                .as_str()
875                .unwrap(),
876            "published"
877        );
878        assert_eq!(
879            doc_with_defaults
880                .get_field("version")
881                .unwrap()
882                .as_number()
883                .unwrap()
884                .as_i64()
885                .unwrap(),
886            1
887        );
888    }
889
890    #[test]
891    fn test_with_defaults_no_defaults() {
892        use std::collections::HashMap;
893
894        let defaults = HashMap::new(); // Empty defaults map
895
896        let doc = ParsedDocument::new(HashMap::new());
897        let doc_with_defaults = doc.with_defaults(&defaults);
898
899        // No defaults should be applied
900        assert!(doc_with_defaults.fields().is_empty());
901    }
902
903    #[test]
904    fn test_with_defaults_complex_types() {
905        use std::collections::HashMap;
906
907        let mut defaults = HashMap::new();
908        defaults.insert(
909            "tags".to_string(),
910            QuillValue::from_json(serde_json::json!(["default", "tag"])),
911        );
912
913        let doc = ParsedDocument::new(HashMap::new());
914        let doc_with_defaults = doc.with_defaults(&defaults);
915
916        // Complex default value should be applied
917        let tags = doc_with_defaults
918            .get_field("tags")
919            .unwrap()
920            .as_sequence()
921            .unwrap();
922        assert_eq!(tags.len(), 2);
923        assert_eq!(tags[0].as_str().unwrap(), "default");
924        assert_eq!(tags[1].as_str().unwrap(), "tag");
925    }
926
927    #[test]
928    fn test_with_coercion_singular_to_array() {
929        use std::collections::HashMap;
930
931        let schema = QuillValue::from_json(serde_json::json!({
932            "$schema": "https://json-schema.org/draft/2019-09/schema",
933            "type": "object",
934            "properties": {
935                "tags": {"type": "array"}
936            }
937        }));
938
939        let mut fields = HashMap::new();
940        fields.insert(
941            "tags".to_string(),
942            QuillValue::from_json(serde_json::json!("single-tag")),
943        );
944        let doc = ParsedDocument::new(fields);
945
946        let coerced_doc = doc.with_coercion(&schema);
947
948        let tags = coerced_doc.get_field("tags").unwrap();
949        assert!(tags.as_array().is_some());
950        let tags_array = tags.as_array().unwrap();
951        assert_eq!(tags_array.len(), 1);
952        assert_eq!(tags_array[0].as_str().unwrap(), "single-tag");
953    }
954
955    #[test]
956    fn test_with_coercion_string_to_boolean() {
957        use std::collections::HashMap;
958
959        let schema = QuillValue::from_json(serde_json::json!({
960            "$schema": "https://json-schema.org/draft/2019-09/schema",
961            "type": "object",
962            "properties": {
963                "active": {"type": "boolean"}
964            }
965        }));
966
967        let mut fields = HashMap::new();
968        fields.insert(
969            "active".to_string(),
970            QuillValue::from_json(serde_json::json!("true")),
971        );
972        let doc = ParsedDocument::new(fields);
973
974        let coerced_doc = doc.with_coercion(&schema);
975
976        assert_eq!(
977            coerced_doc.get_field("active").unwrap().as_bool().unwrap(),
978            true
979        );
980    }
981
982    #[test]
983    fn test_with_coercion_string_to_number() {
984        use std::collections::HashMap;
985
986        let schema = QuillValue::from_json(serde_json::json!({
987            "$schema": "https://json-schema.org/draft/2019-09/schema",
988            "type": "object",
989            "properties": {
990                "count": {"type": "number"}
991            }
992        }));
993
994        let mut fields = HashMap::new();
995        fields.insert(
996            "count".to_string(),
997            QuillValue::from_json(serde_json::json!("42")),
998        );
999        let doc = ParsedDocument::new(fields);
1000
1001        let coerced_doc = doc.with_coercion(&schema);
1002
1003        assert_eq!(
1004            coerced_doc.get_field("count").unwrap().as_i64().unwrap(),
1005            42
1006        );
1007    }
1008
1009    #[test]
1010    fn test_invalid_yaml() {
1011        let markdown = r#"---
1012title: [invalid yaml
1013author: missing close bracket
1014---
1015
1016Content here."#;
1017
1018        let result = decompose(markdown);
1019        assert!(result.is_err());
1020        assert!(result
1021            .unwrap_err()
1022            .to_string()
1023            .contains("Invalid YAML frontmatter"));
1024    }
1025
1026    #[test]
1027    fn test_unclosed_frontmatter() {
1028        let markdown = r#"---
1029title: Test
1030author: Test Author
1031
1032Content without closing ---"#;
1033
1034        let result = decompose(markdown);
1035        assert!(result.is_err());
1036        assert!(result.unwrap_err().to_string().contains("not closed"));
1037    }
1038
1039    // Extended metadata tests
1040
1041    #[test]
1042    fn test_basic_tagged_block() {
1043        let markdown = r#"---
1044title: Main Document
1045---
1046
1047Main body content.
1048
1049---
1050CARD: items
1051name: Item 1
1052---
1053
1054Body of item 1."#;
1055
1056        let doc = decompose(markdown).unwrap();
1057
1058        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
1059        assert_eq!(
1060            doc.get_field("title").unwrap().as_str().unwrap(),
1061            "Main Document"
1062        );
1063
1064        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1065        assert_eq!(items.len(), 1);
1066
1067        let item = items[0].as_object().unwrap();
1068        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1069        assert_eq!(
1070            item.get("body").unwrap().as_str().unwrap(),
1071            "\nBody of item 1."
1072        );
1073    }
1074
1075    #[test]
1076    fn test_multiple_tagged_blocks() {
1077        let markdown = r#"---
1078CARD: items
1079name: Item 1
1080tags: [a, b]
1081---
1082
1083First item body.
1084
1085---
1086CARD: items
1087name: Item 2
1088tags: [c, d]
1089---
1090
1091Second item body."#;
1092
1093        let doc = decompose(markdown).unwrap();
1094
1095        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1096        assert_eq!(items.len(), 2);
1097
1098        let item1 = items[0].as_object().unwrap();
1099        assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");
1100
1101        let item2 = items[1].as_object().unwrap();
1102        assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
1103    }
1104
1105    #[test]
1106    fn test_mixed_global_and_tagged() {
1107        let markdown = r#"---
1108title: Global
1109author: John Doe
1110---
1111
1112Global body.
1113
1114---
1115CARD: sections
1116title: Section 1
1117---
1118
1119Section 1 content.
1120
1121---
1122CARD: sections
1123title: Section 2
1124---
1125
1126Section 2 content."#;
1127
1128        let doc = decompose(markdown).unwrap();
1129
1130        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
1131        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
1132
1133        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1134        assert_eq!(sections.len(), 2);
1135    }
1136
1137    #[test]
1138    fn test_empty_tagged_metadata() {
1139        let markdown = r#"---
1140CARD: items
1141---
1142
1143Body without metadata."#;
1144
1145        let doc = decompose(markdown).unwrap();
1146
1147        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1148        assert_eq!(items.len(), 1);
1149
1150        let item = items[0].as_object().unwrap();
1151        assert_eq!(
1152            item.get("body").unwrap().as_str().unwrap(),
1153            "\nBody without metadata."
1154        );
1155    }
1156
1157    #[test]
1158    fn test_tagged_block_without_body() {
1159        let markdown = r#"---
1160CARD: items
1161name: Item
1162---"#;
1163
1164        let doc = decompose(markdown).unwrap();
1165
1166        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1167        assert_eq!(items.len(), 1);
1168
1169        let item = items[0].as_object().unwrap();
1170        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
1171    }
1172
1173    #[test]
1174    fn test_name_collision_global_and_tagged() {
1175        let markdown = r#"---
1176items: "global value"
1177---
1178
1179Body
1180
1181---
1182CARD: items
1183name: Item
1184---
1185
1186Item body"#;
1187
1188        let result = decompose(markdown);
1189        assert!(result.is_err());
1190        assert!(result.unwrap_err().to_string().contains("collision"));
1191    }
1192
1193    #[test]
1194    fn test_global_array_merged_with_card() {
1195        // When global frontmatter has an array field with the same name as a SCOPE,
1196        // the CARD items should be added to the array
1197        let markdown = r#"---
1198items:
1199  - name: Global Item 1
1200    value: 100
1201  - name: Global Item 2
1202    value: 200
1203---
1204
1205Global body
1206
1207---
1208CARD: items
1209name: Scope Item 1
1210value: 300
1211---
1212
1213Scope item 1 body
1214
1215---
1216CARD: items
1217name: Scope Item 2
1218value: 400
1219---
1220
1221Scope item 2 body"#;
1222
1223        let doc = decompose(markdown).unwrap();
1224
1225        // Verify the items array has all 4 items (2 from global + 2 from SCOPE)
1226        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1227        assert_eq!(items.len(), 4);
1228
1229        // Verify first two items (from global array)
1230        let item1 = items[0].as_object().unwrap();
1231        assert_eq!(
1232            item1.get("name").unwrap().as_str().unwrap(),
1233            "Global Item 1"
1234        );
1235        assert_eq!(item1.get("value").unwrap().as_i64().unwrap(), 100);
1236
1237        let item2 = items[1].as_object().unwrap();
1238        assert_eq!(
1239            item2.get("name").unwrap().as_str().unwrap(),
1240            "Global Item 2"
1241        );
1242        assert_eq!(item2.get("value").unwrap().as_i64().unwrap(), 200);
1243
1244        // Verify last two items (from CARD blocks)
1245        let item3 = items[2].as_object().unwrap();
1246        assert_eq!(item3.get("name").unwrap().as_str().unwrap(), "Scope Item 1");
1247        assert_eq!(item3.get("value").unwrap().as_i64().unwrap(), 300);
1248        assert_eq!(
1249            item3.get("body").unwrap().as_str().unwrap(),
1250            "\nScope item 1 body\n\n"
1251        );
1252
1253        let item4 = items[3].as_object().unwrap();
1254        assert_eq!(item4.get("name").unwrap().as_str().unwrap(), "Scope Item 2");
1255        assert_eq!(item4.get("value").unwrap().as_i64().unwrap(), 400);
1256        assert_eq!(
1257            item4.get("body").unwrap().as_str().unwrap(),
1258            "\nScope item 2 body"
1259        );
1260    }
1261
1262    #[test]
1263    fn test_empty_global_array_with_card() {
1264        // Edge case: global frontmatter has an empty array
1265        let markdown = r#"---
1266items: []
1267---
1268
1269Global body
1270
1271---
1272CARD: items
1273name: Item 1
1274---
1275
1276Item 1 body"#;
1277
1278        let doc = decompose(markdown).unwrap();
1279
1280        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1281        assert_eq!(items.len(), 1);
1282
1283        let item = items[0].as_object().unwrap();
1284        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1285    }
1286
1287    #[test]
1288    fn test_reserved_field_name() {
1289        let markdown = r#"---
1290CARD: body
1291content: Test
1292---"#;
1293
1294        let result = decompose(markdown);
1295        assert!(result.is_err());
1296        assert!(result.unwrap_err().to_string().contains("reserved"));
1297    }
1298
1299    #[test]
1300    fn test_invalid_tag_syntax() {
1301        let markdown = r#"---
1302CARD: Invalid-Name
1303title: Test
1304---"#;
1305
1306        let result = decompose(markdown);
1307        assert!(result.is_err());
1308        assert!(result
1309            .unwrap_err()
1310            .to_string()
1311            .contains("Invalid field name"));
1312    }
1313
1314    #[test]
1315    fn test_multiple_global_frontmatter_blocks() {
1316        let markdown = r#"---
1317title: First
1318---
1319
1320Body
1321
1322---
1323author: Second
1324---
1325
1326More body"#;
1327
1328        let result = decompose(markdown);
1329        assert!(result.is_err());
1330
1331        // Verify the error message contains CARD hint
1332        let err = result.unwrap_err();
1333        let err_str = err.to_string();
1334        assert!(
1335            err_str.contains("CARD"),
1336            "Error should mention CARD directive: {}",
1337            err_str
1338        );
1339        assert!(
1340            err_str.contains("missing"),
1341            "Error should indicate missing directive: {}",
1342            err_str
1343        );
1344    }
1345
1346    #[test]
1347    fn test_adjacent_blocks_different_tags() {
1348        let markdown = r#"---
1349CARD: items
1350name: Item 1
1351---
1352
1353Item 1 body
1354
1355---
1356CARD: sections
1357title: Section 1
1358---
1359
1360Section 1 body"#;
1361
1362        let doc = decompose(markdown).unwrap();
1363
1364        assert!(doc.get_field("items").is_some());
1365        assert!(doc.get_field("sections").is_some());
1366
1367        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1368        assert_eq!(items.len(), 1);
1369
1370        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1371        assert_eq!(sections.len(), 1);
1372    }
1373
1374    #[test]
1375    fn test_order_preservation() {
1376        let markdown = r#"---
1377CARD: items
1378id: 1
1379---
1380
1381First
1382
1383---
1384CARD: items
1385id: 2
1386---
1387
1388Second
1389
1390---
1391CARD: items
1392id: 3
1393---
1394
1395Third"#;
1396
1397        let doc = decompose(markdown).unwrap();
1398
1399        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1400        assert_eq!(items.len(), 3);
1401
1402        for (i, item) in items.iter().enumerate() {
1403            let mapping = item.as_object().unwrap();
1404            let id = mapping.get("id").unwrap().as_i64().unwrap();
1405            assert_eq!(id, (i + 1) as i64);
1406        }
1407    }
1408
1409    #[test]
1410    fn test_product_catalog_integration() {
1411        let markdown = r#"---
1412title: Product Catalog
1413author: John Doe
1414date: 2024-01-01
1415---
1416
1417This is the main catalog description.
1418
1419---
1420CARD: products
1421name: Widget A
1422price: 19.99
1423sku: WID-001
1424---
1425
1426The **Widget A** is our most popular product.
1427
1428---
1429CARD: products
1430name: Gadget B
1431price: 29.99
1432sku: GAD-002
1433---
1434
1435The **Gadget B** is perfect for professionals.
1436
1437---
1438CARD: reviews
1439product: Widget A
1440rating: 5
1441---
1442
1443"Excellent product! Highly recommended."
1444
1445---
1446CARD: reviews
1447product: Gadget B
1448rating: 4
1449---
1450
1451"Very good, but a bit pricey.""#;
1452
1453        let doc = decompose(markdown).unwrap();
1454
1455        // Verify global fields
1456        assert_eq!(
1457            doc.get_field("title").unwrap().as_str().unwrap(),
1458            "Product Catalog"
1459        );
1460        assert_eq!(
1461            doc.get_field("author").unwrap().as_str().unwrap(),
1462            "John Doe"
1463        );
1464        assert_eq!(
1465            doc.get_field("date").unwrap().as_str().unwrap(),
1466            "2024-01-01"
1467        );
1468
1469        // Verify global body
1470        assert!(doc.body().unwrap().contains("main catalog description"));
1471
1472        // Verify products collection
1473        let products = doc.get_field("products").unwrap().as_sequence().unwrap();
1474        assert_eq!(products.len(), 2);
1475
1476        let product1 = products[0].as_object().unwrap();
1477        assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
1478        assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);
1479
1480        // Verify reviews collection
1481        let reviews = doc.get_field("reviews").unwrap().as_sequence().unwrap();
1482        assert_eq!(reviews.len(), 2);
1483
1484        let review1 = reviews[0].as_object().unwrap();
1485        assert_eq!(
1486            review1.get("product").unwrap().as_str().unwrap(),
1487            "Widget A"
1488        );
1489        assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);
1490
1491        // Total fields: title, author, date, body, products, reviews = 6
1492        assert_eq!(doc.fields().len(), 6);
1493    }
1494
1495    #[test]
1496    fn taro_quill_directive() {
1497        let markdown = r#"---
1498QUILL: usaf_memo
1499memo_for: [ORG/SYMBOL]
1500memo_from: [ORG/SYMBOL]
1501---
1502
1503This is the memo body."#;
1504
1505        let doc = decompose(markdown).unwrap();
1506
1507        // Verify quill tag is set
1508        assert_eq!(doc.quill_tag(), "usaf_memo");
1509
1510        // Verify fields from quill block become frontmatter
1511        assert_eq!(
1512            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1513                .as_str()
1514                .unwrap(),
1515            "ORG/SYMBOL"
1516        );
1517
1518        // Verify body
1519        assert_eq!(doc.body(), Some("\nThis is the memo body."));
1520    }
1521
1522    #[test]
1523    fn test_quill_with_card_blocks() {
1524        let markdown = r#"---
1525QUILL: document
1526title: Test Document
1527---
1528
1529Main body.
1530
1531---
1532CARD: sections
1533name: Section 1
1534---
1535
1536Section 1 body."#;
1537
1538        let doc = decompose(markdown).unwrap();
1539
1540        // Verify quill tag
1541        assert_eq!(doc.quill_tag(), "document");
1542
1543        // Verify global field from quill block
1544        assert_eq!(
1545            doc.get_field("title").unwrap().as_str().unwrap(),
1546            "Test Document"
1547        );
1548
1549        // Verify card blocks work
1550        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1551        assert_eq!(sections.len(), 1);
1552
1553        // Verify body
1554        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1555    }
1556
1557    #[test]
1558    fn test_multiple_quill_directives_error() {
1559        let markdown = r#"---
1560QUILL: first
1561---
1562
1563---
1564QUILL: second
1565---"#;
1566
1567        let result = decompose(markdown);
1568        assert!(result.is_err());
1569        assert!(result
1570            .unwrap_err()
1571            .to_string()
1572            .contains("Multiple quill directives"));
1573    }
1574
1575    #[test]
1576    fn test_invalid_quill_name() {
1577        let markdown = r#"---
1578QUILL: Invalid-Name
1579---"#;
1580
1581        let result = decompose(markdown);
1582        assert!(result.is_err());
1583        assert!(result
1584            .unwrap_err()
1585            .to_string()
1586            .contains("Invalid quill name"));
1587    }
1588
1589    #[test]
1590    fn test_quill_wrong_value_type() {
1591        let markdown = r#"---
1592QUILL: 123
1593---"#;
1594
1595        let result = decompose(markdown);
1596        assert!(result.is_err());
1597        assert!(result
1598            .unwrap_err()
1599            .to_string()
1600            .contains("QUILL value must be a string"));
1601    }
1602
1603    #[test]
1604    fn test_card_wrong_value_type() {
1605        let markdown = r#"---
1606CARD: 123
1607---"#;
1608
1609        let result = decompose(markdown);
1610        assert!(result.is_err());
1611        assert!(result
1612            .unwrap_err()
1613            .to_string()
1614            .contains("CARD value must be a string"));
1615    }
1616
1617    #[test]
1618    fn test_both_quill_and_card_error() {
1619        let markdown = r#"---
1620QUILL: test
1621CARD: items
1622---"#;
1623
1624        let result = decompose(markdown);
1625        assert!(result.is_err());
1626        assert!(result
1627            .unwrap_err()
1628            .to_string()
1629            .contains("Cannot specify both QUILL and CARD"));
1630    }
1631
1632    #[test]
1633    fn test_blank_lines_in_frontmatter() {
1634        // New parsing standard: blank lines are allowed within YAML blocks
1635        let markdown = r#"---
1636title: Test Document
1637author: Test Author
1638
1639description: This has a blank line above it
1640tags:
1641  - one
1642  - two
1643---
1644
1645# Hello World
1646
1647This is the body."#;
1648
1649        let doc = decompose(markdown).unwrap();
1650
1651        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
1652        assert_eq!(
1653            doc.get_field("title").unwrap().as_str().unwrap(),
1654            "Test Document"
1655        );
1656        assert_eq!(
1657            doc.get_field("author").unwrap().as_str().unwrap(),
1658            "Test Author"
1659        );
1660        assert_eq!(
1661            doc.get_field("description").unwrap().as_str().unwrap(),
1662            "This has a blank line above it"
1663        );
1664
1665        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
1666        assert_eq!(tags.len(), 2);
1667    }
1668
1669    #[test]
1670    fn test_blank_lines_in_scope_blocks() {
1671        // Blank lines should be allowed in CARD blocks too
1672        let markdown = r#"---
1673CARD: items
1674name: Item 1
1675
1676price: 19.99
1677
1678tags:
1679  - electronics
1680  - gadgets
1681---
1682
1683Body of item 1."#;
1684
1685        let doc = decompose(markdown).unwrap();
1686
1687        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1688        assert_eq!(items.len(), 1);
1689
1690        let item = items[0].as_object().unwrap();
1691        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1692        assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);
1693
1694        let tags = item.get("tags").unwrap().as_array().unwrap();
1695        assert_eq!(tags.len(), 2);
1696    }
1697
1698    #[test]
1699    fn test_horizontal_rule_with_blank_lines_above_and_below() {
1700        // Horizontal rule: blank lines both above AND below the ---
1701        let markdown = r#"---
1702title: Test
1703---
1704
1705First paragraph.
1706
1707---
1708
1709Second paragraph."#;
1710
1711        let doc = decompose(markdown).unwrap();
1712
1713        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1714
1715        // The body should contain the horizontal rule (---) as part of the content
1716        let body = doc.body().unwrap();
1717        assert!(body.contains("First paragraph."));
1718        assert!(body.contains("---"));
1719        assert!(body.contains("Second paragraph."));
1720    }
1721
1722    #[test]
1723    fn test_horizontal_rule_not_preceded_by_blank() {
1724        // --- not preceded by blank line but followed by blank line is NOT a horizontal rule
1725        // It's also NOT a valid metadata block opening (since it's followed by blank)
1726        let markdown = r#"---
1727title: Test
1728---
1729
1730First paragraph.
1731---
1732
1733Second paragraph."#;
1734
1735        let doc = decompose(markdown).unwrap();
1736
1737        let body = doc.body().unwrap();
1738        // The second --- should be in the body as text (not a horizontal rule since no blank above)
1739        assert!(body.contains("---"));
1740    }
1741
1742    #[test]
1743    fn test_multiple_blank_lines_in_yaml() {
1744        // Multiple blank lines should also be allowed
1745        let markdown = r#"---
1746title: Test
1747
1748
1749author: John Doe
1750
1751
1752version: 1.0
1753---
1754
1755Body content."#;
1756
1757        let doc = decompose(markdown).unwrap();
1758
1759        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1760        assert_eq!(
1761            doc.get_field("author").unwrap().as_str().unwrap(),
1762            "John Doe"
1763        );
1764        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1765    }
1766
1767    #[test]
1768    fn test_html_comment_interaction() {
1769        let markdown = r#"<!---
1770---> the rest of the page content
1771
1772---
1773key: value
1774---
1775"#;
1776        let doc = decompose(markdown).unwrap();
1777
1778        // The comment should be ignored (or at least not cause a parse error)
1779        // The frontmatter should be parsed
1780        let key = doc.get_field("key").and_then(|v| v.as_str());
1781        assert_eq!(key, Some("value"));
1782    }
1783}
1784#[cfg(test)]
1785mod demo_file_test {
1786    use super::*;
1787
1788    #[test]
1789    fn test_extended_metadata_demo_file() {
1790        let markdown = include_str!("../../fixtures/resources/extended_metadata_demo.md");
1791        let doc = decompose(markdown).unwrap();
1792
1793        // Verify global fields
1794        assert_eq!(
1795            doc.get_field("title").unwrap().as_str().unwrap(),
1796            "Extended Metadata Demo"
1797        );
1798        assert_eq!(
1799            doc.get_field("author").unwrap().as_str().unwrap(),
1800            "Quillmark Team"
1801        );
1802        // version is parsed as a number by YAML
1803        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1804
1805        // Verify body
1806        assert!(doc
1807            .body()
1808            .unwrap()
1809            .contains("extended YAML metadata standard"));
1810
1811        // Verify features collection
1812        let features = doc.get_field("features").unwrap().as_sequence().unwrap();
1813        assert_eq!(features.len(), 3);
1814
1815        // Verify use_cases collection
1816        let use_cases = doc.get_field("use_cases").unwrap().as_sequence().unwrap();
1817        assert_eq!(use_cases.len(), 2);
1818
1819        // Check first feature
1820        let feature1 = features[0].as_object().unwrap();
1821        assert_eq!(
1822            feature1.get("name").unwrap().as_str().unwrap(),
1823            "Tag Directives"
1824        );
1825    }
1826
1827    #[test]
1828    fn test_input_size_limit() {
1829        // Create markdown larger than MAX_INPUT_SIZE (10 MB)
1830        let size = crate::error::MAX_INPUT_SIZE + 1;
1831        let large_markdown = "a".repeat(size);
1832
1833        let result = decompose(&large_markdown);
1834        assert!(result.is_err());
1835
1836        let err_msg = result.unwrap_err().to_string();
1837        assert!(err_msg.contains("Input too large"));
1838    }
1839
1840    #[test]
1841    fn test_yaml_size_limit() {
1842        // Create YAML block larger than MAX_YAML_SIZE (1 MB)
1843        let mut markdown = String::from("---\n");
1844
1845        // Create a very large YAML field
1846        let size = crate::error::MAX_YAML_SIZE + 1;
1847        markdown.push_str("data: \"");
1848        markdown.push_str(&"x".repeat(size));
1849        markdown.push_str("\"\n---\n\nBody");
1850
1851        let result = decompose(&markdown);
1852        assert!(result.is_err());
1853
1854        let err_msg = result.unwrap_err().to_string();
1855        assert!(err_msg.contains("YAML block too large"));
1856    }
1857
1858    #[test]
1859    fn test_input_within_size_limit() {
1860        // Create markdown just under the limit
1861        let size = 1000; // Much smaller than limit
1862        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));
1863
1864        let result = decompose(&markdown);
1865        assert!(result.is_ok());
1866    }
1867
1868    #[test]
1869    fn test_yaml_within_size_limit() {
1870        // Create YAML block well within the limit
1871        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";
1872
1873        let result = decompose(&markdown);
1874        assert!(result.is_ok());
1875    }
1876
1877    // Tests for guillemet preservation in parsing (guillemets are NOT converted during parsing)
1878    // Guillemet conversion now happens in process_plate, not during parsing
1879    #[test]
1880    fn test_chevrons_preserved_in_body_no_frontmatter() {
1881        let markdown = "Use <<raw content>> here.";
1882        let doc = decompose(markdown).unwrap();
1883
1884        // Body should preserve chevrons (conversion happens later in process_plate)
1885        assert_eq!(doc.body(), Some("Use <<raw content>> here."));
1886    }
1887
1888    #[test]
1889    fn test_chevrons_preserved_in_body_with_frontmatter() {
1890        let markdown = r#"---
1891title: Test
1892---
1893
1894Use <<raw content>> here."#;
1895        let doc = decompose(markdown).unwrap();
1896
1897        // Body should preserve chevrons
1898        assert_eq!(doc.body(), Some("\nUse <<raw content>> here."));
1899    }
1900
1901    #[test]
1902    fn test_chevrons_preserved_in_yaml_string() {
1903        let markdown = r#"---
1904title: Test <<with chevrons>>
1905---
1906
1907Body content."#;
1908        let doc = decompose(markdown).unwrap();
1909
1910        // YAML string values should preserve chevrons
1911        assert_eq!(
1912            doc.get_field("title").unwrap().as_str().unwrap(),
1913            "Test <<with chevrons>>"
1914        );
1915    }
1916
1917    #[test]
1918    fn test_chevrons_preserved_in_yaml_array() {
1919        let markdown = r#"---
1920items:
1921  - "<<first>>"
1922  - "<<second>>"
1923---
1924
1925Body."#;
1926        let doc = decompose(markdown).unwrap();
1927
1928        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1929        assert_eq!(items[0].as_str().unwrap(), "<<first>>");
1930        assert_eq!(items[1].as_str().unwrap(), "<<second>>");
1931    }
1932
1933    #[test]
1934    fn test_chevrons_preserved_in_yaml_nested() {
1935        let markdown = r#"---
1936metadata:
1937  description: "<<nested value>>"
1938---
1939
1940Body."#;
1941        let doc = decompose(markdown).unwrap();
1942
1943        let metadata = doc.get_field("metadata").unwrap().as_object().unwrap();
1944        assert_eq!(
1945            metadata.get("description").unwrap().as_str().unwrap(),
1946            "<<nested value>>"
1947        );
1948    }
1949
1950    #[test]
1951    fn test_chevrons_preserved_in_code_blocks() {
1952        let markdown = r#"```
1953<<in code block>>
1954```
1955
1956<<outside code block>>"#;
1957        let doc = decompose(markdown).unwrap();
1958
1959        let body = doc.body().unwrap();
1960        // All chevrons should be preserved (no conversion during parsing)
1961        assert!(body.contains("<<in code block>>"));
1962        assert!(body.contains("<<outside code block>>"));
1963    }
1964
1965    #[test]
1966    fn test_chevrons_preserved_in_inline_code() {
1967        let markdown = "`<<in inline code>>` and <<outside inline code>>";
1968        let doc = decompose(markdown).unwrap();
1969
1970        let body = doc.body().unwrap();
1971        // All chevrons should be preserved
1972        assert!(body.contains("`<<in inline code>>`"));
1973        assert!(body.contains("<<outside inline code>>"));
1974    }
1975
1976    #[test]
1977    fn test_chevrons_preserved_in_tagged_block_body() {
1978        let markdown = r#"---
1979title: Main
1980---
1981
1982Main body.
1983
1984---
1985CARD: items
1986name: Item 1
1987---
1988
1989Use <<raw>> here."#;
1990        let doc = decompose(markdown).unwrap();
1991
1992        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1993        let item = items[0].as_object().unwrap();
1994        let item_body = item.get("body").unwrap().as_str().unwrap();
1995        // Tagged block body should preserve chevrons
1996        assert!(item_body.contains("<<raw>>"));
1997    }
1998
1999    #[test]
2000    fn test_chevrons_preserved_in_tagged_block_yaml() {
2001        let markdown = r#"---
2002title: Main
2003---
2004
2005Main body.
2006
2007---
2008CARD: items
2009description: "<<tagged yaml>>"
2010---
2011
2012Item body."#;
2013        let doc = decompose(markdown).unwrap();
2014
2015        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
2016        let item = items[0].as_object().unwrap();
2017        // Tagged block YAML should preserve chevrons
2018        assert_eq!(
2019            item.get("description").unwrap().as_str().unwrap(),
2020            "<<tagged yaml>>"
2021        );
2022    }
2023
2024    #[test]
2025    fn test_yaml_numbers_not_affected() {
2026        // Numbers should not be affected
2027        let markdown = r#"---
2028count: 42
2029---
2030
2031Body."#;
2032        let doc = decompose(markdown).unwrap();
2033        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
2034    }
2035
2036    #[test]
2037    fn test_yaml_booleans_not_affected() {
2038        // Booleans should not be affected
2039        let markdown = r#"---
2040active: true
2041---
2042
2043Body."#;
2044        let doc = decompose(markdown).unwrap();
2045        assert_eq!(doc.get_field("active").unwrap().as_bool().unwrap(), true);
2046    }
2047
2048    #[test]
2049    fn test_multiline_chevrons_preserved() {
2050        // Multiline chevrons should be preserved as-is
2051        let markdown = "<<text\nacross lines>>";
2052        let doc = decompose(markdown).unwrap();
2053
2054        let body = doc.body().unwrap();
2055        // Should contain the original chevrons
2056        assert!(body.contains("<<text"));
2057        assert!(body.contains("across lines>>"));
2058    }
2059
2060    #[test]
2061    fn test_unmatched_chevrons_preserved() {
2062        let markdown = "<<unmatched";
2063        let doc = decompose(markdown).unwrap();
2064
2065        let body = doc.body().unwrap();
2066        // Unmatched should remain as-is
2067        assert_eq!(body, "<<unmatched");
2068    }
2069}
2070
/// Additional robustness tests for `decompose` and `is_valid_tag_name`.
///
/// Grouped by concern: delimiter handling, Unicode, YAML edge cases, CARD blocks,
/// QUILL directives, error handling, body extraction, tag-name validation, and
/// guillemet pass-through.
#[cfg(test)]
mod robustness_tests {
    use super::*;

    // Edge cases for delimiter handling

    /// Empty input parses to an empty body with the default quill tag.
    #[test]
    fn test_empty_document() {
        let doc = decompose("").unwrap();
        assert_eq!(doc.body(), Some(""));
        assert_eq!(doc.quill_tag(), "__default__");
    }

    /// Whitespace-only input is returned verbatim as the body.
    #[test]
    fn test_only_whitespace() {
        let doc = decompose("   \n\n   \t").unwrap();
        assert_eq!(doc.body(), Some("   \n\n   \t"));
    }

    /// A lone `---` without a trailing newline is body content, not an error.
    #[test]
    fn test_only_dashes() {
        // Just "---" at document start without newline is not treated as frontmatter opener
        // (requires "---\n" to start a frontmatter block), so `unwrap` must succeed.
        let doc = decompose("---").unwrap();
        assert_eq!(doc.body(), Some("---"));
    }

    /// `---` in the middle of a line is ordinary text.
    #[test]
    fn test_dashes_in_middle_of_line() {
        // --- not at start of line should not be treated as delimiter
        let markdown = "some text --- more text";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.body(), Some("some text --- more text"));
    }

    /// Four dashes do not open a frontmatter block.
    #[test]
    fn test_four_dashes() {
        // ---- is not a valid delimiter
        let markdown = "----\ntitle: Test\n----\n\nBody";
        let doc = decompose(markdown).unwrap();
        // Should treat entire content as body
        assert!(doc.body().unwrap().contains("----"));
    }

    /// Frontmatter and body are parsed when every line ends with CRLF.
    #[test]
    fn test_crlf_line_endings() {
        // Windows-style line endings
        let markdown = "---\r\ntitle: Test\r\n---\r\n\r\nBody content.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert!(doc.body().unwrap().contains("Body content."));
    }

    /// Frontmatter is parsed when LF and CRLF line endings are mixed.
    #[test]
    fn test_mixed_line_endings() {
        // Mix of \n and \r\n
        let markdown = "---\ntitle: Test\r\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
    }

    /// A closing delimiter at EOF without a trailing newline still closes the block.
    #[test]
    fn test_frontmatter_at_eof_no_trailing_newline() {
        // Frontmatter closed at EOF without trailing newline
        let markdown = "---\ntitle: Test\n---";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert_eq!(doc.body(), Some(""));
    }

    /// A frontmatter block holding only a single space yields no fields besides the body.
    #[test]
    fn test_empty_frontmatter() {
        // "---\n---" is not treated as an empty frontmatter block: an opening `---`
        // followed directly by another `---` falls under the horizontal-rule logic.
        // An "empty" block therefore needs whitespace content between the delimiters,
        // as in "---\n \n---".
        let markdown = "---\n \n---\n\nBody content.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("Body content."));
        // Should only have body field
        assert_eq!(doc.fields().len(), 1);
    }

    /// A frontmatter block made of several whitespace-only lines parses, and the body follows.
    #[test]
    fn test_whitespace_only_frontmatter() {
        // Frontmatter with only whitespace
        let markdown = "---\n   \n\n   \n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("Body."));
    }

    // Unicode handling

    /// Non-ASCII YAML keys can be looked up by their exact name.
    #[test]
    fn test_unicode_in_yaml_keys() {
        let markdown = "---\ntitre: Bonjour\nタイトル: こんにちは\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("titre").unwrap().as_str().unwrap(), "Bonjour");
        assert_eq!(
            doc.get_field("タイトル").unwrap().as_str().unwrap(),
            "こんにちは"
        );
    }

    /// Non-ASCII YAML values, including emoji, round-trip unchanged.
    #[test]
    fn test_unicode_in_yaml_values() {
        let markdown = "---\ntitle: 你好世界 🎉\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "你好世界 🎉"
        );
    }

    /// Non-ASCII body text, including emoji, is preserved.
    #[test]
    fn test_unicode_in_body() {
        let markdown = "---\ntitle: Test\n---\n\n日本語テキスト with emoji 🚀";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("日本語テキスト"));
        assert!(doc.body().unwrap().contains("🚀"));
    }

    // YAML edge cases

    /// A literal block scalar (`|`) keeps its line breaks.
    #[test]
    fn test_yaml_multiline_string() {
        let markdown = r#"---
description: |
  This is a
  multiline string
  with preserved newlines.
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let desc = doc.get_field("description").unwrap().as_str().unwrap();
        assert!(desc.contains("multiline string"));
        assert!(desc.contains('\n'));
    }

    /// A folded block scalar (`>`) joins its lines with spaces.
    #[test]
    fn test_yaml_folded_string() {
        let markdown = r#"---
description: >
  This is a folded
  string that becomes
  a single line.
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let desc = doc.get_field("description").unwrap().as_str().unwrap();
        // Folded strings join lines with spaces
        assert!(desc.contains("folded"));
        // The first line break must have been folded into a single space.
        assert!(desc.contains("This is a folded string"));
    }

    /// An explicit YAML `null` is exposed as a null value.
    #[test]
    fn test_yaml_null_value() {
        let markdown = "---\noptional: null\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.get_field("optional").unwrap().is_null());
    }

    /// A quoted empty string stays an empty string.
    #[test]
    fn test_yaml_empty_string_value() {
        let markdown = "---\nempty: \"\"\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("empty").unwrap().as_str().unwrap(), "");
    }

    /// YAML-significant characters inside a quoted string are kept literally.
    #[test]
    fn test_yaml_special_characters_in_string() {
        let markdown = "---\nspecial: \"colon: here, and [brackets]\"\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(
            doc.get_field("special").unwrap().as_str().unwrap(),
            "colon: here, and [brackets]"
        );
    }

    /// Nested mappings are reachable level by level with their scalar types intact.
    #[test]
    fn test_yaml_nested_objects() {
        let markdown = r#"---
config:
  database:
    host: localhost
    port: 5432
  cache:
    enabled: true
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let config = doc.get_field("config").unwrap().as_object().unwrap();
        let db = config.get("database").unwrap().as_object().unwrap();
        assert_eq!(db.get("host").unwrap().as_str().unwrap(), "localhost");
        assert_eq!(db.get("port").unwrap().as_i64().unwrap(), 5432);
    }

    // CARD block edge cases

    /// A CARD block with nothing after it gets an empty `body` entry.
    #[test]
    fn test_card_with_empty_body() {
        let markdown = r#"---
CARD: items
name: Item
---"#;
        let doc = decompose(markdown).unwrap();
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);
        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
    }

    /// Two back-to-back CARD blocks with the same name are collected into one sequence.
    #[test]
    fn test_card_consecutive_blocks() {
        let markdown = r#"---
CARD: a
id: 1
---
---
CARD: a
id: 2
---"#;
        let doc = decompose(markdown).unwrap();
        let items = doc.get_field("a").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 2);
    }

    /// `---` in the middle of a card body line does not end the card body.
    #[test]
    fn test_card_with_body_containing_dashes() {
        let markdown = r#"---
CARD: items
name: Item
---

Some text with --- dashes in it."#;
        let doc = decompose(markdown).unwrap();
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        let item = items[0].as_object().unwrap();
        let body = item.get("body").unwrap().as_str().unwrap();
        assert!(body.contains("--- dashes"));
    }

    // QUILL directive edge cases

    /// A quill name may start with an underscore.
    #[test]
    fn test_quill_with_underscore_prefix() {
        let markdown = "---\nQUILL: _internal\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "_internal");
    }

    /// A quill name may contain digits after the first character.
    #[test]
    fn test_quill_with_numbers() {
        let markdown = "---\nQUILL: form_8_v2\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "form_8_v2");
    }

    /// Ordinary fields in the same block as QUILL remain available as fields.
    #[test]
    fn test_quill_with_additional_fields() {
        let markdown = r#"---
QUILL: my_quill
title: Document Title
author: John Doe
---

Body content."#;
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "my_quill");
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Document Title"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "John Doe"
        );
    }

    // Error handling

    /// An all-uppercase CARD name is rejected with an "Invalid field name" error.
    #[test]
    fn test_invalid_scope_name_uppercase() {
        let markdown = "---\nCARD: ITEMS\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid field name"));
    }

    /// A CARD name starting with a digit is rejected.
    #[test]
    fn test_invalid_scope_name_starts_with_number() {
        let markdown = "---\nCARD: 123items\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    /// A CARD name containing a hyphen is rejected.
    #[test]
    fn test_invalid_scope_name_with_hyphen() {
        let markdown = "---\nCARD: my-items\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    /// A QUILL name containing uppercase letters is rejected.
    #[test]
    fn test_invalid_quill_name_uppercase() {
        let markdown = "---\nQUILL: MyQuill\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    /// A frontmatter line without a `key: value` colon is a parse error.
    #[test]
    fn test_yaml_syntax_error_missing_colon() {
        let markdown = "---\ntitle Test\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    /// Inconsistent list indentation must not panic, whatever the outcome.
    #[test]
    fn test_yaml_syntax_error_bad_indentation() {
        let markdown = "---\nitems:\n- one\n - two\n---\n\nBody.";
        // Bad indentation may or may not be an error depending on YAML parser.
        // Only the absence of a panic is checked, so the result is discarded.
        let _ = decompose(markdown);
    }

    // Body extraction edge cases

    /// Blank lines between the frontmatter and the text stay at the start of the body.
    #[test]
    fn test_body_with_leading_newlines() {
        let markdown = "---\ntitle: Test\n---\n\n\n\nBody with leading newlines.";
        let doc = decompose(markdown).unwrap();
        // Body should preserve leading newlines after frontmatter
        assert!(doc.body().unwrap().starts_with('\n'));
    }

    /// Trailing newlines stay at the end of the body.
    #[test]
    fn test_body_with_trailing_newlines() {
        let markdown = "---\ntitle: Test\n---\n\nBody.\n\n\n";
        let doc = decompose(markdown).unwrap();
        // Body should preserve trailing newlines
        assert!(doc.body().unwrap().ends_with('\n'));
    }

    /// A document that ends right after the frontmatter has an empty body.
    #[test]
    fn test_no_body_after_frontmatter() {
        let markdown = "---\ntitle: Test\n---";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.body(), Some(""));
    }

    // Tag name validation

    /// A single underscore is a valid tag name.
    #[test]
    fn test_valid_tag_name_single_underscore() {
        assert!(is_valid_tag_name("_"));
    }

    /// A leading underscore is allowed.
    #[test]
    fn test_valid_tag_name_underscore_prefix() {
        assert!(is_valid_tag_name("_private"));
    }

    /// Digits are allowed after the first character.
    #[test]
    fn test_valid_tag_name_with_numbers() {
        assert!(is_valid_tag_name("item1"));
        assert!(is_valid_tag_name("item_2"));
    }

    /// The empty string is not a valid tag name.
    #[test]
    fn test_invalid_tag_name_empty() {
        assert!(!is_valid_tag_name(""));
    }

    /// A leading digit is not allowed.
    #[test]
    fn test_invalid_tag_name_starts_with_number() {
        assert!(!is_valid_tag_name("1item"));
    }

    /// Uppercase letters are not allowed.
    #[test]
    fn test_invalid_tag_name_uppercase() {
        assert!(!is_valid_tag_name("Items"));
        assert!(!is_valid_tag_name("ITEMS"));
    }

    /// Hyphens, dots and spaces are not allowed.
    #[test]
    fn test_invalid_tag_name_special_chars() {
        assert!(!is_valid_tag_name("my-items"));
        assert!(!is_valid_tag_name("my.items"));
        assert!(!is_valid_tag_name("my items"));
    }

    // Guillemet handling in YAML (values must pass through parsing unchanged)

    /// Non-string scalars keep their types, both at the top level and inside a sequence.
    #[test]
    fn test_guillemet_in_yaml_preserves_non_strings() {
        let markdown = r#"---
count: 42
price: 19.99
active: true
items:
  - first
  - 100
  - true
---

Body."#;
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
        assert_eq!(doc.get_field("price").unwrap().as_f64().unwrap(), 19.99);
        assert!(doc.get_field("active").unwrap().as_bool().unwrap());

        // The mixed-type sequence from the fixture must keep each element's type too.
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 3);
        assert_eq!(items[0].as_str().unwrap(), "first");
        assert_eq!(items[1].as_i64().unwrap(), 100);
        assert!(items[2].as_bool().unwrap());
    }

    /// Guillemets already present in the input are returned exactly as written.
    #[test]
    fn test_guillemet_double_conversion_prevention() {
        // Ensure «» in input doesn't get double-processed
        let markdown = "---\ntitle: Already «converted»\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        // Should remain as-is (not double-escaped)
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Already «converted»"
        );
    }
}