quillmark_core/
parse.rs

1//! # Parsing Module
2//!
3//! Parsing functionality for markdown documents with YAML frontmatter.
4//!
5//! ## Overview
6//!
//! The `parse` module provides [`ParsedDocument::from_markdown`], which parses a markdown document with optional YAML frontmatter into a [`ParsedDocument`].
8//!
9//! ## Key Types
10//!
11//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
12//! - [`BODY_FIELD`]: Constant for the field name storing document body
13//!
14//! ## Examples
15//!
16//! ### Basic Parsing
17//!
18//! ```
19//! use quillmark_core::ParsedDocument;
20//!
21//! let markdown = r#"---
22//! title: My Document
23//! author: John Doe
24//! ---
25//!
26//! # Introduction
27//!
28//! Document content here.
29//! "#;
30//!
31//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
32//! let title = doc.get_field("title")
33//!     .and_then(|v| v.as_str())
34//!     .unwrap_or("Untitled");
35//! ```
36//!
37//! ## Error Handling
38//!
39//! The [`ParsedDocument::from_markdown`] function returns errors for:
40//! - Malformed YAML syntax
41//! - Unclosed frontmatter blocks
42//! - Multiple global frontmatter blocks
43//! - Both QUILL and CARD specified in the same block
44//! - Reserved field name usage
45//! - Name collisions
46//!
47//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.
48
49use std::collections::HashMap;
50
51use crate::value::QuillValue;
52
/// The field name used to store the document body.
///
/// The parser inserts the body text into the field map under this key, and a
/// `CARD` block may not use this name as its field name.
pub const BODY_FIELD: &str = "body";
55
/// Reserved tag name for quill specification.
///
/// NOTE(review): the parsing functions in this module match the upper-case
/// `QUILL` key directly and do not reference this constant — confirm where it
/// is consumed.
pub const QUILL_TAG: &str = "quill";
58
59/// A parsed markdown document with frontmatter
60#[derive(Debug, Clone)]
61pub struct ParsedDocument {
62    fields: HashMap<String, QuillValue>,
63    quill_tag: String,
64}
65
66impl ParsedDocument {
67    /// Create a new ParsedDocument with the given fields
68    pub fn new(fields: HashMap<String, QuillValue>) -> Self {
69        Self {
70            fields,
71            quill_tag: "__default__".to_string(),
72        }
73    }
74
75    /// Create a ParsedDocument from fields and quill tag
76    pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: String) -> Self {
77        Self { fields, quill_tag }
78    }
79
80    /// Create a ParsedDocument from markdown string
81    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
82        decompose(markdown).map_err(crate::error::ParseError::from)
83    }
84
85    /// Get the quill tag (from QUILL key, or "__default__" if not specified)
86    pub fn quill_tag(&self) -> &str {
87        &self.quill_tag
88    }
89
90    /// Get the document body
91    pub fn body(&self) -> Option<&str> {
92        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
93    }
94
95    /// Get a specific field
96    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
97        self.fields.get(name)
98    }
99
100    /// Get all fields (including body)
101    pub fn fields(&self) -> &HashMap<String, QuillValue> {
102        &self.fields
103    }
104
105    /// Create a new ParsedDocument with default values applied
106    ///
107    /// This method creates a new ParsedDocument with default values applied for any
108    /// fields that are missing from the original document but have defaults specified.
109    /// Existing fields are preserved and not overwritten.
110    ///
111    /// # Arguments
112    ///
113    /// * `defaults` - A HashMap of field names to their default QuillValues
114    ///
115    /// # Returns
116    ///
117    /// A new ParsedDocument with defaults applied for missing fields
118    pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
119        let mut fields = self.fields.clone();
120
121        for (field_name, default_value) in defaults {
122            // Only apply default if field is missing
123            if !fields.contains_key(field_name) {
124                fields.insert(field_name.clone(), default_value.clone());
125            }
126        }
127
128        Self {
129            fields,
130            quill_tag: self.quill_tag.clone(),
131        }
132    }
133
134    /// Create a new ParsedDocument with coerced field values
135    ///
136    /// This method applies type coercions to field values based on the schema.
137    /// Coercions include:
138    /// - Singular values to arrays when schema expects array
139    /// - String "true"/"false" to boolean
140    /// - Numbers to boolean (0=false, non-zero=true)
141    /// - String numbers to number type
142    /// - Boolean to number (true=1, false=0)
143    ///
144    /// # Arguments
145    ///
146    /// * `schema` - A JSON Schema object defining expected field types
147    ///
148    /// # Returns
149    ///
150    /// A new ParsedDocument with coerced field values
151    pub fn with_coercion(&self, schema: &QuillValue) -> Self {
152        use crate::schema::coerce_document;
153
154        let coerced_fields = coerce_document(schema, &self.fields);
155
156        Self {
157            fields: coerced_fields,
158            quill_tag: self.quill_tag.clone(),
159        }
160    }
161}
162
/// One `---`-delimited metadata block located in the markdown source.
#[derive(Debug)]
struct MetadataBlock {
    // Byte offset of the opening "---".
    start: usize,
    // Byte offset just past the closing delimiter (including its line ending, if it has one).
    end: usize,
    // Parsed YAML with any QUILL/CARD key removed. None when the block content was
    // empty or nothing remained after removing that key. Invalid YAML is returned as
    // an error by `find_metadata_blocks`, so it never produces a block.
    yaml_value: Option<serde_yaml::Value>,
    // Field name from the CARD key.
    tag: Option<String>,
    // Quill name from the QUILL key.
    quill_name: Option<String>,
}
171
/// Report whether `name` matches the pattern `[a-z_][a-z0-9_]*`.
///
/// The first character must be an ASCII lowercase letter or `_`; every
/// following character may additionally be an ASCII digit. The empty string
/// is rejected.
fn is_valid_tag_name(name: &str) -> bool {
    let mut chars = name.chars();

    // Matching on the first character handles the empty string directly, so no
    // separate emptiness check followed by `unwrap()` is needed.
    match chars.next() {
        Some(first) if first.is_ascii_lowercase() || first == '_' => {
            chars.all(|ch| ch.is_ascii_lowercase() || ch.is_ascii_digit() || ch == '_')
        }
        _ => false,
    }
}
193
194/// Find all metadata blocks in the document
195fn find_metadata_blocks(
196    markdown: &str,
197) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
198    let mut blocks = Vec::new();
199    let mut pos = 0;
200
201    while pos < markdown.len() {
202        // Look for opening "---\n" or "---\r\n"
203        let search_str = &markdown[pos..];
204        let delimiter_result = search_str
205            .find("---\n")
206            .map(|p| (p, 4, "\n"))
207            .or_else(|| search_str.find("---\r\n").map(|p| (p, 5, "\r\n")));
208
209        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
210            let abs_pos = pos + delimiter_pos;
211
212            // Check if the delimiter is at the start of a line
213            let is_start_of_line = if abs_pos == 0 {
214                true
215            } else {
216                let char_before = markdown.as_bytes()[abs_pos - 1];
217                char_before == b'\n' || char_before == b'\r'
218            };
219
220            if !is_start_of_line {
221                pos = abs_pos + 1;
222                continue;
223            }
224
225            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"
226
227            // Check if this --- is a horizontal rule (blank lines above AND below)
228            let preceded_by_blank = if abs_pos > 0 {
229                // Check if there's a blank line before the ---
230                let before = &markdown[..abs_pos];
231                before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
232            } else {
233                false
234            };
235
236            let followed_by_blank = if content_start < markdown.len() {
237                markdown[content_start..].starts_with('\n')
238                    || markdown[content_start..].starts_with("\r\n")
239            } else {
240                false
241            };
242
243            // Horizontal rule: blank lines both above and below
244            if preceded_by_blank && followed_by_blank {
245                // This is a horizontal rule in the body, skip it
246                pos = abs_pos + 3; // Skip past "---"
247                continue;
248            }
249
250            // Check if followed by non-blank line (or if we're at document start)
251            // This starts a metadata block
252            if followed_by_blank {
253                // --- followed by blank line but NOT preceded by blank line
254                // This is NOT a metadata block opening, skip it
255                pos = abs_pos + 3;
256                continue;
257            }
258
259            // Found potential metadata block opening (followed by non-blank line)
260            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
261            let rest = &markdown[content_start..];
262
263            // First try to find delimiters with trailing newlines
264            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
265            let closing_with_newline = closing_patterns
266                .iter()
267                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
268                .min_by_key(|(p, _)| *p);
269
270            // Also check for closing at end of document (no trailing newline)
271            let closing_at_eof = ["\n---", "\r\n---"]
272                .iter()
273                .filter_map(|delim| {
274                    rest.find(delim).and_then(|p| {
275                        if p + delim.len() == rest.len() {
276                            Some((p, delim.len()))
277                        } else {
278                            None
279                        }
280                    })
281                })
282                .min_by_key(|(p, _)| *p);
283
284            let closing_result = match (closing_with_newline, closing_at_eof) {
285                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
286                (Some(_), Some(_)) => closing_with_newline,
287                (Some(_), None) => closing_with_newline,
288                (None, Some(_)) => closing_at_eof,
289                (None, None) => None,
290            };
291
292            if let Some((closing_pos, closing_len)) = closing_result {
293                let abs_closing_pos = content_start + closing_pos;
294                let content = &markdown[content_start..abs_closing_pos];
295
296                // Check YAML size limit
297                if content.len() > crate::error::MAX_YAML_SIZE {
298                    return Err(format!(
299                        "YAML block too large: {} bytes (max: {} bytes)",
300                        content.len(),
301                        crate::error::MAX_YAML_SIZE
302                    )
303                    .into());
304                }
305
306                // Parse YAML content to check for reserved keys (QUILL, SCOPE)
307                // First, try to parse as YAML
308                let (tag, quill_name, yaml_value) = if !content.is_empty() {
309                    // Try to parse the YAML to check for reserved keys
310                    match serde_yaml::from_str::<serde_yaml::Value>(content) {
311                        Ok(parsed_yaml) => {
312                            if let Some(mapping) = parsed_yaml.as_mapping() {
313                                let quill_key = serde_yaml::Value::String("QUILL".to_string());
314                                let card_key = serde_yaml::Value::String("CARD".to_string());
315
316                                let has_quill = mapping.contains_key(&quill_key);
317                                let has_card = mapping.contains_key(&card_key);
318
319                                if has_quill && has_card {
320                                    return Err(
321                                        "Cannot specify both QUILL and CARD in the same block"
322                                            .into(),
323                                    );
324                                }
325
326                                if has_quill {
327                                    // Extract quill name
328                                    let quill_value = mapping.get(&quill_key).unwrap();
329                                    let quill_name_str = quill_value
330                                        .as_str()
331                                        .ok_or("QUILL value must be a string")?;
332
333                                    if !is_valid_tag_name(quill_name_str) {
334                                        return Err(format!(
335                                            "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
336                                            quill_name_str
337                                        )
338                                        .into());
339                                    }
340
341                                    // Remove QUILL from the YAML value for processing
342                                    let mut new_mapping = mapping.clone();
343                                    new_mapping.remove(&quill_key);
344                                    let new_value = if new_mapping.is_empty() {
345                                        None
346                                    } else {
347                                        Some(serde_yaml::Value::Mapping(new_mapping))
348                                    };
349
350                                    (None, Some(quill_name_str.to_string()), new_value)
351                                } else if has_card {
352                                    // Extract scope field name
353                                    let card_value = mapping.get(&card_key).unwrap();
354                                    let field_name =
355                                        card_value.as_str().ok_or("CARD value must be a string")?;
356
357                                    if !is_valid_tag_name(field_name) {
358                                        return Err(format!(
359                                            "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
360                                            field_name
361                                        )
362                                        .into());
363                                    }
364
365                                    if field_name == BODY_FIELD {
366                                        return Err(format!(
367                                            "Cannot use reserved field name '{}' as CARD value",
368                                            BODY_FIELD
369                                        )
370                                        .into());
371                                    }
372
373                                    // Remove CARD from the YAML value for processing
374                                    let mut new_mapping = mapping.clone();
375                                    new_mapping.remove(&card_key);
376                                    let new_value = if new_mapping.is_empty() {
377                                        None
378                                    } else {
379                                        Some(serde_yaml::Value::Mapping(new_mapping))
380                                    };
381
382                                    (Some(field_name.to_string()), None, new_value)
383                                } else {
384                                    // No reserved keys, keep the parsed YAML
385                                    (None, None, Some(parsed_yaml))
386                                }
387                            } else {
388                                // Not a mapping, keep the parsed YAML (could be null for whitespace)
389                                (None, None, Some(parsed_yaml))
390                            }
391                        }
392                        Err(e) => {
393                            // YAML parsing failed - return error with context
394                            return Err(format!("Invalid YAML frontmatter: {}", e).into());
395                        }
396                    }
397                } else {
398                    // Empty content
399                    (None, None, None)
400                };
401
402                blocks.push(MetadataBlock {
403                    start: abs_pos,
404                    end: abs_closing_pos + closing_len, // After closing delimiter
405                    yaml_value,
406                    tag,
407                    quill_name,
408                });
409
410                pos = abs_closing_pos + closing_len;
411            } else if abs_pos == 0 {
412                // Frontmatter started but not closed
413                return Err("Frontmatter started but not closed with ---".into());
414            } else {
415                // Not a valid metadata block, skip this position
416                pos = abs_pos + 3;
417            }
418        } else {
419            break;
420        }
421    }
422
423    Ok(blocks)
424}
425
426/// Decompose markdown into frontmatter fields and body
427fn decompose(markdown: &str) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
428    // Check input size limit
429    if markdown.len() > crate::error::MAX_INPUT_SIZE {
430        return Err(format!(
431            "Input too large: {} bytes (max: {} bytes)",
432            markdown.len(),
433            crate::error::MAX_INPUT_SIZE
434        )
435        .into());
436    }
437
438    let mut fields = HashMap::new();
439
440    // Find all metadata blocks
441    let blocks = find_metadata_blocks(markdown)?;
442
443    if blocks.is_empty() {
444        // No metadata blocks, entire content is body
445        fields.insert(
446            BODY_FIELD.to_string(),
447            QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
448        );
449        return Ok(ParsedDocument::new(fields));
450    }
451
452    // Track which attributes are used for tagged blocks
453    let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
454    let mut has_global_frontmatter = false;
455    let mut global_frontmatter_index: Option<usize> = None;
456    let mut quill_name: Option<String> = None;
457
458    // First pass: identify global frontmatter, quill directive, and validate
459    for (idx, block) in blocks.iter().enumerate() {
460        // Check for quill directive
461        if let Some(ref name) = block.quill_name {
462            if quill_name.is_some() {
463                return Err("Multiple quill directives found: only one allowed".into());
464            }
465            quill_name = Some(name.clone());
466        }
467
468        // Check for global frontmatter (no tag and no quill directive)
469        if block.tag.is_none() && block.quill_name.is_none() {
470            if has_global_frontmatter {
471                return Err(
472                    "Multiple global frontmatter blocks found: only one untagged block allowed"
473                        .into(),
474                );
475            }
476            has_global_frontmatter = true;
477            global_frontmatter_index = Some(idx);
478        }
479    }
480
481    // Parse global frontmatter if present
482    if let Some(idx) = global_frontmatter_index {
483        let block = &blocks[idx];
484
485        // Get parsed YAML fields directly (already parsed in find_metadata_blocks)
486        let yaml_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
487            Some(serde_yaml::Value::Mapping(mapping)) => mapping
488                .iter()
489                .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
490                .collect(),
491            Some(serde_yaml::Value::Null) => {
492                // Null value (from whitespace-only YAML) - treat as empty mapping
493                HashMap::new()
494            }
495            Some(_) => {
496                // Non-mapping, non-null YAML (e.g., scalar, sequence) - this is an error for frontmatter
497                return Err("Invalid YAML frontmatter: expected a mapping".into());
498            }
499            None => HashMap::new(),
500        };
501
502        // Check that all tagged blocks don't conflict with global fields
503        // Exception: if the global field is an array, allow it (we'll merge later)
504        for other_block in &blocks {
505            if let Some(ref tag) = other_block.tag {
506                if let Some(global_value) = yaml_fields.get(tag) {
507                    // Check if the global value is an array
508                    if global_value.as_sequence().is_none() {
509                        return Err(format!(
510                            "Name collision: global field '{}' conflicts with tagged attribute",
511                            tag
512                        )
513                        .into());
514                    }
515                }
516            }
517        }
518
519        // Convert YAML values to QuillValue at boundary
520        for (key, value) in yaml_fields {
521            fields.insert(key, QuillValue::from_yaml(value)?);
522        }
523    }
524
525    // Process blocks with quill directives
526    for block in &blocks {
527        if block.quill_name.is_some() {
528            // Quill directive blocks can have YAML content (becomes part of frontmatter)
529            if let Some(ref yaml_val) = block.yaml_value {
530                let yaml_fields: HashMap<String, serde_yaml::Value> = match yaml_val {
531                    serde_yaml::Value::Mapping(mapping) => mapping
532                        .iter()
533                        .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
534                        .collect(),
535                    serde_yaml::Value::Null => {
536                        // Null value (from whitespace-only YAML) - treat as empty mapping
537                        HashMap::new()
538                    }
539                    _ => {
540                        return Err("Invalid YAML in quill block: expected a mapping".into());
541                    }
542                };
543
544                // Check for conflicts with existing fields
545                for key in yaml_fields.keys() {
546                    if fields.contains_key(key) {
547                        return Err(format!(
548                            "Name collision: quill block field '{}' conflicts with existing field",
549                            key
550                        )
551                        .into());
552                    }
553                }
554
555                // Convert YAML values to QuillValue at boundary
556                for (key, value) in yaml_fields {
557                    fields.insert(key, QuillValue::from_yaml(value)?);
558                }
559            }
560        }
561    }
562
563    // Parse tagged blocks
564    for (idx, block) in blocks.iter().enumerate() {
565        if let Some(ref tag_name) = block.tag {
566            // Check if this conflicts with global fields
567            // Exception: if the global field is an array, allow it (we'll merge later)
568            if let Some(existing_value) = fields.get(tag_name) {
569                if existing_value.as_array().is_none() {
570                    return Err(format!(
571                        "Name collision: tagged attribute '{}' conflicts with global field",
572                        tag_name
573                    )
574                    .into());
575                }
576            }
577
578            // Get YAML metadata directly (already parsed in find_metadata_blocks)
579            let mut item_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
580                Some(serde_yaml::Value::Mapping(mapping)) => mapping
581                    .iter()
582                    .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
583                    .collect(),
584                Some(serde_yaml::Value::Null) => {
585                    // Null value (from whitespace-only YAML) - treat as empty mapping
586                    HashMap::new()
587                }
588                Some(_) => {
589                    return Err(format!(
590                        "Invalid YAML in tagged block '{}': expected a mapping",
591                        tag_name
592                    )
593                    .into());
594                }
595                None => HashMap::new(),
596            };
597
598            // Extract body for this tagged block
599            let body_start = block.end;
600            let body_end = if idx + 1 < blocks.len() {
601                blocks[idx + 1].start
602            } else {
603                markdown.len()
604            };
605            let body = &markdown[body_start..body_end];
606
607            // Add body to item fields
608            item_fields.insert(
609                BODY_FIELD.to_string(),
610                serde_yaml::Value::String(body.to_string()),
611            );
612
613            // Convert HashMap to serde_yaml::Value::Mapping
614            let item_value = serde_yaml::to_value(item_fields)?;
615
616            // Add to collection
617            tagged_attributes
618                .entry(tag_name.clone())
619                .or_default()
620                .push(item_value);
621        }
622    }
623
624    // Extract global body
625    // Body starts after global frontmatter or quill block (whichever comes first)
626    // Body ends at the first card block or EOF
627    let first_non_card_block_idx = blocks
628        .iter()
629        .position(|b| b.tag.is_none() && b.quill_name.is_none())
630        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));
631
632    let (body_start, body_end) = if let Some(idx) = first_non_card_block_idx {
633        // Body starts after the first non-card block (global frontmatter or quill)
634        let start = blocks[idx].end;
635
636        // Body ends at the first card block after this, or EOF
637        let end = blocks
638            .iter()
639            .skip(idx + 1)
640            .find(|b| b.tag.is_some())
641            .map(|b| b.start)
642            .unwrap_or(markdown.len());
643
644        (start, end)
645    } else {
646        // No global frontmatter or quill block - body is everything before the first card block
647        let end = blocks
648            .iter()
649            .find(|b| b.tag.is_some())
650            .map(|b| b.start)
651            .unwrap_or(0);
652
653        (0, end)
654    };
655
656    let global_body = &markdown[body_start..body_end];
657
658    fields.insert(
659        BODY_FIELD.to_string(),
660        QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
661    );
662
663    // Add all tagged collections to fields (convert to QuillValue)
664    // If a field already exists and is an array, merge the new items into it
665    for (tag_name, items) in tagged_attributes {
666        if let Some(existing_value) = fields.get(&tag_name) {
667            // The existing value must be an array (checked earlier)
668            if let Some(existing_array) = existing_value.as_array() {
669                // Convert new items from YAML to JSON
670                let new_items_json: Vec<serde_json::Value> = items
671                    .into_iter()
672                    .map(|yaml_val| {
673                        serde_json::to_value(&yaml_val)
674                            .map_err(|e| format!("Failed to convert YAML to JSON: {}", e))
675                    })
676                    .collect::<Result<Vec<_>, _>>()?;
677
678                // Combine existing and new items
679                let mut merged_array = existing_array.clone();
680                merged_array.extend(new_items_json);
681
682                // Create QuillValue from merged JSON array
683                let quill_value = QuillValue::from_json(serde_json::Value::Array(merged_array));
684                fields.insert(tag_name, quill_value);
685            } else {
686                // This should not happen due to earlier validation, but handle it gracefully
687                return Err(format!(
688                    "Internal error: field '{}' exists but is not an array",
689                    tag_name
690                )
691                .into());
692            }
693        } else {
694            // No existing field, just create a new sequence
695            // Note: guillemets in items were already preprocessed when the items were created
696            let quill_value = QuillValue::from_yaml(serde_yaml::Value::Sequence(items))?;
697            fields.insert(tag_name, quill_value);
698        }
699    }
700
701    let quill_tag = quill_name.unwrap_or_else(|| "__default__".to_string());
702    let parsed = ParsedDocument::with_quill_tag(fields, quill_tag);
703
704    Ok(parsed)
705}
706
707#[cfg(test)]
708mod tests {
709    use super::*;
710
711    #[test]
712    fn test_no_frontmatter() {
713        let markdown = "# Hello World\n\nThis is a test.";
714        let doc = decompose(markdown).unwrap();
715
716        assert_eq!(doc.body(), Some(markdown));
717        assert_eq!(doc.fields().len(), 1);
718        // Verify default quill tag is set
719        assert_eq!(doc.quill_tag(), "__default__");
720    }
721
722    #[test]
723    fn test_with_frontmatter() {
724        let markdown = r#"---
725title: Test Document
726author: Test Author
727---
728
729# Hello World
730
731This is the body."#;
732
733        let doc = decompose(markdown).unwrap();
734
735        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
736        assert_eq!(
737            doc.get_field("title").unwrap().as_str().unwrap(),
738            "Test Document"
739        );
740        assert_eq!(
741            doc.get_field("author").unwrap().as_str().unwrap(),
742            "Test Author"
743        );
744        assert_eq!(doc.fields().len(), 3); // title, author, body
745                                           // Verify default quill tag is set when no QUILL directive
746        assert_eq!(doc.quill_tag(), "__default__");
747    }
748
749    #[test]
750    fn test_complex_yaml_frontmatter() {
751        let markdown = r#"---
752title: Complex Document
753tags:
754  - test
755  - yaml
756metadata:
757  version: 1.0
758  nested:
759    field: value
760---
761
762Content here."#;
763
764        let doc = decompose(markdown).unwrap();
765
766        assert_eq!(doc.body(), Some("\nContent here."));
767        assert_eq!(
768            doc.get_field("title").unwrap().as_str().unwrap(),
769            "Complex Document"
770        );
771
772        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
773        assert_eq!(tags.len(), 2);
774        assert_eq!(tags[0].as_str().unwrap(), "test");
775        assert_eq!(tags[1].as_str().unwrap(), "yaml");
776    }
777
778    #[test]
779    fn test_with_defaults_empty_document() {
780        use std::collections::HashMap;
781
782        let mut defaults = HashMap::new();
783        defaults.insert(
784            "status".to_string(),
785            QuillValue::from_json(serde_json::json!("draft")),
786        );
787        defaults.insert(
788            "version".to_string(),
789            QuillValue::from_json(serde_json::json!(1)),
790        );
791
792        // Create an empty parsed document
793        let doc = ParsedDocument::new(HashMap::new());
794        let doc_with_defaults = doc.with_defaults(&defaults);
795
796        // Check that defaults were applied
797        assert_eq!(
798            doc_with_defaults
799                .get_field("status")
800                .unwrap()
801                .as_str()
802                .unwrap(),
803            "draft"
804        );
805        assert_eq!(
806            doc_with_defaults
807                .get_field("version")
808                .unwrap()
809                .as_number()
810                .unwrap()
811                .as_i64()
812                .unwrap(),
813            1
814        );
815    }
816
817    #[test]
818    fn test_with_defaults_preserves_existing_values() {
819        use std::collections::HashMap;
820
821        let mut defaults = HashMap::new();
822        defaults.insert(
823            "status".to_string(),
824            QuillValue::from_json(serde_json::json!("draft")),
825        );
826
827        // Create document with existing status
828        let mut fields = HashMap::new();
829        fields.insert(
830            "status".to_string(),
831            QuillValue::from_json(serde_json::json!("published")),
832        );
833        let doc = ParsedDocument::new(fields);
834
835        let doc_with_defaults = doc.with_defaults(&defaults);
836
837        // Existing value should be preserved
838        assert_eq!(
839            doc_with_defaults
840                .get_field("status")
841                .unwrap()
842                .as_str()
843                .unwrap(),
844            "published"
845        );
846    }
847
848    #[test]
849    fn test_with_defaults_partial_application() {
850        use std::collections::HashMap;
851
852        let mut defaults = HashMap::new();
853        defaults.insert(
854            "status".to_string(),
855            QuillValue::from_json(serde_json::json!("draft")),
856        );
857        defaults.insert(
858            "version".to_string(),
859            QuillValue::from_json(serde_json::json!(1)),
860        );
861
862        // Create document with only one field
863        let mut fields = HashMap::new();
864        fields.insert(
865            "status".to_string(),
866            QuillValue::from_json(serde_json::json!("published")),
867        );
868        let doc = ParsedDocument::new(fields);
869
870        let doc_with_defaults = doc.with_defaults(&defaults);
871
872        // Existing field preserved, missing field gets default
873        assert_eq!(
874            doc_with_defaults
875                .get_field("status")
876                .unwrap()
877                .as_str()
878                .unwrap(),
879            "published"
880        );
881        assert_eq!(
882            doc_with_defaults
883                .get_field("version")
884                .unwrap()
885                .as_number()
886                .unwrap()
887                .as_i64()
888                .unwrap(),
889            1
890        );
891    }
892
893    #[test]
894    fn test_with_defaults_no_defaults() {
895        use std::collections::HashMap;
896
897        let defaults = HashMap::new(); // Empty defaults map
898
899        let doc = ParsedDocument::new(HashMap::new());
900        let doc_with_defaults = doc.with_defaults(&defaults);
901
902        // No defaults should be applied
903        assert!(doc_with_defaults.fields().is_empty());
904    }
905
906    #[test]
907    fn test_with_defaults_complex_types() {
908        use std::collections::HashMap;
909
910        let mut defaults = HashMap::new();
911        defaults.insert(
912            "tags".to_string(),
913            QuillValue::from_json(serde_json::json!(["default", "tag"])),
914        );
915
916        let doc = ParsedDocument::new(HashMap::new());
917        let doc_with_defaults = doc.with_defaults(&defaults);
918
919        // Complex default value should be applied
920        let tags = doc_with_defaults
921            .get_field("tags")
922            .unwrap()
923            .as_sequence()
924            .unwrap();
925        assert_eq!(tags.len(), 2);
926        assert_eq!(tags[0].as_str().unwrap(), "default");
927        assert_eq!(tags[1].as_str().unwrap(), "tag");
928    }
929
930    #[test]
931    fn test_with_coercion_singular_to_array() {
932        use std::collections::HashMap;
933
934        let schema = QuillValue::from_json(serde_json::json!({
935            "$schema": "https://json-schema.org/draft/2019-09/schema",
936            "type": "object",
937            "properties": {
938                "tags": {"type": "array"}
939            }
940        }));
941
942        let mut fields = HashMap::new();
943        fields.insert(
944            "tags".to_string(),
945            QuillValue::from_json(serde_json::json!("single-tag")),
946        );
947        let doc = ParsedDocument::new(fields);
948
949        let coerced_doc = doc.with_coercion(&schema);
950
951        let tags = coerced_doc.get_field("tags").unwrap();
952        assert!(tags.as_array().is_some());
953        let tags_array = tags.as_array().unwrap();
954        assert_eq!(tags_array.len(), 1);
955        assert_eq!(tags_array[0].as_str().unwrap(), "single-tag");
956    }
957
958    #[test]
959    fn test_with_coercion_string_to_boolean() {
960        use std::collections::HashMap;
961
962        let schema = QuillValue::from_json(serde_json::json!({
963            "$schema": "https://json-schema.org/draft/2019-09/schema",
964            "type": "object",
965            "properties": {
966                "active": {"type": "boolean"}
967            }
968        }));
969
970        let mut fields = HashMap::new();
971        fields.insert(
972            "active".to_string(),
973            QuillValue::from_json(serde_json::json!("true")),
974        );
975        let doc = ParsedDocument::new(fields);
976
977        let coerced_doc = doc.with_coercion(&schema);
978
979        assert_eq!(
980            coerced_doc.get_field("active").unwrap().as_bool().unwrap(),
981            true
982        );
983    }
984
985    #[test]
986    fn test_with_coercion_string_to_number() {
987        use std::collections::HashMap;
988
989        let schema = QuillValue::from_json(serde_json::json!({
990            "$schema": "https://json-schema.org/draft/2019-09/schema",
991            "type": "object",
992            "properties": {
993                "count": {"type": "number"}
994            }
995        }));
996
997        let mut fields = HashMap::new();
998        fields.insert(
999            "count".to_string(),
1000            QuillValue::from_json(serde_json::json!("42")),
1001        );
1002        let doc = ParsedDocument::new(fields);
1003
1004        let coerced_doc = doc.with_coercion(&schema);
1005
1006        assert_eq!(
1007            coerced_doc.get_field("count").unwrap().as_i64().unwrap(),
1008            42
1009        );
1010    }
1011
1012    #[test]
1013    fn test_invalid_yaml() {
1014        let markdown = r#"---
1015title: [invalid yaml
1016author: missing close bracket
1017---
1018
1019Content here."#;
1020
1021        let result = decompose(markdown);
1022        assert!(result.is_err());
1023        assert!(result
1024            .unwrap_err()
1025            .to_string()
1026            .contains("Invalid YAML frontmatter"));
1027    }
1028
1029    #[test]
1030    fn test_unclosed_frontmatter() {
1031        let markdown = r#"---
1032title: Test
1033author: Test Author
1034
1035Content without closing ---"#;
1036
1037        let result = decompose(markdown);
1038        assert!(result.is_err());
1039        assert!(result.unwrap_err().to_string().contains("not closed"));
1040    }
1041
1042    // Extended metadata tests
1043
1044    #[test]
1045    fn test_basic_tagged_block() {
1046        let markdown = r#"---
1047title: Main Document
1048---
1049
1050Main body content.
1051
1052---
1053CARD: items
1054name: Item 1
1055---
1056
1057Body of item 1."#;
1058
1059        let doc = decompose(markdown).unwrap();
1060
1061        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
1062        assert_eq!(
1063            doc.get_field("title").unwrap().as_str().unwrap(),
1064            "Main Document"
1065        );
1066
1067        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1068        assert_eq!(items.len(), 1);
1069
1070        let item = items[0].as_object().unwrap();
1071        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1072        assert_eq!(
1073            item.get("body").unwrap().as_str().unwrap(),
1074            "\nBody of item 1."
1075        );
1076    }
1077
1078    #[test]
1079    fn test_multiple_tagged_blocks() {
1080        let markdown = r#"---
1081CARD: items
1082name: Item 1
1083tags: [a, b]
1084---
1085
1086First item body.
1087
1088---
1089CARD: items
1090name: Item 2
1091tags: [c, d]
1092---
1093
1094Second item body."#;
1095
1096        let doc = decompose(markdown).unwrap();
1097
1098        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1099        assert_eq!(items.len(), 2);
1100
1101        let item1 = items[0].as_object().unwrap();
1102        assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");
1103
1104        let item2 = items[1].as_object().unwrap();
1105        assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
1106    }
1107
1108    #[test]
1109    fn test_mixed_global_and_tagged() {
1110        let markdown = r#"---
1111title: Global
1112author: John Doe
1113---
1114
1115Global body.
1116
1117---
1118CARD: sections
1119title: Section 1
1120---
1121
1122Section 1 content.
1123
1124---
1125CARD: sections
1126title: Section 2
1127---
1128
1129Section 2 content."#;
1130
1131        let doc = decompose(markdown).unwrap();
1132
1133        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
1134        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
1135
1136        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1137        assert_eq!(sections.len(), 2);
1138    }
1139
1140    #[test]
1141    fn test_empty_tagged_metadata() {
1142        let markdown = r#"---
1143CARD: items
1144---
1145
1146Body without metadata."#;
1147
1148        let doc = decompose(markdown).unwrap();
1149
1150        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1151        assert_eq!(items.len(), 1);
1152
1153        let item = items[0].as_object().unwrap();
1154        assert_eq!(
1155            item.get("body").unwrap().as_str().unwrap(),
1156            "\nBody without metadata."
1157        );
1158    }
1159
1160    #[test]
1161    fn test_tagged_block_without_body() {
1162        let markdown = r#"---
1163CARD: items
1164name: Item
1165---"#;
1166
1167        let doc = decompose(markdown).unwrap();
1168
1169        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1170        assert_eq!(items.len(), 1);
1171
1172        let item = items[0].as_object().unwrap();
1173        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
1174    }
1175
1176    #[test]
1177    fn test_name_collision_global_and_tagged() {
1178        let markdown = r#"---
1179items: "global value"
1180---
1181
1182Body
1183
1184---
1185CARD: items
1186name: Item
1187---
1188
1189Item body"#;
1190
1191        let result = decompose(markdown);
1192        assert!(result.is_err());
1193        assert!(result.unwrap_err().to_string().contains("collision"));
1194    }
1195
1196    #[test]
1197    fn test_global_array_merged_with_card() {
1198        // When global frontmatter has an array field with the same name as a SCOPE,
1199        // the CARD items should be added to the array
1200        let markdown = r#"---
1201items:
1202  - name: Global Item 1
1203    value: 100
1204  - name: Global Item 2
1205    value: 200
1206---
1207
1208Global body
1209
1210---
1211CARD: items
1212name: Scope Item 1
1213value: 300
1214---
1215
1216Scope item 1 body
1217
1218---
1219CARD: items
1220name: Scope Item 2
1221value: 400
1222---
1223
1224Scope item 2 body"#;
1225
1226        let doc = decompose(markdown).unwrap();
1227
1228        // Verify the items array has all 4 items (2 from global + 2 from SCOPE)
1229        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1230        assert_eq!(items.len(), 4);
1231
1232        // Verify first two items (from global array)
1233        let item1 = items[0].as_object().unwrap();
1234        assert_eq!(
1235            item1.get("name").unwrap().as_str().unwrap(),
1236            "Global Item 1"
1237        );
1238        assert_eq!(item1.get("value").unwrap().as_i64().unwrap(), 100);
1239
1240        let item2 = items[1].as_object().unwrap();
1241        assert_eq!(
1242            item2.get("name").unwrap().as_str().unwrap(),
1243            "Global Item 2"
1244        );
1245        assert_eq!(item2.get("value").unwrap().as_i64().unwrap(), 200);
1246
1247        // Verify last two items (from CARD blocks)
1248        let item3 = items[2].as_object().unwrap();
1249        assert_eq!(item3.get("name").unwrap().as_str().unwrap(), "Scope Item 1");
1250        assert_eq!(item3.get("value").unwrap().as_i64().unwrap(), 300);
1251        assert_eq!(
1252            item3.get("body").unwrap().as_str().unwrap(),
1253            "\nScope item 1 body\n\n"
1254        );
1255
1256        let item4 = items[3].as_object().unwrap();
1257        assert_eq!(item4.get("name").unwrap().as_str().unwrap(), "Scope Item 2");
1258        assert_eq!(item4.get("value").unwrap().as_i64().unwrap(), 400);
1259        assert_eq!(
1260            item4.get("body").unwrap().as_str().unwrap(),
1261            "\nScope item 2 body"
1262        );
1263    }
1264
1265    #[test]
1266    fn test_empty_global_array_with_card() {
1267        // Edge case: global frontmatter has an empty array
1268        let markdown = r#"---
1269items: []
1270---
1271
1272Global body
1273
1274---
1275CARD: items
1276name: Item 1
1277---
1278
1279Item 1 body"#;
1280
1281        let doc = decompose(markdown).unwrap();
1282
1283        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1284        assert_eq!(items.len(), 1);
1285
1286        let item = items[0].as_object().unwrap();
1287        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1288    }
1289
1290    #[test]
1291    fn test_reserved_field_name() {
1292        let markdown = r#"---
1293CARD: body
1294content: Test
1295---"#;
1296
1297        let result = decompose(markdown);
1298        assert!(result.is_err());
1299        assert!(result.unwrap_err().to_string().contains("reserved"));
1300    }
1301
1302    #[test]
1303    fn test_invalid_tag_syntax() {
1304        let markdown = r#"---
1305CARD: Invalid-Name
1306title: Test
1307---"#;
1308
1309        let result = decompose(markdown);
1310        assert!(result.is_err());
1311        assert!(result
1312            .unwrap_err()
1313            .to_string()
1314            .contains("Invalid field name"));
1315    }
1316
1317    #[test]
1318    fn test_multiple_global_frontmatter_blocks() {
1319        let markdown = r#"---
1320title: First
1321---
1322
1323Body
1324
1325---
1326author: Second
1327---
1328
1329More body"#;
1330
1331        let result = decompose(markdown);
1332        assert!(result.is_err());
1333        assert!(result
1334            .unwrap_err()
1335            .to_string()
1336            .contains("Multiple global frontmatter"));
1337    }
1338
1339    #[test]
1340    fn test_adjacent_blocks_different_tags() {
1341        let markdown = r#"---
1342CARD: items
1343name: Item 1
1344---
1345
1346Item 1 body
1347
1348---
1349CARD: sections
1350title: Section 1
1351---
1352
1353Section 1 body"#;
1354
1355        let doc = decompose(markdown).unwrap();
1356
1357        assert!(doc.get_field("items").is_some());
1358        assert!(doc.get_field("sections").is_some());
1359
1360        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1361        assert_eq!(items.len(), 1);
1362
1363        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1364        assert_eq!(sections.len(), 1);
1365    }
1366
1367    #[test]
1368    fn test_order_preservation() {
1369        let markdown = r#"---
1370CARD: items
1371id: 1
1372---
1373
1374First
1375
1376---
1377CARD: items
1378id: 2
1379---
1380
1381Second
1382
1383---
1384CARD: items
1385id: 3
1386---
1387
1388Third"#;
1389
1390        let doc = decompose(markdown).unwrap();
1391
1392        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1393        assert_eq!(items.len(), 3);
1394
1395        for (i, item) in items.iter().enumerate() {
1396            let mapping = item.as_object().unwrap();
1397            let id = mapping.get("id").unwrap().as_i64().unwrap();
1398            assert_eq!(id, (i + 1) as i64);
1399        }
1400    }
1401
1402    #[test]
1403    fn test_product_catalog_integration() {
1404        let markdown = r#"---
1405title: Product Catalog
1406author: John Doe
1407date: 2024-01-01
1408---
1409
1410This is the main catalog description.
1411
1412---
1413CARD: products
1414name: Widget A
1415price: 19.99
1416sku: WID-001
1417---
1418
1419The **Widget A** is our most popular product.
1420
1421---
1422CARD: products
1423name: Gadget B
1424price: 29.99
1425sku: GAD-002
1426---
1427
1428The **Gadget B** is perfect for professionals.
1429
1430---
1431CARD: reviews
1432product: Widget A
1433rating: 5
1434---
1435
1436"Excellent product! Highly recommended."
1437
1438---
1439CARD: reviews
1440product: Gadget B
1441rating: 4
1442---
1443
1444"Very good, but a bit pricey.""#;
1445
1446        let doc = decompose(markdown).unwrap();
1447
1448        // Verify global fields
1449        assert_eq!(
1450            doc.get_field("title").unwrap().as_str().unwrap(),
1451            "Product Catalog"
1452        );
1453        assert_eq!(
1454            doc.get_field("author").unwrap().as_str().unwrap(),
1455            "John Doe"
1456        );
1457        assert_eq!(
1458            doc.get_field("date").unwrap().as_str().unwrap(),
1459            "2024-01-01"
1460        );
1461
1462        // Verify global body
1463        assert!(doc.body().unwrap().contains("main catalog description"));
1464
1465        // Verify products collection
1466        let products = doc.get_field("products").unwrap().as_sequence().unwrap();
1467        assert_eq!(products.len(), 2);
1468
1469        let product1 = products[0].as_object().unwrap();
1470        assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
1471        assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);
1472
1473        // Verify reviews collection
1474        let reviews = doc.get_field("reviews").unwrap().as_sequence().unwrap();
1475        assert_eq!(reviews.len(), 2);
1476
1477        let review1 = reviews[0].as_object().unwrap();
1478        assert_eq!(
1479            review1.get("product").unwrap().as_str().unwrap(),
1480            "Widget A"
1481        );
1482        assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);
1483
1484        // Total fields: title, author, date, body, products, reviews = 6
1485        assert_eq!(doc.fields().len(), 6);
1486    }
1487
1488    #[test]
1489    fn taro_quill_directive() {
1490        let markdown = r#"---
1491QUILL: usaf_memo
1492memo_for: [ORG/SYMBOL]
1493memo_from: [ORG/SYMBOL]
1494---
1495
1496This is the memo body."#;
1497
1498        let doc = decompose(markdown).unwrap();
1499
1500        // Verify quill tag is set
1501        assert_eq!(doc.quill_tag(), "usaf_memo");
1502
1503        // Verify fields from quill block become frontmatter
1504        assert_eq!(
1505            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1506                .as_str()
1507                .unwrap(),
1508            "ORG/SYMBOL"
1509        );
1510
1511        // Verify body
1512        assert_eq!(doc.body(), Some("\nThis is the memo body."));
1513    }
1514
1515    #[test]
1516    fn test_quill_with_card_blocks() {
1517        let markdown = r#"---
1518QUILL: document
1519title: Test Document
1520---
1521
1522Main body.
1523
1524---
1525CARD: sections
1526name: Section 1
1527---
1528
1529Section 1 body."#;
1530
1531        let doc = decompose(markdown).unwrap();
1532
1533        // Verify quill tag
1534        assert_eq!(doc.quill_tag(), "document");
1535
1536        // Verify global field from quill block
1537        assert_eq!(
1538            doc.get_field("title").unwrap().as_str().unwrap(),
1539            "Test Document"
1540        );
1541
1542        // Verify card blocks work
1543        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1544        assert_eq!(sections.len(), 1);
1545
1546        // Verify body
1547        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1548    }
1549
1550    #[test]
1551    fn test_multiple_quill_directives_error() {
1552        let markdown = r#"---
1553QUILL: first
1554---
1555
1556---
1557QUILL: second
1558---"#;
1559
1560        let result = decompose(markdown);
1561        assert!(result.is_err());
1562        assert!(result
1563            .unwrap_err()
1564            .to_string()
1565            .contains("Multiple quill directives"));
1566    }
1567
1568    #[test]
1569    fn test_invalid_quill_name() {
1570        let markdown = r#"---
1571QUILL: Invalid-Name
1572---"#;
1573
1574        let result = decompose(markdown);
1575        assert!(result.is_err());
1576        assert!(result
1577            .unwrap_err()
1578            .to_string()
1579            .contains("Invalid quill name"));
1580    }
1581
1582    #[test]
1583    fn test_quill_wrong_value_type() {
1584        let markdown = r#"---
1585QUILL: 123
1586---"#;
1587
1588        let result = decompose(markdown);
1589        assert!(result.is_err());
1590        assert!(result
1591            .unwrap_err()
1592            .to_string()
1593            .contains("QUILL value must be a string"));
1594    }
1595
1596    #[test]
1597    fn test_card_wrong_value_type() {
1598        let markdown = r#"---
1599CARD: 123
1600---"#;
1601
1602        let result = decompose(markdown);
1603        assert!(result.is_err());
1604        assert!(result
1605            .unwrap_err()
1606            .to_string()
1607            .contains("CARD value must be a string"));
1608    }
1609
1610    #[test]
1611    fn test_both_quill_and_card_error() {
1612        let markdown = r#"---
1613QUILL: test
1614CARD: items
1615---"#;
1616
1617        let result = decompose(markdown);
1618        assert!(result.is_err());
1619        assert!(result
1620            .unwrap_err()
1621            .to_string()
1622            .contains("Cannot specify both QUILL and CARD"));
1623    }
1624
1625    #[test]
1626    fn test_blank_lines_in_frontmatter() {
1627        // New parsing standard: blank lines are allowed within YAML blocks
1628        let markdown = r#"---
1629title: Test Document
1630author: Test Author
1631
1632description: This has a blank line above it
1633tags:
1634  - one
1635  - two
1636---
1637
1638# Hello World
1639
1640This is the body."#;
1641
1642        let doc = decompose(markdown).unwrap();
1643
1644        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
1645        assert_eq!(
1646            doc.get_field("title").unwrap().as_str().unwrap(),
1647            "Test Document"
1648        );
1649        assert_eq!(
1650            doc.get_field("author").unwrap().as_str().unwrap(),
1651            "Test Author"
1652        );
1653        assert_eq!(
1654            doc.get_field("description").unwrap().as_str().unwrap(),
1655            "This has a blank line above it"
1656        );
1657
1658        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
1659        assert_eq!(tags.len(), 2);
1660    }
1661
1662    #[test]
1663    fn test_blank_lines_in_scope_blocks() {
1664        // Blank lines should be allowed in CARD blocks too
1665        let markdown = r#"---
1666CARD: items
1667name: Item 1
1668
1669price: 19.99
1670
1671tags:
1672  - electronics
1673  - gadgets
1674---
1675
1676Body of item 1."#;
1677
1678        let doc = decompose(markdown).unwrap();
1679
1680        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1681        assert_eq!(items.len(), 1);
1682
1683        let item = items[0].as_object().unwrap();
1684        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1685        assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);
1686
1687        let tags = item.get("tags").unwrap().as_array().unwrap();
1688        assert_eq!(tags.len(), 2);
1689    }
1690
1691    #[test]
1692    fn test_horizontal_rule_with_blank_lines_above_and_below() {
1693        // Horizontal rule: blank lines both above AND below the ---
1694        let markdown = r#"---
1695title: Test
1696---
1697
1698First paragraph.
1699
1700---
1701
1702Second paragraph."#;
1703
1704        let doc = decompose(markdown).unwrap();
1705
1706        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1707
1708        // The body should contain the horizontal rule (---) as part of the content
1709        let body = doc.body().unwrap();
1710        assert!(body.contains("First paragraph."));
1711        assert!(body.contains("---"));
1712        assert!(body.contains("Second paragraph."));
1713    }
1714
1715    #[test]
1716    fn test_horizontal_rule_not_preceded_by_blank() {
1717        // --- not preceded by blank line but followed by blank line is NOT a horizontal rule
1718        // It's also NOT a valid metadata block opening (since it's followed by blank)
1719        let markdown = r#"---
1720title: Test
1721---
1722
1723First paragraph.
1724---
1725
1726Second paragraph."#;
1727
1728        let doc = decompose(markdown).unwrap();
1729
1730        let body = doc.body().unwrap();
1731        // The second --- should be in the body as text (not a horizontal rule since no blank above)
1732        assert!(body.contains("---"));
1733    }
1734
1735    #[test]
1736    fn test_multiple_blank_lines_in_yaml() {
1737        // Multiple blank lines should also be allowed
1738        let markdown = r#"---
1739title: Test
1740
1741
1742author: John Doe
1743
1744
1745version: 1.0
1746---
1747
1748Body content."#;
1749
1750        let doc = decompose(markdown).unwrap();
1751
1752        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1753        assert_eq!(
1754            doc.get_field("author").unwrap().as_str().unwrap(),
1755            "John Doe"
1756        );
1757        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1758    }
1759
1760    #[test]
1761    fn test_html_comment_interaction() {
1762        let markdown = r#"<!---
1763---> the rest of the page content
1764
1765---
1766key: value
1767---
1768"#;
1769        let doc = decompose(markdown).unwrap();
1770
1771        // The comment should be ignored (or at least not cause a parse error)
1772        // The frontmatter should be parsed
1773        let key = doc.get_field("key").and_then(|v| v.as_str());
1774        assert_eq!(key, Some("value"));
1775    }
1776}
1777#[cfg(test)]
1778mod demo_file_test {
1779    use super::*;
1780
1781    #[test]
1782    fn test_extended_metadata_demo_file() {
1783        let markdown = include_str!("../../fixtures/resources/extended_metadata_demo.md");
1784        let doc = decompose(markdown).unwrap();
1785
1786        // Verify global fields
1787        assert_eq!(
1788            doc.get_field("title").unwrap().as_str().unwrap(),
1789            "Extended Metadata Demo"
1790        );
1791        assert_eq!(
1792            doc.get_field("author").unwrap().as_str().unwrap(),
1793            "Quillmark Team"
1794        );
1795        // version is parsed as a number by YAML
1796        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1797
1798        // Verify body
1799        assert!(doc
1800            .body()
1801            .unwrap()
1802            .contains("extended YAML metadata standard"));
1803
1804        // Verify features collection
1805        let features = doc.get_field("features").unwrap().as_sequence().unwrap();
1806        assert_eq!(features.len(), 3);
1807
1808        // Verify use_cases collection
1809        let use_cases = doc.get_field("use_cases").unwrap().as_sequence().unwrap();
1810        assert_eq!(use_cases.len(), 2);
1811
1812        // Check first feature
1813        let feature1 = features[0].as_object().unwrap();
1814        assert_eq!(
1815            feature1.get("name").unwrap().as_str().unwrap(),
1816            "Tag Directives"
1817        );
1818    }
1819
1820    #[test]
1821    fn test_input_size_limit() {
1822        // Create markdown larger than MAX_INPUT_SIZE (10 MB)
1823        let size = crate::error::MAX_INPUT_SIZE + 1;
1824        let large_markdown = "a".repeat(size);
1825
1826        let result = decompose(&large_markdown);
1827        assert!(result.is_err());
1828
1829        let err_msg = result.unwrap_err().to_string();
1830        assert!(err_msg.contains("Input too large"));
1831    }
1832
1833    #[test]
1834    fn test_yaml_size_limit() {
1835        // Create YAML block larger than MAX_YAML_SIZE (1 MB)
1836        let mut markdown = String::from("---\n");
1837
1838        // Create a very large YAML field
1839        let size = crate::error::MAX_YAML_SIZE + 1;
1840        markdown.push_str("data: \"");
1841        markdown.push_str(&"x".repeat(size));
1842        markdown.push_str("\"\n---\n\nBody");
1843
1844        let result = decompose(&markdown);
1845        assert!(result.is_err());
1846
1847        let err_msg = result.unwrap_err().to_string();
1848        assert!(err_msg.contains("YAML block too large"));
1849    }
1850
1851    #[test]
1852    fn test_input_within_size_limit() {
1853        // Create markdown just under the limit
1854        let size = 1000; // Much smaller than limit
1855        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));
1856
1857        let result = decompose(&markdown);
1858        assert!(result.is_ok());
1859    }
1860
1861    #[test]
1862    fn test_yaml_within_size_limit() {
1863        // Create YAML block well within the limit
1864        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";
1865
1866        let result = decompose(&markdown);
1867        assert!(result.is_ok());
1868    }
1869
    // Tests for chevron preservation in parsing: `<<...>>` is NOT converted to guillemets during parsing.
    // Conversion to guillemets now happens in process_plate, not during parsing.
1872    #[test]
1873    fn test_chevrons_preserved_in_body_no_frontmatter() {
1874        let markdown = "Use <<raw content>> here.";
1875        let doc = decompose(markdown).unwrap();
1876
1877        // Body should preserve chevrons (conversion happens later in process_plate)
1878        assert_eq!(doc.body(), Some("Use <<raw content>> here."));
1879    }
1880
1881    #[test]
1882    fn test_chevrons_preserved_in_body_with_frontmatter() {
1883        let markdown = r#"---
1884title: Test
1885---
1886
1887Use <<raw content>> here."#;
1888        let doc = decompose(markdown).unwrap();
1889
1890        // Body should preserve chevrons
1891        assert_eq!(doc.body(), Some("\nUse <<raw content>> here."));
1892    }
1893
1894    #[test]
1895    fn test_chevrons_preserved_in_yaml_string() {
1896        let markdown = r#"---
1897title: Test <<with chevrons>>
1898---
1899
1900Body content."#;
1901        let doc = decompose(markdown).unwrap();
1902
1903        // YAML string values should preserve chevrons
1904        assert_eq!(
1905            doc.get_field("title").unwrap().as_str().unwrap(),
1906            "Test <<with chevrons>>"
1907        );
1908    }
1909
1910    #[test]
1911    fn test_chevrons_preserved_in_yaml_array() {
1912        let markdown = r#"---
1913items:
1914  - "<<first>>"
1915  - "<<second>>"
1916---
1917
1918Body."#;
1919        let doc = decompose(markdown).unwrap();
1920
1921        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1922        assert_eq!(items[0].as_str().unwrap(), "<<first>>");
1923        assert_eq!(items[1].as_str().unwrap(), "<<second>>");
1924    }
1925
1926    #[test]
1927    fn test_chevrons_preserved_in_yaml_nested() {
1928        let markdown = r#"---
1929metadata:
1930  description: "<<nested value>>"
1931---
1932
1933Body."#;
1934        let doc = decompose(markdown).unwrap();
1935
1936        let metadata = doc.get_field("metadata").unwrap().as_object().unwrap();
1937        assert_eq!(
1938            metadata.get("description").unwrap().as_str().unwrap(),
1939            "<<nested value>>"
1940        );
1941    }
1942
1943    #[test]
1944    fn test_chevrons_preserved_in_code_blocks() {
1945        let markdown = r#"```
1946<<in code block>>
1947```
1948
1949<<outside code block>>"#;
1950        let doc = decompose(markdown).unwrap();
1951
1952        let body = doc.body().unwrap();
1953        // All chevrons should be preserved (no conversion during parsing)
1954        assert!(body.contains("<<in code block>>"));
1955        assert!(body.contains("<<outside code block>>"));
1956    }
1957
1958    #[test]
1959    fn test_chevrons_preserved_in_inline_code() {
1960        let markdown = "`<<in inline code>>` and <<outside inline code>>";
1961        let doc = decompose(markdown).unwrap();
1962
1963        let body = doc.body().unwrap();
1964        // All chevrons should be preserved
1965        assert!(body.contains("`<<in inline code>>`"));
1966        assert!(body.contains("<<outside inline code>>"));
1967    }
1968
1969    #[test]
1970    fn test_chevrons_preserved_in_tagged_block_body() {
1971        let markdown = r#"---
1972title: Main
1973---
1974
1975Main body.
1976
1977---
1978CARD: items
1979name: Item 1
1980---
1981
1982Use <<raw>> here."#;
1983        let doc = decompose(markdown).unwrap();
1984
1985        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1986        let item = items[0].as_object().unwrap();
1987        let item_body = item.get("body").unwrap().as_str().unwrap();
1988        // Tagged block body should preserve chevrons
1989        assert!(item_body.contains("<<raw>>"));
1990    }
1991
1992    #[test]
1993    fn test_chevrons_preserved_in_tagged_block_yaml() {
1994        let markdown = r#"---
1995title: Main
1996---
1997
1998Main body.
1999
2000---
2001CARD: items
2002description: "<<tagged yaml>>"
2003---
2004
2005Item body."#;
2006        let doc = decompose(markdown).unwrap();
2007
2008        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
2009        let item = items[0].as_object().unwrap();
2010        // Tagged block YAML should preserve chevrons
2011        assert_eq!(
2012            item.get("description").unwrap().as_str().unwrap(),
2013            "<<tagged yaml>>"
2014        );
2015    }
2016
2017    #[test]
2018    fn test_yaml_numbers_not_affected() {
2019        // Numbers should not be affected
2020        let markdown = r#"---
2021count: 42
2022---
2023
2024Body."#;
2025        let doc = decompose(markdown).unwrap();
2026        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
2027    }
2028
2029    #[test]
2030    fn test_yaml_booleans_not_affected() {
2031        // Booleans should not be affected
2032        let markdown = r#"---
2033active: true
2034---
2035
2036Body."#;
2037        let doc = decompose(markdown).unwrap();
2038        assert_eq!(doc.get_field("active").unwrap().as_bool().unwrap(), true);
2039    }
2040
2041    #[test]
2042    fn test_multiline_chevrons_preserved() {
2043        // Multiline chevrons should be preserved as-is
2044        let markdown = "<<text\nacross lines>>";
2045        let doc = decompose(markdown).unwrap();
2046
2047        let body = doc.body().unwrap();
2048        // Should contain the original chevrons
2049        assert!(body.contains("<<text"));
2050        assert!(body.contains("across lines>>"));
2051    }
2052
2053    #[test]
2054    fn test_unmatched_chevrons_preserved() {
2055        let markdown = "<<unmatched";
2056        let doc = decompose(markdown).unwrap();
2057
2058        let body = doc.body().unwrap();
2059        // Unmatched should remain as-is
2060        assert_eq!(body, "<<unmatched");
2061    }
2062}
2063
2064// Additional robustness tests
#[cfg(test)]
mod robustness_tests {
    // Edge-case tests for `decompose` and `is_valid_tag_name`, grouped by theme:
    // delimiters, Unicode, YAML values, CARD blocks, QUILL directives, errors,
    // body extraction and tag-name validation.
    use super::*;

    // Edge cases for delimiter handling

    #[test]
    fn test_empty_document() {
        // Empty input parses to an empty body with the default quill tag.
        let doc = decompose("").unwrap();
        assert_eq!(doc.body(), Some(""));
        assert_eq!(doc.quill_tag(), "__default__");
    }

    #[test]
    fn test_only_whitespace() {
        // Whitespace-only input is returned verbatim as the body.
        let doc = decompose("   \n\n   \t").unwrap();
        assert_eq!(doc.body(), Some("   \n\n   \t"));
    }

    #[test]
    fn test_only_dashes() {
        // A bare "---" with no trailing newline is not a frontmatter opener
        // (an opener requires "---\n"), so this is not an error: the dashes
        // are simply the body.
        let result = decompose("---");
        assert!(result.is_ok());
        assert_eq!(result.unwrap().body(), Some("---"));
    }

    #[test]
    fn test_dashes_in_middle_of_line() {
        // "---" that is not at the start of a line is not a delimiter.
        let markdown = "some text --- more text";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.body(), Some("some text --- more text"));
    }

    #[test]
    fn test_four_dashes() {
        // "----" is not a valid delimiter, so the whole input stays in the body.
        let markdown = "----\ntitle: Test\n----\n\nBody";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("----"));
    }

    #[test]
    fn test_crlf_line_endings() {
        // Windows-style (\r\n) line endings throughout.
        let markdown = "---\r\ntitle: Test\r\n---\r\n\r\nBody content.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert!(doc.body().unwrap().contains("Body content."));
    }

    #[test]
    fn test_mixed_line_endings() {
        // A mix of \n and \r\n inside the same frontmatter block.
        let markdown = "---\ntitle: Test\r\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
    }

    #[test]
    fn test_frontmatter_at_eof_no_trailing_newline() {
        // The closing delimiter is the last thing in the input (no newline).
        let markdown = "---\ntitle: Test\n---";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert_eq!(doc.body(), Some(""));
    }

    #[test]
    fn test_empty_frontmatter() {
        // Frontmatter whose only content is a single whitespace line.
        // Per the original author's note, a bare "---\n---" is not accepted as
        // empty frontmatter (the delimiter logic treats it as a horizontal
        // rule), so a whitespace-only line is placed between the delimiters.
        let markdown = "---\n \n---\n\nBody content.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("Body content."));
        // Only the body field should be present.
        assert_eq!(doc.fields().len(), 1);
    }

    #[test]
    fn test_whitespace_only_frontmatter() {
        // Frontmatter made of several blank / whitespace-only lines.
        let markdown = "---\n   \n\n   \n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("Body."));
    }

    // Unicode handling

    #[test]
    fn test_unicode_in_yaml_keys() {
        // Non-ASCII field names must be usable as lookup keys.
        let markdown = "---\ntitre: Bonjour\nタイトル: こんにちは\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("titre").unwrap().as_str().unwrap(), "Bonjour");
        assert_eq!(
            doc.get_field("タイトル").unwrap().as_str().unwrap(),
            "こんにちは"
        );
    }

    #[test]
    fn test_unicode_in_yaml_values() {
        // CJK text and an emoji inside a YAML value.
        let markdown = "---\ntitle: 你好世界 🎉\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "你好世界 🎉"
        );
    }

    #[test]
    fn test_unicode_in_body() {
        // CJK text and an emoji inside the document body.
        let markdown = "---\ntitle: Test\n---\n\n日本語テキスト with emoji 🚀";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("日本語テキスト"));
        assert!(doc.body().unwrap().contains("🚀"));
    }

    // YAML edge cases

    #[test]
    fn test_yaml_multiline_string() {
        // Literal block scalar (`|`): newlines are kept in the value.
        let markdown = r#"---
description: |
  This is a
  multiline string
  with preserved newlines.
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let desc = doc.get_field("description").unwrap().as_str().unwrap();
        assert!(desc.contains("multiline string"));
        assert!(desc.contains('\n'));
    }

    #[test]
    fn test_yaml_folded_string() {
        // Folded block scalar (`>`): only checks that the text is present.
        let markdown = r#"---
description: >
  This is a folded
  string that becomes
  a single line.
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let desc = doc.get_field("description").unwrap().as_str().unwrap();
        assert!(desc.contains("folded"));
    }

    #[test]
    fn test_yaml_null_value() {
        // An explicit `null` is stored as a null value, not dropped.
        let markdown = "---\noptional: null\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.get_field("optional").unwrap().is_null());
    }

    #[test]
    fn test_yaml_empty_string_value() {
        // A quoted empty string stays an empty string.
        let markdown = "---\nempty: \"\"\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("empty").unwrap().as_str().unwrap(), "");
    }

    #[test]
    fn test_yaml_special_characters_in_string() {
        // YAML-significant characters inside a quoted string are plain text.
        let markdown = "---\nspecial: \"colon: here, and [brackets]\"\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(
            doc.get_field("special").unwrap().as_str().unwrap(),
            "colon: here, and [brackets]"
        );
    }

    #[test]
    fn test_yaml_nested_objects() {
        // Two levels of nested mappings are reachable through `as_object`.
        let markdown = r#"---
config:
  database:
    host: localhost
    port: 5432
  cache:
    enabled: true
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let config = doc.get_field("config").unwrap().as_object().unwrap();
        let db = config.get("database").unwrap().as_object().unwrap();
        assert_eq!(db.get("host").unwrap().as_str().unwrap(), "localhost");
        assert_eq!(db.get("port").unwrap().as_i64().unwrap(), 5432);
    }

    // CARD block edge cases

    #[test]
    fn test_card_with_empty_body() {
        // A CARD block that ends at EOF gets an empty "body" entry.
        let markdown = r#"---
CARD: items
name: Item
---"#;
        let doc = decompose(markdown).unwrap();
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);
        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
    }

    #[test]
    fn test_card_consecutive_blocks() {
        // Two back-to-back CARD blocks with the same name are both collected.
        let markdown = r#"---
CARD: a
id: 1
---
---
CARD: a
id: 2
---"#;
        let doc = decompose(markdown).unwrap();
        let items = doc.get_field("a").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 2);
    }

    #[test]
    fn test_card_with_body_containing_dashes() {
        // Mid-line "---" inside a card body must not end the card.
        let markdown = r#"---
CARD: items
name: Item
---

Some text with --- dashes in it."#;
        let doc = decompose(markdown).unwrap();
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        let item = items[0].as_object().unwrap();
        let body = item.get("body").unwrap().as_str().unwrap();
        assert!(body.contains("--- dashes"));
    }

    // QUILL directive edge cases

    #[test]
    fn test_quill_with_underscore_prefix() {
        // A quill name may start with an underscore.
        let markdown = "---\nQUILL: _internal\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "_internal");
    }

    #[test]
    fn test_quill_with_numbers() {
        // A quill name may contain digits after the first character.
        let markdown = "---\nQUILL: form_8_v2\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "form_8_v2");
    }

    #[test]
    fn test_quill_with_additional_fields() {
        // Ordinary fields in the QUILL block remain accessible as fields.
        let markdown = r#"---
QUILL: my_quill
title: Document Title
author: John Doe
---

Body content."#;
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "my_quill");
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Document Title"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "John Doe"
        );
    }

    // Error handling

    #[test]
    fn test_invalid_scope_name_uppercase() {
        // An uppercase CARD name is rejected with an "Invalid field name" error.
        let markdown = "---\nCARD: ITEMS\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid field name"));
    }

    #[test]
    fn test_invalid_scope_name_starts_with_number() {
        // A CARD name starting with a digit is rejected.
        let markdown = "---\nCARD: 123items\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_invalid_scope_name_with_hyphen() {
        // A CARD name containing a hyphen is rejected.
        let markdown = "---\nCARD: my-items\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_invalid_quill_name_uppercase() {
        // A QUILL name containing uppercase letters is rejected.
        let markdown = "---\nQUILL: MyQuill\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_yaml_syntax_error_missing_colon() {
        // "title Test" is not a key/value pair, so parsing must fail.
        let markdown = "---\ntitle Test\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_yaml_syntax_error_bad_indentation() {
        let markdown = "---\nitems:\n- one\n - two\n---\n\nBody.";
        let result = decompose(markdown);
        // Bad indentation may or may not be an error depending on the YAML
        // parser; the only requirement here is that parsing does not panic.
        let _ = result;
    }

    // Body extraction edge cases

    #[test]
    fn test_body_with_leading_newlines() {
        // Blank lines right after the frontmatter stay at the start of the body.
        let markdown = "---\ntitle: Test\n---\n\n\n\nBody with leading newlines.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().starts_with('\n'));
    }

    #[test]
    fn test_body_with_trailing_newlines() {
        // Trailing newlines at the end of the input stay on the body.
        let markdown = "---\ntitle: Test\n---\n\nBody.\n\n\n";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().ends_with('\n'));
    }

    #[test]
    fn test_no_body_after_frontmatter() {
        // Frontmatter with nothing after it yields an empty (not missing) body.
        let markdown = "---\ntitle: Test\n---";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.body(), Some(""));
    }

    // Tag name validation

    #[test]
    fn test_valid_tag_name_single_underscore() {
        assert!(is_valid_tag_name("_"));
    }

    #[test]
    fn test_valid_tag_name_underscore_prefix() {
        assert!(is_valid_tag_name("_private"));
    }

    #[test]
    fn test_valid_tag_name_with_numbers() {
        assert!(is_valid_tag_name("item1"));
        assert!(is_valid_tag_name("item_2"));
    }

    #[test]
    fn test_invalid_tag_name_empty() {
        assert!(!is_valid_tag_name(""));
    }

    #[test]
    fn test_invalid_tag_name_starts_with_number() {
        assert!(!is_valid_tag_name("1item"));
    }

    #[test]
    fn test_invalid_tag_name_uppercase() {
        assert!(!is_valid_tag_name("Items"));
        assert!(!is_valid_tag_name("ITEMS"));
    }

    #[test]
    fn test_invalid_tag_name_special_chars() {
        assert!(!is_valid_tag_name("my-items"));
        assert!(!is_valid_tag_name("my.items"));
        assert!(!is_valid_tag_name("my items"));
    }

    // Values that must pass through YAML parsing unchanged

    #[test]
    fn test_guillemet_in_yaml_preserves_non_strings() {
        // Integer, float and boolean scalars keep their types.
        let markdown = r#"---
count: 42
price: 19.99
active: true
items:
  - first
  - 100
  - true
---

Body."#;
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
        assert_eq!(doc.get_field("price").unwrap().as_f64().unwrap(), 19.99);
        assert!(doc.get_field("active").unwrap().as_bool().unwrap());

        // The mixed-type sequence keeps each element's scalar type as well.
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 3);
        assert_eq!(items[0].as_str().unwrap(), "first");
        assert_eq!(items[1].as_i64().unwrap(), 100);
        assert!(items[2].as_bool().unwrap());
    }

    #[test]
    fn test_guillemet_double_conversion_prevention() {
        // Guillemets already present in the input must not be processed again.
        let markdown = "---\ntitle: Already «converted»\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        // The value stays exactly as written (not double-escaped).
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Already «converted»"
        );
    }
}