quillmark_core/
parse.rs

1//! # Parsing Module
2//!
3//! Parsing functionality for markdown documents with YAML frontmatter.
4//!
5//! ## Overview
6//!
//! The `parse` module provides the [`ParsedDocument::from_markdown`] function for parsing markdown documents with YAML frontmatter.
8//!
9//! ## Key Types
10//!
11//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
12//! - [`BODY_FIELD`]: Constant for the field name storing document body
13//!
14//! ## Examples
15//!
16//! ### Basic Parsing
17//!
18//! ```
19//! use quillmark_core::ParsedDocument;
20//!
21//! let markdown = r#"---
22//! title: My Document
23//! author: John Doe
24//! ---
25//!
26//! # Introduction
27//!
28//! Document content here.
29//! "#;
30//!
31//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
32//! let title = doc.get_field("title")
33//!     .and_then(|v| v.as_str())
34//!     .unwrap_or("Untitled");
35//! ```
36//!
37//! ## Error Handling
38//!
39//! The [`ParsedDocument::from_markdown`] function returns errors for:
40//! - Malformed YAML syntax
41//! - Unclosed frontmatter blocks
42//! - Multiple global frontmatter blocks
43//! - Both QUILL and SCOPE specified in the same block
44//! - Reserved field name usage
45//! - Name collisions
46//!
47//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.
48
49use std::collections::HashMap;
50
51use crate::value::QuillValue;
52
/// The field name used to store the document body.
///
/// [`ParsedDocument::body`] reads this key from the field map, each tagged
/// block's own body text is stored under the same key inside its item, and
/// the name is rejected when used as a `SCOPE` value.
pub const BODY_FIELD: &str = "body";
55
/// Reserved tag name for quill specification.
///
/// NOTE(review): the parsing code in this file matches the frontmatter key
/// `"QUILL"` by string literal and does not appear to reference this
/// constant — confirm where it is consumed.
pub const QUILL_TAG: &str = "quill";
58
/// A parsed markdown document with frontmatter.
///
/// Everything extracted from the document lives in a single map keyed by
/// field name; the body text is stored in that same map under [`BODY_FIELD`].
#[derive(Debug, Clone)]
pub struct ParsedDocument {
    /// Frontmatter fields, tagged-block collections, and the body (under [`BODY_FIELD`]).
    fields: HashMap<String, QuillValue>,
    /// Quill name taken from a `QUILL` key, or `"__default__"` when none was specified.
    quill_tag: String,
}
65
66impl ParsedDocument {
67    /// Create a new ParsedDocument with the given fields
68    pub fn new(fields: HashMap<String, QuillValue>) -> Self {
69        Self {
70            fields,
71            quill_tag: "__default__".to_string(),
72        }
73    }
74
75    /// Create a ParsedDocument from fields and quill tag
76    pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: String) -> Self {
77        Self { fields, quill_tag }
78    }
79
80    /// Create a ParsedDocument from markdown string
81    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
82        decompose(markdown).map_err(crate::error::ParseError::from)
83    }
84
85    /// Get the quill tag (from QUILL key, or "__default__" if not specified)
86    pub fn quill_tag(&self) -> &str {
87        &self.quill_tag
88    }
89
90    /// Get the document body
91    pub fn body(&self) -> Option<&str> {
92        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
93    }
94
95    /// Get a specific field
96    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
97        self.fields.get(name)
98    }
99
100    /// Get all fields (including body)
101    pub fn fields(&self) -> &HashMap<String, QuillValue> {
102        &self.fields
103    }
104
105    /// Create a new ParsedDocument with default values applied
106    ///
107    /// This method creates a new ParsedDocument with default values applied for any
108    /// fields that are missing from the original document but have defaults specified.
109    /// Existing fields are preserved and not overwritten.
110    ///
111    /// # Arguments
112    ///
113    /// * `defaults` - A HashMap of field names to their default QuillValues
114    ///
115    /// # Returns
116    ///
117    /// A new ParsedDocument with defaults applied for missing fields
118    pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
119        let mut fields = self.fields.clone();
120
121        for (field_name, default_value) in defaults {
122            // Only apply default if field is missing
123            if !fields.contains_key(field_name) {
124                fields.insert(field_name.clone(), default_value.clone());
125            }
126        }
127
128        Self {
129            fields,
130            quill_tag: self.quill_tag.clone(),
131        }
132    }
133
134    /// Create a new ParsedDocument with coerced field values
135    ///
136    /// This method applies type coercions to field values based on the schema.
137    /// Coercions include:
138    /// - Singular values to arrays when schema expects array
139    /// - String "true"/"false" to boolean
140    /// - Numbers to boolean (0=false, non-zero=true)
141    /// - String numbers to number type
142    /// - Boolean to number (true=1, false=0)
143    ///
144    /// # Arguments
145    ///
146    /// * `schema` - A JSON Schema object defining expected field types
147    ///
148    /// # Returns
149    ///
150    /// A new ParsedDocument with coerced field values
151    pub fn with_coercion(&self, schema: &QuillValue) -> Self {
152        use crate::schema::coerce_document;
153
154        let coerced_fields = coerce_document(schema, &self.fields);
155
156        Self {
157            fields: coerced_fields,
158            quill_tag: self.quill_tag.clone(),
159        }
160    }
161}
162
/// One `---`-delimited metadata block located by `find_metadata_blocks`.
#[derive(Debug)]
struct MetadataBlock {
    /// Byte offset of the opening `---`.
    start: usize,
    /// Byte offset just past the closing delimiter (including its line ending, if any).
    end: usize,
    /// Parsed YAML with any `QUILL`/`SCOPE` key removed. `None` when the block
    /// content is empty or when nothing remains after removing the reserved
    /// key. A YAML syntax error aborts parsing instead of producing `None`.
    yaml_value: Option<serde_yaml::Value>,
    /// Field name taken from the `SCOPE` key, if the block has one.
    tag: Option<String>,
    /// Quill name taken from the `QUILL` key, if the block has one.
    quill_name: Option<String>,
}
171
/// Check that `name` matches the pattern `[a-z_][a-z0-9_]*`.
///
/// The empty string is rejected. Only ASCII lowercase letters, ASCII digits
/// and `_` are accepted, and the first character may not be a digit.
fn is_valid_tag_name(name: &str) -> bool {
    let mut rest = name.chars();

    match rest.next() {
        // A valid head character: every remaining character must be a valid tail character.
        Some(head) if head.is_ascii_lowercase() || head == '_' => {
            rest.all(|ch| ch.is_ascii_lowercase() || ch.is_ascii_digit() || ch == '_')
        }
        // Empty input or an invalid first character.
        _ => false,
    }
}
193
194/// Find all metadata blocks in the document
195fn find_metadata_blocks(
196    markdown: &str,
197) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
198    let mut blocks = Vec::new();
199    let mut pos = 0;
200
201    while pos < markdown.len() {
202        // Look for opening "---\n" or "---\r\n"
203        let search_str = &markdown[pos..];
204        let delimiter_result = search_str
205            .find("---\n")
206            .map(|p| (p, 4, "\n"))
207            .or_else(|| search_str.find("---\r\n").map(|p| (p, 5, "\r\n")));
208
209        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
210            let abs_pos = pos + delimiter_pos;
211
212            // Check if the delimiter is at the start of a line
213            let is_start_of_line = if abs_pos == 0 {
214                true
215            } else {
216                let char_before = markdown.as_bytes()[abs_pos - 1];
217                char_before == b'\n' || char_before == b'\r'
218            };
219
220            if !is_start_of_line {
221                pos = abs_pos + 1;
222                continue;
223            }
224
225            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"
226
227            // Check if this --- is a horizontal rule (blank lines above AND below)
228            let preceded_by_blank = if abs_pos > 0 {
229                // Check if there's a blank line before the ---
230                let before = &markdown[..abs_pos];
231                before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
232            } else {
233                false
234            };
235
236            let followed_by_blank = if content_start < markdown.len() {
237                markdown[content_start..].starts_with('\n')
238                    || markdown[content_start..].starts_with("\r\n")
239            } else {
240                false
241            };
242
243            // Horizontal rule: blank lines both above and below
244            if preceded_by_blank && followed_by_blank {
245                // This is a horizontal rule in the body, skip it
246                pos = abs_pos + 3; // Skip past "---"
247                continue;
248            }
249
250            // Check if followed by non-blank line (or if we're at document start)
251            // This starts a metadata block
252            if followed_by_blank {
253                // --- followed by blank line but NOT preceded by blank line
254                // This is NOT a metadata block opening, skip it
255                pos = abs_pos + 3;
256                continue;
257            }
258
259            // Found potential metadata block opening (followed by non-blank line)
260            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
261            let rest = &markdown[content_start..];
262
263            // First try to find delimiters with trailing newlines
264            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
265            let closing_with_newline = closing_patterns
266                .iter()
267                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
268                .min_by_key(|(p, _)| *p);
269
270            // Also check for closing at end of document (no trailing newline)
271            let closing_at_eof = ["\n---", "\r\n---"]
272                .iter()
273                .filter_map(|delim| {
274                    rest.find(delim).and_then(|p| {
275                        if p + delim.len() == rest.len() {
276                            Some((p, delim.len()))
277                        } else {
278                            None
279                        }
280                    })
281                })
282                .min_by_key(|(p, _)| *p);
283
284            let closing_result = match (closing_with_newline, closing_at_eof) {
285                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
286                (Some(_), Some(_)) => closing_with_newline,
287                (Some(_), None) => closing_with_newline,
288                (None, Some(_)) => closing_at_eof,
289                (None, None) => None,
290            };
291
292            if let Some((closing_pos, closing_len)) = closing_result {
293                let abs_closing_pos = content_start + closing_pos;
294                let content = &markdown[content_start..abs_closing_pos];
295
296                // Check YAML size limit
297                if content.len() > crate::error::MAX_YAML_SIZE {
298                    return Err(format!(
299                        "YAML block too large: {} bytes (max: {} bytes)",
300                        content.len(),
301                        crate::error::MAX_YAML_SIZE
302                    )
303                    .into());
304                }
305
306                // Parse YAML content to check for reserved keys (QUILL, SCOPE)
307                // First, try to parse as YAML
308                let (tag, quill_name, yaml_value) = if !content.is_empty() {
309                    // Try to parse the YAML to check for reserved keys
310                    match serde_yaml::from_str::<serde_yaml::Value>(content) {
311                        Ok(parsed_yaml) => {
312                            if let Some(mapping) = parsed_yaml.as_mapping() {
313                                let quill_key = serde_yaml::Value::String("QUILL".to_string());
314                                let scope_key = serde_yaml::Value::String("SCOPE".to_string());
315
316                                let has_quill = mapping.contains_key(&quill_key);
317                                let has_scope = mapping.contains_key(&scope_key);
318
319                                if has_quill && has_scope {
320                                    return Err(
321                                        "Cannot specify both QUILL and SCOPE in the same block"
322                                            .into(),
323                                    );
324                                }
325
326                                if has_quill {
327                                    // Extract quill name
328                                    let quill_value = mapping.get(&quill_key).unwrap();
329                                    let quill_name_str = quill_value
330                                        .as_str()
331                                        .ok_or("QUILL value must be a string")?;
332
333                                    if !is_valid_tag_name(quill_name_str) {
334                                        return Err(format!(
335                                            "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
336                                            quill_name_str
337                                        )
338                                        .into());
339                                    }
340
341                                    // Remove QUILL from the YAML value for processing
342                                    let mut new_mapping = mapping.clone();
343                                    new_mapping.remove(&quill_key);
344                                    let new_value = if new_mapping.is_empty() {
345                                        None
346                                    } else {
347                                        Some(serde_yaml::Value::Mapping(new_mapping))
348                                    };
349
350                                    (None, Some(quill_name_str.to_string()), new_value)
351                                } else if has_scope {
352                                    // Extract scope field name
353                                    let scope_value = mapping.get(&scope_key).unwrap();
354                                    let field_name = scope_value
355                                        .as_str()
356                                        .ok_or("SCOPE value must be a string")?;
357
358                                    if !is_valid_tag_name(field_name) {
359                                        return Err(format!(
360                                            "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
361                                            field_name
362                                        )
363                                        .into());
364                                    }
365
366                                    if field_name == BODY_FIELD {
367                                        return Err(format!(
368                                            "Cannot use reserved field name '{}' as SCOPE value",
369                                            BODY_FIELD
370                                        )
371                                        .into());
372                                    }
373
374                                    // Remove SCOPE from the YAML value for processing
375                                    let mut new_mapping = mapping.clone();
376                                    new_mapping.remove(&scope_key);
377                                    let new_value = if new_mapping.is_empty() {
378                                        None
379                                    } else {
380                                        Some(serde_yaml::Value::Mapping(new_mapping))
381                                    };
382
383                                    (Some(field_name.to_string()), None, new_value)
384                                } else {
385                                    // No reserved keys, keep the parsed YAML
386                                    (None, None, Some(parsed_yaml))
387                                }
388                            } else {
389                                // Not a mapping, keep the parsed YAML (could be null for whitespace)
390                                (None, None, Some(parsed_yaml))
391                            }
392                        }
393                        Err(e) => {
394                            // YAML parsing failed - return error with context
395                            return Err(format!("Invalid YAML frontmatter: {}", e).into());
396                        }
397                    }
398                } else {
399                    // Empty content
400                    (None, None, None)
401                };
402
403                blocks.push(MetadataBlock {
404                    start: abs_pos,
405                    end: abs_closing_pos + closing_len, // After closing delimiter
406                    yaml_value,
407                    tag,
408                    quill_name,
409                });
410
411                pos = abs_closing_pos + closing_len;
412            } else if abs_pos == 0 {
413                // Frontmatter started but not closed
414                return Err("Frontmatter started but not closed with ---".into());
415            } else {
416                // Not a valid metadata block, skip this position
417                pos = abs_pos + 3;
418            }
419        } else {
420            break;
421        }
422    }
423
424    Ok(blocks)
425}
426
427/// Decompose markdown into frontmatter fields and body
428fn decompose(markdown: &str) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
429    // Check input size limit
430    if markdown.len() > crate::error::MAX_INPUT_SIZE {
431        return Err(format!(
432            "Input too large: {} bytes (max: {} bytes)",
433            markdown.len(),
434            crate::error::MAX_INPUT_SIZE
435        )
436        .into());
437    }
438
439    let mut fields = HashMap::new();
440
441    // Find all metadata blocks
442    let blocks = find_metadata_blocks(markdown)?;
443
444    if blocks.is_empty() {
445        // No metadata blocks, entire content is body
446        fields.insert(
447            BODY_FIELD.to_string(),
448            QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
449        );
450        return Ok(ParsedDocument::new(fields));
451    }
452
453    // Track which attributes are used for tagged blocks
454    let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
455    let mut has_global_frontmatter = false;
456    let mut global_frontmatter_index: Option<usize> = None;
457    let mut quill_name: Option<String> = None;
458
459    // First pass: identify global frontmatter, quill directive, and validate
460    for (idx, block) in blocks.iter().enumerate() {
461        // Check for quill directive
462        if let Some(ref name) = block.quill_name {
463            if quill_name.is_some() {
464                return Err("Multiple quill directives found: only one allowed".into());
465            }
466            quill_name = Some(name.clone());
467        }
468
469        // Check for global frontmatter (no tag and no quill directive)
470        if block.tag.is_none() && block.quill_name.is_none() {
471            if has_global_frontmatter {
472                return Err(
473                    "Multiple global frontmatter blocks found: only one untagged block allowed"
474                        .into(),
475                );
476            }
477            has_global_frontmatter = true;
478            global_frontmatter_index = Some(idx);
479        }
480    }
481
482    // Parse global frontmatter if present
483    if let Some(idx) = global_frontmatter_index {
484        let block = &blocks[idx];
485
486        // Get parsed YAML fields directly (already parsed in find_metadata_blocks)
487        let yaml_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
488            Some(serde_yaml::Value::Mapping(mapping)) => mapping
489                .iter()
490                .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
491                .collect(),
492            Some(serde_yaml::Value::Null) => {
493                // Null value (from whitespace-only YAML) - treat as empty mapping
494                HashMap::new()
495            }
496            Some(_) => {
497                // Non-mapping, non-null YAML (e.g., scalar, sequence) - this is an error for frontmatter
498                return Err("Invalid YAML frontmatter: expected a mapping".into());
499            }
500            None => HashMap::new(),
501        };
502
503        // Check that all tagged blocks don't conflict with global fields
504        // Exception: if the global field is an array, allow it (we'll merge later)
505        for other_block in &blocks {
506            if let Some(ref tag) = other_block.tag {
507                if let Some(global_value) = yaml_fields.get(tag) {
508                    // Check if the global value is an array
509                    if global_value.as_sequence().is_none() {
510                        return Err(format!(
511                            "Name collision: global field '{}' conflicts with tagged attribute",
512                            tag
513                        )
514                        .into());
515                    }
516                }
517            }
518        }
519
520        // Convert YAML values to QuillValue at boundary
521        for (key, value) in yaml_fields {
522            fields.insert(key, QuillValue::from_yaml(value)?);
523        }
524    }
525
526    // Process blocks with quill directives
527    for block in &blocks {
528        if block.quill_name.is_some() {
529            // Quill directive blocks can have YAML content (becomes part of frontmatter)
530            if let Some(ref yaml_val) = block.yaml_value {
531                let yaml_fields: HashMap<String, serde_yaml::Value> = match yaml_val {
532                    serde_yaml::Value::Mapping(mapping) => mapping
533                        .iter()
534                        .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
535                        .collect(),
536                    serde_yaml::Value::Null => {
537                        // Null value (from whitespace-only YAML) - treat as empty mapping
538                        HashMap::new()
539                    }
540                    _ => {
541                        return Err("Invalid YAML in quill block: expected a mapping".into());
542                    }
543                };
544
545                // Check for conflicts with existing fields
546                for key in yaml_fields.keys() {
547                    if fields.contains_key(key) {
548                        return Err(format!(
549                            "Name collision: quill block field '{}' conflicts with existing field",
550                            key
551                        )
552                        .into());
553                    }
554                }
555
556                // Convert YAML values to QuillValue at boundary
557                for (key, value) in yaml_fields {
558                    fields.insert(key, QuillValue::from_yaml(value)?);
559                }
560            }
561        }
562    }
563
564    // Parse tagged blocks
565    for (idx, block) in blocks.iter().enumerate() {
566        if let Some(ref tag_name) = block.tag {
567            // Check if this conflicts with global fields
568            // Exception: if the global field is an array, allow it (we'll merge later)
569            if let Some(existing_value) = fields.get(tag_name) {
570                if existing_value.as_array().is_none() {
571                    return Err(format!(
572                        "Name collision: tagged attribute '{}' conflicts with global field",
573                        tag_name
574                    )
575                    .into());
576                }
577            }
578
579            // Get YAML metadata directly (already parsed in find_metadata_blocks)
580            let mut item_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
581                Some(serde_yaml::Value::Mapping(mapping)) => mapping
582                    .iter()
583                    .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
584                    .collect(),
585                Some(serde_yaml::Value::Null) => {
586                    // Null value (from whitespace-only YAML) - treat as empty mapping
587                    HashMap::new()
588                }
589                Some(_) => {
590                    return Err(format!(
591                        "Invalid YAML in tagged block '{}': expected a mapping",
592                        tag_name
593                    )
594                    .into());
595                }
596                None => HashMap::new(),
597            };
598
599            // Extract body for this tagged block
600            let body_start = block.end;
601            let body_end = if idx + 1 < blocks.len() {
602                blocks[idx + 1].start
603            } else {
604                markdown.len()
605            };
606            let body = &markdown[body_start..body_end];
607
608            // Add body to item fields
609            item_fields.insert(
610                BODY_FIELD.to_string(),
611                serde_yaml::Value::String(body.to_string()),
612            );
613
614            // Convert HashMap to serde_yaml::Value::Mapping
615            let item_value = serde_yaml::to_value(item_fields)?;
616
617            // Add to collection
618            tagged_attributes
619                .entry(tag_name.clone())
620                .or_default()
621                .push(item_value);
622        }
623    }
624
625    // Extract global body
626    // Body starts after global frontmatter or quill block (whichever comes first)
627    // Body ends at the first scope block or EOF
628    let first_non_scope_block_idx = blocks
629        .iter()
630        .position(|b| b.tag.is_none() && b.quill_name.is_none())
631        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));
632
633    let (body_start, body_end) = if let Some(idx) = first_non_scope_block_idx {
634        // Body starts after the first non-scope block (global frontmatter or quill)
635        let start = blocks[idx].end;
636
637        // Body ends at the first scope block after this, or EOF
638        let end = blocks
639            .iter()
640            .skip(idx + 1)
641            .find(|b| b.tag.is_some())
642            .map(|b| b.start)
643            .unwrap_or(markdown.len());
644
645        (start, end)
646    } else {
647        // No global frontmatter or quill block - body is everything before the first scope block
648        let end = blocks
649            .iter()
650            .find(|b| b.tag.is_some())
651            .map(|b| b.start)
652            .unwrap_or(0);
653
654        (0, end)
655    };
656
657    let global_body = &markdown[body_start..body_end];
658
659    fields.insert(
660        BODY_FIELD.to_string(),
661        QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
662    );
663
664    // Add all tagged collections to fields (convert to QuillValue)
665    // If a field already exists and is an array, merge the new items into it
666    for (tag_name, items) in tagged_attributes {
667        if let Some(existing_value) = fields.get(&tag_name) {
668            // The existing value must be an array (checked earlier)
669            if let Some(existing_array) = existing_value.as_array() {
670                // Convert new items from YAML to JSON
671                let new_items_json: Vec<serde_json::Value> = items
672                    .into_iter()
673                    .map(|yaml_val| {
674                        serde_json::to_value(&yaml_val)
675                            .map_err(|e| format!("Failed to convert YAML to JSON: {}", e))
676                    })
677                    .collect::<Result<Vec<_>, _>>()?;
678
679                // Combine existing and new items
680                let mut merged_array = existing_array.clone();
681                merged_array.extend(new_items_json);
682
683                // Create QuillValue from merged JSON array
684                let quill_value = QuillValue::from_json(serde_json::Value::Array(merged_array));
685                fields.insert(tag_name, quill_value);
686            } else {
687                // This should not happen due to earlier validation, but handle it gracefully
688                return Err(format!(
689                    "Internal error: field '{}' exists but is not an array",
690                    tag_name
691                )
692                .into());
693            }
694        } else {
695            // No existing field, just create a new sequence
696            // Note: guillemets in items were already preprocessed when the items were created
697            let quill_value = QuillValue::from_yaml(serde_yaml::Value::Sequence(items))?;
698            fields.insert(tag_name, quill_value);
699        }
700    }
701
702    let quill_tag = quill_name.unwrap_or_else(|| "__default__".to_string());
703    let parsed = ParsedDocument::with_quill_tag(fields, quill_tag);
704
705    Ok(parsed)
706}
707
708#[cfg(test)]
709mod tests {
710    use super::*;
711
712    #[test]
713    fn test_no_frontmatter() {
714        let markdown = "# Hello World\n\nThis is a test.";
715        let doc = decompose(markdown).unwrap();
716
717        assert_eq!(doc.body(), Some(markdown));
718        assert_eq!(doc.fields().len(), 1);
719        // Verify default quill tag is set
720        assert_eq!(doc.quill_tag(), "__default__");
721    }
722
723    #[test]
724    fn test_with_frontmatter() {
725        let markdown = r#"---
726title: Test Document
727author: Test Author
728---
729
730# Hello World
731
732This is the body."#;
733
734        let doc = decompose(markdown).unwrap();
735
736        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
737        assert_eq!(
738            doc.get_field("title").unwrap().as_str().unwrap(),
739            "Test Document"
740        );
741        assert_eq!(
742            doc.get_field("author").unwrap().as_str().unwrap(),
743            "Test Author"
744        );
745        assert_eq!(doc.fields().len(), 3); // title, author, body
746                                           // Verify default quill tag is set when no QUILL directive
747        assert_eq!(doc.quill_tag(), "__default__");
748    }
749
750    #[test]
751    fn test_complex_yaml_frontmatter() {
752        let markdown = r#"---
753title: Complex Document
754tags:
755  - test
756  - yaml
757metadata:
758  version: 1.0
759  nested:
760    field: value
761---
762
763Content here."#;
764
765        let doc = decompose(markdown).unwrap();
766
767        assert_eq!(doc.body(), Some("\nContent here."));
768        assert_eq!(
769            doc.get_field("title").unwrap().as_str().unwrap(),
770            "Complex Document"
771        );
772
773        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
774        assert_eq!(tags.len(), 2);
775        assert_eq!(tags[0].as_str().unwrap(), "test");
776        assert_eq!(tags[1].as_str().unwrap(), "yaml");
777    }
778
779    #[test]
780    fn test_with_defaults_empty_document() {
781        use std::collections::HashMap;
782
783        let mut defaults = HashMap::new();
784        defaults.insert(
785            "status".to_string(),
786            QuillValue::from_json(serde_json::json!("draft")),
787        );
788        defaults.insert(
789            "version".to_string(),
790            QuillValue::from_json(serde_json::json!(1)),
791        );
792
793        // Create an empty parsed document
794        let doc = ParsedDocument::new(HashMap::new());
795        let doc_with_defaults = doc.with_defaults(&defaults);
796
797        // Check that defaults were applied
798        assert_eq!(
799            doc_with_defaults
800                .get_field("status")
801                .unwrap()
802                .as_str()
803                .unwrap(),
804            "draft"
805        );
806        assert_eq!(
807            doc_with_defaults
808                .get_field("version")
809                .unwrap()
810                .as_number()
811                .unwrap()
812                .as_i64()
813                .unwrap(),
814            1
815        );
816    }
817
818    #[test]
819    fn test_with_defaults_preserves_existing_values() {
820        use std::collections::HashMap;
821
822        let mut defaults = HashMap::new();
823        defaults.insert(
824            "status".to_string(),
825            QuillValue::from_json(serde_json::json!("draft")),
826        );
827
828        // Create document with existing status
829        let mut fields = HashMap::new();
830        fields.insert(
831            "status".to_string(),
832            QuillValue::from_json(serde_json::json!("published")),
833        );
834        let doc = ParsedDocument::new(fields);
835
836        let doc_with_defaults = doc.with_defaults(&defaults);
837
838        // Existing value should be preserved
839        assert_eq!(
840            doc_with_defaults
841                .get_field("status")
842                .unwrap()
843                .as_str()
844                .unwrap(),
845            "published"
846        );
847    }
848
849    #[test]
850    fn test_with_defaults_partial_application() {
851        use std::collections::HashMap;
852
853        let mut defaults = HashMap::new();
854        defaults.insert(
855            "status".to_string(),
856            QuillValue::from_json(serde_json::json!("draft")),
857        );
858        defaults.insert(
859            "version".to_string(),
860            QuillValue::from_json(serde_json::json!(1)),
861        );
862
863        // Create document with only one field
864        let mut fields = HashMap::new();
865        fields.insert(
866            "status".to_string(),
867            QuillValue::from_json(serde_json::json!("published")),
868        );
869        let doc = ParsedDocument::new(fields);
870
871        let doc_with_defaults = doc.with_defaults(&defaults);
872
873        // Existing field preserved, missing field gets default
874        assert_eq!(
875            doc_with_defaults
876                .get_field("status")
877                .unwrap()
878                .as_str()
879                .unwrap(),
880            "published"
881        );
882        assert_eq!(
883            doc_with_defaults
884                .get_field("version")
885                .unwrap()
886                .as_number()
887                .unwrap()
888                .as_i64()
889                .unwrap(),
890            1
891        );
892    }
893
894    #[test]
895    fn test_with_defaults_no_defaults() {
896        use std::collections::HashMap;
897
898        let defaults = HashMap::new(); // Empty defaults map
899
900        let doc = ParsedDocument::new(HashMap::new());
901        let doc_with_defaults = doc.with_defaults(&defaults);
902
903        // No defaults should be applied
904        assert!(doc_with_defaults.fields().is_empty());
905    }
906
907    #[test]
908    fn test_with_defaults_complex_types() {
909        use std::collections::HashMap;
910
911        let mut defaults = HashMap::new();
912        defaults.insert(
913            "tags".to_string(),
914            QuillValue::from_json(serde_json::json!(["default", "tag"])),
915        );
916
917        let doc = ParsedDocument::new(HashMap::new());
918        let doc_with_defaults = doc.with_defaults(&defaults);
919
920        // Complex default value should be applied
921        let tags = doc_with_defaults
922            .get_field("tags")
923            .unwrap()
924            .as_sequence()
925            .unwrap();
926        assert_eq!(tags.len(), 2);
927        assert_eq!(tags[0].as_str().unwrap(), "default");
928        assert_eq!(tags[1].as_str().unwrap(), "tag");
929    }
930
931    #[test]
932    fn test_with_coercion_singular_to_array() {
933        use std::collections::HashMap;
934
935        let schema = QuillValue::from_json(serde_json::json!({
936            "$schema": "https://json-schema.org/draft/2019-09/schema",
937            "type": "object",
938            "properties": {
939                "tags": {"type": "array"}
940            }
941        }));
942
943        let mut fields = HashMap::new();
944        fields.insert(
945            "tags".to_string(),
946            QuillValue::from_json(serde_json::json!("single-tag")),
947        );
948        let doc = ParsedDocument::new(fields);
949
950        let coerced_doc = doc.with_coercion(&schema);
951
952        let tags = coerced_doc.get_field("tags").unwrap();
953        assert!(tags.as_array().is_some());
954        let tags_array = tags.as_array().unwrap();
955        assert_eq!(tags_array.len(), 1);
956        assert_eq!(tags_array[0].as_str().unwrap(), "single-tag");
957    }
958
959    #[test]
960    fn test_with_coercion_string_to_boolean() {
961        use std::collections::HashMap;
962
963        let schema = QuillValue::from_json(serde_json::json!({
964            "$schema": "https://json-schema.org/draft/2019-09/schema",
965            "type": "object",
966            "properties": {
967                "active": {"type": "boolean"}
968            }
969        }));
970
971        let mut fields = HashMap::new();
972        fields.insert(
973            "active".to_string(),
974            QuillValue::from_json(serde_json::json!("true")),
975        );
976        let doc = ParsedDocument::new(fields);
977
978        let coerced_doc = doc.with_coercion(&schema);
979
980        assert_eq!(
981            coerced_doc.get_field("active").unwrap().as_bool().unwrap(),
982            true
983        );
984    }
985
986    #[test]
987    fn test_with_coercion_string_to_number() {
988        use std::collections::HashMap;
989
990        let schema = QuillValue::from_json(serde_json::json!({
991            "$schema": "https://json-schema.org/draft/2019-09/schema",
992            "type": "object",
993            "properties": {
994                "count": {"type": "number"}
995            }
996        }));
997
998        let mut fields = HashMap::new();
999        fields.insert(
1000            "count".to_string(),
1001            QuillValue::from_json(serde_json::json!("42")),
1002        );
1003        let doc = ParsedDocument::new(fields);
1004
1005        let coerced_doc = doc.with_coercion(&schema);
1006
1007        assert_eq!(
1008            coerced_doc.get_field("count").unwrap().as_i64().unwrap(),
1009            42
1010        );
1011    }
1012
1013    #[test]
1014    fn test_invalid_yaml() {
1015        let markdown = r#"---
1016title: [invalid yaml
1017author: missing close bracket
1018---
1019
1020Content here."#;
1021
1022        let result = decompose(markdown);
1023        assert!(result.is_err());
1024        assert!(result
1025            .unwrap_err()
1026            .to_string()
1027            .contains("Invalid YAML frontmatter"));
1028    }
1029
1030    #[test]
1031    fn test_unclosed_frontmatter() {
1032        let markdown = r#"---
1033title: Test
1034author: Test Author
1035
1036Content without closing ---"#;
1037
1038        let result = decompose(markdown);
1039        assert!(result.is_err());
1040        assert!(result.unwrap_err().to_string().contains("not closed"));
1041    }
1042
1043    // Extended metadata tests
1044
1045    #[test]
1046    fn test_basic_tagged_block() {
1047        let markdown = r#"---
1048title: Main Document
1049---
1050
1051Main body content.
1052
1053---
1054SCOPE: items
1055name: Item 1
1056---
1057
1058Body of item 1."#;
1059
1060        let doc = decompose(markdown).unwrap();
1061
1062        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
1063        assert_eq!(
1064            doc.get_field("title").unwrap().as_str().unwrap(),
1065            "Main Document"
1066        );
1067
1068        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1069        assert_eq!(items.len(), 1);
1070
1071        let item = items[0].as_object().unwrap();
1072        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1073        assert_eq!(
1074            item.get("body").unwrap().as_str().unwrap(),
1075            "\nBody of item 1."
1076        );
1077    }
1078
1079    #[test]
1080    fn test_multiple_tagged_blocks() {
1081        let markdown = r#"---
1082SCOPE: items
1083name: Item 1
1084tags: [a, b]
1085---
1086
1087First item body.
1088
1089---
1090SCOPE: items
1091name: Item 2
1092tags: [c, d]
1093---
1094
1095Second item body."#;
1096
1097        let doc = decompose(markdown).unwrap();
1098
1099        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1100        assert_eq!(items.len(), 2);
1101
1102        let item1 = items[0].as_object().unwrap();
1103        assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");
1104
1105        let item2 = items[1].as_object().unwrap();
1106        assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
1107    }
1108
1109    #[test]
1110    fn test_mixed_global_and_tagged() {
1111        let markdown = r#"---
1112title: Global
1113author: John Doe
1114---
1115
1116Global body.
1117
1118---
1119SCOPE: sections
1120title: Section 1
1121---
1122
1123Section 1 content.
1124
1125---
1126SCOPE: sections
1127title: Section 2
1128---
1129
1130Section 2 content."#;
1131
1132        let doc = decompose(markdown).unwrap();
1133
1134        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
1135        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
1136
1137        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1138        assert_eq!(sections.len(), 2);
1139    }
1140
1141    #[test]
1142    fn test_empty_tagged_metadata() {
1143        let markdown = r#"---
1144SCOPE: items
1145---
1146
1147Body without metadata."#;
1148
1149        let doc = decompose(markdown).unwrap();
1150
1151        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1152        assert_eq!(items.len(), 1);
1153
1154        let item = items[0].as_object().unwrap();
1155        assert_eq!(
1156            item.get("body").unwrap().as_str().unwrap(),
1157            "\nBody without metadata."
1158        );
1159    }
1160
1161    #[test]
1162    fn test_tagged_block_without_body() {
1163        let markdown = r#"---
1164SCOPE: items
1165name: Item
1166---"#;
1167
1168        let doc = decompose(markdown).unwrap();
1169
1170        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1171        assert_eq!(items.len(), 1);
1172
1173        let item = items[0].as_object().unwrap();
1174        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
1175    }
1176
1177    #[test]
1178    fn test_name_collision_global_and_tagged() {
1179        let markdown = r#"---
1180items: "global value"
1181---
1182
1183Body
1184
1185---
1186SCOPE: items
1187name: Item
1188---
1189
1190Item body"#;
1191
1192        let result = decompose(markdown);
1193        assert!(result.is_err());
1194        assert!(result.unwrap_err().to_string().contains("collision"));
1195    }
1196
1197    #[test]
1198    fn test_global_array_merged_with_scope() {
1199        // When global frontmatter has an array field with the same name as a SCOPE,
1200        // the SCOPE items should be added to the array
1201        let markdown = r#"---
1202items:
1203  - name: Global Item 1
1204    value: 100
1205  - name: Global Item 2
1206    value: 200
1207---
1208
1209Global body
1210
1211---
1212SCOPE: items
1213name: Scope Item 1
1214value: 300
1215---
1216
1217Scope item 1 body
1218
1219---
1220SCOPE: items
1221name: Scope Item 2
1222value: 400
1223---
1224
1225Scope item 2 body"#;
1226
1227        let doc = decompose(markdown).unwrap();
1228
1229        // Verify the items array has all 4 items (2 from global + 2 from SCOPE)
1230        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1231        assert_eq!(items.len(), 4);
1232
1233        // Verify first two items (from global array)
1234        let item1 = items[0].as_object().unwrap();
1235        assert_eq!(
1236            item1.get("name").unwrap().as_str().unwrap(),
1237            "Global Item 1"
1238        );
1239        assert_eq!(item1.get("value").unwrap().as_i64().unwrap(), 100);
1240
1241        let item2 = items[1].as_object().unwrap();
1242        assert_eq!(
1243            item2.get("name").unwrap().as_str().unwrap(),
1244            "Global Item 2"
1245        );
1246        assert_eq!(item2.get("value").unwrap().as_i64().unwrap(), 200);
1247
1248        // Verify last two items (from SCOPE blocks)
1249        let item3 = items[2].as_object().unwrap();
1250        assert_eq!(item3.get("name").unwrap().as_str().unwrap(), "Scope Item 1");
1251        assert_eq!(item3.get("value").unwrap().as_i64().unwrap(), 300);
1252        assert_eq!(
1253            item3.get("body").unwrap().as_str().unwrap(),
1254            "\nScope item 1 body\n\n"
1255        );
1256
1257        let item4 = items[3].as_object().unwrap();
1258        assert_eq!(item4.get("name").unwrap().as_str().unwrap(), "Scope Item 2");
1259        assert_eq!(item4.get("value").unwrap().as_i64().unwrap(), 400);
1260        assert_eq!(
1261            item4.get("body").unwrap().as_str().unwrap(),
1262            "\nScope item 2 body"
1263        );
1264    }
1265
1266    #[test]
1267    fn test_empty_global_array_with_scope() {
1268        // Edge case: global frontmatter has an empty array
1269        let markdown = r#"---
1270items: []
1271---
1272
1273Global body
1274
1275---
1276SCOPE: items
1277name: Item 1
1278---
1279
1280Item 1 body"#;
1281
1282        let doc = decompose(markdown).unwrap();
1283
1284        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1285        assert_eq!(items.len(), 1);
1286
1287        let item = items[0].as_object().unwrap();
1288        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1289    }
1290
1291    #[test]
1292    fn test_reserved_field_name() {
1293        let markdown = r#"---
1294SCOPE: body
1295content: Test
1296---"#;
1297
1298        let result = decompose(markdown);
1299        assert!(result.is_err());
1300        assert!(result.unwrap_err().to_string().contains("reserved"));
1301    }
1302
1303    #[test]
1304    fn test_invalid_tag_syntax() {
1305        let markdown = r#"---
1306SCOPE: Invalid-Name
1307title: Test
1308---"#;
1309
1310        let result = decompose(markdown);
1311        assert!(result.is_err());
1312        assert!(result
1313            .unwrap_err()
1314            .to_string()
1315            .contains("Invalid field name"));
1316    }
1317
1318    #[test]
1319    fn test_multiple_global_frontmatter_blocks() {
1320        let markdown = r#"---
1321title: First
1322---
1323
1324Body
1325
1326---
1327author: Second
1328---
1329
1330More body"#;
1331
1332        let result = decompose(markdown);
1333        assert!(result.is_err());
1334        assert!(result
1335            .unwrap_err()
1336            .to_string()
1337            .contains("Multiple global frontmatter"));
1338    }
1339
1340    #[test]
1341    fn test_adjacent_blocks_different_tags() {
1342        let markdown = r#"---
1343SCOPE: items
1344name: Item 1
1345---
1346
1347Item 1 body
1348
1349---
1350SCOPE: sections
1351title: Section 1
1352---
1353
1354Section 1 body"#;
1355
1356        let doc = decompose(markdown).unwrap();
1357
1358        assert!(doc.get_field("items").is_some());
1359        assert!(doc.get_field("sections").is_some());
1360
1361        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1362        assert_eq!(items.len(), 1);
1363
1364        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1365        assert_eq!(sections.len(), 1);
1366    }
1367
1368    #[test]
1369    fn test_order_preservation() {
1370        let markdown = r#"---
1371SCOPE: items
1372id: 1
1373---
1374
1375First
1376
1377---
1378SCOPE: items
1379id: 2
1380---
1381
1382Second
1383
1384---
1385SCOPE: items
1386id: 3
1387---
1388
1389Third"#;
1390
1391        let doc = decompose(markdown).unwrap();
1392
1393        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1394        assert_eq!(items.len(), 3);
1395
1396        for (i, item) in items.iter().enumerate() {
1397            let mapping = item.as_object().unwrap();
1398            let id = mapping.get("id").unwrap().as_i64().unwrap();
1399            assert_eq!(id, (i + 1) as i64);
1400        }
1401    }
1402
1403    #[test]
1404    fn test_product_catalog_integration() {
1405        let markdown = r#"---
1406title: Product Catalog
1407author: John Doe
1408date: 2024-01-01
1409---
1410
1411This is the main catalog description.
1412
1413---
1414SCOPE: products
1415name: Widget A
1416price: 19.99
1417sku: WID-001
1418---
1419
1420The **Widget A** is our most popular product.
1421
1422---
1423SCOPE: products
1424name: Gadget B
1425price: 29.99
1426sku: GAD-002
1427---
1428
1429The **Gadget B** is perfect for professionals.
1430
1431---
1432SCOPE: reviews
1433product: Widget A
1434rating: 5
1435---
1436
1437"Excellent product! Highly recommended."
1438
1439---
1440SCOPE: reviews
1441product: Gadget B
1442rating: 4
1443---
1444
1445"Very good, but a bit pricey.""#;
1446
1447        let doc = decompose(markdown).unwrap();
1448
1449        // Verify global fields
1450        assert_eq!(
1451            doc.get_field("title").unwrap().as_str().unwrap(),
1452            "Product Catalog"
1453        );
1454        assert_eq!(
1455            doc.get_field("author").unwrap().as_str().unwrap(),
1456            "John Doe"
1457        );
1458        assert_eq!(
1459            doc.get_field("date").unwrap().as_str().unwrap(),
1460            "2024-01-01"
1461        );
1462
1463        // Verify global body
1464        assert!(doc.body().unwrap().contains("main catalog description"));
1465
1466        // Verify products collection
1467        let products = doc.get_field("products").unwrap().as_sequence().unwrap();
1468        assert_eq!(products.len(), 2);
1469
1470        let product1 = products[0].as_object().unwrap();
1471        assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
1472        assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);
1473
1474        // Verify reviews collection
1475        let reviews = doc.get_field("reviews").unwrap().as_sequence().unwrap();
1476        assert_eq!(reviews.len(), 2);
1477
1478        let review1 = reviews[0].as_object().unwrap();
1479        assert_eq!(
1480            review1.get("product").unwrap().as_str().unwrap(),
1481            "Widget A"
1482        );
1483        assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);
1484
1485        // Total fields: title, author, date, body, products, reviews = 6
1486        assert_eq!(doc.fields().len(), 6);
1487    }
1488
1489    #[test]
1490    fn taro_quill_directive() {
1491        let markdown = r#"---
1492QUILL: usaf_memo
1493memo_for: [ORG/SYMBOL]
1494memo_from: [ORG/SYMBOL]
1495---
1496
1497This is the memo body."#;
1498
1499        let doc = decompose(markdown).unwrap();
1500
1501        // Verify quill tag is set
1502        assert_eq!(doc.quill_tag(), "usaf_memo");
1503
1504        // Verify fields from quill block become frontmatter
1505        assert_eq!(
1506            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1507                .as_str()
1508                .unwrap(),
1509            "ORG/SYMBOL"
1510        );
1511
1512        // Verify body
1513        assert_eq!(doc.body(), Some("\nThis is the memo body."));
1514    }
1515
1516    #[test]
1517    fn test_quill_with_scope_blocks() {
1518        let markdown = r#"---
1519QUILL: document
1520title: Test Document
1521---
1522
1523Main body.
1524
1525---
1526SCOPE: sections
1527name: Section 1
1528---
1529
1530Section 1 body."#;
1531
1532        let doc = decompose(markdown).unwrap();
1533
1534        // Verify quill tag
1535        assert_eq!(doc.quill_tag(), "document");
1536
1537        // Verify global field from quill block
1538        assert_eq!(
1539            doc.get_field("title").unwrap().as_str().unwrap(),
1540            "Test Document"
1541        );
1542
1543        // Verify scope blocks work
1544        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1545        assert_eq!(sections.len(), 1);
1546
1547        // Verify body
1548        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1549    }
1550
1551    #[test]
1552    fn test_multiple_quill_directives_error() {
1553        let markdown = r#"---
1554QUILL: first
1555---
1556
1557---
1558QUILL: second
1559---"#;
1560
1561        let result = decompose(markdown);
1562        assert!(result.is_err());
1563        assert!(result
1564            .unwrap_err()
1565            .to_string()
1566            .contains("Multiple quill directives"));
1567    }
1568
1569    #[test]
1570    fn test_invalid_quill_name() {
1571        let markdown = r#"---
1572QUILL: Invalid-Name
1573---"#;
1574
1575        let result = decompose(markdown);
1576        assert!(result.is_err());
1577        assert!(result
1578            .unwrap_err()
1579            .to_string()
1580            .contains("Invalid quill name"));
1581    }
1582
1583    #[test]
1584    fn test_quill_wrong_value_type() {
1585        let markdown = r#"---
1586QUILL: 123
1587---"#;
1588
1589        let result = decompose(markdown);
1590        assert!(result.is_err());
1591        assert!(result
1592            .unwrap_err()
1593            .to_string()
1594            .contains("QUILL value must be a string"));
1595    }
1596
1597    #[test]
1598    fn test_scope_wrong_value_type() {
1599        let markdown = r#"---
1600SCOPE: 123
1601---"#;
1602
1603        let result = decompose(markdown);
1604        assert!(result.is_err());
1605        assert!(result
1606            .unwrap_err()
1607            .to_string()
1608            .contains("SCOPE value must be a string"));
1609    }
1610
1611    #[test]
1612    fn test_both_quill_and_scope_error() {
1613        let markdown = r#"---
1614QUILL: test
1615SCOPE: items
1616---"#;
1617
1618        let result = decompose(markdown);
1619        assert!(result.is_err());
1620        assert!(result
1621            .unwrap_err()
1622            .to_string()
1623            .contains("Cannot specify both QUILL and SCOPE"));
1624    }
1625
1626    #[test]
1627    fn test_blank_lines_in_frontmatter() {
1628        // New parsing standard: blank lines are allowed within YAML blocks
1629        let markdown = r#"---
1630title: Test Document
1631author: Test Author
1632
1633description: This has a blank line above it
1634tags:
1635  - one
1636  - two
1637---
1638
1639# Hello World
1640
1641This is the body."#;
1642
1643        let doc = decompose(markdown).unwrap();
1644
1645        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
1646        assert_eq!(
1647            doc.get_field("title").unwrap().as_str().unwrap(),
1648            "Test Document"
1649        );
1650        assert_eq!(
1651            doc.get_field("author").unwrap().as_str().unwrap(),
1652            "Test Author"
1653        );
1654        assert_eq!(
1655            doc.get_field("description").unwrap().as_str().unwrap(),
1656            "This has a blank line above it"
1657        );
1658
1659        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
1660        assert_eq!(tags.len(), 2);
1661    }
1662
1663    #[test]
1664    fn test_blank_lines_in_scope_blocks() {
1665        // Blank lines should be allowed in SCOPE blocks too
1666        let markdown = r#"---
1667SCOPE: items
1668name: Item 1
1669
1670price: 19.99
1671
1672tags:
1673  - electronics
1674  - gadgets
1675---
1676
1677Body of item 1."#;
1678
1679        let doc = decompose(markdown).unwrap();
1680
1681        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1682        assert_eq!(items.len(), 1);
1683
1684        let item = items[0].as_object().unwrap();
1685        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1686        assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);
1687
1688        let tags = item.get("tags").unwrap().as_array().unwrap();
1689        assert_eq!(tags.len(), 2);
1690    }
1691
1692    #[test]
1693    fn test_horizontal_rule_with_blank_lines_above_and_below() {
1694        // Horizontal rule: blank lines both above AND below the ---
1695        let markdown = r#"---
1696title: Test
1697---
1698
1699First paragraph.
1700
1701---
1702
1703Second paragraph."#;
1704
1705        let doc = decompose(markdown).unwrap();
1706
1707        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1708
1709        // The body should contain the horizontal rule (---) as part of the content
1710        let body = doc.body().unwrap();
1711        assert!(body.contains("First paragraph."));
1712        assert!(body.contains("---"));
1713        assert!(body.contains("Second paragraph."));
1714    }
1715
1716    #[test]
1717    fn test_horizontal_rule_not_preceded_by_blank() {
1718        // --- not preceded by blank line but followed by blank line is NOT a horizontal rule
1719        // It's also NOT a valid metadata block opening (since it's followed by blank)
1720        let markdown = r#"---
1721title: Test
1722---
1723
1724First paragraph.
1725---
1726
1727Second paragraph."#;
1728
1729        let doc = decompose(markdown).unwrap();
1730
1731        let body = doc.body().unwrap();
1732        // The second --- should be in the body as text (not a horizontal rule since no blank above)
1733        assert!(body.contains("---"));
1734    }
1735
1736    #[test]
1737    fn test_multiple_blank_lines_in_yaml() {
1738        // Multiple blank lines should also be allowed
1739        let markdown = r#"---
1740title: Test
1741
1742
1743author: John Doe
1744
1745
1746version: 1.0
1747---
1748
1749Body content."#;
1750
1751        let doc = decompose(markdown).unwrap();
1752
1753        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1754        assert_eq!(
1755            doc.get_field("author").unwrap().as_str().unwrap(),
1756            "John Doe"
1757        );
1758        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1759    }
1760
1761    #[test]
1762    fn test_html_comment_interaction() {
1763        let markdown = r#"<!---
1764---> the rest of the page content
1765
1766---
1767key: value
1768---
1769"#;
1770        let doc = decompose(markdown).unwrap();
1771
1772        // The comment should be ignored (or at least not cause a parse error)
1773        // The frontmatter should be parsed
1774        let key = doc.get_field("key").and_then(|v| v.as_str());
1775        assert_eq!(key, Some("value"));
1776    }
1777}
1778#[cfg(test)]
1779mod demo_file_test {
1780    use super::*;
1781
1782    #[test]
1783    fn test_extended_metadata_demo_file() {
1784        let markdown = include_str!("../../fixtures/resources/extended_metadata_demo.md");
1785        let doc = decompose(markdown).unwrap();
1786
1787        // Verify global fields
1788        assert_eq!(
1789            doc.get_field("title").unwrap().as_str().unwrap(),
1790            "Extended Metadata Demo"
1791        );
1792        assert_eq!(
1793            doc.get_field("author").unwrap().as_str().unwrap(),
1794            "Quillmark Team"
1795        );
1796        // version is parsed as a number by YAML
1797        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1798
1799        // Verify body
1800        assert!(doc
1801            .body()
1802            .unwrap()
1803            .contains("extended YAML metadata standard"));
1804
1805        // Verify features collection
1806        let features = doc.get_field("features").unwrap().as_sequence().unwrap();
1807        assert_eq!(features.len(), 3);
1808
1809        // Verify use_cases collection
1810        let use_cases = doc.get_field("use_cases").unwrap().as_sequence().unwrap();
1811        assert_eq!(use_cases.len(), 2);
1812
1813        // Check first feature
1814        let feature1 = features[0].as_object().unwrap();
1815        assert_eq!(
1816            feature1.get("name").unwrap().as_str().unwrap(),
1817            "Tag Directives"
1818        );
1819    }
1820
1821    #[test]
1822    fn test_input_size_limit() {
1823        // Create markdown larger than MAX_INPUT_SIZE (10 MB)
1824        let size = crate::error::MAX_INPUT_SIZE + 1;
1825        let large_markdown = "a".repeat(size);
1826
1827        let result = decompose(&large_markdown);
1828        assert!(result.is_err());
1829
1830        let err_msg = result.unwrap_err().to_string();
1831        assert!(err_msg.contains("Input too large"));
1832    }
1833
1834    #[test]
1835    fn test_yaml_size_limit() {
1836        // Create YAML block larger than MAX_YAML_SIZE (1 MB)
1837        let mut markdown = String::from("---\n");
1838
1839        // Create a very large YAML field
1840        let size = crate::error::MAX_YAML_SIZE + 1;
1841        markdown.push_str("data: \"");
1842        markdown.push_str(&"x".repeat(size));
1843        markdown.push_str("\"\n---\n\nBody");
1844
1845        let result = decompose(&markdown);
1846        assert!(result.is_err());
1847
1848        let err_msg = result.unwrap_err().to_string();
1849        assert!(err_msg.contains("YAML block too large"));
1850    }
1851
1852    #[test]
1853    fn test_input_within_size_limit() {
1854        // Create markdown just under the limit
1855        let size = 1000; // Much smaller than limit
1856        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));
1857
1858        let result = decompose(&markdown);
1859        assert!(result.is_ok());
1860    }
1861
1862    #[test]
1863    fn test_yaml_within_size_limit() {
1864        // Create YAML block well within the limit
1865        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";
1866
1867        let result = decompose(&markdown);
1868        assert!(result.is_ok());
1869    }
1870
1871    // Tests for guillemet preservation in parsing (guillemets are NOT converted during parsing)
1872    // Guillemet conversion now happens in process_plate, not during parsing
1873    #[test]
1874    fn test_chevrons_preserved_in_body_no_frontmatter() {
1875        let markdown = "Use <<raw content>> here.";
1876        let doc = decompose(markdown).unwrap();
1877
1878        // Body should preserve chevrons (conversion happens later in process_plate)
1879        assert_eq!(doc.body(), Some("Use <<raw content>> here."));
1880    }
1881
1882    #[test]
1883    fn test_chevrons_preserved_in_body_with_frontmatter() {
1884        let markdown = r#"---
1885title: Test
1886---
1887
1888Use <<raw content>> here."#;
1889        let doc = decompose(markdown).unwrap();
1890
1891        // Body should preserve chevrons
1892        assert_eq!(doc.body(), Some("\nUse <<raw content>> here."));
1893    }
1894
1895    #[test]
1896    fn test_chevrons_preserved_in_yaml_string() {
1897        let markdown = r#"---
1898title: Test <<with chevrons>>
1899---
1900
1901Body content."#;
1902        let doc = decompose(markdown).unwrap();
1903
1904        // YAML string values should preserve chevrons
1905        assert_eq!(
1906            doc.get_field("title").unwrap().as_str().unwrap(),
1907            "Test <<with chevrons>>"
1908        );
1909    }
1910
1911    #[test]
1912    fn test_chevrons_preserved_in_yaml_array() {
1913        let markdown = r#"---
1914items:
1915  - "<<first>>"
1916  - "<<second>>"
1917---
1918
1919Body."#;
1920        let doc = decompose(markdown).unwrap();
1921
1922        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1923        assert_eq!(items[0].as_str().unwrap(), "<<first>>");
1924        assert_eq!(items[1].as_str().unwrap(), "<<second>>");
1925    }
1926
1927    #[test]
1928    fn test_chevrons_preserved_in_yaml_nested() {
1929        let markdown = r#"---
1930metadata:
1931  description: "<<nested value>>"
1932---
1933
1934Body."#;
1935        let doc = decompose(markdown).unwrap();
1936
1937        let metadata = doc.get_field("metadata").unwrap().as_object().unwrap();
1938        assert_eq!(
1939            metadata.get("description").unwrap().as_str().unwrap(),
1940            "<<nested value>>"
1941        );
1942    }
1943
1944    #[test]
1945    fn test_chevrons_preserved_in_code_blocks() {
1946        let markdown = r#"```
1947<<in code block>>
1948```
1949
1950<<outside code block>>"#;
1951        let doc = decompose(markdown).unwrap();
1952
1953        let body = doc.body().unwrap();
1954        // All chevrons should be preserved (no conversion during parsing)
1955        assert!(body.contains("<<in code block>>"));
1956        assert!(body.contains("<<outside code block>>"));
1957    }
1958
1959    #[test]
1960    fn test_chevrons_preserved_in_inline_code() {
1961        let markdown = "`<<in inline code>>` and <<outside inline code>>";
1962        let doc = decompose(markdown).unwrap();
1963
1964        let body = doc.body().unwrap();
1965        // All chevrons should be preserved
1966        assert!(body.contains("`<<in inline code>>`"));
1967        assert!(body.contains("<<outside inline code>>"));
1968    }
1969
1970    #[test]
1971    fn test_chevrons_preserved_in_tagged_block_body() {
1972        let markdown = r#"---
1973title: Main
1974---
1975
1976Main body.
1977
1978---
1979SCOPE: items
1980name: Item 1
1981---
1982
1983Use <<raw>> here."#;
1984        let doc = decompose(markdown).unwrap();
1985
1986        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1987        let item = items[0].as_object().unwrap();
1988        let item_body = item.get("body").unwrap().as_str().unwrap();
1989        // Tagged block body should preserve chevrons
1990        assert!(item_body.contains("<<raw>>"));
1991    }
1992
1993    #[test]
1994    fn test_chevrons_preserved_in_tagged_block_yaml() {
1995        let markdown = r#"---
1996title: Main
1997---
1998
1999Main body.
2000
2001---
2002SCOPE: items
2003description: "<<tagged yaml>>"
2004---
2005
2006Item body."#;
2007        let doc = decompose(markdown).unwrap();
2008
2009        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
2010        let item = items[0].as_object().unwrap();
2011        // Tagged block YAML should preserve chevrons
2012        assert_eq!(
2013            item.get("description").unwrap().as_str().unwrap(),
2014            "<<tagged yaml>>"
2015        );
2016    }
2017
2018    #[test]
2019    fn test_yaml_numbers_not_affected() {
2020        // Numbers should not be affected
2021        let markdown = r#"---
2022count: 42
2023---
2024
2025Body."#;
2026        let doc = decompose(markdown).unwrap();
2027        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
2028    }
2029
2030    #[test]
2031    fn test_yaml_booleans_not_affected() {
2032        // Booleans should not be affected
2033        let markdown = r#"---
2034active: true
2035---
2036
2037Body."#;
2038        let doc = decompose(markdown).unwrap();
2039        assert_eq!(doc.get_field("active").unwrap().as_bool().unwrap(), true);
2040    }
2041
2042    #[test]
2043    fn test_multiline_chevrons_preserved() {
2044        // Multiline chevrons should be preserved as-is
2045        let markdown = "<<text\nacross lines>>";
2046        let doc = decompose(markdown).unwrap();
2047
2048        let body = doc.body().unwrap();
2049        // Should contain the original chevrons
2050        assert!(body.contains("<<text"));
2051        assert!(body.contains("across lines>>"));
2052    }
2053
2054    #[test]
2055    fn test_unmatched_chevrons_preserved() {
2056        let markdown = "<<unmatched";
2057        let doc = decompose(markdown).unwrap();
2058
2059        let body = doc.body().unwrap();
2060        // Unmatched should remain as-is
2061        assert_eq!(body, "<<unmatched");
2062    }
2063}
2064
// Additional robustness tests: delimiter edge cases, line endings, Unicode,
// YAML value shapes, SCOPE/QUILL directives, error paths, body extraction and
// tag-name validation.
#[cfg(test)]
mod robustness_tests {
    use super::*;

    // Edge cases for delimiter handling

    #[test]
    fn test_empty_document() {
        // Empty input yields an empty body and the default quill tag.
        let doc = decompose("").unwrap();
        assert_eq!(doc.body(), Some(""));
        assert_eq!(doc.quill_tag(), "__default__");
    }

    #[test]
    fn test_only_whitespace() {
        // Whitespace-only input is returned unchanged as the body.
        let doc = decompose("   \n\n   \t").unwrap();
        assert_eq!(doc.body(), Some("   \n\n   \t"));
    }

    #[test]
    fn test_only_dashes() {
        // Just "---" at document start without newline is not treated as frontmatter opener
        // (requires "---\n" to start a frontmatter block), so this is NOT an error:
        // the lone "---" is plain body content.
        let doc = decompose("---").unwrap();
        assert_eq!(doc.body(), Some("---"));
    }

    #[test]
    fn test_dashes_in_middle_of_line() {
        // --- not at start of line should not be treated as delimiter
        let markdown = "some text --- more text";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.body(), Some("some text --- more text"));
    }

    #[test]
    fn test_four_dashes() {
        // ---- is not a valid delimiter
        let markdown = "----\ntitle: Test\n----\n\nBody";
        let doc = decompose(markdown).unwrap();
        // The four-dash lines must end up in the body rather than being consumed
        // as delimiters.
        assert!(doc.body().unwrap().contains("----"));
    }

    #[test]
    fn test_crlf_line_endings() {
        // Windows-style line endings
        let markdown = "---\r\ntitle: Test\r\n---\r\n\r\nBody content.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert!(doc.body().unwrap().contains("Body content."));
    }

    #[test]
    fn test_mixed_line_endings() {
        // Mix of \n and \r\n
        let markdown = "---\ntitle: Test\r\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
    }

    #[test]
    fn test_frontmatter_at_eof_no_trailing_newline() {
        // Frontmatter closed at EOF without trailing newline
        let markdown = "---\ntitle: Test\n---";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert_eq!(doc.body(), Some(""));
    }

    #[test]
    fn test_empty_frontmatter() {
        // Frontmatter containing only whitespace ("---\n \n---") defines no fields.
        // NOTE: a bare "---\n---" is deliberately not used here; per the original
        // author it is handled by the horizontal-rule logic rather than being
        // treated as an empty frontmatter block.
        let markdown = "---\n \n---\n\nBody content.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("Body content."));
        // Should only have body field
        assert_eq!(doc.fields().len(), 1);
    }

    #[test]
    fn test_whitespace_only_frontmatter() {
        // Frontmatter with only whitespace
        let markdown = "---\n   \n\n   \n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("Body."));
    }

    // Unicode handling

    #[test]
    fn test_unicode_in_yaml_keys() {
        // Non-ASCII keys must be usable for field lookup.
        let markdown = "---\ntitre: Bonjour\nタイトル: こんにちは\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("titre").unwrap().as_str().unwrap(), "Bonjour");
        assert_eq!(
            doc.get_field("タイトル").unwrap().as_str().unwrap(),
            "こんにちは"
        );
    }

    #[test]
    fn test_unicode_in_yaml_values() {
        // Multi-byte characters and emoji in a value must round-trip unchanged.
        let markdown = "---\ntitle: 你好世界 🎉\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "你好世界 🎉"
        );
    }

    #[test]
    fn test_unicode_in_body() {
        // Multi-byte characters and emoji in the body must be preserved.
        let markdown = "---\ntitle: Test\n---\n\n日本語テキスト with emoji 🚀";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("日本語テキスト"));
        assert!(doc.body().unwrap().contains("🚀"));
    }

    // YAML edge cases

    #[test]
    fn test_yaml_multiline_string() {
        // Literal block scalar (`|`): embedded newlines are kept.
        let markdown = r#"---
description: |
  This is a
  multiline string
  with preserved newlines.
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let desc = doc.get_field("description").unwrap().as_str().unwrap();
        assert!(desc.contains("multiline string"));
        assert!(desc.contains('\n'));
    }

    #[test]
    fn test_yaml_folded_string() {
        // Folded block scalar (`>`).
        let markdown = r#"---
description: >
  This is a folded
  string that becomes
  a single line.
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let desc = doc.get_field("description").unwrap().as_str().unwrap();
        // Folded strings join lines with spaces
        assert!(desc.contains("folded"));
    }

    #[test]
    fn test_yaml_null_value() {
        // An explicit `null` keeps the field present, with a null value.
        let markdown = "---\noptional: null\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.get_field("optional").unwrap().is_null());
    }

    #[test]
    fn test_yaml_empty_string_value() {
        // A quoted empty string stays a string (it is not turned into null).
        let markdown = "---\nempty: \"\"\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("empty").unwrap().as_str().unwrap(), "");
    }

    #[test]
    fn test_yaml_special_characters_in_string() {
        // YAML metacharacters inside a quoted string are plain text.
        let markdown = "---\nspecial: \"colon: here, and [brackets]\"\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(
            doc.get_field("special").unwrap().as_str().unwrap(),
            "colon: here, and [brackets]"
        );
    }

    #[test]
    fn test_yaml_nested_objects() {
        // Two levels of nested mappings must be reachable through `as_object`.
        let markdown = r#"---
config:
  database:
    host: localhost
    port: 5432
  cache:
    enabled: true
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let config = doc.get_field("config").unwrap().as_object().unwrap();
        let db = config.get("database").unwrap().as_object().unwrap();
        assert_eq!(db.get("host").unwrap().as_str().unwrap(), "localhost");
        assert_eq!(db.get("port").unwrap().as_i64().unwrap(), 5432);
    }

    // SCOPE block edge cases

    #[test]
    fn test_scope_with_empty_body() {
        // A SCOPE block closed at EOF still produces one item, with an empty body.
        let markdown = r#"---
SCOPE: items
name: Item
---"#;
        let doc = decompose(markdown).unwrap();
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);
        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
    }

    #[test]
    fn test_scope_consecutive_blocks() {
        // Two back-to-back blocks with the same SCOPE are collected into one
        // two-element sequence.
        let markdown = r#"---
SCOPE: a
id: 1
---
---
SCOPE: a
id: 2
---"#;
        let doc = decompose(markdown).unwrap();
        let items = doc.get_field("a").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 2);
    }

    #[test]
    fn test_scope_with_body_containing_dashes() {
        // "---" in the middle of a body line must not terminate the block body.
        let markdown = r#"---
SCOPE: items
name: Item
---

Some text with --- dashes in it."#;
        let doc = decompose(markdown).unwrap();
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        let item = items[0].as_object().unwrap();
        let body = item.get("body").unwrap().as_str().unwrap();
        assert!(body.contains("--- dashes"));
    }

    // QUILL directive edge cases

    #[test]
    fn test_quill_with_underscore_prefix() {
        // A leading underscore is accepted in a quill name.
        let markdown = "---\nQUILL: _internal\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "_internal");
    }

    #[test]
    fn test_quill_with_numbers() {
        // Digits after the first character are accepted in a quill name.
        let markdown = "---\nQUILL: form_8_v2\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "form_8_v2");
    }

    #[test]
    fn test_quill_with_additional_fields() {
        // Ordinary fields next to QUILL in the same block are still parsed.
        let markdown = r#"---
QUILL: my_quill
title: Document Title
author: John Doe
---

Body content."#;
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "my_quill");
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Document Title"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "John Doe"
        );
    }

    // Error handling

    #[test]
    fn test_invalid_scope_name_uppercase() {
        // Uppercase scope names are rejected with an "Invalid field name" error.
        let markdown = "---\nSCOPE: ITEMS\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid field name"));
    }

    #[test]
    fn test_invalid_scope_name_starts_with_number() {
        let markdown = "---\nSCOPE: 123items\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_invalid_scope_name_with_hyphen() {
        let markdown = "---\nSCOPE: my-items\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_invalid_quill_name_uppercase() {
        let markdown = "---\nQUILL: MyQuill\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_yaml_syntax_error_missing_colon() {
        // "title Test" is not a key/value pair, so parsing must fail.
        let markdown = "---\ntitle Test\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_yaml_syntax_error_bad_indentation() {
        let markdown = "---\nitems:\n- one\n - two\n---\n\nBody.";
        // Bad indentation may or may not be an error depending on YAML parser.
        // Either outcome is acceptable; this only ensures it doesn't panic.
        let _ = decompose(markdown);
    }

    // Body extraction edge cases

    #[test]
    fn test_body_with_leading_newlines() {
        let markdown = "---\ntitle: Test\n---\n\n\n\nBody with leading newlines.";
        let doc = decompose(markdown).unwrap();
        // Body should preserve leading newlines after frontmatter
        assert!(doc.body().unwrap().starts_with('\n'));
    }

    #[test]
    fn test_body_with_trailing_newlines() {
        let markdown = "---\ntitle: Test\n---\n\nBody.\n\n\n";
        let doc = decompose(markdown).unwrap();
        // Body should preserve trailing newlines
        assert!(doc.body().unwrap().ends_with('\n'));
    }

    #[test]
    fn test_no_body_after_frontmatter() {
        // Nothing after the closing delimiter: the body is present but empty.
        let markdown = "---\ntitle: Test\n---";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.body(), Some(""));
    }

    // Tag name validation

    #[test]
    fn test_valid_tag_name_single_underscore() {
        assert!(is_valid_tag_name("_"));
    }

    #[test]
    fn test_valid_tag_name_underscore_prefix() {
        assert!(is_valid_tag_name("_private"));
    }

    #[test]
    fn test_valid_tag_name_with_numbers() {
        assert!(is_valid_tag_name("item1"));
        assert!(is_valid_tag_name("item_2"));
    }

    #[test]
    fn test_invalid_tag_name_empty() {
        assert!(!is_valid_tag_name(""));
    }

    #[test]
    fn test_invalid_tag_name_starts_with_number() {
        assert!(!is_valid_tag_name("1item"));
    }

    #[test]
    fn test_invalid_tag_name_uppercase() {
        assert!(!is_valid_tag_name("Items"));
        assert!(!is_valid_tag_name("ITEMS"));
    }

    #[test]
    fn test_invalid_tag_name_special_chars() {
        assert!(!is_valid_tag_name("my-items"));
        assert!(!is_valid_tag_name("my.items"));
        assert!(!is_valid_tag_name("my items"));
    }

    // Non-string YAML values and pre-existing guillemets
    // (test names are historical; these only check that parsed values are unchanged)

    #[test]
    fn test_guillemet_in_yaml_preserves_non_strings() {
        // Integer, float and boolean scalars must keep their native types.
        let markdown = r#"---
count: 42
price: 19.99
active: true
items:
  - first
  - 100
  - true
---

Body."#;
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
        assert_eq!(doc.get_field("price").unwrap().as_f64().unwrap(), 19.99);
        // Assert the bool directly instead of comparing it against `true`.
        assert!(doc.get_field("active").unwrap().as_bool().unwrap());
    }

    #[test]
    fn test_guillemet_double_conversion_prevention() {
        // Ensure «» in input doesn't get double-processed
        let markdown = "---\ntitle: Already «converted»\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        // Should remain as-is (not double-escaped)
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Already «converted»"
        );
    }
}