quillmark_core/
parse.rs

1//! # Parsing Module
2//!
3//! Parsing functionality for markdown documents with YAML frontmatter.
4//!
5//! ## Overview
6//!
//! The `parse` module provides the [`ParsedDocument::from_markdown`] function for parsing markdown documents with YAML frontmatter into a [`ParsedDocument`].
8//!
9//! ## Key Types
10//!
11//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
12//! - [`BODY_FIELD`]: Constant for the field name storing document body
13//!
14//! ## Examples
15//!
16//! ### Basic Parsing
17//!
18//! ```
19//! use quillmark_core::ParsedDocument;
20//!
21//! let markdown = r#"---
22//! title: My Document
23//! author: John Doe
24//! ---
25//!
26//! # Introduction
27//!
28//! Document content here.
29//! "#;
30//!
31//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
32//! let title = doc.get_field("title")
33//!     .and_then(|v| v.as_str())
34//!     .unwrap_or("Untitled");
35//! ```
36//!
37//! ## Error Handling
38//!
39//! The [`ParsedDocument::from_markdown`] function returns errors for:
40//! - Malformed YAML syntax
41//! - Unclosed frontmatter blocks
42//! - Multiple global frontmatter blocks
43//! - Both QUILL and SCOPE specified in the same block
44//! - Reserved field name usage
45//! - Name collisions
46//!
47//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.
48
49use std::collections::HashMap;
50
51use crate::guillemet::{preprocess_guillemets, preprocess_markdown_guillemets};
52use crate::value::QuillValue;
53
/// Name of the field under which the document body is stored.
///
/// The body is kept in the same field map as the frontmatter values, so this
/// name is reserved: a `SCOPE` value equal to it is rejected during parsing.
pub const BODY_FIELD: &str = "body";
56
57/// Recursively preprocesses guillemets in YAML values
58///
59/// Converts `<<text>>` to `«text»` in all string values within the YAML structure.
60/// For non-string values (numbers, booleans, null), they are passed through unchanged.
61/// For sequences and mappings, the function recurses into their elements.
62fn preprocess_yaml_guillemets(value: serde_yaml::Value) -> serde_yaml::Value {
63    match value {
64        serde_yaml::Value::String(s) => serde_yaml::Value::String(preprocess_guillemets(&s)),
65        serde_yaml::Value::Sequence(seq) => {
66            serde_yaml::Value::Sequence(seq.into_iter().map(preprocess_yaml_guillemets).collect())
67        }
68        serde_yaml::Value::Mapping(map) => {
69            let new_map: serde_yaml::Mapping = map
70                .into_iter()
71                .map(|(k, v)| (k, preprocess_yaml_guillemets(v)))
72                .collect();
73            serde_yaml::Value::Mapping(new_map)
74        }
75        // Pass through other types unchanged (numbers, booleans, null, tagged)
76        other => other,
77    }
78}
79
/// Reserved tag name for the quill specification.
///
/// NOTE(review): this constant is not referenced in the visible part of this
/// module; confirm its users before changing the value.
pub const QUILL_TAG: &str = "quill";
82
83/// A parsed markdown document with frontmatter
84#[derive(Debug, Clone)]
85pub struct ParsedDocument {
86    fields: HashMap<String, QuillValue>,
87    quill_tag: String,
88}
89
90impl ParsedDocument {
91    /// Create a new ParsedDocument with the given fields
92    pub fn new(fields: HashMap<String, QuillValue>) -> Self {
93        Self {
94            fields,
95            quill_tag: "__default__".to_string(),
96        }
97    }
98
99    /// Create a ParsedDocument from fields and quill tag
100    pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: String) -> Self {
101        Self { fields, quill_tag }
102    }
103
104    /// Create a ParsedDocument from markdown string
105    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
106        decompose(markdown).map_err(|e| crate::error::ParseError::from(e))
107    }
108
109    /// Get the quill tag (from QUILL key, or "__default__" if not specified)
110    pub fn quill_tag(&self) -> &str {
111        &self.quill_tag
112    }
113
114    /// Get the document body
115    pub fn body(&self) -> Option<&str> {
116        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
117    }
118
119    /// Get a specific field
120    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
121        self.fields.get(name)
122    }
123
124    /// Get all fields (including body)
125    pub fn fields(&self) -> &HashMap<String, QuillValue> {
126        &self.fields
127    }
128
129    /// Create a new ParsedDocument with default values applied
130    ///
131    /// This method creates a new ParsedDocument with default values applied for any
132    /// fields that are missing from the original document but have defaults specified.
133    /// Existing fields are preserved and not overwritten.
134    ///
135    /// # Arguments
136    ///
137    /// * `defaults` - A HashMap of field names to their default QuillValues
138    ///
139    /// # Returns
140    ///
141    /// A new ParsedDocument with defaults applied for missing fields
142    pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
143        let mut fields = self.fields.clone();
144
145        for (field_name, default_value) in defaults {
146            // Only apply default if field is missing
147            if !fields.contains_key(field_name) {
148                fields.insert(field_name.clone(), default_value.clone());
149            }
150        }
151
152        Self {
153            fields,
154            quill_tag: self.quill_tag.clone(),
155        }
156    }
157
158    /// Create a new ParsedDocument with coerced field values
159    ///
160    /// This method applies type coercions to field values based on the schema.
161    /// Coercions include:
162    /// - Singular values to arrays when schema expects array
163    /// - String "true"/"false" to boolean
164    /// - Numbers to boolean (0=false, non-zero=true)
165    /// - String numbers to number type
166    /// - Boolean to number (true=1, false=0)
167    ///
168    /// # Arguments
169    ///
170    /// * `schema` - A JSON Schema object defining expected field types
171    ///
172    /// # Returns
173    ///
174    /// A new ParsedDocument with coerced field values
175    pub fn with_coercion(&self, schema: &QuillValue) -> Self {
176        use crate::schema::coerce_document;
177
178        let coerced_fields = coerce_document(schema, &self.fields);
179
180        Self {
181            fields: coerced_fields,
182            quill_tag: self.quill_tag.clone(),
183        }
184    }
185}
186
187#[derive(Debug)]
188struct MetadataBlock {
189    start: usize,                          // Position of opening "---"
190    end: usize,                            // Position after closing "---\n"
191    yaml_value: Option<serde_yaml::Value>, // Parsed YAML (None if empty or parse failed)
192    tag: Option<String>,                   // Field name from SCOPE key
193    quill_name: Option<String>,            // Quill name from QUILL key
194}
195
/// Checks that `name` matches the pattern `[a-z_][a-z0-9_]*`.
///
/// The first character must be an ASCII lowercase letter or `_`; every
/// following character must be an ASCII lowercase letter, an ASCII digit, or
/// `_`. The empty string is rejected.
fn is_valid_tag_name(name: &str) -> bool {
    let mut remaining = name.chars();

    match remaining.next() {
        Some(head) if head.is_ascii_lowercase() || head == '_' => {
            remaining.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_')
        }
        // Empty input, or a first character outside `[a-z_]`.
        _ => false,
    }
}
217
218/// Find all metadata blocks in the document
219fn find_metadata_blocks(
220    markdown: &str,
221) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
222    let mut blocks = Vec::new();
223    let mut pos = 0;
224
225    while pos < markdown.len() {
226        // Look for opening "---\n" or "---\r\n"
227        let search_str = &markdown[pos..];
228        let delimiter_result = if let Some(p) = search_str.find("---\n") {
229            Some((p, 4, "\n"))
230        } else if let Some(p) = search_str.find("---\r\n") {
231            Some((p, 5, "\r\n"))
232        } else {
233            None
234        };
235
236        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
237            let abs_pos = pos + delimiter_pos;
238
239            // Check if the delimiter is at the start of a line
240            let is_start_of_line = if abs_pos == 0 {
241                true
242            } else {
243                let char_before = markdown.as_bytes()[abs_pos - 1];
244                char_before == b'\n' || char_before == b'\r'
245            };
246
247            if !is_start_of_line {
248                pos = abs_pos + 1;
249                continue;
250            }
251
252            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"
253
254            // Check if this --- is a horizontal rule (blank lines above AND below)
255            let preceded_by_blank = if abs_pos > 0 {
256                // Check if there's a blank line before the ---
257                let before = &markdown[..abs_pos];
258                before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
259            } else {
260                false
261            };
262
263            let followed_by_blank = if content_start < markdown.len() {
264                markdown[content_start..].starts_with('\n')
265                    || markdown[content_start..].starts_with("\r\n")
266            } else {
267                false
268            };
269
270            // Horizontal rule: blank lines both above and below
271            if preceded_by_blank && followed_by_blank {
272                // This is a horizontal rule in the body, skip it
273                pos = abs_pos + 3; // Skip past "---"
274                continue;
275            }
276
277            // Check if followed by non-blank line (or if we're at document start)
278            // This starts a metadata block
279            if followed_by_blank {
280                // --- followed by blank line but NOT preceded by blank line
281                // This is NOT a metadata block opening, skip it
282                pos = abs_pos + 3;
283                continue;
284            }
285
286            // Found potential metadata block opening (followed by non-blank line)
287            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
288            let rest = &markdown[content_start..];
289
290            // First try to find delimiters with trailing newlines
291            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
292            let closing_with_newline = closing_patterns
293                .iter()
294                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
295                .min_by_key(|(p, _)| *p);
296
297            // Also check for closing at end of document (no trailing newline)
298            let closing_at_eof = ["\n---", "\r\n---"]
299                .iter()
300                .filter_map(|delim| {
301                    rest.find(delim).and_then(|p| {
302                        if p + delim.len() == rest.len() {
303                            Some((p, delim.len()))
304                        } else {
305                            None
306                        }
307                    })
308                })
309                .min_by_key(|(p, _)| *p);
310
311            let closing_result = match (closing_with_newline, closing_at_eof) {
312                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
313                (Some(_), Some(_)) => closing_with_newline,
314                (Some(_), None) => closing_with_newline,
315                (None, Some(_)) => closing_at_eof,
316                (None, None) => None,
317            };
318
319            if let Some((closing_pos, closing_len)) = closing_result {
320                let abs_closing_pos = content_start + closing_pos;
321                let content = &markdown[content_start..abs_closing_pos];
322
323                // Check YAML size limit
324                if content.len() > crate::error::MAX_YAML_SIZE {
325                    return Err(format!(
326                        "YAML block too large: {} bytes (max: {} bytes)",
327                        content.len(),
328                        crate::error::MAX_YAML_SIZE
329                    )
330                    .into());
331                }
332
333                // Parse YAML content to check for reserved keys (QUILL, SCOPE)
334                // First, try to parse as YAML
335                let (tag, quill_name, yaml_value) = if !content.is_empty() {
336                    // Try to parse the YAML to check for reserved keys
337                    match serde_yaml::from_str::<serde_yaml::Value>(content) {
338                        Ok(parsed_yaml) => {
339                            if let Some(mapping) = parsed_yaml.as_mapping() {
340                                let quill_key = serde_yaml::Value::String("QUILL".to_string());
341                                let scope_key = serde_yaml::Value::String("SCOPE".to_string());
342
343                                let has_quill = mapping.contains_key(&quill_key);
344                                let has_scope = mapping.contains_key(&scope_key);
345
346                                if has_quill && has_scope {
347                                    return Err(
348                                        "Cannot specify both QUILL and SCOPE in the same block"
349                                            .into(),
350                                    );
351                                }
352
353                                if has_quill {
354                                    // Extract quill name
355                                    let quill_value = mapping.get(&quill_key).unwrap();
356                                    let quill_name_str = quill_value
357                                        .as_str()
358                                        .ok_or_else(|| "QUILL value must be a string")?;
359
360                                    if !is_valid_tag_name(quill_name_str) {
361                                        return Err(format!(
362                                            "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
363                                            quill_name_str
364                                        )
365                                        .into());
366                                    }
367
368                                    // Remove QUILL from the YAML value for processing
369                                    let mut new_mapping = mapping.clone();
370                                    new_mapping.remove(&quill_key);
371                                    let new_value = if new_mapping.is_empty() {
372                                        None
373                                    } else {
374                                        Some(serde_yaml::Value::Mapping(new_mapping))
375                                    };
376
377                                    (None, Some(quill_name_str.to_string()), new_value)
378                                } else if has_scope {
379                                    // Extract scope field name
380                                    let scope_value = mapping.get(&scope_key).unwrap();
381                                    let field_name = scope_value
382                                        .as_str()
383                                        .ok_or_else(|| "SCOPE value must be a string")?;
384
385                                    if !is_valid_tag_name(field_name) {
386                                        return Err(format!(
387                                            "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
388                                            field_name
389                                        )
390                                        .into());
391                                    }
392
393                                    if field_name == BODY_FIELD {
394                                        return Err(format!(
395                                            "Cannot use reserved field name '{}' as SCOPE value",
396                                            BODY_FIELD
397                                        )
398                                        .into());
399                                    }
400
401                                    // Remove SCOPE from the YAML value for processing
402                                    let mut new_mapping = mapping.clone();
403                                    new_mapping.remove(&scope_key);
404                                    let new_value = if new_mapping.is_empty() {
405                                        None
406                                    } else {
407                                        Some(serde_yaml::Value::Mapping(new_mapping))
408                                    };
409
410                                    (Some(field_name.to_string()), None, new_value)
411                                } else {
412                                    // No reserved keys, keep the parsed YAML
413                                    (None, None, Some(parsed_yaml))
414                                }
415                            } else {
416                                // Not a mapping, keep the parsed YAML (could be null for whitespace)
417                                (None, None, Some(parsed_yaml))
418                            }
419                        }
420                        Err(e) => {
421                            // YAML parsing failed - return error with context
422                            return Err(format!("Invalid YAML frontmatter: {}", e).into());
423                        }
424                    }
425                } else {
426                    // Empty content
427                    (None, None, None)
428                };
429
430                blocks.push(MetadataBlock {
431                    start: abs_pos,
432                    end: abs_closing_pos + closing_len, // After closing delimiter
433                    yaml_value,
434                    tag,
435                    quill_name,
436                });
437
438                pos = abs_closing_pos + closing_len;
439            } else if abs_pos == 0 {
440                // Frontmatter started but not closed
441                return Err("Frontmatter started but not closed with ---".into());
442            } else {
443                // Not a valid metadata block, skip this position
444                pos = abs_pos + 3;
445            }
446        } else {
447            break;
448        }
449    }
450
451    Ok(blocks)
452}
453
454/// Decompose markdown into frontmatter fields and body
455fn decompose(markdown: &str) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
456    // Check input size limit
457    if markdown.len() > crate::error::MAX_INPUT_SIZE {
458        return Err(format!(
459            "Input too large: {} bytes (max: {} bytes)",
460            markdown.len(),
461            crate::error::MAX_INPUT_SIZE
462        )
463        .into());
464    }
465
466    let mut fields = HashMap::new();
467
468    // Find all metadata blocks
469    let blocks = find_metadata_blocks(markdown)?;
470
471    if blocks.is_empty() {
472        // No metadata blocks, entire content is body
473        // Preprocess guillemets in markdown body
474        let preprocessed_body = preprocess_markdown_guillemets(markdown);
475        fields.insert(
476            BODY_FIELD.to_string(),
477            QuillValue::from_json(serde_json::Value::String(preprocessed_body)),
478        );
479        return Ok(ParsedDocument::new(fields));
480    }
481
482    // Track which attributes are used for tagged blocks
483    let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
484    let mut has_global_frontmatter = false;
485    let mut global_frontmatter_index: Option<usize> = None;
486    let mut quill_name: Option<String> = None;
487
488    // First pass: identify global frontmatter, quill directive, and validate
489    for (idx, block) in blocks.iter().enumerate() {
490        // Check for quill directive
491        if let Some(ref name) = block.quill_name {
492            if quill_name.is_some() {
493                return Err("Multiple quill directives found: only one allowed".into());
494            }
495            quill_name = Some(name.clone());
496        }
497
498        // Check for global frontmatter (no tag and no quill directive)
499        if block.tag.is_none() && block.quill_name.is_none() {
500            if has_global_frontmatter {
501                return Err(
502                    "Multiple global frontmatter blocks found: only one untagged block allowed"
503                        .into(),
504                );
505            }
506            has_global_frontmatter = true;
507            global_frontmatter_index = Some(idx);
508        }
509    }
510
511    // Parse global frontmatter if present
512    if let Some(idx) = global_frontmatter_index {
513        let block = &blocks[idx];
514
515        // Get parsed YAML fields directly (already parsed in find_metadata_blocks)
516        let yaml_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
517            Some(serde_yaml::Value::Mapping(mapping)) => mapping
518                .iter()
519                .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
520                .collect(),
521            Some(serde_yaml::Value::Null) => {
522                // Null value (from whitespace-only YAML) - treat as empty mapping
523                HashMap::new()
524            }
525            Some(_) => {
526                // Non-mapping, non-null YAML (e.g., scalar, sequence) - this is an error for frontmatter
527                return Err("Invalid YAML frontmatter: expected a mapping".into());
528            }
529            None => HashMap::new(),
530        };
531
532        // Check that all tagged blocks don't conflict with global fields
533        // Exception: if the global field is an array, allow it (we'll merge later)
534        for other_block in &blocks {
535            if let Some(ref tag) = other_block.tag {
536                if let Some(global_value) = yaml_fields.get(tag) {
537                    // Check if the global value is an array
538                    if global_value.as_sequence().is_none() {
539                        return Err(format!(
540                            "Name collision: global field '{}' conflicts with tagged attribute",
541                            tag
542                        )
543                        .into());
544                    }
545                }
546            }
547        }
548
549        // Convert YAML values to QuillValue at boundary
550        // Preprocess guillemets in all string values
551        for (key, value) in yaml_fields {
552            let preprocessed = preprocess_yaml_guillemets(value);
553            fields.insert(key, QuillValue::from_yaml(preprocessed)?);
554        }
555    }
556
557    // Process blocks with quill directives
558    for block in &blocks {
559        if block.quill_name.is_some() {
560            // Quill directive blocks can have YAML content (becomes part of frontmatter)
561            if let Some(ref yaml_val) = block.yaml_value {
562                let yaml_fields: HashMap<String, serde_yaml::Value> = match yaml_val {
563                    serde_yaml::Value::Mapping(mapping) => mapping
564                        .iter()
565                        .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
566                        .collect(),
567                    serde_yaml::Value::Null => {
568                        // Null value (from whitespace-only YAML) - treat as empty mapping
569                        HashMap::new()
570                    }
571                    _ => {
572                        return Err("Invalid YAML in quill block: expected a mapping".into());
573                    }
574                };
575
576                // Check for conflicts with existing fields
577                for key in yaml_fields.keys() {
578                    if fields.contains_key(key) {
579                        return Err(format!(
580                            "Name collision: quill block field '{}' conflicts with existing field",
581                            key
582                        )
583                        .into());
584                    }
585                }
586
587                // Convert YAML values to QuillValue at boundary
588                // Preprocess guillemets in all string values
589                for (key, value) in yaml_fields {
590                    let preprocessed = preprocess_yaml_guillemets(value);
591                    fields.insert(key, QuillValue::from_yaml(preprocessed)?);
592                }
593            }
594        }
595    }
596
597    // Parse tagged blocks
598    for (idx, block) in blocks.iter().enumerate() {
599        if let Some(ref tag_name) = block.tag {
600            // Check if this conflicts with global fields
601            // Exception: if the global field is an array, allow it (we'll merge later)
602            if let Some(existing_value) = fields.get(tag_name) {
603                if existing_value.as_array().is_none() {
604                    return Err(format!(
605                        "Name collision: tagged attribute '{}' conflicts with global field",
606                        tag_name
607                    )
608                    .into());
609                }
610            }
611
612            // Get YAML metadata directly (already parsed in find_metadata_blocks)
613            let mut item_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
614                Some(serde_yaml::Value::Mapping(mapping)) => mapping
615                    .iter()
616                    .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
617                    .collect(),
618                Some(serde_yaml::Value::Null) => {
619                    // Null value (from whitespace-only YAML) - treat as empty mapping
620                    HashMap::new()
621                }
622                Some(_) => {
623                    return Err(format!(
624                        "Invalid YAML in tagged block '{}': expected a mapping",
625                        tag_name
626                    )
627                    .into());
628                }
629                None => HashMap::new(),
630            };
631
632            // Extract body for this tagged block
633            let body_start = block.end;
634            let body_end = if idx + 1 < blocks.len() {
635                blocks[idx + 1].start
636            } else {
637                markdown.len()
638            };
639            let body = &markdown[body_start..body_end];
640
641            // Preprocess guillemets in the tagged block body (markdown-aware)
642            let preprocessed_body = preprocess_markdown_guillemets(body);
643
644            // Add preprocessed body to item fields
645            item_fields.insert(
646                BODY_FIELD.to_string(),
647                serde_yaml::Value::String(preprocessed_body),
648            );
649
650            // Preprocess guillemets in YAML string values
651            let preprocessed_fields: HashMap<String, serde_yaml::Value> = item_fields
652                .into_iter()
653                .map(|(k, v)| (k, preprocess_yaml_guillemets(v)))
654                .collect();
655
656            // Convert HashMap to serde_yaml::Value::Mapping
657            let item_value = serde_yaml::to_value(preprocessed_fields)?;
658
659            // Add to collection
660            tagged_attributes
661                .entry(tag_name.clone())
662                .or_insert_with(Vec::new)
663                .push(item_value);
664        }
665    }
666
667    // Extract global body
668    // Body starts after global frontmatter or quill block (whichever comes first)
669    // Body ends at the first scope block or EOF
670    let first_non_scope_block_idx = blocks
671        .iter()
672        .position(|b| b.tag.is_none() && b.quill_name.is_none())
673        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));
674
675    let (body_start, body_end) = if let Some(idx) = first_non_scope_block_idx {
676        // Body starts after the first non-scope block (global frontmatter or quill)
677        let start = blocks[idx].end;
678
679        // Body ends at the first scope block after this, or EOF
680        let end = blocks
681            .iter()
682            .skip(idx + 1)
683            .find(|b| b.tag.is_some())
684            .map(|b| b.start)
685            .unwrap_or(markdown.len());
686
687        (start, end)
688    } else {
689        // No global frontmatter or quill block - body is everything before the first scope block
690        let end = blocks
691            .iter()
692            .find(|b| b.tag.is_some())
693            .map(|b| b.start)
694            .unwrap_or(0);
695
696        (0, end)
697    };
698
699    let global_body = &markdown[body_start..body_end];
700
701    // Preprocess guillemets in markdown body
702    let preprocessed_global_body = preprocess_markdown_guillemets(global_body);
703
704    fields.insert(
705        BODY_FIELD.to_string(),
706        QuillValue::from_json(serde_json::Value::String(preprocessed_global_body)),
707    );
708
709    // Add all tagged collections to fields (convert to QuillValue)
710    // If a field already exists and is an array, merge the new items into it
711    for (tag_name, items) in tagged_attributes {
712        if let Some(existing_value) = fields.get(&tag_name) {
713            // The existing value must be an array (checked earlier)
714            if let Some(existing_array) = existing_value.as_array() {
715                // Convert new items from YAML to JSON
716                // Note: guillemets in items were already preprocessed when the items were created
717                let new_items_json: Vec<serde_json::Value> = items
718                    .into_iter()
719                    .map(|yaml_val| {
720                        serde_json::to_value(&yaml_val)
721                            .map_err(|e| format!("Failed to convert YAML to JSON: {}", e))
722                    })
723                    .collect::<Result<Vec<_>, _>>()?;
724
725                // Combine existing and new items
726                let mut merged_array = existing_array.clone();
727                merged_array.extend(new_items_json);
728
729                // Create QuillValue from merged JSON array
730                let quill_value = QuillValue::from_json(serde_json::Value::Array(merged_array));
731                fields.insert(tag_name, quill_value);
732            } else {
733                // This should not happen due to earlier validation, but handle it gracefully
734                return Err(format!(
735                    "Internal error: field '{}' exists but is not an array",
736                    tag_name
737                )
738                .into());
739            }
740        } else {
741            // No existing field, just create a new sequence
742            // Note: guillemets in items were already preprocessed when the items were created
743            let quill_value = QuillValue::from_yaml(serde_yaml::Value::Sequence(items))?;
744            fields.insert(tag_name, quill_value);
745        }
746    }
747
748    let quill_tag = quill_name.unwrap_or_else(|| "__default__".to_string());
749    let parsed = ParsedDocument::with_quill_tag(fields, quill_tag);
750
751    Ok(parsed)
752}
753
754#[cfg(test)]
755mod tests {
756    use super::*;
757
758    #[test]
759    fn test_no_frontmatter() {
760        let markdown = "# Hello World\n\nThis is a test.";
761        let doc = decompose(markdown).unwrap();
762
763        assert_eq!(doc.body(), Some(markdown));
764        assert_eq!(doc.fields().len(), 1);
765        // Verify default quill tag is set
766        assert_eq!(doc.quill_tag(), "__default__");
767    }
768
769    #[test]
770    fn test_with_frontmatter() {
771        let markdown = r#"---
772title: Test Document
773author: Test Author
774---
775
776# Hello World
777
778This is the body."#;
779
780        let doc = decompose(markdown).unwrap();
781
782        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
783        assert_eq!(
784            doc.get_field("title").unwrap().as_str().unwrap(),
785            "Test Document"
786        );
787        assert_eq!(
788            doc.get_field("author").unwrap().as_str().unwrap(),
789            "Test Author"
790        );
791        assert_eq!(doc.fields().len(), 3); // title, author, body
792                                           // Verify default quill tag is set when no QUILL directive
793        assert_eq!(doc.quill_tag(), "__default__");
794    }
795
796    #[test]
797    fn test_complex_yaml_frontmatter() {
798        let markdown = r#"---
799title: Complex Document
800tags:
801  - test
802  - yaml
803metadata:
804  version: 1.0
805  nested:
806    field: value
807---
808
809Content here."#;
810
811        let doc = decompose(markdown).unwrap();
812
813        assert_eq!(doc.body(), Some("\nContent here."));
814        assert_eq!(
815            doc.get_field("title").unwrap().as_str().unwrap(),
816            "Complex Document"
817        );
818
819        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
820        assert_eq!(tags.len(), 2);
821        assert_eq!(tags[0].as_str().unwrap(), "test");
822        assert_eq!(tags[1].as_str().unwrap(), "yaml");
823    }
824
825    #[test]
826    fn test_with_defaults_empty_document() {
827        use std::collections::HashMap;
828
829        let mut defaults = HashMap::new();
830        defaults.insert(
831            "status".to_string(),
832            QuillValue::from_json(serde_json::json!("draft")),
833        );
834        defaults.insert(
835            "version".to_string(),
836            QuillValue::from_json(serde_json::json!(1)),
837        );
838
839        // Create an empty parsed document
840        let doc = ParsedDocument::new(HashMap::new());
841        let doc_with_defaults = doc.with_defaults(&defaults);
842
843        // Check that defaults were applied
844        assert_eq!(
845            doc_with_defaults
846                .get_field("status")
847                .unwrap()
848                .as_str()
849                .unwrap(),
850            "draft"
851        );
852        assert_eq!(
853            doc_with_defaults
854                .get_field("version")
855                .unwrap()
856                .as_number()
857                .unwrap()
858                .as_i64()
859                .unwrap(),
860            1
861        );
862    }
863
864    #[test]
865    fn test_with_defaults_preserves_existing_values() {
866        use std::collections::HashMap;
867
868        let mut defaults = HashMap::new();
869        defaults.insert(
870            "status".to_string(),
871            QuillValue::from_json(serde_json::json!("draft")),
872        );
873
874        // Create document with existing status
875        let mut fields = HashMap::new();
876        fields.insert(
877            "status".to_string(),
878            QuillValue::from_json(serde_json::json!("published")),
879        );
880        let doc = ParsedDocument::new(fields);
881
882        let doc_with_defaults = doc.with_defaults(&defaults);
883
884        // Existing value should be preserved
885        assert_eq!(
886            doc_with_defaults
887                .get_field("status")
888                .unwrap()
889                .as_str()
890                .unwrap(),
891            "published"
892        );
893    }
894
895    #[test]
896    fn test_with_defaults_partial_application() {
897        use std::collections::HashMap;
898
899        let mut defaults = HashMap::new();
900        defaults.insert(
901            "status".to_string(),
902            QuillValue::from_json(serde_json::json!("draft")),
903        );
904        defaults.insert(
905            "version".to_string(),
906            QuillValue::from_json(serde_json::json!(1)),
907        );
908
909        // Create document with only one field
910        let mut fields = HashMap::new();
911        fields.insert(
912            "status".to_string(),
913            QuillValue::from_json(serde_json::json!("published")),
914        );
915        let doc = ParsedDocument::new(fields);
916
917        let doc_with_defaults = doc.with_defaults(&defaults);
918
919        // Existing field preserved, missing field gets default
920        assert_eq!(
921            doc_with_defaults
922                .get_field("status")
923                .unwrap()
924                .as_str()
925                .unwrap(),
926            "published"
927        );
928        assert_eq!(
929            doc_with_defaults
930                .get_field("version")
931                .unwrap()
932                .as_number()
933                .unwrap()
934                .as_i64()
935                .unwrap(),
936            1
937        );
938    }
939
940    #[test]
941    fn test_with_defaults_no_defaults() {
942        use std::collections::HashMap;
943
944        let defaults = HashMap::new(); // Empty defaults map
945
946        let doc = ParsedDocument::new(HashMap::new());
947        let doc_with_defaults = doc.with_defaults(&defaults);
948
949        // No defaults should be applied
950        assert!(doc_with_defaults.fields().is_empty());
951    }
952
953    #[test]
954    fn test_with_defaults_complex_types() {
955        use std::collections::HashMap;
956
957        let mut defaults = HashMap::new();
958        defaults.insert(
959            "tags".to_string(),
960            QuillValue::from_json(serde_json::json!(["default", "tag"])),
961        );
962
963        let doc = ParsedDocument::new(HashMap::new());
964        let doc_with_defaults = doc.with_defaults(&defaults);
965
966        // Complex default value should be applied
967        let tags = doc_with_defaults
968            .get_field("tags")
969            .unwrap()
970            .as_sequence()
971            .unwrap();
972        assert_eq!(tags.len(), 2);
973        assert_eq!(tags[0].as_str().unwrap(), "default");
974        assert_eq!(tags[1].as_str().unwrap(), "tag");
975    }
976
977    #[test]
978    fn test_with_coercion_singular_to_array() {
979        use std::collections::HashMap;
980
981        let schema = QuillValue::from_json(serde_json::json!({
982            "$schema": "https://json-schema.org/draft/2019-09/schema",
983            "type": "object",
984            "properties": {
985                "tags": {"type": "array"}
986            }
987        }));
988
989        let mut fields = HashMap::new();
990        fields.insert(
991            "tags".to_string(),
992            QuillValue::from_json(serde_json::json!("single-tag")),
993        );
994        let doc = ParsedDocument::new(fields);
995
996        let coerced_doc = doc.with_coercion(&schema);
997
998        let tags = coerced_doc.get_field("tags").unwrap();
999        assert!(tags.as_array().is_some());
1000        let tags_array = tags.as_array().unwrap();
1001        assert_eq!(tags_array.len(), 1);
1002        assert_eq!(tags_array[0].as_str().unwrap(), "single-tag");
1003    }
1004
1005    #[test]
1006    fn test_with_coercion_string_to_boolean() {
1007        use std::collections::HashMap;
1008
1009        let schema = QuillValue::from_json(serde_json::json!({
1010            "$schema": "https://json-schema.org/draft/2019-09/schema",
1011            "type": "object",
1012            "properties": {
1013                "active": {"type": "boolean"}
1014            }
1015        }));
1016
1017        let mut fields = HashMap::new();
1018        fields.insert(
1019            "active".to_string(),
1020            QuillValue::from_json(serde_json::json!("true")),
1021        );
1022        let doc = ParsedDocument::new(fields);
1023
1024        let coerced_doc = doc.with_coercion(&schema);
1025
1026        assert_eq!(
1027            coerced_doc.get_field("active").unwrap().as_bool().unwrap(),
1028            true
1029        );
1030    }
1031
1032    #[test]
1033    fn test_with_coercion_string_to_number() {
1034        use std::collections::HashMap;
1035
1036        let schema = QuillValue::from_json(serde_json::json!({
1037            "$schema": "https://json-schema.org/draft/2019-09/schema",
1038            "type": "object",
1039            "properties": {
1040                "count": {"type": "number"}
1041            }
1042        }));
1043
1044        let mut fields = HashMap::new();
1045        fields.insert(
1046            "count".to_string(),
1047            QuillValue::from_json(serde_json::json!("42")),
1048        );
1049        let doc = ParsedDocument::new(fields);
1050
1051        let coerced_doc = doc.with_coercion(&schema);
1052
1053        assert_eq!(
1054            coerced_doc.get_field("count").unwrap().as_i64().unwrap(),
1055            42
1056        );
1057    }
1058
1059    #[test]
1060    fn test_invalid_yaml() {
1061        let markdown = r#"---
1062title: [invalid yaml
1063author: missing close bracket
1064---
1065
1066Content here."#;
1067
1068        let result = decompose(markdown);
1069        assert!(result.is_err());
1070        assert!(result
1071            .unwrap_err()
1072            .to_string()
1073            .contains("Invalid YAML frontmatter"));
1074    }
1075
1076    #[test]
1077    fn test_unclosed_frontmatter() {
1078        let markdown = r#"---
1079title: Test
1080author: Test Author
1081
1082Content without closing ---"#;
1083
1084        let result = decompose(markdown);
1085        assert!(result.is_err());
1086        assert!(result.unwrap_err().to_string().contains("not closed"));
1087    }
1088
1089    // Extended metadata tests
1090
1091    #[test]
1092    fn test_basic_tagged_block() {
1093        let markdown = r#"---
1094title: Main Document
1095---
1096
1097Main body content.
1098
1099---
1100SCOPE: items
1101name: Item 1
1102---
1103
1104Body of item 1."#;
1105
1106        let doc = decompose(markdown).unwrap();
1107
1108        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
1109        assert_eq!(
1110            doc.get_field("title").unwrap().as_str().unwrap(),
1111            "Main Document"
1112        );
1113
1114        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1115        assert_eq!(items.len(), 1);
1116
1117        let item = items[0].as_object().unwrap();
1118        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1119        assert_eq!(
1120            item.get("body").unwrap().as_str().unwrap(),
1121            "\nBody of item 1."
1122        );
1123    }
1124
1125    #[test]
1126    fn test_multiple_tagged_blocks() {
1127        let markdown = r#"---
1128SCOPE: items
1129name: Item 1
1130tags: [a, b]
1131---
1132
1133First item body.
1134
1135---
1136SCOPE: items
1137name: Item 2
1138tags: [c, d]
1139---
1140
1141Second item body."#;
1142
1143        let doc = decompose(markdown).unwrap();
1144
1145        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1146        assert_eq!(items.len(), 2);
1147
1148        let item1 = items[0].as_object().unwrap();
1149        assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");
1150
1151        let item2 = items[1].as_object().unwrap();
1152        assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
1153    }
1154
1155    #[test]
1156    fn test_mixed_global_and_tagged() {
1157        let markdown = r#"---
1158title: Global
1159author: John Doe
1160---
1161
1162Global body.
1163
1164---
1165SCOPE: sections
1166title: Section 1
1167---
1168
1169Section 1 content.
1170
1171---
1172SCOPE: sections
1173title: Section 2
1174---
1175
1176Section 2 content."#;
1177
1178        let doc = decompose(markdown).unwrap();
1179
1180        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
1181        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
1182
1183        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1184        assert_eq!(sections.len(), 2);
1185    }
1186
1187    #[test]
1188    fn test_empty_tagged_metadata() {
1189        let markdown = r#"---
1190SCOPE: items
1191---
1192
1193Body without metadata."#;
1194
1195        let doc = decompose(markdown).unwrap();
1196
1197        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1198        assert_eq!(items.len(), 1);
1199
1200        let item = items[0].as_object().unwrap();
1201        assert_eq!(
1202            item.get("body").unwrap().as_str().unwrap(),
1203            "\nBody without metadata."
1204        );
1205    }
1206
1207    #[test]
1208    fn test_tagged_block_without_body() {
1209        let markdown = r#"---
1210SCOPE: items
1211name: Item
1212---"#;
1213
1214        let doc = decompose(markdown).unwrap();
1215
1216        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1217        assert_eq!(items.len(), 1);
1218
1219        let item = items[0].as_object().unwrap();
1220        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
1221    }
1222
1223    #[test]
1224    fn test_name_collision_global_and_tagged() {
1225        let markdown = r#"---
1226items: "global value"
1227---
1228
1229Body
1230
1231---
1232SCOPE: items
1233name: Item
1234---
1235
1236Item body"#;
1237
1238        let result = decompose(markdown);
1239        assert!(result.is_err());
1240        assert!(result.unwrap_err().to_string().contains("collision"));
1241    }
1242
1243    #[test]
1244    fn test_global_array_merged_with_scope() {
1245        // When global frontmatter has an array field with the same name as a SCOPE,
1246        // the SCOPE items should be added to the array
1247        let markdown = r#"---
1248items:
1249  - name: Global Item 1
1250    value: 100
1251  - name: Global Item 2
1252    value: 200
1253---
1254
1255Global body
1256
1257---
1258SCOPE: items
1259name: Scope Item 1
1260value: 300
1261---
1262
1263Scope item 1 body
1264
1265---
1266SCOPE: items
1267name: Scope Item 2
1268value: 400
1269---
1270
1271Scope item 2 body"#;
1272
1273        let doc = decompose(markdown).unwrap();
1274
1275        // Verify the items array has all 4 items (2 from global + 2 from SCOPE)
1276        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1277        assert_eq!(items.len(), 4);
1278
1279        // Verify first two items (from global array)
1280        let item1 = items[0].as_object().unwrap();
1281        assert_eq!(
1282            item1.get("name").unwrap().as_str().unwrap(),
1283            "Global Item 1"
1284        );
1285        assert_eq!(item1.get("value").unwrap().as_i64().unwrap(), 100);
1286
1287        let item2 = items[1].as_object().unwrap();
1288        assert_eq!(
1289            item2.get("name").unwrap().as_str().unwrap(),
1290            "Global Item 2"
1291        );
1292        assert_eq!(item2.get("value").unwrap().as_i64().unwrap(), 200);
1293
1294        // Verify last two items (from SCOPE blocks)
1295        let item3 = items[2].as_object().unwrap();
1296        assert_eq!(item3.get("name").unwrap().as_str().unwrap(), "Scope Item 1");
1297        assert_eq!(item3.get("value").unwrap().as_i64().unwrap(), 300);
1298        assert_eq!(
1299            item3.get("body").unwrap().as_str().unwrap(),
1300            "\nScope item 1 body\n\n"
1301        );
1302
1303        let item4 = items[3].as_object().unwrap();
1304        assert_eq!(item4.get("name").unwrap().as_str().unwrap(), "Scope Item 2");
1305        assert_eq!(item4.get("value").unwrap().as_i64().unwrap(), 400);
1306        assert_eq!(
1307            item4.get("body").unwrap().as_str().unwrap(),
1308            "\nScope item 2 body"
1309        );
1310    }
1311
1312    #[test]
1313    fn test_empty_global_array_with_scope() {
1314        // Edge case: global frontmatter has an empty array
1315        let markdown = r#"---
1316items: []
1317---
1318
1319Global body
1320
1321---
1322SCOPE: items
1323name: Item 1
1324---
1325
1326Item 1 body"#;
1327
1328        let doc = decompose(markdown).unwrap();
1329
1330        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1331        assert_eq!(items.len(), 1);
1332
1333        let item = items[0].as_object().unwrap();
1334        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1335    }
1336
1337    #[test]
1338    fn test_reserved_field_name() {
1339        let markdown = r#"---
1340SCOPE: body
1341content: Test
1342---"#;
1343
1344        let result = decompose(markdown);
1345        assert!(result.is_err());
1346        assert!(result.unwrap_err().to_string().contains("reserved"));
1347    }
1348
1349    #[test]
1350    fn test_invalid_tag_syntax() {
1351        let markdown = r#"---
1352SCOPE: Invalid-Name
1353title: Test
1354---"#;
1355
1356        let result = decompose(markdown);
1357        assert!(result.is_err());
1358        assert!(result
1359            .unwrap_err()
1360            .to_string()
1361            .contains("Invalid field name"));
1362    }
1363
1364    #[test]
1365    fn test_multiple_global_frontmatter_blocks() {
1366        let markdown = r#"---
1367title: First
1368---
1369
1370Body
1371
1372---
1373author: Second
1374---
1375
1376More body"#;
1377
1378        let result = decompose(markdown);
1379        assert!(result.is_err());
1380        assert!(result
1381            .unwrap_err()
1382            .to_string()
1383            .contains("Multiple global frontmatter"));
1384    }
1385
1386    #[test]
1387    fn test_adjacent_blocks_different_tags() {
1388        let markdown = r#"---
1389SCOPE: items
1390name: Item 1
1391---
1392
1393Item 1 body
1394
1395---
1396SCOPE: sections
1397title: Section 1
1398---
1399
1400Section 1 body"#;
1401
1402        let doc = decompose(markdown).unwrap();
1403
1404        assert!(doc.get_field("items").is_some());
1405        assert!(doc.get_field("sections").is_some());
1406
1407        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1408        assert_eq!(items.len(), 1);
1409
1410        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1411        assert_eq!(sections.len(), 1);
1412    }
1413
1414    #[test]
1415    fn test_order_preservation() {
1416        let markdown = r#"---
1417SCOPE: items
1418id: 1
1419---
1420
1421First
1422
1423---
1424SCOPE: items
1425id: 2
1426---
1427
1428Second
1429
1430---
1431SCOPE: items
1432id: 3
1433---
1434
1435Third"#;
1436
1437        let doc = decompose(markdown).unwrap();
1438
1439        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1440        assert_eq!(items.len(), 3);
1441
1442        for (i, item) in items.iter().enumerate() {
1443            let mapping = item.as_object().unwrap();
1444            let id = mapping.get("id").unwrap().as_i64().unwrap();
1445            assert_eq!(id, (i + 1) as i64);
1446        }
1447    }
1448
1449    #[test]
1450    fn test_product_catalog_integration() {
1451        let markdown = r#"---
1452title: Product Catalog
1453author: John Doe
1454date: 2024-01-01
1455---
1456
1457This is the main catalog description.
1458
1459---
1460SCOPE: products
1461name: Widget A
1462price: 19.99
1463sku: WID-001
1464---
1465
1466The **Widget A** is our most popular product.
1467
1468---
1469SCOPE: products
1470name: Gadget B
1471price: 29.99
1472sku: GAD-002
1473---
1474
1475The **Gadget B** is perfect for professionals.
1476
1477---
1478SCOPE: reviews
1479product: Widget A
1480rating: 5
1481---
1482
1483"Excellent product! Highly recommended."
1484
1485---
1486SCOPE: reviews
1487product: Gadget B
1488rating: 4
1489---
1490
1491"Very good, but a bit pricey.""#;
1492
1493        let doc = decompose(markdown).unwrap();
1494
1495        // Verify global fields
1496        assert_eq!(
1497            doc.get_field("title").unwrap().as_str().unwrap(),
1498            "Product Catalog"
1499        );
1500        assert_eq!(
1501            doc.get_field("author").unwrap().as_str().unwrap(),
1502            "John Doe"
1503        );
1504        assert_eq!(
1505            doc.get_field("date").unwrap().as_str().unwrap(),
1506            "2024-01-01"
1507        );
1508
1509        // Verify global body
1510        assert!(doc.body().unwrap().contains("main catalog description"));
1511
1512        // Verify products collection
1513        let products = doc.get_field("products").unwrap().as_sequence().unwrap();
1514        assert_eq!(products.len(), 2);
1515
1516        let product1 = products[0].as_object().unwrap();
1517        assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
1518        assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);
1519
1520        // Verify reviews collection
1521        let reviews = doc.get_field("reviews").unwrap().as_sequence().unwrap();
1522        assert_eq!(reviews.len(), 2);
1523
1524        let review1 = reviews[0].as_object().unwrap();
1525        assert_eq!(
1526            review1.get("product").unwrap().as_str().unwrap(),
1527            "Widget A"
1528        );
1529        assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);
1530
1531        // Total fields: title, author, date, body, products, reviews = 6
1532        assert_eq!(doc.fields().len(), 6);
1533    }
1534
1535    #[test]
1536    fn taro_quill_directive() {
1537        let markdown = r#"---
1538QUILL: usaf_memo
1539memo_for: [ORG/SYMBOL]
1540memo_from: [ORG/SYMBOL]
1541---
1542
1543This is the memo body."#;
1544
1545        let doc = decompose(markdown).unwrap();
1546
1547        // Verify quill tag is set
1548        assert_eq!(doc.quill_tag(), "usaf_memo");
1549
1550        // Verify fields from quill block become frontmatter
1551        assert_eq!(
1552            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1553                .as_str()
1554                .unwrap(),
1555            "ORG/SYMBOL"
1556        );
1557
1558        // Verify body
1559        assert_eq!(doc.body(), Some("\nThis is the memo body."));
1560    }
1561
1562    #[test]
1563    fn test_quill_with_scope_blocks() {
1564        let markdown = r#"---
1565QUILL: document
1566title: Test Document
1567---
1568
1569Main body.
1570
1571---
1572SCOPE: sections
1573name: Section 1
1574---
1575
1576Section 1 body."#;
1577
1578        let doc = decompose(markdown).unwrap();
1579
1580        // Verify quill tag
1581        assert_eq!(doc.quill_tag(), "document");
1582
1583        // Verify global field from quill block
1584        assert_eq!(
1585            doc.get_field("title").unwrap().as_str().unwrap(),
1586            "Test Document"
1587        );
1588
1589        // Verify scope blocks work
1590        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1591        assert_eq!(sections.len(), 1);
1592
1593        // Verify body
1594        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1595    }
1596
1597    #[test]
1598    fn test_multiple_quill_directives_error() {
1599        let markdown = r#"---
1600QUILL: first
1601---
1602
1603---
1604QUILL: second
1605---"#;
1606
1607        let result = decompose(markdown);
1608        assert!(result.is_err());
1609        assert!(result
1610            .unwrap_err()
1611            .to_string()
1612            .contains("Multiple quill directives"));
1613    }
1614
1615    #[test]
1616    fn test_invalid_quill_name() {
1617        let markdown = r#"---
1618QUILL: Invalid-Name
1619---"#;
1620
1621        let result = decompose(markdown);
1622        assert!(result.is_err());
1623        assert!(result
1624            .unwrap_err()
1625            .to_string()
1626            .contains("Invalid quill name"));
1627    }
1628
1629    #[test]
1630    fn test_quill_wrong_value_type() {
1631        let markdown = r#"---
1632QUILL: 123
1633---"#;
1634
1635        let result = decompose(markdown);
1636        assert!(result.is_err());
1637        assert!(result
1638            .unwrap_err()
1639            .to_string()
1640            .contains("QUILL value must be a string"));
1641    }
1642
1643    #[test]
1644    fn test_scope_wrong_value_type() {
1645        let markdown = r#"---
1646SCOPE: 123
1647---"#;
1648
1649        let result = decompose(markdown);
1650        assert!(result.is_err());
1651        assert!(result
1652            .unwrap_err()
1653            .to_string()
1654            .contains("SCOPE value must be a string"));
1655    }
1656
1657    #[test]
1658    fn test_both_quill_and_scope_error() {
1659        let markdown = r#"---
1660QUILL: test
1661SCOPE: items
1662---"#;
1663
1664        let result = decompose(markdown);
1665        assert!(result.is_err());
1666        assert!(result
1667            .unwrap_err()
1668            .to_string()
1669            .contains("Cannot specify both QUILL and SCOPE"));
1670    }
1671
1672    #[test]
1673    fn test_blank_lines_in_frontmatter() {
1674        // New parsing standard: blank lines are allowed within YAML blocks
1675        let markdown = r#"---
1676title: Test Document
1677author: Test Author
1678
1679description: This has a blank line above it
1680tags:
1681  - one
1682  - two
1683---
1684
1685# Hello World
1686
1687This is the body."#;
1688
1689        let doc = decompose(markdown).unwrap();
1690
1691        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
1692        assert_eq!(
1693            doc.get_field("title").unwrap().as_str().unwrap(),
1694            "Test Document"
1695        );
1696        assert_eq!(
1697            doc.get_field("author").unwrap().as_str().unwrap(),
1698            "Test Author"
1699        );
1700        assert_eq!(
1701            doc.get_field("description").unwrap().as_str().unwrap(),
1702            "This has a blank line above it"
1703        );
1704
1705        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
1706        assert_eq!(tags.len(), 2);
1707    }
1708
1709    #[test]
1710    fn test_blank_lines_in_scope_blocks() {
1711        // Blank lines should be allowed in SCOPE blocks too
1712        let markdown = r#"---
1713SCOPE: items
1714name: Item 1
1715
1716price: 19.99
1717
1718tags:
1719  - electronics
1720  - gadgets
1721---
1722
1723Body of item 1."#;
1724
1725        let doc = decompose(markdown).unwrap();
1726
1727        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1728        assert_eq!(items.len(), 1);
1729
1730        let item = items[0].as_object().unwrap();
1731        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1732        assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);
1733
1734        let tags = item.get("tags").unwrap().as_array().unwrap();
1735        assert_eq!(tags.len(), 2);
1736    }
1737
1738    #[test]
1739    fn test_horizontal_rule_with_blank_lines_above_and_below() {
1740        // Horizontal rule: blank lines both above AND below the ---
1741        let markdown = r#"---
1742title: Test
1743---
1744
1745First paragraph.
1746
1747---
1748
1749Second paragraph."#;
1750
1751        let doc = decompose(markdown).unwrap();
1752
1753        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1754
1755        // The body should contain the horizontal rule (---) as part of the content
1756        let body = doc.body().unwrap();
1757        assert!(body.contains("First paragraph."));
1758        assert!(body.contains("---"));
1759        assert!(body.contains("Second paragraph."));
1760    }
1761
1762    #[test]
1763    fn test_horizontal_rule_not_preceded_by_blank() {
1764        // --- not preceded by blank line but followed by blank line is NOT a horizontal rule
1765        // It's also NOT a valid metadata block opening (since it's followed by blank)
1766        let markdown = r#"---
1767title: Test
1768---
1769
1770First paragraph.
1771---
1772
1773Second paragraph."#;
1774
1775        let doc = decompose(markdown).unwrap();
1776
1777        let body = doc.body().unwrap();
1778        // The second --- should be in the body as text (not a horizontal rule since no blank above)
1779        assert!(body.contains("---"));
1780    }
1781
1782    #[test]
1783    fn test_multiple_blank_lines_in_yaml() {
1784        // Multiple blank lines should also be allowed
1785        let markdown = r#"---
1786title: Test
1787
1788
1789author: John Doe
1790
1791
1792version: 1.0
1793---
1794
1795Body content."#;
1796
1797        let doc = decompose(markdown).unwrap();
1798
1799        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1800        assert_eq!(
1801            doc.get_field("author").unwrap().as_str().unwrap(),
1802            "John Doe"
1803        );
1804        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1805    }
1806
1807    #[test]
1808    fn test_html_comment_interaction() {
1809        let markdown = r#"<!---
1810---> the rest of the page content
1811
1812---
1813key: value
1814---
1815"#;
1816        let doc = decompose(markdown).unwrap();
1817
1818        // The comment should be ignored (or at least not cause a parse error)
1819        // The frontmatter should be parsed
1820        let key = doc.get_field("key").and_then(|v| v.as_str());
1821        assert_eq!(key, Some("value"));
1822    }
1823}
1824#[cfg(test)]
1825mod demo_file_test {
1826    use super::*;
1827
1828    #[test]
1829    fn test_extended_metadata_demo_file() {
1830        let markdown = include_str!("../../fixtures/resources/extended_metadata_demo.md");
1831        let doc = decompose(markdown).unwrap();
1832
1833        // Verify global fields
1834        assert_eq!(
1835            doc.get_field("title").unwrap().as_str().unwrap(),
1836            "Extended Metadata Demo"
1837        );
1838        assert_eq!(
1839            doc.get_field("author").unwrap().as_str().unwrap(),
1840            "Quillmark Team"
1841        );
1842        // version is parsed as a number by YAML
1843        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1844
1845        // Verify body
1846        assert!(doc
1847            .body()
1848            .unwrap()
1849            .contains("extended YAML metadata standard"));
1850
1851        // Verify features collection
1852        let features = doc.get_field("features").unwrap().as_sequence().unwrap();
1853        assert_eq!(features.len(), 3);
1854
1855        // Verify use_cases collection
1856        let use_cases = doc.get_field("use_cases").unwrap().as_sequence().unwrap();
1857        assert_eq!(use_cases.len(), 2);
1858
1859        // Check first feature
1860        let feature1 = features[0].as_object().unwrap();
1861        assert_eq!(
1862            feature1.get("name").unwrap().as_str().unwrap(),
1863            "Tag Directives"
1864        );
1865    }
1866
1867    #[test]
1868    fn test_input_size_limit() {
1869        // Create markdown larger than MAX_INPUT_SIZE (10 MB)
1870        let size = crate::error::MAX_INPUT_SIZE + 1;
1871        let large_markdown = "a".repeat(size);
1872
1873        let result = decompose(&large_markdown);
1874        assert!(result.is_err());
1875
1876        let err_msg = result.unwrap_err().to_string();
1877        assert!(err_msg.contains("Input too large"));
1878    }
1879
1880    #[test]
1881    fn test_yaml_size_limit() {
1882        // Create YAML block larger than MAX_YAML_SIZE (1 MB)
1883        let mut markdown = String::from("---\n");
1884
1885        // Create a very large YAML field
1886        let size = crate::error::MAX_YAML_SIZE + 1;
1887        markdown.push_str("data: \"");
1888        markdown.push_str(&"x".repeat(size));
1889        markdown.push_str("\"\n---\n\nBody");
1890
1891        let result = decompose(&markdown);
1892        assert!(result.is_err());
1893
1894        let err_msg = result.unwrap_err().to_string();
1895        assert!(err_msg.contains("YAML block too large"));
1896    }
1897
1898    #[test]
1899    fn test_input_within_size_limit() {
1900        // Create markdown just under the limit
1901        let size = 1000; // Much smaller than limit
1902        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));
1903
1904        let result = decompose(&markdown);
1905        assert!(result.is_ok());
1906    }
1907
1908    #[test]
1909    fn test_yaml_within_size_limit() {
1910        // Create YAML block well within the limit
1911        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";
1912
1913        let result = decompose(&markdown);
1914        assert!(result.is_ok());
1915    }
1916
1917    // Tests for guillemet preprocessing in parsing
1918    #[test]
1919    fn test_guillemet_in_body_no_frontmatter() {
1920        let markdown = "Use <<raw content>> here.";
1921        let doc = decompose(markdown).unwrap();
1922
1923        // Body should have guillemets converted
1924        assert_eq!(doc.body(), Some("Use «raw content» here."));
1925    }
1926
1927    #[test]
1928    fn test_guillemet_in_body_with_frontmatter() {
1929        let markdown = r#"---
1930title: Test
1931---
1932
1933Use <<raw content>> here."#;
1934        let doc = decompose(markdown).unwrap();
1935
1936        // Body should have guillemets converted
1937        assert_eq!(doc.body(), Some("\nUse «raw content» here."));
1938    }
1939
1940    #[test]
1941    fn test_guillemet_in_yaml_string() {
1942        let markdown = r#"---
1943title: Test <<with chevrons>>
1944---
1945
1946Body content."#;
1947        let doc = decompose(markdown).unwrap();
1948
1949        // YAML string values should have guillemets converted
1950        assert_eq!(
1951            doc.get_field("title").unwrap().as_str().unwrap(),
1952            "Test «with chevrons»"
1953        );
1954    }
1955
1956    #[test]
1957    fn test_guillemet_in_yaml_array() {
1958        let markdown = r#"---
1959items:
1960  - "<<first>>"
1961  - "<<second>>"
1962---
1963
1964Body."#;
1965        let doc = decompose(markdown).unwrap();
1966
1967        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1968        assert_eq!(items[0].as_str().unwrap(), "«first»");
1969        assert_eq!(items[1].as_str().unwrap(), "«second»");
1970    }
1971
1972    #[test]
1973    fn test_guillemet_in_yaml_nested() {
1974        let markdown = r#"---
1975metadata:
1976  description: "<<nested value>>"
1977---
1978
1979Body."#;
1980        let doc = decompose(markdown).unwrap();
1981
1982        let metadata = doc.get_field("metadata").unwrap().as_object().unwrap();
1983        assert_eq!(
1984            metadata.get("description").unwrap().as_str().unwrap(),
1985            "«nested value»"
1986        );
1987    }
1988
1989    #[test]
1990    fn test_guillemet_in_body_skips_code_blocks() {
1991        let markdown = r#"```
1992<<not converted>>
1993```
1994
1995<<converted>>"#;
1996        let doc = decompose(markdown).unwrap();
1997
1998        let body = doc.body().unwrap();
1999        // Code block content should NOT be converted
2000        assert!(body.contains("<<not converted>>"));
2001        // Regular content should be converted
2002        assert!(body.contains("«converted»"));
2003    }
2004
2005    #[test]
2006    fn test_guillemet_in_body_skips_inline_code() {
2007        let markdown = "`<<not converted>>` and <<converted>>";
2008        let doc = decompose(markdown).unwrap();
2009
2010        let body = doc.body().unwrap();
2011        // Inline code should NOT be converted
2012        assert!(body.contains("`<<not converted>>`"));
2013        // Regular content should be converted
2014        assert!(body.contains("«converted»"));
2015    }
2016
2017    #[test]
2018    fn test_guillemet_in_tagged_block_body() {
2019        let markdown = r#"---
2020title: Main
2021---
2022
2023Main body.
2024
2025---
2026SCOPE: items
2027name: Item 1
2028---
2029
2030Use <<raw>> here."#;
2031        let doc = decompose(markdown).unwrap();
2032
2033        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
2034        let item = items[0].as_object().unwrap();
2035        let item_body = item.get("body").unwrap().as_str().unwrap();
2036        // Tagged block body should have guillemets converted
2037        assert!(item_body.contains("«raw»"));
2038    }
2039
2040    #[test]
2041    fn test_guillemet_in_tagged_block_yaml() {
2042        let markdown = r#"---
2043title: Main
2044---
2045
2046Main body.
2047
2048---
2049SCOPE: items
2050description: "<<tagged yaml>>"
2051---
2052
2053Item body."#;
2054        let doc = decompose(markdown).unwrap();
2055
2056        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
2057        let item = items[0].as_object().unwrap();
2058        // Tagged block YAML should have guillemets converted
2059        assert_eq!(
2060            item.get("description").unwrap().as_str().unwrap(),
2061            "«tagged yaml»"
2062        );
2063    }
2064
2065    #[test]
2066    fn test_guillemet_not_converted_in_yaml_numbers() {
2067        // Numbers should not be affected
2068        let markdown = r#"---
2069count: 42
2070---
2071
2072Body."#;
2073        let doc = decompose(markdown).unwrap();
2074        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
2075    }
2076
2077    #[test]
2078    fn test_guillemet_not_converted_in_yaml_booleans() {
2079        // Booleans should not be affected
2080        let markdown = r#"---
2081active: true
2082---
2083
2084Body."#;
2085        let doc = decompose(markdown).unwrap();
2086        assert_eq!(doc.get_field("active").unwrap().as_bool().unwrap(), true);
2087    }
2088
2089    #[test]
2090    fn test_guillemet_multiline_not_converted() {
2091        // Multiline guillemets should not be converted
2092        let markdown = "<<text\nacross lines>>";
2093        let doc = decompose(markdown).unwrap();
2094
2095        let body = doc.body().unwrap();
2096        // Should NOT contain guillemets since content spans lines
2097        assert!(!body.contains('«'));
2098        assert!(!body.contains('»'));
2099    }
2100
2101    #[test]
2102    fn test_guillemet_unmatched_not_converted() {
2103        let markdown = "<<unmatched";
2104        let doc = decompose(markdown).unwrap();
2105
2106        let body = doc.body().unwrap();
2107        // Unmatched should remain as-is
2108        assert_eq!(body, "<<unmatched");
2109    }
2110}
2111
2112// Additional robustness tests
#[cfg(test)]
mod robustness_tests {
    //! Edge-case tests for `decompose` and `is_valid_tag_name`: delimiter
    //! handling, line endings, Unicode, YAML scalar forms, SCOPE/QUILL
    //! directives, error cases, body extraction and guillemet preprocessing.

    use super::*;

    // Edge cases for delimiter handling

    /// An empty input yields an empty body and the default quill tag.
    #[test]
    fn test_empty_document() {
        let doc = decompose("").unwrap();
        assert_eq!(doc.body(), Some(""));
        assert_eq!(doc.quill_tag(), "__default__");
    }

    /// Whitespace-only input is returned unchanged as the body.
    #[test]
    fn test_only_whitespace() {
        let doc = decompose("   \n\n   \t").unwrap();
        assert_eq!(doc.body(), Some("   \n\n   \t"));
    }

    #[test]
    fn test_only_dashes() {
        // Just "---" at document start without newline is not treated as frontmatter opener
        // (requires "---\n" to start a frontmatter block)
        let result = decompose("---");
        // This is NOT an error - "---" alone without newline is just body content
        assert!(result.is_ok());
        assert_eq!(result.unwrap().body(), Some("---"));
    }

    #[test]
    fn test_dashes_in_middle_of_line() {
        // --- not at start of line should not be treated as delimiter
        let markdown = "some text --- more text";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.body(), Some("some text --- more text"));
    }

    #[test]
    fn test_four_dashes() {
        // ---- is not a valid delimiter
        let markdown = "----\ntitle: Test\n----\n\nBody";
        let doc = decompose(markdown).unwrap();
        // Should treat entire content as body
        assert!(doc.body().unwrap().contains("----"));
    }

    #[test]
    fn test_crlf_line_endings() {
        // Windows-style line endings
        let markdown = "---\r\ntitle: Test\r\n---\r\n\r\nBody content.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert!(doc.body().unwrap().contains("Body content."));
    }

    #[test]
    fn test_mixed_line_endings() {
        // Mix of \n and \r\n
        let markdown = "---\ntitle: Test\r\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
    }

    #[test]
    fn test_frontmatter_at_eof_no_trailing_newline() {
        // Frontmatter closed at EOF without trailing newline
        let markdown = "---\ntitle: Test\n---";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert_eq!(doc.body(), Some(""));
    }

    #[test]
    fn test_empty_frontmatter() {
        // This test uses "---\n \n---" (a whitespace-only line between the
        // delimiters) as its "empty" frontmatter block rather than a bare
        // "---\n---". Per the original author's note, the bare form is handled
        // by the horizontal-rule logic instead of being read as frontmatter.
        let markdown = "---\n \n---\n\nBody content.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("Body content."));
        // Should only have body field
        assert_eq!(doc.fields().len(), 1);
    }

    #[test]
    fn test_whitespace_only_frontmatter() {
        // Frontmatter with only whitespace
        let markdown = "---\n   \n\n   \n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("Body."));
    }

    // Unicode handling

    /// Non-ASCII keys can be looked up by their exact text.
    #[test]
    fn test_unicode_in_yaml_keys() {
        let markdown = "---\ntitre: Bonjour\nタイトル: こんにちは\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("titre").unwrap().as_str().unwrap(), "Bonjour");
        assert_eq!(
            doc.get_field("タイトル").unwrap().as_str().unwrap(),
            "こんにちは"
        );
    }

    /// Non-ASCII values (CJK and emoji) round-trip unchanged.
    #[test]
    fn test_unicode_in_yaml_values() {
        let markdown = "---\ntitle: 你好世界 🎉\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "你好世界 🎉"
        );
    }

    /// Non-ASCII body text is preserved.
    #[test]
    fn test_unicode_in_body() {
        let markdown = "---\ntitle: Test\n---\n\n日本語テキスト with emoji 🚀";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("日本語テキスト"));
        assert!(doc.body().unwrap().contains("🚀"));
    }

    // YAML edge cases

    /// A literal block scalar (`|`) keeps its embedded newlines.
    #[test]
    fn test_yaml_multiline_string() {
        let markdown = r#"---
description: |
  This is a
  multiline string
  with preserved newlines.
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let desc = doc.get_field("description").unwrap().as_str().unwrap();
        assert!(desc.contains("multiline string"));
        assert!(desc.contains('\n'));
    }

    /// A folded block scalar (`>`) is accepted and its text is retrievable.
    #[test]
    fn test_yaml_folded_string() {
        let markdown = r#"---
description: >
  This is a folded
  string that becomes
  a single line.
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let desc = doc.get_field("description").unwrap().as_str().unwrap();
        // Folded strings join lines with spaces
        assert!(desc.contains("folded"));
    }

    #[test]
    fn test_yaml_null_value() {
        let markdown = "---\noptional: null\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.get_field("optional").unwrap().is_null());
    }

    #[test]
    fn test_yaml_empty_string_value() {
        let markdown = "---\nempty: \"\"\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("empty").unwrap().as_str().unwrap(), "");
    }

    /// Quoted strings may contain YAML-significant characters such as `:` and `[`.
    #[test]
    fn test_yaml_special_characters_in_string() {
        let markdown = "---\nspecial: \"colon: here, and [brackets]\"\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(
            doc.get_field("special").unwrap().as_str().unwrap(),
            "colon: here, and [brackets]"
        );
    }

    /// Nested mappings are exposed as nested objects with typed leaf values.
    #[test]
    fn test_yaml_nested_objects() {
        let markdown = r#"---
config:
  database:
    host: localhost
    port: 5432
  cache:
    enabled: true
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let config = doc.get_field("config").unwrap().as_object().unwrap();
        let db = config.get("database").unwrap().as_object().unwrap();
        assert_eq!(db.get("host").unwrap().as_str().unwrap(), "localhost");
        assert_eq!(db.get("port").unwrap().as_i64().unwrap(), 5432);
    }

    // SCOPE block edge cases

    /// A SCOPE block closed at EOF produces one item whose `body` is empty.
    #[test]
    fn test_scope_with_empty_body() {
        let markdown = r#"---
SCOPE: items
name: Item
---"#;
        let doc = decompose(markdown).unwrap();
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);
        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
    }

    /// Two back-to-back SCOPE blocks with the same name both land in one sequence.
    #[test]
    fn test_scope_consecutive_blocks() {
        let markdown = r#"---
SCOPE: a
id: 1
---
---
SCOPE: a
id: 2
---"#;
        let doc = decompose(markdown).unwrap();
        let items = doc.get_field("a").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 2);
    }

    /// `---` appearing mid-line in a scoped body is kept as body text.
    #[test]
    fn test_scope_with_body_containing_dashes() {
        let markdown = r#"---
SCOPE: items
name: Item
---

Some text with --- dashes in it."#;
        let doc = decompose(markdown).unwrap();
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        let item = items[0].as_object().unwrap();
        let body = item.get("body").unwrap().as_str().unwrap();
        assert!(body.contains("--- dashes"));
    }

    // QUILL directive edge cases

    #[test]
    fn test_quill_with_underscore_prefix() {
        let markdown = "---\nQUILL: _internal\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "_internal");
    }

    #[test]
    fn test_quill_with_numbers() {
        let markdown = "---\nQUILL: form_8_v2\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "form_8_v2");
    }

    /// Ordinary fields in the same block as QUILL remain accessible.
    #[test]
    fn test_quill_with_additional_fields() {
        let markdown = r#"---
QUILL: my_quill
title: Document Title
author: John Doe
---

Body content."#;
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "my_quill");
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Document Title"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "John Doe"
        );
    }

    // Error handling

    /// An all-uppercase SCOPE name is rejected with an "Invalid field name" error.
    #[test]
    fn test_invalid_scope_name_uppercase() {
        let markdown = "---\nSCOPE: ITEMS\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid field name"));
    }

    #[test]
    fn test_invalid_scope_name_starts_with_number() {
        let markdown = "---\nSCOPE: 123items\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_invalid_scope_name_with_hyphen() {
        let markdown = "---\nSCOPE: my-items\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_invalid_quill_name_uppercase() {
        let markdown = "---\nQUILL: MyQuill\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_yaml_syntax_error_missing_colon() {
        let markdown = "---\ntitle Test\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_yaml_syntax_error_bad_indentation() {
        let markdown = "---\nitems:\n- one\n - two\n---\n\nBody.";
        // Bad indentation may or may not be an error depending on YAML parser
        // Just ensure it doesn't panic
        let _ = decompose(markdown);
    }

    // Body extraction edge cases

    #[test]
    fn test_body_with_leading_newlines() {
        let markdown = "---\ntitle: Test\n---\n\n\n\nBody with leading newlines.";
        let doc = decompose(markdown).unwrap();
        // Body should preserve leading newlines after frontmatter
        assert!(doc.body().unwrap().starts_with('\n'));
    }

    #[test]
    fn test_body_with_trailing_newlines() {
        let markdown = "---\ntitle: Test\n---\n\nBody.\n\n\n";
        let doc = decompose(markdown).unwrap();
        // Body should preserve trailing newlines
        assert!(doc.body().unwrap().ends_with('\n'));
    }

    /// Frontmatter with nothing after the closing delimiter gives an empty body.
    #[test]
    fn test_no_body_after_frontmatter() {
        let markdown = "---\ntitle: Test\n---";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.body(), Some(""));
    }

    // Tag name validation

    #[test]
    fn test_valid_tag_name_single_underscore() {
        assert!(is_valid_tag_name("_"));
    }

    #[test]
    fn test_valid_tag_name_underscore_prefix() {
        assert!(is_valid_tag_name("_private"));
    }

    #[test]
    fn test_valid_tag_name_with_numbers() {
        assert!(is_valid_tag_name("item1"));
        assert!(is_valid_tag_name("item_2"));
    }

    #[test]
    fn test_invalid_tag_name_empty() {
        assert!(!is_valid_tag_name(""));
    }

    #[test]
    fn test_invalid_tag_name_starts_with_number() {
        assert!(!is_valid_tag_name("1item"));
    }

    #[test]
    fn test_invalid_tag_name_uppercase() {
        assert!(!is_valid_tag_name("Items"));
        assert!(!is_valid_tag_name("ITEMS"));
    }

    #[test]
    fn test_invalid_tag_name_special_chars() {
        assert!(!is_valid_tag_name("my-items"));
        assert!(!is_valid_tag_name("my.items"));
        assert!(!is_valid_tag_name("my items"));
    }

    // Guillemet preprocessing in YAML

    /// Numbers, booleans and plain strings come through with their types
    /// intact, both at the top level and inside a sequence.
    #[test]
    fn test_guillemet_in_yaml_preserves_non_strings() {
        let markdown = r#"---
count: 42
price: 19.99
active: true
items:
  - first
  - 100
  - true
---

Body."#;
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
        assert_eq!(doc.get_field("price").unwrap().as_f64().unwrap(), 19.99);
        assert!(doc.get_field("active").unwrap().as_bool().unwrap());

        // The mixed-type sequence must survive as well; without these checks
        // the `items` entries in the fixture above were never exercised.
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 3);
        assert_eq!(items[0].as_str(), Some("first"));
        assert_eq!(items[1].as_i64(), Some(100));
        assert_eq!(items[2].as_bool(), Some(true));
    }

    #[test]
    fn test_guillemet_double_conversion_prevention() {
        // Ensure «» in input doesn't get double-processed
        let markdown = "---\ntitle: Already «converted»\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        // Should remain as-is (not double-escaped)
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Already «converted»"
        );
    }
}