quillmark_core/
parse.rs

1//! # Parsing Module
2//!
3//! Parsing functionality for markdown documents with YAML frontmatter.
4//!
5//! ## Overview
6//!
//! The `parse` module provides the [`ParsedDocument::from_markdown`] function for parsing markdown documents with YAML frontmatter.
8//!
9//! ## Key Types
10//!
11//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
12//! - [`BODY_FIELD`]: Constant for the field name storing document body
13//!
14//! ## Examples
15//!
16//! ### Basic Parsing
17//!
18//! ```
19//! use quillmark_core::ParsedDocument;
20//!
21//! let markdown = r#"---
22//! title: My Document
23//! author: John Doe
24//! ---
25//!
26//! # Introduction
27//!
28//! Document content here.
29//! "#;
30//!
31//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
32//! let title = doc.get_field("title")
33//!     .and_then(|v| v.as_str())
34//!     .unwrap_or("Untitled");
35//! ```
36//!
37//! ## Error Handling
38//!
39//! The [`ParsedDocument::from_markdown`] function returns errors for:
40//! - Malformed YAML syntax
41//! - Unclosed frontmatter blocks
42//! - Multiple global frontmatter blocks
43//! - Both QUILL and SCOPE specified in the same block
44//! - Reserved field name usage
45//! - Name collisions
46//!
47//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.
48
49use std::collections::HashMap;
50
51use crate::value::QuillValue;
52
/// The field name used to store the document body.
///
/// The parser inserts the markdown body into the field map under this key, and
/// it is rejected as a `SCOPE` value so tagged collections cannot shadow it.
pub const BODY_FIELD: &str = "body";

/// Reserved tag name for quill specification.
///
/// NOTE(review): not referenced in the visible part of this file; presumably
/// consumed by other modules — confirm before relying on it.
pub const QUILL_TAG: &str = "quill";
58
/// A parsed markdown document with frontmatter
///
/// Holds every frontmatter field plus the document body (stored under
/// [`BODY_FIELD`]) in a single map, together with the quill tag selected for
/// the document.
#[derive(Debug, Clone)]
pub struct ParsedDocument {
    /// Field name -> value; the body, when present, lives under `BODY_FIELD`.
    fields: HashMap<String, QuillValue>,
    /// Quill name from the `QUILL` key, or `"__default__"` when none was given.
    quill_tag: String,
}
65
66impl ParsedDocument {
67    /// Create a new ParsedDocument with the given fields
68    pub fn new(fields: HashMap<String, QuillValue>) -> Self {
69        Self {
70            fields,
71            quill_tag: "__default__".to_string(),
72        }
73    }
74
75    /// Create a ParsedDocument from fields and quill tag
76    pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: String) -> Self {
77        Self { fields, quill_tag }
78    }
79
80    /// Create a ParsedDocument from markdown string
81    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
82        decompose(markdown).map_err(|e| crate::error::ParseError::from(e))
83    }
84
85    /// Get the quill tag (from QUILL key, or "__default__" if not specified)
86    pub fn quill_tag(&self) -> &str {
87        &self.quill_tag
88    }
89
90    /// Get the document body
91    pub fn body(&self) -> Option<&str> {
92        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
93    }
94
95    /// Get a specific field
96    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
97        self.fields.get(name)
98    }
99
100    /// Get all fields (including body)
101    pub fn fields(&self) -> &HashMap<String, QuillValue> {
102        &self.fields
103    }
104
105    /// Create a new ParsedDocument with default values applied
106    ///
107    /// This method creates a new ParsedDocument with default values applied for any
108    /// fields that are missing from the original document but have defaults specified.
109    /// Existing fields are preserved and not overwritten.
110    ///
111    /// # Arguments
112    ///
113    /// * `defaults` - A HashMap of field names to their default QuillValues
114    ///
115    /// # Returns
116    ///
117    /// A new ParsedDocument with defaults applied for missing fields
118    pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
119        let mut fields = self.fields.clone();
120
121        for (field_name, default_value) in defaults {
122            // Only apply default if field is missing
123            if !fields.contains_key(field_name) {
124                fields.insert(field_name.clone(), default_value.clone());
125            }
126        }
127
128        Self {
129            fields,
130            quill_tag: self.quill_tag.clone(),
131        }
132    }
133
134    /// Create a new ParsedDocument with coerced field values
135    ///
136    /// This method applies type coercions to field values based on the schema.
137    /// Coercions include:
138    /// - Singular values to arrays when schema expects array
139    /// - String "true"/"false" to boolean
140    /// - Numbers to boolean (0=false, non-zero=true)
141    /// - String numbers to number type
142    /// - Boolean to number (true=1, false=0)
143    ///
144    /// # Arguments
145    ///
146    /// * `schema` - A JSON Schema object defining expected field types
147    ///
148    /// # Returns
149    ///
150    /// A new ParsedDocument with coerced field values
151    pub fn with_coercion(&self, schema: &QuillValue) -> Self {
152        use crate::schema::coerce_document;
153
154        let coerced_fields = coerce_document(schema, &self.fields);
155
156        Self {
157            fields: coerced_fields,
158            quill_tag: self.quill_tag.clone(),
159        }
160    }
161}
162
/// One `---`-delimited metadata block located in the markdown source.
#[derive(Debug)]
struct MetadataBlock {
    /// Byte offset of the opening `---`.
    start: usize,
    /// Byte offset just past the closing delimiter (including its trailing
    /// line ending, when it has one).
    end: usize,
    /// Parsed YAML with any QUILL/SCOPE key removed. `None` when the block
    /// content is empty or nothing remains once the reserved key is removed;
    /// a YAML syntax error aborts parsing instead of producing `None`.
    yaml_value: Option<serde_yaml::Value>,
    /// Field name taken from the SCOPE key, if present.
    tag: Option<String>,
    /// Quill name taken from the QUILL key, if present.
    quill_name: Option<String>,
}
171
/// Check that `name` matches the pattern `[a-z_][a-z0-9_]*`.
///
/// The first character must be an ASCII lowercase letter or `_`; every
/// following character may additionally be an ASCII digit. The empty string
/// is rejected.
fn is_valid_tag_name(name: &str) -> bool {
    let mut rest = name.chars();

    match rest.next() {
        Some(first) if first.is_ascii_lowercase() || first == '_' => {
            rest.all(|ch| ch.is_ascii_lowercase() || ch.is_ascii_digit() || ch == '_')
        }
        // Empty input or an invalid leading character.
        _ => false,
    }
}
193
194/// Find all metadata blocks in the document
195fn find_metadata_blocks(
196    markdown: &str,
197) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
198    let mut blocks = Vec::new();
199    let mut pos = 0;
200
201    while pos < markdown.len() {
202        // Look for opening "---\n" or "---\r\n"
203        let search_str = &markdown[pos..];
204        let delimiter_result = if let Some(p) = search_str.find("---\n") {
205            Some((p, 4, "\n"))
206        } else if let Some(p) = search_str.find("---\r\n") {
207            Some((p, 5, "\r\n"))
208        } else {
209            None
210        };
211
212        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
213            let abs_pos = pos + delimiter_pos;
214
215            // Check if the delimiter is at the start of a line
216            let is_start_of_line = if abs_pos == 0 {
217                true
218            } else {
219                let char_before = markdown.as_bytes()[abs_pos - 1];
220                char_before == b'\n' || char_before == b'\r'
221            };
222
223            if !is_start_of_line {
224                pos = abs_pos + 1;
225                continue;
226            }
227
228            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"
229
230            // Check if this --- is a horizontal rule (blank lines above AND below)
231            let preceded_by_blank = if abs_pos > 0 {
232                // Check if there's a blank line before the ---
233                let before = &markdown[..abs_pos];
234                before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
235            } else {
236                false
237            };
238
239            let followed_by_blank = if content_start < markdown.len() {
240                markdown[content_start..].starts_with('\n')
241                    || markdown[content_start..].starts_with("\r\n")
242            } else {
243                false
244            };
245
246            // Horizontal rule: blank lines both above and below
247            if preceded_by_blank && followed_by_blank {
248                // This is a horizontal rule in the body, skip it
249                pos = abs_pos + 3; // Skip past "---"
250                continue;
251            }
252
253            // Check if followed by non-blank line (or if we're at document start)
254            // This starts a metadata block
255            if followed_by_blank {
256                // --- followed by blank line but NOT preceded by blank line
257                // This is NOT a metadata block opening, skip it
258                pos = abs_pos + 3;
259                continue;
260            }
261
262            // Found potential metadata block opening (followed by non-blank line)
263            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
264            let rest = &markdown[content_start..];
265
266            // First try to find delimiters with trailing newlines
267            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
268            let closing_with_newline = closing_patterns
269                .iter()
270                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
271                .min_by_key(|(p, _)| *p);
272
273            // Also check for closing at end of document (no trailing newline)
274            let closing_at_eof = ["\n---", "\r\n---"]
275                .iter()
276                .filter_map(|delim| {
277                    rest.find(delim).and_then(|p| {
278                        if p + delim.len() == rest.len() {
279                            Some((p, delim.len()))
280                        } else {
281                            None
282                        }
283                    })
284                })
285                .min_by_key(|(p, _)| *p);
286
287            let closing_result = match (closing_with_newline, closing_at_eof) {
288                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
289                (Some(_), Some(_)) => closing_with_newline,
290                (Some(_), None) => closing_with_newline,
291                (None, Some(_)) => closing_at_eof,
292                (None, None) => None,
293            };
294
295            if let Some((closing_pos, closing_len)) = closing_result {
296                let abs_closing_pos = content_start + closing_pos;
297                let content = &markdown[content_start..abs_closing_pos];
298
299                // Check YAML size limit
300                if content.len() > crate::error::MAX_YAML_SIZE {
301                    return Err(format!(
302                        "YAML block too large: {} bytes (max: {} bytes)",
303                        content.len(),
304                        crate::error::MAX_YAML_SIZE
305                    )
306                    .into());
307                }
308
309                // Parse YAML content to check for reserved keys (QUILL, SCOPE)
310                // First, try to parse as YAML
311                let (tag, quill_name, yaml_value) = if !content.is_empty() {
312                    // Try to parse the YAML to check for reserved keys
313                    match serde_yaml::from_str::<serde_yaml::Value>(content) {
314                        Ok(parsed_yaml) => {
315                            if let Some(mapping) = parsed_yaml.as_mapping() {
316                                let quill_key = serde_yaml::Value::String("QUILL".to_string());
317                                let scope_key = serde_yaml::Value::String("SCOPE".to_string());
318
319                                let has_quill = mapping.contains_key(&quill_key);
320                                let has_scope = mapping.contains_key(&scope_key);
321
322                                if has_quill && has_scope {
323                                    return Err(
324                                        "Cannot specify both QUILL and SCOPE in the same block"
325                                            .into(),
326                                    );
327                                }
328
329                                if has_quill {
330                                    // Extract quill name
331                                    let quill_value = mapping.get(&quill_key).unwrap();
332                                    let quill_name_str = quill_value
333                                        .as_str()
334                                        .ok_or_else(|| "QUILL value must be a string")?;
335
336                                    if !is_valid_tag_name(quill_name_str) {
337                                        return Err(format!(
338                                            "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
339                                            quill_name_str
340                                        )
341                                        .into());
342                                    }
343
344                                    // Remove QUILL from the YAML value for processing
345                                    let mut new_mapping = mapping.clone();
346                                    new_mapping.remove(&quill_key);
347                                    let new_value = if new_mapping.is_empty() {
348                                        None
349                                    } else {
350                                        Some(serde_yaml::Value::Mapping(new_mapping))
351                                    };
352
353                                    (None, Some(quill_name_str.to_string()), new_value)
354                                } else if has_scope {
355                                    // Extract scope field name
356                                    let scope_value = mapping.get(&scope_key).unwrap();
357                                    let field_name = scope_value
358                                        .as_str()
359                                        .ok_or_else(|| "SCOPE value must be a string")?;
360
361                                    if !is_valid_tag_name(field_name) {
362                                        return Err(format!(
363                                            "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
364                                            field_name
365                                        )
366                                        .into());
367                                    }
368
369                                    if field_name == BODY_FIELD {
370                                        return Err(format!(
371                                            "Cannot use reserved field name '{}' as SCOPE value",
372                                            BODY_FIELD
373                                        )
374                                        .into());
375                                    }
376
377                                    // Remove SCOPE from the YAML value for processing
378                                    let mut new_mapping = mapping.clone();
379                                    new_mapping.remove(&scope_key);
380                                    let new_value = if new_mapping.is_empty() {
381                                        None
382                                    } else {
383                                        Some(serde_yaml::Value::Mapping(new_mapping))
384                                    };
385
386                                    (Some(field_name.to_string()), None, new_value)
387                                } else {
388                                    // No reserved keys, keep the parsed YAML
389                                    (None, None, Some(parsed_yaml))
390                                }
391                            } else {
392                                // Not a mapping, keep the parsed YAML (could be null for whitespace)
393                                (None, None, Some(parsed_yaml))
394                            }
395                        }
396                        Err(e) => {
397                            // YAML parsing failed - return error with context
398                            return Err(format!("Invalid YAML frontmatter: {}", e).into());
399                        }
400                    }
401                } else {
402                    // Empty content
403                    (None, None, None)
404                };
405
406                blocks.push(MetadataBlock {
407                    start: abs_pos,
408                    end: abs_closing_pos + closing_len, // After closing delimiter
409                    yaml_value,
410                    tag,
411                    quill_name,
412                });
413
414                pos = abs_closing_pos + closing_len;
415            } else if abs_pos == 0 {
416                // Frontmatter started but not closed
417                return Err("Frontmatter started but not closed with ---".into());
418            } else {
419                // Not a valid metadata block, skip this position
420                pos = abs_pos + 3;
421            }
422        } else {
423            break;
424        }
425    }
426
427    Ok(blocks)
428}
429
430/// Decompose markdown into frontmatter fields and body
431fn decompose(markdown: &str) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
432    // Check input size limit
433    if markdown.len() > crate::error::MAX_INPUT_SIZE {
434        return Err(format!(
435            "Input too large: {} bytes (max: {} bytes)",
436            markdown.len(),
437            crate::error::MAX_INPUT_SIZE
438        )
439        .into());
440    }
441
442    let mut fields = HashMap::new();
443
444    // Find all metadata blocks
445    let blocks = find_metadata_blocks(markdown)?;
446
447    if blocks.is_empty() {
448        // No metadata blocks, entire content is body
449        fields.insert(
450            BODY_FIELD.to_string(),
451            QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
452        );
453        return Ok(ParsedDocument::new(fields));
454    }
455
456    // Track which attributes are used for tagged blocks
457    let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
458    let mut has_global_frontmatter = false;
459    let mut global_frontmatter_index: Option<usize> = None;
460    let mut quill_name: Option<String> = None;
461
462    // First pass: identify global frontmatter, quill directive, and validate
463    for (idx, block) in blocks.iter().enumerate() {
464        // Check for quill directive
465        if let Some(ref name) = block.quill_name {
466            if quill_name.is_some() {
467                return Err("Multiple quill directives found: only one allowed".into());
468            }
469            quill_name = Some(name.clone());
470        }
471
472        // Check for global frontmatter (no tag and no quill directive)
473        if block.tag.is_none() && block.quill_name.is_none() {
474            if has_global_frontmatter {
475                return Err(
476                    "Multiple global frontmatter blocks found: only one untagged block allowed"
477                        .into(),
478                );
479            }
480            has_global_frontmatter = true;
481            global_frontmatter_index = Some(idx);
482        }
483    }
484
485    // Parse global frontmatter if present
486    if let Some(idx) = global_frontmatter_index {
487        let block = &blocks[idx];
488
489        // Get parsed YAML fields directly (already parsed in find_metadata_blocks)
490        let yaml_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
491            Some(serde_yaml::Value::Mapping(mapping)) => mapping
492                .iter()
493                .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
494                .collect(),
495            Some(serde_yaml::Value::Null) => {
496                // Null value (from whitespace-only YAML) - treat as empty mapping
497                HashMap::new()
498            }
499            Some(_) => {
500                // Non-mapping, non-null YAML (e.g., scalar, sequence) - this is an error for frontmatter
501                return Err("Invalid YAML frontmatter: expected a mapping".into());
502            }
503            None => HashMap::new(),
504        };
505
506        // Check that all tagged blocks don't conflict with global fields
507        // Exception: if the global field is an array, allow it (we'll merge later)
508        for other_block in &blocks {
509            if let Some(ref tag) = other_block.tag {
510                if let Some(global_value) = yaml_fields.get(tag) {
511                    // Check if the global value is an array
512                    if global_value.as_sequence().is_none() {
513                        return Err(format!(
514                            "Name collision: global field '{}' conflicts with tagged attribute",
515                            tag
516                        )
517                        .into());
518                    }
519                }
520            }
521        }
522
523        // Convert YAML values to QuillValue at boundary
524        for (key, value) in yaml_fields {
525            fields.insert(key, QuillValue::from_yaml(value)?);
526        }
527    }
528
529    // Process blocks with quill directives
530    for block in &blocks {
531        if block.quill_name.is_some() {
532            // Quill directive blocks can have YAML content (becomes part of frontmatter)
533            if let Some(ref yaml_val) = block.yaml_value {
534                let yaml_fields: HashMap<String, serde_yaml::Value> = match yaml_val {
535                    serde_yaml::Value::Mapping(mapping) => mapping
536                        .iter()
537                        .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
538                        .collect(),
539                    serde_yaml::Value::Null => {
540                        // Null value (from whitespace-only YAML) - treat as empty mapping
541                        HashMap::new()
542                    }
543                    _ => {
544                        return Err("Invalid YAML in quill block: expected a mapping".into());
545                    }
546                };
547
548                // Check for conflicts with existing fields
549                for key in yaml_fields.keys() {
550                    if fields.contains_key(key) {
551                        return Err(format!(
552                            "Name collision: quill block field '{}' conflicts with existing field",
553                            key
554                        )
555                        .into());
556                    }
557                }
558
559                // Convert YAML values to QuillValue at boundary
560                for (key, value) in yaml_fields {
561                    fields.insert(key, QuillValue::from_yaml(value)?);
562                }
563            }
564        }
565    }
566
567    // Parse tagged blocks
568    for (idx, block) in blocks.iter().enumerate() {
569        if let Some(ref tag_name) = block.tag {
570            // Check if this conflicts with global fields
571            // Exception: if the global field is an array, allow it (we'll merge later)
572            if let Some(existing_value) = fields.get(tag_name) {
573                if existing_value.as_array().is_none() {
574                    return Err(format!(
575                        "Name collision: tagged attribute '{}' conflicts with global field",
576                        tag_name
577                    )
578                    .into());
579                }
580            }
581
582            // Get YAML metadata directly (already parsed in find_metadata_blocks)
583            let mut item_fields: HashMap<String, serde_yaml::Value> = match &block.yaml_value {
584                Some(serde_yaml::Value::Mapping(mapping)) => mapping
585                    .iter()
586                    .filter_map(|(k, v)| k.as_str().map(|key| (key.to_string(), v.clone())))
587                    .collect(),
588                Some(serde_yaml::Value::Null) => {
589                    // Null value (from whitespace-only YAML) - treat as empty mapping
590                    HashMap::new()
591                }
592                Some(_) => {
593                    return Err(format!(
594                        "Invalid YAML in tagged block '{}': expected a mapping",
595                        tag_name
596                    )
597                    .into());
598                }
599                None => HashMap::new(),
600            };
601
602            // Extract body for this tagged block
603            let body_start = block.end;
604            let body_end = if idx + 1 < blocks.len() {
605                blocks[idx + 1].start
606            } else {
607                markdown.len()
608            };
609            let body = &markdown[body_start..body_end];
610
611            // Add body to item fields
612            item_fields.insert(
613                BODY_FIELD.to_string(),
614                serde_yaml::Value::String(body.to_string()),
615            );
616
617            // Convert HashMap to serde_yaml::Value::Mapping
618            let item_value = serde_yaml::to_value(item_fields)?;
619
620            // Add to collection
621            tagged_attributes
622                .entry(tag_name.clone())
623                .or_insert_with(Vec::new)
624                .push(item_value);
625        }
626    }
627
628    // Extract global body
629    // Body starts after global frontmatter or quill block (whichever comes first)
630    // Body ends at the first scope block or EOF
631    let first_non_scope_block_idx = blocks
632        .iter()
633        .position(|b| b.tag.is_none() && b.quill_name.is_none())
634        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));
635
636    let (body_start, body_end) = if let Some(idx) = first_non_scope_block_idx {
637        // Body starts after the first non-scope block (global frontmatter or quill)
638        let start = blocks[idx].end;
639
640        // Body ends at the first scope block after this, or EOF
641        let end = blocks
642            .iter()
643            .skip(idx + 1)
644            .find(|b| b.tag.is_some())
645            .map(|b| b.start)
646            .unwrap_or(markdown.len());
647
648        (start, end)
649    } else {
650        // No global frontmatter or quill block - body is everything before the first scope block
651        let end = blocks
652            .iter()
653            .find(|b| b.tag.is_some())
654            .map(|b| b.start)
655            .unwrap_or(0);
656
657        (0, end)
658    };
659
660    let global_body = &markdown[body_start..body_end];
661
662    fields.insert(
663        BODY_FIELD.to_string(),
664        QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
665    );
666
667    // Add all tagged collections to fields (convert to QuillValue)
668    // If a field already exists and is an array, merge the new items into it
669    for (tag_name, items) in tagged_attributes {
670        if let Some(existing_value) = fields.get(&tag_name) {
671            // The existing value must be an array (checked earlier)
672            if let Some(existing_array) = existing_value.as_array() {
673                // Convert new items from YAML to JSON
674                let new_items_json: Vec<serde_json::Value> = items
675                    .into_iter()
676                    .map(|yaml_val| {
677                        serde_json::to_value(&yaml_val)
678                            .map_err(|e| format!("Failed to convert YAML to JSON: {}", e))
679                    })
680                    .collect::<Result<Vec<_>, _>>()?;
681
682                // Combine existing and new items
683                let mut merged_array = existing_array.clone();
684                merged_array.extend(new_items_json);
685
686                // Create QuillValue from merged JSON array
687                let quill_value = QuillValue::from_json(serde_json::Value::Array(merged_array));
688                fields.insert(tag_name, quill_value);
689            } else {
690                // This should not happen due to earlier validation, but handle it gracefully
691                return Err(format!(
692                    "Internal error: field '{}' exists but is not an array",
693                    tag_name
694                )
695                .into());
696            }
697        } else {
698            // No existing field, just create a new sequence
699            // Note: guillemets in items were already preprocessed when the items were created
700            let quill_value = QuillValue::from_yaml(serde_yaml::Value::Sequence(items))?;
701            fields.insert(tag_name, quill_value);
702        }
703    }
704
705    let quill_tag = quill_name.unwrap_or_else(|| "__default__".to_string());
706    let parsed = ParsedDocument::with_quill_tag(fields, quill_tag);
707
708    Ok(parsed)
709}
710
711#[cfg(test)]
712mod tests {
713    use super::*;
714
    /// Without any `---` block the whole input becomes the body and it is the
    /// only field in the document.
    #[test]
    fn test_no_frontmatter() {
        let markdown = "# Hello World\n\nThis is a test.";
        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some(markdown));
        // Only the body field is present
        assert_eq!(doc.fields().len(), 1);
        // Verify default quill tag is set
        assert_eq!(doc.quill_tag(), "__default__");
    }
725
    /// A simple frontmatter block yields its keys as fields and the text after
    /// the closing delimiter as the body.
    #[test]
    fn test_with_frontmatter() {
        let markdown = r#"---
title: Test Document
author: Test Author
---

# Hello World

This is the body."#;

        let doc = decompose(markdown).unwrap();

        // The body starts right after the closing "---\n", so the blank line
        // that follows it is kept as a leading "\n".
        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Test Document"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Test Author"
        );
        assert_eq!(doc.fields().len(), 3); // title, author, body
                                           // Verify default quill tag is set when no QUILL directive
        assert_eq!(doc.quill_tag(), "__default__");
    }
752
    /// Nested YAML (sequences and mappings) in the frontmatter is parsed; the
    /// sequence under `tags` is checked element by element.
    #[test]
    fn test_complex_yaml_frontmatter() {
        let markdown = r#"---
title: Complex Document
tags:
  - test
  - yaml
metadata:
  version: 1.0
  nested:
    field: value
---

Content here."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\nContent here."));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Complex Document"
        );

        // The YAML list is exposed as a sequence that preserves item order
        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
        assert_eq!(tags.len(), 2);
        assert_eq!(tags[0].as_str().unwrap(), "test");
        assert_eq!(tags[1].as_str().unwrap(), "yaml");
    }
781
    /// `with_defaults` on a document with no fields adds every default.
    #[test]
    fn test_with_defaults_empty_document() {
        use std::collections::HashMap;

        // One string default and one numeric default
        let mut defaults = HashMap::new();
        defaults.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("draft")),
        );
        defaults.insert(
            "version".to_string(),
            QuillValue::from_json(serde_json::json!(1)),
        );

        // Create an empty parsed document
        let doc = ParsedDocument::new(HashMap::new());
        let doc_with_defaults = doc.with_defaults(&defaults);

        // Check that defaults were applied
        assert_eq!(
            doc_with_defaults
                .get_field("status")
                .unwrap()
                .as_str()
                .unwrap(),
            "draft"
        );
        assert_eq!(
            doc_with_defaults
                .get_field("version")
                .unwrap()
                .as_number()
                .unwrap()
                .as_i64()
                .unwrap(),
            1
        );
    }
820
821    #[test]
822    fn test_with_defaults_preserves_existing_values() {
823        use std::collections::HashMap;
824
825        let mut defaults = HashMap::new();
826        defaults.insert(
827            "status".to_string(),
828            QuillValue::from_json(serde_json::json!("draft")),
829        );
830
831        // Create document with existing status
832        let mut fields = HashMap::new();
833        fields.insert(
834            "status".to_string(),
835            QuillValue::from_json(serde_json::json!("published")),
836        );
837        let doc = ParsedDocument::new(fields);
838
839        let doc_with_defaults = doc.with_defaults(&defaults);
840
841        // Existing value should be preserved
842        assert_eq!(
843            doc_with_defaults
844                .get_field("status")
845                .unwrap()
846                .as_str()
847                .unwrap(),
848            "published"
849        );
850    }
851
852    #[test]
853    fn test_with_defaults_partial_application() {
854        use std::collections::HashMap;
855
856        let mut defaults = HashMap::new();
857        defaults.insert(
858            "status".to_string(),
859            QuillValue::from_json(serde_json::json!("draft")),
860        );
861        defaults.insert(
862            "version".to_string(),
863            QuillValue::from_json(serde_json::json!(1)),
864        );
865
866        // Create document with only one field
867        let mut fields = HashMap::new();
868        fields.insert(
869            "status".to_string(),
870            QuillValue::from_json(serde_json::json!("published")),
871        );
872        let doc = ParsedDocument::new(fields);
873
874        let doc_with_defaults = doc.with_defaults(&defaults);
875
876        // Existing field preserved, missing field gets default
877        assert_eq!(
878            doc_with_defaults
879                .get_field("status")
880                .unwrap()
881                .as_str()
882                .unwrap(),
883            "published"
884        );
885        assert_eq!(
886            doc_with_defaults
887                .get_field("version")
888                .unwrap()
889                .as_number()
890                .unwrap()
891                .as_i64()
892                .unwrap(),
893            1
894        );
895    }
896
897    #[test]
898    fn test_with_defaults_no_defaults() {
899        use std::collections::HashMap;
900
901        let defaults = HashMap::new(); // Empty defaults map
902
903        let doc = ParsedDocument::new(HashMap::new());
904        let doc_with_defaults = doc.with_defaults(&defaults);
905
906        // No defaults should be applied
907        assert!(doc_with_defaults.fields().is_empty());
908    }
909
910    #[test]
911    fn test_with_defaults_complex_types() {
912        use std::collections::HashMap;
913
914        let mut defaults = HashMap::new();
915        defaults.insert(
916            "tags".to_string(),
917            QuillValue::from_json(serde_json::json!(["default", "tag"])),
918        );
919
920        let doc = ParsedDocument::new(HashMap::new());
921        let doc_with_defaults = doc.with_defaults(&defaults);
922
923        // Complex default value should be applied
924        let tags = doc_with_defaults
925            .get_field("tags")
926            .unwrap()
927            .as_sequence()
928            .unwrap();
929        assert_eq!(tags.len(), 2);
930        assert_eq!(tags[0].as_str().unwrap(), "default");
931        assert_eq!(tags[1].as_str().unwrap(), "tag");
932    }
933
934    #[test]
935    fn test_with_coercion_singular_to_array() {
936        use std::collections::HashMap;
937
938        let schema = QuillValue::from_json(serde_json::json!({
939            "$schema": "https://json-schema.org/draft/2019-09/schema",
940            "type": "object",
941            "properties": {
942                "tags": {"type": "array"}
943            }
944        }));
945
946        let mut fields = HashMap::new();
947        fields.insert(
948            "tags".to_string(),
949            QuillValue::from_json(serde_json::json!("single-tag")),
950        );
951        let doc = ParsedDocument::new(fields);
952
953        let coerced_doc = doc.with_coercion(&schema);
954
955        let tags = coerced_doc.get_field("tags").unwrap();
956        assert!(tags.as_array().is_some());
957        let tags_array = tags.as_array().unwrap();
958        assert_eq!(tags_array.len(), 1);
959        assert_eq!(tags_array[0].as_str().unwrap(), "single-tag");
960    }
961
962    #[test]
963    fn test_with_coercion_string_to_boolean() {
964        use std::collections::HashMap;
965
966        let schema = QuillValue::from_json(serde_json::json!({
967            "$schema": "https://json-schema.org/draft/2019-09/schema",
968            "type": "object",
969            "properties": {
970                "active": {"type": "boolean"}
971            }
972        }));
973
974        let mut fields = HashMap::new();
975        fields.insert(
976            "active".to_string(),
977            QuillValue::from_json(serde_json::json!("true")),
978        );
979        let doc = ParsedDocument::new(fields);
980
981        let coerced_doc = doc.with_coercion(&schema);
982
983        assert_eq!(
984            coerced_doc.get_field("active").unwrap().as_bool().unwrap(),
985            true
986        );
987    }
988
989    #[test]
990    fn test_with_coercion_string_to_number() {
991        use std::collections::HashMap;
992
993        let schema = QuillValue::from_json(serde_json::json!({
994            "$schema": "https://json-schema.org/draft/2019-09/schema",
995            "type": "object",
996            "properties": {
997                "count": {"type": "number"}
998            }
999        }));
1000
1001        let mut fields = HashMap::new();
1002        fields.insert(
1003            "count".to_string(),
1004            QuillValue::from_json(serde_json::json!("42")),
1005        );
1006        let doc = ParsedDocument::new(fields);
1007
1008        let coerced_doc = doc.with_coercion(&schema);
1009
1010        assert_eq!(
1011            coerced_doc.get_field("count").unwrap().as_i64().unwrap(),
1012            42
1013        );
1014    }
1015
1016    #[test]
1017    fn test_invalid_yaml() {
1018        let markdown = r#"---
1019title: [invalid yaml
1020author: missing close bracket
1021---
1022
1023Content here."#;
1024
1025        let result = decompose(markdown);
1026        assert!(result.is_err());
1027        assert!(result
1028            .unwrap_err()
1029            .to_string()
1030            .contains("Invalid YAML frontmatter"));
1031    }
1032
1033    #[test]
1034    fn test_unclosed_frontmatter() {
1035        let markdown = r#"---
1036title: Test
1037author: Test Author
1038
1039Content without closing ---"#;
1040
1041        let result = decompose(markdown);
1042        assert!(result.is_err());
1043        assert!(result.unwrap_err().to_string().contains("not closed"));
1044    }
1045
1046    // Extended metadata tests
1047
1048    #[test]
1049    fn test_basic_tagged_block() {
1050        let markdown = r#"---
1051title: Main Document
1052---
1053
1054Main body content.
1055
1056---
1057SCOPE: items
1058name: Item 1
1059---
1060
1061Body of item 1."#;
1062
1063        let doc = decompose(markdown).unwrap();
1064
1065        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
1066        assert_eq!(
1067            doc.get_field("title").unwrap().as_str().unwrap(),
1068            "Main Document"
1069        );
1070
1071        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1072        assert_eq!(items.len(), 1);
1073
1074        let item = items[0].as_object().unwrap();
1075        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1076        assert_eq!(
1077            item.get("body").unwrap().as_str().unwrap(),
1078            "\nBody of item 1."
1079        );
1080    }
1081
1082    #[test]
1083    fn test_multiple_tagged_blocks() {
1084        let markdown = r#"---
1085SCOPE: items
1086name: Item 1
1087tags: [a, b]
1088---
1089
1090First item body.
1091
1092---
1093SCOPE: items
1094name: Item 2
1095tags: [c, d]
1096---
1097
1098Second item body."#;
1099
1100        let doc = decompose(markdown).unwrap();
1101
1102        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1103        assert_eq!(items.len(), 2);
1104
1105        let item1 = items[0].as_object().unwrap();
1106        assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");
1107
1108        let item2 = items[1].as_object().unwrap();
1109        assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
1110    }
1111
1112    #[test]
1113    fn test_mixed_global_and_tagged() {
1114        let markdown = r#"---
1115title: Global
1116author: John Doe
1117---
1118
1119Global body.
1120
1121---
1122SCOPE: sections
1123title: Section 1
1124---
1125
1126Section 1 content.
1127
1128---
1129SCOPE: sections
1130title: Section 2
1131---
1132
1133Section 2 content."#;
1134
1135        let doc = decompose(markdown).unwrap();
1136
1137        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
1138        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
1139
1140        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1141        assert_eq!(sections.len(), 2);
1142    }
1143
1144    #[test]
1145    fn test_empty_tagged_metadata() {
1146        let markdown = r#"---
1147SCOPE: items
1148---
1149
1150Body without metadata."#;
1151
1152        let doc = decompose(markdown).unwrap();
1153
1154        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1155        assert_eq!(items.len(), 1);
1156
1157        let item = items[0].as_object().unwrap();
1158        assert_eq!(
1159            item.get("body").unwrap().as_str().unwrap(),
1160            "\nBody without metadata."
1161        );
1162    }
1163
1164    #[test]
1165    fn test_tagged_block_without_body() {
1166        let markdown = r#"---
1167SCOPE: items
1168name: Item
1169---"#;
1170
1171        let doc = decompose(markdown).unwrap();
1172
1173        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1174        assert_eq!(items.len(), 1);
1175
1176        let item = items[0].as_object().unwrap();
1177        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
1178    }
1179
1180    #[test]
1181    fn test_name_collision_global_and_tagged() {
1182        let markdown = r#"---
1183items: "global value"
1184---
1185
1186Body
1187
1188---
1189SCOPE: items
1190name: Item
1191---
1192
1193Item body"#;
1194
1195        let result = decompose(markdown);
1196        assert!(result.is_err());
1197        assert!(result.unwrap_err().to_string().contains("collision"));
1198    }
1199
1200    #[test]
1201    fn test_global_array_merged_with_scope() {
1202        // When global frontmatter has an array field with the same name as a SCOPE,
1203        // the SCOPE items should be added to the array
1204        let markdown = r#"---
1205items:
1206  - name: Global Item 1
1207    value: 100
1208  - name: Global Item 2
1209    value: 200
1210---
1211
1212Global body
1213
1214---
1215SCOPE: items
1216name: Scope Item 1
1217value: 300
1218---
1219
1220Scope item 1 body
1221
1222---
1223SCOPE: items
1224name: Scope Item 2
1225value: 400
1226---
1227
1228Scope item 2 body"#;
1229
1230        let doc = decompose(markdown).unwrap();
1231
1232        // Verify the items array has all 4 items (2 from global + 2 from SCOPE)
1233        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1234        assert_eq!(items.len(), 4);
1235
1236        // Verify first two items (from global array)
1237        let item1 = items[0].as_object().unwrap();
1238        assert_eq!(
1239            item1.get("name").unwrap().as_str().unwrap(),
1240            "Global Item 1"
1241        );
1242        assert_eq!(item1.get("value").unwrap().as_i64().unwrap(), 100);
1243
1244        let item2 = items[1].as_object().unwrap();
1245        assert_eq!(
1246            item2.get("name").unwrap().as_str().unwrap(),
1247            "Global Item 2"
1248        );
1249        assert_eq!(item2.get("value").unwrap().as_i64().unwrap(), 200);
1250
1251        // Verify last two items (from SCOPE blocks)
1252        let item3 = items[2].as_object().unwrap();
1253        assert_eq!(item3.get("name").unwrap().as_str().unwrap(), "Scope Item 1");
1254        assert_eq!(item3.get("value").unwrap().as_i64().unwrap(), 300);
1255        assert_eq!(
1256            item3.get("body").unwrap().as_str().unwrap(),
1257            "\nScope item 1 body\n\n"
1258        );
1259
1260        let item4 = items[3].as_object().unwrap();
1261        assert_eq!(item4.get("name").unwrap().as_str().unwrap(), "Scope Item 2");
1262        assert_eq!(item4.get("value").unwrap().as_i64().unwrap(), 400);
1263        assert_eq!(
1264            item4.get("body").unwrap().as_str().unwrap(),
1265            "\nScope item 2 body"
1266        );
1267    }
1268
1269    #[test]
1270    fn test_empty_global_array_with_scope() {
1271        // Edge case: global frontmatter has an empty array
1272        let markdown = r#"---
1273items: []
1274---
1275
1276Global body
1277
1278---
1279SCOPE: items
1280name: Item 1
1281---
1282
1283Item 1 body"#;
1284
1285        let doc = decompose(markdown).unwrap();
1286
1287        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1288        assert_eq!(items.len(), 1);
1289
1290        let item = items[0].as_object().unwrap();
1291        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1292    }
1293
1294    #[test]
1295    fn test_reserved_field_name() {
1296        let markdown = r#"---
1297SCOPE: body
1298content: Test
1299---"#;
1300
1301        let result = decompose(markdown);
1302        assert!(result.is_err());
1303        assert!(result.unwrap_err().to_string().contains("reserved"));
1304    }
1305
1306    #[test]
1307    fn test_invalid_tag_syntax() {
1308        let markdown = r#"---
1309SCOPE: Invalid-Name
1310title: Test
1311---"#;
1312
1313        let result = decompose(markdown);
1314        assert!(result.is_err());
1315        assert!(result
1316            .unwrap_err()
1317            .to_string()
1318            .contains("Invalid field name"));
1319    }
1320
1321    #[test]
1322    fn test_multiple_global_frontmatter_blocks() {
1323        let markdown = r#"---
1324title: First
1325---
1326
1327Body
1328
1329---
1330author: Second
1331---
1332
1333More body"#;
1334
1335        let result = decompose(markdown);
1336        assert!(result.is_err());
1337        assert!(result
1338            .unwrap_err()
1339            .to_string()
1340            .contains("Multiple global frontmatter"));
1341    }
1342
1343    #[test]
1344    fn test_adjacent_blocks_different_tags() {
1345        let markdown = r#"---
1346SCOPE: items
1347name: Item 1
1348---
1349
1350Item 1 body
1351
1352---
1353SCOPE: sections
1354title: Section 1
1355---
1356
1357Section 1 body"#;
1358
1359        let doc = decompose(markdown).unwrap();
1360
1361        assert!(doc.get_field("items").is_some());
1362        assert!(doc.get_field("sections").is_some());
1363
1364        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1365        assert_eq!(items.len(), 1);
1366
1367        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1368        assert_eq!(sections.len(), 1);
1369    }
1370
1371    #[test]
1372    fn test_order_preservation() {
1373        let markdown = r#"---
1374SCOPE: items
1375id: 1
1376---
1377
1378First
1379
1380---
1381SCOPE: items
1382id: 2
1383---
1384
1385Second
1386
1387---
1388SCOPE: items
1389id: 3
1390---
1391
1392Third"#;
1393
1394        let doc = decompose(markdown).unwrap();
1395
1396        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1397        assert_eq!(items.len(), 3);
1398
1399        for (i, item) in items.iter().enumerate() {
1400            let mapping = item.as_object().unwrap();
1401            let id = mapping.get("id").unwrap().as_i64().unwrap();
1402            assert_eq!(id, (i + 1) as i64);
1403        }
1404    }
1405
1406    #[test]
1407    fn test_product_catalog_integration() {
1408        let markdown = r#"---
1409title: Product Catalog
1410author: John Doe
1411date: 2024-01-01
1412---
1413
1414This is the main catalog description.
1415
1416---
1417SCOPE: products
1418name: Widget A
1419price: 19.99
1420sku: WID-001
1421---
1422
1423The **Widget A** is our most popular product.
1424
1425---
1426SCOPE: products
1427name: Gadget B
1428price: 29.99
1429sku: GAD-002
1430---
1431
1432The **Gadget B** is perfect for professionals.
1433
1434---
1435SCOPE: reviews
1436product: Widget A
1437rating: 5
1438---
1439
1440"Excellent product! Highly recommended."
1441
1442---
1443SCOPE: reviews
1444product: Gadget B
1445rating: 4
1446---
1447
1448"Very good, but a bit pricey.""#;
1449
1450        let doc = decompose(markdown).unwrap();
1451
1452        // Verify global fields
1453        assert_eq!(
1454            doc.get_field("title").unwrap().as_str().unwrap(),
1455            "Product Catalog"
1456        );
1457        assert_eq!(
1458            doc.get_field("author").unwrap().as_str().unwrap(),
1459            "John Doe"
1460        );
1461        assert_eq!(
1462            doc.get_field("date").unwrap().as_str().unwrap(),
1463            "2024-01-01"
1464        );
1465
1466        // Verify global body
1467        assert!(doc.body().unwrap().contains("main catalog description"));
1468
1469        // Verify products collection
1470        let products = doc.get_field("products").unwrap().as_sequence().unwrap();
1471        assert_eq!(products.len(), 2);
1472
1473        let product1 = products[0].as_object().unwrap();
1474        assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
1475        assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);
1476
1477        // Verify reviews collection
1478        let reviews = doc.get_field("reviews").unwrap().as_sequence().unwrap();
1479        assert_eq!(reviews.len(), 2);
1480
1481        let review1 = reviews[0].as_object().unwrap();
1482        assert_eq!(
1483            review1.get("product").unwrap().as_str().unwrap(),
1484            "Widget A"
1485        );
1486        assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);
1487
1488        // Total fields: title, author, date, body, products, reviews = 6
1489        assert_eq!(doc.fields().len(), 6);
1490    }
1491
1492    #[test]
1493    fn taro_quill_directive() {
1494        let markdown = r#"---
1495QUILL: usaf_memo
1496memo_for: [ORG/SYMBOL]
1497memo_from: [ORG/SYMBOL]
1498---
1499
1500This is the memo body."#;
1501
1502        let doc = decompose(markdown).unwrap();
1503
1504        // Verify quill tag is set
1505        assert_eq!(doc.quill_tag(), "usaf_memo");
1506
1507        // Verify fields from quill block become frontmatter
1508        assert_eq!(
1509            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1510                .as_str()
1511                .unwrap(),
1512            "ORG/SYMBOL"
1513        );
1514
1515        // Verify body
1516        assert_eq!(doc.body(), Some("\nThis is the memo body."));
1517    }
1518
1519    #[test]
1520    fn test_quill_with_scope_blocks() {
1521        let markdown = r#"---
1522QUILL: document
1523title: Test Document
1524---
1525
1526Main body.
1527
1528---
1529SCOPE: sections
1530name: Section 1
1531---
1532
1533Section 1 body."#;
1534
1535        let doc = decompose(markdown).unwrap();
1536
1537        // Verify quill tag
1538        assert_eq!(doc.quill_tag(), "document");
1539
1540        // Verify global field from quill block
1541        assert_eq!(
1542            doc.get_field("title").unwrap().as_str().unwrap(),
1543            "Test Document"
1544        );
1545
1546        // Verify scope blocks work
1547        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1548        assert_eq!(sections.len(), 1);
1549
1550        // Verify body
1551        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1552    }
1553
1554    #[test]
1555    fn test_multiple_quill_directives_error() {
1556        let markdown = r#"---
1557QUILL: first
1558---
1559
1560---
1561QUILL: second
1562---"#;
1563
1564        let result = decompose(markdown);
1565        assert!(result.is_err());
1566        assert!(result
1567            .unwrap_err()
1568            .to_string()
1569            .contains("Multiple quill directives"));
1570    }
1571
1572    #[test]
1573    fn test_invalid_quill_name() {
1574        let markdown = r#"---
1575QUILL: Invalid-Name
1576---"#;
1577
1578        let result = decompose(markdown);
1579        assert!(result.is_err());
1580        assert!(result
1581            .unwrap_err()
1582            .to_string()
1583            .contains("Invalid quill name"));
1584    }
1585
1586    #[test]
1587    fn test_quill_wrong_value_type() {
1588        let markdown = r#"---
1589QUILL: 123
1590---"#;
1591
1592        let result = decompose(markdown);
1593        assert!(result.is_err());
1594        assert!(result
1595            .unwrap_err()
1596            .to_string()
1597            .contains("QUILL value must be a string"));
1598    }
1599
1600    #[test]
1601    fn test_scope_wrong_value_type() {
1602        let markdown = r#"---
1603SCOPE: 123
1604---"#;
1605
1606        let result = decompose(markdown);
1607        assert!(result.is_err());
1608        assert!(result
1609            .unwrap_err()
1610            .to_string()
1611            .contains("SCOPE value must be a string"));
1612    }
1613
1614    #[test]
1615    fn test_both_quill_and_scope_error() {
1616        let markdown = r#"---
1617QUILL: test
1618SCOPE: items
1619---"#;
1620
1621        let result = decompose(markdown);
1622        assert!(result.is_err());
1623        assert!(result
1624            .unwrap_err()
1625            .to_string()
1626            .contains("Cannot specify both QUILL and SCOPE"));
1627    }
1628
1629    #[test]
1630    fn test_blank_lines_in_frontmatter() {
1631        // New parsing standard: blank lines are allowed within YAML blocks
1632        let markdown = r#"---
1633title: Test Document
1634author: Test Author
1635
1636description: This has a blank line above it
1637tags:
1638  - one
1639  - two
1640---
1641
1642# Hello World
1643
1644This is the body."#;
1645
1646        let doc = decompose(markdown).unwrap();
1647
1648        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
1649        assert_eq!(
1650            doc.get_field("title").unwrap().as_str().unwrap(),
1651            "Test Document"
1652        );
1653        assert_eq!(
1654            doc.get_field("author").unwrap().as_str().unwrap(),
1655            "Test Author"
1656        );
1657        assert_eq!(
1658            doc.get_field("description").unwrap().as_str().unwrap(),
1659            "This has a blank line above it"
1660        );
1661
1662        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
1663        assert_eq!(tags.len(), 2);
1664    }
1665
1666    #[test]
1667    fn test_blank_lines_in_scope_blocks() {
1668        // Blank lines should be allowed in SCOPE blocks too
1669        let markdown = r#"---
1670SCOPE: items
1671name: Item 1
1672
1673price: 19.99
1674
1675tags:
1676  - electronics
1677  - gadgets
1678---
1679
1680Body of item 1."#;
1681
1682        let doc = decompose(markdown).unwrap();
1683
1684        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1685        assert_eq!(items.len(), 1);
1686
1687        let item = items[0].as_object().unwrap();
1688        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1689        assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);
1690
1691        let tags = item.get("tags").unwrap().as_array().unwrap();
1692        assert_eq!(tags.len(), 2);
1693    }
1694
1695    #[test]
1696    fn test_horizontal_rule_with_blank_lines_above_and_below() {
1697        // Horizontal rule: blank lines both above AND below the ---
1698        let markdown = r#"---
1699title: Test
1700---
1701
1702First paragraph.
1703
1704---
1705
1706Second paragraph."#;
1707
1708        let doc = decompose(markdown).unwrap();
1709
1710        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1711
1712        // The body should contain the horizontal rule (---) as part of the content
1713        let body = doc.body().unwrap();
1714        assert!(body.contains("First paragraph."));
1715        assert!(body.contains("---"));
1716        assert!(body.contains("Second paragraph."));
1717    }
1718
1719    #[test]
1720    fn test_horizontal_rule_not_preceded_by_blank() {
1721        // --- not preceded by blank line but followed by blank line is NOT a horizontal rule
1722        // It's also NOT a valid metadata block opening (since it's followed by blank)
1723        let markdown = r#"---
1724title: Test
1725---
1726
1727First paragraph.
1728---
1729
1730Second paragraph."#;
1731
1732        let doc = decompose(markdown).unwrap();
1733
1734        let body = doc.body().unwrap();
1735        // The second --- should be in the body as text (not a horizontal rule since no blank above)
1736        assert!(body.contains("---"));
1737    }
1738
1739    #[test]
1740    fn test_multiple_blank_lines_in_yaml() {
1741        // Multiple blank lines should also be allowed
1742        let markdown = r#"---
1743title: Test
1744
1745
1746author: John Doe
1747
1748
1749version: 1.0
1750---
1751
1752Body content."#;
1753
1754        let doc = decompose(markdown).unwrap();
1755
1756        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1757        assert_eq!(
1758            doc.get_field("author").unwrap().as_str().unwrap(),
1759            "John Doe"
1760        );
1761        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1762    }
1763
1764    #[test]
1765    fn test_html_comment_interaction() {
1766        let markdown = r#"<!---
1767---> the rest of the page content
1768
1769---
1770key: value
1771---
1772"#;
1773        let doc = decompose(markdown).unwrap();
1774
1775        // The comment should be ignored (or at least not cause a parse error)
1776        // The frontmatter should be parsed
1777        let key = doc.get_field("key").and_then(|v| v.as_str());
1778        assert_eq!(key, Some("value"));
1779    }
1780}
1781#[cfg(test)]
1782mod demo_file_test {
1783    use super::*;
1784
1785    #[test]
1786    fn test_extended_metadata_demo_file() {
1787        let markdown = include_str!("../../fixtures/resources/extended_metadata_demo.md");
1788        let doc = decompose(markdown).unwrap();
1789
1790        // Verify global fields
1791        assert_eq!(
1792            doc.get_field("title").unwrap().as_str().unwrap(),
1793            "Extended Metadata Demo"
1794        );
1795        assert_eq!(
1796            doc.get_field("author").unwrap().as_str().unwrap(),
1797            "Quillmark Team"
1798        );
1799        // version is parsed as a number by YAML
1800        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1801
1802        // Verify body
1803        assert!(doc
1804            .body()
1805            .unwrap()
1806            .contains("extended YAML metadata standard"));
1807
1808        // Verify features collection
1809        let features = doc.get_field("features").unwrap().as_sequence().unwrap();
1810        assert_eq!(features.len(), 3);
1811
1812        // Verify use_cases collection
1813        let use_cases = doc.get_field("use_cases").unwrap().as_sequence().unwrap();
1814        assert_eq!(use_cases.len(), 2);
1815
1816        // Check first feature
1817        let feature1 = features[0].as_object().unwrap();
1818        assert_eq!(
1819            feature1.get("name").unwrap().as_str().unwrap(),
1820            "Tag Directives"
1821        );
1822    }
1823
1824    #[test]
1825    fn test_input_size_limit() {
1826        // Create markdown larger than MAX_INPUT_SIZE (10 MB)
1827        let size = crate::error::MAX_INPUT_SIZE + 1;
1828        let large_markdown = "a".repeat(size);
1829
1830        let result = decompose(&large_markdown);
1831        assert!(result.is_err());
1832
1833        let err_msg = result.unwrap_err().to_string();
1834        assert!(err_msg.contains("Input too large"));
1835    }
1836
1837    #[test]
1838    fn test_yaml_size_limit() {
1839        // Create YAML block larger than MAX_YAML_SIZE (1 MB)
1840        let mut markdown = String::from("---\n");
1841
1842        // Create a very large YAML field
1843        let size = crate::error::MAX_YAML_SIZE + 1;
1844        markdown.push_str("data: \"");
1845        markdown.push_str(&"x".repeat(size));
1846        markdown.push_str("\"\n---\n\nBody");
1847
1848        let result = decompose(&markdown);
1849        assert!(result.is_err());
1850
1851        let err_msg = result.unwrap_err().to_string();
1852        assert!(err_msg.contains("YAML block too large"));
1853    }
1854
1855    #[test]
1856    fn test_input_within_size_limit() {
1857        // Create markdown just under the limit
1858        let size = 1000; // Much smaller than limit
1859        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));
1860
1861        let result = decompose(&markdown);
1862        assert!(result.is_ok());
1863    }
1864
1865    #[test]
1866    fn test_yaml_within_size_limit() {
1867        // Create YAML block well within the limit
1868        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";
1869
1870        let result = decompose(&markdown);
1871        assert!(result.is_ok());
1872    }
1873
    // Tests for chevron preservation in parsing (`<<` / `>>` are NOT converted to guillemets during parsing).
    // Guillemet conversion now happens in process_plate, not during parsing.
1876    #[test]
1877    fn test_chevrons_preserved_in_body_no_frontmatter() {
1878        let markdown = "Use <<raw content>> here.";
1879        let doc = decompose(markdown).unwrap();
1880
1881        // Body should preserve chevrons (conversion happens later in process_plate)
1882        assert_eq!(doc.body(), Some("Use <<raw content>> here."));
1883    }
1884
1885    #[test]
1886    fn test_chevrons_preserved_in_body_with_frontmatter() {
1887        let markdown = r#"---
1888title: Test
1889---
1890
1891Use <<raw content>> here."#;
1892        let doc = decompose(markdown).unwrap();
1893
1894        // Body should preserve chevrons
1895        assert_eq!(doc.body(), Some("\nUse <<raw content>> here."));
1896    }
1897
1898    #[test]
1899    fn test_chevrons_preserved_in_yaml_string() {
1900        let markdown = r#"---
1901title: Test <<with chevrons>>
1902---
1903
1904Body content."#;
1905        let doc = decompose(markdown).unwrap();
1906
1907        // YAML string values should preserve chevrons
1908        assert_eq!(
1909            doc.get_field("title").unwrap().as_str().unwrap(),
1910            "Test <<with chevrons>>"
1911        );
1912    }
1913
1914    #[test]
1915    fn test_chevrons_preserved_in_yaml_array() {
1916        let markdown = r#"---
1917items:
1918  - "<<first>>"
1919  - "<<second>>"
1920---
1921
1922Body."#;
1923        let doc = decompose(markdown).unwrap();
1924
1925        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1926        assert_eq!(items[0].as_str().unwrap(), "<<first>>");
1927        assert_eq!(items[1].as_str().unwrap(), "<<second>>");
1928    }
1929
1930    #[test]
1931    fn test_chevrons_preserved_in_yaml_nested() {
1932        let markdown = r#"---
1933metadata:
1934  description: "<<nested value>>"
1935---
1936
1937Body."#;
1938        let doc = decompose(markdown).unwrap();
1939
1940        let metadata = doc.get_field("metadata").unwrap().as_object().unwrap();
1941        assert_eq!(
1942            metadata.get("description").unwrap().as_str().unwrap(),
1943            "<<nested value>>"
1944        );
1945    }
1946
1947    #[test]
1948    fn test_chevrons_preserved_in_code_blocks() {
1949        let markdown = r#"```
1950<<in code block>>
1951```
1952
1953<<outside code block>>"#;
1954        let doc = decompose(markdown).unwrap();
1955
1956        let body = doc.body().unwrap();
1957        // All chevrons should be preserved (no conversion during parsing)
1958        assert!(body.contains("<<in code block>>"));
1959        assert!(body.contains("<<outside code block>>"));
1960    }
1961
1962    #[test]
1963    fn test_chevrons_preserved_in_inline_code() {
1964        let markdown = "`<<in inline code>>` and <<outside inline code>>";
1965        let doc = decompose(markdown).unwrap();
1966
1967        let body = doc.body().unwrap();
1968        // All chevrons should be preserved
1969        assert!(body.contains("`<<in inline code>>`"));
1970        assert!(body.contains("<<outside inline code>>"));
1971    }
1972
1973    #[test]
1974    fn test_chevrons_preserved_in_tagged_block_body() {
1975        let markdown = r#"---
1976title: Main
1977---
1978
1979Main body.
1980
1981---
1982SCOPE: items
1983name: Item 1
1984---
1985
1986Use <<raw>> here."#;
1987        let doc = decompose(markdown).unwrap();
1988
1989        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1990        let item = items[0].as_object().unwrap();
1991        let item_body = item.get("body").unwrap().as_str().unwrap();
1992        // Tagged block body should preserve chevrons
1993        assert!(item_body.contains("<<raw>>"));
1994    }
1995
1996    #[test]
1997    fn test_chevrons_preserved_in_tagged_block_yaml() {
1998        let markdown = r#"---
1999title: Main
2000---
2001
2002Main body.
2003
2004---
2005SCOPE: items
2006description: "<<tagged yaml>>"
2007---
2008
2009Item body."#;
2010        let doc = decompose(markdown).unwrap();
2011
2012        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
2013        let item = items[0].as_object().unwrap();
2014        // Tagged block YAML should preserve chevrons
2015        assert_eq!(
2016            item.get("description").unwrap().as_str().unwrap(),
2017            "<<tagged yaml>>"
2018        );
2019    }
2020
2021    #[test]
2022    fn test_yaml_numbers_not_affected() {
2023        // Numbers should not be affected
2024        let markdown = r#"---
2025count: 42
2026---
2027
2028Body."#;
2029        let doc = decompose(markdown).unwrap();
2030        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
2031    }
2032
2033    #[test]
2034    fn test_yaml_booleans_not_affected() {
2035        // Booleans should not be affected
2036        let markdown = r#"---
2037active: true
2038---
2039
2040Body."#;
2041        let doc = decompose(markdown).unwrap();
2042        assert_eq!(doc.get_field("active").unwrap().as_bool().unwrap(), true);
2043    }
2044
2045    #[test]
2046    fn test_multiline_chevrons_preserved() {
2047        // Multiline chevrons should be preserved as-is
2048        let markdown = "<<text\nacross lines>>";
2049        let doc = decompose(markdown).unwrap();
2050
2051        let body = doc.body().unwrap();
2052        // Should contain the original chevrons
2053        assert!(body.contains("<<text"));
2054        assert!(body.contains("across lines>>"));
2055    }
2056
2057    #[test]
2058    fn test_unmatched_chevrons_preserved() {
2059        let markdown = "<<unmatched";
2060        let doc = decompose(markdown).unwrap();
2061
2062        let body = doc.body().unwrap();
2063        // Unmatched should remain as-is
2064        assert_eq!(body, "<<unmatched");
2065    }
2066}
2067
// Additional robustness tests: delimiter and line-ending edge cases, Unicode,
// YAML scalar forms, SCOPE/QUILL directives, error paths, body extraction and
// tag-name validation.
#[cfg(test)]
mod robustness_tests {
    use super::*;

    // Edge cases for delimiter handling

    #[test]
    fn test_empty_document() {
        // Empty input yields an empty body and the default quill tag.
        let doc = decompose("").unwrap();
        assert_eq!(doc.body(), Some(""));
        assert_eq!(doc.quill_tag(), "__default__");
    }

    #[test]
    fn test_only_whitespace() {
        // Whitespace-only input is returned verbatim as the body.
        let doc = decompose("   \n\n   \t").unwrap();
        assert_eq!(doc.body(), Some("   \n\n   \t"));
    }

    #[test]
    fn test_only_dashes() {
        // Just "---" at document start without a newline is not treated as a
        // frontmatter opener (that requires "---\n"), so it is NOT an error:
        // the dashes are plain body content.
        let doc = decompose("---").unwrap();
        assert_eq!(doc.body(), Some("---"));
    }

    #[test]
    fn test_dashes_in_middle_of_line() {
        // "---" that is not at the start of a line is not a delimiter.
        let markdown = "some text --- more text";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.body(), Some("some text --- more text"));
    }

    #[test]
    fn test_four_dashes() {
        // "----" is not a valid delimiter, so the entire content is body.
        let markdown = "----\ntitle: Test\n----\n\nBody";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("----"));
    }

    #[test]
    fn test_crlf_line_endings() {
        // Windows-style (CRLF) line endings.
        let markdown = "---\r\ntitle: Test\r\n---\r\n\r\nBody content.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert!(doc.body().unwrap().contains("Body content."));
    }

    #[test]
    fn test_mixed_line_endings() {
        // A mix of "\n" and "\r\n" within one document.
        let markdown = "---\ntitle: Test\r\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
    }

    #[test]
    fn test_frontmatter_at_eof_no_trailing_newline() {
        // Frontmatter closed at EOF without a trailing newline: the fields
        // are parsed and the body is empty.
        let markdown = "---\ntitle: Test\n---";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert_eq!(doc.body(), Some(""));
    }

    #[test]
    fn test_empty_frontmatter() {
        // An empty frontmatter block needs some content between the
        // delimiters: a bare "---\n---" is handled by the horizontal-rule
        // logic rather than as empty frontmatter, so this fixture puts a
        // whitespace-only line between them ("---\n \n---").
        let markdown = "---\n \n---\n\nBody content.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("Body content."));
        // Only the body field should be present.
        assert_eq!(doc.fields().len(), 1);
    }

    #[test]
    fn test_whitespace_only_frontmatter() {
        // Frontmatter made up of several whitespace-only lines.
        let markdown = "---\n   \n\n   \n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("Body."));
    }

    // Unicode handling

    #[test]
    fn test_unicode_in_yaml_keys() {
        // Non-ASCII keys are usable for field lookup.
        let markdown = "---\ntitre: Bonjour\nタイトル: こんにちは\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("titre").unwrap().as_str().unwrap(), "Bonjour");
        assert_eq!(
            doc.get_field("タイトル").unwrap().as_str().unwrap(),
            "こんにちは"
        );
    }

    #[test]
    fn test_unicode_in_yaml_values() {
        // CJK text and emoji in a value round-trip unchanged.
        let markdown = "---\ntitle: 你好世界 🎉\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "你好世界 🎉"
        );
    }

    #[test]
    fn test_unicode_in_body() {
        // Multi-byte characters in the body are preserved.
        let markdown = "---\ntitle: Test\n---\n\n日本語テキスト with emoji 🚀";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().contains("日本語テキスト"));
        assert!(doc.body().unwrap().contains("🚀"));
    }

    // YAML edge cases

    #[test]
    fn test_yaml_multiline_string() {
        // Literal block scalar (`|`): newlines are kept.
        let markdown = r#"---
description: |
  This is a
  multiline string
  with preserved newlines.
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let desc = doc.get_field("description").unwrap().as_str().unwrap();
        assert!(desc.contains("multiline string"));
        assert!(desc.contains('\n'));
    }

    #[test]
    fn test_yaml_folded_string() {
        // Folded block scalar (`>`): line breaks between the lines are folded
        // into single spaces.
        let markdown = r#"---
description: >
  This is a folded
  string that becomes
  a single line.
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let desc = doc.get_field("description").unwrap().as_str().unwrap();
        assert!(desc.contains("folded"));
        // Verify the folding itself, not just that the text is present.
        assert!(desc.contains("This is a folded string that becomes a single line."));
    }

    #[test]
    fn test_yaml_null_value() {
        // An explicit `null` is kept as a null field rather than dropped.
        let markdown = "---\noptional: null\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.get_field("optional").unwrap().is_null());
    }

    #[test]
    fn test_yaml_empty_string_value() {
        // A quoted empty string stays an empty string.
        let markdown = "---\nempty: \"\"\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.get_field("empty").unwrap().as_str().unwrap(), "");
    }

    #[test]
    fn test_yaml_special_characters_in_string() {
        // YAML-significant characters inside a quoted string are literal.
        let markdown = "---\nspecial: \"colon: here, and [brackets]\"\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(
            doc.get_field("special").unwrap().as_str().unwrap(),
            "colon: here, and [brackets]"
        );
    }

    #[test]
    fn test_yaml_nested_objects() {
        // Two levels of mapping nesting are reachable through `as_object`.
        let markdown = r#"---
config:
  database:
    host: localhost
    port: 5432
  cache:
    enabled: true
---

Body."#;
        let doc = decompose(markdown).unwrap();
        let config = doc.get_field("config").unwrap().as_object().unwrap();
        let db = config.get("database").unwrap().as_object().unwrap();
        assert_eq!(db.get("host").unwrap().as_str().unwrap(), "localhost");
        assert_eq!(db.get("port").unwrap().as_i64().unwrap(), 5432);
    }

    // SCOPE block edge cases

    #[test]
    fn test_scope_with_empty_body() {
        // A SCOPE block ending at EOF still produces one item, with an empty
        // body.
        let markdown = r#"---
SCOPE: items
name: Item
---"#;
        let doc = decompose(markdown).unwrap();
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);
        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
    }

    #[test]
    fn test_scope_consecutive_blocks() {
        // Two back-to-back SCOPE blocks with the same name are collected into
        // one sequence.
        let markdown = r#"---
SCOPE: a
id: 1
---
---
SCOPE: a
id: 2
---"#;
        let doc = decompose(markdown).unwrap();
        let items = doc.get_field("a").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 2);
    }

    #[test]
    fn test_scope_with_body_containing_dashes() {
        // "---" in the middle of a line of a scoped body is ordinary text.
        let markdown = r#"---
SCOPE: items
name: Item
---

Some text with --- dashes in it."#;
        let doc = decompose(markdown).unwrap();
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        let item = items[0].as_object().unwrap();
        let body = item.get("body").unwrap().as_str().unwrap();
        assert!(body.contains("--- dashes"));
    }

    // QUILL directive edge cases

    #[test]
    fn test_quill_with_underscore_prefix() {
        // A quill name may start with an underscore.
        let markdown = "---\nQUILL: _internal\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "_internal");
    }

    #[test]
    fn test_quill_with_numbers() {
        // Digits are allowed after the first character of a quill name.
        let markdown = "---\nQUILL: form_8_v2\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "form_8_v2");
    }

    #[test]
    fn test_quill_with_additional_fields() {
        // Ordinary fields alongside QUILL in the same block are still parsed.
        let markdown = r#"---
QUILL: my_quill
title: Document Title
author: John Doe
---

Body content."#;
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.quill_tag(), "my_quill");
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Document Title"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "John Doe"
        );
    }

    // Error handling

    #[test]
    fn test_invalid_scope_name_uppercase() {
        // An uppercase SCOPE name is rejected with an "Invalid field name"
        // error.
        let markdown = "---\nSCOPE: ITEMS\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid field name"));
    }

    #[test]
    fn test_invalid_scope_name_starts_with_number() {
        // A SCOPE name with a leading digit is rejected.
        let markdown = "---\nSCOPE: 123items\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_invalid_scope_name_with_hyphen() {
        // A SCOPE name containing a hyphen is rejected.
        let markdown = "---\nSCOPE: my-items\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_invalid_quill_name_uppercase() {
        // A QUILL name containing uppercase letters is rejected.
        let markdown = "---\nQUILL: MyQuill\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_yaml_syntax_error_missing_colon() {
        // "title Test" is not a key/value pair; parsing must fail.
        let markdown = "---\ntitle Test\n---\n\nBody.";
        let result = decompose(markdown);
        assert!(result.is_err());
    }

    #[test]
    fn test_yaml_syntax_error_bad_indentation() {
        let markdown = "---\nitems:\n- one\n - two\n---\n\nBody.";
        // Bad indentation may or may not be an error depending on the YAML
        // parser; the only requirement here is that parsing does not panic.
        let _ = decompose(markdown);
    }

    // Body extraction edge cases

    #[test]
    fn test_body_with_leading_newlines() {
        // Leading newlines after the frontmatter are kept in the body.
        let markdown = "---\ntitle: Test\n---\n\n\n\nBody with leading newlines.";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().starts_with('\n'));
    }

    #[test]
    fn test_body_with_trailing_newlines() {
        // Trailing newlines at the end of the body are kept.
        let markdown = "---\ntitle: Test\n---\n\nBody.\n\n\n";
        let doc = decompose(markdown).unwrap();
        assert!(doc.body().unwrap().ends_with('\n'));
    }

    #[test]
    fn test_no_body_after_frontmatter() {
        // Nothing after the closing delimiter yields an empty (not missing)
        // body.
        let markdown = "---\ntitle: Test\n---";
        let doc = decompose(markdown).unwrap();
        assert_eq!(doc.body(), Some(""));
    }

    // Tag name validation

    #[test]
    fn test_valid_tag_name_single_underscore() {
        assert!(is_valid_tag_name("_"));
    }

    #[test]
    fn test_valid_tag_name_underscore_prefix() {
        assert!(is_valid_tag_name("_private"));
    }

    #[test]
    fn test_valid_tag_name_with_numbers() {
        assert!(is_valid_tag_name("item1"));
        assert!(is_valid_tag_name("item_2"));
    }

    #[test]
    fn test_invalid_tag_name_empty() {
        assert!(!is_valid_tag_name(""));
    }

    #[test]
    fn test_invalid_tag_name_starts_with_number() {
        assert!(!is_valid_tag_name("1item"));
    }

    #[test]
    fn test_invalid_tag_name_uppercase() {
        assert!(!is_valid_tag_name("Items"));
        assert!(!is_valid_tag_name("ITEMS"));
    }

    #[test]
    fn test_invalid_tag_name_special_chars() {
        assert!(!is_valid_tag_name("my-items"));
        assert!(!is_valid_tag_name("my.items"));
        assert!(!is_valid_tag_name("my items"));
    }

    // Parsing leaves non-string YAML values and existing guillemets untouched

    #[test]
    fn test_guillemet_in_yaml_preserves_non_strings() {
        let markdown = r#"---
count: 42
price: 19.99
active: true
items:
  - first
  - 100
  - true
---

Body."#;
        let doc = decompose(markdown).unwrap();
        // Top-level scalars keep their YAML types.
        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
        assert_eq!(doc.get_field("price").unwrap().as_f64().unwrap(), 19.99);
        assert!(doc.get_field("active").unwrap().as_bool().unwrap());

        // Sequence entries keep their individual types as well.
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 3);
        assert_eq!(items[0].as_str().unwrap(), "first");
        assert_eq!(items[1].as_i64().unwrap(), 100);
        assert!(items[2].as_bool().unwrap());
    }

    #[test]
    fn test_guillemet_double_conversion_prevention() {
        // Guillemets («») already present in the input must not be processed
        // a second time; the value stays as-is (not double-escaped).
        let markdown = "---\ntitle: Already «converted»\n---\n\nBody.";
        let doc = decompose(markdown).unwrap();
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Already «converted»"
        );
    }
}