quillmark_core/
parse.rs

1//! # Parsing Module
2//!
3//! Parsing functionality for markdown documents with YAML frontmatter.
4//!
5//! ## Overview
6//!
//! The `parse` module provides the [`ParsedDocument::from_markdown`] function for parsing markdown documents with YAML frontmatter.
8//!
9//! ## Key Types
10//!
11//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
12//! - [`BODY_FIELD`]: Constant for the field name storing document body
13//!
14//! ## Examples
15//!
16//! ### Basic Parsing
17//!
18//! ```
19//! use quillmark_core::ParsedDocument;
20//!
21//! let markdown = r#"---
22//! title: My Document
23//! author: John Doe
24//! ---
25//!
26//! # Introduction
27//!
28//! Document content here.
29//! "#;
30//!
31//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
32//! let title = doc.get_field("title")
33//!     .and_then(|v| v.as_str())
34//!     .unwrap_or("Untitled");
35//! ```
36//!
37//! ## Error Handling
38//!
39//! The [`ParsedDocument::from_markdown`] function returns errors for:
40//! - Malformed YAML syntax
41//! - Unclosed frontmatter blocks
42//! - Multiple global frontmatter blocks
43//! - Both QUILL and CARD specified in the same block
44//! - Reserved field name usage
45//! - Name collisions
46//!
47//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.
48
49use std::collections::HashMap;
50
51use crate::value::QuillValue;
52
/// The field name used to store the document body.
///
/// The parser writes the body text under this key, both for the document as a
/// whole and inside each card item. Frontmatter that defines this key itself is
/// rejected as a reserved field name.
pub const BODY_FIELD: &str = "BODY";
55
/// A parsed markdown document with frontmatter.
///
/// When produced by [`ParsedDocument::from_markdown`], the field map holds the
/// frontmatter values together with the body (under [`BODY_FIELD`]); documents
/// that contained at least one metadata block also carry a `CARDS` array.
#[derive(Debug, Clone)]
pub struct ParsedDocument {
    // All document fields keyed by name (frontmatter values, body, cards).
    fields: HashMap<String, QuillValue>,
    // Name taken from the `QUILL` key, or "__default__" when none was given.
    quill_tag: String,
}
62
63impl ParsedDocument {
64    /// Create a new ParsedDocument with the given fields
65    pub fn new(fields: HashMap<String, QuillValue>) -> Self {
66        Self {
67            fields,
68            quill_tag: "__default__".to_string(),
69        }
70    }
71
72    /// Create a ParsedDocument from fields and quill tag
73    pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: String) -> Self {
74        Self { fields, quill_tag }
75    }
76
77    /// Create a ParsedDocument from markdown string
78    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
79        decompose(markdown)
80    }
81
82    /// Get the quill tag (from QUILL key, or "__default__" if not specified)
83    pub fn quill_tag(&self) -> &str {
84        &self.quill_tag
85    }
86
87    /// Get the document body
88    pub fn body(&self) -> Option<&str> {
89        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
90    }
91
92    /// Get a specific field
93    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
94        self.fields.get(name)
95    }
96
97    /// Get all fields (including body)
98    pub fn fields(&self) -> &HashMap<String, QuillValue> {
99        &self.fields
100    }
101
102    /// Create a new ParsedDocument with default values applied
103    ///
104    /// This method creates a new ParsedDocument with default values applied for any
105    /// fields that are missing from the original document but have defaults specified.
106    /// Existing fields are preserved and not overwritten.
107    ///
108    /// # Arguments
109    ///
110    /// * `defaults` - A HashMap of field names to their default QuillValues
111    ///
112    /// # Returns
113    ///
114    /// A new ParsedDocument with defaults applied for missing fields
115    pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
116        let mut fields = self.fields.clone();
117
118        for (field_name, default_value) in defaults {
119            // Only apply default if field is missing
120            if !fields.contains_key(field_name) {
121                fields.insert(field_name.clone(), default_value.clone());
122            }
123        }
124
125        Self {
126            fields,
127            quill_tag: self.quill_tag.clone(),
128        }
129    }
130
131    /// Create a new ParsedDocument with coerced field values
132    ///
133    /// This method applies type coercions to field values based on the schema.
134    /// Coercions include:
135    /// - Singular values to arrays when schema expects array
136    /// - String "true"/"false" to boolean
137    /// - Numbers to boolean (0=false, non-zero=true)
138    /// - String numbers to number type
139    /// - Boolean to number (true=1, false=0)
140    ///
141    /// # Arguments
142    ///
143    /// * `schema` - A JSON Schema object defining expected field types
144    ///
145    /// # Returns
146    ///
147    /// A new ParsedDocument with coerced field values
148    pub fn with_coercion(&self, schema: &QuillValue) -> Self {
149        use crate::schema::coerce_document;
150
151        let coerced_fields = coerce_document(schema, &self.fields);
152
153        Self {
154            fields: coerced_fields,
155            quill_tag: self.quill_tag.clone(),
156        }
157    }
158}
159
/// One `---`-delimited metadata block located in the source text.
///
/// Offsets are byte positions into the markdown string that was scanned.
#[derive(Debug)]
struct MetadataBlock {
    // Position of the opening "---".
    start: usize,
    // Position just past the closing delimiter (including its line ending, if any).
    end: usize,
    // Parsed YAML as JSON with any QUILL/CARD key removed. `None` when the block
    // content is empty or when nothing is left after removing the directive key.
    // (A YAML syntax error aborts parsing with an error; it never yields `None`.)
    yaml_value: Option<serde_json::Value>,
    // Field name from the CARD key, if the block has one.
    tag: Option<String>,
    // Quill name from the QUILL key, if the block has one.
    quill_name: Option<String>,
}
168
/// Report whether `name` matches the pattern `[a-z_][a-z0-9_]*`.
///
/// The empty string is rejected, the first character must be an ASCII
/// lowercase letter or `_`, and every following character must be an ASCII
/// lowercase letter, an ASCII digit, or `_`.
fn is_valid_tag_name(name: &str) -> bool {
    let mut remaining = name.chars();

    match remaining.next() {
        Some(head) if head == '_' || head.is_ascii_lowercase() => {
            remaining.all(|c| c == '_' || c.is_ascii_lowercase() || c.is_ascii_digit())
        }
        // Empty input or a disallowed leading character.
        _ => false,
    }
}
190
/// Check if a position is inside a fenced code block.
///
/// This uses a simple count-based approach: every line in `markdown[..pos]`
/// that begins with "```" or "~~~" is counted as a fence, and an odd count
/// means `pos` lies inside a fenced block.
///
/// Lines are split on `'\n'`, so a fence is counted exactly once whether the
/// preceding line ends in LF or CRLF. (Counting `"\n```"` and `"\r\n```"`
/// separately would count every CRLF fence twice and make the parity
/// meaningless for CRLF documents.)
///
/// This is a heuristic, not a full CommonMark fence parser: indented fences
/// are not recognised, and backtick and tilde fences share one counter.
///
/// `pos` must be a char boundary no greater than `markdown.len()`; otherwise
/// the slice below panics.
fn is_inside_fenced_block(markdown: &str, pos: usize) -> bool {
    let before = &markdown[..pos];

    let fence_count = before
        .split('\n')
        .filter(|line| line.starts_with("```") || line.starts_with("~~~"))
        .count();

    // Odd count means we're inside a fenced block
    fence_count % 2 == 1
}
215
216/// Find all metadata blocks in the document
217fn find_metadata_blocks(markdown: &str) -> Result<Vec<MetadataBlock>, crate::error::ParseError> {
218    let mut blocks = Vec::new();
219    let mut pos = 0;
220
221    while pos < markdown.len() {
222        // Look for opening "---\n" or "---\r\n"
223        let search_str = &markdown[pos..];
224        let delimiter_result = search_str
225            .find("---\n")
226            .map(|p| (p, 4, "\n"))
227            .or_else(|| search_str.find("---\r\n").map(|p| (p, 5, "\r\n")));
228
229        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
230            let abs_pos = pos + delimiter_pos;
231
232            // Check if the delimiter is at the start of a line
233            let is_start_of_line = if abs_pos == 0 {
234                true
235            } else {
236                let char_before = markdown.as_bytes()[abs_pos - 1];
237                char_before == b'\n' || char_before == b'\r'
238            };
239
240            if !is_start_of_line {
241                pos = abs_pos + 1;
242                continue;
243            }
244
245            // Skip if inside a fenced code block
246            if is_inside_fenced_block(markdown, abs_pos) {
247                pos = abs_pos + 3;
248                continue;
249            }
250
251            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"
252
253            // Check if this --- is a horizontal rule (blank lines above AND below)
254            let preceded_by_blank = if abs_pos > 0 {
255                // Check if there's a blank line before the ---
256                let before = &markdown[..abs_pos];
257                before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
258            } else {
259                false
260            };
261
262            let followed_by_blank = if content_start < markdown.len() {
263                markdown[content_start..].starts_with('\n')
264                    || markdown[content_start..].starts_with("\r\n")
265            } else {
266                false
267            };
268
269            // Horizontal rule: blank lines both above and below
270            if preceded_by_blank && followed_by_blank {
271                // This is a horizontal rule in the body, skip it
272                pos = abs_pos + 3; // Skip past "---"
273                continue;
274            }
275
276            // Check if followed by non-blank line (or if we're at document start)
277            // This starts a metadata block
278            if followed_by_blank {
279                // --- followed by blank line but NOT preceded by blank line
280                // This is NOT a metadata block opening, skip it
281                pos = abs_pos + 3;
282                continue;
283            }
284
285            // Found potential metadata block opening (followed by non-blank line)
286            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
287            let rest = &markdown[content_start..];
288
289            // First try to find delimiters with trailing newlines
290            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
291            let closing_with_newline = closing_patterns
292                .iter()
293                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
294                .min_by_key(|(p, _)| *p);
295
296            // Also check for closing at end of document (no trailing newline)
297            let closing_at_eof = ["\n---", "\r\n---"]
298                .iter()
299                .filter_map(|delim| {
300                    rest.find(delim).and_then(|p| {
301                        if p + delim.len() == rest.len() {
302                            Some((p, delim.len()))
303                        } else {
304                            None
305                        }
306                    })
307                })
308                .min_by_key(|(p, _)| *p);
309
310            let closing_result = match (closing_with_newline, closing_at_eof) {
311                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
312                (Some(_), Some(_)) => closing_with_newline,
313                (Some(_), None) => closing_with_newline,
314                (None, Some(_)) => closing_at_eof,
315                (None, None) => None,
316            };
317
318            if let Some((closing_pos, closing_len)) = closing_result {
319                let abs_closing_pos = content_start + closing_pos;
320                let content = &markdown[content_start..abs_closing_pos];
321
322                // Check YAML size limit
323                if content.len() > crate::error::MAX_YAML_SIZE {
324                    return Err(crate::error::ParseError::InputTooLarge {
325                        size: content.len(),
326                        max: crate::error::MAX_YAML_SIZE,
327                    });
328                }
329
330                // Parse YAML content to check for reserved keys (QUILL, CARD)
331                // First, try to parse as YAML
332                let (tag, quill_name, yaml_value) = if !content.is_empty() {
333                    // Try to parse the YAML to check for reserved keys
334                    match serde_saphyr::from_str::<serde_json::Value>(content) {
335                        Ok(parsed_yaml) => {
336                            if let Some(mapping) = parsed_yaml.as_object() {
337                                let quill_key = "QUILL";
338                                let card_key = "CARD";
339
340                                let has_quill = mapping.contains_key(quill_key);
341                                let has_card = mapping.contains_key(card_key);
342
343                                if has_quill && has_card {
344                                    return Err(crate::error::ParseError::InvalidStructure(
345                                        "Cannot specify both QUILL and CARD in the same block"
346                                            .to_string(),
347                                    ));
348                                }
349
350                                // Check for reserved field names (BODY, CARDS)
351                                const RESERVED_FIELDS: &[&str] = &["BODY", "CARDS"];
352                                for reserved in RESERVED_FIELDS {
353                                    if mapping.contains_key(*reserved) {
354                                        return Err(crate::error::ParseError::InvalidStructure(
355                                            format!(
356                                                "Reserved field name '{}' cannot be used in YAML frontmatter",
357                                                reserved
358                                            ),
359                                        ));
360                                    }
361                                }
362
363                                if has_quill {
364                                    // Extract quill name
365                                    let quill_value = mapping.get(quill_key).unwrap();
366                                    let quill_name_str = quill_value
367                                        .as_str()
368                                        .ok_or("QUILL value must be a string")?;
369
370                                    if !is_valid_tag_name(quill_name_str) {
371                                        return Err(crate::error::ParseError::InvalidStructure(format!(
372                                            "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
373                                            quill_name_str
374                                        )));
375                                    }
376
377                                    // Remove QUILL from the YAML value for processing
378                                    let mut new_mapping = mapping.clone();
379                                    new_mapping.remove(quill_key);
380                                    let new_value = if new_mapping.is_empty() {
381                                        None
382                                    } else {
383                                        Some(serde_json::Value::Object(new_mapping))
384                                    };
385
386                                    (None, Some(quill_name_str.to_string()), new_value)
387                                } else if has_card {
388                                    // Extract card field name
389                                    let card_value = mapping.get(card_key).unwrap();
390                                    let field_name =
391                                        card_value.as_str().ok_or("CARD value must be a string")?;
392
393                                    if !is_valid_tag_name(field_name) {
394                                        return Err(crate::error::ParseError::InvalidStructure(format!(
395                                            "Invalid card field name '{}': must match pattern [a-z_][a-z0-9_]*",
396                                            field_name
397                                        )));
398                                    }
399
400                                    // Remove CARD from the YAML value for processing
401                                    let mut new_mapping = mapping.clone();
402                                    new_mapping.remove(card_key);
403                                    let new_value = if new_mapping.is_empty() {
404                                        None
405                                    } else {
406                                        Some(serde_json::Value::Object(new_mapping))
407                                    };
408
409                                    (Some(field_name.to_string()), None, new_value)
410                                } else {
411                                    // No reserved keys, keep the parsed YAML
412                                    (None, None, Some(parsed_yaml))
413                                }
414                            } else {
415                                // Not a mapping, keep the parsed YAML (could be null for whitespace)
416                                (None, None, Some(parsed_yaml))
417                            }
418                        }
419                        Err(e) => {
420                            // YAML parsing failed - return error with context
421                            return Err(crate::error::ParseError::YamlError(e));
422                        }
423                    }
424                } else {
425                    // Empty content
426                    (None, None, None)
427                };
428
429                blocks.push(MetadataBlock {
430                    start: abs_pos,
431                    end: abs_closing_pos + closing_len, // After closing delimiter
432                    yaml_value,
433                    tag,
434                    quill_name,
435                });
436
437                pos = abs_closing_pos + closing_len;
438            } else if abs_pos == 0 {
439                // Frontmatter started but not closed
440                return Err(crate::error::ParseError::InvalidStructure(
441                    "Frontmatter started but not closed with ---".to_string(),
442                ));
443            } else {
444                // Not a valid metadata block, skip this position
445                pos = abs_pos + 3;
446            }
447        } else {
448            break;
449        }
450    }
451
452    Ok(blocks)
453}
454
455/// Decompose markdown into frontmatter fields and body
456fn decompose(markdown: &str) -> Result<ParsedDocument, crate::error::ParseError> {
457    // Check input size limit
458    if markdown.len() > crate::error::MAX_INPUT_SIZE {
459        return Err(crate::error::ParseError::InputTooLarge {
460            size: markdown.len(),
461            max: crate::error::MAX_INPUT_SIZE,
462        });
463    }
464
465    let mut fields = HashMap::new();
466
467    // Find all metadata blocks
468    let blocks = find_metadata_blocks(markdown)?;
469
470    if blocks.is_empty() {
471        // No metadata blocks, entire content is body
472        fields.insert(
473            BODY_FIELD.to_string(),
474            QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
475        );
476        return Ok(ParsedDocument::new(fields));
477    }
478
479    // Collect all card items into unified CARDS array
480    let mut cards_array: Vec<serde_json::Value> = Vec::new();
481    let mut global_frontmatter_index: Option<usize> = None;
482    let mut quill_name: Option<String> = None;
483
484    // First pass: identify global frontmatter, quill directive, and validate
485    for (idx, block) in blocks.iter().enumerate() {
486        if idx == 0 {
487            // Top-level frontmatter: can have QUILL or neither (not considered a card)
488            if let Some(ref name) = block.quill_name {
489                quill_name = Some(name.clone());
490            }
491            // If it has neither QUILL nor CARD, it's global frontmatter
492            if block.tag.is_none() && block.quill_name.is_none() {
493                global_frontmatter_index = Some(idx);
494            }
495        } else {
496            // Inline blocks (idx > 0): MUST have CARD, cannot have QUILL
497            if block.quill_name.is_some() {
498                return Err(crate::error::ParseError::InvalidStructure("QUILL directive can only appear in the top-level frontmatter, not in inline blocks. Use CARD instead.".to_string()));
499            }
500            if block.tag.is_none() {
501                // Inline block without CARD
502                return Err(crate::error::ParseError::missing_card_directive());
503            }
504        }
505    }
506
507    // Parse global frontmatter if present
508    if let Some(idx) = global_frontmatter_index {
509        let block = &blocks[idx];
510
511        // Get parsed JSON fields directly (already parsed in find_metadata_blocks)
512        let json_fields: HashMap<String, serde_json::Value> = match &block.yaml_value {
513            Some(serde_json::Value::Object(mapping)) => mapping
514                .iter()
515                .map(|(k, v)| (k.clone(), v.clone()))
516                .collect(),
517            Some(serde_json::Value::Null) => {
518                // Null value (from whitespace-only YAML) - treat as empty mapping
519                HashMap::new()
520            }
521            Some(_) => {
522                // Non-mapping, non-null YAML (e.g., scalar, sequence) - this is an error for frontmatter
523                return Err(crate::error::ParseError::InvalidStructure(
524                    "Invalid YAML frontmatter: expected a mapping".to_string(),
525                ));
526            }
527            None => HashMap::new(),
528        };
529
530        // Convert JSON values to QuillValue at boundary
531        for (key, value) in json_fields {
532            fields.insert(key, QuillValue::from_json(value));
533        }
534    }
535
536    // Process blocks with quill directives
537    for block in &blocks {
538        if block.quill_name.is_some() {
539            // Quill directive blocks can have YAML content (becomes part of frontmatter)
540            if let Some(ref json_val) = block.yaml_value {
541                let json_fields: HashMap<String, serde_json::Value> = match json_val {
542                    serde_json::Value::Object(mapping) => mapping
543                        .iter()
544                        .map(|(k, v)| (k.clone(), v.clone()))
545                        .collect(),
546                    serde_json::Value::Null => {
547                        // Null value (from whitespace-only YAML) - treat as empty mapping
548                        HashMap::new()
549                    }
550                    _ => {
551                        return Err(crate::error::ParseError::InvalidStructure(
552                            "Invalid YAML in quill block: expected a mapping".to_string(),
553                        ));
554                    }
555                };
556
557                // Check for conflicts with existing fields
558                for key in json_fields.keys() {
559                    if fields.contains_key(key) {
560                        return Err(crate::error::ParseError::InvalidStructure(format!(
561                            "Name collision: quill block field '{}' conflicts with existing field",
562                            key
563                        )));
564                    }
565                }
566
567                // Convert JSON values to QuillValue at boundary
568                for (key, value) in json_fields {
569                    fields.insert(key, QuillValue::from_json(value));
570                }
571            }
572        }
573    }
574
575    // Parse tagged blocks (CARD blocks)
576    for (idx, block) in blocks.iter().enumerate() {
577        if let Some(ref tag_name) = block.tag {
578            // Get YAML metadata directly (already parsed in find_metadata_blocks)
579            // Get JSON metadata directly (already parsed in find_metadata_blocks)
580            let mut item_fields: serde_json::Map<String, serde_json::Value> =
581                match &block.yaml_value {
582                    Some(serde_json::Value::Object(mapping)) => mapping.clone(),
583                    Some(serde_json::Value::Null) => {
584                        // Null value (from whitespace-only YAML) - treat as empty mapping
585                        serde_json::Map::new()
586                    }
587                    Some(_) => {
588                        return Err(crate::error::ParseError::InvalidStructure(format!(
589                            "Invalid YAML in card block '{}': expected a mapping",
590                            tag_name
591                        )));
592                    }
593                    None => serde_json::Map::new(),
594                };
595
596            // Extract body for this card block
597            let body_start = block.end;
598            let body_end = if idx + 1 < blocks.len() {
599                blocks[idx + 1].start
600            } else {
601                markdown.len()
602            };
603            let body = &markdown[body_start..body_end];
604
605            // Add body to item fields
606            item_fields.insert(
607                BODY_FIELD.to_string(),
608                serde_json::Value::String(body.to_string()),
609            );
610
611            // Add CARD discriminator field
612            item_fields.insert(
613                "CARD".to_string(),
614                serde_json::Value::String(tag_name.clone()),
615            );
616
617            // Add to CARDS array
618            cards_array.push(serde_json::Value::Object(item_fields));
619        }
620    }
621
622    // Extract global body
623    // Body starts after global frontmatter or quill block (whichever comes first)
624    // Body ends at the first card block or EOF
625    let first_non_card_block_idx = blocks
626        .iter()
627        .position(|b| b.tag.is_none() && b.quill_name.is_none())
628        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));
629
630    let (body_start, body_end) = if let Some(idx) = first_non_card_block_idx {
631        // Body starts after the first non-card block (global frontmatter or quill)
632        let start = blocks[idx].end;
633
634        // Body ends at the first card block after this, or EOF
635        let end = blocks
636            .iter()
637            .skip(idx + 1)
638            .find(|b| b.tag.is_some())
639            .map(|b| b.start)
640            .unwrap_or(markdown.len());
641
642        (start, end)
643    } else {
644        // No global frontmatter or quill block - body is everything before the first card block
645        let end = blocks
646            .iter()
647            .find(|b| b.tag.is_some())
648            .map(|b| b.start)
649            .unwrap_or(0);
650
651        (0, end)
652    };
653
654    let global_body = &markdown[body_start..body_end];
655
656    fields.insert(
657        BODY_FIELD.to_string(),
658        QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
659    );
660
661    // Always add CARDS array to fields (may be empty)
662    fields.insert(
663        "CARDS".to_string(),
664        QuillValue::from_json(serde_json::Value::Array(cards_array)),
665    );
666
667    let quill_tag = quill_name.unwrap_or_else(|| "__default__".to_string());
668    let parsed = ParsedDocument::with_quill_tag(fields, quill_tag);
669
670    Ok(parsed)
671}
672
673#[cfg(test)]
674mod tests {
675    use super::*;
676
677    #[test]
678    fn test_no_frontmatter() {
679        let markdown = "# Hello World\n\nThis is a test.";
680        let doc = decompose(markdown).unwrap();
681
682        assert_eq!(doc.body(), Some(markdown));
683        assert_eq!(doc.fields().len(), 1);
684        // Verify default quill tag is set
685        assert_eq!(doc.quill_tag(), "__default__");
686    }
687
688    #[test]
689    fn test_with_frontmatter() {
690        let markdown = r#"---
691title: Test Document
692author: Test Author
693---
694
695# Hello World
696
697This is the body."#;
698
699        let doc = decompose(markdown).unwrap();
700
701        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
702        assert_eq!(
703            doc.get_field("title").unwrap().as_str().unwrap(),
704            "Test Document"
705        );
706        assert_eq!(
707            doc.get_field("author").unwrap().as_str().unwrap(),
708            "Test Author"
709        );
710        assert_eq!(doc.fields().len(), 4); // title, author, body, CARDS
711                                           // Verify default quill tag is set when no QUILL directive
712        assert_eq!(doc.quill_tag(), "__default__");
713    }
714
715    #[test]
716    fn test_complex_yaml_frontmatter() {
717        let markdown = r#"---
718title: Complex Document
719tags:
720  - test
721  - yaml
722metadata:
723  version: 1.0
724  nested:
725    field: value
726---
727
728Content here."#;
729
730        let doc = decompose(markdown).unwrap();
731
732        assert_eq!(doc.body(), Some("\nContent here."));
733        assert_eq!(
734            doc.get_field("title").unwrap().as_str().unwrap(),
735            "Complex Document"
736        );
737
738        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
739        assert_eq!(tags.len(), 2);
740        assert_eq!(tags[0].as_str().unwrap(), "test");
741        assert_eq!(tags[1].as_str().unwrap(), "yaml");
742    }
743
744    #[test]
745    fn test_with_defaults_empty_document() {
746        use std::collections::HashMap;
747
748        let mut defaults = HashMap::new();
749        defaults.insert(
750            "status".to_string(),
751            QuillValue::from_json(serde_json::json!("draft")),
752        );
753        defaults.insert(
754            "version".to_string(),
755            QuillValue::from_json(serde_json::json!(1)),
756        );
757
758        // Create an empty parsed document
759        let doc = ParsedDocument::new(HashMap::new());
760        let doc_with_defaults = doc.with_defaults(&defaults);
761
762        // Check that defaults were applied
763        assert_eq!(
764            doc_with_defaults
765                .get_field("status")
766                .unwrap()
767                .as_str()
768                .unwrap(),
769            "draft"
770        );
771        assert_eq!(
772            doc_with_defaults
773                .get_field("version")
774                .unwrap()
775                .as_number()
776                .unwrap()
777                .as_i64()
778                .unwrap(),
779            1
780        );
781    }
782
783    #[test]
784    fn test_with_defaults_preserves_existing_values() {
785        use std::collections::HashMap;
786
787        let mut defaults = HashMap::new();
788        defaults.insert(
789            "status".to_string(),
790            QuillValue::from_json(serde_json::json!("draft")),
791        );
792
793        // Create document with existing status
794        let mut fields = HashMap::new();
795        fields.insert(
796            "status".to_string(),
797            QuillValue::from_json(serde_json::json!("published")),
798        );
799        let doc = ParsedDocument::new(fields);
800
801        let doc_with_defaults = doc.with_defaults(&defaults);
802
803        // Existing value should be preserved
804        assert_eq!(
805            doc_with_defaults
806                .get_field("status")
807                .unwrap()
808                .as_str()
809                .unwrap(),
810            "published"
811        );
812    }
813
814    #[test]
815    fn test_with_defaults_partial_application() {
816        use std::collections::HashMap;
817
818        let mut defaults = HashMap::new();
819        defaults.insert(
820            "status".to_string(),
821            QuillValue::from_json(serde_json::json!("draft")),
822        );
823        defaults.insert(
824            "version".to_string(),
825            QuillValue::from_json(serde_json::json!(1)),
826        );
827
828        // Create document with only one field
829        let mut fields = HashMap::new();
830        fields.insert(
831            "status".to_string(),
832            QuillValue::from_json(serde_json::json!("published")),
833        );
834        let doc = ParsedDocument::new(fields);
835
836        let doc_with_defaults = doc.with_defaults(&defaults);
837
838        // Existing field preserved, missing field gets default
839        assert_eq!(
840            doc_with_defaults
841                .get_field("status")
842                .unwrap()
843                .as_str()
844                .unwrap(),
845            "published"
846        );
847        assert_eq!(
848            doc_with_defaults
849                .get_field("version")
850                .unwrap()
851                .as_number()
852                .unwrap()
853                .as_i64()
854                .unwrap(),
855            1
856        );
857    }
858
859    #[test]
860    fn test_with_defaults_no_defaults() {
861        use std::collections::HashMap;
862
863        let defaults = HashMap::new(); // Empty defaults map
864
865        let doc = ParsedDocument::new(HashMap::new());
866        let doc_with_defaults = doc.with_defaults(&defaults);
867
868        // No defaults should be applied
869        assert!(doc_with_defaults.fields().is_empty());
870    }
871
872    #[test]
873    fn test_with_defaults_complex_types() {
874        use std::collections::HashMap;
875
876        let mut defaults = HashMap::new();
877        defaults.insert(
878            "tags".to_string(),
879            QuillValue::from_json(serde_json::json!(["default", "tag"])),
880        );
881
882        let doc = ParsedDocument::new(HashMap::new());
883        let doc_with_defaults = doc.with_defaults(&defaults);
884
885        // Complex default value should be applied
886        let tags = doc_with_defaults
887            .get_field("tags")
888            .unwrap()
889            .as_sequence()
890            .unwrap();
891        assert_eq!(tags.len(), 2);
892        assert_eq!(tags[0].as_str().unwrap(), "default");
893        assert_eq!(tags[1].as_str().unwrap(), "tag");
894    }
895
896    #[test]
897    fn test_with_coercion_singular_to_array() {
898        use std::collections::HashMap;
899
900        let schema = QuillValue::from_json(serde_json::json!({
901            "$schema": "https://json-schema.org/draft/2019-09/schema",
902            "type": "object",
903            "properties": {
904                "tags": {"type": "array"}
905            }
906        }));
907
908        let mut fields = HashMap::new();
909        fields.insert(
910            "tags".to_string(),
911            QuillValue::from_json(serde_json::json!("single-tag")),
912        );
913        let doc = ParsedDocument::new(fields);
914
915        let coerced_doc = doc.with_coercion(&schema);
916
917        let tags = coerced_doc.get_field("tags").unwrap();
918        assert!(tags.as_array().is_some());
919        let tags_array = tags.as_array().unwrap();
920        assert_eq!(tags_array.len(), 1);
921        assert_eq!(tags_array[0].as_str().unwrap(), "single-tag");
922    }
923
924    #[test]
925    fn test_with_coercion_string_to_boolean() {
926        use std::collections::HashMap;
927
928        let schema = QuillValue::from_json(serde_json::json!({
929            "$schema": "https://json-schema.org/draft/2019-09/schema",
930            "type": "object",
931            "properties": {
932                "active": {"type": "boolean"}
933            }
934        }));
935
936        let mut fields = HashMap::new();
937        fields.insert(
938            "active".to_string(),
939            QuillValue::from_json(serde_json::json!("true")),
940        );
941        let doc = ParsedDocument::new(fields);
942
943        let coerced_doc = doc.with_coercion(&schema);
944
945        assert!(coerced_doc.get_field("active").unwrap().as_bool().unwrap());
946    }
947
948    #[test]
949    fn test_with_coercion_string_to_number() {
950        use std::collections::HashMap;
951
952        let schema = QuillValue::from_json(serde_json::json!({
953            "$schema": "https://json-schema.org/draft/2019-09/schema",
954            "type": "object",
955            "properties": {
956                "count": {"type": "number"}
957            }
958        }));
959
960        let mut fields = HashMap::new();
961        fields.insert(
962            "count".to_string(),
963            QuillValue::from_json(serde_json::json!("42")),
964        );
965        let doc = ParsedDocument::new(fields);
966
967        let coerced_doc = doc.with_coercion(&schema);
968
969        assert_eq!(
970            coerced_doc.get_field("count").unwrap().as_i64().unwrap(),
971            42
972        );
973    }
974
975    #[test]
976    fn test_invalid_yaml() {
977        let markdown = r#"---
978title: [invalid yaml
979author: missing close bracket
980---
981
982Content here."#;
983
984        let result = decompose(markdown);
985        assert!(result.is_err());
986        assert!(result
987            .unwrap_err()
988            .to_string()
989            .contains("YAML parsing error"));
990    }
991
992    #[test]
993    fn test_unclosed_frontmatter() {
994        let markdown = r#"---
995title: Test
996author: Test Author
997
998Content without closing ---"#;
999
1000        let result = decompose(markdown);
1001        assert!(result.is_err());
1002        assert!(result.unwrap_err().to_string().contains("not closed"));
1003    }
1004
1005    // Extended metadata tests
1006
1007    #[test]
1008    fn test_basic_tagged_block() {
1009        let markdown = r#"---
1010title: Main Document
1011---
1012
1013Main body content.
1014
1015---
1016CARD: items
1017name: Item 1
1018---
1019
1020Body of item 1."#;
1021
1022        let doc = decompose(markdown).unwrap();
1023
1024        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
1025        assert_eq!(
1026            doc.get_field("title").unwrap().as_str().unwrap(),
1027            "Main Document"
1028        );
1029
1030        // Cards are now in CARDS array with CARD discriminator
1031        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1032        assert_eq!(cards.len(), 1);
1033
1034        let item = cards[0].as_object().unwrap();
1035        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1036        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1037        assert_eq!(
1038            item.get(BODY_FIELD).unwrap().as_str().unwrap(),
1039            "\nBody of item 1."
1040        );
1041    }
1042
1043    #[test]
1044    fn test_multiple_tagged_blocks() {
1045        let markdown = r#"---
1046CARD: items
1047name: Item 1
1048tags: [a, b]
1049---
1050
1051First item body.
1052
1053---
1054CARD: items
1055name: Item 2
1056tags: [c, d]
1057---
1058
1059Second item body."#;
1060
1061        let doc = decompose(markdown).unwrap();
1062
1063        // Cards are in CARDS array
1064        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1065        assert_eq!(cards.len(), 2);
1066
1067        let item1 = cards[0].as_object().unwrap();
1068        assert_eq!(item1.get("CARD").unwrap().as_str().unwrap(), "items");
1069        assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");
1070
1071        let item2 = cards[1].as_object().unwrap();
1072        assert_eq!(item2.get("CARD").unwrap().as_str().unwrap(), "items");
1073        assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
1074    }
1075
1076    #[test]
1077    fn test_mixed_global_and_tagged() {
1078        let markdown = r#"---
1079title: Global
1080author: John Doe
1081---
1082
1083Global body.
1084
1085---
1086CARD: sections
1087title: Section 1
1088---
1089
1090Section 1 content.
1091
1092---
1093CARD: sections
1094title: Section 2
1095---
1096
1097Section 2 content."#;
1098
1099        let doc = decompose(markdown).unwrap();
1100
1101        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
1102        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
1103
1104        // Cards are in unified CARDS array
1105        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1106        assert_eq!(cards.len(), 2);
1107        assert_eq!(
1108            cards[0]
1109                .as_object()
1110                .unwrap()
1111                .get("CARD")
1112                .unwrap()
1113                .as_str()
1114                .unwrap(),
1115            "sections"
1116        );
1117    }
1118
1119    #[test]
1120    fn test_empty_tagged_metadata() {
1121        let markdown = r#"---
1122CARD: items
1123---
1124
1125Body without metadata."#;
1126
1127        let doc = decompose(markdown).unwrap();
1128
1129        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1130        assert_eq!(cards.len(), 1);
1131
1132        let item = cards[0].as_object().unwrap();
1133        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1134        assert_eq!(
1135            item.get(BODY_FIELD).unwrap().as_str().unwrap(),
1136            "\nBody without metadata."
1137        );
1138    }
1139
1140    #[test]
1141    fn test_tagged_block_without_body() {
1142        let markdown = r#"---
1143CARD: items
1144name: Item
1145---"#;
1146
1147        let doc = decompose(markdown).unwrap();
1148
1149        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1150        assert_eq!(cards.len(), 1);
1151
1152        let item = cards[0].as_object().unwrap();
1153        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1154        assert_eq!(item.get(BODY_FIELD).unwrap().as_str().unwrap(), "");
1155    }
1156
1157    #[test]
1158    fn test_name_collision_global_and_tagged() {
1159        let markdown = r#"---
1160items: "global value"
1161---
1162
1163Body
1164
1165---
1166CARD: items
1167name: Item
1168---
1169
1170Item body"#;
1171
1172        let result = decompose(markdown);
1173        assert!(result.is_ok(), "Name collision should be allowed now");
1174    }
1175
1176    #[test]
1177    fn test_card_name_collision_with_array_field() {
1178        // CARD type names CAN now conflict with frontmatter field names
1179        let markdown = r#"---
1180items:
1181  - name: Global Item 1
1182    value: 100
1183---
1184
1185Global body
1186
1187---
1188CARD: items
1189name: Scope Item 1
1190---
1191
1192Scope item 1 body"#;
1193
1194        let result = decompose(markdown);
1195        assert!(
1196            result.is_ok(),
1197            "Collision with array field should be allowed"
1198        );
1199    }
1200
1201    #[test]
1202    fn test_empty_global_array_with_card() {
1203        // CARD type names CAN now conflict with frontmatter field names
1204        let markdown = r#"---
1205items: []
1206---
1207
1208Global body
1209
1210---
1211CARD: items
1212name: Item 1
1213---
1214
1215Item 1 body"#;
1216
1217        let result = decompose(markdown);
1218        assert!(
1219            result.is_ok(),
1220            "Collision with empty array field should be allowed"
1221        );
1222    }
1223
1224    #[test]
1225    fn test_reserved_field_body_rejected() {
1226        let markdown = r#"---
1227CARD: section
1228BODY: Test
1229---"#;
1230
1231        let result = decompose(markdown);
1232        assert!(result.is_err(), "BODY is a reserved field name");
1233        assert!(result
1234            .unwrap_err()
1235            .to_string()
1236            .contains("Reserved field name"));
1237    }
1238
1239    #[test]
1240    fn test_reserved_field_cards_rejected() {
1241        let markdown = r#"---
1242title: Test
1243CARDS: []
1244---"#;
1245
1246        let result = decompose(markdown);
1247        assert!(result.is_err(), "CARDS is a reserved field name");
1248        assert!(result
1249            .unwrap_err()
1250            .to_string()
1251            .contains("Reserved field name"));
1252    }
1253
1254    #[test]
1255    fn test_delimiter_inside_fenced_code_block_backticks() {
1256        let markdown = r#"---
1257title: Test
1258---
1259Here is some code:
1260
1261```yaml
1262---
1263fake: frontmatter
1264---
1265```
1266
1267More content.
1268"#;
1269
1270        let doc = decompose(markdown).unwrap();
1271        // The --- inside the code block should NOT be parsed as metadata
1272        assert!(doc.body().unwrap().contains("fake: frontmatter"));
1273        assert!(doc.get_field("fake").is_none());
1274    }
1275
1276    #[test]
1277    fn test_delimiter_inside_fenced_code_block_tildes() {
1278        let markdown = r#"---
1279title: Test
1280---
1281Here is some code:
1282
1283~~~yaml
1284---
1285fake: frontmatter
1286---
1287~~~
1288
1289More content.
1290"#;
1291
1292        let doc = decompose(markdown).unwrap();
1293        // The --- inside the code block should NOT be parsed as metadata
1294        assert!(doc.body().unwrap().contains("fake: frontmatter"));
1295        assert!(doc.get_field("fake").is_none());
1296    }
1297
1298    #[test]
1299    fn test_invalid_tag_syntax() {
1300        let markdown = r#"---
1301CARD: Invalid-Name
1302title: Test
1303---"#;
1304
1305        let result = decompose(markdown);
1306        assert!(result.is_err());
1307        assert!(result
1308            .unwrap_err()
1309            .to_string()
1310            .contains("Invalid card field name"));
1311    }
1312
1313    #[test]
1314    fn test_multiple_global_frontmatter_blocks() {
1315        let markdown = r#"---
1316title: First
1317---
1318
1319Body
1320
1321---
1322author: Second
1323---
1324
1325More body"#;
1326
1327        let result = decompose(markdown);
1328        assert!(result.is_err());
1329
1330        // Verify the error message contains CARD hint
1331        let err = result.unwrap_err();
1332        let err_str = err.to_string();
1333        assert!(
1334            err_str.contains("CARD"),
1335            "Error should mention CARD directive: {}",
1336            err_str
1337        );
1338        assert!(
1339            err_str.contains("missing"),
1340            "Error should indicate missing directive: {}",
1341            err_str
1342        );
1343    }
1344
1345    #[test]
1346    fn test_adjacent_blocks_different_tags() {
1347        let markdown = r#"---
1348CARD: items
1349name: Item 1
1350---
1351
1352Item 1 body
1353
1354---
1355CARD: sections
1356title: Section 1
1357---
1358
1359Section 1 body"#;
1360
1361        let doc = decompose(markdown).unwrap();
1362
1363        // All cards in unified CARDS array
1364        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1365        assert_eq!(cards.len(), 2);
1366
1367        // First card is "items" type
1368        let item = cards[0].as_object().unwrap();
1369        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1370        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1371
1372        // Second card is "sections" type
1373        let section = cards[1].as_object().unwrap();
1374        assert_eq!(section.get("CARD").unwrap().as_str().unwrap(), "sections");
1375        assert_eq!(section.get("title").unwrap().as_str().unwrap(), "Section 1");
1376    }
1377
1378    #[test]
1379    fn test_order_preservation() {
1380        let markdown = r#"---
1381CARD: items
1382id: 1
1383---
1384
1385First
1386
1387---
1388CARD: items
1389id: 2
1390---
1391
1392Second
1393
1394---
1395CARD: items
1396id: 3
1397---
1398
1399Third"#;
1400
1401        let doc = decompose(markdown).unwrap();
1402
1403        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1404        assert_eq!(cards.len(), 3);
1405
1406        for (i, card) in cards.iter().enumerate() {
1407            let mapping = card.as_object().unwrap();
1408            assert_eq!(mapping.get("CARD").unwrap().as_str().unwrap(), "items");
1409            let id = mapping.get("id").unwrap().as_i64().unwrap();
1410            assert_eq!(id, (i + 1) as i64);
1411        }
1412    }
1413
1414    #[test]
1415    fn test_product_catalog_integration() {
1416        let markdown = r#"---
1417title: Product Catalog
1418author: John Doe
1419date: 2024-01-01
1420---
1421
1422This is the main catalog description.
1423
1424---
1425CARD: products
1426name: Widget A
1427price: 19.99
1428sku: WID-001
1429---
1430
1431The **Widget A** is our most popular product.
1432
1433---
1434CARD: products
1435name: Gadget B
1436price: 29.99
1437sku: GAD-002
1438---
1439
1440The **Gadget B** is perfect for professionals.
1441
1442---
1443CARD: reviews
1444product: Widget A
1445rating: 5
1446---
1447
1448"Excellent product! Highly recommended."
1449
1450---
1451CARD: reviews
1452product: Gadget B
1453rating: 4
1454---
1455
1456"Very good, but a bit pricey.""#;
1457
1458        let doc = decompose(markdown).unwrap();
1459
1460        // Verify global fields
1461        assert_eq!(
1462            doc.get_field("title").unwrap().as_str().unwrap(),
1463            "Product Catalog"
1464        );
1465        assert_eq!(
1466            doc.get_field("author").unwrap().as_str().unwrap(),
1467            "John Doe"
1468        );
1469        assert_eq!(
1470            doc.get_field("date").unwrap().as_str().unwrap(),
1471            "2024-01-01"
1472        );
1473
1474        // Verify global body
1475        assert!(doc.body().unwrap().contains("main catalog description"));
1476
1477        // All cards in unified CARDS array
1478        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1479        assert_eq!(cards.len(), 4); // 2 products + 2 reviews
1480
1481        // First 2 are products
1482        let product1 = cards[0].as_object().unwrap();
1483        assert_eq!(product1.get("CARD").unwrap().as_str().unwrap(), "products");
1484        assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
1485        assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);
1486
1487        let product2 = cards[1].as_object().unwrap();
1488        assert_eq!(product2.get("CARD").unwrap().as_str().unwrap(), "products");
1489        assert_eq!(product2.get("name").unwrap().as_str().unwrap(), "Gadget B");
1490
1491        // Last 2 are reviews
1492        let review1 = cards[2].as_object().unwrap();
1493        assert_eq!(review1.get("CARD").unwrap().as_str().unwrap(), "reviews");
1494        assert_eq!(
1495            review1.get("product").unwrap().as_str().unwrap(),
1496            "Widget A"
1497        );
1498        assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);
1499
1500        // Total fields: title, author, date, body, CARDS = 5
1501        assert_eq!(doc.fields().len(), 5);
1502    }
1503
1504    #[test]
1505    fn taro_quill_directive() {
1506        let markdown = r#"---
1507QUILL: usaf_memo
1508memo_for: [ORG/SYMBOL]
1509memo_from: [ORG/SYMBOL]
1510---
1511
1512This is the memo body."#;
1513
1514        let doc = decompose(markdown).unwrap();
1515
1516        // Verify quill tag is set
1517        assert_eq!(doc.quill_tag(), "usaf_memo");
1518
1519        // Verify fields from quill block become frontmatter
1520        assert_eq!(
1521            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1522                .as_str()
1523                .unwrap(),
1524            "ORG/SYMBOL"
1525        );
1526
1527        // Verify body
1528        assert_eq!(doc.body(), Some("\nThis is the memo body."));
1529    }
1530
1531    #[test]
1532    fn test_quill_with_card_blocks() {
1533        let markdown = r#"---
1534QUILL: document
1535title: Test Document
1536---
1537
1538Main body.
1539
1540---
1541CARD: sections
1542name: Section 1
1543---
1544
1545Section 1 body."#;
1546
1547        let doc = decompose(markdown).unwrap();
1548
1549        // Verify quill tag
1550        assert_eq!(doc.quill_tag(), "document");
1551
1552        // Verify global field from quill block
1553        assert_eq!(
1554            doc.get_field("title").unwrap().as_str().unwrap(),
1555            "Test Document"
1556        );
1557
1558        // Verify card blocks work via CARDS array
1559        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1560        assert_eq!(cards.len(), 1);
1561        assert_eq!(
1562            cards[0]
1563                .as_object()
1564                .unwrap()
1565                .get("CARD")
1566                .unwrap()
1567                .as_str()
1568                .unwrap(),
1569            "sections"
1570        );
1571
1572        // Verify body
1573        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1574    }
1575
1576    #[test]
1577    fn test_multiple_quill_directives_error() {
1578        let markdown = r#"---
1579QUILL: first
1580---
1581
1582---
1583QUILL: second
1584---"#;
1585
1586        let result = decompose(markdown);
1587        assert!(result.is_err());
1588        // QUILL in inline block is now an error (must appear in top-level frontmatter only)
1589        assert!(result
1590            .unwrap_err()
1591            .to_string()
1592            .contains("top-level frontmatter"));
1593    }
1594
1595    #[test]
1596    fn test_invalid_quill_name() {
1597        let markdown = r#"---
1598QUILL: Invalid-Name
1599---"#;
1600
1601        let result = decompose(markdown);
1602        assert!(result.is_err());
1603        assert!(result
1604            .unwrap_err()
1605            .to_string()
1606            .contains("Invalid quill name"));
1607    }
1608
1609    #[test]
1610    fn test_quill_wrong_value_type() {
1611        let markdown = r#"---
1612QUILL: 123
1613---"#;
1614
1615        let result = decompose(markdown);
1616        assert!(result.is_err());
1617        assert!(result
1618            .unwrap_err()
1619            .to_string()
1620            .contains("QUILL value must be a string"));
1621    }
1622
1623    #[test]
1624    fn test_card_wrong_value_type() {
1625        let markdown = r#"---
1626CARD: 123
1627---"#;
1628
1629        let result = decompose(markdown);
1630        assert!(result.is_err());
1631        assert!(result
1632            .unwrap_err()
1633            .to_string()
1634            .contains("CARD value must be a string"));
1635    }
1636
1637    #[test]
1638    fn test_both_quill_and_card_error() {
1639        let markdown = r#"---
1640QUILL: test
1641CARD: items
1642---"#;
1643
1644        let result = decompose(markdown);
1645        assert!(result.is_err());
1646        assert!(result
1647            .unwrap_err()
1648            .to_string()
1649            .contains("Cannot specify both QUILL and CARD"));
1650    }
1651
1652    #[test]
1653    fn test_blank_lines_in_frontmatter() {
1654        // New parsing standard: blank lines are allowed within YAML blocks
1655        let markdown = r#"---
1656title: Test Document
1657author: Test Author
1658
1659description: This has a blank line above it
1660tags:
1661  - one
1662  - two
1663---
1664
1665# Hello World
1666
1667This is the body."#;
1668
1669        let doc = decompose(markdown).unwrap();
1670
1671        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
1672        assert_eq!(
1673            doc.get_field("title").unwrap().as_str().unwrap(),
1674            "Test Document"
1675        );
1676        assert_eq!(
1677            doc.get_field("author").unwrap().as_str().unwrap(),
1678            "Test Author"
1679        );
1680        assert_eq!(
1681            doc.get_field("description").unwrap().as_str().unwrap(),
1682            "This has a blank line above it"
1683        );
1684
1685        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
1686        assert_eq!(tags.len(), 2);
1687    }
1688
1689    #[test]
1690    fn test_blank_lines_in_scope_blocks() {
1691        // Blank lines should be allowed in CARD blocks too
1692        let markdown = r#"---
1693CARD: items
1694name: Item 1
1695
1696price: 19.99
1697
1698tags:
1699  - electronics
1700  - gadgets
1701---
1702
1703Body of item 1."#;
1704
1705        let doc = decompose(markdown).unwrap();
1706
1707        // Cards are in CARDS array
1708        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1709        assert_eq!(cards.len(), 1);
1710
1711        let item = cards[0].as_object().unwrap();
1712        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
1713        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1714        assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);
1715
1716        let tags = item.get("tags").unwrap().as_array().unwrap();
1717        assert_eq!(tags.len(), 2);
1718    }
1719
1720    #[test]
1721    fn test_horizontal_rule_with_blank_lines_above_and_below() {
1722        // Horizontal rule: blank lines both above AND below the ---
1723        let markdown = r#"---
1724title: Test
1725---
1726
1727First paragraph.
1728
1729---
1730
1731Second paragraph."#;
1732
1733        let doc = decompose(markdown).unwrap();
1734
1735        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1736
1737        // The body should contain the horizontal rule (---) as part of the content
1738        let body = doc.body().unwrap();
1739        assert!(body.contains("First paragraph."));
1740        assert!(body.contains("---"));
1741        assert!(body.contains("Second paragraph."));
1742    }
1743
1744    #[test]
1745    fn test_horizontal_rule_not_preceded_by_blank() {
1746        // --- not preceded by blank line but followed by blank line is NOT a horizontal rule
1747        // It's also NOT a valid metadata block opening (since it's followed by blank)
1748        let markdown = r#"---
1749title: Test
1750---
1751
1752First paragraph.
1753---
1754
1755Second paragraph."#;
1756
1757        let doc = decompose(markdown).unwrap();
1758
1759        let body = doc.body().unwrap();
1760        // The second --- should be in the body as text (not a horizontal rule since no blank above)
1761        assert!(body.contains("---"));
1762    }
1763
1764    #[test]
1765    fn test_multiple_blank_lines_in_yaml() {
1766        // Multiple blank lines should also be allowed
1767        let markdown = r#"---
1768title: Test
1769
1770
1771author: John Doe
1772
1773
1774version: 1.0
1775---
1776
1777Body content."#;
1778
1779        let doc = decompose(markdown).unwrap();
1780
1781        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1782        assert_eq!(
1783            doc.get_field("author").unwrap().as_str().unwrap(),
1784            "John Doe"
1785        );
1786        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1787    }
1788
1789    #[test]
1790    fn test_html_comment_interaction() {
1791        let markdown = r#"<!---
1792---> the rest of the page content
1793
1794---
1795key: value
1796---
1797"#;
1798        let doc = decompose(markdown).unwrap();
1799
1800        // The comment should be ignored (or at least not cause a parse error)
1801        // The frontmatter should be parsed
1802        let key = doc.get_field("key").and_then(|v| v.as_str());
1803        assert_eq!(key, Some("value"));
1804    }
1805}
1806#[cfg(test)]
1807mod demo_file_test {
1808    use super::*;
1809
1810    #[test]
1811    fn test_extended_metadata_demo_file() {
1812        let markdown = include_str!("../../fixtures/resources/extended_metadata_demo.md");
1813        let doc = decompose(markdown).unwrap();
1814
1815        // Verify global fields
1816        assert_eq!(
1817            doc.get_field("title").unwrap().as_str().unwrap(),
1818            "Extended Metadata Demo"
1819        );
1820        assert_eq!(
1821            doc.get_field("author").unwrap().as_str().unwrap(),
1822            "Quillmark Team"
1823        );
1824        // version is parsed as a number by YAML
1825        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1826
1827        // Verify body
1828        assert!(doc
1829            .body()
1830            .unwrap()
1831            .contains("extended YAML metadata standard"));
1832
1833        // All cards are now in unified CARDS array
1834        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
1835        assert_eq!(cards.len(), 5); // 3 features + 2 use_cases
1836
1837        // Count features and use_cases cards
1838        let features_count = cards
1839            .iter()
1840            .filter(|c| {
1841                c.as_object()
1842                    .unwrap()
1843                    .get("CARD")
1844                    .unwrap()
1845                    .as_str()
1846                    .unwrap()
1847                    == "features"
1848            })
1849            .count();
1850        let use_cases_count = cards
1851            .iter()
1852            .filter(|c| {
1853                c.as_object()
1854                    .unwrap()
1855                    .get("CARD")
1856                    .unwrap()
1857                    .as_str()
1858                    .unwrap()
1859                    == "use_cases"
1860            })
1861            .count();
1862        assert_eq!(features_count, 3);
1863        assert_eq!(use_cases_count, 2);
1864
1865        // Check first card is a feature
1866        let feature1 = cards[0].as_object().unwrap();
1867        assert_eq!(feature1.get("CARD").unwrap().as_str().unwrap(), "features");
1868        assert_eq!(
1869            feature1.get("name").unwrap().as_str().unwrap(),
1870            "Tag Directives"
1871        );
1872    }
1873
1874    #[test]
1875    fn test_input_size_limit() {
1876        // Create markdown larger than MAX_INPUT_SIZE (10 MB)
1877        let size = crate::error::MAX_INPUT_SIZE + 1;
1878        let large_markdown = "a".repeat(size);
1879
1880        let result = decompose(&large_markdown);
1881        assert!(result.is_err());
1882
1883        let err_msg = result.unwrap_err().to_string();
1884        assert!(err_msg.contains("Input too large"));
1885    }
1886
1887    #[test]
1888    fn test_yaml_size_limit() {
1889        // Create YAML block larger than MAX_YAML_SIZE (1 MB)
1890        let mut markdown = String::from("---\n");
1891
1892        // Create a very large YAML field
1893        let size = crate::error::MAX_YAML_SIZE + 1;
1894        markdown.push_str("data: \"");
1895        markdown.push_str(&"x".repeat(size));
1896        markdown.push_str("\"\n---\n\nBody");
1897
1898        let result = decompose(&markdown);
1899        assert!(result.is_err());
1900
1901        let err_msg = result.unwrap_err().to_string();
1902        assert!(err_msg.contains("Input too large"));
1903    }
1904
1905    #[test]
1906    fn test_input_within_size_limit() {
1907        // Create markdown just under the limit
1908        let size = 1000; // Much smaller than limit
1909        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));
1910
1911        let result = decompose(&markdown);
1912        assert!(result.is_ok());
1913    }
1914
1915    #[test]
1916    fn test_yaml_within_size_limit() {
1917        // Create YAML block well within the limit
1918        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";
1919
1920        let result = decompose(markdown);
1921        assert!(result.is_ok());
1922    }
1923
1924    // Tests for guillemet preservation in parsing (guillemets are NOT converted during parsing)
1925    // Guillemet conversion now happens in process_plate, not during parsing
1926    #[test]
1927    fn test_chevrons_preserved_in_body_no_frontmatter() {
1928        let markdown = "Use <<raw content>> here.";
1929        let doc = decompose(markdown).unwrap();
1930
1931        // Body should preserve chevrons (conversion happens later in process_plate)
1932        assert_eq!(doc.body(), Some("Use <<raw content>> here."));
1933    }
1934
1935    #[test]
1936    fn test_chevrons_preserved_in_body_with_frontmatter() {
1937        let markdown = r#"---
1938title: Test
1939---
1940
1941Use <<raw content>> here."#;
1942        let doc = decompose(markdown).unwrap();
1943
1944        // Body should preserve chevrons
1945        assert_eq!(doc.body(), Some("\nUse <<raw content>> here."));
1946    }
1947
1948    #[test]
1949    fn test_chevrons_preserved_in_yaml_string() {
1950        let markdown = r#"---
1951title: Test <<with chevrons>>
1952---
1953
1954Body content."#;
1955        let doc = decompose(markdown).unwrap();
1956
1957        // YAML string values should preserve chevrons
1958        assert_eq!(
1959            doc.get_field("title").unwrap().as_str().unwrap(),
1960            "Test <<with chevrons>>"
1961        );
1962    }
1963
1964    #[test]
1965    fn test_chevrons_preserved_in_yaml_array() {
1966        let markdown = r#"---
1967items:
1968  - "<<first>>"
1969  - "<<second>>"
1970---
1971
1972Body."#;
1973        let doc = decompose(markdown).unwrap();
1974
1975        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1976        assert_eq!(items[0].as_str().unwrap(), "<<first>>");
1977        assert_eq!(items[1].as_str().unwrap(), "<<second>>");
1978    }
1979
1980    #[test]
1981    fn test_chevrons_preserved_in_yaml_nested() {
1982        let markdown = r#"---
1983metadata:
1984  description: "<<nested value>>"
1985---
1986
1987Body."#;
1988        let doc = decompose(markdown).unwrap();
1989
1990        let metadata = doc.get_field("metadata").unwrap().as_object().unwrap();
1991        assert_eq!(
1992            metadata.get("description").unwrap().as_str().unwrap(),
1993            "<<nested value>>"
1994        );
1995    }
1996
1997    #[test]
1998    fn test_chevrons_preserved_in_code_blocks() {
1999        let markdown = r#"```
2000<<in code block>>
2001```
2002
2003<<outside code block>>"#;
2004        let doc = decompose(markdown).unwrap();
2005
2006        let body = doc.body().unwrap();
2007        // All chevrons should be preserved (no conversion during parsing)
2008        assert!(body.contains("<<in code block>>"));
2009        assert!(body.contains("<<outside code block>>"));
2010    }
2011
2012    #[test]
2013    fn test_chevrons_preserved_in_inline_code() {
2014        let markdown = "`<<in inline code>>` and <<outside inline code>>";
2015        let doc = decompose(markdown).unwrap();
2016
2017        let body = doc.body().unwrap();
2018        // All chevrons should be preserved
2019        assert!(body.contains("`<<in inline code>>`"));
2020        assert!(body.contains("<<outside inline code>>"));
2021    }
2022
2023    #[test]
2024    fn test_chevrons_preserved_in_tagged_block_body() {
2025        let markdown = r#"---
2026title: Main
2027---
2028
2029Main body.
2030
2031---
2032CARD: items
2033name: Item 1
2034---
2035
2036Use <<raw>> here."#;
2037        let doc = decompose(markdown).unwrap();
2038
2039        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2040        let item = cards[0].as_object().unwrap();
2041        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
2042        let item_body = item.get(BODY_FIELD).unwrap().as_str().unwrap();
2043        // Tagged block body should preserve chevrons
2044        assert!(item_body.contains("<<raw>>"));
2045    }
2046
2047    #[test]
2048    fn test_chevrons_preserved_in_tagged_block_yaml() {
2049        let markdown = r#"---
2050title: Main
2051---
2052
2053Main body.
2054
2055---
2056CARD: items
2057description: "<<tagged yaml>>"
2058---
2059
2060Item body."#;
2061        let doc = decompose(markdown).unwrap();
2062
2063        let cards = doc.get_field("CARDS").unwrap().as_sequence().unwrap();
2064        let item = cards[0].as_object().unwrap();
2065        assert_eq!(item.get("CARD").unwrap().as_str().unwrap(), "items");
2066        // Tagged block YAML should preserve chevrons
2067        assert_eq!(
2068            item.get("description").unwrap().as_str().unwrap(),
2069            "<<tagged yaml>>"
2070        );
2071    }
2072
2073    #[test]
2074    fn test_yaml_numbers_not_affected() {
2075        // Numbers should not be affected
2076        let markdown = r#"---
2077count: 42
2078---
2079
2080Body."#;
2081        let doc = decompose(markdown).unwrap();
2082        assert_eq!(doc.get_field("count").unwrap().as_i64().unwrap(), 42);
2083    }
2084
2085    #[test]
2086    fn test_yaml_booleans_not_affected() {
2087        // Booleans should not be affected
2088        let markdown = r#"---
2089active: true
2090---
2091
2092Body."#;
2093        let doc = decompose(markdown).unwrap();
2094        assert!(doc.get_field("active").unwrap().as_bool().unwrap());
2095    }
2096
2097    #[test]
2098    fn test_multiline_chevrons_preserved() {
2099        // Multiline chevrons should be preserved as-is
2100        let markdown = "<<text\nacross lines>>";
2101        let doc = decompose(markdown).unwrap();
2102
2103        let body = doc.body().unwrap();
2104        // Should contain the original chevrons
2105        assert!(body.contains("<<text"));
2106        assert!(body.contains("across lines>>"));
2107    }
2108
2109    #[test]
2110    fn test_unmatched_chevrons_preserved() {
2111        let markdown = "<<unmatched";
2112        let doc = decompose(markdown).unwrap();
2113
2114        let body = doc.body().unwrap();
2115        // Unmatched should remain as-is
2116        assert_eq!(body, "<<unmatched");
2117    }
2118}
2119
2120// Additional robustness tests
#[cfg(test)]
mod robustness_tests {
    //! Edge-case coverage for `decompose` and `is_valid_tag_name`: delimiters,
    //! line endings, Unicode, YAML corner cases, CARD/QUILL directives,
    //! error paths, and body extraction.

    use super::*;

    // ---- Delimiter handling ----

    /// Empty input produces an empty body and the default quill tag.
    #[test]
    fn test_empty_document() {
        let parsed = decompose("").unwrap();
        assert_eq!(parsed.body(), Some(""));
        assert_eq!(parsed.quill_tag(), "__default__");
    }

    /// Whitespace-only input is returned verbatim as the body.
    #[test]
    fn test_only_whitespace() {
        let input = "   \n\n   \t";
        let parsed = decompose(input).unwrap();
        assert_eq!(parsed.body(), Some("   \n\n   \t"));
    }

    /// A bare `---` with no trailing newline is expected to come back as
    /// plain body text rather than as an error.
    #[test]
    fn test_only_dashes() {
        let outcome = decompose("---");
        // Not an error: the lone delimiter is treated as ordinary content.
        assert!(outcome.is_ok());
        assert_eq!(outcome.unwrap().body(), Some("---"));
    }

    /// `---` appearing mid-line is ordinary text, not a delimiter.
    #[test]
    fn test_dashes_in_middle_of_line() {
        let input = "some text --- more text";
        let parsed = decompose(input).unwrap();
        assert_eq!(parsed.body(), Some("some text --- more text"));
    }

    /// Four dashes do not open a frontmatter block.
    #[test]
    fn test_four_dashes() {
        let parsed = decompose("----\ntitle: Test\n----\n\nBody").unwrap();
        // The dashes must still be visible in the body.
        let body = parsed.body().unwrap();
        assert!(body.contains("----"));
    }

    /// Windows-style CRLF line endings are accepted throughout.
    #[test]
    fn test_crlf_line_endings() {
        let input = "---\r\ntitle: Test\r\n---\r\n\r\nBody content.";
        let parsed = decompose(input).unwrap();

        let title = parsed.get_field("title").unwrap().as_str().unwrap();
        assert_eq!(title, "Test");
        assert!(parsed.body().unwrap().contains("Body content."));
    }

    /// A document mixing `\n` and `\r\n` still parses its frontmatter.
    #[test]
    fn test_mixed_line_endings() {
        let input = "---\ntitle: Test\r\n---\n\nBody.";
        let parsed = decompose(input).unwrap();

        let title = parsed.get_field("title").unwrap().as_str().unwrap();
        assert_eq!(title, "Test");
    }

    /// A closing delimiter at EOF (no trailing newline) ends the frontmatter
    /// and leaves an empty body.
    #[test]
    fn test_frontmatter_at_eof_no_trailing_newline() {
        let parsed = decompose("---\ntitle: Test\n---").unwrap();

        let title = parsed.get_field("title").unwrap().as_str().unwrap();
        assert_eq!(title, "Test");
        assert_eq!(parsed.body(), Some(""));
    }

    /// Frontmatter containing only a single-space line parses and contributes
    /// no user fields.
    #[test]
    fn test_empty_frontmatter() {
        // The block deliberately contains one whitespace line between the
        // delimiters instead of being written as "---\n---".
        let input = "---\n \n---\n\nBody content.";
        let parsed = decompose(input).unwrap();

        assert!(parsed.body().unwrap().contains("Body content."));
        // Exactly two fields are expected (presumably the body and CARDS
        // entries -- confirm against `decompose`).
        let field_count = parsed.fields().len();
        assert_eq!(field_count, 2);
    }

    /// Frontmatter made up of several whitespace-only lines still parses.
    #[test]
    fn test_whitespace_only_frontmatter() {
        let input = "---\n   \n\n   \n---\n\nBody.";
        let parsed = decompose(input).unwrap();
        assert!(parsed.body().unwrap().contains("Body."));
    }

    // ---- Unicode handling ----

    /// Non-ASCII YAML keys can be looked up by their exact text.
    #[test]
    fn test_unicode_in_yaml_keys() {
        let input = "---\ntitre: Bonjour\nタイトル: こんにちは\n---\n\nBody.";
        let parsed = decompose(input).unwrap();

        let french = parsed.get_field("titre").unwrap().as_str().unwrap();
        assert_eq!(french, "Bonjour");

        let japanese = parsed.get_field("タイトル").unwrap().as_str().unwrap();
        assert_eq!(japanese, "こんにちは");
    }

    /// Non-ASCII YAML values, emoji included, round-trip unchanged.
    #[test]
    fn test_unicode_in_yaml_values() {
        let input = "---\ntitle: 你好世界 🎉\n---\n\nBody.";
        let parsed = decompose(input).unwrap();

        let title = parsed.get_field("title").unwrap().as_str().unwrap();
        assert_eq!(title, "你好世界 🎉");
    }

    /// Non-ASCII body text, emoji included, is preserved.
    #[test]
    fn test_unicode_in_body() {
        let input = "---\ntitle: Test\n---\n\n日本語テキスト with emoji 🚀";
        let parsed = decompose(input).unwrap();

        assert!(parsed.body().unwrap().contains("日本語テキスト"));
        assert!(parsed.body().unwrap().contains("🚀"));
    }

    // ---- YAML edge cases ----

    /// A literal block scalar (`|`) keeps its line breaks.
    #[test]
    fn test_yaml_multiline_string() {
        let input = r#"---
description: |
  This is a
  multiline string
  with preserved newlines.
---

Body."#;
        let parsed = decompose(input).unwrap();

        let description = parsed.get_field("description").unwrap().as_str().unwrap();
        assert!(description.contains("multiline string"));
        assert!(description.contains('\n'));
    }

    /// A folded block scalar (`>`) is parsed into a string value.
    #[test]
    fn test_yaml_folded_string() {
        let input = r#"---
description: >
  This is a folded
  string that becomes
  a single line.
---

Body."#;
        let parsed = decompose(input).unwrap();

        // Only the presence of the text is checked, not the exact folding.
        let description = parsed.get_field("description").unwrap().as_str().unwrap();
        assert!(description.contains("folded"));
    }

    /// An explicit YAML `null` is stored as a null value.
    #[test]
    fn test_yaml_null_value() {
        let parsed = decompose("---\noptional: null\n---\n\nBody.").unwrap();

        let optional = parsed.get_field("optional").unwrap();
        assert!(optional.is_null());
    }

    /// An explicitly quoted empty string stays an empty string.
    #[test]
    fn test_yaml_empty_string_value() {
        let parsed = decompose("---\nempty: \"\"\n---\n\nBody.").unwrap();

        let empty = parsed.get_field("empty").unwrap().as_str().unwrap();
        assert_eq!(empty, "");
    }

    /// Quoting protects YAML-significant characters inside a string.
    #[test]
    fn test_yaml_special_characters_in_string() {
        let input = "---\nspecial: \"colon: here, and [brackets]\"\n---\n\nBody.";
        let parsed = decompose(input).unwrap();

        let special = parsed.get_field("special").unwrap().as_str().unwrap();
        assert_eq!(special, "colon: here, and [brackets]");
    }

    /// Nested mappings are reachable level by level with their typed values.
    #[test]
    fn test_yaml_nested_objects() {
        let input = r#"---
config:
  database:
    host: localhost
    port: 5432
  cache:
    enabled: true
---

Body."#;
        let parsed = decompose(input).unwrap();

        let config = parsed.get_field("config").unwrap().as_object().unwrap();
        let database = config.get("database").unwrap().as_object().unwrap();

        let host = database.get("host").unwrap().as_str().unwrap();
        assert_eq!(host, "localhost");
        let port = database.get("port").unwrap().as_i64().unwrap();
        assert_eq!(port, 5432);
    }

    // ---- CARD block edge cases ----

    /// A CARD block ending at EOF gets an empty body.
    #[test]
    fn test_card_with_empty_body() {
        let input = "---\nCARD: items\nname: Item\n---";
        let parsed = decompose(input).unwrap();

        let cards = parsed.get_field("CARDS").unwrap().as_sequence().unwrap();
        assert_eq!(cards.len(), 1);

        let only = cards[0].as_object().unwrap();
        assert_eq!(only.get("CARD").unwrap().as_str().unwrap(), "items");
        assert_eq!(only.get(BODY_FIELD).unwrap().as_str().unwrap(), "");
    }

    /// Two CARD blocks placed back to back are both collected.
    #[test]
    fn test_card_consecutive_blocks() {
        let input = r#"---
CARD: a
id: 1
---
---
CARD: a
id: 2
---"#;
        let parsed = decompose(input).unwrap();

        let cards = parsed.get_field("CARDS").unwrap().as_sequence().unwrap();
        assert_eq!(cards.len(), 2);

        // Both entries carry the same card name.
        for card in cards.iter() {
            let kind = card
                .as_object()
                .unwrap()
                .get("CARD")
                .unwrap()
                .as_str()
                .unwrap();
            assert_eq!(kind, "a");
        }
    }

    /// `---` inside a card's body text does not end or split the card.
    #[test]
    fn test_card_with_body_containing_dashes() {
        let input = r#"---
CARD: items
name: Item
---

Some text with --- dashes in it."#;
        let parsed = decompose(input).unwrap();

        let cards = parsed.get_field("CARDS").unwrap().as_sequence().unwrap();
        let first = cards[0].as_object().unwrap();
        assert_eq!(first.get("CARD").unwrap().as_str().unwrap(), "items");

        let card_body = first.get(BODY_FIELD).unwrap().as_str().unwrap();
        assert!(card_body.contains("--- dashes"));
    }

    // ---- QUILL directive edge cases ----

    /// A quill name may begin with an underscore.
    #[test]
    fn test_quill_with_underscore_prefix() {
        let parsed = decompose("---\nQUILL: _internal\n---\n\nBody.").unwrap();
        assert_eq!(parsed.quill_tag(), "_internal");
    }

    /// A quill name may contain digits after its first character.
    #[test]
    fn test_quill_with_numbers() {
        let parsed = decompose("---\nQUILL: form_8_v2\n---\n\nBody.").unwrap();
        assert_eq!(parsed.quill_tag(), "form_8_v2");
    }

    /// Ordinary fields next to a QUILL directive are still parsed.
    #[test]
    fn test_quill_with_additional_fields() {
        let input = r#"---
QUILL: my_quill
title: Document Title
author: John Doe
---

Body content."#;
        let parsed = decompose(input).unwrap();

        assert_eq!(parsed.quill_tag(), "my_quill");

        let title = parsed.get_field("title").unwrap().as_str().unwrap();
        assert_eq!(title, "Document Title");

        let author = parsed.get_field("author").unwrap().as_str().unwrap();
        assert_eq!(author, "John Doe");
    }

    // ---- Error handling ----

    /// An upper-case CARD name is rejected with a descriptive message.
    #[test]
    fn test_invalid_scope_name_uppercase() {
        let outcome = decompose("---\nCARD: ITEMS\n---\n\nBody.");
        assert!(outcome.is_err());

        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("Invalid card field name"));
    }

    /// A CARD name starting with a digit is rejected.
    #[test]
    fn test_invalid_scope_name_starts_with_number() {
        let outcome = decompose("---\nCARD: 123items\n---\n\nBody.");
        assert!(outcome.is_err());
    }

    /// A CARD name containing a hyphen is rejected.
    #[test]
    fn test_invalid_scope_name_with_hyphen() {
        let outcome = decompose("---\nCARD: my-items\n---\n\nBody.");
        assert!(outcome.is_err());
    }

    /// A QUILL name containing upper-case letters is rejected.
    #[test]
    fn test_invalid_quill_name_uppercase() {
        let outcome = decompose("---\nQUILL: MyQuill\n---\n\nBody.");
        assert!(outcome.is_err());
    }

    /// A frontmatter line with no colon is a YAML error.
    #[test]
    fn test_yaml_syntax_error_missing_colon() {
        let outcome = decompose("---\ntitle Test\n---\n\nBody.");
        assert!(outcome.is_err());
    }

    /// Inconsistent sequence indentation must not panic, whatever the result.
    #[test]
    fn test_yaml_syntax_error_bad_indentation() {
        let input = "---\nitems:\n- one\n - two\n---\n\nBody.";
        // Ok or Err are both acceptable; the outcome is discarded on purpose,
        // so the only failure mode here is a panic inside the parser.
        let _ = decompose(input);
    }

    // ---- Body extraction edge cases ----

    /// Blank lines between the frontmatter and the text stay in the body.
    #[test]
    fn test_body_with_leading_newlines() {
        let input = "---\ntitle: Test\n---\n\n\n\nBody with leading newlines.";
        let parsed = decompose(input).unwrap();

        let body = parsed.body().unwrap();
        assert!(body.starts_with('\n'));
    }

    /// Trailing blank lines at the end of the document stay in the body.
    #[test]
    fn test_body_with_trailing_newlines() {
        let input = "---\ntitle: Test\n---\n\nBody.\n\n\n";
        let parsed = decompose(input).unwrap();

        let body = parsed.body().unwrap();
        assert!(body.ends_with('\n'));
    }

    /// Frontmatter with nothing after it yields an empty (not missing) body.
    #[test]
    fn test_no_body_after_frontmatter() {
        let parsed = decompose("---\ntitle: Test\n---").unwrap();
        assert_eq!(parsed.body(), Some(""));
    }

    // ---- Tag name validation ----

    /// A lone underscore is a valid tag name.
    #[test]
    fn test_valid_tag_name_single_underscore() {
        let accepted = is_valid_tag_name("_");
        assert!(accepted);
    }

    /// A leading underscore followed by letters is valid.
    #[test]
    fn test_valid_tag_name_underscore_prefix() {
        let accepted = is_valid_tag_name("_private");
        assert!(accepted);
    }

    /// Digits are allowed after the first character.
    #[test]
    fn test_valid_tag_name_with_numbers() {
        for &candidate in ["item1", "item_2"].iter() {
            assert!(is_valid_tag_name(candidate));
        }
    }

    /// The empty string is not a valid tag name.
    #[test]
    fn test_invalid_tag_name_empty() {
        let accepted = is_valid_tag_name("");
        assert!(!accepted);
    }

    /// A tag name may not begin with a digit.
    #[test]
    fn test_invalid_tag_name_starts_with_number() {
        let accepted = is_valid_tag_name("1item");
        assert!(!accepted);
    }

    /// Upper-case letters are rejected anywhere in the name.
    #[test]
    fn test_invalid_tag_name_uppercase() {
        for &candidate in ["Items", "ITEMS"].iter() {
            assert!(!is_valid_tag_name(candidate));
        }
    }

    /// Hyphens, dots, and spaces are rejected.
    #[test]
    fn test_invalid_tag_name_special_chars() {
        for &candidate in ["my-items", "my.items", "my items"].iter() {
            assert!(!is_valid_tag_name(candidate));
        }
    }

    // ---- Guillemet handling in YAML ----

    /// Integer, float, and boolean scalars keep their native types.
    #[test]
    fn test_guillemet_in_yaml_preserves_non_strings() {
        let input = r#"---
count: 42
price: 19.99
active: true
items:
  - first
  - 100
  - true
---

Body."#;
        let parsed = decompose(input).unwrap();

        let count = parsed.get_field("count").unwrap().as_i64().unwrap();
        assert_eq!(count, 42);

        let price = parsed.get_field("price").unwrap().as_f64().unwrap();
        assert_eq!(price, 19.99);

        let active = parsed.get_field("active").unwrap().as_bool().unwrap();
        assert!(active);
    }

    /// Guillemets already present in the input are passed through untouched.
    #[test]
    fn test_guillemet_double_conversion_prevention() {
        let input = "---\ntitle: Already «converted»\n---\n\nBody.";
        let parsed = decompose(input).unwrap();

        // The value must come back exactly as written, with no re-escaping.
        let title = parsed.get_field("title").unwrap().as_str().unwrap();
        assert_eq!(title, "Already «converted»");
    }

    /// A global field and a CARD may share a name; both are kept.
    #[test]
    fn test_allowed_card_field_collision() {
        let input = r#"---
my_card: "some global value"
---

---
CARD: my_card
title: "My Card"
---
Body
"#;
        // Parsing is expected to succeed despite the shared name.
        let parsed = decompose(input).unwrap();

        // The global field keeps its own value...
        let global = parsed.get_field("my_card").unwrap().as_str().unwrap();
        assert_eq!(global, "some global value");

        // ...and the card is found in the CARDS array under the same name.
        let cards = parsed.get_field("CARDS").unwrap().as_array().unwrap();
        assert!(!cards.is_empty());

        let matching = cards
            .iter()
            .find(|entry| entry.get("CARD").and_then(|kind| kind.as_str()) == Some("my_card"))
            .expect("Card not found");
        assert_eq!(matching.get("title").unwrap().as_str().unwrap(), "My Card");
    }

    /// A user-defined YAML tag such as `!fill` is accepted and the tagged
    /// value is read as a plain string.
    #[test]
    fn test_yaml_custom_tags_in_frontmatter() {
        let input = r#"---
memo_from: !fill 2d lt example
regular_field: normal value
---

Body content."#;
        let parsed = decompose(input).unwrap();

        // The tag itself is dropped; only the scalar text remains.
        let tagged = parsed.get_field("memo_from").unwrap().as_str().unwrap();
        assert_eq!(tagged, "2d lt example");

        // An untagged neighbouring field is unaffected.
        let plain = parsed.get_field("regular_field").unwrap().as_str().unwrap();
        assert_eq!(plain, "normal value");

        assert_eq!(parsed.body(), Some("\nBody content."));
    }

    /// End-to-end check of the example from EXTENDED_MARKDOWN.md
    /// (lines 92-127): global fields, a `***` rule in the body, and two
    /// `section` cards.
    #[test]
    fn test_spec_example() {
        let input = r#"---
title: My Document
QUILL: blog_post
---
Main document body.

***

More content after horizontal rule.

---
CARD: section
heading: Introduction
---
Introduction content.

---
CARD: section
heading: Conclusion
---
Conclusion content.
"#;

        let parsed = decompose(input).unwrap();

        // Global metadata.
        let title = parsed.get_field("title").unwrap().as_str().unwrap();
        assert_eq!(title, "My Document");
        assert_eq!(parsed.quill_tag(), "blog_post");

        // The main body keeps the `***` rule and the text on both sides.
        let body = parsed.body().unwrap();
        assert!(body.contains("Main document body."));
        assert!(body.contains("***"));
        assert!(body.contains("More content after horizontal rule."));

        // Both section cards are collected in document order.
        let cards = parsed.get_field("CARDS").unwrap().as_array().unwrap();
        assert_eq!(cards.len(), 2);

        let intro = cards[0].as_object().unwrap();
        assert_eq!(intro.get("CARD").unwrap().as_str().unwrap(), "section");
        let intro_heading = intro.get("heading").unwrap().as_str().unwrap();
        assert_eq!(intro_heading, "Introduction");
        let intro_body = intro.get("BODY").unwrap().as_str().unwrap();
        assert_eq!(intro_body, "Introduction content.\n\n");

        let conclusion = cards[1].as_object().unwrap();
        assert_eq!(conclusion.get("CARD").unwrap().as_str().unwrap(), "section");
        let conclusion_heading = conclusion.get("heading").unwrap().as_str().unwrap();
        assert_eq!(conclusion_heading, "Conclusion");
        let conclusion_body = conclusion.get("BODY").unwrap().as_str().unwrap();
        assert_eq!(conclusion_body, "Conclusion content.\n");
    }
}