quillmark_core/
parse.rs

1//! # Parsing Module
2//!
3//! Parsing functionality for markdown documents with YAML frontmatter.
4//!
5//! ## Overview
6//!
7//! The `parse` module provides the [`ParsedDocument::from_markdown`] function for parsing markdown documents with YAML frontmatter into a [`ParsedDocument`].
8//!
9//! ## Key Types
10//!
11//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
12//! - [`BODY_FIELD`]: Constant for the field name storing document body
13//!
14//! ## Examples
15//!
16//! ### Basic Parsing
17//!
18//! ```
19//! use quillmark_core::ParsedDocument;
20//!
21//! let markdown = r#"---
22//! title: My Document
23//! author: John Doe
24//! ---
25//!
26//! # Introduction
27//!
28//! Document content here.
29//! "#;
30//!
31//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
32//! let title = doc.get_field("title")
33//!     .and_then(|v| v.as_str())
34//!     .unwrap_or("Untitled");
//! assert_eq!(title, "My Document");
35//! ```
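//!
//! ### Tagged (SCOPE) Blocks
//!
//! A minimal sketch of the Extended YAML Metadata Standard's `SCOPE` blocks, which
//! collect repeated sections into an array field on the document (see PARSE.md for
//! the full rules):
//!
//! ```
//! use quillmark_core::ParsedDocument;
//!
//! let markdown = r#"---
//! title: Catalog
//! ---
//!
//! Intro text.
//!
//! ---
//! SCOPE: products
//! name: Widget
//! ---
//!
//! Widget description.
//! "#;
//!
//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
//! assert_eq!(doc.get_field("title").and_then(|v| v.as_str()), Some("Catalog"));
//! // Each SCOPE block is collected under its field name ("products" here).
//! assert!(doc.get_field("products").is_some());
//! ```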
36//!
37//! ## Error Handling
38//!
39//! The [`ParsedDocument::from_markdown`] function returns errors for:
40//! - Malformed YAML syntax
41//! - Unclosed frontmatter blocks
42//! - Multiple global frontmatter blocks
43//! - Both QUILL and SCOPE specified in the same block
44//! - Reserved field name usage
45//! - Name collisions
46//!
47//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.
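//!
//! ### Handling Parse Failures
//!
//! A minimal sketch: an unclosed frontmatter block is rejected.
//!
//! ```
//! use quillmark_core::ParsedDocument;
//!
//! let unclosed = "---\ntitle: Unclosed";
//! assert!(ParsedDocument::from_markdown(unclosed).is_err());
//! ```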
48
49use std::collections::HashMap;
50
51use crate::value::QuillValue;
52
53/// The field name used to store the document body
54pub const BODY_FIELD: &str = "body";
55
56/// Convert a `serde_yaml::Error` into a message string, appending the error's line and column when available
57fn yaml_error_to_string(e: serde_yaml::Error, context: &str) -> String {
58    let mut msg = format!("{}: {}", context, e);
59
60    if let Some(loc) = e.location() {
61        msg.push_str(&format!(" at line {}, column {}", loc.line(), loc.column()));
62    }
63
64    msg
65}
66
67/// Reserved tag name for quill specification
68pub const QUILL_TAG: &str = "quill";
69
70/// A parsed markdown document with frontmatter
71#[derive(Debug, Clone)]
72pub struct ParsedDocument {
73    fields: HashMap<String, QuillValue>,
74    quill_tag: Option<String>,
75}
76
77impl ParsedDocument {
78    /// Create a new ParsedDocument with the given fields
79    pub fn new(fields: HashMap<String, QuillValue>) -> Self {
80        Self {
81            fields,
82            quill_tag: None,
83        }
84    }
85
86    /// Create a ParsedDocument from fields and optional quill tag
87    pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: Option<String>) -> Self {
88        Self { fields, quill_tag }
89    }
90
91    /// Parse a markdown string into a `ParsedDocument`
92    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
93        decompose(markdown).map_err(crate::error::ParseError::from)
94    }
95
96    /// Get the quill tag if specified (from QUILL key)
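    ///
    /// A minimal sketch: the tag is populated when the document contains a
    /// `QUILL:` directive block.
    ///
    /// ```
    /// use quillmark_core::ParsedDocument;
    ///
    /// let doc = ParsedDocument::from_markdown("---\nQUILL: usaf_memo\n---\n\nBody.").unwrap();
    /// assert_eq!(doc.quill_tag(), Some("usaf_memo"));
    /// ```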
97    pub fn quill_tag(&self) -> Option<&str> {
98        self.quill_tag.as_deref()
99    }
100
101    /// Get the document body
102    pub fn body(&self) -> Option<&str> {
103        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
104    }
105
106    /// Get a specific field
107    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
108        self.fields.get(name)
109    }
110
111    /// Get all fields (including body)
112    pub fn fields(&self) -> &HashMap<String, QuillValue> {
113        &self.fields
114    }
115
116    /// Create a new ParsedDocument with default values applied from field schemas
117    ///
118    /// This method creates a new ParsedDocument with default values applied for any
119    /// fields that are missing from the original document but have defaults specified
120    /// in the field schemas. Existing fields are preserved and not overwritten.
121    ///
122    /// # Arguments
123    ///
124    /// * `field_schemas` - A HashMap of field schemas containing default values
125    ///
126    /// # Returns
127    ///
128    /// A new ParsedDocument with defaults applied for missing fields
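    ///
    /// # Examples
    ///
    /// A minimal sketch (marked `ignore` because it assumes `FieldSchema` and
    /// `QuillValue` are re-exported from the crate root; adjust the paths as needed):
    ///
    /// ```ignore
    /// use std::collections::HashMap;
    /// use quillmark_core::{FieldSchema, ParsedDocument, QuillValue};
    ///
    /// let mut schemas = HashMap::new();
    /// let mut status = FieldSchema::new("status".to_string(), "Document status".to_string());
    /// status.default = Some(QuillValue::from_json(serde_json::json!("draft")));
    /// schemas.insert("status".to_string(), status);
    ///
    /// // A document that omits `status` picks up the default value.
    /// let doc = ParsedDocument::new(HashMap::new()).with_defaults(&schemas);
    /// assert_eq!(doc.get_field("status").and_then(|v| v.as_str()), Some("draft"));
    /// ```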
129    pub fn with_defaults(
130        &self,
131        field_schemas: &HashMap<String, crate::quill::FieldSchema>,
132    ) -> Self {
133        let mut fields = self.fields.clone();
134
135        for (field_name, schema) in field_schemas {
136            // Only apply default if field is missing and default exists
137            if !fields.contains_key(field_name) {
138                if let Some(ref default_value) = schema.default {
139                    fields.insert(field_name.clone(), default_value.clone());
140                }
141            }
142        }
143
144        Self {
145            fields,
146            quill_tag: self.quill_tag.clone(),
147        }
148    }
149}
150
151#[derive(Debug)]
152struct MetadataBlock {
153    start: usize, // Position of opening "---"
154    end: usize,   // Position after closing "---\n"
155    yaml_content: String,
156    tag: Option<String>,        // Field name from SCOPE key
157    quill_name: Option<String>, // Quill name from QUILL key
158}
159
160/// Validate that a tag name matches the pattern `[a-z_][a-z0-9_]*`
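/// (e.g. `items`, `use_cases`, and `_draft2` are accepted; `Invalid-Name`, `1st`, and `ITEMS` are rejected)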
161fn is_valid_tag_name(name: &str) -> bool {
162    if name.is_empty() {
163        return false;
164    }
165
166    let mut chars = name.chars();
167    let first = chars.next().unwrap();
168
169    if !first.is_ascii_lowercase() && first != '_' {
170        return false;
171    }
172
173    for ch in chars {
174        if !ch.is_ascii_lowercase() && !ch.is_ascii_digit() && ch != '_' {
175            return false;
176        }
177    }
178
179    true
180}
181
182/// Find all metadata blocks in the document
183fn find_metadata_blocks(
184    markdown: &str,
185) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
186    let mut blocks = Vec::new();
187    let mut pos = 0;
188
189    while pos < markdown.len() {
190        // Look for opening "---\n" or "---\r\n"
191        let search_str = &markdown[pos..];
192        let delimiter_result = if let Some(p) = search_str.find("---\n") {
193            Some((p, 4, "\n"))
194        } else if let Some(p) = search_str.find("---\r\n") {
195            Some((p, 5, "\r\n"))
196        } else {
197            None
198        };
199
200        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
201            let abs_pos = pos + delimiter_pos;
202            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"
203
204            // Check if this --- is a horizontal rule (blank lines above AND below)
205            let preceded_by_blank = if abs_pos > 0 {
206                // Check if there's a blank line before the ---
207                let before = &markdown[..abs_pos];
208                before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
209            } else {
210                false
211            };
212
213            let followed_by_blank = if content_start < markdown.len() {
214                markdown[content_start..].starts_with('\n')
215                    || markdown[content_start..].starts_with("\r\n")
216            } else {
217                false
218            };
219
220            // Horizontal rule: blank lines both above and below
221            if preceded_by_blank && followed_by_blank {
222                // This is a horizontal rule in the body, skip it
223                pos = abs_pos + 3; // Skip past "---"
224                continue;
225            }
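            // Illustrative example: in the snippet below, the middle `---` has blank
            // lines on both sides, so it is treated as a horizontal rule and left in
            // the body rather than opening a metadata block:
            //
            //     First paragraph.
            //
            //     ---
            //
            //     Second paragraph.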
226
227            // A metadata block opening must be immediately followed by a non-blank
228            // line (the start of its YAML content)
229            if followed_by_blank {
230                // --- followed by blank line but NOT preceded by blank line
231                // This is NOT a metadata block opening, skip it
232                pos = abs_pos + 3;
233                continue;
234            }
235
236            // Found potential metadata block opening (followed by non-blank line)
237            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
238            let rest = &markdown[content_start..];
239
240            // First try to find delimiters with trailing newlines
241            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
242            let closing_with_newline = closing_patterns
243                .iter()
244                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
245                .min_by_key(|(p, _)| *p);
246
247            // Also check for closing at end of document (no trailing newline)
248            let closing_at_eof = ["\n---", "\r\n---"]
249                .iter()
250                .filter_map(|delim| {
251                    rest.find(delim).and_then(|p| {
252                        if p + delim.len() == rest.len() {
253                            Some((p, delim.len()))
254                        } else {
255                            None
256                        }
257                    })
258                })
259                .min_by_key(|(p, _)| *p);
260
261            let closing_result = match (closing_with_newline, closing_at_eof) {
262                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
263                (Some(_), Some(_)) => closing_with_newline,
264                (Some(_), None) => closing_with_newline,
265                (None, Some(_)) => closing_at_eof,
266                (None, None) => None,
267            };
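            // Illustrative example: "---\ntitle: t\n---\n\nBody" closes with a trailing
            // newline, while "---\ntitle: t\n---" at the very end of the document is
            // closed by the EOF patterns above.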
268
269            if let Some((closing_pos, closing_len)) = closing_result {
270                let abs_closing_pos = content_start + closing_pos;
271                let content = &markdown[content_start..abs_closing_pos];
272
273                // Check YAML size limit
274                if content.len() > crate::error::MAX_YAML_SIZE {
275                    return Err(format!(
276                        "YAML block too large: {} bytes (max: {} bytes)",
277                        content.len(),
278                        crate::error::MAX_YAML_SIZE
279                    )
280                    .into());
281                }
282
283                // Parse YAML content to check for reserved keys (QUILL, SCOPE)
284                // First, try to parse as YAML
285                let (tag, quill_name, yaml_content) = if !content.is_empty() {
286                    // Try to parse the YAML to check for reserved keys
287                    match serde_yaml::from_str::<serde_yaml::Value>(content) {
288                        Ok(yaml_value) => {
289                            if let Some(mapping) = yaml_value.as_mapping() {
290                                let quill_key = serde_yaml::Value::String("QUILL".to_string());
291                                let scope_key = serde_yaml::Value::String("SCOPE".to_string());
292
293                                let has_quill = mapping.contains_key(&quill_key);
294                                let has_scope = mapping.contains_key(&scope_key);
295
296                                if has_quill && has_scope {
297                                    return Err(
298                                        "Cannot specify both QUILL and SCOPE in the same block"
299                                            .into(),
300                                    );
301                                }
302
303                                if has_quill {
304                                    // Extract quill name
305                                    let quill_value = mapping.get(&quill_key).unwrap();
306                                    let quill_name_str = quill_value
307                                        .as_str()
308                                        .ok_or_else(|| "QUILL value must be a string")?;
309
310                                    if !is_valid_tag_name(quill_name_str) {
311                                        return Err(format!(
312                                            "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
313                                            quill_name_str
314                                        )
315                                        .into());
316                                    }
317
318                                    // Remove QUILL from the YAML content for processing
319                                    let mut new_mapping = mapping.clone();
320                                    new_mapping.remove(&quill_key);
321                                    let new_yaml = serde_yaml::to_string(&new_mapping)
322                                        .map_err(|e| format!("Failed to serialize YAML: {}", e))?;
323
324                                    (None, Some(quill_name_str.to_string()), new_yaml)
325                                } else if has_scope {
326                                    // Extract scope field name
327                                    let scope_value = mapping.get(&scope_key).unwrap();
328                                    let field_name = scope_value
329                                        .as_str()
330                                        .ok_or_else(|| "SCOPE value must be a string")?;
331
332                                    if !is_valid_tag_name(field_name) {
333                                        return Err(format!(
334                                            "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
335                                            field_name
336                                        )
337                                        .into());
338                                    }
339
340                                    if field_name == BODY_FIELD {
341                                        return Err(format!(
342                                            "Cannot use reserved field name '{}' as SCOPE value",
343                                            BODY_FIELD
344                                        )
345                                        .into());
346                                    }
347
348                                    // Remove SCOPE from the YAML content for processing
349                                    let mut new_mapping = mapping.clone();
350                                    new_mapping.remove(&scope_key);
351                                    let new_yaml = serde_yaml::to_string(&new_mapping)
352                                        .map_err(|e| format!("Failed to serialize YAML: {}", e))?;
353
354                                    (Some(field_name.to_string()), None, new_yaml)
355                                } else {
356                                    // No reserved keys, treat as normal YAML
357                                    (None, None, content.to_string())
358                                }
359                            } else {
360                                // Not a mapping, treat as normal YAML
361                                (None, None, content.to_string())
362                            }
363                        }
364                        Err(_) => {
365                            // If YAML parsing fails here, we'll catch it later
366                            (None, None, content.to_string())
367                        }
368                    }
369                } else {
370                    (None, None, content.to_string())
371                };
372
373                blocks.push(MetadataBlock {
374                    start: abs_pos,
375                    end: abs_closing_pos + closing_len, // After closing delimiter
376                    yaml_content,
377                    tag,
378                    quill_name,
379                });
380
381                pos = abs_closing_pos + closing_len;
382            } else if abs_pos == 0 {
383                // Frontmatter started but not closed
384                return Err("Frontmatter started but not closed with ---".into());
385            } else {
386                // Not a valid metadata block, skip this position
387                pos = abs_pos + 3;
388            }
389        } else {
390            break;
391        }
392    }
393
394    Ok(blocks)
395}
396
397/// Decompose markdown into frontmatter fields and body
398fn decompose(markdown: &str) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
399    // Check input size limit
400    if markdown.len() > crate::error::MAX_INPUT_SIZE {
401        return Err(format!(
402            "Input too large: {} bytes (max: {} bytes)",
403            markdown.len(),
404            crate::error::MAX_INPUT_SIZE
405        )
406        .into());
407    }
408
409    let mut fields = HashMap::new();
410
411    // Find all metadata blocks
412    let blocks = find_metadata_blocks(markdown)?;
413
414    if blocks.is_empty() {
415        // No metadata blocks, entire content is body
416        fields.insert(
417            BODY_FIELD.to_string(),
418            QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
419        );
420        return Ok(ParsedDocument::new(fields));
421    }
422
423    // Track which attributes are used for tagged blocks
424    let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
425    let mut has_global_frontmatter = false;
426    let mut global_frontmatter_index: Option<usize> = None;
427    let mut quill_name: Option<String> = None;
428
429    // First pass: identify global frontmatter, quill directive, and validate
430    for (idx, block) in blocks.iter().enumerate() {
431        // Check for quill directive
432        if let Some(ref name) = block.quill_name {
433            if quill_name.is_some() {
434                return Err("Multiple quill directives found: only one allowed".into());
435            }
436            quill_name = Some(name.clone());
437        }
438
439        // Check for global frontmatter (no tag and no quill directive)
440        if block.tag.is_none() && block.quill_name.is_none() {
441            if has_global_frontmatter {
442                return Err(
443                    "Multiple global frontmatter blocks found: only one untagged block allowed"
444                        .into(),
445                );
446            }
447            has_global_frontmatter = true;
448            global_frontmatter_index = Some(idx);
449        }
450    }
451
452    // Parse global frontmatter if present
453    if let Some(idx) = global_frontmatter_index {
454        let block = &blocks[idx];
455
456        // Parse YAML frontmatter
457        let yaml_fields: HashMap<String, serde_yaml::Value> = if block.yaml_content.is_empty() {
458            HashMap::new()
459        } else {
460            serde_yaml::from_str(&block.yaml_content)
461                .map_err(|e| yaml_error_to_string(e, "Invalid YAML frontmatter"))?
462        };
463
464        // Check that all tagged blocks don't conflict with global fields
465        // Exception: if the global field is an array, allow it (we'll merge later)
466        for other_block in &blocks {
467            if let Some(ref tag) = other_block.tag {
468                if let Some(global_value) = yaml_fields.get(tag) {
469                    // Check if the global value is an array
470                    if global_value.as_sequence().is_none() {
471                        return Err(format!(
472                            "Name collision: global field '{}' conflicts with tagged attribute",
473                            tag
474                        )
475                        .into());
476                    }
477                }
478            }
479        }
480
481        // Convert YAML values to QuillValue at boundary
482        for (key, value) in yaml_fields {
483            fields.insert(key, QuillValue::from_yaml(value)?);
484        }
485    }
486
487    // Process blocks with quill directives
488    for block in &blocks {
489        if block.quill_name.is_some() {
490            // Quill directive blocks can have YAML content (becomes part of frontmatter)
491            if !block.yaml_content.is_empty() {
492                let yaml_fields: HashMap<String, serde_yaml::Value> =
493                    serde_yaml::from_str(&block.yaml_content)
494                        .map_err(|e| yaml_error_to_string(e, "Invalid YAML in quill block"))?;
495
496                // Check for conflicts with existing fields
497                for key in yaml_fields.keys() {
498                    if fields.contains_key(key) {
499                        return Err(format!(
500                            "Name collision: quill block field '{}' conflicts with existing field",
501                            key
502                        )
503                        .into());
504                    }
505                }
506
507                // Convert YAML values to QuillValue at boundary
508                for (key, value) in yaml_fields {
509                    fields.insert(key, QuillValue::from_yaml(value)?);
510                }
511            }
512        }
513    }
514
515    // Parse tagged blocks
516    for (idx, block) in blocks.iter().enumerate() {
517        if let Some(ref tag_name) = block.tag {
518            // Check if this conflicts with global fields
519            // Exception: if the global field is an array, allow it (we'll merge later)
520            if let Some(existing_value) = fields.get(tag_name) {
521                if existing_value.as_array().is_none() {
522                    return Err(format!(
523                        "Name collision: tagged attribute '{}' conflicts with global field",
524                        tag_name
525                    )
526                    .into());
527                }
528            }
529
530            // Parse YAML metadata
531            let mut item_fields: HashMap<String, serde_yaml::Value> = if block
532                .yaml_content
533                .is_empty()
534            {
535                HashMap::new()
536            } else {
537                serde_yaml::from_str(&block.yaml_content).map_err(|e| {
538                    yaml_error_to_string(e, &format!("Invalid YAML in tagged block '{}'", tag_name))
539                })?
540            };
541
542            // Extract body for this tagged block
543            let body_start = block.end;
544            let body_end = if idx + 1 < blocks.len() {
545                blocks[idx + 1].start
546            } else {
547                markdown.len()
548            };
549            let body = &markdown[body_start..body_end];
550
551            // Add body to item fields
552            item_fields.insert(
553                BODY_FIELD.to_string(),
554                serde_yaml::Value::String(body.to_string()),
555            );
556
557            // Convert HashMap to serde_yaml::Value::Mapping
558            let item_value = serde_yaml::to_value(item_fields)?;
559
560            // Add to collection
561            tagged_attributes
562                .entry(tag_name.clone())
563                .or_default()
564                .push(item_value);
565        }
566    }
567
568    // Extract global body
569    // Body starts after the global frontmatter block if present, otherwise after the quill block
570    // Body ends at the first scope block or EOF
571    let first_non_scope_block_idx = blocks
572        .iter()
573        .position(|b| b.tag.is_none() && b.quill_name.is_none())
574        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));
575
576    let (body_start, body_end) = if let Some(idx) = first_non_scope_block_idx {
577        // Body starts after the first non-scope block (global frontmatter or quill)
578        let start = blocks[idx].end;
579
580        // Body ends at the first scope block after this, or EOF
581        let end = blocks
582            .iter()
583            .skip(idx + 1)
584            .find(|b| b.tag.is_some())
585            .map(|b| b.start)
586            .unwrap_or(markdown.len());
587
588        (start, end)
589    } else {
590        // No global frontmatter or quill block - body is everything before the first scope block
591        let end = blocks
592            .iter()
593            .find(|b| b.tag.is_some())
594            .map(|b| b.start)
595            .unwrap_or(0);
596
597        (0, end)
598    };
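    // Illustrative layout:
    //
    //     ---              <- global frontmatter block
    //     title: T
    //     ---
    //     Global body      <- body_start (end of the frontmatter block)
    //     ---              <- first SCOPE block: body_end (its start position)
    //     SCOPE: items
    //     ---
    //     Item body        <- belongs to the SCOPE item, not the global body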
599
600    let global_body = &markdown[body_start..body_end];
601
602    fields.insert(
603        BODY_FIELD.to_string(),
604        QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
605    );
606
607    // Add all tagged collections to fields (convert to QuillValue)
608    // If a field already exists and is an array, merge the new items into it
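    // e.g. a global `items:` array with two entries plus two `SCOPE: items` blocks
    // yields a four-element `items` array, with the global entries first (see
    // `test_global_array_merged_with_scope` below).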
609    for (tag_name, items) in tagged_attributes {
610        if let Some(existing_value) = fields.get(&tag_name) {
611            // The existing value must be an array (checked earlier)
612            if let Some(existing_array) = existing_value.as_array() {
613                // Convert new items from YAML to JSON
614                let new_items_json: Vec<serde_json::Value> = items
615                    .into_iter()
616                    .map(|yaml_val| {
617                        serde_json::to_value(&yaml_val)
618                            .map_err(|e| format!("Failed to convert YAML to JSON: {}", e))
619                    })
620                    .collect::<Result<Vec<_>, _>>()?;
621
622                // Combine existing and new items
623                let mut merged_array = existing_array.clone();
624                merged_array.extend(new_items_json);
625
626                // Create QuillValue from merged JSON array
627                let quill_value = QuillValue::from_json(serde_json::Value::Array(merged_array));
628                fields.insert(tag_name, quill_value);
629            } else {
630                // This should not happen due to earlier validation, but handle it gracefully
631                return Err(format!(
632                    "Internal error: field '{}' exists but is not an array",
633                    tag_name
634                )
635                .into());
636            }
637        } else {
638            // No existing field, just create a new sequence
639            let quill_value = QuillValue::from_yaml(serde_yaml::Value::Sequence(items))?;
640            fields.insert(tag_name, quill_value);
641        }
642    }
643
644    let mut parsed = ParsedDocument::new(fields);
645
646    // Set quill tag if present
647    if let Some(name) = quill_name {
648        parsed.quill_tag = Some(name);
649    }
650
651    Ok(parsed)
652}
653
654#[cfg(test)]
655mod tests {
656    use super::*;
657
658    #[test]
659    fn test_no_frontmatter() {
660        let markdown = "# Hello World\n\nThis is a test.";
661        let doc = decompose(markdown).unwrap();
662
663        assert_eq!(doc.body(), Some(markdown));
664        assert_eq!(doc.fields().len(), 1);
665    }
666
667    #[test]
668    fn test_with_frontmatter() {
669        let markdown = r#"---
670title: Test Document
671author: Test Author
672---
673
674# Hello World
675
676This is the body."#;
677
678        let doc = decompose(markdown).unwrap();
679
680        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
681        assert_eq!(
682            doc.get_field("title").unwrap().as_str().unwrap(),
683            "Test Document"
684        );
685        assert_eq!(
686            doc.get_field("author").unwrap().as_str().unwrap(),
687            "Test Author"
688        );
689        assert_eq!(doc.fields().len(), 3); // title, author, body
690    }
691
692    #[test]
693    fn test_complex_yaml_frontmatter() {
694        let markdown = r#"---
695title: Complex Document
696tags:
697  - test
698  - yaml
699metadata:
700  version: 1.0
701  nested:
702    field: value
703---
704
705Content here."#;
706
707        let doc = decompose(markdown).unwrap();
708
709        assert_eq!(doc.body(), Some("\nContent here."));
710        assert_eq!(
711            doc.get_field("title").unwrap().as_str().unwrap(),
712            "Complex Document"
713        );
714
715        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
716        assert_eq!(tags.len(), 2);
717        assert_eq!(tags[0].as_str().unwrap(), "test");
718        assert_eq!(tags[1].as_str().unwrap(), "yaml");
719    }
720
721    #[test]
722    fn test_with_defaults_empty_document() {
723        use crate::quill::FieldSchema;
724        use std::collections::HashMap;
725
726        let mut field_schemas = HashMap::new();
727        let mut schema1 = FieldSchema::new("status".to_string(), "Document status".to_string());
728        schema1.default = Some(QuillValue::from_json(serde_json::json!("draft")));
729        field_schemas.insert("status".to_string(), schema1);
730
731        let mut schema2 = FieldSchema::new("version".to_string(), "Version number".to_string());
732        schema2.default = Some(QuillValue::from_json(serde_json::json!(1)));
733        field_schemas.insert("version".to_string(), schema2);
734
735        // Create an empty parsed document
736        let doc = ParsedDocument::new(HashMap::new());
737        let doc_with_defaults = doc.with_defaults(&field_schemas);
738
739        // Check that defaults were applied
740        assert_eq!(
741            doc_with_defaults
742                .get_field("status")
743                .unwrap()
744                .as_str()
745                .unwrap(),
746            "draft"
747        );
748        assert_eq!(
749            doc_with_defaults
750                .get_field("version")
751                .unwrap()
752                .as_number()
753                .unwrap()
754                .as_i64()
755                .unwrap(),
756            1
757        );
758    }
759
760    #[test]
761    fn test_with_defaults_preserves_existing_values() {
762        use crate::quill::FieldSchema;
763        use std::collections::HashMap;
764
765        let mut field_schemas = HashMap::new();
766        let mut schema = FieldSchema::new("status".to_string(), "Document status".to_string());
767        schema.default = Some(QuillValue::from_json(serde_json::json!("draft")));
768        field_schemas.insert("status".to_string(), schema);
769
770        // Create document with existing status
771        let mut fields = HashMap::new();
772        fields.insert(
773            "status".to_string(),
774            QuillValue::from_json(serde_json::json!("published")),
775        );
776        let doc = ParsedDocument::new(fields);
777
778        let doc_with_defaults = doc.with_defaults(&field_schemas);
779
780        // Existing value should be preserved
781        assert_eq!(
782            doc_with_defaults
783                .get_field("status")
784                .unwrap()
785                .as_str()
786                .unwrap(),
787            "published"
788        );
789    }
790
791    #[test]
792    fn test_with_defaults_partial_application() {
793        use crate::quill::FieldSchema;
794        use std::collections::HashMap;
795
796        let mut field_schemas = HashMap::new();
797
798        let mut schema1 = FieldSchema::new("status".to_string(), "Document status".to_string());
799        schema1.default = Some(QuillValue::from_json(serde_json::json!("draft")));
800        field_schemas.insert("status".to_string(), schema1);
801
802        let mut schema2 = FieldSchema::new("version".to_string(), "Version number".to_string());
803        schema2.default = Some(QuillValue::from_json(serde_json::json!(1)));
804        field_schemas.insert("version".to_string(), schema2);
805
806        // Create document with only one field
807        let mut fields = HashMap::new();
808        fields.insert(
809            "status".to_string(),
810            QuillValue::from_json(serde_json::json!("published")),
811        );
812        let doc = ParsedDocument::new(fields);
813
814        let doc_with_defaults = doc.with_defaults(&field_schemas);
815
816        // Existing field preserved, missing field gets default
817        assert_eq!(
818            doc_with_defaults
819                .get_field("status")
820                .unwrap()
821                .as_str()
822                .unwrap(),
823            "published"
824        );
825        assert_eq!(
826            doc_with_defaults
827                .get_field("version")
828                .unwrap()
829                .as_number()
830                .unwrap()
831                .as_i64()
832                .unwrap(),
833            1
834        );
835    }
836
837    #[test]
838    fn test_with_defaults_no_defaults() {
839        use crate::quill::FieldSchema;
840        use std::collections::HashMap;
841
842        let mut field_schemas = HashMap::new();
843
844        // Create schema without default
845        let schema = FieldSchema::new("title".to_string(), "Document title".to_string());
846        field_schemas.insert("title".to_string(), schema);
847
848        let doc = ParsedDocument::new(HashMap::new());
849        let doc_with_defaults = doc.with_defaults(&field_schemas);
850
851        // No defaults should be applied
852        assert!(doc_with_defaults.get_field("title").is_none());
853    }
854
855    #[test]
856    fn test_with_defaults_complex_types() {
857        use crate::quill::FieldSchema;
858        use std::collections::HashMap;
859
860        let mut field_schemas = HashMap::new();
861
862        let mut schema = FieldSchema::new("tags".to_string(), "Document tags".to_string());
863        schema.default = Some(QuillValue::from_json(serde_json::json!(["default", "tag"])));
864        field_schemas.insert("tags".to_string(), schema);
865
866        let doc = ParsedDocument::new(HashMap::new());
867        let doc_with_defaults = doc.with_defaults(&field_schemas);
868
869        // Complex default value should be applied
870        let tags = doc_with_defaults
871            .get_field("tags")
872            .unwrap()
873            .as_sequence()
874            .unwrap();
875        assert_eq!(tags.len(), 2);
876        assert_eq!(tags[0].as_str().unwrap(), "default");
877        assert_eq!(tags[1].as_str().unwrap(), "tag");
878    }
879
880    #[test]
881    fn test_invalid_yaml() {
882        let markdown = r#"---
883title: [invalid yaml
884author: missing close bracket
885---
886
887Content here."#;
888
889        let result = decompose(markdown);
890        assert!(result.is_err());
891        assert!(result
892            .unwrap_err()
893            .to_string()
894            .contains("Invalid YAML frontmatter"));
895    }
896
897    #[test]
898    fn test_unclosed_frontmatter() {
899        let markdown = r#"---
900title: Test
901author: Test Author
902
903Content without closing ---"#;
904
905        let result = decompose(markdown);
906        assert!(result.is_err());
907        assert!(result.unwrap_err().to_string().contains("not closed"));
908    }
909
910    // Extended metadata tests
911
912    #[test]
913    fn test_basic_tagged_block() {
914        let markdown = r#"---
915title: Main Document
916---
917
918Main body content.
919
920---
921SCOPE: items
922name: Item 1
923---
924
925Body of item 1."#;
926
927        let doc = decompose(markdown).unwrap();
928
929        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
930        assert_eq!(
931            doc.get_field("title").unwrap().as_str().unwrap(),
932            "Main Document"
933        );
934
935        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
936        assert_eq!(items.len(), 1);
937
938        let item = items[0].as_object().unwrap();
939        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
940        assert_eq!(
941            item.get("body").unwrap().as_str().unwrap(),
942            "\nBody of item 1."
943        );
944    }
945
946    #[test]
947    fn test_multiple_tagged_blocks() {
948        let markdown = r#"---
949SCOPE: items
950name: Item 1
951tags: [a, b]
952---
953
954First item body.
955
956---
957SCOPE: items
958name: Item 2
959tags: [c, d]
960---
961
962Second item body."#;
963
964        let doc = decompose(markdown).unwrap();
965
966        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
967        assert_eq!(items.len(), 2);
968
969        let item1 = items[0].as_object().unwrap();
970        assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");
971
972        let item2 = items[1].as_object().unwrap();
973        assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
974    }
975
976    #[test]
977    fn test_mixed_global_and_tagged() {
978        let markdown = r#"---
979title: Global
980author: John Doe
981---
982
983Global body.
984
985---
986SCOPE: sections
987title: Section 1
988---
989
990Section 1 content.
991
992---
993SCOPE: sections
994title: Section 2
995---
996
997Section 2 content."#;
998
999        let doc = decompose(markdown).unwrap();
1000
1001        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
1002        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));
1003
1004        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1005        assert_eq!(sections.len(), 2);
1006    }
1007
1008    #[test]
1009    fn test_empty_tagged_metadata() {
1010        let markdown = r#"---
1011SCOPE: items
1012---
1013
1014Body without metadata."#;
1015
1016        let doc = decompose(markdown).unwrap();
1017
1018        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1019        assert_eq!(items.len(), 1);
1020
1021        let item = items[0].as_object().unwrap();
1022        assert_eq!(
1023            item.get("body").unwrap().as_str().unwrap(),
1024            "\nBody without metadata."
1025        );
1026    }
1027
1028    #[test]
1029    fn test_tagged_block_without_body() {
1030        let markdown = r#"---
1031SCOPE: items
1032name: Item
1033---"#;
1034
1035        let doc = decompose(markdown).unwrap();
1036
1037        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1038        assert_eq!(items.len(), 1);
1039
1040        let item = items[0].as_object().unwrap();
1041        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
1042    }
1043
1044    #[test]
1045    fn test_name_collision_global_and_tagged() {
1046        let markdown = r#"---
1047items: "global value"
1048---
1049
1050Body
1051
1052---
1053SCOPE: items
1054name: Item
1055---
1056
1057Item body"#;
1058
1059        let result = decompose(markdown);
1060        assert!(result.is_err());
1061        assert!(result.unwrap_err().to_string().contains("collision"));
1062    }
1063
1064    #[test]
1065    fn test_global_array_merged_with_scope() {
1066        // When global frontmatter has an array field with the same name as a SCOPE,
1067        // the SCOPE items should be added to the array
1068        let markdown = r#"---
1069items:
1070  - name: Global Item 1
1071    value: 100
1072  - name: Global Item 2
1073    value: 200
1074---
1075
1076Global body
1077
1078---
1079SCOPE: items
1080name: Scope Item 1
1081value: 300
1082---
1083
1084Scope item 1 body
1085
1086---
1087SCOPE: items
1088name: Scope Item 2
1089value: 400
1090---
1091
1092Scope item 2 body"#;
1093
1094        let doc = decompose(markdown).unwrap();
1095
1096        // Verify the items array has all 4 items (2 from global + 2 from SCOPE)
1097        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1098        assert_eq!(items.len(), 4);
1099
1100        // Verify first two items (from global array)
1101        let item1 = items[0].as_object().unwrap();
1102        assert_eq!(
1103            item1.get("name").unwrap().as_str().unwrap(),
1104            "Global Item 1"
1105        );
1106        assert_eq!(item1.get("value").unwrap().as_i64().unwrap(), 100);
1107
1108        let item2 = items[1].as_object().unwrap();
1109        assert_eq!(
1110            item2.get("name").unwrap().as_str().unwrap(),
1111            "Global Item 2"
1112        );
1113        assert_eq!(item2.get("value").unwrap().as_i64().unwrap(), 200);
1114
1115        // Verify last two items (from SCOPE blocks)
1116        let item3 = items[2].as_object().unwrap();
1117        assert_eq!(item3.get("name").unwrap().as_str().unwrap(), "Scope Item 1");
1118        assert_eq!(item3.get("value").unwrap().as_i64().unwrap(), 300);
1119        assert_eq!(
1120            item3.get("body").unwrap().as_str().unwrap(),
1121            "\nScope item 1 body\n\n"
1122        );
1123
1124        let item4 = items[3].as_object().unwrap();
1125        assert_eq!(item4.get("name").unwrap().as_str().unwrap(), "Scope Item 2");
1126        assert_eq!(item4.get("value").unwrap().as_i64().unwrap(), 400);
1127        assert_eq!(
1128            item4.get("body").unwrap().as_str().unwrap(),
1129            "\nScope item 2 body"
1130        );
1131    }
1132
1133    #[test]
1134    fn test_empty_global_array_with_scope() {
1135        // Edge case: global frontmatter has an empty array
1136        let markdown = r#"---
1137items: []
1138---
1139
1140Global body
1141
1142---
1143SCOPE: items
1144name: Item 1
1145---
1146
1147Item 1 body"#;
1148
1149        let doc = decompose(markdown).unwrap();
1150
1151        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1152        assert_eq!(items.len(), 1);
1153
1154        let item = items[0].as_object().unwrap();
1155        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1156    }
1157
1158    #[test]
1159    fn test_reserved_field_name() {
1160        let markdown = r#"---
1161SCOPE: body
1162content: Test
1163---"#;
1164
1165        let result = decompose(markdown);
1166        assert!(result.is_err());
1167        assert!(result.unwrap_err().to_string().contains("reserved"));
1168    }
1169
1170    #[test]
1171    fn test_invalid_tag_syntax() {
1172        let markdown = r#"---
1173SCOPE: Invalid-Name
1174title: Test
1175---"#;
1176
1177        let result = decompose(markdown);
1178        assert!(result.is_err());
1179        assert!(result
1180            .unwrap_err()
1181            .to_string()
1182            .contains("Invalid field name"));
1183    }
1184
1185    #[test]
1186    fn test_multiple_global_frontmatter_blocks() {
1187        let markdown = r#"---
1188title: First
1189---
1190
1191Body
1192
1193---
1194author: Second
1195---
1196
1197More body"#;
1198
1199        let result = decompose(markdown);
1200        assert!(result.is_err());
1201        assert!(result
1202            .unwrap_err()
1203            .to_string()
1204            .contains("Multiple global frontmatter"));
1205    }
1206
1207    #[test]
1208    fn test_adjacent_blocks_different_tags() {
1209        let markdown = r#"---
1210SCOPE: items
1211name: Item 1
1212---
1213
1214Item 1 body
1215
1216---
1217SCOPE: sections
1218title: Section 1
1219---
1220
1221Section 1 body"#;
1222
1223        let doc = decompose(markdown).unwrap();
1224
1225        assert!(doc.get_field("items").is_some());
1226        assert!(doc.get_field("sections").is_some());
1227
1228        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1229        assert_eq!(items.len(), 1);
1230
1231        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1232        assert_eq!(sections.len(), 1);
1233    }
1234
1235    #[test]
1236    fn test_order_preservation() {
1237        let markdown = r#"---
1238SCOPE: items
1239id: 1
1240---
1241
1242First
1243
1244---
1245SCOPE: items
1246id: 2
1247---
1248
1249Second
1250
1251---
1252SCOPE: items
1253id: 3
1254---
1255
1256Third"#;
1257
1258        let doc = decompose(markdown).unwrap();
1259
1260        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1261        assert_eq!(items.len(), 3);
1262
1263        for (i, item) in items.iter().enumerate() {
1264            let mapping = item.as_object().unwrap();
1265            let id = mapping.get("id").unwrap().as_i64().unwrap();
1266            assert_eq!(id, (i + 1) as i64);
1267        }
1268    }
1269
1270    #[test]
1271    fn test_product_catalog_integration() {
1272        let markdown = r#"---
1273title: Product Catalog
1274author: John Doe
1275date: 2024-01-01
1276---
1277
1278This is the main catalog description.
1279
1280---
1281SCOPE: products
1282name: Widget A
1283price: 19.99
1284sku: WID-001
1285---
1286
1287The **Widget A** is our most popular product.
1288
1289---
1290SCOPE: products
1291name: Gadget B
1292price: 29.99
1293sku: GAD-002
1294---
1295
1296The **Gadget B** is perfect for professionals.
1297
1298---
1299SCOPE: reviews
1300product: Widget A
1301rating: 5
1302---
1303
1304"Excellent product! Highly recommended."
1305
1306---
1307SCOPE: reviews
1308product: Gadget B
1309rating: 4
1310---
1311
1312"Very good, but a bit pricey.""#;
1313
1314        let doc = decompose(markdown).unwrap();
1315
1316        // Verify global fields
1317        assert_eq!(
1318            doc.get_field("title").unwrap().as_str().unwrap(),
1319            "Product Catalog"
1320        );
1321        assert_eq!(
1322            doc.get_field("author").unwrap().as_str().unwrap(),
1323            "John Doe"
1324        );
1325        assert_eq!(
1326            doc.get_field("date").unwrap().as_str().unwrap(),
1327            "2024-01-01"
1328        );
1329
1330        // Verify global body
1331        assert!(doc.body().unwrap().contains("main catalog description"));
1332
1333        // Verify products collection
1334        let products = doc.get_field("products").unwrap().as_sequence().unwrap();
1335        assert_eq!(products.len(), 2);
1336
1337        let product1 = products[0].as_object().unwrap();
1338        assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
1339        assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);
1340
1341        // Verify reviews collection
1342        let reviews = doc.get_field("reviews").unwrap().as_sequence().unwrap();
1343        assert_eq!(reviews.len(), 2);
1344
1345        let review1 = reviews[0].as_object().unwrap();
1346        assert_eq!(
1347            review1.get("product").unwrap().as_str().unwrap(),
1348            "Widget A"
1349        );
1350        assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);
1351
1352        // Total fields: title, author, date, body, products, reviews = 6
1353        assert_eq!(doc.fields().len(), 6);
1354    }
1355
1356    #[test]
1357    fn test_quill_directive() {
1358        let markdown = r#"---
1359QUILL: usaf_memo
1360memo_for: [ORG/SYMBOL]
1361memo_from: [ORG/SYMBOL]
1362---
1363
1364This is the memo body."#;
1365
1366        let doc = decompose(markdown).unwrap();
1367
1368        // Verify quill tag is set
1369        assert_eq!(doc.quill_tag(), Some("usaf_memo"));
1370
1371        // Verify fields from quill block become frontmatter
1372        assert_eq!(
1373            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
1374                .as_str()
1375                .unwrap(),
1376            "ORG/SYMBOL"
1377        );
1378
1379        // Verify body
1380        assert_eq!(doc.body(), Some("\nThis is the memo body."));
1381    }
1382
1383    #[test]
1384    fn test_quill_with_scope_blocks() {
1385        let markdown = r#"---
1386QUILL: document
1387title: Test Document
1388---
1389
1390Main body.
1391
1392---
1393SCOPE: sections
1394name: Section 1
1395---
1396
1397Section 1 body."#;
1398
1399        let doc = decompose(markdown).unwrap();
1400
1401        // Verify quill tag
1402        assert_eq!(doc.quill_tag(), Some("document"));
1403
1404        // Verify global field from quill block
1405        assert_eq!(
1406            doc.get_field("title").unwrap().as_str().unwrap(),
1407            "Test Document"
1408        );
1409
1410        // Verify scope blocks work
1411        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
1412        assert_eq!(sections.len(), 1);
1413
1414        // Verify body
1415        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
1416    }
1417
1418    #[test]
1419    fn test_multiple_quill_directives_error() {
1420        let markdown = r#"---
1421QUILL: first
1422---
1423
1424---
1425QUILL: second
1426---"#;
1427
1428        let result = decompose(markdown);
1429        assert!(result.is_err());
1430        assert!(result
1431            .unwrap_err()
1432            .to_string()
1433            .contains("Multiple quill directives"));
1434    }
1435
1436    #[test]
1437    fn test_invalid_quill_name() {
1438        let markdown = r#"---
1439QUILL: Invalid-Name
1440---"#;
1441
1442        let result = decompose(markdown);
1443        assert!(result.is_err());
1444        assert!(result
1445            .unwrap_err()
1446            .to_string()
1447            .contains("Invalid quill name"));
1448    }
1449
1450    #[test]
1451    fn test_quill_wrong_value_type() {
1452        let markdown = r#"---
1453QUILL: 123
1454---"#;
1455
1456        let result = decompose(markdown);
1457        assert!(result.is_err());
1458        assert!(result
1459            .unwrap_err()
1460            .to_string()
1461            .contains("QUILL value must be a string"));
1462    }
1463
1464    #[test]
1465    fn test_scope_wrong_value_type() {
1466        let markdown = r#"---
1467SCOPE: 123
1468---"#;
1469
1470        let result = decompose(markdown);
1471        assert!(result.is_err());
1472        assert!(result
1473            .unwrap_err()
1474            .to_string()
1475            .contains("SCOPE value must be a string"));
1476    }
1477
1478    #[test]
1479    fn test_both_quill_and_scope_error() {
1480        let markdown = r#"---
1481QUILL: test
1482SCOPE: items
1483---"#;
1484
1485        let result = decompose(markdown);
1486        assert!(result.is_err());
1487        assert!(result
1488            .unwrap_err()
1489            .to_string()
1490            .contains("Cannot specify both QUILL and SCOPE"));
1491    }
1492
1493    #[test]
1494    fn test_blank_lines_in_frontmatter() {
1495        // New parsing standard: blank lines are allowed within YAML blocks
1496        let markdown = r#"---
1497title: Test Document
1498author: Test Author
1499
1500description: This has a blank line above it
1501tags:
1502  - one
1503  - two
1504---
1505
1506# Hello World
1507
1508This is the body."#;
1509
1510        let doc = decompose(markdown).unwrap();
1511
1512        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
1513        assert_eq!(
1514            doc.get_field("title").unwrap().as_str().unwrap(),
1515            "Test Document"
1516        );
1517        assert_eq!(
1518            doc.get_field("author").unwrap().as_str().unwrap(),
1519            "Test Author"
1520        );
1521        assert_eq!(
1522            doc.get_field("description").unwrap().as_str().unwrap(),
1523            "This has a blank line above it"
1524        );
1525
1526        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
1527        assert_eq!(tags.len(), 2);
1528    }
1529
1530    #[test]
1531    fn test_blank_lines_in_scope_blocks() {
1532        // Blank lines should be allowed in SCOPE blocks too
1533        let markdown = r#"---
1534SCOPE: items
1535name: Item 1
1536
1537price: 19.99
1538
1539tags:
1540  - electronics
1541  - gadgets
1542---
1543
1544Body of item 1."#;
1545
1546        let doc = decompose(markdown).unwrap();
1547
1548        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
1549        assert_eq!(items.len(), 1);
1550
1551        let item = items[0].as_object().unwrap();
1552        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
1553        assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);
1554
1555        let tags = item.get("tags").unwrap().as_array().unwrap();
1556        assert_eq!(tags.len(), 2);
1557    }
1558
1559    #[test]
1560    fn test_horizontal_rule_with_blank_lines_above_and_below() {
1561        // Horizontal rule: blank lines both above AND below the ---
1562        let markdown = r#"---
1563title: Test
1564---
1565
1566First paragraph.
1567
1568---
1569
1570Second paragraph."#;
1571
1572        let doc = decompose(markdown).unwrap();
1573
1574        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1575
1576        // The body should contain the horizontal rule (---) as part of the content
1577        let body = doc.body().unwrap();
1578        assert!(body.contains("First paragraph."));
1579        assert!(body.contains("---"));
1580        assert!(body.contains("Second paragraph."));
1581    }
1582
1583    #[test]
1584    fn test_horizontal_rule_not_preceded_by_blank() {
1585        // --- not preceded by blank line but followed by blank line is NOT a horizontal rule
1586        // It's also NOT a valid metadata block opening (since it's followed by blank)
1587        let markdown = r#"---
1588title: Test
1589---
1590
1591First paragraph.
1592---
1593
1594Second paragraph."#;
1595
1596        let doc = decompose(markdown).unwrap();
1597
1598        let body = doc.body().unwrap();
1599        // The second --- should be in the body as text (not a horizontal rule since no blank above)
1600        assert!(body.contains("---"));
1601    }
1602
1603    #[test]
1604    fn test_multiple_blank_lines_in_yaml() {
1605        // Multiple blank lines should also be allowed
1606        let markdown = r#"---
1607title: Test
1608
1609
1610author: John Doe
1611
1612
1613version: 1.0
1614---
1615
1616Body content."#;
1617
1618        let doc = decompose(markdown).unwrap();
1619
1620        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
1621        assert_eq!(
1622            doc.get_field("author").unwrap().as_str().unwrap(),
1623            "John Doe"
1624        );
1625        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1626    }
1627}
1628#[cfg(test)]
1629mod demo_file_test {
1630    use super::*;
1631
1632    #[test]
1633    fn test_extended_metadata_demo_file() {
1634        let markdown = include_str!("../../quillmark-fixtures/resources/extended_metadata_demo.md");
1635        let doc = decompose(markdown).unwrap();
1636
1637        // Verify global fields
1638        assert_eq!(
1639            doc.get_field("title").unwrap().as_str().unwrap(),
1640            "Extended Metadata Demo"
1641        );
1642        assert_eq!(
1643            doc.get_field("author").unwrap().as_str().unwrap(),
1644            "Quillmark Team"
1645        );
1646        // version is parsed as a number by YAML
1647        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
1648
1649        // Verify body
1650        assert!(doc
1651            .body()
1652            .unwrap()
1653            .contains("extended YAML metadata standard"));
1654
1655        // Verify features collection
1656        let features = doc.get_field("features").unwrap().as_sequence().unwrap();
1657        assert_eq!(features.len(), 3);
1658
1659        // Verify use_cases collection
1660        let use_cases = doc.get_field("use_cases").unwrap().as_sequence().unwrap();
1661        assert_eq!(use_cases.len(), 2);
1662
1663        // Check first feature
1664        let feature1 = features[0].as_object().unwrap();
1665        assert_eq!(
1666            feature1.get("name").unwrap().as_str().unwrap(),
1667            "Tag Directives"
1668        );
1669    }
1670
1671    #[test]
1672    fn test_input_size_limit() {
1673        // Create markdown larger than MAX_INPUT_SIZE (10 MB)
1674        let size = crate::error::MAX_INPUT_SIZE + 1;
1675        let large_markdown = "a".repeat(size);
1676
1677        let result = decompose(&large_markdown);
1678        assert!(result.is_err());
1679
1680        let err_msg = result.unwrap_err().to_string();
1681        assert!(err_msg.contains("Input too large"));
1682    }
1683
1684    #[test]
1685    fn test_yaml_size_limit() {
1686        // Create YAML block larger than MAX_YAML_SIZE (1 MB)
1687        let mut markdown = String::from("---\n");
1688
1689        // Create a very large YAML field
1690        let size = crate::error::MAX_YAML_SIZE + 1;
1691        markdown.push_str("data: \"");
1692        markdown.push_str(&"x".repeat(size));
1693        markdown.push_str("\"\n---\n\nBody");
1694
1695        let result = decompose(&markdown);
1696        assert!(result.is_err());
1697
1698        let err_msg = result.unwrap_err().to_string();
1699        assert!(err_msg.contains("YAML block too large"));
1700    }
1701
1702    #[test]
1703    fn test_input_within_size_limit() {
1704        // Create markdown just under the limit
1705        let size = 1000; // Much smaller than limit
1706        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));
1707
1708        let result = decompose(&markdown);
1709        assert!(result.is_ok());
1710    }
1711
1712    #[test]
1713    fn test_yaml_within_size_limit() {
1714        // Create YAML block well within the limit
1715        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";
1716
1717        let result = decompose(markdown);
1718        assert!(result.is_ok());
1719    }
1720}