quillmark_core/
parse.rs

//! # Parsing Module
//!
//! Parsing functionality for markdown documents with YAML frontmatter.
//!
//! ## Overview
//!
//! The `parse` module provides [`ParsedDocument::from_markdown`] for parsing markdown documents
//! with optional YAML frontmatter into a [`ParsedDocument`].
//!
//! ## Key Types
//!
//! - [`ParsedDocument`]: Container for parsed frontmatter fields and body content
//! - [`BODY_FIELD`]: Constant for the field name that stores the document body
//!
//! ## Examples
//!
//! ### Basic Parsing
//!
//! ```
//! use quillmark_core::ParsedDocument;
//!
//! let markdown = r#"---
//! title: My Document
//! author: John Doe
//! ---
//!
//! # Introduction
//!
//! Document content here.
//! "#;
//!
//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
//! let title = doc.get_field("title")
//!     .and_then(|v| v.as_str())
//!     .unwrap_or("Untitled");
//! ```
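//!
//! ### Extended Metadata (SCOPE Blocks)
//!
//! A short sketch of parsing a tagged `SCOPE` block from the Extended YAML
//! Metadata Standard; it uses only the accessors shown elsewhere in this module.
//!
//! ```
//! use quillmark_core::ParsedDocument;
//!
//! let markdown = r#"---
//! title: Catalog
//! ---
//!
//! Catalog body.
//!
//! ---
//! SCOPE: items
//! name: Item 1
//! ---
//!
//! Body of item 1."#;
//!
//! let doc = ParsedDocument::from_markdown(markdown).unwrap();
//! let items = doc.get_field("items").unwrap().as_sequence().unwrap();
//! assert_eq!(items.len(), 1);
//! ```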
//!
//! ## Error Handling
//!
//! The [`ParsedDocument::from_markdown`] function returns errors for:
//! - Malformed YAML syntax
//! - Unclosed frontmatter blocks
//! - Multiple global frontmatter blocks
//! - Both QUILL and SCOPE specified in the same block
//! - Reserved field name usage
//! - Name collisions
//!
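//! For example, an unclosed frontmatter block is rejected (a small sketch; the
//! exact error message may differ):
//!
//! ```
//! use quillmark_core::ParsedDocument;
//!
//! let result = ParsedDocument::from_markdown("---\ntitle: Test\n\nNo closing delimiter");
//! assert!(result.is_err());
//! ```
//!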
//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/designs/PARSE.md) for comprehensive documentation of the Extended YAML Metadata Standard.

use std::collections::HashMap;

use crate::value::QuillValue;

/// The field name used to store the document body
pub const BODY_FIELD: &str = "body";

/// Convert a `serde_yaml::Error` into a contextual message, appending the
/// error's line and column location when one is available.
fn yaml_error_to_string(e: serde_yaml::Error, context: &str) -> String {
    let mut msg = format!("{}: {}", context, e);

    if let Some(loc) = e.location() {
        msg.push_str(&format!(" at line {}, column {}", loc.line(), loc.column()));
    }

    msg
}

/// Reserved tag name for quill specification
pub const QUILL_TAG: &str = "quill";

/// A parsed markdown document with frontmatter
#[derive(Debug, Clone)]
pub struct ParsedDocument {
    fields: HashMap<String, QuillValue>,
    quill_tag: String,
}

impl ParsedDocument {
    /// Create a new `ParsedDocument` with the given fields
    pub fn new(fields: HashMap<String, QuillValue>) -> Self {
        Self {
            fields,
            quill_tag: "__default__".to_string(),
        }
    }

    /// Create a `ParsedDocument` from fields and a quill tag
    pub fn with_quill_tag(fields: HashMap<String, QuillValue>, quill_tag: String) -> Self {
        Self { fields, quill_tag }
    }

    /// Create a `ParsedDocument` by parsing a markdown string
    pub fn from_markdown(markdown: &str) -> Result<Self, crate::error::ParseError> {
        decompose(markdown).map_err(|e| crate::error::ParseError::from(e))
    }

    /// Get the quill tag (from the QUILL key, or "__default__" if not specified)
    pub fn quill_tag(&self) -> &str {
        &self.quill_tag
    }

    /// Get the document body
    pub fn body(&self) -> Option<&str> {
        self.fields.get(BODY_FIELD).and_then(|v| v.as_str())
    }

    /// Get a specific field
    pub fn get_field(&self, name: &str) -> Option<&QuillValue> {
        self.fields.get(name)
    }

    /// Get all fields (including body)
    pub fn fields(&self) -> &HashMap<String, QuillValue> {
        &self.fields
    }

    /// Create a new ParsedDocument with default values applied
    ///
    /// This method creates a new ParsedDocument with default values applied for any
    /// fields that are missing from the original document but have defaults specified.
    /// Existing fields are preserved and not overwritten.
    ///
    /// # Arguments
    ///
    /// * `defaults` - A HashMap of field names to their default QuillValues
    ///
    /// # Returns
    ///
    /// A new ParsedDocument with defaults applied for missing fields
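    ///
    /// # Examples
    ///
    /// A minimal sketch; it assumes `QuillValue` is re-exported at the crate
    /// root alongside `ParsedDocument`.
    ///
    /// ```
    /// use std::collections::HashMap;
    /// // Assumes both types are re-exported at the crate root.
    /// use quillmark_core::{ParsedDocument, QuillValue};
    ///
    /// let mut defaults = HashMap::new();
    /// defaults.insert(
    ///     "status".to_string(),
    ///     QuillValue::from_json(serde_json::json!("draft")),
    /// );
    ///
    /// let doc = ParsedDocument::from_markdown("Body only").unwrap();
    /// let doc = doc.with_defaults(&defaults);
    /// assert_eq!(doc.get_field("status").unwrap().as_str(), Some("draft"));
    /// ```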
    pub fn with_defaults(&self, defaults: &HashMap<String, QuillValue>) -> Self {
        let mut fields = self.fields.clone();

        for (field_name, default_value) in defaults {
            // Only apply default if field is missing
            if !fields.contains_key(field_name) {
                fields.insert(field_name.clone(), default_value.clone());
            }
        }

        Self {
            fields,
            quill_tag: self.quill_tag.clone(),
        }
    }

    /// Create a new ParsedDocument with coerced field values
    ///
    /// This method applies type coercions to field values based on the schema.
    /// Coercions include:
    /// - Singular values to arrays when schema expects array
    /// - String "true"/"false" to boolean
    /// - Numbers to boolean (0=false, non-zero=true)
    /// - String numbers to number type
    /// - Boolean to number (true=1, false=0)
    ///
    /// # Arguments
    ///
    /// * `schema` - A JSON Schema object defining expected field types
    ///
    /// # Returns
    ///
    /// A new ParsedDocument with coerced field values
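    ///
    /// # Examples
    ///
    /// A minimal sketch of string-to-number coercion, mirroring the unit tests
    /// below; it assumes `QuillValue` is re-exported at the crate root.
    ///
    /// ```
    /// use std::collections::HashMap;
    /// // Assumes both types are re-exported at the crate root.
    /// use quillmark_core::{ParsedDocument, QuillValue};
    ///
    /// let schema = QuillValue::from_json(serde_json::json!({
    ///     "$schema": "https://json-schema.org/draft/2019-09/schema",
    ///     "type": "object",
    ///     "properties": {
    ///         "count": {"type": "number"}
    ///     }
    /// }));
    ///
    /// let mut fields = HashMap::new();
    /// fields.insert(
    ///     "count".to_string(),
    ///     QuillValue::from_json(serde_json::json!("42")),
    /// );
    ///
    /// let doc = ParsedDocument::new(fields).with_coercion(&schema);
    /// assert_eq!(doc.get_field("count").unwrap().as_i64(), Some(42));
    /// ```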
    pub fn with_coercion(&self, schema: &QuillValue) -> Self {
        use crate::schema::coerce_document;

        let coerced_fields = coerce_document(schema, &self.fields);

        Self {
            fields: coerced_fields,
            quill_tag: self.quill_tag.clone(),
        }
    }
}

#[derive(Debug)]
struct MetadataBlock {
    start: usize, // Position of opening "---"
    end: usize,   // Position after closing "---\n"
    yaml_content: String,
    tag: Option<String>,        // Field name from SCOPE key
    quill_name: Option<String>, // Quill name from QUILL key
}

/// Validate tag name follows pattern [a-z_][a-z0-9_]*
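/// (e.g. `items`, `section_1`, and `_private` are valid; `Invalid-Name` and `1st` are not)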
fn is_valid_tag_name(name: &str) -> bool {
    if name.is_empty() {
        return false;
    }

    let mut chars = name.chars();
    let first = chars.next().unwrap();

    if !first.is_ascii_lowercase() && first != '_' {
        return false;
    }

    for ch in chars {
        if !ch.is_ascii_lowercase() && !ch.is_ascii_digit() && ch != '_' {
            return false;
        }
    }

    true
}

/// Find all metadata blocks in the document
fn find_metadata_blocks(
    markdown: &str,
) -> Result<Vec<MetadataBlock>, Box<dyn std::error::Error + Send + Sync>> {
    let mut blocks = Vec::new();
    let mut pos = 0;

    while pos < markdown.len() {
        // Look for opening "---\n" or "---\r\n"
        let search_str = &markdown[pos..];
        let delimiter_result = if let Some(p) = search_str.find("---\n") {
            Some((p, 4, "\n"))
        } else if let Some(p) = search_str.find("---\r\n") {
            Some((p, 5, "\r\n"))
        } else {
            None
        };

        if let Some((delimiter_pos, delimiter_len, _line_ending)) = delimiter_result {
            let abs_pos = pos + delimiter_pos;

            // Check if the delimiter is at the start of a line
            let is_start_of_line = if abs_pos == 0 {
                true
            } else {
                let char_before = markdown.as_bytes()[abs_pos - 1];
                char_before == b'\n' || char_before == b'\r'
            };

            if !is_start_of_line {
                pos = abs_pos + 1;
                continue;
            }

            let content_start = abs_pos + delimiter_len; // After "---\n" or "---\r\n"

            // Check if this --- is a horizontal rule (blank lines above AND below)
            let preceded_by_blank = if abs_pos > 0 {
                // Check if there's a blank line before the ---
                let before = &markdown[..abs_pos];
                before.ends_with("\n\n") || before.ends_with("\r\n\r\n")
            } else {
                false
            };

            let followed_by_blank = if content_start < markdown.len() {
                markdown[content_start..].starts_with('\n')
                    || markdown[content_start..].starts_with("\r\n")
            } else {
                false
            };

            // Horizontal rule: blank lines both above and below
            if preceded_by_blank && followed_by_blank {
                // This is a horizontal rule in the body, skip it
                pos = abs_pos + 3; // Skip past "---"
                continue;
            }

            // Check if followed by non-blank line (or if we're at document start)
            // This starts a metadata block
            if followed_by_blank {
                // --- followed by blank line but NOT preceded by blank line
                // This is NOT a metadata block opening, skip it
                pos = abs_pos + 3;
                continue;
            }

            // Found potential metadata block opening (followed by non-blank line)
            // Look for closing "\n---\n" or "\r\n---\r\n" etc., OR "\n---" / "\r\n---" at end of document
            let rest = &markdown[content_start..];

            // First try to find delimiters with trailing newlines
            let closing_patterns = ["\n---\n", "\r\n---\r\n", "\n---\r\n", "\r\n---\n"];
            let closing_with_newline = closing_patterns
                .iter()
                .filter_map(|delim| rest.find(delim).map(|p| (p, delim.len())))
                .min_by_key(|(p, _)| *p);

            // Also check for closing at end of document (no trailing newline)
            let closing_at_eof = ["\n---", "\r\n---"]
                .iter()
                .filter_map(|delim| {
                    rest.find(delim).and_then(|p| {
                        if p + delim.len() == rest.len() {
                            Some((p, delim.len()))
                        } else {
                            None
                        }
                    })
                })
                .min_by_key(|(p, _)| *p);

            let closing_result = match (closing_with_newline, closing_at_eof) {
                (Some((p1, _l1)), Some((p2, _))) if p2 < p1 => closing_at_eof,
                (Some(_), Some(_)) => closing_with_newline,
                (Some(_), None) => closing_with_newline,
                (None, Some(_)) => closing_at_eof,
                (None, None) => None,
            };

            if let Some((closing_pos, closing_len)) = closing_result {
                let abs_closing_pos = content_start + closing_pos;
                let content = &markdown[content_start..abs_closing_pos];

                // Check YAML size limit
                if content.len() > crate::error::MAX_YAML_SIZE {
                    return Err(format!(
                        "YAML block too large: {} bytes (max: {} bytes)",
                        content.len(),
                        crate::error::MAX_YAML_SIZE
                    )
                    .into());
                }

                // Parse YAML content to check for reserved keys (QUILL, SCOPE)
                // First, try to parse as YAML
                let (tag, quill_name, yaml_content) = if !content.is_empty() {
                    // Try to parse the YAML to check for reserved keys
                    match serde_yaml::from_str::<serde_yaml::Value>(content) {
                        Ok(yaml_value) => {
                            if let Some(mapping) = yaml_value.as_mapping() {
                                let quill_key = serde_yaml::Value::String("QUILL".to_string());
                                let scope_key = serde_yaml::Value::String("SCOPE".to_string());

                                let has_quill = mapping.contains_key(&quill_key);
                                let has_scope = mapping.contains_key(&scope_key);

                                if has_quill && has_scope {
                                    return Err(
                                        "Cannot specify both QUILL and SCOPE in the same block"
                                            .into(),
                                    );
                                }

                                if has_quill {
                                    // Extract quill name
                                    let quill_value = mapping.get(&quill_key).unwrap();
                                    let quill_name_str = quill_value
                                        .as_str()
                                        .ok_or_else(|| "QUILL value must be a string")?;

                                    if !is_valid_tag_name(quill_name_str) {
                                        return Err(format!(
                                            "Invalid quill name '{}': must match pattern [a-z_][a-z0-9_]*",
                                            quill_name_str
                                        )
                                        .into());
                                    }

                                    // Remove QUILL from the YAML content for processing
                                    let mut new_mapping = mapping.clone();
                                    new_mapping.remove(&quill_key);
                                    let new_yaml = serde_yaml::to_string(&new_mapping)
                                        .map_err(|e| format!("Failed to serialize YAML: {}", e))?;

                                    (None, Some(quill_name_str.to_string()), new_yaml)
                                } else if has_scope {
                                    // Extract scope field name
                                    let scope_value = mapping.get(&scope_key).unwrap();
                                    let field_name = scope_value
                                        .as_str()
                                        .ok_or_else(|| "SCOPE value must be a string")?;

                                    if !is_valid_tag_name(field_name) {
                                        return Err(format!(
                                            "Invalid field name '{}': must match pattern [a-z_][a-z0-9_]*",
                                            field_name
                                        )
                                        .into());
                                    }

                                    if field_name == BODY_FIELD {
                                        return Err(format!(
                                            "Cannot use reserved field name '{}' as SCOPE value",
                                            BODY_FIELD
                                        )
                                        .into());
                                    }

                                    // Remove SCOPE from the YAML content for processing
                                    let mut new_mapping = mapping.clone();
                                    new_mapping.remove(&scope_key);
                                    let new_yaml = serde_yaml::to_string(&new_mapping)
                                        .map_err(|e| format!("Failed to serialize YAML: {}", e))?;

                                    (Some(field_name.to_string()), None, new_yaml)
                                } else {
                                    // No reserved keys, treat as normal YAML
                                    (None, None, content.to_string())
                                }
                            } else {
                                // Not a mapping, treat as normal YAML
                                (None, None, content.to_string())
                            }
                        }
                        Err(_) => {
                            // If YAML parsing fails here, we'll catch it later
                            (None, None, content.to_string())
                        }
                    }
                } else {
                    (None, None, content.to_string())
                };

                blocks.push(MetadataBlock {
                    start: abs_pos,
                    end: abs_closing_pos + closing_len, // After closing delimiter
                    yaml_content,
                    tag,
                    quill_name,
                });

                pos = abs_closing_pos + closing_len;
            } else if abs_pos == 0 {
                // Frontmatter started but not closed
                return Err("Frontmatter started but not closed with ---".into());
            } else {
                // Not a valid metadata block, skip this position
                pos = abs_pos + 3;
            }
        } else {
            break;
        }
    }

    Ok(blocks)
}

/// Decompose markdown into frontmatter fields and body
fn decompose(markdown: &str) -> Result<ParsedDocument, Box<dyn std::error::Error + Send + Sync>> {
    // Check input size limit
    if markdown.len() > crate::error::MAX_INPUT_SIZE {
        return Err(format!(
            "Input too large: {} bytes (max: {} bytes)",
            markdown.len(),
            crate::error::MAX_INPUT_SIZE
        )
        .into());
    }

    let mut fields = HashMap::new();

    // Find all metadata blocks
    let blocks = find_metadata_blocks(markdown)?;

    if blocks.is_empty() {
        // No metadata blocks, entire content is body
        fields.insert(
            BODY_FIELD.to_string(),
            QuillValue::from_json(serde_json::Value::String(markdown.to_string())),
        );
        return Ok(ParsedDocument::new(fields));
    }

    // Track which attributes are used for tagged blocks
    let mut tagged_attributes: HashMap<String, Vec<serde_yaml::Value>> = HashMap::new();
    let mut has_global_frontmatter = false;
    let mut global_frontmatter_index: Option<usize> = None;
    let mut quill_name: Option<String> = None;

    // First pass: identify global frontmatter, quill directive, and validate
    for (idx, block) in blocks.iter().enumerate() {
        // Check for quill directive
        if let Some(ref name) = block.quill_name {
            if quill_name.is_some() {
                return Err("Multiple quill directives found: only one allowed".into());
            }
            quill_name = Some(name.clone());
        }

        // Check for global frontmatter (no tag and no quill directive)
        if block.tag.is_none() && block.quill_name.is_none() {
            if has_global_frontmatter {
                return Err(
                    "Multiple global frontmatter blocks found: only one untagged block allowed"
                        .into(),
                );
            }
            has_global_frontmatter = true;
            global_frontmatter_index = Some(idx);
        }
    }

    // Parse global frontmatter if present
    if let Some(idx) = global_frontmatter_index {
        let block = &blocks[idx];

        // Parse YAML frontmatter
        let yaml_fields: HashMap<String, serde_yaml::Value> = if block.yaml_content.is_empty() {
            HashMap::new()
        } else {
            serde_yaml::from_str(&block.yaml_content)
                .map_err(|e| yaml_error_to_string(e, "Invalid YAML frontmatter"))?
        };

        // Check that all tagged blocks don't conflict with global fields
        // Exception: if the global field is an array, allow it (we'll merge later)
        for other_block in &blocks {
            if let Some(ref tag) = other_block.tag {
                if let Some(global_value) = yaml_fields.get(tag) {
                    // Check if the global value is an array
                    if global_value.as_sequence().is_none() {
                        return Err(format!(
                            "Name collision: global field '{}' conflicts with tagged attribute",
                            tag
                        )
                        .into());
                    }
                }
            }
        }

        // Convert YAML values to QuillValue at boundary
        for (key, value) in yaml_fields {
            fields.insert(key, QuillValue::from_yaml(value)?);
        }
    }

    // Process blocks with quill directives
    for block in &blocks {
        if block.quill_name.is_some() {
            // Quill directive blocks can have YAML content (becomes part of frontmatter)
            if !block.yaml_content.is_empty() {
                let yaml_fields: HashMap<String, serde_yaml::Value> =
                    serde_yaml::from_str(&block.yaml_content)
                        .map_err(|e| yaml_error_to_string(e, "Invalid YAML in quill block"))?;

                // Check for conflicts with existing fields
                for key in yaml_fields.keys() {
                    if fields.contains_key(key) {
                        return Err(format!(
                            "Name collision: quill block field '{}' conflicts with existing field",
                            key
                        )
                        .into());
                    }
                }

                // Convert YAML values to QuillValue at boundary
                for (key, value) in yaml_fields {
                    fields.insert(key, QuillValue::from_yaml(value)?);
                }
            }
        }
    }

    // Parse tagged blocks
    for (idx, block) in blocks.iter().enumerate() {
        if let Some(ref tag_name) = block.tag {
            // Check if this conflicts with global fields
            // Exception: if the global field is an array, allow it (we'll merge later)
            if let Some(existing_value) = fields.get(tag_name) {
                if existing_value.as_array().is_none() {
                    return Err(format!(
                        "Name collision: tagged attribute '{}' conflicts with global field",
                        tag_name
                    )
                    .into());
                }
            }

            // Parse YAML metadata
            let mut item_fields: HashMap<String, serde_yaml::Value> = if block
                .yaml_content
                .is_empty()
            {
                HashMap::new()
            } else {
                serde_yaml::from_str(&block.yaml_content).map_err(|e| {
                    yaml_error_to_string(e, &format!("Invalid YAML in tagged block '{}'", tag_name))
                })?
            };

            // Extract body for this tagged block
            let body_start = block.end;
            let body_end = if idx + 1 < blocks.len() {
                blocks[idx + 1].start
            } else {
                markdown.len()
            };
            let body = &markdown[body_start..body_end];

            // Add body to item fields
            item_fields.insert(
                BODY_FIELD.to_string(),
                serde_yaml::Value::String(body.to_string()),
            );

            // Convert HashMap to serde_yaml::Value::Mapping
            let item_value = serde_yaml::to_value(item_fields)?;

            // Add to collection
            tagged_attributes
                .entry(tag_name.clone())
                .or_insert_with(Vec::new)
                .push(item_value);
        }
    }

    // Extract global body
    // Body starts after global frontmatter or quill block (whichever comes first)
    // Body ends at the first scope block or EOF
    let first_non_scope_block_idx = blocks
        .iter()
        .position(|b| b.tag.is_none() && b.quill_name.is_none())
        .or_else(|| blocks.iter().position(|b| b.quill_name.is_some()));

    let (body_start, body_end) = if let Some(idx) = first_non_scope_block_idx {
        // Body starts after the first non-scope block (global frontmatter or quill)
        let start = blocks[idx].end;

        // Body ends at the first scope block after this, or EOF
        let end = blocks
            .iter()
            .skip(idx + 1)
            .find(|b| b.tag.is_some())
            .map(|b| b.start)
            .unwrap_or(markdown.len());

        (start, end)
    } else {
        // No global frontmatter or quill block - body is everything before the first scope block
        let end = blocks
            .iter()
            .find(|b| b.tag.is_some())
            .map(|b| b.start)
            .unwrap_or(0);

        (0, end)
    };

    let global_body = &markdown[body_start..body_end];

    fields.insert(
        BODY_FIELD.to_string(),
        QuillValue::from_json(serde_json::Value::String(global_body.to_string())),
    );

    // Add all tagged collections to fields (convert to QuillValue)
    // If a field already exists and is an array, merge the new items into it
    for (tag_name, items) in tagged_attributes {
        if let Some(existing_value) = fields.get(&tag_name) {
            // The existing value must be an array (checked earlier)
            if let Some(existing_array) = existing_value.as_array() {
                // Convert new items from YAML to JSON
                let new_items_json: Vec<serde_json::Value> = items
                    .into_iter()
                    .map(|yaml_val| {
                        serde_json::to_value(&yaml_val)
                            .map_err(|e| format!("Failed to convert YAML to JSON: {}", e))
                    })
                    .collect::<Result<Vec<_>, _>>()?;

                // Combine existing and new items
                let mut merged_array = existing_array.clone();
                merged_array.extend(new_items_json);

                // Create QuillValue from merged JSON array
                let quill_value = QuillValue::from_json(serde_json::Value::Array(merged_array));
                fields.insert(tag_name, quill_value);
            } else {
                // This should not happen due to earlier validation, but handle it gracefully
                return Err(format!(
                    "Internal error: field '{}' exists but is not an array",
                    tag_name
                )
                .into());
            }
        } else {
            // No existing field, just create a new sequence
            let quill_value = QuillValue::from_yaml(serde_yaml::Value::Sequence(items))?;
            fields.insert(tag_name, quill_value);
        }
    }

    let quill_tag = quill_name.unwrap_or_else(|| "__default__".to_string());
    let parsed = ParsedDocument::with_quill_tag(fields, quill_tag);

    Ok(parsed)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_no_frontmatter() {
        let markdown = "# Hello World\n\nThis is a test.";
        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some(markdown));
        assert_eq!(doc.fields().len(), 1);
        // Verify default quill tag is set
        assert_eq!(doc.quill_tag(), "__default__");
    }

    #[test]
    fn test_with_frontmatter() {
        let markdown = r#"---
title: Test Document
author: Test Author
---

# Hello World

This is the body."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Test Document"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Test Author"
        );
        assert_eq!(doc.fields().len(), 3); // title, author, body
        // Verify default quill tag is set when no QUILL directive
        assert_eq!(doc.quill_tag(), "__default__");
    }

    #[test]
    fn test_complex_yaml_frontmatter() {
        let markdown = r#"---
title: Complex Document
tags:
  - test
  - yaml
metadata:
  version: 1.0
  nested:
    field: value
---

Content here."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\nContent here."));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Complex Document"
        );

        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
        assert_eq!(tags.len(), 2);
        assert_eq!(tags[0].as_str().unwrap(), "test");
        assert_eq!(tags[1].as_str().unwrap(), "yaml");
    }

    #[test]
    fn test_with_defaults_empty_document() {
        use std::collections::HashMap;

        let mut defaults = HashMap::new();
        defaults.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("draft")),
        );
        defaults.insert(
            "version".to_string(),
            QuillValue::from_json(serde_json::json!(1)),
        );

        // Create an empty parsed document
        let doc = ParsedDocument::new(HashMap::new());
        let doc_with_defaults = doc.with_defaults(&defaults);

        // Check that defaults were applied
        assert_eq!(
            doc_with_defaults
                .get_field("status")
                .unwrap()
                .as_str()
                .unwrap(),
            "draft"
        );
        assert_eq!(
            doc_with_defaults
                .get_field("version")
                .unwrap()
                .as_number()
                .unwrap()
                .as_i64()
                .unwrap(),
            1
        );
    }

    #[test]
    fn test_with_defaults_preserves_existing_values() {
        use std::collections::HashMap;

        let mut defaults = HashMap::new();
        defaults.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("draft")),
        );

        // Create document with existing status
        let mut fields = HashMap::new();
        fields.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("published")),
        );
        let doc = ParsedDocument::new(fields);

        let doc_with_defaults = doc.with_defaults(&defaults);

        // Existing value should be preserved
        assert_eq!(
            doc_with_defaults
                .get_field("status")
                .unwrap()
                .as_str()
                .unwrap(),
            "published"
        );
    }

    #[test]
    fn test_with_defaults_partial_application() {
        use std::collections::HashMap;

        let mut defaults = HashMap::new();
        defaults.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("draft")),
        );
        defaults.insert(
            "version".to_string(),
            QuillValue::from_json(serde_json::json!(1)),
        );

        // Create document with only one field
        let mut fields = HashMap::new();
        fields.insert(
            "status".to_string(),
            QuillValue::from_json(serde_json::json!("published")),
        );
        let doc = ParsedDocument::new(fields);

        let doc_with_defaults = doc.with_defaults(&defaults);

        // Existing field preserved, missing field gets default
        assert_eq!(
            doc_with_defaults
                .get_field("status")
                .unwrap()
                .as_str()
                .unwrap(),
            "published"
        );
        assert_eq!(
            doc_with_defaults
                .get_field("version")
                .unwrap()
                .as_number()
                .unwrap()
                .as_i64()
                .unwrap(),
            1
        );
    }

    #[test]
    fn test_with_defaults_no_defaults() {
        use std::collections::HashMap;

        let defaults = HashMap::new(); // Empty defaults map

        let doc = ParsedDocument::new(HashMap::new());
        let doc_with_defaults = doc.with_defaults(&defaults);

        // No defaults should be applied
        assert!(doc_with_defaults.fields().is_empty());
    }

    #[test]
    fn test_with_defaults_complex_types() {
        use std::collections::HashMap;

        let mut defaults = HashMap::new();
        defaults.insert(
            "tags".to_string(),
            QuillValue::from_json(serde_json::json!(["default", "tag"])),
        );

        let doc = ParsedDocument::new(HashMap::new());
        let doc_with_defaults = doc.with_defaults(&defaults);

        // Complex default value should be applied
        let tags = doc_with_defaults
            .get_field("tags")
            .unwrap()
            .as_sequence()
            .unwrap();
        assert_eq!(tags.len(), 2);
        assert_eq!(tags[0].as_str().unwrap(), "default");
        assert_eq!(tags[1].as_str().unwrap(), "tag");
    }

    #[test]
    fn test_with_coercion_singular_to_array() {
        use std::collections::HashMap;

        let schema = QuillValue::from_json(serde_json::json!({
            "$schema": "https://json-schema.org/draft/2019-09/schema",
            "type": "object",
            "properties": {
                "tags": {"type": "array"}
            }
        }));

        let mut fields = HashMap::new();
        fields.insert(
            "tags".to_string(),
            QuillValue::from_json(serde_json::json!("single-tag")),
        );
        let doc = ParsedDocument::new(fields);

        let coerced_doc = doc.with_coercion(&schema);

        let tags = coerced_doc.get_field("tags").unwrap();
        assert!(tags.as_array().is_some());
        let tags_array = tags.as_array().unwrap();
        assert_eq!(tags_array.len(), 1);
        assert_eq!(tags_array[0].as_str().unwrap(), "single-tag");
    }

    #[test]
    fn test_with_coercion_string_to_boolean() {
        use std::collections::HashMap;

        let schema = QuillValue::from_json(serde_json::json!({
            "$schema": "https://json-schema.org/draft/2019-09/schema",
            "type": "object",
            "properties": {
                "active": {"type": "boolean"}
            }
        }));

        let mut fields = HashMap::new();
        fields.insert(
            "active".to_string(),
            QuillValue::from_json(serde_json::json!("true")),
        );
        let doc = ParsedDocument::new(fields);

        let coerced_doc = doc.with_coercion(&schema);

        assert_eq!(
            coerced_doc.get_field("active").unwrap().as_bool().unwrap(),
            true
        );
    }

    #[test]
    fn test_with_coercion_string_to_number() {
        use std::collections::HashMap;

        let schema = QuillValue::from_json(serde_json::json!({
            "$schema": "https://json-schema.org/draft/2019-09/schema",
            "type": "object",
            "properties": {
                "count": {"type": "number"}
            }
        }));

        let mut fields = HashMap::new();
        fields.insert(
            "count".to_string(),
            QuillValue::from_json(serde_json::json!("42")),
        );
        let doc = ParsedDocument::new(fields);

        let coerced_doc = doc.with_coercion(&schema);

        assert_eq!(
            coerced_doc.get_field("count").unwrap().as_i64().unwrap(),
            42
        );
    }

    #[test]
    fn test_invalid_yaml() {
        let markdown = r#"---
title: [invalid yaml
author: missing close bracket
---

Content here."#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid YAML frontmatter"));
    }

    #[test]
    fn test_unclosed_frontmatter() {
        let markdown = r#"---
title: Test
author: Test Author

Content without closing ---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("not closed"));
    }

    // Extended metadata tests

    #[test]
    fn test_basic_tagged_block() {
        let markdown = r#"---
title: Main Document
---

Main body content.

---
SCOPE: items
name: Item 1
---

Body of item 1."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\nMain body content.\n\n"));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Main Document"
        );

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
        assert_eq!(
            item.get("body").unwrap().as_str().unwrap(),
            "\nBody of item 1."
        );
    }

    #[test]
    fn test_multiple_tagged_blocks() {
        let markdown = r#"---
SCOPE: items
name: Item 1
tags: [a, b]
---

First item body.

---
SCOPE: items
name: Item 2
tags: [c, d]
---

Second item body."#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 2);

        let item1 = items[0].as_object().unwrap();
        assert_eq!(item1.get("name").unwrap().as_str().unwrap(), "Item 1");

        let item2 = items[1].as_object().unwrap();
        assert_eq!(item2.get("name").unwrap().as_str().unwrap(), "Item 2");
    }

    #[test]
    fn test_mixed_global_and_tagged() {
        let markdown = r#"---
title: Global
author: John Doe
---

Global body.

---
SCOPE: sections
title: Section 1
---

Section 1 content.

---
SCOPE: sections
title: Section 2
---

Section 2 content."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Global");
        assert_eq!(doc.body(), Some("\nGlobal body.\n\n"));

        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
        assert_eq!(sections.len(), 2);
    }

    #[test]
    fn test_empty_tagged_metadata() {
        let markdown = r#"---
SCOPE: items
---

Body without metadata."#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(
            item.get("body").unwrap().as_str().unwrap(),
            "\nBody without metadata."
        );
    }

    #[test]
    fn test_tagged_block_without_body() {
        let markdown = r#"---
SCOPE: items
name: Item
---"#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("body").unwrap().as_str().unwrap(), "");
    }

    #[test]
    fn test_name_collision_global_and_tagged() {
        let markdown = r#"---
items: "global value"
---

Body

---
SCOPE: items
name: Item
---

Item body"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("collision"));
    }

    #[test]
    fn test_global_array_merged_with_scope() {
        // When global frontmatter has an array field with the same name as a SCOPE,
        // the SCOPE items should be added to the array
        let markdown = r#"---
items:
  - name: Global Item 1
    value: 100
  - name: Global Item 2
    value: 200
---

Global body

---
SCOPE: items
name: Scope Item 1
value: 300
---

Scope item 1 body

---
SCOPE: items
name: Scope Item 2
value: 400
---

Scope item 2 body"#;

        let doc = decompose(markdown).unwrap();

        // Verify the items array has all 4 items (2 from global + 2 from SCOPE)
        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 4);

        // Verify first two items (from global array)
        let item1 = items[0].as_object().unwrap();
        assert_eq!(
            item1.get("name").unwrap().as_str().unwrap(),
            "Global Item 1"
        );
        assert_eq!(item1.get("value").unwrap().as_i64().unwrap(), 100);

        let item2 = items[1].as_object().unwrap();
        assert_eq!(
            item2.get("name").unwrap().as_str().unwrap(),
            "Global Item 2"
        );
        assert_eq!(item2.get("value").unwrap().as_i64().unwrap(), 200);

        // Verify last two items (from SCOPE blocks)
        let item3 = items[2].as_object().unwrap();
        assert_eq!(item3.get("name").unwrap().as_str().unwrap(), "Scope Item 1");
        assert_eq!(item3.get("value").unwrap().as_i64().unwrap(), 300);
        assert_eq!(
            item3.get("body").unwrap().as_str().unwrap(),
            "\nScope item 1 body\n\n"
        );

        let item4 = items[3].as_object().unwrap();
        assert_eq!(item4.get("name").unwrap().as_str().unwrap(), "Scope Item 2");
        assert_eq!(item4.get("value").unwrap().as_i64().unwrap(), 400);
        assert_eq!(
            item4.get("body").unwrap().as_str().unwrap(),
            "\nScope item 2 body"
        );
    }

    #[test]
    fn test_empty_global_array_with_scope() {
        // Edge case: global frontmatter has an empty array
        let markdown = r#"---
items: []
---

Global body

---
SCOPE: items
name: Item 1
---

Item 1 body"#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
    }

    #[test]
    fn test_reserved_field_name() {
        let markdown = r#"---
SCOPE: body
content: Test
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("reserved"));
    }

    #[test]
    fn test_invalid_tag_syntax() {
        let markdown = r#"---
SCOPE: Invalid-Name
title: Test
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid field name"));
    }

    #[test]
    fn test_multiple_global_frontmatter_blocks() {
        let markdown = r#"---
title: First
---

Body

---
author: Second
---

More body"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Multiple global frontmatter"));
    }

    #[test]
    fn test_adjacent_blocks_different_tags() {
        let markdown = r#"---
SCOPE: items
name: Item 1
---

Item 1 body

---
SCOPE: sections
title: Section 1
---

Section 1 body"#;

        let doc = decompose(markdown).unwrap();

        assert!(doc.get_field("items").is_some());
        assert!(doc.get_field("sections").is_some());

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
        assert_eq!(sections.len(), 1);
    }

    #[test]
    fn test_order_preservation() {
        let markdown = r#"---
SCOPE: items
id: 1
---

First

---
SCOPE: items
id: 2
---

Second

---
SCOPE: items
id: 3
---

Third"#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 3);

        for (i, item) in items.iter().enumerate() {
            let mapping = item.as_object().unwrap();
            let id = mapping.get("id").unwrap().as_i64().unwrap();
            assert_eq!(id, (i + 1) as i64);
        }
    }

    #[test]
    fn test_product_catalog_integration() {
        let markdown = r#"---
title: Product Catalog
author: John Doe
date: 2024-01-01
---

This is the main catalog description.

---
SCOPE: products
name: Widget A
price: 19.99
sku: WID-001
---

The **Widget A** is our most popular product.

---
SCOPE: products
name: Gadget B
price: 29.99
sku: GAD-002
---

The **Gadget B** is perfect for professionals.

---
SCOPE: reviews
product: Widget A
rating: 5
---

"Excellent product! Highly recommended."

---
SCOPE: reviews
product: Gadget B
rating: 4
---

"Very good, but a bit pricey.""#;

        let doc = decompose(markdown).unwrap();

        // Verify global fields
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Product Catalog"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "John Doe"
        );
        assert_eq!(
            doc.get_field("date").unwrap().as_str().unwrap(),
            "2024-01-01"
        );

        // Verify global body
        assert!(doc.body().unwrap().contains("main catalog description"));

        // Verify products collection
        let products = doc.get_field("products").unwrap().as_sequence().unwrap();
        assert_eq!(products.len(), 2);

        let product1 = products[0].as_object().unwrap();
        assert_eq!(product1.get("name").unwrap().as_str().unwrap(), "Widget A");
        assert_eq!(product1.get("price").unwrap().as_f64().unwrap(), 19.99);

        // Verify reviews collection
        let reviews = doc.get_field("reviews").unwrap().as_sequence().unwrap();
        assert_eq!(reviews.len(), 2);

        let review1 = reviews[0].as_object().unwrap();
        assert_eq!(
            review1.get("product").unwrap().as_str().unwrap(),
            "Widget A"
        );
        assert_eq!(review1.get("rating").unwrap().as_i64().unwrap(), 5);

        // Total fields: title, author, date, body, products, reviews = 6
        assert_eq!(doc.fields().len(), 6);
    }

    #[test]
    fn test_quill_directive() {
        let markdown = r#"---
QUILL: usaf_memo
memo_for: [ORG/SYMBOL]
memo_from: [ORG/SYMBOL]
---

This is the memo body."#;

        let doc = decompose(markdown).unwrap();

        // Verify quill tag is set
        assert_eq!(doc.quill_tag(), "usaf_memo");

        // Verify fields from quill block become frontmatter
        assert_eq!(
            doc.get_field("memo_for").unwrap().as_sequence().unwrap()[0]
                .as_str()
                .unwrap(),
            "ORG/SYMBOL"
        );

        // Verify body
        assert_eq!(doc.body(), Some("\nThis is the memo body."));
    }

    #[test]
    fn test_quill_with_scope_blocks() {
        let markdown = r#"---
QUILL: document
title: Test Document
---

Main body.

---
SCOPE: sections
name: Section 1
---

Section 1 body."#;

        let doc = decompose(markdown).unwrap();

        // Verify quill tag
        assert_eq!(doc.quill_tag(), "document");

        // Verify global field from quill block
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Test Document"
        );

        // Verify scope blocks work
        let sections = doc.get_field("sections").unwrap().as_sequence().unwrap();
        assert_eq!(sections.len(), 1);

        // Verify body
        assert_eq!(doc.body(), Some("\nMain body.\n\n"));
    }

    #[test]
    fn test_multiple_quill_directives_error() {
        let markdown = r#"---
QUILL: first
---

---
QUILL: second
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Multiple quill directives"));
    }

    #[test]
    fn test_invalid_quill_name() {
        let markdown = r#"---
QUILL: Invalid-Name
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid quill name"));
    }

    #[test]
    fn test_quill_wrong_value_type() {
        let markdown = r#"---
QUILL: 123
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("QUILL value must be a string"));
    }

    #[test]
    fn test_scope_wrong_value_type() {
        let markdown = r#"---
SCOPE: 123
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("SCOPE value must be a string"));
    }

    #[test]
    fn test_both_quill_and_scope_error() {
        let markdown = r#"---
QUILL: test
SCOPE: items
---"#;

        let result = decompose(markdown);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Cannot specify both QUILL and SCOPE"));
    }

    #[test]
    fn test_blank_lines_in_frontmatter() {
        // New parsing standard: blank lines are allowed within YAML blocks
        let markdown = r#"---
title: Test Document
author: Test Author

description: This has a blank line above it
tags:
  - one
  - two
---

# Hello World

This is the body."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.body(), Some("\n# Hello World\n\nThis is the body."));
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Test Document"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Test Author"
        );
        assert_eq!(
            doc.get_field("description").unwrap().as_str().unwrap(),
            "This has a blank line above it"
        );

        let tags = doc.get_field("tags").unwrap().as_sequence().unwrap();
        assert_eq!(tags.len(), 2);
    }

    #[test]
    fn test_blank_lines_in_scope_blocks() {
        // Blank lines should be allowed in SCOPE blocks too
        let markdown = r#"---
SCOPE: items
name: Item 1

price: 19.99

tags:
  - electronics
  - gadgets
---

Body of item 1."#;

        let doc = decompose(markdown).unwrap();

        let items = doc.get_field("items").unwrap().as_sequence().unwrap();
        assert_eq!(items.len(), 1);

        let item = items[0].as_object().unwrap();
        assert_eq!(item.get("name").unwrap().as_str().unwrap(), "Item 1");
        assert_eq!(item.get("price").unwrap().as_f64().unwrap(), 19.99);

        let tags = item.get("tags").unwrap().as_array().unwrap();
        assert_eq!(tags.len(), 2);
    }

    #[test]
    fn test_horizontal_rule_with_blank_lines_above_and_below() {
        // Horizontal rule: blank lines both above AND below the ---
        let markdown = r#"---
title: Test
---

First paragraph.

---

Second paragraph."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");

        // The body should contain the horizontal rule (---) as part of the content
        let body = doc.body().unwrap();
        assert!(body.contains("First paragraph."));
        assert!(body.contains("---"));
        assert!(body.contains("Second paragraph."));
    }

    #[test]
    fn test_horizontal_rule_not_preceded_by_blank() {
        // --- not preceded by blank line but followed by blank line is NOT a horizontal rule
        // It's also NOT a valid metadata block opening (since it's followed by blank)
        let markdown = r#"---
title: Test
---

First paragraph.
---

Second paragraph."#;

        let doc = decompose(markdown).unwrap();

        let body = doc.body().unwrap();
        // The second --- should be in the body as text (not a horizontal rule since no blank above)
        assert!(body.contains("---"));
    }

    #[test]
    fn test_multiple_blank_lines_in_yaml() {
        // Multiple blank lines should also be allowed
        let markdown = r#"---
title: Test


author: John Doe


version: 1.0
---

Body content."#;

        let doc = decompose(markdown).unwrap();

        assert_eq!(doc.get_field("title").unwrap().as_str().unwrap(), "Test");
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "John Doe"
        );
        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);
    }

    #[test]
    fn test_html_comment_interaction() {
        let markdown = r#"<!---
---> the rest of the page content

---
key: value
---
"#;
        let doc = decompose(markdown).unwrap();

        // The comment should be ignored (or at least not cause a parse error)
        // The frontmatter should be parsed
        let key = doc.get_field("key").and_then(|v| v.as_str());
        assert_eq!(key, Some("value"));
    }
}

#[cfg(test)]
mod demo_file_test {
    use super::*;

    #[test]
    fn test_extended_metadata_demo_file() {
        let markdown = include_str!("../../fixtures/resources/extended_metadata_demo.md");
        let doc = decompose(markdown).unwrap();

        // Verify global fields
        assert_eq!(
            doc.get_field("title").unwrap().as_str().unwrap(),
            "Extended Metadata Demo"
        );
        assert_eq!(
            doc.get_field("author").unwrap().as_str().unwrap(),
            "Quillmark Team"
        );
        // version is parsed as a number by YAML
        assert_eq!(doc.get_field("version").unwrap().as_f64().unwrap(), 1.0);

        // Verify body
        assert!(doc
            .body()
            .unwrap()
            .contains("extended YAML metadata standard"));

        // Verify features collection
        let features = doc.get_field("features").unwrap().as_sequence().unwrap();
        assert_eq!(features.len(), 3);

        // Verify use_cases collection
        let use_cases = doc.get_field("use_cases").unwrap().as_sequence().unwrap();
        assert_eq!(use_cases.len(), 2);

        // Check first feature
        let feature1 = features[0].as_object().unwrap();
        assert_eq!(
            feature1.get("name").unwrap().as_str().unwrap(),
            "Tag Directives"
        );
    }

    #[test]
    fn test_input_size_limit() {
        // Create markdown larger than MAX_INPUT_SIZE (10 MB)
        let size = crate::error::MAX_INPUT_SIZE + 1;
        let large_markdown = "a".repeat(size);

        let result = decompose(&large_markdown);
        assert!(result.is_err());

        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("Input too large"));
    }

    #[test]
    fn test_yaml_size_limit() {
        // Create YAML block larger than MAX_YAML_SIZE (1 MB)
        let mut markdown = String::from("---\n");

        // Create a very large YAML field
        let size = crate::error::MAX_YAML_SIZE + 1;
        markdown.push_str("data: \"");
        markdown.push_str(&"x".repeat(size));
        markdown.push_str("\"\n---\n\nBody");

        let result = decompose(&markdown);
        assert!(result.is_err());

        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains("YAML block too large"));
    }

    #[test]
    fn test_input_within_size_limit() {
        // Create markdown well within the limit
        let size = 1000; // Much smaller than limit
        let markdown = format!("---\ntitle: Test\n---\n\n{}", "a".repeat(size));

        let result = decompose(&markdown);
        assert!(result.is_ok());
    }

    #[test]
    fn test_yaml_within_size_limit() {
        // Create YAML block well within the limit
        let markdown = "---\ntitle: Test\nauthor: John Doe\n---\n\nBody content";

        let result = decompose(markdown);
        assert!(result.is_ok());
    }
}