agpm_cli/markdown/
mod.rs

1//! Markdown file operations and metadata extraction for Claude Code resources.
2//!
3//! This module provides comprehensive support for reading, writing, and manipulating
4//! Markdown files that contain Claude Code agents and snippets. It handles both
5//! plain Markdown files and files with structured metadata in frontmatter.
6//!
7//! # Overview
8//!
9//! The markdown module is a core component of AGPM that:
10//! - Parses Markdown files with optional YAML or TOML frontmatter
11//! - Extracts structured metadata for dependency resolution
12//! - Preserves document structure during read/write operations
13//! - Provides utilities for file discovery and validation
14//! - Supports atomic file operations for safe installation
15//!
16//! # Supported File Formats
17//!
18//! ## Plain Markdown Files
19//!
20//! Standard Markdown files without frontmatter are fully supported:
21//!
22//! ```markdown
23//! # Python Code Reviewer
24//!
25//! This agent specializes in reviewing Python code for:
26//! - PEP 8 compliance
27//! - Security vulnerabilities
28//! - Performance optimizations
29//!
30//! ## Usage
31//!
32//! When reviewing code, I will...
33//! ```
34//!
35//! ## YAML Frontmatter Format
36//!
37//! Files can include YAML frontmatter for structured metadata:
38//!
39//! ```markdown
40//! ---
41//! title: "Python Code Reviewer"
42//! description: "Specialized agent for Python code quality review"
43//! version: "2.1.0"
44//! author: "Claude Code Team"
45//! type: "agent"
46//! tags:
47//!   - "python"
48//!   - "code-review"
49//!   - "quality"
50//! dependencies:
51//!   agents:
52//!     - path: agents/syntax-checker.md
53//!   snippets:
54//!     - path: snippets/security-scanner.md
55//! ---
56//!
57//! # Python Code Reviewer
58//!
59//! This agent specializes in reviewing Python code...
60//! ```
61//!
62//! ## TOML Frontmatter Format
63//!
64//! TOML frontmatter is also supported using `+++` delimiters:
65//!
66//! ```text
67//! +++
68//! title = "JavaScript Snippet Collection"
69//! description = "Useful JavaScript utilities and helpers"
70//! version = "1.0.0"
71//! author = "Community Contributors"
72//! type = "snippet"
73//! tags = ["javascript", "utilities", "helpers"]
74//! +++
75//!
76//! # JavaScript Snippet Collection
77//!
78//! ## Array Utilities
79//!
80//! ```javascript
81//! function unique(arr) {
82//!     return [...new Set(arr)];
83//! }
84//! ```
85//!
86//! # Metadata Schema
87//!
88//! The frontmatter metadata follows this schema:
89//!
90//! | Field | Type | Description | Required |
91//! |-------|------|-------------|----------|
92//! | title | string | Human-readable resource title | No |
93//! | description | string | Brief description of the resource | No |
94//! | version | string | Resource version (semver recommended) | No |
95//! | author | string | Author name or organization | No |
96//! | type | string | Resource type ("agent" or "snippet") | No |
97//! | tags | array | Tags for categorization | No |
98//! | dependencies | object | Structured dependencies by resource type | No |
99//!
100//! Additional custom fields are preserved in the extra map.
101//!
102//! # Content Extraction
103//!
104//! When metadata is not explicitly provided in frontmatter, the module
105//! can extract information from the Markdown content:
106//!
107//! - **Title**: Extracted from the first level-1 heading in the content
108//! - **Description**: Extracted from the first paragraph after headings
109//!
110//! This allows resources to work without frontmatter while still providing
111//! useful metadata for dependency resolution and display.
112//!
113//! # File Operations
114//!
115//! All file operations are designed to be safe and atomic:
116//! - Parent directories are created automatically during writes
117//! - Content is validated during parsing to catch errors early  
118//! - File extensions are validated (.md, .markdown)
119//! - Recursive directory traversal for bulk operations
120//!
121//! # Usage Examples
122//!
123//! ## Basic Reading and Writing
124//!
125//! ```rust,no_run
126//! use agpm_cli::markdown::MarkdownDocument;
127//! use std::path::Path;
128//!
129//! # fn example() -> anyhow::Result<()> {
130//! // Read a markdown file
131//! let doc = MarkdownDocument::read(Path::new("agents/reviewer.md"))?;
132//!
133//! // Access metadata
134//! if let Some(metadata) = &doc.metadata {
135//!     println!("Title: {:?}", metadata.title);
136//!     println!("Version: {:?}", metadata.version);
137//!     println!("Tags: {:?}", metadata.tags);
138//! }
139//!
140//! // Extract title from content if not in metadata
141//! if let Some(title) = doc.get_title() {
142//!     println!("Extracted title: {}", title);
143//! }
144//!
145//! // Write to a new location
146//! doc.write(Path::new("installed/reviewer.md"))?;
147//! # Ok(())
148//! # }
149//! ```
150//!
151//! ## Creating Documents Programmatically
152//!
153//! ```rust,no_run
154//! use agpm_cli::markdown::{MarkdownDocument, MarkdownMetadata};
155//!
156//! # fn example() -> anyhow::Result<()> {
157//! // Create metadata
158//! let mut metadata = MarkdownMetadata::default();
159//! metadata.title = Some("Custom Agent".to_string());
160//! metadata.version = Some("1.0.0".to_string());
161//! metadata.tags = vec!["custom".to_string(), "utility".to_string()];
162//!
163//! // Create document with metadata
164//! let content = "# Custom Agent\n\nThis is a custom agent...";
165//! let doc = MarkdownDocument::with_metadata(metadata, content.to_string());
166//!
167//! // The raw field contains formatted frontmatter + content
168//! println!("{}", doc.raw);
169//! # Ok(())
170//! # }
171//! ```
172//!
173//! ## Batch File Processing
174//!
175//! ```rust,no_run
176//! use agpm_cli::markdown::{list_markdown_files, MarkdownDocument};
177//! use std::path::Path;
178//!
179//! # fn example() -> anyhow::Result<()> {
180//! // Find all markdown files in a directory
181//! let files = list_markdown_files(Path::new("resources/"))?;
182//!
183//! for file in files {
184//!     let doc = MarkdownDocument::read(&file)?;
185//!     
186//!     if let Some(title) = doc.get_title() {
187//!         println!("{}: {}", file.display(), title);
188//!     }
189//! }
190//! # Ok(())
191//! # }
192//! ```
193//!
194//! # Integration with AGPM
195//!
196//! This module integrates with other AGPM components:
197//!
198//! - `crate::manifest`: Uses metadata for dependency resolution
199//! - `crate::lockfile`: Stores checksums and installation paths  
200//! - `crate::source`: Handles remote resource fetching
201//! - `crate::core`: Provides core types and error handling
202//!
203//! See the respective module documentation for integration details.
204
205use anyhow::{Context, Result};
206use serde::{Deserialize, Serialize};
207use std::collections::HashMap;
208use std::fs;
209use std::path::Path;
210
211use crate::manifest::DependencySpec;
212
/// Type alias for [`MarkdownDocument`], kept for backward compatibility.
///
/// `MarkdownFile` and [`MarkdownDocument`] name the same type, so every
/// constructor, method, and field is available under either name. The alias
/// lets existing code that refers to `MarkdownFile` keep compiling.
/// New code should prefer using [`MarkdownDocument`] directly.
///
/// # Examples
///
/// ```rust,no_run
/// # use agpm_cli::markdown::{MarkdownFile, MarkdownDocument};
/// // These are equivalent
/// let doc1 = MarkdownDocument::new("content".to_string());
/// let doc2 = MarkdownFile::new("content".to_string());
///
/// assert_eq!(doc1.content, doc2.content);
/// ```
pub type MarkdownFile = MarkdownDocument;
230
/// Structured metadata extracted from Markdown frontmatter.
///
/// This struct represents all the metadata that can be parsed from YAML or TOML
/// frontmatter in Markdown files. Known fields are typed; any other key found in
/// the frontmatter is collected into [`extra`](Self::extra) instead of being
/// rejected or dropped.
///
/// # Standard Fields
///
/// The following fields have special meaning in AGPM:
/// - `title`: Human-readable name for the resource
/// - `description`: Brief explanation of what the resource does
/// - `version`: Version identifier (semantic versioning recommended)
/// - `author`: Creator or maintainer information
/// - `resource_type`: Type classification ("agent" or "snippet"), stored under
///   the frontmatter key `type`
/// - `tags`: Categorization labels for filtering and discovery
/// - `dependencies`: Structured dependencies for transitive resolution
///
/// # Custom Fields
///
/// Additional fields are preserved in the `extra` map, allowing resource
/// authors to include custom metadata without breaking compatibility.
///
/// # Serialization
///
/// The struct uses Serde for serialization and deserialization. To keep
/// generated frontmatter clean, `Option` fields that are `None` and an empty
/// `tags` list are omitted from the output. Entries of `extra` are flattened
/// into the top level of the frontmatter rather than nested under an `extra` key.
///
/// # Example
///
/// ```rust,no_run
/// # use agpm_cli::markdown::MarkdownMetadata;
/// # use std::collections::HashMap;
/// let mut metadata = MarkdownMetadata::default();
/// metadata.title = Some("Python Linter".to_string());
/// metadata.version = Some("2.0.1".to_string());
/// metadata.tags = vec!["python".to_string(), "linting".to_string()];
/// // Dependencies are a map from resource type to a list of dependency specs.
/// // They are typically parsed from frontmatter rather than set programmatically.
///
/// // Custom fields via extra map
/// let mut extra = HashMap::new();
/// extra.insert("license".to_string(), "MIT".into());
/// extra.insert("min_python".to_string(), "3.8".into());
/// metadata.extra = extra;
/// ```
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct MarkdownMetadata {
    /// Human-readable title of the resource.
    ///
    /// This is displayed in listings and used for resource identification.
    /// If not provided, the title may be extracted from the first heading
    /// in the Markdown content.
    ///
    /// Omitted from serialized frontmatter when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,

    /// Brief description explaining what the resource does.
    ///
    /// Used for documentation and resource discovery. If not provided,
    /// the description may be extracted from the first paragraph in
    /// the Markdown content.
    ///
    /// Omitted from serialized frontmatter when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Version identifier for the resource.
    ///
    /// Semantic versioning (e.g., "1.2.3") is recommended for compatibility
    /// with dependency resolution, but any string format is accepted.
    ///
    /// Omitted from serialized frontmatter when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub version: Option<String>,

    /// Author or maintainer information.
    ///
    /// Can be a name, organization, or contact information. Free-form text.
    ///
    /// Omitted from serialized frontmatter when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub author: Option<String>,

    /// Classification tags for categorization and filtering.
    ///
    /// Tags help with resource discovery and organization. Common patterns:
    /// - Language-specific: "python", "javascript", "rust"
    /// - Functionality: "linting", "testing", "documentation"
    /// - Domain: "web-dev", "data-science", "devops"
    ///
    /// Defaults to an empty list when the key is absent, and is omitted from
    /// serialized frontmatter when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tags: Vec<String>,

    /// Resource type classification.
    ///
    /// Currently supported types:
    /// - "agent": Interactive Claude Code agents
    /// - "snippet": Code snippets and templates
    ///
    /// This field uses `rename = "type"` to match the frontmatter format
    /// while avoiding Rust's `type` keyword. The value is stored as a plain
    /// string; this struct does not restrict it to the types listed above.
    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
    pub resource_type: Option<String>,

    /// Dependencies for this resource.
    ///
    /// This field uses the structured transitive dependency format where
    /// dependencies are organized by resource type (agents, snippets, etc.).
    /// Each resource type maps to a list of dependency specifications.
    ///
    /// Example:
    /// ```yaml
    /// dependencies:
    ///   agents:
    ///     - path: agents/helper.md
    ///       version: v1.0.0
    ///   snippets:
    ///     - path: snippets/utils.md
    /// ```
    ///
    /// `None` when the key is absent; omitted from serialized frontmatter
    /// when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub dependencies: Option<HashMap<String, Vec<DependencySpec>>>,

    /// Additional custom metadata fields.
    ///
    /// Any frontmatter fields not recognized by the standard schema are
    /// preserved here. This allows resource authors to include custom
    /// metadata without breaking compatibility with AGPM.
    ///
    /// Values are stored as `serde_json::Value` to handle mixed types
    /// (strings, numbers, arrays, objects). Because the field is flattened,
    /// its entries appear as top-level frontmatter keys.
    #[serde(flatten)]
    pub extra: HashMap<String, serde_json::Value>,
}
357
/// A parsed Markdown document representing a Claude Code resource.
///
/// This is the core structure for handling Markdown files in AGPM. It provides
/// a clean separation between structured metadata (from frontmatter) and the
/// actual content, while keeping the full document text available for writing
/// back to disk.
///
/// # Structure
///
/// A `MarkdownDocument` consists of three parts:
/// 1. **Metadata**: Structured data from frontmatter (YAML or TOML)
/// 2. **Content**: The main Markdown content without frontmatter
/// 3. **Raw**: The complete document text (frontmatter plus content)
///
/// # Frontmatter Support
///
/// The document can parse both YAML (`---` delimiters) and TOML (`+++` delimiters)
/// frontmatter formats. If no frontmatter is present, or the frontmatter cannot
/// be parsed, the entire file is treated as content.
///
/// # Content Extraction
///
/// When explicit metadata is not available, the document can extract information
/// from the content itself using [`get_title`] and [`get_description`] methods.
///
/// # Cloning
///
/// This struct derives `Clone`; a clone owns its own copies of the metadata,
/// content, and raw text.
///
/// # Field Consistency
///
/// All three fields are public. [`set_metadata`](Self::set_metadata) and
/// [`set_content`](Self::set_content) keep `raw` in sync; assigning to a field
/// directly does not regenerate `raw`.
///
/// # Examples
///
/// ## Reading from File
///
/// ```rust,no_run
/// # use agpm_cli::markdown::MarkdownDocument;
/// # use std::path::Path;
/// # fn example() -> anyhow::Result<()> {
/// let doc = MarkdownDocument::read(Path::new("agent.md"))?;
///
/// if let Some(metadata) = &doc.metadata {
///     println!("Found metadata: {:?}", metadata.title);
/// }
///
/// println!("Content length: {} chars", doc.content.len());
/// # Ok(())
/// # }
/// ```
///
/// ## Creating Programmatically
///
/// ```rust,no_run
/// # use agpm_cli::markdown::{MarkdownDocument, MarkdownMetadata};
/// let metadata = MarkdownMetadata {
///     title: Some("Test Agent".to_string()),
///     version: Some("1.0.0".to_string()),
///     ..Default::default()
/// };
///
/// let content = "# Test Agent\n\nThis agent helps with testing.";
/// let doc = MarkdownDocument::with_metadata(metadata, content.to_string());
///
/// // Raw contains formatted frontmatter + content
/// assert!(doc.raw.contains("title: Test Agent"));
/// assert!(doc.raw.contains("This agent helps with testing"));
/// ```
///
/// ## Modifying Content
///
/// ```rust,no_run
/// # use agpm_cli::markdown::MarkdownDocument;
/// let mut doc = MarkdownDocument::new("# Original".to_string());
///
/// // Update content - raw is regenerated by the setter
/// doc.set_content("# Updated Content\n\nNew description.".to_string());
///
/// assert_eq!(doc.content, "# Updated Content\n\nNew description.");
/// assert_eq!(doc.raw, doc.content); // No frontmatter, so raw == content
/// ```
///
/// [`get_title`]: MarkdownDocument::get_title
/// [`get_description`]: MarkdownDocument::get_description
#[derive(Debug, Clone)]
pub struct MarkdownDocument {
    /// Parsed metadata extracted from frontmatter.
    ///
    /// This is `Some` when the document was built with metadata or when its
    /// YAML or TOML frontmatter parsed successfully. It is `None` for plain
    /// Markdown files and for documents whose frontmatter could not be parsed.
    pub metadata: Option<MarkdownMetadata>,

    /// The main Markdown content without frontmatter delimiters.
    ///
    /// For a document with successfully parsed frontmatter, this is the text
    /// after the closing delimiter. When there is no usable frontmatter, it is
    /// the entire input, including any frontmatter block that failed to parse.
    pub content: String,

    /// The complete document text including frontmatter.
    ///
    /// After parsing, this is the exact input text, and it is what
    /// [`write`](Self::write) stores on disk. [`set_metadata`](Self::set_metadata)
    /// and [`set_content`](Self::set_content) regenerate it; when metadata is
    /// present the regenerated frontmatter is always YAML, even if the document
    /// was originally parsed from TOML.
    pub raw: String,
}
464
465impl MarkdownDocument {
466    /// Create a new markdown document without frontmatter.
467    ///
468    /// This creates a plain Markdown document with no metadata. The content
469    /// becomes both the `content` and `raw` fields since there's no frontmatter
470    /// to format.
471    ///
472    /// # Arguments
473    ///
474    /// * `content` - The Markdown content as a string
475    ///
476    /// # Examples
477    ///
478    /// ```rust,no_run
479    /// # use agpm_cli::markdown::MarkdownDocument;
480    /// let doc = MarkdownDocument::new("# Hello\n\nWorld!".to_string());
481    ///
482    /// assert!(doc.metadata.is_none());
483    /// assert_eq!(doc.content, "# Hello\n\nWorld!");
484    /// assert_eq!(doc.raw, doc.content);
485    /// ```
486    #[must_use]
487    pub fn new(content: String) -> Self {
488        Self {
489            metadata: None,
490            content: content.clone(),
491            raw: content,
492        }
493    }
494
495    /// Create a markdown document with metadata and content.
496    ///
497    /// This constructor creates a complete document with structured metadata
498    /// in YAML frontmatter format. The `raw` field will contain the formatted
499    /// frontmatter followed by the content.
500    ///
501    /// # Arguments
502    ///
503    /// * `metadata` - The structured metadata for the document
504    /// * `content` - The Markdown content (without frontmatter)
505    ///
506    /// # Examples
507    ///
508    /// ```rust,no_run
509    /// # use agpm_cli::markdown::{MarkdownDocument, MarkdownMetadata};
510    /// let metadata = MarkdownMetadata {
511    ///     title: Some("Example".to_string()),
512    ///     version: Some("1.0.0".to_string()),
513    ///     ..Default::default()
514    /// };
515    ///
516    /// let doc = MarkdownDocument::with_metadata(
517    ///     metadata,
518    ///     "# Example\n\nThis is an example.".to_string()
519    /// );
520    ///
521    /// assert!(doc.metadata.is_some());
522    /// assert!(doc.raw.starts_with("---\n"));
523    /// assert!(doc.raw.contains("title: Example"));
524    /// ```
525    #[must_use]
526    pub fn with_metadata(metadata: MarkdownMetadata, content: String) -> Self {
527        let raw = Self::format_with_frontmatter(&metadata, &content);
528        Self {
529            metadata: Some(metadata),
530            content,
531            raw,
532        }
533    }
534
535    /// Read and parse a Markdown file from the filesystem.
536    ///
537    /// This method reads the entire file into memory and parses it for
538    /// frontmatter and content. It supports both YAML and TOML frontmatter
539    /// formats and provides detailed error context on failure.
540    ///
541    /// # Arguments
542    ///
543    /// * `path` - Path to the Markdown file to read
544    ///
545    /// # Returns
546    ///
547    /// Returns a `Result` containing the parsed document or an error with
548    /// context about what went wrong (file not found, parse error, etc.).
549    ///
550    /// # Errors
551    ///
552    /// This function will return an error if:
553    /// - The file cannot be read (doesn't exist, permissions, etc.)
554    /// - The file contains invalid UTF-8
555    /// - The frontmatter is malformed YAML or TOML
556    ///
557    /// # Examples
558    ///
559    /// ```rust,no_run
560    /// # use agpm_cli::markdown::MarkdownDocument;
561    /// # use std::path::Path;
562    /// # fn example() -> anyhow::Result<()> {
563    /// let doc = MarkdownDocument::read(Path::new("resources/agent.md"))?;
564    ///
565    /// println!("Title: {:?}", doc.get_title());
566    /// println!("Content length: {}", doc.content.len());
567    /// # Ok(())
568    /// # }
569    /// ```
570    pub fn read(path: &Path) -> Result<Self> {
571        let raw = fs::read_to_string(path)
572            .with_context(|| format!("Failed to read markdown file: {}", path.display()))?;
573
574        Self::parse(&raw)
575    }
576
577    /// Write the document to a file on disk.
578    ///
579    /// This method performs an atomic write operation, creating any necessary
580    /// parent directories automatically. The complete `raw` content (including
581    /// frontmatter if present) is written to the specified path.
582    ///
583    /// # Arguments
584    ///
585    /// * `path` - Target path where the file should be written
586    ///
587    /// # Returns
588    ///
589    /// Returns `Ok(())` on success, or an error with context on failure.
590    ///
591    /// # Errors
592    ///
593    /// This function will return an error if:
594    /// - Parent directories cannot be created (permissions, disk space, etc.)
595    /// - The file cannot be written (permissions, disk space, etc.)
596    /// - The path is invalid or inaccessible
597    ///
598    /// # Safety
599    ///
600    /// This operation creates parent directories as needed, which could
601    /// potentially create unexpected directory structures if the path
602    /// is not validated by the caller.
603    ///
604    /// # Examples
605    ///
606    /// ```rust,no_run
607    /// # use agpm_cli::markdown::MarkdownDocument;
608    /// # use std::path::Path;
609    /// # fn example() -> anyhow::Result<()> {
610    /// let doc = MarkdownDocument::new("# Test\n\nContent".to_string());
611    ///
612    /// // Writes to file, creating directories as needed
613    /// doc.write(Path::new("output/resources/test.md"))?;
614    /// # Ok(())
615    /// # }
616    /// ```
617    pub fn write(&self, path: &Path) -> Result<()> {
618        // Ensure parent directory exists
619        if let Some(parent) = path.parent() {
620            fs::create_dir_all(parent)
621                .with_context(|| format!("Failed to create directory: {}", parent.display()))?;
622        }
623
624        fs::write(path, &self.raw)
625            .with_context(|| format!("Failed to write markdown file: {}", path.display()))?;
626
627        Ok(())
628    }
629
630    /// Parse a Markdown string that may contain frontmatter with context for warnings.
631    ///
632    /// This is similar to [`parse`](Self::parse) but accepts an optional context string
633    /// that will be included in warning messages when preprocessing is required.
634    ///
635    /// # Arguments
636    ///
637    /// * `input` - The complete Markdown document as a string
638    /// * `context` - Optional context (e.g., file path) for warning messages
639    ///
640    /// # Returns
641    ///
642    /// Returns a parsed `MarkdownDocument`. If frontmatter parsing fails,
643    /// a warning is emitted and the entire document is treated as content.
644    pub fn parse_with_context(input: &str, context: Option<&str>) -> Result<Self> {
645        // Check for YAML frontmatter (starts with ---)
646        if (input.starts_with("---\n") || input.starts_with("---\r\n"))
647            && let Some(end_idx) = find_frontmatter_end(input)
648        {
649            let skip_size = if input.starts_with("---\r\n") {
650                5
651            } else {
652                4
653            };
654            let frontmatter = &input[skip_size..end_idx];
655            let content = input[end_idx..].trim_start_matches("---").trim_start();
656
657            // Try to parse YAML frontmatter with standard parser first
658            match serde_yaml::from_str::<MarkdownMetadata>(frontmatter) {
659                Ok(metadata) => {
660                    // Standard parsing succeeded
661                    return Ok(Self {
662                        metadata: Some(metadata),
663                        content: content.to_string(),
664                        raw: input.to_string(),
665                    });
666                }
667                Err(err) => {
668                    // Parsing failed - emit warning and treat entire document as content
669                    if let Some(ctx) = context {
670                        eprintln!(
671                            "⚠️  Warning: Unable to parse YAML frontmatter in '{ctx}'. \
672                            The document will be processed without metadata. Error: {err}"
673                        );
674                    } else {
675                        eprintln!(
676                            "⚠️  Warning: Unable to parse YAML frontmatter. \
677                            The document will be processed without metadata. Error: {err}"
678                        );
679                    }
680
681                    // Treat the entire document as content (including the invalid frontmatter)
682                    return Ok(Self {
683                        metadata: None,
684                        content: input.to_string(),
685                        raw: input.to_string(),
686                    });
687                }
688            }
689        }
690
691        // Check for TOML frontmatter (starts with +++)
692        if (input.starts_with("+++\n") || input.starts_with("+++\r\n"))
693            && let Some(end_idx) = find_toml_frontmatter_end(input)
694        {
695            let skip_size = if input.starts_with("+++\r\n") {
696                5
697            } else {
698                4
699            };
700            let frontmatter = &input[skip_size..end_idx];
701            let content = input[end_idx..].trim_start_matches("+++").trim_start();
702
703            // Try to parse TOML frontmatter
704            match toml::from_str::<MarkdownMetadata>(frontmatter) {
705                Ok(metadata) => {
706                    return Ok(Self {
707                        metadata: Some(metadata),
708                        content: content.to_string(),
709                        raw: input.to_string(),
710                    });
711                }
712                Err(err) => {
713                    // TOML parsing failed - emit warning and treat entire document as content
714                    if let Some(ctx) = context {
715                        eprintln!(
716                            "⚠️  Warning: Unable to parse TOML frontmatter in '{ctx}'. \
717                            The document will be processed without metadata. Error: {err}"
718                        );
719                    } else {
720                        eprintln!(
721                            "⚠️  Warning: Unable to parse TOML frontmatter. \
722                            The document will be processed without metadata. Error: {err}"
723                        );
724                    }
725
726                    // Treat the entire document as content (including the invalid frontmatter)
727                    return Ok(Self {
728                        metadata: None,
729                        content: input.to_string(),
730                        raw: input.to_string(),
731                    });
732                }
733            }
734        }
735
736        // No frontmatter, entire document is content
737        Ok(Self {
738            metadata: None,
739            content: input.to_string(),
740            raw: input.to_string(),
741        })
742    }
743
    /// Parse a Markdown string that may contain frontmatter.
    ///
    /// This delegates to [`parse_with_context`](Self::parse_with_context) with
    /// no context, so any frontmatter warning is printed without a source
    /// location. Both YAML and TOML frontmatter are detected; if no usable
    /// frontmatter is found, the entire input is treated as content.
    ///
    /// # Supported Formats
    ///
    /// ## YAML Frontmatter (recommended)
    /// ```text
    /// ---
    /// title: "Example"
    /// version: "1.0.0"
    /// ---
    /// Content here...
    /// ```
    ///
    /// ## TOML Frontmatter
    /// ```text
    /// +++
    /// title = "Example"
    /// version = "1.0.0"
    /// +++
    /// Content here...
    /// ```
    ///
    /// # Arguments
    ///
    /// * `input` - The complete Markdown document as a string
    ///
    /// # Returns
    ///
    /// Returns a parsed `MarkdownDocument` with metadata extracted if present.
    ///
    /// # Errors
    ///
    /// Malformed frontmatter does not produce an `Err`. When `---` or `+++`
    /// delimited frontmatter has invalid syntax or does not match the metadata
    /// schema, a warning is printed to stderr and the document is returned with
    /// `metadata` set to `None` and the whole input (frontmatter included) as
    /// `content`.
    ///
    /// # Examples
    ///
    /// ```rust,no_run
    /// # use agpm_cli::markdown::MarkdownDocument;
    /// // Parse document with YAML frontmatter
    /// let input = "---\ntitle: Test\n---\n# Content";
    /// let doc = MarkdownDocument::parse(input).unwrap();
    /// assert!(doc.metadata.is_some());
    ///
    /// // Parse plain Markdown
    /// let input = "# Just Content";
    /// let doc = MarkdownDocument::parse(input).unwrap();
    /// assert!(doc.metadata.is_none());
    /// ```
    pub fn parse(input: &str) -> Result<Self> {
        Self::parse_with_context(input, None)
    }
803
804    /// Format a document with YAML frontmatter
805    fn format_with_frontmatter(metadata: &MarkdownMetadata, content: &str) -> String {
806        let yaml = serde_yaml::to_string(metadata).unwrap_or_default();
807        format!("---\n{yaml}---\n\n{content}")
808    }
809
810    /// Update the document's metadata and regenerate the raw content.
811    ///
812    /// This method replaces the current metadata (if any) with new metadata
813    /// and automatically regenerates the `raw` field to include properly
814    /// formatted YAML frontmatter.
815    ///
816    /// # Arguments
817    ///
818    /// * `metadata` - The new metadata to set for this document
819    ///
820    /// # Effects
821    ///
822    /// - Sets `self.metadata` to `Some(metadata)`
823    /// - Regenerates `self.raw` with YAML frontmatter + content
824    /// - Preserves the existing `content` field unchanged
825    ///
826    /// # Examples
827    ///
828    /// ```rust,no_run
829    /// # use agpm_cli::markdown::{MarkdownDocument, MarkdownMetadata};
830    /// let mut doc = MarkdownDocument::new("# Test\n\nContent".to_string());
831    /// assert!(doc.metadata.is_none());
832    ///
833    /// let metadata = MarkdownMetadata {
834    ///     title: Some("New Title".to_string()),
835    ///     version: Some("2.0.0".to_string()),
836    ///     ..Default::default()
837    /// };
838    ///
839    /// doc.set_metadata(metadata);
840    /// assert!(doc.metadata.is_some());
841    /// assert!(doc.raw.contains("title: New Title"));
842    /// assert!(doc.raw.contains("# Test"));
843    /// ```
844    pub fn set_metadata(&mut self, metadata: MarkdownMetadata) {
845        self.raw = Self::format_with_frontmatter(&metadata, &self.content);
846        self.metadata = Some(metadata);
847    }
848
849    /// Update the document's content and regenerate the raw document.
850    ///
851    /// This method replaces the current content with new content and
852    /// automatically regenerates the `raw` field. If metadata is present,
853    /// the raw content will include formatted frontmatter; otherwise it
854    /// will be just the new content.
855    ///
856    /// # Arguments
857    ///
858    /// * `content` - The new Markdown content (without frontmatter)
859    ///
860    /// # Effects
861    ///
862    /// - Sets `self.content` to the new content
863    /// - Regenerates `self.raw` appropriately:
864    ///   - If metadata exists: frontmatter + new content
865    ///   - If no metadata: just the new content
866    /// - Preserves existing metadata unchanged
867    ///
868    /// # Examples
869    ///
870    /// ```rust,no_run
871    /// # use agpm_cli::markdown::{MarkdownDocument, MarkdownMetadata};
872    /// // Document with metadata
873    /// let metadata = MarkdownMetadata {
874    ///     title: Some("Test".to_string()),
875    ///     ..Default::default()
876    /// };
877    /// let mut doc = MarkdownDocument::with_metadata(
878    ///     metadata,
879    ///     "Original content".to_string()
880    /// );
881    ///
882    /// doc.set_content("# New Content\n\nUpdated!".to_string());
883    ///
884    /// assert_eq!(doc.content, "# New Content\n\nUpdated!");
885    /// assert!(doc.raw.contains("title: Test"));
886    /// assert!(doc.raw.contains("# New Content"));
887    /// ```
888    pub fn set_content(&mut self, content: String) {
889        if let Some(ref metadata) = self.metadata {
890            self.raw = Self::format_with_frontmatter(metadata, &content);
891        } else {
892            self.raw = content.clone();
893        }
894        self.content = content;
895    }
896
897    /// Extract the document title from metadata or content.
898    ///
899    /// This method provides a fallback mechanism for getting the document title:
900    /// 1. First, check if metadata contains an explicit title
901    /// 2. If not, scan the content for the first level-1 heading (`# Title`)
902    /// 3. Return `None` if neither source provides a title
903    ///
904    /// # Returns
905    ///
906    /// - `Some(String)` containing the title if found
907    /// - `None` if no title is available from either source
908    ///
909    /// # Title Extraction Rules
910    ///
911    /// When extracting from content:
912    /// - Only level-1 headings (starting with `# `) are considered
913    /// - The first matching heading is used
914    /// - Leading/trailing whitespace is trimmed from the result
915    /// - Empty headings (just `#`) are ignored
916    ///
917    /// # Examples
918    ///
919    /// ```rust,no_run
920    /// # use agpm_cli::markdown::{MarkdownDocument, MarkdownMetadata};
921    /// // From metadata
922    /// let metadata = MarkdownMetadata {
923    ///     title: Some("Metadata Title".to_string()),
924    ///     ..Default::default()
925    /// };
926    /// let doc = MarkdownDocument::with_metadata(
927    ///     metadata,
928    ///     "# Content Title\n\nSome text".to_string()
929    /// );
930    /// assert_eq!(doc.get_title(), Some("Metadata Title".to_string()));
931    ///
932    /// // From content heading
933    /// let doc = MarkdownDocument::new("# Extracted Title\n\nContent".to_string());
934    /// assert_eq!(doc.get_title(), Some("Extracted Title".to_string()));
935    ///
936    /// // No title available
937    /// let doc = MarkdownDocument::new("Just some content without headings".to_string());
938    /// assert_eq!(doc.get_title(), None);
939    /// ```
940    #[must_use]
941    pub fn get_title(&self) -> Option<String> {
942        // First check metadata
943        if let Some(ref metadata) = self.metadata
944            && let Some(ref title) = metadata.title
945        {
946            return Some(title.clone());
947        }
948
949        // Try to extract from first # heading
950        for line in self.content.lines() {
951            if let Some(heading) = line.strip_prefix("# ") {
952                return Some(heading.trim().to_string());
953            }
954        }
955
956        None
957    }
958
959    /// Extract the document description from metadata or content.
960    ///
961    /// This method provides a fallback mechanism for getting the document description:
962    /// 1. First, check if metadata contains an explicit description
963    /// 2. If not, extract the first paragraph from the content (after any headings)
964    /// 3. Return `None` if neither source provides a description
965    ///
966    /// # Returns
967    ///
968    /// - `Some(String)` containing the description if found
969    /// - `None` if no description is available from either source
970    ///
971    /// # Description Extraction Rules
972    ///
973    /// When extracting from content:
974    /// - All headings (lines starting with `#`) are skipped
975    /// - Empty lines before the first paragraph are ignored
976    /// - The first continuous block of non-empty lines becomes the description
977    /// - Multiple lines are joined with spaces
978    /// - Extraction stops at the first empty line after content starts
979    ///
980    /// # Examples
981    ///
982    /// ```rust,no_run
983    /// # use agpm_cli::markdown::{MarkdownDocument, MarkdownMetadata};
984    /// // From metadata
985    /// let metadata = MarkdownMetadata {
986    ///     description: Some("Metadata description".to_string()),
987    ///     ..Default::default()
988    /// };
989    /// let doc = MarkdownDocument::with_metadata(
990    ///     metadata,
991    ///     "# Title\n\nContent description".to_string()
992    /// );
993    /// assert_eq!(doc.get_description(), Some("Metadata description".to_string()));
994    ///
995    /// // From content paragraph
996    /// let doc = MarkdownDocument::new(
997    ///     "# Title\n\nThis is the first\nparagraph of content.\n\nSecond paragraph.".to_string()
998    /// );
999    /// assert_eq!(doc.get_description(), Some("This is the first paragraph of content.".to_string()));
1000    ///
1001    /// // No description available  
1002    /// let doc = MarkdownDocument::new("# Just a title".to_string());
1003    /// assert_eq!(doc.get_description(), None);
1004    /// ```
1005    #[must_use]
1006    pub fn get_description(&self) -> Option<String> {
1007        // First check metadata
1008        if let Some(ref metadata) = self.metadata
1009            && let Some(ref desc) = metadata.description
1010        {
1011            return Some(desc.clone());
1012        }
1013
1014        // Try to extract first non-heading paragraph
1015        let mut in_paragraph = false;
1016        let mut paragraph = String::new();
1017
1018        for line in self.content.lines() {
1019            let trimmed = line.trim();
1020
1021            // Skip headings and empty lines at start
1022            if trimmed.starts_with('#') || (trimmed.is_empty() && !in_paragraph) {
1023                continue;
1024            }
1025
1026            // Start collecting paragraph
1027            if !trimmed.is_empty() {
1028                in_paragraph = true;
1029                if !paragraph.is_empty() {
1030                    paragraph.push(' ');
1031                }
1032                paragraph.push_str(trimmed);
1033            } else if in_paragraph {
1034                // End of first paragraph
1035                break;
1036            }
1037        }
1038
1039        if paragraph.is_empty() {
1040            None
1041        } else {
1042            Some(paragraph)
1043        }
1044    }
1045}
1046
/// Find the end position of YAML frontmatter in a document.
///
/// Scans `input` line by line, skips the first line (the opening `---`
/// delimiter), and returns the byte offset at which the first subsequent
/// line consisting solely of `---` begins.
///
/// # Arguments
///
/// * `input` - The document content whose first line is the opening `---`
///
/// # Returns
///
/// - `Some(usize)` - Byte position of the closing `---` delimiter, suitable
///   for slicing `input`
/// - `None` - If no closing delimiter line is found (including empty input)
///
/// # Implementation Notes
///
/// - The first line is skipped without being inspected
/// - Offsets are accumulated from each line's actual byte length including
///   its own terminator, so LF, CRLF, and documents that mix both are all
///   measured correctly
/// - A trailing `\r` is treated as part of the line terminator only when it
///   is followed by `\n`, matching the behavior of [`str::lines`]
fn find_frontmatter_end(input: &str) -> Option<usize> {
    const DELIMITER: &str = "---";

    // `split_inclusive` keeps every terminator attached to its line, so the
    // running offset never has to guess which line-ending style is in use.
    let mut segments = input.split_inclusive('\n');
    let opening = segments.next()?;

    let mut pos = opening.len();
    for segment in segments {
        // Strip "\n" or "\r\n" to recover the bare line text for comparison.
        let line = segment
            .strip_suffix('\n')
            .map_or(segment, |body| body.strip_suffix('\r').unwrap_or(body));
        if line == DELIMITER {
            return Some(pos);
        }
        pos += segment.len();
    }

    None
}
1095
/// Find the end position of TOML frontmatter in a document.
///
/// Scans `input` line by line, skips the first line (the opening `+++`
/// delimiter), and returns the byte offset at which the first subsequent
/// line consisting solely of `+++` begins.
///
/// # Arguments
///
/// * `input` - The document content whose first line is the opening `+++`
///
/// # Returns
///
/// - `Some(usize)` - Byte position of the closing `+++` delimiter, suitable
///   for slicing `input`
/// - `None` - If no closing delimiter line is found (including empty input)
///
/// # Implementation Notes
///
/// - The first line is skipped without being inspected
/// - Offsets are accumulated from each line's actual byte length including
///   its own terminator, so LF, CRLF, and documents that mix both are all
///   measured correctly
/// - A trailing `\r` is treated as part of the line terminator only when it
///   is followed by `\n`, matching the behavior of [`str::lines`]
fn find_toml_frontmatter_end(input: &str) -> Option<usize> {
    const DELIMITER: &str = "+++";

    // `split_inclusive` keeps every terminator attached to its line, so the
    // running offset never has to guess which line-ending style is in use.
    let mut segments = input.split_inclusive('\n');
    let opening = segments.next()?;

    let mut pos = opening.len();
    for segment in segments {
        // Strip "\n" or "\r\n" to recover the bare line text for comparison.
        let line = segment
            .strip_suffix('\n')
            .map_or(segment, |body| body.strip_suffix('\r').unwrap_or(body));
        if line == DELIMITER {
            return Some(pos);
        }
        pos += segment.len();
    }

    None
}
1144
/// Report whether a path's extension marks it as a Markdown file.
///
/// Only the extension is examined; the file does not need to exist. The
/// comparison ignores ASCII case, so naming conventions from different
/// platforms are all accepted.
///
/// # Supported Extensions
///
/// - `.md` (most common)
/// - `.markdown` (verbose form)
/// - Any ASCII-case variation of the above: `.MD`, `.Markdown`, etc.
///
/// # Arguments
///
/// * `path` - The file path to inspect
///
/// # Returns
///
/// - `true` when the extension is a recognized Markdown extension
/// - `false` otherwise, including paths with no extension or with an
///   extension that is not valid UTF-8
///
/// # Examples
///
/// ```rust,no_run
/// # use agpm_cli::markdown::is_markdown_file;
/// # use std::path::Path;
/// assert!(is_markdown_file(Path::new("agent.md")));
/// assert!(is_markdown_file(Path::new("README.MD")));
/// assert!(is_markdown_file(Path::new("guide.markdown")));
/// assert!(!is_markdown_file(Path::new("config.toml")));
/// assert!(!is_markdown_file(Path::new("script.sh")));
/// assert!(!is_markdown_file(Path::new("no-extension")));
/// ```
#[must_use]
pub fn is_markdown_file(path: &Path) -> bool {
    const MARKDOWN_EXTENSIONS: [&str; 2] = ["md", "markdown"];

    match path.extension().and_then(std::ffi::OsStr::to_str) {
        Some(extension) => MARKDOWN_EXTENSIONS
            .iter()
            .any(|known| extension.eq_ignore_ascii_case(known)),
        None => false,
    }
}
1184
1185/// Recursively find all Markdown files in a directory.
1186///
1187/// This function performs a recursive traversal of the given directory,
1188/// collecting all files that have Markdown extensions. It follows symbolic
1189/// links and handles filesystem errors gracefully.
1190///
1191/// # Directory Traversal
1192///
1193/// - Recursively traverses all subdirectories
1194/// - Follows symbolic links (may cause infinite loops with circular links)
1195/// - Silently skips entries that cannot be accessed
1196/// - Only includes regular files (not directories or special files)
1197///
1198/// # Arguments
1199///
1200/// * `dir` - The directory path to search
1201///
1202/// # Returns
1203///
1204/// - `Ok(Vec<PathBuf>)` - List of absolute paths to Markdown files
1205/// - `Err(...)` - Only on severe filesystem errors (rare)
1206///
1207/// # Behavior
1208///
1209/// - Returns empty vector if directory doesn't exist (not an error)
1210/// - Files are returned in filesystem order (not sorted)
1211/// - Paths are absolute and canonicalized
1212/// - Uses [`is_markdown_file`] for extension validation
1213///
1214/// # Examples
1215///
1216/// ```rust,no_run
1217/// # use agpm_cli::markdown::list_markdown_files;
1218/// # use std::path::Path;
1219/// # fn example() -> anyhow::Result<()> {
1220/// let files = list_markdown_files(Path::new("resources/"))?;
1221///
1222/// for file in files {
1223///     println!("Found: {}", file.display());
1224/// }
1225/// # Ok(())
1226/// # }
1227/// ```
1228///
1229/// # Performance
1230///
1231/// This function loads directory metadata but not file contents, making it
1232/// suitable for scanning large directory trees. For processing the files,
1233/// consider using [`MarkdownDocument::read`] on each result.
1234///
1235/// [`is_markdown_file`]: is_markdown_file
1236/// [`MarkdownDocument::read`]: MarkdownDocument::read
1237pub fn list_markdown_files(dir: &Path) -> Result<Vec<std::path::PathBuf>> {
1238    let mut files = Vec::new();
1239
1240    if !dir.exists() {
1241        return Ok(files);
1242    }
1243
1244    for entry in walkdir::WalkDir::new(dir)
1245        .follow_links(true)
1246        .into_iter()
1247        .filter_map(std::result::Result::ok)
1248    {
1249        let path = entry.path();
1250        if path.is_file() && is_markdown_file(path) {
1251            files.push(path.to_path_buf());
1252        }
1253    }
1254
1255    Ok(files)
1256}
1257
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// A document built from plain text has no metadata and `raw` mirrors `content`.
    #[test]
    fn test_markdown_document_new() {
        let text = "# Hello World";
        let doc = MarkdownDocument::new(text.to_string());

        assert!(doc.metadata.is_none());
        assert_eq!(doc.content, text);
        assert_eq!(doc.raw, text);
    }

    /// YAML frontmatter is parsed into metadata and removed from the content.
    #[test]
    fn test_markdown_with_yaml_frontmatter() {
        let input = r"---
title: Test Document
description: A test document
tags:
  - test
  - example
---

# Hello World

This is the content.";

        let doc = MarkdownDocument::parse(input).unwrap();
        assert!(doc.metadata.is_some());

        let parsed = doc.metadata.as_ref().unwrap();
        assert_eq!(parsed.title.as_deref(), Some("Test Document"));
        assert_eq!(parsed.description.as_deref(), Some("A test document"));
        assert_eq!(parsed.tags, vec!["test", "example"]);

        assert!(doc.content.starts_with("# Hello World"));
    }

    /// TOML frontmatter delimited by `+++` is parsed into metadata.
    #[test]
    fn test_markdown_with_toml_frontmatter() {
        let input = r#"+++
title = "Test Document"
description = "A test document"
tags = ["test", "example"]
+++

# Hello World

This is the content."#;

        let doc = MarkdownDocument::parse(input).unwrap();
        assert!(doc.metadata.is_some());

        let parsed = doc.metadata.as_ref().unwrap();
        assert_eq!(parsed.title.as_deref(), Some("Test Document"));
        assert_eq!(parsed.description.as_deref(), Some("A test document"));
        assert_eq!(parsed.tags, vec!["test", "example"]);
    }

    /// Plain Markdown parses to a document whose content equals the input.
    #[test]
    fn test_markdown_without_frontmatter() {
        let input = "# Hello World\n\nThis is the content.";

        let doc = MarkdownDocument::parse(input).unwrap();

        assert!(doc.metadata.is_none());
        assert_eq!(doc.content, input);
    }

    /// Title lookup prefers metadata, then the first heading, then gives up.
    #[test]
    fn test_get_title() {
        // Metadata title takes precedence
        let with_meta = MarkdownDocument::with_metadata(
            MarkdownMetadata {
                title: Some("Metadata Title".to_string()),
                ..Default::default()
            },
            "Content".to_string(),
        );
        assert_eq!(with_meta.get_title().as_deref(), Some("Metadata Title"));

        // Heading is used when there is no metadata
        let with_heading = MarkdownDocument::new("# Heading Title\n\nContent".to_string());
        assert_eq!(with_heading.get_title().as_deref(), Some("Heading Title"));

        // Neither source available
        let untitled = MarkdownDocument::new("Just content".to_string());
        assert!(untitled.get_title().is_none());
    }

    /// Description lookup prefers metadata, then the first paragraph.
    #[test]
    fn test_get_description() {
        // Metadata description takes precedence
        let with_meta = MarkdownDocument::with_metadata(
            MarkdownMetadata {
                description: Some("Metadata description".to_string()),
                ..Default::default()
            },
            "Content".to_string(),
        );
        assert_eq!(with_meta.get_description().as_deref(), Some("Metadata description"));

        // First paragraph is used when there is no metadata
        let from_body = MarkdownDocument::new(
            "# Title\n\nThis is the first paragraph.\n\nSecond paragraph.".to_string(),
        );
        assert_eq!(from_body.get_description().as_deref(), Some("This is the first paragraph."));
    }

    /// A document written to disk can be read back with its metadata intact.
    #[test]
    fn test_read_write_markdown() {
        let workspace = tempdir().unwrap();
        let target = workspace.path().join("test.md");

        // Write a document that carries metadata
        let original = MarkdownDocument::with_metadata(
            MarkdownMetadata {
                title: Some("Test".to_string()),
                ..Default::default()
            },
            "# Test\n\nContent".to_string(),
        );
        original.write(&target).unwrap();

        // Load it again and compare
        let reloaded = MarkdownDocument::read(&target).unwrap();
        assert!(reloaded.metadata.is_some());
        assert_eq!(reloaded.metadata.as_ref().unwrap().title.as_deref(), Some("Test"));
        assert!(reloaded.content.contains("# Test"));
    }

    /// Extension matching is case-insensitive and rejects non-Markdown names.
    #[test]
    fn test_is_markdown_file() {
        for accepted in ["test.md", "test.MD", "test.markdown", "test.MARKDOWN"] {
            assert!(is_markdown_file(Path::new(accepted)));
        }
        for rejected in ["test.txt", "test"] {
            assert!(!is_markdown_file(Path::new(rejected)));
        }
    }

    /// Listing finds Markdown files at every depth and ignores other files.
    #[test]
    fn test_list_markdown_files() {
        let root = tempdir().unwrap();

        // Lay out fixture files, one of them in a nested directory
        let nested = root.path().join("subdir");
        std::fs::create_dir(&nested).unwrap();
        let fixtures = [
            root.path().join("file1.md"),
            root.path().join("file2.markdown"),
            root.path().join("file3.txt"),
            nested.join("file4.md"),
        ];
        for fixture in &fixtures {
            std::fs::write(fixture, "content").unwrap();
        }

        let found = list_markdown_files(root.path()).unwrap();
        assert_eq!(found.len(), 3);

        let names: Vec<String> = found
            .iter()
            .map(|path| path.file_name().unwrap().to_string_lossy().into_owned())
            .collect();
        let listed = |name: &str| names.iter().any(|candidate| candidate == name);

        assert!(listed("file1.md"));
        assert!(listed("file2.markdown"));
        assert!(listed("file4.md"));
        assert!(!listed("file3.txt"));
    }

    /// Both setters keep `raw` in sync with metadata and content.
    #[test]
    fn test_set_metadata_and_content() {
        let mut doc = MarkdownDocument::new("Initial content".to_string());

        // Attach metadata
        doc.set_metadata(MarkdownMetadata {
            title: Some("New Title".to_string()),
            ..Default::default()
        });

        assert!(doc.metadata.is_some());
        assert!(doc.raw.contains("title: New Title"));
        assert!(doc.raw.contains("Initial content"));

        // Replace the body
        doc.set_content("Updated content".to_string());

        assert_eq!(doc.content, "Updated content");
        assert!(doc.raw.contains("Updated content"));
        assert!(doc.raw.contains("title: New Title"));
    }

    /// Frontmatter that is not valid YAML falls back to "everything is content".
    #[test]
    fn test_invalid_frontmatter_with_escaped_newlines() {
        // Content with invalid YAML frontmatter (literal \n that isn't properly quoted)
        let input = r#"---
name: haiku-syntax-tool
description: Use this agent when you need to fix linting errors, formatting issues, type checking problems, or ensure code adheres to project-specific standards. This agent specializes in enforcing language-specific conventions, project style guides, and maintaining code quality through automated fixes. Examples:\n\n<example>\nContext: The user has just written a new Python function and wants to ensure it meets project standards.\nuser: "I've added a new sync handler function"\nassistant: "Let me review this with the code-standards-enforcer agent to ensure it meets our project standards"\n<commentary>\nSince new code was written, use the Task tool to launch the code-standards-enforcer agent to check for linting, formatting, and type issues according to CLAUDE.md standards.\n</commentary>\n</example>\n\n<example>\nContext: The user encounters linting errors during CI/CD.\nuser: "The CI pipeline is failing due to formatting issues"\nassistant: "I'll use the code-standards-enforcer agent to fix these formatting and linting issues"\n<commentary>\nWhen there are explicit linting or formatting problems, use the code-standards-enforcer agent to automatically fix them according to project standards.\n</commentary>\n</example>\n\n<example>\nContext: The user wants to ensure type hints are correct.\nuser: "Can you check if my type annotations are correct in the API module?"\nassistant: "I'll launch the code-standards-enforcer agent to verify and fix any type annotation issues"\n<commentary>\nFor type checking and annotation verification, use the code-standards-enforcer agent to ensure compliance with project typing standards.\n</commentary>\n</example>
model: haiku
---

You are a meticulous code standards enforcement specialist"#;

        // Parsing must succeed, treating the entire document as content (no metadata)
        let doc = MarkdownDocument::parse(input)
            .unwrap_or_else(|e| panic!("Should not fail, but got error: {}", e));

        // Invalid frontmatter means no metadata
        assert!(doc.metadata.is_none());

        // The whole input, delimiters included, ends up in the content
        let expected_fragments = [
            "---",
            "name: haiku-syntax-tool",
            "description: Use this agent",
            "model: haiku",
            "meticulous code standards enforcement specialist",
        ];
        for fragment in expected_fragments {
            assert!(doc.content.contains(fragment));
        }
    }

    /// Syntactically broken YAML also falls back to "everything is content".
    #[test]
    fn test_completely_invalid_frontmatter_fallback() {
        // Test with completely broken YAML
        let input = r#"---
name: test
description: {this is not valid yaml at all
model: test
---

Content here"#;

        // Parsing must succeed, but without metadata
        let doc = MarkdownDocument::parse(input)
            .unwrap_or_else(|e| panic!("Should not fail, but got error: {}", e));

        // The entire document is kept as content when frontmatter is invalid
        assert!(doc.metadata.is_none());
        for fragment in ["---", "name: test", "Content here"] {
            assert!(doc.content.contains(fragment));
        }
    }
}