agpm_cli/markdown/
mod.rs

1//! Markdown file operations and metadata extraction for Claude Code resources.
2//!
3//! This module provides comprehensive support for reading, writing, and manipulating
4//! Markdown files that contain Claude Code agents and snippets. It handles both
5//! plain Markdown files and files with structured metadata in frontmatter.
6//!
7//! # Overview
8//!
9//! The markdown module is a core component of AGPM that:
10//! - Parses Markdown files with optional YAML or TOML frontmatter
11//! - Extracts structured metadata for dependency resolution
12//! - Preserves document structure during read/write operations
13//! - Provides utilities for file discovery and validation
14//! - Supports atomic file operations for safe installation
15//! - Extracts and validates file references within markdown content
16//!
17//! # Supported File Formats
18//!
19//! ## Plain Markdown Files
20//!
21//! Standard Markdown files without frontmatter are fully supported:
22//!
23//! ```markdown
24//! # Python Code Reviewer
25//!
26//! This agent specializes in reviewing Python code for:
27//! - PEP 8 compliance
28//! - Security vulnerabilities
29//! - Performance optimizations
30//!
31//! ## Usage
32//!
33//! When reviewing code, I will...
34//! ```
35//!
36//! ## YAML Frontmatter Format
37//!
38//! Files can include YAML frontmatter for structured metadata:
39//!
40//! ```markdown
41//! ---
42//! title: "Python Code Reviewer"
43//! description: "Specialized agent for Python code quality review"
44//! version: "2.1.0"
45//! author: "Claude Code Team"
46//! type: "agent"
47//! tags:
48//!   - "python"
49//!   - "code-review"
50//!   - "quality"
51//! dependencies:
52//!   agents:
53//!     - path: agents/syntax-checker.md
54//!   snippets:
55//!     - path: snippets/security-scanner.md
56//! ---
57//!
58//! # Python Code Reviewer
59//!
60//! This agent specializes in reviewing Python code...
61//! ```
62//!
63//! ## TOML Frontmatter Format
64//!
65//! TOML frontmatter is also supported using `+++` delimiters:
66//!
//! ````text
//! +++
//! title = "JavaScript Snippet Collection"
//! description = "Useful JavaScript utilities and helpers"
//! version = "1.0.0"
//! author = "Community Contributors"
//! type = "snippet"
//! tags = ["javascript", "utilities", "helpers"]
//! +++
//!
//! # JavaScript Snippet Collection
//!
//! ## Array Utilities
//!
//! ```javascript
//! function unique(arr) {
//!     return [...new Set(arr)];
//! }
//! ```
//! ````
86//!
87//! # Metadata Schema
88//!
89//! The frontmatter metadata follows this schema:
90//!
91//! | Field | Type | Description | Required |
92//! |-------|------|-------------|----------|
93//! | title | string | Human-readable resource title | No |
94//! | description | string | Brief description of the resource | No |
95//! | version | string | Resource version (semver recommended) | No |
96//! | author | string | Author name or organization | No |
97//! | type | string | Resource type ("agent" or "snippet") | No |
98//! | tags | array | Tags for categorization | No |
99//! | dependencies | object | Structured dependencies by resource type | No |
100//!
101//! Additional custom fields are preserved in the extra map.
102//!
103//! # Content Extraction
104//!
105//! When metadata is not explicitly provided in frontmatter, the module
106//! can extract information from the Markdown content:
107//!
108//! - **Title**: Extracted from the first level-1 heading in the content
109//! - **Description**: Extracted from the first paragraph after headings
110//!
111//! This allows resources to work without frontmatter while still providing
112//! useful metadata for dependency resolution and display.
113//!
114//! # File Operations
115//!
116//! All file operations are designed to be safe and atomic:
117//! - Parent directories are created automatically during writes
118//! - Content is validated during parsing to catch errors early  
119//! - File extensions are validated (.md, .markdown)
120//! - Recursive directory traversal for bulk operations
121//!
122//! # Usage Examples
123//!
124//! ## Basic Reading and Writing
125//!
126//! ```rust,no_run
127//! use agpm_cli::markdown::MarkdownDocument;
128//! use std::path::Path;
129//!
130//! # fn example() -> anyhow::Result<()> {
131//! // Read a markdown file
132//! let doc = MarkdownDocument::read(Path::new("agents/reviewer.md"))?;
133//!
134//! // Access metadata
135//! if let Some(metadata) = &doc.metadata {
136//!     println!("Title: {:?}", metadata.title);
137//!     println!("Version: {:?}", metadata.version);
138//!     println!("Tags: {:?}", metadata.tags);
139//! }
140//!
141//! // Extract title from content if not in metadata
142//! if let Some(title) = doc.get_title() {
143//!     println!("Extracted title: {}", title);
144//! }
145//!
146//! // Write to a new location
147//! doc.write(Path::new("installed/reviewer.md"))?;
148//! # Ok(())
149//! # }
150//! ```
151//!
152//! ## Creating Documents Programmatically
153//!
154//! ```rust,no_run
155//! use agpm_cli::markdown::{MarkdownDocument, MarkdownMetadata};
156//!
157//! # fn example() -> anyhow::Result<()> {
158//! // Create metadata
159//! let mut metadata = MarkdownMetadata::default();
160//! metadata.title = Some("Custom Agent".to_string());
161//! metadata.version = Some("1.0.0".to_string());
162//! metadata.tags = vec!["custom".to_string(), "utility".to_string()];
163//!
164//! // Create document with metadata
165//! let content = "# Custom Agent\n\nThis is a custom agent...";
166//! let doc = MarkdownDocument::with_metadata(metadata, content.to_string());
167//!
168//! // The raw field contains formatted frontmatter + content
169//! println!("{}", doc.raw);
170//! # Ok(())
171//! # }
172//! ```
173//!
174//! ## Batch File Processing
175//!
176//! ```rust,no_run
177//! use agpm_cli::markdown::{list_markdown_files, MarkdownDocument};
178//! use std::path::Path;
179//!
180//! # fn example() -> anyhow::Result<()> {
181//! // Find all markdown files in a directory
182//! let files = list_markdown_files(Path::new("resources/"))?;
183//!
184//! for file in files {
185//!     let doc = MarkdownDocument::read(&file)?;
186//!     
187//!     if let Some(title) = doc.get_title() {
188//!         println!("{}: {}", file.display(), title);
189//!     }
190//! }
191//! # Ok(())
192//! # }
193//! ```
194//!
195//! # Integration with AGPM
196//!
197//! This module integrates with other AGPM components:
198//!
199//! - `crate::manifest`: Uses metadata for dependency resolution
200//! - `crate::lockfile`: Stores checksums and installation paths  
201//! - `crate::source`: Handles remote resource fetching
202//! - `crate::core`: Provides core types and error handling
203//!
204//! See the respective module documentation for integration details.
205
206pub mod reference_extractor;
207
208use anyhow::{Context, Result};
209use serde::{Deserialize, Serialize};
210use std::collections::HashMap;
211use std::fs;
212use std::path::Path;
213
214use crate::manifest::DependencySpec;
215
/// Prints a multi-line warning to stderr when frontmatter cannot be parsed.
///
/// The message names the frontmatter format and, when available, where the
/// document came from. It then states the consequence (the document is
/// processed without metadata, so declared dependencies are not resolved or
/// installed), shows the underlying parse error, and closes with a hint about
/// the structured dependency format plus a documentation link.
///
/// # Arguments
///
/// * `format_type` - The frontmatter format. `"YAML"` and `"TOML"` select the
///   wording "objects" and "tables" respectively; any other value falls back
///   to "structures".
/// * `context` - Optional context string (e.g., file path). When present it is
///   appended to the first line as ` in '<context>'`.
/// * `error` - The parsing error, rendered through its `Display` implementation
fn emit_frontmatter_parse_warning(
    format_type: &str,
    context: Option<&str>,
    error: &impl std::fmt::Display,
) {
    // Only mention a location when the caller supplied one.
    let location = match context {
        Some(origin) => format!(" in '{origin}'"),
        None => String::new(),
    };

    // Use each format's own word for a nested key/value structure.
    let field_term = if format_type == "YAML" {
        "objects"
    } else if format_type == "TOML" {
        "tables"
    } else {
        "structures"
    };

    // One entry per output line; empty entries are the blank separator lines.
    let report = [
        format!("⚠️  Warning: Unable to parse {format_type} frontmatter{location}."),
        String::new(),
        String::from(
            "The document will be processed without metadata, and any declared dependencies",
        ),
        String::from("will NOT be resolved or installed."),
        String::new(),
        format!("Parse error: {error}"),
        String::new(),
        String::from(
            "If you're declaring dependencies, ensure they use the correct structured format.",
        ),
        format!(
            "Dependencies must be {field_term} with 'path' and optional 'version' fields, not plain strings."
        ),
        String::new(),
        String::from("For the correct dependency format, see:"),
        String::from("https://github.com/aig787/agpm#transitive-dependencies"),
    ];

    for line in &report {
        eprintln!("{line}");
    }
}
255
/// Alternative name for [`MarkdownDocument`].
///
/// `MarkdownFile` and [`MarkdownDocument`] are the same type: every
/// constructor, method, and field is available under either name. The alias
/// is kept for backward compatibility with code that uses the `MarkdownFile`
/// name; new code should prefer [`MarkdownDocument`] directly.
///
/// # Examples
///
/// ```rust,no_run
/// # use agpm_cli::markdown::{MarkdownFile, MarkdownDocument};
/// // These are equivalent
/// let doc1 = MarkdownDocument::new("content".to_string());
/// let doc2 = MarkdownFile::new("content".to_string());
///
/// assert_eq!(doc1.content, doc2.content);
/// ```
pub type MarkdownFile = MarkdownDocument;
273
/// Structured metadata extracted from Markdown frontmatter.
///
/// This struct represents all the metadata that can be parsed from YAML or TOML
/// frontmatter in Markdown files. It follows a flexible schema that accommodates
/// both standard AGPM fields and custom extensions.
///
/// # Standard Fields
///
/// The following fields have special meaning in AGPM:
/// - `title`: Human-readable name for the resource
/// - `description`: Brief explanation of what the resource does
/// - `version`: Version identifier (semantic versioning recommended)
/// - `author`: Creator or maintainer information
/// - `resource_type`: Type classification ("agent" or "snippet"); written as
///   `type` in frontmatter
/// - `tags`: Categorization labels for filtering and discovery
/// - `dependencies`: Structured dependencies for transitive resolution
///
/// # Custom Fields
///
/// Frontmatter keys that do not match one of the fields above are collected
/// into the `extra` map, allowing resource authors to include custom metadata
/// without breaking compatibility.
///
/// # Serialization
///
/// The struct uses Serde for both directions. On output, `None` values and
/// empty collections are skipped to keep generated frontmatter clean, and the
/// entries of `extra` are flattened into the top level next to the standard
/// fields.
///
/// # Example
///
/// ```rust,no_run
/// # use agpm_cli::markdown::MarkdownMetadata;
/// # use std::collections::HashMap;
/// let mut metadata = MarkdownMetadata::default();
/// metadata.title = Some("Python Linter".to_string());
/// metadata.version = Some("2.0.1".to_string());
/// metadata.tags = vec!["python".to_string(), "linting".to_string()];
/// // `dependencies` maps a resource type (e.g. "agents") to a list of
/// // dependency specifications. It is typically parsed from frontmatter
/// // rather than set programmatically.
///
/// // Custom fields via extra map
/// let mut extra = HashMap::new();
/// extra.insert("license".to_string(), "MIT".into());
/// extra.insert("min_python".to_string(), "3.8".into());
/// metadata.extra = extra;
/// ```
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct MarkdownMetadata {
    /// Human-readable title of the resource.
    ///
    /// This is displayed in listings and used for resource identification.
    /// If not provided, the title may be extracted from the first heading
    /// in the Markdown content. Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,

    /// Brief description explaining what the resource does.
    ///
    /// Used for documentation and resource discovery. If not provided,
    /// the description may be extracted from the first paragraph in
    /// the Markdown content. Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Version identifier for the resource.
    ///
    /// Semantic versioning (e.g., "1.2.3") is recommended for compatibility
    /// with dependency resolution, but the field is a plain string and any
    /// format is accepted. Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub version: Option<String>,

    /// Author or maintainer information.
    ///
    /// Can be a name, organization, or contact information. Free-form text.
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub author: Option<String>,

    /// Classification tags for categorization and filtering.
    ///
    /// Tags help with resource discovery and organization. Common patterns:
    /// - Language-specific: "python", "javascript", "rust"
    /// - Functionality: "linting", "testing", "documentation"
    /// - Domain: "web-dev", "data-science", "devops"
    ///
    /// A missing `tags` key deserializes to an empty list, and an empty list
    /// is left out of serialized output.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tags: Vec<String>,

    /// Resource type classification.
    ///
    /// Currently supported types:
    /// - "agent": Interactive Claude Code agents
    /// - "snippet": Code snippets and templates
    ///
    /// This field uses `rename = "type"` to match the frontmatter format
    /// while avoiding Rust's `type` keyword. The value is stored as a
    /// free-form string.
    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
    pub resource_type: Option<String>,

    /// Dependencies for this resource.
    ///
    /// This field uses the structured transitive dependency format where
    /// dependencies are organized by resource type (agents, snippets, etc.).
    /// Each resource type maps to a list of dependency specifications.
    /// A missing key deserializes to `None`, which is also skipped on output.
    ///
    /// Example:
    /// ```yaml
    /// dependencies:
    ///   agents:
    ///     - path: agents/helper.md
    ///       version: v1.0.0
    ///   snippets:
    ///     - path: snippets/utils.md
    /// ```
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub dependencies: Option<HashMap<String, Vec<DependencySpec>>>,

    /// Additional custom metadata fields.
    ///
    /// Any frontmatter fields not recognized by the standard schema are
    /// preserved here. This allows resource authors to include custom
    /// metadata without breaking compatibility with AGPM.
    ///
    /// Values are stored as `serde_json::Value` to handle mixed types
    /// (strings, numbers, arrays, objects). Because the map is flattened,
    /// its entries appear as top-level frontmatter keys rather than under
    /// an `extra` key.
    #[serde(flatten)]
    pub extra: HashMap<String, serde_json::Value>,
}
400
/// A parsed Markdown document representing a Claude Code resource.
///
/// This is the core structure for handling Markdown files in AGPM. It provides
/// a clean separation between structured metadata (from frontmatter) and the
/// actual content, while preserving the original document format for roundtrip
/// compatibility.
///
/// # Structure
///
/// A `MarkdownDocument` consists of three parts:
/// 1. **Metadata**: Structured data from frontmatter (YAML or TOML)
/// 2. **Content**: The main Markdown content without frontmatter
/// 3. **Raw**: The complete document text, which is what gets written to disk
///
/// # Frontmatter Support
///
/// The document can parse both YAML (`---` delimiters) and TOML (`+++` delimiters)
/// frontmatter formats. If no frontmatter is present, or the frontmatter cannot
/// be parsed, the entire file is treated as content and `metadata` is `None`.
///
/// # Content Extraction
///
/// When explicit metadata is not available, the document can extract information
/// from the content itself using [`get_title`] and [`get_description`] methods.
///
/// # Thread Safety
///
/// The struct owns all of its data and is `Clone`, so each thread can work on
/// its own copy. Whether it can be sent between threads also depends on the
/// types stored inside [`MarkdownMetadata`].
///
/// # Examples
///
/// ## Reading from File
///
/// ```rust,no_run
/// # use agpm_cli::markdown::MarkdownDocument;
/// # use std::path::Path;
/// # fn example() -> anyhow::Result<()> {
/// let doc = MarkdownDocument::read(Path::new("agent.md"))?;
///
/// if let Some(metadata) = &doc.metadata {
///     println!("Found metadata: {:?}", metadata.title);
/// }
///
/// println!("Content length: {} chars", doc.content.len());
/// # Ok(())
/// # }
/// ```
///
/// ## Creating Programmatically
///
/// ```rust,no_run
/// # use agpm_cli::markdown::{MarkdownDocument, MarkdownMetadata};
/// let metadata = MarkdownMetadata {
///     title: Some("Test Agent".to_string()),
///     version: Some("1.0.0".to_string()),
///     ..Default::default()
/// };
///
/// let content = "# Test Agent\n\nThis agent helps with testing.";
/// let doc = MarkdownDocument::with_metadata(metadata, content.to_string());
///
/// // Raw contains formatted frontmatter + content
/// assert!(doc.raw.contains("title: Test Agent"));
/// assert!(doc.raw.contains("This agent helps with testing"));
/// ```
///
/// ## Modifying Content
///
/// ```rust,no_run
/// # use agpm_cli::markdown::MarkdownDocument;
/// let mut doc = MarkdownDocument::new("# Original".to_string());
///
/// // Update content - raw is automatically regenerated
/// doc.set_content("# Updated Content\n\nNew description.".to_string());
///
/// assert_eq!(doc.content, "# Updated Content\n\nNew description.");
/// assert_eq!(doc.raw, doc.content); // No frontmatter, so raw == content
/// ```
///
/// [`get_title`]: MarkdownDocument::get_title
/// [`get_description`]: MarkdownDocument::get_description
#[derive(Debug, Clone)]
pub struct MarkdownDocument {
    /// Parsed metadata extracted from frontmatter.
    ///
    /// This is `Some` when the document contained YAML or TOML frontmatter
    /// that parsed successfully. It is `None` for plain Markdown files and
    /// for documents whose frontmatter failed to parse. The metadata is used
    /// by AGPM for dependency resolution and resource management.
    pub metadata: Option<MarkdownMetadata>,

    /// The main Markdown content without frontmatter delimiters.
    ///
    /// This contains only the actual content portion of the document,
    /// with frontmatter stripped away. This is what gets processed
    /// for content-based metadata extraction. When frontmatter failed to
    /// parse, the unparsed frontmatter text remains part of this field.
    pub content: String,

    /// The complete document text, including frontmatter.
    ///
    /// For a parsed document this is the input exactly as it was given, so
    /// writing it back reproduces the original. The `set_metadata` and
    /// `set_content` methods regenerate it; because the fields are public,
    /// assigning `metadata` or `content` directly leaves `raw` unchanged.
    pub raw: String,
}
507
508impl MarkdownDocument {
509    /// Create a new markdown document without frontmatter.
510    ///
511    /// This creates a plain Markdown document with no metadata. The content
512    /// becomes both the `content` and `raw` fields since there's no frontmatter
513    /// to format.
514    ///
515    /// # Arguments
516    ///
517    /// * `content` - The Markdown content as a string
518    ///
519    /// # Examples
520    ///
521    /// ```rust,no_run
522    /// # use agpm_cli::markdown::MarkdownDocument;
523    /// let doc = MarkdownDocument::new("# Hello\n\nWorld!".to_string());
524    ///
525    /// assert!(doc.metadata.is_none());
526    /// assert_eq!(doc.content, "# Hello\n\nWorld!");
527    /// assert_eq!(doc.raw, doc.content);
528    /// ```
529    #[must_use]
530    pub fn new(content: String) -> Self {
531        Self {
532            metadata: None,
533            content: content.clone(),
534            raw: content,
535        }
536    }
537
538    /// Create a markdown document with metadata and content.
539    ///
540    /// This constructor creates a complete document with structured metadata
541    /// in YAML frontmatter format. The `raw` field will contain the formatted
542    /// frontmatter followed by the content.
543    ///
544    /// # Arguments
545    ///
546    /// * `metadata` - The structured metadata for the document
547    /// * `content` - The Markdown content (without frontmatter)
548    ///
549    /// # Examples
550    ///
551    /// ```rust,no_run
552    /// # use agpm_cli::markdown::{MarkdownDocument, MarkdownMetadata};
553    /// let metadata = MarkdownMetadata {
554    ///     title: Some("Example".to_string()),
555    ///     version: Some("1.0.0".to_string()),
556    ///     ..Default::default()
557    /// };
558    ///
559    /// let doc = MarkdownDocument::with_metadata(
560    ///     metadata,
561    ///     "# Example\n\nThis is an example.".to_string()
562    /// );
563    ///
564    /// assert!(doc.metadata.is_some());
565    /// assert!(doc.raw.starts_with("---\n"));
566    /// assert!(doc.raw.contains("title: Example"));
567    /// ```
568    #[must_use]
569    pub fn with_metadata(metadata: MarkdownMetadata, content: String) -> Self {
570        let raw = Self::format_with_frontmatter(&metadata, &content);
571        Self {
572            metadata: Some(metadata),
573            content,
574            raw,
575        }
576    }
577
578    /// Read and parse a Markdown file from the filesystem.
579    ///
580    /// This method reads the entire file into memory and parses it for
581    /// frontmatter and content. It supports both YAML and TOML frontmatter
582    /// formats and provides detailed error context on failure.
583    ///
584    /// # Arguments
585    ///
586    /// * `path` - Path to the Markdown file to read
587    ///
588    /// # Returns
589    ///
590    /// Returns a `Result` containing the parsed document or an error with
591    /// context about what went wrong (file not found, parse error, etc.).
592    ///
593    /// # Errors
594    ///
595    /// This function will return an error if:
596    /// - The file cannot be read (doesn't exist, permissions, etc.)
597    /// - The file contains invalid UTF-8
598    /// - The frontmatter is malformed YAML or TOML
599    ///
600    /// # Examples
601    ///
602    /// ```rust,no_run
603    /// # use agpm_cli::markdown::MarkdownDocument;
604    /// # use std::path::Path;
605    /// # fn example() -> anyhow::Result<()> {
606    /// let doc = MarkdownDocument::read(Path::new("resources/agent.md"))?;
607    ///
608    /// println!("Title: {:?}", doc.get_title());
609    /// println!("Content length: {}", doc.content.len());
610    /// # Ok(())
611    /// # }
612    /// ```
613    pub fn read(path: &Path) -> Result<Self> {
614        let raw = fs::read_to_string(path)
615            .with_context(|| format!("Failed to read markdown file: {}", path.display()))?;
616
617        Self::parse(&raw)
618    }
619
620    /// Write the document to a file on disk.
621    ///
622    /// This method performs an atomic write operation, creating any necessary
623    /// parent directories automatically. The complete `raw` content (including
624    /// frontmatter if present) is written to the specified path.
625    ///
626    /// # Arguments
627    ///
628    /// * `path` - Target path where the file should be written
629    ///
630    /// # Returns
631    ///
632    /// Returns `Ok(())` on success, or an error with context on failure.
633    ///
634    /// # Errors
635    ///
636    /// This function will return an error if:
637    /// - Parent directories cannot be created (permissions, disk space, etc.)
638    /// - The file cannot be written (permissions, disk space, etc.)
639    /// - The path is invalid or inaccessible
640    ///
641    /// # Safety
642    ///
643    /// This operation creates parent directories as needed, which could
644    /// potentially create unexpected directory structures if the path
645    /// is not validated by the caller.
646    ///
647    /// # Examples
648    ///
649    /// ```rust,no_run
650    /// # use agpm_cli::markdown::MarkdownDocument;
651    /// # use std::path::Path;
652    /// # fn example() -> anyhow::Result<()> {
653    /// let doc = MarkdownDocument::new("# Test\n\nContent".to_string());
654    ///
655    /// // Writes to file, creating directories as needed
656    /// doc.write(Path::new("output/resources/test.md"))?;
657    /// # Ok(())
658    /// # }
659    /// ```
660    pub fn write(&self, path: &Path) -> Result<()> {
661        // Ensure parent directory exists
662        if let Some(parent) = path.parent() {
663            fs::create_dir_all(parent)
664                .with_context(|| format!("Failed to create directory: {}", parent.display()))?;
665        }
666
667        fs::write(path, &self.raw)
668            .with_context(|| format!("Failed to write markdown file: {}", path.display()))?;
669
670        Ok(())
671    }
672
673    /// Parse a Markdown string that may contain frontmatter with context for warnings.
674    ///
675    /// This is similar to [`parse`](Self::parse) but accepts an optional context string
676    /// that will be included in warning messages when preprocessing is required.
677    ///
678    /// # Arguments
679    ///
680    /// * `input` - The complete Markdown document as a string
681    /// * `context` - Optional context (e.g., file path) for warning messages
682    ///
683    /// # Returns
684    ///
685    /// Returns a parsed `MarkdownDocument`. If frontmatter parsing fails,
686    /// a warning is emitted and the entire document is treated as content.
687    pub fn parse_with_context(input: &str, context: Option<&str>) -> Result<Self> {
688        // Check for YAML frontmatter (starts with ---)
689        if (input.starts_with("---\n") || input.starts_with("---\r\n"))
690            && let Some(end_idx) = find_frontmatter_end(input)
691        {
692            let skip_size = if input.starts_with("---\r\n") {
693                5
694            } else {
695                4
696            };
697            let frontmatter = &input[skip_size..end_idx];
698            let content = input[end_idx..].trim_start_matches("---").trim_start();
699
700            // Try to parse YAML frontmatter with standard parser first
701            match serde_yaml::from_str::<MarkdownMetadata>(frontmatter) {
702                Ok(metadata) => {
703                    // Standard parsing succeeded
704                    return Ok(Self {
705                        metadata: Some(metadata),
706                        content: content.to_string(),
707                        raw: input.to_string(),
708                    });
709                }
710                Err(err) => {
711                    // Parsing failed - emit helpful warning and treat entire document as content
712                    emit_frontmatter_parse_warning("YAML", context, &err);
713
714                    // Treat the entire document as content (including the invalid frontmatter)
715                    return Ok(Self {
716                        metadata: None,
717                        content: input.to_string(),
718                        raw: input.to_string(),
719                    });
720                }
721            }
722        }
723
724        // Check for TOML frontmatter (starts with +++)
725        if (input.starts_with("+++\n") || input.starts_with("+++\r\n"))
726            && let Some(end_idx) = find_toml_frontmatter_end(input)
727        {
728            let skip_size = if input.starts_with("+++\r\n") {
729                5
730            } else {
731                4
732            };
733            let frontmatter = &input[skip_size..end_idx];
734            let content = input[end_idx..].trim_start_matches("+++").trim_start();
735
736            // Try to parse TOML frontmatter
737            match toml::from_str::<MarkdownMetadata>(frontmatter) {
738                Ok(metadata) => {
739                    return Ok(Self {
740                        metadata: Some(metadata),
741                        content: content.to_string(),
742                        raw: input.to_string(),
743                    });
744                }
745                Err(err) => {
746                    // TOML parsing failed - emit helpful warning and treat entire document as content
747                    emit_frontmatter_parse_warning("TOML", context, &err);
748
749                    // Treat the entire document as content (including the invalid frontmatter)
750                    return Ok(Self {
751                        metadata: None,
752                        content: input.to_string(),
753                        raw: input.to_string(),
754                    });
755                }
756            }
757        }
758
759        // No frontmatter, entire document is content
760        Ok(Self {
761            metadata: None,
762            content: input.to_string(),
763            raw: input.to_string(),
764        })
765    }
766
767    /// Parse a Markdown string that may contain frontmatter.
768    ///
769    /// This is the core parsing method that handles both YAML and TOML
770    /// frontmatter formats. It attempts to detect and parse frontmatter,
771    /// falling back to treating the entire input as content if no valid
772    /// frontmatter is found.
773    ///
774    /// # Supported Formats
775    ///
776    /// ## YAML Frontmatter (recommended)
777    /// ```text
778    /// ---
779    /// title: "Example"
780    /// version: "1.0.0"
781    /// ---
782    /// Content here...
783    /// ```
784    ///
785    /// ## TOML Frontmatter
786    /// ```text
787    /// +++
788    /// title = "Example"
789    /// version = "1.0.0"
790    /// +++
791    /// Content here...
792    /// ```
793    ///
794    /// # Arguments
795    ///
796    /// * `input` - The complete Markdown document as a string
797    ///
798    /// # Returns
799    ///
800    /// Returns a parsed `MarkdownDocument` with metadata extracted if present.
801    ///
802    /// # Errors
803    ///
804    /// Returns an error if the frontmatter is present but malformed:
805    /// - Invalid YAML syntax in `---` delimited frontmatter
806    /// - Invalid TOML syntax in `+++` delimited frontmatter
807    /// - Frontmatter that doesn't match the expected metadata schema
808    ///
809    /// # Examples
810    ///
811    /// ```rust,no_run
812    /// # use agpm_cli::markdown::MarkdownDocument;
813    /// // Parse document with YAML frontmatter
814    /// let input = "---\ntitle: Test\n---\n# Content";
815    /// let doc = MarkdownDocument::parse(input).unwrap();
816    /// assert!(doc.metadata.is_some());
817    ///
818    /// // Parse plain Markdown
819    /// let input = "# Just Content";
820    /// let doc = MarkdownDocument::parse(input).unwrap();
821    /// assert!(doc.metadata.is_none());
822    /// ```
823    pub fn parse(input: &str) -> Result<Self> {
824        Self::parse_with_context(input, None)
825    }
826
827    /// Format a document with YAML frontmatter
828    fn format_with_frontmatter(metadata: &MarkdownMetadata, content: &str) -> String {
829        let yaml = serde_yaml::to_string(metadata).unwrap_or_default();
830        // Trim trailing whitespace from YAML and ensure newline before closing delimiter
831        // This prevents the closing --- from being concatenated with the YAML content
832        let yaml_trimmed = yaml.trim_end();
833        format!("---\n{}\n---\n\n{}", yaml_trimmed, content)
834    }
835
836    /// Update the document's metadata and regenerate the raw content.
837    ///
838    /// This method replaces the current metadata (if any) with new metadata
839    /// and automatically regenerates the `raw` field to include properly
840    /// formatted YAML frontmatter.
841    ///
842    /// # Arguments
843    ///
844    /// * `metadata` - The new metadata to set for this document
845    ///
846    /// # Effects
847    ///
848    /// - Sets `self.metadata` to `Some(metadata)`
849    /// - Regenerates `self.raw` with YAML frontmatter + content
850    /// - Preserves the existing `content` field unchanged
851    ///
852    /// # Examples
853    ///
854    /// ```rust,no_run
855    /// # use agpm_cli::markdown::{MarkdownDocument, MarkdownMetadata};
856    /// let mut doc = MarkdownDocument::new("# Test\n\nContent".to_string());
857    /// assert!(doc.metadata.is_none());
858    ///
859    /// let metadata = MarkdownMetadata {
860    ///     title: Some("New Title".to_string()),
861    ///     version: Some("2.0.0".to_string()),
862    ///     ..Default::default()
863    /// };
864    ///
865    /// doc.set_metadata(metadata);
866    /// assert!(doc.metadata.is_some());
867    /// assert!(doc.raw.contains("title: New Title"));
868    /// assert!(doc.raw.contains("# Test"));
869    /// ```
870    pub fn set_metadata(&mut self, metadata: MarkdownMetadata) {
871        self.raw = Self::format_with_frontmatter(&metadata, &self.content);
872        self.metadata = Some(metadata);
873    }
874
875    /// Update the document's content and regenerate the raw document.
876    ///
877    /// This method replaces the current content with new content and
878    /// automatically regenerates the `raw` field. If metadata is present,
879    /// the raw content will include formatted frontmatter; otherwise it
880    /// will be just the new content.
881    ///
882    /// # Arguments
883    ///
884    /// * `content` - The new Markdown content (without frontmatter)
885    ///
886    /// # Effects
887    ///
888    /// - Sets `self.content` to the new content
889    /// - Regenerates `self.raw` appropriately:
890    ///   - If metadata exists: frontmatter + new content
891    ///   - If no metadata: just the new content
892    /// - Preserves existing metadata unchanged
893    ///
894    /// # Examples
895    ///
896    /// ```rust,no_run
897    /// # use agpm_cli::markdown::{MarkdownDocument, MarkdownMetadata};
898    /// // Document with metadata
899    /// let metadata = MarkdownMetadata {
900    ///     title: Some("Test".to_string()),
901    ///     ..Default::default()
902    /// };
903    /// let mut doc = MarkdownDocument::with_metadata(
904    ///     metadata,
905    ///     "Original content".to_string()
906    /// );
907    ///
908    /// doc.set_content("# New Content\n\nUpdated!".to_string());
909    ///
910    /// assert_eq!(doc.content, "# New Content\n\nUpdated!");
911    /// assert!(doc.raw.contains("title: Test"));
912    /// assert!(doc.raw.contains("# New Content"));
913    /// ```
914    pub fn set_content(&mut self, content: String) {
915        if let Some(ref metadata) = self.metadata {
916            self.raw = Self::format_with_frontmatter(metadata, &content);
917        } else {
918            self.raw = content.clone();
919        }
920        self.content = content;
921    }
922
923    /// Extract the document title from metadata or content.
924    ///
925    /// This method provides a fallback mechanism for getting the document title:
926    /// 1. First, check if metadata contains an explicit title
927    /// 2. If not, scan the content for the first level-1 heading (`# Title`)
928    /// 3. Return `None` if neither source provides a title
929    ///
930    /// # Returns
931    ///
932    /// - `Some(String)` containing the title if found
933    /// - `None` if no title is available from either source
934    ///
935    /// # Title Extraction Rules
936    ///
937    /// When extracting from content:
938    /// - Only level-1 headings (starting with `# `) are considered
939    /// - The first matching heading is used
940    /// - Leading/trailing whitespace is trimmed from the result
941    /// - Empty headings (just `#`) are ignored
942    ///
943    /// # Examples
944    ///
945    /// ```rust,no_run
946    /// # use agpm_cli::markdown::{MarkdownDocument, MarkdownMetadata};
947    /// // From metadata
948    /// let metadata = MarkdownMetadata {
949    ///     title: Some("Metadata Title".to_string()),
950    ///     ..Default::default()
951    /// };
952    /// let doc = MarkdownDocument::with_metadata(
953    ///     metadata,
954    ///     "# Content Title\n\nSome text".to_string()
955    /// );
956    /// assert_eq!(doc.get_title(), Some("Metadata Title".to_string()));
957    ///
958    /// // From content heading
959    /// let doc = MarkdownDocument::new("# Extracted Title\n\nContent".to_string());
960    /// assert_eq!(doc.get_title(), Some("Extracted Title".to_string()));
961    ///
962    /// // No title available
963    /// let doc = MarkdownDocument::new("Just some content without headings".to_string());
964    /// assert_eq!(doc.get_title(), None);
965    /// ```
966    #[must_use]
967    pub fn get_title(&self) -> Option<String> {
968        // First check metadata
969        if let Some(ref metadata) = self.metadata
970            && let Some(ref title) = metadata.title
971        {
972            return Some(title.clone());
973        }
974
975        // Try to extract from first # heading
976        for line in self.content.lines() {
977            if let Some(heading) = line.strip_prefix("# ") {
978                return Some(heading.trim().to_string());
979            }
980        }
981
982        None
983    }
984
985    /// Extract the document description from metadata or content.
986    ///
987    /// This method provides a fallback mechanism for getting the document description:
988    /// 1. First, check if metadata contains an explicit description
989    /// 2. If not, extract the first paragraph from the content (after any headings)
990    /// 3. Return `None` if neither source provides a description
991    ///
992    /// # Returns
993    ///
994    /// - `Some(String)` containing the description if found
995    /// - `None` if no description is available from either source
996    ///
997    /// # Description Extraction Rules
998    ///
999    /// When extracting from content:
1000    /// - All headings (lines starting with `#`) are skipped
1001    /// - Empty lines before the first paragraph are ignored
1002    /// - The first continuous block of non-empty lines becomes the description
1003    /// - Multiple lines are joined with spaces
1004    /// - Extraction stops at the first empty line after content starts
1005    ///
1006    /// # Examples
1007    ///
1008    /// ```rust,no_run
1009    /// # use agpm_cli::markdown::{MarkdownDocument, MarkdownMetadata};
1010    /// // From metadata
1011    /// let metadata = MarkdownMetadata {
1012    ///     description: Some("Metadata description".to_string()),
1013    ///     ..Default::default()
1014    /// };
1015    /// let doc = MarkdownDocument::with_metadata(
1016    ///     metadata,
1017    ///     "# Title\n\nContent description".to_string()
1018    /// );
1019    /// assert_eq!(doc.get_description(), Some("Metadata description".to_string()));
1020    ///
1021    /// // From content paragraph
1022    /// let doc = MarkdownDocument::new(
1023    ///     "# Title\n\nThis is the first\nparagraph of content.\n\nSecond paragraph.".to_string()
1024    /// );
1025    /// assert_eq!(doc.get_description(), Some("This is the first paragraph of content.".to_string()));
1026    ///
1027    /// // No description available  
1028    /// let doc = MarkdownDocument::new("# Just a title".to_string());
1029    /// assert_eq!(doc.get_description(), None);
1030    /// ```
1031    #[must_use]
1032    pub fn get_description(&self) -> Option<String> {
1033        // First check metadata
1034        if let Some(ref metadata) = self.metadata
1035            && let Some(ref desc) = metadata.description
1036        {
1037            return Some(desc.clone());
1038        }
1039
1040        // Try to extract first non-heading paragraph
1041        let mut in_paragraph = false;
1042        let mut paragraph = String::new();
1043
1044        for line in self.content.lines() {
1045            let trimmed = line.trim();
1046
1047            // Skip headings and empty lines at start
1048            if trimmed.starts_with('#') || (trimmed.is_empty() && !in_paragraph) {
1049                continue;
1050            }
1051
1052            // Start collecting paragraph
1053            if !trimmed.is_empty() {
1054                in_paragraph = true;
1055                if !paragraph.is_empty() {
1056                    paragraph.push(' ');
1057                }
1058                paragraph.push_str(trimmed);
1059            } else if in_paragraph {
1060                // End of first paragraph
1061                break;
1062            }
1063        }
1064
1065        if paragraph.is_empty() {
1066            None
1067        } else {
1068            Some(paragraph)
1069        }
1070    }
1071}
1072
1073/// Find the end position of YAML frontmatter in a document.
1074///
1075/// This helper function scans through a document that starts with YAML
1076/// frontmatter (delimited by `---`) to find where the closing delimiter
1077/// occurs. It returns the byte position of the closing delimiter.
1078///
1079/// # Arguments
1080///
1081/// * `input` - The document content starting with `---`
1082///
1083/// # Returns
1084///
1085/// - `Some(usize)` - Byte position of the closing `---` delimiter
1086/// - `None` - If no closing delimiter is found
1087///
1088/// # Implementation Notes
1089///
1090/// - Assumes the input starts with the opening `---` delimiter
1091/// - Counts bytes, not characters, for proper string slicing
1092/// - Accounts for newline characters in position calculation
fn find_frontmatter_end(input: &str) -> Option<usize> {
    find_closing_delimiter(input, "---")
}

/// Locate the line that closes a frontmatter block and return its byte offset.
///
/// Shared implementation behind [`find_frontmatter_end`] and
/// [`find_toml_frontmatter_end`].
///
/// # Arguments
///
/// * `input` - The document content; its first line is taken to be the opening delimiter
/// * `delimiter` - The exact text of the closing delimiter line (`---` or `+++`)
///
/// # Returns
///
/// - `Some(usize)` - Byte offset in `input` at which the closing delimiter line starts
/// - `None` - If `input` is empty or no later line equals `delimiter`
///
/// # Implementation Notes
///
/// - The first line is skipped without being validated
/// - The offset is accumulated from the real byte length of every line including its
///   own terminator, so LF, CRLF and documents mixing both are all measured correctly
/// - A line matches only if it equals `delimiter` exactly once its `\n` / `\r\n`
///   terminator is removed (the same line definition as `str::lines`)
fn find_closing_delimiter(input: &str, delimiter: &str) -> Option<usize> {
    // `split_inclusive` keeps each line's terminator attached, so a segment's length is
    // exactly the number of bytes that line occupies in `input`.
    let mut segments = input.split_inclusive('\n');

    // Skip the opening delimiter line; scanning starts right after it.
    let mut offset = segments.next()?.len();

    for segment in segments {
        // Strip `\n` or `\r\n`. A `\r` that is not followed by `\n` belongs to the line.
        let line = match segment.strip_suffix('\n') {
            Some(rest) => rest.strip_suffix('\r').unwrap_or(rest),
            None => segment,
        };

        if line == delimiter {
            return Some(offset);
        }

        offset += segment.len();
    }

    None
}

/// Find the end position of TOML frontmatter in a document.
///
/// This helper function scans through a document that starts with TOML
/// frontmatter (delimited by `+++`) to find where the closing delimiter
/// occurs. It returns the byte position of the closing delimiter.
///
/// # Arguments
///
/// * `input` - The document content starting with `+++`
///
/// # Returns
///
/// - `Some(usize)` - Byte position of the closing `+++` delimiter
/// - `None` - If no closing delimiter is found
///
/// # Implementation Notes
///
/// - Assumes the input starts with the opening `+++` delimiter
/// - Counts bytes, not characters, for proper string slicing
/// - Measures each line's actual terminator (LF or CRLF), so mixed line endings
///   do not skew the reported position
fn find_toml_frontmatter_end(input: &str) -> Option<usize> {
    find_closing_delimiter(input, "+++")
}
1170
1171/// Check if a path represents a Markdown file based on its extension.
1172///
1173/// This function validates file paths to determine if they should be treated
1174/// as Markdown files. It performs case-insensitive extension checking to
1175/// support different naming conventions across platforms.
1176///
1177/// # Supported Extensions
1178///
1179/// - `.md` (most common)
1180/// - `.markdown` (verbose form)
1181/// - Case variations: `.MD`, `.Markdown`, etc.
1182///
1183/// # Arguments
1184///
1185/// * `path` - The file path to check
1186///
1187/// # Returns
1188///
1189/// - `true` if the file has a recognized Markdown extension
1190/// - `false` otherwise (including files with no extension)
1191///
1192/// # Examples
1193///
1194/// ```rust,no_run
1195/// # use agpm_cli::markdown::is_markdown_file;
1196/// # use std::path::Path;
1197/// assert!(is_markdown_file(Path::new("agent.md")));
1198/// assert!(is_markdown_file(Path::new("README.MD")));
1199/// assert!(is_markdown_file(Path::new("guide.markdown")));
1200/// assert!(!is_markdown_file(Path::new("config.toml")));
1201/// assert!(!is_markdown_file(Path::new("script.sh")));
1202/// assert!(!is_markdown_file(Path::new("no-extension")));
1203/// ```
#[must_use]
pub fn is_markdown_file(path: &Path) -> bool {
    // Extensions accepted as Markdown; matching ignores ASCII case.
    const MARKDOWN_EXTENSIONS: [&str; 2] = ["md", "markdown"];

    // A missing extension, or one that is not valid UTF-8, can never match.
    let Some(extension) = path.extension().and_then(|ext| ext.to_str()) else {
        return false;
    };

    MARKDOWN_EXTENSIONS.iter().any(|known| extension.eq_ignore_ascii_case(known))
}
1210
1211/// Recursively find all Markdown files in a directory.
1212///
1213/// This function performs a recursive traversal of the given directory,
1214/// collecting all files that have Markdown extensions. It follows symbolic
1215/// links and handles filesystem errors gracefully.
1216///
1217/// # Directory Traversal
1218///
1219/// - Recursively traverses all subdirectories
/// - Follows symbolic links; errors that `walkdir` reports while doing so (such as a link loop) are skipped like any other inaccessible entry
1221/// - Silently skips entries that cannot be accessed
1222/// - Only includes regular files (not directories or special files)
1223///
1224/// # Arguments
1225///
1226/// * `dir` - The directory path to search
1227///
1228/// # Returns
1229///
/// - `Ok(Vec<PathBuf>)` - Paths to the Markdown files found, each beginning with `dir` as it was passed in
/// - `Err(...)` - Not produced by the current implementation; traversal errors are skipped instead
1232///
1233/// # Behavior
1234///
1235/// - Returns empty vector if directory doesn't exist (not an error)
1236/// - Files are returned in filesystem order (not sorted)
/// - Paths are not canonicalized; they are absolute only if `dir` itself is absolute
1238/// - Uses [`is_markdown_file`] for extension validation
1239///
1240/// # Examples
1241///
1242/// ```rust,no_run
1243/// # use agpm_cli::markdown::list_markdown_files;
1244/// # use std::path::Path;
1245/// # fn example() -> anyhow::Result<()> {
1246/// let files = list_markdown_files(Path::new("resources/"))?;
1247///
1248/// for file in files {
1249///     println!("Found: {}", file.display());
1250/// }
1251/// # Ok(())
1252/// # }
1253/// ```
1254///
1255/// # Performance
1256///
1257/// This function loads directory metadata but not file contents, making it
1258/// suitable for scanning large directory trees. For processing the files,
1259/// consider using [`MarkdownDocument::read`] on each result.
1260///
1261/// [`is_markdown_file`]: is_markdown_file
1262/// [`MarkdownDocument::read`]: MarkdownDocument::read
1263pub fn list_markdown_files(dir: &Path) -> Result<Vec<std::path::PathBuf>> {
1264    let mut files = Vec::new();
1265
1266    if !dir.exists() {
1267        return Ok(files);
1268    }
1269
1270    for entry in walkdir::WalkDir::new(dir)
1271        .follow_links(true)
1272        .into_iter()
1273        .filter_map(std::result::Result::ok)
1274    {
1275        let path = entry.path();
1276        if path.is_file() && is_markdown_file(path) {
1277            files.push(path.to_path_buf());
1278        }
1279    }
1280
1281    Ok(files)
1282}
1283
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Metadata whose only populated field is `title`.
    fn titled(title: &str) -> MarkdownMetadata {
        MarkdownMetadata {
            title: Some(title.to_string()),
            ..Default::default()
        }
    }

    /// Parse `input`, failing the test if parsing returns an error.
    fn parse_or_fail(input: &str) -> MarkdownDocument {
        MarkdownDocument::parse(input)
            .unwrap_or_else(|e| panic!("Should not fail, but got error: {}", e))
    }

    /// A freshly constructed document has no metadata and identical `content` and `raw`.
    #[test]
    fn test_markdown_document_new() {
        let text = "# Hello World";
        let doc = MarkdownDocument::new(text.to_string());

        assert!(doc.metadata.is_none());
        assert_eq!(doc.content, text);
        assert_eq!(doc.raw, text);
    }

    /// YAML frontmatter is parsed into metadata and removed from the content.
    #[test]
    fn test_markdown_with_yaml_frontmatter() {
        let input = r"---
title: Test Document
description: A test document
tags:
  - test
  - example
---

# Hello World

This is the content.";

        let parsed = MarkdownDocument::parse(input).unwrap();
        assert!(parsed.metadata.is_some());

        let meta = parsed.metadata.as_ref().unwrap();
        assert_eq!(meta.title.as_deref(), Some("Test Document"));
        assert_eq!(meta.description.as_deref(), Some("A test document"));
        assert_eq!(meta.tags, vec!["test", "example"]);

        assert!(parsed.content.starts_with("# Hello World"));
    }

    /// TOML frontmatter is parsed into the same metadata fields as YAML.
    #[test]
    fn test_markdown_with_toml_frontmatter() {
        let input = r#"+++
title = "Test Document"
description = "A test document"
tags = ["test", "example"]
+++

# Hello World

This is the content."#;

        let parsed = MarkdownDocument::parse(input).unwrap();
        assert!(parsed.metadata.is_some());

        let meta = parsed.metadata.as_ref().unwrap();
        assert_eq!(meta.title.as_deref(), Some("Test Document"));
        assert_eq!(meta.description.as_deref(), Some("A test document"));
        assert_eq!(meta.tags, vec!["test", "example"]);
    }

    /// Without frontmatter the whole input becomes the content.
    #[test]
    fn test_markdown_without_frontmatter() {
        let input = "# Hello World\n\nThis is the content.";

        let parsed = MarkdownDocument::parse(input).unwrap();

        assert!(parsed.metadata.is_none());
        assert_eq!(parsed.content, input);
    }

    /// The title comes from metadata first, then from the first `# ` heading, else `None`.
    #[test]
    fn test_get_title() {
        let from_metadata =
            MarkdownDocument::with_metadata(titled("Metadata Title"), "Content".to_string());
        assert_eq!(from_metadata.get_title().as_deref(), Some("Metadata Title"));

        let from_heading = MarkdownDocument::new("# Heading Title\n\nContent".to_string());
        assert_eq!(from_heading.get_title().as_deref(), Some("Heading Title"));

        let untitled = MarkdownDocument::new("Just content".to_string());
        assert!(untitled.get_title().is_none());
    }

    /// The description comes from metadata first, then from the first body paragraph.
    #[test]
    fn test_get_description() {
        let described = MarkdownMetadata {
            description: Some("Metadata description".to_string()),
            ..Default::default()
        };
        let from_metadata = MarkdownDocument::with_metadata(described, "Content".to_string());
        assert_eq!(from_metadata.get_description().as_deref(), Some("Metadata description"));

        let from_body = MarkdownDocument::new(
            "# Title\n\nThis is the first paragraph.\n\nSecond paragraph.".to_string(),
        );
        assert_eq!(from_body.get_description().as_deref(), Some("This is the first paragraph."));
    }

    /// A document written to disk reads back with the same title and body.
    #[test]
    fn test_read_write_markdown() {
        let workspace = tempdir().unwrap();
        let target = workspace.path().join("test.md");

        let original =
            MarkdownDocument::with_metadata(titled("Test"), "# Test\n\nContent".to_string());
        original.write(&target).unwrap();

        let reloaded = MarkdownDocument::read(&target).unwrap();
        assert!(reloaded.metadata.is_some());
        assert!(reloaded.content.contains("# Test"));
        assert_eq!(reloaded.metadata.unwrap().title.as_deref(), Some("Test"));
    }

    /// Only `.md` and `.markdown` (in any ASCII case) count as Markdown files.
    #[test]
    fn test_is_markdown_file() {
        for accepted in ["test.md", "test.MD", "test.markdown", "test.MARKDOWN"] {
            assert!(is_markdown_file(Path::new(accepted)));
        }
        for rejected in ["test.txt", "test"] {
            assert!(!is_markdown_file(Path::new(rejected)));
        }
    }

    /// Markdown files are found recursively and other files are left out.
    #[test]
    fn test_list_markdown_files() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();

        // Two Markdown files and one other file at the top level...
        std::fs::write(root.join("file1.md"), "content").unwrap();
        std::fs::write(root.join("file2.markdown"), "content").unwrap();
        std::fs::write(root.join("file3.txt"), "content").unwrap();

        // ...plus one Markdown file in a nested directory.
        let nested = root.join("subdir");
        std::fs::create_dir(&nested).unwrap();
        std::fs::write(nested.join("file4.md"), "content").unwrap();

        let found = list_markdown_files(root).unwrap();
        assert_eq!(found.len(), 3);

        let names: Vec<String> = found
            .iter()
            .map(|path| path.file_name().unwrap().to_string_lossy().to_string())
            .collect();

        for expected in ["file1.md", "file2.markdown", "file4.md"] {
            assert!(names.iter().any(|name| name == expected));
        }
        assert!(!names.iter().any(|name| name == "file3.txt"));
    }

    /// `set_metadata` and `set_content` both regenerate `raw` from the current state.
    #[test]
    fn test_set_metadata_and_content() {
        let mut doc = MarkdownDocument::new("Initial content".to_string());

        doc.set_metadata(titled("New Title"));
        assert!(doc.metadata.is_some());
        assert!(doc.raw.contains("title: New Title"));
        assert!(doc.raw.contains("Initial content"));

        doc.set_content("Updated content".to_string());
        assert_eq!(doc.content, "Updated content");
        assert!(doc.raw.contains("Updated content"));
        assert!(doc.raw.contains("title: New Title"));
    }

    /// Frontmatter holding literal `\n` sequences in an unquoted value is invalid YAML;
    /// parsing must still succeed and keep the entire input as content.
    #[test]
    fn test_invalid_frontmatter_with_escaped_newlines() {
        let input = r#"---
name: haiku-syntax-tool
description: Use this agent when you need to fix linting errors, formatting issues, type checking problems, or ensure code adheres to project-specific standards. This agent specializes in enforcing language-specific conventions, project style guides, and maintaining code quality through automated fixes. Examples:\n\n<example>\nContext: The user has just written a new Python function and wants to ensure it meets project standards.\nuser: "I've added a new sync handler function"\nassistant: "Let me review this with the code-standards-enforcer agent to ensure it meets our project standards"\n<commentary>\nSince new code was written, use the Task tool to launch the code-standards-enforcer agent to check for linting, formatting, and type issues according to CLAUDE.md standards.\n</commentary>\n</example>\n\n<example>\nContext: The user encounters linting errors during CI/CD.\nuser: "The CI pipeline is failing due to formatting issues"\nassistant: "I'll use the code-standards-enforcer agent to fix these formatting and linting issues"\n<commentary>\nWhen there are explicit linting or formatting problems, use the code-standards-enforcer agent to automatically fix them according to project standards.\n</commentary>\n</example>\n\n<example>\nContext: The user wants to ensure type hints are correct.\nuser: "Can you check if my type annotations are correct in the API module?"\nassistant: "I'll launch the code-standards-enforcer agent to verify and fix any type annotation issues"\n<commentary>\nFor type checking and annotation verification, use the code-standards-enforcer agent to ensure compliance with project typing standards.\n</commentary>\n</example>
model: haiku
---

You are a meticulous code standards enforcement specialist"#;

        let doc = parse_or_fail(input);

        // Invalid frontmatter means no metadata...
        assert!(doc.metadata.is_none());

        // ...and the frontmatter text stays in the content alongside the body.
        for fragment in [
            "---",
            "name: haiku-syntax-tool",
            "description: Use this agent",
            "model: haiku",
            "meticulous code standards enforcement specialist",
        ] {
            assert!(doc.content.contains(fragment));
        }
    }

    /// Syntactically broken YAML frontmatter falls back to "no metadata, all content".
    #[test]
    fn test_completely_invalid_frontmatter_fallback() {
        let input = r#"---
name: test
description: {this is not valid yaml at all
model: test
---

Content here"#;

        let doc = parse_or_fail(input);

        assert!(doc.metadata.is_none());
        for fragment in ["---", "name: test", "Content here"] {
            assert!(doc.content.contains(fragment));
        }
    }
}