agpm_cli/markdown/mod.rs
1//! Markdown file operations and metadata extraction for Claude Code resources.
2//!
3//! This module provides comprehensive support for reading, writing, and manipulating
4//! Markdown files that contain Claude Code agents and snippets. It handles both
5//! plain Markdown files and files with structured metadata in frontmatter.
6//!
7//! # Overview
8//!
9//! The markdown module is a core component of AGPM that:
10//! - Parses Markdown files with optional YAML or TOML frontmatter
11//! - Extracts structured metadata for dependency resolution
12//! - Preserves document structure during read/write operations
13//! - Provides utilities for file discovery and validation
14//! - Supports atomic file operations for safe installation
15//!
16//! # Supported File Formats
17//!
18//! ## Plain Markdown Files
19//!
20//! Standard Markdown files without frontmatter are fully supported:
21//!
22//! ```markdown
23//! # Python Code Reviewer
24//!
25//! This agent specializes in reviewing Python code for:
26//! - PEP 8 compliance
27//! - Security vulnerabilities
28//! - Performance optimizations
29//!
30//! ## Usage
31//!
32//! When reviewing code, I will...
33//! ```
34//!
35//! ## YAML Frontmatter Format
36//!
37//! Files can include YAML frontmatter for structured metadata:
38//!
39//! ```markdown
40//! ---
41//! title: "Python Code Reviewer"
42//! description: "Specialized agent for Python code quality review"
43//! version: "2.1.0"
44//! author: "Claude Code Team"
45//! type: "agent"
46//! tags:
47//! - "python"
48//! - "code-review"
49//! - "quality"
//! dependencies:
//!   agents:
//!     - path: agents/syntax-checker.md
//!   snippets:
//!     - path: snippets/security-scanner.md
55//! ---
56//!
57//! # Python Code Reviewer
58//!
59//! This agent specializes in reviewing Python code...
60//! ```
61//!
62//! ## TOML Frontmatter Format
63//!
64//! TOML frontmatter is also supported using `+++` delimiters:
65//!
66//! ```text
67//! +++
68//! title = "JavaScript Snippet Collection"
69//! description = "Useful JavaScript utilities and helpers"
70//! version = "1.0.0"
71//! author = "Community Contributors"
72//! type = "snippet"
73//! tags = ["javascript", "utilities", "helpers"]
74//! +++
75//!
76//! # JavaScript Snippet Collection
77//!
78//! ## Array Utilities
79//!
80//! ```javascript
81//! function unique(arr) {
82//! return [...new Set(arr)];
83//! }
84//! ```
85//!
86//! # Metadata Schema
87//!
88//! The frontmatter metadata follows this schema:
89//!
90//! | Field | Type | Description | Required |
91//! |-------|------|-------------|----------|
92//! | title | string | Human-readable resource title | No |
93//! | description | string | Brief description of the resource | No |
94//! | version | string | Resource version (semver recommended) | No |
95//! | author | string | Author name or organization | No |
96//! | type | string | Resource type ("agent" or "snippet") | No |
97//! | tags | array | Tags for categorization | No |
98//! | dependencies | object | Structured dependencies by resource type | No |
99//!
100//! Additional custom fields are preserved in the extra map.
101//!
102//! # Content Extraction
103//!
104//! When metadata is not explicitly provided in frontmatter, the module
105//! can extract information from the Markdown content:
106//!
107//! - **Title**: Extracted from the first level-1 heading in the content
108//! - **Description**: Extracted from the first paragraph after headings
109//!
110//! This allows resources to work without frontmatter while still providing
111//! useful metadata for dependency resolution and display.
112//!
113//! # File Operations
114//!
115//! All file operations are designed to be safe and atomic:
116//! - Parent directories are created automatically during writes
117//! - Content is validated during parsing to catch errors early
118//! - File extensions are validated (.md, .markdown)
119//! - Recursive directory traversal for bulk operations
120//!
121//! # Usage Examples
122//!
123//! ## Basic Reading and Writing
124//!
125//! ```rust,no_run
126//! use agpm_cli::markdown::MarkdownDocument;
127//! use std::path::Path;
128//!
129//! # fn example() -> anyhow::Result<()> {
130//! // Read a markdown file
131//! let doc = MarkdownDocument::read(Path::new("agents/reviewer.md"))?;
132//!
133//! // Access metadata
134//! if let Some(metadata) = &doc.metadata {
135//! println!("Title: {:?}", metadata.title);
136//! println!("Version: {:?}", metadata.version);
137//! println!("Tags: {:?}", metadata.tags);
138//! }
139//!
140//! // Extract title from content if not in metadata
141//! if let Some(title) = doc.get_title() {
142//! println!("Extracted title: {}", title);
143//! }
144//!
145//! // Write to a new location
146//! doc.write(Path::new("installed/reviewer.md"))?;
147//! # Ok(())
148//! # }
149//! ```
150//!
151//! ## Creating Documents Programmatically
152//!
153//! ```rust,no_run
154//! use agpm_cli::markdown::{MarkdownDocument, MarkdownMetadata};
155//!
156//! # fn example() -> anyhow::Result<()> {
157//! // Create metadata
158//! let mut metadata = MarkdownMetadata::default();
159//! metadata.title = Some("Custom Agent".to_string());
160//! metadata.version = Some("1.0.0".to_string());
161//! metadata.tags = vec!["custom".to_string(), "utility".to_string()];
162//!
163//! // Create document with metadata
164//! let content = "# Custom Agent\n\nThis is a custom agent...";
165//! let doc = MarkdownDocument::with_metadata(metadata, content.to_string());
166//!
167//! // The raw field contains formatted frontmatter + content
168//! println!("{}", doc.raw);
169//! # Ok(())
170//! # }
171//! ```
172//!
173//! ## Batch File Processing
174//!
175//! ```rust,no_run
176//! use agpm_cli::markdown::{list_markdown_files, MarkdownDocument};
177//! use std::path::Path;
178//!
179//! # fn example() -> anyhow::Result<()> {
180//! // Find all markdown files in a directory
181//! let files = list_markdown_files(Path::new("resources/"))?;
182//!
183//! for file in files {
184//! let doc = MarkdownDocument::read(&file)?;
185//!
186//! if let Some(title) = doc.get_title() {
187//! println!("{}: {}", file.display(), title);
188//! }
189//! }
190//! # Ok(())
191//! # }
192//! ```
193//!
194//! # Integration with AGPM
195//!
196//! This module integrates with other AGPM components:
197//!
198//! - `crate::manifest`: Uses metadata for dependency resolution
199//! - `crate::lockfile`: Stores checksums and installation paths
200//! - `crate::source`: Handles remote resource fetching
201//! - `crate::core`: Provides core types and error handling
202//!
203//! See the respective module documentation for integration details.
204
205use anyhow::{Context, Result};
206use serde::{Deserialize, Serialize};
207use std::collections::HashMap;
208use std::fs;
209use std::path::Path;
210
211use crate::manifest::DependencySpec;
212
213/// Type alias for [`MarkdownDocument`] for backward compatibility.
214///
215/// This alias exists to provide a consistent naming convention and maintain
216/// backward compatibility with existing code that might use `MarkdownFile`.
217/// New code should prefer using [`MarkdownDocument`] directly.
218///
219/// # Examples
220///
221/// ```rust,no_run
222/// # use agpm_cli::markdown::{MarkdownFile, MarkdownDocument};
223/// // These are equivalent
224/// let doc1 = MarkdownDocument::new("content".to_string());
225/// let doc2 = MarkdownFile::new("content".to_string());
226///
227/// assert_eq!(doc1.content, doc2.content);
228/// ```
229pub type MarkdownFile = MarkdownDocument;
230
231/// Structured metadata extracted from Markdown frontmatter.
232///
233/// This struct represents all the metadata that can be parsed from YAML or TOML
234/// frontmatter in Markdown files. It follows a flexible schema that accommodates
235/// both standard AGPM fields and custom extensions.
236///
237/// # Standard Fields
238///
239/// The following fields have special meaning in AGPM:
240/// - `title`: Human-readable name for the resource
241/// - `description`: Brief explanation of what the resource does
242/// - `version`: Version identifier (semantic versioning recommended)
243/// - `author`: Creator or maintainer information
244/// - `resource_type`: Type classification ("agent" or "snippet")
245/// - `tags`: Categorization labels for filtering and discovery
246/// - `dependencies`: Structured dependencies for transitive resolution
247///
248/// # Custom Fields
249///
250/// Additional fields are preserved in the `extra` map, allowing resource
251/// authors to include custom metadata without breaking compatibility.
252///
253/// # Serialization
254///
255/// The struct uses Serde for serialization with skip-if-empty optimizations
256/// to keep generated frontmatter clean. Empty collections and None values
257/// are omitted from the output.
258///
259/// # Example
260///
261/// ```rust,no_run
262/// # use agpm_cli::markdown::MarkdownMetadata;
263/// # use std::collections::HashMap;
264/// let mut metadata = MarkdownMetadata::default();
265/// metadata.title = Some("Python Linter".to_string());
266/// metadata.version = Some("2.0.1".to_string());
267/// metadata.tags = vec!["python".to_string(), "linting".to_string()];
268/// // Dependencies can be set as a JSON value for the structured format
269/// // This is typically parsed from frontmatter rather than set programmatically
270///
271/// // Custom fields via extra map
272/// let mut extra = HashMap::new();
273/// extra.insert("license".to_string(), "MIT".into());
274/// extra.insert("min_python".to_string(), "3.8".into());
275/// metadata.extra = extra;
276/// ```
277#[derive(Debug, Clone, Default, Serialize, Deserialize)]
278pub struct MarkdownMetadata {
279 /// Human-readable title of the resource.
280 ///
281 /// This is displayed in listings and used for resource identification.
282 /// If not provided, the title may be extracted from the first heading
283 /// in the Markdown content.
284 #[serde(skip_serializing_if = "Option::is_none")]
285 pub title: Option<String>,
286
287 /// Brief description explaining what the resource does.
288 ///
289 /// Used for documentation and resource discovery. If not provided,
290 /// the description may be extracted from the first paragraph in
291 /// the Markdown content.
292 #[serde(skip_serializing_if = "Option::is_none")]
293 pub description: Option<String>,
294
295 /// Version identifier for the resource.
296 ///
297 /// Semantic versioning (e.g., "1.2.3") is recommended for compatibility
298 /// with dependency resolution, but any string format is accepted.
299 #[serde(skip_serializing_if = "Option::is_none")]
300 pub version: Option<String>,
301
302 /// Author or maintainer information.
303 ///
304 /// Can be a name, organization, or contact information. Free-form text.
305 #[serde(skip_serializing_if = "Option::is_none")]
306 pub author: Option<String>,
307
308 /// Classification tags for categorization and filtering.
309 ///
310 /// Tags help with resource discovery and organization. Common patterns:
311 /// - Language-specific: "python", "javascript", "rust"
312 /// - Functionality: "linting", "testing", "documentation"
313 /// - Domain: "web-dev", "data-science", "devops"
314 #[serde(default, skip_serializing_if = "Vec::is_empty")]
315 pub tags: Vec<String>,
316
317 /// Resource type classification.
318 ///
319 /// Currently supported types:
320 /// - "agent": Interactive Claude Code agents
321 /// - "snippet": Code snippets and templates
322 ///
323 /// This field uses `rename = "type"` to match the frontmatter format
324 /// while avoiding Rust's `type` keyword.
325 #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
326 pub resource_type: Option<String>,
327
328 /// Dependencies for this resource.
329 ///
330 /// This field uses the structured transitive dependency format where
331 /// dependencies are organized by resource type (agents, snippets, etc.).
332 /// Each resource type maps to a list of dependency specifications.
333 ///
334 /// Example:
335 /// ```yaml
336 /// dependencies:
337 /// agents:
338 /// - path: agents/helper.md
339 /// version: v1.0.0
340 /// snippets:
341 /// - path: snippets/utils.md
342 /// ```
343 #[serde(default, skip_serializing_if = "Option::is_none")]
344 pub dependencies: Option<HashMap<String, Vec<DependencySpec>>>,
345
346 /// Additional custom metadata fields.
347 ///
348 /// Any frontmatter fields not recognized by the standard schema are
349 /// preserved here. This allows resource authors to include custom
350 /// metadata without breaking compatibility with AGPM.
351 ///
352 /// Values are stored as `serde_json::Value` to handle mixed types
353 /// (strings, numbers, arrays, objects).
354 #[serde(flatten)]
355 pub extra: HashMap<String, serde_json::Value>,
356}
357
358/// A parsed Markdown document representing a Claude Code resource.
359///
360/// This is the core structure for handling Markdown files in AGPM. It provides
361/// a clean separation between structured metadata (from frontmatter) and the
362/// actual content, while preserving the original document format for roundtrip
363/// compatibility.
364///
365/// # Structure
366///
367/// A `MarkdownDocument` consists of three parts:
368/// 1. **Metadata**: Structured data from frontmatter (YAML or TOML)
369/// 2. **Content**: The main Markdown content without frontmatter
370/// 3. **Raw**: The complete original document for faithful reproduction
371///
372/// # Frontmatter Support
373///
374/// The document can parse both YAML (`---` delimiters) and TOML (`+++` delimiters)
375/// frontmatter formats. If no frontmatter is present, the entire file is treated
376/// as content.
377///
378/// # Content Extraction
379///
380/// When explicit metadata is not available, the document can extract information
381/// from the content itself using [`get_title`] and [`get_description`] methods.
382///
383/// # Thread Safety
384///
385/// This struct is `Clone` and can be safely passed between threads for
386/// concurrent processing of multiple documents.
387///
388/// # Examples
389///
390/// ## Reading from File
391///
392/// ```rust,no_run
393/// # use agpm_cli::markdown::MarkdownDocument;
394/// # use std::path::Path;
395/// # fn example() -> anyhow::Result<()> {
396/// let doc = MarkdownDocument::read(Path::new("agent.md"))?;
397///
398/// if let Some(metadata) = &doc.metadata {
399/// println!("Found metadata: {:?}", metadata.title);
400/// }
401///
402/// println!("Content length: {} chars", doc.content.len());
403/// # Ok(())
404/// # }
405/// ```
406///
407/// ## Creating Programmatically
408///
409/// ```rust,no_run
410/// # use agpm_cli::markdown::{MarkdownDocument, MarkdownMetadata};
411/// let metadata = MarkdownMetadata {
412/// title: Some("Test Agent".to_string()),
413/// version: Some("1.0.0".to_string()),
414/// ..Default::default()
415/// };
416///
417/// let content = "# Test Agent\n\nThis agent helps with testing.";
418/// let doc = MarkdownDocument::with_metadata(metadata, content.to_string());
419///
420/// // Raw contains formatted frontmatter + content
421/// assert!(doc.raw.contains("title: Test Agent"));
422/// assert!(doc.raw.contains("This agent helps with testing"));
423/// ```
424///
425/// ## Modifying Content
426///
427/// ```rust,no_run
428/// # use agpm_cli::markdown::MarkdownDocument;
429/// let mut doc = MarkdownDocument::new("# Original".to_string());
430///
431/// // Update content - raw is automatically regenerated
432/// doc.set_content("# Updated Content\n\nNew description.".to_string());
433///
434/// assert_eq!(doc.content, "# Updated Content\n\nNew description.");
435/// assert_eq!(doc.raw, doc.content); // No frontmatter, so raw == content
436/// ```
437///
438/// [`get_title`]: MarkdownDocument::get_title
439/// [`get_description`]: MarkdownDocument::get_description
440#[derive(Debug, Clone)]
441pub struct MarkdownDocument {
442 /// Parsed metadata extracted from frontmatter.
443 ///
444 /// This will be `Some` if the document contained valid YAML or TOML
445 /// frontmatter, and `None` for plain Markdown files. The metadata
446 /// is used by AGPM for dependency resolution and resource management.
447 pub metadata: Option<MarkdownMetadata>,
448
449 /// The main Markdown content without frontmatter delimiters.
450 ///
451 /// This contains only the actual content portion of the document,
452 /// with frontmatter stripped away. This is what gets processed
453 /// for content-based metadata extraction.
454 pub content: String,
455
456 /// The complete original document including frontmatter.
457 ///
458 /// This field preserves the exact original format for faithful
459 /// reproduction when writing back to disk. When metadata or content
460 /// is modified, this field is automatically regenerated to maintain
461 /// consistency.
462 pub raw: String,
463}
464
465impl MarkdownDocument {
466 /// Create a new markdown document without frontmatter.
467 ///
468 /// This creates a plain Markdown document with no metadata. The content
469 /// becomes both the `content` and `raw` fields since there's no frontmatter
470 /// to format.
471 ///
472 /// # Arguments
473 ///
474 /// * `content` - The Markdown content as a string
475 ///
476 /// # Examples
477 ///
478 /// ```rust,no_run
479 /// # use agpm_cli::markdown::MarkdownDocument;
480 /// let doc = MarkdownDocument::new("# Hello\n\nWorld!".to_string());
481 ///
482 /// assert!(doc.metadata.is_none());
483 /// assert_eq!(doc.content, "# Hello\n\nWorld!");
484 /// assert_eq!(doc.raw, doc.content);
485 /// ```
486 #[must_use]
487 pub fn new(content: String) -> Self {
488 Self {
489 metadata: None,
490 content: content.clone(),
491 raw: content,
492 }
493 }
494
495 /// Create a markdown document with metadata and content.
496 ///
497 /// This constructor creates a complete document with structured metadata
498 /// in YAML frontmatter format. The `raw` field will contain the formatted
499 /// frontmatter followed by the content.
500 ///
501 /// # Arguments
502 ///
503 /// * `metadata` - The structured metadata for the document
504 /// * `content` - The Markdown content (without frontmatter)
505 ///
506 /// # Examples
507 ///
508 /// ```rust,no_run
509 /// # use agpm_cli::markdown::{MarkdownDocument, MarkdownMetadata};
510 /// let metadata = MarkdownMetadata {
511 /// title: Some("Example".to_string()),
512 /// version: Some("1.0.0".to_string()),
513 /// ..Default::default()
514 /// };
515 ///
516 /// let doc = MarkdownDocument::with_metadata(
517 /// metadata,
518 /// "# Example\n\nThis is an example.".to_string()
519 /// );
520 ///
521 /// assert!(doc.metadata.is_some());
522 /// assert!(doc.raw.starts_with("---\n"));
523 /// assert!(doc.raw.contains("title: Example"));
524 /// ```
525 #[must_use]
526 pub fn with_metadata(metadata: MarkdownMetadata, content: String) -> Self {
527 let raw = Self::format_with_frontmatter(&metadata, &content);
528 Self {
529 metadata: Some(metadata),
530 content,
531 raw,
532 }
533 }
534
535 /// Read and parse a Markdown file from the filesystem.
536 ///
537 /// This method reads the entire file into memory and parses it for
538 /// frontmatter and content. It supports both YAML and TOML frontmatter
539 /// formats and provides detailed error context on failure.
540 ///
541 /// # Arguments
542 ///
543 /// * `path` - Path to the Markdown file to read
544 ///
545 /// # Returns
546 ///
547 /// Returns a `Result` containing the parsed document or an error with
548 /// context about what went wrong (file not found, parse error, etc.).
549 ///
550 /// # Errors
551 ///
552 /// This function will return an error if:
553 /// - The file cannot be read (doesn't exist, permissions, etc.)
554 /// - The file contains invalid UTF-8
555 /// - The frontmatter is malformed YAML or TOML
556 ///
557 /// # Examples
558 ///
559 /// ```rust,no_run
560 /// # use agpm_cli::markdown::MarkdownDocument;
561 /// # use std::path::Path;
562 /// # fn example() -> anyhow::Result<()> {
563 /// let doc = MarkdownDocument::read(Path::new("resources/agent.md"))?;
564 ///
565 /// println!("Title: {:?}", doc.get_title());
566 /// println!("Content length: {}", doc.content.len());
567 /// # Ok(())
568 /// # }
569 /// ```
570 pub fn read(path: &Path) -> Result<Self> {
571 let raw = fs::read_to_string(path)
572 .with_context(|| format!("Failed to read markdown file: {}", path.display()))?;
573
574 Self::parse(&raw)
575 }
576
577 /// Write the document to a file on disk.
578 ///
579 /// This method performs an atomic write operation, creating any necessary
580 /// parent directories automatically. The complete `raw` content (including
581 /// frontmatter if present) is written to the specified path.
582 ///
583 /// # Arguments
584 ///
585 /// * `path` - Target path where the file should be written
586 ///
587 /// # Returns
588 ///
589 /// Returns `Ok(())` on success, or an error with context on failure.
590 ///
591 /// # Errors
592 ///
593 /// This function will return an error if:
594 /// - Parent directories cannot be created (permissions, disk space, etc.)
595 /// - The file cannot be written (permissions, disk space, etc.)
596 /// - The path is invalid or inaccessible
597 ///
598 /// # Safety
599 ///
600 /// This operation creates parent directories as needed, which could
601 /// potentially create unexpected directory structures if the path
602 /// is not validated by the caller.
603 ///
604 /// # Examples
605 ///
606 /// ```rust,no_run
607 /// # use agpm_cli::markdown::MarkdownDocument;
608 /// # use std::path::Path;
609 /// # fn example() -> anyhow::Result<()> {
610 /// let doc = MarkdownDocument::new("# Test\n\nContent".to_string());
611 ///
612 /// // Writes to file, creating directories as needed
613 /// doc.write(Path::new("output/resources/test.md"))?;
614 /// # Ok(())
615 /// # }
616 /// ```
617 pub fn write(&self, path: &Path) -> Result<()> {
618 // Ensure parent directory exists
619 if let Some(parent) = path.parent() {
620 fs::create_dir_all(parent)
621 .with_context(|| format!("Failed to create directory: {}", parent.display()))?;
622 }
623
624 fs::write(path, &self.raw)
625 .with_context(|| format!("Failed to write markdown file: {}", path.display()))?;
626
627 Ok(())
628 }
629
630 /// Parse a Markdown string that may contain frontmatter with context for warnings.
631 ///
632 /// This is similar to [`parse`](Self::parse) but accepts an optional context string
633 /// that will be included in warning messages when preprocessing is required.
634 ///
635 /// # Arguments
636 ///
637 /// * `input` - The complete Markdown document as a string
638 /// * `context` - Optional context (e.g., file path) for warning messages
639 ///
640 /// # Returns
641 ///
642 /// Returns a parsed `MarkdownDocument`. If frontmatter parsing fails,
643 /// a warning is emitted and the entire document is treated as content.
644 pub fn parse_with_context(input: &str, context: Option<&str>) -> Result<Self> {
645 // Check for YAML frontmatter (starts with ---)
646 if (input.starts_with("---\n") || input.starts_with("---\r\n"))
647 && let Some(end_idx) = find_frontmatter_end(input)
648 {
649 let skip_size = if input.starts_with("---\r\n") {
650 5
651 } else {
652 4
653 };
654 let frontmatter = &input[skip_size..end_idx];
655 let content = input[end_idx..].trim_start_matches("---").trim_start();
656
657 // Try to parse YAML frontmatter with standard parser first
658 match serde_yaml::from_str::<MarkdownMetadata>(frontmatter) {
659 Ok(metadata) => {
660 // Standard parsing succeeded
661 return Ok(Self {
662 metadata: Some(metadata),
663 content: content.to_string(),
664 raw: input.to_string(),
665 });
666 }
667 Err(err) => {
668 // Parsing failed - emit warning and treat entire document as content
669 if let Some(ctx) = context {
670 eprintln!(
671 "⚠️ Warning: Unable to parse YAML frontmatter in '{ctx}'. \
672 The document will be processed without metadata. Error: {err}"
673 );
674 } else {
675 eprintln!(
676 "⚠️ Warning: Unable to parse YAML frontmatter. \
677 The document will be processed without metadata. Error: {err}"
678 );
679 }
680
681 // Treat the entire document as content (including the invalid frontmatter)
682 return Ok(Self {
683 metadata: None,
684 content: input.to_string(),
685 raw: input.to_string(),
686 });
687 }
688 }
689 }
690
691 // Check for TOML frontmatter (starts with +++)
692 if (input.starts_with("+++\n") || input.starts_with("+++\r\n"))
693 && let Some(end_idx) = find_toml_frontmatter_end(input)
694 {
695 let skip_size = if input.starts_with("+++\r\n") {
696 5
697 } else {
698 4
699 };
700 let frontmatter = &input[skip_size..end_idx];
701 let content = input[end_idx..].trim_start_matches("+++").trim_start();
702
703 // Try to parse TOML frontmatter
704 match toml::from_str::<MarkdownMetadata>(frontmatter) {
705 Ok(metadata) => {
706 return Ok(Self {
707 metadata: Some(metadata),
708 content: content.to_string(),
709 raw: input.to_string(),
710 });
711 }
712 Err(err) => {
713 // TOML parsing failed - emit warning and treat entire document as content
714 if let Some(ctx) = context {
715 eprintln!(
716 "⚠️ Warning: Unable to parse TOML frontmatter in '{ctx}'. \
717 The document will be processed without metadata. Error: {err}"
718 );
719 } else {
720 eprintln!(
721 "⚠️ Warning: Unable to parse TOML frontmatter. \
722 The document will be processed without metadata. Error: {err}"
723 );
724 }
725
726 // Treat the entire document as content (including the invalid frontmatter)
727 return Ok(Self {
728 metadata: None,
729 content: input.to_string(),
730 raw: input.to_string(),
731 });
732 }
733 }
734 }
735
736 // No frontmatter, entire document is content
737 Ok(Self {
738 metadata: None,
739 content: input.to_string(),
740 raw: input.to_string(),
741 })
742 }
743
744 /// Parse a Markdown string that may contain frontmatter.
745 ///
746 /// This is the core parsing method that handles both YAML and TOML
747 /// frontmatter formats. It attempts to detect and parse frontmatter,
748 /// falling back to treating the entire input as content if no valid
749 /// frontmatter is found.
750 ///
751 /// # Supported Formats
752 ///
753 /// ## YAML Frontmatter (recommended)
754 /// ```text
755 /// ---
756 /// title: "Example"
757 /// version: "1.0.0"
758 /// ---
759 /// Content here...
760 /// ```
761 ///
762 /// ## TOML Frontmatter
763 /// ```text
764 /// +++
765 /// title = "Example"
766 /// version = "1.0.0"
767 /// +++
768 /// Content here...
769 /// ```
770 ///
771 /// # Arguments
772 ///
773 /// * `input` - The complete Markdown document as a string
774 ///
775 /// # Returns
776 ///
777 /// Returns a parsed `MarkdownDocument` with metadata extracted if present.
778 ///
779 /// # Errors
780 ///
781 /// Returns an error if the frontmatter is present but malformed:
782 /// - Invalid YAML syntax in `---` delimited frontmatter
783 /// - Invalid TOML syntax in `+++` delimited frontmatter
784 /// - Frontmatter that doesn't match the expected metadata schema
785 ///
786 /// # Examples
787 ///
788 /// ```rust,no_run
789 /// # use agpm_cli::markdown::MarkdownDocument;
790 /// // Parse document with YAML frontmatter
791 /// let input = "---\ntitle: Test\n---\n# Content";
792 /// let doc = MarkdownDocument::parse(input).unwrap();
793 /// assert!(doc.metadata.is_some());
794 ///
795 /// // Parse plain Markdown
796 /// let input = "# Just Content";
797 /// let doc = MarkdownDocument::parse(input).unwrap();
798 /// assert!(doc.metadata.is_none());
799 /// ```
800 pub fn parse(input: &str) -> Result<Self> {
801 Self::parse_with_context(input, None)
802 }
803
804 /// Format a document with YAML frontmatter
805 fn format_with_frontmatter(metadata: &MarkdownMetadata, content: &str) -> String {
806 let yaml = serde_yaml::to_string(metadata).unwrap_or_default();
807 format!("---\n{yaml}---\n\n{content}")
808 }
809
810 /// Update the document's metadata and regenerate the raw content.
811 ///
812 /// This method replaces the current metadata (if any) with new metadata
813 /// and automatically regenerates the `raw` field to include properly
814 /// formatted YAML frontmatter.
815 ///
816 /// # Arguments
817 ///
818 /// * `metadata` - The new metadata to set for this document
819 ///
820 /// # Effects
821 ///
822 /// - Sets `self.metadata` to `Some(metadata)`
823 /// - Regenerates `self.raw` with YAML frontmatter + content
824 /// - Preserves the existing `content` field unchanged
825 ///
826 /// # Examples
827 ///
828 /// ```rust,no_run
829 /// # use agpm_cli::markdown::{MarkdownDocument, MarkdownMetadata};
830 /// let mut doc = MarkdownDocument::new("# Test\n\nContent".to_string());
831 /// assert!(doc.metadata.is_none());
832 ///
833 /// let metadata = MarkdownMetadata {
834 /// title: Some("New Title".to_string()),
835 /// version: Some("2.0.0".to_string()),
836 /// ..Default::default()
837 /// };
838 ///
839 /// doc.set_metadata(metadata);
840 /// assert!(doc.metadata.is_some());
841 /// assert!(doc.raw.contains("title: New Title"));
842 /// assert!(doc.raw.contains("# Test"));
843 /// ```
844 pub fn set_metadata(&mut self, metadata: MarkdownMetadata) {
845 self.raw = Self::format_with_frontmatter(&metadata, &self.content);
846 self.metadata = Some(metadata);
847 }
848
849 /// Update the document's content and regenerate the raw document.
850 ///
851 /// This method replaces the current content with new content and
852 /// automatically regenerates the `raw` field. If metadata is present,
853 /// the raw content will include formatted frontmatter; otherwise it
854 /// will be just the new content.
855 ///
856 /// # Arguments
857 ///
858 /// * `content` - The new Markdown content (without frontmatter)
859 ///
860 /// # Effects
861 ///
862 /// - Sets `self.content` to the new content
863 /// - Regenerates `self.raw` appropriately:
864 /// - If metadata exists: frontmatter + new content
865 /// - If no metadata: just the new content
866 /// - Preserves existing metadata unchanged
867 ///
868 /// # Examples
869 ///
870 /// ```rust,no_run
871 /// # use agpm_cli::markdown::{MarkdownDocument, MarkdownMetadata};
872 /// // Document with metadata
873 /// let metadata = MarkdownMetadata {
874 /// title: Some("Test".to_string()),
875 /// ..Default::default()
876 /// };
877 /// let mut doc = MarkdownDocument::with_metadata(
878 /// metadata,
879 /// "Original content".to_string()
880 /// );
881 ///
882 /// doc.set_content("# New Content\n\nUpdated!".to_string());
883 ///
884 /// assert_eq!(doc.content, "# New Content\n\nUpdated!");
885 /// assert!(doc.raw.contains("title: Test"));
886 /// assert!(doc.raw.contains("# New Content"));
887 /// ```
888 pub fn set_content(&mut self, content: String) {
889 if let Some(ref metadata) = self.metadata {
890 self.raw = Self::format_with_frontmatter(metadata, &content);
891 } else {
892 self.raw = content.clone();
893 }
894 self.content = content;
895 }
896
897 /// Extract the document title from metadata or content.
898 ///
899 /// This method provides a fallback mechanism for getting the document title:
900 /// 1. First, check if metadata contains an explicit title
901 /// 2. If not, scan the content for the first level-1 heading (`# Title`)
902 /// 3. Return `None` if neither source provides a title
903 ///
904 /// # Returns
905 ///
906 /// - `Some(String)` containing the title if found
907 /// - `None` if no title is available from either source
908 ///
909 /// # Title Extraction Rules
910 ///
911 /// When extracting from content:
912 /// - Only level-1 headings (starting with `# `) are considered
913 /// - The first matching heading is used
914 /// - Leading/trailing whitespace is trimmed from the result
915 /// - Empty headings (just `#`) are ignored
916 ///
917 /// # Examples
918 ///
919 /// ```rust,no_run
920 /// # use agpm_cli::markdown::{MarkdownDocument, MarkdownMetadata};
921 /// // From metadata
922 /// let metadata = MarkdownMetadata {
923 /// title: Some("Metadata Title".to_string()),
924 /// ..Default::default()
925 /// };
926 /// let doc = MarkdownDocument::with_metadata(
927 /// metadata,
928 /// "# Content Title\n\nSome text".to_string()
929 /// );
930 /// assert_eq!(doc.get_title(), Some("Metadata Title".to_string()));
931 ///
932 /// // From content heading
933 /// let doc = MarkdownDocument::new("# Extracted Title\n\nContent".to_string());
934 /// assert_eq!(doc.get_title(), Some("Extracted Title".to_string()));
935 ///
936 /// // No title available
937 /// let doc = MarkdownDocument::new("Just some content without headings".to_string());
938 /// assert_eq!(doc.get_title(), None);
939 /// ```
940 #[must_use]
941 pub fn get_title(&self) -> Option<String> {
942 // First check metadata
943 if let Some(ref metadata) = self.metadata
944 && let Some(ref title) = metadata.title
945 {
946 return Some(title.clone());
947 }
948
949 // Try to extract from first # heading
950 for line in self.content.lines() {
951 if let Some(heading) = line.strip_prefix("# ") {
952 return Some(heading.trim().to_string());
953 }
954 }
955
956 None
957 }
958
959 /// Extract the document description from metadata or content.
960 ///
961 /// This method provides a fallback mechanism for getting the document description:
962 /// 1. First, check if metadata contains an explicit description
963 /// 2. If not, extract the first paragraph from the content (after any headings)
964 /// 3. Return `None` if neither source provides a description
965 ///
966 /// # Returns
967 ///
968 /// - `Some(String)` containing the description if found
969 /// - `None` if no description is available from either source
970 ///
971 /// # Description Extraction Rules
972 ///
973 /// When extracting from content:
974 /// - All headings (lines starting with `#`) are skipped
975 /// - Empty lines before the first paragraph are ignored
976 /// - The first continuous block of non-empty lines becomes the description
977 /// - Multiple lines are joined with spaces
978 /// - Extraction stops at the first empty line after content starts
979 ///
980 /// # Examples
981 ///
982 /// ```rust,no_run
983 /// # use agpm_cli::markdown::{MarkdownDocument, MarkdownMetadata};
984 /// // From metadata
985 /// let metadata = MarkdownMetadata {
986 /// description: Some("Metadata description".to_string()),
987 /// ..Default::default()
988 /// };
989 /// let doc = MarkdownDocument::with_metadata(
990 /// metadata,
991 /// "# Title\n\nContent description".to_string()
992 /// );
993 /// assert_eq!(doc.get_description(), Some("Metadata description".to_string()));
994 ///
995 /// // From content paragraph
996 /// let doc = MarkdownDocument::new(
997 /// "# Title\n\nThis is the first\nparagraph of content.\n\nSecond paragraph.".to_string()
998 /// );
999 /// assert_eq!(doc.get_description(), Some("This is the first paragraph of content.".to_string()));
1000 ///
1001 /// // No description available
1002 /// let doc = MarkdownDocument::new("# Just a title".to_string());
1003 /// assert_eq!(doc.get_description(), None);
1004 /// ```
1005 #[must_use]
1006 pub fn get_description(&self) -> Option<String> {
1007 // First check metadata
1008 if let Some(ref metadata) = self.metadata
1009 && let Some(ref desc) = metadata.description
1010 {
1011 return Some(desc.clone());
1012 }
1013
1014 // Try to extract first non-heading paragraph
1015 let mut in_paragraph = false;
1016 let mut paragraph = String::new();
1017
1018 for line in self.content.lines() {
1019 let trimmed = line.trim();
1020
1021 // Skip headings and empty lines at start
1022 if trimmed.starts_with('#') || (trimmed.is_empty() && !in_paragraph) {
1023 continue;
1024 }
1025
1026 // Start collecting paragraph
1027 if !trimmed.is_empty() {
1028 in_paragraph = true;
1029 if !paragraph.is_empty() {
1030 paragraph.push(' ');
1031 }
1032 paragraph.push_str(trimmed);
1033 } else if in_paragraph {
1034 // End of first paragraph
1035 break;
1036 }
1037 }
1038
1039 if paragraph.is_empty() {
1040 None
1041 } else {
1042 Some(paragraph)
1043 }
1044 }
1045}
1046
/// Find the end position of YAML frontmatter in a document.
///
/// Scans a document that begins with a YAML frontmatter opening line and
/// locates the closing `---` line. The returned value is the byte offset at
/// which that closing delimiter line starts.
///
/// # Arguments
///
/// * `input` - The document content, expected to start with the opening `---` line
///
/// # Returns
///
/// - `Some(usize)` - Byte offset of the start of the closing `---` line
/// - `None` - If the input is empty or no closing delimiter line exists
///
/// # Implementation Notes
///
/// - The first line is always treated as the opening delimiter and skipped
/// - Offsets are byte counts (not characters), suitable for string slicing
/// - Each line's real terminator length is used, so LF, CRLF, and documents
///   mixing both all produce exact offsets
/// - A closing line matches only if it is exactly `---` once its line
///   terminator (`\n` or `\r\n`) has been removed
fn find_frontmatter_end(input: &str) -> Option<usize> {
    // `split_inclusive` keeps each line's terminator attached, so summing the
    // segment lengths gives exact byte offsets without guessing LF vs CRLF.
    let mut segments = input.split_inclusive('\n');

    // The opening delimiter line is skipped; the scan starts right after it.
    let mut pos = segments.next()?.len();

    for segment in segments {
        // Strip the terminator the same way `str::lines` does: a trailing
        // "\n", plus a "\r" only when it directly precedes that "\n".
        let line = match segment.strip_suffix('\n') {
            Some(without_lf) => without_lf.strip_suffix('\r').unwrap_or(without_lf),
            None => segment,
        };
        if line == "---" {
            return Some(pos);
        }
        pos += segment.len();
    }

    None
}
1095
/// Find the end position of TOML frontmatter in a document.
///
/// Scans a document that begins with a TOML frontmatter opening line and
/// locates the closing `+++` line. The returned value is the byte offset at
/// which that closing delimiter line starts.
///
/// # Arguments
///
/// * `input` - The document content, expected to start with the opening `+++` line
///
/// # Returns
///
/// - `Some(usize)` - Byte offset of the start of the closing `+++` line
/// - `None` - If the input is empty or no closing delimiter line exists
///
/// # Implementation Notes
///
/// - The first line is always treated as the opening delimiter and skipped
/// - Offsets are byte counts (not characters), suitable for string slicing
/// - Each line's real terminator length is used, so LF, CRLF, and documents
///   mixing both all produce exact offsets
/// - A closing line matches only if it is exactly `+++` once its line
///   terminator (`\n` or `\r\n`) has been removed
fn find_toml_frontmatter_end(input: &str) -> Option<usize> {
    // `split_inclusive` keeps each line's terminator attached, so summing the
    // segment lengths gives exact byte offsets without guessing LF vs CRLF.
    let mut segments = input.split_inclusive('\n');

    // The opening delimiter line is skipped; the scan starts right after it.
    let mut pos = segments.next()?.len();

    for segment in segments {
        // Strip the terminator the same way `str::lines` does: a trailing
        // "\n", plus a "\r" only when it directly precedes that "\n".
        let line = match segment.strip_suffix('\n') {
            Some(without_lf) => without_lf.strip_suffix('\r').unwrap_or(without_lf),
            None => segment,
        };
        if line == "+++" {
            return Some(pos);
        }
        pos += segment.len();
    }

    None
}
1144
/// Report whether a path has a Markdown file extension.
///
/// Only the path's extension is inspected; the filesystem is not touched.
/// The comparison ignores ASCII case.
///
/// # Supported Extensions
///
/// - `md`
/// - `markdown`
/// - Any ASCII-case variation of the above (`.MD`, `.Markdown`, ...)
///
/// # Arguments
///
/// * `path` - The file path to check
///
/// # Returns
///
/// - `true` if the extension is one of the recognized Markdown extensions
/// - `false` otherwise, including when the path has no extension or the
///   extension is not valid UTF-8
///
/// # Examples
///
/// ```rust,no_run
/// # use agpm_cli::markdown::is_markdown_file;
/// # use std::path::Path;
/// assert!(is_markdown_file(Path::new("agent.md")));
/// assert!(is_markdown_file(Path::new("README.MD")));
/// assert!(is_markdown_file(Path::new("guide.markdown")));
/// assert!(!is_markdown_file(Path::new("config.toml")));
/// assert!(!is_markdown_file(Path::new("script.sh")));
/// assert!(!is_markdown_file(Path::new("no-extension")));
/// ```
#[must_use]
pub fn is_markdown_file(path: &Path) -> bool {
    // Extensions accepted as Markdown, compared without regard to ASCII case.
    const MARKDOWN_EXTENSIONS: [&str; 2] = ["md", "markdown"];

    match path.extension().and_then(|raw| raw.to_str()) {
        Some(extension) => MARKDOWN_EXTENSIONS
            .iter()
            .any(|known| extension.eq_ignore_ascii_case(known)),
        None => false,
    }
}
1184
1185/// Recursively find all Markdown files in a directory.
1186///
1187/// This function performs a recursive traversal of the given directory,
1188/// collecting all files that have Markdown extensions. It follows symbolic
1189/// links and handles filesystem errors gracefully.
1190///
1191/// # Directory Traversal
1192///
1193/// - Recursively traverses all subdirectories
1194/// - Follows symbolic links (may cause infinite loops with circular links)
1195/// - Silently skips entries that cannot be accessed
1196/// - Only includes regular files (not directories or special files)
1197///
1198/// # Arguments
1199///
1200/// * `dir` - The directory path to search
1201///
1202/// # Returns
1203///
1204/// - `Ok(Vec<PathBuf>)` - List of absolute paths to Markdown files
1205/// - `Err(...)` - Only on severe filesystem errors (rare)
1206///
1207/// # Behavior
1208///
1209/// - Returns empty vector if directory doesn't exist (not an error)
1210/// - Files are returned in filesystem order (not sorted)
1211/// - Paths are absolute and canonicalized
1212/// - Uses [`is_markdown_file`] for extension validation
1213///
1214/// # Examples
1215///
1216/// ```rust,no_run
1217/// # use agpm_cli::markdown::list_markdown_files;
1218/// # use std::path::Path;
1219/// # fn example() -> anyhow::Result<()> {
1220/// let files = list_markdown_files(Path::new("resources/"))?;
1221///
1222/// for file in files {
1223/// println!("Found: {}", file.display());
1224/// }
1225/// # Ok(())
1226/// # }
1227/// ```
1228///
1229/// # Performance
1230///
1231/// This function loads directory metadata but not file contents, making it
1232/// suitable for scanning large directory trees. For processing the files,
1233/// consider using [`MarkdownDocument::read`] on each result.
1234///
1235/// [`is_markdown_file`]: is_markdown_file
1236/// [`MarkdownDocument::read`]: MarkdownDocument::read
1237pub fn list_markdown_files(dir: &Path) -> Result<Vec<std::path::PathBuf>> {
1238 let mut files = Vec::new();
1239
1240 if !dir.exists() {
1241 return Ok(files);
1242 }
1243
1244 for entry in walkdir::WalkDir::new(dir)
1245 .follow_links(true)
1246 .into_iter()
1247 .filter_map(std::result::Result::ok)
1248 {
1249 let path = entry.path();
1250 if path.is_file() && is_markdown_file(path) {
1251 files.push(path.to_path_buf());
1252 }
1253 }
1254
1255 Ok(files)
1256}
1257
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Builds metadata in which only the title is set.
    fn titled(title: &str) -> MarkdownMetadata {
        MarkdownMetadata {
            title: Some(title.to_string()),
            ..Default::default()
        }
    }

    /// Builds metadata in which only the description is set.
    fn described(description: &str) -> MarkdownMetadata {
        MarkdownMetadata {
            description: Some(description.to_string()),
            ..Default::default()
        }
    }

    /// A plain document has no metadata and identical content/raw text.
    #[test]
    fn test_markdown_document_new() {
        let text = "# Hello World";
        let doc = MarkdownDocument::new(text.to_string());

        assert!(doc.metadata.is_none());
        assert_eq!(doc.content, "# Hello World");
        assert_eq!(doc.raw, "# Hello World");
    }

    /// YAML frontmatter is parsed into metadata and removed from the content.
    #[test]
    fn test_markdown_with_yaml_frontmatter() {
        let input = r"---
title: Test Document
description: A test document
tags:
 - test
 - example
---

# Hello World

This is the content.";

        let doc = MarkdownDocument::parse(input).unwrap();
        assert!(doc.metadata.is_some());
        assert!(doc.content.starts_with("# Hello World"));

        let metadata = doc.metadata.unwrap();
        assert_eq!(metadata.title, Some("Test Document".to_string()));
        assert_eq!(metadata.description, Some("A test document".to_string()));
        assert_eq!(metadata.tags, vec!["test", "example"]);
    }

    /// TOML frontmatter is parsed into metadata.
    #[test]
    fn test_markdown_with_toml_frontmatter() {
        let input = r#"+++
title = "Test Document"
description = "A test document"
tags = ["test", "example"]
+++

# Hello World

This is the content."#;

        let doc = MarkdownDocument::parse(input).unwrap();
        assert!(doc.metadata.is_some());

        let metadata = doc.metadata.unwrap();
        assert_eq!(metadata.title, Some("Test Document".to_string()));
        assert_eq!(metadata.description, Some("A test document".to_string()));
        assert_eq!(metadata.tags, vec!["test", "example"]);
    }

    /// Without frontmatter the whole input becomes the content.
    #[test]
    fn test_markdown_without_frontmatter() {
        let input = "# Hello World\n\nThis is the content.";

        let doc = MarkdownDocument::parse(input).unwrap();

        assert!(doc.metadata.is_none());
        assert_eq!(doc.content, input);
    }

    /// Title resolution: metadata first, then the first heading, else `None`.
    #[test]
    fn test_get_title() {
        // From metadata
        let with_meta =
            MarkdownDocument::with_metadata(titled("Metadata Title"), "Content".to_string());
        assert_eq!(with_meta.get_title(), Some("Metadata Title".to_string()));

        // From heading
        let with_heading = MarkdownDocument::new("# Heading Title\n\nContent".to_string());
        assert_eq!(with_heading.get_title(), Some("Heading Title".to_string()));

        // No title
        let untitled = MarkdownDocument::new("Just content".to_string());
        assert_eq!(untitled.get_title(), None);
    }

    /// Description resolution: metadata first, then the first paragraph.
    #[test]
    fn test_get_description() {
        // From metadata
        let with_meta = MarkdownDocument::with_metadata(
            described("Metadata description"),
            "Content".to_string(),
        );
        assert_eq!(with_meta.get_description(), Some("Metadata description".to_string()));

        // From first paragraph
        let body = "# Title\n\nThis is the first paragraph.\n\nSecond paragraph.";
        let with_paragraph = MarkdownDocument::new(body.to_string());
        assert_eq!(
            with_paragraph.get_description(),
            Some("This is the first paragraph.".to_string())
        );
    }

    /// A document written to disk can be read back with its metadata intact.
    #[test]
    fn test_read_write_markdown() {
        let temp = tempdir().unwrap();
        let file_path = temp.path().join("test.md");

        // Create and write document
        let original =
            MarkdownDocument::with_metadata(titled("Test"), "# Test\n\nContent".to_string());
        original.write(&file_path).unwrap();

        // Read back
        let loaded = MarkdownDocument::read(&file_path).unwrap();
        assert!(loaded.metadata.is_some());
        assert!(loaded.content.contains("# Test"));
        assert_eq!(loaded.metadata.unwrap().title, Some("Test".to_string()));
    }

    /// Extension matching is case-insensitive and rejects other extensions.
    #[test]
    fn test_is_markdown_file() {
        for accepted in ["test.md", "test.MD", "test.markdown", "test.MARKDOWN"] {
            assert!(is_markdown_file(Path::new(accepted)));
        }
        for rejected in ["test.txt", "test"] {
            assert!(!is_markdown_file(Path::new(rejected)));
        }
    }

    /// Directory listing is recursive and only returns Markdown files.
    #[test]
    fn test_list_markdown_files() {
        let temp = tempdir().unwrap();
        let root = temp.path();

        // Create some files
        for name in ["file1.md", "file2.markdown", "file3.txt"] {
            std::fs::write(root.join(name), "content").unwrap();
        }

        let subdir = root.join("subdir");
        std::fs::create_dir(&subdir).unwrap();
        std::fs::write(subdir.join("file4.md"), "content").unwrap();

        let files = list_markdown_files(root).unwrap();
        assert_eq!(files.len(), 3);

        let mut names: Vec<String> = Vec::with_capacity(files.len());
        for found in &files {
            names.push(found.file_name().unwrap().to_string_lossy().to_string());
        }

        for expected in ["file1.md", "file2.markdown", "file4.md"] {
            assert!(names.contains(&expected.to_string()));
        }
        assert!(!names.contains(&"file3.txt".to_string()));
    }

    /// Setting metadata and content keeps `raw` in sync with both.
    #[test]
    fn test_set_metadata_and_content() {
        let mut doc = MarkdownDocument::new("Initial content".to_string());

        // Set metadata
        doc.set_metadata(titled("New Title"));

        assert!(doc.metadata.is_some());
        assert!(doc.raw.contains("title: New Title"));
        assert!(doc.raw.contains("Initial content"));

        // Set content
        doc.set_content("Updated content".to_string());
        assert_eq!(doc.content, "Updated content");
        assert!(doc.raw.contains("Updated content"));
        assert!(doc.raw.contains("title: New Title"));
    }

    /// Unparseable YAML frontmatter degrades to "no metadata, everything is content".
    #[test]
    fn test_invalid_frontmatter_with_escaped_newlines() {
        // Content with invalid YAML frontmatter (literal \n that isn't properly quoted)
        let input = r#"---
name: haiku-syntax-tool
description: Use this agent when you need to fix linting errors, formatting issues, type checking problems, or ensure code adheres to project-specific standards. This agent specializes in enforcing language-specific conventions, project style guides, and maintaining code quality through automated fixes. Examples:\n\n<example>\nContext: The user has just written a new Python function and wants to ensure it meets project standards.\nuser: "I've added a new sync handler function"\nassistant: "Let me review this with the code-standards-enforcer agent to ensure it meets our project standards"\n<commentary>\nSince new code was written, use the Task tool to launch the code-standards-enforcer agent to check for linting, formatting, and type issues according to CLAUDE.md standards.\n</commentary>\n</example>\n\n<example>\nContext: The user encounters linting errors during CI/CD.\nuser: "The CI pipeline is failing due to formatting issues"\nassistant: "I'll use the code-standards-enforcer agent to fix these formatting and linting issues"\n<commentary>\nWhen there are explicit linting or formatting problems, use the code-standards-enforcer agent to automatically fix them according to project standards.\n</commentary>\n</example>\n\n<example>\nContext: The user wants to ensure type hints are correct.\nuser: "Can you check if my type annotations are correct in the API module?"\nassistant: "I'll launch the code-standards-enforcer agent to verify and fix any type annotation issues"\n<commentary>\nFor type checking and annotation verification, use the code-standards-enforcer agent to ensure compliance with project typing standards.\n</commentary>\n</example>
model: haiku
---

You are a meticulous code standards enforcement specialist"#;

        // This should succeed but treat the entire document as content (no metadata)
        let doc = MarkdownDocument::parse(input)
            .unwrap_or_else(|e| panic!("Should not fail, but got error: {}", e));

        // Invalid frontmatter means no metadata
        assert!(doc.metadata.is_none());

        // The entire document should be treated as content
        for fragment in [
            "---",
            "name: haiku-syntax-tool",
            "description: Use this agent",
            "model: haiku",
            "meticulous code standards enforcement specialist",
        ] {
            assert!(doc.content.contains(fragment));
        }
    }

    /// Completely broken YAML also falls back to plain content.
    #[test]
    fn test_completely_invalid_frontmatter_fallback() {
        // Test with completely broken YAML
        let input = r#"---
name: test
description: {this is not valid yaml at all
model: test
---

Content here"#;

        // This should now succeed but without metadata
        let doc = MarkdownDocument::parse(input)
            .unwrap_or_else(|e| panic!("Should not fail, but got error: {}", e));

        // Should treat entire document as content when frontmatter is invalid
        assert!(doc.metadata.is_none());
        for fragment in ["---", "name: test", "Content here"] {
            assert!(doc.content.contains(fragment));
        }
    }
}