Skip to main content

utils/
markdown_file.rs

1use futures::future::join_all;
2use serde::de::DeserializeOwned;
3use std::{
4    fmt, fs, io,
5    path::{Path, PathBuf},
6};
7
/// Represents a parsed markdown file with optional frontmatter.
///
/// `T` is the type the YAML frontmatter is deserialized into. The struct itself
/// places no bound on `T`; the deserialization requirement lives on the `impl`
/// block that actually parses files, so the type can be named, stored and
/// constructed (for example in tests) without dragging the bound along.
#[derive(Debug, Clone)]
pub struct MarkdownFile<T> {
    /// Parsed frontmatter; `None` when the file has no frontmatter block or the
    /// block could not be deserialized into `T`.
    pub frontmatter: Option<T>,
    /// The content after frontmatter
    pub content: String,
}
16
17impl<T: DeserializeOwned + Send + 'static> MarkdownFile<T> {
18    pub fn parse(path: impl AsRef<Path>) -> Result<Self, ParseError> {
19        let raw_content = fs::read_to_string(path)?;
20
21        match split_frontmatter(&raw_content) {
22            Some((yaml_str, body)) => {
23                let frontmatter = serde_yml::from_str(yaml_str).ok();
24                Ok(Self {
25                    frontmatter,
26                    content: body.to_string(),
27                })
28            }
29            None => Ok(Self {
30                frontmatter: None,
31                content: raw_content.trim().to_string(),
32            }),
33        }
34    }
35
36    /// List all markdown files in a directory
37    pub fn list(dir: impl AsRef<Path>) -> Result<Vec<PathBuf>, io::Error> {
38        let paths: Vec<_> = fs::read_dir(dir)?
39            .filter_map(|entry| {
40                let path = entry.ok()?.path();
41                (path.extension().and_then(|s| s.to_str()) == Some("md")).then_some(path)
42            })
43            .collect();
44
45        Ok(paths)
46    }
47
48    /// Load a single markdown file from a path
49    pub fn from_file(path: impl AsRef<Path>) -> Result<Self, ParseError> {
50        let path = path.as_ref();
51
52        if !path.exists() {
53            return Err(ParseError::Io(io::Error::new(
54                io::ErrorKind::NotFound,
55                format!("File not found: {}", path.display()),
56            )));
57        }
58
59        Self::parse(path)
60    }
61
62    /// Load all markdown files from a directory
63    pub async fn from_dir(dir: &PathBuf) -> Result<Vec<(PathBuf, Self)>, io::Error> {
64        if !dir.exists() {
65            return Err(io::Error::new(
66                io::ErrorKind::NotFound,
67                format!("Directory not found: {}", dir.display()),
68            ));
69        }
70
71        if !dir.is_dir() {
72            return Err(io::Error::new(
73                io::ErrorKind::NotADirectory,
74                format!("Not a directory: {}", dir.display()),
75            ));
76        }
77
78        let parse_tasks: Vec<_> = Self::list(dir)?
79            .into_iter()
80            .map(|path| {
81                tokio::spawn(async move {
82                    let path_clone = path.clone();
83                    Self::parse(path).map(|f| (path_clone, f))
84                })
85            })
86            .collect();
87
88        let results = join_all(parse_tasks).await;
89        let items = results
90            .into_iter()
91            .filter_map(|result| match result {
92                Ok(Ok(item)) => Some(item),
93                Ok(Err(e)) => {
94                    tracing::warn!("Failed to parse file: {}", e);
95                    None
96                }
97                Err(_) => None,
98            })
99            .collect();
100
101        Ok(items)
102    }
103
104    /// Load all markdown files from nested subdirectories, where each subdirectory
105    /// contains a file with the specified filename.
106    ///
107    /// Flat files in the parent directory are ignored. Only subdirectories containing
108    /// the specified filename are processed.
109    ///
110    /// # Example
111    /// ```ignore
112    /// // Load from:
113    /// //   skills/skill-1/SKILL.md
114    /// //   skills/skill-2/SKILL.md
115    /// //   skills/flat-file.md      -> ignored (not in a subdirectory)
116    /// let skills = MarkdownFile::from_nested_dirs(Path::new("skills"), "SKILL.md").await?;
117    /// ```
118    pub async fn from_nested_dirs(
119        parent_dir: impl AsRef<Path>,
120        filename: &str,
121    ) -> Result<Vec<(PathBuf, Self)>, io::Error> {
122        let parent_dir = parent_dir.as_ref();
123
124        if !parent_dir.exists() {
125            return Err(io::Error::new(
126                io::ErrorKind::NotFound,
127                format!("Directory not found: {}", parent_dir.display()),
128            ));
129        }
130
131        if !parent_dir.is_dir() {
132            return Err(io::Error::new(
133                io::ErrorKind::NotADirectory,
134                format!("Not a directory: {}", parent_dir.display()),
135            ));
136        }
137
138        let subdirs = list_subdirs(parent_dir)?;
139        let filename = filename.to_string();
140        let parse_tasks: Vec<_> = subdirs
141            .into_iter()
142            .map(|dir| {
143                let filename = filename.clone();
144                tokio::spawn(async move {
145                    let file_path = dir.join(&filename);
146                    Self::parse(&file_path).map(|f| (dir, f))
147                })
148            })
149            .collect();
150
151        let results = join_all(parse_tasks).await;
152        let items = results
153            .into_iter()
154            .filter_map(|result| match result {
155                Ok(Ok(item)) => Some(item),
156                Ok(Err(e)) => {
157                    tracing::debug!("Skipping directory: {}", e);
158                    None
159                }
160                Err(_) => None,
161            })
162            .collect();
163
164        Ok(items)
165    }
166}
167
/// Split YAML frontmatter from markdown content.
///
/// The input is trimmed first. Frontmatter is recognised only when the first
/// line is `---` and a later line consisting solely of `---` closes it
/// (trailing whitespace, including `\r`, is tolerated on both delimiter lines).
/// Lines that merely *start* with three dashes, such as `----` or `---note`,
/// are not treated as delimiters.
///
/// Returns `(yaml_str, body)` if both delimiters are found, or `None` if the
/// content has no frontmatter. `yaml_str` is the text between the delimiters,
/// starting immediately after the opening `---` (so it normally begins with a
/// newline); `body` is the trimmed remainder after the closing delimiter line.
pub fn split_frontmatter(content: &str) -> Option<(&str, &str)> {
    let content = content.trim();
    let rest = content.strip_prefix("---")?;

    // The opening delimiter must stand alone on its line.
    let first_newline = rest.find('\n')?;
    if !rest[..first_newline].trim().is_empty() {
        return None;
    }

    // Scan for the first line that is exactly `---`; skip look-alikes.
    let mut search_from = first_newline;
    loop {
        let start = search_from + rest[search_from..].find("\n---")?;
        let after_dashes = start + "\n---".len();
        let line_end = rest[after_dashes..]
            .find('\n')
            .map_or(rest.len(), |offset| after_dashes + offset);

        if rest[after_dashes..line_end].trim().is_empty() {
            return Some((&rest[..start], rest[line_end..].trim()));
        }
        search_from = after_dashes;
    }
}
178
/// Collect the direct children of `dir` that are directories.
///
/// Entries that cannot be read are skipped; failure to open `dir` itself is
/// returned as the error.
fn list_subdirs(dir: impl AsRef<Path>) -> Result<Vec<PathBuf>, io::Error> {
    let mut subdirs = Vec::new();

    for entry in fs::read_dir(dir)? {
        if let Ok(entry) = entry {
            let candidate = entry.path();
            if candidate.is_dir() {
                subdirs.push(candidate);
            }
        }
    }

    Ok(subdirs)
}
190
/// Errors produced while loading a markdown file.
#[derive(Debug)]
pub enum ParseError {
    /// The file name was not valid.
    // NOTE(review): nothing in the visible part of this file constructs this
    // variant — confirm it is still used elsewhere.
    InvalidFilename,
    /// An underlying I/O operation failed; carries the originating [`io::Error`].
    Io(io::Error),
}
196
197impl fmt::Display for ParseError {
198    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
199        match self {
200            ParseError::InvalidFilename => write!(f, "Invalid filename"),
201            ParseError::Io(e) => write!(f, "IO error: {e}"),
202        }
203    }
204}
205
206impl std::error::Error for ParseError {
207    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
208        match self {
209            ParseError::Io(e) => Some(e),
210            ParseError::InvalidFilename => None,
211        }
212    }
213}
214
215impl From<io::Error> for ParseError {
216    fn from(e: io::Error) -> Self {
217        ParseError::Io(e)
218    }
219}