// utils/markdown_file.rs

1use futures::future::join_all;
2use serde::de::DeserializeOwned;
3use std::{
4    fmt, fs, io,
5    path::{Path, PathBuf},
6};
7
/// A markdown document split into optional frontmatter and body text.
///
/// `T` is the caller-chosen type the frontmatter is deserialized into. The
/// struct itself places no bound on `T`; the deserialization requirement lives
/// on the `impl` block that actually loads files, so the plain data type can be
/// constructed, cloned and debug-printed with any `T`.
#[derive(Debug, Clone)]
pub struct MarkdownFile<T> {
    /// Deserialized frontmatter, or `None` when the file has no frontmatter
    /// block or the block could not be deserialized into `T`.
    pub frontmatter: Option<T>,
    /// The markdown text that follows the frontmatter block (or the whole
    /// trimmed file when there is no frontmatter).
    pub content: String,
}
16
17impl<T: DeserializeOwned + Send + 'static> MarkdownFile<T> {
18    pub fn parse(path: impl AsRef<Path>) -> Result<Self, ParseError> {
19        let raw_content = fs::read_to_string(path)?;
20
21        match split_frontmatter(&raw_content) {
22            Some((yaml_str, body)) => {
23                let frontmatter = serde_yml::from_str(yaml_str).ok();
24                Ok(Self { frontmatter, content: body.to_string() })
25            }
26            None => Ok(Self { frontmatter: None, content: raw_content.trim().to_string() }),
27        }
28    }
29
30    /// List all markdown files in a directory
31    pub fn list(dir: impl AsRef<Path>) -> Result<Vec<PathBuf>, io::Error> {
32        let paths: Vec<_> = fs::read_dir(dir)?
33            .filter_map(|entry| {
34                let path = entry.ok()?.path();
35                (path.extension().and_then(|s| s.to_str()) == Some("md")).then_some(path)
36            })
37            .collect();
38
39        Ok(paths)
40    }
41
42    /// Load a single markdown file from a path
43    pub fn from_file(path: impl AsRef<Path>) -> Result<Self, ParseError> {
44        let path = path.as_ref();
45
46        if !path.exists() {
47            return Err(ParseError::Io(io::Error::new(
48                io::ErrorKind::NotFound,
49                format!("File not found: {}", path.display()),
50            )));
51        }
52
53        Self::parse(path)
54    }
55
56    /// Load all markdown files from a directory
57    pub async fn from_dir(dir: &PathBuf) -> Result<Vec<(PathBuf, Self)>, io::Error> {
58        if !dir.exists() {
59            return Err(io::Error::new(io::ErrorKind::NotFound, format!("Directory not found: {}", dir.display())));
60        }
61
62        if !dir.is_dir() {
63            return Err(io::Error::new(io::ErrorKind::NotADirectory, format!("Not a directory: {}", dir.display())));
64        }
65
66        let parse_tasks: Vec<_> = Self::list(dir)?
67            .into_iter()
68            .map(|path| {
69                tokio::spawn(async move {
70                    let path_clone = path.clone();
71                    Self::parse(path).map(|f| (path_clone, f))
72                })
73            })
74            .collect();
75
76        let results = join_all(parse_tasks).await;
77        let items = results
78            .into_iter()
79            .filter_map(|result| match result {
80                Ok(Ok(item)) => Some(item),
81                Ok(Err(e)) => {
82                    tracing::warn!("Failed to parse file: {}", e);
83                    None
84                }
85                Err(_) => None,
86            })
87            .collect();
88
89        Ok(items)
90    }
91
92    /// Load all markdown files from nested subdirectories, where each subdirectory
93    /// contains a file with the specified filename.
94    ///
95    /// Flat files in the parent directory are ignored. Only subdirectories containing
96    /// the specified filename are processed.
97    ///
98    /// # Example
99    /// ```ignore
100    /// // Load from:
101    /// //   skills/skill-1/SKILL.md
102    /// //   skills/skill-2/SKILL.md
103    /// //   skills/flat-file.md      -> ignored (not in a subdirectory)
104    /// let skills = MarkdownFile::from_nested_dirs(Path::new("skills"), "SKILL.md").await?;
105    /// ```
106    pub async fn from_nested_dirs(
107        parent_dir: impl AsRef<Path>,
108        filename: &str,
109    ) -> Result<Vec<(PathBuf, Self)>, io::Error> {
110        let parent_dir = parent_dir.as_ref();
111
112        if !parent_dir.exists() {
113            return Err(io::Error::new(
114                io::ErrorKind::NotFound,
115                format!("Directory not found: {}", parent_dir.display()),
116            ));
117        }
118
119        if !parent_dir.is_dir() {
120            return Err(io::Error::new(
121                io::ErrorKind::NotADirectory,
122                format!("Not a directory: {}", parent_dir.display()),
123            ));
124        }
125
126        let subdirs = list_subdirs(parent_dir)?;
127        let filename = filename.to_string();
128        let parse_tasks: Vec<_> = subdirs
129            .into_iter()
130            .map(|dir| {
131                let filename = filename.clone();
132                tokio::spawn(async move {
133                    let file_path = dir.join(&filename);
134                    Self::parse(&file_path).map(|f| (dir, f))
135                })
136            })
137            .collect();
138
139        let results = join_all(parse_tasks).await;
140        let items = results
141            .into_iter()
142            .filter_map(|result| match result {
143                Ok(Ok(item)) => Some(item),
144                Ok(Err(e)) => {
145                    tracing::debug!("Skipping directory: {}", e);
146                    None
147                }
148                Err(_) => None,
149            })
150            .collect();
151
152        Ok(items)
153    }
154}
155
/// Split YAML frontmatter from markdown content.
///
/// Frontmatter is recognised only when the trimmed content starts with a line
/// consisting of `---` (trailing whitespace allowed) and a later line also
/// consists of just `---`. Lines that merely *begin* with three dashes, such as
/// `----` or `---text`, are not treated as delimiters.
///
/// Returns `(yaml_str, body)` if both delimiters are found, or `None` if the
/// content has no frontmatter. `yaml_str` is the raw text between the
/// delimiters (it keeps the line break that follows the opening delimiter);
/// `body` is everything after the closing delimiter, trimmed.
pub fn split_frontmatter(content: &str) -> Option<(&str, &str)> {
    let rest = content.trim().strip_prefix("---")?;

    // The opening delimiter must be alone on its line; without any line break
    // there cannot be a closing delimiter either.
    let opening_line_end = rest.find('\n')?;
    if !rest[..opening_line_end].trim().is_empty() {
        return None;
    }

    // Take the first `---` that starts a line AND has nothing but whitespace
    // after it on that line.
    rest.match_indices("\n---").find_map(|(end_pos, fence)| {
        let after = &rest[end_pos + fence.len()..];
        let fence_line_tail = after.lines().next().unwrap_or("");
        fence_line_tail
            .trim()
            .is_empty()
            .then_some((&rest[..end_pos], after.trim()))
    })
}
166
/// Collect the paths of the immediate child directories of `dir`.
///
/// Entries that cannot be read are skipped; the scan is not recursive.
/// Fails with the underlying `io::Error` when `dir` itself cannot be opened.
fn list_subdirs(dir: impl AsRef<Path>) -> Result<Vec<PathBuf>, io::Error> {
    let mut subdirs = Vec::new();

    for entry in fs::read_dir(dir)? {
        // Unreadable entries are ignored rather than aborting the whole scan.
        let Ok(entry) = entry else { continue };

        let candidate = entry.path();
        if candidate.is_dir() {
            subdirs.push(candidate);
        }
    }

    Ok(subdirs)
}
178
/// Errors reported while loading a markdown file.
#[derive(Debug)]
pub enum ParseError {
    /// The file name was rejected. Nothing in the visible part of this file
    /// constructs this variant — NOTE(review): confirm where it is raised.
    InvalidFilename,
    /// An underlying I/O failure (for example the file is missing or
    /// unreadable); wraps the original `io::Error`.
    Io(io::Error),
}
184
185impl fmt::Display for ParseError {
186    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
187        match self {
188            ParseError::InvalidFilename => write!(f, "Invalid filename"),
189            ParseError::Io(e) => write!(f, "IO error: {e}"),
190        }
191    }
192}
193
194impl std::error::Error for ParseError {
195    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
196        match self {
197            ParseError::Io(e) => Some(e),
198            ParseError::InvalidFilename => None,
199        }
200    }
201}
202
203impl From<io::Error> for ParseError {
204    fn from(e: io::Error) -> Self {
205        ParseError::Io(e)
206    }
207}