futhorc/parser.rs
//! Defines the [`Parser`] and [`Error`] types, along with the logic for
//! parsing [`Post`]s from the file system into memory. See
//! [`Post::to_value`] and [`Post::summarize`] for details on how posts are
//! converted into template values.
5
6use std::{
7 collections::HashSet,
8 fmt,
9 fs::{read_dir, File},
10 path::Path,
11};
12
13use serde::Deserialize;
14use url::Url;
15
16use crate::{markdown, post::Post, tag::Tag};
17
18/// Parses [`Post`] objects from source files.
19pub struct Parser<'a> {
20 /// `index_url` is the base URL for index pages. It's used to prefix tag
21 /// page URLs (i.e., the URL for the first page of a tag is
22 /// `{index_url}/{tag_name}/index.html`).
23 index_url: &'a Url,
24
25 /// `posts_url` is the base URL for post pages. It's used to prefix post
26 /// page URLs (i.e., the URL for a post is
27 /// `{posts_url}/{post_id}.html`).
28 posts_url: &'a Url,
29
30 /// `posts_directory` is the directory in which post pages will be
31 /// rendered.
32 posts_directory: &'a Path,
33}
34
35impl<'a> Parser<'a> {
36 /// Constructs a new parser. See fields on [`Parser`] for argument
37 /// descriptions.
38 pub fn new(
39 index_url: &'a Url,
40 posts_url: &'a Url,
41 posts_directory: &'a Path,
42 ) -> Parser<'a> {
43 Parser {
44 index_url,
45 posts_url,
46 posts_directory,
47 }
48 }
49
50 /// Parses a single [`Post`] from an `id` and `input` strings. The `id` is
51 /// the path of the file relative to the `posts_source_directory` less the
52 /// extension (e.g., the ID for a post whose source file is
53 /// `{posts_source_directory}/foo/bar.md` is `foo/bar`).
54 fn parse_post(&self, id: &str, input: &str) -> Result<Post> {
55 match self._parse_post(id, input) {
56 Ok(p) => Ok(p),
57 Err(e) => Err(Error::Annotated(
58 format!("parsing post `{}`", id),
59 Box::new(e),
60 )),
61 }
62 }
63
64 fn _parse_post(&self, id: &str, input: &str) -> Result<Post> {
65 fn frontmatter_indices(input: &str) -> Result<(usize, usize, usize)> {
66 const FENCE: &str = "---";
67 if !input.starts_with(FENCE) {
68 return Err(Error::FrontmatterMissingStartFence);
69 }
70 match input[FENCE.len()..].find("---") {
71 None => Err(Error::FrontmatterMissingEndFence),
72 Some(offset) => Ok((
73 FENCE.len(), // yaml_start
74 FENCE.len() + offset, // yaml_stop
75 FENCE.len() + offset + FENCE.len(), // body_start
76 )),
77 }
78 }
79
80 let (yaml_start, yaml_stop, body_start) = frontmatter_indices(input)?;
81 let frontmatter: Frontmatter =
82 serde_yaml::from_str(&input[yaml_start..yaml_stop])?;
83 let file_name = format!("{}.html", id);
84 let mut post = Post {
85 title: frontmatter.title,
86 date: frontmatter.date,
87 file_path: self.posts_directory.join(&file_name),
88 url: self.posts_url.join(&file_name)?,
89 tags: frontmatter
90 .tags
91 .iter()
92 .map(|t| {
93 Ok(Tag {
94 name: t.clone(),
95 url: self
96 .index_url
97 // NOTE: tried
98 // `index_url.join(t).join("index.html")`; however,
99 // since `t` doesn't have a trailing slash,
100 // [`Url::join`] was treating it as equivalent to
101 // `index_url.join("index.html")` per the
102 // `Url::join` docs:
103 //
104 // > Note: a trailing slash is significant. Without
105 // it, the last path component is considered to be
106 // a “file” name to be removed to get at the
107 // “directory” that is used as the base
108 .join(&format!("{}/index.html", t))
109 .unwrap(), // should always succeed
110 })
111 })
112 .collect::<Result<HashSet<Tag>>>()?,
113 body: String::default(),
114 };
115
116 markdown::to_html(
117 &mut post.body,
118 self.posts_url,
119 id,
120 &input[body_start..],
121 post.url.as_str(),
122 )?;
123 Ok(post)
124 }
125
126 /// Searches a provided `source_directory` for post files (extension =
127 /// `.md`) and returns a list of [`Post`] objects sorted by date (most
128 /// recent first). Each post file must be structured as follows:
129 ///
130 /// 1. Initial frontmatter fence (`---`)
131 /// 2. YAML frontmatter with fields `Title`, `Date`, and optionally `Tags`
132 /// 3. Terminal frontmatter fence (`---`)
133 /// 4. Post body
134 ///
135 /// For example:
136 ///
137 /// ```md
138 /// ---
139 /// Title: Hello, world!
140 /// Date: 2021-04-16
141 /// Tags: [greet]
142 /// ---
143 /// # Hello
144 ///
145 /// World
146 /// ```
147 pub fn parse_posts(&self, source_directory: &Path) -> Result<Vec<Post>> {
148 use std::io::Read;
149 const MARKDOWN_EXTENSION: &str = ".md";
150
151 let mut posts = Vec::new();
152 for result in read_dir(source_directory)? {
153 let entry = result?;
154 let os_file_name = entry.file_name();
155 let file_name = os_file_name.to_string_lossy();
156 if file_name.ends_with(MARKDOWN_EXTENSION) {
157 let base_name = file_name.trim_end_matches(MARKDOWN_EXTENSION);
158 let mut contents = String::new();
159 File::open(entry.path())?.read_to_string(&mut contents)?;
160 posts.push(self.parse_post(base_name, &contents)?);
161 }
162 }
163
164 posts.sort_by(|a, b| b.date.cmp(&a.date));
165 Ok(posts)
166 }
167}
168
169#[derive(Deserialize, Clone)]
170struct Frontmatter {
171 /// The title of the post.
172 #[serde(rename = "Title")]
173 pub title: String,
174
175 /// The date of the post.
176 #[serde(rename = "Date")]
177 pub date: String,
178
179 /// The tags associated with the post.
180 #[serde(default, rename = "Tags")]
181 pub tags: HashSet<String>,
182}
183
184/// Represents the result of a [`Post`]-parse operation.
185pub type Result<T> = std::result::Result<T, Error>;
186
187/// Represents an error parsing a [`Post`] object.
188#[derive(Debug)]
189pub enum Error {
190 /// Returned when a post source file is missing its starting frontmatter
191 /// fence (`---`).
192 FrontmatterMissingStartFence,
193
194 /// Returned when a post source file is missing its terminal frontmatter
195 /// fence (`---` i.e., the starting fence was found but the ending one was
196 /// missing).
197 FrontmatterMissingEndFence,
198
199 /// Returned when there was an error parsing the frontmatter as YAML.
200 DeserializeYaml(serde_yaml::Error),
201
202 /// Returned when there is a problem parsing URLs.
203 UrlParse(url::ParseError),
204
205 /// Returned for other I/O errors.
206 Io(std::io::Error),
207
208 /// An error with an annotation.
209 Annotated(String, Box<Error>),
210}
211
212impl fmt::Display for Error {
213 /// Displays an [`Error`] as human-readable text.
214 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
215 match self {
216 Error::FrontmatterMissingStartFence => {
217 write!(f, "Post must begin with `---`")
218 }
219 Error::FrontmatterMissingEndFence => {
220 write!(f, "Missing clossing `---`")
221 }
222 Error::DeserializeYaml(err) => err.fmt(f),
223 Error::UrlParse(err) => err.fmt(f),
224 Error::Io(err) => err.fmt(f),
225 Error::Annotated(annotation, err) => {
226 write!(f, "{}: {}", &annotation, err)
227 }
228 }
229 }
230}
231
232impl std::error::Error for Error {
233 /// Implements the [`std::error::Error`] trait for [`Error`].
234 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
235 match self {
236 Error::FrontmatterMissingStartFence => None,
237 Error::FrontmatterMissingEndFence => None,
238 Error::DeserializeYaml(err) => Some(err),
239 Error::UrlParse(err) => Some(err),
240 Error::Io(err) => Some(err),
241 Error::Annotated(_, err) => Some(err),
242 }
243 }
244}
245
246impl From<markdown::Error> for Error {
247 fn from(err: markdown::Error) -> Error {
248 match err {
249 markdown::Error::Io(e) => Error::Io(e),
250 markdown::Error::UrlParse(e) => Error::UrlParse(e),
251 }
252 }
253}
254
255impl From<url::ParseError> for Error {
256 /// Converts a [`url::ParseError`] into an [`Error`]. It allows us to use
257 /// the `?` operator for URL parsing and joining functions.
258 fn from(err: url::ParseError) -> Error {
259 Error::UrlParse(err)
260 }
261}
262
263impl From<serde_yaml::Error> for Error {
264 /// Converts a [`serde_yaml::Error`] into an [`Error`]. It allows us to use
265 /// the `?` operator for [`serde_yaml`] deserialization functions.
266 fn from(err: serde_yaml::Error) -> Error {
267 Error::DeserializeYaml(err)
268 }
269}
270
271impl From<std::io::Error> for Error {
272 /// Converts a [`std::io::Error`] into an [`Error`]. It allows us to
273 // use the `?` operator for fallible I/O functions.
274 fn from(err: std::io::Error) -> Error {
275 Error::Io(err)
276 }
277}