futhorc/
parser.rs

//! Defines the [`Parser`] and [`Error`] types, along with the logic for
//! parsing [`Post`] objects from the file system into memory. See
//! [`Post::to_value`] and [`Post::summarize`] for details on how posts are
//! converted into template values.
5
6use std::{
7    collections::HashSet,
8    fmt,
9    fs::{read_dir, File},
10    path::Path,
11};
12
13use serde::Deserialize;
14use url::Url;
15
16use crate::{markdown, post::Post, tag::Tag};
17
/// Parses [`Post`] objects from source files.
///
/// A `Parser` only borrows its configuration; the URLs and directory it is
/// constructed with must outlive it (lifetime `'a`).
pub struct Parser<'a> {
    /// `index_url` is the base URL for index pages. It's used to prefix tag
    /// page URLs (i.e., the URL for the first page of a tag is
    /// `{index_url}/{tag_name}/index.html`).
    ///
    /// Tag URLs are built with [`Url::join`], for which a trailing slash on
    /// the base is significant: without it the last path component of the
    /// base is replaced rather than extended.
    index_url: &'a Url,

    /// `posts_url` is the base URL for post pages. It's used to prefix post
    /// page URLs (i.e., the URL for a post is
    /// `{posts_url}/{post_id}.html`). It is also handed to the markdown
    /// renderer when a post body is converted to HTML.
    posts_url: &'a Url,

    /// `posts_directory` is the directory in which post pages will be
    /// rendered. Each post's output file path is
    /// `{posts_directory}/{post_id}.html`.
    posts_directory: &'a Path,
}
34
35impl<'a> Parser<'a> {
36    /// Constructs a new parser. See fields on [`Parser`] for argument
37    /// descriptions.
38    pub fn new(
39        index_url: &'a Url,
40        posts_url: &'a Url,
41        posts_directory: &'a Path,
42    ) -> Parser<'a> {
43        Parser {
44            index_url,
45            posts_url,
46            posts_directory,
47        }
48    }
49
50    /// Parses a single [`Post`] from an `id` and `input` strings. The `id` is
51    /// the path of the file relative to the `posts_source_directory` less the
52    /// extension (e.g., the ID for a post whose source file is
53    /// `{posts_source_directory}/foo/bar.md` is `foo/bar`).
54    fn parse_post(&self, id: &str, input: &str) -> Result<Post> {
55        match self._parse_post(id, input) {
56            Ok(p) => Ok(p),
57            Err(e) => Err(Error::Annotated(
58                format!("parsing post `{}`", id),
59                Box::new(e),
60            )),
61        }
62    }
63
64    fn _parse_post(&self, id: &str, input: &str) -> Result<Post> {
65        fn frontmatter_indices(input: &str) -> Result<(usize, usize, usize)> {
66            const FENCE: &str = "---";
67            if !input.starts_with(FENCE) {
68                return Err(Error::FrontmatterMissingStartFence);
69            }
70            match input[FENCE.len()..].find("---") {
71                None => Err(Error::FrontmatterMissingEndFence),
72                Some(offset) => Ok((
73                    FENCE.len(),                        // yaml_start
74                    FENCE.len() + offset,               // yaml_stop
75                    FENCE.len() + offset + FENCE.len(), // body_start
76                )),
77            }
78        }
79
80        let (yaml_start, yaml_stop, body_start) = frontmatter_indices(input)?;
81        let frontmatter: Frontmatter =
82            serde_yaml::from_str(&input[yaml_start..yaml_stop])?;
83        let file_name = format!("{}.html", id);
84        let mut post = Post {
85            title: frontmatter.title,
86            date: frontmatter.date,
87            file_path: self.posts_directory.join(&file_name),
88            url: self.posts_url.join(&file_name)?,
89            tags: frontmatter
90                .tags
91                .iter()
92                .map(|t| {
93                    Ok(Tag {
94                        name: t.clone(),
95                        url: self
96                            .index_url
97                            // NOTE: tried
98                            // `index_url.join(t).join("index.html")`; however,
99                            // since `t` doesn't have a trailing slash,
100                            // [`Url::join`] was treating it as equivalent to
101                            // `index_url.join("index.html")` per the
102                            // `Url::join` docs:
103                            //
104                            // > Note: a trailing slash is significant. Without
105                            // it, the last path component is considered to be
106                            // a “file” name to be removed to get at the
107                            // “directory” that is used as the base
108                            .join(&format!("{}/index.html", t))
109                            .unwrap(), // should always succeed
110                    })
111                })
112                .collect::<Result<HashSet<Tag>>>()?,
113            body: String::default(),
114        };
115
116        markdown::to_html(
117            &mut post.body,
118            self.posts_url,
119            id,
120            &input[body_start..],
121            post.url.as_str(),
122        )?;
123        Ok(post)
124    }
125
126    /// Searches a provided `source_directory` for post files (extension =
127    /// `.md`) and returns a list of [`Post`] objects sorted by date (most
128    /// recent first). Each post file must be structured as follows:
129    ///
130    /// 1. Initial frontmatter fence (`---`)
131    /// 2. YAML frontmatter with fields `Title`, `Date`, and optionally `Tags`
132    /// 3. Terminal frontmatter fence (`---`)
133    /// 4. Post body
134    ///
135    /// For example:
136    ///
137    /// ```md
138    /// ---
139    /// Title: Hello, world!
140    /// Date: 2021-04-16
141    /// Tags: [greet]
142    /// ---
143    /// # Hello
144    ///
145    /// World
146    /// ```
147    pub fn parse_posts(&self, source_directory: &Path) -> Result<Vec<Post>> {
148        use std::io::Read;
149        const MARKDOWN_EXTENSION: &str = ".md";
150
151        let mut posts = Vec::new();
152        for result in read_dir(source_directory)? {
153            let entry = result?;
154            let os_file_name = entry.file_name();
155            let file_name = os_file_name.to_string_lossy();
156            if file_name.ends_with(MARKDOWN_EXTENSION) {
157                let base_name = file_name.trim_end_matches(MARKDOWN_EXTENSION);
158                let mut contents = String::new();
159                File::open(entry.path())?.read_to_string(&mut contents)?;
160                posts.push(self.parse_post(base_name, &contents)?);
161            }
162        }
163
164        posts.sort_by(|a, b| b.date.cmp(&a.date));
165        Ok(posts)
166    }
167}
168
169#[derive(Deserialize, Clone)]
170struct Frontmatter {
171    /// The title of the post.
172    #[serde(rename = "Title")]
173    pub title: String,
174
175    /// The date of the post.
176    #[serde(rename = "Date")]
177    pub date: String,
178
179    /// The tags associated with the post.
180    #[serde(default, rename = "Tags")]
181    pub tags: HashSet<String>,
182}
183
/// Represents the result of a [`Post`]-parse operation: a
/// [`std::result::Result`] whose error type is fixed to this module's
/// [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
186
/// Represents an error parsing a [`Post`] object.
#[derive(Debug)]
pub enum Error {
    /// Returned when a post source file is missing its starting frontmatter
    /// fence (`---`), i.e., the file does not begin with `---`.
    FrontmatterMissingStartFence,

    /// Returned when a post source file is missing its terminal frontmatter
    /// fence (`---`), i.e., the starting fence was found but the ending one
    /// was missing.
    FrontmatterMissingEndFence,

    /// Returned when there was an error parsing the frontmatter as YAML.
    DeserializeYaml(serde_yaml::Error),

    /// Returned when there is a problem parsing URLs, e.g. when joining a
    /// post or tag path onto a base URL, or when the markdown renderer
    /// reports a URL error.
    UrlParse(url::ParseError),

    /// Returned for I/O errors, e.g. when listing the source directory or
    /// reading a post file, or when the markdown renderer reports an I/O
    /// error.
    Io(std::io::Error),

    /// An error with an annotation: a human-readable description of what was
    /// being attempted, followed by the underlying error.
    Annotated(String, Box<Error>),
}
211
212impl fmt::Display for Error {
213    /// Displays an [`Error`] as human-readable text.
214    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
215        match self {
216            Error::FrontmatterMissingStartFence => {
217                write!(f, "Post must begin with `---`")
218            }
219            Error::FrontmatterMissingEndFence => {
220                write!(f, "Missing clossing `---`")
221            }
222            Error::DeserializeYaml(err) => err.fmt(f),
223            Error::UrlParse(err) => err.fmt(f),
224            Error::Io(err) => err.fmt(f),
225            Error::Annotated(annotation, err) => {
226                write!(f, "{}: {}", &annotation, err)
227            }
228        }
229    }
230}
231
232impl std::error::Error for Error {
233    /// Implements the [`std::error::Error`] trait for [`Error`].
234    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
235        match self {
236            Error::FrontmatterMissingStartFence => None,
237            Error::FrontmatterMissingEndFence => None,
238            Error::DeserializeYaml(err) => Some(err),
239            Error::UrlParse(err) => Some(err),
240            Error::Io(err) => Some(err),
241            Error::Annotated(_, err) => Some(err),
242        }
243    }
244}
245
246impl From<markdown::Error> for Error {
247    fn from(err: markdown::Error) -> Error {
248        match err {
249            markdown::Error::Io(e) => Error::Io(e),
250            markdown::Error::UrlParse(e) => Error::UrlParse(e),
251        }
252    }
253}
254
255impl From<url::ParseError> for Error {
256    /// Converts a [`url::ParseError`] into an [`Error`]. It allows us to use
257    /// the `?` operator for URL parsing and joining functions.
258    fn from(err: url::ParseError) -> Error {
259        Error::UrlParse(err)
260    }
261}
262
263impl From<serde_yaml::Error> for Error {
264    /// Converts a [`serde_yaml::Error`] into an [`Error`]. It allows us to use
265    /// the `?` operator for [`serde_yaml`] deserialization functions.
266    fn from(err: serde_yaml::Error) -> Error {
267        Error::DeserializeYaml(err)
268    }
269}
270
271impl From<std::io::Error> for Error {
272    /// Converts a [`std::io::Error`] into an [`Error`]. It allows us to
273    // use the `?` operator for fallible I/O functions.
274    fn from(err: std::io::Error) -> Error {
275        Error::Io(err)
276    }
277}