cobalt/
document.rs

1use std::clone::Clone;
2use std::collections::HashMap;
3use std::default::Default;
4use std::path::Path;
5use std::sync::LazyLock;
6
7use anyhow::Context as _;
8use liquid::Object;
9use liquid::ValueView;
10use liquid::model::Value;
11use log::trace;
12use regex::Regex;
13
14use crate::cobalt_model;
15use crate::cobalt_model::Minify;
16use crate::cobalt_model::files;
17use crate::cobalt_model::permalink;
18use crate::cobalt_model::slug;
19use crate::error::Result;
20
21pub(crate) struct RenderContext<'a> {
22    pub(crate) parser: &'a cobalt_model::Liquid,
23    pub(crate) markdown: &'a cobalt_model::Markdown,
24    pub(crate) globals: &'a Object,
25    pub(crate) minify: Minify,
26}
27
28#[derive(Debug, Clone)]
29pub(crate) struct Document {
30    pub(crate) url_path: String,
31    pub(crate) file_path: relative_path::RelativePathBuf,
32    pub(crate) content: liquid::model::KString,
33    pub(crate) attributes: Object,
34    pub(crate) front: cobalt_model::Frontmatter,
35}
36
37impl Document {
38    pub(crate) fn parse(
39        src_path: &Path,
40        rel_path: &relative_path::RelativePath,
41        default_front: cobalt_config::Frontmatter,
42    ) -> Result<Document> {
43        trace!("Parsing `{rel_path}`");
44        let content = files::read_file(src_path)?;
45        let builder = cobalt_config::Document::parse(&content)?;
46        let (front, content) = builder.into_parts();
47        let front = front.merge_path(rel_path).merge(&default_front);
48
49        let front = cobalt_model::Frontmatter::from_config(front)?;
50
51        let (file_path, url_path) = {
52            let perma_attributes = permalink_attributes(&front, rel_path);
53            let url_path =
54                permalink::explode_permalink(front.permalink.as_str(), &perma_attributes)
55                    .with_context(|| {
56                        anyhow::format_err!("Failed to create permalink `{}`", front.permalink)
57                    })?;
58            let file_path = permalink::format_url_as_file(&url_path);
59            (file_path, url_path)
60        };
61
62        let doc_attributes = document_attributes(&front, rel_path, url_path.as_ref());
63
64        Ok(Document {
65            url_path,
66            file_path,
67            content,
68            attributes: doc_attributes,
69            front,
70        })
71    }
72
73    /// Metadata for generating RSS feeds
74    pub(crate) fn to_rss(&self, root_url: &str) -> Result<rss::Item> {
75        let link = format!("{}/{}", root_url, &self.url_path);
76        let guid = rss::GuidBuilder::default()
77            .value(link.clone())
78            .permalink(true)
79            .build();
80
81        let item = rss::ItemBuilder::default()
82            .title(Some(self.front.title.as_str().to_owned()))
83            .link(Some(link))
84            .guid(Some(guid))
85            .pub_date(self.front.published_date.map(|date| date.to_rfc2822()))
86            .description(self.description_to_str())
87            .build();
88        Ok(item)
89    }
90
91    /// Metadata for generating JSON feeds
92    pub(crate) fn to_jsonfeed(&self, root_url: &str) -> jsonfeed::Item {
93        let link = format!("{}/{}", root_url, &self.url_path);
94
95        let tags = if !self.front.tags.is_empty() {
96            self.front
97                .tags
98                .iter()
99                .map(|s| s.as_str().to_owned())
100                .collect()
101        } else {
102            self.front
103                .categories
104                .iter()
105                .map(|s| s.as_str().to_owned())
106                .collect()
107        };
108
109        jsonfeed::Item {
110            id: link.clone(),
111            url: Some(link),
112            title: Some(self.front.title.as_str().to_owned()),
113            content: jsonfeed::Content::Html(
114                self.description_to_str().unwrap_or_else(|| "".into()),
115            ),
116            date_published: self.front.published_date.map(|date| date.to_rfc2822()),
117            tags: Some(tags),
118            ..Default::default()
119        }
120    }
121
122    pub(crate) fn to_sitemap<T: std::io::Write>(
123        &self,
124        root_url: &str,
125        writer: &mut sitemap::writer::UrlSetWriter<T>,
126    ) -> Result<()> {
127        let link = format!("{}/{}", root_url, &self.url_path);
128        let mut url = sitemap::structs::UrlEntry::builder();
129        url = url.loc(link);
130        if let Some(date) = self.front.published_date {
131            let date = chrono::DateTime::parse_from_rfc2822(&date.to_rfc2822())
132                .expect("chrono/time compatible RFC 2822 implementations");
133            url = url.lastmod(date);
134        }
135        writer.url(url)?;
136        Ok(())
137    }
138
139    fn description_to_str(&self) -> Option<String> {
140        self.front
141            .description
142            .as_ref()
143            .map(|s| s.as_str().to_owned())
144            .or_else(|| {
145                self.attributes.get("excerpt").and_then(|excerpt| {
146                    if excerpt.is_nil() {
147                        None
148                    } else {
149                        Some(excerpt.render().to_string())
150                    }
151                })
152            })
153            .or_else(|| {
154                self.attributes
155                    .get("content")
156                    .map(|s| s.render().to_string())
157            })
158    }
159
160    /// Renders liquid templates into HTML in the context of current document.
161    ///
162    /// Takes `content` string and returns rendered HTML. This function doesn't
163    /// take `"extends"` attribute into account. This function can be used for
164    /// rendering content or excerpt.
165    fn render_html(&self, content: &str, context: &RenderContext<'_>) -> Result<String> {
166        let html = if self.front.templated {
167            let template = context.parser.parse(content)?;
168            template.render(context.globals)?
169        } else {
170            content.to_owned()
171        };
172
173        let html = match self.front.format {
174            cobalt_model::SourceFormat::Raw => html,
175            cobalt_model::SourceFormat::Markdown => context.markdown.parse(&html)?,
176        };
177
178        Ok(html)
179    }
180
181    /// Renders the excerpt and adds it to attributes of the document.
182    ///
183    /// The excerpt is either taken from the `excerpt` frontmatter setting, if
184    /// given, or extracted from the content, if `excerpt_separator` is not
185    /// empty. When neither condition applies, the excerpt is set to the `Nil`
186    /// value.
187    pub(crate) fn render_excerpt(&mut self, context: &RenderContext<'_>) -> Result<()> {
188        let value = if let Some(excerpt_str) = self.front.excerpt.as_ref() {
189            let excerpt = self.render_html(excerpt_str, context)?;
190            Value::scalar(excerpt)
191        } else if self.front.excerpt_separator.is_empty() {
192            Value::Nil
193        } else {
194            let excerpt = extract_excerpt(
195                &self.content,
196                self.front.format,
197                &self.front.excerpt_separator,
198            );
199            let excerpt = self.render_html(&excerpt, context)?;
200            Value::scalar(excerpt)
201        };
202
203        self.attributes.insert("excerpt".into(), value);
204        Ok(())
205    }
206
207    /// Renders the content and adds it to attributes of the document.
208    ///
209    /// When we say "content" we mean only this document without extended layout.
210    pub(crate) fn render_content(&mut self, context: &RenderContext<'_>) -> Result<()> {
211        let content_html = self.render_html(&self.content, context)?;
212        self.attributes
213            .insert("content".into(), Value::scalar(content_html));
214        Ok(())
215    }
216
217    /// Renders the document to an HTML string.
218    ///
219    /// Side effects:
220    ///
221    /// * layout may be inserted to layouts cache
222    pub(crate) fn render(
223        &mut self,
224        context: &RenderContext<'_>,
225        layouts: &HashMap<String, String>,
226    ) -> Result<String> {
227        if let Some(ref layout) = self.front.layout {
228            let layout_data_ref = layouts.get(layout.as_str()).ok_or_else(|| {
229                anyhow::format_err!(
230                    "Layout {} does not exist (referenced in {}).",
231                    layout,
232                    self.file_path
233                )
234            })?;
235
236            let template = context
237                .parser
238                .parse(layout_data_ref)
239                .with_context(|| anyhow::format_err!("Failed to parse layout `{}`", layout))?;
240            let content_html = template
241                .render(context.globals)
242                .with_context(|| anyhow::format_err!("Failed to render layout `{}`", layout))?;
243            let content_html = minify_if_enabled(content_html, context, &self.file_path)?;
244            Ok(content_html)
245        } else {
246            let path = &[
247                liquid::model::KStringCow::from_static("page").into(),
248                liquid::model::KStringCow::from_static("content").into(),
249            ];
250            let content_html = liquid::model::try_find(context.globals, path)
251                .ok_or_else(|| anyhow::format_err!("Internal error: page isn't in globals"))?
252                .render()
253                .to_string();
254
255            let content_html = minify_if_enabled(content_html, context, &self.file_path)?;
256            Ok(content_html)
257        }
258    }
259}
260
261pub(crate) fn permalink_attributes(
262    front: &cobalt_model::Frontmatter,
263    dest_file: &relative_path::RelativePath,
264) -> Object {
265    let mut attributes = Object::new();
266
267    attributes.insert(
268        "parent".into(),
269        Value::scalar(
270            dest_file
271                .parent()
272                .unwrap_or_else(|| relative_path::RelativePath::new(""))
273                .to_string(),
274        ),
275    );
276
277    let filename = dest_file.file_stem().unwrap_or("").to_owned();
278    attributes.insert("name".into(), Value::scalar(filename));
279
280    attributes.insert("ext".into(), Value::scalar(".html"));
281
282    // TODO(epage): Add `collection` (the collection's slug), see #257
283    // or `parent.slug`, see #323
284
285    attributes.insert("slug".into(), Value::scalar(front.slug.clone()));
286
287    attributes.insert(
288        "categories".into(),
289        Value::scalar(itertools::join(
290            front.categories.iter().map(slug::slugify),
291            "/",
292        )),
293    );
294
295    if let Some(ref date) = front.published_date {
296        attributes.insert("year".into(), Value::scalar(date.year().to_string()));
297        attributes.insert(
298            "month".into(),
299            Value::scalar(format!("{:02}", &date.month())),
300        );
301        attributes.insert("i_month".into(), Value::scalar(date.month().to_string()));
302        attributes.insert("day".into(), Value::scalar(format!("{:02}", &date.day())));
303        attributes.insert("i_day".into(), Value::scalar(date.day().to_string()));
304        attributes.insert("hour".into(), Value::scalar(format!("{:02}", &date.hour())));
305        attributes.insert(
306            "minute".into(),
307            Value::scalar(format!("{:02}", &date.minute())),
308        );
309        attributes.insert(
310            "second".into(),
311            Value::scalar(format!("{:02}", &date.second())),
312        );
313    }
314
315    attributes.insert("data".into(), Value::Object(front.data.clone()));
316
317    attributes
318}
319
320fn document_attributes(
321    front: &cobalt_model::Frontmatter,
322    source_file: &relative_path::RelativePath,
323    url_path: &str,
324) -> Object {
325    let categories = Value::Array(
326        front
327            .categories
328            .iter()
329            .cloned()
330            .map(Value::scalar)
331            .collect(),
332    );
333    let tags = Value::Array(front.tags.iter().cloned().map(Value::scalar).collect());
334    // Reason for `file`:
335    // - Allow access to assets in the original location
336    // - Ease linking back to page's source
337    let file: Object = [
338        (
339            "permalink".into(),
340            Value::scalar(source_file.as_str().to_owned()),
341        ),
342        (
343            "parent".into(),
344            Value::scalar(
345                source_file
346                    .parent()
347                    .map(relative_path::RelativePath::as_str)
348                    .unwrap_or("")
349                    .to_owned(),
350            ),
351        ),
352    ]
353    .into_iter()
354    .collect();
355    let attributes = [
356        ("permalink".into(), Value::scalar(url_path.to_owned())),
357        ("title".into(), Value::scalar(front.title.clone())),
358        ("slug".into(), Value::scalar(front.slug.clone())),
359        (
360            "description".into(),
361            Value::scalar(front.description.as_deref().unwrap_or("").to_owned()),
362        ),
363        ("categories".into(), categories),
364        ("tags".into(), tags),
365        ("is_draft".into(), Value::scalar(front.is_draft)),
366        ("weight".into(), Value::scalar(front.weight)),
367        ("file".into(), Value::Object(file)),
368        ("collection".into(), Value::scalar(front.collection.clone())),
369        ("data".into(), Value::Object(front.data.clone())),
370    ];
371    let mut attributes: Object = attributes.into_iter().collect();
372
373    if let Some(ref published_date) = front.published_date {
374        attributes.insert("published_date".into(), Value::scalar(*published_date));
375    }
376
377    attributes
378}
379
380#[cfg(not(feature = "html-minifier"))]
381fn minify_if_enabled(
382    html: String,
383    _context: &RenderContext,
384    _file_path: &relative_path::RelativePath,
385) -> Result<String> {
386    Ok(html)
387}
388
/// Minifies `html` when HTML minification is switched on in `context` and
/// `file_path` has an `html` or `htm` extension; otherwise returns `html`
/// untouched.
///
/// # Errors
///
/// Propagates any error reported by the minifier.
#[cfg(feature = "html-minifier")]
fn minify_if_enabled(
    html: String,
    context: &RenderContext<'_>,
    file_path: &relative_path::RelativePath,
) -> Result<String> {
    if !context.minify.html {
        return Ok(html);
    }

    match file_path.extension().unwrap_or_default() {
        "html" | "htm" => Ok(html_minifier::minify(html)?),
        _ => Ok(html),
    }
}
402
/// Returns the part of `content` that precedes the first occurrence of
/// `excerpt_separator`, or all of `content` when the separator is absent.
fn extract_excerpt_raw(content: &str, excerpt_separator: &str) -> String {
    match content.split_once(excerpt_separator) {
        Some((head, _rest)) => head.to_owned(),
        None => content.to_owned(),
    }
}
410
411fn extract_excerpt_markdown(content: &str, excerpt_separator: &str) -> String {
412    static MARKDOWN_REF: LazyLock<Regex> =
413        LazyLock::new(|| Regex::new(r"(?m:^ {0,3}\[[^\]]+\]:.+$)").unwrap());
414
415    let mut trail = String::new();
416
417    if MARKDOWN_REF.is_match(content) {
418        for mat in MARKDOWN_REF.find_iter(content) {
419            trail.push_str(mat.as_str());
420            trail.push('\n');
421        }
422    }
423    trail + content.split(excerpt_separator).next().unwrap_or(content)
424}
425
426fn extract_excerpt(
427    content: &str,
428    format: cobalt_model::SourceFormat,
429    excerpt_separator: &str,
430) -> String {
431    match format {
432        cobalt_model::SourceFormat::Markdown => {
433            extract_excerpt_markdown(content, excerpt_separator)
434        }
435        cobalt_model::SourceFormat::Raw => extract_excerpt_raw(content, excerpt_separator),
436    }
437}