cobalt/
document.rs

1use std::clone::Clone;
2use std::collections::HashMap;
3use std::default::Default;
4use std::path::Path;
5use std::sync::LazyLock;
6
7use anyhow::Context as _;
8use liquid::Object;
9use liquid::ValueView;
10use liquid::model::Value;
11use log::trace;
12use regex::Regex;
13
14use crate::cobalt_model;
15use crate::cobalt_model::Minify;
16use crate::cobalt_model::files;
17use crate::cobalt_model::permalink;
18use crate::cobalt_model::slug;
19use crate::error::Result;
20
21pub(crate) struct RenderContext<'a> {
22    pub(crate) parser: &'a cobalt_model::Liquid,
23    pub(crate) markdown: &'a cobalt_model::Markdown,
24    pub(crate) globals: &'a Object,
25    pub(crate) minify: Minify,
26}
27
28#[derive(Debug, Clone)]
29pub(crate) struct Document {
30    pub(crate) url_path: String,
31    pub(crate) file_path: relative_path::RelativePathBuf,
32    pub(crate) content: liquid::model::KString,
33    pub(crate) attributes: Object,
34    pub(crate) front: cobalt_model::Frontmatter,
35}
36
37impl Document {
38    pub(crate) fn parse(
39        src_path: &Path,
40        rel_path: &relative_path::RelativePath,
41        default_front: cobalt_config::Frontmatter,
42    ) -> Result<Document> {
43        trace!("Parsing `{}`", rel_path);
44        let content = files::read_file(src_path)?;
45        let builder = cobalt_config::Document::parse(&content)?;
46        let (front, content) = builder.into_parts();
47        let front = front.merge_path(rel_path).merge(&default_front);
48
49        let front = cobalt_model::Frontmatter::from_config(front)?;
50
51        let (file_path, url_path) = {
52            let perma_attributes = permalink_attributes(&front, rel_path);
53            let url_path =
54                permalink::explode_permalink(front.permalink.as_str(), &perma_attributes)
55                    .with_context(|| {
56                        anyhow::format_err!("Failed to create permalink `{}`", front.permalink)
57                    })?;
58            let file_path = permalink::format_url_as_file(&url_path);
59            (file_path, url_path)
60        };
61
62        let doc_attributes = document_attributes(&front, rel_path, url_path.as_ref());
63
64        Ok(Document {
65            url_path,
66            file_path,
67            content,
68            attributes: doc_attributes,
69            front,
70        })
71    }
72
73    /// Metadata for generating RSS feeds
74    pub(crate) fn to_rss(&self, root_url: &str) -> Result<rss::Item> {
75        let link = format!("{}/{}", root_url, &self.url_path);
76        let guid = rss::GuidBuilder::default()
77            .value(link.clone())
78            .permalink(true)
79            .build();
80
81        let item = rss::ItemBuilder::default()
82            .title(Some(self.front.title.as_str().to_owned()))
83            .link(Some(link))
84            .guid(Some(guid))
85            .pub_date(self.front.published_date.map(|date| date.to_rfc2822()))
86            .description(self.description_to_str())
87            .build();
88        Ok(item)
89    }
90
91    /// Metadata for generating JSON feeds
92    pub(crate) fn to_jsonfeed(&self, root_url: &str) -> jsonfeed::Item {
93        let link = format!("{}/{}", root_url, &self.url_path);
94
95        jsonfeed::Item {
96            id: link.clone(),
97            url: Some(link),
98            title: Some(self.front.title.as_str().to_owned()),
99            content: jsonfeed::Content::Html(
100                self.description_to_str().unwrap_or_else(|| "".into()),
101            ),
102            date_published: self.front.published_date.map(|date| date.to_rfc2822()),
103            tags: Some(
104                self.front
105                    .tags
106                    .as_ref()
107                    .map(|tags| tags.iter().map(|s| s.as_str().to_owned()).collect())
108                    .unwrap_or_else(|| {
109                        self.front
110                            .categories
111                            .iter()
112                            .map(|s| s.as_str().to_owned())
113                            .collect()
114                    }),
115            ),
116            ..Default::default()
117        }
118    }
119
120    pub(crate) fn to_sitemap<T: std::io::Write>(
121        &self,
122        root_url: &str,
123        writer: &mut sitemap::writer::UrlSetWriter<T>,
124    ) -> Result<()> {
125        let link = format!("{}/{}", root_url, &self.url_path);
126        let mut url = sitemap::structs::UrlEntry::builder();
127        url = url.loc(link);
128        if let Some(date) = self.front.published_date {
129            let date = chrono::DateTime::parse_from_rfc2822(&date.to_rfc2822())
130                .expect("chrono/time compatible RFC 2822 implementations");
131            url = url.lastmod(date);
132        }
133        writer.url(url)?;
134        Ok(())
135    }
136
137    fn description_to_str(&self) -> Option<String> {
138        self.front
139            .description
140            .as_ref()
141            .map(|s| s.as_str().to_owned())
142            .or_else(|| {
143                self.attributes.get("excerpt").and_then(|excerpt| {
144                    if excerpt.is_nil() {
145                        None
146                    } else {
147                        Some(excerpt.render().to_string())
148                    }
149                })
150            })
151            .or_else(|| {
152                self.attributes
153                    .get("content")
154                    .map(|s| s.render().to_string())
155            })
156    }
157
158    /// Renders liquid templates into HTML in the context of current document.
159    ///
160    /// Takes `content` string and returns rendered HTML. This function doesn't
161    /// take `"extends"` attribute into account. This function can be used for
162    /// rendering content or excerpt.
163    fn render_html(&self, content: &str, context: &RenderContext<'_>) -> Result<String> {
164        let html = if self.front.templated {
165            let template = context.parser.parse(content)?;
166            template.render(context.globals)?
167        } else {
168            content.to_owned()
169        };
170
171        let html = match self.front.format {
172            cobalt_model::SourceFormat::Raw => html,
173            cobalt_model::SourceFormat::Markdown => context.markdown.parse(&html)?,
174        };
175
176        Ok(html)
177    }
178
179    /// Renders the excerpt and adds it to attributes of the document.
180    ///
181    /// The excerpt is either taken from the `excerpt` frontmatter setting, if
182    /// given, or extracted from the content, if `excerpt_separator` is not
183    /// empty. When neither condition applies, the excerpt is set to the `Nil`
184    /// value.
185    pub(crate) fn render_excerpt(&mut self, context: &RenderContext<'_>) -> Result<()> {
186        let value = if let Some(excerpt_str) = self.front.excerpt.as_ref() {
187            let excerpt = self.render_html(excerpt_str, context)?;
188            Value::scalar(excerpt)
189        } else if self.front.excerpt_separator.is_empty() {
190            Value::Nil
191        } else {
192            let excerpt = extract_excerpt(
193                &self.content,
194                self.front.format,
195                &self.front.excerpt_separator,
196            );
197            let excerpt = self.render_html(&excerpt, context)?;
198            Value::scalar(excerpt)
199        };
200
201        self.attributes.insert("excerpt".into(), value);
202        Ok(())
203    }
204
205    /// Renders the content and adds it to attributes of the document.
206    ///
207    /// When we say "content" we mean only this document without extended layout.
208    pub(crate) fn render_content(&mut self, context: &RenderContext<'_>) -> Result<()> {
209        let content_html = self.render_html(&self.content, context)?;
210        self.attributes
211            .insert("content".into(), Value::scalar(content_html));
212        Ok(())
213    }
214
215    /// Renders the document to an HTML string.
216    ///
217    /// Side effects:
218    ///
219    /// * layout may be inserted to layouts cache
220    pub(crate) fn render(
221        &mut self,
222        context: &RenderContext<'_>,
223        layouts: &HashMap<String, String>,
224    ) -> Result<String> {
225        if let Some(ref layout) = self.front.layout {
226            let layout_data_ref = layouts.get(layout.as_str()).ok_or_else(|| {
227                anyhow::format_err!(
228                    "Layout {} does not exist (referenced in {}).",
229                    layout,
230                    self.file_path
231                )
232            })?;
233
234            let template = context
235                .parser
236                .parse(layout_data_ref)
237                .with_context(|| anyhow::format_err!("Failed to parse layout `{}`", layout))?;
238            let content_html = template
239                .render(context.globals)
240                .with_context(|| anyhow::format_err!("Failed to render layout `{}`", layout))?;
241            let content_html = minify_if_enabled(content_html, context, &self.file_path)?;
242            Ok(content_html)
243        } else {
244            let path = &[
245                liquid::model::KStringCow::from_static("page").into(),
246                liquid::model::KStringCow::from_static("content").into(),
247            ];
248            let content_html = liquid::model::try_find(context.globals, path)
249                .ok_or_else(|| anyhow::format_err!("Internal error: page isn't in globals"))?
250                .render()
251                .to_string();
252
253            let content_html = minify_if_enabled(content_html, context, &self.file_path)?;
254            Ok(content_html)
255        }
256    }
257}
258
259pub(crate) fn permalink_attributes(
260    front: &cobalt_model::Frontmatter,
261    dest_file: &relative_path::RelativePath,
262) -> Object {
263    let mut attributes = Object::new();
264
265    attributes.insert(
266        "parent".into(),
267        Value::scalar(
268            dest_file
269                .parent()
270                .unwrap_or_else(|| relative_path::RelativePath::new(""))
271                .to_string(),
272        ),
273    );
274
275    let filename = dest_file.file_stem().unwrap_or("").to_owned();
276    attributes.insert("name".into(), Value::scalar(filename));
277
278    attributes.insert("ext".into(), Value::scalar(".html"));
279
280    // TODO(epage): Add `collection` (the collection's slug), see #257
281    // or `parent.slug`, see #323
282
283    attributes.insert("slug".into(), Value::scalar(front.slug.clone()));
284
285    attributes.insert(
286        "categories".into(),
287        Value::scalar(itertools::join(
288            front.categories.iter().map(slug::slugify),
289            "/",
290        )),
291    );
292
293    if let Some(ref date) = front.published_date {
294        attributes.insert("year".into(), Value::scalar(date.year().to_string()));
295        attributes.insert(
296            "month".into(),
297            Value::scalar(format!("{:02}", &date.month())),
298        );
299        attributes.insert("i_month".into(), Value::scalar(date.month().to_string()));
300        attributes.insert("day".into(), Value::scalar(format!("{:02}", &date.day())));
301        attributes.insert("i_day".into(), Value::scalar(date.day().to_string()));
302        attributes.insert("hour".into(), Value::scalar(format!("{:02}", &date.hour())));
303        attributes.insert(
304            "minute".into(),
305            Value::scalar(format!("{:02}", &date.minute())),
306        );
307        attributes.insert(
308            "second".into(),
309            Value::scalar(format!("{:02}", &date.second())),
310        );
311    }
312
313    attributes.insert("data".into(), Value::Object(front.data.clone()));
314
315    attributes
316}
317
318fn document_attributes(
319    front: &cobalt_model::Frontmatter,
320    source_file: &relative_path::RelativePath,
321    url_path: &str,
322) -> Object {
323    let categories = Value::Array(
324        front
325            .categories
326            .iter()
327            .cloned()
328            .map(Value::scalar)
329            .collect(),
330    );
331    // Reason for `file`:
332    // - Allow access to assets in the original location
333    // - Ease linking back to page's source
334    let file: Object = vec![
335        (
336            "permalink".into(),
337            Value::scalar(source_file.as_str().to_owned()),
338        ),
339        (
340            "parent".into(),
341            Value::scalar(
342                source_file
343                    .parent()
344                    .map(relative_path::RelativePath::as_str)
345                    .unwrap_or("")
346                    .to_owned(),
347            ),
348        ),
349    ]
350    .into_iter()
351    .collect();
352    let attributes = vec![
353        ("permalink".into(), Value::scalar(url_path.to_owned())),
354        ("title".into(), Value::scalar(front.title.clone())),
355        ("slug".into(), Value::scalar(front.slug.clone())),
356        (
357            "description".into(),
358            Value::scalar(front.description.as_deref().unwrap_or("").to_owned()),
359        ),
360        ("categories".into(), categories),
361        ("is_draft".into(), Value::scalar(front.is_draft)),
362        ("weight".into(), Value::scalar(front.weight)),
363        ("file".into(), Value::Object(file)),
364        ("collection".into(), Value::scalar(front.collection.clone())),
365        ("data".into(), Value::Object(front.data.clone())),
366    ];
367    let mut attributes: Object = attributes.into_iter().collect();
368
369    if let Some(ref tags) = front.tags {
370        let tags = Value::Array(tags.iter().cloned().map(Value::scalar).collect());
371        attributes.insert("tags".into(), tags);
372    }
373
374    if let Some(ref published_date) = front.published_date {
375        attributes.insert("published_date".into(), Value::scalar(*published_date));
376    }
377
378    attributes
379}
380
381#[cfg(not(feature = "html-minifier"))]
382fn minify_if_enabled(
383    html: String,
384    _context: &RenderContext,
385    _file_path: &relative_path::RelativePath,
386) -> Result<String> {
387    Ok(html)
388}
389
/// Minifies `html` when HTML minification is switched on in `context` and
/// `file_path` has an `html` or `htm` extension; otherwise returns `html`
/// unchanged.
///
/// # Errors
///
/// Fails when the minifier reports an error.
#[cfg(feature = "html-minifier")]
fn minify_if_enabled(
    html: String,
    context: &RenderContext<'_>,
    file_path: &relative_path::RelativePath,
) -> Result<String> {
    if !context.minify.html {
        return Ok(html);
    }

    match file_path.extension() {
        Some("html" | "htm") => Ok(html_minifier::minify(html)?),
        _ => Ok(html),
    }
}
403
/// Returns the part of `content` that precedes the first occurrence of
/// `excerpt_separator`, or all of `content` when the separator does not occur.
fn extract_excerpt_raw(content: &str, excerpt_separator: &str) -> String {
    let head = match content.split_once(excerpt_separator) {
        Some((before, _after)) => before,
        None => content,
    };
    head.to_owned()
}
411
412fn extract_excerpt_markdown(content: &str, excerpt_separator: &str) -> String {
413    static MARKDOWN_REF: LazyLock<Regex> =
414        LazyLock::new(|| Regex::new(r"(?m:^ {0,3}\[[^\]]+\]:.+$)").unwrap());
415
416    let mut trail = String::new();
417
418    if MARKDOWN_REF.is_match(content) {
419        for mat in MARKDOWN_REF.find_iter(content) {
420            trail.push_str(mat.as_str());
421            trail.push('\n');
422        }
423    }
424    trail + content.split(excerpt_separator).next().unwrap_or(content)
425}
426
427fn extract_excerpt(
428    content: &str,
429    format: cobalt_model::SourceFormat,
430    excerpt_separator: &str,
431) -> String {
432    match format {
433        cobalt_model::SourceFormat::Markdown => {
434            extract_excerpt_markdown(content, excerpt_separator)
435        }
436        cobalt_model::SourceFormat::Raw => extract_excerpt_raw(content, excerpt_separator),
437    }
438}