cobalt/
document.rs

1use std::clone::Clone;
2use std::collections::HashMap;
3use std::default::Default;
4use std::path::Path;
5use std::sync::LazyLock;
6
7use anyhow::Context as _;
8use liquid::Object;
9use liquid::ValueView;
10use liquid::model::Value;
11use log::trace;
12use regex::Regex;
13use rss::Category;
14
15use crate::cobalt_model;
16use crate::cobalt_model::Minify;
17use crate::cobalt_model::files;
18use crate::cobalt_model::permalink;
19use crate::cobalt_model::slug;
20use crate::error::Result;
21
22pub(crate) struct RenderContext<'a> {
23    pub(crate) parser: &'a cobalt_model::Liquid,
24    pub(crate) markdown: &'a cobalt_model::Markdown,
25    pub(crate) globals: &'a Object,
26    pub(crate) minify: Minify,
27}
28
29#[derive(Debug, Clone)]
30pub(crate) struct Document {
31    pub(crate) url_path: String,
32    pub(crate) file_path: relative_path::RelativePathBuf,
33    pub(crate) content: liquid::model::KString,
34    pub(crate) attributes: Object,
35    pub(crate) front: cobalt_model::Frontmatter,
36}
37
38impl Document {
39    pub(crate) fn parse(
40        src_path: &Path,
41        rel_path: &relative_path::RelativePath,
42        default_front: cobalt_config::Frontmatter,
43    ) -> Result<Document> {
44        trace!("Parsing `{rel_path}`");
45        let content = files::read_file(src_path)?;
46        let builder = cobalt_config::Document::parse(&content)?;
47        let (front, content) = builder.into_parts();
48        let front = front.merge_path(rel_path).merge(&default_front);
49
50        let front = cobalt_model::Frontmatter::from_config(front)?;
51
52        let (file_path, url_path) = {
53            let perma_attributes = permalink_attributes(&front, rel_path);
54            let url_path =
55                permalink::explode_permalink(front.permalink.as_str(), &perma_attributes)
56                    .with_context(|| {
57                        anyhow::format_err!("Failed to create permalink `{}`", front.permalink)
58                    })?;
59            let file_path = permalink::format_url_as_file(&url_path);
60            (file_path, url_path)
61        };
62
63        let doc_attributes = document_attributes(&front, rel_path, url_path.as_ref());
64
65        Ok(Document {
66            url_path,
67            file_path,
68            content,
69            attributes: doc_attributes,
70            front,
71        })
72    }
73
74    /// Metadata for generating RSS feeds
75    pub(crate) fn to_rss(&self, root_url: &str) -> Result<rss::Item> {
76        let link = format!("{}/{}", root_url, &self.url_path);
77        let guid = rss::GuidBuilder::default()
78            .value(link.clone())
79            .permalink(true)
80            .build();
81
82        let mut categories = self
83            .front
84            .tags
85            .iter()
86            .map(|c| Category::from(c.as_str()))
87            .collect::<Vec<_>>();
88
89        if !self.front.categories.is_empty() {
90            categories.push(self.front.categories.join("/").into());
91        }
92
93        let item = rss::ItemBuilder::default()
94            .title(Some(self.front.title.as_str().to_owned()))
95            .link(Some(link))
96            .guid(Some(guid))
97            .pub_date(self.front.published_date.map(|date| date.to_rfc2822()))
98            .description(self.description_to_str())
99            .categories(categories)
100            .build();
101        Ok(item)
102    }
103
104    /// Metadata for generating JSON feeds
105    pub(crate) fn to_jsonfeed(&self, root_url: &str) -> jsonfeed::Item {
106        let link = format!("{}/{}", root_url, &self.url_path);
107
108        let tags = if !self.front.tags.is_empty() {
109            self.front
110                .tags
111                .iter()
112                .map(|s| s.as_str().to_owned())
113                .collect()
114        } else {
115            self.front
116                .categories
117                .iter()
118                .map(|s| s.as_str().to_owned())
119                .collect()
120        };
121
122        jsonfeed::Item {
123            id: link.clone(),
124            url: Some(link),
125            title: Some(self.front.title.as_str().to_owned()),
126            content: jsonfeed::Content::Html(
127                self.description_to_str().unwrap_or_else(|| "".into()),
128            ),
129            date_published: self.front.published_date.map(|date| date.to_rfc2822()),
130            tags: Some(tags),
131            ..Default::default()
132        }
133    }
134
135    pub(crate) fn to_sitemap<T: std::io::Write>(
136        &self,
137        root_url: &str,
138        writer: &mut sitemap::writer::UrlSetWriter<T>,
139    ) -> Result<()> {
140        let link = format!("{}/{}", root_url, &self.url_path);
141        let mut url = sitemap::structs::UrlEntry::builder();
142        url = url.loc(link);
143        if let Some(date) = self.front.published_date {
144            let date = chrono::DateTime::parse_from_rfc2822(&date.to_rfc2822())
145                .expect("chrono/time compatible RFC 2822 implementations");
146            url = url.lastmod(date);
147        }
148        writer.url(url)?;
149        Ok(())
150    }
151
152    fn description_to_str(&self) -> Option<String> {
153        self.front
154            .description
155            .as_ref()
156            .map(|s| s.as_str().to_owned())
157            .or_else(|| {
158                self.attributes.get("excerpt").and_then(|excerpt| {
159                    if excerpt.is_nil() {
160                        None
161                    } else {
162                        Some(excerpt.render().to_string())
163                    }
164                })
165            })
166            .or_else(|| {
167                self.attributes
168                    .get("content")
169                    .map(|s| s.render().to_string())
170            })
171    }
172
173    /// Renders liquid templates into HTML in the context of current document.
174    ///
175    /// Takes `content` string and returns rendered HTML. This function doesn't
176    /// take `"extends"` attribute into account. This function can be used for
177    /// rendering content or excerpt.
178    fn render_html(&self, content: &str, context: &RenderContext<'_>) -> Result<String> {
179        let html = if self.front.templated {
180            let template = context.parser.parse(content)?;
181            template.render(context.globals)?
182        } else {
183            content.to_owned()
184        };
185
186        let html = match self.front.format {
187            cobalt_model::SourceFormat::Raw => html,
188            cobalt_model::SourceFormat::Markdown => context.markdown.parse(&html)?,
189        };
190
191        Ok(html)
192    }
193
194    /// Renders the excerpt and adds it to attributes of the document.
195    ///
196    /// The excerpt is either taken from the `excerpt` frontmatter setting, if
197    /// given, or extracted from the content, if `excerpt_separator` is not
198    /// empty. When neither condition applies, the excerpt is set to the `Nil`
199    /// value.
200    pub(crate) fn render_excerpt(&mut self, context: &RenderContext<'_>) -> Result<()> {
201        let value = if let Some(excerpt_str) = self.front.excerpt.as_ref() {
202            let excerpt = self.render_html(excerpt_str, context)?;
203            Value::scalar(excerpt)
204        } else if self.front.excerpt_separator.is_empty() {
205            Value::Nil
206        } else {
207            let excerpt = extract_excerpt(
208                &self.content,
209                self.front.format,
210                &self.front.excerpt_separator,
211            );
212            let excerpt = self.render_html(&excerpt, context)?;
213            Value::scalar(excerpt)
214        };
215
216        self.attributes.insert("excerpt".into(), value);
217        Ok(())
218    }
219
220    /// Renders the content and adds it to attributes of the document.
221    ///
222    /// When we say "content" we mean only this document without extended layout.
223    pub(crate) fn render_content(&mut self, context: &RenderContext<'_>) -> Result<()> {
224        let content_html = self.render_html(&self.content, context)?;
225        self.attributes
226            .insert("content".into(), Value::scalar(content_html));
227        Ok(())
228    }
229
230    /// Renders the document to an HTML string.
231    ///
232    /// Side effects:
233    ///
234    /// * layout may be inserted to layouts cache
235    pub(crate) fn render(
236        &mut self,
237        context: &RenderContext<'_>,
238        layouts: &HashMap<String, String>,
239    ) -> Result<String> {
240        if let Some(ref layout) = self.front.layout {
241            let layout_data_ref = layouts.get(layout.as_str()).ok_or_else(|| {
242                anyhow::format_err!(
243                    "Layout {} does not exist (referenced in {}).",
244                    layout,
245                    self.file_path
246                )
247            })?;
248
249            let template = context
250                .parser
251                .parse(layout_data_ref)
252                .with_context(|| anyhow::format_err!("Failed to parse layout `{}`", layout))?;
253            let content_html = template
254                .render(context.globals)
255                .with_context(|| anyhow::format_err!("Failed to render layout `{}`", layout))?;
256            let content_html = minify_if_enabled(content_html, context, &self.file_path)?;
257            Ok(content_html)
258        } else {
259            let path = &[
260                liquid::model::KStringCow::from_static("page").into(),
261                liquid::model::KStringCow::from_static("content").into(),
262            ];
263            let content_html = liquid::model::try_find(context.globals, path)
264                .ok_or_else(|| anyhow::format_err!("Internal error: page isn't in globals"))?
265                .render()
266                .to_string();
267
268            let content_html = minify_if_enabled(content_html, context, &self.file_path)?;
269            Ok(content_html)
270        }
271    }
272}
273
274pub(crate) fn permalink_attributes(
275    front: &cobalt_model::Frontmatter,
276    dest_file: &relative_path::RelativePath,
277) -> Object {
278    let mut attributes = Object::new();
279
280    attributes.insert(
281        "parent".into(),
282        Value::scalar(
283            dest_file
284                .parent()
285                .unwrap_or_else(|| relative_path::RelativePath::new(""))
286                .to_string(),
287        ),
288    );
289
290    let filename = dest_file.file_stem().unwrap_or("").to_owned();
291    attributes.insert("name".into(), Value::scalar(filename));
292
293    attributes.insert("ext".into(), Value::scalar(".html"));
294
295    // TODO(epage): Add `collection` (the collection's slug), see #257
296    // or `parent.slug`, see #323
297
298    attributes.insert("slug".into(), Value::scalar(front.slug.clone()));
299
300    attributes.insert(
301        "categories".into(),
302        Value::scalar(itertools::join(
303            front.categories.iter().map(slug::slugify),
304            "/",
305        )),
306    );
307
308    if let Some(ref date) = front.published_date {
309        attributes.insert("year".into(), Value::scalar(date.year().to_string()));
310        attributes.insert(
311            "month".into(),
312            Value::scalar(format!("{:02}", &date.month())),
313        );
314        attributes.insert("i_month".into(), Value::scalar(date.month().to_string()));
315        attributes.insert("day".into(), Value::scalar(format!("{:02}", &date.day())));
316        attributes.insert("i_day".into(), Value::scalar(date.day().to_string()));
317        attributes.insert("hour".into(), Value::scalar(format!("{:02}", &date.hour())));
318        attributes.insert(
319            "minute".into(),
320            Value::scalar(format!("{:02}", &date.minute())),
321        );
322        attributes.insert(
323            "second".into(),
324            Value::scalar(format!("{:02}", &date.second())),
325        );
326    }
327
328    attributes.insert("data".into(), Value::Object(front.data.clone()));
329
330    attributes
331}
332
333fn document_attributes(
334    front: &cobalt_model::Frontmatter,
335    source_file: &relative_path::RelativePath,
336    url_path: &str,
337) -> Object {
338    let categories = Value::Array(
339        front
340            .categories
341            .iter()
342            .cloned()
343            .map(Value::scalar)
344            .collect(),
345    );
346    let tags = Value::Array(front.tags.iter().cloned().map(Value::scalar).collect());
347    // Reason for `file`:
348    // - Allow access to assets in the original location
349    // - Ease linking back to page's source
350    let file: Object = [
351        (
352            "permalink".into(),
353            Value::scalar(source_file.as_str().to_owned()),
354        ),
355        (
356            "parent".into(),
357            Value::scalar(
358                source_file
359                    .parent()
360                    .map(relative_path::RelativePath::as_str)
361                    .unwrap_or("")
362                    .to_owned(),
363            ),
364        ),
365    ]
366    .into_iter()
367    .collect();
368    let attributes = [
369        ("permalink".into(), Value::scalar(url_path.to_owned())),
370        ("title".into(), Value::scalar(front.title.clone())),
371        ("slug".into(), Value::scalar(front.slug.clone())),
372        (
373            "description".into(),
374            Value::scalar(front.description.as_deref().unwrap_or("").to_owned()),
375        ),
376        ("categories".into(), categories),
377        ("tags".into(), tags),
378        ("is_draft".into(), Value::scalar(front.is_draft)),
379        ("weight".into(), Value::scalar(front.weight)),
380        ("file".into(), Value::Object(file)),
381        ("collection".into(), Value::scalar(front.collection.clone())),
382        ("data".into(), Value::Object(front.data.clone())),
383    ];
384    let mut attributes: Object = attributes.into_iter().collect();
385
386    if let Some(ref published_date) = front.published_date {
387        attributes.insert("published_date".into(), Value::scalar(*published_date));
388    }
389
390    attributes
391}
392
393#[cfg(not(feature = "html-minifier"))]
394fn minify_if_enabled(
395    html: String,
396    _context: &RenderContext,
397    _file_path: &relative_path::RelativePath,
398) -> Result<String> {
399    Ok(html)
400}
401
/// Minifies `html` when HTML minification is turned on in the context and
/// `file_path` has an `html` or `htm` extension; otherwise returns `html`
/// unchanged.
///
/// This variant is compiled when the `html-minifier` feature is enabled.
///
/// # Errors
///
/// Fails when the minifier reports an error.
#[cfg(feature = "html-minifier")]
fn minify_if_enabled(
    html: String,
    context: &RenderContext<'_>,
    file_path: &relative_path::RelativePath,
) -> Result<String> {
    if !context.minify.html {
        return Ok(html);
    }

    match file_path.extension() {
        Some("html" | "htm") => Ok(html_minifier::minify(html)?),
        _ => Ok(html),
    }
}
415
/// Returns the part of `content` that precedes the first occurrence of
/// `excerpt_separator`, or all of `content` when the separator does not occur.
fn extract_excerpt_raw(content: &str, excerpt_separator: &str) -> String {
    let excerpt = match content.find(excerpt_separator) {
        Some(separator_start) => &content[..separator_start],
        None => content,
    };
    excerpt.to_owned()
}
423
424fn extract_excerpt_markdown(content: &str, excerpt_separator: &str) -> String {
425    static MARKDOWN_REF: LazyLock<Regex> =
426        LazyLock::new(|| Regex::new(r"(?m:^ {0,3}\[[^\]]+\]:.+$)").unwrap());
427
428    let mut trail = String::new();
429
430    if MARKDOWN_REF.is_match(content) {
431        for mat in MARKDOWN_REF.find_iter(content) {
432            trail.push_str(mat.as_str());
433            trail.push('\n');
434        }
435    }
436    trail + content.split(excerpt_separator).next().unwrap_or(content)
437}
438
439fn extract_excerpt(
440    content: &str,
441    format: cobalt_model::SourceFormat,
442    excerpt_separator: &str,
443) -> String {
444    match format {
445        cobalt_model::SourceFormat::Markdown => {
446            extract_excerpt_markdown(content, excerpt_separator)
447        }
448        cobalt_model::SourceFormat::Raw => extract_excerpt_raw(content, excerpt_separator),
449    }
450}