mdbook-bibtex 0.1.0

Adds support for bibliographies to mdbook. Backed by hayagriva, supporting all CSL citation styles.
Documentation
use std::{
    collections::HashMap,
    fmt,
    path::{Path, PathBuf},
};

use anyhow::{Result, anyhow};
use hayagriva::{
    BibliographyDriver, BibliographyRequest, CitationItem, CitationRequest, ElemChildren, Entry,
    Rendered, RenderedBibliography, archive, citationberg,
};
use itertools::Itertools;
use lazy_static::lazy_static;
use mdbook_core::book::{Book, Chapter};
use regex::Regex;
use tracing::warn;

use crate::config::{BibliographyLocation, Config};

lazy_static! {
    // Matches an in-text citation: a literal `@@` (capture group 1) immediately followed by the
    // citation key (capture group 2). The key is one or more characters that are neither
    // brackets, whitespace, nor any of `,;"#'()={}%`.
    static ref REF_REGEX: Regex =
        Regex::new(r##"(@@)([^\[\]\s,;"#'()={}%]+)"##).expect("Valid regex");
    // Locale definitions obtained from hayagriva's `archive::locales()`; they are passed along
    // with every citation and bibliography request.
    static ref LOCALE_FILES: Vec<citationberg::Locale> = archive::locales();
}

/// A single citation found in a chapter, stored as `(index, range, key)`.
///
/// - `index`: running number assigned while collecting; it is later used to look up the
///   matching entry in the rendered citations.
/// - `range`: position of the complete `@@key` match inside the chapter content.
/// - `key`: the citation key as written in the text.
type CitationTuple = (usize, std::ops::Range<usize>, String);

/// Preprocessor pass over the book with two states:
///   - Collecting citations
///   - Replacing citations with rendered strings
///
/// To transition between states, call `render()`.
pub(crate) struct BibPass<'a, S: State<'a>> {
    /// Preprocessor configuration, shared by both states.
    config: &'a Config,
    /// Maps chapters, identified by their source path, to the citations and their positions.
    cites: HashMap<PathBuf, Vec<CitationTuple>>,
    /// State-specific data: `Collecting` before `render()` is called, `Replacing` afterwards.
    state: S,
}

impl<'a, S: State<'a>> BibPass<'a, S> {
    /// Get the path to the bibliography chapter based on the source path of the current chapter.
    ///
    /// The result is relative to the chapter: one `..` for every directory the chapter is
    /// nested in, followed by `<lowercased title>.md`.
    ///
    /// Only real directory names are counted. A leading `.` (as in `./intro.md`) does not add
    /// a nesting level and therefore must not produce a `..`.
    fn get_biblio_path(&self, source_path: &Path) -> PathBuf {
        let depth = source_path.parent().map_or(0, |dir| {
            dir.components()
                .filter(|part| matches!(part, std::path::Component::Normal(_)))
                .count()
        });

        let mut biblio_path: PathBuf = std::iter::repeat("..").take(depth).collect();
        biblio_path.push(format!("{}.md", self.config.title.to_lowercase()));
        biblio_path
    }
}

/// Marker trait for the states a `BibPass` can be in; it declares no methods.
pub(crate) trait State<'a> {}

/// State of a `BibPass` while citations are being gathered from the chapters.
#[derive(Default)]
pub(crate) struct Collecting<'a> {
    /// hayagriva driver that receives one citation request per citation that is registered.
    driver: BibliographyDriver<'a, Entry>,
    /// Number of citations collected from chapter text so far; the next collected citation
    /// is stored under this index.
    index: usize,
    /// Keys that were cited in the text but not found in the bibliography. A key is pushed
    /// once per occurrence, so the list may contain duplicates.
    missing_keys: Vec<String>,
}
impl<'a> State<'a> for Collecting<'a> {}

impl<'a> BibPass<'a, Collecting<'a>> {
    /// Create a new BibPass in the Collecting state.
    ///
    /// Starts with no recorded citations and a default-initialised collecting state.
    pub fn new(config: &'a Config) -> Result<Self> {
        Ok(Self {
            config,
            cites: HashMap::new(),
            state: Collecting::default(),
        })
    }

    /// Reports citation keys that were used in the text but are missing from the bibliography.
    ///
    /// # Errors
    ///
    /// Returns an error listing every missing key once if `errors_on_missing_keys` is set in
    /// the config. Otherwise each missing key is only logged as a warning.
    fn check_for_missing_citations(&self) -> Result<()> {
        let missing = &self.state.missing_keys;
        if missing.is_empty() {
            return Ok(());
        }

        if self.config.errors_on_missing_keys {
            return Err(anyhow!(
                "The following citation keys were not found in the bibliography: {}",
                missing.iter().unique().join(", ")
            ));
        }

        // A key is recorded once per occurrence in the text; warn only once per key, matching
        // the de-duplicated list used in the error message above.
        for key in missing.iter().unique() {
            warn!("Citation key '{}' not found in bibliography", key);
        }
        Ok(())
    }

    /// Cite all entries in the bibliography.
    ///
    /// Does nothing if the config holds no bibliography. Whether this is called at all is
    /// decided by `render()` based on `display_all`.
    fn cite_everything(&mut self) {
        let Some(bib) = &self.config.bib else {
            return;
        };

        for entry in bib {
            self.state.driver.citation(CitationRequest::from_items(
                vec![CitationItem::with_entry(entry)],
                &self.config.style,
                &LOCALE_FILES,
            ));
        }
    }

    /// Collect citations from a chapter's content.
    ///
    /// Every `@@key` match whose key exists in the bibliography is registered with the driver
    /// and remembered together with its position in the text. Keys without a bibliography
    /// entry are recorded as missing and left untouched in the text. Chapters without a
    /// source path (drafts) are skipped.
    pub fn collect_citations(&mut self, chapter: &Chapter) -> Result<()> {
        let Some(source_path) = &chapter.source_path else {
            return Ok(()); // is draft chapter
        };

        let mut cites = vec![];
        for capt in REF_REGEX.captures_iter(&chapter.content) {
            let key = capt.get(2).expect("Regex has two groups").as_str();
            let entry = self.config.bib.as_ref().and_then(|bib| bib.get(key));

            if let Some(entry) = entry {
                self.state.driver.citation(CitationRequest::from_items(
                    vec![CitationItem::with_entry(entry)],
                    &self.config.style,
                    &LOCALE_FILES,
                ));
                cites.push((
                    self.state.index,
                    capt.get(0).expect("Full match exists").range(),
                    key.to_string(),
                ));
                // The index only advances for citations handed to the driver, so it stays in
                // step with the order of the citation requests.
                self.state.index += 1;
            } else {
                self.state.missing_keys.push(key.to_string());
            }
        }
        self.cites.insert(source_path.to_owned(), cites);

        Ok(())
    }

    /// Finalizes all citations to produce a rendered output.
    ///
    /// Consumes the collecting pass and returns the pass in the `Replacing` state.
    ///
    /// # Errors
    ///
    /// Fails if citation keys are missing and the config treats that as an error.
    pub fn render(mut self) -> Result<BibPass<'a, Replacing>> {
        self.check_for_missing_citations()?;

        // Citing the remaining entries happens after all in-text citations were registered, so
        // the indices stored while collecting are not shifted.
        if self.config.display_all {
            self.cite_everything();
        }

        Ok(BibPass {
            config: self.config,
            cites: self.cites,
            state: Replacing {
                rendered: self.state.driver.finish(BibliographyRequest {
                    style: &self.config.style,
                    locale: None,
                    locale_files: &LOCALE_FILES,
                }),
            },
        })
    }
}

/// State of a `BibPass` after `render()`: holds the finished hayagriva output.
pub(crate) struct Replacing {
    /// Result of finishing the bibliography driver; provides the rendered citations and the
    /// optional rendered bibliography.
    rendered: Rendered,
}
impl State<'_> for Replacing {}

/// Display adapter that writes the wrapped hayagriva elements using the HTML buffer format.
struct HTMLOutput<'a>(&'a ElemChildren);

impl fmt::Display for HTMLOutput<'_> {
    /// Writes the wrapped elements as HTML into `f`; formatter flags are not consulted.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(children) = self;
        children.write_buf(f, hayagriva::BufWriteFormat::Html)
    }
}

impl BibPass<'_, Replacing> {
    /// Replace citations in a chapter's content with rendered strings.
    ///
    /// Depending on the configured bibliography location a citation becomes either a Markdown
    /// footnote reference (`[^key]`) or an HTML link to the bibliography entry. With
    /// `citation_preview` enabled, the link additionally carries the bibliography entry as a
    /// tooltip. Chapters without a source path are left untouched.
    ///
    /// # Panics
    ///
    /// Panics if the rendered output has no bibliography, if the chapter was not seen while
    /// collecting, or if a recorded citation has no rendered counterpart.
    pub fn replace_citations(&self, chapter: &mut Chapter) {
        let Some(source_path) = &chapter.source_path else {
            return;
        };

        // hayagriva only returns a bibliography for styles that define one.
        let Some(rendered_bib) = &self.state.rendered.bibliography else {
            unreachable!("Config should reject any CSL without bibliography definitions");
        };

        let cites = self
            .cites
            .get(source_path)
            .expect("Content didn't change between both passes");

        // Walk the citations back to front so that replacing one match does not shift the
        // ranges of the matches that still have to be replaced.
        for (index, range, key) in cites.iter().rev() {
            let link = match self.config.biblio_location {
                BibliographyLocation::Footnotes => {
                    chapter
                        .content
                        .replace_range(range.clone(), &format!("[^{key}]"));
                    continue;
                }
                BibliographyLocation::Global => {
                    format!("{}#{}", self.get_biblio_path(source_path).display(), key)
                }
                BibliographyLocation::Chapter => format!("#{key}"),
            };

            let citation = &self
                .state
                .rendered
                .citations
                .get(*index)
                .expect("Index exists")
                .citation;

            let citation_string = if self.config.citation_preview {
                let bib_entry = &rendered_bib
                    .items
                    .iter()
                    .find(|item| item.key.as_str() == key.as_str())
                    .expect("Key exists")
                    .content;
                // The entry text ends up inside a double-quoted attribute; quotes or angle
                // brackets in it would otherwise terminate the attribute or the tag.
                let preview = escape_html_attr(&format!("{bib_entry:#}"));
                format!(
                    "<a href=\"{link}\"><abbr title=\"{preview}\">{:#}</abbr></a>",
                    HTMLOutput(citation),
                )
            } else {
                format!("<a href=\"{link}\">{:#}</a>", HTMLOutput(citation))
            };

            chapter
                .content
                .replace_range(range.clone(), &citation_string);
        }
    }

    /// Append the bibliography to the end of the chapter.
    ///
    /// In `Chapter` mode a heading with the configured title and the bibliography table are
    /// appended; in `Footnotes` mode one Markdown footnote definition per bibliography item
    /// is appended.
    ///
    /// # Panics
    ///
    /// Panics if the rendered output has no bibliography or if the bibliography location is
    /// `Global` (use `add_global_bib` for that).
    pub fn add_chapter_bib(&self, chapter: &mut Chapter) {
        let Some(rendered_bib) = &self.state.rendered.bibliography else {
            unreachable!("Config should reject any CSL without bibliography definitions");
        };

        match self.config.biblio_location {
            BibliographyLocation::Chapter => {
                let contents = bib_contents(rendered_bib);

                chapter.content.push_str(&format!(
                    "\n\n## {}\n\n{}\n\n",
                    self.config.title, contents
                ));
            }
            BibliographyLocation::Footnotes => {
                let footnotes = rendered_bib
                    .items
                    .iter()
                    .map(|item| format!("[^{}]: {}", item.key, HTMLOutput(&item.content)))
                    .join("\n");
                // A blank line keeps the first definition from being glued onto the last
                // paragraph when the chapter does not end with a newline.
                chapter.content.push_str("\n\n");
                chapter.content.push_str(&footnotes);
            }
            BibliographyLocation::Global => {
                unreachable!("add_chapter_bib called when biblio_location is global")
            }
        }
    }

    /// Insert the bibliography as a new chapter at the end of the book.
    ///
    /// The chapter is named after the configured title and stored as
    /// `<lowercased title>.md`. Nothing is added if the rendered output has no bibliography.
    ///
    /// # Panics
    ///
    /// Panics if the configured bibliography location is not `Global`.
    pub fn add_global_bib(&self, book: &mut Book) {
        assert!(
            matches!(self.config.biblio_location, BibliographyLocation::Global),
            "add_global_bib called when biblio_location is not global"
        );

        let Some(rendered_bib) = &self.state.rendered.bibliography else {
            return;
        };

        let contents = bib_contents(rendered_bib);

        let bib_chapter = Chapter::new(
            &self.config.title,
            format!("# {}\n{}", self.config.title, contents),
            PathBuf::from(format!("{}.md", self.config.title.to_lowercase())),
            vec![],
        );

        book.push_item(bib_chapter);
    }
}

/// Escape the characters that are special inside a double-quoted HTML attribute value.
fn escape_html_attr(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '"' => escaped.push_str("&quot;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}

/// Render the bibliography as an HTML `<table>` with one row per item.
///
/// The first cell of a row holds a `<span>` whose `id` is the item's key (the anchor targeted
/// by citation links); inside it an `<abbr>` shows the item's first field, if any, with the
/// key as tooltip. The second cell holds the item's content rendered as HTML.
fn bib_contents(rendered_bib: &RenderedBibliography) -> String {
    let mut table = String::from("<table>\n");

    for (position, item) in rendered_bib.items.iter().enumerate() {
        // Rows are separated by, not terminated with, a newline.
        if position > 0 {
            table.push('\n');
        }

        let label = match item.first_field.as_ref() {
            Some(field) => format!("{:#}", field),
            None => String::new(),
        };

        // The template is spelled out line by line so that its exact whitespace is visible.
        table.push_str(&format!(
            concat!(
                "<tr>\n",
                "                    <td>\n",
                "                        <span id=\"{}\">\n",
                "                            <abbr title=\"{:#}\">{}</abbr>\n",
                "                        </span>\n",
                "                    </td>\n",
                "                    <td>{:#}</td>\n",
                "                </tr>",
            ),
            item.key,
            &item.key,
            label,
            HTMLOutput(&item.content),
        ));
    }

    table.push_str("\n</table>\n");
    table
}