use std::collections::HashMap;
use std::sync::LazyLock;
use serde::Deserialize;
use smol_str::SmolStr;
/// Category a bibliography field is filed under.
///
/// The variants map one-to-one onto the `RawCategory` values read from the
/// bundled JSON. `BibFieldDb::category` falls back to `Literal` for any field
/// the database does not list.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FieldCategory {
/// Written `name` in the JSON; this module's tests expect e.g. `author` here.
Name,
/// Written `date` in the JSON; this module's tests expect e.g. `year` here.
Date,
/// Written `verbatim` in the JSON; this module's tests expect e.g. `url` here.
Verbatim,
/// Written `literal` in the JSON; also the fallback for unknown fields.
Literal,
}
/// Signature of a single field; currently it records only the category.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FieldSig {
/// Category the field is filed under.
pub category: FieldCategory,
}
/// One required-field constraint of an entry type.
///
/// Field names are lowercased when a constraint is converted from the JSON
/// data (see the `From<RawRequired>` impl).
///
/// NOTE(review): the code that checks these constraints is not part of this
/// file; the variant docs describe the presumed meaning — confirm against the
/// consumer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RequiredField {
/// A single field name (presumably: this field must be present).
One(SmolStr),
/// A group of alternative field names (presumably: any one of them
/// satisfies the constraint).
OneOf(Vec<SmolStr>),
}
/// Signature of one entry type: its required-field constraints and its
/// optional field names.
///
/// The derived `Default` yields two empty lists.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct EntrySig {
/// Required-field constraints, in the order they appear in the JSON data.
pub required: Vec<RequiredField>,
/// Optional field names, lowercased when loaded from the JSON data.
pub optional: Vec<SmolStr>,
}
/// Lookup tables for entry-type and field signatures.
///
/// `parse` lowercases every key before inserting it, and the lookup methods
/// lowercase the queried name, which makes lookups case-insensitive. The
/// derived `Default` is an empty database.
#[derive(Debug, Default, PartialEq, Eq)]
pub struct BibFieldDb {
// Entry-type name (lowercased by `parse`) -> signature of that entry type.
entries: HashMap<SmolStr, EntrySig>,
// Field name (lowercased by `parse`) -> signature of that field.
fields: HashMap<SmolStr, FieldSig>,
}
impl BibFieldDb {
    /// Normalizes a queried name to the lowercase form used for the map keys.
    ///
    /// `str::to_lowercase` returns an identical string when the input is pure
    /// ASCII without uppercase letters, so that (common) case borrows `name`
    /// instead of allocating a fresh `String` for every lookup. Anything else
    /// goes through the full Unicode-aware `to_lowercase`, exactly as before.
    fn lookup_key(name: &str) -> std::borrow::Cow<'_, str> {
        if name
            .bytes()
            .all(|b| b.is_ascii() && !b.is_ascii_uppercase())
        {
            std::borrow::Cow::Borrowed(name)
        } else {
            std::borrow::Cow::Owned(name.to_lowercase())
        }
    }

    /// Returns the signature of the entry type `name`, ignoring case.
    ///
    /// Returns `None` when the database has no such entry type.
    pub fn entry(&self, name: &str) -> Option<&EntrySig> {
        self.entries.get(&*Self::lookup_key(name))
    }

    /// Returns the signature of the field `name`, ignoring case.
    ///
    /// Returns `None` when the database has no such field.
    pub fn field(&self, name: &str) -> Option<&FieldSig> {
        self.fields.get(&*Self::lookup_key(name))
    }

    /// Returns the category of the field `name`, ignoring case.
    ///
    /// Fields the database does not list are reported as
    /// [`FieldCategory::Literal`].
    pub fn category(&self, name: &str) -> FieldCategory {
        self.field(name)
            .map_or(FieldCategory::Literal, |sig| sig.category)
    }

    /// Iterates over the stored entry-type names, in arbitrary (hash map) order.
    pub fn entry_names(&self) -> impl Iterator<Item = &str> {
        self.entries.keys().map(SmolStr::as_str)
    }

    /// Iterates over the stored field names, in arbitrary (hash map) order.
    pub fn field_names(&self) -> impl Iterator<Item = &str> {
        self.fields.keys().map(SmolStr::as_str)
    }
}
pub fn builtin() -> &'static BibFieldDb {
&DB
}
/// Text of the bundled field database, embedded into the binary at compile time.
const BIB_FIELDS_JSON: &str = include_str!("../../../data/bib_fields.json");

/// Parsed form of `BIB_FIELDS_JSON`, built on first access.
///
/// A parse failure means the file shipped with the crate is broken, which is
/// treated as a bug and therefore panics rather than being reported to callers.
static DB: LazyLock<BibFieldDb> = LazyLock::new(|| {
    let parsed = parse(BIB_FIELDS_JSON);
    parsed.expect("bundled data/bib_fields.json must be valid")
});
/// JSON shape of one element of an entry's `required` array.
///
/// The enum is `untagged`, so a plain JSON string deserializes as `One` and a
/// JSON array of strings deserializes as `OneOf`.
#[derive(Deserialize)]
#[serde(untagged)]
enum RawRequired {
/// A single field name, as written in the JSON (not yet lowercased).
One(String),
/// A list of alternative field names, as written in the JSON.
OneOf(Vec<String>),
}
/// JSON shape of one entry-type description.
///
/// Both keys may be omitted; a missing key deserializes as an empty list.
#[derive(Deserialize, Default)]
struct RawEntry {
/// Required-field constraints; empty when the key is absent.
#[serde(default)]
required: Vec<RawRequired>,
/// Optional field names; empty when the key is absent.
#[serde(default)]
optional: Vec<String>,
}
/// JSON spelling of a field category.
///
/// `rename_all = "lowercase"` means the accepted JSON values are `"name"`,
/// `"date"`, `"verbatim"` and `"literal"`. Each variant converts to the
/// `FieldCategory` variant of the same name.
#[derive(Deserialize)]
#[serde(rename_all = "lowercase")]
enum RawCategory {
Name,
Date,
Verbatim,
Literal,
}
/// JSON shape of one field description.
#[derive(Deserialize)]
struct RawField {
/// Category of the field; has no serde default, so the key must be present.
category: RawCategory,
}
/// Top-level JSON shape of the field database.
///
/// Both maps may be omitted; a missing key deserializes as an empty map.
/// Keys are kept exactly as written here and lowercased later by `parse`.
#[derive(Deserialize, Default)]
struct RawDb {
/// Entry-type name -> description of that entry type.
#[serde(default)]
entries: HashMap<String, RawEntry>,
/// Field name -> description of that field.
#[serde(default)]
fields: HashMap<String, RawField>,
}
/// Lowercases `s` with `str::to_lowercase` and stores the result as a `SmolStr`.
///
/// Takes the `String` by value so it can be passed directly to
/// `Iterator::map` over owned JSON strings.
fn lower(s: String) -> SmolStr {
    let lowered = s.to_lowercase();
    SmolStr::new(lowered)
}
impl From<RawRequired> for RequiredField {
    /// Converts a deserialized constraint, lowercasing every field name in it.
    fn from(raw: RawRequired) -> Self {
        match raw {
            RawRequired::One(field) => Self::One(lower(field)),
            RawRequired::OneOf(alternatives) => {
                let mut lowered = Vec::with_capacity(alternatives.len());
                for field in alternatives {
                    lowered.push(lower(field));
                }
                Self::OneOf(lowered)
            }
        }
    }
}
impl From<RawEntry> for EntrySig {
fn from(raw: RawEntry) -> Self {
EntrySig {
required: raw.required.into_iter().map(Into::into).collect(),
optional: raw.optional.into_iter().map(lower).collect(),
}
}
}
impl From<RawCategory> for FieldCategory {
    /// Maps each JSON category onto the public variant of the same name.
    ///
    /// The match is deliberately exhaustive so that adding a `RawCategory`
    /// variant fails to compile until it is mapped here.
    fn from(raw: RawCategory) -> Self {
        match raw {
            RawCategory::Name => Self::Name,
            RawCategory::Date => Self::Date,
            RawCategory::Verbatim => Self::Verbatim,
            RawCategory::Literal => Self::Literal,
        }
    }
}
impl From<RawField> for FieldSig {
fn from(raw: RawField) -> Self {
FieldSig {
category: raw.category.into(),
}
}
}
fn parse(json: &str) -> serde_json::Result<BibFieldDb> {
let raw: RawDb = serde_json::from_str(json)?;
Ok(BibFieldDb {
entries: raw
.entries
.into_iter()
.map(|(name, sig)| (lower(name), sig.into()))
.collect(),
fields: raw
.fields
.into_iter()
.map(|(name, sig)| (lower(name), sig.into()))
.collect(),
})
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the `RequiredField::One` constraint for `field`.
    fn one(field: &str) -> RequiredField {
        RequiredField::One(SmolStr::new(field))
    }

    /// Required-field constraints of the built-in entry type `ty`.
    ///
    /// Panics (via `unwrap`) when the entry type is missing.
    fn required_of(ty: &str) -> &'static [RequiredField] {
        &builtin().entry(ty).unwrap().required
    }

    /// True when a single `OneOf` group in `required` lists every name in `fields`.
    fn has_alternatives(required: &[RequiredField], fields: &[&str]) -> bool {
        required.iter().any(|constraint| match constraint {
            RequiredField::OneOf(alts) => fields
                .iter()
                .all(|field| alts.iter().any(|alt| alt.as_str() == *field)),
            RequiredField::One(_) => false,
        })
    }

    #[test]
    fn bundled_json_parses() {
        let entry_count = builtin().entry_names().count();
        assert!(entry_count > 10);
    }

    #[test]
    fn covers_the_full_biblatex_data_model() {
        let db = builtin();

        let entry_types = [
            "software",
            "reference",
            "dataset",
            "online",
            "suppperiodical",
        ];
        for ty in entry_types {
            assert!(db.entry(ty).is_some(), "missing entry type `{ty}`");
        }

        let software = db.entry("software").unwrap();
        assert!(software.required.contains(&one("title")));

        let fields = [
            "langid",
            "shortjournal",
            "shorttitle",
            "pubstate",
            "urlyear",
        ];
        for f in fields {
            assert!(db.field(f).is_some(), "missing field `{f}`");
        }

        assert_eq!(db.category("urlyear"), FieldCategory::Date);
        assert_eq!(db.category("shortauthor"), FieldCategory::Name);
    }

    #[test]
    fn new_data_model_types_use_oneof_date_constraints() {
        let suppbook = builtin().entry("suppbook").expect("suppbook entry");
        assert!(has_alternatives(&suppbook.required, &["date"]));
    }

    #[test]
    fn existing_types_required_aligned_to_data_model() {
        assert!(required_of("book").contains(&one("author")));
        assert!(required_of("incollection").contains(&one("editor")));
        assert!(required_of("periodical").contains(&one("editor")));
        // `url` and `eprint` must be alternatives within the same group.
        assert!(has_alternatives(required_of("online"), &["url", "eprint"]));
        assert!(has_alternatives(required_of("misc"), &["date"]));
        assert!(required_of("mastersthesis").contains(&one("school")));
    }

    #[test]
    fn article_required_fields() {
        let article = builtin().entry("article").expect("article entry");
        for field in ["author", "title"] {
            assert!(article.required.contains(&one(field)));
        }
        assert!(has_alternatives(&article.required, &["date"]));
    }

    #[test]
    fn entry_lookup_is_case_insensitive() {
        let db = builtin();
        assert_eq!(db.entry("Article"), db.entry("article"));
        assert!(db.entry("InProceedings").is_some());
    }

    #[test]
    fn field_categories() {
        let db = builtin();
        let expectations = [
            ("author", FieldCategory::Name),
            ("Editor", FieldCategory::Name),
            ("year", FieldCategory::Date),
            ("url", FieldCategory::Verbatim),
            ("doi", FieldCategory::Verbatim),
            ("title", FieldCategory::Literal),
            // Unknown fields fall back to `Literal`.
            ("totallyunknownfield", FieldCategory::Literal),
        ];
        for (field, expected) in expectations {
            assert_eq!(db.category(field), expected);
        }
    }
}