pub mod bibmechanics;
pub mod raw;
pub mod types;
mod resolve;
use std::collections::HashMap;
use anyhow::anyhow;
use paste::paste;
use crate::bibmechanics::EntryType;
use crate::raw::RawBibliography;
use crate::resolve::resolve;
use crate::types::{
chunks_to_string, Date, EditorType, Gender, IntOrChunks, Pagination, Person, Type,
};
/// A fully parsed bibliography: entries in insertion order plus a lookup
/// table from cite keys (and aliases registered via the `ids` field) to
/// entry indices.
#[derive(Clone, Debug)]
pub struct Bibliography {
    // Entries in insertion order.
    items: Vec<Entry>,
    // Maps cite keys and aliases to indices into `items`; several keys may
    // point at the same entry.
    dict: HashMap<String, usize>,
}
/// A single bibliography entry (e.g. one `@article{...}` item).
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Entry {
    /// The key under which this entry is cited.
    pub cite_key: String,
    /// The kind of publication (article, book, ...).
    pub entry_type: EntryType,
    /// Field values keyed by field name; `set`/`get` lowercase their keys,
    /// so keys are effectively lowercase.
    pub fields: HashMap<String, Vec<Chunk>>,
}
/// A piece of a field value.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Chunk {
    /// Regular text; `format_sentence` may change its capitalization.
    Normal(String),
    /// Protected text that formatting copies through unchanged (see
    /// `ChunksExt::format_sentence`).
    Verbatim(String),
}
impl Bibliography {
    /// Create a new, empty bibliography.
    pub fn new() -> Self {
        Self { items: Vec::new(), dict: HashMap::new() }
    }

    /// Parse a bibliography from a source string.
    ///
    /// `allow_bibtex` is forwarded to the raw parser to also accept
    /// BibTeX-specific constructs.
    pub fn from_str(src: &str, allow_bibtex: bool) -> Self {
        Self::from_raw(RawBibliography::from_str(src, allow_bibtex))
    }

    /// Build a bibliography from a raw parse, resolving abbreviations in
    /// every field value.
    ///
    /// Entries whose cite key (or one of their `ids` aliases) collides with
    /// an already-added key are silently skipped (`.ok()`).
    pub fn from_raw(raw: RawBibliography) -> Self {
        let mut res = Self::new();
        let abbreviations = &raw.abbreviations;
        for entry in raw.entries {
            res.add(Entry {
                cite_key: entry.cite_key.to_string(),
                entry_type: EntryType::robust_from_str(entry.entry_type),
                fields: entry
                    .fields
                    .into_iter()
                    .map(|(key, value)| (key.to_string(), resolve(value, abbreviations)))
                    .collect(),
            })
            .ok();
        }
        res
    }

    /// Look up an entry by cite key or alias.
    pub fn get(&self, cite_key: &str) -> Option<&Entry> {
        let &index = self.dict.get(cite_key)?;
        self.items.get(index)
    }

    /// Look up an entry mutably by cite key or alias.
    pub fn get_mut(&mut self, cite_key: &str) -> Option<&mut Entry> {
        let &index = self.dict.get(cite_key)?;
        self.items.get_mut(index)
    }

    /// Get a clone of an entry with all `crossref` / `xdata` parents merged
    /// in. Returns `None` if the key is unknown or a referenced parent is
    /// missing.
    pub fn get_resolved(&mut self, cite_key: &str) -> Option<Entry> {
        self.get_mut(cite_key)
            .cloned()
            .and_then(|mut e| e.resolve_crossrefs(self).map(|_| e).ok())
    }

    /// Insert an entry, registering any `ids` field values as aliases.
    ///
    /// Fails if the cite key or one of its aliases is already in use.
    pub fn add(&mut self, entry: Entry) -> anyhow::Result<()> {
        if self.get(&entry.cite_key).is_some() {
            return Err(anyhow!("key already present"));
        }
        // Read key and aliases before moving the entry into the vector so
        // we do not have to fetch it back out via `items.last()`.
        let key = entry.cite_key.clone();
        let ids = entry.get_as::<Vec<String>>("ids").ok();
        self.dict.insert(key.clone(), self.items.len());
        self.items.push(entry);
        if let Some(ids) = ids {
            for alias in ids {
                self.add_alias(&key, &alias)?;
            }
        }
        Ok(())
    }

    /// Insert a new, empty entry with the given key and type, returning a
    /// mutable reference to it.
    pub fn add_empty(
        &mut self,
        cite_key: &str,
        entry_type: EntryType,
    ) -> anyhow::Result<&mut Entry> {
        if self.get(cite_key).is_some() {
            return Err(anyhow!("key already present"));
        }
        self.dict.insert(cite_key.to_string(), self.items.len());
        self.items.push(Entry::new(cite_key, entry_type));
        self.get_mut(cite_key)
            .ok_or_else(|| anyhow!("could not fetch inserted entry"))
    }

    /// Register `alias` as an additional cite key for an existing entry.
    pub fn add_alias(&mut self, cite_key: &str, alias: &str) -> anyhow::Result<()> {
        let &index = self
            .dict
            .get(cite_key)
            .ok_or_else(|| anyhow!("item for alias not found"))?;
        if self.dict.contains_key(alias) {
            return Err(anyhow!("alias name already in use"));
        }
        self.dict.insert(alias.to_string(), index);
        Ok(())
    }

    /// Remove the entry behind `cite_key` (which may be an alias) together
    /// with every key pointing at it, returning the removed entry.
    pub fn remove_item(&mut self, cite_key: &str) -> anyhow::Result<Entry> {
        let &index = self
            .dict
            .get(cite_key)
            .ok_or_else(|| anyhow!("item not found"))?;
        let entry = self.items.remove(index);
        // Drop every key (primary or alias) that pointed at the removed item.
        self.dict.retain(|_, &mut v| v != index);
        // The removal shifted all later items down by one; fix the indices
        // in place instead of cloning and rebuilding the whole map.
        for v in self.dict.values_mut() {
            if *v > index {
                *v -= 1;
            }
        }
        Ok(entry)
    }

    /// The number of entries.
    pub fn len(&self) -> usize {
        self.items.len()
    }

    /// Whether the bibliography contains no entries.
    pub fn is_empty(&self) -> bool {
        self.items.is_empty()
    }

    /// Iterate over the entries in insertion order.
    pub fn iter<'a>(&'a self) -> std::slice::Iter<'a, Entry> {
        self.items.iter()
    }

    /// Serialize the whole bibliography to Biblatex source.
    ///
    /// Takes `&mut self` because entry serialization renames legacy BibTeX
    /// fields (e.g. `journal` -> `journaltitle`) in place.
    pub fn as_biblatex_string(&mut self) -> String {
        let mut res = String::new();
        for e in self.items.iter_mut() {
            res += &e.as_biblatex_string();
            res.push('\n');
        }
        res
    }
}
impl IntoIterator for Bibliography {
    type Item = Entry;
    type IntoIter = std::vec::IntoIter<Entry>;

    /// Consume the bibliography, yielding its entries in insertion order.
    /// Aliases in the key map are discarded.
    fn into_iter(self) -> Self::IntoIter {
        self.items.into_iter()
    }
}
impl Entry {
    /// Create a new, empty entry with the given cite key and type.
    pub fn new(cite_key: &str, entry_type: EntryType) -> Self {
        Self {
            cite_key: cite_key.to_string(),
            entry_type,
            fields: HashMap::new(),
        }
    }

    /// Get the chunk slice of a field, matching the key case-insensitively.
    pub fn get(&self, key: &str) -> Option<&[Chunk]> {
        self.fields.get(&key.to_lowercase()).map(AsRef::as_ref)
    }

    /// Get a field and parse it into `T`, erroring if absent or malformed.
    pub fn get_as<T: Type>(&self, key: &str) -> anyhow::Result<T> {
        self.get(key)
            .ok_or_else(|| anyhow!("The {} field is not present", key))
            .and_then(|chunks| chunks.parse::<T>())
    }

    /// Set a field; the key is stored lowercased.
    pub fn set(&mut self, key: &str, chunks: Vec<Chunk>) {
        self.fields.insert(key.to_lowercase(), chunks);
    }

    /// Serialize `value` into chunks and store it under `key`.
    pub fn set_as<T: Type>(&mut self, key: &str, value: &T) -> anyhow::Result<()> {
        let chunks = value.to_chunks()?;
        self.fields.insert(key.to_lowercase(), chunks);
        Ok(())
    }

    /// Remove a field, returning its chunks if it was present.
    pub fn delete(&mut self, key: &str) -> Option<Vec<Chunk>> {
        // Keys are stored lowercased (see `set`), so the lookup must be
        // lowercased too; previously a mixed-case key silently failed to
        // delete anything even though `get` found it.
        self.fields.remove(&key.to_lowercase())
    }

    /// Move the contents of `old_key` (if any) to `new_key`.
    fn rename(&mut self, old_key: &str, new_key: &str) {
        if let Some(chunks) = self.delete(old_key) {
            self.set(new_key, chunks);
        }
    }

    /// Serialize this entry as Biblatex source, migrating legacy BibTeX
    /// field names (`journal`, `address`, `school`) to their Biblatex
    /// equivalents in place.
    pub fn as_biblatex_string(&mut self) -> String {
        self.rename("journal", "journaltitle");
        self.rename("address", "location");
        self.rename("school", "institution");
        let mut res = format!("@{}{{{},\n", self.entry_type.to_biblatex(), self.cite_key);
        for (key, value) in self.fields.iter() {
            res.push_str(&format!("{} = {},\n", key, chunks_to_string(value)))
        }
        res.push('}');
        res
    }

    /// Serialize this entry as BibTeX source, migrating Biblatex-only
    /// field names back and expanding the `date` field into
    /// `year`/`month`/`day` fields.
    pub fn as_bibtex_string(&mut self) -> String {
        let bibtex_type = self.entry_type.to_bibtex();
        self.rename("journaltitle", "journal");
        self.rename("location", "address");
        // Only thesis types use the legacy `school` field in BibTeX.
        if bibtex_type == EntryType::PhdThesis || bibtex_type == EntryType::MastersThesis
        {
            self.rename("institution", "school");
        }
        if let Ok(date) = self.get_date() {
            for (key, value) in date.to_fieldset() {
                self.set(&key, vec![Chunk::Normal(value)]);
            }
        }
        let mut res = format!("@{}{{{},\n", bibtex_type, self.cite_key);
        for (key, value) in self.fields.iter() {
            // `date` was expanded into year/month/day above and is not
            // valid BibTeX itself.
            if key == "date" {
                continue;
            }
            res.push_str(&format!("{} = {},\n", key, chunks_to_string(value)))
        }
        res.push('}');
        res
    }

    /// Collect the cite keys of all parents referenced via `crossref` and
    /// `xref`.
    pub fn get_parents(&self) -> Vec<String> {
        let mut res = vec![];
        if let Ok(crossref) = self.get_as::<String>("crossref") {
            res.push(crossref);
        }
        if let Ok(mut xrefs) = self.get_as::<Vec<String>>("xref") {
            res.append(&mut xrefs);
        }
        res
    }

    /// Copy inheritable fields from one parent into this entry, following
    /// Biblatex-style data inheritance: title-like fields are remapped
    /// according to the parent's type, some fields are never inherited,
    /// and existing child fields are never overwritten.
    fn resolve_single_crossref(&mut self, crossref: Entry) {
        let typ = self.entry_type.clone();
        let mut requirements = typ.get_requirements();
        let mut active_fields = requirements.required.clone();
        active_fields.append(&mut requirements.optional);
        active_fields.append(&mut requirements.page_chapter_field.get_all_possible());
        active_fields.append(&mut requirements.author_eds_field.get_all_possible());
        // XData containers inherit every field the parent has.
        if self.entry_type == EntryType::XData {
            active_fields.extend(crossref.fields.keys().map(|f| f.as_str()));
        }
        // Map a child `...title`/`...subtitle`/`...titleaddon` field to the
        // parent's plain title field. Suffix checks are required here: the
        // previous `contains('a')` test misrouted "maintitle" (which
        // contains an 'a') to the parent's "titleaddon" instead of "title".
        let title_key = |f: &str| {
            if f.ends_with("subtitle") {
                "subtitle"
            } else if f.ends_with("addon") {
                "titleaddon"
            } else {
                "title"
            }
        };
        for f in active_fields {
            // Never overwrite a field the child already has.
            if self.get(f).is_some() {
                continue;
            }
            match f {
                "journaltitle" | "journalsubtitle"
                    if crossref.entry_type == EntryType::Periodical =>
                {
                    if let Some(item) = crossref.get(title_key(f)) {
                        self.set(f, item.to_vec())
                    }
                }
                "booktitle" | "booksubtitle" | "booktitleaddon"
                    if crossref.entry_type.is_collection() =>
                {
                    if let Some(item) = crossref.get(title_key(f)) {
                        self.set(f, item.to_vec())
                    }
                }
                "maintitle" | "mainsubtitle" | "maintitleaddon"
                    if crossref.entry_type.is_multi_volume() =>
                {
                    if let Some(item) = crossref.get(title_key(f)) {
                        self.set(f, item.to_vec())
                    }
                }
                // Alias fields fall back to their legacy/modern counterpart
                // in the parent.
                "address" => {
                    if let Some(item) =
                        crossref.get(f).or_else(|| crossref.get("location"))
                    {
                        self.set(f, item.to_vec())
                    }
                }
                "institution" => {
                    if let Some(item) = crossref.get(f).or_else(|| crossref.get("school"))
                    {
                        self.set(f, item.to_vec())
                    }
                }
                "school" => {
                    if let Some(item) =
                        crossref.get(f).or_else(|| crossref.get("institution"))
                    {
                        self.set(f, item.to_vec())
                    }
                }
                "journaltitle" => {
                    if let Some(item) =
                        crossref.get(f).or_else(|| crossref.get("journal"))
                    {
                        self.set(f, item.to_vec())
                    }
                }
                // Deliberately never inherited from a parent.
                "title" | "addendum" | "note" => {}
                _ => {
                    if let Some(item) = crossref.get(f) {
                        self.set(f, item.to_vec())
                    }
                }
            }
        }
        if self.entry_type == EntryType::XData {
            return;
        }
        if requirements.needs_date {
            if let Ok(date) = crossref.get_date() {
                self.set_date(date).expect("date set failure");
            }
        }
    }

    /// Merge all `crossref` and `xdata` parents from `bib` into this entry,
    /// resolving parents recursively first, then remove the reference
    /// fields themselves. Fails if a referenced entry is missing.
    fn resolve_crossrefs(&mut self, bib: &mut Bibliography) -> anyhow::Result<()> {
        let crossref = self.get_as::<String>("crossref").map(|s| {
            bib.get(&s)
                .cloned()
                .ok_or_else(|| anyhow!("crossref'd item not found"))
        });
        let references = self.get_as::<Vec<String>>("xdata").map(|keys| {
            keys.iter()
                .map(|s| {
                    bib.get(s)
                        .cloned()
                        .ok_or_else(|| anyhow!("crossref'd item not found"))
                })
                .collect::<Vec<anyhow::Result<Entry>>>()
        });
        let mut refs = vec![];
        if let Ok(crossref) = crossref {
            refs.push(crossref?);
        }
        if let Ok(references) = references {
            for r in references {
                refs.push(r?);
            }
        }
        for mut crossref in refs {
            crossref.resolve_crossrefs(bib)?;
            self.resolve_single_crossref(crossref);
        }
        self.delete("crossref");
        self.delete("xdata");
        Ok(())
    }
}
// Generates a `get_<name>` accessor (and, via the `single_set` arms, a
// `set_<name>` mutator) for each listed field. With `=> Type`, the getter
// parses the chunks into that type; without it, the raw `&[Chunk]` slice is
// returned. Doc comments are assembled with `paste!`.
macro_rules! fields {
    ($($name:ident: $field_name:expr $(=> $res:ty)?),* $(,)*) => {
        $(
            paste! {
                #[doc = "Get and parse the `" $field_name "` field."]
                pub fn [<get_ $name>](&self) -> anyhow::Result<fields!(@type $($res)?)> {
                    self.get($field_name)
                        .ok_or_else(|| anyhow!("The {} field is not present", $field_name))
                        $(.and_then(|chunks| chunks.parse::<$res>()))?
                }
                fields!(single_set $name => $field_name, $($res)?);
            }
        )*
    };
    // Setter taking raw chunks (no result type given in the field list).
    (single_set $name:ident => $field_name:expr, ) => {
        paste! {
            #[doc = "Set a value in the `" $field_name "` field."]
            pub fn [<set_ $name>](&mut self, item: Vec<Chunk>) -> anyhow::Result<()> {
                self.set($field_name, item);
                Ok(())
            }
        }
    };
    // Setter serializing a typed value through `Type::to_chunks`.
    (single_set $name:ident => $field_name:expr, $other_type:ty) => {
        paste! {
            #[doc = "Set a value in the `" $field_name "` field."]
            pub fn [<set_ $name>](&mut self, item: $other_type) -> anyhow::Result<()> {
                let chunks = item.to_chunks()?;
                self.set($field_name, chunks);
                Ok(())
            }
        }
    };
    // Helper: expands to the getter's result type.
    (@type) => {&[Chunk]};
    (@type $res:ty) => {$res};
}
// Like `fields!`, but the generated getter falls back to a second (alias)
// field name when the primary one is absent. Setters and the result-type
// helper are delegated to `fields!`.
macro_rules! alias_fields {
    ($($name:ident: $field_name:expr, $field_alias:expr $(=> $res:ty)?),* $(,)*) => {
        $(
            paste! {
                #[doc = "Get and parse the `" $field_name "` field, falling back on `" $field_alias "` if `" $field_name "` is empty."]
                pub fn [<get_ $name>](&self) -> anyhow::Result<fields!(@type $($res)?)> {
                    self.get($field_name)
                        .or_else(|| self.get($field_alias))
                        .ok_or_else(|| anyhow!("The {} field is not present", $field_name))
                        $(.and_then(|chunks| chunks.parse::<$res>()))?
                }
                fields!(single_set $name => $field_name, $($res)?);
            }
        )*
    };
    // NOTE(review): these `@type` arms appear unreachable — the arm above
    // expands `fields!(@type ...)`, not `alias_fields!(@type ...)`.
    // Confirm before removing.
    (@type) => {&[Chunk]};
    (@type $res:ty) => {$res};
}
// Generates a `get_<name>`/`set_<name>` pair for a date field family. The
// getter prefers the `<prefix>date` field and falls back to assembling a
// `Date` from `<prefix>year`/`<prefix>month`/`<prefix>day`; the setter
// writes `<prefix>date` and removes the three legacy fields.
macro_rules! date_fields {
    ($($name:ident: $field_prefix:expr),* $(,)*) => {
        $(
            paste! {
                #[doc = "Get and parse the `" $field_prefix "date` field, falling back to the `" $field_prefix "year`, `" $field_prefix "month`, and `" $field_prefix "day` fields when not present."]
                pub fn [<get_ $name>](&self) -> anyhow::Result<Date> {
                    if let Some(chunks) = self.get(concat!($field_prefix, "date")) {
                        chunks.parse::<Date>()
                    } else {
                        Date::new_from_three_fields(
                            self.get(concat!($field_prefix, "year")),
                            self.get(concat!($field_prefix, "month")),
                            self.get(concat!($field_prefix, "day")),
                        )
                    }
                }
                #[doc = "Set a value in the `" $field_prefix "date` field."]
                pub fn [<set_ $name>](&mut self, item: Date) -> anyhow::Result<()> {
                    let chunks = item.to_chunks()?;
                    self.set(concat!($field_prefix, "date"), chunks);
                    // Remove the legacy split fields so they cannot disagree
                    // with the newly written date.
                    self.delete(concat!($field_prefix, "year"));
                    self.delete(concat!($field_prefix, "month"));
                    self.delete(concat!($field_prefix, "day"));
                    Ok(())
                }
            }
        )*
    };
}
impl Entry {
    // Standard single-field accessors. `=> Type` selects the parsed result
    // type; without it the raw chunk slice is returned.
    fields! {
        author: "author" => Vec<Person>,
        book_title: "booktitle",
        chapter: "chapter",
        edition: "edition" => IntOrChunks,
        how_published: "howpublished",
        note: "note",
        number: "number",
        organization: "organization" => Vec<Vec<Chunk>>,
        pages: "pages" => Vec<std::ops::Range<u32>>,
        publisher: "publisher" => Vec<Vec<Chunk>>,
        series: "series",
        title: "title",
        type: "type" => String,
        volume: "volume" => i64,
    }
    // Date accessors with year/month/day fallback (see `date_fields!`).
    date_fields! {
        date: "",
        event_date: "event",
        orig_date: "orig",
        url_date: "url",
    }
    /// Collect all editors from the `editor`, `editora`, `editorb`, and
    /// `editorc` fields, paired with their editor types. The type defaults
    /// to `EditorType::Editor` when the matching `...type` field is missing
    /// or unparsable. Errors if no editor field is present at all.
    pub fn get_editors(&self) -> anyhow::Result<Vec<(Vec<Person>, EditorType)>> {
        let mut editors = vec![];
        let mut parse_editor_field = |name_field: &str, editor_field: &str| {
            self.get(name_field)
                .and_then(|chunks| chunks.parse::<Vec<Person>>().ok())
                .map(|persons| {
                    let editor_type = self
                        .get(editor_field)
                        .and_then(|chunks| chunks.parse::<EditorType>().ok())
                        .unwrap_or(EditorType::Editor);
                    editors.push((persons, editor_type));
                });
        };
        parse_editor_field("editor", "editortype");
        parse_editor_field("editora", "editoratype");
        parse_editor_field("editorb", "editorbtype");
        parse_editor_field("editorc", "editorctype");
        if editors.is_empty() {
            return Err(anyhow!("No editor fields present"));
        }
        Ok(editors)
    }
    // Accessors with a fallback alias, e.g. `address` <-> `location`.
    alias_fields! {
        address: "address", "location",
        location: "location", "address",
        annotation: "annotation", "annote",
        eprint_type: "eprinttype", "archiveprefix",
        journal: "journal", "journaltitle",
        journal_title: "journaltitle", "journal",
        sort_key: "key", "sortkey" => String,
        file: "file", "pdf" => String,
        school: "school", "institution",
        institution: "institution", "school",
    }
    // Remaining standard Biblatex fields.
    fields! {
        abstract: "abstract",
        addendum: "addendum",
        afterword: "afterword" => Vec<Person>,
        annotator: "annotator" => Vec<Person>,
        author_type: "authortype" => String,
        book_author: "bookauthor" => Vec<Person>,
        book_pagination: "bookpagination" => Pagination,
        book_subtitle: "booksubtitle",
        book_title_addon: "booktitleaddon",
        commentator: "commentator" => Vec<Person>,
        doi: "doi" => String,
        eid: "eid",
        entry_subtype: "entrysubtype",
        eprint: "eprint" => String,
        eprint_class: "eprintclass",
        eventtitle: "eventtitle",
        eventtitle_addon: "eventtitleaddon",
        foreword: "foreword" => Vec<Person>,
        holder: "holder" => Vec<Person>,
        index_title: "indextitle",
        introduction: "introduction" => Vec<Person>,
        isan: "isan",
        isbn: "isbn",
        ismn: "ismn",
        isrn: "isrn",
        issn: "issn",
        issue: "issue",
        issue_subtitle: "issuesubtitle",
        issue_title: "issuetitle",
        issue_title_addon: "issuetitleaddon",
        iswc: "iswc",
        journal_subtitle: "journalsubtitle",
        journal_title_addon: "journaltitleaddon",
        keywords: "keywords",
        label: "label",
        language: "language" => String,
        library: "library",
        main_subtitle: "mainsubtitle",
        main_title: "maintitle",
        main_title_addon: "maintitleaddon",
        name_addon: "nameaddon",
        options: "options",
        orig_language: "origlanguage" => String,
        orig_location: "origlocation",
        page_total: "pagetotal",
        pagination: "pagination" => Pagination,
        part: "part",
        pubstate: "pubstate",
        reprint_title: "reprinttitle",
        short_author: "shortauthor" => Vec<Person>,
        short_editor: "shorteditor" => Vec<Person>,
        shorthand: "shorthand",
        shorthand_intro: "shorthandintro",
        short_journal: "shortjournal",
        short_series: "shortseries",
        short_title: "shorttitle",
        subtitle: "subtitle",
        title_addon: "titleaddon",
        translator: "translator" => Vec<Person>,
        url: "url" => String,
        venue: "venue",
        version: "version",
        volumes: "volumes" => i64,
        gender: "gender" => Gender,
    }
}
/// Convenience operations on slices of chunks.
pub trait ChunksExt {
    /// Parse the chunks into a typed value via `Type::from_chunks`.
    fn parse<T: Type>(&self) -> anyhow::Result<T>;
    /// Format as a sentence: the first character of leading normal text is
    /// uppercased; verbatim chunks are copied through unchanged.
    fn format_sentence(&self) -> String;
    /// Concatenate all chunks without any case changes.
    fn format_verbatim(&self) -> String;
}
impl ChunksExt for [Chunk] {
    /// Delegate parsing to the target type's chunk parser.
    fn parse<T: Type>(&self) -> anyhow::Result<T> {
        T::from_chunks(self)
    }

    /// Render the chunks as a sentence: the very first character, if it
    /// belongs to a normal chunk, is uppercased; everything after it — and
    /// all verbatim chunks — is copied through unchanged.
    fn format_sentence(&self) -> String {
        let mut out = String::new();
        let mut first = true;
        for chunk in self {
            match chunk {
                Chunk::Normal(text) => {
                    for c in text.chars() {
                        if first {
                            out.extend(c.to_uppercase());
                        } else {
                            out.push(c);
                        }
                        first = false;
                    }
                }
                Chunk::Verbatim(text) => out.push_str(text),
            }
            // A verbatim chunk also consumes the "first character" slot.
            first = false;
        }
        out
    }

    /// Concatenate every chunk's text verbatim, normal or protected.
    fn format_verbatim(&self) -> String {
        self.iter()
            .map(|chunk| match chunk {
                Chunk::Normal(text) | Chunk::Verbatim(text) => text.as_str(),
            })
            .collect()
    }
}
#[cfg(test)]
mod tests {
    use std::fs;
    use super::*;

    // NOTE(review): these tests read fixture files from a `test/` directory
    // (gral.bib, ds.bib, libra.bib, rass.bib, cross.bib) that is not part
    // of this source view; they will panic if the files are absent.

    #[test]
    fn test_gral_paper() {
        dump_debug("test/gral.bib");
    }
    #[test]
    fn test_ds_report() {
        dump_debug("test/ds.bib");
    }
    #[test]
    fn test_libra_paper() {
        dump_author_title("test/libra.bib");
    }
    #[test]
    fn test_rass_report() {
        dump_author_title("test/rass.bib");
    }
    // Exercises alias registration, lookup through aliases, and removal of
    // an item via one of its aliases.
    #[test]
    fn test_alias() {
        let contents = fs::read_to_string("test/cross.bib").unwrap();
        let mut bibliography = Bibliography::from_str(&contents, true);
        assert_eq!(bibliography.get("issue201"), bibliography.get("github"));
        bibliography.add_alias("issue201", "crap").expect("this must work");
        assert_eq!(bibliography.get("crap"), bibliography.get("unstable"));
        bibliography.remove_item("crap").expect("removal must work");
        let cf = bibliography.get("cannonfodder").unwrap();
        assert_eq!(cf.entry_type, EntryType::Misc);
        assert_eq!(cf.cite_key, "cannonfodder");
    }
    // Checks Biblatex vs. BibTeX serialization of the same entry: field
    // renames (institution/school) and date expansion into year/month.
    #[test]
    fn test_bibtex_conversion() {
        let contents = fs::read_to_string("test/cross.bib").unwrap();
        let mut bibliography = Bibliography::from_str(&contents, true);
        let biblatex = bibliography.get_mut("haug2019").unwrap().as_biblatex_string();
        assert!(biblatex.contains("institution = {Technische Universität Berlin},"));
        let bibtex = bibliography.get_mut("haug2019").unwrap().as_bibtex_string();
        assert!(bibtex.contains("school = {Technische Universität Berlin},"));
        assert!(bibtex.contains("year = {2019},"));
        assert!(bibtex.contains("month = {10},"));
        assert!(!bibtex.contains("institution"));
        assert!(!bibtex.contains("date"));
    }
    // Exercises crossref/xref resolution, including a recursive parent
    // chain and inherited publisher/location/title fields.
    #[test]
    fn test_crossref() {
        let contents = fs::read_to_string("test/cross.bib").unwrap();
        let mut bibliography = Bibliography::from_str(&contents, true);
        let e = bibliography.get_resolved("macmillan").unwrap();
        assert_eq!(e.get_publisher().unwrap()[0].format_verbatim(), "Macmillan");
        assert_eq!(
            e.get_location().unwrap().format_verbatim(),
            "New York and London"
        );
        let book = bibliography.get_resolved("recursive").unwrap();
        assert_eq!(
            book.get_publisher().unwrap()[0].format_verbatim(),
            "Macmillan"
        );
        assert_eq!(
            book.get_location().unwrap().format_verbatim(),
            "New York and London"
        );
        assert_eq!(
            book.get_title().unwrap().format_verbatim(),
            "Recursive shennenigans and other important stuff"
        );
        assert_eq!(bibliography.get("arrgh").unwrap().get_parents(), vec![
            "polecon".to_string()
        ]);
        let arrgh = bibliography.get_resolved("arrgh").unwrap();
        assert_eq!(arrgh.entry_type, EntryType::Article);
        assert_eq!(arrgh.get_volume().unwrap(), 115);
        assert_eq!(arrgh.get_editors().unwrap()[0].0[0].name, "Uhlig");
        assert_eq!(arrgh.get_number().unwrap().format_verbatim(), "6");
        assert_eq!(
            arrgh.get_journal().unwrap().format_verbatim(),
            "Journal of Political Economy"
        );
        assert_eq!(
            arrgh.get_title().unwrap().format_verbatim(),
            "An‐arrgh‐chy: The Law and Economics of Pirate Organization"
        );
    }
    // Debug-print an entire parsed bibliography (smoke test helper).
    fn dump_debug(file: &str) {
        let contents = fs::read_to_string(file).unwrap();
        let bibliography = Bibliography::from_str(&contents, true);
        println!("{:#?}", bibliography);
    }
    // Print author/title for each entry (smoke test helper).
    fn dump_author_title(file: &str) {
        let contents = fs::read_to_string(file).unwrap();
        let mut bibliography = Bibliography::from_str(&contents, true);
        println!("{}", bibliography.as_biblatex_string());
        for x in bibliography {
            let authors = x.get_author().unwrap_or_default();
            for a in authors {
                print!("{}, ", a);
            }
            println!("\"{}\".", x.get_title().unwrap().format_sentence());
        }
    }
}