use std::collections::HashMap;
use std::path::{Path, PathBuf};
use crate::bib::{BibError, ParsedEntry, Span, validate_ris};
/// On-disk format a bibliography entry was parsed from.
///
/// The variant is picked by [`load_bibliography_from_text`] from the file
/// extension of the bibliography path.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum BibFormat {
    /// BibTeX; also the fallback for extensions that match no other format.
    BibTeX,
    /// CSL data serialized as JSON (`.json`).
    CslJson,
    /// CSL data serialized as YAML (`.yaml` / `.yml`).
    CslYaml,
    /// RIS tagged format (`.ris`).
    Ris,
}
/// A single bibliography entry, normalized across all supported formats.
#[derive(Clone, Debug)]
pub struct BibEntry {
    /// Citation key exactly as written in the source file (original casing).
    pub key: String,
    /// Entry type as reported by the parser, when one is available.
    pub entry_type: Option<String>,
    /// Field name to field value, as produced by the parser.
    pub fields: HashMap<String, String>,
    /// Bibliography file this entry was loaded from.
    pub source_file: PathBuf,
    /// Span the parser reported for this entry.
    pub span: Span,
    /// Format of the file the entry was parsed from.
    pub format: BibFormat,
}
/// Where an entry with a given citation key lives: which file, and which span
/// the parser reported for it.
#[derive(Clone, Debug)]
pub struct BibEntryLocation {
    /// Citation key as written in the source file.
    pub key: String,
    /// Bibliography file containing the entry.
    pub file: PathBuf,
    /// Span the parser reported for the entry.
    pub span: Span,
}
/// Records that a citation key was defined more than once.
///
/// Keys are compared after lowercasing, so `first.key` and `duplicate.key`
/// may differ in case.
#[derive(Clone, Debug)]
pub struct BibDuplicate {
    /// Key of the later (rejected) definition, as written in its source file.
    pub key: String,
    /// Location of the definition that was kept in the index.
    pub first: BibEntryLocation,
    /// Location of the later definition that collided with `first`.
    pub duplicate: BibEntryLocation,
}
/// Aggregated result of loading one or more bibliography files.
#[derive(Clone, Debug)]
pub struct BibIndex {
    /// Entries keyed by their lowercased citation key; the first definition
    /// of a key wins.
    pub entries: HashMap<String, BibEntry>,
    /// Every later definition whose lowercased key was already present.
    pub duplicates: Vec<BibDuplicate>,
    /// Errors reported while validating or parsing bibliography text.
    pub errors: Vec<BibError>,
    /// Files that could not be read from disk.
    pub load_errors: Vec<BibLoadError>,
}
/// A bibliography file that could not be read.
#[derive(Clone, Debug)]
pub struct BibLoadError {
    /// Path that was passed to the loader.
    pub path: PathBuf,
    /// Text of the I/O error returned while reading `path`.
    pub message: String,
}
/// Reads every file in `paths` and merges the parsed results into one [`BibIndex`].
///
/// A path that occurs more than once in `paths` is loaded only the first time;
/// paths are compared as given, without any canonicalization. A file that cannot
/// be read does not abort the load: the failure is appended to `load_errors` and
/// the remaining paths are still processed.
pub fn load_bibliography(paths: &[PathBuf]) -> BibIndex {
    let mut merged = BibIndex {
        entries: HashMap::new(),
        duplicates: Vec::new(),
        errors: Vec::new(),
        load_errors: Vec::new(),
    };
    let mut visited = std::collections::HashSet::new();

    for path in paths {
        // `insert` returns `false` when this exact path was already handled.
        if !visited.insert(path.as_path()) {
            continue;
        }
        match std::fs::read_to_string(path) {
            Ok(contents) => merged.merge_from(load_bibliography_from_text(&contents, path)),
            Err(io_err) => merged.load_errors.push(BibLoadError {
                path: path.clone(),
                message: io_err.to_string(),
            }),
        }
    }

    merged
}
pub fn load_bibliography_from_text(text: &str, path: &Path) -> BibIndex {
let mut entries: HashMap<String, BibEntry> = HashMap::new();
let mut duplicates = Vec::new();
let mut errors = Vec::new();
let load_errors = Vec::new();
let extension = path.extension().and_then(|ext| ext.to_str()).unwrap_or("");
let (format, parsed_result, parse_errors): (
BibFormat,
Result<Vec<ParsedEntry>, String>,
Vec<BibError>,
) = match extension {
"json" => {
use crate::bib::parse_csl_json_full;
(BibFormat::CslJson, parse_csl_json_full(text), Vec::new())
}
"yaml" | "yml" => {
use crate::bib::parse_csl_yaml_full;
(BibFormat::CslYaml, parse_csl_yaml_full(text), Vec::new())
}
"ris" => {
use crate::bib::parse_ris_full;
if let Err(message) = validate_ris(text) {
errors.push(BibError {
message: message.clone(),
span: None,
});
return BibIndex {
entries,
duplicates,
errors,
load_errors,
};
}
(BibFormat::Ris, parse_ris_full(text), Vec::new())
}
_ => {
use crate::bib::parse_bibtex_full;
let (entries, parse_errors) = parse_bibtex_full(text);
(BibFormat::BibTeX, Ok(entries), parse_errors)
}
};
errors.extend(parse_errors);
match parsed_result {
Ok(parsed_entries) => {
for (key, entry_type, entry_fields, span) in parsed_entries {
let key_lower = key.to_lowercase();
let unified_entry = BibEntry {
key: key.clone(),
entry_type,
fields: entry_fields,
source_file: path.to_path_buf(),
span,
format,
};
if let Some(existing) = entries.get(&key_lower) {
duplicates.push(BibDuplicate {
key: key.clone(),
first: BibEntryLocation {
key: existing.key.clone(),
file: existing.source_file.clone(),
span: existing.span,
},
duplicate: BibEntryLocation {
key: key.clone(),
file: path.to_path_buf(),
span,
},
});
} else {
entries.insert(key_lower, unified_entry);
}
}
}
Err(message) => {
errors.push(BibError {
message,
span: None,
});
}
}
BibIndex {
entries,
duplicates,
errors,
load_errors,
}
}
impl BibIndex {
pub fn merge_from(&mut self, other: BibIndex) {
self.errors.extend(other.errors);
self.load_errors.extend(other.load_errors);
self.duplicates.extend(other.duplicates);
for (key, entry) in other.entries {
if let Some(existing) = self.entries.get(&key) {
self.duplicates.push(BibDuplicate {
key: entry.key.clone(),
first: BibEntryLocation {
key: existing.key.clone(),
file: existing.source_file.clone(),
span: existing.span,
},
duplicate: BibEntryLocation {
key: entry.key.clone(),
file: entry.source_file.clone(),
span: entry.span,
},
});
} else {
self.entries.insert(key, entry);
}
}
}
pub fn get(&self, key: &str) -> Option<&BibEntry> {
self.entries.get(&key.to_lowercase())
}
pub fn iter_keys(&self) -> impl Iterator<Item = &String> {
self.entries.keys()
}
pub fn entries(&self) -> impl Iterator<Item = &BibEntry> {
self.entries.values()
}
pub fn get_location(&self, key: &str) -> Option<BibEntryLocation> {
self.entries
.get(&key.to_lowercase())
.map(|entry| BibEntryLocation {
key: entry.key.clone(),
file: entry.source_file.clone(),
span: entry.span,
})
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Listing the same file twice must load it once: a single entry and no
    /// duplicate reported against itself.
    #[test]
    fn load_bibliography_dedupes_paths() {
        let scratch = TempDir::new().unwrap();
        let refs_file = scratch.path().join("refs.bib");
        std::fs::write(&refs_file, "@book{Test,}\n").unwrap();

        let requested = [refs_file.clone(), refs_file];
        let index = load_bibliography(&requested);

        assert!(index.duplicates.is_empty());
        assert_eq!(index.entries.len(), 1);
    }
}