#![allow(dead_code)]
use serde::Deserialize;
/// One bibliography record, readable from an Hjson body (`from_hjson`) and
/// writable as Hjson (`to_hjson`) or BibTeX (`to_bibtex`).
///
/// `#[serde(default)]` allows any field to be omitted from the input; an
/// omitted field takes its `Default` value (empty string or `None`). Every
/// field is routed through `de_string` / `de_opt_string`, so a value that the
/// parser reports as a number or boolean is accepted and stored as text.
#[derive(Debug, Default, Clone, Deserialize)]
#[serde(default)]
pub struct BibEntry {
/// Citation key. `is_valid` rejects an entry whose key is blank.
#[serde(deserialize_with = "de_string")]
pub key: String,
/// Entry type (e.g. `article`). A blank type is written out as `misc`.
#[serde(deserialize_with = "de_string")]
pub entry_type: String,
/// Author text, kept exactly as supplied.
#[serde(deserialize_with = "de_string")]
pub author: String,
/// Title text, kept exactly as supplied.
#[serde(deserialize_with = "de_string")]
pub title: String,
/// Year, stored as text. The BibTeX importer also fills this from the part
/// of a `date` field before the first `-` when no year has been set yet.
#[serde(deserialize_with = "de_string")]
pub year: String,
// Optional fields. Each one is written by `to_hjson` / `to_bibtex` only when
// it is present and not blank after trimming.
#[serde(deserialize_with = "de_opt_string")]
pub journal: Option<String>,
#[serde(deserialize_with = "de_opt_string")]
pub volume: Option<String>,
#[serde(deserialize_with = "de_opt_string")]
pub number: Option<String>,
#[serde(deserialize_with = "de_opt_string")]
pub pages: Option<String>,
#[serde(deserialize_with = "de_opt_string")]
pub publisher: Option<String>,
#[serde(deserialize_with = "de_opt_string")]
pub booktitle: Option<String>,
#[serde(deserialize_with = "de_opt_string")]
pub editor: Option<String>,
#[serde(deserialize_with = "de_opt_string")]
pub edition: Option<String>,
#[serde(deserialize_with = "de_opt_string")]
pub url: Option<String>,
#[serde(deserialize_with = "de_opt_string")]
pub doi: Option<String>,
#[serde(deserialize_with = "de_opt_string")]
pub isbn: Option<String>,
#[serde(deserialize_with = "de_opt_string")]
pub note: Option<String>,
/// Abstract text. The Rust field carries a trailing underscore because
/// `abstract` is a reserved word; the serialized name is `abstract`.
#[serde(rename = "abstract", deserialize_with = "de_opt_string")]
pub abstract_: Option<String>,
#[serde(deserialize_with = "de_opt_string")]
pub keywords: Option<String>,
}
/// Serde visitor that accepts a string, integer, float or boolean and yields
/// the value rendered as a `String`.
struct ScalarString;

impl serde::de::Visitor<'_> for ScalarString {
    type Value = String;

    /// Description used in serde's "invalid type" error messages.
    fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        f.write_str("a string, number, or boolean")
    }

    // Textual inputs pass through unchanged.
    fn visit_string<E>(self, text: String) -> Result<String, E> {
        Ok(text)
    }

    fn visit_str<E>(self, text: &str) -> Result<String, E> {
        Ok(String::from(text))
    }

    // Non-textual scalars are rendered with their `Display` form.
    fn visit_bool<E>(self, flag: bool) -> Result<String, E> {
        Ok(format!("{flag}"))
    }

    fn visit_u64<E>(self, number: u64) -> Result<String, E> {
        Ok(format!("{number}"))
    }

    fn visit_i64<E>(self, number: i64) -> Result<String, E> {
        Ok(format!("{number}"))
    }

    fn visit_f64<E>(self, number: f64) -> Result<String, E> {
        Ok(format!("{number}"))
    }
}
/// `deserialize_with` helper for required text fields: accepts whatever scalar
/// the input holds and returns it as a `String` via [`ScalarString`].
fn de_string<'de, D>(d: D) -> Result<String, D::Error>
where
    D: serde::Deserializer<'de>,
{
    let text = d.deserialize_any(ScalarString)?;
    Ok(text)
}
/// `deserialize_with` helper for optional text fields: accepts whatever scalar
/// the input holds and wraps its text in `Some`.
///
/// On success this always yields `Some`; `None` only arises from the struct's
/// `#[serde(default)]` when the field is absent from the input.
fn de_opt_string<'de, D>(d: D) -> Result<Option<String>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    d.deserialize_any(ScalarString).map(Some)
}
impl BibEntry {
    /// Parses an Hjson document into an entry.
    ///
    /// Returns `None` when `serde_hjson` reports any error; the error itself
    /// is discarded.
    pub fn from_hjson(body: &str) -> Option<BibEntry> {
        match serde_hjson::from_str::<BibEntry>(body) {
            Ok(entry) => Some(entry),
            Err(_) => None,
        }
    }

    /// An entry is valid when its key contains at least one non-whitespace
    /// character.
    pub fn is_valid(&self) -> bool {
        self.key.chars().any(|c| !c.is_whitespace())
    }

    /// Renders the entry as an Hjson object, one quoted `name: value` line per
    /// non-blank field, with `key` and `entry_type` first.
    pub fn to_hjson(&self) -> String {
        // Appends one line unless the trimmed value is empty.
        fn append(doc: &mut String, label: &str, raw: &str) {
            let text = raw.trim();
            if text.is_empty() {
                return;
            }
            doc.push_str(&format!(" {label}: {}\n", hjson_quote(text)));
        }

        let mut doc = String::from("{\n");
        append(&mut doc, "key", &self.key);
        append(&mut doc, "entry_type", self.effective_type());
        for (label, raw) in self.body_fields() {
            append(&mut doc, label, raw);
        }
        doc.push_str("}\n");
        doc
    }

    /// Renders the entry as a single BibTeX record
    /// (`@type{key,` … `}` followed by a newline), skipping blank fields.
    pub fn to_bibtex(&self) -> String {
        let body = self
            .body_fields()
            .iter()
            .filter_map(|&(label, raw)| {
                let text = raw.trim();
                if text.is_empty() {
                    None
                } else {
                    Some(format!(" {label} = {{{text}}}"))
                }
            })
            .collect::<Vec<_>>()
            .join(",\n");
        format!(
            "@{kind}{{{key},\n{body}\n}}\n",
            kind = self.effective_type(),
            key = self.key.trim(),
        )
    }

    /// The trimmed entry type, or `misc` when the type is blank.
    fn effective_type(&self) -> &str {
        match self.entry_type.trim() {
            "" => "misc",
            other => other,
        }
    }

    /// Every field except `key` and `entry_type`, paired with its output name,
    /// in the order both writers emit them. Absent optional fields are
    /// represented by an empty string.
    fn body_fields(&self) -> [(&'static str, &str); 17] {
        fn text(field: &Option<String>) -> &str {
            field.as_deref().unwrap_or("")
        }

        [
            ("author", self.author.as_str()),
            ("title", self.title.as_str()),
            ("year", self.year.as_str()),
            ("journal", text(&self.journal)),
            ("volume", text(&self.volume)),
            ("number", text(&self.number)),
            ("pages", text(&self.pages)),
            ("publisher", text(&self.publisher)),
            ("booktitle", text(&self.booktitle)),
            ("editor", text(&self.editor)),
            ("edition", text(&self.edition)),
            ("url", text(&self.url)),
            ("doi", text(&self.doi)),
            ("isbn", text(&self.isbn)),
            ("note", text(&self.note)),
            ("abstract", text(&self.abstract_)),
            ("keywords", text(&self.keywords)),
        ]
    }
}
pub fn compile_bibtex(entries: &[BibEntry]) -> (String, usize) {
let valid: Vec<&BibEntry> = entries.iter().filter(|e| e.is_valid()).collect();
let body = valid.iter().map(|e| e.to_bibtex()).collect::<Vec<_>>().join("\n");
(body, valid.len())
}
/// Starter Hjson body for a new bibliography entry.
///
/// The text parses with `BibEntry::from_hjson` (key `change-me`, type
/// `article`). `seed_sources_body_for_tui` substitutes the first
/// `key: change-me` occurrence, so that exact text must stay in the template.
/// The `//` lines are part of the template text: hints for the person editing
/// the entry, including optional fields to uncomment.
pub const ENTRY_TEMPLATE: &str = "{
// Citation key — insert in prose as @smith2024
key: change-me
// entry_type: article | book | misc | online | inproceedings | …
entry_type: article
author: Last, First
title: Title of the work
year: 2024
// Optional — delete unused fields:
// journal: Journal Name
// volume: 1
// number: 2
// pages: 10-20
// publisher: Publisher Name
// url: https://example.com
// doi: 10.xxxx/xxxxx
// note: Additional note
}
";
/// Derives a citation key from a free-form title.
///
/// Keeps only ASCII letters and digits plus `_`, `:` and `-` (letters are
/// lower-cased); every other character, whitespace included, is dropped.
/// Leading characters are then removed until the key starts with an ASCII
/// letter. Falls back to `change-me` when nothing is left.
fn slugify_key(title: &str) -> String {
    let slug: String = title
        .trim()
        .chars()
        .filter(|&ch| ch.is_ascii_alphanumeric() || matches!(ch, '_' | ':' | '-'))
        .map(|ch| ch.to_ascii_lowercase())
        .collect();

    // One pass over the prefix instead of repeatedly removing the first char,
    // which shifted the whole string each time.
    let key = slug.trim_start_matches(|c: char| !c.is_ascii_alphabetic());
    if key.is_empty() {
        "change-me".to_string()
    } else {
        key.to_string()
    }
}
/// Returns `ENTRY_TEMPLATE` with its first `key: change-me` occurrence
/// replaced by a key derived from `title` via `slugify_key`.
pub fn seed_sources_body_for_tui(title: &str) -> String {
    let key_line = format!("key: {}", slugify_key(title));
    ENTRY_TEMPLATE.replacen("key: change-me", key_line.as_str(), 1)
}
/// Wraps `value` in double quotes for use as an Hjson string.
///
/// Backslashes and double quotes are backslash-escaped; newline, carriage
/// return and tab are each replaced by a single space. All other characters
/// are copied unchanged.
fn hjson_quote(value: &str) -> String {
    let mut quoted = String::with_capacity(value.len() + 2);
    quoted.push('"');
    for ch in value.chars() {
        if ch == '\\' || ch == '"' {
            quoted.push('\\');
            quoted.push(ch);
        } else if matches!(ch, '\n' | '\r' | '\t') {
            quoted.push(' ');
        } else {
            quoted.push(ch);
        }
    }
    quoted.push('"');
    quoted
}
/// Collapses every run of whitespace in `value` to a single space and drops
/// leading and trailing whitespace.
fn normalize_ws(value: &str) -> String {
    let mut collapsed = String::new();
    for word in value.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
/// Collects the citation keys referenced in `prose` as `@key` tokens, in
/// order of appearance (repeated citations are reported each time).
///
/// A token is recognised when the `@` is at the start of the text or follows
/// a non-alphanumeric character (so `a@b.com` is not a citation) and is
/// immediately followed by an ASCII letter. The key then extends over ASCII
/// letters, digits, `_`, `:` and `-`.
///
/// `:` and `-` only count as *internal* punctuation: any trailing run of them
/// is sentence punctuation ("as @smith2024: shows") and is not part of the
/// returned key.
pub fn extract_cite_keys(prose: &str) -> Vec<String> {
    fn is_key_char(c: char) -> bool {
        c.is_ascii_alphanumeric() || matches!(c, '_' | ':' | '-')
    }

    let chars: Vec<char> = prose.chars().collect();
    let mut keys = Vec::new();
    let mut i = 0;
    while i < chars.len() {
        let prev_ok = i == 0 || !chars[i - 1].is_alphanumeric();
        let starts_letter = chars
            .get(i + 1)
            .is_some_and(|c| c.is_ascii_alphabetic());
        if chars[i] != '@' || !prev_ok || !starts_letter {
            i += 1;
            continue;
        }

        let start = i + 1;
        let len = chars[start..]
            .iter()
            .take_while(|&&c| is_key_char(c))
            .count();
        let end = start + len;

        let raw: String = chars[start..end].iter().collect();
        // The key starts with a letter, so trimming can never empty it.
        let key = raw.trim_end_matches(|c: char| matches!(c, ':' | '-'));
        keys.push(key.to_string());

        // Resume after everything consumed, trailing punctuation included.
        i = end;
    }
    keys
}
/// Scans BibTeX source text and returns every entry that could be parsed.
///
/// Each `@type{…}` or `@type(…)` record is located by matching its opening
/// delimiter against the balancing closing one. `@comment`, `@string` and
/// `@preamble` records are skipped, as are records for which
/// `parse_bibtex_entry` returns `None`. Text outside records is ignored.
pub fn parse_bibtex(input: &str) -> Vec<BibEntry> {
    let text: Vec<char> = input.chars().collect();
    let total = text.len();
    let mut parsed = Vec::new();
    let mut cursor = 0;

    while cursor < total {
        if text[cursor] != '@' {
            cursor += 1;
            continue;
        }

        // The type name runs up to the opening delimiter; hitting another `@`,
        // a line break, or the end of input first means this `@` does not
        // start a record.
        let type_start = cursor + 1;
        let type_end = text[type_start..]
            .iter()
            .position(|&c| matches!(c, '{' | '(' | '@' | '\n'))
            .map_or(total, |offset| type_start + offset);
        let opener = match text.get(type_end) {
            Some(&c) if c == '{' || c == '(' => c,
            _ => {
                cursor += 1;
                continue;
            }
        };
        let closer = if opener == '{' { '}' } else { ')' };

        // Find the delimiter that balances the opener; an unterminated record
        // extends to the end of the input.
        let body_start = type_end + 1;
        let mut nesting = 1usize;
        let mut body_end = total;
        for (offset, &c) in text[body_start..].iter().enumerate() {
            if c == opener {
                nesting += 1;
            } else if c == closer {
                nesting -= 1;
                if nesting == 0 {
                    body_end = body_start + offset;
                    break;
                }
            }
        }

        let body: String = text[body_start..body_end].iter().collect();
        let kind = text[type_start..type_end]
            .iter()
            .collect::<String>()
            .trim()
            .to_lowercase();
        let is_directive = matches!(kind.as_str(), "comment" | "string" | "preamble");
        if !is_directive {
            if let Some(entry) = parse_bibtex_entry(&kind, &body) {
                parsed.push(entry);
            }
        }

        // Continue just past the closing delimiter.
        cursor = body_end + 1;
    }
    parsed
}
/// Parses the interior of one BibTeX record (`key, name = value, …`).
///
/// `etype` is stored as the entry type unchanged; `inner` is the text between
/// the record's outer delimiters. Returns `None` when the citation key (the
/// text before the first comma) is blank.
///
/// Values may be brace-delimited, double-quoted, or bare (read up to the next
/// comma). Whitespace inside a value is collapsed, and empty values are
/// ignored. Field names are matched case-insensitively; `journaltitle` is
/// accepted for `journal`, `issue` for `number`, and `date` supplies the year
/// (its part before the first `-`) when no year has been set yet. Unknown
/// fields are dropped.
fn parse_bibtex_entry(etype: &str, inner: &str) -> Option<BibEntry> {
    /// Reads a `{…}` value starting at the opening brace; the outer braces are
    /// dropped, nested ones are kept. Leaves `*i` just past the closing brace.
    fn read_braced(chars: &[char], i: &mut usize) -> String {
        let mut depth = 0;
        let mut v = String::new();
        while *i < chars.len() {
            match chars[*i] {
                '{' => {
                    depth += 1;
                    if depth == 1 {
                        *i += 1;
                        continue;
                    }
                }
                '}' => {
                    depth -= 1;
                    if depth == 0 {
                        *i += 1;
                        break;
                    }
                }
                _ => {}
            }
            v.push(chars[*i]);
            *i += 1;
        }
        v
    }

    /// Reads a `"…"` value starting at the opening quote. A `"` nested inside
    /// braces (as in `"M{\"u}ller"`) is part of the value, so only a quote at
    /// brace depth zero ends it. Leaves `*i` just past the closing quote.
    fn read_quoted(chars: &[char], i: &mut usize) -> String {
        *i += 1;
        let mut depth = 0usize;
        let mut v = String::new();
        while *i < chars.len() {
            match chars[*i] {
                '"' if depth == 0 => break,
                '{' => depth += 1,
                // A stray `}` must not push the depth below zero.
                '}' => depth = depth.saturating_sub(1),
                _ => {}
            }
            v.push(chars[*i]);
            *i += 1;
        }
        if *i < chars.len() {
            *i += 1;
        }
        v
    }

    /// Reads an undelimited value up to (not including) the next comma.
    fn read_bare(chars: &[char], i: &mut usize) -> String {
        let mut v = String::new();
        while *i < chars.len() && chars[*i] != ',' {
            v.push(chars[*i]);
            *i += 1;
        }
        v
    }

    let chars: Vec<char> = inner.chars().collect();
    let mut i = 0;

    // Citation key: everything before the first comma.
    let mut key = String::new();
    while i < chars.len() && chars[i] != ',' {
        key.push(chars[i]);
        i += 1;
    }
    let key = key.trim().to_string();
    if key.is_empty() {
        return None;
    }
    i += 1;

    let mut e = BibEntry {
        key,
        entry_type: etype.to_string(),
        ..Default::default()
    };

    while i < chars.len() {
        // Skip separators between fields.
        while i < chars.len() && (chars[i].is_whitespace() || chars[i] == ',') {
            i += 1;
        }
        if i >= chars.len() {
            break;
        }

        let mut name = String::new();
        while i < chars.len() && chars[i] != '=' && chars[i] != ',' {
            name.push(chars[i]);
            i += 1;
        }
        if i >= chars.len() {
            break;
        }
        if chars[i] != '=' {
            // A token with no `=` (for example the remainder of a `#`
            // concatenation) is skipped so the fields after it still load.
            continue;
        }
        i += 1;
        while i < chars.len() && chars[i].is_whitespace() {
            i += 1;
        }

        let value = match chars.get(i) {
            Some('{') => read_braced(&chars, &mut i),
            Some('"') => read_quoted(&chars, &mut i),
            _ => read_bare(&chars, &mut i),
        };

        let name = name.trim().to_lowercase();
        let value = normalize_ws(&value);
        if value.is_empty() {
            continue;
        }
        match name.as_str() {
            "author" => e.author = value,
            "title" => e.title = value,
            "year" => e.year = value,
            // Only a fallback: an explicit `year` seen earlier wins, and one
            // seen later overwrites this.
            "date" if e.year.is_empty() => {
                e.year = value.split('-').next().unwrap_or("").to_string();
            }
            "journal" | "journaltitle" => e.journal = Some(value),
            "volume" => e.volume = Some(value),
            "number" | "issue" => e.number = Some(value),
            "pages" => e.pages = Some(value),
            "publisher" => e.publisher = Some(value),
            "booktitle" => e.booktitle = Some(value),
            "editor" => e.editor = Some(value),
            "edition" => e.edition = Some(value),
            "url" => e.url = Some(value),
            "doi" => e.doi = Some(value),
            "isbn" => e.isbn = Some(value),
            "note" => e.note = Some(value),
            "abstract" => e.abstract_ = Some(value),
            "keywords" => e.keywords = Some(value),
            _ => {}
        }
    }
    Some(e)
}
// Unit tests for Hjson parsing, BibTeX export/import, key seeding and
// citation-key extraction.
#[cfg(test)]
mod tests {
use super::*;
// A fully populated Hjson entry renders every supplied field and nothing else.
#[test]
fn parses_and_serialises_a_full_entry() {
let body = "{\n key: smith2024\n entry_type: article\n author: Smith, Jane\n \
title: On Things\n year: 2024\n journal: J. Things\n volume: 12\n \
pages: 1-9\n}";
let e = BibEntry::from_hjson(body).expect("parses");
assert!(e.is_valid());
let bib = e.to_bibtex();
assert!(bib.starts_with("@article{smith2024,"), "{bib}");
assert!(bib.contains("author = {Smith, Jane}"));
assert!(bib.contains("journal = {J. Things}"));
assert!(bib.contains("volume = {12}"));
assert!(!bib.contains("doi"));
assert!(bib.trim_end().ends_with('}'));
}
// Missing fields are left out of the output and a missing type becomes `misc`.
#[test]
fn partial_entry_omits_missing_fields_and_defaults_type() {
let e = BibEntry::from_hjson("{\n key: k1\n title: Untyped\n}").unwrap();
let bib = e.to_bibtex();
assert!(bib.starts_with("@misc{k1,"), "blank type → misc: {bib}");
assert!(bib.contains("title = {Untyped}"));
assert!(!bib.contains("author"));
assert!(!bib.contains("year"));
}
// An entry without a key still parses, but is invalid and excluded from the
// compiled output and its count.
#[test]
fn empty_key_is_invalid_and_skipped_by_compile() {
let keyless = BibEntry::from_hjson("{\n title: No Key\n}").unwrap();
assert!(!keyless.is_valid());
let ok = BibEntry::from_hjson("{\n key: real\n title: T\n}").unwrap();
let (text, n) = compile_bibtex(&[keyless, ok]);
assert_eq!(n, 1);
assert!(text.contains("@misc{real,"));
assert!(!text.contains("No Key"));
}
// Fields the struct does not declare must not make parsing fail.
#[test]
fn unknown_fields_are_tolerated() {
let e = BibEntry::from_hjson("{\n key: k\n title: T\n some_future_field: x\n}").unwrap();
assert_eq!(e.key, "k");
}
// Non-ASCII text passes through parsing and export unchanged.
#[test]
fn unicode_author_survives() {
let e = BibEntry::from_hjson("{\n key: u\n author: Ulánov, Владимир\n title: Т\n}").unwrap();
assert!(e.to_bibtex().contains("Ulánov, Владимир"));
}
// The Hjson name `abstract` maps to the `abstract_` field and back.
#[test]
fn abstract_field_is_renamed() {
let e = BibEntry::from_hjson("{\n key: a\n title: T\n abstract: a summary\n}").unwrap();
assert_eq!(e.abstract_.as_deref(), Some("a summary"));
assert!(e.to_bibtex().contains("abstract = {a summary}"));
}
// The shipped template must itself be a parseable entry.
#[test]
fn the_seed_template_parses() {
let e = BibEntry::from_hjson(ENTRY_TEMPLATE).expect("template parses");
assert_eq!(e.key, "change-me");
assert_eq!(e.entry_type, "article");
}
// Seeding from a title substitutes the slugified key and keeps the body parseable.
#[test]
fn tui_seed_uses_title_as_key_and_stays_valid() {
let body = seed_sources_body_for_tui("Smith 2024");
let e = BibEntry::from_hjson(&body).expect("seeded body parses");
assert_eq!(e.key, "smith2024");
assert_eq!(e.entry_type, "article");
}
// `@key` tokens are found; the `@` inside an email address is not a citation.
#[test]
fn extract_cite_keys_finds_tokens_and_skips_emails() {
let prose = "As shown by @smith2024 and @doe:2023b, but mail me at a@b.com. \
See also @nguyen-1999.";
let keys = extract_cite_keys(prose);
assert_eq!(keys, vec!["smith2024", "doe:2023b", "nguyen-1999"]);
assert!(!keys.iter().any(|k| k == "b"));
}
// Extraction works when the surrounding prose is non-ASCII.
#[test]
fn extract_cite_keys_handles_unicode_prose() {
let keys = extract_cite_keys("Как показано в @ivanov2020, текст продолжается.");
assert_eq!(keys, vec!["ivanov2020"]);
}
// Covers all three value syntaxes (braced, quoted, bare), a skipped
// `@comment`, and a final field without a trailing comma.
#[test]
fn parse_bibtex_reads_braced_and_quoted_and_bare() {
let bib = r#"
@article{smith2024,
author = {Smith, Jane and Doe, John},
title = "On {Nested} Braces",
journal= {Journal of Things},
year = 2024,
volume = 12,
pages = {1--9},
}
@comment{ this is ignored }
@book{ulanov2021,
author = {Ulánov, Владимир},
title = {Системы},
year = {2021}
}
"#;
let entries = parse_bibtex(bib);
assert_eq!(entries.len(), 2);
let a = &entries[0];
assert_eq!(a.key, "smith2024");
assert_eq!(a.entry_type, "article");
assert_eq!(a.author, "Smith, Jane and Doe, John");
assert_eq!(a.title, "On {Nested} Braces");
assert_eq!(a.year, "2024");
assert_eq!(a.volume.as_deref(), Some("12"));
assert_eq!(a.pages.as_deref(), Some("1--9"));
let b = &entries[1];
assert_eq!(b.key, "ulanov2021");
assert_eq!(b.entry_type, "book");
assert_eq!(b.author, "Ulánov, Владимир");
}
// Imported BibTeX written as Hjson and read back yields the same field values,
// including values containing commas and colons.
#[test]
fn bibtex_import_round_trips_through_hjson() {
let bib = "@inproceedings{x:1, author = {Last, First}, title = {A, B: C}, year = 2020 }";
let parsed = parse_bibtex(bib);
assert_eq!(parsed.len(), 1);
let hjson = parsed[0].to_hjson();
let back = BibEntry::from_hjson(&hjson).expect("hjson round-trips");
assert_eq!(back.key, "x:1");
assert_eq!(back.entry_type, "inproceedings");
assert_eq!(back.author, "Last, First");
assert_eq!(back.title, "A, B: C");
assert_eq!(back.year, "2020");
}
// Punctuation and spaces are dropped, leading non-letters are stripped, and a
// title with no usable characters falls back to the placeholder key.
#[test]
fn slugify_key_handles_edge_cases() {
assert_eq!(slugify_key("Smith, Jane 2024"), "smithjane2024");
assert_eq!(slugify_key("doe:2023"), "doe:2023");
assert_eq!(slugify_key("2024 review"), "review");
assert_eq!(slugify_key("Влади"), "change-me");
}
}