use std::collections::HashMap;
use crate::bib::ast;
use crate::bib::semantic::{BibFieldDb, RequiredField};
use crate::bib::syntax::{SyntaxKind, SyntaxNode};
/// Returns the fields of `entry` arranged in canonical order.
///
/// Fields whose lowercased name has a rank in [`field_ranks`] for the
/// entry's type come first, ordered by that rank. All remaining fields
/// follow, ordered by their lowercased name. The sort is stable, so fields
/// that compare equal (for example repeated names) keep their source order.
pub(super) fn canonical_fields(entry: &SyntaxNode, db: &BibFieldDb) -> Vec<SyntaxNode> {
    let entry_type = ast::entry_type(entry).unwrap_or_default();
    let known = field_ranks(db, &entry_type);

    // Key layout: (group, rank within the known group, name within the unknown group).
    let sort_key = |field: &SyntaxNode| -> (u8, usize, String) {
        let lowered = ast::field_name(field).unwrap_or_default().to_lowercase();
        if let Some(&rank) = known.get(lowered.as_str()) {
            (0, rank, String::new())
        } else {
            (1, 0, lowered)
        }
    };

    let mut ordered: Vec<SyntaxNode> = ast::fields(entry).collect();
    ordered.sort_by_cached_key(sort_key);
    ordered
}
/// Builds a lookup from field name to its position in the signature that
/// `db` holds for `etype`.
///
/// Positions are counted over the required fields first (each alternative of
/// a `OneOf` group takes its own slot, in order) and then over the optional
/// fields. When a name appears more than once, the earliest position is kept.
/// The map is empty when `db` has no signature for `etype`.
fn field_ranks<'a>(db: &'a BibFieldDb, etype: &str) -> HashMap<&'a str, usize> {
    let mut ranks: HashMap<&'a str, usize> = HashMap::new();
    let Some(sig) = db.entry(etype) else {
        return ranks;
    };

    // The counter advances for every listed name, even a repeated one, so
    // ranks match the name's index in the flattened required+optional list.
    let mut next = 0usize;
    let mut assign = |name: &'a str| {
        ranks.entry(name).or_insert(next);
        next += 1;
    };

    for req in &sig.required {
        match req {
            RequiredField::One(name) => assign(name.as_str()),
            RequiredField::OneOf(alts) => alts.iter().for_each(|alt| assign(alt.as_str())),
        }
    }
    sig.optional.iter().for_each(|opt| assign(opt.as_str()));

    ranks
}
/// Returns the child nodes of `root` with every maximal run of consecutive
/// `ENTRY` nodes passed through [`segment_in_order`].
///
/// Children of any other kind keep their position and act as boundaries:
/// entries are never moved across them.
pub(super) fn sorted_blocks(root: &SyntaxNode) -> Vec<SyntaxNode> {
    let mut out: Vec<SyntaxNode> = Vec::new();
    // Entries seen since the last non-entry child, awaiting ordering.
    let mut run: Vec<SyntaxNode> = Vec::new();

    for block in root.children() {
        if block.kind() == SyntaxKind::ENTRY {
            run.push(block);
        } else {
            if !run.is_empty() {
                out.extend(segment_in_order(&run));
                run.clear();
            }
            out.push(block);
        }
    }

    // Flush a run that reaches the end of the children.
    if !run.is_empty() {
        out.extend(segment_in_order(&run));
    }
    out
}
/// Returns the entries of `segment` in output order.
///
/// If any entry in the segment satisfies [`has_cross_reference`], the whole
/// segment is returned in source order. Otherwise the entries are sorted by
/// lowercased cite key; an entry without a cite key sorts as the empty
/// string. The sort is stable, so equal keys keep their source order.
fn segment_in_order(segment: &[SyntaxNode]) -> Vec<SyntaxNode> {
    let mut ordered = segment.to_vec();
    let pinned = segment.iter().any(has_cross_reference);
    if !pinned {
        ordered.sort_by_cached_key(|node| match ast::cite_key(node) {
            Some((key, _)) => key.to_lowercase(),
            None => String::new(),
        });
    }
    ordered
}
/// Reports whether `entry` has a field named `crossref` or `xdata`,
/// compared after lowercasing the field name.
fn has_cross_reference(entry: &SyntaxNode) -> bool {
    const LINKING_FIELDS: [&str; 2] = ["crossref", "xdata"];

    for field in ast::fields(entry) {
        if let Some(name) = ast::field_name(&field) {
            let lowered = name.to_lowercase();
            if LINKING_FIELDS.contains(&lowered.as_str()) {
                return true;
            }
        }
    }
    false
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::bib::parse;
    use crate::bib::semantic::builtin;

    /// Parses `src` and returns the first `ENTRY` node in the resulting tree.
    fn entry(src: &str) -> SyntaxNode {
        parse(src)
            .syntax()
            .descendants()
            .find(|node| node.kind() == SyntaxKind::ENTRY)
            .expect("an ENTRY node")
    }

    /// Lowercased field names of `entry` in the order `canonical_fields` yields them.
    fn ordered_names(entry: &SyntaxNode) -> Vec<String> {
        canonical_fields(entry, builtin())
            .iter()
            .map(|field| ast::field_name(field).unwrap_or_default().to_lowercase())
            .collect()
    }

    /// Cite keys of the blocks of `src` in the order `sorted_blocks` yields them;
    /// blocks without a cite key are skipped.
    fn ordered_keys(src: &str) -> Vec<String> {
        sorted_blocks(&parse(src).syntax())
            .iter()
            .filter_map(|block| ast::cite_key(block).map(|(key, _)| key))
            .collect()
    }

    #[test]
    fn fields_sorted_to_canonical_order() {
        let src = "@article{k, year = 2020, title = {T}, author = {A}}\n";
        let node = entry(src);
        assert_eq!(ordered_names(&node), ["author", "title", "year"]);
    }

    #[test]
    fn unknown_fields_alphabetized_after_known() {
        let src = "@article{k, zzz = {z}, author = {A}, aaa = {a}}\n";
        let node = entry(src);
        assert_eq!(ordered_names(&node), ["author", "aaa", "zzz"]);
    }

    #[test]
    fn unknown_entry_type_is_fully_alphabetical() {
        let src = "@weirdtype{k, charlie = {c}, alpha = {a}, bravo = {b}}\n";
        let node = entry(src);
        assert_eq!(ordered_names(&node), ["alpha", "bravo", "charlie"]);
    }

    #[test]
    fn duplicate_field_names_keep_source_order() {
        let src = "@misc{k, note = {first}, note = {second}}\n";
        let node = entry(src);
        // Both fields share a name, so only their values tell them apart.
        let values: Vec<String> = canonical_fields(&node, builtin())
            .iter()
            .filter(|field| ast::field_name(field).as_deref() == Some("note"))
            .map(|field| ast::field_value(field).unwrap().to_string())
            .collect();
        assert_eq!(values, ["{first}", "{second}"]);
    }

    #[test]
    fn entries_sorted_by_key_case_insensitive() {
        let src = "@misc{Charlie}\n@misc{alpha}\n@misc{Bravo}\n";
        let keys = ordered_keys(src);
        assert_eq!(keys, ["alpha", "Bravo", "Charlie"]);
    }

    #[test]
    fn string_def_is_a_barrier() {
        let src = "@misc{zoo}\n@string{m = \"x\"}\n@misc{apple}\n";
        let blocks = sorted_blocks(&parse(src).syntax());
        // The @string block separates the two entries into single-entry runs,
        // so neither may cross it.
        assert_eq!(ast::cite_key(&blocks[0]).unwrap().0, "zoo");
        assert_eq!(blocks[1].kind(), SyntaxKind::STRING_ENTRY);
        assert_eq!(ast::cite_key(&blocks[2]).unwrap().0, "apple");
    }

    #[test]
    fn crossref_segment_left_in_source_order() {
        let src = "@inproceedings{zzz, crossref = {proc}}\n@proceedings{proc, title = {P}}\n";
        let keys = ordered_keys(src);
        assert_eq!(keys, ["zzz", "proc"]);
    }
}