use crate::bib::ast;
use crate::bib::parse;
use crate::bib::semantic::{BibFieldDb, FieldCategory, builtin};
use crate::bib::syntax::{SyntaxKind, SyntaxNode};
use crate::formatter::ir::Ir;
use crate::formatter::printer::Printer;
use crate::formatter::style::FormatStyle;
/// Reasons the formatter refuses to produce output for an input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FormatError {
/// The parser reported `count` diagnostics for the input.
ParseErrors { count: usize },
/// The syntax tree contains a construct the formatter rejects; `kind` is
/// its syntax kind and `snippet` the offending source text.
UnsupportedConstruct { kind: SyntaxKind, snippet: String },
}
/// User-facing rendering of a [`FormatError`].
impl std::fmt::Display for FormatError {
    /// Writes a one-line description of the error into `f`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::ParseErrors { count } => f.write_fmt(format_args!(
                "input contains {count} parser diagnostic(s); formatter only supports parseable input"
            )),
            Self::UnsupportedConstruct { kind, snippet } => f.write_fmt(format_args!(
                "unsupported construct for formatter: {kind:?} near {snippet:?}"
            )),
        }
    }
}
// Marker impl so `FormatError` can be used wherever a `std::error::Error` is expected.
impl std::error::Error for FormatError {}
/// Formats `input` using the default [`FormatStyle`].
///
/// # Errors
///
/// Returns whatever error [`format_with_style`] reports for the same input.
pub fn format(input: &str) -> Result<String, FormatError> {
    let style = FormatStyle::default();
    format_with_style(input, style)
}
/// Parses `input` and formats the resulting syntax tree with `style`.
///
/// # Errors
///
/// Returns [`FormatError::ParseErrors`] when the parser reported any
/// diagnostics, and otherwise propagates the result of [`format_node`].
pub fn format_with_style(input: &str, style: FormatStyle) -> Result<String, FormatError> {
    let parsed = parse(input);
    match parsed.errors.len() {
        // A clean parse is the only input the formatter accepts.
        0 => format_node(&parsed.syntax(), style),
        count => Err(FormatError::ParseErrors { count }),
    }
}
/// Formats an already-parsed syntax tree rooted at `root`.
///
/// Trailing spaces, tabs and line breaks are stripped from the printed text;
/// non-empty output then ends with exactly one `'\n'`, while empty output
/// stays empty.
///
/// # Errors
///
/// Propagates the error from `validate_supported_tokens`.
pub fn format_node(root: &SyntaxNode, style: FormatStyle) -> Result<String, FormatError> {
    validate_supported_tokens(root)?;

    let rendered = format_root(root, style);
    let body = rendered.trim_end_matches([' ', '\t', '\n', '\r']);
    if body.is_empty() {
        return Ok(String::new());
    }

    let mut formatted = String::with_capacity(body.len() + 1);
    formatted.push_str(body);
    formatted.push('\n');
    Ok(formatted)
}
fn validate_supported_tokens(root: &SyntaxNode) -> Result<(), FormatError> {
for element in root.descendants_with_tokens() {
let Some(token) = element.into_token() else {
continue;
};
if token.kind() == SyntaxKind::ERROR {
return Err(FormatError::UnsupportedConstruct {
kind: token.kind(),
snippet: token.text().to_string(),
});
}
}
Ok(())
}
/// Lowers `root` to IR using the built-in field database and prints it with
/// a [`Printer`] configured by `style`.
fn format_root(root: &SyntaxNode, style: FormatStyle) -> String {
    let document = lower_root(root, Lower { db: builtin() });
    Printer::new(style).print(&document)
}
/// Context threaded through the lowering functions; cheap to copy.
#[derive(Clone, Copy)]
struct Lower {
/// Field database used to look up field categories and field ordering.
db: &'static BibFieldDb,
}
/// Lowers every block returned by `super::sort::sorted_blocks` and joins
/// the results with `Ir::empty_line()` as the separator.
fn lower_root(root: &SyntaxNode, cx: Lower) -> Ir {
    let ordered = super::sort::sorted_blocks(root);
    let lowered = ordered.into_iter().map(|block| lower_block(&block, cx));
    Ir::join(Ir::empty_line(), lowered)
}
/// Lowers one top-level block according to its syntax kind.
///
/// Regular, `@string` and `@preamble` entries get dedicated lowering. Junk
/// is emitted verbatim with surrounding whitespace trimmed. Everything else
/// (including `@comment` entries) is emitted verbatim as written.
fn lower_block(node: &SyntaxNode, cx: Lower) -> Ir {
    let kind = node.kind();
    if kind == SyntaxKind::ENTRY {
        return lower_entry(node, cx);
    }
    if kind == SyntaxKind::STRING_ENTRY {
        return lower_string_entry(node);
    }
    if kind == SyntaxKind::PREAMBLE_ENTRY {
        return lower_preamble_entry(node);
    }

    let source = node.to_string();
    if kind == SyntaxKind::JUNK {
        Ir::verbatim(source.trim().to_string())
    } else {
        Ir::verbatim(source)
    }
}
/// Returns the `(open, close)` delimiter pair used by `entry`.
///
/// The pair is `("(", ")")` when a direct child token of kind
/// `SyntaxKind::L_PAREN` exists, and `("{", "}")` otherwise.
fn entry_delimiters(entry: &SyntaxNode) -> (&'static str, &'static str) {
    for element in entry.children_with_tokens() {
        let Some(token) = element.into_token() else {
            continue;
        };
        if token.kind() == SyntaxKind::L_PAREN {
            return ("(", ")");
        }
    }
    ("{", "}")
}
/// Lowers a regular entry (`@type{key, field = value, ...}`).
///
/// The entry type and field names are lowercased, and the original
/// delimiter style (braces or parentheses) is kept. Fields come from
/// `super::sort::canonical_fields`. Field names are padded to the width of
/// the longest one so the `=` signs line up. An entry without fields is
/// emitted on a single line.
fn lower_entry(entry: &SyntaxNode, cx: Lower) -> Ir {
    let etype = ast::entry_type(entry).unwrap_or_default().to_lowercase();
    let key = ast::cite_key(entry)
        .map(|(text, _)| text)
        .unwrap_or_default();
    let (open, close) = entry_delimiters(entry);

    let fields: Vec<SyntaxNode> = super::sort::canonical_fields(entry, cx.db);
    if fields.is_empty() {
        return Ir::text(format!("@{etype}{open}{key}{close}"));
    }

    let names: Vec<String> = fields
        .iter()
        .map(|field| ast::field_name(field).unwrap_or_default().to_lowercase())
        .collect();
    // Width is measured in chars, matching how `lower_field` pads.
    let width = names
        .iter()
        .map(|name| name.chars().count())
        .fold(0, usize::max);

    let last_index = fields.len() - 1;
    let lines = fields
        .iter()
        .zip(names.iter())
        .enumerate()
        .map(|(index, (field, name))| lower_field(field, name, width, index == last_index, cx));

    let header = Ir::text(format!("@{etype}{open}{key},"));
    let body = Ir::concat([Ir::hard_line(), Ir::join(Ir::hard_line(), lines)]);
    Ir::concat([
        header,
        Ir::indent(body),
        Ir::hard_line(),
        Ir::text(close.to_string()),
    ])
}
/// Lowers a single `name = value` field line.
///
/// * `name_lc` - the field name, already lowercased by the caller.
/// * `width` - column width (in chars) that names are padded to.
/// * `last` - whether this is the final field; the final field gets no
///   trailing comma.
/// * `cx` - lowering context used to look up the field's category.
///
/// A field without a value lowers to just the padded prefix.
fn lower_field(field: &SyntaxNode, name_lc: &str, width: usize, last: bool, cx: Lower) -> Ir {
    // Saturate rather than subtract directly: if a caller ever passes a
    // `width` smaller than the name, a plain `usize` subtraction would
    // underflow (a panic in debug builds, a huge `repeat` count in release).
    let pad = " ".repeat(width.saturating_sub(name_lc.chars().count()));
    let prefix = Ir::text(format!("{name_lc}{pad} = "));
    let prefix_width = width + " = ".len();

    let category = cx.db.category(name_lc);
    // Verbatim fields must keep their value exactly as written.
    let normalize = category != FieldCategory::Verbatim;
    let value = match ast::field_value(field) {
        Some(value) => lower_value_reflowed(&value, normalize, category, prefix_width),
        None => Ir::nil(),
    };

    let comma = if last { Ir::nil() } else { Ir::text(",") };
    Ir::concat([prefix, value, comma])
}
/// Lowers a field value, re-wrapping its text when that is safe.
///
/// Reflow applies only when all of the following hold:
/// * the category is neither `Verbatim` nor `Date`;
/// * the value consists of exactly one piece (no `#` concatenation);
/// * that piece is a brace group, or a quoted string accepted by
///   `quoted_inner_if_safe`.
///
/// In every other case the value is lowered by `lower_value`. Name fields
/// are reflowed by `reflow_name_value`, all others by `reflow_prose_value`.
fn lower_value_reflowed(
    value: &SyntaxNode,
    normalize: bool,
    category: FieldCategory,
    prefix_width: usize,
) -> Ir {
    if matches!(category, FieldCategory::Verbatim | FieldCategory::Date) {
        return lower_value(value, normalize);
    }

    let mut pieces = value.children().filter(|piece| {
        matches!(
            piece.kind(),
            SyntaxKind::LITERAL | SyntaxKind::QUOTED | SyntaxKind::BRACE_GROUP
        )
    });
    // Exactly one piece: a first element exists and a second does not.
    let (Some(piece), None) = (pieces.next(), pieces.next()) else {
        return lower_value(value, normalize);
    };

    let unwrapped = match piece.kind() {
        SyntaxKind::BRACE_GROUP => Some(brace_inner(&piece)),
        SyntaxKind::QUOTED => quoted_inner_if_safe(&piece, normalize),
        _ => None,
    };
    let Some(inner) = unwrapped else {
        return lower_value(value, normalize);
    };

    if matches!(category, FieldCategory::Name) {
        reflow_name_value(&inner, prefix_width)
    } else {
        reflow_prose_value(&inner, prefix_width)
    }
}
/// Builds a braced, re-wrappable prose value from `inner`.
///
/// The text is split into words by `split_brace_aware`, handed to
/// `Ir::fill`, and aligned one column past `prefix_width` (the extra column
/// accounts for the opening `{`).
fn reflow_prose_value(inner: &str, prefix_width: usize) -> Ir {
    let wrapped = Ir::fill(split_brace_aware(inner).into_iter().map(Ir::text));
    let column = prefix_width + 1;
    Ir::concat([Ir::text("{"), Ir::align(column, wrapped), Ir::text("}")])
}
/// Builds a braced name-list value from `inner`.
///
/// Names are separated with `split_top_level_and`. Zero or one name becomes
/// plain text. Several names become an `Ir::Fill` whose items alternate
/// between a name and a `" and"` + `Ir::Line` separator. The body is
/// aligned one column past `prefix_width` (for the opening `{`).
fn reflow_name_value(inner: &str, prefix_width: usize) -> Ir {
    let mut names = split_top_level_and(inner).into_iter();
    let first = names.next().unwrap_or_default();

    let body = if names.len() == 0 {
        Ir::text(first)
    } else {
        // One slot for the first name, then (separator, name) per remaining name.
        let mut parts = Vec::with_capacity(names.len() * 2 + 1);
        parts.push(Ir::text(first));
        for name in names {
            parts.push(Ir::concat([Ir::text(" and"), Ir::Line]));
            parts.push(Ir::text(name));
        }
        Ir::Fill(parts.into())
    };

    Ir::concat([
        Ir::text("{"),
        Ir::align(prefix_width + 1, body),
        Ir::text("}"),
    ])
}
/// Returns the text of `piece` without its outer `{` and `}`.
///
/// If the text is not wrapped in both delimiters it is returned unchanged.
fn brace_inner(piece: &SyntaxNode) -> String {
    let raw = piece.to_string();
    if let Some(inner) = raw
        .strip_prefix('{')
        .and_then(|rest| rest.strip_suffix('}'))
    {
        return inner.to_owned();
    }
    raw
}
/// Returns the text between the outer double quotes of `piece`, but only
/// when it can safely be re-wrapped in braces.
///
/// Yields `None` when the text is not wrapped in double quotes, when
/// `normalize` is false, or when the inner braces are unbalanced.
fn quoted_inner_if_safe(piece: &SyntaxNode, normalize: bool) -> Option<String> {
    let raw = piece.to_string();
    let inner = raw.strip_prefix('"')?.strip_suffix('"')?;
    if normalize && braces_balanced(inner) {
        Some(inner.to_owned())
    } else {
        None
    }
}
/// Splits `s` into words at whitespace, leaving protected spans intact.
///
/// Whitespace separates words only outside `{...}` groups and outside
/// `$...$` math spans; runs of whitespace collapse and never produce empty
/// words. A backslash makes the following character literal, so `\{`, `\}`
/// and `\$` neither open nor close a span.
///
/// A stray `}` with no matching `{` is kept in the output but does not push
/// the nesting depth below zero. Without that clamp one unmatched closer
/// would stop all splitting for the rest of the text and make a later
/// `{...}` group look like top-level text that may be split.
fn split_brace_aware(s: &str) -> Vec<String> {
    let mut words = Vec::new();
    let mut cur = String::new();
    let mut brace_depth: usize = 0;
    let mut in_math = false;
    let mut escaped = false;

    for ch in s.chars() {
        if escaped {
            // The character after a backslash is always copied literally.
            cur.push(ch);
            escaped = false;
            continue;
        }
        match ch {
            '\\' => {
                cur.push(ch);
                escaped = true;
            }
            c if c.is_whitespace() && brace_depth == 0 && !in_math => {
                if !cur.is_empty() {
                    words.push(std::mem::take(&mut cur));
                }
            }
            '{' => {
                brace_depth += 1;
                cur.push(ch);
            }
            '}' => {
                // Clamp at zero so an unmatched closer cannot corrupt the
                // depth tracking for the remainder of the text.
                brace_depth = brace_depth.saturating_sub(1);
                cur.push(ch);
            }
            '$' => {
                in_math = !in_math;
                cur.push(ch);
            }
            // Includes whitespace inside a protected span.
            _ => cur.push(ch),
        }
    }

    if !cur.is_empty() {
        words.push(cur);
    }
    words
}
/// Splits a name list at top-level occurrences of the word `and`.
///
/// Words come from `split_brace_aware`, so an `and` inside braces is not a
/// separator. The words of each name are re-joined with single spaces, and
/// empty segments (leading, trailing or doubled `and`) are dropped.
fn split_top_level_and(s: &str) -> Vec<String> {
    let words = split_brace_aware(s);
    words
        .split(|word| word.as_str() == "and")
        .filter(|segment| !segment.is_empty())
        .map(|segment| segment.join(" "))
        .collect()
}
/// Lowers a `@string` entry to `@type{name = value}` on one line.
///
/// The entry type is lowercased, the macro name is kept as written, and the
/// value is lowered without quote normalization. An entry with no field is
/// emitted verbatim.
fn lower_string_entry(entry: &SyntaxNode) -> Ir {
    let etype = ast::entry_type(entry).unwrap_or_default().to_lowercase();
    let (open, close) = entry_delimiters(entry);

    let field = match ast::fields(entry).next() {
        Some(field) => field,
        None => return Ir::verbatim(entry.to_string()),
    };

    let name = ast::field_name(&field).unwrap_or_default();
    let head = Ir::text(format!("@{etype}{open}{name} = "));
    let value = ast::field_value(&field).map_or_else(Ir::nil, |value| lower_value(&value, false));
    let tail = Ir::text(close.to_string());
    Ir::concat([head, value, tail])
}
/// Lowers a `@preamble` entry to `@type{value}` on one line.
///
/// The entry type is lowercased and the value is lowered without quote
/// normalization. An entry with no `VALUE` child is emitted verbatim.
fn lower_preamble_entry(entry: &SyntaxNode) -> Ir {
    let etype = ast::entry_type(entry).unwrap_or_default().to_lowercase();
    let (open, close) = entry_delimiters(entry);

    let payload = entry
        .children()
        .find(|child| child.kind() == SyntaxKind::VALUE);
    match payload {
        Some(value) => Ir::concat([
            Ir::text(format!("@{etype}{open}")),
            lower_value(&value, false),
            Ir::text(close.to_string()),
        ]),
        None => Ir::verbatim(entry.to_string()),
    }
}
/// Lowers a value by lowering each of its pieces and joining them with
/// `" # "`.
///
/// Only literal, quoted and brace-group children count as pieces; any other
/// child node is skipped.
fn lower_value(value: &SyntaxNode, normalize: bool) -> Ir {
    let lowered = value.children().filter_map(|piece| match piece.kind() {
        SyntaxKind::LITERAL | SyntaxKind::QUOTED | SyntaxKind::BRACE_GROUP => {
            Some(lower_value_piece(&piece, normalize))
        }
        _ => None,
    });
    Ir::join(Ir::text(" # "), lowered)
}
/// Lowers one piece of a value.
///
/// Quoted strings go through `lower_quoted`. Literals become plain text.
/// Brace groups and any other kind are emitted verbatim.
fn lower_value_piece(piece: &SyntaxNode, normalize: bool) -> Ir {
    let kind = piece.kind();
    if kind == SyntaxKind::QUOTED {
        return lower_quoted(piece, normalize);
    }

    let source = piece.to_string();
    if kind == SyntaxKind::LITERAL {
        Ir::text(source)
    } else {
        Ir::verbatim(source)
    }
}
/// Lowers a quoted string, converting `"..."` to `{...}` when allowed.
///
/// The conversion requires `normalize` to be true, the text to be wrapped
/// in double quotes, and the inner braces to be balanced. Otherwise the
/// piece is emitted exactly as written.
fn lower_quoted(piece: &SyntaxNode, normalize: bool) -> Ir {
    let raw = piece.to_string();
    let braced = raw
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .filter(|inner| normalize && braces_balanced(inner))
        .map(|inner| format!("{{{inner}}}"));
    Ir::verbatim(braced.unwrap_or(raw))
}
/// Reports whether every `{` in `s` is closed by a later `}` and no `}`
/// appears before its opener.
///
/// Backslashes are not special here: `\{` and `\}` count as braces.
fn braces_balanced(s: &str) -> bool {
    let final_depth = s.chars().try_fold(0i32, |depth, ch| match ch {
        '{' => Some(depth + 1),
        // A closer with nothing open can never be balanced; stop early.
        '}' if depth == 0 => None,
        '}' => Some(depth - 1),
        _ => Some(depth),
    });
    final_depth == Some(0)
}
#[cfg(test)]
mod tests {
    use super::{split_brace_aware, split_top_level_and};

    /// Plain prose breaks at each top-level space.
    #[test]
    fn splits_prose_at_depth_zero_whitespace() {
        let words = split_brace_aware("A title here");
        assert_eq!(words, ["A", "title", "here"]);
    }

    /// Newlines and repeated spaces act as a single separator.
    #[test]
    fn collapses_every_whitespace_run() {
        let words = split_brace_aware("A title\nthat\n wraps");
        assert_eq!(words, ["A", "title", "that", "wraps"]);
    }

    /// Brace groups and math spans stay in one word.
    #[test]
    fn glues_braced_and_math_spans() {
        let braced = split_brace_aware("a {Protected group} b");
        assert_eq!(braced, ["a", "{Protected group}", "b"]);

        let math = split_brace_aware("x $a + b$ y");
        assert_eq!(math, ["x", "$a + b$", "y"]);
    }

    /// Escaped delimiters neither open nor close a protected span.
    #[test]
    fn escapes_do_not_change_depth() {
        let brace = split_brace_aware(r"a \{ b");
        assert_eq!(brace, ["a", r"\{", "b"]);

        let dollar = split_brace_aware(r"a \$ b");
        assert_eq!(dollar, ["a", r"\$", "b"]);
    }

    /// A top-level `and` separates two names.
    #[test]
    fn splits_names_at_top_level_and() {
        let names = split_top_level_and("John Doe and Jane Smith");
        assert_eq!(names, ["John Doe", "Jane Smith"]);
    }

    /// An `and` inside braces is part of the name.
    #[test]
    fn protects_braced_and() {
        let names = split_top_level_and("{Barnes and Noble} and Jane Public");
        assert_eq!(names, ["{Barnes and Noble}", "Jane Public"]);
    }

    /// Empty segments vanish and inner whitespace collapses to one space.
    #[test]
    fn drops_empty_name_segments_and_normalizes_spacing() {
        let names = split_top_level_and("Knuth, Donald\n E. and Lamport and ");
        assert_eq!(names, ["Knuth, Donald E.", "Lamport"]);
    }

    /// Input without any `and` yields exactly one name.
    #[test]
    fn single_name_stays_whole() {
        let names = split_top_level_and("Knuth, Donald E.");
        assert_eq!(names, ["Knuth, Donald E."]);
    }
}