use crate::api::AnnotationStyle;
use crate::error::ProcessorError;
use crate::processor::Processor;
use crate::reference::{Bibliography, Citation};
use crate::render::djot::Djot;
use crate::render::format::OutputFormat;
use crate::render::html::Html;
use crate::render::latex::Latex;
use crate::render::markdown::Markdown;
use crate::render::plain::PlainText;
use crate::render::typst::Typst;
use citum_schema::Style;
use citum_schema::locale::{GeneralTerm, TermForm};
use citum_schema::reference::{
ClassExtension, CollectionType, ContributorRole as ReferenceRole, MonographComponentType,
MonographType, ReferenceClass, SerialComponentType,
};
use citum_schema::template::ContributorRole as TemplateRole;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use super::{
BibliographyEntry, CitationOccurrence, DocumentOptions, EntryMetadata, FormattedBibliography,
FormattedCitation, OutputFormatKind, RefsInput, StyleInput, Warning, WarningLevel,
};
/// Input for one document-formatting call: the style, the reference data, the
/// citations to render, and optional per-document settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FormatDocumentRequest {
/// Style to format with. May be inline YAML or an id, URI, or path; only the
/// resolver-based entry point hands id/URI/path values to a resolver.
pub style: StyleInput,
/// Requested locale tag. `format_document_with_style` currently emits a
/// `locale_fallback` warning for any non-empty tag other than `en-US`
/// (ASCII case-insensitive comparison).
pub locale: Option<String>,
/// Output markup to render; takes the `OutputFormatKind` default when the
/// field is missing from the serialized input.
#[serde(default)]
pub output_format: OutputFormatKind,
/// Reference data from which the bibliography is resolved.
pub refs: RefsInput,
/// Citation occurrences to format; results are returned in the same order.
pub citations: Vec<CitationOccurrence>,
/// Optional per-document settings (semantics display, AST index injection,
/// abbreviation map, annotations, ...).
pub document_options: Option<DocumentOptions>,
}
/// Output of a document-formatting call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FormatDocumentResult {
/// One formatted entry per requested citation, in request order.
pub formatted_citations: Vec<FormattedCitation>,
/// The rendered bibliography: whole-document content plus per-entry records.
pub bibliography: FormattedBibliography,
/// Non-fatal diagnostics gathered while formatting (for example `missing_ref`,
/// `locale_fallback`, `unknown_reference_class`, `unknown_enum_variant`).
pub warnings: Vec<Warning>,
}
/// Fatal errors that abort a document-formatting call.
///
/// String payloads carry the human-readable detail that `Display` appends to a
/// per-variant label.
#[derive(Debug)]
pub enum FormatDocumentError {
/// The style input could not be resolved (displayed as "Unresolved style input").
UnresolvedInput(String),
/// Style parse failure.
StyleParse(String),
/// Style path failure.
StylePath(String),
/// Reference-input path failure.
RefsInputPath(String),
/// Reference-input parse failure.
RefsInputParse(String),
/// An error raised by the processor; also produced via `From<ProcessorError>`.
Processing(ProcessorError),
/// The resolved style could not be turned into its final resolved form.
StyleResolution(String),
}
/// Human-readable rendering: a fixed per-variant label, a colon, then the
/// variant's detail (message string or wrapped processor error).
impl std::fmt::Display for FormatDocumentError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Processing(inner) => f.write_fmt(format_args!("Processing error: {}", inner)),
            Self::UnresolvedInput(detail) => {
                f.write_fmt(format_args!("Unresolved style input: {}", detail))
            }
            Self::StyleResolution(detail) => {
                f.write_fmt(format_args!("Style resolution error: {}", detail))
            }
            Self::StyleParse(detail) => f.write_fmt(format_args!("Style parse error: {}", detail)),
            Self::StylePath(detail) => f.write_fmt(format_args!("Style path error: {}", detail)),
            Self::RefsInputPath(detail) => {
                f.write_fmt(format_args!("Refs input path error: {}", detail))
            }
            Self::RefsInputParse(detail) => {
                f.write_fmt(format_args!("Refs input parse error: {}", detail))
            }
        }
    }
}
// Marker impl so the type can be used as a `std::error::Error`. `source()` is
// not overridden, so the wrapped `ProcessorError` is surfaced only through the
// `Display` text of the `Processing` variant.
impl std::error::Error for FormatDocumentError {}
impl From<ProcessorError> for FormatDocumentError {
fn from(err: ProcessorError) -> Self {
Self::Processing(err)
}
}
/// Formats a document using only locally resolvable style input.
///
/// The style is obtained with `StyleInput::resolve_local`; the rest of the work
/// is delegated to [`format_document_with_style`].
///
/// # Errors
///
/// Propagates the failure from local style resolution (the tests in this file
/// expect `UnresolvedInput` for id and URI inputs) and any error returned by
/// [`format_document_with_style`].
pub fn format_document(
    request: FormatDocumentRequest,
) -> Result<FormatDocumentResult, FormatDocumentError> {
    let local_style = request.style.resolve_local()?;
    format_document_with_style(local_style, request)
}
/// Formats a document, using `resolver` for style inputs that cannot be
/// resolved locally.
///
/// Inline YAML is still resolved locally; id, URI and path inputs are passed to
/// `resolver.resolve_style`. The style is then converted with
/// `try_into_resolved_with`, again supplying the resolver, before formatting.
///
/// # Errors
///
/// * `UnresolvedInput` when the resolver cannot produce the style.
/// * `StyleResolution` when `try_into_resolved_with` fails.
/// * Anything returned by local resolution or [`format_document_with_style`].
pub fn format_document_with_resolver(
    request: FormatDocumentRequest,
    resolver: &citum_schema::StyleResolver,
) -> Result<FormatDocumentResult, FormatDocumentError> {
    let fetched = match &request.style {
        StyleInput::Id(value) | StyleInput::Uri(value) | StyleInput::Path(value) => resolver
            .resolve_style(value)
            .map_err(|e| FormatDocumentError::UnresolvedInput(e.to_string()))?,
        StyleInput::Yaml(_) => request.style.resolve_local()?,
    };

    let mut flattened = fetched
        .try_into_resolved_with(Some(resolver))
        .map_err(|e| FormatDocumentError::StyleResolution(e.to_string()))?;
    // The `extends` link is cleared on the resolved style before it reaches the
    // processor. NOTE(review): presumably so nothing downstream tries to follow
    // the link again — confirm against `try_into_resolved_with`.
    flattened.extends = None;

    format_document_with_style(flattened, request)
}
#[allow(
clippy::too_many_lines,
reason = "match arms grow one-to-one with format variants"
)]
pub fn format_document_with_style(
style: Style,
request: FormatDocumentRequest,
) -> Result<FormatDocumentResult, FormatDocumentError> {
let mut warnings = Vec::new();
if let Some(tag) = &request.locale
&& !tag.is_empty()
&& !tag.eq_ignore_ascii_case("en-us")
{
warnings.push(Warning {
level: WarningLevel::Warning,
code: "locale_fallback".to_string(),
citation_id: None,
ref_id: None,
message: format!(
"Requested locale '{tag}' could not be loaded by the engine; falling back to en-US. Adapter-side locale resolution is not yet wired through."
),
});
}
let bibliography = request.refs.resolve_local()?;
let mut processor = Processor::new(style, bibliography);
warnings.extend(unknown_reference_class_warnings(&processor.bibliography));
warnings.extend(unknown_enum_warnings(&processor));
if let Some(opts) = &request.document_options {
if let Some(show_semantics) = opts.show_semantics {
processor.show_semantics = show_semantics;
}
if let Some(inject_ast) = opts.inject_ast_indices {
processor.set_inject_ast_indices(inject_ast);
}
if let Some(abbr_map) = opts.abbreviation_map.clone() {
processor.abbreviation_map = Some(abbr_map);
}
if opts.integral_name_memory.is_some() {
warnings.push(Warning {
level: WarningLevel::Warning,
code: "integral_name_memory_not_applied".to_string(),
citation_id: None,
ref_id: None,
message: "document_options.integral_name_memory is accepted but not yet wired through the processor; tracked in csl26-wq0y.".to_string(),
});
}
}
let mut citations: Vec<Citation> = Vec::new();
for occ in request.citations {
let mut citation: Citation = occ.into();
citation.items.retain(|item| {
if processor.bibliography.contains_key(&item.id) {
true
} else {
warnings.push(Warning {
level: WarningLevel::Warning,
code: "missing_ref".to_string(),
citation_id: citation.id.clone(),
ref_id: Some(item.id.clone()),
message: format!("Reference '{}' not found in bibliography", item.id),
});
false
}
});
citations.push(citation);
}
let formatted_citations = match request.output_format {
OutputFormatKind::Plain => format_by_kind::<PlainText>(&processor, &citations)?,
OutputFormatKind::Html => format_by_kind::<Html>(&processor, &citations)?,
OutputFormatKind::Djot => format_by_kind::<Djot>(&processor, &citations)?,
OutputFormatKind::Latex => format_by_kind::<Latex>(&processor, &citations)?,
OutputFormatKind::Typst => format_by_kind::<Typst>(&processor, &citations)?,
OutputFormatKind::Markdown => format_by_kind::<Markdown>(&processor, &citations)?,
};
let bibliography = match request.output_format {
OutputFormatKind::Plain => format_bibliography::<PlainText>(
&processor,
request.output_format,
request.document_options.as_ref(),
)?,
OutputFormatKind::Html => format_bibliography::<Html>(
&processor,
request.output_format,
request.document_options.as_ref(),
)?,
OutputFormatKind::Djot => format_bibliography::<Djot>(
&processor,
request.output_format,
request.document_options.as_ref(),
)?,
OutputFormatKind::Latex => format_bibliography::<Latex>(
&processor,
request.output_format,
request.document_options.as_ref(),
)?,
OutputFormatKind::Typst => format_bibliography::<Typst>(
&processor,
request.output_format,
request.document_options.as_ref(),
)?,
OutputFormatKind::Markdown => format_bibliography::<Markdown>(
&processor,
request.output_format,
request.document_options.as_ref(),
)?,
};
Ok(FormatDocumentResult {
formatted_citations,
bibliography,
warnings,
})
}
/// Returns one `unknown_reference_class` warning for every reference whose
/// class is `ReferenceClass::Unknown`, in bibliography iteration order.
///
/// Each warning carries the reference id and names the unrecognized class.
pub fn unknown_reference_class_warnings(bibliography: &Bibliography) -> Vec<Warning> {
    let mut warnings = Vec::new();
    for (ref_id, reference) in bibliography {
        if let ReferenceClass::Unknown(class) = reference.class() {
            warnings.push(Warning {
                level: WarningLevel::Warning,
                code: "unknown_reference_class".to_string(),
                citation_id: None,
                ref_id: Some(ref_id.clone()),
                message: format!(
                    "Reference '{ref_id}' uses unknown class '{class}'; rendering will use only fields this engine understands."
                ),
            });
        }
    }
    warnings
}
/// Collects `unknown_enum_variant` warnings for the processor's data and style.
///
/// For every reference this reports, in order, an unrecognized type on its
/// class extension (monograph, collection, collection component or serial
/// component) and then each contributor entry with an unrecognized role.
/// After the references, the style's named templates, citation template and
/// bibliography template are scanned with `scan_template_for_unknowns`.
pub fn unknown_enum_warnings(processor: &Processor) -> Vec<Warning> {
    let mut warnings = Vec::new();

    for (ref_id, reference) in &processor.bibliography {
        // At most one type-level message per reference; other extension kinds
        // and known types produce none.
        let type_message = match reference.extension() {
            ClassExtension::Monograph(r) => match &r.r#type {
                MonographType::Unknown(s) => Some(format!("Reference '{ref_id}' uses unknown monograph type '{s}'; rendering will use default monograph formatting.")),
                _ => None,
            },
            ClassExtension::Collection(r) => match &r.r#type {
                CollectionType::Unknown(s) => Some(format!("Reference '{ref_id}' uses unknown collection type '{s}'; rendering will use default collection formatting.")),
                _ => None,
            },
            ClassExtension::CollectionComponent(r) => match &r.r#type {
                MonographComponentType::Unknown(s) => Some(format!("Reference '{ref_id}' uses unknown monograph component type '{s}'; rendering will use default chapter formatting.")),
                _ => None,
            },
            ClassExtension::SerialComponent(r) => match &r.r#type {
                SerialComponentType::Unknown(s) => Some(format!("Reference '{ref_id}' uses unknown serial component type '{s}'; rendering will use default article formatting.")),
                _ => None,
            },
            _ => None,
        };
        if let Some(message) = type_message {
            warnings.push(Warning {
                level: WarningLevel::Warning,
                code: "unknown_enum_variant".to_string(),
                citation_id: None,
                ref_id: Some(ref_id.clone()),
                message,
            });
        }

        for contributor in reference.all_contributor_entries() {
            let ReferenceRole::Unknown(s) = &contributor.role else {
                continue;
            };
            warnings.push(Warning {
                level: WarningLevel::Warning,
                code: "unknown_enum_variant".to_string(),
                citation_id: None,
                ref_id: Some(ref_id.clone()),
                message: format!("Reference '{ref_id}' uses unknown contributor role '{s}'; this role may be ignored during rendering."),
            });
        }
    }

    // Style side: named templates first, then the citation and bibliography
    // layouts.
    for (name, template) in processor.style.templates.iter().flatten() {
        scan_template_for_unknowns(template, &format!("template '{name}'"), &mut warnings);
    }
    let citation_template = processor
        .style
        .citation
        .as_ref()
        .and_then(|spec| spec.template.as_ref());
    if let Some(template) = citation_template {
        scan_template_for_unknowns(template, "citation layout", &mut warnings);
    }
    let bibliography_template = processor
        .style
        .bibliography
        .as_ref()
        .and_then(|spec| spec.template.as_ref());
    if let Some(template) = bibliography_template {
        scan_template_for_unknowns(template, "bibliography layout", &mut warnings);
    }

    warnings
}
/// Walks `components` (descending into groups) and appends an
/// `unknown_enum_variant` warning for every unrecognized locale term key, term
/// form, contributor role or date form.
///
/// `location` is interpolated into each message to say which part of the style
/// the template belongs to. The warnings carry no citation or reference id.
fn scan_template_for_unknowns(
    components: &[citum_schema::template::TemplateComponent],
    location: &str,
    warnings: &mut Vec<Warning>,
) {
    use citum_schema::template::{DateForm, TemplateComponent};

    // Shared constructor: every warning here differs only in its message.
    fn style_warning(message: String) -> Warning {
        Warning {
            level: WarningLevel::Warning,
            code: "unknown_enum_variant".to_string(),
            citation_id: None,
            ref_id: None,
            message,
        }
    }

    for component in components {
        match component {
            TemplateComponent::Group(nested) => {
                scan_template_for_unknowns(&nested.group, location, warnings);
            }
            TemplateComponent::Term(term) => {
                // A term may be unknown in its key, its form, or both.
                if let GeneralTerm::Unknown(s) = &term.term {
                    warnings.push(style_warning(format!("Style {location} uses unknown locale term key '{s}'; this term may render as empty.")));
                }
                if let Some(TermForm::Unknown(s)) = &term.form {
                    warnings.push(style_warning(format!("Style {location} uses unknown term form '{s}'; falling back to long form.")));
                }
            }
            TemplateComponent::Contributor(contributor) => {
                if let TemplateRole::Unknown(s) = &contributor.contributor {
                    warnings.push(style_warning(format!("Style {location} uses unknown contributor role '{s}'; this role may be ignored.")));
                }
            }
            TemplateComponent::Date(date) => {
                if let DateForm::Unknown(s) = &date.form {
                    warnings.push(style_warning(format!("Style {location} uses unknown date form '{s}'; falling back to year only.")));
                }
            }
            _ => {}
        }
    }
}
/// Renders `citations` in output format `F` and pairs each rendered text with
/// its source citation.
///
/// Each result carries the citation id (empty when the citation has none), the
/// rendered text, and the ids of the items in that citation. Pairing is
/// positional, via `zip`.
///
/// # Errors
///
/// Propagates any error from `process_citations_with_format`.
fn format_by_kind<F>(
    processor: &Processor,
    citations: &[Citation],
) -> Result<Vec<FormattedCitation>, FormatDocumentError>
where
    F: OutputFormat<Output = String>,
{
    let rendered = processor.process_citations_with_format::<F>(citations)?;
    Ok(rendered
        .iter()
        .zip(citations)
        .map(|(text, citation)| FormattedCitation {
            id: citation.id.clone().unwrap_or_default(),
            text: text.clone(),
            ref_ids: citation.items.iter().map(|item| item.id.clone()).collect(),
        })
        .collect())
}
/// Renders the bibliography in output format `F`, both as one block of content
/// and as individual entries.
///
/// Annotations and the annotation style are taken from `doc_opts` only when an
/// annotations map is present. An empty map is passed to the renderer as "no
/// annotations", but the annotation style is still forwarded. Per-entry
/// metadata fields fall back to their default value when absent.
///
/// This function itself only ever returns `Ok`; the `Result` return type
/// matches its sibling `format_by_kind`.
fn format_bibliography<F>(
    processor: &Processor,
    format_kind: OutputFormatKind,
    doc_opts: Option<&DocumentOptions>,
) -> Result<FormattedBibliography, FormatDocumentError>
where
    F: OutputFormat<Output = String>,
{
    let annotated = doc_opts.and_then(|opts| opts.annotations.as_ref().map(|anns| (opts, anns)));
    let (annotations, annotation_style) = match annotated {
        Some((opts, anns)) => (
            anns.clone(),
            opts.annotation_format
                .clone()
                .map(|format| AnnotationStyle { format }),
        ),
        None => (HashMap::new(), None),
    };

    // The renderer receives `None` rather than an empty map; the same argument
    // is used for the whole bibliography and for each individual entry.
    let annotations_arg = if annotations.is_empty() {
        None
    } else {
        Some(&annotations)
    };

    let content = processor.render_bibliography_with_format_and_annotations::<F>(
        annotations_arg,
        annotation_style.as_ref(),
    );

    let entries = processor
        .process_references()
        .bibliography
        .into_iter()
        .map(|entry| {
            // Render this entry on its own; the clone is needed because the
            // renderer takes ownership and the entry's id/metadata are still
            // read afterwards.
            let rendered = crate::render::bibliography::refs_to_string_with_format::<F>(
                vec![entry.clone()],
                annotations_arg,
                annotation_style.as_ref(),
            );
            BibliographyEntry {
                id: entry.id,
                text: rendered,
                metadata: EntryMetadata {
                    author: entry.metadata.author.unwrap_or_default(),
                    year: entry.metadata.year.unwrap_or_default(),
                    title: entry.metadata.title.unwrap_or_default(),
                },
            }
        })
        .collect();

    Ok(FormattedBibliography {
        format: format_kind,
        content,
        entries,
    })
}
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing,
    reason = "test code uses assertions and panic"
)]
mod tests {
    use super::*;
    use crate::api::CitationOccurrenceItem;
    use crate::{
        Config, ContributorForm, ContributorRole, DateForm, Processing, Rendering,
        TemplateComponent, TemplateContributor, TemplateDate, TemplateDateVariable,
        WrapPunctuation,
    };
    use citum_schema::reference::{EdtfString, InputReference, Monograph, MonographType, Title};
    use citum_schema::{CitationSpec, StyleInfo};

    /// Minimal author-date style: short author + issued year, in parentheses.
    fn make_test_style() -> Style {
        let author = TemplateComponent::Contributor(TemplateContributor {
            contributor: ContributorRole::Author,
            form: ContributorForm::Short,
            rendering: Rendering::default(),
            ..Default::default()
        });
        let year = TemplateComponent::Date(TemplateDate {
            date: TemplateDateVariable::Issued,
            form: DateForm::Year,
            rendering: Rendering::default(),
            ..Default::default()
        });
        Style {
            info: StyleInfo {
                title: Some("Test Style".to_string()),
                id: Some("test".into()),
                ..Default::default()
            },
            options: Some(Config {
                processing: Some(Processing::AuthorDate),
                ..Default::default()
            }),
            citation: Some(CitationSpec {
                template: Some(vec![author, year]),
                wrap: Some(WrapPunctuation::Parentheses.into()),
                ..Default::default()
            }),
            ..Default::default()
        }
    }

    /// Builds a book reference with the given id, title and issued year.
    fn book(id: &'static str, title: &str, year: &str) -> InputReference {
        InputReference::Monograph(Box::new(Monograph {
            id: Some(id.into()),
            r#type: MonographType::Book,
            title: Some(Title::Single(title.to_string())),
            issued: EdtfString(year.to_string()),
            ..Default::default()
        }))
    }

    /// Serializes a bibliography into the JSON flavour of `RefsInput`.
    fn refs_json(refs: Bibliography) -> RefsInput {
        RefsInput::Json(serde_json::to_value(refs).unwrap())
    }

    /// One-book bibliography keyed `smith2020`.
    fn make_test_bibliography() -> RefsInput {
        let mut refs = Bibliography::new();
        refs.insert(
            "smith2020".to_string(),
            book("smith2020", "Sample Work", "2020"),
        );
        refs_json(refs)
    }

    /// A citation occurrence with a single item and every option unset.
    fn single_item_citation(citation_id: &str, ref_id: &str) -> CitationOccurrence {
        CitationOccurrence {
            id: citation_id.to_string(),
            items: vec![CitationOccurrenceItem {
                id: ref_id.to_string(),
                locator: None,
                prefix: None,
                suffix: None,
                integral_name_state: None,
                org_abbreviation_state: None,
            }],
            mode: None,
            note_number: None,
            suppress_author: None,
            grouped: None,
            prefix: None,
            suffix: None,
        }
    }

    /// A plain-text request with no locale and no document options.
    fn plain_request(
        style: StyleInput,
        refs: RefsInput,
        citations: Vec<CitationOccurrence>,
    ) -> FormatDocumentRequest {
        FormatDocumentRequest {
            style,
            locale: None,
            output_format: OutputFormatKind::Plain,
            refs,
            citations,
            document_options: None,
        }
    }

    #[test]
    fn format_document_with_style_empty_citations() {
        let request = plain_request(
            StyleInput::Yaml("dummy".to_string()),
            make_test_bibliography(),
            vec![],
        );
        let result = format_document_with_style(make_test_style(), request);
        assert!(result.is_ok());
        let res = result.unwrap();
        assert_eq!(res.formatted_citations.len(), 0);
    }

    #[test]
    fn format_document_missing_ref_warning() {
        let request = plain_request(
            StyleInput::Yaml("dummy".to_string()),
            make_test_bibliography(),
            vec![single_item_citation("cite1", "unknown_ref")],
        );
        let result = format_document_with_style(make_test_style(), request);
        assert!(result.is_ok());
        let res = result.unwrap();
        assert!(res.warnings.iter().any(|w| w.code == "missing_ref"));
    }

    #[test]
    fn format_document_unknown_reference_class_warning() {
        let unknown_ref: InputReference = serde_json::from_str(
            r#"{
                "class": "dance-performance",
                "id": "pina2011",
                "title": "Pina",
                "issued": "2011",
                "venue": "Berlin"
            }"#,
        )
        .expect("unknown class should parse through the compatibility path");
        let mut refs = Bibliography::new();
        refs.insert("pina2011".to_string(), unknown_ref);

        let request = plain_request(
            StyleInput::Yaml("dummy".to_string()),
            refs_json(refs),
            vec![single_item_citation("cite1", "pina2011")],
        );
        let result = format_document_with_style(make_test_style(), request).unwrap();
        let warning = result
            .warnings
            .iter()
            .find(|w| w.code == "unknown_reference_class")
            .expect("unknown class warning should be emitted");
        assert_eq!(warning.ref_id.as_deref(), Some("pina2011"));
        assert!(warning.message.contains("dance-performance"));
    }

    #[test]
    fn format_document_yaml_style_input() {
        let yaml_style = serde_yaml::to_string(&make_test_style()).expect("serialize test style");
        let mut refs = Bibliography::new();
        refs.insert(
            "test2024".to_string(),
            book("test2024", "Test Work", "2024"),
        );

        let request = plain_request(
            StyleInput::Yaml(yaml_style),
            refs_json(refs),
            vec![single_item_citation("c1", "test2024")],
        );
        let result = format_document(request);
        assert!(result.is_ok());
        let res = result.unwrap();
        assert_eq!(res.formatted_citations.len(), 1);
        assert!(!res.formatted_citations[0].text.is_empty());
    }

    #[test]
    fn format_document_uri_input_unresolved() {
        let request = plain_request(
            StyleInput::Uri("https://example.com/style.yaml".to_string()),
            RefsInput::Json(serde_json::Value::Object(Default::default())),
            vec![],
        );
        let result = format_document(request);
        assert!(
            matches!(result, Err(FormatDocumentError::UnresolvedInput(_))),
            "Expected UnresolvedInput error"
        );
    }

    /// Resolver stub: returns the wrapped style for any style request and
    /// reports every locale as not found.
    struct MockResolver(Style);

    impl citum_resolver_api::StyleResolver for MockResolver {
        type Style = Style;
        type Locale = citum_schema::locale::Locale;

        fn resolve_style(&self, _uri: &str) -> Result<Style, citum_schema::ResolverError> {
            Ok(self.0.clone())
        }

        fn resolve_locale(
            &self,
            id: &str,
        ) -> Result<citum_schema::locale::Locale, citum_schema::ResolverError> {
            Err(citum_schema::ResolverError::LocaleNotFound(
                std::borrow::Cow::Owned(id.to_string()),
            ))
        }
    }

    #[test]
    fn format_document_with_resolver_injects_style_for_id_input() {
        let resolver = MockResolver(make_test_style());
        let request = plain_request(
            StyleInput::Id("any-id".to_string()),
            make_test_bibliography(),
            vec![single_item_citation("c1", "smith2020")],
        );

        // Without a resolver an id input cannot be resolved.
        match format_document(request.clone()) {
            Err(FormatDocumentError::UnresolvedInput(_)) => {}
            other => panic!("expected UnresolvedInput without resolver, got: {other:?}"),
        }

        let result = format_document_with_resolver(request, &resolver);
        assert!(result.is_ok(), "expected Ok, got: {:?}", result.err());
        let res = result.unwrap();
        assert_eq!(res.formatted_citations.len(), 1);
        assert!(
            !res.formatted_citations[0].text.is_empty(),
            "formatted citation text should not be empty"
        );
    }
}