1use crate::api::AnnotationStyle;
9use crate::error::ProcessorError;
10use crate::processor::Processor;
11use crate::reference::{Bibliography, Citation};
12use crate::render::djot::Djot;
13use crate::render::format::OutputFormat;
14use crate::render::html::Html;
15use crate::render::latex::Latex;
16use crate::render::plain::PlainText;
17use crate::render::typst::Typst;
18use citum_schema::Style;
19use citum_schema::locale::{GeneralTerm, TermForm};
20use citum_schema::reference::{
21 ClassExtension, CollectionType, ContributorRole as ReferenceRole, MonographComponentType,
22 MonographType, ReferenceClass, SerialComponentType,
23};
24use citum_schema::template::ContributorRole as TemplateRole;
25
26use serde::{Deserialize, Serialize};
27use std::collections::HashMap;
28
29use super::{
30 BibliographyEntry, CitationOccurrence, DocumentOptions, EntryMetadata, FormattedBibliography,
31 FormattedCitation, OutputFormatKind, StyleInput, Warning, WarningLevel,
32};
33
/// Input for one whole-document formatting call: a style, a bibliography, and
/// every citation occurrence that should be rendered against them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FormatDocumentRequest {
    /// Style to format with. [`format_document`] resolves it via `resolve_local()`;
    /// [`format_document_with_style`] ignores this field and uses its `style` argument.
    pub style: StyleInput,
    /// Requested locale tag. Any non-empty value other than `en-us` (ASCII
    /// case-insensitive) currently only produces a `locale_fallback` warning.
    pub locale: Option<String>,
    /// Output markup used for both citations and the bibliography.
    /// Takes `OutputFormatKind::default()` when the field is absent on deserialization.
    #[serde(default)]
    pub output_format: OutputFormatKind,
    /// References handed to the processor, looked up by reference id.
    pub refs: Bibliography,
    /// Citation occurrences to format; results are returned in the same order.
    pub citations: Vec<CitationOccurrence>,
    /// Optional per-document settings (semantics markup, AST indices,
    /// abbreviation map, annotations, ...).
    pub document_options: Option<DocumentOptions>,
}
55
/// Output of a document formatting call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FormatDocumentResult {
    /// One formatted entry per citation occurrence in the request, in request order.
    pub formatted_citations: Vec<FormattedCitation>,
    /// The rendered bibliography, both as a whole and entry by entry.
    pub bibliography: FormattedBibliography,
    /// Non-fatal diagnostics collected while formatting (locale fallback,
    /// missing references, unknown classes or enum values, ...).
    pub warnings: Vec<Warning>,
}
66
/// Failure modes of [`format_document`] and [`format_document_with_style`].
#[derive(Debug)]
pub enum FormatDocumentError {
    /// The style input could not be resolved locally (for example a URI input
    /// passed to [`format_document`]); the payload is the detail message.
    UnresolvedInput(String),
    /// Displayed as "Style parse error"; the payload is the detail message.
    /// NOTE(review): not constructed in this file - presumably produced by style resolution.
    StyleParse(String),
    /// Displayed as "Style path error"; the payload is the detail message.
    /// NOTE(review): not constructed in this file - presumably produced by style resolution.
    StylePath(String),
    /// The citation processor reported an error; created through the
    /// `From<ProcessorError>` conversion.
    Processing(ProcessorError),
}
79
80impl std::fmt::Display for FormatDocumentError {
81 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
82 match self {
83 Self::UnresolvedInput(msg) => write!(f, "Unresolved style input: {}", msg),
84 Self::StyleParse(msg) => write!(f, "Style parse error: {}", msg),
85 Self::StylePath(msg) => write!(f, "Style path error: {}", msg),
86 Self::Processing(err) => write!(f, "Processing error: {}", err),
87 }
88 }
89}
90
// No `source()` override: the default implementation (which returns `None`) is used.
// The wrapped `ProcessorError` is instead rendered as part of this type's `Display` output.
impl std::error::Error for FormatDocumentError {}
92
93impl From<ProcessorError> for FormatDocumentError {
94 fn from(err: ProcessorError) -> Self {
95 Self::Processing(err)
96 }
97}
98
99pub fn format_document(
109 request: FormatDocumentRequest,
110) -> Result<FormatDocumentResult, FormatDocumentError> {
111 let style = request.style.resolve_local()?;
112 format_document_with_style(style, request)
113}
114
115pub fn format_document_with_style(
124 style: Style,
125 request: FormatDocumentRequest,
126) -> Result<FormatDocumentResult, FormatDocumentError> {
127 let mut warnings = Vec::new();
128
129 if let Some(tag) = &request.locale
134 && !tag.is_empty()
135 && !tag.eq_ignore_ascii_case("en-us")
136 {
137 warnings.push(Warning {
138 level: WarningLevel::Warning,
139 code: "locale_fallback".to_string(),
140 citation_id: None,
141 ref_id: None,
142 message: format!(
143 "Requested locale '{tag}' could not be loaded by the engine; falling back to en-US. Adapter-side locale resolution is not yet wired through."
144 ),
145 });
146 }
147
148 let mut processor = Processor::new(style, request.refs);
149 warnings.extend(unknown_reference_class_warnings(&processor.bibliography));
150 warnings.extend(unknown_enum_warnings(&processor));
151
152 if let Some(opts) = &request.document_options {
153 if let Some(show_semantics) = opts.show_semantics {
154 processor.show_semantics = show_semantics;
155 }
156 if let Some(inject_ast) = opts.inject_ast_indices {
157 processor.set_inject_ast_indices(inject_ast);
158 }
159 if let Some(abbr_map) = opts.abbreviation_map.clone() {
160 processor.abbreviation_map = Some(abbr_map);
161 }
162 if opts.integral_name_memory.is_some() {
163 warnings.push(Warning {
164 level: WarningLevel::Warning,
165 code: "integral_name_memory_not_applied".to_string(),
166 citation_id: None,
167 ref_id: None,
168 message: "document_options.integral_name_memory is accepted but not yet wired through the processor; tracked in csl26-wq0y.".to_string(),
169 });
170 }
171 }
172
173 let mut citations: Vec<Citation> = Vec::new();
178 for occ in request.citations {
179 let mut citation: Citation = occ.into();
180 citation.items.retain(|item| {
181 if processor.bibliography.contains_key(&item.id) {
182 true
183 } else {
184 warnings.push(Warning {
185 level: WarningLevel::Warning,
186 code: "missing_ref".to_string(),
187 citation_id: citation.id.clone(),
188 ref_id: Some(item.id.clone()),
189 message: format!("Reference '{}' not found in bibliography", item.id),
190 });
191 false
192 }
193 });
194 citations.push(citation);
195 }
196
197 let formatted_citations = match request.output_format {
199 OutputFormatKind::Plain => format_by_kind::<PlainText>(&processor, &citations)?,
200 OutputFormatKind::Html => format_by_kind::<Html>(&processor, &citations)?,
201 OutputFormatKind::Djot => format_by_kind::<Djot>(&processor, &citations)?,
202 OutputFormatKind::Latex => format_by_kind::<Latex>(&processor, &citations)?,
203 OutputFormatKind::Typst => format_by_kind::<Typst>(&processor, &citations)?,
204 };
205
206 let bibliography = match request.output_format {
208 OutputFormatKind::Plain => format_bibliography::<PlainText>(
209 &processor,
210 request.output_format,
211 request.document_options.as_ref(),
212 )?,
213 OutputFormatKind::Html => format_bibliography::<Html>(
214 &processor,
215 request.output_format,
216 request.document_options.as_ref(),
217 )?,
218 OutputFormatKind::Djot => format_bibliography::<Djot>(
219 &processor,
220 request.output_format,
221 request.document_options.as_ref(),
222 )?,
223 OutputFormatKind::Latex => format_bibliography::<Latex>(
224 &processor,
225 request.output_format,
226 request.document_options.as_ref(),
227 )?,
228 OutputFormatKind::Typst => format_bibliography::<Typst>(
229 &processor,
230 request.output_format,
231 request.document_options.as_ref(),
232 )?,
233 };
234
235 Ok(FormatDocumentResult {
236 formatted_citations,
237 bibliography,
238 warnings,
239 })
240}
241
242pub fn unknown_reference_class_warnings(bibliography: &Bibliography) -> Vec<Warning> {
244 bibliography
245 .iter()
246 .filter_map(|(ref_id, reference)| {
247 let ReferenceClass::Unknown(class) = reference.class() else {
248 return None;
249 };
250 Some(Warning {
251 level: WarningLevel::Warning,
252 code: "unknown_reference_class".to_string(),
253 citation_id: None,
254 ref_id: Some(ref_id.clone()),
255 message: format!(
256 "Reference '{ref_id}' uses unknown class '{class}'; rendering will use only fields this engine understands."
257 ),
258 })
259 })
260 .collect()
261}
262
263pub fn unknown_enum_warnings(processor: &Processor) -> Vec<Warning> {
268 let mut warnings = Vec::new();
269
270 for (ref_id, reference) in &processor.bibliography {
272 match reference.extension() {
273 ClassExtension::Monograph(r) => {
274 if let MonographType::Unknown(s) = &r.r#type {
275 warnings.push(Warning {
276 level: WarningLevel::Warning,
277 code: "unknown_enum_variant".to_string(),
278 citation_id: None,
279 ref_id: Some(ref_id.clone()),
280 message: format!("Reference '{ref_id}' uses unknown monograph type '{s}'; rendering will use default monograph formatting."),
281 });
282 }
283 }
284 ClassExtension::Collection(r) => {
285 if let CollectionType::Unknown(s) = &r.r#type {
286 warnings.push(Warning {
287 level: WarningLevel::Warning,
288 code: "unknown_enum_variant".to_string(),
289 citation_id: None,
290 ref_id: Some(ref_id.clone()),
291 message: format!("Reference '{ref_id}' uses unknown collection type '{s}'; rendering will use default collection formatting."),
292 });
293 }
294 }
295 ClassExtension::CollectionComponent(r) => {
296 if let MonographComponentType::Unknown(s) = &r.r#type {
297 warnings.push(Warning {
298 level: WarningLevel::Warning,
299 code: "unknown_enum_variant".to_string(),
300 citation_id: None,
301 ref_id: Some(ref_id.clone()),
302 message: format!("Reference '{ref_id}' uses unknown monograph component type '{s}'; rendering will use default chapter formatting."),
303 });
304 }
305 }
306 ClassExtension::SerialComponent(r) => {
307 if let SerialComponentType::Unknown(s) = &r.r#type {
308 warnings.push(Warning {
309 level: WarningLevel::Warning,
310 code: "unknown_enum_variant".to_string(),
311 citation_id: None,
312 ref_id: Some(ref_id.clone()),
313 message: format!("Reference '{ref_id}' uses unknown serial component type '{s}'; rendering will use default article formatting."),
314 });
315 }
316 }
317 _ => {}
318 }
319
320 for contributor in reference.all_contributor_entries() {
321 if let ReferenceRole::Unknown(s) = &contributor.role {
322 warnings.push(Warning {
323 level: WarningLevel::Warning,
324 code: "unknown_enum_variant".to_string(),
325 citation_id: None,
326 ref_id: Some(ref_id.clone()),
327 message: format!("Reference '{ref_id}' uses unknown contributor role '{s}'; this role may be ignored during rendering."),
328 });
329 }
330 }
331 }
332
333 if let Some(templates) = &processor.style.templates {
335 for (name, template) in templates {
336 scan_template_for_unknowns(template, &format!("template '{name}'"), &mut warnings);
337 }
338 }
339 if let Some(citation) = &processor.style.citation
340 && let Some(template) = &citation.template
341 {
342 scan_template_for_unknowns(template, "citation layout", &mut warnings);
343 }
344 if let Some(bib) = &processor.style.bibliography
345 && let Some(template) = &bib.template
346 {
347 scan_template_for_unknowns(template, "bibliography layout", &mut warnings);
348 }
349
350 warnings
351}
352
353fn scan_template_for_unknowns(
354 components: &[citum_schema::template::TemplateComponent],
355 location: &str,
356 warnings: &mut Vec<Warning>,
357) {
358 use citum_schema::template::TemplateComponent;
359 for component in components {
360 match component {
361 TemplateComponent::Term(t) => {
362 if let GeneralTerm::Unknown(s) = &t.term {
363 warnings.push(Warning {
364 level: WarningLevel::Warning,
365 code: "unknown_enum_variant".to_string(),
366 citation_id: None,
367 ref_id: None,
368 message: format!("Style {location} uses unknown locale term key '{s}'; this term may render as empty."),
369 });
370 }
371 if let Some(TermForm::Unknown(s)) = &t.form {
372 warnings.push(Warning {
373 level: WarningLevel::Warning,
374 code: "unknown_enum_variant".to_string(),
375 citation_id: None,
376 ref_id: None,
377 message: format!("Style {location} uses unknown term form '{s}'; falling back to long form."),
378 });
379 }
380 }
381 TemplateComponent::Contributor(c) => {
382 if let TemplateRole::Unknown(s) = &c.contributor {
383 warnings.push(Warning {
384 level: WarningLevel::Warning,
385 code: "unknown_enum_variant".to_string(),
386 citation_id: None,
387 ref_id: None,
388 message: format!("Style {location} uses unknown contributor role '{s}'; this role may be ignored."),
389 });
390 }
391 }
392 TemplateComponent::Date(d) => {
393 if let citum_schema::template::DateForm::Unknown(s) = &d.form {
394 warnings.push(Warning {
395 level: WarningLevel::Warning,
396 code: "unknown_enum_variant".to_string(),
397 citation_id: None,
398 ref_id: None,
399 message: format!("Style {location} uses unknown date form '{s}'; falling back to year only."),
400 });
401 }
402 }
403 TemplateComponent::Group(g) => {
404 scan_template_for_unknowns(&g.group, location, warnings);
405 }
406 _ => {}
407 }
408 }
409}
410
411fn format_by_kind<F>(
413 processor: &Processor,
414 citations: &[Citation],
415) -> Result<Vec<FormattedCitation>, FormatDocumentError>
416where
417 F: OutputFormat<Output = String>,
418{
419 let texts = processor.process_citations_with_format::<F>(citations)?;
420
421 let formatted = citations
422 .iter()
423 .zip(texts.iter())
424 .map(|(citation, text)| {
425 let ref_ids = citation.items.iter().map(|item| item.id.clone()).collect();
426 FormattedCitation {
427 id: citation.id.clone().unwrap_or_default(),
428 text: text.clone(),
429 ref_ids,
430 }
431 })
432 .collect();
433
434 Ok(formatted)
435}
436
437fn format_bibliography<F>(
439 processor: &Processor,
440 format_kind: OutputFormatKind,
441 doc_opts: Option<&DocumentOptions>,
442) -> Result<FormattedBibliography, FormatDocumentError>
443where
444 F: OutputFormat<Output = String>,
445{
446 let (annotations, annotation_style) = if let Some(opts) = doc_opts {
448 if let Some(anns) = &opts.annotations {
449 let style = opts.annotation_format.as_ref().map(|fmt| AnnotationStyle {
450 format: fmt.clone(),
451 });
452 (anns.clone(), style)
453 } else {
454 (HashMap::new(), None)
455 }
456 } else {
457 (HashMap::new(), None)
458 };
459
460 let content = if annotations.is_empty() {
462 processor
463 .render_bibliography_with_format_and_annotations::<F>(None, annotation_style.as_ref())
464 } else {
465 processor.render_bibliography_with_format_and_annotations::<F>(
466 Some(&annotations),
467 annotation_style.as_ref(),
468 )
469 };
470
471 let proc_entries = processor.process_references().bibliography;
473 let entries = proc_entries
474 .into_iter()
475 .map(|entry| {
476 let entry_anns = if annotations.is_empty() {
477 None
478 } else {
479 Some(&annotations)
480 };
481 let text = crate::render::bibliography::refs_to_string_with_format::<F>(
482 vec![entry.clone()],
483 entry_anns,
484 annotation_style.as_ref(),
485 );
486 let metadata = EntryMetadata {
487 author: entry.metadata.author.unwrap_or_default(),
488 year: entry.metadata.year.unwrap_or_default(),
489 title: entry.metadata.title.unwrap_or_default(),
490 };
491 BibliographyEntry {
492 id: entry.id,
493 text,
494 metadata,
495 }
496 })
497 .collect();
498
499 Ok(FormattedBibliography {
500 format: format_kind,
501 content,
502 entries,
503 })
504}
505
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing,
    reason = "test code uses assertions and panic"
)]
mod tests {
    use super::*;
    use crate::api::CitationOccurrenceItem;
    use crate::{
        Config, ContributorForm, ContributorRole, DateForm, Processing, Rendering,
        TemplateComponent, TemplateContributor, TemplateDate, TemplateDateVariable,
        WrapPunctuation,
    };
    use citum_schema::reference::{EdtfString, InputReference, Monograph, MonographType, Title};
    use citum_schema::{CitationSpec, StyleInfo};

    /// Minimal author-date style: "(Author Year)" citations, no bibliography spec.
    fn make_test_style() -> Style {
        let author = TemplateComponent::Contributor(TemplateContributor {
            contributor: ContributorRole::Author,
            form: ContributorForm::Short,
            rendering: Rendering::default(),
            ..Default::default()
        });
        let year = TemplateComponent::Date(TemplateDate {
            date: TemplateDateVariable::Issued,
            form: DateForm::Year,
            rendering: Rendering::default(),
            ..Default::default()
        });

        Style {
            info: StyleInfo {
                title: Some("Test Style".to_string()),
                id: Some("test".into()),
                ..Default::default()
            },
            options: Some(Config {
                processing: Some(Processing::AuthorDate),
                ..Default::default()
            }),
            citation: Some(CitationSpec {
                template: Some(vec![author, year]),
                wrap: Some(WrapPunctuation::Parentheses.into()),
                ..Default::default()
            }),
            ..Default::default()
        }
    }

    /// Bibliography holding the single book `smith2020`.
    fn make_test_bibliography() -> Bibliography {
        let book = InputReference::Monograph(Box::new(Monograph {
            id: Some("smith2020".into()),
            r#type: MonographType::Book,
            title: Some(Title::Single("Sample Work".to_string())),
            issued: EdtfString("2020".to_string()),
            ..Default::default()
        }));

        let mut refs = Bibliography::new();
        refs.insert("smith2020".to_string(), book);
        refs
    }

    /// Citation occurrence `citation_id` citing exactly one reference, no extras.
    fn single_item_occurrence(citation_id: &str, ref_id: &str) -> CitationOccurrence {
        CitationOccurrence {
            id: citation_id.to_string(),
            items: vec![CitationOccurrenceItem {
                id: ref_id.to_string(),
                locator: None,
                prefix: None,
                suffix: None,
                integral_name_state: None,
            }],
            mode: None,
            note_number: None,
            suppress_author: None,
            grouped: None,
            prefix: None,
            suffix: None,
        }
    }

    /// Plain-text request without locale or document options.
    fn plain_request(
        style: StyleInput,
        refs: Bibliography,
        citations: Vec<CitationOccurrence>,
    ) -> FormatDocumentRequest {
        FormatDocumentRequest {
            style,
            locale: None,
            output_format: OutputFormatKind::Plain,
            refs,
            citations,
            document_options: None,
        }
    }

    /// Placeholder style input for tests that pass the resolved style directly.
    fn dummy_style_input() -> StyleInput {
        StyleInput::Yaml("dummy".to_string())
    }

    #[test]
    fn format_document_with_style_empty_citations() {
        let request = plain_request(dummy_style_input(), make_test_bibliography(), vec![]);

        let res = format_document_with_style(make_test_style(), request)
            .expect("formatting without citations should succeed");

        assert_eq!(res.formatted_citations.len(), 0);
    }

    #[test]
    fn format_document_missing_ref_warning() {
        let request = plain_request(
            dummy_style_input(),
            make_test_bibliography(),
            vec![single_item_occurrence("cite1", "unknown_ref")],
        );

        let res = format_document_with_style(make_test_style(), request)
            .expect("a missing reference should only produce a warning");

        assert!(res.warnings.iter().any(|w| w.code == "missing_ref"));
    }

    #[test]
    fn format_document_unknown_reference_class_warning() {
        let unknown_ref: InputReference = serde_json::from_str(
            r#"{
                "class": "dance-performance",
                "id": "pina2011",
                "title": "Pina",
                "issued": "2011",
                "venue": "Berlin"
            }"#,
        )
        .expect("unknown class should parse through the compatibility path");

        let mut refs = Bibliography::new();
        refs.insert("pina2011".to_string(), unknown_ref);

        let request = plain_request(
            dummy_style_input(),
            refs,
            vec![single_item_occurrence("cite1", "pina2011")],
        );

        let result = format_document_with_style(make_test_style(), request).unwrap();
        let warning = result
            .warnings
            .iter()
            .find(|w| w.code == "unknown_reference_class")
            .expect("unknown class warning should be emitted");

        assert_eq!(warning.ref_id.as_deref(), Some("pina2011"));
        assert!(warning.message.contains("dance-performance"));
    }

    #[test]
    fn format_document_yaml_style_input() {
        let yaml_style = serde_yaml::to_string(&make_test_style()).expect("serialize test style");

        let mut refs = Bibliography::new();
        refs.insert(
            "test2024".to_string(),
            InputReference::Monograph(Box::new(Monograph {
                id: Some("test2024".into()),
                r#type: MonographType::Book,
                title: Some(Title::Single("Test Work".to_string())),
                issued: EdtfString("2024".to_string()),
                ..Default::default()
            })),
        );

        let request = plain_request(
            StyleInput::Yaml(yaml_style),
            refs,
            vec![single_item_occurrence("c1", "test2024")],
        );

        let res = format_document(request).expect("YAML style input should resolve and format");

        assert_eq!(res.formatted_citations.len(), 1);
        assert!(!res.formatted_citations[0].text.is_empty());
    }

    #[test]
    fn format_document_uri_input_unresolved() {
        let request = plain_request(
            StyleInput::Uri("https://example.com/style.yaml".to_string()),
            Bibliography::new(),
            vec![],
        );

        // A URI cannot be resolved locally, so the call must fail before formatting.
        let outcome = format_document(request);
        if !matches!(outcome, Err(FormatDocumentError::UnresolvedInput(_))) {
            panic!("Expected UnresolvedInput error");
        }
    }
}