1use crate::api::AnnotationStyle;
9use crate::error::ProcessorError;
10use crate::processor::Processor;
11use crate::reference::{Bibliography, Citation};
12use crate::render::djot::Djot;
13use crate::render::format::OutputFormat;
14use crate::render::html::Html;
15use crate::render::latex::Latex;
16use crate::render::plain::PlainText;
17use crate::render::typst::Typst;
18use citum_schema::Style;
19use citum_schema::locale::{GeneralTerm, TermForm};
20use citum_schema::reference::{
21 ClassExtension, CollectionType, ContributorRole as ReferenceRole, MonographComponentType,
22 MonographType, ReferenceClass, SerialComponentType,
23};
24use citum_schema::template::ContributorRole as TemplateRole;
25
26use serde::{Deserialize, Serialize};
27use std::collections::HashMap;
28
29use super::{
30 BibliographyEntry, CitationOccurrence, DocumentOptions, EntryMetadata, FormattedBibliography,
31 FormattedCitation, OutputFormatKind, RefsInput, StyleInput, Warning, WarningLevel,
32};
33
34#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct FormatDocumentRequest {
37 pub style: StyleInput,
39 pub locale: Option<String>,
44 #[serde(default)]
47 pub output_format: OutputFormatKind,
48 pub refs: RefsInput,
50 pub citations: Vec<CitationOccurrence>,
52 pub document_options: Option<DocumentOptions>,
54}
55
56#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct FormatDocumentResult {
59 pub formatted_citations: Vec<FormattedCitation>,
61 pub bibliography: FormattedBibliography,
63 pub warnings: Vec<Warning>,
65}
66
67#[derive(Debug)]
69pub enum FormatDocumentError {
70 UnresolvedInput(String),
72 StyleParse(String),
74 StylePath(String),
76 RefsInputPath(String),
78 RefsInputParse(String),
80 Processing(ProcessorError),
82}
83
84impl std::fmt::Display for FormatDocumentError {
85 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
86 match self {
87 Self::UnresolvedInput(msg) => write!(f, "Unresolved style input: {}", msg),
88 Self::StyleParse(msg) => write!(f, "Style parse error: {}", msg),
89 Self::StylePath(msg) => write!(f, "Style path error: {}", msg),
90 Self::RefsInputPath(msg) => write!(f, "Refs input path error: {}", msg),
91 Self::RefsInputParse(msg) => write!(f, "Refs input parse error: {}", msg),
92 Self::Processing(err) => write!(f, "Processing error: {}", err),
93 }
94 }
95}
96
97impl std::error::Error for FormatDocumentError {}
98
99impl From<ProcessorError> for FormatDocumentError {
100 fn from(err: ProcessorError) -> Self {
101 Self::Processing(err)
102 }
103}
104
105pub fn format_document(
115 request: FormatDocumentRequest,
116) -> Result<FormatDocumentResult, FormatDocumentError> {
117 let style = request.style.resolve_local()?;
118 format_document_with_style(style, request)
119}
120
121pub fn format_document_with_style(
130 style: Style,
131 request: FormatDocumentRequest,
132) -> Result<FormatDocumentResult, FormatDocumentError> {
133 let mut warnings = Vec::new();
134
135 if let Some(tag) = &request.locale
140 && !tag.is_empty()
141 && !tag.eq_ignore_ascii_case("en-us")
142 {
143 warnings.push(Warning {
144 level: WarningLevel::Warning,
145 code: "locale_fallback".to_string(),
146 citation_id: None,
147 ref_id: None,
148 message: format!(
149 "Requested locale '{tag}' could not be loaded by the engine; falling back to en-US. Adapter-side locale resolution is not yet wired through."
150 ),
151 });
152 }
153
154 let bibliography = request.refs.resolve_local()?;
155 let mut processor = Processor::new(style, bibliography);
156 warnings.extend(unknown_reference_class_warnings(&processor.bibliography));
157 warnings.extend(unknown_enum_warnings(&processor));
158
159 if let Some(opts) = &request.document_options {
160 if let Some(show_semantics) = opts.show_semantics {
161 processor.show_semantics = show_semantics;
162 }
163 if let Some(inject_ast) = opts.inject_ast_indices {
164 processor.set_inject_ast_indices(inject_ast);
165 }
166 if let Some(abbr_map) = opts.abbreviation_map.clone() {
167 processor.abbreviation_map = Some(abbr_map);
168 }
169 if opts.integral_name_memory.is_some() {
170 warnings.push(Warning {
171 level: WarningLevel::Warning,
172 code: "integral_name_memory_not_applied".to_string(),
173 citation_id: None,
174 ref_id: None,
175 message: "document_options.integral_name_memory is accepted but not yet wired through the processor; tracked in csl26-wq0y.".to_string(),
176 });
177 }
178 }
179
180 let mut citations: Vec<Citation> = Vec::new();
185 for occ in request.citations {
186 let mut citation: Citation = occ.into();
187 citation.items.retain(|item| {
188 if processor.bibliography.contains_key(&item.id) {
189 true
190 } else {
191 warnings.push(Warning {
192 level: WarningLevel::Warning,
193 code: "missing_ref".to_string(),
194 citation_id: citation.id.clone(),
195 ref_id: Some(item.id.clone()),
196 message: format!("Reference '{}' not found in bibliography", item.id),
197 });
198 false
199 }
200 });
201 citations.push(citation);
202 }
203
204 let formatted_citations = match request.output_format {
206 OutputFormatKind::Plain => format_by_kind::<PlainText>(&processor, &citations)?,
207 OutputFormatKind::Html => format_by_kind::<Html>(&processor, &citations)?,
208 OutputFormatKind::Djot => format_by_kind::<Djot>(&processor, &citations)?,
209 OutputFormatKind::Latex => format_by_kind::<Latex>(&processor, &citations)?,
210 OutputFormatKind::Typst => format_by_kind::<Typst>(&processor, &citations)?,
211 };
212
213 let bibliography = match request.output_format {
215 OutputFormatKind::Plain => format_bibliography::<PlainText>(
216 &processor,
217 request.output_format,
218 request.document_options.as_ref(),
219 )?,
220 OutputFormatKind::Html => format_bibliography::<Html>(
221 &processor,
222 request.output_format,
223 request.document_options.as_ref(),
224 )?,
225 OutputFormatKind::Djot => format_bibliography::<Djot>(
226 &processor,
227 request.output_format,
228 request.document_options.as_ref(),
229 )?,
230 OutputFormatKind::Latex => format_bibliography::<Latex>(
231 &processor,
232 request.output_format,
233 request.document_options.as_ref(),
234 )?,
235 OutputFormatKind::Typst => format_bibliography::<Typst>(
236 &processor,
237 request.output_format,
238 request.document_options.as_ref(),
239 )?,
240 };
241
242 Ok(FormatDocumentResult {
243 formatted_citations,
244 bibliography,
245 warnings,
246 })
247}
248
249pub fn unknown_reference_class_warnings(bibliography: &Bibliography) -> Vec<Warning> {
251 bibliography
252 .iter()
253 .filter_map(|(ref_id, reference)| {
254 let ReferenceClass::Unknown(class) = reference.class() else {
255 return None;
256 };
257 Some(Warning {
258 level: WarningLevel::Warning,
259 code: "unknown_reference_class".to_string(),
260 citation_id: None,
261 ref_id: Some(ref_id.clone()),
262 message: format!(
263 "Reference '{ref_id}' uses unknown class '{class}'; rendering will use only fields this engine understands."
264 ),
265 })
266 })
267 .collect()
268}
269
270pub fn unknown_enum_warnings(processor: &Processor) -> Vec<Warning> {
275 let mut warnings = Vec::new();
276
277 for (ref_id, reference) in &processor.bibliography {
279 match reference.extension() {
280 ClassExtension::Monograph(r) => {
281 if let MonographType::Unknown(s) = &r.r#type {
282 warnings.push(Warning {
283 level: WarningLevel::Warning,
284 code: "unknown_enum_variant".to_string(),
285 citation_id: None,
286 ref_id: Some(ref_id.clone()),
287 message: format!("Reference '{ref_id}' uses unknown monograph type '{s}'; rendering will use default monograph formatting."),
288 });
289 }
290 }
291 ClassExtension::Collection(r) => {
292 if let CollectionType::Unknown(s) = &r.r#type {
293 warnings.push(Warning {
294 level: WarningLevel::Warning,
295 code: "unknown_enum_variant".to_string(),
296 citation_id: None,
297 ref_id: Some(ref_id.clone()),
298 message: format!("Reference '{ref_id}' uses unknown collection type '{s}'; rendering will use default collection formatting."),
299 });
300 }
301 }
302 ClassExtension::CollectionComponent(r) => {
303 if let MonographComponentType::Unknown(s) = &r.r#type {
304 warnings.push(Warning {
305 level: WarningLevel::Warning,
306 code: "unknown_enum_variant".to_string(),
307 citation_id: None,
308 ref_id: Some(ref_id.clone()),
309 message: format!("Reference '{ref_id}' uses unknown monograph component type '{s}'; rendering will use default chapter formatting."),
310 });
311 }
312 }
313 ClassExtension::SerialComponent(r) => {
314 if let SerialComponentType::Unknown(s) = &r.r#type {
315 warnings.push(Warning {
316 level: WarningLevel::Warning,
317 code: "unknown_enum_variant".to_string(),
318 citation_id: None,
319 ref_id: Some(ref_id.clone()),
320 message: format!("Reference '{ref_id}' uses unknown serial component type '{s}'; rendering will use default article formatting."),
321 });
322 }
323 }
324 _ => {}
325 }
326
327 for contributor in reference.all_contributor_entries() {
328 if let ReferenceRole::Unknown(s) = &contributor.role {
329 warnings.push(Warning {
330 level: WarningLevel::Warning,
331 code: "unknown_enum_variant".to_string(),
332 citation_id: None,
333 ref_id: Some(ref_id.clone()),
334 message: format!("Reference '{ref_id}' uses unknown contributor role '{s}'; this role may be ignored during rendering."),
335 });
336 }
337 }
338 }
339
340 if let Some(templates) = &processor.style.templates {
342 for (name, template) in templates {
343 scan_template_for_unknowns(template, &format!("template '{name}'"), &mut warnings);
344 }
345 }
346 if let Some(citation) = &processor.style.citation
347 && let Some(template) = &citation.template
348 {
349 scan_template_for_unknowns(template, "citation layout", &mut warnings);
350 }
351 if let Some(bib) = &processor.style.bibliography
352 && let Some(template) = &bib.template
353 {
354 scan_template_for_unknowns(template, "bibliography layout", &mut warnings);
355 }
356
357 warnings
358}
359
360fn scan_template_for_unknowns(
361 components: &[citum_schema::template::TemplateComponent],
362 location: &str,
363 warnings: &mut Vec<Warning>,
364) {
365 use citum_schema::template::TemplateComponent;
366 for component in components {
367 match component {
368 TemplateComponent::Term(t) => {
369 if let GeneralTerm::Unknown(s) = &t.term {
370 warnings.push(Warning {
371 level: WarningLevel::Warning,
372 code: "unknown_enum_variant".to_string(),
373 citation_id: None,
374 ref_id: None,
375 message: format!("Style {location} uses unknown locale term key '{s}'; this term may render as empty."),
376 });
377 }
378 if let Some(TermForm::Unknown(s)) = &t.form {
379 warnings.push(Warning {
380 level: WarningLevel::Warning,
381 code: "unknown_enum_variant".to_string(),
382 citation_id: None,
383 ref_id: None,
384 message: format!("Style {location} uses unknown term form '{s}'; falling back to long form."),
385 });
386 }
387 }
388 TemplateComponent::Contributor(c) => {
389 if let TemplateRole::Unknown(s) = &c.contributor {
390 warnings.push(Warning {
391 level: WarningLevel::Warning,
392 code: "unknown_enum_variant".to_string(),
393 citation_id: None,
394 ref_id: None,
395 message: format!("Style {location} uses unknown contributor role '{s}'; this role may be ignored."),
396 });
397 }
398 }
399 TemplateComponent::Date(d) => {
400 if let citum_schema::template::DateForm::Unknown(s) = &d.form {
401 warnings.push(Warning {
402 level: WarningLevel::Warning,
403 code: "unknown_enum_variant".to_string(),
404 citation_id: None,
405 ref_id: None,
406 message: format!("Style {location} uses unknown date form '{s}'; falling back to year only."),
407 });
408 }
409 }
410 TemplateComponent::Group(g) => {
411 scan_template_for_unknowns(&g.group, location, warnings);
412 }
413 _ => {}
414 }
415 }
416}
417
418fn format_by_kind<F>(
420 processor: &Processor,
421 citations: &[Citation],
422) -> Result<Vec<FormattedCitation>, FormatDocumentError>
423where
424 F: OutputFormat<Output = String>,
425{
426 let texts = processor.process_citations_with_format::<F>(citations)?;
427
428 let formatted = citations
429 .iter()
430 .zip(texts.iter())
431 .map(|(citation, text)| {
432 let ref_ids = citation.items.iter().map(|item| item.id.clone()).collect();
433 FormattedCitation {
434 id: citation.id.clone().unwrap_or_default(),
435 text: text.clone(),
436 ref_ids,
437 }
438 })
439 .collect();
440
441 Ok(formatted)
442}
443
444fn format_bibliography<F>(
446 processor: &Processor,
447 format_kind: OutputFormatKind,
448 doc_opts: Option<&DocumentOptions>,
449) -> Result<FormattedBibliography, FormatDocumentError>
450where
451 F: OutputFormat<Output = String>,
452{
453 let (annotations, annotation_style) = if let Some(opts) = doc_opts {
455 if let Some(anns) = &opts.annotations {
456 let style = opts.annotation_format.as_ref().map(|fmt| AnnotationStyle {
457 format: fmt.clone(),
458 });
459 (anns.clone(), style)
460 } else {
461 (HashMap::new(), None)
462 }
463 } else {
464 (HashMap::new(), None)
465 };
466
467 let content = if annotations.is_empty() {
469 processor
470 .render_bibliography_with_format_and_annotations::<F>(None, annotation_style.as_ref())
471 } else {
472 processor.render_bibliography_with_format_and_annotations::<F>(
473 Some(&annotations),
474 annotation_style.as_ref(),
475 )
476 };
477
478 let proc_entries = processor.process_references().bibliography;
480 let entries = proc_entries
481 .into_iter()
482 .map(|entry| {
483 let entry_anns = if annotations.is_empty() {
484 None
485 } else {
486 Some(&annotations)
487 };
488 let text = crate::render::bibliography::refs_to_string_with_format::<F>(
489 vec![entry.clone()],
490 entry_anns,
491 annotation_style.as_ref(),
492 );
493 let metadata = EntryMetadata {
494 author: entry.metadata.author.unwrap_or_default(),
495 year: entry.metadata.year.unwrap_or_default(),
496 title: entry.metadata.title.unwrap_or_default(),
497 };
498 BibliographyEntry {
499 id: entry.id,
500 text,
501 metadata,
502 }
503 })
504 .collect();
505
506 Ok(FormattedBibliography {
507 format: format_kind,
508 content,
509 entries,
510 })
511}
512
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing,
    reason = "test code uses assertions and panic"
)]
mod tests {
    use super::*;
    use crate::api::CitationOccurrenceItem;
    use crate::{
        Config, ContributorForm, ContributorRole, DateForm, Processing, Rendering,
        TemplateComponent, TemplateContributor, TemplateDate, TemplateDateVariable,
        WrapPunctuation,
    };
    use citum_schema::reference::{EdtfString, InputReference, Monograph, MonographType, Title};
    use citum_schema::{CitationSpec, StyleInfo};

    /// Minimal author-date style: short author plus year, wrapped in parentheses.
    fn make_test_style() -> Style {
        let author = TemplateComponent::Contributor(TemplateContributor {
            contributor: ContributorRole::Author,
            form: ContributorForm::Short,
            rendering: Rendering::default(),
            ..Default::default()
        });
        let year = TemplateComponent::Date(TemplateDate {
            date: TemplateDateVariable::Issued,
            form: DateForm::Year,
            rendering: Rendering::default(),
            ..Default::default()
        });

        Style {
            info: StyleInfo {
                title: Some("Test Style".to_string()),
                id: Some("test".into()),
                ..Default::default()
            },
            options: Some(Config {
                processing: Some(Processing::AuthorDate),
                ..Default::default()
            }),
            citation: Some(CitationSpec {
                template: Some(vec![author, year]),
                wrap: Some(WrapPunctuation::Parentheses.into()),
                ..Default::default()
            }),
            ..Default::default()
        }
    }

    /// Builds a book reference with the given id, title and issue year.
    fn book(id: &'static str, title: &'static str, year: &'static str) -> InputReference {
        InputReference::Monograph(Box::new(Monograph {
            id: Some(id.into()),
            r#type: MonographType::Book,
            title: Some(Title::Single(title.to_string())),
            issued: EdtfString(year.to_string()),
            ..Default::default()
        }))
    }

    /// Serializes a bibliography into the JSON flavour of `RefsInput`.
    fn json_refs(refs: Bibliography) -> RefsInput {
        RefsInput::Json(serde_json::to_value(refs).unwrap())
    }

    /// One-book bibliography shared by the simple tests.
    fn make_test_bibliography() -> RefsInput {
        let mut refs = Bibliography::new();
        refs.insert(
            "smith2020".to_string(),
            book("smith2020", "Sample Work", "2020"),
        );
        json_refs(refs)
    }

    /// Citation occurrence holding a single item and no optional settings.
    fn single_item_citation(citation_id: &str, ref_id: &str) -> CitationOccurrence {
        CitationOccurrence {
            id: citation_id.to_string(),
            items: vec![CitationOccurrenceItem {
                id: ref_id.to_string(),
                locator: None,
                prefix: None,
                suffix: None,
                integral_name_state: None,
            }],
            mode: None,
            note_number: None,
            suppress_author: None,
            grouped: None,
            prefix: None,
            suffix: None,
        }
    }

    /// Plain-text request without a locale or document options.
    fn plain_request(
        style: StyleInput,
        refs: RefsInput,
        citations: Vec<CitationOccurrence>,
    ) -> FormatDocumentRequest {
        FormatDocumentRequest {
            style,
            locale: None,
            output_format: OutputFormatKind::Plain,
            refs,
            citations,
            document_options: None,
        }
    }

    #[test]
    fn format_document_with_style_empty_citations() {
        let request = plain_request(
            StyleInput::Yaml("dummy".to_string()),
            make_test_bibliography(),
            vec![],
        );

        let result = format_document_with_style(make_test_style(), request);

        assert!(result.is_ok());
        let res = result.unwrap();
        assert_eq!(res.formatted_citations.len(), 0);
    }

    #[test]
    fn format_document_missing_ref_warning() {
        let request = plain_request(
            StyleInput::Yaml("dummy".to_string()),
            make_test_bibliography(),
            vec![single_item_citation("cite1", "unknown_ref")],
        );

        let result = format_document_with_style(make_test_style(), request);

        assert!(result.is_ok());
        let res = result.unwrap();
        assert!(res.warnings.iter().any(|w| w.code == "missing_ref"));
    }

    #[test]
    fn format_document_unknown_reference_class_warning() {
        let unknown_ref: InputReference = serde_json::from_str(
            r#"{
                "class": "dance-performance",
                "id": "pina2011",
                "title": "Pina",
                "issued": "2011",
                "venue": "Berlin"
            }"#,
        )
        .expect("unknown class should parse through the compatibility path");
        let mut refs = Bibliography::new();
        refs.insert("pina2011".to_string(), unknown_ref);

        let request = plain_request(
            StyleInput::Yaml("dummy".to_string()),
            json_refs(refs),
            vec![single_item_citation("cite1", "pina2011")],
        );

        let result = format_document_with_style(make_test_style(), request).unwrap();
        let warning = result
            .warnings
            .iter()
            .find(|w| w.code == "unknown_reference_class")
            .expect("unknown class warning should be emitted");
        assert_eq!(warning.ref_id.as_deref(), Some("pina2011"));
        assert!(warning.message.contains("dance-performance"));
    }

    #[test]
    fn format_document_yaml_style_input() {
        let yaml_style = serde_yaml::to_string(&make_test_style()).expect("serialize test style");

        let mut refs = Bibliography::new();
        refs.insert(
            "test2024".to_string(),
            book("test2024", "Test Work", "2024"),
        );

        let request = plain_request(
            StyleInput::Yaml(yaml_style),
            json_refs(refs),
            vec![single_item_citation("c1", "test2024")],
        );

        let result = format_document(request);

        assert!(result.is_ok());
        let res = result.unwrap();
        assert_eq!(res.formatted_citations.len(), 1);
        assert!(!res.formatted_citations[0].text.is_empty());
    }

    #[test]
    fn format_document_uri_input_unresolved() {
        let request = plain_request(
            StyleInput::Uri("https://example.com/style.yaml".to_string()),
            RefsInput::Json(serde_json::Value::Object(Default::default())),
            vec![],
        );

        let result = format_document(request);

        let Err(FormatDocumentError::UnresolvedInput(_)) = result else {
            panic!("Expected UnresolvedInput error");
        };
    }
}