Skip to main content

citum_engine/processor/document/
pipeline.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus
4*/
5
6//! High-level document-processing orchestration.
7
8use super::output::{
9    HtmlPlaceholderRegistry, RenderedDocumentBody, append_document_bibliography,
10    bibliography_block_placeholder, render_document_bibliography_block_replacement,
11    rewrite_document_markup_for_typst, rewrite_group_headings_for_document,
12    stage_document_bibliography_blocks,
13};
14use super::{BibliographyBlock, CitationParser, DocumentFormat, ParsedDocument};
15use crate::processor::Processor;
16
17impl Processor {
18    /// Process citations in a document and append a bibliography.
19    #[allow(
20        clippy::string_slice,
21        reason = "parser-guaranteed boundaries and indices"
22    )]
23    pub fn process_document<P, F>(
24        &self,
25        content: &str,
26        parser: &P,
27        format: DocumentFormat,
28    ) -> String
29    where
30        P: CitationParser,
31        F: crate::render::format::OutputFormat<Output = String>,
32    {
33        let mut parsed = parser.parse_document(content, &self.locale);
34        let owned_processor = self.processor_with_document_integral_name_override(
35            parsed.frontmatter_integral_names.as_ref(),
36        );
37        let processor = owned_processor.as_ref().unwrap_or(self);
38        let body = &content[parsed.body_start..];
39        if let Some(groups) = parsed.frontmatter_groups.take() {
40            return processor.process_document_with_frontmatter_groups::<P, F>(
41                body, parsed, groups, parser, format,
42            );
43        }
44
45        if !parsed.bibliography_blocks.is_empty() {
46            return processor.process_document_with_bibliography_blocks::<P, F>(
47                body,
48                std::mem::take(&mut parsed.bibliography_blocks),
49                parser,
50                format,
51            );
52        }
53
54        processor.process_document_with_default_bibliography::<P, F>(body, parsed, parser, format)
55    }
56
57    fn process_document_with_frontmatter_groups<P, F>(
58        &self,
59        body: &str,
60        parsed: ParsedDocument,
61        groups: Vec<citum_schema::grouping::BibliographyGroup>,
62        parser: &P,
63        format: DocumentFormat,
64    ) -> String
65    where
66        P: CitationParser,
67        F: crate::render::format::OutputFormat<Output = String>,
68    {
69        self.render_document_with_trailing_bibliography::<P, F, _>(
70            body,
71            parsed,
72            parser,
73            format,
74            |processor| {
75                rewrite_group_headings_for_document(
76                    processor.render_document_bibliography_groups::<F>(&groups),
77                    format,
78                )
79            },
80        )
81    }
82
83    fn process_document_with_bibliography_blocks<P, F>(
84        &self,
85        body: &str,
86        blocks: Vec<BibliographyBlock>,
87        parser: &P,
88        format: DocumentFormat,
89    ) -> String
90    where
91        P: CitationParser,
92        F: crate::render::format::OutputFormat<Output = String>,
93    {
94        let staged = stage_document_bibliography_blocks(body, &blocks);
95        let parsed_staged = parser.parse_document(&staged, &self.locale);
96        let mut rendered = self.render_document_body::<F>(&staged, parsed_staged, format);
97        self.replace_document_bibliography_blocks::<F>(&mut rendered, &blocks, format);
98        self.finalize_document_output(parser, format, rendered)
99    }
100
101    fn process_document_with_default_bibliography<P, F>(
102        &self,
103        body: &str,
104        parsed: ParsedDocument,
105        parser: &P,
106        format: DocumentFormat,
107    ) -> String
108    where
109        P: CitationParser,
110        F: crate::render::format::OutputFormat<Output = String>,
111    {
112        self.render_document_with_trailing_bibliography::<P, F, _>(
113            body,
114            parsed,
115            parser,
116            format,
117            super::super::Processor::render_grouped_bibliography_with_format::<F>,
118        )
119    }
120
121    fn render_document_with_trailing_bibliography<P, F, B>(
122        &self,
123        body: &str,
124        parsed: ParsedDocument,
125        parser: &P,
126        format: DocumentFormat,
127        render_bibliography: B,
128    ) -> String
129    where
130        P: CitationParser,
131        F: crate::render::format::OutputFormat<Output = String>,
132        B: FnOnce(&Self) -> String,
133    {
134        let mut rendered = self.render_document_body::<F>(body, parsed, format);
135        let bibliography = render_bibliography(self);
136        append_document_bibliography(&mut rendered, format, bibliography);
137        self.finalize_document_output(parser, format, rendered)
138    }
139
140    fn render_document_body<F>(
141        &self,
142        content: &str,
143        parsed: ParsedDocument,
144        format: DocumentFormat,
145    ) -> RenderedDocumentBody
146    where
147        F: crate::render::format::OutputFormat<Output = String>,
148    {
149        if matches!(format, DocumentFormat::Html) {
150            let mut placeholders = HtmlPlaceholderRegistry::default();
151            let content = if self.is_note_style() {
152                self.process_note_document_html(content, parsed, &mut placeholders)
153            } else {
154                self.process_inline_document_html(content, parsed, &mut placeholders)
155            };
156            return RenderedDocumentBody {
157                content,
158                placeholders: Some(placeholders),
159            };
160        }
161
162        let content = if self.is_note_style() {
163            self.process_note_document::<F>(content, parsed)
164        } else {
165            self.process_inline_document::<F>(content, parsed)
166        };
167
168        RenderedDocumentBody {
169            content,
170            placeholders: None,
171        }
172    }
173
174    #[allow(
175        clippy::string_slice,
176        reason = "parser-guaranteed boundaries and indices"
177    )]
178    fn process_inline_document<F>(&self, content: &str, parsed: ParsedDocument) -> String
179    where
180        F: crate::render::format::OutputFormat<Output = String>,
181    {
182        let mut result = String::new();
183        let mut last_idx = 0;
184        let normalized = self.normalize_integral_name_citations(&parsed);
185
186        for (parsed, citation) in parsed.citations.iter().zip(normalized) {
187            result.push_str(&content[last_idx..parsed.start]);
188            match self.process_citation_with_format::<F>(&citation) {
189                Ok(rendered) => result.push_str(&rendered),
190                Err(_) => result.push_str(&content[parsed.start..parsed.end]),
191            }
192            last_idx = parsed.end;
193        }
194
195        result.push_str(&content[last_idx..]);
196        result
197    }
198
199    #[allow(
200        clippy::string_slice,
201        reason = "parser-guaranteed boundaries and indices"
202    )]
203    fn process_inline_document_html(
204        &self,
205        content: &str,
206        parsed: ParsedDocument,
207        placeholders: &mut HtmlPlaceholderRegistry,
208    ) -> String {
209        let mut result = String::new();
210        let mut last_idx = 0;
211        let normalized = self.normalize_integral_name_citations(&parsed);
212
213        for (parsed, citation) in parsed.citations.iter().zip(normalized) {
214            result.push_str(&content[last_idx..parsed.start]);
215            match self.process_citation_with_format::<crate::render::html::Html>(&citation) {
216                Ok(rendered) => result.push_str(&placeholders.push_inline(rendered)),
217                Err(_) => result.push_str(&content[parsed.start..parsed.end]),
218            }
219            last_idx = parsed.end;
220        }
221
222        result.push_str(&content[last_idx..]);
223        result
224    }
225
226    fn replace_document_bibliography_blocks<F>(
227        &self,
228        rendered: &mut RenderedDocumentBody,
229        blocks: &[BibliographyBlock],
230        format: DocumentFormat,
231    ) where
232        F: crate::render::format::OutputFormat<Output = String>,
233    {
234        for (index, block) in blocks.iter().enumerate() {
235            let placeholder = bibliography_block_placeholder(index);
236            let rendered_group = self.render_document_bibliography_block::<F>(&block.group);
237            let replacement = render_document_bibliography_block_replacement(
238                rendered.placeholders.as_mut(),
239                format,
240                rendered_group.heading,
241                rendered_group.body,
242            );
243            rendered.content = rendered.content.replace(&placeholder, &replacement);
244        }
245    }
246
247    fn finalize_document_output<P>(
248        &self,
249        parser: &P,
250        format: DocumentFormat,
251        rendered: RenderedDocumentBody,
252    ) -> String
253    where
254        P: CitationParser,
255    {
256        let result = rewrite_document_markup_for_typst(rendered.content, format);
257        match rendered.placeholders {
258            Some(placeholders) => placeholders.apply(parser.finalize_html_output(&result)),
259            None => match format {
260                DocumentFormat::Html => parser.finalize_html_output(&result),
261                DocumentFormat::Djot
262                | DocumentFormat::Markdown
263                | DocumentFormat::Plain
264                | DocumentFormat::Latex
265                | DocumentFormat::Typst => result,
266            },
267        }
268    }
269}