Skip to main content

citum_engine/processor/document/
pipeline.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6//! High-level document-processing orchestration.
7
8use super::output::{
9    HtmlPlaceholderRegistry, RenderedDocumentBody, append_document_bibliography,
10    bibliography_block_placeholder, render_document_bibliography_block_replacement,
11    rewrite_document_markup_for_typst, rewrite_group_headings_for_document,
12    stage_document_bibliography_blocks,
13};
14use super::{BibliographyBlock, CitationParser, DocumentFormat, ParsedDocument};
15use crate::processor::Processor;
16
17impl Processor {
18    /// Process citations in a document and append a bibliography.
19    ///
20    /// This is the primary document-level entry point. It:
21    /// 1. Parses the source document using the provided adapter.
22    /// 2. Resolves frontmatter overrides for the integral-name policy.
23    /// 3. Chooses a bibliography orchestration path based on frontmatter and document blocks.
24    #[allow(
25        clippy::string_slice,
26        reason = "parser-guaranteed boundaries and indices"
27    )]
28    pub fn process_document<P, F>(
29        &self,
30        content: &str,
31        parser: &P,
32        format: DocumentFormat,
33    ) -> String
34    where
35        P: CitationParser,
36        F: crate::render::format::OutputFormat<Output = String>,
37    {
38        let mut parsed = parser.parse_document(content, &self.locale);
39        let owned_processor = self.processor_with_document_integral_name_override(
40            parsed.frontmatter_integral_name_memory.as_ref(),
41        );
42        let processor = owned_processor.as_ref().unwrap_or(self);
43        let body = &content[parsed.body_start..];
44        if let Some(groups) = parsed.frontmatter_groups.take() {
45            return processor.process_document_with_frontmatter_groups::<P, F>(
46                body, parsed, groups, parser, format,
47            );
48        }
49
50        if !parsed.bibliography_blocks.is_empty() {
51            return processor.process_document_with_bibliography_blocks::<P, F>(
52                body,
53                std::mem::take(&mut parsed.bibliography_blocks),
54                parser,
55                format,
56            );
57        }
58
59        processor.process_document_with_default_bibliography::<P, F>(body, parsed, parser, format)
60    }
61
62    /// Orchestrate document processing with custom frontmatter bibliography groups.
63    fn process_document_with_frontmatter_groups<P, F>(
64        &self,
65        body: &str,
66        parsed: ParsedDocument,
67        groups: Vec<citum_schema::grouping::BibliographyGroup>,
68        parser: &P,
69        format: DocumentFormat,
70    ) -> String
71    where
72        P: CitationParser,
73        F: crate::render::format::OutputFormat<Output = String>,
74    {
75        self.render_document_with_trailing_bibliography::<P, F, _>(
76            body,
77            parsed,
78            parser,
79            format,
80            |processor| {
81                rewrite_group_headings_for_document(
82                    processor.render_document_bibliography_groups::<F>(&groups),
83                    format,
84                )
85            },
86        )
87    }
88
89    /// Orchestrate document processing with explicit bibliography blocks.
90    fn process_document_with_bibliography_blocks<P, F>(
91        &self,
92        body: &str,
93        blocks: Vec<BibliographyBlock>,
94        parser: &P,
95        format: DocumentFormat,
96    ) -> String
97    where
98        P: CitationParser,
99        F: crate::render::format::OutputFormat<Output = String>,
100    {
101        let staged = stage_document_bibliography_blocks(body, &blocks);
102        let parsed_staged = parser.parse_document(&staged, &self.locale);
103        let mut rendered = self.render_document_body::<F>(&staged, parsed_staged, format);
104        self.replace_document_bibliography_blocks::<F>(&mut rendered, &blocks, format);
105        self.finalize_document_output(parser, format, rendered)
106    }
107
108    /// Orchestrate document processing with the default trailing bibliography.
109    fn process_document_with_default_bibliography<P, F>(
110        &self,
111        body: &str,
112        parsed: ParsedDocument,
113        parser: &P,
114        format: DocumentFormat,
115    ) -> String
116    where
117        P: CitationParser,
118        F: crate::render::format::OutputFormat<Output = String>,
119    {
120        self.render_document_with_trailing_bibliography::<P, F, _>(
121            body,
122            parsed,
123            parser,
124            format,
125            super::super::Processor::render_grouped_bibliography_with_format::<F>,
126        )
127    }
128
129    /// Generic helper for rendering document body + trailing bibliography.
130    fn render_document_with_trailing_bibliography<P, F, B>(
131        &self,
132        body: &str,
133        parsed: ParsedDocument,
134        parser: &P,
135        format: DocumentFormat,
136        render_bibliography: B,
137    ) -> String
138    where
139        P: CitationParser,
140        F: crate::render::format::OutputFormat<Output = String>,
141        B: FnOnce(&Self) -> String,
142    {
143        let mut rendered = self.render_document_body::<F>(body, parsed, format);
144        let bibliography = render_bibliography(self);
145        append_document_bibliography(&mut rendered, format, bibliography);
146        self.finalize_document_output(parser, format, rendered)
147    }
148
149    /// Render the citation-annotated document body.
150    ///
151    /// Governs the choice between note-style and inline-style processing,
152    /// and handles HTML placeholder registration for format finalization.
153    fn render_document_body<F>(
154        &self,
155        content: &str,
156        parsed: ParsedDocument,
157        format: DocumentFormat,
158    ) -> RenderedDocumentBody
159    where
160        F: crate::render::format::OutputFormat<Output = String>,
161    {
162        if matches!(format, DocumentFormat::Html) {
163            let mut placeholders = HtmlPlaceholderRegistry::default();
164            let content = if self.is_note_style() {
165                self.process_note_document_html(content, parsed, &mut placeholders)
166            } else {
167                self.process_inline_document_html(content, parsed, &mut placeholders)
168            };
169            return RenderedDocumentBody {
170                content,
171                placeholders: Some(placeholders),
172            };
173        }
174
175        let content = if self.is_note_style() {
176            self.process_note_document::<F>(content, parsed)
177        } else {
178            self.process_inline_document::<F>(content, parsed)
179        };
180
181        RenderedDocumentBody {
182            content,
183            placeholders: None,
184        }
185    }
186
187    /// Splice rendered citations into document markup for non-note styles.
188    #[allow(
189        clippy::string_slice,
190        reason = "parser-guaranteed boundaries and indices"
191    )]
192    fn process_inline_document<F>(&self, content: &str, parsed: ParsedDocument) -> String
193    where
194        F: crate::render::format::OutputFormat<Output = String>,
195    {
196        let mut result = String::new();
197        let mut last_idx = 0;
198        let normalized = self.normalize_integral_name_citations(&parsed);
199
200        for (parsed, citation) in parsed.citations.iter().zip(normalized) {
201            result.push_str(&content[last_idx..parsed.start]);
202            match self.process_citation_with_format::<F>(&citation) {
203                Ok(rendered) => result.push_str(&rendered),
204                Err(_) => result.push_str(&content[parsed.start..parsed.end]),
205            }
206            last_idx = parsed.end;
207        }
208
209        result.push_str(&content[last_idx..]);
210        result
211    }
212
213    /// Splice HTML-rendered citations into document markup using placeholders.
214    #[allow(
215        clippy::string_slice,
216        reason = "parser-guaranteed boundaries and indices"
217    )]
218    fn process_inline_document_html(
219        &self,
220        content: &str,
221        parsed: ParsedDocument,
222        placeholders: &mut HtmlPlaceholderRegistry,
223    ) -> String {
224        let mut result = String::new();
225        let mut last_idx = 0;
226        let normalized = self.normalize_integral_name_citations(&parsed);
227
228        for (parsed, citation) in parsed.citations.iter().zip(normalized) {
229            result.push_str(&content[last_idx..parsed.start]);
230            match self.process_citation_with_format::<crate::render::html::Html>(&citation) {
231                Ok(rendered) => result.push_str(&placeholders.push_inline(rendered)),
232                Err(_) => result.push_str(&content[parsed.start..parsed.end]),
233            }
234            last_idx = parsed.end;
235        }
236
237        result.push_str(&content[last_idx..]);
238        result
239    }
240
241    /// Replace bibliography block placeholders with rendered content.
242    fn replace_document_bibliography_blocks<F>(
243        &self,
244        rendered: &mut RenderedDocumentBody,
245        blocks: &[BibliographyBlock],
246        format: DocumentFormat,
247    ) where
248        F: crate::render::format::OutputFormat<Output = String>,
249    {
250        for (index, block) in blocks.iter().enumerate() {
251            let placeholder = bibliography_block_placeholder(index);
252            let rendered_group = self.render_document_bibliography_block::<F>(&block.group);
253            let replacement = render_document_bibliography_block_replacement(
254                rendered.placeholders.as_mut(),
255                format,
256                rendered_group.heading,
257                rendered_group.body,
258            );
259            rendered.content = rendered.content.replace(&placeholder, &replacement);
260        }
261    }
262
263    /// Perform final document rewrites and resolve HTML placeholders.
264    fn finalize_document_output<P>(
265        &self,
266        parser: &P,
267        format: DocumentFormat,
268        rendered: RenderedDocumentBody,
269    ) -> String
270    where
271        P: CitationParser,
272    {
273        let result = rewrite_document_markup_for_typst(rendered.content, format);
274        match rendered.placeholders {
275            Some(placeholders) => placeholders.apply(parser.finalize_html_output(&result)),
276            None => match format {
277                DocumentFormat::Html => parser.finalize_html_output(&result),
278                DocumentFormat::Djot
279                | DocumentFormat::Markdown
280                | DocumentFormat::Plain
281                | DocumentFormat::Latex
282                | DocumentFormat::Typst => result,
283            },
284        }
285    }
286}