Skip to main content

citum_engine/processor/document/
pipeline.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6//! High-level document-processing orchestration.
7
8use super::output::{
9    HtmlPlaceholderRegistry, RenderedDocumentBody, append_document_bibliography,
10    bibliography_block_placeholder, render_document_bibliography_block_replacement,
11    rewrite_document_markup_for_typst, rewrite_group_headings_for_document,
12    stage_document_bibliography_blocks,
13};
14use super::{BibliographyBlock, CitationParser, DocumentFormat, ParsedDocument};
15use crate::processor::Processor;
16
17impl Processor {
18    /// Process citations in a document and append a bibliography.
19    ///
20    /// This is the primary document-level entry point. It:
21    /// 1. Parses the source document using the provided adapter.
22    /// 2. Resolves frontmatter overrides (integral-name policy, bibliography options).
23    /// 3. Chooses a bibliography orchestration path based on frontmatter and document blocks.
24    #[allow(
25        clippy::string_slice,
26        reason = "parser-guaranteed boundaries and indices"
27    )]
28    pub fn process_document<P, F>(
29        &self,
30        content: &str,
31        parser: &P,
32        format: DocumentFormat,
33    ) -> String
34    where
35        P: CitationParser,
36        F: crate::render::format::OutputFormat<Output = String>,
37    {
38        let mut parsed = parser.parse_document(content, &self.locale);
39
40        if let Some(err) = &parsed.frontmatter_error {
41            eprintln!("citum: error: frontmatter parse error: {err}");
42            std::process::exit(1);
43        }
44
45        // `options.*` fields take precedence over the legacy top-level fields.
46        let effective_integral_override = parsed
47            .frontmatter_options
48            .as_ref()
49            .and_then(|o| o.integral_name_memory.as_ref())
50            .or(parsed.frontmatter_integral_name_memory.as_ref());
51        let owned_integral =
52            self.processor_with_document_integral_name_override(effective_integral_override);
53
54        // `options.org-abbreviation-memory` takes precedence over the legacy top-level field.
55        let effective_org_override = parsed
56            .frontmatter_options
57            .as_ref()
58            .and_then(|o| o.org_abbreviation_memory.as_ref())
59            .or(parsed.frontmatter_org_abbreviation_memory.as_ref());
60        let owned_org = {
61            let base = owned_integral.as_ref().unwrap_or(self);
62            base.processor_with_document_org_abbreviation_override(effective_org_override)
63        };
64
65        // Apply bibliography overrides from the options block.
66        let owned_bib = parsed
67            .frontmatter_options
68            .as_ref()
69            .filter(|o| o.bibliography.is_some())
70            .map(|options| {
71                let base = owned_org
72                    .as_ref()
73                    .or(owned_integral.as_ref())
74                    .unwrap_or(self);
75                base.processor_with_bibliography_override(options)
76            });
77
78        let processor = owned_bib
79            .as_ref()
80            .or(owned_org.as_ref())
81            .or(owned_integral.as_ref())
82            .unwrap_or(self);
83        let body = &content[parsed.body_start..];
84        if let Some(groups) = parsed.frontmatter_groups.take() {
85            return processor.process_document_with_frontmatter_groups::<P, F>(
86                body, parsed, groups, parser, format,
87            );
88        }
89
90        if !parsed.bibliography_blocks.is_empty() {
91            return processor.process_document_with_bibliography_blocks::<P, F>(
92                body,
93                std::mem::take(&mut parsed.bibliography_blocks),
94                parser,
95                format,
96            );
97        }
98
99        processor.process_document_with_default_bibliography::<P, F>(body, parsed, parser, format)
100    }
101
102    /// Orchestrate document processing with custom frontmatter bibliography groups.
103    fn process_document_with_frontmatter_groups<P, F>(
104        &self,
105        body: &str,
106        parsed: ParsedDocument,
107        groups: Vec<citum_schema::grouping::BibliographyGroup>,
108        parser: &P,
109        format: DocumentFormat,
110    ) -> String
111    where
112        P: CitationParser,
113        F: crate::render::format::OutputFormat<Output = String>,
114    {
115        self.render_document_with_trailing_bibliography::<P, F, _>(
116            body,
117            parsed,
118            parser,
119            format,
120            |processor| {
121                rewrite_group_headings_for_document(
122                    processor.render_document_bibliography_groups::<F>(&groups),
123                    format,
124                )
125            },
126        )
127    }
128
129    /// Orchestrate document processing with explicit bibliography blocks.
130    fn process_document_with_bibliography_blocks<P, F>(
131        &self,
132        body: &str,
133        blocks: Vec<BibliographyBlock>,
134        parser: &P,
135        format: DocumentFormat,
136    ) -> String
137    where
138        P: CitationParser,
139        F: crate::render::format::OutputFormat<Output = String>,
140    {
141        let staged = stage_document_bibliography_blocks(body, &blocks);
142        let parsed_staged = parser.parse_document(&staged, &self.locale);
143        let mut rendered = self.render_document_body::<F>(&staged, parsed_staged, format);
144        self.replace_document_bibliography_blocks::<F>(&mut rendered, &blocks, format);
145        self.finalize_document_output(parser, format, rendered)
146    }
147
148    /// Orchestrate document processing with the default trailing bibliography.
149    fn process_document_with_default_bibliography<P, F>(
150        &self,
151        body: &str,
152        parsed: ParsedDocument,
153        parser: &P,
154        format: DocumentFormat,
155    ) -> String
156    where
157        P: CitationParser,
158        F: crate::render::format::OutputFormat<Output = String>,
159    {
160        self.render_document_with_trailing_bibliography::<P, F, _>(
161            body,
162            parsed,
163            parser,
164            format,
165            super::super::Processor::render_grouped_bibliography_with_format::<F>,
166        )
167    }
168
169    /// Generic helper for rendering document body + trailing bibliography.
170    fn render_document_with_trailing_bibliography<P, F, B>(
171        &self,
172        body: &str,
173        parsed: ParsedDocument,
174        parser: &P,
175        format: DocumentFormat,
176        render_bibliography: B,
177    ) -> String
178    where
179        P: CitationParser,
180        F: crate::render::format::OutputFormat<Output = String>,
181        B: FnOnce(&Self) -> String,
182    {
183        let mut rendered = self.render_document_body::<F>(body, parsed, format);
184        let bibliography = render_bibliography(self);
185        append_document_bibliography(&mut rendered, format, bibliography);
186        self.finalize_document_output(parser, format, rendered)
187    }
188
189    /// Render the citation-annotated document body.
190    ///
191    /// Governs the choice between note-style and inline-style processing,
192    /// and handles HTML placeholder registration for format finalization.
193    fn render_document_body<F>(
194        &self,
195        content: &str,
196        parsed: ParsedDocument,
197        format: DocumentFormat,
198    ) -> RenderedDocumentBody
199    where
200        F: crate::render::format::OutputFormat<Output = String>,
201    {
202        if matches!(format, DocumentFormat::Html) {
203            let mut placeholders = HtmlPlaceholderRegistry::default();
204            let content = if self.is_note_style() {
205                self.process_note_document_html(content, parsed, &mut placeholders)
206            } else {
207                self.process_inline_document_html(content, parsed, &mut placeholders)
208            };
209            return RenderedDocumentBody {
210                content,
211                placeholders: Some(placeholders),
212            };
213        }
214
215        let content = if self.is_note_style() {
216            self.process_note_document::<F>(content, parsed)
217        } else {
218            self.process_inline_document::<F>(content, parsed)
219        };
220
221        RenderedDocumentBody {
222            content,
223            placeholders: None,
224        }
225    }
226
227    /// Splice rendered citations into document markup for non-note styles.
228    #[allow(
229        clippy::string_slice,
230        reason = "parser-guaranteed boundaries and indices"
231    )]
232    fn process_inline_document<F>(&self, content: &str, parsed: ParsedDocument) -> String
233    where
234        F: crate::render::format::OutputFormat<Output = String>,
235    {
236        let mut result = String::new();
237        let mut last_idx = 0;
238        let normalized = self.normalize_integral_name_citations(&parsed);
239
240        for (parsed, citation) in parsed.citations.iter().zip(normalized) {
241            result.push_str(&content[last_idx..parsed.start]);
242            match self.process_citation_with_format::<F>(&citation) {
243                Ok(rendered) => result.push_str(&rendered),
244                Err(_) => result.push_str(&content[parsed.start..parsed.end]),
245            }
246            last_idx = parsed.end;
247        }
248
249        result.push_str(&content[last_idx..]);
250        result
251    }
252
253    /// Splice HTML-rendered citations into document markup using placeholders.
254    #[allow(
255        clippy::string_slice,
256        reason = "parser-guaranteed boundaries and indices"
257    )]
258    fn process_inline_document_html(
259        &self,
260        content: &str,
261        parsed: ParsedDocument,
262        placeholders: &mut HtmlPlaceholderRegistry,
263    ) -> String {
264        let mut result = String::new();
265        let mut last_idx = 0;
266        let normalized = self.normalize_integral_name_citations(&parsed);
267
268        for (parsed, citation) in parsed.citations.iter().zip(normalized) {
269            result.push_str(&content[last_idx..parsed.start]);
270            match self.process_citation_with_format::<crate::render::html::Html>(&citation) {
271                Ok(rendered) => result.push_str(&placeholders.push_inline(rendered)),
272                Err(_) => result.push_str(&content[parsed.start..parsed.end]),
273            }
274            last_idx = parsed.end;
275        }
276
277        result.push_str(&content[last_idx..]);
278        result
279    }
280
281    /// Replace bibliography block placeholders with rendered content.
282    fn replace_document_bibliography_blocks<F>(
283        &self,
284        rendered: &mut RenderedDocumentBody,
285        blocks: &[BibliographyBlock],
286        format: DocumentFormat,
287    ) where
288        F: crate::render::format::OutputFormat<Output = String>,
289    {
290        for (index, block) in blocks.iter().enumerate() {
291            let placeholder = bibliography_block_placeholder(index);
292            let rendered_group = self.render_document_bibliography_block::<F>(&block.group);
293            let replacement = render_document_bibliography_block_replacement(
294                rendered.placeholders.as_mut(),
295                format,
296                rendered_group.heading,
297                rendered_group.body,
298            );
299            rendered.content = rendered.content.replace(&placeholder, &replacement);
300        }
301    }
302
303    /// Perform final document rewrites and resolve HTML placeholders.
304    fn finalize_document_output<P>(
305        &self,
306        parser: &P,
307        format: DocumentFormat,
308        rendered: RenderedDocumentBody,
309    ) -> String
310    where
311        P: CitationParser,
312    {
313        let result = rewrite_document_markup_for_typst(rendered.content, format);
314        match rendered.placeholders {
315            Some(placeholders) => placeholders.apply(parser.finalize_html_output(&result)),
316            None => match format {
317                DocumentFormat::Html => parser.finalize_html_output(&result),
318                DocumentFormat::Djot
319                | DocumentFormat::Markdown
320                | DocumentFormat::Plain
321                | DocumentFormat::Latex
322                | DocumentFormat::Typst => result,
323            },
324        }
325    }
326}