Skip to main content

acdc_parser/grammar/
passthrough_processing.rs

1use crate::{
2    InlineNode, Location, Pass, PassthroughKind, Plain, ProcessedContent, Raw, Substitution,
3};
4
5use super::{
6    ParserState,
7    document::{BlockParsingMetadata, document_parser},
8    location_mapping::{clamp_inline_node_locations, remap_inline_node_location},
9};
10
11/// Process passthrough content that contains quote substitutions, parsing nested markup
12pub(crate) fn process_passthrough_with_quotes(
13    content: &str,
14    passthrough: &Pass,
15) -> Vec<InlineNode> {
16    let has_quotes = passthrough.substitutions.contains(&Substitution::Quotes);
17
18    // If no quotes processing needed
19    if !has_quotes {
20        // If SpecialChars substitution is enabled, escape HTML (return PlainText)
21        // This applies to: +text+ (Single), ++text++ (Double), pass:c[] (Macro with SpecialChars)
22        // Otherwise output raw HTML (return RawText)
23        // This applies to: +++text+++ (Triple), pass:[] (Macro without SpecialChars)
24        // Use RawText for all passthroughs without Quotes to avoid merging with
25        // adjacent PlainText nodes (which would lose the passthrough's substitution info).
26        // Carry the passthrough's own subs (minus Quotes, already handled) so the
27        // converter applies exactly those instead of the block's subs.
28        // Compute content-only location by stripping the delimiter prefix/suffix
29        // from the full passthrough macro location.
30        let suffix_len = match passthrough.kind {
31            PassthroughKind::Macro | PassthroughKind::Single => 1, // ] or +
32            PassthroughKind::Double => 2,                          // ++
33            PassthroughKind::Triple => 3,                          // +++
34        };
35        let total_span = passthrough.location.absolute_end - passthrough.location.absolute_start;
36        let prefix_len = total_span - content.len() - suffix_len;
37
38        let content_abs_start = passthrough.location.absolute_start + prefix_len;
39        let content_col_start = passthrough.location.start.column + prefix_len;
40
41        let content_location = Location {
42            absolute_start: content_abs_start,
43            absolute_end: content_abs_start + content.len(),
44            start: crate::Position {
45                line: passthrough.location.start.line,
46                column: content_col_start,
47            },
48            end: crate::Position {
49                line: passthrough.location.start.line,
50                column: content_col_start + content.len(),
51            },
52        };
53
54        return vec![InlineNode::RawText(Raw {
55            content: content.to_string(),
56            location: content_location,
57            subs: passthrough
58                .substitutions
59                .iter()
60                .filter(|s| **s != Substitution::Quotes)
61                .cloned()
62                .collect(),
63        })];
64    }
65
66    tracing::debug!(content = ?content, "Parsing passthrough content with quotes");
67
68    parse_text_for_quotes(content)
69}
70
71/// Parse text for inline formatting markup (bold, italic, monospace, etc.).
72///
73/// This function scans the input text for `AsciiDoc` formatting patterns and returns
74/// a vector of `InlineNode`s representing the parsed content. Used for applying
75/// "quotes" substitution to verbatim block content.
76///
77/// # Supported Patterns
78///
79/// - `*bold*` and `**bold**` (constrained/unconstrained)
80/// - `_italic_` and `__italic__`
81/// - `` `monospace` `` and ``` ``monospace`` ```
82/// - `^superscript^` and `~subscript~`
83/// - `#highlight#` and `##highlight##`
84/// - `` "`curved quotes`" `` and `` '`curved apostrophe`' ``
85///
86/// # Example
87///
88/// ```
89/// use acdc_parser::parse_text_for_quotes;
90///
91/// let nodes = parse_text_for_quotes("This has *bold* text.");
92/// assert_eq!(nodes.len(), 3); // "This has ", Bold("bold"), " text."
93/// ```
94#[must_use]
95pub fn parse_text_for_quotes(content: &str) -> Vec<InlineNode> {
96    if content.is_empty() {
97        return Vec::new();
98    }
99
100    let mut state = ParserState::new(content);
101    state.quotes_only = true;
102    let block_metadata = BlockParsingMetadata::default();
103
104    match document_parser::quotes_only_inlines(content, &mut state, 0, &block_metadata) {
105        Ok(nodes) => nodes,
106        Err(err) => {
107            tracing::warn!(
108                ?err,
109                ?content,
110                "quotes-only PEG parse failed, falling back to plain text"
111            );
112            vec![InlineNode::PlainText(Plain {
113                content: content.to_string(),
114                location: Location::default(),
115                escaped: false,
116            })]
117        }
118    }
119}
120
121/// Process passthrough placeholders in content, returning expanded `InlineNode`s.
122///
123/// This function handles the multi-pass parsing needed for passthroughs with quote substitutions.
124/// It splits the content around placeholders and processes each passthrough according to its
125/// substitution settings.
126pub(crate) fn process_passthrough_placeholders(
127    content: &str,
128    processed: &ProcessedContent,
129    state: &ParserState,
130    base_location: &Location,
131) -> Vec<InlineNode> {
132    let mut result = Vec::new();
133    let mut remaining = content;
134    let mut processed_offset = 0; // Position in the processed content (with placeholders)
135
136    // Process each passthrough placeholder in order
137    for (index, passthrough) in processed.passthroughs.iter().enumerate() {
138        let placeholder = format!("���{index}���");
139
140        if let Some(placeholder_pos) = remaining.find(&placeholder) {
141            let before_content = if placeholder_pos > 0 {
142                Some(&remaining[..placeholder_pos])
143            } else {
144                None
145            };
146
147            // Add content before the placeholder if any, using original string positions
148            if let Some(before) = before_content
149                && !before.is_empty()
150            {
151                result.push(InlineNode::PlainText(Plain {
152                    content: before.to_string(),
153                    location: Location {
154                        // Use original string positions
155                        absolute_start: base_location.absolute_start + processed_offset,
156                        absolute_end: base_location.absolute_start
157                            + processed_offset
158                            + before.len(),
159                        start: crate::Position {
160                            line: base_location.start.line,
161                            column: base_location.start.column + processed_offset,
162                        },
163                        end: crate::Position {
164                            line: base_location.start.line,
165                            column: base_location.start.column + processed_offset + before.len(),
166                        },
167                    },
168                    escaped: false,
169                }));
170                processed_offset += before.len();
171            }
172
173            // Process the passthrough content using original string positions from passthrough.location
174            if let Some(passthrough_content) = &passthrough.text {
175                let processed_nodes =
176                    process_passthrough_with_quotes(passthrough_content, passthrough);
177
178                // Remap locations of processed nodes to use original string coordinates
179                // The passthrough content starts after "pass:q[" so we need to account for that offset
180                let macro_prefix_len = "pass:q[".len(); // 7 characters
181                let has_quotes = passthrough.substitutions.contains(&Substitution::Quotes);
182                let remaining_subs: Vec<Substitution> = passthrough
183                    .substitutions
184                    .iter()
185                    .filter(|s| **s != Substitution::Quotes)
186                    .cloned()
187                    .collect();
188                for mut node in processed_nodes {
189                    remap_inline_node_location(
190                        &mut node,
191                        passthrough.location.absolute_start + macro_prefix_len,
192                    );
193                    // For passthroughs with quotes, convert PlainText to RawText so
194                    // HTML content passes through unescaped. Must happen AFTER
195                    // remapping since remap_inline_node_location handles PlainText
196                    // but not RawText (RawText from non-quotes path already has
197                    // correct locations from passthrough.location).
198                    if has_quotes {
199                        if let InlineNode::PlainText(p) = node {
200                            node = InlineNode::RawText(Raw {
201                                content: p.content,
202                                location: p.location,
203                                subs: remaining_subs.clone(),
204                            });
205                        }
206                    }
207                    result.push(node);
208                }
209            }
210
211            // Move past the placeholder in the processed content
212            let skip_len = placeholder_pos + placeholder.len();
213            remaining = &remaining[skip_len..];
214            // Update processed_offset to account for the original passthrough macro length
215            processed_offset +=
216                passthrough.location.absolute_end - passthrough.location.absolute_start;
217        }
218    }
219
220    // Add any remaining content as plain text
221    if !remaining.is_empty() {
222        // Check if the last node is PlainText and merge if so
223        if let Some(InlineNode::PlainText(last_plain)) = result.last_mut() {
224            // Merge remaining content with the last plain text node
225            last_plain.content.push_str(remaining);
226            // Extend the location to include the remaining content
227            last_plain.location.absolute_end = base_location.absolute_end;
228            last_plain.location.end = base_location.end.clone();
229        } else {
230            // Add as separate node if last node is not plain text
231            result.push(InlineNode::PlainText(Plain {
232                content: remaining.to_string(),
233                location: Location {
234                    absolute_start: base_location.absolute_start + processed_offset,
235                    absolute_end: base_location.absolute_end,
236                    start: crate::Position {
237                        line: base_location.start.line,
238                        column: base_location.start.column + processed_offset,
239                    },
240                    end: base_location.end.clone(),
241                },
242                escaped: false,
243            }));
244        }
245    }
246
247    // If no placeholders were found, return the original content as plain text
248    if result.is_empty() {
249        result.push(InlineNode::PlainText(Plain {
250            content: content.to_string(),
251            location: base_location.clone(),
252            escaped: false,
253        }));
254    }
255
256    // Clamp all locations to valid bounds within the input string
257    for node in &mut result {
258        clamp_inline_node_locations(node, &state.input);
259    }
260
261    // Merge adjacent plain text nodes
262    merge_adjacent_plain_text_nodes(result)
263}
264
265/// Merge adjacent plain text nodes into single nodes to simplify the output
266pub(crate) fn merge_adjacent_plain_text_nodes(nodes: Vec<InlineNode>) -> Vec<InlineNode> {
267    let mut result = Vec::new();
268
269    for node in nodes {
270        match (result.last_mut(), node) {
271            (Some(InlineNode::PlainText(last_plain)), InlineNode::PlainText(current_plain)) => {
272                // Merge current plain text with the last one
273                last_plain.content.push_str(&current_plain.content);
274                // Extend the location to cover both nodes
275                last_plain.location.absolute_end = current_plain.location.absolute_end;
276                last_plain.location.end = current_plain.location.end;
277            }
278            (_, node) => {
279                // Not adjacent plain text nodes, add as separate node
280                result.push(node);
281            }
282        }
283    }
284
285    result
286}
287
288pub(crate) fn replace_passthrough_placeholders(
289    content: &str,
290    processed: &ProcessedContent,
291) -> String {
292    let mut result = content.to_string();
293
294    // Replace each passthrough placeholder with its content
295    for (index, passthrough) in processed.passthroughs.iter().enumerate() {
296        let placeholder = format!("���{index}���");
297        if let Some(text) = &passthrough.text {
298            result = result.replace(&placeholder, text);
299        }
300    }
301
302    result
303}
304
#[cfg(test)]
#[allow(clippy::indexing_slicing)] // Tests verify length before indexing
mod tests {
    use super::*;

    // === Divergence Prevention Tests ===
    //
    // Each test feeds a common inline-formatting pattern to `parse_text_for_quotes` and checks
    // the shape of the resulting nodes. They exist to catch drift between this entry point and
    // the main PEG grammar: if one fails after a grammar change, revisit
    // `parse_text_for_quotes`.

    /// Asserts that `$node` is the given `InlineNode` variant and that its first child is a
    /// plain-text node whose content equals `$text`.
    macro_rules! assert_wraps_plain {
        ($node:expr, $variant:ident, $text:expr) => {
            assert!(matches!(
                $node,
                InlineNode::$variant(inner)
                    if matches!(
                        inner.content.first(),
                        Some(InlineNode::PlainText(p)) if p.content == $text
                    )
            ))
        };
    }

    /// True when every node in `nodes` is plain text.
    fn only_plain_text(nodes: &[InlineNode]) -> bool {
        nodes.iter().all(|n| matches!(n, InlineNode::PlainText(_)))
    }

    #[test]
    fn test_constrained_bold_pattern() {
        let nodes = parse_text_for_quotes("This is *bold* text.");
        assert_eq!(nodes.len(), 3);
        assert!(matches!(nodes[0], InlineNode::PlainText(_)));
        assert_wraps_plain!(&nodes[1], BoldText, "bold");
        assert!(matches!(nodes[2], InlineNode::PlainText(_)));
    }

    #[test]
    fn test_unconstrained_bold_pattern() {
        let nodes = parse_text_for_quotes("This**bold**word");
        assert_eq!(nodes.len(), 3);
        assert_wraps_plain!(&nodes[1], BoldText, "bold");
    }

    #[test]
    fn test_constrained_italic_pattern() {
        let nodes = parse_text_for_quotes("This is _italic_ text.");
        assert_eq!(nodes.len(), 3);
        assert_wraps_plain!(&nodes[1], ItalicText, "italic");
    }

    #[test]
    fn test_unconstrained_italic_pattern() {
        let nodes = parse_text_for_quotes("This__italic__word");
        assert_eq!(nodes.len(), 3);
        assert_wraps_plain!(&nodes[1], ItalicText, "italic");
    }

    #[test]
    fn test_constrained_monospace_pattern() {
        let nodes = parse_text_for_quotes("Use `code` here.");
        assert_eq!(nodes.len(), 3);
        assert_wraps_plain!(&nodes[1], MonospaceText, "code");
    }

    #[test]
    fn test_superscript_pattern() {
        let nodes = parse_text_for_quotes("E=mc^2^");
        assert_eq!(nodes.len(), 2);
        assert_wraps_plain!(&nodes[1], SuperscriptText, "2");
    }

    #[test]
    fn test_subscript_pattern() {
        let nodes = parse_text_for_quotes("H~2~O");
        assert_eq!(nodes.len(), 3);
        assert_wraps_plain!(&nodes[1], SubscriptText, "2");
    }

    #[test]
    fn test_highlight_pattern() {
        let nodes = parse_text_for_quotes("This is #highlighted# text.");
        assert_eq!(nodes.len(), 3);
        assert_wraps_plain!(&nodes[1], HighlightText, "highlighted");
    }

    #[test]
    fn test_escaped_superscript_not_parsed() {
        // A backslash in front of the marker must suppress formatting, leaving plain text only.
        let nodes = parse_text_for_quotes(r"E=mc\^2^");
        assert!(
            only_plain_text(&nodes),
            "Escaped superscript should not be parsed"
        );
    }

    #[test]
    fn test_escaped_subscript_not_parsed() {
        let nodes = parse_text_for_quotes(r"H\~2~O");
        assert!(
            only_plain_text(&nodes),
            "Escaped subscript should not be parsed"
        );
    }

    #[test]
    fn test_multiple_formats_in_sequence() {
        let nodes = parse_text_for_quotes("*bold* and _italic_ and `code`");
        let has_bold = nodes.iter().any(|n| matches!(n, InlineNode::BoldText(_)));
        let has_italic = nodes.iter().any(|n| matches!(n, InlineNode::ItalicText(_)));
        let has_monospace = nodes
            .iter()
            .any(|n| matches!(n, InlineNode::MonospaceText(_)));
        assert!(has_bold);
        assert!(has_italic);
        assert!(has_monospace);
    }

    #[test]
    fn test_plain_text_only() {
        let nodes = parse_text_for_quotes("Just plain text here.");
        assert_eq!(nodes.len(), 1);
        assert!(matches!(nodes[0], InlineNode::PlainText(_)));
    }

    #[test]
    fn test_empty_input() {
        let nodes = parse_text_for_quotes("");
        assert!(nodes.is_empty());
    }
}