Skip to main content

acdc_parser/grammar/
passthrough_processing.rs

1use crate::{
2    InlineNode, Location, Pass, PassthroughKind, Plain, ProcessedContent, Raw, Substitution,
3};
4
5use super::{
6    ParserState,
7    helpers::BlockParsingMetadata,
8    inlines::inline_parser,
9    location_mapping::{clamp_inline_node_locations, remap_inline_node_location},
10};
11
12/// Process passthrough content that contains quote substitutions, parsing nested markup
13pub(crate) fn process_passthrough_with_quotes(
14    content: &str,
15    passthrough: &Pass,
16) -> Vec<InlineNode> {
17    let has_quotes = passthrough.substitutions.contains(&Substitution::Quotes);
18
19    // If no quotes processing needed
20    if !has_quotes {
21        // If SpecialChars substitution is enabled, escape HTML (return PlainText)
22        // This applies to: +text+ (Single), ++text++ (Double), pass:c[] (Macro with SpecialChars)
23        // Otherwise output raw HTML (return RawText)
24        // This applies to: +++text+++ (Triple), pass:[] (Macro without SpecialChars)
25        // Use RawText for all passthroughs without Quotes to avoid merging with
26        // adjacent PlainText nodes (which would lose the passthrough's substitution info).
27        // Carry the passthrough's own subs (minus Quotes, already handled) so the
28        // converter applies exactly those instead of the block's subs.
29        // Compute content-only location by stripping the delimiter prefix/suffix
30        // from the full passthrough macro location. For attribute-ref passthroughs,
31        // the location spans the `{attr}` reference with no delimiters to strip.
32        let suffix_len = match passthrough.kind {
33            PassthroughKind::Macro | PassthroughKind::Single => Some(1), // ] or +
34            PassthroughKind::Double => Some(2),                          // ++
35            PassthroughKind::Triple => Some(3),                          // +++
36            PassthroughKind::AttributeRef => None,
37        };
38
39        let content_location = if let Some(suffix_len) = suffix_len {
40            let total_span =
41                passthrough.location.absolute_end - passthrough.location.absolute_start;
42            let prefix_len = total_span - content.len() - suffix_len;
43
44            let content_abs_start = passthrough.location.absolute_start + prefix_len;
45            let content_col_start = passthrough.location.start.column + prefix_len;
46
47            Location {
48                absolute_start: content_abs_start,
49                absolute_end: content_abs_start + content.len(),
50                start: crate::Position {
51                    line: passthrough.location.start.line,
52                    column: content_col_start,
53                },
54                end: crate::Position {
55                    line: passthrough.location.start.line,
56                    column: content_col_start + content.len(),
57                },
58            }
59        } else {
60            passthrough.location.clone()
61        };
62
63        return vec![InlineNode::RawText(Raw {
64            content: content.to_string(),
65            location: content_location,
66            subs: passthrough
67                .substitutions
68                .iter()
69                .filter(|s| **s != Substitution::Quotes)
70                .cloned()
71                .collect(),
72        })];
73    }
74
75    tracing::debug!(content = ?content, "Parsing passthrough content with quotes");
76
77    parse_text_for_quotes(content)
78}
79
80/// Parse text for inline formatting markup (bold, italic, monospace, etc.).
81///
82/// This function scans the input text for `AsciiDoc` formatting patterns and returns
83/// a vector of `InlineNode`s representing the parsed content. Used for applying
84/// "quotes" substitution to verbatim block content.
85///
86/// # Supported Patterns
87///
88/// - `*bold*` and `**bold**` (constrained/unconstrained)
89/// - `_italic_` and `__italic__`
90/// - `` `monospace` `` and ``` ``monospace`` ```
91/// - `^superscript^` and `~subscript~`
92/// - `#highlight#` and `##highlight##`
93/// - `` "`curved quotes`" `` and `` '`curved apostrophe`' ``
94///
95/// # Example
96///
97/// ```
98/// use acdc_parser::parse_text_for_quotes;
99///
100/// let nodes = parse_text_for_quotes("This has *bold* text.");
101/// assert_eq!(nodes.len(), 3); // "This has ", Bold("bold"), " text."
102/// ```
103#[must_use]
104pub fn parse_text_for_quotes(content: &str) -> Vec<InlineNode> {
105    if content.is_empty() {
106        return Vec::new();
107    }
108
109    let mut state = ParserState::new(content);
110    state.quotes_only = true;
111    let block_metadata = BlockParsingMetadata::default();
112
113    match inline_parser::quotes_only_inlines(content, &mut state, 0, &block_metadata) {
114        Ok(nodes) => nodes,
115        Err(err) => {
116            tracing::warn!(
117                ?err,
118                ?content,
119                "quotes-only PEG parse failed, falling back to plain text"
120            );
121            vec![InlineNode::PlainText(Plain {
122                content: content.to_string(),
123                location: Location::default(),
124                escaped: false,
125            })]
126        }
127    }
128}
129
130/// Process passthrough placeholders in content, returning expanded `InlineNode`s.
131///
132/// This function handles the multi-pass parsing needed for passthroughs with quote substitutions.
133/// It splits the content around placeholders and processes each passthrough according to its
134/// substitution settings.
135pub(crate) fn process_passthrough_placeholders(
136    content: &str,
137    processed: &ProcessedContent,
138    state: &ParserState,
139    base_location: &Location,
140) -> Vec<InlineNode> {
141    let mut result = Vec::new();
142    let mut remaining = content;
143    let mut processed_offset = 0; // Position in the processed content (with placeholders)
144
145    // Process each passthrough placeholder in order
146    for (index, passthrough) in processed.passthroughs.iter().enumerate() {
147        let placeholder = format!("���{index}���");
148
149        if let Some(placeholder_pos) = remaining.find(&placeholder) {
150            let before_content = if placeholder_pos > 0 {
151                Some(&remaining[..placeholder_pos])
152            } else {
153                None
154            };
155
156            // Add content before the placeholder if any, using original string positions
157            if let Some(before) = before_content
158                && !before.is_empty()
159            {
160                result.push(InlineNode::PlainText(Plain {
161                    content: before.to_string(),
162                    location: Location {
163                        // Use original string positions
164                        absolute_start: base_location.absolute_start + processed_offset,
165                        absolute_end: base_location.absolute_start
166                            + processed_offset
167                            + before.len(),
168                        start: crate::Position {
169                            line: base_location.start.line,
170                            column: base_location.start.column + processed_offset,
171                        },
172                        end: crate::Position {
173                            line: base_location.start.line,
174                            column: base_location.start.column + processed_offset + before.len(),
175                        },
176                    },
177                    escaped: false,
178                }));
179                processed_offset += before.len();
180            }
181
182            // Process the passthrough content using original string positions from passthrough.location
183            if let Some(passthrough_content) = &passthrough.text {
184                let processed_nodes =
185                    process_passthrough_with_quotes(passthrough_content, passthrough);
186
187                // Remap locations of processed nodes to use original string coordinates
188                // The passthrough content starts after "pass:q[" so we need to account for that offset
189                let macro_prefix_len = "pass:q[".len(); // 7 characters
190                let has_quotes = passthrough.substitutions.contains(&Substitution::Quotes);
191                let remaining_subs: Vec<Substitution> = passthrough
192                    .substitutions
193                    .iter()
194                    .filter(|s| **s != Substitution::Quotes)
195                    .cloned()
196                    .collect();
197                for mut node in processed_nodes {
198                    remap_inline_node_location(
199                        &mut node,
200                        passthrough.location.absolute_start + macro_prefix_len,
201                    );
202                    // For passthroughs with quotes, convert PlainText to RawText so
203                    // HTML content passes through unescaped. Must happen AFTER
204                    // remapping since remap_inline_node_location handles PlainText
205                    // but not RawText (RawText from non-quotes path already has
206                    // correct locations from passthrough.location).
207                    if has_quotes {
208                        if let InlineNode::PlainText(p) = node {
209                            node = InlineNode::RawText(Raw {
210                                content: p.content,
211                                location: p.location,
212                                subs: remaining_subs.clone(),
213                            });
214                        }
215                    }
216                    result.push(node);
217                }
218            }
219
220            // Move past the placeholder in the processed content
221            let skip_len = placeholder_pos + placeholder.len();
222            remaining = &remaining[skip_len..];
223            // Update processed_offset to account for the original passthrough macro length
224            processed_offset +=
225                passthrough.location.absolute_end - passthrough.location.absolute_start;
226        }
227    }
228
229    // Add any remaining content as plain text
230    if !remaining.is_empty() {
231        // Check if the last node is PlainText and merge if so
232        if let Some(InlineNode::PlainText(last_plain)) = result.last_mut() {
233            // Merge remaining content with the last plain text node
234            last_plain.content.push_str(remaining);
235            // Extend the location to include the remaining content
236            last_plain.location.absolute_end = base_location.absolute_end;
237            last_plain.location.end = base_location.end.clone();
238        } else {
239            // Add as separate node if last node is not plain text
240            result.push(InlineNode::PlainText(Plain {
241                content: remaining.to_string(),
242                location: Location {
243                    absolute_start: base_location.absolute_start + processed_offset,
244                    absolute_end: base_location.absolute_end,
245                    start: crate::Position {
246                        line: base_location.start.line,
247                        column: base_location.start.column + processed_offset,
248                    },
249                    end: base_location.end.clone(),
250                },
251                escaped: false,
252            }));
253        }
254    }
255
256    // If no placeholders were found, return the original content as plain text
257    if result.is_empty() {
258        result.push(InlineNode::PlainText(Plain {
259            content: content.to_string(),
260            location: base_location.clone(),
261            escaped: false,
262        }));
263    }
264
265    // Clamp all locations to valid bounds within the input string
266    for node in &mut result {
267        clamp_inline_node_locations(node, &state.input);
268    }
269
270    // Merge adjacent plain text nodes
271    merge_adjacent_plain_text_nodes(result)
272}
273
274/// Merge adjacent plain text nodes into single nodes to simplify the output
275pub(crate) fn merge_adjacent_plain_text_nodes(nodes: Vec<InlineNode>) -> Vec<InlineNode> {
276    let mut result = Vec::new();
277
278    for node in nodes {
279        match (result.last_mut(), node) {
280            (Some(InlineNode::PlainText(last_plain)), InlineNode::PlainText(current_plain)) => {
281                // Merge current plain text with the last one
282                last_plain.content.push_str(&current_plain.content);
283                // Extend the location to cover both nodes
284                last_plain.location.absolute_end = current_plain.location.absolute_end;
285                last_plain.location.end = current_plain.location.end;
286            }
287            (_, node) => {
288                // Not adjacent plain text nodes, add as separate node
289                result.push(node);
290            }
291        }
292    }
293
294    result
295}
296
297pub(crate) fn replace_passthrough_placeholders(
298    content: &str,
299    processed: &ProcessedContent,
300) -> String {
301    let mut result = content.to_string();
302
303    // Replace each passthrough placeholder with its content
304    for (index, passthrough) in processed.passthroughs.iter().enumerate() {
305        let placeholder = format!("���{index}���");
306        if let Some(text) = &passthrough.text {
307            result = result.replace(&placeholder, text);
308        }
309    }
310
311    result
312}
313
#[cfg(test)]
#[allow(clippy::indexing_slicing)] // Tests verify length before indexing
mod tests {
    use super::*;

    // === Divergence Prevention Tests ===
    //
    // Each test feeds a common inline formatting pattern to
    // parse_text_for_quotes and checks the shape of the returned nodes, so that
    // grammar changes which alter the quotes-only output are noticed here.

    /// True when `node` is a `PlainText` node whose content equals `expected`.
    fn is_plain(node: Option<&InlineNode>, expected: &str) -> bool {
        matches!(node, Some(InlineNode::PlainText(plain)) if plain.content == expected)
    }

    /// True when every node in `nodes` is `PlainText`.
    fn all_plain(nodes: &[InlineNode]) -> bool {
        nodes.iter().all(|n| matches!(n, InlineNode::PlainText(_)))
    }

    #[test]
    fn test_constrained_bold_pattern() {
        let parsed = parse_text_for_quotes("This is *bold* text.");
        assert_eq!(parsed.len(), 3);
        assert!(matches!(parsed[0], InlineNode::PlainText(_)));
        assert!(matches!(
            &parsed[1],
            InlineNode::BoldText(bold) if is_plain(bold.content.first(), "bold")
        ));
        assert!(matches!(parsed[2], InlineNode::PlainText(_)));
    }

    #[test]
    fn test_unconstrained_bold_pattern() {
        let parsed = parse_text_for_quotes("This**bold**word");
        assert_eq!(parsed.len(), 3);
        assert!(matches!(
            &parsed[1],
            InlineNode::BoldText(bold) if is_plain(bold.content.first(), "bold")
        ));
    }

    #[test]
    fn test_constrained_italic_pattern() {
        let parsed = parse_text_for_quotes("This is _italic_ text.");
        assert_eq!(parsed.len(), 3);
        assert!(matches!(
            &parsed[1],
            InlineNode::ItalicText(italic) if is_plain(italic.content.first(), "italic")
        ));
    }

    #[test]
    fn test_unconstrained_italic_pattern() {
        let parsed = parse_text_for_quotes("This__italic__word");
        assert_eq!(parsed.len(), 3);
        assert!(matches!(
            &parsed[1],
            InlineNode::ItalicText(italic) if is_plain(italic.content.first(), "italic")
        ));
    }

    #[test]
    fn test_constrained_monospace_pattern() {
        let parsed = parse_text_for_quotes("Use `code` here.");
        assert_eq!(parsed.len(), 3);
        assert!(matches!(
            &parsed[1],
            InlineNode::MonospaceText(mono) if is_plain(mono.content.first(), "code")
        ));
    }

    #[test]
    fn test_superscript_pattern() {
        let parsed = parse_text_for_quotes("E=mc^2^");
        assert_eq!(parsed.len(), 2);
        assert!(matches!(
            &parsed[1],
            InlineNode::SuperscriptText(sup) if is_plain(sup.content.first(), "2")
        ));
    }

    #[test]
    fn test_subscript_pattern() {
        let parsed = parse_text_for_quotes("H~2~O");
        assert_eq!(parsed.len(), 3);
        assert!(matches!(
            &parsed[1],
            InlineNode::SubscriptText(sub) if is_plain(sub.content.first(), "2")
        ));
    }

    #[test]
    fn test_highlight_pattern() {
        let parsed = parse_text_for_quotes("This is #highlighted# text.");
        assert_eq!(parsed.len(), 3);
        assert!(matches!(
            &parsed[1],
            InlineNode::HighlightText(mark) if is_plain(mark.content.first(), "highlighted")
        ));
    }

    #[test]
    fn test_escaped_superscript_not_parsed() {
        // A backslash in front of the marker must suppress formatting, leaving
        // nothing but plain text in the output.
        let parsed = parse_text_for_quotes(r"E=mc\^2^");
        assert!(
            all_plain(&parsed),
            "Escaped superscript should not be parsed"
        );
    }

    #[test]
    fn test_escaped_subscript_not_parsed() {
        let parsed = parse_text_for_quotes(r"H\~2~O");
        assert!(all_plain(&parsed), "Escaped subscript should not be parsed");
    }

    #[test]
    fn test_multiple_formats_in_sequence() {
        let parsed = parse_text_for_quotes("*bold* and _italic_ and `code`");
        let has_bold = parsed.iter().any(|n| matches!(n, InlineNode::BoldText(_)));
        let has_italic = parsed
            .iter()
            .any(|n| matches!(n, InlineNode::ItalicText(_)));
        let has_mono = parsed
            .iter()
            .any(|n| matches!(n, InlineNode::MonospaceText(_)));
        assert!(has_bold);
        assert!(has_italic);
        assert!(has_mono);
    }

    #[test]
    fn test_plain_text_only() {
        let parsed = parse_text_for_quotes("Just plain text here.");
        assert_eq!(parsed.len(), 1);
        assert!(matches!(parsed[0], InlineNode::PlainText(_)));
    }

    #[test]
    fn test_empty_input() {
        let parsed = parse_text_for_quotes("");
        assert!(parsed.is_empty());
    }
}
443}