Skip to main content

acdc_parser/grammar/
passthrough_processing.rs

1use bumpalo::Bump;
2
3use crate::{
4    InlineNode, Location, ParseInlineResult, Pass, PassthroughKind, Plain, ProcessedContent, Raw,
5    Substitution, parsed::OwnedInput,
6};
7
8use super::{
9    ParserState,
10    inlines::inline_parser,
11    location_mapping::{clamp_inline_node_locations, remap_inline_node_location},
12};
13
14/// Process passthrough content that contains quote substitutions, parsing nested markup
15pub(crate) fn process_passthrough_with_quotes<'a>(
16    arena: &'a Bump,
17    content: &'a str,
18    passthrough: &Pass,
19) -> Vec<InlineNode<'a>> {
20    let has_quotes = passthrough.substitutions.contains(&Substitution::Quotes);
21
22    // If no quotes processing needed
23    if !has_quotes {
24        // If SpecialChars substitution is enabled, escape HTML (return PlainText)
25        // This applies to: +text+ (Single), ++text++ (Double), pass:c[] (Macro with SpecialChars)
26        // Otherwise output raw HTML (return RawText)
27        // This applies to: +++text+++ (Triple), pass:[] (Macro without SpecialChars)
28        // Use RawText for all passthroughs without Quotes to avoid merging with
29        // adjacent PlainText nodes (which would lose the passthrough's substitution info).
30        // Carry the passthrough's own subs (minus Quotes, already handled) so the
31        // converter applies exactly those instead of the block's subs.
32        // Compute content-only location by stripping the delimiter prefix/suffix
33        // from the full passthrough macro location. For attribute-ref passthroughs,
34        // the location spans the `{attr}` reference with no delimiters to strip.
35        let suffix_len = match passthrough.kind {
36            PassthroughKind::Macro | PassthroughKind::Single => Some(1), // ] or +
37            PassthroughKind::Double => Some(2),                          // ++
38            PassthroughKind::Triple => Some(3),                          // +++
39            PassthroughKind::AttributeRef => None,
40        };
41
42        let content_location = if let Some(suffix_len) = suffix_len {
43            let total_span =
44                passthrough.location.absolute_end - passthrough.location.absolute_start;
45            let prefix_len = total_span - content.len() - suffix_len;
46
47            let content_abs_start = passthrough.location.absolute_start + prefix_len;
48            let content_col_start = passthrough.location.start.column + prefix_len;
49
50            Location {
51                absolute_start: content_abs_start,
52                absolute_end: content_abs_start + content.len(),
53                start: crate::Position {
54                    line: passthrough.location.start.line,
55                    column: content_col_start,
56                },
57                end: crate::Position {
58                    line: passthrough.location.start.line,
59                    column: content_col_start + content.len(),
60                },
61            }
62        } else {
63            passthrough.location.clone()
64        };
65
66        return vec![InlineNode::RawText(Raw {
67            content,
68            location: content_location,
69            subs: passthrough
70                .substitutions
71                .iter()
72                .filter(|s| **s != Substitution::Quotes)
73                .cloned()
74                .collect(),
75        })];
76    }
77
78    tracing::debug!(content = ?content, "Parsing passthrough content with quotes");
79
80    parse_text_for_quotes_in(arena, content)
81}
82
83/// Parse text for inline formatting markup (bold, italic, monospace, etc.).
84///
85/// Public entry point — returns a `ParseInlineResult` that owns the arena
86/// the resulting `InlineNode`s borrow from. Callers reach the nodes via
87/// `.inlines()`. Each call allocates a fresh arena; memory is reclaimed
88/// when the returned value is dropped (no leaks). The returned
89/// `ParseInlineResult::warnings()` slice is always empty for this entry
90/// point — the quotes-only grammar never raises warnings.
91///
92/// # Supported Patterns
93///
94/// - `*bold*` and `**bold**` (constrained/unconstrained)
95/// - `_italic_` and `__italic__`
96/// - `` `monospace` `` and ``` ``monospace`` ```
97/// - `^superscript^` and `~subscript~`
98/// - `#highlight#` and `##highlight##`
99/// - `` "`curved quotes`" `` and `` '`curved apostrophe`' ``
100///
101/// # Example
102///
103/// ```
104/// use acdc_parser::parse_text_for_quotes;
105///
106/// let parsed = parse_text_for_quotes("This has *bold* text.");
107/// assert_eq!(parsed.inlines().len(), 3); // "This has ", Bold("bold"), " text."
108/// ```
109pub fn parse_text_for_quotes(content: &str) -> ParseInlineResult {
110    let owner = OwnedInput::new(content.into());
111    ParseInlineResult::from_infallible(owner, |owner| {
112        parse_text_for_quotes_in(&owner.arena, &owner.source)
113    })
114}
115
116/// Arena-parameterised variant for internal callers that already have an
117/// arena threaded through `ParserState`. Avoids the per-call `Bump`
118/// allocation that the public entry point does.
119pub(crate) fn parse_text_for_quotes_in<'a>(
120    arena: &'a Bump,
121    content: &'a str,
122) -> Vec<InlineNode<'a>> {
123    if content.is_empty() {
124        return Vec::new();
125    }
126
127    // Fast path: if content has no formatting markers, return as plain text
128    // without creating a ParserState or invoking the PEG parser.
129    // Covers ~87% of calls in typical documents.
130    if !content
131        .bytes()
132        .any(|b| matches!(b, b'*' | b'_' | b'`' | b'#' | b'^' | b'~' | b'"' | b'\''))
133    {
134        return vec![InlineNode::PlainText(Plain {
135            content,
136            location: Location::default(),
137            escaped: false,
138        })];
139    }
140
141    let mut state = ParserState::new_quotes_only(content, arena);
142    match inline_parser::quotes_only_inlines(content, &mut state) {
143        Ok(nodes) => nodes,
144        Err(err) => {
145            tracing::warn!(
146                ?err,
147                ?content,
148                "quotes-only PEG parse failed, falling back to plain text"
149            );
150            vec![InlineNode::PlainText(Plain {
151                content,
152                location: Location::default(),
153                escaped: false,
154            })]
155        }
156    }
157}
158
159/// Build an `InlineNode::PlainText` at `text`, located at
160/// `base_location.start + offset` and extending over `text.len()` columns on
161/// the same line.
162fn plain_text_at<'a>(text: &'a str, base_location: &Location, offset: usize) -> InlineNode<'a> {
163    let abs_start = base_location.absolute_start + offset;
164    let col_start = base_location.start.column + offset;
165    InlineNode::PlainText(Plain {
166        content: text,
167        location: Location {
168            absolute_start: abs_start,
169            absolute_end: abs_start + text.len(),
170            start: crate::Position {
171                line: base_location.start.line,
172                column: col_start,
173            },
174            end: crate::Position {
175                line: base_location.start.line,
176                column: col_start + text.len(),
177            },
178        },
179        escaped: false,
180    })
181}
182
183/// Process passthrough placeholders in content, returning expanded `InlineNode`s.
184///
185/// This function handles the multi-pass parsing needed for passthroughs with quote substitutions.
186/// It splits the content around placeholders and processes each passthrough according to its
187/// substitution settings.
188pub(crate) fn process_passthrough_placeholders<'a>(
189    content: &'a str,
190    processed: &'a ProcessedContent<'a>,
191    state: &ParserState<'a>,
192    base_location: &Location,
193) -> Vec<InlineNode<'a>> {
194    // Each passthrough produces at most (placeholder-count × small factor) +
195    // one trailing-plain. Upper-bound at 2 × placeholders + 1 so a paragraph
196    // full of passthroughs doesn't trigger log-N reallocs.
197    let mut result = Vec::with_capacity(processed.passthroughs.len() * 2 + 1);
198    let mut remaining = content;
199    let mut processed_offset = 0; // Position in the processed content (with placeholders)
200
201    // Process each passthrough placeholder in order
202    for (index, passthrough) in processed.passthroughs.iter().enumerate() {
203        let placeholder = format!("���{index}���");
204
205        if let Some(placeholder_pos) = remaining.find(&placeholder) {
206            let before_content = if placeholder_pos > 0 {
207                Some(&remaining[..placeholder_pos])
208            } else {
209                None
210            };
211
212            // Add content before the placeholder if any, using original string positions
213            if let Some(before) = before_content
214                && !before.is_empty()
215            {
216                result.push(plain_text_at(before, base_location, processed_offset));
217                processed_offset += before.len();
218            }
219
220            // Process the passthrough content using original string positions from passthrough.location
221            if let Some(passthrough_content) = &passthrough.text {
222                let processed_nodes =
223                    process_passthrough_with_quotes(state.arena, passthrough_content, passthrough);
224
225                // Remap locations of processed nodes to use original string coordinates
226                // The passthrough content starts after "pass:q[" so we need to account for that offset
227                let macro_prefix_len = "pass:q[".len(); // 7 characters
228                let has_quotes = passthrough.substitutions.contains(&Substitution::Quotes);
229                let remaining_subs: Vec<Substitution> = passthrough
230                    .substitutions
231                    .iter()
232                    .filter(|s| **s != Substitution::Quotes)
233                    .cloned()
234                    .collect();
235                for mut node in processed_nodes {
236                    remap_inline_node_location(
237                        &mut node,
238                        passthrough.location.absolute_start + macro_prefix_len,
239                    );
240                    // For passthroughs with quotes, convert PlainText to RawText so
241                    // HTML content passes through unescaped. Must happen AFTER
242                    // remapping since remap_inline_node_location handles PlainText
243                    // but not RawText (RawText from non-quotes path already has
244                    // correct locations from passthrough.location).
245                    if has_quotes {
246                        if let InlineNode::PlainText(p) = node {
247                            node = InlineNode::RawText(Raw {
248                                content: p.content,
249                                location: p.location,
250                                subs: remaining_subs.clone(),
251                            });
252                        }
253                    }
254                    result.push(node);
255                }
256            }
257
258            // Move past the placeholder in the processed content
259            let skip_len = placeholder_pos + placeholder.len();
260            remaining = &remaining[skip_len..];
261            // Update processed_offset to account for the original passthrough macro length
262            processed_offset +=
263                passthrough.location.absolute_end - passthrough.location.absolute_start;
264        }
265    }
266
267    // Add any remaining content as plain text
268    if !remaining.is_empty() {
269        // Check if the last node is PlainText and merge if so
270        if let Some(InlineNode::PlainText(last_plain)) = result.last_mut() {
271            // Merge remaining content with the last plain text node
272            last_plain.content =
273                state.intern_fmt(format_args!("{}{remaining}", last_plain.content));
274            // Extend the location to include the remaining content
275            last_plain.location.absolute_end = base_location.absolute_end;
276            last_plain.location.end = base_location.end.clone();
277        } else {
278            // Add as separate node if last node is not plain text. Extend
279            // the end to cover `base_location.end` (this is the final
280            // trailing segment).
281            let mut node = plain_text_at(remaining, base_location, processed_offset);
282            if let InlineNode::PlainText(ref mut p) = node {
283                p.location.absolute_end = base_location.absolute_end;
284                p.location.end = base_location.end.clone();
285            }
286            result.push(node);
287        }
288    }
289
290    // If no placeholders were found, return the original content as plain text
291    if result.is_empty() {
292        result.push(InlineNode::PlainText(Plain {
293            content,
294            location: base_location.clone(),
295            escaped: false,
296        }));
297    }
298
299    // Clamp all locations to valid bounds within the input string
300    for node in &mut result {
301        clamp_inline_node_locations(node, state.input);
302    }
303
304    // Merge adjacent plain text nodes
305    merge_adjacent_plain_text_nodes(state, result)
306}
307
308/// Merge adjacent plain text nodes into single nodes to simplify the output.
309/// Arena-interns the concatenated content so the merged node keeps lifetime `'a`.
310pub(crate) fn merge_adjacent_plain_text_nodes<'a>(
311    state: &ParserState<'a>,
312    nodes: Vec<InlineNode<'a>>,
313) -> Vec<InlineNode<'a>> {
314    // Worst case: no merges possible, so the output matches the input length.
315    let mut result: Vec<InlineNode<'a>> = Vec::with_capacity(nodes.len());
316
317    for node in nodes {
318        match (result.last_mut(), node) {
319            (Some(InlineNode::PlainText(last_plain)), InlineNode::PlainText(current_plain)) => {
320                // Merge current plain text with the last one
321                last_plain.content = state.intern_fmt(format_args!(
322                    "{}{}",
323                    last_plain.content, current_plain.content
324                ));
325                // Extend the location to cover both nodes
326                last_plain.location.absolute_end = current_plain.location.absolute_end;
327                last_plain.location.end = current_plain.location.end;
328            }
329            (_, node) => {
330                // Not adjacent plain text nodes, add as separate node
331                result.push(node);
332            }
333        }
334    }
335
336    result
337}
338
339pub(crate) fn replace_passthrough_placeholders(
340    content: &str,
341    processed: &ProcessedContent,
342) -> String {
343    let mut result: String = content.into();
344
345    // Replace each passthrough placeholder with its content
346    for (index, passthrough) in processed.passthroughs.iter().enumerate() {
347        let placeholder = format!("���{index}���");
348        if let Some(text) = &passthrough.text {
349            result = result.replace(&placeholder, text);
350        }
351    }
352
353    result
354}
355
#[cfg(test)]
#[allow(clippy::indexing_slicing)] // Tests verify length before indexing
mod tests {
    use super::*;

    // === Divergence Prevention Tests ===
    //
    // These tests check that parse_text_for_quotes yields the same structural
    // output as the main PEG parser for common inline formatting patterns.
    // If these tests fail after grammar changes, update parse_text_for_quotes.

    /// Asserts that `$node` is an `InlineNode::$variant` whose first child is
    /// a plain-text node with content `$text`.
    macro_rules! assert_wraps_plain {
        ($node:expr, $variant:ident, $text:expr) => {
            assert!(matches!(
                $node,
                InlineNode::$variant(inner)
                    if matches!(
                        inner.content.first(),
                        Some(InlineNode::PlainText(plain)) if plain.content == $text
                    )
            ));
        };
    }

    #[test]
    fn test_constrained_bold_pattern() {
        let parsed = parse_text_for_quotes("This is *bold* text.");
        let nodes = parsed.inlines();
        assert_eq!(nodes.len(), 3);
        assert!(matches!(nodes[0], InlineNode::PlainText(_)));
        assert_wraps_plain!(&nodes[1], BoldText, "bold");
        assert!(matches!(nodes[2], InlineNode::PlainText(_)));
    }

    #[test]
    fn test_unconstrained_bold_pattern() {
        let parsed = parse_text_for_quotes("This**bold**word");
        let nodes = parsed.inlines();
        assert_eq!(nodes.len(), 3);
        assert_wraps_plain!(&nodes[1], BoldText, "bold");
    }

    #[test]
    fn test_constrained_italic_pattern() {
        let parsed = parse_text_for_quotes("This is _italic_ text.");
        let nodes = parsed.inlines();
        assert_eq!(nodes.len(), 3);
        assert_wraps_plain!(&nodes[1], ItalicText, "italic");
    }

    #[test]
    fn test_unconstrained_italic_pattern() {
        let parsed = parse_text_for_quotes("This__italic__word");
        let nodes = parsed.inlines();
        assert_eq!(nodes.len(), 3);
        assert_wraps_plain!(&nodes[1], ItalicText, "italic");
    }

    #[test]
    fn test_constrained_monospace_pattern() {
        let parsed = parse_text_for_quotes("Use `code` here.");
        let nodes = parsed.inlines();
        assert_eq!(nodes.len(), 3);
        assert_wraps_plain!(&nodes[1], MonospaceText, "code");
    }

    #[test]
    fn test_superscript_pattern() {
        let parsed = parse_text_for_quotes("E=mc^2^");
        let nodes = parsed.inlines();
        assert_eq!(nodes.len(), 2);
        assert_wraps_plain!(&nodes[1], SuperscriptText, "2");
    }

    #[test]
    fn test_subscript_pattern() {
        let parsed = parse_text_for_quotes("H~2~O");
        let nodes = parsed.inlines();
        assert_eq!(nodes.len(), 3);
        assert_wraps_plain!(&nodes[1], SubscriptText, "2");
    }

    #[test]
    fn test_highlight_pattern() {
        let parsed = parse_text_for_quotes("This is #highlighted# text.");
        let nodes = parsed.inlines();
        assert_eq!(nodes.len(), 3);
        assert_wraps_plain!(&nodes[1], HighlightText, "highlighted");
    }

    #[test]
    fn test_escaped_superscript_not_parsed() {
        // A backslash before the marker must suppress formatting, leaving
        // nothing but plain text.
        let parsed = parse_text_for_quotes(r"E=mc\^2^");
        let nodes = parsed.inlines();
        assert!(
            nodes.iter().all(|n| matches!(n, InlineNode::PlainText(_))),
            "Escaped superscript should not be parsed"
        );
    }

    #[test]
    fn test_escaped_subscript_not_parsed() {
        let parsed = parse_text_for_quotes(r"H\~2~O");
        let nodes = parsed.inlines();
        assert!(
            nodes.iter().all(|n| matches!(n, InlineNode::PlainText(_))),
            "Escaped subscript should not be parsed"
        );
    }

    #[test]
    fn test_multiple_formats_in_sequence() {
        let parsed = parse_text_for_quotes("*bold* and _italic_ and `code`");
        let nodes = parsed.inlines();
        assert!(nodes.iter().any(|n| matches!(n, InlineNode::BoldText(_))));
        assert!(nodes.iter().any(|n| matches!(n, InlineNode::ItalicText(_))));
        assert!(
            nodes
                .iter()
                .any(|n| matches!(n, InlineNode::MonospaceText(_)))
        );
    }

    #[test]
    fn test_plain_text_only() {
        let parsed = parse_text_for_quotes("Just plain text here.");
        let nodes = parsed.inlines();
        assert_eq!(nodes.len(), 1);
        assert!(matches!(nodes[0], InlineNode::PlainText(_)));
    }

    #[test]
    fn test_empty_input() {
        let parsed = parse_text_for_quotes("");
        let nodes = parsed.inlines();
        assert!(nodes.is_empty());
    }
}
498}