Skip to main content

acdc_parser/grammar/
passthrough_processing.rs

1use crate::{InlineNode, Location, Pass, Plain, ProcessedContent, Raw, Substitution};
2
3use super::{
4    ParserState,
5    location_mapping::{clamp_inline_node_locations, remap_inline_node_location},
6    markup_patterns::{
7        MarkupMatch, find_constrained_bold_pattern, find_curved_apostrophe_pattern,
8        find_curved_quotation_pattern, find_highlight_constrained_pattern,
9        find_highlight_unconstrained_pattern, find_italic_pattern,
10        find_monospace_constrained_pattern, find_monospace_unconstrained_pattern,
11        find_subscript_pattern, find_superscript_pattern, find_unconstrained_bold_pattern,
12        find_unconstrained_italic_pattern,
13    },
14};
15use crate::{
16    Bold, CurvedApostrophe, CurvedQuotation, Form, Highlight, Italic, Monospace, Subscript,
17    Superscript,
18};
19
/// Inline formatting flavours recognised when the quotes substitution is
/// applied to passthrough content.
///
/// Each variant corresponds to one delimiter style; see the `impl` for the
/// delimiter width, the resulting `Form`, and the matching finder function.
#[derive(Debug, Clone, Copy)]
enum MarkupType {
    /// `**bold**`
    UnconstrainedBold,
    /// `__italic__`
    UnconstrainedItalic,
    /// `*bold*`
    ConstrainedBold,
    /// `_italic_`
    ConstrainedItalic,
    /// `^superscript^`
    Superscript,
    /// `~subscript~`
    Subscript,
    /// `` "`curved quotes`" ``
    CurvedQuotation,
    /// `` '`curved apostrophe`' ``
    CurvedApostrophe,
    /// ``` ``monospace`` ```
    UnconstrainedMonospace,
    /// `` `monospace` ``
    ConstrainedMonospace,
    /// `##highlight##`
    UnconstrainedHighlight,
    /// `#highlight#`
    ConstrainedHighlight,
}
36
37impl MarkupType {
38    /// Returns the delimiter length for this markup type.
39    const fn delimiter_len(self) -> usize {
40        match self {
41            Self::UnconstrainedBold
42            | Self::UnconstrainedItalic
43            | Self::CurvedQuotation
44            | Self::CurvedApostrophe
45            | Self::UnconstrainedMonospace
46            | Self::UnconstrainedHighlight => 2,
47            Self::ConstrainedBold
48            | Self::ConstrainedItalic
49            | Self::Superscript
50            | Self::Subscript
51            | Self::ConstrainedMonospace
52            | Self::ConstrainedHighlight => 1,
53        }
54    }
55
56    /// Returns the Form for this markup type.
57    const fn form(self) -> Form {
58        match self {
59            Self::UnconstrainedBold
60            | Self::UnconstrainedItalic
61            | Self::Superscript
62            | Self::Subscript
63            | Self::CurvedQuotation
64            | Self::CurvedApostrophe
65            | Self::UnconstrainedMonospace
66            | Self::UnconstrainedHighlight => Form::Unconstrained,
67            Self::ConstrainedBold
68            | Self::ConstrainedItalic
69            | Self::ConstrainedMonospace
70            | Self::ConstrainedHighlight => Form::Constrained,
71        }
72    }
73
74    /// Whether this pattern uses <= priority (curved quotes take precedence at same position).
75    const fn uses_lte_priority(self) -> bool {
76        matches!(self, Self::CurvedQuotation | Self::CurvedApostrophe)
77    }
78
79    /// Find this pattern in the input.
80    fn find(self, input: &str) -> Option<MarkupMatch> {
81        match self {
82            Self::UnconstrainedBold => find_unconstrained_bold_pattern(input),
83            Self::UnconstrainedItalic => find_unconstrained_italic_pattern(input),
84            Self::ConstrainedBold => find_constrained_bold_pattern(input),
85            Self::ConstrainedItalic => find_italic_pattern(input),
86            Self::Superscript => find_superscript_pattern(input),
87            Self::Subscript => find_subscript_pattern(input),
88            Self::CurvedQuotation => find_curved_quotation_pattern(input),
89            Self::CurvedApostrophe => find_curved_apostrophe_pattern(input),
90            Self::UnconstrainedMonospace => find_monospace_unconstrained_pattern(input),
91            Self::ConstrainedMonospace => find_monospace_constrained_pattern(input),
92            Self::UnconstrainedHighlight => find_highlight_unconstrained_pattern(input),
93            Self::ConstrainedHighlight => find_highlight_constrained_pattern(input),
94        }
95    }
96
97    /// Create an `InlineNode` for this markup type.
98    fn create_node(self, inner_content: InlineNode, outer_location: Location) -> InlineNode {
99        let form = self.form();
100        match self {
101            Self::UnconstrainedBold | Self::ConstrainedBold => InlineNode::BoldText(Bold {
102                content: vec![inner_content],
103                form,
104                role: None,
105                id: None,
106                location: outer_location,
107            }),
108            Self::UnconstrainedItalic | Self::ConstrainedItalic => InlineNode::ItalicText(Italic {
109                content: vec![inner_content],
110                form,
111                role: None,
112                id: None,
113                location: outer_location,
114            }),
115            Self::Superscript => InlineNode::SuperscriptText(Superscript {
116                content: vec![inner_content],
117                form,
118                role: None,
119                id: None,
120                location: outer_location,
121            }),
122            Self::Subscript => InlineNode::SubscriptText(Subscript {
123                content: vec![inner_content],
124                form,
125                role: None,
126                id: None,
127                location: outer_location,
128            }),
129            Self::CurvedQuotation => InlineNode::CurvedQuotationText(CurvedQuotation {
130                content: vec![inner_content],
131                form,
132                role: None,
133                id: None,
134                location: outer_location,
135            }),
136            Self::CurvedApostrophe => InlineNode::CurvedApostropheText(CurvedApostrophe {
137                content: vec![inner_content],
138                form,
139                role: None,
140                id: None,
141                location: outer_location,
142            }),
143            Self::UnconstrainedMonospace | Self::ConstrainedMonospace => {
144                InlineNode::MonospaceText(Monospace {
145                    content: vec![inner_content],
146                    form,
147                    role: None,
148                    id: None,
149                    location: outer_location,
150                })
151            }
152            Self::UnconstrainedHighlight | Self::ConstrainedHighlight => {
153                InlineNode::HighlightText(Highlight {
154                    content: vec![inner_content],
155                    form,
156                    role: None,
157                    id: None,
158                    location: outer_location,
159                })
160            }
161        }
162    }
163}
164
165/// All markup types to check, in priority order.
166const MARKUP_TYPES: &[MarkupType] = &[
167    MarkupType::UnconstrainedBold,
168    MarkupType::UnconstrainedItalic,
169    MarkupType::ConstrainedBold,
170    MarkupType::ConstrainedItalic,
171    MarkupType::Superscript,
172    MarkupType::Subscript,
173    // Curved quotes checked before monospace since they start with backticks
174    MarkupType::CurvedQuotation,
175    MarkupType::CurvedApostrophe,
176    MarkupType::UnconstrainedMonospace,
177    MarkupType::ConstrainedMonospace,
178    MarkupType::UnconstrainedHighlight,
179    MarkupType::ConstrainedHighlight,
180];
181
182/// Process passthrough content that contains quote substitutions, parsing nested markup
183pub(crate) fn process_passthrough_with_quotes(
184    content: &str,
185    passthrough: &Pass,
186) -> Vec<InlineNode> {
187    let has_quotes = passthrough.substitutions.contains(&Substitution::Quotes);
188
189    // If no quotes processing needed
190    if !has_quotes {
191        // If SpecialChars substitution is enabled, escape HTML (return PlainText)
192        // This applies to: +text+ (Single), ++text++ (Double), pass:c[] (Macro with SpecialChars)
193        // Otherwise output raw HTML (return RawText)
194        // This applies to: +++text+++ (Triple), pass:[] (Macro without SpecialChars)
195        // Use RawText for all passthroughs without Quotes to avoid merging with
196        // adjacent PlainText nodes (which would lose the passthrough's substitution info).
197        // Carry the passthrough's own subs (minus Quotes, already handled) so the
198        // converter applies exactly those instead of the block's subs.
199        return vec![InlineNode::RawText(Raw {
200            content: content.to_string(),
201            location: passthrough.location.clone(),
202            subs: passthrough
203                .substitutions
204                .iter()
205                .filter(|s| **s != Substitution::Quotes)
206                .cloned()
207                .collect(),
208        })];
209    }
210
211    tracing::debug!(content = ?content, "Parsing passthrough content with quotes");
212
213    // Manual parsing for bold and italic patterns in passthrough content
214    // This is a simpler approach than trying to use the full PEG parser
215    parse_text_for_quotes(content)
216}
217
218/// Parse text for inline formatting markup (bold, italic, monospace, etc.).
219///
220/// This function scans the input text for `AsciiDoc` formatting patterns and returns
221/// a vector of `InlineNode`s representing the parsed content. Used for applying
222/// "quotes" substitution to verbatim block content.
223///
224/// # Supported Patterns
225///
226/// - `*bold*` and `**bold**` (constrained/unconstrained)
227/// - `_italic_` and `__italic__`
228/// - `` `monospace` `` and ``` ``monospace`` ```
229/// - `^superscript^` and `~subscript~`
230/// - `#highlight#` and `##highlight##`
231/// - `` "`curved quotes`" `` and `` '`curved apostrophe`' ``
232///
233/// # Example
234///
235/// ```
236/// use acdc_parser::parse_text_for_quotes;
237///
238/// let nodes = parse_text_for_quotes("This has *bold* text.");
239/// assert_eq!(nodes.len(), 3); // "This has ", Bold("bold"), " text."
240/// ```
241#[must_use]
242pub fn parse_text_for_quotes(content: &str) -> Vec<InlineNode> {
243    let mut result = Vec::new();
244    let mut remaining = content;
245    let mut current_offset = 0;
246
247    while !remaining.is_empty() {
248        // Find the earliest pattern in the remaining text
249        let earliest = find_earliest_pattern(remaining);
250
251        if let Some((markup_match, markup_type)) = earliest {
252            // Add any content before the markup as plain text
253            if markup_match.start > 0 {
254                let before_content = &remaining[..markup_match.start];
255                result.push(InlineNode::PlainText(Plain {
256                    content: before_content.to_string(),
257                    location: create_relative_location(
258                        current_offset,
259                        current_offset + before_content.len(),
260                    ),
261                    escaped: false,
262                }));
263                current_offset += before_content.len();
264            }
265
266            // Create inner content location
267            let delim_len = markup_type.delimiter_len();
268            let inner_location = create_relative_location(
269                current_offset + delim_len,
270                current_offset + delim_len + markup_match.content.len(),
271            );
272            let inner_content = InlineNode::PlainText(Plain {
273                content: markup_match.content.clone(),
274                location: inner_location,
275                escaped: false,
276            });
277
278            // Create outer location
279            let outer_location = create_relative_location(
280                current_offset,
281                current_offset + markup_match.end - markup_match.start,
282            );
283
284            // Create the appropriate node
285            result.push(markup_type.create_node(inner_content, outer_location));
286
287            // Move past the markup pattern
288            remaining = &remaining[markup_match.end..];
289            current_offset += markup_match.end - markup_match.start;
290        } else {
291            // No patterns found, add remaining content as plain text and exit
292            if !remaining.is_empty() {
293                if let Some(InlineNode::PlainText(last_plain)) = result.last_mut() {
294                    // Merge with the last plain text node
295                    last_plain.content.push_str(remaining);
296                    last_plain.location.absolute_end = current_offset + remaining.len();
297                    last_plain.location.end.column = current_offset + remaining.len() + 1;
298                } else {
299                    result.push(InlineNode::PlainText(Plain {
300                        content: remaining.to_string(),
301                        location: create_relative_location(
302                            current_offset,
303                            current_offset + remaining.len(),
304                        ),
305                        escaped: false,
306                    }));
307                }
308            }
309            break;
310        }
311    }
312
313    result
314}
315
316/// Find the earliest matching pattern in the input.
317fn find_earliest_pattern(input: &str) -> Option<(MarkupMatch, MarkupType)> {
318    let mut earliest: Option<(MarkupMatch, MarkupType)> = None;
319
320    for &markup_type in MARKUP_TYPES {
321        if let Some(markup_match) = markup_type.find(input) {
322            let dominated = earliest.as_ref().is_some_and(|(e, _)| {
323                if markup_type.uses_lte_priority() {
324                    markup_match.start > e.start
325                } else {
326                    markup_match.start >= e.start
327                }
328            });
329
330            if !dominated {
331                earliest = Some((markup_match, markup_type));
332            }
333        }
334    }
335
336    earliest
337}
338
339/// Create a location for relative positions within passthrough content.
340/// These positions will be remapped later during final location mapping.
341fn create_relative_location(start: usize, end: usize) -> Location {
342    Location {
343        absolute_start: start,
344        absolute_end: end,
345        start: crate::Position {
346            line: 1,
347            column: start + 1,
348        },
349        end: crate::Position {
350            line: 1,
351            column: end + 1,
352        },
353    }
354}
355
356/// Process passthrough placeholders in content, returning expanded `InlineNode`s.
357///
358/// This function handles the multi-pass parsing needed for passthroughs with quote substitutions.
359/// It splits the content around placeholders and processes each passthrough according to its
360/// substitution settings.
361pub(crate) fn process_passthrough_placeholders(
362    content: &str,
363    processed: &ProcessedContent,
364    state: &ParserState,
365    base_location: &Location,
366) -> Vec<InlineNode> {
367    let mut result = Vec::new();
368    let mut remaining = content;
369    let mut processed_offset = 0; // Position in the processed content (with placeholders)
370
371    // Process each passthrough placeholder in order
372    for (index, passthrough) in processed.passthroughs.iter().enumerate() {
373        let placeholder = format!("���{index}���");
374
375        if let Some(placeholder_pos) = remaining.find(&placeholder) {
376            let before_content = if placeholder_pos > 0 {
377                Some(&remaining[..placeholder_pos])
378            } else {
379                None
380            };
381
382            // Add content before the placeholder if any, using original string positions
383            if let Some(before) = before_content
384                && !before.is_empty()
385            {
386                result.push(InlineNode::PlainText(Plain {
387                    content: before.to_string(),
388                    location: Location {
389                        // Use original string positions
390                        absolute_start: base_location.absolute_start + processed_offset,
391                        absolute_end: base_location.absolute_start
392                            + processed_offset
393                            + before.len(),
394                        start: crate::Position {
395                            line: base_location.start.line,
396                            column: base_location.start.column + processed_offset,
397                        },
398                        end: crate::Position {
399                            line: base_location.start.line,
400                            column: base_location.start.column + processed_offset + before.len(),
401                        },
402                    },
403                    escaped: false,
404                }));
405                processed_offset += before.len();
406            }
407
408            // Process the passthrough content using original string positions from passthrough.location
409            if let Some(passthrough_content) = &passthrough.text {
410                let processed_nodes =
411                    process_passthrough_with_quotes(passthrough_content, passthrough);
412
413                // Remap locations of processed nodes to use original string coordinates
414                // The passthrough content starts after "pass:q[" so we need to account for that offset
415                let macro_prefix_len = "pass:q[".len(); // 7 characters
416                for mut node in processed_nodes {
417                    remap_inline_node_location(
418                        &mut node,
419                        passthrough.location.absolute_start + macro_prefix_len,
420                    );
421                    result.push(node);
422                }
423            }
424
425            // Move past the placeholder in the processed content
426            let skip_len = placeholder_pos + placeholder.len();
427            remaining = &remaining[skip_len..];
428            // Update processed_offset to account for the original passthrough macro length
429            processed_offset +=
430                passthrough.location.absolute_end - passthrough.location.absolute_start;
431        }
432    }
433
434    // Add any remaining content as plain text
435    if !remaining.is_empty() {
436        // Check if the last node is PlainText and merge if so
437        if let Some(InlineNode::PlainText(last_plain)) = result.last_mut() {
438            // Merge remaining content with the last plain text node
439            last_plain.content.push_str(remaining);
440            // Extend the location to include the remaining content
441            last_plain.location.absolute_end = base_location.absolute_end;
442            last_plain.location.end = base_location.end.clone();
443        } else {
444            // Add as separate node if last node is not plain text
445            result.push(InlineNode::PlainText(Plain {
446                content: remaining.to_string(),
447                location: Location {
448                    absolute_start: base_location.absolute_start + processed_offset,
449                    absolute_end: base_location.absolute_end,
450                    start: crate::Position {
451                        line: base_location.start.line,
452                        column: base_location.start.column + processed_offset,
453                    },
454                    end: base_location.end.clone(),
455                },
456                escaped: false,
457            }));
458        }
459    }
460
461    // If no placeholders were found, return the original content as plain text
462    if result.is_empty() {
463        result.push(InlineNode::PlainText(Plain {
464            content: content.to_string(),
465            location: base_location.clone(),
466            escaped: false,
467        }));
468    }
469
470    // Clamp all locations to valid bounds within the input string
471    for node in &mut result {
472        clamp_inline_node_locations(node, &state.input);
473    }
474
475    // Merge adjacent plain text nodes
476    merge_adjacent_plain_text_nodes(result)
477}
478
479/// Merge adjacent plain text nodes into single nodes to simplify the output
480pub(crate) fn merge_adjacent_plain_text_nodes(nodes: Vec<InlineNode>) -> Vec<InlineNode> {
481    let mut result = Vec::new();
482
483    for node in nodes {
484        match (result.last_mut(), node) {
485            (Some(InlineNode::PlainText(last_plain)), InlineNode::PlainText(current_plain)) => {
486                // Merge current plain text with the last one
487                last_plain.content.push_str(&current_plain.content);
488                // Extend the location to cover both nodes
489                last_plain.location.absolute_end = current_plain.location.absolute_end;
490                last_plain.location.end = current_plain.location.end;
491            }
492            (_, node) => {
493                // Not adjacent plain text nodes, add as separate node
494                result.push(node);
495            }
496        }
497    }
498
499    result
500}
501
502pub(crate) fn replace_passthrough_placeholders(
503    content: &str,
504    processed: &ProcessedContent,
505) -> String {
506    let mut result = content.to_string();
507
508    // Replace each passthrough placeholder with its content
509    for (index, passthrough) in processed.passthroughs.iter().enumerate() {
510        let placeholder = format!("���{index}���");
511        if let Some(text) = &passthrough.text {
512            result = result.replace(&placeholder, text);
513        }
514    }
515
516    result
517}
518
#[cfg(test)]
#[allow(clippy::indexing_slicing)] // Tests verify length before indexing
mod tests {
    use super::*;

    // === Divergence Prevention Tests ===
    //
    // parse_text_for_quotes is a hand-written counterpart of the main PEG
    // parser for common inline formatting patterns. These tests pin its
    // structural output; if they fail after grammar changes, update
    // parse_text_for_quotes accordingly.

    /// `true` when the first child is a plain-text node holding `expected`.
    fn wraps_plain(children: &[InlineNode], expected: &str) -> bool {
        matches!(children.first(), Some(InlineNode::PlainText(p)) if p.content == expected)
    }

    /// `true` when every node is plain text.
    fn all_plain(nodes: &[InlineNode]) -> bool {
        nodes.iter().all(|n| matches!(n, InlineNode::PlainText(_)))
    }

    #[test]
    fn test_constrained_bold_pattern() {
        let nodes = parse_text_for_quotes("This is *bold* text.");
        assert_eq!(nodes.len(), 3);
        assert!(matches!(nodes[0], InlineNode::PlainText(_)));
        assert!(matches!(&nodes[1], InlineNode::BoldText(b) if wraps_plain(&b.content, "bold")));
        assert!(matches!(nodes[2], InlineNode::PlainText(_)));
    }

    #[test]
    fn test_unconstrained_bold_pattern() {
        let nodes = parse_text_for_quotes("This**bold**word");
        assert_eq!(nodes.len(), 3);
        assert!(matches!(&nodes[1], InlineNode::BoldText(b) if wraps_plain(&b.content, "bold")));
    }

    #[test]
    fn test_constrained_italic_pattern() {
        let nodes = parse_text_for_quotes("This is _italic_ text.");
        assert_eq!(nodes.len(), 3);
        assert!(
            matches!(&nodes[1], InlineNode::ItalicText(i) if wraps_plain(&i.content, "italic"))
        );
    }

    #[test]
    fn test_unconstrained_italic_pattern() {
        let nodes = parse_text_for_quotes("This__italic__word");
        assert_eq!(nodes.len(), 3);
        assert!(
            matches!(&nodes[1], InlineNode::ItalicText(i) if wraps_plain(&i.content, "italic"))
        );
    }

    #[test]
    fn test_constrained_monospace_pattern() {
        let nodes = parse_text_for_quotes("Use `code` here.");
        assert_eq!(nodes.len(), 3);
        assert!(
            matches!(&nodes[1], InlineNode::MonospaceText(m) if wraps_plain(&m.content, "code"))
        );
    }

    #[test]
    fn test_superscript_pattern() {
        let nodes = parse_text_for_quotes("E=mc^2^");
        assert_eq!(nodes.len(), 2);
        assert!(
            matches!(&nodes[1], InlineNode::SuperscriptText(s) if wraps_plain(&s.content, "2"))
        );
    }

    #[test]
    fn test_subscript_pattern() {
        let nodes = parse_text_for_quotes("H~2~O");
        assert_eq!(nodes.len(), 3);
        assert!(matches!(&nodes[1], InlineNode::SubscriptText(s) if wraps_plain(&s.content, "2")));
    }

    #[test]
    fn test_highlight_pattern() {
        let nodes = parse_text_for_quotes("This is #highlighted# text.");
        assert_eq!(nodes.len(), 3);
        assert!(
            matches!(&nodes[1], InlineNode::HighlightText(h) if wraps_plain(&h.content, "highlighted"))
        );
    }

    #[test]
    fn test_escaped_superscript_not_parsed() {
        // A backslash in front of the marker must suppress formatting, so the
        // whole input stays plain text.
        let nodes = parse_text_for_quotes(r"E=mc\^2^");
        assert!(all_plain(&nodes), "Escaped superscript should not be parsed");
    }

    #[test]
    fn test_escaped_subscript_not_parsed() {
        let nodes = parse_text_for_quotes(r"H\~2~O");
        assert!(all_plain(&nodes), "Escaped subscript should not be parsed");
    }

    #[test]
    fn test_multiple_formats_in_sequence() {
        let nodes = parse_text_for_quotes("*bold* and _italic_ and `code`");
        let has_bold = nodes.iter().any(|n| matches!(n, InlineNode::BoldText(_)));
        let has_italic = nodes.iter().any(|n| matches!(n, InlineNode::ItalicText(_)));
        let has_monospace = nodes
            .iter()
            .any(|n| matches!(n, InlineNode::MonospaceText(_)));
        assert!(has_bold);
        assert!(has_italic);
        assert!(has_monospace);
    }

    #[test]
    fn test_plain_text_only() {
        let nodes = parse_text_for_quotes("Just plain text here.");
        assert_eq!(nodes.len(), 1);
        assert!(matches!(nodes[0], InlineNode::PlainText(_)));
    }

    #[test]
    fn test_empty_input() {
        let nodes = parse_text_for_quotes("");
        assert!(nodes.is_empty());
    }
}