// acdc_parser/grammar/passthrough_processing.rs

1use crate::{InlineNode, Location, Pass, Plain, ProcessedContent, Raw, Substitution};
2
3use super::{
4    ParserState,
5    location_mapping::{clamp_inline_node_locations, remap_inline_node_location},
6    markup_patterns::{
7        MarkupMatch, find_constrained_bold_pattern, find_curved_apostrophe_pattern,
8        find_curved_quotation_pattern, find_highlight_constrained_pattern,
9        find_highlight_unconstrained_pattern, find_italic_pattern,
10        find_monospace_constrained_pattern, find_monospace_unconstrained_pattern,
11        find_subscript_pattern, find_superscript_pattern, find_unconstrained_bold_pattern,
12        find_unconstrained_italic_pattern,
13    },
14};
15use crate::{
16    Bold, CurvedApostrophe, CurvedQuotation, Form, Highlight, Italic, Monospace, Subscript,
17    Superscript,
18};
19
/// Markup type for passthrough inline content parsing.
///
/// Each variant names one inline formatting pattern that the quotes scanner looks for.
/// The type is a plain tag: it is `Copy`, and it is comparable so that callers and tests
/// can assert which kind of markup was selected.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MarkupType {
    /// `**bold**`
    UnconstrainedBold,
    /// `__italic__`
    UnconstrainedItalic,
    /// `*bold*`
    ConstrainedBold,
    /// `_italic_`
    ConstrainedItalic,
    /// `^superscript^`
    Superscript,
    /// `~subscript~`
    Subscript,
    /// `` "`curved quotes`" ``
    CurvedQuotation,
    /// `` '`curved apostrophe`' ``
    CurvedApostrophe,
    /// ``` ``monospace`` ```
    UnconstrainedMonospace,
    /// `` `monospace` ``
    ConstrainedMonospace,
    /// `##highlight##`
    UnconstrainedHighlight,
    /// `#highlight#`
    ConstrainedHighlight,
}
36
37impl MarkupType {
38    /// Returns the delimiter length for this markup type.
39    const fn delimiter_len(self) -> usize {
40        match self {
41            Self::UnconstrainedBold
42            | Self::UnconstrainedItalic
43            | Self::CurvedQuotation
44            | Self::CurvedApostrophe
45            | Self::UnconstrainedMonospace
46            | Self::UnconstrainedHighlight => 2,
47            Self::ConstrainedBold
48            | Self::ConstrainedItalic
49            | Self::Superscript
50            | Self::Subscript
51            | Self::ConstrainedMonospace
52            | Self::ConstrainedHighlight => 1,
53        }
54    }
55
56    /// Returns the Form for this markup type.
57    const fn form(self) -> Form {
58        match self {
59            Self::UnconstrainedBold
60            | Self::UnconstrainedItalic
61            | Self::Superscript
62            | Self::Subscript
63            | Self::CurvedQuotation
64            | Self::CurvedApostrophe
65            | Self::UnconstrainedMonospace
66            | Self::UnconstrainedHighlight => Form::Unconstrained,
67            Self::ConstrainedBold
68            | Self::ConstrainedItalic
69            | Self::ConstrainedMonospace
70            | Self::ConstrainedHighlight => Form::Constrained,
71        }
72    }
73
74    /// Whether this pattern uses <= priority (curved quotes take precedence at same position).
75    const fn uses_lte_priority(self) -> bool {
76        matches!(self, Self::CurvedQuotation | Self::CurvedApostrophe)
77    }
78
79    /// Find this pattern in the input.
80    fn find(self, input: &str) -> Option<MarkupMatch> {
81        match self {
82            Self::UnconstrainedBold => find_unconstrained_bold_pattern(input),
83            Self::UnconstrainedItalic => find_unconstrained_italic_pattern(input),
84            Self::ConstrainedBold => find_constrained_bold_pattern(input),
85            Self::ConstrainedItalic => find_italic_pattern(input),
86            Self::Superscript => find_superscript_pattern(input),
87            Self::Subscript => find_subscript_pattern(input),
88            Self::CurvedQuotation => find_curved_quotation_pattern(input),
89            Self::CurvedApostrophe => find_curved_apostrophe_pattern(input),
90            Self::UnconstrainedMonospace => find_monospace_unconstrained_pattern(input),
91            Self::ConstrainedMonospace => find_monospace_constrained_pattern(input),
92            Self::UnconstrainedHighlight => find_highlight_unconstrained_pattern(input),
93            Self::ConstrainedHighlight => find_highlight_constrained_pattern(input),
94        }
95    }
96
97    /// Create an `InlineNode` for this markup type.
98    fn create_node(self, inner_content: InlineNode, outer_location: Location) -> InlineNode {
99        let form = self.form();
100        match self {
101            Self::UnconstrainedBold | Self::ConstrainedBold => InlineNode::BoldText(Bold {
102                content: vec![inner_content],
103                form,
104                role: None,
105                id: None,
106                location: outer_location,
107            }),
108            Self::UnconstrainedItalic | Self::ConstrainedItalic => InlineNode::ItalicText(Italic {
109                content: vec![inner_content],
110                form,
111                role: None,
112                id: None,
113                location: outer_location,
114            }),
115            Self::Superscript => InlineNode::SuperscriptText(Superscript {
116                content: vec![inner_content],
117                form,
118                role: None,
119                id: None,
120                location: outer_location,
121            }),
122            Self::Subscript => InlineNode::SubscriptText(Subscript {
123                content: vec![inner_content],
124                form,
125                role: None,
126                id: None,
127                location: outer_location,
128            }),
129            Self::CurvedQuotation => InlineNode::CurvedQuotationText(CurvedQuotation {
130                content: vec![inner_content],
131                form,
132                role: None,
133                id: None,
134                location: outer_location,
135            }),
136            Self::CurvedApostrophe => InlineNode::CurvedApostropheText(CurvedApostrophe {
137                content: vec![inner_content],
138                form,
139                role: None,
140                id: None,
141                location: outer_location,
142            }),
143            Self::UnconstrainedMonospace | Self::ConstrainedMonospace => {
144                InlineNode::MonospaceText(Monospace {
145                    content: vec![inner_content],
146                    form,
147                    role: None,
148                    id: None,
149                    location: outer_location,
150                })
151            }
152            Self::UnconstrainedHighlight | Self::ConstrainedHighlight => {
153                InlineNode::HighlightText(Highlight {
154                    content: vec![inner_content],
155                    form,
156                    role: None,
157                    id: None,
158                    location: outer_location,
159                })
160            }
161        }
162    }
163}
164
165/// All markup types to check, in priority order.
166const MARKUP_TYPES: &[MarkupType] = &[
167    MarkupType::UnconstrainedBold,
168    MarkupType::UnconstrainedItalic,
169    MarkupType::ConstrainedBold,
170    MarkupType::ConstrainedItalic,
171    MarkupType::Superscript,
172    MarkupType::Subscript,
173    // Curved quotes checked before monospace since they start with backticks
174    MarkupType::CurvedQuotation,
175    MarkupType::CurvedApostrophe,
176    MarkupType::UnconstrainedMonospace,
177    MarkupType::ConstrainedMonospace,
178    MarkupType::UnconstrainedHighlight,
179    MarkupType::ConstrainedHighlight,
180];
181
182/// Process passthrough content that contains quote substitutions, parsing nested markup
183pub(crate) fn process_passthrough_with_quotes(
184    content: &str,
185    passthrough: &Pass,
186) -> Vec<InlineNode> {
187    let has_special_chars = passthrough
188        .substitutions
189        .contains(&Substitution::SpecialChars);
190    let has_quotes = passthrough.substitutions.contains(&Substitution::Quotes);
191
192    // If no quotes processing needed
193    if !has_quotes {
194        // If SpecialChars substitution is enabled, escape HTML (return PlainText)
195        // This applies to: +text+ (Single), ++text++ (Double), pass:c[] (Macro with SpecialChars)
196        // Otherwise output raw HTML (return RawText)
197        // This applies to: +++text+++ (Triple), pass:[] (Macro without SpecialChars)
198        return if has_special_chars {
199            vec![InlineNode::PlainText(Plain {
200                content: content.to_string(),
201                location: passthrough.location.clone(),
202                escaped: false,
203            })]
204        } else {
205            vec![InlineNode::RawText(Raw {
206                content: content.to_string(),
207                location: passthrough.location.clone(),
208            })]
209        };
210    }
211
212    tracing::debug!(content = ?content, "Parsing passthrough content with quotes");
213
214    // Manual parsing for bold and italic patterns in passthrough content
215    // This is a simpler approach than trying to use the full PEG parser
216    parse_text_for_quotes(content)
217}
218
219/// Parse text for inline formatting markup (bold, italic, monospace, etc.).
220///
221/// This function scans the input text for `AsciiDoc` formatting patterns and returns
222/// a vector of `InlineNode`s representing the parsed content. Used for applying
223/// "quotes" substitution to verbatim block content.
224///
225/// # Supported Patterns
226///
227/// - `*bold*` and `**bold**` (constrained/unconstrained)
228/// - `_italic_` and `__italic__`
229/// - `` `monospace` `` and ``` ``monospace`` ```
230/// - `^superscript^` and `~subscript~`
231/// - `#highlight#` and `##highlight##`
232/// - `` "`curved quotes`" `` and `` '`curved apostrophe`' ``
233///
234/// # Example
235///
236/// ```
237/// use acdc_parser::parse_text_for_quotes;
238///
239/// let nodes = parse_text_for_quotes("This has *bold* text.");
240/// assert_eq!(nodes.len(), 3); // "This has ", Bold("bold"), " text."
241/// ```
242#[must_use]
243pub fn parse_text_for_quotes(content: &str) -> Vec<InlineNode> {
244    let mut result = Vec::new();
245    let mut remaining = content;
246    let mut current_offset = 0;
247
248    while !remaining.is_empty() {
249        // Find the earliest pattern in the remaining text
250        let earliest = find_earliest_pattern(remaining);
251
252        if let Some((markup_match, markup_type)) = earliest {
253            // Add any content before the markup as plain text
254            if markup_match.start > 0 {
255                let before_content = &remaining[..markup_match.start];
256                result.push(InlineNode::PlainText(Plain {
257                    content: before_content.to_string(),
258                    location: create_relative_location(
259                        current_offset,
260                        current_offset + before_content.len(),
261                    ),
262                    escaped: false,
263                }));
264                current_offset += before_content.len();
265            }
266
267            // Create inner content location
268            let delim_len = markup_type.delimiter_len();
269            let inner_location = create_relative_location(
270                current_offset + delim_len,
271                current_offset + delim_len + markup_match.content.len(),
272            );
273            let inner_content = InlineNode::PlainText(Plain {
274                content: markup_match.content.clone(),
275                location: inner_location,
276                escaped: false,
277            });
278
279            // Create outer location
280            let outer_location = create_relative_location(
281                current_offset,
282                current_offset + markup_match.end - markup_match.start,
283            );
284
285            // Create the appropriate node
286            result.push(markup_type.create_node(inner_content, outer_location));
287
288            // Move past the markup pattern
289            remaining = &remaining[markup_match.end..];
290            current_offset += markup_match.end - markup_match.start;
291        } else {
292            // No patterns found, add remaining content as plain text and exit
293            if !remaining.is_empty() {
294                if let Some(InlineNode::PlainText(last_plain)) = result.last_mut() {
295                    // Merge with the last plain text node
296                    last_plain.content.push_str(remaining);
297                    last_plain.location.absolute_end = current_offset + remaining.len();
298                    last_plain.location.end.column = current_offset + remaining.len() + 1;
299                } else {
300                    result.push(InlineNode::PlainText(Plain {
301                        content: remaining.to_string(),
302                        location: create_relative_location(
303                            current_offset,
304                            current_offset + remaining.len(),
305                        ),
306                        escaped: false,
307                    }));
308                }
309            }
310            break;
311        }
312    }
313
314    result
315}
316
317/// Find the earliest matching pattern in the input.
318fn find_earliest_pattern(input: &str) -> Option<(MarkupMatch, MarkupType)> {
319    let mut earliest: Option<(MarkupMatch, MarkupType)> = None;
320
321    for &markup_type in MARKUP_TYPES {
322        if let Some(markup_match) = markup_type.find(input) {
323            let dominated = earliest.as_ref().is_some_and(|(e, _)| {
324                if markup_type.uses_lte_priority() {
325                    markup_match.start > e.start
326                } else {
327                    markup_match.start >= e.start
328                }
329            });
330
331            if !dominated {
332                earliest = Some((markup_match, markup_type));
333            }
334        }
335    }
336
337    earliest
338}
339
340/// Create a location for relative positions within passthrough content.
341/// These positions will be remapped later during final location mapping.
342fn create_relative_location(start: usize, end: usize) -> Location {
343    Location {
344        absolute_start: start,
345        absolute_end: end,
346        start: crate::Position {
347            line: 1,
348            column: start + 1,
349        },
350        end: crate::Position {
351            line: 1,
352            column: end + 1,
353        },
354    }
355}
356
357/// Process passthrough placeholders in content, returning expanded `InlineNode`s.
358///
359/// This function handles the multi-pass parsing needed for passthroughs with quote substitutions.
360/// It splits the content around placeholders and processes each passthrough according to its
361/// substitution settings.
362pub(crate) fn process_passthrough_placeholders(
363    content: &str,
364    processed: &ProcessedContent,
365    state: &ParserState,
366    base_location: &Location,
367) -> Vec<InlineNode> {
368    let mut result = Vec::new();
369    let mut remaining = content;
370    let mut processed_offset = 0; // Position in the processed content (with placeholders)
371
372    // Process each passthrough placeholder in order
373    for (index, passthrough) in processed.passthroughs.iter().enumerate() {
374        let placeholder = format!("���{index}���");
375
376        if let Some(placeholder_pos) = remaining.find(&placeholder) {
377            let before_content = if placeholder_pos > 0 {
378                Some(&remaining[..placeholder_pos])
379            } else {
380                None
381            };
382
383            // Add content before the placeholder if any, using original string positions
384            if let Some(before) = before_content
385                && !before.is_empty()
386            {
387                result.push(InlineNode::PlainText(Plain {
388                    content: before.to_string(),
389                    location: Location {
390                        // Use original string positions
391                        absolute_start: base_location.absolute_start + processed_offset,
392                        absolute_end: base_location.absolute_start
393                            + processed_offset
394                            + before.len(),
395                        start: crate::Position {
396                            line: base_location.start.line,
397                            column: base_location.start.column + processed_offset,
398                        },
399                        end: crate::Position {
400                            line: base_location.start.line,
401                            column: base_location.start.column + processed_offset + before.len(),
402                        },
403                    },
404                    escaped: false,
405                }));
406                processed_offset += before.len();
407            }
408
409            // Process the passthrough content using original string positions from passthrough.location
410            if let Some(passthrough_content) = &passthrough.text {
411                let processed_nodes =
412                    process_passthrough_with_quotes(passthrough_content, passthrough);
413
414                // Remap locations of processed nodes to use original string coordinates
415                // The passthrough content starts after "pass:q[" so we need to account for that offset
416                let macro_prefix_len = "pass:q[".len(); // 7 characters
417                for mut node in processed_nodes {
418                    remap_inline_node_location(
419                        &mut node,
420                        passthrough.location.absolute_start + macro_prefix_len,
421                    );
422                    result.push(node);
423                }
424            }
425
426            // Move past the placeholder in the processed content
427            let skip_len = placeholder_pos + placeholder.len();
428            remaining = &remaining[skip_len..];
429            // Update processed_offset to account for the original passthrough macro length
430            processed_offset +=
431                passthrough.location.absolute_end - passthrough.location.absolute_start;
432        }
433    }
434
435    // Add any remaining content as plain text
436    if !remaining.is_empty() {
437        // Check if the last node is PlainText and merge if so
438        if let Some(InlineNode::PlainText(last_plain)) = result.last_mut() {
439            // Merge remaining content with the last plain text node
440            last_plain.content.push_str(remaining);
441            // Extend the location to include the remaining content
442            last_plain.location.absolute_end = base_location.absolute_end;
443            last_plain.location.end = base_location.end.clone();
444        } else {
445            // Add as separate node if last node is not plain text
446            result.push(InlineNode::PlainText(Plain {
447                content: remaining.to_string(),
448                location: Location {
449                    absolute_start: base_location.absolute_start + processed_offset,
450                    absolute_end: base_location.absolute_end,
451                    start: crate::Position {
452                        line: base_location.start.line,
453                        column: base_location.start.column + processed_offset,
454                    },
455                    end: base_location.end.clone(),
456                },
457                escaped: false,
458            }));
459        }
460    }
461
462    // If no placeholders were found, return the original content as plain text
463    if result.is_empty() {
464        result.push(InlineNode::PlainText(Plain {
465            content: content.to_string(),
466            location: base_location.clone(),
467            escaped: false,
468        }));
469    }
470
471    // Clamp all locations to valid bounds within the input string
472    for node in &mut result {
473        clamp_inline_node_locations(node, &state.input);
474    }
475
476    // Merge adjacent plain text nodes
477    merge_adjacent_plain_text_nodes(result)
478}
479
480/// Merge adjacent plain text nodes into single nodes to simplify the output
481pub(crate) fn merge_adjacent_plain_text_nodes(nodes: Vec<InlineNode>) -> Vec<InlineNode> {
482    let mut result = Vec::new();
483
484    for node in nodes {
485        match (result.last_mut(), node) {
486            (Some(InlineNode::PlainText(last_plain)), InlineNode::PlainText(current_plain)) => {
487                // Merge current plain text with the last one
488                last_plain.content.push_str(&current_plain.content);
489                // Extend the location to cover both nodes
490                last_plain.location.absolute_end = current_plain.location.absolute_end;
491                last_plain.location.end = current_plain.location.end;
492            }
493            (_, node) => {
494                // Not adjacent plain text nodes, add as separate node
495                result.push(node);
496            }
497        }
498    }
499
500    result
501}
502
503pub(crate) fn replace_passthrough_placeholders(
504    content: &str,
505    processed: &ProcessedContent,
506) -> String {
507    let mut result = content.to_string();
508
509    // Replace each passthrough placeholder with its content
510    for (index, passthrough) in processed.passthroughs.iter().enumerate() {
511        let placeholder = format!("���{index}���");
512        if let Some(text) = &passthrough.text {
513            result = result.replace(&placeholder, text);
514        }
515    }
516
517    result
518}
519
#[cfg(test)]
#[allow(clippy::indexing_slicing)] // Tests verify length before indexing
mod tests {
    use super::*;

    // === Divergence Prevention Tests ===
    //
    // parse_text_for_quotes is a hand-written scanner that has to agree structurally with
    // the main PEG parser for common inline formatting. When a grammar change makes one
    // of these tests fail, bring parse_text_for_quotes back in line.

    /// Text of the first child, provided that child is plain text.
    fn first_plain(children: &[InlineNode]) -> Option<&str> {
        match children.first() {
            Some(InlineNode::PlainText(plain)) => Some(plain.content.as_str()),
            _ => None,
        }
    }

    /// True when every node is a `PlainText` node.
    fn all_plain(nodes: &[InlineNode]) -> bool {
        nodes.iter().all(|n| matches!(n, InlineNode::PlainText(_)))
    }

    #[test]
    fn test_constrained_bold_pattern() {
        let nodes = parse_text_for_quotes("This is *bold* text.");
        assert_eq!(nodes.len(), 3);
        assert!(matches!(nodes[0], InlineNode::PlainText(_)));
        assert!(
            matches!(&nodes[1], InlineNode::BoldText(b) if first_plain(&b.content) == Some("bold"))
        );
        assert!(matches!(nodes[2], InlineNode::PlainText(_)));
    }

    #[test]
    fn test_unconstrained_bold_pattern() {
        let nodes = parse_text_for_quotes("This**bold**word");
        assert_eq!(nodes.len(), 3);
        assert!(
            matches!(&nodes[1], InlineNode::BoldText(b) if first_plain(&b.content) == Some("bold"))
        );
    }

    #[test]
    fn test_constrained_italic_pattern() {
        let nodes = parse_text_for_quotes("This is _italic_ text.");
        assert_eq!(nodes.len(), 3);
        assert!(
            matches!(&nodes[1], InlineNode::ItalicText(i) if first_plain(&i.content) == Some("italic"))
        );
    }

    #[test]
    fn test_unconstrained_italic_pattern() {
        let nodes = parse_text_for_quotes("This__italic__word");
        assert_eq!(nodes.len(), 3);
        assert!(
            matches!(&nodes[1], InlineNode::ItalicText(i) if first_plain(&i.content) == Some("italic"))
        );
    }

    #[test]
    fn test_constrained_monospace_pattern() {
        let nodes = parse_text_for_quotes("Use `code` here.");
        assert_eq!(nodes.len(), 3);
        assert!(
            matches!(&nodes[1], InlineNode::MonospaceText(m) if first_plain(&m.content) == Some("code"))
        );
    }

    #[test]
    fn test_superscript_pattern() {
        let nodes = parse_text_for_quotes("E=mc^2^");
        assert_eq!(nodes.len(), 2);
        assert!(
            matches!(&nodes[1], InlineNode::SuperscriptText(s) if first_plain(&s.content) == Some("2"))
        );
    }

    #[test]
    fn test_subscript_pattern() {
        let nodes = parse_text_for_quotes("H~2~O");
        assert_eq!(nodes.len(), 3);
        assert!(
            matches!(&nodes[1], InlineNode::SubscriptText(s) if first_plain(&s.content) == Some("2"))
        );
    }

    #[test]
    fn test_highlight_pattern() {
        let nodes = parse_text_for_quotes("This is #highlighted# text.");
        assert_eq!(nodes.len(), 3);
        assert!(
            matches!(&nodes[1], InlineNode::HighlightText(h) if first_plain(&h.content) == Some("highlighted"))
        );
    }

    #[test]
    fn test_escaped_superscript_not_parsed() {
        // A backslash in front of the marker must keep it from being read as formatting,
        // so the whole input stays plain text.
        let nodes = parse_text_for_quotes(r"E=mc\^2^");
        assert!(all_plain(&nodes), "Escaped superscript should not be parsed");
    }

    #[test]
    fn test_escaped_subscript_not_parsed() {
        let nodes = parse_text_for_quotes(r"H\~2~O");
        assert!(all_plain(&nodes), "Escaped subscript should not be parsed");
    }

    #[test]
    fn test_multiple_formats_in_sequence() {
        let nodes = parse_text_for_quotes("*bold* and _italic_ and `code`");
        assert!(nodes.iter().any(|n| matches!(n, InlineNode::BoldText(_))));
        assert!(nodes.iter().any(|n| matches!(n, InlineNode::ItalicText(_))));
        assert!(
            nodes
                .iter()
                .any(|n| matches!(n, InlineNode::MonospaceText(_)))
        );
    }

    #[test]
    fn test_plain_text_only() {
        let nodes = parse_text_for_quotes("Just plain text here.");
        assert_eq!(nodes.len(), 1);
        assert!(matches!(nodes[0], InlineNode::PlainText(_)));
    }

    #[test]
    fn test_empty_input() {
        let nodes = parse_text_for_quotes("");
        assert!(nodes.is_empty());
    }
}