Skip to main content

panache_parser/parser/inlines/
escapes.rs

//! Parsing for backslash escape sequences.
//!
//! Per the Pandoc spec (`all_symbols_escapable` extension):
//! - Any punctuation or space character preceded by a backslash is treated literally
//! - A backslash-escaped space is a nonbreaking space
//! - A backslash-escaped newline is a hard line break
//! - Escapes do NOT apply in verbatim contexts (code blocks, code spans)
8use crate::syntax::SyntaxKind;
9use rowan::GreenNodeBuilder;
10
/// Report whether `ch` may follow a backslash to form an escape sequence.
///
/// Mirrors Pandoc's `all_symbols_escapable` rule as implemented here: the
/// character qualifies when it is whitespace (per `char::is_whitespace`) or
/// ASCII punctuation (per `char::is_ascii_punctuation`). Letters and digits
/// never qualify.
fn is_escapable(ch: char) -> bool {
    // Whitespace covers the space and newline escapes handled by the parser.
    if ch.is_whitespace() {
        return true;
    }
    ch.is_ascii_punctuation()
}
16
17/// Try to parse a backslash escape sequence starting at the current position.
18/// Returns (total_len, escaped_char, escape_type) or None if not an escape.
19pub fn try_parse_escape(text: &str) -> Option<(usize, char, EscapeType)> {
20    if !text.starts_with('\\') {
21        return None;
22    }
23
24    if text.len() < 2 {
25        // Backslash at end of input - not an escape
26        return None;
27    }
28
29    let next_char = text[1..].chars().next()?;
30
31    if !is_escapable(next_char) {
32        // Not an escapable character
33        return None;
34    }
35
36    let escape_type = match next_char {
37        ' ' => EscapeType::NonbreakingSpace,
38        '\n' => EscapeType::HardLineBreak,
39        _ => EscapeType::Literal,
40    };
41
42    let total_len = 1 + next_char.len_utf8(); // backslash + character
43    Some((total_len, next_char, escape_type))
44}
45
/// Semantic category of a backslash escape recognised by `try_parse_escape`.
///
/// The type is a plain fieldless enum, so it is `Copy`: callers can pass it
/// by value (as `emit_escape` requires) and still keep using their own copy.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EscapeType {
    /// Regular escaped character such as `\*`.
    Literal,
    /// Backslash followed by a space (`\<space>`).
    NonbreakingSpace,
    /// Backslash followed by a newline (`\<newline>`).
    HardLineBreak,
}
52
53/// Emit an escape sequence to the builder.
54pub fn emit_escape(builder: &mut GreenNodeBuilder, ch: char, escape_type: EscapeType) {
55    match escape_type {
56        EscapeType::NonbreakingSpace => {
57            // Preserve source bytes for losslessness while still tagging the
58            // semantic token kind as NONBREAKING_SPACE.
59            builder.token(SyntaxKind::NONBREAKING_SPACE.into(), "\\ ");
60        }
61        EscapeType::HardLineBreak => {
62            // Emit as a special hard line break token - include backslash for losslessness
63            builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\\\n");
64        }
65        EscapeType::Literal => {
66            // Emit the full escape sequence (backslash + character) for losslessness
67            let mut s = String::new();
68            s.push('\\');
69            s.push(ch);
70            builder.token(SyntaxKind::ESCAPED_CHAR.into(), &s);
71        }
72    }
73}
74
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `input` parses as a two-byte escape of `ch` with type `kind`.
    #[track_caller]
    fn assert_two_byte_escape(input: &str, ch: char, kind: EscapeType) {
        assert_eq!(try_parse_escape(input), Some((2, ch, kind)));
    }

    #[test]
    fn test_escape_asterisk() {
        assert_two_byte_escape(r"\*", '*', EscapeType::Literal);
    }

    #[test]
    fn test_escape_backtick() {
        assert_two_byte_escape(r"\`", '`', EscapeType::Literal);
    }

    #[test]
    fn test_escape_space() {
        assert_two_byte_escape(r"\ ", ' ', EscapeType::NonbreakingSpace);
    }

    #[test]
    fn test_escape_newline() {
        assert_two_byte_escape("\\\n", '\n', EscapeType::HardLineBreak);
    }

    #[test]
    fn test_escape_bracket() {
        assert_two_byte_escape(r"\[", '[', EscapeType::Literal);
    }

    #[test]
    fn test_escape_dollar() {
        assert_two_byte_escape(r"\$", '$', EscapeType::Literal);
    }

    #[test]
    fn test_not_escape_letter() {
        // Pandoc does not allow letters to be escaped.
        assert_eq!(try_parse_escape(r"\a"), None);
    }

    #[test]
    fn test_not_escape_at_end() {
        // A lone backslash has nothing after it to escape.
        assert_eq!(try_parse_escape(r"\"), None);
    }

    #[test]
    fn test_escape_all_punctuation() {
        // The common Markdown punctuation characters.
        const MARKDOWN_PUNCTUATION: &str = r#"`*_{}[]()>#+-.!"#;

        for ch in MARKDOWN_PUNCTUATION.chars() {
            let mut input = String::from("\\");
            input.push(ch);

            match try_parse_escape(&input) {
                Some((_, escaped, _)) => assert_eq!(escaped, ch),
                None => panic!("Should escape '{}'", ch),
            }
        }
    }

    #[test]
    fn test_is_escapable() {
        // Punctuation first, then space/whitespace.
        let escapable = ['*', '`', '[', '!', ' ', '\n', '\t'];
        for ch in escapable.iter().copied() {
            assert!(is_escapable(ch), "{:?}", ch);
        }

        // Letters and digits are never escapable.
        let not_escapable = ['a', 'Z', '5'];
        for ch in not_escapable.iter().copied() {
            assert!(!is_escapable(ch), "{:?}", ch);
        }
    }
}