Skip to main content

rustpython_ruff_python_ast/
str.rs

1use aho_corasick::{AhoCorasick, AhoCorasickKind, Anchored, Input, MatchKind, StartKind};
2use std::fmt;
3use std::sync::LazyLock;
4
5use ruff_text_size::{TextLen, TextRange};
6
/// The two quote characters that can delimit a Python string, f-string, t-string, or
/// bytestring literal.
///
/// [`Quote::Double`] is the [`Default`] variant.
#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, is_macro::Is)]
pub enum Quote {
    /// The single-quote (apostrophe) character: `'`
    Single,
    /// The double-quote character: `"`
    #[default]
    Double,
}
17
18impl Quote {
19    #[inline]
20    pub const fn as_char(self) -> char {
21        match self {
22            Self::Single => '\'',
23            Self::Double => '"',
24        }
25    }
26
27    #[inline]
28    pub const fn as_str(self) -> &'static str {
29        match self {
30            Self::Single => "'",
31            Self::Double => "\"",
32        }
33    }
34
35    #[must_use]
36    #[inline]
37    pub const fn opposite(self) -> Self {
38        match self {
39            Self::Single => Self::Double,
40            Self::Double => Self::Single,
41        }
42    }
43
44    #[inline]
45    pub const fn as_byte(self) -> u8 {
46        match self {
47            Self::Single => b'\'',
48            Self::Double => b'"',
49        }
50    }
51}
52
53impl fmt::Display for Quote {
54    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
55        write!(f, "{}", self.as_char())
56    }
57}
58
59impl TryFrom<char> for Quote {
60    type Error = ();
61
62    fn try_from(value: char) -> Result<Self, Self::Error> {
63        match value {
64            '\'' => Ok(Quote::Single),
65            '"' => Ok(Quote::Double),
66            _ => Err(()),
67        }
68    }
69}
70
/// A two-state flag with explicit `Yes` / `No` variants.
///
/// NOTE(review): judging by the name, this presumably records whether a literal is delimited by
/// triple quotes (`'''` / `"""`); nothing in this file constructs it, so confirm against callers.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum TripleQuotes {
    Yes,
    No,
}
76
77impl TripleQuotes {
78    #[must_use]
79    pub const fn is_yes(self) -> bool {
80        matches!(self, Self::Yes)
81    }
82
83    #[must_use]
84    pub const fn is_no(self) -> bool {
85        matches!(self, Self::No)
86    }
87}
88
/// Every way a triple-quoted string literal can open: the prefixes `r`, `u`, `f`, and `fr`
/// (`ur` is invalid, as is `uf`) in all orders and casings, plus the unprefixed form, each
/// followed by `"""` or `'''`.
///
/// Entry order matters: the `prefix_uniqueness` test asserts that no entry starts with an entry
/// listed before it, which is why longer prefixes come first and the bare quotes come last.
///
/// See: <https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals>
#[rustfmt::skip]
const TRIPLE_QUOTE_STR_PREFIXES: &[&str] = &[
    "FR\"\"\"",
    "Fr\"\"\"",
    "fR\"\"\"",
    "fr\"\"\"",
    "RF\"\"\"",
    "Rf\"\"\"",
    "rF\"\"\"",
    "rf\"\"\"",
    "FR'''",
    "Fr'''",
    "fR'''",
    "fr'''",
    "RF'''",
    "Rf'''",
    "rF'''",
    "rf'''",
    "R\"\"\"",
    "r\"\"\"",
    "R'''",
    "r'''",
    "F\"\"\"",
    "f\"\"\"",
    "F'''",
    "f'''",
    "U\"\"\"",
    "u\"\"\"",
    "U'''",
    "u'''",
    "\"\"\"",
    "'''",
];
126
/// Every way a single-quoted string literal can open: the same `r`, `u`, `f`, and `fr` prefixes
/// as [`TRIPLE_QUOTE_STR_PREFIXES`] (all orders and casings, plus the unprefixed form), each
/// followed by a single `"` or `'`.
///
/// Two-letter prefixes are listed before one-letter prefixes, and the bare quotes come last.
#[rustfmt::skip]
const SINGLE_QUOTE_STR_PREFIXES: &[&str] = &[
    "FR\"",
    "Fr\"",
    "fR\"",
    "fr\"",
    "RF\"",
    "Rf\"",
    "rF\"",
    "rf\"",
    "FR'",
    "Fr'",
    "fR'",
    "fr'",
    "RF'",
    "Rf'",
    "rF'",
    "rf'",
    "R\"",
    "r\"",
    "R'",
    "r'",
    "F\"",
    "f\"",
    "F'",
    "f'",
    "U\"",
    "u\"",
    "U'",
    "u'",
    "\"",
    "'",
];
160
/// Every way a triple-quoted bytes literal can open: the prefixes `b` and `rb` in all orders
/// and casings, each followed by `"""` or `'''`.
///
/// Unlike the string tables there is no unprefixed entry, since a bytes literal always carries
/// a `b`.
///
/// See: <https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals>
#[rustfmt::skip]
pub const TRIPLE_QUOTE_BYTE_PREFIXES: &[&str] = &[
    "BR\"\"\"",
    "Br\"\"\"",
    "bR\"\"\"",
    "br\"\"\"",
    "RB\"\"\"",
    "Rb\"\"\"",
    "rB\"\"\"",
    "rb\"\"\"",
    "BR'''",
    "Br'''",
    "bR'''",
    "br'''",
    "RB'''",
    "Rb'''",
    "rB'''",
    "rb'''",
    "B\"\"\"",
    "b\"\"\"",
    "B'''",
    "b'''",
];
188
/// Every way a single-quoted bytes literal can open: the prefixes `b` and `rb` in all orders
/// and casings, each followed by a single `"` or `'`.
#[rustfmt::skip]
pub const SINGLE_QUOTE_BYTE_PREFIXES: &[&str] = &[
    "BR\"",
    "Br\"",
    "bR\"",
    "br\"",
    "RB\"",
    "Rb\"",
    "rB\"",
    "rb\"",
    "BR'",
    "Br'",
    "bR'",
    "br'",
    "RB'",
    "Rb'",
    "rB'",
    "rb'",
    "B\"",
    "b\"",
    "B'",
    "b'",
];
212
/// Every way a triple-quoted template (t-string) literal can open: the prefixes `t` and `rt` in
/// all orders and casings, each followed by `"""` or `'''`.
///
/// See: <https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals>
#[rustfmt::skip]
pub const TRIPLE_QUOTE_TEMPLATE_PREFIXES: &[&str] = &[
    "TR\"\"\"",
    "Tr\"\"\"",
    "tR\"\"\"",
    "tr\"\"\"",
    "RT\"\"\"",
    "Rt\"\"\"",
    "rT\"\"\"",
    "rt\"\"\"",
    "TR'''",
    "Tr'''",
    "tR'''",
    "tr'''",
    "RT'''",
    "Rt'''",
    "rT'''",
    "rt'''",
    "T\"\"\"",
    "t\"\"\"",
    "T'''",
    "t'''",
];
240
/// Every way a single-quoted template (t-string) literal can open: the prefixes `t` and `rt` in
/// all orders and casings, each followed by a single `"` or `'`.
#[rustfmt::skip]
pub const SINGLE_QUOTE_TEMPLATE_PREFIXES: &[&str] = &[
    "TR\"",
    "Tr\"",
    "tR\"",
    "tr\"",
    "RT\"",
    "Rt\"",
    "rT\"",
    "rt\"",
    "TR'",
    "Tr'",
    "tR'",
    "tr'",
    "RT'",
    "Rt'",
    "rT'",
    "rt'",
    "T\"",
    "t\"",
    "T'",
    "t'",
];
264
265/// Strip the leading and trailing quotes from a string.
266/// Assumes that the string is a valid string literal, but does not verify that the string
267/// is a "simple" string literal (i.e., that it does not contain any implicit concatenations).
268pub fn raw_contents(contents: &str) -> Option<&str> {
269    let range = raw_contents_range(contents)?;
270
271    Some(&contents[range])
272}
273
274pub fn raw_contents_range(contents: &str) -> Option<TextRange> {
275    let leading_quote_str = leading_quote(contents)?;
276    let trailing_quote_str = trailing_quote(contents)?;
277
278    Some(TextRange::new(
279        leading_quote_str.text_len(),
280        contents.text_len() - trailing_quote_str.text_len(),
281    ))
282}
283
284/// An [`AhoCorasick`] matcher for string, template, and bytes literal prefixes.
285static PREFIX_MATCHER: LazyLock<AhoCorasick> = LazyLock::new(|| {
286    AhoCorasick::builder()
287        .start_kind(StartKind::Anchored)
288        .match_kind(MatchKind::LeftmostLongest)
289        .kind(Some(AhoCorasickKind::DFA))
290        .build(
291            TRIPLE_QUOTE_STR_PREFIXES
292                .iter()
293                .chain(TRIPLE_QUOTE_BYTE_PREFIXES)
294                .chain(TRIPLE_QUOTE_TEMPLATE_PREFIXES)
295                .chain(SINGLE_QUOTE_STR_PREFIXES)
296                .chain(SINGLE_QUOTE_BYTE_PREFIXES)
297                .chain(SINGLE_QUOTE_TEMPLATE_PREFIXES),
298        )
299        .unwrap()
300});
301
302/// Return the leading quote for a string, template, or bytes literal (e.g., `"""`).
303pub fn leading_quote(content: &str) -> Option<&str> {
304    let mat = PREFIX_MATCHER.find(Input::new(content).anchored(Anchored::Yes))?;
305    Some(&content[mat.start()..mat.end()])
306}
307
/// Returns the closing quote sequence of a string, template, or bytes literal (e.g., `"""`).
///
/// Triple quotes are tried before single quotes so that the longest closing delimiter wins.
/// Returns `None` when `content` does not end with a quote character.
pub fn trailing_quote(content: &str) -> Option<&str> {
    // Ordered from most to least specific; the first suffix that matches is returned.
    const CLOSING_QUOTES: [&str; 4] = ["'''", "\"\"\"", "'", "\""];

    CLOSING_QUOTES
        .iter()
        .copied()
        .find(|quote| content.ends_with(*quote))
}
322
323/// Return `true` if the string is a triple-quote string or byte prefix.
324pub fn is_triple_quote(content: &str) -> bool {
325    TRIPLE_QUOTE_STR_PREFIXES.contains(&content)
326        || TRIPLE_QUOTE_BYTE_PREFIXES.contains(&content)
327        || TRIPLE_QUOTE_TEMPLATE_PREFIXES.contains(&content)
328}
329
#[cfg(test)]
mod tests {
    use super::{
        SINGLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_STR_PREFIXES, SINGLE_QUOTE_TEMPLATE_PREFIXES,
        TRIPLE_QUOTE_BYTE_PREFIXES, TRIPLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_TEMPLATE_PREFIXES,
    };

    /// Checks that, across all tables taken in matcher order, no prefix starts with a prefix
    /// that is listed before it.
    #[test]
    fn prefix_uniqueness() {
        let prefixes: Vec<_> = TRIPLE_QUOTE_STR_PREFIXES
            .iter()
            .chain(TRIPLE_QUOTE_BYTE_PREFIXES)
            .chain(TRIPLE_QUOTE_TEMPLATE_PREFIXES)
            .chain(SINGLE_QUOTE_STR_PREFIXES)
            .chain(SINGLE_QUOTE_BYTE_PREFIXES)
            .chain(SINGLE_QUOTE_TEMPLATE_PREFIXES)
            .collect();

        for (i, prefix_i) in prefixes.iter().enumerate() {
            // Compare only against the entries that precede `prefix_i`.
            for prefix_j in &prefixes[..i] {
                assert!(
                    !prefix_i.starts_with(*prefix_j),
                    "Prefixes are not unique: {prefix_i} starts with {prefix_j}",
                );
            }
        }
    }
}