libcst_native/
lib.rs

1// Copyright (c) Meta Platforms, Inc. and affiliates.
2//
3// This source code is licensed under the MIT license found in the
4// LICENSE file in the root directory of this source tree.
5
6use std::cmp::{max, min};
7
8pub mod tokenizer;
9
10pub use tokenizer::whitespace_parser::Config;
11use tokenizer::{whitespace_parser, TokConfig, Token, TokenIterator};
12
13mod nodes;
14use nodes::deflated::Module as DeflatedModule;
15pub use nodes::*;
16
17mod parser;
18use parser::{ParserError, Result, TokVec};
19
20#[cfg(feature = "py")]
21pub mod py;
22
23pub fn tokenize(text: &str) -> Result<Vec<Token>> {
24    let iter = TokenIterator::new(
25        text,
26        &TokConfig {
27            async_hacks: false,
28            split_ftstring: true,
29        },
30    );
31
32    iter.collect::<std::result::Result<Vec<_>, _>>()
33        .map_err(|err| ParserError::TokenizerError(err, text))
34}
35
36pub fn parse_module<'a>(
37    mut module_text: &'a str,
38    encoding: Option<&str>,
39) -> Result<'a, Module<'a>> {
40    // Strip UTF-8 BOM
41    if let Some(stripped) = module_text.strip_prefix('\u{feff}') {
42        module_text = stripped;
43    }
44    let tokens = tokenize(module_text)?;
45    let conf = whitespace_parser::Config::new(module_text, &tokens);
46    let tokvec = tokens.into();
47    let m = parse_tokens_without_whitespace(&tokvec, module_text, encoding)?;
48    Ok(m.inflate(&conf)?)
49}
50
51pub fn parse_tokens_without_whitespace<'r, 'a>(
52    tokens: &'r TokVec<'a>,
53    module_text: &'a str,
54    encoding: Option<&str>,
55) -> Result<'a, DeflatedModule<'r, 'a>> {
56    let m = parser::python::file(tokens, module_text, encoding)
57        .map_err(|err| ParserError::ParserError(err, module_text))?;
58    Ok(m)
59}
60
61pub fn parse_statement(text: &str) -> Result<Statement> {
62    let tokens = tokenize(text)?;
63    let conf = whitespace_parser::Config::new(text, &tokens);
64    let tokvec = tokens.into();
65    let stm = parser::python::statement_input(&tokvec, text)
66        .map_err(|err| ParserError::ParserError(err, text))?;
67    Ok(stm.inflate(&conf)?)
68}
69
70pub fn parse_expression(text: &str) -> Result<Expression> {
71    let tokens = tokenize(text)?;
72    let conf = whitespace_parser::Config::new(text, &tokens);
73    let tokvec = tokens.into();
74    let expr = parser::python::expression_input(&tokvec, text)
75        .map_err(|err| ParserError::ParserError(err, text))?;
76    Ok(expr.inflate(&conf)?)
77}
78
/// Returns the byte offset in `source` at which line `n` begins.
///
/// Line numbers start from 1 and lines are separated by `'\n'`. Any
/// `n <= 1` yields offset 0; a line number beyond the last line yields
/// `source.len()`.
fn bol_offset(source: &str, n: i32) -> usize {
    if n <= 1 {
        return 0;
    }

    // Line `n` begins right after the (n - 1)-th newline, i.e. the newline at
    // zero-based position `n - 2` in the sequence of newlines.
    let newline_index = (n - 2) as usize;
    match source.match_indices('\n').nth(newline_index) {
        Some((newline_at, _)) => newline_at + 1,
        None => source.len(),
    }
}
90
91pub fn prettify_error(err: ParserError, label: &str) -> std::string::String {
92    match err {
93        ParserError::ParserError(e, module_text) => {
94            use annotate_snippets::{Level, Renderer, Snippet};
95
96            let loc = e.location;
97            let context = 1;
98            let line_start = max(
99                1,
100                loc.start_pos
101                    .line
102                    .checked_sub(context as usize)
103                    .unwrap_or(1),
104            );
105            let start_offset = bol_offset(module_text, loc.start_pos.line as i32 - context);
106            let end_offset = bol_offset(module_text, loc.end_pos.line as i32 + context + 1);
107            let source = &module_text[start_offset..end_offset];
108            let start = loc.start_pos.offset - start_offset;
109            let end = loc.end_pos.offset - start_offset;
110            let end = if start == end {
111                min(end + 1, end_offset - start_offset + 1)
112            } else {
113                end
114            };
115            Renderer::styled()
116                .render(
117                    Level::Error.title(label).snippet(
118                        Snippet::source(source)
119                            .line_start(line_start)
120                            .fold(false)
121                            .annotations(vec![Level::Error.span(start..end).label(&format!(
122                                "expected {} {} -> {}",
123                                e.expected, loc.start_pos, loc.end_pos
124                            ))]),
125                    ),
126                )
127                .to_string()
128        }
129        e => format!("Parse error for {}: {}", label, e),
130    }
131}
132
#[cfg(test)]
mod test {
    use super::*;
    use tokenizer::TokError;

    // A malformed decimal literal is reported as a tokenizer error that
    // carries the original source text.
    #[test]
    fn test_simple() {
        let n = parse_module("1_", None);
        assert_eq!(
            n.err().unwrap(),
            ParserError::TokenizerError(TokError::BadDecimal, "1_")
        );
    }

    #[test]
    fn test_bare_minimum_funcdef() {
        parse_module("def f(): ...", None).expect("parse error");
    }

    #[test]
    fn test_funcdef_params() {
        parse_module("def g(a, b): ...", None).expect("parse error");
    }

    // Each source below lacks a trailing newline and must still parse.
    #[test]
    fn test_single_statement_with_no_newline() {
        for src in &[
            "(\n \\\n)",
            "(\n  \\\n)",
            "(\n    '''\n''')",
            "del _",
            "if _:\n    '''\n)'''",
            "if _:\n    ('''\n''')",
            "if _:\n     '''\n  '''",
            "if _:\n        '''\n    ''' ",
        ] {
            parse_module(src, None).unwrap_or_else(|e| panic!("'{}' doesn't parse: {}", src, e));
        }
    }

    // Line numbers <= 1 always map to offset 0.
    #[test]
    fn bol_offset_first_line() {
        assert_eq!(0, bol_offset("hello", 1));
        assert_eq!(0, bol_offset("hello", 0));
        assert_eq!(0, bol_offset("hello\nhello", 1));
        assert_eq!(0, bol_offset("hello\nhello", 0));
    }

    #[test]
    fn bol_offset_second_line() {
        assert_eq!(5, bol_offset("hello", 2));
        assert_eq!(6, bol_offset("hello\nhello", 2));
        assert_eq!(6, bol_offset("hello\nhello\nhello", 2));
    }

    // Line numbers past the end of the text map to the text length.
    #[test]
    fn bol_offset_last_line() {
        assert_eq!(5, bol_offset("hello", 3));
        assert_eq!(11, bol_offset("hello\nhello", 3));
        assert_eq!(12, bol_offset("hello\nhello\nhello", 3));
    }

    #[test]
    fn test_tstring_basic() {
        assert!(
            parse_module("t'hello'", None).is_ok(),
            "Failed to parse t'hello'"
        );
        assert!(
            parse_module("t'{hello}'", None).is_ok(),
            "Failed to parse t'{{hello}}'"
        );
        assert!(
            parse_module("t'{hello:r}'", None).is_ok(),
            "Failed to parse t'{{hello:r}}'"
        );
        // This case previously parsed an f-string (`f'...'`) even though the
        // test and its failure message are about t-strings.
        assert!(
            parse_module("t'line1\\n{hello:r}\\nline2'", None).is_ok(),
            "Failed to parse t'line1\\n{{hello:r}}\\nline2'"
        );
    }
}