use std::cmp::{max, min};

pub mod tokenizer;

pub use tokenizer::whitespace_parser::Config;
use tokenizer::{whitespace_parser, TokConfig, Token, TokenIterator};

mod nodes;
use nodes::deflated::Module as DeflatedModule;
pub use nodes::*;

mod parser;
use parser::{ParserError, Result, TokVec};

#[cfg(feature = "py")]
pub mod py;

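/// Tokenizes a piece of Python source into a flat [`Vec`] of [`Token`]s using
/// the fixed [`TokConfig`] below.
///
/// A minimal usage sketch, marked `ignore` because it assumes this crate is
/// imported under the name `libcst`, which may not match your build:
///
/// ```ignore
/// // Tokenize a one-line module and make sure we got tokens back.
/// let tokens = libcst::tokenize("x = 1\n").expect("tokenization failed");
/// assert!(!tokens.is_empty());
/// ```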
pub fn tokenize(text: &str) -> Result<Vec<Token>> {
    let iter = TokenIterator::new(
        text,
        &TokConfig {
            async_hacks: false,
            split_ftstring: true,
        },
    );

    iter.collect::<std::result::Result<Vec<_>, _>>()
        .map_err(|err| ParserError::TokenizerError(err, text))
}

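/// Parses a complete module (an entire source file) into an inflated
/// [`Module`] CST, i.e. one with whitespace information attached. A leading
/// UTF-8 byte-order mark is stripped before tokenizing.
///
/// A hedged usage sketch, marked `ignore` because the `libcst` crate name and
/// the error label are assumptions:
///
/// ```ignore
/// let src = "def f(): ...\n";
/// match libcst::parse_module(src, None) {
///     Ok(module) => {
///         // Inspect or transform the CST here.
///     }
///     Err(err) => eprintln!("{}", libcst::prettify_error(err, "example.py")),
/// }
/// ```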
pub fn parse_module<'a>(
    mut module_text: &'a str,
    encoding: Option<&str>,
) -> Result<'a, Module<'a>> {
    // Strip the UTF-8 byte-order mark, if present, before tokenizing.
    if let Some(stripped) = module_text.strip_prefix('\u{feff}') {
        module_text = stripped;
    }
    let tokens = tokenize(module_text)?;
    let conf = whitespace_parser::Config::new(module_text, &tokens);
    let tokvec = tokens.into();
    let m = parse_tokens_without_whitespace(&tokvec, module_text, encoding)?;
    Ok(m.inflate(&conf)?)
}

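/// Runs the PEG parser over an already-tokenized module and returns the
/// "deflated" CST, a tree without whitespace attached. Callers that want
/// whitespace must `inflate` the result with a whitespace-parser [`Config`],
/// which is exactly what [`parse_module`] does.
///
/// A hedged sketch of that two-step flow, marked `ignore` because it assumes
/// the crate is imported as `libcst` and elides error handling:
///
/// ```ignore
/// let text = "x = 1\n";
/// let tokens = libcst::tokenize(text)?;
/// let conf = libcst::Config::new(text, &tokens);
/// let tokvec = tokens.into();
/// let deflated = libcst::parse_tokens_without_whitespace(&tokvec, text, None)?;
/// let module = deflated.inflate(&conf)?;
/// ```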
pub fn parse_tokens_without_whitespace<'r, 'a>(
    tokens: &'r TokVec<'a>,
    module_text: &'a str,
    encoding: Option<&str>,
) -> Result<'a, DeflatedModule<'r, 'a>> {
    let m = parser::python::file(tokens, module_text, encoding)
        .map_err(|err| ParserError::ParserError(err, module_text))?;
    Ok(m)
}

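/// Parses `text` as a single statement and returns the inflated
/// [`Statement`], with whitespace attached.
///
/// A minimal usage sketch, marked `ignore` because the `libcst` crate name is
/// an assumption:
///
/// ```ignore
/// let stmt = libcst::parse_statement("x = 1\n").expect("parse error");
/// ```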
pub fn parse_statement(text: &str) -> Result<Statement> {
    let tokens = tokenize(text)?;
    let conf = whitespace_parser::Config::new(text, &tokens);
    let tokvec = tokens.into();
    let stm = parser::python::statement_input(&tokvec, text)
        .map_err(|err| ParserError::ParserError(err, text))?;
    Ok(stm.inflate(&conf)?)
}

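/// Parses `text` as a single expression and returns the inflated
/// [`Expression`], with whitespace attached.
///
/// A minimal usage sketch, marked `ignore` because the `libcst` crate name is
/// an assumption:
///
/// ```ignore
/// let expr = libcst::parse_expression("a + b").expect("parse error");
/// ```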
pub fn parse_expression(text: &str) -> Result<Expression> {
    let tokens = tokenize(text)?;
    let conf = whitespace_parser::Config::new(text, &tokens);
    let tokvec = tokens.into();
    let expr = parser::python::expression_input(&tokvec, text)
        .map_err(|err| ParserError::ParserError(err, text))?;
    Ok(expr.inflate(&conf)?)
}

/// Returns the byte offset of the start of (1-indexed) line `n` in `source`,
/// or `source.len()` if `source` has fewer than `n` lines.
fn bol_offset(source: &str, n: i32) -> usize {
    if n <= 1 {
        return 0;
    }
    source
        .match_indices('\n')
        .nth((n - 2) as usize)
        .map(|(index, _)| index + 1)
        .unwrap_or_else(|| source.len())
}

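/// Renders a [`ParserError`] as a human-readable message. Syntax errors are
/// formatted with `annotate_snippets`, annotating the offending span with one
/// line of surrounding context; other errors fall back to a plain
/// `"Parse error for ..."` string.
///
/// A hedged usage sketch, marked `ignore` because the `libcst` crate name and
/// the label are assumptions:
///
/// ```ignore
/// if let Err(err) = libcst::parse_module("def f(:\n", None) {
///     eprintln!("{}", libcst::prettify_error(err, "bad_input.py"));
/// }
/// ```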
pub fn prettify_error(err: ParserError, label: &str) -> std::string::String {
    match err {
        ParserError::ParserError(e, module_text) => {
            use annotate_snippets::{Level, Renderer, Snippet};

            let loc = e.location;
            // Show one line of context above and below the error location.
            let context = 1;
            let line_start = max(
                1,
                loc.start_pos
                    .line
                    .checked_sub(context as usize)
                    .unwrap_or(1),
            );
            let start_offset = bol_offset(module_text, loc.start_pos.line as i32 - context);
            let end_offset = bol_offset(module_text, loc.end_pos.line as i32 + context + 1);
            let source = &module_text[start_offset..end_offset];
            let start = loc.start_pos.offset - start_offset;
            let end = loc.end_pos.offset - start_offset;
            // Widen a zero-width span by one character so the annotation stays visible.
            let end = if start == end {
                min(end + 1, end_offset - start_offset + 1)
            } else {
                end
            };
            Renderer::styled()
                .render(
                    Level::Error.title(label).snippet(
                        Snippet::source(source)
                            .line_start(line_start)
                            .fold(false)
                            .annotations(vec![Level::Error.span(start..end).label(&format!(
                                "expected {} {} -> {}",
                                e.expected, loc.start_pos, loc.end_pos
                            ))]),
                    ),
                )
                .to_string()
        }
        e => format!("Parse error for {}: {}", label, e),
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use tokenizer::TokError;

    #[test]
    fn test_simple() {
        let n = parse_module("1_", None);
        assert_eq!(
            n.err().unwrap(),
            ParserError::TokenizerError(TokError::BadDecimal, "1_")
        );
    }

    #[test]
    fn test_bare_minimum_funcdef() {
        parse_module("def f(): ...", None).expect("parse error");
    }

    #[test]
    fn test_funcdef_params() {
        parse_module("def g(a, b): ...", None).expect("parse error");
    }

    #[test]
    fn test_single_statement_with_no_newline() {
        for src in &[
            "(\n \\\n)",
            "(\n \\\n)",
            "(\n '''\n''')",
            "del _",
            "if _:\n '''\n)'''",
            "if _:\n ('''\n''')",
            "if _:\n '''\n '''",
            "if _:\n '''\n ''' ",
        ] {
            parse_module(src, None).unwrap_or_else(|e| panic!("'{}' doesn't parse: {}", src, e));
        }
    }

    #[test]
    fn bol_offset_first_line() {
        assert_eq!(0, bol_offset("hello", 1));
        assert_eq!(0, bol_offset("hello", 0));
        assert_eq!(0, bol_offset("hello\nhello", 1));
        assert_eq!(0, bol_offset("hello\nhello", 0));
    }

    #[test]
    fn bol_offset_second_line() {
        assert_eq!(5, bol_offset("hello", 2));
        assert_eq!(6, bol_offset("hello\nhello", 2));
        assert_eq!(6, bol_offset("hello\nhello\nhello", 2));
    }

    #[test]
    fn bol_offset_last_line() {
        assert_eq!(5, bol_offset("hello", 3));
        assert_eq!(11, bol_offset("hello\nhello", 3));
        assert_eq!(12, bol_offset("hello\nhello\nhello", 3));
    }

    #[test]
    fn test_tstring_basic() {
        assert!(
            parse_module("t'hello'", None).is_ok(),
            "Failed to parse t'hello'"
        );
        assert!(
            parse_module("t'{hello}'", None).is_ok(),
            "Failed to parse t'{{hello}}'"
        );
        assert!(
            parse_module("t'{hello:r}'", None).is_ok(),
            "Failed to parse t'{{hello:r}}'"
        );
        assert!(
            parse_module("t'line1\\n{hello:r}\\nline2'", None).is_ok(),
            "Failed to parse t'line1\\n{{hello:r}}\\nline2'"
        );
    }
}
213}