Skip to main content

rustpython_compiler/
lib.rs

1use ruff_source_file::{PositionEncoding, SourceFile, SourceFileBuilder, SourceLocation};
2use rustpython_codegen::{compile, symboltable};
3
4pub use rustpython_codegen::compile::CompileOpts;
5pub use rustpython_compiler_core::{Mode, bytecode::CodeObject};
6
// These modules live outside this repository; they are re-exported here for convenience.
8pub use ruff_python_ast as ast;
9pub use ruff_python_parser as parser;
10pub use rustpython_codegen as codegen;
11pub use rustpython_compiler_core as core;
12use thiserror::Error;
13
14#[derive(Error, Debug)]
15pub enum CompileErrorType {
16    #[error(transparent)]
17    Codegen(#[from] codegen::error::CodegenErrorType),
18    #[error(transparent)]
19    Parse(#[from] parser::ParseErrorType),
20}
21
22#[derive(Error, Debug)]
23pub struct ParseError {
24    #[source]
25    pub error: parser::ParseErrorType,
26    pub raw_location: ruff_text_size::TextRange,
27    pub location: SourceLocation,
28    pub end_location: SourceLocation,
29    pub source_path: String,
30    /// Set when the error is an unclosed bracket (converted from EOF).
31    pub is_unclosed_bracket: bool,
32}
33
34impl ::core::fmt::Display for ParseError {
35    fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
36        self.error.fmt(f)
37    }
38}
39
40#[derive(Error, Debug)]
41pub enum CompileError {
42    #[error(transparent)]
43    Codegen(#[from] codegen::error::CodegenError),
44    #[error(transparent)]
45    Parse(#[from] ParseError),
46}
47
48impl CompileError {
49    pub fn from_ruff_parse_error(error: parser::ParseError, source_file: &SourceFile) -> Self {
50        let source_code = source_file.to_source_code();
51        let source_text = source_file.source_text();
52
53        // For EOF errors (unclosed brackets), find the unclosed bracket position
54        // and adjust both the error location and message
55        let mut is_unclosed_bracket = false;
56        let (error_type, location, end_location) = if matches!(
57            &error.error,
58            parser::ParseErrorType::Lexical(parser::LexicalErrorType::Eof)
59        ) {
60            if let Some((bracket_char, bracket_offset)) = find_unclosed_bracket(source_text) {
61                let bracket_text_size = ruff_text_size::TextSize::new(bracket_offset as u32);
62                let loc = source_code.source_location(bracket_text_size, PositionEncoding::Utf8);
63                let end_loc = SourceLocation {
64                    line: loc.line,
65                    character_offset: loc.character_offset.saturating_add(1),
66                };
67                let msg = format!("'{}' was never closed", bracket_char);
68                is_unclosed_bracket = true;
69                (parser::ParseErrorType::OtherError(msg), loc, end_loc)
70            } else {
71                let loc =
72                    source_code.source_location(error.location.start(), PositionEncoding::Utf8);
73                let end_loc =
74                    source_code.source_location(error.location.end(), PositionEncoding::Utf8);
75                (error.error, loc, end_loc)
76            }
77        } else if matches!(
78            &error.error,
79            parser::ParseErrorType::Lexical(parser::LexicalErrorType::IndentationError)
80        ) {
81            // For IndentationError, point the offset to the end of the line content
82            // instead of the beginning
83            let loc = source_code.source_location(error.location.start(), PositionEncoding::Utf8);
84            let line_idx = loc.line.to_zero_indexed();
85            let line = source_text.split('\n').nth(line_idx).unwrap_or("");
86            let line_end_col = line.chars().count() + 1; // 1-indexed, past last char
87            let end_loc = SourceLocation {
88                line: loc.line,
89                character_offset: ruff_source_file::OneIndexed::new(line_end_col)
90                    .unwrap_or(loc.character_offset),
91            };
92            (error.error, end_loc, end_loc)
93        } else {
94            let loc = source_code.source_location(error.location.start(), PositionEncoding::Utf8);
95            let mut end_loc =
96                source_code.source_location(error.location.end(), PositionEncoding::Utf8);
97
98            // If the error range ends at the start of a new line (column 1),
99            // adjust it to the end of the previous line
100            if end_loc.character_offset.get() == 1 && end_loc.line > loc.line {
101                let prev_line_end = error.location.end() - ruff_text_size::TextSize::from(1);
102                end_loc = source_code.source_location(prev_line_end, PositionEncoding::Utf8);
103                end_loc.character_offset = end_loc.character_offset.saturating_add(1);
104            }
105
106            (error.error, loc, end_loc)
107        };
108
109        Self::Parse(ParseError {
110            error: error_type,
111            raw_location: error.location,
112            location,
113            end_location,
114            source_path: source_file.name().to_owned(),
115            is_unclosed_bracket,
116        })
117    }
118
119    pub const fn location(&self) -> Option<SourceLocation> {
120        match self {
121            Self::Codegen(codegen_error) => codegen_error.location,
122            Self::Parse(parse_error) => Some(parse_error.location),
123        }
124    }
125
126    pub const fn python_location(&self) -> (usize, usize) {
127        if let Some(location) = self.location() {
128            (location.line.get(), location.character_offset.get())
129        } else {
130            (0, 0)
131        }
132    }
133
134    pub fn python_end_location(&self) -> Option<(usize, usize)> {
135        match self {
136            CompileError::Codegen(_) => None,
137            CompileError::Parse(parse_error) => Some((
138                parse_error.end_location.line.get(),
139                parse_error.end_location.character_offset.get(),
140            )),
141        }
142    }
143
144    pub fn source_path(&self) -> &str {
145        match self {
146            Self::Codegen(codegen_error) => &codegen_error.source_path,
147            Self::Parse(parse_error) => &parse_error.source_path,
148        }
149    }
150}
151
/// Find the last unclosed opening bracket in source code.
///
/// Scans `source` once, skipping string literals and `#` comments, and keeps
/// a stack of the opening brackets (`(`, `[`, `{`) seen so far. A closing
/// bracket pops the stack only when it matches the bracket on top of it;
/// closers that do not match are ignored.
///
/// Returns the bracket character and its byte offset in `source`, or `None`
/// if no bracket is left open.
///
/// Inside a string literal a backslash always protects the character that
/// follows it from ending the literal. Per Python's lexical rules this holds
/// for raw strings too (`r"\""` is one complete literal), so string prefixes
/// do not need to be inspected at all.
fn find_unclosed_bracket(source: &str) -> Option<(char, usize)> {
    let chars: Vec<(usize, char)> = source.char_indices().collect();

    // True when the two characters following index `at` both equal `quote`,
    // i.e. `at` starts a run of three identical quote characters.
    let starts_triple = |at: usize, quote: char| {
        chars.get(at + 1).is_some_and(|&(_, c)| c == quote)
            && chars.get(at + 2).is_some_and(|&(_, c)| c == quote)
    };

    let mut stack: Vec<(char, usize)> = Vec::new();
    // `Some((quote, is_triple))` while the scanner is inside a string literal.
    let mut open_string: Option<(char, bool)> = None;
    let mut i = 0;

    while i < chars.len() {
        let (byte_offset, ch) = chars[i];

        if let Some((quote, is_triple)) = open_string {
            if ch == '\\' {
                // Skip the escaped character so an escaped quote cannot
                // terminate the literal (raw strings included).
                i += 2;
                continue;
            }
            if ch == quote {
                if !is_triple {
                    open_string = None;
                } else if starts_triple(i, quote) {
                    open_string = None;
                    i += 3;
                    continue;
                }
            }
            i += 1;
            continue;
        }

        match ch {
            // Comment: skip to the end of the line. The newline itself is
            // handled by the next iteration.
            '#' => {
                while i < chars.len() && chars[i].1 != '\n' {
                    i += 1;
                }
                continue;
            }
            // Start of a string literal. Any prefix letters before the quote
            // were already consumed as ordinary characters.
            '\'' | '"' => {
                let is_triple = starts_triple(i, ch);
                open_string = Some((ch, is_triple));
                i += if is_triple { 3 } else { 1 };
                continue;
            }
            '(' | '[' | '{' => stack.push((ch, byte_offset)),
            ')' | ']' | '}' => {
                let expected = match ch {
                    ')' => '(',
                    ']' => '[',
                    _ => '{',
                };
                if stack.last().is_some_and(|&(open, _)| open == expected) {
                    stack.pop();
                }
            }
            _ => {}
        }

        i += 1;
    }

    stack.last().copied()
}
251
252/// Compile a given source code into a bytecode object.
253pub fn compile(
254    source: &str,
255    mode: Mode,
256    source_path: &str,
257    opts: CompileOpts,
258) -> Result<CodeObject, CompileError> {
259    // TODO: do this less hacky; ruff's parser should translate a CRLF line
260    //       break in a multiline string into just an LF in the parsed value
261    #[cfg(windows)]
262    let source = source.replace("\r\n", "\n");
263    #[cfg(windows)]
264    let source = source.as_str();
265
266    let source_file = SourceFileBuilder::new(source_path, source).finish();
267    _compile(source_file, mode, opts)
268    // let index = LineIndex::from_source_text(source);
269    // let source_code = SourceCode::new(source, &index);
270    // let mut locator = LinearLocator::new(source);
271    // let mut ast = match parser::parse(source, mode.into(), &source_path) {
272    //     Ok(x) => x,
273    //     Err(e) => return Err(locator.locate_error(e)),
274    // };
275
276    // TODO:
277    // if opts.optimize > 0 {
278    //     ast = ConstantOptimizer::new()
279    //         .fold_mod(ast)
280    //         .unwrap_or_else(|e| match e {});
281    // }
282    // let ast = locator.fold_mod(ast).unwrap_or_else(|e| match e {});
283}
284
285fn _compile(
286    source_file: SourceFile,
287    mode: Mode,
288    opts: CompileOpts,
289) -> Result<CodeObject, CompileError> {
290    let parser_mode = match mode {
291        Mode::Exec => parser::Mode::Module,
292        Mode::Eval => parser::Mode::Expression,
293        // ruff does not have an interactive mode, which is fine,
294        // since these are only different in terms of compilation
295        Mode::Single | Mode::BlockExpr => parser::Mode::Module,
296    };
297    let parsed = parser::parse(source_file.source_text(), parser_mode.into())
298        .map_err(|err| CompileError::from_ruff_parse_error(err, &source_file))?;
299    let ast = parsed.into_syntax();
300    compile::compile_top(ast, source_file, mode, opts).map_err(|e| e.into())
301}
302
303pub fn compile_symtable(
304    source: &str,
305    mode: Mode,
306    source_path: &str,
307) -> Result<symboltable::SymbolTable, CompileError> {
308    let source_file = SourceFileBuilder::new(source_path, source).finish();
309    _compile_symtable(source_file, mode)
310}
311
312pub fn _compile_symtable(
313    source_file: SourceFile,
314    mode: Mode,
315) -> Result<symboltable::SymbolTable, CompileError> {
316    let res = match mode {
317        Mode::Exec | Mode::Single | Mode::BlockExpr => {
318            let ast = ruff_python_parser::parse_module(source_file.source_text())
319                .map_err(|e| CompileError::from_ruff_parse_error(e, &source_file))?;
320            symboltable::SymbolTable::scan_program(&ast.into_syntax(), source_file.clone())
321        }
322        Mode::Eval => {
323            let ast = ruff_python_parser::parse(
324                source_file.source_text(),
325                parser::Mode::Expression.into(),
326            )
327            .map_err(|e| CompileError::from_ruff_parse_error(e, &source_file))?;
328            symboltable::SymbolTable::scan_expr(
329                &ast.into_syntax().expect_expression(),
330                source_file.clone(),
331            )
332        }
333    };
334    res.map_err(|e| e.into_codegen_error(source_file.name().to_owned()).into())
335}
336
/// A single assignment statement compiles in `Mode::Single`.
#[test]
fn test_compile() {
    const SOURCE: &str = "x = 'abc'";
    let compiled = compile(SOURCE, Mode::Single, "<>", CompileOpts::default());
    dbg!(compiled.expect("compile error"));
}
343
/// A small "hello world" module with a function and a `__main__` guard
/// compiles in `Mode::Exec`.
#[test]
fn test_compile_phello() {
    const SOURCE: &str = r#"
initialized = True
def main():
    print("Hello world!")
if __name__ == '__main__':
    main()
"#;
    let compiled = compile(SOURCE, Mode::Exec, "<>", CompileOpts::default());
    dbg!(compiled.expect("compile error"));
}
356
/// An `if` / `elif` / `elif` / `else` chain compiles in `Mode::Exec`.
#[test]
fn test_compile_if_elif_else() {
    const SOURCE: &str = r#"
if False:
    pass
elif False:
    pass
elif False:
    pass
else:
    pass
"#;
    let compiled = compile(SOURCE, Mode::Exec, "<>", CompileOpts::default());
    dbg!(compiled.expect("compile error"));
}
372
/// A bare lambda expression statement compiles in `Mode::Exec`.
#[test]
fn test_compile_lambda() {
    const SOURCE: &str = r#"
lambda: 'a'
"#;
    let compiled = compile(SOURCE, Mode::Exec, "<>", CompileOpts::default());
    dbg!(compiled.expect("compile error"));
}
381
/// An immediately called lambda returning an f-string compiles in
/// `Mode::Exec`.
#[test]
fn test_compile_lambda2() {
    const SOURCE: &str = r#"
(lambda x: f'hello, {x}')('world}')
"#;
    let compiled = compile(SOURCE, Mode::Exec, "<>", CompileOpts::default());
    dbg!(compiled.expect("compile error"));
}
390
/// A lambda returned from inside an `if` / `elif` / `else` chain compiles in
/// `Mode::Exec`.
#[test]
fn test_compile_lambda3() {
    const SOURCE: &str = r#"
def g():
    pass
def f():
    if False:
        return lambda x: g(x)
    elif False:
        return g
    else:
        return g
"#;
    let compiled = compile(SOURCE, Mode::Exec, "<>", CompileOpts::default());
    dbg!(compiled.expect("compile error"));
}
407
/// A small hexadecimal integer literal compiles in `Mode::Exec`.
#[test]
fn test_compile_int() {
    const SOURCE: &str = r#"
a = 0xFF
"#;
    let compiled = compile(SOURCE, Mode::Exec, "<>", CompileOpts::default());
    dbg!(compiled.expect("compile error"));
}
416
/// A 96-bit hexadecimal integer literal compiles in `Mode::Exec`.
#[test]
fn test_compile_bigint() {
    const SOURCE: &str = r#"
a = 0xFFFFFFFFFFFFFFFFFFFFFFFF
"#;
    let compiled = compile(SOURCE, Mode::Exec, "<>", CompileOpts::default());
    dbg!(compiled.expect("compile error"));
}
425
/// Several f-string shapes (plain text, replacement fields, escaped braces,
/// nested f-strings inside lambdas) each compile in `Mode::Exec`.
///
/// The snippets are compiled in order; the first failure panics.
#[test]
fn test_compile_fstring() {
    let sources = [
        r#"
assert f"1" == '1'
    "#,
        r#"
assert f"{1}" == '1'
    "#,
        r#"
assert f"{1+1}" == '2'
    "#,
        r#"
assert f"{{{(lambda: f'{1}')}" == '{1'
    "#,
        r#"
assert f"a{1}" == 'a1'
    "#,
        r#"
assert f"{{{(lambda x: f'hello, {x}')('world}')}" == '{hello, world}'
    "#,
    ];

    for source in sources {
        let compiled = compile(source, Mode::Exec, "<>", CompileOpts::default());
        dbg!(compiled.expect("compile error"));
    }
}
463
/// A class decorated with `enum._simple_enum(...)` compiles in `Mode::Exec`
/// under the source name `<string>`.
#[test]
fn test_simple_enum() {
    const SOURCE: &str = r#"
import enum
@enum._simple_enum(enum.IntFlag, boundary=enum.KEEP)
class RegexFlag:
    NOFLAG = 0
    DEBUG = 1
print(RegexFlag.NOFLAG & RegexFlag.DEBUG)
"#;
    let compiled = compile(SOURCE, Mode::Exec, "<string>", CompileOpts::default());
    dbg!(compiled.expect("compile error"));
}