blueprint_starlark_syntax/syntax/
module.rs

1/*
2 * Copyright 2018 The Starlark in Rust Authors.
3 * Copyright (c) Facebook, Inc. and its affiliates.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 *     https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18use std::collections::HashMap;
19use std::fmt::Write;
20use std::fs;
21use std::mem;
22use std::path::Path;
23
24use derivative::Derivative;
25use blueprint_dupe::Dupe;
26use lalrpop_util as lu;
27
28use crate::codemap::CodeMap;
29use crate::codemap::FileSpan;
30use crate::codemap::Pos;
31use crate::codemap::Span;
32use crate::codemap::Spanned;
33use crate::eval_exception::EvalException;
34use crate::lexer::Lexer;
35use crate::lexer::Token;
36use crate::syntax::AstLoad;
37use crate::syntax::Dialect;
38use crate::syntax::ast::ArgumentP;
39use crate::syntax::ast::AstExpr;
40use crate::syntax::ast::AstStmt;
41use crate::syntax::ast::CallArgsP;
42use crate::syntax::ast::ExprP;
43use crate::syntax::ast::IdentP;
44use crate::syntax::ast::LoadArgP;
45use crate::syntax::ast::Stmt;
46use crate::syntax::grammar::StarlarkParser;
47use crate::syntax::lint_suppressions::LintSuppressions;
48use crate::syntax::lint_suppressions::LintSuppressionsBuilder;
49use crate::syntax::state::ParserState;
50use crate::syntax::validate::validate_module;
51
/// Render the list of expected tokens as a human-readable phrase.
///
/// The first entry is introduced with `one of`, the last entry (when it is
/// not also the first) with ` or`, and every entry in between with `,`.
/// An empty slice produces an empty string.
fn one_of(expected: &[String]) -> String {
    let total = expected.len();
    let mut phrase = String::new();
    for (idx, item) in expected.iter().enumerate() {
        let lead = if idx == 0 {
            "one of"
        } else if idx + 1 < total {
            ","
        } else {
            // Final alternative in a list of two or more.
            " or"
        };
        write!(phrase, "{lead} {item}").unwrap();
    }
    phrase
}
65
66/// Convert the error to a codemap diagnostic.
67///
68/// To build this diagnostic, the method needs the file span corresponding
69/// to the parsed file.
70fn parse_error_add_span(
71    err: lu::ParseError<usize, Token, EvalException>,
72    pos: usize,
73    codemap: &CodeMap,
74) -> crate::Error {
75    let (message, span) = match err {
76        lu::ParseError::InvalidToken { location } => (
77            "Parse error: invalid token".to_owned(),
78            Span::new(Pos::new(location as u32), Pos::new(location as u32)),
79        ),
80        lu::ParseError::UnrecognizedToken {
81            token: (x, t, y),
82            expected,
83        } => (
84            format!(
85                "Parse error: unexpected {} here, expected {}",
86                t,
87                one_of(&expected)
88            ),
89            Span::new(Pos::new(x as u32), Pos::new(y as u32)),
90        ),
91        lu::ParseError::UnrecognizedEOF { .. } => (
92            "Parse error: unexpected end of file".to_owned(),
93            Span::new(Pos::new(pos as u32), Pos::new(pos as u32)),
94        ),
95        lu::ParseError::ExtraToken { token: (x, t, y) } => (
96            format!("Parse error: extraneous token {t}"),
97            Span::new(Pos::new(x as u32), Pos::new(y as u32)),
98        ),
99        lu::ParseError::User { error } => return error.into_error(),
100    };
101
102    crate::Error::new_spanned(
103        crate::ErrorKind::Parser(anyhow::anyhow!(message)),
104        span,
105        codemap,
106    )
107}
108
/// A representation of a Starlark module abstract syntax tree.
///
/// Created with either [`parse`](AstModule::parse) or [`parse_file`](AstModule::parse_file),
/// and evaluated with `Evaluator::eval_module`.
///
/// The internal details (statements/expressions) are deliberately omitted, as they change
/// more regularly. A few methods to obtain information about the AST are provided.
#[derive(Derivative)]
#[derivative(Debug, Clone)]
pub struct AstModule {
    /// Code map built from the file name and source text; excluded from the `Debug` output.
    #[derivative(Debug = "ignore")]
    pub(crate) codemap: CodeMap,
    /// Statement produced by the parser for the whole module.
    pub(crate) statement: AstStmt,
    /// Copy of the dialect the module was parsed with.
    pub(crate) dialect: Dialect,
    /// Opt-in typecheck.
    /// Set when the source text contains `@starlark-rust: typecheck`.
    pub(crate) typecheck: bool,
    /// Lint issues suppressed in this module using inline comments of the shape
    /// `# starlark-lint-disable <ISSUE_NAME>, <ISSUE_NAME>, ...`.
    lint_suppressions: LintSuppressions,
}
130
/// Accessors for the parts a parsed module is made of.
///
/// This trait is not exported as public API of starlark.
pub trait AstModuleFields: Sized {
    /// Borrow the code map of the module.
    fn codemap(&self) -> &CodeMap;

    /// Borrow the statement holding the module's AST.
    fn statement(&self) -> &AstStmt;

    /// Borrow the dialect associated with the module.
    fn dialect(&self) -> &Dialect;

    /// Consume the module and return its code map, statement, dialect and a
    /// `bool` flag (for `AstModule` this is its `typecheck` field).
    fn into_parts(self) -> (CodeMap, AstStmt, Dialect, bool);
}
141
142impl AstModuleFields for AstModule {
143    fn codemap(&self) -> &CodeMap {
144        &self.codemap
145    }
146
147    fn statement(&self) -> &AstStmt {
148        &self.statement
149    }
150
151    fn dialect(&self) -> &Dialect {
152        &self.dialect
153    }
154
155    fn into_parts(self) -> (CodeMap, AstStmt, Dialect, bool) {
156        (self.codemap, self.statement, self.dialect, self.typecheck)
157    }
158}
159
160impl AstModule {
161    fn create(
162        codemap: CodeMap,
163        statement: AstStmt,
164        dialect: &Dialect,
165        typecheck: bool,
166        lint_suppressions: LintSuppressions,
167    ) -> crate::Result<AstModule> {
168        let mut errors = Vec::new();
169        validate_module(
170            &statement,
171            &mut ParserState {
172                codemap: &codemap,
173                dialect,
174                errors: &mut errors,
175            },
176        );
177        // We need the first error, so we don't use `.pop()`.
178        if let Some(err) = errors.into_iter().next() {
179            return Err(err.into_error());
180        }
181        Ok(AstModule {
182            codemap,
183            statement,
184            dialect: dialect.clone(),
185            typecheck,
186            lint_suppressions,
187        })
188    }
189
190    /// Parse a file stored on disk. For details see [`parse`](AstModule::parse).
191    pub fn parse_file(path: &Path, dialect: &Dialect) -> crate::Result<Self> {
192        let content = fs::read_to_string(path).map_err(anyhow::Error::new)?;
193        Self::parse(&path.to_string_lossy(), content, dialect)
194    }
195
196    /// Parse a Starlark module to produce an [`AstModule`], or an error if there are syntax errors.
197    /// The `filename` is for error messages only, and does not have to be a valid file.
198    /// The [`Dialect`] selects which Starlark constructs are valid.
199    ///
200    /// The returned error may contain diagnostic information. For example:
201    ///
202    /// ```
203    /// use blueprint_starlark_syntax::codemap::FileSpan;
204    /// use blueprint_starlark_syntax::syntax::AstModule;
205    /// use blueprint_starlark_syntax::syntax::Dialect;
206    ///
207    /// let err: blueprint_starlark_syntax::Error =
208    ///     AstModule::parse("filename", "\n(unmatched".to_owned(), &Dialect::Standard).unwrap_err();
209    /// let span: &FileSpan = err.span().unwrap();
210    /// assert_eq!(span.to_string(), "filename:2:11");
211    /// ```
212    pub fn parse(filename: &str, content: String, dialect: &Dialect) -> crate::Result<Self> {
213        let typecheck = content.contains("@starlark-rust: typecheck");
214        let codemap = CodeMap::new(filename.to_owned(), content);
215        let lexer = Lexer::new(codemap.source(), dialect, codemap.dupe());
216        // Store lint suppressions found during parsing
217        let mut lint_suppressions_builder = LintSuppressionsBuilder::new();
218        // Keep track of block of comments, used for accumulating lint suppressions
219        let mut in_comment_block = false;
220        let mut errors = Vec::new();
221        match StarlarkParser::new().parse(
222            &mut ParserState {
223                codemap: &codemap,
224                dialect,
225                errors: &mut errors,
226            },
227            lexer.filter(|token| match token {
228                // Filter out comment tokens and accumulate lint suppressions
229                Ok((start, Token::Comment(comment), end)) => {
230                    lint_suppressions_builder.parse_comment(&codemap, comment, *start, *end);
231                    in_comment_block = true;
232                    false
233                }
234                _ => {
235                    if in_comment_block {
236                        lint_suppressions_builder.end_of_comment_block(&codemap);
237                        in_comment_block = false;
238                    }
239                    true
240                }
241            }),
242        ) {
243            Ok(v) => {
244                if let Some(err) = errors.into_iter().next() {
245                    return Err(err.into_error());
246                }
247                Ok(AstModule::create(
248                    codemap,
249                    v,
250                    dialect,
251                    typecheck,
252                    lint_suppressions_builder.build(),
253                )?)
254            }
255            Err(p) => Err(parse_error_add_span(p, codemap.source().len(), &codemap)),
256        }
257    }
258
259    /// Return the file names of all the `load` statements in the module.
260    /// If the [`Dialect`] had [`enable_load`](Dialect::enable_load) set to [`false`] this will be an empty list.
261    pub fn loads(&self) -> Vec<AstLoad<'_>> {
262        // We know that `load` statements must be at the top-level, so no need to descend inside `if`, `for`, `def` etc.
263        // There is a suggestion that `load` statements should be at the top of a file, but we tolerate that not being true.
264        fn f<'a>(ast: &'a AstStmt, codemap: &CodeMap, vec: &mut Vec<AstLoad<'a>>) {
265            match &ast.node {
266                Stmt::Load(load) => vec.push(AstLoad {
267                    span: FileSpan {
268                        file: codemap.dupe(),
269                        span: load.module.span,
270                    },
271                    module_id: &load.module.node,
272                    symbols: load
273                        .args
274                        .iter()
275                        .map(|LoadArgP { local, their, .. }| {
276                            (local.node.ident.as_str(), their.node.as_str())
277                        })
278                        .collect(),
279                }),
280                Stmt::Statements(stmts) => {
281                    for s in stmts {
282                        f(s, codemap, vec);
283                    }
284                }
285                _ => {}
286            }
287        }
288
289        let mut loads = Vec::new();
290        f(&self.statement, &self.codemap, &mut loads);
291        loads
292    }
293
    /// Convert a [`Span`] contained in this module into a [`FileSpan`],
    /// using the module's code map.
    pub fn file_span(&self, x: Span) -> FileSpan {
        self.codemap.file_span(x)
    }
298
    /// Borrow the AST statement that represents the whole module.
    pub fn statement(&self) -> &AstStmt {
        &self.statement
    }
303
304    /// Locations where statements occur.
305    pub fn stmt_locations(&self) -> Vec<FileSpan> {
306        fn go(x: &AstStmt, codemap: &CodeMap, res: &mut Vec<FileSpan>) {
307            match &**x {
308                Stmt::Statements(_) => {} // These are not interesting statements that come up
309                _ => res.push(FileSpan {
310                    span: x.span,
311                    file: codemap.dupe(),
312                }),
313            }
314            x.visit_stmt(|x| go(x, codemap, res))
315        }
316
317        let mut res = Vec::new();
318        go(&self.statement, &self.codemap, &mut res);
319        res
320    }
321
322    /// Function to help people who want to write deeper AST transformations in Starlark.
323    /// Likely to break type checking and LSP support to some extent.
324    ///
325    /// Replacement must be a map from operator name (e.g. `+` or `==`) to a function name
326    /// (e.g. `my_plus` or `my_equals`).
327    pub fn replace_binary_operators(&mut self, replace: &HashMap<String, String>) {
328        fn f(x: &mut AstExpr, replace: &HashMap<String, String>) {
329            let mut temp = ExprP::Tuple(vec![]);
330            mem::swap(&mut x.node, &mut temp);
331            let mut res = match temp {
332                ExprP::Op(lhs, op, rhs) => match replace.get(op.to_string().trim()) {
333                    Some(func) => ExprP::Call(
334                        Box::new(Spanned {
335                            span: x.span,
336                            node: ExprP::Identifier(Spanned {
337                                span: x.span,
338                                node: IdentP {
339                                    ident: func.clone(),
340                                    payload: (),
341                                },
342                            }),
343                        }),
344                        CallArgsP {
345                            args: vec![
346                                Spanned {
347                                    span: lhs.span,
348                                    node: ArgumentP::Positional(*lhs),
349                                },
350                                Spanned {
351                                    span: rhs.span,
352                                    node: ArgumentP::Positional(*rhs),
353                                },
354                            ],
355                        },
356                    ),
357                    None => ExprP::Op(lhs, op, rhs),
358                },
359                _ => temp,
360            };
361            mem::swap(&mut x.node, &mut res);
362            x.visit_expr_mut(|x| f(x, replace));
363        }
364
365        self.statement.visit_expr_mut(|x| f(x, replace));
366    }
367
    /// Check whether the lint issue with the given short name is suppressed
    /// for `issue_span` in this module, by asking the lint suppressions
    /// collected while parsing.
    pub fn is_suppressed(&self, issue_short_name: &str, issue_span: Span) -> bool {
        self.lint_suppressions
            .is_suppressed(issue_short_name, issue_span)
    }
373}
374
#[cfg(test)]
mod tests {
    use crate::slice_vec_ext::SliceExt;
    use crate::syntax::grammar_tests;

    /// Checks the spans reported by `stmt_locations` for small snippets.
    #[test]
    fn test_locations() {
        // Parse `code` and render every statement location as a resolved
        // span string, joined with single spaces.
        fn get(code: &str) -> String {
            grammar_tests::parse_ast(code)
                .stmt_locations()
                .map(|x| x.resolve_span().to_string())
                .join(" ")
        }

        // A single expression statement yields one location.
        assert_eq!(&get("foo"), "1:1-4");
        // Three locations are expected here: `foo`, the whole `def`, and the
        // `pass` in its body.
        assert_eq!(&get("foo\ndef x():\n   pass"), "1:1-4 2:1-3:8 3:4-8");
    }
}