ressa_r/
lib.rs

1//! RESSA (Rusty ECMAScript Syntax Analyzer)
2//! A library for parsing js files
3//!
4//! The main interface for this library would be
5//! the `Parser` iterator. A parser is constructed
6//! either via the `::new()` function or a `Builder`.
7//! As part of the constructor, you have to provide
8//! the js you want to parse as an `&str`.
9//!
10//! Once constructed the parser will return a
11//! `ProgramPart` for each iteration.
12//!
13//! A very simple example might look like this
14//! ```
15//! use ressa_r::Parser;
16//! use resast::prelude::*;
17//! fn main() {
18//!     let js = "function helloWorld() { alert('Hello world'); }";
19//!     let p = Parser::new(&js).unwrap();
20//!     let f = ProgramPart::decl(
21//!         Decl::Func(
22//!             Func {
23//!                 id: Some(Ident::from("helloWorld")),
24//!                 params: Vec::new(),
25//!                 body: FuncBody(
26//!                     vec![
27//!                         ProgramPart::Stmt(
28//!                             Stmt::Expr(
29//!                                 Expr::Call(
30//!                                     CallExpr {
31//!                                         callee: Box::new(
32//!                                             Expr::ident_from("alert")
33//!                                         ),
34//!                                         arguments: vec![
35//!                                             Expr::Lit(
36//!                                                 Lit::single_string_from("Hello world")
37//!                                             )
38//!                                         ],
39//!                                     }
40//!                                 )
41//!                             )
42//!                         )
43//!                     ]
44//!                 ),
45//!                 generator: false,
46//!                 is_async: false,
47//!             }
48//!         )
49//!     );
50//!     for part in p {
51//!         // assert_eq!(part.unwrap(), f);
52//!     }
53//! }
54//!```
//! Check out the `examples` folder for slightly larger
//! examples.
57//!
58
59use ress::prelude::*;
60pub use ress::Span;
61
62mod comment_handler;
63mod error;
64mod formal_params;
65mod lexical_names;
66mod lhs;
67mod regex;
68pub mod spanned;
69
70pub use crate::comment_handler::CommentHandler;
71pub use crate::comment_handler::DefaultCommentHandler;
72pub use crate::error::Error;
73
74use resast::prelude::*;
75
76use std::collections::HashMap;
77
/// Parser configuration.
///
/// Only a single option exists today; the set of options
/// will most likely grow over time.
struct Config {
    /// When `true`, a subset of errors is tolerated
    tolerant: bool,
}
/// The kind stored for each label in `Context::label_set`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum LabelKind {
    /// NOTE(review): presumably a label on a loop statement — confirm
    Iteration,
    /// NOTE(review): presumably a label on any non-loop statement — confirm
    Other,
    /// NOTE(review): presumably a label whose kind is not yet determined — confirm
    Unknown,
}
90
91/// The current parsing context.
92/// This structure holds the relevant
93/// information to know when some
94/// text might behave differently
95/// depending on what has come before it
96struct Context<'a> {
97    /// If the current JS should be treated
98    /// as a JS module
99    is_module: bool,
100    /// If `in` is allowed as an identifier
101    allow_in: bool,
102    /// If a strict directive is allowed
103    allow_strict_directive: bool,
104    /// If `yield` is allowed as an identifier
105    allow_yield: bool,
106    /// If await is allowed as an identifier
107    allow_await: bool,
108    /// if super is allowed as a keyword
109    allow_super: bool,
110    /// if super is allowed to be part of a call expression
111    /// allow_super should always be true when this is true
112    /// but not the other way around. This is only valid in a
113    /// constructor
114    allow_super_call: bool,
115    /// If we have found any possible naming errors
116    /// which are not yet resolved
117    first_covert_initialized_name_error: Option<Item<&'a str>>,
118    /// If the current expressions is an assignment target
119    is_assignment_target: bool,
120    /// If the current expression is a binding element
121    is_binding_element: bool,
122    /// If we have entered a function body
123    in_function_body: bool,
124    /// If we have entered a loop block
125    in_iteration: bool,
126    /// If we have entered a switch block
127    in_switch: bool,
128    /// The currently known labels, this applies
129    /// to labels only, not all identifiers. Errors
130    /// at that level would need to be handled by
131    /// the calling scope
132    label_set: HashMap<&'a str, LabelKind>,
133    /// If the current scope has a `'use strict';` directive
134    /// in the prelude
135    strict: bool,
136    lexical_names: lexical_names::DuplicateNameDetector<'a>,
137    /// If the scanner has a pending line terminator
138    /// before the next token
139    has_line_term: bool,
140    /// If we have passed the initial prelude where a valid
141    /// `'use strict'` directive would exist
142    past_prolog: bool,
143    /// If we encounter an error, the iterator should stop
144    errored: bool,
145    /// If we find a directive with an octal escape
146    /// we need to error if a 'use strict' directive
147    /// is then found
148    found_directive_octal_escape: bool,
149}
150
151impl Default for Config {
152    fn default() -> Self {
153        log::trace!("default config");
154        Self { tolerant: false }
155    }
156}
157
158impl<'a> Default for Context<'a> {
159    fn default() -> Self {
160        log::trace!("default context",);
161        Self {
162            is_module: false,
163            allow_await: true,
164            allow_in: true,
165            allow_strict_directive: true,
166            allow_yield: true,
167            allow_super: false,
168            allow_super_call: false,
169            first_covert_initialized_name_error: None,
170            is_assignment_target: false,
171            is_binding_element: false,
172            in_function_body: false,
173            in_iteration: false,
174            in_switch: false,
175            label_set: HashMap::new(),
176            strict: false,
177            lexical_names: lexical_names::DuplicateNameDetector::default(),
178            has_line_term: false,
179            past_prolog: false,
180            errored: false,
181            found_directive_octal_escape: false,
182        }
183    }
184}
185impl<'a> Context<'a> {
186    #[tracing::instrument(level = "trace", skip(self))]
187    pub fn set_allow_super(&mut self, value: bool) {
188        self.allow_super = value;
189    }
190    #[tracing::instrument(level = "trace", skip(self))]
191    pub fn set_is_assignment_target(&mut self, value: bool) -> bool {
192        let old = self.is_assignment_target;
193        self.is_assignment_target = value;
194        old
195    }
196    #[tracing::instrument(level = "trace", skip(self))]
197    pub fn set_is_binding_element(&mut self, value: bool) -> bool {
198        let old = self.is_binding_element;
199        self.is_binding_element = value;
200        old
201    }
202}
203/// This is used to create a `Parser` using
204/// the builder method
205#[derive(Default)]
206pub struct Builder<'b> {
207    inner: crate::spanned::Builder<'b>,
208}
209
210impl<'b> Builder<'b> {
211    pub fn new() -> Self {
212        Self::default()
213    }
214    /// Enable or disable error tolerance
215    /// default: `false`
216    pub fn set_tolerant(&mut self, value: bool) {
217        self.inner.set_tolerant(value);
218    }
219    /// Enable or disable error tolerance with a builder
220    /// pattern
221    /// default: `false`
222    pub fn tolerant(mut self, value: bool) -> Self {
223        self.set_tolerant(value);
224        self
225    }
226    /// Set the parsing context to module or script
227    /// default: `false` (script)
228    pub fn set_module(&mut self, value: bool) {
229        self.inner.set_module(value);
230    }
231    /// Set the parsing context to module or script
232    /// with a builder pattern
233    /// default: `false` (script)
234    pub fn module(mut self, value: bool) -> Self {
235        self.set_module(value);
236        self
237    }
238    /// Set the js text that this parser would operate
239    /// on
240    pub fn set_js(&mut self, js: &'b str) {
241        self.inner.set_js(js);
242    }
243    /// Set the js text that this parser would operate
244    /// on with a builder pattern
245    pub fn js(mut self, js: &'b str) -> Self {
246        self.set_js(js);
247        self
248    }
249    /// Complete the builder pattern returning
250    /// `Result<Parser, Error>`
251    pub fn build(self) -> Res<Parser<'b, DefaultCommentHandler>> {
252        let inner = self.inner.build()?;
253        Ok(Parser { inner })
254    }
255}
256
257impl<'b> Builder<'b> {
258    pub fn with_comment_handler<CH>(self, handler: CH) -> Res<Parser<'b, CH>>
259    where
260        CH: CommentHandler<'b>,
261    {
262        let inner = self.inner.with_comment_handler(handler)?;
263        Ok(Parser { inner })
264    }
265}
266
267/// This is the primary interface that you would interact with.
268/// There are two main ways to use it, the first is to utilize
269/// the `Iterator` implementation. Each iteration will return
270/// a `Result<ProgramPart, Error>`.
271/// The other option is to use the `parse` method, which is just
272/// a wrapper around the `collect` method on `Iterator`, however
273/// the final result will be a `Result<Program, Error>` and the
274/// `ProgramPart` collection will be the inner data. Since modern
275/// js allows for both `Module`s as well as `Script`s, these will be
276/// the two `enum` variants.
277pub struct Parser<'a, CH> {
278    inner: crate::spanned::Parser<'a, CH>,
279}
/// Start and end index of a single line.
///
/// The derived ordering compares `start` first and `end`
/// second, following the field declaration order.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Line {
    /// Index at which the line starts
    start: usize,
    /// Index at which the line ends
    end: usize,
}
286/// The result type for the Parser operations
287type Res<T> = Result<T, Error>;
288
289impl<'a> Parser<'a, DefaultCommentHandler> {
290    /// Create a new parser with the provided
291    /// javascript
292    /// This will default to parsing in the
293    /// script context and discard comments.
294    /// If you wanted change this behavior
295    /// utilize the `Builder` pattern
296    pub fn new(text: &'a str) -> Res<Self> {
297        let inner = crate::spanned::Parser::new(text)?;
298        Ok(Self { inner })
299    }
300}
301
302impl<'a> Parser<'a, ()> {
303    pub fn builder() -> Builder<'a> {
304        Builder::new()
305    }
306}
307
308impl<'b, CH> Parser<'b, CH>
309where
310    CH: CommentHandler<'b> + Sized,
311{
312    /// Wrapper around the `Iterator` implementation for
313    /// Parser
314    /// ```
315    /// extern crate ressa_r;
316    /// use ressa_r::Parser;
317    /// use resast::prelude::*;
318    /// fn main() {
319    ///     let js = "function helloWorld() { alert('Hello world'); }";
320    ///     let mut p = Parser::new(&js).unwrap();
321    ///     let call = CallExpr {
322    ///         callee: Box::new(Expr::ident_from("alert")),
323    ///         arguments: vec![Expr::Lit(Lit::single_string_from("Hello world"))],
324    ///     };
325    ///     let expectation = Program::Script(vec![ProgramPart::Decl(Decl::Func(Func {
326    ///         id: Some(Ident::from("helloWorld")),
327    ///         params: Vec::new(),
328    ///         body: FuncBody(vec![ProgramPart::Stmt(Stmt::Expr(Expr::Call(call)))]),
329    ///         generator: false,
330    ///         is_async: false,
331    ///     }))]);
332    ///     let program = p.parse().unwrap();
333    ///     //assert_eq!(program, expectation);
334    /// }
335    /// ```
336    pub fn parse(&mut self) -> Res<Program> {
337        let ret = self.inner.parse()?;
338        Ok(ret.into())
339    }
340
341    pub fn next_position(&self) -> SourceLocation {
342        self.inner.next_position()
343    }
344
345    pub fn comment_handler(&self) -> &CH {
346        &self.inner.comment_handler
347    }
348    pub fn comment_handler_mut(&mut self) -> &mut CH {
349        &mut self.inner.comment_handler
350    }
351}
352
353impl<'b, CH> Iterator for Parser<'b, CH>
354where
355    CH: CommentHandler<'b> + Sized,
356{
357    type Item = Res<ProgramPart<'b>>;
358    fn next(&mut self) -> Option<Self::Item> {
359        let ret = self.inner.next()?;
360        Some(ret.map(Into::into))
361    }
362}