// fuzzy_pickles/lib.rs

//! # Fuzzy Pickles
//!
//! This is a library for parsing Rust code, paying specific attention
//! to locations of items in the source code — their [`Extent`]s.
//!
//! # Examples
//!
//! ## Navigating the AST
//!
//! Parsing a Rust file returns an AST of the file. You can delve into
//! individual parts of a given AST node; every field is public. Enums
//! contain `is_X`, `as_X`, and `into_X` methods to quickly narrow
//! down to a specific variant. If you'd like to see the raw text of
//! the node, you can index the original source code text with the AST
//! node.
//!
//! ```
//! extern crate fuzzy_pickles;
//!
//! use fuzzy_pickles::parse_rust_file;
//!
//! fn main() {
//!     let example_source = r#"
//!     fn main() { let the_variable_name = 1 + 1; }
//!     "#;
//!
//!     let file = parse_rust_file(example_source)
//!         .expect("Unable to parse source");
//!
//!     let main_fn = file.items[0].value.as_function()
//!         .expect("Not a function");
//!
//!     let first_expr = main_fn.body.statements[0].as_expression()
//!         .expect("Not an expression");
//!
//!     let let_expr = first_expr.value.as_let()
//!         .expect("Not a let expression");
//!
//!     let name_pat = let_expr.pattern.kind.as_ident()
//!         .expect("Not an ident pattern");
//!     assert_eq!("the_variable_name", &example_source[name_pat]);
//!
//!     let value = let_expr.value.as_ref()
//!         .expect("No value for let");
//!
//!     let addition_expr = value.value.as_binary()
//!         .expect("Not a binary expression");
//!     assert_eq!("1 + 1", &example_source[addition_expr]);
//! }
//! ```
//!
//! ## Using a visitor
//!
//! Doing this amount of digging can be tedious and error prone,
//! however. The crate also comes with visitor traits allowing you to
//! quickly find relevant nodes.
//!
//! ```
//! extern crate fuzzy_pickles;
//!
//! use fuzzy_pickles::{parse_rust_file, ast, visit::{Visit, Visitor}};
//!
//! #[derive(Debug, Default)]
//! struct AddVisitor<'ast>(Vec<&'ast ast::Binary>);
//!
//! impl<'ast> Visitor<'ast> for AddVisitor<'ast> {
//!     fn exit_binary(&mut self, binary: &'ast ast::Binary) {
//!         self.0.push(binary)
//!     }
//! }
//!
//! fn main() {
//!     let example_source = r#"
//!     fn main() { let the_variable_name = 1 + 1; }
//!     "#;
//!
//!     let file = parse_rust_file(example_source)
//!         .expect("Unable to parse source");
//!
//!     let mut v = AddVisitor::default();
//!     file.visit(&mut v);
//!
//!     let binary = v.0.pop().expect("Didn't find the binary operator");
//!     assert!(v.0.is_empty(), "Found additional binary operators");
//!     assert_eq!("1 + 1", &example_source[binary])
//! }
//! ```
//!
//! ## Reporting errors
//!
//! The parser attempts to have a reasonable level of detail when the
//! input source code is malformed.
//!
//! ```should_panic
//! extern crate fuzzy_pickles;
//!
//! use fuzzy_pickles::parse_rust_file;
//!
//! fn main() {
//!     // Oops, we forgot to close our parenthesis!
//!     let example_source = r#"
//!     fn main( { let the_variable_name = 1 + 1; }
//!     "#;
//!
//!     let error = parse_rust_file(example_source)
//!         .unwrap_err();
//!
//!     let pretty_error = error.with_text(example_source);
//!     panic!("{}", pretty_error);
//! }
//! ```
//!
//! This produces an error that shows the offending location and what
//! possible symbols were expected. We've truncated this output:
//!
//! ```text
//! Unable to parse text (line 2, column 14)
//!
//!     fn main( { let the_variable_name = 1 + 1; }
//!              ^
//! Expected:
//!   ExpectedAmpersand
//!   ExpectedBox
//!   ...
//! ```

#[macro_use]
extern crate fuzzy_pickles_derive;

#[macro_use]
extern crate peresil;

extern crate unicode_xid;

// Helper macros used only by the test suite.
#[cfg(test)]
#[macro_use]
mod test_utils;

// Internal implementation details, not part of the public API.
mod combinators;
mod whitespace_apportioner;

// Public API surface of the crate.
pub mod ast;
pub mod tokenizer;
pub mod visit;
pub mod parser;

use std::fmt;
use crate::whitespace_apportioner::WhitespaceApportioner;
150
/// A pair of `(start, end)` points corresponding to something
/// interesting in the source text.
///
/// The pair is a half-open range of byte offsets (`start` inclusive,
/// `end` exclusive), so an `Extent` can be used directly to slice the
/// original source string via the `Index` implementations below.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Extent(pub usize, pub usize);
155
156impl PartialEq<(usize, usize)> for Extent {
157    fn eq(&self, other: &(usize, usize)) -> bool {
158        (self.0, self.1) == *other
159    }
160}
161
162impl PartialEq<Extent> for (usize, usize) {
163    fn eq(&self, other: &Extent) -> bool {
164        (other.0, other.1) == *self
165    }
166}
167
168impl From<(usize, usize)> for Extent {
169    fn from(other: (usize, usize)) -> Extent {
170        Extent(other.0, other.1)
171    }
172}
173
174impl From<Extent> for (usize, usize) {
175    fn from(other: Extent) -> (usize, usize) {
176        (other.0, other.1)
177    }
178}
179
180impl std::ops::Index<Extent> for str {
181    type Output = str;
182
183    fn index(&self, Extent(s, e): Extent) -> &Self::Output {
184        &self[s..e]
185    }
186}
187
188impl<'a> std::ops::Index<&'a Extent> for str {
189    type Output = str;
190
191    fn index(&self, &Extent(s, e): &'a Extent) -> &Self::Output {
192        &self[s..e]
193    }
194}
195
/// A type that has an extent
pub trait HasExtent {
    /// Returns the span of source text this value covers.
    fn extent(&self) -> Extent;
}
200
201impl<T: HasExtent> HasExtent for Box<T>{
202    fn extent(&self) -> Extent { (**self).extent() }
203}
204
205impl<'a, T: HasExtent> HasExtent for &'a T {
206    fn extent(&self) -> Extent { (**self).extent() }
207}
208
209impl HasExtent for Extent {
210    fn extent(&self) -> Extent { *self }
211}
212
/// Information about a tokenization or parsing error
#[derive(Debug, PartialEq)]
pub enum ErrorDetail {
    /// The failure occurred while tokenizing the source text.
    Tokenizer(tokenizer::ErrorDetail),
    /// The failure occurred while parsing the token stream.
    Parser(parser::ErrorDetail),
}
219
220impl ErrorDetail {
221    /// Enhance the error with the source code
222    pub fn with_text<'a>(&'a self, text: &'a str) -> ErrorDetailText<'a> {
223        ErrorDetailText { detail: self, text }
224    }
225}
226
227impl From<tokenizer::ErrorDetail> for ErrorDetail {
228    fn from(other: tokenizer::ErrorDetail) -> Self {
229        ErrorDetail::Tokenizer(other)
230    }
231}
232
233impl From<parser::ErrorDetail> for ErrorDetail {
234    fn from(other: parser::ErrorDetail) -> Self {
235        ErrorDetail::Parser(other)
236    }
237}
238
/// Information about a tokenization or parsing error including original source code
#[derive(Debug)]
pub struct ErrorDetailText<'a> {
    // The underlying error being rendered.
    detail: &'a ErrorDetail,
    // The full source text the error refers to.
    text: &'a str,
}
245
246impl<'a> fmt::Display for ErrorDetailText<'a> {
247    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
248        match *self.detail {
249            ErrorDetail::Tokenizer(ref t) => t.with_text(self.text).fmt(f),
250            ErrorDetail::Parser(ref p) => p.with_text(self.text).fmt(f),
251        }
252    }
253}
254
// Pieces needed to render a human-readable error location: the
// offending line split at the error position, plus line/column
// coordinates. See `HumanTextError::new` for exact semantics.
struct HumanTextError<'a> {
    // Slice from the start of the offending line up to the error
    // location (may retain the preceding '\n' — see `new`).
    head_of_line: &'a str,
    // Slice from the error location to the end of that line.
    tail_of_line: &'a str,
    // 1-based line number of the error.
    line: usize,
    // Byte-based column of the error; see `new` for the off-by-one
    // subtlety between the first line and later lines.
    column: usize,
}
261
impl<'a> HumanTextError<'a> {
    /// Splits `text` at byte offset `location` and extracts the
    /// surrounding line plus line/column coordinates for error
    /// display.
    ///
    /// Panics if `location` is greater than `text.len()` or not on a
    /// `char` boundary (inherited from `str::split_at`).
    fn new(text: &'a str, location: usize) -> HumanTextError<'a> {
        let (head, tail) = text.split_at(location);
        // NOTE(review): `rfind` returns the index *of* the '\n', not
        // the character after it, so for errors past the first line
        // `head_of_line` starts with that newline and `column` counts
        // it. That makes `column` effectively 1-based after line 1 but
        // 0-based on line 1, and it counts bytes, not characters. The
        // crate-level doc example ("column 14") depends on this, and
        // the consumers (the Display impls in `tokenizer`/`parser`)
        // are outside this file — confirm before changing.
        let start_of_line = head.rfind('\n').unwrap_or(0);
        let end_of_line = tail.find('\n').unwrap_or_else(|| tail.len());

        let head_of_line = &head[start_of_line..];
        let tail_of_line = &tail[..end_of_line];

        let line = head.matches('\n').count() + 1; // Normally the first line is #1, so add one
        let column = head_of_line.len();

        HumanTextError { head_of_line, tail_of_line, line, column }
    }
}
277
278fn extract_whitespace(file: &str) -> Result<(WhitespaceApportioner, Vec<tokenizer::Token>), tokenizer::ErrorDetail> {
279    use crate::tokenizer::{Token, Tokens};
280
281    let mut ws = WhitespaceApportioner::default();
282    let mut tokens = Vec::new();
283
284    for token in Tokens::new(file) {
285        let token = token?;
286
287        match token {
288            Token::Whitespace(w) => {
289                ws.push(ast::Whitespace::Whitespace(w))
290            }
291            Token::CommentLine(c) => {
292                let c = ast::Comment::Line(c);
293                ws.push(ast::Whitespace::Comment(c));
294            }
295            Token::CommentBlock(c) => {
296                let c = ast::Comment::Block(c);
297                ws.push(ast::Whitespace::Comment(c));
298            }
299            o => tokens.push(o),
300        }
301    }
302
303    Ok((ws, tokens))
304}
305
/// The entrypoint to parsing Rust code.
///
/// Tokenizes `file`, parses the significant tokens into top-level
/// items, then re-attaches the collected whitespace and comments to
/// the resulting AST.
///
/// # Errors
///
/// Returns [`ErrorDetail::Tokenizer`] if lexing fails and
/// [`ErrorDetail::Parser`] if a top-level item cannot be parsed.
pub fn parse_rust_file(file: &str) -> Result<ast::File, ErrorDetail> {
    use crate::{
        parser::{attributed, item, Point, Master, State},
        tokenizer::Token,
        visit::Visit,
    };

    // Whitespace/comments are siphoned off here and apportioned back
    // onto the AST after parsing completes.
    let (mut ws, tokens) = extract_whitespace(file)?;

    let mut pt = Point::new(&tokens);
    let mut pm = Master::with_state(State::new());
    let mut items = Vec::new();

    // Parse one (possibly attributed) top-level item per iteration.
    loop {
        // Stop when only the end-of-file token remains (or, defensively,
        // when the token stream is empty).
        if pt.s.first().map(Token::is_end_of_file).unwrap_or(true) { break }

        let item = attributed(item)(&mut pm, pt);
        let item = pm.finish(item);

        let next_pt = match item.status {
            peresil::Status::Success(s) => {
                items.push(s);
                item.point
            },
            peresil::Status::Failure(e) => {
                // NOTE(review): this indexing assumes the failure offset
                // is always within `tokens`; the presence of the
                // end-of-file token (checked by the loop guard above)
                // appears to guarantee that — confirm with the
                // `error_on_last_token_does_not_panic` test below.
                return Err(ErrorDetail::Parser(parser::ErrorDetail {
                    location: tokens[item.point.offset].extent().0,
                    errors: e.into_iter().collect(),
                }))
            },
        };

        // Guard against looping forever if the parser ever succeeds
        // without consuming any tokens.
        assert!(next_pt.offset > pt.offset, "Unable to make progress");
        pt = next_pt;
    }

    let mut file = ast::File { items, whitespace: Vec::new() };

    // Walk the AST, assigning each piece of collected whitespace to
    // the node it belongs to; every piece must find a home.
    file.visit_mut(&mut ws);
    assert!(ws.is_empty(), "Did not assign all whitespace");

    Ok(file)
}
350
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn can_parse_an_empty_rust_file() {
        assert!(parse_rust_file("").is_ok());
    }

    // Regression test: a parse failure at the final token must report
    // an error rather than panic.
    #[test]
    fn error_on_last_token_does_not_panic() {
        assert!(parse_rust_file("an_ident").is_err());
    }

    // Regression test: unterminated macro arguments must report an
    // error rather than panic.
    #[test]
    fn error_on_unclosed_macro_args_does_not_panic() {
        assert!(parse_rust_file("c!(").is_err());
    }
}