oftlisp/reader/
mod.rs

1//! The OftLisp reader.
2//!
3//! Note that this is typically known as a parser, but Lisp tradition is to
4//! refer to the parser for the language as "the reader", especially when it
5//! does not perform advanced processing (e.g. macro expansion) automatically.
6//!
7//! The grammar of OftLisp is:
8//!
9//! ```ebnf
10//! value = { comment }, byteString
11//!       | { comment }, string
12//!       | { comment }, symbolish
13//!       | { comment }, "(", { value }, ")"
14//!       | { comment }, "(", value, { value }, "\u{2022}", value, ")"
15//!       | { comment }, "[", { value }, "]"
16//!       | { comment }, readerMacro, { comment }, value;
17//! comment = ";", { ? all characters ? - "\n" }, "\n";
18//! byteString = "b", '"', { stringChar }, '"';
19//! string = '"', { stringChar }, '"';
20//! symbolish = symbolChar, { symbolChar };
21//! stringChar = ? all characters ? - ( "\\" | '"' )
22//!            | "\\", escape;
23//! escape = "a" | "b" | "e" | "n" | "r" | "t" | '"'
24//!        | "x", hex, hex
25//!        | "u", hex, hex, hex, hex
26//!        | "U", hex, hex, hex, hex, hex, hex, hex, hex;
27//! hex = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
28//!     | "a" | "b" | "c" | "d" | "e" | "f"
29//!     | "A" | "B" | "C" | "D" | "E" | "F";
30//! symbolChar = ? an ASCII letter ?
31//!            | ? an ASCII digit ?
32//!            | "+" | "-" | "." | "/" | "$" | "?" | "*" | "#" | "=" | "<"
33//!            | ">" | "_";
34//! readerMacro = "'" | "`" | ",@" | ",";
35//! ```
36
37pub mod lexer;
38pub mod symbolish;
39
40use std::error::Error;
41use std::fmt::{Display, Formatter, Result as FmtResult};
42use std::fs::File;
43use std::io::{Error as IoError, Read};
44use std::iter::Peekable;
45use std::path::PathBuf;
46
47use either::{Either, Left, Right};
48use gc::Gc;
49
50use context::Context;
51use reader::lexer::{Lexeme, Lexer, Token};
52use reader::symbolish::read_symbolish;
53use value::Value;
54
55/// An error that occurs while reading.
56#[derive(Clone, Debug, Eq, Finalize, Hash, PartialEq, Trace)]
57pub struct ReadError {
58    kind: ReadErrorKind,
59    location: SourceLocation,
60}
61
62impl ReadError {
63    /// Returns the [`ReadErrorKind`](enum.ReadErrorKind.html) associated with
64    /// the error.
65    pub fn kind(&self) -> &ReadErrorKind {
66        &self.kind
67    }
68
69    /// Returns the [`SourceLocation`](struct.SourceLocation.html) associated with
70    /// the error.
71    pub fn location(&self) -> &SourceLocation {
72        &self.location
73    }
74}
75
76impl Display for ReadError {
77    fn fmt(&self, _fmt: &mut Formatter) -> FmtResult {
78        unimplemented!("display ReadError")
79    }
80}
81
82impl Error for ReadError {
83    fn description(&self) -> &str {
84        self.kind.description()
85    }
86}
87
88/// The kind of the error.
89///
90/// TODO: There should probably be a separate lexer AcceptSet.
91#[derive(Clone, Debug, Eq, Finalize, Hash, PartialEq, Trace)]
92pub enum ReadErrorKind {
93    /// A character was encountered that was not expected. The first value is
94    /// the character, the second is a representation of the expected value.
95    Char(String, AcceptSet),
96
97    /// An unexpected EOF was encountered.
98    EOF(AcceptSet),
99
100    /// A token was encountered that was not expected. The first value is
101    /// the token, the second is a representation of the expected value.
102    Token(String, AcceptSet),
103}
104
105impl ReadErrorKind {
106    fn description(&self) -> &'static str {
107        match *self {
108            ReadErrorKind::Char(..) => "unexpected character",
109            ReadErrorKind::EOF(..) => "unexpected EOF",
110            ReadErrorKind::Token(..) => "unexpected token",
111        }
112    }
113}
114
115impl Display for ReadErrorKind {
116    fn fmt(&self, _fmt: &mut Formatter) -> FmtResult {
117        unimplemented!("display ReadErrorKind")
118    }
119}
120
121/// A set of acceptable characters.
122#[derive(Clone, Debug, Eq, Finalize, Hash, PartialEq, Trace)]
123pub enum AcceptSet {
124    /// A closing bracket.
125    CloseBracket,
126
127    /// A closing parenthesis.
128    CloseParenthesis,
129
130    /// A dot for a dotted list.
131    Dot,
132
133    /// A hexadecimal digit.
134    HexChar,
135
136    /// A character in the body of a string.
137    StringChar,
138
139    /// A string escape character.
140    StringEscape,
141
142    /// A value.
143    Value,
144
145    /// The end of the input stream.
146    EOF,
147}
148
149impl Display for AcceptSet {
150    fn fmt(&self, _fmt: &mut Formatter) -> FmtResult {
151        unimplemented!("display AcceptSet")
152    }
153}
154
155/// The location of an error (or of anything else).
156#[derive(Clone, Debug, Eq, Finalize, Hash, PartialEq, Trace)]
157pub struct SourceLocation {
158    /// The start byte of the location.
159    pub start: usize,
160
161    /// The ending byte of the location.
162    pub end: usize,
163
164    /// The path which the location is associated with.
165    pub path: Option<Gc<PathBuf>>,
166}
167
168impl Display for SourceLocation {
169    fn fmt(&self, _fmt: &mut Formatter) -> FmtResult {
170        /*
171        fn write_loc(fmt: &mut Formatter, pos: Option<(usize, usize)>, idx: usize) -> FmtResult {
172            if let Some((r, c)) = pos {
173                write!(fmt, "{}:{}", r, c)
174            } else {
175                write!(fmt, "<invalid {}>", idx)
176            }
177        }
178
179        fmt.write_char('[')?;
180        if let Some(ref path) = self.path {
181            path.display().fmt(fmt)?;
182            fmt.write_str(": ")?;
183        }
184        write_loc(fmt, self.idx_to_pos(self.start), self.start)?;
185
186
187        if self.end != self.start {
188            fmt.write_str(" to ")?;
189            write_loc(fmt, self.idx_to_pos(self.end), self.end)?;
190        }
191
192        fmt.write_char(']')
193        */
194        unimplemented!("display SourceLocation")
195    }
196}
197
198fn build_reader_macro<C: 'static + Context>(name: &str, value: Gc<Value<C>>) -> Gc<Value<C>> {
199    Gc::new(Value::Cons(
200        Gc::new(Value::Symbol(name.into(), Default::default())),
201        Gc::new(Value::Cons(
202            value,
203            Gc::new(Value::Nil(Default::default())),
204            Default::default(),
205        )),
206        Default::default(),
207    ))
208}
209
210/// Reads zero or more [`Value`](../enum.Value.html)s from a file.
211pub fn read_file<C: 'static + Context>(
212    path: Gc<PathBuf>,
213) -> Result<Vec<Gc<Value<C>>>, Either<IoError, ReadError>> {
214    debug!("Reading file {}", path.display());
215    let mut buf = String::new();
216    File::open(&*path)
217        .and_then(|mut f| f.read_to_string(&mut buf))
218        .map_err(Left)?;
219    read_many(buf, Some(path)).map_err(Right)
220}
221
222fn must_read<'a>(lexer: &mut Peekable<Lexer<'a>>, lexeme: Lexeme<'a>) -> Result<(), ReadError> {
223    let next = lexer.next();
224    match next {
225        Some(Ok(token)) => {
226            if token.lexeme == lexeme {
227                Ok(())
228            } else {
229                unimplemented!("failed to must_read: token")
230            }
231        }
232        Some(Err(err)) => Err(err),
233        None => unimplemented!("failed to must_read: eof"),
234    }
235}
236
237fn unexpected_tok_val<'a>(token: Token<'a>, path: Option<Gc<PathBuf>>) -> ReadError {
238    ReadError {
239        kind: ReadErrorKind::Token(token.lexeme.to_string(), AcceptSet::Value),
240        location: SourceLocation {
241            start: token.start,
242            end: token.end,
243            path,
244        },
245    }
246}
247
248/// Reads a single [`Value`](../enum.Value.html) from the lexer.
249pub fn read_one<'a, C: 'static + Context>(
250    s: &'a str,
251    lexer: &mut Peekable<Lexer<'a>>,
252    path: Option<Gc<PathBuf>>,
253) -> Result<Gc<Value<C>>, ReadError> {
254    let mut comments = Vec::new();
255    loop {
256        let was_value = match lexer.peek() {
257            Some(&Ok(ref tok)) => {
258                if let Lexeme::Comment(l, ref n) = tok.lexeme {
259                    comments.push((l, n.to_string()));
260                    false
261                } else {
262                    true
263                }
264            }
265            Some(&Err(_)) => false,
266            None => true,
267        };
268        if was_value {
269            break;
270        } else if let Some(Err(err)) = lexer.next() {
271            return Err(err);
272        }
273    }
274
275    match lexer.next() {
276        Some(Ok(tok)) => {
277            match tok.lexeme {
278                Lexeme::BracketClose => Err(unexpected_tok_val(tok, path)),
279                Lexeme::BracketOpen => {
280                    let mut values = Vec::new();
281                    loop {
282                        let peek = lexer.peek().map(|r| r.clone());
283                        match peek {
284                            Some(Ok(tok)) => {
285                                match tok.lexeme {
286                                    Lexeme::BracketClose => break,
287                                    _ => {}
288                                }
289                            }
290                            _ => {}
291                        }
292                        values.push(read_one(s, lexer, path.clone())?);
293                    }
294                    must_read(lexer, Lexeme::BracketClose)?;
295                    Ok(Value::vector(values))
296                }
297                Lexeme::Comment(..) => unreachable!(),
298                Lexeme::Dot => Err(unexpected_tok_val(tok, path)),
299                Lexeme::ParenClose => Err(unexpected_tok_val(tok, path)),
300                Lexeme::ParenOpen => {
301                    let mut values = Vec::new();
302                    let last = loop {
303                        let peek = lexer.peek().map(|r| r.clone());
304                        match peek {
305                            Some(Ok(tok)) => {
306                                match tok.lexeme {
307                                    Lexeme::Dot => {
308                                        must_read(lexer, Lexeme::Dot)?;
309                                        let value = read_one(s, lexer, path)?;
310                                        break value;
311                                    }
312                                    Lexeme::ParenClose => {
313                                        break Gc::new(Value::Nil(Default::default()));
314                                    }
315                                    _ => {}
316                                }
317                            }
318                            _ => {}
319                        }
320                        values.push(read_one(s, lexer, path.clone())?);
321                    };
322                    must_read(lexer, Lexeme::ParenClose)?;
323                    Ok(Value::improper_list(values, last, Default::default()))
324                }
325                Lexeme::Quasiquote => {
326                    read_one(s, lexer, path.clone()).map(|v| build_reader_macro("quasiquote", v))
327                }
328                Lexeme::Quote => {
329                    read_one(s, lexer, path.clone()).map(|v| build_reader_macro("quote", v))
330                }
331                Lexeme::String(s) => Ok(Gc::new(
332                    Value::String(Gc::new(s.to_string()), Default::default()),
333                )),
334                Lexeme::Symbolish(s) => Ok(Gc::new(read_symbolish(s))),
335                Lexeme::Unquote(b) => {
336                    let name = if b { "unquote-splicing" } else { "unquote" };
337                    read_one(s, lexer, path.clone()).map(|v| build_reader_macro(name, v))
338                }
339            }
340        }
341        Some(Err(err)) => Err(err),
342        None => Err(ReadError {
343            kind: ReadErrorKind::EOF(AcceptSet::Value),
344            location: SourceLocation {
345                start: s.len(),
346                end: s.len(),
347                path,
348            },
349        }),
350    }
351}
352
353/// Reads zero or more [`Value`](../enum.Value.html)s.
354pub fn read_many<B, C>(buf: B, path: Option<Gc<PathBuf>>) -> Result<Vec<Gc<Value<C>>>, ReadError>
355where
356    B: AsRef<str>,
357    C: 'static + Context,
358{
359    let buf = buf.as_ref();
360    let mut lexer = Lexer::new_path(buf, path.clone()).peekable();
361    let mut vals = Vec::new();
362    while lexer.peek().is_some() {
363        vals.push(read_one(buf, &mut lexer, path.clone())?);
364    }
365    Ok(vals)
366}