1use std::borrow::Cow::{self, Borrowed, Owned};
4use std::collections::HashMap;
5use std::fmt;
6use std::path::PathBuf;
7
8use crate::bytes::Bytes;
9use crate::error::Error;
10use crate::exec::Context;
11use crate::integer::{Integer, Ratio};
12use crate::lexer::{Lexer, Span, Token};
13use crate::name::{get_standard_name_for, standard_names, Name, NameDisplay, NameStore};
14use crate::restrict::RestrictError;
15use crate::string;
16use crate::value::Value;
17
/// Prefix distinguishing a module-level doc comment (`;;;`) from an ordinary
/// doc comment; see `read_module_doc_comment`.
const MODULE_DOC_COMMENT: &str = ";;;";
19
/// Parses a token stream from a `Lexer` into `Value` expressions.
pub struct Parser<'a, 'lex> {
    // Source of `(Span, Token)` pairs
    lexer: Lexer<'lex>,
    // Execution context; supplies parse restrictions and the name scope
    ctx: &'a Context,
    // Caches interned names so repeated identifiers are looked up once
    name_cache: HashMap<&'lex str, Name>,
    // One-token lookahead buffer used by `peek`/`next`
    cur_token: Option<(Span, Token<'lex>)>,
}
27
/// An error encountered while parsing, carrying its source location.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct ParseError {
    /// Span of the offending source text
    pub span: Span,
    /// What went wrong
    pub kind: ParseErrorKind,
}
36
37impl ParseError {
38 pub fn new(span: Span, kind: ParseErrorKind) -> ParseError {
40 ParseError{ span, kind }
41 }
42}
43
44impl fmt::Display for ParseError {
45 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
46 fmt::Display::fmt(&self.kind, f)
47 }
48}
49
50impl NameDisplay for ParseError {
51 fn fmt(&self, _names: &NameStore, f: &mut fmt::Formatter) -> fmt::Result {
52 fmt::Display::fmt(self, f)
53 }
54}
55
/// The specific failure encountered while parsing.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum ParseErrorKind {
    /// A doc comment preceded a form that cannot carry documentation
    CannotDocumentItem,
    /// A doc comment appeared at end-of-file, documenting nothing
    DocCommentEof,
    /// Invalid numeric literal
    InvalidLiteral,
    /// Invalid token
    InvalidToken,
    /// Non-ASCII character in a byte literal
    InvalidByte(char),
    /// Invalid escape sequence in a byte literal
    InvalidByteEscape(char),
    /// Invalid character in input
    InvalidChar(char),
    /// Invalid character inside a numeric escape sequence
    InvalidNumericEscape(char),
    /// A literal failed to parse into its value
    LiteralParseError,
    /// Input ended with at least one paren group still open
    MissingCloseParen,
    /// A `,` or `,@` appeared without a matching enclosing backtick
    UnbalancedComma,
    /// Unexpected end-of-file
    UnexpectedEof,
    /// A token other than the expected one was found
    UnexpectedToken{
        /// What the parser was expecting
        expected: &'static str,
        /// What it actually found
        found: &'static str,
    },
    /// Unknown character escape sequence
    UnknownCharEscape(char),
    /// A `)` with no matching `(`
    UnmatchedParen,
    /// Unterminated character constant
    UnterminatedChar,
    /// Unterminated block comment
    UnterminatedComment,
    /// Unterminated string constant
    UnterminatedString,
}
101
102impl fmt::Display for ParseErrorKind {
103 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
104 match *self {
105 ParseErrorKind::CannotDocumentItem =>
106 f.write_str("doc comment precedes item that cannot be documented"),
107 ParseErrorKind::DocCommentEof => f.write_str("doc comment at end-of-file"),
108 ParseErrorKind::InvalidLiteral => f.write_str("invalid numeric literal"),
109 ParseErrorKind::InvalidToken => f.write_str("invalid token"),
110 ParseErrorKind::InvalidByte(ch) =>
111 write!(f, "byte literal must be ASCII: {:?}", ch),
112 ParseErrorKind::InvalidByteEscape(ch) =>
113 write!(f, "invalid escape sequence in byte literal: {:?}", ch),
114 ParseErrorKind::InvalidChar(ch) =>
115 write!(f, "invalid character: {:?}", ch),
116 ParseErrorKind::InvalidNumericEscape(ch) =>
117 write!(f, "invalid character in escape sequence: {:?}", ch),
118 ParseErrorKind::LiteralParseError => f.write_str("literal parse error"),
119 ParseErrorKind::MissingCloseParen => f.write_str("missing close paren"),
120 ParseErrorKind::UnbalancedComma => f.write_str("unbalanced ` and ,"),
121 ParseErrorKind::UnexpectedEof => f.write_str("unexpected end-of-file"),
122 ParseErrorKind::UnexpectedToken{expected, found} =>
123 write!(f, "expected {}; found {}", expected, found),
124 ParseErrorKind::UnknownCharEscape(ch) =>
125 write!(f, "unknown char escape: {:?}", ch),
126 ParseErrorKind::UnmatchedParen => f.write_str("unmatched `)`"),
127 ParseErrorKind::UnterminatedChar => f.write_str("unterminated char constant"),
128 ParseErrorKind::UnterminatedComment => f.write_str("unterminated block comment"),
129 ParseErrorKind::UnterminatedString => f.write_str("unterminated string constant"),
130 }
131 }
132}
133
/// An entry on the `parse_expr` group stack, recording a nesting construct
/// that will wrap or collect the next completed value.
enum Group<'lex> {
    /// Net backtick level. Positive counts are pending quasiquotes; a
    /// negative count records pending commas (see `Token::Comma` handling
    /// in `parse_expr`).
    Backticks(i32),
    /// A pending `,@` splice
    CommaAt,
    /// Number of consecutive `'` quotes pending
    Quotes(u32),
    /// An open paren group: values collected so far, plus an optional
    /// doc comment to attach when the group closes
    Parens(Vec<Value>, Option<(Span, &'lex str)>),
}
145
impl<'a, 'lex> Parser<'a, 'lex> {
    /// Creates a new `Parser` reading tokens from `lexer`, interning names
    /// through the scope of `ctx`.
    pub fn new(ctx: &'a Context, lexer: Lexer<'lex>) -> Parser<'a, 'lex> {
        Parser{
            lexer,
            ctx,
            name_cache: HashMap::new(),
            cur_token: None,
        }
    }

    /// Skips a leading shebang line, if present (delegates to the lexer).
    pub fn skip_shebang(&mut self) {
        self.lexer.skip_shebang();
    }

    /// Parses a single expression from the token stream.
    ///
    /// Nesting (parens, quotes, backticks, commas) is tracked on an explicit
    /// `stack` of `Group` values rather than by recursion, so the depth can
    /// be bounded by the context's `max_syntax_nesting` restriction.
    pub fn parse_expr(&mut self) -> Result<Value, Error> {
        let mut stack = Vec::new();
        // Net count of open backticks across the whole expression; each
        // comma consumes one. A comma with no enclosing backtick is an error.
        let mut total_backticks = 0;

        loop {
            if stack.len() >= self.ctx.restrict().max_syntax_nesting {
                return Err(From::from(RestrictError::MaxSyntaxNestingExceeded));
            }

            let mut doc = self.read_doc_comment()?;

            let (sp, tok) = self.next()?;

            let r = match tok {
                // Two doc comments in a row: the first documents nothing.
                Token::DocComment(_) =>
                    return Err(From::from(ParseError::new(
                        doc.unwrap().0, ParseErrorKind::CannotDocumentItem))),
                Token::LeftParen => {
                    // A doc comment may only precede a documentable form.
                    if let Some((doc_sp, _)) = doc {
                        match self.peek()? {
                            (_, Token::Name("const")) |
                            (_, Token::Name("define")) |
                            (_, Token::Name("lambda")) |
                            (_, Token::Name("macro")) |
                            (_, Token::Name("struct"))
                                => (),
                            _ => return Err(From::from(ParseError::new(
                                doc_sp, ParseErrorKind::CannotDocumentItem)))
                        }
                    }

                    stack.push(Group::Parens(Vec::new(), doc.take()));
                    continue;
                }
                Token::RightParen => {
                    let group = stack.pop().ok_or_else(
                        || ParseError::new(sp, ParseErrorKind::UnmatchedParen))?;

                    match group {
                        Group::Parens(values, doc) =>
                            insert_doc_comment(values, doc)
                                .map_err(From::from),
                        // A `)` closing a quote/backtick/comma group means no
                        // expression followed the prefix operator.
                        _ => Err(From::from(ParseError::new(sp,
                            ParseErrorKind::UnexpectedToken{
                                expected: "expression",
                                found: ")",
                            })))
                    }
                }
                Token::Float(f) => parse_float(f)
                    .map(Value::Float)
                    .map_err(|kind| From::from(ParseError::new(sp, kind))),
                Token::Integer(i, base) => parse_integer(self.ctx, i, base, sp)
                    .map(Value::Integer),
                Token::Ratio(r) => parse_ratio(self.ctx, r, sp)
                    .map(Value::Ratio),
                Token::Char(ch) => parse_char(ch)
                    .map(Value::Char).map_err(From::from),
                Token::String(s) => parse_string(s)
                    .map(|s| s.into()).map_err(From::from),
                Token::Byte(b) => parse_byte(b)
                    .map(|v| v.into()).map_err(From::from),
                Token::Bytes(b) => parse_bytes(b)
                    .map(Value::Bytes).map_err(From::from),
                Token::Path(p) => parse_path(p)
                    .map(|p| p.into()).map_err(From::from),
                Token::Name(name) => Ok(self.name_value(name)),
                Token::Keyword(name) => Ok(Value::Keyword(self.add_name(name))),
                Token::BackQuote => {
                    total_backticks += 1;
                    // Merge consecutive backticks into one stack entry.
                    if let Some(&mut Group::Backticks(ref mut n)) = stack.last_mut() {
                        *n += 1;
                        continue;
                    }
                    stack.push(Group::Backticks(1));
                    continue;
                }
                Token::Comma => {
                    if total_backticks <= 0 {
                        return Err(From::from(ParseError::new(sp, ParseErrorKind::UnbalancedComma)));
                    }
                    total_backticks -= 1;
                    // Commas are stored as negative counts in a `Backticks`
                    // group; see the unwinding loop below.
                    if let Some(&mut Group::Backticks(ref mut n)) = stack.last_mut() {
                        *n -= 1;
                        continue;
                    }
                    stack.push(Group::Backticks(-1));
                    continue;
                }
                Token::CommaAt => {
                    if total_backticks <= 0 {
                        return Err(From::from(ParseError::new(sp, ParseErrorKind::UnbalancedComma)));
                    }
                    total_backticks -= 1;
                    stack.push(Group::CommaAt);
                    continue;
                }
                Token::Quote => {
                    if let Some(&mut Group::Quotes(ref mut n)) = stack.last_mut() {
                        *n += 1;
                        continue;
                    }
                    stack.push(Group::Quotes(1));
                    continue;
                }
                Token::End => {
                    if let Some((doc_sp, _)) = doc {
                        return Err(From::from(ParseError::new(doc_sp,
                            ParseErrorKind::DocCommentEof)));
                    }

                    // Report a more specific error if any paren group is
                    // still open at end-of-input.
                    let any_paren = stack.iter().any(|group| {
                        match *group {
                            Group::Parens(_, _) => true,
                            _ => false
                        }
                    });

                    if any_paren {
                        Err(From::from(ParseError::new(sp,
                            ParseErrorKind::MissingCloseParen)))
                    } else {
                        Err(From::from(ParseError::new(sp,
                            ParseErrorKind::UnexpectedEof)))
                    }
                }
            };

            // Any doc comment not consumed by a paren group above cannot
            // document the value just parsed.
            if let Some((doc_sp, _)) = doc {
                return Err(From::from(ParseError::new(doc_sp,
                    ParseErrorKind::CannotDocumentItem)));
            }

            let mut v = r?;

            // Fold the completed value into the enclosing groups: quote,
            // backtick, and comma wrappers consume it; a paren group collects
            // it and parsing continues with the next element.
            loop {
                match stack.last_mut() {
                    None => return Ok(v),
                    Some(&mut Group::Parens(ref mut values, _)) => {
                        values.push(v);
                        break;
                    }
                    _ => ()
                }

                let group = stack.pop().unwrap();

                match group {
                    Group::Backticks(n) if n > 0 => {
                        total_backticks -= n;
                        v = v.quasiquote(n as u32);
                    }
                    Group::Backticks(n) if n < 0 => {
                        // Negative counts are pending commas; restore the
                        // backtick budget they consumed.
                        total_backticks -= n;
                        v = v.comma((-n) as u32);
                    }
                    Group::CommaAt => {
                        total_backticks += 1;
                        v = v.comma_at(1);
                    }
                    Group::Quotes(n) => v = v.quote(n),
                    _ => ()
                }
            }
        }
    }

    /// Parses a single expression and requires that end-of-input follow it.
    pub fn parse_single_expr(&mut self) -> Result<Value, Error> {
        let expr = self.parse_expr()?;

        match self.next()? {
            (_, Token::End) => Ok(expr),
            (sp, tok) => Err(From::from(ParseError::new(sp, ParseErrorKind::UnexpectedToken{
                expected: "eof",
                found: tok.name(),
            })))
        }
    }

    /// Parses a series of expressions until end-of-input.
    ///
    /// A leading module doc comment (`;;; ...`) is converted into a
    /// `SET_MODULE_DOC` expression prepended to the result.
    pub fn parse_exprs(&mut self) -> Result<Vec<Value>, Error> {
        let mut res = Vec::new();

        if let Some((_, doc)) = self.read_module_doc_comment()? {
            res.push(vec![
                Value::Name(standard_names::SET_MODULE_DOC),
                format_doc_comment(doc).into(),
            ].into());
        }

        loop {
            match self.peek()? {
                (_sp, Token::End) => break,
                _ => res.push(self.parse_expr()?)
            }
        }

        Ok(res)
    }

    /// Returns a borrow of the underlying lexer.
    pub fn lexer(&self) -> &Lexer<'lex> {
        &self.lexer
    }

    /// Consumes and returns the next token if it is a doc comment.
    fn read_doc_comment(&mut self)
            -> Result<Option<(Span, &'lex str)>, ParseError> {
        match self.peek()? {
            (sp, Token::DocComment(doc)) => {
                // Discard the buffered token; it has been consumed.
                self.cur_token.take();
                Ok(Some((sp, doc)))
            }
            _ => Ok(None)
        }
    }

    /// Consumes and returns the next token if it is a module-level
    /// doc comment (one starting with `;;;`).
    fn read_module_doc_comment(&mut self)
            -> Result<Option<(Span, &'lex str)>, ParseError> {
        match self.peek()? {
            (sp, Token::DocComment(doc)) if doc.starts_with(MODULE_DOC_COMMENT) => {
                self.cur_token.take();
                Ok(Some((sp, doc)))
            }
            _ => Ok(None)
        }
    }

    /// Interns `name`, consulting the standard-name table first and caching
    /// the result so repeated identifiers are resolved once.
    fn add_name(&mut self, name: &'lex str) -> Name {
        let mut names = self.ctx.scope().borrow_names_mut();
        *self.name_cache.entry(name).or_insert_with(
            || get_standard_name_for(name).unwrap_or_else(|| names.add(name)))
    }

    /// Converts a name token into a value; `true` and `false` become
    /// boolean literals rather than names.
    fn name_value(&mut self, name: &'lex str) -> Value {
        match name {
            "true" => Value::Bool(true),
            "false" => Value::Bool(false),
            _ => {
                let name = self.add_name(name);
                Value::Name(name)
            }
        }
    }

    /// Returns the next token, consuming it.
    fn next(&mut self) -> Result<(Span, Token<'lex>), ParseError> {
        let r = self.peek()?;
        self.cur_token = None;
        Ok(r)
    }

    /// Returns the next token without consuming it, buffering it in
    /// `cur_token` so the lexer is advanced at most once.
    fn peek(&mut self) -> Result<(Span, Token<'lex>), ParseError> {
        if let Some(tok) = self.cur_token {
            Ok(tok)
        } else {
            let tok = self.lexer.next_token()?;
            self.cur_token = Some(tok);
            Ok(tok)
        }
    }
}
433
434fn format_doc_comment(doc: &str) -> String {
435 let mut buf = String::new();
436
437 for line in doc.lines() {
438 buf.push_str(trim_first(
441 line.trim_start_matches(char::is_whitespace)
442 .trim_start_matches(';'), char::is_whitespace));
443 buf.push('\n');
444 }
445
446 buf
447}
448
/// Removes the first character of `s` if it satisfies the predicate `f`;
/// otherwise returns `s` unchanged.
fn trim_first<F: FnOnce(char) -> bool>(s: &str, f: F) -> &str {
    match s.chars().next() {
        // Slice past the first char's UTF-8 length; always a char boundary.
        Some(first) if f(first) => &s[first.len_utf8()..],
        _ => s,
    }
}
457
458fn insert_doc_comment(mut items: Vec<Value>, doc: Option<(Span, &str)>)
459 -> Result<Value, ParseError> {
460 if let Some((sp, doc)) = doc {
461 if items.len() == 3 {
462 items.insert(2, format_doc_comment(doc).into());
463 } else {
464 return Err(ParseError::new(sp, ParseErrorKind::CannotDocumentItem));
465 }
466 }
467
468 Ok(items.into())
469}
470
471fn parse_byte(s: &str) -> Result<u8, ParseError> {
472 let (b, _) = string::parse_byte(s, 0)?;
473 Ok(b)
474}
475
476fn parse_char(s: &str) -> Result<char, ParseError> {
477 let (ch, _) = string::parse_char(s, 0)?;
478 Ok(ch)
479}
480
481fn parse_bytes(s: &str) -> Result<Bytes, ParseError> {
482 let (b, _) = if s.starts_with("#br") {
483 string::parse_raw_byte_string(&s[2..], 0)?
484 } else {
485 string::parse_byte_string(&s[2..], 0)?
486 };
487 Ok(Bytes::new(b))
488}
489
490fn parse_path(s: &str) -> Result<PathBuf, ParseError> {
491 let (s, _) = if s.starts_with("#pr") {
492 string::parse_raw_string(&s[2..], 0)?
493 } else {
494 string::parse_string(&s[2..], 0)?
495 };
496 Ok(PathBuf::from(s))
497}
498
499fn parse_string(s: &str) -> Result<String, ParseError> {
500 let (s, _) = if s.starts_with('r') {
501 string::parse_raw_string(s, 0)?
502 } else {
503 string::parse_string(s, 0)?
504 };
505 Ok(s)
506}
507
508fn parse_float(s: &str) -> Result<f64, ParseErrorKind> {
509 strip_underscores(s).parse()
510 .map_err(|_| ParseErrorKind::LiteralParseError)
511}
512
/// Parses an integer literal in the given radix, enforcing the context's
/// `max_integer_size` restriction before constructing the value.
fn parse_integer(ctx: &Context, s: &str, base: u32, sp: Span)
        -> Result<Integer, Error> {
    // Non-decimal literals are assumed to carry a two-character radix prefix
    // (e.g. `0x`) emitted by the lexer, which is stripped here.
    let s = match base {
        10 => s,
        _ => &s[2..]
    };

    // Ignore `_` digit separators.
    let s = strip_underscores(s);

    check_integer(ctx, &s, base)?;

    Integer::from_str_radix(&s, base)
        .map_err(|_| From::from(ParseError::new(sp,
            ParseErrorKind::LiteralParseError)))
}
528
/// Parses a ratio literal (decimal digits), enforcing the context's
/// `max_integer_size` restriction before constructing the value.
fn parse_ratio(ctx: &Context, s: &str, sp: Span) -> Result<Ratio, Error> {
    // Ignore `_` digit separators.
    let s = strip_underscores(s);

    check_integer(ctx, &s, 10)?;

    s.parse().map_err(|_| From::from(ParseError::new(sp,
        ParseErrorKind::LiteralParseError)))
}
537
538fn check_integer(ctx: &Context, mut s: &str, base: u32) -> Result<(), RestrictError> {
539 let limit = ctx.restrict().max_integer_size;
540
541 if limit == usize::max_value() {
542 return Ok(());
543 }
544
545 if s.starts_with('-') {
546 s = &s[1..].trim_start_matches('0');
547 } else {
548 s = s.trim_start_matches('0');
549 }
550
551 let n_bits = (s.len() as f32 * (base as f32).log2()).ceil() as usize;
553
554 if n_bits > limit {
555 Err(RestrictError::IntegerLimitExceeded)
556 } else {
557 Ok(())
558 }
559}
560
/// Removes `_` digit separators from a numeric literal, borrowing the input
/// unchanged when it contains none.
fn strip_underscores(s: &str) -> Cow<str> {
    if !s.contains('_') {
        return Borrowed(s);
    }

    let mut out = String::with_capacity(s.len());
    out.extend(s.split('_'));
    Owned(out)
}
568
#[cfg(test)]
mod test {
    use super::{ParseError, ParseErrorKind, Parser};
    use crate::error::Error;
    use crate::interpreter::Interpreter;
    use crate::lexer::{Span, Lexer};
    use crate::value::Value;

    /// Parses a single expression with a fresh interpreter, panicking if the
    /// failure is anything other than a `ParseError`.
    fn parse(s: &str) -> Result<Value, ParseError> {
        let interp = Interpreter::new();

        let mut p = Parser::new(interp.context(), Lexer::new(s, 0));
        p.parse_single_expr().map_err(|e| {
            match e {
                Error::ParseError(e) => e,
                _ => panic!("parse returned error: {:?}", e)
            }
        })
    }

    // A doc comment must precede a documentable form of the right arity;
    // each error should point at the doc comment's span (bytes 0..7).
    #[test]
    fn test_doc_errors() {
        assert_eq!(parse(";; foo\n(const)").unwrap_err(), ParseError{
            span: Span{lo: 0, hi: 7}, kind: ParseErrorKind::CannotDocumentItem});
        assert_eq!(parse(";; foo\n(const foo)").unwrap_err(), ParseError{
            span: Span{lo: 0, hi: 7}, kind: ParseErrorKind::CannotDocumentItem});
        assert_eq!(parse(";; foo\n(const foo bar baz)").unwrap_err(), ParseError{
            span: Span{lo: 0, hi: 7}, kind: ParseErrorKind::CannotDocumentItem});
    }

    // Structural errors: unclosed paren and commas without enclosing backticks.
    #[test]
    fn test_errors() {
        assert_eq!(parse("(foo").unwrap_err(), ParseError{
            span: Span{lo: 4, hi: 4}, kind: ParseErrorKind::MissingCloseParen});
        assert_eq!(parse("(foo ,bar)").unwrap_err(), ParseError{
            span: Span{lo: 5, hi: 6}, kind: ParseErrorKind::UnbalancedComma});
        assert_eq!(parse("`(foo ,,bar)").unwrap_err(), ParseError{
            span: Span{lo: 7, hi: 8}, kind: ParseErrorKind::UnbalancedComma});
    }

    // After one expression, the lexer should stop just past its close paren.
    #[test]
    fn test_lexer_position() {
        let interp = Interpreter::new();

        let mut p = Parser::new(interp.context(),
            Lexer::new("(foo 1 2 3) bar", 0));

        p.parse_expr().unwrap();

        assert_eq!(p.lexer().current_position(), 11);
    }
}
620}