ssexp/
lib.rs

1#![deny(missing_docs)]
2/*!
3See [README.md](https://crates.io/crates/ssexp)
4*/
5
6use derive_destructure::destructure;
7
8use std::rc::Rc;
9
/// Represents the s-expressions as tokens.
///
/// A parsed document is a tree whose leaves are `Symbol`s and whose inner nodes are `List`s.
/// Tokens can be cloned, debug-printed and compared for structural equality.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Token {
    /// A symbol token represented by its name.
    Symbol(String),
    /// A list token containing other tokens.
    List(Vec<Token>),
}

use Token::*;

impl Token {
    /// Consumes the token and returns the symbol name if it is a `Symbol`.
    /// Returns `None` for a `List`.
    pub fn symbol(self) -> Option<String> {
        match self {
            Symbol(name) => Some(name),
            List(_) => None,
        }
    }

    /// Same as `fn symbol`, but borrows the name instead of consuming the token.
    pub fn symbol_ref(&self) -> Option<&String> {
        match self {
            Symbol(name) => Some(name),
            List(_) => None,
        }
    }

    /// Consumes the token and returns its elements if it is a `List`.
    /// Returns `None` for a `Symbol`.
    pub fn list(self) -> Option<Vec<Token>> {
        match self {
            List(elements) => Some(elements),
            Symbol(_) => None,
        }
    }

    /// Same as `fn list`, but borrows the elements instead of consuming the token.
    pub fn list_ref(&self) -> Option<&Vec<Token>> {
        match self {
            List(elements) => Some(elements),
            Symbol(_) => None,
        }
    }
}
56
/// Useful for managing opening and closing subcontexts like brackets.
///
/// Every parser returns one of these states to tell its caller how to continue.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ParsingState {
    /// Doesn't do anything, marks success.
    ///
    /// The bundled parsers return `Fine(true)` after parsing a token (symbol, list, infix
    /// operand) and `Fine(false)` after merely consuming input (whitespace, comments).
    Fine(bool),
    /// Closes a subcontext. The bundled parsers return this when the input is exhausted.
    Finished,
    /// Closes a subcontext while carrying the closing character.
    Delegate(char),
}
67
68use ParsingState::*;
69
70mod implement_display {
71    use std::fmt::{Display, Formatter, Result};
72
73    use crate::Token;
74
75    impl Display for Token {
76        fn fmt(&self, f: &mut Formatter) -> Result {
77            match self {
78                Token::Symbol(string) => write!(f, "'{}'", string),
79                Token::List(vec) => {
80                    let mut first = true;
81                    write!(f, "(").expect("Unexpected end of file");
82                    for tok in vec.iter() {
83                        if !first {
84                            write!(f, " ").expect("Unexpected end of file");
85                        } else {
86                            first = false;
87                        }
88                        let result = write!(f, "{}", tok);
89                        if result.is_err() {
90                            return result;
91                        }
92                    }
93                    write!(f, ")")
94                }
95            }
96        }
97    }
98}
99
100use std::collections::HashMap;
101
102/// A map for storing the macro characters and their behaviours.
103/// Every character may be set as macro character.
104pub struct MacroMap<T>(HashMap<char, Rc<dyn Parser<T>>>);
105
106impl<T: Iterator<Item = char>> MacroMap<T> {
107    /// Creates a new `MacroMap`, which is used to save macro characters
108    pub fn new() -> Self {
109        MacroMap(HashMap::new())
110    }
111
112    /// Adds a new user defined macro character `char`.
113    /// Takes an `Option` of the new parser.
114    pub fn set(
115        &mut self,
116        char: char,
117        value: Option<impl Parser<T> + 'static>,
118    ) -> Option<Rc<dyn Parser<T>>> {
119        if let Some(value) = value {
120            self.0.insert(char, Rc::new(value))
121        } else {
122            self.0.remove(&char)
123        }
124    }
125
126    /// Adds `char` as a macro character with the user defined parser `value`
127    pub fn with(mut self, char: char, value: impl Parser<T> + 'static) -> Self {
128        self.0.insert(char, Rc::new(value));
129        self
130    }
131
132    /// Adds whitespaces as macro characters for splitting there into tokens.
133    pub fn with_separating_whitespaces(self) -> Self {
134        self.with(' ', EmptyParser(Fine(false)))
135            .with('\n', EmptyParser(Fine(false)))
136            .with('\t', EmptyParser(Fine(false)))
137    }
138    /// Makes `start_char` the left delimiter and `end_char` the right delimiter of a list.
139    pub fn with_lists(self, start_char: char, end_char: char) -> Self {
140        self.with(
141            start_char,
142            ListWrapper {
143                prefix: None,
144                parser: DelimitedListParser(end_char),
145            },
146        )
147        .with(end_char, PanicParser)
148    }
149    /// Ignores macro characters in some region and parses a symbol
150    pub fn with_macro_characters_ignored(self, delimiter: char) -> Self {
151        self.with(delimiter, DelimitedSymbolParser)
152    }
153    /// Adds the ability for parsing strings, using char `string_delimiter` for delimiting strings.
154    /// A string will be parsed as list, whose first element is a symbol `prefix` and whose second element is the string itself as symbol.
155    pub fn with_strings(
156        self,
157        prefix: String,
158        delimiter: char,
159        replacements: Vec<(String, String)>,
160    ) -> Self {
161        self.with(
162            delimiter,
163            ListWrapper {
164                prefix: Some(prefix),
165                parser: SymbolReplacer {
166                    replacements,
167                    parser: DelimitedSymbolParser,
168                },
169            },
170        )
171    }
172    /// Adds `comment_char` as macro character for single line comments.
173    pub fn with_comments(self, comment_char: char) -> Self {
174        self.with(comment_char, CommentParser)
175    }
176
177    /// Adds `infix_char` as infix operator.
178    pub fn with_infix(self, infix_char: char, kind: InfixKind, right: bool) -> Self {
179        self.with(infix_char, InfixParser(kind, right))
180    }
181
182    /// Creates a `MacroMap` from a `HashMap`
183    pub fn from_hash_map(map: HashMap<char, Rc<dyn Parser<T>>>) -> Self {
184        MacroMap(map)
185    }
186    /// Adds a new user defined macro character `char`.
187    /// Takes an `Option` of the new parser already converted to the inner type.
188    pub fn insert(
189        &mut self,
190        char: char,
191        value: Option<Rc<dyn Parser<T>>>,
192    ) -> Option<Rc<dyn Parser<T>>> {
193        if let Some(value) = value {
194            self.0.insert(char, value)
195        } else {
196            self.0.remove(&char)
197        }
198    }
199}
200
/// Used in parsing process for getting chars and calling the parsers.
///
/// Holds the remaining input together with a stack of macro maps.
pub struct ParsingContext<T: Iterator<Item = char>> {
    // A character handed back by a dropped `MacroHandle`; `parse` consumes it before
    // pulling the next character from `chars`.
    char: Option<char>,
    // The remaining input characters.
    chars: T,
    // Stack of macro maps; only the last one is consulted when dispatching a character.
    maps: Vec<MacroMap<T>>,
}
207
/// A handle for handling the next macro character.
///
/// Returned by `ParsingContext::parse`. Either `call` it to run the parser bound to the
/// macro character, or drop it to hand the character back to the context.
#[derive(destructure)]
pub struct MacroHandle<'a, T: Iterator<Item = char>>
where
    ParsingContext<T>: 'a,
{
    // The macro character this handle was created for.
    char: char,
    // The context the character was read from; `Drop` writes `char` back into it.
    context: &'a mut ParsingContext<T>,
    // The parser that was looked up for `char` when the handle was created.
    parser: Option<Rc<dyn Parser<T>>>,
}
218
219use std::result::Result;
220
221impl<'a, T: Iterator<Item = char>> MacroHandle<'a, T> {
222    /// Tries to call the macro character. If none is defined, it some character.
223    pub fn call(self, result: &mut Vec<Token>) -> Result<ParsingState, char> {
224        let (char, context, parser) = self.destructure();
225        if let Some(parser) = parser {
226            Ok(parser.parse(Some(char), result, context))
227        } else {
228            Err(char)
229        }
230    }
231}
232
233impl<'a, T: Iterator<Item = char>> Drop for MacroHandle<'a, T> {
234    /// Discards this handle. When calling the next time, the same character will be used for dispatch.
235    fn drop(&mut self) {
236        self.context.char = Some(self.char);
237    }
238}
239
240impl<T: Iterator<Item = char>> ParsingContext<T> {
241    /// Parses the next symbol and inserts it into a specified token list.
242    /// Then a handle for the next macro call is returned.
243    pub fn parse(&mut self, tokens: &mut Vec<Token>) -> Option<MacroHandle<T>> {
244        let mut symbol = String::new();
245        let (char, parser) = loop {
246            let char = if let Some(old_char) = self.char {
247                self.char = None;
248                old_char
249            } else {
250                if let Some(next) = self.chars.next() {
251                    next
252                } else {
253                    if !symbol.is_empty() {
254                        tokens.push(Symbol(symbol));
255                    }
256                    return None;
257                }
258            };
259            let parser = if let Some(map) = self.maps.last() {
260                map.0.get(&char).cloned()
261            } else {
262                None
263            };
264            if parser.is_none() {
265                symbol = format!("{}{}", symbol, char);
266            } else {
267                break (char, parser);
268            }
269        };
270
271        if !symbol.is_empty() {
272            tokens.push(Symbol(symbol));
273        }
274        Some(MacroHandle {
275            char,
276            context: self,
277            parser,
278        })
279    }
280
281    /// Binds a new parser to a character and returns some previous, if there was one defined, else returns `None`
282    pub fn set(
283        &mut self,
284        char: char,
285        value: Option<impl Parser<T> + 'static>,
286    ) -> Option<Rc<dyn Parser<T>>> {
287        if let Some(map) = self.maps.last_mut() {
288            map.set(char, value)
289        } else {
290            None
291        }
292    }
293
294    /// Binds a new parser to a character and returns some previous, if there was one defined, else returns `None`
295    pub fn insert(
296        &mut self,
297        char: char,
298        value: Option<Rc<dyn Parser<T>>>,
299    ) -> Option<Rc<dyn Parser<T>>> {
300        if let Some(map) = self.maps.last_mut() {
301            map.insert(char, value)
302        } else {
303            None
304        }
305    }
306
307    /// Replaces the current macro map with a new one.
308    pub fn push(&mut self, map: MacroMap<T>) {
309        self.maps.push(map);
310    }
311
312    /// Removes the current macro map and uses the most recent one.
313    pub fn pop(&mut self) -> Option<MacroMap<T>> {
314        self.maps.pop()
315    }
316}
317
318/// The function for starting the parsing process. It takes some iterator of characters, an initial parser, and a macro map.
319pub fn parse<T: IntoIterator<Item = char>>(
320    stream: T,
321    parser: impl Parser<T::IntoIter>,
322    map: MacroMap<T::IntoIter>,
323) -> Vec<Token> {
324    let mut context = ParsingContext {
325        char: None,
326        chars: stream.into_iter(),
327        maps: vec![map],
328    };
329    let mut tokens = Vec::new();
330    parser.parse(None, &mut tokens, &mut context);
331    tokens
332}
333
/// A trait for task specific parsers.
///
/// Parsers are bound to macro characters in a `MacroMap`, or passed to `parse` as the
/// initial parser.
pub trait Parser<T: Iterator<Item = char>> {
    /// Parses input taken from `context` and appends the produced tokens to `result`.
    ///
    /// `char` is the macro character that triggered this parser, or `None` when the parser
    /// was started directly as the initial parser. The returned `ParsingState` is handed
    /// to the caller, which decides how to continue.
    fn parse(
        &self,
        char: Option<char>,
        result: &mut Vec<Token>,
        context: &mut ParsingContext<T>,
    ) -> ParsingState;
}
344
345use parsers::*;
346
347/// Useful default parsers.
348pub mod parsers {
349    use super::*;
350    /// A parser to mark some char as macro character without doing anything.
351    pub struct EmptyParser(pub ParsingState);
352    impl<T: Iterator<Item = char>> Parser<T> for EmptyParser {
353        fn parse(
354            &self,
355            _char: Option<char>,
356            _result: &mut Vec<Token>,
357            _context: &mut ParsingContext<T>,
358        ) -> ParsingState {
359            self.0
360        }
361    }
362
363    /// A parser, that just panics. Instead of using this, it's preferable to use `DelegateParser`
364    pub struct PanicParser;
365    impl<T: Iterator<Item = char>> Parser<T> for PanicParser {
366        fn parse(
367            &self,
368            char: Option<char>,
369            _result: &mut Vec<Token>,
370            _context: &mut ParsingContext<T>,
371        ) -> ParsingState {
372            panic!("Invalid macro character {:?}", char)
373        }
374    }
375
376    /// A parser, that delegates the handling of some macro character to the parser.
377    pub struct DelegateParser;
378    impl<T: Iterator<Item = char>> Parser<T> for DelegateParser {
379        fn parse(
380            &self,
381            char: Option<char>,
382            _result: &mut Vec<Token>,
383            _context: &mut ParsingContext<T>,
384        ) -> ParsingState {
385            Delegate(char.expect("Delegate as main parser not possible"))
386        }
387    }
388
389    /// Parses a symbol delimited by the starting char
390    pub struct DelimitedSymbolParser;
391    impl<T: Iterator<Item = char>> Parser<T> for DelimitedSymbolParser {
392        fn parse(
393            &self,
394            char: Option<char>,
395            result: &mut Vec<Token>,
396            context: &mut ParsingContext<T>,
397        ) -> ParsingState {
398            let char = char.expect("Delimited Symbol parsers are not allowed as single");
399            context.push(MacroMap::new().with(char, DelegateParser));
400            let state = loop {
401                if let Some(handle) = context.parse(result) {
402                    if let Delegate(c) = handle.call(&mut Vec::new()).unwrap() {
403                        if c == char {
404                            break Fine(true);
405                        }
406                    }
407                } else {
408                    panic!("End of file inside symbol");
409                }
410            };
411            context.pop();
412            return state;
413        }
414    }
415
416    /// Parses a single line comment delimited by `'\n'`
417    pub struct CommentParser;
418    impl<T: Iterator<Item = char>> Parser<T> for CommentParser {
419        fn parse(
420            &self,
421            _char: Option<char>,
422            _result: &mut Vec<Token>,
423            context: &mut ParsingContext<T>,
424        ) -> ParsingState {
425            context.push(MacroMap::new().with('\n', DelegateParser));
426            let state = loop {
427                if let Some(handle) = context.parse(&mut Vec::new()) {
428                    if let Delegate('\n') = handle.call(&mut Vec::new()).unwrap() {
429                        break Fine(false);
430                    }
431                } else {
432                    break Finished;
433                }
434            };
435
436            context.pop();
437            return state;
438        }
439    }
440
441    /// Wraps the parsing result into a new list.
442    pub struct ListWrapper<P> {
443        /// An optinal prefix for the created list.
444        pub prefix: Option<String>,
445        /// The parser of the list.
446        pub parser: P,
447    }
448    impl<T: Iterator<Item = char>, P: Parser<T>> Parser<T> for ListWrapper<P> {
449        fn parse(
450            &self,
451            char: Option<char>,
452            result: &mut Vec<Token>,
453            context: &mut ParsingContext<T>,
454        ) -> ParsingState {
455            let mut contents = Vec::new();
456            let Self { prefix, parser } = self;
457            contents.extend(prefix.into_iter().map(|name| Token::Symbol(name.clone())));
458            let state = parser.parse(char, &mut contents, context);
459            result.push(List(contents));
460            state
461        }
462    }
463
464    /// Replaces substrings in symbol.
465    pub struct SymbolReplacer<P> {
466        /// An optinal prefix for the created list.
467        pub replacements: Vec<(String, String)>,
468        /// The parser of the symbol.
469        pub parser: P,
470    }
471
472    impl<T: Iterator<Item = char>, P: Parser<T>> Parser<T> for SymbolReplacer<P> {
473        fn parse(
474            &self,
475            char: Option<char>,
476            result: &mut Vec<Token>,
477            context: &mut ParsingContext<T>,
478        ) -> ParsingState {
479            let Self {
480                replacements,
481                parser,
482            } = self;
483            let state = parser.parse(char, result, context);
484            if let Some(Symbol(name)) = result.last_mut() {
485                for (from, to) in replacements {
486                    *name = name.replace(&from[..], &to[..]);
487                }
488            }
489            state
490        }
491    }
492
493    /// Parses a list and appends it to the current list `result`
494    pub struct DelimitedListParser(pub char);
495    impl<T: Iterator<Item = char>> Parser<T> for DelimitedListParser {
496        fn parse(
497            &self,
498            char: Option<char>,
499            result: &mut Vec<Token>,
500            context: &mut ParsingContext<T>,
501        ) -> ParsingState {
502            let reset = context.set(self.0, Some(DelegateParser));
503            let state = loop {
504                if let Some(handle) = context.parse(result) {
505                    let state = handle.call(result).unwrap();
506                    match state {
507                        Fine(_) => (),
508                        Finished => {
509                            if char.is_none() {
510                                break Finished;
511                            } else {
512                                panic!("End of file inside list")
513                            }
514                        }
515                        Delegate(char) => {
516                            if char == self.0 {
517                                break Fine(true);
518                            } else {
519                                panic!("Unexpected delegate")
520                            }
521                        }
522                    }
523                } else {
524                    if char.is_none() {
525                        break Finished;
526                    } else {
527                        panic!("End of file inside list")
528                    }
529                }
530            };
531
532            context.insert(self.0, reset);
533            state
534        }
535    }
536
537    /// The kind infix parsers are interpreted as.
538    pub enum InfixKind {
539        /// Create a two element list.
540        List,
541        /// Create a list prefixed with some symbol.
542        Prefix(String),
543        /// Append the second element to a list, which is the first argument.
544        Append,
545    }
546
    /// Parses a sublist as infix operation, optionally using an infix operator.
    ///
    /// The first field selects how the operands are combined. The second field is `true`
    /// for a right associative operator and `false` for a left associative one.
    ///
    /// The left operand is the token most recently pushed onto `result`; the parser panics
    /// if there is none.
    pub struct InfixParser(pub InfixKind, pub bool);
    impl<T: Iterator<Item = char>> Parser<T> for InfixParser {
        fn parse(
            &self,
            _char: Option<char>,
            result: &mut Vec<Token>,
            context: &mut ParsingContext<T>,
        ) -> ParsingState {
            // The left operand has already been parsed; take it back from the result.
            let last = result.pop().expect("Infix operator cannot stand alone");
            // Start the combined list: a left associative append extends the left operand
            // itself, every other variant starts a fresh list (optionally prefixed).
            let mut new_list = if let (InfixKind::Append, false) = (&self.0, self.1) {
                last.list().expect(
                    "First argument of left associative infix append is required to be a list",
                )
            } else {
                let mut new_list = Vec::new();
                if let InfixKind::Prefix(op) = &self.0 {
                    new_list.push(Symbol(op.clone()));
                }
                new_list.push(last);
                new_list
            };
            // Read the right operand into `new_list`.
            let state = loop {
                let count = new_list.len();
                let handle = context.parse(&mut new_list);
                // A plain symbol was read: that is the operand. The unused handle is
                // dropped here, which hands its macro character back to the context.
                if count < new_list.len() {
                    break Fine(true);
                }
                if let Some(handle) = handle {
                    let state = handle.call(&mut new_list).unwrap();
                    // `Fine(false)` is treated as "no operand yet": keep looking.
                    if let Fine(false) = state {
                        continue;
                    }
                    break state;
                } else {
                    break Finished;
                }
            };

            if self.1 {
                if let Fine(_) = state {
                    // Right associative: move the operand just read into its own list and
                    // keep parsing there until a non-`Fine` state is reported, so that
                    // following macros can still combine with that operand.
                    let mut next_list = Vec::new();
                    next_list.push(new_list.pop().unwrap());
                    let state = loop {
                        if let Some(handle) = context.parse(&mut next_list) {
                            let state = handle.call(&mut next_list).unwrap();
                            if let Fine(_) = state {
                                continue;
                            }
                            break state;
                        } else {
                            break Finished;
                        }
                    };
                    // The first element is the (possibly combined) right operand; the
                    // remaining elements are passed on to the caller's result unchanged.
                    let mut append = next_list.into_iter();
                    let first = append.next().unwrap();
                    if let (InfixKind::Append, true) = (&self.0, self.1) {
                        new_list.extend(first.list().expect("Last argument of right associative infix append is required to be a list"));
                    } else {
                        new_list.push(first);
                    }
                    result.push(List(new_list));
                    result.extend(append);
                    return state;
                }
            }
            // Left associative, or no operand was found: emit what was collected.
            result.push(List(new_list));
            return state;
        }
    }
617}