specmc_base/
parse.rs

1//! Module for parsing tokens.
2
3use std::fmt::Display;
4
5use thiserror::Error;
6
7use crate::ensure;
8
/// Check that the upcoming tokens equal the expected ones, consuming each one as it matches.
///
/// The token list is read from its end: every expected token, in the order it is written
/// in the invocation, is compared with the last element of `$tokens`, and that element is
/// popped when it matches.
///
/// The expansion uses `?`, so it can only be used inside a function whose error type accepts
/// a `ParseError`:
/// - `ParseError::EndOfFile` is propagated when the list is empty.
/// - On a mismatch, a `ParseError::InvalidToken` holding the unexpected token and an
///   `"Expected …"` message is handed to `ensure!`. Tokens matched earlier in the same
///   invocation have already been popped at that point.
#[macro_export]
macro_rules! ensure_tokens {
    ($tokens:ident, $($token:expr),+) => {
        $({
            // Peek first; the token is only removed once it is known to match.
            let next = $tokens.last().ok_or($crate::parse::ParseError::EndOfFile)?;
            $crate::ensure!(
                next == $token,
                $crate::parse::ParseError::InvalidToken {
                    token: next.clone(),
                    error: format!("Expected {}", $token),
                }
            );
            $tokens.pop();
        })+
    };
}
27
/// Errors reported while parsing a token list.
#[derive(Debug, Error, Clone, PartialEq, Eq)]
pub enum ParseError {
    /// The token list ran out while another token was still required.
    #[error("Unexpected EOF")]
    EndOfFile,

    /// A token was available but is not acceptable at this position.
    ///
    /// `token` is the text of the offending token and `error` describes what is wrong with it.
    #[error("Invalid token: {error}: {token}")]
    InvalidToken { token: String, error: String },
}
36
37pub trait Parse
38where
39    Self: Sized,
40{
41    /// Parse a list of tokens into an object, consuming the tokens as needed.
42    /// The token list is consumed in reverse order.
43    /// If this fails, it is **not** guaranteed that no tokens have been consumed.
44    fn parse(tokens: &mut Vec<String>) -> Result<Self, ParseError>;
45}
46
/// A name made of ASCII letters, digits and underscores.
///
/// An identifier is never empty and never starts with a digit. These rules are enforced by
/// the `Parse` implementation; the tuple field is public, so constructing the struct directly
/// performs no validation.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Identifier(pub String);

impl Display for Identifier {
    /// Write the identifier text exactly as stored.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}
57impl Parse for Identifier {
58    fn parse(tokens: &mut Vec<String>) -> Result<Self, ParseError> {
59        let value: String = tokens.pop().ok_or(ParseError::EndOfFile)?;
60
61        ensure!(
62            !value.is_empty(),
63            ParseError::InvalidToken {
64                token: value,
65                error: "Empty identifier".to_string()
66            }
67        );
68
69        let mut chars: std::str::Chars = value.chars();
70        ensure!(
71            chars
72                .next()
73                .map(|c| c.is_ascii_alphabetic() || c == '_')
74                .unwrap(),
75            ParseError::InvalidToken {
76                token: value,
77                error: "Identifiers must not start with a number and can only contain letters, numbers, and underscores".to_string()
78            }
79        );
80        ensure!(
81            chars.all(|c| c.is_ascii_alphanumeric() || c == '_'),
82            ParseError::InvalidToken {
83                token: value,
84                error: "Identifiers can only contain letters, numbers, and underscores".to_string()
85            }
86        );
87
88        Ok(Identifier(value))
89    }
90}
91
/// A constant value written directly in the source.
///
/// One of: a boolean, an integer, a float, or a string.
#[derive(Debug, Clone, PartialEq)]
pub enum Literal {
    Boolean(bool),
    Integer(isize),
    Float(f64),
    String(String),
}

impl Display for Literal {
    /// Render the literal as text.
    ///
    /// Booleans and numbers use their standard `Display` output; strings are wrapped in
    /// double quotes, with no escaping applied to their contents.
    ///
    /// NOTE(review): a float with no fractional part prints without a decimal point
    /// (`Float(123.0)` renders as `123`) - confirm that this is acceptable for consumers.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Boolean(flag) => write!(f, "{flag}"),
            Self::Integer(number) => write!(f, "{number}"),
            Self::Float(number) => write!(f, "{number}"),
            Self::String(text) => write!(f, "\"{text}\""),
        }
    }
}
112impl Parse for Literal {
113    fn parse(tokens: &mut Vec<String>) -> Result<Self, ParseError> {
114        match tokens.pop().ok_or(ParseError::EndOfFile)?.as_str() {
115            "true" => Ok(Literal::Boolean(true)),
116            "false" => Ok(Literal::Boolean(false)),
117            "\"" => {
118                let mut string: String = String::new();
119                while tokens.last().ok_or(ParseError::EndOfFile)? != "\"" {
120                    string += &tokens.pop().unwrap();
121                }
122                ensure_tokens!(tokens, "\"");
123                Ok(Literal::String(string))
124            }
125            token => {
126                let mut token: String = token.to_string();
127                if let "+" | "-" = token.as_str() {
128                    token += &tokens.pop().ok_or(ParseError::EndOfFile)?;
129                }
130
131                if let Ok(int) = strtoint::strtoint(&token) {
132                    Ok(Literal::Integer(int))
133                } else if let Ok(float) = token.parse::<f64>() {
134                    Ok(Literal::Float(float))
135                } else {
136                    tokens.push(token.clone());
137                    Err(ParseError::InvalidToken {
138                        token: token.clone(),
139                        error: "Invalid literal".to_string(),
140                    })
141                }
142            }
143        }
144    }
145}
146
/// Unit tests for the `Identifier` and `Literal` parsers.
///
/// The tests rely on the crate macros `tokenize!` and `test_parse!`, which are defined
/// outside this file. Presumably `tokenize!` produces the token list in the order `parse`
/// consumes it and `test_parse!` compares the result of `parse` with the expected value;
/// verify against their definitions.
#[cfg(test)]
mod tests {
    use crate::{test_parse, tokenize};

    use super::*;

    /// A single well-formed identifier parses, and parsing again reports end of file.
    #[test]
    fn test_identifier() {
        let mut tokens: Vec<String> = tokenize!("cool_identifier");

        test_parse!(
            tokens,
            Identifier,
            Ok(Identifier("cool_identifier".to_string()))
        );

        // The only token has been consumed, so a further parse must hit end of file.
        assert!(tokens.is_empty());
        test_parse!(tokens, Identifier, Err(ParseError::EndOfFile));
    }

    /// Every literal form parses in sequence: booleans, unsigned and signed integers,
    /// unsigned and signed floats, and a quoted string.
    #[test]
    fn test_literal() {
        let mut tokens: Vec<String> = tokenize!("true false 0 +42 -5 123.0 +8.5 -11.4 \"string\"");

        test_parse!(tokens, Literal, Ok(Literal::Boolean(true)));
        test_parse!(tokens, Literal, Ok(Literal::Boolean(false)));
        test_parse!(tokens, Literal, Ok(Literal::Integer(0)));
        test_parse!(tokens, Literal, Ok(Literal::Integer(42)));
        test_parse!(tokens, Literal, Ok(Literal::Integer(-5)));
        test_parse!(tokens, Literal, Ok(Literal::Float(123.0)));
        test_parse!(tokens, Literal, Ok(Literal::Float(8.5)));
        test_parse!(tokens, Literal, Ok(Literal::Float(-11.4)));
        test_parse!(tokens, Literal, Ok(Literal::String("string".to_string())));

        // All tokens have been consumed, so a further parse must hit end of file.
        assert!(tokens.is_empty());
        test_parse!(tokens, Literal, Err(ParseError::EndOfFile));
    }
}