bool_tag_expr/
lexical_parse.rs

1//!
2//! Lexical parsing of boolean expressions
3//!
4
5use crate::{ParseError, Tag, TagError, TagName, TagValue};
6use std::fmt::Display;
7use thiserror::Error;
8
9/// A stream of lexical [`Token`]s
10#[derive(Debug, Clone)]
11pub struct LexicalTokenStream(Vec<Token>);
12
13impl ToString for LexicalTokenStream {
14    fn to_string(&self) -> String {
15        let mut output_str = String::new();
16        for token in &self.0 {
17            output_str.push_str(&token.to_string());
18        }
19        output_str
20    }
21}
22
23impl LexicalTokenStream {
24    /// Borrow the underlying tokens
25    #[must_use]
26    pub const fn tokens(&self) -> &Vec<Token> {
27        &self.0
28    }
29}
30
31/// Possible lexical tokens
32#[derive(Debug, Clone, Hash, PartialEq, Eq)]
33pub enum Token {
34    OpenBracket,
35    CloseBracket,
36    Not,
37    And,
38    Or,
39    Tag(Tag),
40}
41
42impl Display for Token {
43    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
44        match self {
45            Self::OpenBracket => write!(f, "("),
46            Self::CloseBracket => write!(f, ")"),
47            Self::Not => write!(f, "!"),
48            Self::And => write!(f, "&"),
49            Self::Or => write!(f, "|"),
50            Self::Tag(tag) => match &tag.name {
51                None => write!(f, "{}", tag.value),
52                Some(tag_name) => write!(f, "{}={}", tag_name, tag.value),
53            },
54        }
55    }
56}
57
58/// Possible lexical errors
59#[derive(Debug, PartialEq, Eq, Clone, Error, Hash)]
60pub enum LexicalParseError {
61    /// The boolean expression string contains a disallowed char at the stored
62    /// index
63    #[error("Disallowed character at index {0}")]
64    DisallowedChar(usize),
65
66    /// A [`TagError`] encountered while attempting to parse a tag
67    #[error("Tag error: {0}")]
68    TagError(#[from] TagError),
69}
70
71// TODO: consider making `pub trait LexicalParse<T: ToString>`
72/// Implementing types can be lexically parsed to a token stream
73pub trait BoolTagExprLexicalParse: ToString {
74    /// Lexically parse value
75    fn lexical_parse(self) -> Result<LexicalTokenStream, ParseError>;
76}
77
78// TODO: Change `ToString` to own `ToBoolTagExprString` so that things have to impl it?
79/// Blanket implementation of lexical parsing for any type that implements
80/// `ToString`
81impl<T: ToString> BoolTagExprLexicalParse for T {
82    fn lexical_parse(self) -> Result<LexicalTokenStream, ParseError> {
83        Ok(LexicalTokenStream(lexical_parse(&self.to_string())?))
84    }
85}
86
/// Records which part of a tag, if any, the lexer is currently reading
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum TagParse {
    /// Not inside a tag
    None,
    /// Reading the characters of a tag's name
    Name,
    /// Reading the characters of a tag's value
    Value,
}
95
/// Scratch buffers holding the raw text of the tag currently being lexed
#[derive(Debug)]
struct ParsedTag {
    /// Characters read before any `=`
    name: String,
    /// Characters read after an `=`
    value: String,
}
102
103// TODO: limit string length?
104/// Lexically parse a boolean expression string to a stream of lexical tokens
105fn lexical_parse(string: &str) -> Result<Vec<Token>, LexicalParseError> {
106    let mut tokens = vec![];
107    let mut parsed_tag = ParsedTag {
108        name: String::new(),
109        value: String::new(),
110    };
111
112    let mut tag_state = TagParse::None;
113    for (index, char) in string.char_indices() {
114        // TODO: allow uppercase to and convert to lowercase?
115        if char.is_ascii_lowercase() || char == '-' {
116            if char == '-' {
117                // `-` must be followed by an alphabetic char
118                if index < (string.len() - 1)
119                    && !string.chars().nth(index + 1).unwrap().is_alphabetic()
120                {
121                    return Err(LexicalParseError::DisallowedChar(index + 1));
122                }
123                // `-` must be preceded by an alphabetic char
124                if index > 0 && !string.chars().nth(index - 1).unwrap().is_alphabetic() {
125                    return Err(LexicalParseError::DisallowedChar(index));
126                }
127            }
128
129            match tag_state {
130                TagParse::None => {
131                    if char == '-' {
132                        // TODO: can't begin a tag with '-' (add error variant)
133                        return Err(LexicalParseError::DisallowedChar(index));
134                    }
135                    tag_state = TagParse::Name;
136                    parsed_tag.name.push(char);
137                }
138                TagParse::Name => {
139                    parsed_tag.name.push(char);
140                }
141                TagParse::Value => {
142                    parsed_tag.value.push(char);
143                }
144            }
145        } else if char == '=' {
146            tag_state = TagParse::Value;
147            match string.chars().nth(index + 1) {
148                Some(char) => {
149                    if !char.is_alphabetic() {
150                        return Err(LexicalParseError::DisallowedChar(index + 1));
151                    }
152                }
153                None => return Err(LexicalParseError::DisallowedChar(index)),
154            }
155        } else if "()!&| ".contains(char) {
156            tag_state = TagParse::None;
157            handle_parsed_tag(&mut parsed_tag, &mut tokens)?;
158            if char == '(' {
159                tokens.push(Token::OpenBracket);
160            } else if char == ')' {
161                tokens.push(Token::CloseBracket);
162            } else if char == '!' {
163                tokens.push(Token::Not);
164            } else if char == '&' {
165                tokens.push(Token::And);
166            } else if char == '|' {
167                tokens.push(Token::Or);
168            }
169        } else {
170            return Err(LexicalParseError::DisallowedChar(index));
171        }
172    }
173    handle_parsed_tag(&mut parsed_tag, &mut tokens)?;
174    Ok(tokens)
175}
176
177/// Convert the [`ParsedTag`] to a [`Tag`] if possible, and add it to the tokens
178fn handle_parsed_tag(
179    parsed_tag: &mut ParsedTag,
180    tokens: &mut Vec<Token>,
181) -> Result<(), LexicalParseError> {
182    // If there's a tag name but no tag value, the tag name is actually a tag value
183    if !parsed_tag.name.is_empty() && parsed_tag.value.is_empty() {
184        parsed_tag.value = parsed_tag.name.clone();
185        parsed_tag.name = String::new();
186    }
187    match (!parsed_tag.name.is_empty(), !parsed_tag.value.is_empty()) {
188        (true, true) => {
189            let tag_name = TagName::from(&parsed_tag.name)?;
190            let tag_value = TagValue::from(&parsed_tag.value)?;
191            tokens.push(Token::Tag(Tag::from(Some(tag_name), tag_value)));
192            *parsed_tag = ParsedTag {
193                name: String::new(),
194                value: String::new(),
195            };
196        }
197        (false, true) => {
198            let tag_value = TagValue::from(&parsed_tag.value)?;
199            tokens.push(Token::Tag(Tag::from(None, tag_value)));
200            *parsed_tag = ParsedTag {
201                name: String::new(),
202                value: String::new(),
203            };
204        }
205        (true, false) => panic!(),
206        (false, false) => (),
207    }
208    Ok(())
209}
210
#[cfg(test)]
mod test {
    use super::*;

    /// Checks which expressions the lexer rejects and which it accepts. The
    /// accepted list includes unbalanced brackets and dangling operators:
    /// those are expected to lex without error.
    #[test]
    fn lexical_parse() {
        // Should fail
        let rejected = [
            "((x= a & b) | (c & b)) & !d",
            "x= a",
            "fa---il",
            "tag-",
            "-tag",
            "-tag-",
            "a | -tag",
            "=",
            "tag \t other",
            "-",
        ];
        for expr in rejected.iter().copied() {
            // Name the expression so a failure is immediately actionable
            assert!(
                expr.lexical_parse().is_err(),
                "expected `{}` to be rejected",
                expr
            );
        }

        // Should pass
        let accepted = [
            "((x=a & b) | (c & b)) & !d",
            "(tag & !other)",
            "x-y=a-b",
            "=a",
            "=a & =b",
            "a-tag",
            "",
            "!!",
            "()",
            "((",
            "||",
            "|)(!&",
            "a ) a",
            "a)a",
        ];
        for expr in accepted.iter().copied() {
            if let Err(error) = expr.lexical_parse() {
                panic!("expected `{}` to be accepted, got {:?}", expr, error);
            }
        }
    }

    /// A whitespace-free expression must survive a lex -> `to_string` round
    /// trip unchanged
    #[test]
    fn to_boolean_expression() {
        // Should pass
        let source = "((x=a&b)|(c&b))&!d";
        let rendered = source
            .lexical_parse()
            .expect("expression should be lexically valid")
            .to_string();
        assert_eq!(source, rendered);
    }
}
263}