bool-tag-expr 0.1.0-beta.2

Parse boolean expressions of tags for filtering and selecting
Documentation
//!
//! Lexical parsing of boolean expressions
//!

use crate::{ParseError, Tag, TagError, TagName, TagValue};
use std::fmt::Display;
use thiserror::Error;

/// A stream of lexical [`Token`]s
///
/// A thin wrapper around a `Vec<Token>`. The wrapped tokens can be borrowed
/// with [`LexicalTokenStream::tokens`], and the stream can be rendered back
/// to a string with `to_string()`.
#[derive(Debug, Clone)]
pub struct LexicalTokenStream(Vec<Token>);

impl ToString for LexicalTokenStream {
    fn to_string(&self) -> String {
        let mut output_str = String::new();
        for token in &self.0 {
            output_str.push_str(&token.to_string());
        }
        output_str
    }
}

impl LexicalTokenStream {
    /// Borrow the underlying tokens
    #[must_use]
    pub const fn tokens(&self) -> &Vec<Token> {
        &self.0
    }
}

/// Possible lexical tokens
///
/// Each variant is displayed as the text noted on it.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum Token {
    /// An opening bracket, displayed as `(`
    OpenBracket,
    /// A closing bracket, displayed as `)`
    CloseBracket,
    /// The negation operator, displayed as `!`
    Not,
    /// The conjunction operator, displayed as `&`
    And,
    /// The disjunction operator, displayed as `|`
    Or,
    /// A tag, displayed as `name=value` when it has a name and as `value`
    /// otherwise
    Tag(Tag),
}

impl Display for Token {
    /// Write the textual form of the token: a single symbol for brackets and
    /// operators, `name=value` or just `value` for tags
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Brackets and operators map onto fixed symbols; tags are the only
        // tokens that need real formatting, so they return early
        let symbol = match self {
            Self::Tag(tag) => {
                return if let Some(tag_name) = &tag.name {
                    write!(f, "{}={}", tag_name, tag.value)
                } else {
                    write!(f, "{}", tag.value)
                };
            }
            Self::OpenBracket => "(",
            Self::CloseBracket => ")",
            Self::Not => "!",
            Self::And => "&",
            Self::Or => "|",
        };
        f.write_str(symbol)
    }
}

/// Possible lexical errors
#[derive(Debug, PartialEq, Eq, Clone, Error, Hash)]
pub enum LexicalParseError {
    /// The boolean expression string contains a disallowed char at the stored
    /// index
    ///
    /// The index is a byte offset into the expression string. This variant is
    /// also used for a `-` or `=` that appears in a position where it is not
    /// permitted.
    #[error("Disallowed character at index {0}")]
    DisallowedChar(usize),

    /// A [`TagError`] encountered while attempting to parse a tag
    #[error("Tag error: {0}")]
    TagError(#[from] TagError),
}

// TODO: consider making `pub trait LexicalParse<T: ToString>`
/// Implementing types can be lexically parsed to a token stream
pub trait BoolTagExprLexicalParse: ToString {
    /// Lexically parse value
    ///
    /// Consumes `self` and produces the resulting [`LexicalTokenStream`].
    ///
    /// # Errors
    ///
    /// Returns a [`ParseError`] if the value cannot be lexically parsed.
    fn lexical_parse(self) -> Result<LexicalTokenStream, ParseError>;
}

// TODO: Change `ToString` to own `ToBoolTagExprString` so that things have to impl it?
/// Every `ToString` type gets lexical parsing for free: the value is first
/// rendered to a string, and that string is then lexed
impl<T: ToString> BoolTagExprLexicalParse for T {
    fn lexical_parse(self) -> Result<LexicalTokenStream, ParseError> {
        let expression = self.to_string();
        // `?` converts the lexical error into the crate-level `ParseError`
        let tokens = lexical_parse(&expression)?;
        Ok(LexicalTokenStream(tokens))
    }
}

/// Use to keep track of what part of a tag is currently being lexically
/// parsed
#[derive(Debug, Hash, Clone, Copy, PartialEq, Eq)]
enum TagParse {
    /// Not currently inside a tag (initial state, and the state after any
    /// bracket, operator or space)
    None,
    /// Chars are being collected into the tag's name buffer
    Name,
    /// An `=` has been seen; chars are being collected into the tag's value
    /// buffer
    Value,
}

/// Use when parsing tags
///
/// Scratch buffers that accumulate the chars of the tag currently being
/// lexed, before they are validated and turned into a [`Tag`].
#[derive(Debug)]
struct ParsedTag {
    /// Chars collected before any `=`. If no value follows, this text is
    /// treated as the tag's value rather than its name.
    name: String,
    /// Chars collected after an `=`
    value: String,
}

// TODO: limit string length?
/// Lexically parse a boolean expression string to a stream of lexical tokens
///
/// The string is scanned once, left to right:
///
/// - ASCII lowercase letters and `-` are collected into the current tag
/// - `=` switches from collecting the tag's name to collecting its value
/// - `(`, `)`, `!`, `&`, `|` and the space char finish the current tag (if
///   any); all but the space also emit their own token
///
/// Any tag still being collected when the string ends is emitted last.
///
/// # Errors
///
/// Returns [`LexicalParseError::DisallowedChar`], carrying the byte index of
/// the offending char, when:
///
/// - a char outside the sets listed above is encountered
/// - a `-` is directly followed or preceded by a non-alphabetic char, or
///   would start a tag
/// - an `=` is not directly followed by an alphabetic char, or is the last
///   char of the string
/// - a second `=` appears within the same tag (e.g. `a=b=c`)
///
/// Returns [`LexicalParseError::TagError`] when the collected name or value
/// is rejected while being converted to a [`Tag`].
fn lexical_parse(string: &str) -> Result<Vec<Token>, LexicalParseError> {
    let mut tokens = vec![];
    let mut parsed_tag = ParsedTag {
        name: String::new(),
        value: String::new(),
    };
    let mut tag_state = TagParse::None;

    // Neighbouring chars are needed to validate `-` and `=`. Peeking ahead
    // and remembering the previous char keeps this a single linear pass
    // instead of re-scanning the string from the start for every lookup.
    let mut previous: Option<char> = None;
    let mut chars = string.char_indices().peekable();

    while let Some((index, current)) = chars.next() {
        let next = chars.peek().copied();

        match current {
            // TODO: allow uppercase to and convert to lowercase?
            'a'..='z' | '-' => {
                if current == '-' {
                    // `-` must be followed by an alphabetic char. A trailing
                    // `-` is not rejected here; it is left to tag validation.
                    if let Some((next_index, _)) = next.filter(|&(_, c)| !c.is_alphabetic()) {
                        return Err(LexicalParseError::DisallowedChar(next_index));
                    }
                    // `-` must be preceded by an alphabetic char
                    if matches!(previous, Some(c) if !c.is_alphabetic()) {
                        return Err(LexicalParseError::DisallowedChar(index));
                    }
                }

                match tag_state {
                    TagParse::None => {
                        if current == '-' {
                            // TODO: can't begin a tag with '-' (add error variant)
                            return Err(LexicalParseError::DisallowedChar(index));
                        }
                        tag_state = TagParse::Name;
                        parsed_tag.name.push(current);
                    }
                    TagParse::Name => parsed_tag.name.push(current),
                    TagParse::Value => parsed_tag.value.push(current),
                }
            }
            '=' => {
                // A tag has at most one `=`. Without this check the extra `=`
                // would be silently dropped and `a=b=c` would lex as `a=bc`.
                if tag_state == TagParse::Value {
                    return Err(LexicalParseError::DisallowedChar(index));
                }
                tag_state = TagParse::Value;

                // `=` must be directly followed by the start of the value
                match next {
                    Some((next_index, next_char)) => {
                        if !next_char.is_alphabetic() {
                            return Err(LexicalParseError::DisallowedChar(next_index));
                        }
                    }
                    None => return Err(LexicalParseError::DisallowedChar(index)),
                }
            }
            '(' | ')' | '!' | '&' | '|' | ' ' => {
                // Any of these ends the tag being collected
                tag_state = TagParse::None;
                handle_parsed_tag(&mut parsed_tag, &mut tokens)?;
                match current {
                    '(' => tokens.push(Token::OpenBracket),
                    ')' => tokens.push(Token::CloseBracket),
                    '!' => tokens.push(Token::Not),
                    '&' => tokens.push(Token::And),
                    '|' => tokens.push(Token::Or),
                    // A space only separates; it produces no token
                    _ => (),
                }
            }
            _ => return Err(LexicalParseError::DisallowedChar(index)),
        }

        previous = Some(current);
    }

    // Emit a tag that runs up to the end of the string
    handle_parsed_tag(&mut parsed_tag, &mut tokens)?;
    Ok(tokens)
}

/// Convert the [`ParsedTag`] to a [`Tag`] if possible, and add it to the tokens
///
/// Both buffers of `parsed_tag` are emptied by this call, so it is ready to
/// collect the next tag. If both buffers were already empty, nothing is
/// pushed.
///
/// # Errors
///
/// Returns [`LexicalParseError::TagError`] if the collected name or value is
/// rejected by [`TagName::from`] or [`TagValue::from`]; in that case no token
/// is pushed.
fn handle_parsed_tag(
    parsed_tag: &mut ParsedTag,
    tokens: &mut Vec<Token>,
) -> Result<(), LexicalParseError> {
    // Move the buffered text out (leaving empty strings behind) rather than
    // cloning it and resetting the struct afterwards
    let name = std::mem::take(&mut parsed_tag.name);
    let value = std::mem::take(&mut parsed_tag.value);

    let tag = match (name.is_empty(), value.is_empty()) {
        // Nothing was collected, so there is nothing to emit
        (true, true) => return Ok(()),
        // If there's a tag name but no tag value, the tag name is actually a tag value
        (false, true) => Tag::from(None, TagValue::from(&name)?),
        // A value without a name, e.g. `=a`
        (true, false) => Tag::from(None, TagValue::from(&value)?),
        // A full `name=value` tag
        (false, false) => {
            let tag_name = TagName::from(&name)?;
            let tag_value = TagValue::from(&value)?;
            Tag::from(Some(tag_name), tag_value)
        }
    };

    tokens.push(Token::Tag(tag));
    Ok(())
}

#[cfg(test)]
mod test {
    use super::*;

    /// Checks which expressions the lexer rejects and which it accepts.
    /// Only lexical validity is tested: sequences such as `((` or `||` are
    /// expected to lex successfully.
    #[test]
    fn lexical_parse() {
        // Should fail
        let exprs = [
            "((x= a & b) | (c & b)) & !d",
            "x= a",
            "fa---il",
            "tag-",
            "-tag",
            "-tag-",
            "a | -tag",
            "=",
            "tag \t other",
            "-",
        ];
        for expr in exprs {
            // Name the offending expression so a failure is easy to locate
            assert!(
                expr.lexical_parse().is_err(),
                "expected a lexical error for {:?}",
                expr
            );
        }

        // Should pass
        let exprs = [
            "((x=a & b) | (c & b)) & !d",
            "(tag & !other)",
            "x-y=a-b",
            "=a",
            "=a & =b",
            "a-tag",
            "",
            "!!",
            "()",
            "((",
            "||",
            "|)(!&",
            "a ) a",
            "a)a",
        ];
        for expr in exprs {
            if let Err(error) = expr.lexical_parse() {
                panic!("expected {:?} to lex successfully, got {:?}", expr, error);
            }
        }
    }

    /// Lexing an expression without spaces and rendering the token stream
    /// back to a string must reproduce the input exactly.
    #[test]
    fn to_boolean_expression() {
        // Should pass
        let a = "((x=a&b)|(c&b))&!d".to_string();
        let parsed = a.clone().lexical_parse().unwrap().to_string();
        assert_eq!(a, parsed);
    }
}