regular 0.1.1

Regular Expression Manipulation and Analysis
Documentation
use std::mem::take;

use anyhow::{anyhow, Result};

use crate::tokens::Token;
use crate::tree::brackets::Bracket;
use crate::tree::classify::{Classify, TokenClass};
use crate::tree::special::Special;

pub(crate) mod brackets;
mod classify;
pub(crate) mod special;

#[derive(Debug, Clone, Eq, PartialEq, Default)]
pub(crate) enum Node {
    #[default]
    Empty,
    Sequence(Vec<Node>),
    Tokens(Vec<Token>),
    Bracketed(Bracket, Box<Node>),
    Special(Special),
    Augmented(Box<Node>, Box<Node>),
    Alternate(Box<Node>, Box<Node>),
}

#[derive(Debug, Clone, Default)]
enum PartialNode {
    #[default]
    Empty,
    Sequence(Vec<Node>),
    Tokens(Vec<Token>),
    Bracketed(Bracket),
    Alternate(Box<Node>),
}

impl PartialNode {
    fn end_with(self, terminator: Node) -> Result<Node> {
        match self {
            PartialNode::Empty => Ok(terminator),
            PartialNode::Sequence(mut sequence) => {
                if let Node::Sequence(_) = terminator {
                    Err(anyhow!("Can't nest sequences"))
                } else {
                    if terminator != Node::Empty {
                        sequence.push(terminator);
                    }
                    if let Some((first, rest)) = sequence.split_first_mut() {
                        if rest.is_empty() {
                            Ok(take(first))
                        } else {
                            Ok(Node::Sequence(sequence))
                        }
                    } else {
                        Ok(Node::Empty)
                    }
                }
            }
            PartialNode::Tokens(t) => {
                if Node::Empty == terminator {
                    Ok(Node::Tokens(t))
                } else {
                    Err(anyhow!("Can't nest {terminator:?} inside a Tokens"))
                }
            }
            PartialNode::Bracketed(b) => Err(anyhow!("Found an unclosed bracket: {b:?}")),
            PartialNode::Alternate(first) => Ok(Node::Alternate(first, Box::new(terminator))),
        }
    }
}

trait AppendChild {
    fn append_child(&mut self, new: Node) -> Result<()>;
}

impl AppendChild for Vec<PartialNode> {
    fn append_child(&mut self, new: Node) -> Result<()> {
        trace!("Append Child, stack: {self:?}, child: {new:?}");
        if let Some(mut containing_partial) = self.pop() {
            match &mut containing_partial {
                PartialNode::Empty => self.push(PartialNode::Sequence(vec![new])),
                PartialNode::Sequence(sequence) => {
                    sequence.push(new);
                    self.push(containing_partial);
                }
                PartialNode::Tokens(tokens) => {
                    self.push(PartialNode::Sequence(vec![Node::Tokens(take(tokens)), new]))
                }
                PartialNode::Bracketed(_) => {
                    self.push(containing_partial);
                    self.push(PartialNode::Sequence(vec![new]));
                }
                PartialNode::Alternate(first) => {
                    self.append_child(Node::Alternate(take(first), Box::new(new)))?;
                }
            }
        } else {
            self.push(PartialNode::Sequence(vec![new]))
        }

        trace!("Appended Child, stack: {self:?}");
        Ok(())
    }
}

fn close_bracket(
    stack: Vec<PartialNode>,
    closing_bracket: Bracket,
    last_node: Node,
) -> Result<Vec<PartialNode>> {
    trace!("Stack: {stack:?}, closing_bracket: {closing_bracket:?}, last_node: {last_node:?}");
    let (stack, remaining_node) = stack.into_iter().try_rfold(
        (None, last_node),
        |(maybe_stack, last_node): (Option<Vec<PartialNode>>, Node), partial| -> Result<_> {
            trace!("Stack: {maybe_stack:?}, Last Node: {last_node:?}");
            if let Some(mut stack) = maybe_stack {
                stack.insert(0, partial);
                if last_node != Node::Empty {
                    stack.append_child(last_node)?
                }
                Ok((Some(stack), Node::Empty))
            } else if let PartialNode::Bracketed(opening_bracket) = partial {
                if opening_bracket == closing_bracket {
                    Ok((
                        Some(vec![]),
                        Node::Bracketed(opening_bracket, Box::new(last_node)),
                    ))
                } else {
                    Err(anyhow!(
                        "Mismatched brackets: {opening_bracket:?} closed by {closing_bracket:?}"
                    ))
                }
            } else {
                let next_node = partial.end_with(last_node)?;
                Ok((None, next_node))
            }
        },
    )?;
    let stack = stack.ok_or(anyhow!("Close Bracket without Opening Bracket"))?;

    trace!("Stack: {stack:?} Remaining Node: {remaining_node:?}");
    if remaining_node == Node::Empty {
        Ok(stack)
    } else {
        assert!(stack.is_empty());
        Ok(vec![PartialNode::Sequence(vec![remaining_node])])
    }
}

pub(crate) fn tree<T: IntoIterator<Item = Token>>(tokens: T) -> Result<Node> {
    let stack: Vec<PartialNode> = tokens.into_iter().try_fold(
        vec![PartialNode::Empty],
        |mut stack: Vec<PartialNode>, token: Token| -> Result<Vec<PartialNode>> {
            trace!("Token: {token:?}, Stack: {stack:?}");
            let token_class = token.classify();
            if let Some(node) = stack.pop() {
                match node {
                    PartialNode::Empty => match token_class {
                        TokenClass::OpenBracket => {
                            stack.push(PartialNode::Bracketed(Bracket::try_from(token)?));
                            stack.push(PartialNode::Empty);
                            Ok(stack)
                        }
                        TokenClass::CloseBracket => {
                            close_bracket(stack, token.try_into()?, Node::Empty)
                        }
                        TokenClass::Augmentation => {
                            stack.push(PartialNode::Sequence(vec![Node::Special(
                                token.try_into()?,
                            )]));
                            Ok(stack)
                        }
                        TokenClass::Regular => {
                            stack.push(PartialNode::Tokens(vec![token]));
                            Ok(stack)
                        }
                        TokenClass::Problematic => return Err(anyhow!("Unexpected escape")),
                        TokenClass::Alternation => {
                            stack.push(PartialNode::Alternate(Box::new(Node::Empty)));
                            stack.push(PartialNode::Empty);
                            Ok(stack)
                        }
                    },
                    PartialNode::Sequence(mut sequence) => match token_class {
                        TokenClass::OpenBracket => {
                            stack.push(PartialNode::Sequence(sequence));
                            stack.push(PartialNode::Bracketed(token.try_into()?));
                            stack.push(PartialNode::Empty);
                            Ok(stack)
                        }
                        TokenClass::CloseBracket => close_bracket(
                            stack,
                            token.try_into()?,
                            PartialNode::Sequence(sequence).end_with(Node::Empty)?,
                        ),
                        TokenClass::Augmentation => {
                            let previous = sequence
                                .pop()
                                .ok_or(anyhow!("Special character at start of sequence"))?;
                            sequence.push(Node::Augmented(
                                Box::new(previous),
                                Box::new(Node::Special(token.try_into()?)),
                            ));
                            stack.push(PartialNode::Sequence(sequence));
                            Ok(stack)
                        }
                        TokenClass::Regular => {
                            stack.push(PartialNode::Sequence(sequence));
                            stack.push(PartialNode::Tokens(vec![token]));
                            Ok(stack)
                        }
                        TokenClass::Problematic => Err(anyhow!("Unexpected Escape")),
                        TokenClass::Alternation => {
                            stack.push(PartialNode::Alternate(Box::new(Node::Sequence(sequence))));
                            stack.push(PartialNode::Empty);
                            Ok(stack)
                        }
                    },
                    PartialNode::Tokens(mut token_sequence) => match token_class {
                        TokenClass::OpenBracket => {
                            let container = stack.pop().ok_or(anyhow!("Stack Underflow"))?;
                            match container {
                                PartialNode::Empty => {
                                    return Err(anyhow!("Found an unexpected Empty on the stack"))
                                }
                                PartialNode::Sequence(mut sequence) => {
                                    sequence.push(Node::Tokens(token_sequence));
                                    stack.push(PartialNode::Sequence(sequence))
                                }
                                PartialNode::Tokens(_) => {
                                    return Err(anyhow!("Unexpected nested token sequences"))
                                }
                                PartialNode::Bracketed(_) | PartialNode::Alternate(_) => {
                                    stack.push(container);
                                    stack.push(PartialNode::Sequence(vec![Node::Tokens(
                                        token_sequence,
                                    )]));
                                }
                            }
                            stack.push(PartialNode::Bracketed(token.try_into()?));
                            stack.push(PartialNode::Empty);
                            Ok(stack)
                        }
                        TokenClass::CloseBracket => {
                            close_bracket(stack, token.try_into()?, Node::Tokens(token_sequence))
                        }
                        TokenClass::Augmentation => {
                            if let Some(augmentee) = token_sequence.pop() {
                                stack.append_child(Node::Tokens(token_sequence))?;
                                stack.append_child(Node::Augmented(
                                    Box::new(Node::Tokens(vec![augmentee])),
                                    Box::new(Node::Special(token.try_into()?)),
                                ))?;
                            } else {
                                stack.append_child(Node::Special(token.try_into()?))?;
                            }
                            Ok(stack)
                        }
                        TokenClass::Regular => {
                            token_sequence.push(token);
                            stack.push(PartialNode::Tokens(token_sequence));
                            Ok(stack)
                        }
                        TokenClass::Problematic => Err(anyhow!("Unexpected Escape")),
                        TokenClass::Alternation => {
                            stack.push(PartialNode::Alternate(Box::new(Node::Tokens(
                                token_sequence,
                            ))));
                            stack.push(PartialNode::Empty);
                            Ok(stack)
                        }
                    },
                    PartialNode::Bracketed(_) | PartialNode::Alternate(_) => {
                        Err(anyhow!("Should not find {node:?} at the top of the stack"))
                    }
                }
            } else {
                Err(anyhow!("Ran out of stack"))
            }
        },
    )?;

    assert!(!stack.is_empty());

    stack
        .into_iter()
        .try_rfold(Node::Empty, |node, partial| partial.end_with(node))
}

#[cfg(test)]
#[allow(clippy::expect_used)]
mod test {
    use lazy_static::lazy_static;
    use pretty_assertions::assert_eq;
    use rstest::rstest;

    use crate::tokens::Token::*;
    use crate::tokens::{Token, TokenMeta, Tokenise};
    use crate::tree::special::Special::Star;
    use crate::tree::Node::*;
    use crate::tree::{tree, Bracket, Node};

    lazy_static! {
        static ref STAR: Node = Bracketed(
            Bracket::Parentheses,
            Box::new(Augmented(
                Box::new(Bracketed(
                    Bracket::Parentheses,
                    Box::new(Alternate(
                        Box::new(Empty),
                        Box::new(Tokens(vec![Raw('a', TokenMeta { position: 3 })]))
                    ))
                )),
                Box::new(Special(Star))
            ))
        );
    }

    #[rstest]
    #[case("a", &Node::Tokens(vec![Token::Raw('a', TokenMeta {position: 0})]))]
    #[case("ab*", &Sequence(vec![Tokens(vec![Raw('a', TokenMeta {position: 0})]), Augmented(Box::new(Tokens(vec![Raw('b', TokenMeta {position: 1})])), Box::new(Special(Star)))]))]
    #[case("a|", &Alternate(Box::new(Tokens(vec![Raw('a', TokenMeta {position: 0})])), Box::new(Empty)))]
    #[case("|a", &Alternate(Box::new(Empty), Box::new(Tokens(vec![Raw('a', TokenMeta {position: 1})]))))]
    #[case("(|a)", &Bracketed(Bracket::Parentheses, Box::new(Alternate(Box::new(Empty), Box::new(Tokens(vec![Raw('a', TokenMeta {position: 2})]))))))]
    #[case("((|a)*)", &STAR)]
    #[case("(|a\\))", &Bracketed(Bracket::Parentheses, Box::new(Alternate(Box::new(Empty), Box::new(Tokens(vec![Raw('a', TokenMeta {position: 2}),Escaped(')', TokenMeta {position: 3})]))))))]
    #[case("[*]", &Bracketed(Bracket::Square, Box::new(Special(Star))))]
    fn test_tree(#[case] input: String, #[case] expected: &Node) {
        let tokens = input.tokenise();
        let actual = tree(tokens).expect("Failed to tree");
        assert_eq!(expected, &actual);
    }
}