bitcalc 0.3.0

A calculator with bit operations
use std::{
    fmt::Display,
    ops::{ControlFlow, Range},
};

/// A single lexical token of the calculator language.
///
/// Punctuation, delimiter and keyword variants carry no data. Literal
/// variants borrow their exact source text (lifetime `'s`) from the lexer
/// input, so no allocation happens while lexing.
#[derive(Clone, Debug, PartialEq)]
pub enum Token<'s> {
    // === Punctuation ===
    /// `&`
    Amp,
    /// `&&`
    AmpAmp,
    /// `<=`
    AngleLeftEq,
    /// `>=`
    AngleRightEq,
    /// `->`
    Arrow,
    /// `!`
    Bang,
    /// `!=`
    BangEq,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `=`
    Eq,
    /// `==`
    EqEq,
    /// `-`
    Hyphen,
    /// `.`
    Period,
    /// `|`
    Pipe,
    /// `||`
    PipePipe,
    /// `+`
    Plus,
    /// `?`
    QuestionMark,
    /// `;`
    SemiColon,
    /// `/`
    Slash,
    /// `*`
    Star,
    /// `**`
    StarStar,
    /// `^`
    Hat,
    /// A lone `_` that is not immediately followed by a digit.
    Underscore,
    /// `%`
    Percent,
    /// `<<`
    ShiftLeft,
    /// `>>`
    ShiftRight,
    /// The keyword `rotl`.
    RotLeft,
    /// The keyword `rotr`.
    RotRight,

    // === Delimiters ===
    /// `(`
    RoundLeft,
    /// `)`
    RoundRight,

    // === Literals ===
    /// Decimal literal: the raw source text, made of ASCII digits and `_`.
    Integer(&'s str),
    /// Hexadecimal literal: the raw source text, including the `0x` prefix
    /// and any `_` characters.
    Hex(&'s str),
    /// Binary literal: the raw source text, including the `0b` prefix and
    /// any `_` characters.
    Binary(&'s str),
    /// `_` followed by one or more ASCII digits; the text includes the
    /// leading underscore. Presumably refers to an earlier result by
    /// index (TODO confirm against the evaluator).
    History(&'s str),
}

/// Tokenizer over a borrowed input string, with one item of lookahead.
pub struct Lexer<'a> {
    /// The not-yet-consumed tail of the input; it shrinks as tokens are lexed.
    input: &'a str,
    /// Byte length of the complete input. Subtracting the remaining length
    /// from it yields the absolute byte offset used in token spans.
    original_length: usize,
    /// Item already lexed by `peek` but not yet handed out by `next`.
    peeked: Option<(Result<Token<'a>, ()>, Range<usize>)>,
}

impl<'a> Lexer<'a> {
    /// Returns the next item of the token stream and advances past it.
    ///
    /// Each item is a pair of the lexing outcome and the byte range it
    /// occupies in the original input:
    ///
    /// * `Some((Ok(token), span))` – a recognised token.
    /// * `Some((Err(()), span))` – a character no rule recognises; `span`
    ///   covers exactly that character.
    /// * `None` – only whitespace/comments (or nothing) remained.
    ///
    /// An item previously produced by [`Lexer::peek`] is returned first.
    pub fn next(&mut self) -> Option<(Result<Token<'a>, ()>, Range<usize>)> {
        self.peeked.take().or_else(|| self.next_inner())
    }

    /// Returns a reference to the item the next call to [`Lexer::next`]
    /// will yield, lexing it on first use and caching it until consumed.
    pub fn peek(&mut self) -> &Option<(Result<Token<'a>, ()>, Range<usize>)> {
        if self.peeked.is_none() {
            self.peeked = self.next_inner();
        }
        &self.peeked
    }

    /// Lexes one item directly from the input, bypassing the lookahead slot.
    fn next_inner(&mut self) -> Option<(Result<Token<'a>, ()>, Range<usize>)> {
        match self.next_token() {
            ControlFlow::Break((tok, span)) => Some((Ok(tok), span)),
            ControlFlow::Continue(()) => {
                // No rule matched. Either the input is exhausted (`?` yields
                // `None`) or the next character is not part of the language.
                let unrecognised = self.input.chars().next()?;

                // Consume the whole character so that
                //  * the span ends on a UTF-8 boundary even for multi-byte
                //    characters (a fixed `start + 1` would not), and
                //  * the lexer makes progress instead of reporting the same
                //    error forever on subsequent calls.
                let (_, span) = self.bump(unrecognised.len_utf8());
                Some((Err(()), span))
            }
        }
    }
}

impl<'s> Lexer<'s> {
    /// Creates a lexer positioned at the start of `input`.
    pub fn new(input: &'s str) -> Self {
        let original_length = input.len();
        Self {
            input,
            original_length,
            peeked: None,
        }
    }

    /// Consumes the first `n` bytes of the remaining input.
    ///
    /// Returns the consumed text and its byte range within the original
    /// input. `n` must lie on a char boundary and within the remaining
    /// input, otherwise `str::split_at` panics.
    fn bump(&mut self, n: usize) -> (&'s str, Range<usize>) {
        let start = self.original_length - self.input.len();
        let (consumed, remaining) = self.input.split_at(n);
        self.input = remaining;
        (consumed, start..start + n)
    }

    /// Whether all input has been consumed.
    fn is_empty(&self) -> bool {
        self.input.is_empty()
    }

    /// Skips whitespace and comments, then tries each token rule in turn.
    ///
    /// Returns `Break((token, span))` for the first rule that matches and
    /// `Continue(())` when the input is exhausted or no rule matches.
    fn next_token(&mut self) -> ControlFlow<(Token<'s>, Range<usize>)> {
        // The order is significant:
        //  * `history` precedes the one-char rule so `_12` is not split
        //    into `_` and `12`;
        //  * two-char punctuation precedes one-char so `==` is not two `=`;
        //  * `0x…` / `0b…` precede `integer` so the leading `0` is not
        //    taken as a decimal literal.
        let rules: [fn(&mut Self) -> ControlFlow<(Token<'s>, Range<usize>)>; 7] = [
            Self::history,
            Self::two_char_punctuation,
            Self::one_char_punctuation,
            Self::hex_number,
            Self::bin_number,
            Self::integer,
            Self::words,
        ];

        self.skip_whitespace();
        if self.is_empty() {
            return ControlFlow::Continue(());
        }

        for rule in rules {
            rule(self)?;
        }
        ControlFlow::Continue(())
    }

    /// Drops leading whitespace and `#` comments (which run up to, but not
    /// including, the next newline) until a token or the end is reached.
    fn skip_whitespace(&mut self) {
        self.input = self.input.trim_start();
        while self.input.starts_with('#') {
            let comment_len = self.input.find('\n').unwrap_or(self.input.len());
            self.bump(comment_len);
            self.input = self.input.trim_start();
        }
    }

    /// Matches a two-byte operator at the start of the input.
    fn two_char_punctuation(&mut self) -> ControlFlow<(Token<'s>, Range<usize>)> {
        let tok = match self.input.as_bytes() {
            [b'=', b'=', ..] => Token::EqEq,
            [b'!', b'=', ..] => Token::BangEq,
            [b'&', b'&', ..] => Token::AmpAmp,
            [b'|', b'|', ..] => Token::PipePipe,
            [b'>', b'=', ..] => Token::AngleRightEq,
            [b'<', b'=', ..] => Token::AngleLeftEq,
            [b'-', b'>', ..] => Token::Arrow,
            [b'<', b'<', ..] => Token::ShiftLeft,
            [b'>', b'>', ..] => Token::ShiftRight,
            [b'*', b'*', ..] => Token::StarStar,
            // Fewer than two bytes left, or not an operator pair.
            _ => return ControlFlow::Continue(()),
        };

        let (_, span) = self.bump(2);
        ControlFlow::Break((tok, span))
    }

    /// Matches a single-byte operator or delimiter at the start of the input.
    fn one_char_punctuation(&mut self) -> ControlFlow<(Token<'s>, Range<usize>)> {
        let tok = match self.input.as_bytes() {
            [b'&', ..] => Token::Amp,
            [b'=', ..] => Token::Eq,
            [b'|', ..] => Token::Pipe,
            [b'-', ..] => Token::Hyphen,
            [b':', ..] => Token::Colon,
            [b';', ..] => Token::SemiColon,
            [b',', ..] => Token::Comma,
            [b'.', ..] => Token::Period,
            [b'+', ..] => Token::Plus,
            [b'*', ..] => Token::Star,
            [b'/', ..] => Token::Slash,
            [b'!', ..] => Token::Bang,
            [b'?', ..] => Token::QuestionMark,
            [b'(', ..] => Token::RoundLeft,
            [b')', ..] => Token::RoundRight,
            [b'^', ..] => Token::Hat,
            [b'%', ..] => Token::Percent,
            [b'_', ..] => Token::Underscore,
            _ => return ControlFlow::Continue(()),
        };

        let (_, span) = self.bump(1);
        ControlFlow::Break((tok, span))
    }

    /// Matches `0x` followed by any run (possibly empty) of hex digits and
    /// `_`. The token text keeps the prefix.
    fn hex_number(&mut self) -> ControlFlow<(Token<'s>, Range<usize>)> {
        const PREFIX: &str = "0x";

        match self.input.strip_prefix(PREFIX) {
            None => ControlFlow::Continue(()),
            Some(body) => {
                // All accepted characters are ASCII, so counting bytes
                // always lands on a char boundary.
                let body_len = body
                    .bytes()
                    .take_while(|b| b.is_ascii_hexdigit() || *b == b'_')
                    .count();
                let (text, span) = self.bump(PREFIX.len() + body_len);
                ControlFlow::Break((Token::Hex(text), span))
            }
        }
    }

    /// Matches `0b` followed by any run (possibly empty) of `0`, `1` and
    /// `_`. The token text keeps the prefix.
    fn bin_number(&mut self) -> ControlFlow<(Token<'s>, Range<usize>)> {
        const PREFIX: &str = "0b";

        match self.input.strip_prefix(PREFIX) {
            None => ControlFlow::Continue(()),
            Some(body) => {
                let body_len = body
                    .bytes()
                    .take_while(|b| matches!(*b, b'0' | b'1' | b'_'))
                    .count();
                let (text, span) = self.bump(PREFIX.len() + body_len);
                ControlFlow::Break((Token::Binary(text), span))
            }
        }
    }

    /// Matches a non-empty run of ASCII digits and `_`.
    fn integer(&mut self) -> ControlFlow<(Token<'s>, Range<usize>)> {
        let len = self
            .input
            .bytes()
            .take_while(|b| b.is_ascii_digit() || *b == b'_')
            .count();

        if len == 0 {
            ControlFlow::Continue(())
        } else {
            let (text, span) = self.bump(len);
            ControlFlow::Break((Token::Integer(text), span))
        }
    }

    /// Matches `_` immediately followed by at least one ASCII digit. The
    /// token text keeps the underscore.
    fn history(&mut self) -> ControlFlow<(Token<'s>, Range<usize>)> {
        let Some(after_underscore) = self.input.strip_prefix('_') else {
            return ControlFlow::Continue(());
        };

        let digit_count = after_underscore
            .bytes()
            .take_while(|b| b.is_ascii_digit())
            .count();

        if digit_count == 0 {
            // A bare `_` is left for the punctuation rule.
            return ControlFlow::Continue(());
        }

        let (text, span) = self.bump(1 + digit_count);
        ControlFlow::Break((Token::History(text), span))
    }

    /// Matches the keyword operators `rotl` and `rotr`. The whole leading
    /// run of ASCII letters must equal the keyword.
    fn words(&mut self) -> ControlFlow<(Token<'s>, Range<usize>)> {
        let word_len = self
            .input
            .bytes()
            .take_while(|b| b.is_ascii_alphabetic())
            .count();

        // An empty word falls through to the catch-all arm.
        let tok = match &self.input[..word_len] {
            "rotl" => Token::RotLeft,
            "rotr" => Token::RotRight,
            _ => return ControlFlow::Continue(()),
        };

        let (_, span) = self.bump(word_len);
        ControlFlow::Break((tok, span))
    }
}

/// Renders a token exactly as it is spelled in source text.
///
/// Fixed tokens print their operator or keyword; literal tokens print the
/// text they were lexed from. Width, fill and alignment flags in the format
/// spec are honoured (e.g. `{:>4}`), so tokens can be lined up in
/// diagnostics.
impl Display for Token<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text: &str = match self {
            // Punctuation
            Token::Amp => "&",
            Token::AmpAmp => "&&",
            Token::AngleLeftEq => "<=",
            Token::AngleRightEq => ">=",
            Token::Arrow => "->",
            Token::Bang => "!",
            Token::BangEq => "!=",
            Token::Colon => ":",
            Token::Comma => ",",
            Token::Eq => "=",
            Token::EqEq => "==",
            Token::Hyphen => "-",
            Token::Period => ".",
            Token::Pipe => "|",
            Token::PipePipe => "||",
            Token::Plus => "+",
            Token::QuestionMark => "?",
            Token::SemiColon => ";",
            Token::Slash => "/",
            Token::Star => "*",
            Token::StarStar => "**",
            Token::Hat => "^",
            Token::Underscore => "_",
            Token::Percent => "%",
            Token::ShiftLeft => "<<",
            Token::ShiftRight => ">>",
            Token::RotLeft => "rotl",
            Token::RotRight => "rotr",

            // Delimiters
            Token::RoundLeft => "(",
            Token::RoundRight => ")",

            // Literals carry their own source text.
            Token::Integer(s) | Token::Hex(s) | Token::Binary(s) | Token::History(s) => s,
        };

        // `pad` behaves like `write_str` for a plain `{}` but additionally
        // applies any width/fill/alignment/precision the caller asked for;
        // `write_str` would silently ignore them.
        f.pad(text)
    }
}