noline 0.5.1

A no_std line editor
Documentation
use num_enum::{IntoPrimitive, TryFromPrimitive};

use crate::utf8::{Utf8Char, Utf8Decoder, Utf8DecoderStatus};

/// Single-byte control characters recognised by the input parser.
///
/// The enum is `#[repr(u8)]` and every discriminant is the raw byte value of
/// the character. The `num_enum` derives (`IntoPrimitive`,
/// `TryFromPrimitive`) supply the conversions to and from `u8`. The variants
/// cover `0x00..=0x1F` plus `0x7F`.
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug, Eq, PartialEq, Copy, Clone, IntoPrimitive, TryFromPrimitive)]
#[repr(u8)]
pub enum ControlCharacter {
    NUL = 0x0,
    CtrlA = 0x1,
    CtrlB = 0x2,
    CtrlC = 0x3,
    CtrlD = 0x4,
    CtrlE = 0x5,
    CtrlF = 0x6,
    CtrlG = 0x7,
    // 0x08 keeps its Ctrl-chord name; the `Backspace` variant below is 0x7F.
    CtrlH = 0x8,
    // 0x09, 0x0A and 0x0D get descriptive names instead of CtrlI/CtrlJ/CtrlM.
    Tab = 0x9,
    LineFeed = 0xA,
    CtrlK = 0xB,
    CtrlL = 0xC,
    CarriageReturn = 0xD,
    CtrlN = 0xE,
    CtrlO = 0xF,
    CtrlP = 0x10,
    CtrlQ = 0x11,
    CtrlR = 0x12,
    CtrlS = 0x13,
    CtrlT = 0x14,
    CtrlU = 0x15,
    CtrlV = 0x16,
    CtrlW = 0x17,
    CtrlX = 0x18,
    CtrlY = 0x19,
    CtrlZ = 0x1A,
    Escape = 0x1B,
    // 0x1C..=0x1F: the ASCII file/group/record/unit separators.
    FS = 0x1C,
    GS = 0x1D,
    RS = 0x1E,
    US = 0x1F,
    // 0x7F is ASCII DEL.
    Backspace = 0x7F,
}

impl ControlCharacter {
    /// Converts a raw byte into the matching [`ControlCharacter`] variant.
    ///
    /// Returns `Err(())` when `byte` is not the discriminant of any variant;
    /// the detailed conversion error is deliberately discarded.
    fn new(byte: u8) -> Result<Self, ()> {
        Self::try_from(byte).map_err(|_| ())
    }
}

/// Control sequences (`ESC [` … final byte) understood by the parser.
///
/// Variant names follow the ECMA-48 mnemonics for the corresponding final
/// byte. Numeric payloads are the decoded sequence arguments, with missing
/// arguments replaced by the defaults applied in [`CSI::new`].
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
pub enum CSI {
    /// Final byte `A` (cursor up); argument defaults to 1.
    CUU(usize),
    /// Final byte `B` (cursor down); argument defaults to 1.
    CUD(usize),
    /// Final byte `C` (cursor forward); argument defaults to 1.
    CUF(usize),
    /// Final byte `D` (cursor back); argument defaults to 1.
    CUB(usize),
    /// Final byte `R` (cursor position report); both arguments are required.
    CPR(usize, usize),
    /// Final byte `H` (cursor position); both arguments default to 1.
    CUP(usize, usize),
    /// Final byte `J` (erase in display); argument defaults to 0.
    ED(usize),
    /// Final byte `n` (device status report); arguments are not retained.
    DSR,
    /// Final byte `S` (scroll up); argument defaults to 1.
    SU(usize),
    /// Final byte `T` (scroll down); argument defaults to 1.
    SD(usize),
    /// Final byte `~` with first argument 1.
    Home,
    /// Final byte `~` with first argument 3.
    Delete,
    /// Final byte `~` with first argument 4.
    End,
    /// Any sequence that is not recognised, or is missing a required
    /// argument; carries the final byte.
    Unknown(u8),
}

impl CSI {
    /// Builds a [`CSI`] from the final byte and up to two numeric arguments.
    ///
    /// Missing arguments fall back to the per-sequence defaults documented on
    /// the variants. `CPR` (`R`) has no sensible default: when either argument
    /// is absent the sequence is reported as [`CSI::Unknown`] rather than
    /// panicking, because these bytes come straight from the terminal.
    ///
    /// The function currently always returns `Some`; the `Option` return type
    /// is kept so existing callers keep compiling.
    fn new(byte: u8, arg1: Option<usize>, arg2: Option<usize>) -> Option<Self> {
        let sequence = match byte {
            b'A' => Self::CUU(arg1.unwrap_or(1)),
            b'B' => Self::CUD(arg1.unwrap_or(1)),
            b'C' => Self::CUF(arg1.unwrap_or(1)),
            b'D' => Self::CUB(arg1.unwrap_or(1)),
            b'H' => Self::CUP(arg1.unwrap_or(1), arg2.unwrap_or(1)),
            b'J' => Self::ED(arg1.unwrap_or(0)),
            // A position report without both coordinates is malformed input,
            // not a programming error, so it must not panic.
            b'R' => match (arg1, arg2) {
                (Some(row), Some(column)) => Self::CPR(row, column),
                _ => Self::Unknown(byte),
            },
            b'S' => Self::SU(arg1.unwrap_or(1)),
            b'T' => Self::SD(arg1.unwrap_or(1)),
            b'n' => Self::DSR,
            b'~' => match arg1 {
                Some(1) => Self::Home,
                Some(3) => Self::Delete,
                Some(4) => Self::End,
                _ => Self::Unknown(byte),
            },
            _ => Self::Unknown(byte),
        };

        Some(sequence)
    }
}

/// Outcome of feeding a single byte to [`Parser::advance`].
///
/// `Debug` is only derived in test builds.
#[cfg_attr(test, derive(Debug))]
#[derive(Eq, PartialEq, Copy, Clone)]
pub enum Action {
    /// The byte was consumed without producing anything yet, e.g. it is part
    /// of an unfinished UTF-8 or escape sequence.
    Ignore,
    /// A complete UTF-8 character was decoded.
    Print(Utf8Char),
    /// The UTF-8 decoder reported an error for the current byte.
    InvalidUtf8,
    /// A control character byte was received outside of any sequence.
    ControlCharacter(ControlCharacter),
    /// `ESC` followed by a byte other than `[`; carries that byte.
    EscapeSequence(u8),
    /// A completed `ESC [` control sequence.
    ControlSequenceIntroducer(CSI),
}

impl Action {
    /// Wraps the byte that followed `ESC` in an [`Action::EscapeSequence`].
    fn escape_sequence(byte: u8) -> Self {
        Self::EscapeSequence(byte)
    }

    /// Wraps a control character byte in an [`Action::ControlCharacter`].
    ///
    /// # Panics
    ///
    /// Panics if `byte` is not a valid [`ControlCharacter`] discriminant.
    fn control_character(byte: u8) -> Self {
        let control = ControlCharacter::new(byte).unwrap();
        Self::ControlCharacter(control)
    }

    /// Builds an [`Action::ControlSequenceIntroducer`] from a final byte and
    /// the arguments collected so far.
    ///
    /// # Panics
    ///
    /// Panics if [`CSI::new`] returns `None`.
    fn csi(byte: u8, arg1: Option<usize>, arg2: Option<usize>) -> Self {
        let sequence = CSI::new(byte, arg1, arg2).unwrap();
        Self::ControlSequenceIntroducer(sequence)
    }
}

/// Internal state of [`Parser`] between calls to [`Parser::advance`].
#[derive(Debug, Eq, PartialEq)]
enum State {
    /// Not inside any multi-byte sequence; the state a new parser starts in.
    Ground,
    /// In the middle of a multi-byte UTF-8 character. The decoder is wrapped
    /// in `Option` so `advance` can move it out with `take()`.
    Utf8Sequence(Option<Utf8Decoder>),
    /// `ESC` (0x1b) has been received.
    EscapeSequence,
    /// `ESC [` has been received, but no argument bytes yet.
    CSIStart,
    /// Accumulating the digits of the first CSI argument.
    CSIArg1(Option<usize>),
    /// A `;` has been seen; holds the first argument (if any) and accumulates
    /// the digits of the second.
    CSIArg2(Option<usize>, Option<usize>),
}

/// Byte-at-a-time parser that turns terminal input into [`Action`]s.
pub struct Parser {
    // Current position in the state machine; updated by `advance`.
    state: State,
}

impl Parser {
    /// Creates a parser that starts in the ground state.
    pub fn new() -> Self {
        Self {
            state: State::Ground,
        }
    }

    /// Appends the ASCII digit `byte` to a decimal CSI argument.
    ///
    /// `current` is the value accumulated so far (`None` counts as 0) and
    /// `byte` must be in `b'0'..=b'9'`. The result saturates at `usize::MAX`
    /// so that an arbitrarily long digit run from the terminal can neither
    /// panic (debug builds) nor wrap around (release builds).
    fn push_digit(current: Option<usize>, byte: u8) -> usize {
        let digit = usize::from(byte - b'0');
        current.unwrap_or(0).saturating_mul(10).saturating_add(digit)
    }

    /// Feeds one input byte to the parser and returns the resulting action.
    ///
    /// Bytes that only move the state machine forward (sequence prefixes,
    /// argument digits, UTF-8 lead bytes) yield [`Action::Ignore`]; the byte
    /// that completes a character or sequence yields the corresponding
    /// action and returns the parser to the ground state.
    pub fn advance(&mut self, byte: u8) -> Action {
        match self.state {
            State::Ground => match byte {
                // ESC starts an escape sequence.
                0x1b => {
                    self.state = State::EscapeSequence;
                    Action::Ignore
                }
                // Every byte in these ranges is a valid `ControlCharacter`.
                0x0..=0x1a | 0x1c..=0x1f | 0x7f => Action::control_character(byte),
                // Everything else is handed to a fresh UTF-8 decoder.
                0x20..=0x7e | 0x80..=0xff => {
                    let mut decoder = Utf8Decoder::new();

                    match decoder.advance(byte) {
                        Utf8DecoderStatus::Continuation => {
                            self.state = State::Utf8Sequence(Some(decoder));
                            Action::Ignore
                        }
                        Utf8DecoderStatus::Done(c) => Action::Print(c),
                        Utf8DecoderStatus::Error => Action::InvalidUtf8,
                    }
                }
            },
            State::Utf8Sequence(ref mut decoder) => {
                // The decoder is always `Some` here: this state is only ever
                // constructed with `Some(..)`, and every branch below
                // overwrites `self.state` after the decoder is taken out.
                let mut decoder = decoder.take().unwrap();

                match decoder.advance(byte) {
                    Utf8DecoderStatus::Continuation => {
                        self.state = State::Utf8Sequence(Some(decoder));
                        Action::Ignore
                    }
                    Utf8DecoderStatus::Done(c) => {
                        self.state = State::Ground;
                        Action::Print(c)
                    }
                    Utf8DecoderStatus::Error => {
                        self.state = State::Ground;
                        Action::InvalidUtf8
                    }
                }
            }
            State::EscapeSequence => {
                // `ESC [` introduces a control sequence; any other byte is
                // reported as a two-byte escape sequence.
                if byte == 0x5b {
                    self.state = State::CSIStart;
                    Action::Ignore
                } else {
                    self.state = State::Ground;
                    Action::escape_sequence(byte)
                }
            }
            State::CSIStart => match byte {
                // First digit of the first argument.
                0x30..=0x39 => {
                    self.state = State::CSIArg1(Some(Self::push_digit(None, byte)));
                    Action::Ignore
                }
                // `;` with an empty first argument.
                0x3b => {
                    self.state = State::CSIArg2(None, None);
                    Action::Ignore
                }
                // Final byte: the sequence has no arguments.
                0x40..=0x7e => {
                    self.state = State::Ground;
                    Action::csi(byte, None, None)
                }
                // Any other byte is dropped and the parser stays in this state.
                _ => Action::Ignore,
            },
            State::CSIArg1(value) => match byte {
                0x30..=0x39 => {
                    self.state = State::CSIArg1(Some(Self::push_digit(value, byte)));
                    Action::Ignore
                }
                0x3b => {
                    self.state = State::CSIArg2(value, None);
                    Action::Ignore
                }
                0x40..=0x7e => {
                    self.state = State::Ground;
                    Action::csi(byte, value, None)
                }
                _ => Action::Ignore,
            },
            State::CSIArg2(arg1, arg2) => match byte {
                0x30..=0x39 => {
                    self.state = State::CSIArg2(arg1, Some(Self::push_digit(arg2, byte)));
                    Action::Ignore
                }
                0x40..=0x7e => {
                    self.state = State::Ground;
                    Action::csi(byte, arg1, arg2)
                }
                // NOTE(review): a further `;` lands here too, so digits of a
                // third argument are appended to the second one.
                _ => Action::Ignore,
            },
        }
    }
}

#[cfg(test)]
pub(crate) mod tests {
    use crate::testlib::ToByteVec;

    use super::*;
    use std::vec::Vec;
    use ControlCharacter::*;

    // Feeds every byte of `seq` to `parser` and collects one action per byte.
    fn input_sequence(parser: &mut Parser, seq: impl ToByteVec) -> Vec<Action> {
        seq.to_byte_vec()
            .into_iter()
            .map(|b| parser.advance(b))
            .collect()
    }

    // Walks a single parser through printable input, control characters, a
    // multi-byte UTF-8 character and a series of CSI sequences. All steps
    // share the same parser, so each one relies on the previous step having
    // left it in the ground state.
    #[test]
    fn parser() {
        let mut parser = Parser::new();

        assert_eq!(parser.state, State::Ground);

        assert_eq!(parser.advance(b'a'), Action::Print(Utf8Char::from_str("a")));
        assert_eq!(parser.advance(0x7), Action::ControlCharacter(CtrlG));
        assert_eq!(parser.advance(0x3), Action::ControlCharacter(CtrlC));

        // Two-byte character: the first byte is ignored, the second prints.
        let actions = input_sequence(&mut parser, "æ");
        assert_eq!(
            actions,
            [Action::Ignore, Action::Print(Utf8Char::from_str("æ"))]
        );

        // Cursor position report with both arguments; every byte before the
        // final one must have produced `Ignore`.
        let mut actions = input_sequence(&mut parser, "\x1b[312;836R");
        assert_eq!(
            actions.pop().unwrap(),
            Action::ControlSequenceIntroducer(CSI::CPR(312, 836))
        );
        while let Some(action) = actions.pop() {
            assert_eq!(action, Action::Ignore);
        }

        // Missing argument falls back to the default of 1.
        let mut actions = input_sequence(&mut parser, "\x1b[A");

        assert_eq!(
            actions.pop().unwrap(),
            Action::ControlSequenceIntroducer(CSI::CUU(1))
        );

        // Multi-digit argument.
        let mut actions = input_sequence(&mut parser, "\x1b[10B");

        assert_eq!(
            actions.pop().unwrap(),
            Action::ControlSequenceIntroducer(CSI::CUD(10))
        );

        // The remaining cases cover every combination of present and absent
        // arguments for the two-argument cursor position sequence.
        let mut actions = input_sequence(&mut parser, "\x1b[H");

        assert_eq!(
            actions.pop().unwrap(),
            Action::ControlSequenceIntroducer(CSI::CUP(1, 1))
        );

        let mut actions = input_sequence(&mut parser, "\x1b[2;5H");

        assert_eq!(
            actions.pop().unwrap(),
            Action::ControlSequenceIntroducer(CSI::CUP(2, 5))
        );

        let mut actions = input_sequence(&mut parser, "\x1b[;5H");

        assert_eq!(
            actions.pop().unwrap(),
            Action::ControlSequenceIntroducer(CSI::CUP(1, 5))
        );

        let mut actions = input_sequence(&mut parser, "\x1b[17;H");

        assert_eq!(
            actions.pop().unwrap(),
            Action::ControlSequenceIntroducer(CSI::CUP(17, 1))
        );

        let mut actions = input_sequence(&mut parser, "\x1b[;H");

        assert_eq!(
            actions.pop().unwrap(),
            Action::ControlSequenceIntroducer(CSI::CUP(1, 1))
        );

        let mut actions = input_sequence(&mut parser, "\x1b[;10H");

        assert_eq!(
            actions.pop().unwrap(),
            Action::ControlSequenceIntroducer(CSI::CUP(1, 10))
        );
    }
}