kari 0.1.0

An embeddable programming language, written in and for Rust
Documentation
use crate::{
    data::{
        ch::Char,
        token::{
            self,
            Span,
            Token,
        },
    },
    pipeline::{
        self,
        reader,
    },
};


/// Pipeline stage that pulls characters from an upstream reader stage and
/// groups them into tokens.
pub struct Tokenizer<Reader> {
    /// Upstream stage the characters are read from.
    reader: Reader,
    /// Copied into the span of every token that is produced.
    // NOTE(review): presumably the name of the source being read — confirm.
    stream: String,
}

impl<R> Tokenizer<R> {
    /// Creates a tokenizer that reads from `reader` and tags the spans of its
    /// tokens with `stream`.
    pub fn new(reader: R, stream: String) -> Self {
        Tokenizer { reader, stream }
    }
}

impl<Reader> pipeline::Stage for Tokenizer<Reader>
    where Reader: pipeline::Stage<Item=Char, Error=reader::Error>
{
    type Item  = Token;
    type Error = Error;

    fn next(&mut self) -> Result<Self::Item, Self::Error> {
        let mut state   = State::Initial;
        let mut builder = TokenBuilder::new(self.stream.clone());

        loop {
            let c = self.reader.next()?;

            match state {
                State::Initial => {
                    match c.c {
                        '#' => {
                            state = State::Comment;
                        }
                        '"' => {
                            state = State::String;
                            builder.process(c);
                        }
                        ':' => {
                            state = State::Symbol;
                            builder.process(c);
                        }
                        _ => {
                            if !c.is_whitespace() {
                                state = State::Word;
                                builder.store(c);
                            }
                        }
                    }
                }
                State::Comment => {
                    if c == '\n' {
                        state = State::Initial;
                    }
                }
                State::String => {
                    match c.c {
                        '\\' => {
                            state = State::StringEscape;
                            builder.process(c);
                        }
                        '"' => {
                            builder.process(c);
                            return Ok(builder.into_string());
                        }
                        _ => {
                            builder.store(c);
                        }
                    }
                }
                State::StringEscape => {
                    match c.c {
                        'n' => {
                            builder.store(Char { c: '\n', .. c });
                            state = State::String;
                        }
                        c => {
                            return Err(Error::UnexpectedEscape(c));
                        }
                    }
                }
                State::Symbol => {
                    if c.is_whitespace() {
                        return Ok(builder.into_symbol());
                    }

                    builder.store(c);
                }
                State::Word => {
                    if c.is_whitespace() {
                        return Ok(builder.into_word());
                    }

                    builder.store(c);
                }
            }
        }
    }
}


/// States of the tokenizer's state machine while it reads a single token.
enum State {
    /// No token has started yet; whitespace is skipped.
    Initial,
    /// Inside a `#` comment; characters are ignored up to the next `'\n'`.
    Comment,
    /// Inside a string, after the opening `"`.
    String,
    /// Inside a string, directly after a backslash.
    StringEscape,
    /// Inside a symbol, after the leading `:`.
    Symbol,
    /// Inside a word.
    Word,
}


/// Collects the text and the source span of the token currently being read.
struct TokenBuilder {
    /// Characters stored so far for the token's contents.
    buffer: String,
    /// Stream value that is moved into the span when the first character is
    /// processed; `None` from then on.
    stream: Option<String>,
    /// Span covered so far; `None` until the first character is processed.
    span:   Option<Span>,
}

impl TokenBuilder {
    /// Creates an empty builder whose span will be tagged with `stream`.
    fn new(stream: String) -> Self {
        TokenBuilder {
            span:   None,
            stream: Some(stream),
            buffer: String::new(),
        }
    }

    /// Extends the span to cover `c` without adding `c` to the buffer.
    fn process(&mut self, c: Char) {
        if let Some(span) = self.span.as_mut() {
            // A span already exists: just move its end forward.
            span.end = c.pos;
            return;
        }

        // First character of the token: open a span that starts and ends at
        // `c`. The stream value is still present, because it is only ever
        // taken here, and this path runs only while no span exists.
        let stream = self.stream.take().unwrap();
        self.span = Some(Span {
            stream,
            start: c.pos,
            end:   c.pos,
        });
    }

    /// Extends the span to cover `c` and appends its character to the buffer.
    fn store(&mut self, c: Char) {
        self.process(c);
        self.buffer.push(c.c);
    }

    /// Finishes the token as a string.
    ///
    /// Panics if no character has been processed yet.
    fn into_string(self) -> Token {
        let TokenBuilder { buffer, span, .. } = self;

        Token {
            kind: token::Kind::String(buffer),
            span: span.unwrap(),
        }
    }

    /// Finishes the token as a symbol.
    ///
    /// Panics if no character has been processed yet.
    fn into_symbol(self) -> Token {
        let TokenBuilder { buffer, span, .. } = self;

        Token {
            kind: token::Kind::Symbol(buffer),
            span: span.unwrap(),
        }
    }

    /// Finishes the token as a word: `[` and `]` become the list delimiters,
    /// any other text is handed to `token::Kind::parse_word`.
    ///
    /// Panics if no character has been processed yet.
    fn into_word(self) -> Token {
        let TokenBuilder { buffer, span, .. } = self;

        let kind = if buffer == "[" {
            token::Kind::ListOpen
        } else if buffer == "]" {
            token::Kind::ListClose
        } else {
            token::Kind::parse_word(buffer)
        };

        Token {
            kind,
            span: span.unwrap(),
        }
    }
}


/// Errors that the tokenizer can return.
#[derive(Debug)]
pub enum Error {
    /// The underlying reader failed with an error other than end of stream.
    Reader(reader::Error),
    /// A backslash inside a string was followed by an unsupported character;
    /// carries that character.
    UnexpectedEscape(char),
    /// The underlying reader reported the end of the stream.
    EndOfStream,
}

impl From<reader::Error> for Error {
    fn from(from: reader::Error) -> Self {
        match from {
            reader::Error::EndOfStream => Error::EndOfStream,
            error                      => Error::Reader(error),
        }
    }
}