hl_core 0.1.3

Syntax highlighting in Rust
Documentation
// ---- DON'T EDIT! THIS IS AUTO GENERATED CODE ---- //
use crate::lexers::Token;

/// Cursor-based lexer over a vector of characters.
///
/// The cursor is advanced with `read_char`; `next_token` consumes characters
/// from the cursor onward and returns the next `Token`.
pub struct Lexer {
    /// The text being lexed, one element per character.
    input: Vec<char>,
    /// Index in `input` of the character currently held in `ch`.
    pub position: usize,
    /// Index in `input` of the character the next `read_char` call will load.
    pub read_position: usize,
    /// Character under the cursor; `'\0'` before the first read and whenever
    /// the cursor is past the end of `input`.
    pub ch: char,
}

/// Reports whether `ch` can be part of an identifier: any alphabetic
/// character, or an underscore.
fn is_letter(ch: char) -> bool {
    match ch {
        '_' => true,
        other => other.is_alphabetic(),
    }
}

impl Lexer {
    /// Creates a lexer over `input` with the cursor at index 0.
    ///
    /// No character is loaded yet (`ch` is `'\0'`); call [`Lexer::read_char`]
    /// once before the first [`Lexer::next_token`].
    pub fn new(input: Vec<char>) -> Self {
        Self {
            input,
            position: 0,
            read_position: 0,
            ch: '\0',
        }
    }

    /// Loads the character at `read_position` into `ch` and advances the cursor.
    ///
    /// Past the end of the input `ch` becomes `'\0'`; `position` and
    /// `read_position` keep advancing regardless.
    pub fn read_char(&mut self) {
        self.ch = self.input.get(self.read_position).copied().unwrap_or('\0');
        self.position = self.read_position;
        self.read_position += 1;
    }

    /// Returns the character `offset` places after the cursor without
    /// consuming anything, or `'\0'` when that index is past the end of input.
    fn peek(&self, offset: usize) -> char {
        self.input
            .get(self.position + offset)
            .copied()
            .unwrap_or('\0')
    }

    /// Consumes a run of identifier characters starting at the cursor.
    ///
    /// A `-` directly following an identifier character is swallowed as part
    /// of the identifier (e.g. `foo-bar`).
    fn read_identifier(&mut self) -> Vec<char> {
        let start = self.position;
        while self.position < self.input.len() && is_letter(self.ch) {
            self.read_char();
            if self.ch == '-' {
                self.read_char();
            }
        }
        self.input[start..self.position].to_vec()
    }

    /// Consumes text from the cursor up to and including the next unescaped
    /// `terminator`, and returns it (opening character included).
    ///
    /// A backslash causes the character after it to be skipped. If the input
    /// ends before the terminator is found, everything up to the end of the
    /// input is returned.
    fn read_string(&mut self, terminator: char) -> Vec<char> {
        let start = self.position;
        self.read_char();
        while self.position < self.input.len() && self.ch != terminator {
            if self.ch == '\\' {
                self.read_char();
            }
            self.read_char();
        }
        self.read_char();
        // An unterminated string can leave the cursor one or two steps past
        // the end (two when the last character is a backslash). Put it back in
        // the regular end-of-input state so the slice below stays in bounds.
        let len = self.input.len();
        if self.position > len {
            self.position = len;
            self.read_position = len + 1;
        }
        self.input[start..self.position].to_vec()
    }

    /// Consumes a run of numeric characters starting at the cursor.
    fn read_number(&mut self) -> Vec<char> {
        let start = self.position;
        while self.position < self.input.len() && self.ch.is_numeric() {
            self.read_char();
        }
        self.input[start..self.position].to_vec()
    }

    /// Consumes a `<<EOF ... EOF` block whose opening `<<EOF` starts at the
    /// cursor and returns the whole block as a string token.
    ///
    /// If no closing `EOF` exists, the rest of the input is consumed.
    fn read_heredoc(&mut self) -> Token {
        let mut text: Vec<char> = "<<EOF".chars().collect();
        for _ in 0..5 {
            self.read_char();
        }
        let body_start = self.position;
        while self.position < self.input.len() {
            // `peek` keeps the look-ahead in bounds when `E` is one of the
            // last two characters of the input.
            if self.ch == 'E' && self.peek(1) == 'O' && self.peek(2) == 'F' {
                for _ in 0..3 {
                    self.read_char();
                }
                break;
            }
            self.read_char();
        }
        text.extend_from_slice(&self.input[body_start..self.position]);
        Token::STRING(text)
    }

    /// Reports whether the first non-whitespace character at or after the
    /// cursor is `(`.
    fn followed_by_open_paren(&self) -> bool {
        self.input
            .iter()
            .skip(self.position)
            .find(|c| !c.is_whitespace())
            == Some(&'(')
    }

    /// Lexes the token kinds that are recognised by character class rather
    /// than by a fixed character: words, numbers and quoted strings.
    ///
    /// Returns `Token::ILLEGAL` *without advancing the cursor* when the
    /// current character fits none of them; the caller is expected to consume
    /// that character itself.
    fn lex_word_or_literal(&mut self) -> Token {
        if is_letter(self.ch) {
            let mut identifier = self.read_identifier();
            // Allow a letter/digit tail after the first digit, e.g. `foo2bar`.
            if self.ch.is_numeric() {
                let tail_start = self.position;
                while self.position < self.input.len()
                    && (self.ch.is_numeric() || is_letter(self.ch))
                {
                    self.read_char();
                }
                identifier.extend_from_slice(&self.input[tail_start..self.position]);
            }
            return match get_keyword_token(&identifier) {
                Ok(keyword_token) => keyword_token,
                // A non-keyword word followed by `(` is reported as an entity.
                Err(_) if self.followed_by_open_paren() => Token::ENTITY(identifier),
                Err(_) => Token::IDENT(identifier),
            };
        }

        if self.ch.is_numeric() {
            return Token::INT(self.read_number());
        }

        if self.ch == '\'' {
            // A quote directly after `/` is emitted as a plain character
            // instead of opening a string. `checked_sub` avoids the underflow
            // when the quote is the very first character of the input.
            let previous = self
                .position
                .checked_sub(1)
                .and_then(|index| self.input.get(index));
            if previous == Some(&'/') {
                let tok = Token::CH(self.ch);
                self.read_char();
                return tok;
            }
            return Token::STRING(self.read_string('\''));
        }

        if self.ch == '"' {
            return Token::STRING(self.read_string('"'));
        }

        Token::ILLEGAL
    }

    /// Returns the token starting at the cursor and advances past it.
    ///
    /// Returns `Token::EOF` when the current character is `'\0'`. The one
    /// case where the cursor is not advanced is `Token::ILLEGAL`: the
    /// offending character is still in `ch` and must be consumed by the
    /// caller.
    pub fn next_token(&mut self) -> Token {
        // `$` plus any following punctuation (up to a letter, digit, space or
        // newline) is emitted as one identifier token.
        if self.ch == '$' {
            let start = self.position;
            while self.position < self.input.len()
                && !is_letter(self.ch)
                && !self.ch.is_numeric()
                && self.ch != '\n'
                && self.ch != ' '
            {
                self.read_char();
            }
            return Token::IDENT(self.input[start..self.position].to_vec());
        }

        // `<<EOF` followed by at least one more character opens a heredoc.
        // Any other `<` falls through to the generic handling below.
        if self.ch == '<'
            && self.position + 5 < self.input.len()
            && self.peek(1) == '<'
            && self.peek(2) == 'E'
            && self.peek(3) == 'O'
            && self.peek(4) == 'F'
        {
            return self.read_heredoc();
        }

        // A comment runs to the end of the line; the newline is included.
        if self.ch == '#' {
            return Token::COMMENT(self.read_string('\n'));
        }

        let tok = match self.ch {
            '\n' => Token::ENDL(self.ch),
            '\0' => Token::EOF,
            '!' | '[' | ']' | '|' => Token::KEYWORD(vec![self.ch]),
            '-' => {
                // `-x` / `--x` style flags are emitted as keywords. `peek`
                // yields '\0' at the end of input, so a trailing `-` is
                // simply a plain character.
                let next_ch = self.peek(1);
                if is_letter(next_ch) || next_ch == '-' {
                    let mut identifier = vec![self.ch];
                    self.read_char();
                    if self.peek(1) == '-' {
                        self.read_char();
                        identifier.push(self.ch);
                    }
                    identifier.extend(self.read_identifier());
                    return Token::KEYWORD(identifier);
                }
                Token::CH(self.ch)
            }
            _ => return self.lex_word_or_literal(),
        };
        self.read_char();
        tok
    }
}

/// Classifies `identifier` against the lexer's fixed word lists.
///
/// Returns `Token::CONSTANT` or `Token::KEYWORD` (carrying a copy of the
/// identifier) when the word is in the corresponding list, and an `Err` with
/// the message `"Not a keyword"` otherwise.
pub fn get_keyword_token(identifier: &Vec<char>) -> Result<Token, String> {
    const CONSTANT_WORDS: &[&str] = &[
        "builtin", "command", "compgen", "echo", "eval", "exit", "false", "hash", "kill", "read",
        "source", "unset", "test", "true", "printf",
    ];
    const KEYWORD_WORDS: &[&str] = &[
        "alias", "bg", "bind", "break", "caller", "case", "cd", "complete", "compopt", "continue",
        "coproc", "declare", "dirs", "disown", "enable", "exec", "export", "fc", "fg", "for",
        "function", "getopts", "help",
    ];

    let word: String = identifier.iter().collect();
    if CONSTANT_WORDS.contains(&word.as_str()) {
        return Ok(Token::CONSTANT(identifier.clone()));
    }
    if KEYWORD_WORDS.contains(&word.as_str()) {
        return Ok(Token::KEYWORD(identifier.clone()));
    }
    Err(String::from("Not a keyword"))
}

/// Renders `input` as an HTML table with one row per source line.
///
/// Each row has an empty `hl-num` cell carrying the 1-based line number in
/// its `data-line` attribute, followed by a cell with the highlighted code.
/// Tokens are wrapped in `<span>` elements whose class depends on the token
/// kind (`hl-c`, `hl-s`, `hl-en`, `hl-k`, `hl-cmt`); identifiers and plain
/// characters are written without a span.
///
/// All text taken from `input` is HTML-escaped (`&`, `<`, `>`), so source
/// code containing markup cannot inject elements into the output.
pub fn render_html(input: Vec<char>) -> String {
    /// Appends `ch` to `out`, replacing HTML-significant characters by entities.
    fn push_escaped(out: &mut String, ch: char) {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            _ => out.push(ch),
        }
    }

    /// Returns `chars` as an HTML-escaped string.
    fn escape_html(chars: &[char]) -> String {
        let mut out = String::with_capacity(chars.len());
        for &ch in chars {
            push_escaped(&mut out, ch);
        }
        out
    }

    /// Appends `<span class="{class}">{markup}</span>`; `markup` must already be escaped.
    fn push_span(html: &mut String, class: &str, markup: &str) {
        html.push_str(&format!("<span class=\"{}\">{}</span>", class, markup));
    }

    /// Closes the current table row and opens the one for the next line.
    fn start_next_row(html: &mut String, line: &mut usize) {
        *line += 1;
        html.push_str("</td></tr>\n");
        html.push_str(&format!(
            "<tr><td class=\"hl-num\" data-line=\"{}\"></td><td>",
            line
        ));
    }

    let mut l = Lexer::new(input);
    l.read_char();
    let mut html = String::new();
    let mut line: usize = 1;
    html.push_str("<table class=\"highlight-table\">\n");
    html.push_str("<tbody>\n");
    html.push_str("<tr>");
    html.push_str(&format!(
        "<td class=\"hl-num\" data-line=\"{}\"></td><td>",
        line
    ));

    loop {
        let token = l.next_token();
        if token == Token::EOF {
            html.push_str("</td></tr>\n");
            break;
        }

        match token {
            Token::INT(value) => push_span(&mut html, "hl-c", &escape_html(&value)),
            Token::IDENT(value) => html.push_str(&escape_html(&value)),
            Token::STRING(value) => {
                // Escape first, then decorate: the heredoc markers are matched
                // in their escaped form so the inserted spans are not escaped.
                let markup = escape_html(&value)
                    .replace("&lt;&lt;", "<span class=\"hl-k\">&lt;&lt;</span>")
                    .replace("EOF", "<span class=\"hl-k\">EOF</span>");
                // A string may span several source lines; each gets its own row.
                for (index, piece) in markup.split('\n').enumerate() {
                    if index > 0 {
                        start_next_row(&mut html, &mut line);
                    }
                    push_span(&mut html, "hl-s", piece);
                }
            }
            Token::CH(value) => push_escaped(&mut html, value),
            Token::ENTITY(value) => push_span(&mut html, "hl-en", &escape_html(&value)),
            Token::CONSTANT(value) => push_span(&mut html, "hl-c", &escape_html(&value)),
            Token::KEYWORD(value) => push_span(&mut html, "hl-k", &escape_html(&value)),
            Token::COMMENT(value) => {
                // The comment token includes its terminating newline, so the
                // split normally yields the comment text plus an empty piece.
                let markup = escape_html(&value);
                for (index, piece) in markup.split('\n').enumerate() {
                    if index > 0 {
                        start_next_row(&mut html, &mut line);
                    }
                    // Only empty pieces are skipped, so a lone `#` is kept.
                    if !piece.is_empty() {
                        push_span(&mut html, "hl-cmt", piece);
                    }
                }
            }
            Token::ENDL(_) => start_next_row(&mut html, &mut line),
            _ => {
                // The lexer did not consume the current character (e.g. an
                // unrecognised one): emit it verbatim and step over it here.
                push_escaped(&mut html, l.ch);
                l.read_char();
            }
        }
    }

    html.push_str("</tbody>\n");
    html.push_str("</table>");
    html
}