Skip to main content

ttk91/bytecode/
token.rs

1//! Lexer and an enumeration of the tokens that make up the bytecode format.
2
3use logos::Logos;
4use std::fmt;
5use std::str::FromStr;
6
/// A section header of the bytecode format.
///
/// Each variant corresponds to one of the marker lines that delimit the parts of a bytecode
/// file. The textual form of every header (as produced by the `Display` implementation) is
/// listed on the variant.
///
/// The type is a plain, field-less enumeration, so it implements `Eq` and `Hash` in addition
/// to `PartialEq`, which allows it to be used as a key in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Section {
    /// Start of the bytecode file (`___b91___`).
    Start,

    /// End of the bytecode file (`___end___`).
    End,

    /// Start of the code section (`___code___`).
    Code,

    /// Start of the data section (`___data___`).
    Data,

    /// Start of the symbol table section (`___symboltable___`).
    SymbolTable,
}
25
26impl fmt::Display for Section {
27    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
28        match self {
29            Section::Start => write!(f, "___b91___"),
30            Section::End => write!(f, "___end___"),
31            Section::Code => write!(f, "___code___"),
32            Section::Data => write!(f, "___data___"),
33            Section::SymbolTable => write!(f, "___symboltable___"),
34        }
35    }
36}
37
38impl FromStr for Section {
39    type Err = ();
40
41    fn from_str(input: &str) -> Result<Section, ()> {
42        match input {
43            "___b91___" | "b91" => Ok(Section::Start),
44            "___end___" | "end" => Ok(Section::End),
45            "___code___" | "code" => Ok(Section::Code),
46            "___data___" | "data" => Ok(Section::Data),
47            "___symboltable___" | "symboltable" => Ok(Section::SymbolTable),
48            _ => Err(()),
49        }
50    }
51}
52
/// Enumeration of all possible tokens of the bytecode format.
///
/// The lexer is generated by the `Logos` derive from the `#[regex]` attributes attached to
/// the variants. The lifetime `'t` is the lifetime of the input text that
/// [`Token::Symbol`] borrows from.
#[derive(Logos, Debug, Clone, PartialEq)]
pub enum Token<'t> {
    /// An erroneous token that cannot be interpreted as any of the other tokens.
    ///
    /// The whitespace rule is attached to this variant as well: runs of spaces, tabs, form
    /// feeds, carriage returns and newlines are handed to `logos::skip`.
    #[error]
    #[regex(r"[ \t\f\r\n]+", logos::skip)]
    Error,

    /// A section header that starts and ends with three underscores.
    ///
    /// The matched text is converted into a [`Section`] by calling `parse` on the slice.
    // NOTE(review): a header such as `___code___` also matches the symbol pattern below;
    // presumably logos' rule priority selects this variant — confirm against the logos docs.
    #[regex("___(b91|code|data|symboltable|end)___", |lex| lex.slice().parse())]
    Section(Section),

    /// A signed number literal: an optional `-` followed by one or more decimal digits.
    ///
    /// The matched text is converted into an `i32` by calling `parse` on the slice.
    // NOTE(review): literals outside the `i32` range make `parse` fail; how logos reports
    // a failing callback is not visible here — confirm.
    #[regex("-?[0-9]+", |lex| lex.slice().parse())]
    Number(i32),

    /// A symbol that begins with a letter or an underscore and can contain the characters
    /// `A-Za-z0-9_`.
    ///
    /// The pattern is case-insensitive (`(?i)`); the variant holds the matched slice of the
    /// input unchanged.
    #[regex("(?i)[a-z_][a-z0-9_]*")]
    Symbol(&'t str),
}
74
75impl<'t> fmt::Display for Token<'t> {
76    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
77        match self {
78            Token::Error => write!(f, "<error>"),
79            Token::Section(section) => write!(f, "{}", section),
80            Token::Number(num) => write!(f, "{}", num),
81            Token::Symbol(label) => write!(f, "{}", label),
82        }
83    }
84}