rflex_lib/
lexer.rs

1use std::fs;
2use Token::*;
3use Group::*;
4use Op::*;
5
6pub trait TokenGiver { 
7    fn next(&mut self) -> Result<Token, TokenErr>;
8    fn peek(&mut self) -> Result<Token, TokenErr>;
9}
10
/// Operator symbols carried by `Token::OP`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Op {
    /// `*`
    STAR,
    /// `+`
    PLUS,
    /// NOTE(review): never produced by the lexer in this file; presumably
    /// stands for `?` - confirm against the parser.
    QUESTION,
    /// `|`
    BAR,
    /// `-`
    DASH,
    /// NOTE(review): never produced by the lexer in this file; meaning is
    /// defined by whoever constructs it - confirm.
    AND,
}
20
/// Grouping / delimiter symbols carried by `Token::GROUP`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Group {
    /// `"` (double quote)
    DBQ,
    /// `[`
    LBR,
    /// `]`
    RBR,
    /// `{`
    LCR,
    /// `}`
    RCR,
    /// `(`
    LPR,
    /// `)`
    RPR,
}
31
32// ", [, ], {, }, (, ), -, *, ;
33#[derive(Copy, Clone, Debug, PartialEq, Eq)]
34pub enum Token {
35    OP(Op),
36    GROUP(Group),
37    CHAR(char),
38    SEMI,
39    EOF
40}
41
42impl Token {
43    pub fn char(&self) -> char {
44        if let CHAR(c) = *self {
45            return c;
46        }
47        panic!("Not A Letter!");
48    }
49}
50
/// Errors a `TokenGiver` can report while producing a token.
#[derive(Debug)]
pub enum TokenErr {
    /// The input is not a valid expression at this point; the lexer in this
    /// file reports it, for example, when a metacharacter is escaped outside
    /// of double quotes.
    InvalidExpr,
    /// A backslash was followed by a character that is not a recognised
    /// escape.
    InvalidEscape,
}
56
57
/// Character-level scanner over an in-memory copy of the input.
pub struct Lexer {
    /// Every character of the input, in order.
    chars: Vec<char>,
    /// Index into `chars` of the next character to be read.
    pos: usize,
    /// `true` while scanning between an opening and a closing `"`.
    enclosed: bool,
}
63
64impl Lexer {
65    pub fn new(fname: &str) -> Result<Self, Box<dyn std::error::Error>> {
66        let chars = fs::read_to_string(fname)?
67            .chars()
68            .collect();
69        return Ok(Lexer { chars, pos: 0, enclosed: false });
70    }
71
72    fn nextchar(&mut self) -> char {
73        self.pos += 1;
74        return self.chars[self.pos - 1];
75    }
76}
77
78impl TokenGiver for Lexer {
79    fn next(&mut self) -> Result<Token, TokenErr> {
80        loop {
81            if self.pos == self.chars.len() { 
82                return Ok(EOF)
83            }
84            match self.nextchar() {
85                '\t' | '\n' | '\r' => {}
86                '"' => { 
87                    self.enclosed = !self.enclosed; 
88                    return Ok(GROUP(DBQ)) 
89                },
90                '[' => return Ok(GROUP(LBR)),
91                ']' => return Ok(GROUP(RBR)),
92                '{' => return Ok(GROUP(LCR)),
93                '}' => return Ok(GROUP(RCR)),
94                '(' => return Ok(GROUP(LPR)),
95                ')' => return Ok(GROUP(RPR)),
96                '-' => return Ok(OP(DASH)),
97                '*' => return Ok(OP(STAR)),
98                '+' => return Ok(OP(PLUS)),
99                '|' => return Ok(OP(BAR)),
100                ';' => return Ok(SEMI),
101                '#' => while self.pos < self.chars.len() {
102                    if self.nextchar() == '\n' { break }
103                },
104                '\\' => {
105                    if self.pos == (self.chars.len() - 1)
106                        { return Err(TokenErr::InvalidExpr); }
107                    let c = self.nextchar();
108                    match c {
109                        'n'  => return Ok(CHAR('\n')),
110                        't'  => return Ok(CHAR('\t')),
111                        'r'  => return Ok(CHAR('\r')),
112                        '\\' | ']' | '[' | ')' | '(' | '{' | '}' |
113                        '-' | '*' | ';' | '+' | '"' | '\'' => {
114                            if self.enclosed { return Ok(CHAR(c)); }
115                            else { return Err(TokenErr::InvalidExpr); }
116                        },
117                        _    => {println!("hey: {}", c); return Err(TokenErr::InvalidEscape)},
118                    }
119                }
120                ' ' => if self.enclosed { return Ok(CHAR(' ')); },
121                c => return Ok(CHAR(c))
122            }
123        }
124    }
125
126    fn peek(&mut self) -> Result<Token, TokenErr> {
127        let pos = self.pos;
128        let res = self.next();
129        self.pos = pos;
130        return res;
131    }
132}
133
134// Figure out how to test this...
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Lexes every fixture named `right-<i>.txt` / `wrong-<i>.txt` (with `i`
    /// counting up from 0 until a file is missing) under
    /// `tests/data/lexer/input`.
    ///
    /// A "right" file must reach `EOF` without any error; a "wrong" file must
    /// produce an error before `EOF`. Lexing of a file stops at the first
    /// error: continuing would eventually hit `EOF` and fail the "right"
    /// assertion for every negative fixture.
    #[test]
    fn right_wrong() {
        let path = "tests/data/lexer/input";
        for id in ["right", "wrong"] {
            let mut i = 0;
            loop {
                let fname = format!("{path}/{id}-{i}.txt");
                if !Path::new(&fname).exists() {
                    break;
                }
                let mut lx = Lexer::new(&fname).unwrap();
                loop {
                    match lx.next() {
                        Ok(EOF) => {
                            assert!("right" == id, "{} lexed cleanly but should fail", fname);
                            break;
                        }
                        Ok(_) => continue,
                        Err(tk) => {
                            println!("{:?}", tk);
                            assert!("wrong" == id, "{} failed to lex: {:?}", fname, tk);
                            break;
                        }
                    }
                }
                i += 1;
            }
        }
    }

    /// Prints every token (and every error) produced for `src/example.tk`.
    ///
    /// This is a manual inspection helper: it carries no `#[test]` attribute,
    /// so `cargo test` does not run it. To use it, mark it `#[test]` and run
    /// with `-- --nocapture` to see the output.
    // Intentionally unused unless temporarily turned into a test.
    #[allow(dead_code)]
    fn inspection() {
        let mut lx = Lexer::new("src/example.tk").expect("File not found.");
        loop {
            match lx.next() {
                Ok(tk) => {
                    println!("{:?}", tk);
                    if tk == EOF {
                        break;
                    }
                }
                // Errors are printed and lexing carries on to the end of input.
                Err(tk) => println!("{:?}", tk),
            }
        }
    }
}
177}