isla_lib/
ir_lexer.rs

1// BSD 2-Clause License
2//
3// Copyright (c) 2019, 2020 Alasdair Armstrong
4//
5// All rights reserved.
6//
7// Redistribution and use in source and binary forms, with or without
8// modification, are permitted provided that the following conditions are
9// met:
10//
11// 1. Redistributions of source code must retain the above copyright
12// notice, this list of conditions and the following disclaimer.
13//
14// 2. Redistributions in binary form must reproduce the above copyright
15// notice, this list of conditions and the following disclaimer in the
16// documentation and/or other materials provided with the distribution.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30use std::fmt;
31
32use crate::lexer::*;
33
/// A single token produced by the lexer in this file.
///
/// Variants that carry a `&'input str` hold a slice borrowed from the text
/// being lexed; all other variants are fixed keywords, operators or
/// punctuation whose spelling is given by the `KEYWORDS` table below.
#[derive(Clone, Debug)]
pub enum Tok<'input> {
    // Tokens with a payload. These are recognised by the regex / string
    // literal matchers in `Lexer::next`, not by the keyword table.
    // NOTE(review): whether the slice includes prefixes or quotes depends on
    // the helpers in `crate::lexer` -- confirm there.
    Nat(&'input str),
    Id(&'input str),
    String(&'input str),
    Hex(&'input str),
    Bin(&'input str),
    // Primitive operators, spelled with a leading `@` in the keyword table
    // (e.g. `@not`, `@bvadd`, `@zero_extend`).
    OpNot,
    OpOr,
    OpAnd,
    OpEq,
    OpNeq,
    OpSlice,
    OpSetSlice,
    OpConcat,
    OpSigned,
    OpUnsigned,
    OpBvnot,
    OpBvor,
    OpBvxor,
    OpBvand,
    OpBvadd,
    OpBvsub,
    OpBvaccess,
    OpAdd,
    OpSub,
    OpLteq,
    OpLt,
    OpGteq,
    OpGt,
    OpHead,
    OpTail,
    OpZeroExtend,
    // Type keywords, spelled with a leading `%` in the keyword table
    // (e.g. `%i`, `%bv`, `%struct`).
    TyI,
    TyBv,
    TyUnit,
    TyBool,
    TyBit,
    TyString,
    TyReal,
    TyEnum,
    TyStruct,
    TyUnion,
    TyVec,
    TyFVec,
    TyList,
    // Punctuation and symbols (`::<`, `` ` ``, `>`, `&`, `(`, `)`, ...).
    TurboFish,
    Backtick,
    Gt,
    Amp,
    Lparen,
    Rparen,
    Lbrace,
    Rbrace,
    Dot,
    Star,
    Colon,
    Eq,
    Comma,
    Semi,
    Dollar,
    // Literal keywords and reserved words (`bitzero`, `()`, `->`, `struct`, ...).
    Bitzero,
    Bitone,
    Unit,
    Arrow,
    Minus,
    Struct,
    Is,
    As,
    Jump,
    Goto,
    Mono,
    Failure,
    Arbitrary,
    Undefined,
    End,
    Register,
    Fn,
    Let,
    Enum,
    Union,
    Val,
    True,
    False,
    // Lexed from the literal text `UINT64_C(0)`.
    EmptyBitvec,
}
120
121impl<'input> fmt::Display for Tok<'input> {
122    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
123        write!(f, "{:?}", self)
124    }
125}
126
/// One entry of the fixed-spelling token table: a literal piece of source
/// text together with the token it lexes to.
pub struct Keyword {
    /// The exact text that must appear at the start of the remaining input.
    word: &'static str,
    /// The token returned (cloned) when `word` matches.
    token: Tok<'static>,
    /// Cached `word.len()`, i.e. how far the lexer advances on a match.
    len: usize,
}
132
133impl Keyword {
134    pub fn new(kw: &'static str, tok: Tok<'static>) -> Self {
135        Keyword { word: kw, token: tok, len: kw.len() }
136    }
137}
138
139lazy_static! {
140    static ref KEYWORDS: Vec<Keyword> = {
141        use Tok::*;
142        let mut table = Vec::new();
143        table.push(Keyword::new("::<", TurboFish));
144        table.push(Keyword::new("`", Backtick));
145        table.push(Keyword::new(">", Gt));
146        table.push(Keyword::new("()", Unit));
147        table.push(Keyword::new("->", Arrow));
148        table.push(Keyword::new("&", Amp));
149        table.push(Keyword::new("(", Lparen));
150        table.push(Keyword::new(")", Rparen));
151        table.push(Keyword::new("{", Lbrace));
152        table.push(Keyword::new("}", Rbrace));
153        table.push(Keyword::new(".", Dot));
154        table.push(Keyword::new("*", Star));
155        table.push(Keyword::new(":", Colon));
156        table.push(Keyword::new("=", Eq));
157        table.push(Keyword::new(",", Comma));
158        table.push(Keyword::new(";", Semi));
159        table.push(Keyword::new("$", Dollar));
160        table.push(Keyword::new("bitzero", Bitzero));
161        table.push(Keyword::new("bitone", Bitone));
162        table.push(Keyword::new("-", Minus));
163        table.push(Keyword::new("struct", Struct));
164        table.push(Keyword::new("is", Is));
165        table.push(Keyword::new("as", As));
166        table.push(Keyword::new("jump", Jump));
167        table.push(Keyword::new("goto", Goto));
168        table.push(Keyword::new("mono", Mono));
169        table.push(Keyword::new("failure", Failure));
170        table.push(Keyword::new("arbitrary", Arbitrary));
171        table.push(Keyword::new("undefined", Undefined));
172        table.push(Keyword::new("end", End));
173        table.push(Keyword::new("register", Register));
174        table.push(Keyword::new("fn", Fn));
175        table.push(Keyword::new("let", Let));
176        table.push(Keyword::new("enum", Enum));
177        table.push(Keyword::new("union", Union));
178        table.push(Keyword::new("val", Val));
179        table.push(Keyword::new("%i", TyI));
180        table.push(Keyword::new("%unit", TyUnit));
181        table.push(Keyword::new("%bool", TyBool));
182        table.push(Keyword::new("%bit", TyBit));
183        table.push(Keyword::new("%string", TyString));
184        table.push(Keyword::new("%real", TyReal));
185        table.push(Keyword::new("%enum", TyEnum));
186        table.push(Keyword::new("%struct", TyStruct));
187        table.push(Keyword::new("%union", TyUnion));
188        table.push(Keyword::new("%vec", TyVec));
189        table.push(Keyword::new("%fvec", TyFVec));
190        table.push(Keyword::new("%list", TyList));
191        table.push(Keyword::new("%bv", TyBv));
192        table.push(Keyword::new("@slice", OpSlice));
193        table.push(Keyword::new("@set_slice", OpSetSlice));
194        table.push(Keyword::new("@concat", OpConcat));
195        table.push(Keyword::new("@unsigned", OpUnsigned));
196        table.push(Keyword::new("@signed", OpSigned));
197        table.push(Keyword::new("@not", OpNot));
198        table.push(Keyword::new("@or", OpOr));
199        table.push(Keyword::new("@and", OpAnd));
200        table.push(Keyword::new("@eq", OpEq));
201        table.push(Keyword::new("@neq", OpNeq));
202        table.push(Keyword::new("@bvnot", OpBvnot));
203        table.push(Keyword::new("@bvor", OpBvor));
204        table.push(Keyword::new("@bvxor", OpBvor));
205        table.push(Keyword::new("@bvand", OpBvand));
206        table.push(Keyword::new("@bvadd", OpBvadd));
207        table.push(Keyword::new("@bvsub", OpBvsub));
208        table.push(Keyword::new("@bvaccess", OpBvaccess));
209        table.push(Keyword::new("@lteq", OpLteq));
210        table.push(Keyword::new("@lt", OpLt));
211        table.push(Keyword::new("@gteq", OpGteq));
212        table.push(Keyword::new("@gt", OpGt));
213        table.push(Keyword::new("@hd", OpHead));
214        table.push(Keyword::new("@tl", OpTail));
215        table.push(Keyword::new("@iadd", OpAdd));
216        table.push(Keyword::new("@isub", OpSub));
217        table.push(Keyword::new("@zero_extend", OpZeroExtend));
218        table.push(Keyword::new("bitzero", Bitzero));
219        table.push(Keyword::new("bitone", Bitone));
220        table.push(Keyword::new("true", True));
221        table.push(Keyword::new("false", False));
222        table.push(Keyword::new("UINT64_C(0)", EmptyBitvec));
223        table
224    };
225}
226
/// The item yielded by the lexer: on success a `(start, token, end)` triple
/// giving the token and the lexer positions before and after it, otherwise
/// a `LexError`.
pub type Span<'input> = Result<(usize, Tok<'input>, usize), LexError>;
228
229impl<'input> Iterator for Lexer<'input> {
230    type Item = Span<'input>;
231
232    fn next(&mut self) -> Option<Self::Item> {
233        use Tok::*;
234        self.consume_whitespace()?;
235        let start_pos = self.pos;
236
237        for k in KEYWORDS.iter() {
238            if self.buf.starts_with(k.word) {
239                self.pos += k.len;
240                self.buf = &self.buf[k.len..];
241                return Some(Ok((start_pos, k.token.clone(), self.pos)));
242            }
243        }
244
245        match self.consume_regex(&ID_REGEX) {
246            None => (),
247            Some((from, id, to)) => return Some(Ok((from, Id(id), to))),
248        }
249
250        match self.consume_regex(&HEX_REGEX) {
251            None => (),
252            Some((from, bits, to)) => return Some(Ok((from, Hex(bits), to))),
253        }
254
255        match self.consume_regex(&BIN_REGEX) {
256            None => (),
257            Some((from, bits, to)) => return Some(Ok((from, Bin(bits), to))),
258        }
259
260        match self.consume_regex(&NAT_REGEX) {
261            None => (),
262            Some((from, n, to)) => return Some(Ok((from, Nat(n), to))),
263        }
264
265        match self.consume_string_literal() {
266            None => (),
267            Some((from, s, to)) => return Some(Ok((from, String(s), to))),
268        }
269
270        Some(Err(LexError { pos: self.pos }))
271    }
272}