Skip to main content

oak_dhall/lexer/
mod.rs

1#![doc = include_str!("readme.md")]
2/// Token type module for DHall.
3pub mod token_type;
4
5use crate::{language::DHallLanguage, lexer::token_type::DHallTokenType};
6use oak_core::{
7    LexOutput, Lexer, LexerCache, LexerState, OakError,
8    lexer::{CommentConfig, StringConfig, WhitespaceConfig},
9    source::{Source, TextEdit},
10};
11use std::sync::LazyLock;
12
13static DHALL_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
14static DHALL_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "--", block_start: "{-", block_end: "-}", nested_blocks: true });
15static DHALL_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
16
17/// Lexer implementation for DHall.
18#[derive(Clone)]
19pub struct DHallLexer<'config> {
20    config: &'config DHallLanguage,
21}
22
23impl<'config> Lexer<DHallLanguage> for DHallLexer<'config> {
24    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<DHallLanguage>) -> LexOutput<DHallLanguage> {
25        let mut state = LexerState::new(source);
26        let result = self.run(&mut state);
27        state.finish_with_cache(result, cache)
28    }
29}
30
31impl<'config> DHallLexer<'config> {
32    /// Creates a new `DHallLexer`.
33    pub fn new(config: &'config DHallLanguage) -> Self {
34        Self { config }
35    }
36
37    fn run<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> Result<(), OakError> {
38        while state.not_at_end() {
39            let safe_point = state.get_position();
40            if self.skip_whitespace(state) {
41                continue;
42            };
43
44            if self.skip_comment(state) {
45                continue;
46            }
47
48            if self.lex_string_literal(state) {
49                continue;
50            }
51
52            if self.lex_number_literal(state) {
53                continue;
54            }
55
56            if self.lex_identifier_or_keyword(state) {
57                continue;
58            }
59
60            if self.lex_operators(state) {
61                continue;
62            }
63
64            if self.lex_single_char_tokens(state) {
65                continue;
66            }
67
68            state.advance_if_dead_lock(safe_point)
69        }
70
71        Ok(())
72    }
73
74    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> bool {
75        DHALL_WHITESPACE.scan(state, DHallTokenType::Whitespace)
76    }
77
78    fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> bool {
79        DHALL_COMMENT.scan(state, DHallTokenType::Comment, DHallTokenType::Comment)
80    }
81
82    fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> bool {
83        DHALL_STRING.scan(state, DHallTokenType::String)
84    }
85
86    fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> bool {
87        let start = state.get_position();
88        let first = match state.peek() {
89            Some(c) => c,
90            None => return false,
91        };
92
93        if !first.is_ascii_digit() {
94            return false;
95        }
96
97        state.advance(1);
98        while let Some(c) = state.peek() {
99            if c.is_ascii_digit() { state.advance(1) } else { break }
100        }
101
102        state.add_token(DHallTokenType::Number, start, state.get_position());
103        true
104    }
105
106    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> bool {
107        let start = state.get_position();
108        let first = match state.peek() {
109            Some(c) => c,
110            None => return false,
111        };
112
113        if !first.is_alphabetic() && first != '_' && first != 'λ' {
114            return false;
115        }
116
117        state.advance(1);
118        while let Some(c) = state.peek() {
119            if c.is_alphanumeric() || c == '_' || c == '-' || c == '/' { state.advance(1) } else { break }
120        }
121
122        let end = state.get_position();
123        let text = state.get_text_in((start..end).into());
124
125        let kind = match text.as_ref() {
126            "if" => DHallTokenType::If,
127            "then" => DHallTokenType::Then,
128            "else" => DHallTokenType::Else,
129            "let" => DHallTokenType::Let,
130            "in" => DHallTokenType::In,
131            "using" => DHallTokenType::Using,
132            "as" => DHallTokenType::As,
133            "merge" => DHallTokenType::Merge,
134            "Some" => DHallTokenType::Some,
135            "None" => DHallTokenType::None,
136            "with" => DHallTokenType::With,
137            "forall" => DHallTokenType::Forall,
138            "assert" => DHallTokenType::Assert,
139            "Bool" => DHallTokenType::Bool,
140            "Natural" => DHallTokenType::Natural,
141            "Integer" => DHallTokenType::Integer,
142            "Double" => DHallTokenType::Double,
143            "Text" => DHallTokenType::Text,
144            "List" => DHallTokenType::List,
145            "Optional" => DHallTokenType::Optional,
146            "True" => DHallTokenType::True,
147            "False" => DHallTokenType::False,
148            "λ" => DHallTokenType::Lambda,
149            _ => DHallTokenType::Identifier,
150        };
151
152        state.add_token(kind, start, end);
153        true
154    }
155
156    fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> bool {
157        let start = state.get_position();
158        let text = state.rest();
159
160        let ops = [
161            ("->", DHallTokenType::Arrow),
162            ("→", DHallTokenType::Arrow),
163            ("=>", DHallTokenType::FatArrow),
164            ("==", DHallTokenType::EqualEqual),
165            ("≡", DHallTokenType::EqualEqual),
166            ("!=", DHallTokenType::NotEqual),
167            ("&&", DHallTokenType::And),
168            ("∧", DHallTokenType::And),
169            ("||", DHallTokenType::Or),
170            ("∨", DHallTokenType::Or),
171            ("++", DHallTokenType::Append),
172            ("//", DHallTokenType::Combine),
173            ("⫽", DHallTokenType::Combine),
174            ("/\\", DHallTokenType::CombineTypes),
175            ("⩓", DHallTokenType::CombineTypes),
176            ("//\\", DHallTokenType::Prefer),
177            ("∀", DHallTokenType::Forall),
178            ("λ", DHallTokenType::Lambda),
179        ];
180
181        for (op, kind) in ops {
182            if text.starts_with(op) {
183                state.advance(op.len());
184                state.add_token(kind, start, state.get_position());
185                return true;
186            }
187        }
188
189        false
190    }
191
192    fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> bool {
193        let start = state.get_position();
194        let c = match state.peek() {
195            Some(c) => c,
196            None => return false,
197        };
198
199        let kind = match c {
200            '(' => DHallTokenType::LeftParen,
201            ')' => DHallTokenType::RightParen,
202            '[' => DHallTokenType::LeftBracket,
203            ']' => DHallTokenType::RightBracket,
204            '{' => DHallTokenType::LeftBrace,
205            '}' => DHallTokenType::RightBrace,
206            '<' => DHallTokenType::Less,
207            '>' => DHallTokenType::Greater,
208            ',' => DHallTokenType::Comma,
209            '.' => DHallTokenType::Dot,
210            ':' => DHallTokenType::Colon,
211            ';' => DHallTokenType::Semicolon,
212            '=' => DHallTokenType::Equal,
213            '@' => DHallTokenType::At,
214            '#' => DHallTokenType::Hash,
215            '?' => DHallTokenType::Question,
216            '+' => DHallTokenType::Plus,
217            '*' => DHallTokenType::Star,
218            '/' => DHallTokenType::Slash,
219            '|' => DHallTokenType::Pipe,
220            '\\' => DHallTokenType::Lambda,
221            _ => return false,
222        };
223
224        state.advance(1);
225        state.add_token(kind, start, state.get_position());
226        true
227    }
228}