// oak_dhall/lexer/mod.rs

1use crate::{kind::DHallSyntaxKind, language::DHallLanguage};
2use oak_core::{
3    LexOutput, Lexer, LexerCache, LexerState, OakError,
4    lexer::{CommentConfig, StringConfig, WhitespaceConfig},
5    source::{Source, TextEdit},
6};
7use std::sync::LazyLock;
8
9static DHALL_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
10static DHALL_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "--", block_start: "{-", block_end: "-}", nested_blocks: true });
11static DHALL_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
12
/// Lexer that turns Dhall source text into `DHallSyntaxKind` tokens.
///
/// The lexer only borrows its language configuration, so cloning it copies a reference.
#[derive(Clone)]
pub struct DHallLexer<'config> {
    /// Borrowed language configuration. The underscore prefix marks it as currently unused
    /// by the lexing methods visible in this file.
    _config: &'config DHallLanguage,
}
17
18impl<'config> Lexer<DHallLanguage> for DHallLexer<'config> {
19    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<DHallLanguage>) -> LexOutput<DHallLanguage> {
20        let mut state = LexerState::new(source);
21        let result = self.run(&mut state);
22        state.finish_with_cache(result, cache)
23    }
24}
25
26impl<'config> DHallLexer<'config> {
27    pub fn new(config: &'config DHallLanguage) -> Self {
28        Self { _config: config }
29    }
30
31    fn run<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> Result<(), OakError> {
32        while state.not_at_end() {
33            let safe_point = state.get_position();
34            if self.skip_whitespace(state) {
35                continue;
36            }
37
38            if self.skip_comment(state) {
39                continue;
40            }
41
42            if self.lex_string_literal(state) {
43                continue;
44            }
45
46            if self.lex_number_literal(state) {
47                continue;
48            }
49
50            if self.lex_identifier_or_keyword(state) {
51                continue;
52            }
53
54            if self.lex_operators(state) {
55                continue;
56            }
57
58            if self.lex_single_char_tokens(state) {
59                continue;
60            }
61
62            state.advance_if_dead_lock(safe_point);
63        }
64
65        Ok(())
66    }
67
68    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> bool {
69        DHALL_WHITESPACE.scan(state, DHallSyntaxKind::Whitespace)
70    }
71
72    fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> bool {
73        DHALL_COMMENT.scan(state, DHallSyntaxKind::Comment, DHallSyntaxKind::Comment)
74    }
75
76    fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> bool {
77        DHALL_STRING.scan(state, DHallSyntaxKind::String)
78    }
79
80    fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> bool {
81        let start = state.get_position();
82        let first = match state.peek() {
83            Some(c) => c,
84            None => return false,
85        };
86
87        if !first.is_ascii_digit() {
88            return false;
89        }
90
91        state.advance(1);
92        while let Some(c) = state.peek() {
93            if c.is_ascii_digit() {
94                state.advance(1);
95            }
96            else {
97                break;
98            }
99        }
100
101        state.add_token(DHallSyntaxKind::Number, start, state.get_position());
102        true
103    }
104
105    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> bool {
106        let start = state.get_position();
107        let first = match state.peek() {
108            Some(c) => c,
109            None => return false,
110        };
111
112        if !first.is_alphabetic() && first != '_' {
113            return false;
114        }
115
116        state.advance(1);
117        while let Some(c) = state.peek() {
118            if c.is_alphanumeric() || c == '_' || c == '-' || c == '/' {
119                state.advance(1);
120            }
121            else {
122                break;
123            }
124        }
125
126        let end = state.get_position();
127        let text = state.get_text_in((start..end).into());
128
129        let kind = match text.as_ref() {
130            "if" => DHallSyntaxKind::If,
131            "then" => DHallSyntaxKind::Then,
132            "else" => DHallSyntaxKind::Else,
133            "let" => DHallSyntaxKind::Let,
134            "in" => DHallSyntaxKind::In,
135            "using" => DHallSyntaxKind::Using,
136            "as" => DHallSyntaxKind::As,
137            "merge" => DHallSyntaxKind::Merge,
138            "Some" => DHallSyntaxKind::Some,
139            "None" => DHallSyntaxKind::None,
140            "with" => DHallSyntaxKind::With,
141            "forall" => DHallSyntaxKind::Forall,
142            "assert" => DHallSyntaxKind::Assert,
143            "Bool" => DHallSyntaxKind::Bool,
144            "Natural" => DHallSyntaxKind::Natural,
145            "Integer" => DHallSyntaxKind::Integer,
146            "Double" => DHallSyntaxKind::Double,
147            "Text" => DHallSyntaxKind::Text,
148            "List" => DHallSyntaxKind::List,
149            "Optional" => DHallSyntaxKind::Optional,
150            "True" => DHallSyntaxKind::True,
151            "False" => DHallSyntaxKind::False,
152            "λ" => DHallSyntaxKind::Lambda,
153            _ => DHallSyntaxKind::Identifier,
154        };
155
156        state.add_token(kind, start, end);
157        true
158    }
159
160    fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> bool {
161        let start = state.get_position();
162        let text = state.rest();
163
164        let ops = [
165            ("->", DHallSyntaxKind::Arrow),
166            ("→", DHallSyntaxKind::Arrow),
167            ("=>", DHallSyntaxKind::FatArrow),
168            ("==", DHallSyntaxKind::EqualEqual),
169            ("≡", DHallSyntaxKind::EqualEqual),
170            ("!=", DHallSyntaxKind::NotEqual),
171            ("&&", DHallSyntaxKind::And),
172            ("∧", DHallSyntaxKind::And),
173            ("||", DHallSyntaxKind::Or),
174            ("∨", DHallSyntaxKind::Or),
175            ("++", DHallSyntaxKind::Append),
176            ("//", DHallSyntaxKind::Combine),
177            ("⫽", DHallSyntaxKind::Combine),
178            ("/\\", DHallSyntaxKind::CombineTypes),
179            ("⩓", DHallSyntaxKind::CombineTypes),
180            ("//\\", DHallSyntaxKind::Prefer),
181            ("∀", DHallSyntaxKind::Forall),
182            ("λ", DHallSyntaxKind::Lambda),
183        ];
184
185        for (op, kind) in ops {
186            if text.starts_with(op) {
187                state.advance(op.len());
188                state.add_token(kind, start, state.get_position());
189                return true;
190            }
191        }
192
193        false
194    }
195
196    fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> bool {
197        let start = state.get_position();
198        let c = match state.peek() {
199            Some(c) => c,
200            None => return false,
201        };
202
203        let kind = match c {
204            '(' => DHallSyntaxKind::LeftParen,
205            ')' => DHallSyntaxKind::RightParen,
206            '[' => DHallSyntaxKind::LeftBracket,
207            ']' => DHallSyntaxKind::RightBracket,
208            '{' => DHallSyntaxKind::LeftBrace,
209            '}' => DHallSyntaxKind::RightBrace,
210            '<' => DHallSyntaxKind::Less,
211            '>' => DHallSyntaxKind::Greater,
212            ',' => DHallSyntaxKind::Comma,
213            '.' => DHallSyntaxKind::Dot,
214            ':' => DHallSyntaxKind::Colon,
215            ';' => DHallSyntaxKind::Semicolon,
216            '=' => DHallSyntaxKind::Equal,
217            '@' => DHallSyntaxKind::At,
218            '#' => DHallSyntaxKind::Hash,
219            '?' => DHallSyntaxKind::Question,
220            '+' => DHallSyntaxKind::Plus,
221            '*' => DHallSyntaxKind::Star,
222            '/' => DHallSyntaxKind::Slash,
223            '|' => DHallSyntaxKind::Pipe,
224            '\\' => DHallSyntaxKind::Lambda,
225            _ => return false,
226        };
227
228        state.advance(1);
229        state.add_token(kind, start, state.get_position());
230        true
231    }
232}