#![allow(clippy::needless_lifetimes)]

use std::{fmt, iter::Enumerate, num::NonZeroUsize, str::FromStr};

use anyhow::Result;
use parse_display::Display;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tokeniser::Input;
use tower_lsp::lsp_types::SemanticTokenType;
use winnow::{
    self,
    error::ParseError,
    stream::{ContainsToken, Stream},
};

use crate::{
    errors::KclError,
    parsing::ast::types::{ItemVisibility, VariableKind},
    source_range::SourceRange,
    CompilationError, ModuleId,
};

mod tokeniser;

#[cfg(test)]
pub(crate) use tokeniser::RESERVED_WORDS;

/// Unit suffixes that may follow a numeric literal. Within each unit family
/// the longer spelling comes first (`"mm"` and `"cm"` before `"m"`) so that
/// the `ends_with` scan in `Token::numeric_suffix` finds the longest match.
pub const NUM_SUFFIXES: [&str; 9] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad"];

#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS, JsonSchema)]
#[repr(u32)]
pub enum NumericSuffix {
    None,
    Count,
    Mm,
    Cm,
    M,
    Inch,
    Ft,
    Yd,
    Deg,
    Rad,
}

impl NumericSuffix {
    #[allow(dead_code)]
    pub fn is_none(self) -> bool {
        self == Self::None
    }

    pub fn is_some(self) -> bool {
        self != Self::None
    }

    pub fn digestable_id(&self) -> &[u8] {
        match self {
            NumericSuffix::None => &[],
            NumericSuffix::Count => b"_",
            NumericSuffix::Mm => b"mm",
            NumericSuffix::Cm => b"cm",
            NumericSuffix::M => b"m",
            NumericSuffix::Inch => b"in",
            NumericSuffix::Ft => b"ft",
            NumericSuffix::Yd => b"yd",
            NumericSuffix::Deg => b"deg",
            NumericSuffix::Rad => b"rad",
        }
    }
}

impl FromStr for NumericSuffix {
    type Err = CompilationError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "_" => Ok(NumericSuffix::Count),
            "mm" | "millimeters" => Ok(NumericSuffix::Mm),
            "cm" | "centimeters" => Ok(NumericSuffix::Cm),
            "m" | "meters" => Ok(NumericSuffix::M),
            "inch" | "in" => Ok(NumericSuffix::Inch),
            "ft" | "feet" => Ok(NumericSuffix::Ft),
            "yd" | "yards" => Ok(NumericSuffix::Yd),
            "deg" | "degrees" => Ok(NumericSuffix::Deg),
            "rad" | "radians" => Ok(NumericSuffix::Rad),
            _ => Err(CompilationError::err(SourceRange::default(), "invalid unit of measure")),
        }
    }
}

impl fmt::Display for NumericSuffix {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            NumericSuffix::None => Ok(()),
            NumericSuffix::Count => write!(f, "_"),
            NumericSuffix::Mm => write!(f, "mm"),
            NumericSuffix::Cm => write!(f, "cm"),
            NumericSuffix::M => write!(f, "m"),
            NumericSuffix::Inch => write!(f, "in"),
            NumericSuffix::Ft => write!(f, "ft"),
            NumericSuffix::Yd => write!(f, "yd"),
            NumericSuffix::Deg => write!(f, "deg"),
            NumericSuffix::Rad => write!(f, "rad"),
        }
    }
}
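
// A minimal round-trip sketch: `FromStr` accepts both the short and the long
// unit spellings, while `Display` always prints the short form, so parsing and
// re-printing normalizes "millimeters" to "mm".
#[cfg(test)]
mod numeric_suffix_examples {
    use super::*;

    #[test]
    fn parse_and_display_normalize_unit_names() {
        let suffix: NumericSuffix = "millimeters".parse().unwrap();
        assert_eq!(suffix, NumericSuffix::Mm);
        assert_eq!(suffix.to_string(), "mm");
    }
}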

#[derive(Clone, Debug, PartialEq)]
pub(crate) struct TokenStream {
    tokens: Vec<Token>,
}

impl TokenStream {
    fn new(tokens: Vec<Token>) -> Self {
        Self { tokens }
    }

    /// Remove all `TokenType::Unknown` tokens from the stream and return
    /// them; recognised tokens are kept.
    pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
        let tokens = std::mem::take(&mut self.tokens);
        let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
            .into_iter()
            .partition(|token| token.token_type != TokenType::Unknown);
        self.tokens = tokens;
        unknown_tokens
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        self.tokens.iter()
    }

    pub fn is_empty(&self) -> bool {
        self.tokens.is_empty()
    }

    pub fn as_slice(&self) -> TokenSlice {
        TokenSlice::from(self)
    }
}
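
// A sketch of `remove_unknown`: unknown tokens are split out of the stream and
// handed back, leaving only recognised tokens behind. `ModuleId::default()` is
// assumed here purely to construct tokens by hand.
#[cfg(test)]
mod token_stream_examples {
    use super::*;

    #[test]
    fn remove_unknown_partitions_the_stream() {
        let module_id = ModuleId::default();
        let mut stream = TokenStream::new(vec![
            Token::from_range(0..1, module_id, TokenType::Number, "1".to_owned()),
            Token::from_range(1..2, module_id, TokenType::Unknown, "~".to_owned()),
        ]);
        let unknown = stream.remove_unknown();
        assert_eq!(unknown.len(), 1);
        assert_eq!(unknown[0].value, "~");
        assert_eq!(stream.iter().count(), 1);
    }
}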

impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
    fn from(stream: &'a TokenStream) -> Self {
        TokenSlice {
            start: 0,
            end: stream.tokens.len(),
            stream,
        }
    }
}

impl IntoIterator for TokenStream {
    type Item = Token;

    type IntoIter = std::vec::IntoIter<Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.tokens.into_iter()
    }
}

/// A window into a `TokenStream`, delimited by `start` (inclusive) and `end`
/// (exclusive) indices into the underlying token vector.
#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
    stream: &'a TokenStream,
    start: usize,
    end: usize,
}

impl<'a> std::ops::Deref for TokenSlice<'a> {
    type Target = [Token];

    fn deref(&self) -> &Self::Target {
        &self.stream.tokens[self.start..self.end]
    }
}

impl<'a> TokenSlice<'a> {
    pub fn token(&self, i: usize) -> &Token {
        &self.stream.tokens[i + self.start]
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        (**self).iter()
    }

    /// Shrink the slice by one token at each end, e.g. to drop a pair of
    /// delimiters. Callers must ensure the slice holds at least two tokens.
    pub fn without_ends(&self) -> Self {
        Self {
            start: self.start + 1,
            end: self.end - 1,
            stream: self.stream,
        }
    }
}

impl<'a> IntoIterator for TokenSlice<'a> {
    type Item = &'a Token;

    type IntoIter = std::slice::Iter<'a, Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.stream.tokens[self.start..self.end].iter()
    }
}

impl<'a> Stream for TokenSlice<'a> {
    type Token = Token;
    type Slice = Self;
    type IterOffsets = Enumerate<std::vec::IntoIter<Token>>;
    type Checkpoint = Checkpoint;

    fn iter_offsets(&self) -> Self::IterOffsets {
        #[allow(clippy::unnecessary_to_owned)]
        self.to_vec().into_iter().enumerate()
    }

    fn eof_offset(&self) -> usize {
        self.len()
    }

    fn next_token(&mut self) -> Option<Self::Token> {
        let token = self.first()?.clone();
        self.start += 1;
        Some(token)
    }

    fn offset_for<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Token) -> bool,
    {
        self.iter().position(|b| predicate(b.clone()))
    }

    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
        if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
            Err(winnow::error::Needed::Size(needed))
        } else {
            Ok(tokens)
        }
    }

    fn next_slice(&mut self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        let next = TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        };
        self.start += offset;
        next
    }

    fn checkpoint(&self) -> Self::Checkpoint {
        Checkpoint(self.start, self.end)
    }

    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
        self.start = checkpoint.0;
        self.end = checkpoint.1;
    }

    fn raw(&self) -> &dyn fmt::Debug {
        self
    }
}
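
// A sketch of the winnow `Stream` contract implemented above: `next_token`
// advances the window's start, and `reset` rewinds it to a saved checkpoint.
// Assumes `ModuleId::default()` is available, as in the crate's other tests.
#[cfg(test)]
mod token_slice_examples {
    use super::*;

    #[test]
    fn checkpoint_and_reset_rewind_the_slice() {
        let stream = lex("1 + 2", ModuleId::default()).unwrap();
        let mut slice = stream.as_slice();
        let checkpoint = slice.checkpoint();
        assert_eq!(slice.next_token().unwrap().value, "1");
        slice.reset(&checkpoint);
        assert_eq!(slice.next_token().unwrap().value, "1");
    }
}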

impl<'a> winnow::stream::Offset for TokenSlice<'a> {
    fn offset_from(&self, start: &Self) -> usize {
        self.start - start.start
    }
}

impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
    fn offset_from(&self, start: &Checkpoint) -> usize {
        self.start - start.0
    }
}

impl winnow::stream::Offset for Checkpoint {
    fn offset_from(&self, start: &Self) -> usize {
        self.0 - start.0
    }
}

impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {}

    fn restore_partial(&mut self, _: Self::PartialState) {}

    fn is_partial_supported() -> bool {
        false
    }
}

/// A saved position in a `TokenSlice`: the `start` and `end` indices at the
/// time the checkpoint was taken.
#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);

#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
#[display(style = "camelCase")]
pub enum TokenType {
    /// A number.
    Number,
    /// A word.
    Word,
    /// An operator.
    Operator,
    /// A string.
    String,
    /// A keyword.
    Keyword,
    /// A type.
    Type,
    /// A brace.
    Brace,
    /// A hash.
    Hash,
    /// A bang.
    Bang,
    /// A dollar sign.
    Dollar,
    /// Whitespace.
    Whitespace,
    /// A comma.
    Comma,
    /// A colon.
    Colon,
    /// A period.
    Period,
    /// A double period: `..`.
    DoublePeriod,
    /// A line comment.
    LineComment,
    /// A block comment.
    BlockComment,
    /// A function name.
    Function,
    /// A lexeme the lexer could not classify.
    Unknown,
    /// The `?` symbol.
    QuestionMark,
    /// The `@` symbol.
    At,
}

impl TryFrom<TokenType> for SemanticTokenType {
    type Error = anyhow::Error;

    fn try_from(token_type: TokenType) -> Result<Self> {
        Ok(match token_type {
            TokenType::Number => Self::NUMBER,
            TokenType::Word => Self::VARIABLE,
            TokenType::Keyword => Self::KEYWORD,
            TokenType::Type => Self::TYPE,
            TokenType::Operator => Self::OPERATOR,
            TokenType::QuestionMark => Self::OPERATOR,
            TokenType::String => Self::STRING,
            TokenType::Bang => Self::OPERATOR,
            TokenType::LineComment => Self::COMMENT,
            TokenType::BlockComment => Self::COMMENT,
            TokenType::Function => Self::FUNCTION,
            TokenType::Whitespace
            | TokenType::Brace
            | TokenType::Comma
            | TokenType::Colon
            | TokenType::Period
            | TokenType::DoublePeriod
            | TokenType::Hash
            | TokenType::Dollar
            | TokenType::At
            | TokenType::Unknown => {
                anyhow::bail!("unsupported token type: {:?}", token_type)
            }
        })
    }
}
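
// A sketch of the conversion above: only token types that carry semantic
// meaning for LSP highlighting map to a `SemanticTokenType`; purely structural
// tokens such as commas are rejected.
#[cfg(test)]
mod semantic_token_examples {
    use super::*;

    #[test]
    fn structural_tokens_do_not_convert() {
        assert_eq!(
            SemanticTokenType::try_from(TokenType::Number).unwrap(),
            SemanticTokenType::NUMBER
        );
        assert!(SemanticTokenType::try_from(TokenType::Comma).is_err());
    }
}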

impl TokenType {
    pub fn is_whitespace(&self) -> bool {
        matches!(self, Self::Whitespace)
    }

    pub fn is_comment(&self) -> bool {
        matches!(self, Self::LineComment | Self::BlockComment)
    }
}

#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
    pub token_type: TokenType,
    /// Offset in the source code where this token begins.
    pub start: usize,
    /// Offset in the source code where this token ends.
    pub end: usize,
    pub(super) module_id: ModuleId,
    pub(super) value: String,
}

impl ContainsToken<Token> for (TokenType, &str) {
    fn contains_token(&self, token: Token) -> bool {
        self.0 == token.token_type && self.1 == token.value
    }
}

impl ContainsToken<Token> for TokenType {
    fn contains_token(&self, token: Token) -> bool {
        *self == token.token_type
    }
}
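
// A sketch of the `ContainsToken` impls above: parser combinators can match a
// token either by type alone or by (type, exact text). `ModuleId::default()`
// is assumed here purely to construct the token by hand.
#[cfg(test)]
mod contains_token_examples {
    use super::*;

    #[test]
    fn match_by_type_or_by_type_and_text() {
        let token = Token::from_range(0..2, ModuleId::default(), TokenType::Keyword, "fn".to_owned());
        assert!(TokenType::Keyword.contains_token(token.clone()));
        assert!((TokenType::Keyword, "fn").contains_token(token));
    }
}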

impl Token {
    pub fn from_range(
        range: std::ops::Range<usize>,
        module_id: ModuleId,
        token_type: TokenType,
        value: String,
    ) -> Self {
        Self {
            start: range.start,
            end: range.end,
            module_id,
            value,
            token_type,
        }
    }

    pub fn is_code_token(&self) -> bool {
        !matches!(
            self.token_type,
            TokenType::Whitespace | TokenType::LineComment | TokenType::BlockComment
        )
    }

    pub fn as_source_range(&self) -> SourceRange {
        SourceRange::new(self.start, self.end, self.module_id)
    }

    pub fn as_source_ranges(&self) -> Vec<SourceRange> {
        vec![self.as_source_range()]
    }

    pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        match self.value.as_str() {
            "export" => Some(ItemVisibility::Export),
            _ => None,
        }
    }

    /// The numeric part of a number token, with any unit suffix stripped.
    /// Returns `None` for non-number tokens.
    pub fn numeric_value(&self) -> Option<f64> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

    /// The unit suffix of a number token, e.g. `mm` in `1mm`. Returns
    /// `NumericSuffix::None` for non-number tokens and bare numbers.
    pub fn numeric_suffix(&self) -> NumericSuffix {
        if self.token_type != TokenType::Number {
            return NumericSuffix::None;
        }

        if self.value.ends_with('_') {
            return NumericSuffix::Count;
        }

        for suffix in NUM_SUFFIXES {
            if self.value.ends_with(suffix) {
                return suffix.parse().unwrap();
            }
        }

        NumericSuffix::None
    }

    /// Is this token the beginning of a variable/function declaration?
    /// If so, which kind of declaration?
    pub fn declaration_keyword(&self) -> Option<VariableKind> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        Some(match self.value.as_str() {
            "fn" => VariableKind::Fn,
            "var" | "let" | "const" => VariableKind::Const,
            _ => return None,
        })
    }
}
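
// A sketch of how `numeric_value` and `numeric_suffix` decompose a number
// token such as `42mm`. `ModuleId::default()` is assumed here purely to
// construct the token by hand.
#[cfg(test)]
mod number_token_examples {
    use super::*;

    #[test]
    fn splits_value_and_suffix() {
        let token = Token::from_range(0..4, ModuleId::default(), TokenType::Number, "42mm".to_owned());
        assert_eq!(token.numeric_value(), Some(42.0));
        assert_eq!(token.numeric_suffix(), NumericSuffix::Mm);
    }
}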

impl From<Token> for SourceRange {
    fn from(token: Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

impl From<&Token> for SourceRange {
    fn from(token: &Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

/// Lex the given source code into a stream of tokens.
pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
    tokeniser::lex(s, module_id).map_err(From::from)
}
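
// A minimal usage sketch for `lex`: tokenise a one-line program, then keep
// only the code tokens (whitespace and comments filtered out). Assumes the
// tokeniser emits `1mm` as a single number token and `ModuleId::default()` is
// available, as in the tokeniser's own tests.
#[cfg(test)]
mod lex_examples {
    use super::*;

    #[test]
    fn lex_keeps_code_tokens_separate_from_whitespace() {
        let tokens = lex("x = 1mm", ModuleId::default()).unwrap();
        let code: Vec<&Token> = tokens.iter().filter(|t| t.is_code_token()).collect();
        let values: Vec<&str> = code.iter().map(|t| t.value.as_str()).collect();
        assert_eq!(values, vec!["x", "=", "1mm"]);
    }
}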

impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
    fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
        let (input, offset): (Vec<char>, usize) = (err.input().chars().collect(), err.offset());
        let module_id = err.input().state.module_id;

        if offset >= input.len() {
            // Per the winnow docs, this is an offset, not an index, and may
            // point to the end of input (input.len()) on EOF errors.
            return KclError::Lexical(crate::errors::KclErrorDetails {
                source_ranges: vec![SourceRange::new(offset, offset, module_id)],
                message: "unexpected EOF while parsing".to_string(),
            });
        }

        let bad_token = &input[offset];
        // TODO: Add the winnow tokenizer error to the message.
        KclError::Lexical(crate::errors::KclErrorDetails {
            source_ranges: vec![SourceRange::new(offset, offset + 1, module_id)],
            message: format!("found unknown token '{}'", bad_token),
        })
    }
}