#![allow(clippy::needless_lifetimes)]

use std::{fmt, iter::Enumerate, num::NonZeroUsize, str::FromStr};

use anyhow::Result;
use parse_display::Display;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tokeniser::Input;
use tower_lsp::lsp_types::SemanticTokenType;
use winnow::{
    self,
    error::ParseError,
    stream::{ContainsToken, Stream},
};

use crate::{
    errors::KclError,
    parsing::ast::types::{ItemVisibility, VariableKind},
    source_range::SourceRange,
    CompilationError, ModuleId,
};

mod tokeniser;

pub(crate) use tokeniser::RESERVED_WORDS;

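/// Suffixes recognised at the end of a number literal. Order matters here:
/// `Token::numeric_suffix` takes the first `ends_with` match, so a suffix
/// must precede any suffix it ends with (e.g. "mm" before "m").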
pub const NUM_SUFFIXES: [&str; 9] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad"];

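/// A suffix attached to a numeric literal, such as the `mm` in `10mm`: either
/// a unit of measure, `Count` (spelled `_`), or `None` when the literal has
/// no suffix.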
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS, JsonSchema)]
#[repr(u32)]
pub enum NumericSuffix {
    None,
    Count,
    Mm,
    Cm,
    M,
    Inch,
    Ft,
    Yd,
    Deg,
    Rad,
}

impl NumericSuffix {
    #[allow(dead_code)]
    pub fn is_none(self) -> bool {
        self == Self::None
    }

    pub fn is_some(self) -> bool {
        self != Self::None
    }

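    /// A stable byte identifier for this suffix, suitable for feeding into a
    /// digest; presumably paired with the AST-hashing code elsewhere in the
    /// crate.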
    pub fn digestable_id(&self) -> &[u8] {
        match self {
            NumericSuffix::None => &[],
            NumericSuffix::Count => b"_",
            NumericSuffix::Mm => b"mm",
            NumericSuffix::Cm => b"cm",
            NumericSuffix::M => b"m",
            NumericSuffix::Inch => b"in",
            NumericSuffix::Ft => b"ft",
            NumericSuffix::Yd => b"yd",
            NumericSuffix::Deg => b"deg",
            NumericSuffix::Rad => b"rad",
        }
    }
}

impl FromStr for NumericSuffix {
    type Err = CompilationError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "_" => Ok(NumericSuffix::Count),
            "mm" | "millimeters" => Ok(NumericSuffix::Mm),
            "cm" | "centimeters" => Ok(NumericSuffix::Cm),
            "m" | "meters" => Ok(NumericSuffix::M),
            "inch" | "in" => Ok(NumericSuffix::Inch),
            "ft" | "feet" => Ok(NumericSuffix::Ft),
            "yd" | "yards" => Ok(NumericSuffix::Yd),
            "deg" | "degrees" => Ok(NumericSuffix::Deg),
            "rad" | "radians" => Ok(NumericSuffix::Rad),
            _ => Err(CompilationError::err(SourceRange::default(), "invalid unit of measure")),
        }
    }
}

impl fmt::Display for NumericSuffix {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            NumericSuffix::None => Ok(()),
            NumericSuffix::Count => write!(f, "_"),
            NumericSuffix::Mm => write!(f, "mm"),
            NumericSuffix::Cm => write!(f, "cm"),
            NumericSuffix::M => write!(f, "m"),
            NumericSuffix::Inch => write!(f, "in"),
            NumericSuffix::Ft => write!(f, "ft"),
            NumericSuffix::Yd => write!(f, "yd"),
            NumericSuffix::Deg => write!(f, "deg"),
            NumericSuffix::Rad => write!(f, "rad"),
        }
    }
}

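/// Every token lexed from one module's source code, including whitespace and
/// comments.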
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct TokenStream {
    tokens: Vec<Token>,
}

impl TokenStream {
    fn new(tokens: Vec<Token>) -> Self {
        Self { tokens }
    }

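    /// Remove every `Unknown` token from the stream, returning the removed
    /// tokens so the caller can report them.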
    pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
        let tokens = std::mem::take(&mut self.tokens);
        let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
            .into_iter()
            .partition(|token| token.token_type != TokenType::Unknown);
        self.tokens = tokens;
        unknown_tokens
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        self.tokens.iter()
    }

    pub fn is_empty(&self) -> bool {
        self.tokens.is_empty()
    }

    pub fn as_slice(&self) -> TokenSlice {
        TokenSlice::from(self)
    }
}

impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
    fn from(stream: &'a TokenStream) -> Self {
        TokenSlice {
            start: 0,
            end: stream.tokens.len(),
            stream,
        }
    }
}

impl IntoIterator for TokenStream {
    type Item = Token;

    type IntoIter = std::vec::IntoIter<Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.tokens.into_iter()
    }
}

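/// A borrowed window into a `TokenStream`, bounded by `start..end`. This is
/// the input type the parser consumes, via the winnow `Stream` impl below.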
#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
    stream: &'a TokenStream,
    /// Index into the stream of the first token in this slice.
    start: usize,
    /// Index into the stream just past the last token in this slice.
    end: usize,
}

impl<'a> std::ops::Deref for TokenSlice<'a> {
    type Target = [Token];

    fn deref(&self) -> &Self::Target {
        &self.stream.tokens[self.start..self.end]
    }
}

impl<'a> TokenSlice<'a> {
    pub fn token(&self, i: usize) -> &Token {
        &self.stream.tokens[i + self.start]
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        (**self).iter()
    }

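    /// The same slice, minus its first and last token (e.g. to strip a pair
    /// of enclosing delimiters). Assumes the slice holds at least two tokens.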
    pub fn without_ends(&self) -> Self {
        Self {
            start: self.start + 1,
            end: self.end - 1,
            stream: self.stream,
        }
    }

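    /// The source range this slice covers. Indices are clamped at the end of
    /// the underlying stream, so slices touching EOF still resolve to a real
    /// token.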
    pub fn as_source_range(&self) -> SourceRange {
        let stream_len = self.stream.tokens.len();
        let first_token = if stream_len == self.start {
            &self.stream.tokens[self.start - 1]
        } else {
            self.token(0)
        };
        let last_token = if stream_len == self.end {
            &self.stream.tokens[stream_len - 1]
        } else {
            self.token(self.end - self.start)
        };
        SourceRange::new(first_token.start, last_token.end, last_token.module_id)
    }
}

impl<'a> IntoIterator for TokenSlice<'a> {
    type Item = &'a Token;

    type IntoIter = std::slice::Iter<'a, Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.stream.tokens[self.start..self.end].iter()
    }
}

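// Implementing winnow's `Stream` is what lets the parser's combinators
// consume a `TokenSlice` token by token, using checkpoints to backtrack.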
impl<'a> Stream for TokenSlice<'a> {
    type Token = Token;
    type Slice = Self;
    type IterOffsets = Enumerate<std::vec::IntoIter<Token>>;
    type Checkpoint = Checkpoint;

    fn iter_offsets(&self) -> Self::IterOffsets {
        #[allow(clippy::unnecessary_to_owned)]
        self.to_vec().into_iter().enumerate()
    }

    fn eof_offset(&self) -> usize {
        self.len()
    }

    fn next_token(&mut self) -> Option<Self::Token> {
        let token = self.first()?.clone();
        self.start += 1;
        Some(token)
    }

    fn offset_for<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Token) -> bool,
    {
        self.iter().position(|b| predicate(b.clone()))
    }

    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
        if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
            Err(winnow::error::Needed::Size(needed))
        } else {
            Ok(tokens)
        }
    }

    fn next_slice(&mut self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        let next = TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        };
        self.start += offset;
        next
    }

    fn checkpoint(&self) -> Self::Checkpoint {
        Checkpoint(self.start, self.end)
    }

    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
        self.start = checkpoint.0;
        self.end = checkpoint.1;
    }

    fn raw(&self) -> &dyn fmt::Debug {
        self
    }
}

impl<'a> winnow::stream::Offset for TokenSlice<'a> {
    fn offset_from(&self, start: &Self) -> usize {
        self.start - start.start
    }
}

impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
    fn offset_from(&self, start: &Checkpoint) -> usize {
        self.start - start.0
    }
}

impl winnow::stream::Offset for Checkpoint {
    fn offset_from(&self, start: &Self) -> usize {
        self.0 - start.0
    }
}

impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {}

    fn restore_partial(&mut self, _: Self::PartialState) {}

    fn is_partial_supported() -> bool {
        false
    }
}

impl<'a> winnow::stream::FindSlice<&str> for TokenSlice<'a> {
    fn find_slice(&self, substr: &str) -> Option<std::ops::Range<usize>> {
        self.iter()
            .enumerate()
            .find_map(|(i, b)| if b.value == substr { Some(i..self.end) } else { None })
    }
}

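/// A saved `(start, end)` position in a `TokenSlice`, used by winnow to
/// rewind the stream when a parse branch fails.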
#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);

#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
#[display(style = "camelCase")]
pub enum TokenType {
    /// A number.
    Number,
    /// A word (identifier).
    Word,
    /// An operator.
    Operator,
    /// A string literal.
    String,
    /// A keyword.
    Keyword,
    /// A type.
    Type,
    /// A brace.
    Brace,
    /// A hash: `#`.
    Hash,
    /// A bang: `!`.
    Bang,
    /// A dollar sign: `$`.
    Dollar,
    /// Whitespace.
    Whitespace,
    /// A comma.
    Comma,
    /// A colon.
    Colon,
    /// A period.
    Period,
    /// A double period: `..`.
    DoublePeriod,
    /// A line comment.
    LineComment,
    /// A block comment.
    BlockComment,
    /// A function name.
    Function,
    /// A lexeme the tokeniser did not recognise.
    Unknown,
    /// A question mark: `?`.
    QuestionMark,
    /// An at sign: `@`.
    At,
}

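// Map lexer token types onto LSP semantic token types for editor
// highlighting. Purely structural tokens (braces, commas, whitespace, etc.)
// have no LSP equivalent and are rejected.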
impl TryFrom<TokenType> for SemanticTokenType {
    type Error = anyhow::Error;
    fn try_from(token_type: TokenType) -> Result<Self> {
        Ok(match token_type {
            TokenType::Number => Self::NUMBER,
            TokenType::Word => Self::VARIABLE,
            TokenType::Keyword => Self::KEYWORD,
            TokenType::Type => Self::TYPE,
            TokenType::Operator => Self::OPERATOR,
            TokenType::QuestionMark => Self::OPERATOR,
            TokenType::String => Self::STRING,
            TokenType::Bang => Self::OPERATOR,
            TokenType::LineComment => Self::COMMENT,
            TokenType::BlockComment => Self::COMMENT,
            TokenType::Function => Self::FUNCTION,
            TokenType::Whitespace
            | TokenType::Brace
            | TokenType::Comma
            | TokenType::Colon
            | TokenType::Period
            | TokenType::DoublePeriod
            | TokenType::Hash
            | TokenType::Dollar
            | TokenType::At
            | TokenType::Unknown => {
                anyhow::bail!("unsupported token type: {:?}", token_type)
            }
        })
    }
}

impl TokenType {
    pub fn is_whitespace(&self) -> bool {
        matches!(self, Self::Whitespace)
    }

    pub fn is_comment(&self) -> bool {
        matches!(self, Self::LineComment | Self::BlockComment)
    }
}

/// A lexed token: its type, its text, and where it sits in the source.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
    pub token_type: TokenType,
    /// Offset in the source code where this token begins.
    pub start: usize,
    /// Offset in the source code where this token ends.
    pub end: usize,
    pub(super) module_id: ModuleId,
    pub(super) value: String,
}

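// These `ContainsToken` impls let winnow's token-set combinators match a
// token either by (type, value) or by token type alone.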
impl ContainsToken<Token> for (TokenType, &str) {
    fn contains_token(&self, token: Token) -> bool {
        self.0 == token.token_type && self.1 == token.value
    }
}

impl ContainsToken<Token> for TokenType {
    fn contains_token(&self, token: Token) -> bool {
        *self == token.token_type
    }
}

impl Token {
    pub fn from_range(
        range: std::ops::Range<usize>,
        module_id: ModuleId,
        token_type: TokenType,
        value: String,
    ) -> Self {
        Self {
            start: range.start,
            end: range.end,
            module_id,
            value,
            token_type,
        }
    }

    pub fn is_code_token(&self) -> bool {
        !matches!(
            self.token_type,
            TokenType::Whitespace | TokenType::LineComment | TokenType::BlockComment
        )
    }

    pub fn as_source_range(&self) -> SourceRange {
        SourceRange::new(self.start, self.end, self.module_id)
    }

    pub fn as_source_ranges(&self) -> Vec<SourceRange> {
        vec![self.as_source_range()]
    }

    pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        match self.value.as_str() {
            "export" => Some(ItemVisibility::Export),
            _ => None,
        }
    }

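    /// The numeric value of a `Number` token, ignoring any suffix (anything
    /// from the first `_` or ASCII letter onwards). `None` for other tokens
    /// or unparseable values.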
    pub fn numeric_value(&self) -> Option<f64> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

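    /// The unit (or count) suffix of a `Number` token, if any; `None` for
    /// non-number tokens and unsuffixed literals.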
    pub fn numeric_suffix(&self) -> NumericSuffix {
        if self.token_type != TokenType::Number {
            return NumericSuffix::None;
        }

        if self.value.ends_with('_') {
            return NumericSuffix::Count;
        }

        for suffix in NUM_SUFFIXES {
            if self.value.ends_with(suffix) {
                return suffix.parse().unwrap();
            }
        }

        NumericSuffix::None
    }

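    /// If this keyword begins a declaration, which kind of variable does it
    /// declare? `var`, `let`, and `const` are all accepted and treated as
    /// `const`.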
    pub fn declaration_keyword(&self) -> Option<VariableKind> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        Some(match self.value.as_str() {
            "fn" => VariableKind::Fn,
            "var" | "let" | "const" => VariableKind::Const,
            _ => return None,
        })
    }
}

impl From<Token> for SourceRange {
    fn from(token: Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

impl From<&Token> for SourceRange {
    fn from(token: &Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

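/// Lex the given source code into a stream of tokens, converting any lexer
/// failure into a `KclError`.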
pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
    tokeniser::lex(s, module_id).map_err(From::from)
}

impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
    fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
        let (input, offset): (Vec<char>, usize) = (err.input().chars().collect(), err.offset());
        let module_id = err.input().state.module_id;

        if offset >= input.len() {
            // Per the winnow docs, the error offset may point one past the end
            // of the input on EOF errors, so report an empty range at EOF.
            return KclError::Lexical(crate::errors::KclErrorDetails {
                source_ranges: vec![SourceRange::new(offset, offset, module_id)],
                message: "unexpected EOF while parsing".to_string(),
            });
        }

        let bad_token = &input[offset];
        KclError::Lexical(crate::errors::KclErrorDetails {
            source_ranges: vec![SourceRange::new(offset, offset + 1, module_id)],
            message: format!("found unknown token '{}'", bad_token),
        })
    }
}
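
// A minimal usage sketch, not part of the original module: it exercises
// `lex`, `Token::numeric_value`/`numeric_suffix`, and the `NumericSuffix`
// parse/Display round-trip. It assumes `ModuleId` implements `Default` and
// that a literal's unit suffix is lexed into the same `Number` token; adjust
// if either assumption doesn't hold in this crate.
#[cfg(test)]
mod example_usage {
    use super::*;

    #[test]
    fn lex_number_with_suffix() {
        let stream = lex("width = 4mm", ModuleId::default()).unwrap();
        let number = stream
            .iter()
            .find(|t| t.token_type == TokenType::Number)
            .expect("expected a number token");
        assert_eq!(number.numeric_value(), Some(4.0));
        assert_eq!(number.numeric_suffix(), NumericSuffix::Mm);
    }

    #[test]
    fn numeric_suffix_display_round_trips() {
        for s in NUM_SUFFIXES {
            let suffix: NumericSuffix = s.parse().unwrap();
            // Display may canonicalise the spelling ("inch" prints as "in"),
            // but the printed form must parse back to the same suffix.
            assert_eq!(suffix.to_string().parse::<NumericSuffix>().unwrap(), suffix);
        }
    }
}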