1#![allow(clippy::needless_lifetimes)]
3
4use std::fmt;
5use std::iter::Enumerate;
6use std::num::NonZeroUsize;
7use std::str::FromStr;
8
9use anyhow::Result;
10use parse_display::Display;
11use serde::Deserialize;
12use serde::Serialize;
13use tokeniser::Input;
14use tower_lsp::lsp_types::SemanticTokenType;
15use winnow::error::ParseError;
16use winnow::stream::ContainsToken;
17use winnow::stream::Stream;
18use winnow::{self};
19
20use crate::CompilationError;
21use crate::ModuleId;
22use crate::SourceRange;
23use crate::errors::KclError;
24use crate::parsing::ast::types::ItemVisibility;
25use crate::parsing::ast::types::VariableKind;
26
27mod tokeniser;
28
29pub(crate) use tokeniser::RESERVED_WORDS;
30
/// Suffixes that may trail a numeric literal to give it units.
/// NOTE(review): `Token::numeric_suffix` scans this array in order using
/// `ends_with`, so suffixes sharing a tail must be ordered longest-first
/// ("mm"/"cm" before "m", "inch" before "in") — keep that invariant.
pub const NUM_SUFFIXES: [&str; 10] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad", "?"];
33
/// The unit suffix attached to a numeric literal, if any.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS)]
#[repr(u32)]
pub enum NumericSuffix {
    /// No suffix present on the literal.
    None,
    /// `_` — a unitless count.
    Count,
    /// Abstract length dimension (no specific unit).
    Length,
    /// Abstract angle dimension (no specific unit).
    Angle,
    /// Millimeters.
    Mm,
    /// Centimeters.
    Cm,
    /// Meters.
    M,
    /// Inches.
    Inch,
    /// Feet.
    Ft,
    /// Yards.
    Yd,
    /// Degrees.
    Deg,
    /// Radians.
    Rad,
    /// `?` — explicitly unknown units.
    Unknown,
}
51
52impl NumericSuffix {
53 #[allow(dead_code)]
54 pub fn is_none(self) -> bool {
55 self == Self::None
56 }
57
58 pub fn is_some(self) -> bool {
59 self != Self::None
60 }
61
62 pub fn digestable_id(&self) -> &[u8] {
63 match self {
64 NumericSuffix::None => &[],
65 NumericSuffix::Count => b"_",
66 NumericSuffix::Unknown => b"?",
67 NumericSuffix::Length => b"Length",
68 NumericSuffix::Angle => b"Angle",
69 NumericSuffix::Mm => b"mm",
70 NumericSuffix::Cm => b"cm",
71 NumericSuffix::M => b"m",
72 NumericSuffix::Inch => b"in",
73 NumericSuffix::Ft => b"ft",
74 NumericSuffix::Yd => b"yd",
75 NumericSuffix::Deg => b"deg",
76 NumericSuffix::Rad => b"rad",
77 }
78 }
79}
80
81impl FromStr for NumericSuffix {
82 type Err = CompilationError;
83
84 fn from_str(s: &str) -> Result<Self, Self::Err> {
85 match s {
86 "_" | "Count" => Ok(NumericSuffix::Count),
87 "Length" => Ok(NumericSuffix::Length),
88 "Angle" => Ok(NumericSuffix::Angle),
89 "mm" | "millimeters" => Ok(NumericSuffix::Mm),
90 "cm" | "centimeters" => Ok(NumericSuffix::Cm),
91 "m" | "meters" => Ok(NumericSuffix::M),
92 "inch" | "in" => Ok(NumericSuffix::Inch),
93 "ft" | "feet" => Ok(NumericSuffix::Ft),
94 "yd" | "yards" => Ok(NumericSuffix::Yd),
95 "deg" | "degrees" => Ok(NumericSuffix::Deg),
96 "rad" | "radians" => Ok(NumericSuffix::Rad),
97 "?" => Ok(NumericSuffix::Unknown),
98 _ => Err(CompilationError::err(SourceRange::default(), "invalid unit of measure")),
99 }
100 }
101}
102
103impl fmt::Display for NumericSuffix {
104 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
105 match self {
106 NumericSuffix::None => Ok(()),
107 NumericSuffix::Count => write!(f, "_"),
108 NumericSuffix::Unknown => write!(f, "_?"),
109 NumericSuffix::Length => write!(f, "Length"),
110 NumericSuffix::Angle => write!(f, "Angle"),
111 NumericSuffix::Mm => write!(f, "mm"),
112 NumericSuffix::Cm => write!(f, "cm"),
113 NumericSuffix::M => write!(f, "m"),
114 NumericSuffix::Inch => write!(f, "in"),
115 NumericSuffix::Ft => write!(f, "ft"),
116 NumericSuffix::Yd => write!(f, "yd"),
117 NumericSuffix::Deg => write!(f, "deg"),
118 NumericSuffix::Rad => write!(f, "rad"),
119 }
120 }
121}
122
/// The full, ordered sequence of tokens produced by lexing one module.
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct TokenStream {
    tokens: Vec<Token>,
}
127
128impl TokenStream {
129 fn new(tokens: Vec<Token>) -> Self {
130 Self { tokens }
131 }
132
133 pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
134 let tokens = std::mem::take(&mut self.tokens);
135 let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
136 .into_iter()
137 .partition(|token| token.token_type != TokenType::Unknown);
138 self.tokens = tokens;
139 unknown_tokens
140 }
141
142 pub fn iter(&self) -> impl Iterator<Item = &Token> {
143 self.tokens.iter()
144 }
145
146 pub fn is_empty(&self) -> bool {
147 self.tokens.is_empty()
148 }
149
150 pub fn as_slice(&self) -> TokenSlice<'_> {
151 TokenSlice::from(self)
152 }
153}
154
155impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
156 fn from(stream: &'a TokenStream) -> Self {
157 TokenSlice {
158 start: 0,
159 end: stream.tokens.len(),
160 stream,
161 }
162 }
163}
164
/// Consume the stream, yielding owned tokens in order.
impl IntoIterator for TokenStream {
    type Item = Token;

    type IntoIter = std::vec::IntoIter<Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.tokens.into_iter()
    }
}
174
/// A borrowed window into a `TokenStream`, used as the input type for the
/// winnow-based parser. Cloning is cheap: it copies two indices and a
/// reference.
#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
    stream: &'a TokenStream,
    // Index of the first token in the window (inclusive).
    start: usize,
    // Index one past the last token in the window (exclusive).
    end: usize,
}
183
/// Deref to the underlying `[Token]` window so slice methods
/// (`len`, `first`, `to_vec`, …) work directly on a `TokenSlice`.
impl<'a> std::ops::Deref for TokenSlice<'a> {
    type Target = [Token];

    fn deref(&self) -> &Self::Target {
        &self.stream.tokens[self.start..self.end]
    }
}
191
impl<'a> TokenSlice<'a> {
    /// The `i`th token of this slice (`i` is relative to the slice's start).
    pub fn token(&self, i: usize) -> &Token {
        &self.stream.tokens[i + self.start]
    }

    /// Iterate over the tokens inside this slice.
    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        (**self).iter()
    }

    /// Shrink the slice by one token at each end (e.g. to drop surrounding
    /// delimiters).
    /// NOTE(review): underflows and panics if the slice is empty — callers
    /// presumably guarantee at least two tokens; confirm.
    pub fn without_ends(&self) -> Self {
        Self {
            start: self.start + 1,
            end: self.end - 1,
            stream: self.stream,
        }
    }

    /// The source range covered by this slice. When the slice begins or ends
    /// at the very end of the stream, fall back to the stream's final token
    /// so we never index past the token vector.
    pub fn as_source_range(&self) -> SourceRange {
        let stream_len = self.stream.tokens.len();
        let first_token = if stream_len == self.start {
            &self.stream.tokens[self.start - 1]
        } else {
            self.token(0)
        };
        // NOTE(review): `self.token(self.end - self.start)` is the token *one
        // past* this slice's end (it resolves to `tokens[self.end]`), not the
        // slice's last token. This looks deliberate for empty/consumed slices
        // but should be confirmed — `self.end - self.start - 1` would be the
        // slice's own final token.
        let last_token = if stream_len == self.end {
            &self.stream.tokens[stream_len - 1]
        } else {
            self.token(self.end - self.start)
        };
        SourceRange::new(first_token.start, last_token.end, last_token.module_id)
    }
}
224
/// Iterate the slice's window by reference; consumes only the slice handle,
/// not the underlying stream.
impl<'a> IntoIterator for TokenSlice<'a> {
    type Item = &'a Token;

    type IntoIter = std::slice::Iter<'a, Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.stream.tokens[self.start..self.end].iter()
    }
}
234
/// winnow `Stream` implementation: lets parsers consume tokens from the
/// front of the slice and backtrack via checkpoints. Advancing only moves
/// `self.start`; the underlying stream is never mutated.
impl<'a> Stream for TokenSlice<'a> {
    type Token = Token;
    type Slice = Self;
    type IterOffsets = Enumerate<std::vec::IntoIter<Token>>;
    type Checkpoint = Checkpoint;

    /// Offset/token pairs over the remaining input.
    /// NOTE(review): clones every remaining token into a fresh Vec per call.
    fn iter_offsets(&self) -> Self::IterOffsets {
        #[allow(clippy::unnecessary_to_owned)]
        self.to_vec().into_iter().enumerate()
    }

    /// Number of tokens remaining until end of input.
    fn eof_offset(&self) -> usize {
        self.len()
    }

    /// Pop the front token (cloned) and advance the slice by one.
    fn next_token(&mut self) -> Option<Self::Token> {
        let token = self.first()?.clone();
        self.start += 1;
        Some(token)
    }

    /// Look at the front token without consuming it.
    fn peek_token(&self) -> Option<Self::Token> {
        Some(self.first()?.clone())
    }

    /// Offset of the first token satisfying `predicate`, if any.
    fn offset_for<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Token) -> bool,
    {
        self.iter().position(|b| predicate(b.clone()))
    }

    /// Check that `tokens` tokens are available; on failure report how many
    /// more are needed.
    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
        if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
            Err(winnow::error::Needed::Size(needed))
        } else {
            Ok(tokens)
        }
    }

    /// Split off the first `offset` tokens as a sub-slice and advance past them.
    fn next_slice(&mut self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        let next = TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        };
        self.start += offset;
        next
    }

    /// Like `next_slice`, but without advancing this slice.
    fn peek_slice(&self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        }
    }

    /// Capture the current bounds so the parser can backtrack here later.
    fn checkpoint(&self) -> Self::Checkpoint {
        Checkpoint(self.start, self.end)
    }

    /// Restore the bounds saved by `checkpoint`.
    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
        self.start = checkpoint.0;
        self.end = checkpoint.1;
    }

    /// Debug view of the remaining input (used in winnow trace output).
    fn raw(&self) -> &dyn fmt::Debug {
        self
    }
}
312
/// Distance (in tokens) this slice has advanced relative to `start`.
impl<'a> winnow::stream::Offset for TokenSlice<'a> {
    fn offset_from(&self, start: &Self) -> usize {
        self.start - start.start
    }
}
318
/// Distance (in tokens) this slice has advanced past a saved checkpoint.
impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
    fn offset_from(&self, start: &Checkpoint) -> usize {
        self.start - start.0
    }
}
324
/// Distance (in tokens) between two checkpoints' start positions.
impl winnow::stream::Offset for Checkpoint {
    fn offset_from(&self, start: &Self) -> usize {
        self.0 - start.0
    }
}
330
/// Token slices are always complete input — streaming/partial parsing is
/// not supported, so the partial state is a no-op unit.
impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {}

    fn restore_partial(&mut self, _: Self::PartialState) {}

    fn is_partial_supported() -> bool {
        false
    }
}
342
impl<'a> winnow::stream::FindSlice<&str> for TokenSlice<'a> {
    /// Find the first token whose source text equals `substr`.
    /// NOTE(review): `i` is an offset relative to this slice while
    /// `self.end` is an absolute index into the underlying stream, so the
    /// returned range mixes coordinate systems — confirm callers only rely
    /// on `range.start`.
    fn find_slice(&self, substr: &str) -> Option<std::ops::Range<usize>> {
        self.iter()
            .enumerate()
            .find_map(|(i, b)| if b.value == substr { Some(i..self.end) } else { None })
    }
}
350
/// Saved `(start, end)` bounds of a `TokenSlice`, used for parser backtracking.
#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);
353
/// The category of a lexed token. Display renders the variant name in
/// camelCase (via `parse_display`).
#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
#[display(style = "camelCase")]
pub enum TokenType {
    /// A numeric literal (possibly with a unit suffix).
    Number,
    /// An identifier.
    Word,
    /// An operator such as `+` or `==`.
    Operator,
    /// A string literal.
    String,
    /// A language keyword.
    Keyword,
    /// A type name.
    Type,
    /// Any bracket: `(`, `)`, `[`, `]`, `{`, `}`.
    Brace,
    /// `#`.
    Hash,
    /// `!`.
    Bang,
    /// `$`.
    Dollar,
    /// Spaces, tabs, newlines.
    Whitespace,
    /// `,`.
    Comma,
    /// `:`.
    Colon,
    /// `::`.
    DoubleColon,
    /// `.`.
    Period,
    /// `..`.
    DoublePeriod,
    /// `..<`.
    DoublePeriodLessThan,
    /// `// …` to end of line.
    LineComment,
    /// `/* … */`.
    BlockComment,
    /// A function name.
    Function,
    /// Anything the lexer could not classify.
    Unknown,
    /// `?`.
    QuestionMark,
    /// `@`.
    At,
    /// `;`.
    SemiColon,
}
407
408impl TryFrom<TokenType> for SemanticTokenType {
410 type Error = anyhow::Error;
411 fn try_from(token_type: TokenType) -> Result<Self> {
412 Ok(match token_type {
415 TokenType::Number => Self::NUMBER,
416 TokenType::Word => Self::VARIABLE,
417 TokenType::Keyword => Self::KEYWORD,
418 TokenType::Type => Self::TYPE,
419 TokenType::Operator => Self::OPERATOR,
420 TokenType::QuestionMark => Self::OPERATOR,
421 TokenType::String => Self::STRING,
422 TokenType::Bang => Self::OPERATOR,
423 TokenType::LineComment => Self::COMMENT,
424 TokenType::BlockComment => Self::COMMENT,
425 TokenType::Function => Self::FUNCTION,
426 TokenType::Whitespace
427 | TokenType::Brace
428 | TokenType::Comma
429 | TokenType::Colon
430 | TokenType::DoubleColon
431 | TokenType::Period
432 | TokenType::DoublePeriod
433 | TokenType::DoublePeriodLessThan
434 | TokenType::Hash
435 | TokenType::Dollar
436 | TokenType::At
437 | TokenType::SemiColon
438 | TokenType::Unknown => {
439 anyhow::bail!("unsupported token type: {:?}", token_type)
440 }
441 })
442 }
443}
444
445impl TokenType {
446 pub fn is_whitespace(&self) -> bool {
447 matches!(self, Self::Whitespace)
448 }
449
450 pub fn is_comment(&self) -> bool {
451 matches!(self, Self::LineComment | Self::BlockComment)
452 }
453}
454
/// One lexed token: its category, source span, and the exact source text.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
    pub token_type: TokenType,
    // Byte offset of the token's first character in the source.
    pub start: usize,
    // Byte offset one past the token's last character.
    pub end: usize,
    // Which module this token was lexed from.
    pub(super) module_id: ModuleId,
    // The token's verbatim source text.
    pub(super) value: String,
}
465
466impl ContainsToken<Token> for (TokenType, &str) {
467 fn contains_token(&self, token: Token) -> bool {
468 self.0 == token.token_type && self.1 == token.value
469 }
470}
471
472impl ContainsToken<Token> for TokenType {
473 fn contains_token(&self, token: Token) -> bool {
474 *self == token.token_type
475 }
476}
477
478impl Token {
479 pub fn from_range(
480 range: std::ops::Range<usize>,
481 module_id: ModuleId,
482 token_type: TokenType,
483 value: String,
484 ) -> Self {
485 Self {
486 start: range.start,
487 end: range.end,
488 module_id,
489 value,
490 token_type,
491 }
492 }
493 pub fn is_code_token(&self) -> bool {
494 !matches!(
495 self.token_type,
496 TokenType::Whitespace | TokenType::LineComment | TokenType::BlockComment
497 )
498 }
499
500 pub fn as_source_range(&self) -> SourceRange {
501 SourceRange::new(self.start, self.end, self.module_id)
502 }
503
504 pub fn as_source_ranges(&self) -> Vec<SourceRange> {
505 vec![self.as_source_range()]
506 }
507
508 pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
509 if !matches!(self.token_type, TokenType::Keyword) {
510 return None;
511 }
512 match self.value.as_str() {
513 "export" => Some(ItemVisibility::Export),
514 _ => None,
515 }
516 }
517
518 pub fn numeric_value(&self) -> Option<f64> {
519 if self.token_type != TokenType::Number {
520 return None;
521 }
522 let value = &self.value;
523 let value = value
524 .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
525 .map(|(s, _)| s)
526 .unwrap_or(value);
527 value.parse().ok()
528 }
529
530 pub fn uint_value(&self) -> Option<u32> {
531 if self.token_type != TokenType::Number {
532 return None;
533 }
534 let value = &self.value;
535 let value = value
536 .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
537 .map(|(s, _)| s)
538 .unwrap_or(value);
539 value.parse().ok()
540 }
541
542 pub fn numeric_suffix(&self) -> NumericSuffix {
543 if self.token_type != TokenType::Number {
544 return NumericSuffix::None;
545 }
546
547 if self.value.ends_with('_') {
548 return NumericSuffix::Count;
549 }
550
551 for suffix in NUM_SUFFIXES {
552 if self.value.ends_with(suffix) {
553 return suffix.parse().unwrap();
554 }
555 }
556
557 NumericSuffix::None
558 }
559
560 pub fn declaration_keyword(&self) -> Option<VariableKind> {
564 if !matches!(self.token_type, TokenType::Keyword) {
565 return None;
566 }
567 Some(match self.value.as_str() {
568 "fn" => VariableKind::Fn,
569 "var" | "let" | "const" => VariableKind::Const,
570 _ => return None,
571 })
572 }
573}
574
575impl From<Token> for SourceRange {
576 fn from(token: Token) -> Self {
577 Self::new(token.start, token.end, token.module_id)
578 }
579}
580
581impl From<&Token> for SourceRange {
582 fn from(token: &Token) -> Self {
583 Self::new(token.start, token.end, token.module_id)
584 }
585}
586
/// Lex `s` (the source of module `module_id`) into a token stream,
/// converting any tokeniser parse error into a `KclError`.
pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
    tokeniser::lex(s, module_id).map_err(From::from)
}
590
/// Convert a low-level winnow tokeniser error into a lexical `KclError`
/// with a source range pointing at the offending character.
impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
    fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
        // NOTE(review): `err.offset()` is used both to index the collected
        // `Vec<char>` and to build the SourceRange — this assumes the
        // tokeniser input's offsets count chars, not bytes; confirm against
        // the `Input` definition in `tokeniser`.
        let (input, offset): (Vec<char>, usize) = (err.input().chars().collect(), err.offset());
        let module_id = err.input().state.module_id;

        if offset >= input.len() {
            // The error points past the last character: input ended mid-token.
            return KclError::new_lexical(crate::errors::KclErrorDetails::new(
                "unexpected EOF while parsing".to_owned(),
                vec![SourceRange::new(offset, offset, module_id)],
            ));
        }

        // Report the single character the tokeniser could not handle.
        let bad_token = &input[offset];
        KclError::new_lexical(crate::errors::KclErrorDetails::new(
            format!("found unknown token '{bad_token}'"),
            vec![SourceRange::new(offset, offset + 1, module_id)],
        ))
    }
}