geo_aid_script/
token.rs

1//! All functionality for turning scripts into series of tokens.
2
3use std::{fmt::Display, iter::Peekable};
4
5use serde::Serialize;
6
7use self::number::{ParsedFloat, ParsedInt, ParsedIntBuilder};
8
9use super::{parser::Parse, Error};
10use geo_aid_derive::Parse;
11
12pub mod number;
13
/// Defines a position in the script.
///
/// The derived ordering compares `line` first and `column` second, so a
/// position further down the script is always greater.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize)]
pub struct Position {
    /// The line number, starting at 1
    pub line: usize,
    /// The column index (character index), starting at 1
    pub column: usize,
}
22
/// Defines a span in the script: a half-open range between two positions.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub struct Span {
    /// Starting position (included)
    pub start: Position,
    /// Ending position (excluded)
    pub end: Position,
}
31
32impl Span {
33    /// Create a span containing both `self` and `other`. If one of them is empty,
34    /// returns the other.
35    #[must_use]
36    pub fn join(self, other: Span) -> Self {
37        if self.is_empty() {
38            other
39        } else if other.is_empty() {
40            self
41        } else {
42            Self {
43                start: if self.start < other.start {
44                    self.start
45                } else {
46                    other.start
47                },
48                end: if self.end > other.end {
49                    self.end
50                } else {
51                    other.end
52                },
53            }
54        }
55    }
56
57    /// Check if the spans are overlapping (share a position)
58    #[must_use]
59    pub fn overlaps(self, other: Span) -> bool {
60        (self.start <= other.start && self.end >= other.start)
61            || (other.start <= self.start && other.end >= self.start)
62    }
63
64    /// Check if the span is contained within a single line
65    #[must_use]
66    pub const fn is_single_line(&self) -> bool {
67        self.start.line == self.end.line
68    }
69
70    /// Create an empty span. This is a special value used in different cases.
71    #[must_use]
72    pub const fn empty() -> Self {
73        Span {
74            start: Position { line: 0, column: 0 },
75            end: Position { line: 0, column: 0 },
76        }
77    }
78
79    /// Check if the span is empty (`start == end`)
80    #[must_use]
81    pub fn is_empty(&self) -> bool {
82        self.start == self.end
83    }
84}
85
/// Spans are totally ordered, so the partial comparison simply defers to
/// the `Ord` implementation.
impl PartialOrd for Span {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
91
92impl Ord for Span {
93    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
94        match self.start.cmp(&other.start) {
95            std::cmp::Ordering::Equal => self.end.cmp(&other.end),
96            v => v,
97        }
98    }
99}
100
/// A helper span macro accepting start line and column and end line and column.
/// All numbers are 1-based
///
/// The arguments are, in order: start line, start column, end line and
/// end column. The macro expands to a `Span` struct literal.
#[macro_export]
macro_rules! span {
    ($start_ln:expr, $start_col:expr, $end_ln:expr, $end_col:expr) => {
        $crate::token::Span {
            start: $crate::token::Position {
                line: $start_ln,
                column: $start_col,
            },
            end: $crate::token::Position {
                line: $end_ln,
                column: $end_col,
            },
        }
    };
}
118
/// A `;` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct Semi {
    /// Where the token is located in the script.
    pub span: Span,
}
125
/// A `=` token.
///
/// Note that this struct shares its name with the standard `Eq` trait.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct Eq {
    /// Where the token is located in the script.
    pub span: Span,
}
132
/// A `(` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct LParen {
    /// Where the token is located in the script.
    pub span: Span,
}
139
/// A `)` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct RParen {
    /// Where the token is located in the script.
    pub span: Span,
}
146
/// A `{` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct LBrace {
    /// Where the token is located in the script.
    pub span: Span,
}
153
/// A `}` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct RBrace {
    /// Where the token is located in the script.
    pub span: Span,
}
160
/// A `[` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct LSquare {
    /// Where the token is located in the script.
    pub span: Span,
}
167
/// A `]` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct RSquare {
    /// Where the token is located in the script.
    pub span: Span,
}
174
/// A `,` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct Comma {
    /// Where the token is located in the script.
    pub span: Span,
}
181
/// A `^` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct Caret {
    /// Where the token is located in the script.
    pub span: Span,
}
188
/// A `:` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct Colon {
    /// Where the token is located in the script.
    pub span: Span,
}
195
/// A `$` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct Dollar {
    /// Where the token is located in the script.
    pub span: Span,
}
202
/// A `@` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct At {
    /// Where the token is located in the script.
    pub span: Span,
}
209
/// A `let` token.
///
/// The tokenizer emits it when an identifier spells exactly `let`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct Let {
    /// Where the keyword is located in the script.
    pub span: Span,
}
216
/// A `+` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct Plus {
    /// Where the token is located in the script.
    pub span: Span,
}
223
/// A `-` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct Minus {
    /// Where the token is located in the script.
    pub span: Span,
}
230
/// A `*` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct Asterisk {
    /// Where the token is located in the script.
    pub span: Span,
}
237
/// A `|` (vertical bar) token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct Vertical {
    /// Where the token is located in the script.
    pub span: Span,
}
244
/// A `/` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct Slash {
    /// Where the token is located in the script.
    pub span: Span,
}
251
/// A `<` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct Lt {
    /// Where the token is located in the script.
    pub span: Span,
}
258
/// A `>` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct Gt {
    /// Where the token is located in the script.
    pub span: Span,
}
265
/// A `<=` token.
///
/// The tokenizer produces it by merging a `<` token with a `=` character
/// that directly follows it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct Lteq {
    /// Where the token is located in the script (covers both characters).
    pub span: Span,
}
272
/// A `.` token.
///
/// Besides standing on its own, it is also stored inside [`TokFloat`] to
/// mark the decimal separator of a number.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct Dot {
    /// Where the token is located in the script.
    pub span: Span,
}
279
/// A `>=` token.
///
/// The tokenizer produces it by merging a `>` token with a `=` character
/// that directly follows it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct Gteq {
    /// Where the token is located in the script (covers both characters).
    pub span: Span,
}
286
/// A `!` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct Exclamation {
    /// Where the token is located in the script.
    pub span: Span,
}
293
/// A string, delimited by quotation marks.
#[derive(Debug, Clone, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct StrLit {
    /// Where the literal is located in the script.
    pub span: Span,
    /// The text between the quotation marks (the marks themselves are not stored).
    pub content: String,
}
301
302impl Display for StrLit {
303    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
304        write!(f, "{}", self.content)
305    }
306}
307
/// A `&` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct Ampersant {
    /// Where the token is located in the script.
    pub span: Span,
}
314
/// A `?` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Parse)]
#[parse(token)]
pub struct Question {
    /// Where the token is located in the script.
    pub span: Span,
}
321
/// Any valid token of `GeoScript`
///
/// Every variant wraps the type of the same name, which carries the
/// token's span and, where applicable, its parsed contents.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    Semi(Semi),
    Eq(Eq),
    Comma(Comma),
    Caret(Caret),
    Let(Let),
    Plus(Plus),
    Minus(Minus),
    Asterisk(Asterisk),
    Vertical(Vertical),
    LParen(LParen),
    RParen(RParen),
    Slash(Slash),
    Lt(Lt),
    Gt(Gt),
    Lteq(Lteq),
    Gteq(Gteq),
    Exclamation(Exclamation),
    Ident(Ident),
    NumberLit(NumberLit),
    Dollar(Dollar),
    Ampersant(Ampersant),
    LBrace(LBrace),
    RBrace(RBrace),
    LSquare(LSquare),
    RSquare(RSquare),
    At(At),
    Colon(Colon),
    Dot(Dot),
    StrLit(StrLit),
    Question(Question),
}
356
357impl Display for Token {
358    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
359        match self {
360            Self::Semi(_) => write!(f, ";"),
361            Self::Eq(_) => write!(f, "="),
362            Self::Comma(_) => write!(f, ","),
363            Self::Dot(_) => write!(f, "."),
364            Self::Let(_) => write!(f, "let"),
365            Self::Plus(_) => write!(f, "+"),
366            Self::Minus(_) => write!(f, "-"),
367            Self::Asterisk(_) => write!(f, "*"),
368            Self::Vertical(_) => write!(f, "|"),
369            Self::LParen(_) => write!(f, "("),
370            Self::RParen(_) => write!(f, ")"),
371            Self::Slash(_) => write!(f, "/"),
372            Self::Lt(_) => write!(f, "<"),
373            Self::Gt(_) => write!(f, ">"),
374            Self::Lteq(_) => write!(f, "<="),
375            Self::Gteq(_) => write!(f, ">="),
376            Self::Exclamation(_) => write!(f, "!"),
377            Self::Dollar(_) => write!(f, "$"),
378            Self::Ampersant(_) => write!(f, "&"),
379            Self::Question(_) => write!(f, "?"),
380            Self::At(_) => write!(f, "@"),
381            Self::LBrace(_) => write!(f, "{{"),
382            Self::RBrace(_) => write!(f, "}}"),
383            Self::LSquare(_) => write!(f, "["),
384            Self::RSquare(_) => write!(f, "]"),
385            Self::Caret(_) => write!(f, "^"),
386            Self::Colon(_) => write!(f, ":"),
387            Self::StrLit(s) => write!(f, "\"{}\"", s.content),
388            Self::Ident(ident) => write!(
389                f,
390                "{}",
391                match ident {
392                    Ident::Named(named) => named.ident.clone(),
393                    Ident::Collection(col) => format!("{col}"),
394                }
395            ),
396            Self::NumberLit(num) => match num {
397                NumberLit::Integer(v) => write!(f, "{}", v.parsed),
398                NumberLit::Float(v) => write!(f, "{}", v.parsed),
399            },
400        }
401    }
402}
403
404impl Token {
405    /// Get the token's span.
406    #[must_use]
407    pub fn get_span(&self) -> Span {
408        match self {
409            Self::Semi(v) => v.span,
410            Self::Eq(v) => v.span,
411            Self::Comma(v) => v.span,
412            Self::Caret(v) => v.span,
413            Self::Let(v) => v.span,
414            Self::Plus(v) => v.span,
415            Self::Minus(v) => v.span,
416            Self::Asterisk(v) => v.span,
417            Self::Vertical(v) => v.span,
418            Self::LParen(v) => v.span,
419            Self::RParen(v) => v.span,
420            Self::Slash(v) => v.span,
421            Self::Lt(v) => v.span,
422            Self::Gt(v) => v.span,
423            Self::Lteq(v) => v.span,
424            Self::Gteq(v) => v.span,
425            Self::Exclamation(v) => v.span,
426            Self::Ident(v) => v.get_span(),
427            Self::NumberLit(v) => v.get_span(),
428            Self::Dollar(v) => v.span,
429            Self::At(v) => v.span,
430            Self::LBrace(v) => v.span,
431            Self::RBrace(v) => v.span,
432            Self::LSquare(v) => v.span,
433            Self::RSquare(v) => v.span,
434            Self::Ampersant(v) => v.span,
435            Self::Question(v) => v.span,
436            Self::Colon(v) => v.span,
437            Self::Dot(v) => v.span,
438            Self::StrLit(s) => s.span,
439        }
440    }
441}
442
/// A name identifier, as opposed to a point collection identifier.
/// For more details, see [`PointCollection`]
///
/// Equality between two named identifiers only takes `span` and `ident`
/// into account; `collection_likeness` is ignored.
#[derive(Debug, Clone)]
pub struct NamedIdent {
    /// The identifier span
    pub span: Span,
    /// The identifier characters
    pub ident: String,
    /// How likely it is that this identifier should have been a collection.
    /// Used for error reporting.
    pub collection_likeness: f64,
}
455
456impl PartialEq for NamedIdent {
457    fn eq(&self, other: &Self) -> bool {
458        self.span == other.span && self.ident == other.ident
459    }
460}
461
// The trait is spelled with its full path because this module declares
// its own `Eq` struct (the `=` token).
impl std::cmp::Eq for NamedIdent {}
463
/// An item of a point collection: a single point identifier.
///
/// Displayed as the letter, followed by `primes` apostrophes and, if an
/// index is present, an underscore and the index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PointCollectionItem {
    /// The point's letter
    pub letter: char,
    /// The point's optional index (the digits following `_`).
    pub index: Option<String>,
    /// The prime count (number of `'` characters after the letter).
    pub primes: u8,
    /// The span of the point
    pub span: Span,
}
476
477impl Display for PointCollectionItem {
478    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
479        write!(
480            f,
481            "{}{}{}",
482            self.letter,
483            "'".repeat(self.primes as usize),
484            self.index
485                .as_ref()
486                .map_or(String::new(), |x| format!("_{x}"))
487        )
488    }
489}
490
/// A point collection composed of single point identifiers.
/// A point identifier is an uppercase alphabetic character and a number of `'` characters following it.
/// It may additionally carry an index, written as `_` followed by digits.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PointCollection {
    /// Each point identifier of this collection
    pub collection: Vec<PointCollectionItem>,
    /// The span of the whole collection.
    pub span: Span,
}
499
impl PointCollection {
    /// How many points are in this collection
    #[must_use]
    pub fn len(&self) -> usize {
        self.collection.len()
    }

    /// Whether this collection is empty, i.e. contains no points.
    /// It's here mostly to shut clippy up. Point collections cannot
    /// be 0-length.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.collection.is_empty()
    }
}
515
516impl Display for PointCollection {
517    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
518        write!(
519            f,
520            "{}",
521            self.collection
522                .iter()
523                .fold(String::new(), |b, x| b + &x.to_string())
524        )
525    }
526}
527
/// An identifier. Either a point collection or a name.
#[derive(Debug, Clone, PartialEq, Eq, Parse)]
#[parse(token)]
pub enum Ident {
    /// A regular name.
    Named(NamedIdent),
    /// A sequence of point identifiers.
    Collection(PointCollection),
}
535
536impl Ident {
537    #[must_use]
538    pub fn as_collection(&self) -> Option<&PointCollection> {
539        if let Self::Collection(v) = self {
540            Some(v)
541        } else {
542            None
543        }
544    }
545
546    #[must_use]
547    pub fn as_ident(&self) -> Option<&NamedIdent> {
548        if let Self::Named(v) = self {
549            Some(v)
550        } else {
551            None
552        }
553    }
554
555    /// Returns `true` if the ident is [`Named`].
556    ///
557    /// [`Named`]: Ident::Named
558    #[must_use]
559    pub fn is_named(&self) -> bool {
560        matches!(self, Self::Named(..))
561    }
562}
563
564impl Display for Ident {
565    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
566        match self {
567            Ident::Named(named) => write!(f, "{}", named.ident),
568            Ident::Collection(col) => write!(f, "{col}"),
569        }
570    }
571}
572
/// A number token. Can represent an integer or a decimal.
#[derive(Debug, Clone, PartialEq, Eq, Parse)]
#[parse(token)]
pub enum NumberLit {
    /// A number written without a `.`.
    Integer(TokInteger),
    /// A number written with a `.`.
    Float(TokFloat),
}
580
581impl NumberLit {
582    /// Convert this number to a float.
583    #[must_use]
584    pub fn to_float(&self) -> f64 {
585        match self {
586            Self::Integer(i) => i.parsed.to_float(),
587            Self::Float(f) => f.parsed.to_float(),
588        }
589    }
590
591    /// Check if this token represents a 0.
592    #[must_use]
593    pub fn is_zero(&self) -> bool {
594        match self {
595            Self::Integer(i) => i.parsed.is_zero(),
596            Self::Float(f) => f.parsed.is_zero(),
597        }
598    }
599
600    /// Check if this token represents a 1.
601    #[must_use]
602    pub fn is_one(&self) -> bool {
603        match self {
604            Self::Integer(i) => i.parsed.is_one(),
605            Self::Float(f) => f.parsed.is_one(),
606        }
607    }
608}
609
610impl Display for NumberLit {
611    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
612        match self {
613            Self::Integer(v) => write!(f, "{v}"),
614            Self::Float(v) => write!(f, "{v}"),
615        }
616    }
617}
618
/// An integer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokInteger {
    /// Where the number is located in the script.
    pub span: Span,
    /// The parsed integer.
    pub parsed: ParsedInt,
}
626
627impl Display for TokInteger {
628    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
629        write!(f, "{}", self.parsed)
630    }
631}
632
/// A decimal number.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokFloat {
    /// Where the whole number (digits and dot) is located in the script.
    pub span: Span,
    /// The splitting dot.
    pub dot: Dot,
    /// The parsed digits.
    pub parsed: ParsedFloat,
}
642
643impl Display for TokFloat {
644    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
645        write!(f, "{}", self.parsed)
646    }
647}
648
/// Check if `c` may appear inside an identifier: any alphabetic character,
/// an ASCII digit, an underscore or an apostrophe.
fn is_identifier_character(c: char) -> bool {
    matches!(c, '_' | '\'' | '0'..='9') || c.is_alphabetic()
}
653
654/// Read an identifier (without distinguishing point collections from names)
655/// from a char iterator.
656fn read_identifier<I: Iterator<Item = char>>(
657    it: &mut Peekable<I>,
658    position: &mut Position,
659) -> (Span, String) {
660    let mut str = String::new();
661    let begin_pos = *position;
662
663    while let Some(&c) = it.peek() {
664        if is_identifier_character(c) {
665            str.push(c);
666            position.column += 1;
667            it.next();
668        } else {
669            break;
670        }
671    }
672
673    (
674        span!(
675            begin_pos.line,
676            begin_pos.column,
677            position.line,
678            position.column
679        ),
680        str,
681    )
682}
683
684/// Read a string literal from a string iterator.
685fn read_string<I: Iterator<Item = char>>(
686    it: &mut Peekable<I>,
687    position: &mut Position,
688) -> Result<StrLit, Error> {
689    let mut content = String::new();
690    let begin_pos = *position;
691    let mut closed = false;
692
693    // Assume first char to be correct.
694    it.next();
695    position.column += 1;
696
697    for c in it.by_ref() {
698        position.column += 1;
699
700        // Guard for non-ASCII
701        if !c.is_ascii() {
702            return Err(Error::InvalidCharacter {
703                character: c,
704                error_span: span!(
705                    position.line,
706                    position.column - 1,
707                    position.line,
708                    position.column
709                ),
710            });
711        }
712
713        if c == '\n' {
714            return Err(Error::NewLineInString {
715                error_span: span!(
716                    begin_pos.line,
717                    begin_pos.column,
718                    position.line,
719                    position.column
720                ),
721            });
722        } else if c == '"' {
723            closed = true;
724            break;
725        } else if c.is_ascii_whitespace() {
726            content.push(' ');
727        } else if c.is_ascii_control() {
728            return Err(Error::InvalidCharacter {
729                character: c,
730                error_span: span!(
731                    position.line,
732                    position.column - 1,
733                    position.line,
734                    position.column
735                ),
736            });
737        } else {
738            content.push(c);
739        }
740    }
741
742    if !closed {
743        return Err(Error::UnclosedString {
744            error_span: span!(
745                begin_pos.line,
746                begin_pos.column,
747                position.line,
748                position.column
749            ),
750        });
751    }
752
753    Ok(StrLit {
754        span: span!(
755            begin_pos.line,
756            begin_pos.column,
757            position.line,
758            position.column
759        ),
760        content,
761    })
762}
763
764/// Read a number token from a char iterator.
765fn read_number<I: Iterator<Item = char>>(
766    it: &mut Peekable<I>,
767    position: &mut Position,
768) -> NumberLit {
769    let mut integer = ParsedIntBuilder::new();
770    let mut floating = None;
771    let begin_pos = *position;
772    let mut dot = None;
773
774    while let Some(&c) = it.peek() {
775        if c.is_ascii_digit() {
776            integer.push_digit((c as u8) - b'0');
777            position.column += 1;
778            it.next();
779        } else if c == '.' {
780            dot = Some(Dot {
781                span: span!(
782                    position.line,
783                    position.column,
784                    position.line,
785                    position.column + 1
786                ),
787            });
788            position.column += 1;
789            it.next();
790            floating = Some(integer.dot());
791            break;
792        } else {
793            return NumberLit::Integer(TokInteger {
794                span: span!(
795                    begin_pos.line,
796                    begin_pos.column,
797                    position.line,
798                    position.column
799                ),
800                parsed: integer.build(),
801            });
802        }
803    }
804
805    if let Some(mut floating) = floating {
806        while let Some(&c) = it.peek() {
807            if c.is_ascii_digit() {
808                floating.push_digit((c as u8) - b'0');
809                position.column += 1;
810                it.next();
811            } else {
812                break;
813            }
814        }
815
816        NumberLit::Float(TokFloat {
817            span: span!(
818                begin_pos.line,
819                begin_pos.column,
820                position.line,
821                position.column
822            ),
823            dot: dot.unwrap(),
824            parsed: floating.build(),
825        })
826    } else {
827        unreachable!()
828    }
829}
830
831/// Decides whether the given string is a standard named identifier or a point collection.
832fn dispatch_ident(sp: Span, ident: String) -> Ident {
833    let mut collection = PointCollection {
834        collection: vec![],
835        span: sp,
836    };
837
838    let mut chars = ident.chars().peekable();
839    let mut offset = 0;
840
841    // If the point collection is not a point collection, this will be non-zero
842    let mut invalid = 0;
843
844    while let Some(letter) = chars.next() {
845        if !letter.is_ascii_uppercase() {
846            invalid += 1;
847        }
848
849        let mut len = 1;
850        let mut primes = 0;
851
852        while let Some('\'') = chars.peek().copied() {
853            primes += 1;
854            len += 1;
855            chars.next();
856        }
857
858        let index = if chars.peek().copied() == Some('_') {
859            chars.next();
860            len += 1;
861            let mut index = String::new();
862
863            while chars.peek().is_some_and(char::is_ascii_digit) {
864                len += 1;
865                index.push(chars.next().unwrap());
866            }
867
868            if index.is_empty() {
869                // Assume index exists, go on
870                invalid += 1;
871            }
872
873            Some(index)
874        } else {
875            None
876        };
877
878        collection.collection.push(PointCollectionItem {
879            letter,
880            index,
881            primes,
882            span: span!(
883                sp.start.line,
884                sp.start.column + offset,
885                sp.start.line,
886                sp.start.column + offset + len
887            ),
888        });
889
890        offset += len;
891    }
892
893    invalid += chars.count();
894
895    if invalid > 0 {
896        #[allow(clippy::cast_precision_loss)]
897        return Ident::Named(NamedIdent {
898            span: sp,
899            ident,
900            collection_likeness: (offset - invalid) as f64 / (offset as f64),
901        });
902    }
903
904    Ident::Collection(collection)
905}
906
907/// Recognise special characters
908fn tokenize_special<I: Iterator<Item = char>>(
909    position: &mut Position,
910    tokens: &mut Vec<Token>,
911    c: char,
912    it: &mut Peekable<I>,
913) -> Result<(), Error> {
914    let sp = span!(
915        position.line,
916        position.column,
917        position.line,
918        position.column + 1
919    );
920
921    if c == '=' {
922        let last = tokens.last().cloned();
923
924        match last {
925            Some(Token::Lt(Lt { span })) => {
926                if span
927                    == span!(
928                        sp.start.line,
929                        sp.start.column - 1,
930                        sp.start.line,
931                        sp.start.column
932                    )
933                {
934                    *tokens.last_mut().unwrap() = Token::Lteq(Lteq {
935                        span: span!(
936                            sp.start.line,
937                            sp.start.column - 1,
938                            sp.start.line,
939                            sp.start.column + 1
940                        ),
941                    });
942                }
943            }
944            Some(Token::Gt(Gt { span })) => {
945                if span
946                    == span!(
947                        sp.start.line,
948                        sp.start.column - 1,
949                        sp.start.line,
950                        sp.start.column
951                    )
952                {
953                    *tokens.last_mut().unwrap() = Token::Gteq(Gteq {
954                        span: span!(
955                            sp.start.line,
956                            sp.start.column - 1,
957                            sp.start.line,
958                            sp.start.column + 1
959                        ),
960                    });
961                }
962            }
963            _ => tokens.push(Token::Eq(Eq { span: sp })),
964        }
965    } else {
966        tokens.push(match c {
967            ';' => Token::Semi(Semi { span: sp }),
968            ',' => Token::Comma(Comma { span: sp }),
969            '.' => Token::Dot(Dot { span: sp }),
970            '+' => Token::Plus(Plus { span: sp }),
971            '-' => Token::Minus(Minus { span: sp }),
972            '*' => Token::Asterisk(Asterisk { span: sp }),
973            '/' => Token::Slash(Slash { span: sp }),
974            '(' => Token::LParen(LParen { span: sp }),
975            ')' => Token::RParen(RParen { span: sp }),
976            '|' => Token::Vertical(Vertical { span: sp }),
977            '<' => Token::Lt(Lt { span: sp }),
978            '>' => Token::Gt(Gt { span: sp }),
979            '!' => Token::Exclamation(Exclamation { span: sp }),
980            '$' => Token::Dollar(Dollar { span: sp }),
981            '&' => Token::Ampersant(Ampersant { span: sp }),
982            '?' => Token::Question(Question { span: sp }),
983            '@' => Token::At(At { span: sp }),
984            '^' => Token::Caret(Caret { span: sp }),
985            '{' => Token::LBrace(LBrace { span: sp }),
986            '}' => Token::RBrace(RBrace { span: sp }),
987            '[' => Token::LSquare(LSquare { span: sp }),
988            ']' => Token::RSquare(RSquare { span: sp }),
989            ':' => Token::Colon(Colon { span: sp }),
990            _ => {
991                return Err(Error::InvalidCharacter {
992                    character: c,
993                    error_span: sp,
994                })
995            }
996        });
997    }
998
999    position.column += 1;
1000    it.next();
1001
1002    Ok(())
1003}
1004
1005/// Tokenizes the given script (turns it into a series of tokens).
1006///
1007/// # Errors
1008/// Emits an appropriate error if the script is invalid and tokenization fails.
1009pub fn tokenize(input: &str) -> Result<Vec<Token>, Error> {
1010    let mut it = input.chars().peekable();
1011    let mut tokens = vec![];
1012    let mut position = Position { line: 1, column: 1 };
1013
1014    loop {
1015        match it.peek() {
1016            None => break,
1017            Some(&c) => {
1018                if c.is_whitespace() {
1019                    if c == '\n' {
1020                        position.line += 1;
1021                        position.column = 0;
1022                    }
1023
1024                    position.column += 1;
1025                    it.next();
1026                } else if c.is_alphabetic() || c == '_' {
1027                    let (sp, ident) = read_identifier(&mut it, &mut position);
1028
1029                    tokens.push(match ident.as_str() {
1030                        "let" => Token::Let(Let { span: sp }),
1031                        _ => Token::Ident(dispatch_ident(sp, ident)),
1032                    });
1033                } else if c.is_ascii_digit() {
1034                    tokens.push(Token::NumberLit(read_number(&mut it, &mut position)));
1035                } else if c == '#' {
1036                    position.line += 1;
1037                    position.column = 1;
1038                    while let Some(comment) = it.by_ref().next() {
1039                        if comment == '\n' {
1040                            break;
1041                        }
1042                    }
1043                } else if c == '"' {
1044                    let s = read_string(&mut it, &mut position)?;
1045
1046                    tokens.push(Token::StrLit(s));
1047                } else {
1048                    tokenize_special(&mut position, &mut tokens, c, &mut it)?;
1049                }
1050            }
1051        }
1052    }
1053
1054    Ok(tokens)
1055}