// quil_rs/parser/lexer/mod.rs
1// Copyright 2021 Rigetti Computing
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15mod error;
16mod quoted_strings;
17mod wrapped_parsers;
18
19use std::{num::NonZeroU8, str::FromStr};
20
21use lexical::{
22    FromLexicalWithOptions, NumberFormatBuilder, ParseFloatOptions, ParseIntegerOptions,
23};
24use nom::{
25    bytes::complete::{is_a, take_till, take_while, take_while1},
26    character::complete::one_of,
27    combinator::{all_consuming, cut, map, peek, recognize, value},
28    multi::many0,
29    sequence::{pair, preceded, terminated, tuple},
30    Finish, IResult, Slice as _,
31};
32use nom_locate::LocatedSpan;
33use wrapped_parsers::{alt, tag, tag_no_case};
34
35pub use super::token::{KeywordToken, Token, TokenWithLocation};
36use crate::parser::lexer::wrapped_parsers::expecting;
37use crate::parser::token::token_with_location;
38pub(crate) use error::InternalLexError;
39pub use error::{LexError, LexErrorKind};
40
/// A Quil command keyword (e.g. `DEFGATE`, `MEASURE`, `JUMP-UNLESS`).
///
/// `strum` derives both `Display` and `FromStr`; the latter is what
/// `keyword_or_identifier` uses to recognize command words while lexing.
/// Multi-word variants serialize in SCREAMING-KEBAB-CASE (`JumpUnless` ->
/// `"JUMP-WHEN"`-style dashed forms), except the `DEF*` variants, whose
/// explicit `to_string` overrides drop the dash (`DefGate` -> `"DEFGATE"`).
#[derive(Debug, Copy, Clone, PartialEq, Eq, strum::Display, strum::EnumString)]
#[strum(serialize_all = "SCREAMING-KEBAB-CASE")]
pub enum Command {
    Add,
    And,
    Ashr,
    Call,
    Capture,
    Convert,
    Declare,
    #[strum(to_string = "DEFCAL")]
    DefCal,
    #[strum(to_string = "DEFCIRCUIT")]
    DefCircuit,
    #[strum(to_string = "DEFFRAME")]
    DefFrame,
    #[strum(to_string = "DEFGATE")]
    DefGate,
    #[strum(to_string = "DEFWAVEFORM")]
    DefWaveform,
    Delay,
    Div,
    Eq,
    Exchange,
    Fence,
    GE,
    GT,
    Halt,
    Include,
    Ior,
    Jump,
    JumpUnless,
    JumpWhen,
    Label,
    LE,
    Load,
    LT,
    Measure,
    Move,
    Mul,
    Neg,
    Nop,
    Not,
    Pragma,
    Pulse,
    RawCapture,
    Reset,
    SetFrequency,
    SetPhase,
    SetScale,
    ShiftFrequency,
    ShiftPhase,
    Shl,
    Shr,
    Store,
    Sub,
    SwapPhases,
    Wait,
    Xor,
}
101
/// A Quil memory data type keyword (as used in e.g. `DECLARE`), serialized in
/// all caps (`Bit` <-> `"BIT"`) via the derived `Display`/`FromStr`.
#[derive(Debug, Clone, PartialEq, Eq, strum::Display, strum::EnumString)]
#[strum(serialize_all = "UPPERCASE")]
pub enum DataType {
    Bit,
    Octet,
    Real,
    Integer,
}
110
/// A gate modifier keyword, serialized in all caps (`Dagger` <-> `"DAGGER"`).
#[derive(Debug, Clone, PartialEq, Eq, strum::Display, strum::EnumString)]
#[strum(serialize_all = "UPPERCASE")]
pub enum Modifier {
    Controlled,
    Dagger,
    Forked, // Not in the Quil grammar
}
118
/// An arithmetic operator character.
///
/// Only `Display` is derived — operators are lexed character-by-character in
/// `lex_operator` rather than via `FromStr`.
#[derive(Debug, Clone, PartialEq, Eq, strum::Display)]
pub enum Operator {
    #[strum(serialize = "^")]
    Caret,
    #[strum(serialize = "-")]
    Minus,
    #[strum(serialize = "+")]
    Plus,
    #[strum(serialize = "/")]
    Slash,
    #[strum(serialize = "*")]
    Star,
}
132
/// Lexer input: a string slice wrapped with line/column location tracking.
pub type LexInput<'a> = LocatedSpan<&'a str>;
/// Internal parse result carrying the rich [`InternalLexError`] diagnostics.
pub(crate) type InternalLexResult<'a, T = Token, E = InternalLexError<'a>> =
    IResult<LexInput<'a>, T, E>;
/// Public parse result, surfacing failures as [`LexError`].
pub type LexResult<'a, T = Token, E = LexError> = IResult<LexInput<'a>, T, E>;
137
138/// Completely lex a string, returning the tokens within. Panics if the string cannot be completely read.
139pub(crate) fn lex(input: LexInput) -> Result<Vec<TokenWithLocation>, LexError> {
140    all_consuming(_lex)(input)
141        .finish()
142        .map(|(_, tokens)| tokens)
143        .map_err(LexError::from)
144}
145
/// Lex the whole input: zero or more items, each either an indentation marker
/// or a token optionally preceded by spaces, then any trailing whitespace
/// (which is consumed and discarded).
fn _lex(input: LexInput) -> InternalLexResult<Vec<TokenWithLocation>> {
    terminated(
        many0(alt(
            "indentation or a token preceded by whitespace",
            // Order matters: try leading indentation first so four spaces at the
            // start of a line are lexed as Indentation, not eaten as padding.
            (lex_indent, preceded(many0(tag(" ")), lex_token)),
        )),
        // Drop any leftover whitespace/newlines at the end of the input so
        // `all_consuming` in `lex` is satisfied.
        many0(one_of("\n\t ")),
    )(input)
}
155
/// The Quil spec defines an indent as exactly 4 spaces. However, the lexer recognizes tabs as well
/// to allow for more flexible formatting.
fn lex_indent(input: LexInput) -> InternalLexResult<TokenWithLocation> {
    alt(
        "indentation",
        (
            // Exactly four spaces…
            token_with_location(value(Token::Indentation, tag("    "))),
            // …or a single tab character.
            token_with_location(value(Token::Indentation, tag("\t"))),
        ),
    )(input)
}
167
/// Lex a single token of any kind, with source location attached.
///
/// Alternatives are tried in this order, so unambiguous leading characters
/// (`#`, `@`, `"`, `%`, punctuation, operators) are attempted before the
/// identifier/keyword and number parsers.
fn lex_token(input: LexInput) -> InternalLexResult<TokenWithLocation> {
    alt(
        "a token",
        (
            token_with_location(lex_comment),
            token_with_location(lex_punctuation),
            token_with_location(lex_target),
            token_with_location(lex_string),
            token_with_location(lex_operator),
            token_with_location(lex_variable),
            token_with_location(lex_keyword_or_identifier),
            token_with_location(lex_number),
        ),
    )(input)
}
183
184fn lex_comment(input: LexInput) -> InternalLexResult {
185    let (input, _) = tag("#")(input)?;
186    let (input, content) = take_till(|c| c == '\n')(input)?;
187    Ok((input, Token::Comment(content.to_string())))
188}
189
190fn keyword_or_identifier(identifier: String) -> Token {
191    fn parse<T: FromStr>(token: impl Fn(T) -> Token, identifier: &str) -> Result<Token, T::Err> {
192        T::from_str(identifier).map(token)
193    }
194
195    parse(KeywordToken::into, &identifier)
196        .or_else(|_| parse(Token::Command, &identifier))
197        .or_else(|_| parse(Token::DataType, &identifier))
198        .or_else(|_| parse(Token::Modifier, &identifier))
199        .unwrap_or(Token::Identifier(identifier))
200}
201
/// True for characters that may begin an identifier: ASCII letters and `_`.
fn is_valid_identifier_leading_character(chr: char) -> bool {
    matches!(chr, 'a'..='z' | 'A'..='Z' | '_')
}
205
/// True for characters allowed after an identifier's first character:
/// ASCII letters, ASCII digits, and `_`.
fn is_valid_identifier_end_character(chr: char) -> bool {
    chr == '_' || chr.is_ascii_alphanumeric()
}
209
/// True exactly for the ASCII hyphen-minus `-`, which joins identifier segments.
fn is_dash(chr: char) -> bool {
    matches!(chr, '-')
}
213
/// Lex the raw text of an identifier into an owned `String`.
///
/// Shape: one leading ASCII letter or `_`, then any run of letters/digits/`_`,
/// then zero or more dash-joined segments (e.g. `_a-2_b-2_`).
fn lex_identifier_raw(input: LexInput) -> InternalLexResult<String> {
    expecting(
        "a valid identifier",
        map(
            tuple::<_, _, InternalLexError, _>((
                // First character(s): ASCII letter or underscore, at least one.
                take_while1(is_valid_identifier_leading_character),
                // Rest of the first segment (may be empty).
                take_while(is_valid_identifier_end_character),
                // Zero or more `-segment` continuations; each dash run must be
                // followed by at least one segment character, so a trailing
                // dash is left unconsumed (it lexes as a Minus operator).
                recognize(many0(pair(
                    take_while1(is_dash),
                    take_while1(is_valid_identifier_end_character),
                ))),
            )),
            |(leading, middle, trailing_dash_vars)| {
                format!("{leading}{middle}{trailing_dash_vars}")
            },
        ),
    )(input)
}
232
233fn lex_keyword_or_identifier(input: LexInput) -> InternalLexResult {
234    let (input, identifier) = lex_identifier_raw(input)?;
235    let token = keyword_or_identifier(identifier);
236    Ok((input, token))
237}
238
239fn lex_target(input: LexInput) -> InternalLexResult {
240    let (input, _) = tag("@")(input)?;
241    let (input, label) = lex_identifier_raw(input)?;
242    Ok((input, Token::Target(label)))
243}
244
/// Create a [`lexical`] [formatting constant][lexical::NumberFormat] from a
/// [`radix`][lexical::NumberFormat::radix] (base) and an optional [prefix
/// character][lexical::NumberFormat::base_prefix] (which comes after a leading `0`).
///
/// For instance, hexadecimal literals have a radix of `16` and a prefix character of `b'x'`.
///
/// This is a `const fn` so each format can be computed at compile time and
/// used as a const-generic argument (see `def_radix!` and `lex_decimal_number`).
const fn number_format(radix: u8, prefix: Option<NonZeroU8>) -> u128 {
    NumberFormatBuilder::new()
        .mantissa_radix(radix)
        .exponent_base(NonZeroU8::new(radix))
        .exponent_radix(NonZeroU8::new(10))
        .base_prefix(prefix)
        .digit_separator(NonZeroU8::new(b'_'))
        .required_integer_digits(false) // Allows `.1`
        .required_fraction_digits(false) // Allows `1.`
        .required_exponent_digits(true) // Forbids `1e`
        .required_mantissa_digits(true) // Forbids `.`
        .no_positive_mantissa_sign(true) // Forbids `+1`
        .required_mantissa_sign(false) // Allows `1`, not just `-1` (though `-1` parses as `-(1)`)
        .no_exponent_notation(false) // Allows `1e3`
        .no_positive_exponent_sign(false) // Allows `1e+3`, not just `1e3`
        .required_exponent_sign(false) // Allows `1e3`, not just `1e+3`
        .no_exponent_without_fraction(false) // Allows `1e3`, not just `1.e3`
        .no_special(true) // Forbids `nan` and `inf`
        .no_integer_leading_zeros(false) // Allows `01`
        .no_float_leading_zeros(false) // Allows `01.2`
        .required_exponent_notation(false) // Allows `1.2`, not just `12e-1`
        .case_sensitive_exponent(false) // Allows `1e3` and `1E3`
        .case_sensitive_base_prefix(false) // Allows `0x1` and `0X1`
        .digit_separator_flags(true) // Allows `1__2_.3__4_e_5__6_`, but…
        .integer_leading_digit_separator(false) // Forbids `_1` (already a variable name)
        .fraction_leading_digit_separator(false) // Forbids `._1` (but buggy in lexical 7.0.5)
        .special_digit_separator(false) // Must be `false` since we forbid special floats
        .build_strict()
}
279
// Integer parsing uses lexical's default options; radix and prefix behavior
// come entirely from the `FORMAT` const-generic passed alongside these.
const INTEGER_OPTIONS: ParseIntegerOptions = ParseIntegerOptions::new();
// Float parsing with the conventional `e` exponent marker and `.` decimal point.
const FLOAT_OPTIONS: ParseFloatOptions = ParseFloatOptions::builder()
    .exponent(b'e')
    .decimal_point(b'.')
    .build_strict();
285
/// Build a parser that lexes a number of type `N` from the front of the input
/// using the packed [`lexical::NumberFormat`] `FORMAT` and the given parse
/// `options`, returning the value and the unconsumed remainder.
///
/// Overflow/underflow is reported as a non-backtracking [`nom::Err::Failure`]
/// (the input was definitely a number, just an unrepresentable one); all other
/// lexical errors are recoverable so another token kind can be tried.
fn lex_and_parse_number<N: FromLexicalWithOptions, const FORMAT: u128>(
    options: &'static N::Options,
) -> impl FnMut(LexInput) -> InternalLexResult<N> {
    #[inline(always)]
    fn parse<N: FromLexicalWithOptions, const FORMAT: u128>(
        input: LexInput,
        options: &'static N::Options,
    ) -> lexical::Result<(N, usize)> {
        // Partial parse: `len` is how many bytes of `input` were consumed.
        let result @ (_, len) =
            lexical::parse_partial_with_options::<N, _, FORMAT>(input, options)?;

        // There appears to be a bug in lexical where in `0b.`, `0b` is parsed as the integer `0`
        // even though `.` is not consumed.  This check is a workaround for that.
        if const {
            NumberFormatBuilder::rebuild(FORMAT)
                .get_base_prefix()
                .is_some()
        } && len == 2
        {
            // Exactly two consumed bytes with a base prefix means only `0x`/`0b`/`0o`
            // was eaten — i.e. there were no digits at all.
            return Err(lexical::Error::EmptyInteger(2));
        }

        // There appears to be a bug in lexical where `._1` is accepted even though
        // `fraction_leading_digit_separator` is `false`.  This check is a workaround for that.
        if let Some(dot) = input.slice(..len).find("._") {
            // Re-parse only up to and including the `.`, leaving `_…` for the
            // identifier lexer.
            let include_dot = dot + 1;
            let number =
                lexical::parse_with_options::<N, _, FORMAT>(input.slice(..include_dot), options)?;
            return Ok((number, include_dot));
        }

        Ok(result)
    }

    move |input| {
        let (num, len) = parse::<N, FORMAT>(input, options).map_err(|lex_err| {
            let error = InternalLexError::from_kind(input, lex_err.into());
            match lex_err {
                lexical::Error::Overflow(_) | lexical::Error::Underflow(_) => {
                    // No need to backtrack – this was a number, just a bad one
                    nom::Err::Failure(error)
                }
                _ => nom::Err::Error(error),
            }
        })?;

        Ok((input.slice(len..), num))
    }
}
335
/// Lex an unsigned integer using `FORMAT`. `PREFIX` is the base-prefix
/// character that follows a leading `0` (e.g. `b'x'` for hexadecimal), or `0`
/// to mean "no prefix" (plain decimal).
fn raw_lex_integer<const PREFIX: u8, const FORMAT: u128>(
    input: LexInput,
) -> InternalLexResult<u64> {
    const {
        // Compile-time guard: the prefix must be a single ASCII byte.
        assert!(PREFIX <= 127, "PREFIX must be an ASCII character");
    }

    if PREFIX == 0 {
        lex_and_parse_number::<u64, FORMAT>(&INTEGER_OPTIONS)(input)
    } else {
        // Cheaply `peek` for the `0<prefix>` pair (case-insensitive) so that
        // non-prefixed input can backtrack to other number forms; once the
        // prefix is seen, `cut` commits — a malformed body is a hard failure.
        let (_, _) = peek(tag_no_case(std::str::from_utf8(&[b'0', PREFIX]).unwrap()))(input)?;
        cut(lex_and_parse_number::<u64, FORMAT>(&INTEGER_OPTIONS))(input)
    }
}
350
/// Define a `lex_<name>_integer` function for a given radix and optional
/// base-prefix character. The two-argument form defaults the prefix to `0`
/// (no prefix), used for plain decimal.
macro_rules! def_radix {
    ($name:ident, $radix:literal $(,)?) => {
        def_radix!($name, $radix, 0);
    };
    ($name:ident, $radix:literal, $prefix:literal $(,)?) => {
        paste::paste! {
            #[inline]
            fn [< lex_ $name _integer >](input: LexInput) -> InternalLexResult<u64> {
                raw_lex_integer::<$prefix, { number_format($radix, NonZeroU8::new($prefix)) }>(
                    input
                )
            }
        }
    };
}

// Concrete per-radix lexers: `lex_binary_integer`, `lex_decimal_integer`,
// `lex_octal_integer`, and `lex_hexadecimal_integer`.
def_radix!(binary, 2, b'b');
def_radix!(decimal, 10);
def_radix!(octal, 8, b'o');
def_radix!(hexadecimal, 16, b'x');
371
/// Lex a base-10 number as either an `Integer` or a `Float` token.
///
/// Disambiguation: input starting with `.` is a float; otherwise an integer is
/// lexed first, and if the character immediately after it is `.`, `e`, or `E`,
/// the whole literal is re-lexed from the start as a float.
fn lex_decimal_number(input: LexInput) -> InternalLexResult {
    let parse_float = |input| {
        // `cut`: by the time we commit to a float, failure should not backtrack.
        let (input, float) = cut(lex_and_parse_number::<f64, { number_format(10, None) }>(
            &FLOAT_OPTIONS,
        ))(input)?;

        // A literal that overflows f64 parses as +/-inf; reject it explicitly
        // since the format itself forbids special values.
        if !float.is_finite() {
            return Err(nom::Err::Failure(InternalLexError::from_kind(
                input,
                lexical::Error::Overflow(0).into(),
            )));
        }

        Ok((input, Token::Float(float)))
    };

    if input.as_bytes().first() == Some(&b'.') {
        parse_float(input)
    } else {
        let (input_if_int, int) = lex_decimal_integer(input)?;
        if input_if_int
            .as_bytes()
            .first()
            .is_some_and(|next| b".eE".contains(next))
        {
            // Actually it was a float all along!
            parse_float(input)
        } else {
            Ok((input_if_int, Token::Integer(int)))
        }
    }
}
404
/// Lex any numeric literal.
///
/// The prefixed radix forms (`0b…`, `0o…`, `0x…`) are tried before plain
/// decimal so that a prefixed literal is not lexed as a bare leading `0`.
fn lex_number(input: LexInput) -> InternalLexResult {
    alt(
        "number",
        (
            map(lex_binary_integer, Token::Integer),
            map(lex_octal_integer, Token::Integer),
            map(lex_hexadecimal_integer, Token::Integer),
            lex_decimal_number,
        ),
    )(input)
}
416
417fn lex_operator(input: LexInput) -> InternalLexResult {
418    use Operator::*;
419    map(
420        alt(
421            "an operator",
422            (
423                value(Caret, tag("^")),
424                value(Minus, tag("-")),
425                value(Plus, tag("+")),
426                value(Slash, tag("/")),
427                value(Star, tag("*")),
428            ),
429        ),
430        Token::Operator,
431    )(input)
432}
433
/// Recognize one or more consecutive newline characters, returning the span.
///
/// NOTE(review): `is_a` matches a run of characters drawn from a *set*, so the
/// second alternative (`is_a("\r\n")`) also accepts a lone `\r` or mixed
/// `\r`/`\n` runs when the input starts with `\r` — confirm this leniency is
/// intended rather than strict `\r\n` pair matching.
fn recognize_newlines(input: LexInput) -> InternalLexResult<LexInput> {
    alt(
        "one or more newlines",
        (
            is_a::<_, _, InternalLexError>("\n"),
            is_a::<_, _, InternalLexError>("\r\n"),
        ),
    )(input)
}
443
/// Lex a punctuation token: brackets, parentheses, separators, newlines, or
/// mid-line indentation (four spaces or a tab; leading indentation is handled
/// separately by `lex_indent` in `_lex`).
fn lex_punctuation(input: LexInput) -> InternalLexResult {
    use Token::*;
    alt(
        "punctuation",
        (
            value(Bang, tag("!")),
            value(Colon, tag(":")),
            value(Comma, tag(",")),
            value(
                Indentation,
                alt("four spaces or a tab character", (tag("    "), tag("\t"))),
            ),
            value(LBracket, tag("[")),
            value(LParenthesis, tag("(")),
            value(NewLine, recognize_newlines),
            value(RBracket, tag("]")),
            value(RParenthesis, tag(")")),
            value(Semicolon, tag(";")),
        ),
    )(input)
}
465
466fn lex_string(input: LexInput) -> InternalLexResult {
467    map(quoted_strings::unescaped_quoted_string, Token::String)(input)
468}
469
470fn lex_variable(input: LexInput) -> InternalLexResult {
471    map(preceded(tag("%"), lex_identifier_raw), |ident| {
472        Token::Variable(ident)
473    })(input)
474}
475
// Unit tests for the lexer: per-token-kind cases plus macro-generated numeric
// literal suites and a whole-program smoke test.
#[cfg(test)]
mod tests {
    use nom_locate::LocatedSpan;
    use rstest::*;

    use crate::parser::{common::tests::KITCHEN_SINK_QUIL, DataType};

    use super::{lex, Command, Operator, Token};

    // Comments keep their text (without the leading `#`) and do not consume
    // the trailing newline, which lexes as its own token.
    #[test]
    fn comment() {
        let input = LocatedSpan::new("# hello\n#world\n#\n#");
        let tokens = lex(input).unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Comment(" hello".to_owned()),
                Token::NewLine,
                Token::Comment("world".to_owned()),
                Token::NewLine,
                Token::Comment("".to_owned()),
                Token::NewLine,
                Token::Comment("".to_owned())
            ]
        )
    }

    // Keyword recognition is case-sensitive (`load` stays an identifier) and
    // does not match on prefix (`LOAD-MEMORY` stays an identifier).
    #[test]
    fn keywords() {
        let input = LocatedSpan::new("DEFGATE DEFCIRCUIT JUMP-WHEN MATRIX LOAD load LOAD-MEMORY");
        let tokens = lex(input).unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Command(Command::DefGate),
                Token::Command(Command::DefCircuit),
                Token::Command(Command::JumpWhen),
                Token::Matrix,
                Token::Command(Command::Load),
                Token::Identifier(String::from("load")),
                Token::Identifier(String::from("LOAD-MEMORY"))
            ]
        )
    }

    // Multi-token splits: a prefixed integer followed by `.…` splits into an
    // integer and a float; an `i` suffix lexes as a separate identifier.
    #[rstest]
    #[case::bin_dot("0b10.1", [Token::Integer(0b10), Token::Float(0.1)])]
    #[case::oct_dot("0o777.7", [Token::Integer(0o777), Token::Float(0.7)])]
    #[case::hex_dot("0x3.4", [Token::Integer(0x3), Token::Float(0.4)])]
    #[case::imaginary("1i", [Token::Integer(1), Token::Identifier("i".to_owned())])]
    #[case::complex(
        "1 + 2i",
        [
            Token::Integer(1),
            Token::Operator(Operator::Plus),
            Token::Integer(2),
            Token::Identifier("i".to_owned()),
        ],
    )]
    #[case::bin_imaginary("0b10i", [Token::Integer(0b10), Token::Identifier("i".to_owned())])]
    #[case::oct_imaginary("0o10i", [Token::Integer(0o10), Token::Identifier("i".to_owned())])]
    #[case::hex_imaginary("0x10i", [Token::Integer(0x10), Token::Identifier("i".to_owned())])]
    #[case::zero_dot_underscore_one("0._1", [Token::Float(0.0), Token::Identifier("_1".to_owned())])]
    #[case::zero_dot_underscore("0._", [Token::Float(0.0), Token::Identifier("_".to_owned())])]
    fn tokenization<const N: usize>(#[case] input: &str, #[case] expected: [Token; N]) {
        let tokens = lex(LocatedSpan::new(input)).expect("lexing error");
        assert_eq!(
            tokens,
            expected,
            "lexing {input:?}:\n\
             - got:      {plain_tokens:?},\n\
             - expected: {expected:?}",
            plain_tokens = tokens
                .iter()
                .map(|located| located.as_token())
                .collect::<Vec<_>>(),
        );
    }

    // Inputs that must fail to lex entirely: bare base prefixes, prefixed
    // floats, out-of-range literals, and leading-separator fractions.
    #[rstest]
    #[case::bin_prefix_only("0b")]
    #[case::oct_prefix_only("0o")]
    #[case::hex_prefix_only("0x")]
    #[case::bin_prefix_dot("0b.")]
    #[case::oct_prefix_dot("0o.")]
    #[case::hex_prefix_dot("0x.")]
    #[case::bin_prefix_dot_one("0b.1")]
    #[case::oct_prefix_dot_one("0o.1")]
    #[case::hex_prefix_dot_one("0x.1")]
    #[case::int_too_big("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF")]
    #[case::float_too_big("1e1_000_000")]
    #[case::dot_underscore_one("._1")]
    #[case::dot_underscore("._")]
    fn bad_token(#[case] input: &str) {
        let _ = lex(LocatedSpan::new(input)).expect_err("lexing error");
    }

    // Incremental (tt-muncher) macro that accumulates `#[case::…]` attributes
    // in its leading `{ … }` group and finally emits one `#[rstest]` function.
    // Case syntaxes accepted:
    //   `LITERAL,`                 — input is `stringify!(LITERAL)`, expected is the literal;
    //   `lit(TOKENS) => LITERAL,`  — input is the concatenated stringified tokens;
    //   `name(IDENT) => LITERAL,`  — custom case name, input stringified from the literal;
    //   `{ name: …, input: …, expected: … },` — fully explicit form.
    macro_rules! number_test_builder {
        {
            { $($cases:tt)* }
            $(#[$meta:meta])*
            $number:literal,
            $($rest:tt)*
        } => {
            number_test_builder! {
                { $($cases)* }
                $(#[$meta])*
                { name: ($number), input: stringify!($number), expected: $number },
                $($rest)*
            }
        };

        {
            { $($cases:tt)* }
            $(#[$meta:meta])*
            lit($($piece:tt)+) => $number:literal,
            $($rest:tt)*
        } => {
            number_test_builder! {
                { $($cases)* }
                $(#[$meta])*
                {
                    name: ($($piece)+),
                    input: concat!($(stringify!($piece)),+),
                    expected: $number,
                },
                $($rest)*
            }
        };

        {
            { $($cases:tt)* }
            $(#[$meta:meta])*
            name($($piece:tt)+) => $number:literal,
            $($rest:tt)*
        } => {
            number_test_builder! {
                { $($cases)* }
                $(#[$meta])*
                {
                    name: ($($piece)+),
                    input: stringify!($number),
                    expected: $number,
                },
                $($rest)*
            }
        };

        (
            { $($cases:tt)* }
            $(#[$meta:meta])*
            {
                name: ($($name_fragment:tt)+),
                input: $input:expr,
                expected: $number:expr$(,)?
            },
            $($rest:tt)*
        ) => {
            paste::paste! {
                number_test_builder! {
                    {
                        $($cases)*
                        #[allow(
                            non_snake_case,
                            clippy::inconsistent_digit_grouping,
                            clippy::unusual_byte_groupings,
                            clippy::mixed_case_hex_literals,
                            clippy::zero_prefixed_literal,
                        )]
                        $(#[$meta])*
                        #[case::[<lex$(_$name_fragment)*>]($input, $number)]
                    }
                    $($rest)*
                }
            }
        };

        // Terminal rule: all cases consumed; emit the test function itself.
        (
            {
                { $test_name:ident, $token:ident, $ty:ty }
                $($cases:tt)*
            }
        ) => {
            #[rstest]
            $($cases)*
            fn $test_name(#[case] input: &str, #[case] expected: $ty) {
                tokenization(input, [Token::$token(expected)])
            }
        }
    }

    macro_rules! integer_tests {
        ($($input:tt)*) => {
            // Ensure that stringification preserves formatting
            const _: () = assert!(matches!(
                stringify!(0x_12__34_aBCd__).as_bytes(),
                b"0x_12__34_aBCd__"
            ));

            number_test_builder! { { {integer, Integer, u64} } $($input)* }
        }
    }

    macro_rules! float_tests {
        ($($input:tt)*) => {
            number_test_builder! { { {float, Float, f64} } $($input)* }
        }
    }

    integer_tests! {
        0,
        1,
        2_,
        3__,
        4_5,
        6__7,
        8__9__10,
        0000042,
        0x0,
        0x7F,
        lit(0X7f) => 0x7f,
        0x__CaFe__B0bA__1234__,
        0o0,
        0o10,
        lit(0O10) => 0o10,
        0o__777__555__,
        0b0,
        0b101010,
        lit(0B101010) => 0b101010,
        0b__1111__0000__,
    }

    float_tests! {
        name(0dot) => 0.,
        { name: (dot0), input: ".0", expected: 0.0 },
        name(0dot0) => 0.0,
        name(1dot) => 1.,
        { name: (dot1), input: ".1", expected: 0.1 },
        name(1dot1) => 1.1,
        name(1__2__dot3__4__eplus__1__5__) => 1__2__.3__4__e+__1__5__,
        name(1__2__dot3__4__eminus__1__5__) => 1__2__.3__4__e-__1__5__,
        {
            name: (1__2__dot3__4__e__1__5__),
            input: "1__2__.3__4__e__1__5__",
            expected: 1__2__.3__4__e+__1__5__
        },
        1e5,
        { name: (1dote5), input: "1.e5", expected: 1e5 },
        { name: (dot1e5), input: ".1e5", expected: 0.1e5 },
        name(1dot1e5) => 1.1e5,
    }

    #[test]
    fn a_bunch_of_numbers() {
        let input = LocatedSpan::new("2 2i 2.0 2e3 2.0e3 (1+2i)");
        let tokens = lex(input).unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Integer(2),
                Token::Integer(2),
                Token::Identifier("i".to_owned()),
                Token::Float(2.0),
                Token::Float(2000f64),
                Token::Float(2000f64),
                Token::LParenthesis,
                Token::Integer(1),
                Token::Operator(Operator::Plus),
                Token::Integer(2),
                Token::Identifier("i".to_owned()),
                Token::RParenthesis
            ]
        )
    }

    #[test]
    fn string() {
        let input = LocatedSpan::new("\"hello\"\n\"world\"");
        let tokens = lex(input).unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::String("hello".to_owned()),
                Token::NewLine,
                Token::String("world".to_owned())
            ]
        )
    }

    #[test]
    fn gate_operation() {
        let input = LocatedSpan::new("I 0; RX 1\nCZ 0 1");
        let tokens = lex(input).unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Identifier("I".to_owned()),
                Token::Integer(0),
                Token::Semicolon,
                Token::Identifier("RX".to_owned()),
                Token::Integer(1),
                Token::NewLine,
                Token::Identifier("CZ".to_owned()),
                Token::Integer(0),
                Token::Integer(1),
            ]
        )
    }

    #[test]
    fn label() {
        let input = LocatedSpan::new("@hello\n@world");
        let tokens = lex(input).unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Target("hello".to_owned()),
                Token::NewLine,
                Token::Target("world".to_owned())
            ]
        )
    }

    #[test]
    fn indentation() {
        let input = LocatedSpan::new("    ");
        let tokens = lex(input).unwrap();
        assert_eq!(tokens, vec![Token::Indentation,])
    }

    // Both a tab and four spaces lex as Indentation inside a block body.
    #[test]
    fn indented_block() {
        let input = LocatedSpan::new("DEFGATE Name AS PERMUTATION:\n\t1,0\n    0,1");
        let tokens = lex(input).unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Command(Command::DefGate),
                Token::Identifier("Name".to_owned()),
                Token::As,
                Token::Permutation,
                Token::Colon,
                Token::NewLine,
                Token::Indentation,
                Token::Integer(1),
                Token::Comma,
                Token::Integer(0),
                Token::NewLine,
                Token::Indentation,
                Token::Integer(0),
                Token::Comma,
                Token::Integer(1),
            ]
        )
    }

    // Trailing whitespace at end of input is dropped, but interior
    // newlines/indentation are tokenized.
    #[test]
    fn surrounding_whitespace() {
        let input = LocatedSpan::new("\nI 0\n    \n");
        let tokens = lex(input).unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::NewLine,
                Token::Identifier("I".to_owned()),
                Token::Integer(0),
                Token::NewLine,
                Token::Indentation,
                Token::NewLine
            ]
        )
    }

    // Identifier shapes, including dash-joined segments, and the rule that
    // `nan`/`inf`-like words are identifiers (specials are disabled in the
    // number format).
    #[rstest(input, expected,
        case("_", vec![Token::Identifier("_".to_string())]),
        case("a", vec![Token::Identifier("a".to_string())]),
        case("_a-2_b-2_", vec![Token::Identifier("_a-2_b-2_".to_string())]),
        case("a-2-%var", vec![
            Token::Identifier("a-2".to_string()),
            Token::Operator(Operator::Minus),
            Token::Variable("var".to_string())
        ]),
        case("BIT", vec![Token::DataType(DataType::Bit)]),
        case("BITS", vec![Token::Identifier("BITS".to_string())]),
        case("NaN", vec![Token::Identifier("NaN".to_string())]),
        case("nan", vec![Token::Identifier("nan".to_string())]),
        case("NaNa", vec![Token::Identifier("NaNa".to_string())]),
        case("nana", vec![Token::Identifier("nana".to_string())]),
        case("INF", vec![Token::Identifier("INF".to_string())]),
        case("Infinity", vec![Token::Identifier("Infinity".to_string())]),
        case("Inferior", vec![Token::Identifier("Inferior".to_string())]),
        case("-NaN", vec![Token::Operator(Operator::Minus), Token::Identifier("NaN".to_string())]),
        case("-inf", vec![Token::Operator(Operator::Minus), Token::Identifier("inf".to_string())]),
        case("-Infinity", vec![
            Token::Operator(Operator::Minus),
            Token::Identifier("Infinity".to_string())
        ]),
        case("-inferior", vec![
            Token::Operator(Operator::Minus),
            Token::Identifier("inferior".to_string())
        ]),
    )]
    fn it_lexes_identifier(input: &str, expected: Vec<Token>) {
        let input = LocatedSpan::new(input);
        let tokens = lex(input).unwrap();
        assert_eq!(tokens, expected);
    }

    // Negative cases: if these inputs lex at all, they must not lex as a
    // single malformed identifier.
    #[rstest(input, not_expected,
        case("a-", vec![Token::Identifier("_-".to_string())]),
        case("-a", vec![Token::Identifier("-a".to_string())]),
        case("a\\", vec![Token::Identifier("_\\".to_string())]),
    )]
    fn it_fails_to_lex_identifier(input: &str, not_expected: Vec<Token>) {
        let input = LocatedSpan::new(input);
        if let Ok(tokens) = lex(input) {
            assert_ne!(tokens, not_expected);
        }
    }

    /// Test that an entire sample program can be lexed without failure.
    #[test]
    fn kitchen_sink() {
        let input = LocatedSpan::new(KITCHEN_SINK_QUIL);

        lex(input).unwrap();
    }
}
903}