cas_parser/parser/ast/
literal.rs

1use crate::{
2    parser::{
3        ast::{expr::Expr, helper::SquareDelimited},
4        error::{kind, Error},
5        fmt::Latex,
6        token::{Boolean, CloseParen, Float, Name, Int, OpenParen, Quote},
7        Parse,
8        Parser,
9        ParseResult,
10    },
11    tokenizer::TokenKind,
12    return_if_ok,
13};
14use std::{collections::HashSet, fmt, ops::Range};
15
16#[cfg(feature = "serde")]
17use serde::{Deserialize, Serialize};
18
/// An integer literal, represented as a [`String`].
///
/// The text is copied from the integer token's lexeme when parsing; it is not converted to a
/// numeric type here.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct LitInt {
    /// The value of the integer literal as a string.
    pub value: String,

    /// The region of the source code that this literal was parsed from.
    pub span: Range<usize>,
}
29
30impl<'source> Parse<'source> for LitInt {
31    fn std_parse(
32        input: &mut Parser<'source>,
33        recoverable_errors: &mut Vec<Error>
34    ) -> Result<Self, Vec<Error>> {
35        input
36            .try_parse::<Int>()
37            .map(|int| Self {
38                value: int.lexeme.to_owned(),
39                span: int.span,
40            })
41            .forward_errors(recoverable_errors)
42    }
43}
44
45impl std::fmt::Display for LitInt {
46    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
47        write!(f, "{}", self.value)
48    }
49}
50
51impl Latex for LitInt {
52    fn fmt_latex(&self, f: &mut fmt::Formatter) -> fmt::Result {
53        write!(f, "{}", self.value)
54    }
55}
56
/// A floating-point literal, represented as a [`String`].
///
/// The text is copied from the float token's lexeme when parsing; it is not converted to a
/// numeric type here.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct LitFloat {
    /// The value of the floating-point literal as a string.
    pub value: String,

    /// The region of the source code that this literal was parsed from.
    pub span: Range<usize>,
}
67
68impl<'source> Parse<'source> for LitFloat {
69    fn std_parse(
70        input: &mut Parser<'source>,
71        recoverable_errors: &mut Vec<Error>
72    ) -> Result<Self, Vec<Error>> {
73        input
74            .try_parse::<Float>()
75            .map(|float| Self {
76                value: float.lexeme.to_owned(),
77                span: float.span,
78            })
79            .forward_errors(recoverable_errors)
80    }
81}
82
83impl std::fmt::Display for LitFloat {
84    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
85        write!(f, "{}", self.value)
86    }
87}
88
89impl Latex for LitFloat {
90    fn fmt_latex(&self, f: &mut fmt::Formatter) -> fmt::Result {
91        write!(f, "{}", self.value)
92    }
93}
94
/// The digits in base 64, in order of increasing value.
///
/// The index of a character in this array is the value of that digit, so the first `n` entries
/// are exactly the digits that are valid in base `n`.
pub const DIGITS: [char; 64] = [
    // values 0 to 9
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // values 10 to 35
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    // values 36 to 61
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // values 62 and 63
    '+', '/',
];
102
/// Helper struct to parse the digits used in various bases.
///
/// The digits of a radix literal may be split across several tokens (names, integers, `+` and
/// `/`); this struct holds those tokens joined back into a single word.
#[derive(Debug, Clone, PartialEq)]
struct RadixWord {
    /// The parsed digits.
    pub value: String,

    /// The region of the source code that this literal was parsed from.
    pub span: Range<usize>,
}
112
113impl RadixWord {
114    fn parse(input: &mut Parser) -> Self {
115        let mut value = String::new();
116        let mut span = 0..0;
117        while let Ok(token) = input.next_token_raw() {
118            match token.kind {
119                TokenKind::Add
120                    | TokenKind::Name
121                    | TokenKind::Int
122                    | TokenKind::Div => value.push_str(token.lexeme),
123                _ => {
124                    input.prev();
125                    break;
126                },
127            }
128
129            if span.start == 0 {
130                span.start = token.span.start;
131            }
132            span.end = token.span.end;
133        }
134
135        Self {
136            value,
137            span,
138        }
139    }
140}
141
142/// Helper function to ensure the given string represents a valid base for radix notation.
143fn validate_radix_base(num: &Int) -> ParseResult<u8> {
144    match num.lexeme.parse() {
145        Ok(base) if (2..=64).contains(&base) => ParseResult::Ok(base),
146        Ok(base) if base < 2 => ParseResult::Recoverable(
147            64, // use base 64 to limit invalid radix digit errors
148            vec![Error::new(vec![num.span.clone()], kind::InvalidRadixBase { too_large: false })],
149        ),
150        _ => ParseResult::Recoverable(
151            64,
152            vec![Error::new(vec![num.span.clone()], kind::InvalidRadixBase { too_large: true })],
153        ),
154    }
155}
156
/// A number written in radix notation. Radix notation allows users to express integers in a base
/// other than base 10.
///
/// A radix literal is written as the base, a quote, and then the digits. It is displayed as
/// `base'digits`.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct LitRadix {
    /// The radix of the literal. This value must be between 2 and 64, inclusive.
    ///
    /// When the base written in the source is out of range, the parser reports a recoverable
    /// error and stores 64 here.
    pub base: u8,

    /// The number, expressed in the given radix.
    ///
    /// The digits are stored as written; digits that are invalid for `base` are reported as
    /// recoverable errors by the parser but are still kept in this string.
    pub value: String,

    /// The region of the source code that this literal was parsed from.
    pub span: Range<usize>,
}
171
172impl<'source> Parse<'source> for LitRadix {
173    fn std_parse(
174        input: &mut Parser<'source>,
175        recoverable_errors: &mut Vec<Error>
176    ) -> Result<Self, Vec<Error>> {
177        let num = input.try_parse().forward_errors(recoverable_errors)?;
178        let quote = input.try_parse::<Quote>().forward_errors(recoverable_errors)?;
179
180        let base = validate_radix_base(&num).forward_errors(recoverable_errors)?;
181        let word = RadixWord::parse(input);
182        if word.value.is_empty() {
183            recoverable_errors.push(Error::new(vec![quote.span], kind::EmptyRadixLiteral {
184                radix: base,
185                allowed: &DIGITS[..base as usize],
186            }));
187        }
188
189        // ensure that the number is valid for this radix
190        let allowed_digits = &DIGITS[..base as usize];
191        let mut bad_digits = HashSet::new();
192        let mut bad_digit_spans: Vec<Range<usize>> = Vec::new();
193        for (i, c) in word.value.chars().enumerate() {
194            // if we find a digit that isn't allowed, that is fatal
195            // but continue to find all the bad digits so we can report them all at once
196            if !allowed_digits.contains(&c) {
197                let char_start = word.span.start + i;
198                if let Some(last_span) = bad_digit_spans.last_mut() {
199                    // merge adjacent spans
200                    if last_span.end == char_start {
201                        last_span.end += 1;
202                    } else {
203                        bad_digit_spans.push(char_start..char_start + 1);
204                    }
205                } else {
206                    bad_digit_spans.push(char_start..char_start + 1);
207                }
208
209                bad_digits.insert(c);
210                continue;
211            }
212        }
213
214        if !bad_digit_spans.is_empty() {
215            recoverable_errors.push(Error::new(bad_digit_spans, kind::InvalidRadixDigit {
216                radix: base,
217                allowed: allowed_digits,
218                digits: bad_digits,
219                last_op_digit: {
220                    if let Some(ch) = word.value.chars().last() {
221                        ['+', '/'].into_iter()
222                            .find(|&op| op == ch)
223                            .map(|op| (op, word.span.end - 1..word.span.end))
224                    } else {
225                        None
226                    }
227                },
228            }));
229        }
230
231        Ok(Self {
232            base,
233            value: word.value,
234            span: num.span.start..word.span.end,
235        })
236    }
237}
238
239impl std::fmt::Display for LitRadix {
240    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
241        write!(f, "{}'{}", self.base, self.value)
242    }
243}
244
245impl Latex for LitRadix {
246    fn fmt_latex(&self, f: &mut fmt::Formatter) -> fmt::Result {
247        write!(f, "{}'{}", self.base, self.value)
248    }
249}
250
/// A boolean literal, either `true` or `false`.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct LitBool {
    /// The value of the boolean literal.
    pub value: bool,

    /// The region of the source code that this literal was parsed from.
    pub span: Range<usize>,
}
261
262impl<'source> Parse<'source> for LitBool {
263    fn std_parse(
264        input: &mut Parser<'source>,
265        recoverable_errors: &mut Vec<Error>
266    ) -> Result<Self, Vec<Error>> {
267        input.try_parse::<Boolean>()
268            .map(|boolean| Self {
269                value: match boolean.lexeme {
270                    "true" => true,
271                    "false" => false,
272                    _ => unreachable!(),
273                },
274                span: boolean.span,
275            })
276            .forward_errors(recoverable_errors)
277    }
278}
279
280impl std::fmt::Display for LitBool {
281    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
282        write!(f, "{}", self.value)
283    }
284}
285
286impl Latex for LitBool {
287    fn fmt_latex(&self, f: &mut fmt::Formatter) -> fmt::Result {
288        write!(f, "{}", self.value)
289    }
290}
291
/// A symbol / identifier literal. Symbols are used to represent variables and functions.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct LitSym {
    /// The name of the symbol.
    pub name: String,

    /// The region of the source code that this literal was parsed from.
    pub span: Range<usize>,
}
302
303impl<'source> Parse<'source> for LitSym {
304    fn std_parse(
305        input: &mut Parser<'source>,
306        recoverable_errors: &mut Vec<Error>
307    ) -> Result<Self, Vec<Error>> {
308        // TODO: it would be nice if we could report an error if the symbol is a keyword
309        //
310        // for example:
311        // break(x) = x
312        // ^^^^^ error: `break` is a keyword and cannot be used as a symbol
313        //
314        // unfortunately this is hard since CalcScript is context-sensitive and we would have to
315        // to parse further ahead to determine if this error should be reported
316        // maybe we should require a `let` keyword to declare variables?
317        input.try_parse::<Name>()
318            .map(|name| Self {
319                name: name.lexeme.to_owned(),
320                span: name.span,
321            })
322            .forward_errors(recoverable_errors)
323    }
324}
325
326impl std::fmt::Display for LitSym {
327    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
328        write!(f, "{}", self.name)
329    }
330}
331
332impl Latex for LitSym {
333    fn fmt_latex(&self, f: &mut fmt::Formatter) -> fmt::Result {
334        match self.name.as_str() {
335            "tau" | "pi" | "phi" | "theta" => write!(f, "\\{} ", self.name),
336            _ => write!(f, "{}", self.name),
337        }
338    }
339}
340
/// The unit type, written as `()`. The unit type is by-default returned by functions that do not
/// return a value.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct LitUnit {
    /// The region of the source code that this literal was parsed from. It runs from the start
    /// of the opening parenthesis to the end of the closing parenthesis.
    pub span: Range<usize>,
}
349
350impl<'source> Parse<'source> for LitUnit {
351    fn std_parse(
352        input: &mut Parser<'source>,
353        recoverable_errors: &mut Vec<Error>
354    ) -> Result<Self, Vec<Error>> {
355        let open = input.try_parse::<OpenParen>().forward_errors(recoverable_errors)?;
356        let close = input.try_parse::<CloseParen>().forward_errors(recoverable_errors)?;
357        Ok(Self {
358            span: open.span.start..close.span.end,
359        })
360    }
361}
362
363impl std::fmt::Display for LitUnit {
364    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
365        write!(f, "()")
366    }
367}
368
369impl Latex for LitUnit {
370    fn fmt_latex(&self, f: &mut fmt::Formatter) -> fmt::Result {
371        write!(f, "()")
372    }
373}
374
/// A list type, consisting of a list of expressions surrounded by square brackets and delimited by
/// commas.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct LitList {
    /// The list of expressions.
    pub values: Vec<Expr>,

    /// The region of the source code that this literal was parsed from. It runs from the start
    /// of the opening bracket to the end of the closing bracket.
    pub span: Range<usize>,
}
386
387impl<'source> Parse<'source> for LitList {
388    fn std_parse(
389        input: &mut Parser<'source>,
390        recoverable_errors: &mut Vec<Error>
391    ) -> Result<Self, Vec<Error>> {
392        let surrounded = input.try_parse::<SquareDelimited<_>>().forward_errors(recoverable_errors)?;
393
394        Ok(Self {
395            values: surrounded.value.values,
396            span: surrounded.open.span.start..surrounded.close.span.end,
397        })
398    }
399}
400
401impl std::fmt::Display for LitList {
402    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
403        write!(f, "[")?;
404        for (i, value) in self.values.iter().enumerate() {
405            if i > 0 {
406                write!(f, ", ")?;
407            }
408            write!(f, "{}", value)?;
409        }
410        write!(f, "]")
411    }
412}
413
414impl Latex for LitList {
415    fn fmt_latex(&self, f: &mut fmt::Formatter) -> fmt::Result {
416        write!(f, "[")?;
417        for (i, value) in self.values.iter().enumerate() {
418            if i > 0 {
419                write!(f, ", ")?;
420            }
421            value.fmt_latex(f)?;
422        }
423        write!(f, "]")
424    }
425}
426
/// Represents a literal value in CalcScript.
///
/// A literal is any value that is written directly into the source code. For example, the
/// number `1` is a literal. The variants of this enum list every kind of literal that can be
/// represented.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Literal {
    /// An integer literal.
    Integer(LitInt),

    /// A floating-point literal.
    Float(LitFloat),

    /// A number written in radix notation. Radix notation allows users to express integers in a
    /// base other than base 10.
    Radix(LitRadix),

    /// A boolean literal, either `true` or `false`.
    Boolean(LitBool),

    /// A symbol / identifier literal. Symbols are used to represent variables and functions.
    Symbol(LitSym),

    /// The unit type, written as `()`. The unit type is by-default returned by functions that do
    /// not return a value.
    Unit(LitUnit),

    /// A list type, consisting of a list of expressions surrounded by square brackets and delimited
    /// by commas.
    List(LitList),
}
458
459impl Literal {
460    /// Returns the span of the literal.
461    pub fn span(&self) -> Range<usize> {
462        match self {
463            Literal::Integer(int) => int.span.clone(),
464            Literal::Float(float) => float.span.clone(),
465            Literal::Radix(radix) => radix.span.clone(),
466            Literal::Boolean(boolean) => boolean.span.clone(),
467            Literal::Symbol(name) => name.span.clone(),
468            Literal::Unit(unit) => unit.span.clone(),
469            Literal::List(list) => list.span.clone(),
470        }
471    }
472}
473
impl<'source> Parse<'source> for Literal {
    /// Parses a literal by attempting each kind of literal in a fixed order: boolean, radix,
    /// integer, float, symbol, unit, and finally list.
    ///
    /// The order of the attempts matters; see the comments in the body.
    fn std_parse(
        input: &mut Parser<'source>,
        recoverable_errors: &mut Vec<Error>
    ) -> Result<Self, Vec<Error>> {
        // NOTE(review): `return_if_ok!` is defined elsewhere in the crate; presumably it returns
        // from this function as soon as an attempt succeeds — confirm against its definition
        let _ = return_if_ok!(input.try_parse().map(Literal::Boolean).forward_errors(recoverable_errors));
        // radix literals are attempted before integers: a radix literal itself begins with an
        // integer token (its base), followed by a quote and the digits
        let _ = return_if_ok!(input.try_parse().map(Literal::Radix).forward_errors(recoverable_errors));
        let _ = return_if_ok!(input.try_parse().map(Literal::Integer).forward_errors(recoverable_errors));
        let _ = return_if_ok!(input.try_parse().map(Literal::Float).forward_errors(recoverable_errors));
        let _ = return_if_ok!(input.try_parse().map(Literal::Symbol).forward_errors(recoverable_errors));
        let _ = return_if_ok!(input.try_parse().map(Literal::Unit).forward_errors(recoverable_errors));
        // last attempt: its result, success or failure, is the result of parsing the literal
        input.try_parse().map(Literal::List).forward_errors(recoverable_errors)
    }
}
488
489impl std::fmt::Display for Literal {
490    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
491        match self {
492            Literal::Integer(int) => int.fmt(f),
493            Literal::Float(float) => float.fmt(f),
494            Literal::Radix(radix) => radix.fmt(f),
495            Literal::Boolean(boolean) => boolean.fmt(f),
496            Literal::Symbol(name) => name.fmt(f),
497            Literal::Unit(unit) => unit.fmt(f),
498            Literal::List(list) => list.fmt(f),
499        }
500    }
501}
502
503impl Latex for Literal {
504    fn fmt_latex(&self, f: &mut fmt::Formatter) -> fmt::Result {
505        match self {
506            Literal::Integer(int) => int.fmt_latex(f),
507            Literal::Float(float) => float.fmt_latex(f),
508            Literal::Radix(radix) => radix.fmt_latex(f),
509            Literal::Boolean(boolean) => boolean.fmt_latex(f),
510            Literal::Symbol(name) => name.fmt_latex(f),
511            Literal::Unit(unit) => unit.fmt_latex(f),
512            Literal::List(list) => list.fmt_latex(f),
513        }
514    }
515}