sphinx/
language.rs

1use string_interner::symbol::SymbolUsize;
2use once_cell::sync::OnceCell;
3
4use crate::lexer::{LexerBuilder, Token};
5use crate::lexer::rules::{SingleCharRule, MultiCharRule};
6use crate::lexer::rules::keywords::KeywordRule;
7use crate::lexer::rules::literals::*;
8use crate::lexer::rules::literals::string::*;
9
10
// Fail early, with an explicit message, on targets for which no numeric
// representation is defined below (anything other than 32- or 64-bit pointers).
#[cfg(not(any(target_pointer_width = "32", target_pointer_width = "64")))]
compile_error!(
    "unsupported target_pointer_width: IntType and FloatType are only defined for 32-bit and 64-bit targets"
);

/// Internal representation for integers.
///
/// A signed integer as wide as the target's pointer: `i32` on 32-bit targets.
#[cfg(target_pointer_width = "32")]
pub type IntType = i32;
/// Internal representation for integers.
///
/// A signed integer as wide as the target's pointer: `i64` on 64-bit targets.
#[cfg(target_pointer_width = "64")]
pub type IntType = i64;


/// Internal representation for floats.
///
/// A float as wide as the target's pointer: `f32` on 32-bit targets.
#[cfg(target_pointer_width = "32")]
pub type FloatType = f32;
/// Internal representation for floats.
///
/// A float as wide as the target's pointer: `f64` on 64-bit targets.
#[cfg(target_pointer_width = "64")]
pub type FloatType = f64;
23
24
/// Interned string symbol representation.
///
/// Alias for `SymbolUsize` from the `string_interner` crate, so the rest of
/// the code can name the symbol type without referring to that crate directly.
pub type InternSymbol = SymbolUsize;
27
28
// Comment markers

/// Character that marks a comment.
// NOTE(review): presumably starts a comment running to end of line — confirm against the lexer.
pub static COMMENT_CHAR: char = '#';
/// Opening delimiter of a nested comment (going by the name; the scanning logic lives elsewhere).
pub static NESTED_COMMENT_START: &str = "#{";
/// Closing delimiter of a nested comment; the mirror image of `NESTED_COMMENT_START`.
pub static NESTED_COMMENT_END:   &str = "}#";
33
34
/// Variable access modes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Access {
    /// The variable may be read but not written.
    ReadOnly,
    /// The variable may be both read and written.
    ReadWrite,
}

impl Access {
    /// Returns `true` only for [`Access::ReadWrite`].
    pub fn can_write(&self) -> bool {
        // Exhaustive on purpose: adding a variant forces this to be revisited.
        match self {
            Self::ReadWrite => true,
            Self::ReadOnly => false,
        }
    }
}
47
48
49// String literal escape sequences
50static ESCAPE_SEQUENCES: OnceCell<Vec<Box<dyn EscapeSequence>>> = OnceCell::new();
51
52pub fn all_escape_sequences() -> impl Iterator<Item=&'static dyn EscapeSequence> {
53    ESCAPE_SEQUENCES.get_or_init(|| {
54            
55            let escapes: Vec<Box<dyn EscapeSequence>> = vec![
56                
57                Box::new(CharMapEscape::new('0', "\x00")),
58                Box::new(CharMapEscape::new('\\', "\\")),
59                Box::new(CharMapEscape::new('\'', "\'")),
60                Box::new(CharMapEscape::new('\"', "\"")),
61                
62                Box::new(CharMapEscape::new('t', "\t")),
63                Box::new(CharMapEscape::new('n', "\n")),
64                Box::new(CharMapEscape::new('r', "\r")),
65                Box::new(HexByteEscape::new()),
66            ];
67            
68            escapes
69            
70        })
71        .iter().map(|esc| &**esc)
72}
73
74
75// Tokens
76pub fn create_default_lexer_rules() -> LexerBuilder {
77    LexerBuilder::new()
78    
79    // Punctuation
80    .add_rule(SingleCharRule::new(Token::OpenParen,       '('))
81    .add_rule(SingleCharRule::new(Token::CloseParen,      ')'))
82    .add_rule(SingleCharRule::new(Token::OpenBrace,       '{'))
83    .add_rule(SingleCharRule::new(Token::CloseBrace,      '}'))
84    .add_rule(SingleCharRule::new(Token::OpenSquare,      '['))
85    .add_rule(SingleCharRule::new(Token::CloseSquare,     ']'))
86    .add_rule(SingleCharRule::new(Token::Comma,           ','))
87    .add_rule(SingleCharRule::new(Token::Colon,           ':'))
88    .add_rule(SingleCharRule::new(Token::Semicolon,       ';'))
89    .add_rule(SingleCharRule::new(Token::Decorator,       '@'))
90    
91    .add_rule(MultiCharRule::new(Token::Ellipsis,         "..."))
92    
93    // Assignment and access operators
94    .add_rule(SingleCharRule::new(Token::OpAssign,        '='))
95    .add_rule(SingleCharRule::new(Token::OpAccess,        '.'))
96    
97    // Arithmetic and comparison operators
98    .add_rule(MultiCharRule::new(Token::OpExp,            "**"))
99    
100    .add_rule(SingleCharRule::new(Token::OpAdd,           '+'))
101    .add_rule(SingleCharRule::new(Token::OpSub,           '-'))
102    .add_rule(SingleCharRule::new(Token::OpMul,           '*'))
103    .add_rule(SingleCharRule::new(Token::OpDiv,           '/'))
104    .add_rule(SingleCharRule::new(Token::OpMod,           '%'))
105    .add_rule(SingleCharRule::new(Token::OpInv,           '~'))
106    .add_rule(SingleCharRule::new(Token::OpAnd,           '&'))
107    .add_rule(SingleCharRule::new(Token::OpOr,            '|'))
108    .add_rule(SingleCharRule::new(Token::OpXor,           '^'))
109    
110    .add_rule(SingleCharRule::new(Token::OpLT,            '<'))
111    .add_rule(SingleCharRule::new(Token::OpGT,            '>'))
112    
113    .add_rule(MultiCharRule::new(Token::OpLE,             "<="))
114    .add_rule(MultiCharRule::new(Token::OpGE,             ">="))
115    .add_rule(MultiCharRule::new(Token::OpEQ,             "=="))
116    .add_rule(MultiCharRule::new(Token::OpNE,             "!="))
117    
118    .add_rule(MultiCharRule::new(Token::OpAddAssign,      "+="))
119    .add_rule(MultiCharRule::new(Token::OpSubAssign,      "-="))
120    .add_rule(MultiCharRule::new(Token::OpMulAssign,      "*="))
121    .add_rule(MultiCharRule::new(Token::OpDivAssign,      "/="))
122    .add_rule(MultiCharRule::new(Token::OpModAssign,      "%="))
123    .add_rule(MultiCharRule::new(Token::OpAndAssign,      "&="))
124    .add_rule(MultiCharRule::new(Token::OpOrAssign,       "|="))
125    .add_rule(MultiCharRule::new(Token::OpXorAssign,       "^="))
126    .add_rule(MultiCharRule::new(Token::OpLShiftAssign,   "<<="))
127    .add_rule(MultiCharRule::new(Token::OpRShiftAssign,   ">>="))
128    
129    .add_rule(MultiCharRule::new(Token::OpLShift,         "<<"))
130    .add_rule(MultiCharRule::new(Token::OpRShift,         ">>"))
131    
132    // Keywords
133    .add_rule(KeywordRule::new(Token::And,                "and"))
134    .add_rule(KeywordRule::new(Token::Or,                 "or"))
135    .add_rule(KeywordRule::new(Token::Not,                "not"))
136    .add_rule(KeywordRule::new(Token::True,               "true"))
137    .add_rule(KeywordRule::new(Token::False,              "false"))
138    .add_rule(KeywordRule::new(Token::Nil,                "nil"))
139    .add_rule(KeywordRule::new(Token::Let,                "let"))
140    .add_rule(KeywordRule::new(Token::Var,                "var"))
141    .add_rule(KeywordRule::new(Token::Local,              "local"))
142    .add_rule(KeywordRule::new(Token::NonLocal,           "nonlocal"))
143    .add_rule(KeywordRule::new(Token::Del,                "del"))
144    .add_rule(KeywordRule::new(Token::Begin,              "begin"))
145    .add_rule(KeywordRule::new(Token::If,                 "if"))
146    .add_rule(KeywordRule::new(Token::Then,               "then"))
147    .add_rule(KeywordRule::new(Token::Elif,               "elif"))
148    .add_rule(KeywordRule::new(Token::Else,               "else"))
149    .add_rule(KeywordRule::new(Token::Loop,               "loop"))
150    .add_rule(KeywordRule::new(Token::While,              "while"))
151    .add_rule(KeywordRule::new(Token::For,                "for"))
152    .add_rule(KeywordRule::new(Token::In,                 "in"))
153    .add_rule(KeywordRule::new(Token::Do,                 "do"))
154    .add_rule(KeywordRule::new(Token::Continue,           "continue"))
155    .add_rule(KeywordRule::new(Token::Break,              "break"))
156    .add_rule(KeywordRule::new(Token::Return,             "return"))
157    .add_rule(KeywordRule::new(Token::Fun,                "fun"))
158    .add_rule(KeywordRule::new(Token::Class,              "class"))
159    // .add_rule(KeywordRule::new(Token::Self_,              "self"))
160    // .add_rule(KeywordRule::new(Token::Super,              "super"))
161    .add_rule(KeywordRule::new(Token::Assert,             "assert"))
162    .add_rule(KeywordRule::new(Token::End,                "end"))
163    
164    // Identifiers and literals
165    .add_rule(IdentifierRule::new())
166    .add_rule(IntegerLiteralRule::new())
167    .add_rule(PrefixedIntegerLiteralRule::new("0x", 16))
168    .add_rule(PrefixedIntegerLiteralRule::new("0o", 8))
169    .add_rule(PrefixedIntegerLiteralRule::new("0b", 2))
170    .add_rule(FloatLiteralRule::new())
171    .add_rule(StringLiteralRule::new(all_escape_sequences()))
172    .add_rule(LabelRule::new("::"))
173    
174}