//! It's not yet time, but a start nonetheless.
/// Placeholder type: it has no fields and nothing visible in this file uses it.
/// NOTE(review): its intended role is not evident from here; confirm before building on it.
struct File {}
/// Id for a source program fragment.
///
/// Linked to some data store, so the original source
/// can be fetched from it when necessary.
///
/// The id is a plain `u32` wrapper, so it is cheap to copy and can be
/// compared, hashed (e.g. used as a map key) and debug-printed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct FragmentId(u32);
/// A source program fragment.
///
/// Valid instances include:
/// - Function definitions
/// - Command definitions
/// - Dice operator declarations
/// - Heterogeneous collections of the above
struct Fragment {
/// Id of this fragment; see [`FragmentId`].
id: FragmentId,
}
// Adapted wholesale from the lexer of another language I was working on.
/// Logos callback for string literals, run once the opening `"` has matched.
///
/// Searches the not-yet-lexed input for the next `"` byte and extends the
/// current token up to and including it.
///
/// Returns `true` if a closing quote was found, and `false` (without
/// consuming any further input) if the input ends first.
fn string_literal(lex: &mut ::logos::Lexer<Token>) -> bool {
    // String escapes are not handled yet, so the first quote always closes
    // the literal. This stays separate from `line_comment` because the two
    // are liable to diverge once escapes are supported.
    match lex.remainder().iter().position(|&byte| byte == b'"') {
        Some(quote_at) => {
            // `+ 1` so that the closing quote itself is part of the token.
            lex.bump(quote_at + 1);
            true
        }
        None => false,
    }
}
// Lifted almost verbatim from the lexer of another language I was working on. :D
/// Logos callback for line comments, run once the leading `//` has matched.
///
/// Extends the current token over every byte up to, but not including, the
/// next `\n`; if there is no `\n`, the rest of the input is consumed.
///
/// Always returns `true`.
fn line_comment(lex: &mut ::logos::Lexer<Token>) -> bool {
    let rest = lex.remainder();
    // TODO: support other line endings
    let comment_len = rest
        .iter()
        .position(|&byte| byte == b'\n')
        .unwrap_or(rest.len());
    lex.bump(comment_len);
    true
}
/// We reserve words we might want to use as keywords in the future.
///
/// Returns `true` when the slice currently matched by `lex` is exactly one
/// of those reserved words, and `false` otherwise.
fn reserved_word(lex: &mut ::logos::Lexer<Token>) -> bool {
    /// Every reserved word, as the raw bytes the lexer sees.
    const RESERVED: &[&[u8]] = &[
        b"__I_AM_A_RESERVED_WORD_THAT_NOBODY_WILL_EVER_USE__",
        b"type",
        b"prim",
        b"intrinsic",
        b"op",
        b"while",
        b"true",
        b"false",
    ];
    let candidate: &[u8] = lex.slice();
    RESERVED.iter().any(|word| *word == candidate)
}
/// Every kind of token the lexer can produce.
///
/// Input is lexed as raw bytes (`&[u8]`). Bytes that match no rule come out
/// as [`Token::Error`].
#[derive(::logos::Logos, Debug, Clone, Copy, PartialEq, Eq)]
enum Token {
    // I sort token definitions in order of most
    // to least specific, shortest to longest, but it bears noting that
    // Logos doesn't actually care about variant order.
    // It will perform token disambiguation according
    // to its own rules, as described at
    // https://docs.rs/logos/0.12.0/logos/index.html#token-disambiguation
    #[token(b".")]
    StatementTerminator,
    // I would name this token after its semantic purpose,
    // but it's likely to have two.
    // - Path separation
    // - Type annotation
    #[token(b":")]
    Colon,
    #[token(b",")]
    Comma,
    #[token(b"(")]
    OpenParen,
    #[token(b")")]
    CloseParen,
    #[token(b"{")]
    OpenCurly,
    #[token(b"}")]
    CloseCurly,
    #[token(b"=")]
    Assign,
    #[token(b"@")]
    At,
    #[token(b"#")]
    Quote,
    // I was going to make this a `&`, but taking a reference
    // is not important enough in this language to allow
    // easy confusion between it and logical `and`.
    // Or, worse, bitwise `and`, which commonly is exactly the same symbol.
    // I use `%` instead because frankly using a percent sign
    // for modular arithmetic would be terrible for users as well.
    #[token(b"%")]
    Ref,
    #[token(b"&&")]
    And,
    #[token(b"==")]
    Equal,
    #[token(b"->")]
    Arrow,
    #[token(b"=>")]
    FatArrow,
    #[token(b"do")]
    Do,
    #[token(b"fn")]
    Fn,
    #[token(b"if")]
    If,
    #[token(b"cmd")]
    Cmd,
    #[token(b"let")]
    Let,
    // TODO: determine if I want constructors like this
    #[token(b"new")]
    New,
    #[token(b"loop")]
    Loop,
    #[token(b"else")]
    Else,
    #[token(b"meta")]
    Meta,
    #[token(b"break")]
    Break,
    #[token(b"const")]
    Const,
    // This keyword is a little strange, in that it acts
    // exactly like an ident, but it cannot be assigned to.
    // It also provides access to special semantic constructs,
    // like reading the context of a containing command.
    #[token(b"builtin")]
    Builtin,
    #[regex("[0-9]+")]
    Integer,
    #[regex("[0-9]*d[0-9]+")]
    DiceTerm,
    // The callback consumes everything through the closing quote.
    #[token(b"\"", string_literal)]
    StringLiteral,
    // The callback consumes the rest of the line, excluding the newline.
    #[token(b"//", line_comment)]
    LineComment,
    // Words we might want to use as keywords in the future.
    //
    // These are spelled out as literal tokens rather than sharing the
    // `Ident` regex behind the `reserved_word` callback. Logos turns a
    // `bool` callback's `false` into the error variant instead of falling
    // back to another variant, so with the shared regex every ordinary
    // identifier lexed as `Token::Error` and `Ident` was unreachable.
    // Literal tokens outrank the `Ident` regex, while a longer identifier
    // that merely starts with a reserved word still wins by longest match.
    // The `reserved_word` function is therefore no longer used here.
    #[token(b"__I_AM_A_RESERVED_WORD_THAT_NOBODY_WILL_EVER_USE__")]
    #[token(b"type")]
    #[token(b"prim")]
    #[token(b"intrinsic")]
    #[token(b"op")]
    #[token(b"while")]
    #[token(b"true")]
    #[token(b"false")]
    ReservedWord,
    #[regex(br##"[_a-zA-Z]+"##)]
    Ident,
    #[regex("[\t ]+")]
    Whitespace,
    #[error]
    Error,
}

#[cfg(test)]
mod tests {
    use super::Token;
    use logos::Logos;

    /// This test verifies that [`Token::ReservedWord`] wins over
    /// [`Token::Ident`] for a reserved word.
    #[test]
    fn reserved_words() {
        let text = "__I_AM_A_RESERVED_WORD_THAT_NOBODY_WILL_EVER_USE__";
        let mut lexer = Token::lexer(text.as_bytes());
        assert!(matches!(lexer.next(), Some(Token::ReservedWord)));
    }

    /// Every reserved word lexes as exactly one [`Token::ReservedWord`].
    #[test]
    fn every_reserved_word_is_reserved() {
        let words = [
            "__I_AM_A_RESERVED_WORD_THAT_NOBODY_WILL_EVER_USE__",
            "type",
            "prim",
            "intrinsic",
            "op",
            "while",
            "true",
            "false",
        ];
        for word in words.iter() {
            let mut lexer = Token::lexer(word.as_bytes());
            assert_eq!(lexer.next(), Some(Token::ReservedWord), "word: {}", word);
            assert_eq!(lexer.next(), None, "word: {}", word);
        }
    }

    /// Regression test: ordinary identifiers must lex as [`Token::Ident`],
    /// not [`Token::Error`], including ones that start with a reserved word.
    #[test]
    fn plain_identifiers_are_idents() {
        for text in ["foo", "types"].iter() {
            let mut lexer = Token::lexer(text.as_bytes());
            assert_eq!(lexer.next(), Some(Token::Ident), "text: {}", text);
            assert_eq!(lexer.next(), None, "text: {}", text);
        }
    }
}
fn parse_fragment(input: &[u8], id: FragmentId) -> Fragment {
use ::logos::Logos;
let mut lexer = Token::lexer(input);
todo!()
}