roketok
[!WARNING]
roketok is still undergoing frequent changes,
though finalisation is near.
A simple tokenization library, focused on ease of use.
If you run into a problem, whether it is a performance issue or a bug in general, please open an issue on the issue tracker.
Basic C Tokenization Example
use roketok::prelude::*;
/// Kinds of tokens produced by the C tokenization example.
///
/// Besides `Default`, the enum derives `Debug`, `Clone`, `Copy`, `PartialEq`
/// and `Eq` so that token kinds can be printed, copied freely and compared
/// when inspecting or testing the tokenizer output.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
enum TokenKind {
    /// A name: an alphabetic character followed by alphanumeric characters.
    Identifier,
    /// A literal that starts with a numeric character.
    Number,
    /// The `*` character.
    Asterisk,
    /// The `&` character.
    Ampersand,
    /// The `;` character.
    Semicolon,
    /// The `=` character.
    Equal,
    /// The `+=` character sequence.
    AddEqual,
    /// A region delimited by `(` and `)`.
    Parenthesis,
    /// The default kind, used when nothing else applies.
    #[default]
    Invalid,
}
/// Builds a tokenizer configuration for a small C snippet and runs it.
fn main() {
    // The C source fed to the tokenizer; it is reproduced verbatim because
    // every character (including line breaks) is part of the input.
    let contents = r#"
void foo(int *value) {
*value += 35;
}
int main(void) {
int value = 34;
foo(&value);
return value;
}
"#;

    let config = Configuration::new().add_tokens([
        // Identifier rule: accept when `iter.last()` yields an alphabetic
        // character, then consume every following alphanumeric character.
        // NOTE(review): `last()` presumably returns the character that was
        // just consumed -- confirm against the roketok documentation.
        (
            TokenConfiguration::Rule(&|iter, _| match iter.last() {
                Some(first) if first.is_alphabetic() => {
                    while iter.peek().map_or(false, |next| next.is_alphanumeric()) {
                        let _ = iter.next();
                    }
                    true
                }
                _ => false,
            }),
            TokenKind::Identifier,
        ),
        // Number rule: same shape as the identifier rule, but the character
        // from `iter.last()` must be numeric. Trailing alphanumeric
        // characters are consumed as part of the same token.
        (
            TokenConfiguration::Rule(&|iter, _| match iter.last() {
                Some(first) if first.is_numeric() => {
                    while iter.peek().map_or(false, |next| next.is_alphanumeric()) {
                        let _ = iter.next();
                    }
                    true
                }
                _ => false,
            }),
            TokenKind::Number,
        ),
        // Tokens described by a fixed sequence of characters.
        (TokenConfiguration::Boring(&['*']), TokenKind::Asterisk),
        (TokenConfiguration::Boring(&['&']), TokenKind::Ampersand),
        (TokenConfiguration::Boring(&['=']), TokenKind::Equal),
        (TokenConfiguration::Boring(&['+', '=']), TokenKind::AddEqual),
        (TokenConfiguration::Boring(&[';']), TokenKind::Semicolon),
        // NOTE(review): presumably a region opened by `(` and closed by `)`
        // -- confirm the exact `Branch` semantics in the roketok docs.
        (
            TokenConfiguration::Branch(&['('], &[')']),
            TokenKind::Parenthesis,
        ),
    ]);

    // Run the tokenizer over the snippet using the configuration above.
    let mut tokenizer = Tokenizer::new(&config, contents);
    let tree = tokenizer.build();
}
TODO