use super::{TokenInfo, TokenStream, TokenType, Tokenizer};
/// Spec mapping each of the eight tokenizer commands to exactly one custom
/// token. There is one type parameter per field so each token may be any
/// `ToString` type (`&str`, `char`, `String`, ...) independently.
pub struct SimpleTokenSpec<S1, S2, S3, S4, S5, S6, S7, S8> {
/// Token for the pointer-increment command.
pub ptr_inc: S1,
/// Token for the pointer-decrement command.
pub ptr_dec: S2,
/// Token for the data-increment command.
pub data_inc: S3,
/// Token for the data-decrement command.
pub data_dec: S4,
/// Token for the output command.
pub output: S5,
/// Token for the input command.
pub input: S6,
/// Token for the loop-head command.
pub loop_head: S7,
/// Token for the loop-tail command.
pub loop_tail: S8,
}
/// Convenience alias for a spec where every token shares the same type `S`.
pub type SimpleTokenSpec1<S> = SimpleTokenSpec<S, S, S, S, S, S, S, S>;
impl<S1, S2, S3, S4, S5, S6, S7, S8> SimpleTokenSpec<S1, S2, S3, S4, S5, S6, S7, S8>
where
    S1: ToString,
    S2: ToString,
    S3: ToString,
    S4: ToString,
    S5: ToString,
    S6: ToString,
    S7: ToString,
    S8: ToString,
{
    /// Builds a [`SimpleTokenizer`] with one table entry per command.
    ///
    /// The table is sorted by descending character count so that lookups try
    /// the longest candidate token first (longest-match-wins when one token
    /// is a prefix of another).
    pub fn to_tokenizer(&self) -> SimpleTokenizer {
        use std::cmp::Reverse;

        let mut token_table = vec![
            SimpleTokenDef::new(&self.ptr_inc, TokenType::PInc),
            SimpleTokenDef::new(&self.ptr_dec, TokenType::PDec),
            SimpleTokenDef::new(&self.data_inc, TokenType::DInc),
            SimpleTokenDef::new(&self.data_dec, TokenType::DDec),
            SimpleTokenDef::new(&self.output, TokenType::Output),
            SimpleTokenDef::new(&self.input, TokenType::Input),
            SimpleTokenDef::new(&self.loop_head, TokenType::LoopHead),
            SimpleTokenDef::new(&self.loop_tail, TokenType::LoopTail),
        ];
        // `Reverse` expresses "descending by char_count" directly, instead of
        // the `usize::MAX - n` trick; the stable sort keeps equal-length
        // tokens in insertion order, as before.
        token_table.sort_by_key(|def| Reverse(def.char_count));
        SimpleTokenizer { token_table }
    }
}
#[test]
fn test_simple_def_to_tokenizer() {
    // Shorthand for building expected table entries with explicit
    // (independently stated) character counts.
    let def = |token: &str, token_type: TokenType, char_count: usize| SimpleTokenDef {
        token: token.to_string(),
        token_type,
        char_count,
    };

    // Mix of `&str`, `char`, and `String` fields to exercise the generic spec.
    let spec = SimpleTokenSpec {
        ptr_inc: "♡♡",
        ptr_dec: "aaaaa",
        data_inc: '♠',
        data_dec: "♢♢♢",
        output: "♣♣♣♣",
        input: "dddddddd".to_string(),
        loop_head: "ccccccc",
        loop_tail: "bbbbbb",
    };
    let tokenizer = spec.to_tokenizer();

    // The table must come out ordered by descending character count.
    let expected = [
        def("dddddddd", TokenType::Input, 8),
        def("ccccccc", TokenType::LoopHead, 7),
        def("bbbbbb", TokenType::LoopTail, 6),
        def("aaaaa", TokenType::PDec, 5),
        def("♣♣♣♣", TokenType::Output, 4),
        def("♢♢♢", TokenType::DDec, 3),
        def("♡♡", TokenType::PInc, 2),
        def("♠", TokenType::DInc, 1),
    ];
    assert_simple_def_eq(&tokenizer.token_table, &expected);
}
#[cfg(test)]
/// Asserts field-by-field equality of two `SimpleTokenDef` slices, reporting
/// the index and field name of the first mismatch.
fn assert_simple_def_eq(actual: &[SimpleTokenDef], expected: &[SimpleTokenDef]) {
    assert_eq!(actual.len(), expected.len(), "length");
    for index in 0..expected.len() {
        let a = &actual[index];
        let e = &expected[index];
        assert_eq!(a.token, e.token, "[{index}].token");
        assert_eq!(a.token_type, e.token_type, "[{index}].token_type");
        assert_eq!(a.char_count, e.char_count, "[{index}].char_count");
    }
}
/// Spec mapping each of the eight tokenizer commands to any number of
/// alternative tokens. Like [`SimpleTokenSpec`], but each field is a slice
/// and every element becomes its own table entry.
pub struct SimpleMultiTokenSpec<'a, S1, S2, S3, S4, S5, S6, S7, S8> {
/// Tokens for the pointer-increment command.
pub ptr_inc: &'a [S1],
/// Tokens for the pointer-decrement command.
pub ptr_dec: &'a [S2],
/// Tokens for the data-increment command.
pub data_inc: &'a [S3],
/// Tokens for the data-decrement command.
pub data_dec: &'a [S4],
/// Tokens for the output command.
pub output: &'a [S5],
/// Tokens for the input command.
pub input: &'a [S6],
/// Tokens for the loop-head command.
pub loop_head: &'a [S7],
/// Tokens for the loop-tail command.
pub loop_tail: &'a [S8],
}
/// Convenience alias for a multi-spec where every token shares the same type `S`.
pub type SimpleMultiTokenSpec1<'a, S> = SimpleMultiTokenSpec<'a, S, S, S, S, S, S, S, S>;
impl<'a, S1, S2, S3, S4, S5, S6, S7, S8> SimpleMultiTokenSpec<'a, S1, S2, S3, S4, S5, S6, S7, S8>
where
    S1: ToString,
    S2: ToString,
    S3: ToString,
    S4: ToString,
    S5: ToString,
    S6: ToString,
    S7: ToString,
    S8: ToString,
{
    /// Builds a [`SimpleTokenizer`] containing one table entry per token in
    /// every field of this spec.
    ///
    /// The table is sorted by descending character count so that lookups try
    /// the longest candidate token first (longest-match-wins when one token
    /// is a prefix of another).
    pub fn to_tokenizer(&self) -> SimpleTokenizer {
        use std::cmp::Reverse;

        let mut token_table = Self::to_token_defs(self.ptr_inc, TokenType::PInc)
            .chain(Self::to_token_defs(self.ptr_dec, TokenType::PDec))
            .chain(Self::to_token_defs(self.data_inc, TokenType::DInc))
            .chain(Self::to_token_defs(self.data_dec, TokenType::DDec))
            .chain(Self::to_token_defs(self.output, TokenType::Output))
            .chain(Self::to_token_defs(self.input, TokenType::Input))
            .chain(Self::to_token_defs(self.loop_head, TokenType::LoopHead))
            .chain(Self::to_token_defs(self.loop_tail, TokenType::LoopTail))
            .collect::<Vec<_>>();
        // `Reverse` expresses "descending by char_count" directly, instead of
        // the `usize::MAX - n` trick; the stable sort keeps equal-length
        // tokens in insertion order, as before.
        token_table.sort_by_key(|def| Reverse(def.char_count));
        SimpleTokenizer { token_table }
    }

    /// Maps each token literal in `tokens` to a `SimpleTokenDef` tagged with
    /// `token_type`.
    fn to_token_defs(
        tokens: &[impl ToString],
        token_type: TokenType,
    ) -> impl Iterator<Item = SimpleTokenDef> + '_ {
        tokens
            .iter()
            .map(move |token| SimpleTokenDef::new(token, token_type))
    }
}
#[test]
fn test_multiple_simple_def_to_tokenizer() {
    // Shorthand for building expected table entries with explicit
    // (independently stated) character counts.
    let def = |token: &str, token_type: TokenType, char_count: usize| SimpleTokenDef {
        token: token.to_string(),
        token_type,
        char_count,
    };

    // `data_dec` has two alternatives; both must appear in the table.
    let spec = SimpleMultiTokenSpec {
        ptr_inc: &["♡♡"],
        ptr_dec: &["aaaaa"],
        data_inc: &['♠'],
        data_dec: &["♢♢♢", "??????????"],
        output: &["♣♣♣♣"],
        input: &["dddddddd".to_string()],
        loop_head: &["ccccccc"],
        loop_tail: &["bbbbbb"],
    };
    let tokenizer = spec.to_tokenizer();

    // The table must come out ordered by descending character count.
    let expected = [
        def("??????????", TokenType::DDec, 10),
        def("dddddddd", TokenType::Input, 8),
        def("ccccccc", TokenType::LoopHead, 7),
        def("bbbbbb", TokenType::LoopTail, 6),
        def("aaaaa", TokenType::PDec, 5),
        def("♣♣♣♣", TokenType::Output, 4),
        def("♢♢♢", TokenType::DDec, 3),
        def("♡♡", TokenType::PInc, 2),
        def("♠", TokenType::DInc, 1),
    ];
    assert_simple_def_eq(&tokenizer.token_table, &expected);
}
/// One entry in the tokenizer's lookup table: a literal token string plus the
/// token type it produces.
#[derive(Debug, PartialEq, Eq)]
struct SimpleTokenDef {
// The literal text of the token.
token: String,
// Number of *characters* (not bytes) in `token`; cached for sorting the
// table and for advancing character-based positions in the stream.
char_count: usize,
// The kind of token this literal produces.
token_type: TokenType,
}
impl SimpleTokenDef {
    /// Builds a definition from any stringifiable token, caching its
    /// character (not byte) count.
    fn new(token: &impl ToString, token_type: TokenType) -> Self {
        let text = token.to_string();
        Self {
            char_count: text.chars().count(),
            token: text,
            token_type,
        }
    }
}
/// Table-driven tokenizer built from a [`SimpleTokenSpec`] or
/// [`SimpleMultiTokenSpec`].
pub struct SimpleTokenizer {
// Token definitions sorted by descending character count, so a linear scan
// finds the longest matching token first.
token_table: Vec<SimpleTokenDef>,
}
impl<'a> Tokenizer<'a> for SimpleTokenizer {
    type Stream = SimpleTokenStream<'a>;

    /// Creates a stream that scans `source` using this tokenizer's table.
    fn token_stream(&'a self, source: &'a str) -> Self::Stream {
        SimpleTokenStream::new(source, &self.token_table)
    }
}
/// Streaming scanner over a source string, produced by
/// [`SimpleTokenizer::token_stream`].
pub struct SimpleTokenStream<'a> {
// Token definitions, ordered longest-first (see `SimpleTokenizer`).
token_table: &'a [SimpleTokenDef],
// The full source text being scanned.
source: &'a str,
// Current scan position as a byte offset into `source`.
pos: usize,
// Current scan position measured in characters (reported in `TokenInfo`).
pos_in_chars: usize,
}
impl<'a> SimpleTokenStream<'a> {
fn new(source: &'a str, token_table: &'a [SimpleTokenDef]) -> Self {
SimpleTokenStream {
token_table,
source,
pos: 0,
pos_in_chars: 0,
}
}
}
impl<'a> TokenStream for SimpleTokenStream<'a> {
/// Scans forward from the current position to the next recognized token.
///
/// Characters that start no token are skipped. On a match, returns the
/// token's type and its position measured in characters (not bytes), and
/// advances past the matched token. When the rest of the source contains
/// no token, consumes it all and returns `token_type: None` positioned at
/// the end.
///
/// Note: this implementation always returns `Ok`; the `Result` is part of
/// the `TokenStream` contract.
fn next(&mut self) -> Result<TokenInfo, crate::error::ParseError> {
// Characters skipped so far in this call, relative to `self.pos`.
let mut rel_pos_in_chars = 0;
// `char_indices` yields byte offsets of char boundaries, so the slice
// inspected by `find_token_at` always starts at a valid boundary.
for (rel_pos, _) in self.source[self.pos..].char_indices() {
if let Some(def) = find_token_at(self.source, self.pos + rel_pos, self.token_table) {
let info = TokenInfo {
token_type: Some(def.token_type),
pos_in_chars: self.pos_in_chars + rel_pos_in_chars,
};
// Advance past the matched token: `token.len()` is bytes,
// `char_count` is characters.
self.pos += rel_pos + def.token.len();
self.pos_in_chars += rel_pos_in_chars + def.char_count;
return Ok(info);
}
rel_pos_in_chars += 1;
}
// No token in the remainder: consume everything and report end-of-input.
self.pos = self.source.len();
self.pos_in_chars += rel_pos_in_chars;
Ok(TokenInfo {
token_type: None,
pos_in_chars: self.pos_in_chars,
})
}
}
/// Returns the first table entry whose literal matches `source` at byte
/// offset `pos` (which must be a char boundary), or `None` if nothing
/// matches. Callers keep the table sorted longest-first, so the first hit is
/// also the longest match.
fn find_token_at<'a>(
    source: &str,
    pos: usize,
    token_table: &'a [SimpleTokenDef],
) -> Option<&'a SimpleTokenDef> {
    let remainder = &source[pos..];
    for def in token_table {
        if remainder.starts_with(&def.token) {
            return Some(def);
        }
    }
    None
}