use std::collections::HashSet;
use crate::util::mask_strings;
/// Lookup tables that `count_tokens` consults to classify the tokens it finds.
pub struct TokenRules {
/// Word tokens (runs of ASCII alphanumerics/underscores) that are counted as
/// operators rather than operands.
pub operator_keywords: &'static [&'static str],
/// Symbol spellings matched against the start of the not-yet-scanned text;
/// a match is counted as an operator.
pub operator_symbols: &'static [&'static str],
/// Word tokens that are counted as neither operators nor operands.
pub ignored_keywords: &'static [&'static str],
}
/// Operator and operand tallies produced by `count_tokens`.
pub struct TokenCounts {
/// Unique operator spellings seen (matched symbols and operator keywords).
pub distinct_operators: HashSet<String>,
/// Unique operand spellings seen (every word token that is neither an
/// operator keyword nor an ignored keyword).
pub distinct_operands: HashSet<String>,
/// Number of operator occurrences, repeats included.
pub total_operators: usize,
/// Number of operand occurrences, repeats included.
pub total_operands: usize,
}
pub use super::rules::rules_for;
/// Tallies the operators and operands found in `code_lines`.
///
/// Every line is first passed through `mask_strings` together with
/// `line_comments` (presumably this blanks out string literals and comment
/// text; see `crate::util` to confirm). The masked text is then scanned from
/// left to right, one byte position at a time:
///
/// * ASCII whitespace and non-ASCII bytes are skipped.
/// * If the remaining text starts with one of `rules.operator_symbols`, that
///   symbol is recorded as an operator and the scan jumps past it.
/// * Otherwise a run of ASCII alphanumerics/underscores is read as one word.
///   A word listed in `rules.operator_keywords` is an operator, a word listed
///   in `rules.ignored_keywords` is dropped, and every other word (numeric
///   literal or identifier) is an operand.
/// * Any other byte is skipped.
///
/// Symbols are tested before words, and only at the position where a token
/// starts; bytes in the middle of a word are never tested against the symbol
/// table.
///
/// Returns the distinct spellings and the total occurrence counts for both
/// categories.
pub fn count_tokens(
    code_lines: &[&str],
    rules: &TokenRules,
    line_comments: &[&str],
) -> TokenCounts {
    let mut counts = TokenCounts {
        distinct_operators: HashSet::new(),
        distinct_operands: HashSet::new(),
        total_operators: 0,
        total_operands: 0,
    };

    for line in code_lines {
        let masked = mask_strings(line, line_comments);
        let bytes = masked.as_bytes();
        let len = bytes.len();
        let mut i = 0;

        while i < len {
            let ch = bytes[i];

            // Whitespace separates tokens; non-ASCII bytes are not tokenized.
            if ch.is_ascii_whitespace() || !ch.is_ascii() {
                i += 1;
                continue;
            }

            // A zero-length symbol would match at every position and advance
            // the cursor by nothing, hanging the scan; treat it as no match.
            let symbol = try_match_symbol(&bytes[i..], rules.operator_symbols)
                .filter(|sym| !sym.is_empty());
            if let Some(sym) = symbol {
                record_token(
                    &mut counts.distinct_operators,
                    &mut counts.total_operators,
                    sym,
                );
                i += sym.len();
                continue;
            }

            if ch.is_ascii_alphanumeric() || ch == b'_' {
                let start = i;
                while i < len && (bytes[i].is_ascii_alphanumeric() || bytes[i] == b'_') {
                    i += 1;
                }
                // Only ASCII bytes were consumed, so `start..i` lies on
                // character boundaries.
                let token = &masked[start..i];

                if rules.operator_keywords.contains(&token) {
                    record_token(
                        &mut counts.distinct_operators,
                        &mut counts.total_operators,
                        token,
                    );
                } else if rules.ignored_keywords.contains(&token) {
                    // Deliberately counted as neither operator nor operand.
                } else if is_numeric(token) {
                    // Numeric literal: an operand.
                    record_token(
                        &mut counts.distinct_operands,
                        &mut counts.total_operands,
                        token,
                    );
                } else {
                    // Identifier: an operand.
                    record_token(
                        &mut counts.distinct_operands,
                        &mut counts.total_operands,
                        token,
                    );
                }
                continue;
            }

            // Any other byte (punctuation outside the symbol table) is skipped.
            i += 1;
        }
    }

    counts
}

/// Records one occurrence of `token`: bumps `total` and adds the spelling to
/// `distinct`, allocating an owned copy only the first time it is seen.
fn record_token(distinct: &mut HashSet<String>, total: &mut usize, token: &str) {
    if !distinct.contains(token) {
        distinct.insert(token.to_owned());
    }
    *total += 1;
}
/// Returns the longest entry of `symbols` that `rest` starts with, or `None`
/// when no entry matches.
///
/// Picking the longest match means a multi-character operator such as `==`
/// is recognized as a whole even when a shorter prefix of it (`=`) appears
/// earlier in the table. Empty entries are ignored: they would match at every
/// position while consuming no input.
fn try_match_symbol<'a>(rest: &[u8], symbols: &[&'a str]) -> Option<&'a str> {
    symbols
        .iter()
        .copied()
        .filter(|sym| !sym.is_empty() && rest.starts_with(sym.as_bytes()))
        .max_by_key(|sym| sym.len())
}
/// Reports whether `token` begins with an ASCII digit (`0`-`9`).
///
/// Only the first byte is inspected; an empty token yields `false`.
fn is_numeric(token: &str) -> bool {
    matches!(token.bytes().next(), Some(b'0'..=b'9'))
}
#[cfg(test)]
#[path = "tokenizer_test.rs"]
mod tests;