use ratatui::{
style::{Color, Style},
text::Span,
};
/// Words rendered as [`TokenType::Keyword`].
///
/// The tokenizer lower-cases each word before looking it up, so every entry
/// here must be lower-case. This table is consulted before the operator,
/// function and type tables, so a word listed here (e.g. `count`) is always
/// reported as a keyword even if it also appears in another table.
const KQL_KEYWORDS: &[&str] = &[
    // Statements.
    "let",
    "print",
    // Core tabular operators.
    "where",
    "project",
    "extend",
    "summarize",
    "join",
    "union",
    "sort",
    "top",
    "limit",
    "take",
    "distinct",
    "sample",
    "count",
    // Clause words.
    "as",
    "by",
    "on",
    "kind",
    // Join flavours.
    "inner",
    "leftouter",
    "rightouter",
    "fullouter",
    "leftanti",
    "rightanti",
    "leftsemi",
    "rightsemi",
    // Ordering modifiers.
    "asc",
    "desc",
    "nulls",
    "first",
    "last",
    // Further operators; the hyphenated ones rely on the tokenizer keeping
    // `-` inside the word.
    "render",
    "evaluate",
    "invoke",
    "search",
    "find",
    "make-series",
    "mv-expand",
    "mv-apply",
    "order",
    "parse",
    "datatable",
    "range",
    "facet",
    "fork",
    "partition",
    "scan",
    "lookup",
    "getschema",
    "externaldata",
    "materialize",
];
/// Word-shaped operators rendered as [`TokenType::Operator`].
///
/// Entries are lower-case because the tokenizer lower-cases a word before the
/// lookup. Negated forms carry their leading `!` and the case-insensitive
/// `in` variants carry their trailing `~`, exactly as written in a query.
///
/// Symbolic operators (`==`, `!=`, `=~`, `<`, ...) are not listed here; the
/// tokenizer recognises those character by character.
const KQL_OPERATORS: &[&str] = &[
    // Logical.
    "and",
    "or",
    "not",
    // Set membership.
    "in",
    "!in",
    "in~",
    "!in~",
    // Substring match.
    "contains",
    "!contains",
    "contains_cs",
    "!contains_cs",
    // Prefix match.
    "startswith",
    "!startswith",
    "startswith_cs",
    "!startswith_cs",
    // Suffix match.
    "endswith",
    "!endswith",
    "endswith_cs",
    "!endswith_cs",
    // Regular expressions (`matches regex`).
    "matches",
    "regex",
    // Term match.
    "has",
    "!has",
    "has_cs",
    "!has_cs",
    "has_any",
    "has_all",
    // Term prefix / suffix match.
    "hasprefix",
    "!hasprefix",
    "hasprefix_cs",
    "!hasprefix_cs",
    "hassuffix",
    "!hassuffix",
    "hassuffix_cs",
    "!hassuffix_cs",
    // Range.
    "between",
    "!between",
];
/// Function names rendered as [`TokenType::Function`] even when they are not
/// followed by `(`.
///
/// Looked up with the lower-cased word, after the keyword and operator tables
/// and before the type table. Any other identifier directly followed by `(`
/// is still classified as a function by the tokenizer, so this list does not
/// need to be exhaustive.
const KQL_FUNCTIONS: &[&str] = &[
    // Time literals and binning.
    "ago",
    "now",
    "datetime",
    "timespan",
    "bin",
    // Aggregations.
    "sum",
    "count",
    "avg",
    "min",
    "max",
    "dcount",
    "dcountif",
    "countif",
    "sumif",
    "avgif",
    "minif",
    "maxif",
    "stdev",
    "stdevif",
    "variance",
    "varianceif",
    "percentile",
    "percentiles",
    "make_list",
    "make_set",
    "make_bag",
    "arg_max",
    "arg_min",
    "any",
    "anyif",
    // Conversions.
    "tostring",
    "toint",
    "tolong",
    "todouble",
    "tobool",
    "todatetime",
    "totimespan",
    // Strings.
    "strlen",
    "substring",
    "strcat",
    "split",
    "replace",
    "trim",
    "toupper",
    "tolower",
    // Parsing and extraction.
    "parse_json",
    "parse_xml",
    "parse_csv",
    "parse_url",
    "extract",
    "extract_all",
    "extractjson",
    // Dynamic values, arrays and sets.
    "bag_keys",
    "bag_remove_keys",
    "pack",
    "pack_all",
    "pack_array",
    "todynamic",
    "array_length",
    "array_concat",
    "array_split",
    "set_union",
    "set_intersect",
    "set_difference",
    // Conditionals and null handling.
    "iif",
    "iff",
    "case",
    "coalesce",
    "isempty",
    "isnotempty",
    "isnull",
    "isnotnull",
    // Miscellaneous.
    "array_index_of",
    "hash",
    // Date and time parts.
    "format_datetime",
    "format_timespan",
    "dayofweek",
    "dayofmonth",
    "dayofyear",
    "week_of_year",
    "monthofyear",
    "getyear",
    "getmonth",
    "startofday",
    "startofweek",
    "startofmonth",
    "startofyear",
    "endofday",
    "endofweek",
    "endofmonth",
    "endofyear",
    "hourofday",
    "minuteofhour",
    "secondofminute",
];
/// Scalar type names rendered as [`TokenType::Type`].
///
/// This table is consulted last, after keywords, operators and functions, so
/// `datetime` and `timespan` (also present in `KQL_FUNCTIONS`) are reported as
/// functions rather than types.
const KQL_TYPES: &[&str] = &[
    // Textual and numeric.
    "string", "int", "long", "real", "double", "bool",
    // Temporal.
    "datetime", "timespan",
    // Everything else.
    "dynamic", "guid", "decimal",
];
/// Lexical category assigned to each token; the highlighter maps every
/// category to a colour.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TokenType {
    /// A word listed in `KQL_KEYWORDS`.
    Keyword,
    /// A word listed in `KQL_OPERATORS`, or a run of symbolic operator characters.
    Operator,
    /// A word listed in `KQL_FUNCTIONS`, or an identifier followed by `(`.
    Function,
    /// A word listed in `KQL_TYPES`.
    Type,
    /// A quoted string literal, quotes included.
    String,
    /// A numeric literal.
    Number,
    /// A `//` comment running to the end of the line.
    Comment,
    /// A statement or stage separator: `|` or `;`.
    Pipe,
    /// One of `(`, `)`, `,`, `[`, `]`, `:`.
    Punctuation,
    /// An identifier with no more specific classification, or the name that
    /// follows `let`.
    Variable,
    /// An identifier that the tokenizer's capitalisation heuristics take for
    /// a table name.
    TableName,
    /// An identifier that follows `by`, `project`, `extend` or a word operator.
    Property,
    /// Any single character that matched no other rule.
    Text,
}
/// Single-pass lexer that splits one line of KQL into highlightable tokens.
///
/// Whitespace is skipped, never returned; every returned token is the slice
/// `input[start..position]`, so a caller can recover a token's start offset as
/// `position - token.len()`.
struct KqlTokenizer<'a> {
    /// The text being tokenized.
    input: &'a str,
    /// Byte offset of the next unread character; always on a char boundary.
    position: usize,
    /// Category of the most recent pipe/semicolon, punctuation or word token.
    ///
    /// Deliberately NOT updated by strings, numbers, comments, symbolic
    /// operators or stray text: the identifier heuristics depend on this
    /// (e.g. in `let T = Events` the `=` must not make `Events` look like it
    /// follows an operator).
    last_token: Option<TokenType>,
    /// The most recent word token (keyword, operator, function, type or
    /// identifier), borrowed from `input`.
    prev_word: Option<&'a str>,
}

impl<'a> KqlTokenizer<'a> {
    /// Characters emitted one at a time as [`TokenType::Punctuation`].
    const PUNCTUATION_CHARS: &'static str = "(),[]:";
    /// Characters that are grouped into runs of symbolic [`TokenType::Operator`]s.
    const SYMBOLIC_OPERATOR_CHARS: &'static str = "=<>!+-*/%~";
    /// Unit suffixes that turn a number into a timespan literal (`1h`, `30m`, `100ms`).
    const TIMESPAN_UNITS: &'static [&'static str] =
        &["d", "h", "m", "s", "ms", "microsecond", "tick"];

    /// Creates a tokenizer positioned at the start of `input`.
    fn new(input: &'a str) -> Self {
        Self {
            input,
            position: 0,
            last_token: None,
            prev_word: None,
        }
    }

    /// The not-yet-consumed tail of the input.
    fn rest(&self) -> &'a str {
        &self.input[self.position..]
    }

    /// Returns the next character without consuming it.
    fn peek_char(&self) -> Option<char> {
        self.rest().chars().next()
    }

    /// Consumes and returns the next character, or `None` at end of input.
    fn advance(&mut self) -> Option<char> {
        let ch = self.peek_char()?;
        self.position += ch.len_utf8();
        Some(ch)
    }

    /// Consumes any run of whitespace at the current position.
    fn skip_whitespace(&mut self) {
        self.read_while(char::is_whitespace);
    }

    /// Consumes characters while `predicate` holds and returns the consumed slice.
    fn read_while<F>(&mut self, predicate: F) -> &'a str
    where
        F: Fn(char) -> bool,
    {
        let rest = self.rest();
        let len = rest.find(|c: char| !predicate(c)).unwrap_or(rest.len());
        self.position += len;
        &rest[..len]
    }

    /// Characters allowed inside a bare word: letters, digits and `_`.
    fn is_word_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    /// Whether `ch` (with `following` right after it) begins a word token.
    ///
    /// `!` only begins a word when a letter follows (`!contains`, `!in`);
    /// otherwise it belongs to a symbolic operator such as `!=` or `!~`.
    fn starts_word(ch: char, following: Option<char>) -> bool {
        ch.is_alphabetic()
            || ch == '_'
            || (ch == '!' && following.is_some_and(char::is_alphabetic))
    }

    /// Consumes a quoted string whose opening quote is the next character.
    ///
    /// In a regular string a backslash escapes the following character; in a
    /// `verbatim` (`@`-prefixed) string backslashes are literal and a doubled
    /// quote stands for one quote. An unterminated string runs to end of input.
    fn read_string(&mut self, verbatim: bool) {
        let Some(quote) = self.advance() else {
            return;
        };
        while let Some(ch) = self.advance() {
            if ch == quote {
                if verbatim && self.peek_char() == Some(quote) {
                    self.advance();
                    continue;
                }
                break;
            }
            if ch == '\\' && !verbatim {
                // Skip the escaped character so an escaped quote does not end the string.
                self.advance();
            }
        }
    }

    /// Consumes a numeric literal: digits, an optional `.digits` fraction and
    /// an optional timespan unit (`1h`, `2.5d`).
    ///
    /// A `.` that is not followed by a digit is left alone so `1..10` lexes
    /// as `1`, `.`, `.`, `10` rather than one malformed number.
    fn read_number(&mut self) {
        self.read_while(|c| c.is_ascii_digit());

        let mut lookahead = self.rest().chars();
        if lookahead.next() == Some('.') && lookahead.next().is_some_and(|c| c.is_ascii_digit()) {
            self.advance();
            self.read_while(|c| c.is_ascii_digit());
        }

        // Only absorb the trailing word when the WHOLE word is a unit, so
        // `5m` is one literal but `5min_total` is still `5` + identifier.
        let rest = self.rest();
        let suffix_len = rest
            .find(|c: char| !Self::is_word_char(c))
            .unwrap_or(rest.len());
        if Self::TIMESPAN_UNITS.contains(&&rest[..suffix_len]) {
            self.position += suffix_len;
        }
    }

    /// Consumes a word: an optional leading `!`, a run of word characters,
    /// then a `-tail` or trailing `~` only when that completes a known
    /// keyword (`make-series`) or operator (`in~`).
    ///
    /// Keeping `-` and `~` out of ordinary words means `a-b` is a subtraction
    /// and `x!=y` is a comparison instead of one bogus identifier.
    fn read_word(&mut self) -> &'a str {
        let start = self.position;
        if self.peek_char() == Some('!') {
            self.advance();
        }
        self.read_while(Self::is_word_char);

        if self.peek_char() == Some('-') {
            let tail = &self.rest()[1..];
            let tail_len = tail
                .find(|c: char| !Self::is_word_char(c))
                .unwrap_or(tail.len());
            let end = self.position + 1 + tail_len;
            let candidate = &self.input[start..end];
            if tail_len > 0 && KQL_KEYWORDS.iter().any(|kw| kw.eq_ignore_ascii_case(candidate)) {
                self.position = end;
            }
        }

        if self.peek_char() == Some('~') {
            let end = self.position + 1;
            let candidate = &self.input[start..end];
            if KQL_OPERATORS.iter().any(|op| op.eq_ignore_ascii_case(candidate)) {
                self.position = end;
            }
        }

        &self.input[start..self.position]
    }

    /// Classifies a word via the static tables (keywords, operators,
    /// functions, types - in that order), falling back to the identifier
    /// heuristics.
    fn classify_word(&self, word: &str) -> TokenType {
        let lowered = word.to_lowercase();
        let lowered = lowered.as_str();
        if KQL_KEYWORDS.contains(&lowered) {
            TokenType::Keyword
        } else if KQL_OPERATORS.contains(&lowered) {
            TokenType::Operator
        } else if KQL_FUNCTIONS.contains(&lowered) {
            TokenType::Function
        } else if KQL_TYPES.contains(&lowered) {
            TokenType::Type
        } else {
            self.classify_identifier(word)
        }
    }

    /// Guesses the role of an identifier that is in none of the static tables.
    ///
    /// Must be called right after `word` was consumed: it inspects the text
    /// that follows the current position. Rules, first match wins:
    /// 1. the word after `let` is a variable;
    /// 2. a capitalised word at the start of input or after `|`/`;` is a table;
    /// 3. a word followed by `(` is a function call;
    /// 4. a word after `by`, `project` or `extend` is a property;
    /// 5. a word after a word operator is a property;
    /// 6. a capitalised word containing a lower-case letter is a table;
    /// 7. anything else is a variable.
    fn classify_identifier(&self, word: &str) -> TokenType {
        let prev_is = |keyword: &str| {
            self.prev_word
                .is_some_and(|prev| prev.eq_ignore_ascii_case(keyword))
        };
        let starts_upper = word.chars().next().is_some_and(char::is_uppercase);

        if prev_is("let") {
            return TokenType::Variable;
        }
        if starts_upper && matches!(self.last_token, None | Some(TokenType::Pipe)) {
            return TokenType::TableName;
        }
        if self.rest().trim_start().starts_with('(') {
            return TokenType::Function;
        }
        if prev_is("by") || prev_is("project") || prev_is("extend") {
            return TokenType::Property;
        }
        if self.last_token == Some(TokenType::Operator) {
            return TokenType::Property;
        }
        if starts_upper && word.chars().skip(1).any(char::is_lowercase) {
            return TokenType::TableName;
        }
        TokenType::Variable
    }

    /// Returns the next token and its text, or `None` once only whitespace
    /// (or nothing) remains.
    ///
    /// The returned slice always ends at `self.position`.
    fn next_token(&mut self) -> Option<(TokenType, &'a str)> {
        self.skip_whitespace();
        let start = self.position;
        let ch = self.peek_char()?;
        let following = self.rest().chars().nth(1);

        let kind = match ch {
            '/' if following == Some('/') => {
                self.read_while(|c| c != '\n');
                TokenType::Comment
            }
            '"' | '\'' => {
                self.read_string(false);
                TokenType::String
            }
            '@' if matches!(following, Some('"' | '\'')) => {
                self.advance();
                self.read_string(true);
                TokenType::String
            }
            // `;` ends a statement, so like `|` it resets the "start of
            // stage" state used by the table-name heuristic.
            '|' | ';' => {
                self.advance();
                self.last_token = Some(TokenType::Pipe);
                TokenType::Pipe
            }
            c if c.is_ascii_digit() => {
                self.read_number();
                TokenType::Number
            }
            c if Self::PUNCTUATION_CHARS.contains(c) => {
                self.advance();
                self.last_token = Some(TokenType::Punctuation);
                TokenType::Punctuation
            }
            c if Self::starts_word(c, following) => {
                let word = self.read_word();
                let kind = self.classify_word(word);
                self.last_token = Some(kind);
                self.prev_word = Some(word);
                kind
            }
            c if Self::SYMBOLIC_OPERATOR_CHARS.contains(c) => {
                self.read_while(|c| Self::SYMBOLIC_OPERATOR_CHARS.contains(c));
                TokenType::Operator
            }
            _ => {
                self.advance();
                TokenType::Text
            }
        };

        Some((kind, &self.input[start..self.position]))
    }
}
pub fn highlight_line(line: &str) -> Vec<Span<'_>> {
let mut spans = Vec::new();
let mut tokenizer = KqlTokenizer::new(line);
let mut last_pos = 0;
while let Some((token_type, token_str)) = tokenizer.next_token() {
if tokenizer.position - token_str.len() > last_pos {
let whitespace = &line[last_pos..(tokenizer.position - token_str.len())];
if !whitespace.is_empty() {
spans.push(Span::raw(whitespace.to_string()));
}
}
let style = match token_type {
TokenType::Keyword => Style::default().fg(Color::LightMagenta), TokenType::Operator => Style::default().fg(Color::White), TokenType::Function => Style::default().fg(Color::LightYellow), TokenType::Type => Style::default().fg(Color::Cyan), TokenType::String => Style::default().fg(Color::LightRed), TokenType::Number => Style::default().fg(Color::LightGreen), TokenType::Comment => Style::default().fg(Color::Green), TokenType::Pipe => Style::default().fg(Color::White), TokenType::Punctuation => Style::default().fg(Color::White), TokenType::Variable => Style::default().fg(Color::LightBlue), TokenType::TableName => Style::default().fg(Color::LightCyan), TokenType::Property => Style::default().fg(Color::LightBlue), TokenType::Text => Style::default().fg(Color::White), };
spans.push(Span::styled(token_str.to_string(), style));
last_pos = tokenizer.position;
}
if last_pos < line.len() {
spans.push(Span::raw(line[last_pos..].to_string()));
}
if spans.is_empty() {
spans.push(Span::raw(""));
}
spans
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Concatenates span contents back into the text they were built from.
    fn rendered(spans: &[Span<'_>]) -> String {
        spans.iter().map(|span| &*span.content).collect()
    }

    /// Style of the first span whose content equals `text`, if any.
    fn style_of(spans: &[Span<'_>], text: &str) -> Option<Style> {
        spans
            .iter()
            .find(|span| &*span.content == text)
            .map(|span| span.style)
    }

    #[test]
    fn test_keyword_highlighting() {
        let line = "let x = 5";
        let spans = highlight_line(line);
        assert!(spans.len() >= 3);
        assert_eq!(rendered(&spans), line);
        assert_eq!(
            style_of(&spans, "let"),
            Some(Style::default().fg(Color::LightMagenta))
        );
        // The name bound by `let` is a variable.
        assert_eq!(
            style_of(&spans, "x"),
            Some(Style::default().fg(Color::LightBlue))
        );
        assert_eq!(
            style_of(&spans, "5"),
            Some(Style::default().fg(Color::LightGreen))
        );
    }

    #[test]
    fn test_pipe_highlighting() {
        let line = "table | where x > 5";
        let spans = highlight_line(line);
        assert!(spans.iter().any(|s| s.content == "|"));
        assert_eq!(rendered(&spans), line);
        assert_eq!(
            style_of(&spans, "|"),
            Some(Style::default().fg(Color::White))
        );
        assert_eq!(
            style_of(&spans, "where"),
            Some(Style::default().fg(Color::LightMagenta))
        );
    }

    #[test]
    fn test_string_highlighting() {
        let line = r#"where name == "test""#;
        let spans = highlight_line(line);
        assert!(!spans.is_empty());
        assert_eq!(rendered(&spans), line);
        // The string token keeps its surrounding quotes.
        assert_eq!(
            style_of(&spans, r#""test""#),
            Some(Style::default().fg(Color::LightRed))
        );
    }

    #[test]
    fn test_empty_line_yields_single_empty_span() {
        let spans = highlight_line("");
        assert_eq!(spans.len(), 1);
        assert_eq!(rendered(&spans), "");
    }

    #[test]
    fn test_whitespace_is_preserved() {
        let line = "  Events  |  take 5  ";
        let spans = highlight_line(line);
        assert_eq!(rendered(&spans), line);
    }
}