use lemma::{Lexer, TokenKind};
use tower_lsp::lsp_types::*;
// Indices into `TOKEN_TYPES`. Each value must equal the position of the
// corresponding entry in that slice, because `SemanticToken::token_type`
// carries the index rather than the type name.

/// Slot of `SemanticTokenType::NAMESPACE` (repo headers).
const IDX_NAMESPACE: u32 = 0;
/// Slot of `SemanticTokenType::CLASS` (spec headers).
const IDX_CLASS: u32 = 1;
/// Slot of `SemanticTokenType::PROPERTY` (data / with headers).
const IDX_PROPERTY: u32 = 2;
/// Slot of `SemanticTokenType::FUNCTION` (rule headers).
const IDX_FUNCTION: u32 = 3;
/// Slot of `SemanticTokenType::STRING`, used for every literal value.
const IDX_VALUE: u32 = 4;
/// Slot of `SemanticTokenType::COMMENT` (commentary blocks).
const IDX_COMMENT: u32 = 5;
/// Slot of `SemanticTokenType::KEYWORD` (type keywords and math functions).
const IDX_KEYWORD: u32 = 6;
/// Slot of `SemanticTokenType::OPERATOR`.
const IDX_OPERATOR: u32 = 7;
/// Slot of the custom `CONTROL_KEYWORD` type.
const IDX_CONTROL: u32 = 8;
/// Slot of the custom `DATA_BODY` type.
const IDX_DATA_BODY: u32 = 9;
/// Slot of the custom `PUNCTUATION` type.
const IDX_PUNCTUATION: u32 = 10;
/// Slot of the custom `REFERENCE` type.
const IDX_REFERENCE: u32 = 11;
// Custom semantic token types, created by name with `SemanticTokenType::new`.
// They are listed in `TOKEN_TYPES` after the predefined types.

/// Token type named `controlKeyword`; `tokenize` emits it (via `IDX_CONTROL`)
/// for keywords such as `unless`, `then` and `uses`.
pub const CONTROL_KEYWORD: SemanticTokenType = SemanticTokenType::new("controlKeyword");
/// Token type named `dataBody`; `tokenize` emits it (via `IDX_DATA_BODY`) for
/// tokens that follow a `data` / `with` header.
pub const DATA_BODY: SemanticTokenType = SemanticTokenType::new("dataBody");
/// Token type named `punctuation`; `tokenize` emits it (via `IDX_PUNCTUATION`)
/// for the colon that ends a `data` or `rule` header.
pub const PUNCTUATION: SemanticTokenType = SemanticTokenType::new("punctuation");
/// Token type named `reference`; `tokenize` emits it (via `IDX_REFERENCE`) for
/// identifiers that appear outside a declaration header.
pub const REFERENCE: SemanticTokenType = SemanticTokenType::new("reference");
/// Token types in legend order.
///
/// The position of every entry must match the `IDX_*` constant named beside
/// it, because `tokenize` stores these positions in `SemanticToken::token_type`.
pub const TOKEN_TYPES: &[SemanticTokenType] = &[
    SemanticTokenType::NAMESPACE, // 0  = IDX_NAMESPACE
    SemanticTokenType::CLASS,     // 1  = IDX_CLASS
    SemanticTokenType::PROPERTY,  // 2  = IDX_PROPERTY
    SemanticTokenType::FUNCTION,  // 3  = IDX_FUNCTION
    SemanticTokenType::STRING,    // 4  = IDX_VALUE
    SemanticTokenType::COMMENT,   // 5  = IDX_COMMENT
    SemanticTokenType::KEYWORD,   // 6  = IDX_KEYWORD
    SemanticTokenType::OPERATOR,  // 7  = IDX_OPERATOR
    CONTROL_KEYWORD,              // 8  = IDX_CONTROL
    DATA_BODY,                    // 9  = IDX_DATA_BODY
    PUNCTUATION,                  // 10 = IDX_PUNCTUATION
    REFERENCE,                    // 11 = IDX_REFERENCE
];
/// Token modifiers, presumably advertised alongside `TOKEN_TYPES` in the
/// server's legend (the registration is not in this file). None are defined:
/// `tokenize` always emits a modifier bitset of 0.
pub const TOKEN_MODIFIERS: &[SemanticTokenModifier] = &[];
/// Where `tokenize` currently is relative to a declaration header.
///
/// The declaration keywords `spec`, `data`, `with` and `rule` reset the state
/// from any context; the remaining transitions depend on the current state.
#[derive(Clone, Copy, PartialEq)]
enum HeaderState {
    /// Not inside any header: tokens are classified as expression tokens, and
    /// a `repo` keyword opens a repo header.
    None,
    /// After `repo`: `@`, identifiers and further `repo` tokens are coloured
    /// as namespace; `/`, `.` and `-` are left uncoloured.
    Repo,
    /// After `spec`: identifiers and number literals are coloured as class;
    /// `/`, `.` and `-` are left uncoloured.
    Spec,
    /// After `data` or `with`: identifiers are coloured as property and `.`
    /// is left uncoloured until a colon (or any other token) ends the header.
    Data,
    /// After a data header: everything except commentary is coloured as data
    /// body until a control keyword or a declaration keyword appears.
    DataBody,
    /// After `rule`: the next token is expected to be the rule name.
    Rule,
    /// After the rule name: the next token is expected to be the colon.
    RuleColon,
}
/// Maps a token kind to the legend index it gets inside a body/expression.
///
/// Returns `None` for kinds that have no fixed colour here; this includes
/// `TokenKind::Identifier`, which callers classify by context.
fn type_in_body(kind: &TokenKind) -> Option<u32> {
    let idx = match kind {
        TokenKind::Commentary => IDX_COMMENT,

        // Literal values and the tokens that read like one.
        TokenKind::StringLit
        | TokenKind::NumberLit
        | TokenKind::True
        | TokenKind::False
        | TokenKind::Yes
        | TokenKind::No
        | TokenKind::Accept
        | TokenKind::Reject
        | TokenKind::Permille
        | TokenKind::At => IDX_VALUE,

        // Arithmetic, comparison and arrow operators, plus `is`.
        TokenKind::Plus
        | TokenKind::Minus
        | TokenKind::Star
        | TokenKind::Slash
        | TokenKind::Percent
        | TokenKind::PercentPercent
        | TokenKind::Caret
        | TokenKind::Gt
        | TokenKind::Lt
        | TokenKind::Gte
        | TokenKind::Lte
        | TokenKind::Arrow
        | TokenKind::Is => IDX_OPERATOR,

        // Type keywords and built-in math functions share the keyword colour.
        TokenKind::QuantityKw
        | TokenKind::NumberKw
        | TokenKind::TextKw
        | TokenKind::DateKw
        | TokenKind::TimeKw
        | TokenKind::BooleanKw
        | TokenKind::PercentKw
        | TokenKind::RatioKw
        | TokenKind::Sqrt
        | TokenKind::Sin
        | TokenKind::Cos
        | TokenKind::Tan
        | TokenKind::Asin
        | TokenKind::Acos
        | TokenKind::Atan
        | TokenKind::Log
        | TokenKind::Exp
        | TokenKind::Abs
        | TokenKind::Floor
        | TokenKind::Ceil
        | TokenKind::Round => IDX_KEYWORD,

        // Control-flow and structural keywords.
        TokenKind::Unless
        | TokenKind::Then
        | TokenKind::Uses
        | TokenKind::Not
        | TokenKind::And
        | TokenKind::In
        | TokenKind::Type
        | TokenKind::Meta
        | TokenKind::Veto
        | TokenKind::Now
        | TokenKind::Past
        | TokenKind::Future
        | TokenKind::Repo => IDX_CONTROL,

        _ => return None,
    };
    Some(idx)
}
/// Legend index for a token in expression position.
///
/// Uses the fixed mapping from `type_in_body` when there is one; otherwise an
/// identifier is coloured as a reference and any other kind gets `None`.
fn expression_semantic_type(kind: &TokenKind) -> Option<u32> {
    match type_in_body(kind) {
        Some(idx) => Some(idx),
        None if matches!(kind, TokenKind::Identifier) => Some(IDX_REFERENCE),
        None => None,
    }
}
/// Returns `true` when `kind` may serve as a rule name: a plain identifier or
/// any of the type keywords.
fn is_name_token(kind: &TokenKind) -> bool {
    let is_type_keyword = matches!(
        kind,
        TokenKind::QuantityKw
            | TokenKind::NumberKw
            | TokenKind::TextKw
            | TokenKind::DateKw
            | TokenKind::TimeKw
            | TokenKind::BooleanKw
            | TokenKind::PercentKw
            | TokenKind::RatioKw
    );
    is_type_keyword || matches!(kind, TokenKind::Identifier)
}
pub fn tokenize(text: &str) -> Vec<SemanticToken> {
let mut lexer = Lexer::new(text, &lemma::SourceType::Volatile);
let mut tokens = Vec::new();
let mut prev_line: u32 = 0;
let mut prev_col: u32 = 0;
let mut state = HeaderState::None;
while let Ok(tok) = lexer.next_token() {
if tok.kind == TokenKind::Eof {
break;
}
let token_info: Option<(u32, u32)> = match tok.kind {
TokenKind::Spec => {
state = HeaderState::Spec;
Some((IDX_CLASS, 0))
}
TokenKind::Data => {
state = HeaderState::Data;
Some((IDX_PROPERTY, 0))
}
TokenKind::With => {
state = HeaderState::Data;
Some((IDX_PROPERTY, 0))
}
TokenKind::Rule => {
state = HeaderState::Rule;
Some((IDX_FUNCTION, 0))
}
_ => match state {
HeaderState::Repo => match tok.kind {
TokenKind::At | TokenKind::Identifier | TokenKind::Repo => {
Some((IDX_NAMESPACE, 0))
}
TokenKind::Slash | TokenKind::Dot | TokenKind::Minus => None,
_ => {
state = HeaderState::None;
expression_semantic_type(&tok.kind).map(|idx| (idx, 0))
}
},
HeaderState::Spec => match tok.kind {
TokenKind::Identifier => Some((IDX_CLASS, 0)),
TokenKind::NumberLit => Some((IDX_CLASS, 0)),
TokenKind::Slash | TokenKind::Dot | TokenKind::Minus => None,
_ => {
state = HeaderState::None;
expression_semantic_type(&tok.kind).map(|idx| (idx, 0))
}
},
HeaderState::Data => match tok.kind {
TokenKind::Identifier => Some((IDX_PROPERTY, 0)),
TokenKind::Dot => None,
TokenKind::Colon => {
state = HeaderState::DataBody;
Some((IDX_PUNCTUATION, 0))
}
_ => {
state = HeaderState::DataBody;
Some((IDX_DATA_BODY, 0))
}
},
HeaderState::DataBody => {
if tok.kind == TokenKind::Commentary {
Some((IDX_COMMENT, 0))
} else if type_in_body(&tok.kind) == Some(IDX_CONTROL) {
state = HeaderState::None;
Some((IDX_CONTROL, 0))
} else {
Some((IDX_DATA_BODY, 0))
}
}
HeaderState::Rule => {
if is_name_token(&tok.kind) {
state = HeaderState::RuleColon;
Some((IDX_FUNCTION, 0))
} else {
state = HeaderState::None;
expression_semantic_type(&tok.kind).map(|idx| (idx, 0))
}
}
HeaderState::RuleColon => {
state = HeaderState::None;
if tok.kind == TokenKind::Colon {
Some((IDX_PUNCTUATION, 0))
} else {
expression_semantic_type(&tok.kind).map(|idx| (idx, 0))
}
}
HeaderState::None => {
if tok.kind == TokenKind::Repo {
state = HeaderState::Repo;
Some((IDX_NAMESPACE, 0))
} else {
expression_semantic_type(&tok.kind).map(|idx| (idx, 0))
}
}
},
};
let (type_idx, modifier_bits) = match token_info {
Some(info) => info,
None => continue,
};
let start_line = (tok.span.line as u32).saturating_sub(1);
let start_col = (tok.span.col as u32).saturating_sub(1);
let full_commentary;
let display_text = if tok.kind == TokenKind::Commentary {
full_commentary = format!("\"\"\"{}\"\"\"", tok.text);
&full_commentary
} else {
&tok.text
};
let lines: Vec<&str> = display_text.split('\n').collect();
for (i, segment) in lines.iter().enumerate() {
let seg_len = segment.chars().count() as u32;
if seg_len == 0 {
continue;
}
let line = start_line + i as u32;
let col = if i == 0 { start_col } else { 0 };
let delta_line = line - prev_line;
let delta_start = if delta_line == 0 { col - prev_col } else { col };
tokens.push(SemanticToken {
delta_line,
delta_start,
length: seg_len,
token_type: type_idx,
token_modifiers_bitset: modifier_bits,
});
prev_line = line;
prev_col = col;
}
}
tokens
}
// Unit tests for `tokenize`. Most compare the sequence of legend indices
// (`token_type`) produced for a short snippet; the commentary tests also
// check the emitted token lengths.
#[cfg(test)]
mod tests {
use super::*;
// Runs `tokenize` and keeps only the legend index of every emitted token.
fn token_types(text: &str) -> Vec<u32> {
tokenize(text).iter().map(|t| t.token_type).collect()
}
// `repo`, `@`, `lemma` and `std` are all namespace; the `/` is not emitted.
#[test]
fn repo_keyword_and_qualifier_same_colour() {
assert_eq!(
token_types("repo @lemma/std"),
vec![IDX_NAMESPACE, IDX_NAMESPACE, IDX_NAMESPACE, IDX_NAMESPACE]
);
}
// A bare repo name without `@` is coloured like the keyword.
#[test]
fn simple_repo_name_no_qualifier() {
assert_eq!(
token_types("repo local"),
vec![IDX_NAMESPACE, IDX_NAMESPACE]
);
}
// The spec name shares the class colour of the `spec` keyword.
#[test]
fn spec_keyword_and_name_same_colour() {
assert_eq!(
token_types("spec weather_clothing"),
vec![IDX_CLASS, IDX_CLASS]
);
}
// `repo` only opens a repo header from the neutral state; right after
// `spec` it is classified as a control keyword instead.
#[test]
fn spec_followed_by_repo_keyword_is_control_not_namespace() {
assert_eq!(token_types("spec repo"), vec![IDX_CLASS, IDX_CONTROL]);
}
// Data header: keyword and field name are property, the colon is
// punctuation, and the type after the colon is data body.
#[test]
fn data_keyword_field_type_and_colon() {
assert_eq!(
token_types("data temperature: number"),
vec![IDX_PROPERTY, IDX_PROPERTY, IDX_PUNCTUATION, IDX_DATA_BODY]
);
}
// Everything after the header colon, including `->` constraint lines,
// stays data body.
#[test]
fn data_body_constraints_all_data_body_after_header() {
let text = "data temperature: quantity\n -> unit celsius 1.0\n -> minimum -70 celsius";
let types = token_types(text);
assert_eq!(&types[..3], &[IDX_PROPERTY, IDX_PROPERTY, IDX_PUNCTUATION]);
assert!(types.iter().skip(3).all(|&t| t == IDX_DATA_BODY));
}
// A following `rule` declaration ends the data body.
#[test]
fn data_body_ends_at_next_declaration() {
let types = token_types("data x: number\nrule y: 5");
assert_eq!(
types,
vec![
IDX_PROPERTY,
IDX_PROPERTY,
IDX_PUNCTUATION,
IDX_DATA_BODY,
IDX_FUNCTION,
IDX_FUNCTION,
IDX_PUNCTUATION,
IDX_VALUE,
]
);
}
// `with` behaves like `data`; the `.` in the dotted path is not emitted.
#[test]
fn with_dotted_path_colon_punctuation() {
assert_eq!(
token_types("with employee.name:"),
vec![IDX_PROPERTY, IDX_PROPERTY, IDX_PROPERTY, IDX_PUNCTUATION]
);
}
// Rule header: keyword and name are function, the colon is punctuation.
#[test]
fn rule_keyword_name_and_colon() {
assert_eq!(
token_types("rule needs_umbrella: 42"),
vec![IDX_FUNCTION, IDX_FUNCTION, IDX_PUNCTUATION, IDX_VALUE]
);
}
// `unless` and `then` are control keywords inside a rule body.
#[test]
fn unless_then_are_control() {
assert_eq!(
token_types("rule x: yes\n unless a then no"),
vec![
IDX_FUNCTION,
IDX_FUNCTION,
IDX_PUNCTUATION,
IDX_VALUE,
IDX_CONTROL,
IDX_REFERENCE,
IDX_CONTROL,
IDX_VALUE,
]
);
}
// `uses` ends the spec header and is a control keyword; its alias is a
// reference.
#[test]
fn uses_is_control() {
assert_eq!(
token_types("spec s\nuses alias"),
vec![IDX_CLASS, IDX_CLASS, IDX_CONTROL, IDX_REFERENCE]
);
}
// Identifiers in a rule body are references.
#[test]
fn rule_body_identifiers_are_reference() {
assert_eq!(
token_types("rule r: x"),
vec![IDX_FUNCTION, IDX_FUNCTION, IDX_PUNCTUATION, IDX_REFERENCE,]
);
}
// A condition mixes references, operators and literal values.
#[test]
fn condition_references_and_literals() {
assert_eq!(
token_types("rule x: 1\n unless temperature < 5 then 2"),
vec![
IDX_FUNCTION,
IDX_FUNCTION,
IDX_PUNCTUATION,
IDX_VALUE,
IDX_CONTROL,
IDX_REFERENCE,
IDX_OPERATOR,
IDX_VALUE,
IDX_CONTROL,
IDX_VALUE,
]
);
}
// String, number and boolean literals all map to the value index.
#[test]
fn string_and_number_and_bool_all_value() {
assert_eq!(
token_types("rule a: \"hello\"\nrule b: 42\nrule c: yes"),
vec![
IDX_FUNCTION,
IDX_FUNCTION,
IDX_PUNCTUATION,
IDX_VALUE,
IDX_FUNCTION,
IDX_FUNCTION,
IDX_PUNCTUATION,
IDX_VALUE,
IDX_FUNCTION,
IDX_FUNCTION,
IDX_PUNCTUATION,
IDX_VALUE,
]
);
}
// A year after the spec name keeps the class colour.
#[test]
fn spec_effective_year_same_colour_as_name() {
assert_eq!(
token_types("spec weather_clothing 2025"),
vec![IDX_CLASS, IDX_CLASS, IDX_CLASS]
);
}
// A full date yields three class-coloured number tokens; the `-`
// separators are not emitted.
#[test]
fn spec_effective_full_date_same_colour_as_name() {
assert_eq!(
token_types("spec foo 2026-03-04"),
vec![IDX_CLASS, IDX_CLASS, IDX_CLASS, IDX_CLASS, IDX_CLASS]
);
}
// Single-line commentary is one comment token whose length (11) covers
// both triple-quote delimiters plus `hello`.
#[test]
fn commentary_delimiters_colored_as_comment() {
let toks = tokenize("\"\"\"hello\"\"\"");
assert_eq!(toks.len(), 1);
assert_eq!(toks[0].token_type, IDX_COMMENT);
assert_eq!(toks[0].length, 11);
}
// Multi-line commentary is split into one comment token per line: opening
// delimiter, text, closing delimiter.
#[test]
fn multiline_commentary_delimiters_colored() {
let toks = tokenize("\"\"\"\nHello\n\"\"\"");
assert_eq!(toks.len(), 3);
assert!(toks.iter().all(|t| t.token_type == IDX_COMMENT));
assert_eq!(toks[0].length, 3); assert_eq!(toks[1].length, 5); assert_eq!(toks[2].length, 3); }
// A declaration keyword (`data` here) resets the state even while a spec
// header is still open.
#[test]
fn declaration_keywords_restart_state_from_any_context() {
assert_eq!(
token_types("spec a\ndata x:"),
vec![
IDX_CLASS,
IDX_CLASS,
IDX_PROPERTY,
IDX_PROPERTY,
IDX_PUNCTUATION
]
);
}
}