use harn_lexer::{Span, Token, TokenKind};
use tower_lsp::lsp_types::*;
use crate::constants::{BUILTINS, TYPE_NAMES};
use crate::symbols::{HarnSymbolKind, SymbolInfo};
/// Semantic-token type indices.
///
/// Each constant is an index into the `token_types` vector returned by
/// `semantic_token_legend`; the two listings MUST stay in the same order,
/// since clients decode `SemanticToken::token_type` through that legend.
pub(crate) mod sem {
    pub const KEYWORD: u32 = 0;
    pub const FUNCTION: u32 = 1;
    pub const PARAMETER: u32 = 2;
    pub const VARIABLE: u32 = 3;
    pub const STRING: u32 = 4;
    pub const NUMBER: u32 = 5;
    pub const OPERATOR: u32 = 6;
    pub const COMMENT: u32 = 7;
    pub const TYPE: u32 = 8;
    pub const ENUM_MEMBER: u32 = 9;
    pub const NAMESPACE: u32 = 10;
}
/// Build the semantic-token legend advertised to the client.
///
/// The position of each entry in `token_types` must line up with the index
/// constants in the `sem` module, because those indices are what
/// `build_semantic_tokens` emits. No token modifiers are used.
pub(crate) fn semantic_token_legend() -> SemanticTokensLegend {
    let token_types = vec![
        SemanticTokenType::KEYWORD,     // sem::KEYWORD = 0
        SemanticTokenType::FUNCTION,    // sem::FUNCTION = 1
        SemanticTokenType::PARAMETER,   // sem::PARAMETER = 2
        SemanticTokenType::VARIABLE,    // sem::VARIABLE = 3
        SemanticTokenType::STRING,      // sem::STRING = 4
        SemanticTokenType::NUMBER,      // sem::NUMBER = 5
        SemanticTokenType::OPERATOR,    // sem::OPERATOR = 6
        SemanticTokenType::COMMENT,     // sem::COMMENT = 7
        SemanticTokenType::TYPE,        // sem::TYPE = 8
        SemanticTokenType::ENUM_MEMBER, // sem::ENUM_MEMBER = 9
        SemanticTokenType::NAMESPACE,   // sem::NAMESPACE = 10
    ];
    SemanticTokensLegend {
        token_types,
        token_modifiers: vec![],
    }
}
fn token_kind_to_semantic(kind: &TokenKind) -> Option<u32> {
match kind {
TokenKind::Pipeline
| TokenKind::Extends
| TokenKind::Override
| TokenKind::Let
| TokenKind::Var
| TokenKind::If
| TokenKind::Else
| TokenKind::For
| TokenKind::In
| TokenKind::Match
| TokenKind::Retry
| TokenKind::Parallel
| TokenKind::Return
| TokenKind::Import
| TokenKind::True
| TokenKind::False
| TokenKind::Nil
| TokenKind::Try
| TokenKind::Catch
| TokenKind::Throw
| TokenKind::Finally
| TokenKind::Select
| TokenKind::Fn
| TokenKind::Spawn
| TokenKind::While
| TokenKind::TypeKw
| TokenKind::Enum
| TokenKind::Struct
| TokenKind::Interface
| TokenKind::Impl
| TokenKind::Pub
| TokenKind::From
| TokenKind::Thru
| TokenKind::Tool
| TokenKind::Upto
| TokenKind::Guard
| TokenKind::Require
| TokenKind::Deadline
| TokenKind::Yield
| TokenKind::Mutex
| TokenKind::Defer
| TokenKind::Break
| TokenKind::Continue => Some(sem::KEYWORD),
TokenKind::StringLiteral(_)
| TokenKind::RawStringLiteral(_)
| TokenKind::InterpolatedString(_) => Some(sem::STRING),
TokenKind::IntLiteral(_) | TokenKind::FloatLiteral(_) | TokenKind::DurationLiteral(_) => {
Some(sem::NUMBER)
}
TokenKind::Eq
| TokenKind::Neq
| TokenKind::And
| TokenKind::Or
| TokenKind::Pipe
| TokenKind::NilCoal
| TokenKind::Pow
| TokenKind::QuestionDot
| TokenKind::Arrow
| TokenKind::Lte
| TokenKind::Gte
| TokenKind::PlusAssign
| TokenKind::MinusAssign
| TokenKind::StarAssign
| TokenKind::SlashAssign
| TokenKind::PercentAssign
| TokenKind::Assign
| TokenKind::Not
| TokenKind::Dot
| TokenKind::Plus
| TokenKind::Minus
| TokenKind::Star
| TokenKind::Slash
| TokenKind::Percent
| TokenKind::Lt
| TokenKind::Gt
| TokenKind::Question
| TokenKind::Bar => Some(sem::OPERATOR),
TokenKind::LineComment(_) | TokenKind::BlockComment(_) => Some(sem::COMMENT),
TokenKind::Identifier(_) => Some(sem::VARIABLE),
TokenKind::LBrace
| TokenKind::RBrace
| TokenKind::LParen
| TokenKind::RParen
| TokenKind::LBracket
| TokenKind::RBracket
| TokenKind::Comma
| TokenKind::Colon
| TokenKind::Semicolon
| TokenKind::Newline
| TokenKind::Eof => None,
}
}
/// Convert a lexed token stream into LSP semantic tokens.
///
/// Tokens are emitted in the LSP delta encoding (each position relative to the
/// previous token). Identifier tokens are refined into functions, types, enum
/// members, etc. using the immediately preceding token and the symbol table;
/// every other kind is classified by `token_kind_to_semantic`. Multi-line
/// tokens (e.g. block comments) are split into one semantic token per line,
/// since the LSP encoding has no multi-line tokens.
///
/// NOTE(review): lengths are counted in Unicode scalar values
/// (`chars().count()`), while the LSP default position encoding is UTF-16
/// code units — non-BMP characters would be off by one unit per char.
/// Confirm the negotiated position encoding before relying on this.
pub(crate) fn build_semantic_tokens(
    tokens: &[Token],
    symbols: &[SymbolInfo],
    source: &str,
) -> Vec<SemanticToken> {
    /// Accumulates `SemanticToken`s, converting absolute (line, column)
    /// positions into the delta encoding required by the LSP spec.
    /// (Extracted because the push logic was previously triplicated.)
    struct DeltaEncoder {
        out: Vec<SemanticToken>,
        prev_line: u32,
        prev_start: u32,
    }

    impl DeltaEncoder {
        /// Append one token at absolute position (`line`, `start`).
        /// `saturating_sub` prevents a debug-build panic (or release-build
        /// wraparound) if the lexer ever yields out-of-order spans.
        fn push(&mut self, line: u32, start: u32, length: u32, token_type: u32) {
            let delta_line = line.saturating_sub(self.prev_line);
            let delta_start = if delta_line == 0 {
                start.saturating_sub(self.prev_start)
            } else {
                start
            };
            self.out.push(SemanticToken {
                delta_line,
                delta_start,
                length,
                token_type,
                token_modifiers_bitset: 0,
            });
            self.prev_line = line;
            self.prev_start = start;
        }
    }

    let mut enc = DeltaEncoder {
        out: Vec::new(),
        prev_line: 0,
        prev_start: 0,
    };

    for (i, token) in tokens.iter().enumerate() {
        let token_type = match &token.kind {
            TokenKind::Identifier(name) => {
                // Refine identifiers by looking at the token just before them.
                let prev_kind = if i > 0 { Some(&tokens[i - 1].kind) } else { None };
                if matches!(prev_kind, Some(TokenKind::Fn) | Some(TokenKind::Pipeline)) {
                    // `fn foo` / `pipeline foo` — a definition name.
                    sem::FUNCTION
                } else if matches!(prev_kind, Some(TokenKind::Enum)) {
                    // Enum names render as namespaces (variants are reached
                    // via `Name.Variant`).
                    sem::NAMESPACE
                } else if matches!(
                    prev_kind,
                    Some(TokenKind::Struct) | Some(TokenKind::Interface)
                ) {
                    sem::TYPE
                } else if matches!(prev_kind, Some(TokenKind::Dot))
                    && is_enum_variant_access(tokens, i, symbols)
                {
                    sem::ENUM_MEMBER
                } else if is_type_annotation_context(tokens, i)
                    && TYPE_NAMES.contains(&name.as_str())
                {
                    sem::TYPE
                } else {
                    // Fall back to the symbol table / builtins / type names.
                    classify_identifier(name, &token.span, symbols, source)
                }
            }
            other => match token_kind_to_semantic(other) {
                Some(t) => t,
                None => continue, // punctuation: not highlighted
            },
        };

        // Span line/column are 1-based; LSP positions are 0-based.
        let line = token.span.line.saturating_sub(1) as u32;
        let start_char = token.span.column.saturating_sub(1) as u32;

        // `str::get` (instead of direct slicing) also rejects spans that are
        // out of bounds or land on a non-char boundary, rather than panicking;
        // such spans fall through to the defensive 1-character fallback.
        let segment = if token.span.end > token.span.start {
            source.get(token.span.start..token.span.end)
        } else {
            None
        };
        match segment {
            Some(segment) => {
                let lines_in_token: Vec<&str> = segment.split('\n').collect();
                if lines_in_token.len() <= 1 {
                    enc.push(line, start_char, segment.chars().count() as u32, token_type);
                } else {
                    // Emit one semantic token per physical line of the token.
                    for (line_idx, line_text) in lines_in_token.iter().enumerate() {
                        let length = line_text.chars().count() as u32;
                        // Skip empty continuation lines (but keep a
                        // zero-length first line, matching prior behavior).
                        if length == 0 && line_idx > 0 {
                            continue;
                        }
                        let cur_line = line + line_idx as u32;
                        let cur_start = if line_idx == 0 { start_char } else { 0 };
                        enc.push(cur_line, cur_start, length, token_type);
                    }
                }
            }
            // Degenerate or invalid span: emit a 1-character token so the
            // classification is still visible in the editor.
            None => enc.push(line, start_char, 1, token_type),
        }
    }
    enc.out
}
/// True when the identifier at `idx` is the variant half of an
/// `EnumName.Variant` access, i.e. it follows a `.` whose receiver is an
/// identifier registered as an enum in the symbol table.
fn is_enum_variant_access(tokens: &[Token], idx: usize, symbols: &[SymbolInfo]) -> bool {
    // Need at least `<ident> . <ident>`: a receiver, a dot, and this token.
    if idx < 2 || !matches!(tokens[idx - 1].kind, TokenKind::Dot) {
        return false;
    }
    match &tokens[idx - 2].kind {
        TokenKind::Identifier(receiver) => symbols
            .iter()
            .any(|sym| sym.kind == HarnSymbolKind::Enum && sym.name == *receiver),
        _ => false,
    }
}
/// Heuristic: does the token at `idx` sit where a type name is expected —
/// after `:`, `->`, `<`, `|`, `,`, or inside a `Type[...]` generic list?
fn is_type_annotation_context(tokens: &[Token], idx: usize) -> bool {
    // Walk backwards to the nearest preceding token that is not a newline.
    let mut j = idx;
    let prev = loop {
        if j == 0 {
            return false;
        }
        j -= 1;
        if !matches!(tokens[j].kind, TokenKind::Newline) {
            break &tokens[j].kind;
        }
    };
    match prev {
        TokenKind::Colon
        | TokenKind::Arrow
        | TokenKind::Lt
        | TokenKind::Bar
        | TokenKind::Comma => true,
        // `Foo[` counts as a generic argument list only when `Foo` is a
        // known type name; otherwise it is ordinary indexing.
        TokenKind::LBracket => {
            j > 0
                && matches!(
                    &tokens[j - 1].kind,
                    TokenKind::Identifier(name) if TYPE_NAMES.contains(&name.as_str())
                )
        }
        _ => false,
    }
}
/// Classify a bare identifier using the symbol table, falling back to the
/// builtin-function list, then known type names, then plain VARIABLE.
///
/// Among symbols with the right name whose scope contains the identifier's
/// byte offset, the one with the narrowest scope wins; a symbol without a
/// scope span is treated as global (widest). Ties keep the earliest symbol,
/// matching declaration order.
fn classify_identifier(name: &str, span: &Span, symbols: &[SymbolInfo], source: &str) -> u32 {
    let _ = source; // retained for interface stability; not needed here
    let offset = span.start;

    // Scope width used for narrowest-wins selection; None means global.
    let scope_len = |sym: &SymbolInfo| match sym.scope_span {
        Some(sp) => sp.end.saturating_sub(sp.start),
        None => usize::MAX,
    };

    // `min_by_key` returns the first of equal minima, so ordering semantics
    // match the original first-seen / strictly-smaller-replaces scan.
    let best = symbols
        .iter()
        .filter(|sym| sym.name == name)
        .filter(|sym| match sym.scope_span {
            Some(sp) => (sp.start..=sp.end).contains(&offset),
            None => true,
        })
        .min_by_key(|sym| scope_len(sym));

    if let Some(sym) = best {
        return match sym.kind {
            HarnSymbolKind::Pipeline | HarnSymbolKind::Function => sem::FUNCTION,
            HarnSymbolKind::Parameter => sem::PARAMETER,
            HarnSymbolKind::Variable => sem::VARIABLE,
            HarnSymbolKind::Enum => sem::NAMESPACE,
            HarnSymbolKind::Struct | HarnSymbolKind::Interface => sem::TYPE,
        };
    }

    if BUILTINS.iter().any(|(builtin, _)| *builtin == name) {
        sem::FUNCTION
    } else if TYPE_NAMES.contains(&name) {
        sem::TYPE
    } else {
        sem::VARIABLE
    }
}