// bend_language_server/core/semantic_token.rs

1use std::collections::HashMap;
2
3use itertools::Itertools;
4use ropey::Rope;
5use tower_lsp::lsp_types::{Range, SemanticToken, SemanticTokenType};
6use tree_sitter_bend::HIGHLIGHTS_QUERY;
7use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent};
8
9use super::document::Document;
10use crate::language::bend;
11
lazy_static::lazy_static! {
    /// Tree sitter capture names into LSP semantic token types.
    /// Changes to this table don't need to be added anywhere else due to the structures below,
    /// which are all derived from this map at first access.
    pub static ref NAME_TO_TOKEN_TYPE: HashMap<&'static str, SemanticTokenType> = {
        HashMap::from([
            ("variable", SemanticTokenType::VARIABLE),
            ("variable.parameter", SemanticTokenType::PARAMETER),
            ("variable.member", SemanticTokenType::ENUM_MEMBER),
            ("property", SemanticTokenType::TYPE),
            ("keyword", SemanticTokenType::KEYWORD),
            ("keyword.conditional", SemanticTokenType::KEYWORD),
            ("keyword.function", SemanticTokenType::KEYWORD),
            ("keyword.return", SemanticTokenType::KEYWORD),
            ("keyword.repeat", SemanticTokenType::KEYWORD),
            ("keyword.type", SemanticTokenType::KEYWORD),
            ("string", SemanticTokenType::STRING),
            ("function", SemanticTokenType::FUNCTION),
            ("function.call", SemanticTokenType::FUNCTION),
            ("type", SemanticTokenType::TYPE),
            // ("constructor", SemanticTokenType::?),
            ("character", SemanticTokenType::STRING),
            ("character.special", SemanticTokenType::STRING),
            ("number", SemanticTokenType::NUMBER),
            ("number.float", SemanticTokenType::NUMBER),
            ("comment", SemanticTokenType::COMMENT),
            // ("punctuation", SemanticTokenType::new("operator")),
            // ("punctuation.delimiter", SemanticTokenType::new("operator")),
            // ("punctuation.bracket", SemanticTokenType::new("operator")),
            ("operator", SemanticTokenType::OPERATOR),
        ])
    };

    /// Legend for token types.
    /// This is sent to the LSP client with the semantic tokens capabilities.
    /// `unique()` (itertools) keeps the first occurrence of each token type so
    /// duplicates (e.g. the many KEYWORD entries) collapse to one legend slot.
    /// NOTE(review): `values()` iterates in `HashMap` order, so the legend order
    /// may differ between runs; it stays consistent with
    /// `HIGHLIGHT_INDEX_TO_LSP_INDEX` within one process, which is what matters
    /// for a single client session.
    pub static ref LEGEND_TOKEN_TYPE: Vec<SemanticTokenType> =
        NAME_TO_TOKEN_TYPE.values().cloned().unique().collect();

    /// Tree sitter highlighting names.
    /// This is used to perform syntax highlighting with tree sitter.
    /// The order here defines the highlight indices reported by the highlighter
    /// (see `HIGHLIGHTER_CONFIG.configure` below), so it must be the same list
    /// that `HIGHLIGHT_INDEX_TO_LSP_INDEX` is built from.
    pub static ref HIGHLIGHT_NAMES: Vec<&'static str> =
        NAME_TO_TOKEN_TYPE.keys().copied().collect();

    /// Translate indices from `HIGHLIGHT_NAMES` to indices from `LEGEND_TOKEN_TYPE`.
    /// Built by inverting both derived structures back into name/type -> index maps
    /// and joining them through `NAME_TO_TOKEN_TYPE`.
    pub static ref HIGHLIGHT_INDEX_TO_LSP_INDEX: HashMap<usize, usize> = {
        let token_type_index: HashMap<SemanticTokenType, usize> = LEGEND_TOKEN_TYPE.iter().enumerate().map(|(i, v)| (v.clone(), i)).collect();
        let highlight_index: HashMap<&&str, usize> = HIGHLIGHT_NAMES.iter().enumerate().map(|(i, v)| (v, i)).collect();
        NAME_TO_TOKEN_TYPE.iter().map(|(key, val)| (highlight_index[key], token_type_index[val])).collect()
    };

    /// Global configuration for syntax highlighting.
    /// NOTE: the `unwrap` panics on first access if `HIGHLIGHTS_QUERY` fails to
    /// compile against the Bend grammar (a build-time invariant of the grammar crate).
    pub static ref HIGHLIGHTER_CONFIG: HighlightConfiguration = {
        let mut config = HighlightConfiguration::new(bend(), "bend", HIGHLIGHTS_QUERY, "", "").unwrap();
        config.configure(&HIGHLIGHT_NAMES);
        config
    };
}
68
69/// Generate the semantic tokens of a document for syntax highlighting.
70pub fn semantic_tokens(doc: &mut Document, range: Option<Range>) -> Vec<SemanticToken> {
71    let code = doc.text.to_string(); // TODO: this is bad
72    let highlights = doc
73        .highlighter
74        .highlight(&HIGHLIGHTER_CONFIG, code.as_bytes(), None, |_| None)
75        .unwrap();
76
77    // Get byte indices of line positions in the range, if it exists
78    let range = range.map(|r| {
79        let rstart = doc.text.line_to_byte(r.start.line as usize);
80        let rend = doc.text.line_to_byte(r.end.line as usize + 1);
81        rstart..rend
82    });
83
84    let mut tokens = vec![]; // result vector
85    let mut types = vec![]; // token type stack
86    let mut pre_line = 0; // calculate line deltas between tokens
87    let mut pre_start = 0; // calculate index deltas between tokens
88    for event in highlights {
89        match event {
90            Result::Ok(HighlightEvent::HighlightStart(h)) => types.push(h.0),
91            Result::Ok(HighlightEvent::HighlightEnd) => drop(types.pop()),
92            Result::Ok(HighlightEvent::Source { mut start, end }) => {
93                // Ranged or full semantic tokens call
94                if let Some(range) = &range {
95                    // If we still haven't gotten to the start of the range, continue.
96                    if end < range.start {
97                        continue;
98                    }
99                    // If we got past the end of the range, stop.
100                    if range.end < start {
101                        break;
102                    }
103                }
104
105                let token = types
106                    .last()
107                    .and_then(|curr| HIGHLIGHT_INDEX_TO_LSP_INDEX.get(curr))
108                    .and_then(|type_index| {
109                        // Prevents tokens from starting with new lines or other white space.
110                        // New lines at the start of tokens may break the `make_semantic_token` function.
111                        while start < end && char::from(doc.text.byte(start)).is_whitespace() {
112                            start += 1;
113                        }
114
115                        // Translates the token ranges into the expected struct from LSP.
116                        make_semantic_token(
117                            &doc.text,
118                            start..end,
119                            *type_index as u32,
120                            &mut pre_line,
121                            &mut pre_start,
122                        )
123                    });
124
125                if let Some(token) = token {
126                    tokens.push(token);
127                }
128            }
129            Err(_) => { /* log error? */ }
130        }
131    }
132
133    tokens
134}
135
136/// Generates a specific semantic token within the guidelines of the LSP.
137fn make_semantic_token(
138    code: &Rope,
139    range: std::ops::Range<usize>,
140    token_type: u32,
141    pre_line: &mut u32,
142    pre_start: &mut u32,
143) -> Option<SemanticToken> {
144    let line = code.try_byte_to_line(range.start).ok()? as u32;
145    let first = code.try_line_to_char(line as usize).ok()? as u32;
146    let start = (code.try_byte_to_char(range.start).ok()? as u32).checked_sub(first)?;
147
148    let delta_line = line.checked_sub(*pre_line)?;
149    let delta_start = if delta_line == 0 {
150        start.checked_sub(*pre_start)?
151    } else {
152        start
153    };
154
155    *pre_line = line;
156    *pre_start = start;
157
158    Some(SemanticToken {
159        delta_line,
160        delta_start,
161        // Multi-byte chars like `λ` need to be treated as a single char
162        length: code.byte_slice(range).chars().count() as u32,
163        token_type,
164        token_modifiers_bitset: 0,
165    })
166}
167
/// Debugging test - tests steps from the semantic token generation algorithm.
/// Prints the raw highlight event stream and then the mapped tokens so the
/// capture-name -> token-type pipeline can be inspected with `--nocapture`.
#[test]
fn token_capture_test() {
    // Small Bend program used as the highlighting fixture.
    let code: Rope = r#"
def main():
  return "Hi!"
"#
    .into();
    let mut highlighter = tree_sitter_highlight::Highlighter::new();
    let config = &HIGHLIGHTER_CONFIG;

    // TODO: use TextProviderRope when the highlighting crate allows it
    let text = code.to_string();
    let highlights = highlighter
        .highlight(&config, text.as_bytes(), None, |_| None)
        .unwrap();

    // First pass: dump every highlight event (start / source span / end) to
    // see which capture names tree-sitter assigns to each byte range.
    let mut stack = vec![];
    for event in highlights {
        match event.unwrap() {
            HighlightEvent::HighlightStart(k) => {
                let name = HIGHLIGHT_NAMES[k.0];
                stack.push(name);
                println!("> start {}", name);
            }
            HighlightEvent::Source { start, end } => {
                println!("> {start}-{end}: {:?}", &text[start..end])
            }
            HighlightEvent::HighlightEnd => {
                println!("> end {}", stack.pop().unwrap());
            }
        }
    }
    println!();

    // Second pass: replay the same event stream, this time running the
    // token-building steps used by `semantic_tokens` and printing each
    // mapped span before it is delta-encoded.
    let highlights = highlighter
        .highlight(&config, text.as_bytes(), None, |_| None)
        .unwrap();

    let mut tokens = vec![];
    let mut stack = vec![];
    let mut pre_line = 0;
    let mut pre_start = 0;
    for event in highlights {
        match event {
            // if the highlight is nested, only save inner range
            Result::Ok(HighlightEvent::HighlightStart(h)) => stack.push(h.0),
            Result::Ok(HighlightEvent::HighlightEnd) => drop(stack.pop()),
            Result::Ok(HighlightEvent::Source { mut start, end }) => {
                stack
                    .last()
                    .and_then(|curr| HIGHLIGHT_INDEX_TO_LSP_INDEX.get(curr))
                    .and_then(|type_index| {
                        // Skip leading whitespace, mirroring `semantic_tokens`.
                        while start < end && char::from(code.byte(start)).is_whitespace() {
                            start += 1;
                        }

                        println!(
                            "{}-{} {:?}: {}",
                            start,
                            end,
                            &text[start..end],
                            LEGEND_TOKEN_TYPE[*type_index as usize].as_str()
                        );
                        make_semantic_token(
                            &code,
                            start..end,
                            *type_index as u32,
                            &mut pre_line,
                            &mut pre_start,
                        )
                    })
                    .map(|token| tokens.push(token));
            }
            Err(_) => { /* log error? */ }
        }
    }
    println!();

    println!("> got {} tokens", tokens.len());
    for token in tokens {
        println!("{:?}", token);
    }
}