// eure_ls/queries.rs
//! LSP-specific queries that convert to LSP types.

use eure::query::{
    DiagnosticMessage, DiagnosticSeverity, GetFileDiagnostics, GetSemanticTokens, SemanticToken,
    TextFile,
};
use lsp_types::{
    Diagnostic, DiagnosticSeverity as LspSeverity, Position, Range,
    SemanticToken as LspSemanticToken, SemanticTokens,
};
use query_flow::{Db, QueryError, query};

13/// LSP-formatted semantic tokens query.
14///
15/// Wraps `GetSemanticTokens` and converts to LSP `SemanticTokens` format.
16#[query]
17pub fn lsp_semantic_tokens(
18    db: &impl Db,
19    file: TextFile,
20    source: String,
21) -> Result<SemanticTokens, QueryError> {
22    let tokens = db.query(GetSemanticTokens::new(file.clone()))?;
23    Ok(convert_tokens(&tokens, &source))
24}
25
26/// LSP-formatted diagnostics query, grouped by file.
27///
28/// Wraps `GetFileDiagnostics` and converts to LSP `Diagnostic` format.
29/// Returns diagnostics grouped by file, so that each file can receive
30/// its own publishDiagnostics notification.
31#[query]
32pub fn lsp_diagnostics(
33    db: &impl Db,
34    file: TextFile,
35) -> Result<Vec<(TextFile, Vec<Diagnostic>)>, QueryError> {
36    let diagnostics = db.query(GetFileDiagnostics::new(file.clone()))?;
37
38    // Group diagnostics by file
39    let mut by_file: std::collections::HashMap<TextFile, Vec<DiagnosticMessage>> =
40        std::collections::HashMap::new();
41
42    // Always include the target file so diagnostics are cleared when errors are fixed
43    by_file.insert(file, vec![]);
44
45    for d in diagnostics.iter() {
46        by_file.entry(d.file.clone()).or_default().push(d.clone());
47    }
48
49    // Convert each group to LSP diagnostics using the correct source
50    let mut result = Vec::new();
51    for (diag_file, file_diagnostics) in by_file {
52        let source: std::sync::Arc<eure::query::TextFileContent> = db.asset(diag_file.clone())?;
53        let line_offsets = compute_line_offsets(source.get());
54        let lsp_diagnostics: Vec<Diagnostic> = file_diagnostics
55            .iter()
56            .map(|d| convert_diagnostic(d, source.get(), &line_offsets))
57            .collect();
58        result.push((diag_file, lsp_diagnostics));
59    }
60
61    Ok(result)
62}
63
64/// LSP-formatted diagnostics for a single file.
65///
66/// Wraps `GetFileDiagnostics` and converts to LSP `Diagnostic` format.
67/// Returns diagnostics only for the specified file.
68#[query]
69pub fn lsp_file_diagnostics(db: &impl Db, file: TextFile) -> Result<Vec<Diagnostic>, QueryError> {
70    let diagnostics = db.query(GetFileDiagnostics::new(file.clone()))?;
71
72    // Get source for position conversion
73    let source: std::sync::Arc<eure::query::TextFileContent> = db.asset(file.clone())?;
74    let line_offsets = compute_line_offsets(source.get());
75
76    // Convert to LSP diagnostics
77    let lsp_diagnostics: Vec<Diagnostic> = diagnostics
78        .iter()
79        .filter(|d| d.file == file) // Only include diagnostics for this file
80        .map(|d| convert_diagnostic(d, source.get(), &line_offsets))
81        .collect();
82
83    Ok(lsp_diagnostics)
84}
85
86/// Convert internal semantic tokens to LSP format.
87///
88/// LSP semantic tokens use a delta encoding:
89/// - Each token is encoded as (deltaLine, deltaStartChar, length, tokenType, tokenModifiers)
90/// - deltaLine is relative to the previous token's line
91/// - deltaStartChar is relative to the previous token's start (or line start if on new line)
92/// - All character positions and lengths are in UTF-16 code units
93fn convert_tokens(tokens: &[SemanticToken], source: &str) -> SemanticTokens {
94    let line_offsets = compute_line_offsets(source);
95
96    let mut data = Vec::new();
97    let mut prev_line = 0u32;
98    let mut prev_start = 0u32;
99
100    for token in tokens {
101        let start = token.start as usize;
102        let end = start + token.length as usize;
103        let (line, char) = offset_to_position(start, source, &line_offsets);
104        let length = byte_len_to_utf16_len(source, start, end);
105
106        let delta_line = line - prev_line;
107        let delta_start = if delta_line == 0 {
108            char - prev_start
109        } else {
110            char
111        };
112
113        data.push(LspSemanticToken {
114            delta_line,
115            delta_start,
116            length,
117            token_type: token.token_type as u32,
118            token_modifiers_bitset: token.modifiers,
119        });
120
121        prev_line = line;
122        prev_start = char;
123    }
124
125    SemanticTokens {
126        result_id: None,
127        data,
128    }
129}
130
131/// Convert internal diagnostic to LSP format.
132fn convert_diagnostic(msg: &DiagnosticMessage, source: &str, line_offsets: &[usize]) -> Diagnostic {
133    let start = offset_to_lsp_position(msg.start, source, line_offsets);
134    let end = offset_to_lsp_position(msg.end, source, line_offsets);
135
136    Diagnostic {
137        range: Range { start, end },
138        severity: Some(convert_severity(msg.severity)),
139        code: None,
140        code_description: None,
141        source: Some("eure".to_string()),
142        message: msg.message.clone(),
143        related_information: None,
144        tags: None,
145        data: None,
146    }
147}
148
149/// Convert internal severity to LSP severity.
150fn convert_severity(severity: DiagnosticSeverity) -> LspSeverity {
151    match severity {
152        DiagnosticSeverity::Error => LspSeverity::ERROR,
153        DiagnosticSeverity::Warning => LspSeverity::WARNING,
154        DiagnosticSeverity::Info => LspSeverity::INFORMATION,
155        DiagnosticSeverity::Hint => LspSeverity::HINT,
156    }
157}
158
/// Compute line offsets for a source string.
///
/// Returns a vector where `line_offsets[i]` is the byte offset of line `i`.
/// Line 0 always starts at offset 0; every `'\n'` opens a new line at the
/// byte immediately after it.
fn compute_line_offsets(source: &str) -> Vec<usize> {
    std::iter::once(0)
        .chain(source.match_indices('\n').map(|(idx, _)| idx + 1))
        .collect()
}

/// Convert a byte offset to (line, character) position.
///
/// Line is 0-indexed. Character is in UTF-16 code units (as required by LSP).
///
/// `line_offsets` must be sorted ascending with `line_offsets[0] == 0`
/// (as produced by `compute_line_offsets`), and `offset` should fall on a
/// `char` boundary of `source`; offsets past the end are clamped.
fn offset_to_position(offset: usize, source: &str, line_offsets: &[usize]) -> (u32, u32) {
    // Binary search for the containing line instead of a linear reverse scan:
    // `partition_point` returns the number of line starts <= offset, which is
    // at least 1 because line 0 starts at offset 0.
    let line = line_offsets.partition_point(|&o| o <= offset).saturating_sub(1);
    let line_start = line_offsets[line];
    // Count UTF-16 code units from the line start up to the (clamped) offset.
    let end = offset.min(source.len());
    let utf16_offset: usize = source[line_start..end]
        .chars()
        .map(char::len_utf16)
        .sum();
    (line as u32, utf16_offset as u32)
}

185/// Convert a byte offset to LSP Position with UTF-16 character position.
186fn offset_to_lsp_position(offset: usize, source: &str, line_offsets: &[usize]) -> Position {
187    let (line, character) = offset_to_position(offset, source, line_offsets);
188    Position { line, character }
189}
190
/// Convert a byte length to UTF-16 code unit length.
///
/// `start` and `end` are byte offsets into `source`; both are clamped so
/// the slice is always within bounds even for out-of-range inputs.
fn byte_len_to_utf16_len(source: &str, start: usize, end: usize) -> u32 {
    let hi = end.min(source.len());
    let lo = start.min(hi);
    let mut units = 0usize;
    for ch in source[lo..hi].chars() {
        units += ch.len_utf16();
    }
    units as u32
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_compute_line_offsets() {
        // Each '\n' opens a new line at the byte right after it.
        assert_eq!(compute_line_offsets("hello\nworld\n"), vec![0, 6, 12]);
    }

    #[test]
    fn test_offset_to_position_ascii() {
        let source = "hello\nworld\n";
        let offsets = compute_line_offsets(source);
        // (byte offset, expected line, expected UTF-16 character)
        let cases = [(0usize, 0u32, 0u32), (5, 0, 5), (6, 1, 0), (11, 1, 5)];
        for (offset, line, character) in cases {
            assert_eq!(offset_to_position(offset, source, &offsets), (line, character));
        }
    }

    #[test]
    fn test_offset_to_position_utf8() {
        // "日本語" is 9 bytes (3 chars × 3 bytes each), but 3 UTF-16 code units.
        let source = "日本語\ntest";
        let offsets = compute_line_offsets(source);
        let cases = [
            (0usize, 0u32, 0u32), // start of file
            (3, 0, 1),            // after 日
            (6, 0, 2),            // after 日本
            (9, 0, 3),            // after 日本語
            (10, 1, 0),           // after the newline
        ];
        for (offset, line, character) in cases {
            assert_eq!(offset_to_position(offset, source, &offsets), (line, character));
        }
    }

    #[test]
    fn test_offset_to_position_emoji() {
        // "😀" is 4 bytes in UTF-8 but 2 UTF-16 code units (surrogate pair).
        let source = "😀a";
        let offsets = compute_line_offsets(source);
        assert_eq!(offset_to_position(0, source, &offsets), (0, 0));
        assert_eq!(offset_to_position(4, source, &offsets), (0, 2));
        assert_eq!(offset_to_position(5, source, &offsets), (0, 3));
    }

    #[test]
    fn test_byte_len_to_utf16_len() {
        // ASCII: 1 byte = 1 UTF-16 unit.
        assert_eq!(byte_len_to_utf16_len("hello", 0, 5), 5);
        // Japanese: 3 bytes per char, 1 UTF-16 unit per char.
        assert_eq!(byte_len_to_utf16_len("日本語", 0, 9), 3);
        // Emoji: 4 bytes, 2 UTF-16 units.
        assert_eq!(byte_len_to_utf16_len("😀", 0, 4), 2);
    }
}