Skip to main content

solidity_language_server/
utils.rs

1use std::sync::OnceLock;
2use tower_lsp::lsp_types::PositionEncodingKind;
3
/// Unit in which the LSP client counts column offsets within a line.
///
/// The negotiated value is stored once via [`set_encoding`] (meant to be
/// called from `initialize()`) and is read implicitly by
/// [`byte_offset_to_position`] and [`position_to_byte_offset`], so callers of
/// those functions never pass an encoding explicitly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PositionEncoding {
    /// Column = number of bytes from the start of the line (UTF-8 code units).
    Utf8,
    /// Column = number of UTF-16 code units from the start of the line.
    /// This is the **mandatory default** per the LSP specification.
    Utf16,
}
17
18impl PositionEncoding {
19    /// The mandatory LSP fallback encoding.
20    pub const DEFAULT: Self = PositionEncoding::Utf16;
21
22    /// Pick the best encoding from the set the client advertises.
23    ///
24    /// Preference: UTF-8 if supported, otherwise UTF-16 (the mandatory fallback).
25    pub fn negotiate(client_encodings: Option<&[PositionEncodingKind]>) -> Self {
26        let Some(encodings) = client_encodings else {
27            return Self::DEFAULT;
28        };
29        if encodings.contains(&PositionEncodingKind::UTF8) {
30            PositionEncoding::Utf8
31        } else {
32            PositionEncoding::Utf16
33        }
34    }
35
36    /// Convert to the LSP wire type.
37    pub fn to_encoding_kind(self) -> PositionEncodingKind {
38        match self {
39            PositionEncoding::Utf8 => PositionEncodingKind::UTF8,
40            PositionEncoding::Utf16 => PositionEncodingKind::UTF16,
41        }
42    }
43}
44
45// ---------------------------------------------------------------------------
46// Global encoding state — written once in `initialize`, read everywhere.
47// ---------------------------------------------------------------------------
48
/// Process-wide slot for the negotiated [`PositionEncoding`]; it stays empty
/// until [`set_encoding`] stores a value, and [`encoding`] reads it.
static ENCODING: OnceLock<PositionEncoding> = OnceLock::new();
50
51/// Store the negotiated encoding.  Called exactly once from the LSP
52/// `initialize` handler.  Subsequent calls are silently ignored.
53pub fn set_encoding(enc: PositionEncoding) {
54    let _ = ENCODING.set(enc);
55}
56
57/// Read the negotiated encoding (falls back to UTF-16 if never set).
58pub fn encoding() -> PositionEncoding {
59    ENCODING.get().copied().unwrap_or(PositionEncoding::DEFAULT)
60}
61
62// ---------------------------------------------------------------------------
63// Byte-offset ↔ LSP-position conversion
64// ---------------------------------------------------------------------------
65
66/// Convert a byte offset in `source` to an `(line, column)` pair whose column
67/// unit depends on the negotiated [`PositionEncoding`].
68pub fn byte_offset_to_position(source: &str, byte_offset: usize) -> (u32, u32) {
69    let enc = encoding();
70    let mut line: u32 = 0;
71    let mut col: u32 = 0;
72    let bytes = source.as_bytes();
73    let mut i = 0;
74
75    while i < byte_offset && i < bytes.len() {
76        match bytes[i] {
77            b'\n' => {
78                line += 1;
79                col = 0;
80                i += 1;
81            }
82            b'\r' if i + 1 < bytes.len() && bytes[i + 1] == b'\n' => {
83                line += 1;
84                col = 0;
85                i += 2;
86            }
87            _ => {
88                match enc {
89                    PositionEncoding::Utf8 => {
90                        // One byte = one UTF-8 code unit.
91                        col += 1;
92                        i += 1;
93                    }
94                    PositionEncoding::Utf16 => {
95                        // Advance by the full character, count UTF-16 code units.
96                        let ch_len = utf8_char_len(bytes[i]);
97                        let ch = &source[i..i + ch_len];
98                        col += ch.chars().next().map(|c| c.len_utf16() as u32).unwrap_or(1);
99                        i += ch_len;
100                    }
101                }
102            }
103        }
104    }
105
106    (line, col)
107}
108
109/// Convert an LSP `(line, character)` position back to a byte offset, where
110/// `character` is interpreted according to the negotiated [`PositionEncoding`].
111pub fn position_to_byte_offset(source: &str, line: u32, character: u32) -> usize {
112    let enc = encoding();
113    let mut current_line: u32 = 0;
114    let mut current_col: u32 = 0;
115
116    for (i, ch) in source.char_indices() {
117        if current_line == line && current_col == character {
118            return i;
119        }
120
121        match ch {
122            '\n' => {
123                if current_line == line {
124                    return i; // clamp to end of line
125                }
126                current_line += 1;
127                current_col = 0;
128            }
129            _ => {
130                current_col += match enc {
131                    PositionEncoding::Utf8 => ch.len_utf8() as u32,
132                    PositionEncoding::Utf16 => ch.len_utf16() as u32,
133                };
134            }
135        }
136    }
137
138    source.len()
139}
140
141// ---------------------------------------------------------------------------
142// Helpers
143// ---------------------------------------------------------------------------
144
/// Length in bytes of the UTF-8 sequence introduced by the lead byte `lead`.
///
/// The count of leading one-bits encodes the length: none for a single-byte
/// (ASCII) character, two to four for multi-byte sequences.  Any other byte
/// (a continuation byte, or `0xF8..=0xFF`) is not a valid lead byte and is
/// treated as length 1 so that callers always make progress.
fn utf8_char_len(lead: u8) -> usize {
    match lead.leading_ones() {
        0 => 1,
        ones @ 2..=4 => ones as usize,
        _ => 1,
    }
}
155
/// Report whether `name` has the lexical shape of a Solidity identifier.
///
/// The first character must be an ASCII letter, `_` or `$`; every following
/// character must be an ASCII letter, an ASCII digit, `_` or `$`.  The empty
/// string is rejected.
///
/// Only the shape is checked: reserved keywords are not rejected here.
pub fn is_valid_solidity_identifier(name: &str) -> bool {
    let is_start = |c: char| c.is_ascii_alphabetic() || c == '_' || c == '$';
    let is_part = |c: char| c.is_ascii_alphanumeric() || c == '_' || c == '$';

    let mut chars = name.chars();
    match chars.next() {
        Some(first) if is_start(first) => chars.all(is_part),
        // Empty, or starts with a digit or any other disallowed character.
        _ => false,
    }
}