solidity_language_server/utils.rs
1use std::sync::OnceLock;
2use tower_lsp::lsp_types::PositionEncodingKind;
3
/// Unit in which the LSP client measures the `character` (column) part of a
/// position.
///
/// The active value is stored with [`set_encoding`] and looked up through
/// [`encoding`] by [`byte_offset_to_position`] and
/// [`position_to_byte_offset`], so callers of those functions never pass an
/// encoding explicitly.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PositionEncoding {
    /// Columns are UTF-8 code units, i.e. bytes counted from the start of the
    /// line.
    Utf8,
    /// Columns are UTF-16 code units counted from the start of the line.
    /// The LSP specification makes this the **mandatory default**.
    Utf16,
}
17
18impl PositionEncoding {
19 /// The mandatory LSP fallback encoding.
20 pub const DEFAULT: Self = PositionEncoding::Utf16;
21
22 /// Pick the best encoding from the set the client advertises.
23 ///
24 /// Preference: UTF-8 if supported, otherwise UTF-16 (the mandatory fallback).
25 pub fn negotiate(client_encodings: Option<&[PositionEncodingKind]>) -> Self {
26 let Some(encodings) = client_encodings else {
27 return Self::DEFAULT;
28 };
29 if encodings.contains(&PositionEncodingKind::UTF8) {
30 PositionEncoding::Utf8
31 } else {
32 PositionEncoding::Utf16
33 }
34 }
35
36 /// Convert to the LSP wire type.
37 pub fn to_encoding_kind(self) -> PositionEncodingKind {
38 match self {
39 PositionEncoding::Utf8 => PositionEncodingKind::UTF8,
40 PositionEncoding::Utf16 => PositionEncodingKind::UTF16,
41 }
42 }
43}
44
45// ---------------------------------------------------------------------------
46// Global encoding state — written once in `initialize`, read everywhere.
47// ---------------------------------------------------------------------------
48
/// Process-wide cell holding the negotiated [`PositionEncoding`].
///
/// Filled through [`set_encoding`] and read through [`encoding`]; while it is
/// still empty, `encoding` reports [`PositionEncoding::DEFAULT`].
static ENCODING: OnceLock<PositionEncoding> = OnceLock::new();
50
51/// Store the negotiated encoding. Called exactly once from the LSP
52/// `initialize` handler. Subsequent calls are silently ignored.
53pub fn set_encoding(enc: PositionEncoding) {
54 let _ = ENCODING.set(enc);
55}
56
57/// Read the negotiated encoding (falls back to UTF-16 if never set).
58pub fn encoding() -> PositionEncoding {
59 ENCODING.get().copied().unwrap_or(PositionEncoding::DEFAULT)
60}
61
62// ---------------------------------------------------------------------------
63// Byte-offset ↔ LSP-position conversion
64// ---------------------------------------------------------------------------
65
66/// Convert a byte offset in `source` to an `(line, column)` pair whose column
67/// unit depends on the negotiated [`PositionEncoding`].
68pub fn byte_offset_to_position(source: &str, byte_offset: usize) -> (u32, u32) {
69 let enc = encoding();
70 let mut line: u32 = 0;
71 let mut col: u32 = 0;
72 let bytes = source.as_bytes();
73 let mut i = 0;
74
75 while i < byte_offset && i < bytes.len() {
76 match bytes[i] {
77 b'\n' => {
78 line += 1;
79 col = 0;
80 i += 1;
81 }
82 b'\r' if i + 1 < bytes.len() && bytes[i + 1] == b'\n' => {
83 line += 1;
84 col = 0;
85 i += 2;
86 }
87 _ => {
88 match enc {
89 PositionEncoding::Utf8 => {
90 // One byte = one UTF-8 code unit.
91 col += 1;
92 i += 1;
93 }
94 PositionEncoding::Utf16 => {
95 // Advance by the full character, count UTF-16 code units.
96 let ch_len = utf8_char_len(bytes[i]);
97 let ch = &source[i..i + ch_len];
98 col += ch.chars().next().map(|c| c.len_utf16() as u32).unwrap_or(1);
99 i += ch_len;
100 }
101 }
102 }
103 }
104 }
105
106 (line, col)
107}
108
109/// Convert an LSP `(line, character)` position back to a byte offset, where
110/// `character` is interpreted according to the negotiated [`PositionEncoding`].
111pub fn position_to_byte_offset(source: &str, line: u32, character: u32) -> usize {
112 let enc = encoding();
113 let mut current_line: u32 = 0;
114 let mut current_col: u32 = 0;
115
116 for (i, ch) in source.char_indices() {
117 if current_line == line && current_col == character {
118 return i;
119 }
120
121 match ch {
122 '\n' => {
123 if current_line == line {
124 return i; // clamp to end of line
125 }
126 current_line += 1;
127 current_col = 0;
128 }
129 _ => {
130 current_col += match enc {
131 PositionEncoding::Utf8 => ch.len_utf8() as u32,
132 PositionEncoding::Utf16 => ch.len_utf16() as u32,
133 };
134 }
135 }
136 }
137
138 source.len()
139}
140
141// ---------------------------------------------------------------------------
142// Helpers
143// ---------------------------------------------------------------------------
144
/// Length in bytes of the UTF-8 sequence introduced by the lead byte `lead`.
///
/// Returns 1 for ASCII bytes and also for any byte that is not a valid lead
/// byte (continuation bytes `0x80..=0xBF` and `0xF8..=0xFF`), so a caller
/// stepping through a buffer always makes progress.
fn utf8_char_len(lead: u8) -> usize {
    if lead & 0b1000_0000 == 0 {
        // 0xxxxxxx: single-byte (ASCII)
        1
    } else if lead & 0b1110_0000 == 0b1100_0000 {
        // 110xxxxx
        2
    } else if lead & 0b1111_0000 == 0b1110_0000 {
        // 1110xxxx
        3
    } else if lead & 0b1111_1000 == 0b1111_0000 {
        // 11110xxx
        4
    } else {
        // continuation or invalid byte: not expected at a char boundary
        1
    }
}
155
/// Report whether `name` has the lexical shape of a Solidity identifier.
///
/// An identifier starts with an ASCII letter, `_` or `$`, followed by any
/// number of ASCII letters, ASCII digits, `_` or `$`. The empty string and
/// anything containing other characters (including non-ASCII letters) is
/// rejected.
///
/// Only the shape is checked; reserved keywords are not filtered out.
pub fn is_valid_solidity_identifier(name: &str) -> bool {
    let is_start = |c: char| c.is_ascii_alphabetic() || c == '_' || c == '$';
    let is_part = |c: char| c.is_ascii_alphanumeric() || c == '_' || c == '$';

    let mut chars = name.chars();
    match chars.next() {
        // The first character has stricter rules (no digit); the rest are
        // checked lazily without collecting them.
        Some(first) if is_start(first) => chars.all(is_part),
        _ => false,
    }
}