Skip to main content

solidity_language_server/
utils.rs

1use lintspec_core::textindex::{TextIndex, compute_indices};
2use serde_json::Value;
3use std::sync::OnceLock;
4use tower_lsp::lsp_types::{Position, PositionEncodingKind};
5
6// ---------------------------------------------------------------------------
7// Position Encoding
8// ---------------------------------------------------------------------------
9
10static ENCODING: OnceLock<PositionEncoding> = OnceLock::new();
11
12/// Store the negotiated encoding.  Called exactly once from the LSP
13/// `initialize` handler.  Subsequent calls are silently ignored.
14pub fn set_encoding(enc: PositionEncoding) {
15    let _ = ENCODING.set(enc);
16}
17
18/// Read the negotiated encoding (falls back to UTF-16 if never set).
19pub fn encoding() -> PositionEncoding {
20    ENCODING.get().copied().unwrap_or_default()
21}
22
/// Unit in which the LSP client expresses the `character` (column) part of a
/// position.
///
/// The value is stored once via [`set_encoding`] during `initialize()` and is
/// then picked up through [`encoding`] by [`byte_offset_to_position`] and
/// [`position_to_byte_offset`], so callers of those functions never have to
/// pass it around themselves.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum PositionEncoding {
    /// Columns count UTF-8 code units, i.e. bytes since the start of the line.
    Utf8,
    /// Columns count UTF-16 code units since the start of the line.
    ///
    /// This is the default variant: the LSP specification makes UTF-16 the
    /// **mandatory** encoding every client and server must support.
    #[default]
    Utf16,
}
37
38impl PositionEncoding {
39    /// Pick the best encoding from the set the client advertises.
40    ///
41    /// Preference: UTF-8 if supported, otherwise UTF-16 (the mandatory fallback).
42    pub fn negotiate(client_encodings: Option<&[PositionEncodingKind]>) -> Self {
43        let Some(encodings) = client_encodings else {
44            return Self::default();
45        };
46        if encodings.contains(&PositionEncodingKind::UTF8) {
47            PositionEncoding::Utf8
48        } else {
49            PositionEncoding::Utf16
50        }
51    }
52}
53
54impl From<PositionEncoding> for PositionEncodingKind {
55    fn from(value: PositionEncoding) -> Self {
56        match value {
57            PositionEncoding::Utf8 => PositionEncodingKind::UTF8,
58            PositionEncoding::Utf16 => PositionEncodingKind::UTF16,
59        }
60    }
61}
62
63// ---------------------------------------------------------------------------
64// Byte-offset to LSP Position conversion
65// ---------------------------------------------------------------------------
66
67/// Convert a byte offset in `source` to a [`Position`] whose column unit depends
68/// on the negotiated [`PositionEncoding`].
69pub fn byte_offset_to_position(source: &str, byte_offset: usize) -> Position {
70    if source.is_empty() {
71        return Position::new(0, 0);
72    }
73
74    let idx = if byte_offset >= source.len() {
75        // Offset is at or past the end of source — walk the entire string.
76        // `compute_indices` only handles offsets that fall within the source.
77        let mut ti = TextIndex::ZERO;
78        let mut chars = source.chars().peekable();
79        while let Some(c) = chars.next() {
80            ti.advance(c, chars.peek());
81        }
82        ti
83    } else {
84        // SIMD-accelerated lookup for offsets within the source.
85        let indices = compute_indices(source, &[byte_offset]);
86        match indices.first() {
87            Some(ti) => *ti,
88            None => return Position::new(0, 0),
89        }
90    };
91
92    Position {
93        line: idx.line,
94        character: match encoding() {
95            PositionEncoding::Utf8 => idx.col_utf8,
96            PositionEncoding::Utf16 => idx.col_utf16,
97        },
98    }
99}
100
101/// Convert an LSP [`Position`] position back to a byte offset, where
102/// `character` is interpreted according to the negotiated [`PositionEncoding`].
103///
104/// Uses a single SIMD-accelerated pass with [`compute_indices`] to build a
105/// coarse index of the file at 128-byte intervals, then does a short linear
106/// walk (at most 128 bytes) with [`TextIndex::advance`] to find the exact
107/// byte offset.
108pub fn position_to_byte_offset(source: &str, pos: Position) -> usize {
109    if source.is_empty() {
110        return 0;
111    }
112
113    let enc = encoding();
114
115    // 1. Build chunk offsets at 128-byte intervals across the source.
116    let chunk_offsets: Vec<usize> = (0..source.len()).step_by(128).collect();
117
118    // 2. Single SIMD-accelerated pass — compute TextIndex for every chunk.
119    let chunk_indices = compute_indices(source, &chunk_offsets);
120
121    // 3. Find the last chunk that is still at or before the target position.
122    //    Multiple chunks can fall on the same line, so we must also check
123    //    the column to avoid starting past the target.
124    let start = chunk_indices
125        .iter()
126        .take_while(|ti| {
127            if ti.line < pos.line {
128                return true;
129            }
130            if ti.line == pos.line {
131                let col = match enc {
132                    PositionEncoding::Utf8 => ti.col_utf8,
133                    PositionEncoding::Utf16 => ti.col_utf16,
134                };
135                return col <= pos.character;
136            }
137            false
138        })
139        .last()
140        .copied()
141        .unwrap_or(TextIndex::ZERO);
142
143    // 4. Linear walk from `start` (at most ~128 bytes) to the exact position.
144    let mut idx = start;
145    let mut chars = source[idx.utf8..].chars().peekable();
146
147    while let Some(c) = chars.next() {
148        let col = match enc {
149            PositionEncoding::Utf8 => idx.col_utf8,
150            PositionEncoding::Utf16 => idx.col_utf16,
151        };
152        if idx.line >= pos.line && col >= pos.character {
153            return idx.utf8;
154        }
155        if idx.line == pos.line && c == '\n' {
156            return idx.utf8; // clamp to end of line
157        }
158        idx.advance(c, chars.peek());
159    }
160    source.len() // position past end of source
161}
162
163// ---------------------------------------------------------------------------
164// Identifier validation
165// ---------------------------------------------------------------------------
166
167/// Check whether `name` is a valid Solidity identifier
168pub fn is_valid_solidity_identifier(name: &str) -> bool {
169    let mut chars = name.chars();
170    let Some(first) = chars.next() else {
171        return false;
172    };
173    if !first.is_ascii_alphabetic() && first != '_' && first != '$' {
174        return false;
175    }
176    if !chars.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '$') {
177        return false;
178    }
179    if SOLIDITY_KEYWORDS.contains(&name) {
180        return false;
181    }
182    if is_numeric_type_keyword(name) {
183        return false;
184    }
185    true
186}
187
/// Keywords that are not allowed as identifiers in Solidity.
///
/// The grammar permits only 7 keywords as identifiers:
/// `from`, `error`, `revert`, `global`, `transient`, `layout`, `at`.
/// Every other fixed word the lexer recognises is blacklisted here, in three
/// groups: active keywords, unit denominations, and words reserved for
/// future use.
///
/// Sized type names (`uint256`, `bytes32`, …) are not enumerated in this
/// list; `is_numeric_type_keyword` handles them.
const SOLIDITY_KEYWORDS: &[&str] = &[
    // Active keywords
    "abstract",
    "address",
    "anonymous",
    "as",
    "assembly",
    "bool",
    "break",
    "bytes",
    "calldata",
    "catch",
    "constant",
    "constructor",
    "continue",
    "contract",
    "delete",
    "do",
    "else",
    "emit",
    "enum",
    "event",
    "external",
    "fallback",
    "false",
    "fixed",
    "for",
    "function",
    "hex",
    "if",
    "immutable",
    "import",
    "indexed",
    "interface",
    "internal",
    "is",
    "library",
    "mapping",
    "memory",
    "modifier",
    "new",
    "override",
    "payable",
    "pragma",
    "private",
    "public",
    "pure",
    "receive",
    "return",
    "returns",
    "storage",
    "string",
    "struct",
    "true",
    "try",
    "type",
    "ufixed",
    "unchecked",
    "unicode",
    "using",
    "view",
    "virtual",
    "while",
    // Unit denominations (lexed as number-unit tokens, never as identifiers)
    "days",
    "ether",
    "gwei",
    "hours",
    "minutes",
    "seconds",
    "weeks",
    "wei",
    "years",
    // Reserved keywords (future use)
    "after",
    "alias",
    "apply",
    "auto",
    "byte",
    "case",
    "copyof",
    "default",
    "define",
    "final",
    "implements",
    "in",
    "inline",
    "let",
    "macro",
    "match",
    "mutable",
    "null",
    "of",
    "partial",
    "promise",
    "reference",
    "relocatable",
    "sealed",
    "sizeof",
    "static",
    "supports",
    "switch",
    "typedef",
    "typeof",
    "var",
];
289
/// Check whether `name` is a sized elementary-type keyword.
///
/// Recognised forms:
/// * `int` / `uint`, bare or followed by a width of 8..=256 bits that is a
///   multiple of 8 (`uint8`, `int256`, …);
/// * `bytes<N>` with `N` in 1..=32 (bare `bytes` is in `SOLIDITY_KEYWORDS`
///   and is deliberately *not* matched here);
/// * `fixed<M>x<N>` / `ufixed<M>x<N>` with `M` a multiple of 8 in 8..=256 and
///   `N` in 0..=80 (bare `fixed` / `ufixed` are likewise left to
///   `SOLIDITY_KEYWORDS`).
///
/// Numeric parts must consist of ASCII digits only.
fn is_numeric_type_keyword(name: &str) -> bool {
    /// Parse a non-empty run of ASCII digits; signs, other characters and
    /// values that do not fit in `u16` yield `None`.
    fn decimal(digits: &str) -> Option<u16> {
        if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        digits.parse().ok()
    }

    /// Bit widths shared by the integer and fixed-point families.
    fn is_bit_width(bits: u16) -> bool {
        (8..=256).contains(&bits) && bits % 8 == 0
    }

    if let Some(suffix) = name
        .strip_prefix("uint")
        .or_else(|| name.strip_prefix("int"))
    {
        return suffix.is_empty() || decimal(suffix).is_some_and(is_bit_width);
    }

    if let Some(suffix) = name.strip_prefix("bytes") {
        return decimal(suffix).is_some_and(|n| (1..=32).contains(&n));
    }

    if let Some(suffix) = name
        .strip_prefix("ufixed")
        .or_else(|| name.strip_prefix("fixed"))
    {
        // Sized fixed-point types are spelled `<M>x<N>`.
        return suffix.split_once('x').is_some_and(|(bits, decimals)| {
            matches!(
                (decimal(bits), decimal(decimals)),
                (Some(m), Some(n)) if is_bit_width(m) && n <= 80
            )
        });
    }

    false
}
313
314// ---------------------------------------------------------------------------
315// JSON AST helpers
316// ---------------------------------------------------------------------------
317
318/// Push a JSON value onto `stack` if the field `key` is an object or array.
319///
320/// Used during AST walks that iterate over child nodes without full
321/// typed deserialization. Objects are pushed as-is; arrays are flattened
322/// so each element is pushed individually.
323pub fn push_if_node_or_array<'a>(tree: &'a Value, key: &str, stack: &mut Vec<&'a Value>) {
324    if let Some(value) = tree.get(key) {
325        match value {
326            Value::Array(arr) => stack.extend(arr),
327            Value::Object(_) => stack.push(value),
328            _ => {}
329        }
330    }
331}