Skip to main content

solidity_language_server/
utils.rs

1use lintspec_core::textindex::{TextIndex, compute_indices};
2use std::sync::OnceLock;
3use tower_lsp::lsp_types::{Position, PositionEncodingKind};
4
5// ---------------------------------------------------------------------------
6// Position Encoding
7// ---------------------------------------------------------------------------
8
9static ENCODING: OnceLock<PositionEncoding> = OnceLock::new();
10
11/// Store the negotiated encoding.  Called exactly once from the LSP
12/// `initialize` handler.  Subsequent calls are silently ignored.
13pub fn set_encoding(enc: PositionEncoding) {
14    let _ = ENCODING.set(enc);
15}
16
17/// Read the negotiated encoding (falls back to UTF-16 if never set).
18pub fn encoding() -> PositionEncoding {
19    ENCODING.get().copied().unwrap_or_default()
20}
21
/// Unit in which the LSP client measures the `character` (column) component
/// of a position.
///
/// The value is chosen during `initialize()`, stored with [`set_encoding`],
/// and consulted internally by [`byte_offset_to_position`] and
/// [`position_to_byte_offset`], so other modules never have to pass it around.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum PositionEncoding {
    /// Columns count UTF-8 code units, i.e. bytes since the start of the line.
    Utf8,
    /// Columns count UTF-16 code units since the start of the line.
    /// The LSP specification makes this the **mandatory default**, so it is
    /// also this type's [`Default`].
    #[default]
    Utf16,
}
36
37impl PositionEncoding {
38    /// Pick the best encoding from the set the client advertises.
39    ///
40    /// Preference: UTF-8 if supported, otherwise UTF-16 (the mandatory fallback).
41    pub fn negotiate(client_encodings: Option<&[PositionEncodingKind]>) -> Self {
42        let Some(encodings) = client_encodings else {
43            return Self::default();
44        };
45        if encodings.contains(&PositionEncodingKind::UTF8) {
46            PositionEncoding::Utf8
47        } else {
48            PositionEncoding::Utf16
49        }
50    }
51}
52
53impl From<PositionEncoding> for PositionEncodingKind {
54    fn from(value: PositionEncoding) -> Self {
55        match value {
56            PositionEncoding::Utf8 => PositionEncodingKind::UTF8,
57            PositionEncoding::Utf16 => PositionEncodingKind::UTF16,
58        }
59    }
60}
61
62// ---------------------------------------------------------------------------
63// Byte-offset to LSP Position conversion
64// ---------------------------------------------------------------------------
65
66/// Convert a byte offset in `source` to a [`Position`] whose column unit depends
67/// on the negotiated [`PositionEncoding`].
68pub fn byte_offset_to_position(source: &str, byte_offset: usize) -> Position {
69    if source.is_empty() {
70        return Position::new(0, 0);
71    }
72
73    let idx = if byte_offset >= source.len() {
74        // Offset is at or past the end of source — walk the entire string.
75        // `compute_indices` only handles offsets that fall within the source.
76        let mut ti = TextIndex::ZERO;
77        let mut chars = source.chars().peekable();
78        while let Some(c) = chars.next() {
79            ti.advance(c, chars.peek());
80        }
81        ti
82    } else {
83        // SIMD-accelerated lookup for offsets within the source.
84        let indices = compute_indices(source, &[byte_offset]);
85        match indices.first() {
86            Some(ti) => *ti,
87            None => return Position::new(0, 0),
88        }
89    };
90
91    Position {
92        line: idx.line,
93        character: match encoding() {
94            PositionEncoding::Utf8 => idx.col_utf8,
95            PositionEncoding::Utf16 => idx.col_utf16,
96        },
97    }
98}
99
100/// Convert an LSP [`Position`] position back to a byte offset, where
101/// `character` is interpreted according to the negotiated [`PositionEncoding`].
102///
103/// Uses a single SIMD-accelerated pass with [`compute_indices`] to build a
104/// coarse index of the file at 128-byte intervals, then does a short linear
105/// walk (at most 128 bytes) with [`TextIndex::advance`] to find the exact
106/// byte offset.
107pub fn position_to_byte_offset(source: &str, pos: Position) -> usize {
108    if source.is_empty() {
109        return 0;
110    }
111
112    let enc = encoding();
113
114    // 1. Build chunk offsets at 128-byte intervals across the source.
115    let chunk_offsets: Vec<usize> = (0..source.len()).step_by(128).collect();
116
117    // 2. Single SIMD-accelerated pass — compute TextIndex for every chunk.
118    let chunk_indices = compute_indices(source, &chunk_offsets);
119
120    // 3. Find the last chunk that is still at or before the target position.
121    //    Multiple chunks can fall on the same line, so we must also check
122    //    the column to avoid starting past the target.
123    let start = chunk_indices
124        .iter()
125        .take_while(|ti| {
126            if ti.line < pos.line {
127                return true;
128            }
129            if ti.line == pos.line {
130                let col = match enc {
131                    PositionEncoding::Utf8 => ti.col_utf8,
132                    PositionEncoding::Utf16 => ti.col_utf16,
133                };
134                return col <= pos.character;
135            }
136            false
137        })
138        .last()
139        .copied()
140        .unwrap_or(TextIndex::ZERO);
141
142    // 4. Linear walk from `start` (at most ~128 bytes) to the exact position.
143    let mut idx = start;
144    let mut chars = source[idx.utf8..].chars().peekable();
145
146    while let Some(c) = chars.next() {
147        let col = match enc {
148            PositionEncoding::Utf8 => idx.col_utf8,
149            PositionEncoding::Utf16 => idx.col_utf16,
150        };
151        if idx.line >= pos.line && col >= pos.character {
152            return idx.utf8;
153        }
154        if idx.line == pos.line && c == '\n' {
155            return idx.utf8; // clamp to end of line
156        }
157        idx.advance(c, chars.peek());
158    }
159    source.len() // position past end of source
160}
161
162// ---------------------------------------------------------------------------
163// Identifier validation
164// ---------------------------------------------------------------------------
165
166/// Check whether `name` is a valid Solidity identifier
167pub fn is_valid_solidity_identifier(name: &str) -> bool {
168    let mut chars = name.chars();
169    let Some(first) = chars.next() else {
170        return false;
171    };
172    if !first.is_ascii_alphabetic() && first != '_' && first != '$' {
173        return false;
174    }
175    if !chars.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '$') {
176        return false;
177    }
178    if SOLIDITY_KEYWORDS.contains(&name) {
179        return false;
180    }
181    if is_numeric_type_keyword(name) {
182        return false;
183    }
184    true
185}
186
/// Keywords that are not allowed as identifiers in Solidity.
///
/// The grammar permits only 7 keywords as identifiers:
/// `from`, `error`, `revert`, `global`, `transient`, `layout`, `at`.
/// Everything else listed in the lexer is blacklisted here, in three groups:
/// active keywords, number-unit keywords (the suffixes of literals such as
/// `1 ether` or `2 days`), and keywords reserved for future use.
///
/// Sized numeric types (`uint256`, `bytes32`, …) are not enumerated here;
/// they are recognised separately by `is_numeric_type_keyword`.
const SOLIDITY_KEYWORDS: &[&str] = &[
    // Active keywords
    "abstract",
    "address",
    "anonymous",
    "as",
    "assembly",
    "bool",
    "break",
    "bytes",
    "calldata",
    "catch",
    "constant",
    "constructor",
    "continue",
    "contract",
    "delete",
    "do",
    "else",
    "emit",
    "enum",
    "event",
    "external",
    "fallback",
    "false",
    "fixed",
    "for",
    "function",
    "hex",
    "if",
    "immutable",
    "import",
    "indexed",
    "interface",
    "internal",
    "is",
    "library",
    "mapping",
    "memory",
    "modifier",
    "new",
    "override",
    "payable",
    "pragma",
    "private",
    "public",
    "pure",
    "receive",
    "return",
    "returns",
    "storage",
    "string",
    "struct",
    "true",
    "try",
    "type",
    "ufixed",
    "unchecked",
    "unicode",
    "using",
    "view",
    "virtual",
    "while",
    // Number unit keywords (ether and time denominations)
    "wei",
    "gwei",
    "ether",
    "seconds",
    "minutes",
    "hours",
    "days",
    "weeks",
    "years",
    // Reserved keywords (future use)
    "after",
    "alias",
    "apply",
    "auto",
    "byte",
    "case",
    "copyof",
    "default",
    "define",
    "final",
    "implements",
    "in",
    "inline",
    "let",
    "macro",
    "match",
    "mutable",
    "null",
    "of",
    "partial",
    "promise",
    "reference",
    "relocatable",
    "sealed",
    "sizeof",
    "static",
    "supports",
    "switch",
    "typedef",
    "typeof",
    "var",
];
288
/// Check whether `name` is a numeric-type keyword: `int<N>`, `uint<N>`, or `bytes<N>`.
///
/// * `int` / `uint` match bare, or with a size `N` that is a multiple of 8 in
///   `8..=256`.
/// * `bytes<N>` matches for `N` in `1..=32`. Bare `bytes` returns `false`
///   here because it is listed in `SOLIDITY_KEYWORDS` instead.
///
/// The size must be written canonically — ASCII digits only, no leading zero
/// and no sign. Spellings such as `uint08`, `bytes01` or `int+8` are therefore
/// not treated as keywords, even though an integer parse of the suffix alone
/// would accept them.
fn is_numeric_type_keyword(name: &str) -> bool {
    // `str::parse` tolerates a leading `+` and leading zeros, so the spelling
    // is validated before the value is parsed. Empty or overflowing suffixes
    // fail the parse and yield `None`.
    let canonical_size = |digits: &str| -> Option<u16> {
        let canonical = !digits.starts_with('0') && digits.bytes().all(|b| b.is_ascii_digit());
        if canonical {
            digits.parse().ok()
        } else {
            None
        }
    };

    if let Some(suffix) = name
        .strip_prefix("uint")
        .or_else(|| name.strip_prefix("int"))
    {
        return suffix.is_empty()
            || canonical_size(suffix)
                .is_some_and(|bits| (8..=256).contains(&bits) && bits % 8 == 0);
    }
    if let Some(suffix) = name.strip_prefix("bytes") {
        // bare "bytes" is in SOLIDITY_KEYWORDS; only "bytes1"–"bytes32" are handled here
        return canonical_size(suffix).is_some_and(|width| (1..=32).contains(&width));
    }
    false
}