Skip to main content

solidity_language_server/
utils.rs

1use std::{
2    path::{Path, PathBuf},
3    sync::OnceLock,
4};
5use tower_lsp::lsp_types::PositionEncodingKind;
6
/// Unit in which the LSP client measures the `character` (column) part of a
/// position.
///
/// The value is chosen during `initialize()`, stored through
/// [`set_encoding`], and consulted by [`byte_offset_to_position`] and
/// [`position_to_byte_offset`], so callers of those helpers never pass an
/// encoding explicitly.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PositionEncoding {
    /// Columns count UTF-8 code units, i.e. bytes from the start of the line.
    Utf8,
    /// Columns count UTF-16 code units from the start of the line.
    /// The LSP specification makes this encoding the **mandatory default**.
    Utf16,
}
20
21impl PositionEncoding {
22    /// The mandatory LSP fallback encoding.
23    pub const DEFAULT: Self = PositionEncoding::Utf16;
24
25    /// Pick the best encoding from the set the client advertises.
26    ///
27    /// Preference: UTF-8 if supported, otherwise UTF-16 (the mandatory fallback).
28    pub fn negotiate(client_encodings: Option<&[PositionEncodingKind]>) -> Self {
29        let Some(encodings) = client_encodings else {
30            return Self::DEFAULT;
31        };
32        if encodings.contains(&PositionEncodingKind::UTF8) {
33            PositionEncoding::Utf8
34        } else {
35            PositionEncoding::Utf16
36        }
37    }
38
39    /// Convert to the LSP wire type.
40    pub fn to_encoding_kind(self) -> PositionEncodingKind {
41        match self {
42            PositionEncoding::Utf8 => PositionEncodingKind::UTF8,
43            PositionEncoding::Utf16 => PositionEncodingKind::UTF16,
44        }
45    }
46}
47
48// ---------------------------------------------------------------------------
49// Global encoding state — written once in `initialize`, read everywhere.
50// ---------------------------------------------------------------------------
51
52static ENCODING: OnceLock<PositionEncoding> = OnceLock::new();
53
54/// Store the negotiated encoding.  Called exactly once from the LSP
55/// `initialize` handler.  Subsequent calls are silently ignored.
56pub fn set_encoding(enc: PositionEncoding) {
57    let _ = ENCODING.set(enc);
58}
59
60/// Read the negotiated encoding (falls back to UTF-16 if never set).
61pub fn encoding() -> PositionEncoding {
62    ENCODING.get().copied().unwrap_or(PositionEncoding::DEFAULT)
63}
64
65// ---------------------------------------------------------------------------
66// Byte-offset ↔ LSP-position conversion
67// ---------------------------------------------------------------------------
68
69/// Convert a byte offset in `source` to an `(line, column)` pair whose column
70/// unit depends on the negotiated [`PositionEncoding`].
71pub fn byte_offset_to_position(source: &str, byte_offset: usize) -> (u32, u32) {
72    let enc = encoding();
73    let mut line: u32 = 0;
74    let mut col: u32 = 0;
75    let bytes = source.as_bytes();
76    let mut i = 0;
77
78    while i < byte_offset && i < bytes.len() {
79        match bytes[i] {
80            b'\n' => {
81                line += 1;
82                col = 0;
83                i += 1;
84            }
85            b'\r' if i + 1 < bytes.len() && bytes[i + 1] == b'\n' => {
86                line += 1;
87                col = 0;
88                i += 2;
89            }
90            _ => {
91                match enc {
92                    PositionEncoding::Utf8 => {
93                        // One byte = one UTF-8 code unit.
94                        col += 1;
95                        i += 1;
96                    }
97                    PositionEncoding::Utf16 => {
98                        // Advance by the full character, count UTF-16 code units.
99                        let ch_len = utf8_char_len(bytes[i]);
100                        let ch = &source[i..i + ch_len];
101                        col += ch.chars().next().map(|c| c.len_utf16() as u32).unwrap_or(1);
102                        i += ch_len;
103                    }
104                }
105            }
106        }
107    }
108
109    (line, col)
110}
111
112/// Convert an LSP `(line, character)` position back to a byte offset, where
113/// `character` is interpreted according to the negotiated [`PositionEncoding`].
114pub fn position_to_byte_offset(source: &str, line: u32, character: u32) -> usize {
115    let enc = encoding();
116    let mut current_line: u32 = 0;
117    let mut current_col: u32 = 0;
118
119    for (i, ch) in source.char_indices() {
120        if current_line == line && current_col == character {
121            return i;
122        }
123
124        match ch {
125            '\n' => {
126                if current_line == line {
127                    return i; // clamp to end of line
128                }
129                current_line += 1;
130                current_col = 0;
131            }
132            _ => {
133                current_col += match enc {
134                    PositionEncoding::Utf8 => ch.len_utf8() as u32,
135                    PositionEncoding::Utf16 => ch.len_utf16() as u32,
136                };
137            }
138        }
139    }
140
141    source.len()
142}
143
144// ---------------------------------------------------------------------------
145// Helpers
146// ---------------------------------------------------------------------------
147
/// Length in bytes of the UTF-8 sequence introduced by the lead byte `lead`.
///
/// The count of leading one-bits encodes the length: none for a single-byte
/// (ASCII) character, two to four for multi-byte sequences.  Any other byte
/// (a continuation byte, or one with five or more leading ones) is not a
/// lead byte and is reported as length 1 so a caller always advances.
fn utf8_char_len(lead: u8) -> usize {
    match lead.leading_ones() {
        2 => 2,
        3 => 3,
        4 => 4,
        // 0 leading ones: ASCII.  1 or 5+: not a lead byte; this shouldn't
        // happen at a char boundary.
        _ => 1,
    }
}
158
/// Report whether `name` is lexically a valid Solidity identifier.
///
/// Follows the Solidity lexer rule `[a-zA-Z$_][a-zA-Z0-9$_]*`: the first
/// character must be an ASCII letter, `_` or `$`, and every following
/// character must be an ASCII letter, ASCII digit, `_` or `$`.  The empty
/// string is rejected.
///
/// This is a purely lexical check; it does not reject reserved keywords.
pub fn is_valid_solidity_identifier(name: &str) -> bool {
    let is_start = |c: char| c.is_ascii_alphabetic() || c == '_' || c == '$';

    let mut chars = name.chars();
    let Some(first) = chars.next() else {
        return false;
    };

    is_start(first) && chars.all(|c| is_start(c) || c.is_ascii_digit())
}
175
/// Locate the top-level directory of the git working tree containing `path`.
///
/// Checks `path` itself and then each of its ancestors, returning the first
/// one that has a `.git` entry (file or directory).  Returns `None` when no
/// such ancestor exists.
pub fn find_git_root(path: impl AsRef<Path>) -> Option<PathBuf> {
    for candidate in path.as_ref().ancestors() {
        if candidate.join(".git").exists() {
            return Some(candidate.to_path_buf());
        }
    }
    None
}
183
184/// Finds the foundry project root by walking up from `path` looking for `foundry.toml`,
185/// bounded by the git root. Falls back to the git root if no `foundry.toml` is found.
186pub fn find_project_root(path: impl AsRef<Path>) -> Option<PathBuf> {
187    let path = path.as_ref();
188    let boundary = find_git_root(path);
189    let found = path
190        .ancestors()
191        .take_while(|p| {
192            if let Some(boundary) = &boundary {
193                p.starts_with(boundary)
194            } else {
195                true
196            }
197        })
198        .find(|p| p.join("foundry.toml").is_file())
199        .map(Path::to_path_buf);
200    found.or(boundary)
201}