1use lsp_types::Position;
7
/// Unit in which the `character` component of a position is counted.
///
/// Each variant corresponds to one of the identifiers `"utf-8"`, `"utf-16"` and `"utf-32"`
/// that `from_lsp` accepts and `to_lsp` produces.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum PositionEncoding {
    /// Offsets count UTF-8 bytes. This is the variant returned by `Default`.
    // NOTE(review): the LSP specification falls back to UTF-16 when no encoding is
    // negotiated — confirm that defaulting to UTF-8 here is intentional.
    #[default]
    Utf8,
    /// Offsets count UTF-16 code units (characters outside the BMP count as two).
    Utf16,
    /// Offsets count Unicode scalar values (one per `char`).
    Utf32,
}
19
20impl PositionEncoding {
21 #[must_use]
23 pub fn from_lsp(kind: &str) -> Option<Self> {
24 match kind {
25 "utf-8" => Some(Self::Utf8),
26 "utf-16" => Some(Self::Utf16),
27 "utf-32" => Some(Self::Utf32),
28 _ => None,
29 }
30 }
31
32 #[must_use]
34 pub const fn to_lsp(&self) -> &'static str {
35 match self {
36 Self::Utf8 => "utf-8",
37 Self::Utf16 => "utf-16",
38 Self::Utf32 => "utf-32",
39 }
40 }
41}
42
43#[must_use]
48pub const fn mcp_to_lsp_position(line: u32, character: u32) -> Position {
49 Position {
50 line: line.saturating_sub(1),
51 character: character.saturating_sub(1),
52 }
53}
54
55#[must_use]
57pub const fn lsp_to_mcp_position(pos: Position) -> (u32, u32) {
58 (pos.line + 1, pos.character + 1)
59}
60
/// Converts between UTF-8 byte offsets into a `&str` and character offsets counted in a
/// chosen [`PositionEncoding`].
#[derive(Debug, Clone)]
pub struct EncodingConverter {
    /// Encoding in which character offsets are interpreted and produced.
    encoding: PositionEncoding,
}
70
71#[allow(dead_code)] impl EncodingConverter {
73 #[must_use]
75 pub const fn new(encoding: PositionEncoding) -> Self {
76 Self { encoding }
77 }
78
79 #[allow(clippy::cast_possible_truncation)] pub fn byte_offset_to_character(&self, text: &str, byte_offset: usize) -> Result<u32, String> {
88 if byte_offset > text.len() {
89 let text_len = text.len();
90 return Err(format!(
91 "Byte offset {byte_offset} exceeds text length {text_len}"
92 ));
93 }
94
95 match self.encoding {
96 PositionEncoding::Utf8 => Ok(byte_offset as u32),
97 PositionEncoding::Utf16 => {
98 let utf16_units = text[..byte_offset].encode_utf16().count();
99 Ok(utf16_units as u32)
100 }
101 PositionEncoding::Utf32 => {
102 let code_points = text[..byte_offset].chars().count();
103 Ok(code_points as u32)
104 }
105 }
106 }
107
108 #[allow(clippy::cast_possible_truncation)] pub fn character_to_byte_offset(
117 &self,
118 text: &str,
119 character_offset: u32,
120 ) -> Result<usize, String> {
121 match self.encoding {
122 PositionEncoding::Utf8 => {
123 let byte_offset = character_offset as usize;
124 if byte_offset > text.len() {
125 let text_len = text.len();
126 return Err(format!(
127 "Character offset {character_offset} exceeds text length {text_len}"
128 ));
129 }
130 Ok(byte_offset)
131 }
132 PositionEncoding::Utf16 => {
133 let mut utf16_count = 0u32;
134 for (byte_idx, ch) in text.char_indices() {
135 if utf16_count >= character_offset {
136 return Ok(byte_idx);
137 }
138 utf16_count += ch.len_utf16() as u32;
139 }
140 if utf16_count == character_offset {
141 Ok(text.len())
142 } else {
143 Err(format!(
144 "Character offset {character_offset} out of bounds (max UTF-16 units: {utf16_count})"
145 ))
146 }
147 }
148 PositionEncoding::Utf32 => text
149 .char_indices()
150 .nth(character_offset as usize)
151 .map(|(byte_idx, _)| byte_idx)
152 .or_else(|| {
153 if character_offset == text.chars().count() as u32 {
154 Some(text.len())
155 } else {
156 None
157 }
158 })
159 .ok_or_else(|| {
160 let max_code_points = text.chars().count();
161 format!(
162 "Character offset {character_offset} out of bounds (max code points: {max_code_points})"
163 )
164 }),
165 }
166 }
167}
168
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// 1-based inputs come back shifted down by one on both axes.
    #[test]
    fn test_mcp_to_lsp_position() {
        let cases = [((1, 1), (0, 0)), ((10, 5), (9, 4))];
        for ((line, character), (want_line, want_character)) in cases {
            let lsp_pos = mcp_to_lsp_position(line, character);
            assert_eq!(lsp_pos.line, want_line);
            assert_eq!(lsp_pos.character, want_character);
        }
    }

    /// 0-based LSP positions come back shifted up by one on both axes.
    #[test]
    fn test_lsp_to_mcp_position() {
        let origin = Position {
            line: 0,
            character: 0,
        };
        assert_eq!(lsp_to_mcp_position(origin), (1, 1));

        let later = Position {
            line: 9,
            character: 4,
        };
        assert_eq!(lsp_to_mcp_position(later), (10, 5));
    }

    /// Converting to LSP and back is the identity for every 1-based pair in `1..100`.
    #[test]
    fn test_roundtrip() {
        for line in 1..100 {
            for character in 1..100 {
                let back = lsp_to_mcp_position(mcp_to_lsp_position(line, character));
                assert_eq!(back, (line, character));
            }
        }
    }

    /// Zero inputs clamp to zero instead of underflowing.
    #[test]
    fn test_saturating_sub_zero() {
        let clamped = mcp_to_lsp_position(0, 0);
        assert_eq!((clamped.line, clamped.character), (0, 0));
    }

    /// The three known identifiers parse; anything else is rejected.
    #[test]
    fn test_position_encoding_parsing() {
        let cases = [
            ("utf-8", Some(PositionEncoding::Utf8)),
            ("utf-16", Some(PositionEncoding::Utf16)),
            ("utf-32", Some(PositionEncoding::Utf32)),
            ("invalid", None),
        ];
        for (kind, expected) in cases {
            assert_eq!(PositionEncoding::from_lsp(kind), expected);
        }
    }

    /// With UTF-8, byte offsets and character offsets coincide.
    #[test]
    fn test_utf8_encoding() {
        let text = "Hello, world!";
        let converter = EncodingConverter::new(PositionEncoding::Utf8);

        assert_eq!(converter.byte_offset_to_character(text, 7).unwrap(), 7);
        assert_eq!(converter.character_to_byte_offset(text, 7).unwrap(), 7);
    }

    /// The emoji occupies four bytes but two UTF-16 code units.
    #[test]
    fn test_utf16_encoding_with_emoji() {
        let text = "Hello 😀 world";
        let converter = EncodingConverter::new(PositionEncoding::Utf16);

        // (byte offset, UTF-16 offset) pairs just before and just after the emoji.
        for (byte_offset, utf16_offset) in [(6, 6), (10, 8)] {
            assert_eq!(
                converter.byte_offset_to_character(text, byte_offset).unwrap(),
                utf16_offset
            );
            assert_eq!(
                converter.character_to_byte_offset(text, utf16_offset).unwrap(),
                byte_offset
            );
        }
    }

    /// Byte offsets on character boundaries survive a trip through UTF-16 offsets.
    #[test]
    fn test_utf16_encoding_roundtrip() {
        let text = "Hello 🌍 world!";
        let converter = EncodingConverter::new(PositionEncoding::Utf16);

        for byte_idx in [0, 6, 10, 11] {
            let back_to_byte = converter
                .byte_offset_to_character(text, byte_idx)
                .and_then(|units| converter.character_to_byte_offset(text, units))
                .unwrap();
            assert_eq!(byte_idx, back_to_byte);
        }
    }

    /// The emoji occupies four bytes but a single code point.
    #[test]
    fn test_utf32_encoding() {
        let text = "Hello 😀 world";
        let converter = EncodingConverter::new(PositionEncoding::Utf32);

        assert_eq!(converter.byte_offset_to_character(text, 6).unwrap(), 6);
        assert_eq!(converter.byte_offset_to_character(text, 10).unwrap(), 7);
        assert_eq!(converter.character_to_byte_offset(text, 7).unwrap(), 10);
    }

    /// Offsets past the end are errors; the one-past-the-end offset is valid.
    #[test]
    fn test_encoding_edge_cases() {
        let converter = EncodingConverter::new(PositionEncoding::Utf8);

        let past_end_byte = converter.byte_offset_to_character("test", 100);
        assert!(past_end_byte.is_err());
        let past_end_character = converter.character_to_byte_offset("test", 100);
        assert!(past_end_character.is_err());

        assert_eq!(converter.byte_offset_to_character("test", 4).unwrap(), 4);
    }
}