use lsp_types::Position;
/// Unit in which the `character` component of a position is counted.
///
/// Each variant maps to one of the LSP position-encoding identifiers
/// `"utf-8"`, `"utf-16"` and `"utf-32"` (see [`PositionEncoding::to_lsp`]).
///
/// NOTE(review): the LSP specification makes `utf-16` the mandatory fallback
/// encoding; confirm that defaulting to `Utf8` here is intentional.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum PositionEncoding {
    /// Offsets count UTF-8 code units (bytes). This is the `Default` variant.
    #[default]
    Utf8,
    /// Offsets count UTF-16 code units.
    Utf16,
    /// Offsets count UTF-32 code units (Unicode scalar values).
    Utf32,
}

impl PositionEncoding {
    /// Parses an LSP position-encoding identifier.
    ///
    /// Matching is exact and case-sensitive: only `"utf-8"`, `"utf-16"` and
    /// `"utf-32"` are recognised; every other string yields `None`.
    #[must_use]
    pub fn from_lsp(kind: &str) -> Option<Self> {
        let encoding = match kind {
            "utf-8" => Self::Utf8,
            "utf-16" => Self::Utf16,
            "utf-32" => Self::Utf32,
            _ => return None,
        };
        Some(encoding)
    }

    /// Returns the LSP identifier for this encoding; the inverse of
    /// [`PositionEncoding::from_lsp`].
    #[must_use]
    pub const fn to_lsp(&self) -> &'static str {
        match *self {
            Self::Utf8 => "utf-8",
            Self::Utf16 => "utf-16",
            Self::Utf32 => "utf-32",
        }
    }
}
/// Converts an MCP `(line, character)` pair into an LSP [`Position`] by
/// shifting both components down by one (1-based to 0-based).
///
/// The subtraction saturates, so a component that is already `0` stays `0`
/// instead of underflowing.
#[must_use]
pub const fn mcp_to_lsp_position(line: u32, character: u32) -> Position {
    let zero_based_line = line.saturating_sub(1);
    let zero_based_character = character.saturating_sub(1);
    Position {
        line: zero_based_line,
        character: zero_based_character,
    }
}
/// Converts an LSP [`Position`] into an MCP `(line, character)` pair by
/// shifting both components up by one (0-based to 1-based).
///
/// The addition saturates: a component equal to `u32::MAX` stays `u32::MAX`
/// rather than overflowing, which would panic in debug builds and wrap to `0`
/// in release builds. This mirrors the saturating subtraction performed by
/// `mcp_to_lsp_position`.
#[must_use]
pub const fn lsp_to_mcp_position(pos: Position) -> (u32, u32) {
    (pos.line.saturating_add(1), pos.character.saturating_add(1))
}
/// Converts between UTF-8 byte offsets into a `&str` and `character` offsets
/// counted in a configurable [`PositionEncoding`].
#[derive(Debug, Clone)]
pub struct EncodingConverter {
    /// Unit in which `character` offsets are counted.
    encoding: PositionEncoding,
}

// NOTE(review): presumably not every method has a caller yet — confirm the
// `dead_code` allowance is still needed.
#[allow(dead_code)]
impl EncodingConverter {
    /// Creates a converter that counts `character` offsets in `encoding`.
    #[must_use]
    pub const fn new(encoding: PositionEncoding) -> Self {
        Self { encoding }
    }

    /// Converts a byte offset into `text` to a character offset in this
    /// converter's encoding.
    ///
    /// * UTF-8: the byte offset is returned unchanged (it is not checked
    ///   against character boundaries).
    /// * UTF-16: the number of UTF-16 code units in `text[..byte_offset]`.
    /// * UTF-32: the number of Unicode scalar values in `text[..byte_offset]`.
    ///
    /// # Errors
    ///
    /// Returns an error message when
    /// * `byte_offset` is greater than `text.len()`,
    /// * the encoding is UTF-16 or UTF-32 and `byte_offset` falls inside a
    ///   multi-byte character (slicing there would otherwise panic), or
    /// * the resulting offset does not fit in a `u32`.
    pub fn byte_offset_to_character(&self, text: &str, byte_offset: usize) -> Result<u32, String> {
        if byte_offset > text.len() {
            let text_len = text.len();
            return Err(format!(
                "Byte offset {byte_offset} exceeds text length {text_len}"
            ));
        }
        let units = match self.encoding {
            PositionEncoding::Utf8 => byte_offset,
            PositionEncoding::Utf16 => Self::prefix(text, byte_offset)?.encode_utf16().count(),
            PositionEncoding::Utf32 => Self::prefix(text, byte_offset)?.chars().count(),
        };
        // A checked conversion reports oversized results instead of silently
        // truncating them the way an `as u32` cast would.
        u32::try_from(units).map_err(|_| format!("Character offset {units} does not fit in u32"))
    }

    /// Converts a character offset in this converter's encoding to a byte
    /// offset into `text`.
    ///
    /// * UTF-8: the offset is returned unchanged once it is known to be at
    ///   most `text.len()` (it is not checked against character boundaries).
    /// * UTF-16: the byte index of the first character that starts at or
    ///   after `character_offset` code units. An offset that lands inside a
    ///   surrogate pair is therefore rounded up to the end of that character,
    ///   including when that character is the last one in `text`.
    /// * UTF-32: the byte index of the `character_offset`-th scalar value.
    ///
    /// In every encoding the offset one past the last unit maps to
    /// `text.len()`.
    ///
    /// # Errors
    ///
    /// Returns an error message when `character_offset` lies beyond the end
    /// of `text`.
    pub fn character_to_byte_offset(
        &self,
        text: &str,
        character_offset: u32,
    ) -> Result<usize, String> {
        // Widen once so all counting happens in `usize` without lossy casts.
        let target = usize::try_from(character_offset).map_err(|_| {
            format!("Character offset {character_offset} exceeds the addressable range")
        })?;
        match self.encoding {
            PositionEncoding::Utf8 => {
                if target > text.len() {
                    let text_len = text.len();
                    return Err(format!(
                        "Character offset {character_offset} exceeds text length {text_len}"
                    ));
                }
                Ok(target)
            }
            PositionEncoding::Utf16 => {
                let mut utf16_count = 0usize;
                for (byte_idx, ch) in text.char_indices() {
                    if utf16_count >= target {
                        return Ok(byte_idx);
                    }
                    utf16_count += ch.len_utf16();
                }
                // Reaching this point with `utf16_count >= target` means the
                // offset is either exactly the end of `text` or inside the
                // final surrogate pair; both resolve to the end of the text,
                // matching how mid-pair offsets are rounded up in the loop.
                if utf16_count >= target {
                    Ok(text.len())
                } else {
                    Err(format!(
                        "Character offset {character_offset} out of bounds (max UTF-16 units: {utf16_count})"
                    ))
                }
            }
            PositionEncoding::Utf32 => {
                // Single pass: count scalar values until the target is hit.
                let mut code_points = 0usize;
                for (byte_idx, _) in text.char_indices() {
                    if code_points == target {
                        return Ok(byte_idx);
                    }
                    code_points += 1;
                }
                if code_points == target {
                    Ok(text.len())
                } else {
                    Err(format!(
                        "Character offset {character_offset} out of bounds (max code points: {code_points})"
                    ))
                }
            }
        }
    }

    /// Returns `text[..byte_offset]`, or an error when that slice cannot be
    /// taken. Callers check `byte_offset <= text.len()` first, so a failure
    /// here means the offset splits a multi-byte character.
    fn prefix(text: &str, byte_offset: usize) -> Result<&str, String> {
        text.get(..byte_offset).ok_or_else(|| {
            format!("Byte offset {byte_offset} is not on a UTF-8 character boundary")
        })
    }
}
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Shorthand for building a converter that counts in `encoding`.
    fn converter(encoding: PositionEncoding) -> EncodingConverter {
        EncodingConverter::new(encoding)
    }

    #[test]
    fn test_mcp_to_lsp_position() {
        // (MCP line, MCP character) -> expected (LSP line, LSP character)
        let cases = [((1, 1), (0, 0)), ((10, 5), (9, 4))];
        for ((line, character), (want_line, want_character)) in cases {
            let got = mcp_to_lsp_position(line, character);
            assert_eq!(got.line, want_line);
            assert_eq!(got.character, want_character);
        }
    }

    #[test]
    fn test_lsp_to_mcp_position() {
        let origin = Position {
            line: 0,
            character: 0,
        };
        assert_eq!(lsp_to_mcp_position(origin), (1, 1));

        let later = Position {
            line: 9,
            character: 4,
        };
        assert_eq!(lsp_to_mcp_position(later), (10, 5));
    }

    #[test]
    fn test_roundtrip() {
        for line in 1..100 {
            for character in 1..100 {
                let back = lsp_to_mcp_position(mcp_to_lsp_position(line, character));
                assert_eq!(back, (line, character));
            }
        }
    }

    #[test]
    fn test_saturating_sub_zero() {
        // Zero cannot be shifted down any further and must stay zero.
        let pos = mcp_to_lsp_position(0, 0);
        assert_eq!((pos.line, pos.character), (0, 0));
    }

    #[test]
    fn test_position_encoding_parsing() {
        let cases = [
            ("utf-8", Some(PositionEncoding::Utf8)),
            ("utf-16", Some(PositionEncoding::Utf16)),
            ("utf-32", Some(PositionEncoding::Utf32)),
            ("invalid", None),
        ];
        for (identifier, expected) in cases {
            assert_eq!(PositionEncoding::from_lsp(identifier), expected);
        }
    }

    #[test]
    fn test_utf8_encoding() {
        let utf8 = converter(PositionEncoding::Utf8);
        let text = "Hello, world!";
        assert_eq!(utf8.byte_offset_to_character(text, 7).unwrap(), 7);
        assert_eq!(utf8.character_to_byte_offset(text, 7).unwrap(), 7);
    }

    #[test]
    fn test_utf16_encoding_with_emoji() {
        let utf16 = converter(PositionEncoding::Utf16);
        let text = "Hello 😀 world";
        // (byte offset, UTF-16 offset) just before and just after the emoji,
        // which occupies four bytes but two UTF-16 code units.
        for (byte_offset, utf16_offset) in [(6usize, 6u32), (10, 8)] {
            assert_eq!(
                utf16.byte_offset_to_character(text, byte_offset).unwrap(),
                utf16_offset
            );
            assert_eq!(
                utf16.character_to_byte_offset(text, utf16_offset).unwrap(),
                byte_offset
            );
        }
    }

    #[test]
    fn test_utf16_encoding_roundtrip() {
        let utf16 = converter(PositionEncoding::Utf16);
        let text = "Hello 🌍 world!";
        for start in [0, 6, 10, 11] {
            let round_tripped = utf16
                .byte_offset_to_character(text, start)
                .and_then(|units| utf16.character_to_byte_offset(text, units))
                .unwrap();
            assert_eq!(start, round_tripped);
        }
    }

    #[test]
    fn test_utf32_encoding() {
        let utf32 = converter(PositionEncoding::Utf32);
        let text = "Hello 😀 world";
        assert_eq!(utf32.byte_offset_to_character(text, 6).unwrap(), 6);
        // The emoji is a single scalar value, so only one unit is added.
        assert_eq!(utf32.byte_offset_to_character(text, 10).unwrap(), 7);
        assert_eq!(utf32.character_to_byte_offset(text, 7).unwrap(), 10);
    }

    #[test]
    fn test_encoding_edge_cases() {
        let utf8 = converter(PositionEncoding::Utf8);
        // Offsets past the end are rejected in both directions.
        assert!(utf8.byte_offset_to_character("test", 100).is_err());
        assert!(utf8.character_to_byte_offset("test", 100).is_err());
        // The one-past-the-end offset is still valid.
        assert_eq!(utf8.byte_offset_to_character("test", 4).unwrap(), 4);
    }
}