use crate::WirePosition as Position;
use ropey::Rope;
use serde_json::Value;
/// Converts between line/UTF-16-column positions and byte offsets for one document.
///
/// The document text is kept in a rope; the line-ending style is re-detected
/// whenever the text is replaced or edited through this type.
pub struct PositionMapper {
/// Current document text.
rope: Rope,
/// Line-ending style detected from the current text by `detect_line_ending`.
line_ending: LineEnding,
}
/// Line-ending style of a document, as classified by `detect_line_ending`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineEnding {
/// Only `\n` terminators were found; also reported when the text has no terminators at all.
Lf,
/// Only `\r\n` terminators were found.
CrLf,
/// Only lone `\r` terminators were found.
Cr,
/// More than one kind of terminator was found.
Mixed,
}
impl PositionMapper {
    /// Builds a mapper over `text` and records which line-ending style it uses.
    pub fn new(text: &str) -> Self {
        Self { rope: Rope::from_str(text), line_ending: detect_line_ending(text) }
    }

    /// Replaces the whole document with `text` and re-detects the line-ending style.
    pub fn update(&mut self, text: &str) {
        self.rope = Rope::from_str(text);
        self.line_ending = detect_line_ending(text);
    }

    /// Replaces the bytes in `start_byte..end_byte` with `new_text`.
    ///
    /// Both offsets are clamped to the document length before being converted to
    /// char indices. When the resulting range is empty or inverted nothing is
    /// removed and `new_text` is simply inserted at the start position.
    pub fn apply_edit(&mut self, start_byte: usize, end_byte: usize, new_text: &str) {
        let len = self.rope.len_bytes();
        // NOTE(review): an offset inside a multi-byte char is resolved by
        // `Rope::byte_to_char`; confirm its rounding against the ropey docs.
        let start_char = self.rope.byte_to_char(start_byte.min(len));
        let end_char = self.rope.byte_to_char(end_byte.min(len));
        if end_char > start_char {
            self.rope.remove(start_char..end_char);
        }
        if !new_text.is_empty() {
            self.rope.insert(start_char, new_text);
        }
        // The edit may have added or removed terminators, so rescan the full text.
        self.line_ending = detect_line_ending(&self.rope.to_string());
    }

    /// Converts a line / UTF-16 column position into a byte offset.
    ///
    /// Returns `None` when `pos.line` is not a line of the document. A column
    /// that falls in the middle of a surrogate pair is rounded down to the start
    /// of that character. A column past the end of the line is clamped to the
    /// end of the line's content, so the result never lands inside a `\r\n`
    /// pair or on the following line.
    ///
    /// NOTE(review): line indices follow the rope's notion of a line break;
    /// confirm that matches the terminators the client counts.
    pub fn lsp_pos_to_byte(&self, pos: Position) -> Option<usize> {
        let line_idx = pos.line as usize;
        if line_idx >= self.rope.len_lines() {
            return None;
        }
        let line_start_byte = self.rope.line_to_byte(line_idx);
        let mut utf16_offset = 0u32;
        let mut byte_offset = 0usize;
        for ch in self.rope.line(line_idx).chars() {
            // The line slice includes its terminator; stop in front of it so an
            // oversized column clamps to the line length.
            if utf16_offset >= pos.character || ch == '\n' || ch == '\r' {
                break;
            }
            let next_utf16 = utf16_offset + ch.len_utf16() as u32;
            // The requested column splits this character: stay at its start.
            if next_utf16 > pos.character {
                break;
            }
            utf16_offset = next_utf16;
            byte_offset += ch.len_utf8();
        }
        Some(line_start_byte + byte_offset)
    }

    /// Converts a byte offset into a line / UTF-16 column position.
    ///
    /// The offset is clamped to the document length. An offset inside a
    /// multi-byte character is reported as the position of that character's start.
    pub fn byte_to_lsp_pos(&self, byte_offset: usize) -> Position {
        let byte_offset = byte_offset.min(self.rope.len_bytes());
        let line_idx = self.rope.byte_to_line(byte_offset);
        let byte_in_line = byte_offset - self.rope.line_to_byte(line_idx);
        let mut utf16_offset = 0u32;
        let mut current_byte = 0usize;
        for ch in self.rope.line(line_idx).chars() {
            let next_byte = current_byte + ch.len_utf8();
            // Only count characters that end at or before the target offset.
            if next_byte > byte_in_line {
                break;
            }
            current_byte = next_byte;
            utf16_offset += ch.len_utf16() as u32;
        }
        Position { line: line_idx as u32, character: utf16_offset }
    }

    /// Returns a copy of the whole document as a `String`.
    pub fn text(&self) -> String {
        self.rope.to_string()
    }

    /// Returns a copy of the text between two byte offsets.
    ///
    /// Both offsets are clamped to the document length. An empty or inverted
    /// range yields an empty string.
    pub fn slice(&self, start_byte: usize, end_byte: usize) -> String {
        let len = self.rope.len_bytes();
        let start_char = self.rope.byte_to_char(start_byte.min(len));
        let end_char = self.rope.byte_to_char(end_byte.min(len));
        // `Rope::slice` panics when the range start exceeds its end.
        if start_char >= end_char {
            return String::new();
        }
        self.rope.slice(start_char..end_char).to_string()
    }

    /// Length of the document in bytes.
    pub fn len_bytes(&self) -> usize {
        self.rope.len_bytes()
    }

    /// Number of lines in the document, as counted by the rope.
    pub fn len_lines(&self) -> usize {
        self.rope.len_lines()
    }

    /// Converts a line / UTF-16 column position into a char index.
    ///
    /// Returns `None` under the same condition as [`Self::lsp_pos_to_byte`].
    pub fn lsp_pos_to_char(&self, pos: Position) -> Option<usize> {
        self.lsp_pos_to_byte(pos).map(|byte| self.rope.byte_to_char(byte))
    }

    /// Converts a char index into a line / UTF-16 column position.
    ///
    /// The index is clamped to the document's char count, matching the clamping
    /// that [`Self::byte_to_lsp_pos`] applies to byte offsets.
    pub fn char_to_lsp_pos(&self, char_idx: usize) -> Position {
        // `Rope::char_to_byte` panics on an index past the end of the text.
        let char_idx = char_idx.min(self.rope.len_chars());
        self.byte_to_lsp_pos(self.rope.char_to_byte(char_idx))
    }

    /// Returns `true` when the document contains no bytes.
    pub fn is_empty(&self) -> bool {
        self.rope.len_bytes() == 0
    }

    /// Line-ending style detected from the current text.
    pub fn line_ending(&self) -> LineEnding {
        self.line_ending
    }
}
/// Reads a `{"line": .., "character": ..}` JSON object into a [`Position`].
///
/// Returns `None` when either field is missing, is not representable as an
/// unsigned integer, or does not fit in `u32`.
pub fn json_to_position(pos: &Value) -> Option<Position> {
    // A bare `as u32` cast would silently wrap values above `u32::MAX` into an
    // unrelated position, so the narrowing is checked instead.
    let field = |key: &str| -> Option<u32> {
        let raw = pos[key].as_u64()?;
        <u32 as std::convert::TryFrom<u64>>::try_from(raw).ok()
    };
    Some(Position { line: field("line")?, character: field("character")? })
}
pub fn position_to_json(pos: Position) -> Value {
serde_json::json!({
"line": pos.line,
"character": pos.character
})
}
/// Classifies the line terminators used in `text`.
///
/// A `\r` immediately followed by `\n` counts as one CRLF terminator. Text with
/// exactly one kind of terminator reports that kind, text with several kinds
/// reports `Mixed`, and text without any terminator reports `Lf`.
fn detect_line_ending(text: &str) -> LineEnding {
    let (mut saw_crlf, mut saw_lf, mut saw_cr) = (false, false, false);
    let mut bytes = text.bytes().peekable();
    while let Some(byte) = bytes.next() {
        match byte {
            b'\r' => {
                // Consume the paired `\n` so it is not also counted as a bare LF.
                if bytes.next_if_eq(&b'\n').is_some() {
                    saw_crlf = true;
                } else {
                    saw_cr = true;
                }
            }
            b'\n' => saw_lf = true,
            _ => {}
        }
    }
    match (saw_crlf, saw_lf, saw_cr) {
        (true, false, false) => LineEnding::CrLf,
        (false, true, false) => LineEnding::Lf,
        (false, false, true) => LineEnding::Cr,
        // No terminator at all: fall back to LF.
        (false, false, false) => LineEnding::Lf,
        _ => LineEnding::Mixed,
    }
}
/// Replaces the bytes `start_byte..old_end_byte` of `text` with `replacement`.
///
/// The edit is silently skipped, leaving `text` untouched, when the range is
/// inverted, extends past the end of `text`, or either end is not on a UTF-8
/// character boundary.
pub fn apply_edit_utf8(
    text: &mut String,
    start_byte: usize,
    old_end_byte: usize,
    replacement: &str,
) {
    // `is_char_boundary` is false for offsets past the end, so it also covers
    // the out-of-bounds case. The ordering check is needed separately because
    // `replace_range` panics on an inverted range.
    let range_is_valid = start_byte <= old_end_byte
        && text.is_char_boundary(start_byte)
        && text.is_char_boundary(old_end_byte);
    if range_is_valid {
        text.replace_range(start_byte..old_end_byte, replacement);
    }
}
/// Counts the `\n` characters in `text`.
///
/// A lone `\r` is not counted, and a `\r\n` pair contributes exactly one.
pub fn newline_count(text: &str) -> usize {
    text.matches('\n').count()
}
/// Returns the length in bytes of the text following the last `\n` in `text`.
///
/// When `text` contains no `\n` the whole text is measured.
pub fn last_line_column_utf8(text: &str) -> u32 {
    // `rsplit` always yields at least one piece, and the first is the tail after the last `\n`.
    let last_line = text.rsplit('\n').next().unwrap_or(text);
    last_line.len() as u32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Positions and byte offsets agree in both directions on an LF-only document.
    #[test]
    fn test_lf_positions() {
        let mapper = PositionMapper::new("line 1\nline 2\nline 3");
        // (line, UTF-16 column, byte offset)
        for (line, character, byte) in [(0u32, 0u32, 0usize), (0, 3, 3), (1, 0, 7)] {
            assert_eq!(mapper.lsp_pos_to_byte(Position { line, character }), Some(byte));
            assert_eq!(mapper.byte_to_lsp_pos(byte), Position { line, character });
        }
    }

    /// Line starts account for the two-byte `\r\n` terminator.
    #[test]
    fn test_crlf_positions() {
        let mapper = PositionMapper::new("line 1\r\nline 2\r\nline 3");
        assert_eq!(mapper.line_ending(), LineEnding::CrLf);
        // (line, byte offset of the line start)
        for (line, byte) in [(1u32, 8usize), (2, 16)] {
            assert_eq!(mapper.lsp_pos_to_byte(Position { line, character: 0 }), Some(byte));
            assert_eq!(mapper.byte_to_lsp_pos(byte), Position { line, character: 0 });
        }
    }

    /// A four-byte emoji occupies two UTF-16 columns.
    #[test]
    fn test_utf16_positions() {
        let mapper = PositionMapper::new("hello 😀 world");
        let before_emoji = Position { line: 0, character: 6 };
        assert_eq!(mapper.lsp_pos_to_byte(before_emoji), Some(6));
        let after_emoji = Position { line: 0, character: 8 };
        assert_eq!(mapper.lsp_pos_to_byte(after_emoji), Some(10));
        assert_eq!(mapper.byte_to_lsp_pos(10), Position { line: 0, character: 8 });
    }

    /// A column inside a surrogate pair rounds down to the start of the character.
    #[test]
    fn test_utf16_positions_clamp_mid_surrogate_to_char_start() {
        let mapper = PositionMapper::new("a😀b");
        let mid_pair = Position { line: 0, character: 2 };
        assert_eq!(mapper.lsp_pos_to_byte(mid_pair), Some(1));
    }

    /// Every column around a single surrogate pair maps to the expected byte.
    #[test]
    fn test_utf16_surrogate_pair_boundaries() {
        let mapper = PositionMapper::new("x💖y");
        // (UTF-16 column, byte offset)
        for (character, byte) in [(0u32, 0usize), (1, 1), (2, 1), (3, 5), (4, 6)] {
            assert_eq!(mapper.lsp_pos_to_byte(Position { line: 0, character }), Some(byte));
        }
    }

    /// The highest code point behaves like any other supplementary-plane character.
    #[test]
    fn test_utf16_max_code_point() {
        let max_char = '\u{10FFFF}';
        let text = format!("a{max_char}b");
        let mapper = PositionMapper::new(&text);
        // (UTF-16 column, byte offset)
        for (character, byte) in [(0u32, 0usize), (1, 1), (2, 1), (3, 5), (4, 6)] {
            assert_eq!(mapper.lsp_pos_to_byte(Position { line: 0, character }), Some(byte));
        }
        // (byte offset, UTF-16 column)
        for (byte, character) in [(0usize, 0u32), (1, 1), (5, 3), (6, 4)] {
            assert_eq!(mapper.byte_to_lsp_pos(byte), Position { line: 0, character });
        }
    }

    /// One-, two- and four-byte characters interleaved on a single line.
    #[test]
    fn test_utf16_mixed_bmp_and_supplementary_plane() {
        let mapper = PositionMapper::new("aé💖ñ🎉b");
        // (UTF-16 column, byte offset); columns 3 and 6 fall inside a surrogate pair.
        let expected = [
            (0u32, 0usize),
            (1, 1),
            (2, 3),
            (3, 3),
            (4, 7),
            (5, 9),
            (6, 9),
            (7, 13),
            (8, 14),
        ];
        for (character, byte) in expected {
            assert_eq!(mapper.lsp_pos_to_byte(Position { line: 0, character }), Some(byte));
        }
    }

    /// An empty document has exactly one (empty) line.
    #[test]
    fn test_utf16_zero_length_input() {
        let mapper = PositionMapper::new("");
        for character in [0u32, 5] {
            assert_eq!(mapper.lsp_pos_to_byte(Position { line: 0, character }), Some(0));
        }
        assert!(mapper.lsp_pos_to_byte(Position { line: 1, character: 0 }).is_none());
        assert_eq!(mapper.byte_to_lsp_pos(0), Position { line: 0, character: 0 });
    }

    /// Two adjacent surrogate pairs: odd columns round down to the preceding boundary.
    #[test]
    fn test_utf16_consecutive_surrogate_pairs() {
        let mapper = PositionMapper::new("💖💖");
        // (UTF-16 column, byte offset)
        for (character, byte) in [(0u32, 0usize), (1, 0), (2, 4), (3, 4), (4, 8)] {
            assert_eq!(mapper.lsp_pos_to_byte(Position { line: 0, character }), Some(byte));
        }
    }

    /// The mapper and the standalone conversion helper agree on every column of line 0.
    #[test]
    fn test_utf16_clamp_matches_convert_helper() {
        use crate::convert::utf16_line_col_to_offset;

        let text = "a😀b💖c\nx💡y";
        let mapper = PositionMapper::new(text);
        for col in 0..=7 {
            let requested = Position { line: 0, character: col };
            let mapper_byte = mapper.lsp_pos_to_byte(requested).unwrap_or(usize::MAX);
            let helper_byte = utf16_line_col_to_offset(text, 0, col);
            assert_eq!(
                mapper_byte, helper_byte,
                "disagreement at line 0 col {col}: mapper={mapper_byte} helper={helper_byte}"
            );
        }
    }

    /// `\r\n`, `\n` and lone `\r` each start a new line in the same document.
    #[test]
    fn test_mixed_line_endings() {
        let mapper = PositionMapper::new("line 1\r\nline 2\nline 3\rline 4");
        assert_eq!(mapper.line_ending(), LineEnding::Mixed);
        // (byte offset of a line start, line index)
        for (byte, line) in [(0usize, 0u32), (8, 1), (15, 2), (22, 3)] {
            assert_eq!(mapper.byte_to_lsp_pos(byte), Position { line, character: 0 });
        }
    }

    /// Replacement, pure insertion and shrinking edits applied one after another.
    #[test]
    fn test_incremental_edit() {
        let mut mapper = PositionMapper::new("hello world");
        // (start byte, end byte, inserted text, document afterwards)
        let steps = [
            (6usize, 11usize, "Rust", "hello Rust"),
            (5, 5, " beautiful", "hello beautiful Rust"),
            (5, 16, " ", "hello Rust"),
        ];
        for (start, end, inserted, expected) in steps {
            mapper.apply_edit(start, end, inserted);
            assert_eq!(mapper.text(), expected);
        }
    }
}