use tower_lsp::lsp_types::{Position, Range};
/// Translates an offset measured in UTF-16 code units into a byte index of `s`.
///
/// The returned index is always a character boundary of `s`. An offset at or
/// past the end of the string yields `s.len()`. An offset that points into the
/// middle of a surrogate pair resolves to the byte index just after that
/// character.
pub(crate) fn utf16_offset_to_byte(s: &str, utf16_offset: usize) -> usize {
    let mut units_seen = 0usize;
    let mut remaining = s.char_indices();

    // Consume characters until enough UTF-16 code units have been covered.
    while units_seen < utf16_offset {
        match remaining.next() {
            Some((_, ch)) => units_seen += ch.len_utf16(),
            // Ran out of characters before reaching the offset: clamp to the end.
            None => return s.len(),
        }
    }

    // The next unconsumed character starts at the requested position; when
    // nothing is left, the position is the end of the string.
    remaining.next().map_or(s.len(), |(byte_idx, _)| byte_idx)
}
/// Converts an LSP `Position` (zero-based line, UTF-16 `character` offset)
/// into a byte offset within `text`.
///
/// Lines are separated by `'\n'`. If `pos.line` is beyond the last line, the
/// result is `text.len()`. A `character` value beyond the end of the line is
/// clamped to the end of the line's content.
///
/// When a line is terminated by `"\r\n"`, the `'\r'` is treated as part of the
/// terminator rather than as line content, so a clamped position never lands
/// between the `'\r'` and the `'\n'`.
pub(crate) fn position_to_byte_offset(text: &str, pos: Position) -> usize {
    // Advance to the first byte of the requested line.
    let mut line_start = 0usize;
    for _ in 0..pos.line {
        match text[line_start..].find('\n') {
            Some(i) => line_start += i + 1,
            None => return text.len(),
        }
    }

    let rest = &text[line_start..];
    let line = match rest.find('\n') {
        Some(i) => {
            let content = &rest[..i];
            // Exclude the '\r' of a CRLF terminator from the addressable content.
            content.strip_suffix('\r').unwrap_or(content)
        }
        // Last line without a terminator: everything that is left is content.
        None => rest,
    };

    line_start + utf16_offset_to_byte(line, pos.character as usize)
}
pub(crate) fn apply_content_change(text: &mut String, range: Range, new_text: &str) {
let start = position_to_byte_offset(text, range.start);
let end = position_to_byte_offset(text, range.end).max(start);
text.replace_range(start..end, new_text);
}
/// Converts a byte offset into `s` to the number of UTF-16 code units that
/// precede it.
///
/// Offsets past the end of `s` are clamped to the end of the string. An offset
/// that falls inside a multi-byte character is rounded down to the start of
/// that character, so a non-boundary offset never causes a panic.
pub(crate) fn byte_to_utf16(s: &str, byte_offset: usize) -> u32 {
    s.char_indices()
        // Keep only characters that end at or before the offset; this avoids
        // slicing `s` at an index that may not be a character boundary.
        .take_while(|&(start, ch)| start + ch.len_utf8() <= byte_offset)
        .map(|(_, ch)| ch.len_utf16() as u32)
        .sum()
}
/// Returns the length of `s` measured in UTF-16 code units.
pub(crate) fn utf16_code_units(s: &str) -> u32 {
    let mut units = 0u32;
    for ch in s.chars() {
        // Each character contributes one code unit, or two for a surrogate pair.
        units += ch.len_utf16() as u32;
    }
    units
}
#[cfg(test)]
mod tests {
    use super::*;

    // ---- byte_to_utf16 ----

    #[test]
    fn byte_to_utf16_ascii() {
        let units = byte_to_utf16("hello", 3);
        assert_eq!(units, 3);
    }

    #[test]
    fn byte_to_utf16_multibyte_bmp() {
        // 'é' takes two UTF-8 bytes but a single UTF-16 code unit.
        let s = "café";
        for (byte_offset, expected) in [(0, 0), (3, 3), (5, 4)] {
            assert_eq!(
                byte_to_utf16(s, byte_offset),
                expected,
                "byte offset {}",
                byte_offset
            );
        }
    }

    #[test]
    fn byte_to_utf16_surrogate_pair() {
        // The emoji takes four UTF-8 bytes and two UTF-16 code units.
        let s = "a😀b";
        for (byte_offset, expected) in [(1, 1), (5, 3), (6, 4)] {
            assert_eq!(
                byte_to_utf16(s, byte_offset),
                expected,
                "byte offset {}",
                byte_offset
            );
        }
    }

    #[test]
    fn byte_to_utf16_past_end_clamps() {
        let units = byte_to_utf16("hi", 100);
        assert_eq!(units, 2);
    }

    // ---- utf16_offset_to_byte ----

    #[test]
    fn utf16_offset_to_byte_ascii() {
        let byte_idx = utf16_offset_to_byte("hello", 3);
        assert_eq!(byte_idx, 3);
    }

    #[test]
    fn utf16_offset_to_byte_surrogate_pair() {
        let s = "a😀b";
        for (utf16_offset, expected) in [(1, 1), (3, 5)] {
            assert_eq!(
                utf16_offset_to_byte(s, utf16_offset),
                expected,
                "utf16 offset {}",
                utf16_offset
            );
        }
    }

    // ---- position_to_byte_offset ----

    #[test]
    fn position_to_byte_offset_basic() {
        let s = "<?php\necho 1;\n";
        let at = |line, character| Position { line, character };
        let cases = [
            (0, 0, 0),
            (0, 5, 5),
            (1, 0, 6),
            (1, 4, 10),
            // A character past the end of the line clamps to the line end.
            (0, 99, 5),
            // A line past the end of the document clamps to the document end.
            (9, 0, s.len()),
        ];
        for (line, character, expected) in cases {
            assert_eq!(
                position_to_byte_offset(s, at(line, character)),
                expected,
                "line {}, character {}",
                line,
                character
            );
        }
    }

    #[test]
    fn position_to_byte_offset_multibyte() {
        let s = "a😀b\nx";
        let at = |line, character| Position { line, character };
        let cases = [(0, 1, 1), (0, 3, 5), (1, 0, 7), (1, 1, 8)];
        for (line, character, expected) in cases {
            assert_eq!(
                position_to_byte_offset(s, at(line, character)),
                expected,
                "line {}, character {}",
                line,
                character
            );
        }
    }

    // ---- apply_content_change ----

    #[test]
    fn apply_content_change_replaces_inserts_deletes() {
        let span = |start_line, start_char, end_line, end_char| Range {
            start: Position {
                line: start_line,
                character: start_char,
            },
            end: Position {
                line: end_line,
                character: end_char,
            },
        };
        let cases = [
            // Replace a word within a line.
            ("<?php\necho one;\n", span(1, 5, 1, 8), "two", "<?php\necho two;\n"),
            // Empty range: pure insertion.
            ("ab\ncd\n", span(1, 1, 1, 1), "X", "ab\ncXd\n"),
            // Delete across a line break, joining two lines.
            ("ab\ncd\nef\n", span(0, 2, 1, 0), "", "abcd\nef\n"),
            // End before start: treated as an insertion at the start.
            ("abc", span(0, 2, 0, 1), "X", "abXc"),
        ];
        for (before, range, new_text, after) in cases {
            let mut text = String::from(before);
            apply_content_change(&mut text, range, new_text);
            assert_eq!(text, after);
        }
    }

    // ---- round trip ----

    #[test]
    fn byte_to_utf16_and_back_roundtrip() {
        let s = "café 😀 world";
        // Every character boundary must survive byte -> UTF-16 -> byte.
        for (byte_idx, _) in s.char_indices() {
            let units = byte_to_utf16(s, byte_idx) as usize;
            assert_eq!(utf16_offset_to_byte(s, units), byte_idx);
        }
    }
}