use crate::error::{Error, Result, ResultExt};
use log::debug;
use ropey::Rope;
use std::fmt::Display;
/// A location in a text document: a line index plus an offset within that line.
///
/// The offset is stored as a UTF-8 byte offset (see `character_byte` and the
/// `(utf-8)` suffix in the `Display` output). Conversions to and from other
/// encodings go through `from_lsp` / `into_lsp`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct Position {
/// Line index; used directly as the rope line index.
line: u32,
/// UTF-8 byte offset from the start of `line`.
character: u32,
}
impl Position {
pub const ZERO: Self = Self {
line: 0,
character: 0,
};
pub const MAX: Self = Self {
line: u32::MAX,
character: u32::MAX,
};
pub fn new(line: u32, character: u32) -> Self {
Self { line, character }
}
pub fn from_lsp(
pos: tower_lsp::lsp_types::Position,
rope: &Rope,
encoding: &tower_lsp::lsp_types::PositionEncodingKind,
) -> Result<Self> {
let character = match encoding.as_str() {
"utf-8" => pos.character,
"utf-16" => {
#[allow(clippy::cast_possible_truncation)]
let i = utf16_offset_to_utf8_offset(
rope.get_line(pos.line as usize)
.ok_or(Error::PositionOutOfBoundsTowerLsp(pos))?
.to_string()
.as_str(),
pos.character as usize,
)
.ok_or(Error::PositionOutOfBoundsTowerLsp(pos))? as u32;
debug!(
"Converting utf-16 ({}) index into utf-8 ({})",
pos.character, i
);
i
}
_ => todo!(),
};
Ok(Self {
line: pos.line,
character,
})
}
pub fn into_lsp(
self,
rope: &Rope,
encoding: &tower_lsp::lsp_types::PositionEncodingKind,
) -> Result<tower_lsp::lsp_types::Position> {
let character = match encoding.as_str() {
"utf-8" => self.character,
"utf-16" => {
#[allow(clippy::cast_possible_truncation)]
let i = utf8_offset_to_utf16_offset(
rope.get_line(self.line as usize)
.ok_or(Error::PositionOutOfBounds(self))?
.to_string()
.as_str(),
self.character as usize,
)
.ok_or(Error::PositionOutOfBounds(self))? as u32;
i
}
_ => todo!(),
};
Ok(tower_lsp::lsp_types::Position {
line: self.line,
character,
})
}
pub fn new_from_byte_index(rope: &Rope, index: usize) -> Self {
let line = rope.try_byte_to_line(index).unwrap_or(rope.len_lines() - 1);
let character = index - rope.line_to_byte(line);
#[allow(clippy::cast_possible_truncation)]
Self {
line: line as u32,
character: character as u32,
}
}
pub fn line(self) -> u32 {
self.line
}
pub fn character_byte(self) -> u32 {
self.character
}
pub fn byte_index(self, rope: &Rope) -> usize {
rope.try_line_to_byte(self.line as usize)
.map_err(std::convert::Into::into)
.inspect_log()
.unwrap_or(rope.line_to_byte(rope.len_lines() - 1))
+ self.character as usize
}
pub fn char_index(self, rope: &Rope) -> usize {
rope.try_byte_to_char(self.byte_index(rope))
.map_err(std::convert::Into::into)
.inspect_log()
.unwrap_or(rope.len_chars())
}
pub fn moved_right(self, char_offset: u32, rope: &Rope) -> Self {
let char_idx = self.char_index(rope);
let char_idx = char_idx.saturating_add(char_offset as usize);
let char_idx = char_idx.min(rope.len_chars().saturating_sub(1)); Self::new_from_byte_index(rope, rope.char_to_byte(char_idx))
}
pub fn moved_left(self, char_offset: u32, rope: &Rope) -> Self {
let char_idx = self.char_index(rope);
let char_idx = char_idx.saturating_sub(char_offset as usize);
Self::new_from_byte_index(rope, rope.char_to_byte(char_idx))
}
}
impl Display for Position {
    /// Writes the position as `<line>:<byte offset> (utf-8)`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { line, character } = self;
        write!(f, "{}:{} (utf-8)", line, character)
    }
}
/// Translates an offset counted in UTF-16 code units into the matching UTF-8
/// byte offset within `s`.
///
/// Returns `None` when `utf16_offset` lies past the end of `s` or falls inside
/// a surrogate pair (i.e. does not sit on a character boundary).
fn utf16_offset_to_utf8_offset(s: &str, utf16_offset: usize) -> Option<usize> {
    let mut units_seen = 0usize;
    let mut bytes_seen = 0usize;
    let mut remaining = s.chars();

    // Consume characters until the target is reached or overshot.
    while units_seen < utf16_offset {
        match remaining.next() {
            Some(ch) => {
                units_seen += ch.len_utf16();
                bytes_seen += ch.len_utf8();
            }
            // Ran out of text before reaching the requested offset.
            None => return None,
        }
    }

    // Overshooting means the target was in the middle of a surrogate pair.
    if units_seen == utf16_offset {
        Some(bytes_seen)
    } else {
        None
    }
}
/// Translates a UTF-8 byte offset within `s` into the matching offset counted
/// in UTF-16 code units.
///
/// Returns `None` when `utf8_offset` lies past the end of `s` or falls in the
/// middle of a multi-byte character.
fn utf8_offset_to_utf16_offset(s: &str, utf8_offset: usize) -> Option<usize> {
    // `str::get` rejects both out-of-range ends and non-boundary ends, which
    // are exactly the cases that have no UTF-16 equivalent.
    let prefix = s.get(..utf8_offset)?;
    Some(prefix.encode_utf16().count())
}
impl From<tree_sitter_c2rust::Point> for Position {
    /// Maps a tree-sitter point onto a position: `row` becomes the line and
    /// `column` becomes the character offset.
    ///
    /// Both values are narrowed with `as u32`, so anything wider than `u32`
    /// is truncated.
    #[allow(clippy::cast_possible_truncation)]
    fn from(value: tree_sitter_c2rust::Point) -> Self {
        let tree_sitter_c2rust::Point { row, column } = value;
        Self {
            line: row as u32,
            character: column as u32,
        }
    }
}
impl From<Position> for tree_sitter_c2rust::Point {
fn from(value: Position) -> tree_sitter_c2rust::Point {
tree_sitter_c2rust::Point {
row: value.line as usize,
column: value.character as usize,
}
}
}
impl PartialOrd for Position {
    /// Positions are totally ordered, so this always defers to [`Ord::cmp`]
    /// and never returns `None`.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(Ord::cmp(self, other))
    }
}
impl Ord for Position {
    /// Orders positions by line first and by character offset second.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        // Tuples compare lexicographically, which is exactly line-then-offset.
        let lhs = (self.line, self.character);
        let rhs = (other.line, other.character);
        lhs.cmp(&rhs)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// For pure ASCII text, UTF-8 and UTF-16 offsets coincide.
    #[test]
    fn test_ascii_only() {
        let s = "hello";
        assert_eq!(utf16_offset_to_utf8_offset(s, 0), Some(0));
        assert_eq!(utf16_offset_to_utf8_offset(s, 3), Some(3));
        assert_eq!(utf16_offset_to_utf8_offset(s, 5), Some(5));
        assert_eq!(utf16_offset_to_utf8_offset(s, 6), None);
        assert_eq!(utf8_offset_to_utf16_offset(s, 0), Some(0));
        assert_eq!(utf8_offset_to_utf16_offset(s, 3), Some(3));
        assert_eq!(utf8_offset_to_utf16_offset(s, 5), Some(5));
        assert_eq!(utf8_offset_to_utf16_offset(s, 6), None);
    }

    /// The emoji takes 4 UTF-8 bytes and 2 UTF-16 code units (a surrogate pair).
    #[test]
    fn test_with_emoji() {
        let s = "a🌍b";

        assert_eq!(utf16_offset_to_utf8_offset(s, 0), Some(0));
        assert_eq!(utf16_offset_to_utf8_offset(s, 1), Some(1));
        // Offset 2 would split the surrogate pair.
        assert_eq!(utf16_offset_to_utf8_offset(s, 2), None);
        assert_eq!(utf16_offset_to_utf8_offset(s, 3), Some(5));
        assert_eq!(utf16_offset_to_utf8_offset(s, 4), Some(6));
        // One past the end of the string.
        assert_eq!(utf16_offset_to_utf8_offset(s, 5), None);

        assert_eq!(utf8_offset_to_utf16_offset(s, 0), Some(0));
        assert_eq!(utf8_offset_to_utf16_offset(s, 1), Some(1));
        // Offsets 2..=4 fall inside the emoji's UTF-8 encoding.
        assert_eq!(utf8_offset_to_utf16_offset(s, 2), None);
        assert_eq!(utf8_offset_to_utf16_offset(s, 3), None);
        assert_eq!(utf8_offset_to_utf16_offset(s, 4), None);
        assert_eq!(utf8_offset_to_utf16_offset(s, 5), Some(3));
        assert_eq!(utf8_offset_to_utf16_offset(s, 6), Some(4));
        // One past the end of the string.
        assert_eq!(utf8_offset_to_utf16_offset(s, 7), None);
    }

    /// Only offset 0 is valid for the empty string.
    #[test]
    fn test_empty_string() {
        let s = "";
        assert_eq!(utf16_offset_to_utf8_offset(s, 0), Some(0));
        assert_eq!(utf16_offset_to_utf8_offset(s, 1), None);
        assert_eq!(utf8_offset_to_utf16_offset(s, 0), Some(0));
        assert_eq!(utf8_offset_to_utf16_offset(s, 1), None);
    }

    /// Every convertible offset must survive a round trip, in both directions.
    #[test]
    fn test_round_trip_conversion() {
        let s = "Hello 🌍 World! 🦀";

        for utf16_offset in 0..=s.chars().map(char::len_utf16).sum::<usize>() {
            if let Some(utf8_offset) = utf16_offset_to_utf8_offset(s, utf16_offset) {
                assert_eq!(
                    utf8_offset_to_utf16_offset(s, utf8_offset),
                    Some(utf16_offset)
                );
            }
        }

        for utf8_offset in 0..=s.len() {
            if let Some(utf16_offset) = utf8_offset_to_utf16_offset(s, utf8_offset) {
                assert_eq!(
                    utf16_offset_to_utf8_offset(s, utf16_offset),
                    Some(utf8_offset)
                );
            }
        }
    }
}