hx_lsp/
encoding.rs

1use async_lsp::lsp_types::{Position, Range, TextDocumentContentChangeEvent};
2use ropey::{Rope, RopeSlice};
3use tracing::warn;
4
5use crate::errors::Error;
6
// References:
// helix-lsp/src/lib.rs
// https://gist.github.com/rojas-diego/04d9c4e3fff5f8374f29b9b738d541ef
10
/// Unit in which the `character` field of an LSP position is counted.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum OffsetEncoding {
    /// Offsets count UTF-8 code units, i.e. bytes.
    Utf8,
    /// Offsets count UTF-16 code units. This is the `Default` variant.
    #[default]
    Utf16,
    /// Offsets count UTF-32 code units, i.e. `char`s.
    Utf32,
}
21
22/// Converts LSP Position to a position in the document.
23///
24/// Returns `None` if position.line is out of bounds or an overflow occurs
25pub fn lsp_pos_to_pos(
26    doc: &Rope,
27    pos: Position,
28    offset_encoding: OffsetEncoding,
29) -> Result<usize, Error> {
30    let pos_line = pos.line as usize;
31    if pos_line > doc.len_lines() - 1 {
32        warn!("LSP position {pos:?} out of range assuming EOF");
33        return Err(Error::PositionOutOfBounds(pos.line, pos.character));
34    }
35
36    let slice = match doc.get_line(pos.line as usize) {
37        Some(line) => line,
38        None => return Err(Error::PositionOutOfBounds(pos.line, pos.character)),
39    };
40
41    match offset_encoding {
42        OffsetEncoding::Utf8 => slice.try_byte_to_char(pos.character as usize),
43        OffsetEncoding::Utf16 => slice.try_utf16_cu_to_char(pos.character as usize),
44        OffsetEncoding::Utf32 => Ok(pos.character as usize),
45    }
46    .map(|p| p + doc.line_to_char(pos.line as usize))
47    .map_err(|_| Error::PositionOutOfBounds(pos.line, pos.character))
48}
49
50/// 增量变更文本
51pub fn apply_content_change(
52    doc: &mut Rope,
53    change: &TextDocumentContentChangeEvent,
54) -> Result<(), Error> {
55    let offset_encoding = OffsetEncoding::Utf16;
56    match change.range {
57        Some(range) => {
58            assert!(
59                range.start.line < range.end.line
60                    || (range.start.line == range.end.line
61                        && range.start.character <= range.end.character)
62            );
63
64            // 获取 line 中的索引
65            let change_start_doc_char_idx =
66                lsp_pos_to_pos(doc, range.start, offset_encoding).unwrap();
67            let change_end_doc_char_idx = match range.start == range.end {
68                true => change_start_doc_char_idx,
69                false => lsp_pos_to_pos(doc, range.end, offset_encoding).unwrap(),
70            };
71
72            // 移除区域并插入新的文本
73            doc.remove(change_start_doc_char_idx..change_end_doc_char_idx);
74            doc.insert(change_start_doc_char_idx, &change.text);
75        }
76        None => {
77            *doc = Rope::from_str(&change.text);
78        }
79    }
80    Ok(())
81}
82
83//  If input as field or attribute return true.
84pub fn is_field(line: &RopeSlice, line_character_pos: usize) -> bool {
85    if line_character_pos == 0 || line_character_pos > line.len_chars() {
86        return false;
87    }
88
89    let mut after_punctuation = false;
90    let _offset = line
91        .chars_at(line_character_pos)
92        .reversed()
93        .take_while(|&ch| {
94            if char_is_punctuation(ch) {
95                after_punctuation = true;
96                return true;
97            }
98            char_is_word(ch)
99        })
100        .count();
101
102    after_punctuation
103}
104
105pub fn get_current_word<'a>(line: &'a RopeSlice, line_character_pos: usize) -> Option<&'a str> {
106    if line_character_pos == 0 || line_character_pos > line.len_chars() {
107        return None;
108    }
109
110    let offset_sub = line
111        .chars_at(line_character_pos)
112        .reversed()
113        .take_while(|&ch| char_is_word(ch))
114        .count();
115
116    let offset_add = line
117        .chars_at(line_character_pos)
118        .take_while(|&ch| char_is_word(ch))
119        .count();
120
121    if offset_sub == 0 && offset_add == 0 {
122        return None;
123    }
124
125    line.slice(
126        line_character_pos.saturating_sub(offset_sub)
127            ..line_character_pos.saturating_add(offset_add),
128    )
129    .as_str()
130}
131
132/// 获取内容
133pub fn get_range_content<'a>(doc: &'a Rope, range: &Range) -> Option<RopeSlice<'a>> {
134    let offset_encoding = OffsetEncoding::Utf16;
135    if range.start > range.end {
136        return None;
137    }
138
139    let start_idx = lsp_pos_to_pos(doc, range.start, offset_encoding).unwrap();
140    let end_idx = match range.start == range.end {
141        true => start_idx,
142        false => lsp_pos_to_pos(doc, range.end, offset_encoding).unwrap(),
143    };
144    let s = doc.slice(start_idx..end_idx);
145    Some(s)
146}
147
148#[inline]
149pub fn char_is_punctuation(ch: char) -> bool {
150    use unicode_general_category::{GeneralCategory, get_general_category};
151
152    matches!(
153        get_general_category(ch),
154        GeneralCategory::OtherPunctuation
155            | GeneralCategory::OpenPunctuation
156            | GeneralCategory::ClosePunctuation
157            | GeneralCategory::InitialPunctuation
158            | GeneralCategory::FinalPunctuation
159            | GeneralCategory::ConnectorPunctuation
160            | GeneralCategory::DashPunctuation
161            | GeneralCategory::MathSymbol
162            | GeneralCategory::CurrencySymbol
163            | GeneralCategory::ModifierSymbol
164    )
165}
166
/// Returns `true` if `ch` can be part of a word: an underscore or any
/// alphanumeric char.
#[inline]
pub fn char_is_word(ch: char) -> bool {
    matches!(ch, '_') || ch.is_alphanumeric()
}
171
#[cfg(test)]
mod test {
    use async_lsp::lsp_types::{Position, Range};
    use ropey::{Rope, RopeSlice};

    use super::{char_is_punctuation, get_current_word, get_range_content};

    /// Ranges over a line of CJK text select the expected substrings.
    #[test]
    fn test_get_range_content() {
        // (document text, (start line, start char, end line, end char), expected)
        let cases = [
            ("你好世界", (0, 0, 0, 2), "你好"),
            ("你好世界", (0, 2, 0, 4), "世界"),
        ];

        for (input, (start_line, start_char, end_line, end_char), expected) in cases {
            let doc = Rope::from_str(input);
            let range = Range::new(
                Position::new(start_line, start_char),
                Position::new(end_line, end_char),
            );
            let result = get_range_content(&doc, &range)
                .map(|slice| slice.to_string())
                .unwrap_or_default();
            assert_eq!(result, expected, "{input}:\n {result} != {expected}")
        }
    }

    /// A position inside the last word of a line returns that whole word.
    #[test]
    fn test_get_last() {
        let line = RopeSlice::from("abcd ef1h");
        let word = get_current_word(&line, 7);
        assert_eq!(Some("ef1h"), word);
    }

    /// A colon is classified as punctuation.
    #[test]
    fn test_pun() {
        assert!(char_is_punctuation(':'));
    }
}