Skip to main content

ad_editor/lsp/
capabilities.rs

1//! Capability based logic
2use crate::{
3    buffer::Buffer,
4    exec::{Addr, AddrBase},
5    lsp::Pos,
6};
7use lsp_types::{InitializeResult, Location, Position, PositionEncodingKind, ServerCapabilities};
8use tracing::warn;
9
10#[derive(Debug)]
11#[allow(dead_code)]
12pub(crate) struct Capabilities {
13    inner: ServerCapabilities,
14    pub(super) position_encoding: PositionEncoding,
15}
16
17impl Capabilities {
18    pub(crate) fn try_new(res: InitializeResult) -> Option<Self> {
19        let position_encoding = match &res.capabilities.position_encoding {
20            Some(p) if *p == PositionEncodingKind::UTF8 => PositionEncoding::Utf8,
21            Some(p) if *p == PositionEncodingKind::UTF16 => PositionEncoding::Utf16,
22            Some(p) if *p == PositionEncodingKind::UTF32 => PositionEncoding::Utf32,
23            None => PositionEncoding::Utf16, // see quote from the spec below
24
25            Some(p) => {
26                warn!(
27                    "LSP provided unknown position encoding: {p:?} {:?}",
28                    res.server_info
29                );
30                return None;
31            }
32        };
33
34        Some(Self {
35            inner: res.capabilities,
36            position_encoding,
37        })
38    }
39
40    pub(crate) fn as_pretty_json(&self) -> Option<String> {
41        serde_json::to_string_pretty(&self.inner).ok()
42    }
43}
44
// NOTE: The LSP spec explicitly requires support for \n, \r and \r\n line endings,
//       which ad does not currently provide. Files using \r or \r\n will likely
//       result in malformed positions.
48
/// The unit in which the `character` offset of an LSP position is counted.
///
/// From the LSP 3.17 spec:
///
/// > The position encodings supported by the client. Client and server
/// > have to agree on the same position encoding to ensure that offsets
/// > (e.g. character position in a line) are interpreted the same on both
/// > side.
/// >
/// > To keep the protocol backwards compatible the following applies: if
/// > the value 'utf-16' is missing from the array of position encodings
/// > servers can assume that the client supports UTF-16. UTF-16 is
/// > therefore a mandatory encoding.
/// >
/// > If omitted it defaults to ['utf-16'].
/// >
/// > Implementation considerations: since the conversion from one encoding
/// > into another requires the content of the file / line the conversion
/// > is best done where the file is read which is usually on the server
/// > side.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) enum PositionEncoding {
    /// Offsets count raw bytes (UTF-8 code units).
    Utf8,
    /// Offsets count UTF-16 code units (Javascript / MS). This is the protocol default.
    Utf16,
    /// Offsets count Unicode code points.
    Utf32,
}
76
77impl PositionEncoding {
78    pub(crate) fn parse_lsp_position(&self, b: &Buffer, pos: Position) -> (usize, usize) {
79        let pos_line = pos.line as usize;
80        if pos_line > b.len_lines() - 1 {
81            warn!("LSP position out of bounds, clamping to EOF");
82            return (b.len_lines().saturating_sub(1), b.len_chars());
83        }
84
85        match self {
86            Self::Utf8 => {
87                let line_start = b.txt.line_to_byte(pos.line as usize);
88                let col = b.txt.chars_in_raw_range(
89                    b.txt.byte_to_raw_byte(line_start),
90                    b.txt.byte_to_raw_byte(line_start + pos.character as usize),
91                );
92
93                (pos.line as usize, col)
94            }
95
96            Self::Utf16 => {
97                let slice = b.txt.line(pos.line as usize);
98                let mut remaining = pos.character as usize;
99                let mut col = 0;
100                for ch in slice.chars() {
101                    if remaining == 0 {
102                        break;
103                    }
104                    remaining = remaining.saturating_sub(ch.len_utf16());
105                    col += 1;
106                }
107                if remaining > 0 {
108                    col = slice.chars().count(); // clamp to EOL
109                }
110
111                (pos.line as usize, col)
112            }
113
114            Self::Utf32 => (pos.line as usize, pos.character as usize),
115        }
116    }
117
118    pub(super) fn buffer_pos(&self, b: &Buffer) -> Pos {
119        let file = b.full_name();
120        let (y, x) = b.dot.active_cur().as_yx(b);
121        let (line, character) = self.lsp_position(b, y, x);
122
123        Pos::new(file, line, character)
124    }
125
126    fn lsp_position(&self, b: &Buffer, line: usize, col: usize) -> (u32, u32) {
127        match self {
128            Self::Utf8 => {
129                let line_start = b.txt.line_to_char(line);
130                let start_idx = b.txt.char_to_byte(line_start);
131                let character = b.txt.char_to_byte(line_start + col) - start_idx;
132
133                (line as u32, character as u32)
134            }
135
136            Self::Utf16 => {
137                let slice = b.txt.line(line);
138                let mut character = 0;
139                for ch in slice.chars().take(col) {
140                    character += ch.len_utf16();
141                }
142
143                (line as u32, character as u32)
144            }
145
146            Self::Utf32 => (line as u32, col as u32),
147        }
148    }
149}
150
151#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
152pub struct Coords {
153    pub(crate) start: Position,
154    pub(crate) end: Position,
155    pub(crate) encoding: PositionEncoding,
156}
157
158impl Coords {
159    pub(crate) fn new(loc: Location, encoding: PositionEncoding) -> (String, Self) {
160        let filepath = loc
161            .uri
162            .to_string()
163            .strip_prefix("file://")
164            .unwrap()
165            .to_owned();
166
167        let coords = Coords {
168            start: loc.range.start,
169            end: loc.range.end,
170            encoding,
171        };
172
173        (filepath, coords)
174    }
175
176    pub(crate) fn new_from_range(r: lsp_types::Range, encoding: PositionEncoding) -> Self {
177        Coords {
178            start: r.start,
179            end: r.end,
180            encoding,
181        }
182    }
183
184    pub(crate) fn new_from_pos(pos: Pos, encoding: PositionEncoding) -> Self {
185        Coords {
186            start: lsp_types::Position::new(pos.line, pos.character),
187            end: lsp_types::Position::new(pos.line, pos.character),
188            encoding,
189        }
190    }
191
192    pub fn line(&self) -> u32 {
193        self.start.line
194    }
195
196    pub(crate) fn as_addr(&self, b: &Buffer) -> Addr {
197        let (row_start, col_start) = self.encoding.parse_lsp_position(b, self.start);
198        let (mut row_end, mut col_end) = self.encoding.parse_lsp_position(b, self.end);
199
200        if (row_start, col_start) == (row_end, col_end) {
201            // LSP insert at this position within the buffer
202            Addr::Simple(AddrBase::LineAndColumn(row_start, col_start).into())
203        } else if row_start == row_end && col_end == col_start + 1 {
204            // LSP delete of a single character
205            Addr::Compound(
206                AddrBase::LineAndColumn(row_start, col_start).into(),
207                AddrBase::LineAndColumn(row_start, col_start).into(),
208            )
209        } else {
210            // From the LSP spec on Ranges:
211            //   https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#range
212            //
213            // "If you want to specify a range that contains a line including the line ending
214            // character(s) then use an end position denoting the start of the next line."
215            //
216            // This idea of targeting the line ending character(s) by specifying the first
217            // character of the following line doesn't seem self consistent given that the range is
218            // inclusive? Not sure how an LSP server is supposed to genuinely target the first
219            // character of a given line then...
220            // With that in mind, we need to check for this case and filter for when the line is
221            // actually a blank line otherwise removing full lines doesn't work.
222            if col_end == 0 && !b.txt.line_is_blank(row_end) {
223                row_end = row_end.saturating_sub(1);
224                col_end = b.txt.line(row_end).chars().count();
225            }
226
227            Addr::Compound(
228                AddrBase::LineAndColumn(row_start, col_start).into(),
229                AddrBase::LineAndColumn(row_end, col_end.saturating_sub(1)).into(),
230            )
231        }
232    }
233}
234
#[cfg(test)]
mod tests {
    use super::*;
    use lsp_types::Position;
    use simple_test_case::test_case;

    // An LSP position sits _between_ characters, like a cursor: position 0 is before the
    // first character and position N is after the Nth.
    //
    // For ASCII text a UTF-16 position is the same as the character index.
    //
    // Outside of ASCII this no longer holds: an emoji such as 😀 is encoded as a surrogate
    // pair and so occupies 2 UTF-16 code units. The string "a😀b" is therefore 3 characters
    // but 4 UTF-16 units: a(1) + 😀(2) + b(1)

    #[test_case("hello", 0, 0; "ascii position 0")]
    #[test_case("hello", 1, 1; "ascii position 1")]
    #[test_case("hello", 5, 5; "ascii position 5")]
    #[test_case("a😀b", 0, 0; "emoji position 0")]
    #[test_case("a😀b", 1, 1; "emoji position 1 after a before emoji")]
    #[test_case("a😀b", 3, 2; "emoji position 3 after emoji before b")]
    #[test_case("a😀b", 4, 3; "emoji position 4 after b")]
    #[test]
    fn parse_lsp_position_utf16_ascii(content: &str, lsp_char: u32, expected_col: usize) {
        let buffer = Buffer::new_virtual(0, "test", content, Default::default());
        let position = Position::new(0, lsp_char);

        let parsed = PositionEncoding::Utf16.parse_lsp_position(&buffer, position);

        assert_eq!(parsed, (0, expected_col));
    }
}