1use crate::language::Language;
15use std::borrow::Cow;
16use std::ops::Range;
17use thiserror::Error;
18use tree_sitter::{
19 InputEdit, Language as TsLang, LanguageError, Node, Parser, ParserError, Point, Tree,
20};
21
22#[inline]
23fn parse_lang(
24 parse_fn: impl Fn(&mut Parser) -> Result<Option<Tree>, ParserError>,
25 ts_lang: TsLang,
26) -> Result<Tree, TSParseError> {
27 let mut parser = Parser::new()?;
28 parser.set_language(&ts_lang)?;
29 if let Some(tree) = parse_fn(&mut parser)? {
30 Ok(tree)
31 } else {
32 Err(TSParseError::TreeUnavailable)
33 }
34}
35
36#[derive(Debug)]
38pub struct Edit<S: Content> {
39 pub position: usize,
40 pub deleted_length: usize,
41 pub inserted_text: Vec<S::Underlying>,
42}
43
44fn position_for_offset(input: &[u8], offset: usize) -> Point {
45 debug_assert!(offset <= input.len());
46 let (mut row, mut col) = (0, 0);
47 for c in &input[0..offset] {
48 if *c as char == '\n' {
49 row += 1;
50 col = 0;
51 } else {
52 col += 1;
53 }
54 }
55 Point::new(row, col)
56}
57
58pub fn perform_edit<S: Content>(tree: &mut Tree, input: &mut S, edit: &Edit<S>) -> InputEdit {
59 let edit = input.accept_edit(edit);
60 tree.edit(&edit);
61 edit
62}
63
64#[derive(Debug, Error)]
66pub enum TSParseError {
67 #[error("web-tree-sitter parser is not available")]
68 Parse(#[from] ParserError),
69 #[error("incompatible `Language` is assigned to a `Parser`.")]
70 Language(#[from] LanguageError),
71 #[error("general error when tree-sitter fails to parse.")]
77 TreeUnavailable,
78}
79
80pub trait Doc: Clone {
81 type Source: Content;
82 type Lang: Language;
83 fn get_lang(&self) -> &Self::Lang;
84 fn get_source(&self) -> &Self::Source;
85 fn get_source_mut(&mut self) -> &mut Self::Source;
86 fn parse(&self, old_tree: Option<&Tree>) -> Result<Tree, TSParseError> {
87 let source = self.get_source();
88 let lang = self.get_lang().get_ts_language();
89 parse_lang(|p| source.parse_tree_sitter(p, old_tree), lang)
90 }
91 fn clone_with_lang(&self, lang: Self::Lang) -> Self;
92 fn from_str(src: &str, lang: Self::Lang) -> Self;
96}
97
98#[derive(Clone)]
99pub struct StrDoc<L: Language> {
100 pub src: String,
101 pub lang: L,
102}
103
104impl<L: Language> StrDoc<L> {
105 pub fn new(src: &str, lang: L) -> Self {
106 Self {
107 src: src.into(),
108 lang,
109 }
110 }
111}
112
113impl<L: Language> Doc for StrDoc<L> {
114 type Source = String;
115 type Lang = L;
116 fn get_lang(&self) -> &Self::Lang {
117 &self.lang
118 }
119 fn get_source(&self) -> &Self::Source {
120 &self.src
121 }
122 fn get_source_mut(&mut self) -> &mut Self::Source {
123 &mut self.src
124 }
125 fn from_str(src: &str, lang: L) -> Self {
126 Self::new(src, lang)
127 }
128 fn clone_with_lang(&self, lang: Self::Lang) -> Self {
129 Self::new(&self.src, lang)
130 }
131}
132
133pub trait Content: Sized {
134 type Underlying: Clone + PartialEq;
135 fn parse_tree_sitter(
136 &self,
137 parser: &mut Parser,
138 tree: Option<&Tree>,
139 ) -> Result<Option<Tree>, ParserError>;
140 fn get_range(&self, range: Range<usize>) -> &[Self::Underlying];
141 fn accept_edit(&mut self, edit: &Edit<Self>) -> InputEdit;
142 fn get_text<'a>(&'a self, node: &Node) -> Cow<'a, str>;
143 fn decode_str(src: &str) -> Cow<[Self::Underlying]>;
146 fn encode_bytes(bytes: &[Self::Underlying]) -> Cow<str>;
149 fn get_char_column(&self, column: usize, offset: usize) -> usize;
151}
152
153impl Content for String {
154 type Underlying = u8;
155 fn parse_tree_sitter(
156 &self,
157 parser: &mut Parser,
158 tree: Option<&Tree>,
159 ) -> Result<Option<Tree>, ParserError> {
160 parser.parse(self.as_bytes(), tree)
161 }
162 fn get_range(&self, range: Range<usize>) -> &[Self::Underlying] {
163 &self.as_bytes()[range]
164 }
165 fn get_text<'a>(&'a self, node: &Node) -> Cow<'a, str> {
166 node
167 .utf8_text(self.as_bytes())
168 .expect("invalid source text encoding")
169 }
170 fn accept_edit(&mut self, edit: &Edit<Self>) -> InputEdit {
171 let start_byte = edit.position;
172 let old_end_byte = edit.position + edit.deleted_length;
173 let new_end_byte = edit.position + edit.inserted_text.len();
174 let input = unsafe { self.as_mut_vec() };
175 let start_position = position_for_offset(input, start_byte);
176 let old_end_position = position_for_offset(input, old_end_byte);
177 input.splice(start_byte..old_end_byte, edit.inserted_text.clone());
178 let new_end_position = position_for_offset(input, new_end_byte);
179 InputEdit::new(
180 start_byte as u32,
181 old_end_byte as u32,
182 new_end_byte as u32,
183 &start_position,
184 &old_end_position,
185 &new_end_position,
186 )
187 }
188 fn decode_str(src: &str) -> Cow<[Self::Underlying]> {
189 Cow::Borrowed(src.as_bytes())
190 }
191 fn encode_bytes(bytes: &[Self::Underlying]) -> Cow<str> {
192 String::from_utf8_lossy(bytes)
193 }
194
195 fn get_char_column(&self, _col: usize, offset: usize) -> usize {
199 let src = self.as_bytes();
200 let mut col = 0;
201 for &b in src[..offset].iter().rev() {
203 if b == b'\n' {
204 break;
205 }
206 if b & 0b1100_0000 != 0b1000_0000 {
208 col += 1;
209 }
210 }
211 col
212 }
213}
214
215#[cfg(test)]
216mod test {
217 use super::*;
218 use crate::language::{Language, Tsx};
219
220 fn parse(src: &str) -> Result<Tree, TSParseError> {
221 parse_lang(|p| p.parse(src, None), Tsx.get_ts_language())
222 }
223
224 #[test]
225 fn test_tree_sitter() -> Result<(), TSParseError> {
226 let tree = parse("var a = 1234")?;
227 let root_node = tree.root_node();
228 assert_eq!(root_node.kind(), "program");
229 assert_eq!(root_node.start_position().column(), 0);
230 assert_eq!(root_node.end_position().column(), 12);
231 assert_eq!(
232 root_node.to_sexp(),
233 "(program (variable_declaration (variable_declarator name: (identifier) value: (number))))"
234 );
235 Ok(())
236 }
237
238 #[test]
239 fn test_object_literal() -> Result<(), TSParseError> {
240 let tree = parse("{a: $X}")?;
241 let root_node = tree.root_node();
242 assert_eq!(root_node.to_sexp(), "(program (expression_statement (object (pair key: (property_identifier) value: (identifier)))))");
244 Ok(())
245 }
246
247 #[test]
248 fn test_string() -> Result<(), TSParseError> {
249 let tree = parse("'$A'")?;
250 let root_node = tree.root_node();
251 assert_eq!(
252 root_node.to_sexp(),
253 "(program (expression_statement (string (string_fragment))))"
254 );
255 Ok(())
256 }
257
258 #[test]
259 fn test_row_col() -> Result<(), TSParseError> {
260 let tree = parse("😄")?;
261 let root = tree.root_node();
262 assert_eq!(root.start_position(), Point::new(0, 0));
263 assert_eq!(root.end_position(), Point::new(0, 4));
265 Ok(())
266 }
267
268 #[test]
269 fn test_edit() -> Result<(), TSParseError> {
270 let mut src = "a + b".to_string();
271 let mut tree = parse(&src)?;
272 let _ = perform_edit(
273 &mut tree,
274 &mut src,
275 &Edit {
276 position: 1,
277 deleted_length: 0,
278 inserted_text: " * b".into(),
279 },
280 );
281 let tree2 = parse_lang(|p| p.parse(&src, Some(&tree)), Tsx.get_ts_language())?;
282 assert_eq!(
283 tree.root_node().to_sexp(),
284 "(program (expression_statement (binary_expression left: (identifier) right: (identifier))))"
285 );
286 assert_eq!(tree2.root_node().to_sexp(), "(program (expression_statement (binary_expression left: (binary_expression left: (identifier) right: (identifier)) right: (identifier))))");
287 Ok(())
288 }
289}