ast_grep_core/
source.rs

1//! This module defines the `Doc` and `Content` traits to abstract away source code encoding issues.
2//!
3//! ast-grep supports three kinds of encoding: utf-8 for CLI, utf-16 for nodeJS napi and `Vec<char>` for wasm.
4//! Different encoding will produce different tree-sitter Node's range and position.
5//!
6//! The `Content` trait is defined to abstract different encoding.
7//! It is used as associated type bound `Source` in the `Doc` trait.
8//! Its associated type `Underlying`  represents the underlying type of the content, e.g. `Vec<u8>`, `Vec<u16>`.
9//!
10//! `Doc` is a trait that defines a document that can be parsed by Tree-sitter.
11//! It has a `Source` associated type bounded by `Content` that represents the source code of the document,
12//! and a `Lang` associated type that represents the language of the document.
13
14use crate::{language::Language, node::KindId, Position};
15use std::borrow::Cow;
16use std::ops::Range;
17
18// https://github.com/tree-sitter/tree-sitter/blob/e4e5ffe517ca2c668689b24cb17c51b8c6db0790/cli/src/parse.rs
19#[derive(Debug)]
20pub struct Edit<S: Content> {
21  pub position: usize,
22  pub deleted_length: usize,
23  pub inserted_text: Vec<S::Underlying>,
24}
25
26/// NOTE: Some method names are the same as tree-sitter's methods.
27/// Fully Qualified Syntax may needed https://stackoverflow.com/a/44445976/2198656
28pub trait SgNode<'r>: Clone {
29  fn parent(&self) -> Option<Self>;
30  fn children(&self) -> impl ExactSizeIterator<Item = Self>;
31  fn kind(&self) -> Cow<str>;
32  fn kind_id(&self) -> KindId;
33  fn node_id(&self) -> usize;
34  fn range(&self) -> std::ops::Range<usize>;
35  fn start_pos(&self) -> Position;
36  fn end_pos(&self) -> Position;
37
38  // default implentation
39  fn ancestors(&self, _root: Self) -> impl Iterator<Item = Self> {
40    let mut ancestors = vec![];
41    let mut current = self.clone();
42    while let Some(parent) = current.parent() {
43      ancestors.push(parent.clone());
44      current = parent;
45    }
46    ancestors.reverse();
47    ancestors.into_iter()
48  }
49  fn dfs(&self) -> impl Iterator<Item = Self> {
50    let mut stack = vec![self.clone()];
51    std::iter::from_fn(move || {
52      if let Some(node) = stack.pop() {
53        let children: Vec<_> = node.children().collect();
54        stack.extend(children.into_iter().rev());
55        Some(node)
56      } else {
57        None
58      }
59    })
60  }
61  fn child(&self, nth: usize) -> Option<Self> {
62    self.children().nth(nth)
63  }
64  fn next(&self) -> Option<Self> {
65    let parent = self.parent()?;
66    let mut children = parent.children();
67    while let Some(child) = children.next() {
68      if child.node_id() == self.node_id() {
69        return children.next();
70      }
71    }
72    None
73  }
74  fn prev(&self) -> Option<Self> {
75    let parent = self.parent()?;
76    let children = parent.children();
77    let mut prev = None;
78    for child in children {
79      if child.node_id() == self.node_id() {
80        return prev;
81      }
82      prev = Some(child);
83    }
84    None
85  }
86  fn next_all(&self) -> impl Iterator<Item = Self> {
87    let mut next = self.next();
88    std::iter::from_fn(move || {
89      let n = next.clone()?;
90      next = n.next();
91      Some(n)
92    })
93  }
94  fn prev_all(&self) -> impl Iterator<Item = Self> {
95    let mut prev = self.prev();
96    std::iter::from_fn(move || {
97      let n = prev.clone()?;
98      prev = n.prev();
99      Some(n)
100    })
101  }
102  fn is_named(&self) -> bool {
103    true
104  }
105  /// N.B. it is different from is_named && is_leaf
106  /// if a node has no named children.
107  fn is_named_leaf(&self) -> bool {
108    self.is_leaf()
109  }
110  fn is_leaf(&self) -> bool {
111    self.children().count() == 0
112  }
113
114  // missing node is a tree-sitter specific concept
115  fn is_missing(&self) -> bool {
116    false
117  }
118  fn is_error(&self) -> bool {
119    false
120  }
121
122  fn field(&self, name: &str) -> Option<Self>;
123  fn field_children(&self, field_id: Option<u16>) -> impl Iterator<Item = Self>;
124  fn child_by_field_id(&self, field_id: u16) -> Option<Self>;
125}
126
127pub trait Doc: Clone + 'static {
128  type Source: Content;
129  type Lang: Language;
130  type Node<'r>: SgNode<'r>;
131  fn get_lang(&self) -> &Self::Lang;
132  fn get_source(&self) -> &Self::Source;
133  fn do_edit(&mut self, edit: &Edit<Self::Source>) -> Result<(), String>;
134  fn root_node(&self) -> Self::Node<'_>;
135  fn get_node_text<'a>(&'a self, node: &Self::Node<'a>) -> Cow<'a, str>;
136}
137
138pub trait Content: Sized {
139  type Underlying: Clone + PartialEq;
140  fn get_range(&self, range: Range<usize>) -> &[Self::Underlying];
141  /// Used for string replacement. We need this for
142  /// indentation and deindentation.
143  fn decode_str(src: &str) -> Cow<[Self::Underlying]>;
144  /// Used for string replacement. We need this for
145  /// transformation.
146  fn encode_bytes(bytes: &[Self::Underlying]) -> Cow<str>;
147  /// Get the character column at the given position
148  fn get_char_column(&self, column: usize, offset: usize) -> usize;
149}
150
151impl Content for String {
152  type Underlying = u8;
153  fn get_range(&self, range: Range<usize>) -> &[Self::Underlying] {
154    &self.as_bytes()[range]
155  }
156  fn decode_str(src: &str) -> Cow<[Self::Underlying]> {
157    Cow::Borrowed(src.as_bytes())
158  }
159  fn encode_bytes(bytes: &[Self::Underlying]) -> Cow<str> {
160    String::from_utf8_lossy(bytes)
161  }
162
163  /// This is an O(n) operation. We assume the col will not be a
164  /// huge number in reality. This may be problematic for special
165  /// files like compressed js
166  fn get_char_column(&self, _col: usize, offset: usize) -> usize {
167    let src = self.as_bytes();
168    let mut col = 0;
169    // TODO: is it possible to use SIMD here???
170    for &b in src[..offset].iter().rev() {
171      if b == b'\n' {
172        break;
173      }
174      // https://en.wikipedia.org/wiki/UTF-8#Description
175      if b & 0b1100_0000 != 0b1000_0000 {
176        col += 1;
177      }
178    }
179    col
180  }
181}