1#![allow(dead_code)]
2use std::collections::{HashMap, HashSet};
3
4use rand::{prelude::StdRng, Rng, SeedableRng};
5use tree_sitter::{Language, Node, Tree};
6
7use tree_sitter_edit::Editor;
8
9use crate::node_types::NodeTypes;
10
11#[derive(Debug, Default)]
12struct Edits(HashMap<usize, Vec<u8>>);
13
14impl Editor for Edits {
15 fn has_edit(&self, _tree: &Tree, node: &Node) -> bool {
16 self.0.contains_key(&node.id())
17 }
18
19 fn edit(&self, _source: &[u8], tree: &Tree, node: &Node) -> Vec<u8> {
20 debug_assert!(self.has_edit(tree, node));
21 self.0.get(&node.id()).unwrap().clone()
22 }
23}
24
25#[derive(Debug)]
26struct Branches<'a>(HashMap<&'static str, Vec<&'a [u8]>>);
27
28impl<'a> Branches<'a> {
29 fn new(trees: Vec<(&'a [u8], &'a Tree)>) -> Self {
30 let mut branches = HashMap::with_capacity(trees.len()); for (text, tree) in trees {
32 let mut nodes = vec![tree.root_node()];
33 while !nodes.is_empty() {
34 let mut children = Vec::with_capacity(nodes.len()); for node in nodes {
36 branches
37 .entry(node.kind())
38 .or_insert_with(|| HashSet::with_capacity(1))
39 .insert(&text[node.byte_range()]);
40 let mut i = 0;
41 while let Some(child) = node.child(i) {
42 children.push(child);
43 i += 1;
44 }
45 }
46 nodes = children;
47 }
48 }
49 Branches(
50 branches
51 .into_iter()
52 .map(|(k, s)| (k, s.iter().copied().collect()))
53 .collect(),
54 )
55 }
56
57 fn possible(&self) -> usize {
58 let mut possible_mutations = 0;
59 for s in self.0.values() {
60 possible_mutations += s.len() - 1;
61 }
62 possible_mutations
63 }
64}
65
66fn parse(language: Language, code: &str) -> tree_sitter::Tree {
67 let mut parser = tree_sitter::Parser::new();
68 parser
69 .set_language(language)
70 .expect("Failed to set tree-sitter parser language");
71 parser.parse(code, None).expect("Failed to parse code")
72}
73
74#[derive(Debug)]
76pub struct Config {
77 pub chaos: u8,
81 pub deletions: u8,
85 pub language: Language,
86 pub inter_splices: usize,
89 pub max_size: usize,
93 pub node_types: NodeTypes,
94 pub reparse: usize,
98 pub seed: u64,
99}
100
101#[derive(Debug)]
102pub struct Splicer<'a> {
103 pub language: Language,
104 branches: Branches<'a>,
105 chaos: u8,
106 deletions: u8,
107 kinds: Vec<&'static str>,
108 inter_splices: usize,
110 max_size: usize,
111 node_types: NodeTypes,
112 trees: Vec<(&'a [u8], &'a Tree)>,
113 reparse: usize,
114 rng: StdRng,
115}
116
117impl<'a> Splicer<'a> {
118 fn delta(node: Node<'_>, replace: &[u8]) -> isize {
119 let range = node.byte_range();
120 isize::try_from(replace.len()).unwrap_or_default()
121 - isize::try_from(range.end - range.start).unwrap_or_default()
122 }
123
124 pub fn new(config: Config, files: &'a HashMap<String, (Vec<u8>, Tree)>) -> Option<Self> {
125 let trees: Vec<_> = files
126 .iter()
127 .map(|(_, (txt, tree))| (txt.as_ref(), tree))
128 .collect();
129
130 let mut all_empty = true;
131 for (_bytes, tree) in files.values() {
132 if tree.root_node().child_count() != 0 {
133 all_empty = false;
134 break;
135 }
136 }
137 if all_empty {
138 return None;
139 }
140
141 let branches = Branches::new(
142 files
143 .iter()
144 .map(|(_, (txt, tree))| (txt.as_ref(), tree))
145 .collect(),
146 );
147 let rng = rand::rngs::StdRng::seed_from_u64(config.seed);
148 let kinds = branches.0.keys().copied().collect();
149 Some(Splicer {
150 chaos: config.chaos,
151 deletions: config.deletions,
152 language: config.language,
153 branches,
154 kinds,
155 inter_splices: config.inter_splices,
157 max_size: config.max_size,
158 node_types: config.node_types,
159 reparse: config.reparse,
160 rng,
161 trees,
162 })
163 }
164
165 fn pick_usize(&mut self, n: usize) -> usize {
166 self.rng.gen_range(0..n)
167 }
168
169 fn pick_idx<T>(&mut self, v: &[T]) -> usize {
170 self.pick_usize(v.len())
171 }
172
173 fn all_nodes<'b>(&self, tree: &'b Tree) -> Vec<Node<'b>> {
174 let mut all = Vec::with_capacity(16); let root = tree.root_node();
176 let mut cursor = tree.walk();
177 let mut nodes: HashSet<_> = root.children(&mut cursor).collect();
178 while !nodes.is_empty() {
179 let mut next = HashSet::new();
180 for node in nodes {
181 debug_assert!(!next.contains(&node));
182 all.push(node);
183 let mut child_cursor = tree.walk();
184 for child in node.children(&mut child_cursor) {
185 debug_assert!(child.id() != node.id());
186 debug_assert!(!next.contains(&child));
187 next.insert(child);
188 }
189 }
190 nodes = next;
191 }
192 all
193 }
194
195 fn pick_node<'b>(&mut self, tree: &'b Tree) -> Node<'b> {
196 let nodes = self.all_nodes(tree);
197 if nodes.is_empty() {
198 return tree.root_node();
199 }
200 *nodes.get(self.pick_idx(nodes.as_slice())).unwrap()
201 }
202
203 fn delete_node(&mut self, _text: &[u8], tree: &Tree) -> (usize, Vec<u8>, isize) {
204 let chaotic = self.rng.gen_range(0..100) < self.chaos;
205 if chaotic {
206 let node = self.pick_node(tree);
207 return (node.id(), Vec::new(), Self::delta(node, &[]));
208 }
209 let nodes = self.all_nodes(tree);
210 if nodes.iter().all(|n| !self.node_types.optional_node(n)) {
211 let node = self.pick_node(tree);
212 return (node.id(), Vec::new(), Self::delta(node, &[]));
213 }
214 let mut node = nodes.get(self.pick_idx(nodes.as_slice())).unwrap();
215 while !self.node_types.optional_node(node) {
216 node = nodes.get(self.pick_idx(nodes.as_slice())).unwrap();
217 }
218 (node.id(), Vec::new(), Self::delta(*node, &[]))
219 }
220
221 fn splice_node(&mut self, text: &[u8], tree: &Tree) -> (usize, Vec<u8>, isize) {
222 let chaotic = self.rng.gen_range(0..100) < self.chaos;
223
224 let mut node = tree.root_node();
225 let mut candidates = Vec::new();
226 while candidates.len() <= 1 {
230 node = self.pick_node(tree);
231 candidates = if chaotic {
232 let kind_idx = self.rng.gen_range(0..self.kinds.len());
233 let kind = self.kinds.get(kind_idx).unwrap();
234 self.branches.0.get(kind).unwrap().clone()
235 } else {
236 self.branches
237 .0
238 .get(node.kind())
239 .cloned()
240 .unwrap_or_default()
241 };
242 }
243
244 let idx = self.rng.gen_range(0..candidates.len());
245 let mut candidate = candidates.get(idx).unwrap();
246 let node_text = &text[node.byte_range()];
248 while candidates.len() > 1 && candidate == &node_text {
249 let idx = self.rng.gen_range(0..candidates.len());
250 candidate = candidates.get(idx).unwrap();
251 }
252 let replace = Vec::from(*candidate);
258 let delta = Self::delta(node, replace.as_slice());
259 (node.id(), replace, delta)
260 }
261
262 pub fn splice_tree(&mut self, text0: &[u8], mut tree: Tree) -> Option<Vec<u8>> {
263 let mut edits = Edits::default();
265 if self.inter_splices == 0 {
266 return None;
267 }
268 let splices = self.rng.gen_range(1..self.inter_splices);
269 let mut text = Vec::from(text0);
270 let mut sz = isize::try_from(text.len()).unwrap_or_default();
271 for i in 0..splices {
272 let (id, bytes, delta) = if self.rng.gen_range(0..100) < self.deletions {
273 self.delete_node(text.as_slice(), &tree)
274 } else {
275 self.splice_node(text.as_slice(), &tree)
276 };
277 sz += delta;
278 let sized_out = usize::try_from(sz).unwrap_or_default() >= self.max_size;
279 edits.0.insert(id, bytes);
280 if i % self.reparse == 0 || i + 1 == splices || sized_out {
281 let mut result = Vec::with_capacity(usize::try_from(sz).unwrap_or_default());
282 tree_sitter_edit::render(&mut result, &tree, text.as_slice(), &edits).ok()?;
283 text = result.clone();
284 tree = parse(self.language, &String::from_utf8_lossy(text.as_slice()));
285 edits = Edits::default();
286 }
287 if sized_out {
288 break;
289 }
290 }
291 Some(text)
292 }
293}
294
295impl Iterator for Splicer<'_> {
296 type Item = Vec<u8>;
297
298 fn next(&mut self) -> Option<Self::Item> {
299 let mut tree_idx: usize = self.pick_usize(self.trees.len());
300 let (mut text, mut tree) = *self.trees.get(tree_idx).unwrap();
301 while text.len() > self.max_size {
302 tree_idx = self.pick_usize(self.trees.len());
303 (text, tree) = *self.trees.get(tree_idx).unwrap();
304 }
305 self.splice_tree(text, tree.clone())
306 }
307}