1#![allow(dead_code)]
2use std::collections::{HashMap, HashSet};
3
4use rand::{prelude::StdRng, Rng, SeedableRng};
5use tree_sitter::{Language, Node, Tree};
6
7use tree_sitter_edit::Editor;
8
9use crate::node_types::NodeTypes;
10
11#[derive(Debug, Default)]
12struct Edits(HashMap<usize, Vec<u8>>);
13
14impl Editor for Edits {
15 fn has_edit(&self, _tree: &Tree, node: &Node<'_>) -> bool {
16 self.0.contains_key(&node.id())
17 }
18
19 fn edit(&self, _source: &[u8], tree: &Tree, node: &Node<'_>) -> Vec<u8> {
20 debug_assert!(self.has_edit(tree, node));
21 self.0.get(&node.id()).unwrap().clone()
22 }
23}
24
25#[derive(Debug)]
26struct Branches<'a>(HashMap<&'static str, Vec<&'a [u8]>>);
27
28impl<'a> Branches<'a> {
29 fn new(trees: Vec<(&'a [u8], &'a Tree)>) -> Self {
30 let mut branches = HashMap::with_capacity(trees.len()); for (text, tree) in trees {
32 let mut nodes = vec![tree.root_node()];
33 while !nodes.is_empty() {
34 let mut children = Vec::with_capacity(nodes.len()); for node in nodes {
36 branches
37 .entry(node.kind())
38 .or_insert_with(|| HashSet::with_capacity(1))
39 .insert(&text[node.byte_range()]);
40 let mut i = 0;
41 while let Some(child) = node.child(i) {
42 children.push(child);
43 i += 1;
44 }
45 }
46 nodes = children;
47 }
48 }
49 Branches(
50 branches
51 .into_iter()
52 .map(|(k, s)| (k, s.iter().copied().collect()))
53 .collect(),
54 )
55 }
56
57 fn possible(&self) -> usize {
58 let mut possible_mutations = 0;
59 for s in self.0.values() {
60 possible_mutations += s.len() - 1;
61 }
62 possible_mutations
63 }
64}
65
66fn parse(language: &Language, code: &str) -> Tree {
67 let mut parser = tree_sitter::Parser::new();
68 parser
69 .set_language(language)
70 .expect("Failed to set tree-sitter parser language");
71 parser.parse(code, None).expect("Failed to parse code")
72}
73
74#[derive(Debug)]
76pub struct Config {
77 pub chaos: u8,
81 pub deletions: u8,
85 pub language: Language,
86 pub inter_splices: usize,
89 pub max_size: usize,
93 pub node_types: NodeTypes,
94 pub reparse: usize,
98 pub seed: u64,
99}
100
101#[derive(Debug)]
102pub struct Splicer<'a> {
103 pub language: Language,
104 branches: Branches<'a>,
105 chaos: u8,
106 deletions: u8,
107 kinds: Vec<&'static str>,
108 inter_splices: usize,
110 max_size: usize,
111 node_types: NodeTypes,
112 trees: Vec<(&'a [u8], &'a Tree)>,
113 reparse: usize,
114 rng: StdRng,
115}
116
117impl<'a> Splicer<'a> {
118 fn delta(node: Node<'_>, replace: &[u8]) -> isize {
119 let range = node.byte_range();
120 isize::try_from(replace.len()).unwrap_or_default()
121 - isize::try_from(range.end - range.start).unwrap_or_default()
122 }
123
124 #[must_use]
125 pub fn new(config: Config, files: &'a HashMap<String, (Vec<u8>, Tree)>) -> Option<Self> {
126 let trees: Vec<_> = files
127 .iter()
128 .map(|(_, (txt, tree))| (txt.as_ref(), tree))
129 .collect();
130
131 let mut all_empty = true;
132 for (_bytes, tree) in files.values() {
133 if tree.root_node().child_count() != 0 {
134 all_empty = false;
135 break;
136 }
137 }
138 if all_empty {
139 return None;
140 }
141
142 let branches = Branches::new(
143 files
144 .iter()
145 .map(|(_, (txt, tree))| (txt.as_ref(), tree))
146 .collect(),
147 );
148 let rng = StdRng::seed_from_u64(config.seed);
149 let kinds = branches.0.keys().copied().collect();
150 Some(Splicer {
151 chaos: config.chaos,
152 deletions: config.deletions,
153 language: config.language,
154 branches,
155 kinds,
156 inter_splices: config.inter_splices,
158 max_size: config.max_size,
159 node_types: config.node_types,
160 reparse: config.reparse,
161 rng,
162 trees,
163 })
164 }
165
166 fn pick_usize(&mut self, n: usize) -> usize {
167 self.rng.random_range(0..n)
168 }
169
170 fn pick_idx<T>(&mut self, v: &[T]) -> usize {
171 self.pick_usize(v.len())
172 }
173
174 fn all_nodes(tree: &Tree) -> Vec<Node<'_>> {
175 let mut all = Vec::with_capacity(16); let root = tree.root_node();
177 let mut cursor = tree.walk();
178 let mut nodes: HashSet<_> = root.children(&mut cursor).collect();
179 while !nodes.is_empty() {
180 let mut next = HashSet::new();
181 for node in nodes {
182 debug_assert!(!next.contains(&node));
183 all.push(node);
184 let mut child_cursor = tree.walk();
185 for child in node.children(&mut child_cursor) {
186 debug_assert!(child.id() != node.id());
187 debug_assert!(!next.contains(&child));
188 next.insert(child);
189 }
190 }
191 nodes = next;
192 }
193 all
194 }
195
196 fn pick_node<'b>(&mut self, tree: &'b Tree) -> Node<'b> {
197 let nodes = Self::all_nodes(tree);
198 if nodes.is_empty() {
199 return tree.root_node();
200 }
201 *nodes.get(self.pick_idx(nodes.as_slice())).unwrap()
202 }
203
204 fn delete_node(&mut self, _text: &[u8], tree: &Tree) -> (usize, Vec<u8>, isize) {
205 let chaotic = self.rng.random_range(0..100) < self.chaos;
206 if chaotic {
207 let node = self.pick_node(tree);
208 return (node.id(), Vec::new(), Self::delta(node, &[]));
209 }
210 let nodes = Self::all_nodes(tree);
211 if nodes.iter().all(|n| !self.node_types.optional_node(n)) {
212 let node = self.pick_node(tree);
213 return (node.id(), Vec::new(), Self::delta(node, &[]));
214 }
215 let mut node = nodes.get(self.pick_idx(nodes.as_slice())).unwrap();
216 while !self.node_types.optional_node(node) {
217 node = nodes.get(self.pick_idx(nodes.as_slice())).unwrap();
218 }
219 (node.id(), Vec::new(), Self::delta(*node, &[]))
220 }
221
222 fn splice_node(&mut self, text: &[u8], tree: &Tree) -> (usize, Vec<u8>, isize) {
223 let chaotic = self.rng.random_range(0..100) < self.chaos;
224
225 let mut node = tree.root_node();
226 let mut candidates = Vec::new();
227 while candidates.len() <= 1 {
231 node = self.pick_node(tree);
232 candidates = if chaotic {
233 let kind_idx = self.rng.random_range(0..self.kinds.len());
234 let kind = self.kinds.get(kind_idx).unwrap();
235 self.branches.0.get(kind).unwrap().clone()
236 } else {
237 self.branches
238 .0
239 .get(node.kind())
240 .cloned()
241 .unwrap_or_default()
242 };
243 }
244
245 let idx = self.rng.random_range(0..candidates.len());
246 let mut candidate = candidates.get(idx).unwrap();
247 let node_text = &text[node.byte_range()];
249 while candidates.len() > 1 && candidate == &node_text {
250 let idx = self.rng.random_range(0..candidates.len());
251 candidate = candidates.get(idx).unwrap();
252 }
253 let replace = Vec::from(*candidate);
259 let delta = Self::delta(node, replace.as_slice());
260 (node.id(), replace, delta)
261 }
262
263 pub fn splice_tree(&mut self, text0: &[u8], mut tree: Tree) -> Option<Vec<u8>> {
264 let mut edits = Edits::default();
266 if self.inter_splices == 0 {
267 return None;
268 }
269 let splices = self.rng.random_range(1..self.inter_splices);
270 let mut text = Vec::from(text0);
271 let mut sz = isize::try_from(text.len()).unwrap_or_default();
272 for i in 0..splices {
273 let (id, bytes, delta) = if self.rng.random_range(0..100) < self.deletions {
274 self.delete_node(text.as_slice(), &tree)
275 } else {
276 self.splice_node(text.as_slice(), &tree)
277 };
278 sz += delta;
279 let sized_out = usize::try_from(sz).unwrap_or_default() >= self.max_size;
280 edits.0.insert(id, bytes);
281 if i % self.reparse == 0 || i + 1 == splices || sized_out {
282 let mut result = Vec::with_capacity(usize::try_from(sz).unwrap_or_default());
283 tree_sitter_edit::render(&mut result, &tree, text.as_slice(), &edits).ok()?;
284 text = result.clone();
285 tree = parse(&self.language, &String::from_utf8_lossy(text.as_slice()));
286 edits = Edits::default();
287 }
288 if sized_out {
289 break;
290 }
291 }
292 Some(text)
293 }
294}
295
296impl Iterator for Splicer<'_> {
297 type Item = Vec<u8>;
298
299 fn next(&mut self) -> Option<Self::Item> {
300 let mut tree_idx: usize = self.pick_usize(self.trees.len());
301 let (mut text, mut tree) = *self.trees.get(tree_idx).unwrap();
302 while text.len() > self.max_size {
303 tree_idx = self.pick_usize(self.trees.len());
304 (text, tree) = *self.trees.get(tree_idx).unwrap();
305 }
306 self.splice_tree(text, tree.clone())
307 }
308}