1use std::cmp;
18use std::collections::HashSet;
19use std::io;
20use std::io::Write;
21use std::marker::Sized;
22
23use context::Context;
24use newtypes::{NTermID, NodeID, RuleID};
25use pyo3::prelude::{PyObject, PyResult, Python};
26use pyo3::types::{PyBytes, PyString, PyTuple};
27use pyo3::FromPyObject;
28use rand::thread_rng;
29use rand::Rng;
30use recursion_info::RecursionInfo;
31use rule::{PlainRule, RegExpRule, Rule, RuleChild, RuleIDOrCustom, ScriptRule};
32use serde::{Deserialize, Serialize};
33
34enum UnparseStep<'dat> {
35 Term(&'dat [u8]),
36 Nonterm(NTermID),
37 Script(usize, PyObject),
38 PushBuffer(),
39}
40
41struct Unparser<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> {
42 tree: &'tree T,
43 stack: Vec<UnparseStep<'data>>,
44 buffers: Vec<std::io::Cursor<Vec<u8>>>,
45 w: W,
46 i: usize,
47 ctx: &'ctx Context,
48}
49
50impl<'data, 'tree: 'data, 'ctx: 'data, W: Write, T: TreeLike> Unparser<'data, 'tree, 'ctx, W, T> {
51 fn new(nid: NodeID, w: W, tree: &'tree T, ctx: &'ctx Context) -> Self {
52 let i = nid.to_i();
53 let nt = tree.get_rule(NodeID::from(i), ctx).nonterm();
54 let op = UnparseStep::<'data>::Nonterm(nt);
55 let stack = vec![op];
56 Self {
57 stack,
58 buffers: vec![],
59 w,
60 tree,
61 i,
62 ctx,
63 }
64 }
65
66 fn unparse_step(&mut self) -> bool {
67 match self.stack.pop() {
68 Some(UnparseStep::Term(data)) => self.write(data),
69 Some(UnparseStep::Nonterm(nt)) => self.nonterm(nt),
70 Some(UnparseStep::Script(num, expr)) => self.unwrap_script(num, &expr),
71 Some(UnparseStep::PushBuffer()) => self.push_buffer(),
72 None => return false,
73 };
74 true
75 }
76
77 fn write(&mut self, data: &[u8]) {
78 if let Some(buff) = self.buffers.last_mut() {
79 buff.write_all(data).unwrap();
80 } else {
81 self.w.write_all(data).unwrap();
82 }
83 }
84
85 fn nonterm(&mut self, nt: NTermID) {
86 self.next_rule(nt);
87 }
88 fn unwrap_script(&mut self, num: usize, expr: &PyObject) {
89 Python::with_gil(|py| {
90 self.script(py, num, expr)
91 .map_err(|e| e.print_and_set_sys_last_vars(py))
92 .unwrap();
93 });
94 }
95 fn script(&mut self, py: Python, num: usize, expr: &PyObject) -> PyResult<()> {
96 let bufs = self.buffers.split_off(self.buffers.len() - num);
97 let bufs = bufs
98 .into_iter()
99 .map(std::io::Cursor::into_inner)
100 .collect::<Vec<_>>();
101 let byte_arrays = bufs.iter().map(|b| PyBytes::new(py, b));
102 let res = expr.call1(py, PyTuple::new(py, byte_arrays))?;
103 if res.as_ref(py).is_instance_of::<PyString>()? {
104 let pystr = <&PyString>::extract(res.as_ref(py))?;
105 self.write(pystr.to_string_lossy().as_bytes());
106 } else if res.as_ref(py).is_instance_of::<PyBytes>()? {
107 let pybytes = <&PyBytes>::extract(res.as_ref(py))?;
108 self.write(pybytes.as_bytes());
109 } else {
110 return Err(pyo3::exceptions::PyValueError::new_err(
111 "script function should return string or bytes",
112 ));
113 }
114 Ok(())
115 }
116
117 fn push_buffer(&mut self) {
118 self.buffers.push(std::io::Cursor::new(vec![]));
119 }
120
121 fn next_rule(&mut self, nt: NTermID) {
122 let nid = NodeID::from(self.i);
123 let rule: &'ctx Rule = self.tree.get_rule(nid, self.ctx);
124 assert_eq!(nt, rule.nonterm());
125 self.i += 1;
126 match rule {
127 Rule::Plain(r) => self.next_plain(r),
128 Rule::Script(r) => self.next_script(r),
129 Rule::RegExp(_) => self.next_regexp(self.tree.get_custom_rule_data(nid)),
130 }
131 }
132
133 fn next_plain(&mut self, r: &'ctx PlainRule) {
134 for rule_child in r.children.iter().rev() {
135 let op = match rule_child {
136 RuleChild::Term(data) => UnparseStep::<'data>::Term(data),
137 RuleChild::NTerm(id) => UnparseStep::<'data>::Nonterm(*id),
138 };
139 self.stack.push(op);
140 }
141 }
142
143 fn next_script(&mut self, r: &ScriptRule) {
144 {
145 Python::with_gil(|py| {
146 self.stack.push(UnparseStep::Script(
147 r.nonterms.len(),
148 r.script.clone_ref(py),
149 ));
150 });
151 }
152 for nterm in r.nonterms.iter().rev() {
153 self.stack.push(UnparseStep::Nonterm(*nterm));
154 self.stack.push(UnparseStep::PushBuffer());
155 }
156 }
157
158 fn next_regexp(&mut self, data: &'tree [u8]) {
159 self.stack.push(UnparseStep::<'data>::Term(data));
160 }
161
162 fn unparse(&mut self) -> NodeID {
163 while self.unparse_step() {}
164 NodeID::from(self.i)
165 }
166}
167
168pub trait TreeLike
169where
170 Self: Sized,
171{
172 fn get_rule_id(&self, n: NodeID) -> RuleID;
173 fn size(&self) -> usize;
174 fn to_tree(&self, _: &Context) -> Tree;
175 fn get_rule<'c>(&self, n: NodeID, ctx: &'c Context) -> &'c Rule;
176 fn get_rule_or_custom(&self, n: NodeID) -> &RuleIDOrCustom;
177 fn get_custom_rule_data(&self, n: NodeID) -> &[u8];
178 fn get_nonterm_id(&self, n: NodeID, ctx: &Context) -> NTermID {
179 self.get_rule(n, ctx).nonterm()
180 }
181
182 fn unparse<W: Write>(&self, id: NodeID, ctx: &Context, mut w: &mut W) {
183 Unparser::new(id, &mut w, self, ctx).unparse();
184 }
185
186 fn unparse_to<W: Write>(&self, ctx: &Context, w: &mut W) {
187 self.unparse(NodeID::from(0), ctx, w);
188 }
189
190 fn unparse_to_vec(&self, ctx: &Context) -> Vec<u8> {
191 self.unparse_node_to_vec(NodeID::from(0), ctx)
192 }
193
194 fn unparse_node_to_vec(&self, n: NodeID, ctx: &Context) -> Vec<u8> {
195 let mut data = vec![];
196 self.unparse(n, ctx, &mut data);
197 data
198 }
199
200 fn unparse_print(&self, ctx: &Context) {
201 self.unparse_to(ctx, &mut io::stdout());
202 }
203}
204
205#[derive(Clone, Debug, Serialize, Deserialize)]
206pub struct Tree {
207 pub rules: Vec<RuleIDOrCustom>,
208 pub sizes: Vec<usize>,
209 pub paren: Vec<NodeID>,
210}
211
212impl TreeLike for Tree {
213 fn get_rule_id(&self, n: NodeID) -> RuleID {
214 self.rules[n.to_i()].id()
215 }
216
217 fn size(&self) -> usize {
218 self.rules.len()
219 }
220
221 fn to_tree(&self, _ctx: &Context) -> Tree {
222 self.clone()
223 }
224
225 fn get_rule<'c>(&self, n: NodeID, ctx: &'c Context) -> &'c Rule {
226 return ctx.get_rule(self.get_rule_id(n));
227 }
228 fn get_custom_rule_data(&self, n: NodeID) -> &[u8] {
229 self.rules[n.to_i()].data()
230 }
231 fn get_rule_or_custom(&self, n: NodeID) -> &RuleIDOrCustom {
232 &self.rules[n.to_i()]
233 }
234}
235
236impl Tree {
237 #[must_use]
238 pub fn from_rule_vec(rules: Vec<RuleIDOrCustom>, ctx: &Context) -> Self {
239 let sizes = vec![0; rules.len()];
240 let paren = vec![NodeID::from(0); rules.len()];
241 let mut res = Tree {
242 rules,
243 sizes,
244 paren,
245 };
246 if !res.rules.is_empty() {
247 res.calc_subtree_sizes_and_parents(ctx);
248 }
249 res
250 }
251
252 #[must_use]
253 pub fn get_rule_id(&self, n: NodeID) -> RuleID {
254 self.rules[n.to_i()].id()
255 }
256
257 #[must_use]
258 pub fn subtree_size(&self, n: NodeID) -> usize {
259 self.sizes[n.to_i()]
260 }
261
262 #[must_use]
263 pub fn mutate_replace_from_tree<'a>(
264 &'a self,
265 n: NodeID,
266 other: &'a Tree,
267 other_node: NodeID,
268 ) -> TreeMutation<'a> {
269 let old_size = self.subtree_size(n);
270 let new_size = other.subtree_size(other_node);
271 return TreeMutation {
272 prefix: self.slice(0.into(), n),
273 repl: other.slice(other_node, other_node + new_size),
274 postfix: self.slice(n + old_size, self.rules.len().into()),
275 };
276 }
277
278 fn calc_subtree_sizes_and_parents(&mut self, ctx: &Context) {
279 self.calc_parents(ctx);
280 self.calc_sizes();
281 }
282
283 fn calc_parents(&mut self, ctx: &Context) {
284 if self.size() == 0 {
285 return;
286 }
287 let mut stack: Vec<(NTermID, NodeID)> = Vec::new();
288 stack.push((
289 self.get_rule(NodeID::from(0), ctx).nonterm(),
290 NodeID::from(0),
291 ));
292 for i in 0..self.size() {
293 let node_id = NodeID::from(i);
294 let nonterm = self.get_rule(node_id, ctx).nonterm();
295 let (nterm_id, node) = stack.pop().expect("Not a valid tree for unparsing!");
297 if nterm_id == nonterm {
298 self.paren[i] = node;
299 } else {
300 panic!("Not a valid tree for unparsing!");
301 }
302 let rule = self.get_rule(node_id, ctx);
303 for nonterm in rule.nonterms().iter().rev() {
304 stack.push((*nonterm, node_id));
305 }
306 }
307 }
308
309 fn calc_sizes(&mut self) {
310 for size in &mut self.sizes {
312 *size = 1;
313 }
314 for i in (1..self.size()).rev() {
315 self.sizes[self.paren[i].to_i()] += self.sizes[i];
316 }
317 }
318
319 fn slice(&self, from: NodeID, to: NodeID) -> &[RuleIDOrCustom] {
320 &self.rules[from.into()..to.into()]
321 }
322
323 #[must_use]
324 pub fn get_parent(&self, n: NodeID) -> Option<NodeID> {
325 if n == NodeID::from(0) {
326 None
327 } else {
328 Some(self.paren[n.to_i()])
329 }
330 }
331
332 pub fn truncate(&mut self) {
333 self.rules.truncate(0);
334 self.sizes.truncate(0);
335 self.paren.truncate(0);
336 }
337
338 pub fn generate_from_nt(&mut self, start: NTermID, len: usize, ctx: &Context) {
339 let ruleid = ctx.get_random_rule_for_nt(start, len);
340 self.generate_from_rule(ruleid, len - 1, ctx);
341 }
342
343 pub fn generate_from_rule(&mut self, ruleid: RuleID, max_len: usize, ctx: &Context) {
344 match ctx.get_rule(ruleid) {
345 Rule::Plain(..) | Rule::Script(..) => {
346 self.truncate();
347 self.rules.push(RuleIDOrCustom::Rule(ruleid));
348 self.sizes.push(0);
349 self.paren.push(NodeID::from(0));
350 ctx.get_rule(ruleid).generate(self, ctx, max_len);
351 self.sizes[0] = self.rules.len();
352 }
353 Rule::RegExp(RegExpRule { hir, .. }) => {
354 let rid = RuleIDOrCustom::Custom(
355 ruleid,
356 regex_mutator::generate(hir, thread_rng().gen::<u64>()),
357 );
358 self.truncate();
359 self.rules.push(rid);
360 self.sizes.push(0);
361 self.paren.push(NodeID::from(0));
362 self.sizes[0] = self.rules.len();
363 }
364 }
365 }
366
367 #[must_use]
368 pub fn calc_recursions(&self, ctx: &Context) -> Option<Vec<RecursionInfo>> {
369 let mut ret = Vec::new();
370 let mut done_nterms = HashSet::new();
371 for rule in &self.rules {
372 let nterm = ctx.get_nt(rule);
373 if !done_nterms.contains(&nterm) {
374 if let Some(rec_info) = RecursionInfo::new(self, nterm, ctx) {
375 ret.push(rec_info);
376 }
377 done_nterms.insert(nterm);
378 }
379 }
380 if ret.is_empty() {
381 None
382 } else {
383 Some(ret)
384 }
385 }
386
387 #[must_use]
388 pub fn find_recursions_iter(&self, ctx: &Context) -> Vec<(NodeID, NodeID)> {
389 let mut found_recursions = Vec::new();
390 for i in 1..cmp::min(self.size(), 10000) {
392 let node_id = NodeID::from(self.size() - i);
393 let current_nterm: NTermID = self.get_rule(node_id, ctx).nonterm();
394 let mut current_node_id = self.paren[node_id.to_i()];
395 let mut depth = 0;
396 while current_node_id != NodeID::from(0) {
397 if self.get_rule(current_node_id, ctx).nonterm() == current_nterm {
398 found_recursions.push((current_node_id, node_id));
399 }
400 current_node_id = self.paren[current_node_id.to_i()];
401 if depth > 15 {
402 break;
403 }
404 depth += 1;
405 }
406 }
407 found_recursions
408 }
409}
410
411pub struct TreeMutation<'a> {
412 pub prefix: &'a [RuleIDOrCustom],
413 pub repl: &'a [RuleIDOrCustom],
414 pub postfix: &'a [RuleIDOrCustom],
415}
416
417impl<'a> TreeMutation<'a> {
418 #[must_use]
419 pub fn get_at(&self, n: NodeID) -> &'a RuleIDOrCustom {
420 let i = n.to_i();
421 let end0 = self.prefix.len();
422 let end1 = end0 + self.repl.len();
423 let end2 = end1 + self.postfix.len();
424 if i < end0 {
425 return &self.prefix[i];
426 }
427 if i < end1 {
428 return &self.repl[i - end0];
429 }
430 if i < end2 {
431 return &self.postfix[i - end1];
432 }
433 panic!("index out of bound for rule access");
434 }
435}
436
437impl<'a> TreeLike for TreeMutation<'a> {
438 fn get_rule_id(&self, n: NodeID) -> RuleID {
439 self.get_at(n).id()
440 }
441
442 fn size(&self) -> usize {
443 self.prefix.len() + self.repl.len() + self.postfix.len()
444 }
445 fn get_rule_or_custom(&self, n: NodeID) -> &RuleIDOrCustom {
446 self.get_at(n)
447 }
448
449 fn to_tree(&self, ctx: &Context) -> Tree {
450 let mut vec = vec![];
451 vec.extend_from_slice(self.prefix);
452 vec.extend_from_slice(self.repl);
453 vec.extend_from_slice(self.postfix);
454 Tree::from_rule_vec(vec, ctx)
455 }
456
457 fn get_rule<'c>(&self, n: NodeID, ctx: &'c Context) -> &'c Rule {
458 return ctx.get_rule(self.get_rule_id(n));
459 }
460 fn get_custom_rule_data(&self, n: NodeID) -> &[u8] {
461 self.get_at(n).data()
462 }
463}
464
465#[cfg(test)]
466mod tests {
467 use super::*;
468 use context::Context;
469 use newtypes::NodeID;
470
471 fn calc_subtree_sizes_and_parents_rec_test(tree: &mut Tree, n: NodeID, ctx: &Context) -> usize {
472 let mut cur = n + 1;
473 let mut size = 1;
474 for _ in 0..tree.get_rule(n, ctx).number_of_nonterms() {
475 tree.paren[cur.to_i()] = n;
476 let sub_size = calc_subtree_sizes_and_parents_rec_test(tree, cur, ctx);
477 cur = cur + sub_size;
478 size += sub_size;
479 }
480 tree.sizes[n.to_i()] = size;
481 size
482 }
483
484 #[test]
485 fn check_calc_sizes_iter() {
486 let mut ctx = Context::new();
487 let _ = ctx.add_rule("C", b"c{B}c3");
488 let _ = ctx.add_rule("B", b"b{A}b23");
489 let _ = ctx.add_rule("A", b"aasdf {A}");
490 let _ = ctx.add_rule("A", b"a2 {A}");
491 let _ = ctx.add_rule("A", b"a sdf{A}");
492 let _ = ctx.add_rule("A", b"a 34{A}");
493 let _ = ctx.add_rule("A", b"adfe {A}");
494 let _ = ctx.add_rule("A", b"a32");
495 ctx.initialize(50);
496 let mut tree = Tree::from_rule_vec(vec![], &ctx);
497 for _ in 0..100 {
498 tree.truncate();
499 tree.generate_from_nt(ctx.nt_id("C"), 50, &ctx);
500 calc_subtree_sizes_and_parents_rec_test(&mut tree, NodeID::from(0), &ctx);
501 let vec1 = tree.sizes.clone();
502 tree.calc_sizes();
503 let vec2 = tree.sizes.clone();
504 assert_eq!(vec1, vec2);
505 }
506 }
507
508 #[test]
509 fn check_calc_paren_iter() {
510 let mut ctx = Context::new();
511 let _ = ctx.add_rule("C", b"c{B}c3");
512 let _ = ctx.add_rule("B", b"b{A}b23");
513 let _ = ctx.add_rule("A", b"aasdf {A}");
514 let _ = ctx.add_rule("A", b"a2 {A}");
515 let _ = ctx.add_rule("A", b"a sdf{A}");
516 let _ = ctx.add_rule("A", b"a 34{A}");
517 let _ = ctx.add_rule("A", b"adfe {A}");
518 let _ = ctx.add_rule("A", b"a32");
519 ctx.initialize(50);
520 let mut tree = Tree::from_rule_vec(vec![], &ctx);
521 for _ in 0..100 {
522 tree.truncate();
523 tree.generate_from_nt(ctx.nt_id("C"), 50, &ctx);
524 calc_subtree_sizes_and_parents_rec_test(&mut tree, NodeID::from(0), &ctx);
525 let vec1 = tree.paren.clone();
526 tree.calc_parents(&ctx);
527 let vec2 = tree.paren.clone();
528 assert_eq!(vec1, vec2);
529 }
530 }
531
532 #[test]
533 fn check_unparse_iter() {
534 let mut ctx = Context::new();
535 let _ = ctx.add_rule("C", b"c{B}c3");
536 let _ = ctx.add_rule("B", b"b{A}b23");
537 let _ = ctx.add_rule("A", b"aasdf {A}");
538 let _ = ctx.add_rule("A", b"a2 {A}");
539 let _ = ctx.add_rule("A", b"a sdf{A}");
540 let _ = ctx.add_rule("A", b"a 34{A}");
541 let _ = ctx.add_rule("A", b"adfe {A}");
542 let _ = ctx.add_rule("A", b"a32");
543 ctx.initialize(50);
544 let mut tree = Tree::from_rule_vec(vec![], &ctx);
545 for _ in 0..100 {
546 tree.truncate();
547 tree.generate_from_nt(ctx.nt_id("C"), 50, &ctx);
548 let mut vec1 = vec![];
549 let mut vec2 = vec![];
550 tree.unparse(NodeID::from(0), &ctx, &mut vec1);
551 tree.unparse(NodeID::from(0), &ctx, &mut vec2);
552 assert_eq!(vec1, vec2);
553 }
554 }
555
556 #[test]
557 fn check_find_recursions() {
558 let mut ctx = Context::new();
559 let _ = ctx.add_rule("C", b"c{B}c");
560 let _ = ctx.add_rule("B", b"b{A}b");
561 let _ = ctx.add_rule("A", b"a {A}");
562 let _ = ctx.add_rule("A", b"a {A}");
563 let _ = ctx.add_rule("A", b"a {A}");
564 let _ = ctx.add_rule("A", b"a {A}");
565 let _ = ctx.add_rule("A", b"a {A}");
566 let _ = ctx.add_rule("A", b"a");
567 ctx.initialize(20);
568 let mut tree = Tree::from_rule_vec(vec![], &ctx);
569 let mut some_recursions = false;
570 for _ in 0..100 {
571 tree.truncate();
572 tree.generate_from_nt(ctx.nt_id("C"), 20, &ctx);
573 if let Some(recursions) = tree.calc_recursions(&ctx) {
574 assert_ne!(recursions.len(), 0);
575 for recursion_info in recursions {
576 for offset in 0..recursion_info.get_number_of_recursions() {
577 let tuple = recursion_info.get_recursion_pair_by_offset(offset);
578 some_recursions = true;
579 assert!(tuple.0.to_i() < tuple.1.to_i());
580 }
581 }
582 }
583 }
584 assert!(some_recursions);
585 }
586}