1use std::{
9 cell::{Cell, RefCell},
10 cmp::min,
11 fmt,
12 hash::{Hash, Hasher},
13};
14
15use bumpalo::Bump;
16use hashbrown::hash_map::RawEntryMut;
17use smallvec::{smallvec, SmallVec};
18use strsim::normalized_levenshtein;
19
20use self::Edge::*;
21use super::{
22 changes::{insert_deep_unchanged, ChangeKind, ChangeMap},
23 hash::DftHashMap,
24 stack::Stack,
25 syntax::{AtomKind, Syntax, SyntaxId},
26};
27
28#[derive(Debug, Clone)]
60pub struct Vertex<'s, 'v> {
61 pub neighbours: RefCell<Option<&'v [(Edge, &'v Vertex<'s, 'v>)]>>,
62 pub predecessor: Cell<Option<(u32, &'v Vertex<'s, 'v>)>>,
63 pub lhs_syntax: Option<&'s Syntax<'s>>,
66 pub rhs_syntax: Option<&'s Syntax<'s>>,
67 parents: Stack<'v, EnteredDelimiter<'s, 'v>>,
68 lhs_parent_id: Option<SyntaxId>,
69 rhs_parent_id: Option<SyntaxId>,
70}
71
72impl PartialEq for Vertex<'_, '_> {
73 fn eq(&self, other: &Self) -> bool {
74 let b0 = match (self.lhs_syntax, other.lhs_syntax) {
91 (Some(s0), Some(s1)) => s0.id() == s1.id(),
92 (None, None) => self.lhs_parent_id == other.lhs_parent_id,
93 _ => false,
94 };
95 let b1 = match (self.rhs_syntax, other.rhs_syntax) {
96 (Some(s0), Some(s1)) => s0.id() == s1.id(),
97 (None, None) => self.rhs_parent_id == other.rhs_parent_id,
98 _ => false,
99 };
100 let b2 = can_pop_either_parent(&self.parents) == can_pop_either_parent(&other.parents);
106
107 b0 && b1 && b2
108 }
109}
110impl Eq for Vertex<'_, '_> {}
111
112impl Hash for Vertex<'_, '_> {
113 fn hash<H: Hasher>(&self, state: &mut H) {
114 self.lhs_syntax.map(|node| node.id()).hash(state);
115 self.rhs_syntax.map(|node| node.id()).hash(state);
116
117 self.lhs_parent_id.hash(state);
118 self.rhs_parent_id.hash(state);
119 can_pop_either_parent(&self.parents).hash(state);
120 }
121}
122
123#[derive(Clone, PartialEq)]
125enum EnteredDelimiter<'s, 'v> {
126 PopEither((Stack<'v, &'s Syntax<'s>>, Stack<'v, &'s Syntax<'s>>)),
131 PopBoth((&'s Syntax<'s>, &'s Syntax<'s>)),
139}
140
141impl fmt::Debug for EnteredDelimiter<'_, '_> {
142 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
143 let desc = match self {
144 Self::PopEither((lhs_delims, rhs_delims)) => {
145 format!(
146 "PopEither(lhs count: {}, rhs count: {})",
147 lhs_delims.size(),
148 rhs_delims.size()
149 )
150 }
151 Self::PopBoth(_) => "PopBoth".to_owned(),
152 };
153 f.write_str(&desc)
154 }
155}
156
157fn push_both_delimiters<'s, 'v>(
158 entered: &Stack<'v, EnteredDelimiter<'s, 'v>>,
159 lhs_delim: &'s Syntax<'s>,
160 rhs_delim: &'s Syntax<'s>,
161 alloc: &'v Bump,
162) -> Stack<'v, EnteredDelimiter<'s, 'v>> {
163 entered.push(EnteredDelimiter::PopBoth((lhs_delim, rhs_delim)), alloc)
164}
165
166fn can_pop_either_parent(entered: &Stack<EnteredDelimiter>) -> bool {
167 matches!(entered.peek(), Some(EnteredDelimiter::PopEither(_)))
168}
169
170fn try_pop_both<'s, 'v>(
171 entered: &Stack<'v, EnteredDelimiter<'s, 'v>>,
172) -> Option<(
173 &'s Syntax<'s>,
174 &'s Syntax<'s>,
175 Stack<'v, EnteredDelimiter<'s, 'v>>,
176)> {
177 match entered.peek() {
178 Some(EnteredDelimiter::PopBoth((lhs_delim, rhs_delim))) => {
179 Some((lhs_delim, rhs_delim, entered.pop().unwrap()))
180 }
181 _ => None,
182 }
183}
184
185fn try_pop_lhs<'s, 'v>(
186 entered: &Stack<'v, EnteredDelimiter<'s, 'v>>,
187 alloc: &'v Bump,
188) -> Option<(&'s Syntax<'s>, Stack<'v, EnteredDelimiter<'s, 'v>>)> {
189 match entered.peek() {
190 Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => match lhs_delims.peek() {
191 Some(lhs_delim) => {
192 let mut entered = entered.pop().unwrap();
193 let new_lhs_delims = lhs_delims.pop().unwrap();
194
195 if !new_lhs_delims.is_empty() || !rhs_delims.is_empty() {
196 entered = entered.push(
197 EnteredDelimiter::PopEither((new_lhs_delims, rhs_delims.clone())),
198 alloc,
199 );
200 }
201
202 Some((lhs_delim, entered))
203 }
204 None => None,
205 },
206 _ => None,
207 }
208}
209
210fn try_pop_rhs<'s, 'v>(
211 entered: &Stack<'v, EnteredDelimiter<'s, 'v>>,
212 alloc: &'v Bump,
213) -> Option<(&'s Syntax<'s>, Stack<'v, EnteredDelimiter<'s, 'v>>)> {
214 match entered.peek() {
215 Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => match rhs_delims.peek() {
216 Some(rhs_delim) => {
217 let mut entered = entered.pop().unwrap();
218 let new_rhs_delims = rhs_delims.pop().unwrap();
219
220 if !lhs_delims.is_empty() || !new_rhs_delims.is_empty() {
221 entered = entered.push(
222 EnteredDelimiter::PopEither((lhs_delims.clone(), new_rhs_delims)),
223 alloc,
224 );
225 }
226
227 Some((rhs_delim, entered))
228 }
229 None => None,
230 },
231 _ => None,
232 }
233}
234
235fn push_lhs_delimiter<'s, 'v>(
236 entered: &Stack<'v, EnteredDelimiter<'s, 'v>>,
237 delimiter: &'s Syntax<'s>,
238 alloc: &'v Bump,
239) -> Stack<'v, EnteredDelimiter<'s, 'v>> {
240 match entered.peek() {
241 Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => entered.pop().unwrap().push(
242 EnteredDelimiter::PopEither((lhs_delims.push(delimiter, alloc), rhs_delims.clone())),
243 alloc,
244 ),
245 _ => entered.push(
246 EnteredDelimiter::PopEither((Stack::new().push(delimiter, alloc), Stack::new())),
247 alloc,
248 ),
249 }
250}
251
252fn push_rhs_delimiter<'s, 'v>(
253 entered: &Stack<'v, EnteredDelimiter<'s, 'v>>,
254 delimiter: &'s Syntax<'s>,
255 alloc: &'v Bump,
256) -> Stack<'v, EnteredDelimiter<'s, 'v>> {
257 match entered.peek() {
258 Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => entered.pop().unwrap().push(
259 EnteredDelimiter::PopEither((lhs_delims.clone(), rhs_delims.push(delimiter, alloc))),
260 alloc,
261 ),
262 _ => entered.push(
263 EnteredDelimiter::PopEither((Stack::new(), Stack::new().push(delimiter, alloc))),
264 alloc,
265 ),
266 }
267}
268
269impl<'s, 'v> Vertex<'s, 'v> {
270 pub fn is_end(&self) -> bool {
271 self.lhs_syntax.is_none() && self.rhs_syntax.is_none() && self.parents.is_empty()
272 }
273
274 pub fn new(lhs_syntax: Option<&'s Syntax<'s>>, rhs_syntax: Option<&'s Syntax<'s>>) -> Self {
275 let parents = Stack::new();
276 Vertex {
277 neighbours: RefCell::new(None),
278 predecessor: Cell::new(None),
279 lhs_syntax,
280 rhs_syntax,
281 parents,
282 lhs_parent_id: None,
283 rhs_parent_id: None,
284 }
285 }
286}
287
288#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
296pub enum Edge {
297 UnchangedNode {
298 depth_difference: u32,
299 probably_punctuation: bool,
303 },
304 EnterUnchangedDelimiter {
305 depth_difference: u32,
306 },
307 ReplacedComment {
308 levenshtein_pct: u8,
309 },
310 ReplacedString {
311 levenshtein_pct: u8,
312 },
313 NovelAtomLHS {},
314 NovelAtomRHS {},
315 EnterNovelDelimiterLHS {},
318 EnterNovelDelimiterRHS {},
319}
320
321impl Edge {
322 pub fn cost(self) -> u32 {
323 match self {
324 UnchangedNode {
326 depth_difference,
327 probably_punctuation,
328 } => {
329 let base = min(40, depth_difference + 1);
336
337 base + if probably_punctuation { 200 } else { 0 }
351 }
352 EnterUnchangedDelimiter { depth_difference } => 100 + min(40, depth_difference),
354
355 NovelAtomLHS {} | NovelAtomRHS {} => 300,
357 EnterNovelDelimiterLHS { .. } | EnterNovelDelimiterRHS { .. } => 300,
358 ReplacedComment { levenshtein_pct } | ReplacedString { levenshtein_pct } => {
363 500 + u32::from(100 - levenshtein_pct)
364 }
365 }
366 }
367}
368
369fn allocate_if_new<'s, 'v>(
370 v: Vertex<'s, 'v>,
371 alloc: &'v Bump,
372 seen: &mut DftHashMap<&Vertex<'s, 'v>, SmallVec<[&'v Vertex<'s, 'v>; 2]>>,
373) -> &'v Vertex<'s, 'v> {
374 match seen.raw_entry_mut().from_key(&v) {
377 RawEntryMut::Occupied(mut occupied) => {
378 let existing = occupied.get_mut();
379
380 if let Some(allocated) = existing.last() {
383 if existing.len() >= 2 {
384 return allocated;
385 }
386 }
387
388 for existing_node in existing.iter() {
391 if existing_node.parents == v.parents {
392 return existing_node;
393 }
394 }
395
396 let allocated = alloc.alloc(v);
399 existing.push(allocated);
400 allocated
401 }
402 RawEntryMut::Vacant(vacant) => {
403 let allocated = alloc.alloc(v);
404
405 let existing: SmallVec<[&'v Vertex<'s, 'v>; 2]> = smallvec![&*allocated];
411
412 vacant.insert(allocated, existing);
413 allocated
414 }
415 }
416}
417
418fn looks_like_punctuation(node: &Syntax) -> bool {
423 match node {
424 Syntax::Atom { content, .. } => content == "," || content == ";" || content == ".",
425 _ => false,
426 }
427}
428
429type PoppedParents<'s, 'v> = (
432 Option<&'s Syntax<'s>>,
433 Option<&'s Syntax<'s>>,
434 Option<SyntaxId>,
435 Option<SyntaxId>,
436 Stack<'v, EnteredDelimiter<'s, 'v>>,
437);
438
439fn pop_all_parents<'s, 'v>(
440 lhs_node: Option<&'s Syntax<'s>>,
441 rhs_node: Option<&'s Syntax<'s>>,
442 lhs_parent_id: Option<SyntaxId>,
443 rhs_parent_id: Option<SyntaxId>,
444 parents: &Stack<'v, EnteredDelimiter<'s, 'v>>,
445 alloc: &'v Bump,
446) -> PoppedParents<'s, 'v> {
447 let mut lhs_node = lhs_node;
448 let mut rhs_node = rhs_node;
449 let mut lhs_parent_id = lhs_parent_id;
450 let mut rhs_parent_id = rhs_parent_id;
451 let mut parents = parents.clone();
452
453 loop {
454 if lhs_node.is_none() {
455 if let Some((lhs_parent, parents_next)) = try_pop_lhs(&parents, alloc) {
456 lhs_node = lhs_parent.next_sibling();
460 lhs_parent_id = lhs_parent.parent().map(Syntax::id);
461 parents = parents_next;
462 continue;
463 }
464 }
465
466 if rhs_node.is_none() {
467 if let Some((rhs_parent, parents_next)) = try_pop_rhs(&parents, alloc) {
468 rhs_node = rhs_parent.next_sibling();
472 rhs_parent_id = rhs_parent.parent().map(Syntax::id);
473 parents = parents_next;
474 continue;
475 }
476 }
477
478 if lhs_node.is_none() && rhs_node.is_none() {
479 if let Some((lhs_parent, rhs_parent, parents_next)) = try_pop_both(&parents) {
484 lhs_node = lhs_parent.next_sibling();
485 rhs_node = rhs_parent.next_sibling();
486 lhs_parent_id = lhs_parent.parent().map(Syntax::id);
487 rhs_parent_id = rhs_parent.parent().map(Syntax::id);
488 parents = parents_next;
489 continue;
490 }
491 }
492
493 break;
494 }
495
496 (lhs_node, rhs_node, lhs_parent_id, rhs_parent_id, parents)
497}
498
499pub fn set_neighbours<'s, 'v>(
502 v: &Vertex<'s, 'v>,
503 alloc: &'v Bump,
504 seen: &mut DftHashMap<&Vertex<'s, 'v>, SmallVec<[&'v Vertex<'s, 'v>; 2]>>,
505) {
506 if v.neighbours.borrow().is_some() {
507 return;
508 }
509
510 let mut neighbours: Vec<(Edge, &Vertex)> = Vec::with_capacity(7);
512
513 if let (Some(lhs_syntax), Some(rhs_syntax)) = (&v.lhs_syntax, &v.rhs_syntax) {
514 if lhs_syntax == rhs_syntax {
515 let depth_difference = (lhs_syntax.num_ancestors() as i32
516 - rhs_syntax.num_ancestors() as i32)
517 .unsigned_abs();
518
519 let probably_punctuation = looks_like_punctuation(lhs_syntax);
520
521 let (lhs_syntax, rhs_syntax, lhs_parent_id, rhs_parent_id, parents) = pop_all_parents(
523 lhs_syntax.next_sibling(),
524 rhs_syntax.next_sibling(),
525 v.lhs_parent_id,
526 v.rhs_parent_id,
527 &v.parents,
528 alloc,
529 );
530
531 neighbours.push((
532 UnchangedNode {
533 depth_difference,
534 probably_punctuation,
535 },
536 allocate_if_new(
537 Vertex {
538 neighbours: RefCell::new(None),
539 predecessor: Cell::new(None),
540 lhs_syntax,
541 rhs_syntax,
542 parents,
543 lhs_parent_id,
544 rhs_parent_id,
545 },
546 alloc,
547 seen,
548 ),
549 ));
550 }
551
552 if let (
553 Syntax::List {
554 open_content: lhs_open_content,
555 close_content: lhs_close_content,
556 children: lhs_children,
557 ..
558 },
559 Syntax::List {
560 open_content: rhs_open_content,
561 close_content: rhs_close_content,
562 children: rhs_children,
563 ..
564 },
565 ) = (lhs_syntax, rhs_syntax)
566 {
567 if lhs_open_content == rhs_open_content && lhs_close_content == rhs_close_content {
569 let lhs_next = lhs_children.first().copied();
570 let rhs_next = rhs_children.first().copied();
571
572 let parents_next = push_both_delimiters(&v.parents, lhs_syntax, rhs_syntax, alloc);
574
575 let depth_difference = (lhs_syntax.num_ancestors() as i32
576 - rhs_syntax.num_ancestors() as i32)
577 .unsigned_abs();
578
579 let (lhs_syntax, rhs_syntax, lhs_parent_id, rhs_parent_id, parents) =
582 pop_all_parents(
583 lhs_next,
584 rhs_next,
585 Some(lhs_syntax.id()),
586 Some(rhs_syntax.id()),
587 &parents_next,
588 alloc,
589 );
590
591 neighbours.push((
592 EnterUnchangedDelimiter { depth_difference },
593 allocate_if_new(
594 Vertex {
595 neighbours: RefCell::new(None),
596 predecessor: Cell::new(None),
597 lhs_syntax,
598 rhs_syntax,
599 parents,
600 lhs_parent_id,
601 rhs_parent_id,
602 },
603 alloc,
604 seen,
605 ),
606 ));
607 }
608 }
609
610 if let (
611 Syntax::Atom {
612 content: lhs_content,
613 kind: lhs_kind @ AtomKind::Comment | lhs_kind @ AtomKind::String(_),
614 ..
615 },
616 Syntax::Atom {
617 content: rhs_content,
618 kind: rhs_kind @ AtomKind::Comment | rhs_kind @ AtomKind::String(_),
619 ..
620 },
621 ) = (lhs_syntax, rhs_syntax)
622 {
623 if lhs_kind == rhs_kind && lhs_content != rhs_content {
626 let levenshtein_pct =
627 (normalized_levenshtein(lhs_content, rhs_content) * 100.0).round() as u8;
628 let edge = if lhs_kind == &AtomKind::Comment {
629 ReplacedComment { levenshtein_pct }
630 } else {
631 ReplacedString { levenshtein_pct }
632 };
633
634 let (lhs_syntax, rhs_syntax, lhs_parent_id, rhs_parent_id, parents) =
635 pop_all_parents(
636 lhs_syntax.next_sibling(),
637 rhs_syntax.next_sibling(),
638 v.lhs_parent_id,
639 v.rhs_parent_id,
640 &v.parents,
641 alloc,
642 );
643 neighbours.push((
644 edge,
645 allocate_if_new(
646 Vertex {
647 neighbours: RefCell::new(None),
648 predecessor: Cell::new(None),
649 lhs_syntax,
650 rhs_syntax,
651 parents,
652 lhs_parent_id,
653 rhs_parent_id,
654 },
655 alloc,
656 seen,
657 ),
658 ));
659 }
660 }
661 }
662
663 if let Some(lhs_syntax) = &v.lhs_syntax {
664 match lhs_syntax {
665 Syntax::Atom { .. } => {
667 let (lhs_syntax, rhs_syntax, lhs_parent_id, rhs_parent_id, parents) =
668 pop_all_parents(
669 lhs_syntax.next_sibling(),
670 v.rhs_syntax,
671 v.lhs_parent_id,
672 v.rhs_parent_id,
673 &v.parents,
674 alloc,
675 );
676
677 neighbours.push((
678 NovelAtomLHS {},
679 allocate_if_new(
680 Vertex {
681 neighbours: RefCell::new(None),
682 predecessor: Cell::new(None),
683 lhs_syntax,
684 rhs_syntax,
685 parents,
686 lhs_parent_id,
687 rhs_parent_id,
688 },
689 alloc,
690 seen,
691 ),
692 ));
693 }
694 Syntax::List { children, .. } => {
696 let lhs_next = children.first().copied();
697
698 let parents_next = push_lhs_delimiter(&v.parents, lhs_syntax, alloc);
699
700 let (lhs_syntax, rhs_syntax, lhs_parent_id, rhs_parent_id, parents) =
701 pop_all_parents(
702 lhs_next,
703 v.rhs_syntax,
704 Some(lhs_syntax.id()),
705 v.rhs_parent_id,
706 &parents_next,
707 alloc,
708 );
709
710 neighbours.push((
711 EnterNovelDelimiterLHS {},
712 allocate_if_new(
713 Vertex {
714 neighbours: RefCell::new(None),
715 predecessor: Cell::new(None),
716 lhs_syntax,
717 rhs_syntax,
718 parents,
719 lhs_parent_id,
720 rhs_parent_id,
721 },
722 alloc,
723 seen,
724 ),
725 ));
726 }
727 }
728 }
729
730 if let Some(rhs_syntax) = &v.rhs_syntax {
731 match rhs_syntax {
732 Syntax::Atom { .. } => {
734 let (lhs_syntax, rhs_syntax, lhs_parent_id, rhs_parent_id, parents) =
735 pop_all_parents(
736 v.lhs_syntax,
737 rhs_syntax.next_sibling(),
738 v.lhs_parent_id,
739 v.rhs_parent_id,
740 &v.parents,
741 alloc,
742 );
743
744 neighbours.push((
745 NovelAtomRHS {},
746 allocate_if_new(
747 Vertex {
748 neighbours: RefCell::new(None),
749 predecessor: Cell::new(None),
750 lhs_syntax,
751 rhs_syntax,
752 parents,
753 lhs_parent_id,
754 rhs_parent_id,
755 },
756 alloc,
757 seen,
758 ),
759 ));
760 }
761 Syntax::List { children, .. } => {
763 let rhs_next = children.first().copied();
764 let parents_next = push_rhs_delimiter(&v.parents, rhs_syntax, alloc);
765
766 let (lhs_syntax, rhs_syntax, lhs_parent_id, rhs_parent_id, parents) =
767 pop_all_parents(
768 v.lhs_syntax,
769 rhs_next,
770 v.lhs_parent_id,
771 Some(rhs_syntax.id()),
772 &parents_next,
773 alloc,
774 );
775
776 neighbours.push((
777 EnterNovelDelimiterRHS {},
778 allocate_if_new(
779 Vertex {
780 neighbours: RefCell::new(None),
781 predecessor: Cell::new(None),
782 lhs_syntax,
783 rhs_syntax,
784 parents,
785 lhs_parent_id,
786 rhs_parent_id,
787 },
788 alloc,
789 seen,
790 ),
791 ));
792 }
793 }
794 }
795 assert!(
796 !neighbours.is_empty(),
797 "Must always find some next steps if node is not the end"
798 );
799
800 v.neighbours
801 .replace(Some(alloc.alloc_slice_copy(neighbours.as_slice())));
802}
803
804pub fn populate_change_map<'s, 'v>(
805 route: &[(Edge, &'v Vertex<'s, 'v>)],
806 change_map: &mut ChangeMap<'s>,
807) {
808 for (e, v) in route {
809 match e {
810 UnchangedNode { .. } => {
811 let lhs = v.lhs_syntax.unwrap();
813 let rhs = v.rhs_syntax.unwrap();
814
815 insert_deep_unchanged(lhs, rhs, change_map);
816 insert_deep_unchanged(rhs, lhs, change_map);
817 }
818 EnterUnchangedDelimiter { .. } => {
819 let lhs = v.lhs_syntax.unwrap();
822 let rhs = v.rhs_syntax.unwrap();
823 change_map.insert(lhs, ChangeKind::Unchanged(rhs));
824 change_map.insert(rhs, ChangeKind::Unchanged(lhs));
825 }
826 ReplacedComment { levenshtein_pct } | ReplacedString { levenshtein_pct } => {
827 let lhs = v.lhs_syntax.unwrap();
828 let rhs = v.rhs_syntax.unwrap();
829 let change_kind = |first, second| {
830 if let ReplacedComment { .. } = e {
831 ChangeKind::ReplacedComment(first, second)
832 } else {
833 ChangeKind::ReplacedString(first, second)
834 }
835 };
836
837 if *levenshtein_pct > 20 {
842 change_map.insert(lhs, change_kind(lhs, rhs));
843 change_map.insert(rhs, change_kind(rhs, lhs));
844 } else {
845 change_map.insert(lhs, ChangeKind::Novel);
846 change_map.insert(rhs, ChangeKind::Novel);
847 }
848 }
849 NovelAtomLHS { .. } | EnterNovelDelimiterLHS { .. } => {
850 let lhs = v.lhs_syntax.unwrap();
851 change_map.insert(lhs, ChangeKind::Novel);
852 }
853 NovelAtomRHS { .. } | EnterNovelDelimiterRHS { .. } => {
854 let rhs = v.rhs_syntax.unwrap();
855 change_map.insert(rhs, ChangeKind::Novel);
856 }
857 }
858 }
859}