biome_rowan/ast/
batch.rs

1use crate::syntax::SyntaxKind;
2use crate::{
3    chain_trivia_pieces, AstNode, Language, SyntaxElement, SyntaxNode, SyntaxSlot, SyntaxToken,
4};
5use biome_text_edit::{TextEdit, TextEditBuilder};
6use biome_text_size::TextRange;
7use std::{
8    cmp,
9    collections::BinaryHeap,
10    iter::{empty, once},
11};
12use tracing::debug;
13
14pub trait BatchMutationExt<L>: AstNode<Language = L>
15where
16    L: Language,
17{
18    /// It starts a [BatchMutation]
19    #[must_use = "This method consumes the node and return the BatchMutation api that returns the new SyntaxNode on commit"]
20    fn begin(self) -> BatchMutation<L>;
21}
22
23impl<L, T> BatchMutationExt<L> for T
24where
25    L: Language,
26    T: AstNode<Language = L>,
27{
28    #[must_use = "This method consumes the node and return the BatchMutation api that returns the new SyntaxNode on commit"]
29    fn begin(self) -> BatchMutation<L> {
30        BatchMutation::new(self.into_syntax())
31    }
32}
33
34/// Stores the changes internally used by the [BatchMutation::commit] algorithm.
35/// It needs to be sorted by depth in decreasing order, then by range start and
36/// by slot in increasing order.
37///
38/// This is necesasry so we can aggregate all changes to the same node using "peek".
39#[derive(Debug, Clone)]
40struct CommitChange<L: Language> {
41    parent_depth: usize,
42    parent: Option<SyntaxNode<L>>,
43    parent_range: Option<(u32, u32)>,
44    new_node_slot: usize,
45    new_node: Option<SyntaxElement<L>>,
46    is_from_action: bool,
47}
48
49impl<L: Language> CommitChange<L> {
50    /// Returns the "ordering key" for a change, controlling in what order this
51    /// change will be applied relatively to other changes. The key consists of
52    /// a tuple of numeric values representing the depth, parent start and slot
53    /// index of the corresponding change.
54    ///
55    /// So, we order first by depth. Then by the range of the node. Then by the
56    /// slot index of the node.
57    ///
58    /// The first is important to guarantee that all nodes that will be changed
59    /// in the future are still valid with using SyntaxNode that we have.
60    ///
61    /// The second and third are essential to guarantee that the ".peek()" we do
62    /// below is sufficient to see the same node in case of two or more nodes
63    /// having the same parent.
64    ///
65    /// All of them will be prioritized in the descending order in a binary heap
66    /// to ensure one change won't invalidate its following changes.
67    fn key(&self) -> (usize, u32, usize) {
68        (
69            self.parent_depth,
70            self.parent_range.map(|(start, _)| start).unwrap_or(0),
71            self.new_node_slot,
72        )
73    }
74}
75
76impl<L: Language> PartialEq for CommitChange<L> {
77    fn eq(&self, other: &Self) -> bool {
78        self.key() == other.key()
79    }
80}
81impl<L: Language> Eq for CommitChange<L> {}
82
83impl<L: Language> PartialOrd for CommitChange<L> {
84    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
85        Some(self.cmp(other))
86    }
87}
88impl<L: Language> Ord for CommitChange<L> {
89    fn cmp(&self, other: &Self) -> cmp::Ordering {
90        self.key().cmp(&other.key())
91    }
92}
93
94#[derive(Debug, Clone)]
95pub struct BatchMutation<L>
96where
97    L: Language,
98{
99    root: SyntaxNode<L>,
100    changes: BinaryHeap<CommitChange<L>>,
101}
102
103impl<L> BatchMutation<L>
104where
105    L: Language,
106{
107    pub fn new(root: SyntaxNode<L>) -> Self {
108        Self {
109            root,
110            changes: BinaryHeap::new(),
111        }
112    }
113
114    /// Push a change to replace the "prev_node" with "next_node".
115    /// Trivia from "prev_node" is automatically copied to "next_node".
116    ///
117    /// Changes to take effect must be committed.
118    pub fn replace_node<T>(&mut self, prev_node: T, next_node: T)
119    where
120        T: AstNode<Language = L>,
121    {
122        self.replace_element(
123            prev_node.into_syntax().into(),
124            next_node.into_syntax().into(),
125        )
126    }
127
128    /// Push a change to replace the "prev_token" with "next_token".
129    /// Trivia from "prev_token" is automatically copied to "next_token".
130    ///
131    /// Changes to take effect must be committed.
132    pub fn replace_token(&mut self, prev_token: SyntaxToken<L>, next_token: SyntaxToken<L>) {
133        self.replace_element(prev_token.into(), next_token.into())
134    }
135
136    /// Push a change to replace the "prev_element" with "next_element".
137    /// Trivia from "prev_element" is automatically copied to "next_element".
138    ///
139    /// Changes to take effect must be committed.
140    pub fn replace_element(
141        &mut self,
142        prev_element: SyntaxElement<L>,
143        next_element: SyntaxElement<L>,
144    ) {
145        let (prev_leading_trivia, prev_trailing_trivia) = match &prev_element {
146            SyntaxElement::Node(node) => (
147                node.first_token().map(|token| token.leading_trivia()),
148                node.last_token().map(|token| token.trailing_trivia()),
149            ),
150            SyntaxElement::Token(token) => {
151                (Some(token.leading_trivia()), Some(token.trailing_trivia()))
152            }
153        };
154
155        let next_element = match next_element {
156            SyntaxElement::Node(mut node) => {
157                if let Some(token) = node.first_token() {
158                    let new_token = match prev_leading_trivia {
159                        Some(prev_leading_trivia) => {
160                            token.with_leading_trivia_pieces(prev_leading_trivia.pieces())
161                        }
162                        None => token.with_leading_trivia_pieces(empty()),
163                    };
164
165                    node = node.replace_child(token.into(), new_token.into()).unwrap();
166                }
167
168                if let Some(token) = node.last_token() {
169                    let new_token = match prev_trailing_trivia {
170                        Some(prev_trailing_trivia) => {
171                            token.with_trailing_trivia_pieces(prev_trailing_trivia.pieces())
172                        }
173                        None => token.with_trailing_trivia_pieces([]),
174                    };
175
176                    node = node.replace_child(token.into(), new_token.into()).unwrap();
177                }
178
179                SyntaxElement::Node(node)
180            }
181            SyntaxElement::Token(token) => {
182                let new_token = match prev_leading_trivia {
183                    Some(prev_leading_trivia) => {
184                        token.with_leading_trivia_pieces(prev_leading_trivia.pieces())
185                    }
186                    None => token.with_leading_trivia_pieces([]),
187                };
188
189                let new_token = match prev_trailing_trivia {
190                    Some(prev_trailing_trivia) => {
191                        new_token.with_trailing_trivia_pieces(prev_trailing_trivia.pieces())
192                    }
193                    None => new_token.with_trailing_trivia_pieces([]),
194                };
195                SyntaxElement::Token(new_token)
196            }
197        };
198
199        self.push_change(prev_element, Some(next_element))
200    }
201
202    /// Push a change to replace the "prev_node" with "next_node".
203    ///
204    /// Changes to take effect must be committed.
205    pub fn replace_node_discard_trivia<T>(&mut self, prev_node: T, next_node: T)
206    where
207        T: AstNode<Language = L>,
208    {
209        self.replace_element_discard_trivia(
210            prev_node.into_syntax().into(),
211            next_node.into_syntax().into(),
212        )
213    }
214
215    /// Push a change to replace the "prev_token" with "next_token".
216    ///
217    /// Changes to take effect must be committed.
218    pub fn replace_token_discard_trivia(
219        &mut self,
220        prev_token: SyntaxToken<L>,
221        next_token: SyntaxToken<L>,
222    ) {
223        self.replace_element_discard_trivia(prev_token.into(), next_token.into())
224    }
225
226    /// Push a change to replace the "prev_token" with "next_token".
227    ///
228    /// - leading trivia of `prev_token`
229    /// - leading trivia of `next_token`
230    /// - trailing trivia of `prev_token`
231    /// - trailing trivia of `next_token`
232    pub fn replace_token_transfer_trivia(
233        &mut self,
234        prev_token: SyntaxToken<L>,
235        next_token: SyntaxToken<L>,
236    ) {
237        let leading_trivia = chain_trivia_pieces(
238            prev_token.leading_trivia().pieces(),
239            next_token.leading_trivia().pieces(),
240        );
241
242        let trailing_trivia = chain_trivia_pieces(
243            prev_token.trailing_trivia().pieces(),
244            next_token.trailing_trivia().pieces(),
245        );
246        let new_token = next_token
247            .with_leading_trivia_pieces(leading_trivia)
248            .with_trailing_trivia_pieces(trailing_trivia);
249
250        self.replace_token_discard_trivia(prev_token, new_token)
251    }
252
253    /// Push a change to replace the "prev_element" with "next_element".
254    ///
255    /// Changes to take effect must be committed.
256    pub fn replace_element_discard_trivia(
257        &mut self,
258        prev_element: SyntaxElement<L>,
259        next_element: SyntaxElement<L>,
260    ) {
261        self.push_change(prev_element, Some(next_element))
262    }
263
264    /// Push a change to remove the specified token.
265    ///
266    /// Changes to take effect must be committed.
267    pub fn remove_token(&mut self, prev_token: SyntaxToken<L>) {
268        self.remove_element(prev_token.into())
269    }
270
271    /// Push a change to remove the specified node.
272    ///
273    /// Changes to take effect must be committed.
274    pub fn remove_node<T>(&mut self, prev_node: T)
275    where
276        T: AstNode<Language = L>,
277    {
278        self.remove_element(prev_node.into_syntax().into())
279    }
280
281    /// Push a change to remove the specified element.
282    ///
283    /// Changes to take effect must be committed.
284    pub fn remove_element(&mut self, prev_element: SyntaxElement<L>) {
285        self.push_change(prev_element, None)
286    }
287
288    fn push_change(
289        &mut self,
290        prev_element: SyntaxElement<L>,
291        next_element: Option<SyntaxElement<L>>,
292    ) {
293        let new_node_slot = prev_element.index();
294        let parent = prev_element.parent();
295        let parent_range: Option<(u32, u32)> = parent.as_ref().map(|p| {
296            let range = p.text_range();
297            (range.start().into(), range.end().into())
298        });
299        let parent_depth = parent.as_ref().map(|p| p.ancestors().count()).unwrap_or(0);
300
301        debug!("pushing change...");
302        self.changes.push(CommitChange {
303            parent_depth,
304            parent,
305            parent_range,
306            new_node_slot,
307            new_node: next_element,
308            is_from_action: true,
309        });
310    }
311
312    /// Returns the range of the document modified by this mutation along with
313    /// a list of individual text edits to be performed on the source code, or
314    /// [None] if the mutation is empty
315    ///
316    /// If the new tree is also required,
317    /// please use `commit_with_text_range_and_edit`
318    pub fn as_text_range_and_edit(self) -> Option<(TextRange, TextEdit)> {
319        self.commit_with_text_range_and_edit(true).1
320    }
321
322    /// Returns the new tree with all commit changes applied.
323    ///
324    /// If the text range and text edit are also required,
325    /// please use `commit_with_text_range_and_edit`
326    pub fn commit(self) -> SyntaxNode<L> {
327        self.commit_with_text_range_and_edit(false).0
328    }
329
330    /// The core of the batch mutation algorithm can be summarized as:
331    ///
332    /// 1. Iterate all requested changes;
333    /// 2. Insert them into a heap (priority queue) by depth. Deeper changes are done first;
334    /// 3. Loop popping requested changes from the heap, taking the deepest change we have for the moment;
335    /// 4. Each requested change has a "parent", an "index" and the "new node" (or None);
336    /// 5. Clone the current parent's "parent", the "grandparent";
337    /// 6. Detach the current "parent" from the tree;
338    /// 7. Replace the old node at "index" at the current "parent" with the current "new node";
339    /// 8. Insert into the heap the grandparent as the parent and the current "parent" as the "new node";
340    ///
341    /// This is the simple case. The algorithm also has a more complex case when to changes have a common ancestor,
342    /// which can actually be one of the changed nodes.
343    ///
344    /// To address this case at step 3, when we pop a new change to apply it, we actually aggregate all changes to the current
345    /// parent together. This is done by the heap because we also sort by node and it's range.
346    ///
347    /// Text range and text edit can be collected simultaneously while committing if "with_text_range_and_edit" is true.
348    /// They're directly calculated from the commit changes. So you can commit and get text range and text edit in one pass.
349    ///
350    /// The calculation of text range and text edit can be summarized as:
351    ///
352    /// While we popping requested changes from the heap, collect the "deleted_text_range" and "optional_inserted_text"
353    /// into an ordered vector "text_mutation_list" sorted by the "deleted_text_range". The reason behind it is that
354    /// changes on the heap are first ordered by parent depth, but we need to construct the TextEdit from start to end.
355    /// So we use binary search and insertion to populate the "text_mutation_list". Reaching the root node means all
356    /// changes have been visted. So we start to construct the TextEdit with the help of "text_edit_builder" by iterating
357    /// the collected "text_mutation_list".
358    pub fn commit_with_text_range_and_edit(
359        self,
360        with_text_range_and_edit: bool,
361    ) -> (SyntaxNode<L>, Option<(TextRange, TextEdit)>) {
362        let BatchMutation { root, mut changes } = self;
363
364        // Ordered text mutation list sorted by text range
365        let mut text_mutation_list: Vec<(TextRange, Option<String>)> =
366            // SAFETY: this is safe bacause changes from actions can only
367            // overwrite each other, so the total number of the finalized
368            // text mutations will only be less.
369            Vec::with_capacity(changes.len());
370
371        // Collect all commit changes
372        while let Some(CommitChange {
373            new_node: curr_new_node,
374            new_node_slot: curr_new_node_slot,
375            parent: curr_parent,
376            parent_depth: curr_parent_depth,
377            is_from_action: curr_is_from_action,
378            ..
379        }) = changes.pop()
380        {
381            if let Some(curr_parent) = curr_parent {
382                // This must be done before the detachment below
383                // because we need nodes that are still valid in the old tree
384                let curr_grand_parent = curr_parent.parent();
385                let curr_grand_parent_range = curr_grand_parent.as_ref().map(|g| {
386                    let range = g.text_range();
387                    (range.start().into(), range.end().into())
388                });
389                let curr_parent_slot = curr_parent.index();
390
391                // Aggregate all modifications to the current parent
392                // This works because of the Ord we defined in the [CommitChange] struct
393                let mut modifications =
394                    vec![(curr_new_node_slot, curr_new_node, curr_is_from_action)];
395
396                while changes
397                    .peek()
398                    .and_then(|c| c.parent.as_ref())
399                    .map_or(false, |p| *p == curr_parent)
400                {
401                    // SAFETY: We can .pop().unwrap() because we .peek() above
402                    let CommitChange {
403                        new_node: next_new_node,
404                        new_node_slot: next_new_node_slot,
405                        is_from_action: next_is_from_action,
406                        ..
407                    } = changes.pop().expect("changes.pop");
408
409                    // If we have two modifications to the same slot,
410                    // last write wins
411                    if let Some(&(prev_new_node_slot, ..)) = modifications.last() {
412                        if prev_new_node_slot == next_new_node_slot {
413                            modifications.pop();
414                        }
415                    }
416
417                    // Add to the modifications
418                    modifications.push((next_new_node_slot, next_new_node, next_is_from_action));
419                }
420
421                // Collect text mutations, this has to be done before the detach below,
422                // or we'll lose the "deleted_text_range" info
423                if with_text_range_and_edit {
424                    for (new_node_slot, new_node, is_from_action) in &modifications {
425                        if !is_from_action {
426                            continue;
427                        }
428                        let deleted_text_range = match curr_parent.slots().nth(*new_node_slot) {
429                            Some(SyntaxSlot::Node(node)) => node.text_range(),
430                            Some(SyntaxSlot::Token(token)) => token.text_range(),
431                            Some(SyntaxSlot::Empty { index }) => {
432                                TextRange::new(index.into(), index.into())
433                            }
434                            None => continue,
435                        };
436                        let optional_inserted_text = new_node.as_ref().map(|n| n.to_string());
437
438                        // We use binary search to keep the text mutations in order
439                        match text_mutation_list
440                            .binary_search_by(|(range, _)| range.ordering(deleted_text_range))
441                        {
442                            // Overwrite the text mutation with an overlapping text range
443                            Ok(pos) => {
444                                text_mutation_list[pos] =
445                                    (deleted_text_range, optional_inserted_text)
446                            }
447                            // Insert the text mutation at the correct position
448                            Err(pos) => text_mutation_list
449                                .insert(pos, (deleted_text_range, optional_inserted_text)),
450                        }
451                    }
452                }
453
454                // Now we detach the current parent, commit all the modifications
455                // and push a pending change to its parent
456                let mut current_parent = curr_parent.detach();
457                let is_list = current_parent.kind().is_list();
458                for (new_node_slot, new_node, ..) in modifications {
459                    current_parent = if is_list && new_node.is_none() {
460                        current_parent.splice_slots(new_node_slot..=new_node_slot, empty())
461                    } else {
462                        current_parent.splice_slots(new_node_slot..=new_node_slot, once(new_node))
463                    }
464                }
465
466                changes.push(CommitChange {
467                    parent_depth: curr_parent_depth - 1,
468                    parent: curr_grand_parent,
469                    parent_range: curr_grand_parent_range,
470                    new_node_slot: curr_parent_slot,
471                    new_node: Some(SyntaxElement::Node(current_parent)),
472                    is_from_action: false,
473                });
474            }
475            // If parent is None, we reached the document root
476            else {
477                let optional_text_range_and_edit = if with_text_range_and_edit {
478                    // The root of batch mutation is not necessarily
479                    // the document root in some rule actions,
480                    // so we need to find the actual document root
481                    let mut document_root = root;
482                    while let Some(parent) = document_root.parent() {
483                        document_root = parent;
484                    }
485
486                    if curr_is_from_action {
487                        text_mutation_list = vec![(
488                            document_root.text_range(),
489                            Some(
490                                curr_new_node
491                                    .as_ref()
492                                    .map_or(String::new(), |n| n.to_string()),
493                            ),
494                        )];
495                    }
496
497                    // Build text range and text edit from the text mutation list
498                    let root_string = document_root.to_string();
499                    let mut text_range = TextRange::default();
500                    let mut text_edit_builder = TextEditBuilder::default();
501
502                    let mut pointer: usize = 0;
503                    for (deleted_text_range, optional_inserted_text) in text_mutation_list {
504                        if let (Ok(range_start), Ok(range_end)) = (
505                            usize::try_from(u32::from(deleted_text_range.start())),
506                            usize::try_from(u32::from(deleted_text_range.end())),
507                        ) {
508                            text_range = text_range.cover(deleted_text_range);
509                            if range_start > pointer {
510                                text_edit_builder.equal(&root_string[pointer..range_start]);
511                            }
512
513                            let old = &root_string[range_start..range_end];
514                            let new = &optional_inserted_text.map_or(String::new(), |t| t);
515
516                            text_edit_builder.with_unicode_words_diff(old, new);
517
518                            pointer = range_end;
519                        }
520                    }
521                    let end_pos = root_string.len();
522                    if end_pos > pointer {
523                        text_edit_builder.equal(&root_string[pointer..end_pos]);
524                    }
525
526                    let text_edit = text_edit_builder.finish();
527
528                    Some((text_range, text_edit))
529                } else {
530                    None
531                };
532
533                return (
534                    // SAFETY: If the change is propagated from the child,
535                    // this will allways be a syntax node element because
536                    // that's how we construct it above.
537                    //
538                    // Otherwise root should still exist as a node even if
539                    // the code is to be transformed to an empty string.
540                    curr_new_node
541                        .expect("expected root to exist")
542                        .into_node()
543                        .expect("expected root to be a node and not a token"),
544                    optional_text_range_and_edit,
545                );
546            }
547        }
548
549        (root, None)
550    }
551
552    pub fn root(&self) -> &SyntaxNode<L> {
553        &self.root
554    }
555}
556
557#[cfg(test)]
558pub mod test {
559    use crate::{
560        raw_language::{LiteralExpression, RawLanguageKind, RawLanguageRoot, RawSyntaxTreeBuilder},
561        AstNode, BatchMutationExt, SyntaxNodeCast,
562    };
563
564    /// ```
565    /// 0: ROOT@0..1
566    ///     0: LITERAL_EXPRESSION@0..1
567    ///         0: STRING_TOKEN@0..1 "a" [] []
568    /// ```
569    fn tree_one(a: &str) -> (RawLanguageRoot, String) {
570        let mut builder = RawSyntaxTreeBuilder::new();
571        builder
572            .start_node(RawLanguageKind::ROOT)
573            .start_node(RawLanguageKind::LITERAL_EXPRESSION)
574            .token(RawLanguageKind::STRING_TOKEN, a)
575            .finish_node()
576            .finish_node();
577        let root = builder.finish().cast::<RawLanguageRoot>().unwrap();
578        let s = format!("{:#?}", root.syntax());
579        (root, s)
580    }
581
582    /// ```
583    /// 0: ROOT@0..1
584    ///     0: LITERAL_EXPRESSION@0..1
585    ///         0: STRING_TOKEN@0..1 "a" [] []
586    ///     1: LITERAL_EXPRESSION@0..1
587    ///         0: STRING_TOKEN@0..1 "b" [] []
588    /// ```
589    fn tree_two(a: &str, b: &str) -> (RawLanguageRoot, String) {
590        let mut builder = RawSyntaxTreeBuilder::new();
591        builder
592            .start_node(RawLanguageKind::ROOT)
593            .start_node(RawLanguageKind::LITERAL_EXPRESSION)
594            .token(RawLanguageKind::STRING_TOKEN, a)
595            .finish_node()
596            .start_node(RawLanguageKind::LITERAL_EXPRESSION)
597            .token(RawLanguageKind::STRING_TOKEN, b)
598            .finish_node()
599            .finish_node();
600        let root = builder.finish().cast::<RawLanguageRoot>().unwrap();
601        let s = format!("{:#?}", root.syntax());
602        (root, s)
603    }
604
605    fn find(root: &RawLanguageRoot, name: &str) -> LiteralExpression {
606        root.syntax()
607            .descendants()
608            .find(|x| x.kind() == RawLanguageKind::LITERAL_EXPRESSION && x.text_trimmed() == name)
609            .unwrap()
610            .cast::<LiteralExpression>()
611            .unwrap()
612    }
613
614    fn clone_detach(root: &RawLanguageRoot, name: &str) -> LiteralExpression {
615        root.syntax()
616            .descendants()
617            .find(|x| x.kind() == RawLanguageKind::LITERAL_EXPRESSION && x.text_trimmed() == name)
618            .unwrap()
619            .detach()
620            .cast::<LiteralExpression>()
621            .unwrap()
622    }
623
624    #[test]
625    pub fn ok_batch_mutation_no_changes() {
626        let (before, before_debug) = tree_one("a");
627
628        let batch = before.begin();
629        let after = batch.commit();
630
631        assert_eq!(before_debug, format!("{after:#?}"));
632    }
633
634    #[test]
635    pub fn ok_batch_mutation_one_change() {
636        let (before, _) = tree_one("a");
637        let (expected, expected_debug) = tree_one("b");
638
639        let a = find(&before, "a");
640        let b = clone_detach(&expected, "b");
641
642        let mut batch = before.begin();
643        batch.replace_node(a, b);
644        let root = batch.commit();
645
646        assert_eq!(expected_debug, format!("{root:#?}"));
647    }
648
649    #[test]
650    pub fn ok_batch_mutation_multiple_changes_different_branches() {
651        let (before, _) = tree_two("a", "b");
652        let (expected, expected_debug) = tree_two("c", "d");
653
654        let a = find(&before, "a");
655        let b = find(&before, "b");
656        let c = clone_detach(&expected, "c");
657        let d = clone_detach(&expected, "d");
658
659        let mut batch = before.begin();
660        batch.replace_node(a, c);
661        batch.replace_node(b, d);
662        let after = batch.commit();
663
664        assert_eq!(expected_debug, format!("{after:#?}"));
665    }
666}