tree-sitter 0.20.10

Rust bindings to the Tree-sitter parsing library
Documentation
#include "./get_changed_ranges.h"
#include "./subtree.h"
#include "./language.h"
#include "./error_costs.h"
#include "./tree_cursor.h"
#include <assert.h>

// #define DEBUG_GET_CHANGED_RANGES

static void ts_range_array_add(
  TSRangeArray *self,
  Length start,
  Length end
) {
  if (self->size > 0) {
    TSRange *last_range = array_back(self);
    if (start.bytes <= last_range->end_byte) {
      last_range->end_byte = end.bytes;
      last_range->end_point = end.extent;
      return;
    }
  }

  if (start.bytes < end.bytes) {
    TSRange range = { start.extent, end.extent, start.bytes, end.bytes };
    array_push(self, range);
  }
}

bool ts_range_array_intersects(
  const TSRangeArray *self,
  unsigned start_index,
  uint32_t start_byte,
  uint32_t end_byte
) {
  for (unsigned i = start_index; i < self->size; i++) {
    TSRange *range = &self->contents[i];
    if (range->end_byte > start_byte) {
      if (range->start_byte >= end_byte) break;
      return true;
    }
  }
  return false;
}

void ts_range_array_get_changed_ranges(
  const TSRange *old_ranges, unsigned old_range_count,
  const TSRange *new_ranges, unsigned new_range_count,
  TSRangeArray *differences
) {
  unsigned new_index = 0;
  unsigned old_index = 0;
  Length current_position = length_zero();
  bool in_old_range = false;
  bool in_new_range = false;

  while (old_index < old_range_count || new_index < new_range_count) {
    const TSRange *old_range = &old_ranges[old_index];
    const TSRange *new_range = &new_ranges[new_index];

    Length next_old_position;
    if (in_old_range) {
      next_old_position = (Length) {old_range->end_byte, old_range->end_point};
    } else if (old_index < old_range_count) {
      next_old_position = (Length) {old_range->start_byte, old_range->start_point};
    } else {
      next_old_position = LENGTH_MAX;
    }

    Length next_new_position;
    if (in_new_range) {
      next_new_position = (Length) {new_range->end_byte, new_range->end_point};
    } else if (new_index < new_range_count) {
      next_new_position = (Length) {new_range->start_byte, new_range->start_point};
    } else {
      next_new_position = LENGTH_MAX;
    }

    if (next_old_position.bytes < next_new_position.bytes) {
      if (in_old_range != in_new_range) {
        ts_range_array_add(differences, current_position, next_old_position);
      }
      if (in_old_range) old_index++;
      current_position = next_old_position;
      in_old_range = !in_old_range;
    } else if (next_new_position.bytes < next_old_position.bytes) {
      if (in_old_range != in_new_range) {
        ts_range_array_add(differences, current_position, next_new_position);
      }
      if (in_new_range) new_index++;
      current_position = next_new_position;
      in_new_range = !in_new_range;
    } else {
      if (in_old_range != in_new_range) {
        ts_range_array_add(differences, current_position, next_new_position);
      }
      if (in_old_range) old_index++;
      if (in_new_range) new_index++;
      in_old_range = !in_old_range;
      in_new_range = !in_new_range;
      current_position = next_new_position;
    }
  }
}

typedef struct {
  TreeCursor cursor;
  const TSLanguage *language;
  unsigned visible_depth;
  bool in_padding;
} Iterator;

static Iterator iterator_new(
  TreeCursor *cursor,
  const Subtree *tree,
  const TSLanguage *language
) {
  array_clear(&cursor->stack);
  array_push(&cursor->stack, ((TreeCursorEntry) {
    .subtree = tree,
    .position = length_zero(),
    .child_index = 0,
    .structural_child_index = 0,
  }));
  return (Iterator) {
    .cursor = *cursor,
    .language = language,
    .visible_depth = 1,
    .in_padding = false,
  };
}

static bool iterator_done(Iterator *self) {
  return self->cursor.stack.size == 0;
}

static Length iterator_start_position(Iterator *self) {
  TreeCursorEntry entry = *array_back(&self->cursor.stack);
  if (self->in_padding) {
    return entry.position;
  } else {
    return length_add(entry.position, ts_subtree_padding(*entry.subtree));
  }
}

static Length iterator_end_position(Iterator *self) {
  TreeCursorEntry entry = *array_back(&self->cursor.stack);
  Length result = length_add(entry.position, ts_subtree_padding(*entry.subtree));
  if (self->in_padding) {
    return result;
  } else {
    return length_add(result, ts_subtree_size(*entry.subtree));
  }
}

static bool iterator_tree_is_visible(const Iterator *self) {
  TreeCursorEntry entry = *array_back(&self->cursor.stack);
  if (ts_subtree_visible(*entry.subtree)) return true;
  if (self->cursor.stack.size > 1) {
    Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree;
    return ts_language_alias_at(
      self->language,
      parent.ptr->production_id,
      entry.structural_child_index
    ) != 0;
  }
  return false;
}

static void iterator_get_visible_state(
  const Iterator *self,
  Subtree *tree,
  TSSymbol *alias_symbol,
  uint32_t *start_byte
) {
  uint32_t i = self->cursor.stack.size - 1;

  if (self->in_padding) {
    if (i == 0) return;
    i--;
  }

  for (; i + 1 > 0; i--) {
    TreeCursorEntry entry = self->cursor.stack.contents[i];

    if (i > 0) {
      const Subtree *parent = self->cursor.stack.contents[i - 1].subtree;
      *alias_symbol = ts_language_alias_at(
        self->language,
        parent->ptr->production_id,
        entry.structural_child_index
      );
    }

    if (ts_subtree_visible(*entry.subtree) || *alias_symbol) {
      *tree = *entry.subtree;
      *start_byte = entry.position.bytes;
      break;
    }
  }
}

static void iterator_ascend(Iterator *self) {
  if (iterator_done(self)) return;
  if (iterator_tree_is_visible(self) && !self->in_padding) self->visible_depth--;
  if (array_back(&self->cursor.stack)->child_index > 0) self->in_padding = false;
  self->cursor.stack.size--;
}

static bool iterator_descend(Iterator *self, uint32_t goal_position) {
  if (self->in_padding) return false;

  bool did_descend;
  do {
    did_descend = false;
    TreeCursorEntry entry = *array_back(&self->cursor.stack);
    Length position = entry.position;
    uint32_t structural_child_index = 0;
    for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) {
      const Subtree *child = &ts_subtree_children(*entry.subtree)[i];
      Length child_left = length_add(position, ts_subtree_padding(*child));
      Length child_right = length_add(child_left, ts_subtree_size(*child));

      if (child_right.bytes > goal_position) {
        array_push(&self->cursor.stack, ((TreeCursorEntry) {
          .subtree = child,
          .position = position,
          .child_index = i,
          .structural_child_index = structural_child_index,
        }));

        if (iterator_tree_is_visible(self)) {
          if (child_left.bytes > goal_position) {
            self->in_padding = true;
          } else {
            self->visible_depth++;
          }
          return true;
        }

        did_descend = true;
        break;
      }

      position = child_right;
      if (!ts_subtree_extra(*child)) structural_child_index++;
    }
  } while (did_descend);

  return false;
}

static void iterator_advance(Iterator *self) {
  if (self->in_padding) {
    self->in_padding = false;
    if (iterator_tree_is_visible(self)) {
      self->visible_depth++;
    } else {
      iterator_descend(self, 0);
    }
    return;
  }

  for (;;) {
    if (iterator_tree_is_visible(self)) self->visible_depth--;
    TreeCursorEntry entry = array_pop(&self->cursor.stack);
    if (iterator_done(self)) return;

    const Subtree *parent = array_back(&self->cursor.stack)->subtree;
    uint32_t child_index = entry.child_index + 1;
    if (ts_subtree_child_count(*parent) > child_index) {
      Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree));
      uint32_t structural_child_index = entry.structural_child_index;
      if (!ts_subtree_extra(*entry.subtree)) structural_child_index++;
      const Subtree *next_child = &ts_subtree_children(*parent)[child_index];

      array_push(&self->cursor.stack, ((TreeCursorEntry) {
        .subtree = next_child,
        .position = position,
        .child_index = child_index,
        .structural_child_index = structural_child_index,
      }));

      if (iterator_tree_is_visible(self)) {
        if (ts_subtree_padding(*next_child).bytes > 0) {
          self->in_padding = true;
        } else {
          self->visible_depth++;
        }
      } else {
        iterator_descend(self, 0);
      }
      break;
    }
  }
}

typedef enum {
  IteratorDiffers,
  IteratorMayDiffer,
  IteratorMatches,
} IteratorComparison;

static IteratorComparison iterator_compare(
  const Iterator *old_iter,
  const Iterator *new_iter
) {
  Subtree old_tree = NULL_SUBTREE;
  Subtree new_tree = NULL_SUBTREE;
  uint32_t old_start = 0;
  uint32_t new_start = 0;
  TSSymbol old_alias_symbol = 0;
  TSSymbol new_alias_symbol = 0;
  iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start);
  iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start);

  if (!old_tree.ptr && !new_tree.ptr) return IteratorMatches;
  if (!old_tree.ptr || !new_tree.ptr) return IteratorDiffers;

  if (
    old_alias_symbol == new_alias_symbol &&
    ts_subtree_symbol(old_tree) == ts_subtree_symbol(new_tree)
  ) {
    if (old_start == new_start &&
        !ts_subtree_has_changes(old_tree) &&
        ts_subtree_symbol(old_tree) != ts_builtin_sym_error &&
        ts_subtree_size(old_tree).bytes == ts_subtree_size(new_tree).bytes &&
        ts_subtree_parse_state(old_tree) != TS_TREE_STATE_NONE &&
        ts_subtree_parse_state(new_tree) != TS_TREE_STATE_NONE &&
        (ts_subtree_parse_state(old_tree) == ERROR_STATE) ==
        (ts_subtree_parse_state(new_tree) == ERROR_STATE)) {
      return IteratorMatches;
    } else {
      return IteratorMayDiffer;
    }
  }

  return IteratorDiffers;
}

#ifdef DEBUG_GET_CHANGED_RANGES
static inline void iterator_print_state(Iterator *self) {
  TreeCursorEntry entry = *array_back(&self->cursor.stack);
  TSPoint start = iterator_start_position(self).extent;
  TSPoint end = iterator_end_position(self).extent;
  const char *name = ts_language_symbol_name(self->language, ts_subtree_symbol(*entry.subtree));
  printf(
    "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])",
    name, self->in_padding ? "(p)" : "   ",
    self->visible_depth,
    start.row + 1, start.column,
    end.row + 1, end.column
  );
}
#endif

unsigned ts_subtree_get_changed_ranges(
  const Subtree *old_tree, const Subtree *new_tree,
  TreeCursor *cursor1, TreeCursor *cursor2,
  const TSLanguage *language,
  const TSRangeArray *included_range_differences,
  TSRange **ranges
) {
  TSRangeArray results = array_new();

  Iterator old_iter = iterator_new(cursor1, old_tree, language);
  Iterator new_iter = iterator_new(cursor2, new_tree, language);

  unsigned included_range_difference_index = 0;

  Length position = iterator_start_position(&old_iter);
  Length next_position = iterator_start_position(&new_iter);
  if (position.bytes < next_position.bytes) {
    ts_range_array_add(&results, position, next_position);
    position = next_position;
  } else if (position.bytes > next_position.bytes) {
    ts_range_array_add(&results, next_position, position);
    next_position = position;
  }

  do {
    #ifdef DEBUG_GET_CHANGED_RANGES
    printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column);
    iterator_print_state(&old_iter);
    printf("\tvs\t");
    iterator_print_state(&new_iter);
    puts("");
    #endif

    // Compare the old and new subtrees.
    IteratorComparison comparison = iterator_compare(&old_iter, &new_iter);

    // Even if the two subtrees appear to be identical, they could differ
    // internally if they contain a range of text that was previously
    // excluded from the parse, and is now included, or vice-versa.
    if (comparison == IteratorMatches && ts_range_array_intersects(
      included_range_differences,
      included_range_difference_index,
      position.bytes,
      iterator_end_position(&old_iter).bytes
    )) {
      comparison = IteratorMayDiffer;
    }

    bool is_changed = false;
    switch (comparison) {
      // If the subtrees are definitely identical, move to the end
      // of both subtrees.
      case IteratorMatches:
        next_position = iterator_end_position(&old_iter);
        break;

      // If the subtrees might differ internally, descend into both
      // subtrees, finding the first child that spans the current position.
      case IteratorMayDiffer:
        if (iterator_descend(&old_iter, position.bytes)) {
          if (!iterator_descend(&new_iter, position.bytes)) {
            is_changed = true;
            next_position = iterator_end_position(&old_iter);
          }
        } else if (iterator_descend(&new_iter, position.bytes)) {
          is_changed = true;
          next_position = iterator_end_position(&new_iter);
        } else {
          next_position = length_min(
            iterator_end_position(&old_iter),
            iterator_end_position(&new_iter)
          );
        }
        break;

      // If the subtrees are different, record a change and then move
      // to the end of both subtrees.
      case IteratorDiffers:
        is_changed = true;
        next_position = length_min(
          iterator_end_position(&old_iter),
          iterator_end_position(&new_iter)
        );
        break;
    }

    // Ensure that both iterators are caught up to the current position.
    while (
      !iterator_done(&old_iter) &&
      iterator_end_position(&old_iter).bytes <= next_position.bytes
    ) iterator_advance(&old_iter);
    while (
      !iterator_done(&new_iter) &&
      iterator_end_position(&new_iter).bytes <= next_position.bytes
    ) iterator_advance(&new_iter);

    // Ensure that both iterators are at the same depth in the tree.
    while (old_iter.visible_depth > new_iter.visible_depth) {
      iterator_ascend(&old_iter);
    }
    while (new_iter.visible_depth > old_iter.visible_depth) {
      iterator_ascend(&new_iter);
    }

    if (is_changed) {
      #ifdef DEBUG_GET_CHANGED_RANGES
      printf(
        "  change: [[%u, %u] - [%u, %u]]\n",
        position.extent.row + 1, position.extent.column,
        next_position.extent.row + 1, next_position.extent.column
      );
      #endif

      ts_range_array_add(&results, position, next_position);
    }

    position = next_position;

    // Keep track of the current position in the included range differences
    // array in order to avoid scanning the entire array on each iteration.
    while (included_range_difference_index < included_range_differences->size) {
      const TSRange *range = &included_range_differences->contents[
        included_range_difference_index
      ];
      if (range->end_byte <= position.bytes) {
        included_range_difference_index++;
      } else {
        break;
      }
    }
  } while (!iterator_done(&old_iter) && !iterator_done(&new_iter));

  Length old_size = ts_subtree_total_size(*old_tree);
  Length new_size = ts_subtree_total_size(*new_tree);
  if (old_size.bytes < new_size.bytes) {
    ts_range_array_add(&results, old_size, new_size);
  } else if (new_size.bytes < old_size.bytes) {
    ts_range_array_add(&results, new_size, old_size);
  }

  *cursor1 = old_iter.cursor;
  *cursor2 = new_iter.cursor;
  *ranges = results.contents;
  return results.size;
}