use std::{fmt, ops::Range, sync::Arc};
const B_MAX: usize = 8;
const MAX_CHUNK_BYTES: usize = 1024;
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
struct Metrics {
byte_len: usize,
char_len: usize,
line_count: usize,
}
impl Metrics {
fn from_text(text: &str) -> Self {
if text.is_empty() {
return Self::default();
}
let byte_len = text.len();
let mut char_len: usize = 0;
let mut newline_count: usize = 0;
for c in text.chars() {
char_len += 1;
if c == '\n' {
newline_count += 1;
}
}
let line_count = if text.as_bytes()[byte_len - 1] == b'\n' {
newline_count
} else {
newline_count + 1
};
Self {
byte_len,
char_len,
line_count,
}
}
fn sum(iter: impl Iterator<Item = Self>) -> Self {
let mut result = Self::default();
for m in iter {
result.byte_len += m.byte_len;
result.char_len += m.char_len;
result.line_count += m.line_count;
}
result
}
}
enum NodeKind {
Leaf { text: String },
Internal { children: Vec<Arc<RopeNode>> },
}
struct RopeNode {
metrics: Metrics,
kind: NodeKind,
}
impl RopeNode {
fn new_leaf(text: String) -> Arc<Self> {
let metrics = Metrics::from_text(&text);
Arc::new(Self {
metrics,
kind: NodeKind::Leaf { text },
})
}
fn new_internal(children: Vec<Arc<Self>>) -> Arc<Self> {
debug_assert!(!children.is_empty(), "internal node must have children");
let metrics = Metrics::sum(children.iter().map(|c| c.metrics));
Arc::new(Self {
metrics,
kind: NodeKind::Internal { children },
})
}
const fn is_leaf(&self) -> bool {
matches!(self.kind, NodeKind::Leaf { .. })
}
}
#[derive(Clone)]
pub struct Rope {
root: Arc<RopeNode>,
}
impl Rope {
pub fn new() -> Self {
Self {
root: RopeNode::new_leaf(String::new()),
}
}
pub fn from_str(s: &str) -> Self {
if s.is_empty() {
return Self::new();
}
let leaves = chunk_text_to_leaves(s);
Self {
root: build_tree(leaves),
}
}
}
impl Rope {
pub fn is_empty(&self) -> bool {
self.root.metrics.byte_len == 0
}
pub fn byte_len(&self) -> usize {
self.root.metrics.byte_len
}
pub fn char_len(&self) -> usize {
self.root.metrics.char_len
}
pub fn line_count(&self) -> usize {
let count = self.root.metrics.line_count;
if count > 0 && last_byte(&self.root) == Some(b'\n') {
count + 1
} else {
count
}
}
pub fn line(&self, idx: usize) -> Option<&str> {
if idx >= self.line_count() {
return None;
}
if idx == self.root.metrics.line_count {
return Some("");
}
line_at(&self.root, idx)
}
pub fn line_len(&self, idx: usize) -> Option<usize> {
self.line(idx).map(|l| l.chars().count())
}
pub fn content(&self) -> String {
let mut buf = String::with_capacity(self.byte_len());
collect_text(&self.root, &mut buf);
buf
}
}
impl Rope {
pub fn position_to_byte(&self, line: usize, col: usize) -> usize {
if self.is_empty() {
return 0;
}
pos_to_byte(&self.root, line, col)
}
pub fn byte_to_position(&self, byte_offset: usize) -> (usize, usize) {
if self.is_empty() {
return (0, 0);
}
byte_to_pos(&self.root, byte_offset)
}
pub fn char_to_byte(&self, char_offset: usize) -> usize {
if self.is_empty() {
return 0;
}
char_to_byte_offset(&self.root, char_offset)
}
pub fn byte_to_char(&self, byte_offset: usize) -> usize {
if self.is_empty() {
return 0;
}
byte_to_char_offset(&self.root, byte_offset)
}
}
impl Rope {
pub fn insert(&self, byte_offset: usize, text: &str) -> Self {
if text.is_empty() {
return self.clone();
}
if self.is_empty() {
return Self::from_str(text);
}
let offset = byte_offset.min(self.root.metrics.byte_len);
let nodes = insert_at(&self.root, offset, text);
Self {
root: nodes_to_root(nodes),
}
}
pub fn remove(&self, range: Range<usize>) -> Self {
if range.is_empty() || self.is_empty() {
return self.clone();
}
let s = range.start.min(self.byte_len());
let e = range.end.min(self.byte_len());
if s >= e {
return self.clone();
}
let nodes = remove_range(&self.root, s..e);
if nodes.is_empty() {
Self::new()
} else {
Self {
root: nodes_to_root(nodes),
}
}
}
}
impl Rope {
#[allow(dead_code)]
pub const fn lines(&self) -> RopeLines<'_> {
RopeLines { rope: self, idx: 0 }
}
pub fn chunks(&self) -> RopeChunks<'_> {
RopeChunks {
stack: vec![&*self.root],
}
}
}
#[allow(dead_code)]
pub struct RopeLines<'a> {
rope: &'a Rope,
idx: usize,
}
impl<'a> Iterator for RopeLines<'a> {
type Item = &'a str;
fn next(&mut self) -> Option<Self::Item> {
let line = self.rope.line(self.idx)?;
self.idx += 1;
Some(line)
}
fn size_hint(&self) -> (usize, Option<usize>) {
let remaining = self.rope.line_count().saturating_sub(self.idx);
(remaining, Some(remaining))
}
}
impl ExactSizeIterator for RopeLines<'_> {}
pub struct RopeChunks<'a> {
stack: Vec<&'a RopeNode>,
}
impl<'a> Iterator for RopeChunks<'a> {
type Item = &'a str;
fn next(&mut self) -> Option<Self::Item> {
loop {
let node = self.stack.pop()?;
match &node.kind {
NodeKind::Leaf { text } => {
if text.is_empty() {
continue;
}
return Some(text.as_str());
}
NodeKind::Internal { children } => {
for child in children.iter().rev() {
self.stack.push(child);
}
}
}
}
}
}
impl fmt::Debug for Rope {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let content = self.content();
let preview = if content.len() > 80 {
format!("{}...", &content[..77])
} else {
content
};
f.debug_struct("Rope")
.field("byte_len", &self.byte_len())
.field("line_count", &self.line_count())
.field("content", &preview)
.finish()
}
}
impl PartialEq for Rope {
fn eq(&self, other: &Self) -> bool {
self.byte_len() == other.byte_len()
&& Iterator::eq(self.chunks().flat_map(str::bytes), other.chunks().flat_map(str::bytes))
}
}
impl Eq for Rope {}
impl fmt::Display for Rope {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for chunk in self.chunks() {
f.write_str(chunk)?;
}
Ok(())
}
}
impl Default for Rope {
fn default() -> Self {
Self::new()
}
}
fn chunk_text_to_leaves(text: &str) -> Vec<Arc<RopeNode>> {
debug_assert!(!text.is_empty());
chunk_text(text)
.into_iter()
.map(RopeNode::new_leaf)
.collect()
}
fn chunk_text(text: &str) -> Vec<String> {
let mut chunks = Vec::new();
let mut remaining = text;
while !remaining.is_empty() {
if remaining.len() <= MAX_CHUNK_BYTES {
chunks.push(remaining.to_string());
break;
}
let safe_end = floor_char_boundary(remaining, MAX_CHUNK_BYTES);
if let Some(pos) = remaining[..safe_end].rfind('\n') {
chunks.push(remaining[..=pos].to_string());
remaining = &remaining[pos + 1..];
} else {
if let Some(pos) = remaining.find('\n') {
chunks.push(remaining[..=pos].to_string());
remaining = &remaining[pos + 1..];
} else {
chunks.push(remaining.to_string());
break;
}
}
}
chunks
}
fn floor_char_boundary(s: &str, byte_idx: usize) -> usize {
if byte_idx >= s.len() {
return s.len();
}
let bytes = s.as_bytes();
let mut idx = byte_idx;
while idx > 0 && (bytes[idx] & 0xC0) == 0x80 {
idx -= 1;
}
idx
}
fn build_tree(mut nodes: Vec<Arc<RopeNode>>) -> Arc<RopeNode> {
debug_assert!(!nodes.is_empty());
if nodes.len() == 1 {
return nodes.into_iter().next().expect("checked non-empty");
}
while nodes.len() > B_MAX {
let mut parents = Vec::new();
let total = nodes.len();
let mut i = 0;
while i < total {
let remaining = total - i;
let group_size = if remaining <= B_MAX {
remaining
} else if remaining <= 2 * B_MAX {
remaining.div_ceil(2)
} else {
B_MAX
};
let children: Vec<Arc<RopeNode>> = nodes[i..i + group_size].to_vec();
parents.push(RopeNode::new_internal(children));
i += group_size;
}
nodes = parents;
}
RopeNode::new_internal(nodes)
}
fn nodes_to_root(nodes: Vec<Arc<RopeNode>>) -> Arc<RopeNode> {
match nodes.len() {
0 => RopeNode::new_leaf(String::new()),
1 => nodes.into_iter().next().expect("len==1"),
n if n <= B_MAX => RopeNode::new_internal(nodes),
_ => build_tree(nodes),
}
}
fn last_byte(node: &RopeNode) -> Option<u8> {
match &node.kind {
NodeKind::Leaf { text } => text.as_bytes().last().copied(),
NodeKind::Internal { children } => children.last().and_then(|c| last_byte(c)),
}
}
fn line_at(node: &RopeNode, idx: usize) -> Option<&str> {
match &node.kind {
NodeKind::Leaf { text } => {
let mut current = 0usize;
let mut start = 0usize;
for (i, &byte) in text.as_bytes().iter().enumerate() {
if byte == b'\n' {
if current == idx {
return Some(&text[start..i]);
}
current += 1;
start = i + 1;
}
}
if current == idx {
return Some(&text[start..]);
}
None
}
NodeKind::Internal { children } => {
let mut remaining = idx;
for child in children {
let lc = child.metrics.line_count;
if remaining < lc {
return line_at(child, remaining);
}
remaining -= lc;
}
None
}
}
}
fn collect_text(node: &RopeNode, buf: &mut String) {
match &node.kind {
NodeKind::Leaf { text } => buf.push_str(text),
NodeKind::Internal { children } => {
for child in children {
collect_text(child, buf);
}
}
}
}
fn pos_to_byte(node: &RopeNode, target_line: usize, target_col: usize) -> usize {
match &node.kind {
NodeKind::Leaf { text } => {
let mut current_line = 0usize;
let mut line_start = 0usize;
for (i, &b) in text.as_bytes().iter().enumerate() {
if b == b'\n' {
if current_line == target_line {
let line = &text[line_start..i];
return line_start + char_to_byte_in(line, target_col);
}
current_line += 1;
line_start = i + 1;
}
}
if current_line == target_line {
let line = &text[line_start..];
return line_start + char_to_byte_in(line, target_col);
}
text.len()
}
NodeKind::Internal { children } => {
let mut remaining = target_line;
let mut offset = 0usize;
for child in children {
if remaining < child.metrics.line_count {
return offset + pos_to_byte(child, remaining, target_col);
}
remaining -= child.metrics.line_count;
offset += child.metrics.byte_len;
}
offset
}
}
}
fn byte_to_pos(node: &RopeNode, byte_offset: usize) -> (usize, usize) {
match &node.kind {
NodeKind::Leaf { text } => {
let off = byte_offset.min(text.len());
let mut line = 0usize;
let mut line_start = 0usize;
for (i, &b) in text.as_bytes()[..off].iter().enumerate() {
if b == b'\n' {
line += 1;
line_start = i + 1;
}
}
let col = text[line_start..off].chars().count();
(line, col)
}
NodeKind::Internal { children } => {
let mut remaining = byte_offset;
let mut line_offset = 0usize;
for child in children {
if remaining < child.metrics.byte_len {
let (cl, col) = byte_to_pos(child, remaining);
return (line_offset + cl, col);
}
remaining -= child.metrics.byte_len;
line_offset += child.metrics.line_count;
}
children.last().map_or((0, 0), |last| {
let (cl, col) = byte_to_pos(last, last.metrics.byte_len);
(line_offset - last.metrics.line_count + cl, col)
})
}
}
}
fn char_to_byte_offset(node: &RopeNode, char_offset: usize) -> usize {
match &node.kind {
NodeKind::Leaf { text } => text
.char_indices()
.nth(char_offset)
.map_or(text.len(), |(b, _)| b),
NodeKind::Internal { children } => {
let mut remaining = char_offset;
let mut offset = 0usize;
for child in children {
if remaining < child.metrics.char_len {
return offset + char_to_byte_offset(child, remaining);
}
remaining -= child.metrics.char_len;
offset += child.metrics.byte_len;
}
offset
}
}
}
fn byte_to_char_offset(node: &RopeNode, byte_offset: usize) -> usize {
match &node.kind {
NodeKind::Leaf { text } => {
let off = byte_offset.min(text.len());
text[..off].chars().count()
}
NodeKind::Internal { children } => {
let mut remaining = byte_offset;
let mut char_off = 0usize;
for child in children {
if remaining < child.metrics.byte_len {
return char_off + byte_to_char_offset(child, remaining);
}
remaining -= child.metrics.byte_len;
char_off += child.metrics.char_len;
}
char_off
}
}
}
fn char_to_byte_in(s: &str, char_idx: usize) -> usize {
s.char_indices().nth(char_idx).map_or(s.len(), |(b, _)| b)
}
fn insert_at(node: &RopeNode, byte_offset: usize, text: &str) -> Vec<Arc<RopeNode>> {
match &node.kind {
NodeKind::Leaf { text: leaf_text } => {
let offset = byte_offset.min(leaf_text.len());
debug_assert!(
leaf_text.is_char_boundary(offset),
"insert byte_offset not on char boundary"
);
let mut new_text = String::with_capacity(leaf_text.len() + text.len());
new_text.push_str(&leaf_text[..offset]);
new_text.push_str(text);
new_text.push_str(&leaf_text[offset..]);
chunk_text_to_leaves(&new_text)
}
NodeKind::Internal { children } => {
let (child_idx, local_offset) = find_child_for_byte(children, byte_offset);
let replacement = insert_at(&children[child_idx], local_offset, text);
let mut new_children = Vec::with_capacity(children.len() + replacement.len());
new_children.extend(children[..child_idx].iter().cloned());
new_children.extend(replacement);
new_children.extend(children[child_idx + 1..].iter().cloned());
if new_children.len() <= B_MAX {
vec![RopeNode::new_internal(new_children)]
} else {
split_children(&new_children)
}
}
}
}
fn remove_range(node: &RopeNode, range: Range<usize>) -> Vec<Arc<RopeNode>> {
if range.is_empty() {
return vec![Arc::new(RopeNode {
metrics: node.metrics,
kind: match &node.kind {
NodeKind::Leaf { text } => NodeKind::Leaf { text: text.clone() },
NodeKind::Internal { children } => NodeKind::Internal {
children: children.clone(),
},
},
})];
}
match &node.kind {
NodeKind::Leaf { text } => {
let s = range.start.min(text.len());
let e = range.end.min(text.len());
debug_assert!(text.is_char_boundary(s), "remove start not on char boundary");
debug_assert!(text.is_char_boundary(e), "remove end not on char boundary");
if s == 0 && e >= text.len() {
return vec![];
}
let mut new_text = String::with_capacity(text.len() - (e - s));
new_text.push_str(&text[..s]);
new_text.push_str(&text[e..]);
chunk_text_to_leaves(&new_text)
}
NodeKind::Internal { children } => {
let mut new_children: Vec<Arc<RopeNode>> = Vec::new();
let mut offset = 0usize;
let mut modified = false;
for child in children {
let cb = child.metrics.byte_len;
let cs = offset;
let ce = offset + cb;
offset = ce;
if range.end <= cs || range.start >= ce {
new_children.push(Arc::clone(child));
} else if range.start <= cs && range.end >= ce {
modified = true;
} else {
let ls = range.start.saturating_sub(cs);
let le = (range.end - cs).min(cb);
let replacement = remove_range(child, ls..le);
modified = true;
new_children.extend(replacement);
}
}
if modified {
fixup_alignment(&mut new_children);
}
if new_children.is_empty() {
vec![]
} else if new_children.len() == 1 {
vec![Arc::clone(&new_children[0])]
} else {
vec![RopeNode::new_internal(new_children)]
}
}
}
}
fn find_child_for_byte(children: &[Arc<RopeNode>], byte_offset: usize) -> (usize, usize) {
let mut offset = 0usize;
for (i, child) in children.iter().enumerate() {
let cb = child.metrics.byte_len;
if byte_offset <= offset + cb {
return (i, byte_offset - offset);
}
offset += cb;
}
let last = children.len() - 1;
(last, children[last].metrics.byte_len)
}
fn split_children(children: &[Arc<RopeNode>]) -> Vec<Arc<RopeNode>> {
let mut result = Vec::new();
let total = children.len();
let mut i = 0;
while i < total {
let remaining = total - i;
let group_size = if remaining <= B_MAX {
remaining
} else if remaining <= 2 * B_MAX {
remaining.div_ceil(2)
} else {
B_MAX
};
result.push(RopeNode::new_internal(children[i..i + group_size].to_vec()));
i += group_size;
}
result
}
fn last_byte_of(node: &RopeNode) -> Option<u8> {
match &node.kind {
NodeKind::Leaf { text } => text.as_bytes().last().copied(),
NodeKind::Internal { children } => children.last().and_then(|c| last_byte_of(c)),
}
}
fn fixup_alignment(children: &mut Vec<Arc<RopeNode>>) {
let mut i = 0;
while i + 1 < children.len() {
let left_last = last_byte_of(&children[i]);
if left_last.is_some() && left_last != Some(b'\n') {
let mut merged = String::new();
collect_text(&children[i], &mut merged);
collect_text(&children[i + 1], &mut merged);
let new_leaves = chunk_text_to_leaves(&merged);
let replacement: Vec<Arc<RopeNode>> = if new_leaves.len() > B_MAX {
vec![build_tree(new_leaves)]
} else if new_leaves.len() > 1 {
if children[i].is_leaf() && children[i + 1].is_leaf() {
new_leaves
} else {
vec![RopeNode::new_internal(new_leaves)]
}
} else {
new_leaves
};
children.splice(i..=i + 1, replacement);
} else {
i += 1;
}
}
}
#[cfg(test)]
#[path = "tests/rope.rs"]
mod tests;