1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
// EndBASIC
// Copyright 2022 Philippe GASSMANN
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
//! String buffer used to manipulate strings at the UTF-8 code point level instead of at the byte
//! level.
use std::fmt::Display;
use std::iter;
use std::str::Chars;
#[derive(Default, Debug)]
/// Abstraction over a string to handle manipulation operations at char boundaries.
///
/// This exists because Rust strings are indexed by bytes and manipulating string bytes is
/// complicated.
///
/// TODO(zenria): The current implementation of the buffer is using `String::chars`. Should be
/// converted to using graphemes instead.
pub struct LineBuffer {
line: String,
}
impl LineBuffer {
/// Creates an empty `LineBuffer` with an allocated `capacity`.
pub fn with_capacity(capacity: usize) -> Self {
Self { line: String::with_capacity(capacity) }
}
/// Returns the logical `LineBuffer` length (in UTF-8 code point count and not in bytes).
pub fn len(&self) -> usize {
self.line.chars().count()
}
/// Gets and iterator over buffer chars.
pub fn chars(&self) -> Chars {
self.line.chars()
}
/// Returns the end of the string starting at `start_pos`, or an empty string if `start_pos` is
/// after the string's end.
pub fn end(&self, start_pos: usize) -> String {
self.chars().skip(start_pos).collect()
}
/// Returns the characters from the start of the string to `end_pos`, excluding `end_pos`.
///
/// Calling this with `end_pos` set to 0 will return an empty string, and a 1-char string if
/// `end_pos` is 1 (if the string contains at least 1 character).
pub fn start(&self, end_pos: usize) -> String {
self.chars().take(end_pos).collect()
}
/// Extracts a range of characters from this buffer.
pub fn range(&self, start_pos: usize, end_pos: usize) -> String {
let count = if start_pos > end_pos { 0 } else { end_pos - start_pos };
self.chars().skip(start_pos).take(count).collect()
}
/// Checks if this buffer is empty or not.
pub fn is_empty(&self) -> bool {
self.line.is_empty()
}
/// Gets a view on the underlying bytes held by this buffer.
///
/// Warning: direct bytes manipulation may lead to undefined behavior.
pub fn as_bytes(&self) -> &[u8] {
self.line.as_bytes()
}
/// Removes a character from this buffer.
///
/// If the given position if greater than the length of the buffer, this function does nothing.
pub fn remove(&mut self, pos: usize) {
self.line = self.line.chars().take(pos).chain(self.line.chars().skip(pos + 1)).collect();
}
/// Inserts a char at the given position.
///
/// If the position is greater than the buffer length, the character will be appended at the
/// end ot it.
pub fn insert(&mut self, pos: usize, ch: char) {
self.line = self
.line
.chars()
.take(pos)
.chain(iter::once(ch))
.chain(self.line.chars().skip(pos))
.collect();
}
/// Returns the underlying string.
pub fn into_inner(self) -> String {
self.line
}
/// Inserts the given string `s` into the buffer at `pos`.
///
/// If `pos` is greater than the length of the buffer, `s` will be appended at the end of it.
pub fn insert_str(&mut self, pos: usize, s: &str) {
self.line = self
.line
.chars()
.take(pos)
.chain(s.chars())
.chain(self.line.chars().skip(pos))
.collect();
}
/// Appends the given string `s` to the buffer.
pub fn push_str(&mut self, s: &LineBuffer) {
self.line.push_str(&s.line);
}
/// Splits the buffer in two parts at the position `at`.
///
/// Returns the remaining part of the buffer (same behavior as `String::split_off`).
pub fn split_off(&mut self, at: usize) -> LineBuffer {
let ret = LineBuffer::from(self.line.chars().skip(at).collect::<String>());
self.line = self.line.chars().take(at).collect();
ret
}
}
impl From<&str> for LineBuffer {
fn from(s: &str) -> Self {
Self { line: String::from(s) }
}
}
impl From<&String> for LineBuffer {
fn from(s: &String) -> Self {
Self::from(s.as_str())
}
}
impl From<String> for LineBuffer {
fn from(line: String) -> Self {
Self { line }
}
}
impl Display for LineBuffer {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.line.fmt(f)
}
}
#[cfg(test)]
mod tests {
use super::LineBuffer;
#[test]
fn test_end() {
let empty = LineBuffer::default();
assert_eq!(empty.end(0), "");
assert_eq!(empty.end(1), ""); // Should not panic even if the given position is invalid.
let hw = LineBuffer::from("Hello, World");
assert_eq!(hw.end(0), "Hello, World");
assert_eq!(hw.end(7), "World");
assert_eq!(hw.end(100), "");
}
#[test]
fn test_start() {
let empty = LineBuffer::default();
assert_eq!(empty.start(0), "");
assert_eq!(empty.start(1), ""); // Should not panic even if the given position is invalid.
let hw = LineBuffer::from("Hello, World");
assert_eq!(hw.start(0), "");
assert_eq!(hw.start(7), "Hello, ");
assert_eq!(hw.start(100), "Hello, World");
}
#[test]
fn test_insert() {
let mut buffer = LineBuffer::default();
buffer.insert(0, 'c'); // c
buffer.insert(0, 'é'); // éc
// Insertion must happen after the 2-byte UTF-8 code point and not at byte 1.
buffer.insert(1, 'à'); // éàc
buffer.insert(100, 'd'); // Should not panic even if the given position is invalid.
assert_eq!(buffer.into_inner(), "éàcd");
}
#[test]
fn test_remove() {
let mut empty = LineBuffer::default();
empty.remove(0);
empty.remove(5);
assert_eq!(empty.into_inner(), "");
let mut hello = LineBuffer::from("Hello");
hello.remove(100); // Do nothing.
hello.remove(1); // Remove the 'e'
assert_eq!(hello.into_inner(), "Hllo");
}
#[test]
fn test_insert_str() {
let mut buffer = LineBuffer::default();
buffer.insert_str(0, "Hello");
assert_eq!(buffer.end(0), "Hello");
buffer.insert_str(100, "World"); // pos > len will insert at the end.
assert_eq!(buffer.end(0), "HelloWorld");
buffer.insert_str(5, ", ");
assert_eq!(buffer.end(0), "Hello, World");
}
#[test]
fn test_range() {
let mut buffer = LineBuffer::default();
assert_eq!(buffer.range(0, 0), "");
assert_eq!(buffer.range(0, 10), "");
assert_eq!(buffer.range(0, 10), "");
assert_eq!(buffer.range(10, 0), ""); // Should not panic even with bad indexes.
buffer.insert_str(0, "Hello, World");
assert_eq!(buffer.range(0, 5), "Hello");
assert_eq!(buffer.range(7, 10), "Wor");
assert_eq!(buffer.range(7, 100), "World");
assert_eq!(buffer.range(10, 0), ""); // Should not panic even with bad indexes.
}
#[test]
fn test_is_empty() {
assert!(LineBuffer::default().is_empty());
assert!(LineBuffer::from("").is_empty());
assert!(!LineBuffer::from("This is not empty").is_empty());
}
#[test]
fn test_split_off() {
let mut buffer = LineBuffer::from("Hello, World");
let world = buffer.split_off(7);
assert_eq!(buffer.into_inner(), "Hello, ");
assert_eq!(world.into_inner(), "World");
}
}