use crate::model::buffer::Buffer;
/// Reports whether `byte` counts as part of a word.
///
/// Word bytes are the ASCII letters, the ASCII digits and the underscore.
/// Every other byte, including every non-ASCII byte, is not a word byte.
pub fn is_word_char(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
}
/// Finds the start of the word at or just before `pos` in a raw byte slice.
///
/// `pos` is clamped to `bytes.len()`. When the clamped position is at the end
/// of the slice or rests on a byte that is not a word byte (see
/// [`is_word_char`]), the search first steps back by one byte. From there it
/// moves left for as long as the byte immediately before the cursor is a word
/// byte.
///
/// Returns a byte index no greater than the clamped `pos`. A `pos` of `0`
/// always yields `0`.
pub fn find_word_start_bytes(bytes: &[u8], pos: usize) -> usize {
    if pos == 0 {
        return 0;
    }

    let mut cursor = pos.min(bytes.len());

    // Not resting on a word byte (or sitting past the last byte): begin the
    // backward search one byte earlier.
    let on_word_byte = matches!(bytes.get(cursor), Some(&b) if is_word_char(b));
    if !on_word_byte && cursor > 0 {
        cursor -= 1;
    }

    // Length of the run of word bytes ending immediately before the cursor.
    let run_len = bytes[..cursor]
        .iter()
        .rev()
        .take_while(|&&b| is_word_char(b))
        .count();

    cursor - run_len
}
/// Finds the end of the next word at or after `pos` in a raw byte slice.
///
/// `pos` is clamped to `bytes.len()`. Starting there, any bytes that are not
/// word bytes (see [`is_word_char`]) are skipped, followed by the run of word
/// bytes that comes next.
///
/// Returns the index just past that run, which is `bytes.len()` when the
/// slice ends first. The result is never smaller than the clamped `pos`.
pub fn find_word_end_bytes(bytes: &[u8], pos: usize) -> usize {
    let from = pos.min(bytes.len());
    let tail = &bytes[from..];

    // Separator bytes between the starting point and the next word.
    let gap_len = tail.iter().take_while(|&&b| !is_word_char(b)).count();

    // The word itself.
    let word_len = tail[gap_len..]
        .iter()
        .take_while(|&&b| is_word_char(b))
        .count();

    from + gap_len + word_len
}
/// Finds where the word that ends at `pos` begins, for completion purposes.
///
/// Unlike [`find_word_start`], this returns `pos` unchanged when the byte
/// immediately before `pos` is not a word byte (see [`is_word_char`]), so a
/// cursor that is not directly after word text yields an empty prefix.
///
/// Only a window of at most 1000 bytes before `pos` (plus the byte at `pos`,
/// when there is one) is read from `buffer`, so the result is never more than
/// 1000 bytes before `pos`. `pos` is clamped to `buffer.len()`.
pub fn find_completion_word_start(buffer: &Buffer, pos: usize) -> usize {
    if pos == 0 {
        return 0;
    }

    let total = buffer.len();
    let pos = pos.min(total);
    let window_start = pos.saturating_sub(1000);
    let window_end = total.min(pos + 1);
    let window = buffer.slice_bytes(window_start..window_end);

    // Position of `pos` relative to the window.
    let offset = pos - window_start;
    if offset == 0 {
        return pos;
    }

    // Nothing word-like directly before the cursor: there is no word to extend.
    if matches!(window.get(offset - 1), Some(&b) if !is_word_char(b)) {
        return pos;
    }

    // Start on the cursor byte when it is a word byte, otherwise one byte
    // earlier (`offset` is known to be non-zero here).
    let on_word_byte = matches!(window.get(offset), Some(&b) if is_word_char(b));
    let mut cursor = if on_word_byte { offset } else { offset - 1 };

    // Walk left while the preceding byte is still part of the word.
    while cursor > 0 && matches!(window.get(cursor - 1), Some(&b) if is_word_char(b)) {
        cursor -= 1;
    }

    window_start + cursor
}
/// Finds the start of the word at or just before `pos` in `buffer`.
///
/// Reads a window of at most 1000 bytes before `pos` (plus the byte at `pos`,
/// when there is one) and delegates to [`find_word_start_bytes`], translating
/// the result back into a buffer offset. `pos` is clamped to `buffer.len()`,
/// and a `pos` of `0` returns `0` without touching the buffer.
pub fn find_word_start(buffer: &Buffer, pos: usize) -> usize {
    if pos == 0 {
        return 0;
    }

    let total = buffer.len();
    let cursor = pos.min(total);
    let window_start = cursor.saturating_sub(1000);
    let window_end = total.min(cursor + 1);
    let window = buffer.slice_bytes(window_start..window_end);

    window_start + find_word_start_bytes(&window, cursor - window_start)
}
/// Finds the end of the next word at or after `pos` in `buffer`.
///
/// Reads a window of at most 1000 bytes starting at `pos` and delegates to
/// [`find_word_end_bytes`], translating the result back into a buffer offset.
/// A `pos` at or beyond the end of the buffer returns `buffer.len()`.
pub fn find_word_end(buffer: &Buffer, pos: usize) -> usize {
    let total = buffer.len();
    if pos >= total {
        return total;
    }

    let window = buffer.slice_bytes(pos..total.min(pos + 1000));
    pos + find_word_end_bytes(&window, 0)
}
use crate::primitives::grapheme::{next_grapheme_boundary, prev_grapheme_boundary};
/// Coarse category assigned to a grapheme by `get_grapheme_class`, used by the
/// grapheme-based word-movement functions to decide where a run starts or ends.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CharClass {
/// The grapheme contains at least one alphanumeric character or underscore.
Word,
/// Every character of the grapheme is whitespace.
Whitespace,
/// Anything that is neither `Word` nor `Whitespace`.
Punctuation,
}
/// Classifies a grapheme cluster for word-movement purposes.
///
/// * `Word` if any character is alphanumeric (Unicode-aware) or `_`.
/// * `Whitespace` if every character is whitespace; an empty string also
///   falls in this class.
/// * `Punctuation` otherwise.
fn get_grapheme_class(g: &str) -> CharClass {
    let mut only_whitespace = true;

    for c in g.chars() {
        // A single word character anywhere decides the class immediately.
        if c.is_alphanumeric() || c == '_' {
            return CharClass::Word;
        }
        only_whitespace &= c.is_whitespace();
    }

    if only_whitespace {
        CharClass::Whitespace
    } else {
        CharClass::Punctuation
    }
}
pub fn find_word_start_left(buffer: &Buffer, pos: usize) -> usize {
if pos == 0 {
return 0;
}
let buf_len = buffer.len();
let actual_pos = pos.min(buf_len);
let start = actual_pos.saturating_sub(1000);
let end = actual_pos;
let bytes = buffer.slice_bytes(start..end);
let text = String::from_utf8_lossy(&bytes);
let mut current_idx = text.len();
while current_idx > 0 {
let prev = prev_grapheme_boundary(&text, current_idx);
let g = &text[prev..current_idx];
if get_grapheme_class(g) == CharClass::Whitespace {
current_idx = prev;
} else {
break;
}
}
if current_idx == 0 {
let delta = text.len() - current_idx;
return actual_pos.saturating_sub(delta);
}
let prev = prev_grapheme_boundary(&text, current_idx);
let target_class = get_grapheme_class(&text[prev..current_idx]);
while current_idx > 0 {
let prev = prev_grapheme_boundary(&text, current_idx);
let g = &text[prev..current_idx];
if get_grapheme_class(g) == target_class {
current_idx = prev;
} else {
break;
}
}
let delta = text.len() - current_idx;
actual_pos.saturating_sub(delta)
}
pub fn find_word_end_right(buffer: &Buffer, pos: usize) -> usize {
let buf_len = buffer.len();
if pos >= buf_len {
return buf_len;
}
let start = pos;
let end = (pos + 1000).min(buf_len);
let bytes = buffer.slice_bytes(start..end);
let text = String::from_utf8_lossy(&bytes);
let mut current_idx = 0;
if current_idx >= text.len() {
return start;
}
let next_bound = next_grapheme_boundary(&text, current_idx);
let start_class = get_grapheme_class(&text[current_idx..next_bound]);
match start_class {
CharClass::Word => {
while current_idx < text.len() {
let next = next_grapheme_boundary(&text, current_idx);
let g = &text[current_idx..next];
if get_grapheme_class(g) == CharClass::Word {
current_idx = next;
} else {
break;
}
}
}
CharClass::Whitespace => {
while current_idx < text.len() {
let next = next_grapheme_boundary(&text, current_idx);
let g = &text[current_idx..next];
if get_grapheme_class(g) == CharClass::Whitespace {
current_idx = next;
} else {
break;
}
}
if current_idx < text.len() {
let next = next_grapheme_boundary(&text, current_idx);
let landed_class = get_grapheme_class(&text[current_idx..next]);
while current_idx < text.len() {
let next = next_grapheme_boundary(&text, current_idx);
let g = &text[current_idx..next];
if get_grapheme_class(g) == landed_class {
current_idx = next;
} else {
break;
}
}
}
}
CharClass::Punctuation => {
while current_idx < text.len() {
let next = next_grapheme_boundary(&text, current_idx);
let g = &text[current_idx..next];
if get_grapheme_class(g) == CharClass::Punctuation {
current_idx = next;
} else {
break;
}
}
}
}
start + current_idx
}
pub fn find_word_start_right(buffer: &Buffer, pos: usize) -> usize {
let buf_len = buffer.len();
if pos >= buf_len {
return buf_len;
}
let start = pos;
let end = (pos + 1000).min(buf_len);
let bytes = buffer.slice_bytes(start..end);
let text = String::from_utf8_lossy(&bytes);
let mut current_idx = 0;
if current_idx >= text.len() {
return start;
}
let next_bound = next_grapheme_boundary(&text, current_idx);
let start_class = get_grapheme_class(&text[current_idx..next_bound]);
if start_class == CharClass::Whitespace {
while current_idx < text.len() {
let next = next_grapheme_boundary(&text, current_idx);
let g = &text[current_idx..next];
if get_grapheme_class(g) == CharClass::Whitespace {
current_idx = next;
} else {
break;
}
}
return start + current_idx;
}
while current_idx < text.len() {
let next = next_grapheme_boundary(&text, current_idx);
let g = &text[current_idx..next];
if get_grapheme_class(g) == start_class {
current_idx = next;
} else {
break;
}
}
while current_idx < text.len() {
let next = next_grapheme_boundary(&text, current_idx);
let g = &text[current_idx..next];
if get_grapheme_class(g) == CharClass::Whitespace {
current_idx = next;
} else {
break;
}
}
start + current_idx
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::buffer::Buffer;

    // ---- is_word_char -----------------------------------------------------

    #[test]
    fn test_is_word_char() {
        assert!(is_word_char(b'a'));
        assert!(is_word_char(b'Z'));
        assert!(is_word_char(b'0'));
        assert!(is_word_char(b'_'));
        assert!(!is_word_char(b' '));
        assert!(!is_word_char(b'.'));
        assert!(!is_word_char(b'-'));
    }

    // ---- Buffer-based wrappers --------------------------------------------
    //
    // Fixture layout for "hello world test":
    //   0..5 "hello", 5 ' ', 6..11 "world", 11 ' ', 12..16 "test"

    #[test]
    fn test_find_word_start() {
        let buffer = Buffer::from_str_test("hello world test");
        assert_eq!(find_word_start(&buffer, 0), 0);
        assert_eq!(find_word_start(&buffer, 3), 0);
        assert_eq!(find_word_start(&buffer, 6), 6);
        assert_eq!(find_word_start(&buffer, 8), 6);
    }

    #[test]
    fn test_find_word_end() {
        let buffer = Buffer::from_str_test("hello world test");
        assert_eq!(find_word_end(&buffer, 0), 5);
        assert_eq!(find_word_end(&buffer, 3), 5);
        assert_eq!(find_word_end(&buffer, 6), 11);
    }

    #[test]
    fn test_find_word_start_left() {
        let buffer = Buffer::from_str_test("hello world test");
        assert_eq!(find_word_start_left(&buffer, 6), 0);
        assert_eq!(find_word_start_left(&buffer, 12), 6);
    }

    #[test]
    fn test_find_word_start_right() {
        let buffer = Buffer::from_str_test("hello world test");
        assert_eq!(find_word_start_right(&buffer, 0), 6);
        assert_eq!(find_word_start_right(&buffer, 6), 12);
    }

    // ---- Byte-slice helpers -----------------------------------------------

    #[test]
    fn test_find_word_start_bytes_basic() {
        let s = "hello world test";
        let bytes = s.as_bytes();
        assert_eq!(find_word_start_bytes(bytes, 0), 0);
        assert_eq!(find_word_start_bytes(bytes, 3), 0);
        assert_eq!(find_word_start_bytes(bytes, 5), 0);
        assert_eq!(find_word_start_bytes(bytes, 6), 6);
        assert_eq!(find_word_start_bytes(bytes, 8), 6);
        assert_eq!(find_word_start_bytes(bytes, 11), 6);
        assert_eq!(find_word_start_bytes(bytes, 12), 12);
    }

    #[test]
    fn test_find_word_end_bytes_basic() {
        let s = "hello world test";
        let bytes = s.as_bytes();
        assert_eq!(find_word_end_bytes(bytes, 0), 5);
        assert_eq!(find_word_end_bytes(bytes, 3), 5);
        assert_eq!(find_word_end_bytes(bytes, 6), 11);
        assert_eq!(find_word_end_bytes(bytes, 8), 11);
        assert_eq!(find_word_end_bytes(bytes, 12), 16);
    }

    #[test]
    fn test_find_word_start_bytes_special_chars() {
        // '-' is a separator: 0..4 "save", 5..9 "file", 10..12 "as".
        let s = "save-file-as";
        let bytes = s.as_bytes();
        assert_eq!(find_word_start_bytes(bytes, 4), 0);
        assert_eq!(find_word_start_bytes(bytes, 5), 5);
        assert_eq!(find_word_start_bytes(bytes, 9), 5);
        assert_eq!(find_word_start_bytes(bytes, 10), 10);
        assert_eq!(find_word_start_bytes(bytes, 12), 10);
    }

    #[test]
    fn test_find_word_end_bytes_special_chars() {
        // '.' is a separator: 0..4 "open", 5..9 "file", 10..13 "now".
        let s = "open.file.now";
        let bytes = s.as_bytes();
        assert_eq!(find_word_end_bytes(bytes, 0), 4);
        assert_eq!(find_word_end_bytes(bytes, 4), 9);
        assert_eq!(find_word_end_bytes(bytes, 5), 9);
        assert_eq!(find_word_end_bytes(bytes, 10), 13);
    }

    #[test]
    fn test_find_word_start_bytes_whitespace() {
        // Two spaces of padding before, between and after the words; the
        // asserted offsets only hold for this layout:
        //   0..2 spaces, 2..7 "hello", 7..9 spaces, 9..14 "world", 14..16 spaces
        let s = "  hello  world  ";
        let bytes = s.as_bytes();
        assert_eq!(find_word_start_bytes(bytes, 4), 2);
        assert_eq!(find_word_start_bytes(bytes, 7), 2);
        assert_eq!(find_word_start_bytes(bytes, 9), 9);
        assert_eq!(find_word_start_bytes(bytes, 14), 9);
    }

    #[test]
    fn test_find_word_end_bytes_whitespace() {
        // Same two-space layout as above:
        //   0..2 spaces, 2..7 "hello", 7..9 spaces, 9..14 "world", 14..16 spaces
        let s = "  hello  world  ";
        let bytes = s.as_bytes();
        assert_eq!(find_word_end_bytes(bytes, 0), 7);
        assert_eq!(find_word_end_bytes(bytes, 2), 7);
        assert_eq!(find_word_end_bytes(bytes, 7), 14);
        assert_eq!(find_word_end_bytes(bytes, 9), 14);
    }

    #[test]
    fn test_find_word_start_bytes_edge_cases() {
        assert_eq!(find_word_start_bytes(b"", 0), 0);
        assert_eq!(find_word_start_bytes(b"a", 0), 0);
        assert_eq!(find_word_start_bytes(b"a", 1), 0);
        // No word bytes at all: the search only performs its initial
        // one-byte step back.
        assert_eq!(find_word_start_bytes(b"...", 2), 1);
        // Out-of-range positions are clamped to the slice length.
        assert_eq!(find_word_start_bytes(b"hello", 100), 0);
    }

    #[test]
    fn test_find_word_end_bytes_edge_cases() {
        assert_eq!(find_word_end_bytes(b"", 0), 0);
        assert_eq!(find_word_end_bytes(b"a", 0), 1);
        assert_eq!(find_word_end_bytes(b"...", 0), 3);
        // Out-of-range positions are clamped to the slice length.
        assert_eq!(find_word_end_bytes(b"hello", 100), 5);
    }

    #[test]
    fn test_find_word_start_bytes_underscores() {
        // Underscores are word bytes, so the identifier is a single word.
        let s = "some_variable_name";
        let bytes = s.as_bytes();
        assert_eq!(find_word_start_bytes(bytes, 7), 0);
        assert_eq!(find_word_start_bytes(bytes, 18), 0);
    }

    #[test]
    fn test_find_word_end_bytes_underscores() {
        let s = "some_variable_name";
        let bytes = s.as_bytes();
        assert_eq!(find_word_end_bytes(bytes, 0), 18);
        assert_eq!(find_word_end_bytes(bytes, 7), 18);
    }

    // ---- Property tests for the byte-slice helpers ------------------------

    #[cfg(test)]
    mod property_tests {
        use super::*;
        use proptest::prelude::*;

        // Strings of up to 100 characters drawn from word characters plus a
        // few separators (space, '.', ',', '-').
        fn ascii_string() -> impl Strategy<Value = String> {
            "[a-zA-Z0-9_ .,-]{0,100}".prop_map(|s| s)
        }

        proptest! {
            // The word start never lies after the (clamped) query position.
            #[test]
            fn prop_word_start_not_after_position(s in ascii_string(), pos in 0usize..100) {
                let bytes = s.as_bytes();
                let result = find_word_start_bytes(bytes, pos);
                prop_assert!(result <= pos.min(s.len()));
            }

            // The word end never lies before the (clamped) query position.
            #[test]
            fn prop_word_end_not_before_position(s in ascii_string(), pos in 0usize..100) {
                let bytes = s.as_bytes();
                let result = find_word_end_bytes(bytes, pos);
                prop_assert!(result >= pos.min(s.len()));
            }

            // The word end is always a valid index into (or just past) the input.
            #[test]
            fn prop_word_end_within_bounds(s in ascii_string(), pos in 0usize..100) {
                let bytes = s.as_bytes();
                let result = find_word_end_bytes(bytes, pos);
                prop_assert!(result <= s.len());
            }

            // Position 0 is a fixed point of the word-start search.
            #[test]
            fn prop_word_start_at_zero(s in ascii_string()) {
                let bytes = s.as_bytes();
                let result = find_word_start_bytes(bytes, 0);
                prop_assert_eq!(result, 0);
            }

            // The end of the input is a fixed point of the word-end search.
            #[test]
            fn prop_word_end_at_end(s in ascii_string()) {
                let bytes = s.as_bytes();
                let result = find_word_end_bytes(bytes, s.len());
                prop_assert_eq!(result, s.len());
            }

            // Re-running the search from its own result never moves forward.
            #[test]
            fn prop_word_start_monotonic(s in ascii_string(), pos in 0usize..100) {
                let bytes = s.as_bytes();
                let first = find_word_start_bytes(bytes, pos);
                let second = find_word_start_bytes(bytes, first);
                prop_assert!(second <= first);
            }

            // A non-zero result is always preceded by a non-word byte.
            #[test]
            fn prop_word_start_at_boundary(s in ascii_string(), pos in 0usize..100) {
                let bytes = s.as_bytes();
                let result = find_word_start_bytes(bytes, pos.min(s.len()));
                prop_assert!(
                    result == 0 ||
                    result > bytes.len() ||
                    !is_word_char(bytes[result.saturating_sub(1)])
                );
            }

            // Start and end computed from the same position bracket it.
            #[test]
            fn prop_word_range_valid(s in ascii_string(), pos in 0usize..100) {
                let bytes = s.as_bytes();
                let pos = pos.min(s.len());
                let start = find_word_start_bytes(bytes, pos);
                let end = find_word_end_bytes(bytes, pos);
                prop_assert!(start <= pos);
                prop_assert!(end >= pos);
                prop_assert!(start <= end);
            }
        }
    }
}