/// Converts a 1-based byte column within `line_content` into a 1-based
/// character column.
///
/// Columns `0` and `1` both map to character column `1`. Any other column is
/// shifted to a 0-based byte offset, converted with
/// [`byte_offset_to_char_offset`], and shifted back to a 1-based column.
pub fn byte_column_to_char_column(line_content: &str, byte_column: usize) -> usize {
    match byte_column {
        // Column 0 is treated like the first column instead of underflowing.
        0 | 1 => 1,
        column => byte_offset_to_char_offset(line_content, column - 1) + 1,
    }
}
/// Returns the line at the 1-based `line_number` of `content`.
///
/// Lines are produced by [`str::lines`], so the returned slice does not
/// include the line terminator. Returns `None` when `line_number` is `0` or
/// lies past the last line.
pub fn get_line_content(content: &str, line_number: usize) -> Option<&str> {
    // `checked_sub` yields `None` for line 0, which `?` propagates directly.
    let zero_based = line_number.checked_sub(1)?;
    content.lines().nth(zero_based)
}
/// Converts a 0-based byte offset into `content` to a 0-based character
/// offset (a count of `char`s).
///
/// * Offset `0` maps to `0`.
/// * An offset at or beyond `content.len()` maps to the total number of
///   characters in `content`.
/// * Otherwise the result is the number of characters that start before
///   `byte_offset`. A character that starts before the offset is counted even
///   if the offset falls inside its encoding.
pub fn byte_offset_to_char_offset(content: &str, byte_offset: usize) -> usize {
    match byte_offset {
        0 => 0,
        _ if byte_offset >= content.len() => content.chars().count(),
        limit => content
            .char_indices()
            .map(|(start, _)| start)
            .take_while(|&start| start < limit)
            .count(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts the UTF-8 byte length and the character count of `text`, so
    /// each fixture documents how many multi-byte characters it contains.
    #[track_caller]
    fn assert_lengths(text: &str, bytes: usize, chars: usize) {
        assert_eq!(text.len(), bytes);
        assert_eq!(text.chars().count(), chars);
    }

    /// Checks every `(byte_offset, expected_char_offset)` pair against
    /// `byte_offset_to_char_offset`.
    #[track_caller]
    fn assert_offsets(content: &str, cases: &[(usize, usize)]) {
        for &(byte_offset, char_offset) in cases {
            assert_eq!(byte_offset_to_char_offset(content, byte_offset), char_offset);
        }
    }

    /// Checks every `(byte_column, expected_char_column)` pair against
    /// `byte_column_to_char_column`.
    #[track_caller]
    fn assert_columns(line: &str, cases: &[(usize, usize)]) {
        for &(byte_column, char_column) in cases {
            assert_eq!(byte_column_to_char_column(line, byte_column), char_column);
        }
    }

    /// Checks every `(line_number, expected_line)` pair against
    /// `get_line_content`.
    #[track_caller]
    fn assert_lines(content: &str, cases: &[(usize, Option<&str>)]) {
        for &(line_number, expected) in cases {
            assert_eq!(get_line_content(content, line_number), expected);
        }
    }

    #[test]
    fn test_byte_offset_to_char_offset_empty() {
        assert_offsets("", &[(0, 0), (1, 0)]);
    }

    #[test]
    fn test_byte_offset_to_char_offset_ascii() {
        // For pure ASCII, byte and character offsets coincide; offsets past
        // the end clamp to the character count.
        assert_offsets("Hello World", &[(0, 0), (5, 5), (11, 11), (100, 11)]);
    }

    #[test]
    fn test_byte_offset_to_char_offset_norwegian() {
        // "æ" is one character encoded as two bytes.
        let content = "æ";
        assert_lengths(content, 2, 1);
        assert_offsets(content, &[(0, 0), (2, 1)]);
    }

    #[test]
    fn test_byte_offset_to_char_offset_mixed() {
        let content = "Hello æ world";
        assert_lengths(content, 14, 13);
        // Byte 6 is where "æ" starts; byte 8 is the first byte after it.
        assert_offsets(content, &[(6, 6), (8, 7), (14, 13)]);
    }

    #[test]
    fn test_byte_offset_to_char_offset_emoji() {
        // The emoji occupies four bytes but is a single character.
        let content = "Hi 👋";
        assert_lengths(content, 7, 4);
        assert_offsets(content, &[(3, 3), (7, 4)]);
    }

    #[test]
    fn test_byte_offset_to_char_offset_norwegian_sentence() {
        let content = "# Heading\n\nContent with Norwegian letter \"æ\".";
        assert_lengths(content, 46, 45);
        assert_offsets(content, &[(46, 45)]);
    }

    #[test]
    fn test_byte_offset_to_char_offset_multiple_multibyte() {
        // Three two-byte "é" characters: 14 bytes for 11 characters.
        let content = "café résumé";
        assert_lengths(content, 14, 11);
        assert_offsets(content, &[(0, 0), (3, 3), (5, 4), (14, 11)]);
    }

    #[test]
    fn test_byte_column_to_char_column() {
        let line = "Content with Norwegian letter \"æ\".";
        assert_lengths(line, 35, 34);
        // Columns up to and including the start of "æ" (byte column 32) are
        // unchanged; columns after it shift left by one.
        assert_columns(line, &[(1, 1), (30, 30), (32, 32), (34, 33), (36, 35)]);
    }

    #[test]
    fn test_byte_column_to_char_column_edge_cases() {
        // Empty line: columns 1 and 0 both resolve to column 1.
        assert_columns("", &[(1, 1), (0, 1)]);

        // ASCII only: columns are unchanged, including one past the end.
        assert_columns("Hello World", &[(1, 1), (6, 6), (12, 12)]);

        // Two-byte characters only.
        let multi = "æøå";
        assert_lengths(multi, 6, 3);
        assert_columns(multi, &[(1, 1), (3, 2), (5, 3), (7, 4)]);

        // A four-byte emoji followed by an ASCII character.
        let emoji = "Hi 👋!";
        assert_lengths(emoji, 8, 5);
        assert_columns(emoji, &[(4, 4), (8, 5), (9, 6)]);

        // Three-byte characters only.
        let only_multi = "日本語";
        assert_lengths(only_multi, 9, 3);
        assert_columns(only_multi, &[(1, 1), (4, 2), (7, 3), (10, 4)]);
    }

    #[test]
    fn test_byte_column_to_char_column_bug_scenario() {
        let line = "Content with Norwegian letter \"æ\".";
        // One past the last byte / last character: the end-of-line column.
        let byte_column_at_end = line.len() + 1;
        let expected_char_column = line.chars().count() + 1;
        assert_eq!(
            byte_column_to_char_column(line, byte_column_at_end),
            expected_char_column,
            "End-of-line column should be converted from byte {byte_column_at_end} to char {expected_char_column}"
        );

        // 11 equals the length of the "# Heading\n\n" prefix used by the other
        // fixtures; presumably the document offset at which this line starts.
        let line_start = 11_usize;
        let from_position = line_start + (expected_char_column - 1);
        assert_eq!(from_position, 45, "Fix position should be 45, not 46");
    }

    #[test]
    fn test_get_line_content() {
        let content = "# Heading\n\nContent with Norwegian letter \"æ\".";
        assert_lines(
            content,
            &[
                (1, Some("# Heading")),
                (2, Some("")),
                (3, Some("Content with Norwegian letter \"æ\".")),
                (4, None),
                (0, None),
            ],
        );
    }

    #[test]
    fn test_get_line_content_edge_cases() {
        // Empty content has no lines at all.
        assert_lines("", &[(1, None), (0, None)]);

        // A single line without a trailing newline.
        assert_lines("Hello", &[(1, Some("Hello")), (2, None)]);

        // Three newlines yield exactly three empty lines.
        assert_lines(
            "\n\n\n",
            &[(1, Some("")), (2, Some("")), (3, Some("")), (4, None)],
        );

        // Multi-byte content is returned unchanged, line by line.
        assert_lines(
            "Line 1\næøå\n日本語\n👋🎉",
            &[
                (1, Some("Line 1")),
                (2, Some("æøå")),
                (3, Some("日本語")),
                (4, Some("👋🎉")),
                (5, None),
            ],
        );
    }
}