use std::ffi::CStr;
use std::ops::Range;
use std::slice;
use widestring::U32String;
/// Records which kind of input a [`StringBox`] was created from, keeping the
/// original data for the byte and wide sources.
#[derive(Debug, Clone)]
#[repr(C)]
pub enum StringBoxOrigin {
/// Created from raw bytes; holds the bytes exactly as they were supplied.
Byte(Vec<u8>),
/// Created from (or replaced by) a Rust / UTF-8 string; nothing extra is stored.
String,
/// Created from 32-bit code units; holds the wide string the text was decoded from.
Wide(U32String),
}
/// Payload-free counterpart of [`StringBoxOrigin`], stored as a single byte
/// because of `#[repr(u8)]`.
#[derive(Debug, Clone)]
#[repr(u8)]
pub enum StringBoxOriginType {
/// Tag for [`StringBoxOrigin::Byte`].
Byte,
/// Tag for [`StringBoxOrigin::String`].
UTF8,
/// Tag for [`StringBoxOrigin::Wide`].
Wide,
}
impl From<StringBoxOrigin> for StringBoxOriginType {
    /// Reduces an origin to its bare tag, dropping whatever source data the
    /// origin was carrying.
    fn from(origin: StringBoxOrigin) -> Self {
        match origin {
            StringBoxOrigin::Wide(..) => Self::Wide,
            StringBoxOrigin::String => Self::UTF8,
            StringBoxOrigin::Byte(..) => Self::Byte,
        }
    }
}
/// A UTF-8 string stored together with a record of the input it was created from.
#[derive(Debug, Clone)]
#[repr(C)]
pub struct StringBox {
/// Where the text came from, including the original data for byte and wide inputs.
origin: StringBoxOrigin,
/// The text as a Rust (UTF-8) string; the accessors and index conversions all read this field.
string: String,
}
impl StringBox {
    /// Creates an empty box with a [`StringBoxOrigin::String`] origin.
    pub fn new() -> Self {
        Self::from_string(String::new())
    }

    /// Takes ownership of `string` and records the origin as
    /// [`StringBoxOrigin::String`].
    pub fn from_string(string: String) -> Self {
        Self {
            origin: StringBoxOrigin::String,
            string,
        }
    }

    /// Copies `length` 32-bit code units starting at `data` and builds the box
    /// with [`StringBox::from_wide_string`].
    ///
    /// A null `data` pointer is never dereferenced; it produces an empty
    /// wide-origin box regardless of `length`.
    ///
    /// # Safety
    ///
    /// Unless it is null, `data` must be aligned for `u32` and valid for reads
    /// of `length` consecutive `u32` values.
    pub unsafe fn from_wide_string_data(data: *const u32, length: usize) -> Self {
        if data.is_null() {
            return Self::from_wide_string(Vec::new());
        }
        // SAFETY: `data` is non-null and the caller guarantees it is aligned and
        // readable for `length` elements; the slice is copied before returning.
        let wide_string = unsafe { slice::from_raw_parts(data, length) }.to_vec();
        Self::from_wide_string(wide_string)
    }

    /// Builds a box from 32-bit code units.
    ///
    /// The text is obtained with `U32String::to_string_lossy`, and the wide
    /// string itself is kept in the origin.
    pub fn from_wide_string(data: Vec<u32>) -> Self {
        let wide_string = U32String::from_vec(data);
        let string = wide_string.to_string_lossy();
        Self {
            origin: StringBoxOrigin::Wide(wide_string),
            string,
        }
    }

    /// Copies `length` bytes starting at `data` and builds the box with
    /// [`StringBox::from_byte_string`].
    ///
    /// A null `data` pointer is never dereferenced; it produces an empty
    /// byte-origin box regardless of `length`.
    ///
    /// # Safety
    ///
    /// Unless it is null, `data` must be valid for reads of `length` bytes.
    pub unsafe fn from_byte_string_data(data: *const u8, length: usize) -> Self {
        if data.is_null() {
            return Self::from_byte_string(Vec::new());
        }
        // SAFETY: `data` is non-null and the caller guarantees it is readable
        // for `length` bytes; the slice is copied before returning.
        let byte_string = unsafe { slice::from_raw_parts(data, length) }.to_vec();
        Self::from_byte_string(byte_string)
    }

    /// Builds a box from single-byte characters.
    ///
    /// Every byte becomes the character with the same code point
    /// (U+0000..=U+00FF), so bytes `>= 0x80` are *not* interpreted as UTF-8.
    /// The original bytes are kept in the origin.
    pub fn from_byte_string(data: Vec<u8>) -> Self {
        let string = data.iter().copied().map(char::from).collect::<String>();
        Self {
            origin: StringBoxOrigin::Byte(data),
            string,
        }
    }

    /// Builds a box from `length` UTF-8 bytes followed by a NUL terminator.
    ///
    /// A null `data` pointer is never dereferenced; it produces an empty box.
    ///
    /// # Safety
    ///
    /// Unless it is null, `data` must be valid for reads of `length + 1` bytes:
    /// the `length` bytes of text plus the terminator at `data[length]`.
    pub unsafe fn from_utf8_string_data(data: *const u8, length: usize) -> Self {
        if data.is_null() {
            return Self::new();
        }
        // SAFETY: `data` is non-null and the caller guarantees `length + 1`
        // readable bytes (text plus terminator).
        Self::from_utf8_string(unsafe { slice::from_raw_parts(data, length + 1) })
    }

    /// Builds a box from UTF-8 bytes that normally end with a single NUL.
    ///
    /// The trailing NUL, when present, is not part of the text. Invalid UTF-8
    /// sequences are replaced with U+FFFD. Input that is empty, lacks the
    /// terminator, or contains interior NULs is accepted as well: everything
    /// except one trailing NUL is decoded.
    pub fn from_utf8_string(data: &[u8]) -> Self {
        let string = match CStr::from_bytes_with_nul(data) {
            Ok(c_string) => c_string.to_string_lossy().into_owned(),
            // Not a well-formed C string. Decode it anyway instead of relying on
            // an unchecked conversion, which would be undefined behaviour here.
            Err(_) => {
                let text = match data.split_last() {
                    Some((&0, rest)) => rest,
                    _ => data,
                };
                String::from_utf8_lossy(text).into_owned()
            }
        };
        Self {
            origin: StringBoxOrigin::String,
            string,
        }
    }

    /// Replaces the text and resets the origin to [`StringBoxOrigin::String`],
    /// discarding any previously stored source data.
    pub fn set_string(&mut self, string: String) {
        self.origin = StringBoxOrigin::String;
        self.string = string;
    }

    /// Length of the text in UTF-8 bytes (not characters).
    pub fn len(&self) -> usize {
        self.string.len()
    }

    /// Returns `true` when the text contains no bytes.
    pub fn is_empty(&self) -> bool {
        self.string.is_empty()
    }

    /// Number of Unicode scalar values in the text; walks the whole string.
    pub fn char_count(&self) -> usize {
        self.string.chars().count()
    }

    /// Borrows the text as a string slice.
    pub fn as_str(&self) -> &str {
        self.string.as_str()
    }

    /// Borrows the text as UTF-8 bytes.
    pub fn as_bytes(&self) -> &[u8] {
        self.string.as_bytes()
    }

    /// Pointer to the first UTF-8 byte of the text.
    ///
    /// The buffer is [`StringBox::len`] bytes long and is not NUL-terminated.
    pub fn as_ptr(&self) -> *const u8 {
        self.string.as_ptr()
    }

    /// Returns the UTF-8 byte range occupied by the character at `index`.
    ///
    /// An index past the end is clamped to the last character; an empty text
    /// yields `0..0`.
    pub fn char_index_to_byte_range(&self, index: usize) -> Range<usize> {
        match self.string.char_indices().nth(index) {
            Some((start, character)) => start..start + character.len_utf8(),
            None => {
                // Out of range: fall back to the last character, if any.
                let start = self
                    .string
                    .char_indices()
                    .next_back()
                    .map_or(0, |(offset, _)| offset);
                start..self.len()
            }
        }
    }

    /// Returns the range of UTF-16 code units occupied by the character at
    /// `index`, as if the text were encoded as UTF-16.
    ///
    /// An index past the end is clamped to the last character; an empty text
    /// yields `0..0`.
    pub fn char_index_to_utf16_range(&self, index: usize) -> Range<usize> {
        let mut start = 0_usize;
        let mut width = 0_usize;
        for (char_index, character) in self.string.chars().enumerate() {
            start += width;
            // The UTF-16 width cannot be derived from the UTF-8 width: three-byte
            // characters such as '€' still take a single UTF-16 code unit.
            width = character.len_utf16();
            if char_index == index {
                break;
            }
        }
        start..start + width
    }

    /// Converts an offset in UTF-16 code units into a character index.
    ///
    /// The result is the index of the first character that starts at or after
    /// `index`, so an offset pointing into the middle of a surrogate pair maps
    /// to the following character. Offsets at or beyond the end of the text
    /// yield the character count.
    pub fn utf16_position_to_char_index(&self, index: usize) -> usize {
        let mut utf16_offset = 0_usize;
        for (char_index, character) in self.string.chars().enumerate() {
            if utf16_offset >= index {
                return char_index;
            }
            utf16_offset += character.len_utf16();
        }
        self.string.chars().count()
    }
}
impl Default for StringBox {
fn default() -> Self {
Self::new()
}
}
impl std::fmt::Display for StringBox {
    /// Writes the text verbatim; width, fill and precision flags on the
    /// formatter are not applied.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = self.string.as_str();
        formatter.write_str(text)
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// 32-bit code units are decoded into the matching Cyrillic text.
    #[test]
    pub fn test_from_wide_string() {
        let code_units: Vec<u32> = [1087, 1088, 1080, 1074, 1077, 1090].to_vec();
        let boxed = StringBox::from_wide_string(code_units);
        assert_eq!(boxed.to_string(), String::from("привет"));
    }

    /// Single-byte characters map one-to-one onto characters of the text.
    #[test]
    pub fn test_from_byte_string() {
        let raw_bytes: Vec<u8> = [104, 101, 108, 108, 111].to_vec();
        let boxed = StringBox::from_byte_string(raw_bytes);
        assert_eq!(boxed.to_string(), String::from("hello"));
    }

    /// The trailing NUL of a UTF-8 input is not part of the resulting text.
    #[test]
    pub fn test_from_utf8_string() {
        let terminated: [u8; 6] = [104, 101, 108, 108, 111, 0];
        let boxed = StringBox::from_utf8_string(&terminated);
        assert_eq!(boxed.to_string(), String::from("hello"));
    }

    /// A single four-byte character: byte length is 4, character count is 1.
    #[test]
    pub fn sparkle() {
        let heart = "💖".to_string();
        let boxed = StringBox::from_string(heart.clone());

        assert_eq!(heart.len(), 4);
        assert_eq!(boxed.len(), 4);
        assert_eq!(boxed.char_count(), 1);

        // Diagnostic output only; visible with `--nocapture`.
        heart
            .char_indices()
            .for_each(|entry| println!("{:?}", entry));
        println!("{:?}", heart.bytes());
    }
}