use crate::Result;
use crate::error::Error::FromUtf8Error;
use crate::mutf8;
use std::borrow::{Borrow, Cow};
use std::fmt;
use std::hash::{Hash, Hasher};
use std::ops::Deref;
/// A borrowed, dynamically sized string slice stored as raw bytes.
///
/// The type is `#[repr(transparent)]` over `[u8]`; `from_mutf8_unchecked`
/// relies on that to reinterpret a `&[u8]` as a `&JavaStr` with a pointer cast.
/// The checked constructor `from_mutf8` runs `mutf8::validate` on the bytes
/// before performing that reinterpretation.
#[derive(Eq)]
#[repr(transparent)]
pub struct JavaStr([u8]);
impl JavaStr {
    /// Checks `bytes` with `mutf8::validate` and, on success, views them as a
    /// [`JavaStr`] without copying.
    ///
    /// # Errors
    ///
    /// Returns whatever error `mutf8::validate` reports for `bytes`.
    #[inline]
    pub fn from_mutf8(bytes: &[u8]) -> Result<&JavaStr> {
        mutf8::validate(bytes)?;
        // SAFETY: `mutf8::validate` accepted `bytes` on the line above.
        #[expect(unsafe_code)]
        let validated = unsafe { Self::from_mutf8_unchecked(bytes) };
        Ok(validated)
    }

    /// Views `bytes` as a [`JavaStr`] without any validation.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that `bytes` would be accepted by
    /// `mutf8::validate`; `as_str` later feeds these bytes to
    /// `std::str::from_utf8_unchecked` based on that guarantee.
    #[must_use]
    #[inline]
    #[expect(unsafe_code, clippy::ref_as_ptr)]
    pub unsafe fn from_mutf8_unchecked(bytes: &[u8]) -> &JavaStr {
        // `JavaStr` is `#[repr(transparent)]` over `[u8]`, so the fat pointer
        // can be cast directly.
        let raw = bytes as *const [u8] as *const JavaStr;
        unsafe { &*raw }
    }

    /// Borrows a Rust `&str` as a [`JavaStr`] when no re-encoding is needed.
    ///
    /// # Errors
    ///
    /// Fails with `FromUtf8Error` when the UTF-8 bytes contain a NUL byte or
    /// any byte `>= 0xF0` (the lead byte of a four-byte UTF-8 sequence).
    pub fn try_from_str(s: &str) -> Result<&JavaStr> {
        let bytes = s.as_bytes();
        let needs_conversion = bytes.iter().any(|&b| b == 0x00 || b >= 0xF0);
        if needs_conversion {
            return Err(FromUtf8Error(String::from(
                "string contains characters requiring MUTF-8 conversion; use JavaString::from() instead",
            )));
        }
        // SAFETY: the scan above rejected NUL and four-byte lead bytes, the
        // two cases this function treats as requiring MUTF-8 conversion.
        #[expect(unsafe_code, clippy::ref_as_ptr)]
        Ok(unsafe { &*(bytes as *const [u8] as *const JavaStr) })
    }

    /// Returns the underlying encoded bytes.
    #[must_use]
    pub fn as_bytes(&self) -> &[u8] {
        &self.0
    }

    /// Borrows `s` when [`JavaStr::try_from_str`] succeeds; otherwise builds an
    /// owned [`JavaString`] via `JavaString::from(s)`.
    #[must_use]
    pub fn cow_from_str(s: &str) -> Cow<'_, JavaStr> {
        Self::try_from_str(s).map_or_else(|_| Cow::Owned(JavaString::from(s)), Cow::Borrowed)
    }

    /// Returns the contents as `&str` when `mutf8::has_mutf8_specials` reports
    /// nothing special in the bytes, and `None` otherwise.
    #[must_use]
    pub fn as_str(&self) -> Option<&str> {
        if mutf8::has_mutf8_specials(self.as_bytes()) {
            return None;
        }
        // SAFETY: relies on the type's invariant (see `from_mutf8_unchecked`)
        // together with the absence of MUTF-8 specials checked above.
        #[expect(unsafe_code)]
        let text = unsafe { std::str::from_utf8_unchecked(self.as_bytes()) };
        Some(text)
    }

    /// Produces an owned Rust `String`.
    ///
    /// Uses the borrowed form when available, then `mutf8::from_bytes`, and
    /// finally a lossy UTF-8 conversion if that decoder reports an error.
    #[must_use]
    pub fn to_rust_string(&self) -> String {
        match self.as_str() {
            Some(plain) => plain.to_owned(),
            None => mutf8::from_bytes(self.as_bytes())
                .unwrap_or_else(|_| String::from_utf8_lossy(self.as_bytes()).into_owned()),
        }
    }

    /// Returns a borrowed `&str` when [`JavaStr::as_str`] succeeds, otherwise
    /// an owned string from [`JavaStr::to_rust_string`].
    #[must_use]
    pub fn to_str_lossy(&self) -> Cow<'_, str> {
        self.as_str()
            .map_or_else(|| Cow::Owned(self.to_rust_string()), Cow::Borrowed)
    }

    /// Converts the bytes to UTF-16 code units via `mutf8::to_utf16`.
    ///
    /// # Errors
    ///
    /// Returns whatever error `mutf8::to_utf16` reports.
    pub fn to_utf16(&self) -> crate::Result<Vec<u16>> {
        mutf8::to_utf16(self.as_bytes())
    }

    /// Copies the bytes into a new owned [`JavaString`].
    #[must_use]
    pub fn to_java_string(&self) -> JavaString {
        JavaString(self.as_bytes().to_vec())
    }

    /// Length of the encoded form in bytes (not characters).
    #[must_use]
    pub fn len(&self) -> usize {
        self.as_bytes().len()
    }

    /// `true` when the string holds no bytes.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.as_bytes().is_empty()
    }
}
/// Debug output mirrors `str`'s: the decoded text is wrapped in double quotes
/// and quotes, backslashes and control characters inside it are escaped.
///
/// The previous implementation interpolated the text between literal quotes
/// without escaping, so a value containing `"` or a newline produced ambiguous
/// output, and it always allocated a `String` even when the bytes could be
/// borrowed as `&str`.
impl fmt::Debug for JavaStr {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Borrowed when possible; only strings needing decoding allocate.
        let text = self.to_str_lossy();
        <str as fmt::Debug>::fmt(&text, f)
    }
}
/// Displays the decoded text.
///
/// Uses `Formatter::pad` so width, fill, alignment and precision flags
/// (for example `{:>10}` or `{:.3}`) are honoured the same way they are for
/// `str`; the previous `write_str` call silently ignored them. The text is
/// borrowed when possible instead of always allocating a `String`.
impl fmt::Display for JavaStr {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.pad(&self.to_str_lossy())
    }
}
/// Two `JavaStr` values are equal exactly when their encoded bytes are equal.
impl PartialEq for JavaStr {
    fn eq(&self, other: &JavaStr) -> bool {
        self.as_bytes() == other.as_bytes()
    }
}
/// Compares against a Rust `str` by decoded characters rather than raw bytes.
impl PartialEq<str> for JavaStr {
    fn eq(&self, other: &str) -> bool {
        match self.as_str() {
            // No MUTF-8 specials: the bytes are directly comparable as `str`.
            Some(plain) => plain == other,
            // Otherwise decode on the fly and compare character by character;
            // `Iterator::eq` also requires both sides to end together.
            None => Mutf8CharIter::new(&self.0).eq(other.chars()),
        }
    }
}
/// Cursor that decodes Modified UTF-8 bytes into `char`s, one per call.
///
/// Malformed input never panics: anything that cannot be decoded yields
/// U+FFFD instead.
struct Mutf8CharIter<'a> {
    /// The encoded input.
    bytes: &'a [u8],
    /// Offset of the next undecoded byte in `bytes`.
    pos: usize,
}

impl<'a> Mutf8CharIter<'a> {
    /// Starts decoding at the first byte of `bytes`.
    fn new(bytes: &'a [u8]) -> Self {
        Self { bytes, pos: 0 }
    }

    /// Assembles the 16-bit value carried by a three-byte sequence.
    fn three_byte_unit(lead: u8, mid: u8, last: u8) -> u32 {
        (u32::from(lead & 0x0F) << 12) | (u32::from(mid & 0x3F) << 6) | u32::from(last & 0x3F)
    }
}

impl Iterator for Mutf8CharIter<'_> {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        const REPLACEMENT: char = '\u{FFFD}';

        let remaining = self.bytes.get(self.pos..)?;
        let (&lead, tail) = remaining.split_first()?;

        // Each arm yields (bytes consumed, decoded scalar); `None` means
        // "emit the replacement character".
        let (consumed, scalar) = match lead {
            // Single byte (a raw 0x00 is passed through as NUL).
            0x00..=0x7F => (1, Some(u32::from(lead))),
            // Two-byte sequence; this is also how MUTF-8 spells NUL (C0 80).
            0xC0..=0xDF => match tail {
                [cont, ..] => {
                    let code = (u32::from(lead & 0x1F) << 6) | u32::from(*cont & 0x3F);
                    (2, Some(code))
                }
                // Truncated: swallow the rest of the input.
                [] => (remaining.len(), None),
            },
            // Three-byte sequence, possibly the high half of a surrogate pair.
            0xE0..=0xEF => match tail {
                [mid, last, after @ ..] => {
                    let unit = Self::three_byte_unit(lead, *mid, *last);
                    let paired = match after {
                        [0xED, low_mid, low_last, ..] if (0xD800..=0xDBFF).contains(&unit) => {
                            let low = Self::three_byte_unit(0xED, *low_mid, *low_last);
                            (0xDC00..=0xDFFF)
                                .contains(&low)
                                .then(|| 0x1_0000 + ((unit - 0xD800) << 10) + (low - 0xDC00))
                        }
                        _ => None,
                    };
                    match paired {
                        Some(code) => (6, Some(code)),
                        // Lone surrogates fail `char::from_u32` below.
                        None => (3, Some(unit)),
                    }
                }
                // Truncated: swallow the rest of the input.
                _ => (remaining.len(), None),
            },
            // Stray continuation byte or a lead byte MUTF-8 never uses.
            _ => (1, None),
        };

        self.pos += consumed;
        Some(scalar.and_then(char::from_u32).unwrap_or(REPLACEMENT))
    }
}
/// Lets a `JavaStr` be compared with a `&str` value (e.g. a string literal).
impl PartialEq<&str> for JavaStr {
    fn eq(&self, other: &&str) -> bool {
        <Self as PartialEq<str>>::eq(self, *other)
    }
}
impl PartialEq<String> for JavaStr {
fn eq(&self, other: &String) -> bool {
self == other.as_str()
}
}
/// Mirror impl so that `str == JavaStr` works with the operands swapped.
impl PartialEq<JavaStr> for str {
    fn eq(&self, other: &JavaStr) -> bool {
        <JavaStr as PartialEq<str>>::eq(other, self)
    }
}
// Partial ordering simply wraps the total order from the `Ord` impl, so the
// two can never disagree and the result is always `Some`.
impl PartialOrd for JavaStr {
fn partial_cmp(&self, other: &JavaStr) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
/// Total order: lexicographic comparison of the encoded bytes.
impl Ord for JavaStr {
    fn cmp(&self, other: &JavaStr) -> std::cmp::Ordering {
        <[u8] as Ord>::cmp(&self.0, &other.0)
    }
}
/// Hashes exactly like the underlying `[u8]` slice.
impl Hash for JavaStr {
    fn hash<H: Hasher>(&self, state: &mut H) {
        <[u8] as Hash>::hash(&self.0, state);
    }
}
/// Exposes the encoded bytes to APIs that are generic over `AsRef<[u8]>`.
impl AsRef<[u8]> for JavaStr {
    fn as_ref(&self) -> &[u8] {
        self.as_bytes()
    }
}
// Identity conversion: returns `self` unchanged, so a `&JavaStr` can be passed
// wherever `AsRef<JavaStr>` is accepted.
impl AsRef<JavaStr> for JavaStr {
fn as_ref(&self) -> &JavaStr {
self
}
}
impl ToOwned for JavaStr {
type Owned = JavaString;
fn to_owned(&self) -> JavaString {
self.to_java_string()
}
}
/// Owned, growable counterpart of `JavaStr`, backed by a `Vec<u8>`.
///
/// The checked constructor `from_mutf8` runs `mutf8::validate` on the bytes;
/// `as_java_str` hands the stored bytes to `JavaStr::from_mutf8_unchecked`
/// without re-validating them.
#[derive(Clone, Eq)]
pub struct JavaString(Vec<u8>);
impl JavaString {
#[must_use]
pub const fn new() -> JavaString {
JavaString(Vec::new())
}
pub fn from_mutf8(bytes: Vec<u8>) -> Result<JavaString> {
mutf8::validate(&bytes)?;
Ok(JavaString(bytes))
}
#[must_use]
#[expect(unsafe_code)]
pub unsafe fn from_mutf8_unchecked(bytes: Vec<u8>) -> JavaString {
JavaString(bytes)
}
#[must_use]
pub fn as_java_str(&self) -> &JavaStr {
#[expect(unsafe_code)]
unsafe {
JavaStr::from_mutf8_unchecked(&self.0)
}
}
#[must_use]
pub fn into_bytes(self) -> Vec<u8> {
self.0
}
}
impl Default for JavaString {
fn default() -> Self {
JavaString::new()
}
}
impl Deref for JavaString {
type Target = JavaStr;
fn deref(&self) -> &JavaStr {
self.as_java_str()
}
}
impl Borrow<JavaStr> for JavaString {
fn borrow(&self) -> &JavaStr {
self.as_java_str()
}
}
impl AsRef<JavaStr> for JavaString {
fn as_ref(&self) -> &JavaStr {
self.as_java_str()
}
}
/// Exposes the encoded bytes to APIs that are generic over `AsRef<[u8]>`.
impl AsRef<[u8]> for JavaString {
    fn as_ref(&self) -> &[u8] {
        self.0.as_slice()
    }
}
/// Builds a `JavaString` from Rust text, re-encoding it with
/// `mutf8::to_bytes` unless the text is plain ASCII without NUL.
impl From<&str> for JavaString {
    fn from(s: &str) -> JavaString {
        let utf8 = s.as_bytes();
        // ASCII without NUL is copied verbatim, skipping the encoder.
        let plain_ascii = utf8.iter().all(|&b| b != 0 && b.is_ascii());
        let encoded = if plain_ascii {
            utf8.to_vec()
        } else {
            // NOTE(review): when the encoder fails, the raw UTF-8 bytes are
            // stored unchanged; confirm whether `mutf8::to_bytes` can actually
            // fail for a `&str` and whether that fallback is acceptable.
            mutf8::to_bytes(s).unwrap_or_else(|_| utf8.to_vec())
        };
        JavaString(encoded)
    }
}
impl From<String> for JavaString {
fn from(s: String) -> JavaString {
JavaString::from(s.as_str())
}
}
impl From<&JavaStr> for JavaString {
fn from(s: &JavaStr) -> JavaString {
s.to_java_string()
}
}
/// Converts either form of a `Cow<str>` by forwarding to the matching
/// `From<&str>` / `From<String>` conversion.
impl From<Cow<'_, str>> for JavaString {
    fn from(cow: Cow<'_, str>) -> JavaString {
        match cow {
            Cow::Borrowed(text) => JavaString::from(text),
            Cow::Owned(text) => JavaString::from(text),
        }
    }
}
/// Wraps an owned `JavaString` in the `Owned` variant of a `Cow`.
impl From<JavaString> for Cow<'static, JavaStr> {
    fn from(s: JavaString) -> Cow<'static, JavaStr> {
        Self::Owned(s)
    }
}
impl fmt::Debug for JavaString {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(self.as_java_str(), f)
}
}
impl fmt::Display for JavaString {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(self.as_java_str(), f)
}
}
/// Two `JavaString` values are equal exactly when their encoded bytes match.
impl PartialEq for JavaString {
    fn eq(&self, other: &JavaString) -> bool {
        self.0.as_slice() == other.0.as_slice()
    }
}
impl PartialEq<str> for JavaString {
fn eq(&self, other: &str) -> bool {
self.as_java_str() == other
}
}
impl PartialEq<&str> for JavaString {
fn eq(&self, other: &&str) -> bool {
self.as_java_str() == *other
}
}
impl PartialEq<String> for JavaString {
fn eq(&self, other: &String) -> bool {
self.as_java_str() == other.as_str()
}
}
/// Compares an owned string with a borrowed [`JavaStr`].
impl PartialEq<JavaStr> for JavaString {
    fn eq(&self, other: &JavaStr) -> bool {
        <JavaStr as PartialEq>::eq(self.as_java_str(), other)
    }
}
impl PartialEq<JavaString> for str {
fn eq(&self, other: &JavaString) -> bool {
other.as_java_str() == self
}
}
impl PartialEq<JavaString> for &str {
fn eq(&self, other: &JavaString) -> bool {
other.as_java_str() == *self
}
}
// Partial ordering simply wraps the total order from the `Ord` impl, so the
// two can never disagree and the result is always `Some`.
impl PartialOrd for JavaString {
fn partial_cmp(&self, other: &JavaString) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
/// Total order: lexicographic comparison of the encoded bytes.
impl Ord for JavaString {
    fn cmp(&self, other: &JavaString) -> std::cmp::Ordering {
        self.0.as_slice().cmp(other.0.as_slice())
    }
}
/// Hashes the encoded bytes as a slice, which is the same way `JavaStr`
/// hashes; the `Borrow<JavaStr>` impl requires the two to agree.
impl Hash for JavaString {
    fn hash<H: Hasher>(&self, state: &mut H) {
        <[u8] as Hash>::hash(self.0.as_slice(), state);
    }
}
// Unit tests for `JavaStr` / `JavaString`: construction, encoding of special
// characters, conversions, comparisons, formatting and the std trait impls.
#[cfg(test)]
mod tests {
use super::*;
// --- Construction from Rust strings ---
#[test]
fn test_java_string_from_str() {
let js = JavaString::from("Hello");
assert_eq!(js.len(), 5);
assert_eq!(js, "Hello");
assert_eq!(js.to_rust_string(), "Hello");
}
#[test]
fn test_java_string_from_string() {
let js = JavaString::from(String::from("World"));
assert_eq!(js, "World");
}
// --- `as_str` succeeds when no re-encoding was needed ---
#[test]
fn test_java_str_as_str_ascii() {
let js = JavaString::from("ascii");
let java_str: &JavaStr = &js;
assert_eq!(java_str.as_str(), Some("ascii"));
}
// Non-ASCII characters outside the supplementary planes still borrow as `&str`.
#[test]
fn test_java_str_as_str_bmp() {
let js = JavaString::from("βγδ");
let java_str: &JavaStr = &js;
assert_eq!(java_str.as_str(), Some("βγδ"));
}
// NUL is stored as the two bytes 0xC0 0x80, so `as_str` must refuse to borrow
// while `to_rust_string` still round-trips.
#[test]
fn test_java_string_null_encoding() {
let js = JavaString::from("\0");
assert_eq!(js.as_bytes(), &[0xC0, 0x80]);
assert_eq!(js.as_str(), None); assert_eq!(js.to_rust_string(), "\0");
}
// A supplementary character is stored with bytes that differ from its UTF-8
// form; it cannot be borrowed as `&str` but still round-trips.
#[test]
fn test_java_string_supplementary() {
let js = JavaString::from("😀");
assert_ne!(js.as_bytes(), "😀".as_bytes()); assert_eq!(js.as_str(), None); assert_eq!(js.to_rust_string(), "😀"); }
// --- Construction from raw MUTF-8 bytes ---
#[test]
fn test_java_str_from_mutf8_valid() {
let js = JavaStr::from_mutf8(b"Hello").expect("should be valid");
assert_eq!(js.len(), 5);
assert_eq!(js, "Hello");
}
// `len` counts encoded bytes: the two-byte NUL form has length 2.
#[test]
fn test_java_str_from_mutf8_null() {
let js = JavaStr::from_mutf8(&[0xC0, 0x80]).expect("should be valid MUTF-8");
assert_eq!(js.len(), 2);
}
// A four-byte UTF-8 sequence must be rejected by validation.
#[test]
fn test_java_str_from_mutf8_invalid() {
assert!(JavaStr::from_mutf8(&[0xF0, 0x9F, 0x98, 0x80]).is_err());
}
#[test]
fn test_java_string_from_mutf8() {
let js = JavaString::from_mutf8(vec![0x48, 0x65, 0x6C, 0x6C, 0x6F]).expect("valid");
assert_eq!(js, "Hello");
}
// --- Equality, including cross-type comparisons in both operand orders ---
#[test]
fn test_java_string_equality() {
let js1 = JavaString::from("Hello");
let js2 = JavaString::from("Hello");
let js3 = JavaString::from("World");
assert_eq!(js1, js2);
assert_ne!(js1, js3);
}
#[test]
fn test_java_string_cross_type_equality() {
let js = JavaString::from("Hello");
assert_eq!(js, "Hello");
assert_eq!("Hello", js);
assert_eq!(js, String::from("Hello"));
assert_eq!(js, *"Hello");
}
#[test]
fn test_java_str_cross_type_equality() {
let js = JavaString::from("Hello");
let java_str: &JavaStr = &js;
assert_eq!(java_str, "Hello");
assert_eq!(java_str, &"Hello");
}
// --- Formatting ---
// Debug output is the text wrapped in double quotes.
#[test]
fn test_java_string_debug() {
let js = JavaString::from("Hello");
assert_eq!(format!("{js:?}"), "\"Hello\"");
}
#[test]
fn test_java_string_display() {
let js = JavaString::from("Hello");
assert_eq!(format!("{js}"), "Hello");
}
// --- Standard trait impls: Hash, Cow/ToOwned/Borrow, Default ---
// A `JavaString` must be usable as a `HashMap` key.
#[test]
fn test_java_string_hash() {
use std::collections::HashMap;
let mut map = HashMap::new();
let js = JavaString::from("key");
map.insert(js.clone(), 42);
assert_eq!(map.get(&js), Some(&42));
}
// Both `Cow` variants must dereference to the same `JavaStr` contents.
#[test]
fn test_java_string_cow() {
let js = JavaString::from("Hello");
let cow: Cow<'_, JavaStr> = Cow::Owned(js.clone());
assert_eq!(&*cow, &*js);
let borrowed: Cow<'_, JavaStr> = Cow::Borrowed(&js);
assert_eq!(&*borrowed, &*js);
}
#[test]
fn test_java_string_default() {
let js = JavaString::default();
assert!(js.is_empty());
assert_eq!(js.len(), 0);
}
// --- Conversions between owned, borrowed and byte forms ---
#[test]
fn test_java_string_into_bytes() {
let js = JavaString::from("Hello");
let bytes = js.into_bytes();
assert_eq!(bytes, b"Hello");
}
#[test]
fn test_java_str_to_java_string() {
let js = JavaString::from("Hello");
let java_str: &JavaStr = &js;
let owned = java_str.to_java_string();
assert_eq!(owned, js);
}
// --- Ordering ---
#[test]
fn test_java_string_ordering() {
let a = JavaString::from("a");
let b = JavaString::from("b");
assert!(a < b);
assert!(b > a);
}
#[test]
fn test_java_str_ordering() {
let a = JavaString::from("a");
let b = JavaString::from("b");
let a_str: &JavaStr = &a;
let b_str: &JavaStr = &b;
assert!(a_str < b_str);
}
#[test]
fn test_java_string_clone() {
let js = JavaString::from("Hello");
let cloned = js.clone();
assert_eq!(js, cloned);
}
#[test]
fn test_java_str_empty() {
let js = JavaString::from("");
assert!(js.is_empty());
assert_eq!(js.len(), 0);
}
// --- `From<Cow<str>>` for both variants ---
#[test]
fn test_java_string_from_cow_borrowed() {
let cow: Cow<'_, str> = Cow::Borrowed("Hello");
let js = JavaString::from(cow);
assert_eq!(js, "Hello");
}
#[test]
fn test_java_string_from_cow_owned() {
let cow: Cow<'_, str> = Cow::Owned("Hello".to_string());
let js = JavaString::from(cow);
assert_eq!(js, "Hello");
}
// --- `try_from_str`: succeeds only when the text can be borrowed unchanged ---
#[test]
fn test_java_str_from_str_ascii() {
let java_str = JavaStr::try_from_str("Hello").expect("should succeed for ASCII");
assert_eq!(java_str, "Hello");
}
#[test]
fn test_java_str_from_str_bmp() {
let java_str = JavaStr::try_from_str("βγδ").expect("should succeed for BMP");
assert_eq!(java_str, "βγδ");
}
// NUL needs re-encoding, so borrowing must fail.
#[test]
fn test_java_str_from_str_null() {
assert!(JavaStr::try_from_str("\0").is_err());
}
// Supplementary characters need re-encoding, so borrowing must fail.
#[test]
fn test_java_str_from_str_supplementary() {
assert!(JavaStr::try_from_str("😀").is_err());
}
// --- `AsRef` impls ---
#[test]
fn test_java_string_as_ref_bytes() {
let js = JavaString::from("Hello");
let bytes: &[u8] = js.as_ref();
assert_eq!(bytes, b"Hello");
}
#[test]
fn test_java_string_as_ref_java_str() {
let js = JavaString::from("Hello");
let java_str: &JavaStr = js.as_ref();
assert_eq!(java_str, "Hello");
}
// Two `&JavaStr` views of the same `JavaString` compare equal.
#[test]
fn test_java_str_partial_eq_java_string() {
let js = JavaString::from("Hello");
let java_str: &JavaStr = &js;
assert_eq!(java_str, js.as_java_str());
}
}