use crate::ffi::{PyObject, Py_UCS1, Py_UCS2, Py_UCS4, Py_UNICODE, Py_hash_t, Py_ssize_t};
use libc::wchar_t;
use std::os::raw::{c_char, c_int, c_uint, c_void};
/// C-layout (`#[repr(C)]`) header that the helpers in this file overlay on a
/// `*mut PyObject` by pointer cast.
///
/// The `state` word packs several flags. On non-big-endian targets they are
/// decoded by the `interned`, `kind`, `compact`, `ascii` and `ready` methods
/// of this type.
#[repr(C)]
pub struct PyASCIIObject {
/// Generic object header. It is the first field, which is what lets the
/// helpers in this file cast a `*mut PyObject` to `*mut PyASCIIObject`.
pub ob_base: PyObject,
/// Length value returned by `PyUnicode_GET_LENGTH`.
pub length: Py_ssize_t,
/// NOTE(review): presumably the cached hash of the string; confirm against the C header.
pub hash: Py_hash_t,
/// Packed flags: bits 0-1 interned, bits 2-4 kind, bit 5 compact, bit 6
/// ascii, bit 7 ready (positions taken from the accessor methods).
pub state: u32,
/// NOTE(review): presumably the legacy `wchar_t` representation pointer; confirm against the C header.
pub wstr: *mut wchar_t,
}
/// Decoders for the flags packed into the `state` word.
///
/// Bit positions read here: bits 0-1 `interned`, bits 2-4 `kind`, bit 5
/// `compact`, bit 6 `ascii`, bit 7 `ready`. The whole block is compiled only
/// when the target is not big-endian.
#[cfg(not(target_endian = "big"))]
impl PyASCIIObject {
    /// Returns the two lowest bits of `state`.
    #[inline]
    pub unsafe fn interned(&self) -> c_uint {
        self.state & 0b11
    }

    /// Returns the three-bit field stored in bits 2-4 of `state`.
    #[inline]
    pub unsafe fn kind(&self) -> c_uint {
        (self.state & 0b1_1100) >> 2
    }

    /// Returns `1` when bit 5 of `state` is set, otherwise `0`.
    #[inline]
    pub unsafe fn compact(&self) -> c_uint {
        c_uint::from(self.state & (1 << 5) != 0)
    }

    /// Returns `1` when bit 6 of `state` is set, otherwise `0`.
    #[inline]
    pub unsafe fn ascii(&self) -> c_uint {
        c_uint::from(self.state & (1 << 6) != 0)
    }

    /// Returns `1` when bit 7 of `state` is set, otherwise `0`.
    #[inline]
    pub unsafe fn ready(&self) -> c_uint {
        c_uint::from(self.state & (1 << 7) != 0)
    }
}
/// C-layout struct that embeds a `PyASCIIObject` header and appends three
/// further fields.
///
/// `_PyUnicode_COMPACT_DATA` returns the address immediately after this struct
/// for strings whose `ascii` flag is clear.
#[repr(C)]
pub struct PyCompactUnicodeObject {
/// Embedded header; first field, so the object can also be viewed as a `PyASCIIObject`.
pub _base: PyASCIIObject,
/// NOTE(review): presumably the byte length of the buffer behind `utf8`; confirm against the C header.
pub utf8_length: Py_ssize_t,
/// NOTE(review): presumably a UTF-8 representation of the string; confirm against the C header.
pub utf8: *mut c_char,
/// NOTE(review): presumably the length of the `wstr` buffer in the header; confirm against the C header.
pub wstr_length: Py_ssize_t,
}
/// C-layout union of the pointer views stored in [`PyUnicodeObject::data`].
///
/// All members are raw pointers occupying the same storage; which one is
/// meaningful depends on the string's kind (see `PyUnicode_KIND`). Reading any
/// member is `unsafe`, as for every Rust union.
///
/// The members are public, like the fields of the other layout structs in this
/// file, so that code outside this module can read the pointer it needs.
#[repr(C)]
pub union PyUnicodeObjectData {
    /// Untyped view of the data pointer.
    pub any: *mut c_void,
    /// View as a pointer to `Py_UCS1` units.
    pub latin1: *mut Py_UCS1,
    /// View as a pointer to `Py_UCS2` units.
    pub ucs2: *mut Py_UCS2,
    /// View as a pointer to `Py_UCS4` units.
    pub ucs4: *mut Py_UCS4,
}
/// C-layout struct that embeds a `PyCompactUnicodeObject` and appends a data
/// pointer union.
///
/// `_PyUnicode_NONCOMPACT_DATA` reads `data.any` from this struct.
#[repr(C)]
pub struct PyUnicodeObject {
/// Embedded compact header; first field, so the object can also be viewed as
/// a `PyCompactUnicodeObject` or `PyASCIIObject`.
pub _base: PyCompactUnicodeObject,
/// Pointer to the character data, viewable at several element widths.
pub data: PyUnicodeObjectData,
}
extern "C" {
// Declared only when the `PyPy` cfg is not set.
// NOTE(review): the name suggests an internal consistency check of a string
// object, with `check_content` selecting a deeper check — confirm against the
// C header.
#[cfg(not(PyPy))]
pub fn _PyUnicode_CheckConsistency(op: *mut PyObject, check_content: c_int) -> c_int;
}
// Interning state values.
// NOTE(review): presumably these are the values reported by
// `PyASCIIObject::interned()` (a two-bit field); confirm against the C header.
pub const SSTATE_NOT_INTERNED: c_uint = 0;
pub const SSTATE_INTERNED_MORTAL: c_uint = 1;
pub const SSTATE_INTERNED_IMMORTAL: c_uint = 2;
/// Returns the `ascii` flag of the string object behind `op` (`1` or `0`).
///
/// In debug builds this asserts that `op` passes `PyUnicode_Check` and that
/// its `ready` flag is set.
///
/// # Safety
///
/// `op` is cast to `*mut PyASCIIObject` and dereferenced, so it must point to
/// a live object that starts with that layout.
#[inline]
#[cfg(not(target_endian = "big"))]
pub unsafe fn PyUnicode_IS_ASCII(op: *mut PyObject) -> c_uint {
    debug_assert!(crate::ffi::PyUnicode_Check(op) != 0);
    debug_assert!(PyUnicode_IS_READY(op) != 0);

    let header = &*op.cast::<PyASCIIObject>();
    header.ascii()
}
/// Returns the `compact` flag of the string object behind `op` (`1` or `0`).
///
/// # Safety
///
/// `op` is cast to `*mut PyASCIIObject` and dereferenced, so it must point to
/// a live object that starts with that layout.
#[inline]
#[cfg(not(target_endian = "big"))]
pub unsafe fn PyUnicode_IS_COMPACT(op: *mut PyObject) -> c_uint {
    let header = &*op.cast::<PyASCIIObject>();
    header.compact()
}
/// Returns `1` when both the `ascii` and the `compact` flags of `op` are set,
/// otherwise `0`.
///
/// The `ascii` flag is read straight from the header (not through
/// `PyUnicode_IS_ASCII`), so none of that function's debug assertions run here.
///
/// # Safety
///
/// `op` is cast to `*mut PyASCIIObject` and dereferenced, so it must point to
/// a live object that starts with that layout.
#[inline]
#[cfg(not(target_endian = "big"))]
pub unsafe fn PyUnicode_IS_COMPACT_ASCII(op: *mut PyObject) -> c_uint {
    let header = &*op.cast::<PyASCIIObject>();
    // `&&` short-circuits: the compact flag is only consulted for ASCII strings.
    let compact_ascii = header.ascii() != 0 && PyUnicode_IS_COMPACT(op) != 0;
    c_uint::from(compact_ascii)
}
// Kind codes, compared against `PyUnicode_KIND` / `PyASCIIObject::kind()` by
// the tests in this file (an ASCII string reports `PyUnicode_1BYTE_KIND`, a
// string containing an emoji reports `PyUnicode_4BYTE_KIND`).
//
// The two attributes below apply to `PyUnicode_WCHAR_KIND` only: it is
// compiled out when the `Py_3_12` cfg is set and carries a deprecation note
// when the `Py_3_10` cfg is set.
#[cfg(not(Py_3_12))]
#[cfg_attr(Py_3_10, deprecated(note = "Python 3.10"))]
pub const PyUnicode_WCHAR_KIND: c_uint = 0;
pub const PyUnicode_1BYTE_KIND: c_uint = 1;
pub const PyUnicode_2BYTE_KIND: c_uint = 2;
pub const PyUnicode_4BYTE_KIND: c_uint = 4;
/// Returns the pointer produced by `PyUnicode_DATA(op)`, typed as
/// `*mut Py_UCS1`.
///
/// # Safety
///
/// Same requirements as `PyUnicode_DATA`; this function only retypes the
/// returned pointer.
#[inline]
#[cfg(not(target_endian = "big"))]
pub unsafe fn PyUnicode_1BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS1 {
    let raw = PyUnicode_DATA(op);
    raw.cast::<Py_UCS1>()
}
/// Returns the pointer produced by `PyUnicode_DATA(op)`, typed as
/// `*mut Py_UCS2`.
///
/// # Safety
///
/// Same requirements as `PyUnicode_DATA`; this function only retypes the
/// returned pointer.
#[inline]
#[cfg(not(target_endian = "big"))]
pub unsafe fn PyUnicode_2BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS2 {
    let raw = PyUnicode_DATA(op);
    raw.cast::<Py_UCS2>()
}
/// Returns the pointer produced by `PyUnicode_DATA(op)`, typed as
/// `*mut Py_UCS4`.
///
/// # Safety
///
/// Same requirements as `PyUnicode_DATA`; this function only retypes the
/// returned pointer.
#[inline]
#[cfg(not(target_endian = "big"))]
pub unsafe fn PyUnicode_4BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS4 {
    let raw = PyUnicode_DATA(op);
    raw.cast::<Py_UCS4>()
}
/// Returns the three-bit `kind` field of the string object behind `op`.
///
/// In debug builds this asserts that `op` passes `PyUnicode_Check` and that
/// its `ready` flag is set.
///
/// # Safety
///
/// `op` is cast to `*mut PyASCIIObject` and dereferenced, so it must point to
/// a live object that starts with that layout.
#[inline]
#[cfg(not(target_endian = "big"))]
pub unsafe fn PyUnicode_KIND(op: *mut PyObject) -> c_uint {
    debug_assert!(crate::ffi::PyUnicode_Check(op) != 0);
    debug_assert!(PyUnicode_IS_READY(op) != 0);

    let header = &*op.cast::<PyASCIIObject>();
    header.kind()
}
/// Returns the address directly after the object's header struct.
///
/// When `PyUnicode_IS_ASCII(op)` is non-zero the header is a `PyASCIIObject`;
/// otherwise it is a `PyCompactUnicodeObject`. The result is `op` advanced by
/// exactly one such struct.
///
/// # Safety
///
/// `op` must satisfy the requirements of `PyUnicode_IS_ASCII`, and the
/// allocation behind it must extend at least to the end of the selected
/// header struct for the pointer arithmetic to be valid.
#[inline]
#[cfg(not(target_endian = "big"))]
pub unsafe fn _PyUnicode_COMPACT_DATA(op: *mut PyObject) -> *mut c_void {
    if PyUnicode_IS_ASCII(op) == 0 {
        // Non-ASCII: step over the larger compact header.
        return op.cast::<PyCompactUnicodeObject>().add(1).cast::<c_void>();
    }
    // ASCII: step over the plain ASCII header only.
    op.cast::<PyASCIIObject>().add(1).cast::<c_void>()
}
/// Returns the `data.any` pointer stored in the `PyUnicodeObject` behind `op`.
///
/// In debug builds this asserts that the stored pointer is not null.
///
/// # Safety
///
/// `op` is cast to `*mut PyUnicodeObject` and dereferenced, so it must point
/// to a live object with that full layout.
#[inline]
#[cfg(not(target_endian = "big"))]
pub unsafe fn _PyUnicode_NONCOMPACT_DATA(op: *mut PyObject) -> *mut c_void {
    let object = op.cast::<PyUnicodeObject>();
    debug_assert!(!(*object).data.any.is_null());

    (*object).data.any
}
/// Returns the character data pointer of the string object behind `op`.
///
/// Dispatches on the `compact` flag: compact objects use
/// `_PyUnicode_COMPACT_DATA`, all others use `_PyUnicode_NONCOMPACT_DATA`.
/// In debug builds this asserts that `op` passes `PyUnicode_Check`.
///
/// # Safety
///
/// `op` must satisfy the requirements of `PyUnicode_IS_COMPACT` and of
/// whichever data helper ends up being called.
#[inline]
#[cfg(not(target_endian = "big"))]
pub unsafe fn PyUnicode_DATA(op: *mut PyObject) -> *mut c_void {
    debug_assert!(crate::ffi::PyUnicode_Check(op) != 0);

    match PyUnicode_IS_COMPACT(op) {
        0 => _PyUnicode_NONCOMPACT_DATA(op),
        _ => _PyUnicode_COMPACT_DATA(op),
    }
}
/// Returns the `length` field of the string object behind `op`.
///
/// In debug builds this asserts that `op` passes `PyUnicode_Check` and that
/// its `ready` flag is set.
///
/// # Safety
///
/// `op` is cast to `*mut PyASCIIObject` and dereferenced, so it must point to
/// a live object that starts with that layout.
#[inline]
#[cfg(not(target_endian = "big"))]
pub unsafe fn PyUnicode_GET_LENGTH(op: *mut PyObject) -> Py_ssize_t {
    debug_assert!(crate::ffi::PyUnicode_Check(op) != 0);
    debug_assert!(PyUnicode_IS_READY(op) != 0);

    let header = op.cast::<PyASCIIObject>();
    (*header).length
}
/// Returns the `ready` flag of the string object behind `op` (`1` or `0`).
///
/// # Safety
///
/// `op` is cast to `*mut PyASCIIObject` and dereferenced, so it must point to
/// a live object that starts with that layout.
#[inline]
#[cfg(not(target_endian = "big"))]
pub unsafe fn PyUnicode_IS_READY(op: *mut PyObject) -> c_uint {
    let header = &*op.cast::<PyASCIIObject>();
    header.ready()
}
/// Makes sure the string object behind `op` is in the "ready" state.
///
/// Returns `0` straight away when the `ready` flag is already set; otherwise
/// forwards to `_PyUnicode_Ready` and returns its result. In debug builds this
/// asserts that `op` passes `PyUnicode_Check`.
///
/// Compiled out when the `Py_3_12` cfg is set, and marked deprecated when the
/// `Py_3_10` cfg is set.
///
/// # Safety
///
/// `op` must satisfy the requirements of `PyUnicode_IS_READY` and of the
/// foreign function `_PyUnicode_Ready`.
#[cfg(not(Py_3_12))]
#[cfg_attr(Py_3_10, deprecated(note = "Python 3.10"))]
#[inline]
#[cfg(not(target_endian = "big"))]
pub unsafe fn PyUnicode_READY(op: *mut PyObject) -> c_int {
    debug_assert!(crate::ffi::PyUnicode_Check(op) != 0);

    match PyUnicode_IS_READY(op) {
        0 => _PyUnicode_Ready(op),
        _ => 0,
    }
}
// Foreign declarations for creating, filling and copying string objects, plus
// the legacy `Py_UNICODE`-based accessors.
//
// Conventions used below:
// - `#[cfg_attr(PyPy, link_name = "...")]` links against the `PyPy`-prefixed
//   symbol when the `PyPy` cfg is set.
// - `#[cfg(not(PyPy))]` omits the declaration when the `PyPy` cfg is set.
// - `#[cfg(not(Py_3_12))]` + `#[deprecated]` marks `Py_UNICODE` APIs that are
//   omitted when the `Py_3_12` cfg is set.
extern "C" {
#[cfg_attr(PyPy, link_name = "PyPyUnicode_New")]
pub fn PyUnicode_New(size: Py_ssize_t, maxchar: Py_UCS4) -> *mut PyObject;
// Called by `PyUnicode_READY` when the object's `ready` flag is not set.
#[cfg_attr(PyPy, link_name = "_PyPyUnicode_Ready")]
pub fn _PyUnicode_Ready(unicode: *mut PyObject) -> c_int;
#[cfg(not(PyPy))]
#[cfg_attr(docsrs, doc(cfg(not(PyPy))))]
pub fn PyUnicode_CopyCharacters(
to: *mut PyObject,
to_start: Py_ssize_t,
from: *mut PyObject,
from_start: Py_ssize_t,
how_many: Py_ssize_t,
) -> Py_ssize_t;
#[cfg(not(PyPy))]
#[cfg_attr(docsrs, doc(cfg(not(PyPy))))]
pub fn PyUnicode_Fill(
unicode: *mut PyObject,
start: Py_ssize_t,
length: Py_ssize_t,
fill_char: Py_UCS4,
) -> Py_ssize_t;
#[cfg(not(Py_3_12))]
#[deprecated]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_FromUnicode")]
pub fn PyUnicode_FromUnicode(u: *const Py_UNICODE, size: Py_ssize_t) -> *mut PyObject;
// NOTE(review): `kind` is presumably one of the `PyUnicode_*_KIND` constants,
// passed here as `c_int` although the constants are `c_uint` — confirm.
#[cfg_attr(PyPy, link_name = "PyPyUnicode_FromKindAndData")]
pub fn PyUnicode_FromKindAndData(
kind: c_int,
buffer: *const c_void,
size: Py_ssize_t,
) -> *mut PyObject;
#[cfg(not(Py_3_12))]
#[deprecated]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicode")]
pub fn PyUnicode_AsUnicode(unicode: *mut PyObject) -> *mut Py_UNICODE;
#[cfg(not(Py_3_12))]
#[deprecated]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicodeAndSize")]
pub fn PyUnicode_AsUnicodeAndSize(
unicode: *mut PyObject,
size: *mut Py_ssize_t,
) -> *mut Py_UNICODE;
}
// Foreign declarations for the UTF-8 accessors and the `Py_UNICODE`-buffer
// encoders.
//
// `#[cfg_attr(PyPy, link_name = "...")]` links against the `PyPy`-prefixed
// symbol when the `PyPy` cfg is set; declarations without it have no PyPy
// symbol override.
// NOTE(review): the encoders below all take a raw `Py_UNICODE` buffer, unlike
// the `Py_UNICODE` APIs in the previous extern block they carry no `Py_3_12`
// gate or `#[deprecated]` marker — confirm whether they should.
extern "C" {
// `PyUnicode_AsUTF8AndSize` and `PyUnicode_AsUTF8` are each declared twice:
// with a `*const c_char` return when the `Py_3_7` cfg is set, and with a
// `*mut c_char` return otherwise.
#[cfg(Py_3_7)]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8AndSize")]
pub fn PyUnicode_AsUTF8AndSize(unicode: *mut PyObject, size: *mut Py_ssize_t) -> *const c_char;
#[cfg(not(Py_3_7))]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8AndSize")]
pub fn PyUnicode_AsUTF8AndSize(unicode: *mut PyObject, size: *mut Py_ssize_t) -> *mut c_char;
#[cfg(Py_3_7)]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8")]
pub fn PyUnicode_AsUTF8(unicode: *mut PyObject) -> *const c_char;
#[cfg(not(Py_3_7))]
#[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8")]
pub fn PyUnicode_AsUTF8(unicode: *mut PyObject) -> *mut c_char;
// Encoders taking a `Py_UNICODE` buffer plus its length.
pub fn PyUnicode_Encode(
s: *const Py_UNICODE,
size: Py_ssize_t,
encoding: *const c_char,
errors: *const c_char,
) -> *mut PyObject;
pub fn PyUnicode_EncodeUTF7(
data: *const Py_UNICODE,
length: Py_ssize_t,
base64SetO: c_int,
base64WhiteSpace: c_int,
errors: *const c_char,
) -> *mut PyObject;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeUTF8")]
pub fn PyUnicode_EncodeUTF8(
data: *const Py_UNICODE,
length: Py_ssize_t,
errors: *const c_char,
) -> *mut PyObject;
pub fn PyUnicode_EncodeUTF32(
data: *const Py_UNICODE,
length: Py_ssize_t,
errors: *const c_char,
byteorder: c_int,
) -> *mut PyObject;
pub fn PyUnicode_EncodeUTF16(
data: *const Py_UNICODE,
length: Py_ssize_t,
errors: *const c_char,
byteorder: c_int,
) -> *mut PyObject;
pub fn PyUnicode_EncodeUnicodeEscape(
data: *const Py_UNICODE,
length: Py_ssize_t,
) -> *mut PyObject;
pub fn PyUnicode_EncodeRawUnicodeEscape(
data: *const Py_UNICODE,
length: Py_ssize_t,
) -> *mut PyObject;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeLatin1")]
pub fn PyUnicode_EncodeLatin1(
data: *const Py_UNICODE,
length: Py_ssize_t,
errors: *const c_char,
) -> *mut PyObject;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeASCII")]
pub fn PyUnicode_EncodeASCII(
data: *const Py_UNICODE,
length: Py_ssize_t,
errors: *const c_char,
) -> *mut PyObject;
pub fn PyUnicode_EncodeCharmap(
data: *const Py_UNICODE,
length: Py_ssize_t,
mapping: *mut PyObject,
errors: *const c_char,
) -> *mut PyObject;
pub fn PyUnicode_TranslateCharmap(
data: *const Py_UNICODE,
length: Py_ssize_t,
table: *mut PyObject,
errors: *const c_char,
) -> *mut PyObject;
// Unlike the encoders above, this one writes into a caller-supplied
// `output` buffer and returns a `c_int` rather than a new object.
#[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeDecimal")]
pub fn PyUnicode_EncodeDecimal(
s: *mut Py_UNICODE,
length: Py_ssize_t,
output: *mut c_char,
errors: *const c_char,
) -> c_int;
#[cfg_attr(PyPy, link_name = "PyPyUnicode_TransformDecimalToASCII")]
pub fn PyUnicode_TransformDecimalToASCII(
s: *mut Py_UNICODE,
length: Py_ssize_t,
) -> *mut PyObject;
}
// Tests for the `state` accessors and the inline helpers built on them.
// Gated on non-big-endian targets, matching the items under test.
#[cfg(test)]
#[cfg(not(target_endian = "big"))]
mod tests {
use super::*;
use crate::types::PyString;
use crate::{AsPyPointer, Python};
// Checks every accessor against a hand-built `state` word, without touching
// a Python interpreter.
#[test]
fn ascii_object_bitfield() {
// The header contents are irrelevant here, so a zeroed `PyObject` is used.
let ob_base: PyObject = unsafe { std::mem::zeroed() };
let mut o = PyASCIIObject {
ob_base,
length: 0,
hash: 0,
state: 0,
wstr: std::ptr::null_mut() as *mut wchar_t,
};
unsafe {
// An all-zero state decodes to zero in every field.
assert_eq!(o.interned(), 0);
assert_eq!(o.kind(), 0);
assert_eq!(o.compact(), 0);
assert_eq!(o.ascii(), 0);
assert_eq!(o.ready(), 0);
// `interned` occupies bits 0-1: exercise all four values.
for i in 0..4 {
o.state = i;
assert_eq!(o.interned(), i);
}
// `kind` occupies bits 2-4: exercise all eight values.
for i in 0..8 {
o.state = i << 2;
assert_eq!(o.kind(), i);
}
// Single-bit flags at bits 5, 6 and 7.
o.state = 1 << 5;
assert_eq!(o.compact(), 1);
o.state = 1 << 6;
assert_eq!(o.ascii(), 1);
o.state = 1 << 7;
assert_eq!(o.ready(), 1);
}
}
// Checks the helpers against a real ASCII string created through the GIL.
// `PyUnicode_READY` is deprecated under the `Py_3_10` cfg, hence the allow.
#[test]
#[cfg_attr(Py_3_10, allow(deprecated))]
fn ascii() {
Python::with_gil(|py| {
let s = PyString::new(py, "hello, world");
let ptr = s.as_ptr();
unsafe {
// View the object through the header struct and check the raw flags first.
let ascii_ptr = ptr as *mut PyASCIIObject;
let ascii = ascii_ptr.as_ref().unwrap();
assert_eq!(ascii.interned(), 0);
assert_eq!(ascii.kind(), PyUnicode_1BYTE_KIND);
assert_eq!(ascii.compact(), 1);
assert_eq!(ascii.ascii(), 1);
assert_eq!(ascii.ready(), 1);
// Then check that the free-function helpers agree.
assert_eq!(PyUnicode_IS_ASCII(ptr), 1);
assert_eq!(PyUnicode_IS_COMPACT(ptr), 1);
assert_eq!(PyUnicode_IS_COMPACT_ASCII(ptr), 1);
assert!(!PyUnicode_1BYTE_DATA(ptr).is_null());
assert_eq!(PyUnicode_KIND(ptr), PyUnicode_1BYTE_KIND);
assert!(!_PyUnicode_COMPACT_DATA(ptr).is_null());
assert!(!PyUnicode_DATA(ptr).is_null());
assert_eq!(PyUnicode_GET_LENGTH(ptr), s.len().unwrap() as _);
assert_eq!(PyUnicode_IS_READY(ptr), 1);
// Already ready, so this returns 0 without calling `_PyUnicode_Ready`.
assert_eq!(PyUnicode_READY(ptr), 0);
}
})
}
// Same checks for a string containing an emoji, which the test expects to be
// stored with `PyUnicode_4BYTE_KIND` and the `ascii` flag cleared.
#[test]
#[cfg_attr(Py_3_10, allow(deprecated))]
fn ucs4() {
Python::with_gil(|py| {
let s = "哈哈🐈";
let py_string = PyString::new(py, s);
let ptr = py_string.as_ptr();
unsafe {
// View the object through the header struct and check the raw flags first.
let ascii_ptr = ptr as *mut PyASCIIObject;
let ascii = ascii_ptr.as_ref().unwrap();
assert_eq!(ascii.interned(), 0);
assert_eq!(ascii.kind(), PyUnicode_4BYTE_KIND);
assert_eq!(ascii.compact(), 1);
assert_eq!(ascii.ascii(), 0);
assert_eq!(ascii.ready(), 1);
// Then check that the free-function helpers agree.
assert_eq!(PyUnicode_IS_ASCII(ptr), 0);
assert_eq!(PyUnicode_IS_COMPACT(ptr), 1);
assert_eq!(PyUnicode_IS_COMPACT_ASCII(ptr), 0);
assert!(!PyUnicode_4BYTE_DATA(ptr).is_null());
assert_eq!(PyUnicode_KIND(ptr), PyUnicode_4BYTE_KIND);
assert!(!_PyUnicode_COMPACT_DATA(ptr).is_null());
assert!(!PyUnicode_DATA(ptr).is_null());
assert_eq!(PyUnicode_GET_LENGTH(ptr), py_string.len().unwrap() as _);
assert_eq!(PyUnicode_IS_READY(ptr), 1);
// Already ready, so this returns 0 without calling `_PyUnicode_Ready`.
assert_eq!(PyUnicode_READY(ptr), 0);
}
})
}
}