use super::{exc, PyObject};
use conversion::{FromPyObject, RefFromPyObject, ToPyObject};
use err::{self, PyErr, PyResult};
use ffi;
use libc::c_char;
use python::{PyClone, Python, PythonObject, PythonObjectDowncastError, ToPythonPointer};
use std;
use std::borrow::Cow;
use std::{char, mem, str};
/// Wrapper around a `PyObject` that represents a Python string object.
///
/// With the `python3-sys` feature the type is registered through
/// `pyobject_newtype!` with `PyUnicode_Check` / `PyUnicode_Type`.
/// With the `python27-sys` feature only the bare newtype is generated here;
/// the checked downcast and the type object are provided by the hand-written
/// impls elsewhere in this file.
pub struct PyString(PyObject);
#[cfg(feature = "python27-sys")]
pyobject_newtype!(PyString);
#[cfg(feature = "python3-sys")]
pyobject_newtype!(PyString, PyUnicode_Check, PyUnicode_Type);
/// Wrapper around a `PyObject` that represents a Python byte string
/// (registered with `PyBytes_Check` / `PyBytes_Type`).
pub struct PyBytes(PyObject);
pyobject_newtype!(PyBytes, PyBytes_Check, PyBytes_Type);
/// Wrapper around a `PyObject` that represents a Python unicode string
/// (registered with `PyUnicode_Check` / `PyUnicode_Type`).
///
/// This is a distinct type only when the `python27-sys` feature is enabled.
#[cfg(feature = "python27-sys")]
pub struct PyUnicode(PyObject);
#[cfg(feature = "python27-sys")]
pyobject_newtype!(PyUnicode, PyUnicode_Check, PyUnicode_Type);
// With `python3-sys`, `PyUnicode` is just another name for `PyString`.
#[cfg(feature = "python3-sys")]
pub use PyString as PyUnicode;
#[cfg(feature = "python27-sys")]
impl ::python::PythonObjectWithCheckedDowncast for PyString {
    /// Converts an owned `PyObject` into a `PyString`.
    ///
    /// Succeeds when `is_base_string` accepts the object; otherwise returns a
    /// downcast error that names `"PyString"` as the expected type and carries
    /// the object's actual type.
    #[inline]
    fn downcast_from<'p>(
        py: Python<'p>,
        obj: PyObject,
    ) -> Result<PyString, PythonObjectDowncastError<'p>> {
        if !is_base_string(&obj) {
            let actual_type = obj.get_type(py);
            return Err(PythonObjectDowncastError::new(py, "PyString", actual_type));
        }
        Ok(PyString(obj))
    }

    /// Borrowing variant of `downcast_from`: reinterprets `&PyObject` as
    /// `&PyString` when `is_base_string` accepts the object.
    #[inline]
    fn downcast_borrow_from<'a, 'p>(
        py: Python<'p>,
        obj: &'a PyObject,
    ) -> Result<&'a PyString, PythonObjectDowncastError<'p>> {
        if !is_base_string(obj) {
            let actual_type = obj.get_type(py);
            return Err(::python::PythonObjectDowncastError::new(
                py,
                "PyString",
                actual_type,
            ));
        }
        // SAFETY: `PyString` is a single-field tuple struct around `PyObject`;
        // the reference cast relies on both having the same layout.
        Ok(unsafe { ::std::mem::transmute(obj) })
    }
}
/// Reports whether the type of `obj` carries the string-subclass or the
/// unicode-subclass type flag, as tested by `PyType_FastSubclass`.
#[cfg(feature = "python27-sys")]
#[inline]
fn is_base_string(obj: &PyObject) -> bool {
    // Either flag qualifies the object as a string for `PyString`.
    let string_flags = ffi::Py_TPFLAGS_STRING_SUBCLASS | ffi::Py_TPFLAGS_UNICODE_SUBCLASS;
    let matched = unsafe { ffi::PyType_FastSubclass(ffi::Py_TYPE(obj.as_ptr()), string_flags) };
    matched != 0
}
#[cfg(feature = "python27-sys")]
impl ::python::PythonObjectWithTypeObject for PyString {
    /// Returns the type object created from the FFI static `PyBaseString_Type`.
    #[inline]
    fn type_object(py: Python) -> super::PyType {
        unsafe {
            let base_string_type = &mut ::ffi::PyBaseString_Type;
            ::objects::typeobject::PyType::from_type_ptr(py, base_string_type)
        }
    }
}
/// Borrowed view of the raw data of a Python string, tagged with the encoding
/// the data is stored in.
///
/// Use `to_string` / `to_string_lossy` to turn any variant into a Rust string.
#[derive(Clone, Copy, Debug)]
pub enum PyStringData<'a> {
/// One byte per character; `to_string` maps each byte to the code point with
/// the same numeric value.
Latin1(&'a [u8]),
/// Bytes expected to be UTF-8; validated when converted with `to_string`.
Utf8(&'a [u8]),
/// 16-bit code units, decoded as UTF-16 on conversion.
Utf16(&'a [u16]),
/// 32-bit units, each converted with `char::from_u32` on conversion.
Utf32(&'a [u32]),
}
impl<'a> From<&'a str> for PyStringData<'a> {
    /// Wraps the bytes of a Rust string slice as `PyStringData::Utf8`.
    #[inline]
    fn from(text: &'a str) -> Self {
        let utf8 = text.as_bytes();
        PyStringData::Utf8(utf8)
    }
}
impl<'a> From<&'a [u16]> for PyStringData<'a> {
    /// Wraps a slice of 16-bit units as `PyStringData::Utf16`.
    #[inline]
    fn from(units: &'a [u16]) -> Self {
        PyStringData::Utf16(units)
    }
}
impl<'a> From<&'a [u32]> for PyStringData<'a> {
    /// Wraps a slice of 32-bit units as `PyStringData::Utf32`.
    #[inline]
    fn from(units: &'a [u32]) -> Self {
        PyStringData::Utf32(units)
    }
}
impl<'a> PyStringData<'a> {
    /// Decodes the string data into a Rust string.
    ///
    /// The result borrows from the underlying buffer when no re-encoding is
    /// needed (valid UTF-8, or Latin-1 data that is pure ASCII) and is an
    /// owned `String` otherwise.
    ///
    /// # Errors
    ///
    /// Returns a Python `UnicodeDecodeError` when the data is invalid for its
    /// encoding: malformed UTF-8, UTF-16 rejected by `String::from_utf16`, or a
    /// UTF-32 unit rejected by `char::from_u32`. Latin-1 data never fails.
    /// If constructing the exception object itself fails, that error is
    /// returned instead.
    pub fn to_string(self, py: Python) -> PyResult<Cow<'a, str>> {
        match self {
            PyStringData::Utf8(data) => match str::from_utf8(data) {
                Ok(s) => Ok(Cow::Borrowed(s)),
                Err(e) => Err(PyErr::from_instance(
                    py,
                    exc::UnicodeDecodeError::new_utf8(py, data, e)?,
                )),
            },
            PyStringData::Latin1(data) => {
                if data.is_ascii() {
                    // SAFETY: ASCII-only bytes are always valid UTF-8.
                    Ok(Cow::Borrowed(unsafe { str::from_utf8_unchecked(data) }))
                } else {
                    // Every Latin-1 byte is the code point of the same value.
                    Ok(Cow::Owned(data.iter().map(|&b| b as char).collect()))
                }
            }
            PyStringData::Utf16(data) => {
                // Views the code units as raw bytes for the exception object.
                // A slice-to-slice transmute would keep the *element* count as
                // the length and expose only half of the bytes, so the byte
                // length is computed explicitly.
                fn utf16_bytes(input: &[u16]) -> &[u8] {
                    // SAFETY: the pointer and byte length describe exactly the
                    // memory of `input`; `u8` has alignment 1 and any byte
                    // pattern is a valid `u8`. The result borrows `input`.
                    unsafe {
                        std::slice::from_raw_parts(
                            input.as_ptr() as *const u8,
                            mem::size_of_val(input),
                        )
                    }
                }
                match String::from_utf16(data) {
                    Ok(s) => Ok(Cow::Owned(s)),
                    Err(_) => Err(PyErr::from_instance(
                        py,
                        exc::UnicodeDecodeError::new(
                            py,
                            cstr!("utf-16"),
                            utf16_bytes(data),
                            // The whole input (in bytes) is reported as the bad range.
                            0..2 * data.len(),
                            cstr!("invalid utf-16"),
                        )?,
                    )),
                }
            }
            PyStringData::Utf32(data) => {
                // Same as `utf16_bytes` above, for 32-bit units.
                fn utf32_bytes(input: &[u32]) -> &[u8] {
                    // SAFETY: see `utf16_bytes`; the length is the size of
                    // `input` in bytes.
                    unsafe {
                        std::slice::from_raw_parts(
                            input.as_ptr() as *const u8,
                            mem::size_of_val(input),
                        )
                    }
                }
                // Collecting into `Option<String>` yields `None` as soon as one
                // unit is not a valid `char`.
                match data
                    .iter()
                    .map(|&u| char::from_u32(u))
                    .collect::<Option<String>>()
                {
                    Some(s) => Ok(Cow::Owned(s)),
                    None => Err(PyErr::from_instance(
                        py,
                        exc::UnicodeDecodeError::new(
                            py,
                            cstr!("utf-32"),
                            utf32_bytes(data),
                            // The whole input (in bytes) is reported as the bad range.
                            0..4 * data.len(),
                            cstr!("invalid utf-32"),
                        )?,
                    )),
                }
            }
        }
    }

    /// Decodes the string data into a Rust string without failing.
    ///
    /// Invalid UTF-8 and UTF-16 sequences are handled by
    /// `String::from_utf8_lossy` / `String::from_utf16_lossy`; UTF-32 units
    /// that are not a valid `char` become U+FFFD. Latin-1 data is converted
    /// exactly as in `to_string`.
    pub fn to_string_lossy(self) -> Cow<'a, str> {
        match self {
            PyStringData::Utf8(data) => String::from_utf8_lossy(data),
            PyStringData::Latin1(data) => {
                if data.is_ascii() {
                    // SAFETY: ASCII-only bytes are always valid UTF-8.
                    Cow::Borrowed(unsafe { str::from_utf8_unchecked(data) })
                } else {
                    Cow::Owned(data.iter().map(|&b| b as char).collect())
                }
            }
            PyStringData::Utf16(data) => Cow::Owned(String::from_utf16_lossy(data)),
            PyStringData::Utf32(data) => Cow::Owned(
                data.iter()
                    .map(|&u| char::from_u32(u).unwrap_or('\u{FFFD}'))
                    .collect(),
            ),
        }
    }
}
impl PyString {
    /// Creates a new Python string object from the Rust string `s`.
    ///
    /// With `python27-sys`, ASCII-only input produces a `PyBytes` object and
    /// any other input a `PyUnicode` object, each converted to `PyString`.
    /// With `python3-sys`, the object is created with
    /// `PyUnicode_FromStringAndSize`.
    ///
    /// The new pointer is passed through `err::cast_from_owned_ptr_or_panic`,
    /// so (as that name indicates) a failure surfaces as a panic rather than
    /// as an error value.
    pub fn new(py: Python, s: &str) -> PyString {
        #[cfg(feature = "python27-sys")]
        fn new_impl(py: Python, s: &str) -> PyString {
            if s.is_ascii() {
                PyBytes::new(py, s.as_bytes()).into_basestring()
            } else {
                PyUnicode::new(py, s).into_basestring()
            }
        }
        #[cfg(feature = "python3-sys")]
        fn new_impl(py: Python, s: &str) -> PyString {
            let ptr = s.as_ptr() as *const c_char;
            let len = s.len() as ffi::Py_ssize_t;
            unsafe {
                err::cast_from_owned_ptr_or_panic(py, ffi::PyUnicode_FromStringAndSize(ptr, len))
            }
        }
        new_impl(py, s)
    }

    /// Returns a borrowed view of the string's data.
    ///
    /// With `python3-sys` the view is always the UTF-8 form obtained from
    /// `PyUnicode_AsUTF8AndSize`. With `python27-sys` a byte string is exposed
    /// as `PyStringData::Utf8` (not validated here) and a unicode object via
    /// `PyUnicode::data`.
    ///
    /// # Panics
    ///
    /// With `python3-sys`: panics (after printing the Python error) if
    /// `PyUnicode_AsUTF8AndSize` returns null. With `python27-sys`: panics if
    /// the object is neither a `PyBytes` nor a `PyUnicode`.
    pub fn data(&self, py: Python) -> PyStringData {
        self.data_impl(py)
    }

    #[cfg(feature = "python27-sys")]
    fn data_impl(&self, py: Python) -> PyStringData {
        if let Ok(bytes) = self.0.cast_as::<PyBytes>(py) {
            PyStringData::Utf8(bytes.data(py))
        } else if let Ok(unicode) = self.0.cast_as::<PyUnicode>(py) {
            unicode.data(py)
        } else {
            panic!("PyString is neither `str` nor `unicode`")
        }
    }

    #[cfg(feature = "python3-sys")]
    fn data_impl(&self, py: Python) -> PyStringData {
        // Out-parameter for the byte length. It is zero-initialised rather
        // than created with the deprecated `mem::uninitialized()`, which is
        // undefined behaviour even for integer types.
        let mut size: ffi::Py_ssize_t = 0;
        unsafe {
            let data = ffi::PyUnicode_AsUTF8AndSize(self.as_ptr(), &mut size) as *const u8;
            if data.is_null() {
                PyErr::fetch(py).print(py);
                panic!("PyUnicode_AsUTF8AndSize failed");
            }
            // `size` has been filled in by the successful call above.
            PyStringData::Utf8(std::slice::from_raw_parts(data, size as usize))
        }
    }

    /// Converts the Python string to a Rust string.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `PyStringData::to_string` when the
    /// string data cannot be decoded.
    pub fn to_string(&self, py: Python) -> PyResult<Cow<str>> {
        self.data(py).to_string(py)
    }

    /// Converts the Python string to a Rust string via
    /// `PyStringData::to_string_lossy`, which never fails.
    pub fn to_string_lossy(&self, py: Python) -> Cow<str> {
        self.data(py).to_string_lossy()
    }
}
impl PyBytes {
    /// Creates a new Python byte string from the bytes of `s` using
    /// `PyBytes_FromStringAndSize`.
    ///
    /// The new pointer goes through `err::cast_from_owned_ptr_or_panic`.
    pub fn new(py: Python, s: &[u8]) -> PyBytes {
        let start = s.as_ptr() as *const c_char;
        let byte_count = s.len() as ffi::Py_ssize_t;
        unsafe {
            let raw = ffi::PyBytes_FromStringAndSize(start, byte_count);
            err::cast_from_owned_ptr_or_panic(py, raw)
        }
    }

    /// Returns the content of the byte string as a slice, built from
    /// `PyBytes_AsString` and `PyBytes_Size`.
    pub fn data(&self, _py: Python) -> &[u8] {
        let obj = self.as_ptr();
        unsafe {
            let start = ffi::PyBytes_AsString(obj) as *const u8;
            let byte_count = ffi::PyBytes_Size(obj) as usize;
            std::slice::from_raw_parts(start, byte_count)
        }
    }

    /// Borrows this byte string as a `PyString` (unchecked cast).
    #[cfg(feature = "python27-sys")]
    #[inline]
    pub fn as_basestring(&self) -> &PyString {
        let inner = &self.0;
        unsafe { inner.unchecked_cast_as() }
    }

    /// Consumes this byte string and returns it as a `PyString`
    /// (unchecked cast).
    #[cfg(feature = "python27-sys")]
    #[inline]
    pub fn into_basestring(self) -> PyString {
        let inner = self.0;
        unsafe { inner.unchecked_cast_into() }
    }
}
#[cfg(feature = "python27-sys")]
impl PyUnicode {
    /// Creates a new Python unicode object from the Rust string `s` using
    /// `PyUnicode_FromStringAndSize`.
    ///
    /// The new pointer goes through `err::cast_from_owned_ptr_or_panic`.
    pub fn new(py: Python, s: &str) -> PyUnicode {
        let start = s.as_ptr() as *const c_char;
        let byte_count = s.len() as ffi::Py_ssize_t;
        unsafe {
            let raw = ffi::PyUnicode_FromStringAndSize(start, byte_count);
            err::cast_from_owned_ptr_or_panic(py, raw)
        }
    }

    /// Borrows this unicode object as a `PyString` (unchecked cast).
    #[inline]
    pub fn as_basestring(&self) -> &PyString {
        let inner = &self.0;
        unsafe { inner.unchecked_cast_as() }
    }

    /// Consumes this unicode object and returns it as a `PyString`
    /// (unchecked cast).
    #[inline]
    pub fn into_basestring(self) -> PyString {
        let inner = self.0;
        unsafe { inner.unchecked_cast_into() }
    }

    /// Returns a borrowed view of the object's internal buffer, as reported by
    /// `PyUnicode_AS_UNICODE` / `PyUnicode_GET_SIZE`.
    ///
    /// The resulting variant is chosen by the `From` impl matching the
    /// buffer's element type.
    pub fn data(&self, _py: Python) -> PyStringData {
        let obj = self.as_ptr();
        unsafe {
            let start = ffi::PyUnicode_AS_UNICODE(obj);
            let unit_count = ffi::PyUnicode_GET_SIZE(obj) as usize;
            PyStringData::from(std::slice::from_raw_parts(start, unit_count))
        }
    }

    /// Converts the unicode object to a Rust string.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `PyStringData::to_string` when the data
    /// cannot be decoded.
    pub fn to_string(&self, py: Python) -> PyResult<Cow<str>> {
        let view = self.data(py);
        view.to_string(py)
    }

    /// Converts the unicode object to a Rust string via
    /// `PyStringData::to_string_lossy`, which never fails.
    pub fn to_string_lossy(&self, py: Python) -> Cow<str> {
        let view = self.data(py);
        view.to_string_lossy()
    }
}
/// Converts a Rust string slice into a Python string via `PyString::new`.
impl ToPyObject for str {
    type ObjectType = PyString;

    #[inline]
    fn to_py_object(&self, py: Python) -> Self::ObjectType {
        PyString::new(py, self)
    }
}
/// Converts a borrowed-or-owned Rust string into a Python string via
/// `PyString::new`.
impl<'a> ToPyObject for Cow<'a, str> {
    type ObjectType = PyString;

    #[inline]
    fn to_py_object(&self, py: Python) -> Self::ObjectType {
        // Deref coercion: `&Cow<str>` -> `&str`.
        let text: &str = self;
        PyString::new(py, text)
    }
}
/// Converts an owned Rust string into a Python string via `PyString::new`.
impl ToPyObject for String {
    type ObjectType = PyString;

    #[inline]
    fn to_py_object(&self, py: Python) -> Self::ObjectType {
        PyString::new(py, self.as_str())
    }
}
/// Extracts a Rust string from a Python object that can be cast to `PyString`.
///
/// Fails if the cast fails or if `PyString::to_string` reports a decoding
/// error.
impl<'s> FromPyObject<'s> for Cow<'s, str> {
    fn extract(py: Python, obj: &'s PyObject) -> PyResult<Self> {
        let py_string = obj.cast_as::<PyString>(py)?;
        py_string.to_string(py)
    }
}
/// Extracts an owned `String` by extracting a `Cow<str>` and taking ownership
/// of its contents.
impl<'s> FromPyObject<'s> for String {
    fn extract(py: Python, obj: &'s PyObject) -> PyResult<Self> {
        let text = obj.extract::<Cow<str>>(py)?;
        Ok(text.into_owned())
    }
}
/// Extracts bytes from a Python object.
///
/// A `PyBytes` object is borrowed directly; anything else is handed to
/// `extract_buffer_or_sequence` and returned as an owned value.
impl<'s> FromPyObject<'s> for Cow<'s, [u8]> {
    fn extract(py: Python, obj: &'s PyObject) -> PyResult<Self> {
        match obj.cast_as::<PyBytes>(py) {
            Ok(bytes) => Ok(Cow::Borrowed(bytes.data(py))),
            Err(_) => super::sequence::extract_buffer_or_sequence(py, obj).map(Cow::Owned),
        }
    }
}
/// Extracts a string from `obj` and lends it to the callback `f` as `&str`.
///
/// The callback is not invoked when extraction fails; the extraction error is
/// returned instead.
impl RefFromPyObject for str {
    fn with_extracted<F, R>(py: Python, obj: &PyObject, f: F) -> PyResult<R>
    where
        F: FnOnce(&str) -> R,
    {
        obj.extract::<Cow<str>>(py).map(|text| f(&text))
    }
}
/// Extracts bytes from `obj` and lends them to the callback `f` as `&[u8]`.
///
/// The callback is not invoked when extraction fails; the extraction error is
/// returned instead.
impl RefFromPyObject for [u8] {
    fn with_extracted<F, R>(py: Python, obj: &PyObject, f: F) -> PyResult<R>
    where
        F: FnOnce(&[u8]) -> R,
    {
        obj.extract::<Cow<[u8]>>(py).map(|bytes| f(&bytes))
    }
}
#[cfg(test)]
mod test {
    use conversion::{RefFromPyObject, ToPyObject};
    use python::{Python, PythonObject};

    /// A character outside the Basic Multilingual Plane must round-trip
    /// through a Python string unchanged.
    #[test]
    fn test_non_bmp() {
        let gil = Python::acquire_gil();
        let py = gil.python();
        let original = "\u{1F30F}";
        let obj = original.to_py_object(py).into_object();
        let round_tripped = obj.extract::<String>(py).unwrap();
        assert_eq!(original, round_tripped);
    }

    /// `with_extracted` must hand the callback the same text that was stored.
    #[test]
    fn test_extract_str() {
        let gil = Python::acquire_gil();
        let py = gil.python();
        let s = "Hello Python";
        let py_string = s.to_py_object(py).into_object();
        let mut called = false;
        let outcome = RefFromPyObject::with_extracted(py, &py_string, |s2: &str| {
            assert_eq!(s, s2);
            called = true;
        });
        outcome.unwrap();
        // Make sure the assertion inside the callback actually ran.
        assert!(called);
    }

    /// `with_extracted` must hand the callback the bytes of a bytes literal.
    #[test]
    fn test_extract_byte_str() {
        let gil = Python::acquire_gil();
        let py = gil.python();
        let py_bytes = py.eval("b'Hello'", None, None).unwrap();
        let mut called = false;
        let outcome = RefFromPyObject::with_extracted(py, &py_bytes, |s2: &[u8]| {
            assert_eq!(b"Hello", s2);
            called = true;
        });
        outcome.unwrap();
        // Make sure the assertion inside the callback actually ran.
        assert!(called);
    }

    /// A bytes object must be extractable into a `Vec<u8>`.
    #[test]
    #[cfg(feature = "nightly")]
    fn test_extract_byte_str_to_vec() {
        let gil = Python::acquire_gil();
        let py = gil.python();
        let py_bytes = py.eval("b'Hello'", None, None).unwrap();
        let extracted = py_bytes.extract::<Vec<u8>>(py).unwrap();
        assert_eq!(b"Hello", &extracted[..]);
    }
}