1use libc::c_char;
20use std::borrow::Cow;
21use std::{char, mem, str};
22
23use super::{exc, PyObject};
24use crate::conversion::{FromPyObject, RefFromPyObject, ToPyObject};
25use crate::err::{self, PyErr, PyResult};
26use crate::ffi;
27use crate::python::{PyClone, Python, PythonObject, PythonObjectDowncastError, ToPythonPointer};
28
/// Represents a Python string.
///
/// With the `python27-sys` feature the downcast check and type object are
/// implemented by hand elsewhere in this file (based on `basestring`), so the
/// newtype macro is invoked without a check function. With `python3-sys` the
/// type is checked with `PyUnicode_Check` against `PyUnicode_Type`.
pub struct PyString(PyObject);

#[cfg(feature = "python27-sys")]
pyobject_newtype!(PyString);
#[cfg(feature = "python3-sys")]
pyobject_newtype!(PyString, PyUnicode_Check, PyUnicode_Type);
37
/// Represents a Python byte string (checked with `PyBytes_Check` against
/// `PyBytes_Type`).
pub struct PyBytes(PyObject);

pyobject_newtype!(PyBytes, PyBytes_Check, PyBytes_Type);
43
/// Represents a Python unicode string.
///
/// This is a distinct type only with the `python27-sys` feature; with
/// `python3-sys` the name is re-exported as an alias of `PyString`.
#[cfg(feature = "python27-sys")]
pub struct PyUnicode(PyObject);

#[cfg(feature = "python27-sys")]
pyobject_newtype!(PyUnicode, PyUnicode_Check, PyUnicode_Type);

/// With `python3-sys`, `PyUnicode` is just another name for `PyString`.
#[cfg(feature = "python3-sys")]
pub use PyString as PyUnicode;
56
/// Checked downcasts to `PyString` for Python 2.
///
/// An object is accepted when `is_base_string` reports that its type carries
/// the `str` or `unicode` subclass flag.
#[cfg(feature = "python27-sys")]
impl crate::python::PythonObjectWithCheckedDowncast for PyString {
    /// Consumes `obj` and wraps it as a `PyString`, or reports the actual
    /// type of the object in a `PythonObjectDowncastError`.
    #[inline]
    fn downcast_from(
        py: Python<'_>,
        obj: PyObject,
    ) -> Result<PyString, PythonObjectDowncastError<'_>> {
        if !is_base_string(&obj) {
            let actual_type = obj.get_type(py);
            return Err(PythonObjectDowncastError::new(py, "PyString", actual_type));
        }
        Ok(PyString(obj))
    }

    /// Reinterprets a borrowed `PyObject` as a borrowed `PyString`, or reports
    /// the actual type of the object in a `PythonObjectDowncastError`.
    #[inline]
    fn downcast_borrow_from<'a, 'p>(
        py: Python<'p>,
        obj: &'a PyObject,
    ) -> Result<&'a PyString, PythonObjectDowncastError<'p>> {
        if !is_base_string(obj) {
            let actual_type = obj.get_type(py);
            return Err(PythonObjectDowncastError::new(py, "PyString", actual_type));
        }
        // SAFETY: `PyString` is a single-field wrapper around `PyObject`, and the
        // check above established that the object is a string.
        Ok(unsafe { std::mem::transmute::<&'a PyObject, &'a PyString>(obj) })
    }
}
93
/// Returns `true` when the type of `obj` has the `str` or the `unicode`
/// subclass flag set (Python 2 only).
#[cfg(feature = "python27-sys")]
#[inline]
fn is_base_string(obj: &PyObject) -> bool {
    let string_flags = ffi::Py_TPFLAGS_STRING_SUBCLASS | ffi::Py_TPFLAGS_UNICODE_SUBCLASS;
    // SAFETY: `obj` is a live reference to a Python object, so reading its type
    // pointer and testing the type flags is valid.
    let has_flag = unsafe { ffi::PyType_FastSubclass(ffi::Py_TYPE(obj.as_ptr()), string_flags) };
    has_flag != 0
}
104
/// Python 2 only: the type object reported for `PyString` is `basestring`.
#[cfg(feature = "python27-sys")]
impl crate::python::PythonObjectWithTypeObject for PyString {
    /// Builds the `PyType` from the address of the `PyBaseString_Type` static.
    #[inline]
    fn type_object(py: Python) -> super::PyType {
        unsafe {
            crate::objects::typeobject::PyType::from_type_ptr(py, &mut ffi::PyBaseString_Type)
        }
    }
}
114
/// Borrowed view of the raw contents of a Python string.
///
/// The variant records how the code units are encoded so that they can be
/// decoded into a Rust string later.
#[derive(Clone, Copy, Debug)]
pub enum PyStringData<'a> {
    /// One byte per character; each byte is the character's code point.
    Latin1(&'a [u8]),
    /// UTF-8 encoded bytes (not necessarily validated).
    Utf8(&'a [u8]),
    /// UTF-16 code units (may contain lone surrogates).
    Utf16(&'a [u16]),
    /// UTF-32 code units (may contain values that are not valid `char`s).
    Utf32(&'a [u32]),
}

/// A Rust `str` is always valid UTF-8, so it maps onto the `Utf8` variant.
impl<'a> From<&'a str> for PyStringData<'a> {
    #[inline]
    fn from(val: &'a str) -> Self {
        Self::Utf8(val.as_bytes())
    }
}

/// A slice of 16-bit code units is treated as UTF-16 data.
impl<'a> From<&'a [u16]> for PyStringData<'a> {
    #[inline]
    fn from(val: &'a [u16]) -> Self {
        Self::Utf16(val)
    }
}

/// A slice of 32-bit code units is treated as UTF-32 data.
impl<'a> From<&'a [u32]> for PyStringData<'a> {
    #[inline]
    fn from(val: &'a [u32]) -> Self {
        Self::Utf32(val)
    }
}
144
145impl<'a> PyStringData<'a> {
146 pub fn to_string(self, py: Python) -> PyResult<Cow<'a, str>> {
153 match self {
154 PyStringData::Utf8(data) => match str::from_utf8(data) {
155 Ok(s) => Ok(Cow::Borrowed(s)),
156 Err(e) => Err(PyErr::from_instance(
157 py,
158 exc::UnicodeDecodeError::new_utf8(py, data, e)?,
159 )),
160 },
161 PyStringData::Latin1(data) => {
162 if data.is_ascii() {
163 Ok(Cow::Borrowed(unsafe { str::from_utf8_unchecked(data) }))
164 } else {
165 Ok(Cow::Owned(data.iter().map(|&b| b as char).collect()))
166 }
167 }
168 PyStringData::Utf16(data) => {
169 fn utf16_bytes(input: &[u16]) -> &[u8] {
170 unsafe { mem::transmute(input) }
171 }
172 match String::from_utf16(data) {
173 Ok(s) => Ok(Cow::Owned(s)),
174 Err(_) => Err(PyErr::from_instance(
175 py,
176 exc::UnicodeDecodeError::new(
177 py,
178 cstr!("utf-16"),
179 utf16_bytes(data),
180 0..2 * data.len(),
181 cstr!("invalid utf-16"),
182 )?,
183 )),
184 }
185 }
186 PyStringData::Utf32(data) => {
187 fn utf32_bytes(input: &[u32]) -> &[u8] {
188 unsafe { mem::transmute(input) }
189 }
190 match data.iter().map(|&u| char::from_u32(u)).collect() {
191 Some(s) => Ok(Cow::Owned(s)),
192 None => Err(PyErr::from_instance(
193 py,
194 exc::UnicodeDecodeError::new(
195 py,
196 cstr!("utf-32"),
197 utf32_bytes(data),
198 0..4 * data.len(),
199 cstr!("invalid utf-32"),
200 )?,
201 )),
202 }
203 }
204 }
205 }
206
207 pub fn to_string_lossy(self) -> Cow<'a, str> {
214 match self {
215 PyStringData::Utf8(data) => String::from_utf8_lossy(data),
216 PyStringData::Latin1(data) => {
217 if data.is_ascii() {
218 Cow::Borrowed(unsafe { str::from_utf8_unchecked(data) })
219 } else {
220 Cow::Owned(data.iter().map(|&b| b as char).collect())
221 }
222 }
223 PyStringData::Utf16(data) => Cow::Owned(String::from_utf16_lossy(data)),
224 PyStringData::Utf32(data) => Cow::Owned(
225 data.iter()
226 .map(|&u| char::from_u32(u).unwrap_or('\u{FFFD}'))
227 .collect(),
228 ),
229 }
230 }
231}
232
233impl PyString {
234 pub fn new(py: Python, s: &str) -> PyString {
246 #[cfg(feature = "python27-sys")]
247 fn new_impl(py: Python, s: &str) -> PyString {
248 if cfg!(feature = "py2-no-auto-unicode-promotion") || s.is_ascii() {
249 PyBytes::new(py, s.as_bytes()).into_basestring()
250 } else {
251 PyUnicode::new(py, s).into_basestring()
252 }
253 }
254 #[cfg(feature = "python3-sys")]
255 fn new_impl(py: Python, s: &str) -> PyString {
256 let ptr = s.as_ptr() as *const c_char;
257 let len = s.len() as ffi::Py_ssize_t;
258 unsafe {
259 err::cast_from_owned_ptr_or_panic(py, ffi::PyUnicode_FromStringAndSize(ptr, len))
260 }
261 }
262 new_impl(py, s)
263 }
264
265 pub fn data(&self, py: Python) -> PyStringData {
271 self.data_impl(py)
272 }
273
274 #[cfg(feature = "python27-sys")]
275 fn data_impl(&self, py: Python) -> PyStringData {
276 if let Ok(bytes) = self.0.cast_as::<PyBytes>(py) {
277 PyStringData::Utf8(bytes.data(py))
278 } else if let Ok(unicode) = self.0.cast_as::<PyUnicode>(py) {
279 unicode.data(py)
280 } else {
281 panic!("PyString is neither `str` nor `unicode`")
282 }
283 }
284
285 #[cfg(feature = "python3-sys")]
286 fn data_impl(&self, _py: Python) -> PyStringData {
287 let ptr = self.as_ptr();
288 unsafe {
289 let ready = ffi::PyUnicode_READY(ptr);
290 if ready < 0 {
291 ffi::PyErr_Print();
293 panic!("PyUnicode_READY failed");
294 }
295 let size = ffi::PyUnicode_GET_LENGTH(ptr) as usize;
296 let data = ffi::PyUnicode_DATA(ptr);
297 let kind = ffi::PyUnicode_KIND(ptr);
298 match kind {
299 ffi::PyUnicode_1BYTE_KIND => {
300 PyStringData::Latin1(std::slice::from_raw_parts(data as *const u8, size))
301 }
302 ffi::PyUnicode_2BYTE_KIND => {
303 PyStringData::Utf16(std::slice::from_raw_parts(data as *const u16, size))
304 }
305 ffi::PyUnicode_4BYTE_KIND => {
306 PyStringData::Utf32(std::slice::from_raw_parts(data as *const u32, size))
307 }
308 _ => panic!("Unknown PyUnicode_KIND"),
309 }
310 }
311 }
312
313 pub fn to_string(&self, py: Python) -> PyResult<Cow<str>> {
322 #[cfg(feature = "python3-sys")]
323 unsafe {
324 let mut size: ffi::Py_ssize_t = 0;
330 let data = ffi::PyUnicode_AsUTF8AndSize(self.as_ptr(), &mut size);
331 if data.is_null() {
332 Err(PyErr::fetch(py))
333 } else {
334 let slice = std::slice::from_raw_parts(data as *const u8, size as usize);
335 Ok(Cow::Borrowed(std::str::from_utf8_unchecked(slice)))
336 }
337 }
338 #[cfg(feature = "python27-sys")]
339 {
340 self.data(py).to_string(py)
341 }
342 }
343
344 pub fn to_string_lossy(&self, py: Python) -> Cow<str> {
352 self.data(py).to_string_lossy()
353 }
354}
355
356impl PyBytes {
357 pub fn new(py: Python, s: &[u8]) -> PyBytes {
362 let ptr = s.as_ptr() as *const c_char;
363 let len = s.len() as ffi::Py_ssize_t;
364 unsafe { err::cast_from_owned_ptr_or_panic(py, ffi::PyBytes_FromStringAndSize(ptr, len)) }
365 }
366
367 pub fn data(&self, _py: Python) -> &[u8] {
369 unsafe {
370 let buffer = ffi::PyBytes_AsString(self.as_ptr()) as *const u8;
371 let length = ffi::PyBytes_Size(self.as_ptr()) as usize;
372 std::slice::from_raw_parts(buffer, length)
373 }
374 }
375
376 #[cfg(feature = "python27-sys")]
379 #[inline]
380 pub fn as_basestring(&self) -> &PyString {
381 unsafe { self.0.unchecked_cast_as() }
382 }
383
384 #[cfg(feature = "python27-sys")]
387 #[inline]
388 pub fn into_basestring(self) -> PyString {
389 unsafe { self.0.unchecked_cast_into() }
390 }
391}
392
#[cfg(feature = "python27-sys")]
impl PyUnicode {
    /// Creates a new Python `unicode` object from a Rust string.
    ///
    /// The result is passed through `cast_from_owned_ptr_or_panic`, so a failed
    /// allocation presumably panics rather than returning an error.
    pub fn new(py: Python, s: &str) -> PyUnicode {
        let byte_len = s.len() as ffi::Py_ssize_t;
        let utf8 = s.as_ptr() as *const c_char;
        unsafe {
            let raw = ffi::PyUnicode_FromStringAndSize(utf8, byte_len);
            err::cast_from_owned_ptr_or_panic(py, raw)
        }
    }

    /// Borrows this `unicode` object as a `PyString`.
    #[inline]
    pub fn as_basestring(&self) -> &PyString {
        let base: &PyString = unsafe { self.0.unchecked_cast_as() };
        base
    }

    /// Converts this `unicode` object into a `PyString`.
    #[inline]
    pub fn into_basestring(self) -> PyString {
        let base: PyString = unsafe { self.0.unchecked_cast_into() };
        base
    }

    /// Returns a borrowed view of the object's code units; the variant follows
    /// from the width of the interpreter's unicode character type.
    pub fn data(&self, _py: Python) -> PyStringData {
        let object = self.as_ptr();
        unsafe {
            let units = ffi::PyUnicode_AS_UNICODE(object);
            let unit_count = ffi::PyUnicode_GET_SIZE(object) as usize;
            PyStringData::from(std::slice::from_raw_parts(units, unit_count))
        }
    }

    /// Converts the `unicode` object to a Rust string.
    ///
    /// # Errors
    ///
    /// Returns a `UnicodeDecodeError` if the code units cannot be decoded.
    pub fn to_string(&self, py: Python) -> PyResult<Cow<str>> {
        let raw = self.data(py);
        raw.to_string(py)
    }

    /// Converts the `unicode` object to a Rust string, replacing anything that
    /// cannot be decoded with U+FFFD REPLACEMENT CHARACTER.
    pub fn to_string_lossy(&self, py: Python) -> Cow<str> {
        let raw = self.data(py);
        raw.to_string_lossy()
    }
}
444
445impl ToPyObject for str {
453 type ObjectType = PyString;
454
455 #[inline]
456 fn to_py_object(&self, py: Python) -> PyString {
457 PyString::new(py, self)
458 }
459}
460
461impl<'a> ToPyObject for Cow<'a, str> {
469 type ObjectType = PyString;
470
471 #[inline]
472 fn to_py_object(&self, py: Python) -> PyString {
473 PyString::new(py, self)
474 }
475}
476
477impl ToPyObject for String {
485 type ObjectType = PyString;
486
487 #[inline]
488 fn to_py_object(&self, py: Python) -> PyString {
489 PyString::new(py, self)
490 }
491}
492
493impl<'s> FromPyObject<'s> for Cow<'s, str> {
501 fn extract(py: Python, obj: &'s PyObject) -> PyResult<Self> {
502 obj.cast_as::<PyString>(py)?.to_string(py)
503 }
504}
505
506impl<'s> FromPyObject<'s> for String {
514 fn extract(py: Python, obj: &'s PyObject) -> PyResult<Self> {
515 obj.extract::<Cow<str>>(py).map(Cow::into_owned)
516 }
517}
518
519impl<'s> FromPyObject<'s> for Cow<'s, [u8]> {
527 fn extract(py: Python, obj: &'s PyObject) -> PyResult<Self> {
528 if let Ok(bytes) = obj.cast_as::<PyBytes>(py) {
529 Ok(Cow::Borrowed(bytes.data(py)))
530 } else {
531 super::sequence::extract_buffer_or_sequence(py, obj).map(Cow::Owned)
532 }
533 }
534}
535
536impl RefFromPyObject for str {
540 fn with_extracted<F, R>(py: Python, obj: &PyObject, f: F) -> PyResult<R>
541 where
542 F: FnOnce(&str) -> R,
543 {
544 let s = obj.extract::<Cow<str>>(py)?;
545 Ok(f(&s))
546 }
547}
548
549impl RefFromPyObject for [u8] {
557 fn with_extracted<F, R>(py: Python, obj: &PyObject, f: F) -> PyResult<R>
558 where
559 F: FnOnce(&[u8]) -> R,
560 {
561 let s = obj.extract::<Cow<[u8]>>(py)?;
562 Ok(f(&s))
563 }
564}
565
#[cfg(test)]
mod test {
    use super::{PyString, PyStringData};
    use crate::conversion::{RefFromPyObject, ToPyObject};
    use crate::python::{Python, PythonObject};

    /// A character outside the Basic Multilingual Plane survives a round trip.
    #[test]
    fn test_non_bmp() {
        let gil = Python::acquire_gil();
        let py = gil.python();
        let s = "\u{1F30F}";
        let py_string = s.to_py_object(py).into_object();
        assert_eq!(s, py_string.extract::<String>(py).unwrap());
    }

    /// `with_extracted` hands the closure the original text as `&str`.
    #[test]
    fn test_extract_str() {
        let gil = Python::acquire_gil();
        let py = gil.python();
        let s = "Hello Python";
        let py_string = s.to_py_object(py).into_object();
        let mut called = false;
        RefFromPyObject::with_extracted(py, &py_string, |s2: &str| {
            assert_eq!(s, s2);
            called = true;
        })
        .unwrap();
        assert!(called);
    }

    /// `with_extracted` hands the closure the bytes of a Python byte string.
    #[test]
    fn test_extract_byte_str() {
        let gil = Python::acquire_gil();
        let py = gil.python();
        let py_bytes = py.eval("b'Hello'", None, None).unwrap();
        let mut called = false;
        RefFromPyObject::with_extracted(py, &py_bytes, |s2: &[u8]| {
            assert_eq!(b"Hello", s2);
            called = true;
        })
        .unwrap();
        assert!(called);
    }

    /// Extracting a byte string into a `Vec<u8>`.
    // NOTE(review): gated on the `nightly` feature; presumably the `Vec<u8>`
    // extraction path needs a nightly-only language feature. Confirm.
    #[test]
    #[cfg(feature = "nightly")]
    fn test_extract_byte_str_to_vec() {
        let gil = Python::acquire_gil();
        let py = gil.python();
        let py_bytes = py.eval("b'Hello'", None, None).unwrap();
        let v = py_bytes.extract::<Vec<u8>>(py).unwrap();
        assert_eq!(b"Hello", &v[..]);
    }

    /// A non-ASCII Latin-1 character is stored as one byte on Python 3 and
    /// extracts to the expected Rust string.
    // `data` is only inspected in the `python3-sys` block below.
    #[allow(unused_variables)]
    #[test]
    fn test_extract_umlaut() {
        let gil = Python::acquire_gil();
        let py = gil.python();
        let py_string = py.eval("u'x=\\u00e4'", None, None).unwrap();
        let data = py_string.cast_as::<PyString>(py).unwrap().data(py);
        #[cfg(feature = "python3-sys")]
        {
            if let PyStringData::Latin1(s) = data {
                assert_eq!([b'x', b'=', 0xe4], *s);
            } else {
                panic!("Expected PyStringData::Latin1");
            }
        }
        // Written as an escape so the expectation cannot be damaged by a
        // mis-encoded source file.
        assert_eq!("x=\u{e4}", py_string.extract::<String>(py).unwrap());
    }

    /// A lone surrogate is stored as a 16-bit unit on Python 3 and cannot be
    /// extracted as a Rust `String`.
    // `data` is only inspected in the `python3-sys` block below.
    #[allow(unused_variables)]
    #[test]
    fn test_extract_lone_surrogate() {
        let gil = Python::acquire_gil();
        let py = gil.python();
        let py_string = py.eval("u'x=\\ud800'", None, None).unwrap();
        let data = py_string.cast_as::<PyString>(py).unwrap().data(py);
        #[cfg(feature = "python3-sys")]
        {
            if let PyStringData::Utf16(s) = data {
                assert_eq!(['x' as u16, '=' as u16, 0xd800], *s);
            } else {
                panic!("Expected PyStringData::Utf16");
            }
        }
        assert!(py_string.extract::<String>(py).is_err());
    }

    /// Lossy conversion replaces a lone surrogate with U+FFFD.
    #[test]
    fn test_extract_lone_surrogate_lossy() {
        let gil = Python::acquire_gil();
        let py = gil.python();
        let py_string = py.eval("u'x=\\ud800'", None, None).unwrap();
        let result = py_string
            .cast_as::<PyString>(py)
            .unwrap()
            .to_string_lossy(py);
        assert_eq!("x=\u{fffd}", result);
    }
}