1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
use std::{mem, str, char};
use std::borrow::Cow;
use python::Python;
use err::{PyErr, PyResult};
use objects::exc;
/// Borrowed view of string data in one of several in-memory encodings.
///
/// The variant only records how the borrowed units are meant to be
/// interpreted; nothing is validated when a value is constructed.
#[derive(Clone, Copy, Debug)]
pub enum PyStringData<'a> {
    /// One byte per character; each byte is the code point U+0000..=U+00FF.
    Latin1(&'a [u8]),
    /// Bytes to be interpreted as UTF-8.
    Utf8(&'a [u8]),
    /// 16-bit code units to be interpreted as UTF-16.
    Utf16(&'a [u16]),
    /// 32-bit units, each to be interpreted as one Unicode scalar value.
    Utf32(&'a [u32]),
}
impl<'a> From<&'a str> for PyStringData<'a> {
    /// Wraps the bytes of a string slice as the `Utf8` variant without copying.
    #[inline]
    fn from(val: &'a str) -> Self {
        let utf8_bytes = val.as_bytes();
        PyStringData::Utf8(utf8_bytes)
    }
}
impl<'a> From<&'a [u16]> for PyStringData<'a> {
    /// Wraps a slice of 16-bit units as the `Utf16` variant; the units are
    /// not validated here.
    #[inline]
    fn from(val: &'a [u16]) -> Self {
        let code_units = val;
        PyStringData::Utf16(code_units)
    }
}
impl<'a> From<&'a [u32]> for PyStringData<'a> {
    /// Wraps a slice of 32-bit units as the `Utf32` variant; the units are
    /// not validated here.
    #[inline]
    fn from(val: &'a [u32]) -> Self {
        let units = val;
        PyStringData::Utf32(units)
    }
}
impl<'a> PyStringData<'a> {
    /// Decodes Latin-1 bytes, borrowing when every byte is ASCII and
    /// allocating otherwise. Never fails: every byte maps to a code point.
    fn latin1_to_cow(data: &'a [u8]) -> Cow<'a, str> {
        if data.is_ascii() {
            // SAFETY: a byte sequence consisting only of ASCII is valid UTF-8.
            Cow::Borrowed(unsafe { str::from_utf8_unchecked(data) })
        } else {
            // `u8 as char` maps the byte to the code point of the same value.
            Cow::Owned(data.iter().map(|&b| b as char).collect())
        }
    }

    /// Converts the string data to a Rust string.
    ///
    /// Borrows the underlying storage when no re-encoding is needed (valid
    /// UTF-8, or Latin-1 that is pure ASCII); otherwise a new `String` is
    /// allocated.
    ///
    /// # Errors
    ///
    /// Returns a `PyErr` built from a `UnicodeDecodeError` instance when the
    /// data is not valid for its encoding (`Utf8`, `Utf16` or `Utf32`), or
    /// propagates the error if constructing that exception fails. The
    /// `Latin1` variant never fails.
    pub fn to_string(self, py: Python) -> PyResult<Cow<'a, str>> {
        match self {
            PyStringData::Utf8(data) => {
                match str::from_utf8(data) {
                    Ok(s) => Ok(Cow::Borrowed(s)),
                    Err(e) => Err(PyErr::from_instance(
                        exc::UnicodeDecodeError::new_utf8(py, data, e)?))
                }
            }
            PyStringData::Latin1(data) => Ok(Self::latin1_to_cow(data)),
            PyStringData::Utf16(data) => {
                /// Views the code units as raw bytes (native byte order).
                fn utf16_bytes(input: &[u16]) -> &[u8] {
                    // Transmuting the slice reference would keep the element
                    // count as the length and expose only half of the bytes,
                    // so the byte view is built from pointer + size in bytes.
                    //
                    // SAFETY: `input` is a valid, initialized slice; `u8` has
                    // alignment 1 and no invalid bit patterns; the length is
                    // exactly the byte size of `input`; the returned borrow
                    // shares `input`'s lifetime.
                    unsafe {
                        ::std::slice::from_raw_parts(
                            input.as_ptr() as *const u8,
                            mem::size_of_val(input))
                    }
                }
                match String::from_utf16(data) {
                    Ok(s) => Ok(Cow::Owned(s)),
                    // The reported range spans the whole input in bytes.
                    Err(_) => Err(PyErr::from_instance(
                        exc::UnicodeDecodeError::new_err(
                            py, cstr!("utf-16"),
                            utf16_bytes(data), 0 .. 2*data.len(), cstr!("invalid utf-16"))?)
                    )
                }
            },
            PyStringData::Utf32(data) => {
                /// Views the 32-bit units as raw bytes (native byte order).
                fn utf32_bytes(input: &[u32]) -> &[u8] {
                    // See `utf16_bytes`: a transmute would expose only a
                    // quarter of the bytes.
                    //
                    // SAFETY: `input` is a valid, initialized slice; `u8` has
                    // alignment 1 and no invalid bit patterns; the length is
                    // exactly the byte size of `input`; the returned borrow
                    // shares `input`'s lifetime.
                    unsafe {
                        ::std::slice::from_raw_parts(
                            input.as_ptr() as *const u8,
                            mem::size_of_val(input))
                    }
                }
                // Collecting into `Option` yields `None` as soon as one unit
                // is not a valid Unicode scalar value.
                match data.iter().map(|&u| char::from_u32(u)).collect::<Option<String>>() {
                    Some(s) => Ok(Cow::Owned(s)),
                    // The reported range spans the whole input in bytes.
                    None => Err(PyErr::from_instance(
                        exc::UnicodeDecodeError::new_err(
                            py, cstr!("utf-32"),
                            utf32_bytes(data), 0 .. 4*data.len(), cstr!("invalid utf-32"))?)
                    )
                }
            }
        }
    }

    /// Converts the string data to a Rust string, never failing.
    ///
    /// Invalid UTF-8 sequences, invalid UTF-16 code units and 32-bit units
    /// that are not Unicode scalar values are replaced with U+FFFD
    /// (REPLACEMENT CHARACTER). Borrows the underlying storage when possible
    /// (valid UTF-8, or Latin-1 that is pure ASCII).
    pub fn to_string_lossy(self) -> Cow<'a, str> {
        match self {
            PyStringData::Utf8(data) => String::from_utf8_lossy(data),
            PyStringData::Latin1(data) => Self::latin1_to_cow(data),
            PyStringData::Utf16(data) => {
                Cow::Owned(String::from_utf16_lossy(data))
            },
            PyStringData::Utf32(data) => {
                Cow::Owned(data.iter()
                    .map(|&u| char::from_u32(u).unwrap_or('\u{FFFD}'))
                    .collect())
            }
        }
    }
}