pyo3/conversions/std/
osstr.rs

1use crate::conversion::IntoPyObject;
2use crate::ffi_ptr_ext::FfiPtrExt;
3use crate::instance::Bound;
4use crate::types::PyString;
5use crate::{ffi, Borrowed, FromPyObject, PyAny, PyErr, Python};
6use std::borrow::Cow;
7use std::convert::Infallible;
8use std::ffi::{OsStr, OsString};
9
10impl<'py> IntoPyObject<'py> for &OsStr {
11    type Target = PyString;
12    type Output = Bound<'py, Self::Target>;
13    type Error = Infallible;
14
15    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
16        // If the string is UTF-8, take the quick and easy shortcut
17        if let Some(valid_utf8_path) = self.to_str() {
18            return valid_utf8_path.into_pyobject(py);
19        }
20
21        // All targets besides windows support the std::os::unix::ffi::OsStrExt API:
22        // https://doc.rust-lang.org/src/std/sys_common/mod.rs.html#59
23        #[cfg(not(windows))]
24        {
25            #[cfg(target_os = "wasi")]
26            let bytes = self.to_str().expect("wasi strings are UTF8").as_bytes();
27            #[cfg(not(target_os = "wasi"))]
28            let bytes = std::os::unix::ffi::OsStrExt::as_bytes(self);
29
30            let ptr = bytes.as_ptr().cast();
31            let len = bytes.len() as ffi::Py_ssize_t;
32            unsafe {
33                // DecodeFSDefault automatically chooses an appropriate decoding mechanism to
34                // parse os strings losslessly (i.e. surrogateescape most of the time)
35                Ok(ffi::PyUnicode_DecodeFSDefaultAndSize(ptr, len)
36                    .assume_owned(py)
37                    .cast_into_unchecked::<PyString>())
38            }
39        }
40
41        #[cfg(windows)]
42        {
43            let wstr: Vec<u16> = std::os::windows::ffi::OsStrExt::encode_wide(self).collect();
44
45            unsafe {
46                // This will not panic because the data from encode_wide is well-formed Windows
47                // string data
48
49                Ok(
50                    ffi::PyUnicode_FromWideChar(wstr.as_ptr(), wstr.len() as ffi::Py_ssize_t)
51                        .assume_owned(py)
52                        .cast_into_unchecked::<PyString>(),
53                )
54            }
55        }
56    }
57}
58
59impl<'py> IntoPyObject<'py> for &&OsStr {
60    type Target = PyString;
61    type Output = Bound<'py, Self::Target>;
62    type Error = Infallible;
63
64    #[inline]
65    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
66        (*self).into_pyobject(py)
67    }
68}
69
70impl FromPyObject<'_, '_> for OsString {
71    type Error = PyErr;
72
73    fn extract(ob: Borrowed<'_, '_, PyAny>) -> Result<Self, Self::Error> {
74        let pystring = ob.cast::<PyString>()?;
75
76        #[cfg(not(windows))]
77        {
78            // Decode from Python's lossless bytes string representation back into raw bytes
79            let fs_encoded_bytes = unsafe {
80                crate::Py::<crate::types::PyBytes>::from_owned_ptr(
81                    ob.py(),
82                    ffi::PyUnicode_EncodeFSDefault(pystring.as_ptr()),
83                )
84            };
85
86            // Create an OsStr view into the raw bytes from Python
87            //
88            // For WASI: OS strings are UTF-8 by definition.
89            #[cfg(target_os = "wasi")]
90            let os_str: &OsStr =
91                OsStr::new(std::str::from_utf8(fs_encoded_bytes.as_bytes(ob.py()))?);
92            #[cfg(not(target_os = "wasi"))]
93            let os_str: &OsStr =
94                std::os::unix::ffi::OsStrExt::from_bytes(fs_encoded_bytes.as_bytes(ob.py()));
95
96            Ok(os_str.to_os_string())
97        }
98
99        #[cfg(windows)]
100        {
101            // Take the quick and easy shortcut if UTF-8
102            if let Ok(utf8_string) = pystring.to_cow() {
103                return Ok(utf8_string.into_owned().into());
104            }
105
106            // Get an owned allocated wide char buffer from PyString, which we have to deallocate
107            // ourselves
108            let size =
109                unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), std::ptr::null_mut(), 0) };
110            crate::err::error_on_minusone(ob.py(), size)?;
111
112            debug_assert!(
113                size > 0,
114                "PyUnicode_AsWideChar should return at least 1 for null terminator"
115            );
116            let size = size - 1; // exclude null terminator
117
118            let mut buffer = vec![0; size as usize];
119            let bytes_read =
120                unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), buffer.as_mut_ptr(), size) };
121            assert_eq!(bytes_read, size);
122
123            // Copy wide char buffer into OsString
124            let os_string = std::os::windows::ffi::OsStringExt::from_wide(&buffer);
125
126            Ok(os_string)
127        }
128    }
129}
130
131impl<'py> IntoPyObject<'py> for Cow<'_, OsStr> {
132    type Target = PyString;
133    type Output = Bound<'py, Self::Target>;
134    type Error = Infallible;
135
136    #[inline]
137    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
138        (*self).into_pyobject(py)
139    }
140}
141
142impl<'py> IntoPyObject<'py> for &Cow<'_, OsStr> {
143    type Target = PyString;
144    type Output = Bound<'py, Self::Target>;
145    type Error = Infallible;
146
147    #[inline]
148    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
149        (&**self).into_pyobject(py)
150    }
151}
152
153impl<'a> FromPyObject<'a, '_> for Cow<'a, OsStr> {
154    type Error = PyErr;
155
156    fn extract(obj: Borrowed<'a, '_, PyAny>) -> Result<Self, Self::Error> {
157        #[cfg(any(Py_3_10, not(Py_LIMITED_API)))]
158        if let Ok(s) = obj.extract::<&str>() {
159            return Ok(Cow::Borrowed(s.as_ref()));
160        }
161
162        obj.extract::<OsString>().map(Cow::Owned)
163    }
164}
165
166impl<'py> IntoPyObject<'py> for OsString {
167    type Target = PyString;
168    type Output = Bound<'py, Self::Target>;
169    type Error = Infallible;
170
171    #[inline]
172    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
173        self.as_os_str().into_pyobject(py)
174    }
175}
176
177impl<'py> IntoPyObject<'py> for &OsString {
178    type Target = PyString;
179    type Output = Bound<'py, Self::Target>;
180    type Error = Infallible;
181
182    #[inline]
183    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
184        self.as_os_str().into_pyobject(py)
185    }
186}
187
#[cfg(test)]
mod tests {
    use crate::types::{PyAnyMethods, PyString, PyStringMethods};
    use crate::{Bound, BoundObject, IntoPyObject, Python};
    use std::fmt::Debug;
    #[cfg(unix)]
    use std::os::unix::ffi::OsStringExt;
    #[cfg(windows)]
    use std::os::windows::ffi::OsStringExt;
    use std::{
        borrow::Cow,
        ffi::{OsStr, OsString},
    };

    /// A byte payload that is not valid UTF-8 must survive a trip through Python.
    #[test]
    #[cfg(not(windows))]
    fn test_non_utf8_conversion() {
        #[cfg(not(target_os = "wasi"))]
        use std::os::unix::ffi::OsStrExt;
        #[cfg(target_os = "wasi")]
        use std::os::wasi::ffi::OsStrExt;

        // this is not valid UTF-8
        const INVALID_UTF8: &[u8] = &[250, 251, 252, 253, 254, 255, 0, 255];

        Python::attach(|py| {
            let original = OsStr::from_bytes(INVALID_UTF8);

            // do a roundtrip into Pythonland and back and compare
            let as_python = original.into_pyobject(py).unwrap();
            let recovered: OsString = as_python.extract().unwrap();
            assert_eq!(original, recovered);
        });
    }

    /// Every `IntoPyObject` flavour must yield a `str` that extracts back to
    /// the same OS string.
    #[test]
    fn test_intopyobject_roundtrip() {
        fn test_roundtrip<'py, T>(py: Python<'py>, obj: T)
        where
            T: IntoPyObject<'py> + AsRef<OsStr> + Debug + Clone,
            T::Error: Debug,
        {
            let as_any = obj.clone().into_pyobject(py).unwrap().into_any();
            let as_str = as_any.as_borrowed().cast::<PyString>().unwrap();

            // The Python-side text matches the lossy Rust rendering...
            assert_eq!(as_str.to_string_lossy(), obj.as_ref().to_string_lossy());

            // ...and extracting gives back the exact original value.
            let recovered: OsString = as_str.extract().unwrap();
            assert_eq!(obj.as_ref(), recovered.as_os_str());
        }

        Python::attach(|py| {
            let sample = OsStr::new("Hello\0\n🐍");
            test_roundtrip::<&OsStr>(py, sample);
            test_roundtrip::<Cow<'_, OsStr>>(py, Cow::Borrowed(sample));
            test_roundtrip::<Cow<'_, OsStr>>(py, Cow::Owned(sample.to_os_string()));
            test_roundtrip::<OsString>(py, sample.to_os_string());
        });
    }

    /// Windows OS strings holding an unpaired surrogate must round-trip unchanged.
    #[test]
    #[cfg(windows)]
    fn test_windows_non_utf8_osstring_roundtrip() {
        use std::os::windows::ffi::{OsStrExt, OsStringExt};

        Python::attach(|py| {
            // Example: Unpaired surrogate (0xD800) is not valid UTF-8, but valid in Windows OsString
            let units: &[u16] = &['A' as u16, 0xD800, 'B' as u16]; // 'A', unpaired surrogate, 'B'
            let original = OsString::from_wide(units);

            assert_eq!(original.to_string_lossy(), "A�B");

            // This cannot be represented as UTF-8, so .to_str() would return None
            assert!(original.to_str().is_none());

            // Convert to Python and back
            let as_python = original.as_os_str().into_pyobject(py).unwrap();
            let recovered = as_python.extract::<OsString>().unwrap();

            // The roundtrip should preserve the original wide data
            assert_eq!(original, recovered);

            // Show that encode_wide is necessary: direct UTF-8 conversion would lose information
            let reencoded: Vec<u16> = original.encode_wide().collect();
            assert_eq!(reencoded, units);
        });
    }

    /// `Cow<OsStr>` extraction borrows for UTF-8 input (when the build allows
    /// it) and allocates otherwise.
    #[test]
    fn test_extract_cow() {
        fn test_extract<'py, T>(py: Python<'py>, input: &T, is_borrowed: bool)
        where
            for<'a> &'a T: IntoPyObject<'py, Output = Bound<'py, PyString>>,
            for<'a> <&'a T as IntoPyObject<'py>>::Error: Debug,
            T: AsRef<OsStr> + ?Sized,
        {
            let as_python = input.into_pyobject(py).unwrap();
            let extracted: Cow<'_, OsStr> = as_python.extract().unwrap();
            assert_eq!(extracted, input.as_ref());

            let got_borrowed = matches!(extracted, Cow::Borrowed(_));
            assert_eq!(is_borrowed, got_borrowed);
        }

        Python::attach(|py| {
            // On Python 3.10+ or when not using the limited API, we can borrow strings from python
            let can_borrow_str = cfg!(any(Py_3_10, not(Py_LIMITED_API)));

            // This can be borrowed because it is valid UTF-8
            test_extract::<str>(py, "Hello\0\n🐍", can_borrow_str);
            test_extract::<str>(py, "Hello, world!", can_borrow_str);

            // 'A', unpaired surrogate, 'B'
            #[cfg(windows)]
            let non_utf8 = OsString::from_wide(&['A' as u16, 0xD800, 'B' as u16]);

            #[cfg(unix)]
            let non_utf8 = OsString::from_vec(vec![250, 251, 252, 253, 254, 255, 0, 255]);

            // This cannot be borrowed because it is not valid UTF-8
            #[cfg(any(windows, unix))]
            test_extract::<OsStr>(py, &non_utf8, false);
        });
    }
}