cxx/
cxx_string.rs

1use crate::actually_private::Private;
2use crate::lossy;
3#[cfg(feature = "alloc")]
4use alloc::borrow::Cow;
5#[cfg(feature = "alloc")]
6use alloc::string::String;
7use core::cmp::Ordering;
8use core::ffi::{c_char, CStr};
9use core::fmt::{self, Debug, Display};
10use core::hash::{Hash, Hasher};
11use core::marker::{PhantomData, PhantomPinned};
12use core::mem::MaybeUninit;
13use core::pin::Pin;
14use core::slice;
15use core::str::{self, Utf8Error};
16
// Foreign functions that operate on a C++ `std::string`. Each declaration is
// bound via `link_name` to a `cxxbridge1$cxx_string$*` symbol; the
// implementations live outside this file.
extern "C" {
    // Called by `StackString::init` with the storage to construct into and the
    // pointer/length of the initial bytes.
    #[link_name = "cxxbridge1$cxx_string$init"]
    fn string_init(this: &mut MaybeUninit<CxxString>, ptr: *const u8, len: usize);
    // Called from `StackString`'s `Drop` impl on the same storage that was
    // handed to `string_init`.
    #[link_name = "cxxbridge1$cxx_string$destroy"]
    fn string_destroy(this: &mut MaybeUninit<CxxString>);
    // Backs `CxxString::as_ptr`.
    #[link_name = "cxxbridge1$cxx_string$data"]
    fn string_data(this: &CxxString) -> *const u8;
    // Backs `CxxString::len`.
    #[link_name = "cxxbridge1$cxx_string$length"]
    fn string_length(this: &CxxString) -> usize;
    // Backs `CxxString::clear`.
    #[link_name = "cxxbridge1$cxx_string$clear"]
    fn string_clear(this: Pin<&mut CxxString>);
    // Backs `CxxString::reserve`; `new_cap` is the requested *total* capacity
    // (`reserve` passes current length plus the additional amount).
    #[link_name = "cxxbridge1$cxx_string$reserve_total"]
    fn string_reserve_total(this: Pin<&mut CxxString>, new_cap: usize);
    // Backs `CxxString::push_bytes`; receives the pointer/length of the bytes to
    // append.
    #[link_name = "cxxbridge1$cxx_string$push"]
    fn string_push(this: Pin<&mut CxxString>, ptr: *const u8, len: usize);
}
33
/// Binding to C++ `std::string`.
///
/// # Invariants
///
/// As an invariant of this API and the static analysis of the cxx::bridge
/// macro, in Rust code we can never obtain a `CxxString` by value. C++'s string
/// requires a move constructor and may hold internal pointers, which is not
/// compatible with Rust's move behavior. Instead in Rust code we will only ever
/// look at a `CxxString` through a reference or smart pointer, as in
/// `&CxxString` or `UniquePtr<CxxString>`.
#[repr(C)]
pub struct CxxString {
    // Zero-length array: the Rust-side type carries no data of its own. All
    // access to the string contents goes through the foreign functions
    // declared in this file.
    _private: [u8; 0],
    // `PhantomPinned` is `!Unpin`, and `PhantomData<T>` is `Unpin` only when
    // `T` is, so this field makes `CxxString` itself `!Unpin`.
    _pinned: PhantomData<PhantomPinned>,
}
49
/// Construct a C++ std::string on the Rust stack.
///
/// # Syntax
///
/// In statement position:
///
/// ```
/// # use cxx::let_cxx_string;
/// # let expression = "";
/// let_cxx_string!(var = expression);
/// ```
///
/// The `expression` may have any type that implements `AsRef<[u8]>`. Commonly
/// it will be a string literal, but for example `&[u8]` and `String` would work
/// as well.
///
/// The macro expands to something resembling `let $var: Pin<&mut CxxString> =
/// /*???*/;`. The resulting [`Pin`] can be deref'd to `&CxxString` as needed.
///
/// # Example
///
/// ```
/// use cxx::{let_cxx_string, CxxString};
///
/// fn f(s: &CxxString) {/* ... */}
///
/// fn main() {
///     let_cxx_string!(s = "example");
///     f(&s);
/// }
/// ```
#[macro_export]
macro_rules! let_cxx_string {
    // Accepts `name = expr`, with an optional trailing comma.
    ($var:ident = $value:expr $(,)?) => {
        // Backing storage for the string. It is declared before `$value` is
        // evaluated and stays in the caller's scope after expansion, so its
        // `Drop` impl runs when that scope exits -- including when evaluating
        // `$value` panics or returns early.
        let mut cxx_stack_string = $crate::private::StackString::new();
        // `$value` is bound through a single-arm `match` rather than a separate
        // `let`, so temporaries created while evaluating it are still alive
        // when `init` reads the bytes.
        // `unused_mut`: the caller may never mutate `$var`. `unused_unsafe`:
        // presumably for invocations that already sit inside an `unsafe` block.
        #[allow(unused_mut, unused_unsafe)]
        let mut $var = match $value {
            let_cxx_string => unsafe { cxx_stack_string.init(let_cxx_string) },
        };
    };
}
91
92impl CxxString {
93    /// `CxxString` is not constructible via `new`. Instead, use the
94    /// [`let_cxx_string!`] macro.
95    pub fn new<T: Private>() -> Self {
96        unreachable!()
97    }
98
99    /// Returns the length of the string in bytes.
100    ///
101    /// Matches the behavior of C++ [std::string::size][size].
102    ///
103    /// [size]: https://en.cppreference.com/w/cpp/string/basic_string/size
104    pub fn len(&self) -> usize {
105        unsafe { string_length(self) }
106    }
107
108    /// Returns true if `self` has a length of zero bytes.
109    ///
110    /// Matches the behavior of C++ [std::string::empty][empty].
111    ///
112    /// [empty]: https://en.cppreference.com/w/cpp/string/basic_string/empty
113    pub fn is_empty(&self) -> bool {
114        self.len() == 0
115    }
116
117    /// Returns a byte slice of this string's contents.
118    pub fn as_bytes(&self) -> &[u8] {
119        let data = self.as_ptr();
120        let len = self.len();
121        unsafe { slice::from_raw_parts(data, len) }
122    }
123
124    /// Produces a pointer to the first character of the string.
125    ///
126    /// Matches the behavior of C++ [std::string::data][data].
127    ///
128    /// Note that the return type may look like `const char *` but is not a
129    /// `const char *` in the typical C sense, as C++ strings may contain
130    /// internal null bytes. As such, the returned pointer only makes sense as a
131    /// string in combination with the length returned by [`len()`][len].
132    ///
133    /// Modifying the string data through this pointer has undefined behavior.
134    ///
135    /// [data]: https://en.cppreference.com/w/cpp/string/basic_string/data
136    /// [len]: #method.len
137    pub fn as_ptr(&self) -> *const u8 {
138        unsafe { string_data(self) }
139    }
140
141    /// Produces a nul-terminated string view of this string's contents.
142    ///
143    /// Matches the behavior of C++ [std::string::c_str][c_str].
144    ///
145    /// If this string contains no internal '\0' bytes, then
146    /// `self.as_c_str().count_bytes() == self.len()`. But if it does, the CStr
147    /// only refers to the part of the string up to the first nul byte.
148    ///
149    /// [c_str]: https://en.cppreference.com/w/cpp/string/basic_string/c_str
150    pub fn as_c_str(&self) -> &CStr {
151        // Since C++11, string[string.size()] is guaranteed to be \0.
152        unsafe { CStr::from_ptr(self.as_ptr().cast::<c_char>()) }
153    }
154
155    /// Validates that the C++ string contains UTF-8 data and produces a view of
156    /// it as a Rust &amp;str, otherwise an error.
157    pub fn to_str(&self) -> Result<&str, Utf8Error> {
158        str::from_utf8(self.as_bytes())
159    }
160
161    /// If the contents of the C++ string are valid UTF-8, this function returns
162    /// a view as a Cow::Borrowed &amp;str. Otherwise replaces any invalid UTF-8
163    /// sequences with the U+FFFD [replacement character] and returns a
164    /// Cow::Owned String.
165    ///
166    /// [replacement character]: char::REPLACEMENT_CHARACTER
167    #[cfg(feature = "alloc")]
168    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
169    pub fn to_string_lossy(&self) -> Cow<str> {
170        String::from_utf8_lossy(self.as_bytes())
171    }
172
173    /// Removes all characters from the string.
174    ///
175    /// Matches the behavior of C++ [std::string::clear][clear].
176    ///
177    /// Note: **unlike** the guarantee of Rust's `std::string::String::clear`,
178    /// the C++ standard does not require that capacity is unchanged by this
179    /// operation. In practice existing implementations do not change the
180    /// capacity but all pointers, references, and iterators into the string
181    /// contents are nevertheless invalidated.
182    ///
183    /// [clear]: https://en.cppreference.com/w/cpp/string/basic_string/clear
184    pub fn clear(self: Pin<&mut Self>) {
185        unsafe { string_clear(self) }
186    }
187
188    /// Ensures that this string's capacity is at least `additional` bytes
189    /// larger than its length.
190    ///
191    /// The capacity may be increased by more than `additional` bytes if it
192    /// chooses, to amortize the cost of frequent reallocations.
193    ///
194    /// **The meaning of the argument is not the same as
195    /// [std::string::reserve][reserve] in C++.** The C++ standard library and
196    /// Rust standard library both have a `reserve` method on strings, but in
197    /// C++ code the argument always refers to total capacity, whereas in Rust
198    /// code it always refers to additional capacity. This API on `CxxString`
199    /// follows the Rust convention, the same way that for the length accessor
200    /// we use the Rust conventional `len()` naming and not C++ `size()` or
201    /// `length()`.
202    ///
203    /// # Panics
204    ///
205    /// Panics if the new capacity overflows usize.
206    ///
207    /// [reserve]: https://en.cppreference.com/w/cpp/string/basic_string/reserve
208    pub fn reserve(self: Pin<&mut Self>, additional: usize) {
209        let new_cap = self
210            .len()
211            .checked_add(additional)
212            .expect("CxxString capacity overflow");
213        unsafe { string_reserve_total(self, new_cap) }
214    }
215
216    /// Appends a given string slice onto the end of this C++ string.
217    pub fn push_str(self: Pin<&mut Self>, s: &str) {
218        self.push_bytes(s.as_bytes());
219    }
220
221    /// Appends arbitrary bytes onto the end of this C++ string.
222    pub fn push_bytes(self: Pin<&mut Self>, bytes: &[u8]) {
223        unsafe { string_push(self, bytes.as_ptr(), bytes.len()) }
224    }
225}
226
227impl Display for CxxString {
228    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
229        lossy::display(self.as_bytes(), f)
230    }
231}
232
233impl Debug for CxxString {
234    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
235        lossy::debug(self.as_bytes(), f)
236    }
237}
238
239impl PartialEq for CxxString {
240    fn eq(&self, other: &Self) -> bool {
241        self.as_bytes() == other.as_bytes()
242    }
243}
244
245impl PartialEq<CxxString> for str {
246    fn eq(&self, other: &CxxString) -> bool {
247        self.as_bytes() == other.as_bytes()
248    }
249}
250
251impl PartialEq<str> for CxxString {
252    fn eq(&self, other: &str) -> bool {
253        self.as_bytes() == other.as_bytes()
254    }
255}
256
// Marker impl: the `PartialEq` impl for `CxxString` compares byte slices, which
// is a total equivalence relation, so `Eq` holds.
impl Eq for CxxString {}
258
259impl PartialOrd for CxxString {
260    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
261        Some(self.cmp(other))
262    }
263}
264
265impl Ord for CxxString {
266    fn cmp(&self, other: &Self) -> Ordering {
267        self.as_bytes().cmp(other.as_bytes())
268    }
269}
270
271impl Hash for CxxString {
272    fn hash<H: Hasher>(&self, state: &mut H) {
273        self.as_bytes().hash(state);
274    }
275}
276
277impl fmt::Write for Pin<&mut CxxString> {
278    fn write_str(&mut self, s: &str) -> fmt::Result {
279        self.as_mut().push_str(s);
280        Ok(())
281    }
282}
283
// Byte-oriented writer over a pinned C++ string (only with the `std` feature).
#[cfg(feature = "std")]
impl std::io::Write for Pin<&mut CxxString> {
    // Appends the whole buffer in one call and reports every byte as written.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let target: Pin<&mut CxxString> = self.as_mut();
        target.push_bytes(buf);
        let written = buf.len();
        Ok(written)
    }

    // Nothing is buffered on the Rust side, so flushing is a no-op.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
295
296#[doc(hidden)]
297#[repr(C)]
298pub struct StackString {
299    // Static assertions in cxx.cc validate that this is large enough and
300    // aligned enough.
301    space: MaybeUninit<[usize; 8]>,
302}
303
304#[allow(missing_docs)]
305impl StackString {
306    pub fn new() -> Self {
307        StackString {
308            space: MaybeUninit::uninit(),
309        }
310    }
311
312    pub unsafe fn init(&mut self, value: impl AsRef<[u8]>) -> Pin<&mut CxxString> {
313        let value = value.as_ref();
314        unsafe {
315            let this = &mut *self.space.as_mut_ptr().cast::<MaybeUninit<CxxString>>();
316            string_init(this, value.as_ptr(), value.len());
317            Pin::new_unchecked(&mut *this.as_mut_ptr())
318        }
319    }
320}
321
322impl Drop for StackString {
323    fn drop(&mut self) {
324        unsafe {
325            let this = &mut *self.space.as_mut_ptr().cast::<MaybeUninit<CxxString>>();
326            string_destroy(this);
327        }
328    }
329}