bson/raw/
cstr.rs

1use core::str;
2
3use crate::error::{Error, Result};
4
5#[allow(rustdoc::invalid_rust_codeblocks)]
6/// A borrowed BSON-spec cstring: Zero or more UTF-8 encoded characters, excluding the nul byte.
7/// Can be constructed at compile-time via the [`cstr!`](crate::raw::cstr) macro or at run-time from
8/// a [`prim@str`] via [`TryFrom`].
9///
10/// Unlike [`std::ffi::CStr`], this is required to be valid UTF-8, and does not include the nul
11/// terminator in the buffer:
12/// ```
13/// // std::ffi::CStr accepts invalid UTF-8:
14/// let invalid: &std::ffi::CStr = c"\xc3\x28";
15/// ```
16/// ```compile_fail
17/// # use bson::raw::cstr;
18/// // bson::raw::CStr does not:
19/// let invalid: &bson::raw::CStr = cstr!("\xc3\x28");  // will not compile
20/// ```
21/// ```
22/// // &str accepts embedded nil characters:
23/// let invalid: &str = "foo\0bar";
24/// ```
25/// ```compile_fail
26/// # use bson::raw::cstr;
27/// // bson::raw::CStr does not:
28/// let invalid: &bson::raw::CStr = cstr!("foo\0bar");  // will not compile
29/// ```
30#[derive(Debug, Eq)]
31#[repr(transparent)]
32pub struct CStr {
33    data: [u8],
34}
35
36impl<'a> TryFrom<&'a str> for &'a CStr {
37    type Error = Error;
38
39    fn try_from(value: &str) -> Result<&CStr> {
40        match validate_cstr(value) {
41            Some(cs) => Ok(cs),
42            None => Err(Error::malformed_bytes(format!(
43                "cstring with interior null: {:?}",
44                value,
45            ))),
46        }
47    }
48}
49
50impl CStr {
51    // Convenience shorthand for making the types of TryFrom line up
52    #[cfg(feature = "serde")]
53    pub(crate) fn from_str(value: &str) -> Result<&CStr> {
54        value.try_into()
55    }
56
57    const fn from_str_unchecked(value: &str) -> &Self {
58        // Safety: the conversion is safe because CStr is repr(transparent), and the deref is safe
59        // because the pointer came from a safe reference.
60        unsafe { &*(value.as_bytes() as *const [u8] as *const CStr) }
61    }
62
63    /// View the buffer as a Rust `&str`.
64    pub fn as_str(&self) -> &str {
65        // Safety: the only way to constrct a CStr is from a valid &str.
66        unsafe { str::from_utf8_unchecked(&self.data) }
67    }
68
69    /// The length in bytes of the buffer.
70    pub fn len(&self) -> usize {
71        self.as_str().len()
72    }
73
74    /// Whether the buffer contains zero bytes.
75    pub fn is_empty(&self) -> bool {
76        self.as_str().is_empty()
77    }
78
79    /// Returns the lowercase equivalent of this as a new [`CString`].
80    pub fn to_lowercase(&self) -> CString {
81        CString::from_string_unchecked(self.as_str().to_lowercase())
82    }
83
84    /// Returns the uppercase equivalent of this as a new [`CString`].
85    pub fn to_uppercase(&self) -> CString {
86        CString::from_string_unchecked(self.as_str().to_uppercase())
87    }
88
89    pub(crate) fn append_to(&self, buf: &mut Vec<u8>) {
90        buf.extend(&self.data);
91        buf.push(0);
92    }
93}
94
95impl PartialEq for CStr {
96    fn eq(&self, other: &CStr) -> bool {
97        self.as_str() == other.as_str()
98    }
99}
100
101impl PartialEq<str> for CStr {
102    fn eq(&self, other: &str) -> bool {
103        self.as_str() == other
104    }
105}
106
107impl PartialEq<CString> for CStr {
108    fn eq(&self, other: &CString) -> bool {
109        self.as_str() == other.as_str()
110    }
111}
112
113impl PartialEq<String> for CStr {
114    fn eq(&self, other: &String) -> bool {
115        self.as_str() == other.as_str()
116    }
117}
118
119impl std::hash::Hash for CStr {
120    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
121        self.as_str().hash(state);
122    }
123}
124
125impl Ord for CStr {
126    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
127        self.as_str().cmp(other.as_str())
128    }
129}
130
131impl PartialOrd for CStr {
132    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
133        Some(self.cmp(other))
134    }
135}
136
137impl std::fmt::Display for CStr {
138    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
139        self.as_str().fmt(f)
140    }
141}
142
143impl std::borrow::ToOwned for CStr {
144    type Owned = CString;
145
146    fn to_owned(&self) -> Self::Owned {
147        self.into()
148    }
149}
150
151impl AsRef<CStr> for CStr {
152    fn as_ref(&self) -> &CStr {
153        self
154    }
155}
156
157impl AsRef<str> for CStr {
158    fn as_ref(&self) -> &str {
159        self.as_str()
160    }
161}
162
163impl<'a> From<&'a CStr> for &'a str {
164    fn from(value: &'a CStr) -> Self {
165        value.as_str()
166    }
167}
168
#[cfg(feature = "serde")]
impl serde::Serialize for &CStr {
    /// Serializes the contents as an ordinary string.
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let text: &str = self.as_str();
        serializer.serialize_str(text)
    }
}
178
// Marker trait backing the compile-time check performed by the `cstr!` macro. Its only
// implementor is `IsValidCStr<true>`, so requiring `IsValidCStr<VALID>: ValidCStr` fails to
// compile when `VALID` is `false`, and the diagnostic below is attached to that failure.
#[doc(hidden)]
#[diagnostic::on_unimplemented(message = "the string literal contains a zero byte")]
pub trait ValidCStr {}
// Type-level carrier for the outcome of validating a literal.
#[doc(hidden)]
pub struct IsValidCStr<const VALID: bool>;
// Deliberately implemented for `true` only; there is no impl for `IsValidCStr<false>`.
#[doc(hidden)]
impl ValidCStr for IsValidCStr<true> {}
186
187#[doc(hidden)]
188pub const fn validate_cstr(text: &str) -> Option<&CStr> {
189    let bytes = text.as_bytes();
190    let mut i = 0;
191    while i < bytes.len() {
192        if bytes[i] == 0 {
193            return None;
194        }
195        i += 1;
196    }
197    Some(CStr::from_str_unchecked(text))
198}
199#[doc(hidden)]
200pub const fn assert_valid_cstr<T: ValidCStr>() {}
201
#[allow(rustdoc::invalid_rust_codeblocks)]
/// Construct a `&'static CStr` from a string literal.  The validity will be verified at
/// compile-time.
/// ```
/// # use bson::raw::{CStr, cstr};
/// // A valid literal:
/// let key: &CStr = cstr!("hello");
/// ```
/// ```compile_fail
/// # use bson::raw::{CStr, cstr};
/// // A literal with invalid UTF-8 will not compile:
/// let key: &CStr = cstr!("\xc3\x28");
/// ```
/// ```compile_fail
/// # use bson::raw::{CStr, cstr};
/// // A literal with an embedded nul will not compile:
/// let key: &CStr = cstr!("hel\0lo");
/// ```
#[macro_export]
macro_rules! cstr {
    ($text:literal) => {{
        // Run the zero-byte scan as a constant so the outcome is known to the compiler.
        const VALIDATED: Option<&$crate::raw::CStr> = $crate::raw::validate_cstr($text);
        const VALID: bool = VALIDATED.is_some();
        // `IsValidCStr<false>` does not implement `ValidCStr`, so a rejected literal fails the
        // trait bound here and is reported as a compile error.
        $crate::raw::assert_valid_cstr::<$crate::raw::IsValidCStr<VALID>>();
        // Only reached when the bound above held, i.e. `VALIDATED` is `Some`.
        VALIDATED.unwrap()
    }};
}
pub use cstr;
229
/// An owned BSON-spec cstring: zero or more UTF-8 encoded characters, excluding the nul byte.
/// `CString` is to `CStr` as [`String`] is to [`prim@str`].  Can be constructed from a [`CStr`] via
/// [`ToOwned`]/[`Into`] or from a [`String`] or [`prim@str`] via [`TryFrom`].
///
/// Like `CStr`, this differs from [`std::ffi::CString`] in that it is required to be valid UTF-8,
/// and does not include the nul terminator in the buffer.
#[derive(Clone, Eq)]
#[repr(transparent)]
pub struct CString {
    // Expected to hold no nul byte: the `TryFrom` conversions validate their input, while
    // `from_string_unchecked` leaves that to its caller.
    data: String,
}
241
242impl TryFrom<String> for CString {
243    type Error = Error;
244
245    fn try_from(data: String) -> Result<Self> {
246        let _: &CStr = data.as_str().try_into()?;
247        Ok(Self { data })
248    }
249}
250
251impl TryFrom<&str> for CString {
252    type Error = Error;
253
254    fn try_from(data: &str) -> Result<Self> {
255        let cs: &CStr = data.try_into()?;
256        Ok(cs.into())
257    }
258}
259
260impl CString {
261    pub(crate) fn from_string_unchecked(data: String) -> Self {
262        Self { data }
263    }
264
265    /// Consume `self` to return the underlying `String`.
266    pub fn into_string(self) -> String {
267        self.data
268    }
269
270    /// View the buffer as a Rust `&str`.
271    pub fn as_str(&self) -> &str {
272        self.as_ref().as_str()
273    }
274}
275
276impl From<&CStr> for CString {
277    fn from(value: &CStr) -> Self {
278        Self {
279            data: value.as_str().into(),
280        }
281    }
282}
283
284impl AsRef<CStr> for CString {
285    fn as_ref(&self) -> &CStr {
286        CStr::from_str_unchecked(self.data.as_str())
287    }
288}
289
290impl From<CString> for String {
291    fn from(value: CString) -> Self {
292        value.into_string()
293    }
294}
295
296impl std::ops::Deref for CString {
297    type Target = CStr;
298
299    fn deref(&self) -> &Self::Target {
300        self.as_ref()
301    }
302}
303
304impl std::borrow::Borrow<CStr> for CString {
305    fn borrow(&self) -> &CStr {
306        self.as_ref()
307    }
308}
309
310impl std::fmt::Debug for CString {
311    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
312        self.data.fmt(f)
313    }
314}
315
316impl std::fmt::Display for CString {
317    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
318        self.data.fmt(f)
319    }
320}
321
322impl PartialEq for CString {
323    fn eq(&self, other: &Self) -> bool {
324        self.data == other.data
325    }
326}
327
328impl PartialEq<CStr> for CString {
329    fn eq(&self, other: &CStr) -> bool {
330        self.data.as_str() == other.as_str()
331    }
332}
333
334impl PartialEq<String> for CString {
335    fn eq(&self, other: &String) -> bool {
336        &self.data == other
337    }
338}
339
340impl PartialEq<str> for CString {
341    fn eq(&self, other: &str) -> bool {
342        self.data.as_str() == other
343    }
344}
345
346impl std::hash::Hash for CString {
347    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
348        self.data.hash(state);
349    }
350}
351
#[cfg(feature = "serde")]
impl serde::Serialize for CString {
    /// Serializes the contents as an ordinary string.
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        serializer.serialize_str(self.data.as_str())
    }
}