objc2_core_foundation/
string.rs

1use core::cmp::Ordering;
2use core::ffi::c_char;
3use core::fmt::Write;
4use core::ptr::NonNull;
5use core::{fmt, slice, str};
6
7use crate::{
8    kCFAllocatorNull, CFIndex, CFRange, CFRetained, CFString, CFStringBuiltInEncodings,
9    CFStringCompareFlags,
10};
11
/// Reinterprets `bytes` as a `&str`, validating the contents only when debug
/// assertions are enabled.
///
/// # Safety
///
/// `bytes` must be valid UTF-8. Without debug assertions no validation is
/// performed at all.
///
/// # Panics
///
/// With debug assertions enabled, panics if `bytes` is not valid UTF-8.
#[track_caller]
unsafe fn debug_checked_utf8_unchecked(bytes: &[u8]) -> &str {
    if !cfg!(debug_assertions) {
        // SAFETY: Validity of the bytes is upheld by the caller.
        return unsafe { str::from_utf8_unchecked(bytes) };
    }

    // Debug builds pay for a full validation pass so that a violated
    // precondition is reported loudly instead of silently becoming UB.
    match str::from_utf8(bytes) {
        Err(err) => panic!(
            "unsafe precondition violated: CF function did not return valid UTF-8: {err}"
        ),
        Ok(valid) => valid,
    }
}
26
27impl CFString {
28    /// Creates a new `CFString` from a [`str`][prim@str].
29    #[inline]
30    #[doc(alias = "CFStringCreateWithBytes")]
31    #[allow(clippy::should_implement_trait)] // Not really sure of a better name
32    pub fn from_str(string: &str) -> CFRetained<Self> {
33        // Can never happen, allocations in Rust cannot be this large.
34        debug_assert!(string.len() < CFIndex::MAX as usize);
35        let len = string.len() as CFIndex;
36        let s = unsafe {
37            Self::with_bytes(
38                None,
39                string.as_ptr(),
40                len,
41                CFStringBuiltInEncodings::EncodingUTF8.0,
42                false,
43            )
44        };
45        // Should only fail if the string is not UTF-8 (which we know it is)
46        // or perhaps on allocation error.
47        s.expect("failed creating CFString")
48    }
49
50    /// Alias for easier transition from the `core-foundation` crate.
51    #[inline]
52    #[deprecated = "renamed to CFString::from_str"]
53    pub fn new(string: &str) -> CFRetained<Self> {
54        Self::from_str(string)
55    }
56
57    /// Creates a new `CFString` from a `'static` [`str`][prim@str].
58    ///
59    /// This may be slightly more efficient than [`CFString::from_str`], as it
60    /// may be able to re-use the existing buffer (since we know it won't be
61    /// deallocated).
62    #[inline]
63    #[doc(alias = "CFStringCreateWithBytesNoCopy")]
64    pub fn from_static_str(string: &'static str) -> CFRetained<Self> {
65        debug_assert!(string.len() < CFIndex::MAX as usize);
66        let len = string.len() as CFIndex;
67        // SAFETY: The string is used as a backing store, and thus must
68        // potentially live forever, since we don't know how long the returned
69        // CFString will be alive for. This is ensured by the `'static`
70        // requirement.
71        let s = unsafe {
72            Self::with_bytes_no_copy(
73                None,
74                string.as_ptr(),
75                len,
76                CFStringBuiltInEncodings::EncodingUTF8.0,
77                false,
78                kCFAllocatorNull,
79            )
80        };
81        s.expect("failed creating CFString")
82    }
83
84    /// Get the [`str`](`prim@str`) representation of this string if it can be
85    /// done efficiently.
86    ///
87    /// Returns [`None`] if the internal storage does not allow this to be
88    /// done efficiently. Use `CFString::to_string` if performance is not an
89    /// issue.
90    ///
91    /// # Safety
92    ///
93    /// The `CFString` must not be mutated for the lifetime of the returned
94    /// string.
95    ///
96    /// Warning: This is very difficult to ensure in generic contexts, e.g. it
97    /// cannot even be used inside `Debug::fmt`, since `Formatter` uses `dyn`
98    /// internally, and could thus mutate the string inside there.
99    #[doc(alias = "CFStringGetCStringPtr")]
100    pub unsafe fn as_str_unchecked(&self) -> Option<&str> {
101        // NOTE: The encoding is an 8-bit encoding.
102        let bytes = self.c_string_ptr(CFStringBuiltInEncodings::EncodingASCII.0);
103        NonNull::new(bytes as *mut c_char).map(|bytes| {
104            // NOTE: The returned string may contain interior NUL bytes:
105            // https://github.com/swiftlang/swift-corelibs-foundation/issues/5200
106            //
107            // So we have to check the length of the string too. We do that
108            // using `CFStringGetLength`; Since `CFStringGetCStringPtr`
109            // returned a pointer, and we picked the encoding to be ASCII
110            // (which has 1 codepoint per byte), this means that the number of
111            // codepoints is the same as the number of bytes in the string.
112            //
113            // This is also what Swift does:
114            // https://github.com/swiftlang/swift-corelibs-foundation/commit/8422c1a5e63913613a93523b3b398cb982df6205
115            let len = self.length() as usize;
116
117            // SAFETY: The pointer is valid for as long as the CFString is not
118            // mutated (which the caller ensures it isn't for the lifetime of
119            // the reference), and the length is correct (see above).
120            let bytes = unsafe { slice::from_raw_parts(bytes.as_ptr().cast(), len) };
121
122            // SAFETY: `CFStringGetCStringPtr` is (very likely) implemented
123            // correctly, and we picked the encoding to be ASCII (which is a
124            // subset of UTF-8).
125            unsafe { debug_checked_utf8_unchecked(bytes) }
126        })
127    }
128}
129
130impl fmt::Display for CFString {
131    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
132        // Copy UTF-8 bytes from the CFString to the formatter in a loop, to
133        // avoid allocating.
134        //
135        // We have to do this instead of using `CFStringGetCStringPtr`, as
136        // that will be invalidated if the string is mutated while in use, and
137        // `fmt::Formatter` contains `dyn Write` which may very theoretically
138        // do exactly that.
139
140        // Somewhat reasonably sized stack buffer.
141        // TODO: Do performance testing, and tweak this value.
142        //
143        // Should be at least 4 (as that's the minimum size of `char`).
144        let mut buf = [0u8; 32];
145
146        let mut location_utf16 = 0;
147
148        loop {
149            let len_utf16 = self.length();
150            let mut read_utf8 = 0;
151            let read_utf16 = unsafe {
152                self.bytes(
153                    CFRange {
154                        location: location_utf16,
155                        length: len_utf16 - location_utf16,
156                    },
157                    CFStringBuiltInEncodings::EncodingUTF8.0,
158                    0, // No conversion character
159                    false,
160                    buf.as_mut_ptr(),
161                    buf.len() as _,
162                    &mut read_utf8,
163                )
164            };
165            if read_utf16 <= 0 {
166                if location_utf16 < len_utf16 {
167                    // We're not done reading the entire string yet; emit
168                    // replacement character, advance one character, and try again.
169                    f.write_char(char::REPLACEMENT_CHARACTER)?;
170                    location_utf16 += 1;
171                    continue;
172                }
173                break;
174            }
175            location_utf16 += read_utf16;
176
177            // SAFETY: `CFStringGetBytes` is (very likely) implemented
178            // correctly, and won't return non-UTF8 strings.
179            //
180            // Even if a string contains an UTF-8 char on a boundary, it won't
181            // split it up when returning UTF-8.
182            let s = unsafe { debug_checked_utf8_unchecked(&buf[0..read_utf8 as usize]) };
183
184            // NOTE: May unwind, and may invalidate the string contents.
185            f.write_str(s)?;
186        }
187
188        Ok(())
189    }
190}
191
192impl PartialOrd for CFString {
193    #[inline]
194    #[doc(alias = "CFStringCompare")]
195    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
196        Some(self.cmp(other))
197    }
198}
199
200impl Ord for CFString {
201    #[inline]
202    #[doc(alias = "CFStringCompare")]
203    fn cmp(&self, other: &Self) -> Ordering {
204        // Request standard lexiographical ordering.
205        let flags = CFStringCompareFlags::empty();
206        self.compare(Some(other), flags).into()
207    }
208}
209
#[cfg(test)]
mod tests {
    use alloc::string::ToString;
    use core::ffi::CStr;

    use super::*;

    /// ASCII and non-ASCII text survives a trip through `CFString`.
    #[test]
    fn basic_conversion() {
        for text in ["abc", "a♥😀"] {
            let cf = CFString::from_str(text);
            assert_eq!(cf.to_string(), text);
        }
    }

    /// C strings in various encodings are converted to the expected text.
    #[test]
    fn cstr_conversion() {
        // (NUL-terminated input, encoding of the input, expected text)
        let cases = [
            (
                b"abc\xf8xyz\0" as &[u8],
                CFStringBuiltInEncodings::EncodingISOLatin1,
                "abcøxyz",
            ),
            (
                b"\x26\x65\0",
                CFStringBuiltInEncodings::EncodingUTF16BE,
                "♥",
            ),
            (
                b"\x65\x26\0",
                CFStringBuiltInEncodings::EncodingUTF16LE,
                "♥",
            ),
        ];
        for (raw, encoding, expected) in cases {
            let c_input = CStr::from_bytes_with_nul(raw).unwrap();
            let created = unsafe { CFString::with_c_string(None, c_input.as_ptr(), encoding.0) };
            assert_eq!(created.unwrap().to_string(), expected);
        }
    }

    /// Truncated UTF-16 input is displayed as a single replacement character.
    #[test]
    fn from_incomplete() {
        let truncated = b"\xd8\x3d\xde";
        let created = unsafe {
            CFString::with_bytes(
                None,
                truncated.as_ptr(),
                3,
                CFStringBuiltInEncodings::EncodingUTF16BE.0,
                false,
            )
        };
        let cf = created.unwrap();
        assert_eq!(cf.to_string(), "�"); // Replacement character
        assert_eq!(cf.length(), 1);
    }

    /// Interior NUL bytes are preserved by the conversions used here.
    #[test]
    fn internal_nul_byte() {
        let short = CFString::from_str("a\0b\0c\0d");
        // Works with `CFStringGetBytes`.
        assert_eq!(short.to_string(), "a\0b\0c\0d");
        // `CFStringGetCStringPtr` does not seem to work here on very short
        // strings (probably those that are stored inline?).
        let expected_direct = if cfg!(target_pointer_width = "64") {
            None
        } else {
            Some("a\0b\0c\0d")
        };
        assert_eq!(unsafe { short.as_str_unchecked() }, expected_direct);

        // Test `CFStringGetCString`.
        let mut out = [0u8; 10];
        let copied = unsafe {
            short.c_string(
                out.as_mut_ptr().cast(),
                out.len() as _,
                CFStringBuiltInEncodings::EncodingUTF8.0,
            )
        };
        assert!(copied);
        // All the data is copied to the buffer.
        assert_eq!(&out[0..10], b"a\0b\0c\0d\0\0\0");

        // But subsequent usage of that as a CStr fails, since it contains
        // interior NUL bytes.
        let until_nul = CStr::from_bytes_until_nul(&out).unwrap();
        assert_eq!(until_nul.to_bytes(), b"a");

        // Test with a bit longer string, to ensure the same holds for heap-
        // allocated CFStrings
        let long = CFString::from_str("a\0aaaaaaaaaaaaaaa");
        // Works with `CFStringGetBytes`.
        assert_eq!(long.to_string(), "a\0aaaaaaaaaaaaaaa");
        // `CFStringGetCStringPtr` also allows these without truncation.
        assert_eq!(
            unsafe { long.as_str_unchecked() },
            Some("a\0aaaaaaaaaaaaaaa")
        );
    }

    /// Non-ASCII strings do not offer direct `str` access.
    #[test]
    fn as_str_correct_on_unicode() {
        for text in ["😀", "♥"] {
            let cf = CFString::from_static_str(text);
            assert_eq!(unsafe { cf.as_str_unchecked() }, None);
        }
    }

    /// A multi-byte character straddling the `Display` buffer is kept intact.
    #[test]
    fn utf8_on_boundary() {
        // Make the emoji lie across the 32 byte buffer size in Display::fmt.
        let inputs = [
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa😀",   // 29 'a's
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa😀",  // 30 'a's
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa😀", // 31 'a's
        ];
        for text in inputs {
            assert_eq!(CFString::from_str(text).to_string(), text);
        }
    }

    /// Documents the behaviour of `CFStringCreateWithCString` when it is
    /// (incorrectly) given a non-8-bit encoding.
    #[test]
    fn create_with_cstring_broken_on_non_8_bit() {
        // A CFString that is supposed to contain a "♥" (the UTF-8 encoding of
        // that is the vastly different b"\xE2\x99\xA5").
        //
        // This line is wrong, because `CFStringCreateWithCString` expects an
        // 8-bit encoding.
        //
        // See also:
        // https://github.com/swiftlang/swift-corelibs-foundation/issues/5164
        let created = unsafe {
            CFString::with_c_string(
                None,
                b"\x65\x26\0".as_ptr().cast(),
                CFStringBuiltInEncodings::EncodingUnicode.0,
            )
        };
        let heart = created.unwrap();

        // `CFStringGetBytes` used in `fmt::Display` converts to UTF-8.
        assert_eq!(heart.to_string(), "♥");

        // So does `CFStringGetCString`.
        let mut out = [0u8; 20];
        let copied = unsafe {
            heart.c_string(
                out.as_mut_ptr().cast(),
                out.len() as _,
                CFStringBuiltInEncodings::EncodingUTF8.0,
            )
        };
        assert!(copied);
        let until_nul = CStr::from_bytes_until_nul(&out).unwrap();
        assert_eq!(until_nul.to_bytes(), "♥".as_bytes());

        // `CFStringGetCStringPtr` completely ignores the requested UTF-8 conversion.
        assert_eq!(unsafe { heart.as_str_unchecked() }, Some("e"));
        let direct = unsafe {
            CStr::from_ptr(heart.c_string_ptr(CFStringBuiltInEncodings::EncodingUTF8.0))
        };
        assert_eq!(direct, CStr::from_bytes_with_nul(b"e&\0").unwrap());
    }

    /// A string created from a `'static` buffer displays its contents.
    #[test]
    fn test_static() {
        let from_static = CFString::from_static_str("xyz");
        assert_eq!(from_static.to_string(), "xyz");
    }

    /// Equality holds for equal contents, and fails for differing contents
    /// or differing types.
    #[test]
    fn eq() {
        let abc = CFString::from_str("abc");
        assert_eq!(abc, CFString::from_str("abc"));
        assert_ne!(abc, CFString::from_str("xyz"));
        // Cross-type comparison
        assert_ne!(
            **CFString::from_str("abc"),
            **unsafe { kCFAllocatorNull }.unwrap()
        );
    }

    // TODO: Test mutation while formatting.
}