objc2_core_foundation/string.rs
1use core::cmp::Ordering;
2use core::ffi::c_char;
3use core::fmt::Write;
4use core::ptr::NonNull;
5use core::{fmt, slice, str};
6
7use crate::{
8 kCFAllocatorNull, CFIndex, CFRange, CFRetained, CFString, CFStringBuiltInEncodings,
9 CFStringCompareFlags,
10};
11
/// Reinterprets `bytes` as a `&str`, validating the contents only when debug
/// assertions are enabled.
///
/// With debug assertions on, invalid UTF-8 causes a panic (attributed to the
/// caller via `#[track_caller]`). With them off, no validation is performed.
///
/// # Safety
///
/// `bytes` must be valid UTF-8.
#[track_caller]
unsafe fn debug_checked_utf8_unchecked(bytes: &[u8]) -> &str {
    if !cfg!(debug_assertions) {
        // SAFETY: The caller guarantees that `bytes` is valid UTF-8.
        return unsafe { str::from_utf8_unchecked(bytes) };
    }

    // Debug builds verify the caller's claim instead of trusting it blindly.
    match str::from_utf8(bytes) {
        Err(err) => panic!(
            "unsafe precondition violated: CF function did not return valid UTF-8: {err}"
        ),
        Ok(valid) => valid,
    }
}
26
27impl CFString {
28 /// Creates a new `CFString` from a [`str`][prim@str].
29 #[inline]
30 #[doc(alias = "CFStringCreateWithBytes")]
31 #[allow(clippy::should_implement_trait)] // Not really sure of a better name
32 pub fn from_str(string: &str) -> CFRetained<Self> {
33 // Can never happen, allocations in Rust cannot be this large.
34 debug_assert!(string.len() < CFIndex::MAX as usize);
35 let len = string.len() as CFIndex;
36 let s = unsafe {
37 Self::with_bytes(
38 None,
39 string.as_ptr(),
40 len,
41 CFStringBuiltInEncodings::EncodingUTF8.0,
42 false,
43 )
44 };
45 // Should only fail if the string is not UTF-8 (which we know it is)
46 // or perhaps on allocation error.
47 s.expect("failed creating CFString")
48 }
49
50 /// Alias for easier transition from the `core-foundation` crate.
51 #[inline]
52 #[deprecated = "renamed to CFString::from_str"]
53 pub fn new(string: &str) -> CFRetained<Self> {
54 Self::from_str(string)
55 }
56
57 /// Creates a new `CFString` from a `'static` [`str`][prim@str].
58 ///
59 /// This may be slightly more efficient than [`CFString::from_str`], as it
60 /// may be able to re-use the existing buffer (since we know it won't be
61 /// deallocated).
62 #[inline]
63 #[doc(alias = "CFStringCreateWithBytesNoCopy")]
64 pub fn from_static_str(string: &'static str) -> CFRetained<Self> {
65 debug_assert!(string.len() < CFIndex::MAX as usize);
66 let len = string.len() as CFIndex;
67 // SAFETY: The string is used as a backing store, and thus must
68 // potentially live forever, since we don't know how long the returned
69 // CFString will be alive for. This is ensured by the `'static`
70 // requirement.
71 let s = unsafe {
72 Self::with_bytes_no_copy(
73 None,
74 string.as_ptr(),
75 len,
76 CFStringBuiltInEncodings::EncodingUTF8.0,
77 false,
78 kCFAllocatorNull,
79 )
80 };
81 s.expect("failed creating CFString")
82 }
83
84 /// Get the [`str`](`prim@str`) representation of this string if it can be
85 /// done efficiently.
86 ///
87 /// Returns [`None`] if the internal storage does not allow this to be
88 /// done efficiently. Use `CFString::to_string` if performance is not an
89 /// issue.
90 ///
91 /// # Safety
92 ///
93 /// The `CFString` must not be mutated for the lifetime of the returned
94 /// string.
95 ///
96 /// Warning: This is very difficult to ensure in generic contexts, e.g. it
97 /// cannot even be used inside `Debug::fmt`, since `Formatter` uses `dyn`
98 /// internally, and could thus mutate the string inside there.
99 #[doc(alias = "CFStringGetCStringPtr")]
100 pub unsafe fn as_str_unchecked(&self) -> Option<&str> {
101 // NOTE: The encoding is an 8-bit encoding.
102 let bytes = self.c_string_ptr(CFStringBuiltInEncodings::EncodingASCII.0);
103 NonNull::new(bytes as *mut c_char).map(|bytes| {
104 // NOTE: The returned string may contain interior NUL bytes:
105 // https://github.com/swiftlang/swift-corelibs-foundation/issues/5200
106 //
107 // So we have to check the length of the string too. We do that
108 // using `CFStringGetLength`; Since `CFStringGetCStringPtr`
109 // returned a pointer, and we picked the encoding to be ASCII
110 // (which has 1 codepoint per byte), this means that the number of
111 // codepoints is the same as the number of bytes in the string.
112 //
113 // This is also what Swift does:
114 // https://github.com/swiftlang/swift-corelibs-foundation/commit/8422c1a5e63913613a93523b3b398cb982df6205
115 let len = self.length() as usize;
116
117 // SAFETY: The pointer is valid for as long as the CFString is not
118 // mutated (which the caller ensures it isn't for the lifetime of
119 // the reference), and the length is correct (see above).
120 let bytes = unsafe { slice::from_raw_parts(bytes.as_ptr().cast(), len) };
121
122 // SAFETY: `CFStringGetCStringPtr` is (very likely) implemented
123 // correctly, and we picked the encoding to be ASCII (which is a
124 // subset of UTF-8).
125 unsafe { debug_checked_utf8_unchecked(bytes) }
126 })
127 }
128}
129
130impl fmt::Display for CFString {
131 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
132 // Copy UTF-8 bytes from the CFString to the formatter in a loop, to
133 // avoid allocating.
134 //
135 // We have to do this instead of using `CFStringGetCStringPtr`, as
136 // that will be invalidated if the string is mutated while in use, and
137 // `fmt::Formatter` contains `dyn Write` which may very theoretically
138 // do exactly that.
139
140 // Somewhat reasonably sized stack buffer.
141 // TODO: Do performance testing, and tweak this value.
142 //
143 // Should be at least 4 (as that's the minimum size of `char`).
144 let mut buf = [0u8; 32];
145
146 let mut location_utf16 = 0;
147
148 loop {
149 let len_utf16 = self.length();
150 let mut read_utf8 = 0;
151 let read_utf16 = unsafe {
152 self.bytes(
153 CFRange {
154 location: location_utf16,
155 length: len_utf16 - location_utf16,
156 },
157 CFStringBuiltInEncodings::EncodingUTF8.0,
158 0, // No conversion character
159 false,
160 buf.as_mut_ptr(),
161 buf.len() as _,
162 &mut read_utf8,
163 )
164 };
165 if read_utf16 <= 0 {
166 if location_utf16 < len_utf16 {
167 // We're not done reading the entire string yet; emit
168 // replacement character, advance one character, and try again.
169 f.write_char(char::REPLACEMENT_CHARACTER)?;
170 location_utf16 += 1;
171 continue;
172 }
173 break;
174 }
175 location_utf16 += read_utf16;
176
177 // SAFETY: `CFStringGetBytes` is (very likely) implemented
178 // correctly, and won't return non-UTF8 strings.
179 //
180 // Even if a string contains an UTF-8 char on a boundary, it won't
181 // split it up when returning UTF-8.
182 let s = unsafe { debug_checked_utf8_unchecked(&buf[0..read_utf8 as usize]) };
183
184 // NOTE: May unwind, and may invalidate the string contents.
185 f.write_str(s)?;
186 }
187
188 Ok(())
189 }
190}
191
192impl PartialOrd for CFString {
193 #[inline]
194 #[doc(alias = "CFStringCompare")]
195 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
196 Some(self.cmp(other))
197 }
198}
199
200impl Ord for CFString {
201 #[inline]
202 #[doc(alias = "CFStringCompare")]
203 fn cmp(&self, other: &Self) -> Ordering {
204 // Request standard lexiographical ordering.
205 let flags = CFStringCompareFlags::empty();
206 self.compare(Some(other), flags).into()
207 }
208}
209
#[cfg(test)]
mod tests {
    use alloc::string::ToString;
    use core::ffi::CStr;

    use super::*;

    /// Round-trips ASCII and non-ASCII text through `from_str` / `to_string`.
    #[test]
    fn basic_conversion() {
        for text in ["abc", "a♥😀"] {
            let cf = CFString::from_str(text);
            assert_eq!(cf.to_string(), text);
        }
    }

    /// Creates strings from NUL-terminated buffers in several encodings and
    /// checks the UTF-8 rendering.
    #[test]
    fn cstr_conversion() {
        fn check(nul_terminated: &[u8], encoding: CFStringBuiltInEncodings, expected: &str) {
            let cstr = CStr::from_bytes_with_nul(nul_terminated).unwrap();
            let cf = unsafe { CFString::with_c_string(None, cstr.as_ptr(), encoding.0) }.unwrap();
            assert_eq!(cf.to_string(), expected);
        }

        check(
            b"abc\xf8xyz\0",
            CFStringBuiltInEncodings::EncodingISOLatin1,
            "abcøxyz",
        );
        check(
            b"\x26\x65\0",
            CFStringBuiltInEncodings::EncodingUTF16BE,
            "♥",
        );
        check(
            b"\x65\x26\0",
            CFStringBuiltInEncodings::EncodingUTF16LE,
            "♥",
        );
    }

    /// Three bytes of UTF-16BE input: one high surrogate plus a dangling byte.
    #[test]
    fn from_incomplete() {
        let truncated = b"\xd8\x3d\xde";
        let cf = unsafe {
            CFString::with_bytes(
                None,
                truncated.as_ptr(),
                3,
                CFStringBuiltInEncodings::EncodingUTF16BE.0,
                false,
            )
            .unwrap()
        };
        assert_eq!(cf.to_string(), "�"); // Replacement character
        assert_eq!(cf.length(), 1);
    }

    /// Interior NUL bytes must survive the various conversion paths.
    #[test]
    fn internal_nul_byte() {
        let short = CFString::from_str("a\0b\0c\0d");
        // Works with `CFStringGetBytes`.
        assert_eq!(short.to_string(), "a\0b\0c\0d");
        // `CFStringGetCStringPtr` does not seem to work here on very short
        // strings (probably those that are stored inline?).
        let expected_view = if cfg!(target_pointer_width = "64") {
            None
        } else {
            Some("a\0b\0c\0d")
        };
        assert_eq!(unsafe { short.as_str_unchecked() }, expected_view);

        // Test `CFStringGetCString`.
        let mut buf = [0u8; 10];
        let copied = unsafe {
            short.c_string(
                buf.as_mut_ptr().cast(),
                buf.len() as _,
                CFStringBuiltInEncodings::EncodingUTF8.0,
            )
        };
        assert!(copied);
        // All the data is copied to the buffer.
        assert_eq!(&buf[..10], b"a\0b\0c\0d\0\0\0");

        // But subsequent usage of that as a CStr fails, since it contains
        // interior NUL bytes.
        let truncated = CStr::from_bytes_until_nul(&buf).unwrap();
        assert_eq!(truncated.to_bytes(), b"a");

        // Test with a bit longer string, to ensure the same holds for heap-
        // allocated CFStrings
        let long = CFString::from_str("a\0aaaaaaaaaaaaaaa");
        // Works with `CFStringGetBytes`.
        assert_eq!(long.to_string(), "a\0aaaaaaaaaaaaaaa");
        // `CFStringGetCStringPtr` also allows these without truncation.
        assert_eq!(
            unsafe { long.as_str_unchecked() },
            Some("a\0aaaaaaaaaaaaaaa")
        );
    }

    /// Non-ASCII contents must never be exposed through the ASCII fast path.
    #[test]
    fn as_str_correct_on_unicode() {
        let emoji = CFString::from_static_str("😀");
        assert_eq!(unsafe { emoji.as_str_unchecked() }, None);
        let heart = CFString::from_static_str("♥");
        assert_eq!(unsafe { heart.as_str_unchecked() }, None);
    }

    /// Places a 4-byte character across the 32 byte buffer boundary used by
    /// `Display::fmt`.
    #[test]
    fn utf8_on_boundary() {
        let cases = [
            // 29 'a's
            concat!("aaaaaaaaaa", "aaaaaaaaaa", "aaaaaaaaa", "😀"),
            // 30 'a's
            concat!("aaaaaaaaaa", "aaaaaaaaaa", "aaaaaaaaaa", "😀"),
            // 31 'a's
            concat!("aaaaaaaaaa", "aaaaaaaaaa", "aaaaaaaaaa", "a", "😀"),
        ];
        for text in cases {
            assert_eq!(CFString::from_str(text).to_string(), text);
        }
    }

    #[test]
    fn create_with_cstring_broken_on_non_8_bit() {
        // A CFString that is supposed to contain a "♥" (the UTF-8 encoding of
        // that is the vastly different b"\xE2\x99\xA5").
        //
        // The creation call below is wrong, because
        // `CFStringCreateWithCString` expects an 8-bit encoding.
        //
        // See also:
        // https://github.com/swiftlang/swift-corelibs-foundation/issues/5164
        let heart = unsafe {
            CFString::with_c_string(
                None,
                b"\x65\x26\0".as_ptr().cast(),
                CFStringBuiltInEncodings::EncodingUnicode.0,
            )
        }
        .unwrap();

        // `CFStringGetBytes` used in `fmt::Display` converts to UTF-8.
        assert_eq!(heart.to_string(), "♥");

        // So does `CFStringGetCString`.
        let mut buf = [0u8; 20];
        let copied = unsafe {
            heart.c_string(
                buf.as_mut_ptr().cast(),
                buf.len() as _,
                CFStringBuiltInEncodings::EncodingUTF8.0,
            )
        };
        assert!(copied);
        let copied_cstr = CStr::from_bytes_until_nul(&buf).unwrap();
        assert_eq!(copied_cstr.to_bytes(), "♥".as_bytes());

        // `CFStringGetCStringPtr` completely ignores the requested UTF-8 conversion.
        assert_eq!(unsafe { heart.as_str_unchecked() }, Some("e"));
        assert_eq!(
            unsafe { CStr::from_ptr(heart.c_string_ptr(CFStringBuiltInEncodings::EncodingUTF8.0)) },
            CStr::from_bytes_with_nul(b"e&\0").unwrap()
        );
    }

    /// A string backed by a `'static` buffer renders like any other.
    #[test]
    fn test_static() {
        let backed = CFString::from_static_str("xyz");
        assert_eq!(backed.to_string(), "xyz");
    }

    #[test]
    fn eq() {
        assert_eq!(CFString::from_str("abc"), CFString::from_str("abc"));
        assert_ne!(CFString::from_str("abc"), CFString::from_str("xyz"));

        // Cross-type comparison
        let string = CFString::from_str("abc");
        let allocator = unsafe { kCFAllocatorNull }.unwrap();
        assert_ne!(**string, **allocator);
    }

    // TODO: Test mutation while formatting.
}