ext_php_rs/types/string.rs
1//! Represents a string in the PHP world. Similar to a C string, but is
2//! reference counted and contains the length of the string.
3
4use std::{
5 borrow::Cow,
6 convert::TryFrom,
7 ffi::{CStr, CString},
8 fmt::Debug,
9 ptr, slice,
10};
11
12use parking_lot::{Mutex, const_mutex};
13
14use crate::{
15 boxed::{ZBox, ZBoxable},
16 convert::{FromZval, IntoZval},
17 error::{Error, Result},
18 ffi::{
19 ext_php_rs_is_known_valid_utf8, ext_php_rs_set_known_valid_utf8,
20 ext_php_rs_zend_string_init, ext_php_rs_zend_string_release, zend_string,
21 zend_string_init_interned,
22 },
23 flags::DataType,
24 types::Zval,
25};
26
/// A borrowed Zend string.
///
/// Although this type does implement [`Sized`], it is in fact not sized. As C
/// cannot represent unsized types, an array of size 1 is used at the end of
/// the type to represent the contents of the string, therefore this type is
/// actually unsized. All constructors return [`ZBox<ZendStr>`], the owned
/// variant.
///
/// Once the `ptr_metadata` feature lands in stable Rust, this type can
/// potentially be changed to a DST using slices and metadata. See the
/// tracking issue: <https://github.com/rust-lang/rust/issues/81513>
pub type ZendStr = zend_string;
38
// Adding to the Zend interned string hashtable is not atomic and can be
// contended when PHP is compiled with ZTS, so an empty mutex is used to
// serialise interned-string creation on the Rust side. Not much can be done
// about contention on the PHP side, but some safety is better than none.
static INTERNED_LOCK: Mutex<()> = const_mutex(());
44
45// Clippy complains about there being no `is_empty` function when implementing
46// on the alias `ZendStr` :( <https://github.com/rust-lang/rust-clippy/issues/7702>
47#[allow(clippy::len_without_is_empty)]
48impl ZendStr {
49 /// Creates a new Zend string from a slice of bytes.
50 ///
51 /// # Parameters
52 ///
53 /// * `str` - String content.
54 /// * `persistent` - Whether the string should persist through the request
55 /// boundary.
56 ///
57 /// # Panics
58 ///
59 /// Panics if the function was unable to allocate memory for the Zend
60 /// string.
61 ///
62 /// # Safety
63 ///
64 /// When passing `persistent` as `false`, the caller must ensure that the
65 /// object does not attempt to live after the request finishes. When a
66 /// request starts and finishes in PHP, the Zend heap is deallocated and a
67 /// new one is created, which would leave a dangling pointer in the
68 /// [`ZBox`].
69 ///
70 /// # Example
71 ///
72 /// ```no_run
73 /// use ext_php_rs::types::ZendStr;
74 ///
75 /// let s = ZendStr::new("Hello, world!", false);
76 /// let php = ZendStr::new([80, 72, 80], false);
77 /// ```
78 pub fn new(str: impl AsRef<[u8]>, persistent: bool) -> ZBox<Self> {
79 let s = str.as_ref();
80 // TODO: we should handle the special cases when length is either 0 or 1
81 // see `zend_string_init_fast()` in `zend_string.h`
82 unsafe {
83 let ptr = ext_php_rs_zend_string_init(s.as_ptr().cast(), s.len(), persistent)
84 .as_mut()
85 .expect("Failed to allocate memory for new Zend string");
86 ZBox::from_raw(ptr)
87 }
88 }
89
90 /// Creates a new Zend string from a [`CStr`].
91 ///
92 /// # Parameters
93 ///
94 /// * `str` - String content.
95 /// * `persistent` - Whether the string should persist through the request
96 /// boundary.
97 ///
98 /// # Panics
99 ///
100 /// Panics if the function was unable to allocate memory for the Zend
101 /// string.
102 ///
103 /// # Safety
104 ///
105 /// When passing `persistent` as `false`, the caller must ensure that the
106 /// object does not attempt to live after the request finishes. When a
107 /// request starts and finishes in PHP, the Zend heap is deallocated and a
108 /// new one is created, which would leave a dangling pointer in the
109 /// [`ZBox`].
110 ///
111 /// # Example
112 ///
113 /// ```no_run
114 /// use ext_php_rs::types::ZendStr;
115 /// use std::ffi::CString;
116 ///
117 /// let c_s = CString::new("Hello world!").unwrap();
118 /// let s = ZendStr::from_c_str(&c_s, false);
119 /// ```
120 #[must_use]
121 pub fn from_c_str(str: &CStr, persistent: bool) -> ZBox<Self> {
122 unsafe {
123 let ptr =
124 ext_php_rs_zend_string_init(str.as_ptr(), str.to_bytes().len() as _, persistent);
125
126 ZBox::from_raw(
127 ptr.as_mut()
128 .expect("Failed to allocate memory for new Zend string"),
129 )
130 }
131 }
132
133 /// Creates a new interned Zend string from a slice of bytes.
134 ///
135 /// An interned string is only ever stored once and is immutable. PHP stores
136 /// the string in an internal hashtable which stores the interned
137 /// strings.
138 ///
139 /// As Zend hashtables are not thread-safe, a mutex is used to prevent two
140 /// interned strings from being created at the same time.
141 ///
142 /// Interned strings are not used very often. You should almost always use a
143 /// regular zend string, except in the case that you know you will use a
144 /// string that PHP will already have interned, such as "PHP".
145 ///
146 /// # Parameters
147 ///
148 /// * `str` - String content.
149 /// * `persistent` - Whether the string should persist through the request
150 /// boundary.
151 ///
152 /// # Panics
153 ///
154 /// Panics under the following circumstances:
155 ///
156 /// * The function used to create interned strings has not been set.
157 /// * The function could not allocate enough memory for the Zend string.
158 ///
159 /// # Safety
160 ///
161 /// When passing `persistent` as `false`, the caller must ensure that the
162 /// object does not attempt to live after the request finishes. When a
163 /// request starts and finishes in PHP, the Zend heap is deallocated and a
164 /// new one is created, which would leave a dangling pointer in the
165 /// [`ZBox`].
166 ///
167 /// # Example
168 ///
169 /// ```no_run
170 /// use ext_php_rs::types::ZendStr;
171 ///
172 /// let s = ZendStr::new_interned("PHP", true);
173 /// ```
174 pub fn new_interned(str: impl AsRef<[u8]>, persistent: bool) -> ZBox<Self> {
175 let _lock = INTERNED_LOCK.lock();
176 let s = str.as_ref();
177 unsafe {
178 let init = zend_string_init_interned.expect("`zend_string_init_interned` not ready");
179 let ptr = init(s.as_ptr().cast(), s.len() as _, persistent)
180 .as_mut()
181 .expect("Failed to allocate memory for new Zend string");
182 ZBox::from_raw(ptr)
183 }
184 }
185
186 /// Creates a new interned Zend string from a [`CStr`].
187 ///
188 /// An interned string is only ever stored once and is immutable. PHP stores
189 /// the string in an internal hashtable which stores the interned
190 /// strings.
191 ///
192 /// As Zend hashtables are not thread-safe, a mutex is used to prevent two
193 /// interned strings from being created at the same time.
194 ///
195 /// Interned strings are not used very often. You should almost always use a
196 /// regular zend string, except in the case that you know you will use a
197 /// string that PHP will already have interned, such as "PHP".
198 ///
199 /// # Parameters
200 ///
201 /// * `str` - String content.
202 /// * `persistent` - Whether the string should persist through the request
203 /// boundary.
204 ///
205 /// # Panics
206 ///
207 /// Panics under the following circumstances:
208 ///
209 /// * The function used to create interned strings has not been set.
210 /// * The function could not allocate enough memory for the Zend string.
211 ///
212 /// # Safety
213 ///
214 /// When passing `persistent` as `false`, the caller must ensure that the
215 /// object does not attempt to live after the request finishes. When a
216 /// request starts and finishes in PHP, the Zend heap is deallocated and a
217 /// new one is created, which would leave a dangling pointer in the
218 /// [`ZBox`].
219 ///
220 /// # Example
221 ///
222 /// ```no_run
223 /// use ext_php_rs::types::ZendStr;
224 /// use std::ffi::CString;
225 ///
226 /// let c_s = CString::new("PHP").unwrap();
227 /// let s = ZendStr::interned_from_c_str(&c_s, true);
228 /// ```
229 pub fn interned_from_c_str(str: &CStr, persistent: bool) -> ZBox<Self> {
230 let _lock = INTERNED_LOCK.lock();
231
232 unsafe {
233 let init = zend_string_init_interned.expect("`zend_string_init_interned` not ready");
234 let ptr = init(str.as_ptr(), str.to_bytes().len() as _, persistent);
235
236 ZBox::from_raw(
237 ptr.as_mut()
238 .expect("Failed to allocate memory for new Zend string"),
239 )
240 }
241 }
242
243 /// Returns the length of the string.
244 ///
245 /// # Example
246 ///
247 /// ```no_run
248 /// use ext_php_rs::types::ZendStr;
249 ///
250 /// let s = ZendStr::new("hello, world!", false);
251 /// assert_eq!(s.len(), 13);
252 /// ```
253 #[must_use]
254 pub fn len(&self) -> usize {
255 self.len
256 }
257
258 /// Returns true if the string is empty, false otherwise.
259 ///
260 /// # Example
261 ///
262 /// ```no_run
263 /// use ext_php_rs::types::ZendStr;
264 ///
265 /// let s = ZendStr::new("hello, world!", false);
266 /// assert_eq!(s.is_empty(), false);
267 /// ```
268 #[must_use]
269 pub fn is_empty(&self) -> bool {
270 self.len() == 0
271 }
272
273 /// Attempts to return a reference to the underlying bytes inside the Zend
274 /// string as a [`CStr`].
275 ///
276 /// # Errors
277 ///
278 /// Returns an [`Error::InvalidCString`] variant if the string contains null
279 /// bytes.
280 pub fn as_c_str(&self) -> Result<&CStr> {
281 let bytes_with_null =
282 unsafe { slice::from_raw_parts(self.val.as_ptr().cast(), self.len() + 1) };
283 CStr::from_bytes_with_nul(bytes_with_null).map_err(|_| Error::InvalidCString)
284 }
285
286 /// Attempts to return a reference to the underlying bytes inside the Zend
287 /// string.
288 ///
289 /// # Errors
290 ///
291 /// Returns an [`Error::InvalidUtf8`] variant if the [`str`] contains
292 /// non-UTF-8 characters.
293 ///
294 /// # Example
295 ///
296 /// ```no_run
297 /// use ext_php_rs::types::ZendStr;
298 ///
299 /// let s = ZendStr::new("hello, world!", false);
300 /// assert!(s.as_str().is_ok());
301 /// ```
302 pub fn as_str(&self) -> Result<&str> {
303 if unsafe { ext_php_rs_is_known_valid_utf8(self.as_ptr()) } {
304 let str = unsafe { std::str::from_utf8_unchecked(self.as_bytes()) };
305 return Ok(str);
306 }
307 let str = std::str::from_utf8(self.as_bytes()).map_err(|_| Error::InvalidUtf8)?;
308 unsafe { ext_php_rs_set_known_valid_utf8(self.as_ptr().cast_mut()) };
309 Ok(str)
310 }
311
312 /// Returns a reference to the underlying bytes inside the Zend string.
313 #[must_use]
314 pub fn as_bytes(&self) -> &[u8] {
315 unsafe { slice::from_raw_parts(self.val.as_ptr().cast(), self.len()) }
316 }
317
318 /// Returns a raw pointer to this object
319 #[must_use]
320 pub fn as_ptr(&self) -> *const ZendStr {
321 ptr::from_ref(self)
322 }
323
324 /// Returns a mutable pointer to this object
325 pub fn as_mut_ptr(&mut self) -> *mut ZendStr {
326 ptr::from_mut(self)
327 }
328}
329
330unsafe impl ZBoxable for ZendStr {
331 fn free(&mut self) {
332 unsafe { ext_php_rs_zend_string_release(self) };
333 }
334}
335
336impl Debug for ZendStr {
337 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
338 self.as_str().fmt(f)
339 }
340}
341
342impl AsRef<[u8]> for ZendStr {
343 fn as_ref(&self) -> &[u8] {
344 self.as_bytes()
345 }
346}
347
348impl<T> PartialEq<T> for ZendStr
349where
350 T: AsRef<[u8]>,
351{
352 fn eq(&self, other: &T) -> bool {
353 self.as_ref() == other.as_ref()
354 }
355}
356
357impl ToOwned for ZendStr {
358 type Owned = ZBox<ZendStr>;
359
360 fn to_owned(&self) -> Self::Owned {
361 Self::new(self.as_bytes(), false)
362 }
363}
364
365impl<'a> TryFrom<&'a ZendStr> for &'a CStr {
366 type Error = Error;
367
368 fn try_from(value: &'a ZendStr) -> Result<Self> {
369 value.as_c_str()
370 }
371}
372
373impl<'a> TryFrom<&'a ZendStr> for &'a str {
374 type Error = Error;
375
376 fn try_from(value: &'a ZendStr) -> Result<Self> {
377 value.as_str()
378 }
379}
380
381impl TryFrom<&ZendStr> for String {
382 type Error = Error;
383
384 fn try_from(value: &ZendStr) -> Result<Self> {
385 value.as_str().map(ToString::to_string)
386 }
387}
388
389impl<'a> From<&'a ZendStr> for Cow<'a, ZendStr> {
390 fn from(value: &'a ZendStr) -> Self {
391 Cow::Borrowed(value)
392 }
393}
394
395impl From<&CStr> for ZBox<ZendStr> {
396 fn from(value: &CStr) -> Self {
397 ZendStr::from_c_str(value, false)
398 }
399}
400
401impl From<CString> for ZBox<ZendStr> {
402 fn from(value: CString) -> Self {
403 ZendStr::from_c_str(&value, false)
404 }
405}
406
407impl From<&str> for ZBox<ZendStr> {
408 fn from(value: &str) -> Self {
409 ZendStr::new(value.as_bytes(), false)
410 }
411}
412
413impl From<String> for ZBox<ZendStr> {
414 fn from(value: String) -> Self {
415 ZendStr::new(value.as_str(), false)
416 }
417}
418
419impl From<ZBox<ZendStr>> for Cow<'_, ZendStr> {
420 fn from(value: ZBox<ZendStr>) -> Self {
421 Cow::Owned(value)
422 }
423}
424
425impl From<Cow<'_, ZendStr>> for ZBox<ZendStr> {
426 fn from(value: Cow<'_, ZendStr>) -> Self {
427 value.into_owned()
428 }
429}
430
// Implements `TryFrom<T> for Zval` and `IntoZval for T` for a string-like
// type `T` by storing the value through `Zval::set_string`.
macro_rules! try_into_zval_str {
    ($t:ty) => {
        impl TryFrom<$t> for Zval {
            type Error = Error;

            // Builds a fresh zval holding a non-persistent copy of the text.
            fn try_from(value: $t) -> Result<Self> {
                let mut zval = Zval::new();
                zval.set_string(&value, false)?;
                Ok(zval)
            }
        }

        impl IntoZval for $t {
            const TYPE: DataType = DataType::String;
            const NULLABLE: bool = false;

            // Writes the text into an existing zval, honouring `persistent`.
            fn set_zval(self, zv: &mut Zval, persistent: bool) -> Result<()> {
                let text = &self;
                zv.set_string(text, persistent)
            }
        }
    };
}
453
// `String` and `&str` can both be written into a `Zval` as PHP strings.
try_into_zval_str!(String);
try_into_zval_str!(&str);
// NOTE(review): `try_from_zval!` is defined outside this file; presumably it
// generates the `Zval` -> `String` conversions via `Zval::string` — confirm.
try_from_zval!(String, string, String);
457
458impl<'a> FromZval<'a> for &'a str {
459 const TYPE: DataType = DataType::String;
460
461 fn from_zval(zval: &'a Zval) -> Option<Self> {
462 zval.str()
463 }
464}
465
#[cfg(test)]
#[cfg(feature = "embed")]
mod tests {
    use crate::embed::Embed;

    /// Evaluating a PHP string literal yields a string zval with the same
    /// contents.
    #[test]
    fn test_string() {
        Embed::run(|| {
            let evaluated = Embed::eval("'foo';");
            assert!(evaluated.is_ok());

            let value = evaluated.as_ref().expect("Unreachable");
            assert!(value.is_string());

            let expected = Some("foo".to_string());
            assert_eq!(value.string(), expected);
        });
    }
}