ext_php_rs/types/
string.rs

1//! Represents a string in the PHP world. Similar to a C string, but is
2//! reference counted and contains the length of the string.
3
4use std::{
5    borrow::Cow,
6    convert::TryFrom,
7    ffi::{CStr, CString},
8    fmt::Debug,
9    slice,
10};
11
12use parking_lot::{const_mutex, Mutex};
13
14use crate::{
15    boxed::{ZBox, ZBoxable},
16    convert::{FromZval, IntoZval},
17    error::{Error, Result},
18    ffi::{
19        ext_php_rs_is_known_valid_utf8, ext_php_rs_set_known_valid_utf8,
20        ext_php_rs_zend_string_init, ext_php_rs_zend_string_release, zend_string,
21        zend_string_init_interned,
22    },
23    flags::DataType,
24    macros::try_from_zval,
25    types::Zval,
26};
27
/// A borrowed Zend string.
///
/// This is a plain alias for the raw `zend_string` binding, so the inherent
/// methods and trait implementations in this module are attached directly to
/// the FFI type.
///
/// Although this object does implement [`Sized`], it is in fact not sized. As C
/// cannot represent unsized types, an array of size 1 is used at the end of the
/// type to represent the contents of the string, therefore this type is
/// actually unsized. All constructors return [`ZBox<ZendStr>`], the owned
/// variant.
///
/// Once the `ptr_metadata` feature lands in stable rust, this type can
/// potentially be changed to a DST using slices and metadata. See the
/// tracking issue here: <https://github.com/rust-lang/rust/issues/81513>
pub type ZendStr = zend_string;
39
// Adding to the Zend interned string hashtable is not atomic and can be
// contested when PHP is compiled with ZTS, so an empty mutex is used to ensure
// no collisions occur on the Rust side. Not much we can do about collisions
// on the PHP side, but some safety is better than none.
//
// The guard is taken by `new_interned` and `interned_from_c_str` and held for
// the whole call into `zend_string_init_interned`.
static INTERNED_LOCK: Mutex<()> = const_mutex(());
45
46// Clippy complains about there being no `is_empty` function when implementing
47// on the alias `ZendStr` :( <https://github.com/rust-lang/rust-clippy/issues/7702>
48#[allow(clippy::len_without_is_empty)]
49impl ZendStr {
50    /// Creates a new Zend string from a slice of bytes.
51    ///
52    /// # Parameters
53    ///
54    /// * `str` - String content.
55    /// * `persistent` - Whether the string should persist through the request
56    ///   boundary.
57    ///
58    /// # Panics
59    ///
60    /// Panics if the function was unable to allocate memory for the Zend
61    /// string.
62    ///
63    /// # Safety
64    ///
65    /// When passing `persistent` as `false`, the caller must ensure that the
66    /// object does not attempt to live after the request finishes. When a
67    /// request starts and finishes in PHP, the Zend heap is deallocated and a
68    /// new one is created, which would leave a dangling pointer in the
69    /// [`ZBox`].
70    ///
71    /// # Example
72    ///
73    /// ```no_run
74    /// use ext_php_rs::types::ZendStr;
75    ///
76    /// let s = ZendStr::new("Hello, world!", false);
77    /// let php = ZendStr::new([80, 72, 80], false);
78    /// ```
79    pub fn new(str: impl AsRef<[u8]>, persistent: bool) -> ZBox<Self> {
80        let s = str.as_ref();
81        // TODO: we should handle the special cases when length is either 0 or 1
82        // see `zend_string_init_fast()` in `zend_string.h`
83        unsafe {
84            let ptr = ext_php_rs_zend_string_init(s.as_ptr().cast(), s.len(), persistent)
85                .as_mut()
86                .expect("Failed to allocate memory for new Zend string");
87            ZBox::from_raw(ptr)
88        }
89    }
90
91    /// Creates a new Zend string from a [`CStr`].
92    ///
93    /// # Parameters
94    ///
95    /// * `str` - String content.
96    /// * `persistent` - Whether the string should persist through the request
97    ///   boundary.
98    ///
99    /// # Panics
100    ///
101    /// Panics if the function was unable to allocate memory for the Zend
102    /// string.
103    ///
104    /// # Safety
105    ///
106    /// When passing `persistent` as `false`, the caller must ensure that the
107    /// object does not attempt to live after the request finishes. When a
108    /// request starts and finishes in PHP, the Zend heap is deallocated and a
109    /// new one is created, which would leave a dangling pointer in the
110    /// [`ZBox`].
111    ///
112    /// # Example
113    ///
114    /// ```no_run
115    /// use ext_php_rs::types::ZendStr;
116    /// use std::ffi::CString;
117    ///
118    /// let c_s = CString::new("Hello world!").unwrap();
119    /// let s = ZendStr::from_c_str(&c_s, false);
120    /// ```
121    pub fn from_c_str(str: &CStr, persistent: bool) -> ZBox<Self> {
122        unsafe {
123            let ptr =
124                ext_php_rs_zend_string_init(str.as_ptr(), str.to_bytes().len() as _, persistent);
125
126            ZBox::from_raw(
127                ptr.as_mut()
128                    .expect("Failed to allocate memory for new Zend string"),
129            )
130        }
131    }
132
133    /// Creates a new interned Zend string from a slice of bytes.
134    ///
135    /// An interned string is only ever stored once and is immutable. PHP stores
136    /// the string in an internal hashtable which stores the interned
137    /// strings.
138    ///
139    /// As Zend hashtables are not thread-safe, a mutex is used to prevent two
140    /// interned strings from being created at the same time.
141    ///
142    /// Interned strings are not used very often. You should almost always use a
143    /// regular zend string, except in the case that you know you will use a
144    /// string that PHP will already have interned, such as "PHP".
145    ///
146    /// # Parameters
147    ///
148    /// * `str` - String content.
149    /// * `persistent` - Whether the string should persist through the request
150    ///   boundary.
151    ///
152    /// # Panics
153    ///
154    /// Panics under the following circumstances:
155    ///
156    /// * The function used to create interned strings has not been set.
157    /// * The function could not allocate enough memory for the Zend string.
158    ///
159    /// # Safety
160    ///
161    /// When passing `persistent` as `false`, the caller must ensure that the
162    /// object does not attempt to live after the request finishes. When a
163    /// request starts and finishes in PHP, the Zend heap is deallocated and a
164    /// new one is created, which would leave a dangling pointer in the
165    /// [`ZBox`].
166    ///
167    /// # Example
168    ///
169    /// ```no_run
170    /// use ext_php_rs::types::ZendStr;
171    ///
172    /// let s = ZendStr::new_interned("PHP", true);
173    /// ```
174    pub fn new_interned(str: impl AsRef<[u8]>, persistent: bool) -> ZBox<Self> {
175        let _lock = INTERNED_LOCK.lock();
176        let s = str.as_ref();
177        unsafe {
178            let init = zend_string_init_interned.expect("`zend_string_init_interned` not ready");
179            let ptr = init(s.as_ptr().cast(), s.len() as _, persistent)
180                .as_mut()
181                .expect("Failed to allocate memory for new Zend string");
182            ZBox::from_raw(ptr)
183        }
184    }
185
186    /// Creates a new interned Zend string from a [`CStr`].
187    ///
188    /// An interned string is only ever stored once and is immutable. PHP stores
189    /// the string in an internal hashtable which stores the interned
190    /// strings.
191    ///
192    /// As Zend hashtables are not thread-safe, a mutex is used to prevent two
193    /// interned strings from being created at the same time.
194    ///
195    /// Interned strings are not used very often. You should almost always use a
196    /// regular zend string, except in the case that you know you will use a
197    /// string that PHP will already have interned, such as "PHP".
198    ///
199    /// # Parameters
200    ///
201    /// * `str` - String content.
202    /// * `persistent` - Whether the string should persist through the request
203    ///   boundary.
204    ///
205    /// # Panics
206    ///
207    /// Panics under the following circumstances:
208    ///
209    /// * The function used to create interned strings has not been set.
210    /// * The function could not allocate enough memory for the Zend string.
211    ///
212    /// # Safety
213    ///
214    /// When passing `persistent` as `false`, the caller must ensure that the
215    /// object does not attempt to live after the request finishes. When a
216    /// request starts and finishes in PHP, the Zend heap is deallocated and a
217    /// new one is created, which would leave a dangling pointer in the
218    /// [`ZBox`].
219    ///
220    /// # Example
221    ///
222    /// ```no_run
223    /// use ext_php_rs::types::ZendStr;
224    /// use std::ffi::CString;
225    ///
226    /// let c_s = CString::new("PHP").unwrap();
227    /// let s = ZendStr::interned_from_c_str(&c_s, true);
228    /// ```
229    pub fn interned_from_c_str(str: &CStr, persistent: bool) -> ZBox<Self> {
230        let _lock = INTERNED_LOCK.lock();
231
232        unsafe {
233            let init = zend_string_init_interned.expect("`zend_string_init_interned` not ready");
234            let ptr = init(str.as_ptr(), str.to_bytes().len() as _, persistent);
235
236            ZBox::from_raw(
237                ptr.as_mut()
238                    .expect("Failed to allocate memory for new Zend string"),
239            )
240        }
241    }
242
243    /// Returns the length of the string.
244    ///
245    /// # Example
246    ///
247    /// ```no_run
248    /// use ext_php_rs::types::ZendStr;
249    ///
250    /// let s = ZendStr::new("hello, world!", false);
251    /// assert_eq!(s.len(), 13);
252    /// ```
253    pub fn len(&self) -> usize {
254        self.len
255    }
256
257    /// Returns true if the string is empty, false otherwise.
258    ///
259    /// # Example
260    ///
261    /// ```no_run
262    /// use ext_php_rs::types::ZendStr;
263    ///
264    /// let s = ZendStr::new("hello, world!", false);
265    /// assert_eq!(s.is_empty(), false);
266    /// ```
267    pub fn is_empty(&self) -> bool {
268        self.len() == 0
269    }
270
271    /// Attempts to return a reference to the underlying bytes inside the Zend
272    /// string as a [`CStr`].
273    ///
274    /// Returns an [Error::InvalidCString] variant if the string contains null
275    /// bytes.
276    pub fn as_c_str(&self) -> Result<&CStr> {
277        let bytes_with_null =
278            unsafe { slice::from_raw_parts(self.val.as_ptr().cast(), self.len() + 1) };
279        CStr::from_bytes_with_nul(bytes_with_null).map_err(|_| Error::InvalidCString)
280    }
281
282    /// Attempts to return a reference to the underlying bytes inside the Zend
283    /// string.
284    ///
285    /// Returns an [Error::InvalidUtf8] variant if the [`str`] contains
286    /// non-UTF-8 characters.
287    ///
288    /// # Example
289    ///
290    /// ```no_run
291    /// use ext_php_rs::types::ZendStr;
292    ///
293    /// let s = ZendStr::new("hello, world!", false);
294    /// assert!(s.as_str().is_ok());
295    /// ```
296    pub fn as_str(&self) -> Result<&str> {
297        if unsafe { ext_php_rs_is_known_valid_utf8(self.as_ptr()) } {
298            let str = unsafe { std::str::from_utf8_unchecked(self.as_bytes()) };
299            return Ok(str);
300        }
301        let str = std::str::from_utf8(self.as_bytes()).map_err(|_| Error::InvalidUtf8)?;
302        unsafe { ext_php_rs_set_known_valid_utf8(self.as_ptr() as *mut _) };
303        Ok(str)
304    }
305
306    /// Returns a reference to the underlying bytes inside the Zend string.
307    pub fn as_bytes(&self) -> &[u8] {
308        unsafe { slice::from_raw_parts(self.val.as_ptr().cast(), self.len()) }
309    }
310
311    /// Returns a raw pointer to this object
312    pub fn as_ptr(&self) -> *const ZendStr {
313        self as *const _
314    }
315
316    /// Returns a mutable pointer to this object
317    pub fn as_mut_ptr(&mut self) -> *mut ZendStr {
318        self as *mut _
319    }
320}
321
322unsafe impl ZBoxable for ZendStr {
323    fn free(&mut self) {
324        unsafe { ext_php_rs_zend_string_release(self) };
325    }
326}
327
328impl Debug for ZendStr {
329    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
330        self.as_str().fmt(f)
331    }
332}
333
334impl AsRef<[u8]> for ZendStr {
335    fn as_ref(&self) -> &[u8] {
336        self.as_bytes()
337    }
338}
339
340impl<T> PartialEq<T> for ZendStr
341where
342    T: AsRef<[u8]>,
343{
344    fn eq(&self, other: &T) -> bool {
345        self.as_ref() == other.as_ref()
346    }
347}
348
349impl ToOwned for ZendStr {
350    type Owned = ZBox<ZendStr>;
351
352    fn to_owned(&self) -> Self::Owned {
353        Self::new(self.as_bytes(), false)
354    }
355}
356
357impl<'a> TryFrom<&'a ZendStr> for &'a CStr {
358    type Error = Error;
359
360    fn try_from(value: &'a ZendStr) -> Result<Self> {
361        value.as_c_str()
362    }
363}
364
365impl<'a> TryFrom<&'a ZendStr> for &'a str {
366    type Error = Error;
367
368    fn try_from(value: &'a ZendStr) -> Result<Self> {
369        value.as_str()
370    }
371}
372
373impl TryFrom<&ZendStr> for String {
374    type Error = Error;
375
376    fn try_from(value: &ZendStr) -> Result<Self> {
377        value.as_str().map(ToString::to_string)
378    }
379}
380
381impl<'a> From<&'a ZendStr> for Cow<'a, ZendStr> {
382    fn from(value: &'a ZendStr) -> Self {
383        Cow::Borrowed(value)
384    }
385}
386
387impl From<&CStr> for ZBox<ZendStr> {
388    fn from(value: &CStr) -> Self {
389        ZendStr::from_c_str(value, false)
390    }
391}
392
393impl From<CString> for ZBox<ZendStr> {
394    fn from(value: CString) -> Self {
395        ZendStr::from_c_str(&value, false)
396    }
397}
398
399impl From<&str> for ZBox<ZendStr> {
400    fn from(value: &str) -> Self {
401        ZendStr::new(value.as_bytes(), false)
402    }
403}
404
405impl From<String> for ZBox<ZendStr> {
406    fn from(value: String) -> Self {
407        ZendStr::new(value.as_str(), false)
408    }
409}
410
411impl From<ZBox<ZendStr>> for Cow<'_, ZendStr> {
412    fn from(value: ZBox<ZendStr>) -> Self {
413        Cow::Owned(value)
414    }
415}
416
417impl From<Cow<'_, ZendStr>> for ZBox<ZendStr> {
418    fn from(value: Cow<'_, ZendStr>) -> Self {
419        value.into_owned()
420    }
421}
422
423macro_rules! try_into_zval_str {
424    ($type: ty) => {
425        impl TryFrom<$type> for Zval {
426            type Error = Error;
427
428            fn try_from(value: $type) -> Result<Self> {
429                let mut zv = Self::new();
430                zv.set_string(&value, false)?;
431                Ok(zv)
432            }
433        }
434
435        impl IntoZval for $type {
436            const TYPE: DataType = DataType::String;
437
438            fn set_zval(self, zv: &mut Zval, persistent: bool) -> Result<()> {
439                zv.set_string(&self, persistent)
440            }
441        }
442    };
443}
444
445try_into_zval_str!(String);
446try_into_zval_str!(&str);
447try_from_zval!(String, string, String);
448
449impl<'a> FromZval<'a> for &'a str {
450    const TYPE: DataType = DataType::String;
451
452    fn from_zval(zval: &'a Zval) -> Option<Self> {
453        zval.str()
454    }
455}
456
#[cfg(all(test, feature = "embed"))]
mod tests {
    use crate::embed::Embed;

    /// Evaluates a PHP string literal and checks it comes back as a string
    /// zval with the same content.
    #[test]
    fn test_string() {
        Embed::run(|| {
            let evaluated = Embed::eval("'foo';");
            assert!(evaluated.is_ok());

            let zval = evaluated.as_ref().unwrap();
            assert!(zval.is_string());

            let content = zval.string().unwrap();
            assert_eq!(content, "foo");
        });
    }
}