ext_php_rs/types/
string.rs

1//! Represents a string in the PHP world. Similar to a C string, but is
2//! reference counted and contains the length of the string.
3
4use std::{
5    borrow::Cow,
6    convert::TryFrom,
7    ffi::{CStr, CString},
8    fmt::Debug,
9    ptr, slice,
10};
11
12use parking_lot::{Mutex, const_mutex};
13
14use crate::{
15    boxed::{ZBox, ZBoxable},
16    convert::{FromZval, IntoZval},
17    error::{Error, Result},
18    ffi::{
19        ext_php_rs_is_known_valid_utf8, ext_php_rs_set_known_valid_utf8,
20        ext_php_rs_zend_string_init, ext_php_rs_zend_string_release, zend_string,
21        zend_string_init_interned,
22    },
23    flags::DataType,
24    types::Zval,
25};
26
/// A borrowed Zend string.
///
/// This is a type alias for the FFI `zend_string` struct, so the methods in
/// this module are implemented directly on the binding type.
///
/// Although this object does implement [`Sized`], it is in fact not sized. As C
/// cannot represent unsized types, an array of size 1 is used at the end of the
/// type to represent the contents of the string, therefore this type is
/// actually unsized. All constructors return [`ZBox<ZendStr>`], the owned
/// variant.
///
/// Once the `ptr_metadata` feature lands in stable Rust, this type can
/// potentially be changed to a DST using slices and metadata. See the
/// tracking issue here: <https://github.com/rust-lang/rust/issues/81513>
pub type ZendStr = zend_string;
38
// Adding to the Zend interned string hashtable is not atomic and can be
// contested when PHP is compiled with ZTS, so an empty mutex is used to ensure
// no collisions occur on the Rust side. Not much we can do about collisions
// on the PHP side, but some safety is better than none.
//
// The mutex guards no data of its own (`Mutex<()>`); the interning
// constructors in this file take it before calling into
// `zend_string_init_interned` and hold it until they return.
static INTERNED_LOCK: Mutex<()> = const_mutex(());
44
45// Clippy complains about there being no `is_empty` function when implementing
46// on the alias `ZendStr` :( <https://github.com/rust-lang/rust-clippy/issues/7702>
47#[allow(clippy::len_without_is_empty)]
48impl ZendStr {
49    /// Creates a new Zend string from a slice of bytes.
50    ///
51    /// # Parameters
52    ///
53    /// * `str` - String content.
54    /// * `persistent` - Whether the string should persist through the request
55    ///   boundary.
56    ///
57    /// # Panics
58    ///
59    /// Panics if the function was unable to allocate memory for the Zend
60    /// string.
61    ///
62    /// # Safety
63    ///
64    /// When passing `persistent` as `false`, the caller must ensure that the
65    /// object does not attempt to live after the request finishes. When a
66    /// request starts and finishes in PHP, the Zend heap is deallocated and a
67    /// new one is created, which would leave a dangling pointer in the
68    /// [`ZBox`].
69    ///
70    /// # Example
71    ///
72    /// ```no_run
73    /// use ext_php_rs::types::ZendStr;
74    ///
75    /// let s = ZendStr::new("Hello, world!", false);
76    /// let php = ZendStr::new([80, 72, 80], false);
77    /// ```
78    pub fn new(str: impl AsRef<[u8]>, persistent: bool) -> ZBox<Self> {
79        let s = str.as_ref();
80        // TODO: we should handle the special cases when length is either 0 or 1
81        // see `zend_string_init_fast()` in `zend_string.h`
82        unsafe {
83            let ptr = ext_php_rs_zend_string_init(s.as_ptr().cast(), s.len(), persistent)
84                .as_mut()
85                .expect("Failed to allocate memory for new Zend string");
86            ZBox::from_raw(ptr)
87        }
88    }
89
90    /// Creates a new Zend string from a [`CStr`].
91    ///
92    /// # Parameters
93    ///
94    /// * `str` - String content.
95    /// * `persistent` - Whether the string should persist through the request
96    ///   boundary.
97    ///
98    /// # Panics
99    ///
100    /// Panics if the function was unable to allocate memory for the Zend
101    /// string.
102    ///
103    /// # Safety
104    ///
105    /// When passing `persistent` as `false`, the caller must ensure that the
106    /// object does not attempt to live after the request finishes. When a
107    /// request starts and finishes in PHP, the Zend heap is deallocated and a
108    /// new one is created, which would leave a dangling pointer in the
109    /// [`ZBox`].
110    ///
111    /// # Example
112    ///
113    /// ```no_run
114    /// use ext_php_rs::types::ZendStr;
115    /// use std::ffi::CString;
116    ///
117    /// let c_s = CString::new("Hello world!").unwrap();
118    /// let s = ZendStr::from_c_str(&c_s, false);
119    /// ```
120    #[must_use]
121    pub fn from_c_str(str: &CStr, persistent: bool) -> ZBox<Self> {
122        unsafe {
123            let ptr =
124                ext_php_rs_zend_string_init(str.as_ptr(), str.to_bytes().len() as _, persistent);
125
126            ZBox::from_raw(
127                ptr.as_mut()
128                    .expect("Failed to allocate memory for new Zend string"),
129            )
130        }
131    }
132
133    /// Creates a new interned Zend string from a slice of bytes.
134    ///
135    /// An interned string is only ever stored once and is immutable. PHP stores
136    /// the string in an internal hashtable which stores the interned
137    /// strings.
138    ///
139    /// As Zend hashtables are not thread-safe, a mutex is used to prevent two
140    /// interned strings from being created at the same time.
141    ///
142    /// Interned strings are not used very often. You should almost always use a
143    /// regular zend string, except in the case that you know you will use a
144    /// string that PHP will already have interned, such as "PHP".
145    ///
146    /// # Parameters
147    ///
148    /// * `str` - String content.
149    /// * `persistent` - Whether the string should persist through the request
150    ///   boundary.
151    ///
152    /// # Panics
153    ///
154    /// Panics under the following circumstances:
155    ///
156    /// * The function used to create interned strings has not been set.
157    /// * The function could not allocate enough memory for the Zend string.
158    ///
159    /// # Safety
160    ///
161    /// When passing `persistent` as `false`, the caller must ensure that the
162    /// object does not attempt to live after the request finishes. When a
163    /// request starts and finishes in PHP, the Zend heap is deallocated and a
164    /// new one is created, which would leave a dangling pointer in the
165    /// [`ZBox`].
166    ///
167    /// # Example
168    ///
169    /// ```no_run
170    /// use ext_php_rs::types::ZendStr;
171    ///
172    /// let s = ZendStr::new_interned("PHP", true);
173    /// ```
174    pub fn new_interned(str: impl AsRef<[u8]>, persistent: bool) -> ZBox<Self> {
175        let _lock = INTERNED_LOCK.lock();
176        let s = str.as_ref();
177        unsafe {
178            let init = zend_string_init_interned.expect("`zend_string_init_interned` not ready");
179            let ptr = init(s.as_ptr().cast(), s.len() as _, persistent)
180                .as_mut()
181                .expect("Failed to allocate memory for new Zend string");
182            ZBox::from_raw(ptr)
183        }
184    }
185
186    /// Creates a new interned Zend string from a [`CStr`].
187    ///
188    /// An interned string is only ever stored once and is immutable. PHP stores
189    /// the string in an internal hashtable which stores the interned
190    /// strings.
191    ///
192    /// As Zend hashtables are not thread-safe, a mutex is used to prevent two
193    /// interned strings from being created at the same time.
194    ///
195    /// Interned strings are not used very often. You should almost always use a
196    /// regular zend string, except in the case that you know you will use a
197    /// string that PHP will already have interned, such as "PHP".
198    ///
199    /// # Parameters
200    ///
201    /// * `str` - String content.
202    /// * `persistent` - Whether the string should persist through the request
203    ///   boundary.
204    ///
205    /// # Panics
206    ///
207    /// Panics under the following circumstances:
208    ///
209    /// * The function used to create interned strings has not been set.
210    /// * The function could not allocate enough memory for the Zend string.
211    ///
212    /// # Safety
213    ///
214    /// When passing `persistent` as `false`, the caller must ensure that the
215    /// object does not attempt to live after the request finishes. When a
216    /// request starts and finishes in PHP, the Zend heap is deallocated and a
217    /// new one is created, which would leave a dangling pointer in the
218    /// [`ZBox`].
219    ///
220    /// # Example
221    ///
222    /// ```no_run
223    /// use ext_php_rs::types::ZendStr;
224    /// use std::ffi::CString;
225    ///
226    /// let c_s = CString::new("PHP").unwrap();
227    /// let s = ZendStr::interned_from_c_str(&c_s, true);
228    /// ```
229    pub fn interned_from_c_str(str: &CStr, persistent: bool) -> ZBox<Self> {
230        let _lock = INTERNED_LOCK.lock();
231
232        unsafe {
233            let init = zend_string_init_interned.expect("`zend_string_init_interned` not ready");
234            let ptr = init(str.as_ptr(), str.to_bytes().len() as _, persistent);
235
236            ZBox::from_raw(
237                ptr.as_mut()
238                    .expect("Failed to allocate memory for new Zend string"),
239            )
240        }
241    }
242
243    /// Returns the length of the string.
244    ///
245    /// # Example
246    ///
247    /// ```no_run
248    /// use ext_php_rs::types::ZendStr;
249    ///
250    /// let s = ZendStr::new("hello, world!", false);
251    /// assert_eq!(s.len(), 13);
252    /// ```
253    #[must_use]
254    pub fn len(&self) -> usize {
255        self.len
256    }
257
258    /// Returns true if the string is empty, false otherwise.
259    ///
260    /// # Example
261    ///
262    /// ```no_run
263    /// use ext_php_rs::types::ZendStr;
264    ///
265    /// let s = ZendStr::new("hello, world!", false);
266    /// assert_eq!(s.is_empty(), false);
267    /// ```
268    #[must_use]
269    pub fn is_empty(&self) -> bool {
270        self.len() == 0
271    }
272
273    /// Attempts to return a reference to the underlying bytes inside the Zend
274    /// string as a [`CStr`].
275    ///
276    /// # Errors
277    ///
278    /// Returns an [`Error::InvalidCString`] variant if the string contains null
279    /// bytes.
280    pub fn as_c_str(&self) -> Result<&CStr> {
281        let bytes_with_null =
282            unsafe { slice::from_raw_parts(self.val.as_ptr().cast(), self.len() + 1) };
283        CStr::from_bytes_with_nul(bytes_with_null).map_err(|_| Error::InvalidCString)
284    }
285
286    /// Attempts to return a reference to the underlying bytes inside the Zend
287    /// string.
288    ///
289    /// # Errors
290    ///
291    /// Returns an [`Error::InvalidUtf8`] variant if the [`str`] contains
292    /// non-UTF-8 characters.
293    ///
294    /// # Example
295    ///
296    /// ```no_run
297    /// use ext_php_rs::types::ZendStr;
298    ///
299    /// let s = ZendStr::new("hello, world!", false);
300    /// assert!(s.as_str().is_ok());
301    /// ```
302    pub fn as_str(&self) -> Result<&str> {
303        if unsafe { ext_php_rs_is_known_valid_utf8(self.as_ptr()) } {
304            let str = unsafe { std::str::from_utf8_unchecked(self.as_bytes()) };
305            return Ok(str);
306        }
307        let str = std::str::from_utf8(self.as_bytes()).map_err(|_| Error::InvalidUtf8)?;
308        unsafe { ext_php_rs_set_known_valid_utf8(self.as_ptr().cast_mut()) };
309        Ok(str)
310    }
311
312    /// Returns a reference to the underlying bytes inside the Zend string.
313    #[must_use]
314    pub fn as_bytes(&self) -> &[u8] {
315        unsafe { slice::from_raw_parts(self.val.as_ptr().cast(), self.len()) }
316    }
317
318    /// Returns a raw pointer to this object
319    #[must_use]
320    pub fn as_ptr(&self) -> *const ZendStr {
321        ptr::from_ref(self)
322    }
323
324    /// Returns a mutable pointer to this object
325    pub fn as_mut_ptr(&mut self) -> *mut ZendStr {
326        ptr::from_mut(self)
327    }
328}
329
330unsafe impl ZBoxable for ZendStr {
331    fn free(&mut self) {
332        unsafe { ext_php_rs_zend_string_release(self) };
333    }
334}
335
336impl Debug for ZendStr {
337    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
338        self.as_str().fmt(f)
339    }
340}
341
342impl AsRef<[u8]> for ZendStr {
343    fn as_ref(&self) -> &[u8] {
344        self.as_bytes()
345    }
346}
347
348impl<T> PartialEq<T> for ZendStr
349where
350    T: AsRef<[u8]>,
351{
352    fn eq(&self, other: &T) -> bool {
353        self.as_ref() == other.as_ref()
354    }
355}
356
357impl ToOwned for ZendStr {
358    type Owned = ZBox<ZendStr>;
359
360    fn to_owned(&self) -> Self::Owned {
361        Self::new(self.as_bytes(), false)
362    }
363}
364
365impl<'a> TryFrom<&'a ZendStr> for &'a CStr {
366    type Error = Error;
367
368    fn try_from(value: &'a ZendStr) -> Result<Self> {
369        value.as_c_str()
370    }
371}
372
373impl<'a> TryFrom<&'a ZendStr> for &'a str {
374    type Error = Error;
375
376    fn try_from(value: &'a ZendStr) -> Result<Self> {
377        value.as_str()
378    }
379}
380
381impl TryFrom<&ZendStr> for String {
382    type Error = Error;
383
384    fn try_from(value: &ZendStr) -> Result<Self> {
385        value.as_str().map(ToString::to_string)
386    }
387}
388
389impl<'a> From<&'a ZendStr> for Cow<'a, ZendStr> {
390    fn from(value: &'a ZendStr) -> Self {
391        Cow::Borrowed(value)
392    }
393}
394
395impl From<&CStr> for ZBox<ZendStr> {
396    fn from(value: &CStr) -> Self {
397        ZendStr::from_c_str(value, false)
398    }
399}
400
401impl From<CString> for ZBox<ZendStr> {
402    fn from(value: CString) -> Self {
403        ZendStr::from_c_str(&value, false)
404    }
405}
406
407impl From<&str> for ZBox<ZendStr> {
408    fn from(value: &str) -> Self {
409        ZendStr::new(value.as_bytes(), false)
410    }
411}
412
413impl From<String> for ZBox<ZendStr> {
414    fn from(value: String) -> Self {
415        ZendStr::new(value.as_str(), false)
416    }
417}
418
419impl From<ZBox<ZendStr>> for Cow<'_, ZendStr> {
420    fn from(value: ZBox<ZendStr>) -> Self {
421        Cow::Owned(value)
422    }
423}
424
425impl From<Cow<'_, ZendStr>> for ZBox<ZendStr> {
426    fn from(value: Cow<'_, ZendStr>) -> Self {
427        value.into_owned()
428    }
429}
430
// Generates the string-to-`Zval` conversions for a string-like type:
// `TryFrom<$t> for Zval` (always non-persistent) and `IntoZval for $t`
// (persistence chosen by the caller). Both store the value through
// `Zval::set_string`.
macro_rules! try_into_zval_str {
    ($t:ty) => {
        impl TryFrom<$t> for Zval {
            type Error = Error;

            fn try_from(value: $t) -> Result<Self> {
                let mut zval = Zval::new();
                // Hand the zval back only when the string was stored.
                zval.set_string(&value, false).map(|()| zval)
            }
        }

        impl IntoZval for $t {
            const TYPE: DataType = DataType::String;
            const NULLABLE: bool = false;

            fn set_zval(self, zv: &mut Zval, persistent: bool) -> Result<()> {
                Zval::set_string(zv, &self, persistent)
            }
        }
    };
}
453
// `TryFrom<_> for Zval` and `IntoZval` for owned and borrowed Rust strings.
try_into_zval_str!(String);
try_into_zval_str!(&str);
// `try_from_zval!` is defined outside this file; presumably it wires up the
// reverse conversion (`Zval` -> `String`) via `Zval::string` — confirm against
// the macro definition.
try_from_zval!(String, string, String);
457
458impl<'a> FromZval<'a> for &'a str {
459    const TYPE: DataType = DataType::String;
460
461    fn from_zval(zval: &'a Zval) -> Option<Self> {
462        zval.str()
463    }
464}
465
#[cfg(test)]
#[cfg(feature = "embed")]
mod tests {
    use crate::embed::Embed;

    /// Evaluating a PHP string literal yields a string zval with the same
    /// contents.
    #[test]
    fn test_string() {
        Embed::run(|| {
            let evaluated = Embed::eval("'foo';");
            assert!(evaluated.is_ok());

            let value = evaluated.as_ref().expect("Unreachable");
            assert!(value.is_string());

            let expected = Some("foo".to_string());
            assert_eq!(value.string(), expected);
        });
    }
}