ext_php_rs/types/
string.rs

1//! Represents a string in the PHP world. Similar to a C string, but is
2//! reference counted and contains the length of the string.
3
4use std::{
5    borrow::Cow,
6    convert::TryFrom,
7    ffi::{CStr, CString},
8    fmt::Debug,
9    ptr, slice,
10};
11
12use parking_lot::{const_mutex, Mutex};
13
14use crate::{
15    boxed::{ZBox, ZBoxable},
16    convert::{FromZval, IntoZval},
17    error::{Error, Result},
18    ffi::{
19        ext_php_rs_is_known_valid_utf8, ext_php_rs_set_known_valid_utf8,
20        ext_php_rs_zend_string_init, ext_php_rs_zend_string_release, zend_string,
21        zend_string_init_interned,
22    },
23    flags::DataType,
24    macros::try_from_zval,
25    types::Zval,
26};
27
/// A borrowed Zend string.
///
/// This is a plain alias of the raw `zend_string` binding, so the inherent
/// methods in this module are implemented directly on that type.
///
/// Although this object does implement [`Sized`], it is in fact not sized. As C
/// cannot represent unsized types, an array of size 1 is used at the end of the
/// type to represent the contents of the string, therefore this type is
/// actually unsized. All constructors return [`ZBox<ZendStr>`], the owned
/// variant.
///
/// Once the `ptr_metadata` feature lands in stable rust, this type can
/// potentially be changed to a DST using slices and metadata. See the
/// tracking issue here: <https://github.com/rust-lang/rust/issues/81513>
pub type ZendStr = zend_string;
39
// Adding to the Zend interned string hashtable is not atomic and can be
// contested when PHP is compiled with ZTS, so an empty mutex is used to ensure
// no collisions occur on the Rust side. Not much we can do about collisions
// on the PHP side, but some safety is better than none.
//
// The mutex guards no data of its own (`()`); it is locked at the start of
// the interned-string constructors purely to serialise those calls.
static INTERNED_LOCK: Mutex<()> = const_mutex(());
45
46// Clippy complains about there being no `is_empty` function when implementing
47// on the alias `ZendStr` :( <https://github.com/rust-lang/rust-clippy/issues/7702>
48#[allow(clippy::len_without_is_empty)]
49impl ZendStr {
50    /// Creates a new Zend string from a slice of bytes.
51    ///
52    /// # Parameters
53    ///
54    /// * `str` - String content.
55    /// * `persistent` - Whether the string should persist through the request
56    ///   boundary.
57    ///
58    /// # Panics
59    ///
60    /// Panics if the function was unable to allocate memory for the Zend
61    /// string.
62    ///
63    /// # Safety
64    ///
65    /// When passing `persistent` as `false`, the caller must ensure that the
66    /// object does not attempt to live after the request finishes. When a
67    /// request starts and finishes in PHP, the Zend heap is deallocated and a
68    /// new one is created, which would leave a dangling pointer in the
69    /// [`ZBox`].
70    ///
71    /// # Example
72    ///
73    /// ```no_run
74    /// use ext_php_rs::types::ZendStr;
75    ///
76    /// let s = ZendStr::new("Hello, world!", false);
77    /// let php = ZendStr::new([80, 72, 80], false);
78    /// ```
79    pub fn new(str: impl AsRef<[u8]>, persistent: bool) -> ZBox<Self> {
80        let s = str.as_ref();
81        // TODO: we should handle the special cases when length is either 0 or 1
82        // see `zend_string_init_fast()` in `zend_string.h`
83        unsafe {
84            let ptr = ext_php_rs_zend_string_init(s.as_ptr().cast(), s.len(), persistent)
85                .as_mut()
86                .expect("Failed to allocate memory for new Zend string");
87            ZBox::from_raw(ptr)
88        }
89    }
90
91    /// Creates a new Zend string from a [`CStr`].
92    ///
93    /// # Parameters
94    ///
95    /// * `str` - String content.
96    /// * `persistent` - Whether the string should persist through the request
97    ///   boundary.
98    ///
99    /// # Panics
100    ///
101    /// Panics if the function was unable to allocate memory for the Zend
102    /// string.
103    ///
104    /// # Safety
105    ///
106    /// When passing `persistent` as `false`, the caller must ensure that the
107    /// object does not attempt to live after the request finishes. When a
108    /// request starts and finishes in PHP, the Zend heap is deallocated and a
109    /// new one is created, which would leave a dangling pointer in the
110    /// [`ZBox`].
111    ///
112    /// # Example
113    ///
114    /// ```no_run
115    /// use ext_php_rs::types::ZendStr;
116    /// use std::ffi::CString;
117    ///
118    /// let c_s = CString::new("Hello world!").unwrap();
119    /// let s = ZendStr::from_c_str(&c_s, false);
120    /// ```
121    #[must_use]
122    pub fn from_c_str(str: &CStr, persistent: bool) -> ZBox<Self> {
123        unsafe {
124            let ptr =
125                ext_php_rs_zend_string_init(str.as_ptr(), str.to_bytes().len() as _, persistent);
126
127            ZBox::from_raw(
128                ptr.as_mut()
129                    .expect("Failed to allocate memory for new Zend string"),
130            )
131        }
132    }
133
134    /// Creates a new interned Zend string from a slice of bytes.
135    ///
136    /// An interned string is only ever stored once and is immutable. PHP stores
137    /// the string in an internal hashtable which stores the interned
138    /// strings.
139    ///
140    /// As Zend hashtables are not thread-safe, a mutex is used to prevent two
141    /// interned strings from being created at the same time.
142    ///
143    /// Interned strings are not used very often. You should almost always use a
144    /// regular zend string, except in the case that you know you will use a
145    /// string that PHP will already have interned, such as "PHP".
146    ///
147    /// # Parameters
148    ///
149    /// * `str` - String content.
150    /// * `persistent` - Whether the string should persist through the request
151    ///   boundary.
152    ///
153    /// # Panics
154    ///
155    /// Panics under the following circumstances:
156    ///
157    /// * The function used to create interned strings has not been set.
158    /// * The function could not allocate enough memory for the Zend string.
159    ///
160    /// # Safety
161    ///
162    /// When passing `persistent` as `false`, the caller must ensure that the
163    /// object does not attempt to live after the request finishes. When a
164    /// request starts and finishes in PHP, the Zend heap is deallocated and a
165    /// new one is created, which would leave a dangling pointer in the
166    /// [`ZBox`].
167    ///
168    /// # Example
169    ///
170    /// ```no_run
171    /// use ext_php_rs::types::ZendStr;
172    ///
173    /// let s = ZendStr::new_interned("PHP", true);
174    /// ```
175    pub fn new_interned(str: impl AsRef<[u8]>, persistent: bool) -> ZBox<Self> {
176        let _lock = INTERNED_LOCK.lock();
177        let s = str.as_ref();
178        unsafe {
179            let init = zend_string_init_interned.expect("`zend_string_init_interned` not ready");
180            let ptr = init(s.as_ptr().cast(), s.len() as _, persistent)
181                .as_mut()
182                .expect("Failed to allocate memory for new Zend string");
183            ZBox::from_raw(ptr)
184        }
185    }
186
187    /// Creates a new interned Zend string from a [`CStr`].
188    ///
189    /// An interned string is only ever stored once and is immutable. PHP stores
190    /// the string in an internal hashtable which stores the interned
191    /// strings.
192    ///
193    /// As Zend hashtables are not thread-safe, a mutex is used to prevent two
194    /// interned strings from being created at the same time.
195    ///
196    /// Interned strings are not used very often. You should almost always use a
197    /// regular zend string, except in the case that you know you will use a
198    /// string that PHP will already have interned, such as "PHP".
199    ///
200    /// # Parameters
201    ///
202    /// * `str` - String content.
203    /// * `persistent` - Whether the string should persist through the request
204    ///   boundary.
205    ///
206    /// # Panics
207    ///
208    /// Panics under the following circumstances:
209    ///
210    /// * The function used to create interned strings has not been set.
211    /// * The function could not allocate enough memory for the Zend string.
212    ///
213    /// # Safety
214    ///
215    /// When passing `persistent` as `false`, the caller must ensure that the
216    /// object does not attempt to live after the request finishes. When a
217    /// request starts and finishes in PHP, the Zend heap is deallocated and a
218    /// new one is created, which would leave a dangling pointer in the
219    /// [`ZBox`].
220    ///
221    /// # Example
222    ///
223    /// ```no_run
224    /// use ext_php_rs::types::ZendStr;
225    /// use std::ffi::CString;
226    ///
227    /// let c_s = CString::new("PHP").unwrap();
228    /// let s = ZendStr::interned_from_c_str(&c_s, true);
229    /// ```
230    pub fn interned_from_c_str(str: &CStr, persistent: bool) -> ZBox<Self> {
231        let _lock = INTERNED_LOCK.lock();
232
233        unsafe {
234            let init = zend_string_init_interned.expect("`zend_string_init_interned` not ready");
235            let ptr = init(str.as_ptr(), str.to_bytes().len() as _, persistent);
236
237            ZBox::from_raw(
238                ptr.as_mut()
239                    .expect("Failed to allocate memory for new Zend string"),
240            )
241        }
242    }
243
244    /// Returns the length of the string.
245    ///
246    /// # Example
247    ///
248    /// ```no_run
249    /// use ext_php_rs::types::ZendStr;
250    ///
251    /// let s = ZendStr::new("hello, world!", false);
252    /// assert_eq!(s.len(), 13);
253    /// ```
254    #[must_use]
255    pub fn len(&self) -> usize {
256        self.len
257    }
258
259    /// Returns true if the string is empty, false otherwise.
260    ///
261    /// # Example
262    ///
263    /// ```no_run
264    /// use ext_php_rs::types::ZendStr;
265    ///
266    /// let s = ZendStr::new("hello, world!", false);
267    /// assert_eq!(s.is_empty(), false);
268    /// ```
269    #[must_use]
270    pub fn is_empty(&self) -> bool {
271        self.len() == 0
272    }
273
274    /// Attempts to return a reference to the underlying bytes inside the Zend
275    /// string as a [`CStr`].
276    ///
277    /// # Errors
278    ///
279    /// Returns an [`Error::InvalidCString`] variant if the string contains null
280    /// bytes.
281    pub fn as_c_str(&self) -> Result<&CStr> {
282        let bytes_with_null =
283            unsafe { slice::from_raw_parts(self.val.as_ptr().cast(), self.len() + 1) };
284        CStr::from_bytes_with_nul(bytes_with_null).map_err(|_| Error::InvalidCString)
285    }
286
287    /// Attempts to return a reference to the underlying bytes inside the Zend
288    /// string.
289    ///
290    /// # Errors
291    ///
292    /// Returns an [`Error::InvalidUtf8`] variant if the [`str`] contains
293    /// non-UTF-8 characters.
294    ///
295    /// # Example
296    ///
297    /// ```no_run
298    /// use ext_php_rs::types::ZendStr;
299    ///
300    /// let s = ZendStr::new("hello, world!", false);
301    /// assert!(s.as_str().is_ok());
302    /// ```
303    pub fn as_str(&self) -> Result<&str> {
304        if unsafe { ext_php_rs_is_known_valid_utf8(self.as_ptr()) } {
305            let str = unsafe { std::str::from_utf8_unchecked(self.as_bytes()) };
306            return Ok(str);
307        }
308        let str = std::str::from_utf8(self.as_bytes()).map_err(|_| Error::InvalidUtf8)?;
309        unsafe { ext_php_rs_set_known_valid_utf8(self.as_ptr().cast_mut()) };
310        Ok(str)
311    }
312
313    /// Returns a reference to the underlying bytes inside the Zend string.
314    #[must_use]
315    pub fn as_bytes(&self) -> &[u8] {
316        unsafe { slice::from_raw_parts(self.val.as_ptr().cast(), self.len()) }
317    }
318
319    /// Returns a raw pointer to this object
320    #[must_use]
321    pub fn as_ptr(&self) -> *const ZendStr {
322        ptr::from_ref(self)
323    }
324
325    /// Returns a mutable pointer to this object
326    pub fn as_mut_ptr(&mut self) -> *mut ZendStr {
327        ptr::from_mut(self)
328    }
329}
330
331unsafe impl ZBoxable for ZendStr {
332    fn free(&mut self) {
333        unsafe { ext_php_rs_zend_string_release(self) };
334    }
335}
336
337impl Debug for ZendStr {
338    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
339        self.as_str().fmt(f)
340    }
341}
342
343impl AsRef<[u8]> for ZendStr {
344    fn as_ref(&self) -> &[u8] {
345        self.as_bytes()
346    }
347}
348
349impl<T> PartialEq<T> for ZendStr
350where
351    T: AsRef<[u8]>,
352{
353    fn eq(&self, other: &T) -> bool {
354        self.as_ref() == other.as_ref()
355    }
356}
357
358impl ToOwned for ZendStr {
359    type Owned = ZBox<ZendStr>;
360
361    fn to_owned(&self) -> Self::Owned {
362        Self::new(self.as_bytes(), false)
363    }
364}
365
366impl<'a> TryFrom<&'a ZendStr> for &'a CStr {
367    type Error = Error;
368
369    fn try_from(value: &'a ZendStr) -> Result<Self> {
370        value.as_c_str()
371    }
372}
373
374impl<'a> TryFrom<&'a ZendStr> for &'a str {
375    type Error = Error;
376
377    fn try_from(value: &'a ZendStr) -> Result<Self> {
378        value.as_str()
379    }
380}
381
382impl TryFrom<&ZendStr> for String {
383    type Error = Error;
384
385    fn try_from(value: &ZendStr) -> Result<Self> {
386        value.as_str().map(ToString::to_string)
387    }
388}
389
390impl<'a> From<&'a ZendStr> for Cow<'a, ZendStr> {
391    fn from(value: &'a ZendStr) -> Self {
392        Cow::Borrowed(value)
393    }
394}
395
396impl From<&CStr> for ZBox<ZendStr> {
397    fn from(value: &CStr) -> Self {
398        ZendStr::from_c_str(value, false)
399    }
400}
401
402impl From<CString> for ZBox<ZendStr> {
403    fn from(value: CString) -> Self {
404        ZendStr::from_c_str(&value, false)
405    }
406}
407
408impl From<&str> for ZBox<ZendStr> {
409    fn from(value: &str) -> Self {
410        ZendStr::new(value.as_bytes(), false)
411    }
412}
413
414impl From<String> for ZBox<ZendStr> {
415    fn from(value: String) -> Self {
416        ZendStr::new(value.as_str(), false)
417    }
418}
419
420impl From<ZBox<ZendStr>> for Cow<'_, ZendStr> {
421    fn from(value: ZBox<ZendStr>) -> Self {
422        Cow::Owned(value)
423    }
424}
425
426impl From<Cow<'_, ZendStr>> for ZBox<ZendStr> {
427    fn from(value: Cow<'_, ZendStr>) -> Self {
428        value.into_owned()
429    }
430}
431
432macro_rules! try_into_zval_str {
433    ($type: ty) => {
434        impl TryFrom<$type> for Zval {
435            type Error = Error;
436
437            fn try_from(value: $type) -> Result<Self> {
438                let mut zv = Self::new();
439                zv.set_string(&value, false)?;
440                Ok(zv)
441            }
442        }
443
444        impl IntoZval for $type {
445            const TYPE: DataType = DataType::String;
446            const NULLABLE: bool = false;
447
448            fn set_zval(self, zv: &mut Zval, persistent: bool) -> Result<()> {
449                zv.set_string(&self, persistent)
450            }
451        }
452    };
453}
454
455try_into_zval_str!(String);
456try_into_zval_str!(&str);
457try_from_zval!(String, string, String);
458
459impl<'a> FromZval<'a> for &'a str {
460    const TYPE: DataType = DataType::String;
461
462    fn from_zval(zval: &'a Zval) -> Option<Self> {
463        zval.str()
464    }
465}
466
#[cfg(all(test, feature = "embed"))]
mod tests {
    use crate::embed::Embed;

    // Evaluates a PHP string literal and checks it comes back as a string
    // zval holding the same text.
    #[test]
    fn test_string() {
        Embed::run(|| {
            let evaluated = Embed::eval("'foo';");
            assert!(evaluated.is_ok());

            let value = evaluated.as_ref().expect("Unreachable");
            assert!(value.is_string());

            let expected = Some(String::from("foo"));
            assert_eq!(value.string(), expected);
        });
    }
}