ext_php_rs/types/string.rs
1//! Represents a string in the PHP world. Similar to a C string, but is
2//! reference counted and contains the length of the string.
3
4use std::{
5 borrow::Cow,
6 convert::TryFrom,
7 ffi::{CStr, CString},
8 fmt::Debug,
9 ptr, slice,
10};
11
12use parking_lot::{const_mutex, Mutex};
13
14use crate::{
15 boxed::{ZBox, ZBoxable},
16 convert::{FromZval, IntoZval},
17 error::{Error, Result},
18 ffi::{
19 ext_php_rs_is_known_valid_utf8, ext_php_rs_set_known_valid_utf8,
20 ext_php_rs_zend_string_init, ext_php_rs_zend_string_release, zend_string,
21 zend_string_init_interned,
22 },
23 flags::DataType,
24 macros::try_from_zval,
25 types::Zval,
26};
27
/// A borrowed Zend string.
///
/// Although this type does implement [`Sized`], it is in fact not sized. C
/// cannot represent unsized types, so an array of size 1 sits at the end of
/// the type to represent the contents of the string; the type is therefore
/// actually unsized. All constructors return [`ZBox<ZendStr>`], the owned
/// variant.
///
/// Once the `ptr_metadata` feature lands in stable Rust, this type can
/// potentially be changed to a DST using slices and metadata. See the
/// tracking issue here: <https://github.com/rust-lang/rust/issues/81513>
pub type ZendStr = zend_string;
39
// Adding to the Zend interned string hashtable is not atomic and can be
// contended when PHP is compiled with ZTS, so a mutex guarding no data is
// used to make sure no collisions occur on the Rust side. Not much we can do
// about collisions on the PHP side, but some safety is better than none.
//
// Both interned-string constructors take this lock before calling into PHP.
static INTERNED_LOCK: Mutex<()> = const_mutex(());
45
46// Clippy complains about there being no `is_empty` function when implementing
47// on the alias `ZendStr` :( <https://github.com/rust-lang/rust-clippy/issues/7702>
48#[allow(clippy::len_without_is_empty)]
49impl ZendStr {
50 /// Creates a new Zend string from a slice of bytes.
51 ///
52 /// # Parameters
53 ///
54 /// * `str` - String content.
55 /// * `persistent` - Whether the string should persist through the request
56 /// boundary.
57 ///
58 /// # Panics
59 ///
60 /// Panics if the function was unable to allocate memory for the Zend
61 /// string.
62 ///
63 /// # Safety
64 ///
65 /// When passing `persistent` as `false`, the caller must ensure that the
66 /// object does not attempt to live after the request finishes. When a
67 /// request starts and finishes in PHP, the Zend heap is deallocated and a
68 /// new one is created, which would leave a dangling pointer in the
69 /// [`ZBox`].
70 ///
71 /// # Example
72 ///
73 /// ```no_run
74 /// use ext_php_rs::types::ZendStr;
75 ///
76 /// let s = ZendStr::new("Hello, world!", false);
77 /// let php = ZendStr::new([80, 72, 80], false);
78 /// ```
79 pub fn new(str: impl AsRef<[u8]>, persistent: bool) -> ZBox<Self> {
80 let s = str.as_ref();
81 // TODO: we should handle the special cases when length is either 0 or 1
82 // see `zend_string_init_fast()` in `zend_string.h`
83 unsafe {
84 let ptr = ext_php_rs_zend_string_init(s.as_ptr().cast(), s.len(), persistent)
85 .as_mut()
86 .expect("Failed to allocate memory for new Zend string");
87 ZBox::from_raw(ptr)
88 }
89 }
90
91 /// Creates a new Zend string from a [`CStr`].
92 ///
93 /// # Parameters
94 ///
95 /// * `str` - String content.
96 /// * `persistent` - Whether the string should persist through the request
97 /// boundary.
98 ///
99 /// # Panics
100 ///
101 /// Panics if the function was unable to allocate memory for the Zend
102 /// string.
103 ///
104 /// # Safety
105 ///
106 /// When passing `persistent` as `false`, the caller must ensure that the
107 /// object does not attempt to live after the request finishes. When a
108 /// request starts and finishes in PHP, the Zend heap is deallocated and a
109 /// new one is created, which would leave a dangling pointer in the
110 /// [`ZBox`].
111 ///
112 /// # Example
113 ///
114 /// ```no_run
115 /// use ext_php_rs::types::ZendStr;
116 /// use std::ffi::CString;
117 ///
118 /// let c_s = CString::new("Hello world!").unwrap();
119 /// let s = ZendStr::from_c_str(&c_s, false);
120 /// ```
121 #[must_use]
122 pub fn from_c_str(str: &CStr, persistent: bool) -> ZBox<Self> {
123 unsafe {
124 let ptr =
125 ext_php_rs_zend_string_init(str.as_ptr(), str.to_bytes().len() as _, persistent);
126
127 ZBox::from_raw(
128 ptr.as_mut()
129 .expect("Failed to allocate memory for new Zend string"),
130 )
131 }
132 }
133
134 /// Creates a new interned Zend string from a slice of bytes.
135 ///
136 /// An interned string is only ever stored once and is immutable. PHP stores
137 /// the string in an internal hashtable which stores the interned
138 /// strings.
139 ///
140 /// As Zend hashtables are not thread-safe, a mutex is used to prevent two
141 /// interned strings from being created at the same time.
142 ///
143 /// Interned strings are not used very often. You should almost always use a
144 /// regular zend string, except in the case that you know you will use a
145 /// string that PHP will already have interned, such as "PHP".
146 ///
147 /// # Parameters
148 ///
149 /// * `str` - String content.
150 /// * `persistent` - Whether the string should persist through the request
151 /// boundary.
152 ///
153 /// # Panics
154 ///
155 /// Panics under the following circumstances:
156 ///
157 /// * The function used to create interned strings has not been set.
158 /// * The function could not allocate enough memory for the Zend string.
159 ///
160 /// # Safety
161 ///
162 /// When passing `persistent` as `false`, the caller must ensure that the
163 /// object does not attempt to live after the request finishes. When a
164 /// request starts and finishes in PHP, the Zend heap is deallocated and a
165 /// new one is created, which would leave a dangling pointer in the
166 /// [`ZBox`].
167 ///
168 /// # Example
169 ///
170 /// ```no_run
171 /// use ext_php_rs::types::ZendStr;
172 ///
173 /// let s = ZendStr::new_interned("PHP", true);
174 /// ```
175 pub fn new_interned(str: impl AsRef<[u8]>, persistent: bool) -> ZBox<Self> {
176 let _lock = INTERNED_LOCK.lock();
177 let s = str.as_ref();
178 unsafe {
179 let init = zend_string_init_interned.expect("`zend_string_init_interned` not ready");
180 let ptr = init(s.as_ptr().cast(), s.len() as _, persistent)
181 .as_mut()
182 .expect("Failed to allocate memory for new Zend string");
183 ZBox::from_raw(ptr)
184 }
185 }
186
187 /// Creates a new interned Zend string from a [`CStr`].
188 ///
189 /// An interned string is only ever stored once and is immutable. PHP stores
190 /// the string in an internal hashtable which stores the interned
191 /// strings.
192 ///
193 /// As Zend hashtables are not thread-safe, a mutex is used to prevent two
194 /// interned strings from being created at the same time.
195 ///
196 /// Interned strings are not used very often. You should almost always use a
197 /// regular zend string, except in the case that you know you will use a
198 /// string that PHP will already have interned, such as "PHP".
199 ///
200 /// # Parameters
201 ///
202 /// * `str` - String content.
203 /// * `persistent` - Whether the string should persist through the request
204 /// boundary.
205 ///
206 /// # Panics
207 ///
208 /// Panics under the following circumstances:
209 ///
210 /// * The function used to create interned strings has not been set.
211 /// * The function could not allocate enough memory for the Zend string.
212 ///
213 /// # Safety
214 ///
215 /// When passing `persistent` as `false`, the caller must ensure that the
216 /// object does not attempt to live after the request finishes. When a
217 /// request starts and finishes in PHP, the Zend heap is deallocated and a
218 /// new one is created, which would leave a dangling pointer in the
219 /// [`ZBox`].
220 ///
221 /// # Example
222 ///
223 /// ```no_run
224 /// use ext_php_rs::types::ZendStr;
225 /// use std::ffi::CString;
226 ///
227 /// let c_s = CString::new("PHP").unwrap();
228 /// let s = ZendStr::interned_from_c_str(&c_s, true);
229 /// ```
230 pub fn interned_from_c_str(str: &CStr, persistent: bool) -> ZBox<Self> {
231 let _lock = INTERNED_LOCK.lock();
232
233 unsafe {
234 let init = zend_string_init_interned.expect("`zend_string_init_interned` not ready");
235 let ptr = init(str.as_ptr(), str.to_bytes().len() as _, persistent);
236
237 ZBox::from_raw(
238 ptr.as_mut()
239 .expect("Failed to allocate memory for new Zend string"),
240 )
241 }
242 }
243
244 /// Returns the length of the string.
245 ///
246 /// # Example
247 ///
248 /// ```no_run
249 /// use ext_php_rs::types::ZendStr;
250 ///
251 /// let s = ZendStr::new("hello, world!", false);
252 /// assert_eq!(s.len(), 13);
253 /// ```
254 #[must_use]
255 pub fn len(&self) -> usize {
256 self.len
257 }
258
259 /// Returns true if the string is empty, false otherwise.
260 ///
261 /// # Example
262 ///
263 /// ```no_run
264 /// use ext_php_rs::types::ZendStr;
265 ///
266 /// let s = ZendStr::new("hello, world!", false);
267 /// assert_eq!(s.is_empty(), false);
268 /// ```
269 #[must_use]
270 pub fn is_empty(&self) -> bool {
271 self.len() == 0
272 }
273
274 /// Attempts to return a reference to the underlying bytes inside the Zend
275 /// string as a [`CStr`].
276 ///
277 /// # Errors
278 ///
279 /// Returns an [`Error::InvalidCString`] variant if the string contains null
280 /// bytes.
281 pub fn as_c_str(&self) -> Result<&CStr> {
282 let bytes_with_null =
283 unsafe { slice::from_raw_parts(self.val.as_ptr().cast(), self.len() + 1) };
284 CStr::from_bytes_with_nul(bytes_with_null).map_err(|_| Error::InvalidCString)
285 }
286
287 /// Attempts to return a reference to the underlying bytes inside the Zend
288 /// string.
289 ///
290 /// # Errors
291 ///
292 /// Returns an [`Error::InvalidUtf8`] variant if the [`str`] contains
293 /// non-UTF-8 characters.
294 ///
295 /// # Example
296 ///
297 /// ```no_run
298 /// use ext_php_rs::types::ZendStr;
299 ///
300 /// let s = ZendStr::new("hello, world!", false);
301 /// assert!(s.as_str().is_ok());
302 /// ```
303 pub fn as_str(&self) -> Result<&str> {
304 if unsafe { ext_php_rs_is_known_valid_utf8(self.as_ptr()) } {
305 let str = unsafe { std::str::from_utf8_unchecked(self.as_bytes()) };
306 return Ok(str);
307 }
308 let str = std::str::from_utf8(self.as_bytes()).map_err(|_| Error::InvalidUtf8)?;
309 unsafe { ext_php_rs_set_known_valid_utf8(self.as_ptr().cast_mut()) };
310 Ok(str)
311 }
312
313 /// Returns a reference to the underlying bytes inside the Zend string.
314 #[must_use]
315 pub fn as_bytes(&self) -> &[u8] {
316 unsafe { slice::from_raw_parts(self.val.as_ptr().cast(), self.len()) }
317 }
318
319 /// Returns a raw pointer to this object
320 #[must_use]
321 pub fn as_ptr(&self) -> *const ZendStr {
322 ptr::from_ref(self)
323 }
324
325 /// Returns a mutable pointer to this object
326 pub fn as_mut_ptr(&mut self) -> *mut ZendStr {
327 ptr::from_mut(self)
328 }
329}
330
331unsafe impl ZBoxable for ZendStr {
332 fn free(&mut self) {
333 unsafe { ext_php_rs_zend_string_release(self) };
334 }
335}
336
337impl Debug for ZendStr {
338 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
339 self.as_str().fmt(f)
340 }
341}
342
343impl AsRef<[u8]> for ZendStr {
344 fn as_ref(&self) -> &[u8] {
345 self.as_bytes()
346 }
347}
348
349impl<T> PartialEq<T> for ZendStr
350where
351 T: AsRef<[u8]>,
352{
353 fn eq(&self, other: &T) -> bool {
354 self.as_ref() == other.as_ref()
355 }
356}
357
358impl ToOwned for ZendStr {
359 type Owned = ZBox<ZendStr>;
360
361 fn to_owned(&self) -> Self::Owned {
362 Self::new(self.as_bytes(), false)
363 }
364}
365
366impl<'a> TryFrom<&'a ZendStr> for &'a CStr {
367 type Error = Error;
368
369 fn try_from(value: &'a ZendStr) -> Result<Self> {
370 value.as_c_str()
371 }
372}
373
374impl<'a> TryFrom<&'a ZendStr> for &'a str {
375 type Error = Error;
376
377 fn try_from(value: &'a ZendStr) -> Result<Self> {
378 value.as_str()
379 }
380}
381
382impl TryFrom<&ZendStr> for String {
383 type Error = Error;
384
385 fn try_from(value: &ZendStr) -> Result<Self> {
386 value.as_str().map(ToString::to_string)
387 }
388}
389
390impl<'a> From<&'a ZendStr> for Cow<'a, ZendStr> {
391 fn from(value: &'a ZendStr) -> Self {
392 Cow::Borrowed(value)
393 }
394}
395
396impl From<&CStr> for ZBox<ZendStr> {
397 fn from(value: &CStr) -> Self {
398 ZendStr::from_c_str(value, false)
399 }
400}
401
402impl From<CString> for ZBox<ZendStr> {
403 fn from(value: CString) -> Self {
404 ZendStr::from_c_str(&value, false)
405 }
406}
407
408impl From<&str> for ZBox<ZendStr> {
409 fn from(value: &str) -> Self {
410 ZendStr::new(value.as_bytes(), false)
411 }
412}
413
414impl From<String> for ZBox<ZendStr> {
415 fn from(value: String) -> Self {
416 ZendStr::new(value.as_str(), false)
417 }
418}
419
420impl From<ZBox<ZendStr>> for Cow<'_, ZendStr> {
421 fn from(value: ZBox<ZendStr>) -> Self {
422 Cow::Owned(value)
423 }
424}
425
426impl From<Cow<'_, ZendStr>> for ZBox<ZendStr> {
427 fn from(value: Cow<'_, ZendStr>) -> Self {
428 value.into_owned()
429 }
430}
431
// Generates, for a string-like type, both `TryFrom<$type> for Zval` and
// `IntoZval for $type`. Each impl stores the value with `Zval::set_string`.
macro_rules! try_into_zval_str {
    ($type:ty) => {
        impl TryFrom<$type> for Zval {
            type Error = Error;

            fn try_from(value: $type) -> Result<Self> {
                let mut zval = Self::new();
                // This path always stores the string as non-persistent.
                zval.set_string(&value, false).map(|()| zval)
            }
        }

        impl IntoZval for $type {
            const TYPE: DataType = DataType::String;
            const NULLABLE: bool = false;

            fn set_zval(self, zv: &mut Zval, persistent: bool) -> Result<()> {
                let target: &mut Zval = zv;
                target.set_string(&self, persistent)
            }
        }
    };
}
454
// `Zval` conversions for owned and borrowed Rust strings.
try_into_zval_str!(String);
try_into_zval_str!(&str);
// NOTE(review): `try_from_zval!` is defined in `crate::macros`; presumably it
// generates the `Zval` -> `String` conversions — confirm against the macro.
try_from_zval!(String, string, String);
458
459impl<'a> FromZval<'a> for &'a str {
460 const TYPE: DataType = DataType::String;
461
462 fn from_zval(zval: &'a Zval) -> Option<Self> {
463 zval.str()
464 }
465}
466
#[cfg(test)]
#[cfg(feature = "embed")]
mod tests {
    use crate::embed::Embed;

    /// Evaluates a PHP string literal and checks that the resulting zval is
    /// a string with the same contents.
    #[test]
    fn test_string() {
        Embed::run(|| {
            let evaluated = Embed::eval("'foo';");
            assert!(evaluated.is_ok());

            let zval = evaluated.as_ref().expect("Unreachable");
            assert!(zval.is_string());

            let expected = Some(String::from("foo"));
            assert_eq!(zval.string(), expected);
        });
    }
}