godot_core/builtin/string/
gstring.rs

1/*
2 * Copyright (c) godot-rust; Bromeon and contributors.
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
6 */
7
8use std::convert::Infallible;
9use std::fmt;
10use std::fmt::Write;
11
12use godot_ffi as sys;
13use sys::types::OpaqueString;
14use sys::{ffi_methods, interface_fn, ExtVariantType, GodotFfi};
15
16use crate::builtin::string::{pad_if_needed, Encoding};
17use crate::builtin::{inner, NodePath, StringName, Variant};
18use crate::meta::error::StringError;
19use crate::meta::AsArg;
20use crate::{impl_shared_string_api, meta};
21
/// Godot's reference-counted string type.
///
/// This is the Rust binding of GDScript's `String` type. It represents the native string class used within the Godot engine,
/// and as such has a different memory layout and different characteristics than `std::string::String`.
///
/// `GString` uses copy-on-write semantics and is cheap to clone. Modifying a string may trigger a copy, if that instance shares
/// its backing storage with other strings.
///
/// Note that `GString` is not immutable, but it offers a very limited set of write APIs. Most operations return new strings.
/// In order to modify Godot strings, it's often easiest to convert them to Rust strings, perform the modifications and convert back.
///
/// # `GString` vs. `String`
///
/// When interfacing with the Godot engine API, you often have the choice between `String` and `GString`. In user-declared methods
/// exposed to Godot through the `#[func]` attribute, both types can be used as parameters and return types, and conversions
/// are done transparently. For auto-generated binding APIs in `godot::classes`, both parameters and return types are `GString`.
/// Parameters are declared as `impl AsArg<GString>`, allowing you to be more flexible with arguments such as `"some_string"`.
///
/// As a general guideline, use `GString` if:
/// * your strings are very large, so you can avoid copying them
/// * you need specific operations only available in Godot (e.g. `sha256_text()`, `c_escape()`, ...)
/// * you primarily pass them between different Godot APIs, without string processing in user code
///
/// Use Rust's `String` if:
/// * you need to modify the string
/// * you would like to decouple part of your code from Godot (e.g. independent game logic, standalone tests)
/// * you want a standard type for interoperability with third-party code (e.g. `regex` crate)
/// * you have a large number of method calls per string instance (which are more expensive due to indirectly calling into Godot)
/// * you need UTF-8 encoding (`GString` uses UTF-32)
///
/// # Null bytes
///
/// Note that Godot ignores any bytes after a null byte. This means that, for instance, `"hello, world!"` and
/// `"hello, world!\0 ignored by Godot"` will be treated as the same string if converted to a `GString`.
///
/// # All string types
///
/// | Intended use case | String type                                |
/// |-------------------|--------------------------------------------|
/// | General purpose   | **`GString`**                              |
/// | Interned names    | [`StringName`][crate::builtin::StringName] |
/// | Scene-node paths  | [`NodePath`][crate::builtin::NodePath]     |
///
/// # Godot docs
///
/// [`String` (stable)](https://docs.godotengine.org/en/stable/classes/class_string.html)
#[doc(alias = "String")]
// #[repr] is needed on GString itself rather than the opaque field, because PackedStringArray::as_slice() relies on a packed representation.
#[repr(transparent)]
pub struct GString {
    // Opaque engine-side representation. This file never reads the field directly; all access goes through FFI pointers.
    _opaque: OpaqueString,
}
74
75impl GString {
76    /// Construct a new empty `GString`.
77    pub fn new() -> Self {
78        Self::default()
79    }
80
81    /// Convert string from bytes with given encoding, returning `Err` on validation errors.
82    ///
83    /// Intermediate `NUL` characters are not accepted in Godot and always return `Err`.
84    ///
85    /// Some notes on the encodings:
86    /// - **Latin-1:** Since every byte is a valid Latin-1 character, no validation besides the `NUL` byte is performed.
87    ///   It is your responsibility to ensure that the input is meaningful under Latin-1.
88    /// - **ASCII**: Subset of Latin-1, which is additionally validated to be valid, non-`NUL` ASCII characters.
89    /// - **UTF-8**: The input is validated to be UTF-8.
90    ///
91    /// Specifying incorrect encoding is safe, but may result in unintended string values.
92    pub fn try_from_bytes(bytes: &[u8], encoding: Encoding) -> Result<Self, StringError> {
93        Self::try_from_bytes_with_nul_check(bytes, encoding, true)
94    }
95
96    /// Convert string from C-string with given encoding, returning `Err` on validation errors.
97    ///
98    /// Convenience function for [`try_from_bytes()`](Self::try_from_bytes); see its docs for more information.
99    pub fn try_from_cstr(cstr: &std::ffi::CStr, encoding: Encoding) -> Result<Self, StringError> {
100        Self::try_from_bytes_with_nul_check(cstr.to_bytes(), encoding, false)
101    }
102
103    pub(super) fn try_from_bytes_with_nul_check(
104        bytes: &[u8],
105        encoding: Encoding,
106        check_nul: bool,
107    ) -> Result<Self, StringError> {
108        match encoding {
109            Encoding::Ascii => {
110                // If the bytes are ASCII, we can fall back to Latin-1, which is always valid (except for NUL).
111                // is_ascii() does *not* check for the NUL byte, so the check in the Latin-1 branch is still necessary.
112                if bytes.is_ascii() {
113                    Self::try_from_bytes_with_nul_check(bytes, Encoding::Latin1, check_nul)
114                        .map_err(|_e| StringError::new("intermediate NUL byte in ASCII string"))
115                } else {
116                    Err(StringError::new("invalid ASCII"))
117                }
118            }
119            Encoding::Latin1 => {
120                // Intermediate NUL bytes are not accepted in Godot. Both ASCII + Latin-1 encodings need to explicitly check for this.
121                if check_nul && bytes.contains(&0) {
122                    // Error overwritten when called from ASCII branch.
123                    return Err(StringError::new("intermediate NUL byte in Latin-1 string"));
124                }
125
126                let s = unsafe {
127                    Self::new_with_string_uninit(|string_ptr| {
128                        let ctor = interface_fn!(string_new_with_latin1_chars_and_len);
129                        ctor(
130                            string_ptr,
131                            bytes.as_ptr() as *const std::ffi::c_char,
132                            bytes.len() as i64,
133                        );
134                    })
135                };
136                Ok(s)
137            }
138            Encoding::Utf8 => {
139                // from_utf8() also checks for intermediate NUL bytes.
140                let utf8 = std::str::from_utf8(bytes);
141
142                utf8.map(GString::from)
143                    .map_err(|e| StringError::with_source("invalid UTF-8", e))
144            }
145        }
146    }
147
148    /// Number of characters in the string.
149    ///
150    /// _Godot equivalent: `length`_
151    #[doc(alias = "length")]
152    pub fn len(&self) -> usize {
153        self.as_inner().length().try_into().unwrap()
154    }
155
156    /// Returns a 32-bit integer hash value representing the string.
157    pub fn hash(&self) -> u32 {
158        self.as_inner()
159            .hash()
160            .try_into()
161            .expect("Godot hashes are uint32_t")
162    }
163
164    /// Gets the UTF-32 character slice from a `GString`.
165    pub fn chars(&self) -> &[char] {
166        // SAFETY: Godot 4.1 ensures valid UTF-32, making interpreting as char slice safe.
167        // See https://github.com/godotengine/godot/pull/74760.
168        unsafe {
169            let s = self.string_sys();
170            let len = interface_fn!(string_to_utf32_chars)(s, std::ptr::null_mut(), 0);
171            let ptr = interface_fn!(string_operator_index_const)(s, 0);
172
173            // Even when len == 0, from_raw_parts requires ptr != null.
174            if ptr.is_null() {
175                return &[];
176            }
177
178            std::slice::from_raw_parts(ptr as *const char, len as usize)
179        }
180    }
181
    // Declares the `GDExtensionStringPtr`-typed FFI helpers used throughout this file (`new_with_string_uninit`,
    // `string_sys`, ...). Each left-hand name is the method made available on `GString`; the right-hand name selects
    // the `ffi_methods!` implementation it maps to.
    // NOTE(review): the exact expansion lives in `godot_ffi::ffi_methods!` -- confirm semantics there.
    ffi_methods! {
        type sys::GDExtensionStringPtr = *mut Self;

        fn new_from_string_sys = new_from_sys;
        fn new_with_string_uninit = new_with_uninit;
        fn string_sys = sys;
        fn string_sys_mut = sys_mut;
    }
190
191    /// Consumes self and turns it into a sys-ptr, should be used together with [`from_owned_string_sys`](Self::from_owned_string_sys).
192    ///
193    /// This will leak memory unless `from_owned_string_sys` is called on the returned pointer.
194    pub(crate) fn into_owned_string_sys(self) -> sys::GDExtensionStringPtr {
195        sys::static_assert_eq_size_align!(StringName, sys::types::OpaqueString);
196
197        let leaked = Box::into_raw(Box::new(self));
198        leaked.cast()
199    }
200
201    /// Creates a `GString` from a sys-ptr without incrementing the refcount.
202    ///
203    /// # Safety
204    ///
205    /// * Must only be used on a pointer returned from a call to [`into_owned_string_sys`](Self::into_owned_string_sys).
206    /// * Must not be called more than once on the same pointer.
207    #[deny(unsafe_op_in_unsafe_fn)]
208    pub(crate) unsafe fn from_owned_string_sys(ptr: sys::GDExtensionStringPtr) -> Self {
209        sys::static_assert_eq_size_align!(StringName, sys::types::OpaqueString);
210
211        let ptr = ptr.cast::<Self>();
212
213        // SAFETY: `ptr` was returned from a call to `into_owned_string_sys`, which means it was created by a call to
214        // `Box::into_raw`, thus we can use `Box::from_raw` here. Additionally, this is only called once on this pointer.
215        let boxed = unsafe { Box::from_raw(ptr) };
216        *boxed
217    }
218
219    /// Convert a `GString` sys pointer to a mutable reference with unbounded lifetime.
220    ///
221    /// # Safety
222    ///
223    /// - `ptr` must point to a live `GString` for the duration of `'a`.
224    /// - Must be exclusive - no other reference to given `GString` instance can exist for the duration of `'a`.
225    pub(crate) unsafe fn borrow_string_sys_mut<'a>(ptr: sys::GDExtensionStringPtr) -> &'a mut Self {
226        sys::static_assert_eq_size_align!(StringName, sys::types::OpaqueString);
227        &mut *(ptr.cast::<GString>())
228    }
229
230    /// Moves this string into a string sys pointer. This is the same as using [`GodotFfi::move_return_ptr`].
231    ///
232    /// # Safety
233    ///
234    /// `dst` must be a valid string pointer.
235    pub(crate) unsafe fn move_into_string_ptr(self, dst: sys::GDExtensionStringPtr) {
236        let dst: sys::GDExtensionTypePtr = dst.cast();
237
238        self.move_return_ptr(dst, sys::PtrcallType::Standard);
239    }
240
    // The doc comment below is handed to `declare_arg_method!` as the documentation for what it generates.
    // NOTE(review): presumably that is the `arg()` method used in the example -- confirm against the macro definition.
    meta::declare_arg_method! {
        /// Use as argument for an [`impl AsArg<StringName|NodePath>`][crate::meta::AsArg] parameter.
        ///
        /// This is a convenient way to convert arguments of similar string types.
        ///
        /// # Example
        /// [`Node::has_node()`][crate::classes::Node::has_node] takes `NodePath`, let's pass a `GString`:
        /// ```no_run
        /// # use godot::prelude::*;
        /// let name = GString::from("subnode");
        ///
        /// let node = Node::new_alloc();
        /// if node.has_node(name.arg()) {
        ///     // ...
        /// }
        /// ```
    }
258
259    #[doc(hidden)]
260    pub fn as_inner(&self) -> inner::InnerString<'_> {
261        inner::InnerString::from_outer(self)
262    }
263}
264
// SAFETY:
// - `move_return_ptr`
//   Nothing special needs to be done beyond a `std::mem::swap` when returning a String.
//   So we can just use `ffi_methods`.
//
// - `from_arg_ptr`
//   Strings are properly initialized through a `from_sys` call, but the ref-count should be
//   incremented, as that is the callee's responsibility. We do this by calling
//   `std::mem::forget(string.clone())`.
unsafe impl GodotFfi for GString {
    // Identifies this type as the concrete Godot `STRING` variant type.
    const VARIANT_TYPE: ExtVariantType = ExtVariantType::Concrete(sys::VariantType::STRING);

    // `..` asks `ffi_methods!` for its default set of methods on the generic type pointer.
    // NOTE(review): which methods that covers is defined by the macro -- confirm there.
    ffi_methods! { type sys::GDExtensionTypePtr = *mut Self; .. }
}
279
// NOTE(review): presumably registers `GString` as its own Godot-facing representation (no conversion step);
// confirm against the definition of `impl_godot_as_self!`.
meta::impl_godot_as_self!(GString);

// Implements the listed standard traits for `GString`. Entries of the form `Trait => name` appear to name the Godot
// builtin function backing that trait; `Hash` takes no such function.
impl_builtin_traits! {
    for GString {
        Default => string_construct_default;
        Clone => string_construct_copy;
        Drop => string_destroy;
        Eq => string_operator_equal;
        Ord => string_operator_less;
        Hash;
    }
}

// Adds the string API shared between Godot's string types to `GString`.
// NOTE(review): `ExGStringFind` / `ExGStringSplit` look like the builder types used for find/split operations;
// whether the macro declares or merely references them is not visible here.
impl_shared_string_api! {
    builtin: GString,
    find_builder: ExGStringFind,
    split_builder: ExGStringSplit,
}
298
299impl fmt::Display for GString {
300    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
301        pad_if_needed(f, |f| {
302            for ch in self.chars() {
303                f.write_char(*ch)?;
304            }
305
306            Ok(())
307        })
308    }
309}
310
311/// Uses literal syntax from GDScript: `"string"`
312impl fmt::Debug for GString {
313    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
314        // Reuse Display impl.
315        write!(f, "\"{self}\"")
316    }
317}
318
319// ----------------------------------------------------------------------------------------------------------------------------------------------
320// Conversion from/into Rust string-types
321
322impl From<&str> for GString {
323    fn from(s: &str) -> Self {
324        let bytes = s.as_bytes();
325
326        unsafe {
327            Self::new_with_string_uninit(|string_ptr| {
328                let ctor = interface_fn!(string_new_with_utf8_chars_and_len);
329                ctor(
330                    string_ptr,
331                    bytes.as_ptr() as *const std::ffi::c_char,
332                    bytes.len() as i64,
333                );
334            })
335        }
336    }
337}
338
339impl From<&[char]> for GString {
340    fn from(chars: &[char]) -> Self {
341        // SAFETY: A `char` value is by definition a valid Unicode code point.
342        unsafe {
343            Self::new_with_string_uninit(|string_ptr| {
344                let ctor = interface_fn!(string_new_with_utf32_chars_and_len);
345                ctor(
346                    string_ptr,
347                    chars.as_ptr() as *const sys::char32_t,
348                    chars.len() as i64,
349                );
350            })
351        }
352    }
353}
354
355impl From<String> for GString {
356    fn from(value: String) -> Self {
357        value.as_str().into()
358    }
359}
360
361impl From<&String> for GString {
362    fn from(value: &String) -> Self {
363        value.as_str().into()
364    }
365}
366
367impl From<&GString> for String {
368    fn from(string: &GString) -> Self {
369        unsafe {
370            let len =
371                interface_fn!(string_to_utf8_chars)(string.string_sys(), std::ptr::null_mut(), 0);
372
373            assert!(len >= 0);
374            let mut buf = vec![0u8; len as usize];
375
376            interface_fn!(string_to_utf8_chars)(
377                string.string_sys(),
378                buf.as_mut_ptr() as *mut std::ffi::c_char,
379                len,
380            );
381
382            // Note: could use from_utf8_unchecked() but for now prefer safety
383            String::from_utf8(buf).expect("String::from_utf8")
384        }
385    }
386}
387
388impl From<GString> for String {
389    /// Converts this `GString` to a `String`.
390    ///
391    /// This is identical to `String::from(&string)`, and as such there is no performance benefit.
392    fn from(string: GString) -> Self {
393        Self::from(&string)
394    }
395}
396
397impl std::str::FromStr for GString {
398    type Err = Infallible;
399
400    fn from_str(s: &str) -> Result<Self, Self::Err> {
401        Ok(Self::from(s))
402    }
403}
404
405// ----------------------------------------------------------------------------------------------------------------------------------------------
406// Conversion from other Godot string-types
407
408impl From<&StringName> for GString {
409    fn from(string: &StringName) -> Self {
410        unsafe {
411            Self::new_with_uninit(|self_ptr| {
412                let ctor = sys::builtin_fn!(string_from_string_name);
413                let args = [string.sys()];
414                ctor(self_ptr, args.as_ptr());
415            })
416        }
417    }
418}
419
420impl From<StringName> for GString {
421    /// Converts this `StringName` to a `GString`.
422    ///
423    /// This is identical to `GString::from(&string_name)`, and as such there is no performance benefit.
424    fn from(string_name: StringName) -> Self {
425        Self::from(&string_name)
426    }
427}
428
429impl From<&NodePath> for GString {
430    fn from(path: &NodePath) -> Self {
431        unsafe {
432            Self::new_with_uninit(|self_ptr| {
433                let ctor = sys::builtin_fn!(string_from_node_path);
434                let args = [path.sys()];
435                ctor(self_ptr, args.as_ptr());
436            })
437        }
438    }
439}
440
441impl From<NodePath> for GString {
442    /// Converts this `NodePath` to a `GString`.
443    ///
444    /// This is identical to `GString::from(&path)`, and as such there is no performance benefit.
445    fn from(path: NodePath) -> Self {
446        Self::from(&path)
447    }
448}
449
#[cfg(feature = "serde")]
#[cfg_attr(published_docs, doc(cfg(feature = "serde")))]
mod serialize {
    use super::*;
    use serde::de::{Error, Visitor};
    use serde::{Deserialize, Deserializer, Serialize, Serializer};
    use std::fmt::Formatter;

    // For "Available on crate feature `serde`" in docs. Cannot be inherited from module. Also does not support #[derive] (e.g. in Vector2).
    #[cfg_attr(published_docs, doc(cfg(feature = "serde")))]
    impl Serialize for GString {
        /// Serializes the string as a plain string value, using its `Display` output.
        #[inline]
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: Serializer,
        {
            let text = self.to_string();
            serializer.serialize_str(&text)
        }
    }

    #[cfg_attr(published_docs, doc(cfg(feature = "serde")))]
    impl<'de> Deserialize<'de> for GString {
        /// Deserializes from a string value; other input kinds fall back to the visitor's default handling.
        #[inline]
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            // Visitor that only overrides string input, turning it into a `GString`.
            struct GStringVisitor;

            impl Visitor<'_> for GStringVisitor {
                type Value = GString;

                fn expecting(&self, formatter: &mut Formatter) -> fmt::Result {
                    formatter.write_str("a GString")
                }

                fn visit_str<E: Error>(self, s: &str) -> Result<Self::Value, E> {
                    let parsed = GString::from(s);
                    Ok(parsed)
                }
            }

            deserializer.deserialize_str(GStringVisitor)
        }
    }
}