godot_core/builtin/string/gstring.rs
1/*
2 * Copyright (c) godot-rust; Bromeon and contributors.
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
6 */
7
8use std::convert::Infallible;
9use std::fmt;
10use std::fmt::Write;
11
12use godot_ffi as sys;
13use sys::types::OpaqueString;
14use sys::{ffi_methods, interface_fn, ExtVariantType, GodotFfi};
15
16use crate::builtin::string::{pad_if_needed, Encoding};
17use crate::builtin::{inner, NodePath, StringName, Variant};
18use crate::meta::error::StringError;
19use crate::meta::AsArg;
20use crate::{impl_shared_string_api, meta};
21
22/// Godot's reference counted string type.
23///
24/// This is the Rust binding of GDScript's `String` type. It represents the native string class used within the Godot engine,
/// and as such has a different memory layout and different characteristics than `std::string::String`.
26///
27/// `GString` uses copy-on-write semantics and is cheap to clone. Modifying a string may trigger a copy, if that instance shares
28/// its backing storage with other strings.
29///
30/// Note that `GString` is not immutable, but it offers a very limited set of write APIs. Most operations return new strings.
31/// In order to modify Godot strings, it's often easiest to convert them to Rust strings, perform the modifications and convert back.
32///
33/// # `GString` vs. `String`
34///
35/// When interfacing with the Godot engine API, you often have the choice between `String` and `GString`. In user-declared methods
36/// exposed to Godot through the `#[func]` attribute, both types can be used as parameters and return types, and conversions
37/// are done transparently. For auto-generated binding APIs in `godot::classes`, both parameters and return types are `GString`.
38/// Parameters are declared as `impl AsArg<GString>`, allowing you to be more flexible with arguments such as `"some_string"`.
39///
40/// As a general guideline, use `GString` if:
41/// * your strings are very large, so you can avoid copying them
42/// * you need specific operations only available in Godot (e.g. `sha256_text()`, `c_escape()`, ...)
43/// * you primarily pass them between different Godot APIs, without string processing in user code
44///
45/// Use Rust's `String` if:
46/// * you need to modify the string
47/// * you would like to decouple part of your code from Godot (e.g. independent game logic, standalone tests)
48/// * you want a standard type for interoperability with third-party code (e.g. `regex` crate)
49/// * you have a large number of method calls per string instance (which are more expensive due to indirectly calling into Godot)
50/// * you need UTF-8 encoding (`GString` uses UTF-32)
51///
52/// # Null bytes
53///
54/// Note that Godot ignores any bytes after a null-byte. This means that for instance `"hello, world!"` and `"hello, world!\0 ignored by Godot"`
55/// will be treated as the same string if converted to a `GString`.
56///
57/// # All string types
58///
59/// | Intended use case | String type |
60/// |-------------------|--------------------------------------------|
61/// | General purpose | **`GString`** |
62/// | Interned names | [`StringName`][crate::builtin::StringName] |
63/// | Scene-node paths | [`NodePath`][crate::builtin::NodePath] |
64///
65/// # Godot docs
66///
67/// [`String` (stable)](https://docs.godotengine.org/en/stable/classes/class_string.html)
#[doc(alias = "String")]
// #[repr] is needed on GString itself rather than the opaque field, because PackedStringArray::as_slice() relies on a packed representation.
#[repr(transparent)]
pub struct GString {
    // Sole field of the struct. It is never accessed by name in the code visible in this file; all access appears to go
    // through the sys-pointer helpers generated further below (NOTE(review): confirm against `ffi_methods!`).
    _opaque: OpaqueString,
}
74
// SAFETY: The Godot implementation of String uses an atomic copy-on-write pointer. This makes sending a `GString` to another
// thread sound, as we never write to the shared buffer unless we own it.
unsafe impl Send for GString {}
77
78impl GString {
79 /// Construct a new empty `GString`.
80 pub fn new() -> Self {
81 Self::default()
82 }
83
84 /// Convert string from bytes with given encoding, returning `Err` on validation errors.
85 ///
86 /// Intermediate `NUL` characters are not accepted in Godot and always return `Err`.
87 ///
88 /// Some notes on the encodings:
89 /// - **Latin-1:** Since every byte is a valid Latin-1 character, no validation besides the `NUL` byte is performed.
90 /// It is your responsibility to ensure that the input is meaningful under Latin-1.
91 /// - **ASCII**: Subset of Latin-1, which is additionally validated to be valid, non-`NUL` ASCII characters.
92 /// - **UTF-8**: The input is validated to be UTF-8.
93 ///
94 /// Specifying incorrect encoding is safe, but may result in unintended string values.
95 pub fn try_from_bytes(bytes: &[u8], encoding: Encoding) -> Result<Self, StringError> {
96 Self::try_from_bytes_with_nul_check(bytes, encoding, true)
97 }
98
99 /// Convert string from C-string with given encoding, returning `Err` on validation errors.
100 ///
101 /// Convenience function for [`try_from_bytes()`](Self::try_from_bytes); see its docs for more information.
102 pub fn try_from_cstr(cstr: &std::ffi::CStr, encoding: Encoding) -> Result<Self, StringError> {
103 Self::try_from_bytes_with_nul_check(cstr.to_bytes(), encoding, false)
104 }
105
106 pub(super) fn try_from_bytes_with_nul_check(
107 bytes: &[u8],
108 encoding: Encoding,
109 check_nul: bool,
110 ) -> Result<Self, StringError> {
111 match encoding {
112 Encoding::Ascii => {
113 // If the bytes are ASCII, we can fall back to Latin-1, which is always valid (except for NUL).
114 // is_ascii() does *not* check for the NUL byte, so the check in the Latin-1 branch is still necessary.
115 if bytes.is_ascii() {
116 Self::try_from_bytes_with_nul_check(bytes, Encoding::Latin1, check_nul)
117 .map_err(|_e| StringError::new("intermediate NUL byte in ASCII string"))
118 } else {
119 Err(StringError::new("invalid ASCII"))
120 }
121 }
122 Encoding::Latin1 => {
123 // Intermediate NUL bytes are not accepted in Godot. Both ASCII + Latin-1 encodings need to explicitly check for this.
124 if check_nul && bytes.contains(&0) {
125 // Error overwritten when called from ASCII branch.
126 return Err(StringError::new("intermediate NUL byte in Latin-1 string"));
127 }
128
129 let s = unsafe {
130 Self::new_with_string_uninit(|string_ptr| {
131 let ctor = interface_fn!(string_new_with_latin1_chars_and_len);
132 ctor(
133 string_ptr,
134 bytes.as_ptr() as *const std::ffi::c_char,
135 bytes.len() as i64,
136 );
137 })
138 };
139 Ok(s)
140 }
141 Encoding::Utf8 => {
142 // from_utf8() also checks for intermediate NUL bytes.
143 let utf8 = std::str::from_utf8(bytes);
144
145 utf8.map(GString::from)
146 .map_err(|e| StringError::with_source("invalid UTF-8", e))
147 }
148 }
149 }
150
151 /// Number of characters in the string.
152 ///
153 /// _Godot equivalent: `length`_
154 #[doc(alias = "length")]
155 pub fn len(&self) -> usize {
156 self.as_inner().length().try_into().unwrap()
157 }
158
    // Macro-generated hashing method. Going by the macro name and the deprecation note on `hash()`, it is called `hash_u32`.
    // The doc comment inside the braces is passed to the macro (presumably attached to the generated method -- confirm
    // in the macro definition).
    crate::declare_hash_u32_method! {
        /// Returns a 32-bit integer hash value representing the string.
    }
162
163 #[deprecated = "renamed to `hash_u32`"]
164 pub fn hash(&self) -> u32 {
165 self.as_inner()
166 .hash()
167 .try_into()
168 .expect("Godot hashes are uint32_t")
169 }
170
171 /// Gets the UTF-32 character slice from a `GString`.
172 pub fn chars(&self) -> &[char] {
173 // SAFETY: Since 4.1, Godot ensures valid UTF-32, making interpreting as char slice safe.
174 // See https://github.com/godotengine/godot/pull/74760.
175 let (ptr, len) = self.raw_slice();
176
177 // Even when len == 0, from_raw_parts requires ptr != null.
178 if ptr.is_null() {
179 return &[];
180 }
181
182 unsafe { std::slice::from_raw_parts(ptr, len) }
183 }
184
185 /// Returns the raw pointer and length of the internal UTF-32 character array.
186 ///
187 /// This is used by `StringName::chars()` in Godot 4.5+ where the buffer is shared via reference counting.
188 /// Since Godot 4.1, the buffer contains valid UTF-32.
189 pub(crate) fn raw_slice(&self) -> (*const char, usize) {
190 let s = self.string_sys();
191
192 let len: sys::GDExtensionInt;
193 let ptr: *const sys::char32_t;
194 unsafe {
195 len = interface_fn!(string_to_utf32_chars)(s, std::ptr::null_mut(), 0);
196 ptr = interface_fn!(string_operator_index_const)(s, 0);
197 }
198
199 (ptr.cast(), len as usize)
200 }
201
    // Generates the string-pointer helpers used throughout this file (`string_sys()`, `new_with_string_uninit()`, ...).
    // Each line names a `GString`-specific method on the left and the generic operation it is based on on the right;
    // the exact semantics are defined by `ffi_methods!`.
    ffi_methods! {
        type sys::GDExtensionStringPtr = *mut Self;

        fn new_from_string_sys = new_from_sys;
        fn new_with_string_uninit = new_with_uninit;
        fn string_sys = sys;
        fn string_sys_mut = sys_mut;
    }
210
211 /// Consumes self and turns it into a sys-ptr, should be used together with [`from_owned_string_sys`](Self::from_owned_string_sys).
212 ///
213 /// This will leak memory unless `from_owned_string_sys` is called on the returned pointer.
214 pub(crate) fn into_owned_string_sys(self) -> sys::GDExtensionStringPtr {
215 sys::static_assert_eq_size_align!(StringName, sys::types::OpaqueString);
216
217 let leaked = Box::into_raw(Box::new(self));
218 leaked.cast()
219 }
220
221 /// Creates a `GString` from a sys-ptr without incrementing the refcount.
222 ///
223 /// # Safety
224 ///
225 /// * Must only be used on a pointer returned from a call to [`into_owned_string_sys`](Self::into_owned_string_sys).
226 /// * Must not be called more than once on the same pointer.
227 #[deny(unsafe_op_in_unsafe_fn)]
228 pub(crate) unsafe fn from_owned_string_sys(ptr: sys::GDExtensionStringPtr) -> Self {
229 sys::static_assert_eq_size_align!(StringName, sys::types::OpaqueString);
230
231 let ptr = ptr.cast::<Self>();
232
233 // SAFETY: `ptr` was returned from a call to `into_owned_string_sys`, which means it was created by a call to
234 // `Box::into_raw`, thus we can use `Box::from_raw` here. Additionally, this is only called once on this pointer.
235 let boxed = unsafe { Box::from_raw(ptr) };
236 *boxed
237 }
238
239 /// Convert a `GString` sys pointer to a mutable reference with unbounded lifetime.
240 ///
241 /// # Safety
242 ///
243 /// - `ptr` must point to a live `GString` for the duration of `'a`.
244 /// - Must be exclusive - no other reference to given `GString` instance can exist for the duration of `'a`.
245 pub(crate) unsafe fn borrow_string_sys_mut<'a>(ptr: sys::GDExtensionStringPtr) -> &'a mut Self {
246 sys::static_assert_eq_size_align!(StringName, sys::types::OpaqueString);
247 &mut *(ptr.cast::<GString>())
248 }
249
250 /// Moves this string into a string sys pointer. This is the same as using [`GodotFfi::move_return_ptr`].
251 ///
252 /// # Safety
253 ///
254 /// `dst` must be a valid string pointer.
255 pub(crate) unsafe fn move_into_string_ptr(self, dst: sys::GDExtensionStringPtr) {
256 let dst: sys::GDExtensionTypePtr = dst.cast();
257
258 self.move_return_ptr(dst, sys::PtrcallType::Standard);
259 }
260
    // Macro-generated method; the doc comment inside the braces is passed to the macro. Per the example in that doc,
    // the resulting method is invoked as `.arg()`.
    meta::declare_arg_method! {
        /// Use as argument for an [`impl AsArg<StringName|NodePath>`][crate::meta::AsArg] parameter.
        ///
        /// This is a convenient way to convert arguments of similar string types.
        ///
        /// # Example
        /// [`Node::has_node()`][crate::classes::Node::has_node] takes `NodePath`, let's pass a `GString`:
        /// ```no_run
        /// # use godot::prelude::*;
        /// let name = GString::from("subnode");
        ///
        /// let node = Node::new_alloc();
        /// if node.has_node(name.arg()) {
        ///     // ...
        /// }
        /// ```
    }
278
279 #[doc(hidden)]
280 pub fn as_inner(&self) -> inner::InnerString<'_> {
281 inner::InnerString::from_outer(self)
282 }
283}
284
// SAFETY:
// - `move_return_ptr`
//   Returning a String needs nothing special beyond a `std::mem::swap`,
//   so the implementation provided by `ffi_methods` can be used as is.
//
// - `from_arg_ptr`
//   Strings are properly initialized through a `from_sys` call, but the ref-count should be
//   incremented as that is the callee's responsibility. Which we do by calling
//   `std::mem::forget(string.clone())`.
unsafe impl GodotFfi for GString {
    // Associates this Rust type with the `STRING` variant type.
    const VARIANT_TYPE: ExtVariantType = ExtVariantType::Concrete(sys::VariantType::STRING);

    // Presumably `..` requests the macro's default set of methods -- confirm against `ffi_methods!`.
    ffi_methods! { type sys::GDExtensionTypePtr = *mut Self; .. }
}
299
// Macro-generated impls for `GString`. NOTE(review): `ByRef` presumably selects by-reference argument passing;
// confirm against the definition of `impl_godot_as_self!`.
meta::impl_godot_as_self!(GString: ByRef);
301
// Implements the listed standard traits for `GString`. Where an arrow is given, the right-hand side names the
// function the trait implementation is based on (mapping semantics are defined by `impl_builtin_traits!`).
impl_builtin_traits! {
    for GString {
        Default => string_construct_default;
        Clone => string_construct_copy;
        Drop => string_destroy;
        Eq => string_operator_equal;
        Ord => string_operator_less;
        Hash;
    }
}
312
// Macro-generated API for `GString`. The `find_builder` and `split_builder` arguments supply the names
// `ExGStringFind` and `ExGStringSplit` (presumably the builder types the macro declares -- confirm in the macro).
impl_shared_string_api! {
    builtin: GString,
    find_builder: ExGStringFind,
    split_builder: ExGStringSplit,
}
318
319impl fmt::Display for GString {
320 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
321 pad_if_needed(f, |f| {
322 for ch in self.chars() {
323 f.write_char(*ch)?;
324 }
325
326 Ok(())
327 })
328 }
329}
330
331/// Uses literal syntax from GDScript: `"string"`
332impl fmt::Debug for GString {
333 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
334 // Reuse Display impl.
335 write!(f, "\"{self}\"")
336 }
337}
338
339// ----------------------------------------------------------------------------------------------------------------------------------------------
340// Conversion from/into Rust string-types
341
342impl From<&str> for GString {
343 fn from(s: &str) -> Self {
344 let bytes = s.as_bytes();
345
346 unsafe {
347 Self::new_with_string_uninit(|string_ptr| {
348 #[cfg(before_api = "4.3")] #[cfg_attr(published_docs, doc(cfg(before_api = "4.3")))]
349 let ctor = interface_fn!(string_new_with_utf8_chars_and_len);
350 #[cfg(since_api = "4.3")] #[cfg_attr(published_docs, doc(cfg(since_api = "4.3")))]
351 let ctor = interface_fn!(string_new_with_utf8_chars_and_len2);
352
353 ctor(
354 string_ptr,
355 bytes.as_ptr() as *const std::ffi::c_char,
356 bytes.len() as i64,
357 );
358 })
359 }
360 }
361}
362
363impl From<&[char]> for GString {
364 fn from(chars: &[char]) -> Self {
365 // SAFETY: A `char` value is by definition a valid Unicode code point.
366 unsafe {
367 Self::new_with_string_uninit(|string_ptr| {
368 let ctor = interface_fn!(string_new_with_utf32_chars_and_len);
369 ctor(
370 string_ptr,
371 chars.as_ptr() as *const sys::char32_t,
372 chars.len() as i64,
373 );
374 })
375 }
376 }
377}
378
379impl From<&String> for GString {
380 fn from(value: &String) -> Self {
381 value.as_str().into()
382 }
383}
384
385impl From<&GString> for String {
386 fn from(string: &GString) -> Self {
387 unsafe {
388 let len =
389 interface_fn!(string_to_utf8_chars)(string.string_sys(), std::ptr::null_mut(), 0);
390
391 assert!(len >= 0);
392 let mut buf = vec![0u8; len as usize];
393
394 interface_fn!(string_to_utf8_chars)(
395 string.string_sys(),
396 buf.as_mut_ptr() as *mut std::ffi::c_char,
397 len,
398 );
399
400 // Note: could use from_utf8_unchecked() but for now prefer safety
401 String::from_utf8(buf).expect("String::from_utf8")
402 }
403 }
404}
405
406impl From<GString> for String {
407 /// Converts this `GString` to a `String`.
408 ///
409 /// This is identical to `String::from(&string)`, and as such there is no performance benefit.
410 fn from(string: GString) -> Self {
411 Self::from(&string)
412 }
413}
414
415impl std::str::FromStr for GString {
416 type Err = Infallible;
417
418 fn from_str(s: &str) -> Result<Self, Self::Err> {
419 Ok(Self::from(s))
420 }
421}
422
423// ----------------------------------------------------------------------------------------------------------------------------------------------
424// Conversion from other Godot string-types
425
426impl From<&StringName> for GString {
427 fn from(string: &StringName) -> Self {
428 unsafe {
429 Self::new_with_uninit(|self_ptr| {
430 let ctor = sys::builtin_fn!(string_from_string_name);
431 let args = [string.sys()];
432 ctor(self_ptr, args.as_ptr());
433 })
434 }
435 }
436}
437
438impl From<&NodePath> for GString {
439 fn from(path: &NodePath) -> Self {
440 unsafe {
441 Self::new_with_uninit(|self_ptr| {
442 let ctor = sys::builtin_fn!(string_from_node_path);
443 let args = [path.sys()];
444 ctor(self_ptr, args.as_ptr());
445 })
446 }
447 }
448}
449
#[cfg(feature = "serde")] #[cfg_attr(published_docs, doc(cfg(feature = "serde")))]
mod serialize {
    use std::fmt::Formatter;

    use serde::de::{Error, Visitor};
    use serde::{Deserialize, Deserializer, Serialize, Serializer};

    use super::*;

    // For "Available on crate feature `serde`" in docs. Cannot be inherited from module. Also does not support #[derive] (e.g. in Vector2).
    #[cfg_attr(published_docs, doc(cfg(feature = "serde")))]
    impl Serialize for GString {
        /// Serializes the string as a plain string value, using its `Display` output.
        #[inline]
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: Serializer,
        {
            let text = self.to_string();
            serializer.serialize_str(&text)
        }
    }

    #[cfg_attr(published_docs, doc(cfg(feature = "serde")))]
    impl<'de> Deserialize<'de> for GString {
        /// Deserializes a string value into a `GString`.
        #[inline]
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            /// Visitor turning a visited string slice into a `GString`.
            struct GStringVisitor;

            impl<'v> Visitor<'v> for GStringVisitor {
                type Value = GString;

                fn expecting(&self, formatter: &mut Formatter) -> fmt::Result {
                    formatter.write_str("a GString")
                }

                fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
                where
                    E: Error,
                {
                    let converted = GString::from(s);
                    Ok(converted)
                }
            }

            deserializer.deserialize_str(GStringVisitor)
        }
    }
}