swf/
string.rs

1//! String type used by SWF files.
2
3pub use encoding_rs::{Encoding, SHIFT_JIS, UTF_8, WINDOWS_1252};
4use std::{borrow::Cow, fmt};
5
/// A borrowed, bstr-like byte string (the SWF counterpart of [`str`]) that is
/// returned by the SWF parsing functions.
///
/// * The bytes carry no encoding of their own. The correct encoding depends on
///   the SWF version (UTF-8 for SWF6 and higher); obtain it from
///   `Reader::encoding` or [`SwfStr::encoding_for_version`].
/// * Data that is invalid for a particular encoding is allowed;
///   converting such data to a standard Rust string is lossy.
///
/// Use [`SwfStr::to_str_lossy`] to convert this to a standard Rust string.
#[derive(PartialEq, Eq)]
#[repr(transparent)]
pub struct SwfStr {
    /// The raw bytes of the string, in whatever encoding the SWF used.
    string: [u8],
}
21
22impl SwfStr {
23    /// Creates a new `SwfStr` from a byte slice.
24    /// The data is not required to be valid for the given encoding.
25    ///
26    /// # Examples
27    ///
28    /// ```rust
29    /// use swf::SwfStr;
30    ///
31    /// let s = SwfStr::from_bytes(b"Hello, World!");
32    /// ```
33    #[inline]
34    pub const fn from_bytes(string: &[u8]) -> &Self {
35        // SAFETY: Casting is safe because internal representations are
36        // the same, see repr(transparent).
37        unsafe { &*(string as *const [u8] as *const Self) }
38    }
39
40    /// Creates a `SwfStr` from a byte slice by reading until a NULL byte (`0`) is encountered.
41    /// Returns `None` if no NULL byte was found.
42    ///
43    /// # Examples
44    ///
45    /// ```rust
46    /// use swf::SwfStr;
47    ///
48    /// let s = SwfStr::from_bytes_null_terminated(b"I'm null-terminated!\0");
49    /// assert!(s.is_some());
50    ///
51    /// let s = SwfStr::from_bytes_null_terminated(b"I'm not terminated...");
52    /// assert!(s.is_none());
53    /// ```
54    #[inline]
55    pub fn from_bytes_null_terminated(string: &[u8]) -> Option<&Self> {
56        // If investigations show that the bounds check is not elided,
57        // it should be safe to use `get_unchecked` here instead.
58        // Initial Godbolt research shows it doesn't make a difference.
59        string
60            .iter()
61            .position(|&c| c == 0)
62            .map(|i| Self::from_bytes(&string[..i]))
63    }
64
65    /// Creates a new UTF-8 `SwfStr` from a Rust [`str`].
66    ///
67    /// # Examples
68    ///
69    /// ```rust
70    /// use swf::SwfStr;
71    ///
72    /// let s = SwfStr::from_utf8_str("Hello, 🌏!");
73    /// ```
74    #[inline]
75    pub const fn from_utf8_str(string: &str) -> &Self {
76        Self::from_bytes(string.as_bytes())
77    }
78
79    /// Creates a new UTF-8 `SwfStr` from a Rust [`str`].
80    ///
81    /// # Examples
82    ///
83    /// ```rust
84    /// use swf::SwfStr;
85    ///
86    /// let s = SwfStr::from_utf8_str_null_terminated("I'm null-terminated!\0");
87    /// assert!(s.is_some());
88    ///
89    /// let s = SwfStr::from_utf8_str_null_terminated("I'm not terminated...");
90    /// assert!(s.is_none());
91    /// ```
92    #[inline]
93    pub fn from_utf8_str_null_terminated(string: &str) -> Option<&Self> {
94        Self::from_bytes_null_terminated(string.as_bytes())
95    }
96
97    /// Creates a new `SwfStr` with the given encoding from a Rust [`str`].
98    /// Returns `None` if the encoding is not lossless.
99    ///
100    /// The string will be re-encoded with the given encoding.
101    /// The string will be truncated if a NULL byte (`0`) is encountered.
102    ///
103    /// Intended for tests.
104    ///
105    /// # Examples
106    ///
107    /// ```rust
108    /// use swf::SwfStr;
109    /// use encoding_rs::WINDOWS_1252;
110    ///
111    /// let s = SwfStr::from_str_with_encoding("Hello, World!", WINDOWS_1252);
112    /// assert!(s.is_some());
113    /// ```
114    pub fn from_str_with_encoding<'a>(
115        string: &'a str,
116        encoding: &'static Encoding,
117    ) -> Option<&'a Self> {
118        if let (Cow::Borrowed(s), _, false) = encoding.encode(string) {
119            Some(Self::from_bytes(s))
120        } else {
121            None
122        }
123    }
124
125    /// Returns the suggested string encoding for the given SWF version.
126    ///
127    /// For SWF version 6 and higher, this is always UTF-8.
128    /// For SWF version 5 and lower, this is locale-dependent,
129    /// and we default to WINDOWS-1252.
130    ///
131    /// # Examples
132    ///
133    /// ```rust
134    /// use swf::SwfStr;
135    /// use encoding_rs::{UTF_8, WINDOWS_1252};
136    ///
137    /// assert_eq!(SwfStr::encoding_for_version(9), UTF_8);
138    /// assert_eq!(SwfStr::encoding_for_version(3), WINDOWS_1252);
139    /// ```
140    #[inline]
141    pub fn encoding_for_version(swf_version: u8) -> &'static Encoding {
142        if swf_version >= 6 {
143            UTF_8
144        } else {
145            WINDOWS_1252
146        }
147    }
148
149    /// Returns the byte slice of this string.
150    ///
151    /// # Examples
152    ///
153    /// ```rust
154    /// use swf::SwfStr;
155    ///
156    /// let s = SwfStr::from_utf8_str("💖");
157    /// assert_eq!(s.as_bytes(), [0xF0, 0x9F, 0x92, 0x96]);
158    /// ```
159    #[inline]
160    pub const fn as_bytes(&self) -> &[u8] {
161        &self.string
162    }
163
164    /// Returns `true` if the string has a length of zero, and `false` otherwise.
165    ///
166    /// # Examples
167    ///
168    /// ```rust
169    /// use swf::SwfStr;
170    ///
171    /// let s = SwfStr::from_bytes(&[]);
172    /// assert!(s.is_empty());
173    ///
174    /// let s = SwfStr::from_utf8_str("💖");
175    /// assert!(!s.is_empty());
176    #[inline]
177    pub const fn is_empty(&self) -> bool {
178        self.string.is_empty()
179    }
180
181    /// Returns the length of the string in bytes.
182    ///
183    /// # Examples
184    ///
185    /// ```rust
186    /// use swf::SwfStr;
187    ///
188    /// let s = SwfStr::from_utf8_str("");
189    /// assert_eq!(s.len(), 0);
190    ///
191    /// let s = SwfStr::from_utf8_str("Hi!");
192    /// assert_eq!(s.len(), 3);
193    ///
194    /// let s = SwfStr::from_utf8_str("💖");
195    /// assert_eq!(s.len(), 4);
196    /// ```
197    #[inline]
198    pub const fn len(&self) -> usize {
199        self.string.len()
200    }
201
202    /// Decodes the string into a Rust UTF-8 [`str`].
203    ///
204    /// The UTF-8 replacement character will be used for any invalid data.
205    ///
206    /// # Examples
207    ///
208    /// ```rust
209    /// use swf::SwfStr;
210    /// use encoding_rs::UTF_8;
211    ///
212    /// let s = SwfStr::from_bytes(&[0xF0, 0x9F, 0x92, 0x96]);
213    /// assert_eq!(s.to_str_lossy(UTF_8), "💖");
214    /// ```
215    #[inline]
216    pub fn to_str_lossy(&self, encoding: &'static Encoding) -> Cow<'_, str> {
217        encoding.decode_without_bom_handling(&self.string).0
218    }
219
220    /// Decodes the string into a Rust UTF-8 [`String`].
221    ///
222    /// The UTF-8 replacement character will be used for any invalid data.
223    ///
224    /// # Examples
225    ///
226    /// ```rust
227    /// use swf::SwfStr;
228    /// use encoding_rs::UTF_8;
229    ///
230    /// let s = SwfStr::from_bytes(&[0xF0, 0x9F, 0x92, 0x96]);
231    /// assert_eq!(s.to_string_lossy(UTF_8), "💖");
232    /// ```
233    #[inline]
234    pub fn to_string_lossy(&self, encoding: &'static Encoding) -> String {
235        self.to_str_lossy(encoding).into_owned()
236    }
237}
238
239impl<'a> Default for &'a SwfStr {
240    fn default() -> &'a SwfStr {
241        SwfStr::from_bytes(&[])
242    }
243}
244
245impl<'a> From<&'a str> for &'a SwfStr {
246    fn from(s: &'a str) -> &'a SwfStr {
247        SwfStr::from_utf8_str(s)
248    }
249}
250
251impl<T: ?Sized + AsRef<str>> PartialEq<T> for SwfStr {
252    fn eq(&self, other: &T) -> bool {
253        &self.string == other.as_ref().as_bytes()
254    }
255}
256
257impl fmt::Debug for SwfStr {
258    /// Formats the `SwfStr` using the given formatter.
259    ///
260    /// Non-ASCII characters will be formatted in hexadecimal
261    /// form (`\xNN`).
262    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
263        fmt::Write::write_char(f, '"')?;
264        for chr in self
265            .string
266            .iter()
267            .flat_map(|&c| std::ascii::escape_default(c))
268        {
269            fmt::Write::write_char(f, char::from(chr))?;
270        }
271        fmt::Write::write_char(f, '"')
272    }
273}