swf/string.rs
1//! String type used by SWF files.
2
3pub use encoding_rs::{Encoding, SHIFT_JIS, UTF_8, WINDOWS_1252};
4use std::{borrow::Cow, fmt};
5
/// A borrowed, unsized byte string (in the spirit of `bstr`, and analogous to [`str`])
/// that is handed out by the SWF parsing functions.
///
/// * The bytes do not carry their own encoding; it depends on the SWF version
///   (UTF-8 for SWF6 and higher). Use `Reader::encoding` or
///   [`SwfStr::encoding_for_version`] to obtain the proper encoding.
/// * The bytes are not required to be valid for any particular encoding,
///   so every conversion to a [`String`] is lossy for invalid data.
///
/// Use [`SwfStr::to_str_lossy`] to turn this into a standard Rust string.
#[derive(Eq, PartialEq)]
#[repr(transparent)]
pub struct SwfStr {
    /// The raw, undecoded bytes of the string.
    string: [u8],
}
21
22impl SwfStr {
23 /// Creates a new `SwfStr` from a byte slice.
24 /// The data is not required to be valid for the given encoding.
25 ///
26 /// # Examples
27 ///
28 /// ```rust
29 /// use swf::SwfStr;
30 ///
31 /// let s = SwfStr::from_bytes(b"Hello, World!");
32 /// ```
33 #[inline]
34 pub const fn from_bytes(string: &[u8]) -> &Self {
35 // SAFETY: Casting is safe because internal representations are
36 // the same, see repr(transparent).
37 unsafe { &*(string as *const [u8] as *const Self) }
38 }
39
40 /// Creates a `SwfStr` from a byte slice by reading until a NULL byte (`0`) is encountered.
41 /// Returns `None` if no NULL byte was found.
42 ///
43 /// # Examples
44 ///
45 /// ```rust
46 /// use swf::SwfStr;
47 ///
48 /// let s = SwfStr::from_bytes_null_terminated(b"I'm null-terminated!\0");
49 /// assert!(s.is_some());
50 ///
51 /// let s = SwfStr::from_bytes_null_terminated(b"I'm not terminated...");
52 /// assert!(s.is_none());
53 /// ```
54 #[inline]
55 pub fn from_bytes_null_terminated(string: &[u8]) -> Option<&Self> {
56 // If investigations show that the bounds check is not elided,
57 // it should be safe to use `get_unchecked` here instead.
58 // Initial Godbolt research shows it doesn't make a difference.
59 string
60 .iter()
61 .position(|&c| c == 0)
62 .map(|i| Self::from_bytes(&string[..i]))
63 }
64
65 /// Creates a new UTF-8 `SwfStr` from a Rust [`str`].
66 ///
67 /// # Examples
68 ///
69 /// ```rust
70 /// use swf::SwfStr;
71 ///
72 /// let s = SwfStr::from_utf8_str("Hello, 🌏!");
73 /// ```
74 #[inline]
75 pub const fn from_utf8_str(string: &str) -> &Self {
76 Self::from_bytes(string.as_bytes())
77 }
78
79 /// Creates a new UTF-8 `SwfStr` from a Rust [`str`].
80 ///
81 /// # Examples
82 ///
83 /// ```rust
84 /// use swf::SwfStr;
85 ///
86 /// let s = SwfStr::from_utf8_str_null_terminated("I'm null-terminated!\0");
87 /// assert!(s.is_some());
88 ///
89 /// let s = SwfStr::from_utf8_str_null_terminated("I'm not terminated...");
90 /// assert!(s.is_none());
91 /// ```
92 #[inline]
93 pub fn from_utf8_str_null_terminated(string: &str) -> Option<&Self> {
94 Self::from_bytes_null_terminated(string.as_bytes())
95 }
96
97 /// Creates a new `SwfStr` with the given encoding from a Rust [`str`].
98 /// Returns `None` if the encoding is not lossless.
99 ///
100 /// The string will be re-encoded with the given encoding.
101 /// The string will be truncated if a NULL byte (`0`) is encountered.
102 ///
103 /// Intended for tests.
104 ///
105 /// # Examples
106 ///
107 /// ```rust
108 /// use swf::SwfStr;
109 /// use encoding_rs::WINDOWS_1252;
110 ///
111 /// let s = SwfStr::from_str_with_encoding("Hello, World!", WINDOWS_1252);
112 /// assert!(s.is_some());
113 /// ```
114 pub fn from_str_with_encoding<'a>(
115 string: &'a str,
116 encoding: &'static Encoding,
117 ) -> Option<&'a Self> {
118 if let (Cow::Borrowed(s), _, false) = encoding.encode(string) {
119 Some(Self::from_bytes(s))
120 } else {
121 None
122 }
123 }
124
125 /// Returns the suggested string encoding for the given SWF version.
126 ///
127 /// For SWF version 6 and higher, this is always UTF-8.
128 /// For SWF version 5 and lower, this is locale-dependent,
129 /// and we default to WINDOWS-1252.
130 ///
131 /// # Examples
132 ///
133 /// ```rust
134 /// use swf::SwfStr;
135 /// use encoding_rs::{UTF_8, WINDOWS_1252};
136 ///
137 /// assert_eq!(SwfStr::encoding_for_version(9), UTF_8);
138 /// assert_eq!(SwfStr::encoding_for_version(3), WINDOWS_1252);
139 /// ```
140 #[inline]
141 pub fn encoding_for_version(swf_version: u8) -> &'static Encoding {
142 if swf_version >= 6 {
143 UTF_8
144 } else {
145 WINDOWS_1252
146 }
147 }
148
149 /// Returns the byte slice of this string.
150 ///
151 /// # Examples
152 ///
153 /// ```rust
154 /// use swf::SwfStr;
155 ///
156 /// let s = SwfStr::from_utf8_str("💖");
157 /// assert_eq!(s.as_bytes(), [0xF0, 0x9F, 0x92, 0x96]);
158 /// ```
159 #[inline]
160 pub const fn as_bytes(&self) -> &[u8] {
161 &self.string
162 }
163
164 /// Returns `true` if the string has a length of zero, and `false` otherwise.
165 ///
166 /// # Examples
167 ///
168 /// ```rust
169 /// use swf::SwfStr;
170 ///
171 /// let s = SwfStr::from_bytes(&[]);
172 /// assert!(s.is_empty());
173 ///
174 /// let s = SwfStr::from_utf8_str("💖");
175 /// assert!(!s.is_empty());
176 #[inline]
177 pub const fn is_empty(&self) -> bool {
178 self.string.is_empty()
179 }
180
181 /// Returns the length of the string in bytes.
182 ///
183 /// # Examples
184 ///
185 /// ```rust
186 /// use swf::SwfStr;
187 ///
188 /// let s = SwfStr::from_utf8_str("");
189 /// assert_eq!(s.len(), 0);
190 ///
191 /// let s = SwfStr::from_utf8_str("Hi!");
192 /// assert_eq!(s.len(), 3);
193 ///
194 /// let s = SwfStr::from_utf8_str("💖");
195 /// assert_eq!(s.len(), 4);
196 /// ```
197 #[inline]
198 pub const fn len(&self) -> usize {
199 self.string.len()
200 }
201
202 /// Decodes the string into a Rust UTF-8 [`str`].
203 ///
204 /// The UTF-8 replacement character will be used for any invalid data.
205 ///
206 /// # Examples
207 ///
208 /// ```rust
209 /// use swf::SwfStr;
210 /// use encoding_rs::UTF_8;
211 ///
212 /// let s = SwfStr::from_bytes(&[0xF0, 0x9F, 0x92, 0x96]);
213 /// assert_eq!(s.to_str_lossy(UTF_8), "💖");
214 /// ```
215 #[inline]
216 pub fn to_str_lossy(&self, encoding: &'static Encoding) -> Cow<'_, str> {
217 encoding.decode_without_bom_handling(&self.string).0
218 }
219
220 /// Decodes the string into a Rust UTF-8 [`String`].
221 ///
222 /// The UTF-8 replacement character will be used for any invalid data.
223 ///
224 /// # Examples
225 ///
226 /// ```rust
227 /// use swf::SwfStr;
228 /// use encoding_rs::UTF_8;
229 ///
230 /// let s = SwfStr::from_bytes(&[0xF0, 0x9F, 0x92, 0x96]);
231 /// assert_eq!(s.to_string_lossy(UTF_8), "💖");
232 /// ```
233 #[inline]
234 pub fn to_string_lossy(&self, encoding: &'static Encoding) -> String {
235 self.to_str_lossy(encoding).into_owned()
236 }
237}
238
239impl<'a> Default for &'a SwfStr {
240 fn default() -> &'a SwfStr {
241 SwfStr::from_bytes(&[])
242 }
243}
244
245impl<'a> From<&'a str> for &'a SwfStr {
246 fn from(s: &'a str) -> &'a SwfStr {
247 SwfStr::from_utf8_str(s)
248 }
249}
250
251impl<T: ?Sized + AsRef<str>> PartialEq<T> for SwfStr {
252 fn eq(&self, other: &T) -> bool {
253 &self.string == other.as_ref().as_bytes()
254 }
255}
256
257impl fmt::Debug for SwfStr {
258 /// Formats the `SwfStr` using the given formatter.
259 ///
260 /// Non-ASCII characters will be formatted in hexadecimal
261 /// form (`\xNN`).
262 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
263 fmt::Write::write_char(f, '"')?;
264 for chr in self
265 .string
266 .iter()
267 .flat_map(|&c| std::ascii::escape_default(c))
268 {
269 fmt::Write::write_char(f, char::from(chr))?;
270 }
271 fmt::Write::write_char(f, '"')
272 }
273}