r_efi_string/str16.rs
1//! UEFI Char16 based String Types and Converters
2//!
3//! This module implements two basic types `[&EfiStr16]` and `[EfiString16]`, which relate to each
4//! other just as `[&str]` relates to `[String]`. Unlike the strings in the rust standard library,
5//! these types implement UCS-2'ish strings, as used in UEFI systems.
6//!
7//! While the UEFI Specification clearly states that `Efi::Char16` based strings must be UCS-2,
8//! firmware is known to violate this. In fact, any 0-terminated `u16` array might be exposed in
9//! such strings. Therefore, the `EfiStr16` type implements a string based on any `u16` array, and
10//! provides converters to and from the standard rust types.
11
/// Error describing misplaced Nul entries in a source slice.
///
/// Returned by conversions that require a `u16` slice to carry exactly one Nul entry, located at
/// the very end of the slice.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum FromSliceWithNulError {
    /// A Nul entry was found before the end of the slice.
    ///
    /// Only the final entry may be Nul. The payload is the index (into the original source
    /// slice) of the first Nul entry that is not the final entry.
    InteriorNul(usize),

    /// The slice contains no Nul entry at all.
    ///
    /// Every source slice must end with a Nul terminator; this also covers the empty slice.
    NotNulTerminated,
}

// Human-readable messages, so the error can be reported with `{}` and not only with `{:?}`.
impl core::fmt::Display for FromSliceWithNulError {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        match *self {
            FromSliceWithNulError::InteriorNul(pos) => {
                write!(f, "source slice contains an interior Nul entry at position {}", pos)
            }
            FromSliceWithNulError::NotNulTerminated => {
                f.write_str("source slice is not Nul terminated")
            }
        }
    }
}
31
/// Borrowed, Nul-terminated string of 16-bit entries as used by UEFI.
///
/// `EfiStr16` plays the same role as `CStr` or `OsStr` in the rust standard library, but for the
/// `Char16` strings of the UEFI specification. The specification mandates UCS-2, yet firmware
/// does not reliably follow it, so this type is neither strictly UCS-2 nor UTF-16.
///
/// Any array of `u16` is accepted as a valid string as long as its last entry is 0 and no other
/// entry is 0. The entries are native-endian 16-bit unsigned integers.
///
/// The type is `#[repr(transparent)]` over `[u16]`. The pointer casts between `[u16]` and
/// `EfiStr16` performed by this module rely on both having the same layout, which only a
/// transparent representation guarantees.
#[repr(transparent)]
#[derive(Eq, Ord, PartialEq, PartialOrd)]
pub struct EfiStr16 {
    // Backing entries, including the terminating 0 entry.
    inner: [u16],
}
46
/// Owned counterpart of `EfiStr16`: a heap-allocated, Nul-terminated string of 16-bit entries
/// without interior Nul entries.
///
/// `EfiString16` relates to `&EfiStr16` the way `String` relates to `&str`: it owns its backing
/// memory instead of borrowing it.
///
/// The set of values representable by `EfiString16` is exactly that of `EfiStr16`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct EfiString16 {
    // Owned backing entries.
    inner: alloc::boxed::Box<[u16]>,
}
58
59impl EfiStr16 {
60 /// Create Str16 from pointer to u16.
61 ///
62 /// This takes a pointer to a `Char16` string as defined by the UEFI specification. It is a
63 /// C-string based on 16-bit integers and terminated by a 16-bit 0 entry.
64 ///
65 /// This function turns this C-string into a slice of `[EfiStr16]`. The returned slice does
66 /// not own the backing memory, but points to the original C-string.
67 ///
68 /// # Safety
69 ///
70 /// This function is unsafe for several reasons:
71 ///
72 /// * The caller must guarantee the backing memory of the C-string outlives the livetime
73 /// `'a`.
74 /// * The memory pointer to by `ptr` must be a valid, zero-terminated C-string based on
75 /// 16-bit integers.
76 ///
77 /// The caller must guarantee that the pointer points to a nul-terminated
78 /// native-endian UTF-16 string. The string should either originate in
79 /// UEFI, or be restricted to the subset of UTF-16 that the UEFI spec
80 /// allows.
81 pub unsafe fn from_ptr<'a>(ptr: *const u16) -> &'a EfiStr16 {
82 let mut len: isize = 0;
83
84 while ptr.offset(len).read() != 0 {
85 len += 1;
86 }
87
88 Self::from_slice_with_nul_unchecked(
89 core::slice::from_raw_parts(ptr, len as usize + 1)
90 )
91 }
92
93 /// Create Str16 from a slice of u16.
94 ///
95 /// This turns a slice of `u16` into a `Str16`. The original slice is borrowed by the newly
96 /// returned `Str16`. The input is not verified for validity. It is the caller's
97 /// responsibility to adhere to the safety guarantees.
98 ///
99 /// # Safety
100 ///
101 /// This function is unsafe because the caller has to guarantee that the passed slice contains
102 /// a 0 terminator as its last entry. Furthermore, it must not contain any other 0 entry.
103 pub unsafe fn from_slice_with_nul_unchecked<'a>(slice: &[u16]) -> &EfiStr16 {
104 &*(slice as *const [u16] as *const EfiStr16)
105 }
106
107 /// Create Str16 from a slice of u16.
108 ///
109 /// This turns a slice of `u16` into a `Str16`. The original slice is borrowed by the newly
110 /// returned `Str16`. The input is verified to be a 0-terminated slice, with no other 0
111 /// characters embedded in the string.
112 pub fn from_slice_with_nul<'a>(slice: &[u16]) -> Result<&EfiStr16, FromSliceWithNulError> {
113 let n = slice.len();
114
115 for i in 0..n {
116 if slice[i] == 0 {
117 if i + 1 == n {
118 return unsafe { Ok(Self::from_slice_with_nul_unchecked(slice)) };
119 } else {
120 return Err(FromSliceWithNulError::InteriorNul(i));
121 }
122 }
123 }
124
125 Err(FromSliceWithNulError::NotNulTerminated)
126 }
127
128 /// Convert string slice to a raw pointer.
129 ///
130 /// This converts the string slice to a raw pointer. The pointer references the memory inside
131 /// of `self`. Therefore, the pointer becomes stale as soon as `self` goes out of scope.
132 pub fn as_ptr(&self) -> *const u16 {
133 self.inner.as_ptr()
134 }
135
136 /// Convert string slice to a u16 slice including the terminating 0 character.
137 ///
138 /// This returns a slice of `u16`, which borrows the backing memory of the input string. The
139 /// slice includes the terminating 0 character.
140 pub fn as_slice_with_nul(&self) -> &[u16] {
141 &self.inner
142 }
143
144 /// Convert string slice to a u16 slice excluding the terminating 0 character.
145 ///
146 /// This returns a slice of `u16`, which borrows the backing memory of the input string. The
147 /// slice does not includes the terminating 0 character.
148 pub fn as_slice(&self) -> &[u16] {
149 let s = self.as_slice_with_nul();
150 &s[..s.len() - 1]
151 }
152
153 /// Converts an `EfiStr16` into a `[String]`.
154 ///
155 /// This converts the input string into a standard rust string. This always requires a memory
156 /// allocation since the backing data needs to be converted from 16-bit based UCS-2 to 8-bit
157 /// based UTF-8.
158 ///
159 /// The `EfiStr16` type is a lot less strict on its encoding. Therefore, not all instances can
160 /// be converted to valid UTF-8. If the input string is invalid, this function will raise an
161 /// error. Use `to_string_lossy()` if you want the conversion to replace invalid characters.
162 pub fn to_string(&self) -> Result<alloc::string::String, alloc::string::FromUtf16Error> {
163 alloc::string::String::from_utf16(self.as_slice())
164 }
165
166 /// Converts an `EfiStr16` into a `[String]`, replacing invalid characters with the Unicode
167 /// replacement character.
168 ///
169 /// This function works like `to_string()` but whenever invalid characters are found in the
170 /// input string, they are replaced with the Unicode Replacement Character.
171 pub fn to_string_lossy(&self) -> alloc::string::String {
172 alloc::string::String::from_utf16_lossy(self.as_slice())
173 }
174}
175
176// Default value for an EfiStr16 is the empty string with just a zero terminator.
177impl Default for &EfiStr16 {
178 fn default() -> Self {
179 const DEFAULT: &[u16] = &[0];
180 unsafe { EfiStr16::from_slice_with_nul_unchecked(DEFAULT) }
181 }
182}
183
184// Quirk to make `Box<EfiStr16>` use the default of `&EfiStr16`.
185impl Default for alloc::boxed::Box<EfiStr16> {
186 fn default() -> Self {
187 <&EfiStr16 as Default>::default().into()
188 }
189}
190
191// Creating a box from an `&EfiStr16` simply allocates the backing array.
192impl From<&EfiStr16> for alloc::boxed::Box<EfiStr16> {
193 fn from(s: &EfiStr16) -> alloc::boxed::Box<EfiStr16> {
194 let boxed: alloc::boxed::Box<[u16]> = alloc::boxed::Box::from(s.as_slice_with_nul());
195 unsafe {
196 alloc::boxed::Box::from_raw(
197 alloc::boxed::Box::into_raw(boxed) as *mut EfiStr16
198 )
199 }
200 }
201}
202
203// Quirk to make `Box<EfiStr16>` use `From<&EfiStr16>`.
204impl Clone for alloc::boxed::Box<EfiStr16> {
205 fn clone(&self) -> Self {
206 (**self).into()
207 }
208}
209
210// Print EfiStr16 in ASCII-compatible mode, escape anything else as '\u<hex>`.
211impl core::fmt::Debug for EfiStr16 {
212 fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
213 fn hexify_4bit(v: u8) -> char {
214 match v {
215 0x0..=0x9 => (b'0' + v) as char,
216 0xa..=0xf => (b'a' + (v - 0xa)) as char,
217 _ => panic!{},
218 }
219 }
220
221 fn hexify_16bit(v: u16) -> [char; 4] {
222 [
223 hexify_4bit(((v >> 12) & 0x000f) as u8),
224 hexify_4bit(((v >> 8) & 0x000f) as u8),
225 hexify_4bit(((v >> 4) & 0x000f) as u8),
226 hexify_4bit(((v >> 0) & 0x000f) as u8),
227 ]
228 }
229
230 write!(f, "\"")?;
231 for entry in self.as_slice().iter() {
232 match *entry {
233 0x0000..=0x00ff => {
234 for c in core::ascii::escape_default(*entry as u8) {
235 core::fmt::Write::write_char(f, c as char)?;
236 }
237 },
238 _ => {
239 let a = hexify_16bit(*entry);
240
241 write!(f, "\\u")?;
242 core::fmt::Write::write_char(f, a[0])?;
243 core::fmt::Write::write_char(f, a[1])?;
244 core::fmt::Write::write_char(f, a[2])?;
245 core::fmt::Write::write_char(f, a[3])?;
246 },
247 }
248 }
249 write!(f, "\"")
250 }
251}
252
253impl EfiString16 {
254 // XXX: To be implemented.
255}
256
#[cfg(test)]
mod tests {
    use super::*;

    // Asserts that `s` borrows `original` in place and exposes its six characters plus the
    // terminator.
    fn assert_views(s: &EfiStr16, original: &[u16]) {
        assert_eq!(s.as_ptr(), original.as_ptr());
        assert_eq!(s.as_slice().len(), 6);
        assert_eq!(s.as_slice()[0], 0x41);
        assert_eq!(s.as_slice_with_nul(), original);
    }

    // Every constructor must borrow the source, and the checked one must report Nul placement.
    #[test]
    fn efistr16_constructors() {
        let original: &[u16] = &[0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0];

        assert_views(unsafe { EfiStr16::from_ptr(original.as_ptr()) }, original);
        assert_views(
            unsafe { EfiStr16::from_slice_with_nul_unchecked(original) },
            original,
        );
        assert_views(EfiStr16::from_slice_with_nul(original).unwrap(), original);

        // Inputs the checked constructor has to reject, paired with the expected error.
        let rejected: [(&[u16], FromSliceWithNulError); 5] = [
            (&[], FromSliceWithNulError::NotNulTerminated),
            (&[0x0041], FromSliceWithNulError::NotNulTerminated),
            (&[0x0000, 0x0041, 0x0000], FromSliceWithNulError::InteriorNul(0)),
            (&[0x0041, 0x0000, 0x0000], FromSliceWithNulError::InteriorNul(1)),
            (
                &[0x0000, 0x0041, 0x0000, 0x0042, 0x0000],
                FromSliceWithNulError::InteriorNul(0),
            ),
        ];
        for (input, expected) in rejected.iter() {
            assert_eq!(
                EfiStr16::from_slice_with_nul(input).err().unwrap(),
                *expected
            );
        }

        assert!(EfiStr16::from_slice_with_nul(&[0x0041, 0x0000]).is_ok());

        let empty: &EfiStr16 = Default::default();
        assert_eq!(empty.as_slice_with_nul(), &[0]);
    }

    // Two strings with equal entries at different addresses must compare equal.
    #[test]
    fn efistr16_compare() {
        let slice: &[u16] = &[
            0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0, //
            0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0,
        ];
        let (first, second) = slice.split_at(7);
        let string1 = unsafe { EfiStr16::from_slice_with_nul_unchecked(first) };
        let string2 = unsafe { EfiStr16::from_slice_with_nul_unchecked(second) };

        assert_eq!(string1, string2);
        assert_eq!(string1.cmp(string2), core::cmp::Ordering::Equal);
    }

    // Strict conversion fails on a lone surrogate, lossy conversion substitutes U+FFFD.
    #[test]
    fn efistr16_converters() {
        let slice_good: &[u16] = &[0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0];
        let slice_bad: &[u16] = &[0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0xd800, 0];
        let string_good = unsafe { EfiStr16::from_slice_with_nul_unchecked(slice_good) };
        let string_bad = unsafe { EfiStr16::from_slice_with_nul_unchecked(slice_bad) };

        assert_eq!(string_good.to_string().unwrap(), "ABCDEF");
        assert!(string_bad.to_string().is_err());

        assert_eq!(string_good.to_string_lossy(), "ABCDEF");
        assert_eq!(string_bad.to_string_lossy(), "ABCDE\u{fffd}");
    }

    // Debug output escapes control characters and prints wide entries as `\u<hex>`.
    #[test]
    fn efistr16_debug() {
        let slice: &[u16] = &[
            0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, // "ABCDEF"
            0x0001, 0x000a, 0xabcd, // control, newline, non-Latin-1 entry
            0,
        ];
        let string = unsafe { EfiStr16::from_slice_with_nul_unchecked(slice) };

        assert_eq!(format!("{:?}", string), "\"ABCDEF\\x01\\n\\uabcd\"");
    }

    // Boxing, cloning and defaulting a `Box<EfiStr16>` preserve the entries.
    #[test]
    fn efistr16_box() {
        let slice: &[u16] = &[0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0];
        let string = unsafe { EfiStr16::from_slice_with_nul_unchecked(slice) };
        let boxed: alloc::boxed::Box<EfiStr16> = string.into();

        assert_eq!(string.as_slice_with_nul(), slice);
        assert_eq!(boxed.as_slice_with_nul(), slice);
        assert_eq!(boxed.clone().as_slice_with_nul(), slice);

        let boxed_default = <alloc::boxed::Box<EfiStr16> as Default>::default();
        let borrowed_default = <&EfiStr16 as Default>::default();
        assert_eq!(
            boxed_default.as_slice_with_nul(),
            borrowed_default.as_slice_with_nul()
        );
    }
}