nt_string/
u16strle.rs

1// Copyright 2023 Colin Finck <colin@reactos.org>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3//
4//! Interface around byte slices of UTF-16 (little-endian) strings.
5
6use core::char;
7use core::cmp::Ordering;
8use core::fmt;
9
10use crate::helpers::cmp_iter;
11
12#[cfg(feature = "alloc")]
13use {
14    crate::error::Result,
15    alloc::string::String,
16    alloc::vec::Vec,
17    widestring::{decode_utf16, decode_utf16_lossy, U16String},
18};
19
/// Byte slice treated as a serialized UTF-16 (little-endian) string.
///
/// UTF-16 little-endian is the on-disk format that Windows uses for almost all strings.
/// `U16StrLe` lets you work with this format directly, without creating a copy or performing a conversion first.
///
/// The encoding is expected to be UTF-16 (or the UCS-2 subset for Windows NT 4.0 and earlier).
/// However, this type will happily accept any byte sequence, and only cares about the encoding when converting or displaying.
///
/// The wrapped slice is a public field and is not validated by this type; in particular, its length may be odd.
#[derive(Clone, Debug, Eq)]
pub struct U16StrLe<'a>(pub &'a [u8]);
29
30impl<'a> U16StrLe<'a> {
31    /// Returns `true` if `self` has a length of zero bytes.
32    pub const fn is_empty(&self) -> bool {
33        self.len() == 0
34    }
35
36    /// Returns the length of `self`.
37    ///
38    /// This length is in bytes, not characters! In other words,
39    /// it may not be what a human considers the length of the string.
40    pub const fn len(&self) -> usize {
41        self.0.len()
42    }
43
44    /// Attempts to copy `self` to a new owned `String`.
45    /// Returns `Ok(String)` if all characters could be converted successfully or `DecodeUtf16Error` otherwise.
46    #[cfg(feature = "alloc")]
47    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
48    pub fn to_string(&self) -> Result<String> {
49        decode_utf16(self.u16_iter())
50            .collect::<Result<String, _>>()
51            .map_err(Into::into)
52    }
53
54    /// Copies `self` to a new owned `String`, replacing invalid data with the replacement character (U+FFFD).
55    #[cfg(feature = "alloc")]
56    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
57    pub fn to_string_lossy(&self) -> String {
58        decode_utf16_lossy(self.u16_iter()).collect()
59    }
60
61    /// Copies `self` to a new owned [`widestring::U16String`].
62    #[cfg(feature = "alloc")]
63    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
64    pub fn to_ustring(&self) -> U16String {
65        U16String::from_vec(self.u16_iter().collect::<Vec<u16>>())
66    }
67
68    /// Returns an iterator over the [`u16`] codepoints of this string.
69    ///
70    /// The codepoints may or may not be valid UTF-16 codepoints.
71    /// This function does not validate them.
72    pub fn u16_iter(&'a self) -> impl Iterator<Item = u16> + 'a {
73        self.0
74            .chunks_exact(2)
75            .map(|two_bytes| u16::from_le_bytes(two_bytes.try_into().unwrap()))
76    }
77}
78
79impl<'a> fmt::Display for U16StrLe<'a> {
80    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
81        let utf16_iter =
82            char::decode_utf16(self.u16_iter()).map(|x| x.unwrap_or(char::REPLACEMENT_CHARACTER));
83
84        for single_char in utf16_iter {
85            single_char.fmt(f)?;
86        }
87
88        Ok(())
89    }
90}
91
92impl<'a> Ord for U16StrLe<'a> {
93    fn cmp(&self, other: &Self) -> Ordering {
94        cmp_iter(self.u16_iter(), other.u16_iter())
95    }
96}
97
98impl<'a, 'b> PartialEq<U16StrLe<'a>> for U16StrLe<'b> {
99    /// Checks that two strings are a (case-sensitive!) match.
100    fn eq(&self, other: &U16StrLe<'a>) -> bool {
101        self.0 == other.0
102    }
103}
104
105impl<'a> PartialEq<str> for U16StrLe<'a> {
106    fn eq(&self, other: &str) -> bool {
107        cmp_iter(self.u16_iter(), other.encode_utf16()) == Ordering::Equal
108    }
109}
110
111impl<'a> PartialEq<U16StrLe<'a>> for str {
112    fn eq(&self, other: &U16StrLe<'a>) -> bool {
113        cmp_iter(self.encode_utf16(), other.u16_iter()) == Ordering::Equal
114    }
115}
116
117impl<'a> PartialEq<&str> for U16StrLe<'a> {
118    fn eq(&self, other: &&str) -> bool {
119        cmp_iter(self.u16_iter(), other.encode_utf16()) == Ordering::Equal
120    }
121}
122
123impl<'a> PartialEq<U16StrLe<'a>> for &str {
124    fn eq(&self, other: &U16StrLe<'a>) -> bool {
125        cmp_iter(self.encode_utf16(), other.u16_iter()) == Ordering::Equal
126    }
127}
128
129impl<'a, 'b> PartialOrd<U16StrLe<'a>> for U16StrLe<'b> {
130    fn partial_cmp(&self, other: &U16StrLe<'a>) -> Option<Ordering> {
131        Some(self.cmp(other))
132    }
133}
134
135impl<'a> PartialOrd<str> for U16StrLe<'a> {
136    fn partial_cmp(&self, other: &str) -> Option<Ordering> {
137        Some(cmp_iter(self.u16_iter(), other.encode_utf16()))
138    }
139}
140
141impl<'a> PartialOrd<U16StrLe<'a>> for str {
142    fn partial_cmp(&self, other: &U16StrLe<'a>) -> Option<Ordering> {
143        Some(cmp_iter(self.encode_utf16(), other.u16_iter()))
144    }
145}
146
147impl<'a> PartialOrd<&str> for U16StrLe<'a> {
148    fn partial_cmp(&self, other: &&str) -> Option<Ordering> {
149        Some(cmp_iter(self.u16_iter(), other.encode_utf16()))
150    }
151}
152
153impl<'a> PartialOrd<U16StrLe<'a>> for &str {
154    fn partial_cmp(&self, other: &U16StrLe<'a>) -> Option<Ordering> {
155        Some(cmp_iter(self.encode_utf16(), other.u16_iter()))
156    }
157}