nt_string/unicode_string/mod.rs
1// Copyright 2023 Colin Finck <colin@reactos.org>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3//
4//! Interface around the Windows kernel string type known as [`UNICODE_STRING`].
5//!
6//! The [`UNICODE_STRING`] type was designed for the C programming language, which only knows about NUL-terminated
7//! buffers of characters.
8//! To determine the length of such a buffer, you need to iterate over all characters until finding the NUL.
9//! Bad enough?
10//! It gets worse:
11//! A classic buffer overflow occurs if the buffer contains no NUL, but an algorithm attempts to find
12//! it anyway.
13//!
14//! To overcome these performance and security hazards, [`UNICODE_STRING`]s consist of a buffer, a buffer capacity
15//! ("maximum length"), and a field to indicate the actually used length.
16//! Determining length and capacity is now as simple as querying the corresponding fields.
17//!
18//! Length and capacity are 16-bit values and expressed in bytes.
19//! This allows for up to 32767 UTF-16 characters per string.
20//! However, note that additional space may be occupied by a NUL terminator or by UTF-16 characters outside the
21//! [Basic Multilingual Plane] (which require 4 bytes per character instead of 2).
22//!
23//! 32767 characters are way below the expected limit of most data structures.
24//! Therefore, most functions of this crate are fallible and may return [`NtStringError::BufferSizeExceedsU16`].
25//!
26//! While the string length is fully expressed by the `length` field of a [`UNICODE_STRING`] and a NUL termination
27//! is not required by the specification, this crate tries to NUL-terminate the internal buffer whenever possible.
28//! This defense-in-depth approach guards against external applications that mistakenly treat the internal buffer
29//! of a [`UNICODE_STRING`] as a NUL-terminated string.
30//! Only a few functions of this crate don't NUL-terminate the internal buffer; they are marked as such.
31//!
32//! [`UNICODE_STRING`] is implemented in 3 Rust structs:
33//!
34//! * [`NtUnicodeStr`] is an immutable reference to an existing [`UNICODE_STRING`] in memory
35//! (analogous to `&str`).
36//! You can also create a constant [`NtUnicodeStr`] using the [`nt_unicode_str`] macro
37//! (analogous to `const STR: &'static str = "..."`).
38//!
39//! * [`NtUnicodeStrMut`] is a mutable reference to an existing [`UNICODE_STRING`] in memory
40//! (analogous to `&mut str`).
41//! As it doesn't know how its internal buffer has been allocated, you can only do limited alterations
42//! (like removing a character), but not grow it beyond the buffer size.
43//!
44//! * [`NtUnicodeString`] is an owned and growable [`UNICODE_STRING`] (analogous to [`String`]).
45//! It can reallocate the internal buffer on demand and therefore implements all kinds of methods to alter
46//! its contents.
47//!
48//! [`Deref`] and [`DerefMut`] traits have been implemented to make every [`NtUnicodeString`] act as
49//! [`NtUnicodeStrMut`] and every [`NtUnicodeStrMut`] act as [`NtUnicodeStr`] if required.
50//!
51//! # Examples
52//!
53//! You can work with these string types just like you work with other Rust string types:
54//!
55//! ```
56//! # use nt_string::unicode_string::NtUnicodeString;
57//! let mut string = NtUnicodeString::try_from("Hello! ").unwrap();
58//! string.try_push_str("Moin!").unwrap();
59//! println!("{string}");
60//! ```
61//!
62//! Conversions are also supported from raw [`u16`] string buffers as well as the [`U16CStr`] and [`U16Str`]
63//! types of the [`widestring`] crate:
64//!
65//! ```
66//! # use nt_string::unicode_string::NtUnicodeString;
67//! # use widestring::{u16cstr, u16str};
68//! let abc = NtUnicodeString::try_from_u16(&[b'A' as u16, b'B' as u16, b'C' as u16]).unwrap();
69//! let de = NtUnicodeString::try_from_u16_until_nul(&[b'D' as u16, b'E' as u16, 0]).unwrap();
70//! let fgh = NtUnicodeString::try_from(u16cstr!("FGH")).unwrap();
71//! let ijk = NtUnicodeString::try_from(u16str!("IJK")).unwrap();
72//! ```
73//!
74//! Just like a [`String`] automatically dereferences to a `&str` when you pass it to an appropriate function,
75//! you can do the same with an [`NtUnicodeString`] and it will dereference to an `&NtUnicodeStr`:
76//!
77//! ```
78//! # use nt_string::unicode_string::{NtUnicodeStr, NtUnicodeString};
79//! let string = NtUnicodeString::try_from("My String").unwrap();
80//! subfunction(&string);
81//!
82//! fn subfunction(str_ref: &NtUnicodeStr) {
83//! println!("Hello from subfunction with \"{str_ref}\".");
84//! }
85//! ```
86//!
87//! Constant [`UNICODE_STRING`]s can be created at compile-time.
88//! This provides strings with a `'static` lifetime and saves a UTF-16 conversion at runtime:
89//!
90//! ```
91//! # use nt_string::nt_unicode_str;
92//! # use nt_string::unicode_string::NtUnicodeStr;
93//! const MY_CONSTANT_STRING: NtUnicodeStr<'static> = nt_unicode_str!("My Constant String");
94//! ```
95//!
96//! Finally, you most likely want to pass your [`NtUnicodeStr`], [`NtUnicodeStrMut`] or [`NtUnicodeString`]
97//! to an FFI function that expects a pointer to a [`UNICODE_STRING`].
98//! Use the [`as_ptr`] or [`as_mut_ptr`] methods to get an immutable or mutable pointer.
99//!
100//! [`as_mut_ptr`]: crate::unicode_string::NtUnicodeStrMut::as_mut_ptr
101//! [`as_ptr`]: crate::unicode_string::NtUnicodeStr::as_ptr
102//! [Basic Multilingual Plane]: https://en.wikipedia.org/wiki/Basic_Multilingual_Plane
103//! [`Deref`]: core::ops::Deref
104//! [`DerefMut`]: core::ops::DerefMut
105//! [`nt_unicode_str`]: crate::nt_unicode_str
106//! [`NtStringError::BufferSizeExceedsU16`]: crate::error::NtStringError::BufferSizeExceedsU16
107//! [`U16CStr`]: widestring::U16CStr
108//! [`U16Str`]: widestring::U16Str
109//! [`UNICODE_STRING`]: https://learn.microsoft.com/windows/win32/api/ntdef/ns-ntdef-_unicode_string
110
111pub mod iter;
112
113mod str;
114pub use self::str::*;
115
116mod strmut;
117pub use self::strmut::*;
118
119#[cfg(feature = "alloc")]
120mod string;
121#[cfg(feature = "alloc")]
122pub use self::string::*;
123
/// Generates a cross-type [`PartialEq`] implementation that compares both operands
/// through their dereferenced forms.
///
/// Argument order: the *second* type is the one the trait is implemented for (`Self`),
/// while the *first* type becomes the `Rhs` type parameter.
/// In other words, `impl_eq! { A, B }` expands to `impl PartialEq<A> for B` and enables `b == a`.
///
/// NOTE(review): this is the reverse of the `lhs, rhs` order used by the similarly named helper
/// macro in the standard library. Confirm that every pair of types is registered in both orders
/// (or that call sites expect this order).
///
/// The expansion calls `.deref()` using method syntax and therefore relies on the `Deref` trait
/// being in scope wherever the macro is expanded.
/// The impl declares the lifetimes `'a` and `'b`, presumably so that the type arguments can
/// mention them (e.g. `Foo<'a>`).
macro_rules! impl_eq {
    ($other_ty:ty, $self_ty:ty) => {
        impl<'a, 'b> PartialEq<$other_ty> for $self_ty {
            fn eq(&self, other: &$other_ty) -> bool {
                // Dereference both sides first, then delegate to the `PartialEq`
                // implementation that exists between the two dereferenced types.
                let this = self.deref();
                let that = other.deref();
                PartialEq::eq(this, that)
            }
        }
    };
}
133use impl_eq;
134
/// Generates a cross-type [`PartialOrd`] implementation that orders both operands
/// through their dereferenced forms.
///
/// Argument order: the *second* type is the one the trait is implemented for (`Self`),
/// while the *first* type becomes the `Rhs` type parameter.
/// In other words, `impl_partial_cmp! { A, B }` expands to `impl PartialOrd<A> for B`
/// and enables `b < a`, `b.partial_cmp(&a)`, etc.
///
/// NOTE(review): this is the reverse of the `lhs, rhs` order suggested by the std-library naming
/// convention. Confirm that every pair of types is registered in both orders (or that call sites
/// expect this order).
///
/// The expansion calls `.deref()` using method syntax and names `Ordering` unqualified, so it
/// relies on both `Deref` and `Ordering` being in scope wherever the macro is expanded.
/// As `PartialOrd<Rhs>` has `PartialEq<Rhs>` as a supertrait, a matching `PartialEq`
/// implementation must exist for the same pair of types.
macro_rules! impl_partial_cmp {
    ($other_ty:ty, $self_ty:ty) => {
        impl<'a, 'b> PartialOrd<$other_ty> for $self_ty {
            fn partial_cmp(&self, other: &$other_ty) -> Option<Ordering> {
                // Dereference both sides first, then delegate to the `PartialOrd`
                // implementation that exists between the two dereferenced types.
                let this = self.deref();
                let that = other.deref();
                PartialOrd::partial_cmp(this, that)
            }
        }
    };
}
144use impl_partial_cmp;