1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
//! Const equivalents of `char` functions.
//!
//! The module is called `chr` to avoid name collisions with the `char` type.
//!
//! # Removed in 0.4.0
//!
//! These items were removed in 0.4.0 because each of them has an
//! equivalent that can be used in const contexts:
//!
//! - `from_u32_unchecked`: [char::from_u32_unchecked]
//! - `from_u32`: [char::from_u32]
/// A single `char` encoded as UTF-8 and stored inline, without heap allocation.
///
/// Values are produced by [`encode_utf8`]; read the encoded text back with
/// [`as_str`](Self::as_str) or [`as_bytes`](Self::as_bytes).
///
/// # Example
///
/// ```rust
/// use konst::chr;
///
/// const ENC: &chr::Utf8Encoded = &chr::encode_utf8('û');
/// const ENC_STR: &str = ENC.as_str();
///
/// assert_eq!(ENC_STR, "û");
///
/// ```
// `PartialEq`/`Eq` compare the fields structurally. That matches equality of
// the encoded chars because `encode_utf8` starts from a zero-filled buffer,
// so the bytes past `len` are always `0`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Utf8Encoded {
    /// Backing storage; only the first `len` bytes belong to the encoding.
    encoded: [u8; 4],
    /// Number of leading bytes of `encoded` that hold the encoded char.
    len: u8,
}
impl Utf8Encoded {
    /// Returns the encoded char as a string slice.
    pub const fn as_str(&self) -> &str {
        let bytes = self.as_bytes();
        // SAFETY: `bytes` is the used prefix of the buffer returned by
        // `as_bytes`, which holds the UTF-8 encoding of a single `char`
        // and is therefore valid UTF-8.
        unsafe { core::str::from_utf8_unchecked(bytes) }
    }

    /// Returns the encoded char as a byte slice.
    ///
    /// Only the bytes that make up the encoding are returned, not the
    /// unused tail of the internal buffer.
    pub const fn as_bytes(&self) -> &[u8] {
        let used = self.len as usize;
        crate::slice::slice_up_to(&self.encoded, used)
    }
}
/// Const counterpart of [`char::encode_utf8`] that returns the encoded char
/// by value, as an inline-allocated [`Utf8Encoded`], instead of writing into
/// a caller-provided buffer.
///
/// # Example
///
/// ```rust
/// use konst::chr;
///
/// const ENC: &chr::Utf8Encoded = &chr::encode_utf8('🤔');
/// const ENC_STR: &str = ENC.as_str();
///
/// assert_eq!(ENC_STR, "🤔");
///
/// ```
pub const fn encode_utf8(char: char) -> Utf8Encoded {
    // Zero-filled so that the bytes past the encoding stay `0`.
    let mut buffer = [0u8; 4];
    let written = char.encode_utf8(&mut buffer);
    // The encoding fits in the 4-byte buffer, so the cast cannot truncate.
    let len = written.len() as u8;
    Utf8Encoded {
        encoded: buffer,
        len,
    }
}
#[cfg(test)]
mod tests {
    use super::{Utf8Encoded, encode_utf8};

    /// Reads the used prefix of the buffer straight from the private fields,
    /// independently of `Utf8Encoded::as_bytes`.
    fn as_bytes(fmt: &Utf8Encoded) -> &[u8] {
        let used = fmt.len as usize;
        &fmt.encoded[..used]
    }

    /// Checks `encode_utf8` against `char::encode_utf8` for every `char`,
    /// through the raw fields, `as_bytes` and `as_str`.
    #[test]
    #[cfg(not(miri))]
    fn char_to_utf8_encoding_test() {
        for c in '\0'..=char::MAX {
            let mut std_buf = [0u8; 4];
            let expected: &str = c.encode_utf8(&mut std_buf);
            let actual = encode_utf8(c);

            assert_eq!(expected.as_bytes(), as_bytes(&actual));
            assert_eq!(expected.as_bytes(), actual.as_bytes());
            assert_eq!(
                core::str::from_utf8(expected.as_bytes()).unwrap(),
                actual.as_str(),
            );
        }
    }

    /// Checks `as_str` on a handful of chars covering 1- to 4-byte encodings.
    // NOTE(review): this loop is tiny; confirm whether the Miri exclusion is
    // actually needed here, since `as_str` goes through `unsafe`.
    #[test]
    #[cfg(not(miri))]
    fn test_chars_as_str() {
        let mut scratch = [0u8; 10];
        let sample = "fooñ个人bar\u{100000}baz";
        for c in sample.chars() {
            let expected: &str = c.encode_utf8(&mut scratch);
            let actual = encode_utf8(c);
            assert_eq!(actual.as_str(), expected);
        }
    }

    /// Checks `as_bytes` against `char::encode_utf8` for every `char`.
    #[test]
    #[cfg(not(miri))]
    fn test_all_chars_as_bytes() {
        let mut scratch = [0u8; 10];
        for c in '\0'..=char::MAX {
            let expected: &str = c.encode_utf8(&mut scratch);
            let actual = encode_utf8(c);
            assert_eq!(actual.as_bytes(), expected.as_bytes());
        }
    }
}