did_simple/
utf8bytes.rs

1use std::fmt::Display;
2
3use bytes::Bytes;
4
5/// Wrapper around [`Bytes`] which is guaranteed to be UTF-8.
6/// Like `Bytes`, it is cheaply cloneable and facilitates zero-copy.
7#[derive(Debug, Eq, PartialEq, Hash, Clone)]
8pub struct Utf8Bytes(Bytes);
9
10impl Utf8Bytes {
11	pub fn as_str(&self) -> &str {
12		// TODO: Consider changing to unsafe later, because this check is entirely unecessary.
13		std::str::from_utf8(self.0.as_ref()).expect("infallible")
14	}
15
16	pub fn as_slice(&self) -> &[u8] {
17		self.0.as_ref()
18	}
19
20	pub fn bytes(&self) -> &Bytes {
21		&self.0
22	}
23
24	pub fn into_inner(self) -> Bytes {
25		self.0
26	}
27
28	/// Same as [`Bytes::split_off()`], but panics if slicing doesn't result in valid utf8.
29	///
30	/// Afterwards `self` contains elements `[0, at)`, and the returned `Utf8Bytes`
31	/// contains elements `[at, len)`.
32	pub fn split_off(&mut self, at: usize) -> Self {
33		assert!(
34			self.as_str().is_char_boundary(at),
35			"slicing would have created invalid UTF-8!"
36		);
37		Self(self.0.split_off(at))
38	}
39
40	/// Same as [`Bytes::split_to()`], but panics if slicing doesn't result in valid utf8.
41	///
42	/// Afterwards `self` contains elements `[at, len)`, and the returned `Utf8Bytes`
43	/// contains elements `[0, at)`.
44	pub fn split_to(&mut self, at: usize) -> Self {
45		assert!(
46			self.as_str().is_char_boundary(at),
47			"slicing would have created invalid UTF-8!"
48		);
49		Self(self.0.split_to(at))
50	}
51}
52
53impl AsRef<[u8]> for Utf8Bytes {
54	fn as_ref(&self) -> &[u8] {
55		self.0.as_ref()
56	}
57}
58
59impl From<String> for Utf8Bytes {
60	/// This is zero-copy, and skips UTF-8 checks.
61	fn from(value: String) -> Self {
62		Self(Bytes::from(value))
63	}
64}
65
66impl From<&'static str> for Utf8Bytes {
67	/// This is zero-copy, and skips UTF-8 checks.
68	fn from(value: &'static str) -> Self {
69		Self(Bytes::from_static(value.as_bytes()))
70	}
71}
72
73impl TryFrom<Bytes> for Utf8Bytes {
74	type Error = std::str::Utf8Error;
75
76	/// This is zero-copy, and performs UTF-8 checks.
77	fn try_from(value: Bytes) -> Result<Self, Self::Error> {
78		let _s = std::str::from_utf8(value.as_ref())?;
79		Ok(Self(value))
80	}
81}
82
83impl Display for Utf8Bytes {
84	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
85		self.as_str().fmt(f)
86	}
87}
88
#[cfg(test)]
mod test {
	use super::*;

	/// Sample inputs: ASCII, empty, an embedded NUL and multi-byte characters.
	const STRINGS: &[&str] = &["foobar", "", "\0", "Yellow ඞ Sus😂"];

	#[test]
	fn test_from_str() {
		for &s in STRINGS {
			let ub = Utf8Bytes::from(s);
			assert_eq!(s, ub.as_str());
			assert_eq!(s.as_bytes(), ub.0);
		}
	}

	#[test]
	fn test_from_string() {
		for &s in STRINGS {
			let s: String = s.to_owned();
			let ub = Utf8Bytes::from(s.clone());
			assert_eq!(s, ub.as_str());
			assert_eq!(s.as_bytes(), ub.0);
		}
	}

	#[test]
	fn test_from_bytes() {
		for &s in STRINGS {
			let b: Bytes = Bytes::from(s);
			let ub =
				Utf8Bytes::try_from(b.clone()).expect("failed conversion from bytes");
			assert_eq!(s, ub.as_str());
			assert_eq!(s.as_bytes(), ub.0);
			assert_eq!(ub.0, b);
		}
	}

	#[test]
	fn test_display() {
		for &s in STRINGS {
			let ub = Utf8Bytes::from(s);
			assert_eq!(s, format!("{ub}"));
		}
	}

	#[test]
	fn test_split_off() {
		for &s in STRINGS {
			// Go one past the end: `s.len()` is a valid split point (empty tail),
			// while `s.len() + 1` is out of bounds and must panic.
			for pos in 0..=s.len() + 1 {
				let is_valid = s.is_char_boundary(pos);
				let mut original = Utf8Bytes::from(s);
				let result = std::panic::catch_unwind(move || {
					let ret = original.split_off(pos);
					(original, ret)
				});

				match result {
					Ok((original, ret)) => {
						// Check validity first so the slicing below cannot panic
						// with a less helpful message.
						assert!(
							is_valid,
							"split_off did not panic, so {pos} should be valid"
						);
						assert_eq!(&s[0..pos], original.as_str());
						assert_eq!(&s[pos..], ret.as_str());
					}
					Err(_) => assert!(
						!is_valid,
						"split_off panicked, so {pos} should not be valid"
					),
				}
			}
		}
	}

	#[test]
	fn test_split_to() {
		for &s in STRINGS {
			// Go one past the end: `s.len()` is a valid split point (empty rest),
			// while `s.len() + 1` is out of bounds and must panic.
			for pos in 0..=s.len() + 1 {
				let is_valid = s.is_char_boundary(pos);
				let mut original = Utf8Bytes::from(s);
				let result = std::panic::catch_unwind(move || {
					let ret = original.split_to(pos);
					(original, ret)
				});

				match result {
					Ok((original, ret)) => {
						// Check validity first so the slicing below cannot panic
						// with a less helpful message.
						assert!(
							is_valid,
							"split_to did not panic, so {pos} should be valid"
						);
						assert_eq!(&s[0..pos], ret.as_str());
						assert_eq!(&s[pos..], original.as_str());
					}
					Err(_) => assert!(
						!is_valid,
						"split_to panicked, so {pos} should not be valid"
					),
				}
			}
		}
	}
}