raw_string/str/
mod.rs

1use std;
2use std::char::REPLACEMENT_CHARACTER;
3use std::cmp::Ordering;
4use std::convert::AsRef;
5use std::ffi::OsStr;
6use std::fmt::{Debug, Display, Formatter, Write};
7use std::mem::transmute;
8use std::ops::{Index, IndexMut};
9use std::path::Path;
10use std::str::{from_utf8, Utf8Error};
11
12mod index;
13mod utf8chunks;
14
15pub use self::index::{RawStrIndex, RawStrIndexOutput};
16pub use self::utf8chunks::{Utf8Chunk, Utf8ChunksIter};
17
/// A `str` with unchecked contents.
///
/// It is basically a `[u8]`, to be interpreted as string.
/// Unlike `str`, there are no guarantees about the contents being valid UTF-8.
/// Unlike `[u8]`, its Display and Debug implementations show a string, not an
/// array of numbers.
///
/// The type is `#[repr(transparent)]` so that it is guaranteed to have the
/// same layout as its only field, `[u8]`. Code that reinterprets a `&[u8]`
/// as a `&RawStr` (or the mutable equivalent) depends on that guarantee.
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct RawStr {
	inner: [u8],
}
28
29impl RawStr {
30	#[inline]
31	pub fn from<S: AsRef<RawStr> + ?Sized>(s: &S) -> &Self {
32		s.as_ref()
33	}
34
35	#[inline]
36	pub fn from_bytes(bytes: &[u8]) -> &Self {
37		unsafe { transmute::<&[u8], &Self>(bytes) }
38	}
39
40	#[inline]
41	pub fn from_str(bytes: &str) -> &Self {
42		Self::from_bytes(bytes.as_bytes())
43	}
44
45	#[inline]
46	pub fn as_bytes(&self) -> &[u8] {
47		&self.inner
48	}
49
50	#[inline]
51	pub fn from_bytes_mut(bytes: &mut [u8]) -> &mut Self {
52		unsafe { transmute::<&mut [u8], &mut Self>(bytes) }
53	}
54
55	#[inline]
56	pub fn as_bytes_mut(&mut self) -> &mut [u8] {
57		&mut self.inner
58	}
59
60	#[inline]
61	pub fn len(&self) -> usize {
62		self.inner.len()
63	}
64
65	#[inline]
66	pub fn is_empty(&self) -> bool {
67		self.inner.is_empty()
68	}
69
70	#[inline]
71	pub fn as_ptr(&self) -> *const u8 {
72		self.inner.as_ptr()
73	}
74
75	#[inline]
76	pub fn first(&self) -> Option<u8> {
77		self.inner.first().map(|&x| x)
78	}
79
80	#[inline]
81	pub fn first_mut(&mut self) -> Option<&mut u8> {
82		self.inner.first_mut()
83	}
84
85	#[inline]
86	pub fn last(&self) -> Option<u8> {
87		self.inner.last().map(|&x| x)
88	}
89
90	#[inline]
91	pub fn last_mut(&mut self) -> Option<&mut u8> {
92		self.inner.last_mut()
93	}
94
95	#[inline]
96	pub fn split_first(&self) -> Option<(u8, &RawStr)> {
97		self.inner
98			.split_first()
99			.map(|(&a, b)| (a, RawStr::from_bytes(b)))
100	}
101
102	#[inline]
103	pub fn split_first_mut(&mut self) -> Option<(&mut u8, &mut RawStr)> {
104		self.inner
105			.split_first_mut()
106			.map(|(a, b)| (a, RawStr::from_bytes_mut(b)))
107	}
108
109	#[inline]
110	pub fn split_last(&self) -> Option<(u8, &RawStr)> {
111		self.inner
112			.split_last()
113			.map(|(&a, b)| (a, RawStr::from_bytes(b)))
114	}
115
116	#[inline]
117	pub fn split_last_mut(&mut self) -> Option<(&mut u8, &mut RawStr)> {
118		self.inner
119			.split_last_mut()
120			.map(|(a, b)| (a, RawStr::from_bytes_mut(b)))
121	}
122
123	#[inline]
124	pub fn split_at(&self, mid: usize) -> (&RawStr, &RawStr) {
125		let (a, b) = self.inner.split_at(mid);
126		(RawStr::from_bytes(a), RawStr::from_bytes(b))
127	}
128
129	#[inline]
130	pub fn split_at_mut(&mut self, mid: usize) -> (&mut RawStr, &mut RawStr) {
131		let (a, b) = self.inner.split_at_mut(mid);
132		(RawStr::from_bytes_mut(a), RawStr::from_bytes_mut(b))
133	}
134
135	#[inline]
136	pub fn contains_byte(&self, x: u8) -> bool {
137		self.inner.contains(&x)
138	}
139
140	#[inline]
141	pub fn starts_with<T: AsRef<RawStr>>(&self, x: T) -> bool {
142		self.inner.starts_with(x.as_ref().as_bytes())
143	}
144
145	#[inline]
146	pub fn ends_with<T: AsRef<RawStr>>(&self, x: T) -> bool {
147		self.inner.ends_with(x.as_ref().as_bytes())
148	}
149
150	#[inline]
151	pub fn get<I: RawStrIndex>(&self, index: I) -> Option<&I::Output> {
152		index.get(self)
153	}
154
155	#[inline]
156	pub fn get_mut<I: RawStrIndex>(&mut self, index: I) -> Option<&mut I::Output> {
157		index.get_mut(self)
158	}
159
160	#[inline]
161	pub unsafe fn get_unchecked<I: RawStrIndex>(&self, index: I) -> &I::Output {
162		index.get_unchecked(self)
163	}
164
165	#[inline]
166	pub unsafe fn get_unchecked_mut<I: RawStrIndex>(&mut self, index: I) -> &mut I::Output {
167		index.get_unchecked_mut(self)
168	}
169
170	#[inline]
171	pub unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &RawStr {
172		self.get_unchecked(begin..end)
173	}
174
175	#[inline]
176	pub unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut RawStr {
177		self.get_unchecked_mut(begin..end)
178	}
179
180	#[inline]
181	pub fn bytes(&self) -> std::iter::Cloned<std::slice::Iter<u8>> {
182		self.inner.iter().cloned()
183	}
184
185	#[inline]
186	pub fn bytes_mut(&mut self) -> std::slice::IterMut<u8> {
187		self.inner.iter_mut()
188	}
189
190	/// Iterate over chunks of valid UTF-8.
191	///
192	/// The iterator iterates over the chunks of valid UTF-8 separated by any
193	/// broken characters, which could be replaced by the unicode replacement
194	/// character.
195	#[inline]
196	pub fn utf8_chunks(&self) -> Utf8ChunksIter {
197		Utf8ChunksIter { bytes: &self.inner }
198	}
199
200	// Things that could be added:
201	//   pub fn lines(&self) -> Lines
202	//   pub fn split_whitespace(&self) -> SplitWhitespace
203	//   pub fn trim
204	//   pub fn trim_left
205	//   pub fn trim_right
206	//
207	//  RawPattern and:
208	//   pub fn contains<'a, P: RawPattern<'a>>(&'a self, pat: P) -> bool
209	//   pub fn starts_with<'a, P: RawPattern<'a>>(&'a self, pat: P) -> bool
210	//   pub fn ends_with<'a, P: RawPattern<'a>>(&'a self, pat: P) -> bool
211	//   pub fn find<'a, P: RawPattern<'a>>(&'a self, pat: P) -> Option<usize>
212	//   pub fn rfind<'a, P: RawPattern<'a>>(&'a self, pat: P) -> Option<usize>
213	//   pub fn split<'a, P: RawPattern<'a>>(&'a self, pat: P) -> Split<'a, P>
214	//   pub fn rsplit<'a, P: RawPattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
215	//   pub fn split_terminator<'a, P: RawPattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>
216	//   pub fn rsplit_terminator<'a, P: RawPattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
217	//   pub fn splitn<'a, P: RawPattern<'a>>(&'a self, n: usize, pat: P) -> Split<'a, P>
218	//   pub fn rsplitn<'a, P: RawPattern<'a>>(&'a self, n: usize, pat: P) -> RSplit<'a, P>
219	//   pub fn matches<'a, P: RawPattern<'a>>(&'a self, pat: P) -> Matches<'a, P>
220	//   pub fn rmatches<'a, P: RawPattern<'a>>(&'a self, pat: P) -> Matches<'a, P>
221	//   pub fn match_indices<'a, P: RawPattern<'a>>(&'a self, pat: P) -> Matches<'a, P>
222	//   pub fn rmatch_indices<'a, P: RawPattern<'a>>(&'a self, pat: P) -> Matches<'a, P>
223	//   pub fn trim_matches <RawPattern>
224	//   pub fn trim_left_matches <RawPattern>
225	//   pub fn trim_right_matches <RawPattern>
226	//   pub fn replace (RawPattern -> AsRef<RawStr>) -> RawString
227	//   pub fn replace_n (RawPattern -> AsRef<RawStr>, n) -> RawString
228	//
229	//   pub fn is_utf8_char_boundary(&self, index: usize) -> bool
230	//   pub fn utf8_chars() -> Utf8Chars
231	//   pub fn utf8_char_indices() -> Utf8CharIndices
232	//   pub fn encode_utf16(&self) -> EncodeUtf16
233
234	#[inline]
235	pub fn to_str(&self) -> Result<&str, Utf8Error> {
236		from_utf8(self.as_bytes())
237	}
238
239	/// Convert to an OsStr.
240	///
241	/// On Unix, it never fails.
242	/// On other platforms, it must be encoded as UTF-8.
243	///
244	/// A never-failing version for Unix only is available as
245	/// [`unix::RawStrExt::as_osstr`](struct.RawStr.html#method.as_osstr).
246	#[inline]
247	pub fn to_osstr(&self) -> Result<&OsStr, Utf8Error> {
248		self.to_osstr_()
249	}
250
251	/// Convert to a Path.
252	///
253	/// On Unix, it never fails.
254	/// On other platforms, it must be encoded as UTF-8.
255	///
256	/// A never-failing version for Unix only is available as
257	/// [`unix::RawStrExt::as_path`](struct.RawStr.html#method.as_path).
258	#[inline]
259	pub fn to_path(&self) -> Result<&Path, Utf8Error> {
260		Ok(Path::new(self.to_osstr()?))
261	}
262
263	#[cfg(unix)]
264	#[inline]
265	fn to_osstr_(&self) -> Result<&OsStr, Utf8Error> {
266		use std::os::unix::ffi::OsStrExt;
267		Ok(OsStr::from_bytes(self.as_bytes()))
268	}
269
270	#[cfg(not(unix))]
271	#[inline]
272	fn to_osstr_(&self) -> Result<&OsStr, Utf8Error> {
273		Ok(OsStr::new(self.to_str()?))
274	}
275
276	#[inline]
277	pub fn is_ascii(&self) -> bool {
278		self.inner.is_ascii()
279	}
280
281	#[inline]
282	pub fn eq_ignore_ascii_case(&self, other: &RawStr) -> bool {
283		self.inner.eq_ignore_ascii_case(&other.inner)
284	}
285
286	#[inline]
287	pub fn make_ascii_uppercase(&mut self) {
288		self.inner.make_ascii_uppercase()
289	}
290
291	#[inline]
292	pub fn make_ascii_lowercase(&mut self) {
293		self.inner.make_ascii_lowercase()
294	}
295}
296
297// AsRef {{{
298
299impl AsRef<RawStr> for RawStr {
300	#[inline]
301	fn as_ref(&self) -> &RawStr {
302		self
303	}
304}
305
306impl AsRef<RawStr> for [u8] {
307	#[inline]
308	fn as_ref(&self) -> &RawStr {
309		RawStr::from_bytes(self)
310	}
311}
312
313impl AsRef<RawStr> for str {
314	#[inline]
315	fn as_ref(&self) -> &RawStr {
316		RawStr::from_bytes(self.as_bytes())
317	}
318}
319
320impl AsRef<[u8]> for RawStr {
321	#[inline]
322	fn as_ref(&self) -> &[u8] {
323		&self.inner
324	}
325}
326
327// }}}
328
329// Default {{{
330
331impl<'a> Default for &'a RawStr {
332	#[inline]
333	fn default() -> Self {
334		RawStr::from_bytes(&[])
335	}
336}
337
338impl<'a> Default for &'a mut RawStr {
339	#[inline]
340	fn default() -> Self {
341		RawStr::from_bytes_mut(&mut [])
342	}
343}
344
345// }}}
346
347// Index {{{
348
349impl<I: RawStrIndex> Index<I> for RawStr {
350	type Output = I::Output;
351	#[inline]
352	fn index(&self, index: I) -> &I::Output {
353		index.index(self)
354	}
355}
356
357impl<I: RawStrIndex> IndexMut<I> for RawStr {
358	#[inline]
359	fn index_mut(&mut self, index: I) -> &mut I::Output {
360		index.index_mut(self)
361	}
362}
363
364// }}}
365
366// IntoIterator {{{
367
368impl<'a> IntoIterator for &'a RawStr {
369	type Item = u8;
370	type IntoIter = std::iter::Cloned<std::slice::Iter<'a, u8>>;
371	#[inline]
372	fn into_iter(self) -> Self::IntoIter {
373		self.bytes()
374	}
375}
376
377impl<'a> IntoIterator for &'a mut RawStr {
378	type Item = &'a mut u8;
379	type IntoIter = std::slice::IterMut<'a, u8>;
380	#[inline]
381	fn into_iter(self) -> Self::IntoIter {
382		self.bytes_mut()
383	}
384}
385
386// }}}
387
388// From {{{
389
390impl<'a> From<&'a str> for &'a RawStr {
391	#[inline]
392	fn from(src: &'a str) -> &'a RawStr {
393		RawStr::from_str(src)
394	}
395}
396
397impl<'a> From<&'a [u8]> for &'a RawStr {
398	#[inline]
399	fn from(src: &'a [u8]) -> &'a RawStr {
400		RawStr::from_bytes(src)
401	}
402}
403
404// }}}
405
406// Display {{{
407
408impl Display for RawStr {
409	fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
410		for Utf8Chunk { valid, broken } in self.utf8_chunks() {
411			f.write_str(valid)?;
412			if !broken.is_empty() {
413				f.write_char(REPLACEMENT_CHARACTER)?;
414			}
415		}
416		Ok(())
417	}
418}
419
420// }}}
421
422// Debug {{{
423
/// Writes `s` to `f`, replacing every character that `char::escape_debug`
/// would change by its escape sequence and copying everything else verbatim.
fn write_escaped_str(f: &mut std::fmt::Formatter, s: &str) -> std::fmt::Result {
	// Byte offset of the first character that has not been written yet.
	let mut flushed = 0;
	for (pos, ch) in s.char_indices() {
		let escape = ch.escape_debug();
		// An escape of length one is the character itself; keep it in the
		// pending run so that it is written as part of a larger slice.
		if escape.len() == 1 {
			continue;
		}
		f.write_str(&s[flushed..pos])?;
		for piece in escape {
			f.write_char(piece)?;
		}
		flushed = pos + ch.len_utf8();
	}
	f.write_str(&s[flushed..])
}
438
439impl Debug for RawStr {
440	fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
441		f.write_char('"')?;
442		for Utf8Chunk { valid, broken } in self.utf8_chunks() {
443			write_escaped_str(f, valid)?;
444			for &b in broken {
445				write!(f, "\\x{:02x}", b)?;
446			}
447		}
448		f.write_char('"')
449	}
450}
451
452// }}}
453
454// {{{ PartialEq / PartialOrd
455
456macro_rules! impl_ord {
457	($t:ty) => {
458		impl PartialEq<$t> for RawStr {
459			#[inline]
460			fn eq(&self, other: &$t) -> bool {
461				<RawStr as PartialEq>::eq(self, other.as_ref())
462			}
463		}
464		impl PartialEq<RawStr> for $t {
465			#[inline]
466			fn eq(&self, other: &RawStr) -> bool {
467				<RawStr as PartialEq>::eq(self.as_ref(), other)
468			}
469		}
470		impl PartialOrd<$t> for RawStr {
471			#[inline]
472			fn partial_cmp(&self, other: &$t) -> Option<Ordering> {
473				<RawStr as PartialOrd>::partial_cmp(self, other.as_ref())
474			}
475		}
476		impl PartialOrd<RawStr> for $t {
477			#[inline]
478			fn partial_cmp(&self, other: &RawStr) -> Option<Ordering> {
479				<RawStr as PartialOrd>::partial_cmp(self.as_ref(), other)
480			}
481		}
482	};
483}
484
485impl_ord!(str);
486impl_ord!([u8]);
487impl_ord!(&str);
488impl_ord!(&[u8]);
489
490// }}}
491
492// Tests {{{
493
#[test]
fn test_display() {
	// Entirely valid UTF-8 is displayed unchanged; the quote is not escaped.
	let valid = RawStr::from("1\" μs / °C");
	assert_eq!(format!("{}", valid), "1\" μs / °C");

	// The lone 0xFF byte is not valid UTF-8 and is shown as U+FFFD.
	let broken = RawStr::from_bytes(b"1 \xFF \xce\xbcs / \xc2\xb0C");
	assert_eq!(format!("{}", broken), "1 \u{FFFD} μs / °C");
}
502
#[test]
fn test_debug() {
	// Valid UTF-8 is quoted, and the embedded quote is escaped.
	let valid: &RawStr = RawStr::from("1\" μs / °C");
	assert_eq!(format!("{:?}", valid), "\"1\\\" μs / °C\"");

	// The lone 0xFF byte is not valid UTF-8 and is shown as a `\xff` escape.
	let broken: &RawStr = RawStr::from_bytes(b"1 \xFF \xce\xbcs / \xc2\xb0C");
	assert_eq!(format!("{:?}", broken), "\"1 \\xff μs / °C\"");
}
511
512// }}}