1mod character;
24mod sift;
25mod sift_preserve_newlines;
26mod unsafe_vec;
27
28use character::{get_char_metadata, Character, CARRIAGE_RETURN, LINE_FEED};
29use sift::sift_preallocated;
30use sift_preserve_newlines::sift_preallocated_until_newline;
31use unsafe_vec::{unsafe_custom_extend, unsafe_push};
32
33pub trait WhitespaceSifter: AsRef<str> {
35 #[must_use]
39 fn sift(&self) -> String {
40 let input: &str = self.as_ref();
41 let mut out: String = String::with_capacity(input.len());
42 sift_preallocated(input.as_ptr(), input.len(), unsafe { out.as_mut_vec() });
43 out
44 }
45
46 #[must_use]
51 fn sift_preserve_newlines(&self) -> String {
52 let input: &str = self.as_ref();
53 let in_ptr: *const u8 = input.as_ptr();
54 let in_len: usize = input.len();
55 let mut out: String = String::with_capacity(input.len());
56 let out_vec: &mut Vec<u8> = unsafe { out.as_mut_vec() };
57 let mut ind: usize = 0;
58 while ind < in_len {
59 sift_preallocated_until_newline(in_ptr, in_len, &mut ind, out_vec);
60 }
61 if out_vec.len() > 1 {
62 let new_out_mut_len: usize = unsafe { out_vec.len().unchecked_sub(2) };
63 if unsafe { out_vec.as_ptr().add(new_out_mut_len).read() } == CARRIAGE_RETURN {
64 unsafe { out_vec.set_len(new_out_mut_len) };
65 return out;
66 }
67 let new_out_mut_len: usize = unsafe { out_vec.len().unchecked_sub(1) };
68 if unsafe { out_vec.as_ptr().add(new_out_mut_len).read() } == LINE_FEED {
69 unsafe { out_vec.set_len(new_out_mut_len) };
70 }
71 }
72 out
73 }
74}
75
76impl<T: AsRef<str>> WhitespaceSifter for T {}
77
78#[allow(clippy::inline_always)]
80#[inline(always)]
81pub(crate) fn sift_trim_start(
82 in_ptr: *const u8,
83 in_len: usize,
84 ind: &mut usize,
85 out: &mut Vec<u8>,
86) {
87 while *ind < in_len {
88 match get_char_metadata(unsafe { in_ptr.add(*ind).read() }) {
89 Character::LineFeed | Character::CarriageReturn | Character::NormalWhitespace => {
90 *ind = unsafe { ind.unchecked_add(1) };
91 }
92 Character::SingleByte => {
93 unsafe { unsafe_push(out, in_ptr.add(*ind).read()) };
94 *ind = unsafe { ind.unchecked_add(1) };
95 break;
96 }
97 Character::MultiByte { len } => {
98 unsafe {
99 unsafe_custom_extend(out, in_ptr.add(*ind), len as usize);
100 }
101 *ind = unsafe { ind.unchecked_add(len as usize) };
102 break;
103 }
104 }
105 }
106}
107
/// Removes the final byte of `out` when `is_last_whitespace` is `true`.
///
/// Presumably callers set the flag when the last byte they emitted was a
/// whitespace separator that must not remain at the end of the output.
///
/// The length is shortened with a saturating subtraction, so calling this
/// with an empty buffer is a harmless no-op instead of undefined behaviour
/// (the earlier `unchecked_sub` + `set_len` form underflowed in that case).
/// For every non-empty buffer the result is unchanged.
// Forced inlining is a deliberate choice for this hot-path helper.
#[allow(clippy::inline_always)]
#[inline(always)]
pub(crate) fn sift_trim_end(out: &mut Vec<u8>, is_last_whitespace: bool) {
    if is_last_whitespace {
        let new_out_len: usize = out.len().saturating_sub(1);
        out.truncate(new_out_len);
    }
}
117
118#[cfg(test)]
119mod tests;
120
121#[cfg(test)]
122mod msrv_test;
123
124#[cfg(test)]
125mod compliance_test;