data_streams/
vec.rs

1// Copyright 2025 - Strixpyrr
2// SPDX-License-Identifier: Apache-2.0
3
4#![cfg(feature = "alloc")]
5
6use alloc::{collections::VecDeque, vec::Vec};
7#[cfg(feature = "utf8")]
8use core::mem::MaybeUninit;
9#[cfg(feature = "utf8")]
10use simdutf8::compat::from_utf8;
11#[cfg(any(feature = "utf8", feature = "unstable_ascii_char"))]
12use crate::Error;
13use crate::{BufferAccess, DataSink, DataSource, Result};
14use crate::markers::source::SourceSize;
15use crate::source::{max_multiple_of, VecSource};
16#[cfg(feature = "utf8")]
17use crate::utf8::utf8_char_width;
18
19impl DataSink for Vec<u8> {
20	fn write_bytes(&mut self, buf: &[u8]) -> Result {
21		self.try_reserve(buf.len())?;
22		self.extend_from_slice(buf);
23		Ok(())
24	}
25
26	fn write_utf8_codepoint(&mut self, value: char) -> Result {
27		let start = self.len();
28		let width = value.len_utf8();
29		self.try_reserve(width)?;
30		self.resize(start + width, 0);
31		value.encode_utf8(&mut self[start..]);
32		Ok(())
33	}
34
35	fn write_u8(&mut self, value: u8) -> Result {
36		self.try_reserve(1)?;
37		self.push(value);
38		Ok(())
39	}
40
41	fn write_i8(&mut self, value: i8) -> Result {
42		self.write_u8(value as u8)
43	}
44}
45
46impl DataSource for VecDeque<u8> {
47	fn available(&self) -> usize { self.len() }
48
49	fn request(&mut self, count: usize) -> Result<bool> {
50		Ok(self.len() >= count)
51	}
52
53	fn skip(&mut self, mut count: usize) -> Result<usize> {
54		count = count.min(self.len());
55		self.drain_buffer(count);
56		Ok(count)
57	}
58
59	fn read_bytes<'a>(&mut self, buf: &'a mut [u8]) -> Result<&'a [u8]> {
60		let (mut a, mut b) = self.as_slices();
61		let mut slice = &mut *buf;
62		let mut count = a.read_bytes(slice)?.len();
63		slice = &mut slice[count..];
64		count += b.read_bytes(slice)?.len();
65		self.drain_buffer(count);
66		Ok(&buf[..count])
67	}
68
69	fn read_aligned_bytes<'a>(&mut self, buf: &'a mut [u8], alignment: usize) -> Result<&'a [u8]> {
70		if alignment == 0 { return Ok(&[]) }
71		let len = max_multiple_of(self.len().min(buf.len()), alignment);
72		self.read_bytes(&mut buf[..len])
73	}
74
	/// Reads bytes into a slice, returning them as a UTF-8 string if valid.
	///
	/// The deque's storage may be split into two slices. When the first slice
	/// alone is long enough to fill `buf`, it is validated in a single pass.
	/// Otherwise each slice is validated in turn, with special handling for a
	/// character that appears to be cut off at the end of the first slice.
	///
	/// # Errors
	///
	/// Returns [`Error::Utf8`] if invalid UTF-8 is read. This implementation only
	/// consumes valid UTF-8. `buf` is left with a valid UTF-8 string whose length
	/// is given by the error, [`Utf8Error::valid_up_to`]. This slice can be safely
	/// converted to a string with [`from_str_unchecked`] or [`Utf8Error::split_valid`].
	///
	/// [`Utf8Error::valid_up_to`]: simdutf8::compat::Utf8Error::valid_up_to
	/// [`from_str_unchecked`]: core::str::from_utf8_unchecked
	#[cfg(feature = "utf8")]
	fn read_utf8<'a>(&mut self, buf: &'a mut [u8]) -> Result<&'a str> {
		match self.as_slices() {
			(mut bytes, _) if bytes.len() >= buf.len() => {
				// The deque is contiguous up to the buffer length, validate its
				// data in one go.
				let len = bytes.len();
				let result = bytes.read_utf8(buf);
				// The slice reader advances `bytes`; the difference in length is
				// how much must be removed from the deque itself.
				let consumed = len - bytes.len();
				self.drain_buffer(consumed);
				result
			}
			(mut a, mut b) => {
				// The deque is discontiguous. Validate the first slice, then the
				// second. If the first slice has an incomplete char, attempt to
				// rotate it into the second slice before proceeding.

				let mut slice = &mut *buf;

				// `offset` is the number of valid bytes placed at the start of
				// `buf` from the first slice.
				let offset = match a.read_utf8(slice) {
					Ok(str) => str.len(),
					Err(Error::Utf8(error)) if error.error_len().is_none() => {
						// Incomplete char. Check if the char is completed on the
						// second slice, then rotate such that the second slice
						// contains the completed char.
						let char_start = error.valid_up_to();
						// NOTE(review): this arithmetic and the index below treat
						// `a` as still spanning the whole first slice, i.e. they
						// assume the failed `read_utf8` call above did not advance
						// `a`. Confirm against the `&[u8]` implementation.
						let incomplete = a.len() - char_start;
						let width = utf8_char_width(a[char_start]);
						let remaining = width - incomplete;

						if b.len() < remaining {
							// The char is actually incomplete. Consume the valid
							// bytes, then return the error.
							self.drain_buffer(char_start);
							return Err(error.into())
						}

						// NOTE(review): `VecDeque::rotate_right(n)` is documented
						// as moving the last `n` elements to the front, which
						// changes the logical order of the queue rather than only
						// its physical layout. Confirm this does what the comment
						// above intends; the assertion below guards the expected
						// outcome and panics if it does not hold.
						self.rotate_right(incomplete);
						(a, b) = self.as_slices();
						assert_eq!(
							a.len(),
							char_start,
							"`rotate_right` should have caused the first slice to contain the valid \
							UTF-8 characters"
						);
						char_start
					}
					Err(error @ Error::Utf8(_)) =>
						// Invalid bytes, this error is unrecoverable.
						// NOTE(review): nothing is drained from the deque on this
						// path, unlike the other error paths in this function.
						// Confirm that is intended.
						return Err(error),
					Err(_) => unreachable!() // <[u8]>::read_utf8 only ever returns Error::Utf8.
				};
				// Continue filling `buf` after the bytes taken from the first slice.
				slice = &mut slice[offset..];

				match b.read_utf8(slice) {
					Ok(str) => {
						let valid = offset + str.len();
						self.drain_buffer(valid);
						// Safety: these bytes have been validated as UTF-8 up to this point.
						Ok(unsafe {
							core::str::from_utf8_unchecked(&buf[..valid])
						})
					}
					Err(Error::Utf8(mut error)) => {
						// The error was produced relative to the second slice.
						// Presumably `set_offset` rebases it onto the start of
						// `buf` so `valid_up_to` counts both slices; confirm.
						error.set_offset(offset);
						self.drain_buffer(error.valid_up_to());
						Err(Error::Utf8(error))
					}
					Err(_) => unreachable!() // <[u8]>::read_utf8 only ever returns Error::Utf8.
				}
			}
		}
	}
159	/// Reads bytes into a slice, returning them as an ASCII slice if valid.
160	///
161	/// # Errors
162	///
163	/// Returns [`Error::Ascii`] if a non-ASCII byte is found. This implementation
164	/// consumes only valid ASCII. `buf` is left with valid ASCII bytes with a
165	/// length of [`AsciiError::valid_up_to`]. The valid slice can be retrieved
166	/// with [`AsciiError::valid_slice`].
167	#[cfg(feature = "unstable_ascii_char")]
168	fn read_ascii<'a>(&mut self, mut buf: &'a mut [u8]) -> Result<&'a [core::ascii::Char]> {
169		use crate::source::count_ascii;
170
171		let buf_len = self.len().min(buf.len());
172		buf = &mut buf[..buf_len];
173		let (mut a, mut b) = self.as_slices();
174		if buf.len() >= a.len() {
175			b = &b[..buf.len() - a.len()];
176		} else {
177			a = &a[..buf.len()];
178			b = &[];
179		}
180		
181		let a_count = count_ascii(a);
182		if a_count == a.len() {
183			buf.copy_from_slice(a);
184			let b_count = count_ascii(b);
185			buf[a_count..][..b_count].copy_from_slice(&b[..b_count]);
186			
187			let result = if b_count == b.len() {
188				// Safety: all data is valid ASCII.
189				Ok(unsafe { buf.as_ascii_unchecked() })
190			} else {
191				Err(Error::invalid_ascii(b[b_count], b_count, b_count))
192			};
193			self.drain_buffer(a_count + b_count);
194			result
195		} else {
196			buf[..a_count].copy_from_slice(&a[..a_count]);
197			self.drain_buffer(a_count);
198			Err(Error::invalid_ascii(buf[a_count], a_count, a_count))
199		}
200	}
201}
202
203impl BufferAccess for VecDeque<u8> {
204	fn buffer_capacity(&self) -> usize { self.capacity() }
205
206	fn buffer(&self) -> &[u8] { self.as_slices().0 }
207
208	fn fill_buffer(&mut self) -> Result<&[u8]> {
209		Ok((*self).buffer()) // Nothing to read
210	}
211
212	fn clear_buffer(&mut self) {
213		self.clear();
214	}
215
216	fn drain_buffer(&mut self, count: usize) {
217		if self.len() == count {
218			self.clear();
219		} else {
220			self.drain(..count);
221		}
222	}
223}
224
225impl VecSource for VecDeque<u8> {
226	fn read_to_end<'a>(&mut self, buf: &'a mut Vec<u8>) -> Result<&'a [u8]> {
227		let start = buf.len();
228		buf.extend(core::mem::take(self));
229		Ok(&buf[start..])
230	}
231
232	#[cfg(feature = "utf8")]
233	fn read_utf8_to_end<'a>(&mut self, buf: &'a mut alloc::string::String) -> Result<&'a str> {
234		let start_len = buf.len();
235		buf.try_reserve(self.len())?;
236		{
237			// Safety: the existing contents are not changed, and when this block
238			// ends the buffer will have been checked as valid UTF-8.
239			let buf = unsafe {
240				buf.as_mut_vec()
241			};
242			
243			let slice = {
244				let spare = &mut buf.spare_capacity_mut()[..self.len()];
245				spare.fill(MaybeUninit::new(0));
246				// Safety: read_utf8 does not read from the buffer, and the returned
247				// slice is guaranteed to be initialized.
248				unsafe {
249					&mut *(core::ptr::from_mut::<[MaybeUninit<u8>]>(spare) as *mut [u8])
250				}
251			};
252			
253			let result = self.read_utf8(slice);
254			let valid_len = match result.as_ref() {
255				Ok(valid) => valid.len(),
256				Err(Error::Utf8(error)) => error.valid_up_to(),
257				Err(_) => unreachable!() // read_utf8 only returns Error::Utf8.
258			};
259			// Safety: these bytes are initialized and valid UTF-8.
260			unsafe {
261				buf.set_len(start_len + valid_len);
262			}
263		}
264		self.clear();
265		Ok(&buf[start_len..])
266	}
267}
268
269// Safety: vectors produce exactly the number of bytes as their length.
270unsafe impl SourceSize for VecDeque<u8> {
271	fn lower_bound(&self) -> u64 { self.len() as u64 }
272	fn upper_bound(&self) -> Option<u64> { Some(self.len() as u64) }
273}
274
275impl DataSink for VecDeque<u8> {
276	fn write_bytes(&mut self, buf: &[u8]) -> Result {
277		self.try_reserve(buf.len())?;
278		self.extend(buf);
279		Ok(())
280	}
281
282	fn write_u8(&mut self, value: u8) -> Result {
283		self.try_reserve(1)?;
284		self.push_back(value);
285		Ok(())
286	}
287
288	fn write_i8(&mut self, value: i8) -> Result {
289		self.write_u8(value as u8)
290	}
291}
292
#[cfg(feature = "utf8")]
impl DataSink for alloc::string::String {
	/// Appends the valid UTF-8 portion of `buf` to the string.
	///
	/// # Errors
	///
	/// Returns [`Error::Utf8`] if `buf` contains invalid UTF-8. The valid prefix
	/// is still written, and [`Utf8Error::valid_up_to`] on the error gives the
	/// number of bytes that were written.
	///
	/// [`Error::Allocation`] is returned when capacity cannot be allocated.
	fn write_bytes(&mut self, buf: &[u8]) -> Result {
		match from_utf8(buf).map_err(crate::Utf8Error::from) {
			Ok(valid) => self.write_utf8(valid),
			Err(error) => {
				// Safety: the slice handed to the error is the very same one the
				// validator just checked.
				let valid = unsafe { error.valid_slice_unchecked(buf) };
				// An allocation failure takes precedence over the UTF-8 error.
				self.write_utf8(valid)?;
				Err(error.into())
			}
		}
	}

	/// Appends a UTF-8 string.
	///
	/// # Errors
	///
	/// [`Error::Allocation`] is returned when capacity cannot be allocated.
	fn write_utf8(&mut self, value: &str) -> Result {
		let additional = value.len();
		self.try_reserve(additional)?;
		*self += value;
		Ok(())
	}

	/// Appends a single UTF-8 codepoint.
	///
	/// # Errors
	///
	/// [`Error::Allocation`] is returned when capacity cannot be allocated.
	fn write_utf8_codepoint(&mut self, value: char) -> Result {
		// Encode on the stack first; a `char` is at most four bytes of UTF-8.
		let mut scratch = [0u8; 4];
		let encoded: &str = value.encode_utf8(&mut scratch);
		self.try_reserve(encoded.len())?;
		self.push_str(encoded);
		Ok(())
	}
}
335}