1use crate::base::{RUMResult, RUMVec, RUMVecDeque};
21use crate::strings::{rumtk_format, RUMArrayConversions, RUMString};
22pub use bytes::{BufMut, Bytes as RUMBuffer, BytesMut as RUMBufferMut};
23use clap::builder::TypedValueParser;
24use rand::{distr::Alphanumeric, RngExt};
25use tokio::io::AsyncReadExt;
26
27use std::simd::prelude::*;
28
/// Default chunk size (1024).
/// NOTE(review): not referenced in the visible part of this file; unit is presumably bytes — confirm.
pub const DEFAULT_BUFFER_CHUNK_SIZE: usize = 1024;
/// Default item count (1024).
/// NOTE(review): not referenced in the visible part of this file — confirm intended use.
pub const DEFAULT_BUFFER_ITEM_COUNT: usize = 1024;
/// Assumed CPU L1 cache line size (64, presumably bytes).
pub const DEFAULT_CPU_L1_CACHE_LINE_SIZE: usize = 64;
/// Assumed CPU L1 cache size (32 * 1024, presumably bytes).
pub const DEFAULT_CPU_L1_CACHE_SIZE: usize = 32 * 1024;
/// Assumed memory page size (4 * 1024, presumably bytes).
pub const DEFAULT_CPU_PAGE_SIZE: usize = 4 * 1024;
/// Number of bytes per search window; buffers are scanned in chunks of this size and a
/// full window is the unit handed to the SIMD search.
pub const DEFAULT_BYTE_WINDOW_SIZE: usize = 256;
35
/// Iterator state for splitting a borrowed byte slice on a byte pattern.
///
/// Built by `split_fast` on `&[u8]`; its `Iterator` implementation yields the bytes
/// found in front of each occurrence of `pattern`.
pub struct RUMSliceSplitIter<'a, 'b> {
    /// Part of the input that has not been yielded yet.
    pub remainder: &'a [u8],
    /// Byte sequence used as the separator.
    pub pattern: &'b [u8],
    /// Offset returned by the most recent `buffer_find` call over `remainder`.
    pub last: usize,
    /// `pattern.len()`, stored when the iterator is created.
    pub pattern_length: usize,
}
42
/// Iterator state for splitting a borrowed byte slice on a byte pattern while also
/// reporting a running offset with every chunk.
///
/// Built by `enumerate_fast` on `&[u8]`.
pub struct RUMSliceEnumerateIter<'a, 'b> {
    /// Part of the input that has not been yielded yet.
    pub remainder: &'a [u8],
    /// Byte sequence used as the separator.
    pub pattern: &'b [u8],
    /// Running sum of the `last` values seen so far; yielded alongside each chunk.
    pub cummulative: usize,
    /// Offset returned by the most recent `buffer_find` call over `remainder`.
    pub last: usize,
    /// `pattern.len()`, stored when the iterator is created.
    pub pattern_length: usize,
}
50
/// Iterator state for splitting an owned `RUMBuffer` on a byte pattern.
///
/// Built by `split_fast` on `RUMBuffer`; chunks are carved off the front of `remainder`.
pub struct RUMBufferSplitIter<'a> {
    /// Part of the input that has not been yielded yet.
    pub remainder: RUMBuffer,
    /// Byte sequence used as the separator.
    pub pattern: &'a [u8],
    /// `pattern.len()`, stored when the iterator is created.
    pub pattern_length: usize,
    /// Offset returned by the most recent `buffer_find` call over `remainder`.
    pub last: usize,
}
57
/// Iterator-like contract for byte-slice splitters: `next` produces the following item
/// or `None` when nothing is left.
///
/// NOTE(review): no implementation of this trait is visible in this part of the file;
/// the split iterators here implement `std::iter::Iterator` instead.
pub trait RUMByteSliceSplitIterTrait {
    /// Type of the elements produced by `next`.
    type Item;
    /// Advances the iterator and returns the next item, if any.
    fn next(&mut self) -> Option<Self::Item>;
}
62
/// Iterator-like contract for byte-slice enumerators: `next` produces a `usize` paired
/// with the following item, or `None` when nothing is left.
///
/// NOTE(review): no implementation of this trait is visible in this part of the file.
pub trait RUMByteSliceEnumeratorIterTrait {
    /// Type of the elements produced by `next`.
    type Item;
    /// Advances the iterator and returns `(usize, item)`, if any.
    fn next(&mut self) -> Option<(usize, Self::Item)>;
}
67
/// Iterator-like contract for buffer splitters: `next` produces the following item or
/// `None` when nothing is left.
///
/// NOTE(review): no implementation of this trait is visible in this part of the file.
pub trait RUMBufferSplitIterTrait {
    /// Type of the elements produced by `next`.
    type Item;
    /// Advances the iterator and returns the next item, if any.
    fn next(&mut self) -> Option<Self::Item>;
}
72
/// Extension methods that create pattern-splitting iterators over borrowed bytes.
///
/// `'a` is the lifetime of the borrowed data, `'b` the lifetime of the pattern.
pub trait RUMByteSliceIteratorExt<'a, 'b> {
    /// Returns an iterator over the chunks of `self` separated by `pattern`.
    fn split_fast(&'a self, pattern: &'b [u8]) -> RUMSliceSplitIter<'a, 'b>;
    /// Returns an iterator over `(offset, chunk)` pairs of `self` separated by `pattern`.
    fn enumerate_fast(&'a self, pattern: &'b [u8]) -> RUMSliceEnumerateIter<'a, 'b>;
}
77
/// Extension method that creates a pattern-splitting iterator which takes ownership of
/// the buffer it is called on.
pub trait RUMBufferIteratorExt<'a> {
    /// Consumes `self` and returns an iterator over its chunks separated by `pattern`.
    fn split_fast(self, pattern: &'a [u8]) -> RUMBufferSplitIter<'a>;
}
81
82impl<'a, 'b> Iterator for RUMSliceSplitIter<'a, 'b> {
83 type Item = &'a [u8];
84
85 fn next(&mut self) -> Option<Self::Item> {
86 self.last = buffer_find(self.remainder, self.pattern);
87
88 if self.remainder.len() > 0 {
89 let r = Some(&self.remainder[..self.last]);
90 let next = self.last + self.pattern_length;
91 if next <= self.remainder.len() {
92 self.remainder = &self.remainder[self.last + self.pattern_length..];
93 } else {
94 self.remainder = &self.remainder[self.last..];
95 }
96 r
97 } else {
98 None
99 }
100 }
101}
102
103impl<'a, 'b> Iterator for RUMSliceEnumerateIter<'a, 'b> {
104 type Item = (usize, &'a [u8]);
105
106 fn next(&mut self) -> Option<Self::Item> {
107 self.last = buffer_find(self.remainder, self.pattern);
108 self.cummulative += self.last;
109
110 if self.remainder.len() > 0 {
111 let r = Some((self.cummulative, &self.remainder[..self.last]));
112 self.remainder = &self.remainder[self.last + self.pattern.len()..];
113 r
114 } else {
115 None
116 }
117 }
118}
119
120impl<'a> Iterator for RUMBufferSplitIter<'a> {
121 type Item = RUMBuffer;
122
123 fn next(&mut self) -> Option<Self::Item> {
124 self.last = buffer_find(&self.remainder, self.pattern);
125
126 if self.remainder.len() > 0 {
127 let v = self.remainder.split_to(self.last);
128 if self.remainder.len() > self.pattern_length {
129 let _ = self.remainder.split_to(self.pattern_length);
130 }
131 Some(v)
132 } else {
133 None
134 }
135 }
136}
137
138impl<'a, 'b> RUMByteSliceIteratorExt<'a, 'b> for &[u8] {
139 fn split_fast(&'a self, pattern: &'b [u8]) -> RUMSliceSplitIter<'a, 'b> {
140 RUMSliceSplitIter {
141 pattern_length: pattern.len(),
142 remainder: self.clone(),
143 pattern: pattern.clone(),
144 last: 0,
145 }
146 }
147
148 fn enumerate_fast(&'a self, pattern: &'b [u8]) -> RUMSliceEnumerateIter<'a, 'b> {
149 RUMSliceEnumerateIter {
150 pattern_length: pattern.len(),
151 remainder: self.clone(),
152 pattern: pattern.clone(),
153 cummulative: 0,
154 last: 0,
155 }
156 }
157}
158
159impl<'a> RUMBufferIteratorExt<'a> for RUMBuffer {
160 fn split_fast(self, pattern: &'a [u8]) -> RUMBufferSplitIter<'a> {
161 RUMBufferSplitIter {
162 pattern_length: pattern.len(),
163 remainder: self,
164 pattern: pattern.clone(),
165 last: 0,
166 }
167 }
168}
169
170pub fn slice_to_buffer(buffer: &[u8]) -> RUMBuffer {
186 RUMBuffer::copy_from_slice(buffer)
187}
188
189pub fn new_random_buffer<const N: usize>() -> [u8; N] {
206 let mut buffer = [0u8; N];
207 rand::fill(&mut buffer);
208 buffer
209}
210
211pub fn new_random_rumbuffer<const N: usize>() -> RUMBuffer {
228 slice_to_buffer(&new_random_buffer::<N>())
229}
230
231pub fn new_random_string_buffer<const N: usize>() -> RUMString {
248 rand::rng()
249 .sample_iter(&Alphanumeric)
250 .take(N) .map(char::from)
252 .collect()
253}
254
255pub fn new_random_string_set<const N: usize>(item_count: usize) -> RUMVec<RUMString> {
274 let mut set = RUMVec::<RUMString>::with_capacity(item_count);
275
276 for _ in 0..item_count {
277 set.push(new_random_string_buffer::<N>())
278 }
279
280 set
281}
282
283pub fn buffer_split_fast(mut input: RUMBuffer, pattern: u8) -> RUMVecDeque<RUMBuffer> {
284 if input.is_empty() {
285 return RUMVecDeque::new();
286 }
287
288 let mut item_list = RUMVecDeque::with_capacity(10);
289 let mut offset = buffer_find_byte(input.as_slice(), pattern);
290
291 while offset < input.len() {
292 item_list.push_back(input.split_to(offset));
293 input.split_to(1);
294 offset = buffer_find_byte(input.as_slice(), pattern);
295 }
296 item_list.push_back(input);
297
298 item_list
299}
300
301pub fn buffer_to_string(buffer: &[u8]) -> RUMResult<RUMString> {
317 match buffer.to_string() {
318 Ok(string) => Ok(string),
319 Err(e) => Err(rumtk_format!("Failure to parse incoming UTF-8 string: {}", e)),
320 }
321}
322
323pub fn buffer_to_str(buffer: &[u8]) -> RUMResult<&str> {
324 match std::str::from_utf8(buffer) {
325 Ok(string) => Ok(string),
326 Err(e) => Err(rumtk_format!("Failure to parse incoming UTF-8 string: {}", e)),
327 }
328}
329
/// Counts how many bytes of `buffer` are equal to `pattern`.
///
/// Returns `0` for an empty buffer.
pub fn buffer_count(buffer: &[u8], pattern: u8) -> usize {
    // Count matching bytes directly; no intermediate collection is needed.
    buffer.iter().filter(|&&byte| byte == pattern).count()
}
335
336#[inline(always)]
337pub fn buffer_slice_to_array(chunk: &[u8]) -> &[u8; DEFAULT_BYTE_WINDOW_SIZE] {
338 chunk.try_into().expect("length mismatch")
339}
340
/// Scalar search: returns the index of the first byte in `chunk` equal to `byte`, or
/// `None` when there is none.
#[inline(always)]
pub fn buffer_chunk_find_fallback(chunk: &[u8], byte: u8) -> Option<usize> {
    for (index, candidate) in chunk.iter().enumerate() {
        if *candidate == byte {
            return Some(index);
        }
    }
    None
}
345
346#[inline(always)]
347fn buffer_chunk_find_simd_avx2(window: &[u8; DEFAULT_BYTE_WINDOW_SIZE], byte: u8) -> Option<usize> {
348 let target_vec = u8x32::splat(byte);
349
350 for (i, chunk) in window.chunks_exact(32).enumerate() {
351 let data_vec = u8x32::from_slice(chunk);
352 let mask = data_vec.simd_eq(target_vec);
353
354 if mask.any() {
355 let bitmask = mask.to_bitmask();;
356 let lane_i = bitmask.trailing_zeros() as usize;
357 return Some((i * 32) + lane_i);
358 }
359 }
360
361 None
362}
363
364#[inline(always)]
365pub fn buffer_chunk_find_simd(window: &[u8; DEFAULT_BYTE_WINDOW_SIZE], byte: u8) -> Option<usize> {
366 unsafe {
367 if is_x86_feature_detected!("avx2") {
368 buffer_chunk_find_simd_avx2(window, byte)
369 } else {
370 buffer_chunk_find_fallback(window, byte)
371 }
372 }
373}
374
375#[inline(always)]
376pub fn buffer_chunk_find(chunk: &[u8], byte: u8) -> usize {
377 let length = chunk.len();
378
379 if length == DEFAULT_BYTE_WINDOW_SIZE {
380 let chunk_window = buffer_slice_to_array(chunk);
381 buffer_chunk_find_simd(chunk_window, byte).unwrap_or(length)
382 } else {
383 buffer_chunk_find_fallback(chunk, byte).unwrap_or(length)
384 }
385}
386
387#[inline(always)]
388pub fn buffer_find_byte(buffer: &[u8], byte: u8) -> usize {
389 if buffer.is_empty() {
390 return buffer.len();
391 }
392
393 let iter = buffer.chunks(DEFAULT_BYTE_WINDOW_SIZE);
394 for (i, chunk) in iter.enumerate() {
395 if chunk.contains(&byte) {
396 return (i * DEFAULT_BYTE_WINDOW_SIZE) + buffer_chunk_find(chunk, byte);
397 }
398 }
399
400 buffer.len()
401}
402
403#[inline(always)]
404pub fn buffer_find(buffer: &[u8], pattern: &[u8]) -> usize {
405 if buffer.is_empty() {
406 return buffer.len();
407 }
408
409 let start_pattern_byte = pattern[0];
410 let pattern_length = pattern.len();
411 let mut working_buffer = buffer;
412 let mut cumulative = 0;
413 let mut end = 0;
414
415 while (end + pattern_length) < working_buffer.len() {
416 working_buffer = &working_buffer[end..];
417
418 if working_buffer[..pattern_length] == *pattern {
419 return cumulative;
420 } else {
421 working_buffer = &working_buffer[pattern_length..];
422 cumulative += pattern_length;
423 }
424
425 end = buffer_find_byte(&working_buffer, start_pattern_byte);
426 cumulative += end;
427 }
428
429 buffer.len()
430}
431
432#[inline(always)]
433pub fn buffer_find_instances<'a>(buffer: &'a [u8], pattern: &[u8]) -> RUMVec<(usize, &'a [u8])> {
434 if buffer.is_empty() {
435 return RUMVec::new();
436 }
437
438 let pattern_length = pattern.len();
439 let buffer_length = buffer.len() - pattern_length;
440 let mut instances = RUMVec::<(usize, &[u8])>::with_capacity(100);
441
442 let mut cursor = buffer_find(buffer, pattern);
443 let mut cumulative = cursor;
444 let mut remainder = &buffer[..];
445
446 while cumulative < buffer_length {
447 instances.push((cumulative, &remainder[..cursor]));
448 let next = cursor + pattern_length;
449 if next <= remainder.len() {
450 remainder = &remainder[cursor + pattern_length..];
451 cursor = buffer_find(remainder, pattern);
452 cumulative += cursor;
453 } else {
454 cumulative += remainder.len();
455 }
456 }
457
458 instances
459}
460
461#[inline(always)]
462pub fn buffer_pad(buffer: &[u8], pad: u8, target_length: usize) -> RUMBuffer {
463 let buffer_length = buffer.len();
464 let pad_length = target_length - buffer_length;
465 let s = buffer_length + pad_length;
466 let mut slice = RUMBufferMut::with_capacity(s);
467
468 slice.put(buffer);
469
470 for _ in buffer_length..s {
471 slice.put_u8(pad);
472 }
473
474 slice.freeze()
475}
476
477#[inline(always)]
478pub fn buffer_replace_in_place<'a>(buffer: &'a mut [u8], pattern: &[u8], replacement: &[u8]) {
479 let replacement_length = replacement.len();
480 let mut cursor = buffer_find(&buffer, pattern);
481 let mut remainder = buffer;
482
483 while cursor < remainder.len() {
484 for i in 0..replacement_length {
485 remainder[cursor + i] = replacement[i];
486 }
487
488 remainder = &mut remainder[cursor + pattern.len()..];
489 cursor = buffer_find(remainder, pattern);
490 }
491}
492
493#[inline(always)]
494pub fn buffer_replace(buffer: &[u8], pattern: &[u8], replacement: &[u8]) -> RUMBuffer {
495 let pattern_length = pattern.len();
496 let replacement_length = replacement.len();
497 let instances = buffer_find_instances(&buffer, pattern);
498 let mut new_buffer = RUMBufferMut::with_capacity(buffer.len() + (instances.len() * (replacement_length)));
499 let mut last = 0;
500
501 for (indx, chunk) in instances {
502 new_buffer.put(chunk);
503 new_buffer.put(replacement);
504 last = indx + pattern_length;
505 }
506
507 match new_buffer.is_empty() {
508 true => RUMBuffer::copy_from_slice(buffer),
509 false => {
510 new_buffer.put(&buffer[last..]);
511 new_buffer.freeze()
512 }
513 }
514}
515
516pub fn buffer_trim(buffer: &RUMBuffer) -> RUMBuffer {
517 let trimmed = buffer.trim_ascii();
518
519 if trimmed.len() < buffer.len() {
520 RUMBuffer::copy_from_slice(trimmed)
521 } else {
522 buffer.clone()
523 }
524}
525
/// Returns the sub-slice of `buffer` without leading and trailing ASCII whitespace.
pub fn buffer_slice_trim(buffer: &[u8]) -> &[u8] {
    let without_leading = buffer.trim_ascii_start();
    without_leading.trim_ascii_end()
}
529
530pub fn buffer_has_pattern(buffer: &[u8], pattern: &[u8]) -> bool {
531 buffer_find(buffer, pattern) != buffer.len()
532}
533
/// Returns `true` when no byte value occurs more than once in `data`.
///
/// An empty slice is considered unique.
pub fn is_unique_bytes(data: &[u8]) -> bool {
    // A byte has only 256 possible values, so any longer input must repeat one.
    if data.len() > 256 {
        return false;
    }

    // One flag per byte value; avoids allocating a set sized to the input.
    let mut seen = [false; 256];
    data.iter()
        .all(|&byte| !std::mem::replace(&mut seen[usize::from(byte)], true))
}
543