Skip to main content

rumtk_core/
buffers.rs

1/*
2 *     rumtk attempts to implement HL7 and medical protocols for interoperability in medicine.
3 *     This toolkit aims to be reliable, simple, performant, and standards compliant.
4 *     Copyright (C) 2026  Luis M. Santos, M.D. <lsantos@medicalmasses.com>
5 *     Copyright (C) 2026  MedicalMasses L.L.C. <contact@medicalmasses.com>
6 *
7 *     This program is free software: you can redistribute it and/or modify
8 *     it under the terms of the GNU General Public License as published by
9 *     the Free Software Foundation, either version 3 of the License, or
10 *     (at your option) any later version.
11 *
12 *     This program is distributed in the hope that it will be useful,
13 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 *     GNU General Public License for more details.
16 *
17 *     You should have received a copy of the GNU General Public License
18 *     along with this program.  If not, see <https://www.gnu.org/licenses/>.
19 */
20use crate::base::{RUMResult, RUMVec, RUMVecDeque};
21use crate::strings::{rumtk_format, RUMArrayConversions, RUMString};
22pub use bytes::{BufMut, Bytes as RUMBuffer, BytesMut as RUMBufferMut};
23use clap::builder::TypedValueParser;
24use rand::{distr::Alphanumeric, RngExt};
25use tokio::io::AsyncReadExt;
26
27use std::simd::prelude::*;
28
29pub const DEFAULT_BUFFER_CHUNK_SIZE: usize = 1024;
30pub const DEFAULT_BUFFER_ITEM_COUNT: usize = 1024;
31pub const DEFAULT_CPU_L1_CACHE_LINE_SIZE: usize = 64; // Number of bytes in a typical x86_64 CPU L1 cache line.
32pub const DEFAULT_CPU_L1_CACHE_SIZE: usize = 32 * 1024; // Number of bytes in a typical x86_64 CPU L1 cache per core.
33pub const DEFAULT_CPU_PAGE_SIZE: usize = 4 * 1024; // Typical CPU page size
34pub const DEFAULT_BYTE_WINDOW_SIZE: usize = 256;
35
/// Iterator state for splitting a borrowed byte slice on a byte pattern.
///
/// Built by `RUMByteSliceIteratorExt::split_fast`. Each call to `next` yields the bytes of
/// `remainder` that precede the index reported by `buffer_find`.
pub struct RUMSliceSplitIter<'a, 'b> {
    /// Part of the input that has not been yielded yet.
    pub remainder: &'a [u8],
    /// Delimiter that is searched for.
    pub pattern: &'b [u8],
    /// Result of the most recent `buffer_find` call; `split_fast` initialises it to 0.
    pub last: usize,
    /// Number of delimiter bytes to step over after an item; `split_fast` sets it to `pattern.len()`.
    pub pattern_length: usize,
}
42
/// Iterator state for splitting a borrowed byte slice on a byte pattern while also reporting a
/// running position with every chunk.
///
/// Built by `RUMByteSliceIteratorExt::enumerate_fast`.
pub struct RUMSliceEnumerateIter<'a, 'b> {
    /// Part of the input that has not been yielded yet.
    pub remainder: &'a [u8],
    /// Delimiter that is searched for.
    pub pattern: &'b [u8],
    /// Running sum of every `last` value seen so far; yielded as the first tuple element.
    pub cummulative: usize,
    /// Result of the most recent `buffer_find` call; `enumerate_fast` initialises it to 0.
    pub last: usize,
    /// Set to `pattern.len()` by `enumerate_fast`. The `Iterator` impl in this file reads
    /// `pattern.len()` directly instead of this field.
    pub pattern_length: usize,
}
50
/// Iterator state for splitting an owned [RUMBuffer] on a byte pattern.
///
/// Built by `RUMBufferIteratorExt::split_fast`. Items are carved off the front of `remainder`
/// with `split_to`.
pub struct RUMBufferSplitIter<'a> {
    /// Part of the input that has not been yielded yet.
    pub remainder: RUMBuffer,
    /// Delimiter that is searched for.
    pub pattern: &'a [u8],
    /// Number of delimiter bytes to drop after an item; `split_fast` sets it to `pattern.len()`.
    pub pattern_length: usize,
    /// Result of the most recent `buffer_find` call; `split_fast` initialises it to 0.
    pub last: usize,
}
57
/// Minimal "pull the next item" interface for byte-slice split iterators.
///
/// NOTE(review): no implementation of this trait is visible in this part of the file (the
/// iterator structs here implement `Iterator` directly) — confirm whether it is still needed.
pub trait RUMByteSliceSplitIterTrait {
    /// Type handed out by `next`.
    type Item;
    /// Returns the next item, if any.
    fn next(&mut self) -> Option<Self::Item>;
}
62
/// Minimal "pull the next item" interface that pairs every item with a `usize`.
///
/// NOTE(review): no implementation of this trait is visible in this part of the file (the
/// iterator structs here implement `Iterator` directly) — confirm whether it is still needed.
pub trait RUMByteSliceEnumeratorIterTrait {
    /// Type of the second tuple element handed out by `next`.
    type Item;
    /// Returns the next `(usize, item)` pair, if any.
    fn next(&mut self) -> Option<(usize, Self::Item)>;
}
67
/// Minimal "pull the next item" interface for buffer split iterators.
///
/// NOTE(review): no implementation of this trait is visible in this part of the file (the
/// iterator structs here implement `Iterator` directly) — confirm whether it is still needed.
pub trait RUMBufferSplitIterTrait {
    /// Type handed out by `next`.
    type Item;
    /// Returns the next item, if any.
    fn next(&mut self) -> Option<Self::Item>;
}
72
/// Extension methods that create the pattern-splitting iterators over borrowed bytes.
///
/// `'a` is the lifetime of the borrowed input and `'b` the lifetime of the pattern.
pub trait RUMByteSliceIteratorExt<'a, 'b> {
    /// Creates a [RUMSliceSplitIter] that splits `self` on `pattern`.
    fn split_fast(&'a self, pattern: &'b [u8]) -> RUMSliceSplitIter<'a, 'b>;
    /// Creates a [RUMSliceEnumerateIter] that splits `self` on `pattern` and reports a running
    /// position with every chunk.
    fn enumerate_fast(&'a self, pattern: &'b [u8]) -> RUMSliceEnumerateIter<'a, 'b>;
}
77
/// Extension method that creates the pattern-splitting iterator over an owned buffer.
pub trait RUMBufferIteratorExt<'a> {
    /// Consumes `self` and creates a [RUMBufferSplitIter] that splits it on `pattern`.
    fn split_fast(self, pattern: &'a [u8]) -> RUMBufferSplitIter<'a>;
}
81
82impl<'a, 'b> Iterator for RUMSliceSplitIter<'a, 'b> {
83    type Item = &'a [u8];
84
85    fn next(&mut self) -> Option<Self::Item> {
86        self.last = buffer_find(self.remainder, self.pattern);
87
88        if self.remainder.len() > 0 {
89            let r = Some(&self.remainder[..self.last]);
90            let next = self.last + self.pattern_length;
91            if next <= self.remainder.len() {
92                self.remainder = &self.remainder[self.last + self.pattern_length..];
93            } else {
94                self.remainder = &self.remainder[self.last..];
95            }
96            r
97        } else {
98            None
99        }
100    }
101}
102
103impl<'a, 'b> Iterator for RUMSliceEnumerateIter<'a, 'b> {
104    type Item = (usize, &'a [u8]);
105
106    fn next(&mut self) -> Option<Self::Item> {
107        self.last = buffer_find(self.remainder, self.pattern);
108        self.cummulative += self.last;
109
110        if self.remainder.len() > 0 {
111            let r = Some((self.cummulative, &self.remainder[..self.last]));
112            self.remainder = &self.remainder[self.last + self.pattern.len()..];
113            r
114        } else {
115            None
116        }
117    }
118}
119
120impl<'a> Iterator for RUMBufferSplitIter<'a> {
121    type Item = RUMBuffer;
122
123    fn next(&mut self) -> Option<Self::Item> {
124        self.last = buffer_find(&self.remainder, self.pattern);
125
126        if self.remainder.len() > 0 {
127            let v = self.remainder.split_to(self.last);
128            if self.remainder.len() > self.pattern_length {
129                let _ = self.remainder.split_to(self.pattern_length);
130            }
131            Some(v)
132        } else {
133            None
134        }
135    }
136}
137
138impl<'a, 'b> RUMByteSliceIteratorExt<'a, 'b> for &[u8] {
139    fn split_fast(&'a self, pattern: &'b [u8]) -> RUMSliceSplitIter<'a, 'b> {
140        RUMSliceSplitIter {
141            pattern_length: pattern.len(),
142            remainder: self.clone(),
143            pattern: pattern.clone(),
144            last: 0,
145        }
146    }
147
148    fn enumerate_fast(&'a self, pattern: &'b [u8]) -> RUMSliceEnumerateIter<'a, 'b> {
149        RUMSliceEnumerateIter {
150            pattern_length: pattern.len(),
151            remainder: self.clone(),
152            pattern: pattern.clone(),
153            cummulative: 0,
154            last: 0,
155        }
156    }
157}
158
159impl<'a> RUMBufferIteratorExt<'a> for RUMBuffer {
160    fn split_fast(self, pattern: &'a [u8]) -> RUMBufferSplitIter<'a> {
161        RUMBufferSplitIter {
162            pattern_length: pattern.len(),
163            remainder: self,
164            pattern: pattern.clone(),
165            last: 0,
166        }
167    }
168}
169
///
/// Convert slice of `&[u8]` to [RUMBuffer].
///
/// The bytes are copied into a newly created buffer (`RUMBuffer::copy_from_slice`), so the result
/// does not borrow from `buffer`.
///
/// ## Example
/// ```
/// use rumtk_core::buffers::slice_to_buffer;
/// use rumtk_core::types::RUMBuffer;
///
/// const expected: &str = "Hello World!";
/// let buffer = RUMBuffer::from_static(expected.as_bytes());
/// let result = slice_to_buffer(expected.as_bytes());
///
/// assert_eq!(result, buffer, "Slice to RUMBuffer conversion failed!");
/// ```
///
pub fn slice_to_buffer(buffer: &[u8]) -> RUMBuffer {
    RUMBuffer::copy_from_slice(buffer)
}
188
///
/// Generates a new random buffer using the `rand` crate and returns it as a fixed-size byte
/// array (`[u8; N]`).
///
/// The buffer size can be adjusted via the turbofish method => `new_random_buffer::<10>()`.
///
/// ## Example
///
/// ```
/// use rumtk_core::buffers::{new_random_buffer, DEFAULT_BUFFER_CHUNK_SIZE};
///
/// let buffer = new_random_buffer::<DEFAULT_BUFFER_CHUNK_SIZE>();
///
/// assert_eq!(buffer.is_empty(), false, "Function returned an empty random buffer which was unexpected!");
/// assert_eq!(buffer.len(), DEFAULT_BUFFER_CHUNK_SIZE, "The new random buffer does not have the expected size!");
/// ```
///
pub fn new_random_buffer<const N: usize>() -> [u8; N] {
    // Start from zeroes, then let `rand` overwrite every byte.
    let mut buffer = [0u8; N];
    rand::fill(&mut buffer);
    buffer
}
210
///
/// Generates a new random buffer using the `rand` crate and wrapped inside a [RUMBuffer](RUMBuffer).
///
/// The random bytes come from [new_random_buffer] and are copied into the result with
/// [slice_to_buffer].
///
/// The buffer size can be adjusted via the turbofish method => `new_random_rumbuffer::<10>()`.
///
/// ## Example
///
/// ```
/// use rumtk_core::buffers::{new_random_rumbuffer, DEFAULT_BUFFER_CHUNK_SIZE};
///
/// let buffer = new_random_rumbuffer::<DEFAULT_BUFFER_CHUNK_SIZE>();
///
/// assert_eq!(buffer.is_empty(), false, "Function returned an empty random buffer which was unexpected!");
/// assert_eq!(buffer.len(), DEFAULT_BUFFER_CHUNK_SIZE, "The new random buffer does not have the expected size!");
/// ```
///
pub fn new_random_rumbuffer<const N: usize>() -> RUMBuffer {
    slice_to_buffer(&new_random_buffer::<N>())
}
230
231///
232/// Generates a new random string using the `rand` crate and wrapped inside a [RUMString](RUMString).
233///
234/// The buffer size can be adjusted via the turbofish method => `new_random_string_buffer::<10>()`.
235///
236/// ## Example
237///
238/// ```
239/// use rumtk_core::buffers::{new_random_string_buffer, DEFAULT_BUFFER_CHUNK_SIZE};
240///
241/// let buffer = new_random_string_buffer::<DEFAULT_BUFFER_CHUNK_SIZE>();
242///
243/// assert_eq!(buffer.is_empty(), false, "Function returned an empty random buffer which was unexpected!");
244/// assert_eq!(buffer.len(), DEFAULT_BUFFER_CHUNK_SIZE, "The new random buffer does not have the expected size!");
245/// ```
246///
247pub fn new_random_string_buffer<const N: usize>() -> RUMString {
248    rand::rng()
249        .sample_iter(&Alphanumeric)
250        .take(N) // Length of the string
251        .map(char::from)
252        .collect()
253}
254
255///
256/// Generates a new random set of [RUMString] using the `rand` crate.
257///
258/// The buffer size for each item can be adjusted via the turbofish method => `new_random_string_set::<10>()`.
259///
260/// ## Example
261///
262/// ```
263/// use rumtk_core::buffers::{new_random_string_set, DEFAULT_BUFFER_CHUNK_SIZE};
264///const item_count: usize = 5;
265///
266/// let buffer = new_random_string_set::<DEFAULT_BUFFER_CHUNK_SIZE>(item_count);
267///
268/// assert_eq!(buffer.is_empty(), false, "Function returned an empty random buffer which was unexpected!");
269/// assert_eq!(buffer.len(), item_count, "The new random buffer does not have the expected item count!");
270/// assert_eq!(buffer.get(0).unwrap().len(), DEFAULT_BUFFER_CHUNK_SIZE, "The new random buffer does not have the expected size!");
271/// ```
272///
273pub fn new_random_string_set<const N: usize>(item_count: usize) -> RUMVec<RUMString> {
274    let mut set = RUMVec::<RUMString>::with_capacity(item_count);
275
276    for _ in 0..item_count {
277        set.push(new_random_string_buffer::<N>())
278    }
279
280    set
281}
282
283pub fn buffer_split_fast(mut input: RUMBuffer, pattern: u8) -> RUMVecDeque<RUMBuffer> {
284    if input.is_empty() {
285        return RUMVecDeque::new();
286    }
287
288    let mut item_list = RUMVecDeque::with_capacity(10);
289    let mut offset = buffer_find_byte(input.as_slice(), pattern);
290
291    while offset < input.len() {
292        item_list.push_back(input.split_to(offset));
293        input.split_to(1);
294        offset = buffer_find_byte(input.as_slice(), pattern);
295    }
296    item_list.push_back(input);
297
298    item_list
299}
300
301///
302/// Convert buffer to string.
303///
304/// ## Example
305/// ```
306/// use rumtk_core::buffers::buffer_to_string;
307/// use rumtk_core::types::RUMBuffer;
308///
309/// const expected: &str = "Hello World!";
310/// let buffer = RUMBuffer::from_static(expected.as_bytes());
311/// let result = buffer_to_string(&buffer).unwrap();
312///
313/// assert_eq!(result, expected, "Buffer to RUMString conversion failed!");
314/// ```
315///
316pub fn buffer_to_string(buffer: &[u8]) -> RUMResult<RUMString> {
317    match buffer.to_string() {
318        Ok(string) => Ok(string),
319        Err(e) => Err(rumtk_format!("Failure to parse incoming UTF-8 string: {}", e)),
320    }
321}
322
323pub fn buffer_to_str(buffer: &[u8]) -> RUMResult<&str> {
324    match std::str::from_utf8(buffer) {
325        Ok(string) => Ok(string),
326        Err(e) => Err(rumtk_format!("Failure to parse incoming UTF-8 string: {}", e)),
327    }
328}
329
///
/// Counts how many bytes of `buffer` are equal to `pattern`.
///
/// The comparison used to be inverted (`!=`), which counted every byte *except* the requested
/// one. The count is now taken directly from the iterator instead of collecting the matches into
/// a temporary vector first.
///
pub fn buffer_count(buffer: &[u8], pattern: u8) -> usize {
    buffer.iter().filter(|&&byte| byte == pattern).count()
}
335
336#[inline(always)]
337pub fn buffer_slice_to_array(chunk: &[u8]) -> &[u8; DEFAULT_BYTE_WINDOW_SIZE] {
338    chunk.try_into().expect("length mismatch")
339}
340
/// Scalar search: index of the first byte in `chunk` equal to `byte`, or `None` when absent.
#[inline(always)]
pub fn buffer_chunk_find_fallback(chunk: &[u8], byte: u8) -> Option<usize> {
    chunk.iter().position(|&candidate| candidate == byte)
}
345
346#[inline(always)]
347fn buffer_chunk_find_simd_avx2(window: &[u8; DEFAULT_BYTE_WINDOW_SIZE], byte: u8) -> Option<usize> {
348    let target_vec = u8x32::splat(byte);
349
350    for (i, chunk) in window.chunks_exact(32).enumerate() {
351        let data_vec = u8x32::from_slice(chunk);
352        let mask = data_vec.simd_eq(target_vec);
353
354        if mask.any() {
355            let bitmask = mask.to_bitmask();;
356            let lane_i = bitmask.trailing_zeros() as usize;
357            return Some((i * 32) + lane_i);
358        }
359    }
360
361    None
362}
363
364#[inline(always)]
365pub fn buffer_chunk_find_simd(window: &[u8; DEFAULT_BYTE_WINDOW_SIZE], byte: u8) -> Option<usize> {
366    unsafe {
367        if is_x86_feature_detected!("avx2") {
368            buffer_chunk_find_simd_avx2(window, byte)
369        } else {
370            buffer_chunk_find_fallback(window, byte)
371        }
372    }
373}
374
375#[inline(always)]
376pub fn buffer_chunk_find(chunk: &[u8], byte: u8) -> usize {
377    let length = chunk.len();
378
379    if length == DEFAULT_BYTE_WINDOW_SIZE {
380        let chunk_window = buffer_slice_to_array(chunk);
381        buffer_chunk_find_simd(chunk_window, byte).unwrap_or(length)
382    } else {
383        buffer_chunk_find_fallback(chunk, byte).unwrap_or(length)
384    }
385}
386
387#[inline(always)]
388pub fn buffer_find_byte(buffer: &[u8], byte: u8) -> usize {
389    if buffer.is_empty() {
390        return buffer.len();
391    }
392
393    let iter = buffer.chunks(DEFAULT_BYTE_WINDOW_SIZE);
394    for (i, chunk) in iter.enumerate() {
395        if chunk.contains(&byte) {
396            return (i * DEFAULT_BYTE_WINDOW_SIZE) + buffer_chunk_find(chunk, byte);
397        }
398    }
399
400    buffer.len()
401}
402
403#[inline(always)]
404pub fn buffer_find(buffer: &[u8], pattern: &[u8]) -> usize {
405    if buffer.is_empty() {
406        return buffer.len();
407    }
408
409    let start_pattern_byte = pattern[0];
410    let pattern_length = pattern.len();
411    let mut working_buffer = buffer;
412    let mut cumulative = 0;
413    let mut end = 0;
414
415    while (end + pattern_length) < working_buffer.len() {
416        working_buffer = &working_buffer[end..];
417
418        if working_buffer[..pattern_length] == *pattern {
419            return cumulative;
420        } else {
421            working_buffer = &working_buffer[pattern_length..];
422            cumulative += pattern_length;
423        }
424
425        end = buffer_find_byte(&working_buffer, start_pattern_byte);
426        cumulative += end;
427    }
428
429    buffer.len()
430}
431
432#[inline(always)]
433pub fn buffer_find_instances<'a>(buffer: &'a [u8], pattern: &[u8]) -> RUMVec<(usize, &'a [u8])> {
434    if buffer.is_empty() {
435        return RUMVec::new();
436    }
437
438    let pattern_length = pattern.len();
439    let buffer_length = buffer.len() - pattern_length;
440    let mut instances = RUMVec::<(usize, &[u8])>::with_capacity(100);
441
442    let mut cursor = buffer_find(buffer, pattern);
443    let mut cumulative = cursor;
444    let mut remainder = &buffer[..];
445
446    while cumulative < buffer_length {
447        instances.push((cumulative, &remainder[..cursor]));
448        let next = cursor + pattern_length;
449        if next <= remainder.len() {
450            remainder = &remainder[cursor + pattern_length..];
451            cursor = buffer_find(remainder, pattern);
452            cumulative += cursor;
453        } else {
454            cumulative += remainder.len();
455        }
456    }
457
458    instances
459}
460
461#[inline(always)]
462pub fn buffer_pad(buffer: &[u8], pad: u8, target_length: usize) -> RUMBuffer {
463    let buffer_length = buffer.len();
464    let pad_length = target_length - buffer_length;
465    let s = buffer_length + pad_length;
466    let mut slice = RUMBufferMut::with_capacity(s);
467
468    slice.put(buffer);
469
470    for _ in buffer_length..s {
471        slice.put_u8(pad);
472    }
473
474    slice.freeze()
475}
476
477#[inline(always)]
478pub fn buffer_replace_in_place<'a>(buffer: &'a mut [u8], pattern: &[u8], replacement: &[u8]) {
479    let replacement_length = replacement.len();
480    let mut cursor = buffer_find(&buffer, pattern);
481    let mut remainder = buffer;
482
483    while cursor < remainder.len() {
484        for i in 0..replacement_length {
485            remainder[cursor + i] = replacement[i];
486        }
487
488        remainder = &mut remainder[cursor + pattern.len()..];
489        cursor = buffer_find(remainder, pattern);
490    }
491}
492
493#[inline(always)]
494pub fn buffer_replace(buffer: &[u8], pattern: &[u8], replacement: &[u8]) -> RUMBuffer {
495    let pattern_length = pattern.len();
496    let replacement_length = replacement.len();
497    let instances = buffer_find_instances(&buffer, pattern);
498    let mut new_buffer =  RUMBufferMut::with_capacity(buffer.len() + (instances.len() * (replacement_length)));
499    let mut last = 0;
500
501    for (indx, chunk) in instances {
502        new_buffer.put(chunk);
503        new_buffer.put(replacement);
504        last = indx + pattern_length;
505    }
506
507    match new_buffer.is_empty() {
508        true => RUMBuffer::copy_from_slice(buffer),
509        false => {
510            new_buffer.put(&buffer[last..]);
511            new_buffer.freeze()
512        }
513    }
514}
515
516pub fn buffer_trim(buffer: &RUMBuffer) -> RUMBuffer {
517    let trimmed = buffer.trim_ascii();
518
519    if trimmed.len() < buffer.len() {
520        RUMBuffer::copy_from_slice(trimmed)
521    } else {
522        buffer.clone()
523    }
524}
525
///
/// Returns the sub-slice of `buffer` without leading and trailing ASCII whitespace.
///
/// Nothing is copied; the result borrows from `buffer`.
///
pub fn buffer_slice_trim(buffer: &[u8]) -> &[u8] {
    let without_leading = buffer.trim_ascii_start();
    without_leading.trim_ascii_end()
}
529
///
/// Reports whether `buffer_find` locates `pattern` in `buffer`.
///
/// Returns `true` when `buffer_find(buffer, pattern)` yields an index other than `buffer.len()`,
/// which is the value `buffer_find` uses for "not found". An empty `buffer` therefore always
/// gives `false`.
///
pub fn buffer_has_pattern(buffer: &[u8], pattern: &[u8]) -> bool {
    buffer_find(buffer, pattern) != buffer.len()
}
533
///
/// Returns `true` when no byte value occurs more than once in `data`.
///
/// An empty slice counts as unique. A byte can only take 256 different values, so any input
/// longer than 256 bytes must contain a repeat and is rejected without scanning. Shorter inputs
/// are checked against a fixed 256-entry table, which needs neither hashing nor a heap
/// allocation.
///
pub fn is_unique_bytes(data: &[u8]) -> bool {
    const BYTE_VALUES: usize = 256;

    if data.len() > BYTE_VALUES {
        return false;
    }

    let mut seen = [false; BYTE_VALUES];

    for &byte in data {
        let slot = &mut seen[usize::from(byte)];
        if *slot {
            return false;
        }
        *slot = true;
    }

    true
}
543