Skip to main content

rumtk_core/
buffers.rs

1/*
2 *     rumtk attempts to implement HL7 and medical protocols for interoperability in medicine.
3 *     This toolkit aims to be reliable, simple, performant, and standards compliant.
4 *     Copyright (C) 2026  Luis M. Santos, M.D. <lsantos@medicalmasses.com>
5 *     Copyright (C) 2026  MedicalMasses L.L.C. <contact@medicalmasses.com>
6 *
7 *     This program is free software: you can redistribute it and/or modify
8 *     it under the terms of the GNU General Public License as published by
9 *     the Free Software Foundation, either version 3 of the License, or
10 *     (at your option) any later version.
11 *
12 *     This program is distributed in the hope that it will be useful,
13 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
14 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 *     GNU General Public License for more details.
16 *
17 *     You should have received a copy of the GNU General Public License
18 *     along with this program.  If not, see <https://www.gnu.org/licenses/>.
19 */
20use crate::core::{RUMResult, RUMVec, RUMVecDeque};
21use crate::strings::{rumtk_format, RUMArrayConversions, RUMString};
22pub use bytes::{BufMut, Bytes as RUMBuffer, BytesMut as RUMBufferMut};
23use clap::builder::TypedValueParser;
24use rand::{distr::Alphanumeric, RngExt};
25use tokio::io::AsyncReadExt;
26
/// Default chunk size (1024). The documentation examples in this module use it as the
/// const-generic length of the random buffers and strings they generate.
pub const DEFAULT_BUFFER_CHUNK_SIZE: usize = 1024;
/// Default item count (1024).
/// NOTE(review): not referenced in the visible part of this module; presumably consumed by
/// callers elsewhere. Confirm before changing.
pub const DEFAULT_BUFFER_ITEM_COUNT: usize = 1024;
29
/// Iteration state for splitting a borrowed byte slice on a byte pattern.
///
/// Created by [`RUMByteSliceIteratorExt::split_fast`]. Its [`Iterator`] implementation yields
/// sub-slices of the original input.
pub struct RUMSliceSplitIter<'a, 'b> {
    /// Portion of the input that has not been yielded yet.
    pub remainder: &'a [u8],
    /// Delimiter being searched for.
    pub pattern: &'b [u8],
    /// Offset returned by the most recent `buffer_find` call on `remainder`.
    pub last: usize,
    /// Length of `pattern`, captured when the iterator is created.
    pub pattern_length: usize,
}
36
/// Iteration state for splitting a borrowed byte slice on a byte pattern while also reporting
/// a running offset with every segment.
///
/// Created by [`RUMByteSliceIteratorExt::enumerate_fast`].
pub struct RUMSliceEnumerateIter<'a, 'b> {
    /// Portion of the input that has not been yielded yet.
    pub remainder: &'a [u8],
    /// Delimiter being searched for.
    pub pattern: &'b [u8],
    /// Running total of the `last` offsets; this is the first element of each yielded tuple.
    pub cummulative: usize,
    /// Offset returned by the most recent `buffer_find` call on `remainder`.
    pub last: usize,
    /// Length of `pattern`, captured when the iterator is created.
    pub pattern_length: usize,
}
44
/// Iteration state for splitting an owned [`RUMBuffer`] on a byte pattern.
///
/// Created by [`RUMBufferIteratorExt::split_fast`]. Its [`Iterator`] implementation carves
/// each item off the front of `remainder` with `split_to`.
pub struct RUMBufferSplitIter<'a> {
    /// Portion of the input that has not been yielded yet.
    pub remainder: RUMBuffer,
    /// Delimiter being searched for.
    pub pattern: &'a [u8],
    /// Length of `pattern`, captured when the iterator is created.
    pub pattern_length: usize,
    /// Offset returned by the most recent `buffer_find` call on `remainder`.
    pub last: usize,
}
51
/// Iterator-like interface exposing a single `next` method that returns `Option<Self::Item>`.
///
/// NOTE(review): nothing in the visible part of this file implements this trait; the slice
/// split iterator implements [`Iterator`] directly. Confirm whether it is still needed.
pub trait RUMByteSliceSplitIterTrait {
    /// Type of the values produced by `next`.
    type Item;
    /// Returns the next item, or `None` when there is nothing left to produce.
    fn next(&mut self) -> Option<Self::Item>;
}
56
/// Iterator-like interface whose `next` method pairs every item with a `usize`.
///
/// NOTE(review): nothing in the visible part of this file implements this trait; the slice
/// enumerate iterator implements [`Iterator`] directly. Confirm whether it is still needed.
pub trait RUMByteSliceEnumeratorIterTrait {
    /// Type of the values produced by `next` (the second tuple element).
    type Item;
    /// Returns the next `(usize, item)` pair, or `None` when there is nothing left to produce.
    fn next(&mut self) -> Option<(usize, Self::Item)>;
}
61
/// Iterator-like interface exposing a single `next` method that returns `Option<Self::Item>`.
///
/// NOTE(review): nothing in the visible part of this file implements this trait; the buffer
/// split iterator implements [`Iterator`] directly. Confirm whether it is still needed.
pub trait RUMBufferSplitIterTrait {
    /// Type of the values produced by `next`.
    type Item;
    /// Returns the next item, or `None` when there is nothing left to produce.
    fn next(&mut self) -> Option<Self::Item>;
}
66
/// Extension trait adding pattern-based splitting to borrowed byte slices.
///
/// `'a` is the lifetime of the data being split, `'b` the lifetime of the delimiter.
pub trait RUMByteSliceIteratorExt<'a, 'b> {
    /// Returns an iterator over the sub-slices of `self` that are separated by `pattern`.
    fn split_fast(&'a self, pattern: &'b [u8]) -> RUMSliceSplitIter<'a, 'b>;
    /// Returns an iterator over `(running offset, sub-slice)` pairs for the segments of
    /// `self` that are separated by `pattern`.
    fn enumerate_fast(&'a self, pattern: &'b [u8]) -> RUMSliceEnumerateIter<'a, 'b>;
}
71
/// Extension trait adding pattern-based splitting to owned buffers.
///
/// `'a` is the lifetime of the delimiter.
pub trait RUMBufferIteratorExt<'a> {
    /// Consumes `self` and returns an iterator over the pieces separated by `pattern`.
    fn split_fast(self, pattern: &'a [u8]) -> RUMBufferSplitIter<'a>;
}
75
76impl<'a, 'b> Iterator for RUMSliceSplitIter<'a, 'b> {
77    type Item = &'a [u8];
78
79    fn next(&mut self) -> Option<Self::Item> {
80        self.last = buffer_find(self.remainder, self.pattern);
81
82        if self.remainder.len() > 0 {
83            let r = Some(&self.remainder[..self.last]);
84            let next = self.last + self.pattern_length;
85            if next <= self.remainder.len() {
86                self.remainder = &self.remainder[self.last + self.pattern_length..];
87            } else {
88                self.remainder = &self.remainder[self.last..];
89            }
90            r
91        } else {
92            None
93        }
94    }
95}
96
97impl<'a, 'b> Iterator for RUMSliceEnumerateIter<'a, 'b> {
98    type Item = (usize, &'a [u8]);
99
100    fn next(&mut self) -> Option<Self::Item> {
101        self.last = buffer_find(self.remainder, self.pattern);
102        self.cummulative += self.last;
103
104        if self.remainder.len() > 0 {
105            let r = Some((self.cummulative, &self.remainder[..self.last]));
106            self.remainder = &self.remainder[self.last + self.pattern.len()..];
107            r
108        } else {
109            None
110        }
111    }
112}
113
114impl<'a> Iterator for RUMBufferSplitIter<'a> {
115    type Item = RUMBuffer;
116
117    fn next(&mut self) -> Option<Self::Item> {
118        self.last = buffer_find(&self.remainder, self.pattern);
119
120        if self.remainder.len() > 0 {
121            let v = self.remainder.split_to(self.last);
122            if self.remainder.len() > self.pattern_length {
123                let _ = self.remainder.split_to(self.pattern_length);
124            }
125            Some(v)
126        } else {
127            None
128        }
129    }
130}
131
132impl<'a, 'b> RUMByteSliceIteratorExt<'a, 'b> for &[u8] {
133    fn split_fast(&'a self, pattern: &'b [u8]) -> RUMSliceSplitIter<'a, 'b> {
134        RUMSliceSplitIter {
135            pattern_length: pattern.len(),
136            remainder: self.clone(),
137            pattern: pattern.clone(),
138            last: 0,
139        }
140    }
141
142    fn enumerate_fast(&'a self, pattern: &'b [u8]) -> RUMSliceEnumerateIter<'a, 'b> {
143        RUMSliceEnumerateIter {
144            pattern_length: pattern.len(),
145            remainder: self.clone(),
146            pattern: pattern.clone(),
147            cummulative: 0,
148            last: 0,
149        }
150    }
151}
152
153impl<'a> RUMBufferIteratorExt<'a> for RUMBuffer {
154    fn split_fast(self, pattern: &'a [u8]) -> RUMBufferSplitIter<'a> {
155        RUMBufferSplitIter {
156            pattern_length: pattern.len(),
157            remainder: self,
158            pattern: pattern.clone(),
159            last: 0,
160        }
161    }
162}
163
///
/// Copy the contents of a byte slice (`&[u8]`) into a new [RUMBuffer].
///
/// ## Example
/// ```
/// use rumtk_core::buffers::slice_to_buffer;
/// use rumtk_core::types::RUMBuffer;
///
/// const expected: &str = "Hello World!";
/// let buffer = RUMBuffer::from_static(expected.as_bytes());
/// let result = slice_to_buffer(expected.as_bytes());
///
/// assert_eq!(result, buffer, "Slice to RUMBuffer conversion failed!");
/// ```
///
pub fn slice_to_buffer(buffer: &[u8]) -> RUMBuffer {
    RUMBuffer::copy_from_slice(buffer)
}
182
///
/// Generates a new random byte array (`[u8; N]`) filled through the `rand` crate.
///
/// The array is returned by value; it is not wrapped in a [RUMBuffer](RUMBuffer).
///
/// The buffer size can be adjusted via the turbofish method => `new_random_buffer::<10>()`.
///
/// ## Example
///
/// ```
/// use rumtk_core::buffers::{new_random_buffer, DEFAULT_BUFFER_CHUNK_SIZE};
///
/// let buffer = new_random_buffer::<DEFAULT_BUFFER_CHUNK_SIZE>();
///
/// assert_eq!(buffer.is_empty(), false, "Function returned an empty random buffer which was unexpected!");
/// assert_eq!(buffer.len(), DEFAULT_BUFFER_CHUNK_SIZE, "The new random buffer does not have the expected size!");
/// ```
///
pub fn new_random_buffer<const N: usize>() -> [u8; N] {
    // Start from a zeroed array and let `rand` overwrite every byte.
    let mut buffer = [0u8; N];
    rand::fill(&mut buffer);
    buffer
}
204
///
/// Generates `N` random bytes using the `rand` crate and wraps them inside a [RUMBuffer](RUMBuffer).
///
/// The buffer size can be adjusted via the turbofish method => `new_random_rumbuffer::<10>()`.
///
/// ## Example
///
/// ```
/// use rumtk_core::buffers::{new_random_rumbuffer, DEFAULT_BUFFER_CHUNK_SIZE};
///
/// let buffer = new_random_rumbuffer::<DEFAULT_BUFFER_CHUNK_SIZE>();
///
/// assert_eq!(buffer.is_empty(), false, "Function returned an empty random buffer which was unexpected!");
/// assert_eq!(buffer.len(), DEFAULT_BUFFER_CHUNK_SIZE, "The new random buffer does not have the expected size!");
/// ```
///
pub fn new_random_rumbuffer<const N: usize>() -> RUMBuffer {
    // The random array lives on the stack; `slice_to_buffer` copies it into the buffer.
    slice_to_buffer(&new_random_buffer::<N>())
}
224
225///
226/// Generates a new random string using the `rand` crate and wrapped inside a [RUMString](RUMString).
227///
228/// The buffer size can be adjusted via the turbofish method => `new_random_string_buffer::<10>()`.
229///
230/// ## Example
231///
232/// ```
233/// use rumtk_core::buffers::{new_random_string_buffer, DEFAULT_BUFFER_CHUNK_SIZE};
234///
235/// let buffer = new_random_string_buffer::<DEFAULT_BUFFER_CHUNK_SIZE>();
236///
237/// assert_eq!(buffer.is_empty(), false, "Function returned an empty random buffer which was unexpected!");
238/// assert_eq!(buffer.len(), DEFAULT_BUFFER_CHUNK_SIZE, "The new random buffer does not have the expected size!");
239/// ```
240///
241pub fn new_random_string_buffer<const N: usize>() -> RUMString {
242    rand::rng()
243        .sample_iter(&Alphanumeric)
244        .take(N) // Length of the string
245        .map(char::from)
246        .collect()
247}
248
249///
250/// Generates a new random set of [RUMString] using the `rand` crate.
251///
252/// The buffer size for each item can be adjusted via the turbofish method => `new_random_string_set::<10>()`.
253///
254/// ## Example
255///
256/// ```
257/// use rumtk_core::buffers::{new_random_string_set, DEFAULT_BUFFER_CHUNK_SIZE};
258///const item_count: usize = 5;
259///
260/// let buffer = new_random_string_set::<DEFAULT_BUFFER_CHUNK_SIZE>(item_count);
261///
262/// assert_eq!(buffer.is_empty(), false, "Function returned an empty random buffer which was unexpected!");
263/// assert_eq!(buffer.len(), item_count, "The new random buffer does not have the expected item count!");
264/// assert_eq!(buffer.get(0).unwrap().len(), DEFAULT_BUFFER_CHUNK_SIZE, "The new random buffer does not have the expected size!");
265/// ```
266///
267pub fn new_random_string_set<const N: usize>(item_count: usize) -> RUMVec<RUMString> {
268    let mut set = RUMVec::<RUMString>::with_capacity(item_count);
269
270    for _ in 0..item_count {
271        set.push(new_random_string_buffer::<N>())
272    }
273
274    set
275}
276
277pub fn buffer_split_fast(mut input: RUMBuffer, pattern: u8) -> RUMVecDeque<RUMBuffer> {
278    if input.is_empty() {
279        return RUMVecDeque::new();
280    }
281
282    let mut item_list = RUMVecDeque::with_capacity(10);
283    let mut offset = buffer_find_byte(input.as_slice(), pattern);
284
285    while offset < input.len() {
286        item_list.push_back(input.split_to(offset));
287        input.split_to(1);
288        offset = buffer_find_byte(input.as_slice(), pattern);
289    }
290    item_list.push_back(input);
291
292    item_list
293}
294
295///
296/// Convert buffer to string.
297///
298/// ## Example
299/// ```
300/// use rumtk_core::buffers::buffer_to_string;
301/// use rumtk_core::types::RUMBuffer;
302///
303/// const expected: &str = "Hello World!";
304/// let buffer = RUMBuffer::from_static(expected.as_bytes());
305/// let result = buffer_to_string(&buffer).unwrap();
306///
307/// assert_eq!(result, expected, "Buffer to RUMString conversion failed!");
308/// ```
309///
310pub fn buffer_to_string(buffer: &[u8]) -> RUMResult<RUMString> {
311    match buffer.to_string() {
312        Ok(string) => Ok(string),
313        Err(e) => Err(rumtk_format!("Failure to parse incoming UTF-8 string: {}", e)),
314    }
315}
316
317pub fn buffer_to_str(buffer: &[u8]) -> RUMResult<&str> {
318    match std::str::from_utf8(buffer) {
319        Ok(string) => Ok(string),
320        Err(e) => Err(rumtk_format!("Failure to parse incoming UTF-8 string: {}", e)),
321    }
322}
323
///
/// Count how many bytes of `buffer` are equal to `pattern`.
///
/// ## Example
/// ```
/// use rumtk_core::buffers::buffer_count;
///
/// assert_eq!(buffer_count(b"a,b,c", b','), 2, "Unexpected number of separators!");
/// ```
///
pub fn buffer_count(buffer: &[u8], pattern: u8) -> usize {
    // Count the bytes that match the pattern, without building an intermediate collection.
    buffer.iter().filter(|&&byte| byte == pattern).count()
}
329
///
/// Return the index of the first occurrence of `byte` in `chunk`.
///
/// When the byte does not occur, `chunk.len()` is returned instead.
///
pub fn buffer_chunk_find(chunk: &[u8], byte: u8) -> usize {
    chunk
        .iter()
        .position(|&candidate| candidate == byte)
        .unwrap_or(chunk.len())
}
339
340pub fn buffer_find_byte(buffer: &[u8], byte: u8) -> usize {
341    if buffer.is_empty() {
342        return buffer.len();
343    }
344
345    let iter = buffer.chunks(256);
346    for (i, chunk) in iter.enumerate() {
347        if chunk.contains(&byte) {
348            return (i * 256) + buffer_chunk_find(chunk, byte);
349        }
350    }
351
352    buffer.len()
353}
354
///
/// Return the offset of the first occurrence of `pattern` in `buffer`.
///
/// `buffer.len()` is the "not found" value. It is returned when the pattern does not occur,
/// when the pattern is longer than the buffer and when the pattern is empty (an empty
/// pattern is treated as never matching so that callers which advance past each match always
/// make progress).
///
/// Every offset is a candidate, so matches that overlap an earlier partial match (`"ab"` in
/// `"aab"`) and matches that end exactly at the end of the buffer are found.
///
/// ## Example
/// ```
/// use rumtk_core::buffers::buffer_find;
///
/// assert_eq!(buffer_find(b"key=value", b"="), 3, "Separator was not located!");
/// assert_eq!(buffer_find(b"key", b"="), 3, "Missing separator must yield the buffer length!");
/// ```
///
pub fn buffer_find(buffer: &[u8], pattern: &[u8]) -> usize {
    let not_found = buffer.len();

    let Some(&first_pattern_byte) = pattern.first() else {
        return not_found;
    };
    if pattern.len() > buffer.len() {
        return not_found;
    }

    buffer
        .windows(pattern.len())
        // Cheap first-byte test before the full comparison.
        .position(|window| window[0] == first_pattern_byte && window == pattern)
        .unwrap_or(not_found)
}
382
383pub fn buffer_find_instances<'a>(buffer: &'a [u8], pattern: &[u8]) -> RUMVec<(usize, &'a [u8])> {
384    if buffer.is_empty() {
385        return RUMVec::new();
386    }
387
388    let pattern_length = pattern.len();
389    let buffer_length = buffer.len() - pattern_length;
390    let mut instances = RUMVec::<(usize, &[u8])>::with_capacity(100);
391
392    let mut cursor = buffer_find(buffer, pattern);
393    let mut cumulative = cursor;
394    let mut remainder = &buffer[..];
395
396    while cumulative < buffer_length {
397        instances.push((cumulative, &remainder[..cursor]));
398        let next = cursor + pattern_length;
399        if next <= remainder.len() {
400            remainder = &remainder[cursor + pattern_length..];
401            cursor = buffer_find(remainder, pattern);
402            cumulative += cursor;
403        } else {
404            cumulative += remainder.len();
405        }
406    }
407
408    instances
409}
410
411pub fn buffer_pad(buffer: &[u8], pad: u8, target_length: usize) -> RUMBuffer {
412    let buffer_length = buffer.len();
413    let pad_length = target_length - buffer_length;
414    let s = buffer_length + pad_length;
415    let mut slice = RUMBufferMut::with_capacity(s);
416
417    slice.put(buffer);
418
419    for _ in buffer_length..s {
420        slice.put_u8(pad);
421    }
422
423    slice.freeze()
424}
425
426pub fn buffer_replace_in_place<'a>(buffer: &'a mut [u8], pattern: &[u8], replacement: &[u8]) {
427    let replacement_length = replacement.len();
428    let mut cursor = buffer_find(&buffer, pattern);
429    let mut remainder = buffer;
430
431    while cursor < remainder.len() {
432        for i in 0..replacement_length {
433            remainder[cursor + i] = replacement[i];
434        }
435
436        remainder = &mut remainder[cursor + pattern.len()..];
437        cursor = buffer_find(remainder, pattern);
438    }
439}
440
441pub fn buffer_replace(buffer: &[u8], pattern: &[u8], replacement: &[u8]) -> RUMBuffer {
442    let pattern_length = pattern.len();
443    let replacement_length = replacement.len();
444    let instances = buffer_find_instances(&buffer, pattern);
445    let mut new_buffer =  RUMBufferMut::with_capacity(buffer.len() + (instances.len() * (replacement_length)));
446    let mut last = 0;
447
448    for (indx, chunk) in instances {
449        new_buffer.put(chunk);
450        new_buffer.put(replacement);
451        last = indx + pattern_length;
452    }
453
454    match new_buffer.is_empty() {
455        true => RUMBuffer::copy_from_slice(buffer),
456        false => {
457            new_buffer.put(&buffer[last..]);
458            new_buffer.freeze()
459        }
460    }
461}
462
463pub fn buffer_trim(buffer: &RUMBuffer) -> RUMBuffer {
464    let trimmed = buffer.trim_ascii();
465
466    if trimmed.len() < buffer.len() {
467        RUMBuffer::copy_from_slice(trimmed)
468    } else {
469        buffer.clone()
470    }
471}
472
///
/// Return the sub-slice of `buffer` without leading and trailing ASCII whitespace.
///
/// No bytes are copied; the result borrows from `buffer`.
///
pub fn buffer_slice_trim(buffer: &[u8]) -> &[u8] {
    buffer.trim_ascii_start().trim_ascii_end()
}
476
///
/// Report whether `pattern` occurs in `buffer`.
///
/// Delegates to [buffer_find], which returns `buffer.len()` as its "not found" value; any
/// other result counts as a hit.
///
pub fn buffer_has_pattern(buffer: &[u8], pattern: &[u8]) -> bool {
    buffer_find(buffer, pattern) != buffer.len()
}
480
///
/// Report whether every byte in `data` occurs at most once.
///
/// An empty slice counts as unique. Only 256 distinct byte values exist, so a fixed lookup
/// table replaces a hash set and any input longer than 256 bytes is rejected immediately.
///
pub fn is_unique_bytes(data: &[u8]) -> bool {
    const BYTE_VALUES: usize = u8::MAX as usize + 1;

    // Pigeonhole: more bytes than distinct values guarantees a repeat.
    if data.len() > BYTE_VALUES {
        return false;
    }

    let mut seen = [false; BYTE_VALUES];
    for &byte in data {
        let slot = &mut seen[usize::from(byte)];
        if *slot {
            return false;
        }
        *slot = true;
    }

    true
}
490