//! Slice-backed reader: source of `pure_magic/readers/slice.rs`.

use std::{
    io::{self, Read, SeekFrom},
    ops::{Range, Sub},
};

use crate::readers::DataRead;
7
/// A buffered reader for byte slices that tracks the current read position.
///
/// Wraps any type implementing [`AsRef<[u8]>`] and provides seeking and reading
/// operations while maintaining an internal cursor position.
///
/// See [`BufReader::from_slice`] for construction.
pub struct BufReader<S: AsRef<[u8]>> {
    /// Cursor position, as a byte offset from the start of `buf`.
    ///
    /// Nothing prevents it from pointing past the end of the data; the read
    /// methods use checked slicing and return an empty slice in that case.
    stream_pos: u64,
    /// The wrapped byte container; only ever accessed through `as_ref()`.
    buf: S,
}
18
19impl<S> Read for BufReader<S>
20where
21    S: AsRef<[u8]>,
22{
23    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
24        let r = self.read_count(buf.len() as u64)?;
25        for (i, b) in r.iter().enumerate() {
26            buf[i] = *b;
27        }
28        Ok(r.len())
29    }
30}
31
32impl<S> DataRead for BufReader<S>
33where
34    S: AsRef<[u8]>,
35{
36    #[inline(always)]
37    fn stream_position(&self) -> u64 {
38        self.stream_pos
39    }
40
41    #[inline]
42    fn read_range(&mut self, range: Range<u64>) -> io::Result<&[u8]> {
43        // we fix range in case we attempt at reading beyond end of file
44        let range = if range.end > self.buf.as_ref().len() as u64 {
45            range.start..self.buf.as_ref().len() as u64
46        } else {
47            range
48        };
49
50        self.seek(SeekFrom::Start(range.end))
51            .expect("buffer seek should never fail");
52
53        let Some(buf) = self
54            .buf
55            .as_ref()
56            .get(range.start as usize..range.end as usize)
57        else {
58            return Ok(&[]);
59        };
60
61        Ok(buf)
62    }
63
64    fn read_until_any_delim_or_limit(
65        &mut self,
66        delims: &[u8],
67        limit: u64,
68    ) -> Result<&[u8], io::Error> {
69        self._read_while_or_limit(|b| !delims.contains(&b), limit, true)
70    }
71
72    fn read_until_or_limit(&mut self, byte: u8, limit: u64) -> Result<&[u8], io::Error> {
73        self._read_while_or_limit(|b| b != byte, limit, true)
74    }
75
76    fn read_while_or_limit<F>(&mut self, f: F, limit: u64) -> Result<&[u8], io::Error>
77    where
78        F: Fn(u8) -> bool,
79    {
80        self._read_while_or_limit(f, limit, false)
81    }
82
83    fn read_until_utf16_or_limit(
84        &mut self,
85        utf16_char: &[u8; 2],
86        limit: u64,
87    ) -> Result<&[u8], io::Error> {
88        let start = self.stream_pos;
89        let mut end = 0;
90
91        let Some(buf) = self.buf.as_ref().get(start as usize..) else {
92            return Ok(&[]);
93        };
94
95        let buf = if buf.len().is_multiple_of(2) {
96            buf
97        } else if buf.len() > 1 {
98            &buf[..buf.len().sub(1)]
99        } else {
100            return Ok(&[]);
101        };
102
103        let even = buf
104            .iter()
105            .enumerate()
106            .filter(|(i, _)| i.is_multiple_of(2))
107            .map(|t| t.1);
108
109        let odd = buf
110            .iter()
111            .enumerate()
112            .filter(|(i, _)| !i.is_multiple_of(2))
113            .map(|t| t.1);
114
115        for t in even.zip(odd) {
116            if limit.saturating_sub(end) == 0 {
117                break;
118            }
119
120            end += 2;
121
122            // tail check
123            if t.0 == &utf16_char[0] && t.1 == &utf16_char[1] {
124                // we include char
125                break;
126            }
127        }
128
129        self.read_exact_range(start..start + end)
130    }
131
132    fn data_size(&self) -> u64 {
133        self.buf.as_ref().len() as u64
134    }
135
136    #[inline(always)]
137    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
138        self.stream_pos = self.offset_from_start(pos);
139        Ok(self.stream_pos)
140    }
141}
142
143impl<S> BufReader<S>
144where
145    S: AsRef<[u8]>,
146{
147    /// Creates a new `BufReader` wrapping the provided byte slice.
148    ///
149    /// The reader's position is initialized to `0`.
150    ///
151    /// # Examples
152    ///
153    /// ```
154    /// use pure_magic::readers::{BufReader, DataRead};
155    ///
156    /// let reader = BufReader::from_slice(b"hello world");
157    /// assert_eq!(reader.stream_position(), 0);
158    /// ```
159    pub fn from_slice(s: S) -> Self {
160        Self {
161            stream_pos: 0,
162            buf: s,
163        }
164    }
165
166    // reads while f returns true or we reach limit
167    #[inline(always)]
168    fn _read_while_or_limit<F>(
169        &mut self,
170        f: F,
171        limit: u64,
172        include_last: bool,
173    ) -> Result<&[u8], io::Error>
174    where
175        F: Fn(u8) -> bool,
176    {
177        let start = self.stream_pos;
178        let mut end = 0;
179
180        let Some(buf) = self.buf.as_ref().get(start as usize..) else {
181            return Ok(&[]);
182        };
183
184        for b in buf {
185            if limit - end == 0 {
186                break;
187            }
188
189            if !f(*b) {
190                if include_last && end < self.data_size() {
191                    end += 1;
192                }
193                break;
194            }
195
196            end += 1;
197        }
198
199        self.read_exact_range(start..start + end)
200    }
201}
202
#[cfg(test)]
mod tests {
    //! Unit tests for [`BufReader`]: construction, seeking, and every read
    //! helper reachable through `DataRead`, including out-of-range cursors.

    use super::*;

    // === from_slice ===

    #[test]
    fn test_from_slice() {
        let buf = b"hello world";
        let r = BufReader::from_slice(buf);
        assert_eq!(r.stream_position(), 0);
        assert_eq!(r.data_size(), buf.len() as u64);
    }

    #[test]
    fn test_from_slice_empty() {
        let r = BufReader::from_slice(b"");
        assert_eq!(r.stream_position(), 0);
        assert_eq!(r.data_size(), 0);
    }

    // === Seek impl ===

    #[test]
    fn test_seek_start() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.seek(SeekFrom::Start(5)).unwrap(), 5);
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_seek_start_zero() {
        let mut r = BufReader::from_slice(b"hello");
        r.seek(SeekFrom::Start(3)).unwrap();
        assert_eq!(r.seek(SeekFrom::Start(0)).unwrap(), 0);
        assert_eq!(r.stream_position(), 0);
    }

    #[test]
    fn test_seek_current() {
        let mut r = BufReader::from_slice(b"hello world");
        r.seek(SeekFrom::Start(5)).unwrap();
        assert_eq!(r.seek(SeekFrom::Current(2)).unwrap(), 7);
        assert_eq!(r.stream_position(), 7);
    }

    #[test]
    fn test_seek_current_negative() {
        let mut r = BufReader::from_slice(b"hello world");
        r.seek(SeekFrom::Start(5)).unwrap();
        assert_eq!(r.seek(SeekFrom::Current(-3)).unwrap(), 2);
        assert_eq!(r.stream_position(), 2);
    }

    #[test]
    fn test_seek_end() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.seek(SeekFrom::End(0)).unwrap(), 11);
        assert_eq!(r.stream_position(), 11);
    }

    #[test]
    fn test_seek_end_negative() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.seek(SeekFrom::End(-5)).unwrap(), 6);
        assert_eq!(r.stream_position(), 6);
    }

    // === offset_from_start ===

    #[test]
    fn test_offset_from_start_start() {
        let r = BufReader::from_slice(b"hello");
        assert_eq!(r.offset_from_start(SeekFrom::Start(3)), 3);
    }

    #[test]
    fn test_offset_from_start_current() {
        let mut r = BufReader::from_slice(b"hello");
        r.stream_pos = 5;
        assert_eq!(r.offset_from_start(SeekFrom::Current(3)), 8);
        assert_eq!(r.offset_from_start(SeekFrom::Current(-2)), 3);
    }

    #[test]
    fn test_offset_from_start_end() {
        let r = BufReader::from_slice(b"hello");
        assert_eq!(r.offset_from_start(SeekFrom::End(0)), 5);
        assert_eq!(r.offset_from_start(SeekFrom::End(-2)), 3);
    }

    // === stream_position ===

    #[test]
    fn test_stream_position() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.stream_position(), 0);
        r.stream_pos = 3;
        assert_eq!(r.stream_position(), 3);
    }

    // === data_size ===

    #[test]
    fn test_data_size() {
        let r = BufReader::from_slice(b"hello world");
        assert_eq!(r.data_size(), 11);
    }

    #[test]
    fn test_data_size_empty() {
        let r = BufReader::from_slice(b"");
        assert_eq!(r.data_size(), 0);
    }

    // === read_range ===

    #[test]
    fn test_read_range_full() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_range(0..11).unwrap(), b"hello world");
        assert_eq!(r.stream_position(), 11);
    }

    #[test]
    fn test_read_range_partial() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_range(0..5).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
        assert_eq!(r.read_range(6..11).unwrap(), b"world");
        assert_eq!(r.stream_position(), 11);
    }

    #[test]
    fn test_read_range_beyond_end() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_range(0..100).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_range_start_beyond() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_range(100..200).unwrap(), b"");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_range_empty() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_range(3..3).unwrap(), b"");
        assert_eq!(r.stream_position(), 3);
    }

    #[test]
    fn test_read_range_empty_slice() {
        let mut r = BufReader::from_slice(b"");
        assert_eq!(r.read_range(0..0).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }

    // === read_count ===

    #[test]
    fn test_read_count_all() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_count(5).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_count_partial() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_count(5).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
        assert_eq!(r.read_count(1).unwrap(), b" ");
        assert_eq!(r.stream_position(), 6);
        assert_eq!(r.read_count(5).unwrap(), b"world");
        assert_eq!(r.stream_position(), 11);
    }

    #[test]
    fn test_read_count_beyond_end() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_count(100).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_count_zero() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_count(0).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }

    #[test]
    fn test_read_count_zero_at_middle() {
        let mut r = BufReader::from_slice(b"hello");
        r.seek(SeekFrom::Start(3)).unwrap();
        assert_eq!(r.read_count(0).unwrap(), b"");
        assert_eq!(r.stream_position(), 3);
    }

    // === read_exact_range ===

    #[test]
    fn test_read_exact_range_success() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_exact_range(0..5).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_exact_range_beyond_end() {
        let mut r = BufReader::from_slice(b"hello");
        assert!(r.read_exact_range(0..100).is_err());
    }

    #[test]
    fn test_read_exact_range_start_beyond() {
        let mut r = BufReader::from_slice(b"hello");
        assert!(r.read_exact_range(10..20).is_err());
    }

    #[test]
    fn test_read_exact_range_zero_length() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_exact_range(3..3).unwrap(), b"");
        assert_eq!(r.stream_position(), 3);
    }

    // === read_exact_count ===

    #[test]
    fn test_read_exact_count_success() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_exact_count(5).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_exact_count_beyond_end() {
        let mut r = BufReader::from_slice(b"hello");
        assert!(r.read_exact_count(100).is_err());
    }

    #[test]
    fn test_read_exact_count_exact_length() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_exact_count(5).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_exact_count_zero() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_exact_count(0).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }

    // === read_exact_into ===

    #[test]
    fn test_read_exact_into_success() {
        let mut r = BufReader::from_slice(b"hello world");
        let mut buf = [0u8; 5];
        r.read_exact_into(&mut buf).unwrap();
        assert_eq!(&buf, b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_exact_into_full() {
        let mut r = BufReader::from_slice(b"hello");
        let mut buf = [0u8; 5];
        r.read_exact_into(&mut buf).unwrap();
        assert_eq!(&buf, b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_exact_into_error_too_large() {
        let mut r = BufReader::from_slice(b"hello");
        let mut buf = [0u8; 100];
        assert!(r.read_exact_into(&mut buf).is_err());
    }

    #[test]
    fn test_read_exact_into_empty() {
        let mut r = BufReader::from_slice(b"hello");
        let mut buf = [0u8; 0];
        r.read_exact_into(&mut buf).unwrap();
        assert_eq!(r.stream_position(), 0);
    }

    // === read_until_or_limit ===

    #[test]
    fn test_read_until_or_limit_found() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_until_or_limit(b' ', 100).unwrap(), b"hello ");
        assert_eq!(r.stream_position(), 6);
    }

    #[test]
    fn test_read_until_or_limit_not_found() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_until_or_limit(b'x', 100).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_until_or_limit_with_limit() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_until_or_limit(b' ', 3).unwrap(), b"hel");
        assert_eq!(r.stream_position(), 3);
    }

    #[test]
    fn test_read_until_or_limit_limit_zero() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_until_or_limit(b' ', 0).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }

    #[test]
    fn test_read_until_or_limit_at_start() {
        let mut r = BufReader::from_slice(b" world");
        assert_eq!(r.read_until_or_limit(b' ', 100).unwrap(), b" ");
        assert_eq!(r.stream_position(), 1);
    }

    // === read_until_any_delim_or_limit ===

    #[test]
    fn test_read_until_any_delim_or_limit_found() {
        let mut r = BufReader::from_slice(b"hello,world;test");
        assert_eq!(
            r.read_until_any_delim_or_limit(b",;", 100).unwrap(),
            b"hello,"
        );
        assert_eq!(r.stream_position(), 6);
    }

    #[test]
    fn test_read_until_any_delim_or_limit_not_found() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(
            r.read_until_any_delim_or_limit(b",;", 100).unwrap(),
            b"hello"
        );
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_until_any_delim_or_limit_multiple_delims() {
        let mut r = BufReader::from_slice(b"hello;world,test");
        assert_eq!(
            r.read_until_any_delim_or_limit(b",;", 100).unwrap(),
            b"hello;"
        );
        assert_eq!(r.stream_position(), 6);
    }

    #[test]
    fn test_read_until_any_delim_or_limit_with_limit() {
        let mut r = BufReader::from_slice(b"hello,world");
        assert_eq!(r.read_until_any_delim_or_limit(b",", 3).unwrap(), b"hel");
        assert_eq!(r.stream_position(), 3);
    }

    #[test]
    fn test_read_until_any_delim_or_limit_empty_delims() {
        let mut r = BufReader::from_slice(b"hello");
        // With no delimiters nothing can match, so the read only stops at the
        // limit or, as here, at the end of the data.
        assert_eq!(r.read_until_any_delim_or_limit(b"", 100).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    // === read_while_or_limit ===

    #[test]
    fn test_read_while_or_limit_stream_pos_past_end() {
        // stream_pos > buf.len() previously caused an OOB panic via unchecked
        // slice indexing
        let mut r = BufReader::from_slice(b"hello");
        r.stream_pos = 10; // past end
        assert_eq!(r.read_while_or_limit(|_| true, 100).unwrap(), b"");
    }

    #[test]
    fn test_read_while_or_limit_all_match() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_while_or_limit(|b| b != b' ', 100).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_while_or_limit_stop_at_delim() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_while_or_limit(|b| b != b' ', 100).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_while_or_limit_with_limit() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_while_or_limit(|b| b != b' ', 3).unwrap(), b"hel");
        assert_eq!(r.stream_position(), 3);
    }

    #[test]
    fn test_read_while_or_limit_limit_zero() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_while_or_limit(|b| b != b'x', 0).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }

    #[test]
    fn test_read_while_or_limit_all_match_no_delim() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_while_or_limit(|b| b != b'x', 100).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    // === read_until_utf16_or_limit ===

    #[test]
    fn test_read_until_utf16_or_limit_found() {
        // UTF-16LE encoded "ab" followed by a NUL code unit (the terminator)
        let mut r = BufReader::from_slice(b"\x61\x00\x62\x00\x00\x00");
        assert_eq!(
            r.read_until_utf16_or_limit(b"\x00\x00", 100).unwrap(),
            b"\x61\x00\x62\x00\x00\x00"
        );
        assert_eq!(r.stream_position(), 6);
    }

    #[test]
    fn test_read_until_utf16_or_limit_not_found() {
        let mut r = BufReader::from_slice(b"\x61\x00\x62\x00\x63\x00");
        assert_eq!(
            r.read_until_utf16_or_limit(b"\xff\xff", 100).unwrap(),
            b"\x61\x00\x62\x00\x63\x00"
        );
        assert_eq!(r.stream_position(), 6);
    }

    #[test]
    fn test_read_until_utf16_or_limit_with_limit() {
        let mut r = BufReader::from_slice(b"\x61\x00\x62\x00\x63\x00\x00\x00");
        assert_eq!(
            r.read_until_utf16_or_limit(b"\x00\x00", 1).unwrap(),
            b"\x61\x00"
        );
        assert_eq!(r.stream_position(), 2);
    }

    #[test]
    fn test_read_until_utf16_or_limit_odd_length() {
        let mut r = BufReader::from_slice(b"\x61\x00\x62");
        // Odd length: truncates to even, reads all available pairs
        // Input has 3 bytes, truncated to 2 bytes = one utf16 char
        // Delimiter not found, so reads all 2 bytes
        assert_eq!(
            r.read_until_utf16_or_limit(b"\x00\x00", 100).unwrap(),
            b"\x61\x00"
        );
        assert_eq!(r.stream_position(), 2);
    }

    #[test]
    fn test_read_until_utf16_or_limit_stream_pos_past_end() {
        // unchecked [start..] indexing previously panicked when stream_pos > buf.len()
        let mut r = BufReader::from_slice(b"\x61\x00\x62\x00");
        r.stream_pos = 10;
        assert_eq!(r.read_until_utf16_or_limit(b"\x00\x00", 100).unwrap(), b"");
    }

    #[test]
    fn test_read_until_utf16_or_limit_odd_nonzero_start() {
        // (len - 1) was used as an absolute index; when start > 0 it would be
        // less than start, causing a start > end panic
        let mut r = BufReader::from_slice(b"\x61\x00\x62\x00\x63");
        r.stream_pos = 2; // 3 bytes remaining (\x62\x00\x63) — odd, start != 0
        assert_eq!(
            r.read_until_utf16_or_limit(b"\x00\x00", 100).unwrap(),
            b"\x62\x00"
        );
    }

    #[test]
    fn test_read_until_utf16_or_limit_single_byte() {
        let mut r = BufReader::from_slice(b"\x61");
        assert_eq!(r.read_until_utf16_or_limit(b"\x00\x00", 100).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }

    #[test]
    fn test_read_until_utf16_or_limit_empty() {
        let mut r = BufReader::from_slice(b"");
        assert_eq!(r.read_until_utf16_or_limit(b"\x00\x00", 100).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }
}