Skip to main content

pure_magic/readers/
slice.rs

1use std::{
2    io::{self, Read, SeekFrom},
3    ops::Range,
4};
5
6use crate::readers::DataRead;
7
/// A buffered reader for byte slices that tracks the current read position.
///
/// Wraps any type implementing [`AsRef<[u8]>`] and provides seeking and reading
/// operations while maintaining an internal cursor position.
///
/// See [`BufReader::from_slice`] for construction.
#[derive(Debug)]
pub struct BufReader<S: AsRef<[u8]>> {
    /// Cursor position, in bytes from the start of `buf`.
    stream_pos: u64,
    /// Backing storage the reader hands out sub-slices of.
    buf: S,
}
18
19impl<S> Read for BufReader<S>
20where
21    S: AsRef<[u8]>,
22{
23    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
24        let r = self.read_count(buf.len() as u64)?;
25        for (i, b) in r.iter().enumerate() {
26            buf[i] = *b;
27        }
28        Ok(r.len())
29    }
30}
31
32impl<S> DataRead for BufReader<S>
33where
34    S: AsRef<[u8]>,
35{
36    #[inline(always)]
37    fn stream_position(&self) -> u64 {
38        self.stream_pos
39    }
40
41    #[inline]
42    fn read_range(&mut self, range: Range<u64>) -> io::Result<&[u8]> {
43        // we fix range in case we attempt at reading beyond end of file
44        let range = if range.end > self.buf.as_ref().len() as u64 {
45            range.start..self.buf.as_ref().len() as u64
46        } else {
47            range
48        };
49
50        let range_len = range.end.saturating_sub(range.start);
51
52        self.seek(SeekFrom::Start(range.end))
53            .expect("buffer seek should never fail");
54
55        if range.start >= self.buf.as_ref().len() as u64 || range_len == 0 {
56            return Ok(&[]);
57        }
58
59        Ok(&self.buf.as_ref()[range.start as usize..range.end as usize])
60    }
61
62    fn read_until_any_delim_or_limit(
63        &mut self,
64        delims: &[u8],
65        limit: u64,
66    ) -> Result<&[u8], io::Error> {
67        self._read_while_or_limit(|b| !delims.contains(&b), limit, true)
68    }
69
70    fn read_until_or_limit(&mut self, byte: u8, limit: u64) -> Result<&[u8], io::Error> {
71        self._read_while_or_limit(|b| b != byte, limit, true)
72    }
73
74    fn read_while_or_limit<F>(&mut self, f: F, limit: u64) -> Result<&[u8], io::Error>
75    where
76        F: Fn(u8) -> bool,
77    {
78        self._read_while_or_limit(f, limit, false)
79    }
80
81    fn read_until_utf16_or_limit(
82        &mut self,
83        utf16_char: &[u8; 2],
84        limit: u64,
85    ) -> Result<&[u8], io::Error> {
86        let start = self.stream_pos;
87        let mut end = 0;
88        let len = self.buf.as_ref()[start as usize..].len();
89
90        let buf = if len.is_multiple_of(2) {
91            &self.buf.as_ref()[start as usize..]
92        } else if len > 1 {
93            &self.buf.as_ref()[start as usize..(len - 1)]
94        } else {
95            return Ok(&[]);
96        };
97
98        let even = buf
99            .iter()
100            .enumerate()
101            .filter(|(i, _)| i.is_multiple_of(2))
102            .map(|t| t.1);
103
104        let odd = buf
105            .iter()
106            .enumerate()
107            .filter(|(i, _)| !i.is_multiple_of(2))
108            .map(|t| t.1);
109
110        for t in even.zip(odd) {
111            if limit.saturating_sub(end) == 0 {
112                break;
113            }
114
115            end += 2;
116
117            // tail check
118            if t.0 == &utf16_char[0] && t.1 == &utf16_char[1] {
119                // we include char
120                break;
121            }
122        }
123
124        self.read_exact_range(start..start + end)
125    }
126
127    fn data_size(&self) -> u64 {
128        self.buf.as_ref().len() as u64
129    }
130
131    #[inline(always)]
132    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
133        self.stream_pos = self.offset_from_start(pos);
134        Ok(self.stream_pos)
135    }
136}
137
138impl<S> BufReader<S>
139where
140    S: AsRef<[u8]>,
141{
142    /// Creates a new `BufReader` wrapping the provided byte slice.
143    ///
144    /// The reader's position is initialized to `0`.
145    ///
146    /// # Examples
147    ///
148    /// ```
149    /// use pure_magic::readers::{BufReader, DataRead};
150    ///
151    /// let reader = BufReader::from_slice(b"hello world");
152    /// assert_eq!(reader.stream_position(), 0);
153    /// ```
154    pub fn from_slice(s: S) -> Self {
155        Self {
156            stream_pos: 0,
157            buf: s,
158        }
159    }
160
161    // reads while f returns true or we reach limit
162    #[inline(always)]
163    fn _read_while_or_limit<F>(
164        &mut self,
165        f: F,
166        limit: u64,
167        include_last: bool,
168    ) -> Result<&[u8], io::Error>
169    where
170        F: Fn(u8) -> bool,
171    {
172        let start = self.stream_pos;
173        let mut end = 0;
174
175        let Some(buf) = self.buf.as_ref().get(start as usize..) else {
176            return Ok(&[]);
177        };
178
179        for b in buf {
180            if limit - end == 0 {
181                break;
182            }
183
184            if !f(*b) {
185                if include_last && end < self.data_size() {
186                    end += 1;
187                }
188                break;
189            }
190
191            end += 1;
192        }
193
194        self.read_exact_range(start..start + end)
195    }
196}
197
// Unit tests for the slice-backed `BufReader`, grouped by the method under test.
#[cfg(test)]
mod tests {
    use super::*;

    // === from_slice ===

    #[test]
    fn test_from_slice() {
        let buf = b"hello world";
        let r = BufReader::from_slice(buf);
        assert_eq!(r.stream_position(), 0);
        assert_eq!(r.data_size(), buf.len() as u64);
    }

    #[test]
    fn test_from_slice_empty() {
        let r = BufReader::from_slice(b"");
        assert_eq!(r.stream_position(), 0);
        assert_eq!(r.data_size(), 0);
    }

    // === Read impl ===

    #[test]
    fn test_read_fills_buffer() {
        let mut r = BufReader::from_slice(b"hello world");
        let mut out = [0u8; 5];
        assert_eq!(Read::read(&mut r, &mut out).unwrap(), 5);
        assert_eq!(&out, b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_short_when_data_runs_out() {
        let mut r = BufReader::from_slice(b"hi");
        let mut out = [0u8; 4];
        assert_eq!(Read::read(&mut r, &mut out).unwrap(), 2);
        assert_eq!(&out[..2], b"hi");
        assert_eq!(r.stream_position(), 2);
    }

    // === Seek impl ===

    #[test]
    fn test_seek_start() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.seek(SeekFrom::Start(5)).unwrap(), 5);
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_seek_start_zero() {
        let mut r = BufReader::from_slice(b"hello");
        r.seek(SeekFrom::Start(3)).unwrap();
        assert_eq!(r.seek(SeekFrom::Start(0)).unwrap(), 0);
        assert_eq!(r.stream_position(), 0);
    }

    #[test]
    fn test_seek_current() {
        let mut r = BufReader::from_slice(b"hello world");
        r.seek(SeekFrom::Start(5)).unwrap();
        assert_eq!(r.seek(SeekFrom::Current(2)).unwrap(), 7);
        assert_eq!(r.stream_position(), 7);
    }

    #[test]
    fn test_seek_current_negative() {
        let mut r = BufReader::from_slice(b"hello world");
        r.seek(SeekFrom::Start(5)).unwrap();
        assert_eq!(r.seek(SeekFrom::Current(-3)).unwrap(), 2);
        assert_eq!(r.stream_position(), 2);
    }

    #[test]
    fn test_seek_end() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.seek(SeekFrom::End(0)).unwrap(), 11);
        assert_eq!(r.stream_position(), 11);
    }

    #[test]
    fn test_seek_end_negative() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.seek(SeekFrom::End(-5)).unwrap(), 6);
        assert_eq!(r.stream_position(), 6);
    }

    // === offset_from_start ===

    #[test]
    fn test_offset_from_start_start() {
        let r = BufReader::from_slice(b"hello");
        assert_eq!(r.offset_from_start(SeekFrom::Start(3)), 3);
    }

    #[test]
    fn test_offset_from_start_current() {
        let mut r = BufReader::from_slice(b"hello");
        r.stream_pos = 5;
        assert_eq!(r.offset_from_start(SeekFrom::Current(3)), 8);
        assert_eq!(r.offset_from_start(SeekFrom::Current(-2)), 3);
    }

    #[test]
    fn test_offset_from_start_end() {
        let r = BufReader::from_slice(b"hello");
        assert_eq!(r.offset_from_start(SeekFrom::End(0)), 5);
        assert_eq!(r.offset_from_start(SeekFrom::End(-2)), 3);
    }

    // === stream_position ===

    #[test]
    fn test_stream_position() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.stream_position(), 0);
        r.stream_pos = 3;
        assert_eq!(r.stream_position(), 3);
    }

    // === data_size ===

    #[test]
    fn test_data_size() {
        let r = BufReader::from_slice(b"hello world");
        assert_eq!(r.data_size(), 11);
    }

    #[test]
    fn test_data_size_empty() {
        let r = BufReader::from_slice(b"");
        assert_eq!(r.data_size(), 0);
    }

    // === read_range ===

    #[test]
    fn test_read_range_full() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_range(0..11).unwrap(), b"hello world");
        assert_eq!(r.stream_position(), 11);
    }

    #[test]
    fn test_read_range_partial() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_range(0..5).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
        assert_eq!(r.read_range(6..11).unwrap(), b"world");
        assert_eq!(r.stream_position(), 11);
    }

    #[test]
    fn test_read_range_beyond_end() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_range(0..100).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_range_start_beyond() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_range(100..200).unwrap(), b"");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_range_empty() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_range(3..3).unwrap(), b"");
        assert_eq!(r.stream_position(), 3);
    }

    #[test]
    fn test_read_range_empty_slice() {
        let mut r = BufReader::from_slice(b"");
        assert_eq!(r.read_range(0..0).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }

    // === read_count ===

    #[test]
    fn test_read_count_all() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_count(5).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_count_partial() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_count(5).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
        assert_eq!(r.read_count(1).unwrap(), b" ");
        assert_eq!(r.stream_position(), 6);
        assert_eq!(r.read_count(5).unwrap(), b"world");
        assert_eq!(r.stream_position(), 11);
    }

    #[test]
    fn test_read_count_beyond_end() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_count(100).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_count_zero() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_count(0).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }

    #[test]
    fn test_read_count_zero_at_middle() {
        let mut r = BufReader::from_slice(b"hello");
        r.seek(SeekFrom::Start(3)).unwrap();
        assert_eq!(r.read_count(0).unwrap(), b"");
        assert_eq!(r.stream_position(), 3);
    }

    // === read_exact_range ===

    #[test]
    fn test_read_exact_range_success() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_exact_range(0..5).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_exact_range_beyond_end() {
        let mut r = BufReader::from_slice(b"hello");
        assert!(r.read_exact_range(0..100).is_err());
    }

    #[test]
    fn test_read_exact_range_start_beyond() {
        let mut r = BufReader::from_slice(b"hello");
        assert!(r.read_exact_range(10..20).is_err());
    }

    #[test]
    fn test_read_exact_range_zero_length() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_exact_range(3..3).unwrap(), b"");
        assert_eq!(r.stream_position(), 3);
    }

    // === read_exact_count ===

    #[test]
    fn test_read_exact_count_success() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_exact_count(5).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_exact_count_beyond_end() {
        let mut r = BufReader::from_slice(b"hello");
        assert!(r.read_exact_count(100).is_err());
    }

    #[test]
    fn test_read_exact_count_exact_length() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_exact_count(5).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_exact_count_zero() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_exact_count(0).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }

    // === read_exact_into ===

    #[test]
    fn test_read_exact_into_success() {
        let mut r = BufReader::from_slice(b"hello world");
        let mut buf = [0u8; 5];
        r.read_exact_into(&mut buf).unwrap();
        assert_eq!(&buf, b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_exact_into_full() {
        let mut r = BufReader::from_slice(b"hello");
        let mut buf = [0u8; 5];
        r.read_exact_into(&mut buf).unwrap();
        assert_eq!(&buf, b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_exact_into_error_too_large() {
        let mut r = BufReader::from_slice(b"hello");
        let mut buf = [0u8; 100];
        assert!(r.read_exact_into(&mut buf).is_err());
    }

    #[test]
    fn test_read_exact_into_empty() {
        let mut r = BufReader::from_slice(b"hello");
        let mut buf = [0u8; 0];
        r.read_exact_into(&mut buf).unwrap();
        assert_eq!(r.stream_position(), 0);
    }

    // === read_until_or_limit ===

    #[test]
    fn test_read_until_or_limit_found() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_until_or_limit(b' ', 100).unwrap(), b"hello ");
        assert_eq!(r.stream_position(), 6);
    }

    #[test]
    fn test_read_until_or_limit_not_found() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_until_or_limit(b'x', 100).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_until_or_limit_with_limit() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_until_or_limit(b' ', 3).unwrap(), b"hel");
        assert_eq!(r.stream_position(), 3);
    }

    #[test]
    fn test_read_until_or_limit_limit_zero() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_until_or_limit(b' ', 0).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }

    #[test]
    fn test_read_until_or_limit_at_start() {
        let mut r = BufReader::from_slice(b" world");
        assert_eq!(r.read_until_or_limit(b' ', 100).unwrap(), b" ");
        assert_eq!(r.stream_position(), 1);
    }

    // === read_until_any_delim_or_limit ===

    #[test]
    fn test_read_until_any_delim_or_limit_found() {
        let mut r = BufReader::from_slice(b"hello,world;test");
        assert_eq!(
            r.read_until_any_delim_or_limit(b",;", 100).unwrap(),
            b"hello,"
        );
        assert_eq!(r.stream_position(), 6);
    }

    #[test]
    fn test_read_until_any_delim_or_limit_not_found() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(
            r.read_until_any_delim_or_limit(b",;", 100).unwrap(),
            b"hello"
        );
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_until_any_delim_or_limit_multiple_delims() {
        let mut r = BufReader::from_slice(b"hello;world,test");
        assert_eq!(
            r.read_until_any_delim_or_limit(b",;", 100).unwrap(),
            b"hello;"
        );
        assert_eq!(r.stream_position(), 6);
    }

    #[test]
    fn test_read_until_any_delim_or_limit_with_limit() {
        let mut r = BufReader::from_slice(b"hello,world");
        assert_eq!(r.read_until_any_delim_or_limit(b",", 3).unwrap(), b"hel");
        assert_eq!(r.stream_position(), 3);
    }

    #[test]
    fn test_read_until_any_delim_or_limit_empty_delims() {
        let mut r = BufReader::from_slice(b"hello");
        // Empty delims means no delimiter matches, so reads until limit
        assert_eq!(r.read_until_any_delim_or_limit(b"", 100).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    // === read_while_or_limit ===

    #[test]
    fn test_read_while_or_limit_stream_pos_past_end() {
        // stream_pos > buf.len() previously caused an OOB panic via unchecked slice indexing
        let mut r = BufReader::from_slice(b"hello");
        r.stream_pos = 10; // past end
        assert_eq!(r.read_while_or_limit(|_| true, 100).unwrap(), b"");
    }

    #[test]
    fn test_read_while_or_limit_all_match() {
        // The predicate accepts every byte, so the whole buffer is consumed.
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_while_or_limit(|_| true, 100).unwrap(), b"hello world");
        assert_eq!(r.stream_position(), 11);
    }

    #[test]
    fn test_read_while_or_limit_stop_at_delim() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_while_or_limit(|b| b != b' ', 100).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_while_or_limit_with_limit() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_while_or_limit(|b| b != b' ', 3).unwrap(), b"hel");
        assert_eq!(r.stream_position(), 3);
    }

    #[test]
    fn test_read_while_or_limit_limit_zero() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_while_or_limit(|b| b != b'x', 0).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }

    #[test]
    fn test_read_while_or_limit_all_match_no_delim() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_while_or_limit(|b| b != b'x', 100).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    // === read_until_utf16_or_limit ===

    #[test]
    fn test_read_until_utf16_or_limit_found() {
        // UTF-16LE encoded "ab\0\0" (a, b, null terminator)
        let mut r = BufReader::from_slice(b"\x61\x00\x62\x00\x00\x00");
        assert_eq!(
            r.read_until_utf16_or_limit(b"\x00\x00", 100).unwrap(),
            b"\x61\x00\x62\x00\x00\x00"
        );
        assert_eq!(r.stream_position(), 6);
    }

    #[test]
    fn test_read_until_utf16_or_limit_not_found() {
        let mut r = BufReader::from_slice(b"\x61\x00\x62\x00\x63\x00");
        assert_eq!(
            r.read_until_utf16_or_limit(b"\xff\xff", 100).unwrap(),
            b"\x61\x00\x62\x00\x63\x00"
        );
        assert_eq!(r.stream_position(), 6);
    }

    #[test]
    fn test_read_until_utf16_or_limit_with_limit() {
        let mut r = BufReader::from_slice(b"\x61\x00\x62\x00\x63\x00\x00\x00");
        assert_eq!(
            r.read_until_utf16_or_limit(b"\x00\x00", 1).unwrap(),
            b"\x61\x00"
        );
        assert_eq!(r.stream_position(), 2);
    }

    #[test]
    fn test_read_until_utf16_or_limit_odd_length() {
        let mut r = BufReader::from_slice(b"\x61\x00\x62");
        // Odd length: truncates to even, reads all available pairs
        // Input has 3 bytes, truncated to 2 bytes = one utf16 char
        // Delimiter not found, so reads all 2 bytes
        assert_eq!(
            r.read_until_utf16_or_limit(b"\x00\x00", 100).unwrap(),
            b"\x61\x00"
        );
        assert_eq!(r.stream_position(), 2);
    }

    #[test]
    fn test_read_until_utf16_or_limit_single_byte() {
        let mut r = BufReader::from_slice(b"\x61");
        assert_eq!(r.read_until_utf16_or_limit(b"\x00\x00", 100).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }

    #[test]
    fn test_read_until_utf16_or_limit_empty() {
        let mut r = BufReader::from_slice(b"");
        assert_eq!(r.read_until_utf16_or_limit(b"\x00\x00", 100).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }
}