Skip to main content

pure_magic/readers/
slice.rs

1use std::{
2    io::{self, Read, SeekFrom},
3    ops::Range,
4};
5
6use crate::readers::DataRead;
7
/// Cursor-tracking reader over an in-memory byte buffer.
///
/// Wraps a value of any type implementing [`AsRef<[u8]>`] and keeps an
/// internal position that the seeking and reading operations update.
///
/// Use [`BufReader::from_slice`] to build one.
pub struct BufReader<S>
where
    S: AsRef<[u8]>,
{
    /// Current cursor position, as a byte offset from the start of `buf`.
    stream_pos: u64,
    /// Underlying storage the reader serves bytes from.
    buf: S,
}
18
19impl<S> Read for BufReader<S>
20where
21    S: AsRef<[u8]>,
22{
23    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
24        let r = self.read_count(buf.len() as u64)?;
25        for (i, b) in r.iter().enumerate() {
26            buf[i] = *b;
27        }
28        Ok(r.len())
29    }
30}
31
32impl<S> DataRead for BufReader<S>
33where
34    S: AsRef<[u8]>,
35{
36    #[inline(always)]
37    fn stream_position(&self) -> u64 {
38        self.stream_pos
39    }
40
41    #[inline]
42    fn read_range(&mut self, range: Range<u64>) -> io::Result<&[u8]> {
43        // we fix range in case we attempt at reading beyond end of file
44        let range = if range.end > self.buf.as_ref().len() as u64 {
45            range.start..self.buf.as_ref().len() as u64
46        } else {
47            range
48        };
49
50        let range_len = range.end.saturating_sub(range.start);
51
52        self.seek(SeekFrom::Start(range.end))
53            .expect("buffer seek should never fail");
54
55        if range.start >= self.buf.as_ref().len() as u64 || range_len == 0 {
56            return Ok(&[]);
57        }
58
59        Ok(&self.buf.as_ref()[range.start as usize..range.end as usize])
60    }
61
62    fn read_until_any_delim_or_limit(
63        &mut self,
64        delims: &[u8],
65        limit: u64,
66    ) -> Result<&[u8], io::Error> {
67        self._read_while_or_limit(|b| !delims.contains(&b), limit, true)
68    }
69
70    fn read_until_or_limit(&mut self, byte: u8, limit: u64) -> Result<&[u8], io::Error> {
71        self._read_while_or_limit(|b| b != byte, limit, true)
72    }
73
74    fn read_while_or_limit<F>(&mut self, f: F, limit: u64) -> Result<&[u8], io::Error>
75    where
76        F: Fn(u8) -> bool,
77    {
78        self._read_while_or_limit(f, limit, false)
79    }
80
81    fn read_until_utf16_or_limit(
82        &mut self,
83        utf16_char: &[u8; 2],
84        limit: u64,
85    ) -> Result<&[u8], io::Error> {
86        let start = self.stream_pos;
87        let mut end = 0;
88        let len = self.buf.as_ref()[start as usize..].len();
89
90        let buf = if len.is_multiple_of(2) {
91            &self.buf.as_ref()[start as usize..]
92        } else if len > 1 {
93            &self.buf.as_ref()[start as usize..(len - 1)]
94        } else {
95            return Ok(&[]);
96        };
97
98        let even = buf
99            .iter()
100            .enumerate()
101            .filter(|(i, _)| i.is_multiple_of(2))
102            .map(|t| t.1);
103
104        let odd = buf
105            .iter()
106            .enumerate()
107            .filter(|(i, _)| !i.is_multiple_of(2))
108            .map(|t| t.1);
109
110        for t in even.zip(odd) {
111            if limit.saturating_sub(end) == 0 {
112                break;
113            }
114
115            end += 2;
116
117            // tail check
118            if t.0 == &utf16_char[0] && t.1 == &utf16_char[1] {
119                // we include char
120                break;
121            }
122        }
123
124        self.read_exact_range(start..start + end)
125    }
126
127    fn data_size(&self) -> u64 {
128        self.buf.as_ref().len() as u64
129    }
130
131    #[inline(always)]
132    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
133        self.stream_pos = self.offset_from_start(pos);
134        Ok(self.stream_pos)
135    }
136}
137
138impl<S> BufReader<S>
139where
140    S: AsRef<[u8]>,
141{
142    /// Creates a new `BufReader` wrapping the provided byte slice.
143    ///
144    /// The reader's position is initialized to `0`.
145    ///
146    /// # Examples
147    ///
148    /// ```
149    /// use pure_magic::readers::{BufReader, DataRead};
150    ///
151    /// let reader = BufReader::from_slice(b"hello world");
152    /// assert_eq!(reader.stream_position(), 0);
153    /// ```
154    pub fn from_slice(s: S) -> Self {
155        Self {
156            stream_pos: 0,
157            buf: s,
158        }
159    }
160
161    // reads while f returns true or we reach limit
162    #[inline(always)]
163    fn _read_while_or_limit<F>(
164        &mut self,
165        f: F,
166        limit: u64,
167        include_last: bool,
168    ) -> Result<&[u8], io::Error>
169    where
170        F: Fn(u8) -> bool,
171    {
172        let start = self.stream_pos;
173        let mut end = 0;
174
175        for b in &self.buf.as_ref()[start as usize..] {
176            if limit - end == 0 {
177                break;
178            }
179
180            if !f(*b) {
181                if include_last && end < self.data_size() {
182                    end += 1;
183                }
184                break;
185            }
186
187            end += 1;
188        }
189
190        self.read_exact_range(start..start + end)
191    }
192}
193
#[cfg(test)]
mod tests {
    // Unit tests for the slice-backed `BufReader`: construction, seeking and
    // every read helper reachable through `DataRead`.
    use super::*;

    // === from_slice ===

    #[test]
    fn test_from_slice() {
        let buf = b"hello world";
        let r = BufReader::from_slice(buf);
        assert_eq!(r.stream_position(), 0);
        assert_eq!(r.data_size(), buf.len() as u64);
    }

    #[test]
    fn test_from_slice_empty() {
        let r = BufReader::from_slice(b"");
        assert_eq!(r.stream_position(), 0);
        assert_eq!(r.data_size(), 0);
    }

    // === Seek impl ===

    #[test]
    fn test_seek_start() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.seek(SeekFrom::Start(5)).unwrap(), 5);
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_seek_start_zero() {
        let mut r = BufReader::from_slice(b"hello");
        r.seek(SeekFrom::Start(3)).unwrap();
        assert_eq!(r.seek(SeekFrom::Start(0)).unwrap(), 0);
        assert_eq!(r.stream_position(), 0);
    }

    #[test]
    fn test_seek_current() {
        let mut r = BufReader::from_slice(b"hello world");
        r.seek(SeekFrom::Start(5)).unwrap();
        assert_eq!(r.seek(SeekFrom::Current(2)).unwrap(), 7);
        assert_eq!(r.stream_position(), 7);
    }

    #[test]
    fn test_seek_current_negative() {
        let mut r = BufReader::from_slice(b"hello world");
        r.seek(SeekFrom::Start(5)).unwrap();
        assert_eq!(r.seek(SeekFrom::Current(-3)).unwrap(), 2);
        assert_eq!(r.stream_position(), 2);
    }

    #[test]
    fn test_seek_end() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.seek(SeekFrom::End(0)).unwrap(), 11);
        assert_eq!(r.stream_position(), 11);
    }

    #[test]
    fn test_seek_end_negative() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.seek(SeekFrom::End(-5)).unwrap(), 6);
        assert_eq!(r.stream_position(), 6);
    }

    // === offset_from_start ===

    #[test]
    fn test_offset_from_start_start() {
        let r = BufReader::from_slice(b"hello");
        assert_eq!(r.offset_from_start(SeekFrom::Start(3)), 3);
    }

    #[test]
    fn test_offset_from_start_current() {
        let mut r = BufReader::from_slice(b"hello");
        r.stream_pos = 5;
        assert_eq!(r.offset_from_start(SeekFrom::Current(3)), 8);
        assert_eq!(r.offset_from_start(SeekFrom::Current(-2)), 3);
    }

    #[test]
    fn test_offset_from_start_end() {
        let r = BufReader::from_slice(b"hello");
        assert_eq!(r.offset_from_start(SeekFrom::End(0)), 5);
        assert_eq!(r.offset_from_start(SeekFrom::End(-2)), 3);
    }

    // === stream_position ===

    #[test]
    fn test_stream_position() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.stream_position(), 0);
        r.stream_pos = 3;
        assert_eq!(r.stream_position(), 3);
    }

    // === data_size ===

    #[test]
    fn test_data_size() {
        let r = BufReader::from_slice(b"hello world");
        assert_eq!(r.data_size(), 11);
    }

    #[test]
    fn test_data_size_empty() {
        let r = BufReader::from_slice(b"");
        assert_eq!(r.data_size(), 0);
    }

    // === read_range ===

    #[test]
    fn test_read_range_full() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_range(0..11).unwrap(), b"hello world");
        assert_eq!(r.stream_position(), 11);
    }

    #[test]
    fn test_read_range_partial() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_range(0..5).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
        assert_eq!(r.read_range(6..11).unwrap(), b"world");
        assert_eq!(r.stream_position(), 11);
    }

    #[test]
    fn test_read_range_beyond_end() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_range(0..100).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_range_start_beyond() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_range(100..200).unwrap(), b"");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_range_empty() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_range(3..3).unwrap(), b"");
        assert_eq!(r.stream_position(), 3);
    }

    #[test]
    fn test_read_range_empty_slice() {
        let mut r = BufReader::from_slice(b"");
        assert_eq!(r.read_range(0..0).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }

    // === read_count ===

    #[test]
    fn test_read_count_all() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_count(5).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_count_partial() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_count(5).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
        assert_eq!(r.read_count(1).unwrap(), b" ");
        assert_eq!(r.stream_position(), 6);
        assert_eq!(r.read_count(5).unwrap(), b"world");
        assert_eq!(r.stream_position(), 11);
    }

    #[test]
    fn test_read_count_beyond_end() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_count(100).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_count_zero() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_count(0).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }

    #[test]
    fn test_read_count_zero_at_middle() {
        let mut r = BufReader::from_slice(b"hello");
        r.seek(SeekFrom::Start(3)).unwrap();
        assert_eq!(r.read_count(0).unwrap(), b"");
        assert_eq!(r.stream_position(), 3);
    }

    // === read_exact_range ===

    #[test]
    fn test_read_exact_range_success() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_exact_range(0..5).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_exact_range_beyond_end() {
        let mut r = BufReader::from_slice(b"hello");
        assert!(r.read_exact_range(0..100).is_err());
    }

    #[test]
    fn test_read_exact_range_start_beyond() {
        let mut r = BufReader::from_slice(b"hello");
        assert!(r.read_exact_range(10..20).is_err());
    }

    #[test]
    fn test_read_exact_range_zero_length() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_exact_range(3..3).unwrap(), b"");
        assert_eq!(r.stream_position(), 3);
    }

    // === read_exact_count ===

    #[test]
    fn test_read_exact_count_success() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_exact_count(5).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_exact_count_beyond_end() {
        let mut r = BufReader::from_slice(b"hello");
        assert!(r.read_exact_count(100).is_err());
    }

    #[test]
    fn test_read_exact_count_exact_length() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_exact_count(5).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_exact_count_zero() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_exact_count(0).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }

    // === read_exact_into ===

    #[test]
    fn test_read_exact_into_success() {
        let mut r = BufReader::from_slice(b"hello world");
        let mut buf = [0u8; 5];
        r.read_exact_into(&mut buf).unwrap();
        assert_eq!(&buf, b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_exact_into_full() {
        let mut r = BufReader::from_slice(b"hello");
        let mut buf = [0u8; 5];
        r.read_exact_into(&mut buf).unwrap();
        assert_eq!(&buf, b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_exact_into_error_too_large() {
        let mut r = BufReader::from_slice(b"hello");
        let mut buf = [0u8; 100];
        assert!(r.read_exact_into(&mut buf).is_err());
    }

    #[test]
    fn test_read_exact_into_empty() {
        let mut r = BufReader::from_slice(b"hello");
        let mut buf = [0u8; 0];
        r.read_exact_into(&mut buf).unwrap();
        assert_eq!(r.stream_position(), 0);
    }

    // === read_until_or_limit ===

    #[test]
    fn test_read_until_or_limit_found() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_until_or_limit(b' ', 100).unwrap(), b"hello ");
        assert_eq!(r.stream_position(), 6);
    }

    #[test]
    fn test_read_until_or_limit_not_found() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_until_or_limit(b'x', 100).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_until_or_limit_with_limit() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_until_or_limit(b' ', 3).unwrap(), b"hel");
        assert_eq!(r.stream_position(), 3);
    }

    #[test]
    fn test_read_until_or_limit_limit_zero() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_until_or_limit(b' ', 0).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }

    #[test]
    fn test_read_until_or_limit_at_start() {
        let mut r = BufReader::from_slice(b" world");
        assert_eq!(r.read_until_or_limit(b' ', 100).unwrap(), b" ");
        assert_eq!(r.stream_position(), 1);
    }

    // === read_until_any_delim_or_limit ===

    #[test]
    fn test_read_until_any_delim_or_limit_found() {
        let mut r = BufReader::from_slice(b"hello,world;test");
        assert_eq!(
            r.read_until_any_delim_or_limit(b",;", 100).unwrap(),
            b"hello,"
        );
        assert_eq!(r.stream_position(), 6);
    }

    #[test]
    fn test_read_until_any_delim_or_limit_not_found() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(
            r.read_until_any_delim_or_limit(b",;", 100).unwrap(),
            b"hello"
        );
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_until_any_delim_or_limit_multiple_delims() {
        let mut r = BufReader::from_slice(b"hello;world,test");
        assert_eq!(
            r.read_until_any_delim_or_limit(b",;", 100).unwrap(),
            b"hello;"
        );
        assert_eq!(r.stream_position(), 6);
    }

    #[test]
    fn test_read_until_any_delim_or_limit_with_limit() {
        let mut r = BufReader::from_slice(b"hello,world");
        assert_eq!(r.read_until_any_delim_or_limit(b",", 3).unwrap(), b"hel");
        assert_eq!(r.stream_position(), 3);
    }

    #[test]
    fn test_read_until_any_delim_or_limit_empty_delims() {
        let mut r = BufReader::from_slice(b"hello");
        // Empty delims means no delimiter matches, so reads until limit
        assert_eq!(r.read_until_any_delim_or_limit(b"", 100).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    // === read_while_or_limit ===

    #[test]
    fn test_read_while_or_limit_none_match() {
        // The very first byte is rejected by the predicate: nothing is
        // consumed and the rejected byte is not included.
        let mut r = BufReader::from_slice(b" world");
        assert_eq!(r.read_while_or_limit(|b| b != b' ', 100).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }

    #[test]
    fn test_read_while_or_limit_stop_at_delim() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_while_or_limit(|b| b != b' ', 100).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    #[test]
    fn test_read_while_or_limit_with_limit() {
        let mut r = BufReader::from_slice(b"hello world");
        assert_eq!(r.read_while_or_limit(|b| b != b' ', 3).unwrap(), b"hel");
        assert_eq!(r.stream_position(), 3);
    }

    #[test]
    fn test_read_while_or_limit_limit_zero() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_while_or_limit(|b| b != b'x', 0).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }

    #[test]
    fn test_read_while_or_limit_all_match_no_delim() {
        let mut r = BufReader::from_slice(b"hello");
        assert_eq!(r.read_while_or_limit(|b| b != b'x', 100).unwrap(), b"hello");
        assert_eq!(r.stream_position(), 5);
    }

    // === read_until_utf16_or_limit ===

    #[test]
    fn test_read_until_utf16_or_limit_found() {
        // UTF-16LE encoded "ab\0\0" (a, b, null terminator)
        let mut r = BufReader::from_slice(b"\x61\x00\x62\x00\x00\x00");
        assert_eq!(
            r.read_until_utf16_or_limit(b"\x00\x00", 100).unwrap(),
            b"\x61\x00\x62\x00\x00\x00"
        );
        assert_eq!(r.stream_position(), 6);
    }

    #[test]
    fn test_read_until_utf16_or_limit_not_found() {
        let mut r = BufReader::from_slice(b"\x61\x00\x62\x00\x63\x00");
        assert_eq!(
            r.read_until_utf16_or_limit(b"\xff\xff", 100).unwrap(),
            b"\x61\x00\x62\x00\x63\x00"
        );
        assert_eq!(r.stream_position(), 6);
    }

    #[test]
    fn test_read_until_utf16_or_limit_with_limit() {
        let mut r = BufReader::from_slice(b"\x61\x00\x62\x00\x63\x00\x00\x00");
        assert_eq!(
            r.read_until_utf16_or_limit(b"\x00\x00", 1).unwrap(),
            b"\x61\x00"
        );
        assert_eq!(r.stream_position(), 2);
    }

    #[test]
    fn test_read_until_utf16_or_limit_odd_length() {
        let mut r = BufReader::from_slice(b"\x61\x00\x62");
        // Odd length: truncates to even, reads all available pairs
        // Input has 3 bytes, truncated to 2 bytes = one utf16 char
        // Delimiter not found, so reads all 2 bytes
        assert_eq!(
            r.read_until_utf16_or_limit(b"\x00\x00", 100).unwrap(),
            b"\x61\x00"
        );
        assert_eq!(r.stream_position(), 2);
    }

    #[test]
    fn test_read_until_utf16_or_limit_single_byte() {
        let mut r = BufReader::from_slice(b"\x61");
        assert_eq!(r.read_until_utf16_or_limit(b"\x00\x00", 100).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }

    #[test]
    fn test_read_until_utf16_or_limit_empty() {
        let mut r = BufReader::from_slice(b"");
        assert_eq!(r.read_until_utf16_or_limit(b"\x00\x00", 100).unwrap(), b"");
        assert_eq!(r.stream_position(), 0);
    }
}