pdf_rs/
sequence.rs

1use std::cmp::min;
2use crate::bytes::{count_leading_line_endings, line_ending};
3use crate::error::Result;
4use crate::error::error_kind::{EOF, SEEK_EXEED_MAX_SIZE};
5use std::fs::File;
6use std::io::{Read, Seek, SeekFrom};
7
/// A seekable byte source that can be consumed either as raw bytes or line by line.
pub trait Sequence {
    /// Reads bytes into `buf` and returns the number of bytes placed there.
    ///
    /// The returned count may be smaller than `buf.len()`.
    fn read(&mut self, buf: &mut [u8]) -> Result<usize>;
    /// Reads one line of data, up to (but not including) the next line delimiter.
    fn read_line(&mut self) -> Result<Vec<u8>>;
    /// Reads one line of data as a string, up to the next line delimiter.
    fn read_line_str(&mut self) -> Result<String>;
    /// Moves the read position to `pos` and returns the resulting position.
    ///
    /// NOTE(review): the `FileSequence` implementation in this file treats `pos`
    /// as an absolute offset from the start; confirm other implementors agree.
    fn seek(&mut self, pos: u64) -> Result<u64>;
    /// Returns the total size of the underlying data
    /// (in bytes for the `FileSequence` implementation in this file).
    fn size(&self) -> Result<u64>;
}
17
/// A [`Sequence`] backed by a [`File`].
pub struct FileSequence {
    // The file that bytes are read from.
    file: File,
    // Bytes already read from `file` while searching for a line delimiter but
    // not yet handed to the caller; served first by later reads and cleared on seek.
    buf: Vec<u8>,
}
22
23impl FileSequence {
24    pub fn new(file: File) -> Self {
25        let buf = Vec::new();
26        Self { file, buf }
27    }
28
29    fn split_line_data(&mut self, index: usize) -> Vec<u8> {
30        let buf = &mut self.buf;
31        let line = buf.drain(0..index).collect::<Vec<u8>>();
32        buf.len();
33        let crlf_num = count_leading_line_endings(buf);
34        if crlf_num != 0 {
35            buf.drain(0..crlf_num as usize);
36        }
37        line
38    }
39}
40
41impl Sequence for FileSequence {
42    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
43        if !self.buf.is_empty() {
44            let len = self.buf.len();
45            let n = min(len, buf.len());
46            buf[0..n].copy_from_slice(&self.buf[0..n]);
47            self.buf.drain(0..n);
48            return Ok(n);
49        }
50        let n = self.file.read(buf)?;
51        Ok(n)
52    }
53
54    fn read_line(&mut self) -> Result<Vec<u8>> {
55        let buf = &mut self.buf;
56        let mut bytes = [0u8; 1024];
57        let mut tmp = 0;
58        loop {
59            let len = buf.len();
60            for i in tmp..len {
61                if line_ending(buf[i]) {
62                    let line_data = self.split_line_data(i);
63                    return Ok(line_data);
64                }
65            }
66            tmp = len;
67            let n = self.file.read(&mut bytes)?;
68            if n == 0 {
69                return Err(EOF.into());
70            }
71            let offset = if len == 0 {
72                count_leading_line_endings(&bytes)
73            }else {
74                0u64
75            } as usize;
76            buf.extend_from_slice(&bytes[offset..n]);
77        }
78    }
79
80    fn read_line_str(&mut self) -> Result<String> {
81        let buf = self.read_line()?;
82        let text = String::from_utf8(buf)?;
83        Ok(text)
84    }
85
86
87    fn seek(&mut self, pos: u64) -> Result<u64> {
88        if self.size()? < pos {
89            return Err(SEEK_EXEED_MAX_SIZE.into());
90        }
91        let n = self.file.seek(SeekFrom::Start(pos))?;
92        // Due to seek, the buffer is no longer valid
93        self.buf.clear();
94        Ok(n)
95    }
96
97    fn size(&self) -> Result<u64> {
98        let n = self.file.metadata()?.len();
99        Ok(n)
100    }
101}