pdf_rs/
sequence.rs

1use std::cmp::min;
2use crate::utils::{count_leading_line_endings, line_ending};
3use crate::error::{PDFError, Result};
4use std::fs::File;
5use std::io::{Read, Seek, SeekFrom};
6
7pub trait Sequence {
8    fn read(&mut self, buf: &mut [u8]) -> Result<usize>;
9    /// Read a line data until encounter line delimiter
10    fn read_line(&mut self) -> Result<Vec<u8>>;
11    /// Read a line data as string until encounter line delimiter
12    fn read_line_str(&mut self) -> Result<String>;
13    fn seek(&mut self, pos: u64) -> Result<u64>;
14    fn size(&self) -> Result<u64>;
15}
16
/// A [`Sequence`] implementation that reads from a [`File`].
///
/// Line-oriented reads pull data from the file in chunks, so bytes that were
/// read but not yet handed to the caller are parked in `buf` until a later
/// read consumes them.
#[derive(Debug)]
pub struct FileSequence {
    /// Underlying file the data is read from.
    file: File,
    /// Bytes already read from `file` that have not been returned yet.
    buf: Vec<u8>,
}
21
22impl FileSequence {
23    pub fn new(file: File) -> Self {
24        let buf = Vec::new();
25        Self { file, buf }
26    }
27
28    fn split_line_data(&mut self, index: usize) -> Vec<u8> {
29        let buf = &mut self.buf;
30        let line = buf.drain(0..index).collect::<Vec<u8>>();
31        buf.len();
32        let crlf_num = count_leading_line_endings(buf);
33        if crlf_num != 0 {
34            buf.drain(0..crlf_num as usize);
35        }
36        line
37    }
38}
39
40impl Sequence for FileSequence {
41    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
42        if !self.buf.is_empty() {
43            let len = self.buf.len();
44            let n = min(len, buf.len());
45            buf[0..n].copy_from_slice(&self.buf[0..n]);
46            self.buf.drain(0..n);
47            return Ok(n);
48        }
49        let n = self.file.read(buf)?;
50        Ok(n)
51    }
52
53    fn read_line(&mut self) -> Result<Vec<u8>> {
54        let buf = &mut self.buf;
55        let mut bytes = [0u8; 1024];
56        let mut tmp = 0;
57        loop {
58            let len = buf.len();
59            for i in tmp..len {
60                if line_ending(buf[i]) {
61                    let line_data = self.split_line_data(i);
62                    return Ok(line_data);
63                }
64            }
65            tmp = len;
66            let n = self.file.read(&mut bytes)?;
67            if n == 0 {
68                return Err(PDFError::EOFError);
69            }
70            let offset = if len == 0 {
71                count_leading_line_endings(&bytes)
72            }else {
73                0u64
74            } as usize;
75            buf.extend_from_slice(&bytes[offset..n]);
76        }
77    }
78
79    fn read_line_str(&mut self) -> Result<String> {
80        let buf = self.read_line()?;
81        let text = String::from_utf8(buf)?;
82        Ok(text)
83    }
84
85
86    fn seek(&mut self, pos: u64) -> Result<u64> {
87        if self.size()? < pos {
88            return Err(PDFError::SeekExceedError);
89        }
90        let n = self.file.seek(SeekFrom::Start(pos))?;
91        // Due to seek, the buffer is no longer valid
92        self.buf.clear();
93        Ok(n)
94    }
95
96    fn size(&self) -> Result<u64> {
97        let n = self.file.metadata()?.len();
98        Ok(n)
99    }
100}