oxidize_pdf/parser/
header.rs1use super::{ParseError, ParseResult};
6use std::io::{BufRead, BufReader, Read};
7
/// A PDF specification version, as declared by the `%PDF-M.m` header line.
///
/// The derived ordering compares `major` first and `minor` second (the field
/// declaration order), so `1.4 < 1.7 < 2.0`. Keep `major` declared before
/// `minor` to preserve that ordering.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct PdfVersion {
    /// Major version number (the `M` in `%PDF-M.m`).
    pub major: u8,
    /// Minor version number (the `m` in `%PDF-M.m`).
    pub minor: u8,
}
14
15impl PdfVersion {
16 pub fn new(major: u8, minor: u8) -> Self {
18 Self { major, minor }
19 }
20
21 pub fn is_supported(&self) -> bool {
23 matches!((self.major, self.minor), (1, 0..=7) | (2, 0))
25 }
26}
27
28impl std::fmt::Display for PdfVersion {
29 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30 write!(f, "{}.{}", self.major, self.minor)
31 }
32}
33
34#[derive(Debug, Clone)]
36pub struct PdfHeader {
37 pub version: PdfVersion,
38 pub has_binary_marker: bool,
39}
40
41impl PdfHeader {
42 pub fn parse<R: Read>(reader: R) -> ParseResult<Self> {
44 let mut buf_reader = BufReader::new(reader);
45 let mut header = Self::parse_version_line(&mut buf_reader)?;
46
47 header.has_binary_marker = Self::check_binary_marker(&mut buf_reader)?;
49
50 Ok(header)
51 }
52
53 fn parse_version_line<R: BufRead>(reader: &mut R) -> ParseResult<Self> {
55 let mut line_bytes = Vec::new();
57
58 loop {
59 let mut byte = [0u8; 1];
60 match reader.read_exact(&mut byte) {
61 Ok(_) => {
62 if byte[0] == b'\n' || byte[0] == b'\r' {
63 if byte[0] == b'\r' {
65 let mut next_byte = [0u8; 1];
67 if reader.read_exact(&mut next_byte).is_ok() && next_byte[0] != b'\n' {
68 line_bytes.push(byte[0]);
71 }
72 }
73 break;
74 }
75 line_bytes.push(byte[0]);
76 if line_bytes.len() > 100 {
78 return Err(ParseError::InvalidHeader);
79 }
80 }
81 Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
82 if line_bytes.is_empty() {
83 return Err(ParseError::InvalidHeader);
84 }
85 break;
86 }
87 Err(e) => return Err(e.into()),
88 }
89 }
90
91 let line = String::from_utf8_lossy(&line_bytes).into_owned();
94
95 if !line.starts_with("%PDF-") {
97 return Err(ParseError::InvalidHeader);
98 }
99
100 let version_str = line[5..].trim();
102 let parts: Vec<&str> = version_str.split('.').collect();
103
104 if parts.len() != 2 {
105 return Err(ParseError::InvalidHeader);
106 }
107
108 let major = parts[0]
109 .parse::<u8>()
110 .map_err(|_| ParseError::InvalidHeader)?;
111 let minor = parts[1]
112 .parse::<u8>()
113 .map_err(|_| ParseError::InvalidHeader)?;
114
115 let version = PdfVersion::new(major, minor);
116
117 if !version.is_supported() {
118 return Err(ParseError::UnsupportedVersion(version.to_string()));
119 }
120
121 Ok(PdfHeader {
122 version,
123 has_binary_marker: false,
124 })
125 }
126
127 fn check_binary_marker<R: BufRead>(reader: &mut R) -> ParseResult<bool> {
129 let mut buffer = Vec::new();
130
131 loop {
133 let mut byte = [0u8; 1];
134 match reader.read_exact(&mut byte) {
135 Ok(_) => {
136 buffer.push(byte[0]);
137 if byte[0] == b'\n' || byte[0] == b'\r' {
138 break;
139 }
140 if buffer.len() > 1024 {
142 break;
143 }
144 }
145 Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
146 break;
147 }
148 Err(e) => return Err(e.into()),
149 }
150 }
151
152 if buffer.is_empty() {
153 return Ok(false);
154 }
155
156 if buffer.first() == Some(&b'%') {
158 let binary_count = buffer
159 .iter()
160 .skip(1) .filter(|&&b| b >= 128)
162 .count();
163
164 Ok(binary_count >= 4)
165 } else {
166 Ok(false)
168 }
169 }
170}
171
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Runs the header parser over an in-memory byte slice.
    fn parse_bytes(input: &[u8]) -> ParseResult<PdfHeader> {
        PdfHeader::parse(Cursor::new(input))
    }

    /// A bare version line yields that version and no binary marker.
    #[test]
    fn test_parse_pdf_header_basic() {
        let header = parse_bytes(b"%PDF-1.7\n").unwrap();

        assert_eq!((header.version.major, header.version.minor), (1, 7));
        assert!(!header.has_binary_marker);
    }

    /// A `%` comment with four high-bit bytes on the second line is detected.
    #[test]
    fn test_parse_pdf_header_with_binary_marker() {
        let header = parse_bytes(b"%PDF-1.4\n%\xE2\xE3\xCF\xD3\n").unwrap();

        assert_eq!((header.version.major, header.version.minor), (1, 4));
        assert!(header.has_binary_marker);
    }

    /// PDF 2.0 is an accepted version.
    #[test]
    fn test_parse_pdf_20() {
        let header = parse_bytes(b"%PDF-2.0\n").unwrap();

        assert_eq!((header.version.major, header.version.minor), (2, 0));
    }

    /// Input that does not start with `%PDF-` is rejected as an invalid header.
    #[test]
    fn test_invalid_header() {
        let outcome = parse_bytes(b"Not a PDF\n");

        assert!(matches!(outcome, Err(ParseError::InvalidHeader)));
    }

    /// A well-formed but unsupported version is reported as such.
    #[test]
    fn test_unsupported_version() {
        let outcome = parse_bytes(b"%PDF-3.0\n");

        assert!(matches!(outcome, Err(ParseError::UnsupportedVersion(_))));
    }
}