// oxidize_pdf/parser/object_stream.rs
use super::lexer::Lexer;
use super::objects::{PdfObject, PdfStream};
use super::xref::XRefEntry;
use super::{ParseError, ParseOptions, ParseResult};
use std::collections::HashMap;
use std::io::Cursor;
11
12#[derive(Debug)]
14pub struct ObjectStream {
15 stream: PdfStream,
17 n: u32,
19 first: u32,
21 objects: HashMap<u32, PdfObject>,
23}
24
25impl ObjectStream {
26 pub fn parse(stream: PdfStream, options: &ParseOptions) -> ParseResult<Self> {
28 let dict = &stream.dict;
30
31 let n = dict
32 .get("N")
33 .and_then(|obj| obj.as_integer())
34 .ok_or_else(|| ParseError::MissingKey("N".to_string()))? as u32;
35
36 let first = dict
37 .get("First")
38 .and_then(|obj| obj.as_integer())
39 .ok_or_else(|| ParseError::MissingKey("First".to_string()))? as u32;
40
41 let mut obj_stream = ObjectStream {
42 stream,
43 n,
44 first,
45 objects: HashMap::new(),
46 };
47
48 obj_stream.parse_objects(options)?;
50
51 Ok(obj_stream)
52 }
53
54 fn parse_objects(&mut self, options: &ParseOptions) -> ParseResult<()> {
56 let data = self.stream.decode(options)?;
58
59 let mut cursor = Cursor::new(&data);
61 let mut lexer = Lexer::new_with_options(&mut cursor, options.clone());
63
64 let mut offsets = Vec::new();
66 for _ in 0..self.n {
67 let obj_num = match lexer.next_token()? {
69 super::lexer::Token::Integer(n) => n as u32,
70 _ => {
71 return Err(ParseError::SyntaxError {
72 position: 0,
73 message: "Expected object number in object stream".to_string(),
74 })
75 }
76 };
77
78 let offset = match lexer.next_token()? {
80 super::lexer::Token::Integer(n) => n as u32,
81 _ => {
82 return Err(ParseError::SyntaxError {
83 position: 0,
84 message: "Expected offset in object stream".to_string(),
85 })
86 }
87 };
88
89 offsets.push((obj_num, offset));
90 }
91
92 for (obj_num, offset) in offsets.iter() {
94 let abs_offset = self.first + offset;
96
97 cursor.set_position(abs_offset as u64);
99 let mut obj_lexer = Lexer::new_with_options(&mut cursor, options.clone());
100
101 let obj = PdfObject::parse_with_options(&mut obj_lexer, options)?;
103
104 self.objects.insert(*obj_num, obj);
106 }
107
108 Ok(())
109 }
110
111 pub fn get_object(&self, obj_num: u32) -> Option<&PdfObject> {
113 self.objects.get(&obj_num)
114 }
115
116 pub fn objects(&self) -> &HashMap<u32, PdfObject> {
118 &self.objects
119 }
120}
121
/// One cross-reference entry, distinguishing the three kinds this module handles.
///
/// Every field is a plain integer, so the type derives `Eq` and `Hash` in
/// addition to `PartialEq`; entries can be used in sets and as map keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum XRefEntryType {
    /// An entry that is not in use.
    Free {
        /// Object number of the next free object.
        next_free_obj: u32,
        /// Generation number recorded for this entry.
        generation: u16,
    },
    /// An in-use entry located directly at `offset`.
    InUse {
        /// Offset at which the object is located.
        offset: u64,
        /// Generation number recorded for this entry.
        generation: u16,
    },
    /// An in-use entry whose object is stored inside an object stream.
    Compressed {
        /// Object number of the object stream that contains the object.
        stream_obj_num: u32,
        /// Index of the object within that object stream.
        index_in_stream: u32,
    },
}
135
136impl XRefEntryType {
137 pub fn to_simple_entry(&self) -> XRefEntry {
139 match self {
140 XRefEntryType::Free { generation, .. } => XRefEntry {
141 offset: 0,
142 generation: *generation,
143 in_use: false,
144 },
145 XRefEntryType::InUse { offset, generation } => XRefEntry {
146 offset: *offset,
147 generation: *generation,
148 in_use: true,
149 },
150 XRefEntryType::Compressed { .. } => XRefEntry {
151 offset: 0,
152 generation: 0,
153 in_use: true,
154 },
155 }
156 }
157}
158
#[cfg(test)]
mod tests {
    use super::super::objects::{PdfDictionary, PdfName};
    use super::*;
    use flate2::write::ZlibEncoder;
    use flate2::Compression;
    use std::collections::HashMap;
    use std::io::Write;

    /// Raw payload fixture; no test in this module calls it, hence the lint allowance.
    #[allow(dead_code)]
    fn create_test_stream_data() -> Vec<u8> {
        b"1 0 2 2 true false".to_vec()
    }

    /// Zlib-compressed form of [`create_test_stream_data`]; also not called by any test here.
    #[allow(dead_code)]
    fn create_compressed_stream_data() -> Vec<u8> {
        let raw = create_test_stream_data();
        let mut zlib = ZlibEncoder::new(Vec::new(), Compression::default());
        zlib.write_all(&raw).unwrap();
        zlib.finish().unwrap()
    }

    /// Builds an empty-data stream whose dictionary holds `Type = ObjStm` plus one extra entry.
    fn objstm_stream_with(key: &str, value: PdfObject) -> PdfStream {
        let mut dict = PdfDictionary(HashMap::new());
        dict.0.insert(
            PdfName("Type".to_string()),
            PdfObject::Name(PdfName("ObjStm".to_string())),
        );
        dict.0.insert(PdfName(key.to_string()), value);

        PdfStream { dict, data: vec![] }
    }

    /// Parses `stream` with default options and checks that it fails with `MissingKey(expected)`.
    fn assert_missing_key(stream: PdfStream, expected: &str) {
        let options = ParseOptions::default();
        let result = ObjectStream::parse(stream, &options);

        assert!(result.is_err());
        match result.unwrap_err() {
            ParseError::MissingKey(key) => assert_eq!(key, expected),
            _ => panic!("Expected MissingKey error"),
        }
    }

    #[test]
    fn test_xref_entry_type_free() {
        let simple = XRefEntryType::Free {
            next_free_obj: 5,
            generation: 65535,
        }
        .to_simple_entry();

        assert_eq!(simple.offset, 0);
        assert_eq!(simple.generation, 65535);
        assert!(!simple.in_use);
    }

    #[test]
    fn test_xref_entry_type_in_use() {
        let simple = XRefEntryType::InUse {
            offset: 1234,
            generation: 0,
        }
        .to_simple_entry();

        assert_eq!(simple.offset, 1234);
        assert_eq!(simple.generation, 0);
        assert!(simple.in_use);
    }

    #[test]
    fn test_xref_entry_type_compressed() {
        let simple = XRefEntryType::Compressed {
            stream_obj_num: 10,
            index_in_stream: 3,
        }
        .to_simple_entry();

        assert_eq!(simple.offset, 0);
        assert_eq!(simple.generation, 0);
        assert!(simple.in_use);
    }

    #[test]
    fn test_object_stream_parse_missing_n() {
        // Only `First` is present, so the lookup of `N` must fail.
        let stream = objstm_stream_with("First", PdfObject::Integer(10));
        assert_missing_key(stream, "N");
    }

    #[test]
    fn test_object_stream_parse_missing_first() {
        // Only `N` is present, so the lookup of `First` must fail.
        let stream = objstm_stream_with("N", PdfObject::Integer(2));
        assert_missing_key(stream, "First");
    }
}