1use super::lexer::{Lexer, Token};
36use super::{ParseError, ParseOptions, ParseResult};
37use std::collections::HashMap;
38use std::io::Read;
39
/// A PDF name object, stored as the name's text without the leading `/`
/// that introduces it in PDF syntax.
///
/// Names are hashable so they can serve as dictionary keys.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct PdfName(pub String);
61
/// A PDF string object, kept as raw bytes.
///
/// The bytes are not required to be valid UTF-8; see `PdfString::as_str`
/// for a checked text view.
#[derive(Clone, Debug, PartialEq)]
pub struct PdfString(pub Vec<u8>);
89
90#[derive(Debug, Clone, PartialEq)]
117pub struct PdfArray(pub Vec<PdfObject>);
118
119#[derive(Debug, Clone, PartialEq)]
147pub struct PdfDictionary(pub HashMap<PdfName, PdfObject>);
148
149#[derive(Debug, Clone, PartialEq)]
186pub struct PdfStream {
187 pub dict: PdfDictionary,
189 pub data: Vec<u8>,
191}
192
193pub static EMPTY_PDF_ARRAY: PdfArray = PdfArray(Vec::new());
195
196impl PdfStream {
197 pub fn decode(&self, options: &ParseOptions) -> ParseResult<Vec<u8>> {
232 super::filters::decode_stream(&self.data, &self.dict, options)
233 }
234
235 pub fn raw_data(&self) -> &[u8] {
249 &self.data
250 }
251}
252
253#[derive(Debug, Clone, PartialEq)]
289pub enum PdfObject {
290 Null,
292 Boolean(bool),
294 Integer(i64),
296 Real(f64),
298 String(PdfString),
300 Name(PdfName),
302 Array(PdfArray),
304 Dictionary(PdfDictionary),
306 Stream(PdfStream),
308 Reference(u32, u16),
310}
311
312impl PdfObject {
313 pub fn parse<R: Read + std::io::Seek>(lexer: &mut Lexer<R>) -> ParseResult<Self> {
349 let token = lexer.next_token()?;
350 Self::parse_from_token(lexer, token)
351 }
352
353 pub fn parse_with_options<R: Read + std::io::Seek>(
355 lexer: &mut Lexer<R>,
356 options: &super::ParseOptions,
357 ) -> ParseResult<Self> {
358 let token = lexer.next_token()?;
359 Self::parse_from_token_with_options(lexer, token, options)
360 }
361
362 fn parse_from_token<R: Read + std::io::Seek>(
364 lexer: &mut Lexer<R>,
365 token: Token,
366 ) -> ParseResult<Self> {
367 Self::parse_from_token_with_options(lexer, token, &super::ParseOptions::default())
368 }
369
370 fn parse_from_token_with_options<R: Read + std::io::Seek>(
372 lexer: &mut Lexer<R>,
373 token: Token,
374 options: &super::ParseOptions,
375 ) -> ParseResult<Self> {
376 match token {
377 Token::Null => Ok(PdfObject::Null),
378 Token::Boolean(b) => Ok(PdfObject::Boolean(b)),
379 Token::Integer(i) => {
380 if !(0..=9999999).contains(&i) {
382 return Ok(PdfObject::Integer(i));
383 }
384
385 match lexer.next_token()? {
387 Token::Integer(gen) if (0..=65535).contains(&gen) => {
388 match lexer.next_token()? {
390 Token::Name(s) if s == "R" => {
391 Ok(PdfObject::Reference(i as u32, gen as u16))
392 }
393 token => {
394 lexer.push_token(token);
396 lexer.push_token(Token::Integer(gen));
397 Ok(PdfObject::Integer(i))
398 }
399 }
400 }
401 token => {
402 lexer.push_token(token);
404 Ok(PdfObject::Integer(i))
405 }
406 }
407 }
408 Token::Real(r) => Ok(PdfObject::Real(r)),
409 Token::String(s) => Ok(PdfObject::String(PdfString(s))),
410 Token::Name(n) => Ok(PdfObject::Name(PdfName(n))),
411 Token::ArrayStart => Self::parse_array_with_options(lexer, options),
412 Token::DictStart => Self::parse_dictionary_or_stream_with_options(lexer, options),
413 Token::Comment(_) => {
414 Self::parse_with_options(lexer, options)
416 }
417 Token::StartXRef => {
418 Err(ParseError::SyntaxError {
420 position: 0,
421 message: "StartXRef encountered - this is not a PDF object".to_string(),
422 })
423 }
424 Token::Eof => Err(ParseError::SyntaxError {
425 position: 0,
426 message: "Unexpected end of file".to_string(),
427 }),
428 _ => Err(ParseError::UnexpectedToken {
429 expected: "PDF object".to_string(),
430 found: format!("{token:?}"),
431 }),
432 }
433 }
434
435 fn parse_array_with_options<R: Read + std::io::Seek>(
437 lexer: &mut Lexer<R>,
438 options: &super::ParseOptions,
439 ) -> ParseResult<Self> {
440 let mut elements = Vec::new();
441
442 loop {
443 let token = lexer.next_token()?;
444 match token {
445 Token::ArrayEnd => break,
446 Token::Comment(_) => continue, _ => {
448 let obj = Self::parse_from_token_with_options(lexer, token, options)?;
449 elements.push(obj);
450 }
451 }
452 }
453
454 Ok(PdfObject::Array(PdfArray(elements)))
455 }
456
457 fn parse_dictionary_or_stream_with_options<R: Read + std::io::Seek>(
459 lexer: &mut Lexer<R>,
460 options: &super::ParseOptions,
461 ) -> ParseResult<Self> {
462 let dict = Self::parse_dictionary_inner_with_options(lexer, options)?;
463
464 loop {
466 let token = lexer.next_token()?;
467 match token {
469 Token::Stream => {
470 let stream_data = Self::parse_stream_data_with_options(lexer, &dict, options)?;
472 return Ok(PdfObject::Stream(PdfStream {
473 dict,
474 data: stream_data,
475 }));
476 }
477 Token::Comment(_) => {
478 continue;
480 }
481 Token::StartXRef => {
482 lexer.push_token(token);
486 return Ok(PdfObject::Dictionary(dict));
487 }
488 _ => {
489 lexer.push_token(token);
493 return Ok(PdfObject::Dictionary(dict));
494 }
495 }
496 }
497 }
498
499 fn parse_dictionary_inner_with_options<R: Read + std::io::Seek>(
501 lexer: &mut Lexer<R>,
502 options: &super::ParseOptions,
503 ) -> ParseResult<PdfDictionary> {
504 let mut dict = HashMap::new();
505
506 loop {
507 let token = lexer.next_token()?;
508 match token {
509 Token::DictEnd => break,
510 Token::Comment(_) => continue, Token::Name(key) => {
512 let value = Self::parse_with_options(lexer, options)?;
513 dict.insert(PdfName(key), value);
514 }
515 _ => {
516 return Err(ParseError::UnexpectedToken {
517 expected: "dictionary key (name) or >>".to_string(),
518 found: format!("{token:?}"),
519 });
520 }
521 }
522 }
523
524 Ok(PdfDictionary(dict))
525 }
526
527 fn parse_stream_data_with_options<R: Read + std::io::Seek>(
529 lexer: &mut Lexer<R>,
530 dict: &PdfDictionary,
531 options: &super::ParseOptions,
532 ) -> ParseResult<Vec<u8>> {
533 let length = dict
535 .0
536 .get(&PdfName("Length".to_string()))
537 .or_else(|| {
538 if options.lenient_streams {
540 if options.collect_warnings {
541 eprintln!("Warning: Missing Length key in stream dictionary, will search for endstream marker");
542 }
543 Some(&PdfObject::Integer(-1))
545 } else {
546 None
547 }
548 })
549 .ok_or_else(|| ParseError::MissingKey("Length".to_string()))?;
550
551 let length = match length {
552 PdfObject::Integer(len) => {
553 if *len == -1 {
554 usize::MAX } else {
557 *len as usize
558 }
559 }
560 PdfObject::Reference(obj_num, gen_num) => {
561 if options.lenient_streams {
565 if options.collect_warnings {
566 eprintln!("Warning: Stream length is an indirect reference ({obj_num} {gen_num} R). Using endstream detection fallback.");
567 }
568 usize::MAX } else {
570 return Err(ParseError::SyntaxError {
571 position: lexer.position(),
572 message: format!("Stream length reference ({obj_num} {gen_num} R) requires lenient mode or reference resolution"),
573 });
574 }
575 }
576 _ => {
577 return Err(ParseError::SyntaxError {
578 position: lexer.position(),
579 message: "Invalid stream length type".to_string(),
580 });
581 }
582 };
583
584 lexer.read_newline()?;
586
587 let mut stream_data = if length == usize::MAX {
589 let mut data = Vec::new();
591 let max_search = 65536; let mut found_endstream = false;
593
594 for _ in 0..max_search {
595 match lexer.peek_byte() {
596 Ok(b) => {
597 if b == b'e' {
599 let pos = lexer.position();
600 if let Ok(Token::EndStream) = lexer.peek_token() {
602 found_endstream = true;
603 break;
604 }
605 lexer.seek(pos as u64)?;
607 }
608 data.push(lexer.read_byte()?);
609 }
610 Err(_) => break,
611 }
612 }
613
614 if !found_endstream && !options.lenient_streams {
615 return Err(ParseError::SyntaxError {
616 position: lexer.position(),
617 message: "Could not find endstream marker".to_string(),
618 });
619 }
620
621 data
622 } else {
623 lexer.read_bytes(length)?
624 };
625
626 lexer.skip_whitespace()?;
628
629 let peek_result = lexer.peek_token();
631
632 match peek_result {
633 Ok(Token::EndStream) => {
634 lexer.next_token()?;
636 Ok(stream_data)
637 }
638 Ok(other_token) => {
639 if options.lenient_streams {
640 eprintln!("Warning: Stream length mismatch. Expected 'endstream' after {length} bytes, got {other_token:?}");
642
643 if let Some(additional_bytes) =
644 lexer.find_keyword_ahead("endstream", options.max_recovery_bytes)?
645 {
646 let extra_data = lexer.read_bytes(additional_bytes)?;
648 stream_data.extend_from_slice(&extra_data);
649
650 let actual_length = stream_data.len();
651 eprintln!(
652 "Stream length corrected: declared={length}, actual={actual_length}"
653 );
654
655 lexer.skip_whitespace()?;
657 lexer.expect_keyword("endstream")?;
658
659 Ok(stream_data)
660 } else {
661 Err(ParseError::SyntaxError {
663 position: lexer.position(),
664 message: format!(
665 "Could not find 'endstream' within {} bytes",
666 options.max_recovery_bytes
667 ),
668 })
669 }
670 } else {
671 Err(ParseError::UnexpectedToken {
673 expected: "endstream".to_string(),
674 found: format!("{other_token:?}"),
675 })
676 }
677 }
678 Err(e) => {
679 if options.lenient_streams {
680 eprintln!(
682 "Warning: Stream length mismatch. Could not peek next token after {length} bytes"
683 );
684
685 if let Some(additional_bytes) =
686 lexer.find_keyword_ahead("endstream", options.max_recovery_bytes)?
687 {
688 let extra_data = lexer.read_bytes(additional_bytes)?;
690 stream_data.extend_from_slice(&extra_data);
691
692 let actual_length = stream_data.len();
693 eprintln!(
694 "Stream length corrected: declared={length}, actual={actual_length}"
695 );
696
697 lexer.skip_whitespace()?;
699 lexer.expect_keyword("endstream")?;
700
701 Ok(stream_data)
702 } else {
703 Err(ParseError::SyntaxError {
705 position: lexer.position(),
706 message: format!(
707 "Could not find 'endstream' within {} bytes",
708 options.max_recovery_bytes
709 ),
710 })
711 }
712 } else {
713 Err(e)
715 }
716 }
717 }
718 }
719
720 pub fn is_null(&self) -> bool {
731 matches!(self, PdfObject::Null)
732 }
733
734 pub fn as_bool(&self) -> Option<bool> {
752 match self {
753 PdfObject::Boolean(b) => Some(*b),
754 _ => None,
755 }
756 }
757
758 pub fn as_integer(&self) -> Option<i64> {
760 match self {
761 PdfObject::Integer(i) => Some(*i),
762 _ => None,
763 }
764 }
765
766 pub fn as_real(&self) -> Option<f64> {
787 match self {
788 PdfObject::Real(r) => Some(*r),
789 PdfObject::Integer(i) => Some(*i as f64),
790 _ => None,
791 }
792 }
793
794 pub fn as_string(&self) -> Option<&PdfString> {
796 match self {
797 PdfObject::String(s) => Some(s),
798 _ => None,
799 }
800 }
801
802 pub fn as_name(&self) -> Option<&PdfName> {
804 match self {
805 PdfObject::Name(n) => Some(n),
806 _ => None,
807 }
808 }
809
810 pub fn as_array(&self) -> Option<&PdfArray> {
812 match self {
813 PdfObject::Array(a) => Some(a),
814 _ => None,
815 }
816 }
817
818 pub fn as_dict(&self) -> Option<&PdfDictionary> {
820 match self {
821 PdfObject::Dictionary(d) => Some(d),
822 PdfObject::Stream(s) => Some(&s.dict),
823 _ => None,
824 }
825 }
826
827 pub fn as_stream(&self) -> Option<&PdfStream> {
829 match self {
830 PdfObject::Stream(s) => Some(s),
831 _ => None,
832 }
833 }
834
835 pub fn as_reference(&self) -> Option<(u32, u16)> {
855 match self {
856 PdfObject::Reference(obj, gen) => Some((*obj, *gen)),
857 _ => None,
858 }
859 }
860}
861
862impl Default for PdfDictionary {
863 fn default() -> Self {
864 Self::new()
865 }
866}
867
868impl PdfDictionary {
869 pub fn new() -> Self {
880 PdfDictionary(HashMap::new())
881 }
882
883 pub fn get(&self, key: &str) -> Option<&PdfObject> {
906 self.0.get(&PdfName(key.to_string()))
907 }
908
909 pub fn insert(&mut self, key: String, value: PdfObject) {
911 self.0.insert(PdfName(key), value);
912 }
913
914 pub fn contains_key(&self, key: &str) -> bool {
916 self.0.contains_key(&PdfName(key.to_string()))
917 }
918
919 pub fn get_type(&self) -> Option<&str> {
945 self.get("Type")
946 .and_then(|obj| obj.as_name())
947 .map(|n| n.0.as_str())
948 }
949}
950
951impl Default for PdfArray {
952 fn default() -> Self {
953 Self::new()
954 }
955}
956
957impl PdfArray {
958 pub fn new() -> Self {
960 PdfArray(Vec::new())
961 }
962
963 pub fn len(&self) -> usize {
965 self.0.len()
966 }
967
968 pub fn is_empty(&self) -> bool {
970 self.0.is_empty()
971 }
972
973 pub fn get(&self, index: usize) -> Option<&PdfObject> {
997 self.0.get(index)
998 }
999
1000 pub fn push(&mut self, obj: PdfObject) {
1002 self.0.push(obj);
1003 }
1004}
1005
1006impl PdfString {
1007 pub fn new(data: Vec<u8>) -> Self {
1009 PdfString(data)
1010 }
1011
1012 pub fn as_str(&self) -> Result<&str, std::str::Utf8Error> {
1033 std::str::from_utf8(&self.0)
1034 }
1035
1036 pub fn as_bytes(&self) -> &[u8] {
1038 &self.0
1039 }
1040}
1041
1042impl PdfName {
1043 pub fn new(name: String) -> Self {
1045 PdfName(name)
1046 }
1047
1048 pub fn as_str(&self) -> &str {
1050 &self.0
1051 }
1052}
1053
1054#[cfg(test)]
1055mod tests {
1056 use super::*;
1057 use crate::parser::lexer::Lexer;
1058 use crate::parser::ParseOptions;
1059 use std::collections::HashMap;
1060 use std::io::Cursor;
1061
1062 #[test]
1063 fn test_parse_simple_objects() {
1064 let input = b"null true false 123 -456 3.14 /Name (Hello)";
1065 let mut lexer = Lexer::new(Cursor::new(input));
1066
1067 assert_eq!(PdfObject::parse(&mut lexer).unwrap(), PdfObject::Null);
1068 assert_eq!(
1069 PdfObject::parse(&mut lexer).unwrap(),
1070 PdfObject::Boolean(true)
1071 );
1072 assert_eq!(
1073 PdfObject::parse(&mut lexer).unwrap(),
1074 PdfObject::Boolean(false)
1075 );
1076 assert_eq!(
1077 PdfObject::parse(&mut lexer).unwrap(),
1078 PdfObject::Integer(123)
1079 );
1080 assert_eq!(
1081 PdfObject::parse(&mut lexer).unwrap(),
1082 PdfObject::Integer(-456)
1083 );
1084 assert_eq!(PdfObject::parse(&mut lexer).unwrap(), PdfObject::Real(3.14));
1085 assert_eq!(
1086 PdfObject::parse(&mut lexer).unwrap(),
1087 PdfObject::Name(PdfName("Name".to_string()))
1088 );
1089 assert_eq!(
1090 PdfObject::parse(&mut lexer).unwrap(),
1091 PdfObject::String(PdfString(b"Hello".to_vec()))
1092 );
1093 }
1094
1095 #[test]
1096 fn test_parse_array() {
1097 let input = b"[100 200 300 /Name (test)]";
1099 let mut lexer = Lexer::new(Cursor::new(input));
1100
1101 let obj = PdfObject::parse(&mut lexer).unwrap();
1102 let array = obj.as_array().unwrap();
1103
1104 assert_eq!(array.len(), 5);
1105 assert_eq!(array.get(0).unwrap().as_integer(), Some(100));
1106 assert_eq!(array.get(1).unwrap().as_integer(), Some(200));
1107 assert_eq!(array.get(2).unwrap().as_integer(), Some(300));
1108 assert_eq!(array.get(3).unwrap().as_name().unwrap().as_str(), "Name");
1109 assert_eq!(
1110 array.get(4).unwrap().as_string().unwrap().as_bytes(),
1111 b"test"
1112 );
1113 }
1114
1115 #[test]
1116 fn test_parse_array_with_references() {
1117 let input = b"[1 0 R 2 0 R]";
1119 let mut lexer = Lexer::new(Cursor::new(input));
1120
1121 let obj = PdfObject::parse(&mut lexer).unwrap();
1122 let array = obj.as_array().unwrap();
1123
1124 assert_eq!(array.len(), 2);
1125 assert!(array.get(0).unwrap().as_reference().is_some());
1126 assert!(array.get(1).unwrap().as_reference().is_some());
1127 }
1128
1129 #[test]
1130 fn test_parse_dictionary() {
1131 let input = b"<< /Type /Page /Parent 1 0 R /MediaBox [0 0 612 792] >>";
1132 let mut lexer = Lexer::new(Cursor::new(input));
1133
1134 let obj = PdfObject::parse(&mut lexer).unwrap();
1135 let dict = obj.as_dict().unwrap();
1136
1137 assert_eq!(dict.get_type(), Some("Page"));
1138 assert!(dict.get("Parent").unwrap().as_reference().is_some());
1139 assert!(dict.get("MediaBox").unwrap().as_array().is_some());
1140 }
1141
1142 mod comprehensive_tests {
1144 use super::*;
1145
1146 #[test]
1147 fn test_pdf_object_null() {
1148 let obj = PdfObject::Null;
1149 assert!(obj.is_null());
1150 assert_eq!(obj.as_bool(), None);
1151 assert_eq!(obj.as_integer(), None);
1152 assert_eq!(obj.as_real(), None);
1153 assert_eq!(obj.as_string(), None);
1154 assert_eq!(obj.as_name(), None);
1155 assert_eq!(obj.as_array(), None);
1156 assert_eq!(obj.as_dict(), None);
1157 assert_eq!(obj.as_stream(), None);
1158 assert_eq!(obj.as_reference(), None);
1159 }
1160
1161 #[test]
1162 fn test_pdf_object_boolean() {
1163 let obj_true = PdfObject::Boolean(true);
1164 let obj_false = PdfObject::Boolean(false);
1165
1166 assert!(!obj_true.is_null());
1167 assert_eq!(obj_true.as_bool(), Some(true));
1168 assert_eq!(obj_false.as_bool(), Some(false));
1169
1170 assert_eq!(obj_true.as_integer(), None);
1171 assert_eq!(obj_true.as_real(), None);
1172 assert_eq!(obj_true.as_string(), None);
1173 assert_eq!(obj_true.as_name(), None);
1174 assert_eq!(obj_true.as_array(), None);
1175 assert_eq!(obj_true.as_dict(), None);
1176 assert_eq!(obj_true.as_stream(), None);
1177 assert_eq!(obj_true.as_reference(), None);
1178 }
1179
1180 #[test]
1181 fn test_pdf_object_integer() {
1182 let obj = PdfObject::Integer(42);
1183
1184 assert!(!obj.is_null());
1185 assert_eq!(obj.as_bool(), None);
1186 assert_eq!(obj.as_integer(), Some(42));
1187 assert_eq!(obj.as_real(), Some(42.0)); assert_eq!(obj.as_string(), None);
1189 assert_eq!(obj.as_name(), None);
1190 assert_eq!(obj.as_array(), None);
1191 assert_eq!(obj.as_dict(), None);
1192 assert_eq!(obj.as_stream(), None);
1193 assert_eq!(obj.as_reference(), None);
1194
1195 let obj_neg = PdfObject::Integer(-123);
1197 assert_eq!(obj_neg.as_integer(), Some(-123));
1198 assert_eq!(obj_neg.as_real(), Some(-123.0));
1199
1200 let obj_large = PdfObject::Integer(9999999999);
1202 assert_eq!(obj_large.as_integer(), Some(9999999999));
1203 assert_eq!(obj_large.as_real(), Some(9999999999.0));
1204 }
1205
1206 #[test]
1207 fn test_pdf_object_real() {
1208 let obj = PdfObject::Real(3.14159);
1209
1210 assert!(!obj.is_null());
1211 assert_eq!(obj.as_bool(), None);
1212 assert_eq!(obj.as_integer(), None);
1213 assert_eq!(obj.as_real(), Some(3.14159));
1214 assert_eq!(obj.as_string(), None);
1215 assert_eq!(obj.as_name(), None);
1216 assert_eq!(obj.as_array(), None);
1217 assert_eq!(obj.as_dict(), None);
1218 assert_eq!(obj.as_stream(), None);
1219 assert_eq!(obj.as_reference(), None);
1220
1221 let obj_neg = PdfObject::Real(-2.71828);
1223 assert_eq!(obj_neg.as_real(), Some(-2.71828));
1224
1225 let obj_zero = PdfObject::Real(0.0);
1227 assert_eq!(obj_zero.as_real(), Some(0.0));
1228
1229 let obj_small = PdfObject::Real(0.000001);
1231 assert_eq!(obj_small.as_real(), Some(0.000001));
1232
1233 let obj_large = PdfObject::Real(1e10);
1235 assert_eq!(obj_large.as_real(), Some(1e10));
1236 }
1237
1238 #[test]
1239 fn test_pdf_object_string() {
1240 let string_data = b"Hello World".to_vec();
1241 let pdf_string = PdfString(string_data.clone());
1242 let obj = PdfObject::String(pdf_string);
1243
1244 assert!(!obj.is_null());
1245 assert_eq!(obj.as_bool(), None);
1246 assert_eq!(obj.as_integer(), None);
1247 assert_eq!(obj.as_real(), None);
1248 assert!(obj.as_string().is_some());
1249 assert_eq!(obj.as_string().unwrap().as_bytes(), string_data);
1250 assert_eq!(obj.as_name(), None);
1251 assert_eq!(obj.as_array(), None);
1252 assert_eq!(obj.as_dict(), None);
1253 assert_eq!(obj.as_stream(), None);
1254 assert_eq!(obj.as_reference(), None);
1255 }
1256
1257 #[test]
1258 fn test_pdf_object_name() {
1259 let name_str = "Type".to_string();
1260 let pdf_name = PdfName(name_str.clone());
1261 let obj = PdfObject::Name(pdf_name);
1262
1263 assert!(!obj.is_null());
1264 assert_eq!(obj.as_bool(), None);
1265 assert_eq!(obj.as_integer(), None);
1266 assert_eq!(obj.as_real(), None);
1267 assert_eq!(obj.as_string(), None);
1268 assert!(obj.as_name().is_some());
1269 assert_eq!(obj.as_name().unwrap().as_str(), name_str);
1270 assert_eq!(obj.as_array(), None);
1271 assert_eq!(obj.as_dict(), None);
1272 assert_eq!(obj.as_stream(), None);
1273 assert_eq!(obj.as_reference(), None);
1274 }
1275
1276 #[test]
1277 fn test_pdf_object_array() {
1278 let mut array = PdfArray::new();
1279 array.push(PdfObject::Integer(1));
1280 array.push(PdfObject::Integer(2));
1281 array.push(PdfObject::Integer(3));
1282 let obj = PdfObject::Array(array);
1283
1284 assert!(!obj.is_null());
1285 assert_eq!(obj.as_bool(), None);
1286 assert_eq!(obj.as_integer(), None);
1287 assert_eq!(obj.as_real(), None);
1288 assert_eq!(obj.as_string(), None);
1289 assert_eq!(obj.as_name(), None);
1290 assert!(obj.as_array().is_some());
1291 assert_eq!(obj.as_array().unwrap().len(), 3);
1292 assert_eq!(obj.as_dict(), None);
1293 assert_eq!(obj.as_stream(), None);
1294 assert_eq!(obj.as_reference(), None);
1295 }
1296
1297 #[test]
1298 fn test_pdf_object_dictionary() {
1299 let mut dict = PdfDictionary::new();
1300 dict.insert(
1301 "Type".to_string(),
1302 PdfObject::Name(PdfName("Page".to_string())),
1303 );
1304 dict.insert("Count".to_string(), PdfObject::Integer(5));
1305 let obj = PdfObject::Dictionary(dict);
1306
1307 assert!(!obj.is_null());
1308 assert_eq!(obj.as_bool(), None);
1309 assert_eq!(obj.as_integer(), None);
1310 assert_eq!(obj.as_real(), None);
1311 assert_eq!(obj.as_string(), None);
1312 assert_eq!(obj.as_name(), None);
1313 assert_eq!(obj.as_array(), None);
1314 assert!(obj.as_dict().is_some());
1315 assert_eq!(obj.as_dict().unwrap().0.len(), 2);
1316 assert_eq!(obj.as_stream(), None);
1317 assert_eq!(obj.as_reference(), None);
1318 }
1319
1320 #[test]
1321 fn test_pdf_object_stream() {
1322 let mut dict = PdfDictionary::new();
1323 dict.insert("Length".to_string(), PdfObject::Integer(13));
1324 let data = b"Hello, World!".to_vec();
1325 let stream = PdfStream { dict, data };
1326 let obj = PdfObject::Stream(stream);
1327
1328 assert!(!obj.is_null());
1329 assert_eq!(obj.as_bool(), None);
1330 assert_eq!(obj.as_integer(), None);
1331 assert_eq!(obj.as_real(), None);
1332 assert_eq!(obj.as_string(), None);
1333 assert_eq!(obj.as_name(), None);
1334 assert_eq!(obj.as_array(), None);
1335 assert!(obj.as_dict().is_some()); assert!(obj.as_stream().is_some());
1337 assert_eq!(obj.as_stream().unwrap().raw_data(), b"Hello, World!");
1338 assert_eq!(obj.as_reference(), None);
1339 }
1340
1341 #[test]
1342 fn test_pdf_object_reference() {
1343 let obj = PdfObject::Reference(42, 0);
1344
1345 assert!(!obj.is_null());
1346 assert_eq!(obj.as_bool(), None);
1347 assert_eq!(obj.as_integer(), None);
1348 assert_eq!(obj.as_real(), None);
1349 assert_eq!(obj.as_string(), None);
1350 assert_eq!(obj.as_name(), None);
1351 assert_eq!(obj.as_array(), None);
1352 assert_eq!(obj.as_dict(), None);
1353 assert_eq!(obj.as_stream(), None);
1354 assert_eq!(obj.as_reference(), Some((42, 0)));
1355
1356 let obj_gen = PdfObject::Reference(123, 5);
1358 assert_eq!(obj_gen.as_reference(), Some((123, 5)));
1359 }
1360
1361 #[test]
1362 fn test_pdf_string_methods() {
1363 let string_data = b"Hello, World!".to_vec();
1364 let pdf_string = PdfString(string_data.clone());
1365
1366 assert_eq!(pdf_string.as_bytes(), string_data);
1367 assert_eq!(pdf_string.as_str().unwrap(), "Hello, World!");
1368 assert_eq!(pdf_string.0.len(), 13);
1369 assert!(!pdf_string.0.is_empty());
1370
1371 let empty_string = PdfString(vec![]);
1373 assert!(empty_string.0.is_empty());
1374 assert_eq!(empty_string.0.len(), 0);
1375
1376 let binary_data = vec![0xFF, 0xFE, 0x00, 0x48, 0x00, 0x69]; let binary_string = PdfString(binary_data.clone());
1379 assert_eq!(binary_string.as_bytes(), binary_data);
1380 assert!(binary_string.as_str().is_err()); }
1382
1383 #[test]
1384 fn test_pdf_name_methods() {
1385 let name_str = "Type".to_string();
1386 let pdf_name = PdfName(name_str.clone());
1387
1388 assert_eq!(pdf_name.as_str(), name_str);
1389 assert_eq!(pdf_name.0.len(), 4);
1390 assert!(!pdf_name.0.is_empty());
1391
1392 let empty_name = PdfName("".to_string());
1394 assert!(empty_name.0.is_empty());
1395 assert_eq!(empty_name.0.len(), 0);
1396
1397 let special_name = PdfName("Font#20Name".to_string());
1399 assert_eq!(special_name.as_str(), "Font#20Name");
1400 assert_eq!(special_name.0.len(), 11);
1401 }
1402
1403 #[test]
1404 fn test_pdf_array_methods() {
1405 let mut array = PdfArray::new();
1406 assert_eq!(array.len(), 0);
1407 assert!(array.is_empty());
1408
1409 array.push(PdfObject::Integer(1));
1411 array.push(PdfObject::Integer(2));
1412 array.push(PdfObject::Integer(3));
1413
1414 assert_eq!(array.len(), 3);
1415 assert!(!array.is_empty());
1416
1417 assert_eq!(array.get(0).unwrap().as_integer(), Some(1));
1419 assert_eq!(array.get(1).unwrap().as_integer(), Some(2));
1420 assert_eq!(array.get(2).unwrap().as_integer(), Some(3));
1421 assert!(array.get(3).is_none());
1422
1423 let values: Vec<i64> = array.0.iter().filter_map(|obj| obj.as_integer()).collect();
1425 assert_eq!(values, vec![1, 2, 3]);
1426
1427 let mut mixed_array = PdfArray::new();
1429 mixed_array.push(PdfObject::Integer(42));
1430 mixed_array.push(PdfObject::Real(3.14));
1431 mixed_array.push(PdfObject::String(PdfString(b"text".to_vec())));
1432 mixed_array.push(PdfObject::Name(PdfName("Name".to_string())));
1433 mixed_array.push(PdfObject::Boolean(true));
1434 mixed_array.push(PdfObject::Null);
1435
1436 assert_eq!(mixed_array.len(), 6);
1437 assert_eq!(mixed_array.get(0).unwrap().as_integer(), Some(42));
1438 assert_eq!(mixed_array.get(1).unwrap().as_real(), Some(3.14));
1439 assert_eq!(
1440 mixed_array.get(2).unwrap().as_string().unwrap().as_bytes(),
1441 b"text"
1442 );
1443 assert_eq!(
1444 mixed_array.get(3).unwrap().as_name().unwrap().as_str(),
1445 "Name"
1446 );
1447 assert_eq!(mixed_array.get(4).unwrap().as_bool(), Some(true));
1448 assert!(mixed_array.get(5).unwrap().is_null());
1449 }
1450
1451 #[test]
1452 fn test_pdf_dictionary_methods() {
1453 let mut dict = PdfDictionary::new();
1454 assert_eq!(dict.0.len(), 0);
1455 assert!(dict.0.is_empty());
1456
1457 dict.insert(
1459 "Type".to_string(),
1460 PdfObject::Name(PdfName("Page".to_string())),
1461 );
1462 dict.insert("Count".to_string(), PdfObject::Integer(5));
1463 dict.insert("Resources".to_string(), PdfObject::Reference(10, 0));
1464
1465 assert_eq!(dict.0.len(), 3);
1466 assert!(!dict.0.is_empty());
1467
1468 assert_eq!(
1470 dict.get("Type").unwrap().as_name().unwrap().as_str(),
1471 "Page"
1472 );
1473 assert_eq!(dict.get("Count").unwrap().as_integer(), Some(5));
1474 assert_eq!(dict.get("Resources").unwrap().as_reference(), Some((10, 0)));
1475 assert!(dict.get("NonExistent").is_none());
1476
1477 assert!(dict.contains_key("Type"));
1479 assert!(dict.contains_key("Count"));
1480 assert!(dict.contains_key("Resources"));
1481 assert!(!dict.contains_key("NonExistent"));
1482
1483 assert_eq!(dict.get_type(), Some("Page"));
1485
1486 let mut keys: Vec<String> = dict.0.keys().map(|k| k.0.clone()).collect();
1488 keys.sort();
1489 assert_eq!(keys, vec!["Count", "Resources", "Type"]);
1490
1491 let values: Vec<&PdfObject> = dict.0.values().collect();
1493 assert_eq!(values.len(), 3);
1494 }
1495
1496 #[test]
1497 fn test_pdf_stream_methods() {
1498 let mut dict = PdfDictionary::new();
1499 dict.insert("Length".to_string(), PdfObject::Integer(13));
1500 dict.insert(
1501 "Filter".to_string(),
1502 PdfObject::Name(PdfName("FlateDecode".to_string())),
1503 );
1504
1505 let data = b"Hello, World!".to_vec();
1506 let stream = PdfStream {
1507 dict,
1508 data: data.clone(),
1509 };
1510
1511 assert_eq!(stream.raw_data(), data);
1513
1514 assert_eq!(stream.dict.get("Length").unwrap().as_integer(), Some(13));
1516 assert_eq!(
1517 stream
1518 .dict
1519 .get("Filter")
1520 .unwrap()
1521 .as_name()
1522 .unwrap()
1523 .as_str(),
1524 "FlateDecode"
1525 );
1526
1527 let options = ParseOptions::default();
1530 let decode_result = stream.decode(&options);
1531 assert!(decode_result.is_ok() || decode_result.is_err());
1532 }
1533
1534 #[test]
1535 fn test_parse_complex_nested_structures() {
1536 let input = b"[[1 2] [3 4] [5 6]]";
1538 let mut lexer = Lexer::new(Cursor::new(input));
1539 let obj = PdfObject::parse(&mut lexer).unwrap();
1540
1541 let outer_array = obj.as_array().unwrap();
1542 assert_eq!(outer_array.len(), 3);
1543
1544 for i in 0..3 {
1545 let inner_array = outer_array.get(i).unwrap().as_array().unwrap();
1546 assert_eq!(inner_array.len(), 2);
1547 assert_eq!(
1548 inner_array.get(0).unwrap().as_integer(),
1549 Some((i as i64) * 2 + 1)
1550 );
1551 assert_eq!(
1552 inner_array.get(1).unwrap().as_integer(),
1553 Some((i as i64) * 2 + 2)
1554 );
1555 }
1556 }
1557
1558 #[test]
1559 fn test_parse_complex_dictionary() {
1560 let input = b"<< /Type /Page /Parent 1 0 R /MediaBox [0 0 612 792] /Resources << /Font << /F1 2 0 R >> /ProcSet [/PDF /Text] >> /Contents 3 0 R >>";
1561 let mut lexer = Lexer::new(Cursor::new(input));
1562 let obj = PdfObject::parse(&mut lexer).unwrap();
1563
1564 let dict = obj.as_dict().unwrap();
1565 assert_eq!(dict.get_type(), Some("Page"));
1566 assert_eq!(dict.get("Parent").unwrap().as_reference(), Some((1, 0)));
1567 assert_eq!(dict.get("Contents").unwrap().as_reference(), Some((3, 0)));
1568
1569 let media_box = dict.get("MediaBox").unwrap().as_array().unwrap();
1571 assert_eq!(media_box.len(), 4);
1572 assert_eq!(media_box.get(0).unwrap().as_integer(), Some(0));
1573 assert_eq!(media_box.get(1).unwrap().as_integer(), Some(0));
1574 assert_eq!(media_box.get(2).unwrap().as_integer(), Some(612));
1575 assert_eq!(media_box.get(3).unwrap().as_integer(), Some(792));
1576
1577 let resources = dict.get("Resources").unwrap().as_dict().unwrap();
1579 assert!(resources.contains_key("Font"));
1580 assert!(resources.contains_key("ProcSet"));
1581
1582 let font_dict = resources.get("Font").unwrap().as_dict().unwrap();
1584 assert_eq!(font_dict.get("F1").unwrap().as_reference(), Some((2, 0)));
1585
1586 let proc_set = resources.get("ProcSet").unwrap().as_array().unwrap();
1588 assert_eq!(proc_set.len(), 2);
1589 assert_eq!(proc_set.get(0).unwrap().as_name().unwrap().as_str(), "PDF");
1590 assert_eq!(proc_set.get(1).unwrap().as_name().unwrap().as_str(), "Text");
1591 }
1592
1593 #[test]
1594 fn test_parse_hex_strings() {
1595 let input = b"<48656C6C6F>"; let mut lexer = Lexer::new(Cursor::new(input));
1597 let obj = PdfObject::parse(&mut lexer).unwrap();
1598
1599 let string = obj.as_string().unwrap();
1600 assert_eq!(string.as_str().unwrap(), "Hello");
1601 }
1602
1603 #[test]
1604 fn test_parse_literal_strings() {
1605 let input = b"(Hello World)";
1606 let mut lexer = Lexer::new(Cursor::new(input));
1607 let obj = PdfObject::parse(&mut lexer).unwrap();
1608
1609 let string = obj.as_string().unwrap();
1610 assert_eq!(string.as_str().unwrap(), "Hello World");
1611 }
1612
1613 #[test]
1614 fn test_parse_string_with_escapes() {
1615 let input = b"(Hello\\nWorld\\t!)";
1616 let mut lexer = Lexer::new(Cursor::new(input));
1617 let obj = PdfObject::parse(&mut lexer).unwrap();
1618
1619 let string = obj.as_string().unwrap();
1620 assert!(!string.as_bytes().is_empty());
1622 }
1623
1624 #[test]
1625 fn test_parse_names_with_special_chars() {
1626 let input = b"/Name#20with#20spaces";
1627 let mut lexer = Lexer::new(Cursor::new(input));
1628 let obj = PdfObject::parse(&mut lexer).unwrap();
1629
1630 let name = obj.as_name().unwrap();
1631 assert!(!name.as_str().is_empty());
1633 }
1634
1635 #[test]
1636 fn test_parse_references() {
1637 let input = b"1 0 R";
1638 let mut lexer = Lexer::new(Cursor::new(input));
1639 let obj = PdfObject::parse(&mut lexer).unwrap();
1640
1641 assert_eq!(obj.as_reference(), Some((1, 0)));
1642
1643 let input2 = b"42 5 R";
1645 let mut lexer2 = Lexer::new(Cursor::new(input2));
1646 let obj2 = PdfObject::parse(&mut lexer2).unwrap();
1647
1648 assert_eq!(obj2.as_reference(), Some((42, 5)));
1649 }
1650
/// Exercises numeric extremes: both ends of the `i64` range and an
/// exponent-notation real.
#[test]
fn test_parse_edge_cases() {
    // 9223372036854775807 == i64::MAX
    let mut max_lexer = Lexer::new(Cursor::new(b"9223372036854775807"));
    let max_obj = PdfObject::parse(&mut max_lexer).unwrap();
    assert_eq!(max_obj.as_integer(), Some(i64::MAX));

    // -9223372036854775808 == i64::MIN
    let mut min_lexer = Lexer::new(Cursor::new(b"-9223372036854775808"));
    let min_obj = PdfObject::parse(&mut min_lexer).unwrap();
    assert_eq!(min_obj.as_integer(), Some(i64::MIN));

    // Exponent notation: only requires that a real value comes back.
    let mut exp_lexer = Lexer::new(Cursor::new(b"1.23e-10"));
    let exp_obj = PdfObject::parse(&mut exp_lexer).unwrap();
    assert!(exp_obj.as_real().is_some());
}
1672
/// Empty arrays and empty dictionaries must parse to empty containers.
#[test]
fn test_parse_empty_structures() {
    // Empty array.
    let mut array_lexer = Lexer::new(Cursor::new(b"[]"));
    let array_obj = PdfObject::parse(&mut array_lexer).unwrap();

    let empty_array = array_obj.as_array().unwrap();
    assert_eq!(empty_array.len(), 0);
    assert!(empty_array.is_empty());

    // Empty dictionary (delimiters separated by a single space).
    let mut dict_lexer = Lexer::new(Cursor::new(b"<< >>"));
    let dict_obj = PdfObject::parse(&mut dict_lexer).unwrap();

    let empty_dict = dict_obj.as_dict().unwrap();
    assert_eq!(empty_dict.0.len(), 0);
    assert!(empty_dict.0.is_empty());
}
1693
/// Malformed input must be rejected, or at least never be misread as a
/// well-formed object of a different kind.
#[test]
fn test_error_handling() {
    // Array that is never closed.
    let mut lexer = Lexer::new(Cursor::new(b"[1 2 3"));
    let result = PdfObject::parse(&mut lexer);
    assert!(result.is_err());

    // Dictionary that is never closed.
    let mut lexer2 = Lexer::new(Cursor::new(b"<< /Type /Page"));
    let result2 = PdfObject::parse(&mut lexer2);
    assert!(result2.is_err());

    // "1 0 X" has the shape of an indirect reference but ends in the wrong
    // keyword. Either outcome (an error, or falling back to some other
    // object) is accepted, but it must never be reported as a reference.
    // The previous `is_ok() || is_err()` assertion was always true and
    // therefore verified nothing.
    let mut lexer3 = Lexer::new(Cursor::new(b"1 0 X"));
    let result3 = PdfObject::parse(&mut lexer3);
    if let Ok(obj) = result3 {
        assert!(
            obj.as_reference().is_none(),
            "\"1 0 X\" must not be parsed as an indirect reference"
        );
    }
}
1716
/// `Clone` must produce values that compare equal, for scalars and for
/// nested containers alike.
#[test]
fn test_clone_and_equality() {
    let forty_two = PdfObject::Integer(42);
    let duplicate = forty_two.clone();
    assert_eq!(forty_two, duplicate);

    // A different payload must compare unequal.
    let forty_three = PdfObject::Integer(43);
    assert_ne!(forty_two, forty_three);

    // Array holding mixed element types.
    let mut items = PdfArray::new();
    items.push(PdfObject::Integer(1));
    items.push(PdfObject::String(PdfString(b"test".to_vec())));
    let array_obj = PdfObject::Array(items);
    let array_copy = array_obj.clone();
    assert_eq!(array_obj, array_copy);
}
1734
/// The derived `Debug` output must mention both the type/variant name and
/// the payload.
#[test]
fn test_debug_formatting() {
    let integer_repr = format!("{:?}", PdfObject::Integer(42));
    assert!(integer_repr.contains("Integer"));
    assert!(integer_repr.contains("42"));

    let name_repr = format!("{:?}", PdfName("Type".to_string()));
    assert!(name_repr.contains("PdfName"));
    assert!(name_repr.contains("Type"));
}
1747
/// Builds a 1000-element array and checks its length, both end elements and
/// the sum of its contents.
#[test]
fn test_performance_large_array() {
    let mut array = PdfArray::new();
    (0..1000).for_each(|value| array.push(PdfObject::Integer(value)));

    assert_eq!(array.len(), 1000);
    assert_eq!(array.get(0).unwrap().as_integer(), Some(0));
    assert_eq!(array.get(999).unwrap().as_integer(), Some(999));

    // Accumulate every integer element by hand.
    let mut total: i64 = 0;
    for element in array.0.iter() {
        if let Some(value) = element.as_integer() {
            total += value;
        }
    }
    // 0 + 1 + ... + 999 = 999 * 1000 / 2
    assert_eq!(total, 499500);
}
1763
/// Fills a dictionary with 1000 distinct keys and checks that every one of
/// them can be found again.
#[test]
fn test_performance_large_dictionary() {
    let mut dict = PdfDictionary::new();
    for index in 0..1000 {
        let key = format!("Key{}", index);
        dict.insert(key, PdfObject::Integer(index));
    }

    assert_eq!(dict.0.len(), 1000);
    assert_eq!(dict.get("Key0").unwrap().as_integer(), Some(0));
    assert_eq!(dict.get("Key999").unwrap().as_integer(), Some(999));

    // Every inserted key must be present.
    for index in 0..1000 {
        let key = format!("Key{}", index);
        assert!(dict.contains_key(&key));
    }
}
1780 }
1781
/// Lenient mode: the declared `/Length` (10) is smaller than the actual
/// stream payload, and parsing is still expected to succeed.
#[test]
fn test_lenient_stream_parsing_too_short() {
    // Dictionary whose only entry is /Length 10.
    let mut entries = HashMap::new();
    entries.insert(PdfName("Length".to_string()), PdfObject::Integer(10));
    let dict = PdfDictionary(entries);

    let stream_content = b"This is a much longer text content than just 10 bytes";

    // Bytes handed to the lexer: newline, payload, newline + `endstream`.
    let mut test_data = Vec::new();
    test_data.extend_from_slice(b"\n");
    test_data.extend_from_slice(stream_content);
    test_data.extend_from_slice(b"\nendstream");

    let mut cursor = Cursor::new(test_data);
    let mut lexer = Lexer::new(&mut cursor);

    let mut options = ParseOptions::default();
    options.lenient_streams = true;
    options.max_recovery_bytes = 100;
    options.collect_warnings = false;

    let result = PdfObject::parse_stream_data_with_options(&mut lexer, &dict, &options);

    // Print the failure details before the assertion below fires.
    if let Some(e) = result.as_ref().err() {
        eprintln!("Error in test_lenient_stream_parsing_too_short: {:?}", e);
        eprintln!("Warning: Stream length mismatch expected, checking if lenient parsing is working correctly");
    }
    assert!(result.is_ok());

    let stream_data = result.unwrap();
    let content = String::from_utf8_lossy(&stream_data);

    // The recovered data must contain at least the start of the payload.
    assert!(content.contains("This is a"));
}
1827
/// Lenient mode: the declared `/Length` (100) is larger than the data that
/// is actually available, and parsing is expected to fail.
#[test]
fn test_lenient_stream_parsing_too_long() {
    // Dictionary whose only entry is /Length 100.
    let mut entries = HashMap::new();
    entries.insert(PdfName("Length".to_string()), PdfObject::Integer(100));
    let dict = PdfDictionary(entries);

    let stream_content = b"Short";

    // Bytes handed to the lexer: newline, payload, newline + `endstream`.
    let mut test_data = Vec::new();
    test_data.extend_from_slice(b"\n");
    test_data.extend_from_slice(stream_content);
    test_data.extend_from_slice(b"\nendstream");

    let mut cursor = Cursor::new(test_data);
    let mut lexer = Lexer::new(&mut cursor);

    let mut options = ParseOptions::default();
    options.lenient_streams = true;
    options.max_recovery_bytes = 100;
    options.collect_warnings = false;

    let result = PdfObject::parse_stream_data_with_options(&mut lexer, &dict, &options);

    assert!(result.is_err());
}
1863
/// Lenient mode: a stream object whose data never contains an `endstream`
/// keyword is expected to produce an error.
#[test]
fn test_lenient_stream_no_endstream_found() {
    // Dictionary, `stream` keyword, then payload with no terminator at all.
    let input = b"<< /Length 10 >>\nstream\nThis text does not contain the magic word and continues for a very long time with no proper termination...";

    let mut cursor = Cursor::new(input.to_vec());
    let mut lexer = Lexer::new(&mut cursor);

    let mut options = ParseOptions::default();
    options.lenient_streams = true;
    // The recovery limit (50) is shorter than the payload that follows `stream`.
    options.max_recovery_bytes = 50;
    options.collect_warnings = false;

    // Feed the first token in by hand and let the parser take over from there.
    let first_token = lexer.next_token().unwrap();
    let outcome = PdfObject::parse_from_token_with_options(&mut lexer, first_token, &options);

    assert!(outcome.is_err());
}
1884}