1use super::lexer::{Lexer, Token};
36use super::{ParseError, ParseResult};
37use std::collections::HashMap;
38use std::io::Read;
39
/// A PDF name object, holding the name's text as an owned `String`.
///
/// The type is hashable and totally comparable, which lets it serve as a
/// `HashMap` key.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct PdfName(pub String);
61
/// A PDF string object, kept as raw bytes.
///
/// The bytes are not required to be valid UTF-8.
#[derive(Clone, Debug, PartialEq)]
pub struct PdfString(pub Vec<u8>);
89
90#[derive(Debug, Clone, PartialEq)]
117pub struct PdfArray(pub Vec<PdfObject>);
118
119#[derive(Debug, Clone, PartialEq)]
147pub struct PdfDictionary(pub HashMap<PdfName, PdfObject>);
148
149#[derive(Debug, Clone, PartialEq)]
184pub struct PdfStream {
185 pub dict: PdfDictionary,
187 pub data: Vec<u8>,
189}
190
191impl PdfStream {
192 pub fn decode(&self) -> ParseResult<Vec<u8>> {
221 super::filters::decode_stream(&self.data, &self.dict)
222 }
223
224 pub fn raw_data(&self) -> &[u8] {
238 &self.data
239 }
240}
241
242#[derive(Debug, Clone, PartialEq)]
278pub enum PdfObject {
279 Null,
281 Boolean(bool),
283 Integer(i64),
285 Real(f64),
287 String(PdfString),
289 Name(PdfName),
291 Array(PdfArray),
293 Dictionary(PdfDictionary),
295 Stream(PdfStream),
297 Reference(u32, u16),
299}
300
301impl PdfObject {
302 pub fn parse<R: Read>(lexer: &mut Lexer<R>) -> ParseResult<Self> {
338 let token = lexer.next_token()?;
339 Self::parse_from_token(lexer, token)
340 }
341
342 fn parse_from_token<R: Read>(lexer: &mut Lexer<R>, token: Token) -> ParseResult<Self> {
344 match token {
345 Token::Null => Ok(PdfObject::Null),
346 Token::Boolean(b) => Ok(PdfObject::Boolean(b)),
347 Token::Integer(i) => {
348 if !(0..=9999999).contains(&i) {
350 return Ok(PdfObject::Integer(i));
351 }
352
353 match lexer.next_token()? {
355 Token::Integer(gen) if (0..=65535).contains(&gen) => {
356 match lexer.next_token()? {
358 Token::Name(s) if s == "R" => {
359 Ok(PdfObject::Reference(i as u32, gen as u16))
360 }
361 token => {
362 lexer.push_token(token);
364 lexer.push_token(Token::Integer(gen));
365 Ok(PdfObject::Integer(i))
366 }
367 }
368 }
369 token => {
370 lexer.push_token(token);
372 Ok(PdfObject::Integer(i))
373 }
374 }
375 }
376 Token::Real(r) => Ok(PdfObject::Real(r)),
377 Token::String(s) => Ok(PdfObject::String(PdfString(s))),
378 Token::Name(n) => Ok(PdfObject::Name(PdfName(n))),
379 Token::ArrayStart => Self::parse_array(lexer),
380 Token::DictStart => Self::parse_dictionary_or_stream(lexer),
381 Token::Comment(_) => {
382 Self::parse(lexer)
384 }
385 Token::StartXRef => {
386 Err(ParseError::SyntaxError {
388 position: 0,
389 message: "StartXRef encountered - this is not a PDF object".to_string(),
390 })
391 }
392 Token::Eof => Err(ParseError::SyntaxError {
393 position: 0,
394 message: "Unexpected end of file".to_string(),
395 }),
396 _ => Err(ParseError::UnexpectedToken {
397 expected: "PDF object".to_string(),
398 found: format!("{token:?}"),
399 }),
400 }
401 }
402
403 fn parse_array<R: Read>(lexer: &mut Lexer<R>) -> ParseResult<Self> {
405 let mut elements = Vec::new();
406
407 loop {
408 let token = lexer.next_token()?;
409 match token {
410 Token::ArrayEnd => break,
411 Token::Comment(_) => continue, _ => {
413 let obj = Self::parse_from_token(lexer, token)?;
414 elements.push(obj);
415 }
416 }
417 }
418
419 Ok(PdfObject::Array(PdfArray(elements)))
420 }
421
422 fn parse_dictionary_or_stream<R: Read>(lexer: &mut Lexer<R>) -> ParseResult<Self> {
424 let dict = Self::parse_dictionary_inner(lexer)?;
425
426 loop {
428 let token = lexer.next_token()?;
429 match token {
431 Token::Stream => {
432 let stream_data = Self::parse_stream_data(lexer, &dict)?;
434 return Ok(PdfObject::Stream(PdfStream {
435 dict,
436 data: stream_data,
437 }));
438 }
439 Token::Comment(_) => {
440 continue;
442 }
443 Token::StartXRef => {
444 lexer.push_token(token);
448 return Ok(PdfObject::Dictionary(dict));
449 }
450 _ => {
451 lexer.push_token(token);
455 return Ok(PdfObject::Dictionary(dict));
456 }
457 }
458 }
459 }
460
461 fn parse_dictionary_inner<R: Read>(lexer: &mut Lexer<R>) -> ParseResult<PdfDictionary> {
463 let mut dict = HashMap::new();
464
465 loop {
466 let token = lexer.next_token()?;
467 match token {
468 Token::DictEnd => break,
469 Token::Comment(_) => continue, Token::Name(key) => {
471 let value = Self::parse(lexer)?;
472 dict.insert(PdfName(key), value);
473 }
474 _ => {
475 return Err(ParseError::UnexpectedToken {
476 expected: "dictionary key (name) or >>".to_string(),
477 found: format!("{token:?}"),
478 });
479 }
480 }
481 }
482
483 Ok(PdfDictionary(dict))
484 }
485
486 fn parse_stream_data<R: Read>(
488 lexer: &mut Lexer<R>,
489 dict: &PdfDictionary,
490 ) -> ParseResult<Vec<u8>> {
491 let length = dict
493 .0
494 .get(&PdfName("Length".to_string()))
495 .ok_or_else(|| ParseError::MissingKey("Length".to_string()))?;
496
497 let length = match length {
498 PdfObject::Integer(len) => *len as usize,
499 PdfObject::Reference(_, _) => {
500 return Err(ParseError::SyntaxError {
503 position: lexer.position(),
504 message: "Stream length references not yet supported".to_string(),
505 });
506 }
507 _ => {
508 return Err(ParseError::SyntaxError {
509 position: lexer.position(),
510 message: "Invalid stream length type".to_string(),
511 });
512 }
513 };
514
515 lexer.read_newline()?;
517
518 let stream_data = lexer.read_bytes(length)?;
520
521 lexer.skip_whitespace()?;
523
524 let token = lexer.next_token()?;
526 match token {
527 Token::EndStream => Ok(stream_data),
528 _ => Err(ParseError::UnexpectedToken {
529 expected: "endstream".to_string(),
530 found: format!("{token:?}"),
531 }),
532 }
533 }
534
535 pub fn is_null(&self) -> bool {
546 matches!(self, PdfObject::Null)
547 }
548
549 pub fn as_bool(&self) -> Option<bool> {
567 match self {
568 PdfObject::Boolean(b) => Some(*b),
569 _ => None,
570 }
571 }
572
573 pub fn as_integer(&self) -> Option<i64> {
575 match self {
576 PdfObject::Integer(i) => Some(*i),
577 _ => None,
578 }
579 }
580
581 pub fn as_real(&self) -> Option<f64> {
602 match self {
603 PdfObject::Real(r) => Some(*r),
604 PdfObject::Integer(i) => Some(*i as f64),
605 _ => None,
606 }
607 }
608
609 pub fn as_string(&self) -> Option<&PdfString> {
611 match self {
612 PdfObject::String(s) => Some(s),
613 _ => None,
614 }
615 }
616
617 pub fn as_name(&self) -> Option<&PdfName> {
619 match self {
620 PdfObject::Name(n) => Some(n),
621 _ => None,
622 }
623 }
624
625 pub fn as_array(&self) -> Option<&PdfArray> {
627 match self {
628 PdfObject::Array(a) => Some(a),
629 _ => None,
630 }
631 }
632
633 pub fn as_dict(&self) -> Option<&PdfDictionary> {
635 match self {
636 PdfObject::Dictionary(d) => Some(d),
637 PdfObject::Stream(s) => Some(&s.dict),
638 _ => None,
639 }
640 }
641
642 pub fn as_stream(&self) -> Option<&PdfStream> {
644 match self {
645 PdfObject::Stream(s) => Some(s),
646 _ => None,
647 }
648 }
649
650 pub fn as_reference(&self) -> Option<(u32, u16)> {
670 match self {
671 PdfObject::Reference(obj, gen) => Some((*obj, *gen)),
672 _ => None,
673 }
674 }
675}
676
677impl Default for PdfDictionary {
678 fn default() -> Self {
679 Self::new()
680 }
681}
682
683impl PdfDictionary {
684 pub fn new() -> Self {
695 PdfDictionary(HashMap::new())
696 }
697
698 pub fn get(&self, key: &str) -> Option<&PdfObject> {
721 self.0.get(&PdfName(key.to_string()))
722 }
723
724 pub fn insert(&mut self, key: String, value: PdfObject) {
726 self.0.insert(PdfName(key), value);
727 }
728
729 pub fn contains_key(&self, key: &str) -> bool {
731 self.0.contains_key(&PdfName(key.to_string()))
732 }
733
734 pub fn get_type(&self) -> Option<&str> {
760 self.get("Type")
761 .and_then(|obj| obj.as_name())
762 .map(|n| n.0.as_str())
763 }
764}
765
766impl Default for PdfArray {
767 fn default() -> Self {
768 Self::new()
769 }
770}
771
772impl PdfArray {
773 pub fn new() -> Self {
775 PdfArray(Vec::new())
776 }
777
778 pub fn len(&self) -> usize {
780 self.0.len()
781 }
782
783 pub fn is_empty(&self) -> bool {
785 self.0.is_empty()
786 }
787
788 pub fn get(&self, index: usize) -> Option<&PdfObject> {
812 self.0.get(index)
813 }
814
815 pub fn push(&mut self, obj: PdfObject) {
817 self.0.push(obj);
818 }
819}
820
821impl PdfString {
822 pub fn new(data: Vec<u8>) -> Self {
824 PdfString(data)
825 }
826
827 pub fn as_str(&self) -> Result<&str, std::str::Utf8Error> {
848 std::str::from_utf8(&self.0)
849 }
850
851 pub fn as_bytes(&self) -> &[u8] {
853 &self.0
854 }
855}
856
857impl PdfName {
858 pub fn new(name: String) -> Self {
860 PdfName(name)
861 }
862
863 pub fn as_str(&self) -> &str {
865 &self.0
866 }
867}
868
869#[cfg(test)]
870mod tests {
871 use super::*;
872 use std::io::Cursor;
873
874 #[test]
875 fn test_parse_simple_objects() {
876 let input = b"null true false 123 -456 3.14 /Name (Hello)";
877 let mut lexer = Lexer::new(Cursor::new(input));
878
879 assert_eq!(PdfObject::parse(&mut lexer).unwrap(), PdfObject::Null);
880 assert_eq!(
881 PdfObject::parse(&mut lexer).unwrap(),
882 PdfObject::Boolean(true)
883 );
884 assert_eq!(
885 PdfObject::parse(&mut lexer).unwrap(),
886 PdfObject::Boolean(false)
887 );
888 assert_eq!(
889 PdfObject::parse(&mut lexer).unwrap(),
890 PdfObject::Integer(123)
891 );
892 assert_eq!(
893 PdfObject::parse(&mut lexer).unwrap(),
894 PdfObject::Integer(-456)
895 );
896 assert_eq!(PdfObject::parse(&mut lexer).unwrap(), PdfObject::Real(3.14));
897 assert_eq!(
898 PdfObject::parse(&mut lexer).unwrap(),
899 PdfObject::Name(PdfName("Name".to_string()))
900 );
901 assert_eq!(
902 PdfObject::parse(&mut lexer).unwrap(),
903 PdfObject::String(PdfString(b"Hello".to_vec()))
904 );
905 }
906
907 #[test]
908 fn test_parse_array() {
909 let input = b"[100 200 300 /Name (test)]";
911 let mut lexer = Lexer::new(Cursor::new(input));
912
913 let obj = PdfObject::parse(&mut lexer).unwrap();
914 let array = obj.as_array().unwrap();
915
916 assert_eq!(array.len(), 5);
917 assert_eq!(array.get(0).unwrap().as_integer(), Some(100));
918 assert_eq!(array.get(1).unwrap().as_integer(), Some(200));
919 assert_eq!(array.get(2).unwrap().as_integer(), Some(300));
920 assert_eq!(array.get(3).unwrap().as_name().unwrap().as_str(), "Name");
921 assert_eq!(
922 array.get(4).unwrap().as_string().unwrap().as_bytes(),
923 b"test"
924 );
925 }
926
927 #[test]
928 fn test_parse_array_with_references() {
929 let input = b"[1 0 R 2 0 R]";
931 let mut lexer = Lexer::new(Cursor::new(input));
932
933 let obj = PdfObject::parse(&mut lexer).unwrap();
934 let array = obj.as_array().unwrap();
935
936 assert_eq!(array.len(), 2);
937 assert!(array.get(0).unwrap().as_reference().is_some());
938 assert!(array.get(1).unwrap().as_reference().is_some());
939 }
940
941 #[test]
942 fn test_parse_dictionary() {
943 let input = b"<< /Type /Page /Parent 1 0 R /MediaBox [0 0 612 792] >>";
944 let mut lexer = Lexer::new(Cursor::new(input));
945
946 let obj = PdfObject::parse(&mut lexer).unwrap();
947 let dict = obj.as_dict().unwrap();
948
949 assert_eq!(dict.get_type(), Some("Page"));
950 assert!(dict.get("Parent").unwrap().as_reference().is_some());
951 assert!(dict.get("MediaBox").unwrap().as_array().is_some());
952 }
953
954 mod comprehensive_tests {
956 use super::*;
957
958 #[test]
959 fn test_pdf_object_null() {
960 let obj = PdfObject::Null;
961 assert!(obj.is_null());
962 assert_eq!(obj.as_bool(), None);
963 assert_eq!(obj.as_integer(), None);
964 assert_eq!(obj.as_real(), None);
965 assert_eq!(obj.as_string(), None);
966 assert_eq!(obj.as_name(), None);
967 assert_eq!(obj.as_array(), None);
968 assert_eq!(obj.as_dict(), None);
969 assert_eq!(obj.as_stream(), None);
970 assert_eq!(obj.as_reference(), None);
971 }
972
973 #[test]
974 fn test_pdf_object_boolean() {
975 let obj_true = PdfObject::Boolean(true);
976 let obj_false = PdfObject::Boolean(false);
977
978 assert!(!obj_true.is_null());
979 assert_eq!(obj_true.as_bool(), Some(true));
980 assert_eq!(obj_false.as_bool(), Some(false));
981
982 assert_eq!(obj_true.as_integer(), None);
983 assert_eq!(obj_true.as_real(), None);
984 assert_eq!(obj_true.as_string(), None);
985 assert_eq!(obj_true.as_name(), None);
986 assert_eq!(obj_true.as_array(), None);
987 assert_eq!(obj_true.as_dict(), None);
988 assert_eq!(obj_true.as_stream(), None);
989 assert_eq!(obj_true.as_reference(), None);
990 }
991
992 #[test]
993 fn test_pdf_object_integer() {
994 let obj = PdfObject::Integer(42);
995
996 assert!(!obj.is_null());
997 assert_eq!(obj.as_bool(), None);
998 assert_eq!(obj.as_integer(), Some(42));
999 assert_eq!(obj.as_real(), Some(42.0)); assert_eq!(obj.as_string(), None);
1001 assert_eq!(obj.as_name(), None);
1002 assert_eq!(obj.as_array(), None);
1003 assert_eq!(obj.as_dict(), None);
1004 assert_eq!(obj.as_stream(), None);
1005 assert_eq!(obj.as_reference(), None);
1006
1007 let obj_neg = PdfObject::Integer(-123);
1009 assert_eq!(obj_neg.as_integer(), Some(-123));
1010 assert_eq!(obj_neg.as_real(), Some(-123.0));
1011
1012 let obj_large = PdfObject::Integer(9999999999);
1014 assert_eq!(obj_large.as_integer(), Some(9999999999));
1015 assert_eq!(obj_large.as_real(), Some(9999999999.0));
1016 }
1017
1018 #[test]
1019 fn test_pdf_object_real() {
1020 let obj = PdfObject::Real(3.14159);
1021
1022 assert!(!obj.is_null());
1023 assert_eq!(obj.as_bool(), None);
1024 assert_eq!(obj.as_integer(), None);
1025 assert_eq!(obj.as_real(), Some(3.14159));
1026 assert_eq!(obj.as_string(), None);
1027 assert_eq!(obj.as_name(), None);
1028 assert_eq!(obj.as_array(), None);
1029 assert_eq!(obj.as_dict(), None);
1030 assert_eq!(obj.as_stream(), None);
1031 assert_eq!(obj.as_reference(), None);
1032
1033 let obj_neg = PdfObject::Real(-2.71828);
1035 assert_eq!(obj_neg.as_real(), Some(-2.71828));
1036
1037 let obj_zero = PdfObject::Real(0.0);
1039 assert_eq!(obj_zero.as_real(), Some(0.0));
1040
1041 let obj_small = PdfObject::Real(0.000001);
1043 assert_eq!(obj_small.as_real(), Some(0.000001));
1044
1045 let obj_large = PdfObject::Real(1e10);
1047 assert_eq!(obj_large.as_real(), Some(1e10));
1048 }
1049
1050 #[test]
1051 fn test_pdf_object_string() {
1052 let string_data = b"Hello World".to_vec();
1053 let pdf_string = PdfString(string_data.clone());
1054 let obj = PdfObject::String(pdf_string);
1055
1056 assert!(!obj.is_null());
1057 assert_eq!(obj.as_bool(), None);
1058 assert_eq!(obj.as_integer(), None);
1059 assert_eq!(obj.as_real(), None);
1060 assert!(obj.as_string().is_some());
1061 assert_eq!(obj.as_string().unwrap().as_bytes(), string_data);
1062 assert_eq!(obj.as_name(), None);
1063 assert_eq!(obj.as_array(), None);
1064 assert_eq!(obj.as_dict(), None);
1065 assert_eq!(obj.as_stream(), None);
1066 assert_eq!(obj.as_reference(), None);
1067 }
1068
1069 #[test]
1070 fn test_pdf_object_name() {
1071 let name_str = "Type".to_string();
1072 let pdf_name = PdfName(name_str.clone());
1073 let obj = PdfObject::Name(pdf_name);
1074
1075 assert!(!obj.is_null());
1076 assert_eq!(obj.as_bool(), None);
1077 assert_eq!(obj.as_integer(), None);
1078 assert_eq!(obj.as_real(), None);
1079 assert_eq!(obj.as_string(), None);
1080 assert!(obj.as_name().is_some());
1081 assert_eq!(obj.as_name().unwrap().as_str(), name_str);
1082 assert_eq!(obj.as_array(), None);
1083 assert_eq!(obj.as_dict(), None);
1084 assert_eq!(obj.as_stream(), None);
1085 assert_eq!(obj.as_reference(), None);
1086 }
1087
1088 #[test]
1089 fn test_pdf_object_array() {
1090 let mut array = PdfArray::new();
1091 array.push(PdfObject::Integer(1));
1092 array.push(PdfObject::Integer(2));
1093 array.push(PdfObject::Integer(3));
1094 let obj = PdfObject::Array(array);
1095
1096 assert!(!obj.is_null());
1097 assert_eq!(obj.as_bool(), None);
1098 assert_eq!(obj.as_integer(), None);
1099 assert_eq!(obj.as_real(), None);
1100 assert_eq!(obj.as_string(), None);
1101 assert_eq!(obj.as_name(), None);
1102 assert!(obj.as_array().is_some());
1103 assert_eq!(obj.as_array().unwrap().len(), 3);
1104 assert_eq!(obj.as_dict(), None);
1105 assert_eq!(obj.as_stream(), None);
1106 assert_eq!(obj.as_reference(), None);
1107 }
1108
1109 #[test]
1110 fn test_pdf_object_dictionary() {
1111 let mut dict = PdfDictionary::new();
1112 dict.insert(
1113 "Type".to_string(),
1114 PdfObject::Name(PdfName("Page".to_string())),
1115 );
1116 dict.insert("Count".to_string(), PdfObject::Integer(5));
1117 let obj = PdfObject::Dictionary(dict);
1118
1119 assert!(!obj.is_null());
1120 assert_eq!(obj.as_bool(), None);
1121 assert_eq!(obj.as_integer(), None);
1122 assert_eq!(obj.as_real(), None);
1123 assert_eq!(obj.as_string(), None);
1124 assert_eq!(obj.as_name(), None);
1125 assert_eq!(obj.as_array(), None);
1126 assert!(obj.as_dict().is_some());
1127 assert_eq!(obj.as_dict().unwrap().0.len(), 2);
1128 assert_eq!(obj.as_stream(), None);
1129 assert_eq!(obj.as_reference(), None);
1130 }
1131
1132 #[test]
1133 fn test_pdf_object_stream() {
1134 let mut dict = PdfDictionary::new();
1135 dict.insert("Length".to_string(), PdfObject::Integer(13));
1136 let data = b"Hello, World!".to_vec();
1137 let stream = PdfStream { dict, data };
1138 let obj = PdfObject::Stream(stream);
1139
1140 assert!(!obj.is_null());
1141 assert_eq!(obj.as_bool(), None);
1142 assert_eq!(obj.as_integer(), None);
1143 assert_eq!(obj.as_real(), None);
1144 assert_eq!(obj.as_string(), None);
1145 assert_eq!(obj.as_name(), None);
1146 assert_eq!(obj.as_array(), None);
1147 assert!(obj.as_dict().is_some()); assert!(obj.as_stream().is_some());
1149 assert_eq!(obj.as_stream().unwrap().raw_data(), b"Hello, World!");
1150 assert_eq!(obj.as_reference(), None);
1151 }
1152
1153 #[test]
1154 fn test_pdf_object_reference() {
1155 let obj = PdfObject::Reference(42, 0);
1156
1157 assert!(!obj.is_null());
1158 assert_eq!(obj.as_bool(), None);
1159 assert_eq!(obj.as_integer(), None);
1160 assert_eq!(obj.as_real(), None);
1161 assert_eq!(obj.as_string(), None);
1162 assert_eq!(obj.as_name(), None);
1163 assert_eq!(obj.as_array(), None);
1164 assert_eq!(obj.as_dict(), None);
1165 assert_eq!(obj.as_stream(), None);
1166 assert_eq!(obj.as_reference(), Some((42, 0)));
1167
1168 let obj_gen = PdfObject::Reference(123, 5);
1170 assert_eq!(obj_gen.as_reference(), Some((123, 5)));
1171 }
1172
1173 #[test]
1174 fn test_pdf_string_methods() {
1175 let string_data = b"Hello, World!".to_vec();
1176 let pdf_string = PdfString(string_data.clone());
1177
1178 assert_eq!(pdf_string.as_bytes(), string_data);
1179 assert_eq!(pdf_string.as_str().unwrap(), "Hello, World!");
1180 assert_eq!(pdf_string.0.len(), 13);
1181 assert!(!pdf_string.0.is_empty());
1182
1183 let empty_string = PdfString(vec![]);
1185 assert!(empty_string.0.is_empty());
1186 assert_eq!(empty_string.0.len(), 0);
1187
1188 let binary_data = vec![0xFF, 0xFE, 0x00, 0x48, 0x00, 0x69]; let binary_string = PdfString(binary_data.clone());
1191 assert_eq!(binary_string.as_bytes(), binary_data);
1192 assert!(binary_string.as_str().is_err()); }
1194
1195 #[test]
1196 fn test_pdf_name_methods() {
1197 let name_str = "Type".to_string();
1198 let pdf_name = PdfName(name_str.clone());
1199
1200 assert_eq!(pdf_name.as_str(), name_str);
1201 assert_eq!(pdf_name.0.len(), 4);
1202 assert!(!pdf_name.0.is_empty());
1203
1204 let empty_name = PdfName("".to_string());
1206 assert!(empty_name.0.is_empty());
1207 assert_eq!(empty_name.0.len(), 0);
1208
1209 let special_name = PdfName("Font#20Name".to_string());
1211 assert_eq!(special_name.as_str(), "Font#20Name");
1212 assert_eq!(special_name.0.len(), 11);
1213 }
1214
1215 #[test]
1216 fn test_pdf_array_methods() {
1217 let mut array = PdfArray::new();
1218 assert_eq!(array.len(), 0);
1219 assert!(array.is_empty());
1220
1221 array.push(PdfObject::Integer(1));
1223 array.push(PdfObject::Integer(2));
1224 array.push(PdfObject::Integer(3));
1225
1226 assert_eq!(array.len(), 3);
1227 assert!(!array.is_empty());
1228
1229 assert_eq!(array.get(0).unwrap().as_integer(), Some(1));
1231 assert_eq!(array.get(1).unwrap().as_integer(), Some(2));
1232 assert_eq!(array.get(2).unwrap().as_integer(), Some(3));
1233 assert!(array.get(3).is_none());
1234
1235 let values: Vec<i64> = array.0.iter().filter_map(|obj| obj.as_integer()).collect();
1237 assert_eq!(values, vec![1, 2, 3]);
1238
1239 let mut mixed_array = PdfArray::new();
1241 mixed_array.push(PdfObject::Integer(42));
1242 mixed_array.push(PdfObject::Real(3.14));
1243 mixed_array.push(PdfObject::String(PdfString(b"text".to_vec())));
1244 mixed_array.push(PdfObject::Name(PdfName("Name".to_string())));
1245 mixed_array.push(PdfObject::Boolean(true));
1246 mixed_array.push(PdfObject::Null);
1247
1248 assert_eq!(mixed_array.len(), 6);
1249 assert_eq!(mixed_array.get(0).unwrap().as_integer(), Some(42));
1250 assert_eq!(mixed_array.get(1).unwrap().as_real(), Some(3.14));
1251 assert_eq!(
1252 mixed_array.get(2).unwrap().as_string().unwrap().as_bytes(),
1253 b"text"
1254 );
1255 assert_eq!(
1256 mixed_array.get(3).unwrap().as_name().unwrap().as_str(),
1257 "Name"
1258 );
1259 assert_eq!(mixed_array.get(4).unwrap().as_bool(), Some(true));
1260 assert!(mixed_array.get(5).unwrap().is_null());
1261 }
1262
1263 #[test]
1264 fn test_pdf_dictionary_methods() {
1265 let mut dict = PdfDictionary::new();
1266 assert_eq!(dict.0.len(), 0);
1267 assert!(dict.0.is_empty());
1268
1269 dict.insert(
1271 "Type".to_string(),
1272 PdfObject::Name(PdfName("Page".to_string())),
1273 );
1274 dict.insert("Count".to_string(), PdfObject::Integer(5));
1275 dict.insert("Resources".to_string(), PdfObject::Reference(10, 0));
1276
1277 assert_eq!(dict.0.len(), 3);
1278 assert!(!dict.0.is_empty());
1279
1280 assert_eq!(
1282 dict.get("Type").unwrap().as_name().unwrap().as_str(),
1283 "Page"
1284 );
1285 assert_eq!(dict.get("Count").unwrap().as_integer(), Some(5));
1286 assert_eq!(dict.get("Resources").unwrap().as_reference(), Some((10, 0)));
1287 assert!(dict.get("NonExistent").is_none());
1288
1289 assert!(dict.contains_key("Type"));
1291 assert!(dict.contains_key("Count"));
1292 assert!(dict.contains_key("Resources"));
1293 assert!(!dict.contains_key("NonExistent"));
1294
1295 assert_eq!(dict.get_type(), Some("Page"));
1297
1298 let mut keys: Vec<String> = dict.0.keys().map(|k| k.0.clone()).collect();
1300 keys.sort();
1301 assert_eq!(keys, vec!["Count", "Resources", "Type"]);
1302
1303 let values: Vec<&PdfObject> = dict.0.values().collect();
1305 assert_eq!(values.len(), 3);
1306 }
1307
1308 #[test]
1309 fn test_pdf_stream_methods() {
1310 let mut dict = PdfDictionary::new();
1311 dict.insert("Length".to_string(), PdfObject::Integer(13));
1312 dict.insert(
1313 "Filter".to_string(),
1314 PdfObject::Name(PdfName("FlateDecode".to_string())),
1315 );
1316
1317 let data = b"Hello, World!".to_vec();
1318 let stream = PdfStream {
1319 dict,
1320 data: data.clone(),
1321 };
1322
1323 assert_eq!(stream.raw_data(), data);
1325
1326 assert_eq!(stream.dict.get("Length").unwrap().as_integer(), Some(13));
1328 assert_eq!(
1329 stream
1330 .dict
1331 .get("Filter")
1332 .unwrap()
1333 .as_name()
1334 .unwrap()
1335 .as_str(),
1336 "FlateDecode"
1337 );
1338
1339 let decode_result = stream.decode();
1342 assert!(decode_result.is_ok() || decode_result.is_err());
1343 }
1344
1345 #[test]
1346 fn test_parse_complex_nested_structures() {
1347 let input = b"[[1 2] [3 4] [5 6]]";
1349 let mut lexer = Lexer::new(Cursor::new(input));
1350 let obj = PdfObject::parse(&mut lexer).unwrap();
1351
1352 let outer_array = obj.as_array().unwrap();
1353 assert_eq!(outer_array.len(), 3);
1354
1355 for i in 0..3 {
1356 let inner_array = outer_array.get(i).unwrap().as_array().unwrap();
1357 assert_eq!(inner_array.len(), 2);
1358 assert_eq!(
1359 inner_array.get(0).unwrap().as_integer(),
1360 Some((i as i64) * 2 + 1)
1361 );
1362 assert_eq!(
1363 inner_array.get(1).unwrap().as_integer(),
1364 Some((i as i64) * 2 + 2)
1365 );
1366 }
1367 }
1368
1369 #[test]
1370 fn test_parse_complex_dictionary() {
1371 let input = b"<< /Type /Page /Parent 1 0 R /MediaBox [0 0 612 792] /Resources << /Font << /F1 2 0 R >> /ProcSet [/PDF /Text] >> /Contents 3 0 R >>";
1372 let mut lexer = Lexer::new(Cursor::new(input));
1373 let obj = PdfObject::parse(&mut lexer).unwrap();
1374
1375 let dict = obj.as_dict().unwrap();
1376 assert_eq!(dict.get_type(), Some("Page"));
1377 assert_eq!(dict.get("Parent").unwrap().as_reference(), Some((1, 0)));
1378 assert_eq!(dict.get("Contents").unwrap().as_reference(), Some((3, 0)));
1379
1380 let media_box = dict.get("MediaBox").unwrap().as_array().unwrap();
1382 assert_eq!(media_box.len(), 4);
1383 assert_eq!(media_box.get(0).unwrap().as_integer(), Some(0));
1384 assert_eq!(media_box.get(1).unwrap().as_integer(), Some(0));
1385 assert_eq!(media_box.get(2).unwrap().as_integer(), Some(612));
1386 assert_eq!(media_box.get(3).unwrap().as_integer(), Some(792));
1387
1388 let resources = dict.get("Resources").unwrap().as_dict().unwrap();
1390 assert!(resources.contains_key("Font"));
1391 assert!(resources.contains_key("ProcSet"));
1392
1393 let font_dict = resources.get("Font").unwrap().as_dict().unwrap();
1395 assert_eq!(font_dict.get("F1").unwrap().as_reference(), Some((2, 0)));
1396
1397 let proc_set = resources.get("ProcSet").unwrap().as_array().unwrap();
1399 assert_eq!(proc_set.len(), 2);
1400 assert_eq!(proc_set.get(0).unwrap().as_name().unwrap().as_str(), "PDF");
1401 assert_eq!(proc_set.get(1).unwrap().as_name().unwrap().as_str(), "Text");
1402 }
1403
1404 #[test]
1405 fn test_parse_hex_strings() {
1406 let input = b"<48656C6C6F>"; let mut lexer = Lexer::new(Cursor::new(input));
1408 let obj = PdfObject::parse(&mut lexer).unwrap();
1409
1410 let string = obj.as_string().unwrap();
1411 assert_eq!(string.as_str().unwrap(), "Hello");
1412 }
1413
1414 #[test]
1415 fn test_parse_literal_strings() {
1416 let input = b"(Hello World)";
1417 let mut lexer = Lexer::new(Cursor::new(input));
1418 let obj = PdfObject::parse(&mut lexer).unwrap();
1419
1420 let string = obj.as_string().unwrap();
1421 assert_eq!(string.as_str().unwrap(), "Hello World");
1422 }
1423
1424 #[test]
1425 fn test_parse_string_with_escapes() {
1426 let input = b"(Hello\\nWorld\\t!)";
1427 let mut lexer = Lexer::new(Cursor::new(input));
1428 let obj = PdfObject::parse(&mut lexer).unwrap();
1429
1430 let string = obj.as_string().unwrap();
1431 assert!(!string.as_bytes().is_empty());
1433 }
1434
1435 #[test]
1436 fn test_parse_names_with_special_chars() {
1437 let input = b"/Name#20with#20spaces";
1438 let mut lexer = Lexer::new(Cursor::new(input));
1439 let obj = PdfObject::parse(&mut lexer).unwrap();
1440
1441 let name = obj.as_name().unwrap();
1442 assert!(!name.as_str().is_empty());
1444 }
1445
1446 #[test]
1447 fn test_parse_references() {
1448 let input = b"1 0 R";
1449 let mut lexer = Lexer::new(Cursor::new(input));
1450 let obj = PdfObject::parse(&mut lexer).unwrap();
1451
1452 assert_eq!(obj.as_reference(), Some((1, 0)));
1453
1454 let input2 = b"42 5 R";
1456 let mut lexer2 = Lexer::new(Cursor::new(input2));
1457 let obj2 = PdfObject::parse(&mut lexer2).unwrap();
1458
1459 assert_eq!(obj2.as_reference(), Some((42, 5)));
1460 }
1461
1462 #[test]
1463 fn test_parse_edge_cases() {
1464 let input = b"9223372036854775807"; let mut lexer = Lexer::new(Cursor::new(input));
1467 let obj = PdfObject::parse(&mut lexer).unwrap();
1468 assert_eq!(obj.as_integer(), Some(9223372036854775807));
1469
1470 let input2 = b"-9223372036854775808"; let mut lexer2 = Lexer::new(Cursor::new(input2));
1473 let obj2 = PdfObject::parse(&mut lexer2).unwrap();
1474 assert_eq!(obj2.as_integer(), Some(-9223372036854775808));
1475
1476 let input3 = b"1.23e-10";
1478 let mut lexer3 = Lexer::new(Cursor::new(input3));
1479 let obj3 = PdfObject::parse(&mut lexer3).unwrap();
1480 assert!(obj3.as_real().is_some());
1482 }
1483
1484 #[test]
1485 fn test_parse_empty_structures() {
1486 let input = b"[]";
1488 let mut lexer = Lexer::new(Cursor::new(input));
1489 let obj = PdfObject::parse(&mut lexer).unwrap();
1490
1491 let array = obj.as_array().unwrap();
1492 assert_eq!(array.len(), 0);
1493 assert!(array.is_empty());
1494
1495 let input2 = b"<< >>";
1497 let mut lexer2 = Lexer::new(Cursor::new(input2));
1498 let obj2 = PdfObject::parse(&mut lexer2).unwrap();
1499
1500 let dict = obj2.as_dict().unwrap();
1501 assert_eq!(dict.0.len(), 0);
1502 assert!(dict.0.is_empty());
1503 }
1504
1505 #[test]
1506 fn test_error_handling() {
1507 let input = b"[1 2 3"; let mut lexer = Lexer::new(Cursor::new(input));
1510 let result = PdfObject::parse(&mut lexer);
1511 assert!(result.is_err());
1512
1513 let input2 = b"<< /Type /Page"; let mut lexer2 = Lexer::new(Cursor::new(input2));
1516 let result2 = PdfObject::parse(&mut lexer2);
1517 assert!(result2.is_err());
1518
1519 let input3 = b"1 0 X"; let mut lexer3 = Lexer::new(Cursor::new(input3));
1522 let result3 = PdfObject::parse(&mut lexer3);
1523 assert!(result3.is_ok() || result3.is_err());
1526 }
1527
1528 #[test]
1529 fn test_clone_and_equality() {
1530 let obj1 = PdfObject::Integer(42);
1531 let obj2 = obj1.clone();
1532 assert_eq!(obj1, obj2);
1533
1534 let obj3 = PdfObject::Integer(43);
1535 assert_ne!(obj1, obj3);
1536
1537 let mut array = PdfArray::new();
1539 array.push(PdfObject::Integer(1));
1540 array.push(PdfObject::String(PdfString(b"test".to_vec())));
1541 let obj4 = PdfObject::Array(array);
1542 let obj5 = obj4.clone();
1543 assert_eq!(obj4, obj5);
1544 }
1545
1546 #[test]
1547 fn test_debug_formatting() {
1548 let obj = PdfObject::Integer(42);
1549 let debug_str = format!("{:?}", obj);
1550 assert!(debug_str.contains("Integer"));
1551 assert!(debug_str.contains("42"));
1552
1553 let name = PdfName("Type".to_string());
1554 let debug_str2 = format!("{:?}", name);
1555 assert!(debug_str2.contains("PdfName"));
1556 assert!(debug_str2.contains("Type"));
1557 }
1558
1559 #[test]
1560 fn test_performance_large_array() {
1561 let mut array = PdfArray::new();
1562 for i in 0..1000 {
1563 array.push(PdfObject::Integer(i));
1564 }
1565
1566 assert_eq!(array.len(), 1000);
1567 assert_eq!(array.get(0).unwrap().as_integer(), Some(0));
1568 assert_eq!(array.get(999).unwrap().as_integer(), Some(999));
1569
1570 let sum: i64 = array.0.iter().filter_map(|obj| obj.as_integer()).sum();
1572 assert_eq!(sum, 499500); }
1574
1575 #[test]
1576 fn test_performance_large_dictionary() {
1577 let mut dict = PdfDictionary::new();
1578 for i in 0..1000 {
1579 dict.insert(format!("Key{}", i), PdfObject::Integer(i));
1580 }
1581
1582 assert_eq!(dict.0.len(), 1000);
1583 assert_eq!(dict.get("Key0").unwrap().as_integer(), Some(0));
1584 assert_eq!(dict.get("Key999").unwrap().as_integer(), Some(999));
1585
1586 for i in 0..1000 {
1588 assert!(dict.contains_key(&format!("Key{}", i)));
1589 }
1590 }
1591 }
1592}