1use super::lexer::{Lexer, Token};
36use super::{ParseError, ParseOptions, ParseResult};
37use std::collections::HashMap;
38use std::io::Read;
39
/// A PDF name object, stored as an owned `String`.
///
/// Derives `Eq` and `Hash` so it can be used as the key type of the map inside
/// [`PdfDictionary`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct PdfName(pub String);
61
/// A PDF string object, kept as raw bytes.
///
/// The bytes are not required to be valid UTF-8; `PdfString::as_str` performs
/// the (fallible) conversion on demand.
#[derive(Debug, Clone, PartialEq)]
pub struct PdfString(pub Vec<u8>);
89
/// A PDF array object: an ordered list of [`PdfObject`] values.
#[derive(Debug, Clone, PartialEq)]
pub struct PdfArray(pub Vec<PdfObject>);
118
/// A PDF dictionary object: a map from [`PdfName`] keys to [`PdfObject`] values.
///
/// Backed by a `HashMap`, so iteration order is unspecified.
#[derive(Debug, Clone, PartialEq)]
pub struct PdfDictionary(pub HashMap<PdfName, PdfObject>);
148
/// A PDF stream object: a dictionary plus the stream's bytes.
#[derive(Debug, Clone, PartialEq)]
pub struct PdfStream {
    /// The stream dictionary (the parser reads `Length` and `Filter` from it).
    pub dict: PdfDictionary,
    /// The stream bytes exactly as read from the file; `PdfStream::decode`
    /// passes them to the filter pipeline.
    pub data: Vec<u8>,
}
192
/// A shared, statically allocated empty [`PdfArray`].
pub static EMPTY_PDF_ARRAY: PdfArray = PdfArray(Vec::new());
195
196impl PdfStream {
197 pub fn decode(&self, options: &ParseOptions) -> ParseResult<Vec<u8>> {
232 super::filters::decode_stream(&self.data, &self.dict, options)
233 }
234
235 pub fn raw_data(&self) -> &[u8] {
249 &self.data
250 }
251}
252
/// Any value that can appear in a PDF file body.
///
/// Only `PartialEq` is derived (not `Eq`) because the `Real` variant holds an
/// `f64`.
#[derive(Debug, Clone, PartialEq)]
pub enum PdfObject {
    /// The `null` object.
    Null,
    /// A boolean value.
    Boolean(bool),
    /// An integer value.
    Integer(i64),
    /// A real (floating-point) value.
    Real(f64),
    /// A string object (raw bytes).
    String(PdfString),
    /// A name object.
    Name(PdfName),
    /// An array object.
    Array(PdfArray),
    /// A dictionary object.
    Dictionary(PdfDictionary),
    /// A stream object (dictionary plus data).
    Stream(PdfStream),
    /// An indirect reference: object number, then generation number.
    Reference(u32, u16),
}
311
312impl PdfObject {
313 pub fn parse<R: Read + std::io::Seek>(lexer: &mut Lexer<R>) -> ParseResult<Self> {
349 let token = lexer.next_token()?;
350 Self::parse_from_token(lexer, token)
351 }
352
353 pub fn parse_with_options<R: Read + std::io::Seek>(
355 lexer: &mut Lexer<R>,
356 options: &super::ParseOptions,
357 ) -> ParseResult<Self> {
358 let token = lexer.next_token()?;
359 Self::parse_from_token_with_options(lexer, token, options)
360 }
361
362 fn parse_from_token<R: Read + std::io::Seek>(
364 lexer: &mut Lexer<R>,
365 token: Token,
366 ) -> ParseResult<Self> {
367 Self::parse_from_token_with_options(lexer, token, &super::ParseOptions::default())
368 }
369
370 fn parse_from_token_with_options<R: Read + std::io::Seek>(
372 lexer: &mut Lexer<R>,
373 token: Token,
374 options: &super::ParseOptions,
375 ) -> ParseResult<Self> {
376 match token {
377 Token::Null => Ok(PdfObject::Null),
378 Token::Boolean(b) => Ok(PdfObject::Boolean(b)),
379 Token::Integer(i) => {
380 if !(0..=9999999).contains(&i) {
382 return Ok(PdfObject::Integer(i));
383 }
384
385 match lexer.next_token()? {
387 Token::Integer(gen) if (0..=65535).contains(&gen) => {
388 match lexer.next_token()? {
390 Token::Name(s) if s == "R" => {
391 Ok(PdfObject::Reference(i as u32, gen as u16))
392 }
393 token => {
394 lexer.push_token(token);
396 lexer.push_token(Token::Integer(gen));
397 Ok(PdfObject::Integer(i))
398 }
399 }
400 }
401 token => {
402 lexer.push_token(token);
404 Ok(PdfObject::Integer(i))
405 }
406 }
407 }
408 Token::Real(r) => Ok(PdfObject::Real(r)),
409 Token::String(s) => Ok(PdfObject::String(PdfString(s))),
410 Token::Name(n) => Ok(PdfObject::Name(PdfName(n))),
411 Token::ArrayStart => Self::parse_array_with_options(lexer, options),
412 Token::DictStart => Self::parse_dictionary_or_stream_with_options(lexer, options),
413 Token::Comment(_) => {
414 Self::parse_with_options(lexer, options)
416 }
417 Token::StartXRef => {
418 Err(ParseError::SyntaxError {
420 position: 0,
421 message: "StartXRef encountered - this is not a PDF object".to_string(),
422 })
423 }
424 Token::Eof => Err(ParseError::SyntaxError {
425 position: 0,
426 message: "Unexpected end of file".to_string(),
427 }),
428 _ => Err(ParseError::UnexpectedToken {
429 expected: "PDF object".to_string(),
430 found: format!("{token:?}"),
431 }),
432 }
433 }
434
435 fn parse_array_with_options<R: Read + std::io::Seek>(
437 lexer: &mut Lexer<R>,
438 options: &super::ParseOptions,
439 ) -> ParseResult<Self> {
440 let mut elements = Vec::new();
441
442 loop {
443 let token = lexer.next_token()?;
444 match token {
445 Token::ArrayEnd => break,
446 Token::Comment(_) => continue, _ => {
448 let obj = Self::parse_from_token_with_options(lexer, token, options)?;
449 elements.push(obj);
450 }
451 }
452 }
453
454 Ok(PdfObject::Array(PdfArray(elements)))
455 }
456
457 fn parse_dictionary_or_stream_with_options<R: Read + std::io::Seek>(
459 lexer: &mut Lexer<R>,
460 options: &super::ParseOptions,
461 ) -> ParseResult<Self> {
462 let dict = Self::parse_dictionary_inner_with_options(lexer, options)?;
463
464 loop {
466 let token = lexer.next_token()?;
467 match token {
469 Token::Stream => {
470 let stream_data = Self::parse_stream_data_with_options(lexer, &dict, options)?;
472 return Ok(PdfObject::Stream(PdfStream {
473 dict,
474 data: stream_data,
475 }));
476 }
477 Token::Comment(_) => {
478 continue;
480 }
481 Token::StartXRef => {
482 lexer.push_token(token);
486 return Ok(PdfObject::Dictionary(dict));
487 }
488 _ => {
489 lexer.push_token(token);
493 return Ok(PdfObject::Dictionary(dict));
494 }
495 }
496 }
497 }
498
499 fn parse_dictionary_inner_with_options<R: Read + std::io::Seek>(
501 lexer: &mut Lexer<R>,
502 options: &super::ParseOptions,
503 ) -> ParseResult<PdfDictionary> {
504 let mut dict = HashMap::new();
505
506 loop {
507 let token = lexer.next_token()?;
508 match token {
509 Token::DictEnd => break,
510 Token::Comment(_) => continue, Token::Name(key) => {
512 let value = Self::parse_with_options(lexer, options)?;
513 dict.insert(PdfName(key), value);
514 }
515 _ => {
516 return Err(ParseError::UnexpectedToken {
517 expected: "dictionary key (name) or >>".to_string(),
518 found: format!("{token:?}"),
519 });
520 }
521 }
522 }
523
524 Ok(PdfDictionary(dict))
525 }
526
527 fn parse_stream_data_with_options<R: Read + std::io::Seek>(
529 lexer: &mut Lexer<R>,
530 dict: &PdfDictionary,
531 options: &super::ParseOptions,
532 ) -> ParseResult<Vec<u8>> {
533 let length = dict
535 .0
536 .get(&PdfName("Length".to_string()))
537 .or_else(|| {
538 if options.lenient_streams {
540 if options.collect_warnings {
541 tracing::debug!("Warning: Missing Length key in stream dictionary, will search for endstream marker");
542 }
543 Some(&PdfObject::Integer(-1))
545 } else {
546 None
547 }
548 })
549 .ok_or_else(|| ParseError::MissingKey("Length".to_string()))?;
550
551 let length = match length {
552 PdfObject::Integer(len) => {
553 if *len == -1 {
554 usize::MAX } else {
557 *len as usize
558 }
559 }
560 PdfObject::Reference(obj_num, gen_num) => {
561 if options.lenient_streams {
564 if options.collect_warnings {
565 tracing::debug!("Warning: Stream length is an indirect reference ({obj_num} {gen_num} R). Using unlimited endstream search.");
566 }
567 usize::MAX - 1 } else {
570 return Err(ParseError::SyntaxError {
571 position: lexer.position(),
572 message: format!(
573 "Stream length reference ({obj_num} {gen_num} R) requires lenient mode"
574 ),
575 });
576 }
577 }
578 _ => {
579 return Err(ParseError::SyntaxError {
580 position: lexer.position(),
581 message: "Invalid stream length type".to_string(),
582 });
583 }
584 };
585
586 lexer.read_newline()?;
588
589 let mut stream_data = if length == usize::MAX || length == usize::MAX - 1 {
591 let is_indirect_ref = length == usize::MAX - 1;
593 let is_dct_decode = dict
595 .0
596 .get(&PdfName("Filter".to_string()))
597 .map(|filter| match filter {
598 PdfObject::Name(name) => name.0 == "DCTDecode",
599 PdfObject::Array(arr) => arr
600 .0
601 .iter()
602 .any(|f| matches!(f, PdfObject::Name(name) if name.0 == "DCTDecode")),
603 _ => false,
604 })
605 .unwrap_or(false);
606
607 let mut data = Vec::new();
608 let max_search = if is_indirect_ref {
611 10 * 1024 * 1024 } else {
613 65536 };
615 let mut found_endstream = false;
616
617 if is_indirect_ref && options.collect_warnings {
618 tracing::debug!("Searching for endstream without fixed limit (up to {}MB) for indirect reference", max_search / 1024 / 1024);
619 }
620
621 for i in 0..max_search {
622 match lexer.peek_byte() {
623 Ok(b) => {
624 if b == b'e' {
626 let mut temp_buffer = vec![b'e'];
628 let expected = b"ndstream";
629 let mut is_endstream = true;
630
631 let _ = lexer.read_byte();
633
634 for &expected_byte in expected.iter() {
636 match lexer.read_byte() {
637 Ok(byte) => {
638 temp_buffer.push(byte);
639 if byte != expected_byte {
640 is_endstream = false;
641 break;
642 }
643 }
644 Err(_) => {
645 is_endstream = false;
646 break;
647 }
648 }
649 }
650
651 if is_endstream && temp_buffer.len() == 9 {
652 found_endstream = true;
654 if is_dct_decode {
655 tracing::debug!("🔍 [PARSER] Found 'endstream' after reading {} bytes for DCTDecode", data.len());
656 }
657 break;
658 } else {
659 data.extend(temp_buffer);
662 continue;
663 }
664 } else {
665 data.push(lexer.read_byte()?);
667 }
668
669 if is_dct_decode && i % 10000 == 0 && i > 0 {
671 }
673 }
674 Err(_) => {
675 break;
677 }
678 }
679 }
680
681 if !found_endstream && !options.lenient_streams {
682 return Err(ParseError::SyntaxError {
683 position: lexer.position(),
684 message: "Could not find endstream marker".to_string(),
685 });
686 }
687
688 if is_dct_decode {
689 tracing::debug!(
694 "DCTDecode stream: read {} bytes (full stream based on endstream marker)",
695 data.len()
696 );
697 }
698
699 data
700 } else {
701 lexer.read_bytes(length)?
702 };
703
704 lexer.skip_whitespace()?;
706
707 let peek_result = lexer.peek_token();
709
710 match peek_result {
711 Ok(Token::EndStream) => {
712 lexer.next_token()?;
714 Ok(stream_data)
715 }
716 Ok(other_token) => {
717 if options.lenient_streams {
718 let is_dct_decode = dict
720 .0
721 .get(&PdfName("Filter".to_string()))
722 .map(|filter| match filter {
723 PdfObject::Name(name) => name.0 == "DCTDecode",
724 PdfObject::Array(arr) => arr.0.iter().any(
725 |f| matches!(f, PdfObject::Name(name) if name.0 == "DCTDecode"),
726 ),
727 _ => false,
728 })
729 .unwrap_or(false);
730
731 if is_dct_decode {
732 tracing::debug!("Warning: DCTDecode stream length mismatch at {length} bytes, but not extending JPEG data");
735
736 if let Some(additional_bytes) =
738 lexer.find_keyword_ahead("endstream", options.max_recovery_bytes)?
739 {
740 let _ = lexer.read_bytes(additional_bytes)?;
742 }
743
744 lexer.skip_whitespace()?;
746 lexer.expect_keyword("endstream")?;
747
748 Ok(stream_data)
749 } else {
750 tracing::debug!("Warning: Stream length mismatch. Expected 'endstream' after {length} bytes, got {other_token:?}");
752
753 let search_limit = if length == usize::MAX - 1 {
755 10 * 1024 * 1024 } else {
757 options.max_recovery_bytes
758 };
759
760 if let Some(additional_bytes) =
761 lexer.find_keyword_ahead("endstream", search_limit)?
762 {
763 let extra_data = lexer.read_bytes(additional_bytes)?;
765 stream_data.extend_from_slice(&extra_data);
766
767 let actual_length = stream_data.len();
768 tracing::debug!(
769 "Stream length corrected: declared={length}, actual={actual_length}"
770 );
771
772 lexer.skip_whitespace()?;
774 lexer.expect_keyword("endstream")?;
775
776 Ok(stream_data)
777 } else {
778 Err(ParseError::SyntaxError {
780 position: lexer.position(),
781 message: format!(
782 "Could not find 'endstream' within {} bytes",
783 search_limit
784 ),
785 })
786 }
787 }
788 } else {
789 Err(ParseError::UnexpectedToken {
791 expected: "endstream".to_string(),
792 found: format!("{other_token:?}"),
793 })
794 }
795 }
796 Err(e) => {
797 if options.lenient_streams {
798 tracing::debug!(
800 "Warning: Stream length mismatch. Could not peek next token after {length} bytes"
801 );
802
803 let search_limit = if length == usize::MAX - 1 {
805 10 * 1024 * 1024 } else {
807 options.max_recovery_bytes
808 };
809
810 if let Some(additional_bytes) =
811 lexer.find_keyword_ahead("endstream", search_limit)?
812 {
813 let extra_data = lexer.read_bytes(additional_bytes)?;
815 stream_data.extend_from_slice(&extra_data);
816
817 let actual_length = stream_data.len();
818 tracing::debug!(
819 "Stream length corrected: declared={length}, actual={actual_length}"
820 );
821
822 lexer.skip_whitespace()?;
824 lexer.expect_keyword("endstream")?;
825
826 Ok(stream_data)
827 } else {
828 Err(ParseError::SyntaxError {
830 position: lexer.position(),
831 message: format!(
832 "Could not find 'endstream' within {} bytes",
833 search_limit
834 ),
835 })
836 }
837 } else {
838 Err(e)
840 }
841 }
842 }
843 }
844
845 pub fn is_null(&self) -> bool {
856 matches!(self, PdfObject::Null)
857 }
858
859 pub fn as_bool(&self) -> Option<bool> {
877 match self {
878 PdfObject::Boolean(b) => Some(*b),
879 _ => None,
880 }
881 }
882
883 pub fn as_integer(&self) -> Option<i64> {
885 match self {
886 PdfObject::Integer(i) => Some(*i),
887 _ => None,
888 }
889 }
890
891 pub fn as_real(&self) -> Option<f64> {
912 match self {
913 PdfObject::Real(r) => Some(*r),
914 PdfObject::Integer(i) => Some(*i as f64),
915 _ => None,
916 }
917 }
918
919 pub fn as_string(&self) -> Option<&PdfString> {
921 match self {
922 PdfObject::String(s) => Some(s),
923 _ => None,
924 }
925 }
926
927 pub fn as_name(&self) -> Option<&PdfName> {
929 match self {
930 PdfObject::Name(n) => Some(n),
931 _ => None,
932 }
933 }
934
935 pub fn as_array(&self) -> Option<&PdfArray> {
937 match self {
938 PdfObject::Array(a) => Some(a),
939 _ => None,
940 }
941 }
942
943 pub fn as_dict(&self) -> Option<&PdfDictionary> {
945 match self {
946 PdfObject::Dictionary(d) => Some(d),
947 PdfObject::Stream(s) => Some(&s.dict),
948 _ => None,
949 }
950 }
951
952 pub fn as_stream(&self) -> Option<&PdfStream> {
954 match self {
955 PdfObject::Stream(s) => Some(s),
956 _ => None,
957 }
958 }
959
960 pub fn as_reference(&self) -> Option<(u32, u16)> {
980 match self {
981 PdfObject::Reference(obj, gen) => Some((*obj, *gen)),
982 _ => None,
983 }
984 }
985}
986
987impl Default for PdfDictionary {
988 fn default() -> Self {
989 Self::new()
990 }
991}
992
993impl PdfDictionary {
994 pub fn new() -> Self {
1005 PdfDictionary(HashMap::new())
1006 }
1007
1008 pub fn get(&self, key: &str) -> Option<&PdfObject> {
1031 self.0.get(&PdfName(key.to_string()))
1032 }
1033
1034 pub fn insert(&mut self, key: String, value: PdfObject) {
1036 self.0.insert(PdfName(key), value);
1037 }
1038
1039 pub fn contains_key(&self, key: &str) -> bool {
1041 self.0.contains_key(&PdfName(key.to_string()))
1042 }
1043
1044 pub fn get_type(&self) -> Option<&str> {
1070 self.get("Type")
1071 .and_then(|obj| obj.as_name())
1072 .map(|n| n.0.as_str())
1073 }
1074}
1075
1076impl Default for PdfArray {
1077 fn default() -> Self {
1078 Self::new()
1079 }
1080}
1081
1082impl PdfArray {
1083 pub fn new() -> Self {
1085 PdfArray(Vec::new())
1086 }
1087
1088 pub fn len(&self) -> usize {
1090 self.0.len()
1091 }
1092
1093 pub fn is_empty(&self) -> bool {
1095 self.0.is_empty()
1096 }
1097
1098 pub fn get(&self, index: usize) -> Option<&PdfObject> {
1122 self.0.get(index)
1123 }
1124
1125 pub fn push(&mut self, obj: PdfObject) {
1127 self.0.push(obj);
1128 }
1129}
1130
1131impl PdfString {
1132 pub fn new(data: Vec<u8>) -> Self {
1134 PdfString(data)
1135 }
1136
1137 pub fn as_str(&self) -> Result<&str, std::str::Utf8Error> {
1158 std::str::from_utf8(&self.0)
1159 }
1160
1161 pub fn as_bytes(&self) -> &[u8] {
1163 &self.0
1164 }
1165}
1166
1167impl PdfName {
1168 pub fn new(name: String) -> Self {
1170 PdfName(name)
1171 }
1172
1173 pub fn as_str(&self) -> &str {
1175 &self.0
1176 }
1177}
1178
1179#[cfg(test)]
1180mod tests {
1181 use super::*;
1182 use crate::parser::lexer::Lexer;
1183 use crate::parser::ParseOptions;
1184 use std::collections::HashMap;
1185 use std::io::Cursor;
1186
1187 #[test]
1188 fn test_parse_simple_objects() {
1189 let input = b"null true false 123 -456 3.14 /Name (Hello)";
1190 let mut lexer = Lexer::new(Cursor::new(input));
1191
1192 assert_eq!(PdfObject::parse(&mut lexer).unwrap(), PdfObject::Null);
1193 assert_eq!(
1194 PdfObject::parse(&mut lexer).unwrap(),
1195 PdfObject::Boolean(true)
1196 );
1197 assert_eq!(
1198 PdfObject::parse(&mut lexer).unwrap(),
1199 PdfObject::Boolean(false)
1200 );
1201 assert_eq!(
1202 PdfObject::parse(&mut lexer).unwrap(),
1203 PdfObject::Integer(123)
1204 );
1205 assert_eq!(
1206 PdfObject::parse(&mut lexer).unwrap(),
1207 PdfObject::Integer(-456)
1208 );
1209 assert_eq!(PdfObject::parse(&mut lexer).unwrap(), PdfObject::Real(3.14));
1210 assert_eq!(
1211 PdfObject::parse(&mut lexer).unwrap(),
1212 PdfObject::Name(PdfName("Name".to_string()))
1213 );
1214 assert_eq!(
1215 PdfObject::parse(&mut lexer).unwrap(),
1216 PdfObject::String(PdfString(b"Hello".to_vec()))
1217 );
1218 }
1219
1220 #[test]
1221 fn test_parse_array() {
1222 let input = b"[100 200 300 /Name (test)]";
1224 let mut lexer = Lexer::new(Cursor::new(input));
1225
1226 let obj = PdfObject::parse(&mut lexer).unwrap();
1227 let array = obj.as_array().unwrap();
1228
1229 assert_eq!(array.len(), 5);
1230 assert_eq!(array.get(0).unwrap().as_integer(), Some(100));
1231 assert_eq!(array.get(1).unwrap().as_integer(), Some(200));
1232 assert_eq!(array.get(2).unwrap().as_integer(), Some(300));
1233 assert_eq!(array.get(3).unwrap().as_name().unwrap().as_str(), "Name");
1234 assert_eq!(
1235 array.get(4).unwrap().as_string().unwrap().as_bytes(),
1236 b"test"
1237 );
1238 }
1239
1240 #[test]
1241 fn test_parse_array_with_references() {
1242 let input = b"[1 0 R 2 0 R]";
1244 let mut lexer = Lexer::new(Cursor::new(input));
1245
1246 let obj = PdfObject::parse(&mut lexer).unwrap();
1247 let array = obj.as_array().unwrap();
1248
1249 assert_eq!(array.len(), 2);
1250 assert!(array.get(0).unwrap().as_reference().is_some());
1251 assert!(array.get(1).unwrap().as_reference().is_some());
1252 }
1253
1254 #[test]
1255 fn test_parse_dictionary() {
1256 let input = b"<< /Type /Page /Parent 1 0 R /MediaBox [0 0 612 792] >>";
1257 let mut lexer = Lexer::new(Cursor::new(input));
1258
1259 let obj = PdfObject::parse(&mut lexer).unwrap();
1260 let dict = obj.as_dict().unwrap();
1261
1262 assert_eq!(dict.get_type(), Some("Page"));
1263 assert!(dict.get("Parent").unwrap().as_reference().is_some());
1264 assert!(dict.get("MediaBox").unwrap().as_array().is_some());
1265 }
1266
1267 mod comprehensive_tests {
1269 use super::*;
1270
1271 #[test]
1272 fn test_pdf_object_null() {
1273 let obj = PdfObject::Null;
1274 assert!(obj.is_null());
1275 assert_eq!(obj.as_bool(), None);
1276 assert_eq!(obj.as_integer(), None);
1277 assert_eq!(obj.as_real(), None);
1278 assert_eq!(obj.as_string(), None);
1279 assert_eq!(obj.as_name(), None);
1280 assert_eq!(obj.as_array(), None);
1281 assert_eq!(obj.as_dict(), None);
1282 assert_eq!(obj.as_stream(), None);
1283 assert_eq!(obj.as_reference(), None);
1284 }
1285
1286 #[test]
1287 fn test_pdf_object_boolean() {
1288 let obj_true = PdfObject::Boolean(true);
1289 let obj_false = PdfObject::Boolean(false);
1290
1291 assert!(!obj_true.is_null());
1292 assert_eq!(obj_true.as_bool(), Some(true));
1293 assert_eq!(obj_false.as_bool(), Some(false));
1294
1295 assert_eq!(obj_true.as_integer(), None);
1296 assert_eq!(obj_true.as_real(), None);
1297 assert_eq!(obj_true.as_string(), None);
1298 assert_eq!(obj_true.as_name(), None);
1299 assert_eq!(obj_true.as_array(), None);
1300 assert_eq!(obj_true.as_dict(), None);
1301 assert_eq!(obj_true.as_stream(), None);
1302 assert_eq!(obj_true.as_reference(), None);
1303 }
1304
1305 #[test]
1306 fn test_pdf_object_integer() {
1307 let obj = PdfObject::Integer(42);
1308
1309 assert!(!obj.is_null());
1310 assert_eq!(obj.as_bool(), None);
1311 assert_eq!(obj.as_integer(), Some(42));
1312 assert_eq!(obj.as_real(), Some(42.0)); assert_eq!(obj.as_string(), None);
1314 assert_eq!(obj.as_name(), None);
1315 assert_eq!(obj.as_array(), None);
1316 assert_eq!(obj.as_dict(), None);
1317 assert_eq!(obj.as_stream(), None);
1318 assert_eq!(obj.as_reference(), None);
1319
1320 let obj_neg = PdfObject::Integer(-123);
1322 assert_eq!(obj_neg.as_integer(), Some(-123));
1323 assert_eq!(obj_neg.as_real(), Some(-123.0));
1324
1325 let obj_large = PdfObject::Integer(9999999999);
1327 assert_eq!(obj_large.as_integer(), Some(9999999999));
1328 assert_eq!(obj_large.as_real(), Some(9999999999.0));
1329 }
1330
1331 #[test]
1332 fn test_pdf_object_real() {
1333 let obj = PdfObject::Real(3.14159);
1334
1335 assert!(!obj.is_null());
1336 assert_eq!(obj.as_bool(), None);
1337 assert_eq!(obj.as_integer(), None);
1338 assert_eq!(obj.as_real(), Some(3.14159));
1339 assert_eq!(obj.as_string(), None);
1340 assert_eq!(obj.as_name(), None);
1341 assert_eq!(obj.as_array(), None);
1342 assert_eq!(obj.as_dict(), None);
1343 assert_eq!(obj.as_stream(), None);
1344 assert_eq!(obj.as_reference(), None);
1345
1346 let obj_neg = PdfObject::Real(-2.71828);
1348 assert_eq!(obj_neg.as_real(), Some(-2.71828));
1349
1350 let obj_zero = PdfObject::Real(0.0);
1352 assert_eq!(obj_zero.as_real(), Some(0.0));
1353
1354 let obj_small = PdfObject::Real(0.000001);
1356 assert_eq!(obj_small.as_real(), Some(0.000001));
1357
1358 let obj_large = PdfObject::Real(1e10);
1360 assert_eq!(obj_large.as_real(), Some(1e10));
1361 }
1362
1363 #[test]
1364 fn test_pdf_object_string() {
1365 let string_data = b"Hello World".to_vec();
1366 let pdf_string = PdfString(string_data.clone());
1367 let obj = PdfObject::String(pdf_string);
1368
1369 assert!(!obj.is_null());
1370 assert_eq!(obj.as_bool(), None);
1371 assert_eq!(obj.as_integer(), None);
1372 assert_eq!(obj.as_real(), None);
1373 assert!(obj.as_string().is_some());
1374 assert_eq!(obj.as_string().unwrap().as_bytes(), string_data);
1375 assert_eq!(obj.as_name(), None);
1376 assert_eq!(obj.as_array(), None);
1377 assert_eq!(obj.as_dict(), None);
1378 assert_eq!(obj.as_stream(), None);
1379 assert_eq!(obj.as_reference(), None);
1380 }
1381
1382 #[test]
1383 fn test_pdf_object_name() {
1384 let name_str = "Type".to_string();
1385 let pdf_name = PdfName(name_str.clone());
1386 let obj = PdfObject::Name(pdf_name);
1387
1388 assert!(!obj.is_null());
1389 assert_eq!(obj.as_bool(), None);
1390 assert_eq!(obj.as_integer(), None);
1391 assert_eq!(obj.as_real(), None);
1392 assert_eq!(obj.as_string(), None);
1393 assert!(obj.as_name().is_some());
1394 assert_eq!(obj.as_name().unwrap().as_str(), name_str);
1395 assert_eq!(obj.as_array(), None);
1396 assert_eq!(obj.as_dict(), None);
1397 assert_eq!(obj.as_stream(), None);
1398 assert_eq!(obj.as_reference(), None);
1399 }
1400
1401 #[test]
1402 fn test_pdf_object_array() {
1403 let mut array = PdfArray::new();
1404 array.push(PdfObject::Integer(1));
1405 array.push(PdfObject::Integer(2));
1406 array.push(PdfObject::Integer(3));
1407 let obj = PdfObject::Array(array);
1408
1409 assert!(!obj.is_null());
1410 assert_eq!(obj.as_bool(), None);
1411 assert_eq!(obj.as_integer(), None);
1412 assert_eq!(obj.as_real(), None);
1413 assert_eq!(obj.as_string(), None);
1414 assert_eq!(obj.as_name(), None);
1415 assert!(obj.as_array().is_some());
1416 assert_eq!(obj.as_array().unwrap().len(), 3);
1417 assert_eq!(obj.as_dict(), None);
1418 assert_eq!(obj.as_stream(), None);
1419 assert_eq!(obj.as_reference(), None);
1420 }
1421
1422 #[test]
1423 fn test_pdf_object_dictionary() {
1424 let mut dict = PdfDictionary::new();
1425 dict.insert(
1426 "Type".to_string(),
1427 PdfObject::Name(PdfName("Page".to_string())),
1428 );
1429 dict.insert("Count".to_string(), PdfObject::Integer(5));
1430 let obj = PdfObject::Dictionary(dict);
1431
1432 assert!(!obj.is_null());
1433 assert_eq!(obj.as_bool(), None);
1434 assert_eq!(obj.as_integer(), None);
1435 assert_eq!(obj.as_real(), None);
1436 assert_eq!(obj.as_string(), None);
1437 assert_eq!(obj.as_name(), None);
1438 assert_eq!(obj.as_array(), None);
1439 assert!(obj.as_dict().is_some());
1440 assert_eq!(obj.as_dict().unwrap().0.len(), 2);
1441 assert_eq!(obj.as_stream(), None);
1442 assert_eq!(obj.as_reference(), None);
1443 }
1444
1445 #[test]
1446 fn test_pdf_object_stream() {
1447 let mut dict = PdfDictionary::new();
1448 dict.insert("Length".to_string(), PdfObject::Integer(13));
1449 let data = b"Hello, World!".to_vec();
1450 let stream = PdfStream { dict, data };
1451 let obj = PdfObject::Stream(stream);
1452
1453 assert!(!obj.is_null());
1454 assert_eq!(obj.as_bool(), None);
1455 assert_eq!(obj.as_integer(), None);
1456 assert_eq!(obj.as_real(), None);
1457 assert_eq!(obj.as_string(), None);
1458 assert_eq!(obj.as_name(), None);
1459 assert_eq!(obj.as_array(), None);
1460 assert!(obj.as_dict().is_some()); assert!(obj.as_stream().is_some());
1462 assert_eq!(obj.as_stream().unwrap().raw_data(), b"Hello, World!");
1463 assert_eq!(obj.as_reference(), None);
1464 }
1465
1466 #[test]
1467 fn test_pdf_object_reference() {
1468 let obj = PdfObject::Reference(42, 0);
1469
1470 assert!(!obj.is_null());
1471 assert_eq!(obj.as_bool(), None);
1472 assert_eq!(obj.as_integer(), None);
1473 assert_eq!(obj.as_real(), None);
1474 assert_eq!(obj.as_string(), None);
1475 assert_eq!(obj.as_name(), None);
1476 assert_eq!(obj.as_array(), None);
1477 assert_eq!(obj.as_dict(), None);
1478 assert_eq!(obj.as_stream(), None);
1479 assert_eq!(obj.as_reference(), Some((42, 0)));
1480
1481 let obj_gen = PdfObject::Reference(123, 5);
1483 assert_eq!(obj_gen.as_reference(), Some((123, 5)));
1484 }
1485
1486 #[test]
1487 fn test_pdf_string_methods() {
1488 let string_data = b"Hello, World!".to_vec();
1489 let pdf_string = PdfString(string_data.clone());
1490
1491 assert_eq!(pdf_string.as_bytes(), string_data);
1492 assert_eq!(pdf_string.as_str().unwrap(), "Hello, World!");
1493 assert_eq!(pdf_string.0.len(), 13);
1494 assert!(!pdf_string.0.is_empty());
1495
1496 let empty_string = PdfString(vec![]);
1498 assert!(empty_string.0.is_empty());
1499 assert_eq!(empty_string.0.len(), 0);
1500
1501 let binary_data = vec![0xFF, 0xFE, 0x00, 0x48, 0x00, 0x69]; let binary_string = PdfString(binary_data.clone());
1504 assert_eq!(binary_string.as_bytes(), binary_data);
1505 assert!(binary_string.as_str().is_err()); }
1507
1508 #[test]
1509 fn test_pdf_name_methods() {
1510 let name_str = "Type".to_string();
1511 let pdf_name = PdfName(name_str.clone());
1512
1513 assert_eq!(pdf_name.as_str(), name_str);
1514 assert_eq!(pdf_name.0.len(), 4);
1515 assert!(!pdf_name.0.is_empty());
1516
1517 let empty_name = PdfName("".to_string());
1519 assert!(empty_name.0.is_empty());
1520 assert_eq!(empty_name.0.len(), 0);
1521
1522 let special_name = PdfName("Font#20Name".to_string());
1524 assert_eq!(special_name.as_str(), "Font#20Name");
1525 assert_eq!(special_name.0.len(), 11);
1526 }
1527
1528 #[test]
1529 fn test_pdf_array_methods() {
1530 let mut array = PdfArray::new();
1531 assert_eq!(array.len(), 0);
1532 assert!(array.is_empty());
1533
1534 array.push(PdfObject::Integer(1));
1536 array.push(PdfObject::Integer(2));
1537 array.push(PdfObject::Integer(3));
1538
1539 assert_eq!(array.len(), 3);
1540 assert!(!array.is_empty());
1541
1542 assert_eq!(array.get(0).unwrap().as_integer(), Some(1));
1544 assert_eq!(array.get(1).unwrap().as_integer(), Some(2));
1545 assert_eq!(array.get(2).unwrap().as_integer(), Some(3));
1546 assert!(array.get(3).is_none());
1547
1548 let values: Vec<i64> = array.0.iter().filter_map(|obj| obj.as_integer()).collect();
1550 assert_eq!(values, vec![1, 2, 3]);
1551
1552 let mut mixed_array = PdfArray::new();
1554 mixed_array.push(PdfObject::Integer(42));
1555 mixed_array.push(PdfObject::Real(3.14));
1556 mixed_array.push(PdfObject::String(PdfString(b"text".to_vec())));
1557 mixed_array.push(PdfObject::Name(PdfName("Name".to_string())));
1558 mixed_array.push(PdfObject::Boolean(true));
1559 mixed_array.push(PdfObject::Null);
1560
1561 assert_eq!(mixed_array.len(), 6);
1562 assert_eq!(mixed_array.get(0).unwrap().as_integer(), Some(42));
1563 assert_eq!(mixed_array.get(1).unwrap().as_real(), Some(3.14));
1564 assert_eq!(
1565 mixed_array.get(2).unwrap().as_string().unwrap().as_bytes(),
1566 b"text"
1567 );
1568 assert_eq!(
1569 mixed_array.get(3).unwrap().as_name().unwrap().as_str(),
1570 "Name"
1571 );
1572 assert_eq!(mixed_array.get(4).unwrap().as_bool(), Some(true));
1573 assert!(mixed_array.get(5).unwrap().is_null());
1574 }
1575
1576 #[test]
1577 fn test_pdf_dictionary_methods() {
1578 let mut dict = PdfDictionary::new();
1579 assert_eq!(dict.0.len(), 0);
1580 assert!(dict.0.is_empty());
1581
1582 dict.insert(
1584 "Type".to_string(),
1585 PdfObject::Name(PdfName("Page".to_string())),
1586 );
1587 dict.insert("Count".to_string(), PdfObject::Integer(5));
1588 dict.insert("Resources".to_string(), PdfObject::Reference(10, 0));
1589
1590 assert_eq!(dict.0.len(), 3);
1591 assert!(!dict.0.is_empty());
1592
1593 assert_eq!(
1595 dict.get("Type").unwrap().as_name().unwrap().as_str(),
1596 "Page"
1597 );
1598 assert_eq!(dict.get("Count").unwrap().as_integer(), Some(5));
1599 assert_eq!(dict.get("Resources").unwrap().as_reference(), Some((10, 0)));
1600 assert!(dict.get("NonExistent").is_none());
1601
1602 assert!(dict.contains_key("Type"));
1604 assert!(dict.contains_key("Count"));
1605 assert!(dict.contains_key("Resources"));
1606 assert!(!dict.contains_key("NonExistent"));
1607
1608 assert_eq!(dict.get_type(), Some("Page"));
1610
1611 let mut keys: Vec<String> = dict.0.keys().map(|k| k.0.clone()).collect();
1613 keys.sort();
1614 assert_eq!(keys, vec!["Count", "Resources", "Type"]);
1615
1616 let values: Vec<&PdfObject> = dict.0.values().collect();
1618 assert_eq!(values.len(), 3);
1619 }
1620
1621 #[test]
1622 fn test_pdf_stream_methods() {
1623 let mut dict = PdfDictionary::new();
1624 dict.insert("Length".to_string(), PdfObject::Integer(13));
1625 dict.insert(
1626 "Filter".to_string(),
1627 PdfObject::Name(PdfName("FlateDecode".to_string())),
1628 );
1629
1630 let data = b"Hello, World!".to_vec();
1631 let stream = PdfStream {
1632 dict,
1633 data: data.clone(),
1634 };
1635
1636 assert_eq!(stream.raw_data(), data);
1638
1639 assert_eq!(stream.dict.get("Length").unwrap().as_integer(), Some(13));
1641 assert_eq!(
1642 stream
1643 .dict
1644 .get("Filter")
1645 .unwrap()
1646 .as_name()
1647 .unwrap()
1648 .as_str(),
1649 "FlateDecode"
1650 );
1651
1652 let options = ParseOptions::default();
1655 let decode_result = stream.decode(&options);
1656 assert!(decode_result.is_ok() || decode_result.is_err());
1657 }
1658
1659 #[test]
1660 fn test_parse_complex_nested_structures() {
1661 let input = b"[[1 2] [3 4] [5 6]]";
1663 let mut lexer = Lexer::new(Cursor::new(input));
1664 let obj = PdfObject::parse(&mut lexer).unwrap();
1665
1666 let outer_array = obj.as_array().unwrap();
1667 assert_eq!(outer_array.len(), 3);
1668
1669 for i in 0..3 {
1670 let inner_array = outer_array.get(i).unwrap().as_array().unwrap();
1671 assert_eq!(inner_array.len(), 2);
1672 assert_eq!(
1673 inner_array.get(0).unwrap().as_integer(),
1674 Some((i as i64) * 2 + 1)
1675 );
1676 assert_eq!(
1677 inner_array.get(1).unwrap().as_integer(),
1678 Some((i as i64) * 2 + 2)
1679 );
1680 }
1681 }
1682
1683 #[test]
1684 fn test_parse_complex_dictionary() {
1685 let input = b"<< /Type /Page /Parent 1 0 R /MediaBox [0 0 612 792] /Resources << /Font << /F1 2 0 R >> /ProcSet [/PDF /Text] >> /Contents 3 0 R >>";
1686 let mut lexer = Lexer::new(Cursor::new(input));
1687 let obj = PdfObject::parse(&mut lexer).unwrap();
1688
1689 let dict = obj.as_dict().unwrap();
1690 assert_eq!(dict.get_type(), Some("Page"));
1691 assert_eq!(dict.get("Parent").unwrap().as_reference(), Some((1, 0)));
1692 assert_eq!(dict.get("Contents").unwrap().as_reference(), Some((3, 0)));
1693
1694 let media_box = dict.get("MediaBox").unwrap().as_array().unwrap();
1696 assert_eq!(media_box.len(), 4);
1697 assert_eq!(media_box.get(0).unwrap().as_integer(), Some(0));
1698 assert_eq!(media_box.get(1).unwrap().as_integer(), Some(0));
1699 assert_eq!(media_box.get(2).unwrap().as_integer(), Some(612));
1700 assert_eq!(media_box.get(3).unwrap().as_integer(), Some(792));
1701
1702 let resources = dict.get("Resources").unwrap().as_dict().unwrap();
1704 assert!(resources.contains_key("Font"));
1705 assert!(resources.contains_key("ProcSet"));
1706
1707 let font_dict = resources.get("Font").unwrap().as_dict().unwrap();
1709 assert_eq!(font_dict.get("F1").unwrap().as_reference(), Some((2, 0)));
1710
1711 let proc_set = resources.get("ProcSet").unwrap().as_array().unwrap();
1713 assert_eq!(proc_set.len(), 2);
1714 assert_eq!(proc_set.get(0).unwrap().as_name().unwrap().as_str(), "PDF");
1715 assert_eq!(proc_set.get(1).unwrap().as_name().unwrap().as_str(), "Text");
1716 }
1717
1718 #[test]
1719 fn test_parse_hex_strings() {
1720 let input = b"<48656C6C6F>"; let mut lexer = Lexer::new(Cursor::new(input));
1722 let obj = PdfObject::parse(&mut lexer).unwrap();
1723
1724 let string = obj.as_string().unwrap();
1725 assert_eq!(string.as_str().unwrap(), "Hello");
1726 }
1727
1728 #[test]
1729 fn test_parse_literal_strings() {
1730 let input = b"(Hello World)";
1731 let mut lexer = Lexer::new(Cursor::new(input));
1732 let obj = PdfObject::parse(&mut lexer).unwrap();
1733
1734 let string = obj.as_string().unwrap();
1735 assert_eq!(string.as_str().unwrap(), "Hello World");
1736 }
1737
1738 #[test]
1739 fn test_parse_string_with_escapes() {
1740 let input = b"(Hello\\nWorld\\t!)";
1741 let mut lexer = Lexer::new(Cursor::new(input));
1742 let obj = PdfObject::parse(&mut lexer).unwrap();
1743
1744 let string = obj.as_string().unwrap();
1745 assert!(!string.as_bytes().is_empty());
1747 }
1748
1749 #[test]
1750 fn test_parse_names_with_special_chars() {
1751 let input = b"/Name#20with#20spaces";
1752 let mut lexer = Lexer::new(Cursor::new(input));
1753 let obj = PdfObject::parse(&mut lexer).unwrap();
1754
1755 let name = obj.as_name().unwrap();
1756 assert!(!name.as_str().is_empty());
1758 }
1759
1760 #[test]
1761 fn test_parse_references() {
1762 let input = b"1 0 R";
1763 let mut lexer = Lexer::new(Cursor::new(input));
1764 let obj = PdfObject::parse(&mut lexer).unwrap();
1765
1766 assert_eq!(obj.as_reference(), Some((1, 0)));
1767
1768 let input2 = b"42 5 R";
1770 let mut lexer2 = Lexer::new(Cursor::new(input2));
1771 let obj2 = PdfObject::parse(&mut lexer2).unwrap();
1772
1773 assert_eq!(obj2.as_reference(), Some((42, 5)));
1774 }
1775
1776 #[test]
1777 fn test_parse_edge_cases() {
1778 let input = b"9223372036854775807"; let mut lexer = Lexer::new(Cursor::new(input));
1781 let obj = PdfObject::parse(&mut lexer).unwrap();
1782 assert_eq!(obj.as_integer(), Some(9223372036854775807));
1783
1784 let input2 = b"-9223372036854775808"; let mut lexer2 = Lexer::new(Cursor::new(input2));
1787 let obj2 = PdfObject::parse(&mut lexer2).unwrap();
1788 assert_eq!(obj2.as_integer(), Some(-9223372036854775808));
1789
1790 let input3 = b"1.23e-10";
1792 let mut lexer3 = Lexer::new(Cursor::new(input3));
1793 let obj3 = PdfObject::parse(&mut lexer3).unwrap();
1794 assert!(obj3.as_real().is_some());
1796 }
1797
1798 #[test]
1799 fn test_parse_empty_structures() {
1800 let input = b"[]";
1802 let mut lexer = Lexer::new(Cursor::new(input));
1803 let obj = PdfObject::parse(&mut lexer).unwrap();
1804
1805 let array = obj.as_array().unwrap();
1806 assert_eq!(array.len(), 0);
1807 assert!(array.is_empty());
1808
1809 let input2 = b"<< >>";
1811 let mut lexer2 = Lexer::new(Cursor::new(input2));
1812 let obj2 = PdfObject::parse(&mut lexer2).unwrap();
1813
1814 let dict = obj2.as_dict().unwrap();
1815 assert_eq!(dict.0.len(), 0);
1816 assert!(dict.0.is_empty());
1817 }
1818
1819 #[test]
1820 fn test_error_handling() {
1821 let input = b"[1 2 3"; let mut lexer = Lexer::new(Cursor::new(input));
1824 let result = PdfObject::parse(&mut lexer);
1825 assert!(result.is_err());
1826
1827 let input2 = b"<< /Type /Page"; let mut lexer2 = Lexer::new(Cursor::new(input2));
1830 let result2 = PdfObject::parse(&mut lexer2);
1831 assert!(result2.is_err());
1832
1833 let input3 = b"1 0 X"; let mut lexer3 = Lexer::new(Cursor::new(input3));
1836 let result3 = PdfObject::parse(&mut lexer3);
1837 assert!(result3.is_ok() || result3.is_err());
1840 }
1841
1842 #[test]
1843 fn test_clone_and_equality() {
1844 let obj1 = PdfObject::Integer(42);
1845 let obj2 = obj1.clone();
1846 assert_eq!(obj1, obj2);
1847
1848 let obj3 = PdfObject::Integer(43);
1849 assert_ne!(obj1, obj3);
1850
1851 let mut array = PdfArray::new();
1853 array.push(PdfObject::Integer(1));
1854 array.push(PdfObject::String(PdfString(b"test".to_vec())));
1855 let obj4 = PdfObject::Array(array);
1856 let obj5 = obj4.clone();
1857 assert_eq!(obj4, obj5);
1858 }
1859
1860 #[test]
1861 fn test_debug_formatting() {
1862 let obj = PdfObject::Integer(42);
1863 let debug_str = format!("{obj:?}");
1864 assert!(debug_str.contains("Integer"));
1865 assert!(debug_str.contains("42"));
1866
1867 let name = PdfName("Type".to_string());
1868 let debug_str2 = format!("{name:?}");
1869 assert!(debug_str2.contains("PdfName"));
1870 assert!(debug_str2.contains("Type"));
1871 }
1872
1873 #[test]
1874 fn test_performance_large_array() {
1875 let mut array = PdfArray::new();
1876 for i in 0..1000 {
1877 array.push(PdfObject::Integer(i));
1878 }
1879
1880 assert_eq!(array.len(), 1000);
1881 assert_eq!(array.get(0).unwrap().as_integer(), Some(0));
1882 assert_eq!(array.get(999).unwrap().as_integer(), Some(999));
1883
1884 let sum: i64 = array.0.iter().filter_map(|obj| obj.as_integer()).sum();
1886 assert_eq!(sum, 499500); }
1888
1889 #[test]
1890 fn test_performance_large_dictionary() {
1891 let mut dict = PdfDictionary::new();
1892 for i in 0..1000 {
1893 dict.insert(format!("Key{i}"), PdfObject::Integer(i));
1894 }
1895
1896 assert_eq!(dict.0.len(), 1000);
1897 assert_eq!(dict.get("Key0").unwrap().as_integer(), Some(0));
1898 assert_eq!(dict.get("Key999").unwrap().as_integer(), Some(999));
1899
1900 for i in 0..1000 {
1902 assert!(dict.contains_key(&format!("Key{i}")));
1903 }
1904 }
1905 }
1906
1907 #[test]
1908 fn test_lenient_stream_parsing_too_short() {
1909 let dict = PdfDictionary(
1912 vec![(PdfName("Length".to_string()), PdfObject::Integer(10))]
1913 .into_iter()
1914 .collect::<HashMap<_, _>>(),
1915 );
1916
1917 let stream_content = b"This is a much longer text content than just 10 bytes";
1920 let test_data = vec![
1921 b"\n".to_vec(), stream_content.to_vec(),
1923 b"\nendstream".to_vec(),
1924 ]
1925 .concat();
1926
1927 let mut cursor = Cursor::new(test_data);
1929 let mut lexer = Lexer::new(&mut cursor);
1930 let mut options = ParseOptions::default();
1931 options.lenient_streams = true;
1932 options.max_recovery_bytes = 100;
1933 options.collect_warnings = false;
1934
1935 let result = PdfObject::parse_stream_data_with_options(&mut lexer, &dict, &options);
1939 if let Err(e) = &result {
1940 tracing::debug!("Error in test_lenient_stream_parsing_too_short: {e:?}");
1941 tracing::debug!("Warning: Stream length mismatch expected, checking if lenient parsing is working correctly");
1942 }
1943 assert!(result.is_ok());
1944
1945 let stream_data = result.unwrap();
1946 let content = String::from_utf8_lossy(&stream_data);
1947
1948 assert!(content.contains("This is a"));
1951 }
1952
1953 #[test]
1954 fn test_lenient_stream_parsing_too_long() {
1955 let dict = PdfDictionary(
1957 vec![(PdfName("Length".to_string()), PdfObject::Integer(100))]
1958 .into_iter()
1959 .collect::<HashMap<_, _>>(),
1960 );
1961
1962 let stream_content = b"Short";
1964 let test_data = vec![
1965 b"\n".to_vec(), stream_content.to_vec(),
1967 b"\nendstream".to_vec(),
1968 ]
1969 .concat();
1970
1971 let mut cursor = Cursor::new(test_data);
1973 let mut lexer = Lexer::new(&mut cursor);
1974 let mut options = ParseOptions::default();
1975 options.lenient_streams = true;
1976 options.max_recovery_bytes = 100;
1977 options.collect_warnings = false;
1978
1979 let result = PdfObject::parse_stream_data_with_options(&mut lexer, &dict, &options);
1982
1983 assert!(result.is_err());
1987 }
1988
1989 #[test]
1990 fn test_lenient_stream_no_endstream_found() {
1991 let input = b"<< /Length 10 >>
1993stream
1994This text does not contain the magic word and continues for a very long time with no proper termination...";
1995
1996 let mut cursor = Cursor::new(input.to_vec());
1997 let mut lexer = Lexer::new(&mut cursor);
1998 let mut options = ParseOptions::default();
1999 options.lenient_streams = true;
2000 options.max_recovery_bytes = 50; options.collect_warnings = false;
2002
2003 let dict_token = lexer.next_token().unwrap();
2004 let obj = PdfObject::parse_from_token_with_options(&mut lexer, dict_token, &options);
2005
2006 assert!(obj.is_err());
2008 }
2009
2010 #[test]
2013 fn test_pdf_name_special_characters() {
2014 let name = PdfName::new("Name#20With#20Spaces".to_string());
2015 assert_eq!(name.as_str(), "Name#20With#20Spaces");
2016
2017 let unicode_name = PdfName::new("café".to_string());
2019 assert_eq!(unicode_name.as_str(), "café");
2020
2021 let special_name = PdfName::new("Font#2FSubtype".to_string());
2023 assert_eq!(special_name.as_str(), "Font#2FSubtype");
2024 }
2025
2026 #[test]
2027 fn test_pdf_name_edge_cases() {
2028 let empty_name = PdfName::new("".to_string());
2030 assert_eq!(empty_name.as_str(), "");
2031
2032 let long_name = PdfName::new("A".repeat(1000));
2034 assert_eq!(long_name.as_str().len(), 1000);
2035
2036 let complex_name = PdfName::new("ABCdef123-._~!*'()".to_string());
2038 assert_eq!(complex_name.as_str(), "ABCdef123-._~!*'()");
2039 }
2040
2041 #[test]
2042 fn test_pdf_string_encoding_validation() {
2043 let utf8_string = PdfString::new("Hello, 世界! 🌍".as_bytes().to_vec());
2045 assert!(utf8_string.as_str().is_ok());
2046
2047 let invalid_utf8 = PdfString::new(vec![0xFF, 0xFE, 0xFD]);
2049 assert!(invalid_utf8.as_str().is_err());
2050
2051 let empty_string = PdfString::new(vec![]);
2053 assert_eq!(empty_string.as_str().unwrap(), "");
2054 }
2055
2056 #[test]
2057 fn test_pdf_string_binary_data() {
2058 let binary_data = vec![0x00, 0x01, 0x02, 0x03, 0xFF, 0xFE, 0xFD, 0xFC];
2060 let binary_string = PdfString::new(binary_data.clone());
2061 assert_eq!(binary_string.as_bytes(), &binary_data);
2062
2063 let null_string = PdfString::new(vec![
2065 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x00, 0x57, 0x6F, 0x72, 0x6C, 0x64,
2066 ]);
2067 assert_eq!(binary_string.as_bytes().len(), 8);
2068 assert!(null_string.as_bytes().contains(&0x00));
2069 }
2070
2071 #[test]
2072 fn test_pdf_array_nested_structures() {
2073 let mut array = PdfArray::new();
2074
2075 let mut nested_array = PdfArray::new();
2077 nested_array.push(PdfObject::Integer(1));
2078 nested_array.push(PdfObject::Integer(2));
2079 array.push(PdfObject::Array(nested_array));
2080
2081 let mut nested_dict = PdfDictionary(HashMap::new());
2083 nested_dict.0.insert(
2084 PdfName::new("Key".to_string()),
2085 PdfObject::String(PdfString::new(b"Value".to_vec())),
2086 );
2087 array.push(PdfObject::Dictionary(nested_dict));
2088
2089 assert_eq!(array.len(), 2);
2090 assert!(matches!(array.get(0), Some(PdfObject::Array(_))));
2091 assert!(matches!(array.get(1), Some(PdfObject::Dictionary(_))));
2092 }
2093
2094 #[test]
2095 fn test_pdf_array_type_mixing() {
2096 let mut array = PdfArray::new();
2097
2098 array.push(PdfObject::Null);
2100 array.push(PdfObject::Boolean(true));
2101 array.push(PdfObject::Integer(42));
2102 array.push(PdfObject::Real(3.14159));
2103 array.push(PdfObject::String(PdfString::new(b"text".to_vec())));
2104 array.push(PdfObject::Name(PdfName::new("Name".to_string())));
2105
2106 assert_eq!(array.len(), 6);
2107 assert!(matches!(array.get(0), Some(PdfObject::Null)));
2108 assert!(matches!(array.get(1), Some(PdfObject::Boolean(true))));
2109 assert!(matches!(array.get(2), Some(PdfObject::Integer(42))));
2110 assert!(matches!(array.get(3), Some(PdfObject::Real(_))));
2111 assert!(matches!(array.get(4), Some(PdfObject::String(_))));
2112 assert!(matches!(array.get(5), Some(PdfObject::Name(_))));
2113 }
2114
2115 #[test]
2116 fn test_pdf_dictionary_key_operations() {
2117 let mut dict = PdfDictionary(HashMap::new());
2118
2119 dict.0.insert(
2121 PdfName::new("Type".to_string()),
2122 PdfObject::Name(PdfName::new("Test".to_string())),
2123 );
2124 dict.0
2125 .insert(PdfName::new("Count".to_string()), PdfObject::Integer(100));
2126 dict.0
2127 .insert(PdfName::new("Flag".to_string()), PdfObject::Boolean(true));
2128
2129 assert_eq!(dict.0.len(), 3);
2130 assert!(dict.0.contains_key(&PdfName::new("Type".to_string())));
2131 assert!(dict.0.contains_key(&PdfName::new("Count".to_string())));
2132 assert!(dict.0.contains_key(&PdfName::new("Flag".to_string())));
2133 assert!(!dict.0.contains_key(&PdfName::new("Missing".to_string())));
2134
2135 assert!(dict.0.get(&PdfName::new("Type".to_string())).is_some());
2137 }
2138
2139 #[test]
2140 fn test_pdf_dictionary_complex_values() {
2141 let mut dict = PdfDictionary(HashMap::new());
2142
2143 let mut rect_array = PdfArray::new();
2145 rect_array.push(PdfObject::Real(0.0));
2146 rect_array.push(PdfObject::Real(0.0));
2147 rect_array.push(PdfObject::Real(612.0));
2148 rect_array.push(PdfObject::Real(792.0));
2149
2150 dict.0.insert(
2151 PdfName::new("MediaBox".to_string()),
2152 PdfObject::Array(rect_array),
2153 );
2154
2155 let mut resources = PdfDictionary(HashMap::new());
2157 let mut font_dict = PdfDictionary(HashMap::new());
2158 font_dict
2159 .0
2160 .insert(PdfName::new("F1".to_string()), PdfObject::Reference(10, 0));
2161 resources.0.insert(
2162 PdfName::new("Font".to_string()),
2163 PdfObject::Dictionary(font_dict),
2164 );
2165
2166 dict.0.insert(
2167 PdfName::new("Resources".to_string()),
2168 PdfObject::Dictionary(resources),
2169 );
2170
2171 assert_eq!(dict.0.len(), 2);
2172 assert!(dict.0.get(&PdfName::new("MediaBox".to_string())).is_some());
2173 assert!(dict.0.get(&PdfName::new("Resources".to_string())).is_some());
2174 }
2175
2176 #[test]
2177 fn test_object_reference_validation() {
2178 let ref1 = PdfObject::Reference(1, 0);
2179 let ref2 = PdfObject::Reference(1, 0);
2180 let ref3 = PdfObject::Reference(1, 1);
2181 let ref4 = PdfObject::Reference(2, 0);
2182
2183 assert_eq!(ref1, ref2);
2184 assert_ne!(ref1, ref3);
2185 assert_ne!(ref1, ref4);
2186
2187 let max_ref = PdfObject::Reference(u32::MAX, u16::MAX);
2189 assert!(matches!(max_ref, PdfObject::Reference(u32::MAX, u16::MAX)));
2190 }
2191
2192 #[test]
2193 fn test_pdf_object_type_checking() {
2194 let objects = vec![
2195 PdfObject::Null,
2196 PdfObject::Boolean(true),
2197 PdfObject::Integer(42),
2198 PdfObject::Real(3.14),
2199 PdfObject::String(PdfString::new(b"text".to_vec())),
2200 PdfObject::Name(PdfName::new("Name".to_string())),
2201 PdfObject::Array(PdfArray::new()),
2202 PdfObject::Dictionary(PdfDictionary(HashMap::new())),
2203 PdfObject::Reference(1, 0),
2204 ];
2205
2206 assert!(matches!(objects[0], PdfObject::Null));
2208 assert!(matches!(objects[1], PdfObject::Boolean(_)));
2209 assert!(matches!(objects[2], PdfObject::Integer(_)));
2210 assert!(matches!(objects[3], PdfObject::Real(_)));
2211 assert!(matches!(objects[4], PdfObject::String(_)));
2212 assert!(matches!(objects[5], PdfObject::Name(_)));
2213 assert!(matches!(objects[6], PdfObject::Array(_)));
2214 assert!(matches!(objects[7], PdfObject::Dictionary(_)));
2215 assert!(matches!(objects[8], PdfObject::Reference(_, _)));
2216 }
2217
2218 #[test]
2219 fn test_pdf_array_large_capacity() {
2220 let mut array = PdfArray::new();
2221
2222 for i in 0..1000 {
2224 array.push(PdfObject::Integer(i));
2225 }
2226
2227 assert_eq!(array.len(), 1000);
2228 if let Some(PdfObject::Integer(val)) = array.get(999) {
2230 assert_eq!(*val, 999);
2231 } else {
2232 panic!("Expected Integer at index 999");
2233 }
2234 assert!(array.get(1000).is_none());
2235
2236 let mut count = 0;
2238 for i in 0..array.len() {
2239 if let Some(obj) = array.get(i) {
2240 if matches!(obj, PdfObject::Integer(_)) {
2241 count += 1;
2242 }
2243 }
2244 }
2245 assert_eq!(count, 1000);
2246 }
2247
2248 #[test]
2249 fn test_pdf_dictionary_memory_efficiency() {
2250 let mut dict = PdfDictionary(HashMap::new());
2251
2252 for i in 0..100 {
2254 let key = PdfName::new(format!("Key{}", i));
2255 dict.0.insert(key, PdfObject::Integer(i));
2256 }
2257
2258 assert_eq!(dict.0.len(), 100);
2259 assert!(dict.0.contains_key(&PdfName::new("Key99".to_string())));
2260 assert!(!dict.0.contains_key(&PdfName::new("Key100".to_string())));
2261
2262 dict.0.remove(&PdfName::new("Key50".to_string()));
2264 assert_eq!(dict.0.len(), 99);
2265 assert!(!dict.0.contains_key(&PdfName::new("Key50".to_string())));
2266 }
2267
2268 #[test]
2269 fn test_parsing_simple_error_cases() {
2270 use std::io::Cursor;
2271
2272 let empty_input = b"";
2274 let mut cursor = Cursor::new(empty_input.to_vec());
2275 let mut lexer = Lexer::new(&mut cursor);
2276 let result = PdfObject::parse(&mut lexer);
2277
2278 assert!(result.is_err());
2280 }
2281
2282 #[test]
2283 fn test_unicode_string_handling() {
2284 let unicode_tests = vec![
2286 ("ASCII", "Hello World"),
2287 ("Latin-1", "Café résumé"),
2288 ("Emoji", "Hello 🌍 World 🚀"),
2289 ("CJK", "你好世界"),
2290 ("Mixed", "Hello 世界! Bonjour 🌍"),
2291 ];
2292
2293 for (name, text) in unicode_tests {
2294 let pdf_string = PdfString::new(text.as_bytes().to_vec());
2295 match pdf_string.as_str() {
2296 Ok(decoded) => assert_eq!(decoded, text, "Failed for {}", name),
2297 Err(_) => {
2298 assert!(!text.is_empty(), "Should handle {}", name);
2300 }
2301 }
2302 }
2303 }
2304
2305 #[test]
2306 fn test_deep_nesting_limits() {
2307 let mut root_array = PdfArray::new();
2309
2310 for i in 0..10 {
2312 let mut nested = PdfArray::new();
2313 nested.push(PdfObject::Integer(i as i64));
2314 root_array.push(PdfObject::Array(nested));
2315 }
2316
2317 assert_eq!(root_array.len(), 10);
2318
2319 for i in 0..10 {
2321 if let Some(PdfObject::Array(nested)) = root_array.get(i) {
2322 assert_eq!(nested.len(), 1);
2323 }
2324 }
2325 }
2326
2327 #[test]
2328 fn test_special_numeric_values() {
2329 let numbers = vec![
2331 (0i64, 0.0f64),
2332 (i32::MAX as i64, f32::MAX as f64),
2333 (i32::MIN as i64, f32::MIN as f64),
2334 (-1i64, -1.0f64),
2335 (2147483647i64, 2147483647.0f64),
2336 ];
2337
2338 for (int_val, float_val) in numbers {
2339 let int_obj = PdfObject::Integer(int_val);
2340 let float_obj = PdfObject::Real(float_val);
2341
2342 assert!(matches!(int_obj, PdfObject::Integer(_)));
2343 assert!(matches!(float_obj, PdfObject::Real(_)));
2344 }
2345
2346 let special_floats = vec![
2348 (0.0f64, "zero"),
2349 (f64::INFINITY, "infinity"),
2350 (f64::NEG_INFINITY, "negative infinity"),
2351 ];
2352
2353 for (val, _name) in special_floats {
2354 let obj = PdfObject::Real(val);
2355 assert!(matches!(obj, PdfObject::Real(_)));
2356 }
2357 }
2358
2359 #[test]
2360 fn test_array_bounds_checking() {
2361 let mut array = PdfArray::new();
2362 array.push(PdfObject::Integer(1));
2363 array.push(PdfObject::Integer(2));
2364 array.push(PdfObject::Integer(3));
2365
2366 assert!(array.get(0).is_some());
2368 assert!(array.get(1).is_some());
2369 assert!(array.get(2).is_some());
2370
2371 assert!(array.get(3).is_none());
2373 assert!(array.get(100).is_none());
2374
2375 let empty_array = PdfArray::new();
2377 assert!(empty_array.get(0).is_none());
2378 assert_eq!(empty_array.len(), 0);
2379 }
2380
2381 #[test]
2382 fn test_dictionary_case_sensitivity() {
2383 let mut dict = PdfDictionary(HashMap::new());
2384
2385 dict.0.insert(
2387 PdfName::new("Type".to_string()),
2388 PdfObject::Name(PdfName::new("Page".to_string())),
2389 );
2390 dict.0.insert(
2391 PdfName::new("type".to_string()),
2392 PdfObject::Name(PdfName::new("Font".to_string())),
2393 );
2394 dict.0.insert(
2395 PdfName::new("TYPE".to_string()),
2396 PdfObject::Name(PdfName::new("Image".to_string())),
2397 );
2398
2399 assert_eq!(dict.0.len(), 3);
2400 assert!(dict.0.contains_key(&PdfName::new("Type".to_string())));
2401 assert!(dict.0.contains_key(&PdfName::new("type".to_string())));
2402 assert!(dict.0.contains_key(&PdfName::new("TYPE".to_string())));
2403
2404 if let Some(PdfObject::Name(name)) = dict.0.get(&PdfName::new("Type".to_string())) {
2406 assert_eq!(name.as_str(), "Page");
2407 }
2408 if let Some(PdfObject::Name(name)) = dict.0.get(&PdfName::new("type".to_string())) {
2409 assert_eq!(name.as_str(), "Font");
2410 }
2411 if let Some(PdfObject::Name(name)) = dict.0.get(&PdfName::new("TYPE".to_string())) {
2412 assert_eq!(name.as_str(), "Image");
2413 }
2414 }
2415
2416 #[test]
2417 fn test_object_cloning_and_equality() {
2418 let original_array = {
2419 let mut arr = PdfArray::new();
2420 arr.push(PdfObject::Integer(42));
2421 arr.push(PdfObject::String(PdfString::new(b"test".to_vec())));
2422 arr
2423 };
2424
2425 let cloned_array = original_array.clone();
2426 assert_eq!(original_array.len(), cloned_array.len());
2427
2428 for i in 0..original_array.len() {
2430 let orig = original_array.get(i).unwrap();
2431 let cloned = cloned_array.get(i).unwrap();
2432 match (orig, cloned) {
2433 (PdfObject::Integer(a), PdfObject::Integer(b)) => assert_eq!(a, b),
2434 (PdfObject::String(a), PdfObject::String(b)) => {
2435 assert_eq!(a.as_bytes(), b.as_bytes())
2436 }
2437 _ => panic!("Type mismatch in cloned array"),
2438 }
2439 }
2440 }
2441
2442 #[test]
2443 fn test_concurrent_object_access() {
2444 use std::sync::Arc;
2445 use std::thread;
2446
2447 let dict = Arc::new({
2448 let mut d = PdfDictionary(HashMap::new());
2449 d.0.insert(
2450 PdfName::new("SharedKey".to_string()),
2451 PdfObject::Integer(42),
2452 );
2453 d
2454 });
2455
2456 let dict_clone = Arc::clone(&dict);
2457 let handle = thread::spawn(move || {
2458 if let Some(PdfObject::Integer(val)) =
2460 dict_clone.0.get(&PdfName::new("SharedKey".to_string()))
2461 {
2462 assert_eq!(*val, 42);
2463 }
2464 });
2465
2466 if let Some(PdfObject::Integer(val)) = dict.0.get(&PdfName::new("SharedKey".to_string())) {
2468 assert_eq!(*val, 42);
2469 }
2470
2471 handle.join().unwrap();
2472 }
2473
2474 #[test]
2475 fn test_stream_data_edge_cases() {
2476 let mut dict = PdfDictionary(HashMap::new());
2478 dict.0
2479 .insert(PdfName::new("Length".to_string()), PdfObject::Integer(0));
2480
2481 let stream = PdfStream {
2482 dict: dict.clone(),
2483 data: vec![],
2484 };
2485
2486 assert_eq!(stream.data.len(), 0);
2488 assert!(stream.raw_data().is_empty());
2489
2490 let stream_with_data = PdfStream {
2492 dict,
2493 data: b"Hello World".to_vec(),
2494 };
2495
2496 assert_eq!(stream_with_data.raw_data(), b"Hello World");
2497 }
2498
2499 #[test]
2500 fn test_name_object_hash_consistency() {
2501 use std::collections::HashSet;
2502
2503 let mut name_set = HashSet::new();
2504
2505 name_set.insert(PdfName::new("Type".to_string()));
2507 name_set.insert(PdfName::new("Pages".to_string()));
2508 name_set.insert(PdfName::new("Type".to_string())); assert_eq!(name_set.len(), 2); assert!(name_set.contains(&PdfName::new("Type".to_string())));
2512 assert!(name_set.contains(&PdfName::new("Pages".to_string())));
2513 assert!(!name_set.contains(&PdfName::new("Font".to_string())));
2514 }
2515}
2516
2517