1use super::lexer::{Lexer, Token};
36use super::{ParseError, ParseOptions, ParseResult};
37use std::collections::HashMap;
38use std::io::Read;
39
/// A PDF name object (written `/Name` in PDF syntax), wrapping the name text
/// without the leading slash.
///
/// Derives `Eq` and `Hash` so that it can be used as the key type of
/// [`PdfDictionary`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct PdfName(pub String);
61
/// A PDF string object stored as raw bytes.
///
/// The bytes are not guaranteed to be valid UTF-8; `as_str` reports a
/// `Utf8Error` when they are not.
#[derive(Debug, Clone, PartialEq)]
pub struct PdfString(pub Vec<u8>);
89
/// A PDF array object: an ordered list of arbitrary [`PdfObject`] values.
#[derive(Debug, Clone, PartialEq)]
pub struct PdfArray(pub Vec<PdfObject>);
118
/// A PDF dictionary object: a map from [`PdfName`] keys to [`PdfObject`] values.
///
/// Backed by a `HashMap`, so iteration order is unspecified.
#[derive(Debug, Clone, PartialEq)]
pub struct PdfDictionary(pub HashMap<PdfName, PdfObject>);
148
/// A PDF stream object: a dictionary together with the bytes that follow it.
#[derive(Debug, Clone, PartialEq)]
pub struct PdfStream {
    /// The stream dictionary (the parser looks up `Length` and `Filter` in it).
    pub dict: PdfDictionary,
    /// The stream bytes as stored; `decode` hands them to the filter layer and
    /// `raw_data` returns them unchanged.
    pub data: Vec<u8>,
}
192
/// A statically allocated empty [`PdfArray`].
///
/// NOTE(review): presumably exists so callers can hand out a `&'static PdfArray`
/// when no array is present — no use is visible in this part of the file.
pub static EMPTY_PDF_ARRAY: PdfArray = PdfArray(Vec::new());
195
196impl PdfStream {
197 pub fn decode(&self, options: &ParseOptions) -> ParseResult<Vec<u8>> {
232 super::filters::decode_stream(&self.data, &self.dict, options)
233 }
234
235 pub fn raw_data(&self) -> &[u8] {
249 &self.data
250 }
251}
252
/// Any value that can appear in a PDF file body.
#[derive(Debug, Clone, PartialEq)]
pub enum PdfObject {
    /// The `null` object.
    Null,
    /// `true` or `false`.
    Boolean(bool),
    /// An integer number.
    Integer(i64),
    /// A real (floating-point) number.
    Real(f64),
    /// A string, kept as raw bytes.
    String(PdfString),
    /// A name such as `/Type`.
    Name(PdfName),
    /// An array of objects.
    Array(PdfArray),
    /// A dictionary of name/object pairs.
    Dictionary(PdfDictionary),
    /// A dictionary followed by stream data.
    Stream(PdfStream),
    /// An indirect reference `N G R`: object number, then generation number.
    Reference(u32, u16),
}
311
312impl PdfObject {
313 pub fn parse<R: Read + std::io::Seek>(lexer: &mut Lexer<R>) -> ParseResult<Self> {
349 let token = lexer.next_token()?;
350 Self::parse_from_token(lexer, token)
351 }
352
353 pub fn parse_with_options<R: Read + std::io::Seek>(
355 lexer: &mut Lexer<R>,
356 options: &super::ParseOptions,
357 ) -> ParseResult<Self> {
358 let token = lexer.next_token()?;
359 Self::parse_from_token_with_options(lexer, token, options)
360 }
361
362 fn parse_from_token<R: Read + std::io::Seek>(
364 lexer: &mut Lexer<R>,
365 token: Token,
366 ) -> ParseResult<Self> {
367 Self::parse_from_token_with_options(lexer, token, &super::ParseOptions::default())
368 }
369
370 fn parse_from_token_with_options<R: Read + std::io::Seek>(
372 lexer: &mut Lexer<R>,
373 token: Token,
374 options: &super::ParseOptions,
375 ) -> ParseResult<Self> {
376 match token {
377 Token::Null => Ok(PdfObject::Null),
378 Token::Boolean(b) => Ok(PdfObject::Boolean(b)),
379 Token::Integer(i) => {
380 if !(0..=9999999).contains(&i) {
382 return Ok(PdfObject::Integer(i));
383 }
384
385 match lexer.next_token()? {
387 Token::Integer(gen) if (0..=65535).contains(&gen) => {
388 match lexer.next_token()? {
390 Token::Name(s) if s == "R" => {
391 Ok(PdfObject::Reference(i as u32, gen as u16))
392 }
393 token => {
394 lexer.push_token(token);
396 lexer.push_token(Token::Integer(gen));
397 Ok(PdfObject::Integer(i))
398 }
399 }
400 }
401 token => {
402 lexer.push_token(token);
404 Ok(PdfObject::Integer(i))
405 }
406 }
407 }
408 Token::Real(r) => Ok(PdfObject::Real(r)),
409 Token::String(s) => Ok(PdfObject::String(PdfString(s))),
410 Token::Name(n) => Ok(PdfObject::Name(PdfName(n))),
411 Token::ArrayStart => Self::parse_array_with_options(lexer, options),
412 Token::DictStart => Self::parse_dictionary_or_stream_with_options(lexer, options),
413 Token::Comment(_) => {
414 Self::parse_with_options(lexer, options)
416 }
417 Token::StartXRef => {
418 Err(ParseError::SyntaxError {
420 position: 0,
421 message: "StartXRef encountered - this is not a PDF object".to_string(),
422 })
423 }
424 Token::Eof => Err(ParseError::SyntaxError {
425 position: 0,
426 message: "Unexpected end of file".to_string(),
427 }),
428 _ => Err(ParseError::UnexpectedToken {
429 expected: "PDF object".to_string(),
430 found: format!("{token:?}"),
431 }),
432 }
433 }
434
435 fn parse_array_with_options<R: Read + std::io::Seek>(
437 lexer: &mut Lexer<R>,
438 options: &super::ParseOptions,
439 ) -> ParseResult<Self> {
440 let mut elements = Vec::new();
441
442 loop {
443 let token = lexer.next_token()?;
444 match token {
445 Token::ArrayEnd => break,
446 Token::Comment(_) => continue, _ => {
448 let obj = Self::parse_from_token_with_options(lexer, token, options)?;
449 elements.push(obj);
450 }
451 }
452 }
453
454 Ok(PdfObject::Array(PdfArray(elements)))
455 }
456
457 fn parse_dictionary_or_stream_with_options<R: Read + std::io::Seek>(
459 lexer: &mut Lexer<R>,
460 options: &super::ParseOptions,
461 ) -> ParseResult<Self> {
462 let dict = Self::parse_dictionary_inner_with_options(lexer, options)?;
463
464 loop {
466 let token = lexer.next_token()?;
467 match token {
469 Token::Stream => {
470 let stream_data = Self::parse_stream_data_with_options(lexer, &dict, options)?;
472 return Ok(PdfObject::Stream(PdfStream {
473 dict,
474 data: stream_data,
475 }));
476 }
477 Token::Comment(_) => {
478 continue;
480 }
481 Token::StartXRef => {
482 lexer.push_token(token);
486 return Ok(PdfObject::Dictionary(dict));
487 }
488 _ => {
489 lexer.push_token(token);
493 return Ok(PdfObject::Dictionary(dict));
494 }
495 }
496 }
497 }
498
499 fn parse_dictionary_inner_with_options<R: Read + std::io::Seek>(
501 lexer: &mut Lexer<R>,
502 options: &super::ParseOptions,
503 ) -> ParseResult<PdfDictionary> {
504 let mut dict = HashMap::new();
505
506 loop {
507 let token = lexer.next_token()?;
508 match token {
509 Token::DictEnd => break,
510 Token::Comment(_) => continue, Token::Name(key) => {
512 let value = Self::parse_with_options(lexer, options)?;
513 dict.insert(PdfName(key), value);
514 }
515 _ => {
516 return Err(ParseError::UnexpectedToken {
517 expected: "dictionary key (name) or >>".to_string(),
518 found: format!("{token:?}"),
519 });
520 }
521 }
522 }
523
524 Ok(PdfDictionary(dict))
525 }
526
527 fn parse_stream_data_with_options<R: Read + std::io::Seek>(
529 lexer: &mut Lexer<R>,
530 dict: &PdfDictionary,
531 options: &super::ParseOptions,
532 ) -> ParseResult<Vec<u8>> {
533 let length = dict
535 .0
536 .get(&PdfName("Length".to_string()))
537 .or_else(|| {
538 if options.lenient_streams {
540 if options.collect_warnings {
541 tracing::debug!("Warning: Missing Length key in stream dictionary, will search for endstream marker");
542 }
543 Some(&PdfObject::Integer(-1))
545 } else {
546 None
547 }
548 })
549 .ok_or_else(|| ParseError::MissingKey("Length".to_string()))?;
550
551 let length = match length {
552 PdfObject::Integer(len) => {
553 if *len == -1 {
554 usize::MAX } else {
557 *len as usize
558 }
559 }
560 PdfObject::Reference(obj_num, gen_num) => {
561 if options.lenient_streams {
564 if options.collect_warnings {
565 tracing::debug!("Warning: Stream length is an indirect reference ({obj_num} {gen_num} R). Using unlimited endstream search.");
566 }
567 usize::MAX - 1 } else {
570 return Err(ParseError::SyntaxError {
571 position: lexer.position(),
572 message: format!(
573 "Stream length reference ({obj_num} {gen_num} R) requires lenient mode"
574 ),
575 });
576 }
577 }
578 _ => {
579 return Err(ParseError::SyntaxError {
580 position: lexer.position(),
581 message: "Invalid stream length type".to_string(),
582 });
583 }
584 };
585
586 lexer.read_newline()?;
588
589 let mut stream_data = if length == usize::MAX || length == usize::MAX - 1 {
591 let is_indirect_ref = length == usize::MAX - 1;
593 let is_dct_decode = dict
595 .0
596 .get(&PdfName("Filter".to_string()))
597 .map(|filter| match filter {
598 PdfObject::Name(name) => name.0 == "DCTDecode",
599 PdfObject::Array(arr) => arr
600 .0
601 .iter()
602 .any(|f| matches!(f, PdfObject::Name(name) if name.0 == "DCTDecode")),
603 _ => false,
604 })
605 .unwrap_or(false);
606
607 let mut data = Vec::new();
608 let max_search = if is_indirect_ref {
611 10 * 1024 * 1024 } else {
613 65536 };
615 let mut found_endstream = false;
616
617 if is_indirect_ref && options.collect_warnings {
618 tracing::debug!("Searching for endstream without fixed limit (up to {}MB) for indirect reference", max_search / 1024 / 1024);
619 }
620
621 for i in 0..max_search {
622 match lexer.peek_byte() {
623 Ok(b) => {
624 if b == b'e' {
626 let mut temp_buffer = vec![b'e'];
628 let expected = b"ndstream";
629 let mut is_endstream = true;
630
631 let _ = lexer.read_byte();
633
634 for &expected_byte in expected.iter() {
636 match lexer.read_byte() {
637 Ok(byte) => {
638 temp_buffer.push(byte);
639 if byte != expected_byte {
640 is_endstream = false;
641 break;
642 }
643 }
644 Err(_) => {
645 is_endstream = false;
646 break;
647 }
648 }
649 }
650
651 if is_endstream && temp_buffer.len() == 9 {
652 found_endstream = true;
654 if is_dct_decode {
655 tracing::debug!("🔍 [PARSER] Found 'endstream' after reading {} bytes for DCTDecode", data.len());
656 }
657 break;
658 } else {
659 data.extend(temp_buffer);
662 continue;
663 }
664 } else {
665 data.push(lexer.read_byte()?);
667 }
668
669 if is_dct_decode && i % 10000 == 0 && i > 0 {
671 }
673 }
674 Err(_) => {
675 break;
677 }
678 }
679 }
680
681 if !found_endstream && !options.lenient_streams {
682 return Err(ParseError::SyntaxError {
683 position: lexer.position(),
684 message: "Could not find endstream marker".to_string(),
685 });
686 }
687
688 if is_dct_decode {
689 tracing::debug!(
692 "DCTDecode stream: read {} bytes (full stream based on endstream marker)",
693 data.len()
694 );
695 }
696
697 data
698 } else {
699 lexer.read_bytes(length)?
700 };
701
702 lexer.skip_whitespace()?;
704
705 let peek_result = lexer.peek_token();
707
708 match peek_result {
709 Ok(Token::EndStream) => {
710 lexer.next_token()?;
712 Ok(stream_data)
713 }
714 Ok(other_token) => {
715 if options.lenient_streams {
716 let is_dct_decode = dict
718 .0
719 .get(&PdfName("Filter".to_string()))
720 .map(|filter| match filter {
721 PdfObject::Name(name) => name.0 == "DCTDecode",
722 PdfObject::Array(arr) => arr.0.iter().any(
723 |f| matches!(f, PdfObject::Name(name) if name.0 == "DCTDecode"),
724 ),
725 _ => false,
726 })
727 .unwrap_or(false);
728
729 if is_dct_decode {
730 tracing::debug!("Warning: DCTDecode stream length mismatch at {length} bytes, but not extending JPEG data");
733
734 if let Some(additional_bytes) =
736 lexer.find_keyword_ahead("endstream", options.max_recovery_bytes)?
737 {
738 let _ = lexer.read_bytes(additional_bytes)?;
740 }
741
742 lexer.skip_whitespace()?;
744 lexer.expect_keyword("endstream")?;
745
746 Ok(stream_data)
747 } else {
748 tracing::debug!("Warning: Stream length mismatch. Expected 'endstream' after {length} bytes, got {other_token:?}");
750
751 let search_limit = if length == usize::MAX - 1 {
753 10 * 1024 * 1024 } else {
755 options.max_recovery_bytes
756 };
757
758 if let Some(additional_bytes) =
759 lexer.find_keyword_ahead("endstream", search_limit)?
760 {
761 let extra_data = lexer.read_bytes(additional_bytes)?;
763 stream_data.extend_from_slice(&extra_data);
764
765 let actual_length = stream_data.len();
766 tracing::debug!(
767 "Stream length corrected: declared={length}, actual={actual_length}"
768 );
769
770 lexer.skip_whitespace()?;
772 lexer.expect_keyword("endstream")?;
773
774 Ok(stream_data)
775 } else {
776 Err(ParseError::SyntaxError {
778 position: lexer.position(),
779 message: format!(
780 "Could not find 'endstream' within {} bytes",
781 search_limit
782 ),
783 })
784 }
785 }
786 } else {
787 Err(ParseError::UnexpectedToken {
789 expected: "endstream".to_string(),
790 found: format!("{other_token:?}"),
791 })
792 }
793 }
794 Err(e) => {
795 if options.lenient_streams {
796 tracing::debug!(
798 "Warning: Stream length mismatch. Could not peek next token after {length} bytes"
799 );
800
801 let search_limit = if length == usize::MAX - 1 {
803 10 * 1024 * 1024 } else {
805 options.max_recovery_bytes
806 };
807
808 if let Some(additional_bytes) =
809 lexer.find_keyword_ahead("endstream", search_limit)?
810 {
811 let extra_data = lexer.read_bytes(additional_bytes)?;
813 stream_data.extend_from_slice(&extra_data);
814
815 let actual_length = stream_data.len();
816 tracing::debug!(
817 "Stream length corrected: declared={length}, actual={actual_length}"
818 );
819
820 lexer.skip_whitespace()?;
822 lexer.expect_keyword("endstream")?;
823
824 Ok(stream_data)
825 } else {
826 Err(ParseError::SyntaxError {
828 position: lexer.position(),
829 message: format!(
830 "Could not find 'endstream' within {} bytes",
831 search_limit
832 ),
833 })
834 }
835 } else {
836 Err(e)
838 }
839 }
840 }
841 }
842
843 pub fn is_null(&self) -> bool {
854 matches!(self, PdfObject::Null)
855 }
856
857 pub fn as_bool(&self) -> Option<bool> {
875 match self {
876 PdfObject::Boolean(b) => Some(*b),
877 _ => None,
878 }
879 }
880
881 pub fn as_integer(&self) -> Option<i64> {
883 match self {
884 PdfObject::Integer(i) => Some(*i),
885 _ => None,
886 }
887 }
888
889 pub fn as_real(&self) -> Option<f64> {
910 match self {
911 PdfObject::Real(r) => Some(*r),
912 PdfObject::Integer(i) => Some(*i as f64),
913 _ => None,
914 }
915 }
916
917 pub fn as_string(&self) -> Option<&PdfString> {
919 match self {
920 PdfObject::String(s) => Some(s),
921 _ => None,
922 }
923 }
924
925 pub fn as_name(&self) -> Option<&PdfName> {
927 match self {
928 PdfObject::Name(n) => Some(n),
929 _ => None,
930 }
931 }
932
933 pub fn as_array(&self) -> Option<&PdfArray> {
935 match self {
936 PdfObject::Array(a) => Some(a),
937 _ => None,
938 }
939 }
940
941 pub fn as_dict(&self) -> Option<&PdfDictionary> {
943 match self {
944 PdfObject::Dictionary(d) => Some(d),
945 PdfObject::Stream(s) => Some(&s.dict),
946 _ => None,
947 }
948 }
949
950 pub fn as_stream(&self) -> Option<&PdfStream> {
952 match self {
953 PdfObject::Stream(s) => Some(s),
954 _ => None,
955 }
956 }
957
958 pub fn as_reference(&self) -> Option<(u32, u16)> {
978 match self {
979 PdfObject::Reference(obj, gen) => Some((*obj, *gen)),
980 _ => None,
981 }
982 }
983}
984
985impl Default for PdfDictionary {
986 fn default() -> Self {
987 Self::new()
988 }
989}
990
991impl PdfDictionary {
992 pub fn new() -> Self {
1003 PdfDictionary(HashMap::new())
1004 }
1005
1006 pub fn get(&self, key: &str) -> Option<&PdfObject> {
1029 self.0.get(&PdfName(key.to_string()))
1030 }
1031
1032 pub fn insert(&mut self, key: String, value: PdfObject) {
1034 self.0.insert(PdfName(key), value);
1035 }
1036
1037 pub fn contains_key(&self, key: &str) -> bool {
1039 self.0.contains_key(&PdfName(key.to_string()))
1040 }
1041
1042 pub fn get_type(&self) -> Option<&str> {
1068 self.get("Type")
1069 .and_then(|obj| obj.as_name())
1070 .map(|n| n.0.as_str())
1071 }
1072}
1073
1074impl Default for PdfArray {
1075 fn default() -> Self {
1076 Self::new()
1077 }
1078}
1079
1080impl PdfArray {
1081 pub fn new() -> Self {
1083 PdfArray(Vec::new())
1084 }
1085
1086 pub fn len(&self) -> usize {
1088 self.0.len()
1089 }
1090
1091 pub fn is_empty(&self) -> bool {
1093 self.0.is_empty()
1094 }
1095
1096 pub fn get(&self, index: usize) -> Option<&PdfObject> {
1120 self.0.get(index)
1121 }
1122
1123 pub fn push(&mut self, obj: PdfObject) {
1125 self.0.push(obj);
1126 }
1127}
1128
1129impl PdfString {
1130 pub fn new(data: Vec<u8>) -> Self {
1132 PdfString(data)
1133 }
1134
1135 pub fn as_str(&self) -> Result<&str, std::str::Utf8Error> {
1156 std::str::from_utf8(&self.0)
1157 }
1158
1159 pub fn as_bytes(&self) -> &[u8] {
1161 &self.0
1162 }
1163}
1164
1165impl PdfName {
1166 pub fn new(name: String) -> Self {
1168 PdfName(name)
1169 }
1170
1171 pub fn as_str(&self) -> &str {
1173 &self.0
1174 }
1175}
1176
1177#[cfg(test)]
1178mod tests {
1179 use super::*;
1180 use crate::parser::lexer::Lexer;
1181 use crate::parser::ParseOptions;
1182 use std::collections::HashMap;
1183 use std::io::Cursor;
1184
1185 #[test]
1186 fn test_parse_simple_objects() {
1187 let input = b"null true false 123 -456 3.14 /Name (Hello)";
1188 let mut lexer = Lexer::new(Cursor::new(input));
1189
1190 assert_eq!(PdfObject::parse(&mut lexer).unwrap(), PdfObject::Null);
1191 assert_eq!(
1192 PdfObject::parse(&mut lexer).unwrap(),
1193 PdfObject::Boolean(true)
1194 );
1195 assert_eq!(
1196 PdfObject::parse(&mut lexer).unwrap(),
1197 PdfObject::Boolean(false)
1198 );
1199 assert_eq!(
1200 PdfObject::parse(&mut lexer).unwrap(),
1201 PdfObject::Integer(123)
1202 );
1203 assert_eq!(
1204 PdfObject::parse(&mut lexer).unwrap(),
1205 PdfObject::Integer(-456)
1206 );
1207 assert_eq!(PdfObject::parse(&mut lexer).unwrap(), PdfObject::Real(3.14));
1208 assert_eq!(
1209 PdfObject::parse(&mut lexer).unwrap(),
1210 PdfObject::Name(PdfName("Name".to_string()))
1211 );
1212 assert_eq!(
1213 PdfObject::parse(&mut lexer).unwrap(),
1214 PdfObject::String(PdfString(b"Hello".to_vec()))
1215 );
1216 }
1217
1218 #[test]
1219 fn test_parse_array() {
1220 let input = b"[100 200 300 /Name (test)]";
1222 let mut lexer = Lexer::new(Cursor::new(input));
1223
1224 let obj = PdfObject::parse(&mut lexer).unwrap();
1225 let array = obj.as_array().unwrap();
1226
1227 assert_eq!(array.len(), 5);
1228 assert_eq!(array.get(0).unwrap().as_integer(), Some(100));
1229 assert_eq!(array.get(1).unwrap().as_integer(), Some(200));
1230 assert_eq!(array.get(2).unwrap().as_integer(), Some(300));
1231 assert_eq!(array.get(3).unwrap().as_name().unwrap().as_str(), "Name");
1232 assert_eq!(
1233 array.get(4).unwrap().as_string().unwrap().as_bytes(),
1234 b"test"
1235 );
1236 }
1237
1238 #[test]
1239 fn test_parse_array_with_references() {
1240 let input = b"[1 0 R 2 0 R]";
1242 let mut lexer = Lexer::new(Cursor::new(input));
1243
1244 let obj = PdfObject::parse(&mut lexer).unwrap();
1245 let array = obj.as_array().unwrap();
1246
1247 assert_eq!(array.len(), 2);
1248 assert!(array.get(0).unwrap().as_reference().is_some());
1249 assert!(array.get(1).unwrap().as_reference().is_some());
1250 }
1251
1252 #[test]
1253 fn test_parse_dictionary() {
1254 let input = b"<< /Type /Page /Parent 1 0 R /MediaBox [0 0 612 792] >>";
1255 let mut lexer = Lexer::new(Cursor::new(input));
1256
1257 let obj = PdfObject::parse(&mut lexer).unwrap();
1258 let dict = obj.as_dict().unwrap();
1259
1260 assert_eq!(dict.get_type(), Some("Page"));
1261 assert!(dict.get("Parent").unwrap().as_reference().is_some());
1262 assert!(dict.get("MediaBox").unwrap().as_array().is_some());
1263 }
1264
1265 mod comprehensive_tests {
1267 use super::*;
1268
1269 #[test]
1270 fn test_pdf_object_null() {
1271 let obj = PdfObject::Null;
1272 assert!(obj.is_null());
1273 assert_eq!(obj.as_bool(), None);
1274 assert_eq!(obj.as_integer(), None);
1275 assert_eq!(obj.as_real(), None);
1276 assert_eq!(obj.as_string(), None);
1277 assert_eq!(obj.as_name(), None);
1278 assert_eq!(obj.as_array(), None);
1279 assert_eq!(obj.as_dict(), None);
1280 assert_eq!(obj.as_stream(), None);
1281 assert_eq!(obj.as_reference(), None);
1282 }
1283
1284 #[test]
1285 fn test_pdf_object_boolean() {
1286 let obj_true = PdfObject::Boolean(true);
1287 let obj_false = PdfObject::Boolean(false);
1288
1289 assert!(!obj_true.is_null());
1290 assert_eq!(obj_true.as_bool(), Some(true));
1291 assert_eq!(obj_false.as_bool(), Some(false));
1292
1293 assert_eq!(obj_true.as_integer(), None);
1294 assert_eq!(obj_true.as_real(), None);
1295 assert_eq!(obj_true.as_string(), None);
1296 assert_eq!(obj_true.as_name(), None);
1297 assert_eq!(obj_true.as_array(), None);
1298 assert_eq!(obj_true.as_dict(), None);
1299 assert_eq!(obj_true.as_stream(), None);
1300 assert_eq!(obj_true.as_reference(), None);
1301 }
1302
1303 #[test]
1304 fn test_pdf_object_integer() {
1305 let obj = PdfObject::Integer(42);
1306
1307 assert!(!obj.is_null());
1308 assert_eq!(obj.as_bool(), None);
1309 assert_eq!(obj.as_integer(), Some(42));
1310 assert_eq!(obj.as_real(), Some(42.0)); assert_eq!(obj.as_string(), None);
1312 assert_eq!(obj.as_name(), None);
1313 assert_eq!(obj.as_array(), None);
1314 assert_eq!(obj.as_dict(), None);
1315 assert_eq!(obj.as_stream(), None);
1316 assert_eq!(obj.as_reference(), None);
1317
1318 let obj_neg = PdfObject::Integer(-123);
1320 assert_eq!(obj_neg.as_integer(), Some(-123));
1321 assert_eq!(obj_neg.as_real(), Some(-123.0));
1322
1323 let obj_large = PdfObject::Integer(9999999999);
1325 assert_eq!(obj_large.as_integer(), Some(9999999999));
1326 assert_eq!(obj_large.as_real(), Some(9999999999.0));
1327 }
1328
1329 #[test]
1330 fn test_pdf_object_real() {
1331 let obj = PdfObject::Real(3.14159);
1332
1333 assert!(!obj.is_null());
1334 assert_eq!(obj.as_bool(), None);
1335 assert_eq!(obj.as_integer(), None);
1336 assert_eq!(obj.as_real(), Some(3.14159));
1337 assert_eq!(obj.as_string(), None);
1338 assert_eq!(obj.as_name(), None);
1339 assert_eq!(obj.as_array(), None);
1340 assert_eq!(obj.as_dict(), None);
1341 assert_eq!(obj.as_stream(), None);
1342 assert_eq!(obj.as_reference(), None);
1343
1344 let obj_neg = PdfObject::Real(-2.71828);
1346 assert_eq!(obj_neg.as_real(), Some(-2.71828));
1347
1348 let obj_zero = PdfObject::Real(0.0);
1350 assert_eq!(obj_zero.as_real(), Some(0.0));
1351
1352 let obj_small = PdfObject::Real(0.000001);
1354 assert_eq!(obj_small.as_real(), Some(0.000001));
1355
1356 let obj_large = PdfObject::Real(1e10);
1358 assert_eq!(obj_large.as_real(), Some(1e10));
1359 }
1360
1361 #[test]
1362 fn test_pdf_object_string() {
1363 let string_data = b"Hello World".to_vec();
1364 let pdf_string = PdfString(string_data.clone());
1365 let obj = PdfObject::String(pdf_string);
1366
1367 assert!(!obj.is_null());
1368 assert_eq!(obj.as_bool(), None);
1369 assert_eq!(obj.as_integer(), None);
1370 assert_eq!(obj.as_real(), None);
1371 assert!(obj.as_string().is_some());
1372 assert_eq!(obj.as_string().unwrap().as_bytes(), string_data);
1373 assert_eq!(obj.as_name(), None);
1374 assert_eq!(obj.as_array(), None);
1375 assert_eq!(obj.as_dict(), None);
1376 assert_eq!(obj.as_stream(), None);
1377 assert_eq!(obj.as_reference(), None);
1378 }
1379
1380 #[test]
1381 fn test_pdf_object_name() {
1382 let name_str = "Type".to_string();
1383 let pdf_name = PdfName(name_str.clone());
1384 let obj = PdfObject::Name(pdf_name);
1385
1386 assert!(!obj.is_null());
1387 assert_eq!(obj.as_bool(), None);
1388 assert_eq!(obj.as_integer(), None);
1389 assert_eq!(obj.as_real(), None);
1390 assert_eq!(obj.as_string(), None);
1391 assert!(obj.as_name().is_some());
1392 assert_eq!(obj.as_name().unwrap().as_str(), name_str);
1393 assert_eq!(obj.as_array(), None);
1394 assert_eq!(obj.as_dict(), None);
1395 assert_eq!(obj.as_stream(), None);
1396 assert_eq!(obj.as_reference(), None);
1397 }
1398
1399 #[test]
1400 fn test_pdf_object_array() {
1401 let mut array = PdfArray::new();
1402 array.push(PdfObject::Integer(1));
1403 array.push(PdfObject::Integer(2));
1404 array.push(PdfObject::Integer(3));
1405 let obj = PdfObject::Array(array);
1406
1407 assert!(!obj.is_null());
1408 assert_eq!(obj.as_bool(), None);
1409 assert_eq!(obj.as_integer(), None);
1410 assert_eq!(obj.as_real(), None);
1411 assert_eq!(obj.as_string(), None);
1412 assert_eq!(obj.as_name(), None);
1413 assert!(obj.as_array().is_some());
1414 assert_eq!(obj.as_array().unwrap().len(), 3);
1415 assert_eq!(obj.as_dict(), None);
1416 assert_eq!(obj.as_stream(), None);
1417 assert_eq!(obj.as_reference(), None);
1418 }
1419
1420 #[test]
1421 fn test_pdf_object_dictionary() {
1422 let mut dict = PdfDictionary::new();
1423 dict.insert(
1424 "Type".to_string(),
1425 PdfObject::Name(PdfName("Page".to_string())),
1426 );
1427 dict.insert("Count".to_string(), PdfObject::Integer(5));
1428 let obj = PdfObject::Dictionary(dict);
1429
1430 assert!(!obj.is_null());
1431 assert_eq!(obj.as_bool(), None);
1432 assert_eq!(obj.as_integer(), None);
1433 assert_eq!(obj.as_real(), None);
1434 assert_eq!(obj.as_string(), None);
1435 assert_eq!(obj.as_name(), None);
1436 assert_eq!(obj.as_array(), None);
1437 assert!(obj.as_dict().is_some());
1438 assert_eq!(obj.as_dict().unwrap().0.len(), 2);
1439 assert_eq!(obj.as_stream(), None);
1440 assert_eq!(obj.as_reference(), None);
1441 }
1442
1443 #[test]
1444 fn test_pdf_object_stream() {
1445 let mut dict = PdfDictionary::new();
1446 dict.insert("Length".to_string(), PdfObject::Integer(13));
1447 let data = b"Hello, World!".to_vec();
1448 let stream = PdfStream { dict, data };
1449 let obj = PdfObject::Stream(stream);
1450
1451 assert!(!obj.is_null());
1452 assert_eq!(obj.as_bool(), None);
1453 assert_eq!(obj.as_integer(), None);
1454 assert_eq!(obj.as_real(), None);
1455 assert_eq!(obj.as_string(), None);
1456 assert_eq!(obj.as_name(), None);
1457 assert_eq!(obj.as_array(), None);
1458 assert!(obj.as_dict().is_some()); assert!(obj.as_stream().is_some());
1460 assert_eq!(obj.as_stream().unwrap().raw_data(), b"Hello, World!");
1461 assert_eq!(obj.as_reference(), None);
1462 }
1463
1464 #[test]
1465 fn test_pdf_object_reference() {
1466 let obj = PdfObject::Reference(42, 0);
1467
1468 assert!(!obj.is_null());
1469 assert_eq!(obj.as_bool(), None);
1470 assert_eq!(obj.as_integer(), None);
1471 assert_eq!(obj.as_real(), None);
1472 assert_eq!(obj.as_string(), None);
1473 assert_eq!(obj.as_name(), None);
1474 assert_eq!(obj.as_array(), None);
1475 assert_eq!(obj.as_dict(), None);
1476 assert_eq!(obj.as_stream(), None);
1477 assert_eq!(obj.as_reference(), Some((42, 0)));
1478
1479 let obj_gen = PdfObject::Reference(123, 5);
1481 assert_eq!(obj_gen.as_reference(), Some((123, 5)));
1482 }
1483
1484 #[test]
1485 fn test_pdf_string_methods() {
1486 let string_data = b"Hello, World!".to_vec();
1487 let pdf_string = PdfString(string_data.clone());
1488
1489 assert_eq!(pdf_string.as_bytes(), string_data);
1490 assert_eq!(pdf_string.as_str().unwrap(), "Hello, World!");
1491 assert_eq!(pdf_string.0.len(), 13);
1492 assert!(!pdf_string.0.is_empty());
1493
1494 let empty_string = PdfString(vec![]);
1496 assert!(empty_string.0.is_empty());
1497 assert_eq!(empty_string.0.len(), 0);
1498
1499 let binary_data = vec![0xFF, 0xFE, 0x00, 0x48, 0x00, 0x69]; let binary_string = PdfString(binary_data.clone());
1502 assert_eq!(binary_string.as_bytes(), binary_data);
1503 assert!(binary_string.as_str().is_err()); }
1505
1506 #[test]
1507 fn test_pdf_name_methods() {
1508 let name_str = "Type".to_string();
1509 let pdf_name = PdfName(name_str.clone());
1510
1511 assert_eq!(pdf_name.as_str(), name_str);
1512 assert_eq!(pdf_name.0.len(), 4);
1513 assert!(!pdf_name.0.is_empty());
1514
1515 let empty_name = PdfName("".to_string());
1517 assert!(empty_name.0.is_empty());
1518 assert_eq!(empty_name.0.len(), 0);
1519
1520 let special_name = PdfName("Font#20Name".to_string());
1522 assert_eq!(special_name.as_str(), "Font#20Name");
1523 assert_eq!(special_name.0.len(), 11);
1524 }
1525
1526 #[test]
1527 fn test_pdf_array_methods() {
1528 let mut array = PdfArray::new();
1529 assert_eq!(array.len(), 0);
1530 assert!(array.is_empty());
1531
1532 array.push(PdfObject::Integer(1));
1534 array.push(PdfObject::Integer(2));
1535 array.push(PdfObject::Integer(3));
1536
1537 assert_eq!(array.len(), 3);
1538 assert!(!array.is_empty());
1539
1540 assert_eq!(array.get(0).unwrap().as_integer(), Some(1));
1542 assert_eq!(array.get(1).unwrap().as_integer(), Some(2));
1543 assert_eq!(array.get(2).unwrap().as_integer(), Some(3));
1544 assert!(array.get(3).is_none());
1545
1546 let values: Vec<i64> = array.0.iter().filter_map(|obj| obj.as_integer()).collect();
1548 assert_eq!(values, vec![1, 2, 3]);
1549
1550 let mut mixed_array = PdfArray::new();
1552 mixed_array.push(PdfObject::Integer(42));
1553 mixed_array.push(PdfObject::Real(3.14));
1554 mixed_array.push(PdfObject::String(PdfString(b"text".to_vec())));
1555 mixed_array.push(PdfObject::Name(PdfName("Name".to_string())));
1556 mixed_array.push(PdfObject::Boolean(true));
1557 mixed_array.push(PdfObject::Null);
1558
1559 assert_eq!(mixed_array.len(), 6);
1560 assert_eq!(mixed_array.get(0).unwrap().as_integer(), Some(42));
1561 assert_eq!(mixed_array.get(1).unwrap().as_real(), Some(3.14));
1562 assert_eq!(
1563 mixed_array.get(2).unwrap().as_string().unwrap().as_bytes(),
1564 b"text"
1565 );
1566 assert_eq!(
1567 mixed_array.get(3).unwrap().as_name().unwrap().as_str(),
1568 "Name"
1569 );
1570 assert_eq!(mixed_array.get(4).unwrap().as_bool(), Some(true));
1571 assert!(mixed_array.get(5).unwrap().is_null());
1572 }
1573
1574 #[test]
1575 fn test_pdf_dictionary_methods() {
1576 let mut dict = PdfDictionary::new();
1577 assert_eq!(dict.0.len(), 0);
1578 assert!(dict.0.is_empty());
1579
1580 dict.insert(
1582 "Type".to_string(),
1583 PdfObject::Name(PdfName("Page".to_string())),
1584 );
1585 dict.insert("Count".to_string(), PdfObject::Integer(5));
1586 dict.insert("Resources".to_string(), PdfObject::Reference(10, 0));
1587
1588 assert_eq!(dict.0.len(), 3);
1589 assert!(!dict.0.is_empty());
1590
1591 assert_eq!(
1593 dict.get("Type").unwrap().as_name().unwrap().as_str(),
1594 "Page"
1595 );
1596 assert_eq!(dict.get("Count").unwrap().as_integer(), Some(5));
1597 assert_eq!(dict.get("Resources").unwrap().as_reference(), Some((10, 0)));
1598 assert!(dict.get("NonExistent").is_none());
1599
1600 assert!(dict.contains_key("Type"));
1602 assert!(dict.contains_key("Count"));
1603 assert!(dict.contains_key("Resources"));
1604 assert!(!dict.contains_key("NonExistent"));
1605
1606 assert_eq!(dict.get_type(), Some("Page"));
1608
1609 let mut keys: Vec<String> = dict.0.keys().map(|k| k.0.clone()).collect();
1611 keys.sort();
1612 assert_eq!(keys, vec!["Count", "Resources", "Type"]);
1613
1614 let values: Vec<&PdfObject> = dict.0.values().collect();
1616 assert_eq!(values.len(), 3);
1617 }
1618
1619 #[test]
1620 fn test_pdf_stream_methods() {
1621 let mut dict = PdfDictionary::new();
1622 dict.insert("Length".to_string(), PdfObject::Integer(13));
1623 dict.insert(
1624 "Filter".to_string(),
1625 PdfObject::Name(PdfName("FlateDecode".to_string())),
1626 );
1627
1628 let data = b"Hello, World!".to_vec();
1629 let stream = PdfStream {
1630 dict,
1631 data: data.clone(),
1632 };
1633
1634 assert_eq!(stream.raw_data(), data);
1636
1637 assert_eq!(stream.dict.get("Length").unwrap().as_integer(), Some(13));
1639 assert_eq!(
1640 stream
1641 .dict
1642 .get("Filter")
1643 .unwrap()
1644 .as_name()
1645 .unwrap()
1646 .as_str(),
1647 "FlateDecode"
1648 );
1649
1650 let options = ParseOptions::default();
1653 let decode_result = stream.decode(&options);
1654 assert!(decode_result.is_ok() || decode_result.is_err());
1655 }
1656
1657 #[test]
1658 fn test_parse_complex_nested_structures() {
1659 let input = b"[[1 2] [3 4] [5 6]]";
1661 let mut lexer = Lexer::new(Cursor::new(input));
1662 let obj = PdfObject::parse(&mut lexer).unwrap();
1663
1664 let outer_array = obj.as_array().unwrap();
1665 assert_eq!(outer_array.len(), 3);
1666
1667 for i in 0..3 {
1668 let inner_array = outer_array.get(i).unwrap().as_array().unwrap();
1669 assert_eq!(inner_array.len(), 2);
1670 assert_eq!(
1671 inner_array.get(0).unwrap().as_integer(),
1672 Some((i as i64) * 2 + 1)
1673 );
1674 assert_eq!(
1675 inner_array.get(1).unwrap().as_integer(),
1676 Some((i as i64) * 2 + 2)
1677 );
1678 }
1679 }
1680
/// Parses a realistic page dictionary and verifies every level of nesting:
/// the indirect references, the `MediaBox` array and the `Resources`
/// dictionary with its `Font` and `ProcSet` entries.
#[test]
fn test_parse_complex_dictionary() {
    let source = b"<< /Type /Page /Parent 1 0 R /MediaBox [0 0 612 792] /Resources << /Font << /F1 2 0 R >> /ProcSet [/PDF /Text] >> /Contents 3 0 R >>";
    let mut lexer = Lexer::new(Cursor::new(source));
    let parsed = PdfObject::parse(&mut lexer).unwrap();
    let page = parsed.as_dict().unwrap();

    // Top-level entries: the type name and the two indirect references.
    assert_eq!(page.get_type(), Some("Page"));
    for (key, target) in [("Parent", (1, 0)), ("Contents", (3, 0))] {
        assert_eq!(page.get(key).unwrap().as_reference(), Some(target));
    }

    // MediaBox is a four-element integer rectangle.
    let media_box = page.get("MediaBox").unwrap().as_array().unwrap();
    assert_eq!(media_box.len(), 4);
    for (index, expected) in [0i64, 0, 612, 792].iter().enumerate() {
        assert_eq!(media_box.get(index).unwrap().as_integer(), Some(*expected));
    }

    // Resources holds a nested dictionary and a nested array.
    let resources = page.get("Resources").unwrap().as_dict().unwrap();
    for key in ["Font", "ProcSet"] {
        assert!(resources.contains_key(key));
    }

    let fonts = resources.get("Font").unwrap().as_dict().unwrap();
    assert_eq!(fonts.get("F1").unwrap().as_reference(), Some((2, 0)));

    let proc_set = resources.get("ProcSet").unwrap().as_array().unwrap();
    assert_eq!(proc_set.len(), 2);
    for (index, expected) in ["PDF", "Text"].iter().enumerate() {
        let entry = proc_set.get(index).unwrap().as_name().unwrap();
        assert_eq!(entry.as_str(), *expected);
    }
}
1715
/// A hexadecimal string literal must decode to the bytes it spells out.
#[test]
fn test_parse_hex_strings() {
    // 48 65 6C 6C 6F are the ASCII codes for "Hello".
    let mut lexer = Lexer::new(Cursor::new(b"<48656C6C6F>"));
    let parsed = PdfObject::parse(&mut lexer).unwrap();

    let decoded = parsed.as_string().unwrap();
    assert_eq!(decoded.as_str().unwrap(), "Hello");
}
1725
/// A parenthesised literal string parses to its plain text content.
#[test]
fn test_parse_literal_strings() {
    let mut lexer = Lexer::new(Cursor::new(b"(Hello World)"));
    let parsed = PdfObject::parse(&mut lexer).unwrap();

    let decoded = parsed.as_string().unwrap();
    assert_eq!(decoded.as_str().unwrap(), "Hello World");
}
1735
/// A literal string containing backslash escapes parses to a string object.
///
/// Only checks that the result is a non-empty string; the exact decoded
/// bytes are not pinned by this test.
#[test]
fn test_parse_string_with_escapes() {
    let source = b"(Hello\\nWorld\\t!)";
    let mut lexer = Lexer::new(Cursor::new(source));
    let parsed = PdfObject::parse(&mut lexer).unwrap();

    let decoded = parsed.as_string().unwrap();
    assert!(!decoded.as_bytes().is_empty());
}
1746
/// A name containing `#xx` escape sequences parses to a name object.
///
/// Only checks that the result is a non-empty name; whether the escapes are
/// decoded is not pinned by this test.
#[test]
fn test_parse_names_with_special_chars() {
    let source = b"/Name#20with#20spaces";
    let mut lexer = Lexer::new(Cursor::new(source));
    let parsed = PdfObject::parse(&mut lexer).unwrap();

    let parsed_name = parsed.as_name().unwrap();
    assert!(!parsed_name.as_str().is_empty());
}
1757
/// Indirect references (`<object> <generation> R`) parse to the matching
/// `(object, generation)` pair, including a non-zero generation number.
#[test]
fn test_parse_references() {
    let cases = [(&b"1 0 R"[..], (1, 0)), (&b"42 5 R"[..], (42, 5))];

    for (source, expected) in cases {
        let mut lexer = Lexer::new(Cursor::new(source));
        let parsed = PdfObject::parse(&mut lexer).unwrap();
        assert_eq!(parsed.as_reference(), Some(expected));
    }
}
1773
/// Numeric extremes: the largest and smallest `i64` values round-trip as
/// integers, and a number written in exponent notation parses as a real.
#[test]
fn test_parse_edge_cases() {
    // Largest representable 64-bit signed integer.
    let mut max_lexer = Lexer::new(Cursor::new(b"9223372036854775807"));
    let max_obj = PdfObject::parse(&mut max_lexer).unwrap();
    assert_eq!(max_obj.as_integer(), Some(i64::MAX));

    // Smallest representable 64-bit signed integer.
    let mut min_lexer = Lexer::new(Cursor::new(b"-9223372036854775808"));
    let min_obj = PdfObject::parse(&mut min_lexer).unwrap();
    assert_eq!(min_obj.as_integer(), Some(i64::MIN));

    // Exponent notation; only the object kind is checked, not the value.
    let mut real_lexer = Lexer::new(Cursor::new(b"1.23e-10"));
    let real_obj = PdfObject::parse(&mut real_lexer).unwrap();
    assert!(real_obj.as_real().is_some());
}
1795
/// Empty containers (`[]` and `<< >>`) parse successfully and report zero
/// elements.
#[test]
fn test_parse_empty_structures() {
    let mut array_lexer = Lexer::new(Cursor::new(b"[]"));
    let array_obj = PdfObject::parse(&mut array_lexer).unwrap();
    let empty_array = array_obj.as_array().unwrap();
    assert_eq!(empty_array.len(), 0);
    assert!(empty_array.is_empty());

    let mut dict_lexer = Lexer::new(Cursor::new(b"<< >>"));
    let dict_obj = PdfObject::parse(&mut dict_lexer).unwrap();
    let empty_dict = dict_obj.as_dict().unwrap();
    assert_eq!(empty_dict.0.len(), 0);
    assert!(empty_dict.0.is_empty());
}
1816
/// Malformed input must be rejected or, at minimum, never misinterpreted.
///
/// * An array with no closing `]` is an error.
/// * A dictionary with no closing `>>` is an error.
/// * `1 0 X` is not a valid indirect reference. Whether the parser rejects
///   it or falls back to another object is left open, but it must never be
///   reported as a reference.
#[test]
fn test_error_handling() {
    // Unterminated array.
    let mut array_lexer = Lexer::new(Cursor::new(b"[1 2 3"));
    assert!(PdfObject::parse(&mut array_lexer).is_err());

    // Unterminated dictionary.
    let mut dict_lexer = Lexer::new(Cursor::new(b"<< /Type /Page"));
    assert!(PdfObject::parse(&mut dict_lexer).is_err());

    // Reference-like input with the wrong keyword. The previous assertion
    // (`is_ok() || is_err()`) could never fail; this one pins the only
    // outcome that is definitely wrong.
    let mut bogus_ref_lexer = Lexer::new(Cursor::new(b"1 0 X"));
    if let Ok(obj) = PdfObject::parse(&mut bogus_ref_lexer) {
        assert!(
            obj.as_reference().is_none(),
            "\"1 0 X\" must not be parsed as an indirect reference, got {obj:?}"
        );
    }
}
1839
/// `Clone` produces values that compare equal to the original, and objects
/// with different payloads compare unequal. Covers a scalar and an array.
#[test]
fn test_clone_and_equality() {
    let forty_two = PdfObject::Integer(42);
    let forty_two_copy = forty_two.clone();
    assert_eq!(forty_two, forty_two_copy);

    let forty_three = PdfObject::Integer(43);
    assert_ne!(forty_two, forty_three);

    // A container clone must compare equal element by element.
    let mut items = PdfArray::new();
    items.push(PdfObject::Integer(1));
    items.push(PdfObject::String(PdfString(b"test".to_vec())));
    let wrapped = PdfObject::Array(items);
    let wrapped_copy = wrapped.clone();
    assert_eq!(wrapped, wrapped_copy);
}
1857
/// The derived `Debug` output names the variant or type and includes the
/// payload, for both `PdfObject` and `PdfName`.
#[test]
fn test_debug_formatting() {
    let integer_repr = format!("{:?}", PdfObject::Integer(42));
    for needle in ["Integer", "42"] {
        assert!(integer_repr.contains(needle));
    }

    let name_repr = format!("{:?}", PdfName("Type".to_string()));
    for needle in ["PdfName", "Type"] {
        assert!(name_repr.contains(needle));
    }
}
1870
/// Fills an array with the integers 0..1000 and checks its length, first and
/// last elements, and the sum of all entries.
#[test]
fn test_performance_large_array() {
    let mut numbers = PdfArray::new();
    (0..1000).for_each(|value| numbers.push(PdfObject::Integer(value)));

    assert_eq!(numbers.len(), 1000);
    assert_eq!(numbers.get(0).unwrap().as_integer(), Some(0));
    assert_eq!(numbers.get(999).unwrap().as_integer(), Some(999));

    // 0 + 1 + ... + 999 = 999 * 1000 / 2.
    let total: i64 = numbers
        .0
        .iter()
        .filter_map(PdfObject::as_integer)
        .sum();
    assert_eq!(total, 499500);
}
1886
/// Inserts 1000 distinct keys into a dictionary and checks the entry count,
/// two spot lookups, and that every inserted key is present.
#[test]
fn test_performance_large_dictionary() {
    let mut table = PdfDictionary::new();
    for value in 0..1000 {
        table.insert(format!("Key{value}"), PdfObject::Integer(value));
    }

    assert_eq!(table.0.len(), 1000);
    assert_eq!(table.get("Key0").unwrap().as_integer(), Some(0));
    assert_eq!(table.get("Key999").unwrap().as_integer(), Some(999));

    // Every key that was inserted must be found again.
    let mut index = 0;
    while index < 1000 {
        let key = format!("Key{index}");
        assert!(table.contains_key(&key));
        index += 1;
    }
}
1903 }
1904
/// Lenient stream parsing when the declared `/Length` (10) is smaller than
/// the actual payload: parsing must succeed and the returned data must
/// contain at least the start of the payload.
#[test]
fn test_lenient_stream_parsing_too_short() {
    let mut entries = HashMap::new();
    entries.insert(PdfName("Length".to_string()), PdfObject::Integer(10));
    let dict = PdfDictionary(entries);

    // Input as seen after the `stream` keyword: EOL, payload, EOL + endstream.
    let payload = b"This is a much longer text content than just 10 bytes";
    let mut test_data = Vec::new();
    test_data.extend_from_slice(b"\n");
    test_data.extend_from_slice(payload);
    test_data.extend_from_slice(b"\nendstream");

    let mut cursor = Cursor::new(test_data);
    let mut lexer = Lexer::new(&mut cursor);

    let mut options = ParseOptions::default();
    options.lenient_streams = true;
    options.max_recovery_bytes = 100;
    options.collect_warnings = false;

    let outcome = PdfObject::parse_stream_data_with_options(&mut lexer, &dict, &options);

    // Log the failure details before the assertion below trips.
    match &outcome {
        Err(e) => {
            tracing::debug!("Error in test_lenient_stream_parsing_too_short: {e:?}");
            tracing::debug!("Warning: Stream length mismatch expected, checking if lenient parsing is working correctly");
        }
        Ok(_) => {}
    }
    assert!(outcome.is_ok());

    let bytes = outcome.unwrap();
    let text = String::from_utf8_lossy(&bytes);
    assert!(text.contains("This is a"));
}
1950
/// Lenient stream parsing when the declared `/Length` (100) is larger than
/// the available payload: the test expects parsing to fail even with
/// lenient mode enabled.
#[test]
fn test_lenient_stream_parsing_too_long() {
    let mut entries = HashMap::new();
    entries.insert(PdfName("Length".to_string()), PdfObject::Integer(100));
    let dict = PdfDictionary(entries);

    // Input as seen after the `stream` keyword: EOL, payload, EOL + endstream.
    let payload = b"Short";
    let mut test_data = Vec::new();
    test_data.extend_from_slice(b"\n");
    test_data.extend_from_slice(payload);
    test_data.extend_from_slice(b"\nendstream");

    let mut cursor = Cursor::new(test_data);
    let mut lexer = Lexer::new(&mut cursor);

    let mut options = ParseOptions::default();
    options.lenient_streams = true;
    options.max_recovery_bytes = 100;
    options.collect_warnings = false;

    let outcome = PdfObject::parse_stream_data_with_options(&mut lexer, &dict, &options);

    assert!(outcome.is_err());
}
1986
/// A stream whose data never contains an `endstream` keyword must fail to
/// parse, even in lenient mode with a 50-byte recovery window.
#[test]
fn test_lenient_stream_no_endstream_found() {
    // The literal spans several source lines on purpose: the embedded
    // newlines are part of the input.
    let input = b"<< /Length 10 >>
stream
This text does not contain the magic word and continues for a very long time with no proper termination...";

    let mut cursor = Cursor::new(input.to_vec());
    let mut lexer = Lexer::new(&mut cursor);

    let mut options = ParseOptions::default();
    options.lenient_streams = true;
    options.max_recovery_bytes = 50;
    options.collect_warnings = false;

    // Pull the first token by hand so the options-aware entry point is used.
    let first_token = lexer.next_token().unwrap();
    let outcome = PdfObject::parse_from_token_with_options(&mut lexer, first_token, &options);

    assert!(outcome.is_err());
}
2007
/// `PdfName::new` stores its argument verbatim: `#xx` sequences are not
/// decoded and non-ASCII text is kept as-is.
#[test]
fn test_pdf_name_special_characters() {
    let samples = ["Name#20With#20Spaces", "café", "Font#2FSubtype"];

    for raw in samples {
        let name = PdfName::new(raw.to_string());
        assert_eq!(name.as_str(), raw);
    }
}
2023
/// Boundary inputs for `PdfName`: the empty name, a 1000-character name and
/// a name made of assorted punctuation.
#[test]
fn test_pdf_name_edge_cases() {
    let empty = PdfName::new(String::new());
    assert_eq!(empty.as_str(), "");

    let lengthy = PdfName::new("A".repeat(1000));
    assert_eq!(lengthy.as_str().len(), 1000);

    let punctuated_text = "ABCdef123-._~!*'()";
    let punctuated = PdfName::new(punctuated_text.to_string());
    assert_eq!(punctuated.as_str(), punctuated_text);
}
2038
/// `PdfString::as_str` succeeds for valid UTF-8 (including the empty string)
/// and fails for a byte sequence that is not valid UTF-8.
#[test]
fn test_pdf_string_encoding_validation() {
    let valid = PdfString::new(String::from("Hello, 世界! 🌍").into_bytes());
    assert!(valid.as_str().is_ok());

    // 0xFF, 0xFE and 0xFD never appear in well-formed UTF-8.
    let invalid = PdfString::new(vec![0xFF, 0xFE, 0xFD]);
    assert!(invalid.as_str().is_err());

    let empty = PdfString::new(Vec::new());
    assert_eq!(empty.as_str().unwrap(), "");
}
2053
/// `PdfString` must carry arbitrary bytes unchanged, including high bytes
/// and an embedded NUL.
#[test]
fn test_pdf_string_binary_data() {
    // Arbitrary binary payload, including bytes that are invalid in UTF-8.
    let binary_data = vec![0x00, 0x01, 0x02, 0x03, 0xFF, 0xFE, 0xFD, 0xFC];
    let binary_string = PdfString::new(binary_data.clone());
    assert_eq!(binary_string.as_bytes(), &binary_data);
    assert_eq!(binary_string.as_bytes().len(), 8);

    // "Hello\0World": the NUL must neither truncate the string nor be dropped.
    let null_string = PdfString::new(vec![
        0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x00, 0x57, 0x6F, 0x72, 0x6C, 0x64,
    ]);
    // Previously this re-checked `binary_string`, leaving the length of the
    // NUL-containing string unverified.
    assert_eq!(null_string.as_bytes().len(), 11);
    assert!(null_string.as_bytes().contains(&0x00));
}
2068
/// An array can hold other containers: here one nested array and one nested
/// dictionary, stored in insertion order.
#[test]
fn test_pdf_array_nested_structures() {
    // Inner array: [1 2]
    let mut inner_array = PdfArray::new();
    for value in [1, 2] {
        inner_array.push(PdfObject::Integer(value));
    }

    // Inner dictionary: << /Key (Value) >>
    let mut inner_dict = PdfDictionary(HashMap::new());
    let key = PdfName::new("Key".to_string());
    let value = PdfObject::String(PdfString::new(b"Value".to_vec()));
    inner_dict.0.insert(key, value);

    let mut outer = PdfArray::new();
    outer.push(PdfObject::Array(inner_array));
    outer.push(PdfObject::Dictionary(inner_dict));

    assert_eq!(outer.len(), 2);
    assert!(matches!(outer.get(0), Some(PdfObject::Array(_))));
    assert!(matches!(outer.get(1), Some(PdfObject::Dictionary(_))));
}
2091
/// An array may mix every scalar object kind; each element keeps its
/// variant and position.
#[test]
fn test_pdf_array_type_mixing() {
    let elements = vec![
        PdfObject::Null,
        PdfObject::Boolean(true),
        PdfObject::Integer(42),
        PdfObject::Real(3.14159),
        PdfObject::String(PdfString::new(b"text".to_vec())),
        PdfObject::Name(PdfName::new("Name".to_string())),
    ];

    let mut mixed = PdfArray::new();
    for element in elements {
        mixed.push(element);
    }

    assert_eq!(mixed.len(), 6);
    assert!(matches!(mixed.get(0), Some(PdfObject::Null)));
    assert!(matches!(mixed.get(1), Some(PdfObject::Boolean(true))));
    assert!(matches!(mixed.get(2), Some(PdfObject::Integer(42))));
    assert!(matches!(mixed.get(3), Some(PdfObject::Real(_))));
    assert!(matches!(mixed.get(4), Some(PdfObject::String(_))));
    assert!(matches!(mixed.get(5), Some(PdfObject::Name(_))));
}
2112
/// Basic key operations on the dictionary's underlying map: insertion,
/// entry count, presence checks and lookup.
#[test]
fn test_pdf_dictionary_key_operations() {
    let key = |text: &str| PdfName::new(text.to_string());
    let mut dict = PdfDictionary(HashMap::new());

    dict.0
        .insert(key("Type"), PdfObject::Name(key("Test")));
    dict.0.insert(key("Count"), PdfObject::Integer(100));
    dict.0.insert(key("Flag"), PdfObject::Boolean(true));

    assert_eq!(dict.0.len(), 3);
    for present in ["Type", "Count", "Flag"] {
        assert!(dict.0.contains_key(&key(present)));
    }
    assert!(!dict.0.contains_key(&key("Missing")));

    assert!(dict.0.get(&key("Type")).is_some());
}
2136
/// A dictionary can hold container values: a `MediaBox` array of reals and a
/// `Resources` dictionary that itself contains a `Font` dictionary.
#[test]
fn test_pdf_dictionary_complex_values() {
    // MediaBox: [0.0 0.0 612.0 792.0]
    let mut media_box = PdfArray::new();
    for coordinate in [0.0, 0.0, 612.0, 792.0] {
        media_box.push(PdfObject::Real(coordinate));
    }

    // Resources: << /Font << /F1 10 0 R >> >>
    let mut fonts = PdfDictionary(HashMap::new());
    fonts
        .0
        .insert(PdfName::new("F1".to_string()), PdfObject::Reference(10, 0));
    let mut resources = PdfDictionary(HashMap::new());
    resources
        .0
        .insert(PdfName::new("Font".to_string()), PdfObject::Dictionary(fonts));

    let mut page = PdfDictionary(HashMap::new());
    page.0
        .insert(PdfName::new("MediaBox".to_string()), PdfObject::Array(media_box));
    page.0.insert(
        PdfName::new("Resources".to_string()),
        PdfObject::Dictionary(resources),
    );

    assert_eq!(page.0.len(), 2);
    for key in ["MediaBox", "Resources"] {
        assert!(page.0.get(&PdfName::new(key.to_string())).is_some());
    }
}
2173
/// Reference equality depends on both the object number and the generation
/// number, and the maximum values of both fields are representable.
#[test]
fn test_object_reference_validation() {
    let base = PdfObject::Reference(1, 0);

    let same = PdfObject::Reference(1, 0);
    assert_eq!(base, same);

    let other_generation = PdfObject::Reference(1, 1);
    assert_ne!(base, other_generation);

    let other_object = PdfObject::Reference(2, 0);
    assert_ne!(base, other_object);

    let largest = PdfObject::Reference(u32::MAX, u16::MAX);
    assert!(matches!(largest, PdfObject::Reference(u32::MAX, u16::MAX)));
}
2189
/// Builds one object of each non-stream kind and checks that each position
/// holds the expected variant.
#[test]
fn test_pdf_object_type_checking() {
    let objects = vec![
        PdfObject::Null,
        PdfObject::Boolean(true),
        PdfObject::Integer(42),
        PdfObject::Real(3.14),
        PdfObject::String(PdfString::new(b"text".to_vec())),
        PdfObject::Name(PdfName::new("Name".to_string())),
        PdfObject::Array(PdfArray::new()),
        PdfObject::Dictionary(PdfDictionary(HashMap::new())),
        PdfObject::Reference(1, 0),
    ];

    // One slice pattern covers all nine positions at once.
    assert!(matches!(
        objects.as_slice(),
        [
            PdfObject::Null,
            PdfObject::Boolean(_),
            PdfObject::Integer(_),
            PdfObject::Real(_),
            PdfObject::String(_),
            PdfObject::Name(_),
            PdfObject::Array(_),
            PdfObject::Dictionary(_),
            PdfObject::Reference(_, _),
        ]
    ));
}
2215
/// Pushes 1000 integers into an array, then checks the length, the last
/// element, the first out-of-range index, and that every element is an
/// integer.
#[test]
fn test_pdf_array_large_capacity() {
    let mut numbers = PdfArray::new();
    (0..1000).for_each(|value| numbers.push(PdfObject::Integer(value)));

    assert_eq!(numbers.len(), 1000);
    match numbers.get(999) {
        Some(PdfObject::Integer(val)) => assert_eq!(*val, 999),
        _ => panic!("Expected Integer at index 999"),
    }
    assert!(numbers.get(1000).is_none());

    // Walk every valid index and count the integer elements.
    let integer_count = (0..numbers.len())
        .filter(|&index| matches!(numbers.get(index), Some(PdfObject::Integer(_))))
        .count();
    assert_eq!(integer_count, 1000);
}
2245
/// Inserts 100 keys, checks presence and absence at the boundary, then
/// removes one key and checks that the count and lookup reflect it.
#[test]
fn test_pdf_dictionary_memory_efficiency() {
    let key = |text: &str| PdfName::new(text.to_string());
    let mut dict = PdfDictionary(HashMap::new());

    for value in 0..100 {
        dict.0
            .insert(PdfName::new(format!("Key{value}")), PdfObject::Integer(value));
    }

    assert_eq!(dict.0.len(), 100);
    assert!(dict.0.contains_key(&key("Key99")));
    assert!(!dict.0.contains_key(&key("Key100")));

    // Removal shrinks the map and makes the key unreachable.
    dict.0.remove(&key("Key50"));
    assert_eq!(dict.0.len(), 99);
    assert!(!dict.0.contains_key(&key("Key50")));
}
2265
/// Parsing completely empty input must return an error.
#[test]
fn test_parsing_simple_error_cases() {
    use std::io::Cursor;

    let mut cursor = Cursor::new(Vec::<u8>::new());
    let mut lexer = Lexer::new(&mut cursor);

    let outcome = PdfObject::parse(&mut lexer);
    assert!(outcome.is_err());
}
2279
/// Valid UTF-8 text from several scripts must round-trip through
/// `PdfString` unchanged.
///
/// Every sample is built from a Rust `&str`, so its bytes are valid UTF-8.
/// A decode failure is therefore a real regression and fails the test. The
/// earlier version tolerated it through an assertion that was always true.
#[test]
fn test_unicode_string_handling() {
    let unicode_tests = vec![
        ("ASCII", "Hello World"),
        ("Latin-1", "Café résumé"),
        ("Emoji", "Hello 🌍 World 🚀"),
        ("CJK", "你好世界"),
        ("Mixed", "Hello 世界! Bonjour 🌍"),
    ];

    for (name, text) in unicode_tests {
        let pdf_string = PdfString::new(text.as_bytes().to_vec());
        let decoded = pdf_string
            .as_str()
            .unwrap_or_else(|e| panic!("Should handle {name}: {e:?}"));
        assert_eq!(decoded, text, "Failed for {}", name);
    }
}
2302
/// Builds an array of ten single-element arrays and verifies each nested
/// array is present, has exactly one element, and holds the expected value.
///
/// A missing or wrong-typed element fails the test instead of being skipped
/// silently.
#[test]
fn test_deep_nesting_limits() {
    let mut root_array = PdfArray::new();

    for value in 0..10 {
        let mut nested = PdfArray::new();
        nested.push(PdfObject::Integer(value));
        root_array.push(PdfObject::Array(nested));
    }

    assert_eq!(root_array.len(), 10);

    for (index, expected) in (0..10i64).enumerate() {
        match root_array.get(index) {
            Some(PdfObject::Array(nested)) => {
                assert_eq!(nested.len(), 1);
                assert_eq!(
                    nested.get(0).and_then(|obj| obj.as_integer()),
                    Some(expected)
                );
            }
            other => panic!("Expected nested array at index {index}, found {other:?}"),
        }
    }
}
2324
/// Extreme and special numeric payloads can be wrapped in `Integer` and
/// `Real` objects, including zero and the two infinities.
#[test]
fn test_special_numeric_values() {
    let paired_values = [
        (0i64, 0.0f64),
        (i32::MAX as i64, f32::MAX as f64),
        (i32::MIN as i64, f32::MIN as f64),
        (-1i64, -1.0f64),
        (2147483647i64, 2147483647.0f64),
    ];

    for (int_val, float_val) in paired_values {
        assert!(matches!(PdfObject::Integer(int_val), PdfObject::Integer(_)));
        assert!(matches!(PdfObject::Real(float_val), PdfObject::Real(_)));
    }

    let special_floats = [
        (0.0f64, "zero"),
        (f64::INFINITY, "infinity"),
        (f64::NEG_INFINITY, "negative infinity"),
    ];

    // The label only documents each case; it is not used in the check.
    for (val, _label) in special_floats {
        assert!(matches!(PdfObject::Real(val), PdfObject::Real(_)));
    }
}
2356
/// `PdfArray::get` returns `Some` for in-range indices and `None` for
/// anything past the end, including on an empty array.
#[test]
fn test_array_bounds_checking() {
    let mut triple = PdfArray::new();
    for value in 1..=3 {
        triple.push(PdfObject::Integer(value));
    }

    // Indices 0, 1 and 2 are valid.
    for index in 0..3 {
        assert!(triple.get(index).is_some());
    }

    // The first invalid index and one far out of range.
    for index in [3, 100] {
        assert!(triple.get(index).is_none());
    }

    let nothing = PdfArray::new();
    assert!(nothing.get(0).is_none());
    assert_eq!(nothing.len(), 0);
}
2378
/// Dictionary keys are case-sensitive: `Type`, `type` and `TYPE` are three
/// distinct entries, each keeping its own value.
///
/// Each lookup must return the expected name. A missing or wrong-typed
/// value fails the test instead of being skipped silently.
#[test]
fn test_dictionary_case_sensitivity() {
    let cases = [("Type", "Page"), ("type", "Font"), ("TYPE", "Image")];
    let mut dict = PdfDictionary(HashMap::new());

    for (key, value) in cases {
        dict.0.insert(
            PdfName::new(key.to_string()),
            PdfObject::Name(PdfName::new(value.to_string())),
        );
    }

    // Three spellings, three separate entries.
    assert_eq!(dict.0.len(), 3);
    for (key, _) in cases {
        assert!(dict.0.contains_key(&PdfName::new(key.to_string())));
    }

    for (key, expected) in cases {
        match dict.0.get(&PdfName::new(key.to_string())) {
            Some(PdfObject::Name(name)) => assert_eq!(name.as_str(), expected),
            other => panic!("Expected a name under key {key:?}, found {other:?}"),
        }
    }
}
2413
/// Cloning an array yields the same number of elements, and each cloned
/// element has the same variant and payload as its original.
#[test]
fn test_object_cloning_and_equality() {
    let mut original_array = PdfArray::new();
    original_array.push(PdfObject::Integer(42));
    original_array.push(PdfObject::String(PdfString::new(b"test".to_vec())));

    let cloned_array = original_array.clone();
    assert_eq!(original_array.len(), cloned_array.len());

    // Compare the two arrays position by position.
    let pairs = (0..original_array.len()).map(|index| {
        (
            original_array.get(index).unwrap(),
            cloned_array.get(index).unwrap(),
        )
    });
    for pair in pairs {
        match pair {
            (PdfObject::Integer(a), PdfObject::Integer(b)) => assert_eq!(a, b),
            (PdfObject::String(a), PdfObject::String(b)) => {
                assert_eq!(a.as_bytes(), b.as_bytes())
            }
            _ => panic!("Type mismatch in cloned array"),
        }
    }
}
2439
/// A dictionary shared through `Arc` can be read from a spawned thread and
/// from the spawning thread, and both see the same value.
///
/// Both lookups are asserted unconditionally. A failed lookup in the worker
/// panics there and surfaces through `join().unwrap()`; previously a miss on
/// either side was skipped silently.
#[test]
fn test_concurrent_object_access() {
    use std::sync::Arc;
    use std::thread;

    let dict = Arc::new({
        let mut d = PdfDictionary(HashMap::new());
        d.0.insert(
            PdfName::new("SharedKey".to_string()),
            PdfObject::Integer(42),
        );
        d
    });

    let dict_clone = Arc::clone(&dict);
    let handle = thread::spawn(move || {
        let seen = dict_clone.0.get(&PdfName::new("SharedKey".to_string()));
        assert_eq!(seen, Some(&PdfObject::Integer(42)));
    });

    // Read from the spawning thread while the worker may still be running.
    let seen = dict.0.get(&PdfName::new("SharedKey".to_string()));
    assert_eq!(seen, Some(&PdfObject::Integer(42)));

    handle.join().unwrap();
}
2471
/// `PdfStream::raw_data` exposes the stored bytes unchanged, both for an
/// empty stream and for one with content.
#[test]
fn test_stream_data_edge_cases() {
    let mut entries = HashMap::new();
    entries.insert(PdfName::new("Length".to_string()), PdfObject::Integer(0));
    let dict = PdfDictionary(entries);

    // Zero-length stream.
    let empty_stream = PdfStream {
        dict: dict.clone(),
        data: Vec::new(),
    };
    assert_eq!(empty_stream.data.len(), 0);
    assert!(empty_stream.raw_data().is_empty());

    // Stream with content; the dictionary is reused as-is.
    let filled_stream = PdfStream {
        dict,
        data: b"Hello World".to_vec(),
    };
    assert_eq!(filled_stream.raw_data(), b"Hello World");
}
2496
/// Equal `PdfName` values hash identically: inserting the same name twice
/// into a `HashSet` keeps a single entry.
#[test]
fn test_name_object_hash_consistency() {
    use std::collections::HashSet;

    let key = |text: &str| PdfName::new(text.to_string());

    // "Type" is inserted twice on purpose.
    let mut names = HashSet::new();
    for text in ["Type", "Pages", "Type"] {
        names.insert(key(text));
    }

    assert_eq!(names.len(), 2);
    assert!(names.contains(&key("Type")));
    assert!(names.contains(&key("Pages")));
    assert!(!names.contains(&key("Font")));
}
2513}
2514
2515