1#![allow(clippy::uninlined_format_args)]
9
10use anyhow::{bail, Context, Result};
11use log::{debug, warn};
12use std::cell::RefCell;
13use std::collections::HashMap;
14use std::fmt;
15use std::fs::File;
16use std::hash::{Hash, Hasher};
17use std::io::{self, Write};
18use std::ops::Deref;
19use std::path::{Path, PathBuf};
20use std::rc::Rc;
21use std::str;
22use std::sync::Arc;
23use std::time;
24
25use num_bigint_dig::{BigInt, ToBigInt};
26use num_integer::Integer;
27use num_traits::cast::ToPrimitive;
28use num_traits::{Signed, Zero};
29
30use super::{config, InputOutputHelper, unwrap_os_string};
31
/// Bytes 2..4 of a pyc header (`\r\n`), following the 16-bit version number.
const PYC_MAGIC: &[u8] = b"\r\n";
/// Number of bits by which successive pieces of a marshalled long are shifted.
const PYLONG_MARSHAL_SHIFT: i32 = 15;
/// High bit of a type byte; when set, `read_object` records the object in `flag_refs`.
const FLAG_REF_BIT: u8 = 0x80;

/// Compile-time switch for additional trace output.
const TRACE: bool = false;
37
38pub fn pyc_python_version(buf: &[u8; 4]) -> Result<((u32, u32), usize)> {
39 if &buf[2..] != PYC_MAGIC {
307 return Err(super::Error::BadMagic(2, buf[2..].to_vec(), PYC_MAGIC).into());
308 }
309
310 let val = ((buf[1] as u32) << 8) + (buf[0] as u32);
311
312 #[allow(overlapping_range_endpoints)]
313 #[allow(clippy::match_overlapping_arm)]
314 match val {
315 20121 => Ok(((1, 5), 8)),
316 50428 => Ok(((1, 6), 8)),
317 50823 => Ok(((2, 0), 8)),
318 60202 => Ok(((2, 1), 8)),
319 60717 => Ok(((2, 2), 8)),
320 62011 | 62021 => Ok(((2, 3), 8)),
321 62041 | 62051 | 62061 => Ok(((2, 4), 8)),
322 62071 | 62081 | 62091 | 62092 | 62101 | 62111 | 62121 | 62131 => Ok(((2, 5), 8)),
323 62151 | 62161 => Ok(((2, 6), 8)),
324 62171 | 62181 | 62191 | 62201 | 62211 => Ok(((2, 7), 8)),
325 3000..=3131 => Ok(((3, 0), 8)),
326 3000..=3151 => Ok(((3, 1), 8)),
327 3000..=3160 => Ok(((3, 1), 8)),
328 3000..=3180 => Ok(((3, 2), 8)),
329 3000..=3230 => Ok(((3, 3), 12)),
330 3000..=3310 => Ok(((3, 4), 12)),
331 3000..=3351 => Ok(((3, 5), 12)),
332 3360 | 3361 | 3370..=3379 => Ok(((3, 6), 12)),
333 3390..=3394 => Ok(((3, 7), 16)),
334 3400 | 3401 | 3410..=3413 => Ok(((3, 8), 16)),
335 3400..=3425 => Ok(((3, 9), 16)),
336 3430..=3439 => Ok(((3, 10), 16)),
337 3450..=3495 => Ok(((3, 11), 16)),
338 3500..=3531 => Ok(((3, 12), 16)),
339 3550..=3599 => Ok(((3, 13), 16)),
340 3600..=3649 => Ok(((3, 14), 16)),
341 3650..=3699 => Ok(((3, 15), 16)),
342 3700..=4000 => Ok(((3, 16), 16)),
343 _ => Err(super::Error::Other(
344 format!("not a pyc file, unknown version magic {val}")
345 ).into()),
346 }
347}
348
/// Renders the flag marker for an object, e.g. `" 🚩3"`.
///
/// Returns `None` when `show_flag` is false or the object has no flag number,
/// so callers can substitute an empty string.
fn format_flag(show_flag: bool, flag_num: &Option<usize>) -> Option<String> {
    match flag_num {
        Some(n) if show_flag => Some(format!(" 🚩{}", n)),
        _ => None,
    }
}
356
357#[derive(Debug, Eq)]
358struct CodeObject {
359 argcount: u32,
360 posonlyargcount: Option<u32>,
361 kwonlyargcount: u32,
362 nlocals: Option<u32>,
363 stacksize: u32,
364 flags: u32,
365 code: Rc<Object>,
366 consts: Rc<Object>,
367 names: Rc<Object>,
368 varnames: Option<Rc<Object>>,
369 freevars: Option<Rc<Object>>,
370 cellvars: Option<Rc<Object>>,
371 localsplusnames: Option<Rc<Object>>,
372 localspluskinds: Option<Rc<Object>>,
373 filename: Rc<Object>,
374 name: Rc<Object>,
375 qualname: Option<Rc<Object>>,
376 firstlineno: u32,
377 linetable: Rc<Object>,
378 exceptiontable: Option<Rc<Object>>,
379
380 flag_num: Option<usize>, }
382
383impl PartialEq for CodeObject {
384 fn eq(&self, other: &Self) -> bool {
385 self.argcount == other.argcount &&
386 self.posonlyargcount == other.posonlyargcount &&
387 self.kwonlyargcount == other.kwonlyargcount &&
388 self.nlocals == other.nlocals &&
389 self.stacksize == other.stacksize &&
390 self.flags == other.flags &&
391 self.code == other.code &&
392 self.consts == other.consts &&
393 self.names == other.names &&
394 self.varnames == other.varnames &&
395 self.freevars == other.freevars &&
396 self.cellvars == other.cellvars &&
397 self.localsplusnames == other.localsplusnames &&
398 self.localspluskinds == other.localspluskinds &&
399 self.filename == other.filename &&
400 self.name == other.name &&
401 self.qualname == other.qualname &&
402 self.firstlineno == other.firstlineno &&
403 self.linetable == other.linetable &&
404 self.exceptiontable == other.exceptiontable
405 }
406}
407
408impl Hash for CodeObject {
409 fn hash<H: Hasher>(&self, state: &mut H) {
410 self.argcount.hash(state);
411 self.posonlyargcount.hash(state);
412 self.kwonlyargcount.hash(state);
413 self.nlocals.hash(state);
414 self.stacksize.hash(state);
415 self.flags.hash(state);
416 self.code.hash(state);
417 self.consts.hash(state);
418 self.names.hash(state);
419 self.varnames.hash(state);
420 self.freevars.hash(state);
421 self.cellvars.hash(state);
422 self.localsplusnames.hash(state);
423 self.localspluskinds.hash(state);
424 self.filename.hash(state);
425 self.name.hash(state);
426 self.qualname.hash(state);
427 self.firstlineno.hash(state);
428 self.linetable.hash(state);
429 self.exceptiontable.hash(state);
430 }
431}
432
433impl CodeObject {
434 fn pretty_print_binary_string<W>(
435 w: &mut W,
436 indent: &str,
437 name: &str,
438 mut object: &Rc<Object>,
439 show_flag: bool,
440 ) -> fmt::Result
441 where
442 W: fmt::Write,
443 {
444 let (ref_info, show_target_flag);
445 if let Object::Ref(v) = object.as_ref() {
446 ref_info = format!(
447 "(ref to {}){}",
448 v.number,
449 format_flag(show_flag, &v.flag_num).unwrap_or("".to_string()),
450 );
451 show_target_flag = false; object = &v.target;
453 } else {
454 ref_info = "".to_string();
455 show_target_flag = true;
456 };
457
458 if let Object::String(v) = object.as_ref() {
459 if !v.bytes.is_empty() {
460 return write!(w, "\n{indent}-{name}: {}[{} bytes]", ref_info, v.bytes.len())
461 }
462 }
463 object.pretty_print(w, &format!("\n{indent}-{name}: {}", ref_info), "", true, show_target_flag)
464 }
465
466 pub fn pretty_print<W>(
467 &self,
468 w: &mut W,
469 prefix: &str,
470 suffix: &str,
471 multiline: bool,
472 show_flag: bool,
473 ) -> fmt::Result
474 where
475 W: fmt::Write,
476 {
477 write!(w, "{prefix}Code")?;
478 self.name.pretty_print(w, " ", "", false, true)?;
479 if let Some(v) = &self.qualname {
480 v.pretty_print(w, "/", "", false, true)?;
481 }
482
483 if let Some(s) = format_flag(show_flag, &self.flag_num) {
484 write!(w, "{}", s)?;
485 }
486
487 if multiline {
488 let indent = " ".repeat(prefix.len() + 2);
489
490 self.filename.pretty_print(w, &format!("\n{indent}"), "", true, true)?;
491 write!(w, ":{}", self.firstlineno)?;
492
493 write!(w, "\n{indent}argcount={}", self.argcount)?;
494 if let Some(v) = self.posonlyargcount {
495 write!(w, " posonlyargcount={}", v)?;
496 }
497 write!(w, " kwonlyargcount={}", self.kwonlyargcount)?;
498 if let Some(v) = self.nlocals {
499 write!(w, " nlocals={}", v)?;
500 }
501 write!(w, " stacksize={}", self.stacksize)?;
502 write!(w, " flags={:x}", self.flags)?;
503
504 Self::pretty_print_binary_string(w, &indent, "code", &self.code, true)?;
508
509 self.consts.pretty_print(w, &format!("\n{indent}-consts: "), "", true, true)?;
510 self.names.pretty_print(w, &format!("\n{indent}-names: "), "", true, true)?;
511 if let Some(v) = &self.varnames {
512 v.pretty_print(w, &format!("\n{indent}-varnames: "), "", true, true)?;
513 }
514 if let Some(v) = &self.freevars {
515 v.pretty_print(w, &format!("\n{indent}-freevars: "), "", true, true)?;
516 }
517 if let Some(v) = &self.cellvars {
518 v.pretty_print(w, &format!("\n{indent}-cellvars: "), "", true, true)?;
519 }
520 if let Some(v) = &self.localsplusnames {
521 v.pretty_print(w, &format!("\n{indent}-locals+names: "), "", true, true)?;
522 }
523 if let Some(v) = &self.localspluskinds {
524 v.pretty_print(w, &format!("\n{indent}-locals+kinds: "), "", true, true)?;
525 }
526 Self::pretty_print_binary_string(w, &indent, "linetable", &self.linetable, true)?;
527 if let Some(v) = &self.exceptiontable {
528 Self::pretty_print_binary_string(w, &indent, "exceptiontable", v, true)?;
529 }
530 }
531
532 write!(w, "{suffix}")
533 }
534}
535
/// The marshal type code a string-like object was read with.
#[derive(Debug, Eq, PartialEq, Hash)]
enum StringVariant {
    /// Type byte `z`; one-byte length.
    ShortAscii,
    /// Type byte `Z`; one-byte length.
    ShortAsciiInterned,
    /// Type byte `s`.
    String,
    /// Type byte `t`.
    Interned,
    /// Type byte `u`.
    Unicode,
    /// Type byte `a`.
    Ascii,
    /// Type byte `A`.
    AsciiInterned,
}
546
547#[derive(Debug, Eq)]
548struct StringObject {
549 variant: StringVariant,
550 bytes: Vec<u8>,
551
552 flag_num: Option<usize>, }
554
555impl PartialEq for StringObject {
556 fn eq(&self, other: &Self) -> bool {
557 self.variant == other.variant &&
558 self.bytes == other.bytes
559 }
560}
561
562impl Hash for StringObject {
563 fn hash<H: Hasher>(&self, state: &mut H) {
564 self.variant.hash(state);
565 self.bytes.hash(state);
566 }
567}
568
569impl fmt::Display for StringObject {
570 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
571 match self.variant {
572 StringVariant::ShortAscii |
573 StringVariant::ShortAsciiInterned |
574 StringVariant::Unicode |
575 StringVariant::Ascii |
576 StringVariant::AsciiInterned => {
577 if let Ok(string) = str::from_utf8(&self.bytes) {
578 write!(f, "{:?}", string)
579 } else {
580 write!(f, "[NON-UTF8] {:?}", self.bytes)
581 }
582 }
583 StringVariant::String |
584 StringVariant::Interned => {
585 write!(f, "{:?}", self.bytes)
586 }
587 }
588 }
589}
590
591impl StringObject {
592 pub fn pretty_print<W>(
593 &self,
594 w: &mut W,
595 prefix: &str,
596 suffix: &str,
597 _multiline: bool,
598 show_flag: bool,
599) -> fmt::Result
600 where
601 W: fmt::Write,
602 {
603 write!(
604 w, "{prefix}{}{}{suffix}",
605 self,
606 format_flag(show_flag, &self.flag_num).unwrap_or("".to_string()),
607 )
608 }
609}
610
/// The kind of container a `SeqObject` represents.
#[derive(Debug, Eq, PartialEq, Hash)]
enum SeqVariant {
    /// Type byte `(`, or `)` for the small-tuple form.
    Tuple,
    /// Type byte `[`.
    List,
    /// Type byte `<`.
    Set,
    /// Type byte `>`.
    FrozenSet,
}
618
619#[derive(Debug, Eq)]
620struct SeqObject {
621 variant: SeqVariant,
622 items: Vec<Rc<Object>>,
623
624 flag_num: Option<usize>, }
626
627impl PartialEq for SeqObject {
628 fn eq(&self, other: &Self) -> bool {
629 self.variant == other.variant &&
630 self.items == other.items
631 }
632}
633
634impl Hash for SeqObject {
635 fn hash<H: Hasher>(&self, state: &mut H) {
636 self.variant.hash(state);
637 self.items.hash(state);
638 }
639}
640
641impl SeqObject {
642 pub fn pretty_print<W>(
643 &self,
644 w: &mut W,
645 prefix: &str,
646 suffix: &str,
647 multiline: bool,
648 show_flag: bool,
649 ) -> fmt::Result
650 where
651 W: fmt::Write,
652 {
653 let (beg, end);
654 let mut extra_comma = "";
655 let multiline = multiline && self.need_multiline(1);
656 let indent = prefix
657 .chars()
658 .skip_while(|ch| *ch == '\n')
659 .take_while(|ch| ch.is_whitespace())
660 .count();
661
662 match self.variant {
663 SeqVariant::Tuple => {
664 beg = "(";
665 end = ")";
666 if self.items.len() == 1 {
667 extra_comma = ",";
668 }
669 }
670 SeqVariant::List => {
671 beg = "[";
672 end = "]";
673 }
674 SeqVariant::Set => {
675 if self.items.is_empty() {
676 beg = "set(";
677 end = ")";
678 } else {
679 beg = "{";
680 end = "}";
681 }
682 }
683 SeqVariant::FrozenSet => {
684 if self.items.is_empty() {
685 beg = "frozenset(";
686 end = ")";
687 } else {
688 beg = "frozenset({";
689 end = "})";
690 }
691 }
692 }
693
694 write!(w, "{prefix}{beg}")?;
695 for (n, v) in self.items.iter().enumerate() {
696 if multiline {
697 if n == 0 {
698 writeln!(w)?;
699 }
700 v.pretty_print(
701 w,
702 &" ".repeat(indent + 2),
703 ",\n",
704 true,
705 true,
706 )?;
707 } else {
708 v.pretty_print(
709 w,
710 if n > 0 { ", " } else { "" },
711 extra_comma,
712 false,
713 true,
714 )?;
715 }
716 }
717
718 write!(
719 w, "{:>width$}{end}{}{suffix}",
720 "",
721 format_flag(show_flag, &self.flag_num).unwrap_or("".to_string()),
722 width=multiline as usize * indent,
723 )
724 }
725
726 fn need_multiline(&self, max_nesting: u8) -> bool {
727 max_nesting == 0 ||
728 self.items.len() > 10 ||
729 self.items.iter().any(|x| x.need_multiline(max_nesting - 1))
730 }
731}
732
733#[derive(Debug, Eq)]
734struct DictObject {
735 items: Vec<(Rc<Object>, Rc<Object>)>,
736
737 #[allow(dead_code)]
738 flag_num: Option<usize>, }
740
741impl PartialEq for DictObject {
742 fn eq(&self, other: &Self) -> bool {
743 self.items == other.items
744 }
745}
746
747impl Hash for DictObject {
748 fn hash<H: Hasher>(&self, state: &mut H) {
749 self.items.hash(state);
750 }
751}
752
753impl DictObject {
754 fn need_multiline(&self, max_nesting: u8) -> bool {
755 max_nesting == 0 ||
756 self.items.iter().any(|(x, y)| x.need_multiline(max_nesting - 1) || y.need_multiline(max_nesting - 1))
757 }
758}
759
760#[derive(Debug, Eq)]
761struct SliceObject {
762 start: Rc<Object>,
763 stop: Rc<Object>,
764 step: Rc<Object>,
765
766 flag_num: Option<usize>, }
768
769impl PartialEq for SliceObject {
770 fn eq(&self, other: &Self) -> bool {
771 self.start == other.start &&
772 self.stop == other.stop &&
773 self.step == other.step
774 }
775}
776
777impl Hash for SliceObject {
778 fn hash<H: Hasher>(&self, state: &mut H) {
779 self.start.hash(state);
780 self.stop.hash(state);
781 self.step.hash(state);
782 }
783}
784
785impl SliceObject {
786 pub fn pretty_print<W>(
787 &self,
788 w: &mut W,
789 prefix: &str,
790 suffix: &str,
791 _multiline: bool,
792 show_flag: bool,
793 ) -> fmt::Result
794 where
795 W: fmt::Write,
796 {
797 self.start.pretty_print(w, &format!("{} slice(", prefix), "", false, true)?;
798 self.stop.pretty_print(w, ", ", "", false, true)?;
799 self.step.pretty_print(w, ", ", "", false, true)?;
800 write!(
801 w, "){}{suffix}",
802 format_flag(show_flag, &self.flag_num).unwrap_or("".to_string()),
803 )
804 }
805}
806
807#[derive(Debug, Eq)]
808struct RefObject {
809 number: u64,
810
811 target: Rc<Object>,
812 flag_num: Option<usize>, }
814
815impl PartialEq for RefObject {
816 fn eq(&self, other: &Self) -> bool {
820 self.target == other.target
821 }
822}
823
824impl Hash for RefObject {
825 fn hash<H: Hasher>(&self, state: &mut H) {
826 self.target.hash(state);
827 }
828}
829
830impl RefObject {
831 pub fn pretty_print<W>(
832 &self,
833 w: &mut W,
834 prefix: &str,
835 suffix: &str,
836 multiline: bool,
837 show_flag: bool,
838 ) -> fmt::Result
839 where
840 W: fmt::Write,
841 {
842 let prefix = format!("{prefix}(ref to {}){}",
843 self.number,
844 format_flag(show_flag, &self.flag_num).unwrap_or("".to_string()));
845 self.target.pretty_print(w, &prefix, suffix, multiline, false)
846 }
847}
848
849#[derive(Debug, Eq)]
850enum Object {
851 Code(CodeObject),
852 Long(BigInt, Option<usize>),
853 Int(u32, Option<usize>),
854 String(StringObject),
855 Seq(SeqObject),
856 Null(Option<usize>),
857 None(Option<usize>),
858 True(Option<usize>),
859 False(Option<usize>),
860 StopIteration(Option<usize>),
861 Ellipsis(Option<usize>),
862 Float(u64, Option<usize>), Complex(u64, u64, Option<usize>),
864 Dict(DictObject),
865 Slice(SliceObject),
866 Ref(RefObject),
867}
868
869impl PartialEq for Object {
870 fn eq(&self, other: &Self) -> bool {
871 match (self, other) {
872 (Object::Ref(v), w) => v.target.deref().eq(w),
875 (v, Object::Ref(w)) => v.eq(w.target.deref()),
876
877 (Object::Code(v), Object::Code(w)) => v == w,
878 (Object::Long(v, _), Object::Long(w, _)) => v == w,
879 (Object::Int(v, _), Object::Int(w, _)) => v == w,
880 (Object::Null(_), Object::Null(_)) => true,
881 (Object::None(_), Object::None(_)) => true,
882 (Object::True(_), Object::True(_)) => true,
883 (Object::False(_),Object::False(_)) => true,
884 (Object::StopIteration(_), Object::StopIteration(_)) => true,
885 (Object::Ellipsis(_), Object::Ellipsis(_)) => true,
886 (Object::Float(v, _), Object::Float(w, _)) => v == w,
887 (Object::Complex(x, y, _), Object::Complex(u, v, _)) => x == u && y == v,
888 (Object::String(v), Object::String(w)) => v == w,
889 (Object::Seq(v), Object::Seq(w)) => v == w,
890 (Object::Dict(v), Object::Dict(w)) => v == w,
891 (Object::Slice(v), Object::Slice(w)) => v == w,
892 _ => false,
893 }
894 }
895}
896
897impl Hash for Object {
898 fn hash<H: Hasher>(&self, state: &mut H) {
899 match self {
900 Object::Code(v) => v.hash(state),
901 Object::String(v) => v.hash(state),
902 Object::Seq(v) => v.hash(state),
903 Object::Ref(v) => v.hash(state),
904 Object::Dict(v) => v.hash(state),
905 Object::Slice(v) => v.hash(state),
906
907 Object::Long(v, _) => v.hash(state),
908 Object::Int(v, _) => v.hash(state),
909 Object::Null(_) => b'0'.hash(state),
910 Object::None(_) => b'N'.hash(state),
911 Object::True(_) => b'T'.hash(state),
912 Object::False(_) => b'F'.hash(state),
913 Object::StopIteration(_) => b'S'.hash(state),
914 Object::Ellipsis(_) => b'.'.hash(state),
915 Object::Float(v, _) => v.hash(state),
916 Object::Complex(x, y, _) => {
917 x.hash(state);
918 y.hash(state);
919 }
920 }
921 }
922}
923
924impl Object {
925 #[allow(clippy::write_literal)]
926 pub fn pretty_print<W>(
927 &self,
928 w: &mut W,
929 prefix: &str,
930 suffix: &str,
931 multiline: bool,
932 show_flag: bool,
933 ) -> fmt::Result
934 where
935 W: fmt::Write,
936 {
937 let (s, flag_num) = match self {
938 Object::Code(v) => {
939 return v.pretty_print(w, prefix, suffix, multiline, show_flag);
940 }
941 Object::String(v) => {
942 return v.pretty_print(w, prefix, suffix, multiline, show_flag);
943 }
944 Object::Seq(v) => {
945 return v.pretty_print(w, prefix, suffix, multiline, show_flag);
946 }
947 Object::Slice(v) => {
948 return v.pretty_print(w, prefix, suffix, multiline, show_flag);
949 }
950 Object::Ref(v) => {
951 return v.pretty_print(w, prefix, suffix, multiline, show_flag);
952 }
953 Object::Dict(_) => todo!(),
954
955 Object::Long(v, flag_num) => (format!("{v}"), flag_num),
956 Object::Int(v, flag_num) => (format!("{v}"), flag_num),
957 Object::Null(flag_num) => ("NULL".to_string(), flag_num),
958 Object::None(flag_num) => ("None".to_string(), flag_num),
959 Object::True(flag_num) => ("True".to_string(), flag_num),
960 Object::False(flag_num) => ("False".to_string(), flag_num),
961 Object::StopIteration(flag_num) => ("StopIteration".to_string(), flag_num),
962 Object::Ellipsis(flag_num) => ("...".to_string(), flag_num),
963 Object::Float(v, flag_num) => (format!("{v}"), flag_num),
964 Object::Complex(x, y, flag_num) => (format!("{x}+{y}j"), flag_num),
965 };
966
967 write!(
968 w, "{prefix}{}{}{suffix}",
969 s,
970 format_flag(show_flag, flag_num).unwrap_or("".to_string())
971 )
972 }
973
974 fn need_multiline(&self, max_nesting: u8) -> bool {
975 match self {
976 Object::Code(..) => true,
977 Object::Ref(..) => false,
978 Object::Slice(..) |
979 Object::Long(..) |
980 Object::Int(..) |
981 Object::Null(..) |
982 Object::None(..) |
983 Object::True(..) |
984 Object::False(..) |
985 Object::StopIteration(..) |
986 Object::Ellipsis(..) |
987 Object::Float(..) |
988 Object::Complex(..) |
989 Object::String(..) => false,
990 Object::Seq(v) => v.need_multiline(max_nesting),
991 Object::Dict(v) => v.need_multiline(max_nesting),
992 }
993 }
994}
995
996pub struct PycParser {
997 input_path: PathBuf,
998 pub version: (u32, u32),
999 header_length: usize,
1000
1001 data: Vec<u8>, read_offset: usize, flag_refs: Vec<Option<Rc<Object>>>, }
1006
1007impl PycParser {
1008 pub fn from_file(input_path: &Path, mut input: impl io::Read) -> Result<Self> {
1009 let mut buf = [0; 4];
1010 input.read_exact(&mut buf)?;
1011
1012 let (version, header_length) = pyc_python_version(&buf)?;
1013 debug!("{}: pyc file for Python {}.{}", input_path.display(), version.0, version.1);
1014 if TRACE {
1015 debug!("{}: pyc file header is {} bytes", input_path.display(), header_length);
1016 }
1017
1018 let mut data = Vec::from(&buf);
1019 input.read_to_end(&mut data)?;
1020
1021 if data.len() < header_length {
1022 return Err(super::Error::Other(
1023 format!("pyc file is too short ({} < {})", data.len(), header_length)
1024 ).into());
1025 }
1026
1027 let pyc = PycParser {
1028 input_path: input_path.to_path_buf(),
1029 version,
1030 header_length,
1031 data,
1032 read_offset: header_length,
1033 flag_refs: Vec::new(),
1034 };
1035
1036 let mtime = pyc.py_content_mtime();
1037 debug!("{}: from py with mtime={} ({}), size={} bytes, {}",
1039 input_path.display(),
1040 mtime,
1041 chrono::DateTime::from_timestamp(mtime as i64, 0).unwrap(),
1042 pyc.py_content_size(),
1043 match pyc.py_content_hash() {
1044 None | Some(0) => "no hash invalidation".to_string(),
1045 Some(hash) => format!("hash={hash}"),
1046 }
1047 );
1048
1049 Ok(pyc)
1056 }
1057
1058 pub fn py_content_hash(&self) -> Option<u32> {
1059 if self.version < (3, 7) { None
1061 } else {
1062 match self._read_long_at(4) {
1063 0 => None, v => Some(v),
1065 }
1066 }
1067 }
1068
1069 pub fn py_content_mtime(&self) -> u32 {
1070 let offset = if self.version < (3, 7) { 4 } else { 8 };
1071 self._read_long_at(offset)
1072 }
1073
1074 pub fn py_content_size(&self) -> u32 {
1075 let offset = if self.version < (3, 7) { 8 } else { 12 };
1076 self._read_long_at(offset)
1077 }
1078
1079 fn take(&mut self, count: usize) -> Result<usize> {
1080 if self.read_offset + count <= self.data.len() {
1084 let offset = self.read_offset;
1085 self.read_offset += count;
1086 Ok(offset)
1087 } else {
1088 Err(super::Error::UnexpectedEOF(self.read_offset as u64, count).into())
1089 }
1090 }
1091
1092 fn _read_byte(&mut self) -> Result<(usize, u8)> {
1093 let offset = self.take(1)?;
1094 Ok((offset, self.data[offset]))
1095 }
1096
1097 fn read_object(&mut self) -> Result<Rc<Object>> {
1098 let flag_num: Option<usize>;
1099 let (offset, mut b) = self._read_byte()?;
1100
1101 if (b & FLAG_REF_BIT) != 0 {
1102 b &= !FLAG_REF_BIT;
1106
1107 flag_num = Some(self.flag_refs.len());
1110 self.flag_refs.push(None);
1111 } else {
1112 flag_num = None;
1113 }
1114
1115 if TRACE {
1116 debug!("{}:{}/0x{:x}: type {:?}{}",
1117 self.input_path.display(), offset, offset,
1118 b as char,
1119 flag_num.map_or("".to_string(), |n| format!(" 🚩{}", n)),
1120 );
1121 }
1122
1123 let obj = match b {
1124 b'0' => Object::Null(flag_num).into(),
1125 b'N' => Object::None(flag_num).into(),
1126 b'F' => Object::False(flag_num).into(),
1127 b'T' => Object::True(flag_num).into(),
1128 b'.' => Object::Ellipsis(flag_num).into(),
1129 b'S' => Object::StopIteration(flag_num).into(),
1130
1131 b'c' => self.read_codeobject(flag_num)?,
1133 b'g' => self.read_binary_float(flag_num)?,
1135 b'i' => self.read_long(flag_num)?,
1137 b'l' => self.read_py_long(flag_num)?,
1139 b'y' => self.read_binary_complex(flag_num)?,
1141
1142 b'r' => self.read_ref(flag_num)?,
1144
1145 b'z' => self.read_string(StringVariant::ShortAscii, flag_num)?,
1147 b'Z' => self.read_string(StringVariant::ShortAsciiInterned, flag_num)?,
1149 b's' => self.read_string(StringVariant::String, flag_num)?,
1151 b't' => self.read_string(StringVariant::Interned, flag_num)?,
1153 b'u' => self.read_string(StringVariant::Unicode, flag_num)?,
1155 b'a' => self.read_string(StringVariant::Ascii, flag_num)?,
1157 b'A' => self.read_string(StringVariant::AsciiInterned, flag_num)?,
1159 b')' => self.read_small_tuple(flag_num)?,
1161 b'(' => self.read_seq(SeqVariant::Tuple, flag_num)?,
1163 b'[' => self.read_seq(SeqVariant::List, flag_num)?,
1165 b'<' => self.read_seq(SeqVariant::Set, flag_num)?,
1167 b'>' => self.read_seq(SeqVariant::FrozenSet, flag_num)?,
1169 b'{' => self.read_dict(flag_num)?,
1171 b':' => self.read_slice(flag_num)?,
1173
1174 b'I' | b'f' | b'x' | b'?' => {
1179 return Err(super::Error::Other(
1180 format!("{}:{}/0x{:x}: unimplemented object type {}/'{}'",
1181 self.input_path.display(), offset, offset,
1182 b, b as char)
1183 ).into());
1184 },
1185 _
1186 => {
1187 return Err(super::Error::Other(
1188 format!("{}:{}/0x{:x}: unknown object type {}/'{}'",
1189 self.input_path.display(), offset, offset,
1190 b, b as char)
1191 ).into());
1192 },
1193 };
1194
1195 if TRACE {
1196 dbg!(&obj);
1197 }
1198
1199 if let Some(flag_num) = flag_num {
1200 assert!(self.flag_refs[flag_num].is_none());
1201 self.flag_refs[flag_num] = Some(obj.clone());
1202 }
1203
1204 Ok(obj)
1205 }
1206
1207 fn _maybe_read_long(&mut self, cond: bool) -> Result<Option<u32>> {
1208 Ok(if cond { Some(self._read_long()?) } else { None })
1209 }
1210
1211 fn maybe_read_object(&mut self, cond: bool) -> Result<Option<Rc<Object>>> {
1212 Ok(if cond {
1213 Some(self.read_object()?)
1214 } else {
1215 None
1216 })
1217 }
1218
1219 fn read_codeobject(&mut self, flag_num: Option<usize>) -> Result<Rc<Object>> {
1220 Ok(Object::Code(CodeObject {
1221 argcount: self._read_long()?,
1222 posonlyargcount: self._maybe_read_long(self.version >= (3, 8))?,
1223 kwonlyargcount: self._read_long()?,
1224 nlocals: self._maybe_read_long(self.version < (3, 11))?,
1225 stacksize: self._read_long()?,
1226 flags: self._read_long()?,
1227 code: self.read_object()?,
1228 consts: self.read_object()?,
1229 names: self.read_object()?,
1230 varnames: self.maybe_read_object(self.version < (3, 11))?,
1231 freevars: self.maybe_read_object(self.version < (3, 11))?,
1232 cellvars: self.maybe_read_object(self.version < (3, 11))?,
1233 localsplusnames: self.maybe_read_object(self.version >= (3, 11))?,
1234 localspluskinds: self.maybe_read_object(self.version >= (3, 11))?,
1235 filename: self.read_object()?,
1236 name: self.read_object()?,
1237 qualname: self.maybe_read_object(self.version >= (3, 11))?,
1238 firstlineno: self._read_long()?,
1239 linetable: self.read_object()?,
1240 exceptiontable: self.maybe_read_object(self.version >= (3, 11))?,
1241 flag_num,
1242 }).into())
1243 }
1244
1245 fn _read_long_at(&self, offset: usize) -> u32 {
1246 let bytes = &self.data[offset .. offset + 4];
1247 u32::from_le_bytes(bytes.try_into().unwrap())
1248 }
1249
1250 fn _read_long(&mut self) -> Result<u32> {
1251 let offset = self.take(4)?;
1252 Ok(self._read_long_at(offset))
1253 }
1254
1255 fn _read_long_signed(&mut self) -> Result<i32> {
1256 let offset = self.take(4)?;
1257 let bytes = &self.data[offset .. offset + 4];
1258 Ok(i32::from_le_bytes(bytes.try_into().unwrap()))
1259 }
1260
1261 fn read_long(&mut self, flag_num: Option<usize>) -> Result<Rc<Object>> {
1262 Ok(Object::Int(self._read_long()?, flag_num).into())
1263 }
1264
1265 fn _read_short(&mut self) -> Result<i32> {
1266 let offset = self.take(2)?;
1267
1268 let x = (self.data[offset] as i32) + ((self.data[offset + 1] as i32) << 8);
1269 Ok(x | -(x & 0x8000))
1271 }
1272
1273 fn read_py_long(&mut self, flag_num: Option<usize>) -> Result<Rc<Object>> {
1274 let n = self._read_long_signed()?;
1275
1276 let mut result = 0_i32.to_bigint().unwrap();
1277 for i in 0 .. n.abs() {
1278 let part = self._read_short()?;
1279 result += part.to_bigint().unwrap() << (i * PYLONG_MARSHAL_SHIFT) as usize;
1280 }
1281
1282 Ok(Object::Long(result * n.signum(), flag_num).into())
1283 }
1284
1285 fn read_string(&mut self, variant: StringVariant, flag_num: Option<usize>) -> Result<Rc<Object>> {
1286 let size = match variant {
1287 StringVariant::ShortAscii |
1289 StringVariant::ShortAsciiInterned
1290 => self._read_byte()?.1 as usize,
1291 StringVariant::String |
1293 StringVariant::Interned |
1294 StringVariant::Unicode |
1295 StringVariant::Ascii |
1296 StringVariant::AsciiInterned
1297 => self._read_long()? as usize,
1298 };
1299
1300 let offset = self.take(size)?;
1301 Ok(Object::String(StringObject {
1302 variant,
1303 bytes: self.data[offset .. offset + size].to_vec(),
1304 flag_num,
1305 }).into())
1306 }
1307
1308 fn _read_tuple(&mut self, variant: SeqVariant, size: u64, flag_num: Option<usize>) -> Result<Rc<Object>> {
1309 let mut items = Vec::new();
1310 for _ in 0..size {
1311 items.push(self.read_object()?);
1312 }
1313
1314 Ok(Object::Seq(SeqObject { variant, items, flag_num }).into())
1315 }
1316
1317 fn read_small_tuple(&mut self, flag_num: Option<usize>) -> Result<Rc<Object>> {
1318 let size = self._read_byte()?.1;
1320 self._read_tuple(SeqVariant::Tuple, size as u64, flag_num)
1321 }
1322
1323 fn read_seq(&mut self, variant: SeqVariant, flag_num: Option<usize>) -> Result<Rc<Object>> {
1324 let size = self._read_long()?;
1325 self._read_tuple(variant, size as u64, flag_num)
1326 }
1327
1328 fn read_ref(&mut self, flag_num: Option<usize>) -> Result<Rc<Object>> {
1329 let index = self._read_long()?;
1330
1331 if index as usize >= self.flag_refs.len() {
1333 return Err(super::Error::Other(
1334 format!("{}:{}/0x{:x}: bad reference to flag_ref {} (have {})",
1335 self.input_path.display(), self.read_offset, self.read_offset,
1336 index, self.flag_refs.len())
1337 ).into());
1338 }
1339
1340 let target = match &self.flag_refs[index as usize] {
1341 None => {
1342 return Err(super::Error::Other(
1343 format!("{}:{}/0x{:x}: bad reference to flag_ref {} (reference from within)",
1344 self.input_path.display(), self.read_offset, self.read_offset,
1345 index)
1346 ).into());
1347 }
1348 Some(v) => v
1349 };
1350
1351 Ok(Object::Ref(RefObject {
1352 number: index as u64,
1353 target: target.clone(),
1354 flag_num,
1355 }).into())
1356 }
1357
1358 fn _read_binary_float(&mut self) -> Result<f64> {
1359 let offset = self.take(8)?;
1360 let bytes = &self.data[offset .. offset + 8];
1361 Ok(f64::from_le_bytes(bytes.try_into().unwrap()))
1362 }
1363
1364 fn read_binary_float(&mut self, flag_num: Option<usize>) -> Result<Rc<Object>> {
1365 Ok(Object::Float(
1366 self._read_binary_float()?.to_bits(),
1367 flag_num,
1368 ).into())
1369 }
1370
1371 fn read_binary_complex(&mut self, flag_num: Option<usize>) -> Result<Rc<Object>> {
1372 Ok(Object::Complex(
1373 self._read_binary_float()?.to_bits(),
1374 self._read_binary_float()?.to_bits(),
1375 flag_num,
1376 ).into())
1377 }
1378
1379 fn read_dict(&mut self, flag_num: Option<usize>) -> Result<Rc<Object>> {
1380 let mut items = Vec::new();
1381
1382 loop {
1383 let key = self.read_object()?;
1384 if let Object::Null(..) = *key {
1385 break;
1386 }
1387
1388 let value = self.read_object()?;
1389 items.push((key, value));
1390 }
1391
1392 Ok(Object::Dict(DictObject { items, flag_num } ).into())
1393 }
1394
1395 fn read_slice(&mut self, flag_num: Option<usize>) -> Result<Rc<Object>> {
1396 let start = self.read_object()?;
1397 let stop = self.read_object()?;
1398 let step = self.read_object()?;
1399
1400 Ok(Object::Slice(SliceObject { start, stop, step, flag_num } ).into())
1401 }
1402
1403 fn set_zero_mtime(&mut self) -> Result<bool> {
1404 if self.py_content_mtime() == 0 {
1407 return Ok(false);
1408 }
1409
1410 let offset = if self.version < (3, 7) { 4 } else { 8 };
1411 self.data[offset..offset+4].fill(0);
1412 assert!(self.py_content_mtime() == 0);
1413
1414 Ok(true)
1415 }
1416}
1417
/// Bookkeeping kept by `PycWriter` for every object it has written:
/// `.0` is the buffer offset where the object starts and `.1` counts how
/// often it was encountered again afterwards.
/// NOTE(review): `.2` starts out as `None`; it is presumably filled in by
/// code outside this chunk — confirm.
type SeenState = (usize, usize, RefCell<Option<usize>>);
1419
1420struct PycWriter {
1421 buffer: Vec<u8>,
1422 seen: HashMap<Rc<Object>, SeenState>, flag_num: usize,
1424 refs_to_fix: HashMap<usize, Rc<Object>>, entry_count: usize,
1426}
1427
1428impl PycWriter {
1429 fn new(header: &[u8]) -> Self {
1430 Self {
1431 buffer: Vec::from(header),
1432 seen: HashMap::new(),
1433 flag_num: 0,
1434 refs_to_fix: HashMap::new(),
1435 entry_count: 0,
1436 }
1437 }
1438
1439 fn to_buffer(parser: &PycParser, code: &Rc<Object>) -> Vec<u8> {
1440 let mut w = PycWriter::new(
1442 &parser.data[..parser.header_length],
1443 );
1444
1445 w.write_object(code);
1446 w.add_ref_flags();
1447 w.fix_refs();
1448
1449 w.buffer
1450 }
1451
1452 fn write_object(&mut self, object: &Rc<Object>) {
1453 if let Object::Ref(v) = &**object {
1454 self.write_object(&v.target);
1455
1456 } else if self.seen.contains_key(object) {
1457 if TRACE {
1458 debug!("Referencing {:?} -> {:?}", object, self.seen[object]);
1459 }
1460
1461 self.seen.entry(object.clone()).and_modify(|tup| tup.1 += 1);
1462 self.write_ref(object.clone());
1463
1464 } else {
1465 let offset = self.buffer.len();
1466 self.entry_count += 1;
1467
1468 match &**object {
1469 Object::Code(v) => {
1471 self.write_code(v);
1472 },
1473 Object::String(v) => {
1474 self.write_string(v);
1475 },
1476 Object::Seq(v) => {
1477 self.write_seq(v);
1478 }
1479 Object::Slice(v) => {
1480 self.write_slice(v);
1481 }
1482 Object::Dict(_) => todo!(),
1483 Object::Long(v, _) => {
1486 self.write_long(v);
1487 }
1488 Object::Int(v, _) => {
1489 self.write_int(*v);
1490 }
1491 Object::Float(v, _) => {
1492 self.write_binary_float(*v);
1493 }
1494 Object::Complex(x, y, _) => {
1495 self.write_binary_complex(*x, *y);
1496 }
1497
1498 Object::Ref(_) => {
1501 panic!(); }
1503 Object::Null(_) => {
1504 return self.buffer.push(b'0');
1505 }
1506 Object::None(_) => {
1507 return self.buffer.push(b'N');
1508 }
1509 Object::False(_) => {
1510 return self.buffer.push(b'F');
1511 }
1512 Object::True(_) => {
1513 return self.buffer.push(b'T');
1514 }
1515 Object::StopIteration(_) => {
1516 return self.buffer.push(b'S');
1517 }
1518 Object::Ellipsis(_) => {
1519 return self.buffer.push(b'.');
1520 }
1521 }
1522
1523 self.seen.insert(object.clone(), (offset, 0, None.into()));
1524 }
1525 }
1526
1527 fn maybe_write_object(&mut self, object: &Option<Rc<Object>>) {
1528 if let Some(object) = object {
1529 self.write_object(object);
1530 }
1531 }
1532
1533 fn write_code(&mut self, code: &CodeObject) {
1534 self.buffer.push(b'c');
1535
1536 self._write_int(code.argcount);
1544 self._maybe_write_int(code.posonlyargcount);
1545 self._write_int(code.kwonlyargcount);
1546 self._maybe_write_int(code.nlocals);
1547 self._write_int(code.stacksize);
1548 self._write_int(code.flags);
1549 self.write_object(&code.code);
1550 self.write_object(&code.consts);
1551 self.write_object(&code.names);
1552 self.maybe_write_object(&code.varnames);
1553 self.maybe_write_object(&code.freevars);
1554 self.maybe_write_object(&code.cellvars);
1555
1556 self.maybe_write_object(&code.localsplusnames);
1557 self.maybe_write_object(&code.localspluskinds);
1558
1559 self.write_object(&code.filename);
1560 self.write_object(&code.name);
1561
1562 self.maybe_write_object(&code.qualname);
1563
1564 self._write_int(code.firstlineno);
1565
1566 self.write_object(&code.linetable);
1567 self.maybe_write_object(&code.exceptiontable);
1568 }
1569
1570 fn write_string(&mut self, string: &StringObject) {
1571 self.buffer.push(
1572 match string.variant {
1573 StringVariant::ShortAscii => b'z',
1574 StringVariant::ShortAsciiInterned => b'Z',
1575 StringVariant::String => b's',
1576 StringVariant::Interned => b't',
1577 StringVariant::Unicode => b'u',
1578 StringVariant::Ascii => b'a',
1579 StringVariant::AsciiInterned => b'A',
1580 }
1581 );
1582
1583 let len = string.bytes.len();
1584 match string.variant {
1585 StringVariant::ShortAscii |
1587 StringVariant::ShortAsciiInterned => {
1588 self.buffer.push(len as u8);
1589 }
1590 StringVariant::String |
1592 StringVariant::Interned |
1593 StringVariant::Unicode |
1594 StringVariant::Ascii |
1595 StringVariant::AsciiInterned => {
1596 self._write_int(len as u32);
1597 }
1598 };
1599
1600 self.buffer.extend_from_slice(&string.bytes);
1601 }
1602
1603 fn write_seq(&mut self, seq: &SeqObject) {
1604 let len = seq.items.len();
1605 let byte = match seq.variant {
1606 SeqVariant::Tuple => {
1607 if len < 256 {
1608 b')' } else {
1610 b'(' }
1612 }
1613 SeqVariant::List => b'[',
1614 SeqVariant::Set => b'<',
1615 SeqVariant::FrozenSet => b'>',
1616 };
1617
1618 self.buffer.push(byte);
1619
1620 if byte == b')' {
1621 self.buffer.push(len as u8);
1622 } else {
1623 self._write_int(len as u32);
1624 }
1625
1626 for item in seq.items.iter() {
1627 self.write_object(item);
1628 }
1629 }
1630
1631 fn write_slice(&mut self, slice: &SliceObject) {
1632 self.buffer.push(b':');
1633 self.write_object(&slice.start);
1634 self.write_object(&slice.stop);
1635 self.write_object(&slice.step);
1636 }
1637
1638 fn _write_int(&mut self, int: u32) {
1639 let bytes = int.to_le_bytes();
1640 self.buffer.extend_from_slice(&bytes);
1641 }
1642
1643 fn _write_signed_int(&mut self, int: i32) {
1644 let bytes = int.to_le_bytes();
1645 self.buffer.extend_from_slice(&bytes);
1646 }
1647
1648 fn _maybe_write_int(&mut self, int: Option<u32>) {
1649 if let Some(int) = int {
1650 self._write_int(int);
1651 }
1652 }
1653
1654 fn write_int(&mut self, int: u32) {
1655 self.buffer.push(b'i');
1656 self._write_int(int);
1657 }
1658
1659 fn _write_short(&mut self, int: u16) {
1660 let bytes = int.to_le_bytes();
1661 self.buffer.extend_from_slice(&bytes);
1662 }
1663
1664 fn write_long(&mut self, long: &BigInt) {
1665 self.buffer.push(b'l');
1666
1667 let n = long.bits().div_ceil(PYLONG_MARSHAL_SHIFT as usize);
1668 let sign = if *long < BigInt::zero() { -1i32 } else { 1i32 };
1669
1670 self._write_signed_int(n as i32 * sign);
1671
1672 let mut val = long.abs();
1673 let div = BigInt::from(1u16 << PYLONG_MARSHAL_SHIFT);
1674 for _ in 0 .. n {
1675 let (q, r) = val.div_rem(&div);
1676 self._write_short(r.to_u16().unwrap());
1677 val = q;
1678 }
1679 assert!(val.is_zero());
1680 }
1681
1682 fn _write_binary_float(&mut self, float: u64) {
1683 let bytes = f64::from_bits(float).to_le_bytes();
1684 self.buffer.extend_from_slice(&bytes);
1685 }
1686
1687 fn write_binary_float(&mut self, float: u64) {
1688 self.buffer.push(b'g');
1689 self._write_binary_float(float);
1690 }
1691
1692 fn write_binary_complex(&mut self, x: u64, y: u64) {
1693 self.buffer.push(b'y');
1694 self._write_binary_float(x);
1695 self._write_binary_float(y);
1696 }
1697
1698 fn write_ref(&mut self, target: Rc<Object>) {
1699 let offset = self.buffer.len();
1700 self.buffer.push(b'r');
1701 self._write_int(0); self.refs_to_fix.insert(offset, target);
1703 }
1704
1705 fn add_ref_flags(&mut self) {
1706 let mut keys: Vec<_> = self.seen.keys().collect();
1707 keys.sort_by_key(|&e| self.seen[e].0);
1708
1709 for entry in keys {
1710 let (offset, count, index) = &self.seen[entry];
1711 assert!(index.borrow().is_none());
1712
1713 if *count > 0 {
1714 let orig = self.buffer[*offset];
1715 if TRACE {
1716 debug!("Flagged {:?}, offset {}/{:x}, adding flag #{} ({} refs)",
1717 entry, offset, offset, self.flag_num, count);
1718 }
1719
1720 assert!("0NFT.ScgilyrzZstuaA)([<>{:".contains(orig as char));
1721 self.buffer[*offset] |= FLAG_REF_BIT;
1722
1723 index.replace(Some(self.flag_num));
1724
1725 self.flag_num += 1;
1726 }
1727 }
1728 }
1729
1730 fn fix_refs(&mut self) {
1731 for (offset, target) in &self.refs_to_fix {
1732 let (target_offset, count, index) = &self.seen[target];
1733 if TRACE {
1734 debug!("Ref at offset {}, setting target {}/0x{:x} {:?} #{:?} ({} refs)",
1735 offset, target_offset, target_offset, target, index, count);
1736 }
1737 assert!(*count > 0);
1738 let index = index.borrow().unwrap();
1739 assert!(index < self.flag_num);
1740 assert!(offset > target_offset);
1741
1742 assert!(self.buffer[*offset] == b'r');
1743 let bytes = &mut self.buffer[offset + 1 .. offset + 5];
1744 assert!(bytes == [0; 4]);
1745 bytes.copy_from_slice(&(index as u32).to_le_bytes());
1746 }
1747 }
1748}
1749
1750
/// Processor registered under the name `"pyc"`. It parses a pyc file,
/// serializes the object tree again, and rewrites the file when the
/// resulting bytes differ from the input.
pub struct Pyc {
    // Shared configuration; `check` and `ignore_extension` are read from it.
    config: Arc<config::Config>,
}
1754
1755impl Pyc {
1756 pub fn new(config: &Arc<config::Config>) -> Self {
1757 Self { config: config.clone() }
1758 }
1759
1760 pub fn boxed(config: &Arc<config::Config>) -> Box<dyn super::Processor + Send + Sync> {
1761 Box::new(Self::new(config))
1762 }
1763}
1764
1765impl super::Processor for Pyc {
1766 fn name(&self) -> &str {
1767 "pyc"
1768 }
1769
1770 fn filter(&self, path: &Path) -> Result<bool> {
1771 Ok(self.config.ignore_extension || path.extension().is_some_and(|x| x == "pyc"))
1772 }
1773
1774 fn process(&self, input_path: &Path) -> Result<super::ProcessResult> {
1775 let (mut io, input) = InputOutputHelper::open(input_path, self.config.check, true)?;
1776
1777 let mut parser = PycParser::from_file(input_path, input)?;
1778 if parser.version < (3, 0) {
1779 return Ok(super::ProcessResult::Noop); }
1781
1782 let code = parser.read_object()?;
1783
1784 let trailing = parser.data.len() - parser.read_offset;
1785 if trailing > 0 {
1786 warn!("{}: found trailing garbage ({} bytes)", input_path.display(), trailing);
1787 }
1788
1789 let new = PycWriter::to_buffer(&parser, &code);
1790 let have_mod = new != parser.data;
1791
1792 if have_mod {
1793 io.open_output(false)?;
1794 io.output.as_mut().unwrap().as_file_mut().write_all(&new)?;
1795 }
1796
1797 io.finalize(have_mod)
1798 }
1799}
1800
1801impl Pyc {
1802 pub fn pretty_print<W>(&self, writer: &mut W, input_path: &Path) -> Result<()>
1803 where
1804 W: fmt::Write,
1805 {
1806 let input = File::open(input_path)
1807 .with_context(|| format!("Cannot open {input_path:?}"))?;
1808 let mut parser = PycParser::from_file(input_path, input)?;
1809
1810 let obj = parser.read_object()?;
1811
1812 obj.pretty_print(writer, "", "\n", true, true)?;
1813
1814 Ok(())
1815 }
1816}
1817
/// Processor registered under the name `"pyc-zero-mtime"`. It asks the
/// parser to zero the mtime stored in a pyc file and, when that changed the
/// file, also resets the modification time of the matching `.py` file.
pub struct PycZeroMtime {
    // Shared configuration; `check` and `ignore_extension` are read from it.
    config: Arc<config::Config>,
}
1821
1822impl PycZeroMtime {
1823 pub fn boxed(config: &Arc<config::Config>) -> Box<dyn super::Processor + Send + Sync> {
1824 Box::new(Self { config: config.clone() })
1825 }
1826
1827 fn set_zero_mtime_on_py_file(&self, input_path: &Path) -> Result<()> {
1828 let input_file_name = unwrap_os_string(input_path.file_name().unwrap())?;
1829 let base = input_file_name.split('.').nth(0).unwrap();
1830 let py_path = input_path.with_file_name(format!("{base}.py"));
1831 debug!("Looking at {}…", py_path.display());
1832
1833 let py_file = match File::open(&py_path) {
1834 Ok(some) => some,
1835 Err(e) => {
1836 if e.kind() == io::ErrorKind::NotFound {
1837 debug!("{}: not found, ignoring", py_path.display());
1838 return Ok(());
1839 } else {
1840 bail!("{}: cannot open: {}", py_path.display(), e);
1841 }
1842 }
1843 };
1844
1845 let orig = py_file.metadata()?;
1846 if !orig.file_type().is_file() {
1847 debug!("{}: not a file, ignoring", py_path.display());
1848 } else if orig.modified()? == time::UNIX_EPOCH {
1849 debug!("{}: mtime is already 0", py_path.display());
1850 } else if self.config.check {
1851 debug!("{}: not touching mtime in --check mode", py_path.display());
1852 } else {
1853 py_file.set_modified(time::UNIX_EPOCH)?;
1854 debug!("{}: mtime set to 0", py_path.display());
1855 }
1856
1857 Ok(())
1858 }
1859}
1860
1861impl super::Processor for PycZeroMtime {
1862 fn name(&self) -> &str {
1863 "pyc-zero-mtime"
1864 }
1865
1866 fn filter(&self, path: &Path) -> Result<bool> {
1867 Ok(self.config.ignore_extension || path.extension().is_some_and(|x| x == "pyc"))
1868 }
1869
1870 fn process(&self, input_path: &Path) -> Result<super::ProcessResult> {
1871 let (mut io, input) = InputOutputHelper::open(input_path, self.config.check, false)?;
1872
1873 let mut parser = PycParser::from_file(input_path, input)?;
1874 let have_mod = parser.set_zero_mtime()?;
1875
1876 if have_mod {
1877 io.open_output(false)?;
1878 io.output.as_mut().unwrap().as_file_mut().write_all(&parser.data)?;
1879 }
1880
1881 let res = io.finalize(have_mod)?;
1882
1883 if have_mod {
1884 self.set_zero_mtime_on_py_file(input_path)?;
1885 }
1886
1887 Ok(res)
1888 }
1889}
1890
1891
#[cfg(test)]
mod tests {
    use std::hash::{DefaultHasher, Hasher};
    use super::*;

    /// Only paths whose extension is exactly `pyc` pass the filter.
    #[test]
    fn filter_a() {
        let cfg = config::Config::empty(0, false).into();
        let h = Pyc::boxed(&cfg);

        let cases = [
            ("/some/path/foobar.pyc", true),
            ("/some/path/foobar.apyc", false),
            ("/some/path/foobar.opt-2.pyc", true),
            ("/some/path/foobar", false),
            ("/some/path/pyc", false),
            ("/some/path/pyc_pyc", false),
            ("/", false),
        ];

        for (path, expected) in cases {
            assert!(h.filter(Path::new(path)).unwrap() == expected, "{}", path);
        }
    }

    /// Sequences that differ only in `flag_num` values compare equal and
    /// hash to the same value.
    #[test]
    fn seq_string_equality() {
        let seq1 = Object::Seq(SeqObject {
            variant: SeqVariant::FrozenSet,
            items: vec![
                Object::String(StringObject {
                    variant: StringVariant::ShortAsciiInterned,
                    bytes: b"http".to_vec(),
                    flag_num: Some(43),
                })
                .into(),
                Object::String(StringObject {
                    variant: StringVariant::ShortAsciiInterned,
                    bytes: b"https".to_vec(),
                    flag_num: Some(44),
                })
                .into(),
            ],
            flag_num: None,
        });
        let seq2 = Object::Seq(SeqObject {
            variant: SeqVariant::FrozenSet,
            items: vec![
                Object::String(StringObject {
                    variant: StringVariant::ShortAsciiInterned,
                    bytes: b"http".to_vec(),
                    flag_num: None,
                })
                .into(),
                Object::String(StringObject {
                    variant: StringVariant::ShortAsciiInterned,
                    bytes: b"https".to_vec(),
                    flag_num: None,
                })
                .into(),
            ],
            flag_num: Some(43),
        });

        // Reflexive and symmetric.
        assert!(seq1 == seq1);
        assert!(seq2 == seq2);
        assert!(seq1 == seq2);
        assert!(seq2 == seq1);

        let digest = |obj: &Object| {
            let mut hasher = DefaultHasher::new();
            obj.hash(&mut hasher);
            hasher.finish()
        };

        assert!(digest(&seq1) == digest(&seq2));
    }

    /// A reference compares equal to an object equal to its target, in both
    /// directions.
    #[test]
    fn seq_ref_equality() {
        let target = Object::String(StringObject {
            variant: StringVariant::ShortAsciiInterned,
            bytes: b"http".to_vec(),
            flag_num: Some(43),
        });
        let obj1 = Object::Ref(RefObject {
            number: 43,
            target: target.into(),
            flag_num: Some(99),
        });
        let obj2 = Object::String(StringObject {
            variant: StringVariant::ShortAsciiInterned,
            bytes: b"http".to_vec(),
            flag_num: None,
        });

        assert!(obj1 == obj1);
        assert!(obj2 == obj2);
        assert!(obj1 == obj2);
        assert!(obj2 == obj1);
    }
}