add_determinism/add_det/handlers/
pyc.rs

1/* SPDX-License-Identifier: GPL-3.0-or-later */
2
3// The args for format!() used in pretty-printers intentionally use a
4// style where the args that are part of the data being printed are
5// not inlined. They are often non-trivial and the format strings are
6// easier to compare if the argument formatting is consistent.
7// The prefix arg, which is _not_ part of the data, is inlined.
8#![allow(clippy::uninlined_format_args)]
9
10use anyhow::{bail, Context, Result};
11use log::{debug, warn};
12use std::cell::RefCell;
13use std::collections::HashMap;
14use std::fmt;
15use std::fs::File;
16use std::hash::{Hash, Hasher};
17use std::io::{self, Write};
18use std::ops::Deref;
19use std::path::{Path, PathBuf};
20use std::rc::Rc;
21use std::str;
22use std::sync::Arc;
23use std::time;
24
25use num_bigint_dig::{BigInt, ToBigInt};
26use num_integer::Integer;
27use num_traits::cast::ToPrimitive;
28use num_traits::{Signed, Zero};
29
30use super::{config, InputOutputHelper, unwrap_os_string};
31
/// The two bytes (`\r\n`) expected at offsets 2..4 of the header passed to
/// `pyc_python_version`, which rejects input where they differ.
const PYC_MAGIC: &[u8] = b"\r\n";
// NOTE(review): presumably the bit width of one marshalled PyLong digit;
// the code using it is outside this part of the file — confirm.
const PYLONG_MARSHAL_SHIFT: i32 = 15;
// NOTE(review): presumably the high bit of a marshal type byte that marks an
// object as referenceable (cf. the `flag_num` fields) — confirm against the parser.
const FLAG_REF_BIT: u8 = 1 << 7;

// NOTE(review): looks like a compile-time switch for extra tracing output;
// its consumers are not visible here.
const TRACE: bool = false;
37
38pub fn pyc_python_version(buf: &[u8; 4]) -> Result<((u32, u32), usize)> {
39    /*
40    https://github.com/python/cpython/blob/main/Include/internal/pycore_magic_number.h#L28
41
42    Python 1.5:   20121
43    Python 1.5.1: 20121
44    Python 1.5.2: 20121
45    Python 1.6:   50428
46    Python 2.0:   50823
47    Python 2.0.1: 50823
48    Python 2.1:   60202
49    Python 2.1.1: 60202
50    Python 2.1.2: 60202
51    Python 2.2:   60717
52    Python 2.3a0: 62011
53    Python 2.3a0: 62021
54    Python 2.3a0: 62011 (!)
55    Python 2.4a0: 62041
56    Python 2.4a3: 62051
57    Python 2.4b1: 62061
58    Python 2.5a0: 62071
59    Python 2.5a0: 62081 (ast-branch)
60    Python 2.5a0: 62091 (with)
61    Python 2.5a0: 62092 (changed WITH_CLEANUP opcode)
62    Python 2.5b3: 62101 (fix wrong code: for x, in ...)
63    Python 2.5b3: 62111 (fix wrong code: x += yield)
64    Python 2.5c1: 62121 (fix wrong lnotab with for loops and
65                         storing constants that should have been removed)
66    Python 2.5c2: 62131 (fix wrong code: for x, in ... in listcomp/genexp)
67    Python 2.6a0: 62151 (peephole optimizations and STORE_MAP opcode)
68    Python 2.6a1: 62161 (WITH_CLEANUP optimization)
69    Python 2.7a0: 62171 (optimize list comprehensions/change LIST_APPEND)
70    Python 2.7a0: 62181 (optimize conditional branches:
71                         introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE)
72    Python 2.7a0  62191 (introduce SETUP_WITH)
73    Python 2.7a0  62201 (introduce BUILD_SET)
74    Python 2.7a0  62211 (introduce MAP_ADD and SET_ADD)
75    Python 3000:   3000
76                   3010 (removed UNARY_CONVERT)
77                   3020 (added BUILD_SET)
78                   3030 (added keyword-only parameters)
79                   3040 (added signature annotations)
80                   3050 (print becomes a function)
81                   3060 (PEP 3115 metaclass syntax)
82                   3061 (string literals become unicode)
83                   3071 (PEP 3109 raise changes)
84                   3081 (PEP 3137 make __file__ and __name__ unicode)
85                   3091 (kill str8 interning)
86                   3101 (merge from 2.6a0, see 62151)
87                   3103 (__file__ points to source file)
88    Python 3.0a4: 3111 (WITH_CLEANUP optimization).
89    Python 3.0b1: 3131 (lexical exception stacking, including POP_EXCEPT
90                          #3021)
91    Python 3.1a1: 3141 (optimize list, set and dict comprehensions:
92                        change LIST_APPEND and SET_ADD, add MAP_ADD #2183)
93    Python 3.1a1: 3151 (optimize conditional branches:
94                        introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE
95                          #4715)
96    Python 3.2a1: 3160 (add SETUP_WITH #6101)
97    Python 3.2a2: 3170 (add DUP_TOP_TWO, remove DUP_TOPX and ROT_FOUR #9225)
98    Python 3.2a3  3180 (add DELETE_DEREF #4617)
99    Python 3.3a1  3190 (__class__ super closure changed)
100    Python 3.3a1  3200 (PEP 3155 __qualname__ added #13448)
101    Python 3.3a1  3210 (added size modulo 2**32 to the pyc header #13645)
102    Python 3.3a2  3220 (changed PEP 380 implementation #14230)
103    Python 3.3a4  3230 (revert changes to implicit __class__ closure #14857)
104    Python 3.4a1  3250 (evaluate positional default arguments before
105                       keyword-only defaults #16967)
106    Python 3.4a1  3260 (add LOAD_CLASSDEREF; allow locals of class to override
107                       free vars #17853)
108    Python 3.4a1  3270 (various tweaks to the __class__ closure #12370)
109    Python 3.4a1  3280 (remove implicit class argument)
110    Python 3.4a4  3290 (changes to __qualname__ computation #19301)
111    Python 3.4a4  3300 (more changes to __qualname__ computation #19301)
112    Python 3.4rc2 3310 (alter __qualname__ computation #20625)
113    Python 3.5a1  3320 (PEP 465: Matrix multiplication operator #21176)
114    Python 3.5b1  3330 (PEP 448: Additional Unpacking Generalizations #2292)
115    Python 3.5b2  3340 (fix dictionary display evaluation order #11205)
116    Python 3.5b3  3350 (add GET_YIELD_FROM_ITER opcode #24400)
117    Python 3.5.2  3351 (fix BUILD_MAP_UNPACK_WITH_CALL opcode #27286)
118    Python 3.6a0  3360 (add FORMAT_VALUE opcode #25483)
119    Python 3.6a1  3361 (lineno delta of code.co_lnotab becomes signed #26107)
120    Python 3.6a2  3370 (16 bit wordcode #26647)
121    Python 3.6a2  3371 (add BUILD_CONST_KEY_MAP opcode #27140)
122    Python 3.6a2  3372 (MAKE_FUNCTION simplification, remove MAKE_CLOSURE
123                        #27095)
124    Python 3.6b1  3373 (add BUILD_STRING opcode #27078)
125    Python 3.6b1  3375 (add SETUP_ANNOTATIONS and STORE_ANNOTATION opcodes
126                        #27985)
127    Python 3.6b1  3376 (simplify CALL_FUNCTIONs & BUILD_MAP_UNPACK_WITH_CALL
128                          #27213)
129    Python 3.6b1  3377 (set __class__ cell from type.__new__ #23722)
130    Python 3.6b2  3378 (add BUILD_TUPLE_UNPACK_WITH_CALL #28257)
131    Python 3.6rc1 3379 (more thorough __class__ validation #23722)
132    Python 3.7a1  3390 (add LOAD_METHOD and CALL_METHOD opcodes #26110)
133    Python 3.7a2  3391 (update GET_AITER #31709)
134    Python 3.7a4  3392 (PEP 552: Deterministic pycs #31650)
135    Python 3.7b1  3393 (remove STORE_ANNOTATION opcode #32550)
136    Python 3.7b5  3394 (restored docstring as the first stmt in the body;
137                        this might affected the first line number #32911)
138    Python 3.8a1  3400 (move frame block handling to compiler #17611)
139    Python 3.8a1  3401 (add END_ASYNC_FOR #33041)
140    Python 3.8a1  3410 (PEP570 Python Positional-Only Parameters #36540)
141    Python 3.8b2  3411 (Reverse evaluation order of key: value in dict
142                        comprehensions #35224)
143    Python 3.8b2  3412 (Swap the position of positional args and positional
144                        only args in ast.arguments #37593)
145    Python 3.8b4  3413 (Fix "break" and "continue" in "finally" #37830)
146    Python 3.9a0  3420 (add LOAD_ASSERTION_ERROR #34880)
147    Python 3.9a0  3421 (simplified bytecode for with blocks #32949)
148    Python 3.9a0  3422 (remove BEGIN_FINALLY, END_FINALLY, CALL_FINALLY, POP_FINALLY bytecodes #33387)
149    Python 3.9a2  3423 (add IS_OP, CONTAINS_OP and JUMP_IF_NOT_EXC_MATCH bytecodes #39156)
150    Python 3.9a2  3424 (simplify bytecodes for *value unpacking)
151    Python 3.9a2  3425 (simplify bytecodes for **value unpacking)
152    Python 3.10a1 3430 (Make 'annotations' future by default)
153    Python 3.10a1 3431 (New line number table format -- PEP 626)
154    Python 3.10a2 3432 (Function annotation for MAKE_FUNCTION is changed from dict to tuple bpo-42202)
155    Python 3.10a2 3433 (RERAISE restores f_lasti if oparg != 0)
156    Python 3.10a6 3434 (PEP 634: Structural Pattern Matching)
157    Python 3.10a7 3435 Use instruction offsets (as opposed to byte offsets).
158    Python 3.10b1 3436 (Add GEN_START bytecode #43683)
159    Python 3.10b1 3437 (Undo making 'annotations' future by default - We like to dance among core devs!)
160    Python 3.10b1 3438 Safer line number table handling.
161    Python 3.10b1 3439 (Add ROT_N)
162    Python 3.11a1 3450 Use exception table for unwinding ("zero cost" exception handling)
163    Python 3.11a1 3451 (Add CALL_METHOD_KW)
164    Python 3.11a1 3452 (drop nlocals from marshaled code objects)
165    Python 3.11a1 3453 (add co_fastlocalnames and co_fastlocalkinds)
166    Python 3.11a1 3454 (compute cell offsets relative to locals bpo-43693)
167    Python 3.11a1 3455 (add MAKE_CELL bpo-43693)
168    Python 3.11a1 3456 (interleave cell args bpo-43693)
169    Python 3.11a1 3457 (Change localsplus to a bytes object bpo-43693)
170    Python 3.11a1 3458 (imported objects now don't use LOAD_METHOD/CALL_METHOD)
171    Python 3.11a1 3459 (PEP 657: add end line numbers and column offsets for instructions)
172    Python 3.11a1 3460 (Add co_qualname field to PyCodeObject bpo-44530)
173    Python 3.11a1 3461 (JUMP_ABSOLUTE must jump backwards)
174    Python 3.11a2 3462 (bpo-44511: remove COPY_DICT_WITHOUT_KEYS, change
175                        MATCH_CLASS and MATCH_KEYS, and add COPY)
176    Python 3.11a3 3463 (bpo-45711: JUMP_IF_NOT_EXC_MATCH no longer pops the
177                        active exception)
178    Python 3.11a3 3464 (bpo-45636: Merge numeric BINARY_*INPLACE_* into
179                        BINARY_OP)
180    Python 3.11a3 3465 (Add COPY_FREE_VARS opcode)
181    Python 3.11a4 3466 (bpo-45292: PEP-654 except*)
182    Python 3.11a4 3467 (Change CALL_xxx opcodes)
183    Python 3.11a4 3468 (Add SEND opcode)
184    Python 3.11a4 3469 (bpo-45711: remove type, traceback from exc_info)
185    Python 3.11a4 3470 (bpo-46221: PREP_RERAISE_STAR no longer pushes lasti)
186    Python 3.11a4 3471 (bpo-46202: remove pop POP_EXCEPT_AND_RERAISE)
187    Python 3.11a4 3472 (bpo-46009: replace GEN_START with POP_TOP)
188    Python 3.11a4 3473 (Add POP_JUMP_IF_NOT_NONE/POP_JUMP_IF_NONE opcodes)
189    Python 3.11a4 3474 (Add RESUME opcode)
190    Python 3.11a5 3475 (Add RETURN_GENERATOR opcode)
191    Python 3.11a5 3476 (Add ASYNC_GEN_WRAP opcode)
192    Python 3.11a5 3477 (Replace DUP_TOP/DUP_TOP_TWO with COPY and
193                        ROT_TWO/ROT_THREE/ROT_FOUR/ROT_N with SWAP)
194    Python 3.11a5 3478 (New CALL opcodes)
195    Python 3.11a5 3479 (Add PUSH_NULL opcode)
196    Python 3.11a5 3480 (New CALL opcodes, second iteration)
197    Python 3.11a5 3481 (Use inline cache for BINARY_OP)
198    Python 3.11a5 3482 (Use inline caching for UNPACK_SEQUENCE and LOAD_GLOBAL)
199    Python 3.11a5 3483 (Use inline caching for COMPARE_OP and BINARY_SUBSCR)
200    Python 3.11a5 3484 (Use inline caching for LOAD_ATTR, LOAD_METHOD, and
201                        STORE_ATTR)
202    Python 3.11a5 3485 (Add an oparg to GET_AWAITABLE)
203    Python 3.11a6 3486 (Use inline caching for PRECALL and CALL)
204    Python 3.11a6 3487 (Remove the adaptive "oparg counter" mechanism)
205    Python 3.11a6 3488 (LOAD_GLOBAL can push additional NULL)
206    Python 3.11a6 3489 (Add JUMP_BACKWARD, remove JUMP_ABSOLUTE)
207    Python 3.11a6 3490 (remove JUMP_IF_NOT_EXC_MATCH, add CHECK_EXC_MATCH)
208    Python 3.11a6 3491 (remove JUMP_IF_NOT_EG_MATCH, add CHECK_EG_MATCH,
209                        add JUMP_BACKWARD_NO_INTERRUPT, make JUMP_NO_INTERRUPT virtual)
210    Python 3.11a7 3492 (make POP_JUMP_IF_NONE/NOT_NONE/TRUE/FALSE relative)
211    Python 3.11a7 3493 (Make JUMP_IF_TRUE_OR_POP/JUMP_IF_FALSE_OR_POP relative)
212    Python 3.11a7 3494 (New location info table)
213    Python 3.11b4 3495 (Set line number of module's RESUME instr to 0 per PEP 626)
214    Python 3.12a1 3500 (Remove PRECALL opcode)
215    Python 3.12a1 3501 (YIELD_VALUE oparg == stack_depth)
216    Python 3.12a1 3502 (LOAD_FAST_CHECK, no NULL-check in LOAD_FAST)
217    Python 3.12a1 3503 (Shrink LOAD_METHOD cache)
218    Python 3.12a1 3504 (Merge LOAD_METHOD back into LOAD_ATTR)
219    Python 3.12a1 3505 (Specialization/Cache for FOR_ITER)
220    Python 3.12a1 3506 (Add BINARY_SLICE and STORE_SLICE instructions)
221    Python 3.12a1 3507 (Set lineno of module's RESUME to 0)
222    Python 3.12a1 3508 (Add CLEANUP_THROW)
223    Python 3.12a1 3509 (Conditional jumps only jump forward)
224    Python 3.12a2 3510 (FOR_ITER leaves iterator on the stack)
225    Python 3.12a2 3511 (Add STOPITERATION_ERROR instruction)
226    Python 3.12a2 3512 (Remove all unused consts from code objects)
227    Python 3.12a4 3513 (Add CALL_INTRINSIC_1 instruction, removed STOPITERATION_ERROR, PRINT_EXPR, IMPORT_STAR)
228    Python 3.12a4 3514 (Remove ASYNC_GEN_WRAP, LIST_TO_TUPLE, and UNARY_POSITIVE)
229    Python 3.12a5 3515 (Embed jump mask in COMPARE_OP oparg)
230    Python 3.12a5 3516 (Add COMPARE_AND_BRANCH instruction)
231    Python 3.12a5 3517 (Change YIELD_VALUE oparg to exception block depth)
232    Python 3.12a6 3518 (Add RETURN_CONST instruction)
233    Python 3.12a6 3519 (Modify SEND instruction)
234    Python 3.12a6 3520 (Remove PREP_RERAISE_STAR, add CALL_INTRINSIC_2)
235    Python 3.12a7 3521 (Shrink the LOAD_GLOBAL caches)
236    Python 3.12a7 3522 (Removed JUMP_IF_FALSE_OR_POP/JUMP_IF_TRUE_OR_POP)
237    Python 3.12a7 3523 (Convert COMPARE_AND_BRANCH back to COMPARE_OP)
238    Python 3.12a7 3524 (Shrink the BINARY_SUBSCR caches)
239    Python 3.12b1 3525 (Shrink the CALL caches)
240    Python 3.12b1 3526 (Add instrumentation support)
241    Python 3.12b1 3527 (Add LOAD_SUPER_ATTR)
242    Python 3.12b1 3528 (Add LOAD_SUPER_ATTR_METHOD specialization)
243    Python 3.12b1 3529 (Inline list/dict/set comprehensions)
244    Python 3.12b1 3530 (Shrink the LOAD_SUPER_ATTR caches)
245    Python 3.12b1 3531 (Add PEP 695 changes)
246    Python 3.13a1 3550 (Plugin optimizer support)
247    Python 3.13a1 3551 (Compact superinstructions)
248    Python 3.13a1 3552 (Remove LOAD_FAST__LOAD_CONST and LOAD_CONST__LOAD_FAST)
249    Python 3.13a1 3553 (Add SET_FUNCTION_ATTRIBUTE)
250    Python 3.13a1 3554 (more efficient bytecodes for f-strings)
251    Python 3.13a1 3555 (generate specialized opcodes metadata from bytecodes.c)
252    Python 3.13a1 3556 (Convert LOAD_CLOSURE to a pseudo-op)
253    Python 3.13a1 3557 (Make the conversion to boolean in jumps explicit)
254    Python 3.13a1 3558 (Reorder the stack items for CALL)
255    Python 3.13a1 3559 (Generate opcode IDs from bytecodes.c)
256    Python 3.13a1 3560 (Add RESUME_CHECK instruction)
257    Python 3.13a1 3561 (Add cache entry to branch instructions)
258    Python 3.13a1 3562 (Assign opcode IDs for internal ops in separate range)
259    Python 3.13a1 3563 (Add CALL_KW and remove KW_NAMES)
260    Python 3.13a1 3564 (Removed oparg from YIELD_VALUE, changed oparg values of RESUME)
261    Python 3.13a1 3565 (Oparg of YIELD_VALUE indicates whether it is in a yield-from)
262    Python 3.13a1 3566 (Emit JUMP_NO_INTERRUPT instead of JUMP for non-loop no-lineno cases)
263    Python 3.13a1 3567 (Reimplement line number propagation by the compiler)
264    Python 3.13a1 3568 (Change semantics of END_FOR)
265    Python 3.13a5 3569 (Specialize CONTAINS_OP)
266    Python 3.13a6 3570 (Add __firstlineno__ class attribute)
267    Python 3.13b1 3571 (Fix miscompilation of private names in generic classes)
268    Python 3.14a1 3600 (Add LOAD_COMMON_CONSTANT)
269    Python 3.14a1 3601 (Fix miscompilation of private names in generic classes)
270    Python 3.14a1 3602 (Add LOAD_SPECIAL. Remove BEFORE_WITH and BEFORE_ASYNC_WITH)
271    Python 3.14a1 3603 (Remove BUILD_CONST_KEY_MAP)
272    Python 3.14a1 3604 (Do not duplicate test at end of while statements)
273    Python 3.14a1 3605 (Move ENTER_EXECUTOR to opcode 255)
274    Python 3.14a1 3606 (Specialize CALL_KW)
275    Python 3.14a1 3607 (Add pseudo instructions JUMP_IF_TRUE/FALSE)
276    Python 3.14a1 3608 (Add support for slices)
277    Python 3.14a2 3609 (Add LOAD_SMALL_INT and LOAD_CONST_IMMORTAL instructions, remove RETURN_CONST)
278    Python 3.14a4 3610 (Add VALUE_WITH_FAKE_GLOBALS format to annotationlib)
279    Python 3.14a4 3611 (Add NOT_TAKEN instruction)
280    Python 3.14a4 3612 (Add POP_ITER and INSTRUMENTED_POP_ITER)
281    Python 3.14a4 3613 (Add LOAD_CONST_MORTAL instruction)
282    Python 3.14a5 3614 (Add BINARY_OP_EXTEND)
283    Python 3.14a5 3615 (CALL_FUNCTION_EX always take a kwargs argument)
284    Python 3.14a5 3616 (Remove BINARY_SUBSCR and family. Make them BINARY_OPs)
285    Python 3.14a6 3617 (Branch monitoring for async for loops)
286    Python 3.14a6 3618 (Add oparg to END_ASYNC_FOR)
287    Python 3.14a6 3619 (Renumber RESUME opcode from 149 to 128)
288    Python 3.14a6 3620 (Optimize bytecode for all/any/tuple called on a genexp)
289    Python 3.14a7 3621 (Optimize LOAD_FAST opcodes into LOAD_FAST_BORROW)
290    Python 3.14a7 3622 (Store annotations in different class dict keys)
291    Python 3.14a7 3623 (Add BUILD_INTERPOLATION & BUILD_TEMPLATE opcodes)
292    Python 3.14b1 3624 (Don't optimize LOAD_FAST when local is killed by DELETE_FAST)
293    Python 3.14b3 3625 (Fix handling of opcodes that may leave operands on the stack when optimizing LOAD_FAST)
294    Python 3.14rc2 3626 (Fix missing exception handlers in logical expression)
295    Python 3.14rc3 3627 (Fix miscompilation of some module-level annotations)
296    Python 3.15a0 3650 (Initial version)
297    Python 3.15a1 3651 (Simplify LOAD_CONST)
298    Python 3.15a1 3652 (Virtual iterators)
299    Python 3.15a1 3653 (Fix handling of opcodes that may leave operands on the stack when optimizing LOAD_FAST)
300    Python 3.15a1 3654 (Fix missing exception handlers in logical expression)
301    Python 3.15a1 3655 (Fix miscompilation of some module-level annotations)
302
303    Python 3.16 will start with 3700
304    */
305
306    if &buf[2..] != PYC_MAGIC {
307        return Err(super::Error::BadMagic(2, buf[2..].to_vec(), PYC_MAGIC).into());
308    }
309
310    let val = ((buf[1] as u32) << 8) + (buf[0] as u32);
311
312    #[allow(overlapping_range_endpoints)]
313    #[allow(clippy::match_overlapping_arm)]
314    match val {
315        20121 => Ok(((1, 5), 8)),
316        50428 => Ok(((1, 6), 8)),
317        50823 => Ok(((2, 0), 8)),
318        60202 => Ok(((2, 1), 8)),
319        60717 => Ok(((2, 2), 8)),
320        62011 | 62021 => Ok(((2, 3), 8)),
321        62041 | 62051 | 62061 => Ok(((2, 4), 8)),
322        62071 | 62081 | 62091 | 62092 | 62101 | 62111 | 62121 | 62131 => Ok(((2, 5), 8)),
323        62151 | 62161 => Ok(((2, 6), 8)),
324        62171 | 62181 | 62191 | 62201 | 62211 => Ok(((2, 7), 8)),
325        3000..=3131 => Ok(((3, 0), 8)),
326        3000..=3151 => Ok(((3, 1), 8)),
327        3000..=3160 => Ok(((3, 1), 8)),
328        3000..=3180 => Ok(((3, 2), 8)),
329        3000..=3230 => Ok(((3, 3), 12)),
330        3000..=3310 => Ok(((3, 4), 12)),
331        3000..=3351 => Ok(((3, 5), 12)),
332        3360 | 3361 | 3370..=3379 => Ok(((3, 6), 12)),
333        3390..=3394 => Ok(((3, 7), 16)),
334        3400 | 3401 | 3410..=3413 => Ok(((3, 8), 16)),
335        3400..=3425 => Ok(((3, 9), 16)),
336        3430..=3439 => Ok(((3, 10), 16)),
337        3450..=3495 => Ok(((3, 11), 16)),
338        3500..=3531 => Ok(((3, 12), 16)),
339        3550..=3599 => Ok(((3, 13), 16)),
340        3600..=3649 => Ok(((3, 14), 16)),
341        3650..=3699 => Ok(((3, 15), 16)),
342        3700..=4000 => Ok(((3, 16), 16)),
343        _ => Err(super::Error::Other(
344            format!("not a pyc file, unknown version magic {val}")
345        ).into()),
346    }
347}
348
/// Render a flag number as the suffix `" 🚩N"`.
///
/// Returns `None` when `show_flag` is false or when `flag_num` is `None`,
/// so callers can decide what (if anything) to print in that case.
fn format_flag(show_flag: bool, flag_num: &Option<usize>) -> Option<String> {
    match flag_num {
        Some(num) if show_flag => Some(format!(" 🚩{}", num)),
        _ => None,
    }
}
356
357#[derive(Debug, Eq)]
358struct CodeObject {
359    argcount: u32,
360    posonlyargcount: Option<u32>,
361    kwonlyargcount: u32,
362    nlocals: Option<u32>,
363    stacksize: u32,
364    flags: u32,
365    code: Rc<Object>,
366    consts: Rc<Object>,
367    names: Rc<Object>,
368    varnames: Option<Rc<Object>>,
369    freevars: Option<Rc<Object>>,
370    cellvars: Option<Rc<Object>>,
371    localsplusnames: Option<Rc<Object>>,
372    localspluskinds: Option<Rc<Object>>,
373    filename: Rc<Object>,
374    name: Rc<Object>,
375    qualname: Option<Rc<Object>>,
376    firstlineno: u32,
377    linetable: Rc<Object>,
378    exceptiontable: Option<Rc<Object>>,
379
380    flag_num: Option<usize>, // filled in when the object was stored with a flag_ref
381}
382
383impl PartialEq for CodeObject {
384    fn eq(&self, other: &Self) -> bool {
385        self.argcount == other.argcount &&
386            self.posonlyargcount == other.posonlyargcount &&
387            self.kwonlyargcount == other.kwonlyargcount &&
388            self.nlocals == other.nlocals &&
389            self.stacksize == other.stacksize &&
390            self.flags == other.flags &&
391            self.code == other.code &&
392            self.consts == other.consts &&
393            self.names == other.names &&
394            self.varnames == other.varnames &&
395            self.freevars == other.freevars &&
396            self.cellvars == other.cellvars &&
397            self.localsplusnames == other.localsplusnames &&
398            self.localspluskinds == other.localspluskinds &&
399            self.filename == other.filename &&
400            self.name == other.name &&
401            self.qualname == other.qualname &&
402            self.firstlineno == other.firstlineno &&
403            self.linetable == other.linetable &&
404            self.exceptiontable == other.exceptiontable
405    }
406}
407
408impl Hash for CodeObject {
409    fn hash<H: Hasher>(&self, state: &mut H) {
410        self.argcount.hash(state);
411        self.posonlyargcount.hash(state);
412        self.kwonlyargcount.hash(state);
413        self.nlocals.hash(state);
414        self.stacksize.hash(state);
415        self.flags.hash(state);
416        self.code.hash(state);
417        self.consts.hash(state);
418        self.names.hash(state);
419        self.varnames.hash(state);
420        self.freevars.hash(state);
421        self.cellvars.hash(state);
422        self.localsplusnames.hash(state);
423        self.localspluskinds.hash(state);
424        self.filename.hash(state);
425        self.name.hash(state);
426        self.qualname.hash(state);
427        self.firstlineno.hash(state);
428        self.linetable.hash(state);
429        self.exceptiontable.hash(state);
430    }
431}
432
433impl CodeObject {
434    fn pretty_print_binary_string<W>(
435        w: &mut W,
436        indent: &str,
437        name: &str,
438        mut object: &Rc<Object>,
439        show_flag: bool,
440    ) -> fmt::Result
441    where
442        W: fmt::Write,
443    {
444        let (ref_info, show_target_flag);
445        if let Object::Ref(v) = object.as_ref() {
446            ref_info = format!(
447                "(ref to {}){}",
448                v.number,
449                format_flag(show_flag, &v.flag_num).unwrap_or("".to_string()),
450            );
451            show_target_flag = false; // suppress printing of flag after we show ref info
452            object = &v.target;
453        } else {
454            ref_info = "".to_string();
455            show_target_flag = true;
456        };
457
458        if let Object::String(v) = object.as_ref() {
459            if !v.bytes.is_empty() {
460                return write!(w, "\n{indent}-{name}: {}[{} bytes]", ref_info, v.bytes.len())
461            }
462        }
463        object.pretty_print(w, &format!("\n{indent}-{name}: {}", ref_info), "", true, show_target_flag)
464    }
465
466    pub fn pretty_print<W>(
467        &self,
468        w: &mut W,
469        prefix: &str,
470        suffix: &str,
471        multiline: bool,
472        show_flag: bool,
473    ) -> fmt::Result
474    where
475        W: fmt::Write,
476    {
477        write!(w, "{prefix}Code")?;
478        self.name.pretty_print(w, " ", "", false, true)?;
479        if let Some(v) = &self.qualname {
480            v.pretty_print(w, "/", "", false, true)?;
481        }
482
483        if let Some(s) = format_flag(show_flag, &self.flag_num) {
484            write!(w, "{}", s)?;
485        }
486
487        if multiline {
488            let indent = " ".repeat(prefix.len() + 2);
489
490            self.filename.pretty_print(w, &format!("\n{indent}"), "", true, true)?;
491            write!(w, ":{}", self.firstlineno)?;
492
493            write!(w, "\n{indent}argcount={}", self.argcount)?;
494            if let Some(v) = self.posonlyargcount {
495                write!(w, " posonlyargcount={}", v)?;
496            }
497            write!(w, " kwonlyargcount={}", self.kwonlyargcount)?;
498            if let Some(v) = self.nlocals {
499                write!(w, " nlocals={}", v)?;
500            }
501            write!(w, " stacksize={}", self.stacksize)?;
502            write!(w, " flags={:x}", self.flags)?;
503
504            // We expect StringVariant::String with bytecode here.
505            // Let's not print that out, since it's not going to be
506            // readable in any way. Otherwise, just print the object.
507            Self::pretty_print_binary_string(w, &indent, "code", &self.code, true)?;
508
509            self.consts.pretty_print(w, &format!("\n{indent}-consts: "), "", true, true)?;
510            self.names.pretty_print(w, &format!("\n{indent}-names: "), "", true, true)?;
511            if let Some(v) = &self.varnames {
512                v.pretty_print(w, &format!("\n{indent}-varnames: "), "", true, true)?;
513            }
514            if let Some(v) = &self.freevars {
515                v.pretty_print(w, &format!("\n{indent}-freevars: "), "", true, true)?;
516            }
517            if let Some(v) = &self.cellvars {
518                v.pretty_print(w, &format!("\n{indent}-cellvars: "), "", true, true)?;
519            }
520            if let Some(v) = &self.localsplusnames {
521                v.pretty_print(w, &format!("\n{indent}-locals+names: "), "", true, true)?;
522            }
523            if let Some(v) = &self.localspluskinds {
524                v.pretty_print(w, &format!("\n{indent}-locals+kinds: "), "", true, true)?;
525            }
526            Self::pretty_print_binary_string(w, &indent, "linetable", &self.linetable, true)?;
527            if let Some(v) = &self.exceptiontable {
528                Self::pretty_print_binary_string(w, &indent, "exceptiontable", v, true)?;
529            }
530        }
531
532        write!(w, "{suffix}")
533    }
534}
535
/// The kind of string a [`StringObject`] was stored as.
///
/// NOTE(review): the names presumably mirror the marshal type codes — confirm
/// against the parser, which is outside this view.
#[derive(Debug, Eq, PartialEq, Hash)]
enum StringVariant {
    ShortAscii,
    ShortAsciiInterned,
    String,
    Interned,
    Unicode,
    Ascii,
    AsciiInterned,
}

/// A string-like object: its kind plus the raw bytes.
///
/// `flag_num` is ignored by equality and hashing.
#[derive(Debug, Eq)]
struct StringObject {
    variant: StringVariant,
    bytes: Vec<u8>,

    flag_num: Option<usize>, // filled in when the object was stored with a flag_ref
}

impl PartialEq for StringObject {
    /// Two strings are equal when both kind and bytes match.
    fn eq(&self, other: &Self) -> bool {
        let Self { variant, bytes, flag_num: _ } = self;
        *variant == other.variant && *bytes == other.bytes
    }
}

impl Hash for StringObject {
    /// Hash the kind, then the bytes — the same fields `eq` compares.
    fn hash<H: Hasher>(&self, state: &mut H) {
        let Self { variant, bytes, flag_num: _ } = self;
        variant.hash(state);
        bytes.hash(state);
    }
}

impl fmt::Display for StringObject {
    /// Text-like kinds are shown as a quoted string when the bytes are valid
    /// UTF-8 and as `[NON-UTF8] <byte list>` otherwise. `String` and
    /// `Interned` are always shown as a byte list.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text_like = match self.variant {
            StringVariant::ShortAscii
            | StringVariant::ShortAsciiInterned
            | StringVariant::Unicode
            | StringVariant::Ascii
            | StringVariant::AsciiInterned => true,
            StringVariant::String | StringVariant::Interned => false,
        };

        if !text_like {
            return write!(f, "{:?}", self.bytes);
        }

        match str::from_utf8(&self.bytes) {
            Ok(text) => write!(f, "{:?}", text),
            Err(_) => write!(f, "[NON-UTF8] {:?}", self.bytes),
        }
    }
}
590
591impl StringObject {
592    pub fn pretty_print<W>(
593        &self,
594        w: &mut W,
595        prefix: &str,
596        suffix: &str,
597        _multiline: bool,
598        show_flag: bool,
599) -> fmt::Result
600    where
601        W: fmt::Write,
602    {
603        write!(
604            w, "{prefix}{}{}{suffix}",
605            self,
606            format_flag(show_flag, &self.flag_num).unwrap_or("".to_string()),
607        )
608    }
609}
610
611#[derive(Debug, Eq, PartialEq, Hash)]
612enum SeqVariant {
613    Tuple,
614    List,
615    Set,
616    FrozenSet,
617}
618
619#[derive(Debug, Eq)]
620struct SeqObject {
621    variant: SeqVariant,
622    items: Vec<Rc<Object>>,
623
624    flag_num: Option<usize>, // filled in when the object was stored with a flag_ref
625}
626
627impl PartialEq for SeqObject {
628    fn eq(&self, other: &Self) -> bool {
629        self.variant == other.variant &&
630            self.items == other.items
631    }
632}
633
634impl Hash for SeqObject {
635    fn hash<H: Hasher>(&self, state: &mut H) {
636        self.variant.hash(state);
637        self.items.hash(state);
638    }
639}
640
641impl SeqObject {
642    pub fn pretty_print<W>(
643        &self,
644        w: &mut W,
645        prefix: &str,
646        suffix: &str,
647        multiline: bool,
648        show_flag: bool,
649    ) -> fmt::Result
650    where
651        W: fmt::Write,
652    {
653        let (beg, end);
654        let mut extra_comma = "";
655        let multiline = multiline && self.need_multiline(1);
656        let indent = prefix
657            .chars()
658            .skip_while(|ch| *ch == '\n')
659            .take_while(|ch| ch.is_whitespace())
660            .count();
661
662        match self.variant {
663            SeqVariant::Tuple => {
664                beg = "(";
665                end = ")";
666                if self.items.len() == 1 {
667                    extra_comma = ",";
668                }
669            }
670            SeqVariant::List => {
671                beg = "[";
672                end = "]";
673            }
674            SeqVariant::Set => {
675                if self.items.is_empty() {
676                    beg = "set(";
677                    end = ")";
678                } else {
679                    beg = "{";
680                    end = "}";
681                }
682            }
683            SeqVariant::FrozenSet => {
684                if self.items.is_empty() {
685                    beg = "frozenset(";
686                    end = ")";
687                } else {
688                    beg = "frozenset({";
689                    end = "})";
690                }
691            }
692        }
693
694        write!(w, "{prefix}{beg}")?;
695        for (n, v) in self.items.iter().enumerate() {
696            if multiline {
697                if n == 0 {
698                    writeln!(w)?;
699                }
700                v.pretty_print(
701                    w,
702                    &" ".repeat(indent + 2),
703                    ",\n",
704                    true,
705                    true,
706                )?;
707            } else {
708                v.pretty_print(
709                    w,
710                    if n > 0 { ", " } else { "" },
711                    extra_comma,
712                    false,
713                    true,
714                )?;
715            }
716        }
717
718        write!(
719            w, "{:>width$}{end}{}{suffix}",
720            "",
721            format_flag(show_flag, &self.flag_num).unwrap_or("".to_string()),
722            width=multiline as usize * indent,
723        )
724    }
725
726    fn need_multiline(&self, max_nesting: u8) -> bool {
727        max_nesting == 0 ||
728            self.items.len() > 10 ||
729            self.items.iter().any(|x| x.need_multiline(max_nesting - 1))
730    }
731}
732
// A dict read from the marshal stream. Equality and hashing are
// implemented by hand below so that they only look at the items.
#[derive(Debug, Eq)]
struct DictObject {
    // (key, value) pairs, in the order in which they were read
    items: Vec<(Rc<Object>, Rc<Object>)>,

    #[allow(dead_code)]
    flag_num: Option<usize>, // filled in when the object was stored with a flag_ref
}
740
741impl PartialEq for DictObject {
742    fn eq(&self, other: &Self) -> bool {
743        self.items == other.items
744    }
745}
746
747impl Hash for DictObject {
748    fn hash<H: Hasher>(&self, state: &mut H) {
749        self.items.hash(state);
750    }
751}
752
753impl DictObject {
754    fn need_multiline(&self, max_nesting: u8) -> bool {
755        max_nesting == 0 ||
756            self.items.iter().any(|(x, y)| x.need_multiline(max_nesting - 1) || y.need_multiline(max_nesting - 1))
757    }
758}
759
// A slice read from the marshal stream. The three parts are stored
// as generic objects, exactly as they were read.
#[derive(Debug, Eq)]
struct SliceObject {
    start: Rc<Object>,
    stop: Rc<Object>,
    step: Rc<Object>,

    flag_num: Option<usize>, // filled in when the object was stored with a flag_ref
}
768
769impl PartialEq for SliceObject {
770    fn eq(&self, other: &Self) -> bool {
771        self.start == other.start &&
772            self.stop == other.stop &&
773            self.step == other.step
774    }
775}
776
777impl Hash for SliceObject {
778    fn hash<H: Hasher>(&self, state: &mut H) {
779        self.start.hash(state);
780        self.stop.hash(state);
781        self.step.hash(state);
782    }
783}
784
785impl SliceObject {
786    pub fn pretty_print<W>(
787        &self,
788        w: &mut W,
789        prefix: &str,
790        suffix: &str,
791        _multiline: bool,
792        show_flag: bool,
793    ) -> fmt::Result
794    where
795        W: fmt::Write,
796    {
797        self.start.pretty_print(w, &format!("{} slice(", prefix), "", false, true)?;
798        self.stop.pretty_print(w, ", ", "", false, true)?;
799        self.step.pretty_print(w, ", ", "", false, true)?;
800        write!(
801            w, "){}{suffix}",
802            format_flag(show_flag, &self.flag_num).unwrap_or("".to_string()),
803        )
804    }
805}
806
// A back-reference to an object which was flagged earlier in the stream.
#[derive(Debug, Eq)]
struct RefObject {
    // index of the target in the list of flagged objects, as read from the stream
    number: u64,

    // the object that the reference points to
    target: Rc<Object>,
    flag_num: Option<usize>, // filled in when the object was stored with a flag_ref
}
814
815impl PartialEq for RefObject {
816    // We really care whether the target is the same.
817    // When writing, we'll dereference the Ref to get to the contents.
818
819    fn eq(&self, other: &Self) -> bool {
820        self.target == other.target
821    }
822}
823
824impl Hash for RefObject {
825    fn hash<H: Hasher>(&self, state: &mut H) {
826        self.target.hash(state);
827    }
828}
829
830impl RefObject {
831    pub fn pretty_print<W>(
832        &self,
833        w: &mut W,
834        prefix: &str,
835        suffix: &str,
836        multiline: bool,
837        show_flag: bool,
838    ) -> fmt::Result
839    where
840        W: fmt::Write,
841    {
842        let prefix = format!("{prefix}(ref to {}){}",
843                             self.number,
844                             format_flag(show_flag, &self.flag_num).unwrap_or("".to_string()));
845        self.target.pretty_print(w, &prefix, suffix, multiline, false)
846    }
847}
848
// One value read from the marshal stream.
//
// The Option<usize> in the simple variants has the same role as the
// flag_num field in the struct variants: it is the index reserved for
// the object in the parser's list of flagged objects, or None when the
// object was not flagged.
#[derive(Debug, Eq)]
enum Object {
    Code(CodeObject),
    Long(BigInt, Option<usize>),
    // The four bytes of an 'i' entry, stored without interpreting the sign
    Int(u32, Option<usize>),
    String(StringObject),
    Seq(SeqObject),
    Null(Option<usize>),
    None(Option<usize>),
    True(Option<usize>),
    False(Option<usize>),
    StopIteration(Option<usize>),
    Ellipsis(Option<usize>),
    // Floats are kept as the bit pattern produced by f64::to_bits()
    Float(u64, Option<usize>),        // yes, u64, so Rust allows Eq to be implemented
    Complex(u64, u64, Option<usize>),
    Dict(DictObject),
    Slice(SliceObject),
    Ref(RefObject),
}
868
869impl PartialEq for Object {
870    fn eq(&self, other: &Self) -> bool {
871        match (self, other) {
872            // For References, we want to actually look at the
873            // reference target, so dereference before comparing.
874            (Object::Ref(v), w) => v.target.deref().eq(w),
875            (v, Object::Ref(w)) => v.eq(w.target.deref()),
876
877            (Object::Code(v), Object::Code(w)) => v == w,
878            (Object::Long(v, _), Object::Long(w, _)) => v == w,
879            (Object::Int(v, _), Object::Int(w, _)) => v == w,
880            (Object::Null(_), Object::Null(_)) => true,
881            (Object::None(_), Object::None(_)) => true,
882            (Object::True(_), Object::True(_)) => true,
883            (Object::False(_),Object::False(_)) => true,
884            (Object::StopIteration(_), Object::StopIteration(_)) => true,
885            (Object::Ellipsis(_), Object::Ellipsis(_)) => true,
886            (Object::Float(v, _), Object::Float(w, _)) => v == w,
887            (Object::Complex(x, y, _), Object::Complex(u, v, _)) => x == u && y == v,
888            (Object::String(v), Object::String(w)) => v == w,
889            (Object::Seq(v), Object::Seq(w)) => v == w,
890            (Object::Dict(v), Object::Dict(w)) => v == w,
891            (Object::Slice(v), Object::Slice(w)) => v == w,
892            _ => false,
893        }
894    }
895}
896
897impl Hash for Object {
898    fn hash<H: Hasher>(&self, state: &mut H) {
899        match self {
900            Object::Code(v) => v.hash(state),
901            Object::String(v) => v.hash(state),
902            Object::Seq(v) => v.hash(state),
903            Object::Ref(v) => v.hash(state),
904            Object::Dict(v) => v.hash(state),
905            Object::Slice(v) => v.hash(state),
906
907            Object::Long(v, _) => v.hash(state),
908            Object::Int(v, _) => v.hash(state),
909            Object::Null(_) => b'0'.hash(state),
910            Object::None(_) => b'N'.hash(state),
911            Object::True(_) => b'T'.hash(state),
912            Object::False(_) => b'F'.hash(state),
913            Object::StopIteration(_) => b'S'.hash(state),
914            Object::Ellipsis(_) => b'.'.hash(state),
915            Object::Float(v, _) => v.hash(state),
916            Object::Complex(x, y, _) => {
917                x.hash(state);
918                y.hash(state);
919            }
920        }
921    }
922}
923
924impl Object {
925    #[allow(clippy::write_literal)]
926    pub fn pretty_print<W>(
927        &self,
928        w: &mut W,
929        prefix: &str,
930        suffix: &str,
931        multiline: bool,
932        show_flag: bool,
933    ) -> fmt::Result
934    where
935        W: fmt::Write,
936    {
937        let (s, flag_num) = match self {
938            Object::Code(v) => {
939                return v.pretty_print(w, prefix, suffix, multiline, show_flag);
940            }
941            Object::String(v) => {
942                return v.pretty_print(w, prefix, suffix, multiline, show_flag);
943            }
944            Object::Seq(v) => {
945                return v.pretty_print(w, prefix, suffix, multiline, show_flag);
946            }
947            Object::Slice(v) => {
948                return v.pretty_print(w, prefix, suffix, multiline, show_flag);
949            }
950            Object::Ref(v) => {
951                return v.pretty_print(w, prefix, suffix, multiline, show_flag);
952            }
953            Object::Dict(_) => todo!(),
954
955            Object::Long(v, flag_num) => (format!("{v}"), flag_num),
956            Object::Int(v, flag_num) => (format!("{v}"), flag_num),
957            Object::Null(flag_num) => ("NULL".to_string(), flag_num),
958            Object::None(flag_num) => ("None".to_string(), flag_num),
959            Object::True(flag_num) => ("True".to_string(), flag_num),
960            Object::False(flag_num) => ("False".to_string(), flag_num),
961            Object::StopIteration(flag_num) => ("StopIteration".to_string(), flag_num),
962            Object::Ellipsis(flag_num) => ("...".to_string(), flag_num),
963            Object::Float(v, flag_num) => (format!("{v}"), flag_num),
964            Object::Complex(x, y, flag_num) => (format!("{x}+{y}j"), flag_num),
965        };
966
967        write!(
968            w, "{prefix}{}{}{suffix}",
969            s,
970            format_flag(show_flag, flag_num).unwrap_or("".to_string())
971        )
972    }
973
974    fn need_multiline(&self, max_nesting: u8) -> bool {
975        match self {
976            Object::Code(..) => true,
977            Object::Ref(..) => false,
978            Object::Slice(..) |
979            Object::Long(..) |
980            Object::Int(..) |
981            Object::Null(..) |
982            Object::None(..) |
983            Object::True(..) |
984            Object::False(..) |
985            Object::StopIteration(..) |
986            Object::Ellipsis(..) |
987            Object::Float(..) |
988            Object::Complex(..) |
989            Object::String(..) => false,
990            Object::Seq(v) => v.need_multiline(max_nesting),
991            Object::Dict(v) => v.need_multiline(max_nesting),
992        }
993    }
994}
995
// Parser for the contents of a pyc file. The whole file is kept in
// memory and objects are decoded from it one after another.
pub struct PycParser {
    // path of the input file, used in log and error messages
    input_path: PathBuf,
    // (major, minor) version of Python, as returned by pyc_python_version()
    pub version: (u32, u32),
    // length of the header in bytes, as returned by pyc_python_version()
    header_length: usize,

    data: Vec<u8>,      // the whole contents of the input file
    read_offset: usize, // index into .data

    // A slot is None while the flagged object is still being read.
    flag_refs: Vec<Option<Rc<Object>>>, // objects that have been flagged to be referenced
}
1006
1007impl PycParser {
1008    pub fn from_file(input_path: &Path, mut input: impl io::Read) -> Result<Self> {
1009        let mut buf = [0; 4];
1010        input.read_exact(&mut buf)?;
1011
1012        let (version, header_length) = pyc_python_version(&buf)?;
1013        debug!("{}: pyc file for Python {}.{}", input_path.display(), version.0, version.1);
1014        if TRACE {
1015            debug!("{}: pyc file header is {} bytes", input_path.display(), header_length);
1016        }
1017
1018        let mut data = Vec::from(&buf);
1019        input.read_to_end(&mut data)?;
1020
1021        if data.len() < header_length {
1022            return Err(super::Error::Other(
1023                format!("pyc file is too short ({} < {})", data.len(), header_length)
1024            ).into());
1025        }
1026
1027        let pyc = PycParser {
1028            input_path: input_path.to_path_buf(),
1029            version,
1030            header_length,
1031            data,
1032            read_offset: header_length,
1033            flag_refs: Vec::new(),
1034        };
1035
1036        let mtime = pyc.py_content_mtime();
1037        // 'size' seems to be the count of serialized objects, excluding TYPE_REF
1038        debug!("{}: from py with mtime={} ({}), size={} bytes, {}",
1039               input_path.display(),
1040               mtime,
1041               chrono::DateTime::from_timestamp(mtime as i64, 0).unwrap(),
1042               pyc.py_content_size(),
1043               match pyc.py_content_hash() {
1044                   None | Some(0) => "no hash invalidation".to_string(),
1045                   Some(hash) => format!("hash={hash}"),
1046               }
1047        );
1048
1049        // TODO: check if .py file exists, and if yes, check if mtime
1050        // read above matches the mtime on the file, and if the size
1051        // read above matches the size of the source file. If not,
1052        // warn that something is awry and Python would rewrite the
1053        // bytecode. Consider adjusting the mtime and hash to match.
1054
1055        Ok(pyc)
1056    }
1057
1058    pub fn py_content_hash(&self) -> Option<u32> {
1059        if self.version < (3, 7) { // The first version supporting PEP 552
1060            None
1061        } else {
1062            match self._read_long_at(4) {
1063                0 => None,  // Let's always map 0 to None.
1064                v => Some(v),
1065            }
1066        }
1067    }
1068
1069    pub fn py_content_mtime(&self) -> u32 {
1070        let offset = if self.version < (3, 7) { 4 } else { 8 };
1071        self._read_long_at(offset)
1072    }
1073
1074    pub fn py_content_size(&self) -> u32 {
1075        let offset = if self.version < (3, 7) { 8 } else { 12 };
1076        self._read_long_at(offset)
1077    }
1078
1079    fn take(&mut self, count: usize) -> Result<usize> {
1080        // This just checks availability and moves the offset.
1081        // The return value points to the beginning of data.
1082
1083        if self.read_offset + count <= self.data.len() {
1084            let offset = self.read_offset;
1085            self.read_offset += count;
1086            Ok(offset)
1087        } else {
1088            Err(super::Error::UnexpectedEOF(self.read_offset as u64, count).into())
1089        }
1090    }
1091
1092    fn _read_byte(&mut self) -> Result<(usize, u8)> {
1093        let offset = self.take(1)?;
1094        Ok((offset, self.data[offset]))
1095    }
1096
1097    fn read_object(&mut self) -> Result<Rc<Object>> {
1098        let flag_num: Option<usize>;
1099        let (offset, mut b) = self._read_byte()?;
1100
1101        if (b & FLAG_REF_BIT) != 0 {
1102            // This object has been flagged for future references. We
1103            // put it on the list of objects which can be referred to
1104            // later by index in the pyc stream.
1105            b &= !FLAG_REF_BIT;
1106
1107            // We reserve the reference index number early.
1108            // We'll put the constructed object into the slot later.
1109            flag_num = Some(self.flag_refs.len());
1110            self.flag_refs.push(None);
1111        } else {
1112            flag_num = None;
1113        }
1114
1115        if TRACE {
1116            debug!("{}:{}/0x{:x}: type {:?}{}",
1117                   self.input_path.display(), offset, offset,
1118                   b as char,
1119                   flag_num.map_or("".to_string(), |n| format!(" 🚩{}", n)),
1120            );
1121        }
1122
1123        let obj = match b {
1124            b'0' => Object::Null(flag_num).into(),
1125            b'N' => Object::None(flag_num).into(),
1126            b'F' => Object::False(flag_num).into(),
1127            b'T' => Object::True(flag_num).into(),
1128            b'.' => Object::Ellipsis(flag_num).into(),
1129            b'S' => Object::StopIteration(flag_num).into(),
1130
1131            b'c'    // CODE
1132                => self.read_codeobject(flag_num)?,
1133            b'g'    // BINARY_FLOAT
1134                => self.read_binary_float(flag_num)?,
1135            b'i'    // INT
1136                => self.read_long(flag_num)?,
1137            b'l'    // LONG
1138                => self.read_py_long(flag_num)?,
1139            b'y'    // BINARY_COMPLEX
1140                => self.read_binary_complex(flag_num)?,
1141
1142            b'r'    // REF
1143                => self.read_ref(flag_num)?,
1144
1145            b'z'    // SHORT_ASCII
1146                => self.read_string(StringVariant::ShortAscii, flag_num)?,
1147            b'Z'    // SHORT_ASCII_INTERNED
1148                => self.read_string(StringVariant::ShortAsciiInterned, flag_num)?,
1149            b's'    // STRING
1150                => self.read_string(StringVariant::String, flag_num)?,
1151            b't'    // INTERNED
1152                => self.read_string(StringVariant::Interned, flag_num)?,
1153            b'u'    // UNICODE
1154                => self.read_string(StringVariant::Unicode, flag_num)?,
1155            b'a'    // ASCII
1156                => self.read_string(StringVariant::Ascii, flag_num)?,
1157            b'A'    // ASCII_INTERNED
1158                => self.read_string(StringVariant::AsciiInterned, flag_num)?,
1159            b')'    // SMALL_TUPLE
1160                => self.read_small_tuple(flag_num)?,
1161            b'('    // TUPLE
1162                => self.read_seq(SeqVariant::Tuple, flag_num)?,
1163            b'['    // LIST
1164                => self.read_seq(SeqVariant::List, flag_num)?,
1165            b'<'    // SET
1166                => self.read_seq(SeqVariant::Set, flag_num)?,
1167            b'>'    // FROZEN_SET
1168                => self.read_seq(SeqVariant::FrozenSet, flag_num)?,
1169            b'{'    // DICT
1170                => self.read_dict(flag_num)?,
1171            b':'    // SLICE
1172                => self.read_slice(flag_num)?,
1173
1174            b'I' |  // INT64
1175            b'f' |  // FLOAT
1176            b'x' |  // COMPLEX
1177            b'?'    // UNKNOWN
1178                => {
1179                    return Err(super::Error::Other(
1180                        format!("{}:{}/0x{:x}: unimplemented object type {}/'{}'",
1181                                self.input_path.display(), offset, offset,
1182                                b, b as char)
1183                    ).into());
1184                },
1185            _
1186                => {
1187                    return Err(super::Error::Other(
1188                        format!("{}:{}/0x{:x}: unknown object type {}/'{}'",
1189                                self.input_path.display(), offset, offset,
1190                                b, b as char)
1191                    ).into());
1192                },
1193        };
1194
1195        if TRACE {
1196            dbg!(&obj);
1197        }
1198
1199        if let Some(flag_num) = flag_num {
1200            assert!(self.flag_refs[flag_num].is_none());
1201            self.flag_refs[flag_num] = Some(obj.clone());
1202        }
1203
1204        Ok(obj)
1205    }
1206
1207    fn _maybe_read_long(&mut self, cond: bool) -> Result<Option<u32>> {
1208        Ok(if cond { Some(self._read_long()?) } else { None })
1209    }
1210
1211    fn maybe_read_object(&mut self, cond: bool) -> Result<Option<Rc<Object>>> {
1212        Ok(if cond {
1213            Some(self.read_object()?)
1214        } else {
1215            None
1216        })
1217    }
1218
1219    fn read_codeobject(&mut self, flag_num: Option<usize>) -> Result<Rc<Object>> {
1220        Ok(Object::Code(CodeObject {
1221            argcount: self._read_long()?,
1222            posonlyargcount: self._maybe_read_long(self.version >= (3, 8))?,
1223            kwonlyargcount: self._read_long()?,
1224            nlocals: self._maybe_read_long(self.version < (3, 11))?,
1225            stacksize: self._read_long()?,
1226            flags: self._read_long()?,
1227            code: self.read_object()?,
1228            consts: self.read_object()?,
1229            names: self.read_object()?,
1230            varnames: self.maybe_read_object(self.version < (3, 11))?,
1231            freevars: self.maybe_read_object(self.version < (3, 11))?,
1232            cellvars: self.maybe_read_object(self.version < (3, 11))?,
1233            localsplusnames: self.maybe_read_object(self.version >= (3, 11))?,
1234            localspluskinds: self.maybe_read_object(self.version >= (3, 11))?,
1235            filename: self.read_object()?,
1236            name: self.read_object()?,
1237            qualname: self.maybe_read_object(self.version >= (3, 11))?,
1238            firstlineno: self._read_long()?,
1239            linetable: self.read_object()?,
1240            exceptiontable: self.maybe_read_object(self.version >= (3, 11))?,
1241            flag_num,
1242        }).into())
1243    }
1244
1245    fn _read_long_at(&self, offset: usize) -> u32 {
1246        let bytes = &self.data[offset .. offset + 4];
1247        u32::from_le_bytes(bytes.try_into().unwrap())
1248    }
1249
1250    fn _read_long(&mut self) -> Result<u32> {
1251        let offset = self.take(4)?;
1252        Ok(self._read_long_at(offset))
1253    }
1254
1255    fn _read_long_signed(&mut self) -> Result<i32> {
1256        let offset = self.take(4)?;
1257        let bytes = &self.data[offset .. offset + 4];
1258        Ok(i32::from_le_bytes(bytes.try_into().unwrap()))
1259    }
1260
1261    fn read_long(&mut self, flag_num: Option<usize>) -> Result<Rc<Object>> {
1262        Ok(Object::Int(self._read_long()?, flag_num).into())
1263    }
1264
1265    fn _read_short(&mut self) -> Result<i32> {
1266        let offset = self.take(2)?;
1267
1268        let x = (self.data[offset] as i32) + ((self.data[offset + 1] as i32) << 8);
1269        // Sign-extension, in case short greater than 16 bits
1270        Ok(x | -(x & 0x8000))
1271    }
1272
1273    fn read_py_long(&mut self, flag_num: Option<usize>) -> Result<Rc<Object>> {
1274        let n = self._read_long_signed()?;
1275
1276        let mut result = 0_i32.to_bigint().unwrap();
1277        for i in 0 .. n.abs() {
1278            let part = self._read_short()?;
1279            result += part.to_bigint().unwrap() << (i * PYLONG_MARSHAL_SHIFT) as usize;
1280        }
1281
1282        Ok(Object::Long(result * n.signum(), flag_num).into())
1283    }
1284
1285    fn read_string(&mut self, variant: StringVariant, flag_num: Option<usize>) -> Result<Rc<Object>> {
1286        let size = match variant {
1287            // short == size is stored as one byte
1288            StringVariant::ShortAscii |
1289            StringVariant::ShortAsciiInterned
1290                => self._read_byte()?.1 as usize,
1291            // non-short == size is stored as long (4 bytes)
1292            StringVariant::String |
1293            StringVariant::Interned |
1294            StringVariant::Unicode |
1295            StringVariant::Ascii |
1296            StringVariant::AsciiInterned
1297                => self._read_long()? as usize,
1298        };
1299
1300        let offset = self.take(size)?;
1301        Ok(Object::String(StringObject {
1302            variant,
1303            bytes: self.data[offset .. offset + size].to_vec(),
1304            flag_num,
1305        }).into())
1306    }
1307
1308    fn _read_tuple(&mut self, variant: SeqVariant, size: u64, flag_num: Option<usize>) -> Result<Rc<Object>> {
1309        let mut items = Vec::new();
1310        for _ in 0..size {
1311            items.push(self.read_object()?);
1312        }
1313
1314        Ok(Object::Seq(SeqObject { variant, items, flag_num }).into())
1315    }
1316
1317    fn read_small_tuple(&mut self, flag_num: Option<usize>) -> Result<Rc<Object>> {
1318        // small tuple — size is only one byte
1319        let size = self._read_byte()?.1;
1320        self._read_tuple(SeqVariant::Tuple, size as u64, flag_num)
1321    }
1322
1323    fn read_seq(&mut self, variant: SeqVariant, flag_num: Option<usize>) -> Result<Rc<Object>> {
1324        let size = self._read_long()?;
1325        self._read_tuple(variant, size as u64, flag_num)
1326    }
1327
1328    fn read_ref(&mut self, flag_num: Option<usize>) -> Result<Rc<Object>> {
1329        let index = self._read_long()?;
1330
1331        // Is this a valid reference to one of the already-flagged objects?
1332        if index as usize >= self.flag_refs.len() {
1333            return Err(super::Error::Other(
1334                format!("{}:{}/0x{:x}: bad reference to flag_ref {} (have {})",
1335                        self.input_path.display(), self.read_offset, self.read_offset,
1336                        index, self.flag_refs.len())
1337            ).into());
1338        }
1339
1340        let target = match &self.flag_refs[index as usize] {
1341            None => {
1342                return Err(super::Error::Other(
1343                    format!("{}:{}/0x{:x}: bad reference to flag_ref {} (reference from within)",
1344                            self.input_path.display(), self.read_offset, self.read_offset,
1345                            index)
1346                ).into());
1347            }
1348            Some(v) => v
1349        };
1350
1351        Ok(Object::Ref(RefObject {
1352            number: index as u64,
1353            target: target.clone(),
1354            flag_num,
1355        }).into())
1356    }
1357
1358    fn _read_binary_float(&mut self) -> Result<f64> {
1359        let offset = self.take(8)?;
1360        let bytes = &self.data[offset .. offset + 8];
1361        Ok(f64::from_le_bytes(bytes.try_into().unwrap()))
1362    }
1363
1364    fn read_binary_float(&mut self, flag_num: Option<usize>) -> Result<Rc<Object>> {
1365        Ok(Object::Float(
1366            self._read_binary_float()?.to_bits(),
1367            flag_num,
1368        ).into())
1369    }
1370
1371    fn read_binary_complex(&mut self, flag_num: Option<usize>) -> Result<Rc<Object>> {
1372        Ok(Object::Complex(
1373            self._read_binary_float()?.to_bits(),
1374            self._read_binary_float()?.to_bits(),
1375            flag_num,
1376        ).into())
1377    }
1378
1379    fn read_dict(&mut self, flag_num: Option<usize>) -> Result<Rc<Object>> {
1380        let mut items = Vec::new();
1381
1382        loop {
1383            let key = self.read_object()?;
1384            if let Object::Null(..) = *key {
1385                break;
1386            }
1387
1388            let value = self.read_object()?;
1389            items.push((key, value));
1390        }
1391
1392        Ok(Object::Dict(DictObject { items, flag_num } ).into())
1393    }
1394
1395    fn read_slice(&mut self, flag_num: Option<usize>) -> Result<Rc<Object>> {
1396        let start = self.read_object()?;
1397        let stop = self.read_object()?;
1398        let step = self.read_object()?;
1399
1400        Ok(Object::Slice(SliceObject { start, stop, step, flag_num } ).into())
1401    }
1402
1403    fn set_zero_mtime(&mut self) -> Result<bool> {
1404        // Set the embedded mtime timestamp of the source .py file to 0 in the header.
1405
1406        if self.py_content_mtime() == 0 {
1407            return Ok(false);
1408        }
1409
1410        let offset = if self.version < (3, 7) { 4 } else { 8 };
1411        self.data[offset..offset+4].fill(0);
1412        assert!(self.py_content_mtime() == 0);
1413
1414        Ok(true)
1415    }
1416}
1417
// The state kept for every object in PycWriter::seen. The first item
// is the offset in the buffer at which the object was written. The
// second item is the number of times the object was referenced again
// after it was written. See the comment on the field for the third.
type SeenState = (usize, usize, RefCell<Option<usize>>);

// Serializer which builds the contents of a pyc file in memory.
struct PycWriter {
    // the output: the header, followed by the serialized objects
    buffer: Vec<u8>,
    seen: HashMap<Rc<Object>, SeenState>, // map object -> (offset, reference count, flag_num)
    flag_num: usize,
    refs_to_fix: HashMap<usize, Rc<Object>>, // map offsets to ref -> object
    // the number of objects written out in full, i.e. not as a reference
    entry_count: usize,
}
1427
1428impl PycWriter {
1429    fn new(header: &[u8]) -> Self {
1430        Self {
1431            buffer: Vec::from(header),
1432            seen: HashMap::new(),
1433            flag_num: 0,
1434            refs_to_fix: HashMap::new(),
1435            entry_count: 0,
1436        }
1437    }
1438
1439    fn to_buffer(parser: &PycParser, code: &Rc<Object>) -> Vec<u8> {
1440        // Copy the header from original file
1441        let mut w = PycWriter::new(
1442            &parser.data[..parser.header_length],
1443        );
1444
1445        w.write_object(code);
1446        w.add_ref_flags();
1447        w.fix_refs();
1448
1449        w.buffer
1450    }
1451
1452    fn write_object(&mut self, object: &Rc<Object>) {
1453        if let Object::Ref(v) = &**object {
1454            self.write_object(&v.target);
1455
1456        } else if self.seen.contains_key(object) {
1457            if TRACE {
1458                debug!("Referencing {:?} -> {:?}", object, self.seen[object]);
1459            }
1460
1461            self.seen.entry(object.clone()).and_modify(|tup| tup.1 += 1);
1462            self.write_ref(object.clone());
1463
1464        } else {
1465            let offset = self.buffer.len();
1466            self.entry_count += 1;
1467
1468            match &**object {
1469                // Those end up in the index
1470                Object::Code(v) => {
1471                    self.write_code(v);
1472                },
1473                Object::String(v) => {
1474                    self.write_string(v);
1475                },
1476                Object::Seq(v) => {
1477                    self.write_seq(v);
1478                }
1479                Object::Slice(v) => {
1480                    self.write_slice(v);
1481                }
1482                Object::Dict(_) => todo!(),
1483                // mind null termination!
1484
1485                Object::Long(v, _) => {
1486                    self.write_long(v);
1487                }
1488                Object::Int(v, _) => {
1489                    self.write_int(*v);
1490                }
1491                Object::Float(v, _) => {
1492                    self.write_binary_float(*v);
1493                }
1494                Object::Complex(x, y, _) => {
1495                    self.write_binary_complex(*x, *y);
1496                }
1497
1498                // Those are not in the index. The reference takes as
1499                // many bytes or more to write as the object itself.
1500                Object::Ref(_) => {
1501                    panic!(); // already handled above.
1502                }
1503                Object::Null(_) => {
1504                    return self.buffer.push(b'0');
1505                }
1506                Object::None(_) => {
1507                    return self.buffer.push(b'N');
1508                }
1509                Object::False(_) =>  {
1510                    return self.buffer.push(b'F');
1511                }
1512                Object::True(_) =>  {
1513                    return self.buffer.push(b'T');
1514                }
1515                Object::StopIteration(_) =>  {
1516                    return self.buffer.push(b'S');
1517                }
1518                Object::Ellipsis(_) =>  {
1519                    return self.buffer.push(b'.');
1520                }
1521            }
1522
1523            self.seen.insert(object.clone(), (offset, 0, None.into()));
1524        }
1525    }
1526
1527    fn maybe_write_object(&mut self, object: &Option<Rc<Object>>) {
1528        if let Some(object) = object {
1529            self.write_object(object);
1530        }
1531    }
1532
1533    fn write_code(&mut self, code: &CodeObject) {
1534        self.buffer.push(b'c');
1535
1536        // When reading, the list of fields that are read depends on
1537        // the version. In the opposite direction, we skip fields
1538        // which are None and write anything that is Some. We write
1539        // the bytecode in the same version that was read. If this is
1540        // ever changed, we'd need to conditonalize here similarly as
1541        // when reading.
1542
1543        self._write_int(code.argcount);
1544        self._maybe_write_int(code.posonlyargcount);
1545        self._write_int(code.kwonlyargcount);
1546        self._maybe_write_int(code.nlocals);
1547        self._write_int(code.stacksize);
1548        self._write_int(code.flags);
1549        self.write_object(&code.code);
1550        self.write_object(&code.consts);
1551        self.write_object(&code.names);
1552        self.maybe_write_object(&code.varnames);
1553        self.maybe_write_object(&code.freevars);
1554        self.maybe_write_object(&code.cellvars);
1555
1556        self.maybe_write_object(&code.localsplusnames);
1557        self.maybe_write_object(&code.localspluskinds);
1558
1559        self.write_object(&code.filename);
1560        self.write_object(&code.name);
1561
1562        self.maybe_write_object(&code.qualname);
1563
1564        self._write_int(code.firstlineno);
1565
1566        self.write_object(&code.linetable);
1567        self.maybe_write_object(&code.exceptiontable);
1568    }
1569
1570    fn write_string(&mut self, string: &StringObject) {
1571        self.buffer.push(
1572            match string.variant {
1573                StringVariant::ShortAscii         => b'z',
1574                StringVariant::ShortAsciiInterned => b'Z',
1575                StringVariant::String             => b's',
1576                StringVariant::Interned           => b't',
1577                StringVariant::Unicode            => b'u',
1578                StringVariant::Ascii              => b'a',
1579                StringVariant::AsciiInterned      => b'A',
1580            }
1581        );
1582
1583        let len = string.bytes.len();
1584        match string.variant {
1585            // short == size is stored as one byte
1586            StringVariant::ShortAscii |
1587            StringVariant::ShortAsciiInterned => {
1588                self.buffer.push(len as u8);
1589            }
1590            // non-short == size is stored as long (4 bytes)
1591            StringVariant::String |
1592            StringVariant::Interned |
1593            StringVariant::Unicode |
1594            StringVariant::Ascii |
1595            StringVariant::AsciiInterned => {
1596                self._write_int(len as u32);
1597            }
1598        };
1599
1600        self.buffer.extend_from_slice(&string.bytes);
1601    }
1602
1603    fn write_seq(&mut self, seq: &SeqObject) {
1604        let len = seq.items.len();
1605        let byte = match seq.variant {
1606            SeqVariant::Tuple => {
1607                if len < 256 {
1608                    b')'  // SMALL_TUPLE
1609                } else {
1610                    b'('  // TUPLE
1611                }
1612            }
1613            SeqVariant::List      => b'[',
1614            SeqVariant::Set       => b'<',
1615            SeqVariant::FrozenSet => b'>',
1616        };
1617
1618        self.buffer.push(byte);
1619
1620        if byte == b')' {
1621            self.buffer.push(len as u8);
1622        } else {
1623            self._write_int(len as u32);
1624        }
1625
1626        for item in seq.items.iter() {
1627            self.write_object(item);
1628        }
1629    }
1630
1631    fn write_slice(&mut self, slice: &SliceObject) {
1632        self.buffer.push(b':');
1633        self.write_object(&slice.start);
1634        self.write_object(&slice.stop);
1635        self.write_object(&slice.step);
1636    }
1637
1638    fn _write_int(&mut self, int: u32) {
1639        let bytes = int.to_le_bytes();
1640        self.buffer.extend_from_slice(&bytes);
1641    }
1642
1643    fn _write_signed_int(&mut self, int: i32) {
1644        let bytes = int.to_le_bytes();
1645        self.buffer.extend_from_slice(&bytes);
1646    }
1647
1648    fn _maybe_write_int(&mut self, int: Option<u32>) {
1649        if let Some(int) = int {
1650            self._write_int(int);
1651        }
1652    }
1653
1654    fn write_int(&mut self, int: u32) {
1655        self.buffer.push(b'i');
1656        self._write_int(int);
1657    }
1658
1659    fn _write_short(&mut self, int: u16) {
1660        let bytes = int.to_le_bytes();
1661        self.buffer.extend_from_slice(&bytes);
1662    }
1663
1664    fn write_long(&mut self, long: &BigInt) {
1665        self.buffer.push(b'l');
1666
1667        let n = long.bits().div_ceil(PYLONG_MARSHAL_SHIFT as usize);
1668        let sign = if *long < BigInt::zero() { -1i32 } else { 1i32 };
1669
1670        self._write_signed_int(n as i32 * sign);
1671
1672        let mut val = long.abs();
1673        let div = BigInt::from(1u16 << PYLONG_MARSHAL_SHIFT);
1674        for _ in 0 .. n {
1675            let (q, r) = val.div_rem(&div);
1676            self._write_short(r.to_u16().unwrap());
1677            val = q;
1678        }
1679        assert!(val.is_zero());
1680    }
1681
1682    fn _write_binary_float(&mut self, float: u64) {
1683        let bytes = f64::from_bits(float).to_le_bytes();
1684        self.buffer.extend_from_slice(&bytes);
1685    }
1686
1687    fn write_binary_float(&mut self, float: u64) {
1688        self.buffer.push(b'g');
1689        self._write_binary_float(float);
1690    }
1691
1692    fn write_binary_complex(&mut self, x: u64, y: u64) {
1693        self.buffer.push(b'y');
1694        self._write_binary_float(x);
1695        self._write_binary_float(y);
1696    }
1697
1698    fn write_ref(&mut self, target: Rc<Object>) {
1699        let offset = self.buffer.len();
1700        self.buffer.push(b'r');
1701        self._write_int(0);  // We'll fix the reference number later
1702        self.refs_to_fix.insert(offset, target);
1703    }
1704
1705    fn add_ref_flags(&mut self) {
1706        let mut keys: Vec<_> = self.seen.keys().collect();
1707        keys.sort_by_key(|&e| self.seen[e].0);
1708
1709        for entry in keys {
1710            let (offset, count, index) = &self.seen[entry];
1711            assert!(index.borrow().is_none());
1712
1713            if *count > 0 {
1714                let orig = self.buffer[*offset];
1715                if TRACE {
1716                    debug!("Flagged {:?}, offset {}/{:x}, adding flag #{} ({} refs)",
1717                           entry, offset, offset, self.flag_num, count);
1718                }
1719
1720                assert!("0NFT.ScgilyrzZstuaA)([<>{:".contains(orig as char));
1721                self.buffer[*offset] |= FLAG_REF_BIT;
1722
1723                index.replace(Some(self.flag_num));
1724
1725                self.flag_num += 1;
1726            }
1727        }
1728    }
1729
1730    fn fix_refs(&mut self) {
1731        for (offset, target) in &self.refs_to_fix {
1732            let (target_offset, count, index) = &self.seen[target];
1733            if TRACE {
1734                debug!("Ref at offset {}, setting target {}/0x{:x} {:?} #{:?} ({} refs)",
1735                       offset, target_offset, target_offset, target, index, count);
1736            }
1737            assert!(*count > 0);
1738            let index = index.borrow().unwrap();
1739            assert!(index < self.flag_num);
1740            assert!(offset > target_offset);
1741
1742            assert!(self.buffer[*offset] == b'r');
1743            let bytes = &mut self.buffer[offset + 1 .. offset + 5];
1744            assert!(bytes == [0; 4]);
1745            bytes.copy_from_slice(&(index as u32).to_le_bytes());
1746        }
1747    }
1748}
1749
1750
1751pub struct Pyc {
1752    config: Arc<config::Config>,
1753}
1754
1755impl Pyc {
1756    pub fn new(config: &Arc<config::Config>) -> Self {
1757        Self { config: config.clone() }
1758    }
1759
1760    pub fn boxed(config: &Arc<config::Config>) -> Box<dyn super::Processor + Send + Sync> {
1761        Box::new(Self::new(config))
1762    }
1763}
1764
1765impl super::Processor for Pyc {
1766    fn name(&self) -> &str {
1767        "pyc"
1768    }
1769
1770    fn filter(&self, path: &Path) -> Result<bool> {
1771        Ok(self.config.ignore_extension || path.extension().is_some_and(|x| x == "pyc"))
1772    }
1773
1774    fn process(&self, input_path: &Path) -> Result<super::ProcessResult> {
1775        let (mut io, input) = InputOutputHelper::open(input_path, self.config.check, true)?;
1776
1777        let mut parser = PycParser::from_file(input_path, input)?;
1778        if parser.version < (3, 0) {
1779            return Ok(super::ProcessResult::Noop);  // We don't want to touch python2 files
1780        }
1781
1782        let code = parser.read_object()?;
1783
1784        let trailing = parser.data.len() - parser.read_offset;
1785        if trailing > 0 {
1786            warn!("{}: found trailing garbage ({} bytes)", input_path.display(), trailing);
1787        }
1788
1789        let new = PycWriter::to_buffer(&parser, &code);
1790        let have_mod = new != parser.data;
1791
1792        if have_mod {
1793            io.open_output(false)?;
1794            io.output.as_mut().unwrap().as_file_mut().write_all(&new)?;
1795        }
1796
1797        io.finalize(have_mod)
1798    }
1799}
1800
1801impl Pyc {
1802    pub fn pretty_print<W>(&self, writer: &mut W, input_path: &Path) -> Result<()>
1803    where
1804        W: fmt::Write,
1805    {
1806        let input = File::open(input_path)
1807            .with_context(|| format!("Cannot open {input_path:?}"))?;
1808        let mut parser = PycParser::from_file(input_path, input)?;
1809
1810        let obj = parser.read_object()?;
1811
1812        obj.pretty_print(writer, "", "\n", true, true)?;
1813
1814        Ok(())
1815    }
1816}
1817
1818pub struct PycZeroMtime {
1819    config: Arc<config::Config>,
1820}
1821
1822impl PycZeroMtime {
1823    pub fn boxed(config: &Arc<config::Config>) -> Box<dyn super::Processor + Send + Sync> {
1824        Box::new(Self { config: config.clone() })
1825    }
1826
1827    fn set_zero_mtime_on_py_file(&self, input_path: &Path) -> Result<()> {
1828        let input_file_name = unwrap_os_string(input_path.file_name().unwrap())?;
1829        let base = input_file_name.split('.').nth(0).unwrap();
1830        let py_path = input_path.with_file_name(format!("{base}.py"));
1831        debug!("Looking at {}…", py_path.display());
1832
1833        let py_file = match File::open(&py_path) {
1834            Ok(some) => some,
1835            Err(e) => {
1836                if e.kind() == io::ErrorKind::NotFound {
1837                    debug!("{}: not found, ignoring", py_path.display());
1838                    return Ok(());
1839                } else {
1840                    bail!("{}: cannot open: {}", py_path.display(), e);
1841                }
1842            }
1843        };
1844
1845        let orig = py_file.metadata()?;
1846        if !orig.file_type().is_file() {
1847            debug!("{}: not a file, ignoring", py_path.display());
1848        } else if orig.modified()? == time::UNIX_EPOCH {
1849            debug!("{}: mtime is already 0", py_path.display());
1850        } else if self.config.check {
1851            debug!("{}: not touching mtime in --check mode", py_path.display());
1852        } else {
1853            py_file.set_modified(time::UNIX_EPOCH)?;
1854            debug!("{}: mtime set to 0", py_path.display());
1855        }
1856
1857        Ok(())
1858    }
1859}
1860
1861impl super::Processor for PycZeroMtime {
1862    fn name(&self) -> &str {
1863        "pyc-zero-mtime"
1864    }
1865
1866    fn filter(&self, path: &Path) -> Result<bool> {
1867        Ok(self.config.ignore_extension || path.extension().is_some_and(|x| x == "pyc"))
1868    }
1869
1870    fn process(&self, input_path: &Path) -> Result<super::ProcessResult> {
1871        let (mut io, input) = InputOutputHelper::open(input_path, self.config.check, false)?;
1872
1873        let mut parser = PycParser::from_file(input_path, input)?;
1874        let have_mod = parser.set_zero_mtime()?;
1875
1876        if have_mod {
1877            io.open_output(false)?;
1878            io.output.as_mut().unwrap().as_file_mut().write_all(&parser.data)?;
1879        }
1880
1881        let res = io.finalize(have_mod)?;
1882
1883        if have_mod {
1884            self.set_zero_mtime_on_py_file(input_path)?;
1885        }
1886
1887        Ok(res)
1888    }
1889}
1890
1891
#[cfg(test)]
mod tests {
    use std::hash::{DefaultHasher, Hasher};
    use super::*;

    /// Check which paths are accepted by the filter of the `pyc` handler.
    #[test]
    fn filter_a() {
        let cfg = config::Config::empty(0, false).into();
        let handler = Pyc::boxed(&cfg);

        let accepted = [
            "/some/path/foobar.pyc",
            "/some/path/foobar.opt-2.pyc",
        ];
        let rejected = [
            "/some/path/foobar.apyc",
            "/some/path/foobar",
            "/some/path/pyc",
            "/some/path/pyc_pyc",
            "/",
        ];

        for path in accepted {
            assert!(handler.filter(Path::new(path)).unwrap());
        }
        for path in rejected {
            assert!(!handler.filter(Path::new(path)).unwrap());
        }
    }

    /// Two sequences with the same contents compare equal and hash to the
    /// same value even though the flag numbers differ.
    #[test]
    fn seq_string_equality() {
        let seq1 = Object::Seq(
            SeqObject {
                variant: SeqVariant::FrozenSet,
                items: vec![
                    Object::String(
                        StringObject {
                            variant: StringVariant::ShortAsciiInterned,
                            bytes: b"http".to_vec(),
                            flag_num: Some(43),
                        }
                    ).into(),
                    Object::String(
                        StringObject {
                            variant: StringVariant::ShortAsciiInterned,
                            bytes: b"https".to_vec(),
                            flag_num: Some(44),
                        }
                    ).into(),
                ],
                flag_num: None,
            }
        );
        let seq2 = Object::Seq(
            SeqObject {
                variant: SeqVariant::FrozenSet,
                items: vec![
                    Object::String(
                        StringObject {
                            variant: StringVariant::ShortAsciiInterned,
                            bytes: b"http".to_vec(),
                            flag_num: None,
                        }
                    ).into(),
                    Object::String(
                        StringObject {
                            variant: StringVariant::ShortAsciiInterned,
                            bytes: b"https".to_vec(),
                            flag_num: None,
                        }
                    ).into(),
                ],
                flag_num: Some(43),
            }
        );

        // Equality must be reflexive and symmetric.
        assert!(seq1 == seq1);
        assert!(seq2 == seq2);
        assert!(seq1 == seq2);
        assert!(seq2 == seq1);

        // Equal objects must hash identically.
        let digest = |object: &Object| {
            let mut hasher = DefaultHasher::new();
            object.hash(&mut hasher);
            hasher.finish()
        };
        assert!(digest(&seq1) == digest(&seq2));
    }

    /// A reference compares equal to an object that equals its target.
    #[test]
    fn seq_ref_equality() {
        let reference = Object::Ref(
            RefObject {
                number: 43,
                target: Object::String(
                    StringObject {
                        variant: StringVariant::ShortAsciiInterned,
                        bytes: b"http".to_vec(),
                        flag_num: Some(43),
                    },
                ).into(),
                flag_num: Some(99),
            }
        );
        let plain = Object::String(
            StringObject {
                variant: StringVariant::ShortAsciiInterned,
                bytes: b"http".to_vec(),
                flag_num: None,
            },
        );

        assert!(reference == reference);
        assert!(plain == plain);
        assert!(reference == plain);
        assert!(plain == reference);
    }
}