Skip to main content

pcode_ir/
lib.rs

1//! P-code intermediate representation types.
2//!
3//! Zero-dependency crate defining [`PcodeOp`] and [`Varnode`] — the types
4//! emitted by SLEIGH-generated instruction decoders.
5
6#![no_std]
7
8extern crate alloc;
9use alloc::collections::{BTreeMap, BTreeSet};
10use alloc::string::String;
11use alloc::vec;
12use alloc::vec::Vec;
13
14/// Identifies an address space in the P-code model.
15///
16/// `Ord` is derived for use as `BTreeMap` keys in the peephole optimizer (no_std
17/// precludes `HashMap`). The derived ordering is arbitrary and should not be relied
18/// upon for semantic comparisons.
19#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
20pub enum AddressSpaceId {
21    /// CPU registers (offset = Ghidra register offset).
22    Register,
23    /// Main memory / RAM.
24    Ram,
25    /// Temporary storage (unique per instruction lift).
26    Unique,
27    /// Constants (offset = the constant value itself).
28    Const,
29}
30
31/// A triple (space, offset, size) identifying a storage location or constant.
32///
33/// `Ord` is derived for use as `BTreeMap` keys in the peephole optimizer.
34/// The derived lexicographic ordering is not semantically meaningful.
35#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
36pub struct Varnode {
37    pub space: AddressSpaceId,
38    pub offset: u64,
39    /// Size in bytes.
40    pub size: u32,
41}
42
43impl Varnode {
44    /// A CPU register at the given Ghidra offset.
45    #[inline]
46    pub fn register(offset: u64, size: u32) -> Self {
47        Self {
48            space: AddressSpaceId::Register,
49            offset,
50            size,
51        }
52    }
53
54    /// A unique (temporary) varnode.
55    #[inline]
56    pub fn unique(offset: u64, size: u32) -> Self {
57        Self {
58            space: AddressSpaceId::Unique,
59            offset,
60            size,
61        }
62    }
63
64    /// A RAM location.
65    #[inline]
66    pub fn ram(offset: u64, size: u32) -> Self {
67        Self {
68            space: AddressSpaceId::Ram,
69            offset,
70            size,
71        }
72    }
73
74    /// A constant value encoded as a varnode.
75    #[inline]
76    pub fn constant(value: u64, size: u32) -> Self {
77        Self {
78            space: AddressSpaceId::Const,
79            offset: value,
80            size,
81        }
82    }
83}
84
85/// Provenance metadata identifying which SLEIGH constructor produced an
86/// `Instruction`. Audit P2 #3 — gives the analyst a way to trace
87/// surprising P-code back to its source rule when debugging lifter
88/// divergences. `None` means the producing decoder pre-dated this
89/// metadata (legacy generated crates) or the decoder declined to emit it.
90#[derive(Debug, Clone, PartialEq, Eq)]
91pub struct ConstructorSpan {
92    /// Numeric constructor index assigned by the SLEIGH compiler.
93    pub constructor_id: u32,
94    /// Numeric table id the constructor belongs to.
95    pub table_id: u32,
96    /// Free-form source location string (file:line) emitted by codegen.
97    /// Empty string when the codegen did not record one.
98    pub source: &'static str,
99}
100
101/// A decoded and lifted instruction.
102#[derive(Debug, Clone)]
103pub struct Instruction {
104    /// Number of bytes consumed by this instruction.
105    pub len: u64,
106    /// Human-readable disassembly (e.g. "MOV RAX,RBX").
107    pub disassembly: String,
108    /// P-code operations (peephole-optimized).
109    pub ops: Vec<PcodeOp>,
110    /// Optional SLEIGH constructor that emitted this instruction.
111    /// Default `None` for backward compatibility with generated crates
112    /// that don't yet populate this field.
113    pub constructor: Option<ConstructorSpan>,
114}
115
116impl Instruction {
117    /// Construct an instruction without constructor provenance —
118    /// preserves call sites that pre-date the `constructor` field.
119    pub fn new(len: u64, disassembly: String, ops: Vec<PcodeOp>) -> Self {
120        Self {
121            len,
122            disassembly,
123            ops,
124            constructor: None,
125        }
126    }
127}
128
129/// Decode error returned when an instruction cannot be parsed.
130#[derive(Debug, Clone)]
131pub enum DecodeError {
132    /// The byte sequence does not match any known instruction encoding.
133    UnknownInstruction,
134}
135
136impl core::fmt::Display for DecodeError {
137    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
138        match self {
139            DecodeError::UnknownInstruction => write!(f, "unknown instruction encoding"),
140        }
141    }
142}
143
144/// Offset all Unique-space varnode offsets in an op by the given amount.
145/// Used to avoid unique offset collisions when combining P-code from multiple subtables.
146pub fn offset_unique_varnodes(op: &mut PcodeOp, offset: u64) {
147    // Offset outputs
148    if let Some(out) = get_output_mut(op) {
149        if out.space == AddressSpaceId::Unique {
150            out.offset += offset;
151        }
152    }
153    // Offset inputs
154    visit_reads_mut(op, &mut |v| {
155        if v.space == AddressSpaceId::Unique {
156            v.offset += offset;
157        }
158    });
159}
160
161/// Peephole-optimize a P-code op sequence:
162/// - Remove identity `Subpiece { lsb: 0 }` where input.size == out.size
163/// - Forward-substitute `Copy` chains (A=B, C=A → C=B) when the
164///   intermediate is a unique varnode used only once after its definition
165pub fn optimize(ops: &mut Vec<PcodeOp>) {
166    // Run passes until fixpoint (later passes create opportunities for earlier ones)
167    for _round in 0..4 {
168        let before = ops.len();
169        optimize_once(ops);
170        if ops.len() == before {
171            break;
172        }
173    }
174}
175
176fn optimize_once(ops: &mut Vec<PcodeOp>) {
177    // Pass 0: constant folding — IntZext/IntSext of constants
178    for op in ops.iter_mut() {
179        match op {
180            PcodeOp::IntZext { out, input } if input.space == AddressSpaceId::Const => {
181                *op = PcodeOp::Copy {
182                    out: *out,
183                    input: Varnode::constant(input.offset, out.size),
184                };
185            }
186            PcodeOp::IntSext { out, input } if input.space == AddressSpaceId::Const => {
187                // Sign-extend: if the high bit of input is set, fill upper bits
188                let val = input.offset;
189                let in_bits = (input.size as u64) * 8;
190                let extended = if in_bits < 64 && (val >> (in_bits - 1)) & 1 != 0 {
191                    val | (!0u64 << in_bits)
192                } else {
193                    val
194                };
195                *op = PcodeOp::Copy {
196                    out: *out,
197                    input: Varnode::constant(extended, out.size),
198                };
199            }
200            // Shift by zero → Copy
201            PcodeOp::IntLsr { out, left, right }
202            | PcodeOp::IntLsl { out, left, right }
203            | PcodeOp::IntAsr { out, left, right }
204                if right.space == AddressSpaceId::Const && right.offset == 0 =>
205            {
206                *op = PcodeOp::Copy {
207                    out: *out,
208                    input: *left,
209                };
210            }
211            // OR with zero → Copy
212            PcodeOp::IntOr { out, left, right }
213                if right.space == AddressSpaceId::Const && right.offset == 0 =>
214            {
215                *op = PcodeOp::Copy {
216                    out: *out,
217                    input: *left,
218                };
219            }
220            PcodeOp::IntOr { out, left, right }
221                if left.space == AddressSpaceId::Const && left.offset == 0 =>
222            {
223                *op = PcodeOp::Copy {
224                    out: *out,
225                    input: *right,
226                };
227            }
228            // AND with all-ones (for the size) → Copy
229            PcodeOp::IntAnd { out, left, right }
230                if right.space == AddressSpaceId::Const
231                    && right.offset == all_ones_mask(out.size) =>
232            {
233                *op = PcodeOp::Copy {
234                    out: *out,
235                    input: *left,
236                };
237            }
238            _ => {}
239        }
240    }
241
242    // Pass 1: eliminate identity Subpiece
243    for op in ops.iter_mut() {
244        if let PcodeOp::Subpiece { out, input, lsb: 0 } = op {
245            if out.size == input.size {
246                *op = PcodeOp::Copy {
247                    out: *out,
248                    input: *input,
249                };
250            }
251        }
252    }
253
254    // Pass 1b: collapse `Subpiece{Const(v, big), lsb=0}` → `Copy{Const(v', out.size)}`.
255    // ARM32 `mov rN, #imm8` lifts to a size-8 Const followed by Subpiece-to-4;
256    // Ghidra emits a single Copy of a size-4 Const. Mask the value to the
257    // output width so the truncation is explicit and the Const carries the
258    // correct size for downstream consumers.
259    for op in ops.iter_mut() {
260        if let PcodeOp::Subpiece { out, input, lsb: 0 } = op {
261            if input.space == AddressSpaceId::Const && input.size > out.size {
262                let mask: u64 = if out.size >= 8 {
263                    u64::MAX
264                } else {
265                    (1u64 << (out.size as u64 * 8)) - 1
266                };
267                let truncated = Varnode {
268                    space: AddressSpaceId::Const,
269                    offset: input.offset & mask,
270                    size: out.size,
271                };
272                *op = PcodeOp::Copy {
273                    out: *out,
274                    input: truncated,
275                };
276            }
277        }
278    }
279
280    // Compute unique-varnode analysis once. Passes below share and recompute only
281    // after structural mutations (removals). Indices in NextAccess::Read are
282    // invalidated by any removal, so we must recompute after each ops.remove().
283    let mut analysis = analyze_unique_outputs(ops);
284
285    // Pass 1a: redundant IntAnd — if IntAnd{out1, x, mask} followed by IntAnd{out2, out1, mask}
286    // with same mask and out1 used only once, remove the second
287    let mut i = 0;
288    while i + 1 < ops.len() {
289        let collapse = if let PcodeOp::IntAnd {
290            out: out1,
291            left: _,
292            right: mask1,
293        } = &ops[i]
294        {
295            if out1.space == AddressSpaceId::Unique && mask1.space == AddressSpaceId::Const {
296                if let PcodeOp::IntAnd {
297                    out: out2,
298                    left: in2,
299                    right: mask2,
300                } = &ops[i + 1]
301                {
302                    if *in2 == *out1 && *mask2 == *mask1 {
303                        // out1 is Unique (checked above), so analysis[i] is always Some.
304                        // Use pattern match to be explicit rather than unwrap_or which
305                        // would silently treat non-Unique outputs as zero reads.
306                        let total_reads = match analysis.get(i) {
307                            Some(Some(info)) => info.future_reads,
308                            _ => usize::MAX, // conservative: don't collapse if unknown
309                        };
310                        if total_reads == 1 {
311                            Some(*out2)
312                        } else {
313                            None
314                        }
315                    } else {
316                        None
317                    }
318                } else {
319                    None
320                }
321            } else {
322                None
323            }
324        } else {
325            None
326        };
327
328        if let Some(new_out) = collapse {
329            if let PcodeOp::IntAnd { out, .. } = &mut ops[i] {
330                *out = new_out;
331            }
332            ops.remove(i + 1);
333            analysis = analyze_unique_outputs(ops);
334            continue;
335        }
336        i += 1;
337    }
338
339    // Pass 2: forward-substitute single-use Copy chains.
340    // If ops[i] is Copy { out: A, input: B } and A is Unique,
341    // and exactly one later op reads A, replace that read with B.
342    // Analysis is still valid from pass 1a (no in-place mutations between).
343    let mut i = 0;
344    while i < ops.len() {
345        if let PcodeOp::Copy { out, input } = &ops[i] {
346            if out.space == AddressSpaceId::Unique {
347                let target = *out;
348                let replacement = *input;
349                let entry = analysis.get(i).and_then(|entry| entry.as_ref());
350                if let Some(UniqueOutputInfo {
351                    future_reads: 1,
352                    next_access: Some(NextAccess::Read(read_idx)),
353                }) = entry
354                {
355                    replace_reads(&mut ops[*read_idx], &target, &replacement);
356                    ops.remove(i);
357                    analysis = analyze_unique_outputs(ops);
358                    continue;
359                }
360            }
361        }
362        i += 1;
363    }
364
365    // Pass 2b + 3: overwrite elimination and dead code elimination.
366    // Remove ops that write to Unique varnodes that are either overwritten before
367    // any read (2b) or never read at all (3). These removals are independent —
368    // removing a dead/overwritten op cannot make another op live — so we collect
369    // all indices in one scan and batch-remove in reverse order.
370    // Analysis is still valid from pass 2 (no in-place mutations between).
371    {
372        let mut to_remove = Vec::new();
373        for (i, entry) in analysis.iter().enumerate() {
374            if let Some(info) = entry {
375                if info.future_reads == 0 || matches!(info.next_access, Some(NextAccess::Write)) {
376                    to_remove.push(i);
377                }
378            }
379        }
380        for &idx in to_remove.iter().rev() {
381            ops.remove(idx);
382        }
383    }
384
385    // Pass 4: sink unique outputs into subsequent Copy destinations.
386    // If ops[i] writes to Unique(X) and some later ops[j] is Copy { out: dest, input: Unique(X) },
387    // and Unique(X) is read exactly once (by that Copy), and dest is not written or read between
388    // i and j, rewrite ops[i] to output directly to dest and remove the Copy.
389    // Must recompute — batch removal above invalidated indices.
390    analysis = analyze_unique_outputs(ops);
391    let mut i = 0;
392    while i < ops.len() {
393        let should_sink = match (
394            get_output(&ops[i]),
395            analysis.get(i).and_then(|entry| entry.as_ref()),
396        ) {
397            (
398                Some(out),
399                Some(UniqueOutputInfo {
400                    future_reads: 1,
401                    next_access: Some(NextAccess::Read(copy_idx)),
402                }),
403            ) if out.space == AddressSpaceId::Unique => {
404                if let PcodeOp::Copy {
405                    out: copy_dest,
406                    input: copy_src,
407                } = &ops[*copy_idx]
408                {
409                    if *copy_src == out {
410                        let d = *copy_dest;
411                        let safe = (i + 1..*copy_idx)
412                            .all(|k| count_reads(&ops[k], &d) == 0 && !writes_to(&ops[k], &d));
413                        if safe {
414                            Some((*copy_idx, d))
415                        } else {
416                            None
417                        }
418                    } else {
419                        None
420                    }
421                } else {
422                    None
423                }
424            }
425            _ => None,
426        };
427
428        if let Some((copy_idx, new_dest)) = should_sink {
429            if let Some(out) = get_output_mut(&mut ops[i]) {
430                *out = new_dest;
431            }
432            ops.remove(copy_idx);
433            analysis = analyze_unique_outputs(ops);
434            continue;
435        }
436        i += 1;
437    }
438}
439
440fn all_ones_mask(size_bytes: u32) -> u64 {
441    let bits = size_bytes.saturating_mul(8);
442    if bits >= 64 {
443        u64::MAX
444    } else {
445        u64::MAX >> (64 - bits)
446    }
447}
448
449#[derive(Clone, Copy)]
450struct UniqueOutputInfo {
451    future_reads: usize,
452    next_access: Option<NextAccess>,
453}
454
455#[derive(Clone, Copy)]
456enum NextAccess {
457    Read(usize),
458    Write,
459}
460
461fn analyze_unique_outputs(ops: &[PcodeOp]) -> Vec<Option<UniqueOutputInfo>> {
462    let mut future_reads = BTreeMap::<Varnode, usize>::new();
463    let mut next_access = BTreeMap::<Varnode, NextAccess>::new();
464    let mut result = vec![None; ops.len()];
465
466    // Pre-scan: find Unique varnodes that are written on both sides of an
467    // intra-instruction CBranch (Const-space dest). These are conditional-select
468    // patterns (AArch64 CSEL/CSINC/CNEG) where both writes are live because
469    // the branch may skip the second write. For these varnodes, the backward
470    // analysis must not clear future_reads or mark next_access as Write.
471    let mut cbranch_protected = BTreeSet::<Varnode>::new();
472    {
473        // Find CBranch indices
474        let cbranch_indices: Vec<usize> = ops.iter().enumerate()
475            .filter(|(_, op)| matches!(op, PcodeOp::CBranch { dest, .. } if dest.space == AddressSpaceId::Const))
476            .map(|(i, _)| i)
477            .collect();
478        for &cb_idx in &cbranch_indices {
479            // Collect Unique varnodes written before and after this CBranch
480            let mut before = BTreeSet::new();
481            let mut after = BTreeSet::new();
482            for (i, op) in ops.iter().enumerate() {
483                if let Some(out) = get_output(op) {
484                    if out.space == AddressSpaceId::Unique {
485                        if i < cb_idx {
486                            before.insert(out);
487                        } else if i > cb_idx {
488                            after.insert(out);
489                        }
490                    }
491                }
492            }
493            for v in before.intersection(&after) {
494                cbranch_protected.insert(*v);
495            }
496        }
497    }
498
499    for (idx, op) in ops.iter().enumerate().rev() {
500        if let Some(out) = get_output(op).filter(|out| out.space == AddressSpaceId::Unique) {
501            if cbranch_protected.contains(&out) {
502                // This varnode is written on both sides of a CBranch — both writes
503                // are potentially live. Return None so no pass touches either write.
504                result[idx] = None;
505            } else {
506                result[idx] = Some(UniqueOutputInfo {
507                    future_reads: future_reads.get(&out).copied().unwrap_or(0),
508                    next_access: next_access.get(&out).copied(),
509                });
510                future_reads.remove(&out);
511                next_access.insert(out, NextAccess::Write);
512            }
513        }
514
515        visit_reads(op, &mut |v| {
516            if v.space == AddressSpaceId::Unique {
517                *future_reads.entry(*v).or_insert(0) += 1;
518                next_access.insert(*v, NextAccess::Read(idx));
519            }
520        });
521    }
522
523    result
524}
525
526pub fn get_output(op: &PcodeOp) -> Option<Varnode> {
527    match op {
528        PcodeOp::Copy { out, .. }
529        | PcodeOp::Load { out, .. }
530        | PcodeOp::Subpiece { out, .. }
531        | PcodeOp::IntNeg { out, .. }
532        | PcodeOp::IntNot { out, .. }
533        | PcodeOp::IntZext { out, .. }
534        | PcodeOp::IntSext { out, .. }
535        | PcodeOp::BoolNot { out, .. }
536        | PcodeOp::FloatNeg { out, .. }
537        | PcodeOp::FloatAbs { out, .. }
538        | PcodeOp::FloatSqrt { out, .. }
539        | PcodeOp::FloatNan { out, .. }
540        | PcodeOp::Int2Float { out, .. }
541        | PcodeOp::Float2Float { out, .. }
542        | PcodeOp::Trunc { out, .. }
543        | PcodeOp::FloatCeil { out, .. }
544        | PcodeOp::FloatFloor { out, .. }
545        | PcodeOp::FloatRound { out, .. }
546        | PcodeOp::Popcount { out, .. }
547        | PcodeOp::Lzcount { out, .. }
548        | PcodeOp::IntAdd { out, .. }
549        | PcodeOp::IntSub { out, .. }
550        | PcodeOp::IntMult { out, .. }
551        | PcodeOp::IntDiv { out, .. }
552        | PcodeOp::IntSDiv { out, .. }
553        | PcodeOp::IntRem { out, .. }
554        | PcodeOp::IntSRem { out, .. }
555        | PcodeOp::IntEq { out, .. }
556        | PcodeOp::IntNotEq { out, .. }
557        | PcodeOp::IntLess { out, .. }
558        | PcodeOp::IntLessEq { out, .. }
559        | PcodeOp::IntSLess { out, .. }
560        | PcodeOp::IntSLessEq { out, .. }
561        | PcodeOp::IntAnd { out, .. }
562        | PcodeOp::IntOr { out, .. }
563        | PcodeOp::IntXor { out, .. }
564        | PcodeOp::IntLsl { out, .. }
565        | PcodeOp::IntLsr { out, .. }
566        | PcodeOp::IntAsr { out, .. }
567        | PcodeOp::IntCarry { out, .. }
568        | PcodeOp::IntSCarry { out, .. }
569        | PcodeOp::IntSBorrow { out, .. }
570        | PcodeOp::BoolAnd { out, .. }
571        | PcodeOp::BoolOr { out, .. }
572        | PcodeOp::BoolXor { out, .. }
573        | PcodeOp::FloatAdd { out, .. }
574        | PcodeOp::FloatSub { out, .. }
575        | PcodeOp::FloatMult { out, .. }
576        | PcodeOp::FloatDiv { out, .. }
577        | PcodeOp::FloatEq { out, .. }
578        | PcodeOp::FloatNotEq { out, .. }
579        | PcodeOp::FloatLess { out, .. }
580        | PcodeOp::FloatLessEq { out, .. } => Some(*out),
581        _ => None,
582    }
583}
584
585fn get_output_mut(op: &mut PcodeOp) -> Option<&mut Varnode> {
586    match op {
587        PcodeOp::Copy { out, .. }
588        | PcodeOp::Load { out, .. }
589        | PcodeOp::Subpiece { out, .. }
590        | PcodeOp::IntNeg { out, .. }
591        | PcodeOp::IntNot { out, .. }
592        | PcodeOp::IntZext { out, .. }
593        | PcodeOp::IntSext { out, .. }
594        | PcodeOp::BoolNot { out, .. }
595        | PcodeOp::FloatNeg { out, .. }
596        | PcodeOp::FloatAbs { out, .. }
597        | PcodeOp::FloatSqrt { out, .. }
598        | PcodeOp::FloatNan { out, .. }
599        | PcodeOp::Int2Float { out, .. }
600        | PcodeOp::Float2Float { out, .. }
601        | PcodeOp::Trunc { out, .. }
602        | PcodeOp::FloatCeil { out, .. }
603        | PcodeOp::FloatFloor { out, .. }
604        | PcodeOp::FloatRound { out, .. }
605        | PcodeOp::Popcount { out, .. }
606        | PcodeOp::Lzcount { out, .. }
607        | PcodeOp::IntAdd { out, .. }
608        | PcodeOp::IntSub { out, .. }
609        | PcodeOp::IntMult { out, .. }
610        | PcodeOp::IntDiv { out, .. }
611        | PcodeOp::IntSDiv { out, .. }
612        | PcodeOp::IntRem { out, .. }
613        | PcodeOp::IntSRem { out, .. }
614        | PcodeOp::IntEq { out, .. }
615        | PcodeOp::IntNotEq { out, .. }
616        | PcodeOp::IntLess { out, .. }
617        | PcodeOp::IntLessEq { out, .. }
618        | PcodeOp::IntSLess { out, .. }
619        | PcodeOp::IntSLessEq { out, .. }
620        | PcodeOp::IntAnd { out, .. }
621        | PcodeOp::IntOr { out, .. }
622        | PcodeOp::IntXor { out, .. }
623        | PcodeOp::IntLsl { out, .. }
624        | PcodeOp::IntLsr { out, .. }
625        | PcodeOp::IntAsr { out, .. }
626        | PcodeOp::IntCarry { out, .. }
627        | PcodeOp::IntSCarry { out, .. }
628        | PcodeOp::IntSBorrow { out, .. }
629        | PcodeOp::BoolAnd { out, .. }
630        | PcodeOp::BoolOr { out, .. }
631        | PcodeOp::BoolXor { out, .. }
632        | PcodeOp::FloatAdd { out, .. }
633        | PcodeOp::FloatSub { out, .. }
634        | PcodeOp::FloatMult { out, .. }
635        | PcodeOp::FloatDiv { out, .. }
636        | PcodeOp::FloatEq { out, .. }
637        | PcodeOp::FloatNotEq { out, .. }
638        | PcodeOp::FloatLess { out, .. }
639        | PcodeOp::FloatLessEq { out, .. } => Some(out),
640        _ => None,
641    }
642}
643
644pub fn writes_to(op: &PcodeOp, target: &Varnode) -> bool {
645    match op {
646        PcodeOp::Copy { out, .. }
647        | PcodeOp::Load { out, .. }
648        | PcodeOp::Subpiece { out, .. }
649        | PcodeOp::IntNeg { out, .. }
650        | PcodeOp::IntNot { out, .. }
651        | PcodeOp::IntZext { out, .. }
652        | PcodeOp::IntSext { out, .. }
653        | PcodeOp::BoolNot { out, .. }
654        | PcodeOp::FloatNeg { out, .. }
655        | PcodeOp::FloatAbs { out, .. }
656        | PcodeOp::FloatSqrt { out, .. }
657        | PcodeOp::FloatNan { out, .. }
658        | PcodeOp::Int2Float { out, .. }
659        | PcodeOp::Float2Float { out, .. }
660        | PcodeOp::Trunc { out, .. }
661        | PcodeOp::FloatCeil { out, .. }
662        | PcodeOp::FloatFloor { out, .. }
663        | PcodeOp::FloatRound { out, .. }
664        | PcodeOp::Popcount { out, .. }
665        | PcodeOp::Lzcount { out, .. }
666        | PcodeOp::IntAdd { out, .. }
667        | PcodeOp::IntSub { out, .. }
668        | PcodeOp::IntMult { out, .. }
669        | PcodeOp::IntDiv { out, .. }
670        | PcodeOp::IntSDiv { out, .. }
671        | PcodeOp::IntRem { out, .. }
672        | PcodeOp::IntSRem { out, .. }
673        | PcodeOp::IntEq { out, .. }
674        | PcodeOp::IntNotEq { out, .. }
675        | PcodeOp::IntLess { out, .. }
676        | PcodeOp::IntLessEq { out, .. }
677        | PcodeOp::IntSLess { out, .. }
678        | PcodeOp::IntSLessEq { out, .. }
679        | PcodeOp::IntAnd { out, .. }
680        | PcodeOp::IntOr { out, .. }
681        | PcodeOp::IntXor { out, .. }
682        | PcodeOp::IntLsl { out, .. }
683        | PcodeOp::IntLsr { out, .. }
684        | PcodeOp::IntAsr { out, .. }
685        | PcodeOp::IntCarry { out, .. }
686        | PcodeOp::IntSCarry { out, .. }
687        | PcodeOp::IntSBorrow { out, .. }
688        | PcodeOp::BoolAnd { out, .. }
689        | PcodeOp::BoolOr { out, .. }
690        | PcodeOp::BoolXor { out, .. }
691        | PcodeOp::FloatAdd { out, .. }
692        | PcodeOp::FloatSub { out, .. }
693        | PcodeOp::FloatMult { out, .. }
694        | PcodeOp::FloatDiv { out, .. }
695        | PcodeOp::FloatEq { out, .. }
696        | PcodeOp::FloatNotEq { out, .. }
697        | PcodeOp::FloatLess { out, .. }
698        | PcodeOp::FloatLessEq { out, .. } => out == target,
699        PcodeOp::CallOther { out: Some(out), .. } => out == target,
700        _ => false,
701    }
702}
703
704/// Check if a P-code op reads from a specific varnode.
705pub fn reads_varnode(op: &PcodeOp, target: &Varnode) -> bool {
706    count_reads(op, target) > 0
707}
708
709pub fn count_reads(op: &PcodeOp, target: &Varnode) -> usize {
710    let mut n = 0;
711    visit_reads(op, &mut |v| {
712        if v == target {
713            n += 1
714        }
715    });
716    n
717}
718
719fn replace_reads(op: &mut PcodeOp, target: &Varnode, replacement: &Varnode) -> bool {
720    let mut found = false;
721    visit_reads_mut(op, &mut |v| {
722        if v == target {
723            *v = *replacement;
724            found = true;
725        }
726    });
727    found
728}
729
730pub fn visit_reads(op: &PcodeOp, f: &mut impl FnMut(&Varnode)) {
731    match op {
732        PcodeOp::Copy { input, .. } => f(input),
733        PcodeOp::Load { ptr, .. } => f(ptr),
734        PcodeOp::Store { ptr, val, .. } => {
735            f(ptr);
736            f(val);
737        }
738        PcodeOp::Branch { dest }
739        | PcodeOp::BranchInd { dest }
740        | PcodeOp::Call { dest }
741        | PcodeOp::CallInd { dest }
742        | PcodeOp::Return { dest } => f(dest),
743        PcodeOp::CBranch { dest, cond } => {
744            f(dest);
745            f(cond);
746        }
747        PcodeOp::Subpiece { input, .. } => f(input),
748        PcodeOp::IntNeg { input, .. }
749        | PcodeOp::IntNot { input, .. }
750        | PcodeOp::IntZext { input, .. }
751        | PcodeOp::IntSext { input, .. }
752        | PcodeOp::BoolNot { input, .. }
753        | PcodeOp::FloatNeg { input, .. }
754        | PcodeOp::FloatAbs { input, .. }
755        | PcodeOp::FloatSqrt { input, .. }
756        | PcodeOp::FloatNan { input, .. }
757        | PcodeOp::Int2Float { input, .. }
758        | PcodeOp::Float2Float { input, .. }
759        | PcodeOp::Trunc { input, .. }
760        | PcodeOp::FloatCeil { input, .. }
761        | PcodeOp::FloatFloor { input, .. }
762        | PcodeOp::FloatRound { input, .. }
763        | PcodeOp::Popcount { input, .. }
764        | PcodeOp::Lzcount { input, .. } => f(input),
765        PcodeOp::IntAdd { left, right, .. }
766        | PcodeOp::IntSub { left, right, .. }
767        | PcodeOp::IntMult { left, right, .. }
768        | PcodeOp::IntDiv { left, right, .. }
769        | PcodeOp::IntSDiv { left, right, .. }
770        | PcodeOp::IntRem { left, right, .. }
771        | PcodeOp::IntSRem { left, right, .. }
772        | PcodeOp::IntEq { left, right, .. }
773        | PcodeOp::IntNotEq { left, right, .. }
774        | PcodeOp::IntLess { left, right, .. }
775        | PcodeOp::IntLessEq { left, right, .. }
776        | PcodeOp::IntSLess { left, right, .. }
777        | PcodeOp::IntSLessEq { left, right, .. }
778        | PcodeOp::IntAnd { left, right, .. }
779        | PcodeOp::IntOr { left, right, .. }
780        | PcodeOp::IntXor { left, right, .. }
781        | PcodeOp::IntLsl { left, right, .. }
782        | PcodeOp::IntLsr { left, right, .. }
783        | PcodeOp::IntAsr { left, right, .. }
784        | PcodeOp::IntCarry { left, right, .. }
785        | PcodeOp::IntSCarry { left, right, .. }
786        | PcodeOp::IntSBorrow { left, right, .. }
787        | PcodeOp::BoolAnd { left, right, .. }
788        | PcodeOp::BoolOr { left, right, .. }
789        | PcodeOp::BoolXor { left, right, .. }
790        | PcodeOp::FloatAdd { left, right, .. }
791        | PcodeOp::FloatSub { left, right, .. }
792        | PcodeOp::FloatMult { left, right, .. }
793        | PcodeOp::FloatDiv { left, right, .. }
794        | PcodeOp::FloatEq { left, right, .. }
795        | PcodeOp::FloatNotEq { left, right, .. }
796        | PcodeOp::FloatLess { left, right, .. }
797        | PcodeOp::FloatLessEq { left, right, .. } => {
798            f(left);
799            f(right);
800        }
801        PcodeOp::CallOther { inputs, .. } => {
802            for v in inputs {
803                f(v);
804            }
805        }
806    }
807}
808
809fn visit_reads_mut(op: &mut PcodeOp, f: &mut impl FnMut(&mut Varnode)) {
810    match op {
811        PcodeOp::Copy { input, .. } => f(input),
812        PcodeOp::Load { ptr, .. } => f(ptr),
813        PcodeOp::Store { ptr, val, .. } => {
814            f(ptr);
815            f(val);
816        }
817        PcodeOp::Branch { dest }
818        | PcodeOp::BranchInd { dest }
819        | PcodeOp::Call { dest }
820        | PcodeOp::CallInd { dest }
821        | PcodeOp::Return { dest } => f(dest),
822        PcodeOp::CBranch { dest, cond } => {
823            f(dest);
824            f(cond);
825        }
826        PcodeOp::Subpiece { input, .. } => f(input),
827        PcodeOp::IntNeg { input, .. }
828        | PcodeOp::IntNot { input, .. }
829        | PcodeOp::IntZext { input, .. }
830        | PcodeOp::IntSext { input, .. }
831        | PcodeOp::BoolNot { input, .. }
832        | PcodeOp::FloatNeg { input, .. }
833        | PcodeOp::FloatAbs { input, .. }
834        | PcodeOp::FloatSqrt { input, .. }
835        | PcodeOp::FloatNan { input, .. }
836        | PcodeOp::Int2Float { input, .. }
837        | PcodeOp::Float2Float { input, .. }
838        | PcodeOp::Trunc { input, .. }
839        | PcodeOp::FloatCeil { input, .. }
840        | PcodeOp::FloatFloor { input, .. }
841        | PcodeOp::FloatRound { input, .. }
842        | PcodeOp::Popcount { input, .. }
843        | PcodeOp::Lzcount { input, .. } => f(input),
844        PcodeOp::IntAdd { left, right, .. }
845        | PcodeOp::IntSub { left, right, .. }
846        | PcodeOp::IntMult { left, right, .. }
847        | PcodeOp::IntDiv { left, right, .. }
848        | PcodeOp::IntSDiv { left, right, .. }
849        | PcodeOp::IntRem { left, right, .. }
850        | PcodeOp::IntSRem { left, right, .. }
851        | PcodeOp::IntEq { left, right, .. }
852        | PcodeOp::IntNotEq { left, right, .. }
853        | PcodeOp::IntLess { left, right, .. }
854        | PcodeOp::IntLessEq { left, right, .. }
855        | PcodeOp::IntSLess { left, right, .. }
856        | PcodeOp::IntSLessEq { left, right, .. }
857        | PcodeOp::IntAnd { left, right, .. }
858        | PcodeOp::IntOr { left, right, .. }
859        | PcodeOp::IntXor { left, right, .. }
860        | PcodeOp::IntLsl { left, right, .. }
861        | PcodeOp::IntLsr { left, right, .. }
862        | PcodeOp::IntAsr { left, right, .. }
863        | PcodeOp::IntCarry { left, right, .. }
864        | PcodeOp::IntSCarry { left, right, .. }
865        | PcodeOp::IntSBorrow { left, right, .. }
866        | PcodeOp::BoolAnd { left, right, .. }
867        | PcodeOp::BoolOr { left, right, .. }
868        | PcodeOp::BoolXor { left, right, .. }
869        | PcodeOp::FloatAdd { left, right, .. }
870        | PcodeOp::FloatSub { left, right, .. }
871        | PcodeOp::FloatMult { left, right, .. }
872        | PcodeOp::FloatDiv { left, right, .. }
873        | PcodeOp::FloatEq { left, right, .. }
874        | PcodeOp::FloatNotEq { left, right, .. }
875        | PcodeOp::FloatLess { left, right, .. }
876        | PcodeOp::FloatLessEq { left, right, .. } => {
877            f(left);
878            f(right);
879        }
880        PcodeOp::CallOther { inputs, .. } => {
881            for v in inputs {
882                f(v);
883            }
884        }
885    }
886}
887
888/// A single P-code operation.
889///
890/// Variant naming follows Ghidra's P-code reference.
891/// See: <https://ghidra.re/courses/languages/html/pcoderef.html>
892#[derive(Debug, Clone, PartialEq, Eq)]
893pub enum PcodeOp {
894    // ── Data Movement ──────────────────────────────────────────────
895    Copy {
896        out: Varnode,
897        input: Varnode,
898    },
899    Load {
900        out: Varnode,
901        space: AddressSpaceId,
902        ptr: Varnode,
903    },
904    Store {
905        space: AddressSpaceId,
906        ptr: Varnode,
907        val: Varnode,
908    },
909
910    // ── Branching ──────────────────────────────────────────────────
911    Branch {
912        dest: Varnode,
913    },
914    CBranch {
915        dest: Varnode,
916        cond: Varnode,
917    },
918    BranchInd {
919        dest: Varnode,
920    },
921    Call {
922        dest: Varnode,
923    },
924    CallInd {
925        dest: Varnode,
926    },
927    Return {
928        dest: Varnode,
929    },
930
931    // ── Integer Arithmetic ─────────────────────────────────────────
932    IntAdd {
933        out: Varnode,
934        left: Varnode,
935        right: Varnode,
936    },
937    IntSub {
938        out: Varnode,
939        left: Varnode,
940        right: Varnode,
941    },
942    IntMult {
943        out: Varnode,
944        left: Varnode,
945        right: Varnode,
946    },
947    IntDiv {
948        out: Varnode,
949        left: Varnode,
950        right: Varnode,
951    },
952    IntSDiv {
953        out: Varnode,
954        left: Varnode,
955        right: Varnode,
956    },
957    IntRem {
958        out: Varnode,
959        left: Varnode,
960        right: Varnode,
961    },
962    IntSRem {
963        out: Varnode,
964        left: Varnode,
965        right: Varnode,
966    },
967    IntNeg {
968        out: Varnode,
969        input: Varnode,
970    },
971
972    // ── Integer Comparison ─────────────────────────────────────────
973    IntEq {
974        out: Varnode,
975        left: Varnode,
976        right: Varnode,
977    },
978    IntNotEq {
979        out: Varnode,
980        left: Varnode,
981        right: Varnode,
982    },
983    IntLess {
984        out: Varnode,
985        left: Varnode,
986        right: Varnode,
987    },
988    IntLessEq {
989        out: Varnode,
990        left: Varnode,
991        right: Varnode,
992    },
993    IntSLess {
994        out: Varnode,
995        left: Varnode,
996        right: Varnode,
997    },
998    IntSLessEq {
999        out: Varnode,
1000        left: Varnode,
1001        right: Varnode,
1002    },
1003
1004    // ── Integer Logical / Bitwise ──────────────────────────────────
1005    IntAnd {
1006        out: Varnode,
1007        left: Varnode,
1008        right: Varnode,
1009    },
1010    IntOr {
1011        out: Varnode,
1012        left: Varnode,
1013        right: Varnode,
1014    },
1015    IntXor {
1016        out: Varnode,
1017        left: Varnode,
1018        right: Varnode,
1019    },
1020    IntNot {
1021        out: Varnode,
1022        input: Varnode,
1023    },
1024
1025    // ── Shift ──────────────────────────────────────────────────────
1026    IntLsl {
1027        out: Varnode,
1028        left: Varnode,
1029        right: Varnode,
1030    },
1031    IntLsr {
1032        out: Varnode,
1033        left: Varnode,
1034        right: Varnode,
1035    },
1036    IntAsr {
1037        out: Varnode,
1038        left: Varnode,
1039        right: Varnode,
1040    },
1041
1042    // ── Extension / Truncation ─────────────────────────────────────
1043    IntZext {
1044        out: Varnode,
1045        input: Varnode,
1046    },
1047    IntSext {
1048        out: Varnode,
1049        input: Varnode,
1050    },
1051    Subpiece {
1052        out: Varnode,
1053        input: Varnode,
1054        lsb: u32,
1055    },
1056
1057    // ── Carry / Borrow ─────────────────────────────────────────────
1058    IntCarry {
1059        out: Varnode,
1060        left: Varnode,
1061        right: Varnode,
1062    },
1063    IntSCarry {
1064        out: Varnode,
1065        left: Varnode,
1066        right: Varnode,
1067    },
1068    IntSBorrow {
1069        out: Varnode,
1070        left: Varnode,
1071        right: Varnode,
1072    },
1073
1074    // ── Boolean ────────────────────────────────────────────────────
1075    BoolAnd {
1076        out: Varnode,
1077        left: Varnode,
1078        right: Varnode,
1079    },
1080    BoolOr {
1081        out: Varnode,
1082        left: Varnode,
1083        right: Varnode,
1084    },
1085    BoolXor {
1086        out: Varnode,
1087        left: Varnode,
1088        right: Varnode,
1089    },
1090    BoolNot {
1091        out: Varnode,
1092        input: Varnode,
1093    },
1094
1095    // ── Floating Point Arithmetic ──────────────────────────────────
1096    FloatAdd {
1097        out: Varnode,
1098        left: Varnode,
1099        right: Varnode,
1100    },
1101    FloatSub {
1102        out: Varnode,
1103        left: Varnode,
1104        right: Varnode,
1105    },
1106    FloatMult {
1107        out: Varnode,
1108        left: Varnode,
1109        right: Varnode,
1110    },
1111    FloatDiv {
1112        out: Varnode,
1113        left: Varnode,
1114        right: Varnode,
1115    },
1116    FloatNeg {
1117        out: Varnode,
1118        input: Varnode,
1119    },
1120    FloatAbs {
1121        out: Varnode,
1122        input: Varnode,
1123    },
1124    FloatSqrt {
1125        out: Varnode,
1126        input: Varnode,
1127    },
1128
1129    // ── Floating Point Comparison ──────────────────────────────────
1130    FloatEq {
1131        out: Varnode,
1132        left: Varnode,
1133        right: Varnode,
1134    },
1135    FloatNotEq {
1136        out: Varnode,
1137        left: Varnode,
1138        right: Varnode,
1139    },
1140    FloatLess {
1141        out: Varnode,
1142        left: Varnode,
1143        right: Varnode,
1144    },
1145    FloatLessEq {
1146        out: Varnode,
1147        left: Varnode,
1148        right: Varnode,
1149    },
1150    FloatNan {
1151        out: Varnode,
1152        input: Varnode,
1153    },
1154
1155    // ── Floating Point Conversion ──────────────────────────────────
1156    Int2Float {
1157        out: Varnode,
1158        input: Varnode,
1159    },
1160    Float2Float {
1161        out: Varnode,
1162        input: Varnode,
1163    },
1164    Trunc {
1165        out: Varnode,
1166        input: Varnode,
1167    },
1168    FloatCeil {
1169        out: Varnode,
1170        input: Varnode,
1171    },
1172    FloatFloor {
1173        out: Varnode,
1174        input: Varnode,
1175    },
1176    FloatRound {
1177        out: Varnode,
1178        input: Varnode,
1179    },
1180
1181    // ── Bit Manipulation ───────────────────────────────────────────
1182    Popcount {
1183        out: Varnode,
1184        input: Varnode,
1185    },
1186    Lzcount {
1187        out: Varnode,
1188        input: Varnode,
1189    },
1190
1191    // ── Miscellaneous ──────────────────────────────────────────────
1192    /// User-defined or architecture-specific operation.
1193    CallOther {
1194        out: Option<Varnode>,
1195        func_id: u64,
1196        inputs: Vec<Varnode>,
1197    },
1198}
1199
1200#[cfg(test)]
1201mod tests {
1202    use super::*;
1203
1204    #[test]
1205    fn optimize_intand_all_ones_wide_size_no_panic() {
1206        let mut ops = vec![
1207            PcodeOp::IntAnd {
1208                out: Varnode::unique(0, 16),
1209                left: Varnode::register(0, 16),
1210                right: Varnode::constant(u64::MAX, 16),
1211            },
1212            PcodeOp::Copy {
1213                out: Varnode::register(16, 16),
1214                input: Varnode::unique(0, 16),
1215            },
1216        ];
1217
1218        optimize(&mut ops);
1219
1220        assert!(matches!(
1221            ops.as_slice(),
1222            [PcodeOp::Copy { out, input }]
1223                if *out == Varnode::register(16, 16)
1224                    && *input == Varnode::register(0, 16)
1225        ));
1226    }
1227}