yaxpeax_x86/lib.rs
1//! # `yaxpeax-x86`, a decoder for x86-family instruction sets
2//!
3//! `yaxpeax-x86` provides x86 decoders, for 64-, 32-, and 16-bit modes. `yaxpeax-x86` also
4//! implements traits defined by `yaxpeax_arch`, making it suitable for interchangeable use with
5//! other `yaxpeax`-family instruction decoders.
6//!
7//! ## usage
8//!
9//! the fastest way to decode an x86 instruction is through [`amd64::InstDecoder::decode_slice()`]:
//! ```
//! let decoder = yaxpeax_x86::amd64::InstDecoder::default();
//!
//! let inst = decoder.decode_slice(&[0x33, 0xc0]).unwrap();
//!
//! #[cfg(feature="fmt")]
//! assert_eq!("xor eax, eax", inst.to_string());
//! ```
18//!
19//! instructions, operands, registers, and generally all decoding structures, are in their mode's
20//! respective submodule:
21//! * `x86_64`/`amd64` decoding is under [`long_mode`]
22//! * `x86_32`/`x86` decoding is under [`protected_mode`]
23//! * `x86_16`/`8086` decoding is under [`real_mode`]
24//!
25//! all modes have equivalent data available in a decoded instruction. for example, all modes have
26//! library-friendly `Operand` and `RegSpec` types:
27//!
//! ```
//! use yaxpeax_x86::amd64::{InstDecoder, Operand, RegSpec};
//!
//! let decoder = yaxpeax_x86::amd64::InstDecoder::default();
//!
//! let inst = decoder.decode_slice(&[0x33, 0x01]).unwrap();
//!
//! #[cfg(feature="fmt")]
//! assert_eq!("xor eax, dword [rcx]", inst.to_string());
//!
//! assert_eq!(Operand::Register { reg: RegSpec::eax() }, inst.operand(0));
//! #[cfg(feature="fmt")]
//! assert_eq!("eax", inst.operand(0).to_string());
//! assert_eq!(Operand::MemDeref { base: RegSpec::rcx() }, inst.operand(1));
//!
//! // an operand in isolation does not know the size of memory it references, if any
//! #[cfg(feature="fmt")]
//! assert_eq!("[rcx]", inst.operand(1).to_string());
//!
//! // and for memory operands, the size must be read from the instruction itself:
//! let mem_size: yaxpeax_x86::amd64::MemoryAccessSize = inst.mem_size().unwrap();
//! assert_eq!("dword", mem_size.size_name());
//!
//! // `MemoryAccessSize::size_name()` is how its `Display` impl works, as well:
//! #[cfg(feature="fmt")]
//! assert_eq!("dword", mem_size.to_string());
//! ```
55//!
56//! `yaxpeax-x86` can also be used to decode instructions generically through the `yaxpeax-arch`
57//! traits:
58//! ```
59//! mod decoder {
60//! use yaxpeax_arch::{Arch, AddressDisplay, Decoder, Reader, ReaderBuilder};
61//!
62//! // have to play some games so this example works right even without `fmt` enabled!
63//! #[cfg(feature="fmt")]
64//! trait InstBound: std::fmt::Display {}
65//! #[cfg(not(feature="fmt"))]
66//! trait InstBound {}
67//!
68//! #[cfg(feature="fmt")]
69//! impl <T: std::fmt::Display> InstBound for T {}
70//! #[cfg(not(feature="fmt"))]
71//! impl <T> InstBound for T {}
72//!
73//! pub fn decode_stream<
74//! 'data,
75//! A: yaxpeax_arch::Arch,
76//! U: ReaderBuilder<A::Address, A::Word>,
77//! >(data: U) where
78//! A::Instruction: InstBound,
79//! {
80//! let mut reader = ReaderBuilder::read_from(data);
81//! let mut address: A::Address = reader.total_offset();
82//!
83//! let decoder = A::Decoder::default();
84//! let mut decode_res = decoder.decode(&mut reader);
85//! loop {
86//! match decode_res {
87//! Ok(ref inst) => {
88//! #[cfg(feature="fmt")]
89//! println!("{}: {}", address.show(), inst);
90//! decode_res = decoder.decode(&mut reader);
91//! address = reader.total_offset();
92//! }
93//! Err(e) => {
94//! println!("{}: decode error: {}", address.show(), e);
95//! break;
96//! }
97//! }
98//! }
99//! }
100//! }
101//!
102//! use yaxpeax_x86::amd64::{Arch as x86_64};
103//! use yaxpeax_arch::{ReaderBuilder, U8Reader};
104//! let data: &[u8] = &[0x55, 0x33, 0xc0, 0x48, 0x8b, 0x02, 0x5d, 0xc3];
105//! decoder::decode_stream::<x86_64, _>(data);
106//! ```
107//!
108//! ## `#![no_std]`
109//!
//! `yaxpeax-x86` supports `no_std` usage. to be built `no_std`, `yaxpeax-x86` only needs
//! `default-features = false` in the corresponding `Cargo.toml` dependency. if formatting is
//! needed with `std` disabled, it can be re-enabled by explicitly requesting the `fmt` feature,
//! like:
114//! ```text
115//! yaxpeax-x86 = { version = "*", default-features = false, features = ["fmt"] }
116//! ```
117//!
118//! this is how the `.so` and `.a` packaging in
119//! [`ffi/`](https://github.com/iximeow/yaxpeax-x86/tree/no-gods-no-/ffi) is performed.
120
121#![no_std]
122
123#[cfg(feature="use-serde")]
124#[macro_use] extern crate serde_derive;
125#[cfg(feature="use-serde")]
126extern crate serde;
127
128#[cfg(feature="std")]
129extern crate alloc;
130
131pub mod long_mode;
132pub use long_mode as amd64;
133pub use long_mode::Arch as x86_64;
134
135pub mod protected_mode;
136pub use protected_mode::Arch as x86_32;
137
138pub mod real_mode;
139pub use real_mode::Arch as x86_16;
140
141// this exists to size `InstructionTextBuffer`'s buffer. it ideally would come from an `Arch`
142// impl, or something related to `Arch`, but i'm not yet sure how to wire that up into
143// yaxpeax-arch. so instead calculate an appropriate max size for all of 16-bit/32-bit/64-bit
144// instruction printing that `InstructionTextBuffer` can be used for.
145//
146// `InstructionTextBuffer` prints an `InstructionDisplayer`, which means either intel syntax or
147// pseudo-C. in the future, at&t probably, as well.
148//
149// the pseudo-C syntax's max length would be something like:
150// ```
151// xacquire xrelease lock { repnz qword if /* signed */ greater_or_equal(rflags) then jmp gs:[xmm31 +
152// xmm31 * 8 + 0x12345678]{k7}{z}{rne-sae} }
153// ```
154// (which is nonsensical) or for an unknown opcode,
155// ```
156// xacquire xrelease lock { op0 = op(op0, op1, op2, op3) }
157// ```
158// where `opN` is an operand. the longest operand, same as above, would be something like
159// ```
160// gs:[xmm31 + xmm31 * 8 + 0x12345678]{k7}{z}{rne-sae}
161// ```
162// for a length like 262 bytes of operand, 55 bytes of prefixes and syntax, and another up-to-20
163// bytes of opcode.
164//
165// the longest contextualize_c might write is around 337 bytes. round up to 512 because it's.. not
166// much extra.
167//
168// the same reasoning for intel syntax yields a smaller instruction:
169// ```
170// xacquire xrelease lock op op1, op2, op3, op4
171// ```
172// where the longest operands are the same as above. this comes out to closer to 307 bytes. 512
173// bytes is still the longest of the two options.
// capacity, in bytes, used to size `InstructionTextBuffer`'s buffer. nothing reads this constant
// in some library configurations, so the dead-code lint is silenced rather than cfg-gating it.
#[allow(dead_code)]
const MAX_INSTRUCTION_LEN: usize = 512;
176
// names for memory access sizes, indexed by the raw size value of the access. every index that
// has no name of its own holds "BUG". index 63 is the value `MemoryAccessSize::bytes_size`
// treats as "no fixed byte count", and is named "ptr".
const MEM_SIZE_STRINGS: [&str; 65] = {
    // start from an all-"BUG" table and fill in only the sizes that have a name.
    let mut names = ["BUG"; 65];
    names[1] = "byte";
    names[2] = "word";
    names[4] = "dword";
    names[5] = "ptr";
    names[6] = "far";
    names[8] = "qword";
    names[10] = "mword";
    names[16] = "xmmword";
    names[32] = "ymmword";
    names[48] = "m384b";
    names[63] = "ptr";
    names[64] = "zmmword";
    names
};
188
/// the size of a memory access.
///
/// this wraps a single raw size value. use [`MemoryAccessSize::bytes_size`] to get the access
/// width in bytes (when it is known), or [`MemoryAccessSize::size_name`] for a printable name.
pub struct MemoryAccessSize {
    // raw size value; interpreted by `bytes_size` and used as the index for `size_name`.
    size: u8,
}
192impl MemoryAccessSize {
193 /// return the number of bytes referenced by this memory access.
194 ///
195 /// if the number of bytes cannot be confidently known by the instruction in isolation (as is
196 /// the case for `xsave`/`xrstor`-style "operate on all processor state" instructions), this
197 /// function will return `None`.
198 pub fn bytes_size(&self) -> Option<u8> {
199 if self.size == 63 {
200 None
201 } else {
202 Some(self.size)
203 }
204 }
205
206 /// a human-friendly label for the number of bytes this memory access references.
207 ///
208 /// there are some differences from size names that may be expected elsewhere; `yaxpeax-x86`
209 /// prefers to use consistent names for a width even if the way those bytes are used varies.
210 ///
211 /// the sizes `yaxpeax-x86` knows are as follows:
212 /// | size (bytes) | name |
213 /// |--------------|------------|
214 /// | 1 | `byte` |
215 /// | 2 | `word` |
216 /// | 4 | `dword` |
217 /// | 6 | `far` |
218 /// | 8 | `qword` |
219 /// | 10 | `mword` |
220 /// | 16 | `xmmword` |
221 /// | 32 | `ymmword` |
222 /// | 64 | `zmmword` |
223 /// | variable | `ptr` |
224 ///
225 /// "mword" refers to an mmx-sized access - 80 bits, or 10 bytes. `mword` is also used for
226 /// 64-bit far calls, because they reference a contiguous ten bytes; two bytes of segment
227 /// selector and eight bytes of address.
228 ///
229 /// "variable" accesses access a number of bytes dependent on the physical processor and its
230 /// operating mode. this is particularly relevant for `xsave`/`xrstor`-style instructions.
231 pub fn size_name(&self) -> &'static str {
232 MEM_SIZE_STRINGS[self.size as usize]
233 }
234}
235
#[cfg(feature = "fmt")]
impl core::fmt::Display for MemoryAccessSize {
    /// write this access size's name, as returned by `size_name()`, with no other decoration.
    fn fmt(&self, out: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let name = self.size_name();
        out.write_str(name)
    }
}
242
#[cfg(feature = "fmt")]
impl core::fmt::Debug for MemoryAccessSize {
    /// debug output is the same text as the `Display` impl; this simply delegates to it.
    fn fmt(&self, out: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        <Self as core::fmt::Display>::fmt(self, out)
    }
}