dynasm/
common.rs

1//! This module contains various infrastructure that is common across all assembler backends
2
/// Width, in bytes, of a value, expression, register or similar operand.
///
/// The variant names follow NASM's size keywords, where a "word" is 16 bits. Each discriminant
/// equals the byte count, so a variant can be cast directly to an integer.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Size {
    /// 1 byte (8 bits).
    BYTE  = 1,
    /// 2 bytes (16 bits).
    WORD  = 2,
    /// 4 bytes (32 bits).
    DWORD = 4,
    /// 6 bytes (48 bits).
    FWORD = 6,
    /// 8 bytes (64 bits).
    QWORD = 8,
    /// 10 bytes (80 bits).
    PWORD = 10,
    /// 16 bytes (128 bits).
    OWORD = 16,
    /// 32 bytes (256 bits).
    HWORD = 32,
}
16
17/// A number representation (sign and size).
18#[derive(Debug, PartialOrd, PartialEq, Ord, Eq, Hash, Clone, Copy)]
19pub struct NumericRepr {
20    pub size: Size,
21    pub signed: bool,
22}
23
24/// An integral value in a particular `Numeric` representation.
25#[derive(Debug, PartialOrd, PartialEq, Ord, Eq, Hash, Clone, Copy)]
26pub struct Number {
27    /// The bit representation of the number.
28    /// TODO: comment on the actual representation chosen.
29    value: u64,
30    repr: NumericRepr,
31}
32
33impl Size {
34    pub const fn in_bytes(self) -> u8 {
35        self as u8
36    }
37
38    pub fn as_literal(self) -> &'static str {
39        match self {
40            Size::BYTE  => "i8",
41            Size::WORD  => "i16",
42            Size::DWORD => "i32",
43            Size::FWORD => "i48",
44            Size::QWORD => "i64",
45            Size::PWORD => "i80",
46            Size::OWORD => "i128",
47            Size::HWORD => "i256",
48        }
49    }
50}
51
52impl NumericRepr {
53    pub const U8: NumericRepr = NumericRepr::unsigned(Size::BYTE);
54    pub const I8: NumericRepr = NumericRepr::signed(Size::BYTE);
55    pub const U16: NumericRepr = NumericRepr::unsigned(Size::WORD);
56    pub const I16: NumericRepr = NumericRepr::signed(Size::WORD);
57    pub const U32: NumericRepr = NumericRepr::unsigned(Size::DWORD);
58    pub const I32: NumericRepr = NumericRepr::signed(Size::DWORD);
59    pub const U64: NumericRepr = NumericRepr::unsigned(Size::QWORD);
60    pub const I64: NumericRepr = NumericRepr::signed(Size::QWORD);
61
62    pub const fn signed(size: Size) -> Self {
63        NumericRepr { size, signed: true }
64    }
65
66    pub const fn unsigned(size: Size) -> Self {
67        NumericRepr { size, signed: false }
68    }
69}
70
71impl Number {
72    /// Cast a short constant to a specific representation.
73    pub const fn from_u64_and_size(val: u64, size: Size) -> Self {
74        Self::from_u64_and_repr(val, NumericRepr::unsigned(size))
75    }
76
77    pub const fn from_u64_and_repr(value: u64, repr: NumericRepr) -> Self {
78        Number { value, repr }
79    }
80
81    pub const fn repr(self) -> NumericRepr {
82        self.repr
83    }
84
85    pub fn byte(val: u8) -> Self {
86        Self::from_u64_and_size(val.into(), Size::BYTE)
87    }
88
89    pub fn word(val: u16) -> Self {
90        Self::from_u64_and_size(val.into(), Size::WORD)
91    }
92
93    pub fn dword(val: u32) -> Self {
94        Self::from_u64_and_size(val.into(), Size::DWORD)
95    }
96
97    pub fn qword(val: u64) -> Self {
98        Self::from_u64_and_size(val.into(), Size::QWORD)
99    }
100
101    pub fn as_u8(self) -> u8 {
102        self.cast_as(NumericRepr::unsigned(Size::BYTE)).value as u8
103    }
104
105    pub fn as_i8(self) -> i8 {
106        self.cast_as(NumericRepr::signed(Size::BYTE)).value as i8
107    }
108
109    pub fn as_u16(self) -> u16 {
110        self.cast_as(NumericRepr::unsigned(Size::WORD)).value as u16
111    }
112
113    pub fn as_i16(self) -> i16 {
114        self.cast_as(NumericRepr::signed(Size::WORD)).value as i16
115    }
116
117    pub fn as_u32(self) -> u32 {
118        self.cast_as(NumericRepr::unsigned(Size::DWORD)).value as u32
119    }
120
121    pub fn as_i32(self) -> i32 {
122        self.cast_as(NumericRepr::signed(Size::DWORD)).value as i32
123    }
124
125    pub fn as_u64(self) -> u64 {
126        self.cast_as(NumericRepr::unsigned(Size::QWORD)).value as u64
127    }
128
129    pub fn as_i64(self) -> i64 {
130        self.cast_as(NumericRepr::signed(Size::DWORD)).value as i64
131    }
132
133    /// Perform a cast in 2-complement.
134    ///
135    /// Casts work like Rust `as` coercion. A sign extension is performed when the source is
136    /// signed, else the number is zero extended.
137    // FIXME: test coverage!
138    pub fn cast_as(mut self, repr: NumericRepr) -> Number {
139        // Just use the value, it is stored with sign/zero extension.
140        self.repr = repr;
141        // Adjust sign extension if necessary now.
142        self.correct_extension_bits_for_sign();
143        self
144    }
145
146    /// Do a value preserving (`TryFrom`) conversion.
147    ///
148    /// This is not the same as lossless, `u32` and `i32` can be converted without loss but do not
149    /// preserve the values.
150    pub fn convert(self, repr: NumericRepr) -> Option<Number> {
151        let cast = self.cast_as(repr);
152
153        let max = self.repr_of_max().min(cast.repr_of_max());
154        let below_min = self.repr_below_min().max(cast.repr_below_min());
155
156        if cast.value <= max && cast.value > below_min {
157            Some(cast)
158        } else {
159            None
160        }
161    }
162
163    pub fn make_signed(mut self, signed: bool) -> Number {
164        self.repr.signed = signed;
165        self.correct_extension_bits_for_sign();
166        self
167    }
168
169    /// Resize, keeping the same signedness.
170    pub const fn resize(self, size: Size) -> Number {
171        // Because resizing does not change signedness this yields correct extension bits.
172        Number {
173            value: self.value,
174            repr: NumericRepr { size, signed: self.repr.signed },
175        }
176    }
177
178    pub fn write_le_bytes(self, buf: &mut Vec<u8>) {
179        let bytes = (0..self.repr.size.in_bytes())
180            .scan(self.value, |value, _| {
181                let byte = *value & 0xff;
182                *value >>= 8;
183                Some(byte as u8)
184            });
185        buf.extend(bytes)
186    }
187
188    pub fn to_le_bytes(self) -> Vec<u8> {
189        let mut out = vec![];
190        self.write_le_bytes(&mut out);
191        out
192    }
193
194    /// The value bitmask for the size.
195    fn mask(self) -> u64 {
196        use core::convert::TryInto;
197        #[allow(non_snake_case)]
198        let ALL_BITS: u8 = core::mem::size_of::<u64>().try_into().unwrap();
199
200        let len: u8 = self.byte_len() * 8;
201        (!0u64) >> ALL_BITS.checked_sub(len).unwrap()
202    }
203
204    fn byte_len(self) -> u8 {
205        self.repr.size.in_bytes()
206    }
207
208    /// The maximum value representation.
209    /// Used to check the value range in unsigned representation of any length.
210    fn repr_of_max(self) -> u64 {
211        self.mask() ^ (if self.repr.signed { self.sign_bit() } else { 0 })
212    }
213
214    /// The representation below minimum value.
215    /// Used to check the value range in unsigned representation of any length.
216    fn repr_below_min(self) -> u64 {
217        if self.repr.signed {
218            ((!0u64) ^ self.repr_of_max()) - 1
219        } else {
220            0
221        }
222    }
223
224    fn sign_bit(self) -> u64 {
225        let right_shift = (self.byte_len() * 8) - 1;
226        1 << right_shift
227    }
228
229    fn is_sign_bit_set(self) -> bool {
230        self.value & self.sign_bit() != 0
231    }
232
233    /// Fix the sign extension after a cast.
234    fn correct_extension_bits_for_sign(&mut self) {
235        if self.repr.signed && self.is_sign_bit_set() {
236            self.value |= !self.mask();
237        } else {
238            self.value &= self.mask();
239        }
240    }
241}
242
243/// A value in a list of constants.
244#[derive(Debug, Clone)]
245pub enum Const {
246    /// Add constant through applying some relocation.
247    Relocate(Jump),
248
249    /// Add a simple value.
250    Value(Expr),
251}
252
253/**
254 * Jump types
255 */
256#[derive(Debug, Clone)]
257pub struct Jump {
258    pub kind: JumpKind,
259    pub offset: Option<Expr>,
260}
261
262#[derive(Debug, Clone)]
263pub enum JumpKind {
264    // note: these symbol choices try to avoid stuff that is a valid starting symbol for parse_expr
265    // in order to allow the full range of expressions to be used. the only currently existing ambiguity is
266    // with the symbol <, as this symbol is also the starting symbol for the universal calling syntax <Type as Trait>.method(args)
267    Global(Ident),   // -> label (["+" "-"] offset)?
268    Backward(Ident), //  > label (["+" "-"] offset)?
269    Forward(Ident),  //  < label (["+" "-"] offset)?
270    Dynamic(Expr),   // =>expr | => (expr) (["+" "-"] offset)?
271    Bare(Value)      // jump to this address
272}
273
274impl Jump {
275    pub fn new(kind: JumpKind, offset: Option<Expr>) -> Jump {
276        Jump {
277            kind,
278            offset,
279        }
280    }
281
282    pub fn encode(self, data: &[u8]) -> Stmt {
283        let offset = self.offset.into(); 
284        let data = data.to_vec();
285        match self.kind {
286            JumpKind::Global(ident) => Stmt::GlobalJumpTarget(ident, offset, data),
287            JumpKind::Backward(ident) => Stmt::BackwardJumpTarget(ident, offset, data),
288            JumpKind::Forward(ident) => Stmt::ForwardJumpTarget(ident, offset, data),
289            JumpKind::Dynamic(expr) => Stmt::DynamicJumpTarget(expr.into(), offset, data),
290            JumpKind::Bare(expr) => Stmt::BareJumpTarget(expr.into(), data),
291        }
292    }
293}
294
295
296/// An abstract representation of a dynasm runtime statement to be emitted
297#[derive(Debug, Clone)]
298pub enum Stmt {
299    // push integral data with arbitrary size.
300    Const(Value),
301
302    // extend the instruction stream with unsigned bytes
303    Extend(Vec<u8>),
304    // extend the instruction stream with unsigned bytes
305    ExprExtend(Value),
306    // align the instruction stream to some alignment
307    // the second is the actual alignment and might be a platform default, hence computed by the
308    // assembler library itself instead of a user defined expression.
309    Align(Expr, Value),
310
311    // label declarations
312    GlobalLabel(Ident),
313    LocalLabel(Ident),
314    DynamicLabel(Expr),
315
316    // and their respective relocations (as expressions as they differ per assembler)
317    GlobalJumpTarget(Ident, JumpOffset, Vec<u8>),
318    ForwardJumpTarget(Ident, JumpOffset, Vec<u8>),
319    BackwardJumpTarget(Ident, JumpOffset, Vec<u8>),
320    DynamicJumpTarget(JumpOffset, JumpOffset, Vec<u8>),
321    BareJumpTarget(JumpOffset, Vec<u8>),
322
323    // a random statement that has to be inserted between assembly hunks
324    Stmt(Expr),
325}
326
327/// A value that is specifically for jump offset use.
328/// Slightly more specialized than `Value` since the only non-computed value is if elided.
329#[derive(Debug, Clone, Copy)]
330pub enum JumpOffset {
331    Zero,
332    Injected(Value),
333}
334
/// An identifier, stored as its name.
#[derive(Clone, Debug)]
pub struct Ident {
    /// The textual name of the identifier.
    pub name: String,
}
340
341/// An expression that will be inserted by the caller.
342#[derive(Debug, Clone, Copy)]
343pub struct Expr {
344    /// An index generated by the library user, uniquely identifying this expression.
345    pub idx: usize,
346    /// Indicate the representation for this numeric expression. In the input, this is used by the
347    /// caller to indicate the current type (or the smallest coercible one) while the output uses
348    /// it to inform the caller of the final cast to use.
349    pub repr: NumericRepr,
350}
351
352/// A dynamically or statically computed value.
353///
354/// To produce valid binary it is mostly only important to know the correct width of the output but
355/// the value itself can be computed by the caller. This allows freedom on parsing without
356/// requiring the assembler core (this library) to implement an arbitrary expression evaluator. In
357/// particular, the evaluation can even be further delayed by the caller and left to `rustc`.
358#[derive(Debug, Clone, Copy)]
359pub enum Value {
360    /// A constant number.
361    Number(Number),
362    /// An external expression of the caller.
363    Expr(Expr),
364}
365
366// convenience methods
367impl Stmt {
368    pub fn u8(value: u8) -> Stmt {
369        Stmt::Const(Value::Byte(value))
370    }
371
372    pub fn u16(value: u16) -> Stmt {
373        Stmt::Const(Value::Word(value))
374    }
375
376    pub fn u32(value: u32) -> Stmt {
377        Stmt::Const(Value::Dword(value))
378    }
379
380    pub fn u64(value: u64) -> Stmt {
381        Stmt::Const(Value::Qword(value))
382    }
383
384    /// Zeroed bytes of a numeric size.
385    pub fn zeroed(size: Size) -> Self {
386        let nr = Number::from_u64_and_size(0, size);
387        Stmt::Const(Value::Number(nr))
388    }
389}
390
391impl Ident {
392    pub fn to_string(self) -> String {
393        self.name
394    }
395}
396
397impl Value {
398    pub fn Byte(val: u8) -> Self {
399        Value::Number(Number::byte(val))
400    }
401
402    pub fn Word(val: u16) -> Self {
403        Value::Number(Number::word(val))
404    }
405
406    pub fn Dword(val: u32) -> Self {
407        Value::Number(Number::dword(val))
408    }
409
410    pub fn Qword(val: u64) -> Self {
411        Value::Number(Number::qword(val))
412    }
413
414    pub fn repr(self) -> NumericRepr {
415        match self {
416            Value::Number(nr) => nr.repr,
417            Value::Expr(expr) => expr.repr,
418        }
419    }
420
421    pub fn convert(self, repr: NumericRepr) -> Option<Self> {
422        Some(match self {
423            Value::Number(nr) => Value::Number(nr.convert(repr)?),
424            Value::Expr(expr) => Value::Expr(Expr { idx: expr.idx, repr }),
425        })
426    }
427
428    pub fn size(self) -> Size {
429        self.repr().size
430    }
431}
432
/// Create a bitmask with the lowest `scale` bits set.
///
/// A `scale` of 32 or more yields a mask with all 32 bits set.
pub fn bitmask(scale: u8) -> u32 {
    if u32::from(scale) < 32 {
        // The shift cannot overflow here and the result is at least 1.
        (1u32 << scale) - 1
    } else {
        !0u32
    }
}
437
438
/// Create a 64-bit bitmask with the lowest `scale` bits set.
///
/// A `scale` of 64 or more yields a mask with all 64 bits set.
pub fn bitmask64(scale: u8) -> u64 {
    if u32::from(scale) < 64 {
        // The shift cannot overflow here and the result is at least 1.
        (1u64 << scale) - 1
    } else {
        !0u64
    }
}
443
444impl From<Option<Expr>> for JumpOffset {
445    fn from(val: Option<Expr>) -> JumpOffset {
446        match val {
447            None => JumpOffset::Zero,
448            Some(expr) => JumpOffset::Injected(expr.into()),
449        }
450    }
451}
452
453impl From<Expr> for JumpOffset {
454    fn from(expr: Expr) -> JumpOffset {
455        JumpOffset::Injected(expr.into())
456    }
457}
458
459impl From<Value> for JumpOffset {
460    fn from(val: Value) -> JumpOffset {
461        JumpOffset::Injected(val)
462    }
463}
464
465impl From<&'_ Expr> for JumpOffset {
466    fn from(expr: &'_ Expr) -> JumpOffset {
467        JumpOffset::Injected((*expr).into())
468    }
469}
470
471impl From<u8> for Value {
472    fn from(val: u8) -> Value {
473        Value::Byte(val)
474    }
475}
476
477impl From<Expr> for Value {
478    fn from(expr: Expr) -> Value {
479        Value::Expr(expr)
480    }
481}
482
483impl From<&'_ Expr> for Value {
484    fn from(expr: &'_ Expr) -> Value {
485        Value::Expr(*expr)
486    }
487}
488
489impl From<Value> for Stmt {
490    fn from(val: Value) -> Self {
491        Stmt::Const(val)
492    }
493}
494
495impl From<&'_ Value> for Stmt {
496    fn from(val: &'_ Value) -> Self {
497        Stmt::Const(*val)
498    }
499}