lazy_re/
lib.rs

1//! # Lazy RE
2//! Sometimes we're lazy and we don't need to fully reverse engineer a struct, so we can omit some
3//! fields we're not interested in.
4//!
//! With this library you can generate padding without doing mental math every time you change
//! your struct: you no longer have to keep track of the padding in your head, because this proc
//! macro generates it for you!
8use proc_macro::TokenStream;
9use quote::{format_ident, quote};
10use std::collections::HashSet;
11use syn::parse::{Parse, Parser};
12use syn::{parse_macro_input, Data, DataStruct, DeriveInput, Fields, FieldsNamed, LitInt, Token};
13
/// Byte offset given in a field's `#[lazy_re(offset = N)]` attribute.
struct Offset(usize);
15
/// Custom keywords used while parsing the arguments of the field attribute.
mod keyword {
    // Defines `keyword::offset`, the `offset` token expected in `offset = <integer>`.
    syn::custom_keyword!(offset);
}
19
20fn get_fields<'a>(
21    ast: &'a mut syn::Data,
22    ident: &'_ syn::Ident,
23) -> syn::Result<&'a mut FieldsNamed> {
24    match ast {
25        Data::Struct(DataStruct {
26            fields: Fields::Named(ref mut fields),
27            ..
28        }) => Ok(fields),
29        _ => Err(syn::Error::new(ident.span(), "Expected named struct")),
30    }
31}
32
33/// This macro is in charge of generating the Debug implementation for the struct and the `::new`
34/// method. It is optional to include.
35///
36/// The implementation for the Debug trait will omit all the padding fields.
37#[proc_macro_derive(LazyRe)]
38pub fn derive_helper_attr(input: TokenStream) -> TokenStream {
39    let ast = parse_macro_input!(input as DeriveInput);
40    match derive_helper_attr_impl(ast) {
41        Ok(res) => res,
42        Err(e) => e.to_compile_error().into(),
43    }
44}
45
46fn derive_helper_attr_impl(mut ast: DeriveInput) -> syn::Result<TokenStream> {
47    let fields = &mut get_fields(&mut ast.data, &ast.ident)?.named;
48
49    let ident_string = ast.ident.to_string();
50    let ident = ast.ident;
51    // Safety:
52    // We are sure we're reading things that *actually* exist in memory.
53    let fields_names = fields
54        .iter()
55        .flat_map(|x| &x.ident)
56        .filter(|x| !x.to_string().starts_with("__pad")) // This is ugly, I wish we didn't need to do this.
57        .map(|ident| {
58            let ident_string = ident.to_string();
59            return quote! { .field(#ident_string,
60            unsafe { &std::ptr::read_unaligned(std::ptr::addr_of!(self.#ident)) }) };
61        });
62
63    let output = quote! {
64        impl std::fmt::Debug for #ident {
65            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
66                return f.debug_struct(#ident_string)
67                    #( #fields_names )*
68                    .finish();
69            }
70        }
71    };
72
73    Ok(output.into())
74}
75
76impl Parse for Offset {
77    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
78        input.parse::<keyword::offset>()?;
79        input.parse::<Token![=]>()?;
80        let val: LitInt = input.parse()?;
81
82        Ok(Offset(val.base10_parse()?))
83    }
84}
85
86fn lazy_re_impl(mut ast: DeriveInput) -> syn::Result<TokenStream> {
87    let mut all_fields = Vec::new();
88    let mut current_ix: usize = 0;
89    let mut is_repr_c_packed = false;
90
91    let fields = &mut get_fields(&mut ast.data, &ast.ident)?.named;
92
93    // We need to check if the struct we're working with implements #[repr(C, packed)]. That's the
94    // only way we can guarantee the sizes correspond to what we're declaring, since a struct with
95    // offset could have some sort of padding which could make bugs harder to track down. The main
96    // disadvantage is that we cannot have pointers to everything because misalignment could
97    // happen.
98    for attr in ast.attrs.iter() {
99        let (path, nested) = match attr.parse_meta()? {
100            syn::Meta::List(syn::MetaList { path, nested, .. }) => (path, nested),
101            _ => continue,
102        };
103
104        if !path.is_ident("repr") {
105            continue;
106        }
107
108        let nested_names = nested
109            .iter()
110            .map(|x| match x {
111                syn::NestedMeta::Meta(m) => m.path().get_ident().unwrap().to_string(),
112                _ => panic!("This shouldn't be on a repr C"),
113            })
114            .collect::<HashSet<_>>();
115
116        is_repr_c_packed = nested_names.contains("C") && nested_names.contains("packed");
117    }
118
119    if !is_repr_c_packed {
120        return Err(syn::Error::new(
121            ast.ident.span(),
122            "The struct does not have the attribute #[repr(C, packed)]",
123        ));
124    }
125
126    let local_fields = std::mem::replace(fields, syn::punctuated::Punctuated::new());
127    for mut field in IntoIterator::into_iter(local_fields) {
128        let mut offs = None;
129        // We need to check the attribute offset is actually present on the struct.
130        let mut ix_to_remove = None;
131        for (i, attr) in field.attrs.iter().enumerate() {
132            if !attr.path.is_ident("lazy_re") {
133                continue;
134            }
135
136            offs = Some(attr.parse_args::<Offset>()?.0);
137            ix_to_remove = Some(i);
138        }
139
140        if offs.is_none() {
141            all_fields.push(field);
142            continue;
143        }
144
145        // ix_to_remove is Some if offs is some, So we can be sure this would never fail.
146        field.attrs.remove(ix_to_remove.unwrap());
147        let offs = offs.unwrap();
148
149        let new_ident = format_ident!("__pad{:03}", current_ix);
150        current_ix += 1;
151
152        // In the case of pointers, to avoid fighting with generic types, we can just assume that
153        // the size of a pointer (that is not dyn) is just usize.
154        let all_fields_ty = all_fields.iter().map(|field| {
155            match &field.ty {
156                syn::Type::Reference(r) => {
157                    match &*r.elem {
158                        // We have to take into account every DST, those includes the dyn pointers
159                        // and the slices, which basically are fat pointers. For every other case
160                        // we can use a single usize.
161                        syn::Type::TraitObject(_) | syn::Type::Slice(_) => {
162                            syn::Type::Verbatim(quote! {(usize, usize)})
163                        },
164                        syn::Type::Path(syn::TypePath { path, .. }) if path.is_ident("str") => {
165                            syn::Type::Verbatim(quote! {(usize, usize)})
166                        },
167                        _ => syn::Type::Verbatim(quote! {usize}.into()),
168                    }
169                }
170                other => other.clone(),
171            }
172        });
173
174        let field_to_add = syn::Field::parse_named
175            .parse2(quote! {  #new_ident: [u8; #offs - (0 #(+ std::mem::size_of::<#all_fields_ty>())*)]})
176            .unwrap();
177
178        all_fields.push(field_to_add);
179        all_fields.push(field);
180    }
181
182    fields.extend(all_fields.drain(..));
183
184    Ok(quote! { #ast }.into())
185}
186
187/// This proc macro will generate padding fields for your struct every time you have a struct that
188/// has fields with the macro.
189///
190/// # Example
191///
192/// ```
193/// use lazy_re::lazy_re;
194/// #[lazy_re]
195/// #[repr(C, packed)]
196/// pub struct Foo {
197///     #[lazy_re(offset = 0x42)]
198///     pub foo: usize
199/// }
200/// ```
201///
202/// This struct now will be expanded to a struct with two fields and its respective padding:
203///
204/// ```
205/// use lazy_re::lazy_re;
206/// #[lazy_re]
207/// #[repr(C, packed)]
208/// pub struct Foo {
209///     __pad000: [u8; 0x42],
210///     pub foo: usize
211/// }
212/// ```
213///
214/// The utility of this macro is when you're reverse engineering something and you're only
215/// interested in some fields of a big struct, you can use this macro to cast raw pointers.
216#[proc_macro_attribute]
217pub fn lazy_re(_args: TokenStream, input: TokenStream) -> TokenStream {
218    let ast = parse_macro_input!(input as DeriveInput);
219
220    match lazy_re_impl(ast) {
221        Ok(res) => res,
222        Err(e) => e.to_compile_error().into(),
223    }
224}