Skip to main content

rust_asm/
constant_pool.rs

1use crate::insn::Handle;
2use std::collections::HashMap;
3
/// A builder for the constant pool of a class.
///
/// This struct manages the deduplication of constant pool entries, ensuring that
/// strings, classes, and member references are stored efficiently.
///
/// Each map below goes from the logical content of a constant to the index of
/// the entry in `cp` that already represents it.
///
/// NOTE(review): the derived `Default` leaves `cp` empty, i.e. without the
/// index-0 placeholder that `new()` inserts. Prefer `new()` or `from_pool()`
/// when constructing a builder directly.
#[derive(Debug, Default)]
pub struct ConstantPoolBuilder {
    /// Pool entries in index order; an entry's position is its constant pool index.
    cp: Vec<CpInfo>,
    /// `Utf8` text -> entry index.
    utf8: HashMap<String, u16>,
    /// Class name -> index of the `Class` entry.
    class: HashMap<String, u16>,
    /// String literal text -> index of the `String` entry.
    string: HashMap<String, u16>,
    /// `(name, descriptor)` -> index of the `NameAndType` entry.
    name_and_type: HashMap<(String, String), u16>,
    /// `(owner, name, descriptor)` -> index of the `Fieldref` entry.
    field_ref: HashMap<(String, String, String), u16>,
    /// `(owner, name, descriptor)` -> index of the `Methodref` entry.
    method_ref: HashMap<(String, String, String), u16>,
    /// `(owner, name, descriptor)` -> index of the `InterfaceMethodref` entry.
    interface_method_ref: HashMap<(String, String, String), u16>,
    /// Method descriptor -> index of the `MethodType` entry.
    method_type: HashMap<String, u16>,
    /// `(reference_kind, owner, name, descriptor, is_interface)` -> index of the
    /// `MethodHandle` entry.
    method_handle: HashMap<(u8, String, String, String, bool), u16>,
    /// `(bootstrap_method_attr_index, name, descriptor)` -> index of the
    /// `InvokeDynamic` entry.
    invoke_dynamic: HashMap<(u16, String, String), u16>,
}
22
23impl ConstantPoolBuilder {
24    /// Creates a new, empty `ConstantPoolBuilder`.
25    ///
26    /// The constant pool starts with a dummy entry at index 0, as per JVM spec.
27    pub fn new() -> Self {
28        Self {
29            cp: vec![CpInfo::Unusable],
30            ..Default::default()
31        }
32    }
33
34    /// Creates a `ConstantPoolBuilder` pre-populated with an existing pool.
35    ///
36    /// This preserves existing indices and initializes deduplication maps
37    /// based on the pool contents.
38    pub fn from_pool(pool: Vec<CpInfo>) -> Self {
39        let cp = if pool.is_empty() {
40            vec![CpInfo::Unusable]
41        } else {
42            pool
43        };
44        let mut builder = Self {
45            cp,
46            ..Default::default()
47        };
48
49        fn cp_utf8(cp: &[CpInfo], index: u16) -> Option<&str> {
50            match cp.get(index as usize) {
51                Some(CpInfo::Utf8(value)) => Some(value.as_str()),
52                _ => None,
53            }
54        }
55
56        fn cp_class_name(cp: &[CpInfo], index: u16) -> Option<&str> {
57            match cp.get(index as usize) {
58                Some(CpInfo::Class { name_index }) => cp_utf8(cp, *name_index),
59                _ => None,
60            }
61        }
62
63        fn cp_name_and_type(cp: &[CpInfo], index: u16) -> Option<(&str, &str)> {
64            match cp.get(index as usize) {
65                Some(CpInfo::NameAndType {
66                    name_index,
67                    descriptor_index,
68                }) => {
69                    let name = cp_utf8(cp, *name_index)?;
70                    let desc = cp_utf8(cp, *descriptor_index)?;
71                    Some((name, desc))
72                }
73                _ => None,
74            }
75        }
76
77        fn cp_member_ref(cp: &[CpInfo], index: u16) -> Option<(String, String, String, bool)> {
78            match cp.get(index as usize) {
79                Some(CpInfo::Fieldref {
80                    class_index,
81                    name_and_type_index,
82                }) => {
83                    let owner = cp_class_name(cp, *class_index)?.to_string();
84                    let (name, desc) = cp_name_and_type(cp, *name_and_type_index)?;
85                    Some((owner, name.to_string(), desc.to_string(), false))
86                }
87                Some(CpInfo::Methodref {
88                    class_index,
89                    name_and_type_index,
90                }) => {
91                    let owner = cp_class_name(cp, *class_index)?.to_string();
92                    let (name, desc) = cp_name_and_type(cp, *name_and_type_index)?;
93                    Some((owner, name.to_string(), desc.to_string(), false))
94                }
95                Some(CpInfo::InterfaceMethodref {
96                    class_index,
97                    name_and_type_index,
98                }) => {
99                    let owner = cp_class_name(cp, *class_index)?.to_string();
100                    let (name, desc) = cp_name_and_type(cp, *name_and_type_index)?;
101                    Some((owner, name.to_string(), desc.to_string(), true))
102                }
103                _ => None,
104            }
105        }
106
107        for (index, entry) in builder.cp.iter().enumerate() {
108            let index = index as u16;
109            match entry {
110                CpInfo::Utf8(value) => {
111                    builder.utf8.entry(value.clone()).or_insert(index);
112                }
113                CpInfo::Class { name_index } => {
114                    if let Some(name) = cp_utf8(&builder.cp, *name_index) {
115                        builder.class.entry(name.to_string()).or_insert(index);
116                    }
117                }
118                CpInfo::String { string_index } => {
119                    if let Some(value) = cp_utf8(&builder.cp, *string_index) {
120                        builder.string.entry(value.to_string()).or_insert(index);
121                    }
122                }
123                CpInfo::NameAndType {
124                    name_index,
125                    descriptor_index,
126                } => {
127                    if let (Some(name), Some(desc)) = (
128                        cp_utf8(&builder.cp, *name_index),
129                        cp_utf8(&builder.cp, *descriptor_index),
130                    ) {
131                        builder
132                            .name_and_type
133                            .entry((name.to_string(), desc.to_string()))
134                            .or_insert(index);
135                    }
136                }
137                CpInfo::Fieldref {
138                    class_index,
139                    name_and_type_index,
140                } => {
141                    if let (Some(owner), Some((name, desc))) = (
142                        cp_class_name(&builder.cp, *class_index),
143                        cp_name_and_type(&builder.cp, *name_and_type_index),
144                    ) {
145                        builder
146                            .field_ref
147                            .entry((owner.to_string(), name.to_string(), desc.to_string()))
148                            .or_insert(index);
149                    }
150                }
151                CpInfo::Methodref {
152                    class_index,
153                    name_and_type_index,
154                } => {
155                    if let (Some(owner), Some((name, desc))) = (
156                        cp_class_name(&builder.cp, *class_index),
157                        cp_name_and_type(&builder.cp, *name_and_type_index),
158                    ) {
159                        builder
160                            .method_ref
161                            .entry((owner.to_string(), name.to_string(), desc.to_string()))
162                            .or_insert(index);
163                    }
164                }
165                CpInfo::InterfaceMethodref {
166                    class_index,
167                    name_and_type_index,
168                } => {
169                    if let (Some(owner), Some((name, desc))) = (
170                        cp_class_name(&builder.cp, *class_index),
171                        cp_name_and_type(&builder.cp, *name_and_type_index),
172                    ) {
173                        builder
174                            .interface_method_ref
175                            .entry((owner.to_string(), name.to_string(), desc.to_string()))
176                            .or_insert(index);
177                    }
178                }
179                CpInfo::MethodType { descriptor_index } => {
180                    if let Some(desc) = cp_utf8(&builder.cp, *descriptor_index) {
181                        builder.method_type.entry(desc.to_string()).or_insert(index);
182                    }
183                }
184                CpInfo::MethodHandle {
185                    reference_kind,
186                    reference_index,
187                } => {
188                    if let Some((owner, name, desc, is_interface)) =
189                        cp_member_ref(&builder.cp, *reference_index)
190                    {
191                        builder
192                            .method_handle
193                            .entry((*reference_kind, owner, name, desc, is_interface))
194                            .or_insert(index);
195                    }
196                }
197                CpInfo::InvokeDynamic {
198                    bootstrap_method_attr_index,
199                    name_and_type_index,
200                } => {
201                    if let Some((name, desc)) = cp_name_and_type(&builder.cp, *name_and_type_index)
202                    {
203                        builder
204                            .invoke_dynamic
205                            .entry((
206                                *bootstrap_method_attr_index,
207                                name.to_string(),
208                                desc.to_string(),
209                            ))
210                            .or_insert(index);
211                    }
212                }
213                _ => {}
214            }
215        }
216
217        builder
218    }
219
220    /// Consumes the builder and returns the raw vector of `CpInfo` entries.
221    pub fn into_pool(self) -> Vec<CpInfo> {
222        self.cp
223    }
224
225    /// Adds a UTF-8 string to the constant pool if it doesn't exist.
226    ///
227    /// Returns the index of the entry.
228    pub fn utf8(&mut self, value: &str) -> u16 {
229        if let Some(index) = self.utf8.get(value) {
230            return *index;
231        }
232        let index = self.push(CpInfo::Utf8(value.to_string()));
233        self.utf8.insert(value.to_string(), index);
234        index
235    }
236
237    /// Adds a Class constant to the pool.
238    ///
239    /// This will recursively add the UTF-8 name of the class.
240    pub fn class(&mut self, name: &str) -> u16 {
241        if let Some(index) = self.class.get(name) {
242            return *index;
243        }
244        let name_index = self.utf8(name);
245        let index = self.push(CpInfo::Class { name_index });
246        self.class.insert(name.to_string(), index);
247        index
248    }
249
250    /// Adds a String constant to the pool.
251    ///
252    /// This is for string literals (e.g., `ldc "foo"`).
253    pub fn string(&mut self, value: &str) -> u16 {
254        if let Some(index) = self.string.get(value) {
255            return *index;
256        }
257        let string_index = self.utf8(value);
258        let index = self.push(CpInfo::String { string_index });
259        self.string.insert(value.to_string(), index);
260        index
261    }
262
263    pub fn integer(&mut self, value: i32) -> u16 {
264        self.push(CpInfo::Integer(value))
265    }
266
267    pub fn float(&mut self, value: f32) -> u16 {
268        self.push(CpInfo::Float(value))
269    }
270
271    pub fn long(&mut self, value: i64) -> u16 {
272        let index = self.push(CpInfo::Long(value));
273        // Long takes two entries
274        self.cp.push(CpInfo::Unusable);
275        index
276    }
277
278    pub fn double(&mut self, value: f64) -> u16 {
279        let index = self.push(CpInfo::Double(value));
280        // Double takes two entries
281        self.cp.push(CpInfo::Unusable);
282        index
283    }
284
285    /// Adds a NameAndType constant to the pool.
286    ///
287    /// Used for field and method descriptors.
288    pub fn name_and_type(&mut self, name: &str, descriptor: &str) -> u16 {
289        let key = (name.to_string(), descriptor.to_string());
290        if let Some(index) = self.name_and_type.get(&key) {
291            return *index;
292        }
293        let name_index = self.utf8(name);
294        let descriptor_index = self.utf8(descriptor);
295        let index = self.push(CpInfo::NameAndType {
296            name_index,
297            descriptor_index,
298        });
299        self.name_and_type.insert(key, index);
300        index
301    }
302
303    /// Adds a Fieldref constant to the pool.
304    pub fn field_ref(&mut self, owner: &str, name: &str, descriptor: &str) -> u16 {
305        let key = (owner.to_string(), name.to_string(), descriptor.to_string());
306        if let Some(index) = self.field_ref.get(&key) {
307            return *index;
308        }
309        let class_index = self.class(owner);
310        let name_and_type_index = self.name_and_type(name, descriptor);
311        let index = self.push(CpInfo::Fieldref {
312            class_index,
313            name_and_type_index,
314        });
315        self.field_ref.insert(key, index);
316        index
317    }
318
319    /// Adds a Methodref constant to the pool.
320    pub fn method_ref(&mut self, owner: &str, name: &str, descriptor: &str) -> u16 {
321        let key = (owner.to_string(), name.to_string(), descriptor.to_string());
322        if let Some(index) = self.method_ref.get(&key) {
323            return *index;
324        }
325        let class_index = self.class(owner);
326        let name_and_type_index = self.name_and_type(name, descriptor);
327        let index = self.push(CpInfo::Methodref {
328            class_index,
329            name_and_type_index,
330        });
331        self.method_ref.insert(key, index);
332        index
333    }
334
335    pub fn interface_method_ref(&mut self, owner: &str, name: &str, descriptor: &str) -> u16 {
336        let key = (owner.to_string(), name.to_string(), descriptor.to_string());
337        if let Some(index) = self.interface_method_ref.get(&key) {
338            return *index;
339        }
340        let class_index = self.class(owner);
341        let name_and_type_index = self.name_and_type(name, descriptor);
342        let index = self.push(CpInfo::InterfaceMethodref {
343            class_index,
344            name_and_type_index,
345        });
346        self.interface_method_ref.insert(key, index);
347        index
348    }
349
350    pub fn method_type(&mut self, descriptor: &str) -> u16 {
351        if let Some(index) = self.method_type.get(descriptor) {
352            return *index;
353        }
354        let descriptor_index = self.utf8(descriptor);
355        let index = self.push(CpInfo::MethodType { descriptor_index });
356        self.method_type.insert(descriptor.to_string(), index);
357        index
358    }
359
360    pub fn method_handle(&mut self, handle: &Handle) -> u16 {
361        let key = (
362            handle.reference_kind,
363            handle.owner.clone(),
364            handle.name.clone(),
365            handle.descriptor.clone(),
366            handle.is_interface,
367        );
368        if let Some(index) = self.method_handle.get(&key) {
369            return *index;
370        }
371        let reference_index = match handle.reference_kind {
372            1 | 2 | 3 | 4 => self.field_ref(&handle.owner, &handle.name, &handle.descriptor),
373            9 => self.interface_method_ref(&handle.owner, &handle.name, &handle.descriptor),
374            _ => self.method_ref(&handle.owner, &handle.name, &handle.descriptor),
375        };
376        let index = self.push(CpInfo::MethodHandle {
377            reference_kind: handle.reference_kind,
378            reference_index,
379        });
380        self.method_handle.insert(key, index);
381        index
382    }
383
384    pub fn invoke_dynamic(&mut self, bsm_index: u16, name: &str, descriptor: &str) -> u16 {
385        let key = (bsm_index, name.to_string(), descriptor.to_string());
386        if let Some(index) = self.invoke_dynamic.get(&key) {
387            return *index;
388        }
389        let name_and_type_index = self.name_and_type(name, descriptor);
390        let index = self.push(CpInfo::InvokeDynamic {
391            bootstrap_method_attr_index: bsm_index,
392            name_and_type_index,
393        });
394        self.invoke_dynamic.insert(key, index);
395        index
396    }
397
398    fn push(&mut self, entry: CpInfo) -> u16 {
399        self.cp.push(entry);
400        (self.cp.len() - 1) as u16
401    }
402}
/// One entry of a class file constant pool.
///
/// Every `*_index` field is an index into the same pool. The target kinds
/// noted below are the ones `ConstantPoolBuilder` writes and resolves.
#[derive(Debug, Clone)]
pub enum CpInfo {
    /// Placeholder slot: used for index 0 and for the slot following a `Long`
    /// or `Double` entry.
    Unusable,
    /// Text referenced by other entries (names, descriptors, string contents).
    Utf8(String),
    /// 32-bit integer constant.
    Integer(i32),
    /// 32-bit floating point constant.
    Float(f32),
    /// 64-bit integer constant; the builder follows it with an `Unusable` slot.
    Long(i64),
    /// 64-bit floating point constant; the builder follows it with an
    /// `Unusable` slot.
    Double(f64),
    /// Class reference; `name_index` points at the `Utf8` class name.
    Class {
        name_index: u16,
    },
    /// String literal; `string_index` points at the `Utf8` contents.
    String {
        string_index: u16,
    },
    /// Field reference: `class_index` points at the owner `Class`,
    /// `name_and_type_index` at the field's `NameAndType`.
    Fieldref {
        class_index: u16,
        name_and_type_index: u16,
    },
    /// Method reference: `class_index` points at the owner `Class`,
    /// `name_and_type_index` at the method's `NameAndType`.
    Methodref {
        class_index: u16,
        name_and_type_index: u16,
    },
    /// Interface method reference: `class_index` points at the owner `Class`,
    /// `name_and_type_index` at the method's `NameAndType`.
    InterfaceMethodref {
        class_index: u16,
        name_and_type_index: u16,
    },
    /// Name/descriptor pair; both indices point at `Utf8` entries.
    NameAndType {
        name_index: u16,
        descriptor_index: u16,
    },
    /// Method handle: `reference_kind` selects the kind of access and
    /// `reference_index` points at a `Fieldref`, `Methodref` or
    /// `InterfaceMethodref` entry.
    MethodHandle {
        reference_kind: u8,
        reference_index: u16,
    },
    /// Method type; `descriptor_index` points at the `Utf8` method descriptor.
    MethodType {
        descriptor_index: u16,
    },
    /// NOTE(review): presumably the JVM `CONSTANT_Dynamic` entry, with the same
    /// layout as `InvokeDynamic`; nothing in this file creates or resolves it.
    Dynamic {
        bootstrap_method_attr_index: u16,
        name_and_type_index: u16,
    },
    /// Dynamic call site: `bootstrap_method_attr_index` is stored as supplied by
    /// the caller (presumably an index into the class's bootstrap methods
    /// table, confirm against the writer); `name_and_type_index` points at a
    /// `NameAndType` entry.
    InvokeDynamic {
        bootstrap_method_attr_index: u16,
        name_and_type_index: u16,
    },
    /// NOTE(review): presumably the JVM `CONSTANT_Module` entry with
    /// `name_index` pointing at a `Utf8` name; not used elsewhere in this file.
    Module {
        name_index: u16,
    },
    /// NOTE(review): presumably the JVM `CONSTANT_Package` entry with
    /// `name_index` pointing at a `Utf8` name; not used elsewhere in this file.
    Package {
        name_index: u16,
    },
}