Skip to main content

rust_asm/
constant_pool.rs

1use crate::insn::Handle;
2use std::collections::HashMap;
3
/// A builder for the constant pool of a class.
///
/// This struct manages the deduplication of constant pool entries, ensuring that
/// strings, classes, and member references are stored efficiently.
///
/// `cp` holds the entries in index order; every other field is a lookup table
/// from the logical content of an entry to the index at which it was first
/// stored, so that asking for the same constant twice yields the same index.
///
/// Note that the derived `Default` leaves `cp` empty, whereas
/// [`ConstantPoolBuilder::new`] seeds it with the placeholder for index 0.
#[derive(Debug, Default)]
pub struct ConstantPoolBuilder {
    /// The pool entries themselves; a slot's position is its constant pool index.
    cp: Vec<CpInfo>,
    /// Utf8 entries, keyed by their string content.
    utf8: HashMap<String, u16>,
    /// Class entries, keyed by the class name.
    class: HashMap<String, u16>,
    /// Module entries, keyed by the module name.
    module: HashMap<String, u16>,
    /// Package entries, keyed by the package name.
    package: HashMap<String, u16>,
    /// String-literal entries, keyed by the literal's text.
    string: HashMap<String, u16>,
    /// NameAndType entries, keyed by `(name, descriptor)`.
    name_and_type: HashMap<(String, String), u16>,
    /// Fieldref entries, keyed by `(owner, name, descriptor)`.
    field_ref: HashMap<(String, String, String), u16>,
    /// Methodref entries, keyed by `(owner, name, descriptor)`.
    method_ref: HashMap<(String, String, String), u16>,
    /// InterfaceMethodref entries, keyed by `(owner, name, descriptor)`.
    interface_method_ref: HashMap<(String, String, String), u16>,
    /// MethodType entries, keyed by the method descriptor.
    method_type: HashMap<String, u16>,
    /// MethodHandle entries, keyed by
    /// `(reference_kind, owner, name, descriptor, is_interface)`.
    method_handle: HashMap<(u8, String, String, String, bool), u16>,
    /// InvokeDynamic entries, keyed by
    /// `(bootstrap_method_attr_index, name, descriptor)`.
    invoke_dynamic: HashMap<(u16, String, String), u16>,
}
24
25impl ConstantPoolBuilder {
26    /// Creates a new, empty `ConstantPoolBuilder`.
27    ///
28    /// The constant pool starts with a dummy entry at index 0, as per JVM spec.
29    pub fn new() -> Self {
30        Self {
31            cp: vec![CpInfo::Unusable],
32            ..Default::default()
33        }
34    }
35
36    /// Creates a `ConstantPoolBuilder` pre-populated with an existing pool.
37    ///
38    /// This preserves existing indices and initializes deduplication maps
39    /// based on the pool contents.
40    pub fn from_pool(pool: Vec<CpInfo>) -> Self {
41        let cp = if pool.is_empty() {
42            vec![CpInfo::Unusable]
43        } else {
44            pool
45        };
46        let mut builder = Self {
47            cp,
48            ..Default::default()
49        };
50
51        fn cp_utf8(cp: &[CpInfo], index: u16) -> Option<&str> {
52            match cp.get(index as usize) {
53                Some(CpInfo::Utf8(value)) => Some(value.as_str()),
54                _ => None,
55            }
56        }
57
58        fn cp_class_name(cp: &[CpInfo], index: u16) -> Option<&str> {
59            match cp.get(index as usize) {
60                Some(CpInfo::Class { name_index }) => cp_utf8(cp, *name_index),
61                _ => None,
62            }
63        }
64
65        fn cp_module_name(cp: &[CpInfo], index: u16) -> Option<&str> {
66            match cp.get(index as usize) {
67                Some(CpInfo::Module { name_index }) => cp_utf8(cp, *name_index),
68                _ => None,
69            }
70        }
71
72        fn cp_package_name(cp: &[CpInfo], index: u16) -> Option<&str> {
73            match cp.get(index as usize) {
74                Some(CpInfo::Package { name_index }) => cp_utf8(cp, *name_index),
75                _ => None,
76            }
77        }
78
79        fn cp_name_and_type(cp: &[CpInfo], index: u16) -> Option<(&str, &str)> {
80            match cp.get(index as usize) {
81                Some(CpInfo::NameAndType {
82                    name_index,
83                    descriptor_index,
84                }) => {
85                    let name = cp_utf8(cp, *name_index)?;
86                    let desc = cp_utf8(cp, *descriptor_index)?;
87                    Some((name, desc))
88                }
89                _ => None,
90            }
91        }
92
93        fn cp_member_ref(cp: &[CpInfo], index: u16) -> Option<(String, String, String, bool)> {
94            match cp.get(index as usize) {
95                Some(CpInfo::Fieldref {
96                    class_index,
97                    name_and_type_index,
98                }) => {
99                    let owner = cp_class_name(cp, *class_index)?.to_string();
100                    let (name, desc) = cp_name_and_type(cp, *name_and_type_index)?;
101                    Some((owner, name.to_string(), desc.to_string(), false))
102                }
103                Some(CpInfo::Methodref {
104                    class_index,
105                    name_and_type_index,
106                }) => {
107                    let owner = cp_class_name(cp, *class_index)?.to_string();
108                    let (name, desc) = cp_name_and_type(cp, *name_and_type_index)?;
109                    Some((owner, name.to_string(), desc.to_string(), false))
110                }
111                Some(CpInfo::InterfaceMethodref {
112                    class_index,
113                    name_and_type_index,
114                }) => {
115                    let owner = cp_class_name(cp, *class_index)?.to_string();
116                    let (name, desc) = cp_name_and_type(cp, *name_and_type_index)?;
117                    Some((owner, name.to_string(), desc.to_string(), true))
118                }
119                _ => None,
120            }
121        }
122
123        for (index, entry) in builder.cp.iter().enumerate() {
124            let index = index as u16;
125            match entry {
126                CpInfo::Utf8(value) => {
127                    builder.utf8.entry(value.clone()).or_insert(index);
128                }
129                CpInfo::Class { name_index } => {
130                    if let Some(name) = cp_utf8(&builder.cp, *name_index) {
131                        builder.class.entry(name.to_string()).or_insert(index);
132                    }
133                }
134                CpInfo::Module { .. } => {
135                    if let Some(name) = cp_module_name(&builder.cp, index) {
136                        builder.module.entry(name.to_string()).or_insert(index);
137                    }
138                }
139                CpInfo::Package { .. } => {
140                    if let Some(name) = cp_package_name(&builder.cp, index) {
141                        builder.package.entry(name.to_string()).or_insert(index);
142                    }
143                }
144                CpInfo::String { string_index } => {
145                    if let Some(value) = cp_utf8(&builder.cp, *string_index) {
146                        builder.string.entry(value.to_string()).or_insert(index);
147                    }
148                }
149                CpInfo::NameAndType {
150                    name_index,
151                    descriptor_index,
152                } => {
153                    if let (Some(name), Some(desc)) = (
154                        cp_utf8(&builder.cp, *name_index),
155                        cp_utf8(&builder.cp, *descriptor_index),
156                    ) {
157                        builder
158                            .name_and_type
159                            .entry((name.to_string(), desc.to_string()))
160                            .or_insert(index);
161                    }
162                }
163                CpInfo::Fieldref {
164                    class_index,
165                    name_and_type_index,
166                } => {
167                    if let (Some(owner), Some((name, desc))) = (
168                        cp_class_name(&builder.cp, *class_index),
169                        cp_name_and_type(&builder.cp, *name_and_type_index),
170                    ) {
171                        builder
172                            .field_ref
173                            .entry((owner.to_string(), name.to_string(), desc.to_string()))
174                            .or_insert(index);
175                    }
176                }
177                CpInfo::Methodref {
178                    class_index,
179                    name_and_type_index,
180                } => {
181                    if let (Some(owner), Some((name, desc))) = (
182                        cp_class_name(&builder.cp, *class_index),
183                        cp_name_and_type(&builder.cp, *name_and_type_index),
184                    ) {
185                        builder
186                            .method_ref
187                            .entry((owner.to_string(), name.to_string(), desc.to_string()))
188                            .or_insert(index);
189                    }
190                }
191                CpInfo::InterfaceMethodref {
192                    class_index,
193                    name_and_type_index,
194                } => {
195                    if let (Some(owner), Some((name, desc))) = (
196                        cp_class_name(&builder.cp, *class_index),
197                        cp_name_and_type(&builder.cp, *name_and_type_index),
198                    ) {
199                        builder
200                            .interface_method_ref
201                            .entry((owner.to_string(), name.to_string(), desc.to_string()))
202                            .or_insert(index);
203                    }
204                }
205                CpInfo::MethodType { descriptor_index } => {
206                    if let Some(desc) = cp_utf8(&builder.cp, *descriptor_index) {
207                        builder.method_type.entry(desc.to_string()).or_insert(index);
208                    }
209                }
210                CpInfo::MethodHandle {
211                    reference_kind,
212                    reference_index,
213                } => {
214                    if let Some((owner, name, desc, is_interface)) =
215                        cp_member_ref(&builder.cp, *reference_index)
216                    {
217                        builder
218                            .method_handle
219                            .entry((*reference_kind, owner, name, desc, is_interface))
220                            .or_insert(index);
221                    }
222                }
223                CpInfo::InvokeDynamic {
224                    bootstrap_method_attr_index,
225                    name_and_type_index,
226                } => {
227                    if let Some((name, desc)) = cp_name_and_type(&builder.cp, *name_and_type_index)
228                    {
229                        builder
230                            .invoke_dynamic
231                            .entry((
232                                *bootstrap_method_attr_index,
233                                name.to_string(),
234                                desc.to_string(),
235                            ))
236                            .or_insert(index);
237                    }
238                }
239                _ => {}
240            }
241        }
242
243        builder
244    }
245
246    /// Consumes the builder and returns the raw vector of `CpInfo` entries.
247    pub fn into_pool(self) -> Vec<CpInfo> {
248        self.cp
249    }
250
251    /// Adds a UTF-8 string to the constant pool if it doesn't exist.
252    ///
253    /// Returns the index of the entry.
254    pub fn utf8(&mut self, value: &str) -> u16 {
255        if let Some(index) = self.utf8.get(value) {
256            return *index;
257        }
258        let index = self.push(CpInfo::Utf8(value.to_string()));
259        self.utf8.insert(value.to_string(), index);
260        index
261    }
262
263    /// Adds a Class constant to the pool.
264    ///
265    /// This will recursively add the UTF-8 name of the class.
266    pub fn class(&mut self, name: &str) -> u16 {
267        if let Some(index) = self.class.get(name) {
268            return *index;
269        }
270        let name_index = self.utf8(name);
271        let index = self.push(CpInfo::Class { name_index });
272        self.class.insert(name.to_string(), index);
273        index
274    }
275
276    /// Adds a Module constant to the pool.
277    ///
278    /// The name is the module name string stored in `CONSTANT_Utf8_info`.
279    pub fn module(&mut self, name: &str) -> u16 {
280        if let Some(index) = self.module.get(name) {
281            return *index;
282        }
283        let name_index = self.utf8(name);
284        let index = self.push(CpInfo::Module { name_index });
285        self.module.insert(name.to_string(), index);
286        index
287    }
288
289    /// Adds a Package constant to the pool.
290    ///
291    /// The name uses JVM package format such as `java/lang`.
292    pub fn package(&mut self, name: &str) -> u16 {
293        if let Some(index) = self.package.get(name) {
294            return *index;
295        }
296        let name_index = self.utf8(name);
297        let index = self.push(CpInfo::Package { name_index });
298        self.package.insert(name.to_string(), index);
299        index
300    }
301
302    /// Adds a String constant to the pool.
303    ///
304    /// This is for string literals (e.g., `ldc "foo"`).
305    pub fn string(&mut self, value: &str) -> u16 {
306        if let Some(index) = self.string.get(value) {
307            return *index;
308        }
309        let string_index = self.utf8(value);
310        let index = self.push(CpInfo::String { string_index });
311        self.string.insert(value.to_string(), index);
312        index
313    }
314
315    pub fn integer(&mut self, value: i32) -> u16 {
316        self.push(CpInfo::Integer(value))
317    }
318
319    pub fn float(&mut self, value: f32) -> u16 {
320        self.push(CpInfo::Float(value))
321    }
322
323    pub fn long(&mut self, value: i64) -> u16 {
324        let index = self.push(CpInfo::Long(value));
325        // Long takes two entries
326        self.cp.push(CpInfo::Unusable);
327        index
328    }
329
330    pub fn double(&mut self, value: f64) -> u16 {
331        let index = self.push(CpInfo::Double(value));
332        // Double takes two entries
333        self.cp.push(CpInfo::Unusable);
334        index
335    }
336
337    /// Adds a NameAndType constant to the pool.
338    ///
339    /// Used for field and method descriptors.
340    pub fn name_and_type(&mut self, name: &str, descriptor: &str) -> u16 {
341        let key = (name.to_string(), descriptor.to_string());
342        if let Some(index) = self.name_and_type.get(&key) {
343            return *index;
344        }
345        let name_index = self.utf8(name);
346        let descriptor_index = self.utf8(descriptor);
347        let index = self.push(CpInfo::NameAndType {
348            name_index,
349            descriptor_index,
350        });
351        self.name_and_type.insert(key, index);
352        index
353    }
354
355    /// Adds a Fieldref constant to the pool.
356    pub fn field_ref(&mut self, owner: &str, name: &str, descriptor: &str) -> u16 {
357        let key = (owner.to_string(), name.to_string(), descriptor.to_string());
358        if let Some(index) = self.field_ref.get(&key) {
359            return *index;
360        }
361        let class_index = self.class(owner);
362        let name_and_type_index = self.name_and_type(name, descriptor);
363        let index = self.push(CpInfo::Fieldref {
364            class_index,
365            name_and_type_index,
366        });
367        self.field_ref.insert(key, index);
368        index
369    }
370
371    /// Adds a Methodref constant to the pool.
372    pub fn method_ref(&mut self, owner: &str, name: &str, descriptor: &str) -> u16 {
373        let key = (owner.to_string(), name.to_string(), descriptor.to_string());
374        if let Some(index) = self.method_ref.get(&key) {
375            return *index;
376        }
377        let class_index = self.class(owner);
378        let name_and_type_index = self.name_and_type(name, descriptor);
379        let index = self.push(CpInfo::Methodref {
380            class_index,
381            name_and_type_index,
382        });
383        self.method_ref.insert(key, index);
384        index
385    }
386
387    pub fn interface_method_ref(&mut self, owner: &str, name: &str, descriptor: &str) -> u16 {
388        let key = (owner.to_string(), name.to_string(), descriptor.to_string());
389        if let Some(index) = self.interface_method_ref.get(&key) {
390            return *index;
391        }
392        let class_index = self.class(owner);
393        let name_and_type_index = self.name_and_type(name, descriptor);
394        let index = self.push(CpInfo::InterfaceMethodref {
395            class_index,
396            name_and_type_index,
397        });
398        self.interface_method_ref.insert(key, index);
399        index
400    }
401
402    pub fn method_type(&mut self, descriptor: &str) -> u16 {
403        if let Some(index) = self.method_type.get(descriptor) {
404            return *index;
405        }
406        let descriptor_index = self.utf8(descriptor);
407        let index = self.push(CpInfo::MethodType { descriptor_index });
408        self.method_type.insert(descriptor.to_string(), index);
409        index
410    }
411
412    pub fn method_handle(&mut self, handle: &Handle) -> u16 {
413        let key = (
414            handle.reference_kind,
415            handle.owner.clone(),
416            handle.name.clone(),
417            handle.descriptor.clone(),
418            handle.is_interface,
419        );
420        if let Some(index) = self.method_handle.get(&key) {
421            return *index;
422        }
423        let reference_index = match handle.reference_kind {
424            1 | 2 | 3 | 4 => self.field_ref(&handle.owner, &handle.name, &handle.descriptor),
425            9 => self.interface_method_ref(&handle.owner, &handle.name, &handle.descriptor),
426            _ => self.method_ref(&handle.owner, &handle.name, &handle.descriptor),
427        };
428        let index = self.push(CpInfo::MethodHandle {
429            reference_kind: handle.reference_kind,
430            reference_index,
431        });
432        self.method_handle.insert(key, index);
433        index
434    }
435
436    pub fn invoke_dynamic(&mut self, bsm_index: u16, name: &str, descriptor: &str) -> u16 {
437        let key = (bsm_index, name.to_string(), descriptor.to_string());
438        if let Some(index) = self.invoke_dynamic.get(&key) {
439            return *index;
440        }
441        let name_and_type_index = self.name_and_type(name, descriptor);
442        let index = self.push(CpInfo::InvokeDynamic {
443            bootstrap_method_attr_index: bsm_index,
444            name_and_type_index,
445        });
446        self.invoke_dynamic.insert(key, index);
447        index
448    }
449
450    fn push(&mut self, entry: CpInfo) -> u16 {
451        self.cp.push(entry);
452        (self.cp.len() - 1) as u16
453    }
454}
/// One slot of a class-file constant pool.
///
/// Variants that refer to other constants do so by `u16` index into the same
/// pool. `PartialEq` compares entries structurally (indices and payloads);
/// because `Float` and `Double` carry IEEE floats, an entry holding NaN is
/// never equal to anything, itself included.
#[derive(Debug, Clone, PartialEq)]
pub enum CpInfo {
    /// A slot that holds no constant: index 0 of every pool, and the slot
    /// directly after a `Long` or `Double`.
    Unusable,
    /// Raw text, referenced by index from most other entries.
    Utf8(String),
    /// A 32-bit integer constant.
    Integer(i32),
    /// A 32-bit floating-point constant.
    Float(f32),
    /// A 64-bit integer constant; takes up two slots.
    Long(i64),
    /// A 64-bit floating-point constant; takes up two slots.
    Double(f64),
    /// A class, identified by the `Utf8` entry holding its name.
    Class {
        name_index: u16,
    },
    /// A string literal, identified by the `Utf8` entry holding its text.
    String {
        string_index: u16,
    },
    /// A field: owning `Class` plus a `NameAndType`.
    Fieldref {
        class_index: u16,
        name_and_type_index: u16,
    },
    /// A method: owning `Class` plus a `NameAndType`.
    Methodref {
        class_index: u16,
        name_and_type_index: u16,
    },
    /// An interface method: owning `Class` plus a `NameAndType`.
    InterfaceMethodref {
        class_index: u16,
        name_and_type_index: u16,
    },
    /// A member name and its descriptor, both stored as `Utf8` entries.
    NameAndType {
        name_index: u16,
        descriptor_index: u16,
    },
    /// A method handle: a reference kind plus the index of the `Fieldref`,
    /// `Methodref` or `InterfaceMethodref` it targets.
    MethodHandle {
        reference_kind: u8,
        reference_index: u16,
    },
    /// A method type, identified by the `Utf8` entry holding its descriptor.
    MethodType {
        descriptor_index: u16,
    },
    /// A dynamically computed constant: a bootstrap method attribute index
    /// plus a `NameAndType`.
    Dynamic {
        bootstrap_method_attr_index: u16,
        name_and_type_index: u16,
    },
    /// A dynamic call site: a bootstrap method attribute index plus a
    /// `NameAndType`.
    InvokeDynamic {
        bootstrap_method_attr_index: u16,
        name_and_type_index: u16,
    },
    /// A module, identified by the `Utf8` entry holding its name.
    Module {
        name_index: u16,
    },
    /// A package, identified by the `Utf8` entry holding its name.
    Package {
        name_index: u16,
    },
}