Skip to main content

sqry_classpath/bytecode/
mod.rs

1//! JVM bytecode parsing.
2//!
3//! Parses `.class` files into `ClassStub` records, extracting:
4//! - Class/method/field declarations with visibility and modifiers
5//! - Generic type signatures (JVMS 4.7.9)
6//! - Annotations (runtime visible and invisible)
7//! - Lambda targets (BootstrapMethods attribute)
8//! - Java 9+ module declarations
9//!
10//! ## JAR scanning
11//!
12//! The [`scan_jar`] function reads a JAR (ZIP archive), parses all `.class`
13//! entries into enriched [`ClassStub`] records, and returns them. It applies
14//! security limits to prevent JAR-bomb denial-of-service attacks.
15
16pub mod annotations;
17pub mod classfile;
18pub mod constants;
19pub mod generics;
20pub mod lambda;
21pub mod modules;
22
23use std::io::Read;
24use std::path::Path;
25
26use cafebabe::ParseOptions;
27use cafebabe::attributes::AttributeData;
28use log::warn;
29use zip::ZipArchive;
30
31use crate::stub::model::ClassStub;
32use crate::{ClasspathError, ClasspathResult};
33
34pub use classfile::parse_class;
35
36// ---------------------------------------------------------------------------
37// Security limits for JAR scanning
38// ---------------------------------------------------------------------------
39
/// Upper bound on how many entries a single JAR may contain.
///
/// Archives that list more entries than this are rejected outright, which
/// guards against ZIP-bomb style denial-of-service and against running out
/// of memory on pathologically large archives.
const MAX_JAR_ENTRIES: usize = 100_000;

/// Upper bound on the total uncompressed size of a single JAR (2 GB).
///
/// Scanning is aborted when the sum of all entry sizes is larger than this.
const MAX_JAR_UNCOMPRESSED_SIZE: u64 = 2 * (1 << 30);
50
51// ---------------------------------------------------------------------------
52// JAR scanning
53// ---------------------------------------------------------------------------
54
55/// Scan a JAR file (ZIP archive) and parse all `.class` entries into
56/// [`ClassStub`] records.
57///
58/// Uses rayon for parallelism across JARs (called by the outer loop), but
59/// processes entries within a single JAR sequentially since [`ZipArchive`] is
60/// not `Send`.
61///
62/// # Security limits
63///
64/// - Entry count limit: 100,000 per JAR
65/// - Uncompressed size limit: 2 GB per JAR
66/// - Per-class errors are logged and skipped, never fail the whole JAR
67///
68/// # Errors
69///
70/// Returns [`ClasspathError::JarReadError`] if the JAR cannot be opened or
71/// if it exceeds security limits.
72pub fn scan_jar(jar_path: &Path) -> ClasspathResult<Vec<ClassStub>> {
73    let jar_display = jar_path.display().to_string();
74
75    let file = std::fs::File::open(jar_path).map_err(|e| ClasspathError::JarReadError {
76        path: jar_display.clone(),
77        reason: format!("cannot open file: {e}"),
78    })?;
79
80    let mut archive = ZipArchive::new(file).map_err(|e| ClasspathError::JarReadError {
81        path: jar_display.clone(),
82        reason: format!("invalid ZIP/JAR archive: {e}"),
83    })?;
84
85    // --- Security check: entry count ---
86    let entry_count = archive.len();
87    if entry_count > MAX_JAR_ENTRIES {
88        return Err(ClasspathError::JarReadError {
89            path: jar_display,
90            reason: format!(
91                "JAR bomb detected: {entry_count} entries exceeds limit of {MAX_JAR_ENTRIES}"
92            ),
93        });
94    }
95
96    // --- Security check: total uncompressed size ---
97    let mut total_uncompressed: u64 = 0;
98    for i in 0..entry_count {
99        if let Ok(entry) = archive.by_index_raw(i) {
100            total_uncompressed = total_uncompressed.saturating_add(entry.size());
101        }
102    }
103
104    if total_uncompressed > MAX_JAR_UNCOMPRESSED_SIZE {
105        return Err(ClasspathError::JarReadError {
106            path: jar_display,
107            reason: format!(
108                "JAR bomb detected: total uncompressed size {total_uncompressed} bytes \
109                 exceeds limit of {MAX_JAR_UNCOMPRESSED_SIZE} bytes (2 GB)"
110            ),
111        });
112    }
113
114    // --- Scan entries ---
115    let mut stubs = Vec::new();
116    for i in 0..entry_count {
117        let mut entry = match archive.by_index(i) {
118            Ok(e) => e,
119            Err(e) => {
120                warn!("JAR {jar_display}: cannot read entry {i}: {e}");
121                continue;
122            }
123        };
124
125        let entry_name = entry.name().to_owned();
126
127        // Only process .class files.
128        if !entry_name.ends_with(".class") {
129            continue;
130        }
131
132        // Skip module-info and package-info — handled separately.
133        if is_info_class(&entry_name) {
134            continue;
135        }
136
137        // Read entry bytes.
138        let mut bytes = Vec::with_capacity(entry.size() as usize);
139        if let Err(e) = entry.read_to_end(&mut bytes) {
140            warn!("JAR {jar_display}: cannot read entry {entry_name}: {e}");
141            continue;
142        }
143
144        // Parse and enrich.
145        match parse_class_enriched(&bytes) {
146            Ok(mut stub) => {
147                stub.source_jar = Some(jar_display.clone());
148                stubs.push(stub);
149            }
150            Err(e) => {
151                warn!("JAR {jar_display}: cannot parse class {entry_name}: {e}");
152            }
153        }
154    }
155
156    Ok(stubs)
157}
158
159/// Parse a `.class` file and apply all enrichment passes in a single
160/// cafebabe parse.
161///
162/// This combines base class parsing ([`parse_class`]) with generic signature
163/// extraction, annotation extraction, lambda target extraction, and module
164/// extraction — all from a single `cafebabe::ClassFile` parse.
165///
166/// Per-enrichment errors are logged and skipped; the base stub is always
167/// returned if the initial parse succeeds.
168fn parse_class_enriched(bytes: &[u8]) -> ClasspathResult<ClassStub> {
169    // First parse with parse_class for the base stub.
170    let mut stub = parse_class(bytes)?;
171
172    // Second parse with cafebabe for enrichment data.
173    // This is a lightweight re-parse since we disable bytecode parsing.
174    let mut opts = ParseOptions::default();
175    opts.parse_bytecode(false);
176
177    let class_file = match cafebabe::parse_class_with_options(bytes, &opts) {
178        Ok(cf) => cf,
179        Err(e) => {
180            warn!("enrichment parse failed for {}: {e}", stub.fqn);
181            return Ok(stub);
182        }
183    };
184
185    // --- Enrich: annotations (class-level) ---
186    for attr in &class_file.attributes {
187        match annotations::extract_annotations_from_attribute(&attr.data) {
188            Ok(Some(ann)) => stub.annotations.extend(ann),
189            Ok(None) => {}
190            Err(e) => {
191                warn!("annotation extraction failed for {}: {e}", stub.fqn);
192            }
193        }
194    }
195
196    // --- Enrich: annotations (method-level and field-level) ---
197    enrich_method_annotations(&class_file, &mut stub);
198    enrich_field_annotations(&class_file, &mut stub);
199
200    // --- Enrich: generic signatures ---
201    enrich_generics(&class_file, &mut stub);
202
203    // --- Enrich: lambda targets ---
204    stub.lambda_targets = lambda::extract_lambda_targets(&class_file);
205
206    // --- Enrich: module info ---
207    match modules::extract_module(&class_file) {
208        Ok(Some(module)) => stub.module = Some(module),
209        Ok(None) => {}
210        Err(e) => {
211            warn!("module extraction failed for {}: {e}", stub.fqn);
212        }
213    }
214
215    Ok(stub)
216}
217
218/// Enrich method stubs with annotations from the cafebabe parse.
219fn enrich_method_annotations(class_file: &cafebabe::ClassFile<'_>, stub: &mut ClassStub) {
220    for (i, method) in class_file.methods.iter().enumerate() {
221        if i >= stub.methods.len() {
222            break;
223        }
224        // Find the matching stub method. The classfile parser may skip synthetic/bridge
225        // methods, so we match by name + descriptor.
226        let Some(method_stub) = stub.methods.iter_mut().find(|ms| {
227            ms.name == method.name.as_ref() && ms.descriptor == method.descriptor.to_string()
228        }) else {
229            continue;
230        };
231
232        for attr in &method.attributes {
233            match annotations::extract_annotations_from_attribute(&attr.data) {
234                Ok(Some(ann)) => method_stub.annotations.extend(ann),
235                Ok(None) => {}
236                Err(e) => {
237                    warn!(
238                        "method annotation extraction failed for {}#{}: {e}",
239                        stub.fqn, method_stub.name
240                    );
241                }
242            }
243            match annotations::extract_parameter_annotations_from_attribute(&attr.data) {
244                Ok(Some(param_ann)) => {
245                    // Merge parameter annotations: extend or replace.
246                    if method_stub.parameter_annotations.is_empty() {
247                        method_stub.parameter_annotations = param_ann;
248                    } else {
249                        for (pi, anns) in param_ann.into_iter().enumerate() {
250                            if pi < method_stub.parameter_annotations.len() {
251                                method_stub.parameter_annotations[pi].extend(anns);
252                            } else {
253                                method_stub.parameter_annotations.push(anns);
254                            }
255                        }
256                    }
257                }
258                Ok(None) => {}
259                Err(e) => {
260                    warn!(
261                        "parameter annotation extraction failed for {}#{}: {e}",
262                        stub.fqn, method_stub.name
263                    );
264                }
265            }
266        }
267    }
268}
269
270/// Enrich field stubs with annotations from the cafebabe parse.
271fn enrich_field_annotations(class_file: &cafebabe::ClassFile<'_>, stub: &mut ClassStub) {
272    for field in &class_file.fields {
273        let Some(field_stub) = stub
274            .fields
275            .iter_mut()
276            .find(|fs| fs.name == field.name.as_ref())
277        else {
278            continue;
279        };
280
281        for attr in &field.attributes {
282            match annotations::extract_annotations_from_attribute(&attr.data) {
283                Ok(Some(ann)) => field_stub.annotations.extend(ann),
284                Ok(None) => {}
285                Err(e) => {
286                    warn!(
287                        "field annotation extraction failed for {}.{}: {e}",
288                        stub.fqn, field_stub.name
289                    );
290                }
291            }
292        }
293    }
294}
295
296/// Enrich stubs with generic type signatures from the cafebabe parse.
297fn enrich_generics(class_file: &cafebabe::ClassFile<'_>, stub: &mut ClassStub) {
298    // Class-level signature.
299    for attr in &class_file.attributes {
300        if let AttributeData::Signature(sig) = &attr.data {
301            match generics::parse_class_signature(sig) {
302                Ok(parsed) => stub.generic_signature = Some(parsed),
303                Err(e) => {
304                    warn!("class signature parse failed for {}: {e}", stub.fqn);
305                }
306            }
307            break;
308        }
309    }
310
311    // Method-level signatures.
312    for method in &class_file.methods {
313        let Some(method_stub) = stub.methods.iter_mut().find(|ms| {
314            ms.name == method.name.as_ref() && ms.descriptor == method.descriptor.to_string()
315        }) else {
316            continue;
317        };
318
319        for attr in &method.attributes {
320            if let AttributeData::Signature(sig) = &attr.data {
321                match generics::parse_method_signature(sig) {
322                    Ok(parsed) => method_stub.generic_signature = Some(parsed),
323                    Err(e) => {
324                        warn!(
325                            "method signature parse failed for {}#{}: {e}",
326                            stub.fqn, method_stub.name
327                        );
328                    }
329                }
330                break;
331            }
332        }
333    }
334
335    // Field-level signatures.
336    for field in &class_file.fields {
337        let Some(field_stub) = stub
338            .fields
339            .iter_mut()
340            .find(|fs| fs.name == field.name.as_ref())
341        else {
342            continue;
343        };
344
345        for attr in &field.attributes {
346            if let AttributeData::Signature(sig) = &attr.data {
347                match generics::parse_field_signature(sig) {
348                    Ok(parsed) => field_stub.generic_signature = Some(parsed),
349                    Err(e) => {
350                        warn!(
351                            "field signature parse failed for {}.{}: {e}",
352                            stub.fqn, field_stub.name
353                        );
354                    }
355                }
356                break;
357            }
358        }
359    }
360}
361
/// Report whether a ZIP entry name refers to a `module-info.class` or
/// `package-info.class` file.
///
/// Only the final path component (the text after the last `/`) is compared,
/// so entries nested in any directory are recognized, e.g.
/// `META-INF/versions/11/module-info.class`.
///
/// JAR scanning skips these entries: per the original note they are handled
/// by the module enrichment parser (U07b) or are not useful for type
/// resolution.
fn is_info_class(entry_name: &str) -> bool {
    let file_name = entry_name
        .rsplit_once('/')
        .map_or(entry_name, |(_, last)| last);
    matches!(file_name, "module-info.class" | "package-info.class")
}
370
371// ---------------------------------------------------------------------------
372// Tests
373// ---------------------------------------------------------------------------
374
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use zip::write::SimpleFileOptions;

    /// Append a big-endian `u16` to `out`.
    fn put_u16(out: &mut Vec<u8>, value: u16) {
        out.extend_from_slice(&value.to_be_bytes());
    }

    /// Append a `CONSTANT_Utf8` constant-pool entry holding `text`.
    fn put_utf8_constant(out: &mut Vec<u8>, text: &[u8]) {
        out.push(1);
        put_u16(out, text.len() as u16);
        out.extend_from_slice(text);
    }

    /// Append a `CONSTANT_Class` constant-pool entry referring to the Utf8
    /// entry at `name_index`.
    fn put_class_constant(out: &mut Vec<u8>, name_index: u16) {
        out.push(7);
        put_u16(out, name_index);
    }

    /// Build a minimal valid .class file for testing. This is a stripped-down
    /// version of the `ClassFileBuilder` from `classfile.rs` — just enough
    /// to produce parseable bytes.
    fn build_minimal_class(class_name: &str) -> Vec<u8> {
        let mut bytes = Vec::new();

        // Magic, minor version 0, major version 52 (Java 8).
        bytes.extend_from_slice(&0xCAFE_BABEu32.to_be_bytes());
        put_u16(&mut bytes, 0);
        put_u16(&mut bytes, 52);

        // Constant pool with 4 entries, so constant_pool_count is 4 + 1 = 5:
        //   #1: Utf8 <class_name>
        //   #2: Class -> #1
        //   #3: Utf8 "java/lang/Object"
        //   #4: Class -> #3
        put_u16(&mut bytes, 5);
        put_utf8_constant(&mut bytes, class_name.as_bytes());
        put_class_constant(&mut bytes, 1);
        put_utf8_constant(&mut bytes, b"java/lang/Object");
        put_class_constant(&mut bytes, 3);

        // Access flags: ACC_PUBLIC | ACC_SUPER
        put_u16(&mut bytes, 0x0021);
        // This class: #2, super class: #4
        put_u16(&mut bytes, 2);
        put_u16(&mut bytes, 4);
        // Interfaces, fields, methods and attributes counts: all 0
        for _ in 0..4 {
            put_u16(&mut bytes, 0);
        }

        bytes
    }

    /// Create an in-memory JAR (ZIP) file with the given entries.
    fn build_test_jar(entries: &[(&str, &[u8])]) -> Vec<u8> {
        let mut buf = Vec::new();
        {
            let mut writer = zip::ZipWriter::new(std::io::Cursor::new(&mut buf));
            let options =
                SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored);
            for (name, data) in entries {
                writer.start_file(*name, options).unwrap();
                writer.write_all(data).unwrap();
            }
            writer.finish().unwrap();
        }
        buf
    }

    /// Write a JAR made of `entries` to a temporary file and scan it.
    fn scan_entries(entries: &[(&str, &[u8])]) -> ClasspathResult<Vec<ClassStub>> {
        let tmp = tempfile::NamedTempFile::new().unwrap();
        std::fs::write(tmp.path(), build_test_jar(entries)).unwrap();
        scan_jar(tmp.path())
    }

    #[test]
    fn test_scan_jar_multiple_classes() {
        let class_a = build_minimal_class("com/example/ClassA");
        let class_b = build_minimal_class("com/example/ClassB");

        let stubs = scan_entries(&[
            ("com/example/ClassA.class", &class_a),
            ("com/example/ClassB.class", &class_b),
        ])
        .unwrap();
        assert_eq!(stubs.len(), 2);

        let fqns: Vec<&str> = stubs.iter().map(|s| s.fqn.as_str()).collect();
        assert!(fqns.contains(&"com.example.ClassA"));
        assert!(fqns.contains(&"com.example.ClassB"));
    }

    #[test]
    fn test_scan_jar_empty() {
        let stubs = scan_entries(&[]).unwrap();
        assert!(stubs.is_empty());
    }

    #[test]
    fn test_scan_jar_malformed_jar() {
        // Not a ZIP at all, so the archive cannot even be opened.
        let tmp = tempfile::NamedTempFile::new().unwrap();
        std::fs::write(tmp.path(), b"this is not a zip file").unwrap();

        let result = scan_jar(tmp.path());
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(
            matches!(err, ClasspathError::JarReadError { .. }),
            "expected JarReadError, got: {err}"
        );
    }

    #[test]
    fn test_scan_jar_skips_module_and_package_info() {
        let class_a = build_minimal_class("com/example/ClassA");
        // module-info and package-info would fail parse_class anyway,
        // but scan_jar should skip them before attempting parse.
        let stubs = scan_entries(&[
            ("com/example/ClassA.class", &class_a),
            ("module-info.class", b"not a real class"),
            ("com/example/package-info.class", b"not a real class"),
            // Multi-release variant
            ("META-INF/versions/11/module-info.class", b"not real"),
        ])
        .unwrap();

        assert_eq!(stubs.len(), 1);
        assert_eq!(stubs[0].fqn, "com.example.ClassA");
    }

    #[test]
    fn test_scan_jar_inner_classes_included() {
        let outer = build_minimal_class("com/example/Outer");
        let inner = build_minimal_class("com/example/Outer$Inner");

        let stubs = scan_entries(&[
            ("com/example/Outer.class", &outer),
            ("com/example/Outer$Inner.class", &inner),
        ])
        .unwrap();
        assert_eq!(stubs.len(), 2);

        let fqns: Vec<&str> = stubs.iter().map(|s| s.fqn.as_str()).collect();
        assert!(fqns.contains(&"com.example.Outer"));
        assert!(fqns.contains(&"com.example.Outer$Inner"));
    }

    #[test]
    fn test_scan_jar_skips_non_class_files() {
        let class_a = build_minimal_class("com/example/ClassA");

        let stubs = scan_entries(&[
            ("com/example/ClassA.class", &class_a),
            ("META-INF/MANIFEST.MF", b"Manifest-Version: 1.0\n"),
            ("com/example/resource.txt", b"some resource"),
        ])
        .unwrap();

        assert_eq!(stubs.len(), 1);
        assert_eq!(stubs[0].fqn, "com.example.ClassA");
    }

    #[test]
    fn test_scan_jar_malformed_class_skipped() {
        let good_class = build_minimal_class("com/example/Good");

        let stubs = scan_entries(&[
            ("com/example/Good.class", &good_class),
            ("com/example/Bad.class", b"not valid bytecode"),
        ])
        .unwrap();

        assert_eq!(stubs.len(), 1);
        assert_eq!(stubs[0].fqn, "com.example.Good");
    }

    #[test]
    fn test_scan_jar_nonexistent_file() {
        let result = scan_jar(Path::new("/nonexistent/path/foo.jar"));
        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            ClasspathError::JarReadError { .. }
        ));
    }

    #[test]
    fn test_is_info_class() {
        let info_entries = [
            "module-info.class",
            "com/example/package-info.class",
            "META-INF/versions/11/module-info.class",
        ];
        for name in info_entries {
            assert!(is_info_class(name));
        }

        let regular_entries = ["com/example/MyClass.class", "com/example/ModuleInfo.class"];
        for name in regular_entries {
            assert!(!is_info_class(name));
        }
    }
}