Skip to main content

sqry_classpath/bytecode/
mod.rs

//! JVM bytecode parsing.
//!
//! Parses `.class` files into `ClassStub` records, extracting:
//! - Class/method/field declarations with visibility and modifiers
//! - Generic type signatures (JVMS 4.7.9)
//! - Annotations (runtime visible and invisible)
//! - Lambda targets (`BootstrapMethods` attribute)
//! - Java 9+ module declarations
//!
//! ## JAR scanning
//!
//! The [`scan_jar`] function reads a JAR (ZIP archive), parses all `.class`
//! entries into enriched [`ClassStub`] records, and returns them. It applies
//! security limits to prevent JAR-bomb denial-of-service attacks.
15
16// JVM bytecode values are spec-bounded; casts to u32 are intentional
17#![allow(clippy::cast_possible_truncation)]
18
19pub mod annotations;
20pub mod classfile;
21pub mod constants;
22pub mod generics;
23pub mod lambda;
24pub mod modules;
25
26use std::io::Read;
27use std::path::Path;
28
29use cafebabe::ParseOptions;
30use cafebabe::attributes::AttributeData;
31use log::warn;
32use zip::ZipArchive;
33
34use crate::stub::model::ClassStub;
35use crate::{ClasspathError, ClasspathResult};
36
37pub use classfile::parse_class;
38
39// ---------------------------------------------------------------------------
40// Security limits for JAR scanning
41// ---------------------------------------------------------------------------
42
/// Upper bound on the number of ZIP entries a single JAR may contain.
///
/// `scan_jar` rejects any archive whose entry count is above this value, which
/// guards against ZIP-bomb style denial of service and runaway memory use.
const MAX_JAR_ENTRIES: usize = 100_000;

/// Upper bound on the summed uncompressed size of all entries in a JAR (2 GB).
///
/// `scan_jar` aborts when the declared sizes of the entries add up to more
/// than this many bytes.
const MAX_JAR_UNCOMPRESSED_SIZE: u64 = 2 * (1u64 << 30);
53
54// ---------------------------------------------------------------------------
55// JAR scanning
56// ---------------------------------------------------------------------------
57
58/// Scan a JAR file (ZIP archive) and parse all `.class` entries into
59/// [`ClassStub`] records.
60///
61/// Uses rayon for parallelism across JARs (called by the outer loop), but
62/// processes entries within a single JAR sequentially since [`ZipArchive`] is
63/// not `Send`.
64///
65/// # Security limits
66///
67/// - Entry count limit: 100,000 per JAR
68/// - Uncompressed size limit: 2 GB per JAR
69/// - Per-class errors are logged and skipped, never fail the whole JAR
70///
71/// # Errors
72///
73/// Returns [`ClasspathError::JarReadError`] if the JAR cannot be opened or
74/// if it exceeds security limits.
75pub fn scan_jar(jar_path: &Path) -> ClasspathResult<Vec<ClassStub>> {
76    let jar_display = jar_path.display().to_string();
77
78    let file = std::fs::File::open(jar_path).map_err(|e| ClasspathError::JarReadError {
79        path: jar_display.clone(),
80        reason: format!("cannot open file: {e}"),
81    })?;
82
83    let mut archive = ZipArchive::new(file).map_err(|e| ClasspathError::JarReadError {
84        path: jar_display.clone(),
85        reason: format!("invalid ZIP/JAR archive: {e}"),
86    })?;
87
88    // --- Security check: entry count ---
89    let entry_count = archive.len();
90    if entry_count > MAX_JAR_ENTRIES {
91        return Err(ClasspathError::JarReadError {
92            path: jar_display,
93            reason: format!(
94                "JAR bomb detected: {entry_count} entries exceeds limit of {MAX_JAR_ENTRIES}"
95            ),
96        });
97    }
98
99    // --- Security check: total uncompressed size ---
100    let mut total_uncompressed: u64 = 0;
101    for i in 0..entry_count {
102        if let Ok(entry) = archive.by_index_raw(i) {
103            total_uncompressed = total_uncompressed.saturating_add(entry.size());
104        }
105    }
106
107    if total_uncompressed > MAX_JAR_UNCOMPRESSED_SIZE {
108        return Err(ClasspathError::JarReadError {
109            path: jar_display,
110            reason: format!(
111                "JAR bomb detected: total uncompressed size {total_uncompressed} bytes \
112                 exceeds limit of {MAX_JAR_UNCOMPRESSED_SIZE} bytes (2 GB)"
113            ),
114        });
115    }
116
117    // --- Scan entries ---
118    let mut stubs = Vec::new();
119    for i in 0..entry_count {
120        let mut entry = match archive.by_index(i) {
121            Ok(e) => e,
122            Err(e) => {
123                warn!("JAR {jar_display}: cannot read entry {i}: {e}");
124                continue;
125            }
126        };
127
128        #[allow(clippy::case_sensitive_file_extension_comparisons)]
129        // JVM classfiles always use .class extension
130        let entry_name = entry.name().to_owned();
131
132        // Only process .class files.
133        #[allow(clippy::case_sensitive_file_extension_comparisons)]
134        // Known file extensions in domain
135        if !entry_name.ends_with(".class") {
136            continue;
137        }
138
139        // Skip module-info and package-info — handled separately.
140        if is_info_class(&entry_name) {
141            continue;
142        }
143
144        // Read entry bytes.
145        let mut bytes = Vec::with_capacity(entry.size() as usize);
146        if let Err(e) = entry.read_to_end(&mut bytes) {
147            warn!("JAR {jar_display}: cannot read entry {entry_name}: {e}");
148            continue;
149        }
150
151        // Parse and enrich.
152        match parse_class_enriched(&bytes) {
153            Ok(mut stub) => {
154                stub.source_jar = Some(jar_display.clone());
155                stubs.push(stub);
156            }
157            Err(e) => {
158                warn!("JAR {jar_display}: cannot parse class {entry_name}: {e}");
159            }
160        }
161    }
162
163    Ok(stubs)
164}
165
166/// Parse a `.class` file and apply all enrichment passes in a single
167/// cafebabe parse.
168///
169/// This combines base class parsing ([`parse_class`]) with generic signature
170/// extraction, annotation extraction, lambda target extraction, and module
171/// extraction — all from a single `cafebabe::ClassFile` parse.
172///
173/// Per-enrichment errors are logged and skipped; the base stub is always
174/// returned if the initial parse succeeds.
175fn parse_class_enriched(bytes: &[u8]) -> ClasspathResult<ClassStub> {
176    // First parse with parse_class for the base stub.
177    let mut stub = parse_class(bytes)?;
178
179    // Second parse with cafebabe for enrichment data.
180    // This is a lightweight re-parse since we disable bytecode parsing.
181    let mut opts = ParseOptions::default();
182    opts.parse_bytecode(false);
183
184    let class_file = match cafebabe::parse_class_with_options(bytes, &opts) {
185        Ok(cf) => cf,
186        Err(e) => {
187            warn!("enrichment parse failed for {}: {e}", stub.fqn);
188            return Ok(stub);
189        }
190    };
191
192    // --- Enrich: annotations (class-level) ---
193    for attr in &class_file.attributes {
194        match annotations::extract_annotations_from_attribute(&attr.data) {
195            Ok(Some(ann)) => stub.annotations.extend(ann),
196            Ok(None) => {}
197            Err(e) => {
198                warn!("annotation extraction failed for {}: {e}", stub.fqn);
199            }
200        }
201    }
202
203    // --- Enrich: annotations (method-level and field-level) ---
204    enrich_method_annotations(&class_file, &mut stub);
205    enrich_field_annotations(&class_file, &mut stub);
206
207    // --- Enrich: generic signatures ---
208    enrich_generics(&class_file, &mut stub);
209
210    // --- Enrich: lambda targets ---
211    stub.lambda_targets = lambda::extract_lambda_targets(&class_file);
212
213    // --- Enrich: module info ---
214    match modules::extract_module(&class_file) {
215        Ok(Some(module)) => stub.module = Some(module),
216        Ok(None) => {}
217        Err(e) => {
218            warn!("module extraction failed for {}: {e}", stub.fqn);
219        }
220    }
221
222    Ok(stub)
223}
224
225/// Enrich method stubs with annotations from the cafebabe parse.
226fn enrich_method_annotations(class_file: &cafebabe::ClassFile<'_>, stub: &mut ClassStub) {
227    for (i, method) in class_file.methods.iter().enumerate() {
228        if i >= stub.methods.len() {
229            break;
230        }
231        // Find the matching stub method. The classfile parser may skip synthetic/bridge
232        // methods, so we match by name + descriptor.
233        let Some(method_stub) = stub.methods.iter_mut().find(|ms| {
234            ms.name == method.name.as_ref() && ms.descriptor == method.descriptor.to_string()
235        }) else {
236            continue;
237        };
238
239        for attr in &method.attributes {
240            match annotations::extract_annotations_from_attribute(&attr.data) {
241                Ok(Some(ann)) => method_stub.annotations.extend(ann),
242                Ok(None) => {}
243                Err(e) => {
244                    warn!(
245                        "method annotation extraction failed for {}#{}: {e}",
246                        stub.fqn, method_stub.name
247                    );
248                }
249            }
250            match annotations::extract_parameter_annotations_from_attribute(&attr.data) {
251                Ok(Some(param_ann)) => {
252                    // Merge parameter annotations: extend or replace.
253                    if method_stub.parameter_annotations.is_empty() {
254                        method_stub.parameter_annotations = param_ann;
255                    } else {
256                        for (pi, anns) in param_ann.into_iter().enumerate() {
257                            if pi < method_stub.parameter_annotations.len() {
258                                method_stub.parameter_annotations[pi].extend(anns);
259                            } else {
260                                method_stub.parameter_annotations.push(anns);
261                            }
262                        }
263                    }
264                }
265                Ok(None) => {}
266                Err(e) => {
267                    warn!(
268                        "parameter annotation extraction failed for {}#{}: {e}",
269                        stub.fqn, method_stub.name
270                    );
271                }
272            }
273        }
274    }
275}
276
277/// Enrich field stubs with annotations from the cafebabe parse.
278fn enrich_field_annotations(class_file: &cafebabe::ClassFile<'_>, stub: &mut ClassStub) {
279    for field in &class_file.fields {
280        let Some(field_stub) = stub
281            .fields
282            .iter_mut()
283            .find(|fs| fs.name == field.name.as_ref())
284        else {
285            continue;
286        };
287
288        for attr in &field.attributes {
289            match annotations::extract_annotations_from_attribute(&attr.data) {
290                Ok(Some(ann)) => field_stub.annotations.extend(ann),
291                Ok(None) => {}
292                Err(e) => {
293                    warn!(
294                        "field annotation extraction failed for {}.{}: {e}",
295                        stub.fqn, field_stub.name
296                    );
297                }
298            }
299        }
300    }
301}
302
303/// Enrich stubs with generic type signatures from the cafebabe parse.
304fn enrich_generics(class_file: &cafebabe::ClassFile<'_>, stub: &mut ClassStub) {
305    // Class-level signature.
306    for attr in &class_file.attributes {
307        if let AttributeData::Signature(sig) = &attr.data {
308            match generics::parse_class_signature(sig) {
309                Ok(parsed) => stub.generic_signature = Some(parsed),
310                Err(e) => {
311                    warn!("class signature parse failed for {}: {e}", stub.fqn);
312                }
313            }
314            break;
315        }
316    }
317
318    // Method-level signatures.
319    for method in &class_file.methods {
320        let Some(method_stub) = stub.methods.iter_mut().find(|ms| {
321            ms.name == method.name.as_ref() && ms.descriptor == method.descriptor.to_string()
322        }) else {
323            continue;
324        };
325
326        for attr in &method.attributes {
327            if let AttributeData::Signature(sig) = &attr.data {
328                match generics::parse_method_signature(sig) {
329                    Ok(parsed) => method_stub.generic_signature = Some(parsed),
330                    Err(e) => {
331                        warn!(
332                            "method signature parse failed for {}#{}: {e}",
333                            stub.fqn, method_stub.name
334                        );
335                    }
336                }
337                break;
338            }
339        }
340    }
341
342    // Field-level signatures.
343    for field in &class_file.fields {
344        let Some(field_stub) = stub
345            .fields
346            .iter_mut()
347            .find(|fs| fs.name == field.name.as_ref())
348        else {
349            continue;
350        };
351
352        for attr in &field.attributes {
353            if let AttributeData::Signature(sig) = &attr.data {
354                match generics::parse_field_signature(sig) {
355                    Ok(parsed) => field_stub.generic_signature = Some(parsed),
356                    Err(e) => {
357                        warn!(
358                            "field signature parse failed for {}.{}: {e}",
359                            stub.fqn, field_stub.name
360                        );
361                    }
362                }
363                break;
364            }
365        }
366    }
367}
368
/// Report whether a ZIP entry name refers to `module-info.class` or
/// `package-info.class`.
///
/// Only the final `/`-separated path component is compared, so entries nested
/// in any directory (for example under `META-INF/versions/11/`) are
/// recognised as well. `scan_jar` uses this to leave such entries out of the
/// scan.
fn is_info_class(entry_name: &str) -> bool {
    // Text after the last '/', or the whole name when there is no '/'.
    let leaf = entry_name
        .rsplit_once('/')
        .map_or(entry_name, |(_, tail)| tail);
    matches!(leaf, "module-info.class" | "package-info.class")
}
377
378// ---------------------------------------------------------------------------
379// Tests
380// ---------------------------------------------------------------------------
381
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use zip::write::SimpleFileOptions;

    /// Build the bytes of a minimal `.class` file declaring `class_name`
    /// (internal, slash-separated form) as a public class extending
    /// `java/lang/Object`, with no interfaces, fields, methods or attributes.
    fn build_minimal_class(class_name: &str) -> Vec<u8> {
        /// Append a big-endian `u16`.
        fn put_u16(out: &mut Vec<u8>, value: u16) {
            out.extend_from_slice(&value.to_be_bytes());
        }

        /// Append a `CONSTANT_Utf8` entry (tag 1, length, bytes).
        fn put_utf8(out: &mut Vec<u8>, text: &[u8]) {
            out.push(1);
            put_u16(out, text.len() as u16);
            out.extend_from_slice(text);
        }

        /// Append a `CONSTANT_Class` entry (tag 7) pointing at `name_index`.
        fn put_class_ref(out: &mut Vec<u8>, name_index: u16) {
            out.push(7);
            put_u16(out, name_index);
        }

        let mut out = Vec::new();

        // Header: magic, minor version 0, major version 52 (Java 8).
        out.extend_from_slice(&0xCAFE_BABEu32.to_be_bytes());
        put_u16(&mut out, 0);
        put_u16(&mut out, 52);

        // Constant pool: 4 entries, so the count field is 5 (entries + 1).
        put_u16(&mut out, 5);
        put_utf8(&mut out, class_name.as_bytes()); // #1: Utf8 <class_name>
        put_class_ref(&mut out, 1); // #2: Class -> #1
        put_utf8(&mut out, b"java/lang/Object"); // #3: Utf8 "java/lang/Object"
        put_class_ref(&mut out, 3); // #4: Class -> #3

        put_u16(&mut out, 0x0021); // access flags: ACC_PUBLIC | ACC_SUPER
        put_u16(&mut out, 2); // this class: #2
        put_u16(&mut out, 4); // super class: #4
        put_u16(&mut out, 0); // interfaces count
        put_u16(&mut out, 0); // fields count
        put_u16(&mut out, 0); // methods count
        put_u16(&mut out, 0); // attributes count

        out
    }

    /// Create an in-memory JAR (ZIP) archive holding `entries` as uncompressed
    /// (stored) files, in the given order.
    fn build_test_jar(entries: &[(&str, &[u8])]) -> Vec<u8> {
        let mut jar = Vec::new();
        {
            let stored =
                SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored);
            let mut zip_writer = zip::ZipWriter::new(std::io::Cursor::new(&mut jar));
            for &(name, data) in entries {
                zip_writer.start_file(name, stored).unwrap();
                zip_writer.write_all(data).unwrap();
            }
            zip_writer.finish().unwrap();
        }
        jar
    }

    /// Write `raw` to a fresh temporary file and run [`scan_jar`] on it.
    fn scan_raw_bytes(raw: &[u8]) -> ClasspathResult<Vec<ClassStub>> {
        let jar_file = tempfile::NamedTempFile::new().unwrap();
        std::fs::write(jar_file.path(), raw).unwrap();
        scan_jar(jar_file.path())
    }

    /// Package `entries` into a JAR on disk and run [`scan_jar`] on it.
    fn scan_entries(entries: &[(&str, &[u8])]) -> ClasspathResult<Vec<ClassStub>> {
        scan_raw_bytes(&build_test_jar(entries))
    }

    /// True when one of `stubs` has exactly the fully-qualified name `fqn`.
    fn has_fqn(stubs: &[ClassStub], fqn: &str) -> bool {
        stubs.iter().any(|stub| stub.fqn.as_str() == fqn)
    }

    #[test]
    fn test_scan_jar_multiple_classes() {
        let class_a = build_minimal_class("com/example/ClassA");
        let class_b = build_minimal_class("com/example/ClassB");

        let stubs = scan_entries(&[
            ("com/example/ClassA.class", &class_a),
            ("com/example/ClassB.class", &class_b),
        ])
        .unwrap();

        assert_eq!(stubs.len(), 2);
        assert!(has_fqn(&stubs, "com.example.ClassA"));
        assert!(has_fqn(&stubs, "com.example.ClassB"));
    }

    #[test]
    fn test_scan_jar_empty() {
        let stubs = scan_entries(&[]).unwrap();
        assert!(stubs.is_empty());
    }

    #[test]
    fn test_scan_jar_malformed_jar() {
        let result = scan_raw_bytes(b"this is not a zip file");
        assert!(result.is_err());

        let err = result.unwrap_err();
        assert!(
            matches!(err, ClasspathError::JarReadError { .. }),
            "expected JarReadError, got: {err}"
        );
    }

    #[test]
    fn test_scan_jar_skips_module_and_package_info() {
        let class_a = build_minimal_class("com/example/ClassA");

        // The info entries hold bytes that would not parse as a class; the
        // scan is expected to skip them by name before any parse is attempted.
        let stubs = scan_entries(&[
            ("com/example/ClassA.class", &class_a),
            ("module-info.class", b"not a real class"),
            ("com/example/package-info.class", b"not a real class"),
            // Multi-release variant
            ("META-INF/versions/11/module-info.class", b"not real"),
        ])
        .unwrap();

        assert_eq!(stubs.len(), 1);
        assert_eq!(stubs[0].fqn, "com.example.ClassA");
    }

    #[test]
    fn test_scan_jar_inner_classes_included() {
        let outer = build_minimal_class("com/example/Outer");
        let inner = build_minimal_class("com/example/Outer$Inner");

        let stubs = scan_entries(&[
            ("com/example/Outer.class", &outer),
            ("com/example/Outer$Inner.class", &inner),
        ])
        .unwrap();

        assert_eq!(stubs.len(), 2);
        assert!(has_fqn(&stubs, "com.example.Outer"));
        assert!(has_fqn(&stubs, "com.example.Outer$Inner"));
    }

    #[test]
    fn test_scan_jar_skips_non_class_files() {
        let class_a = build_minimal_class("com/example/ClassA");

        let stubs = scan_entries(&[
            ("com/example/ClassA.class", &class_a),
            ("META-INF/MANIFEST.MF", b"Manifest-Version: 1.0\n"),
            ("com/example/resource.txt", b"some resource"),
        ])
        .unwrap();

        assert_eq!(stubs.len(), 1);
        assert_eq!(stubs[0].fqn, "com.example.ClassA");
    }

    #[test]
    fn test_scan_jar_malformed_class_skipped() {
        let good_class = build_minimal_class("com/example/Good");

        let stubs = scan_entries(&[
            ("com/example/Good.class", &good_class),
            ("com/example/Bad.class", b"not valid bytecode"),
        ])
        .unwrap();

        assert_eq!(stubs.len(), 1);
        assert_eq!(stubs[0].fqn, "com.example.Good");
    }

    #[test]
    fn test_scan_jar_nonexistent_file() {
        let result = scan_jar(Path::new("/nonexistent/path/foo.jar"));
        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            ClasspathError::JarReadError { .. }
        ));
    }

    #[test]
    fn test_is_info_class() {
        let info_names = [
            "module-info.class",
            "com/example/package-info.class",
            "META-INF/versions/11/module-info.class",
        ];
        for name in info_names {
            assert!(is_info_class(name));
        }

        let ordinary_names = [
            "com/example/MyClass.class",
            "com/example/ModuleInfo.class",
        ];
        for name in ordinary_names {
            assert!(!is_info_class(name));
        }
    }
}
605}