1pub mod annotations;
17pub mod classfile;
18pub mod constants;
19pub mod generics;
20pub mod lambda;
21pub mod modules;
22
23use std::io::Read;
24use std::path::Path;
25
26use cafebabe::ParseOptions;
27use cafebabe::attributes::AttributeData;
28use log::warn;
29use zip::ZipArchive;
30
31use crate::stub::model::ClassStub;
32use crate::{ClasspathError, ClasspathResult};
33
34pub use classfile::parse_class;
35
/// Maximum number of entries a JAR may contain. `scan_jar` rejects archives
/// with more entries than this as a suspected JAR bomb.
const MAX_JAR_ENTRIES: usize = 100_000;

/// Maximum total uncompressed size, in bytes (2 * 1024^3), that `scan_jar`
/// accepts for a single JAR, summed over the size reported by every entry.
const MAX_JAR_UNCOMPRESSED_SIZE: u64 = 2 * 1024 * 1024 * 1024;
50
51pub fn scan_jar(jar_path: &Path) -> ClasspathResult<Vec<ClassStub>> {
73 let jar_display = jar_path.display().to_string();
74
75 let file = std::fs::File::open(jar_path).map_err(|e| ClasspathError::JarReadError {
76 path: jar_display.clone(),
77 reason: format!("cannot open file: {e}"),
78 })?;
79
80 let mut archive = ZipArchive::new(file).map_err(|e| ClasspathError::JarReadError {
81 path: jar_display.clone(),
82 reason: format!("invalid ZIP/JAR archive: {e}"),
83 })?;
84
85 let entry_count = archive.len();
87 if entry_count > MAX_JAR_ENTRIES {
88 return Err(ClasspathError::JarReadError {
89 path: jar_display,
90 reason: format!(
91 "JAR bomb detected: {entry_count} entries exceeds limit of {MAX_JAR_ENTRIES}"
92 ),
93 });
94 }
95
96 let mut total_uncompressed: u64 = 0;
98 for i in 0..entry_count {
99 if let Ok(entry) = archive.by_index_raw(i) {
100 total_uncompressed = total_uncompressed.saturating_add(entry.size());
101 }
102 }
103
104 if total_uncompressed > MAX_JAR_UNCOMPRESSED_SIZE {
105 return Err(ClasspathError::JarReadError {
106 path: jar_display,
107 reason: format!(
108 "JAR bomb detected: total uncompressed size {total_uncompressed} bytes \
109 exceeds limit of {MAX_JAR_UNCOMPRESSED_SIZE} bytes (2 GB)"
110 ),
111 });
112 }
113
114 let mut stubs = Vec::new();
116 for i in 0..entry_count {
117 let mut entry = match archive.by_index(i) {
118 Ok(e) => e,
119 Err(e) => {
120 warn!("JAR {jar_display}: cannot read entry {i}: {e}");
121 continue;
122 }
123 };
124
125 let entry_name = entry.name().to_owned();
126
127 if !entry_name.ends_with(".class") {
129 continue;
130 }
131
132 if is_info_class(&entry_name) {
134 continue;
135 }
136
137 let mut bytes = Vec::with_capacity(entry.size() as usize);
139 if let Err(e) = entry.read_to_end(&mut bytes) {
140 warn!("JAR {jar_display}: cannot read entry {entry_name}: {e}");
141 continue;
142 }
143
144 match parse_class_enriched(&bytes) {
146 Ok(mut stub) => {
147 stub.source_jar = Some(jar_display.clone());
148 stubs.push(stub);
149 }
150 Err(e) => {
151 warn!("JAR {jar_display}: cannot parse class {entry_name}: {e}");
152 }
153 }
154 }
155
156 Ok(stubs)
157}
158
159fn parse_class_enriched(bytes: &[u8]) -> ClasspathResult<ClassStub> {
169 let mut stub = parse_class(bytes)?;
171
172 let mut opts = ParseOptions::default();
175 opts.parse_bytecode(false);
176
177 let class_file = match cafebabe::parse_class_with_options(bytes, &opts) {
178 Ok(cf) => cf,
179 Err(e) => {
180 warn!("enrichment parse failed for {}: {e}", stub.fqn);
181 return Ok(stub);
182 }
183 };
184
185 for attr in &class_file.attributes {
187 match annotations::extract_annotations_from_attribute(&attr.data) {
188 Ok(Some(ann)) => stub.annotations.extend(ann),
189 Ok(None) => {}
190 Err(e) => {
191 warn!("annotation extraction failed for {}: {e}", stub.fqn);
192 }
193 }
194 }
195
196 enrich_method_annotations(&class_file, &mut stub);
198 enrich_field_annotations(&class_file, &mut stub);
199
200 enrich_generics(&class_file, &mut stub);
202
203 stub.lambda_targets = lambda::extract_lambda_targets(&class_file);
205
206 match modules::extract_module(&class_file) {
208 Ok(Some(module)) => stub.module = Some(module),
209 Ok(None) => {}
210 Err(e) => {
211 warn!("module extraction failed for {}: {e}", stub.fqn);
212 }
213 }
214
215 Ok(stub)
216}
217
218fn enrich_method_annotations(class_file: &cafebabe::ClassFile<'_>, stub: &mut ClassStub) {
220 for (i, method) in class_file.methods.iter().enumerate() {
221 if i >= stub.methods.len() {
222 break;
223 }
224 let Some(method_stub) = stub.methods.iter_mut().find(|ms| {
227 ms.name == method.name.as_ref() && ms.descriptor == method.descriptor.to_string()
228 }) else {
229 continue;
230 };
231
232 for attr in &method.attributes {
233 match annotations::extract_annotations_from_attribute(&attr.data) {
234 Ok(Some(ann)) => method_stub.annotations.extend(ann),
235 Ok(None) => {}
236 Err(e) => {
237 warn!(
238 "method annotation extraction failed for {}#{}: {e}",
239 stub.fqn, method_stub.name
240 );
241 }
242 }
243 match annotations::extract_parameter_annotations_from_attribute(&attr.data) {
244 Ok(Some(param_ann)) => {
245 if method_stub.parameter_annotations.is_empty() {
247 method_stub.parameter_annotations = param_ann;
248 } else {
249 for (pi, anns) in param_ann.into_iter().enumerate() {
250 if pi < method_stub.parameter_annotations.len() {
251 method_stub.parameter_annotations[pi].extend(anns);
252 } else {
253 method_stub.parameter_annotations.push(anns);
254 }
255 }
256 }
257 }
258 Ok(None) => {}
259 Err(e) => {
260 warn!(
261 "parameter annotation extraction failed for {}#{}: {e}",
262 stub.fqn, method_stub.name
263 );
264 }
265 }
266 }
267 }
268}
269
270fn enrich_field_annotations(class_file: &cafebabe::ClassFile<'_>, stub: &mut ClassStub) {
272 for field in &class_file.fields {
273 let Some(field_stub) = stub
274 .fields
275 .iter_mut()
276 .find(|fs| fs.name == field.name.as_ref())
277 else {
278 continue;
279 };
280
281 for attr in &field.attributes {
282 match annotations::extract_annotations_from_attribute(&attr.data) {
283 Ok(Some(ann)) => field_stub.annotations.extend(ann),
284 Ok(None) => {}
285 Err(e) => {
286 warn!(
287 "field annotation extraction failed for {}.{}: {e}",
288 stub.fqn, field_stub.name
289 );
290 }
291 }
292 }
293 }
294}
295
296fn enrich_generics(class_file: &cafebabe::ClassFile<'_>, stub: &mut ClassStub) {
298 for attr in &class_file.attributes {
300 if let AttributeData::Signature(sig) = &attr.data {
301 match generics::parse_class_signature(sig) {
302 Ok(parsed) => stub.generic_signature = Some(parsed),
303 Err(e) => {
304 warn!("class signature parse failed for {}: {e}", stub.fqn);
305 }
306 }
307 break;
308 }
309 }
310
311 for method in &class_file.methods {
313 let Some(method_stub) = stub.methods.iter_mut().find(|ms| {
314 ms.name == method.name.as_ref() && ms.descriptor == method.descriptor.to_string()
315 }) else {
316 continue;
317 };
318
319 for attr in &method.attributes {
320 if let AttributeData::Signature(sig) = &attr.data {
321 match generics::parse_method_signature(sig) {
322 Ok(parsed) => method_stub.generic_signature = Some(parsed),
323 Err(e) => {
324 warn!(
325 "method signature parse failed for {}#{}: {e}",
326 stub.fqn, method_stub.name
327 );
328 }
329 }
330 break;
331 }
332 }
333 }
334
335 for field in &class_file.fields {
337 let Some(field_stub) = stub
338 .fields
339 .iter_mut()
340 .find(|fs| fs.name == field.name.as_ref())
341 else {
342 continue;
343 };
344
345 for attr in &field.attributes {
346 if let AttributeData::Signature(sig) = &attr.data {
347 match generics::parse_field_signature(sig) {
348 Ok(parsed) => field_stub.generic_signature = Some(parsed),
349 Err(e) => {
350 warn!(
351 "field signature parse failed for {}.{}: {e}",
352 stub.fqn, field_stub.name
353 );
354 }
355 }
356 break;
357 }
358 }
359 }
360}
361
/// Returns `true` when the final `/`-separated component of `entry_name` is
/// exactly `module-info.class` or `package-info.class`.
///
/// Only the last path component is inspected, so descriptors nested in any
/// directory (for example under `META-INF/versions/11/`) are recognised too.
fn is_info_class(entry_name: &str) -> bool {
    let base_name = match entry_name.rfind('/') {
        // '/' is a single byte, so `slash + 1` is always a valid boundary.
        Some(slash) => &entry_name[slash + 1..],
        None => entry_name,
    };
    matches!(base_name, "module-info.class" | "package-info.class")
}
370
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use zip::write::SimpleFileOptions;

    /// Hand-assembles the bytes of a minimal class file named `class_name`
    /// (internal form, e.g. `com/example/Foo`) that extends `java/lang/Object`
    /// and declares no interfaces, fields, methods or attributes.
    fn build_minimal_class(class_name: &str) -> Vec<u8> {
        /// Appends a big-endian `u16`.
        fn put_u16(out: &mut Vec<u8>, value: u16) {
            out.extend_from_slice(&value.to_be_bytes());
        }

        /// Appends a constant-pool entry with tag 1 (UTF-8 text).
        fn put_utf8(out: &mut Vec<u8>, text: &[u8]) {
            out.push(1);
            put_u16(out, text.len() as u16);
            out.extend_from_slice(text);
        }

        /// Appends a constant-pool entry with tag 7 (class reference).
        fn put_class_ref(out: &mut Vec<u8>, name_index: u16) {
            out.push(7);
            put_u16(out, name_index);
        }

        let mut out = Vec::new();

        // Header: magic, minor version 0, major version 52.
        out.extend_from_slice(&0xCAFE_BABEu32.to_be_bytes());
        put_u16(&mut out, 0);
        put_u16(&mut out, 52);

        // Constant pool: the count is one more than the four entries below.
        put_u16(&mut out, 5);
        put_utf8(&mut out, class_name.as_bytes()); // #1
        put_class_ref(&mut out, 1); // #2 -> #1
        put_utf8(&mut out, b"java/lang/Object"); // #3
        put_class_ref(&mut out, 3); // #4 -> #3

        // Access flags, this_class (#2), super_class (#4).
        put_u16(&mut out, 0x0021);
        put_u16(&mut out, 2);
        put_u16(&mut out, 4);

        // Interface, field, method and attribute counts are all zero.
        for _ in 0..4 {
            put_u16(&mut out, 0);
        }

        out
    }

    /// Builds an in-memory JAR holding `entries` as `(name, contents)` pairs,
    /// stored without compression.
    fn build_test_jar(entries: &[(&str, &[u8])]) -> Vec<u8> {
        let stored =
            SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored);
        let mut archive_bytes = Vec::new();
        {
            let mut writer = zip::ZipWriter::new(std::io::Cursor::new(&mut archive_bytes));
            for &(name, data) in entries {
                writer.start_file(name, stored).unwrap();
                writer.write_all(data).unwrap();
            }
            writer.finish().unwrap();
        }
        archive_bytes
    }

    /// Writes `jar_bytes` to a temporary file and runs `scan_jar` on it.
    fn scan_archive_bytes(jar_bytes: &[u8]) -> ClasspathResult<Vec<ClassStub>> {
        let tmp = tempfile::NamedTempFile::new().unwrap();
        std::fs::write(tmp.path(), jar_bytes).unwrap();
        scan_jar(tmp.path())
    }

    /// Packs `entries` into a JAR, scans it and unwraps the resulting stubs.
    fn scan_entries(entries: &[(&str, &[u8])]) -> Vec<ClassStub> {
        scan_archive_bytes(&build_test_jar(entries)).unwrap()
    }

    /// Reports whether any stub carries the given fully qualified name.
    fn has_fqn(stubs: &[ClassStub], fqn: &str) -> bool {
        stubs.iter().any(|s| s.fqn.as_str() == fqn)
    }

    #[test]
    fn test_scan_jar_multiple_classes() {
        let class_a = build_minimal_class("com/example/ClassA");
        let class_b = build_minimal_class("com/example/ClassB");

        let stubs = scan_entries(&[
            ("com/example/ClassA.class", &class_a),
            ("com/example/ClassB.class", &class_b),
        ]);

        assert_eq!(stubs.len(), 2);
        assert!(has_fqn(&stubs, "com.example.ClassA"));
        assert!(has_fqn(&stubs, "com.example.ClassB"));
    }

    #[test]
    fn test_scan_jar_empty() {
        let stubs = scan_entries(&[]);
        assert!(stubs.is_empty());
    }

    #[test]
    fn test_scan_jar_malformed_jar() {
        let outcome = scan_archive_bytes(b"this is not a zip file");
        assert!(outcome.is_err());

        let err = outcome.unwrap_err();
        assert!(
            matches!(err, ClasspathError::JarReadError { .. }),
            "expected JarReadError, got: {err}"
        );
    }

    #[test]
    fn test_scan_jar_skips_module_and_package_info() {
        let class_a = build_minimal_class("com/example/ClassA");

        // The descriptor entries hold junk on purpose: they must be skipped
        // by name without anyone trying to parse them.
        let stubs = scan_entries(&[
            ("com/example/ClassA.class", &class_a),
            ("module-info.class", b"not a real class"),
            ("com/example/package-info.class", b"not a real class"),
            ("META-INF/versions/11/module-info.class", b"not real"),
        ]);

        assert_eq!(stubs.len(), 1);
        assert_eq!(stubs[0].fqn, "com.example.ClassA");
    }

    #[test]
    fn test_scan_jar_inner_classes_included() {
        let outer = build_minimal_class("com/example/Outer");
        let inner = build_minimal_class("com/example/Outer$Inner");

        let stubs = scan_entries(&[
            ("com/example/Outer.class", &outer),
            ("com/example/Outer$Inner.class", &inner),
        ]);

        assert_eq!(stubs.len(), 2);
        assert!(has_fqn(&stubs, "com.example.Outer"));
        assert!(has_fqn(&stubs, "com.example.Outer$Inner"));
    }

    #[test]
    fn test_scan_jar_skips_non_class_files() {
        let class_a = build_minimal_class("com/example/ClassA");

        let stubs = scan_entries(&[
            ("com/example/ClassA.class", &class_a),
            ("META-INF/MANIFEST.MF", b"Manifest-Version: 1.0\n"),
            ("com/example/resource.txt", b"some resource"),
        ]);

        assert_eq!(stubs.len(), 1);
        assert_eq!(stubs[0].fqn, "com.example.ClassA");
    }

    #[test]
    fn test_scan_jar_malformed_class_skipped() {
        let good_class = build_minimal_class("com/example/Good");

        let stubs = scan_entries(&[
            ("com/example/Good.class", &good_class),
            ("com/example/Bad.class", b"not valid bytecode"),
        ]);

        assert_eq!(stubs.len(), 1);
        assert_eq!(stubs[0].fqn, "com.example.Good");
    }

    #[test]
    fn test_scan_jar_nonexistent_file() {
        let outcome = scan_jar(Path::new("/nonexistent/path/foo.jar"));
        assert!(outcome.is_err());
        assert!(matches!(
            outcome.unwrap_err(),
            ClasspathError::JarReadError { .. }
        ));
    }

    #[test]
    fn test_is_info_class() {
        for name in [
            "module-info.class",
            "com/example/package-info.class",
            "META-INF/versions/11/module-info.class",
        ] {
            assert!(is_info_class(name));
        }

        for name in ["com/example/MyClass.class", "com/example/ModuleInfo.class"] {
            assert!(!is_info_class(name));
        }
    }
}