#![allow(clippy::cast_possible_truncation)]
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use log::{debug, info, warn};
use rayon::prelude::*;
use crate::bytecode::scan_jar;
use crate::detect::{BuildSystem, detect_build_system};
use crate::graph::provenance::ClasspathProvenance;
use crate::resolve::{ClasspathEntry, ResolveConfig, ResolvedClasspath};
use crate::stub::cache::StubCache;
use crate::stub::index::ClasspathIndex;
use crate::stub::model::ClassStub;
use crate::{ClasspathError, ClasspathResult};
/// Options that steer a single run of the classpath pipeline.
#[derive(Debug, Clone)]
pub struct ClasspathConfig {
    /// Whether classpath analysis is switched on.
    ///
    /// NOTE(review): nothing in this file reads the flag; presumably the
    /// caller checks it before invoking the pipeline — confirm.
    pub enabled: bool,
    /// How much of the resolved classpath gets scanned.
    pub depth: ClasspathDepth,
    /// Optional build-system name handed to build-system detection as an
    /// override.
    pub build_system_override: Option<String>,
    /// Optional text file listing JAR paths. When set, the build system is
    /// not consulted at all.
    pub classpath_file: Option<PathBuf>,
    /// When `true`, the stub cache is not consulted and every JAR is scanned
    /// afresh.
    pub force: bool,
    /// Timeout, in seconds, forwarded to the build-system resolver.
    pub timeout_secs: u64,
}

/// Selects which resolved classpath entries are scanned.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ClasspathDepth {
    /// Only entries flagged as direct dependencies.
    Shallow,
    /// Every resolved entry.
    Full,
}

impl Default for ClasspathConfig {
    /// Disabled, full depth, no overrides, cache honoured, 60 second timeout.
    fn default() -> Self {
        ClasspathConfig {
            timeout_secs: 60,
            depth: ClasspathDepth::Full,
            classpath_file: None,
            build_system_override: None,
            force: false,
            enabled: false,
        }
    }
}
/// Everything produced by one run of the classpath pipeline.
#[derive(Debug)]
pub struct ClasspathPipelineResult {
    /// Index built from every stub that was scanned or loaded from cache.
    pub index: ClasspathIndex,
    /// One provenance record per classpath entry that survived depth
    /// filtering.
    pub provenance: Vec<ClasspathProvenance>,
    /// Number of JARs that yielded stubs, whether freshly scanned or served
    /// from the cache. JARs that failed to scan are not counted.
    pub jars_scanned: usize,
    /// Total number of class stubs collected across all JARs.
    pub classes_parsed: usize,
    /// `true` only when at least one JAR came from the cache and none had to
    /// be scanned freshly.
    pub from_cache: bool,
}
/// Resolves the project's classpath, scans every unique JAR into class stubs,
/// builds the lookup index and persists the results under
/// `<project_root>/.sqry/classpath`.
///
/// The classpath is read from `config.classpath_file` when that is set;
/// otherwise the build system is detected and asked for it. With
/// [`ClasspathDepth::Shallow`] only entries flagged `is_direct` are scanned.
/// A JAR that fails to scan is reported with `warn!` and skipped: it neither
/// aborts the run nor counts towards `jars_scanned`.
///
/// # Errors
///
/// Returns an error when classpath resolution fails, or when the index or the
/// provenance file cannot be written.
pub fn run_classpath_pipeline(
    project_root: &Path,
    config: &ClasspathConfig,
) -> ClasspathResult<ClasspathPipelineResult> {
    info!("Starting classpath pipeline for {}", project_root.display());

    let resolved_classpaths = if let Some(ref classpath_file) = config.classpath_file {
        resolve_from_manual_file(classpath_file)?
    } else {
        resolve_from_build_system(project_root, config)?
    };

    let all_entries: Vec<&ClasspathEntry> = resolved_classpaths
        .iter()
        .flat_map(|cp| &cp.entries)
        .collect();
    // Captured before the depth filter consumes `all_entries`.
    let total_entries = all_entries.len();
    let entries_to_scan: Vec<&ClasspathEntry> = match config.depth {
        ClasspathDepth::Full => all_entries,
        ClasspathDepth::Shallow => all_entries.into_iter().filter(|e| e.is_direct).collect(),
    };
    info!(
        "Classpath resolved: {} entries ({} after depth filtering)",
        total_entries,
        entries_to_scan.len(),
    );

    let unique_jar_paths = deduplicate_jar_paths(&entries_to_scan);
    info!("{} unique JAR files to scan", unique_jar_paths.len());

    let stub_cache = StubCache::new(project_root);
    let scan_results = scan_jars_parallel(&unique_jar_paths, &stub_cache, config.force);

    let mut all_stubs: Vec<ClassStub> = Vec::new();
    let mut jars_scanned: usize = 0;
    let mut jars_from_cache: usize = 0;
    // The outcomes are consumed by value so the stubs can be moved into
    // `all_stubs` instead of being deep-cloned one by one.
    for outcome in scan_results {
        let (jar_path, stubs) = match outcome {
            JarScanOutcome::Scanned { jar_path, stubs } => {
                jars_scanned += 1;
                (jar_path, stubs)
            }
            JarScanOutcome::Cached { jar_path, stubs } => {
                jars_from_cache += 1;
                (jar_path, stubs)
            }
            JarScanOutcome::Failed { jar_path, error } => {
                warn!("Failed to scan JAR {}: {error}", jar_path.display());
                continue;
            }
        };

        // Stubs that do not already name their JAR are tagged with the path
        // they were read from.
        let jar_str = jar_path.display().to_string();
        all_stubs.extend(stubs.into_iter().map(|mut stub| {
            if stub.source_jar.is_none() {
                stub.source_jar = Some(jar_str.clone());
            }
            stub
        }));
    }

    let classes_parsed = all_stubs.len();
    info!(
        "Scanned {} JARs ({} from cache, {} fresh), {} classes total",
        jars_scanned + jars_from_cache,
        jars_from_cache,
        jars_scanned,
        classes_parsed,
    );

    let provenance = build_provenance(&entries_to_scan);
    let index = ClasspathIndex::build(all_stubs);
    info!(
        "Built classpath index: {} classes, {} packages",
        index.classes.len(),
        index.package_index.len(),
    );

    let sqry_classpath_dir = project_root.join(".sqry").join("classpath");
    persist_artifacts(&sqry_classpath_dir, &index, &provenance)?;

    Ok(ClasspathPipelineResult {
        index,
        provenance,
        jars_scanned: jars_scanned + jars_from_cache,
        classes_parsed,
        // Only a run in which nothing had to be scanned freshly counts as
        // "from cache".
        from_cache: jars_from_cache > 0 && jars_scanned == 0,
    })
}
/// Builds a single classpath named `"manual"` from a text file that lists one
/// JAR path per line.
///
/// Each line is trimmed; blank lines and lines starting with `#` are ignored.
/// A path that does not exist on disk is still included, but a warning is
/// logged. Every entry is marked as a direct dependency without coordinates.
///
/// # Errors
///
/// Returns `ClasspathError::ResolutionFailed` when the file cannot be opened
/// or a line cannot be read.
fn resolve_from_manual_file(classpath_file: &Path) -> ClasspathResult<Vec<ResolvedClasspath>> {
    info!("Reading manual classpath from {}", classpath_file.display());

    let shown = classpath_file.display();
    let handle = std::fs::File::open(classpath_file).map_err(|e| {
        ClasspathError::ResolutionFailed(format!("Cannot open classpath file {shown}: {e}"))
    })?;

    let mut entries = Vec::new();
    for raw_line in BufReader::new(handle).lines() {
        let raw_line = raw_line.map_err(|e| {
            ClasspathError::ResolutionFailed(format!(
                "Error reading classpath file {shown}: {e}"
            ))
        })?;

        let candidate = raw_line.trim();
        let is_payload = !candidate.is_empty() && !candidate.starts_with('#');
        if is_payload {
            let jar_path = PathBuf::from(candidate);
            if !jar_path.exists() {
                warn!(
                    "Classpath file entry does not exist: {}",
                    jar_path.display()
                );
            }
            entries.push(ClasspathEntry {
                jar_path,
                coordinates: None,
                is_direct: true,
                source_jar: None,
            });
        }
    }

    info!("Manual classpath file: {} entries", entries.len());
    let manual = ResolvedClasspath {
        module_name: String::from("manual"),
        entries,
    };
    Ok(vec![manual])
}
/// Detects the project's build system and delegates classpath resolution to
/// the resolver for that system.
///
/// `config.build_system_override` is passed to detection, and
/// `config.timeout_secs` is forwarded to the resolver. The resolver is given
/// `<project_root>/.sqry/classpath/resolved-classpath.json` as its cache path.
///
/// # Errors
///
/// Returns `ClasspathError::DetectionFailed` when no build system is detected,
/// or whatever error the selected resolver reports.
fn resolve_from_build_system(
    project_root: &Path,
    config: &ClasspathConfig,
) -> ClasspathResult<Vec<ResolvedClasspath>> {
    let requested = config.build_system_override.as_deref();
    let outcome = detect_build_system(project_root, requested);
    let build_system = outcome.build_system.ok_or_else(|| {
        ClasspathError::DetectionFailed(
            "No JVM build system detected. Use --build-system to specify one, \
             or --classpath-file to provide a manual classpath."
                .to_string(),
        )
    })?;
    info!("Detected build system: {build_system:?}");

    let cache_file = project_root
        .join(".sqry")
        .join("classpath")
        .join("resolved-classpath.json");
    let resolve_config = ResolveConfig {
        project_root: project_root.to_path_buf(),
        timeout_secs: config.timeout_secs,
        cache_path: Some(cache_file),
    };

    match build_system {
        BuildSystem::Sbt => crate::resolve::sbt::resolve_sbt_classpath(&resolve_config),
        BuildSystem::Bazel => crate::resolve::bazel::resolve_bazel_classpath(&resolve_config),
        BuildSystem::Maven => crate::resolve::maven::resolve_maven_classpath(&resolve_config),
        BuildSystem::Gradle => crate::resolve::gradle::resolve_gradle_classpath(&resolve_config),
    }
}
/// Result of trying to obtain class stubs for a single JAR.
enum JarScanOutcome {
    /// The JAR was parsed during this run.
    Scanned {
        /// Path of the JAR that was scanned.
        jar_path: PathBuf,
        /// Stubs extracted from the JAR.
        stubs: Vec<ClassStub>,
    },
    /// The stubs were served from the stub cache; the JAR was not re-parsed.
    Cached {
        /// Path of the JAR whose cached stubs were used.
        jar_path: PathBuf,
        /// Stubs loaded from the cache.
        stubs: Vec<ClassStub>,
    },
    /// Scanning failed; `error` holds the stringified cause.
    Failed { jar_path: PathBuf, error: String },
}
fn deduplicate_jar_paths(entries: &[&ClasspathEntry]) -> Vec<PathBuf> {
let mut seen = std::collections::HashSet::new();
let mut unique = Vec::new();
for entry in entries {
if seen.insert(&entry.jar_path) {
unique.push(entry.jar_path.clone());
}
}
unique
}
/// Scans the given JARs on rayon's parallel iterator, producing one outcome
/// per input path.
///
/// `force` is forwarded to each per-JAR scan, where it disables the cache
/// lookup.
fn scan_jars_parallel(
    jar_paths: &[PathBuf],
    stub_cache: &StubCache,
    force: bool,
) -> Vec<JarScanOutcome> {
    let scan_one = |path: &PathBuf| scan_single_jar(path, stub_cache, force);
    jar_paths.par_iter().map(scan_one).collect()
}
/// Produces the stubs for one JAR, preferring the cache unless `force` is set.
///
/// * A cache hit (only looked up when `force` is `false`) yields
///   [`JarScanOutcome::Cached`].
/// * Otherwise the JAR is scanned. On success the stubs are written back to
///   the cache and [`JarScanOutcome::Scanned`] is returned. A failed cache
///   write is only logged.
/// * A scan error yields [`JarScanOutcome::Failed`] carrying the error text.
fn scan_single_jar(jar_path: &Path, stub_cache: &StubCache, force: bool) -> JarScanOutcome {
    // With `force` the cache is not even queried.
    let cache_hit = if force {
        None
    } else {
        stub_cache.get(jar_path)
    };
    if let Some(stubs) = cache_hit {
        debug!(
            "Cache hit for {} ({} stubs)",
            jar_path.display(),
            stubs.len()
        );
        return JarScanOutcome::Cached {
            jar_path: jar_path.to_path_buf(),
            stubs,
        };
    }

    let stubs = match scan_jar(jar_path) {
        Ok(stubs) => stubs,
        Err(e) => {
            return JarScanOutcome::Failed {
                jar_path: jar_path.to_path_buf(),
                error: e.to_string(),
            };
        }
    };
    debug!("Scanned {} ({} classes)", jar_path.display(), stubs.len());

    // Caching is best effort: the freshly scanned stubs are returned even if
    // they could not be stored.
    if let Err(e) = stub_cache.put(jar_path, &stubs) {
        warn!("Failed to cache stubs for {}: {e}", jar_path.display());
    }

    JarScanOutcome::Scanned {
        jar_path: jar_path.to_path_buf(),
        stubs,
    }
}
/// Converts each classpath entry into a provenance record, one per entry and
/// in the same order, copying its JAR path, coordinates and direct flag.
fn build_provenance(entries: &[&ClasspathEntry]) -> Vec<ClasspathProvenance> {
    let mut records = Vec::with_capacity(entries.len());
    for entry in entries {
        let jar_path = entry.jar_path.clone();
        let coordinates = entry.coordinates.clone();
        records.push(ClasspathProvenance {
            jar_path,
            coordinates,
            is_direct: entry.is_direct,
        });
    }
    records
}
/// Writes the index and the provenance list into `classpath_dir`, creating
/// the directory (and any missing parents) first.
///
/// The index is saved as `index.sqry`; the provenance is stored as
/// pretty-printed JSON in `provenance.json`. The index is written before the
/// provenance is serialized.
///
/// # Errors
///
/// Returns `ClasspathError::IndexError` when the directory cannot be created
/// or the provenance cannot be serialized or written, and propagates any
/// error from saving the index.
fn persist_artifacts(
    classpath_dir: &Path,
    index: &ClasspathIndex,
    provenance: &[ClasspathProvenance],
) -> ClasspathResult<()> {
    let dir_shown = classpath_dir.display();
    std::fs::create_dir_all(classpath_dir).map_err(|e| {
        ClasspathError::IndexError(format!(
            "Cannot create classpath directory {dir_shown}: {e}"
        ))
    })?;

    let index_file = classpath_dir.join("index.sqry");
    index.save(&index_file)?;
    info!("Saved classpath index to {}", index_file.display());

    let provenance_file = classpath_dir.join("provenance.json");
    let serialized = serde_json::to_string_pretty(provenance)
        .map_err(|e| ClasspathError::IndexError(format!("Cannot serialize provenance: {e}")))?;
    let file_shown = provenance_file.display();
    std::fs::write(&provenance_file, serialized).map_err(|e| {
        ClasspathError::IndexError(format!("Cannot write provenance to {file_shown}: {e}"))
    })?;
    info!("Saved provenance to {}", provenance_file.display());

    Ok(())
}
// Unit tests for the classpath pipeline. JAR fixtures are assembled in memory
// (a hand-written class file wrapped in a stored, uncompressed zip) and
// written into a temporary directory.
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
use tempfile::TempDir;
use zip::write::SimpleFileOptions;
// Assembles the bytes of a minimal JVM class file for `class_name` (given in
// internal form, e.g. `com/example/Foo`) that extends `java/lang/Object` and
// declares no interfaces, fields, methods or attributes.
fn build_minimal_class(class_name: &str) -> Vec<u8> {
let mut bytes = Vec::new();
// Header: magic number, then minor version 0 and major version 52.
bytes.extend_from_slice(&0xCAFE_BABEu32.to_be_bytes());
bytes.extend_from_slice(&0u16.to_be_bytes());
bytes.extend_from_slice(&52u16.to_be_bytes());
let class_bytes = class_name.as_bytes();
let object_bytes = b"java/lang/Object";
// Constant pool count is the number of entries plus one: four entries follow.
let cp_count: u16 = 5;
bytes.extend_from_slice(&cp_count.to_be_bytes());
// #1: Utf8 (tag 1) holding the class name.
bytes.push(1);
bytes.extend_from_slice(&(class_bytes.len() as u16).to_be_bytes());
bytes.extend_from_slice(class_bytes);
// #2: Class (tag 7) pointing at #1.
bytes.push(7);
bytes.extend_from_slice(&1u16.to_be_bytes());
// #3: Utf8 (tag 1) holding "java/lang/Object".
bytes.push(1);
bytes.extend_from_slice(&(object_bytes.len() as u16).to_be_bytes());
bytes.extend_from_slice(object_bytes);
// #4: Class (tag 7) pointing at #3.
bytes.push(7);
bytes.extend_from_slice(&3u16.to_be_bytes());
// Access flags 0x0021 (ACC_PUBLIC | ACC_SUPER), this_class = #2, super_class = #4.
bytes.extend_from_slice(&0x0021u16.to_be_bytes());
bytes.extend_from_slice(&2u16.to_be_bytes());
bytes.extend_from_slice(&4u16.to_be_bytes());
// Zero counts for interfaces, fields, methods and attributes, in that order.
bytes.extend_from_slice(&0u16.to_be_bytes());
bytes.extend_from_slice(&0u16.to_be_bytes());
bytes.extend_from_slice(&0u16.to_be_bytes());
bytes.extend_from_slice(&0u16.to_be_bytes());
bytes
}
// Builds an in-memory zip archive containing `entries` as (name, data) pairs,
// stored without compression, and returns the archive bytes.
fn build_test_jar(entries: &[(&str, &[u8])]) -> Vec<u8> {
let mut buf = Vec::new();
// Inner scope ends the writer's mutable borrow of `buf` before it is returned.
{
let mut writer = zip::ZipWriter::new(std::io::Cursor::new(&mut buf));
let options =
SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored);
for (name, data) in entries {
writer.start_file(*name, options).unwrap();
writer.write_all(data).unwrap();
}
writer.finish().unwrap();
}
buf
}
// Writes a JAR built from `classes` to `dir/name` and returns its path.
fn write_test_jar(dir: &Path, name: &str, classes: &[(&str, &[u8])]) -> PathBuf {
let jar_bytes = build_test_jar(classes);
let jar_path = dir.join(name);
std::fs::write(&jar_path, &jar_bytes).unwrap();
jar_path
}
// The default configuration is disabled, full depth, with no overrides.
#[test]
fn test_default_config() {
let config = ClasspathConfig::default();
assert!(!config.enabled);
assert_eq!(config.depth, ClasspathDepth::Full);
assert!(config.build_system_override.is_none());
assert!(config.classpath_file.is_none());
assert!(!config.force);
assert_eq!(config.timeout_secs, 60);
}
// Two listed JARs become two direct entries in a single "manual" classpath.
#[test]
fn test_resolve_from_manual_file_basic() {
let tmp = TempDir::new().unwrap();
let jar_a = tmp.path().join("a.jar");
let jar_b = tmp.path().join("b.jar");
// The content is irrelevant here: resolution only checks that the paths exist.
std::fs::write(&jar_a, b"fake jar a").unwrap();
std::fs::write(&jar_b, b"fake jar b").unwrap();
let cp_file = tmp.path().join("classpath.txt");
std::fs::write(
&cp_file,
format!("{}\n{}\n", jar_a.display(), jar_b.display()),
)
.unwrap();
let result = resolve_from_manual_file(&cp_file).unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].module_name, "manual");
assert_eq!(result[0].entries.len(), 2);
assert!(result[0].entries[0].is_direct);
assert!(result[0].entries[1].is_direct);
}
// `#` comment lines and blank lines do not produce entries.
#[test]
fn test_resolve_from_manual_file_skips_comments_and_blanks() {
let tmp = TempDir::new().unwrap();
let jar_a = tmp.path().join("a.jar");
std::fs::write(&jar_a, b"fake jar a").unwrap();
let cp_file = tmp.path().join("classpath.txt");
std::fs::write(
&cp_file,
format!(
"# This is a comment\n\n{}\n\n# Another comment\n",
jar_a.display()
),
)
.unwrap();
let result = resolve_from_manual_file(&cp_file).unwrap();
assert_eq!(result[0].entries.len(), 1);
}
// A classpath file that cannot be opened is reported as an error.
#[test]
fn test_resolve_from_manual_file_nonexistent_file() {
let result = resolve_from_manual_file(Path::new("/nonexistent/classpath.txt"));
assert!(result.is_err());
let err = result.unwrap_err().to_string();
assert!(err.contains("Cannot open classpath file"));
}
// A listed JAR that is missing on disk is still kept as an entry.
#[test]
fn test_resolve_from_manual_file_nonexistent_jars_included() {
let tmp = TempDir::new().unwrap();
let cp_file = tmp.path().join("classpath.txt");
std::fs::write(&cp_file, "/nonexistent/jar.jar\n").unwrap();
let result = resolve_from_manual_file(&cp_file).unwrap();
assert_eq!(result[0].entries.len(), 1);
assert_eq!(
result[0].entries[0].jar_path,
PathBuf::from("/nonexistent/jar.jar")
);
}
// Repeated JAR paths collapse to one, keeping first-occurrence order.
#[test]
fn test_deduplicate_jar_paths() {
let entries = vec![
ClasspathEntry {
jar_path: PathBuf::from("/a.jar"),
coordinates: None,
is_direct: true,
source_jar: None,
},
ClasspathEntry {
jar_path: PathBuf::from("/b.jar"),
coordinates: None,
is_direct: true,
source_jar: None,
},
// Same path as the first entry; only `is_direct` differs.
ClasspathEntry {
jar_path: PathBuf::from("/a.jar"),
coordinates: None,
is_direct: false,
source_jar: None,
},
];
let refs: Vec<&ClasspathEntry> = entries.iter().collect();
let unique = deduplicate_jar_paths(&refs);
assert_eq!(unique.len(), 2);
assert_eq!(unique[0], PathBuf::from("/a.jar"));
assert_eq!(unique[1], PathBuf::from("/b.jar"));
}
// Provenance records mirror the entries' path, coordinates and direct flag.
#[test]
fn test_build_provenance() {
let entries = [
ClasspathEntry {
jar_path: PathBuf::from("/guava.jar"),
coordinates: Some("com.google.guava:guava:33.0.0".to_string()),
is_direct: true,
source_jar: None,
},
ClasspathEntry {
jar_path: PathBuf::from("/commons.jar"),
coordinates: None,
is_direct: false,
source_jar: None,
},
];
let refs: Vec<&ClasspathEntry> = entries.iter().collect();
let prov = build_provenance(&refs);
assert_eq!(prov.len(), 2);
assert_eq!(prov[0].jar_path, PathBuf::from("/guava.jar"));
assert_eq!(
prov[0].coordinates,
Some("com.google.guava:guava:33.0.0".to_string())
);
assert!(prov[0].is_direct);
assert!(!prov[1].is_direct);
assert!(prov[1].coordinates.is_none());
}
// The first scan of a JAR with an empty cache yields `Scanned` and one stub.
#[test]
fn test_scan_single_jar_fresh() {
let tmp = TempDir::new().unwrap();
let class_a = build_minimal_class("com/example/Foo");
let jar_path = write_test_jar(
tmp.path(),
"test.jar",
&[("com/example/Foo.class", &class_a)],
);
let cache = StubCache::new(tmp.path());
let outcome = scan_single_jar(&jar_path, &cache, false);
match outcome {
JarScanOutcome::Scanned { stubs, .. } => {
assert_eq!(stubs.len(), 1);
assert_eq!(stubs[0].fqn, "com.example.Foo");
}
other => panic!("Expected Scanned, got {:?}", outcome_name(&other)),
}
}
// A second scan of the same JAR is served from the cache.
#[test]
fn test_scan_single_jar_cached() {
let tmp = TempDir::new().unwrap();
let class_a = build_minimal_class("com/example/Bar");
let jar_path = write_test_jar(
tmp.path(),
"test.jar",
&[("com/example/Bar.class", &class_a)],
);
let cache = StubCache::new(tmp.path());
// First call populates the cache; statement order matters for this test.
let outcome = scan_single_jar(&jar_path, &cache, false);
assert!(matches!(outcome, JarScanOutcome::Scanned { .. }));
let outcome = scan_single_jar(&jar_path, &cache, false);
match outcome {
JarScanOutcome::Cached { stubs, .. } => {
assert_eq!(stubs.len(), 1);
assert_eq!(stubs[0].fqn, "com.example.Bar");
}
other => panic!("Expected Cached, got {:?}", outcome_name(&other)),
}
}
// `force = true` rescans even when the cache already holds the JAR.
#[test]
fn test_scan_single_jar_force_bypasses_cache() {
let tmp = TempDir::new().unwrap();
let class_a = build_minimal_class("com/example/Baz");
let jar_path = write_test_jar(
tmp.path(),
"test.jar",
&[("com/example/Baz.class", &class_a)],
);
let cache = StubCache::new(tmp.path());
// Warm the cache first so that the forced scan has something to bypass.
let _ = scan_single_jar(&jar_path, &cache, false);
let outcome = scan_single_jar(&jar_path, &cache, true);
assert!(
matches!(outcome, JarScanOutcome::Scanned { .. }),
"force=true should bypass cache"
);
}
// Scanning a path that does not exist yields `Failed`.
#[test]
fn test_scan_single_jar_nonexistent() {
let tmp = TempDir::new().unwrap();
let cache = StubCache::new(tmp.path());
let outcome = scan_single_jar(Path::new("/nonexistent.jar"), &cache, false);
assert!(
matches!(outcome, JarScanOutcome::Failed { .. }),
"Should fail for nonexistent JAR"
);
}
// Two JARs scanned in parallel yield two outcomes and two stubs in total.
// NOTE(review): `clippy::match_same_arms` is allowed both on the function and
// again on the match arm below; one of them looks redundant — confirm.
#[test]
#[allow(clippy::match_same_arms)] #[allow(clippy::match_wildcard_for_single_variants)] fn test_scan_jars_parallel_multiple() {
let tmp = TempDir::new().unwrap();
let class_a = build_minimal_class("com/example/A");
let class_b = build_minimal_class("com/example/B");
let jar_a = write_test_jar(tmp.path(), "a.jar", &[("com/example/A.class", &class_a)]);
let jar_b = write_test_jar(tmp.path(), "b.jar", &[("com/example/B.class", &class_b)]);
let cache = StubCache::new(tmp.path());
let results = scan_jars_parallel(&[jar_a, jar_b], &cache, false);
assert_eq!(results.len(), 2);
// Count stubs from successful outcomes only; failures contribute nothing.
let total_stubs: usize = results
.iter()
.filter_map(|r| match r {
#[allow(clippy::match_same_arms)] JarScanOutcome::Scanned { stubs, .. } | JarScanOutcome::Cached { stubs, .. } => {
Some(stubs.len())
}
_ => None,
})
.sum();
assert_eq!(total_stubs, 2);
}
// Persisted index and provenance files can be read back.
#[test]
fn test_persist_artifacts_roundtrip() {
let tmp = TempDir::new().unwrap();
// The directory does not exist yet; `persist_artifacts` has to create it.
let classpath_dir = tmp.path().join("classpath");
let index = ClasspathIndex::build(vec![]);
let provenance = vec![ClasspathProvenance {
jar_path: PathBuf::from("/test.jar"),
coordinates: Some("test:test:1.0".to_string()),
is_direct: true,
}];
persist_artifacts(&classpath_dir, &index, &provenance).unwrap();
let index_path = classpath_dir.join("index.sqry");
assert!(index_path.exists());
let loaded_index = ClasspathIndex::load(&index_path).unwrap();
assert_eq!(loaded_index.classes.len(), 0);
let prov_path = classpath_dir.join("provenance.json");
assert!(prov_path.exists());
let prov_json = std::fs::read_to_string(&prov_path).unwrap();
let loaded_prov: Vec<ClasspathProvenance> = serde_json::from_str(&prov_json).unwrap();
assert_eq!(loaded_prov.len(), 1);
assert_eq!(
loaded_prov[0].coordinates,
Some("test:test:1.0".to_string())
);
}
// Filtering on `is_direct` keeps only the direct entry.
// NOTE(review): this test repeats the filter expression inline instead of
// calling the pipeline, and the classpath file written below is never read —
// confirm whether it was meant to drive `run_classpath_pipeline`.
#[test]
fn test_depth_shallow_filters_transitive() {
let tmp = TempDir::new().unwrap();
let class_d = build_minimal_class("com/example/Direct");
let class_t = build_minimal_class("com/example/Transitive");
let jar_d = write_test_jar(
tmp.path(),
"direct.jar",
&[("com/example/Direct.class", &class_d)],
);
let jar_t = write_test_jar(
tmp.path(),
"transitive.jar",
&[("com/example/Transitive.class", &class_t)],
);
let cp_file = tmp.path().join("classpath.txt");
std::fs::write(
&cp_file,
format!("{}\n{}\n", jar_d.display(), jar_t.display()),
)
.unwrap();
let entries = [
ClasspathEntry {
jar_path: jar_d,
coordinates: None,
is_direct: true,
source_jar: None,
},
ClasspathEntry {
jar_path: jar_t,
coordinates: None,
is_direct: false,
source_jar: None,
},
];
let all_refs: Vec<&ClasspathEntry> = entries.iter().collect();
let full: Vec<&ClasspathEntry> = all_refs.clone();
assert_eq!(full.len(), 2);
let shallow: Vec<&ClasspathEntry> = all_refs.into_iter().filter(|e| e.is_direct).collect();
assert_eq!(shallow.len(), 1);
assert!(shallow[0].is_direct);
}
// End to end: a manual classpath file with one two-class JAR produces an
// index with both classes and writes both artifacts to disk.
#[test]
fn test_full_pipeline_with_manual_file() {
let tmp = TempDir::new().unwrap();
let class_a = build_minimal_class("com/example/Alpha");
let class_b = build_minimal_class("com/example/Beta");
let jar_path = write_test_jar(
tmp.path(),
"deps.jar",
&[
("com/example/Alpha.class", &class_a),
("com/example/Beta.class", &class_b),
],
);
let cp_file = tmp.path().join("classpath.txt");
std::fs::write(&cp_file, format!("{}\n", jar_path.display())).unwrap();
let config = ClasspathConfig {
enabled: true,
depth: ClasspathDepth::Full,
build_system_override: None,
classpath_file: Some(cp_file),
force: false,
timeout_secs: 30,
};
let result = run_classpath_pipeline(tmp.path(), &config).unwrap();
assert_eq!(result.jars_scanned, 1);
assert_eq!(result.classes_parsed, 2);
assert_eq!(result.index.classes.len(), 2);
assert!(result.index.lookup_fqn("com.example.Alpha").is_some());
assert!(result.index.lookup_fqn("com.example.Beta").is_some());
assert_eq!(result.provenance.len(), 1);
// Both artifacts must have been persisted under `.sqry/classpath`.
let index_path = tmp.path().join(".sqry/classpath/index.sqry");
assert!(index_path.exists());
let prov_path = tmp.path().join(".sqry/classpath/provenance.json");
assert!(prov_path.exists());
}
// Without a classpath file, an empty project directory fails with the
// detection error.
#[test]
fn test_pipeline_no_build_system_returns_error() {
let tmp = TempDir::new().unwrap();
let config = ClasspathConfig {
enabled: true,
..ClasspathConfig::default()
};
let result = run_classpath_pipeline(tmp.path(), &config);
assert!(result.is_err());
let err = result.unwrap_err().to_string();
assert!(
err.contains("No JVM build system detected"),
"Expected detection error, got: {err}"
);
}
// Maps an outcome to its variant name, used in panic messages above.
fn outcome_name(outcome: &JarScanOutcome) -> &'static str {
match outcome {
JarScanOutcome::Scanned { .. } => "Scanned",
JarScanOutcome::Cached { .. } => "Cached",
JarScanOutcome::Failed { .. } => "Failed",
}
}
}