Skip to main content

sqry_classpath/
pipeline.rs

1//! Classpath pipeline orchestration.
2//!
3//! Coordinates the full classpath analysis pipeline:
4//! detect → resolve → scan/cache → build index → emit graph nodes.
5//!
6//! This module is the single integration point called from the CLI when the
7//! `jvm-classpath` feature is enabled.
8
9// Classpath scan metrics fit in u32; casts are intentional
10#![allow(clippy::cast_possible_truncation)]
11
12use std::io::{BufRead, BufReader};
13use std::path::{Path, PathBuf};
14
15use log::{debug, info, warn};
16use rayon::prelude::*;
17
18use crate::bytecode::scan_jar;
19use crate::detect::{BuildSystem, discover_build_roots};
20use crate::graph::provenance::{ClasspathProvenance, ClasspathScope};
21use crate::resolve::{ClasspathEntry, ResolveConfig, ResolvedClasspath};
22use crate::stub::cache::StubCache;
23use crate::stub::index::ClasspathIndex;
24use crate::stub::model::ClassStub;
25use crate::{ClasspathError, ClasspathResult};
26
27// ---------------------------------------------------------------------------
28// Configuration
29// ---------------------------------------------------------------------------
30
/// Configuration for the classpath pipeline.
///
/// The [`Default`] implementation yields a disabled, full-depth configuration
/// with no overrides and a 60 second resolver timeout.
#[derive(Debug, Clone)]
pub struct ClasspathConfig {
    /// Whether classpath analysis is enabled.
    ///
    /// NOTE(review): the pipeline code visible in this file never reads this
    /// flag; presumably the caller checks it before invoking the pipeline —
    /// confirm.
    pub enabled: bool,
    /// Depth of classpath analysis.
    ///
    /// With [`ClasspathDepth::Shallow`] only entries marked as direct are
    /// scanned and recorded in provenance.
    pub depth: ClasspathDepth,
    /// Override build system (from `--build-system` flag).
    ///
    /// Forwarded unchanged to build-root discovery.
    pub build_system_override: Option<String>,
    /// Manual classpath file (from `--classpath-file` flag).
    ///
    /// When set, skips build system detection and resolution entirely.
    /// The file should contain one JAR path per line; blank lines and lines
    /// starting with `#` are ignored.
    pub classpath_file: Option<PathBuf>,
    /// Whether to bypass the stub cache and re-scan every JAR.
    pub force: bool,
    /// Subprocess timeout in seconds for build tool resolution.
    ///
    /// Copied into the resolver configuration of every discovered build root.
    pub timeout_secs: u64,
}
50
/// Depth of classpath analysis.
///
/// Controls which resolved classpath entries are scanned and recorded in
/// provenance.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ClasspathDepth {
    /// Only direct dependencies (entries whose `is_direct` flag is set).
    Shallow,
    /// All transitive dependencies (every resolved entry).
    Full,
}
59
60impl Default for ClasspathConfig {
61    fn default() -> Self {
62        Self {
63            enabled: false,
64            depth: ClasspathDepth::Full,
65            build_system_override: None,
66            classpath_file: None,
67            force: false,
68            timeout_secs: 60,
69        }
70    }
71}
72
73// ---------------------------------------------------------------------------
74// Pipeline result
75// ---------------------------------------------------------------------------
76
/// Result of the classpath pipeline.
#[derive(Debug)]
pub struct ClasspathPipelineResult {
    /// The built classpath index.
    pub index: ClasspathIndex,
    /// Provenance information for each JAR, sorted by JAR path.
    pub provenance: Vec<ClasspathProvenance>,
    /// Resolved classpaths grouped by module/root scope.
    pub resolved_classpaths: Vec<ResolvedClasspath>,
    /// Number of JARs whose stubs were obtained, whether by a fresh scan or
    /// from the stub cache. JARs that failed to scan are not counted.
    pub jars_scanned: usize,
    /// Number of class stubs collected across all JARs.
    pub classes_parsed: usize,
    /// Whether results came from cache: `true` only when at least one JAR
    /// was served from the stub cache and none had to be freshly scanned.
    pub from_cache: bool,
}
93
94// ---------------------------------------------------------------------------
95// Main entry point
96// ---------------------------------------------------------------------------
97
98/// Run the full classpath pipeline: detect → resolve → scan/cache → build index.
99///
100/// This is the main entry point called from the CLI when classpath analysis
101/// is enabled. The returned [`ClasspathPipelineResult`] contains the
102/// [`ClasspathIndex`] and provenance data needed by the graph emitter.
103///
104/// # Steps
105///
106/// 1. **Detect** the build system (or use the override / manual file).
107/// 2. **Resolve** the classpath via the appropriate build tool resolver.
108/// 3. **Scan** each JAR file for `.class` entries, using the [`StubCache`]
109///    for incremental re-use. JARs are scanned in parallel via rayon.
110/// 4. **Build** a merged [`ClasspathIndex`] from all collected stubs.
111/// 5. **Persist** the index and provenance to `.sqry/classpath/` for
112///    subsequent builds that skip the resolve step.
113///
114/// # Errors
115///
116/// Returns [`ClasspathError`] if detection, resolution, scanning, or
117/// persistence fails.
118pub fn run_classpath_pipeline(
119    project_root: &Path,
120    config: &ClasspathConfig,
121) -> ClasspathResult<ClasspathPipelineResult> {
122    info!("Starting classpath pipeline for {}", project_root.display());
123
124    // ── Step 1: Resolve classpath entries ───────────────────────────────
125    let resolved_classpaths = if let Some(ref classpath_file) = config.classpath_file {
126        resolve_from_manual_file(project_root, classpath_file)?
127    } else {
128        resolve_from_build_system(project_root, config)?
129    };
130
131    // Flatten all entries across modules.
132    let all_entries: Vec<&ClasspathEntry> = resolved_classpaths
133        .iter()
134        .flat_map(|cp| &cp.entries)
135        .collect();
136
137    // Apply depth filtering.
138    let entries_to_scan: Vec<&ClasspathEntry> = match config.depth {
139        ClasspathDepth::Full => all_entries,
140        ClasspathDepth::Shallow => all_entries.into_iter().filter(|e| e.is_direct).collect(),
141    };
142
143    info!(
144        "Classpath resolved: {} entries ({} after depth filtering)",
145        resolved_classpaths
146            .iter()
147            .map(|cp| cp.entries.len())
148            .sum::<usize>(),
149        entries_to_scan.len(),
150    );
151
152    // Deduplicate by JAR path (same JAR may appear in multiple modules).
153    let unique_jar_paths = deduplicate_jar_paths(&entries_to_scan);
154    info!("{} unique JAR files to scan", unique_jar_paths.len());
155
156    // ── Step 2: Scan JARs (parallel, with stub cache) ──────────────────
157    let stub_cache = StubCache::new(project_root);
158    let scan_results = scan_jars_parallel(&unique_jar_paths, &stub_cache, config.force);
159
160    let mut all_stubs: Vec<ClassStub> = Vec::new();
161    let mut jars_scanned: usize = 0;
162    let mut jars_from_cache: usize = 0;
163
164    for result in &scan_results {
165        match result {
166            JarScanOutcome::Scanned { jar_path, stubs } => {
167                let jar_str = jar_path.display().to_string();
168                for stub in stubs {
169                    let mut s = stub.clone();
170                    // Ensure source_jar is set even if scan_jar already set it,
171                    // and for cached stubs that may predate the field.
172                    if s.source_jar.is_none() {
173                        s.source_jar = Some(jar_str.clone());
174                    }
175                    all_stubs.push(s);
176                }
177                jars_scanned += 1;
178            }
179            JarScanOutcome::Cached { jar_path, stubs } => {
180                let jar_str = jar_path.display().to_string();
181                for stub in stubs {
182                    let mut s = stub.clone();
183                    if s.source_jar.is_none() {
184                        s.source_jar = Some(jar_str.clone());
185                    }
186                    all_stubs.push(s);
187                }
188                jars_from_cache += 1;
189            }
190            JarScanOutcome::Failed { jar_path, error } => {
191                warn!("Failed to scan JAR {}: {error}", jar_path.display());
192            }
193        }
194    }
195
196    let classes_parsed = all_stubs.len();
197    info!(
198        "Scanned {} JARs ({} from cache, {} fresh), {} classes total",
199        jars_scanned + jars_from_cache,
200        jars_from_cache,
201        jars_scanned,
202        classes_parsed,
203    );
204
205    // ── Step 3: Build provenance ───────────────────────────────────────
206    let provenance = build_provenance(&resolved_classpaths, config.depth);
207
208    // ── Step 4: Build index ────────────────────────────────────────────
209    let index = ClasspathIndex::build(all_stubs);
210    info!(
211        "Built classpath index: {} classes, {} packages",
212        index.classes.len(),
213        index.package_index.len(),
214    );
215
216    // ── Step 5: Persist index and provenance ───────────────────────────
217    let sqry_classpath_dir = project_root.join(".sqry").join("classpath");
218    persist_artifacts(&sqry_classpath_dir, &index, &provenance)?;
219
220    Ok(ClasspathPipelineResult {
221        index,
222        provenance,
223        resolved_classpaths,
224        jars_scanned: jars_scanned + jars_from_cache,
225        classes_parsed,
226        from_cache: jars_from_cache > 0 && jars_scanned == 0,
227    })
228}
229
230// ---------------------------------------------------------------------------
231// Resolution strategies
232// ---------------------------------------------------------------------------
233
234/// Read a manual classpath file (one JAR path per line).
235///
236/// Lines that are empty or start with `#` are skipped (comments).
237fn resolve_from_manual_file(
238    project_root: &Path,
239    classpath_file: &Path,
240) -> ClasspathResult<Vec<ResolvedClasspath>> {
241    info!("Reading manual classpath from {}", classpath_file.display());
242
243    let file = std::fs::File::open(classpath_file).map_err(|e| {
244        ClasspathError::ResolutionFailed(format!(
245            "Cannot open classpath file {}: {e}",
246            classpath_file.display()
247        ))
248    })?;
249
250    let reader = BufReader::new(file);
251    let mut entries = Vec::new();
252
253    for line in reader.lines() {
254        let line = line.map_err(|e| {
255            ClasspathError::ResolutionFailed(format!(
256                "Error reading classpath file {}: {e}",
257                classpath_file.display()
258            ))
259        })?;
260        let trimmed = line.trim();
261
262        // Skip empty lines and comments.
263        if trimmed.is_empty() || trimmed.starts_with('#') {
264            continue;
265        }
266
267        let jar_path = PathBuf::from(trimmed);
268        if !jar_path.exists() {
269            warn!(
270                "Classpath file entry does not exist: {}",
271                jar_path.display()
272            );
273            // Still include it — the scanner will report the error.
274        }
275
276        entries.push(ClasspathEntry {
277            jar_path,
278            coordinates: None,
279            is_direct: true, // Manual entries treated as direct.
280            source_jar: None,
281        });
282    }
283
284    info!("Manual classpath file: {} entries", entries.len());
285
286    Ok(vec![ResolvedClasspath {
287        module_name: "manual".to_string(),
288        module_root: project_root.to_path_buf(),
289        entries,
290    }])
291}
292
293/// Detect the build system and resolve the classpath via the appropriate resolver.
294fn resolve_from_build_system(
295    project_root: &Path,
296    config: &ClasspathConfig,
297) -> ClasspathResult<Vec<ResolvedClasspath>> {
298    let detected_roots =
299        discover_build_roots(project_root, config.build_system_override.as_deref());
300    if detected_roots.is_empty() {
301        return Err(ClasspathError::DetectionFailed(
302            "No JVM build system detected. Use --build-system to specify one, \
303             or --classpath-file to provide a manual classpath."
304                .to_string(),
305        ));
306    }
307
308    info!("Discovered {} JVM build roots", detected_roots.len());
309    let mut resolved = Vec::new();
310    for detection in detected_roots {
311        let Some(build_system) = detection.build_system else {
312            continue;
313        };
314        info!(
315            "Resolving {:?} classpath in {}",
316            build_system,
317            detection.project_root.display()
318        );
319
320        let resolve_config = ResolveConfig {
321            project_root: detection.project_root.clone(),
322            timeout_secs: config.timeout_secs,
323            cache_path: Some(detection.project_root.join(".sqry").join("classpath")),
324        };
325
326        let mut root_resolved = match build_system {
327            BuildSystem::Gradle => {
328                crate::resolve::gradle::resolve_gradle_classpath(&resolve_config)
329            }
330            BuildSystem::Maven => crate::resolve::maven::resolve_maven_classpath(&resolve_config),
331            BuildSystem::Bazel => crate::resolve::bazel::resolve_bazel_classpath(&resolve_config),
332            BuildSystem::Sbt => crate::resolve::sbt::resolve_sbt_classpath(&resolve_config),
333        }?;
334        resolved.append(&mut root_resolved);
335    }
336
337    resolved.sort_by(|a, b| {
338        a.module_root
339            .cmp(&b.module_root)
340            .then_with(|| a.module_name.cmp(&b.module_name))
341    });
342    Ok(resolved)
343}
344
345// ---------------------------------------------------------------------------
346// JAR scanning
347// ---------------------------------------------------------------------------
348
349/// Outcome of scanning a single JAR file.
350enum JarScanOutcome {
351    /// JAR was freshly scanned and parsed.
352    Scanned {
353        #[allow(dead_code)] // Used in tests for pattern matching.
354        jar_path: PathBuf,
355        stubs: Vec<ClassStub>,
356    },
357    /// Stubs were loaded from the stub cache (JAR hash matched).
358    Cached {
359        #[allow(dead_code)] // Used in tests for pattern matching.
360        jar_path: PathBuf,
361        stubs: Vec<ClassStub>,
362    },
363    /// JAR could not be scanned.
364    Failed { jar_path: PathBuf, error: String },
365}
366
367/// Deduplicate JAR paths, preserving the first occurrence.
368fn deduplicate_jar_paths(entries: &[&ClasspathEntry]) -> Vec<PathBuf> {
369    let mut seen = std::collections::HashSet::new();
370    let mut unique = Vec::new();
371
372    for entry in entries {
373        if seen.insert(&entry.jar_path) {
374            unique.push(entry.jar_path.clone());
375        }
376    }
377
378    unique
379}
380
381/// Scan JAR files in parallel using rayon, with stub cache for incremental builds.
382///
383/// Each JAR is either loaded from the stub cache (if the JAR's SHA-256 hash
384/// matches a cached entry) or freshly scanned. Freshly scanned stubs are
385/// written to the cache for future use.
386fn scan_jars_parallel(
387    jar_paths: &[PathBuf],
388    stub_cache: &StubCache,
389    force: bool,
390) -> Vec<JarScanOutcome> {
391    jar_paths
392        .par_iter()
393        .map(|jar_path| scan_single_jar(jar_path, stub_cache, force))
394        .collect()
395}
396
397/// Scan a single JAR file, using the stub cache when possible.
398fn scan_single_jar(jar_path: &Path, stub_cache: &StubCache, force: bool) -> JarScanOutcome {
399    // Try cache first (unless force is set).
400    if !force && let Some(cached_stubs) = stub_cache.get(jar_path) {
401        debug!(
402            "Cache hit for {} ({} stubs)",
403            jar_path.display(),
404            cached_stubs.len()
405        );
406        return JarScanOutcome::Cached {
407            jar_path: jar_path.to_path_buf(),
408            stubs: cached_stubs,
409        };
410    }
411
412    // Fresh scan.
413    match scan_jar(jar_path) {
414        Ok(stubs) => {
415            debug!("Scanned {} ({} classes)", jar_path.display(), stubs.len());
416
417            // Write to cache (non-fatal on error).
418            if let Err(e) = stub_cache.put(jar_path, &stubs) {
419                warn!("Failed to cache stubs for {}: {e}", jar_path.display());
420            }
421
422            JarScanOutcome::Scanned {
423                jar_path: jar_path.to_path_buf(),
424                stubs,
425            }
426        }
427        Err(e) => JarScanOutcome::Failed {
428            jar_path: jar_path.to_path_buf(),
429            error: e.to_string(),
430        },
431    }
432}
433
434// ---------------------------------------------------------------------------
435// Provenance construction
436// ---------------------------------------------------------------------------
437
438/// Build provenance records from classpath entries.
439fn build_provenance(
440    resolved_classpaths: &[ResolvedClasspath],
441    depth: ClasspathDepth,
442) -> Vec<ClasspathProvenance> {
443    let mut by_jar: std::collections::HashMap<PathBuf, ClasspathProvenance> =
444        std::collections::HashMap::new();
445
446    for classpath in resolved_classpaths {
447        for entry in &classpath.entries {
448            if matches!(depth, ClasspathDepth::Shallow) && !entry.is_direct {
449                continue;
450            }
451
452            let provenance =
453                by_jar
454                    .entry(entry.jar_path.clone())
455                    .or_insert_with(|| ClasspathProvenance {
456                        jar_path: entry.jar_path.clone(),
457                        coordinates: entry.coordinates.clone(),
458                        is_direct: entry.is_direct,
459                        scopes: Vec::new(),
460                    });
461
462            if provenance.coordinates.is_none() {
463                provenance.coordinates.clone_from(&entry.coordinates);
464            }
465            provenance.is_direct &= entry.is_direct;
466
467            let scope = ClasspathScope {
468                module_name: classpath.module_name.clone(),
469                module_root: classpath.module_root.clone(),
470                is_direct: entry.is_direct,
471            };
472            if !provenance.scopes.iter().any(|existing| existing == &scope) {
473                provenance.scopes.push(scope);
474            }
475        }
476    }
477
478    let mut result: Vec<_> = by_jar.into_values().collect();
479    result.sort_by(|a, b| a.jar_path.cmp(&b.jar_path));
480    result
481}
482
483// ---------------------------------------------------------------------------
484// Persistence
485// ---------------------------------------------------------------------------
486
487/// Persist the classpath index and provenance to `.sqry/classpath/`.
488fn persist_artifacts(
489    classpath_dir: &Path,
490    index: &ClasspathIndex,
491    provenance: &[ClasspathProvenance],
492) -> ClasspathResult<()> {
493    std::fs::create_dir_all(classpath_dir).map_err(|e| {
494        ClasspathError::IndexError(format!(
495            "Cannot create classpath directory {}: {e}",
496            classpath_dir.display()
497        ))
498    })?;
499
500    // Persist index.
501    let index_path = classpath_dir.join("index.sqry");
502    index.save(&index_path)?;
503    info!("Saved classpath index to {}", index_path.display());
504
505    // Persist provenance.
506    let provenance_path = classpath_dir.join("provenance.json");
507    let provenance_json = serde_json::to_string_pretty(provenance)
508        .map_err(|e| ClasspathError::IndexError(format!("Cannot serialize provenance: {e}")))?;
509    std::fs::write(&provenance_path, provenance_json).map_err(|e| {
510        ClasspathError::IndexError(format!(
511            "Cannot write provenance to {}: {e}",
512            provenance_path.display()
513        ))
514    })?;
515    info!("Saved provenance to {}", provenance_path.display());
516
517    Ok(())
518}
519
520// ---------------------------------------------------------------------------
521// Tests
522// ---------------------------------------------------------------------------
523
524#[cfg(test)]
525mod tests {
526    use super::*;
527    use std::io::Write;
528    use tempfile::TempDir;
529    use zip::write::SimpleFileOptions;
530
531    /// Build a minimal valid .class file for testing.
532    fn build_minimal_class(class_name: &str) -> Vec<u8> {
533        let mut bytes = Vec::new();
534
535        // Magic
536        bytes.extend_from_slice(&0xCAFE_BABEu32.to_be_bytes());
537        // Minor version
538        bytes.extend_from_slice(&0u16.to_be_bytes());
539        // Major version (52 = Java 8)
540        bytes.extend_from_slice(&52u16.to_be_bytes());
541
542        // Constant pool: 5 entries
543        let class_bytes = class_name.as_bytes();
544        let object_bytes = b"java/lang/Object";
545
546        let cp_count: u16 = 5;
547        bytes.extend_from_slice(&cp_count.to_be_bytes());
548
549        // #1: CONSTANT_Utf8 <class_name>
550        bytes.push(1);
551        bytes.extend_from_slice(&(class_bytes.len() as u16).to_be_bytes());
552        bytes.extend_from_slice(class_bytes);
553
554        // #2: CONSTANT_Class -> #1
555        bytes.push(7);
556        bytes.extend_from_slice(&1u16.to_be_bytes());
557
558        // #3: CONSTANT_Utf8 "java/lang/Object"
559        bytes.push(1);
560        bytes.extend_from_slice(&(object_bytes.len() as u16).to_be_bytes());
561        bytes.extend_from_slice(object_bytes);
562
563        // #4: CONSTANT_Class -> #3
564        bytes.push(7);
565        bytes.extend_from_slice(&3u16.to_be_bytes());
566
567        // Access flags: ACC_PUBLIC | ACC_SUPER
568        bytes.extend_from_slice(&0x0021u16.to_be_bytes());
569        // This class: #2
570        bytes.extend_from_slice(&2u16.to_be_bytes());
571        // Super class: #4
572        bytes.extend_from_slice(&4u16.to_be_bytes());
573        // Interfaces count: 0
574        bytes.extend_from_slice(&0u16.to_be_bytes());
575        // Fields count: 0
576        bytes.extend_from_slice(&0u16.to_be_bytes());
577        // Methods count: 0
578        bytes.extend_from_slice(&0u16.to_be_bytes());
579        // Attributes count: 0
580        bytes.extend_from_slice(&0u16.to_be_bytes());
581
582        bytes
583    }
584
585    /// Create an in-memory JAR (ZIP) file containing test classes.
586    fn build_test_jar(entries: &[(&str, &[u8])]) -> Vec<u8> {
587        let mut buf = Vec::new();
588        {
589            let mut writer = zip::ZipWriter::new(std::io::Cursor::new(&mut buf));
590            let options =
591                SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored);
592            for (name, data) in entries {
593                writer.start_file(*name, options).unwrap();
594                writer.write_all(data).unwrap();
595            }
596            writer.finish().unwrap();
597        }
598        buf
599    }
600
601    /// Write a test JAR file to disk and return its path.
602    fn write_test_jar(dir: &Path, name: &str, classes: &[(&str, &[u8])]) -> PathBuf {
603        let jar_bytes = build_test_jar(classes);
604        let jar_path = dir.join(name);
605        std::fs::write(&jar_path, &jar_bytes).unwrap();
606        jar_path
607    }
608
609    // ── Default config tests ───────────────────────────────────────────
610
611    #[test]
612    fn test_default_config() {
613        let config = ClasspathConfig::default();
614        assert!(!config.enabled);
615        assert_eq!(config.depth, ClasspathDepth::Full);
616        assert!(config.build_system_override.is_none());
617        assert!(config.classpath_file.is_none());
618        assert!(!config.force);
619        assert_eq!(config.timeout_secs, 60);
620    }
621
622    // ── Manual classpath file tests ────────────────────────────────────
623
624    #[test]
625    fn test_resolve_from_manual_file_basic() {
626        let tmp = TempDir::new().unwrap();
627
628        // Create some fake JAR files.
629        let jar_a = tmp.path().join("a.jar");
630        let jar_b = tmp.path().join("b.jar");
631        std::fs::write(&jar_a, b"fake jar a").unwrap();
632        std::fs::write(&jar_b, b"fake jar b").unwrap();
633
634        // Write classpath file.
635        let cp_file = tmp.path().join("classpath.txt");
636        std::fs::write(
637            &cp_file,
638            format!("{}\n{}\n", jar_a.display(), jar_b.display()),
639        )
640        .unwrap();
641
642        let result = resolve_from_manual_file(tmp.path(), &cp_file).unwrap();
643        assert_eq!(result.len(), 1);
644        assert_eq!(result[0].module_name, "manual");
645        assert_eq!(result[0].module_root, tmp.path());
646        assert_eq!(result[0].entries.len(), 2);
647        assert!(result[0].entries[0].is_direct);
648        assert!(result[0].entries[1].is_direct);
649    }
650
651    #[test]
652    fn test_resolve_from_manual_file_skips_comments_and_blanks() {
653        let tmp = TempDir::new().unwrap();
654        let jar_a = tmp.path().join("a.jar");
655        std::fs::write(&jar_a, b"fake jar a").unwrap();
656
657        let cp_file = tmp.path().join("classpath.txt");
658        std::fs::write(
659            &cp_file,
660            format!(
661                "# This is a comment\n\n{}\n\n# Another comment\n",
662                jar_a.display()
663            ),
664        )
665        .unwrap();
666
667        let result = resolve_from_manual_file(tmp.path(), &cp_file).unwrap();
668        assert_eq!(result[0].entries.len(), 1);
669    }
670
671    #[test]
672    fn test_resolve_from_manual_file_nonexistent_file() {
673        let result =
674            resolve_from_manual_file(Path::new("/tmp"), Path::new("/nonexistent/classpath.txt"));
675        assert!(result.is_err());
676        let err = result.unwrap_err().to_string();
677        assert!(err.contains("Cannot open classpath file"));
678    }
679
680    #[test]
681    fn test_resolve_from_manual_file_nonexistent_jars_included() {
682        let tmp = TempDir::new().unwrap();
683        let cp_file = tmp.path().join("classpath.txt");
684        std::fs::write(&cp_file, "/nonexistent/jar.jar\n").unwrap();
685
686        let result = resolve_from_manual_file(tmp.path(), &cp_file).unwrap();
687        assert_eq!(result[0].entries.len(), 1);
688        assert_eq!(
689            result[0].entries[0].jar_path,
690            PathBuf::from("/nonexistent/jar.jar")
691        );
692    }
693
694    // ── Deduplication tests ────────────────────────────────────────────
695
696    #[test]
697    fn test_deduplicate_jar_paths() {
698        let entries = vec![
699            ClasspathEntry {
700                jar_path: PathBuf::from("/a.jar"),
701                coordinates: None,
702                is_direct: true,
703                source_jar: None,
704            },
705            ClasspathEntry {
706                jar_path: PathBuf::from("/b.jar"),
707                coordinates: None,
708                is_direct: true,
709                source_jar: None,
710            },
711            ClasspathEntry {
712                jar_path: PathBuf::from("/a.jar"),
713                coordinates: None,
714                is_direct: false,
715                source_jar: None,
716            },
717        ];
718        let refs: Vec<&ClasspathEntry> = entries.iter().collect();
719        let unique = deduplicate_jar_paths(&refs);
720        assert_eq!(unique.len(), 2);
721        assert_eq!(unique[0], PathBuf::from("/a.jar"));
722        assert_eq!(unique[1], PathBuf::from("/b.jar"));
723    }
724
725    // ── Provenance construction tests ──────────────────────────────────
726
727    #[test]
728    fn test_build_provenance() {
729        let classpaths = vec![ResolvedClasspath {
730            module_name: "app".to_string(),
731            module_root: PathBuf::from("/repo/app"),
732            entries: vec![
733                ClasspathEntry {
734                    jar_path: PathBuf::from("/guava.jar"),
735                    coordinates: Some("com.google.guava:guava:33.0.0".to_string()),
736                    is_direct: true,
737                    source_jar: None,
738                },
739                ClasspathEntry {
740                    jar_path: PathBuf::from("/commons.jar"),
741                    coordinates: None,
742                    is_direct: false,
743                    source_jar: None,
744                },
745            ],
746        }];
747        let prov = build_provenance(&classpaths, ClasspathDepth::Full);
748
749        assert_eq!(prov.len(), 2);
750        assert_eq!(prov[0].jar_path, PathBuf::from("/commons.jar"));
751        assert_eq!(
752            prov[1].coordinates,
753            Some("com.google.guava:guava:33.0.0".to_string())
754        );
755        assert!(!prov[0].is_direct);
756        assert!(prov[1].is_direct);
757        assert!(prov[0].coordinates.is_none());
758        assert_eq!(prov[1].scopes[0].module_root, PathBuf::from("/repo/app"));
759    }
760
761    #[test]
762    fn test_build_provenance_mixed_directness_same_jar_is_conservative() {
763        let shared_jar = PathBuf::from("/shared.jar");
764        let classpaths = vec![
765            ResolvedClasspath {
766                module_name: "app".to_string(),
767                module_root: PathBuf::from("/repo/app"),
768                entries: vec![ClasspathEntry {
769                    jar_path: shared_jar.clone(),
770                    coordinates: Some("com.example:shared:1.0.0".to_string()),
771                    is_direct: true,
772                    source_jar: None,
773                }],
774            },
775            ResolvedClasspath {
776                module_name: "worker".to_string(),
777                module_root: PathBuf::from("/repo/worker"),
778                entries: vec![ClasspathEntry {
779                    jar_path: shared_jar.clone(),
780                    coordinates: Some("com.example:shared:1.0.0".to_string()),
781                    is_direct: false,
782                    source_jar: None,
783                }],
784            },
785        ];
786        let prov = build_provenance(&classpaths, ClasspathDepth::Full);
787
788        assert_eq!(prov.len(), 1);
789        assert_eq!(prov[0].jar_path, shared_jar);
790        assert!(
791            !prov[0].is_direct,
792            "aggregate directness should be conservative when scopes disagree"
793        );
794        assert!(
795            prov[0].has_direct_scope(),
796            "per-scope metadata should retain the direct scope"
797        );
798        assert_eq!(prov[0].scopes.len(), 2);
799    }
800
801    // ── Scan + cache integration tests ─────────────────────────────────
802
803    #[test]
804    fn test_scan_single_jar_fresh() {
805        let tmp = TempDir::new().unwrap();
806        let class_a = build_minimal_class("com/example/Foo");
807        let jar_path = write_test_jar(
808            tmp.path(),
809            "test.jar",
810            &[("com/example/Foo.class", &class_a)],
811        );
812
813        let cache = StubCache::new(tmp.path());
814        let outcome = scan_single_jar(&jar_path, &cache, false);
815
816        match outcome {
817            JarScanOutcome::Scanned { stubs, .. } => {
818                assert_eq!(stubs.len(), 1);
819                assert_eq!(stubs[0].fqn, "com.example.Foo");
820            }
821            other => panic!("Expected Scanned, got {:?}", outcome_name(&other)),
822        }
823    }
824
825    #[test]
826    fn test_scan_single_jar_cached() {
827        let tmp = TempDir::new().unwrap();
828        let class_a = build_minimal_class("com/example/Bar");
829        let jar_path = write_test_jar(
830            tmp.path(),
831            "test.jar",
832            &[("com/example/Bar.class", &class_a)],
833        );
834
835        let cache = StubCache::new(tmp.path());
836
837        // First scan populates cache.
838        let outcome = scan_single_jar(&jar_path, &cache, false);
839        assert!(matches!(outcome, JarScanOutcome::Scanned { .. }));
840
841        // Second scan should hit cache.
842        let outcome = scan_single_jar(&jar_path, &cache, false);
843        match outcome {
844            JarScanOutcome::Cached { stubs, .. } => {
845                assert_eq!(stubs.len(), 1);
846                assert_eq!(stubs[0].fqn, "com.example.Bar");
847            }
848            other => panic!("Expected Cached, got {:?}", outcome_name(&other)),
849        }
850    }
851
852    #[test]
853    fn test_scan_single_jar_force_bypasses_cache() {
854        let tmp = TempDir::new().unwrap();
855        let class_a = build_minimal_class("com/example/Baz");
856        let jar_path = write_test_jar(
857            tmp.path(),
858            "test.jar",
859            &[("com/example/Baz.class", &class_a)],
860        );
861
862        let cache = StubCache::new(tmp.path());
863
864        // Populate cache.
865        let _ = scan_single_jar(&jar_path, &cache, false);
866
867        // Force should bypass cache.
868        let outcome = scan_single_jar(&jar_path, &cache, true);
869        assert!(
870            matches!(outcome, JarScanOutcome::Scanned { .. }),
871            "force=true should bypass cache"
872        );
873    }
874
875    #[test]
876    fn test_scan_single_jar_nonexistent() {
877        let tmp = TempDir::new().unwrap();
878        let cache = StubCache::new(tmp.path());
879        let outcome = scan_single_jar(Path::new("/nonexistent.jar"), &cache, false);
880        assert!(
881            matches!(outcome, JarScanOutcome::Failed { .. }),
882            "Should fail for nonexistent JAR"
883        );
884    }
885
886    // ── Parallel scan tests ────────────────────────────────────────────
887
888    #[test]
889    #[allow(clippy::match_same_arms)] // Arms separated for documentation clarity
890    #[allow(clippy::match_wildcard_for_single_variants)] // Wildcard covers future variants
891    fn test_scan_jars_parallel_multiple() {
892        let tmp = TempDir::new().unwrap();
893        let class_a = build_minimal_class("com/example/A");
894        let class_b = build_minimal_class("com/example/B");
895
896        let jar_a = write_test_jar(tmp.path(), "a.jar", &[("com/example/A.class", &class_a)]);
897        let jar_b = write_test_jar(tmp.path(), "b.jar", &[("com/example/B.class", &class_b)]);
898
899        let cache = StubCache::new(tmp.path());
900        let results = scan_jars_parallel(&[jar_a, jar_b], &cache, false);
901
902        assert_eq!(results.len(), 2);
903        let total_stubs: usize = results
904            .iter()
905            .filter_map(|r| match r {
906                #[allow(clippy::match_same_arms)] // Pipeline stage arms separated for traceability
907                JarScanOutcome::Scanned { stubs, .. } | JarScanOutcome::Cached { stubs, .. } => {
908                    Some(stubs.len())
909                }
910                _ => None,
911            })
912            .sum();
913        assert_eq!(total_stubs, 2);
914    }
915
916    // ── Persistence tests ──────────────────────────────────────────────
917
918    #[test]
919    fn test_persist_artifacts_roundtrip() {
920        let tmp = TempDir::new().unwrap();
921        let classpath_dir = tmp.path().join("classpath");
922
923        let index = ClasspathIndex::build(vec![]);
924        let provenance = vec![ClasspathProvenance {
925            jar_path: PathBuf::from("/test.jar"),
926            coordinates: Some("test:test:1.0".to_string()),
927            is_direct: true,
928            scopes: vec![ClasspathScope {
929                module_name: "manual".to_string(),
930                module_root: tmp.path().to_path_buf(),
931                is_direct: true,
932            }],
933        }];
934
935        persist_artifacts(&classpath_dir, &index, &provenance).unwrap();
936
937        // Verify index file exists and is loadable.
938        let index_path = classpath_dir.join("index.sqry");
939        assert!(index_path.exists());
940        let loaded_index = ClasspathIndex::load(&index_path).unwrap();
941        assert_eq!(loaded_index.classes.len(), 0);
942
943        // Verify provenance file exists and is valid JSON.
944        let prov_path = classpath_dir.join("provenance.json");
945        assert!(prov_path.exists());
946        let prov_json = std::fs::read_to_string(&prov_path).unwrap();
947        let loaded_prov: Vec<ClasspathProvenance> = serde_json::from_str(&prov_json).unwrap();
948        assert_eq!(loaded_prov.len(), 1);
949        assert_eq!(
950            loaded_prov[0].coordinates,
951            Some("test:test:1.0".to_string())
952        );
953    }
954
955    // ── Depth filtering tests ──────────────────────────────────────────
956
957    #[test]
958    fn test_depth_shallow_filters_transitive() {
959        let tmp = TempDir::new().unwrap();
960
961        let class_d = build_minimal_class("com/example/Direct");
962        let class_t = build_minimal_class("com/example/Transitive");
963
964        let jar_d = write_test_jar(
965            tmp.path(),
966            "direct.jar",
967            &[("com/example/Direct.class", &class_d)],
968        );
969        let jar_t = write_test_jar(
970            tmp.path(),
971            "transitive.jar",
972            &[("com/example/Transitive.class", &class_t)],
973        );
974
975        // Write a manual classpath file.
976        let cp_file = tmp.path().join("classpath.txt");
977        std::fs::write(
978            &cp_file,
979            format!("{}\n{}\n", jar_d.display(), jar_t.display()),
980        )
981        .unwrap();
982
983        // Manually create resolved classpaths with mixed direct/transitive.
984        let entries = [
985            ClasspathEntry {
986                jar_path: jar_d,
987                coordinates: None,
988                is_direct: true,
989                source_jar: None,
990            },
991            ClasspathEntry {
992                jar_path: jar_t,
993                coordinates: None,
994                is_direct: false,
995                source_jar: None,
996            },
997        ];
998        let all_refs: Vec<&ClasspathEntry> = entries.iter().collect();
999
1000        // Full depth should include both.
1001        let full: Vec<&ClasspathEntry> = all_refs.clone();
1002        assert_eq!(full.len(), 2);
1003
1004        // Shallow depth should only include direct.
1005        let shallow: Vec<&ClasspathEntry> = all_refs.into_iter().filter(|e| e.is_direct).collect();
1006        assert_eq!(shallow.len(), 1);
1007        assert!(shallow[0].is_direct);
1008    }
1009
1010    // ── Full pipeline test with manual file ────────────────────────────
1011
1012    #[test]
1013    fn test_full_pipeline_with_manual_file() {
1014        let tmp = TempDir::new().unwrap();
1015
1016        let class_a = build_minimal_class("com/example/Alpha");
1017        let class_b = build_minimal_class("com/example/Beta");
1018
1019        let jar_path = write_test_jar(
1020            tmp.path(),
1021            "deps.jar",
1022            &[
1023                ("com/example/Alpha.class", &class_a),
1024                ("com/example/Beta.class", &class_b),
1025            ],
1026        );
1027
1028        // Write classpath file.
1029        let cp_file = tmp.path().join("classpath.txt");
1030        std::fs::write(&cp_file, format!("{}\n", jar_path.display())).unwrap();
1031
1032        let config = ClasspathConfig {
1033            enabled: true,
1034            depth: ClasspathDepth::Full,
1035            build_system_override: None,
1036            classpath_file: Some(cp_file),
1037            force: false,
1038            timeout_secs: 30,
1039        };
1040
1041        let result = run_classpath_pipeline(tmp.path(), &config).unwrap();
1042        assert_eq!(result.jars_scanned, 1);
1043        assert_eq!(result.classes_parsed, 2);
1044        assert_eq!(result.index.classes.len(), 2);
1045        assert!(result.index.lookup_fqn("com.example.Alpha").is_some());
1046        assert!(result.index.lookup_fqn("com.example.Beta").is_some());
1047        assert_eq!(result.provenance.len(), 1);
1048
1049        // Verify persistence.
1050        let index_path = tmp.path().join(".sqry/classpath/index.sqry");
1051        assert!(index_path.exists());
1052        let prov_path = tmp.path().join(".sqry/classpath/provenance.json");
1053        assert!(prov_path.exists());
1054    }
1055
1056    #[test]
1057    fn test_pipeline_no_build_system_returns_error() {
1058        let tmp = TempDir::new().unwrap();
1059        let config = ClasspathConfig {
1060            enabled: true,
1061            ..ClasspathConfig::default()
1062        };
1063
1064        let result = run_classpath_pipeline(tmp.path(), &config);
1065        assert!(result.is_err());
1066        let err = result.unwrap_err().to_string();
1067        assert!(
1068            err.contains("No JVM build system detected"),
1069            "Expected detection error, got: {err}"
1070        );
1071    }
1072
1073    // ── Helper for test output ─────────────────────────────────────────
1074
1075    fn outcome_name(outcome: &JarScanOutcome) -> &'static str {
1076        match outcome {
1077            JarScanOutcome::Scanned { .. } => "Scanned",
1078            JarScanOutcome::Cached { .. } => "Cached",
1079            JarScanOutcome::Failed { .. } => "Failed",
1080        }
1081    }
1082}