Skip to main content

crush_gpu/
lib.rs

//! `crush-gpu` — GPU-accelerated tile-based compression engine
//!
//! Implements a GDeflate-inspired GPU compression engine using 64KB independent
//! tiles with 32-way sub-stream parallelism for massively parallel decompression.
5
6pub mod backend;
7pub mod engine;
8pub mod entropy;
9pub mod error;
10pub mod format;
11pub mod gdeflate;
12pub mod lz77;
13pub mod scorer;
14pub mod vectorize;
15
16use std::sync::atomic::AtomicBool;
17use std::sync::{Arc, OnceLock};
18
19use crush_core::error::Result;
20use crush_core::plugin::{CompressionAlgorithm, PluginMetadata, COMPRESSION_ALGORITHMS};
21use linkme::distributed_slice;
22
23// Re-export GPU device discovery types for CLI `plugins info` usage.
24pub use backend::{discover_gpu, GpuInfo, GpuVendor};
25
/// Magic number identifying the gpu-deflate plugin in the crush-core outer format.
///
/// Layout: `[0x43, 0x52, 0x01, plugin_id]` — ASCII `"CR"`, format version 1,
/// then plugin ID `0x03`.
pub const PLUGIN_MAGIC: [u8; 4] = [b'C', b'R', 0x01, 0x03];
30
31// ============================================================================
32// Process-global GPU plugin configuration
33// ============================================================================
34
/// Selects which GPU compute backend the plugin should prefer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum BackendPreference {
    /// Auto-select: try CUDA first (if the feature is enabled), then wgpu.
    ///
    /// This is the [`Default`] variant.
    #[default]
    Auto,
    /// Force the CUDA backend (requires the `cuda` feature and an NVIDIA GPU).
    Cuda,
    /// Force the wgpu backend (Vulkan / Metal / DX12).
    Wgpu,
}
46
47/// Process-global GPU plugin configuration.
48///
49/// Set once at CLI startup via [`configure()`]. The GPU plugin reads these
50/// settings when constructing [`engine::EngineConfig`] for compression and
51/// decompression.
52#[derive(Debug, Clone, Default)]
53pub struct GpuPluginConfig {
54    /// If `true`, never attempt GPU decompression — always use CPU fallback.
55    pub force_cpu: bool,
56    /// Specific GPU device to use. `None` means auto-select best available.
57    pub device_index: Option<u32>,
58    /// Which GPU compute backend to prefer.
59    pub backend: BackendPreference,
60}
61
62/// Cached process-global GPU plugin configuration.
63static GPU_PLUGIN_CONFIG: OnceLock<GpuPluginConfig> = OnceLock::new();
64
65/// Configure the GPU plugin with CLI/config-derived settings.
66///
67/// Must be called before any compression/decompression operations.
68/// Can only be called once per process (uses `OnceLock` internally).
69/// Subsequent calls are silently ignored.
70pub fn configure(config: GpuPluginConfig) {
71    let _ = GPU_PLUGIN_CONFIG.set(config);
72}
73
74/// Get the current GPU plugin configuration.
75///
76/// Returns a reference to the default config if [`configure()`] was never called.
77pub fn get_config() -> &'static GpuPluginConfig {
78    static DEFAULT_CONFIG: GpuPluginConfig = GpuPluginConfig {
79        force_cpu: false,
80        device_index: None,
81        backend: BackendPreference::Auto,
82    };
83    GPU_PLUGIN_CONFIG.get().unwrap_or(&DEFAULT_CONFIG)
84}
85
86// ============================================================================
87// Plugin implementation
88// ============================================================================
89
/// Zero-sized `gpu-deflate` plugin type registered into the crush-core plugin
/// registry; all behaviour lives in its [`CompressionAlgorithm`] impl.
struct GpuDeflatePlugin;
92
93impl CompressionAlgorithm for GpuDeflatePlugin {
94    fn name(&self) -> &'static str {
95        "gpu-deflate"
96    }
97
98    fn metadata(&self) -> PluginMetadata {
99        PluginMetadata {
100            name: "gpu-deflate",
101            version: env!("CARGO_PKG_VERSION"),
102            magic_number: PLUGIN_MAGIC,
103            throughput: 2000.0,
104            compression_ratio: 0.65,
105            description:
106                "GPU-accelerated tile-based compression with 32-way parallel decompression",
107        }
108    }
109
110    fn compress(&self, input: &[u8], cancel_flag: Arc<AtomicBool>) -> Result<Vec<u8>> {
111        let config = engine::EngineConfig::default();
112        engine::compress(input, &config, &cancel_flag)
113    }
114
115    fn decompress(&self, input: &[u8], cancel_flag: Arc<AtomicBool>) -> Result<Vec<u8>> {
116        let plugin_cfg = get_config();
117        let config = engine::EngineConfig {
118            force_cpu: plugin_cfg.force_cpu,
119            ..engine::EngineConfig::default()
120        };
121        engine::decompress(input, &config, &cancel_flag)
122    }
123
124    fn detect(&self, file_header: &[u8]) -> bool {
125        // CGPU files start with the 4-byte magic [0x43, 0x47, 0x50, 0x55]
126        file_header.len() >= 4 && file_header[0..4] == format::CGPU_MAGIC
127    }
128}
129
130/// Compile-time plugin registration via `linkme` distributed slice.
131#[distributed_slice(COMPRESSION_ALGORITHMS)]
132static GPU_DEFLATE_PLUGIN: &dyn CompressionAlgorithm = &GpuDeflatePlugin;