crush_gpu/lib.rs
1//! `crush-gpu` — GPU-accelerated tile-based compression engine
2//!
3//! Implements a GDeflate-inspired GPU compression engine using 64KB independent
4//! tiles with 32-way sub-stream parallelism for massively parallel decompression.
5
6pub mod backend;
7pub mod engine;
8pub mod entropy;
9pub mod error;
10pub mod format;
11pub mod gdeflate;
12pub mod lz77;
13pub mod scorer;
14pub mod vectorize;
15
16use std::sync::atomic::AtomicBool;
17use std::sync::{Arc, OnceLock};
18
19use crush_core::error::Result;
20use crush_core::plugin::{CompressionAlgorithm, PluginMetadata, COMPRESSION_ALGORITHMS};
21use linkme::distributed_slice;
22
23// Re-export GPU device discovery types for CLI `plugins info` usage.
24pub use backend::{discover_gpu, GpuInfo, GpuVendor};
25
/// Four-byte magic identifying the gpu-deflate plugin in the crush-core
/// outer format.
///
/// Layout: ASCII `"CR"` (`0x43, 0x52`), format version `0x01`, plugin ID `0x03`.
pub const PLUGIN_MAGIC: [u8; 4] = [b'C', b'R', 0x01, 0x03];
30
31// ============================================================================
32// Process-global GPU plugin configuration
33// ============================================================================
34
/// Selects the GPU compute backend the plugin should favour.
///
/// The default is [`BackendPreference::Auto`].
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum BackendPreference {
    /// Let the plugin choose: CUDA is tried first (when the `cuda` feature is
    /// enabled), falling back to wgpu.
    #[default]
    Auto,
    /// Always use the CUDA backend (needs the `cuda` feature and an NVIDIA GPU).
    Cuda,
    /// Always use the wgpu backend (Vulkan / Metal / DX12).
    Wgpu,
}
46
47/// Process-global GPU plugin configuration.
48///
49/// Set once at CLI startup via [`configure()`]. The GPU plugin reads these
50/// settings when constructing [`engine::EngineConfig`] for compression and
51/// decompression.
52#[derive(Debug, Clone, Default)]
53pub struct GpuPluginConfig {
54 /// If `true`, never attempt GPU decompression — always use CPU fallback.
55 pub force_cpu: bool,
56 /// Specific GPU device to use. `None` means auto-select best available.
57 pub device_index: Option<u32>,
58 /// Which GPU compute backend to prefer.
59 pub backend: BackendPreference,
60}
61
62/// Cached process-global GPU plugin configuration.
63static GPU_PLUGIN_CONFIG: OnceLock<GpuPluginConfig> = OnceLock::new();
64
65/// Configure the GPU plugin with CLI/config-derived settings.
66///
67/// Must be called before any compression/decompression operations.
68/// Can only be called once per process (uses `OnceLock` internally).
69/// Subsequent calls are silently ignored.
70pub fn configure(config: GpuPluginConfig) {
71 let _ = GPU_PLUGIN_CONFIG.set(config);
72}
73
74/// Get the current GPU plugin configuration.
75///
76/// Returns a reference to the default config if [`configure()`] was never called.
77pub fn get_config() -> &'static GpuPluginConfig {
78 static DEFAULT_CONFIG: GpuPluginConfig = GpuPluginConfig {
79 force_cpu: false,
80 device_index: None,
81 backend: BackendPreference::Auto,
82 };
83 GPU_PLUGIN_CONFIG.get().unwrap_or(&DEFAULT_CONFIG)
84}
85
86// ============================================================================
87// Plugin implementation
88// ============================================================================
89
/// Zero-sized handle for the `gpu-deflate` algorithm; it is the value
/// registered with the crush-core plugin registry.
struct GpuDeflatePlugin;
92
93impl CompressionAlgorithm for GpuDeflatePlugin {
94 fn name(&self) -> &'static str {
95 "gpu-deflate"
96 }
97
98 fn metadata(&self) -> PluginMetadata {
99 PluginMetadata {
100 name: "gpu-deflate",
101 version: env!("CARGO_PKG_VERSION"),
102 magic_number: PLUGIN_MAGIC,
103 throughput: 2000.0,
104 compression_ratio: 0.65,
105 description:
106 "GPU-accelerated tile-based compression with 32-way parallel decompression",
107 }
108 }
109
110 fn compress(&self, input: &[u8], cancel_flag: Arc<AtomicBool>) -> Result<Vec<u8>> {
111 let config = engine::EngineConfig::default();
112 engine::compress(input, &config, &cancel_flag)
113 }
114
115 fn decompress(&self, input: &[u8], cancel_flag: Arc<AtomicBool>) -> Result<Vec<u8>> {
116 let plugin_cfg = get_config();
117 let config = engine::EngineConfig {
118 force_cpu: plugin_cfg.force_cpu,
119 ..engine::EngineConfig::default()
120 };
121 engine::decompress(input, &config, &cancel_flag)
122 }
123
124 fn detect(&self, file_header: &[u8]) -> bool {
125 // CGPU files start with the 4-byte magic [0x43, 0x47, 0x50, 0x55]
126 file_header.len() >= 4 && file_header[0..4] == format::CGPU_MAGIC
127 }
128}
129
130/// Compile-time plugin registration via `linkme` distributed slice.
131#[distributed_slice(COMPRESSION_ALGORITHMS)]
132static GPU_DEFLATE_PLUGIN: &dyn CompressionAlgorithm = &GpuDeflatePlugin;