Skip to main content

embeddenator_io/io/
profiles.rs

1//! Compression profiles for different filesystem use cases
2//!
3//! This module provides pre-configured compression profiles optimized for
4//! different types of data found in operating systems and filesystems.
5//!
6//! ## Available Profiles
7//!
//! | Profile     | Codec | Level | Use Case                          |
//! |-------------|-------|-------|-----------------------------------|
//! | `Kernel`    | zstd  | 19    | Boot files, vmlinuz, initramfs    |
//! | `Libraries` | zstd  | 9     | Shared libraries (.so, .dll)      |
//! | `Binaries`  | zstd  | 6     | Executables (/bin, /usr/bin)      |
//! | `Config`    | lz4   | -     | Configuration files (/etc)        |
//! | `Runtime`   | none  | -     | tmpfs, frequently mutating data   |
//! | `Archive`   | zstd  | 22    | Cold storage, backups             |
//! | `Balanced`  | zstd  | 3     | General purpose default           |
//! | `Database`  | zstd  | 5     | Databases, logs, journals         |
//! | `Media`     | none  | -     | Pre-compressed media and archives |
17//!
18//! ## Path-Based Auto-Selection
19//!
20//! The `CompressionProfiler` can automatically select appropriate profiles
21//! based on file paths and extensions:
22//!
23//! ```rust,no_run
24//! use embeddenator_io::CompressionProfiler;
25//!
26//! let profiler = CompressionProfiler::default();
27//!
28//! // Kernel files get maximum compression
29//! let profile = profiler.for_path("/boot/vmlinuz");
30//! assert_eq!(profile.name, "Kernel");
31//!
32//! // Config files get fast LZ4
33//! let profile = profiler.for_path("/etc/passwd");
34//! assert_eq!(profile.name, "Config");
35//! ```
36
37use super::envelope::{BinaryWriteOptions, CompressionCodec};
38
39/// A compression profile with codec, level, and metadata
40#[derive(Clone, Debug)]
41pub struct CompressionProfile {
42    /// Profile name for identification
43    pub name: &'static str,
44    /// Compression codec to use
45    pub codec: CompressionCodec,
46    /// Compression level (codec-specific, None = default)
47    pub level: Option<i32>,
48    /// Expected compression ratio (for planning)
49    pub expected_ratio: f32,
50    /// Brief description
51    pub description: &'static str,
52}
53
54impl CompressionProfile {
55    /// Create a new compression profile
56    pub const fn new(
57        name: &'static str,
58        codec: CompressionCodec,
59        level: Option<i32>,
60        expected_ratio: f32,
61        description: &'static str,
62    ) -> Self {
63        Self {
64            name,
65            codec,
66            level,
67            expected_ratio,
68            description,
69        }
70    }
71
72    /// Convert to BinaryWriteOptions for the envelope module
73    pub fn to_write_options(&self) -> BinaryWriteOptions {
74        BinaryWriteOptions {
75            codec: self.codec,
76            level: self.level,
77        }
78    }
79}
80
81// Predefined profiles
82
83/// Maximum compression for kernel and boot components
84/// Use for: vmlinuz, initramfs, kernel modules
85/// Trade-off: Slow compression, fast decompression, best ratio
86pub const PROFILE_KERNEL: CompressionProfile = CompressionProfile::new(
87    "Kernel",
88    CompressionCodec::Zstd,
89    Some(19),
90    0.25, // ~4:1 compression typical for kernel images
91    "Maximum compression for kernel/boot files",
92);
93
94/// Balanced compression for shared libraries
95/// Use for: .so files, dynamically linked libraries
96/// Trade-off: Good compression, reasonable speed
97pub const PROFILE_LIBRARIES: CompressionProfile = CompressionProfile::new(
98    "Libraries",
99    CompressionCodec::Zstd,
100    Some(9),
101    0.40, // ~2.5:1 compression for compiled code
102    "Balanced compression for shared libraries",
103);
104
105/// Moderate compression for executables
106/// Use for: /bin, /sbin, /usr/bin binaries
107/// Trade-off: Faster compression, decent ratio
108pub const PROFILE_BINARIES: CompressionProfile = CompressionProfile::new(
109    "Binaries",
110    CompressionCodec::Zstd,
111    Some(6),
112    0.45, // ~2:1 compression for executables
113    "Moderate compression for executables",
114);
115
116/// Fast compression for configuration files
117/// Use for: /etc, small text configs, JSON, YAML
118/// Trade-off: Very fast, lower ratio
119pub const PROFILE_CONFIG: CompressionProfile = CompressionProfile::new(
120    "Config",
121    CompressionCodec::Lz4,
122    None,
123    0.50, // ~2:1 for text configs
124    "Fast LZ4 compression for config files",
125);
126
127/// No compression for runtime/temporary data
128/// Use for: tmpfs, frequently mutating files, memory-mapped
129/// Trade-off: No CPU overhead, no size reduction
130pub const PROFILE_RUNTIME: CompressionProfile = CompressionProfile::new(
131    "Runtime",
132    CompressionCodec::None,
133    None,
134    1.0, // No compression
135    "No compression for runtime/temp data",
136);
137
138/// Maximum compression for cold storage/archives
139/// Use for: Backups, infrequently accessed data
140/// Trade-off: Very slow compression, best ratio
141pub const PROFILE_ARCHIVE: CompressionProfile = CompressionProfile::new(
142    "Archive",
143    CompressionCodec::Zstd,
144    Some(22), // Near-max zstd level
145    0.20,     // ~5:1 compression
146    "Maximum compression for archives/backups",
147);
148
149/// General-purpose balanced profile
150/// Use for: Default when no specific profile applies
151/// Trade-off: Fast compression, decent ratio
152pub const PROFILE_BALANCED: CompressionProfile = CompressionProfile::new(
153    "Balanced",
154    CompressionCodec::Zstd,
155    Some(3),
156    0.55, // ~1.8:1 compression
157    "General-purpose balanced compression",
158);
159
160/// Database and log files
161/// Use for: SQLite, logs, journals
162/// Trade-off: Good compression for structured data
163pub const PROFILE_DATABASE: CompressionProfile = CompressionProfile::new(
164    "Database",
165    CompressionCodec::Zstd,
166    Some(5),
167    0.35, // ~3:1 for repetitive structured data
168    "Compression for databases and logs",
169);
170
171/// Media files (usually pre-compressed)
172/// Use for: Images, audio, video that are already compressed
173/// Trade-off: Skip compression to avoid wasting CPU
174pub const PROFILE_MEDIA: CompressionProfile = CompressionProfile::new(
175    "Media",
176    CompressionCodec::None,
177    None,
178    0.98, // Minimal gain on pre-compressed data
179    "Skip compression for pre-compressed media",
180);
181
/// All predefined profiles, in the order they are declared in this module.
///
/// `CompressionProfiler::by_name` searches this list, so a predefined profile
/// must be listed here to be found by name.
pub const ALL_PROFILES: &[&CompressionProfile] = &[
    &PROFILE_KERNEL,
    &PROFILE_LIBRARIES,
    &PROFILE_BINARIES,
    &PROFILE_CONFIG,
    &PROFILE_RUNTIME,
    &PROFILE_ARCHIVE,
    &PROFILE_BALANCED,
    &PROFILE_DATABASE,
    &PROFILE_MEDIA,
];
194
/// Auto-select compression profiles based on file paths.
///
/// `for_path` checks a path against built-in directory and extension rules
/// and falls back to `default_profile` when none of them match.
#[derive(Clone, Debug)]
pub struct CompressionProfiler {
    /// Default profile when no pattern matches
    pub default_profile: CompressionProfile,
}
201
202impl Default for CompressionProfiler {
203    fn default() -> Self {
204        Self {
205            default_profile: PROFILE_BALANCED,
206        }
207    }
208}
209
210impl CompressionProfiler {
211    /// Create a profiler with a custom default
212    pub fn with_default(default: CompressionProfile) -> Self {
213        Self {
214            default_profile: default,
215        }
216    }
217
218    /// Select compression profile based on file path
219    pub fn for_path(&self, path: &str) -> CompressionProfile {
220        // Normalize path for matching
221        let path_lower = path.to_lowercase();
222
223        // Boot/kernel paths
224        if path_lower.starts_with("/boot")
225            || path_lower.contains("vmlinuz")
226            || path_lower.contains("initr")
227            || path_lower.ends_with(".ko")
228            || path_lower.ends_with(".ko.zst")
229            || path_lower.ends_with(".ko.xz")
230        {
231            return PROFILE_KERNEL;
232        }
233
234        // Shared libraries
235        if path_lower.ends_with(".so")
236            || path_lower.contains(".so.")
237            || path_lower.ends_with(".dll")
238            || path_lower.starts_with("/lib")
239            || path_lower.starts_with("/usr/lib")
240        {
241            return PROFILE_LIBRARIES;
242        }
243
244        // Executables
245        if path_lower.starts_with("/bin")
246            || path_lower.starts_with("/sbin")
247            || path_lower.starts_with("/usr/bin")
248            || path_lower.starts_with("/usr/sbin")
249            || path_lower.starts_with("/usr/local/bin")
250        {
251            return PROFILE_BINARIES;
252        }
253
254        // Configuration files
255        if path_lower.starts_with("/etc")
256            || path_lower.ends_with(".conf")
257            || path_lower.ends_with(".cfg")
258            || path_lower.ends_with(".ini")
259            || path_lower.ends_with(".yaml")
260            || path_lower.ends_with(".yml")
261            || path_lower.ends_with(".toml")
262            || path_lower.ends_with(".json")
263            || path_lower.ends_with(".xml")
264        {
265            return PROFILE_CONFIG;
266        }
267
268        // Runtime/temporary
269        if path_lower.starts_with("/tmp")
270            || path_lower.starts_with("/var/tmp")
271            || path_lower.starts_with("/run")
272            || path_lower.starts_with("/dev/shm")
273            || path_lower.contains("/cache/")
274        {
275            return PROFILE_RUNTIME;
276        }
277
278        // Database and logs
279        if path_lower.ends_with(".db")
280            || path_lower.ends_with(".sqlite")
281            || path_lower.ends_with(".sqlite3")
282            || path_lower.ends_with(".log")
283            || path_lower.starts_with("/var/log")
284            || path_lower.ends_with(".journal")
285        {
286            return PROFILE_DATABASE;
287        }
288
289        // Media files (pre-compressed, skip)
290        if path_lower.ends_with(".jpg")
291            || path_lower.ends_with(".jpeg")
292            || path_lower.ends_with(".png")
293            || path_lower.ends_with(".gif")
294            || path_lower.ends_with(".webp")
295            || path_lower.ends_with(".mp3")
296            || path_lower.ends_with(".mp4")
297            || path_lower.ends_with(".mkv")
298            || path_lower.ends_with(".webm")
299            || path_lower.ends_with(".ogg")
300            || path_lower.ends_with(".flac")
301            || path_lower.ends_with(".zip")
302            || path_lower.ends_with(".gz")
303            || path_lower.ends_with(".xz")
304            || path_lower.ends_with(".zst")
305            || path_lower.ends_with(".bz2")
306            || path_lower.ends_with(".7z")
307            || path_lower.ends_with(".rar")
308        {
309            return PROFILE_MEDIA;
310        }
311
312        // Archive paths
313        if path_lower.starts_with("/var/backups")
314            || path_lower.starts_with("/backup")
315            || path_lower.contains("/archive/")
316        {
317            return PROFILE_ARCHIVE;
318        }
319
320        // Default
321        self.default_profile.clone()
322    }
323
324    /// Get profile by name
325    pub fn by_name(&self, name: &str) -> Option<CompressionProfile> {
326        ALL_PROFILES
327            .iter()
328            .find(|p| p.name.eq_ignore_ascii_case(name))
329            .map(|p| (*p).clone())
330    }
331
332    /// Estimate compressed size for planning
333    pub fn estimate_compressed_size(&self, path: &str, original_size: usize) -> usize {
334        let profile = self.for_path(path);
335        (original_size as f32 * profile.expected_ratio) as usize
336    }
337}
338
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that a default-configured profiler selects the expected profile
    /// name for every `(path, expected name)` pair.
    fn assert_selected(cases: &[(&str, &str)]) {
        let profiler = CompressionProfiler::default();
        for &(path, expected) in cases {
            assert_eq!(profiler.for_path(path).name, expected, "path: {}", path);
        }
    }

    #[test]
    fn test_profile_selection_kernel() {
        assert_selected(&[
            ("/boot/vmlinuz", "Kernel"),
            ("/boot/initrd.img", "Kernel"),
            // The kernel rules run before the `/lib` library rule.
            ("/lib/modules/5.4.0/ext4.ko", "Kernel"),
        ]);
    }

    #[test]
    fn test_profile_selection_libraries() {
        assert_selected(&[
            ("/lib/x86_64-linux-gnu/libc.so.6", "Libraries"),
            ("/usr/lib/libssl.so.3", "Libraries"),
        ]);
    }

    #[test]
    fn test_profile_selection_binaries() {
        assert_selected(&[
            ("/bin/bash", "Binaries"),
            ("/usr/bin/python3", "Binaries"),
            ("/sbin/init", "Binaries"),
        ]);
    }

    #[test]
    fn test_profile_selection_config() {
        assert_selected(&[
            ("/etc/passwd", "Config"),
            ("/etc/nginx/nginx.conf", "Config"),
            ("/app/config.yaml", "Config"),
        ]);
    }

    #[test]
    fn test_profile_selection_runtime() {
        assert_selected(&[
            ("/tmp/session.sock", "Runtime"),
            ("/run/systemd/notify", "Runtime"),
        ]);
    }

    #[test]
    fn test_profile_selection_database() {
        assert_selected(&[
            ("/var/log/syslog", "Database"),
            ("/home/user/app.sqlite3", "Database"),
            ("/srv/data/service.log", "Database"),
        ]);
    }

    #[test]
    fn test_profile_selection_media() {
        assert_selected(&[
            ("/home/user/photo.jpg", "Media"),
            ("/var/data/video.mp4", "Media"),
            // Already-compressed files win over the archive-location rule.
            ("/archive/backup.tar.gz", "Media"),
        ]);
    }

    #[test]
    fn test_profile_selection_archive() {
        assert_selected(&[
            ("/var/backups/dpkg.status.0", "Archive"),
            ("/srv/archive/2023/report.txt", "Archive"),
        ]);
    }

    #[test]
    fn test_profile_selection_default() {
        // No rule matches, so the configured fallback is used.
        assert_selected(&[("/home/user/notes.txt", "Balanced")]);

        let custom = CompressionProfiler::with_default(PROFILE_ARCHIVE);
        assert_eq!(custom.for_path("/home/user/notes.txt").name, "Archive");
        // Rule matches still take precedence over a custom fallback.
        assert_eq!(custom.for_path("/etc/passwd").name, "Config");
    }

    #[test]
    fn test_profile_to_write_options() {
        let profile = PROFILE_KERNEL;
        let opts = profile.to_write_options();

        assert_eq!(opts.codec, CompressionCodec::Zstd);
        assert_eq!(opts.level, Some(19));
    }

    #[test]
    fn test_estimate_compressed_size() {
        let profiler = CompressionProfiler::default();

        // Kernel: 25% of original
        let est = profiler.estimate_compressed_size("/boot/vmlinuz", 10_000_000);
        assert_eq!(est, 2_500_000);

        // Runtime: 100% (no compression)
        let est = profiler.estimate_compressed_size("/tmp/data", 10_000_000);
        assert_eq!(est, 10_000_000);
    }

    #[test]
    fn test_by_name() {
        let profiler = CompressionProfiler::default();

        assert!(profiler.by_name("Kernel").is_some());
        assert!(profiler.by_name("kernel").is_some()); // Case insensitive
        assert!(profiler.by_name("NonExistent").is_none());
    }
}