1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
//! Trie compaction configuration, statistics, and progress reporting.
//!
//! Split out of the monolithic byte `dict_impl.rs` (lines ~386-479) as the
//! first piece of the Phase-5 decomposition. The compaction *execution*
//! logic still lives on `PersistentARTrie` in `dict_impl.rs`; only the
//! configuration / observability types live here.
/// Options controlling how a trie compaction run behaves.
///
/// Compaction rebuilds the trie from scratch, eliminating orphaned nodes
/// and fragmentation that accumulate from update/delete operations.
///
/// Configurations can be compared with `==`, which makes it easy to check
/// whether a caller-supplied value differs from [`CompactionConfig::default`].
///
/// # Example
///
/// ```rust,no_run
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// use libdictenstein::persistent_artrie::{PersistentARTrie, CompactionConfig};
///
/// let mut trie = PersistentARTrie::<u64>::open("data.artrie")?;
///
/// // In-place compaction with default settings
/// let stats = trie.compact(CompactionConfig::default(), |progress| {
///     println!("{}: {:.1}%", progress.phase, progress.percent_complete);
/// })?;
///
/// println!("Saved {:.1}% space", stats.space_savings_percent);
/// # Ok(())
/// # }
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CompactionConfig {
    /// Where the compacted trie is written.
    ///
    /// - `None` (default): in-place compaction via atomic rename
    /// - `Some(path)`: write to a new file, leaving the original unchanged
    pub output_path: Option<std::path::PathBuf>,
    /// Number of terms between two invocations of the progress callback.
    ///
    /// The progress callback is invoked every `progress_interval` terms.
    /// Set to 0 to disable progress callbacks. Default: 10,000.
    pub progress_interval: usize,
    /// Whether data integrity is verified once compaction has finished.
    ///
    /// When enabled, verifies that the compacted trie has the same term count
    /// as the original. Default: true.
    pub verify_after_compact: bool,
}
impl Default for CompactionConfig {
fn default() -> Self {
Self {
output_path: None,
progress_interval: 10_000,
verify_after_compact: true,
}
}
}
/// Summary figures reported once a compaction operation has completed.
#[derive(Debug, Clone)]
pub struct CompactionStats {
    /// How many terms were copied into the compacted trie.
    pub terms_copied: u64,
    /// File size in bytes before compaction.
    pub original_bytes: u64,
    /// File size in bytes after compaction.
    pub compacted_bytes: u64,
    /// Space saved, as a percentage (0.0 to 100.0).
    ///
    /// Calculated as: `(1.0 - compacted_bytes / original_bytes) * 100.0`
    pub space_savings_percent: f64,
    /// How long the compaction operation took, in milliseconds.
    pub duration_ms: u64,
}
/// Snapshot of how far a running compaction has advanced.
///
/// A value of this type is handed to the progress callback during
/// `compact()` to report status.
#[derive(Debug, Clone)]
pub struct CompactionProgress {
    /// Name of the compaction phase currently running.
    ///
    /// Possible values:
    /// - `"copying"`: Iterating and copying terms
    /// - `"checkpointing"`: Persisting to disk
    /// - `"verifying"`: Verifying data integrity
    /// - `"finalizing"`: Atomic rename (in-place mode only)
    pub phase: &'static str,
    /// Count of terms processed so far.
    pub terms_processed: u64,
    /// Estimated total number of terms.
    pub estimated_total: u64,
    /// Completion percentage (0.0 to 100.0).
    pub percent_complete: f32,
}