liquid_cache_parquet/cache/
id.rs1use std::{
2 ops::Deref,
3 path::{Path, PathBuf},
4};
5
6use liquid_cache_storage::cache::EntryID;
7
8#[repr(C, align(8))]
10#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
11pub struct ParquetArrayID {
12 file_id: u16,
13 rg_id: u16,
14 col_id: u16,
15 batch_id: BatchID,
16}
17
18impl From<ParquetArrayID> for usize {
19 fn from(id: ParquetArrayID) -> Self {
20 (id.file_id as usize) << 48
21 | (id.rg_id as usize) << 32
22 | (id.col_id as usize) << 16
23 | (id.batch_id.v as usize)
24 }
25}
26
27impl From<usize> for ParquetArrayID {
28 fn from(value: usize) -> Self {
29 Self {
30 file_id: (value >> 48) as u16,
31 rg_id: ((value >> 32) & 0xFFFF) as u16,
32 col_id: ((value >> 16) & 0xFFFF) as u16,
33 batch_id: BatchID::from_raw((value & 0xFFFF) as u16),
34 }
35 }
36}
37
38impl ParquetArrayID {}
39
40impl From<ParquetArrayID> for EntryID {
41 fn from(id: ParquetArrayID) -> Self {
42 EntryID::from(usize::from(id))
43 }
44}
45
46impl From<EntryID> for ParquetArrayID {
47 fn from(id: EntryID) -> Self {
48 ParquetArrayID::from(usize::from(id))
49 }
50}
51
52const _: () = assert!(std::mem::size_of::<ParquetArrayID>() == 8);
53const _: () = assert!(std::mem::align_of::<ParquetArrayID>() == 8);
54
55impl ParquetArrayID {
56 pub fn new(file_id: u64, row_group_id: u64, column_id: u64, batch_id: BatchID) -> Self {
58 debug_assert!(file_id <= u16::MAX as u64);
59 debug_assert!(row_group_id <= u16::MAX as u64);
60 debug_assert!(column_id <= u16::MAX as u64);
61 Self {
62 file_id: file_id as u16,
63 rg_id: row_group_id as u16,
64 col_id: column_id as u16,
65 batch_id,
66 }
67 }
68
69 pub fn batch_id_inner(&self) -> u64 {
71 self.batch_id.v as u64
72 }
73
74 pub fn file_id_inner(&self) -> u64 {
76 self.file_id as u64
77 }
78
79 pub fn row_group_id_inner(&self) -> u64 {
81 self.rg_id as u64
82 }
83
84 pub fn column_id_inner(&self) -> u64 {
86 self.col_id as u64
87 }
88
89 pub fn on_disk_path(&self, cache_root_dir: &Path) -> PathBuf {
91 let batch_id = self.batch_id_inner();
92 cache_root_dir
93 .join(format!("file_{}", self.file_id_inner()))
94 .join(format!("rg_{}", self.row_group_id_inner()))
95 .join(format!("col_{}", self.column_id_inner()))
96 .join(format!("batch_{batch_id}.liquid"))
97 }
98
99 pub fn on_disk_arrow_path(&self, cache_root_dir: &Path) -> PathBuf {
101 let batch_id = self.batch_id_inner();
102 cache_root_dir
103 .join(format!("file_{}", self.file_id_inner()))
104 .join(format!("rg_{}", self.row_group_id_inner()))
105 .join(format!("col_{}", self.column_id_inner()))
106 .join(format!("batch_{batch_id}.arrow"))
107 }
108}
109
/// Index of a batch, stored as a 2-byte-aligned `u16`.
#[repr(C, align(2))]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct BatchID {
    /// Raw batch index.
    v: u16,
}
120
121impl BatchID {
122 pub fn from_row_id(row_id: usize, batch_size: usize) -> Self {
125 Self {
126 v: (row_id / batch_size) as u16,
127 }
128 }
129
130 pub fn from_raw(v: u16) -> Self {
132 Self { v }
133 }
134
135 pub fn inc(&mut self) {
137 debug_assert!(self.v < u16::MAX);
138 self.v += 1;
139 }
140}
141
142impl Deref for BatchID {
143 type Target = u16;
144
145 fn deref(&self) -> &Self::Target {
146 &self.v
147 }
148}
149
#[cfg(test)]
mod tests {
    use tempfile::tempdir;

    use super::*;

    #[test]
    fn test_cache_entry_id_new_and_getters() {
        let file_id = 10u64;
        let row_group_id = 20u64;
        let column_id = 30u64;
        let batch_id = BatchID::from_raw(40);
        let entry_id = ParquetArrayID::new(file_id, row_group_id, column_id, batch_id);

        assert_eq!(entry_id.file_id_inner(), file_id);
        assert_eq!(entry_id.row_group_id_inner(), row_group_id);
        assert_eq!(entry_id.column_id_inner(), column_id);
        assert_eq!(entry_id.batch_id_inner(), *batch_id as u64);
    }

    #[test]
    fn test_cache_entry_id_boundaries() {
        let file_id = u16::MAX as u64;
        let row_group_id = 0u64;
        let column_id = u16::MAX as u64;
        let batch_id = BatchID::from_raw(0);
        let entry_id = ParquetArrayID::new(file_id, row_group_id, column_id, batch_id);

        assert_eq!(entry_id.file_id_inner(), file_id);
        assert_eq!(entry_id.row_group_id_inner(), row_group_id);
        assert_eq!(entry_id.column_id_inner(), column_id);
        assert_eq!(entry_id.batch_id_inner(), *batch_id as u64);
    }

    // The range checks in `ParquetArrayID::new` are `debug_assert!`s, so the
    // panic tests below only make sense when debug assertions are compiled
    // in; they are skipped otherwise (e.g. `cargo test --release`).

    #[test]
    #[cfg(debug_assertions)]
    #[should_panic]
    fn test_cache_entry_id_new_panic_file_id() {
        ParquetArrayID::new((u16::MAX as u64) + 1, 0, 0, BatchID::from_raw(0));
    }

    #[test]
    #[cfg(debug_assertions)]
    #[should_panic]
    fn test_cache_entry_id_new_panic_row_group_id() {
        ParquetArrayID::new(0, (u16::MAX as u64) + 1, 0, BatchID::from_raw(0));
    }

    #[test]
    #[cfg(debug_assertions)]
    #[should_panic]
    fn test_cache_entry_id_new_panic_column_id() {
        ParquetArrayID::new(0, 0, (u16::MAX as u64) + 1, BatchID::from_raw(0));
    }

    #[test]
    fn test_cache_entry_id_on_disk_path() {
        let temp_dir = tempdir().unwrap();
        let cache_root = temp_dir.path();
        let entry_id = ParquetArrayID::new(1, 2, 3, BatchID::from_raw(4));
        let expected_path = cache_root
            .join("file_1")
            .join("rg_2")
            .join("col_3")
            .join("batch_4.liquid");
        assert_eq!(entry_id.on_disk_path(cache_root), expected_path);
    }

    #[test]
    fn test_cache_entry_id_on_disk_arrow_path() {
        let temp_dir = tempdir().unwrap();
        let cache_root = temp_dir.path();
        let entry_id = ParquetArrayID::new(1, 2, 3, BatchID::from_raw(4));
        let expected_path = cache_root
            .join("file_1")
            .join("rg_2")
            .join("col_3")
            .join("batch_4.arrow");
        assert_eq!(entry_id.on_disk_arrow_path(cache_root), expected_path);
    }

    #[test]
    fn test_cache_entry_id_usize_round_trip() {
        let entry_id = ParquetArrayID::new(1, 2, 3, BatchID::from_raw(4));
        let packed = usize::from(entry_id);
        assert_eq!(packed, 0x0001_0002_0003_0004usize);
        assert_eq!(ParquetArrayID::from(packed), entry_id);

        let max_id = ParquetArrayID::new(
            u16::MAX as u64,
            u16::MAX as u64,
            u16::MAX as u64,
            BatchID::from_raw(u16::MAX),
        );
        assert_eq!(ParquetArrayID::from(usize::from(max_id)), max_id);
    }

    #[test]
    fn test_batch_id_from_row_id() {
        let batch_id = BatchID::from_row_id(256, 128);
        assert_eq!(batch_id.v, 2);
    }

    #[test]
    fn test_batch_id_from_raw() {
        let batch_id = BatchID::from_raw(5);
        assert_eq!(batch_id.v, 5);
    }

    #[test]
    fn test_batch_id_inc() {
        let mut batch_id = BatchID::from_raw(10);
        batch_id.inc();
        assert_eq!(batch_id.v, 11);
    }

    // Relies on the `debug_assert!` in `BatchID::inc`; without debug
    // assertions the increment is not guaranteed to panic.
    #[test]
    #[cfg(debug_assertions)]
    #[should_panic]
    fn test_batch_id_inc_overflow() {
        let mut batch_id = BatchID::from_raw(u16::MAX);
        batch_id.inc();
    }

    #[test]
    fn test_batch_id_deref() {
        let batch_id = BatchID::from_raw(15);
        assert_eq!(*batch_id, 15);
    }
}