1use std::{io, isize, marker::PhantomData};
2
3use binout::{AsIs, Serializer, VByte};
4use bitm::{bits_to_store, ceiling_div, get_bits57, init_bits57, n_lowest_bits, BitAccess, BitVec};
5use dyn_size_of::GetSize;
6#[cfg(feature = "sux")] use sux::traits::IndexedSeq;
7
8pub trait CompressedArray {
10
11 const MAX_FOR_UNUSED: bool = false;
13
14 fn new(values: Vec<usize>, last_in_value: usize, number_of_keys: usize) -> Self;
16
17 fn get(&self, index: usize) -> usize;
19
20 fn write(&self, output: &mut dyn io::Write) -> io::Result<()>;
22
23 fn write_bytes(&self) -> usize;
25
26 fn read(input: &mut dyn io::Read) -> io::Result<Self> where Self: Sized;
28}
29
30pub trait CompressedBuilder: Sized {
32 fn new(num_of_values: usize, max_value: usize) -> Self;
33 fn push(&mut self, value: usize);
34
35 #[inline]
36 fn with_all(values: Vec<usize>, last: usize) -> Self {
37 let mut builder = Self::new(values.len(), last);
38 for value in values { builder.push(value); }
39 builder
40 }
41}
42
43#[cfg(feature = "cseq")] pub type CSeqEliasFano = cseq::elias_fano::Sequence<bitm::CombinedSampling<bitm::ConstCombinedSamplingDensity<11>>, bitm::BinaryRankSearch>;
45
46#[cfg(feature = "cseq")]
47impl CompressedBuilder for cseq::elias_fano::Builder {
48 #[inline] fn new(num_of_values: usize, max_value: usize) -> Self {
49 cseq::elias_fano::Builder::new(num_of_values, max_value as u64+1)
50 }
51
52 #[inline] fn push(&mut self, value: usize) {
53 unsafe { self.push_unchecked(value as u64); }
54 }
55}
56
57#[cfg(feature = "cseq")]
58impl CompressedArray for CSeqEliasFano {
59
60 fn new(values: Vec<usize>, last: usize, _num_of_keys: usize) -> Self {
61 cseq::elias_fano::Builder::with_all(values, last).finish_s()
62 }
63
64 #[inline]
65 fn get(&self, index: usize) -> usize {
66 unsafe { self.get_unchecked(index) as usize }
68 }
69
70 fn write(&self, output: &mut dyn io::Write) -> io::Result<()> {
71 CSeqEliasFano::write(&self, output)
72 }
73
74 fn write_bytes(&self) -> usize {
75 CSeqEliasFano::write_bytes(&self)
76 }
77
78 fn read(input: &mut dyn io::Read) -> io::Result<Self> where Self: Sized {
79 CSeqEliasFano::read_s(input)
80 }
81}
82
83pub struct LinearRegression {
85 multiplier: isize, divider: isize, offset: isize, }
89
90impl LinearRegression {
91 pub fn new(multiplier: usize, divider: usize, values: Vec<usize>) -> (Self, CompactFast) {
94 let mut max_diff = isize::MIN; let mut min_diff = isize::MAX; for (i, v) in values.iter().copied().enumerate() {
97 if v == usize::MAX { continue; }
98 let diff = (i * multiplier) as isize - (v * divider) as isize; if diff > max_diff { max_diff = diff }
100 if diff < min_diff { min_diff = diff }
101 }
102 let regression = LinearRegression {
103 multiplier: multiplier as isize,
104 divider: divider as isize,
105 offset: min_diff
106 };
107 let max_correction = (max_diff - min_diff) as usize / divider;
108 let mut corrections = CompactFastBuilder::new(values.len(), max_correction);
109 for (i, v) in values.iter().copied().enumerate() {
112 if v == usize::MAX {
113 corrections.push(0);
114 } else {
115 let correction = regression.get(i) - v as isize;
116 debug_assert!(correction >= 0);
117 let correction = correction as usize;
118 debug_assert!(correction <= max_correction, "{correction} <= {max_correction}");
119 corrections.push(correction as usize);
120 }
123 }
124 (regression, corrections.compact)
127 }
128
129 #[inline(always)] pub fn get(&self, i: usize) -> isize {
136 (self.multiplier * i as isize - self.offset) / self.divider
137 }
138}
139
140pub trait LinearRegressionConstructor {
141 fn new(values: &[usize], num_of_keys: usize) -> (usize, usize);
143}
144
145pub struct Simple;
146
147impl LinearRegressionConstructor for Simple {
148 #[inline] fn new(values: &[usize], num_of_keys: usize) -> (usize, usize) {
149 (num_of_keys, values.len()+1)
150 }
151}
152
153pub struct LeastSquares;
154
155impl LinearRegressionConstructor for LeastSquares {
156 fn new(values: &[usize], _num_of_keys: usize) -> (usize, usize) {
157 let mut n= 0u128;
158 let mut x_sum = 0;
159 let mut y_sum = 0;
160 let mut x_sqr_sum = 0;
161 let mut xy_sum = 0;
162 for (x, y) in values.iter().copied().enumerate() {
163 if y == usize::MAX { continue; }
164 n += 1;
165 x_sum += x as u128;
166 y_sum += y as u128;
167 x_sqr_sum += (x as u128) * (x as u128);
168 xy_sum += (x as u128) * (y as u128);
169 }
170 if n == 0 { return (1, 1); }
171 let mut multiplier = (n * xy_sum).abs_diff(x_sum * y_sum);
172 let mut divider = (n * x_sqr_sum).abs_diff(x_sum * x_sum);
173 let max_vals = (1<<(isize::BITS-2)) / n;
174 if multiplier > max_vals || divider > max_vals {
175 let div = (multiplier / max_vals).max(divider / max_vals);
176 let divh = div / 2;
177 multiplier = (multiplier + divh) / div;
178 divider = (divider + divh) / div;
179 }
180 (multiplier as usize, divider as usize)
181 }
182}
183
184pub struct LinearRegressionArray<C> {
187 regression: LinearRegression,
188 corrections: CompactFast,
189 constructor: PhantomData<C>
190}
191
192impl<C: LinearRegressionConstructor> CompressedArray for LinearRegressionArray<C> {
193 const MAX_FOR_UNUSED: bool = true;
194
195 fn new(values: Vec<usize>, _last: usize, num_of_keys: usize) -> Self {
196 let (multiplier, divider) = C::new(&values, num_of_keys);
197 let (regression, corrections) = LinearRegression::new(multiplier, divider, values);
198 Self { regression, corrections, constructor: PhantomData }
199 }
200
201 fn get(&self, index: usize) -> usize {
202 (self.regression.get(index) - self.corrections.get(index) as isize) as usize
203 }
205
206 fn write(&self, output: &mut dyn io::Write) -> io::Result<()> {
207 AsIs::write(output, self.regression.multiplier as usize)?;
208 AsIs::write(output, self.regression.divider as usize)?;
209 AsIs::write(output, self.regression.offset as usize)?;
210 self.corrections.write(output)
211 }
212
213 fn write_bytes(&self) -> usize {
214 AsIs::size(self.regression.multiplier as usize)
215 + AsIs::size(self.regression.divider as usize)
216 + AsIs::size(self.regression.offset as usize)
217 + self.corrections.write_bytes()
218 }
219
220 fn read(input: &mut dyn io::Read) -> io::Result<Self> where Self: Sized {
222 let multiplier: usize = AsIs::read(input)?;
223 let divider: usize = AsIs::read(input)?;
224 let offset: usize = AsIs::read(input)?;
225 let corrections = CompactFast::read(input)?;
226 Ok(Self {
227 regression: LinearRegression {
228 multiplier: multiplier as isize,
229 divider: divider as isize,
230 offset: offset as isize,
231 },
232 corrections,
233 constructor: PhantomData
234 })
235 }
236}
237
238impl<C> GetSize for LinearRegressionArray<C> {
239 fn size_bytes_dyn(&self) -> usize { self.corrections.size_bytes_dyn() }
240 fn size_bytes_content_dyn(&self) -> usize { self.corrections.size_bytes_dyn() }
241 const USES_DYN_MEM: bool = true;
242}
243
244pub struct Compact {
246 pub items: Box<[u64]>,
247 pub item_size: u8,
248}
249
250pub struct CompactBuilder {
251 compact: Compact,
252 index: usize
253}
254
255impl CompressedBuilder for CompactBuilder {
256 fn new(num_of_values: usize, max_value: usize) -> Self {
257 let item_size = bits_to_store(max_value as u64);
258 Self {
259 compact: Compact { items: Box::with_zeroed_bits(item_size as usize * num_of_values), item_size },
260 index: 0
261 }
262 }
263
264 #[inline] fn push(&mut self, value: usize) {
265 self.compact.items.init_successive_bits(&mut self.index, value as u64, self.compact.item_size);
266 }
267}
268
269impl GetSize for Compact {
270 fn size_bytes_dyn(&self) -> usize { self.items.size_bytes_dyn() }
271 fn size_bytes_content_dyn(&self) -> usize { self.items.size_bytes_dyn() }
272 const USES_DYN_MEM: bool = true;
273}
274
275impl CompressedArray for Compact {
276 fn new(values: Vec<usize>, last: usize, _num_of_keys: usize) -> Self {
277 CompactBuilder::with_all(values, last).compact
278 }
279
280 #[inline]
281 fn get(&self, index: usize) -> usize {
282 unsafe { self.items.get_fragment_unchecked(index, self.item_size) as usize }
283 }
284
285 fn write(&self, output: &mut dyn io::Write) -> io::Result<()> {
286 VByte::write(output, self.item_size)?;
287 AsIs::write_array(output, &self.items)
288 }
289
290 fn write_bytes(&self) -> usize {
291 VByte::size(self.item_size) + AsIs::array_size(&self.items)
292 }
293
294 fn read(input: &mut dyn io::Read) -> io::Result<Self> where Self: Sized {
295 let item_size = VByte::read(input)?;
296 let items = AsIs::read_array(input)?;
297 Ok(Self { items, item_size })
298 }
299}
300
301
302pub struct CompactFast {
305 pub items: Box<[u8]>,
306 pub item_size: u8,
307}
308
309pub struct CompactFastBuilder {
310 compact: CompactFast,
311 first_bit: usize,
312}
313
314impl CompressedBuilder for CompactFastBuilder {
315 #[inline] fn new(num_of_values: usize, max_value: usize) -> Self {
316 let item_size = bits_to_store(max_value as u64);
317 Self {
318 compact: CompactFast { items: vec![0; ceiling_div(item_size as usize * num_of_values, 8) + 7].into_boxed_slice(), item_size },
319 first_bit: 0,
320 }
321 }
322
323 #[inline] fn push(&mut self, value: usize) {
324 unsafe{init_bits57(self.compact.items.as_mut_ptr(), self.first_bit, value as u64)};
325 self.first_bit += self.compact.item_size as usize;
326 }
327}
328
329impl GetSize for CompactFast {
330 fn size_bytes_dyn(&self) -> usize { self.items.size_bytes_dyn() }
331 fn size_bytes_content_dyn(&self) -> usize { self.items.size_bytes_dyn() }
332 const USES_DYN_MEM: bool = true;
333}
334
335impl CompressedArray for CompactFast {
336 fn new(values: Vec<usize>, last: usize, _num_of_keys: usize) -> Self {
337 CompactFastBuilder::with_all(values, last).compact
338 }
339
340 #[inline]
341 fn get(&self, index: usize) -> usize {
342 (unsafe { get_bits57(self.items.as_ptr(), index * self.item_size as usize) & n_lowest_bits(self.item_size) }) as usize
343 }
344
345 #[inline]
346 fn write(&self, output: &mut dyn io::Write) -> io::Result<()> {
347 VByte::write(output, self.item_size)?;
348 AsIs::write_array(output, &self.items)
349 }
350
351 fn write_bytes(&self) -> usize {
352 VByte::size(self.item_size) + AsIs::array_size(&self.items)
353 }
354
355 fn read(input: &mut dyn io::Read) -> io::Result<Self> where Self: Sized {
356 let item_size = VByte::read(input)?;
357 let items = AsIs::read_array(input)?;
358 Ok(Self { items, item_size })
359 }
360}
361
362
363#[cfg(feature = "sux")] pub struct SuxEliasFano(sux::dict::elias_fano::EfSeq);
365
366#[cfg(feature = "sux")] impl CompressedBuilder for sux::dict::EliasFanoBuilder {
367 #[inline] fn new(num_of_values: usize, max_value: usize) -> Self {
368 sux::dict::EliasFanoBuilder::new(num_of_values, max_value)
369 }
370
371 #[inline] fn push(&mut self, value: usize) {
372 unsafe{ self.push_unchecked(value); }
373 }
374}
375
376#[cfg(feature = "sux")]
377impl CompressedArray for SuxEliasFano {
378 fn new(values: Vec<usize>, last: usize, _num_of_keys: usize) -> Self {
379 SuxEliasFano(sux::dict::EliasFanoBuilder::with_all(values, last).build_with_seq())
380 }
381
382 #[inline]
383 fn get(&self, index: usize) -> usize {
384 unsafe { self.0.get_unchecked(index) }
385 }
386
387 fn write(&self, output: &mut dyn io::Write) -> io::Result<()> {
388 use std::io::Write;
389
390 use epserde::ser::Serialize;
391 let mut bw = std::io::BufWriter::new(output);
392 match unsafe {self.0.serialize(&mut bw)} {
393 Ok(_) => Ok(()),
394 Err(epserde::ser::Error::FileOpenError(io_err)) => Err(io_err),
395 Err(e) => Err(io::Error::new(io::ErrorKind::Other, e))
396 }?;
397 bw.flush()
398 }
399
400 fn write_bytes(&self) -> usize {
401 let mut buf = Vec::<u8>::new();
402 use epserde::ser::Serialize;
403 unsafe { self.0.serialize(&mut buf).unwrap(); }
404 buf.len()
405 }
406
407 fn read(input: &mut dyn io::Read) -> io::Result<Self> where Self: Sized {
408 use epserde::deser::Deserialize;
409 match unsafe{ sux::dict::elias_fano::EfSeq::deserialize_full(&mut std::io::BufReader::with_capacity(1, input))} {
410 Ok(v) => Ok(Self(v)),
411 Err(epserde::deser::Error::FileOpenError(io_err)) => Err(io_err),
412 Err(e) => Err(io::Error::new(io::ErrorKind::Other, e))
413 }
414 }
415}
416
417#[cfg(feature = "sux")]
418impl GetSize for SuxEliasFano {
419 fn size_bytes_dyn(&self) -> usize {
420 mem_dbg::MemSize::mem_size(&self.0, mem_dbg::SizeFlags::default()) - std::mem::size_of_val(self)
421 }
422
423 fn size_bytes_content_dyn(&self) -> usize {
424 mem_dbg::MemSize::mem_size(&self.0, mem_dbg::SizeFlags::default() | mem_dbg::SizeFlags::CAPACITY) - std::mem::size_of_val(self)
425 }
426
427 fn size_bytes(&self) -> usize {
428 mem_dbg::MemSize::mem_size(&self.0, mem_dbg::SizeFlags::default())
429 }
430
431 const USES_DYN_MEM: bool = true;
432}
433
434#[cfg(feature = "cacheline-ef")]
435pub struct CachelineEF(cacheline_ef::CachelineEfVec);
437
438#[cfg(feature = "cacheline-ef")]
439impl CompressedArray for CachelineEF {
440 fn new(values: Vec<usize>, _last: usize, _num_of_keys: usize) -> Self {
441 let v: Vec<_> = values.iter().map(|v| *v as u64).collect();
442 CachelineEF(cacheline_ef::CachelineEfVec::new(&v))
443 }
444
445 fn get(&self, index: usize) -> usize {
447 unsafe { self.0.index_unchecked(index) as usize }
448 }
450
451 fn write(&self, _output: &mut dyn io::Write) -> io::Result<()> {
452 todo!()
453 }
454
455 fn write_bytes(&self) -> usize {
456 todo!()
457 }
458
459 fn read(_input: &mut dyn io::Read) -> io::Result<Self> where Self: Sized {
460 todo!()
461 }
462}
463
464#[cfg(feature = "cacheline-ef")]
465impl GetSize for CachelineEF {
466 fn size_bytes_dyn(&self) -> usize {
467 mem_dbg::MemSize::mem_size(&self.0, mem_dbg::SizeFlags::default()) - std::mem::size_of_val(self)
468 }
469
470 fn size_bytes_content_dyn(&self) -> usize {
471 mem_dbg::MemSize::mem_size(&self.0, mem_dbg::SizeFlags::default() | mem_dbg::SizeFlags::CAPACITY) - std::mem::size_of_val(self)
472 }
473
474 fn size_bytes(&self) -> usize {
475 mem_dbg::MemSize::mem_size(&self.0, mem_dbg::SizeFlags::default())
476 }
477
478 const USES_DYN_MEM: bool = true;
479}
480
481#[cfg(feature = "sux")] pub type DefaultCompressedArray = SuxEliasFano;
482#[cfg(all(feature = "cacheline-ef", not(feature = "sux")))] pub type DefaultCompressedArray = CachelineEF;
483#[cfg(all(feature = "cseq", not(feature = "sux"), not(feature="cacheline-ef")))] pub type DefaultCompressedArray = CSeqEliasFano;
484#[cfg(all(not(feature="cseq"), not(feature = "sux"), not(feature="cacheline-ef")))] pub type DefaultCompressedArray = Compact;