simple_sds_sbwt/
int_vector.rs

1//! A bit-packed integer vector storing fixed-width integers.
2
3use crate::ops::{Vector, Resize, Pack, Access, AccessIter, Push, Pop};
4use crate::raw_vector::{RawVector, RawVectorWriter, AccessRaw, PushRaw, PopRaw};
5#[cfg(not(target_family = "wasm"))]
6use crate::raw_vector::RawVectorMapper;
7use crate::serialize::Serialize;
8#[cfg(not(target_family = "wasm"))]
9use crate::serialize::{MemoryMap, MemoryMapped};
10use crate::bits;
11
12use std::io::{Error, ErrorKind};
13use std::iter::{FusedIterator, FromIterator};
14use std::path::Path;
15use std::io;
16
17#[cfg(test)]
18mod tests;
19
20//-----------------------------------------------------------------------------
21
22/// A contiguous growable and mutable bit-packed array of fixed-width integers.
23///
24/// This structure contains [`RawVector`], which is in turn contains [`Vec`].
25/// Each item consists of the lowest 1 to 64 bits of a [`u64`] value, as specified by parameter `width`.
26/// The maximum length of the vector is `usize::MAX / width` items.
27///
28/// A default constructed `IntVector` has `width == 64`.
29/// `IntVector` can be built from an iterator over [`u8`], [`u16`], [`u32`], [`u64`], or [`usize`] values.
30///
31/// `IntVector` implements the following `simple_sds` traits:
32/// * Basic functionality: [`Vector`], [`Resize`], [`Pack`]
33/// * Queries and operations: [`Access`], [`Push`], [`Pop`]
34/// * Serialization: [`Serialize`]
35///
36/// # Notes
37///
38/// * `IntVector` never panics from I/O errors.
39#[derive(Clone, Debug, PartialEq, Eq)]
40pub struct IntVector {
41    len: usize,
42    width: usize,
43    data: RawVector,
44}
45
46impl IntVector {
47    /// Creates an empty vector with specified width.
48    /// 
49    /// Returns [`Err`] if the width is invalid.
50    ///
51    /// # Examples
52    ///
53    /// ```
54    /// use simple_sds_sbwt::int_vector::IntVector;
55    /// use simple_sds_sbwt::ops::Vector;
56    ///
57    /// let v = IntVector::new(13).unwrap();
58    /// assert!(v.is_empty());
59    /// assert_eq!(v.width(), 13);
60    /// ```
61    pub fn new(width: usize) -> Result<IntVector, &'static str> {
62        if width == 0 || width > bits::WORD_BITS {
63            Err("Integer width must be 1 to 64 bits")
64        }
65        else {
66            Ok(IntVector {
67                len: 0,
68                width,
69                data: RawVector::new(),
70            })
71        }
72    }
73
74    /// Creates an initialized vector of specified length and width.
75    /// 
76    /// Returns [`Err`] if the width is invalid.
77    ///
78    /// # Arguments
79    ///
80    /// * `len`: Number of items in the vector.
81    /// * `width`: Width of each item in bits.
82    /// * `value`: Initialization value.
83    ///
84    /// # Examples
85    ///
86    /// ```
87    /// use simple_sds_sbwt::int_vector::IntVector;
88    /// use simple_sds_sbwt::ops::{Vector, Access};
89    ///
90    /// let v = IntVector::with_len(4, 13, 1234).unwrap();
91    /// assert_eq!(v.len(), 4);
92    /// assert_eq!(v.width(), 13);
93    /// for i in 0..v.len() {
94    ///     assert_eq!(v.get(i), 1234);
95    /// }
96    /// ```
97    ///
98    /// # Panics
99    ///
100    /// May panic if the vector would exceed the maximum length.
101    pub fn with_len(len: usize, width: usize, value: u64) -> Result<IntVector, &'static str> {
102        if width == 0 || width > bits::WORD_BITS {
103            return Err("Integer width must be 1 to 64 bits");
104        }
105        let mut data = RawVector::with_capacity(len * width);
106        for _ in 0..len {
107            unsafe { data.push_int(value, width); }
108        }
109        Ok(IntVector {
110            len, width, data,
111        })
112    }
113
114    /// Creates an empty vector with enough capacity for at least the specified number of items of specified width.
115    ///
116    /// Returns [`Err`] if the width is invalid.
117    ///
118    /// # Arguments
119    ///
120    /// * `capacity`: Minimum capacity of the vector in items.
121    /// * `width`: Width of each item in bits.
122    ///
123    /// # Examples
124    ///
125    /// ```
126    /// use simple_sds_sbwt::int_vector::IntVector;
127    /// use simple_sds_sbwt::ops::{Vector, Resize};
128    ///
129    /// let v = IntVector::with_capacity(4, 13).unwrap();
130    /// assert!(v.is_empty());
131    /// assert_eq!(v.width(), 13);
132    /// assert!(v.capacity() >= 4);
133    /// ```
134    ///
135    /// # Panics
136    ///
137    /// May panic if the capacity would exceed the maximum length.
138    pub fn with_capacity(capacity: usize, width: usize) -> Result<IntVector, &'static str> {
139        if width == 0 || width > bits::WORD_BITS {
140            Err("Integer width must be 1 to 64 bits")
141        } else {
142            Ok(IntVector {
143                len: 0,
144                width,
145                data: RawVector::with_capacity(capacity * width),
146            })
147        }
148    }
149
150    /// Returns the size of a serialized vector with the given parameters in [`u64`] elements.
151    ///
152    /// # Arguments
153    ///
154    /// * `capacity`: Minimum capacity of the vector in items.
155    /// * `width`: Width of each item in bits.
156    ///
157    /// # Examples
158    ///
159    /// ```
160    /// use simple_sds_sbwt::int_vector::IntVector;
161    ///
162    /// assert_eq!(IntVector::size_by_params(12, 31), 10);
163    /// ```
164    ///
165    /// # Panics
166    ///
167    /// May panic if the vector would exceed the maximum length.
168    pub fn size_by_params(capacity: usize, width: usize) -> usize {
169        2 + RawVector::size_by_params(capacity * width)
170    }
171}
172
173//-----------------------------------------------------------------------------
174
175impl Vector for IntVector {
176    type Item = u64;
177
178    #[inline]
179    fn len(&self) -> usize {
180        self.len
181    }
182
183    #[inline]
184    fn width(&self) -> usize {
185        self.width
186    }
187
188    #[inline]
189    fn max_len(&self) -> usize {
190        usize::MAX / self.width()
191    }
192}
193
194impl Resize for IntVector {
195    fn resize(&mut self, new_len: usize, value: <Self as Vector>::Item) {
196        match new_len {
197            new_len if new_len > self.len() => {
198                self.reserve(new_len - self.len());
199                while self.len() < new_len {
200                    self.push(value);
201                }
202            },
203            new_len if new_len < self.len() => {
204                self.data.resize(new_len * self.width(), false);
205                self.len = new_len;
206            },
207            _ => (),
208        }
209    }
210
211    fn clear(&mut self) {
212        self.data.clear();
213        self.len = 0;
214    }
215
216    #[inline]
217    fn capacity(&self) -> usize {
218        self.data.capacity() / self.width()
219    }
220
221    fn reserve(&mut self, additional: usize) {
222        self.data.reserve(additional * self.width());
223    }
224}
225
226impl Pack for IntVector {
227    fn pack(&mut self) {
228        if self.is_empty() {
229            return;
230        }
231        let new_width = bits::bit_len(self.iter().max().unwrap());
232        if new_width == self.width() {
233            return;
234        }
235        let mut new_data = RawVector::with_capacity(self.len() * new_width);
236        for value in self.iter() {
237            unsafe { new_data.push_int(value, new_width); }
238        }
239        self.width = new_width;
240        self.data = new_data;
241    }
242}
243
244impl<'a> Access<'a> for IntVector {
245    type Iter = AccessIter<'a, Self>;
246
247    #[inline]
248    fn get(&self, index: usize) -> <Self as Vector>::Item {
249        assert!(index < self.len(), "Index is out of bounds");
250        unsafe { self.data.int(index * self.width(), self.width()) }
251    }
252
253    fn iter(&'a self) -> Self::Iter {
254        Self::Iter::new(self)
255    }
256
257    #[inline]
258    fn is_mutable(&self) -> bool {
259        true
260    }
261
262    #[inline]
263    fn set(&mut self, index: usize, value: <Self as Vector>::Item) {
264        assert!(index < self.len(), "Index is out of bounds");
265        unsafe { self.data.set_int(index * self.width(), value, self.width()); }
266    }
267}
268
269impl Push for IntVector {
270    #[inline]
271    fn push(&mut self, value: <Self as Vector>::Item) {
272        unsafe { self.data.push_int(value, self.width()); }
273        self.len += 1;
274    }    
275}
276
277impl Pop for IntVector {
278    #[inline]
279    fn pop(&mut self) -> Option<<Self as Vector>::Item> {
280        if self.len() > 0 {
281            self.len -= 1;
282        }
283        unsafe { self.data.pop_int(self.width()) }
284    }    
285}
286
287impl Serialize for IntVector {
288    fn serialize_header<T: io::Write>(&self, writer: &mut T) -> io::Result<()> {
289        self.len.serialize(writer)?;
290        self.width.serialize(writer)?;
291        self.data.serialize_header(writer)?;
292        Ok(())
293    }
294
295    fn serialize_body<T: io::Write>(&self, writer: &mut T) -> io::Result<()> {
296        self.data.serialize_body(writer)?;
297        Ok(())
298    }
299
300    fn load<T: io::Read>(reader: &mut T) -> io::Result<Self> {
301        let len = usize::load(reader)?;
302        let width = usize::load(reader)?;
303        let data = RawVector::load(reader)?;
304        if len * width != data.len() {
305            Err(Error::new(ErrorKind::InvalidData, "Data length does not match len * width"))
306        }
307        else {
308            Ok(IntVector {
309                len, width, data,
310            })
311        }
312    }
313
314    fn size_in_elements(&self) -> usize {
315        self.len.size_in_elements() + self.width.size_in_elements() + self.data.size_in_elements()
316    }
317}
318
319//-----------------------------------------------------------------------------
320
321impl Default for IntVector {
322    fn default() -> Self {
323        IntVector {
324            len: 0,
325            width: bits::WORD_BITS,
326            data: RawVector::new(),
327        }
328    }
329}
330
331impl AsRef<RawVector> for IntVector {
332    #[inline]
333    fn as_ref(&self) -> &RawVector {
334        &(self.data)
335    }
336}
337
338impl From<IntVector> for RawVector {
339    fn from(source: IntVector) -> Self {
340        source.data
341    }
342}
343
344//-----------------------------------------------------------------------------
345
346/// [`IntVector`] transformed into an iterator.
347///
348/// The type of `Item` is [`u64`].
349///
350/// # Examples
351///
352/// ```
353/// use simple_sds_sbwt::int_vector::IntVector;
354///
355/// let source: Vec<u64> = vec![1, 3, 15, 255, 65535];
356/// let v: IntVector = source.iter().cloned().collect();
357/// let target: Vec<u64> = v.into_iter().collect();
358/// assert_eq!(target, source);
359/// ```
360#[derive(Clone, Debug)]
361pub struct IntoIter {
362    parent: IntVector,
363    index: usize,
364}
365
366impl Iterator for IntoIter {
367    type Item = <IntVector as Vector>::Item;
368
369    fn next(&mut self) -> Option<Self::Item> {
370        if self.index >= self.parent.len() {
371            None
372        } else {
373            let result = Some(self.parent.get(self.index));
374            self.index += 1;
375            result
376        }
377    }
378
379    #[inline]
380    fn size_hint(&self) -> (usize, Option<usize>) {
381        let remaining = self.parent.len() - self.index;
382        (remaining, Some(remaining))
383    }
384}
385
386impl ExactSizeIterator for IntoIter {}
387
388impl FusedIterator for IntoIter {}
389
390impl IntoIterator for IntVector {
391    type Item = <Self as Vector>::Item;
392    type IntoIter = IntoIter;
393
394    fn into_iter(self) -> Self::IntoIter {
395        IntoIter {
396            parent: self,
397            index: 0,
398        }
399    }
400}
401
402//-----------------------------------------------------------------------------
403
404/// A buffered file writer compatible with the serialization format of [`IntVector`].
405///
406/// When the writer goes out of scope, the internal buffer is flushed, the file is closed, and all errors are ignored.
407/// Call [`IntVectorWriter::close`] explicitly to handle the errors.
408///
409/// # Examples
410///
411/// ```
412/// use simple_sds_sbwt::int_vector::{IntVector, IntVectorWriter};
413/// use simple_sds_sbwt::ops::{Vector, Access, Push};
414/// use simple_sds_sbwt::serialize;
415/// use std::fs;
416///
417/// let filename = serialize::temp_file_name("int-vector-writer");
418/// let mut writer = IntVectorWriter::new(&filename, 13).unwrap();
419/// assert!(writer.is_empty());
420/// writer.push(123); writer.push(456); writer.push(789);
421/// assert_eq!(writer.len(), 3);
422/// writer.close().unwrap();
423///
424/// let v: IntVector = serialize::load_from(&filename).unwrap();
425/// assert_eq!(v.len(), 3);
426/// assert_eq!(v.get(0), 123); assert_eq!(v.get(1), 456); assert_eq!(v.get(2), 789);
427/// fs::remove_file(&filename).unwrap();
428/// ```
429#[derive(Debug)]
430pub struct IntVectorWriter {
431    len: usize,
432    width: usize,
433    writer: RawVectorWriter,
434}
435
436impl IntVectorWriter {
437    /// Creates an empty vector stored in the specified file with the default buffer size.
438    ///
439    /// If the file already exists, it will be overwritten.
440    ///
441    /// # Arguments
442    ///
443    /// * `filename`: Name of the file.
444    /// * `width`: Width of each item in bits.
445    ///
446    /// # Errors
447    ///
448    /// Returns an error of the kind [`ErrorKind::Other`] if the width is invalid.
449    /// Any I/O errors will be passed through.
450    pub fn new<P: AsRef<Path>>(filename: P, width: usize) -> io::Result<IntVectorWriter> {
451        if width == 0 || width > bits::WORD_BITS {
452            return Err(Error::new(ErrorKind::Other, "Integer width must be 1 to 64 bits"));
453        }
454        // The header will contain `len` and `width`.
455        let mut header: Vec<u64> = vec![0, 0];
456        let writer = RawVectorWriter::new(filename, &mut header)?;
457        let result = IntVectorWriter {
458            len: 0,
459            width,
460            writer,
461        };
462        Ok(result)
463    }
464
465    /// Creates an empty vector stored in the specified file with user-defined buffer size.
466    ///
467    /// If the file already exists, it will be overwritten.
468    /// Actual buffer size may be slightly higher than requested.
469    ///
470    /// # Arguments
471    ///
472    /// * `filename`: Name of the file.
473    /// * `width`: Width of each item in bits.
474    /// * `buf_len`: Buffer size in items.
475    ///
476    /// # Errors
477    ///
478    /// Returns an error of the kind [`ErrorKind::Other`] if the width is invalid.
479    /// Any I/O errors will be passed through.
480    ///
481    /// # Panics
482    ///
483    /// May panic if buffer length would exceed the maximum length.
484    pub fn with_buf_len<P: AsRef<Path>>(filename: P, width: usize, buf_len: usize) -> io::Result<IntVectorWriter> {
485        if width == 0 || width > bits::WORD_BITS {
486            return Err(Error::new(ErrorKind::Other, "Integer width must be 1 to 64 bits"));
487        }
488        // The header will contain `len` and `width`.
489        let mut header: Vec<u64> = vec![0, 0];
490        let writer = RawVectorWriter::with_buf_len(filename, &mut header, buf_len * width)?;
491        let result = IntVectorWriter {
492            len: 0,
493            width,
494            writer,
495        };
496        Ok(result)
497    }
498
499    /// Returns the name of the file.
500    pub fn filename(&self) -> &Path {
501        self.writer.filename()
502    }
503
504    /// Returns `true` if the file is open for writing.
505    pub fn is_open(&self) -> bool {
506        self.writer.is_open()
507    }
508
509    /// Flushes the buffer, writes the header, and closes the file.
510    ///
511    /// No effect if the file is closed.
512    ///
513    /// # Errors
514    ///
515    /// Any I/O errors will be passed through.
516    pub fn close(&mut self) -> io::Result<()> {
517        let mut header: Vec<u64> = vec![self.len as u64, self.width as u64];
518        self.writer.close_with_header(&mut header)
519    }
520}
521
522//-----------------------------------------------------------------------------
523
524impl Vector for IntVectorWriter {
525    type Item = u64;
526
527    #[inline]
528    fn len(&self) -> usize {
529        self.len
530    }
531
532    #[inline]
533    fn width(&self) -> usize {
534        self.width
535    }
536
537    #[inline]
538    fn max_len(&self) -> usize {
539        usize::MAX / self.width()
540    }
541}
542
543impl Push for IntVectorWriter {
544    #[inline]
545    fn push(&mut self, value: <Self as Vector>::Item) {
546        unsafe { self.writer.push_int(value, self.width()); }
547        self.len += 1;
548    }
549}
550
551impl Drop for IntVectorWriter {
552    fn drop(&mut self) {
553        let _ = self.close();
554    }
555}
556
557//-----------------------------------------------------------------------------
558
559/// An immutable memory-mapped [`IntVector`].
560///
561/// This is compatible with the serialization format of [`IntVector`].
562///
563/// # Examples
564///
565/// ```
566/// use simple_sds_sbwt::int_vector::{IntVector, IntVectorMapper};
567/// use simple_sds_sbwt::ops::{Vector, Access};
568/// use simple_sds_sbwt::serialize::{MemoryMap, MemoryMapped, MappingMode};
569/// use simple_sds_sbwt::serialize;
570/// use std::fs;
571///
572/// let filename = serialize::temp_file_name("int-vector-mapper");
573/// let mut v = IntVector::with_len(3, 13, 0).unwrap();
574/// v.set(0, 123); v.set(1, 456); v.set(2, 789);
575/// serialize::serialize_to(&v, &filename);
576///
577/// let map = MemoryMap::new(&filename, MappingMode::ReadOnly).unwrap();
578/// let mapper = IntVectorMapper::new(&map, 0).unwrap();
579/// assert_eq!(mapper.len(), v.len());
580/// for i in 0..mapper.len() {
581///     assert_eq!(mapper.get(i), v.get(i));
582/// }
583///
584/// drop(mapper); drop(map);
585/// fs::remove_file(&filename).unwrap();
586/// ```
587#[cfg(not(target_family = "wasm"))]
588#[derive(PartialEq, Eq, Debug)]
589pub struct IntVectorMapper<'a> {
590    len: usize,
591    width: usize,
592    data: RawVectorMapper<'a>,
593}
594
595#[cfg(not(target_family = "wasm"))]
596impl<'a> Vector for IntVectorMapper<'a> {
597    type Item = u64;
598
599    #[inline]
600    fn len(&self) -> usize {
601        self.len
602    }
603
604    #[inline]
605    fn width(&self) -> usize {
606        self.width
607    }
608
609    #[inline]
610    fn max_len(&self) -> usize {
611        usize::MAX / self.width()
612    }
613}
614
615#[cfg(not(target_family = "wasm"))]
616impl<'a> Access<'a> for IntVectorMapper<'a> {
617    type Iter = AccessIter<'a, Self>;
618
619    #[inline]
620    fn get(&self, index: usize) -> <Self as Vector>::Item {
621        assert!(index < self.len(), "Index is out of bounds");
622        unsafe { self.data.int(index * self.width(), self.width()) }
623    }
624
625    fn iter(&'a self) -> Self::Iter {
626        Self::Iter::new(self)
627    }
628}
629
630#[cfg(not(target_family = "wasm"))]
631impl<'a> MemoryMapped<'a> for IntVectorMapper<'a> {
632    fn new(map: &'a MemoryMap, offset: usize) -> io::Result<Self> {
633        if offset + 1 >= map.len() {
634            return Err(Error::new(ErrorKind::UnexpectedEof, "The starting offset is out of range"));
635        }
636        let slice: &[u64] = map.as_ref();
637        let len = slice[offset] as usize;
638        let width = slice[offset + 1] as usize;
639        let data = RawVectorMapper::new(map, offset + 2)?;
640        Ok(IntVectorMapper {
641            len, width, data,
642        })
643    }
644
645    fn map_offset(&self) -> usize {
646        self.data.map_offset() - 2
647    }
648
649    fn map_len(&self) -> usize {
650        self.data.map_len() + 2
651    }
652}
653
654#[cfg(not(target_family = "wasm"))]
655impl<'a> AsRef<RawVectorMapper<'a>> for IntVectorMapper<'a> {
656    #[inline]
657    fn as_ref(&self) -> &RawVectorMapper<'a> {
658        &(self.data)
659    }
660}
661
662//-----------------------------------------------------------------------------
663
664macro_rules! from_extend_int_vector {
665    ($t:ident, $w:expr) => {
666        impl From<Vec<$t>> for IntVector {
667            fn from(v: Vec<$t>) -> Self {
668                let mut result = IntVector::with_capacity(v.len(), $w).unwrap();
669                result.extend(v);
670                result
671            }
672        }
673
674        impl FromIterator<$t> for IntVector {
675            fn from_iter<I: IntoIterator<Item = $t>>(iter: I) -> Self {
676                let mut result = IntVector::new($w).unwrap();
677                result.extend(iter);
678                result
679            }
680        }
681
682        impl Extend<$t> for IntVector {
683            fn extend<I: IntoIterator<Item = $t>>(&mut self, iter: I) {
684                let mut iter = iter.into_iter();
685                let (lower_bound, _) = iter.size_hint();
686                self.reserve(lower_bound);
687                while let Some(value) = iter.next() {
688                    self.push(value as <Self as Vector>::Item);
689                }
690            }
691        }
692
693        impl Extend<$t> for IntVectorWriter {
694            fn extend<I: IntoIterator<Item = $t>>(&mut self, iter: I) {
695                for value in iter {
696                    self.push(value as <Self as Vector>::Item);
697                }
698            }
699        }
700    }
701}
702
703from_extend_int_vector!(u8, 8);
704from_extend_int_vector!(u16, 16);
705from_extend_int_vector!(u32, 32);
706from_extend_int_vector!(u64, 64);
707from_extend_int_vector!(usize, 64);
708
709//-----------------------------------------------------------------------------