genedex 0.2.2

A small and fast FM-Index implementation
Documentation
/// Strategy describing how symbols are laid out inside a byte slice.
///
/// Every function is an associated function (there is no `self`); the implementors in this
/// file are field-less marker types. The `idx` arguments of `get`/`set` and the values
/// yielded by `iter_zero_indices` are symbol positions, which need not coincide with byte
/// positions in `slice` (see `HalfBytesCompression`, where two symbols share one byte).
pub(crate) trait SliceCompression {
    /// Returns the symbol stored at symbol position `idx` of `slice`.
    fn get(idx: usize, slice: &[u8]) -> u8;

    /// Stores `value` at symbol position `idx` of `slice`.
    fn set(idx: usize, slice: &mut [u8], value: u8);

    /// Converts a chunk size into the corresponding size under this layout.
    ///
    /// NOTE(review): presumably maps a chunk size counted in symbols to one counted in
    /// stored bytes (identity for `NoSliceCompression`, halved for `HalfBytesCompression`)
    /// — confirm against the callers.
    fn transform_chunk_size(chunk_size: usize) -> usize;

    /// Returns how many symbol positions `slice` provides under this layout.
    fn transformed_slice_len(slice: &[u8]) -> usize;

    /// Iterates over all symbols of `slice` in position order.
    fn iter(slice: &[u8]) -> impl Iterator<Item = u8>;

    /// Iterates over the symbol positions whose symbol equals `0`.
    fn iter_zero_indices(slice: &[u8]) -> impl Iterator<Item = usize>;
}

/// Identity layout: every byte of the slice is exactly one symbol.
pub(crate) struct NoSliceCompression {}

impl SliceCompression for NoSliceCompression {
    /// Returns the byte at `idx`.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is out of bounds for `slice`.
    fn get(idx: usize, slice: &[u8]) -> u8 {
        slice[idx]
    }

    /// Overwrites the byte at `idx` with `value`.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is out of bounds for `slice`.
    fn set(idx: usize, slice: &mut [u8], value: u8) {
        slice[idx] = value;
    }

    /// Returns `chunk_size` unchanged, since one symbol occupies one byte.
    fn transform_chunk_size(chunk_size: usize) -> usize {
        chunk_size
    }

    /// Returns the byte length of `slice`, which equals its symbol count in this layout.
    fn transformed_slice_len(slice: &[u8]) -> usize {
        slice.len()
    }

    /// Yields the bytes of `slice` by value, front to back.
    fn iter(slice: &[u8]) -> impl Iterator<Item = u8> {
        slice.iter().copied()
    }

    /// Yields the positions of all `0` bytes, delegating the search to `memchr::memchr_iter`.
    fn iter_zero_indices(slice: &[u8]) -> impl Iterator<Item = usize> {
        memchr::memchr_iter(0, slice)
    }
}

/// Layout that stores two 4-bit symbols in every byte.
///
/// The symbol at an even position occupies the upper four bits (the "left half") of its
/// byte; the symbol at the following odd position occupies the lower four bits.
pub(crate) struct HalfBytesCompression {}

impl SliceCompression for HalfBytesCompression {
    /// Returns the 4-bit symbol at symbol position `idx`.
    ///
    /// # Panics
    ///
    /// Panics if `idx / 2` is out of bounds for `slice`.
    fn get(idx: usize, slice: &[u8]) -> u8 {
        let packed = slice[idx / 2];

        // Even positions live in the upper nibble, odd positions in the lower one.
        if idx.is_multiple_of(2) {
            return unpack_from_left_half_of_byte(packed);
        }

        unpack_from_right_half_of_byte(packed)
    }

    /// Writes `value` into the nibble that belongs to symbol position `idx`, leaving the
    /// neighbouring nibble of the same byte untouched. Only the low four bits of `value`
    /// end up in the slice.
    ///
    /// # Panics
    ///
    /// Panics if `idx / 2` is out of bounds for `slice`.
    fn set(idx: usize, slice: &mut [u8], value: u8) {
        let target = &mut slice[idx / 2];

        if idx.is_multiple_of(2) {
            return pack_into_left_half_of_byte(target, value);
        }

        pack_into_right_half_of_byte(target, value)
    }

    /// Halves `chunk_size`; integer division, so an odd value rounds down.
    fn transform_chunk_size(chunk_size: usize) -> usize {
        chunk_size / 2
    }

    /// Returns twice the byte length of `slice`, i.e. the number of nibbles it contains.
    fn transformed_slice_len(slice: &[u8]) -> usize {
        slice.len() * 2
    }

    /// Yields, for every byte in order, its upper nibble followed by its lower nibble.
    fn iter(slice: &[u8]) -> impl Iterator<Item = u8> {
        slice.iter().copied().flat_map(|packed| {
            let upper = unpack_from_left_half_of_byte(packed);
            let lower = unpack_from_right_half_of_byte(packed);

            [upper, lower]
        })
    }

    /// Yields the symbol positions whose nibble is `0`, in ascending order.
    fn iter_zero_indices(slice: &[u8]) -> impl Iterator<Item = usize> {
        Self::iter(slice)
            .enumerate()
            .filter(|&(_, symbol)| symbol == 0)
            .map(|(position, _)| position)
    }
}

/// Replaces the upper four bits of `byte` with the low four bits of `value`.
///
/// The lower four bits of `byte` are preserved. Any bits of `value` above the low nibble
/// are shifted out and therefore ignored.
fn pack_into_left_half_of_byte(byte: &mut u8, value: u8) {
    let kept_lower_nibble = *byte & 0x0F;
    let new_upper_nibble = value << 4;

    *byte = new_upper_nibble | kept_lower_nibble;
}

/// Replaces the lower four bits of `byte` with the low four bits of `value`.
///
/// The upper four bits of `byte` are preserved. Any bits of `value` above the low nibble
/// are masked off.
fn pack_into_right_half_of_byte(byte: &mut u8, value: u8) {
    // Clear the slot first, then drop the masked symbol into it.
    *byte &= 0xF0;
    *byte |= value & 0x0F;
}

/// Returns the upper four bits of `byte`, moved down into the range `0..=15`.
fn unpack_from_left_half_of_byte(byte: u8) -> u8 {
    const NIBBLE_BITS: u32 = 4;

    byte >> NIBBLE_BITS
}

/// Returns the lower four bits of `byte`, a value in the range `0..=15`.
fn unpack_from_right_half_of_byte(byte: u8) -> u8 {
    const LOWER_NIBBLE_MASK: u8 = 0x0F;

    byte & LOWER_NIBBLE_MASK
}

/// Packs `text` in place so that each pair of consecutive bytes becomes one byte.
///
/// For every `i < text.len() / 2`, the low four bits of `text[2 * i]` become the upper
/// nibble of `text[i]` and the low four bits of `text[2 * i + 1]` become its lower nibble.
/// After the call the packed data occupies `text[..text.len() / 2]`; bytes from that index
/// onwards are never written. If `text` has odd length, its last byte is not packed.
pub(crate) fn half_byte_compress_text(text: &mut [u8]) {
    let pair_count = text.len() / 2;

    // Writing to `target` is safe while reading ahead: `target <= 2 * target`, so a pair is
    // always read before any position inside it can be overwritten.
    for target in 0..pair_count {
        let first = 2 * target;
        let (upper_symbol, lower_symbol) = (text[first], text[first + 1]);

        let mut packed = 0u8;
        pack_into_left_half_of_byte(&mut packed, upper_symbol);
        pack_into_right_half_of_byte(&mut packed, lower_symbol);

        text[target] = packed;
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Packs a six-symbol text and checks every half-byte access path against it:
    /// in-place compression, `get`, `set`, `iter` and `iter_zero_indices`.
    #[test]
    fn test_half_byte_compress_text() {
        let symbols: [u8; 6] = [
            0b0000_1101,
            0b0000_0000,
            0b0000_0110,
            0b0000_0011,
            0b0000_1111,
            0b0000_0101,
        ];
        let expected_packed: [u8; 3] = [0b1101_0000, 0b0110_0011, 0b1111_0101];

        // In-place compression leaves the packed bytes in the front half of the buffer.
        let mut buffer = symbols;
        half_byte_compress_text(&mut buffer);
        let packed = &buffer[..3];

        assert_eq!(packed, expected_packed);

        // Random access must return the original symbols.
        for (position, symbol) in symbols.iter().copied().enumerate() {
            assert_eq!(symbol, HalfBytesCompression::get(position, packed));
        }

        // Building the packed form symbol by symbol must give the same bytes.
        let mut rebuilt = [0u8; 3];

        for (position, symbol) in symbols.iter().copied().enumerate() {
            HalfBytesCompression::set(position, rebuilt.as_mut_slice(), symbol);
        }
        assert_eq!(rebuilt, packed);

        // Sequential iteration must reproduce the uncompressed text.
        let unpacked: Vec<_> = HalfBytesCompression::iter(packed).collect();
        assert_eq!(unpacked, symbols);

        // Only the second symbol is zero.
        let zero_positions: Vec<_> = HalfBytesCompression::iter_zero_indices(packed).collect();
        assert_eq!(zero_positions, [1]);
    }
}