bustools_core 0.16.2

Interacting with the kallisto/bus format of scRNAseq data
Documentation

/// Run-length encoder/decoder for streams of `u64`.
///
/// Only runs of one special value (`rle_val`) are run-length encoded; all other values are
/// emitted as is. The encoded stream is a sequence of `u64`s in which each maximal run of
/// `rle_val` appears as the pair `(rle_val, run length)` and every other value appears as a
/// single item.
///
/// When `shift_up_1` is set, values (including the `rle_val` marker) are stored as `value + 1`;
/// run lengths are stored unshifted. Since run lengths are always at least 1, a shifted stream
/// never contains 0.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RunlengthCodec {
    /// The value whose runs are run-length encoded.
    pub rle_val: u64,
    /// Whether to shift encoded values by +1; useful in conjunction with fib-enc.
    pub shift_up_1: bool,
}

impl RunlengthCodec {
    /// Encodes a single value: shifts it up by one if `shift_up_1` is set, otherwise
    /// returns it unchanged.
    ///
    /// # Panics
    /// Panics if `shift_up_1` is set and `lit` is `u64::MAX`, because the shifted value does
    /// not fit into a `u64`. (Wrapping around would put a 0 into the shifted stream.)
    fn encode_literal(&self, lit: u64) -> u64 {
        if self.shift_up_1 {
            lit.checked_add(1)
                .expect("RunlengthCodec: cannot shift u64::MAX up by one")
        } else {
            lit
        }
    }

    /// Decodes a single value: undoes the shift applied by `encode_literal`.
    ///
    /// # Panics
    /// Panics if `shift_up_1` is set and `item` is 0; the encoder never produces a 0 in a
    /// shifted stream, so such input is malformed.
    fn decode_literal(&self, item: u64) -> u64 {
        if self.shift_up_1 {
            item.checked_sub(1)
                .expect("RunlengthCodec: malformed input, 0 is not valid in a shifted stream")
        } else {
            item
        }
    }

    /// Appends the `(marker, run length)` pair for a pending run; does nothing if `runlen` is 0.
    fn flush_run(&self, runlen: u64, encoded: &mut Vec<u64>) {
        if runlen > 0 {
            encoded.push(self.encode_literal(self.rle_val));
            // The run length is stored unshifted; it is always >= 1 here.
            encoded.push(runlen);
        }
    }

    /// Run-length encodes `input`.
    ///
    /// Each maximal run of `rle_val` becomes two items, the (possibly shifted) `rle_val`
    /// followed by the run length. Every other value becomes one (possibly shifted) item.
    /// For example, with `rle_val = 0` and `shift_up_1 = true`, `[0, 0, 0]` encodes to `[1, 3]`.
    ///
    /// Note that the result can be longer than the input when runs are short: a run of
    /// length 1 still takes two items.
    ///
    /// # Panics
    /// Panics if `shift_up_1` is set and a value that has to be emitted is `u64::MAX`.
    pub fn encode(&self, input: impl Iterator<Item = u64>) -> Vec<u64> {
        let mut encoded: Vec<u64> = Vec::new();
        // Length of the run of `rle_val` currently being accumulated (0 = no pending run).
        let mut runlen: u64 = 0;

        for x in input {
            if x == self.rle_val {
                runlen += 1;
            } else {
                // A different value ends the pending run: write the run first, then the value.
                self.flush_run(runlen, &mut encoded);
                runlen = 0;
                encoded.push(self.encode_literal(x));
            }
        }
        // The input may end in the middle of a run.
        self.flush_run(runlen, &mut encoded);
        encoded
    }

    /// Decodes a stream produced by [`RunlengthCodec::encode`] with the same settings.
    ///
    /// Every item that decodes to `rle_val` is treated as a run marker and must be followed
    /// by its (unshifted) run length; all other items decode to a single value.
    ///
    /// # Panics
    /// Panics on malformed input: a run marker that is the last item of the stream, or
    /// (with `shift_up_1` set) a 0 in a value position.
    pub fn decode(&self, input: Vec<u64>) -> Vec<u64> {
        // Starting guess only: long runs expand to more items than they occupy, but a run of
        // length 1 occupies two input items, so the output can also be shorter than the input.
        let mut decoded = Vec::with_capacity(input.len());
        let mut items = input.into_iter();

        // `while let` rather than `for`: the run-length branch pulls a second item itself.
        while let Some(item) = items.next() {
            let value = self.decode_literal(item);

            if value == self.rle_val {
                let runlen = items
                    .next()
                    .expect("RunlengthCodec: malformed input, run marker without a run length");
                decoded.extend((0..runlen).map(|_| value));
            } else {
                decoded.push(value);
            }
        }
        decoded
    }
}

/// Tests for `RunlengthCodec`: each case pins the exact encoded stream and checks that
/// decoding restores the original input.
#[cfg(test)]
mod test {
    use crate::busz::runlength_codec::RunlengthCodec;

    #[test]
    fn test_encode_decode_single_run() {
        // a codec which compresses runs of the value 0
        let c = RunlengthCodec { rle_val: 0, shift_up_1: true };

        let plain = vec![0, 0, 0];
        let enc = c.encode(plain.clone().into_iter());
        // one run: shifted marker (0 + 1) followed by the run length
        assert_eq!(enc, vec![1, 3]);
        assert!(plain.len() > enc.len());

        let dec = c.decode(enc);
        assert_eq!(plain, dec);
    }

    #[test]
    fn test_encode_decode_no_run() {
        // a codec which compresses runs of the value 0
        let c = RunlengthCodec { rle_val: 0, shift_up_1: true };
        let plain = vec![1, 2, 1];
        let enc = c.encode(plain.clone().into_iter());
        // no zeros in the input: every value is just shifted by one
        assert_eq!(enc, vec![2, 3, 2]);
        assert_eq!(plain.len(), enc.len()); // can't be compressed

        let dec = c.decode(enc);
        assert_eq!(plain, dec);
    }

    #[test]
    fn test_encode_decode_minxed_run() {
        // a codec which compresses runs of the value 0
        let c = RunlengthCodec { rle_val: 0, shift_up_1: true };
        let plain = vec![1, 0, 0, 2, 0, 1, 0];
        let enc = c.encode(plain.clone().into_iter());
        assert_eq!(enc, vec![2, 1, 2, 3, 1, 1, 2, 1, 1]);
        // here the encoding is actually longer, due to the frequent length-1 runs of zeros
        assert!(plain.len() < enc.len());

        let dec = c.decode(enc);
        assert_eq!(plain, dec);
    }

    #[test]
    fn test_encode_decode_minxed_run_rle1() {
        // a codec which compresses runs of the value 1
        let c = RunlengthCodec { rle_val: 1, shift_up_1: true };
        let plain = vec![0, 1, 1, 1, 1, 0];
        let enc = c.encode(plain.clone().into_iter());
        // shifted 0, then shifted marker (1 + 1) with run length 4, then shifted 0
        assert_eq!(enc, vec![1, 2, 4, 1]);

        let dec = c.decode(enc);
        assert_eq!(plain, dec);
    }

    #[test]
    fn test_encode_decode_minxed_run_rle1_1() {
        // a codec which compresses runs of the value 1; the input ends inside a run
        let c = RunlengthCodec { rle_val: 1, shift_up_1: true };
        let plain = vec![0, 1, 1];
        let enc = c.encode(plain.clone().into_iter());
        assert_eq!(enc, vec![1, 2, 2]);

        let dec = c.decode(enc);
        assert_eq!(plain, dec);
    }

    #[test]
    fn test_encode_decode_no_shift() {
        // runs of 0, values stored unshifted
        let c = RunlengthCodec { rle_val: 0, shift_up_1: false };
        let plain = vec![0, 0, 1, 1];
        let enc = c.encode(plain.clone().into_iter());
        assert_eq!(enc, vec![0, 2, 1, 1]);

        let dec = c.decode(enc);
        assert_eq!(plain, dec);
    }

    #[test]
    fn test_encode_decode_no_shift_2() {
        // runs of 1, values stored unshifted
        let c = RunlengthCodec { rle_val: 1, shift_up_1: false };
        let plain = vec![0, 0, 1, 1];
        let enc = c.encode(plain.clone().into_iter());
        assert_eq!(enc, vec![0, 0, 1, 2]);

        let dec = c.decode(enc);
        assert_eq!(plain, dec);
    }
}