bio_seq/seq/
slice.rs

1// Copyright 2021-2024 Jeff Knaggs
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6use crate::codec::Codec;
7use crate::error::ParseBioError;
8use crate::seq::Seq;
9use crate::{
10    Complement, ComplementMut, Reverse, ReverseComplement, ReverseComplementMut, ReverseMut,
11};
12
13use crate::Bs;
14use bitvec::field::BitField;
15
16use core::fmt;
17use core::hash::{Hash, Hasher};
18use core::marker::PhantomData;
19use core::str;
20
21use core::ops::{BitAnd, BitOr};
22
/// An unsized, read-only window into part of a sequence
///
/// The element type is carried by the codec parameter `A`; the number of
/// elements is the number of stored bits divided by `A::BITS`.
#[derive(Debug, Eq)]
#[repr(transparent)]
pub struct SeqSlice<A: Codec> {
    /// Zero-sized marker tying the slice to its codec `A`.
    pub(crate) _p: PhantomData<A>,
    /// Underlying bit storage holding the encoded elements.
    pub(crate) bs: Bs,
}
30
31impl<A: Codec> TryFrom<&SeqSlice<A>> for usize {
32    type Error = ParseBioError;
33
34    fn try_from(slice: &SeqSlice<A>) -> Result<usize, Self::Error> {
35        if slice.bs.len() <= usize::BITS as usize {
36            Ok(slice.bs.load_le::<usize>())
37        } else {
38            let len: usize = slice.bs.len() / A::BITS as usize;
39            let expected: usize = usize::BITS as usize / A::BITS as usize;
40            Err(ParseBioError::SequenceTooLong(len, expected))
41        }
42    }
43}
44
45impl<A: Codec> From<&SeqSlice<A>> for u8 {
46    fn from(slice: &SeqSlice<A>) -> u8 {
47        debug_assert!(slice.bs.len() <= u8::BITS as usize);
48        slice.bs.load_le::<u8>()
49    }
50}
51
52impl<A: Codec> SeqSlice<A> {
53    /// unsafely index into the `i`th position of a sequence
54    pub fn nth(&self, i: usize) -> A {
55        A::unsafe_from_bits(self[i].into())
56    }
57
58    pub fn len(&self) -> usize {
59        self.bs.len() / A::BITS as usize
60    }
61
62    /// Get the `i`th element of a `Seq`. Returns `None` if index out of range.
63    pub fn get(&self, i: usize) -> Option<A> {
64        if i >= self.bs.len() / A::BITS as usize {
65            None
66        } else {
67            Some(A::unsafe_from_bits(self[i].into()))
68        }
69    }
70
71    pub fn is_empty(&self) -> bool {
72        self.len() == 0
73    }
74}
75
76impl<A: Codec> From<&SeqSlice<A>> for String {
77    fn from(seq: &SeqSlice<A>) -> Self {
78        seq.into_iter().map(Codec::to_char).collect()
79    }
80}
81
82impl<A: Codec> PartialEq<SeqSlice<A>> for SeqSlice<A> {
83    fn eq(&self, other: &SeqSlice<A>) -> bool {
84        self.bs == other.bs
85    }
86}
87
88impl<A: Codec> PartialEq<SeqSlice<A>> for &SeqSlice<A> {
89    fn eq(&self, other: &SeqSlice<A>) -> bool {
90        self.bs == other.bs
91    }
92}
93
94impl<A: Codec> PartialEq<Seq<A>> for SeqSlice<A> {
95    fn eq(&self, other: &Seq<A>) -> bool {
96        self == other.as_ref()
97    }
98}
99
100impl<A: Codec> PartialEq<Seq<A>> for &SeqSlice<A> {
101    fn eq(&self, other: &Seq<A>) -> bool {
102        *self == other.as_ref()
103    }
104}
105
106impl<A: Codec> PartialEq<&str> for SeqSlice<A> {
107    fn eq(&self, other: &&str) -> bool {
108        let bs = other.as_bytes();
109        if bs.len() != self.len() {
110            return false;
111        }
112        for (a, c) in self.iter().zip(bs) {
113            match A::try_from_ascii(*c) {
114                Some(b) => {
115                    if a != b {
116                        return false;
117                    }
118                }
119                None => return false,
120            }
121        }
122        true
123    }
124}
125
126/// Warning! hashes are not currently stable between platforms/version
127impl<A: Codec> Hash for SeqSlice<A> {
128    fn hash<H: Hasher>(&self, state: &mut H) {
129        self.bs.hash(state);
130        // prepend length to make robust against matching prefixes
131        self.len().hash(state);
132    }
133}
134
135/// Clone a borrowed slice of a sequence into an owned version.
136///
137/// ```
138/// use bio_seq::prelude::*;
139///
140/// let seq = dna!("CATCGATCGATCG");
141/// let slice = &seq[2..7]; // TCGAT
142/// let owned = slice.to_owned();
143///
144/// assert_eq!(&owned, &seq[2..7]);
145/// ```
146///
147impl<A: Codec> ToOwned for SeqSlice<A> {
148    type Owned = Seq<A>;
149
150    fn to_owned(&self) -> Self::Owned {
151        Seq {
152            _p: PhantomData,
153            bv: self.bs.into(),
154        }
155    }
156}
157
/// Identity conversion: a slice viewed as a slice is itself. This lets code
/// that is generic over `AsRef<SeqSlice<A>>` accept a `SeqSlice` directly.
impl<A: Codec> AsRef<SeqSlice<A>> for SeqSlice<A> {
    fn as_ref(&self) -> &SeqSlice<A> {
        self
    }
}
163
164impl<A: Codec> fmt::Display for SeqSlice<A> {
165    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
166        write!(f, "{}", String::from(self))
167    }
168}
169
170impl<A: Codec> BitAnd for &SeqSlice<A> {
171    type Output = Seq<A>;
172
173    fn bitand(self, rhs: Self) -> Self::Output {
174        let mut bv = self.bs.to_bitvec();
175        bv &= &rhs.bs;
176        Seq::<A> {
177            bv,
178            _p: PhantomData,
179        }
180    }
181}
182
183impl<A: Codec> BitOr for &SeqSlice<A> {
184    type Output = Seq<A>;
185
186    fn bitor(self, rhs: Self) -> Self::Output {
187        let mut bv = self.bs.to_bitvec();
188        bv |= &rhs.bs;
189
190        Seq::<A> {
191            bv,
192            _p: PhantomData,
193        }
194    }
195}
196
197impl<A: Codec> ReverseMut for SeqSlice<A> {
198    fn rev(&mut self) {
199        self.bs.reverse();
200        for chunk in self.bs.rchunks_exact_mut(A::BITS as usize) {
201            chunk.reverse();
202        }
203    }
204}
205
206impl<A: Codec + ComplementMut> ComplementMut for SeqSlice<A> {
207    fn comp(&mut self) {
208        unsafe {
209            for base in self.bs.chunks_exact_mut(A::BITS as usize).remove_alias() {
210                let mut bc = A::unsafe_from_bits(base.load_le::<u8>());
211                bc.comp();
212                base.store(bc.to_bits() as usize);
213            }
214        }
215    }
216}
217
/// In-place reverse complement for sequence slices.
///
/// The impl body is empty, so every item comes from the trait's own
/// definition. The `where` clause requires the in-place reverse and
/// complement impls for `SeqSlice<A>`.
impl<A: Codec + ComplementMut> ReverseComplementMut for SeqSlice<A> where
    SeqSlice<A>: ComplementMut + ReverseMut
{
}
222
/// Opts sequence slices into `Reverse`; the impl body is empty, so every
/// item comes from the trait's own definition.
impl<A: Codec> Reverse for SeqSlice<A> {}
224
/// Opts sequence slices into `Complement` for codecs that support in-place
/// complement; the impl body is empty, so every item comes from the trait's
/// own definition.
impl<A: Codec + ComplementMut> Complement for SeqSlice<A> {}
226
/// Reverse complement for sequence slices.
///
/// The impl body is empty, so every item comes from the trait's own
/// definition. The `where` clause requires the in-place reverse and
/// complement impls for `SeqSlice<A>`.
impl<A: Codec + ComplementMut> ReverseComplement for SeqSlice<A> where
    SeqSlice<A>: ComplementMut + ReverseMut
{
}