1pub mod index;
10pub mod iterators;
11
12mod array;
13mod slice;
14
15pub use array::SeqArray;
16pub use slice::SeqSlice;
17
18use crate::codec::{Codec, text};
19use crate::error::ParseBioError;
20use crate::{
21 Complement, ComplementMut, Maskable, MaskableMut, Reverse, ReverseComplement,
22 ReverseComplementMut, ReverseMut,
23};
24
25use crate::{Bs, Bv, Order};
26
27use bitvec::field::BitField;
28use bitvec::view::BitView;
29
30#[cfg(feature = "serde")]
31use serde::{Deserialize, Serialize};
32
33use core::borrow::Borrow;
34use core::hash::{Hash, Hasher};
35use core::marker::PhantomData;
36use core::ops::{Bound, Deref, RangeBounds};
37use core::str::FromStr;
38use core::{fmt, ptr, str};
39
/// An owned, growable sequence of `A`-encoded elements backed by a `Bv` bit vector.
///
/// Elements are appended `A::BITS` bits at a time (see `push`), so the element
/// count is the bit length divided by `A::BITS`.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[repr(transparent)]
pub struct Seq<A: Codec> {
    /// Zero-sized marker tying the sequence to its codec `A`.
    pub(crate) _p: PhantomData<A>,
    /// Backing bit storage holding the packed elements.
    pub(crate) bv: Bv,
}
50
51impl<A: Codec> From<Seq<A>> for usize {
52 fn from(slice: Seq<A>) -> usize {
53 debug_assert!(slice.bv.len() <= usize::BITS as usize);
54 slice.bv.load_le::<usize>() }
56}
57
58impl<A: Codec> Hash for Seq<A> {
59 fn hash<H: Hasher>(&self, state: &mut H) {
60 self.as_ref().hash(state);
61 }
63}
64
65impl<A: Codec> Default for Seq<A> {
66 fn default() -> Self {
67 Self::new()
68 }
69}
70
71impl<A: Codec> Seq<A> {
72 pub fn new() -> Self {
73 Seq {
74 _p: PhantomData,
75 bv: Bv::new(),
76 }
77 }
78
79 fn bit_range<R: RangeBounds<usize>>(&self, range: R) -> (usize, usize) {
80 let s = match range.start_bound() {
81 Bound::Included(&n) => n,
82 Bound::Excluded(&n) => n + 1,
83 Bound::Unbounded => 0,
84 };
85
86 let e = match range.end_bound() {
87 Bound::Included(&n) => n + 1,
88 Bound::Excluded(&n) => n,
89 Bound::Unbounded => self.len(),
90 };
91
92 debug_assert!(s <= e, "Start of range must be less than or equal to end");
93 debug_assert!(e <= self.len(), "Range out of bounds");
94
95 (s * A::BITS as usize, e * A::BITS as usize)
96 }
97
98 pub fn trim_u8(v: &[u8]) -> Result<Self, ParseBioError> {
111 let start = v
112 .iter()
113 .position(|&byte| A::try_from_ascii(byte).is_some())
114 .unwrap_or(v.len());
115
116 let end = v[start..]
117 .iter()
118 .rposition(|&byte| A::try_from_ascii(byte).is_some())
119 .map_or(start, |pos| start + pos + 1);
120
121 v[start..end]
122 .iter()
123 .map(|&byte| A::try_from_ascii(byte).ok_or(ParseBioError::UnrecognisedBase(byte)))
124 .collect()
125 }
126
127 pub fn with_capacity(len: usize) -> Self {
128 Seq {
129 _p: PhantomData,
130 bv: Bv::with_capacity(len * A::BITS as usize),
131 }
132 }
133
134 pub fn bit_and(self, rhs: Seq<A>) -> Seq<A> {
135 Seq::<A> {
136 _p: PhantomData,
137 bv: Bv::from_bitslice(&(self.bv & rhs.bv)),
138 }
139 }
140
141 pub fn bit_or(self, rhs: Seq<A>) -> Seq<A> {
142 Seq::<A> {
143 _p: PhantomData,
144 bv: Bv::from_bitslice(&(self.bv | rhs.bv)),
145 }
146 }
147
148 pub fn push(&mut self, item: A) {
149 let byte: u8 = item.to_bits();
150 self.bv
151 .extend_from_bitslice(&byte.view_bits::<Order>()[..A::BITS as usize]);
152 }
153
154 pub fn clear(&mut self) {
155 self.bv.clear();
156 }
157
158 pub fn truncate(&mut self, len: usize) {
166 self.bv.truncate(len * A::BITS as usize);
167 }
168
169 pub fn prepend(&mut self, other: &SeqSlice<A>) {
177 let mut bv = Bv::with_capacity(self.bv.len() + other.bs.len());
178 bv.extend_from_bitslice(&other.bs);
179 bv.extend_from_bitslice(&self.bv);
180 self.bv = bv;
181 }
182
183 pub fn append(&mut self, other: &SeqSlice<A>) {
191 self.bv.extend_from_bitslice(&other.bs);
192 }
193
194 pub fn insert(&mut self, index: usize, other: &SeqSlice<A>) {
220 assert!(index <= self.len(), "Index out of bounds");
221
222 let i = index * A::BITS as usize;
223 let mut bv = Bv::with_capacity(self.bv.len() + other.bs.len());
224
225 bv.extend_from_bitslice(&self.bs[..i]);
226 bv.extend_from_bitslice(&other.bs);
227 bv.extend_from_bitslice(&self.bs[i..]);
228
229 self.bv = bv;
230 }
231
232 pub fn remove<R: RangeBounds<usize>>(&mut self, range: R) {
240 let (s, e) = self.bit_range(range);
241 self.bv.drain(s..e);
242 }
243
244 pub fn extend<I: IntoIterator<Item = A>>(&mut self, iter: I) {
245 iter.into_iter().for_each(|base| self.push(base));
246 }
247
248 pub fn from_raw(len: usize, bits: &[usize]) -> Option<Self> {
256 let mut bv: Bv = Bv::from_slice(bits);
257 if len > bv.len() {
259 None
260 } else {
261 bv.truncate(len * A::BITS as usize);
262 Some(Seq {
263 _p: PhantomData,
264 bv,
265 })
266 }
267 }
268
269 pub fn into_raw(&self) -> &[usize] {
278 self.bv.as_raw_slice()
279 }
280}
281
282impl<A: Codec> ReverseMut for Seq<A> {
283 fn rev(&mut self) {
284 self.bv.reverse();
285 for chunk in self.bv.rchunks_exact_mut(A::BITS as usize) {
286 chunk.reverse();
287 }
288 }
289}
290
291impl<A: Codec + ComplementMut> ComplementMut for Seq<A> {
292 fn comp(&mut self) {
293 unsafe {
294 for base in self.bv.chunks_exact_mut(A::BITS as usize).remove_alias() {
295 let mut bc = A::unsafe_from_bits(base.load_le::<u8>());
296 bc.comp();
297 base.store(bc.to_bits() as usize);
298 }
299 }
300 }
301}
302
303impl<A: Codec + MaskableMut> MaskableMut for Seq<A> {
304 fn mask(&mut self) {
305 unsafe {
306 for base in self.bv.chunks_exact_mut(A::BITS as usize).remove_alias() {
307 let mut bc = A::unsafe_from_bits(base.load_le::<u8>());
308 bc.mask();
309 base.store(bc.to_bits() as usize);
310 }
311 }
312 }
313 fn unmask(&mut self) {
314 unsafe {
315 for base in self.bv.chunks_exact_mut(A::BITS as usize).remove_alias() {
316 let mut bc = A::unsafe_from_bits(base.load_le::<u8>());
317 bc.unmask();
318 base.store(bc.to_bits() as usize);
319 }
320 }
321 }
322}
323
// Empty impl: nothing is overridden here, so `Seq<A>` uses whatever items
// `Maskable` itself provides.
impl<A: Codec + MaskableMut> Maskable for Seq<A> {}
325
// Empty impl: available whenever `Seq<A>` is both `ComplementMut` and
// `ReverseMut`; nothing is overridden, so the trait's own items are used.
impl<A: Codec + ComplementMut> ReverseComplementMut for Seq<A> where
    Seq<A>: ComplementMut + ReverseMut
{
}
330
// Empty impl: nothing is overridden, so `Seq<A>` uses the items `Reverse` provides.
impl<A: Codec> Reverse for Seq<A> {}
332
// Empty impl: nothing is overridden, so `Seq<A>` uses the items `Complement` provides.
impl<A: Codec + ComplementMut> Complement for Seq<A> {}
334
// Empty impl: available whenever `Seq<A>` is both `ComplementMut` and
// `ReverseMut`; nothing is overridden, so the trait's own items are used.
impl<A: Codec + ComplementMut> ReverseComplement for Seq<A> where Seq<A>: ComplementMut + ReverseMut {}
336
337impl<A: Codec> PartialEq<SeqSlice<A>> for Seq<A> {
338 fn eq(&self, other: &SeqSlice<A>) -> bool {
339 self.as_ref() == other
340 }
341}
342
343impl<A: Codec> PartialEq<&SeqSlice<A>> for Seq<A> {
344 fn eq(&self, other: &&SeqSlice<A>) -> bool {
345 self.as_ref() == *other
346 }
347}
348
349impl<A: Codec> PartialEq<Seq<A>> for &Seq<A> {
350 fn eq(&self, other: &Seq<A>) -> bool {
351 **self == *other
352 }
353}
354
355impl<A: Codec> PartialEq<&Seq<A>> for Seq<A> {
356 fn eq(&self, other: &&Seq<A>) -> bool {
357 *self == **other
358 }
359}
360
361impl<A: Codec> Borrow<SeqSlice<A>> for Seq<A> {
385 fn borrow(&self) -> &SeqSlice<A> {
386 self.as_ref()
387 }
388}
389
390impl<A: Codec> Borrow<SeqSlice<A>> for &Seq<A> {
391 fn borrow(&self) -> &SeqSlice<A> {
392 self.as_ref()
393 }
394}
395
396impl<A: Codec> Deref for Seq<A> {
410 type Target = SeqSlice<A>;
411
412 fn deref(&self) -> &Self::Target {
413 let bs: *const Bs = ptr::from_ref::<Bs>(&self.bv);
414 unsafe { &*(bs as *const SeqSlice<A>) }
415 }
416}
417
418impl<A: Codec> AsRef<SeqSlice<A>> for Seq<A> {
432 fn as_ref(&self) -> &SeqSlice<A> {
433 self
434 }
435}
436
437impl<A: Codec> Clone for Seq<A> {
450 fn clone(&self) -> Self {
451 Self {
452 _p: PhantomData,
453 bv: self.bv.clone(),
454 }
455 }
456}
457
458impl<A: Codec> FromIterator<A> for Seq<A> {
459 fn from_iter<I: IntoIterator<Item = A>>(iter: I) -> Self {
460 let i = iter.into_iter();
461 let mut seq = Seq::with_capacity(i.size_hint().0);
462 seq.extend(i);
463 seq
464 }
465}
466
467impl<A: Codec> From<&Vec<A>> for Seq<A> {
468 fn from(vec: &Vec<A>) -> Self {
469 vec.iter().copied().collect()
472 }
473}
474
475impl<A: Codec, B: Codec> From<&SeqSlice<A>> for Seq<B>
476where
477 A: Into<B>,
478{
479 fn from(slice: &SeqSlice<A>) -> Self {
480 slice.iter().map(Into::into).collect()
481 }
482}
483
484impl<A: Codec, B: Codec, const N: usize, const W: usize> From<&SeqArray<A, N, W>> for Seq<B>
485where
486 A: Into<B>,
487{
488 fn from(slice: &SeqArray<A, N, W>) -> Self {
489 slice.iter().map(Into::into).collect()
490 }
491}
492
493impl<A: Codec, B: Codec, const N: usize, const W: usize> From<SeqArray<A, N, W>> for Seq<B>
494where
495 A: Into<B>,
496{
497 fn from(slice: SeqArray<A, N, W>) -> Self {
498 slice.iter().map(Into::into).collect()
499 }
500}
501
502impl<A: Codec> TryFrom<&str> for Seq<A> {
503 type Error = ParseBioError;
504
505 fn try_from(s: &str) -> Result<Self, Self::Error> {
506 Seq::<A>::try_from(s.as_bytes())
507 }
508}
509
510impl<A: Codec> TryFrom<String> for Seq<A> {
511 type Error = ParseBioError;
512
513 fn try_from(s: String) -> Result<Self, Self::Error> {
514 Seq::<A>::try_from(s.as_str())
515 }
516}
517
518impl<A: Codec> TryFrom<&String> for Seq<A> {
519 type Error = ParseBioError;
520
521 fn try_from(s: &String) -> Result<Self, Self::Error> {
522 Seq::<A>::try_from(s.as_str())
523 }
524}
525
526impl<A: Codec> FromStr for Seq<A> {
527 type Err = ParseBioError;
528
529 fn from_str(s: &str) -> Result<Self, Self::Err> {
530 Seq::<A>::try_from(s)
531 }
532}
533
534impl<A: Codec> TryFrom<&[u8]> for Seq<A> {
535 type Error = ParseBioError;
536
537 fn try_from(v: &[u8]) -> Result<Self, Self::Error> {
538 Self::try_from(v.to_vec())
539 }
540}
541
542impl<A: Codec> TryFrom<Vec<u8>> for Seq<A> {
543 type Error = ParseBioError;
544
545 fn try_from(v: Vec<u8>) -> Result<Self, Self::Error> {
546 v.into_iter()
550 .map(|byte| A::try_from_ascii(byte).ok_or(ParseBioError::UnrecognisedBase(byte)))
551 .collect()
552 }
553}
554
555impl<A: Codec> From<Seq<A>> for String {
556 fn from(seq: Seq<A>) -> Self {
557 String::from(seq.as_ref())
558 }
559}
560
561impl<A: Codec> From<&Seq<A>> for String {
562 fn from(seq: &Seq<A>) -> Self {
563 String::from(seq.as_ref())
564 }
565}
566
567impl<A: Codec> fmt::Display for Seq<A> {
568 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
569 fmt::Display::fmt(self.as_ref(), f)
570 }
571}
572
573impl<A: Codec> Extend<A> for Seq<A> {
574 fn extend<T: IntoIterator<Item = A>>(&mut self, iter: T) {
575 self.extend(iter);
576 }
577}
578
579impl From<Vec<usize>> for Seq<text::Dna> {
580 fn from(vec: Vec<usize>) -> Self {
581 Seq {
582 _p: PhantomData,
583 bv: Bv::from_vec(vec),
584 }
585 }
586}
587
588impl<A: Codec> From<&Bs> for Seq<A> {
590 fn from(bs: &Bs) -> Self {
591 Seq {
592 _p: PhantomData,
593 bv: bs.into(),
594 }
595 }
596}
597
598impl<A: Codec> From<Bv> for Seq<A> {
600 fn from(bv: Bv) -> Self {
601 Seq {
602 _p: PhantomData,
603 bv,
604 }
605 }
606}
607
608#[cfg(test)]
609mod tests {
610 use crate::codec::text;
611 use crate::prelude::*;
612 use crate::{Bv, Order};
613 use bitvec::prelude::*;
614 use core::borrow::Borrow;
615 use core::hash::{Hash, Hasher};
616 use core::marker::PhantomData;
617 use std::collections::hash_map::DefaultHasher;
618
619 #[test]
620 fn test_revcomp() {
621 let s1: Seq<Dna> = dna!("ATGTGTGCGACTGA").into();
622 let mut s2: Seq<Dna> = dna!("TCAGTCGCACACAT").into();
623 let s3: &SeqSlice<Dna> = &s1;
624
625 assert_eq!(s3.to_revcomp(), s2.to_revcomp().to_revcomp());
626 assert_eq!(s3.to_revcomp(), &s2);
627
628 s2.revcomp();
629
630 assert_eq!(s3.to_revcomp(), s2.to_revcomp());
631 assert_ne!(s3, s2.to_revcomp());
632 }
633
634 #[test]
635 fn test_revcomp_mismatched_sizes() {
636 let s1 = dna!("AAAA");
637 let mut s2: Seq<Dna> = dna!("TTTTT").into();
638 s2.revcomp();
639 assert_ne!(s1, s2);
640 }
641
642 #[test]
643 fn test_revcomp_idempotence() {
644 let mut s = dna!("AAACGCTACGTACGCGCCTTCGGGGCATCAGCACCAC").to_owned();
645 let sc = dna!("AAACGCTACGTACGCGCCTTCGGGGCATCAGCACCAC");
646 s.revcomp();
647 assert_eq!(s.to_revcomp(), sc);
648 s.comp();
649 s.rev();
650 s.rev();
651 s.comp();
652 assert_eq!(s.to_revcomp(), sc);
653 }
654 #[test]
655 fn slice_index_comparisions() {
656 let s1 = dna!("ATGTGTGCGACTGATGATCAAACGTAGCTACG");
657 let s2 = dna!("ACGTGTGTGCTAGCTAATCGATCAAAAAG");
658
659 assert_eq!(&s1[0], &s2[0]);
660 assert_ne!(&s1[1], &s2[1]);
661 assert_eq!(&s1[2..4], &s2[2..4]);
662 assert_eq!(&s1[15..21], &s2[19..25]);
663 assert_ne!(&s1[2..20], &s2[2..20]);
664 }
665
666 #[test]
667 fn slice_index_owned() {
668 let seq = dna!("GCTCGATCACT");
669
670 assert_eq!(&seq[..], dna!("GCTCGATCACT"));
671 assert_eq!(&seq[..=4], dna!("GCTCG"));
672 assert_eq!(&seq[1..=4], dna!("CTCG"));
673 assert_eq!(&seq[1..4], dna!("CTC"));
674 assert_eq!(&seq[..4], dna!("GCTC"));
675 assert_eq!(&seq[1..], dna!("CTCGATCACT"));
676 }
677
678 #[test]
679 fn slice_indexing() {
680 let seq = dna!("TGCATCGAT");
681
682 assert_ne!(&seq[..], &dna!("AGCATCGAA")[..]);
683 assert_ne!(&seq[3..=6], &dna!("ATC")[..]);
684 assert_ne!(&seq[..=6], &dna!("TGCATC")[..]);
685 assert_ne!(&seq[4..5], &dna!("TC")[..]);
686 assert_ne!(&seq[..6], &dna!("TGCAT")[..]);
687 assert_ne!(&seq[5..], &dna!("TCGAT")[..]);
688
689 assert_eq!(&seq[..], &dna!("TGCATCGAT")[..]);
690 assert_eq!(&seq[3..=6], &dna!("ATCG")[..]);
691 assert_eq!(&seq[..=6], &dna!("TGCATCG")[..]);
692 assert_eq!(&seq[4..5], &dna!("T")[..]);
693 assert_eq!(&seq[..6], &dna!("TGCATC")[..]);
694 assert_eq!(&seq[5..], &dna!("CGAT")[..]);
695 }
696
697 #[test]
698 fn slice_index_ranges() {
699 let s1: &'static SeqSlice<Dna> = dna!("ACGACTGATCGA");
700 let s2: &'static SeqSlice<Dna> = dna!("TCGAACGACTGA");
701
702 assert_eq!(&s1[..8], &s2[4..]);
703 assert_eq!(&s1[8..], &s2[..4]);
704 assert_ne!(&s1[8..], &s2[8..]);
705 assert_ne!(&s1[..], &s2[..4]);
706
707 assert_eq!(&s1[..=7], &s2[4..]);
708 assert_eq!(&s1[8..], &s2[..=3]);
709 assert_ne!(&s1[8..11], &s2[8..=11]);
710 assert_ne!(&s1[..], &s2[..=4]);
711 }
712
713 #[test]
714 fn slice_nth() {
715 let s = dna!("ATGTGTGCGACTGATGATCAAACGTAGCTACG");
716
717 assert_eq!(s.nth(0), Dna::A);
718 assert_ne!(s.nth(0), Dna::G);
719
720 assert_eq!(s.nth(1), Dna::T);
721 assert_ne!(s.nth(1), Dna::C);
722
723 assert_eq!(s.nth(s.len() - 1), Dna::G);
724 assert_ne!(s.nth(s.len() - 1), Dna::C);
725 }
726
727 #[test]
728 fn slice_rangeto_and_full() {
729 let s1 = dna!("ATCGACTAGCATGCTACG");
730 let s2 = dna!("ATCGACTAG");
731
732 assert_eq!(&s1[..s2.len()], &s2[..]);
733 assert_ne!(&s2[..s2.len()], &s1[..]);
734 }
735
736 #[test]
737 fn from_slice() {
738 let s1 = dna!("ATGTGTGCGACTGATGATCAAACGTAGCTACG");
739 let s: &SeqSlice<Dna> = &s1[15..21];
740 assert_eq!(format!("{}", s), "GATCAA");
741 }
742
743 #[test]
744 fn string_to_seq() {
745 let seq_str = "ACTGACTG";
746 let seq: Result<Seq<Dna>, _> = seq_str.try_into();
747 assert!(seq.is_ok());
748 assert_eq!(seq.unwrap().to_string(), seq_str);
749 }
750
751 #[test]
752 fn invalid_string_to_seq() {
753 let invalid_seq_str = "ACUGACTG";
754 let seq: Result<Seq<Dna>, _> = invalid_seq_str.try_into();
755 assert!(seq.is_err());
756 }
757
758 #[test]
759 fn seq_to_string() {
760 let seq_str = "ACTGACTG";
761 let seq: Seq<Dna> = seq_str.try_into().unwrap();
762 let result_str: String = seq.into();
763 assert_eq!(result_str, seq_str);
764 }
765
766 #[test]
767 fn seqslice_to_string() {
768 let seq_str = "ACTGACTG";
769 let seq: Seq<Dna> = seq_str.try_into().unwrap();
770 let slice = &seq[1..5];
771 let result_str: String = slice.into();
772 assert_eq!(result_str, "CTGA");
773 }
774
775 #[test]
776 #[should_panic(expected = "range 2..18 out of bounds: 16")]
777 fn invalid_seqslice_to_string() {
778 let seq_str = "ACTGACTG";
779 let seq: Seq<Dna> = seq_str.try_into().unwrap();
780 let _ = &seq[1..9];
781 }
782
783 #[test]
784 fn test_push() {
785 let mut seq = Seq::<Dna>::new();
786 seq.push(Dna::A);
787 seq.push(Dna::C);
788 seq.push(Dna::G);
789 seq.push(Dna::T);
790
791 assert_eq!(seq.len(), 4);
792 assert_eq!(String::from(seq), "ACGT")
793 }
794
795 #[test]
796 fn test_extend_amino() {
797 let mut seq = Seq::<Amino>::new();
798 seq.push(Amino::S);
799 seq.push(Amino::L);
800
801 seq.extend(vec![Amino::Y, Amino::M].into_iter());
802
803 assert_eq!(seq.len(), 4);
804 assert_eq!(String::from(seq), "SLYM");
805 }
806 #[test]
807 fn test_extend() {
808 let mut seq = Seq::<Dna>::new();
809 seq.push(Dna::A);
810 seq.push(Dna::C);
811
812 seq.extend(vec![Dna::G, Dna::T].into_iter());
813
814 assert_eq!(seq.len(), 4);
815 assert_eq!(String::from(seq), "ACGT");
816 }
817
818 #[test]
819 fn test_eqs() {
820 let seq: Seq<Dna> = "ACTAGCATCGA".try_into().unwrap();
821 let seq2: Seq<Dna> = "ACTAGCATCGA".try_into().unwrap();
822 let slice: &SeqSlice<Dna> = &seq;
823 let slice2: &SeqSlice<Dna> = &seq2[..];
824 assert_eq!(seq, slice);
825 assert_eq!(seq2, slice);
826 assert_eq!(seq2, slice2);
827 assert_eq!(slice, slice2);
828 assert_eq!(seq, seq2);
829 }
830
831 #[test]
832 fn test_str_eqs() {
833 let string: String = "ACTAGCATCGA".into();
834 let slice: &str = "GCTGCATCGATC";
835
836 let seq1: Seq<Dna> = Seq::<Dna>::try_from(slice).unwrap();
837 let seq2: Seq<Dna> = Seq::<Dna>::try_from(string.clone()).unwrap();
838
839 assert_eq!(seq2.to_string(), string);
840 assert_eq!(seq1.to_string(), slice);
841
842 assert_ne!(seq2.to_string(), slice);
843 assert_ne!(seq1.to_string(), string);
844 }
845
846 #[test]
847 fn test_from_iter() {
848 let iter = vec![Dna::A, Dna::C, Dna::G, Dna::T].into_iter();
849 let seq: Seq<Dna> = Seq::from_iter(iter);
850
851 assert_eq!(seq.len(), 4);
852 assert_eq!(String::from(seq), "ACGT");
853 }
854
855 #[test]
856 fn test_bit_order() {
857 let raw: usize = 0b10_11_01_11_10_01_00_01;
858 let mut bv: Bv = Default::default();
859 bv.extend(&raw.view_bits::<Order>()[..(Dna::BITS as usize * 8)]);
860 let s = Seq::<Dna> {
861 bv,
862 _p: PhantomData,
863 };
864 assert_eq!(dna!("CACGTCTG").to_string(), "CACGTCTG");
865 assert_eq!(String::from(s), "CACGTCTG");
866 }
868
869 #[test]
870 fn test_borrow() {
871 let seq: Seq<Dna> = dna!("ACGACCCCCATAGATGGGCTG").into();
872 let slice: &SeqSlice<Dna> = seq.borrow();
873 assert_eq!(slice, &seq[..]);
874 assert_ne!(slice, &seq[1..]);
875 }
876
877 #[test]
878 fn test_deref() {
879 let seq: Seq<Dna> = dna!("AGAATGATCG").into();
880 let slice: &SeqSlice<Dna> = &*seq;
881
882 assert_eq!(slice, &seq[..]);
883 assert_ne!(slice, &seq[1..]);
884 }
885
886 #[test]
887 fn test_asref() {
888 let seq: Seq<Dna> = dna!("AGAATGATCAAAATATATATAAAG").into();
889 let slice: &SeqSlice<Dna> = seq.as_ref();
890 assert_ne!(slice, &seq[2..5]);
891 assert_eq!(slice, &seq[..]);
892 }
893
894 #[test]
895 fn test_to_owned() {
896 let seq: Seq<Dna> = dna!("AGAATGAATCG").into();
897 let slice: &SeqSlice<Dna> = &seq;
898 let owned: Seq<Dna> = slice[2..5].to_owned();
899 assert_eq!(&owned, &seq[2..5]);
900 assert_eq!(owned, seq[2..5].to_owned());
901 assert_ne!(&owned, &seq[..]);
902 }
903
904 #[test]
905 fn test_clone() {
906 let seq: Seq<Dna> = dna!("AGAATGATGGGGGGGGGGGCG").into();
907 let cloned = seq.clone();
908 assert_eq!(seq, cloned);
909 }
910 #[test]
911 fn test_trim() {
912 let seq = b"AGAATGATGGGGGGGGGGGCG";
913 let s: Seq<Dna> = Seq::trim_u8(seq).unwrap();
914 assert_eq!(s, dna!("AGAATGATGGGGGGGGGGGCG"));
915
916 let seq = b"NNNNAGAATGATGGGGGGGGGGGCGNNNNNNNNNNN";
917 let s: Seq<Dna> = Seq::trim_u8(seq).unwrap();
918 assert_eq!(s, dna!("AGAATGATGGGGGGGGGGGCG"));
919
920 let seq = b"NNNNAGAATGATGGGGNGGGGGGGCGNNNNNNNNNNN";
921 let s: Result<Seq<Dna>, ParseBioError> = Seq::trim_u8(seq);
922 assert_eq!(s, Err(ParseBioError::UnrecognisedBase(b'N')));
923
924 let seq = b"AGAATGATGGGGGGGGGGGCG";
925 let s: Seq<Dna> = Seq::trim_u8(seq).unwrap();
926 assert_eq!(s, dna!("AGAATGATGGGGGGGGGGGCG"));
927
928 let seq = b"NNNNAGAATGATGGGGGGGGGGGCGNNNNNNNNNNN";
929 let s: Seq<Dna> = Seq::trim_u8(seq).unwrap();
930 assert_eq!(s, dna!("AGAATGATGGGGGGGGGGGCG"));
931
932 let seq = b"NNNNAGAATGATGGGGNGGGGGGGCGNNNNNNNNNNN";
933 let s: Result<Seq<Dna>, ParseBioError> = Seq::trim_u8(seq);
934 assert_eq!(s, Err(ParseBioError::UnrecognisedBase(b'N')));
935
936 let seq = b"";
937 let s: Result<Seq<Dna>, ParseBioError> = Seq::trim_u8(seq);
938 assert!(s.is_ok());
939 assert_eq!(s.unwrap(), dna!(""));
940
941 let seq = b"XXXX";
942 let s: Result<Seq<Dna>, ParseBioError> = Seq::trim_u8(seq);
943 assert!(s.is_ok());
944 assert_eq!(s.unwrap(), dna!(""));
945
946 let seq = b"XXACGT";
947 let s: Seq<Dna> = Seq::trim_u8(seq).unwrap();
948 assert_eq!(s, dna!("ACGT"));
949
950 let seq = b"ACGTXX";
951 let s: Seq<Dna> = Seq::trim_u8(seq).unwrap();
952 assert_eq!(s, dna!("ACGT"));
953
954 let seq = b"ACGTACGTACGTACGTACGTACGT";
955 let s: Seq<Dna> = Seq::trim_u8(seq).unwrap();
956 assert_eq!(s, dna!("ACGTACGTACGTACGTACGTACGT"));
957
958 let seq = b"XXACGTXXACGTXX";
959 let s: Result<Seq<Dna>, ParseBioError> = Seq::trim_u8(seq);
960 assert_eq!(s, Err(ParseBioError::UnrecognisedBase(b'X')));
961
962 let seq = b"A";
963 let s: Seq<Dna> = Seq::trim_u8(seq).unwrap();
964 assert_eq!(s, dna!("A"));
965
966 let seq = b"X";
967 let s: Result<Seq<Dna>, ParseBioError> = Seq::trim_u8(seq);
968 assert!(s.is_ok());
969 assert_eq!(s.unwrap(), dna!(""));
970
971 }
985
986 #[test]
987 fn test_seq_eq_and_hash() {
988 let seq1: Seq<Dna> = "ACGT".try_into().unwrap();
989 let seq2: Seq<Dna> = "ACGT".try_into().unwrap();
990
991 assert_eq!(seq1, seq2);
993
994 let mut hasher1 = DefaultHasher::new();
996 seq1.hash(&mut hasher1);
997 let hash1 = hasher1.finish();
998
999 let mut hasher2 = DefaultHasher::new();
1000 seq2.hash(&mut hasher2);
1001 let hash2 = hasher2.finish();
1002
1003 assert_eq!(hash1, hash2);
1004 }
1005
1006 #[test]
1007 fn test_seq_slice_eq() {
1008 let seq1: Seq<Dna> = "ACGTAAAAAAAAAAAAACGTAAAACCCCGGGGTTTTA".try_into().unwrap();
1009 let seq2: Seq<Dna> = "ACGTAAAAAAAAAAAAACGTAAAACCCCGGGGTTTTAA".try_into().unwrap();
1010
1011 let slice1a: &SeqSlice<Dna> = &seq1[..];
1012 let slice1b: &SeqSlice<Dna> = &seq1[..];
1013 let slice2a: &SeqSlice<Dna> = &seq2[..seq2.len() - 1];
1014 let slice2b: &SeqSlice<Dna> = &seq2[..seq2.len() - 1];
1015
1016 let seq3: Seq<Dna> = slice2b.into();
1017
1018 assert_eq!(slice1a, slice2b);
1019 assert_eq!(&slice2a, &slice2b);
1020 assert_eq!(seq1, slice2a);
1021 assert_eq!(slice1b, seq3);
1022 assert_eq!(slice1a, slice1b);
1023 assert_eq!(seq1, seq3);
1024
1025 assert_ne!(seq1, seq2);
1026 assert_ne!(seq2, seq3);
1027 assert_ne!(seq2, slice2a);
1028 assert_ne!(seq2, slice1b);
1029
1030 assert_eq!(&slice1a, &slice1b);
1031
1032 assert_eq!(seq1, &seq1);
1033
1034 assert_eq!(seq1, &seq3);
1035 assert_eq!(&seq1, seq3);
1036 assert_eq!(&seq1, &seq3);
1037
1038 assert_eq!(slice1a, seq3);
1039 assert_eq!(seq1, slice2a);
1042 assert_eq!(&seq1, slice2a);
1043 assert_eq!(&seq1, slice2a);
1046 }
1047 #[test]
1048 fn test_seq_slice_hash() {
1049 let seq1: Seq<Dna> = "ACGTAAAAAAAAAAAAACGTAAAACCCCGGGGAAAAA".try_into().unwrap();
1050 let seq2: Seq<Dna> = "ACGTAAAAAAAAAAAAACGTAAAACCCCGGGGAAAAA".try_into().unwrap();
1051
1052 let seq3: Seq<Dna> = "ACGTAAAAAAAAAAAAACGTAAAACCCCGGGG".try_into().unwrap();
1053
1054 let slice1 = &seq1[..];
1055 let slice2 = &seq2[..];
1056
1057 let slice3 = &seq3[..];
1058
1059 let slice1_32 = &seq1[..32];
1060
1061 let mut hasher1 = DefaultHasher::new();
1062 seq1.hash(&mut hasher1);
1063 let full1 = hasher1.finish();
1064
1065 let mut hasher1a = DefaultHasher::new();
1066 seq1.hash(&mut hasher1a);
1067 let full1_alt = hasher1a.finish();
1068
1069 let mut hasher2 = DefaultHasher::new();
1070 seq2.hash(&mut hasher2);
1071 let full2 = hasher2.finish();
1072
1073 let mut hasher3 = DefaultHasher::new();
1074 slice1.hash(&mut hasher3);
1075 let full1_slice = hasher3.finish();
1076
1077 let mut hasher4 = DefaultHasher::new();
1078 slice2.hash(&mut hasher4);
1079 let full2_slice = hasher4.finish();
1080
1081 let mut hasher5 = DefaultHasher::new();
1082 slice3.hash(&mut hasher5);
1083 let short1_slice = hasher5.finish();
1084
1085 let mut hasher6 = DefaultHasher::new();
1086 slice1_32.hash(&mut hasher6);
1087 let seq1_short = hasher6.finish();
1088
1089 assert_eq!(full1, full1_alt);
1090 assert_eq!(full1, full2);
1091 assert_ne!(full2_slice, short1_slice);
1092 assert_eq!(full2, full1_slice);
1093 assert_eq!(short1_slice, seq1_short);
1094 assert_ne!(seq1_short, full1_slice);
1095
1096 assert_ne!(full1, short1_slice);
1097 }
1098
1099 #[test]
1100 fn test_fromstr() {
1101 let seq: Result<Seq<Dna>, ParseBioError> =
1102 "ACGATGAGTAGTCGCCATCGTATCTTTGACTGCCGATGCTA".parse();
1103 assert!(seq.is_ok());
1104
1105 let seq: Result<Seq<Dna>, ParseBioError> =
1106 "ACGATGAGTAGBCGCCATCGTATCTTTGACTGCCGATGCTA".parse();
1107 assert_eq!(seq, Err(ParseBioError::UnrecognisedBase(b'B')));
1108 }
1109
1110 #[test]
1111 fn test_lens() {
1112 assert_eq!(iupac!("AWANWATNA---SKAGTCAA").len(), 20)
1113 }
1114
1115 #[test]
1116 fn test_unique_bitarray_ident() {
1117 let s1 = dna!(
1118 "ATCGACTACGATCGCTACGATCGATCGATCGATCGAATCTCCCGCGCGATCATCGATCATCGCTACGTACGTCGAAAAATATAATGGG"
1119 );
1120 let s2 = dna!(
1121 "ATCGACTACGATCGCTACGATCGATCGATCGATCGAATCTCCCGCGCGATCATCGATCATCGCTACGTACGTCGAAAAATATAATGGG"
1122 );
1123
1124 let s3 = dna!(
1126 "CTCGACTACGATCGCTACGATCGATCGATCGATCGAATCTCCCGCGCGATCATCGATCATCGCTACGTACGTCGAAAAATATAATGGG"
1127 );
1128
1129 let s4 = dna!(
1131 "ATCGACTACGATCGCTACGATCGATCGATCGATCGAATCTCCCGCGCGATCATCGATCATCGCTACGTACGTCGAAAAATATAATGGGA"
1132 );
1133
1134 let s5 = dna!(
1136 "ATCGACTACGATCGCTACGATCGATCGATCGATCGAATCTCCCGCGCGATCATCGATCATCGCTACGTACGTCGAAAAATATAATGGC"
1137 );
1138
1139 assert_eq!(s1, s2);
1140 assert_ne!(s1, s3);
1141
1142 assert_ne!(s1.len(), s4.len());
1144 assert_ne!(s4.len(), s1.len());
1145
1146 assert_ne!(s5, s1);
1147 }
1148
1149 #[test]
1150 fn test_static() {
1151 use crate::seq::SeqArray;
1152
1153 static B: SeqArray<Dna, 5, 1> = SeqArray {
1154 _p: PhantomData,
1155 ba: bitarr![const usize, Lsb0; 1,1,0,1,0,0,1,0,1,1],
1156 };
1157
1158 let s: &'static SeqSlice<Dna> = &B;
1159
1160 let x: &'static str = "TGACT";
1161
1162 assert_eq!(s.to_string(), x);
1163 }
1164
1165 #[test]
1166 fn test_to_from_raw() {
1167 let s = "TCAGCTAGCTACGACTGATCGATCGACTGATGCCGCGCGCGGCGCCGCGCGCGCGCGCCGCGCGCCCCGCGCGCGGCGCGCGCCGCGCGCGCGCGCGGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGCGC";
1168
1169 let seq: Seq<Dna> = s.try_into().unwrap();
1170 let raw = seq.into_raw();
1171 let new = Seq::<Dna>::from_raw(s.len(), raw);
1172 assert_eq!(new.unwrap(), seq);
1173
1174 let bad = Seq::<Dna>::from_raw(s.len() + 1, raw);
1175 assert_ne!(bad.unwrap(), seq);
1176
1177 let bad = Seq::<Dna>::from_raw(342, raw);
1178 assert_eq!(bad, None);
1179 }
1180 #[test]
1181 fn test_seq_and_seq_slice_eq_and_hash() {
1182 let seq: Seq<text::Dna> = Seq::try_from("ACGT".to_string()).unwrap();
1183 let slice = &seq[..];
1184
1185 assert_eq!(seq, slice);
1187 assert_eq!(&seq, slice);
1188 assert_eq!(slice, &seq);
1189
1190 let mut hasher1 = std::collections::hash_map::DefaultHasher::new();
1192 seq.hash(&mut hasher1);
1193 let hash1 = hasher1.finish();
1194
1195 let mut hasher2 = std::collections::hash_map::DefaultHasher::new();
1196 slice.hash(&mut hasher2);
1197 let hash2 = hasher2.finish();
1198
1199 assert_eq!(hash1, hash2);
1200 }
1201
1202 #[test]
1203 fn test_prepend() {
1204 let mut seq1 =
1205 Seq::<Dna>::from_str("GCTCGATCGATCGATCGACTGACTGACGCGCGCATCCGATAAAAAAAAT").unwrap();
1206 seq1.prepend(dna!("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"));
1207 assert_eq!(
1208 seq1.to_string(),
1209 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGCTCGATCGATCGATCGACTGACTGACGCGCGCATCCGATAAAAAAAAT"
1210 );
1211
1212 let mut seq2 =
1213 Seq::<Dna>::from_str("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA").unwrap();
1214 seq2.prepend(dna!("GCTCGATCGATCGATCGACTGACTGACGCGCGCATCCGATAAAAAAAAT"));
1215 assert_eq!(
1216 seq2.to_string(),
1217 "GCTCGATCGATCGATCGACTGACTGACGCGCGCATCCGATAAAAAAAATAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
1218 );
1219
1220 assert_ne!(seq1, seq2);
1221 }
1222
1223 #[test]
1224 fn test_append() {
1225 let mut seq1 =
1226 Seq::<Dna>::from_str("GCTCGATCGATCGATCGACTGACTGACGCGCGCATCCGATAAAAAAAAT").unwrap();
1227 seq1.append(dna!("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"));
1228 assert_eq!(
1229 seq1.to_string(),
1230 "GCTCGATCGATCGATCGACTGACTGACGCGCGCATCCGATAAAAAAAATAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
1231 );
1232
1233 let mut seq2 =
1234 Seq::<Dna>::from_str("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA").unwrap();
1235 seq2.append(dna!("GCTCGATCGATCGATCGACTGACTGACGCGCGCATCCGATAAAAAAAAT"));
1236 assert_eq!(
1237 seq2.to_string(),
1238 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGCTCGATCGATCGATCGACTGACTGACGCGCGCATCCGATAAAAAAAAT"
1239 );
1240
1241 assert_ne!(seq1, seq2);
1242 }
1243
1244 #[test]
1245 fn test_remove() {
1246 let mut seq: Seq<Dna> = dna!("TCAGCATCGATCAATCG").into();
1247 seq.remove(4..6);
1248 assert_eq!(&seq, dna!("TCAGTCGATCAATCG"));
1249 seq.remove(..4);
1250 assert_eq!(&seq, dna!("TCGATCAATCG"));
1251 seq.remove(6..);
1252 assert_eq!(&seq, dna!("TCGATC"));
1253 }
1254
1255 #[test]
1256 fn test_insert() {
1257 let mut seq: Seq<Dna> = dna!("TCAGCATCGATCAATCG").into();
1258 let insertion = dna!("CCCCC");
1259
1260 seq.insert(4, insertion);
1261 assert_eq!(&seq, dna!("TCAGCCCCCCATCGATCAATCG"));
1262
1263 seq.insert(seq.len(), dna!("AAAAA"));
1264 assert_eq!(&seq, dna!("TCAGCCCCCCATCGATCAATCGAAAAA"));
1265 }
1266
1267 #[test]
1268 fn test_truncate() {
1269 let mut seq: Seq<Dna> = dna!("TCAGCATCGATCAATCG").into();
1270
1271 seq.truncate(seq.len());
1272 assert_eq!(&seq, dna!("TCAGCATCGATCAATCG"));
1273
1274 seq.truncate(10);
1275 assert_eq!(&seq, dna!("TCAGCATCGA"));
1276
1277 seq.truncate(0);
1278 assert_eq!(&seq, dna!(""));
1279 }
1280
1281 }