bio_seq/
lib.rs

1// Copyright 2021-2024 Jeff Knaggs
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6//! Bit-packed and well-typed biological sequences
7//!
//! The strength of Rust is that we can safely separate the science (well-typed) and the engineering (bit-packed) of bioinformatics. An incremental benchmark improvement in the reverse complement algorithm should benefit the user of a succinct datastructure without anyone unwillingly learning about endianness.
9//!
10//! Contributions are very welcome. There's lots of low hanging fruit for optimisation and ideally we should only have to write them once!
11//!
12//! ## Sequences
13//!
14//! A [`Seq`](seq::Seq) is a heap allocated [sequence](seq) of symbols that owns its data. A [`SeqSlice`](seq::SeqSlice) is a read-only window into a `Seq`. Static [`SeqArray`s](seq::SeqArray) can be declared with the [`dna!`](macro@dna) and [`iupac!`](macro@iupac) macros but these should be dereferenced as `&'static SeqSlice`s.
15//!
16//! [`Kmer`s](mod@kmer) are shorter, fixed-length sequences. They generally fit in a single register and implement `Copy`. They are used for optimised algorithms on sequences and succinct datastructures. The default implementation uses a `usize` for storage. Using the 2-bit `Dna` encoding a `Kmer<Dna, 32>` occupies 64 bits.
17//!
18//! These sequence types are parameterised with [`Codec`s](`codec`) (e.g. `Seq<Dna>`, `Seq<Amino>`, etc.) that define how symbols are encoded into strings of bits and decoded as readable strings.
19//!
20//! ## Quick start
21//!
22//! Add `bio-seq` to `Cargo.toml`:
23//!
24//! ```toml
25//! [dependencies]
26//! bio-seq = "0.13"
27//! ```
28//!
29//! ```rust
30//! use bio_seq::prelude::*;
31//!
32//! let seq = dna!("ATACGATCGATCGATCGATCCGT");
33//!
34//! // iterate over the 8-mers of the reverse complement
35//! for kmer in seq.to_revcomp().kmers::<8>() {
36//!     println!("{kmer}");
37//! }
38//!
39//! // ACGGATCG
40//! // CGGATCGA
41//! // GGATCGAT
42//! // GATCGATC
43//! // ATCGATCG
44//! // ...
45//! ```
46//!
//! Sequences are analogous to Rust's string types and follow similar dereferencing conventions:
48//!
49//! ```rust
50//! # use bio_seq::prelude::*;
51//! // The `dna!` macro packs a static sequence with 2-bits per symbol at compile time:
52//! let s: &'static str = "hello!";
53//! let seq: &'static SeqSlice<Dna> = dna!("CGCTAGCTACGATCGCAT");
54//!
55//! // Sequences can also be copied into `Kmer`s:
56//! let kmer: Kmer<Dna, 18> = dna!("CGCTAGCTACGATCGCAT").try_into().unwrap();
57//! // or with the kmer! macro:
58//! let kmer = kmer!("CGCTAGCTACGATCGCAT");
59//!
60//! // `Seq`s can be allocated on the heap like `String`s are:
61//! let s: String = "hello!".into();
62//! let seq: Seq<Dna> = dna!("CGCTAGCTACGATCGCAT").into();
63//!
64//! // Alternatively, a `Seq` can be fallibly encoded at runtime:
65//! let seq: Seq<Dna> = "CGCTAGCTACGATCGCAT".try_into().unwrap();
66//!
67//! // `&SeqSlice` is analogous to `&str`:
68//! let slice: &str = &s[1..3];
69//! let seqslice: &SeqSlice<Dna> = &seq[2..4];
70//! ```
71//!
72//! ## Bit-packed encodings
73//!
74//! Encodings of genomic symbols are implemented as [`Codec`s](codec). This crate provides four common ones:
75//!   - [`codec::dna`]: 2-bit encoding of the four nucleotides
76//!   - [`codec::text`]: 8-bit ASCII encoding of nucleotides, meant to be compatible with plaintext sequencing data formats
77//!   - [`codec::iupac`]: 4-bit encoding of ambiguous nucleotide identities (the IUPAC ambiguity codes)
78//!   - [`codec::amino`]: 6-bit encoding of amino acids
79//!
80//! Each of these encodings is designed to facilitate common bioinformatics tasks, such as minimising k-mers and implementing succinct datastructures. The [translation] module provides traits and methods for translating between nucleotide and amino acid sequences.
81//!
82//! Custom codecs can also be implemented with the `Codec` trait and derived on specially crafted enums.
83//!
84
85#![warn(clippy::pedantic)]
86#![allow(clippy::must_use_candidate)]
87#![allow(clippy::return_self_not_must_use)]
88#![allow(clippy::module_name_repetitions)]
89// the lint doesn't seem to recognise our implementations
90#![allow(clippy::into_iter_without_iter)]
91//#[cfg(not(target_pointer_width = "64"))]
92//compile_error!("bio-seq currently only supports 64-bit platforms");
93//#![feature(simd_wasm64)]
94//#![feature(portable_simd)]
95
96use bitvec::prelude::*;
97
// Bit ordering used by the three storage aliases below.
type Order = Lsb0;
// Borrowed bit slice over `usize` storage words.
type Bs = BitSlice<usize, Order>;
// Owned bit vector over `usize` storage words.
type Bv = BitVec<usize, Order>;
// Bit array backed by `W` `usize` storage words.
type Ba<const W: usize> = BitArray<[usize; W], Order>;
102
pub mod codec;
pub mod error;
// NOTE(review): `#[macro_use]` keeps macros declared inside `kmer` textually in scope for
// the items that follow, so this declaration presumably has to stay ahead of `seq`.
#[macro_use]
pub mod kmer;
pub mod seq;

// The `dna!` and `iupac!` macros are provided by the `bio_seq_derive` crate.
pub use bio_seq_derive::{dna, iupac};

// Hidden re-export of bitvec's `bitarr!` under a crate-prefixed name.
// NOTE(review): presumably referenced from macro expansions — confirm against the macros.
#[doc(hidden)]
pub use bitvec::bitarr as __bio_seq_bitarr;

// Hidden re-export of bitvec's `Lsb0` ordering under a crate-prefixed name.
#[doc(hidden)]
pub use bitvec::prelude::Lsb0 as __bio_seq_Lsb0;

// Only compiled when the `translation` feature is enabled.
#[cfg(feature = "translation")]
pub mod translation;
119
120pub mod prelude {
121    pub use crate::codec::Codec;
122    pub use crate::codec::amino::Amino;
123    pub use crate::codec::dna::Dna;
124    pub use crate::codec::iupac::Iupac;
125    pub use crate::{
126        Complement, ComplementMut, Maskable, MaskableMut, Reverse, ReverseComplement,
127        ReverseComplementMut, ReverseMut,
128    };
129
130    pub use crate::kmer::Kmer;
131    pub use crate::seq::{Seq, SeqArray, SeqSlice};
132
133    #[cfg(feature = "translation")]
134    pub use crate::translation;
135
136    pub use core::str::FromStr;
137
138    pub use crate::error::ParseBioError;
139
140    pub use crate::{dna, iupac, kmer};
141
142    #[doc(hidden)]
143    pub use crate::__bio_seq_Lsb0;
144    #[doc(hidden)]
145    pub use crate::__bio_seq_bitarr;
146    #[doc(hidden)]
147    pub use crate::__bio_seq_count_words;
148}
149
/// Nucleotide bases and sequences can be complemented
///
/// This is the in-place half of the API; the owned-result counterpart is
/// [`Complement::to_comp`].
pub trait ComplementMut {
    /// Complement `self` in place
    fn comp(&mut self);
}
154
155pub trait Complement: ComplementMut + ToOwned
156where
157    <Self as ToOwned>::Owned: ComplementMut,
158{
159    /// ```
160    /// use bio_seq::prelude::{Dna, Complement};
161    /// assert_eq!(Dna::A.to_comp(), Dna::T);
162    /// ````
163    fn to_comp(&self) -> <Self as ToOwned>::Owned {
164        let mut owned = self.to_owned();
165        owned.comp();
166        owned
167    }
168}
169
170//impl<T: ?Sized + ComplementMut + ToOwned> Complement for T where <T as ToOwned>::Owned: ComplementMut {}
171
/// A reversible sequence
///
/// This is the in-place half of the API; the owned-result counterpart is
/// [`Reverse::to_rev`].
pub trait ReverseMut {
    /// Reverse sequence in place
    fn rev(&mut self);
}
177
178pub trait Reverse: ReverseMut + ToOwned
179where
180    <Self as ToOwned>::Owned: ReverseMut,
181{
182    fn to_rev(&self) -> <Self as ToOwned>::Owned {
183        let mut owned = self.to_owned();
184        owned.rev();
185        owned
186    }
187}
188
189//impl<T: ?Sized + ReverseMut + ToOwned> Reverse for T where <T as ToOwned>::Owned: ReverseMut {}
190
191/// A reversible sequence that can be complemented can be reverse complemented
192pub trait ReverseComplementMut: ComplementMut + ReverseMut {
193    /// Reverse complement a sequence in place
194    fn revcomp(&mut self) {
195        self.comp();
196        self.rev();
197    }
198}
199
200//impl<T: ?Sized + ReverseMut + ComplementMut> ReverseComplementMut for T {}
201
202pub trait ReverseComplement: ReverseComplementMut + ToOwned
203where
204    <Self as ToOwned>::Owned: ReverseComplementMut,
205{
206    fn to_revcomp(&self) -> <Self as ToOwned>::Owned {
207        let mut owned = self.to_owned();
208        owned.revcomp();
209        owned
210    }
211}
212
213// TODO: Marker trait to allow overriding this blanket impl
214//impl<T: ReverseComplementMut + ToOwned> ReverseComplement for T where
215//    <T as ToOwned>::Owned: ReverseComplementMut
216//{
217//}
218
/// Some sequence types may be maskable
///
/// This is the in-place half of the API; the owned-result counterparts are
/// [`Maskable::to_mask`] and [`Maskable::to_unmask`].
pub trait MaskableMut {
    /// Mask `self` in place
    fn mask(&mut self);
    /// Unmask `self` in place
    fn unmask(&mut self);
}
224
225pub trait Maskable: MaskableMut + ToOwned
226where
227    <Self as ToOwned>::Owned: MaskableMut,
228{
229    fn to_mask(&self) -> <Self as ToOwned>::Owned {
230        let mut owned = self.to_owned();
231        owned.mask();
232        owned
233    }
234    fn to_unmask(&self) -> <Self as ToOwned>::Owned {
235        let mut owned = self.to_owned();
236        owned.unmask();
237        owned
238    }
239}
240
241//impl<T: MaskableMut + ToOwned> Maskable for T where <T as ToOwned>::Owned: MaskableMut {}
242
/// Number of `usize` words needed to hold `$bits` bits, rounded up.
///
/// The argument is divided by `usize::BITS` (a `u32`) with `div_ceil`, so it must be
/// an expression of a type that accepts that call; the quotient is cast to `usize`.
#[macro_export]
macro_rules! __bio_seq_count_words {
    ($bits:expr) => {{
        $bits.div_ceil(usize::BITS) as usize
    }};
}
247
248#[cfg(test)]
249mod tests {
250    use crate::codec::dna::Dna::{A, C, G, T};
251    use crate::prelude::*;
252    use std::hash::{DefaultHasher, Hash, Hasher};
253
254    #[test]
255    fn alt_repr() {
256        assert_eq!(iupac!("-").nth(0), Iupac::X);
257    }
258
259    #[test]
260    fn into_usize() {
261        let a: usize = dna!("ACGT").to_owned().into_raw()[0];
262        assert_eq!(a, 0b11_10_01_00);
263
264        let b: usize = dna!("CGCG").to_owned().into_raw()[0];
265        assert_eq!(b, 0b10_01_10_01);
266
267        let c: usize = Seq::from(&vec![T, T]).into();
268        assert_eq!(c, 0b11_11);
269
270        let d: usize = Seq::<Dna>::from_str("TCA").unwrap().into();
271        assert_eq!(d, 0b00_01_11);
272
273        let e: usize = Seq::<Dna>::from_str("TGA").unwrap().into();
274        assert_eq!(e, 0b00_10_11);
275
276        let f: usize = Seq::from(&vec![C, G, T, A, C, G, A, T]).into();
277        assert_eq!(f, 0b11_00_10_01_00_11_10_01);
278
279        let g: usize = Seq::from(&vec![A]).into();
280        assert_eq!(g, 0b00);
281    }
282
283    #[test]
284    fn test_display_aminos() {
285        let a: Seq<Amino> = Seq::from_str("DCMNLKG*HI").unwrap();
286        assert_eq!(format!("{a}"), "DCMNLKG*HI");
287    }
288    #[test]
289    fn test_display_dna() {
290        let seq = Seq::from(&vec![A, C, G, T, T, A, T, C]);
291        assert_eq!(format!("{}", &seq), "ACGTTATC");
292        assert_eq!(format!("{}", dna!("ACGT")), "ACGT");
293    }
294
295    #[test]
296    fn iterate_bases() {
297        let seq = dna!("ACGTACGT");
298        assert_eq!(
299            seq.into_iter().collect::<Vec<Dna>>(),
300            vec![A, C, G, T, A, C, G, T]
301        );
302    }
303
304    #[test]
305    fn from_string() {
306        let seq = Seq::<Dna>::from_str("ACGTACGT").unwrap();
307        assert_eq!(
308            seq.into_iter().collect::<Vec<Dna>>(),
309            vec![A, C, G, T, A, C, G, T]
310        );
311    }
312    #[test]
313    fn rev_seq() {
314        let seq = dna!("ACGTACGT");
315        assert_eq!(
316            seq.rev_iter().collect::<Vec<Dna>>(),
317            vec![T, G, C, A, T, G, C, A]
318        );
319        assert_eq!(
320            seq.to_rev().into_iter().collect::<Vec<Dna>>(),
321            vec![T, G, C, A, T, G, C, A]
322        );
323        assert_eq!(
324            iupac!("GN-").rev_iter().collect::<Vec<Iupac>>(),
325            vec![Iupac::X, Iupac::N, Iupac::G]
326        );
327
328        assert_eq!(
329            Seq::<Amino>::try_from("DCMNLKGHI")
330                .unwrap()
331                .to_rev()
332                .into_iter()
333                .collect::<Vec<Amino>>(),
334            vec![
335                Amino::I,
336                Amino::H,
337                Amino::G,
338                Amino::K,
339                Amino::L,
340                Amino::N,
341                Amino::M,
342                Amino::C,
343                Amino::D
344            ]
345        );
346    }
347    #[test]
348    fn iterate_kmers() {
349        let seq = dna!("ACGTAAGGGG");
350        for (kmer, answer) in seq
351            .kmers::<4>()
352            .zip(["ACGT", "CGTA", "GTAA", "TAAG", "AAGG", "AGGG", "GGGG"])
353        {
354            assert_eq!(format!("{}", kmer), answer);
355        }
356    }
357
358    #[test]
359    fn iterate_kmer8() {
360        let seq = dna!("AAAACCCCGGGG");
361        for (kmer, answer) in seq
362            .kmers::<8>()
363            .zip(["AAAACCCC", "AAACCCCG", "AACCCCGG", "ACCCCGGG", "CCCCGGGG"])
364        {
365            assert_eq!(format!("{}", kmer), answer);
366        }
367    }
368
369    #[test]
370    fn iterate_kmer4() {
371        let seq = dna!("AAAACCCCGGGGTTTT");
372        for (kmer, answer) in seq.kmers::<4>().zip([
373            "AAAA", "AAAC", "AACC", "ACCC", "CCCC", "CCCG", "CCGG", "CGGG", "GGGG", "GGGT", "GGTT",
374            "GTTT", "TTTT",
375        ]) {
376            assert_eq!(format!("{}", kmer), answer);
377        }
378    }
379
380    #[test]
381    fn iupac_bitwise_ops() {
382        let s1: &SeqSlice<Iupac> = iupac!("AS-GYTNA");
383        let s2: &SeqSlice<Iupac> = iupac!("ANTGCAT-");
384
385        let s3: &SeqSlice<Iupac> = iupac!("ACGTSWKM");
386        let s4: &SeqSlice<Iupac> = iupac!("WKMSTNNA");
387
388        assert_eq!(s1 | s2, iupac!("ANTGYWNA"));
389        assert_eq!(s3 & s4, iupac!("A----WKA"));
390    }
391    #[test]
392    fn min_sequence() {
393        let seq = dna!("GCTCGATCGTAAAAAATCGTATT");
394
395        let minimised = seq.kmers::<8>().min().unwrap();
396        assert_eq!(minimised, Kmer::try_from(dna!("GTAAAAAA")).unwrap());
397    }
398
399    #[test]
400    fn hash_minimiser() {
401        use core::cmp::min;
402
403        fn hash(seq: &SeqSlice<Dna>) -> u64 {
404            if seq == dna!("GGCTCTCTCTCCTCCA") {
405                0
406            } else {
407                1
408            }
409        }
410
411        let seq =
412            dna!("AGCGCTAGTCGTACTGCCGCATCGCTAGCGCTAAAAAAAAAAAAAAAAGGGGTGTGTGGGTTGTGGAGGAGAGAGAGCC");
413
414        //        let minimised = seq.kmers::<16>().map(hash).min().unwrap();
415
416        let (minimiser_rc, min_hash_rc) = seq
417            .to_revcomp()
418            .kmers::<16>()
419            .map(|kmer| (kmer, hash(&kmer)))
420            .min_by_key(|&(_, hash)| hash)
421            .unwrap();
422
423        let (minimiser, min_hash) = seq
424            .kmers::<16>()
425            .map(|kmer| (kmer, hash(&kmer)))
426            .min_by_key(|&(_, hash)| hash)
427            .unwrap();
428
429        //        let x = min(min_hash, min_hash_rc);
430
431        let (canonical_minimiser, canonical_hash) = seq
432            .kmers::<16>()
433            .map(|kmer| {
434                let canonical_hash = min(hash(&kmer), hash(&kmer.to_revcomp()));
435                (kmer, canonical_hash)
436            })
437            .min_by_key(|&(_, hash)| hash)
438            .unwrap();
439
440        println!(
441            "{minimiser_rc} {min_hash_rc}\n{minimiser} {min_hash}\n{canonical_minimiser} {canonical_hash}"
442        );
443        assert_eq!(min_hash_rc, canonical_hash);
444        assert_ne!(min_hash, canonical_hash);
445        assert_eq!(minimiser_rc, canonical_minimiser.to_revcomp());
446    }
447
    // Checks which sequence values hash equally and which do not, across static
    // sequences, `Seq`, `SeqSlice` windows and `Kmer`s.
    #[test]
    fn hash_characteristics() {
        // Hash a value with the standard library's default hasher. The argument is taken
        // by value, so the calls below hash either a value or a reference to it; both
        // forms are exercised.
        fn hash<T: Hash>(chunk: T) -> u64 {
            let mut hasher = DefaultHasher::new();
            chunk.hash(&mut hasher);
            hasher.finish()
        }

        // `s2` is `s1` with one extra trailing `A`.
        let s1 = dna!("AGCGCTAGTCGTACTGCCGCATCGCTAGCGCT");
        let s2 = dna!("AGCGCTAGTCGTACTGCCGCATCGCTAGCGCTA");

        // Heap-allocated copies of the same two sequences.
        let q1: Seq<Dna> = dna!("AGCGCTAGTCGTACTGCCGCATCGCTAGCGCT").into();
        let q2: Seq<Dna> = dna!("AGCGCTAGTCGTACTGCCGCATCGCTAGCGCTA").into();

        // All-`A` sequences; `s4` is one base longer than `s3`.
        let s3 = dna!("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA");
        let s4 = dna!("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA");

        let q3 = dna!("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA");
        let q4 = dna!("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA");

        // `l3_a` and `l3_b` are two different windows of `q4` with identical content;
        // `l3` is all of `q3` and `l4` is all of `q4`.
        let l3: &SeqSlice<Dna> = &q3;
        let l3_a: &SeqSlice<Dna> = &q4[1..];
        let l3_b: &SeqSlice<Dna> = &q4[..32];
        let l4: &SeqSlice<Dna> = &q4;

        let k1: Kmer<Dna, 32, u64> = s1.try_into().unwrap();
        let k1_a: Kmer<Dna, 32, u64> = s1.try_into().unwrap();

        let k3: Kmer<Dna, 32, u64> = s3.try_into().unwrap();

        // Equal content hashes equally, whichever window it was taken from.
        assert_eq!(hash(&l3), hash(q3));
        assert_eq!(hash(&l3), hash(&l3_a));
        assert_eq!(hash(&l3_a), hash(&l3_b));

        assert_eq!(hash(&s2), hash(&q2));

        assert_eq!(hash(&s1), hash(s1));
        assert_eq!(hash(s2), hash(&s2));
        // Same repeated base but different lengths: hashes must differ.
        assert_ne!(hash(&s4), hash(&s3));

        assert_ne!(hash(&l3), hash(&l4));
        assert_ne!(hash(&l3_a), hash(&l4));

        assert_ne!(hash(&q2), hash(&q1));

        assert_eq!(hash(q3), hash(s3));
        assert_eq!(hash(s1), hash(&q1));
        assert_ne!(hash(s3), hash(s4));

        // K-mers hash like the sequence they were built from.
        assert_ne!(hash(&k3), hash(&k1));
        assert_eq!(hash(&k1_a), hash(&k1));
        assert_eq!(hash(s1), hash(&k1));
    }
501
502    #[test]
503    fn sequence_type_hashes() {
504        fn hash<T: Hash>(chunk: &T) -> u64 {
505            let mut hasher = DefaultHasher::new();
506            chunk.hash(&mut hasher);
507            hasher.finish()
508        }
509
510        let seq_arr: &SeqSlice<Dna> = dna!("AGCGCTAGTCGTACTGCCGCATCGCTAGCGCT");
511        let seq: Seq<Dna> = seq_arr.into();
512        let seq_slice: &SeqSlice<Dna> = &seq;
513        let kmer: Kmer<Dna, 32, u64> = seq_arr.try_into().unwrap();
514
515        assert_eq!(hash(&seq_arr), hash(&seq));
516        assert_eq!(hash(&seq), hash(&seq_slice));
517        assert_eq!(hash(&seq_slice), hash(&kmer));
518    }
519
520    #[test]
521    fn nth_chars() {
522        assert_eq!(iupac!("ACGTRYSWKMBDHVN-").nth(0), Iupac::A);
523        assert_ne!(iupac!("ACGTRYSWKMBDHVN-").nth(0), Iupac::C);
524        assert_eq!(iupac!("ACGTRYSWKMBDHVN-").nth(15), Iupac::X);
525        assert_eq!(iupac!("ACGTRYSWKMBDHVN-").nth(3), Iupac::from(Dna::T));
526        assert_ne!(iupac!("ACGTRYSWKMBDHVN-").nth(3), Iupac::from(Dna::G));
527
528        assert_eq!(
529            Seq::<Amino>::try_from("DCMNLKGHI").unwrap().nth(1),
530            Amino::C
531        );
532        assert_ne!(
533            Seq::<Amino>::try_from("DCMNLKGHI").unwrap().nth(7),
534            Amino::I
535        );
536    }
537
538    #[test]
539    fn colexicographic_order() {
540        for (i, e) in ["AA", "CA", "GA", "TA", "AC", "CC", "GC", "TC"]
541            .iter()
542            .enumerate()
543        {
544            assert_eq!(format!("{}", Kmer::<Dna, 2>::from(i)), format!("{}", e));
545            assert_eq!(Kmer::<Dna, 2>::from(i), *e);
546        }
547    }
548
    // Equality and inequality within and across the sequence types (`Seq`, `SeqSlice`,
    // static arrays from `dna!`, and `Kmer`), in both owned and borrowed operand forms.
    // Each assertion's operand form is deliberate: it selects a particular `PartialEq`
    // implementation, so the expressions should not be "simplified".
    #[test]
    fn sequence_type_equality() {
        // `raw_b` and `raw_c` are identical; `raw_a` is one base shorter and `raw_d` one
        // base longer.
        // NOTE(review): the 63/64/65 lengths presumably straddle a storage-word boundary
        // for the 2-bit encoding — confirm.
        let raw_a = "AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATAGGACGATCAGCACCATAAGAACAA";
        let raw_b = "AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATAGGACGATCAGCACCATAAGAACAAA";
        let raw_c = "AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATAGGACGATCAGCACCATAAGAACAAA";
        let raw_d = "AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATAGGACGATCAGCACCATAAGAACAAAA";

        assert_eq!(raw_a.len(), 63);
        assert_eq!(raw_b.len(), 64);
        assert_eq!(raw_d.len(), 65);

        // Baseline: the plain strings compare the way the sequences are expected to.
        assert_eq!(raw_c, raw_b);
        assert_eq!(raw_c, &raw_b[..]);

        assert_ne!(raw_b, raw_d);
        assert_ne!(raw_a, raw_b);

        // Seq

        let seq_a: Seq<Dna> = raw_a.try_into().unwrap();
        let seq_b: Seq<Dna> = raw_b.try_into().unwrap();
        let seq_c: Seq<Dna> = raw_c.try_into().unwrap();
        let seq_d: Seq<Dna> = raw_d.try_into().unwrap();

        assert_eq!(seq_a.len(), raw_a.len());
        assert_eq!(seq_d.len(), raw_d.len());

        assert_eq!(seq_c, seq_b);
        assert_eq!(seq_c, &seq_b);

        assert_ne!(seq_a, &seq_b);
        assert_ne!(seq_a, seq_b);
        assert_ne!(seq_c, seq_d);

        // SeqSlice

        let slice_a: &SeqSlice<Dna> = &seq_a;
        let slice_b: &SeqSlice<Dna> = &seq_b;
        let slice_c: &SeqSlice<Dna> = &seq_c;
        let slice_d: &SeqSlice<Dna> = &seq_d;

        assert_eq!(slice_a.len(), raw_a.len());
        assert_eq!(slice_d.len(), raw_d.len());

        assert_eq!(slice_c, slice_b);
        assert_eq!(slice_c, &slice_b[..]);

        assert_ne!(slice_a, slice_b);
        assert_ne!(slice_c, slice_d);
        assert_ne!(slice_c, &slice_d[..]);

        // SeqArray references

        let array_a = dna!("AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATAGGACGATCAGCACCATAAGAACAA");
        let array_b = dna!("AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATAGGACGATCAGCACCATAAGAACAAA");
        let array_c: &'static SeqSlice<Dna> =
            dna!("AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATAGGACGATCAGCACCATAAGAACAAA");
        let array_d: &'static SeqSlice<Dna> =
            dna!("AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATAGGACGATCAGCACCATAAGAACAAAA");

        assert_eq!(array_a.len(), raw_a.len());
        assert_eq!(array_d.len(), raw_d.len());

        assert_eq!(array_c, array_b);

        assert_ne!(array_a, array_b);
        assert_ne!(array_c, array_d);

        // Kmers

        let kmer_ax_32: Kmer<Dna, 32, u64> = kmer!("AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAG", u64);
        let kmer_bx_32 = Kmer::<Dna, 32, u64>::from_str(&raw_b[..32]).unwrap();

        // Differs from `kmer_ax_32` by a single base.
        let kmer_x_32: Kmer<Dna, 32, u64> = kmer!("AATTGTGGGTTCGTCTGCGCCTCCGCCCTTAG", u64);

        assert_eq!(kmer_ax_32.len(), 32);

        assert_eq!(kmer_ax_32, kmer_bx_32);
        assert_ne!(kmer_ax_32, kmer_x_32);

        // 64-mers stored in a `u128`; `kmer_dx_64` is the same text shifted by one base.
        let kmer_b_64 = Kmer::<Dna, 64, u128>::from_str(&raw_b).unwrap();
        let kmer_cx_64 = Kmer::<Dna, 64, u128>::from_str(&raw_d[..64]).unwrap();
        let kmer_dx_64 = Kmer::<Dna, 64, u128>::from_str(&raw_d[1..]).unwrap();

        assert_eq!(kmer_cx_64.len(), 64);

        assert_eq!(kmer_b_64, kmer_cx_64);
        assert_ne!(kmer_b_64, kmer_dx_64);

        // Cross-type equality:
        // NOTE(review): every comparison involving a `Kmer` is commented out below;
        // presumably those `PartialEq` impls do not exist yet — confirm.

        assert_eq!(seq_c, slice_b);
        assert_eq!(seq_c, *array_b);
        //        assert_eq!(seq_c, kmer_b_64);

        assert_eq!(&seq_c, slice_b);
        assert_eq!(&seq_c, array_b);
        //        assert_eq!(&seq_c, kmer_b_64);

        assert_eq!(slice_c, seq_b);
        assert_eq!(slice_c, &seq_b);
        assert_eq!(&slice_c, array_b);
        //        assert_eq!(slice_c, kmer_b_64);

        assert_eq!(array_c, &seq_b);
        assert_eq!(array_c, seq_b);
        assert_eq!(array_c, slice_b);
        //        assert_eq!(array_c, kmer_b_64);

        //        assert_eq!(kmer_b_64, &seq_c);
        //        assert_eq!(kmer_b_64, seq_c);
        //        assert_eq!(kmer_b_64, slice_c);
        //        assert_eq!(kmer_b_64, array_c);

        // Cross-type inequality (shorter):

        assert_ne!(&seq_a, slice_b);
        assert_ne!(&seq_a, array_b);
        assert_ne!(seq_a, slice_b);
        assert_ne!(seq_a, array_b);
        //        assert_ne!(seq_a, kmer_b_64);
        //        assert_ne!(&seq_a, kmer_b_64);

        assert_ne!(slice_a, &seq_b);
        assert_ne!(slice_a, seq_b);
        assert_ne!(&slice_a, array_b);
        //        assert_ne!(slice_a, kmer_b_64);

        assert_ne!(array_a, &seq_b);
        assert_ne!(array_a, seq_b);
        assert_ne!(array_a, slice_b);
        //        assert_ne!(array_a, kmer_b_64);

        //        assert_ne!(kmer_b_64, &seq_a);
        //        assert_ne!(kmer_b_64, seq_a);
        //        assert_ne!(kmer_b_64, slice_a);
        //        assert_ne!(kmer_b_64, array_a);

        // Cross-type inequality (longer):

        assert_ne!(seq_d, slice_b);
        assert_ne!(seq_d, array_b);
        //        assert_ne!(seq_d, kmer_b_64);
        assert_ne!(&seq_d, slice_b);
        assert_ne!(&seq_d, array_b);
        //        assert_ne!(&seq_d, kmer_b_64);

        assert_ne!(slice_d, &seq_b);
        assert_ne!(slice_d, seq_b);
        assert_ne!(&slice_d, array_b);
        //        assert_ne!(slice_d, kmer_b_64);

        assert_ne!(array_d, &seq_b);
        assert_ne!(array_d, seq_b);

        assert_ne!(slice_b, array_d);
        assert_ne!(array_d, slice_b);
        //        assert_ne!(array_d, kmer_b_64);

        //        assert_ne!(kmer_b_64, &seq_d);
        //        assert_ne!(kmer_b_64, seq_d);
        //        assert_ne!(kmer_b_64, slice_d);
        //        assert_ne!(kmer_b_64, array_d);
    }
713}
714
715#[cfg(test)]
716#[cfg(target_arch = "wasm32")]
717mod wasm_tests {
718    use crate::prelude::*;
719    use wasm_bindgen_test::*;
720
    // Equality and inequality within and across `Seq`, `SeqSlice` and static arrays
    // from `dna!`, run under `wasm_bindgen_test`. Each assertion's operand form is
    // deliberate: it selects a particular `PartialEq` implementation.
    #[wasm_bindgen_test]
    fn sequence_type_equality() {
        // `raw_b` and `raw_c` are identical; `raw_a` is one base shorter and `raw_d` one
        // base longer.
        let raw_a = "AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATAGGACGATCAGCACCATAAGAACAA";
        let raw_b = "AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATAGGACGATCAGCACCATAAGAACAAA";
        let raw_c = "AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATAGGACGATCAGCACCATAAGAACAAA";
        let raw_d = "AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATAGGACGATCAGCACCATAAGAACAAAA";

        assert_eq!(raw_a.len(), 63);
        assert_eq!(raw_b.len(), 64);
        assert_eq!(raw_d.len(), 65);

        // Baseline: the plain strings compare the way the sequences are expected to.
        assert_eq!(raw_c, raw_b);
        assert_eq!(raw_c, &raw_b[..]);

        assert_ne!(raw_b, raw_d);
        assert_ne!(raw_a, raw_b);

        // Seq

        let seq_a: Seq<Dna> = raw_a.try_into().unwrap();
        let seq_b: Seq<Dna> = raw_b.try_into().unwrap();
        let seq_c: Seq<Dna> = raw_c.try_into().unwrap();
        let seq_d: Seq<Dna> = raw_d.try_into().unwrap();

        assert_eq!(seq_a.len(), raw_a.len());
        assert_eq!(seq_d.len(), raw_d.len());

        assert_eq!(seq_c, seq_b);
        assert_eq!(seq_c, &seq_b);

        assert_ne!(seq_a, &seq_b);
        assert_ne!(seq_a, seq_b);
        assert_ne!(seq_c, seq_d);

        // SeqSlice

        let slice_a: &SeqSlice<Dna> = &seq_a;
        let slice_b: &SeqSlice<Dna> = &seq_b;
        let slice_c: &SeqSlice<Dna> = &seq_c;
        let slice_d: &SeqSlice<Dna> = &seq_d;

        assert_eq!(slice_a.len(), raw_a.len());
        assert_eq!(slice_d.len(), raw_d.len());

        assert_eq!(slice_c, slice_b);
        assert_eq!(slice_c, &slice_b[..]);

        assert_ne!(slice_a, slice_b);
        assert_ne!(slice_c, slice_d);
        assert_ne!(slice_c, &slice_d[..]);

        // SeqArray references

        let array_a = dna!("AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATAGGACGATCAGCACCATAAGAACAA");
        let array_b = dna!("AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATAGGACGATCAGCACCATAAGAACAAA");
        let array_c: &'static SeqSlice<Dna> =
            dna!("AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATAGGACGATCAGCACCATAAGAACAAA");
        let array_d: &'static SeqSlice<Dna> =
            dna!("AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATAGGACGATCAGCACCATAAGAACAAAA");

        assert_eq!(array_a.len(), raw_a.len());
        assert_eq!(array_d.len(), raw_d.len());

        assert_eq!(array_c, array_b);

        assert_ne!(array_a, array_b);
        assert_ne!(array_c, array_d);

        // Cross-type equality:

        assert_eq!(seq_c, slice_b);
        assert_eq!(seq_c, *array_b);

        assert_eq!(&seq_c, slice_b);
        assert_eq!(&seq_c, array_b);

        assert_eq!(slice_c, seq_b);
        assert_eq!(slice_c, &seq_b);
        assert_eq!(&slice_c, array_b);

        assert_eq!(array_c, &seq_b);
        assert_eq!(array_c, seq_b);
        assert_eq!(array_c, slice_b);
        // Cross-type inequality (shorter):

        assert_ne!(&seq_a, slice_b);
        assert_ne!(&seq_a, array_b);
        assert_ne!(seq_a, slice_b);
        assert_ne!(seq_a, array_b);
        assert_ne!(slice_a, &seq_b);
        assert_ne!(slice_a, seq_b);
        assert_ne!(&slice_a, array_b);

        assert_ne!(array_a, &seq_b);
        assert_ne!(array_a, seq_b);
        assert_ne!(array_a, slice_b);
        // Cross-type inequality (longer):

        assert_ne!(seq_d, slice_b);
        assert_ne!(seq_d, array_b);
        assert_ne!(&seq_d, slice_b);
        assert_ne!(&seq_d, array_b);

        assert_ne!(slice_d, &seq_b);
        assert_ne!(slice_d, seq_b);
        assert_ne!(&slice_d, array_b);

        assert_ne!(array_d, &seq_b);
        assert_ne!(array_d, seq_b);

        assert_ne!(slice_b, array_d);
        assert_ne!(array_d, slice_b);
    }
832
833    #[wasm_bindgen_test]
834    fn wasm_kmers() {
835        //let raw_a = "AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATAGGACGATCAGCACCATAAGAACAA";
836        let raw_b = "AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATAGGACGATCAGCACCATAAGAACAAA";
837        //let raw_c = "AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATAGGACGATCAGCACCATAAGAACAAA";
838        let raw_d = "AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAGTACTATAGGACGATCAGCACCATAAGAACAAAA";
839
840        let kmer_ax_32: Kmer<Dna, 32, u64> = kmer!("AATTGTGGGTTCGTCTGCGGCTCCGCCCTTAG", u64);
841        let kmer_bx_32 = Kmer::<Dna, 32, u64>::from_str(&raw_b[..32]).unwrap();
842
843        let kmer_x_32: Kmer<Dna, 32, u64> = kmer!("AATTGTGGGTTCGTCTGCGCCTCCGCCCTTAG", u64);
844
845        assert_eq!(kmer_ax_32.len(), 32);
846
847        assert_eq!(kmer_ax_32, kmer_bx_32);
848        assert_ne!(kmer_ax_32, kmer_x_32);
849
850        let kmer_b_64 = Kmer::<Dna, 64, u128>::from_str(&raw_b).unwrap();
851        let kmer_cx_64 = Kmer::<Dna, 64, u128>::from_str(&raw_d[..64]).unwrap();
852        let kmer_dx_64 = Kmer::<Dna, 64, u128>::from_str(&raw_d[1..]).unwrap();
853
854        assert_eq!(kmer_cx_64.len(), 64);
855
856        assert_eq!(kmer_b_64, kmer_cx_64);
857        assert_ne!(kmer_b_64, kmer_dx_64);
858    }
859
860    /*
861        #[wasm_bindgen_test]
862        fn test_splice() {
863            let mut seq: Seq<Dna> = dna!("TCAGCATCGATCAATCG").into();
864            let insertion = dna!("CCCCC");
865
866            seq.splice(4..6, insertion);
867            assert_eq!(&seq, dna!("TCAGCCCCCTCGATCAATCG"));
868
869            seq.splice(1..=1, dna!("AAA"));
870            assert_eq!(&seq, dna!("TAAAAGCCCCCTCGATCAATCG"));
871
872            seq.splice(10.., dna!("TTTT"));
873            assert_eq!(&seq, dna!("TAAAAGCCCCTTTT"));
874        }
875    */
876}