1pub mod iupac;
4pub mod modtype;
5pub mod motif;
6pub mod read;
7pub mod sequence;
8pub mod strand;
9
10pub use iupac::IupacBase;
11pub use modtype::ModType;
12pub use motif::Motif;
13pub use strand::Strand;
14
15use crate::sequence::Sequence;
16
17pub fn find_motif_indices_in_sequence(sequence: &Sequence, motif: &Motif) -> Vec<usize> {
18 let motif_bases = motif.sequence.clone();
27 let motif_len = motif_bases.len();
28 let mut indices = Vec::new();
29
30 if sequence.len() < motif_len {
31 return indices;
32 }
33
34 for i in 0..=(sequence.len() - motif_len) {
35 let mut matches = true;
36
37 for (j, &motif_base) in motif_bases.iter().enumerate() {
38 let seq_base = sequence[i + j];
39 if (seq_base.mask() & motif_base.mask()) == 0 {
40 matches = false;
41 break;
42 }
43 }
44
45 if matches {
46 indices.push(i + motif.mod_position as usize);
47 }
48 }
49
50 indices
51}
52
53#[cfg(test)]
54mod tests {
55 use super::*;
56
57 use crate::read::{MethBase, MethQual, Read};
58 use noodles_fastq::{self as fastq, record::Definition};
59
60 #[test]
61 fn test_find_motif_indices_in_contig() {
62 let contig = Sequence::from_str("GGATCTCCATGATC").unwrap();
63 let contig2 = Sequence::from_str("TGGACGATCCCGATC").unwrap();
64 let motif1 = Motif::new("GATC", "m", 3).unwrap();
65 let motif2 = Motif::new("RGATCY", "m", 4).unwrap();
66 let motif3 = Motif::new("GATC", "a", 1).unwrap();
67 let motif4 = Motif::new("GGANNNTCC", "a", 2).unwrap();
68
69 println!("{}", &motif4.to_regex());
70 assert_eq!(
71 find_motif_indices_in_sequence(&contig, &motif1),
72 vec![4, 13]
73 );
74 assert_eq!(find_motif_indices_in_sequence(&contig, &motif2), vec![4]);
75
76 assert_eq!(
77 find_motif_indices_in_sequence(&contig2, &motif3),
78 vec![6, 12]
79 );
80 assert_eq!(
81 find_motif_indices_in_sequence(&contig2, &motif3.reverse_complement()),
82 vec![7, 13]
83 );
84
85 assert_eq!(find_motif_indices_in_sequence(&contig2, &motif4), vec![3])
86 }
87
88 #[test]
89 fn test_find_motif_indices_in_read() {
90 let description = "MM:Z:A+a.,0,0,0,0,0,2,0,9,0,0,0,0,1,0,0,0,0,2,0,0,0,0,16,0,0,0,4,0,0,0,1,11,1,0,1,0,0,0,0,0,4,0,0,0,2,0,10,6,5,11,0,11,1,6,0,0,0,0,0,2,3,12,0,4,16,0,0,1,0,1,4,0,0,0,0,0;C+21839.,6,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,2,11,0,0,0,0,6,0,5,4,2,9,0,1,3,0,0,0,5,2,1,11,1,0,3,0,0;C+m.,6,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,2,11,0,0,0,0,6,0,5,4,2,9,0,1,3,0,0,0,5,2,1,11,1,0,3,0,0; ML:B:C,204,119,22,36,26,40,16,20,15,25,97,104,150,20,112,20,16,34,81,66,52,12,30,67,20,155,15,21,28,20,85,22,13,14,13,19,13,17,24,12,12,14,30,13,20,20,147,16,17,22,36,41,37,163,29,14,71,28,58,12,12,14,14,12,12,15,64,25,137,42,19,34,29,23,231,46,6,16,17,30,9,41,40,25,27,26,14,179,86,24,8,23,42,15,48,12,16,13,15,14,10,16,162,21,9,3,16,14,8,31,3,2,7,4,6,21,3,15,12,19,20,12,83,45,12,18,10,26,17,33,68,70,49,53,23,13,23,21,48,40,83,5,5,5,5,2,11,13,29,60,7,24,12,16,2,3,14,14,44,12,20,13,13,9,14,6,10,6,7,4,2,34";
91 let sequence = "ACTATAAATCATTTATTTTATATTTAATGTAAACATTTCTTCACCTTCTAAGGTGCCACAAAGATAATCATTAGCATCTACCCGTCCTACACCTGCTGGTGTACCTGTAAATATGACATCTCCTTTTTTTAACATAAAGTATTGAGAAACATACGATATAAGCTCGTCTATTTTCCACAACATTAAATTTGTATTTCCCTTTTGTACTATTTCTTCATTTTTTAACAATGAAAAATTTATATTATCTACAGAAGAAAATTTACTTTTTGGCAACCATTTACCTATTACTGCCGCACCATCAAAACCTTTTGCTTTTTCCCAAGGTAATCCTTTTTCTTTTAACTTAGATTGAAGATCACGTGCTGTAAAGTCTATTCCTAGACCAATTTCATCATAATAATTGGCAGCAAATTTTTGTTCTATATGTTTTCCTACTTTTTTAATTTTAACTAAAACCTCTACTTCATAATGTATGTTATTAGAGAATTCTGGTATGTAAAAATCTTGTTCTTTTGGTAGCACAGCAGAATCTGGCTTAATAAAAACAACAGGATCTGTAGGCTTTTCATTGGCCAATTCTTTAATATGATCTGTGTAATTACGACCTATACAAATTATCTTCATAATAAACTTTATTAGGTTATTGCTTTTCTTTAGACAATTTTAAACTTACCGCTGGTACTAAATGCTTGGTATTATCTTTAGACTTTACCTTAAATCTATCAT";
92 let quals = "JIGSSGIH=GLLMLJFB;=>@?HSSNKJSSMKIKDCD?>??<>?JMSJJGGFSSLLJSSSOIJLSONMJSSRSSJSSSSLSLIIHFGSQSLSSSLSSRSSSSKSQMIHSSISSSSLIQSLLK88:1111EDMQIIOROPMS::22336C@CICCBCDSSNJJEG?;;:<;<>@;8B44HSSSSSPPISHHILGKJQAMLRPJJ:SLMSMQS99999KHKISSSSMJIKSIMSSSSSSSSSIKGJJMSHM50))))GSMSLSS>;;<PSSSSJMPMSSSSNLSSSKIKNSSNKKKONMJSSQLSSB@PKHGJFBSSSJSSQDGSSSQKKSSSSSSSSMMNJSSSSSSSSSSORISSSSSSSSSSSOSSSSSSMSKSSNSRQSSSSSSSSNISSSSSNSSSSMJKNSSQMMPSSSSSSSSSPSNSSSOJJMSSSSJJMKSSSSSSSSSSSSSSSSSSSSSQSSSSSNSSSSSOSNMSHSGFIDMIIFFIG@@ILSMINSLKSSSSLGSBSNSRPSSSSSPPSFCA@@CLSSSSSKNSSHEEEHHSSSSRNLLOSSSSSSSPSSSNSSSSSSRLOLILSSSSSSSQPSSSHJMKJSSSSRSSSIINSLNSRSSSMLOCCHILKJKJKLOSSSSSSSSSSSPSSSSPSSMIAGSA>EEE;AAABBI@>>==@SLLKIKEIHLLLOJHHKMSHISSHQSKQNSPSJFD>@>>>950///.-./(,*((&&&";
93 let fastq = fastq::Record::new(
94 Definition::new("@8c32d39d-6b88-480f-ac6d-e8061e4d674b", description),
95 sequence,
96 quals,
97 );
98
99 let read = Read::from_fastq_record(fastq).unwrap();
100 let motif = Motif::new("ACTATA", "a", 0).unwrap();
101
102 let indices = find_motif_indices_in_sequence(read.get_sequence(), &motif);
103 assert_eq!(indices, vec![0]);
104 }
105 #[test]
106 fn test_read_construction_simple() {
107 let description = "MM:Z:A+a.,0,1; ML:B:C,204,255";
108 let sequence = "GGGCGGATCAGATC";
109 let quals = "JIGSSGIH=";
110 let fastq = fastq::Record::new(
111 Definition::new("8c32d39d-6b88-480f-ac6d-e8061e4d674b", description),
112 sequence,
113 quals,
114 );
115
116 let read = Read::from_fastq_record(fastq).unwrap();
117 let modifcations = read.get_modifications();
118 let motif = Motif::new("GATC", "a", 1).unwrap();
119
120 assert_eq!(
121 *modifcations.0.get(&6).unwrap(),
122 MethBase {
123 base: ModType::SixMA,
124 quality: MethQual(204)
125 }
126 );
127 assert_eq!(modifcations.0.get(&9), None);
128 assert_eq!(
129 *modifcations.0.get(&11).unwrap(),
130 MethBase {
131 base: ModType::SixMA,
132 quality: MethQual(255)
133 }
134 );
135 let indices = find_motif_indices_in_sequence(read.get_sequence(), &motif);
136 assert_eq!(indices, vec![6, 11]);
137 }
138}