1pub mod digest;
37
38pub use digest::{
40 ASCII_ALPHABET,
41 Alphabet,
43 AlphabetGuesser,
44 AlphabetType,
45 DNA_2BIT_ALPHABET,
46 DNA_3BIT_ALPHABET,
47 DNA_IUPAC_ALPHABET,
48 FaiMetadata,
49 FastaStreamHasher,
51 PROTEIN_ALPHABET,
52 ParseOptions,
53 SeqColDigestLvl1,
54 SequenceCollection,
55 SequenceCollectionMetadata,
56 SequenceCollectionRecord,
57 SequenceEncoder,
58 SequenceMetadata,
59 SequenceRecord,
61 canonicalize_json,
62 decode_string_from_bytes,
63 decode_substring_from_bytes,
64 digest_fasta_bytes,
66 digest_sequence,
67 digest_sequence_with_description,
68 encode_sequence,
70 guess_alphabet,
71 load_fasta_bytes,
72 lookup_alphabet,
73 md5,
74 parse_fasta_header,
75 parse_rgsi_line,
76 sha512t24u,
78};
79
80#[cfg(feature = "filesystem")]
87pub mod fasta;
88
89#[cfg(feature = "filesystem")]
92pub mod collection;
93
94#[cfg(feature = "filesystem")]
96pub mod store;
97
98#[cfg(feature = "filesystem")]
100mod hashkeyable;
101#[cfg(feature = "filesystem")]
102mod utils;
103
104#[cfg(feature = "filesystem")]
106pub use collection::{
107 SequenceCollectionExt, SequenceCollectionRecordExt, SequenceMetadataExt, SequenceRecordExt,
108 read_rgsi_file,
109};
110#[cfg(feature = "filesystem")]
111pub use fasta::{FaiRecord, compute_fai, digest_fasta, load_fasta};
112
// Tests that drive `RefgetStore` end to end. They are compiled only for test builds
// with the `filesystem` feature enabled, because `store` is gated on that feature.
#[cfg(all(test, feature = "filesystem"))]
mod tests {
    use super::*;

    use std::time::Instant;
    use store::RefgetStore;
    use tempfile::tempdir;

    /// Loads the FASTA file named by the `FASTA_PATH` environment variable into two
    /// in-memory stores (the second with encoding disabled) and checks that both
    /// return the same substring for the first sequence digest.
    ///
    /// Ignored by default because it needs an externally supplied file; run it with
    /// `FASTA_PATH=<file> cargo test -- --ignored`.
    ///
    /// # Panics
    ///
    /// Panics if `FASTA_PATH` is unset, if loading fails, if the store is empty, or if
    /// any substring lookup fails (the hard-coded window needs a first sequence of at
    /// least `148 * 70 + 70` characters).
    #[test]
    #[ignore = "requires FASTA_PATH to point at a FASTA file"]
    fn test_loading_large_fasta_file() {
        let fasta_path =
            std::env::var("FASTA_PATH").expect("FASTA_PATH environment variable not set");
        println!("Loading large FASTA file: {}", &fasta_path);

        println!("Adding sequences from FASTA file...");
        let load_timer = Instant::now();
        let mut store = RefgetStore::in_memory();
        store
            .add_sequence_collection_from_fasta(&fasta_path)
            .expect("Failed to add FASTA file to the store");
        let load_duration = load_timer.elapsed();
        println!("Time taken to load: {:.2?}", load_duration);

        // Second store with encoding disabled; its output is compared against `store` below.
        let mut store2 = RefgetStore::in_memory();
        store2.disable_encoding();
        store2
            .add_sequence_collection_from_fasta(&fasta_path)
            .expect("Failed to add FASTA file to the store with encoding disabled");

        // Only the first digest is needed, so take it without collecting all of them.
        let first_digest = store
            .sequence_digests()
            .next()
            .expect("No sequences found in the store");
        let digest = &first_digest;

        println!("Look up a sequence by digest...");
        let digest_str = String::from_utf8(digest.to_vec()).expect("Invalid ASCII data");

        println!("Retrieving a substring of sequence named: {:?}", digest_str);
        let start_basic = 0;
        let end_basic = 3;
        let substring = store
            .get_substring(digest, start_basic, end_basic)
            .unwrap_or_else(|err| {
                panic!(
                    "Failed to retrieve substring with name: {:?}: {:?}",
                    digest_str, err
                )
            });
        println!("Retrieved substring: {:?}", substring);

        // A window further into the sequence: 70 characters starting at offset 148 * 70.
        // NOTE(review): 70 looks like a FASTA line width — confirm against the test file.
        let window_len = 70;
        let window_start = 148 * window_len;
        let window_end = window_start + window_len;

        let substring2 = store
            .get_substring(digest, window_start, window_end)
            .unwrap_or_else(|err| {
                panic!(
                    "Failed to retrieve substring with name: {:?}: {:?}",
                    digest_str, err
                )
            });
        let substring3 = store2
            .get_substring(digest, window_start, window_end)
            .unwrap_or_else(|err| {
                panic!(
                    "Failed to retrieve substring with name: {:?}: {:?}",
                    digest_str, err
                )
            });

        // Both stores must agree on the window contents.
        assert_eq!(substring2, substring3);
        println!("Retrieved substring: {:?}", substring2);
        println!("Retrieved substring: {:?}", substring3);
    }

    /// Loads `base.fa.gz` into an in-memory store and checks two substrings of the
    /// sequence with digest `iYtREV555dUFKg2_agSJW6suquUyPpMw` against known values.
    ///
    /// # Panics
    ///
    /// Panics if the fixture cannot be copied or loaded, or if a lookup fails or
    /// returns an unexpected substring.
    #[test]
    fn test_get_sequence_encoded() {
        // The fixture is copied into a tempdir and loaded from there.
        // NOTE(review): presumably so any files written next to the FASTA stay out of
        // the source tree — confirm against `add_sequence_collection_from_fasta`.
        let temp_dir = tempdir().expect("Failed to create temporary directory");
        let temp_fasta = temp_dir.path().join("base.fa.gz");
        std::fs::copy("../tests/data/fasta/base.fa.gz", &temp_fasta)
            .expect("Failed to copy base.fa.gz to tempdir");

        let mut store = RefgetStore::in_memory();
        store
            .add_sequence_collection_from_fasta(temp_fasta)
            .expect("Failed to add base.fa.gz to the store");

        println!("Listing sequences in the store...");
        let digest = "iYtREV555dUFKg2_agSJW6suquUyPpMw";

        // (start, length, expected substring) for each lookup.
        for (start, len, expected) in [(2, 5, "GGGGA"), (3, 2, "GG")] {
            println!("Retrieving a substring of sequence named: {:?}", digest);
            let end = start + len;
            let substring = store
                .get_substring(digest, start, end)
                .unwrap_or_else(|err| {
                    panic!(
                        "Failed to retrieve substring with name: {:?}: {:?}",
                        digest, err
                    )
                });
            println!("Retrieved substring: {:?}", substring);
            assert_eq!(substring, expected);
        }
    }
}