1pub mod digest;
37
38pub use digest::{
40 ASCII_ALPHABET,
41 Alphabet,
43 AlphabetGuesser,
44 AlphabetType,
45 DNA_2BIT_ALPHABET,
46 DNA_3BIT_ALPHABET,
47 DNA_IUPAC_ALPHABET,
48 FaiMetadata,
49 FastaStreamHasher,
51 PROTEIN_ALPHABET,
52 ParseOptions,
53 SeqColDigestLvl1,
54 SequenceCollection,
55 SequenceCollectionMetadata,
56 SequenceCollectionRecord,
57 SequenceEncoder,
58 SequenceMetadata,
59 SequenceRecord,
61 canonicalize_json,
62 decode_string_from_bytes,
63 decode_substring_from_bytes,
64 digest_fasta_bytes,
66 digest_sequence,
67 digest_sequence_with_description,
68 encode_sequence,
70 guess_alphabet,
71 load_fasta_bytes,
72 lookup_alphabet,
73 md5,
74 parse_fasta_header,
75 parse_rgsi_line,
76 sha512t24u,
78};
79
80#[cfg(feature = "filesystem")]
87pub mod fasta;
88
89#[cfg(feature = "filesystem")]
92pub mod collection;
93
94#[cfg(feature = "filesystem")]
96pub mod store;
97
98#[cfg(feature = "filesystem")]
100pub mod seqcol;
101
102#[cfg(feature = "filesystem")]
104mod hashkeyable;
105#[cfg(feature = "filesystem")]
106mod utils;
107
108#[cfg(feature = "filesystem")]
110pub use collection::{
111 SequenceCollectionExt, SequenceCollectionRecordExt, SequenceMetadataExt, SequenceRecordExt,
112 read_rgsi_file,
113};
114#[cfg(feature = "filesystem")]
115pub use fasta::{FaiRecord, compute_fai, digest_fasta, load_fasta};
116#[cfg(feature = "filesystem")]
117pub use store::{FhrAuthor, FhrIdentifier, FhrMetadata, FhrTaxon, FhrVitalStats};
118#[cfg(feature = "filesystem")]
119pub use seqcol::SeqColService;
120#[cfg(feature = "filesystem")]
121pub use store::{AvailableAliases, PagedResult, Pagination, PullResult, SyncStrategy};
122
#[cfg(all(test, feature = "filesystem"))]
mod tests {
    use super::*;

    use std::time::Instant;
    use store::{FastaImportOptions, RefgetStore};
    use tempfile::tempdir;

    /// Manual smoke test: imports the FASTA file named by the `FASTA_PATH`
    /// environment variable into two in-memory stores (one left as created,
    /// one with `disable_encoding()` applied) and checks that both return the
    /// same substring for the first sequence digest.
    ///
    /// Marked `#[ignore]` because it depends on an externally supplied file;
    /// run it explicitly with `FASTA_PATH=... cargo test -- --ignored`.
    ///
    /// # Panics
    ///
    /// Panics if `FASTA_PATH` is unset, if the import fails, if the store ends
    /// up empty, or if any substring lookup fails or the two stores disagree.
    #[test]
    #[ignore]
    fn test_loading_large_fasta_file() {
        let fasta_path =
            std::env::var("FASTA_PATH").expect("FASTA_PATH environment variable not set");
        println!("Loading large FASTA file: {}", &fasta_path);

        println!("Adding sequences from FASTA file...");
        // Only the import into the first store is timed.
        let load_timer = Instant::now();
        let mut store = RefgetStore::in_memory();
        store
            .add_sequence_collection_from_fasta(&fasta_path, FastaImportOptions::new())
            .unwrap();
        let load_time = load_timer.elapsed();
        println!("Time taken to load: {:.2?}", load_time);

        // Second store over the same file, with encoding switched off, used as
        // the comparison baseline below.
        let mut store2 = RefgetStore::in_memory();
        store2.disable_encoding();
        store2
            .add_sequence_collection_from_fasta(&fasta_path, FastaImportOptions::new())
            .unwrap();

        let sequences: Vec<_> = store.sequence_digests().collect();
        assert!(!sequences.is_empty(), "No sequences found in the store");

        println!("Look up a sequence by digest...");
        let digest = &sequences[0];
        let digest_str = String::from_utf8(digest.to_vec()).expect("Invalid ASCII data");

        println!("Retrieving a substring of sequence named: {:?}", digest_str);
        let head = store.get_substring(digest, 0, 3).unwrap_or_else(|e| {
            panic!(
                "Failed to retrieve substring with name: {:?}: {:?}",
                digest_str, e
            )
        });
        println!("Retrieved substring: {:?}", head);

        // NOTE(review): 148 * 70 presumably targets line 148 of a FASTA file
        // wrapped at 70 columns -- confirm against the intended input file.
        let window_start = 148 * 70;
        let window_end = window_start + 70;

        let from_encoded = store
            .get_substring(digest, window_start, window_end)
            .unwrap_or_else(|e| {
                panic!(
                    "Failed to retrieve substring with name: {:?}: {:?}",
                    digest_str, e
                )
            });
        let from_raw = store2
            .get_substring(digest, window_start, window_end)
            .unwrap_or_else(|e| {
                panic!(
                    "Failed to retrieve substring with name: {:?} from the store with encoding disabled: {:?}",
                    digest_str, e
                )
            });

        assert_eq!(
            from_encoded, from_raw,
            "Stores with and without encoding disagree for sequence {:?}",
            digest_str
        );
        println!("Retrieved substring: {:?}", from_encoded);
        println!("Retrieved substring: {:?}", from_raw);
    }

    /// Imports a copy of `../tests/data/fasta/base.fa.gz` into an in-memory
    /// store and checks two substrings of the sequence with digest
    /// `iYtREV555dUFKg2_agSJW6suquUyPpMw` against their expected bases.
    ///
    /// # Panics
    ///
    /// Panics if the fixture cannot be copied or imported, or if a substring
    /// lookup fails or returns unexpected bases.
    #[test]
    fn test_get_sequence_encoded() {
        // The fixture is copied into a temp dir and imported from there;
        // `temp_dir` must stay alive until the import has finished.
        let temp_dir = tempdir().expect("Failed to create temporary directory");
        let temp_fasta = temp_dir.path().join("base.fa.gz");
        std::fs::copy("../tests/data/fasta/base.fa.gz", &temp_fasta)
            .expect("Failed to copy base.fa.gz to tempdir");

        let mut store = RefgetStore::in_memory();
        store
            .add_sequence_collection_from_fasta(temp_fasta, FastaImportOptions::new())
            .unwrap();
        println!("Listing sequences in the store...");

        // Already a `&str`; no byte round-trip is needed to print it.
        let digest = "iYtREV555dUFKg2_agSJW6suquUyPpMw";

        // (start, length, expected bases) for each lookup.
        let cases = [(2, 5, "GGGGA"), (3, 2, "GG")];
        for (start, len, expected) in cases {
            println!("Retrieving a substring of sequence named: {:?}", digest);
            let end = start + len;
            let substring = store.get_substring(digest, start, end).unwrap_or_else(|e| {
                panic!(
                    "Failed to retrieve substring with name: {:?}: {:?}",
                    digest, e
                )
            });
            println!("Retrieved substring: {:?}", substring);
            assert_eq!(substring, expected);
        }
    }
}