1use super::*;
5use super::readonly::ReadonlyRefgetStore;
6use super::fhr_metadata;
7
8use std::collections::HashMap;
9use std::ffi::OsStr;
10
11use indexmap::IndexMap;
12use std::fs::{self, File, create_dir_all};
13use std::io::{BufRead, Write};
14use std::path::Path;
15
16use anyhow::{Context, Result};
17use sha2::{Sha256, Digest};
18
19use crate::collection::{
20 SequenceCollectionRecordExt,
21 read_rgsi_file,
22};
23use crate::digest::{
24 SequenceCollectionRecord, SequenceMetadata, SequenceRecord,
25 parse_rgci_line, parse_rgsi_line,
26};
27use crate::hashkeyable::HashKeyable;
28
29use chrono::Utc;
30
31impl ReadonlyRefgetStore {
36 pub(crate) fn write_sequence_to_disk_single(
38 &self,
39 metadata: &SequenceMetadata,
40 sequence: &[u8],
41 ) -> Result<()> {
42 let template = self
43 .seqdata_path_template
44 .as_ref()
45 .context("seqdata_path_template not set")?;
46 let local_path = self.local_path.as_ref().context("local_path not set")?;
47
48 let seq_file_path = Self::expand_template(&metadata.sha512t24u, template);
49 let full_path = local_path.join(&seq_file_path);
50
51 if let Some(parent) = full_path.parent() {
52 create_dir_all(parent)?;
53 }
54
55 let mut file = File::create(&full_path)?;
56 file.write_all(sequence)?;
57
58 Ok(())
59 }
60
61 pub(crate) fn write_collection_to_disk_single(&self, record: &SequenceCollectionRecord) -> Result<()> {
64 let local_path = self.local_path.as_ref().context("local_path not set")?;
65
66 let coll_file_path = format!("collections/{}.rgsi", record.metadata().digest);
67 let full_path = local_path.join(&coll_file_path);
68
69 if let Some(parent) = full_path.parent() {
70 create_dir_all(parent)?;
71 }
72
73 record.write_collection_rgsi(&full_path)?;
74
75 Ok(())
76 }
77
78 pub(crate) fn write_index_files(&self) -> Result<()> {
83 let local_path = self.local_path.as_ref().context("local_path not set")?;
84 let template = self
85 .seqdata_path_template
86 .as_ref()
87 .context("seqdata_path_template not set")?;
88
89 let sequence_index_path = local_path.join("sequences.rgsi");
90 self.write_sequences_rgsi(&sequence_index_path)?;
91
92 let collection_index_path = local_path.join("collections.rgci");
93 self.write_collections_rgci(&collection_index_path)?;
94
95 let sequences_digest = Self::sha256_file(&sequence_index_path).ok();
97 let collections_digest = Self::sha256_file(&collection_index_path).ok();
98 let aliases_digest = self.compute_aliases_digest();
99 let fhr_digest = self.compute_fhr_digest();
100
101 self.write_rgstore_json_with_digests(
102 local_path, template,
103 collections_digest, sequences_digest,
104 aliases_digest, fhr_digest,
105 )?;
106
107 Ok(())
108 }
109
110 pub(crate) fn write_rgstore_json(&self, dir: &Path, seqdata_template: &str) -> Result<()> {
112 self.write_rgstore_json_with_digests(dir, seqdata_template, None, None, None, None)
113 }
114
115 pub(crate) fn write_rgstore_json_with_digests(
117 &self,
118 dir: &Path,
119 seqdata_template: &str,
120 collections_digest: Option<String>,
121 sequences_digest: Option<String>,
122 aliases_digest: Option<String>,
123 fhr_digest: Option<String>,
124 ) -> Result<()> {
125 let metadata = StoreMetadata {
126 version: 1,
127 seqdata_path_template: seqdata_template.to_string(),
128 collections_path_template: "collections/%s.rgsi".to_string(),
129 sequence_index: "sequences.rgsi".to_string(),
130 collection_index: Some("collections.rgci".to_string()),
131 mode: self.mode,
132 created_at: Utc::now().to_rfc3339(),
133 ancillary_digests: self.ancillary_digests,
134 attribute_index: self.attribute_index,
135 sequence_alias_namespaces: self.aliases.sequence_namespaces(),
136 collection_alias_namespaces: self.aliases.collection_namespaces(),
137 modified: Some(Utc::now().to_rfc3339()),
138 collections_digest,
139 sequences_digest,
140 aliases_digest,
141 fhr_digest,
142 };
143
144 let json = serde_json::to_string_pretty(&metadata)
145 .context("Failed to serialize metadata to JSON")?;
146 fs::write(dir.join("rgstore.json"), json).context("Failed to write rgstore.json")?;
147
148 Ok(())
149 }
150
151 pub(crate) fn write_collections_rgci<P: AsRef<Path>>(&self, file_path: P) -> Result<()> {
155 let file_path = file_path.as_ref();
156 let mut file = File::create(file_path)?;
157
158 writeln!(
159 file,
160 "#digest\tn_sequences\tnames_digest\tsequences_digest\tlengths_digest\tname_length_pairs_digest\tsorted_name_length_pairs_digest\tsorted_sequences_digest"
161 )?;
162
163 for record in self.collections.values() {
164 let meta = record.metadata();
165 writeln!(
166 file,
167 "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}",
168 meta.digest,
169 meta.n_sequences,
170 meta.names_digest,
171 meta.sequences_digest,
172 meta.lengths_digest,
173 meta.name_length_pairs_digest.as_deref().unwrap_or(""),
174 meta.sorted_name_length_pairs_digest.as_deref().unwrap_or(""),
175 meta.sorted_sequences_digest.as_deref().unwrap_or(""),
176 )?;
177 }
178 Ok(())
179 }
180
181 pub fn write_sequences_rgsi<P: AsRef<Path>>(&self, file_path: P) -> Result<()> {
183 let file_path = file_path.as_ref();
184 let mut file = std::fs::File::create(file_path)?;
185
186 writeln!(
187 file,
188 "#name\tlength\talphabet\tsha512t24u\tmd5\tdescription"
189 )?;
190
191 for result_sr in self.sequence_store.values() {
192 let result = result_sr.metadata();
193 let description = result.description.as_deref().unwrap_or("");
194 writeln!(
195 file,
196 "{}\t{}\t{}\t{}\t{}\t{}",
197 result.name,
198 result.length,
199 result.alphabet,
200 result.sha512t24u,
201 result.md5,
202 description
203 )?;
204 }
205 Ok(())
206 }
207
208 pub fn store_metadata(&self) -> Result<HashMap<String, String>> {
213 let local_path = self.local_path.as_ref().context("local_path not set")?;
214 let json = fs::read_to_string(local_path.join("rgstore.json"))
215 .context("Failed to read rgstore.json")?;
216 let metadata: StoreMetadata =
217 serde_json::from_str(&json).context("Failed to parse store metadata")?;
218
219 let mut map = HashMap::new();
220 if let Some(v) = metadata.modified { map.insert("modified".to_string(), v); }
221 if let Some(v) = metadata.collections_digest { map.insert("collections_digest".to_string(), v); }
222 if let Some(v) = metadata.sequences_digest { map.insert("sequences_digest".to_string(), v); }
223 if let Some(v) = metadata.aliases_digest { map.insert("aliases_digest".to_string(), v); }
224 if let Some(v) = metadata.fhr_digest { map.insert("fhr_digest".to_string(), v); }
225 Ok(map)
226 }
227
228 fn sha256_file(path: &Path) -> Result<String> {
230 let bytes = fs::read(path)?;
231 let hash = Sha256::digest(&bytes);
232 Ok(format!("{:x}", hash))
233 }
234
235 fn compute_aliases_digest(&self) -> Option<String> {
238 let local_path = self.local_path.as_ref()?;
239 let aliases_dir = local_path.join("aliases");
240 if !aliases_dir.exists() { return None; }
241
242 let mut paths: Vec<_> = Vec::new();
243 for subdir in &["sequences", "collections"] {
244 let sub = aliases_dir.join(subdir);
245 if sub.exists() {
246 if let Ok(entries) = fs::read_dir(&sub) {
247 for entry in entries.filter_map(|e| e.ok()) {
248 let p = entry.path();
249 if p.extension().map_or(false, |e| e == "tsv") {
250 paths.push(p);
251 }
252 }
253 }
254 }
255 }
256 if paths.is_empty() { return None; }
257 paths.sort();
258
259 let mut hasher = Sha256::new();
260 for path in &paths {
261 hasher.update(fs::read(path).ok()?);
262 }
263 Some(format!("{:x}", hasher.finalize()))
264 }
265
266 fn compute_fhr_digest(&self) -> Option<String> {
268 let local_path = self.local_path.as_ref()?;
269 let fhr_dir = local_path.join("fhr");
270 if !fhr_dir.exists() { return None; }
271
272 let mut paths: Vec<_> = fs::read_dir(&fhr_dir).ok()?
273 .filter_map(|e| e.ok())
274 .map(|e| e.path())
275 .collect();
276 if paths.is_empty() { return None; }
277 paths.sort();
278
279 let mut hasher = Sha256::new();
280 for path in paths {
281 hasher.update(fs::read(&path).ok()?);
282 }
283 Some(format!("{:x}", hasher.finalize()))
284 }
285
286 pub(crate) fn open_local<P: AsRef<Path>>(path: P) -> Result<Self> {
292 let root_path = path.as_ref();
293
294 let index_path = root_path.join("rgstore.json");
295 let json = fs::read_to_string(&index_path).context(format!(
296 "Failed to read rgstore.json from {}",
297 index_path.display()
298 ))?;
299
300 let metadata: StoreMetadata =
301 serde_json::from_str(&json).context("Failed to parse store metadata")?;
302
303 Self::sanitize_relative_path(&metadata.seqdata_path_template)?;
304 Self::sanitize_relative_path(&metadata.sequence_index)?;
305 if let Some(ref ci) = metadata.collection_index {
306 Self::sanitize_relative_path(ci)?;
307 }
308
309 let mut store = ReadonlyRefgetStore::new(metadata.mode);
310 store.local_path = Some(root_path.to_path_buf());
311 store.seqdata_path_template = Some(metadata.seqdata_path_template.clone());
312 store.persist_to_disk = true;
313 store.ancillary_digests = metadata.ancillary_digests;
314 store.attribute_index = metadata.attribute_index;
315
316 let sequence_index_path = root_path.join(&metadata.sequence_index);
317 if sequence_index_path.exists() {
318 Self::load_sequences_from_index(&mut store, &sequence_index_path)?;
319 }
320
321 if let Some(ref collection_index) = metadata.collection_index {
322 let collection_index_path = root_path.join(collection_index);
323 if collection_index_path.exists() {
324 Self::load_collection_stubs_from_rgci(&mut store, &collection_index_path)?;
325 }
326 }
327
328 if store.collections.is_empty() {
329 let collections_dir = root_path.join("collections");
330 Self::load_collections_from_directory(&mut store, &collections_dir)?;
331 }
332
333 let aliases_dir = root_path.join("aliases");
334 store.aliases.load_from_dir(&aliases_dir)?;
335
336 store.available_sequence_alias_namespaces = if metadata.sequence_alias_namespaces.is_empty() {
337 store.aliases.sequence_namespaces()
338 } else {
339 metadata.sequence_alias_namespaces
340 };
341 store.available_collection_alias_namespaces = if metadata.collection_alias_namespaces.is_empty() {
342 store.aliases.collection_namespaces()
343 } else {
344 metadata.collection_alias_namespaces
345 };
346
347 store.fhr_metadata =
348 fhr_metadata::load_sidecars(&root_path.join("fhr"));
349
350 Ok(store)
351 }
352
353 pub(crate) fn open_remote<P: AsRef<Path>, S: AsRef<str>>(
355 cache_path: P,
356 remote_url: S,
357 ) -> Result<Self> {
358 let cache_path = cache_path.as_ref();
359 let remote_url = remote_url.as_ref().to_string();
360
361 create_dir_all(cache_path)?;
362
363 let index_data = Self::fetch_file(
364 &Some(cache_path.to_path_buf()),
365 &Some(remote_url.clone()),
366 "rgstore.json",
367 true,
368 false,
369 )?;
370
371 let json =
372 String::from_utf8(index_data).context("Store metadata contains invalid UTF-8")?;
373
374 let metadata: StoreMetadata =
375 serde_json::from_str(&json).context("Failed to parse store metadata")?;
376
377 Self::sanitize_relative_path(&metadata.seqdata_path_template)?;
378 Self::sanitize_relative_path(&metadata.sequence_index)?;
379 if let Some(ref ci) = metadata.collection_index {
380 Self::sanitize_relative_path(ci)?;
381 }
382
383 let mut store = ReadonlyRefgetStore::new(metadata.mode);
384 store.local_path = Some(cache_path.to_path_buf());
385 store.remote_source = Some(remote_url.clone());
386 store.seqdata_path_template = Some(metadata.seqdata_path_template.clone());
387 store.persist_to_disk = true;
388 store.ancillary_digests = metadata.ancillary_digests;
389 store.attribute_index = metadata.attribute_index;
390 store.available_sequence_alias_namespaces = metadata.sequence_alias_namespaces;
391 store.available_collection_alias_namespaces = metadata.collection_alias_namespaces;
392
393 let sequence_index_data = Self::fetch_file(
394 &Some(cache_path.to_path_buf()),
395 &Some(remote_url.clone()),
396 &metadata.sequence_index,
397 true,
398 false,
399 )?;
400 let sequence_index_str = String::from_utf8(sequence_index_data)
401 .context("sequence index contains invalid UTF-8")?;
402
403 Self::load_sequences_from_reader(&mut store, sequence_index_str.as_bytes())?;
404
405 if let Some(ref collection_index) = metadata.collection_index {
406 if let Ok(collection_index_data) = Self::fetch_file(
407 &Some(cache_path.to_path_buf()),
408 &Some(remote_url.clone()),
409 collection_index,
410 true,
411 false,
412 ) {
413 let collection_index_str = String::from_utf8(collection_index_data)
414 .context("collection index contains invalid UTF-8")?;
415
416 Self::load_collection_stubs_from_reader(
417 &mut store,
418 collection_index_str.as_bytes(),
419 )?;
420 }
421 }
422
423 if store.collections.is_empty() {
424 let local_collections_dir = cache_path.join("collections");
425 create_dir_all(&local_collections_dir)?;
426 Self::load_collections_from_directory(&mut store, &local_collections_dir)?;
427 }
428
429 Ok(store)
430 }
431
432 pub(crate) fn load_sequences_from_reader<R: BufRead>(store: &mut ReadonlyRefgetStore, reader: R) -> Result<()> {
438 for line in reader.lines() {
439 let line = line?;
440
441 if line.starts_with('#') {
442 continue;
443 }
444
445 if let Some(seq_metadata) = parse_rgsi_line(&line) {
446 let record = SequenceRecord::Stub(seq_metadata.clone());
447
448 let sha512_key = seq_metadata.sha512t24u.to_key();
449 store.sequence_store.insert(sha512_key, record);
450
451 let md5_key = seq_metadata.md5.to_key();
452 store.md5_lookup.insert(md5_key, sha512_key);
453 }
454 }
455
456 Ok(())
457 }
458
459 pub(crate) fn load_sequences_from_index(store: &mut ReadonlyRefgetStore, index_path: &Path) -> Result<()> {
461 let file = std::fs::File::open(index_path)?;
462 let reader = std::io::BufReader::new(file);
463 Self::load_sequences_from_reader(store, reader)
464 }
465
466 pub(crate) fn load_collection_stubs_from_reader<R: BufRead>(
468 store: &mut ReadonlyRefgetStore,
469 reader: R,
470 ) -> Result<()> {
471 for line in reader.lines() {
472 let line = line?;
473
474 if let Some(metadata) = parse_rgci_line(&line) {
475 let key = metadata.digest.to_key();
476 store
477 .collections
478 .insert(key, SequenceCollectionRecord::Stub(metadata));
479 }
480 }
481
482 Ok(())
483 }
484
485 pub(crate) fn load_collection_stubs_from_rgci(store: &mut ReadonlyRefgetStore, index_path: &Path) -> Result<()> {
487 let file = std::fs::File::open(index_path)?;
488 let reader = std::io::BufReader::new(file);
489 Self::load_collection_stubs_from_reader(store, reader)
490 }
491
492 pub(crate) fn load_collections_from_directory(
494 store: &mut ReadonlyRefgetStore,
495 collections_dir: &Path,
496 ) -> Result<()> {
497 if !collections_dir.exists() {
498 return Ok(());
499 }
500
501 for entry in fs::read_dir(collections_dir)? {
502 let entry = entry?;
503 let path = entry.path();
504
505 if path.is_file() && path.extension() == Some(OsStr::new("rgsi")) {
506 let collection = read_rgsi_file(&path)?;
507 let collection_digest = collection.metadata.digest.to_key();
508
509 let mut name_map = IndexMap::new();
510 for sequence_record in &collection.sequences {
511 let metadata = sequence_record.metadata();
512 name_map.insert(metadata.name.clone(), metadata.sha512t24u.to_key());
513 }
514 store.name_lookup.insert(collection_digest, name_map);
515
516 let record = SequenceCollectionRecord::from(collection);
517 store.collections.insert(collection_digest, record);
518 }
519 }
520
521 Ok(())
522 }
523}