1use super::*;
9use super::readonly::ReadonlyRefgetStore;
10use super::core::RefgetStore;
11
12use std::collections::HashMap;
13use std::fs;
14use std::path::Path;
15
16use anyhow::{Context, Result};
17use serde::{Deserialize, Serialize};
18
19use crate::hashkeyable::{DigestKey, HashKeyable, key_to_digest_string};
20
/// FHR metadata record that the store keeps per sequence collection and
/// persists as a JSON sidecar file.
///
/// Every declared field is optional: a missing key deserializes to `None`, and
/// `None` values are left out of the output, so a default value serializes to
/// `{}`. Keys are written in camelCase unless a field overrides its name.
/// Keys that are not declared here are collected in [`FhrMetadata::extra`].
#[derive(Clone, Debug, Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct FhrMetadata {
    /// JSON key `schema`.
    /// NOTE(review): presumably the URL of the FHR schema document — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub schema: Option<String>,

    /// JSON key `schemaVersion`, held as a raw JSON number so both integer
    /// (`1`) and floating-point (`1.0`) spellings are accepted.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub schema_version: Option<serde_json::Number>,

    /// JSON key `genome`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub genome: Option<String>,

    /// JSON key `taxon`; a nested object with `name` and `uri`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub taxon: Option<FhrTaxon>,

    /// JSON key `version`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub version: Option<String>,

    /// JSON key `metadataAuthor`; a list of author objects.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata_author: Option<Vec<FhrAuthor>>,

    /// JSON key `assemblyAuthor`; a list of author objects.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub assembly_author: Option<Vec<FhrAuthor>>,

    /// JSON key `dateCreated`; stored as an unparsed string.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub date_created: Option<String>,

    /// JSON key `voucherSpecimen`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub voucher_specimen: Option<String>,

    /// JSON key `masking`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub masking: Option<String>,

    /// JSON key `checksum`; stored as an unparsed string.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub checksum: Option<String>,

    /// JSON key `genomeSynonym`; a list of strings.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub genome_synonym: Option<Vec<String>>,

    /// JSON key `accessionID`. The explicit rename is required because the
    /// camelCase rule alone would produce `accessionId`.
    #[serde(
        default,
        skip_serializing_if = "Option::is_none",
        rename = "accessionID"
    )]
    pub accession_id: Option<FhrIdentifier>,

    /// JSON key `instrument`; a list of strings.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub instrument: Option<Vec<String>>,

    /// JSON key `scholarlyArticle`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub scholarly_article: Option<String>,

    /// JSON key `documentation`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub documentation: Option<String>,

    /// JSON key `identifier`; a list of strings.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub identifier: Option<Vec<String>>,

    /// JSON key `license`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub license: Option<String>,

    /// JSON key `relatedLink`; a list of strings.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub related_link: Option<Vec<String>>,

    /// JSON key `funding`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub funding: Option<String>,

    /// JSON key `vitalStats`; a nested statistics object.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub vital_stats: Option<FhrVitalStats>,

    /// In-memory only: never written to or read from JSON, so it is always
    /// `None` immediately after deserialization.
    #[serde(skip)]
    pub seqcol_digest: Option<String>,

    /// Catch-all for keys not declared above. They are captured on
    /// deserialization and written back at the top level on serialization.
    #[serde(flatten)]
    pub extra: HashMap<String, serde_json::Value>,
}
128
/// Statistics object stored under the `vitalStats` key of [`FhrMetadata`].
///
/// All fields are optional and are omitted from the output when `None`.
/// Keys are camelCase except for the three explicitly renamed upper-case ones.
#[derive(Clone, Debug, Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct FhrVitalStats {
    /// JSON key `L50` (explicit rename; the camelCase rule would give `l50`).
    #[serde(default, skip_serializing_if = "Option::is_none", rename = "L50")]
    pub l50: Option<i64>,
    /// JSON key `N50` (explicit rename).
    #[serde(default, skip_serializing_if = "Option::is_none", rename = "N50")]
    pub n50: Option<i64>,
    /// JSON key `L90` (explicit rename).
    #[serde(default, skip_serializing_if = "Option::is_none", rename = "L90")]
    pub l90: Option<i64>,
    /// JSON key `totalBasePairs`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub total_base_pairs: Option<i64>,
    /// JSON key `numberContigs`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub number_contigs: Option<i64>,
    /// JSON key `numberScaffolds`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub number_scaffolds: Option<i64>,
    /// JSON key `readTechnology`; free-form string.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub read_technology: Option<String>,
}
148
149#[derive(Clone, Debug, Serialize, Deserialize)]
150pub struct FhrTaxon {
151 #[serde(default, skip_serializing_if = "Option::is_none")]
152 pub name: Option<String>,
153 #[serde(default, skip_serializing_if = "Option::is_none")]
154 pub uri: Option<String>,
155}
156
157#[derive(Clone, Debug, Serialize, Deserialize)]
158pub struct FhrAuthor {
159 #[serde(default, skip_serializing_if = "Option::is_none")]
160 pub name: Option<String>,
161 #[serde(default, skip_serializing_if = "Option::is_none")]
162 pub uri: Option<String>,
163}
164
165#[derive(Clone, Debug, Serialize, Deserialize)]
166pub struct FhrIdentifier {
167 #[serde(default, skip_serializing_if = "Option::is_none")]
168 pub name: Option<String>,
169 #[serde(default, skip_serializing_if = "Option::is_none")]
170 pub url: Option<String>,
171}
172
/// File-name suffix of an FHR sidecar. A sidecar is named
/// `<digest><SIDECAR_EXTENSION>`; the suffix is appended when writing and
/// stripped when loading to recover the digest string.
pub(crate) const SIDECAR_EXTENSION: &str = ".fhr.json";
178
179pub fn load_sidecars(fhr_dir: &Path) -> HashMap<DigestKey, FhrMetadata> {
184 let mut map = HashMap::new();
185 if !fhr_dir.exists() {
186 return map;
187 }
188 let entries = match fs::read_dir(fhr_dir) {
189 Ok(e) => e,
190 Err(e) => {
191 eprintln!(
192 "Warning: could not read FHR sidecar directory {}: {}",
193 fhr_dir.display(),
194 e
195 );
196 return map;
197 }
198 };
199 for entry in entries.flatten() {
200 let path = entry.path();
201 if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
202 if name.ends_with(SIDECAR_EXTENSION) {
203 let digest_str = &name[..name.len() - SIDECAR_EXTENSION.len()];
204 let key = digest_str.to_key();
205 match fs::read_to_string(&path) {
206 Ok(json) => {
207 match serde_json::from_str::<FhrMetadata>(&json) {
208 Ok(fhr) => {
209 map.insert(key, fhr);
210 }
211 Err(e) => {
212 eprintln!(
213 "Warning: skipping malformed FHR sidecar {}: {}",
214 path.display(),
215 e
216 );
217 }
218 }
219 }
220 Err(e) => {
221 eprintln!(
222 "Warning: could not read FHR sidecar {}: {}",
223 path.display(),
224 e
225 );
226 }
227 }
228 }
229 }
230 }
231 map
232}
233
234pub fn write_sidecars(
236 fhr_dir: &Path,
237 metadata: &HashMap<DigestKey, FhrMetadata>,
238) -> Result<()> {
239 for (key, fhr) in metadata {
240 let digest_str = key_to_digest_string(key);
241 let path = fhr_dir.join(format!("{}{}", digest_str, SIDECAR_EXTENSION));
242 write_sidecar(&path, fhr)?;
243 }
244 Ok(())
245}
246
247pub fn write_sidecar(path: &Path, metadata: &FhrMetadata) -> Result<()> {
249 if let Some(parent) = path.parent() {
250 fs::create_dir_all(parent)?;
251 }
252 let json = serde_json::to_string_pretty(metadata)?;
253 fs::write(path, json)?;
254 Ok(())
255}
256
257pub fn remove_sidecar(fhr_dir: &Path, digest_str: &str) {
259 let path = fhr_dir.join(format!("{}{}", digest_str, SIDECAR_EXTENSION));
260 let _ = fs::remove_file(path);
261}
262
263pub fn sidecar_path(fhr_dir: &Path, digest_str: &str) -> std::path::PathBuf {
265 fhr_dir.join(format!("{}{}", digest_str, SIDECAR_EXTENSION))
266}
267
268pub fn load_from_json(path: &str) -> Result<FhrMetadata> {
270 let json = fs::read_to_string(path)
271 .context(format!("Failed to read FHR metadata from {}", path))?;
272 serde_json::from_str(&json).context("Failed to parse FHR JSON")
273}
274
275impl ReadonlyRefgetStore {
280 pub fn set_fhr_metadata(
282 &mut self,
283 collection_digest: &str,
284 metadata: FhrMetadata,
285 ) -> Result<()> {
286 let key = collection_digest.to_key();
287 if !self.collections.contains_key(&key) {
288 return Err(anyhow::anyhow!("Collection not found: {}", collection_digest));
289 }
290 if self.persist_to_disk {
291 if let Some(ref local_path) = self.local_path {
292 let path = sidecar_path(
293 &local_path.join("fhr"),
294 collection_digest,
295 );
296 write_sidecar(&path, &metadata)?;
297 }
298 }
299 self.fhr_metadata.insert(key, metadata);
300 Ok(())
301 }
302
303 pub fn get_fhr_metadata(&self, collection_digest: &str) -> Option<&FhrMetadata> {
305 let key = collection_digest.to_key();
306 self.fhr_metadata.get(&key)
307 }
308
309 pub fn remove_fhr_metadata(&mut self, collection_digest: &str) -> bool {
311 let key = collection_digest.to_key();
312 if self.persist_to_disk {
313 if let Some(ref local_path) = self.local_path {
314 remove_sidecar(
315 &local_path.join("fhr"),
316 collection_digest,
317 );
318 }
319 }
320 self.fhr_metadata.remove(&key).is_some()
321 }
322
323 pub fn list_fhr_metadata(&self) -> Vec<String> {
325 self.fhr_metadata
326 .keys()
327 .map(|key| key_to_digest_string(key))
328 .collect()
329 }
330
331 pub fn load_fhr_metadata(&mut self, collection_digest: &str, path: &str) -> Result<()> {
333 let metadata = load_from_json(path)?;
334 self.set_fhr_metadata(collection_digest, metadata)
335 }
336}
337
338impl RefgetStore {
343 pub fn set_fhr_metadata(&mut self, collection_digest: &str, metadata: FhrMetadata) -> Result<()> {
345 self.inner.set_fhr_metadata(collection_digest, metadata)
346 }
347
348 pub fn remove_fhr_metadata(&mut self, collection_digest: &str) -> bool {
350 self.inner.remove_fhr_metadata(collection_digest)
351 }
352
353 pub fn load_fhr_metadata(&mut self, collection_digest: &str, path: &str) -> Result<()> {
355 self.inner.load_fhr_metadata(collection_digest, path)
356 }
357
358 pub fn pull_fhr(
363 &mut self,
364 digest: Option<&str>,
365 strategy: SyncStrategy,
366 ) -> Result<PullResult> {
367 let mut result = PullResult::default();
368
369 let digests: Vec<String> = match digest {
370 Some(d) => vec![d.to_string()],
371 None => self
372 .inner
373 .collections
374 .values()
375 .map(|r| r.metadata().digest.to_string())
376 .collect(),
377 };
378
379 for digest_str in &digests {
380 let relative_path = format!("fhr/{}.fhr.json", digest_str);
381
382 match strategy {
383 SyncStrategy::KeepOurs => {
384 let was_local = self
385 .inner
386 .local_path
387 .as_ref()
388 .map(|p| p.join(&relative_path).exists())
389 .unwrap_or(false);
390 match ReadonlyRefgetStore::fetch_file(
391 &self.inner.local_path,
392 &self.inner.remote_source,
393 &relative_path,
394 self.inner.persist_to_disk,
395 false,
396 ) {
397 Ok(data) => {
398 if was_local {
399 result.skipped += 1;
400 } else {
401 if let Ok(fhr) = serde_json::from_slice::<FhrMetadata>(&data) {
402 let key = digest_str.to_key();
403 self.inner.fhr_metadata.insert(key, fhr);
404 }
405 result.pulled += 1;
406 }
407 }
408 Err(_) => {
409 result.not_found += 1;
410 }
411 }
412 }
413 SyncStrategy::KeepTheirs => {
414 match ReadonlyRefgetStore::fetch_file(
415 &self.inner.local_path,
416 &self.inner.remote_source,
417 &relative_path,
418 self.inner.persist_to_disk,
419 true,
420 ) {
421 Ok(data) => {
422 if let Ok(fhr) = serde_json::from_slice::<FhrMetadata>(&data) {
423 let key = digest_str.to_key();
424 self.inner.fhr_metadata.insert(key, fhr);
425 }
426 result.pulled += 1;
427 }
428 Err(_) => {
429 result.not_found += 1;
430 }
431 }
432 }
433 SyncStrategy::Notify => {
434 let local_exists = self
435 .inner
436 .local_path
437 .as_ref()
438 .map(|p| p.join(&relative_path).exists())
439 .unwrap_or(false);
440
441 if local_exists {
442 match ReadonlyRefgetStore::fetch_file(
443 &None,
444 &self.inner.remote_source,
445 &relative_path,
446 false,
447 false,
448 ) {
449 Ok(remote_data) => {
450 let local_path = self
451 .inner
452 .local_path
453 .as_ref()
454 .unwrap()
455 .join(&relative_path);
456 let local_data = fs::read(&local_path)?;
457 if local_data != remote_data {
458 result.conflicts.push(relative_path);
459 } else {
460 result.skipped += 1;
461 }
462 }
463 Err(_) => {
464 result.not_found += 1;
465 }
466 }
467 } else {
468 match ReadonlyRefgetStore::fetch_file(
469 &None,
470 &self.inner.remote_source,
471 &relative_path,
472 false,
473 false,
474 ) {
475 Ok(_) => {
476 result.conflicts.push(relative_path);
477 }
478 Err(_) => {
479 result.not_found += 1;
480 }
481 }
482 }
483 }
484 }
485 }
486
487 Ok(result)
488 }
489}
490
/// Tests for the FHR data model, the sidecar file helpers, the store-level
/// metadata API and `pull_fhr` against a local HTTP server.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// A populated record survives serialization to pretty JSON and back.
    #[test]
    fn test_json_roundtrip() {
        let fhr = FhrMetadata {
            schema: Some("https://raw.githubusercontent.com/FAIR-bioHeaders/FHR-Specification/main/fhr.json".to_string()),
            schema_version: Some(serde_json::Number::from_f64(1.0).unwrap()),
            genome: Some("Homo sapiens".to_string()),
            taxon: Some(FhrTaxon {
                name: Some("Homo sapiens".to_string()),
                uri: Some("https://identifiers.org/taxonomy:9606".to_string()),
            }),
            version: Some("GRCh38.p14".to_string()),
            masking: Some("soft-masked".to_string()),
            genome_synonym: Some(vec!["hg38".to_string()]),
            scholarly_article: Some("10.1371/journal.pntd.0008755".to_string()),
            funding: Some("NIH R01".to_string()),
            accession_id: Some(FhrIdentifier {
                name: Some("GCA_000001405.29".to_string()),
                url: Some("https://www.ncbi.nlm.nih.gov/assembly/GCA_000001405.29".to_string()),
            }),
            ..Default::default()
        };

        let json = serde_json::to_string_pretty(&fhr).unwrap();
        let roundtripped: FhrMetadata = serde_json::from_str(&json).unwrap();
        assert_eq!(roundtripped.genome, fhr.genome);
        assert_eq!(roundtripped.taxon.as_ref().unwrap().name, fhr.taxon.as_ref().unwrap().name);
        assert_eq!(roundtripped.genome_synonym, fhr.genome_synonym);
        assert_eq!(roundtripped.scholarly_article, Some("10.1371/journal.pntd.0008755".to_string()));
        assert_eq!(roundtripped.funding, Some("NIH R01".to_string()));
        assert!(roundtripped.accession_id.is_some());
    }

    /// Keys not declared on `FhrMetadata` land in `extra` and are written back.
    #[test]
    fn test_extra_fields_preserved() {
        let json = r#"{
            "genome": "Test",
            "customField": "custom_value",
            "anotherCustom": [1, 2, 3]
        }"#;
        let fhr: FhrMetadata = serde_json::from_str(json).unwrap();
        assert_eq!(fhr.genome, Some("Test".to_string()));
        assert!(fhr.extra.contains_key("customField"));

        let json_out = serde_json::to_string(&fhr).unwrap();
        assert!(json_out.contains("customField"));
        assert!(json_out.contains("custom_value"));
    }

    /// Field names are emitted in camelCase, never in snake_case.
    #[test]
    fn test_camel_case_serialization() {
        let fhr = FhrMetadata {
            schema_version: Some(serde_json::Number::from_f64(1.0).unwrap()),
            genome_synonym: Some(vec!["hg38".to_string()]),
            date_created: Some("2024-01-01".to_string()),
            ..Default::default()
        };
        let json = serde_json::to_string(&fhr).unwrap();
        assert!(json.contains("schemaVersion"));
        assert!(json.contains("genomeSynonym"));
        assert!(json.contains("dateCreated"));
        assert!(!json.contains("schema_version"));
        assert!(!json.contains("genome_synonym"));
    }

    /// A default record has no keys at all once serialized.
    #[test]
    fn test_default_is_empty() {
        let fhr = FhrMetadata::default();
        let json = serde_json::to_string(&fhr).unwrap();
        assert_eq!(json, "{}");
    }

    /// `write_sidecar` followed by `load_from_json` returns the same values.
    #[test]
    fn test_write_and_load_sidecar() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("test.fhr.json");

        let fhr = FhrMetadata {
            genome: Some("Test".to_string()),
            version: Some("1.0".to_string()),
            ..Default::default()
        };

        write_sidecar(&path, &fhr).unwrap();
        assert!(path.exists());

        let loaded = load_from_json(path.to_str().unwrap()).unwrap();
        assert_eq!(loaded.genome, Some("Test".to_string()));
        assert_eq!(loaded.version, Some("1.0".to_string()));
    }

    /// An existing but empty directory yields an empty map.
    #[test]
    fn test_load_sidecars_empty_dir() {
        let dir = tempdir().unwrap();
        let map = load_sidecars(dir.path());
        assert!(map.is_empty());
    }

    /// A directory that does not exist yields an empty map rather than an error.
    #[test]
    fn test_load_sidecars_nonexistent_dir() {
        let map = load_sidecars(Path::new("/nonexistent/path"));
        assert!(map.is_empty());
    }

    /// Removing a sidecar that was never written must not panic.
    #[test]
    fn test_remove_sidecar_missing_is_ok() {
        let dir = tempdir().unwrap();
        remove_sidecar(dir.path(), "nonexistent_digest");
    }

    /// The accession field uses the key `accessionID`, not `accessionId`.
    #[test]
    fn test_accession_id_casing() {
        let fhr = FhrMetadata {
            accession_id: Some(FhrIdentifier {
                name: Some("GCA_000001405.29".to_string()),
                url: Some("https://ncbi.nlm.nih.gov".to_string()),
            }),
            ..Default::default()
        };
        let json = serde_json::to_string(&fhr).unwrap();
        assert!(json.contains("accessionID"));
        assert!(!json.contains("accessionId"));
    }

    /// `schemaVersion` accepts both an integer and a float and keeps the
    /// spelling it was given.
    #[test]
    fn test_schema_version_as_number() {
        let json = r#"{"schemaVersion": 1}"#;
        let fhr: FhrMetadata = serde_json::from_str(json).unwrap();
        assert!(fhr.schema_version.is_some());
        let ver = fhr.schema_version.unwrap();
        assert_eq!(ver.to_string(), "1");

        let json = r#"{"schemaVersion": 1.0}"#;
        let fhr: FhrMetadata = serde_json::from_str(json).unwrap();
        assert!(fhr.schema_version.is_some());
        let ver = fhr.schema_version.unwrap();
        assert_eq!(ver.to_string(), "1.0");
    }

    /// Vital statistics use the renamed upper-case and camelCase keys and
    /// round-trip through JSON.
    #[test]
    fn test_vital_stats_roundtrip() {
        let fhr = FhrMetadata {
            vital_stats: Some(FhrVitalStats {
                l50: Some(42),
                n50: Some(1_000_000),
                l90: Some(100),
                total_base_pairs: Some(3_000_000_000),
                number_contigs: Some(500),
                number_scaffolds: Some(24),
                read_technology: Some("hifi".to_string()),
            }),
            ..Default::default()
        };
        let json = serde_json::to_string_pretty(&fhr).unwrap();
        assert!(json.contains("\"L50\""));
        assert!(json.contains("\"N50\""));
        assert!(json.contains("\"L90\""));
        assert!(json.contains("\"totalBasePairs\""));
        assert!(json.contains("\"numberContigs\""));
        let roundtripped: FhrMetadata = serde_json::from_str(&json).unwrap();
        let stats = roundtripped.vital_stats.unwrap();
        assert_eq!(stats.l50, Some(42));
        assert_eq!(stats.n50, Some(1_000_000));
        assert_eq!(stats.read_technology, Some("hifi".to_string()));
    }

    /// A full example document parses; keys without a declared field
    /// (`assemblySoftware`, `reuseConditions`) end up in `extra`.
    ///
    /// NOTE(review): the `schema` URL below ends in `fhr.jso`, and
    /// `assemblyAuthor` uses the key `url` although `FhrAuthor` declares
    /// `uri`. Neither is asserted on here — confirm whether they mirror the
    /// upstream example or are typos.
    #[test]
    fn test_spec_example_roundtrip() {
        let json = r#"{
            "schema":"https://raw.githubusercontent.com/FAIR-bioHeaders/FHR-Specification/main/fhr.jso",
            "schemaVersion": 1.0,
            "taxon": {"name":"Bombas huntii", "uri": "https://identifiers.org/taxonomy:9606"},
            "genome": "Bombas huntii",
            "genomeSynonym": ["B. huntii"],
            "version": "0.0.1",
            "metadataAuthor": [{"name":"Adam Wright", "uri":"https://orcid.org/0000-0002-5719-4024"}],
            "assemblyAuthor": [{"name":"David Molik", "url":"https://orcid.org/0000-0003-3192-6538"}],
            "dateCreated":"2022-03-21",
            "accessionID": {"name":"PBARC", "url":"https://www.ars.usda.gov/pacific-west-area/hilo-hi/daniel-k-inouye-us-pacific-basin-agricultural-research-center/"},
            "instrument": ["Sequel IIe", "Nanopore"],
            "voucherSpecimen":"Located in Freezer 33, Drawer 137",
            "scholarlyArticle":"10.1371/journal.pntd.0008755",
            "assemblySoftware":"HiFiASM",
            "funding":"funding",
            "reuseConditions":"public domain",
            "documentation":"Built assembly from... ",
            "masking":"soft-masked",
            "identifier": ["beetlebase:TC010103"],
            "relatedLink": ["http://wfleabase.org/genome/Daphnia_pulex/dpulex_jgi060905/fasta/"],
            "checksum":"md5:7582b26fcb0a9775b87c38f836e97c42"
        }"#;
        let fhr: FhrMetadata = serde_json::from_str(json).unwrap();
        assert_eq!(fhr.genome, Some("Bombas huntii".to_string()));
        assert_eq!(fhr.voucher_specimen, Some("Located in Freezer 33, Drawer 137".to_string()));
        assert_eq!(fhr.documentation, Some("Built assembly from... ".to_string()));
        assert_eq!(fhr.scholarly_article, Some("10.1371/journal.pntd.0008755".to_string()));
        assert_eq!(fhr.funding, Some("funding".to_string()));
        assert_eq!(fhr.identifier, Some(vec!["beetlebase:TC010103".to_string()]));
        assert!(fhr.accession_id.is_some());
        assert_eq!(fhr.accession_id.as_ref().unwrap().name, Some("PBARC".to_string()));
        assert!(fhr.extra.contains_key("assemblySoftware"));
        assert!(fhr.extra.contains_key("reuseConditions"));
    }

    /// `seqcol_digest` is in-memory only: neither its key nor its value may
    /// appear in the serialized output.
    #[test]
    fn test_seqcol_digest_skipped_in_json() {
        let mut fhr = FhrMetadata {
            genome: Some("Test".to_string()),
            ..Default::default()
        };
        fhr.seqcol_digest = Some("abc123".to_string());
        let json = serde_json::to_string(&fhr).unwrap();
        assert!(!json.contains("seqcolDigest"));
        assert!(!json.contains("seqcol_digest"));
        assert!(!json.contains("abc123"));
    }

    /// `voucherSpecimen`, `documentation` and `identifier` are serialized
    /// under their expected keys.
    #[test]
    fn test_new_fields_present() {
        let fhr = FhrMetadata {
            voucher_specimen: Some("Freezer 33".to_string()),
            documentation: Some("Assembly notes".to_string()),
            identifier: Some(vec!["ncbi:GCA_000001405".to_string()]),
            ..Default::default()
        };
        let json = serde_json::to_string(&fhr).unwrap();
        assert!(json.contains("voucherSpecimen"));
        assert!(json.contains("documentation"));
        assert!(json.contains("identifier"));
    }

    /// A sidecar with invalid JSON is skipped rather than aborting the load.
    #[test]
    fn test_load_sidecars_skips_malformed_json() {
        let dir = tempdir().unwrap();
        let bad_path = dir.path().join("baddigest.fhr.json");
        fs::write(&bad_path, "{ not valid json }").unwrap();
        let map = load_sidecars(dir.path());
        assert!(map.is_empty());
    }

    /// A malformed sidecar does not prevent a valid sibling from loading.
    #[test]
    fn test_load_sidecars_loads_valid_skips_invalid() {
        let dir = tempdir().unwrap();

        let valid_fhr = FhrMetadata {
            genome: Some("ValidGenome".to_string()),
            ..Default::default()
        };
        write_sidecar(&dir.path().join("validdigest.fhr.json"), &valid_fhr).unwrap();

        fs::write(dir.path().join("baddigest.fhr.json"), "not json at all").unwrap();

        let map = load_sidecars(dir.path());
        assert_eq!(map.len(), 1);
    }

    // ---- Store-level tests: these exercise the metadata API on `RefgetStore`. ----

    /// A freshly imported collection has no FHR metadata.
    #[test]
    fn test_fhr_metadata_empty_by_default() {
        let mut store = RefgetStore::in_memory();

        let (meta, _) = store
            .add_sequence_collection_from_fasta("../tests/data/fasta/base.fa", FastaImportOptions::new())
            .unwrap();

        assert!(store.get_fhr_metadata(&meta.digest).is_none());
        assert!(store.list_fhr_metadata().is_empty());
    }

    /// Metadata set for a collection can be read back.
    #[test]
    fn test_fhr_metadata_set_get() {
        let mut store = RefgetStore::in_memory();
        let (meta, _) = store
            .add_sequence_collection_from_fasta("../tests/data/fasta/base.fa", FastaImportOptions::new())
            .unwrap();

        let mut fhr = FhrMetadata::default();
        fhr.genome = Some("Test genome".to_string());
        fhr.version = Some("1.0".to_string());
        fhr.masking = Some("not-masked".to_string());

        store.set_fhr_metadata(&meta.digest, fhr.clone()).unwrap();

        let retrieved = store.get_fhr_metadata(&meta.digest).unwrap();
        assert_eq!(retrieved.genome, Some("Test genome".to_string()));
        assert_eq!(retrieved.version, Some("1.0".to_string()));
    }

    /// Setting metadata for an unknown collection is an error.
    #[test]
    fn test_fhr_metadata_nonexistent_collection() {
        let mut store = RefgetStore::in_memory();
        let fhr = FhrMetadata::default();
        assert!(store.set_fhr_metadata("nonexistent_digest", fhr).is_err());
    }

    /// Removing metadata returns `true` and makes it unavailable afterwards.
    #[test]
    fn test_fhr_metadata_remove() {
        let mut store = RefgetStore::in_memory();
        let (meta, _) = store
            .add_sequence_collection_from_fasta("../tests/data/fasta/base.fa", FastaImportOptions::new())
            .unwrap();

        let fhr = FhrMetadata {
            genome: Some("Test".to_string()),
            ..Default::default()
        };
        store.set_fhr_metadata(&meta.digest, fhr).unwrap();

        assert!(store.get_fhr_metadata(&meta.digest).is_some());
        assert!(store.remove_fhr_metadata(&meta.digest));
        assert!(store.get_fhr_metadata(&meta.digest).is_none());
    }

    /// Metadata written by an on-disk store is visible after reopening it.
    #[test]
    fn test_fhr_metadata_persistence() {
        let dir = tempdir().unwrap();
        let store_path = dir.path().join("store");
        let digest: String;

        // First scope: create the store, attach metadata, then drop the store.
        {
            let mut store = RefgetStore::on_disk(&store_path).unwrap();
            let (meta, _) = store
                .add_sequence_collection_from_fasta("../tests/data/fasta/base.fa", FastaImportOptions::new())
                .unwrap();
            digest = meta.digest.clone();

            let fhr = FhrMetadata {
                genome: Some("Homo sapiens".to_string()),
                version: Some("GRCh38".to_string()),
                masking: Some("soft-masked".to_string()),
                ..Default::default()
            };
            store.set_fhr_metadata(&digest, fhr).unwrap();
        }

        // Second scope: reopen from disk and check the metadata was reloaded.
        {
            let store = RefgetStore::open_local(&store_path).unwrap();
            let fhr = store.get_fhr_metadata(&digest).unwrap();
            assert_eq!(fhr.genome, Some("Homo sapiens".to_string()));
            assert_eq!(fhr.version, Some("GRCh38".to_string()));
            assert_eq!(fhr.masking, Some("soft-masked".to_string()));
        }
    }

    /// `list_fhr_metadata` reports exactly the digests that have metadata.
    #[test]
    fn test_fhr_list() {
        let mut store = RefgetStore::in_memory();
        assert!(store.list_fhr_metadata().is_empty());

        let (meta, _) = store
            .add_sequence_collection_from_fasta("../tests/data/fasta/base.fa", FastaImportOptions::new())
            .unwrap();
        let fhr = FhrMetadata {
            genome: Some("Test".to_string()),
            ..Default::default()
        };
        store.set_fhr_metadata(&meta.digest, fhr).unwrap();

        let list = store.list_fhr_metadata();
        assert_eq!(list.len(), 1);
        assert!(list.contains(&meta.digest));
    }

    /// Removing a collection also removes its FHR metadata.
    #[test]
    fn test_remove_collection_cleans_up_fhr_metadata() {
        let dir = tempdir().unwrap();
        let fasta = dir.path().join("test.fa");
        std::fs::write(&fasta, ">chr1\nACGT\n").unwrap();

        let mut store = RefgetStore::in_memory();
        let (meta, _) = store
            .add_sequence_collection_from_fasta(&fasta, FastaImportOptions::new())
            .unwrap();
        let digest = meta.digest;

        let fhr = FhrMetadata::default();
        store.set_fhr_metadata(&digest, fhr).unwrap();
        assert!(store.get_fhr_metadata(&digest).is_some());

        store.remove_collection(&digest, false).unwrap();

        assert!(store.get_fhr_metadata(&digest).is_none());
    }

    /// Start a minimal single-threaded HTTP file server for `serve_dir` on an
    /// OS-assigned localhost port.
    ///
    /// Returns the base URL and a shutdown closure. Each connection is handled
    /// with one read of at most 4096 bytes: the second whitespace-separated
    /// token of the first request line is taken as the path, and the matching
    /// file under `serve_dir` is returned with status 200, or a 404 otherwise.
    fn start_file_server(serve_dir: std::path::PathBuf) -> (String, impl FnOnce()) {
        use std::io::{Read as _, Write as _};
        use std::net::TcpListener;
        use std::sync::{Arc, atomic::{AtomicBool, Ordering}};

        let listener = TcpListener::bind("127.0.0.1:0").expect("bind");
        let port = listener.local_addr().unwrap().port();
        let base_url = format!("http://127.0.0.1:{}", port);
        let stop = Arc::new(AtomicBool::new(false));
        let stop_clone = Arc::clone(&stop);

        std::thread::spawn(move || {
            // Blocking accept: the loop only re-checks the stop flag after a
            // connection arrives.
            listener.set_nonblocking(false).ok();
            while !stop_clone.load(Ordering::Relaxed) {
                match listener.accept() {
                    Ok((mut stream, _)) => {
                        let mut buf = [0u8; 4096];
                        let n = stream.read(&mut buf).unwrap_or(0);
                        let request = std::str::from_utf8(&buf[..n]).unwrap_or("");
                        let path = request
                            .lines()
                            .next()
                            .and_then(|l| l.split_whitespace().nth(1))
                            .unwrap_or("/");
                        let rel = path.trim_start_matches('/');
                        let file_path = serve_dir.join(rel);
                        if file_path.exists() && file_path.is_file() {
                            let data = fs::read(&file_path).unwrap_or_default();
                            let header = format!(
                                "HTTP/1.1 200 OK\r\nContent-Length: {}\r\nConnection: close\r\n\r\n",
                                data.len()
                            );
                            let _ = stream.write_all(header.as_bytes());
                            let _ = stream.write_all(&data);
                        } else {
                            let body = b"Not Found";
                            let header = format!(
                                "HTTP/1.1 404 Not Found\r\nContent-Length: {}\r\nConnection: close\r\n\r\n",
                                body.len()
                            );
                            let _ = stream.write_all(header.as_bytes());
                            let _ = stream.write_all(body);
                        }
                    }
                    Err(_) => break,
                }
            }
        });

        let shutdown = move || {
            stop.store(true, Ordering::Relaxed);
            // Throwaway connection to wake the blocking `accept` so the
            // server thread can observe the stop flag.
            let _ = std::net::TcpStream::connect(format!("127.0.0.1:{}", port));
        };

        (base_url, shutdown)
    }

    /// With `KeepOurs`, the first pull of a sidecar that is not yet local
    /// counts as pulled; a second pull counts as skipped.
    #[test]
    fn test_keep_ours_fhr_first_pull_counts_as_pulled() {
        // Remote side: a directory holding one sidecar under `fhr/`.
        let remote_dir = tempdir().unwrap();
        let collections_dir = remote_dir.path().join("fhr");
        fs::create_dir_all(&collections_dir).unwrap();

        let fake_digest = "SQ.aaaaaaaaaaaaaaaaaaaaaaaa";
        let sidecar_name = format!("{}.fhr.json", fake_digest);
        let fhr = FhrMetadata {
            genome: Some("TestGenome".to_string()),
            ..Default::default()
        };
        let sidecar_json = serde_json::to_string(&fhr).unwrap();
        fs::write(collections_dir.join(&sidecar_name), &sidecar_json).unwrap();

        let (base_url, shutdown) = start_file_server(remote_dir.path().to_path_buf());

        // Local side: an empty on-disk store pointed at the test server.
        let local_dir = tempdir().unwrap();
        let local_store_path = local_dir.path().join("store");

        let mut store = RefgetStore::on_disk(&local_store_path).unwrap();
        store.inner.remote_source = Some(base_url);

        // Register a stub collection record directly so the digest is known
        // to the store without importing any sequences.
        use crate::hashkeyable::HashKeyable;
        use crate::digest::SequenceCollectionRecord;
        let key = fake_digest.to_key();
        let stub = crate::digest::SequenceCollectionMetadata {
            digest: fake_digest.to_string(),
            n_sequences: 0,
            names_digest: String::new(),
            sequences_digest: String::new(),
            lengths_digest: String::new(),
            name_length_pairs_digest: None,
            sorted_name_length_pairs_digest: None,
            sorted_sequences_digest: None,
            file_path: None,
        };
        store.inner.collections.insert(key, SequenceCollectionRecord::Stub(stub));

        let result = store.pull_fhr(Some(fake_digest), SyncStrategy::KeepOurs).unwrap();
        assert_eq!(result.pulled, 1, "first pull should count as pulled, not skipped");
        assert_eq!(result.skipped, 0, "first pull should not be skipped");
        assert_eq!(result.not_found, 0);

        let result2 = store.pull_fhr(Some(fake_digest), SyncStrategy::KeepOurs).unwrap();
        assert_eq!(result2.skipped, 1, "second pull should be skipped (file already local)");
        assert_eq!(result2.pulled, 0, "second pull should not count as pulled");

        shutdown();
    }
}