1pub mod fasta;
4mod memory;
5mod mmap;
6mod seqcol_store;
7
8pub use fasta::{DigestCache, FastaSequenceStore, FastaSequenceSummary, SeqColCache, SidecarCache};
9pub use memory::InMemorySequenceStore;
10pub use mmap::MmapSequenceStore;
11pub use seqcol_store::InMemorySeqColStore;
12
13use std::path::{Path, PathBuf};
14
15use refget_model::SequenceMetadata;
16use serde::{Deserialize, Serialize};
17
18#[derive(Debug, thiserror::Error)]
20pub enum StoreError {
21 #[error("I/O error: {0}")]
22 Io(#[from] std::io::Error),
23 #[error("FASTA index error: {0}")]
24 Fasta(String),
25 #[error("Sequence not found: {0}")]
26 NotFound(String),
27}
28
29pub type StoreResult<T> = Result<T, StoreError>;
31
/// Copy the half-open byte range `[start, end)` out of `seq`.
///
/// `start` defaults to `0` and `end` defaults to `seq.len()`. Both bounds are clamped to
/// `seq.len()`, and an empty vector is returned when the clamped range is empty or reversed
/// (`start >= end`), so this function never panics.
pub(crate) fn extract_subsequence(seq: &[u8], start: Option<u64>, end: Option<u64>) -> Vec<u8> {
    let len = seq.len();
    // Clamp while still in `u64`: a bare `as usize` cast would silently wrap offsets above
    // `usize::MAX` on 32-bit targets and turn an out-of-range request into a valid one.
    let clamp = |pos: u64| pos.min(len as u64) as usize;
    let start = start.map_or(0, clamp);
    let end = end.map_or(len, clamp);
    // `get` yields `None` for a reversed range, where direct indexing would panic.
    seq.get(start..end).map(<[u8]>::to_vec).unwrap_or_default()
}
43
44pub trait SequenceStore: Send + Sync {
46 fn get_sequence(
49 &self,
50 digest: &str,
51 start: Option<u64>,
52 end: Option<u64>,
53 ) -> StoreResult<Option<Vec<u8>>>;
54
55 fn get_metadata(&self, digest: &str) -> StoreResult<Option<SequenceMetadata>>;
57
58 fn get_length(&self, digest: &str) -> StoreResult<Option<u64>>;
60}
61
62#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct ListResult {
65 pub items: Vec<ListItem>,
66 pub total: usize,
67 pub page: usize,
68 pub page_size: usize,
69}
70
71#[derive(Debug, Clone, Serialize, Deserialize)]
73pub struct ListItem {
74 pub digest: String,
75}
76
77pub trait SeqColStore: Send + Sync {
79 fn get_collection(&self, digest: &str) -> Option<&refget_model::SeqCol>;
81
82 fn list_collections(
84 &self,
85 filters: &[(String, String)],
86 page: usize,
87 page_size: usize,
88 ) -> ListResult;
89
90 fn get_attribute(&self, name: &str, digest: &str) -> Option<serde_json::Value>;
92
93 fn count(&self) -> usize;
95}
96
97pub fn collect_fasta_files(paths: &[PathBuf]) -> StoreResult<Vec<PathBuf>> {
102 let mut files = Vec::new();
103 for path in paths {
104 if path.is_dir() {
105 let entries = std::fs::read_dir(path)?;
106 for entry in entries {
107 let p = entry?.path();
108 if is_fasta_file(&p) {
109 files.push(p);
110 }
111 }
112 } else if path.is_file() {
113 files.push(path.clone());
114 } else {
115 return Err(StoreError::Fasta(format!("Path does not exist: {}", path.display())));
116 }
117 }
118 files.sort();
119 Ok(files)
120}
121
/// Report whether `path` has one of the recognised FASTA extensions
/// (`fa`, `fasta`, `fna`, `fas`).
///
/// Only the extension is inspected and the comparison is case-sensitive. Paths with no
/// extension, or with a non-UTF-8 extension, yield `false`.
pub fn is_fasta_file(path: &Path) -> bool {
    const FASTA_EXTENSIONS: [&str; 4] = ["fa", "fasta", "fna", "fas"];

    match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => FASTA_EXTENSIONS.contains(&ext),
        None => false,
    }
}