pub mod fasta;
mod memory;
mod mmap;
mod seqcol_store;
pub use fasta::{DigestCache, FastaSequenceStore, FastaSequenceSummary, SeqColCache, SidecarCache};
pub use memory::InMemorySequenceStore;
pub use mmap::MmapSequenceStore;
pub use seqcol_store::InMemorySeqColStore;
use std::path::{Path, PathBuf};
use refget_model::SequenceMetadata;
use serde::{Deserialize, Serialize};
/// Errors produced by the sequence stores and FASTA helpers in this crate.
#[derive(Debug, thiserror::Error)]
pub enum StoreError {
/// An underlying I/O operation failed. The `#[from]` conversion lets `?`
/// turn a `std::io::Error` into this variant automatically.
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
/// A FASTA-related failure described by a free-form message. Within this
/// file it is also used to report an input path that is neither a file nor
/// a directory.
#[error("FASTA index error: {0}")]
Fasta(String),
/// A requested sequence could not be located.
// NOTE(review): the payload is presumably the digest/identifier that was
// looked up — confirm against the call sites.
#[error("Sequence not found: {0}")]
NotFound(String),
}
/// Convenience alias for results whose error type is [`StoreError`].
pub type StoreResult<T> = Result<T, StoreError>;
/// Copies the half-open byte range `[start, end)` out of `seq`.
///
/// * `start` defaults to `0` when `None`.
/// * `end` defaults to `seq.len()` when `None`.
///
/// Both bounds are clamped to `seq.len()`, so an over-long `end` yields the
/// tail of the sequence and an out-of-range `start` yields an empty vector.
/// An inverted range (`start > end`) also yields an empty vector instead of
/// panicking.
pub(crate) fn extract_subsequence(seq: &[u8], start: Option<u64>, end: Option<u64>) -> Vec<u8> {
    let len = seq.len();

    // Clamp while still in `u64` space: `len as u64` is lossless, and the
    // clamped value is `<= len`, so narrowing it back to `usize` cannot
    // truncate even on targets where `usize` is smaller than `u64`.
    let clamp = |pos: u64| pos.min(len as u64) as usize;

    let start = start.map_or(0, clamp);
    let end = end.map_or(len, clamp);

    // Checked slicing returns `None` for `start > end`, which maps to an
    // empty result rather than a panic.
    seq.get(start..end).map(<[u8]>::to_vec).unwrap_or_default()
}
/// Read-only access to sequences addressed by digest.
///
/// Implementors must be `Send + Sync` so a store can be shared across threads.
// NOTE(review): every method returns `StoreResult<Option<_>>`; presumably
// `Ok(None)` means "digest unknown" and `Err` means the lookup itself failed —
// confirm against the implementations.
pub trait SequenceStore: Send + Sync {
/// Fetches the bytes of the sequence identified by `digest`, optionally
/// restricted by `start` and `end`.
// NOTE(review): `start`/`end` are presumably 0-based, half-open bounds (as in
// `extract_subsequence` in this file) — confirm per implementation.
fn get_sequence(
&self,
digest: &str,
start: Option<u64>,
end: Option<u64>,
) -> StoreResult<Option<Vec<u8>>>;
/// Fetches the [`SequenceMetadata`] recorded for `digest`.
fn get_metadata(&self, digest: &str) -> StoreResult<Option<SequenceMetadata>>;
/// Fetches the length recorded for the sequence identified by `digest`.
fn get_length(&self, digest: &str) -> StoreResult<Option<u64>>;
}
/// One page of a collection listing, as returned by
/// `SeqColStore::list_collections`.
///
/// The struct derives `Serialize`/`Deserialize`, so the field names are part
/// of its serialized form.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ListResult {
/// Entries contained in this page.
pub items: Vec<ListItem>,
// NOTE(review): presumably the number of matching entries across all pages,
// not just `items.len()` — confirm against the implementations.
pub total: usize,
// NOTE(review): whether pages are 0- or 1-based is not visible here — confirm.
pub page: usize,
/// Page size associated with this listing.
pub page_size: usize,
}
/// A single entry in a [`ListResult`], identified only by its digest.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ListItem {
/// Digest string identifying the listed entry.
pub digest: String,
}
/// Read-only access to sequence collections (`refget_model::SeqCol`).
///
/// Implementors must be `Send + Sync` so a store can be shared across threads.
pub trait SeqColStore: Send + Sync {
/// Looks up a collection by `digest`, returning a reference borrowed from
/// the store, or `None` when no collection is returned for that digest.
fn get_collection(&self, digest: &str) -> Option<&refget_model::SeqCol>;
/// Returns one page of the collection listing as a [`ListResult`].
// NOTE(review): `filters` is presumably a list of (attribute name, value)
// pairs that must all match, and `page` may be 0- or 1-based — confirm
// against the implementations.
fn list_collections(
&self,
filters: &[(String, String)],
page: usize,
page_size: usize,
) -> ListResult;
/// Looks up a single attribute as a JSON value, keyed by `name` and `digest`.
// NOTE(review): presumably `name` is the attribute name and `digest` the
// attribute's digest — confirm.
fn get_attribute(&self, name: &str, digest: &str) -> Option<serde_json::Value>;
/// Returns a count for the store.
// NOTE(review): presumably the number of stored collections — confirm.
fn count(&self) -> usize;
}
/// Expands a list of input paths into a sorted list of FASTA files.
///
/// * A path that is a directory is scanned one level deep (no recursion);
///   entries that are files and satisfy [`is_fasta_file`] are collected.
/// * A path that is a file is collected as-is, regardless of its extension.
///
/// The returned paths are sorted using `PathBuf`'s ordering.
///
/// # Errors
///
/// * [`StoreError::Io`] if a directory cannot be read or one of its entries
///   cannot be inspected.
/// * [`StoreError::Fasta`] if a path is neither a directory nor a file.
pub fn collect_fasta_files(paths: &[PathBuf]) -> StoreResult<Vec<PathBuf>> {
    let mut files = Vec::new();
    for path in paths {
        if path.is_dir() {
            for entry in std::fs::read_dir(path)? {
                let candidate = entry?.path();
                // The extension test alone would also accept a subdirectory
                // that happens to be named like a FASTA file (e.g. `old.fa/`),
                // which cannot be opened as a sequence file later on.
                if is_fasta_file(&candidate) && candidate.is_file() {
                    files.push(candidate);
                }
            }
        } else if path.is_file() {
            files.push(path.clone());
        } else {
            return Err(StoreError::Fasta(format!("Path does not exist: {}", path.display())));
        }
    }
    files.sort();
    Ok(files)
}
/// Reports whether `path` has one of the recognised FASTA extensions:
/// `fa`, `fasta`, `fna` or `fas`.
///
/// The comparison is case-sensitive and looks only at the final extension,
/// so `genome.FA` and `genome.fa.gz` are both rejected. The file system is
/// not consulted.
pub fn is_fasta_file(path: &Path) -> bool {
    const FASTA_EXTENSIONS: [&str; 4] = ["fa", "fasta", "fna", "fas"];

    match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => FASTA_EXTENSIONS.contains(&ext),
        None => false,
    }
}