// ref_solver/refget/mod.rs

//! Refget integration for enriching unknown contigs with metadata from a refget server.
//!
//! After matching, contigs that don't match any known reference can be queried against
//! a refget server (e.g., EBI's ENA CRAM server) to retrieve aliases and other metadata.
//! This helps users identify what unknown contigs actually are.

pub mod enrichment;

use std::time::Duration;

/// Default refget server URL (EBI's ENA CRAM refget endpoint).
///
/// This is the server that [`RefgetConfig::default`] points at.
pub const DEFAULT_REFGET_SERVER: &str = "https://www.ebi.ac.uk/ena/cram";

/// Default per-request timeout for refget lookups, in seconds.
///
/// Applied by [`RefgetConfig::new`].
const DEFAULT_TIMEOUT_SECS: u64 = 5;

/// Default maximum number of concurrent refget requests.
///
/// Applied by [`RefgetConfig::new`].
const DEFAULT_MAX_CONCURRENT: usize = 5;

/// Configuration for refget server lookups.
///
/// The type is `#[non_exhaustive]`, so code outside this crate cannot build it with a
/// struct literal; use [`RefgetConfig::new`] or [`Default::default`] and then adjust the
/// public fields.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct RefgetConfig {
    /// Base URL of the refget server.
    pub server_url: String,
    /// Timeout for each individual HTTP request.
    pub timeout: Duration,
    /// Maximum number of concurrent requests to the refget server.
    pub max_concurrent: usize,
}

32impl RefgetConfig {
33    /// Create a new `RefgetConfig` with the given server URL and default settings.
34    #[must_use]
35    pub fn new(server_url: &str) -> Self {
36        Self {
37            server_url: server_url.to_string(),
38            timeout: Duration::from_secs(DEFAULT_TIMEOUT_SECS),
39            max_concurrent: DEFAULT_MAX_CONCURRENT,
40        }
41    }
42}
43
44impl Default for RefgetConfig {
45    fn default() -> Self {
46        Self::new(DEFAULT_REFGET_SERVER)
47    }
48}
49
50/// Result of looking up a single contig in refget.
51#[derive(Debug, Clone, serde::Serialize)]
52#[serde(tag = "status", rename_all = "snake_case")]
53#[non_exhaustive]
54pub enum RefgetLookupResult {
55    /// Metadata was found for this contig.
56    Found {
57        /// Known aliases for this sequence.
58        aliases: Vec<RefgetAlias>,
59        /// GA4GH sha512t24u digest.
60        sha512t24u: String,
61        /// Whether the sequence is circular (e.g., mitochondrial).
62        circular: bool,
63    },
64    /// No metadata found for this digest.
65    NotFound,
66    /// An error occurred during lookup.
67    Error {
68        /// Description of the error.
69        message: String,
70    },
71}
72
73/// A naming-authority alias for a sequence, as returned by refget.
74#[derive(Debug, Clone, serde::Serialize)]
75#[non_exhaustive]
76pub struct RefgetAlias {
77    /// The naming authority (e.g., "insdc", "ensembl").
78    pub naming_authority: String,
79    /// The identifier value within that authority.
80    pub value: String,
81}
82
83/// A contig enriched with optional refget metadata.
84#[derive(Debug, Clone, serde::Serialize)]
85#[non_exhaustive]
86pub struct EnrichedContig {
87    /// Name of the contig.
88    pub name: String,
89    /// MD5 digest used for the lookup, if available.
90    pub md5: Option<String>,
91    /// Result of the refget lookup.
92    pub refget_metadata: RefgetLookupResult,
93}