ref_solver/refget/mod.rs
//! Refget integration for enriching unknown contigs with metadata from a refget server.
//!
//! After matching, contigs that don't match any known reference can be queried against
//! a refget server (e.g., EBI's ENA CRAM server) to retrieve aliases and other metadata.
//! This helps users identify what unknown contigs actually are.
6
pub mod enrichment;

use std::time::Duration;
10
/// Default refget server URL (EBI's ENA CRAM refget endpoint).
///
/// This is the server that [`RefgetConfig::default`] points at.
pub const DEFAULT_REFGET_SERVER: &str = "https://www.ebi.ac.uk/ena/cram";

/// Default per-request timeout for refget lookups, in seconds.
///
/// Converted to a [`Duration`] by [`RefgetConfig::new`].
const DEFAULT_TIMEOUT_SECS: u64 = 5;

/// Default maximum number of concurrent refget requests.
const DEFAULT_MAX_CONCURRENT: usize = 5;
19
/// Configuration for refget server lookups.
///
/// Build one with [`RefgetConfig::new`] for a specific server, or with
/// [`Default::default`] to target [`DEFAULT_REFGET_SERVER`]. All fields are
/// public, so individual settings can be adjusted after construction.
///
/// The type is `#[non_exhaustive]`: code outside this crate cannot create it
/// with a struct literal and must go through one of the constructors.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct RefgetConfig {
    /// Base URL of the refget server.
    pub server_url: String,
    /// Timeout for each individual HTTP request.
    pub timeout: Duration,
    /// Maximum number of concurrent requests to the refget server.
    pub max_concurrent: usize,
}
31
32impl RefgetConfig {
33 /// Create a new `RefgetConfig` with the given server URL and default settings.
34 #[must_use]
35 pub fn new(server_url: &str) -> Self {
36 Self {
37 server_url: server_url.to_string(),
38 timeout: Duration::from_secs(DEFAULT_TIMEOUT_SECS),
39 max_concurrent: DEFAULT_MAX_CONCURRENT,
40 }
41 }
42}
43
44impl Default for RefgetConfig {
45 fn default() -> Self {
46 Self::new(DEFAULT_REFGET_SERVER)
47 }
48}
49
50/// Result of looking up a single contig in refget.
51#[derive(Debug, Clone, serde::Serialize)]
52#[serde(tag = "status", rename_all = "snake_case")]
53#[non_exhaustive]
54pub enum RefgetLookupResult {
55 /// Metadata was found for this contig.
56 Found {
57 /// Known aliases for this sequence.
58 aliases: Vec<RefgetAlias>,
59 /// GA4GH sha512t24u digest.
60 sha512t24u: String,
61 /// Whether the sequence is circular (e.g., mitochondrial).
62 circular: bool,
63 },
64 /// No metadata found for this digest.
65 NotFound,
66 /// An error occurred during lookup.
67 Error {
68 /// Description of the error.
69 message: String,
70 },
71}
72
73/// A naming-authority alias for a sequence, as returned by refget.
74#[derive(Debug, Clone, serde::Serialize)]
75#[non_exhaustive]
76pub struct RefgetAlias {
77 /// The naming authority (e.g., "insdc", "ensembl").
78 pub naming_authority: String,
79 /// The identifier value within that authority.
80 pub value: String,
81}
82
83/// A contig enriched with optional refget metadata.
84#[derive(Debug, Clone, serde::Serialize)]
85#[non_exhaustive]
86pub struct EnrichedContig {
87 /// Name of the contig.
88 pub name: String,
89 /// MD5 digest used for the lookup, if available.
90 pub md5: Option<String>,
91 /// Result of the refget lookup.
92 pub refget_metadata: RefgetLookupResult,
93}