htsget_config/
resolver.rs

1//! Resolvers map ids to storage locations.
2
3use crate::config::advanced::allow_guard::QueryAllowed;
4use crate::config::advanced::regex_location::RegexLocation;
5use crate::config::location::{LocationEither, Locations};
6use crate::storage;
7use crate::storage::{Backend, ResolvedId};
8use crate::types::{Query, Response, Result};
9use async_trait::async_trait;
10use tracing::instrument;
11
/// A trait which matches the query id, replacing the match in the substitution text.
///
/// Implementors decide whether a query belongs to them and, if so, what id it should be
/// looked up under.
pub trait IdResolver {
  /// Resolve the id, returning the substituted string if there is a match.
  ///
  /// Returns `None` when the implementor does not resolve `query`.
  fn resolve_id(&self, query: &Query) -> Option<ResolvedId>;
}
17
/// A trait for determining the response from `Storage`.
///
/// There is one associated function per storage backend kind. The `S3` and `Url`
/// functions are only part of the trait when the `aws` and `url` features are enabled,
/// respectively.
#[async_trait]
pub trait ResolveResponse {
  /// Convert from `File`.
  ///
  /// Produces the response for `query` using the given file storage.
  async fn from_file(file_storage: &storage::file::File, query: &Query) -> Result<Response>;

  /// Convert from `S3`.
  ///
  /// Produces the response for `query` using the given S3 storage.
  #[cfg(feature = "aws")]
  async fn from_s3(s3_storage: &storage::s3::S3, query: &Query) -> Result<Response>;

  /// Convert from `Url`.
  ///
  /// Produces the response for `query` using the given URL storage.
  #[cfg(feature = "url")]
  async fn from_url(url_storage: &storage::url::Url, query: &Query) -> Result<Response>;
}
32
/// A trait which uses storage to resolve requests into responses.
#[async_trait]
pub trait StorageResolver {
  /// Resolve a request into a response.
  ///
  /// `T` supplies the conversion from a storage backend into a `Response`. The query is
  /// taken mutably because implementors may rewrite it (the implementations in this file
  /// replace its id with the resolved id).
  ///
  /// Returns `None` when the implementor does not resolve the query, otherwise the result
  /// of building the response.
  async fn resolve_request<T: ResolveResponse>(
    &self,
    query: &mut Query,
  ) -> Option<Result<Response>>;
}
42
/// A type which holds a resolved storage and a resolved id.
#[derive(Debug)]
pub struct ResolvedStorage<T> {
  /// The resolved storage.
  resolved_storage: T,
  /// The resolved id.
  resolved_id: ResolvedId,
}
49
50impl<T> ResolvedStorage<T> {
51  /// Create a new resolved storage.
52  pub fn new(resolved_storage: T, resolved_id: ResolvedId) -> Self {
53    Self {
54      resolved_storage,
55      resolved_id,
56    }
57  }
58
59  /// Get the resolved storage.
60  pub fn resolved_storage(&self) -> &T {
61    &self.resolved_storage
62  }
63
64  /// Get the resolved id.
65  pub fn resolved_id(&self) -> &ResolvedId {
66    &self.resolved_id
67  }
68}
69
70impl IdResolver for LocationEither {
71  #[instrument(level = "trace", skip(self), ret)]
72  fn resolve_id(&self, query: &Query) -> Option<ResolvedId> {
73    let replace = |regex_location: &RegexLocation| {
74      Some(ResolvedId::new(
75        regex_location
76          .regex()
77          .replace(query.id(), regex_location.substitution_string())
78          .to_string(),
79      ))
80    };
81
82    match self {
83      LocationEither::Simple(location) => {
84        if query.id().starts_with(location.prefix()) {
85          return Some(ResolvedId::new(query.id().to_string()));
86        }
87      }
88      LocationEither::Regex(regex_location) => {
89        if regex_location.regex().is_match(query.id()) {
90          if let Some(guard) = regex_location.guard() {
91            if guard.query_allowed(query) {
92              return replace(regex_location);
93            }
94          }
95
96          return replace(regex_location);
97        }
98      }
99    }
100
101    None
102  }
103}
104
105#[async_trait]
106impl StorageResolver for LocationEither {
107  #[instrument(level = "trace", skip(self), ret)]
108  async fn resolve_request<T: ResolveResponse>(
109    &self,
110    query: &mut Query,
111  ) -> Option<Result<Response>> {
112    let resolved_id = self.resolve_id(query)?;
113    let _matched_id = query.id().to_string();
114
115    query.set_id(resolved_id.into_inner());
116
117    match self.backend() {
118      Backend::File(file) => Some(T::from_file(file, query).await),
119      #[cfg(feature = "aws")]
120      Backend::S3(s3) => {
121        let s3 = if let Self::Regex(regex_location) = self {
122          if s3.bucket().is_empty() {
123            let first_match = regex_location
124              .regex()
125              .captures(&_matched_id)?
126              .get(1)?
127              .as_str()
128              .to_string();
129            &s3.clone().with_bucket(first_match)
130          } else {
131            s3
132          }
133        } else {
134          s3
135        };
136
137        Some(T::from_s3(s3, query).await)
138      }
139      #[cfg(feature = "url")]
140      Backend::Url(url_storage) => Some(T::from_url(url_storage, query).await),
141    }
142  }
143}
144
145impl IdResolver for &[LocationEither] {
146  #[instrument(level = "trace", skip(self), ret)]
147  fn resolve_id(&self, query: &Query) -> Option<ResolvedId> {
148    self.iter().find_map(|location| location.resolve_id(query))
149  }
150}
151
152#[async_trait]
153impl StorageResolver for &[LocationEither] {
154  #[instrument(level = "trace", skip(self), ret)]
155  async fn resolve_request<T: ResolveResponse>(
156    &self,
157    query: &mut Query,
158  ) -> Option<Result<Response>> {
159    for location in self.iter() {
160      if let Some(location) = location.resolve_request::<T>(query).await {
161        return Some(location);
162      }
163    }
164
165    None
166  }
167}
168
169impl IdResolver for Locations {
170  #[instrument(level = "trace", skip(self), ret)]
171  fn resolve_id(&self, query: &Query) -> Option<ResolvedId> {
172    self.as_slice().resolve_id(query)
173  }
174}
175
176#[async_trait]
177impl StorageResolver for Locations {
178  #[instrument(level = "trace", skip(self), ret)]
179  async fn resolve_request<T: ResolveResponse>(
180    &self,
181    query: &mut Query,
182  ) -> Option<Result<Response>> {
183    self.as_slice().resolve_request::<T>(query).await
184  }
185}
186
#[cfg(test)]
mod tests {
  use super::*;
  use crate::config::location::Location;
  use crate::config::tests::{test_config_from_env, test_config_from_file};
  use crate::storage;
  use crate::types::Format::Bam;
  use crate::types::Scheme::Http;
  use crate::types::Url;
  use http::uri::Authority;
  #[cfg(feature = "url")]
  use reqwest::ClientBuilder;
  #[cfg(feature = "aws")]
  use {
    crate::config::advanced::allow_guard::{AllowGuard, ReferenceNames},
    crate::types::{Class, Fields, Interval, Tags},
    std::collections::HashSet,
  };

  /// A `ResolveResponse` for tests which encodes the backend and the query id into a
  /// single URL of the form `{prefix}/{id}`, so that a test can assert on both at once.
  struct TestResolveResponse;

  #[async_trait]
  impl ResolveResponse for TestResolveResponse {
    /// Uses the file storage's authority as the URL prefix.
    async fn from_file(file: &storage::file::File, query: &Query) -> Result<Response> {
      Ok(Response::new(
        Bam,
        Self::format_url(file.authority().as_ref(), query.id()),
      ))
    }

    /// Uses the S3 bucket as the URL prefix.
    #[cfg(feature = "aws")]
    async fn from_s3(s3_storage: &storage::s3::S3, query: &Query) -> Result<Response> {
      Ok(Response::new(
        Bam,
        Self::format_url(s3_storage.bucket(), query.id()),
      ))
    }

    /// Uses the storage URL, without its trailing `/`, as the URL prefix.
    #[cfg(feature = "url")]
    async fn from_url(url: &storage::url::Url, query: &Query) -> Result<Response> {
      Ok(Response::new(
        Bam,
        Self::format_url(url.url().to_string().strip_suffix('/').unwrap(), query.id()),
      ))
    }
  }

  impl TestResolveResponse {
    /// Build the single-element URL list `["{prefix}/{id}"]`.
    fn format_url(prefix: &str, id: &str) -> Vec<Url> {
      vec![Url::new(format!("{}/{}", prefix, id))]
    }
  }

  /// A file backend resolves through both a regex location and a simple location.
  #[tokio::test]
  async fn resolver_resolve_local_request() {
    let file = storage::file::File::new(
      Http,
      Authority::from_static("127.0.0.1:8080"),
      "data".to_string(),
    );

    // `$0` is the whole match, so "id" in "id-1" becomes "id-test", giving "id-test-1".
    let regex_location = RegexLocation::new(
      "id".parse().unwrap(),
      "$0-test".to_string(),
      Backend::File(file.clone()),
      Default::default(),
    );
    expected_resolved_request(vec![regex_location.into()], "127.0.0.1:8080/id-test-1").await;

    // An empty prefix matches every id and leaves the id unchanged.
    let location = Location::new(Backend::File(file), "".to_string());
    expected_resolved_request(vec![location.into()], "127.0.0.1:8080/id-1").await;
  }

  /// An S3 backend with an explicitly configured bucket keeps that bucket.
  #[cfg(feature = "aws")]
  #[tokio::test]
  async fn resolver_resolve_s3_request_tagged() {
    let s3_storage = storage::s3::S3::new("id2".to_string(), None, false);
    let regex_location = RegexLocation::new(
      "(id)-1".parse().unwrap(),
      "$1-test".to_string(),
      Backend::S3(s3_storage.clone()),
      Default::default(),
    );
    expected_resolved_request(vec![regex_location.into()], "id2/id-test").await;

    let location = Location::new(Backend::S3(s3_storage), "".to_string());
    expected_resolved_request(vec![location.into()], "id2/id-1").await;
  }

  /// An S3 backend built with `S3::default()` on a regex location is expected to take
  /// its bucket from the first capture group of the regex.
  #[cfg(feature = "aws")]
  #[tokio::test]
  async fn resolver_resolve_s3_request() {
    // The first capture group "id" is expected as the bucket.
    let regex_location = RegexLocation::new(
      "(id)-1".parse().unwrap(),
      "$1-test".to_string(),
      Backend::S3(storage::s3::S3::default()),
      Default::default(),
    );
    expected_resolved_request(vec![regex_location.clone().into()], "id/id-test").await;

    // The bucket is still the first group, while the id comes from the named group `key`.
    let regex_location = RegexLocation::new(
      "^(id)-(?P<key>.*)$".parse().unwrap(),
      "$key".to_string(),
      Backend::S3(storage::s3::S3::default()),
      Default::default(),
    );
    expected_resolved_request(vec![regex_location.clone().into()], "id/1").await;

    // A simple location uses the configured bucket and the unchanged id.
    let location = Location::new(
      Backend::S3(storage::s3::S3::new("bucket".to_string(), None, false)),
      "".to_string(),
    );
    expected_resolved_request(vec![location.into()], "bucket/id-1").await;
  }

  /// A URL backend resolves through both a regex location and a simple location.
  #[cfg(feature = "url")]
  #[tokio::test]
  async fn resolver_resolve_url_request() {
    let client = ClientBuilder::new().build().unwrap();
    let url_storage = storage::url::Url::new(
      "https://example.com/".parse().unwrap(),
      "https://example.com/".parse().unwrap(),
      true,
      vec![],
      client,
    );

    let regex_location = RegexLocation::new(
      "(id)-1".parse().unwrap(),
      "$1-test".to_string(),
      Backend::Url(url_storage.clone()),
      Default::default(),
    );
    expected_resolved_request(
      vec![regex_location.clone().into()],
      "https://example.com/id-test",
    )
    .await;

    let location = Location::new(Backend::Url(url_storage), "".to_string());
    expected_resolved_request(vec![location.into()], "https://example.com/id-1").await;
  }

  /// With several locations, the id is resolved by the location that matches it.
  #[test]
  fn resolver_array_resolve_id() {
    // Regex locations: each id only matches one of the two regexes.
    let resolver = Locations::new(vec![
      RegexLocation::new(
        "^(id-1)(.*)$".parse().unwrap(),
        "$1-test-1".to_string(),
        Default::default(),
        Default::default(),
      )
      .into(),
      RegexLocation::new(
        "^(id-2)(.*)$".parse().unwrap(),
        "$1-test-2".to_string(),
        Default::default(),
        Default::default(),
      )
      .into(),
    ]);

    assert_eq!(
      resolver
        .as_slice()
        .resolve_id(&Query::new_with_default_request("id-1", Bam))
        .unwrap()
        .into_inner(),
      "id-1-test-1"
    );
    assert_eq!(
      resolver
        .as_slice()
        .resolve_id(&Query::new_with_default_request("id-2", Bam))
        .unwrap()
        .into_inner(),
      "id-2-test-2"
    );

    // Simple locations: a prefix match resolves to the unchanged id.
    let resolver = Locations::new(vec![
      Location::new(Default::default(), "id-1".to_string()).into(),
      Location::new(Default::default(), "id-2".to_string()).into(),
    ]);

    assert_eq!(
      resolver
        .as_slice()
        .resolve_id(&Query::new_with_default_request("id-1", Bam))
        .unwrap()
        .into_inner(),
      "id-1"
    );
    assert_eq!(
      resolver
        .as_slice()
        .resolve_id(&Query::new_with_default_request("id-2", Bam))
        .unwrap()
        .into_inner(),
      "id-2"
    );
  }

  /// A regex location can be configured from a config file.
  #[test]
  fn config_resolvers_file() {
    test_config_from_file(
      r#"
        [[locations]]
        regex = "regex"
        "#,
      |config| {
        let regex = config.locations().first().unwrap().as_regex().unwrap();
        assert_eq!(regex.regex().as_str(), "regex");
      },
    );
  }

  /// A guard on a regex location can be configured from a config file.
  #[test]
  fn config_resolvers_guard_file() {
    test_config_from_file(
      r#"
      [[locations]]
      regex = "regex"

      [locations.guard]
      allow_formats = ["BAM"]
      "#,
      |config| {
        let regex = config.locations().first().unwrap().as_regex().unwrap();
        assert_eq!(regex.guard().unwrap().allow_formats(), &vec![Bam]);
      },
    );
  }

  /// A regex location can be configured from the `HTSGET_LOCATIONS` environment variable.
  #[test]
  fn config_resolvers_env() {
    test_config_from_env(vec![("HTSGET_LOCATIONS", "[{regex=regex}]")], |config| {
      let regex = config.locations().first().unwrap().as_regex().unwrap();
      assert_eq!(regex.regex().as_str(), "regex");
    });
  }

  /// A regex location with an S3 backend and every guard option set can be configured
  /// from the `HTSGET_LOCATIONS` environment variable.
  #[cfg(feature = "aws")]
  #[test]
  fn config_resolvers_all_options_env() {
    test_config_from_env(
      vec![(
        "HTSGET_LOCATIONS",
        "[{ regex=regex, substitution_string=substitution_string, \
        backend={ kind=S3, bucket=bucket }, \
        guard={ allow_reference_names=[chr1], allow_fields=[QNAME], allow_tags=[RG], \
        allow_formats=[BAM], allow_classes=[body], allow_interval={ start=100, \
        end=1000 } } }]",
      )],
      |config| {
        // The guard that the environment variable above is expected to deserialize into.
        let allow_guard = AllowGuard::new(
          ReferenceNames::List(HashSet::from_iter(vec!["chr1".to_string()])),
          Fields::List(HashSet::from_iter(vec!["QNAME".to_string()])),
          Tags::List(HashSet::from_iter(vec!["RG".to_string()])),
          vec![Bam],
          vec![Class::Body],
          Interval::new(Some(100), Some(1000)),
        );
        let resolver = config.locations().first().unwrap();
        let expected_storage = storage::s3::S3::new("bucket".to_string(), None, false);
        let Backend::S3(storage) = resolver.backend() else {
          panic!();
        };

        assert_eq!(storage.bucket(), expected_storage.bucket());
        assert_eq!(storage.endpoint(), expected_storage.endpoint());
        assert_eq!(storage.path_style(), expected_storage.path_style());

        let regex = config.locations().first().unwrap().as_regex().unwrap();
        assert_eq!(regex.regex().to_string(), "regex");
        assert_eq!(regex.substitution_string(), "substitution_string");
        assert_eq!(regex.guard().unwrap(), &allow_guard);
      },
    );
  }

  /// Resolve a `Bam` query with the fixed id "id-1" against `resolver` using
  /// `TestResolveResponse`, and assert that the response holds the single URL
  /// `expected_id` (which, despite its name, is the full `{prefix}/{id}` string).
  async fn expected_resolved_request(resolver: Vec<LocationEither>, expected_id: &str) {
    assert_eq!(
      Locations::new(resolver)
        .resolve_request::<TestResolveResponse>(&mut Query::new_with_default_request("id-1", Bam))
        .await
        .unwrap()
        .unwrap(),
      Response::new(Bam, vec![Url::new(expected_id)])
    );
  }
}
477}