htsget_config/
resolver.rs

1//! Resolvers map ids to storage locations.
2
3use crate::config::advanced::allow_guard::QueryAllowed;
4use crate::config::advanced::regex_location::RegexLocation;
5use crate::config::location::{Location, Locations, PrefixOrId};
6use crate::storage;
7use crate::storage::{Backend, ResolvedId};
8use crate::types::{Query, Response, Result};
9use async_trait::async_trait;
10use tracing::instrument;
11
12/// A trait which matches the query id, replacing the match in the substitution text.
13pub trait IdResolver {
14  /// Resolve the id, returning the substituted string if there is a match.
15  fn resolve_id(&self, query: &Query) -> Option<ResolvedId>;
16}
17
18/// A trait for determining the response from `Storage`.
19#[async_trait]
20pub trait ResolveResponse {
21  /// Convert from `File`.
22  async fn from_file(file_storage: &storage::file::File, query: &Query) -> Result<Response>;
23
24  /// Convert from `S3`.
25  #[cfg(feature = "aws")]
26  async fn from_s3(s3_storage: &storage::s3::S3, query: &Query) -> Result<Response>;
27
28  /// Convert from `Url`.
29  #[cfg(feature = "url")]
30  async fn from_url(url_storage: &storage::url::Url, query: &Query) -> Result<Response>;
31}
32
33/// A trait which uses storage to resolve requests into responses.
34#[async_trait]
35pub trait StorageResolver {
36  /// Resolve a request into a response.
37  async fn resolve_request<T: ResolveResponse>(
38    &self,
39    query: &mut Query,
40  ) -> Option<Result<Response>>;
41}
42
43/// A type which holds a resolved storage and an resolved id.
44#[derive(Debug)]
45pub struct ResolvedStorage<T> {
46  resolved_storage: T,
47  resolved_id: ResolvedId,
48}
49
50impl<T> ResolvedStorage<T> {
51  /// Create a new resolved storage.
52  pub fn new(resolved_storage: T, resolved_id: ResolvedId) -> Self {
53    Self {
54      resolved_storage,
55      resolved_id,
56    }
57  }
58
59  /// Get the resolved storage.
60  pub fn resolved_storage(&self) -> &T {
61    &self.resolved_storage
62  }
63
64  /// Get the resolved id.
65  pub fn resolved_id(&self) -> &ResolvedId {
66    &self.resolved_id
67  }
68}
69
70impl IdResolver for Location {
71  #[instrument(level = "trace", skip(self), ret)]
72  fn resolve_id(&self, query: &Query) -> Option<ResolvedId> {
73    let replace = |regex_location: &RegexLocation| {
74      Some(
75        regex_location
76          .regex()
77          .replace(query.id(), regex_location.substitution_string())
78          .to_string(),
79      )
80    };
81
82    let resolved_id = match self {
83      Location::Simple(location) => match location.prefix_or_id().unwrap_or_default() {
84        PrefixOrId::Prefix(prefix) if query.id().starts_with(&prefix) => {
85          Some(format!("{}/{}", location.to_append(), query.id()))
86        }
87        PrefixOrId::Id(id) => {
88          if query.id() == id.as_str() {
89            Some(location.to_append().to_string())
90          } else {
91            None
92          }
93        }
94        _ => None,
95      },
96      Location::Regex(regex_location) => {
97        if regex_location.regex().is_match(query.id()) {
98          if let Some(guard) = regex_location.guard() {
99            if guard.query_allowed(query) {
100              replace(regex_location)
101            } else {
102              None
103            }
104          } else {
105            replace(regex_location)
106          }
107        } else {
108          None
109        }
110      }
111    };
112
113    resolved_id.map(|id| {
114      let id = id.strip_prefix("/").unwrap_or(&id);
115      ResolvedId::new(id.to_string())
116    })
117  }
118}
119
120#[async_trait]
121impl StorageResolver for Location {
122  #[instrument(level = "trace", skip(self), ret)]
123  async fn resolve_request<T: ResolveResponse>(
124    &self,
125    query: &mut Query,
126  ) -> Option<Result<Response>> {
127    let resolved_id = self.resolve_id(query)?;
128    let _matched_id = query.id().to_string();
129
130    query.set_id(resolved_id.into_inner());
131
132    match self.backend() {
133      Backend::File(file) => Some(T::from_file(file, query).await),
134      #[cfg(feature = "aws")]
135      Backend::S3(s3) => {
136        let s3 = if let Self::Regex(regex_location) = self {
137          if s3.bucket().is_empty() {
138            let first_match = regex_location
139              .regex()
140              .captures(&_matched_id)?
141              .get(1)?
142              .as_str()
143              .to_string();
144            &s3.clone().with_bucket(first_match)
145          } else {
146            s3
147          }
148        } else {
149          s3
150        };
151
152        Some(T::from_s3(s3, query).await)
153      }
154      #[cfg(feature = "url")]
155      Backend::Url(url_storage) => Some(T::from_url(url_storage, query).await),
156    }
157  }
158}
159
160impl IdResolver for &[Location] {
161  #[instrument(level = "trace", skip(self), ret)]
162  fn resolve_id(&self, query: &Query) -> Option<ResolvedId> {
163    self.iter().find_map(|location| location.resolve_id(query))
164  }
165}
166
167#[async_trait]
168impl StorageResolver for &[Location] {
169  #[instrument(level = "trace", skip(self), ret)]
170  async fn resolve_request<T: ResolveResponse>(
171    &self,
172    query: &mut Query,
173  ) -> Option<Result<Response>> {
174    for location in self.iter() {
175      if let Some(location) = location.resolve_request::<T>(query).await {
176        return Some(location);
177      }
178    }
179
180    None
181  }
182}
183
184impl IdResolver for Locations {
185  #[instrument(level = "trace", skip(self), ret)]
186  fn resolve_id(&self, query: &Query) -> Option<ResolvedId> {
187    self.as_slice().resolve_id(query)
188  }
189}
190
191#[async_trait]
192impl StorageResolver for Locations {
193  #[instrument(level = "trace", skip(self), ret)]
194  async fn resolve_request<T: ResolveResponse>(
195    &self,
196    query: &mut Query,
197  ) -> Option<Result<Response>> {
198    self.as_slice().resolve_request::<T>(query).await
199  }
200}
201
202#[cfg(test)]
203mod tests {
204  use super::*;
205  use crate::config::location::SimpleLocation;
206  use crate::config::tests::{test_config_from_env, test_config_from_file};
207  use crate::storage;
208  use crate::types::Format::Bam;
209  use crate::types::Scheme::Http;
210  use crate::types::Url;
211  use http::uri::Authority;
212  #[cfg(feature = "url")]
213  use reqwest::ClientBuilder;
214  #[cfg(feature = "aws")]
215  use {
216    crate::config::advanced::allow_guard::{AllowGuard, ReferenceNames},
217    crate::types::{Class, Fields, Interval, Tags},
218    std::collections::HashSet,
219  };
220
221  struct TestResolveResponse;
222
223  #[async_trait]
224  impl ResolveResponse for TestResolveResponse {
225    async fn from_file(file: &storage::file::File, query: &Query) -> Result<Response> {
226      Ok(Response::new(
227        Bam,
228        Self::format_url(file.authority().as_ref(), query.id()),
229      ))
230    }
231
232    #[cfg(feature = "aws")]
233    async fn from_s3(s3_storage: &storage::s3::S3, query: &Query) -> Result<Response> {
234      Ok(Response::new(
235        Bam,
236        Self::format_url(s3_storage.bucket(), query.id()),
237      ))
238    }
239
240    #[cfg(feature = "url")]
241    async fn from_url(url: &storage::url::Url, query: &Query) -> Result<Response> {
242      Ok(Response::new(
243        Bam,
244        Self::format_url(url.url().to_string().strip_suffix('/').unwrap(), query.id()),
245      ))
246    }
247  }
248
249  impl TestResolveResponse {
250    fn format_url(prefix: &str, id: &str) -> Vec<Url> {
251      vec![Url::new(format!("{prefix}/{id}"))]
252    }
253  }
254
255  #[tokio::test]
256  async fn resolver_resolve_local_request() {
257    let file = storage::file::File::new(
258      Http,
259      Authority::from_static("127.0.0.1:8080"),
260      "data".to_string(),
261    );
262
263    let regex_location = RegexLocation::new(
264      "id".parse().unwrap(),
265      "$0-test".to_string(),
266      Backend::File(file.clone()),
267      Default::default(),
268    );
269    expected_resolved_request(vec![regex_location.into()], "127.0.0.1:8080/id-test-1").await;
270
271    let location = SimpleLocation::new(
272      Backend::File(file),
273      "".to_string(),
274      Some(PrefixOrId::Prefix("".to_string())),
275    );
276    expected_resolved_request(vec![location.into()], "127.0.0.1:8080/id-1").await;
277  }
278
279  #[cfg(feature = "aws")]
280  #[tokio::test]
281  async fn resolver_resolve_s3_request_tagged() {
282    let s3_storage = storage::s3::S3::new("id2".to_string(), None, false);
283    let regex_location = RegexLocation::new(
284      "(id)-1".parse().unwrap(),
285      "$1-test".to_string(),
286      Backend::S3(s3_storage.clone()),
287      Default::default(),
288    );
289    expected_resolved_request(vec![regex_location.into()], "id2/id-test").await;
290
291    let location = SimpleLocation::new(
292      Backend::S3(s3_storage),
293      "".to_string(),
294      Some(PrefixOrId::Prefix("".to_string())),
295    );
296    expected_resolved_request(vec![location.into()], "id2/id-1").await;
297  }
298
299  #[cfg(feature = "aws")]
300  #[tokio::test]
301  async fn resolver_resolve_s3_request() {
302    let regex_location = RegexLocation::new(
303      "(id)-1".parse().unwrap(),
304      "$1-test".to_string(),
305      Backend::S3(storage::s3::S3::default()),
306      Default::default(),
307    );
308    expected_resolved_request(vec![regex_location.clone().into()], "id/id-test").await;
309
310    let regex_location = RegexLocation::new(
311      "^(id)-(?P<key>.*)$".parse().unwrap(),
312      "$key".to_string(),
313      Backend::S3(storage::s3::S3::default()),
314      Default::default(),
315    );
316    expected_resolved_request(vec![regex_location.clone().into()], "id/1").await;
317
318    let location = SimpleLocation::new(
319      Backend::S3(storage::s3::S3::new("bucket".to_string(), None, false)),
320      "".to_string(),
321      Some(PrefixOrId::Prefix("".to_string())),
322    );
323    expected_resolved_request(vec![location.into()], "bucket/id-1").await;
324  }
325
326  #[cfg(feature = "url")]
327  #[tokio::test]
328  async fn resolver_resolve_url_request() {
329    let client =
330      reqwest_middleware::ClientBuilder::new(ClientBuilder::new().build().unwrap()).build();
331    let url_storage = storage::url::Url::new(
332      "https://example.com/".parse().unwrap(),
333      "https://example.com/".parse().unwrap(),
334      true,
335      vec![],
336      client,
337    );
338
339    let regex_location = RegexLocation::new(
340      "(id)-1".parse().unwrap(),
341      "$1-test".to_string(),
342      Backend::Url(url_storage.clone()),
343      Default::default(),
344    );
345    expected_resolved_request(
346      vec![regex_location.clone().into()],
347      "https://example.com/id-test",
348    )
349    .await;
350
351    let location = SimpleLocation::new(
352      Backend::Url(url_storage),
353      "".to_string(),
354      Some(PrefixOrId::Prefix("".to_string())),
355    );
356    expected_resolved_request(vec![location.into()], "https://example.com/id-1").await;
357  }
358
359  #[test]
360  fn resolver_array_resolve_id() {
361    let resolver = Locations::new(vec![
362      RegexLocation::new(
363        "^(id-1)(.*)$".parse().unwrap(),
364        "$1-test-1".to_string(),
365        Default::default(),
366        Default::default(),
367      )
368      .into(),
369      RegexLocation::new(
370        "^(id-2)(.*)$".parse().unwrap(),
371        "$1-test-2".to_string(),
372        Default::default(),
373        Default::default(),
374      )
375      .into(),
376    ]);
377
378    assert_eq!(
379      resolver
380        .as_slice()
381        .resolve_id(&Query::new_with_default_request("id-1", Bam))
382        .unwrap()
383        .into_inner(),
384      "id-1-test-1"
385    );
386    assert_eq!(
387      resolver
388        .as_slice()
389        .resolve_id(&Query::new_with_default_request("id-2", Bam))
390        .unwrap()
391        .into_inner(),
392      "id-2-test-2"
393    );
394
395    let resolver = Locations::new(vec![
396      SimpleLocation::new(
397        Default::default(),
398        "".to_string(),
399        Some(PrefixOrId::Prefix("id-1".to_string())),
400      )
401      .into(),
402      SimpleLocation::new(
403        Default::default(),
404        "".to_string(),
405        Some(PrefixOrId::Prefix("id-2".to_string())),
406      )
407      .into(),
408    ]);
409    assert_eq!(
410      resolver
411        .as_slice()
412        .resolve_id(&Query::new_with_default_request("id-1", Bam))
413        .unwrap()
414        .into_inner(),
415      "id-1"
416    );
417    assert_eq!(
418      resolver
419        .as_slice()
420        .resolve_id(&Query::new_with_default_request("id-2", Bam))
421        .unwrap()
422        .into_inner(),
423      "id-2"
424    );
425    let resolver = Locations::new(vec![
426      SimpleLocation::new(
427        Default::default(),
428        "append_to".to_string(),
429        Some(PrefixOrId::Prefix("id-1".to_string())),
430      )
431      .into(),
432      SimpleLocation::new(
433        Default::default(),
434        "append_to".to_string(),
435        Some(PrefixOrId::Prefix("id-2".to_string())),
436      )
437      .into(),
438    ]);
439    assert_eq!(
440      resolver
441        .as_slice()
442        .resolve_id(&Query::new_with_default_request("id-1", Bam))
443        .unwrap()
444        .into_inner(),
445      "append_to/id-1"
446    );
447    assert_eq!(
448      resolver
449        .as_slice()
450        .resolve_id(&Query::new_with_default_request("id-2", Bam))
451        .unwrap()
452        .into_inner(),
453      "append_to/id-2"
454    );
455
456    let resolver = Locations::new(vec![
457      SimpleLocation::new(
458        Default::default(),
459        "append_to".to_string(),
460        Some(PrefixOrId::Id("id-1".to_string())),
461      )
462      .into(),
463      SimpleLocation::new(
464        Default::default(),
465        "append_to".to_string(),
466        Some(PrefixOrId::Id("id-2".to_string())),
467      )
468      .into(),
469    ]);
470    assert_eq!(
471      resolver
472        .as_slice()
473        .resolve_id(&Query::new_with_default_request("id-1", Bam))
474        .unwrap()
475        .into_inner(),
476      "append_to"
477    );
478    assert_eq!(
479      resolver
480        .as_slice()
481        .resolve_id(&Query::new_with_default_request("id-2", Bam))
482        .unwrap()
483        .into_inner(),
484      "append_to"
485    );
486  }
487
488  #[test]
489  fn config_resolvers_file() {
490    test_config_from_file(
491      r#"
492        [[locations]]
493        regex = "regex"
494        "#,
495      |config| {
496        let regex = config.locations().first().unwrap().as_regex().unwrap();
497        assert_eq!(regex.regex().as_str(), "regex");
498      },
499    );
500  }
501
502  #[test]
503  fn config_resolvers_guard_file() {
504    test_config_from_file(
505      r#"
506      [[locations]]
507      regex = "regex"
508
509      [locations.guard]
510      allow_formats = ["BAM"]
511      "#,
512      |config| {
513        let regex = config.locations().first().unwrap().as_regex().unwrap();
514        assert_eq!(regex.guard().unwrap().allow_formats(), &vec![Bam]);
515      },
516    );
517  }
518
519  #[test]
520  fn config_resolvers_env() {
521    test_config_from_env(vec![("HTSGET_LOCATIONS", "[{regex=regex}]")], |config| {
522      let regex = config.locations().first().unwrap().as_regex().unwrap();
523      assert_eq!(regex.regex().as_str(), "regex");
524    });
525  }
526
527  #[cfg(feature = "aws")]
528  #[test]
529  fn config_resolvers_all_options_env() {
530    test_config_from_env(
531      vec![(
532        "HTSGET_LOCATIONS",
533        "[{ regex=regex, substitution_string=substitution_string, \
534        backend={ kind=S3, bucket=bucket }, \
535        guard={ allow_reference_names=[chr1], allow_fields=[QNAME], allow_tags=[RG], \
536        allow_formats=[BAM], allow_classes=[body], allow_interval={ start=100, \
537        end=1000 } } }]",
538      )],
539      |config| {
540        let allow_guard = AllowGuard::new(
541          ReferenceNames::List(HashSet::from_iter(vec!["chr1".to_string()])),
542          Fields::List(HashSet::from_iter(vec!["QNAME".to_string()])),
543          Tags::List(HashSet::from_iter(vec!["RG".to_string()])),
544          vec![Bam],
545          vec![Class::Body],
546          Interval::new(Some(100), Some(1000)),
547        );
548        let resolver = config.locations().first().unwrap();
549        let expected_storage = storage::s3::S3::new("bucket".to_string(), None, false);
550        let Backend::S3(storage) = resolver.backend() else {
551          panic!();
552        };
553
554        assert_eq!(storage.bucket(), expected_storage.bucket());
555        assert_eq!(storage.endpoint(), expected_storage.endpoint());
556        assert_eq!(storage.path_style(), expected_storage.path_style());
557
558        let regex = config.locations().first().unwrap().as_regex().unwrap();
559        assert_eq!(regex.regex().to_string(), "regex");
560        assert_eq!(regex.substitution_string(), "substitution_string");
561        assert_eq!(regex.guard().unwrap(), &allow_guard);
562      },
563    );
564  }
565
566  async fn expected_resolved_request(resolver: Vec<Location>, expected_id: &str) {
567    assert_eq!(
568      Locations::new(resolver)
569        .resolve_request::<TestResolveResponse>(&mut Query::new_with_default_request("id-1", Bam))
570        .await
571        .unwrap()
572        .unwrap(),
573      Response::new(Bam, vec![Url::new(expected_id)])
574    );
575  }
576}