sigalign_core/reference/mod.rs
1/*!
2A database for multiple targeted sequences.
3
4## What is `Reference`?
5- `Reference` is a target of alignment, containing multiple sequences.
6- It is primarily used by the `Aligner` to perform alignments.
7
8## Features
9- `Reference` is designed to be **agnostic** to the storage and retrieval methods for sequences.
10 - Sequences could be stored in memory, files, or remote physical locations accessible over a network.
11- During alignment, `Reference` remains **immutable**.
12 - `Aligner` manages the `SequenceBuffer` defined in `SequenceStorage` to store temporary sequences.
  - `Reference` only defines how to access the target sequences.
- The **target sequences' range can be adjusted** after building the `Reference`, unlike a conventional "reference" in bioinformatics.
15- Basically, `Reference` is simply built from `SequenceStorage` and `PatternIndex::Option` in `sigalign-core`.
16
17## Internal Traits
18- [SequenceStorage]: Fetches a target sequence based on a given target index.
19- [PatternIndex]: Accepts pattern bytes and returns the indices of the targets exactly matching the pattern. This is necessary for the SigAlign algorithm to exactly locate all patterns in the target sequences.
20*/
21
22// Internal components
23mod pattern_index;
24mod sequence_storage;
25// Implementations
26mod pattern_locate; // Implements the `BufferedPatternLocater` trait.
27mod debug;
28// Extensions for additional features for `Reference`.
29pub mod extensions;
30
31pub use pattern_index::PatternIndex;
32pub use sequence_storage::SequenceStorage;
33pub use crate::core::{PatternLocation, SequenceBuffer};
34
35/// A database for multiple target sequences.
36#[derive(Debug)]
37pub struct Reference<I, S> where
38 I: PatternIndex,
39 S: SequenceStorage,
40{
41 target_boundaries: Vec<u32>,
42 pattern_index: I,
43 sequence_storage: S,
44}
45
46impl<I, S> Reference<I, S> where
47 I: PatternIndex,
48 S: SequenceStorage,
49{
50 pub fn new(
51 sequence_storage: S,
52 pattern_index_option: I::Option,
53 ) -> Result<Self, I::BuildError> {
54 let (concatenated_sequence, target_boundaries) = sequence_storage.get_concatenated_sequence_with_boundaries_of_targets();
55 let pattern_index = I::new(concatenated_sequence, pattern_index_option)?;
56
57 Ok(Self {
58 target_boundaries,
59 pattern_index,
60 sequence_storage,
61 })
62 }
63 pub fn get_sequence_storage(&self) -> &S {
64 &self.sequence_storage
65 }
66 pub fn get_pattern_index(&self) -> &I {
67 &self.pattern_index
68 }
69}