sigalign/
lib.rs

1/*!
2# SigAlign
3
4SigAlign is a library for gap-affine sequence alignment tasks guided by explicit similarity cutoffs.
5
6## Quick Start
7```rust
8use sigalign::{
9    Aligner,
10    algorithms::Local,
11    ReferenceBuilder,
12};
13
14// (1) Build `Reference`
15let fasta =
16br#">target_1
17ACACAGATCGCAAACTCACAATTGTATTTCTTTGCCACCTGGGCATATACTTTTTGCGCCCCCTCATTTA
18>target_2
19TCTGGGGCCATTGTATTTCTTTGCCAGCTGGGGCATATACTTTTTCCGCCCCCTCATTTACGCTCATCAC"#;
20let reference = ReferenceBuilder::new()
21    .set_uppercase(true) // Ignore case
22    .ignore_base(b'N') // 'N' is never matched
23    .add_fasta(&fasta[..]).unwrap() // Add sequences from FASTA
24    .add_target(
25        "target_3",
26        b"AAAAAAAAAAA",
27    ) // Add sequence manually
28    .build().unwrap();
29
30// (2) Initialize `Aligner`
31let algorithm = Local::new(
32    4,   // Mismatch penalty
33    6,   // Gap-open penalty
34    2,   // Gap-extend penalty
35    50,  // Minimum length
36    0.2, // Maximum penalty per length
37).unwrap();
38let mut aligner = Aligner::new(algorithm);
39
40// (3) Align query to reference
41let query = b"CAAACTCACAATTGTATTTCTTTGCCAGCTGGGCATATACTTTTTCCGCCCCCTCATTTAACTTCTTGGA";
42let result = aligner.align(query, &reference);
43println!("{:#?}", result);
44```
45
46## Core Structures
47- `Reference`: A **database** for multiple target sequences.
48   - **Generated** from `ReferenceBuilder`.
49   - **Purpose**: Combining multiple sequences into one struct, indexing them to facilitate alignment processes.
   - Can remain **immutable** during alignment.
51- `Aligner`: An **executor** for alignment tasks.
52   - **Generated** from `Algorithm`.
53   - **Purpose**: Managing the workspace for alignment tasks.
   - Needs to be **mutable** during alignment.
55
56## Parameters: Definition of alignment results
57- Penalties
58    - Mismatch penalty (`u32`)
59    - Gap-open penalty (`u32`)
60    - Gap-extend penalty (`u32`)
61- Cutoffs
62    - Minimum alignment length (MinL) (`u32`)
63    - Maximum penalty per alignment length (MaxP) (`f32`)
64
65## Inputs and Outputs
66- Inputs
67    - Query: `&[u8]` (byte array)
    - Reference: `&Reference` (a shared reference to a `Reference`)
69- Outputs
70    - `QueryAlignment`: A vector of `TargetAlignment` for each target sequence.
71        - `TargetAlignment`: A vector of `Alignment` for each alignment.
72            - Index: Index of the target sequence in Reference.
73            - Alignment: Alignment results.
74                - Penalty score
75                - Length of alignment
76                - Alignment position
77                - Operations (Match, Substitution, Insertion, Deletion)
78*/
79
80pub mod results;
81
82mod reference;
83pub use reference::{
84    Reference,
85    ReferenceBuilder,
86    ReferenceBuildError,
87    ReferenceLoadError,
88};
89
90mod aligner;
91pub use aligner::{
92    Aligner,
93    algorithms,
94};
95
96pub mod utils;
97
98
#[cfg(test)]
mod doc_tests {
    use crate::{algorithms::Local, Aligner, ReferenceBuilder};

    /// Runs the same steps as the "Quick Start" example in the crate-level
    /// documentation: build a reference, create an aligner, align one query.
    ///
    /// The test contains no assertions; it passes as long as none of the
    /// `unwrap` calls panic. The alignment result is only printed.
    #[test]
    fn test_readme() {
        // (1) Build `Reference`.
        // The continuation lines of the raw byte string start at column 0 so
        // that no indentation leaks into the FASTA data.
        let fasta_records =
br#">target_1
ACACAGATCGCAAACTCACAATTGTATTTCTTTGCCACCTGGGCATATACTTTTTGCGCCCCCTCATTTA
>target_2
TCTGGGGCCATTGTATTTCTTTGCCAGCTGGGGCATATACTTTTTCCGCCCCCTCATTTACGCTCATCAC"#;
        let reference = ReferenceBuilder::new()
            // Ignore case
            .set_uppercase(true)
            // 'N' is never matched
            .ignore_base(b'N')
            // Add sequences from FASTA
            .add_fasta(&fasta_records[..])
            .unwrap()
            // Add sequence manually
            .add_target("target_3", b"AAAAAAAAAAA")
            .build()
            .unwrap();

        // (2) Initialize `Aligner`.
        let mismatch_penalty = 4;
        let gap_open_penalty = 6;
        let gap_extend_penalty = 2;
        let minimum_length = 50;
        let maximum_penalty_per_length = 0.2;
        let local_algorithm = Local::new(
            mismatch_penalty,
            gap_open_penalty,
            gap_extend_penalty,
            minimum_length,
            maximum_penalty_per_length,
        )
        .unwrap();
        let mut aligner = Aligner::new(local_algorithm);

        // (3) Align query to reference.
        let query_sequence =
            b"CAAACTCACAATTGTATTTCTTTGCCAGCTGGGCATATACTTTTTCCGCCCCCTCATTTAACTTCTTGGA";
        let alignment_result = aligner.align(query_sequence, &reference);
        println!("{:#?}", alignment_result);
    }
}