sigalign/lib.rs
1/*!
2# SigAlign
3
4SigAlign is a library for gap-affine sequence alignment tasks guided by explicit similarity cutoffs.
5
6## Quick Start
7```rust
8use sigalign::{
9 Aligner,
10 algorithms::Local,
11 ReferenceBuilder,
12};
13
14// (1) Build `Reference`
15let fasta =
16br#">target_1
17ACACAGATCGCAAACTCACAATTGTATTTCTTTGCCACCTGGGCATATACTTTTTGCGCCCCCTCATTTA
18>target_2
19TCTGGGGCCATTGTATTTCTTTGCCAGCTGGGGCATATACTTTTTCCGCCCCCTCATTTACGCTCATCAC"#;
20let reference = ReferenceBuilder::new()
21 .set_uppercase(true) // Ignore case
22 .ignore_base(b'N') // 'N' is never matched
23 .add_fasta(&fasta[..]).unwrap() // Add sequences from FASTA
24 .add_target(
25 "target_3",
26 b"AAAAAAAAAAA",
27 ) // Add sequence manually
28 .build().unwrap();
29
30// (2) Initialize `Aligner`
31let algorithm = Local::new(
32 4, // Mismatch penalty
33 6, // Gap-open penalty
34 2, // Gap-extend penalty
35 50, // Minimum length
36 0.2, // Maximum penalty per length
37).unwrap();
38let mut aligner = Aligner::new(algorithm);
39
40// (3) Align query to reference
41let query = b"CAAACTCACAATTGTATTTCTTTGCCAGCTGGGCATATACTTTTTCCGCCCCCTCATTTAACTTCTTGGA";
42let result = aligner.align(query, &reference);
43println!("{:#?}", result);
44```
45
46## Core Structures
47- `Reference`: A **database** for multiple target sequences.
48 - **Generated** from `ReferenceBuilder`.
49 - **Purpose**: Combining multiple sequences into one struct, indexing them to facilitate alignment processes.
50 - Can remain **immutable** during alignment.
51- `Aligner`: An **executor** for alignment tasks.
52 - **Generated** from `Algorithm`.
53 - **Purpose**: Managing the workspace for alignment tasks.
54 - Needs to be **mutable** during alignment.
55
56## Parameters: Definition of alignment results
57- Penalties
58 - Mismatch penalty (`u32`)
59 - Gap-open penalty (`u32`)
60 - Gap-extend penalty (`u32`)
61- Cutoffs
62 - Minimum alignment length (MinL) (`u32`)
63 - Maximum penalty per alignment length (MaxP) (`f32`)
64
65## Inputs and Outputs
66- Inputs
67 - Query: `&[u8]` (byte array)
68 - Reference: `&Reference` (a shared reference to `Reference`)
69- Outputs
70 - `QueryAlignment`: A vector of `TargetAlignment` for each target sequence.
71 - `TargetAlignment`: A vector of `Alignment` for each alignment.
72 - Index: Index of the target sequence in Reference.
73 - Alignment: Alignment results.
74 - Penalty score
75 - Length of alignment
76 - Alignment position
77 - Operations (Match, Substitution, Insertion, Deletion)
78*/
79
80pub mod results;
81
82mod reference;
83pub use reference::{
84 Reference,
85 ReferenceBuilder,
86 ReferenceBuildError,
87 ReferenceLoadError,
88};
89
90mod aligner;
91pub use aligner::{
92 Aligner,
93 algorithms,
94};
95
96pub mod utils;
97
98
#[cfg(test)]
mod doc_tests {
    /// Runs the same steps as the Quick Start snippet in the crate-level docs:
    /// build a `Reference`, create an `Aligner`, and align one query.
    ///
    /// The alignment output is only printed, never asserted on, so this test
    /// fails only if one of the `unwrap()` calls (or the alignment itself) panics.
    #[test]
    fn test_readme() {
        use crate::{algorithms::Local, Aligner, ReferenceBuilder};

        // Step 1: assemble the `Reference` from two FASTA records plus one
        // manually added target. The raw byte string must stay flush-left so
        // that no indentation leaks into the FASTA content.
        let fasta_records =
br#">target_1
ACACAGATCGCAAACTCACAATTGTATTTCTTTGCCACCTGGGCATATACTTTTTGCGCCCCCTCATTTA
>target_2
TCTGGGGCCATTGTATTTCTTTGCCAGCTGGGGCATATACTTTTTCCGCCCCCTCATTTACGCTCATCAC"#;
        let target_db = ReferenceBuilder::new()
            .set_uppercase(true) // Case is ignored
            .ignore_base(b'N') // 'N' never counts as a match
            .add_fasta(&fasta_records[..]).unwrap() // Sequences taken from the FASTA bytes
            .add_target("target_3", b"AAAAAAAAAAA") // Sequence supplied by hand
            .build().unwrap();

        // Step 2: configure the local algorithm and wrap it in an `Aligner`.
        let local_algorithm = Local::new(
            4,   // Mismatch penalty
            6,   // Gap-open penalty
            2,   // Gap-extend penalty
            50,  // Minimum length
            0.2, // Maximum penalty per length
        ).unwrap();
        let mut aligner = Aligner::new(local_algorithm);

        // Step 3: align a single query against the reference and dump the result.
        let query_seq = b"CAAACTCACAATTGTATTTCTTTGCCAGCTGGGCATATACTTTTTCCGCCCCCTCATTTAACTTCTTGGA";
        let alignment = aligner.align(query_seq, &target_db);
        println!("{:#?}", alignment);
    }
}