1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
//! # DNA Restrictive
//!
//! Validated DNA sequence wrapper that guarantees only valid bases (A, C, G, T).
//! Provides type-safe handling of DNA sequences.
use crate::Error;
use serde::{Deserialize, Serialize};
use std::{fmt, str::FromStr};
/// Validated DNA sequence wrapper that guarantees only valid bases (A, C, G, T).
/// Stores sequences in uppercase.
///
/// The inner byte vector is private. The conversions defined in this file
/// (`TryFrom<Vec<u8>>`, `FromStr` and `Deserialize`) reject empty input and
/// any byte other than `ACGT`/`acgt`, and normalise lowercase to uppercase
/// before storing.
// NOTE(review): `Serialize` is derived over the inner `Vec<u8>`, whereas the
// hand-written `Deserialize` impl in this file reads a string. Confirm whether
// a serde round-trip of this type is expected to work.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct DNARestrictive(Vec<u8>);
impl DNARestrictive {
    /// Borrows the stored sequence as a byte slice.
    ///
    /// No copy is made; the slice lives as long as `self` is borrowed.
    #[must_use]
    pub fn get(&self) -> &[u8] {
        self.0.as_slice()
    }
}
/// Trait for types that can provide a reference to a `DNARestrictive` object.
pub trait GetDNARestrictive {
    /// Returns a reference to a `DNARestrictive` object.
    ///
    /// The reference is tied to the borrow of `self`.
    fn get_dna_restrictive(&self) -> &DNARestrictive;
}
impl GetDNARestrictive for DNARestrictive {
    /// A `DNARestrictive` is its own sequence, so this simply returns `self`.
    fn get_dna_restrictive(&self) -> &Self {
        self
    }
}
impl FromStr for DNARestrictive {
type Err = Error;
/// Convert from string with only valid bases (A, C, G, T, or lowercase).
/// Does not accept ambiguous bases like 'N'.
///
/// # Examples
/// ```
/// use nanalogue_core::{DNARestrictive, Error};
/// use std::str::FromStr;
///
/// let val_1 = DNARestrictive::from_str("ACGT").unwrap();
/// let val_2 = DNARestrictive::from_str("acgt").unwrap();
/// let val_3: Error = DNARestrictive::from_str("ACGTN").unwrap_err();
/// let val_4: Error = DNARestrictive::from_str("").unwrap_err();
/// ```
fn from_str(s: &str) -> Result<Self, Self::Err> {
DNARestrictive::try_from(s.as_bytes().to_vec())
}
}
impl TryFrom<Vec<u8>> for DNARestrictive {
type Error = Error;
/// Converts from a vector of `u8`, only `ACGT` upper or lowercases allowed.
///
/// # Example
/// ```
/// use nanalogue_core::{DNARestrictive, Error};
/// let val_1 = DNARestrictive::try_from(vec![b'A', b'C', b'G', b'T', b'a', b'c', b'g', b't']).unwrap();
/// let val_2: Error = DNARestrictive::try_from(vec![b'h']).unwrap_err();
/// let val_3: Error = DNARestrictive::try_from(vec![b'N']).unwrap_err();
/// let val_4: Error = DNARestrictive::try_from(vec![]).unwrap_err();
/// ```
fn try_from(s: Vec<u8>) -> Result<Self, Self::Error> {
if s.is_empty() {
Err(Error::InvalidSeq("empty sequence supplied!".to_owned()))
} else {
Ok(DNARestrictive(
s.into_iter()
.map(|x| match x {
b'A' | b'a' => Ok(b'A'),
b'C' | b'c' => Ok(b'C'),
b'G' | b'g' => Ok(b'G'),
b'T' | b't' => Ok(b'T'),
v => Err(Error::InvalidBase(char::from(v).to_string())),
})
.collect::<Result<Vec<u8>, _>>()?,
))
}
}
}
impl fmt::Display for DNARestrictive {
    /// Writes the stored sequence as text, e.g. `ACGT`.
    ///
    /// Formatter options such as width, alignment and precision are applied
    /// to the sequence text.
    ///
    /// # Errors
    /// Returns `fmt::Error` if the stored bytes are not valid UTF-8 or if the
    /// underlying writer fails. The former is not expected, because the
    /// conversions in this file only ever store the ASCII bytes `A`, `C`,
    /// `G` and `T`.
    ///
    /// # Example
    /// ```
    /// use nanalogue_core::DNARestrictive;
    /// let val_1 = DNARestrictive::try_from(vec![b'A', b'C', b'G', b'T', b'a', b'c', b'g', b't']).unwrap();
    /// assert_eq!(val_1.to_string(), String::from("ACGTACGT"));
    /// ```
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Borrow the bytes as `&str` rather than cloning them into a fresh
        // `String` on every call. The checked conversion costs one pass over
        // the bytes and keeps this function free of `unsafe`.
        let bases = std::str::from_utf8(&self.0).map_err(|_| fmt::Error)?;
        // `pad` applies the caller's width/precision/alignment flags.
        f.pad(bases)
    }
}
impl<'de> Deserialize<'de> for DNARestrictive {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
DNARestrictive::from_str(&s).map_err(serde::de::Error::custom)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A barcode containing an ambiguous base (`N`) must fail with `InvalidBase`.
    #[test]
    #[should_panic(expected = "InvalidBase")]
    fn dna_restrictive_invalid() {
        let _: DNARestrictive = "ACGTN".parse().unwrap();
    }

    /// Uppercase input is displayed unchanged, for short and longer sequences.
    #[test]
    fn dna_restrictive_display_uppercase() {
        let short = DNARestrictive::try_from(b"ACGT".to_vec()).expect("should create");
        assert_eq!(short.to_string(), "ACGT");
        assert_eq!(format!("{short}"), "ACGT");

        let longer = DNARestrictive::try_from(b"ATGCGTAC".to_vec()).expect("should create");
        assert_eq!(longer.to_string(), "ATGCGTAC");
    }

    /// Lowercase input is displayed in uppercase, whichever constructor is used.
    #[test]
    fn dna_restrictive_display_lowercase() {
        let from_bytes = DNARestrictive::try_from(b"acgt".to_vec()).expect("should create");
        assert_eq!(from_bytes.to_string(), "ACGT");

        let from_text: DNARestrictive = "acgt".parse().expect("should create");
        assert_eq!(format!("{from_text}"), "ACGT");
    }

    /// Mixed-case input is displayed in uppercase.
    #[test]
    fn dna_restrictive_display_mixedcase() {
        for mixed in [*b"AcGt", *b"aCgT"] {
            let seq = DNARestrictive::try_from(mixed.to_vec()).expect("should create");
            assert_eq!(seq.to_string(), "ACGT");
        }
    }

    /// A sequence built through `FromStr` displays as the text it was parsed from.
    #[test]
    fn dna_restrictive_display_via_from_str() {
        let parsed: DNARestrictive = "ACGT".parse().expect("should create");
        assert_eq!(format!("{parsed}"), "ACGT");
    }

    /// Each individual base displays as its own single letter.
    #[test]
    fn dna_restrictive_display_single_base() {
        for (base, shown) in [(b'A', "A"), (b'C', "C"), (b'G', "G"), (b'T', "T")] {
            let seq = DNARestrictive::try_from(vec![base]).expect("should create");
            assert_eq!(seq.to_string(), shown);
        }
    }
}