1use std::fmt;
9use std::hash::{Hash, Hasher};
10use std::marker::PhantomData;
11use std::ops::Deref;
12
13use cyanea_core::{CyaneaError, ContentAddressable, Sequence, Summarizable};
14
15use crate::alphabet::Alphabet;
16
17#[derive(Clone)]
22pub struct ValidatedSeq<A: Alphabet> {
23 data: Vec<u8>,
24 _alphabet: PhantomData<A>,
25}
26
27impl<A: Alphabet> ValidatedSeq<A> {
28 pub fn new(bytes: impl AsRef<[u8]>) -> cyanea_core::Result<Self> {
33 let data: Vec<u8> = bytes.as_ref().iter().map(|b| b.to_ascii_uppercase()).collect();
34 for (i, &b) in data.iter().enumerate() {
35 if !A::is_valid(b) {
36 return Err(CyaneaError::InvalidInput(format!(
37 "invalid {} byte '{}' (0x{:02X}) at position {}",
38 A::NAME,
39 b as char,
40 b,
41 i
42 )));
43 }
44 }
45 Ok(Self {
46 data,
47 _alphabet: PhantomData,
48 })
49 }
50
51 pub(crate) fn from_validated(data: Vec<u8>) -> Self {
57 Self {
58 data,
59 _alphabet: PhantomData,
60 }
61 }
62
63 pub fn into_bytes(self) -> Vec<u8> {
65 self.data
66 }
67}
68
69impl<A: Alphabet> Deref for ValidatedSeq<A> {
70 type Target = [u8];
71
72 fn deref(&self) -> &[u8] {
73 &self.data
74 }
75}
76
77impl<A: Alphabet> AsRef<[u8]> for ValidatedSeq<A> {
78 fn as_ref(&self) -> &[u8] {
79 &self.data
80 }
81}
82
83impl<A: Alphabet> Sequence for ValidatedSeq<A> {
84 fn as_bytes(&self) -> &[u8] {
85 &self.data
86 }
87}
88
89impl<A: Alphabet> ContentAddressable for ValidatedSeq<A> {
90 fn content_hash(&self) -> String {
91 cyanea_core::hash::sha256(&self.data)
92 }
93}
94
95impl<A: Alphabet> Summarizable for ValidatedSeq<A> {
96 fn summary(&self) -> String {
97 let preview_len = self.data.len().min(20);
98 let preview = std::str::from_utf8(&self.data[..preview_len]).unwrap_or("???");
99 if self.data.len() > 20 {
100 format!("{} sequence ({} bp): {}...", A::NAME, self.data.len(), preview)
101 } else {
102 format!("{} sequence ({} bp): {}", A::NAME, self.data.len(), preview)
103 }
104 }
105}
106
107impl<A: Alphabet> fmt::Debug for ValidatedSeq<A> {
108 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
109 let s = std::str::from_utf8(&self.data).unwrap_or("???");
110 write!(f, "{}(\"{}\")", A::NAME, s)
111 }
112}
113
114impl<A: Alphabet> fmt::Display for ValidatedSeq<A> {
115 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
116 let s = std::str::from_utf8(&self.data).unwrap_or("???");
117 f.write_str(s)
118 }
119}
120
121impl<A: Alphabet> PartialEq for ValidatedSeq<A> {
122 fn eq(&self, other: &Self) -> bool {
123 self.data == other.data
124 }
125}
126
127impl<A: Alphabet> Eq for ValidatedSeq<A> {}
128
129impl<A: Alphabet> Hash for ValidatedSeq<A> {
130 fn hash<H: Hasher>(&self, state: &mut H) {
131 self.data.hash(state);
132 }
133}
134
/// Serializes the sequence as a plain string.
#[cfg(feature = "serde")]
impl<A> serde::Serialize for ValidatedSeq<A>
where
    A: Alphabet,
{
    /// Emits the bytes as a string; bytes that are not valid UTF-8 produce a
    /// custom serializer error instead.
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        match std::str::from_utf8(&self.data) {
            Ok(text) => serializer.serialize_str(text),
            Err(utf8_error) => Err(serde::ser::Error::custom(utf8_error)),
        }
    }
}
142
/// Deserializes a sequence from a string, re-running the same normalisation
/// and validation as [`ValidatedSeq::new`].
#[cfg(feature = "serde")]
impl<'de, A: Alphabet> serde::Deserialize<'de> for ValidatedSeq<A> {
    /// Reads a string and passes its bytes to `Self::new`; a validation
    /// failure is reported as a custom deserializer error.
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> std::result::Result<Self, D::Error> {
        // Fully-qualified call: `String::deserialize` only resolves when the
        // `serde::Deserialize` trait is imported, which this call must not
        // depend on.
        let text = <String as serde::Deserialize<'de>>::deserialize(deserializer)?;
        Self::new(text.as_bytes()).map_err(serde::de::Error::custom)
    }
}
150
#[cfg(test)]
mod tests {
    use super::*;
    use crate::alphabet::DnaAlphabet;

    /// Sequence type exercised by every test in this module.
    type DnaSeq = ValidatedSeq<DnaAlphabet>;

    /// Builds a `DnaSeq`, panicking when the input is rejected.
    fn dna(input: &[u8]) -> DnaSeq {
        DnaSeq::new(input).unwrap()
    }

    /// Lowercase input is stored in uppercase.
    #[test]
    fn stores_uppercase() {
        assert_eq!(dna(b"acgt").as_bytes(), b"ACGT");
    }

    /// An empty input is accepted and yields an empty sequence.
    #[test]
    fn empty_sequence_ok() {
        let empty = dna(b"");
        assert!(empty.is_empty());
        assert_eq!(empty.len(), 0);
    }

    /// Mixed-case input comes back fully uppercased from `as_bytes`.
    #[test]
    fn as_bytes_uppercase() {
        assert_eq!(dna(b"AcGtN").as_bytes(), b"ACGTN");
    }

    /// The sequence dereferences to a byte slice and supports indexing.
    #[test]
    fn deref_to_slice() {
        let sequence = dna(b"ACGT");
        let bytes: &[u8] = &*sequence;
        assert_eq!(bytes, b"ACGT");
        assert_eq!(sequence[0], b'A');
    }

    /// Inputs that differ only in case share one content hash.
    #[test]
    fn content_addressable_deterministic() {
        let upper = dna(b"ACGT");
        let lower = dna(b"acgt");
        assert_eq!(upper.content_hash(), lower.content_hash());
    }

    /// A byte outside the alphabet makes construction fail.
    #[test]
    fn rejects_invalid_bytes() {
        assert!(DnaSeq::new(b"ACGX").is_err());
    }
}