Skip to main content

use_sequence_id/
lib.rs

1#![forbid(unsafe_code)]
2#![doc = include_str!("../README.md")]
3
4use core::{fmt, str::FromStr};
5use std::error::Error;
6
7fn non_empty_identifier(value: impl AsRef<str>) -> Result<String, SequenceIdError> {
8    let value = value.as_ref();
9
10    if value.trim().is_empty() {
11        Err(SequenceIdError::Empty)
12    } else {
13        Ok(value.to_string())
14    }
15}
16
/// Validation failure reported by the sequence identifier constructors.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SequenceIdError {
    /// The supplied text had no content left once surrounding whitespace was trimmed.
    Empty,
}

/// Renders a short, human-readable description of the failure.
impl fmt::Display for SequenceIdError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Resolve the message first so every variant shares a single write call.
        let message = match self {
            Self::Empty => "sequence identifier cannot be empty",
        };

        f.write_str(message)
    }
}

/// Marks the type as a standard error; no underlying source is provided.
impl Error for SequenceIdError {}
33
34/// A non-empty sequence identifier.
35#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
36pub struct SequenceId(String);
37
38impl SequenceId {
39    /// Creates a sequence identifier from non-empty text.
40    ///
41    /// Casing and punctuation are preserved exactly as supplied.
42    ///
43    /// # Errors
44    ///
45    /// Returns [`SequenceIdError::Empty`] when the trimmed identifier is empty.
46    pub fn new(value: impl AsRef<str>) -> Result<Self, SequenceIdError> {
47        non_empty_identifier(value).map(Self)
48    }
49
50    /// Returns the identifier text.
51    #[must_use]
52    pub fn as_str(&self) -> &str {
53        &self.0
54    }
55
56    /// Consumes the identifier and returns the owned string.
57    #[must_use]
58    pub fn into_string(self) -> String {
59        self.0
60    }
61}
62
63impl AsRef<str> for SequenceId {
64    fn as_ref(&self) -> &str {
65        self.as_str()
66    }
67}
68
69impl fmt::Display for SequenceId {
70    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
71        formatter.write_str(self.as_str())
72    }
73}
74
75impl FromStr for SequenceId {
76    type Err = SequenceIdError;
77
78    fn from_str(value: &str) -> Result<Self, Self::Err> {
79        Self::new(value)
80    }
81}
82
83/// A non-empty sequence accession.
84#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
85pub struct Accession(String);
86
87impl Accession {
88    /// Creates a sequence accession from non-empty text.
89    ///
90    /// Casing and punctuation are preserved exactly as supplied.
91    ///
92    /// # Errors
93    ///
94    /// Returns [`SequenceIdError::Empty`] when the trimmed accession is empty.
95    pub fn new(value: impl AsRef<str>) -> Result<Self, SequenceIdError> {
96        non_empty_identifier(value).map(Self)
97    }
98
99    /// Returns the accession text.
100    #[must_use]
101    pub fn as_str(&self) -> &str {
102        &self.0
103    }
104}
105
106impl AsRef<str> for Accession {
107    fn as_ref(&self) -> &str {
108        self.as_str()
109    }
110}
111
112impl fmt::Display for Accession {
113    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
114        formatter.write_str(self.as_str())
115    }
116}
117
118impl FromStr for Accession {
119    type Err = SequenceIdError;
120
121    fn from_str(value: &str) -> Result<Self, Self::Err> {
122        Self::new(value)
123    }
124}
125
126/// A sequence accession with an optional descriptive version component.
127#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
128pub struct VersionedAccession {
129    accession: Accession,
130    version: Option<String>,
131}
132
133impl VersionedAccession {
134    /// Creates a versioned accession without a version component.
135    #[must_use]
136    pub const fn without_version(accession: Accession) -> Self {
137        Self {
138            accession,
139            version: None,
140        }
141    }
142
143    /// Creates a versioned accession with a non-empty version component.
144    ///
145    /// # Errors
146    ///
147    /// Returns [`SequenceIdError::Empty`] when the trimmed version is empty.
148    pub fn with_version(
149        accession: Accession,
150        version: impl AsRef<str>,
151    ) -> Result<Self, SequenceIdError> {
152        Ok(Self {
153            accession,
154            version: Some(non_empty_identifier(version)?),
155        })
156    }
157
158    /// Returns the base accession.
159    #[must_use]
160    pub const fn accession(&self) -> &Accession {
161        &self.accession
162    }
163
164    /// Returns the optional version component.
165    #[must_use]
166    pub fn version(&self) -> Option<&str> {
167        self.version.as_deref()
168    }
169}
170
171impl fmt::Display for VersionedAccession {
172    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
173        match self.version() {
174            Some(version) => write!(formatter, "{}.{}", self.accession, version),
175            None => formatter.write_str(self.accession.as_str()),
176        }
177    }
178}
179
/// Descriptive source labels for sequence identifiers.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum SequenceSource {
    /// `GenBank` source label.
    GenBank,
    /// `RefSeq` source label.
    RefSeq,
    /// Ensembl source label.
    Ensembl,
    /// `UniProt` source label.
    UniProt,
    /// Protein Data Bank source label.
    Pdb,
    /// Local sequence source.
    Local,
    /// Unknown sequence source.
    Unknown,
    /// Domain-specific source label.
    Custom(String),
}

/// Writes the lowercase label for built-in sources and the stored text for
/// [`SequenceSource::Custom`].
impl fmt::Display for SequenceSource {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Self::GenBank => "genbank",
            Self::RefSeq => "refseq",
            Self::Ensembl => "ensembl",
            Self::UniProt => "uniprot",
            Self::Pdb => "pdb",
            Self::Local => "local",
            Self::Unknown => "unknown",
            Self::Custom(source) => source.as_str(),
        };

        formatter.write_str(label)
    }
}

/// Parses a source label; parsing never fails.
///
/// Surrounding whitespace is ignored and built-in labels are matched without
/// regard to ASCII case (the `gen_bank`, `ref_seq` and `uni_prot` spellings are
/// accepted too). Blank input maps to [`SequenceSource::Unknown`]. Any other
/// text becomes [`SequenceSource::Custom`], holding the trimmed input with its
/// original casing.
impl FromStr for SequenceSource {
    type Err = core::convert::Infallible;

    fn from_str(value: &str) -> Result<Self, Self::Err> {
        // Trim once and reuse the result for both matching and storage, so a
        // custom label is normalised the same way as the built-in ones.
        let label = value.trim();

        let source = match label.to_ascii_lowercase().as_str() {
            "genbank" | "gen_bank" => Self::GenBank,
            "refseq" | "ref_seq" => Self::RefSeq,
            "ensembl" => Self::Ensembl,
            "uniprot" | "uni_prot" => Self::UniProt,
            "pdb" => Self::Pdb,
            "local" => Self::Local,
            "unknown" | "" => Self::Unknown,
            _ => Self::Custom(label.to_string()),
        };

        Ok(source)
    }
}
234
#[cfg(test)]
mod tests {
    use super::{Accession, SequenceId, SequenceIdError, SequenceSource, VersionedAccession};
    use core::str::FromStr;

    /// Well-formed identifier text is accepted and stored unchanged.
    #[test]
    fn creates_valid_sequence_id() {
        let text = "chr1:10-20";
        let id = SequenceId::new(text).expect("valid identifier");

        assert_eq!(id.as_str(), text);
    }

    /// Whitespace-only text is rejected as empty.
    #[test]
    fn rejects_empty_sequence_id() {
        let outcome = SequenceId::new("  ");

        assert_eq!(outcome, Err(SequenceIdError::Empty));
    }

    /// An accession displays exactly the text it was built from.
    #[test]
    fn constructs_accession_preserving_text() {
        let rendered = Accession::new("NM_007294")
            .expect("valid accession")
            .to_string();

        assert_eq!(rendered, "NM_007294");
    }

    /// A versioned accession exposes its version and renders as `accession.version`.
    #[test]
    fn constructs_versioned_accession() {
        let base = Accession::new("NM_007294").expect("valid accession");
        let versioned = VersionedAccession::with_version(base, "3").expect("valid version");

        assert_eq!(versioned.version(), Some("3"));
        assert_eq!(versioned.to_string(), "NM_007294.3");
    }

    /// Built-in sources display in lowercase and parse case-insensitively.
    #[test]
    fn sequence_source_displays_and_parses() {
        let rendered = SequenceSource::GenBank.to_string();
        let parsed = SequenceSource::from_str("UniProt");

        assert_eq!(rendered, "genbank");
        assert_eq!(parsed, Ok(SequenceSource::UniProt));
    }

    /// Unrecognised labels are kept as custom sources.
    #[test]
    fn supports_custom_source() {
        let expected = SequenceSource::Custom(String::from("lab"));

        assert_eq!(SequenceSource::from_str("lab"), Ok(expected));
    }

    /// Without a version, only the base accession is rendered.
    #[test]
    fn versionless_accession_is_descriptive() {
        let base = Accession::new("P12345").expect("valid accession");
        let versioned = VersionedAccession::without_version(base);

        assert_eq!(versioned.version(), None);
        assert_eq!(versioned.to_string(), "P12345");
    }
}