ref_solver/core/
contig.rs1use serde::{Deserialize, Serialize};
2
3use crate::core::types::NamingConvention;
4
5#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
7#[serde(rename_all = "kebab-case")]
8pub enum SequenceRole {
9 AssembledMolecule,
11 AltScaffold,
13 FixPatch,
15 NovelPatch,
17 UnlocalizedScaffold,
19 UnplacedScaffold,
21 #[default]
23 Unknown,
24}
25
26impl SequenceRole {
27 #[must_use]
29 pub fn parse(s: &str) -> Self {
30 match s.to_lowercase().as_str() {
31 "assembled-molecule" => SequenceRole::AssembledMolecule,
32 "alt-scaffold" => SequenceRole::AltScaffold,
33 "fix-patch" => SequenceRole::FixPatch,
34 "novel-patch" => SequenceRole::NovelPatch,
35 "unlocalized-scaffold" => SequenceRole::UnlocalizedScaffold,
36 "unplaced-scaffold" => SequenceRole::UnplacedScaffold,
37 _ => SequenceRole::Unknown,
38 }
39 }
40}
41
42#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
44pub struct Contig {
45 pub name: String,
47
48 pub length: u64,
50
51 #[serde(default, skip_serializing_if = "Option::is_none")]
54 pub md5: Option<String>,
55
56 #[serde(default, skip_serializing_if = "Option::is_none")]
58 pub assembly: Option<String>,
59
60 #[serde(default, skip_serializing_if = "Option::is_none")]
62 pub uri: Option<String>,
63
64 #[serde(default, skip_serializing_if = "Option::is_none")]
66 pub species: Option<String>,
67
68 #[serde(default, skip_serializing_if = "Vec::is_empty")]
70 pub aliases: Vec<String>,
71
72 #[serde(default, skip_serializing_if = "is_unknown_role")]
74 pub sequence_role: SequenceRole,
75}
76
77#[allow(clippy::trivially_copy_pass_by_ref)] fn is_unknown_role(role: &SequenceRole) -> bool {
79 matches!(role, SequenceRole::Unknown)
80}
81
82impl Contig {
83 pub fn new(name: impl Into<String>, length: u64) -> Self {
84 Self {
85 name: name.into(),
86 length,
87 md5: None,
88 assembly: None,
89 uri: None,
90 species: None,
91 aliases: Vec::new(),
92 sequence_role: SequenceRole::Unknown,
93 }
94 }
95
96 #[cfg(test)]
97 #[must_use]
98 pub fn with_md5(mut self, md5: impl Into<String>) -> Self {
99 self.md5 = Some(md5.into());
100 self
101 }
102
103 #[cfg(test)]
104 #[must_use]
105 pub fn with_aliases(mut self, aliases: Vec<String>) -> Self {
106 self.aliases = aliases;
107 self
108 }
109
110 #[must_use]
113 pub fn is_primary_chromosome(&self) -> bool {
114 matches!(
117 self.name.as_str(),
118 "1" | "2"
119 | "3"
120 | "4"
121 | "5"
122 | "6"
123 | "7"
124 | "8"
125 | "9"
126 | "10"
127 | "11"
128 | "12"
129 | "13"
130 | "14"
131 | "15"
132 | "16"
133 | "17"
134 | "18"
135 | "19"
136 | "20"
137 | "21"
138 | "22"
139 | "X"
140 | "Y"
141 | "chr1"
142 | "chr2"
143 | "chr3"
144 | "chr4"
145 | "chr5"
146 | "chr6"
147 | "chr7"
148 | "chr8"
149 | "chr9"
150 | "chr10"
151 | "chr11"
152 | "chr12"
153 | "chr13"
154 | "chr14"
155 | "chr15"
156 | "chr16"
157 | "chr17"
158 | "chr18"
159 | "chr19"
160 | "chr20"
161 | "chr21"
162 | "chr22"
163 | "chrX"
164 | "chrY"
165 )
166 }
167
168 #[must_use]
171 pub fn is_mitochondrial(&self) -> bool {
172 let name_lower = self.name.to_lowercase();
173 matches!(
174 name_lower.as_str(),
175 "mt" | "m" | "chrm" | "chrmt" | "mito" | "mitochondrion" | "rcrs" | "nc_012920.1"
176 ) || name_lower.contains("mitochon")
177 }
178
179 #[must_use]
181 pub fn is_alt(&self) -> bool {
182 self.name.ends_with("_alt") || self.name.contains("_alt_")
183 }
184
185 #[must_use]
187 pub fn is_decoy(&self) -> bool {
188 self.name.contains("decoy")
189 || self.name == "hs37d5"
190 || self.name.starts_with("chrUn_")
191 || self.name.contains("_random")
192 }
193}
194
195#[must_use]
201pub fn detect_naming_convention(contigs: &[Contig]) -> NamingConvention {
202 let mut has_chr_prefix = false;
203 let mut has_no_prefix = false;
204
205 for contig in contigs {
206 if contig.is_primary_chromosome() {
207 if contig.name.starts_with("chr") {
208 has_chr_prefix = true;
209 } else {
210 has_no_prefix = true;
211 }
212 }
213 }
214
215 match (has_chr_prefix, has_no_prefix) {
216 (true, false) => NamingConvention::Ucsc,
217 (false, true) => NamingConvention::Ncbi,
218 _ => NamingConvention::Mixed,
219 }
220}
221
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a contig with an arbitrary length; only the name matters here.
    fn named(name: &str) -> Contig {
        Contig::new(name, 100)
    }

    #[test]
    fn test_is_primary_chromosome() {
        for name in ["chr1", "1", "chrX", "Y"] {
            assert!(
                named(name).is_primary_chromosome(),
                "{name} should be a primary chromosome"
            );
        }
        for name in ["chrM", "chr1_random"] {
            assert!(
                !named(name).is_primary_chromosome(),
                "{name} should not be a primary chromosome"
            );
        }
    }

    #[test]
    fn test_is_mitochondrial() {
        let mitochondrial = [
            "chrM",
            "MT",
            "chrMT",
            "M",
            "mito",
            "Mitochondrion",
            "rCRS",
            "NC_012920.1",
            "mitochondrial_genome",
        ];
        for name in mitochondrial {
            assert!(
                named(name).is_mitochondrial(),
                "{name} should be mitochondrial"
            );
        }
        for name in ["chr1", "chrX"] {
            assert!(
                !named(name).is_mitochondrial(),
                "{name} should not be mitochondrial"
            );
        }
    }
}