1use super::is_valid_base;
2use anyhow::Result;
3use fgoxide::io::DelimFile;
4use itertools::Itertools;
5use serde::Deserialize;
6use serde_aux::prelude::*;
7use std::collections::hash_map::RandomState;
8use std::collections::HashSet;
9use std::fmt::{self, Display};
10use std::path::Path;
11
/// Field delimiter (a single tab byte) passed to the delimited-file reader when sample
/// metadata is loaded from disk.
const DEFAULT_FILE_DELIMETER: u8 = b'\t';
13
14#[derive(Clone, Deserialize, Debug, PartialEq, Eq)]
16pub struct Sample {
17 pub sample_id: String,
19 pub barcode: String,
21 #[serde(skip_deserializing)]
24 ordinal: usize,
25}
26
27impl Display for Sample {
28 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
32 write!(
33 f,
34 "Sample({:04}) - {{ name: {}\tbarcode: {} }}",
35 self.ordinal, self.sample_id, self.barcode
36 )
37 }
38}
39
40impl Sample {
41 #[must_use]
48 pub fn new(ordinal: usize, name: String, barcode: String) -> Self {
49 assert!(!name.is_empty(), "Sample name cannot be empty");
50 assert!(!barcode.is_empty(), "Sample barcode cannot be empty");
51 assert!(
52 barcode.as_bytes().iter().all(|&b| is_valid_base(b)),
53 "All sample barcode bases must be one of A, C, G, or T"
54 );
55 Self { sample_id: name, barcode, ordinal }
56 }
57
58 #[must_use]
60 pub fn deserialize_header_line() -> String {
61 let field_names = serde_introspect::<Self>();
62 let skip_deserialize_fields: HashSet<&str, RandomState> = HashSet::from_iter(["ordinal"]);
63 let final_field_names: Vec<String> = field_names
64 .iter()
65 .filter(|&&f| !skip_deserialize_fields.contains(f))
66 .map(|&f| f.to_owned())
67 .collect();
68 final_field_names.join("\t")
69 }
70}
71
72#[derive(Clone, Debug, PartialEq, Eq)]
75pub struct SampleGroup {
76 pub samples: Vec<Sample>,
78}
79
80impl Display for SampleGroup {
81 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
82 writeln!(f, "SampleGroup {{")?;
83 for sample in &self.samples {
84 writeln!(f, " {}", sample)?;
85 }
86 writeln!(f, "}}")
87 }
88}
89
90impl SampleGroup {
91 #[must_use]
100 pub fn from_samples(samples: &[Sample]) -> Self {
101 assert!(!samples.is_empty(), "Must provide one or more sample");
103
104 assert!(
106 samples.iter().map(|s| &s.sample_id).all_unique(),
107 "Each sample name must be unique, duplicate identified"
108 );
109
110 assert!(
112 samples.iter().map(|s| &s.barcode).all_unique(),
113 "Each sample barcode must be unique, duplicate identified",
114 );
115
116 let first_barcode_length = samples[0].barcode.len();
118 assert!(
119 samples.iter().map(|s| &s.barcode).all(|b| b.len() == first_barcode_length),
120 "All barcodes must have the same length",
121 );
122
123 Self {
124 samples: samples
125 .iter()
126 .enumerate()
127 .map(|(ordinal, sample)| {
128 Sample::new(ordinal, sample.sample_id.clone(), sample.barcode.clone())
129 })
130 .collect(),
131 }
132 }
133
134 pub fn from_file<P: AsRef<Path>>(path: &P) -> Result<SampleGroup, fgoxide::FgError> {
144 let reader = DelimFile::default();
145 Ok(Self::from_samples(&reader.read(path, DEFAULT_FILE_DELIMETER, false)?))
146 }
147}
148
#[cfg(test)]
mod tests {
    use core::panic;

    use super::*;
    use csv::DeserializeErrorKind as CsvDeserializeErrorEnum;
    use csv::ErrorKind as CsvErrorEnum;
    use fgoxide::{self, io::Io};
    use serde::de::value::Error as SerdeError;
    use serde::de::Error;
    use std::path::PathBuf;
    use tempfile::TempDir;

    /// Writes `lines` to a fresh `sample_metadata.tsv` inside a new temporary directory.
    ///
    /// The `TempDir` is returned together with the path so the caller can keep the
    /// directory alive for as long as the file is needed.
    fn write_metadata_file<S: AsRef<str>>(lines: &[S]) -> (TempDir, PathBuf) {
        let tempdir = TempDir::new().unwrap();
        let path = tempdir.path().join("sample_metadata.tsv");
        Io::default().write_lines(&path, lines).unwrap();
        (tempdir, path)
    }

    // ------------------------------------------------------------------------------------
    // SampleGroup::from_file
    // ------------------------------------------------------------------------------------

    /// A well-formed file with a header and two records is read in order.
    #[test]
    fn test_reading_from_tsv_file() {
        let lines = vec![
            Sample::deserialize_header_line(),
            "sample1\tGATTACA".to_owned(),
            "sample2\tCATGCTA".to_owned(),
        ];
        let (_tempdir, path) = write_metadata_file(&lines);
        let samples_metadata = SampleGroup::from_file(&path).unwrap();

        assert_eq!(samples_metadata.samples[0].sample_id, "sample1");
        assert_eq!(samples_metadata.samples[1].sample_id, "sample2");
        assert_eq!(samples_metadata.samples[0].barcode, "GATTACA");
        assert_eq!(samples_metadata.samples[1].barcode, "CATGCTA");
    }

    /// Trailing empty lines do not prevent the records from being read.
    #[test]
    fn test_reading_from_file_with_empty_lines_at_end() {
        let lines = vec![
            Sample::deserialize_header_line(),
            "sample1\tGATTACA".to_owned(),
            "sample2\tCATGCTA".to_owned(),
            String::new(),
            String::new(),
        ];
        let (_tempdir, path) = write_metadata_file(&lines);
        let samples_metadata = SampleGroup::from_file(&path).unwrap();

        assert_eq!(samples_metadata.samples[0].sample_id, "sample1");
        assert_eq!(samples_metadata.samples[1].sample_id, "sample2");
        assert_eq!(samples_metadata.samples[0].barcode, "GATTACA");
        assert_eq!(samples_metadata.samples[1].barcode, "CATGCTA");
    }

    /// A barcode containing `N` is accepted by `Sample::new`.
    #[test]
    fn test_new_sample_non_agct_bases_in_barcode_allowed() {
        let name = "s_1_example_name".to_owned();
        let barcode = "GATTANN".to_owned();
        let ordinal = 0;
        let _sample = Sample::new(ordinal, name, barcode);
    }

    /// Without a header line, deserialization fails with a "missing field" message.
    #[test]
    fn test_reading_from_file_with_no_header() {
        let lines = vec!["sample1\tGATTACA", "sample2\tCATGCTA"];
        let (_tempdir, path) = write_metadata_file(&lines);

        let mut saw_expected_error = false;
        if let fgoxide::FgError::ConversionError(csv_e) =
            SampleGroup::from_file(&path).unwrap_err()
        {
            if let CsvErrorEnum::Deserialize { pos: _, err: csv_de_err } = csv_e.into_kind() {
                if let CsvDeserializeErrorEnum::Message(s) = csv_de_err.kind() {
                    saw_expected_error = true;
                    assert_eq!(s, &SerdeError::missing_field("sample_id").to_string());
                }
            }
        }
        assert!(
            saw_expected_error,
            "Different error type than expected reading from headerless file."
        );
    }

    /// A file holding only the header yields no samples, which `from_samples` rejects.
    #[test]
    #[should_panic(expected = "Must provide one or more sample")]
    fn test_reading_header_only_file() {
        let lines = vec![Sample::deserialize_header_line()];
        let (_tempdir, path) = write_metadata_file(&lines);
        let _sm = SampleGroup::from_file(&path).unwrap();
    }

    /// A file holding a single empty line yields no samples, which `from_samples` rejects.
    #[test]
    #[should_panic(expected = "Must provide one or more sample")]
    fn test_reading_empty_file() {
        let lines = vec![""];
        let (_tempdir, path) = write_metadata_file(&lines);
        let _sm = SampleGroup::from_file(&path).unwrap();
    }

    /// Reading a path that does not exist surfaces a "not found" I/O error.
    #[test]
    fn test_reading_non_existent_file() {
        let tempdir = TempDir::new().unwrap();
        let path = tempdir.path().join("sample_metadata.tsv");
        if let fgoxide::FgError::IoError(e) = SampleGroup::from_file(&path).unwrap_err() {
            // Compare the error kind rather than the OS-specific message text so the test
            // does not depend on the platform's wording.
            assert_eq!(e.kind(), std::io::ErrorKind::NotFound);
        } else {
            panic!("Different error than expected reading non-existent file")
        }
    }

    // ------------------------------------------------------------------------------------
    // Sample::new
    // ------------------------------------------------------------------------------------

    /// Valid inputs produce a sample carrying exactly the given values.
    #[test]
    fn test_new_sample_success() {
        let name = "s_1_example_name".to_owned();
        let barcode = "GATTACA".to_owned();
        let ordinal = 0;
        let sample = Sample::new(ordinal, name.clone(), barcode.clone());
        assert_eq!(
            Sample { sample_id: name, barcode, ordinal },
            sample,
            "Sample differed from expectation"
        );
    }

    /// An empty name is rejected.
    #[test]
    #[should_panic(expected = "Sample name cannot be empty")]
    fn test_new_sample_fail1_empty_sample_name() {
        let name = String::new();
        let barcode = "GATTACA".to_owned();
        let ordinal = 0;
        let _sample = Sample::new(ordinal, name, barcode);
    }

    /// An empty barcode is rejected.
    #[test]
    #[should_panic(expected = "Sample barcode cannot be empty")]
    fn test_new_sample_fail2_empty_barcode() {
        let name = "s_1_example_name".to_owned();
        let barcode = String::new();
        let ordinal = 0;
        let _sample = Sample::new(ordinal, name, barcode);
    }

    // ------------------------------------------------------------------------------------
    // SampleGroup::from_samples
    // ------------------------------------------------------------------------------------

    /// A single valid sample forms a group.
    #[test]
    fn test_from_samples_sample_group_pass1_single_sample() {
        let sample1 = Sample::new(0, "sample_1".to_owned(), "GATTACA".to_owned());
        let samples_vec = vec![sample1.clone()];
        let sample_group = SampleGroup::from_samples(&samples_vec);

        assert_eq!(sample_group, SampleGroup { samples: vec![sample1] });
    }

    /// Several samples with distinct names and barcodes form a group.
    #[test]
    fn test_from_samples_sample_group_pass2_multi_unique_samples() {
        let sample1 = Sample::new(0, "sample_1".to_owned(), "GATTACA".to_owned());
        let sample2 = Sample::new(1, "sample_2".to_owned(), "CATGGAT".to_owned());
        let samples_vec = vec![sample1.clone(), sample2.clone()];
        let sample_group = SampleGroup::from_samples(&samples_vec);

        assert_eq!(sample_group, SampleGroup { samples: vec![sample1, sample2] });
    }

    /// Ordinals supplied by the caller are replaced by each sample's index in the input.
    #[test]
    fn test_from_samples_sample_group_pass3_ordinal_values_will_be_changed_by_new() {
        let sample1 = Sample::new(0, "sample_1".to_owned(), "GATTACA".to_owned());
        let sample2_before = Sample::new(2, "sample_2".to_owned(), "CATGGAT".to_owned());
        let sample2_after = Sample::new(1, "sample_2".to_owned(), "CATGGAT".to_owned());
        let samples_vec = vec![sample1.clone(), sample2_before];
        let sample_group = SampleGroup::from_samples(&samples_vec);

        assert_eq!(sample_group, SampleGroup { samples: vec![sample1, sample2_after] });
    }

    /// An empty slice is rejected.
    #[test]
    #[should_panic(expected = "Must provide one or more sample")]
    fn test_from_samples_sample_group_fail1_no_samples() {
        let samples = vec![];
        let _sample_group = SampleGroup::from_samples(&samples);
    }

    /// Two samples sharing a name (with different barcodes) are rejected.
    #[test]
    #[should_panic(expected = "Each sample name must be unique, duplicate identified")]
    fn test_from_samples_sample_group_fail2_duplicate_sample_names() {
        let samples = vec![
            Sample::new(0, "sample_1".to_owned(), "GATTACA".to_owned()),
            Sample::new(0, "sample_1".to_owned(), "CATGGAT".to_owned()),
        ];
        let _sample_group = SampleGroup::from_samples(&samples);
    }

    /// Two samples sharing a barcode (with different names) are rejected.
    #[test]
    #[should_panic(expected = "Each sample barcode must be unique, duplicate identified")]
    fn test_from_samples_sample_group_fail3_duplicate_barcodes() {
        let samples = vec![
            Sample::new(0, "sample_1".to_owned(), "GATTACA".to_owned()),
            Sample::new(0, "sample_2".to_owned(), "GATTACA".to_owned()),
        ];
        let _sample_group = SampleGroup::from_samples(&samples);
    }

    /// Barcodes of differing lengths are rejected.
    #[test]
    #[should_panic(expected = "All barcodes must have the same length")]
    fn test_from_samples_sample_group_fail4_barcodes_of_different_lengths() {
        let samples = vec![
            Sample::new(0, "sample_1".to_owned(), "GATTACA".to_owned()),
            Sample::new(0, "sample_2".to_owned(), "CATGGA".to_owned()),
        ];
        let _sample_group = SampleGroup::from_samples(&samples);
    }
}