readfish_tools/readfish.rs
//! Ports of readfish functions from Python to Rust.
//! These functions parse the TOML file that configures a readfish run, and implement the same methods as the Python version.
//! This allows the TOML to be parsed into the correct regions and barcodes, and the same methods to be used to get the same results as readfish.
4//!
5
6use crate::nanopore;
7use csv::ReaderBuilder;
8use serde::Deserialize;
9use std::{
10 any::Any,
11 collections::HashMap,
12 hash::{Hash, Hasher},
13 io::Cursor,
14 path::{Path, PathBuf},
15};
16use toml::{map::Map, Table, Value};
17
/// Action that can be taken once a decision (one of `single_off`, `single_on`, `multi_off`,
/// `multi_on`, `no_map`, `no_seq`, `exceeded_max_chunks`, `below_min_chunks`) has been made.
///
/// Values are created from the strings `"unblock"`, `"stop_receiving"` and `"proceed"`
/// through the `From<&str>` implementation.
#[derive(Debug, PartialEq)]
pub enum Action {
    /// Read would be unblocked.
    Unblock,
    /// Complete sequencing naturally.
    StopReceiving,
    /// Proceed with sequencing.
    Proceed,
}
/// Type for the Contig -> coordinates hashmap: a contig name mapped to the list of
/// `(start, stop)` coordinate pairs recorded for that contig.
type HashedTargets = HashMap<String, Vec<(usize, usize)>>;
31
32impl From<&str> for Action {
33 fn from(source: &str) -> Action {
34 match source {
35 "unblock" => Action::Unblock,
36 "stop_receiving" => Action::StopReceiving,
37 "proceed" => Action::Proceed,
38 _ => {
39 panic!("Unknown Action given")
40 }
41 }
42 }
43}
44
/// The _Condition struct holds the settings lifted from the TOML file, for each
/// region of the flowcell or barcode.
///
/// It is built from a TOML table through `From<&Map<String, Value>>`, which supplies the
/// defaults noted on the individual fields.
#[derive(Debug, PartialEq)]
pub struct _Condition {
    /// The name of the Condition (Barcode/Region).
    pub name: String,
    /// Whether this Region/Barcode is a control. Defaults to `false` when the TOML table has no `control` key.
    pub control: bool,
    /// The minimum number of read chunks that have to be captured for a read to be processed. Default if not met is to proceed.
    /// Defaults to `0` when the TOML table has no `min_chunks` key.
    pub min_chunks: u8,
    /// The maximum number of read chunks that can be captured for a read. Default if exceeded is to unblock.
    /// Defaults to `4` when the TOML table has no `max_chunks` key.
    pub max_chunks: u8,
    /// The targets associated with the Condition.
    pub targets: Targets,
    /// The action to perform when an alignment returns one single primary mapping, outside of any target regions.
    pub single_off: Action,
    /// The action to perform when an alignment returns one single primary mapping, inside of a target region.
    pub single_on: Action,
    /// The action to perform when an alignment returns multiple primary mappings, all outside of any target regions.
    pub multi_off: Action,
    /// The action to perform when an alignment returns multiple primary mappings, at LEAST ONE of which is inside of a target region.
    pub multi_on: Action,
    /// The action to perform when no alignments are returned for this read.
    pub no_map: Action,
    /// The action to perform when no sequence is produced for this read sequence.
    pub no_seq: Action,
}
72
/// Plain getters and setters for every field of [`_Condition`].
///
/// Getters for `Copy` fields return the value; all other getters return a reference.
/// Setters overwrite the field with the given value and perform no validation.
impl _Condition {
    /// Getter for the `name` field.
    pub fn get_name(&self) -> &String {
        &self.name
    }

    /// Setter for the `name` field.
    pub fn set_name(&mut self, name: String) {
        self.name = name;
    }

    /// Getter for the `control` field.
    pub fn is_control(&self) -> bool {
        self.control
    }

    /// Setter for the `control` field.
    pub fn set_control(&mut self, control: bool) {
        self.control = control;
    }

    /// Getter for the `min_chunks` field.
    pub fn get_min_chunks(&self) -> u8 {
        self.min_chunks
    }

    /// Setter for the `min_chunks` field.
    pub fn set_min_chunks(&mut self, min_chunks: u8) {
        self.min_chunks = min_chunks;
    }

    /// Getter for the `max_chunks` field.
    pub fn get_max_chunks(&self) -> u8 {
        self.max_chunks
    }

    /// Setter for the `max_chunks` field.
    pub fn set_max_chunks(&mut self, max_chunks: u8) {
        self.max_chunks = max_chunks;
    }

    /// Getter for the `targets` field.
    pub fn get_targets(&self) -> &Targets {
        &self.targets
    }

    /// Setter for the `targets` field.
    pub fn set_targets(&mut self, targets: Targets) {
        self.targets = targets;
    }

    /// Getter for the `single_off` field: the Action taken when a single primary mapping is
    /// found outside of any target regions.
    pub fn get_single_off(&self) -> &Action {
        &self.single_off
    }

    /// Setter for the `single_off` field: the Action to take when a single primary mapping is
    /// found outside of any target regions.
    pub fn set_single_off(&mut self, single_off: Action) {
        self.single_off = single_off;
    }

    /// Getter for the `single_on` field: the Action taken when a single primary mapping is
    /// found inside a target region.
    pub fn get_single_on(&self) -> &Action {
        &self.single_on
    }

    /// Setter for the `single_on` field: the Action to take when a single primary mapping is
    /// found inside a target region.
    pub fn set_single_on(&mut self, single_on: Action) {
        self.single_on = single_on;
    }

    /// Getter for the `multi_off` field: the Action taken when multiple primary mappings are
    /// found, all outside of any target regions.
    pub fn get_multi_off(&self) -> &Action {
        &self.multi_off
    }

    /// Setter for the `multi_off` field.
    pub fn set_multi_off(&mut self, multi_off: Action) {
        self.multi_off = multi_off;
    }

    /// Getter for the `multi_on` field: the Action taken when multiple primary mappings are
    /// found, at least one of which is inside a target region.
    pub fn get_multi_on(&self) -> &Action {
        &self.multi_on
    }
    /// Setter for the `multi_on` field.
    pub fn set_multi_on(&mut self, multi_on: Action) {
        self.multi_on = multi_on;
    }

    /// Getter for the `no_map` field: the Action taken when no alignments are returned.
    pub fn get_no_map(&self) -> &Action {
        &self.no_map
    }
    /// Setter for the `no_map` field.
    pub fn set_no_map(&mut self, no_map: Action) {
        self.no_map = no_map;
    }

    /// Getter for the `no_seq` field: the Action taken when no sequence is produced.
    pub fn get_no_seq(&self) -> &Action {
        &self.no_seq
    }
    /// Setter for the `no_seq` field.
    pub fn set_no_seq(&mut self, no_seq: Action) {
        self.no_seq = no_seq;
    }
}
181
#[derive(Debug, PartialEq)]
/// Represents a region of the flow cell, denoted in the configuration TOML as
///
/// ```toml
///
/// [[regions]]
/// name = "Rapid_CNS"
/// min_chunks = 1
/// max_chunks = 4
/// targets = "resources/panel_adaptive_nogenenames_20122021_hg38.bed"
/// single_off = "unblock"
/// multi_off = "unblock"
/// single_on = "stop_receiving"
/// multi_on = "stop_receiving"
/// no_seq = "proceed"
/// no_map = "proceed"
/// ```
///
/// All the parsed fields are stored in a [`_Condition`] struct, as they could also come from a barcodes table.
pub struct Region {
    /// The parsed region settings.
    pub condition: _Condition,
}
204
/// Represents a barcode on the sequencing library. This supersedes any regions.
///
/// ```toml
///
/// [barcodes.barcode02]
/// name = "barcode02"
/// control = false
/// min_chunks = 0
/// max_chunks = 4
/// targets = []
/// single_on = "unblock"
/// multi_on = "unblock"
/// single_off = "unblock"
/// multi_off = "unblock"
/// no_seq = "proceed"
/// no_map = "unblock"
/// ```
///
/// All the parsed fields are stored in a [`_Condition`] struct, as they could also come from a regions table.
#[derive(Debug, PartialEq)]
struct Barcode {
    /// The parsed barcode settings.
    condition: _Condition,
}
229
/// Trait for the behaviour shared by `Region` and `Barcode`.
///
/// Both types wrap a [`_Condition`]; this trait lets callers treat them uniformly and still
/// recover the concrete type through [`Condition::any`].
pub trait Condition {
    /// Returns whether this Condition is a control.
    fn control(&self) -> bool;
    /// Returns `self` as [`Any`], which allows downcasting to the concrete `Barcode` or `Region`.
    fn any(&self) -> &dyn Any;
    /// Returns the targets of the condition.
    fn get_targets(&self) -> &Targets;
    /// Returns the underlying [`_Condition`] struct.
    fn get_condition(&self) -> &_Condition;
}
243
244impl Condition for Region {
245 fn control(&self) -> bool {
246 self.condition.control
247 }
248
249 fn any(&self) -> &dyn Any {
250 self
251 }
252
253 fn get_targets(&self) -> &Targets {
254 &self.condition.targets
255 }
256
257 fn get_condition(&self) -> &_Condition {
258 &self.condition
259 }
260}
261
262impl Condition for Barcode {
263 fn control(&self) -> bool {
264 self.condition.control
265 }
266 fn any(&self) -> &dyn Any {
267 self
268 }
269 fn get_targets(&self) -> &Targets {
270 &self.condition.targets
271 }
272 fn get_condition(&self) -> &_Condition {
273 &self.condition
274 }
275}
276
277impl From<&Map<String, Value>> for _Condition {
278 fn from(source: &Map<String, Value>) -> Self {
279 let targets: TargetType = source.get("targets").unwrap().into();
280 let target: Targets = Targets::new(targets);
281 _Condition {
282 name: source.get("name").unwrap().as_str().unwrap().to_string(),
283 control: source
284 .get("control")
285 .unwrap_or(&toml::Value::Boolean(false))
286 .as_bool()
287 .unwrap(),
288 min_chunks: source
289 .get("min_chunks")
290 .unwrap_or(&toml::Value::Integer(0))
291 .as_integer()
292 .unwrap()
293 .try_into()
294 .unwrap(),
295
296 max_chunks: source
297 .get("max_chunks")
298 .unwrap_or(&toml::Value::Integer(4))
299 .as_integer()
300 .unwrap()
301 .try_into()
302 .unwrap(),
303 targets: target,
304 single_off: source.get("single_off").unwrap().as_str().unwrap().into(),
305 single_on: source.get("single_on").unwrap().as_str().unwrap().into(),
306 multi_on: source.get("multi_on").unwrap().as_str().unwrap().into(),
307 multi_off: source.get("multi_off").unwrap().as_str().unwrap().into(),
308 no_map: source.get("no_map").unwrap().as_str().unwrap().into(),
309 no_seq: source.get("no_seq").unwrap().as_str().unwrap().into(),
310 }
311 }
312}
313
/// Strand that the target is on.
#[derive(Debug, Hash, PartialEq)]
enum Strand {
    /// Represents the forward (sense) strand.
    Forward,
    /// Represents the reverse (anti-sense) strand.
    Reverse,
}
322
/// A wrapper for the Strand, which implements Hash and Eq, allowing the Strand enum to be used for
/// a HashMap key.
///
/// The string conversions (`to_string` and `AsRef<str>`) are implemented on the wrapped
/// [`Strand`] itself, so a strand can be passed alongside other string-like types into
/// functions that need the strand.
#[derive(PartialEq, Debug)]
struct StrandWrapper(Strand);
330
// Marker impl: together with the derived `PartialEq` and the `Hash` impl this makes
// `StrandWrapper` usable as a `HashMap` key.
impl Eq for StrandWrapper {}
332
impl Hash for StrandWrapper {
    /// Feeds the enum discriminant of the wrapped [`Strand`] into `state`, so wrappers around
    /// the same variant produce the same hash.
    fn hash<H: Hasher>(&self, state: &mut H) {
        std::mem::discriminant(&self.0).hash(state);
    }
}
338
339impl From<&str> for Strand {
340 fn from(source: &str) -> Strand {
341 match source {
342 "+" => Strand::Forward,
343 "-" => Strand::Reverse,
344 "1" => Strand::Forward,
345 "-1" => Strand::Reverse,
346 _ => Strand::Forward,
347 }
348 }
349}
350
351impl ToString for Strand {
352 fn to_string(&self) -> String {
353 match self {
354 Strand::Forward => "+".to_string(),
355 Strand::Reverse => "-".to_string(),
356 }
357 }
358}
359
impl AsRef<str> for Strand {
    /// Borrows the strand as a static string: `"+"` for forward, `"-"` for reverse.
    fn as_ref(&self) -> &str {
        match self {
            Strand::Forward => "+",
            Strand::Reverse => "-",
        }
    }
}
/// TargetType enum, represents whether targets were listed directly in the TOML file
/// or a path to a file containing the targets was given.
#[derive(Clone, Debug, PartialEq)]
enum TargetType {
    /// Variant representing targets that were given directly in the TOML file.
    Direct(Vec<String>),
    /// Variant representing targets that were given as a path to a file that contains targets.
    ViaFile(PathBuf),
}
/// Represents a BED record, which is read from a BED file. All six columns are expected, however we do not use `_name` or `_score`.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct BedRecord {
    /// The contig or chromosome name associated with the record.
    contig: String,
    /// The starting position of the record.
    start: usize,
    /// The stopping position of the record.
    stop: usize,
    /// The name associated with the record. Deserialized but not carried over into a `CsvRecord`.
    _name: String,
    /// The score associated with the record. Deserialized but not carried over into a `CsvRecord`.
    _score: String,
    /// The strand information of the record.
    strand: String,
}
394
/// CSV record parsed from targets specified in the TOML file.
/// If a BED file is provided, the six columns are taken and placed in a
/// `BedRecord`. This `BedRecord` is then converted into a `CsvRecord`.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CsvRecord {
    /// Contig the target is on.
    pub contig: String,
    /// Optional start coordinate of the target.
    #[serde(default)]
    pub start: Option<usize>,
    /// Optional stop coordinate of the target. Required if start is present.
    #[serde(default)]
    pub stop: Option<usize>,
    /// Optional strand the target is on. One of "+"/"-". Required if start/stop are provided.
    #[serde(default)]
    pub strand: Option<String>,
}
413
414impl From<BedRecord> for CsvRecord {
415 fn from(source: BedRecord) -> CsvRecord {
416 CsvRecord {
417 contig: source.contig,
418 start: Some(source.start),
419 stop: Some(source.stop),
420 strand: Some(source.strand),
421 }
422 }
423}
424
425impl CsvRecord {
426 /// Checks if the structure has valid coordinates.
427 ///
428 /// Returns `true` if both `start` and `stop` fields have values,
429 /// indicating that the structure has valid coordinates. Otherwise, returns `false`.
430 ///
431 /// # Examples
432 ///
433 /// ```rust, ignore
434 /// # use readfish_tools::CsvRecord;
435 ///
436 /// let record = CsvRecord {
437 /// contig: "chr1".to_string(),
438 /// start: Some(100),
439 /// stop: Some(200),
440 /// strand: Some("+".to_string()),
441 /// };
442 ///
443 /// assert!(record.has_coords()); // Returns true
444 ///
445 /// let invalid_record = CsvRecord {
446 /// contig: "chr2".to_string(),
447 /// start: None,
448 /// stop: None,
449 /// strand: Some("-".to_string()),
450 /// };
451 ///
452 /// assert!(!invalid_record.has_coords()); // Returns false
453 /// ```
454 fn has_coords(&self) -> bool {
455 self.start.is_some() && self.stop.is_some()
456 }
457
458 /// Retrieves the coordinates from the structure.
459 ///
460 /// Returns a tuple containing the start and stop coordinates of the structure.
461 /// If the structure has valid coordinates (i.e., `has_coords()` is true),
462 /// the actual values of the `start` and `stop` fields are returned.
463 /// Otherwise, a default range of (0, usize::MAX) is returned.
464 ///
465 /// # Examples
466 ///
467 /// ```rust, ignore
468 /// # use readfish_tools::CsvRecord;
469 ///
470 /// let record = CsvRecord {
471 /// contig: "chr1".to_string(),
472 /// start: Some(100),
473 /// stop: Some(200),
474 /// strand: Some("+".to_string()),
475 /// };
476 ///
477 /// assert_eq!(record.get_coords(), (100, 200));
478 ///
479 /// let invalid_record = CsvRecord {
480 /// contig: "chr2".to_string(),
481 /// start: None,
482 /// stop: Some(300),
483 /// strand: Some("-".to_string()),
484 /// };
485 ///
486 /// assert_eq!(invalid_record.get_coords(), (0, usize::MAX));
487 /// ```
488 fn get_coords(&self) -> (usize, usize) {
489 if self.has_coords() {
490 (self.start.unwrap(), self.stop.unwrap())
491 } else {
492 (0, usize::MAX)
493 }
494 }
495
496 /// Retrieves the strand information from the struct.
497 ///
498 /// This function returns an `Option<Strand>` representing the strand information stored in the struct.
499 /// If the `strand` field is [`Some`], the function maps the string value to the corresponding [`Strand`] enum variant
500 /// using the `from` method. If the `strand` field is [`None`], the function returns `None`.
501 ///
502 /// # Returns
503 ///
504 /// An `Option<Strand>` representing the strand information, or `None` if no strand is available.
505 ///
506 /// # Examples
507 ///
508 /// ```rust,ignore
509 /// # use readfish_tools::CsvRecord;
510 ///
511 /// let record = CsvRecord {
512 /// contig: "chr1".to_string(),
513 /// start: Some(100),
514 /// stop: Some(200),
515 /// strand: Some("+".to_string()),
516 /// };
517 /// let strand = record.get_strand();
518 /// assert_eq(strand, Some(Strand::Forward))
519 /// ```
520 fn get_strand(&self) -> Option<Strand> {
521 self.strand
522 .as_ref()
523 .map(|strand_string| Strand::from(strand_string.as_str()))
524 }
525}
526
527impl From<&Value> for TargetType {
528 fn from(source: &Value) -> TargetType {
529 match source.is_array() {
530 true => TargetType::Direct(
531 source
532 .as_array()
533 .unwrap()
534 .iter()
535 .map(|x| x.as_str().unwrap().to_string())
536 .collect(),
537 ),
538 false => TargetType::ViaFile(PathBuf::from(source.as_str().unwrap())),
539 }
540 }
541}
542
/// Represents a configuration for a flowcell.
#[derive(Debug)]
pub struct Conf {
    /// The total number of channels on the flowcell.
    channels: usize,
    /// The regions of the flowcell. Each contains the name of the region and the Action to take for each alignment type.
    regions: Vec<Region>,
    /// The barcodes from the sequencing library, keyed by a `String`.
    barcodes: HashMap<String, Barcode>,
    /// The mapping of channel number to the index of the region that channel belongs to.
    _channel_map: HashMap<usize, usize>,
}
#[derive(Debug, PartialEq)]
/// Holds the targets for a given region or barcode.
pub struct Targets {
    /// The targets value as listed in the TOML. Can either be an array of strings, in which case that is assumed to be the targets themselves, or a string,
    /// which is assumed to be a file path to a file containing the targets.
    value: TargetType,
    /// A hashmap containing the targets themselves, in the form of
    /// Strand => Contig => Start and stop target coordinates.
    _targets: HashMap<StrandWrapper, HashedTargets>,
}
565
566impl Targets {
567 /// Creates a new instance of [`Targets`] with the provided target data.
568 ///
569 /// This function takes the target data in the form of [`TargetType`] and constructs a new [`Targets`] struct.
570 /// The [`TargetType`] can either be an array of strings, representing the targets themselves,
571 /// or a string representing a file path to a file containing the targets (.bed or .csv).
572 ///
573 /// If the targets is an array of strings, they must be im the format "contig, start, stop, strand" OR "contig".
574 /// For example:
575 ///
576 /// ```toml
577 /// targets = ["chr2,10.20.+", "chr1"]
578 /// ```
579 ///
580 /// If only the contig is provided, it is assumed that the whole contig is the target, on BOTH strands. See example below.
581 ///
582 /// The target data is stored in the `value` field, while the parsed targets are stored in the `_targets` field
583 /// as a hashmap with strand, contig, and start/stop target coordinates.
584 ///
585 /// # Arguments
586 ///
587 /// * `targets` - The target data in the form of [`TargetType`].
588 ///
589 /// # Examples
590 ///
591 /// ```rust, ignore
592 /// # use my_module::{Targets, TargetType};
593 ///
594 /// let target_data = TargetType::Direct(vec!["chr1".to_string(), "chr2,10.20.+".to_string()]);
595 /// let targets = Targets::new(target_data);
596 ///
597 /// assert_eq!(targets.value, TargetType::Array(vec!["chr1".to_string(), "chr2,10.20.+".to_string()]));
598 /// assert_eq!(targets._targets.len(), 2);
599 ///
600 /// println!("{:#?}", targets._targets)
601 /// // {
602 /// // StrandWrapper(Forward): {"chr1": [(0, 18_446_744_073_709_551_615)], "chr2": [(10, 20)]}
603 /// // StrandWrapper(Reverse): {"chr1": [(0, 18_446_744_073_709_551_615)]}
604 /// // }
605 /// // NOTE the single contig target chr1 is on both strands in its entirety.
606 ///
607 /// ```
608 fn new(targets: TargetType) -> Targets {
609 let t = targets.clone();
610 Targets {
611 value: targets,
612 _targets: Targets::from_parsed_toml(t),
613 }
614 }
615
616 /// Inserts target coordinates into the `targets` hashmap based on the provided record and strand.
617 ///
618 /// This function takes a mutable reference to the `targets` hashmap, a reference to a [`CsvRecord`],
619 /// and a variant from the `strand` Enum. It inserts the record coordinates into a Vec at the lowest level of
620 /// the `targets` hashmap based on the strand and contig.
621 ///
622 /// If the strand does not exist in the `targets` hashmap, a new entry is created for the strand,
623 /// and an empty hashmap is inserted for the contig. If the contig does not exist for the strand,
624 /// a new entry is created for the contig, and an empty vector is inserted to store the coordinates.
625 ///
626 /// The record coordinates are retrieved using the `get_coords()` method from the [`CsvRecord`] struct.
627 ///
628 /// # Arguments
629 ///
630 /// * `targets` - A mutable reference to the `HashMap<StrandWrapper, HashedTargets>]` where the record will be inserted.
631 /// * `record` - A reference to the `CsvRecord` containing the record information.
632 /// * `strand` - The strand information associated with the record.
633 ///
634 /// # Examples
635 ///
636 /// ```rust,ignore
637 /// use readfish_tools::{insert_into_targets, CsvRecord, Strand, StrandWrapper, HashedTargets};
638 /// use std::collections::HashMap;
639 ///
640 /// let mut targets: HashMap<StrandWrapper, HashedTargets> = HashMap::new();
641 ///
642 /// let record = CsvRecord {
643 /// contig: "chr1".to_string(),
644 /// start: Some(100),
645 /// stop: Some(200),
646 /// strand: Some("+".to_string()),
647 /// };
648 ///
649 /// insert_into_targets(&mut targets, &record, "+");
650 ///
651 /// assert_eq!(targets.len(), 1);
652 /// assert_eq!(targets.get(&StrandWrapper(Strand::Forward)).unwrap().len(), 1);
653 /// assert_eq!(targets.get(&StrandWrapper(Strand::Forward)).unwrap().get("chr1").unwrap().len(), 1);
654 /// assert_eq!(targets.get(&StrandWrapper(Strand::Forward)).unwrap().get("chr1").unwrap()[0], (100, 200));
655 /// ```
656 fn insert_into_targets(
657 targets: &mut HashMap<StrandWrapper, HashedTargets>,
658 record: &CsvRecord,
659 strand: Strand,
660 ) {
661 let coords = targets
662 .entry(StrandWrapper(strand))
663 .or_insert(HashMap::new())
664 .entry(record.contig.clone())
665 .or_insert(Vec::with_capacity(1000));
666 coords.push(record.get_coords())
667 }
668
669 /// Creates a hashmap of targets from the parsed TOML data.
670 ///
671 /// This function takes the `targets` data in the form of [`TargetType`]] and constructs a hashmap of targets
672 /// grouped by strand and contig, with start and stop coordinates as values. The `targets` can be provided
673 /// either as a direct array of target strings or as a path to a CSV or BED file containing the targets.
674 ///
675 /// If `targets` is of type [`TargetType::Direct`], the function treats the data as direct target strings,
676 /// parses them as CSV data, and populates the hashmap with the targets grouped by strand and contig.
677 /// If `targets` is of type [`TargetType::ViaFile`], the function treats the data as a file path,
678 /// determines the file type (CSV or BED), and parses the data accordingly to populate the hashmap.
679 ///
680 /// The function uses the [`CsvRecord`] struct for deserialization of CSV records and the [`BedRecord`] struct
681 /// for deserialization of BED records. The appropriate deserialization is performed based on the file type.
682 ///
683 /// After populating the hashmap, the function merges overlapping intervals within each contig
684 /// using the [`Self::_merge_intervals()`] helper function.
685 ///
686 /// # Arguments
687 ///
688 /// * `targets` - The target data in the form of [`TargetType`].
689 ///
690 /// # Returns
691 ///
692 /// A hashmap of targets grouped by strand and contig, with start and stop coordinates as values.
693 ///
694 /// # Examples
695 ///
696 /// ```rust,ignore
697 /// use readfish_tools::{Targets::from_parsed_toml, TargetType, CsvRecord};
698 /// use std::collections::HashMap;
699 /// use std::path::PathBuf;
700 ///
701 /// let target_data = TargetType::Direct(vec![
702 /// "chr2,".to_string(),
703 /// "chr1,10,20,+".to_string(),
704 /// ]);
705 ///
706 /// let targets = from_parsed_toml(target_data);
707 ///
708 /// assert_eq!(targets.len(), 2);
709 /// assert_eq!(targets.get(&StrandWrapper(Strand::Forward)).unwrap().get("chr2").unwrap()[0], (0_usize, usize::MAX));
710 /// assert_eq!(targets.get(&StrandWrapper(Strand::Forward)).unwrap().get("chr1").unwrap()[0], (10_usize,20_usize));
711 /// ```
712 fn from_parsed_toml(
713 targets: TargetType,
714 ) -> HashMap<StrandWrapper, HashMap<String, Vec<(usize, usize)>>> {
715 let mut results = HashMap::new();
716 let mut bed_file = false;
717 let mut delim = b',';
718 match targets {
719 TargetType::Direct(target_vec) => {
720 if target_vec.is_empty() {
721 return results;
722 }
723 let csv_data = target_vec.join("\n");
724 let file = Cursor::new(csv_data);
725 let mut reader = ReaderBuilder::new()
726 .flexible(true)
727 .has_headers(false)
728 .delimiter(delim)
729 .from_reader(file);
730 for record in reader.records() {
731 let record = record.unwrap();
732 let record: CsvRecord = record.deserialize(None).unwrap();
733 if record.has_coords() {
734 Targets::insert_into_targets(
735 &mut results,
736 &record,
737 record.get_strand().unwrap(),
738 );
739 } else {
740 Targets::insert_into_targets(&mut results, &record, Strand::Forward);
741 Targets::insert_into_targets(&mut results, &record, Strand::Reverse);
742 }
743 }
744 }
745 TargetType::ViaFile(file_path) => {
746 // TODO won't handle gzipped bed files
747 if file_path.extension().unwrap() == "bed" {
748 bed_file = true;
749 delim = b'\t';
750 }
751 let mut rdr = ReaderBuilder::new()
752 .delimiter(delim)
753 .flexible(true)
754 .has_headers(false)
755 .from_path(file_path)
756 .expect("Could not open targets file!");
757 for record in rdr.records() {
758 let record = record.unwrap();
759 let record: CsvRecord = match bed_file {
760 true => {
761 let x: BedRecord = record.deserialize(None).unwrap();
762 x.into()
763 }
764 false => {
765 let x: CsvRecord = record.deserialize(None).unwrap();
766 x
767 }
768 };
769 // Has coordinates and strand provided
770 if record.has_coords() {
771 Targets::insert_into_targets(
772 &mut results,
773 &record,
774 record.get_strand().unwrap(),
775 );
776 } else {
777 Targets::insert_into_targets(&mut results, &record, Strand::Forward);
778 Targets::insert_into_targets(&mut results, &record, Strand::Reverse);
779 }
780 }
781 }
782 }
783 results.iter_mut().for_each(|(_strand, contig_hashmap)| {
784 contig_hashmap
785 .iter_mut()
786 .for_each(|(_, v)| *v = Targets::_merge_intervals(v))
787 });
788 results
789 }
790
791 /// Merges overlapping intervals within a vector of intervals.
792 ///
793 /// This function takes a mutable reference to a vector of intervals represented as tuples `(usize, usize)`
794 /// and merges any overlapping intervals into collapsed ranges. The intervals are expected to be sorted
795 /// based on the starting index before calling this function.
796 ///
797 /// If the number of intervals is less than 2, the function returns a clone of the input vector as there
798 /// are no overlapping intervals to merge.
799 ///
800 /// The function iterates over the sorted intervals and maintains a current range. For each interval,
801 /// if it overlaps with the current range, the end index of the current range is updated to the maximum
802 /// of the current end index and the interval's end index. If the interval is non-overlapping, the
803 /// current range is added to the collapsed ranges and updated to the new interval. If it's the first
804 /// range encountered, the current range is initialized. Finally, the last current range (if any) is added
805 /// to the collapsed ranges.
806 ///
807 /// The resulting collapsed ranges are returned as a new vector.
808 ///
809 /// # Arguments
810 ///
811 /// * `intervals` - A mutable reference to a vector of intervals to be merged.
812 ///
813 /// # Returns
814 ///
815 /// A vector of collapsed ranges after merging overlapping intervals.
816 ///
817 /// # Examples
818 ///
819 /// ```rust,ignore
820 ///
821 /// let mut intervals = vec![(1, 5), (4, 9), (10, 15), (13, 18)];
822 /// let collapsed_ranges = Targets::_merge_intervals(&mut intervals);
823 ///
824 /// assert_eq!(collapsed_ranges, vec![(1, 9), (10, 18)]);
825 /// ```
826 fn _merge_intervals(intervals: &mut Vec<(usize, usize)>) -> Vec<(usize, usize)> {
827 // ToDo consider explicitly forbidding start > end or end < start
828 let n_args = intervals.len();
829 if n_args < 2 {
830 return intervals.clone();
831 }
832 intervals.sort(); // Sort the ranges based on the starting index
833 let mut collapsed_ranges: Vec<(usize, usize)> = Vec::new();
834 let mut current_range: Option<(usize, usize)> = None;
835 for &(start, end) in intervals.iter() {
836 if let Some((current_start, current_end)) = current_range {
837 if start <= current_end {
838 // Overlapping range, update the current range's end index
839 current_range = Some((current_start, current_end.max(end)));
840 } else {
841 // Non-overlapping range, add the current range and update the current range
842 collapsed_ranges.push((current_start, current_end));
843 current_range = Some((start, end));
844 }
845 } else {
846 // First range encountered, initialize the current range
847 current_range = Some((start, end));
848 }
849 }
850 // Add the last current range (if any)
851 if let Some((current_start, current_end)) = current_range {
852 collapsed_ranges.push((current_start, current_end));
853 }
854 collapsed_ranges
855 }
856
857 /// Checks if the given coordinate falls within any of the target intervals for the specified contig and strand.
858 ///
859 /// This function takes a reference to a [`CsvRecord`] struct and performs a lookup in the [`Targets`] struct's
860 /// `_targets` hashmap to retrieve the intervals for the specified contig and strand. It then checks if the
861 /// given coordinate falls within any of the target intervals by iterating over the intervals and performing
862 /// the comparison.
863 ///
864 /// The function expects the `strand` argument to implement the [`ToString`] trait, which allows the function
865 /// to convert it to a [`String`]. The `strand` is then converted to the [`Strand`] enum type using the `into()`
866 /// method.
867 ///
868 /// # Generic Parameters
869 ///
870 /// * `T` - The type of the `strand` argument that implements the [`ToString`] trait.
871 ///
872 /// # Arguments
873 ///
874 /// * `contig` - The contig string to lookup the intervals for.
875 /// * `strand` - The strand value to lookup the intervals for. It is expected to be convertible to a [`String`].
876 /// * `coord` - The coordinate value to check against the intervals.
877 ///
878 /// # Returns
879 ///
880 /// A boolean value indicating whether the coordinate falls within any of the target intervals for the
881 /// specified contig and strand.
882 ///
883 /// # Examples
884 ///
885 /// ```rust, ignore
886 /// ///
887 /// let targets = Targets::new(TargetType::Direct(vec![
888 /// "Contig1,100,200,+".to_string(),
889 /// "Contig2,300,400,-".to_string(),
890 /// ]));
891 ///
892 /// let record = CsvRecord {
893 /// contig: "Contig1".to_string(),
894 /// start: Some(150),
895 /// stop: Some(180),
896 /// strand: Some("+".to_string()),
897 /// };
898 ///
899 /// let is_within_interval = record.get_coords("Contig1", "+", 160);
900 ///
901 /// assert!(is_within_interval);
902 /// ```
903 fn check_coords<T: ToString>(&self, contig: &str, strand: T, coord: usize) -> bool {
904 let strand: Strand = strand.to_string().as_str().into();
905 let intervals = self
906 ._targets
907 .get(&StrandWrapper(strand))
908 .and_then(|inner_map| inner_map.get(contig));
909 if let Some(intervals) = intervals {
910 intervals
911 .iter()
912 .any(|&(start, end)| start <= coord && coord <= end)
913 } else {
914 false
915 }
916 }
917}
918
919impl Conf {
    /// Creates a [`Conf`] instance from a TOML string.
    ///
    /// This is a thin wrapper: the string is passed unchanged to `Conf::new`, and the
    /// `Conf` it returns is handed back.
    ///
    /// # Arguments
    ///
    /// * `toml_string` - The TOML string to parse and create the `Conf` from.
    ///
    /// # Examples
    ///
    /// NOTE(review): the table layout below follows the examples documented on `Region`
    /// and `Barcode`; confirm against `Conf::new`, including whether `channels` is read
    /// from the TOML.
    ///
    /// ```rust,ignore
    /// let toml_str = r#"
    ///     channels = 10
    ///
    ///     [[regions]]
    ///     name = "Region 1"
    ///     min_chunks = 2
    ///     max_chunks = 5
    ///     # ...
    ///
    ///     [barcodes.barcode01]
    ///     name = "Barcode 1"
    ///     min_chunks = 1
    ///     max_chunks = 3
    ///     # ...
    /// "#;
    ///
    /// let conf = Conf::from_string(toml_str);
    /// ```
    ///
    /// # Panics
    ///
    /// Any panic raised by `Conf::new` propagates to the caller. Presumably that covers
    /// TOML that fails to parse or contains invalid values; verify against `Conf::new`.
    ///
    /// # Returns
    ///
    /// The constructed `Conf` instance.
    ///
    fn from_string(toml_string: &str) -> Conf {
        Conf::new(toml_string)
    }
965
966 /// Constructs a new [`Conf`] instance by parsing a TOML file.
967 ///
968 /// This function takes a TOML file path (`toml_path`) and reads its contents
969 /// using [`std::fs::read_to_string`]. The contents of the TOML file are then
970 /// passed to the `Conf::new` function to create a new `Conf` instance.
971 ///
972 /// # Arguments
973 ///
974 /// * `toml_path` - The path to the TOML file to be parsed.
975 ///
976 /// # Panics
977 ///
978 /// This function panics if the TOML file cannot be read or if parsing the TOML
979 /// content into a `Conf` instance fails.
980 ///
981 /// # Examples
982 ///
983 /// ```rust,ignore
984 /// use std::path::Path;
985 ///
986 /// let toml_path = Path::new("config.toml");
987 /// let conf = Conf::from_file(toml_path);
988 /// ```
989 pub fn from_file(toml_path: impl AsRef<Path>) -> Conf {
990 let toml_content = std::fs::read_to_string(toml_path).unwrap();
991 Conf::new(&toml_content)
992 }
993
994 /// Constructs a new [`Conf`] instance by parsing a String representation of TOML file.
995 ///
996 /// This function takes a String representation of a toml file (`toml_content`).
997 /// The TOML content is then parsed into a `Table` using the `parse::<Table>` method. The [`Table`] represents
998 /// the parsed TOML structure.
999 ///
1000 /// The function initializes empty vectors `regions` and `barcodes` to hold the parsed regions and barcodes,
1001 /// respectively. It then checks if the parsed TOML structure contains the "regions" and "barcodes" sections.
1002 /// If the sections are present, the function iterates over the corresponding values and converts them into
1003 /// [`Region`] and [`Barcode`] structs, which are added to the `regions` and `barcodes` vectors, respectively.
1004 ///
1005 /// Finally, the function constructs and returns a new [`Conf`] instance with the populated `regions` and `barcodes`
1006 /// vectors. The `channels` field is set to 0, and the `_channel_map` field is initialized as an empty [`HashMap].
1007 ///
1008 /// # Arguments
1009 ///
1010 /// * `toml_path` - The path to the TOML file.
1011 ///
1012 /// # Returns
1013 ///
1014 /// A new [`Conf`] instance with the parsed regions and barcodes.
1015 ///
1016 /// # Panics
1017 ///
1018 /// This function panics if there is an error reading the TOML file or parsing its contents.
1019 ///
1020 /// # Examples
1021 ///
1022 /// ```rust,ignore
1023 /// use my_module::Conf;
1024 ///
1025 /// let conf = Conf::new("config.toml");
1026 ///
1027 /// // Perform operations on the `conf` instance
1028 /// ```
1029 fn new(toml_content: &str) -> Conf {
1030 let value = toml_content.parse::<Table>().unwrap();
1031 let mut regions = Vec::new();
1032 if let Some(parsed_regions) = value.get("regions") {
1033 let parsed_regions = parsed_regions.as_array().unwrap();
1034 for region in parsed_regions {
1035 let x = region.as_table().unwrap();
1036 let z: Region = Region {
1037 condition: x.try_into().unwrap(),
1038 };
1039 regions.push(z);
1040 }
1041 }
1042
1043 let mut barcodes = HashMap::new();
1044 if let Some(parsed_barcodes) = value.get("barcodes") {
1045 let parsed_barcodes = parsed_barcodes.as_table().unwrap().iter();
1046 for (barcode_name, barcode_value) in parsed_barcodes {
1047 let barcode_table = barcode_value.as_table().unwrap();
1048 let barcode_struct: Barcode = Barcode {
1049 condition: barcode_table.try_into().unwrap(),
1050 };
1051 barcodes.insert(barcode_name.clone(), barcode_struct);
1052 }
1053 }
1054 let mut conf = Conf {
1055 channels: 0,
1056 regions,
1057 barcodes,
1058 _channel_map: HashMap::new(),
1059 };
1060 conf.validate_post_init().unwrap();
1061 conf.generate_channel_map(512).unwrap();
1062 conf
1063 }
1064
1065 /// Validates the state of the [`Conf`] struct after initialization.
1066 ///
1067 /// This function checks if the [`Conf`] struct contains `regions`, and if not that the Barcodes has
1068 /// the required 'unclassified' or 'clasiffied' `barcodes` conditions.
1069 /// and returns a [`Result`] indicating whether the validation passed or failed.
1070 ///
1071 /// # Errors
1072 ///
1073 /// Returns an [`Err`] variant with a descriptive error message if the validation fails.
1074 ///
1075 /// # Examples
1076 ///
1077 /// ```rust, ignore
1078 /// let conf = Conf::new("config.toml");
1079 /// conf.validate_post_init().unwrap();
1080 /// ```
1081 /// # Returns
1082 ///
1083 /// - [`Ok(())`] if the validation passes and the `Conf` struct is in a valid state.
1084 /// - [`Err`] with a descriptive error message if the validation fails.
1085 fn validate_post_init(&self) -> Result<(), String> {
1086 let required_barcodes = ["unclassified", "classified"];
1087 if self.regions.is_empty()
1088 && !required_barcodes
1089 .iter()
1090 .all(|&required_barcode| self.barcodes.contains_key(required_barcode))
1091 {
1092 Err("This TOML configuration does not contain any `regions` or `barcodes` and cannot be used by readfish".to_string())
1093 } else {
1094 Ok(())
1095 }
1096 }
1097
1098 /// Generates a channel map based on the given number of channels and regions.
1099 ///
1100 /// This method splits the channels evenly among the regions and assigns each channel
1101 /// a corresponding region index, linking to the position of the region in `Conf.regions`.
1102 ///
1103 /// # Arguments
1104 ///
1105 /// * `channels` - The total number of channels.
1106 /// * `regions` - A slice of regions to distribute the channels among.
1107 ///
1108 /// # Returns
1109 ///
1110 /// A `HashMap<usize, usize>` representing the channel map, where the keys are the
1111 /// channel numbers and the values are the positions of the channels within the regions.
1112 ///
1113 /// # Examples
1114 ///
1115 /// ```
1116 /// # use std::collections::HashMap;
1117 /// # struct Region {}
1118 /// # fn generate_flowcell(flowcell_size: usize, split: usize, axis: usize, odd_even: bool) -> Vec<Vec<usize>> { vec![vec![1, 2, 3], vec![4, 5, 6]] }
1119 /// #
1120 /// # fn generate_channel_map(channels: usize, regions: &[Region]) -> HashMap<usize, usize> {
1121 /// # let split_channels = generate_flowcell(channels, regions.len().max(1), 0, false);
1122 /// # let mut channel_map = HashMap::new();
1123 /// #
1124 /// # for (pos, (channels, region)) in split_channels.iter().zip(regions.iter()).enumerate() {
1125 /// # for &channel in channels.iter() {
1126 /// # channel_map.insert(channel, pos);
1127 /// # }
1128 /// # }
1129 /// #
1130 /// # channel_map
1131 /// # }
1132 /// let regions = vec![
1133 /// Region {},
1134 /// Region {},
1135 /// ];
1136 ///
1137 ///
1138 /// let channel_map = generate_channel_map(6, ®ions);
1139 /// // If we split our imaginary 6 channel flowcell into 2 regions.
1140 /// // NB This would panic in reality - as generate flowcell would not recognise 6 as a valid flow cell size.
1141 /// assert_eq!(channel_map.get(&1), Some(&0));
1142 /// assert_eq!(channel_map.get(&2), Some(&0));
1143 /// assert_eq!(channel_map.get(&3), Some(&0));
1144 /// assert_eq!(channel_map.get(&4), Some(&1));
1145 /// assert_eq!(channel_map.get(&5), Some(&1));
1146 /// assert_eq!(channel_map.get(&6), Some(&1));
1147 /// assert_eq!(channel_map.get(&7), None);
1148 /// ```
1149 ///
1150 /// # Panics
1151 ///
1152 /// This method will panic if the `channels` parameter is zero.
1153 fn generate_channel_map(&mut self, channels: usize) -> Result<(), String> {
1154 let split_channels =
1155 nanopore::generate_flowcell(channels, self.regions.len().max(1), 1, false);
1156 let mut channel_map = HashMap::new();
1157
1158 for (pos, (channels, _region)) in split_channels.iter().zip(self.regions.iter()).enumerate()
1159 {
1160 for &channel in channels.iter() {
1161 channel_map.insert(channel, pos);
1162 }
1163 }
1164 self._channel_map = channel_map;
1165 Ok(())
1166 }
1167
1168 /// Get the condition for a given channel or barcode from the Conf TOML
1169 ///
1170 /// The barcode should be passed as an optional `&str` parameter. If barcoding
1171 /// is not being done and the barcode is not provided, the `channel` will be used instead.
1172 ///
1173 /// # Arguments
1174 ///
1175 /// * `channel` - The channel number for the result
1176 /// * `barcode` - Optional barcode classification from basecalling
1177 ///
1178 /// # Returns
1179 ///
1180 /// * `Ok` - A tuple `(bool, &dyn Condition)` representing the control flag and the condition
1181 /// * `Err` - A `String` containing an error message if the channel/barcode combination does not find a `Region` or a `Barcode`
1182 ///
1183 /// # Errors
1184 ///
1185 /// This function will return an error if both the region (channel) and barcode were not found in the configuration.
1186 ///
1187 pub fn get_conditions<T: AsRef<str> + std::fmt::Debug>(
1188 &self,
1189 channel: usize,
1190 barcode: Option<T>,
1191 ) -> Result<(bool, &dyn Condition), String> {
1192 let region_ = self.get_region(channel);
1193 let barcode_ = self.get_barcode(barcode.as_ref());
1194
1195 if let (Some(region), Some(barcode)) = (region_, barcode_) {
1196 let control = region.control() || barcode.control();
1197 Ok((control, barcode))
1198 } else if let Some(region) = region_ {
1199 let control = region.control();
1200 Ok((control, region))
1201 } else if let Some(barcode) = barcode_ {
1202 let control = barcode.control();
1203 Ok((control, barcode))
1204 } else {
1205 Err(format!(
1206 "Both region (channel={}) and barcode ({:?}) were not found. This config is invalid!",
1207 channel, barcode
1208 ))
1209 }
1210 }
1211
1212 /// Get the region for a given channel.
1213 ///
1214 /// Parameters:
1215 /// - `channel`: The channel number.
1216 ///
1217 /// Returns:
1218 /// - Returns an [`Option`] containing a reference to the [`Region`] if a region exists for the given channel,
1219 /// otherwise returns [`None`].
1220 fn get_region(&self, channel: usize) -> Option<&Region> {
1221 if let Some(channel_index) = self._channel_map.get(&channel) {
1222 self.regions.get(*channel_index)
1223 } else {
1224 None
1225 }
1226 }
1227
1228 /// Get the barcode condition for a given barcode name.
1229 ///
1230 /// Parameters:
1231 /// - `barcode`: The name of the barcode, example "barcode01".
1232 ///
1233 /// Returns:
1234 /// - Returns an [`Option`] containing a reference to the [`Barcode`] if a barcode exists for the given name,
1235 /// otherwise returns [`None`]. If the `barcode` parameter is [`None`], function returns [`None`].
1236 fn get_barcode<T: AsRef<str>>(&self, barcode: Option<T>) -> Option<&Barcode> {
1237 if let Some(barcode_name) = barcode {
1238 if !self.barcodes.is_empty() {
1239 self.barcodes
1240 .get(barcode_name.as_ref())
1241 .or_else(|| self.barcodes.get("classified"))
1242 } else {
1243 None
1244 }
1245 } else {
1246 None
1247 }
1248 }
1249 /// Get the targets associated with a specific channel and barcode (if provided) from the configuration.
1250 ///
1251 /// This function looks up the given `channel` and `barcode` (optional) in the configuration and returns the corresponding targets.
1252 /// If the combination of `channel` and `barcode` is not found in the configuration, or if the condition associated with the
1253 /// combination does not have targets, this function will return a reference to the default targets.
1254 ///
1255 /// # Arguments
1256 ///
1257 /// * `channel`: The channel number for the result.
1258 /// * `barcode`: The optional barcode classification from basecalling. If `Some`, it will be override the `channel` to find the targets.
1259 ///
1260 /// # Returns
1261 ///
1262 /// A reference to the `Targets` associated with the given `channel` and `barcode` combination.
1263 /// If the combination is not found, the function returns a reference to the default targets.
1264 pub fn get_targets(&self, channel: usize, barcode: Option<&str>) -> &Targets {
1265 let (_control, condition) = self.get_conditions(channel, barcode).unwrap();
1266 condition.get_targets()
1267 }
1268
1269 /// Make a decision based on the provided inputs for the specified channel and barcode (if provided).
1270 /// Todo: Write unit tests/integration tests for this function.
1271 /// This function takes several parameters, including `channel`, `barcode`, `contig`, `strand`, and `coord`,
1272 /// and determines whether the given coordinates are considered "on target" or not based on the configuration.
1273 ///
1274 /// # Arguments
1275 ///
1276 /// * `channel`: The channel number associated with the decision-making process.
1277 /// * `barcode`: The optional barcode classification from basecalling. If `Some`, it will be used along with the `channel` to find the relevant targets.
1278 /// * `contig`: The name of the contig where the coordinates are located.
1279 /// * `strand`: The strand information. This can be any type that implements the `ToString` trait, such as a `String` or `&str`.
1280 /// * `coord`: The coordinate position to check against the targets.
1281 ///
1282 /// # Returns
1283 ///
1284 /// A boolean value indicating whether the given `contig`, `strand`, and `coord` are considered "on target" or not based on the configuration.
1285 /// If the combination of `channel` and `barcode` is not found in the configuration, the function will use the default targets.
1286 ///
1287 /// # Example
1288 ///
1289 /// ```rust,ignore
1290 /// # use your_crate::YourConfStruct;
1291 /// # let conf = YourConfStruct::new(); // Assume you have your configuration instance.
1292 /// let channel = 1;
1293 /// let barcode = Some("barcode01");
1294 /// let contig = "chr1";
1295 /// let strand = "+";
1296 /// let coord = 1000;
1297 ///
1298 /// let decision = conf.make_decision(channel, barcode, contig, strand, coord);
1299 /// println!("Decision: {}", decision);
1300 /// ```
1301 pub fn make_decision<T: ToString>(
1302 &self,
1303 channel: usize,
1304 barcode: Option<&str>,
1305 contig: &str,
1306 strand: T,
1307 coord: usize,
1308 ) -> bool {
1309 let targets = self.get_targets(channel, barcode);
1310 targets.check_coords(contig, strand, coord)
1311 }
1312}
1313
1314#[cfg(test)]
1315mod tests {
// Bedfile with not 6 rows, bedfile with wrong types, csv with wrong types, csv with more than 4 rows
1317 use toml::{Table, Value};
1318
1319 use super::*;
1320 use std::fs;
1321 use std::path::PathBuf;
1322
1323 fn get_resource_dir() -> PathBuf {
1324 let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
1325 path.push("resources/");
1326 path
1327 }
1328
1329 fn get_test_file(file: &str) -> PathBuf {
1330 let mut path = get_resource_dir();
1331 path.push(file);
1332 path
1333 }
1334
/// TOML fixture with two regions: one targeting a BED file, one with inline targets.
fn test_toml_string() -> &'static str {
    const REGIONS_TOML: &str = r#"
        [[regions]]
        name = "Rapid_CNS"
        min_chunks = 1
        max_chunks = 4
        targets = "resources/panel_adaptive_nogenenames_20122021_hg38.bed"
        single_off = "unblock"
        multi_off = "unblock"
        single_on = "stop_receiving"
        multi_on = "stop_receiving"
        no_seq = "proceed"
        no_map = "proceed"


        [[regions]]
        name = "Direct_CNS"
        min_chunks = 1
        max_chunks = 4
        targets = ["chr2,3001,4000,-", "chr2,3000,4000,-", "chr20,3000,4000,-"]
        single_off = "unblock"
        multi_off = "unblock"
        single_on = "stop_receiving"
        multi_on = "stop_receiving"
        no_seq = "proceed"
        no_map = "proceed""#;
    REGIONS_TOML
}
1362
/// TOML fixture with barcode conditions only (no regions).
fn test_barcoded_toml_string() -> &'static str {
    const BARCODES_TOML: &str = r#"
        [barcodes.unclassified]
        name = "unclassified_reads"
        control = false
        min_chunks = 0
        max_chunks = 4
        targets = []
        single_on = "unblock"
        multi_on = "unblock"
        single_off = "unblock"
        multi_off = "unblock"
        no_seq = "proceed"
        no_map = "proceed"

        [barcodes.classified]
        name = "classified_reads"
        control = false
        min_chunks = 0
        max_chunks = 4
        targets = []
        single_on = "unblock"
        multi_on = "unblock"
        single_off = "unblock"
        multi_off = "unblock"
        no_seq = "proceed"
        no_map = "proceed"

        [barcodes.barcode01]
        name = "barcode01"
        control = false
        min_chunks = 0
        max_chunks = 4
        targets = []
        single_on = "unblock"
        multi_on = "unblock"
        single_off = "unblock"
        multi_off = "unblock"
        no_seq = "proceed"
        no_map = "unblock"

        [barcodes.barcode02]
        name = "barcode02"
        control = false
        min_chunks = 0
        max_chunks = 4
        targets = []
        single_on = "unblock"
        multi_on = "unblock"
        single_off = "unblock"
        multi_off = "unblock"
        no_seq = "proceed"
        no_map = "unblock"

        [barcodes.barcode03]
        name = "barcode03"
        control = false
        min_chunks = 0
        max_chunks = 4
        targets = [
        "NC_002516.2",
        "NC_003997.3"
        ]
        single_on = "stop_receiving"
        multi_on = "stop_receiving"
        single_off = "unblock"
        multi_off = "unblock"
        no_seq = "proceed"
        no_map = "proceed""#;
    BARCODES_TOML
}
1433
#[test]
fn test_from_string() {
    let toml_str = r#"
        [[regions]]
        name = "Rapid_CNS"
        min_chunks = 1
        max_chunks = 4
        targets = "resources/panel_adaptive_nogenenames_20122021_hg38.bed"
        single_off = "unblock"
        multi_off = "unblock"
        single_on = "stop_receiving"
        multi_on = "stop_receiving"
        no_seq = "proceed"
        no_map = "proceed"
        "#;

    let conf = Conf::from_string(toml_str);

    // Exactly one region must have been parsed.
    assert_eq!(conf.regions.len(), 1);

    // Every field of its condition must mirror the TOML above.
    let condition = &conf.regions[0].condition;
    assert_eq!(condition.name, "Rapid_CNS");
    assert_eq!(condition.min_chunks, 1);
    assert_eq!(condition.max_chunks, 4);
    assert_eq!(
        condition.targets.value,
        TargetType::ViaFile("resources/panel_adaptive_nogenenames_20122021_hg38.bed".into())
    );
    assert_eq!(condition.single_off, Action::Unblock);
    assert_eq!(condition.multi_off, Action::Unblock);
    assert_eq!(condition.single_on, Action::StopReceiving);
    assert_eq!(condition.multi_on, Action::StopReceiving);
    assert_eq!(condition.no_seq, Action::Proceed);
    assert_eq!(condition.no_map, Action::Proceed);
}
1470
// todo need a barcode and region containing toml
#[test]
fn test_get_conditions() {
    let conf = Conf::from_string(test_toml_string());
    let (_control, condition) = conf.get_conditions::<String>(10, None).unwrap();

    // No barcode was supplied, so only the region lookup can have succeeded:
    // the returned `&dyn Condition` must downcast to a `Region`, not a `Barcode`.
    assert!(
        condition.any().downcast_ref::<Region>().is_some(),
        "expected the condition for a channel without a barcode to be a Region"
    );
    assert!(condition.any().downcast_ref::<Barcode>().is_none());
}
1489
#[test]
fn test_get_region() {
    let conf = Conf::from_string(test_toml_string());
    // (channel, name of the region that channel is expected to fall in)
    for (channel, expected_name) in [(1, "Direct_CNS"), (128, "Rapid_CNS")] {
        let region = conf.get_region(channel).unwrap();
        assert_eq!(region.condition.name, expected_name);
    }
}
1499
#[test]
fn test_get_regions_no_regions() {
    // A barcode-only configuration has no regions, so no channel resolves to one.
    let conf = Conf::from_string(test_barcoded_toml_string());
    for channel in [1, 128] {
        assert_eq!(conf.get_region(channel), None);
    }
}
1509
#[test]
fn test_generate_channel_map() {
    let mut conf = Conf::from_string(test_toml_string());
    conf.generate_channel_map(512).unwrap();
    // Channel 121 maps to the first region, channel 12 to the second.
    assert_eq!(conf._channel_map.get(&121), Some(&0_usize));
    assert_eq!(conf._channel_map.get(&12), Some(&1_usize));
}
1518
#[test]
fn test_generate_channel_map_barcode() {
    let mut conf = Conf::from_string(test_barcoded_toml_string());
    conf.generate_channel_map(512).unwrap();
    // Without regions no channel is mapped.
    for channel in [121, 12] {
        assert!(conf._channel_map.get(&channel).is_none());
    }
}
1527
#[test]
fn test_conf_validate_post_init() {
    // The barcoded fixture has both required barcode conditions, so validation succeeds.
    let conf = Conf::from_string(test_barcoded_toml_string());
    assert!(conf.validate_post_init().is_ok());
}
1534
// Now try without the unclassified barcode condition
#[test]
fn test_conf_validate_post_init_panic() {
    let mut conf = Conf::from_string(test_barcoded_toml_string());
    // Sanity check: the untouched fixture is valid, so any failure below is
    // caused by removing the barcode and not by construction.
    assert!(conf.validate_post_init().is_ok());

    conf.barcodes.remove("unclassified");
    // Assert on the returned error directly; `#[should_panic]` would also have
    // passed if `Conf::from_string` itself had panicked.
    assert!(conf.validate_post_init().is_err());
}
1544
#[test]
fn test_get_csv_record_strand() {
    // (strand text in the record, strand it must parse to)
    for (strand_text, expected) in [("+", Strand::Forward), ("-1", Strand::Reverse)] {
        let record = CsvRecord {
            contig: "chr1".to_string(),
            start: Some(100),
            stop: Some(200),
            strand: Some(strand_text.to_string()),
        };
        assert_eq!(record.get_strand(), Some(expected));
    }
}
1564
#[test]
fn test_insert_into_targets() {
    use std::collections::HashMap;

    let record = CsvRecord {
        contig: "chr1".to_string(),
        start: Some(100),
        stop: Some(200),
        strand: Some("+".to_string()),
    };
    let mut targets: HashMap<StrandWrapper, HashedTargets> = HashMap::new();
    Targets::insert_into_targets(&mut targets, &record, record.get_strand().unwrap());

    // One strand, holding one contig, holding one interval.
    assert_eq!(targets.len(), 1);
    let forward = targets.get(&StrandWrapper(Strand::Forward)).unwrap();
    assert_eq!(forward.len(), 1);
    let chr1 = forward.get("chr1").unwrap();
    assert_eq!(chr1.len(), 1);
    assert_eq!(chr1[0], (100, 200));
}
1599
#[test]
#[cfg_attr(miri, ignore)]
fn read_toml() {
    let toml_content = fs::read_to_string(get_test_file("RAPID_CNS2.toml")).unwrap();
    let value = toml_content.parse::<Table>().unwrap();
    let regions = &value["regions"];

    // First region: targets given as a path string.
    assert_eq!(
        regions[0]["targets"].as_str(),
        Some("resources/panel_adaptive_nogenenames_20122021_hg38.bed")
    );
    // Second region: targets given inline as an array.
    assert!(matches!(regions[1]["targets"], Value::Array(_)))
}
1617
#[test]
#[cfg_attr(miri, ignore)]
fn test_load_conf() {
    let conf = Conf::from_file(get_test_file("RAPID_CNS2.toml"));

    // Name of the region at a given position, if there is one.
    let region_name = |index: usize| {
        conf.regions
            .get(index)
            .map(|region| region.condition.name.as_str())
    };
    assert_eq!(region_name(0), Some("Rapid_CNS"));
    assert_eq!(region_name(1), Some("Direct_CNS"));

    // The first reverse-strand chr2 interval of the second region.
    let first_reverse_chr2 = conf.regions.get(1).map(|region| {
        region.condition.targets._targets[&StrandWrapper(Strand::Reverse)]["chr2"][0]
    });
    assert_eq!(first_reverse_chr2, Some((3000_usize, 4000_usize)));

    assert!(conf.barcodes.is_empty())
}
1643
#[test]
fn test_merge_intervals() {
    // Unsorted, overlapping input intervals.
    let mut intervals = vec![
        (11, 15),
        (1, 3),
        (14, 17),
        (2, 4),
        (15, 100),
        (169, 173),
        (10, 29),
    ];
    let merged = Targets::_merge_intervals(&mut intervals);
    assert_eq!(merged, vec![(1, 4), (10, 100), (169, 173)])
}
1659
#[test]
fn test_make_targets() {
    let direct = vec!["chr1,10,20,+".to_string(), "chr1,15,30,+".to_string()];
    let targets: Targets = Targets::new(TargetType::Direct(direct));

    // The two overlapping intervals collapse into one.
    let forward = targets
        ._targets
        .get(&StrandWrapper(Strand::Forward))
        .unwrap();
    assert_eq!(forward.get("chr1").unwrap(), &vec![(10, 30)])
}
1676
#[test]
fn test_get_coord() {
    let direct = vec!["chr1,10,20,+".to_string(), "chr1,15,30,+".to_string()];
    let targets: Targets = Targets::new(TargetType::Direct(direct));

    let forward = targets
        ._targets
        .get(&StrandWrapper(Strand::Forward))
        .unwrap();
    assert_eq!(forward.get("chr1").unwrap(), &vec![(10, 30)]);

    // On target: the same forward strand expressed three different ways.
    assert!(targets.check_coords("chr1", Strand::Forward, 15));
    assert!(targets.check_coords("chr1", "+", 15));
    assert!(targets.check_coords("chr1", 1, 15));

    // Off target: outside the interval, unknown contig, or wrong strand.
    for (contig, strand, coord) in [("chr1", 1, 40), ("chr2", 1, 40)] {
        assert!(!targets.check_coords(contig, strand, coord));
    }
    assert!(!targets.check_coords("chr1", "-", 15));
    assert!(!targets.check_coords("chr1", -1, 15));
}
1700
#[test]
fn test_get_coord_contig() {
    // A bare contig name targets the whole contig.
    let targets: Targets = Targets::new(TargetType::Direct(vec!["chr1".to_string()]));

    let forward = targets
        ._targets
        .get(&StrandWrapper(Strand::Forward))
        .unwrap();
    assert_eq!(forward.get("chr1").unwrap(), &vec![(0_usize, usize::MAX)]);

    // Forward strand, expressed three different ways.
    assert!(targets.check_coords("chr1", Strand::Forward, 15));
    assert!(targets.check_coords("chr1", "+", 15));
    for coord in [15, 40] {
        assert!(targets.check_coords("chr1", 1, coord));
    }
    // A contig that was never listed is off target.
    assert!(!targets.check_coords("chr2", 1, 40));
    // The reverse strand is on target as well.
    assert!(targets.check_coords("chr1", "-", 15));
    assert!(targets.check_coords("chr1", -1, 15));
}
1721
#[test]
#[cfg_attr(miri, ignore)]
fn test_load_barcoded_conf() {
    let conf = Conf::from_file(get_test_file("clockface.toml"));
    assert!(conf.regions.is_empty());

    // Each of these barcodes is stored under its own name.
    for barcode_name in ["barcode01", "barcode02", "barcode03"] {
        assert_eq!(
            conf.barcodes.get(barcode_name).unwrap().condition.name,
            barcode_name
        );
    }

    // The first reverse-strand interval of barcode03 on NC_002516.2 spans the whole contig.
    let first_reverse_interval = conf.barcodes.get("barcode03").map(|barcode| {
        barcode.condition.targets._targets[&StrandWrapper(Strand::Reverse)]["NC_002516.2"][0]
    });
    assert_eq!(first_reverse_interval, Some((0_usize, usize::MAX)))
}
1749}