readfish_tools/
readfish.rs

1//! Ports of readfish functions from python to rust.
//! These functions parse the TOML file used to configure a readfish run, and implement the same methods as the Python version.
//! This allows the TOML to be parsed into the correct regions and barcodes, so that the same methods give the same results as readfish.
4//!
5
6use crate::nanopore;
7use csv::ReaderBuilder;
8use serde::Deserialize;
9use std::{
10    any::Any,
11    collections::HashMap,
12    hash::{Hash, Hasher},
13    io::Cursor,
14    path::{Path, PathBuf},
15};
16use toml::{map::Map, Table, Value};
17
/// Action types that can be taken once a decision (one of single_off, single_on, multi_off, multi_on,
/// no_map, no_seq, exceeded_max_chunks, below_min_chunks) has been made.
#[derive(Debug, PartialEq)]
pub enum Action {
    /// Read would be unblocked
    Unblock,
    /// Complete sequencing naturally
    StopReceiving,
    /// Proceed with sequencing
    Proceed,
}
/// Type for the Contig -> coordinates hashmap.
type HashedTargets = HashMap<String, Vec<(usize, usize)>>;

impl From<&str> for Action {
    /// Convert the action name used in the TOML file into an [`Action`].
    ///
    /// Recognised names are `"unblock"`, `"stop_receiving"` and `"proceed"`; the match is exact
    /// (case sensitive, no trimming).
    ///
    /// # Panics
    ///
    /// Panics if `source` is not one of the recognised names. The message starts with
    /// `Unknown Action given` and includes the offending value so a bad configuration entry
    /// can be located.
    fn from(source: &str) -> Action {
        match source {
            "unblock" => Action::Unblock,
            "stop_receiving" => Action::StopReceiving,
            "proceed" => Action::Proceed,
            unknown => panic!(
                "Unknown Action given: {:?} (expected \"unblock\", \"stop_receiving\" or \"proceed\")",
                unknown
            ),
        }
    }
}
44
/// The _Condition struct holds the settings lifted from the TOML file, for each
/// region of the flowcell or barcode.
///
/// It is the shared payload wrapped by both [`Region`] and [`Barcode`].
#[derive(Debug, PartialEq)]
pub struct _Condition {
    /// The name of the Condition (Barcode/Region).
    pub name: String,
    /// Whether this Region/Barcode is a control Region/Barcode.
    pub control: bool,
    /// The minimum number of read chunks that have to be captured for a read to be processed. Default if not met is to proceed.
    pub min_chunks: u8,
    /// The maximum number of read chunks that can be captured for a read. Default if exceeded is to unblock.
    pub max_chunks: u8,
    /// The targets associated with the Condition.
    pub targets: Targets,
    /// The action to perform when an alignment returns one single primary mapping, outside of any target regions.
    pub single_off: Action,
    /// The action to perform when an alignment returns one single primary mapping, inside of a target region.
    pub single_on: Action,
    /// The action to perform when an alignment returns multiple primary mappings, all outside of any target regions.
    pub multi_off: Action,
    /// The action to perform when an alignment returns multiple primary mappings, at LEAST ONE of which is inside of a target region.
    pub multi_on: Action,
    /// The action to perform when no alignments are returned for this read.
    pub no_map: Action,
    /// The action to perform when no sequence is produced for this read.
    pub no_seq: Action,
}
72
/// Plain accessors for every field of [`_Condition`]: each getter returns the stored value
/// (by reference for non-`Copy` fields) and each setter overwrites it.
impl _Condition {
    /// Getter for the `name` field
    pub fn get_name(&self) -> &String {
        &self.name
    }

    /// Setter for the `name` field
    pub fn set_name(&mut self, name: String) {
        self.name = name;
    }

    /// Getter for the `control` field
    pub fn is_control(&self) -> bool {
        self.control
    }

    /// Setter for the `control` field
    pub fn set_control(&mut self, control: bool) {
        self.control = control;
    }

    /// Getter for the `min_chunks` field
    pub fn get_min_chunks(&self) -> u8 {
        self.min_chunks
    }

    /// Setter for the `min_chunks` field
    pub fn set_min_chunks(&mut self, min_chunks: u8) {
        self.min_chunks = min_chunks;
    }

    /// Getter for the `max_chunks` field
    pub fn get_max_chunks(&self) -> u8 {
        self.max_chunks
    }

    /// Setter for the `max_chunks` field
    pub fn set_max_chunks(&mut self, max_chunks: u8) {
        self.max_chunks = max_chunks;
    }

    /// Getter for the `targets` field
    pub fn get_targets(&self) -> &Targets {
        &self.targets
    }

    /// Setter for the `targets` field
    pub fn set_targets(&mut self, targets: Targets) {
        self.targets = targets;
    }

    /// Getter for the `single_off` field: the Action taken when a single primary mapping is
    /// found outside of any target regions.
    pub fn get_single_off(&self) -> &Action {
        &self.single_off
    }

    /// Setter for the `single_off` field: the Action to take when a single primary mapping is
    /// found outside of any target regions.
    pub fn set_single_off(&mut self, single_off: Action) {
        self.single_off = single_off;
    }

    /// Getter for the `single_on` field: the Action taken when a single primary mapping is
    /// found inside a target region.
    pub fn get_single_on(&self) -> &Action {
        &self.single_on
    }

    /// Setter for the `single_on` field: the Action to take when a single primary mapping is
    /// found inside a target region.
    pub fn set_single_on(&mut self, single_on: Action) {
        self.single_on = single_on;
    }

    /// Getter for the `multi_off` field
    pub fn get_multi_off(&self) -> &Action {
        &self.multi_off
    }

    /// Setter for the `multi_off` field
    pub fn set_multi_off(&mut self, multi_off: Action) {
        self.multi_off = multi_off;
    }

    /// Getter for the `multi_on` field
    pub fn get_multi_on(&self) -> &Action {
        &self.multi_on
    }
    /// Setter for the `multi_on` field
    pub fn set_multi_on(&mut self, multi_on: Action) {
        self.multi_on = multi_on;
    }

    /// Getter for the `no_map` field
    pub fn get_no_map(&self) -> &Action {
        &self.no_map
    }
    /// Setter for the `no_map` field
    pub fn set_no_map(&mut self, no_map: Action) {
        self.no_map = no_map;
    }

    /// Getter for the `no_seq` field
    pub fn get_no_seq(&self) -> &Action {
        &self.no_seq
    }
    /// Setter for the `no_seq` field
    pub fn set_no_seq(&mut self, no_seq: Action) {
        self.no_seq = no_seq;
    }
}
181
#[derive(Debug, PartialEq)]
/// Represents a region of the flow cell, denoted in the configuration TOML as
///
/// ```toml
///
///    [[regions]]
///    name = "Rapid_CNS"
///    min_chunks = 1
///    max_chunks = 4
///    targets = "resources/panel_adaptive_nogenenames_20122021_hg38.bed"
///    single_off = "unblock"
///    multi_off = "unblock"
///    single_on = "stop_receiving"
///    multi_on = "stop_receiving"
///    no_seq = "proceed"
///    no_map = "proceed"
/// ```
/// All the parsed fields are stored within a [`_Condition`] struct, as they could also come from a barcodes table.
pub struct Region {
    /// The parsed region settings.
    pub condition: _Condition,
}
204
/// Represents a barcode on the sequencing library. This supersedes any regions.
///
/// ```toml
///
/// [barcodes.barcode02]
/// name = "barcode02"
/// control = false
/// min_chunks = 0
/// max_chunks = 4
/// targets = []
/// single_on = "unblock"
/// multi_on = "unblock"
/// single_off = "unblock"
/// multi_off = "unblock"
/// no_seq = "proceed"
/// no_map = "unblock"
/// ```
///
/// All the parsed fields are stored within a [`_Condition`] struct, as they could also come from a regions table.
#[derive(Debug, PartialEq)]
struct Barcode {
    /// The parsed barcode settings.
    condition: _Condition,
}
229
// Define a trait to represent the common behaviour of Region and Barcode
/// Trait for shared behaviour for Barcodes and Regions.
///
/// Both implementors wrap a [`_Condition`]; this trait exposes that shared data without the
/// caller needing to know which concrete type it holds.
pub trait Condition {
    // Add any common methods or behaviour for Region or Barcode
    /// Return whether this Condition is a control
    fn control(&self) -> bool;
    /// Return `self` as `&dyn Any`, which allows downcasting back to a concrete Barcode or Region.
    fn any(&self) -> &dyn Any;
    /// Get the targets from the condition
    fn get_targets(&self) -> &Targets;
    /// Get the underlying [`_Condition`] struct
    fn get_condition(&self) -> &_Condition;
}
243
244impl Condition for Region {
245    fn control(&self) -> bool {
246        self.condition.control
247    }
248
249    fn any(&self) -> &dyn Any {
250        self
251    }
252
253    fn get_targets(&self) -> &Targets {
254        &self.condition.targets
255    }
256
257    fn get_condition(&self) -> &_Condition {
258        &self.condition
259    }
260}
261
262impl Condition for Barcode {
263    fn control(&self) -> bool {
264        self.condition.control
265    }
266    fn any(&self) -> &dyn Any {
267        self
268    }
269    fn get_targets(&self) -> &Targets {
270        &self.condition.targets
271    }
272    fn get_condition(&self) -> &_Condition {
273        &self.condition
274    }
275}
276
277impl From<&Map<String, Value>> for _Condition {
278    fn from(source: &Map<String, Value>) -> Self {
279        let targets: TargetType = source.get("targets").unwrap().into();
280        let target: Targets = Targets::new(targets);
281        _Condition {
282            name: source.get("name").unwrap().as_str().unwrap().to_string(),
283            control: source
284                .get("control")
285                .unwrap_or(&toml::Value::Boolean(false))
286                .as_bool()
287                .unwrap(),
288            min_chunks: source
289                .get("min_chunks")
290                .unwrap_or(&toml::Value::Integer(0))
291                .as_integer()
292                .unwrap()
293                .try_into()
294                .unwrap(),
295
296            max_chunks: source
297                .get("max_chunks")
298                .unwrap_or(&toml::Value::Integer(4))
299                .as_integer()
300                .unwrap()
301                .try_into()
302                .unwrap(),
303            targets: target,
304            single_off: source.get("single_off").unwrap().as_str().unwrap().into(),
305            single_on: source.get("single_on").unwrap().as_str().unwrap().into(),
306            multi_on: source.get("multi_on").unwrap().as_str().unwrap().into(),
307            multi_off: source.get("multi_off").unwrap().as_str().unwrap().into(),
308            no_map: source.get("no_map").unwrap().as_str().unwrap().into(),
309            no_seq: source.get("no_seq").unwrap().as_str().unwrap().into(),
310        }
311    }
312}
313
/// Strand that the target is on.
#[derive(Debug, Hash, PartialEq)]
enum Strand {
    /// Represents the forward (sense) strand
    Forward,
    /// Represents the reverse (anti-sense) strand
    Reverse,
}

/// A wrapper for the Strand, which implements Hash and Eq, allowing the Strand enum to be used as
/// a HashMap key.
///
/// [`Strand`] itself provides `to_string` (via `Display`) and `AsRef<str>` to get string
/// representations, so it can be passed along with other string-like types into functions
/// that need the strand.
#[derive(PartialEq, Debug)]
struct StrandWrapper(Strand);

impl Eq for StrandWrapper {}

impl Hash for StrandWrapper {
    /// Hash only the enum discriminant of the wrapped strand.
    fn hash<H: Hasher>(&self, state: &mut H) {
        std::mem::discriminant(&self.0).hash(state);
    }
}

impl From<&str> for Strand {
    /// Parse a strand symbol. `"-"` and `"-1"` give [`Strand::Reverse`]; every other input
    /// (including `"+"`, `"1"` and unrecognised text) gives [`Strand::Forward`].
    fn from(source: &str) -> Strand {
        match source {
            "-" | "-1" => Strand::Reverse,
            // "+" and "1" are the expected forward spellings; anything else also falls back
            // to forward, so this conversion never fails.
            _ => Strand::Forward,
        }
    }
}

impl std::fmt::Display for Strand {
    /// Write the strand as `"+"` or `"-"`. Implementing `Display` also supplies `to_string()`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_ref())
    }
}

impl AsRef<str> for Strand {
    /// Borrow the strand symbol: `"+"` for forward, `"-"` for reverse.
    fn as_ref(&self) -> &str {
        match self {
            Strand::Forward => "+",
            Strand::Reverse => "-",
        }
    }
}
/// TargetType enum, represents whether targets were listed directly in the TOML file
/// or a path to a file containing the targets was given.
#[derive(Clone, Debug, PartialEq)]
enum TargetType {
    /// Variant representing targets that were given directly in the TOML file.
    Direct(Vec<String>),
    /// Variant representing targets that were given as a path to a file that contains targets.
    ViaFile(PathBuf),
}
/// Represents a BED record, which is read from a BED file. All six columns are expected, however we do not use _name or _score.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct BedRecord {
    /// The contig or chromosome name associated with the record.
    contig: String,
    /// The starting position of the record.
    start: usize,
    /// The stopping position of the record.
    stop: usize,
    /// The name associated with the record. Must be present in the file, but is discarded
    /// when converting to a [`CsvRecord`].
    _name: String,
    /// The score associated with the record. Must be present in the file, but is discarded
    /// when converting to a [`CsvRecord`].
    _score: String,
    /// The strand information of the record.
    strand: String,
}
394
/// CSV record parsed from targets specified in the TOML file.
/// If a BED file is provided, the six columns are first read into a
/// [`BedRecord`], which is then converted into a CsvRecord.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CsvRecord {
    /// Contig the target is on
    pub contig: String,
    /// Optional start coordinate of target
    #[serde(default)]
    pub start: Option<usize>,
    /// Optional stop coordinate of target. Required if start is present
    #[serde(default)]
    pub stop: Option<usize>,
    /// Optional strand the target is on. One of "+"/"-". Required if start/stop are provided.
    #[serde(default)]
    pub strand: Option<String>,
}
413
414impl From<BedRecord> for CsvRecord {
415    fn from(source: BedRecord) -> CsvRecord {
416        CsvRecord {
417            contig: source.contig,
418            start: Some(source.start),
419            stop: Some(source.stop),
420            strand: Some(source.strand),
421        }
422    }
423}
424
425impl CsvRecord {
426    /// Checks if the structure has valid coordinates.
427    ///
428    /// Returns `true` if both `start` and `stop` fields have values,
429    /// indicating that the structure has valid coordinates. Otherwise, returns `false`.
430    ///
431    /// # Examples
432    ///
433    /// ```rust, ignore
434    /// # use readfish_tools::CsvRecord;
435    ///
436    /// let record = CsvRecord {
437    ///     contig: "chr1".to_string(),
438    ///     start: Some(100),
439    ///     stop: Some(200),
440    ///     strand: Some("+".to_string()),
441    /// };
442    ///
443    /// assert!(record.has_coords());  // Returns true
444    ///
445    /// let invalid_record = CsvRecord {
446    ///     contig: "chr2".to_string(),
447    ///     start: None,
448    ///     stop: None,
449    ///     strand: Some("-".to_string()),
450    /// };
451    ///
452    /// assert!(!invalid_record.has_coords());  // Returns false
453    /// ```
454    fn has_coords(&self) -> bool {
455        self.start.is_some() && self.stop.is_some()
456    }
457
458    /// Retrieves the coordinates from the structure.
459    ///
460    /// Returns a tuple containing the start and stop coordinates of the structure.
461    /// If the structure has valid coordinates (i.e., `has_coords()` is true),
462    /// the actual values of the `start` and `stop` fields are returned.
463    /// Otherwise, a default range of (0, usize::MAX) is returned.
464    ///
465    /// # Examples
466    ///
467    /// ```rust, ignore
468    /// # use readfish_tools::CsvRecord;
469    ///
470    /// let record = CsvRecord {
471    ///     contig: "chr1".to_string(),
472    ///     start: Some(100),
473    ///     stop: Some(200),
474    ///     strand: Some("+".to_string()),
475    /// };
476    ///
477    /// assert_eq!(record.get_coords(), (100, 200));
478    ///
479    /// let invalid_record = CsvRecord {
480    ///     contig: "chr2".to_string(),
481    ///     start: None,
482    ///     stop: Some(300),
483    ///     strand: Some("-".to_string()),
484    /// };
485    ///
486    /// assert_eq!(invalid_record.get_coords(), (0, usize::MAX));
487    /// ```
488    fn get_coords(&self) -> (usize, usize) {
489        if self.has_coords() {
490            (self.start.unwrap(), self.stop.unwrap())
491        } else {
492            (0, usize::MAX)
493        }
494    }
495
496    /// Retrieves the strand information from the struct.
497    ///
498    /// This function returns an `Option<Strand>` representing the strand information stored in the struct.
499    /// If the `strand` field is [`Some`], the function maps the string value to the corresponding [`Strand`] enum variant
500    /// using the `from` method. If the `strand` field is [`None`], the function returns `None`.
501    ///
502    /// # Returns
503    ///
504    /// An `Option<Strand>` representing the strand information, or `None` if no strand is available.
505    ///
506    /// # Examples
507    ///
508    /// ```rust,ignore
509    /// # use readfish_tools::CsvRecord;
510    ///
511    /// let record = CsvRecord {
512    ///     contig: "chr1".to_string(),
513    ///     start: Some(100),
514    ///     stop: Some(200),
515    ///     strand: Some("+".to_string()),
516    /// };
517    /// let strand = record.get_strand();
518    /// assert_eq(strand, Some(Strand::Forward))
519    /// ```
520    fn get_strand(&self) -> Option<Strand> {
521        self.strand
522            .as_ref()
523            .map(|strand_string| Strand::from(strand_string.as_str()))
524    }
525}
526
527impl From<&Value> for TargetType {
528    fn from(source: &Value) -> TargetType {
529        match source.is_array() {
530            true => TargetType::Direct(
531                source
532                    .as_array()
533                    .unwrap()
534                    .iter()
535                    .map(|x| x.as_str().unwrap().to_string())
536                    .collect(),
537            ),
538            false => TargetType::ViaFile(PathBuf::from(source.as_str().unwrap())),
539        }
540    }
541}
542
/// Represents a configuration for a flowcell.
#[derive(Debug)]
pub struct Conf {
    /// The total number of channels on the flowcell.
    channels: usize,
    /// The regions of the flowcell. Contains the name of the region and the Action to take for each Alignment type.
    regions: Vec<Region>,
    /// The barcodes from the sequencing library, keyed by a `String`.
    barcodes: HashMap<String, Barcode>,
    /// The mapping of channel number to the index of the region that channel belongs to.
    _channel_map: HashMap<usize, usize>,
}
#[derive(Debug, PartialEq)]
/// Holds the targets for a given region or barcode.
pub struct Targets {
    /// The target value as listed in the TOML. Can either be an array of strings, in which case that is assumed to be the targets themselves, or a string,
    /// which is assumed to be a file path to a file containing the targets.
    value: TargetType,
    /// A hashmap containing the targets themselves, in the form of
    /// Strand => Contig => Start and stop target coordinates.
    _targets: HashMap<StrandWrapper, HashedTargets>,
}
565
566impl Targets {
567    /// Creates a new instance of [`Targets`] with the provided target data.
568    ///
569    /// This function takes the target data in the form of [`TargetType`] and constructs a new [`Targets`] struct.
570    /// The [`TargetType`] can either be an array of strings, representing the targets themselves,
571    /// or a string representing a file path to a file containing the targets (.bed or .csv).
572    ///
573    /// If the targets is an array of strings, they must be im the format "contig, start, stop, strand" OR "contig".
574    /// For example:
575    ///
576    /// ```toml
577    /// targets = ["chr2,10.20.+", "chr1"]
578    /// ```
579    ///
580    /// If only the contig is provided, it is assumed that the whole contig is the target, on BOTH strands. See example below.
581    ///
582    /// The target data is stored in the `value` field, while the parsed targets are stored in the `_targets` field
583    /// as a hashmap with strand, contig, and start/stop target coordinates.
584    ///
585    /// # Arguments
586    ///
587    /// * `targets` - The target data in the form of [`TargetType`].
588    ///
589    /// # Examples
590    ///
591    /// ```rust, ignore
592    /// # use my_module::{Targets, TargetType};
593    ///
594    /// let target_data = TargetType::Direct(vec!["chr1".to_string(), "chr2,10.20.+".to_string()]);
595    /// let targets = Targets::new(target_data);
596    ///
597    /// assert_eq!(targets.value, TargetType::Array(vec!["chr1".to_string(), "chr2,10.20.+".to_string()]));
598    /// assert_eq!(targets._targets.len(), 2);
599    ///
600    /// println!("{:#?}", targets._targets)
601    /// // {
602    /// //    StrandWrapper(Forward): {"chr1": [(0, 18_446_744_073_709_551_615)], "chr2": [(10, 20)]}
603    /// //    StrandWrapper(Reverse): {"chr1": [(0, 18_446_744_073_709_551_615)]}
604    /// // }
605    /// // NOTE the single contig target chr1 is on both strands in its entirety.
606    ///
607    /// ```
608    fn new(targets: TargetType) -> Targets {
609        let t = targets.clone();
610        Targets {
611            value: targets,
612            _targets: Targets::from_parsed_toml(t),
613        }
614    }
615
616    /// Inserts target coordinates into the `targets` hashmap based on the provided record and strand.
617    ///
618    /// This function takes a mutable reference to the `targets` hashmap, a reference to a [`CsvRecord`],
619    /// and a variant from the `strand` Enum. It inserts the record coordinates into a Vec at the lowest level of
620    /// the `targets` hashmap based on the strand and contig.
621    ///
622    /// If the strand does not exist in the `targets` hashmap, a new entry is created for the strand,
623    /// and an empty hashmap is inserted for the contig. If the contig does not exist for the strand,
624    /// a new entry is created for the contig, and an empty vector is inserted to store the coordinates.
625    ///
626    /// The record coordinates are retrieved using the `get_coords()` method from the [`CsvRecord`] struct.
627    ///
628    /// # Arguments
629    ///
630    /// * `targets` - A mutable reference to the `HashMap<StrandWrapper, HashedTargets>]` where the record will be inserted.
631    /// * `record` - A reference to the `CsvRecord` containing the record information.
632    /// * `strand` - The strand information associated with the record.
633    ///
634    /// # Examples
635    ///
636    /// ```rust,ignore
637    /// use readfish_tools::{insert_into_targets, CsvRecord, Strand, StrandWrapper, HashedTargets};
638    /// use std::collections::HashMap;
639    ///
640    /// let mut targets: HashMap<StrandWrapper, HashedTargets> = HashMap::new();
641    ///
642    /// let record = CsvRecord {
643    ///     contig: "chr1".to_string(),
644    ///     start: Some(100),
645    ///     stop: Some(200),
646    ///     strand: Some("+".to_string()),
647    /// };
648    ///
649    /// insert_into_targets(&mut targets, &record, "+");
650    ///
651    /// assert_eq!(targets.len(), 1);
652    /// assert_eq!(targets.get(&StrandWrapper(Strand::Forward)).unwrap().len(), 1);
653    /// assert_eq!(targets.get(&StrandWrapper(Strand::Forward)).unwrap().get("chr1").unwrap().len(), 1);
654    /// assert_eq!(targets.get(&StrandWrapper(Strand::Forward)).unwrap().get("chr1").unwrap()[0], (100, 200));
655    /// ```
656    fn insert_into_targets(
657        targets: &mut HashMap<StrandWrapper, HashedTargets>,
658        record: &CsvRecord,
659        strand: Strand,
660    ) {
661        let coords = targets
662            .entry(StrandWrapper(strand))
663            .or_insert(HashMap::new())
664            .entry(record.contig.clone())
665            .or_insert(Vec::with_capacity(1000));
666        coords.push(record.get_coords())
667    }
668
669    /// Creates a hashmap of targets from the parsed TOML data.
670    ///
671    /// This function takes the `targets` data in the form of [`TargetType`]] and constructs a hashmap of targets
672    /// grouped by strand and contig, with start and stop coordinates as values. The `targets` can be provided
673    /// either as a direct array of target strings or as a path to a CSV or BED file containing the targets.
674    ///
675    /// If `targets` is of type [`TargetType::Direct`], the function treats the data as direct target strings,
676    /// parses them as CSV data, and populates the hashmap with the targets grouped by strand and contig.
677    /// If `targets` is of type [`TargetType::ViaFile`], the function treats the data as a file path,
678    /// determines the file type (CSV or BED), and parses the data accordingly to populate the hashmap.
679    ///
680    /// The function uses the [`CsvRecord`] struct for deserialization of CSV records and the [`BedRecord`] struct
681    /// for deserialization of BED records. The appropriate deserialization is performed based on the file type.
682    ///
683    /// After populating the hashmap, the function merges overlapping intervals within each contig
684    /// using the [`Self::_merge_intervals()`] helper function.
685    ///
686    /// # Arguments
687    ///
688    /// * `targets` - The target data in the form of [`TargetType`].
689    ///
690    /// # Returns
691    ///
692    /// A hashmap of targets grouped by strand and contig, with start and stop coordinates as values.
693    ///
694    /// # Examples
695    ///
696    /// ```rust,ignore
697    /// use readfish_tools::{Targets::from_parsed_toml, TargetType, CsvRecord};
698    /// use std::collections::HashMap;
699    /// use std::path::PathBuf;
700    ///
701    /// let target_data = TargetType::Direct(vec![
702    ///     "chr2,".to_string(),
703    ///     "chr1,10,20,+".to_string(),
704    /// ]);
705    ///
706    /// let targets = from_parsed_toml(target_data);
707    ///
708    /// assert_eq!(targets.len(), 2);
709    /// assert_eq!(targets.get(&StrandWrapper(Strand::Forward)).unwrap().get("chr2").unwrap()[0], (0_usize, usize::MAX));
710    /// assert_eq!(targets.get(&StrandWrapper(Strand::Forward)).unwrap().get("chr1").unwrap()[0], (10_usize,20_usize));
711    /// ```
712    fn from_parsed_toml(
713        targets: TargetType,
714    ) -> HashMap<StrandWrapper, HashMap<String, Vec<(usize, usize)>>> {
715        let mut results = HashMap::new();
716        let mut bed_file = false;
717        let mut delim = b',';
718        match targets {
719            TargetType::Direct(target_vec) => {
720                if target_vec.is_empty() {
721                    return results;
722                }
723                let csv_data = target_vec.join("\n");
724                let file = Cursor::new(csv_data);
725                let mut reader = ReaderBuilder::new()
726                    .flexible(true)
727                    .has_headers(false)
728                    .delimiter(delim)
729                    .from_reader(file);
730                for record in reader.records() {
731                    let record = record.unwrap();
732                    let record: CsvRecord = record.deserialize(None).unwrap();
733                    if record.has_coords() {
734                        Targets::insert_into_targets(
735                            &mut results,
736                            &record,
737                            record.get_strand().unwrap(),
738                        );
739                    } else {
740                        Targets::insert_into_targets(&mut results, &record, Strand::Forward);
741                        Targets::insert_into_targets(&mut results, &record, Strand::Reverse);
742                    }
743                }
744            }
745            TargetType::ViaFile(file_path) => {
746                // TODO won't handle gzipped bed files
747                if file_path.extension().unwrap() == "bed" {
748                    bed_file = true;
749                    delim = b'\t';
750                }
751                let mut rdr = ReaderBuilder::new()
752                    .delimiter(delim)
753                    .flexible(true)
754                    .has_headers(false)
755                    .from_path(file_path)
756                    .expect("Could not open targets file!");
757                for record in rdr.records() {
758                    let record = record.unwrap();
759                    let record: CsvRecord = match bed_file {
760                        true => {
761                            let x: BedRecord = record.deserialize(None).unwrap();
762                            x.into()
763                        }
764                        false => {
765                            let x: CsvRecord = record.deserialize(None).unwrap();
766                            x
767                        }
768                    };
769                    // Has coordinates and strand provided
770                    if record.has_coords() {
771                        Targets::insert_into_targets(
772                            &mut results,
773                            &record,
774                            record.get_strand().unwrap(),
775                        );
776                    } else {
777                        Targets::insert_into_targets(&mut results, &record, Strand::Forward);
778                        Targets::insert_into_targets(&mut results, &record, Strand::Reverse);
779                    }
780                }
781            }
782        }
783        results.iter_mut().for_each(|(_strand, contig_hashmap)| {
784            contig_hashmap
785                .iter_mut()
786                .for_each(|(_, v)| *v = Targets::_merge_intervals(v))
787        });
788        results
789    }
790
791    /// Merges overlapping intervals within a vector of intervals.
792    ///
793    /// This function takes a mutable reference to a vector of intervals represented as tuples `(usize, usize)`
794    /// and merges any overlapping intervals into collapsed ranges. The intervals are expected to be sorted
795    /// based on the starting index before calling this function.
796    ///
797    /// If the number of intervals is less than 2, the function returns a clone of the input vector as there
798    /// are no overlapping intervals to merge.
799    ///
800    /// The function iterates over the sorted intervals and maintains a current range. For each interval,
801    /// if it overlaps with the current range, the end index of the current range is updated to the maximum
802    /// of the current end index and the interval's end index. If the interval is non-overlapping, the
803    /// current range is added to the collapsed ranges and updated to the new interval. If it's the first
804    /// range encountered, the current range is initialized. Finally, the last current range (if any) is added
805    /// to the collapsed ranges.
806    ///
807    /// The resulting collapsed ranges are returned as a new vector.
808    ///
809    /// # Arguments
810    ///
811    /// * `intervals` - A mutable reference to a vector of intervals to be merged.
812    ///
813    /// # Returns
814    ///
815    /// A vector of collapsed ranges after merging overlapping intervals.
816    ///
817    /// # Examples
818    ///
819    /// ```rust,ignore
820    ///
821    /// let mut intervals = vec![(1, 5), (4, 9), (10, 15), (13, 18)];
822    /// let collapsed_ranges = Targets::_merge_intervals(&mut intervals);
823    ///
824    /// assert_eq!(collapsed_ranges, vec![(1, 9), (10, 18)]);
825    /// ```
826    fn _merge_intervals(intervals: &mut Vec<(usize, usize)>) -> Vec<(usize, usize)> {
827        // ToDo consider explicitly forbidding start > end or end < start
828        let n_args = intervals.len();
829        if n_args < 2 {
830            return intervals.clone();
831        }
832        intervals.sort(); // Sort the ranges based on the starting index
833        let mut collapsed_ranges: Vec<(usize, usize)> = Vec::new();
834        let mut current_range: Option<(usize, usize)> = None;
835        for &(start, end) in intervals.iter() {
836            if let Some((current_start, current_end)) = current_range {
837                if start <= current_end {
838                    // Overlapping range, update the current range's end index
839                    current_range = Some((current_start, current_end.max(end)));
840                } else {
841                    // Non-overlapping range, add the current range and update the current range
842                    collapsed_ranges.push((current_start, current_end));
843                    current_range = Some((start, end));
844                }
845            } else {
846                // First range encountered, initialize the current range
847                current_range = Some((start, end));
848            }
849        }
850        // Add the last current range (if any)
851        if let Some((current_start, current_end)) = current_range {
852            collapsed_ranges.push((current_start, current_end));
853        }
854        collapsed_ranges
855    }
856
857    /// Checks if the given coordinate falls within any of the target intervals for the specified contig and strand.
858    ///
859    /// This function takes a reference to a [`CsvRecord`] struct and performs a lookup in the [`Targets`] struct's
860    /// `_targets` hashmap to retrieve the intervals for the specified contig and strand. It then checks if the
861    /// given coordinate falls within any of the target intervals by iterating over the intervals and performing
862    /// the comparison.
863    ///
864    /// The function expects the `strand` argument to implement the [`ToString`] trait, which allows the function
865    /// to convert it to a [`String`]. The `strand` is then converted to the [`Strand`] enum type using the `into()`
866    /// method.
867    ///
868    /// # Generic Parameters
869    ///
870    /// * `T` - The type of the `strand` argument that implements the [`ToString`] trait.
871    ///
872    /// # Arguments
873    ///
874    /// * `contig` - The contig string to lookup the intervals for.
875    /// * `strand` - The strand value to lookup the intervals for. It is expected to be convertible to a [`String`].
876    /// * `coord` - The coordinate value to check against the intervals.
877    ///
878    /// # Returns
879    ///
880    /// A boolean value indicating whether the coordinate falls within any of the target intervals for the
881    /// specified contig and strand.
882    ///
883    /// # Examples
884    ///
885    /// ```rust, ignore
886    ///     ///
887    /// let targets = Targets::new(TargetType::Direct(vec![
888    ///     "Contig1,100,200,+".to_string(),
889    ///     "Contig2,300,400,-".to_string(),
890    /// ]));
891    ///
892    /// let record = CsvRecord {
893    ///     contig: "Contig1".to_string(),
894    ///     start: Some(150),
895    ///     stop: Some(180),
896    ///     strand: Some("+".to_string()),
897    /// };
898    ///
899    /// let is_within_interval = record.get_coords("Contig1", "+", 160);
900    ///
901    /// assert!(is_within_interval);
902    /// ```
903    fn check_coords<T: ToString>(&self, contig: &str, strand: T, coord: usize) -> bool {
904        let strand: Strand = strand.to_string().as_str().into();
905        let intervals = self
906            ._targets
907            .get(&StrandWrapper(strand))
908            .and_then(|inner_map| inner_map.get(contig));
909        if let Some(intervals) = intervals {
910            intervals
911                .iter()
912                .any(|&(start, end)| start <= coord && coord <= end)
913        } else {
914            false
915        }
916    }
917}
918
919impl Conf {
    /// Creates a [`Conf`] instance from a TOML string.
    ///
    /// This is a thin wrapper: `toml_string` is forwarded unchanged to `Conf::new`,
    /// which parses it and builds the `Conf`.
    ///
    /// # Arguments
    ///
    /// * `toml_string` - The TOML document to parse and create the `Conf` from.
    ///
    /// # Examples
    ///
    /// Regions are given as an array of tables (`[[regions]]`) and barcodes as named
    /// sub-tables of `barcodes` (`[barcodes.<name>]`):
    ///
    /// ```rust,ignore
    /// let toml_str = r#"
    ///     [[regions]]
    ///     name = "Region 1"
    ///     min_chunks = 2
    ///     max_chunks = 5
    ///     # ...
    ///
    ///     [barcodes.barcode01]
    ///     name = "Barcode 1"
    ///     min_chunks = 1
    ///     max_chunks = 3
    ///     # ...
    /// "#;
    ///
    /// let conf = Conf::from_string(toml_str);
    /// ```
    ///
    /// # Panics
    ///
    /// This function panics if the TOML string fails to parse or if there
    /// are any invalid values in the TOML data.
    ///
    /// # Returns
    ///
    /// The constructed `Conf` instance.
    ///
    fn from_string(toml_string: &str) -> Conf {
        Conf::new(toml_string)
    }
965
966    /// Constructs a new [`Conf`] instance by parsing a TOML file.
967    ///
968    /// This function takes a TOML file path (`toml_path`) and reads its contents
969    /// using [`std::fs::read_to_string`]. The contents of the TOML file are then
970    /// passed to the `Conf::new` function to create a new `Conf` instance.
971    ///
972    /// # Arguments
973    ///
974    /// * `toml_path` - The path to the TOML file to be parsed.
975    ///
976    /// # Panics
977    ///
978    /// This function panics if the TOML file cannot be read or if parsing the TOML
979    /// content into a `Conf` instance fails.
980    ///
981    /// # Examples
982    ///
983    /// ```rust,ignore
984    /// use std::path::Path;
985    ///
986    /// let toml_path = Path::new("config.toml");
987    /// let conf = Conf::from_file(toml_path);
988    /// ```
989    pub fn from_file(toml_path: impl AsRef<Path>) -> Conf {
990        let toml_content = std::fs::read_to_string(toml_path).unwrap();
991        Conf::new(&toml_content)
992    }
993
994    /// Constructs a new [`Conf`] instance by parsing a String representation of TOML file.
995    ///
996    /// This function takes a String representation of a toml file (`toml_content`).
997    /// The TOML content is then parsed into a `Table` using the `parse::<Table>` method. The [`Table`] represents
998    /// the parsed TOML structure.
999    ///
1000    /// The function initializes empty vectors `regions` and `barcodes` to hold the parsed regions and barcodes,
1001    /// respectively. It then checks if the parsed TOML structure contains the "regions" and "barcodes" sections.
1002    /// If the sections are present, the function iterates over the corresponding values and converts them into
1003    /// [`Region`] and [`Barcode`] structs, which are added to the `regions` and `barcodes` vectors, respectively.
1004    ///
1005    /// Finally, the function constructs and returns a new [`Conf`] instance with the populated `regions` and `barcodes`
1006    /// vectors. The `channels` field is set to 0, and the `_channel_map` field is initialized as an empty [`HashMap].
1007    ///
1008    /// # Arguments
1009    ///
1010    /// * `toml_path` - The path to the TOML file.
1011    ///
1012    /// # Returns
1013    ///
1014    /// A new [`Conf`] instance with the parsed regions and barcodes.
1015    ///
1016    /// # Panics
1017    ///
1018    /// This function panics if there is an error reading the TOML file or parsing its contents.
1019    ///
1020    /// # Examples
1021    ///
1022    /// ```rust,ignore
1023    /// use my_module::Conf;
1024    ///
1025    /// let conf = Conf::new("config.toml");
1026    ///
1027    /// // Perform operations on the `conf` instance
1028    /// ```
1029    fn new(toml_content: &str) -> Conf {
1030        let value = toml_content.parse::<Table>().unwrap();
1031        let mut regions = Vec::new();
1032        if let Some(parsed_regions) = value.get("regions") {
1033            let parsed_regions = parsed_regions.as_array().unwrap();
1034            for region in parsed_regions {
1035                let x = region.as_table().unwrap();
1036                let z: Region = Region {
1037                    condition: x.try_into().unwrap(),
1038                };
1039                regions.push(z);
1040            }
1041        }
1042
1043        let mut barcodes = HashMap::new();
1044        if let Some(parsed_barcodes) = value.get("barcodes") {
1045            let parsed_barcodes = parsed_barcodes.as_table().unwrap().iter();
1046            for (barcode_name, barcode_value) in parsed_barcodes {
1047                let barcode_table = barcode_value.as_table().unwrap();
1048                let barcode_struct: Barcode = Barcode {
1049                    condition: barcode_table.try_into().unwrap(),
1050                };
1051                barcodes.insert(barcode_name.clone(), barcode_struct);
1052            }
1053        }
1054        let mut conf = Conf {
1055            channels: 0,
1056            regions,
1057            barcodes,
1058            _channel_map: HashMap::new(),
1059        };
1060        conf.validate_post_init().unwrap();
1061        conf.generate_channel_map(512).unwrap();
1062        conf
1063    }
1064
1065    /// Validates the state of the [`Conf`] struct after initialization.
1066    ///
1067    /// This function checks if the [`Conf`] struct contains `regions`, and if not that the Barcodes has
1068    /// the required 'unclassified' or 'clasiffied' `barcodes` conditions.
1069    /// and returns a [`Result`] indicating whether the validation passed or failed.
1070    ///
1071    /// # Errors
1072    ///
1073    /// Returns an [`Err`] variant with a descriptive error message if the validation fails.
1074    ///
1075    /// # Examples
1076    ///
1077    /// ```rust, ignore
1078    ///     let conf = Conf::new("config.toml");
1079    ///     conf.validate_post_init().unwrap();
1080    /// ```
1081    /// # Returns
1082    ///
1083    /// - [`Ok(())`] if the validation passes and the `Conf` struct is in a valid state.
1084    /// - [`Err`] with a descriptive error message if the validation fails.
1085    fn validate_post_init(&self) -> Result<(), String> {
1086        let required_barcodes = ["unclassified", "classified"];
1087        if self.regions.is_empty()
1088            && !required_barcodes
1089                .iter()
1090                .all(|&required_barcode| self.barcodes.contains_key(required_barcode))
1091        {
1092            Err("This TOML configuration does not contain any `regions` or `barcodes` and cannot be used by readfish".to_string())
1093        } else {
1094            Ok(())
1095        }
1096    }
1097
1098    /// Generates a channel map based on the given number of channels and regions.
1099    ///
1100    /// This method splits the channels evenly among the regions and assigns each channel
1101    /// a corresponding region index, linking to the position of the region in `Conf.regions`.
1102    ///
1103    /// # Arguments
1104    ///
1105    /// * `channels` - The total number of channels.
1106    /// * `regions` - A slice of regions to distribute the channels among.
1107    ///
1108    /// # Returns
1109    ///
1110    /// A `HashMap<usize, usize>` representing the channel map, where the keys are the
1111    /// channel numbers and the values are the positions of the channels within the regions.
1112    ///
1113    /// # Examples
1114    ///
1115    /// ```
1116    /// # use std::collections::HashMap;
1117    /// # struct Region {}
1118    /// # fn generate_flowcell(flowcell_size: usize, split: usize, axis: usize, odd_even: bool) -> Vec<Vec<usize>> { vec![vec![1, 2, 3], vec![4, 5, 6]] }
1119    /// #
1120    /// # fn generate_channel_map(channels: usize, regions: &[Region]) -> HashMap<usize, usize> {
1121    /// #     let split_channels = generate_flowcell(channels, regions.len().max(1), 0, false);
1122    /// #     let mut channel_map = HashMap::new();
1123    /// #
1124    /// #     for (pos, (channels, region)) in split_channels.iter().zip(regions.iter()).enumerate() {
1125    /// #         for &channel in channels.iter() {
1126    /// #             channel_map.insert(channel, pos);
1127    /// #         }
1128    /// #     }
1129    /// #
1130    /// #     channel_map
1131    /// # }
1132    /// let regions = vec![
1133    ///     Region {},
1134    ///     Region {},
1135    /// ];
1136    ///
1137    ///
1138    /// let channel_map = generate_channel_map(6, &regions);
1139    /// // If we split our imaginary 6 channel flowcell into 2 regions.
1140    /// // NB This would panic in reality - as generate flowcell would not recognise 6 as a valid flow cell size.
1141    /// assert_eq!(channel_map.get(&1), Some(&0));
1142    /// assert_eq!(channel_map.get(&2), Some(&0));
1143    /// assert_eq!(channel_map.get(&3), Some(&0));
1144    /// assert_eq!(channel_map.get(&4), Some(&1));
1145    /// assert_eq!(channel_map.get(&5), Some(&1));
1146    /// assert_eq!(channel_map.get(&6), Some(&1));
1147    /// assert_eq!(channel_map.get(&7), None);
1148    /// ```
1149    ///
1150    /// # Panics
1151    ///
1152    /// This method will panic if the `channels` parameter is zero.
1153    fn generate_channel_map(&mut self, channels: usize) -> Result<(), String> {
1154        let split_channels =
1155            nanopore::generate_flowcell(channels, self.regions.len().max(1), 1, false);
1156        let mut channel_map = HashMap::new();
1157
1158        for (pos, (channels, _region)) in split_channels.iter().zip(self.regions.iter()).enumerate()
1159        {
1160            for &channel in channels.iter() {
1161                channel_map.insert(channel, pos);
1162            }
1163        }
1164        self._channel_map = channel_map;
1165        Ok(())
1166    }
1167
1168    /// Get the condition for a given channel or barcode from the Conf TOML
1169    ///
1170    /// The barcode should be passed as an optional `&str` parameter. If barcoding
1171    /// is not being done and the barcode is not provided, the `channel` will be used instead.
1172    ///
1173    /// # Arguments
1174    ///
1175    /// * `channel` - The channel number for the result
1176    /// * `barcode` - Optional barcode classification from basecalling
1177    ///
1178    /// # Returns
1179    ///
1180    /// * `Ok` - A tuple `(bool, &dyn Condition)` representing the control flag and the condition
1181    /// * `Err` - A `String` containing an error message if the channel/barcode combination does not find a `Region` or a `Barcode`
1182    ///
1183    /// # Errors
1184    ///
1185    /// This function will return an error if both the region (channel) and barcode were not found in the configuration.
1186    ///
1187    pub fn get_conditions<T: AsRef<str> + std::fmt::Debug>(
1188        &self,
1189        channel: usize,
1190        barcode: Option<T>,
1191    ) -> Result<(bool, &dyn Condition), String> {
1192        let region_ = self.get_region(channel);
1193        let barcode_ = self.get_barcode(barcode.as_ref());
1194
1195        if let (Some(region), Some(barcode)) = (region_, barcode_) {
1196            let control = region.control() || barcode.control();
1197            Ok((control, barcode))
1198        } else if let Some(region) = region_ {
1199            let control = region.control();
1200            Ok((control, region))
1201        } else if let Some(barcode) = barcode_ {
1202            let control = barcode.control();
1203            Ok((control, barcode))
1204        } else {
1205            Err(format!(
1206                "Both region (channel={}) and barcode ({:?}) were not found. This config is invalid!",
1207                channel, barcode
1208            ))
1209        }
1210    }
1211
1212    /// Get the region for a given channel.
1213    ///
1214    /// Parameters:
1215    /// - `channel`: The channel number.
1216    ///
1217    /// Returns:
1218    /// - Returns an [`Option`] containing a reference to the [`Region`] if a region exists for the given channel,
1219    ///   otherwise returns [`None`].
1220    fn get_region(&self, channel: usize) -> Option<&Region> {
1221        if let Some(channel_index) = self._channel_map.get(&channel) {
1222            self.regions.get(*channel_index)
1223        } else {
1224            None
1225        }
1226    }
1227
1228    /// Get the barcode condition for a given barcode name.
1229    ///
1230    /// Parameters:
1231    /// - `barcode`: The name of the barcode, example "barcode01".
1232    ///
1233    /// Returns:
1234    /// - Returns an [`Option`] containing a reference to the [`Barcode`] if a barcode exists for the given name,
1235    ///   otherwise returns [`None`]. If the `barcode` parameter is [`None`], function returns [`None`].
1236    fn get_barcode<T: AsRef<str>>(&self, barcode: Option<T>) -> Option<&Barcode> {
1237        if let Some(barcode_name) = barcode {
1238            if !self.barcodes.is_empty() {
1239                self.barcodes
1240                    .get(barcode_name.as_ref())
1241                    .or_else(|| self.barcodes.get("classified"))
1242            } else {
1243                None
1244            }
1245        } else {
1246            None
1247        }
1248    }
1249    /// Get the targets associated with a specific channel and barcode (if provided) from the configuration.
1250    ///
1251    /// This function looks up the given `channel` and `barcode` (optional) in the configuration and returns the corresponding targets.
1252    /// If the combination of `channel` and `barcode` is not found in the configuration, or if the condition associated with the
1253    /// combination does not have targets, this function will return a reference to the default targets.
1254    ///
1255    /// # Arguments
1256    ///
1257    /// * `channel`: The channel number for the result.
1258    /// * `barcode`: The optional barcode classification from basecalling. If `Some`, it will be override the `channel` to find the targets.
1259    ///
1260    /// # Returns
1261    ///
1262    /// A reference to the `Targets` associated with the given `channel` and `barcode` combination.
1263    /// If the combination is not found, the function returns a reference to the default targets.
1264    pub fn get_targets(&self, channel: usize, barcode: Option<&str>) -> &Targets {
1265        let (_control, condition) = self.get_conditions(channel, barcode).unwrap();
1266        condition.get_targets()
1267    }
1268
1269    /// Make a decision based on the provided inputs for the specified channel and barcode (if provided).
1270    /// Todo: Write unit tests/integration tests for this function.
1271    /// This function takes several parameters, including `channel`, `barcode`, `contig`, `strand`, and `coord`,
1272    /// and determines whether the given coordinates are considered "on target" or not based on the configuration.
1273    ///
1274    /// # Arguments
1275    ///
1276    /// * `channel`: The channel number associated with the decision-making process.
1277    /// * `barcode`: The optional barcode classification from basecalling. If `Some`, it will be used along with the `channel` to find the relevant targets.
1278    /// * `contig`: The name of the contig where the coordinates are located.
1279    /// * `strand`: The strand information. This can be any type that implements the `ToString` trait, such as a `String` or `&str`.
1280    /// * `coord`: The coordinate position to check against the targets.
1281    ///
1282    /// # Returns
1283    ///
1284    /// A boolean value indicating whether the given `contig`, `strand`, and `coord` are considered "on target" or not based on the configuration.
1285    /// If the combination of `channel` and `barcode` is not found in the configuration, the function will use the default targets.
1286    ///
1287    /// # Example
1288    ///
1289    /// ```rust,ignore
1290    /// # use your_crate::YourConfStruct;
1291    /// # let conf = YourConfStruct::new(); // Assume you have your configuration instance.
1292    /// let channel = 1;
1293    /// let barcode = Some("barcode01");
1294    /// let contig = "chr1";
1295    /// let strand = "+";
1296    /// let coord = 1000;
1297    ///
1298    /// let decision = conf.make_decision(channel, barcode, contig, strand, coord);
1299    /// println!("Decision: {}", decision);
1300    /// ```
1301    pub fn make_decision<T: ToString>(
1302        &self,
1303        channel: usize,
1304        barcode: Option<&str>,
1305        contig: &str,
1306        strand: T,
1307        coord: usize,
1308    ) -> bool {
1309        let targets = self.get_targets(channel, barcode);
1310        targets.check_coords(contig, strand, coord)
1311    }
1312}
1313
1314#[cfg(test)]
1315mod tests {
    // BED file with not 6 rows, BED file with wrong types, CSV with wrong types, CSV with more than 4 rows
1317    use toml::{Table, Value};
1318
1319    use super::*;
1320    use std::fs;
1321    use std::path::PathBuf;
1322
1323    fn get_resource_dir() -> PathBuf {
1324        let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
1325        path.push("resources/");
1326        path
1327    }
1328
1329    fn get_test_file(file: &str) -> PathBuf {
1330        let mut path = get_resource_dir();
1331        path.push(file);
1332        path
1333    }
1334
    /// Region-only TOML fixture with two `[[regions]]` entries: `Rapid_CNS`, whose
    /// targets are given as a BED file path, and `Direct_CNS`, whose targets are given
    /// inline as `contig,start,stop,strand` strings. No barcodes are defined.
    fn test_toml_string() -> &'static str {
        r#"
        [[regions]]
        name = "Rapid_CNS"
        min_chunks = 1
        max_chunks = 4
        targets = "resources/panel_adaptive_nogenenames_20122021_hg38.bed"
        single_off = "unblock"
        multi_off = "unblock"
        single_on = "stop_receiving"
        multi_on = "stop_receiving"
        no_seq = "proceed"
        no_map = "proceed"


        [[regions]]
        name = "Direct_CNS"
        min_chunks = 1
        max_chunks = 4
        targets = ["chr2,3001,4000,-", "chr2,3000,4000,-", "chr20,3000,4000,-"]
        single_off = "unblock"
        multi_off = "unblock"
        single_on = "stop_receiving"
        multi_on = "stop_receiving"
        no_seq = "proceed"
        no_map = "proceed""#
    }
1362
    /// Barcode-only TOML fixture (no regions). It defines the `unclassified` and
    /// `classified` entries plus `barcode01`, `barcode02` and `barcode03`; only
    /// `barcode03` lists targets (two contig names), the others have empty target lists.
    fn test_barcoded_toml_string() -> &'static str {
        r#"
        [barcodes.unclassified]
        name = "unclassified_reads"
        control = false
        min_chunks = 0
        max_chunks = 4
        targets = []
        single_on = "unblock"
        multi_on = "unblock"
        single_off = "unblock"
        multi_off = "unblock"
        no_seq = "proceed"
        no_map = "proceed"

        [barcodes.classified]
        name = "classified_reads"
        control = false
        min_chunks = 0
        max_chunks = 4
        targets = []
        single_on = "unblock"
        multi_on = "unblock"
        single_off = "unblock"
        multi_off = "unblock"
        no_seq = "proceed"
        no_map = "proceed"

        [barcodes.barcode01]
        name = "barcode01"
        control = false
        min_chunks = 0
        max_chunks = 4
        targets = []
        single_on = "unblock"
        multi_on = "unblock"
        single_off = "unblock"
        multi_off = "unblock"
        no_seq = "proceed"
        no_map = "unblock"

        [barcodes.barcode02]
        name = "barcode02"
        control = false
        min_chunks = 0
        max_chunks = 4
        targets = []
        single_on = "unblock"
        multi_on = "unblock"
        single_off = "unblock"
        multi_off = "unblock"
        no_seq = "proceed"
        no_map = "unblock"

        [barcodes.barcode03]
        name = "barcode03"
        control = false
        min_chunks = 0
        max_chunks = 4
        targets = [
          "NC_002516.2",
          "NC_003997.3"
        ]
        single_on = "stop_receiving"
        multi_on = "stop_receiving"
        single_off = "unblock"
        multi_off = "unblock"
        no_seq = "proceed"
        no_map = "proceed""#
    }
1433
1434    #[test]
1435    fn test_from_string() {
1436        let toml_str = r#"
1437            [[regions]]
1438            name = "Rapid_CNS"
1439            min_chunks = 1
1440            max_chunks = 4
1441            targets = "resources/panel_adaptive_nogenenames_20122021_hg38.bed"
1442            single_off = "unblock"
1443            multi_off = "unblock"
1444            single_on = "stop_receiving"
1445            multi_on = "stop_receiving"
1446            no_seq = "proceed"
1447            no_map = "proceed"
1448        "#;
1449
1450        let conf = Conf::from_string(toml_str);
1451
1452        // Assert that the Conf instance is constructed correctly
1453        assert_eq!(conf.regions.len(), 1);
1454
1455        let region = &conf.regions[0];
1456        assert_eq!(region.condition.name, "Rapid_CNS");
1457        assert_eq!(region.condition.min_chunks, 1);
1458        assert_eq!(region.condition.max_chunks, 4);
1459        assert_eq!(
1460            region.condition.targets.value,
1461            TargetType::ViaFile("resources/panel_adaptive_nogenenames_20122021_hg38.bed".into())
1462        );
1463        assert_eq!(region.condition.single_off, "unblock".into());
1464        assert_eq!(region.condition.multi_off, "unblock".into());
1465        assert_eq!(region.condition.single_on, "stop_receiving".into());
1466        assert_eq!(region.condition.multi_on, "stop_receiving".into());
1467        assert_eq!(region.condition.no_seq, "proceed".into());
1468        assert_eq!(region.condition.no_map, "proceed".into());
1469    }
1470
1471    // todo need a barcode and region containing toml
1472    #[test]
1473    fn test_get_conditions() {
1474        let test_toml = test_toml_string();
1475        let conf = Conf::from_string(test_toml);
1476        let (_control, x) = conf.get_conditions::<String>(10, None).unwrap();
1477        // Convert the `Box<dyn Condition>` back into a `Region` if it is one
1478        if let Some(region) = x.any().downcast_ref::<Region>() {
1479            // Use the `Region` here
1480            println!("It's a Region: {:?}", region);
1481        } else if let Some(barcode) = x.any().downcast_ref::<Barcode>() {
1482            // Convert the `Box<dyn Condition>` back into a `Barcode` if it is one
1483            // Use the `Barcode` here
1484            println!("It's a Barcode: {:?}", barcode);
1485        } else {
1486            println!("It's neither a Region nor a Barcode");
1487        }
1488    }
1489
1490    #[test]
1491    fn test_get_region() {
1492        let test_toml = test_toml_string();
1493        let conf = Conf::from_string(test_toml);
1494        let region = conf.get_region(1).unwrap();
1495        assert_eq!(region.condition.name, "Direct_CNS");
1496        let region = conf.get_region(128).unwrap();
1497        assert_eq!(region.condition.name, "Rapid_CNS")
1498    }
1499
1500    #[test]
1501    fn test_get_regions_no_regions() {
1502        let test_toml = test_barcoded_toml_string();
1503        let conf = Conf::from_string(test_toml);
1504        let region = conf.get_region(1);
1505        assert_eq!(region, None);
1506        let region = conf.get_region(128);
1507        assert_eq!(region, None)
1508    }
1509
1510    #[test]
1511    fn test_generate_channel_map() {
1512        let test_toml = test_toml_string();
1513        let mut conf = Conf::from_string(test_toml);
1514        conf.generate_channel_map(512).unwrap();
1515        assert_eq!(conf._channel_map.get(&121).unwrap(), &0_usize);
1516        assert_eq!(conf._channel_map.get(&12).unwrap(), &1_usize);
1517    }
1518
1519    #[test]
1520    fn test_generate_channel_map_barcode() {
1521        let test_toml = test_barcoded_toml_string();
1522        let mut conf = Conf::from_string(test_toml);
1523        conf.generate_channel_map(512).unwrap();
1524        assert_eq!(conf._channel_map.get(&121), None);
1525        assert_eq!(conf._channel_map.get(&12), None);
1526    }
1527
1528    #[test]
1529    fn test_conf_validate_post_init() {
1530        let test_toml = test_barcoded_toml_string();
1531        let conf = Conf::from_string(test_toml);
1532        conf.validate_post_init().unwrap();
1533    }
1534
1535    // Now try without the unclassified barcode condition
1536    #[test]
1537    #[should_panic]
1538    fn test_conf_validate_post_init_panic() {
1539        let test_toml = test_barcoded_toml_string();
1540        let mut conf = Conf::from_string(test_toml);
1541        conf.barcodes.remove("unclassified");
1542        conf.validate_post_init().unwrap();
1543    }
1544
1545    #[test]
1546    fn test_get_csv_record_strand() {
1547        let record = CsvRecord {
1548            contig: "chr1".to_string(),
1549            start: Some(100),
1550            stop: Some(200),
1551            strand: Some("+".to_string()),
1552        };
1553        let strand = record.get_strand();
1554        assert_eq!(strand, Some(Strand::Forward));
1555        let record = CsvRecord {
1556            contig: "chr1".to_string(),
1557            start: Some(100),
1558            stop: Some(200),
1559            strand: Some("-1".to_string()),
1560        };
1561        let strand = record.get_strand();
1562        assert_eq!(strand, Some(Strand::Reverse))
1563    }
1564
1565    #[test]
1566    fn test_insert_into_targets() {
1567        use std::collections::HashMap;
1568        let mut targets: HashMap<StrandWrapper, HashedTargets> = HashMap::new();
1569        let record = CsvRecord {
1570            contig: "chr1".to_string(),
1571            start: Some(100),
1572            stop: Some(200),
1573            strand: Some("+".to_string()),
1574        };
1575        Targets::insert_into_targets(&mut targets, &record, record.get_strand().unwrap());
1576        assert_eq!(targets.len(), 1);
1577        assert_eq!(
1578            targets.get(&StrandWrapper(Strand::Forward)).unwrap().len(),
1579            1
1580        );
1581        assert_eq!(
1582            targets
1583                .get(&StrandWrapper(Strand::Forward))
1584                .unwrap()
1585                .get("chr1")
1586                .unwrap()
1587                .len(),
1588            1
1589        );
1590        assert_eq!(
1591            targets
1592                .get(&StrandWrapper(Strand::Forward))
1593                .unwrap()
1594                .get("chr1")
1595                .unwrap()[0],
1596            (100, 200)
1597        );
1598    }
1599
1600    #[test]
1601    #[cfg_attr(miri, ignore)]
1602    fn read_toml() {
1603        let test_toml = get_test_file("RAPID_CNS2.toml");
1604        let toml_content = fs::read_to_string(test_toml).unwrap();
1605        let value = toml_content.parse::<Table>().unwrap();
1606        // println!("{:#?}", value);
1607        assert_eq!(
1608            value["regions"][0]["targets"].as_str(),
1609            Some("resources/panel_adaptive_nogenenames_20122021_hg38.bed")
1610        );
1611        assert!(match value["regions"][1]["targets"] {
1612            Value::Array(_) => true,
1613            Value::String(_) => false,
1614            _ => false,
1615        })
1616    }
1617
1618    #[test]
1619    #[cfg_attr(miri, ignore)]
1620    fn test_load_conf() {
1621        let test_toml = get_test_file("RAPID_CNS2.toml");
1622        let conf = Conf::from_file(test_toml);
1623        assert!(conf
1624            .regions
1625            .get(0)
1626            .map(|x| x.condition.name == "Rapid_CNS")
1627            .unwrap_or(false));
1628        assert!(conf
1629            .regions
1630            .get(1)
1631            .map(|x| x.condition.name == "Direct_CNS")
1632            .unwrap_or(false));
1633        assert!(conf
1634            .regions
1635            .get(1)
1636            .map(
1637                |x| x.condition.targets._targets[&StrandWrapper(Strand::Reverse)]["chr2"][0]
1638                    == (3000_usize, 4000_usize)
1639            )
1640            .unwrap_or(false));
1641        assert!(conf.barcodes.is_empty())
1642    }
1643
1644    #[test]
1645    fn test_merge_intervals() {
1646        assert_eq!(
1647            Targets::_merge_intervals(&mut vec![
1648                (11, 15),
1649                (1, 3),
1650                (14, 17),
1651                (2, 4),
1652                (15, 100),
1653                (169, 173),
1654                (10, 29)
1655            ]),
1656            vec![(1, 4), (10, 100), (169, 173)]
1657        )
1658    }
1659
1660    #[test]
1661    fn test_make_targets() {
1662        let targets: Targets = Targets::new(TargetType::Direct(vec![
1663            "chr1,10,20,+".to_string(),
1664            "chr1,15,30,+".to_string(),
1665        ]));
1666        assert_eq!(
1667            targets
1668                ._targets
1669                .get(&StrandWrapper(Strand::Forward))
1670                .unwrap()
1671                .get("chr1")
1672                .unwrap(),
1673            &vec![(10, 30)]
1674        )
1675    }
1676
1677    #[test]
1678    fn test_get_coord() {
1679        let targets: Targets = Targets::new(TargetType::Direct(vec![
1680            "chr1,10,20,+".to_string(),
1681            "chr1,15,30,+".to_string(),
1682        ]));
1683        assert_eq!(
1684            targets
1685                ._targets
1686                .get(&StrandWrapper(Strand::Forward))
1687                .unwrap()
1688                .get("chr1")
1689                .unwrap(),
1690            &vec![(10, 30)]
1691        );
1692        assert!(targets.check_coords("chr1", Strand::Forward, 15));
1693        assert!(targets.check_coords("chr1", "+", 15));
1694        assert!(targets.check_coords("chr1", 1, 15));
1695        assert!(!targets.check_coords("chr1", 1, 40));
1696        assert!(!targets.check_coords("chr2", 1, 40));
1697        assert!(!targets.check_coords("chr1", "-", 15));
1698        assert!(!targets.check_coords("chr1", -1, 15));
1699    }
1700
1701    #[test]
1702    fn test_get_coord_contig() {
1703        let targets: Targets = Targets::new(TargetType::Direct(vec!["chr1".to_string()]));
1704        assert_eq!(
1705            targets
1706                ._targets
1707                .get(&StrandWrapper(Strand::Forward))
1708                .unwrap()
1709                .get("chr1")
1710                .unwrap(),
1711            &vec![(0_usize, usize::MAX)]
1712        );
1713        assert!(targets.check_coords("chr1", Strand::Forward, 15));
1714        assert!(targets.check_coords("chr1", "+", 15));
1715        assert!(targets.check_coords("chr1", 1, 15));
1716        assert!(targets.check_coords("chr1", 1, 40));
1717        assert!(!targets.check_coords("chr2", 1, 40));
1718        assert!(targets.check_coords("chr1", "-", 15));
1719        assert!(targets.check_coords("chr1", -1, 15));
1720    }
1721
1722    #[test]
1723    #[cfg_attr(miri, ignore)]
1724    fn test_load_barcoded_conf() {
1725        let test_toml = get_test_file("clockface.toml");
1726        let conf = Conf::from_file(test_toml);
1727        assert!(conf.regions.is_empty());
1728        assert_eq!(
1729            conf.barcodes.get("barcode01").unwrap().condition.name,
1730            "barcode01"
1731        );
1732        assert_eq!(
1733            conf.barcodes.get("barcode02").unwrap().condition.name,
1734            "barcode02"
1735        );
1736        assert_eq!(
1737            conf.barcodes.get("barcode03").unwrap().condition.name,
1738            "barcode03"
1739        );
1740        assert!(conf
1741            .barcodes
1742            .get("barcode03")
1743            .map(
1744                |x| x.condition.targets._targets[&StrandWrapper(Strand::Reverse)]["NC_002516.2"][0]
1745                    == (0_usize, usize::MAX)
1746            )
1747            .unwrap_or(false))
1748    }
1749}