bio_types/annot/
spliced.rs

1// Copyright 2017 Nicholas Ingolia
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6//! Spliced region on a named sequence, e.g., the reverse strand of
7//! chromosome V, exon #1 at 166,885 through 166,875 and exon #2 at
8//! 166,771 through 166,237.
9
10use std::cmp::{max, min};
11use std::convert::Into;
12use std::fmt::{self, Display, Formatter};
13use std::ops::Neg;
14use std::str::FromStr;
15
16use regex::Regex;
17
18use crate::annot::contig::Contig;
19use crate::annot::loc::Loc;
20use crate::annot::pos::Pos;
21use crate::annot::*;
22use crate::strand::*;
23
24// The spliced location representation inherently cannot represent
25// "bad" splicing structures. Locations comprise a first exon length,
26// with a vector of intron-length / exon-length pairs, all type usize
27// and hence non-negative.
28//
29// The InEx data type used to represent intron-exon pairs after the
30// first exon enforces an additional strict positivity constraint
31// (i.e., >0) on the lengths. This further eliminates degeneracy in
32// representations: only one unique set of positive-length exons with
33// interleaved positive-length introns can represent a splicing
34// structure.
35mod inex {
36    use std::slice::Iter;
37
38    use super::SplicingError;
39
40    #[derive(Debug, Clone, Hash, PartialEq, Eq)]
41    pub struct InEx {
42        intron_length: usize,
43        exon_length: usize,
44    }
45
46    impl InEx {
47        pub fn new(intron_length: usize, exon_length: usize) -> Result<Self, SplicingError> {
48            if intron_length < 1 {
49                Err(SplicingError::IntronLength)
50            } else if exon_length < 1 {
51                Err(SplicingError::ExonLength)
52            } else {
53                Ok(InEx {
54                    intron_length,
55                    exon_length,
56                })
57            }
58        }
59        pub fn intron_length(&self) -> usize {
60            self.intron_length
61        }
62        pub fn exon_length(&self) -> usize {
63            self.exon_length
64        }
65        pub fn length(&self) -> usize {
66            self.intron_length + self.exon_length
67        }
68    }
69
70    // Represent just the start (relative to the start of the location) and length of exons
71    // Useful for internal coordinate math
72    #[derive(Debug, Clone, Hash, PartialEq, Eq)]
73    pub struct Ex {
74        start: usize,
75        length: usize,
76    }
77
78    impl Ex {
79        pub fn start(&self) -> usize {
80            self.start
81        }
82        pub fn length(&self) -> usize {
83            self.length
84        }
85        pub fn end(&self) -> usize {
86            self.start + self.length
87        }
88    }
89
90    // Iterator over the Ex exons from lowest to highest coordinate
91    pub struct Exes<'a> {
92        state: ExesState,
93        curr_start: usize,
94        rest: Iter<'a, InEx>,
95    }
96
97    enum ExesState {
98        FirstExon(usize),
99        LaterExon,
100    }
101
102    impl<'a> Exes<'a> {
103        pub fn new(exon_0_length: usize, inexes: &'a [InEx]) -> Self {
104            Exes {
105                state: ExesState::FirstExon(exon_0_length),
106                curr_start: 0,
107                rest: inexes.iter(),
108            }
109        }
110    }
111
112    impl<'a> Iterator for Exes<'a> {
113        type Item = Ex;
114
115        fn next(&mut self) -> Option<Ex> {
116            match self.state {
117                ExesState::FirstExon(len) => {
118                    let ex = Ex {
119                        start: self.curr_start,
120                        length: len,
121                    };
122                    self.curr_start += len;
123                    self.state = ExesState::LaterExon;
124                    Some(ex)
125                }
126                ExesState::LaterExon => match self.rest.next() {
127                    Some(inex) => {
128                        let ex = Ex {
129                            start: self.curr_start + inex.intron_length(),
130                            length: inex.exon_length(),
131                        };
132                        self.curr_start += inex.length();
133                        Some(ex)
134                    }
135                    None => None,
136                },
137            }
138        }
139    }
140
141    // Represent just the start (relative to the start of the location) and length of introns
142    // Useful for internal coordinate math
143    #[derive(Debug, Clone, Hash, PartialEq, Eq)]
144    pub struct In {
145        start: usize,
146        length: usize,
147    }
148
149    #[allow(dead_code)]
150    impl In {
151        pub fn start(&self) -> usize {
152            self.start
153        }
154        pub fn length(&self) -> usize {
155            self.length
156        }
157        pub fn end(&self) -> usize {
158            self.start + self.length
159        }
160    }
161
162    // Iterator over the Ex introns from lowest to highest coordinate
163    pub struct Ins<'a> {
164        curr_start: usize,
165        rest: Iter<'a, InEx>,
166    }
167
168    impl<'a> Ins<'a> {
169        #[allow(dead_code)]
170        pub fn new(exon_0_length: usize, inexes: &'a [InEx]) -> Self {
171            Ins {
172                curr_start: exon_0_length,
173                rest: inexes.iter(),
174            }
175        }
176    }
177
178    impl<'a> Iterator for Ins<'a> {
179        type Item = In;
180
181        fn next(&mut self) -> Option<In> {
182            match self.rest.next() {
183                Some(inex) => {
184                    let intr = In {
185                        start: self.curr_start,
186                        length: inex.intron_length(),
187                    };
188                    self.curr_start += inex.length();
189                    Some(intr)
190                }
191                None => None,
192            }
193        }
194    }
195}
196
197/// Spliced sequence annotation on a particular, named sequence
198/// (e.g. a chromosome).
199///
200/// Parameterized over the type of the reference sequence identifier
201/// and over the strandedness of the position.
202///
203/// The display format for a `Spliced` is
204/// _chr:start_0-end_0;start_1-end_1;...;start_N-end_N(+/-/.)_. The
205/// boundaries for each individual exon are given as a half-open
206/// 0-based interval, like the Rust `Range` and BED format.
207///
208/// ```
209/// # use bio_types::strand::ReqStrand;
210/// # use bio_types::annot::AnnotError;
211/// # use bio_types::annot::spliced::{Spliced,SplicingError};
212/// # fn try_main() -> Result<(), Box<SplicingError>> {
213/// let tad3 = Spliced::with_lengths_starts("chrXII".to_owned(), 765265,
214///                                         &vec![808,52,109], &vec![0,864,984],
215///                                         ReqStrand::Reverse)?;
216/// assert_eq!(tad3.to_string(), "chrXII:765265-766073;766129-766181;766249-766358(-)");
217/// let tad3_exons = tad3.exon_contigs();
218/// assert_eq!(tad3_exons.len(), 3);
219/// assert_eq!(tad3_exons[0].to_string(), "chrXII:766249-766358(-)");
220/// assert_eq!(tad3_exons[1].to_string(), "chrXII:766129-766181(-)");
221/// assert_eq!(tad3_exons[2].to_string(), "chrXII:765265-766073(-)");
222/// # Ok(())
223/// # }
224/// # fn main() { try_main().unwrap(); }
225/// ```
226#[derive(Debug, Clone, Hash, PartialEq, Eq)]
227pub struct Spliced<R, S> {
228    refid: R,
229    start: isize,
230    exon_0_length: usize,
231    inexes: Vec<inex::InEx>,
232    strand: S,
233}
234
235impl<R, S> Spliced<R, S> {
236    /// Construct a new, single-exon "spliced" location
237    ///
238    /// ```
239    /// use std::rc::Rc;
240    /// use bio_types::annot::spliced::Spliced;
241    /// use bio_types::strand::ReqStrand;
242    /// let chr = Rc::new("chrX".to_owned());
243    /// let tma22 = Spliced::new(chr, 461829, 462426 - 461829, ReqStrand::Forward);
244    /// ```
245    pub fn new(refid: R, start: isize, exon_0_length: usize, strand: S) -> Self {
246        Spliced {
247            refid,
248            start,
249            exon_0_length,
250            inexes: Vec::new(),
251            strand,
252        }
253    }
254
255    /// Construct a multi-exon "spliced" location using BED-style exon
256    /// starts and lengths.
257    pub fn with_lengths_starts(
258        refid: R,
259        start: isize,
260        exon_lengths: &[usize],
261        exon_starts: &[usize],
262        strand: S,
263    ) -> Result<Self, SplicingError> {
264        if exon_starts.is_empty() {
265            return Err(SplicingError::NoExons);
266        } else if exon_starts[0] != 0 {
267            return Err(SplicingError::BlockStart);
268        } else if exon_starts.len() != exon_lengths.len() {
269            return Err(SplicingError::BlockMismatch);
270        }
271
272        let exon_0_length = exon_lengths[0];
273        let mut intron_start = exon_0_length;
274        let mut inexes = Vec::new();
275        for exno in 1..exon_starts.len() {
276            let exon_start = exon_starts[exno];
277            if intron_start >= exon_start {
278                return Err(SplicingError::BlockOverlap);
279            }
280            let intron_length = exon_start - intron_start;
281            let exon_length = exon_lengths[exno];
282            if exon_length == 0 {
283                return Err(SplicingError::ExonLength);
284            }
285            inexes.push(inex::InEx::new(intron_length, exon_length)?);
286            intron_start = exon_start + exon_length;
287        }
288
289        Ok(Spliced {
290            refid,
291            start,
292            exon_0_length,
293            inexes,
294            strand,
295        })
296    }
297
298    /// Number of exons
299    pub fn exon_count(&self) -> usize {
300        self.inexes.len() + 1
301    }
302
303    // This is the unique representation for zero-length Spliced
304    // locations, because InEx pairs have positive lengths for both
305    // introns and exons.
306    #[allow(dead_code)]
307    fn is_zero_length(&self) -> bool {
308        self.exon_0_length == 0 && self.inexes.is_empty()
309    }
310
311    fn exes(&self) -> inex::Exes {
312        inex::Exes::new(self.exon_0_length, &self.inexes)
313    }
314
315    /// Vector of exon starting positions, relative to the start of
316    /// the location overall.
317    ///
318    /// These positions run from left to right on the reference
319    /// sequence, regardless of the location's strand.
320    pub fn exon_starts(&self) -> Vec<usize> {
321        let mut starts = vec![0];
322        let mut intron_start = self.exon_0_length;
323        for inex in self.inexes.iter() {
324            starts.push(intron_start + inex.intron_length());
325            intron_start += inex.length();
326        }
327        starts
328    }
329
330    /// Vector of exon lengths.
331    ///
332    /// Exon lengths are given from left to right on the reference
333    /// sequence, regardless of the location's strand.
334    pub fn exon_lengths(&self) -> Vec<usize> {
335        let mut lengths = vec![self.exon_0_length];
336        for inex in self.inexes.iter() {
337            lengths.push(inex.exon_length());
338        }
339        lengths
340    }
341
342    /// Total length of exons only.
343    ///
344    /// The `length` method from the `Loc` trait returns the total
345    /// length spanned by the annotation, including both introns and
346    /// exons.
347    pub fn exon_total_length(&self) -> usize {
348        self.exes().map(|e| e.length()).sum()
349    }
350
351    /// Convert into a stranded sequence location on the specified strand
352    pub fn into_stranded(self, strand: ReqStrand) -> Spliced<R, ReqStrand> {
353        Spliced {
354            refid: self.refid,
355            start: self.start,
356            exon_0_length: self.exon_0_length,
357            inexes: self.inexes,
358            strand,
359        }
360    }
361
362    pub fn contig_cover(self) -> Contig<R, S> {
363        let length = self.length();
364        Contig::new(self.refid, self.start, length, self.strand)
365    }
366
367    // Get exons in reference sequence order.
368    fn exon_contigs_vec(&self) -> Vec<Contig<R, S>>
369    where
370        R: Clone,
371        S: Copy,
372    {
373        let mut exons = Vec::new();
374
375        for ex in self.exes() {
376            exons.push(Contig::new(
377                self.refid().clone(),
378                self.start + ex.start() as isize,
379                ex.length(),
380                self.strand,
381            ));
382        }
383
384        exons
385    }
386}
387
388impl<R> Spliced<R, ReqStrand> {
389    pub fn exon_contigs(&self) -> Vec<Contig<R, ReqStrand>>
390    where
391        R: Clone,
392    {
393        let mut exons = self.exon_contigs_vec();
394        if self.strand == ReqStrand::Reverse {
395            exons.reverse()
396        }
397        exons
398    }
399}
400
401impl<R, S> Loc for Spliced<R, S> {
402    type RefID = R;
403    type Strand = S;
404    fn refid(&self) -> &R {
405        &self.refid
406    }
407    fn start(&self) -> isize {
408        self.start
409    }
410    fn length(&self) -> usize {
411        let mut len = self.exon_0_length;
412        for inex in self.inexes.iter() {
413            len += inex.length()
414        }
415        len
416    }
417    fn strand(&self) -> S
418    where
419        S: Copy,
420    {
421        self.strand
422    }
423
424    fn pos_into<T>(&self, pos: &Pos<Self::RefID, T>) -> Option<Pos<(), T>>
425    where
426        Self::RefID: Eq,
427        Self::Strand: Into<ReqStrand> + Copy,
428        T: Neg<Output = T> + Copy,
429    {
430        if (self.refid != *pos.refid()) || pos.pos() < self.start {
431            return None;
432        }
433
434        let pos_offset = (pos.pos() - self.start) as usize;
435
436        let mut offset_before = 0;
437        for ex in self.exes() {
438            if pos_offset >= ex.start() && pos_offset < ex.end() {
439                let offset = (offset_before + pos_offset - ex.start()) as isize;
440                let into = match self.strand().into() {
441                    ReqStrand::Forward => Pos::new((), offset, pos.strand()),
442                    ReqStrand::Reverse => Pos::new(
443                        (),
444                        self.exon_total_length() as isize - (offset + 1),
445                        -pos.strand(),
446                    ),
447                };
448                return Some(into);
449            }
450            offset_before += ex.length();
451        }
452
453        None
454    }
455
456    fn pos_outof<Q, T>(&self, pos: &Pos<Q, T>) -> Option<Pos<Self::RefID, T>>
457    where
458        Self::RefID: Clone,
459        Self::Strand: Into<ReqStrand> + Copy,
460        T: Neg<Output = T> + Copy,
461    {
462        let mut offset = match self.strand().into() {
463            ReqStrand::Forward => pos.pos(),
464            ReqStrand::Reverse => self.exon_total_length() as isize - (pos.pos() + 1),
465        };
466
467        if offset < 0 {
468            return None;
469        }
470
471        for ex in self.exes() {
472            if offset < ex.length() as isize {
473                return Some(Pos::new(
474                    self.refid.clone(),
475                    self.start + ex.start() as isize + offset,
476                    self.strand.into().on_strand(pos.strand()),
477                ));
478            }
479            offset -= ex.length() as isize;
480        }
481
482        None
483    }
484
485    fn contig_intersection<T>(&self, contig: &Contig<Self::RefID, T>) -> Option<Self>
486    where
487        Self::RefID: PartialEq + Clone,
488        Self::Strand: Copy,
489    {
490        if self.refid() != contig.refid() {
491            return None;
492        }
493
494        let contig_rel_start = if contig.start() < self.start {
495            0
496        } else {
497            (contig.start() - self.start) as usize
498        };
499        let contig_end = contig.start() + contig.length() as isize;
500        let contig_rel_end = if contig_end < self.start {
501            0
502        } else {
503            (contig_end - self.start) as usize
504        };
505
506        let mut exon_lengths = Vec::new();
507        let mut exon_starts = Vec::new();
508
509        for ex in self.exes() {
510            let start = max(contig_rel_start, ex.start());
511            let end = min(contig_rel_end, ex.end());
512
513            if start < end {
514                exon_starts.push(start - contig_rel_start);
515                exon_lengths.push(end - start);
516            }
517        }
518
519        if !exon_starts.is_empty() {
520            let first_start = exon_starts[0];
521            for start in exon_starts.iter_mut() {
522                *start -= first_start;
523            }
524            let ixn = Self::with_lengths_starts(
525                self.refid.clone(),
526                max(self.start, contig.start()) + first_start as isize,
527                &exon_lengths,
528                &exon_starts,
529                self.strand,
530            )
531            .unwrap_or_else(|e| {
532                panic!(
533                    "Creating intersection spliced: {:?} for {:?} {:?}",
534                    e, exon_lengths, exon_starts
535                )
536            });
537
538            Some(ixn)
539        } else {
540            None
541        }
542    }
543}
544
545impl<R, S> Display for Spliced<R, S>
546where
547    R: Display,
548    S: Display + Clone + Into<Strand>,
549{
550    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
551        write!(f, "{}:", self.refid)?;
552
553        let mut sep = false;
554        for ex in self.exes() {
555            let ex_start = self.start + ex.start() as isize;
556            write!(
557                f,
558                "{}{}-{}",
559                if sep { ";" } else { "" },
560                ex_start,
561                ex_start + ex.length() as isize
562            )?;
563            sep = true;
564        }
565        let strand: Strand = self.strand.clone().into();
566        if !strand.is_unknown() {
567            write!(f, "({})", self.strand)?;
568        }
569        Ok(())
570    }
571}
572
573impl<R, S> FromStr for Spliced<R, S>
574where
575    R: From<String>,
576    S: FromStr<Err = StrandError>,
577{
578    type Err = ParseAnnotError;
579
580    fn from_str(s: &str) -> Result<Self, Self::Err> {
581        lazy_static! {
582            static ref SPLICED_RE: Regex =
583                Regex::new(r"^(.*):(\d+)-(\d+)((?:;\d+-\d+)*)(\([+-]\))?$").unwrap();
584            static ref EXON_RE: Regex = Regex::new(r";(\d+)-(\d+)").unwrap();
585        }
586
587        let cap = SPLICED_RE.captures(s).ok_or(ParseAnnotError::BadAnnot)?;
588
589        let mut starts = Vec::new();
590        let mut lengths = Vec::new();
591
592        let first_start = cap[2].parse::<isize>().map_err(ParseAnnotError::ParseInt)?;
593        let first_end = cap[3].parse::<isize>().map_err(ParseAnnotError::ParseInt)?;
594        let strand = cap[5].parse::<S>().map_err(ParseAnnotError::ParseStrand)?;
595
596        starts.push(0);
597        lengths.push((first_end - first_start) as usize);
598
599        let exon_caps = EXON_RE.captures_iter(&cap[4]);
600
601        for exon_cap in exon_caps {
602            let next_start = exon_cap[1]
603                .parse::<isize>()
604                .map_err(ParseAnnotError::ParseInt)?;
605            let next_end = exon_cap[2]
606                .parse::<isize>()
607                .map_err(ParseAnnotError::ParseInt)?;
608            starts.push((next_start - first_start) as usize);
609            lengths.push((next_end - next_start) as usize);
610        }
611
612        let spliced = Spliced::with_lengths_starts(
613            R::from(cap[1].to_owned()),
614            first_start,
615            lengths.as_slice(),
616            starts.as_slice(),
617            strand,
618        )
619        .map_err(ParseAnnotError::Splicing)?;
620        Ok(spliced)
621    }
622}
623
624impl<R> From<Spliced<R, ReqStrand>> for Spliced<R, Strand> {
625    fn from(x: Spliced<R, ReqStrand>) -> Self {
626        Spliced {
627            refid: x.refid,
628            start: x.start,
629            exon_0_length: x.exon_0_length,
630            inexes: x.inexes,
631            strand: match x.strand {
632                ReqStrand::Forward => Strand::Forward,
633                ReqStrand::Reverse => Strand::Reverse,
634            },
635        }
636    }
637}
638
639impl<R> From<Spliced<R, NoStrand>> for Spliced<R, Strand> {
640    fn from(x: Spliced<R, NoStrand>) -> Self {
641        Spliced {
642            refid: x.refid,
643            start: x.start,
644            exon_0_length: x.exon_0_length,
645            inexes: x.inexes,
646            strand: Strand::Unknown,
647        }
648    }
649}
650
651impl<R> From<Spliced<R, Strand>> for Spliced<R, NoStrand> {
652    fn from(x: Spliced<R, Strand>) -> Self {
653        Spliced {
654            refid: x.refid,
655            start: x.start,
656            exon_0_length: x.exon_0_length,
657            inexes: x.inexes,
658            strand: NoStrand::Unknown,
659        }
660    }
661}
662
663impl<R> From<Spliced<R, ReqStrand>> for Spliced<R, NoStrand> {
664    fn from(x: Spliced<R, ReqStrand>) -> Self {
665        Spliced {
666            refid: x.refid,
667            start: x.start,
668            exon_0_length: x.exon_0_length,
669            inexes: x.inexes,
670            strand: NoStrand::Unknown,
671        }
672    }
673}
674
675/// Default stranded sequence position on a reference sequence named
676/// by a `String`.
677pub type SeqSplicedStranded = Spliced<String, ReqStrand>;
678
679/// Default unstranded sequence position on a reference sequence named
680/// by a `String`
681pub type SeqSplicedUnstranded = Spliced<String, NoStrand>;
682
683#[derive(Error, Debug)]
684pub enum SplicingError {
685    #[error("Invalid (non-positive) intron length")]
686    IntronLength,
687    #[error("Invalid (non-positive) exon length")]
688    ExonLength,
689    #[error("No exons")]
690    NoExons,
691    #[error("Exons do not start at position 0")]
692    BlockStart,
693    #[error("Number of exon starts != number of exon lengths")]
694    BlockMismatch,
695    #[error("Exon blocks overlap")]
696    BlockOverlap,
697}
698
699#[cfg(test)]
700mod tests {
701    use super::*;
702
703    #[test]
704    fn length_start_to_contig() {
705        //chrV    166236  166885  YER007C-A       0       -       166236  166885  0       2       535,11, 0,638,
706        let tma20 = Spliced::with_lengths_starts(
707            "chrV".to_owned(),
708            166236,
709            &vec![535, 11],
710            &vec![0, 638],
711            ReqStrand::Reverse,
712        )
713        .unwrap();
714        assert_eq!(tma20.exon_starts(), vec![0, 638]);
715        assert_eq!(tma20.exon_lengths(), vec![535, 11]);
716        assert_eq!(tma20.to_string(), "chrV:166236-166771;166874-166885(-)");
717        assert_eq!(
718            tma20,
719            tma20
720                .to_string()
721                .parse::<Spliced<String, ReqStrand>>()
722                .unwrap()
723        );
724        let tma20_exons = tma20.exon_contigs();
725        assert_eq!(tma20_exons.len(), 2);
726        assert_eq!(tma20_exons[0].to_string(), "chrV:166874-166885(-)");
727        assert_eq!(tma20_exons[1].to_string(), "chrV:166236-166771(-)");
728
729        //chrXVI  173151  174702  YPL198W 0       +       173151  174702  0       3       11,94,630,      0,420,921,
730        let rpl7b = Spliced::with_lengths_starts(
731            "chrXVI".to_owned(),
732            173151,
733            &vec![11, 94, 630],
734            &vec![0, 420, 921],
735            ReqStrand::Forward,
736        )
737        .unwrap();
738        assert_eq!(
739            rpl7b.to_string(),
740            "chrXVI:173151-173162;173571-173665;174072-174702(+)"
741        );
742        assert_eq!(
743            rpl7b,
744            rpl7b
745                .to_string()
746                .parse::<Spliced<String, ReqStrand>>()
747                .unwrap()
748        );
749        let rpl7b_exons = rpl7b.exon_contigs();
750        assert_eq!(rpl7b_exons.len(), 3);
751        assert_eq!(rpl7b_exons[0].to_string(), "chrXVI:173151-173162(+)");
752        assert_eq!(rpl7b_exons[1].to_string(), "chrXVI:173571-173665(+)");
753        assert_eq!(rpl7b_exons[2].to_string(), "chrXVI:174072-174702(+)");
754
755        //chrXII  765265  766358  YLR316C 0       -       765265  766358  0       3       808,52,109,     0,864,984,
756        let tad3 = Spliced::with_lengths_starts(
757            "chrXII".to_owned(),
758            765265,
759            &vec![808, 52, 109],
760            &vec![0, 864, 984],
761            ReqStrand::Reverse,
762        )
763        .unwrap();
764        assert_eq!(
765            tad3.to_string(),
766            "chrXII:765265-766073;766129-766181;766249-766358(-)"
767        );
768        assert_eq!(
769            tad3,
770            tad3.to_string()
771                .parse::<Spliced<String, ReqStrand>>()
772                .unwrap()
773        );
774        let tad3_exons = tad3.exon_contigs();
775        assert_eq!(tad3_exons.len(), 3);
776        assert_eq!(tad3_exons[0].to_string(), "chrXII:766249-766358(-)");
777        assert_eq!(tad3_exons[1].to_string(), "chrXII:766129-766181(-)");
778        assert_eq!(tad3_exons[2].to_string(), "chrXII:765265-766073(-)");
779    }
780
781    fn test_into_outof(
782        loc: &Spliced<String, ReqStrand>,
783        outstr: &str,
784        in_offset: isize,
785        in_strand: ReqStrand,
786    ) -> () {
787        let p0 = outstr.parse::<Pos<String, ReqStrand>>().unwrap();
788        let p0_into_expected = Pos::new((), in_offset, in_strand);
789        let p0_into_actual = loc.pos_into(&p0);
790        let p0_back_out_actual = loc.pos_outof(&p0_into_expected);
791        println!(
792            "{}\t{}\t{:?}\t{:?}\t{:?}",
793            outstr, p0, p0_into_expected, p0_into_actual, p0_back_out_actual
794        );
795        assert!(Some(p0_into_expected).same(&p0_into_actual));
796        assert!(Some(p0).same(&p0_back_out_actual));
797    }
798
799    fn test_no_into(loc: &Spliced<String, ReqStrand>, outstr: &str) -> () {
800        let p0 = outstr.parse::<Pos<String, ReqStrand>>().unwrap();
801        assert!(None.same(&loc.pos_into(&p0)));
802    }
803
804    #[test]
805    fn into_outof() {
806        //chrXVI  173151  174702  YPL198W 0       +       173151  174702  0       3       11,94,630,      0,420,921,
807        let rpl7b = Spliced::with_lengths_starts(
808            "chrXVI".to_owned(),
809            173151,
810            &vec![11, 94, 630],
811            &vec![0, 420, 921],
812            ReqStrand::Forward,
813        )
814        .unwrap();
815        let p0_into = Pos::new((), -1, ReqStrand::Forward);
816        assert!(None.same(&rpl7b.pos_outof(&p0_into)));
817
818        test_no_into(&rpl7b, "chrXVI:173150(+)");
819        test_into_outof(&rpl7b, "chrXVI:173151(+)", 0, ReqStrand::Forward);
820        test_into_outof(&rpl7b, "chrXVI:173152(-)", 1, ReqStrand::Reverse);
821        test_into_outof(&rpl7b, "chrXVI:173161(+)", 10, ReqStrand::Forward);
822        test_no_into(&rpl7b, "chrXVI:173162(+)");
823        test_no_into(&rpl7b, "chrXVI:173570(+)");
824        test_into_outof(&rpl7b, "chrXVI:173571(+)", 11, ReqStrand::Forward);
825        test_into_outof(&rpl7b, "chrXVI:173664(+)", 104, ReqStrand::Forward);
826        test_no_into(&rpl7b, "chrXVI:173665(+)");
827        test_no_into(&rpl7b, "chrXVI:174071(+)");
828        test_into_outof(&rpl7b, "chrXVI:174072(+)", 105, ReqStrand::Forward);
829        test_into_outof(&rpl7b, "chrXVI:174701(+)", 734, ReqStrand::Forward);
830        test_no_into(&rpl7b, "chrXVI:174702(+)");
831
832        let p0_into = Pos::new((), 735, ReqStrand::Forward);
833        assert!(None.same(&rpl7b.pos_outof(&p0_into)));
834
835        //chrXII  765265  766358  YLR316C 0       -       765265  766358  0       3       808,52,109,     0,864,984,
836        let tad3 = Spliced::with_lengths_starts(
837            "chrXII".to_owned(),
838            765265,
839            &vec![808, 52, 109],
840            &vec![0, 864, 984],
841            ReqStrand::Reverse,
842        )
843        .unwrap();
844
845        let p0_into = Pos::new((), -1, ReqStrand::Forward);
846        assert!(None.same(&tad3.pos_outof(&p0_into)));
847
848        test_no_into(&tad3, "chrXII:765264(-)");
849        test_into_outof(&tad3, "chrXII:765265(-)", 968, ReqStrand::Forward);
850        test_into_outof(&tad3, "chrXII:765266(+)", 967, ReqStrand::Reverse);
851        test_into_outof(&tad3, "chrXII:766072(-)", 161, ReqStrand::Forward);
852        test_no_into(&tad3, "chrXII:766073(-)");
853
854        test_no_into(&tad3, "chrXII:766128(-)");
855        test_into_outof(&tad3, "chrXII:766129(-)", 160, ReqStrand::Forward);
856        test_into_outof(&tad3, "chrXII:766180(-)", 109, ReqStrand::Forward);
857        test_no_into(&tad3, "chrXII:766181(-)");
858
859        test_no_into(&tad3, "chrXII:766248(-)");
860        test_into_outof(&tad3, "chrXII:766249(-)", 108, ReqStrand::Forward);
861        test_into_outof(&tad3, "chrXII:766357(-)", 0, ReqStrand::Forward);
862        test_no_into(&tad3, "chrXII:766358(-)");
863
864        let p0_into = Pos::new((), 969, ReqStrand::Forward);
865        assert!(None.same(&tad3.pos_outof(&p0_into)));
866    }
867
868    fn test_contig_ixn(
869        spl: &Spliced<String, ReqStrand>,
870        cb_str: &str,
871        cab_str: Option<String>,
872    ) -> () {
873        let cb = cb_str.parse::<Contig<String, ReqStrand>>().unwrap();
874        match spl.contig_intersection(&cb) {
875            None => assert_eq!(None, cab_str),
876            Some(cab) => assert_eq!(Some(cab.to_string()), cab_str),
877        };
878    }
879
880    #[test]
881    fn intersection() {
882        //chrXVI  173151  174702  YPL198W 0       +       173151  174702  0       3       11,94,630,      0,420,921,
883        let rpl7b = Spliced::with_lengths_starts(
884            "chrXVI".to_owned(),
885            173151,
886            &vec![11, 94, 630],
887            &vec![0, 420, 921],
888            ReqStrand::Forward,
889        )
890        .unwrap();
891
892        test_contig_ixn(
893            &rpl7b,
894            "chrXVI:173000-175000(+)",
895            Some("chrXVI:173151-173162;173571-173665;174072-174702(+)".to_owned()),
896        );
897
898        test_contig_ixn(
899            &rpl7b,
900            "chrXVI:173150-175000(+)",
901            Some("chrXVI:173151-173162;173571-173665;174072-174702(+)".to_owned()),
902        );
903        test_contig_ixn(
904            &rpl7b,
905            "chrXVI:173151-175000(+)",
906            Some("chrXVI:173151-173162;173571-173665;174072-174702(+)".to_owned()),
907        );
908        test_contig_ixn(
909            &rpl7b,
910            "chrXVI:173152-175000(+)",
911            Some("chrXVI:173152-173162;173571-173665;174072-174702(+)".to_owned()),
912        );
913        test_contig_ixn(
914            &rpl7b,
915            "chrXVI:173155-175000(+)",
916            Some("chrXVI:173155-173162;173571-173665;174072-174702(+)".to_owned()),
917        );
918        test_contig_ixn(
919            &rpl7b,
920            "chrXVI:173161-175000(+)",
921            Some("chrXVI:173161-173162;173571-173665;174072-174702(+)".to_owned()),
922        );
923        test_contig_ixn(
924            &rpl7b,
925            "chrXVI:173162-175000(+)",
926            Some("chrXVI:173571-173665;174072-174702(+)".to_owned()),
927        );
928        test_contig_ixn(
929            &rpl7b,
930            "chrXVI:173500-175000(+)",
931            Some("chrXVI:173571-173665;174072-174702(+)".to_owned()),
932        );
933        test_contig_ixn(
934            &rpl7b,
935            "chrXVI:173570-175000(+)",
936            Some("chrXVI:173571-173665;174072-174702(+)".to_owned()),
937        );
938        test_contig_ixn(
939            &rpl7b,
940            "chrXVI:173571-175000(+)",
941            Some("chrXVI:173571-173665;174072-174702(+)".to_owned()),
942        );
943        test_contig_ixn(
944            &rpl7b,
945            "chrXVI:173572-175000(+)",
946            Some("chrXVI:173572-173665;174072-174702(+)".to_owned()),
947        );
948        test_contig_ixn(
949            &rpl7b,
950            "chrXVI:173600-175000(+)",
951            Some("chrXVI:173600-173665;174072-174702(+)".to_owned()),
952        );
953        test_contig_ixn(
954            &rpl7b,
955            "chrXVI:173664-175000(+)",
956            Some("chrXVI:173664-173665;174072-174702(+)".to_owned()),
957        );
958        test_contig_ixn(
959            &rpl7b,
960            "chrXVI:173665-175000(+)",
961            Some("chrXVI:174072-174702(+)".to_owned()),
962        );
963        test_contig_ixn(
964            &rpl7b,
965            "chrXVI:174100-175000(+)",
966            Some("chrXVI:174100-174702(+)".to_owned()),
967        );
968        test_contig_ixn(&rpl7b, "chrXVI:174800-175000(+)", None);
969
970        test_contig_ixn(
971            &rpl7b,
972            "chrXVI:173150-174703(+)",
973            Some("chrXVI:173151-173162;173571-173665;174072-174702(+)".to_owned()),
974        );
975        test_contig_ixn(
976            &rpl7b,
977            "chrXVI:173150-174702(+)",
978            Some("chrXVI:173151-173162;173571-173665;174072-174702(+)".to_owned()),
979        );
980        test_contig_ixn(
981            &rpl7b,
982            "chrXVI:173150-174701(+)",
983            Some("chrXVI:173151-173162;173571-173665;174072-174701(+)".to_owned()),
984        );
985        test_contig_ixn(
986            &rpl7b,
987            "chrXVI:173000-174500(+)",
988            Some("chrXVI:173151-173162;173571-173665;174072-174500(+)".to_owned()),
989        );
990        test_contig_ixn(
991            &rpl7b,
992            "chrXVI:173000-174072(+)",
993            Some("chrXVI:173151-173162;173571-173665(+)".to_owned()),
994        );
995        test_contig_ixn(
996            &rpl7b,
997            "chrXVI:173000-173800(+)",
998            Some("chrXVI:173151-173162;173571-173665(+)".to_owned()),
999        );
1000        test_contig_ixn(
1001            &rpl7b,
1002            "chrXVI:173000-173666(+)",
1003            Some("chrXVI:173151-173162;173571-173665(+)".to_owned()),
1004        );
1005        test_contig_ixn(
1006            &rpl7b,
1007            "chrXVI:173000-173665(+)",
1008            Some("chrXVI:173151-173162;173571-173665(+)".to_owned()),
1009        );
1010        test_contig_ixn(
1011            &rpl7b,
1012            "chrXVI:173000-173664(+)",
1013            Some("chrXVI:173151-173162;173571-173664(+)".to_owned()),
1014        );
1015        test_contig_ixn(
1016            &rpl7b,
1017            "chrXVI:173000-173600(+)",
1018            Some("chrXVI:173151-173162;173571-173600(+)".to_owned()),
1019        );
1020        test_contig_ixn(
1021            &rpl7b,
1022            "chrXVI:173000-173571(+)",
1023            Some("chrXVI:173151-173162(+)".to_owned()),
1024        );
1025        test_contig_ixn(
1026            &rpl7b,
1027            "chrXVI:173000-173300(+)",
1028            Some("chrXVI:173151-173162(+)".to_owned()),
1029        );
1030        test_contig_ixn(
1031            &rpl7b,
1032            "chrXVI:173000-173155(+)",
1033            Some("chrXVI:173151-173155(+)".to_owned()),
1034        );
1035        test_contig_ixn(&rpl7b, "chrXVI:173000-173100(+)", None);
1036
1037        test_contig_ixn(
1038            &rpl7b,
1039            "chrXVI:173155-174500(+)",
1040            Some("chrXVI:173155-173162;173571-173665;174072-174500(+)".to_owned()),
1041        );
1042        test_contig_ixn(
1043            &rpl7b,
1044            "chrXVI:173600-174500(+)",
1045            Some("chrXVI:173600-173665;174072-174500(+)".to_owned()),
1046        );
1047        test_contig_ixn(
1048            &rpl7b,
1049            "chrXVI:173155-173600(+)",
1050            Some("chrXVI:173155-173162;173571-173600(+)".to_owned()),
1051        );
1052        test_contig_ixn(
1053            &rpl7b,
1054            "chrXVI:173590-173610(+)",
1055            Some("chrXVI:173590-173610(+)".to_owned()),
1056        );
1057
1058        test_contig_ixn(
1059            &rpl7b,
1060            "chrXVI:173155-173160(+)",
1061            Some("chrXVI:173155-173160(+)".to_owned()),
1062        );
1063        test_contig_ixn(
1064            &rpl7b,
1065            "chrXVI:174400-174500(+)",
1066            Some("chrXVI:174400-174500(+)".to_owned()),
1067        );
1068
1069        test_contig_ixn(&rpl7b, "chrXVI:173200-173300(+)", None);
1070        test_contig_ixn(&rpl7b, "chrXVI:173800-174000(+)", None);
1071    }
1072}