omics_coordinate/
interval.rs

1//! Intervals.
2
3use std::cmp::max;
4use std::cmp::min;
5
6use thiserror::Error;
7
8use crate::Contig;
9use crate::Position;
10use crate::Strand;
11use crate::System;
12use crate::coordinate;
13use crate::coordinate::Coordinate;
14use crate::position;
15use crate::position::Number;
16use crate::strand;
17use crate::system::Base;
18
19pub mod base;
20pub mod interbase;
21
22////////////////////////////////////////////////////////////////////////////////////////
23// Errors
24////////////////////////////////////////////////////////////////////////////////////////
25
26/// An error that occurs during clamping.
27#[derive(Error, Debug, PartialEq, Eq)]
28pub enum ClampError {
29    /// A mismatched contig error.
30    ///
31    /// This error occurs when one attempts to clamp an interval with another
32    /// interval that is not located on the same contig.
33    #[error("mismatched contigs: `{original}` and `{operand}`")]
34    MismatchedContigs {
35        /// The contig of the interval being clamped.
36        original: Contig,
37
38        /// The contig of the interval doing the clamping.
39        operand: Contig,
40    },
41
42    /// A mismatched strand error.
43    ///
44    /// This error occurs when one attempts to clamp an interval with another
45    /// interval that is not located on the same strand.
46    #[error("mismatched strand: `{original}` and `{operand}`")]
47    MismatchedStrand {
48        /// The strand of the interval being clamped.
49        original: Strand,
50
51        /// The strand of the interval doing the clamping.
52        operand: Strand,
53    },
54}
55
56/// A [`Result`](std::result::Result) with a [`ClampError`].
57pub type ClampResult<T> = std::result::Result<T, ClampError>;
58
59/// An error related to the creation of a nonsensical interval.
60#[derive(Error, Debug, PartialEq, Eq)]
61pub enum NonsensicalError {
62    /// A mismatched contig error.
63    ///
64    /// This error occurs when one attempts to clamp an interval with another
65    /// interval that is not located on the same contig.
66    #[error("mismatched contigs for coordinates: `{start}` and `{end}`")]
67    MismatchedContigs {
68        /// The contig of the interval being clamped.
69        start: Contig,
70
71        /// The contig of the interval doing the clamping.
72        end: Contig,
73    },
74
75    /// A mismatched strand error.
76    ///
77    /// This error occurs when one attempts to clamp an interval with another
78    /// interval that is not located on the same strand.
79    #[error("mismatched strands for coordinates: `{start}` and `{end}`")]
80    MismatchedStrands {
81        /// The strand of the interval being clamped.
82        start: Strand,
83
84        /// The strand of the interval doing the clamping.
85        end: Strand,
86    },
87
88    /// A negative sized interval.
89    ///
90    /// This error occurs when the start of the interval comes _after_ the end
91    /// of the interval.
92    ///
93    /// On positive stranded intervals, this is when the start position is
94    /// _greater than_ the end position. On negative stranded intervals, this is
95    /// when the start position is _less than_ the end position.
96    #[error("negatively sized interval: start is `{start}`, end is `{end}`, strand is `{strand}`")]
97    NegativelySized {
98        /// The start position.
99        start: Number,
100        /// The end position.
101        end: Number,
102        /// The strand.
103        strand: Strand,
104    },
105}
106
107/// A [`Result`](std::result::Result) with a [`NonsensicalError`].
108pub type NonsensicalResult<T> = std::result::Result<T, NonsensicalError>;
109
110/// An error related to parsing an interval.
111#[derive(Error, Debug, PartialEq, Eq)]
112pub enum ParseError {
113    /// An invalid format was encountered.
114    #[error("invalid format: {value}")]
115    Format {
116        /// The value that was passed.
117        value: String,
118    },
119}
120
121/// A [`Result`](std::result::Result) with a [`ParseError`].
122pub type ParseResult<T> = std::result::Result<T, ParseError>;
123
124/// An error related to an interval.
125#[derive(Error, Debug, PartialEq, Eq)]
126pub enum Error {
127    /// A clamping error.
128    #[error("clamp error: {0}")]
129    Clamp(#[from] ClampError),
130
131    /// A coordinate error.
132    #[error("coordinate error: {0}")]
133    Coordinate(#[from] coordinate::Error),
134
135    /// A nonsensical interval.
136    #[error("nonsensical interval: {0}")]
137    Nonsensical(#[from] NonsensicalError),
138
139    /// One or more of the coordinates were out of bounds.
140    #[error("one or more of the coordinates were out of bounds")]
141    OutOfBounds,
142
143    /// A parse error.
144    #[error("parse error: {0}")]
145    Parse(#[from] ParseError),
146
147    /// A position error.
148    #[error("position error: {0}")]
149    Position(#[from] position::Error),
150
151    /// A strand error.
152    #[error("strand error: {0}")]
153    Strand(#[from] strand::Error),
154}
155
156/// A [`Result`](std::result::Result) with an [`Error`].
157pub type Result<T> = std::result::Result<T, Error>;
158
159////////////////////////////////////////////////////////////////////////////////////////
// The `Interval` trait
161////////////////////////////////////////////////////////////////////////////////////////
162
163/// Traits related to a coordinate.
164pub mod r#trait {
165    use super::*;
166    use crate::system::Base;
167
168    /// Requirements to be an interval.
169    #[allow(clippy::len_without_is_empty)]
170    pub trait Interval<S: System> {
171        /// Returns whether or not the entity at the in-base coordinate is
172        /// contained within this interval.
173        fn contains_entity(&self, coordinate: &Coordinate<Base>) -> bool;
174
175        /// Gets the number of member contained within the interval.
176        fn count_entities(&self) -> Number;
177    }
178}
179
180/// An interval.
181#[derive(Clone, Debug, PartialEq, Eq)]
182pub struct Interval<S: System> {
183    /// The start coordinate.
184    start: Coordinate<S>,
185
186    /// The end coordinate.
187    end: Coordinate<S>,
188}
189
190impl<S: System> Interval<S>
191where
192    Interval<S>: r#trait::Interval<S>,
193    Position<S>: position::r#trait::Position<S>,
194{
195    /// Creates a new interval if the following invariants are upheld.
196    ///
197    /// * The contigs of the two coordinates must match.
198    ///   * If this does not hold, a [`NonsensicalError::MismatchedContigs`]
199    ///     will be returned.
200    /// * The strands of the two coordinates must match.
201    ///   * If this does not hold, a [`NonsensicalError::MismatchedStrands`]
202    ///     will be returned.
203    /// * The start must come _before or be equal to_ the end in that (a) on
204    ///   positive strand, `start <= end`, or, (b) on the negative strand, `end
205    ///   <= start`. This ensures that the interval is always oriented from
206    ///   start to end of the molecule.
207    ///   * If this does not hold, a [`NonsensicalError::NegativelySized`] will
208    ///     be returned.
209    ///
210    /// # Examples
211    ///
212    /// ```
213    /// use omics_coordinate::Coordinate;
214    /// use omics_coordinate::Interval;
215    /// use omics_coordinate::system::Base;
216    /// use omics_coordinate::system::Interbase;
217    ///
218    /// //===========//
219    /// // Interbase //
220    /// //===========//
221    ///
222    /// // Positive strand.
223    ///
224    /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
225    /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
226    /// let interval = Interval::try_new(start, end)?;
227    ///
228    /// // Negative strand.
229    ///
230    /// let start = Coordinate::<Interbase>::try_new("seq0", "-", 20)?;
231    /// let end = Coordinate::<Interbase>::try_new("seq0", "-", 10)?;
232    /// let interval = Interval::try_new(start, end)?;
233    ///
234    /// //======//
235    /// // Base //
236    /// //======//
237    ///
238    /// // Positive strand.
239    ///
240    /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
241    /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
242    /// let interval = Interval::try_new(start, end)?;
243    ///
244    /// // Negative strand.
245    ///
246    /// let start = Coordinate::<Base>::try_new("seq0", "-", 20)?;
247    /// let end = Coordinate::<Base>::try_new("seq0", "-", 10)?;
248    /// let interval = Interval::try_new(start, end)?;
249    ///
250    /// # Ok::<(), Box<dyn std::error::Error>>(())
251    /// ```
252    pub fn try_new(start: Coordinate<S>, end: Coordinate<S>) -> Result<super::Interval<S>> {
253        if start.contig() != end.contig() {
254            return Err(Error::Nonsensical(NonsensicalError::MismatchedContigs {
255                start: start.contig().clone(),
256                end: end.contig().clone(),
257            }));
258        }
259
260        if start.strand() != end.strand() {
261            return Err(Error::Nonsensical(NonsensicalError::MismatchedStrands {
262                start: start.strand(),
263                end: end.strand(),
264            }));
265        }
266
267        match start.strand() {
268            Strand::Positive => {
269                if start.position() > end.position() {
270                    return Err(Error::Nonsensical(NonsensicalError::NegativelySized {
271                        start: start.position().get(),
272                        end: end.position().get(),
273                        strand: start.strand(),
274                    }));
275                }
276            }
277            Strand::Negative => {
278                if end.position() > start.position() {
279                    return Err(Error::Nonsensical(NonsensicalError::NegativelySized {
280                        start: start.position().get(),
281                        end: end.position().get(),
282                        strand: start.strand(),
283                    }));
284                }
285            }
286        }
287
288        Ok(Interval { start, end })
289    }
290
291    /// Gets a reference to the start coordinate.
292    ///
293    /// # Examples
294    ///
295    /// ```
296    /// use omics_coordinate::Coordinate;
297    /// use omics_coordinate::Interval;
298    /// use omics_coordinate::system::Base;
299    /// use omics_coordinate::system::Interbase;
300    ///
301    /// //===========//
302    /// // Interbase //
303    /// //===========//
304    ///
305    /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
306    /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
307    /// let interval = Interval::try_new(start.clone(), end)?;
308    ///
309    /// assert_eq!(interval.start(), &start);
310    ///
311    /// //======//
312    /// // Base //
313    /// //======//
314    ///
315    /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
316    /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
317    /// let interval = Interval::try_new(start.clone(), end)?;
318    ///
319    /// assert_eq!(interval.start(), &start);
320    ///
321    /// # Ok::<(), Box<dyn std::error::Error>>(())
322    /// ```
323    pub fn start(&self) -> &Coordinate<S> {
324        &self.start
325    }
326
327    /// Consumes `self` and returns the start coordinate.
328    ///
329    /// # Examples
330    ///
331    /// ```
332    /// use omics_coordinate::Coordinate;
333    /// use omics_coordinate::Interval;
334    /// use omics_coordinate::system::Base;
335    /// use omics_coordinate::system::Interbase;
336    ///
337    /// //===========//
338    /// // Interbase //
339    /// //===========//
340    ///
341    /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
342    /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
343    /// let interval = Interval::try_new(start.clone(), end)?;
344    ///
345    /// assert_eq!(interval.into_start(), start);
346    ///
347    /// //======//
348    /// // Base //
349    /// //======//
350    ///
351    /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
352    /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
353    /// let interval = Interval::try_new(start.clone(), end)?;
354    ///
355    /// assert_eq!(interval.into_start(), start);
356    ///
357    /// # Ok::<(), Box<dyn std::error::Error>>(())
358    /// ```
359    pub fn into_start(self) -> Coordinate<S> {
360        self.start
361    }
362
363    /// Gets a reference to the end coordinate.
364    ///
365    /// # Examples
366    ///
367    /// ```
368    /// use omics_coordinate::Coordinate;
369    /// use omics_coordinate::Interval;
370    /// use omics_coordinate::system::Base;
371    /// use omics_coordinate::system::Interbase;
372    ///
373    /// //===========//
374    /// // Interbase //
375    /// //===========//
376    ///
377    /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
378    /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
379    /// let interval = Interval::try_new(start, end.clone())?;
380    ///
381    /// assert_eq!(interval.end(), &end);
382    ///
383    /// //======//
384    /// // Base //
385    /// //======//
386    ///
387    /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
388    /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
389    /// let interval = Interval::try_new(start, end.clone())?;
390    ///
391    /// assert_eq!(interval.end(), &end);
392    ///
393    /// # Ok::<(), Box<dyn std::error::Error>>(())
394    /// ```
395    pub fn end(&self) -> &Coordinate<S> {
396        &self.end
397    }
398
399    /// Consumes `self` and returns the end coordinate.
400    ///
401    /// # Examples
402    ///
403    /// ```
404    /// use omics_coordinate::Coordinate;
405    /// use omics_coordinate::Interval;
406    /// use omics_coordinate::system::Base;
407    /// use omics_coordinate::system::Interbase;
408    ///
409    /// //===========//
410    /// // Interbase //
411    /// //===========//
412    ///
413    /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
414    /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
415    /// let interval = Interval::try_new(start, end.clone())?;
416    ///
417    /// assert_eq!(interval.into_end(), end);
418    ///
419    /// //======//
420    /// // Base //
421    /// //======//
422    ///
423    /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
424    /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
425    /// let interval = Interval::try_new(start, end.clone())?;
426    ///
427    /// assert_eq!(interval.into_end(), end);
428    ///
429    /// # Ok::<(), Box<dyn std::error::Error>>(())
430    /// ```
431    pub fn into_end(self) -> Coordinate<S> {
432        self.end
433    }
434
435    /// Consumes `self` and returns the start and end coordinates.
436    ///
437    /// # Examples
438    ///
439    /// ```
440    /// use omics_coordinate::Coordinate;
441    /// use omics_coordinate::Interval;
442    /// use omics_coordinate::system::Base;
443    /// use omics_coordinate::system::Interbase;
444    ///
445    /// //===========//
446    /// // Interbase //
447    /// //===========//
448    ///
449    /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
450    /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
451    /// let interval = Interval::try_new(start.clone(), end.clone())?;
452    /// let parts = interval.into_coordinates();
453    ///
454    /// assert_eq!(parts.0, start);
455    /// assert_eq!(parts.1, end);
456    ///
457    /// //======//
458    /// // Base //
459    /// //======//
460    ///
461    /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
462    /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
463    /// let interval = Interval::try_new(start.clone(), end.clone())?;
464    /// let parts = interval.into_coordinates();
465    ///
466    /// assert_eq!(parts.0, start);
467    /// assert_eq!(parts.1, end);
468    ///
469    /// # Ok::<(), Box<dyn std::error::Error>>(())
470    /// ```
471    pub fn into_coordinates(self) -> (Coordinate<S>, Coordinate<S>) {
472        (self.start, self.end)
473    }
474
475    /// Returns a reference to the contig.
476    ///
477    /// # Examples
478    ///
479    /// ```
480    /// use omics_coordinate::Coordinate;
481    /// use omics_coordinate::Interval;
482    /// use omics_coordinate::system::Base;
483    /// use omics_coordinate::system::Interbase;
484    ///
485    /// //===========//
486    /// // Interbase //
487    /// //===========//
488    ///
489    /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
490    /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
491    /// let interval = Interval::try_new(start, end)?;
492    ///
493    /// assert_eq!(interval.contig().as_str(), "seq0");
494    ///
495    /// //======//
496    /// // Base //
497    /// //======//
498    ///
499    /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
500    /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
501    /// let interval = Interval::try_new(start, end)?;
502    ///
503    /// assert_eq!(interval.contig().as_str(), "seq0");
504    ///
505    /// # Ok::<(), Box<dyn std::error::Error>>(())
506    /// ```
507    pub fn contig(&self) -> &Contig {
508        self.start().contig()
509    }
510
511    /// Returns the strand.
512    ///
513    /// # Examples
514    ///
515    /// ```
516    /// use omics_coordinate::Coordinate;
517    /// use omics_coordinate::Interval;
518    /// use omics_coordinate::Strand;
519    /// use omics_coordinate::system::Base;
520    /// use omics_coordinate::system::Interbase;
521    ///
522    /// //===========//
523    /// // Interbase //
524    /// //===========//
525    ///
526    /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
527    /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
528    /// let interval = Interval::try_new(start, end)?;
529    ///
530    /// assert_eq!(interval.strand(), Strand::Positive);
531    ///
532    /// //======//
533    /// // Base //
534    /// //======//
535    ///
536    /// let start = Coordinate::<Base>::try_new("seq0", "-", 20)?;
537    /// let end = Coordinate::<Base>::try_new("seq0", "-", 10)?;
538    /// let interval = Interval::try_new(start, end)?;
539    ///
540    /// assert_eq!(interval.strand(), Strand::Negative);
541    ///
542    /// # Ok::<(), Box<dyn std::error::Error>>(())
543    /// ```
544    pub fn strand(&self) -> Strand {
545        self.start().strand()
546    }
547
548    /// Returns whether or not a coordinate is contained within this interval.
549    /// Notably, when checked whether coordinates are included in the interval,
550    /// both the start and end positions are considered inclusive.
551    ///
552    /// # Caution
553    ///
554    /// **This is not the method you want to use when checking if a nucleotide
555    /// or amino acid at a particular position is included in the interval. This
556    /// checks the coordinates themselves and, in-so-doing, considers both the
557    /// start and the end positions of the interval to be inclusive.
558    ///
559    /// If you'd like to check whether a particular nucleotide, amino acid, or
560    /// other entity is contained within the interval, use the
561    /// [`contains_entity()`](Interval::contains_entity) method.
562    ///
563    /// # Examples
564    ///
565    /// ```
566    /// use omics_coordinate::Coordinate;
567    /// use omics_coordinate::Interval;
568    /// use omics_coordinate::Strand;
569    /// use omics_coordinate::system::Base;
570    /// use omics_coordinate::system::Interbase;
571    ///
572    /// //===========//
573    /// // Interbase //
574    /// //===========//
575    ///
576    /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 0)?;
577    /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
578    /// let interval = Interval::try_new(start, end)?;
579    ///
580    /// // Coordinates on the same contig, strand, and within the interval's range
581    /// // are contained within the interval.
582    /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 0)?));
583    /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 5)?));
584    /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 10)?));
585    ///
586    /// // Coordinates on different contigs, strands, or outside the range are
587    /// // not contained within the interval.
588    /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq1", "+", 5)?));
589    /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "-", 5)?));
590    /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 11)?));
591    ///
592    /// //======//
593    /// // Base //
594    /// //======//
595    ///
596    /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 1)?;
597    /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
598    /// let interval = Interval::try_new(start, end)?;
599    ///
600    /// // Coordinates on the same contig, strand, and within the interval's range
601    /// // are contained within the interval.
602    /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 1)?));
603    /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 5)?));
604    /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 10)?));
605    ///
606    /// // Coordinates on different contigs, strands, or outside the range are
607    /// // not contained within the interval.
608    /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq1", "+", 5)?));
609    /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "-", 5)?));
610    /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 11)?));
611    ///
612    /// # Ok::<(), Box<dyn std::error::Error>>(())
613    /// ```
614    pub fn contains_coordinate(&self, coordinate: &crate::Coordinate<S>) -> bool {
615        if self.contig() != coordinate.contig() {
616            return false;
617        }
618
619        if self.strand() != coordinate.strand() {
620            return false;
621        }
622
623        match self.strand() {
624            Strand::Positive => {
625                self.start().position().get() <= coordinate.position().get()
626                    && self.end().position().get() >= coordinate.position().get()
627            }
628            Strand::Negative => {
629                self.start().position().get() >= coordinate.position().get()
630                    && self.end().position().get() <= coordinate.position().get()
631            }
632        }
633    }
634
635    /// Returns whether or not the entity at the in-base coordinate is
636    /// contained within this interval.
637    ///
638    /// /// # Examples
639    ///
640    /// ```
641    /// use omics_coordinate::Coordinate;
642    /// use omics_coordinate::Interval;
643    /// use omics_coordinate::Strand;
644    /// use omics_coordinate::system::Base;
645    /// use omics_coordinate::system::Interbase;
646    ///
647    /// //===========//
648    /// // Interbase //
649    /// //===========//
650    ///
651    /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 0)?;
652    /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
653    /// let interval = Interval::try_new(start, end)?;
654    ///
655    /// // Coordinates on the same contig, strand, and within the interval's range
656    /// // are contained within the interval.
657    /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 0)?));
658    /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 5)?));
659    /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 10)?));
660    ///
661    /// // Coordinates on different contigs, strands, or outside the range are
662    /// // not contained within the interval.
663    /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq1", "+", 5)?));
664    /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "-", 5)?));
665    /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 11)?));
666    ///
667    /// //======//
668    /// // Base //
669    /// //======//
670    ///
671    /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 1)?;
672    /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
673    /// let interval = Interval::try_new(start, end)?;
674    ///
675    /// // Coordinates on the same contig, strand, and within the interval's range
676    /// // are contained within the interval.
677    /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 1)?));
678    /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 5)?));
679    /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 10)?));
680    ///
681    /// // Coordinates on different contigs, strands, or outside the range are
682    /// // not contained within the interval.
683    /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq1", "+", 5)?));
684    /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "-", 5)?));
685    /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 11)?));
686    ///
687    /// # Ok::<(), Box<dyn std::error::Error>>(())
688    /// ```
689    pub fn contains_entity(&self, coordinate: &Coordinate<Base>) -> bool {
690        <Self as r#trait::Interval<S>>::contains_entity(self, coordinate)
691    }
692
693    /// Counts the number of entities in the interval.
694    ///
695    /// # Examples
696    ///
697    /// ```
698    /// use omics_coordinate::Coordinate;
699    /// use omics_coordinate::Interval;
700    /// use omics_coordinate::system::Base;
701    /// use omics_coordinate::system::Interbase;
702    ///
703    /// //===========//
704    /// // Interbase //
705    /// //===========//
706    ///
707    /// // Positive strand.
708    ///
709    /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
710    /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
711    /// let interval = Interval::try_new(start, end)?;
712    ///
713    /// assert_eq!(interval.count_entities(), 10);
714    ///
715    /// // Negative strand.
716    ///
717    /// let start = Coordinate::<Interbase>::try_new("seq0", "-", 20)?;
718    /// let end = Coordinate::<Interbase>::try_new("seq0", "-", 10)?;
719    /// let interval = Interval::try_new(start, end)?;
720    ///
721    /// assert_eq!(interval.count_entities(), 10);
722    ///
723    /// //======//
724    /// // Base //
725    /// //======//
726    ///
727    /// // Positive strand.
728    ///
729    /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
730    /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
731    /// let interval = Interval::try_new(start, end)?;
732    ///
733    /// assert_eq!(interval.count_entities(), 11);
734    ///
735    /// // Negative strand.
736    ///
737    /// let start = Coordinate::<Base>::try_new("seq0", "-", 20)?;
738    /// let end = Coordinate::<Base>::try_new("seq0", "-", 10)?;
739    /// let interval = Interval::try_new(start, end)?;
740    ///
741    /// assert_eq!(interval.count_entities(), 11);
742    ///
743    /// # Ok::<(), Box<dyn std::error::Error>>(())
744    /// ```
745    pub fn count_entities(&self) -> Number {
746        <Self as r#trait::Interval<S>>::count_entities(self)
747    }
748
749    /// Consumes `self` and clamps an interval by another interval.
750    ///
751    /// Clamping is an operation whereby the ends of an interval are restricted
752    /// to the range of the argument passed in with a tendency to restrict
753    /// towards the middle of the interval.
754    ///
755    /// # Summary
756    ///
757    /// * If the interval being operated on is completely contained within the
758    ///   argument interval, the interval being operated on is returned.
759    ///
760    /// ```text
761    /// ╔═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════ →
762    /// 10    11    12    13    14    15    16    17    18    19    20       |
763    ///                   ●───────────────────────● [13, 17]                 | Original Interval
764    ///       ●───────────────────────────────────────────────● [11, 19]     | Argument Interval
765    /// ┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄
766    ///                   ●───────────────────────● [13, 17]                 | Resulting Interval
767    ///
768    ///
769    /// Here, no modifications were made to the original interval, as neither
770    /// the start nor the end of the interval would be restricted by the
771    /// argument interval.
772    /// ```
773    ///
774    /// * If the argument interval is completely within the interval being
775    ///   operated on, the argument interval will clamp both sides of the
776    ///   original interval, and the argument interval will be returned.
777    /// ```text
778    /// ╔═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════ →
779    /// 10    11    12    13    14    15    16    17    18    19    20       |
780    ///       ●───────────────────────────────────────────────● [11, 19]     | Original Interval
781    ///                   ●───────────────────────● [13, 17]                 | Argument Interval
782    /// ┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄
783    ///                   ●───────────────────────● [13, 17]                 | Resulting Interval
784    ///
785    ///
786    /// Here, both the start and the end position of the original interval were
787    /// restricted by the start and end of the argument interval respectively.
788    /// ```
789    ///
790    /// * If the argument interval would restrict the length of one side of the
791    ///   subject interval on either end, that end is restricted to the argument
792    ///   interval's value, whereas the non-restricted end is the original
793    ///   interval's value.
794    ///
795    /// ```text
796    /// ╔═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════ →
797    /// 10    11    12    13    14    15    16    17    18    19    20       |
798    ///       ●───────────────────────────────────● [11, 17]                 | Original Interval
799    ///                   ●───────────────────────● [13, 17]                 | Argument Interval
800    /// ┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄
801    ///                   ●───────────────────────● [13, 17]                 | Resulting Interval
802    ///
803    ///
804    /// Here, the start of the original interval is clamped by the argument
805    /// interval's start position. However, the end position of the original
806    /// interval is not restricted by the argument interval's end position,
807    /// so it remains the same. This results in the latter half of the interval
808    /// being clamped.
809    ///
810    ///
811    /// ╔═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════ →
812    /// 10    11    12    13    14    15    16    17    18    19    20       |
813    ///                   ●───────────────────────────────────● [13, 19]     | Original Interval
814    ///                   ●───────────────────────● [13, 17]                 | Argument Interval
815    /// ┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄
816    ///                   ●───────────────────────● [13, 17]                 | Resulting Interval
817    ///
818    ///
819    /// Here, the start position of the original interval would not be
820    /// restricted by the argument interval's start position, so it remains
821    /// the same. However, the end position is clamped by the end position
822    /// of the argument interval, so the resulting end position is that of the
    /// argument interval's end position. As a result, only the first portion
    /// of the original interval is retained.
825    /// ```
826    ///
827    /// # Examples
828    ///
829    /// ```
830    /// use omics_coordinate::Coordinate;
831    /// use omics_coordinate::Interval;
832    /// use omics_coordinate::system::Base;
833    /// use omics_coordinate::system::Interbase;
834    ///
835    /// //===========//
836    /// // Interbase //
837    /// //===========//
838    ///
839    /// let interval = "seq0:+:10-20".parse::<Interval<Interbase>>()?;
840    /// let clamped = interval.clamp("seq0:+:5-15".parse::<Interval<Interbase>>()?)?;
841    /// assert_eq!(clamped, "seq0:+:10-15".parse::<Interval<Interbase>>()?);
842    ///
843    /// //======//
844    /// // Base //
845    /// //======//
846    ///
847    /// let interval = "seq0:-:20-10".parse::<Interval<Base>>()?;
848    /// let clamped = interval.clamp("seq0:-:25-15".parse::<Interval<Base>>()?)?;
849    /// assert_eq!(clamped, "seq0:-:20-15".parse::<Interval<Base>>()?);
850    ///
851    /// Ok::<(), Box<dyn std::error::Error>>(())
852    /// ```
853    pub fn clamp(self, interval: Interval<S>) -> Result<Interval<S>> {
854        let (start, end) = self.into_coordinates();
855        let (operand_start, operand_end) = interval.into_coordinates();
856
857        let (start_contig, start_strand, start) = start.into_parts();
858        let (end_contig, end_strand, end) = end.into_parts();
859
860        let (operand_contig, operand_strand, operand_start) = operand_start.into_parts();
861        let (_, _, operand_end) = operand_end.into_parts();
862
863        if start_contig != operand_contig {
864            return Err(Error::Clamp(ClampError::MismatchedContigs {
865                original: start_contig,
866                operand: operand_contig,
867            }));
868        }
869
870        if start_strand != operand_strand {
871            return Err(Error::Clamp(ClampError::MismatchedStrand {
872                original: start_strand,
873                operand: operand_strand,
874            }));
875        }
876
877        let (new_start, new_end) = match start_strand {
878            Strand::Positive => (max(start, operand_start), min(end, operand_end)),
879            Strand::Negative => (min(start, operand_start), max(end, operand_end)),
880        };
881
882        let start = Coordinate::<S>::new(start_contig, start_strand, new_start);
883        let end = Coordinate::<S>::new(end_contig, end_strand, new_end);
884
885        // SAFETY: both the start _and_ the end positions were originally on
886        // intervals that were valid. Since we are not breaking any rules that
887        // would make the intervals invalid in this method, this should always
888        // unwrap.
889        Ok(Self::try_new(start, end).unwrap())
890    }
891
892    /// Gets the offset of a coordinate from the start of the interval.
893    ///
894    /// If the coordinate is not contained within the interval, `None` is
895    /// returned.
896    ///
897    /// # Examples
898    ///
899    /// ```
900    /// use omics_coordinate::Coordinate;
901    /// use omics_coordinate::Interval;
902    /// use omics_coordinate::system::Base;
903    /// use omics_coordinate::system::Interbase;
904    ///
905    /// //===========//
906    /// // Interbase //
907    /// //===========//
908    ///
909    /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
910    /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
911    /// let interval = Interval::try_new(start, end)?;
912    ///
913    /// let query = Coordinate::<Interbase>::try_new("seq0", "+", 15)?;
914    /// assert_eq!(interval.coordinate_offset(&query).unwrap(), 5);
915    ///
916    /// let query = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
917    /// assert_eq!(interval.coordinate_offset(&query).unwrap(), 10);
918    ///
919    /// let query = Coordinate::<Interbase>::try_new("seq0", "+", 21)?;
920    /// assert!(interval.coordinate_offset(&query).is_none());
921    ///
922    /// //======//
923    /// // Base //
924    /// //======//
925    ///
926    /// let start = Coordinate::<Base>::try_new("seq0", "-", 20)?;
927    /// let end = Coordinate::<Base>::try_new("seq0", "-", 10)?;
928    /// let interval = Interval::try_new(start, end)?;
929    ///
930    /// let query = Coordinate::<Base>::try_new("seq0", "-", 15)?;
931    /// assert_eq!(interval.coordinate_offset(&query).unwrap(), 5);
932    ///
933    /// let query = Coordinate::<Base>::try_new("seq0", "-", 10)?;
934    /// assert_eq!(interval.coordinate_offset(&query).unwrap(), 10);
935    ///
936    /// let query = Coordinate::<Base>::try_new("seq0", "-", 9)?;
937    /// assert!(interval.coordinate_offset(&query).is_none());
938    ///
939    /// Ok::<(), Box<dyn std::error::Error>>(())
940    /// ```
941    pub fn coordinate_offset(&self, coordinate: &Coordinate<S>) -> Option<Number> {
942        if !self.contains_coordinate(coordinate) {
943            return None;
944        }
945
946        Some(
947            coordinate
948                .position()
949                .distance_unchecked(self.start().position()),
950        )
951    }
952
953    /// Returns the coordinate at the offset within the interval.
954    ///
955    /// This method only returns the coordinate if the coordinate falls within
956    /// the interval.
957    ///
958    /// # Examples
959    ///
960    /// ```
961    /// use omics_coordinate::Coordinate;
962    /// use omics_coordinate::Interval;
963    /// use omics_coordinate::system::Base;
964    /// use omics_coordinate::system::Interbase;
965    ///
966    /// //===========//
967    /// // Interbase //
968    /// //===========//
969    ///
970    /// // Positive strand.
971    ///
972    /// let interval = "seq0:+:0-1000".parse::<Interval<Interbase>>()?;
973    ///
974    /// let expected = "seq0:+:5".parse::<Coordinate<Interbase>>()?;
975    /// assert_eq!(interval.coordinate_at_offset(5).unwrap(), expected);
976    ///
977    /// let expected = "seq0:+:1000".parse::<Coordinate<Interbase>>()?;
978    /// assert_eq!(interval.coordinate_at_offset(1000).unwrap(), expected);
979    ///
980    /// assert!(interval.coordinate_at_offset(1001).is_none());
981    ///
982    /// // Negative strand.
983    ///
984    /// let interval = "seq0:-:1000-0".parse::<Interval<Interbase>>()?;
985    ///
986    /// let expected = "seq0:-:995".parse::<Coordinate<Interbase>>()?;
987    /// assert_eq!(interval.coordinate_at_offset(5).unwrap(), expected);
988    ///
989    /// let expected = "seq0:-:0".parse::<Coordinate<Interbase>>()?;
990    /// assert_eq!(interval.coordinate_at_offset(1000).unwrap(), expected);
991    ///
992    /// assert_eq!(interval.coordinate_at_offset(1001), None);
993    ///
994    /// //======//
995    /// // Base //
996    /// //======//
997    ///
998    /// // Positive strand.
999    ///
1000    /// let interval = "seq0:+:1-1000".parse::<Interval<Base>>()?;
1001    ///
1002    /// let expected = "seq0:+:6".parse::<Coordinate<Base>>()?;
1003    /// assert_eq!(interval.coordinate_at_offset(5).unwrap(), expected);
1004    ///
1005    /// let expected = "seq0:+:1000".parse::<Coordinate<Base>>()?;
1006    /// assert_eq!(interval.coordinate_at_offset(999).unwrap(), expected);
1007    ///
1008    /// assert!(interval.coordinate_at_offset(1000).is_none());
1009    ///
1010    /// // Negative strand.
1011    ///
1012    /// let interval = "seq0:-:1000-1".parse::<Interval<Base>>()?;
1013    ///
1014    /// let expected = "seq0:-:995".parse::<Coordinate<Base>>()?;
1015    /// assert_eq!(interval.coordinate_at_offset(5).unwrap(), expected);
1016    ///
1017    /// let expected = "seq0:-:1".parse::<Coordinate<Base>>()?;
1018    /// assert_eq!(interval.coordinate_at_offset(999).unwrap(), expected);
1019    ///
1020    /// assert_eq!(interval.coordinate_at_offset(1000), None);
1021    ///
1022    /// Ok::<(), Box<dyn std::error::Error>>(())
1023    /// ```
1024    pub fn coordinate_at_offset(&self, offset: Number) -> Option<Coordinate<S>> {
1025        let coordinate = self.start().clone().move_forward(offset)?;
1026
1027        match self.contains_coordinate(&coordinate) {
1028            true => Some(coordinate),
1029            false => None,
1030        }
1031    }
1032
1033    /// Reverse complements the interval, meaning that:
1034    ///
1035    /// * the start and end positions are swapped, and
1036    /// * the strand is swapped.
1037    ///
1038    /// # Examples
1039    ///
1040    /// ```
1041    /// use omics_coordinate::Coordinate;
1042    /// use omics_coordinate::Interval;
1043    /// use omics_coordinate::system::Base;
1044    /// use omics_coordinate::system::Interbase;
1045    ///
1046    /// //===========//
1047    /// // Interbase //
1048    /// //===========//
1049    ///
1050    /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
1051    /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
1052    /// let original = Interval::try_new(start, end)?;
1053    ///
1054    /// let complemented = original.clone().reverse_complement();
1055    /// assert_eq!(complemented, "seq0:-:20-10".parse::<Interval<Interbase>>()?);
1056    ///
1057    /// let recomplemented = complemented.reverse_complement();
1058    /// assert_eq!(recomplemented, original);
1059    ///
1060    /// //======//
1061    /// // Base //
1062    /// //======//
1063    ///
1064    /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
1065    /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
1066    /// let original = Interval::try_new(start, end)?;
1067    ///
1068    /// let complemented = original.clone().reverse_complement();
1069    /// assert_eq!(complemented, "seq0:-:20-10".parse::<Interval<Base>>()?);
1070    ///
1071    /// let recomplemented = complemented.reverse_complement();
1072    /// assert_eq!(recomplemented, original);
1073    ///
1074    /// Ok::<(), Box<dyn std::error::Error>>(())
1075    /// ```
1076    pub fn reverse_complement(self) -> super::Interval<S> {
1077        let (start, end) = self.into_coordinates();
1078        // SAFETY: because (a) intervals are inclusive of both of their start
1079        // and end coordinates, (b) all positions can be represented on the
1080        // opposite strand, and (c) swapping the start and end while also
1081        // swapping strand will always create the correct directionality, this will
1082        // always unwrap.
1083        Interval::try_new(end.swap_strand(), start.swap_strand()).unwrap()
1084    }
1085}
1086
1087////////////////////////////////////////////////////////////////////////////////////////
1088// Trait implementations
1089////////////////////////////////////////////////////////////////////////////////////////
1090
1091impl<S: System> std::fmt::Display for Interval<S>
1092where
1093    Interval<S>: r#trait::Interval<S>,
1094    Position<S>: position::r#trait::Position<S>,
1095{
1096    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1097        write!(
1098            f,
1099            "{}:{}:{}-{}",
1100            self.contig(),
1101            self.strand(),
1102            self.start().position(),
1103            self.end().position(),
1104        )
1105    }
1106}
1107
1108impl<S: System> std::str::FromStr for Interval<S>
1109where
1110    Interval<S>: r#trait::Interval<S>,
1111    Position<S>: position::r#trait::Position<S>,
1112{
1113    type Err = Error;
1114
1115    fn from_str(s: &str) -> Result<Self> {
1116        let parts = s.split(':').collect::<Vec<_>>();
1117
1118        if parts.len() != 3 {
1119            return Err(Error::Parse(ParseError::Format {
1120                value: s.to_string(),
1121            }));
1122        }
1123
1124        let mut parts = parts.iter();
1125
1126        // SAFETY: we checked that there are three parts above. Given that we
1127        // haven't pulled anything from the iterator, we can always safely
1128        // unwrap this.
1129        let contig = parts
1130            .next()
1131            .unwrap()
1132            .parse::<Contig>()
1133            // SAFETY: this is infallible.
1134            .unwrap();
1135
1136        // SAFETY: we checked that there are three parts above. Given that we
1137        // have only pulled one item from the iterator, we can always safely
1138        // unwrap this.
1139        let strand = parts
1140            .next()
1141            .unwrap()
1142            .parse::<Strand>()
1143            .map_err(Error::Strand)?;
1144
1145        // SAFETY: we checked that there are three parts above. Given that we
1146        // have only pulled two items from the iterator, we can always safely
1147        // unwrap this.
1148        let positions = parts.next().unwrap().split('-').collect::<Vec<_>>();
1149
1150        if positions.len() != 2 {
1151            return Err(Error::Parse(ParseError::Format {
1152                value: s.to_string(),
1153            }));
1154        }
1155
1156        // SAFETY: we just ensured that two parts exist, so the direct
1157        // indexing of the slice for both index zero and one will never
1158        // fail.
1159        let start = positions[0]
1160            .parse::<Position<S>>()
1161            .map_err(Error::Position)?;
1162        let end = positions[1]
1163            .parse::<Position<S>>()
1164            .map_err(Error::Position)?;
1165
1166        Interval::try_new(
1167            Coordinate::new(contig.clone(), strand, start),
1168            Coordinate::new(contig, strand, end),
1169        )
1170    }
1171}
1172
1173#[cfg(test)]
1174mod tests {
1175    use super::*;
1176    use crate::position::Error as PositionError;
1177    use crate::position::Number;
1178    use crate::position::ParseError as PositionParseError;
1179    use crate::strand::Error as StrandError;
1180    use crate::strand::ParseError as StrandParseError;
1181    use crate::system::Interbase;
1182
1183    #[test]
1184    fn valid() {
1185        let start = "seq0:+:0".parse::<Coordinate<Interbase>>().unwrap();
1186        let end = "seq0:+:9".parse::<Coordinate<Interbase>>().unwrap();
1187
1188        let interval = Interval::try_new(start, end).unwrap();
1189        assert_eq!(interval.count_entities(), 9);
1190    }
1191
1192    #[test]
1193    fn nonsensical_mismatched_contigs() {
1194        let start = "seq0:+:0".parse::<Coordinate<Interbase>>().unwrap();
1195        let end = "seq1:+:10".parse::<Coordinate<Interbase>>().unwrap();
1196
1197        let err = Interval::try_new(start, end).unwrap_err();
1198        assert_eq!(
1199            err,
1200            Error::Nonsensical(NonsensicalError::MismatchedContigs {
1201                start: Contig::new("seq0"),
1202                end: Contig::new("seq1")
1203            })
1204        );
1205
1206        assert_eq!(
1207            err.to_string(),
1208            "nonsensical interval: mismatched contigs for coordinates: `seq0` and `seq1`"
1209        );
1210    }
1211
1212    #[test]
1213    fn nonsensical_mismatched_strands() {
1214        let start = "seq0:+:0".parse::<Coordinate<Interbase>>().unwrap();
1215        let end = "seq0:-:10".parse::<Coordinate<Interbase>>().unwrap();
1216
1217        let err = Interval::try_new(start, end).unwrap_err();
1218        assert_eq!(
1219            err,
1220            Error::Nonsensical(NonsensicalError::MismatchedStrands {
1221                start: Strand::Positive,
1222                end: Strand::Negative
1223            })
1224        );
1225
1226        assert_eq!(
1227            err.to_string(),
1228            "nonsensical interval: mismatched strands for coordinates: `+` and `-`"
1229        );
1230    }
1231
1232    #[test]
1233    fn nonsensical_start_greater_than_end() {
1234        //===================//
1235        // Positive stranded //
1236        //===================//
1237
1238        let start = "seq0:+:10".parse::<Coordinate<Interbase>>().unwrap();
1239        let end = "seq0:+:0".parse::<Coordinate<Interbase>>().unwrap();
1240
1241        let err = Interval::try_new(start, end).unwrap_err();
1242
1243        assert_eq!(
1244            err,
1245            Error::Nonsensical(NonsensicalError::NegativelySized {
1246                start: 10,
1247                end: 0,
1248                strand: Strand::Positive
1249            })
1250        );
1251
1252        assert_eq!(
1253            err.to_string(),
1254            "nonsensical interval: negatively sized interval: start is `10`, end is `0`, strand \
1255             is `+`"
1256        );
1257
1258        //===================//
1259        // Negative stranded //
1260        //===================//
1261
1262        let start = "seq0:-:0".parse::<Coordinate<Interbase>>().unwrap();
1263        let end = "seq0:-:10".parse::<Coordinate<Interbase>>().unwrap();
1264
1265        let err = Interval::try_new(start, end).unwrap_err();
1266
1267        assert_eq!(
1268            err,
1269            Error::Nonsensical(NonsensicalError::NegativelySized {
1270                start: 0,
1271                end: 10,
1272                strand: Strand::Negative
1273            })
1274        );
1275
1276        assert_eq!(
1277            err.to_string(),
1278            "nonsensical interval: negatively sized interval: start is `0`, end is `10`, strand \
1279             is `-`"
1280        );
1281    }
1282
1283    #[test]
1284    fn zero_sized() {
1285        let start = "seq0:+:10".parse::<Coordinate<Interbase>>().unwrap();
1286        let end = "seq0:+:10".parse::<Coordinate<Interbase>>().unwrap();
1287
1288        let interval = Interval::try_new(start.clone(), end.clone()).unwrap();
1289        assert!(interval.end().position().get() - interval.start().position().get() == 0);
1290        assert!(interval.contains_coordinate(&start));
1291        assert!(interval.contains_coordinate(&end));
1292        assert!(
1293            !interval.contains_coordinate(&"seq0:+:9".parse::<Coordinate<Interbase>>().unwrap())
1294        );
1295        assert!(
1296            !interval.contains_coordinate(&"seq0:+:11".parse::<Coordinate<Interbase>>().unwrap())
1297        );
1298    }
1299
1300    #[test]
1301    fn positive_strand_clamp() {
1302        let interval = "seq0:+:1000-2000".parse::<Interval<Interbase>>().unwrap();
1303
1304        assert_eq!(
1305            interval
1306                .clone()
1307                .clamp("seq1:+:0-3000".parse::<Interval<Interbase>>().unwrap()),
1308            Err(Error::Clamp(ClampError::MismatchedContigs {
1309                original: Contig::new("seq0"),
1310                operand: Contig::new("seq1")
1311            }))
1312        );
1313
1314        assert_eq!(
1315            interval
1316                .clone()
1317                .clamp("seq0:-:3000-0".parse::<Interval<Interbase>>().unwrap()),
1318            Err(Error::Clamp(ClampError::MismatchedStrand {
1319                original: Strand::Positive,
1320                operand: Strand::Negative
1321            }))
1322        );
1323
1324        assert_eq!(
1325            interval
1326                .clone()
1327                .clamp("seq0:+:0-3000".parse::<Interval<Interbase>>().unwrap())
1328                .unwrap(),
1329            "seq0:+:1000-2000".parse::<Interval<Interbase>>().unwrap()
1330        );
1331
1332        assert_eq!(
1333            interval
1334                .clone()
1335                .clamp("seq0:+:1250-3000".parse::<Interval<Interbase>>().unwrap())
1336                .unwrap(),
1337            "seq0:+:1250-2000".parse::<Interval<Interbase>>().unwrap()
1338        );
1339
1340        assert_eq!(
1341            interval
1342                .clone()
1343                .clamp("seq0:+:0-1750".parse::<Interval<Interbase>>().unwrap())
1344                .unwrap(),
1345            "seq0:+:1000-1750".parse::<Interval<Interbase>>().unwrap()
1346        );
1347
1348        assert_eq!(
1349            interval
1350                .clone()
1351                .clamp("seq0:+:1250-1750".parse::<Interval<Interbase>>().unwrap())
1352                .unwrap(),
1353            "seq0:+:1250-1750".parse::<Interval<Interbase>>().unwrap()
1354        );
1355    }
1356
1357    #[test]
1358    fn negative_strand_clamp() {
1359        let interval = "seq0:-:2000-1000".parse::<Interval<Interbase>>().unwrap();
1360
1361        assert_eq!(
1362            interval
1363                .clone()
1364                .clamp("seq1:-:3000-0".parse::<Interval<Interbase>>().unwrap()),
1365            Err(Error::Clamp(ClampError::MismatchedContigs {
1366                original: Contig::new("seq0"),
1367                operand: Contig::new("seq1")
1368            }))
1369        );
1370
1371        assert_eq!(
1372            interval
1373                .clone()
1374                .clamp("seq0:+:0-3000".parse::<Interval<Interbase>>().unwrap()),
1375            Err(Error::Clamp(ClampError::MismatchedStrand {
1376                original: Strand::Negative,
1377                operand: Strand::Positive
1378            }))
1379        );
1380
1381        assert_eq!(
1382            interval
1383                .clone()
1384                .clamp("seq0:-:3000-0".parse::<Interval<Interbase>>().unwrap())
1385                .unwrap(),
1386            "seq0:-:2000-1000".parse::<Interval<Interbase>>().unwrap()
1387        );
1388
1389        assert_eq!(
1390            interval
1391                .clone()
1392                .clamp("seq0:-:3000-1250".parse::<Interval<Interbase>>().unwrap())
1393                .unwrap(),
1394            "seq0:-:2000-1250".parse::<Interval<Interbase>>().unwrap()
1395        );
1396
1397        assert_eq!(
1398            interval
1399                .clone()
1400                .clamp("seq0:-:1750-0".parse::<Interval<Interbase>>().unwrap())
1401                .unwrap(),
1402            "seq0:-:1750-1000".parse::<Interval<Interbase>>().unwrap()
1403        );
1404
1405        assert_eq!(
1406            interval
1407                .clone()
1408                .clamp("seq0:-:1750-1250".parse::<Interval<Interbase>>().unwrap())
1409                .unwrap(),
1410            "seq0:-:1750-1250".parse::<Interval<Interbase>>().unwrap()
1411        );
1412    }
1413
1414    #[test]
1415    fn positive_strand_offset() {
1416        let interval = "seq0:+:1000-2000".parse::<Interval<Interbase>>().unwrap();
1417
1418        // Mismatched contigs means the interval does not contain the coordinate.
1419        let coordinate = "seq1:+:1000".parse::<Coordinate<Interbase>>().unwrap();
1420        assert!(interval.coordinate_offset(&coordinate).is_none());
1421
1422        // Mismatched strands means the interval does not contain the coordinate.
1423        let coordinate = "seq0:-:1000".parse::<Coordinate<Interbase>>().unwrap();
1424        assert!(interval.coordinate_offset(&coordinate).is_none());
1425
1426        // Contained within.
1427        let coordinate = "seq0:+:1000".parse::<Coordinate<Interbase>>().unwrap();
1428        assert_eq!(interval.coordinate_offset(&coordinate).unwrap(), 0);
1429
1430        let coordinate = "seq0:+:2000".parse::<Coordinate<Interbase>>().unwrap();
1431        assert_eq!(interval.coordinate_offset(&coordinate).unwrap(), 1000);
1432
1433        // Just outside of range.
1434        let coordinate = "seq0:+:999".parse::<Coordinate<Interbase>>().unwrap();
1435        assert!(interval.coordinate_offset(&coordinate).is_none());
1436
1437        let coordinate = "seq0:+:2001".parse::<Coordinate<Interbase>>().unwrap();
1438        assert!(interval.coordinate_offset(&coordinate).is_none());
1439    }
1440
1441    #[test]
1442    fn negative_strand_offset() {
1443        let interval = "seq0:-:2000-1000".parse::<Interval<Interbase>>().unwrap();
1444
1445        // Mismatched contigs means the interval does not contain the coordinate.
1446        let coordinate = "seq1:-:1000".parse::<Coordinate<Interbase>>().unwrap();
1447        assert!(interval.coordinate_offset(&coordinate).is_none());
1448
1449        // Mismatched strands means the interval does not contain the coordinate.
1450        let coordinate = "seq0:+:1000".parse::<Coordinate<Interbase>>().unwrap();
1451        assert!(interval.coordinate_offset(&coordinate).is_none());
1452
1453        // Contained within.
1454        let coordinate = "seq0:-:2000".parse::<Coordinate<Interbase>>().unwrap();
1455        assert_eq!(interval.coordinate_offset(&coordinate).unwrap(), 0);
1456
1457        let coordinate = "seq0:-:1000".parse::<Coordinate<Interbase>>().unwrap();
1458        assert_eq!(interval.coordinate_offset(&coordinate).unwrap(), 1000);
1459
1460        // Just outside of range.
1461        let coordinate = "seq0:-:999".parse::<Coordinate<Interbase>>().unwrap();
1462        assert!(interval.coordinate_offset(&coordinate).is_none());
1463
1464        let coordinate = "seq0:-:2001".parse::<Coordinate<Interbase>>().unwrap();
1465        assert!(interval.coordinate_offset(&coordinate).is_none());
1466    }
1467
1468    #[test]
1469    fn len() {
1470        assert_eq!(
1471            "seq0:+:0-1000"
1472                .parse::<Interval<Interbase>>()
1473                .unwrap()
1474                .count_entities(),
1475            1000
1476        );
1477
1478        assert_eq!(
1479            "seq0:-:1000-0"
1480                .parse::<Interval<Interbase>>()
1481                .unwrap()
1482                .count_entities(),
1483            1000
1484        );
1485        let interval = "seq0:-:2000-1000".parse::<Interval<Interbase>>().unwrap();
1486
1487        // Mismatched contigs means the interval does not contain the coordinate.
1488        let coordinate = "seq1:-:1000".parse::<Coordinate<Interbase>>().unwrap();
1489        assert!(interval.coordinate_offset(&coordinate).is_none());
1490
1491        // Mismatched strands means the interval does not contain the coordinate.
1492        let coordinate = "seq0:+:1000".parse::<Coordinate<Interbase>>().unwrap();
1493        assert!(interval.coordinate_offset(&coordinate).is_none());
1494
1495        // Contained within.
1496        let coordinate = "seq0:-:2000".parse::<Coordinate<Interbase>>().unwrap();
1497        assert_eq!(interval.coordinate_offset(&coordinate).unwrap(), 0);
1498
1499        let coordinate = "seq0:-:1000".parse::<Coordinate<Interbase>>().unwrap();
1500        assert_eq!(interval.coordinate_offset(&coordinate).unwrap(), 1000);
1501
1502        // Just outside of range.
1503        let coordinate = "seq0:-:999".parse::<Coordinate<Interbase>>().unwrap();
1504        assert!(interval.coordinate_offset(&coordinate).is_none());
1505
1506        let coordinate = "seq0:-:2001".parse::<Coordinate<Interbase>>().unwrap();
1507        assert!(interval.coordinate_offset(&coordinate).is_none());
1508    }
1509
1510    #[test]
1511    fn parse() {
1512        let value = format!("seq0:+:0-{}", Number::MAX);
1513        let interval = value.parse::<Interval<Interbase>>().unwrap();
1514        assert_eq!(interval.contig().as_str(), "seq0");
1515        assert_eq!(interval.strand(), Strand::Positive);
1516        assert_eq!(interval.start().position().get(), 0);
1517        assert_eq!(interval.end().position().get(), Number::MAX);
1518
1519        let value = format!("seq0:-:{}-0", Number::MAX);
1520        let interval = value.parse::<Interval<Interbase>>().unwrap();
1521        assert_eq!(interval.contig().as_str(), "seq0");
1522        assert_eq!(interval.strand(), Strand::Negative);
1523        assert_eq!(interval.start().position().get(), Number::MAX);
1524        assert_eq!(interval.end().position().get(), 0);
1525    }
1526
1527    #[test]
1528    fn parse_error() {
1529        let err = "1".parse::<Interval<Interbase>>().unwrap_err();
1530        assert_eq!(
1531            err,
1532            Error::Parse(ParseError::Format {
1533                value: String::from("1")
1534            })
1535        );
1536
1537        let err = "1-1000".parse::<Interval<Interbase>>().unwrap_err();
1538        assert_eq!(
1539            err,
1540            Error::Parse(ParseError::Format {
1541                value: String::from("1-1000")
1542            })
1543        );
1544
1545        let err = "seq0:".parse::<Interval<Interbase>>().unwrap_err();
1546        assert_eq!(
1547            err,
1548            Error::Parse(ParseError::Format {
1549                value: String::from("seq0:")
1550            })
1551        );
1552
1553        let err = "seq0:0-".parse::<Interval<Interbase>>().unwrap_err();
1554        assert_eq!(
1555            err,
1556            Error::Parse(ParseError::Format {
1557                value: String::from("seq0:0-")
1558            })
1559        );
1560
1561        let err = "seq0:0-10000:".parse::<Interval<Interbase>>().unwrap_err();
1562        assert_eq!(
1563            err,
1564            Error::Strand(StrandError::Parse(StrandParseError::Invalid {
1565                value: String::from("0-10000")
1566            }))
1567        );
1568
1569        let err = "seq0:+".parse::<Interval<Interbase>>().unwrap_err();
1570        assert_eq!(
1571            err,
1572            Error::Parse(ParseError::Format {
1573                value: String::from("seq0:+")
1574            })
1575        );
1576
1577        let err = "seq0:+:0".parse::<Interval<Interbase>>().unwrap_err();
1578        assert_eq!(
1579            err,
1580            Error::Parse(ParseError::Format {
1581                value: String::from("seq0:+:0")
1582            })
1583        );
1584
1585        let err = "seq0:+:0-".parse::<Interval<Interbase>>().unwrap_err();
1586        assert!(matches!(
1587            err,
1588            Error::Position(PositionError::Parse(PositionParseError::Int { .. }))
1589        ));
1590
1591        let err = "seq0:+:0-$".parse::<Interval<Interbase>>().unwrap_err();
1592        assert!(matches!(
1593            err,
1594            Error::Position(PositionError::Parse(PositionParseError::Int { .. }))
1595        ));
1596    }
1597
1598    #[test]
1599    fn to_string() {
1600        // Positive-stranded interval
1601        let start = "seq0:+:0".parse::<Coordinate<Interbase>>().unwrap();
1602        let end = "seq0:+:10".parse::<Coordinate<Interbase>>().unwrap();
1603        let interval = Interval::try_new(start, end).unwrap();
1604
1605        assert_eq!(interval.to_string(), "seq0:+:0-10");
1606
1607        // Negative-stranded interval
1608        let start = "seq0:-:10".parse::<Coordinate<Interbase>>().unwrap();
1609        let end = "seq0:-:0".parse::<Coordinate<Interbase>>().unwrap();
1610        let interval = Interval::try_new(start, end).unwrap();
1611
1612        assert_eq!(interval.to_string(), "seq0:-:10-0");
1613    }
1614}