omics_coordinate/interval/
interbase.rs

1//! Interbase intervals.
2
3use crate::Strand;
4use crate::base;
5use crate::interbase::Coordinate;
6use crate::interval::Number;
7use crate::interval::r#trait;
8use crate::system::Base;
9use crate::system::Interbase;
10
11////////////////////////////////////////////////////////////////////////////////////////
12// Intervals
13////////////////////////////////////////////////////////////////////////////////////////
14
15/// An interbase interval.
16///
17/// Interbase intervals consist of two interbase positions. The range is
18/// represented by the interval `[start, end]`.
19///
20/// It is worth noting that, historically, the translation of interbase
21/// positions to the nucleotide encoded in the range have included a heuristic
22/// whereby the following nucleotide was associated with the interbase position.
23/// This is why interbase intervals are often described as _exclusively_ bounded
24/// at the end, as the nucleotide _following_ the interbase position is not
25/// included in the range (and, generally, what one is most interested in is the
26/// nucleotides or other entities contained therein).
27///
28/// Because this crate does not co-mingle the idea of interbase and in-base
29/// representations, no such gymnastics are required.
30pub type Interval = crate::Interval<Interbase>;
31
32impl Interval {
33    /// Checks whether the interbase interval contains the entity _after_ the
34    /// specified interbase coordinate.
35    ///
36    /// This method returns an [`Option`] because the next coordinate may or may
37    /// not be a valid position—if you'd like to handle that separately, you can
38    /// do so with the check on the option.
39    ///
40    /// # Examples
41    ///
42    /// ```
43    /// use omics_coordinate::Coordinate;
44    /// use omics_coordinate::Interval;
45    /// use omics_coordinate::system::Interbase;
46    ///
47    /// let interval = "seq0:+:10-1000".parse::<Interval<Interbase>>()?;
48    ///
49    /// assert!(
50    ///     !interval
51    ///         .contains_next_entity("seq0:+:9".parse::<Coordinate<Interbase>>()?)
52    ///         .unwrap()
53    /// );
54    ///
55    /// assert!(
56    ///     interval
57    ///         .contains_next_entity("seq0:+:10".parse::<Coordinate<Interbase>>()?)
58    ///         .unwrap()
59    /// );
60    ///
61    /// assert!(
62    ///     interval
63    ///         .contains_next_entity("seq0:+:999".parse::<Coordinate<Interbase>>()?)
64    ///         .unwrap()
65    /// );
66    ///
67    /// assert!(
68    ///     !interval
69    ///         .contains_next_entity("seq0:+:1000".parse::<Coordinate<Interbase>>()?)
70    ///         .unwrap()
71    /// );
72    ///
73    /// # Ok::<(), Box<dyn std::error::Error>>(())
74    /// ```
75    pub fn contains_next_entity(&self, coordinate: Coordinate) -> Option<bool> {
76        let coordinate = coordinate.nudge_forward()?;
77        Some(self.contains_entity(&coordinate))
78    }
79
80    /// Checks whether the interbase interval contains the entity _before_ the
81    /// specified interbase coordinate.
82    ///
83    /// This method returns an [`Option`] because the next coordinate may or may
84    /// not be a valid position—if you'd like to handle that separately, you can
85    /// do so with the check on the option.
86    ///
87    /// # Examples
88    ///
89    /// ```
90    /// use omics_coordinate::Coordinate;
91    /// use omics_coordinate::Interval;
92    /// use omics_coordinate::system::Interbase;
93    ///
94    /// let interval = "seq0:+:10-1000".parse::<Interval<Interbase>>()?;
95    ///
96    /// assert!(
97    ///     !interval
98    ///         .contains_prev_entity("seq0:+:10".parse::<Coordinate<Interbase>>()?)
99    ///         .unwrap()
100    /// );
101    ///
102    /// assert!(
103    ///     interval
104    ///         .contains_prev_entity("seq0:+:11".parse::<Coordinate<Interbase>>()?)
105    ///         .unwrap()
106    /// );
107    ///
108    /// assert!(
109    ///     interval
110    ///         .contains_prev_entity("seq0:+:1000".parse::<Coordinate<Interbase>>()?)
111    ///         .unwrap()
112    /// );
113    ///
114    /// assert!(
115    ///     !interval
116    ///         .contains_prev_entity("seq0:+:1001".parse::<Coordinate<Interbase>>()?)
117    ///         .unwrap()
118    /// );
119    ///
120    /// # Ok::<(), Box<dyn std::error::Error>>(())
121    /// ```
122    pub fn contains_prev_entity(&self, coordinate: Coordinate) -> Option<bool> {
123        let coordinate = coordinate.nudge_backward()?;
124        Some(self.contains_entity(&coordinate))
125    }
126
127    /// Consumes `self` and returns the equivalent in-base interval.
128    ///
129    /// # Examples
130    ///
131    /// ```
132    /// use omics_coordinate::Interval;
133    /// use omics_coordinate::system::Base;
134    /// use omics_coordinate::system::Interbase;
135    ///
136    /// let interval = "seq0:+:0-1000".parse::<Interval<Interbase>>()?;
137    /// let equivalent = interval.into_equivalent_base();
138    ///
139    /// assert_eq!("seq0:+:1-1000".parse::<Interval<Base>>()?, equivalent);
140    ///
141    /// # Ok::<(), Box<dyn std::error::Error>>(())
142    /// ```
143    pub fn into_equivalent_base(self) -> crate::interval::Interval<Base> {
144        let (start, end) = self.into_coordinates();
145
146        // SAFETY: given the rules of how interbase and base coordinate systems
147        // work, this should always unwrap.
148        let start = start.nudge_forward().unwrap();
149        let end = end.nudge_backward().unwrap();
150
151        // SAFETY: since this was previously a valid interbase interval, as long
152        // as the two nudges above succeed, this should always unwrap.
153        crate::interval::Interval::<Base>::try_new(start, end).unwrap()
154    }
155}
156
157////////////////////////////////////////////////////////////////////////////////////////
158// Trait implementations
159////////////////////////////////////////////////////////////////////////////////////////
160
161impl r#trait::Interval<Interbase> for Interval {
162    fn contains_entity(&self, coordinate: &base::Coordinate) -> bool {
163        if self.contig() != coordinate.contig() {
164            return false;
165        }
166
167        if self.strand() != coordinate.strand() {
168            return false;
169        }
170
171        match self.strand() {
172            Strand::Positive => {
173                self.start().position().get() < coordinate.position().get()
174                    && self.end().position().get() >= coordinate.position().get()
175            }
176            Strand::Negative => {
177                self.start().position().get() >= coordinate.position().get()
178                    && self.end().position().get() < coordinate.position().get()
179            }
180        }
181    }
182
183    /// Gets the number of entities within the interval.
184    fn count_entities(&self) -> Number {
185        self.start()
186            .position()
187            .distance_unchecked(self.end().position())
188    }
189}
190
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Coordinate;
    use crate::system::Base;

    /// A `(contig, strand, position)` triple paired with the expected outcome
    /// of probing an interval with the coordinate it describes.
    type Case<T> = (&'static str, &'static str, Number, T);

    /// Builds an interbase coordinate, panicking if the parts are invalid.
    fn create_coordinate(
        contig: &str,
        strand: &str,
        position: Number,
    ) -> crate::Coordinate<Interbase> {
        Coordinate::try_new(contig, strand, position).unwrap()
    }

    /// Builds an in-base coordinate, panicking if the parts are invalid.
    fn create_base_coordinate(
        contig: &str,
        strand: &str,
        position: Number,
    ) -> crate::Coordinate<Base> {
        Coordinate::try_new(contig, strand, position).unwrap()
    }

    /// Builds an interbase interval on a single contig and strand, panicking
    /// if the endpoints do not form a valid interval.
    fn create_interval(contig: &str, strand: &str, start: Number, end: Number) -> Interval {
        let start = create_coordinate(contig, strand, start);
        let end = create_coordinate(contig, strand, end);

        Interval::try_new(start, end).unwrap()
    }

    /// Runs `probe` against every case and checks that it produces the
    /// expected outcome.
    fn check<T, F>(cases: &[Case<T>], probe: F)
    where
        T: PartialEq + std::fmt::Debug + Copy,
        F: Fn(&str, &str, Number) -> T,
    {
        for &(contig, strand, position, expected) in cases {
            assert_eq!(
                probe(contig, strand, position),
                expected,
                "unexpected result for {}:{}:{}",
                contig,
                strand,
                position
            );
        }
    }

    #[test]
    fn contains() {
        let interval = create_interval("seq0", "+", 10, 20);

        // An interval contains both of its own endpoints.
        assert!(interval.contains_coordinate(interval.start()));
        assert!(interval.contains_coordinate(interval.end()));

        check(
            &[
                // A coordinate in the middle of the range.
                ("seq0", "+", 15, true),
                // The position _before_ the start position.
                ("seq0", "+", 9, false),
                // The position _after_ the end position.
                ("seq0", "+", 21, false),
                // A random other position.
                ("seq0", "+", 1000, false),
                // A coordinate on another contig.
                ("seq1", "+", 15, false),
                // A coordinate on another strand.
                ("seq0", "-", 15, false),
            ],
            |contig, strand, position| {
                interval.contains_coordinate(&create_coordinate(contig, strand, position))
            },
        );

        let interval = create_interval("seq0", "-", 20, 10);

        // An interval contains both of its own endpoints.
        assert!(interval.contains_coordinate(interval.start()));
        assert!(interval.contains_coordinate(interval.end()));

        check(
            &[
                // A coordinate in the middle of the range.
                ("seq0", "-", 15, true),
                // The position _before_ the start position.
                ("seq0", "-", 21, false),
                // The position _after_ the end position.
                ("seq0", "-", 9, false),
                // A random other position.
                ("seq0", "-", 0, false),
                // A coordinate on another contig.
                ("seq1", "-", 15, false),
                // A coordinate on another strand.
                ("seq0", "+", 15, false),
            ],
            |contig, strand, position| {
                interval.contains_coordinate(&create_coordinate(contig, strand, position))
            },
        );
    }

    #[test]
    fn contains_entity() {
        let interval = create_interval("seq0", "+", 10, 20);

        check(
            &[
                // The entity a half-step _before_ the start is excluded.
                ("seq0", "+", 10, false),
                // The entity a half-step _after_ the start is included.
                ("seq0", "+", 11, true),
                // The entity a half-step _before_ the end is included.
                ("seq0", "+", 20, true),
                // The entity a half-step _after_ the end is excluded.
                ("seq0", "+", 21, false),
                // An entity midway through the range is included.
                ("seq0", "+", 15, true),
                // An entity on a different contig is excluded.
                ("seq1", "+", 15, false),
                // An entity on a different strand is excluded.
                ("seq0", "-", 15, false),
            ],
            |contig, strand, position| {
                interval.contains_entity(&create_base_coordinate(contig, strand, position))
            },
        );

        let interval = create_interval("seq0", "-", 20, 10);

        check(
            &[
                // The entity a half-step _before_ the start is excluded.
                ("seq0", "-", 21, false),
                // The entity a half-step _after_ the start is included.
                ("seq0", "-", 20, true),
                // The entity a half-step _before_ the end is included.
                ("seq0", "-", 11, true),
                // The entity a half-step _after_ the end is excluded.
                ("seq0", "-", 10, false),
                // An entity midway through the range is included.
                ("seq0", "-", 15, true),
                // An entity on a different contig is excluded.
                ("seq1", "-", 15, false),
                // An entity on a different strand is excluded.
                ("seq0", "+", 15, false),
            ],
            |contig, strand, position| {
                interval.contains_entity(&create_base_coordinate(contig, strand, position))
            },
        );
    }

    #[test]
    fn contains_next_entity() {
        let interval = create_interval("seq0", "+", 10, 20);

        check(
            &[
                // One position before the start: the next entity is excluded.
                ("seq0", "+", 9, Some(false)),
                // At the start: the next entity is included.
                ("seq0", "+", 10, Some(true)),
                // One position before the end: the next entity is included.
                ("seq0", "+", 19, Some(true)),
                // At the end: the next entity is excluded.
                ("seq0", "+", 20, Some(false)),
                // In the middle of the range: the next entity is included.
                ("seq0", "+", 15, Some(true)),
                // A coordinate on a different contig.
                ("seq1", "+", 15, Some(false)),
                // A coordinate on a different strand.
                ("seq0", "-", 15, Some(false)),
            ],
            |contig, strand, position| {
                interval.contains_next_entity(create_coordinate(contig, strand, position))
            },
        );

        let interval = create_interval("seq0", "-", 20, 10);

        check(
            &[
                // One position before the start: the next entity is excluded.
                ("seq0", "-", 21, Some(false)),
                // At the start: the next entity is included.
                ("seq0", "-", 20, Some(true)),
                // One position before the end: the next entity is included.
                ("seq0", "-", 11, Some(true)),
                // At the end: the next entity is excluded.
                ("seq0", "-", 10, Some(false)),
                // In the middle of the range: the next entity is included.
                ("seq0", "-", 15, Some(true)),
                // A coordinate on a different contig.
                ("seq1", "-", 15, Some(false)),
                // A coordinate on a different strand.
                ("seq0", "+", 15, Some(false)),
            ],
            |contig, strand, position| {
                interval.contains_next_entity(create_coordinate(contig, strand, position))
            },
        );

        let interval = create_interval("seq0", "+", Number::MAX - 10, Number::MAX);

        // This position cannot be bumped forward a half-step, so the whole
        // operation should return [`None`].
        check(&[("seq0", "+", Number::MAX, None)], |contig, strand, position| {
            interval.contains_next_entity(create_coordinate(contig, strand, position))
        });

        let interval = create_interval("seq0", "-", 10, 0);

        // This position cannot be bumped forward a half-step, so the whole
        // operation should return [`None`].
        check(&[("seq0", "-", 0, None)], |contig, strand, position| {
            interval.contains_next_entity(create_coordinate(contig, strand, position))
        });
    }

    #[test]
    fn contains_prev_entity() {
        let interval = create_interval("seq0", "+", 10, 20);

        check(
            &[
                // At the start: the previous entity is excluded.
                ("seq0", "+", 10, Some(false)),
                // One position past the start: the previous entity is included.
                ("seq0", "+", 11, Some(true)),
                // At the end: the previous entity is included.
                ("seq0", "+", 20, Some(true)),
                // One position past the end: the previous entity is excluded.
                ("seq0", "+", 21, Some(false)),
                // In the middle of the range: the previous entity is included.
                ("seq0", "+", 15, Some(true)),
                // A coordinate on a different contig.
                ("seq1", "+", 15, Some(false)),
                // A coordinate on a different strand.
                ("seq0", "-", 15, Some(false)),
            ],
            |contig, strand, position| {
                interval.contains_prev_entity(create_coordinate(contig, strand, position))
            },
        );

        let interval = create_interval("seq0", "-", 20, 10);

        check(
            &[
                // At the start: the previous entity is excluded.
                ("seq0", "-", 20, Some(false)),
                // One position past the start: the previous entity is included.
                ("seq0", "-", 19, Some(true)),
                // At the end: the previous entity is included.
                ("seq0", "-", 10, Some(true)),
                // One position past the end: the previous entity is excluded.
                ("seq0", "-", 9, Some(false)),
                // In the middle of the range: the previous entity is included.
                ("seq0", "-", 15, Some(true)),
                // A coordinate on a different contig.
                ("seq1", "-", 15, Some(false)),
                // A coordinate on a different strand.
                ("seq0", "+", 15, Some(false)),
            ],
            |contig, strand, position| {
                interval.contains_prev_entity(create_coordinate(contig, strand, position))
            },
        );

        let interval = create_interval("seq0", "+", 0, 10);

        // This position cannot be bumped backward a half-step, so the whole
        // operation should return [`None`].
        check(&[("seq0", "+", 0, None)], |contig, strand, position| {
            interval.contains_prev_entity(create_coordinate(contig, strand, position))
        });

        let interval = create_interval("seq0", "-", Number::MAX, Number::MAX - 10);

        // This position cannot be bumped backward a half-step, so the whole
        // operation should return [`None`].
        check(&[("seq0", "-", Number::MAX, None)], |contig, strand, position| {
            interval.contains_prev_entity(create_coordinate(contig, strand, position))
        });
    }
}