omics_coordinate/interval/interbase.rs
1//! Interbase intervals.
2
3use crate::Strand;
4use crate::base;
5use crate::interbase::Coordinate;
6use crate::interval::Number;
7use crate::interval::r#trait;
8use crate::system::Base;
9use crate::system::Interbase;
10
11////////////////////////////////////////////////////////////////////////////////////////
12// Intervals
13////////////////////////////////////////////////////////////////////////////////////////
14
/// An interbase interval.
///
/// This is a type alias for [`crate::Interval`] parameterized with the
/// [`Interbase`] coordinate system.
///
/// Interbase intervals consist of two interbase positions. The range is
/// represented by the interval `[start, end]`.
///
/// It is worth noting that, historically, the translation of interbase
/// positions to the nucleotides encoded in the range has included a heuristic
/// whereby the following nucleotide was associated with the interbase position.
/// This is why interbase intervals are often described as _exclusively_ bounded
/// at the end, as the nucleotide _following_ the end interbase position is not
/// included in the range (and, generally, what one is most interested in is the
/// nucleotides or other entities contained therein).
///
/// Because this crate does not co-mingle the idea of interbase and in-base
/// representations, no such gymnastics are required.
pub type Interval = crate::Interval<Interbase>;
31
32impl Interval {
33 /// Checks whether the interbase interval contains the entity _after_ the
34 /// specified interbase coordinate.
35 ///
36 /// This method returns an [`Option`] because the next coordinate may or may
37 /// not be a valid position—if you'd like to handle that separately, you can
38 /// do so with the check on the option.
39 ///
40 /// # Examples
41 ///
42 /// ```
43 /// use omics_coordinate::Coordinate;
44 /// use omics_coordinate::Interval;
45 /// use omics_coordinate::system::Interbase;
46 ///
47 /// let interval = "seq0:+:10-1000".parse::<Interval<Interbase>>()?;
48 ///
49 /// assert!(
50 /// !interval
51 /// .contains_next_entity("seq0:+:9".parse::<Coordinate<Interbase>>()?)
52 /// .unwrap()
53 /// );
54 ///
55 /// assert!(
56 /// interval
57 /// .contains_next_entity("seq0:+:10".parse::<Coordinate<Interbase>>()?)
58 /// .unwrap()
59 /// );
60 ///
61 /// assert!(
62 /// interval
63 /// .contains_next_entity("seq0:+:999".parse::<Coordinate<Interbase>>()?)
64 /// .unwrap()
65 /// );
66 ///
67 /// assert!(
68 /// !interval
69 /// .contains_next_entity("seq0:+:1000".parse::<Coordinate<Interbase>>()?)
70 /// .unwrap()
71 /// );
72 ///
73 /// # Ok::<(), Box<dyn std::error::Error>>(())
74 /// ```
75 pub fn contains_next_entity(&self, coordinate: Coordinate) -> Option<bool> {
76 let coordinate = coordinate.nudge_forward()?;
77 Some(self.contains_entity(&coordinate))
78 }
79
80 /// Checks whether the interbase interval contains the entity _before_ the
81 /// specified interbase coordinate.
82 ///
83 /// This method returns an [`Option`] because the next coordinate may or may
84 /// not be a valid position—if you'd like to handle that separately, you can
85 /// do so with the check on the option.
86 ///
87 /// # Examples
88 ///
89 /// ```
90 /// use omics_coordinate::Coordinate;
91 /// use omics_coordinate::Interval;
92 /// use omics_coordinate::system::Interbase;
93 ///
94 /// let interval = "seq0:+:10-1000".parse::<Interval<Interbase>>()?;
95 ///
96 /// assert!(
97 /// !interval
98 /// .contains_prev_entity("seq0:+:10".parse::<Coordinate<Interbase>>()?)
99 /// .unwrap()
100 /// );
101 ///
102 /// assert!(
103 /// interval
104 /// .contains_prev_entity("seq0:+:11".parse::<Coordinate<Interbase>>()?)
105 /// .unwrap()
106 /// );
107 ///
108 /// assert!(
109 /// interval
110 /// .contains_prev_entity("seq0:+:1000".parse::<Coordinate<Interbase>>()?)
111 /// .unwrap()
112 /// );
113 ///
114 /// assert!(
115 /// !interval
116 /// .contains_prev_entity("seq0:+:1001".parse::<Coordinate<Interbase>>()?)
117 /// .unwrap()
118 /// );
119 ///
120 /// # Ok::<(), Box<dyn std::error::Error>>(())
121 /// ```
122 pub fn contains_prev_entity(&self, coordinate: Coordinate) -> Option<bool> {
123 let coordinate = coordinate.nudge_backward()?;
124 Some(self.contains_entity(&coordinate))
125 }
126
127 /// Consumes `self` and returns the equivalent in-base interval.
128 ///
129 /// # Examples
130 ///
131 /// ```
132 /// use omics_coordinate::Interval;
133 /// use omics_coordinate::system::Base;
134 /// use omics_coordinate::system::Interbase;
135 ///
136 /// let interval = "seq0:+:0-1000".parse::<Interval<Interbase>>()?;
137 /// let equivalent = interval.into_equivalent_base();
138 ///
139 /// assert_eq!("seq0:+:1-1000".parse::<Interval<Base>>()?, equivalent);
140 ///
141 /// # Ok::<(), Box<dyn std::error::Error>>(())
142 /// ```
143 pub fn into_equivalent_base(self) -> crate::interval::Interval<Base> {
144 let (start, end) = self.into_coordinates();
145
146 // SAFETY: given the rules of how interbase and base coordinate systems
147 // work, this should always unwrap.
148 let start = start.nudge_forward().unwrap();
149 let end = end.nudge_backward().unwrap();
150
151 // SAFETY: since this was previously a valid interbase interval, as long
152 // as the two nudges above succeed, this should always unwrap.
153 crate::interval::Interval::<Base>::try_new(start, end).unwrap()
154 }
155}
156
157////////////////////////////////////////////////////////////////////////////////////////
158// Trait implementations
159////////////////////////////////////////////////////////////////////////////////////////
160
161impl r#trait::Interval<Interbase> for Interval {
162 fn contains_entity(&self, coordinate: &base::Coordinate) -> bool {
163 if self.contig() != coordinate.contig() {
164 return false;
165 }
166
167 if self.strand() != coordinate.strand() {
168 return false;
169 }
170
171 match self.strand() {
172 Strand::Positive => {
173 self.start().position().get() < coordinate.position().get()
174 && self.end().position().get() >= coordinate.position().get()
175 }
176 Strand::Negative => {
177 self.start().position().get() >= coordinate.position().get()
178 && self.end().position().get() < coordinate.position().get()
179 }
180 }
181 }
182
183 /// Gets the number of entities within the interval.
184 fn count_entities(&self) -> Number {
185 self.start()
186 .position()
187 .distance_unchecked(self.end().position())
188 }
189}
190
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Coordinate;
    use crate::system::Base;

    /// Builds an interbase coordinate, panicking if the parts are invalid.
    fn create_coordinate(
        contig: &str,
        strand: &str,
        position: Number,
    ) -> crate::Coordinate<Interbase> {
        Coordinate::<Interbase>::try_new(contig, strand, position).unwrap()
    }

    /// Builds an in-base coordinate, panicking if the parts are invalid.
    fn create_base_coordinate(
        contig: &str,
        strand: &str,
        position: Number,
    ) -> crate::Coordinate<Base> {
        Coordinate::<Base>::try_new(contig, strand, position).unwrap()
    }

    /// Builds an interbase interval whose bounds share a contig and strand.
    fn create_interval(contig: &str, strand: &str, start: Number, end: Number) -> Interval {
        let start = create_coordinate(contig, strand, start);
        let end = create_coordinate(contig, strand, end);

        Interval::try_new(start, end).unwrap()
    }

    /// Asks `interval` whether it contains the described interbase coordinate.
    fn has_coordinate(interval: &Interval, contig: &str, strand: &str, position: Number) -> bool {
        interval.contains_coordinate(&create_coordinate(contig, strand, position))
    }

    /// Asks `interval` whether it contains the described in-base entity.
    fn has_entity(interval: &Interval, contig: &str, strand: &str, position: Number) -> bool {
        interval.contains_entity(&create_base_coordinate(contig, strand, position))
    }

    /// Asks `interval` whether it contains the entity after the described
    /// interbase coordinate.
    fn has_next_entity(
        interval: &Interval,
        contig: &str,
        strand: &str,
        position: Number,
    ) -> Option<bool> {
        interval.contains_next_entity(create_coordinate(contig, strand, position))
    }

    /// Asks `interval` whether it contains the entity before the described
    /// interbase coordinate.
    fn has_prev_entity(
        interval: &Interval,
        contig: &str,
        strand: &str,
        position: Number,
    ) -> Option<bool> {
        interval.contains_prev_entity(create_coordinate(contig, strand, position))
    }

    #[test]
    fn contains() {
        // Positive strand.
        let interval = create_interval("seq0", "+", 10, 20);

        // Both bounds of the interval are contained.
        assert!(interval.contains_coordinate(interval.start()));
        assert!(interval.contains_coordinate(interval.end()));

        // A coordinate in the middle of the range is contained.
        assert!(has_coordinate(&interval, "seq0", "+", 15));

        // The positions just before the start and just after the end are not.
        assert!(!has_coordinate(&interval, "seq0", "+", 9));
        assert!(!has_coordinate(&interval, "seq0", "+", 21));

        // Neither is a random other position.
        assert!(!has_coordinate(&interval, "seq0", "+", 1000));

        // A coordinate on another contig or another strand is never contained.
        assert!(!has_coordinate(&interval, "seq1", "+", 15));
        assert!(!has_coordinate(&interval, "seq0", "-", 15));

        // Negative strand.
        let interval = create_interval("seq0", "-", 20, 10);

        // Both bounds of the interval are contained.
        assert!(interval.contains_coordinate(interval.start()));
        assert!(interval.contains_coordinate(interval.end()));

        // A coordinate in the middle of the range is contained.
        assert!(has_coordinate(&interval, "seq0", "-", 15));

        // The positions just before the start and just after the end are not.
        assert!(!has_coordinate(&interval, "seq0", "-", 21));
        assert!(!has_coordinate(&interval, "seq0", "-", 9));

        // Neither is a random other position.
        assert!(!has_coordinate(&interval, "seq0", "-", 0));

        // A coordinate on another contig or another strand is never contained.
        assert!(!has_coordinate(&interval, "seq1", "-", 15));
        assert!(!has_coordinate(&interval, "seq0", "+", 15));
    }

    #[test]
    fn contains_entity() {
        // Positive strand.
        let interval = create_interval("seq0", "+", 10, 20);

        // The entity a half-step _before_ the start is excluded, while the
        // entity a half-step _after_ the start is included.
        assert!(!has_entity(&interval, "seq0", "+", 10));
        assert!(has_entity(&interval, "seq0", "+", 11));

        // The entity a half-step _before_ the end is included, while the
        // entity a half-step _after_ the end is excluded.
        assert!(has_entity(&interval, "seq0", "+", 20));
        assert!(!has_entity(&interval, "seq0", "+", 21));

        // An entity midway through the range is included.
        assert!(has_entity(&interval, "seq0", "+", 15));

        // Entities on a different contig or strand are excluded.
        assert!(!has_entity(&interval, "seq1", "+", 15));
        assert!(!has_entity(&interval, "seq0", "-", 15));

        // Negative strand.
        let interval = create_interval("seq0", "-", 20, 10);

        // The entity a half-step _before_ the start is excluded, while the
        // entity a half-step _after_ the start is included.
        assert!(!has_entity(&interval, "seq0", "-", 21));
        assert!(has_entity(&interval, "seq0", "-", 20));

        // The entity a half-step _before_ the end is included, while the
        // entity a half-step _after_ the end is excluded.
        assert!(has_entity(&interval, "seq0", "-", 11));
        assert!(!has_entity(&interval, "seq0", "-", 10));

        // An entity midway through the range is included.
        assert!(has_entity(&interval, "seq0", "-", 15));

        // Entities on a different contig or strand are excluded.
        assert!(!has_entity(&interval, "seq1", "-", 15));
        assert!(!has_entity(&interval, "seq0", "+", 15));
    }

    #[test]
    fn contains_next_entity() {
        // Positive strand.
        let interval = create_interval("seq0", "+", 10, 20);

        // The next entity is excluded one position before the start, but
        // included at the start itself.
        assert_eq!(has_next_entity(&interval, "seq0", "+", 9), Some(false));
        assert_eq!(has_next_entity(&interval, "seq0", "+", 10), Some(true));

        // The next entity is included one position before the end, but
        // excluded at the end itself.
        assert_eq!(has_next_entity(&interval, "seq0", "+", 19), Some(true));
        assert_eq!(has_next_entity(&interval, "seq0", "+", 20), Some(false));

        // The next entity after a position in the middle of the range is
        // included.
        assert_eq!(has_next_entity(&interval, "seq0", "+", 15), Some(true));

        // Coordinates on a different contig or strand are excluded.
        assert_eq!(has_next_entity(&interval, "seq1", "+", 15), Some(false));
        assert_eq!(has_next_entity(&interval, "seq0", "-", 15), Some(false));

        // Negative strand.
        let interval = create_interval("seq0", "-", 20, 10);

        // The next entity is excluded one position before the start, but
        // included at the start itself.
        assert_eq!(has_next_entity(&interval, "seq0", "-", 21), Some(false));
        assert_eq!(has_next_entity(&interval, "seq0", "-", 20), Some(true));

        // The next entity is included one position before the end, but
        // excluded at the end itself.
        assert_eq!(has_next_entity(&interval, "seq0", "-", 11), Some(true));
        assert_eq!(has_next_entity(&interval, "seq0", "-", 10), Some(false));

        // The next entity after a position in the middle of the range is
        // included.
        assert_eq!(has_next_entity(&interval, "seq0", "-", 15), Some(true));

        // Coordinates on a different contig or strand are excluded.
        assert_eq!(has_next_entity(&interval, "seq1", "-", 15), Some(false));
        assert_eq!(has_next_entity(&interval, "seq0", "+", 15), Some(false));

        // At the maximum position on the positive strand, the coordinate
        // cannot be bumped forward a half-step, so the whole operation yields
        // [`None`].
        let interval = create_interval("seq0", "+", Number::MAX - 10, Number::MAX);
        assert_eq!(has_next_entity(&interval, "seq0", "+", Number::MAX), None);

        // At position zero on the negative strand, the coordinate cannot be
        // bumped forward a half-step, so the whole operation yields [`None`].
        let interval = create_interval("seq0", "-", 10, 0);
        assert_eq!(has_next_entity(&interval, "seq0", "-", 0), None);
    }

    #[test]
    fn contains_prev_entity() {
        // Positive strand.
        let interval = create_interval("seq0", "+", 10, 20);

        // The previous entity is excluded at the start itself, but included
        // one position after the start.
        assert_eq!(has_prev_entity(&interval, "seq0", "+", 10), Some(false));
        assert_eq!(has_prev_entity(&interval, "seq0", "+", 11), Some(true));

        // The previous entity is included at the end itself, but excluded one
        // position after the end.
        assert_eq!(has_prev_entity(&interval, "seq0", "+", 20), Some(true));
        assert_eq!(has_prev_entity(&interval, "seq0", "+", 21), Some(false));

        // The previous entity before a position in the middle of the range is
        // included.
        assert_eq!(has_prev_entity(&interval, "seq0", "+", 15), Some(true));

        // Coordinates on a different contig or strand are excluded.
        assert_eq!(has_prev_entity(&interval, "seq1", "+", 15), Some(false));
        assert_eq!(has_prev_entity(&interval, "seq0", "-", 15), Some(false));

        // Negative strand.
        let interval = create_interval("seq0", "-", 20, 10);

        // The previous entity is excluded at the start itself, but included
        // one position after the start.
        assert_eq!(has_prev_entity(&interval, "seq0", "-", 20), Some(false));
        assert_eq!(has_prev_entity(&interval, "seq0", "-", 19), Some(true));

        // The previous entity is included at the end itself, but excluded one
        // position after the end.
        assert_eq!(has_prev_entity(&interval, "seq0", "-", 10), Some(true));
        assert_eq!(has_prev_entity(&interval, "seq0", "-", 9), Some(false));

        // The previous entity before a position in the middle of the range is
        // included.
        assert_eq!(has_prev_entity(&interval, "seq0", "-", 15), Some(true));

        // Coordinates on a different contig or strand are excluded.
        assert_eq!(has_prev_entity(&interval, "seq1", "-", 15), Some(false));
        assert_eq!(has_prev_entity(&interval, "seq0", "+", 15), Some(false));

        // At position zero on the positive strand, the coordinate cannot be
        // bumped backward a half-step, so the whole operation yields [`None`].
        let interval = create_interval("seq0", "+", 0, 10);
        assert_eq!(has_prev_entity(&interval, "seq0", "+", 0), None);

        // At the maximum position on the negative strand, the coordinate
        // cannot be bumped backward a half-step, so the whole operation yields
        // [`None`].
        let interval = create_interval("seq0", "-", Number::MAX, Number::MAX - 10);
        assert_eq!(has_prev_entity(&interval, "seq0", "-", Number::MAX), None);
    }
}