omics_coordinate/interval.rs
1//! Intervals.
2
3use std::cmp::max;
4use std::cmp::min;
5
6use thiserror::Error;
7
8use crate::Contig;
9use crate::Position;
10use crate::Strand;
11use crate::System;
12use crate::coordinate;
13use crate::coordinate::Coordinate;
14use crate::position;
15use crate::position::Number;
16use crate::strand;
17use crate::system::Base;
18
19pub mod base;
20pub mod interbase;
21
22////////////////////////////////////////////////////////////////////////////////////////
23// Errors
24////////////////////////////////////////////////////////////////////////////////////////
25
/// An error that occurs during clamping.
///
/// `Interval::clamp()` reports these errors (wrapped in [`Error::Clamp`]) when
/// the interval being clamped and the interval doing the clamping do not share
/// a contig or a strand.
#[derive(Error, Debug, PartialEq, Eq)]
pub enum ClampError {
    /// A mismatched contig error.
    ///
    /// This error occurs when one attempts to clamp an interval with another
    /// interval that is not located on the same contig.
    #[error("mismatched contigs: `{original}` and `{operand}`")]
    MismatchedContigs {
        /// The contig of the interval being clamped.
        original: Contig,

        /// The contig of the interval doing the clamping.
        operand: Contig,
    },

    /// A mismatched strand error.
    ///
    /// This error occurs when one attempts to clamp an interval with another
    /// interval that is not located on the same strand.
    #[error("mismatched strand: `{original}` and `{operand}`")]
    MismatchedStrand {
        /// The strand of the interval being clamped.
        original: Strand,

        /// The strand of the interval doing the clamping.
        operand: Strand,
    },
}

/// A [`Result`](std::result::Result) with a [`ClampError`].
pub type ClampResult<T> = std::result::Result<T, ClampError>;
58
/// An error related to the creation of a nonsensical interval.
///
/// `Interval::try_new()` reports these errors (wrapped in
/// [`Error::Nonsensical`]) when the start and end coordinates cannot form a
/// valid interval.
#[derive(Error, Debug, PartialEq, Eq)]
pub enum NonsensicalError {
    /// A mismatched contig error.
    ///
    /// This error occurs when one attempts to create an interval from a start
    /// coordinate and an end coordinate that are not located on the same
    /// contig.
    #[error("mismatched contigs for coordinates: `{start}` and `{end}`")]
    MismatchedContigs {
        /// The contig of the start coordinate.
        start: Contig,

        /// The contig of the end coordinate.
        end: Contig,
    },

    /// A mismatched strand error.
    ///
    /// This error occurs when one attempts to create an interval from a start
    /// coordinate and an end coordinate that are not located on the same
    /// strand.
    #[error("mismatched strands for coordinates: `{start}` and `{end}`")]
    MismatchedStrands {
        /// The strand of the start coordinate.
        start: Strand,

        /// The strand of the end coordinate.
        end: Strand,
    },

    /// A negative sized interval.
    ///
    /// This error occurs when the start of the interval comes _after_ the end
    /// of the interval.
    ///
    /// On positive stranded intervals, this is when the start position is
    /// _greater than_ the end position. On negative stranded intervals, this is
    /// when the start position is _less than_ the end position.
    #[error("negatively sized interval: start is `{start}`, end is `{end}`, strand is `{strand}`")]
    NegativelySized {
        /// The start position.
        start: Number,
        /// The end position.
        end: Number,
        /// The strand.
        strand: Strand,
    },
}

/// A [`Result`](std::result::Result) with a [`NonsensicalError`].
pub type NonsensicalResult<T> = std::result::Result<T, NonsensicalError>;
109
/// An error related to parsing an interval.
#[derive(Error, Debug, PartialEq, Eq)]
pub enum ParseError {
    /// An invalid format was encountered.
    #[error("invalid format: {value}")]
    Format {
        /// The value that was passed; it is echoed verbatim in the error
        /// message.
        value: String,
    },
}

/// A [`Result`](std::result::Result) with a [`ParseError`].
pub type ParseResult<T> = std::result::Result<T, ParseError>;
123
/// An error related to an interval.
///
/// Every variant that wraps another error type is marked `#[from]`, so those
/// underlying errors convert into this type automatically (for example, when
/// propagated with `?`).
#[derive(Error, Debug, PartialEq, Eq)]
pub enum Error {
    /// A clamping error.
    #[error("clamp error: {0}")]
    Clamp(#[from] ClampError),

    /// A coordinate error.
    #[error("coordinate error: {0}")]
    Coordinate(#[from] coordinate::Error),

    /// A nonsensical interval.
    #[error("nonsensical interval: {0}")]
    Nonsensical(#[from] NonsensicalError),

    /// One or more of the coordinates were out of bounds.
    #[error("one or more of the coordinates were out of bounds")]
    OutOfBounds,

    /// A parse error.
    #[error("parse error: {0}")]
    Parse(#[from] ParseError),

    /// A position error.
    #[error("position error: {0}")]
    Position(#[from] position::Error),

    /// A strand error.
    #[error("strand error: {0}")]
    Strand(#[from] strand::Error),
}

/// A [`Result`](std::result::Result) with an [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
158
159////////////////////////////////////////////////////////////////////////////////////////
// The `Interval` trait
161////////////////////////////////////////////////////////////////////////////////////////
162
/// Traits related to an interval.
pub mod r#trait {
    use super::*;
    use crate::system::Base;

    /// Requirements to be an interval.
    ///
    /// The coordinate system `S` selects the implementation; the inherent
    /// `contains_entity()` and `count_entities()` methods on
    /// [`Interval`](super::Interval) forward to this trait.
    // NOTE(review): no `len` method is visible on this trait in this file, so
    // the lint allowance below may be stale — confirm before removing it.
    #[allow(clippy::len_without_is_empty)]
    pub trait Interval<S: System> {
        /// Returns whether or not the entity at the in-base coordinate is
        /// contained within this interval.
        fn contains_entity(&self, coordinate: &Coordinate<Base>) -> bool;

        /// Gets the number of members contained within the interval.
        fn count_entities(&self) -> Number;
    }
}
179
/// An interval.
///
/// An interval is described by a start and an end coordinate expressed in the
/// same coordinate system `S`. `Interval::try_new()` only builds an interval
/// when both coordinates share a contig and a strand and the start does not
/// come after the end for that strand.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Interval<S: System> {
    /// The start coordinate.
    start: Coordinate<S>,

    /// The end coordinate.
    end: Coordinate<S>,
}
189
190impl<S: System> Interval<S>
191where
192 Interval<S>: r#trait::Interval<S>,
193 Position<S>: position::r#trait::Position<S>,
194{
195 /// Creates a new interval if the following invariants are upheld.
196 ///
197 /// * The contigs of the two coordinates must match.
198 /// * If this does not hold, a [`NonsensicalError::MismatchedContigs`]
199 /// will be returned.
200 /// * The strands of the two coordinates must match.
201 /// * If this does not hold, a [`NonsensicalError::MismatchedStrands`]
202 /// will be returned.
203 /// * The start must come _before or be equal to_ the end in that (a) on
204 /// positive strand, `start <= end`, or, (b) on the negative strand, `end
205 /// <= start`. This ensures that the interval is always oriented from
206 /// start to end of the molecule.
207 /// * If this does not hold, a [`NonsensicalError::NegativelySized`] will
208 /// be returned.
209 ///
210 /// # Examples
211 ///
212 /// ```
213 /// use omics_coordinate::Coordinate;
214 /// use omics_coordinate::Interval;
215 /// use omics_coordinate::system::Base;
216 /// use omics_coordinate::system::Interbase;
217 ///
218 /// //===========//
219 /// // Interbase //
220 /// //===========//
221 ///
222 /// // Positive strand.
223 ///
224 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
225 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
226 /// let interval = Interval::try_new(start, end)?;
227 ///
228 /// // Negative strand.
229 ///
230 /// let start = Coordinate::<Interbase>::try_new("seq0", "-", 20)?;
231 /// let end = Coordinate::<Interbase>::try_new("seq0", "-", 10)?;
232 /// let interval = Interval::try_new(start, end)?;
233 ///
234 /// //======//
235 /// // Base //
236 /// //======//
237 ///
238 /// // Positive strand.
239 ///
240 /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
241 /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
242 /// let interval = Interval::try_new(start, end)?;
243 ///
244 /// // Negative strand.
245 ///
246 /// let start = Coordinate::<Base>::try_new("seq0", "-", 20)?;
247 /// let end = Coordinate::<Base>::try_new("seq0", "-", 10)?;
248 /// let interval = Interval::try_new(start, end)?;
249 ///
250 /// # Ok::<(), Box<dyn std::error::Error>>(())
251 /// ```
252 pub fn try_new(start: Coordinate<S>, end: Coordinate<S>) -> Result<super::Interval<S>> {
253 if start.contig() != end.contig() {
254 return Err(Error::Nonsensical(NonsensicalError::MismatchedContigs {
255 start: start.contig().clone(),
256 end: end.contig().clone(),
257 }));
258 }
259
260 if start.strand() != end.strand() {
261 return Err(Error::Nonsensical(NonsensicalError::MismatchedStrands {
262 start: start.strand(),
263 end: end.strand(),
264 }));
265 }
266
267 match start.strand() {
268 Strand::Positive => {
269 if start.position() > end.position() {
270 return Err(Error::Nonsensical(NonsensicalError::NegativelySized {
271 start: start.position().get(),
272 end: end.position().get(),
273 strand: start.strand(),
274 }));
275 }
276 }
277 Strand::Negative => {
278 if end.position() > start.position() {
279 return Err(Error::Nonsensical(NonsensicalError::NegativelySized {
280 start: start.position().get(),
281 end: end.position().get(),
282 strand: start.strand(),
283 }));
284 }
285 }
286 }
287
288 Ok(Interval { start, end })
289 }
290
291 /// Gets a reference to the start coordinate.
292 ///
293 /// # Examples
294 ///
295 /// ```
296 /// use omics_coordinate::Coordinate;
297 /// use omics_coordinate::Interval;
298 /// use omics_coordinate::system::Base;
299 /// use omics_coordinate::system::Interbase;
300 ///
301 /// //===========//
302 /// // Interbase //
303 /// //===========//
304 ///
305 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
306 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
307 /// let interval = Interval::try_new(start.clone(), end)?;
308 ///
309 /// assert_eq!(interval.start(), &start);
310 ///
311 /// //======//
312 /// // Base //
313 /// //======//
314 ///
315 /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
316 /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
317 /// let interval = Interval::try_new(start.clone(), end)?;
318 ///
319 /// assert_eq!(interval.start(), &start);
320 ///
321 /// # Ok::<(), Box<dyn std::error::Error>>(())
322 /// ```
323 pub fn start(&self) -> &Coordinate<S> {
324 &self.start
325 }
326
327 /// Consumes `self` and returns the start coordinate.
328 ///
329 /// # Examples
330 ///
331 /// ```
332 /// use omics_coordinate::Coordinate;
333 /// use omics_coordinate::Interval;
334 /// use omics_coordinate::system::Base;
335 /// use omics_coordinate::system::Interbase;
336 ///
337 /// //===========//
338 /// // Interbase //
339 /// //===========//
340 ///
341 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
342 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
343 /// let interval = Interval::try_new(start.clone(), end)?;
344 ///
345 /// assert_eq!(interval.into_start(), start);
346 ///
347 /// //======//
348 /// // Base //
349 /// //======//
350 ///
351 /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
352 /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
353 /// let interval = Interval::try_new(start.clone(), end)?;
354 ///
355 /// assert_eq!(interval.into_start(), start);
356 ///
357 /// # Ok::<(), Box<dyn std::error::Error>>(())
358 /// ```
359 pub fn into_start(self) -> Coordinate<S> {
360 self.start
361 }
362
363 /// Gets a reference to the end coordinate.
364 ///
365 /// # Examples
366 ///
367 /// ```
368 /// use omics_coordinate::Coordinate;
369 /// use omics_coordinate::Interval;
370 /// use omics_coordinate::system::Base;
371 /// use omics_coordinate::system::Interbase;
372 ///
373 /// //===========//
374 /// // Interbase //
375 /// //===========//
376 ///
377 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
378 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
379 /// let interval = Interval::try_new(start, end.clone())?;
380 ///
381 /// assert_eq!(interval.end(), &end);
382 ///
383 /// //======//
384 /// // Base //
385 /// //======//
386 ///
387 /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
388 /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
389 /// let interval = Interval::try_new(start, end.clone())?;
390 ///
391 /// assert_eq!(interval.end(), &end);
392 ///
393 /// # Ok::<(), Box<dyn std::error::Error>>(())
394 /// ```
395 pub fn end(&self) -> &Coordinate<S> {
396 &self.end
397 }
398
399 /// Consumes `self` and returns the end coordinate.
400 ///
401 /// # Examples
402 ///
403 /// ```
404 /// use omics_coordinate::Coordinate;
405 /// use omics_coordinate::Interval;
406 /// use omics_coordinate::system::Base;
407 /// use omics_coordinate::system::Interbase;
408 ///
409 /// //===========//
410 /// // Interbase //
411 /// //===========//
412 ///
413 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
414 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
415 /// let interval = Interval::try_new(start, end.clone())?;
416 ///
417 /// assert_eq!(interval.into_end(), end);
418 ///
419 /// //======//
420 /// // Base //
421 /// //======//
422 ///
423 /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
424 /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
425 /// let interval = Interval::try_new(start, end.clone())?;
426 ///
427 /// assert_eq!(interval.into_end(), end);
428 ///
429 /// # Ok::<(), Box<dyn std::error::Error>>(())
430 /// ```
431 pub fn into_end(self) -> Coordinate<S> {
432 self.end
433 }
434
435 /// Consumes `self` and returns the start and end coordinates.
436 ///
437 /// # Examples
438 ///
439 /// ```
440 /// use omics_coordinate::Coordinate;
441 /// use omics_coordinate::Interval;
442 /// use omics_coordinate::system::Base;
443 /// use omics_coordinate::system::Interbase;
444 ///
445 /// //===========//
446 /// // Interbase //
447 /// //===========//
448 ///
449 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
450 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
451 /// let interval = Interval::try_new(start.clone(), end.clone())?;
452 /// let parts = interval.into_coordinates();
453 ///
454 /// assert_eq!(parts.0, start);
455 /// assert_eq!(parts.1, end);
456 ///
457 /// //======//
458 /// // Base //
459 /// //======//
460 ///
461 /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
462 /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
463 /// let interval = Interval::try_new(start.clone(), end.clone())?;
464 /// let parts = interval.into_coordinates();
465 ///
466 /// assert_eq!(parts.0, start);
467 /// assert_eq!(parts.1, end);
468 ///
469 /// # Ok::<(), Box<dyn std::error::Error>>(())
470 /// ```
471 pub fn into_coordinates(self) -> (Coordinate<S>, Coordinate<S>) {
472 (self.start, self.end)
473 }
474
475 /// Returns a reference to the contig.
476 ///
477 /// # Examples
478 ///
479 /// ```
480 /// use omics_coordinate::Coordinate;
481 /// use omics_coordinate::Interval;
482 /// use omics_coordinate::system::Base;
483 /// use omics_coordinate::system::Interbase;
484 ///
485 /// //===========//
486 /// // Interbase //
487 /// //===========//
488 ///
489 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
490 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
491 /// let interval = Interval::try_new(start, end)?;
492 ///
493 /// assert_eq!(interval.contig().as_str(), "seq0");
494 ///
495 /// //======//
496 /// // Base //
497 /// //======//
498 ///
499 /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
500 /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
501 /// let interval = Interval::try_new(start, end)?;
502 ///
503 /// assert_eq!(interval.contig().as_str(), "seq0");
504 ///
505 /// # Ok::<(), Box<dyn std::error::Error>>(())
506 /// ```
507 pub fn contig(&self) -> &Contig {
508 self.start().contig()
509 }
510
511 /// Returns the strand.
512 ///
513 /// # Examples
514 ///
515 /// ```
516 /// use omics_coordinate::Coordinate;
517 /// use omics_coordinate::Interval;
518 /// use omics_coordinate::Strand;
519 /// use omics_coordinate::system::Base;
520 /// use omics_coordinate::system::Interbase;
521 ///
522 /// //===========//
523 /// // Interbase //
524 /// //===========//
525 ///
526 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
527 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
528 /// let interval = Interval::try_new(start, end)?;
529 ///
530 /// assert_eq!(interval.strand(), Strand::Positive);
531 ///
532 /// //======//
533 /// // Base //
534 /// //======//
535 ///
536 /// let start = Coordinate::<Base>::try_new("seq0", "-", 20)?;
537 /// let end = Coordinate::<Base>::try_new("seq0", "-", 10)?;
538 /// let interval = Interval::try_new(start, end)?;
539 ///
540 /// assert_eq!(interval.strand(), Strand::Negative);
541 ///
542 /// # Ok::<(), Box<dyn std::error::Error>>(())
543 /// ```
544 pub fn strand(&self) -> Strand {
545 self.start().strand()
546 }
547
548 /// Returns whether or not a coordinate is contained within this interval.
    /// Notably, when checking whether coordinates are included in the interval,
    /// both the start and end positions are considered inclusive.
551 ///
552 /// # Caution
553 ///
    /// **This is not the method you want to use when checking if a nucleotide
    /// or amino acid at a particular position is included in the interval.**
    /// This checks the coordinates themselves and, in so doing, considers both
    /// the start and the end positions of the interval to be inclusive.
558 ///
559 /// If you'd like to check whether a particular nucleotide, amino acid, or
560 /// other entity is contained within the interval, use the
561 /// [`contains_entity()`](Interval::contains_entity) method.
562 ///
563 /// # Examples
564 ///
565 /// ```
566 /// use omics_coordinate::Coordinate;
567 /// use omics_coordinate::Interval;
568 /// use omics_coordinate::Strand;
569 /// use omics_coordinate::system::Base;
570 /// use omics_coordinate::system::Interbase;
571 ///
572 /// //===========//
573 /// // Interbase //
574 /// //===========//
575 ///
576 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 0)?;
577 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
578 /// let interval = Interval::try_new(start, end)?;
579 ///
580 /// // Coordinates on the same contig, strand, and within the interval's range
581 /// // are contained within the interval.
582 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 0)?));
583 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 5)?));
584 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 10)?));
585 ///
586 /// // Coordinates on different contigs, strands, or outside the range are
587 /// // not contained within the interval.
588 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq1", "+", 5)?));
589 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "-", 5)?));
590 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 11)?));
591 ///
592 /// //======//
593 /// // Base //
594 /// //======//
595 ///
    /// let start = Coordinate::<Base>::try_new("seq0", "+", 1)?;
    /// let end = Coordinate::<Base>::try_new("seq0", "+", 10)?;
598 /// let interval = Interval::try_new(start, end)?;
599 ///
600 /// // Coordinates on the same contig, strand, and within the interval's range
601 /// // are contained within the interval.
602 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 1)?));
603 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 5)?));
604 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 10)?));
605 ///
606 /// // Coordinates on different contigs, strands, or outside the range are
607 /// // not contained within the interval.
608 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq1", "+", 5)?));
609 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "-", 5)?));
610 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 11)?));
611 ///
612 /// # Ok::<(), Box<dyn std::error::Error>>(())
613 /// ```
614 pub fn contains_coordinate(&self, coordinate: &crate::Coordinate<S>) -> bool {
615 if self.contig() != coordinate.contig() {
616 return false;
617 }
618
619 if self.strand() != coordinate.strand() {
620 return false;
621 }
622
623 match self.strand() {
624 Strand::Positive => {
625 self.start().position().get() <= coordinate.position().get()
626 && self.end().position().get() >= coordinate.position().get()
627 }
628 Strand::Negative => {
629 self.start().position().get() >= coordinate.position().get()
630 && self.end().position().get() <= coordinate.position().get()
631 }
632 }
633 }
634
635 /// Returns whether or not the entity at the in-base coordinate is
636 /// contained within this interval.
637 ///
    /// # Examples
639 ///
640 /// ```
641 /// use omics_coordinate::Coordinate;
642 /// use omics_coordinate::Interval;
643 /// use omics_coordinate::Strand;
644 /// use omics_coordinate::system::Base;
645 /// use omics_coordinate::system::Interbase;
646 ///
647 /// //===========//
648 /// // Interbase //
649 /// //===========//
650 ///
651 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 0)?;
652 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
653 /// let interval = Interval::try_new(start, end)?;
654 ///
655 /// // Coordinates on the same contig, strand, and within the interval's range
656 /// // are contained within the interval.
657 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 0)?));
658 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 5)?));
659 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 10)?));
660 ///
661 /// // Coordinates on different contigs, strands, or outside the range are
662 /// // not contained within the interval.
663 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq1", "+", 5)?));
664 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "-", 5)?));
665 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 11)?));
666 ///
667 /// //======//
668 /// // Base //
669 /// //======//
670 ///
671 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 1)?;
672 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
673 /// let interval = Interval::try_new(start, end)?;
674 ///
675 /// // Coordinates on the same contig, strand, and within the interval's range
676 /// // are contained within the interval.
677 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 1)?));
678 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 5)?));
679 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 10)?));
680 ///
681 /// // Coordinates on different contigs, strands, or outside the range are
682 /// // not contained within the interval.
683 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq1", "+", 5)?));
684 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "-", 5)?));
685 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 11)?));
686 ///
687 /// # Ok::<(), Box<dyn std::error::Error>>(())
688 /// ```
689 pub fn contains_entity(&self, coordinate: &Coordinate<Base>) -> bool {
690 <Self as r#trait::Interval<S>>::contains_entity(self, coordinate)
691 }
692
693 /// Counts the number of entities in the interval.
694 ///
695 /// # Examples
696 ///
697 /// ```
698 /// use omics_coordinate::Coordinate;
699 /// use omics_coordinate::Interval;
700 /// use omics_coordinate::system::Base;
701 /// use omics_coordinate::system::Interbase;
702 ///
703 /// //===========//
704 /// // Interbase //
705 /// //===========//
706 ///
707 /// // Positive strand.
708 ///
709 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
710 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
711 /// let interval = Interval::try_new(start, end)?;
712 ///
713 /// assert_eq!(interval.count_entities(), 10);
714 ///
715 /// // Negative strand.
716 ///
717 /// let start = Coordinate::<Interbase>::try_new("seq0", "-", 20)?;
718 /// let end = Coordinate::<Interbase>::try_new("seq0", "-", 10)?;
719 /// let interval = Interval::try_new(start, end)?;
720 ///
721 /// assert_eq!(interval.count_entities(), 10);
722 ///
723 /// //======//
724 /// // Base //
725 /// //======//
726 ///
727 /// // Positive strand.
728 ///
729 /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
730 /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
731 /// let interval = Interval::try_new(start, end)?;
732 ///
733 /// assert_eq!(interval.count_entities(), 11);
734 ///
735 /// // Negative strand.
736 ///
737 /// let start = Coordinate::<Base>::try_new("seq0", "-", 20)?;
738 /// let end = Coordinate::<Base>::try_new("seq0", "-", 10)?;
739 /// let interval = Interval::try_new(start, end)?;
740 ///
741 /// assert_eq!(interval.count_entities(), 11);
742 ///
743 /// # Ok::<(), Box<dyn std::error::Error>>(())
744 /// ```
745 pub fn count_entities(&self) -> Number {
746 <Self as r#trait::Interval<S>>::count_entities(self)
747 }
748
749 /// Consumes `self` and clamps an interval by another interval.
750 ///
751 /// Clamping is an operation whereby the ends of an interval are restricted
752 /// to the range of the argument passed in with a tendency to restrict
753 /// towards the middle of the interval.
754 ///
755 /// # Summary
756 ///
757 /// * If the interval being operated on is completely contained within the
758 /// argument interval, the interval being operated on is returned.
759 ///
760 /// ```text
761 /// ╔═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════ →
762 /// 10 11 12 13 14 15 16 17 18 19 20 |
763 /// ●───────────────────────● [13, 17] | Original Interval
764 /// ●───────────────────────────────────────────────● [11, 19] | Argument Interval
765 /// ┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄
766 /// ●───────────────────────● [13, 17] | Resulting Interval
767 ///
768 ///
769 /// Here, no modifications were made to the original interval, as neither
770 /// the start nor the end of the interval would be restricted by the
771 /// argument interval.
772 /// ```
773 ///
774 /// * If the argument interval is completely within the interval being
775 /// operated on, the argument interval will clamp both sides of the
776 /// original interval, and the argument interval will be returned.
777 /// ```text
778 /// ╔═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════ →
779 /// 10 11 12 13 14 15 16 17 18 19 20 |
780 /// ●───────────────────────────────────────────────● [11, 19] | Original Interval
781 /// ●───────────────────────● [13, 17] | Argument Interval
782 /// ┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄
783 /// ●───────────────────────● [13, 17] | Resulting Interval
784 ///
785 ///
786 /// Here, both the start and the end position of the original interval were
787 /// restricted by the start and end of the argument interval respectively.
788 /// ```
789 ///
790 /// * If the argument interval would restrict the length of one side of the
791 /// subject interval on either end, that end is restricted to the argument
792 /// interval's value, whereas the non-restricted end is the original
793 /// interval's value.
794 ///
795 /// ```text
796 /// ╔═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════ →
797 /// 10 11 12 13 14 15 16 17 18 19 20 |
798 /// ●───────────────────────────────────● [11, 17] | Original Interval
799 /// ●───────────────────────● [13, 17] | Argument Interval
800 /// ┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄
801 /// ●───────────────────────● [13, 17] | Resulting Interval
802 ///
803 ///
804 /// Here, the start of the original interval is clamped by the argument
805 /// interval's start position. However, the end position of the original
806 /// interval is not restricted by the argument interval's end position,
    /// so it remains the same. This results in the leading portion of the
    /// interval being trimmed away.
809 ///
810 ///
811 /// ╔═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════ →
812 /// 10 11 12 13 14 15 16 17 18 19 20 |
813 /// ●───────────────────────────────────● [13, 19] | Original Interval
814 /// ●───────────────────────● [13, 17] | Argument Interval
815 /// ┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄
816 /// ●───────────────────────● [13, 17] | Resulting Interval
817 ///
818 ///
819 /// Here, the start position of the original interval would not be
820 /// restricted by the argument interval's start position, so it remains
821 /// the same. However, the end position is clamped by the end position
822 /// of the argument interval, so the resulting end position is that of the
    /// argument interval's end position. This results in the trailing portion
    /// of the interval being trimmed away.
825 /// ```
826 ///
827 /// # Examples
828 ///
829 /// ```
830 /// use omics_coordinate::Coordinate;
831 /// use omics_coordinate::Interval;
832 /// use omics_coordinate::system::Base;
833 /// use omics_coordinate::system::Interbase;
834 ///
835 /// //===========//
836 /// // Interbase //
837 /// //===========//
838 ///
839 /// let interval = "seq0:+:10-20".parse::<Interval<Interbase>>()?;
840 /// let clamped = interval.clamp("seq0:+:5-15".parse::<Interval<Interbase>>()?)?;
841 /// assert_eq!(clamped, "seq0:+:10-15".parse::<Interval<Interbase>>()?);
842 ///
843 /// //======//
844 /// // Base //
845 /// //======//
846 ///
847 /// let interval = "seq0:-:20-10".parse::<Interval<Base>>()?;
848 /// let clamped = interval.clamp("seq0:-:25-15".parse::<Interval<Base>>()?)?;
849 /// assert_eq!(clamped, "seq0:-:20-15".parse::<Interval<Base>>()?);
850 ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
852 /// ```
853 pub fn clamp(self, interval: Interval<S>) -> Result<Interval<S>> {
854 let (start, end) = self.into_coordinates();
855 let (operand_start, operand_end) = interval.into_coordinates();
856
857 let (start_contig, start_strand, start) = start.into_parts();
858 let (end_contig, end_strand, end) = end.into_parts();
859
860 let (operand_contig, operand_strand, operand_start) = operand_start.into_parts();
861 let (_, _, operand_end) = operand_end.into_parts();
862
863 if start_contig != operand_contig {
864 return Err(Error::Clamp(ClampError::MismatchedContigs {
865 original: start_contig,
866 operand: operand_contig,
867 }));
868 }
869
870 if start_strand != operand_strand {
871 return Err(Error::Clamp(ClampError::MismatchedStrand {
872 original: start_strand,
873 operand: operand_strand,
874 }));
875 }
876
877 let (new_start, new_end) = match start_strand {
878 Strand::Positive => (max(start, operand_start), min(end, operand_end)),
879 Strand::Negative => (min(start, operand_start), max(end, operand_end)),
880 };
881
882 let start = Coordinate::<S>::new(start_contig, start_strand, new_start);
883 let end = Coordinate::<S>::new(end_contig, end_strand, new_end);
884
885 // SAFETY: both the start _and_ the end positions were originally on
886 // intervals that were valid. Since we are not breaking any rules that
887 // would make the intervals invalid in this method, this should always
888 // unwrap.
889 Ok(Self::try_new(start, end).unwrap())
890 }
891
892 /// Gets the offset of a coordinate from the start of the interval.
893 ///
894 /// If the coordinate is not contained within the interval, `None` is
895 /// returned.
896 ///
897 /// # Examples
898 ///
899 /// ```
900 /// use omics_coordinate::Coordinate;
901 /// use omics_coordinate::Interval;
902 /// use omics_coordinate::system::Base;
903 /// use omics_coordinate::system::Interbase;
904 ///
905 /// //===========//
906 /// // Interbase //
907 /// //===========//
908 ///
909 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
910 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
911 /// let interval = Interval::try_new(start, end)?;
912 ///
913 /// let query = Coordinate::<Interbase>::try_new("seq0", "+", 15)?;
914 /// assert_eq!(interval.coordinate_offset(&query).unwrap(), 5);
915 ///
916 /// let query = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
917 /// assert_eq!(interval.coordinate_offset(&query).unwrap(), 10);
918 ///
919 /// let query = Coordinate::<Interbase>::try_new("seq0", "+", 21)?;
920 /// assert!(interval.coordinate_offset(&query).is_none());
921 ///
922 /// //======//
923 /// // Base //
924 /// //======//
925 ///
926 /// let start = Coordinate::<Base>::try_new("seq0", "-", 20)?;
927 /// let end = Coordinate::<Base>::try_new("seq0", "-", 10)?;
928 /// let interval = Interval::try_new(start, end)?;
929 ///
930 /// let query = Coordinate::<Base>::try_new("seq0", "-", 15)?;
931 /// assert_eq!(interval.coordinate_offset(&query).unwrap(), 5);
932 ///
933 /// let query = Coordinate::<Base>::try_new("seq0", "-", 10)?;
934 /// assert_eq!(interval.coordinate_offset(&query).unwrap(), 10);
935 ///
936 /// let query = Coordinate::<Base>::try_new("seq0", "-", 9)?;
937 /// assert!(interval.coordinate_offset(&query).is_none());
938 ///
939 /// Ok::<(), Box<dyn std::error::Error>>(())
940 /// ```
941 pub fn coordinate_offset(&self, coordinate: &Coordinate<S>) -> Option<Number> {
942 if !self.contains_coordinate(coordinate) {
943 return None;
944 }
945
946 Some(
947 coordinate
948 .position()
949 .distance_unchecked(self.start().position()),
950 )
951 }
952
953 /// Returns the coordinate at the offset within the interval.
954 ///
955 /// This method only returns the coordinate if the coordinate falls within
956 /// the interval.
957 ///
958 /// # Examples
959 ///
960 /// ```
961 /// use omics_coordinate::Coordinate;
962 /// use omics_coordinate::Interval;
963 /// use omics_coordinate::system::Base;
964 /// use omics_coordinate::system::Interbase;
965 ///
966 /// //===========//
967 /// // Interbase //
968 /// //===========//
969 ///
970 /// // Positive strand.
971 ///
972 /// let interval = "seq0:+:0-1000".parse::<Interval<Interbase>>()?;
973 ///
974 /// let expected = "seq0:+:5".parse::<Coordinate<Interbase>>()?;
975 /// assert_eq!(interval.coordinate_at_offset(5).unwrap(), expected);
976 ///
977 /// let expected = "seq0:+:1000".parse::<Coordinate<Interbase>>()?;
978 /// assert_eq!(interval.coordinate_at_offset(1000).unwrap(), expected);
979 ///
980 /// assert!(interval.coordinate_at_offset(1001).is_none());
981 ///
982 /// // Negative strand.
983 ///
984 /// let interval = "seq0:-:1000-0".parse::<Interval<Interbase>>()?;
985 ///
986 /// let expected = "seq0:-:995".parse::<Coordinate<Interbase>>()?;
987 /// assert_eq!(interval.coordinate_at_offset(5).unwrap(), expected);
988 ///
989 /// let expected = "seq0:-:0".parse::<Coordinate<Interbase>>()?;
990 /// assert_eq!(interval.coordinate_at_offset(1000).unwrap(), expected);
991 ///
992 /// assert_eq!(interval.coordinate_at_offset(1001), None);
993 ///
994 /// //======//
995 /// // Base //
996 /// //======//
997 ///
998 /// // Positive strand.
999 ///
1000 /// let interval = "seq0:+:1-1000".parse::<Interval<Base>>()?;
1001 ///
1002 /// let expected = "seq0:+:6".parse::<Coordinate<Base>>()?;
1003 /// assert_eq!(interval.coordinate_at_offset(5).unwrap(), expected);
1004 ///
1005 /// let expected = "seq0:+:1000".parse::<Coordinate<Base>>()?;
1006 /// assert_eq!(interval.coordinate_at_offset(999).unwrap(), expected);
1007 ///
1008 /// assert!(interval.coordinate_at_offset(1000).is_none());
1009 ///
1010 /// // Negative strand.
1011 ///
1012 /// let interval = "seq0:-:1000-1".parse::<Interval<Base>>()?;
1013 ///
1014 /// let expected = "seq0:-:995".parse::<Coordinate<Base>>()?;
1015 /// assert_eq!(interval.coordinate_at_offset(5).unwrap(), expected);
1016 ///
1017 /// let expected = "seq0:-:1".parse::<Coordinate<Base>>()?;
1018 /// assert_eq!(interval.coordinate_at_offset(999).unwrap(), expected);
1019 ///
1020 /// assert_eq!(interval.coordinate_at_offset(1000), None);
1021 ///
1022 /// Ok::<(), Box<dyn std::error::Error>>(())
1023 /// ```
1024 pub fn coordinate_at_offset(&self, offset: Number) -> Option<Coordinate<S>> {
1025 let coordinate = self.start().clone().move_forward(offset)?;
1026
1027 match self.contains_coordinate(&coordinate) {
1028 true => Some(coordinate),
1029 false => None,
1030 }
1031 }
1032
1033 /// Reverse complements the interval, meaning that:
1034 ///
1035 /// * the start and end positions are swapped, and
1036 /// * the strand is swapped.
1037 ///
1038 /// # Examples
1039 ///
1040 /// ```
1041 /// use omics_coordinate::Coordinate;
1042 /// use omics_coordinate::Interval;
1043 /// use omics_coordinate::system::Base;
1044 /// use omics_coordinate::system::Interbase;
1045 ///
1046 /// //===========//
1047 /// // Interbase //
1048 /// //===========//
1049 ///
1050 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
1051 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
1052 /// let original = Interval::try_new(start, end)?;
1053 ///
1054 /// let complemented = original.clone().reverse_complement();
1055 /// assert_eq!(complemented, "seq0:-:20-10".parse::<Interval<Interbase>>()?);
1056 ///
1057 /// let recomplemented = complemented.reverse_complement();
1058 /// assert_eq!(recomplemented, original);
1059 ///
1060 /// //======//
1061 /// // Base //
1062 /// //======//
1063 ///
1064 /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
1065 /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
1066 /// let original = Interval::try_new(start, end)?;
1067 ///
1068 /// let complemented = original.clone().reverse_complement();
1069 /// assert_eq!(complemented, "seq0:-:20-10".parse::<Interval<Base>>()?);
1070 ///
1071 /// let recomplemented = complemented.reverse_complement();
1072 /// assert_eq!(recomplemented, original);
1073 ///
1074 /// Ok::<(), Box<dyn std::error::Error>>(())
1075 /// ```
1076 pub fn reverse_complement(self) -> super::Interval<S> {
1077 let (start, end) = self.into_coordinates();
1078 // SAFETY: because (a) intervals are inclusive of both of their start
1079 // and end coordinates, (b) all positions can be represented on the
1080 // opposite strand, and (c) swapping the start and end while also
1081 // swapping strand will always create the correct directionality, this will
1082 // always unwrap.
1083 Interval::try_new(end.swap_strand(), start.swap_strand()).unwrap()
1084 }
1085}
1086
1087////////////////////////////////////////////////////////////////////////////////////////
1088// Trait implementations
1089////////////////////////////////////////////////////////////////////////////////////////
1090
1091impl<S: System> std::fmt::Display for Interval<S>
1092where
1093 Interval<S>: r#trait::Interval<S>,
1094 Position<S>: position::r#trait::Position<S>,
1095{
1096 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1097 write!(
1098 f,
1099 "{}:{}:{}-{}",
1100 self.contig(),
1101 self.strand(),
1102 self.start().position(),
1103 self.end().position(),
1104 )
1105 }
1106}
1107
1108impl<S: System> std::str::FromStr for Interval<S>
1109where
1110 Interval<S>: r#trait::Interval<S>,
1111 Position<S>: position::r#trait::Position<S>,
1112{
1113 type Err = Error;
1114
1115 fn from_str(s: &str) -> Result<Self> {
1116 let parts = s.split(':').collect::<Vec<_>>();
1117
1118 if parts.len() != 3 {
1119 return Err(Error::Parse(ParseError::Format {
1120 value: s.to_string(),
1121 }));
1122 }
1123
1124 let mut parts = parts.iter();
1125
1126 // SAFETY: we checked that there are three parts above. Given that we
1127 // haven't pulled anything from the iterator, we can always safely
1128 // unwrap this.
1129 let contig = parts
1130 .next()
1131 .unwrap()
1132 .parse::<Contig>()
1133 // SAFETY: this is infallible.
1134 .unwrap();
1135
1136 // SAFETY: we checked that there are three parts above. Given that we
1137 // have only pulled one item from the iterator, we can always safely
1138 // unwrap this.
1139 let strand = parts
1140 .next()
1141 .unwrap()
1142 .parse::<Strand>()
1143 .map_err(Error::Strand)?;
1144
1145 // SAFETY: we checked that there are three parts above. Given that we
1146 // have only pulled two items from the iterator, we can always safely
1147 // unwrap this.
1148 let positions = parts.next().unwrap().split('-').collect::<Vec<_>>();
1149
1150 if positions.len() != 2 {
1151 return Err(Error::Parse(ParseError::Format {
1152 value: s.to_string(),
1153 }));
1154 }
1155
1156 // SAFETY: we just ensured that two parts exist, so the direct
1157 // indexing of the slice for both index zero and one will never
1158 // fail.
1159 let start = positions[0]
1160 .parse::<Position<S>>()
1161 .map_err(Error::Position)?;
1162 let end = positions[1]
1163 .parse::<Position<S>>()
1164 .map_err(Error::Position)?;
1165
1166 Interval::try_new(
1167 Coordinate::new(contig.clone(), strand, start),
1168 Coordinate::new(contig, strand, end),
1169 )
1170 }
1171}
1172
#[cfg(test)]
mod tests {
    use super::*;
    use crate::position::Error as PositionError;
    use crate::position::Number;
    use crate::position::ParseError as PositionParseError;
    use crate::strand::Error as StrandError;
    use crate::strand::ParseError as StrandParseError;
    use crate::system::Interbase;

    /// Parses an interbase coordinate, panicking if the text is not valid.
    fn parse_coordinate(text: &str) -> Coordinate<Interbase> {
        text.parse::<Coordinate<Interbase>>().unwrap()
    }

    /// Parses an interbase interval, panicking if the text is not valid.
    fn parse_interval(text: &str) -> Interval<Interbase> {
        text.parse::<Interval<Interbase>>().unwrap()
    }

    /// Parses text that is expected to be rejected as an interbase interval
    /// and returns the resulting error.
    fn parse_failure(text: &str) -> Error {
        text.parse::<Interval<Interbase>>().unwrap_err()
    }

    /// Gets the offset within `interval` of the coordinate described by
    /// `text`.
    fn offset_of(interval: &Interval<Interbase>, text: &str) -> Option<Number> {
        interval.coordinate_offset(&parse_coordinate(text))
    }

    #[test]
    fn valid() {
        let start = parse_coordinate("seq0:+:0");
        let end = parse_coordinate("seq0:+:9");

        let interval = Interval::try_new(start, end).unwrap();
        assert_eq!(interval.count_entities(), 9);
    }

    #[test]
    fn nonsensical_mismatched_contigs() {
        let start = parse_coordinate("seq0:+:0");
        let end = parse_coordinate("seq1:+:10");

        let err = Interval::try_new(start, end).unwrap_err();
        assert_eq!(
            err,
            Error::Nonsensical(NonsensicalError::MismatchedContigs {
                start: Contig::new("seq0"),
                end: Contig::new("seq1")
            })
        );

        assert_eq!(
            err.to_string(),
            "nonsensical interval: mismatched contigs for coordinates: `seq0` and `seq1`"
        );
    }

    #[test]
    fn nonsensical_mismatched_strands() {
        let start = parse_coordinate("seq0:+:0");
        let end = parse_coordinate("seq0:-:10");

        let err = Interval::try_new(start, end).unwrap_err();
        assert_eq!(
            err,
            Error::Nonsensical(NonsensicalError::MismatchedStrands {
                start: Strand::Positive,
                end: Strand::Negative
            })
        );

        assert_eq!(
            err.to_string(),
            "nonsensical interval: mismatched strands for coordinates: `+` and `-`"
        );
    }

    #[test]
    fn nonsensical_start_greater_than_end() {
        //===================//
        // Positive stranded //
        //===================//

        let start = parse_coordinate("seq0:+:10");
        let end = parse_coordinate("seq0:+:0");

        let err = Interval::try_new(start, end).unwrap_err();

        assert_eq!(
            err,
            Error::Nonsensical(NonsensicalError::NegativelySized {
                start: 10,
                end: 0,
                strand: Strand::Positive
            })
        );

        assert_eq!(
            err.to_string(),
            "nonsensical interval: negatively sized interval: start is `10`, end is `0`, strand \
             is `+`"
        );

        //===================//
        // Negative stranded //
        //===================//

        let start = parse_coordinate("seq0:-:0");
        let end = parse_coordinate("seq0:-:10");

        let err = Interval::try_new(start, end).unwrap_err();

        assert_eq!(
            err,
            Error::Nonsensical(NonsensicalError::NegativelySized {
                start: 0,
                end: 10,
                strand: Strand::Negative
            })
        );

        assert_eq!(
            err.to_string(),
            "nonsensical interval: negatively sized interval: start is `0`, end is `10`, strand \
             is `-`"
        );
    }

    #[test]
    fn zero_sized() {
        let start = parse_coordinate("seq0:+:10");
        let end = parse_coordinate("seq0:+:10");

        let interval = Interval::try_new(start.clone(), end.clone()).unwrap();
        assert_eq!(
            interval.end().position().get() - interval.start().position().get(),
            0
        );

        // Both endpoints are contained, but their immediate neighbors are not.
        assert!(interval.contains_coordinate(&start));
        assert!(interval.contains_coordinate(&end));
        assert!(!interval.contains_coordinate(&parse_coordinate("seq0:+:9")));
        assert!(!interval.contains_coordinate(&parse_coordinate("seq0:+:11")));
    }

    #[test]
    fn positive_strand_clamp() {
        let interval = parse_interval("seq0:+:1000-2000");

        assert_eq!(
            interval.clone().clamp(parse_interval("seq1:+:0-3000")),
            Err(Error::Clamp(ClampError::MismatchedContigs {
                original: Contig::new("seq0"),
                operand: Contig::new("seq1")
            }))
        );

        assert_eq!(
            interval.clone().clamp(parse_interval("seq0:-:3000-0")),
            Err(Error::Clamp(ClampError::MismatchedStrand {
                original: Strand::Positive,
                operand: Strand::Negative
            }))
        );

        // Each pair is the clamping operand followed by the expected result.
        let cases = [
            ("seq0:+:0-3000", "seq0:+:1000-2000"),
            ("seq0:+:1250-3000", "seq0:+:1250-2000"),
            ("seq0:+:0-1750", "seq0:+:1000-1750"),
            ("seq0:+:1250-1750", "seq0:+:1250-1750"),
        ];

        for (operand, expected) in cases.iter() {
            assert_eq!(
                interval.clone().clamp(parse_interval(operand)).unwrap(),
                parse_interval(expected),
                "clamping by `{}`",
                operand
            );
        }
    }

    #[test]
    fn negative_strand_clamp() {
        let interval = parse_interval("seq0:-:2000-1000");

        assert_eq!(
            interval.clone().clamp(parse_interval("seq1:-:3000-0")),
            Err(Error::Clamp(ClampError::MismatchedContigs {
                original: Contig::new("seq0"),
                operand: Contig::new("seq1")
            }))
        );

        assert_eq!(
            interval.clone().clamp(parse_interval("seq0:+:0-3000")),
            Err(Error::Clamp(ClampError::MismatchedStrand {
                original: Strand::Negative,
                operand: Strand::Positive
            }))
        );

        // Each pair is the clamping operand followed by the expected result.
        let cases = [
            ("seq0:-:3000-0", "seq0:-:2000-1000"),
            ("seq0:-:3000-1250", "seq0:-:2000-1250"),
            ("seq0:-:1750-0", "seq0:-:1750-1000"),
            ("seq0:-:1750-1250", "seq0:-:1750-1250"),
        ];

        for (operand, expected) in cases.iter() {
            assert_eq!(
                interval.clone().clamp(parse_interval(operand)).unwrap(),
                parse_interval(expected),
                "clamping by `{}`",
                operand
            );
        }
    }

    #[test]
    fn positive_strand_offset() {
        let interval = parse_interval("seq0:+:1000-2000");

        // Mismatched contigs means the interval does not contain the coordinate.
        assert!(offset_of(&interval, "seq1:+:1000").is_none());

        // Mismatched strands means the interval does not contain the coordinate.
        assert!(offset_of(&interval, "seq0:-:1000").is_none());

        // Contained within.
        assert_eq!(offset_of(&interval, "seq0:+:1000"), Some(0));
        assert_eq!(offset_of(&interval, "seq0:+:2000"), Some(1000));

        // Just outside of range.
        assert!(offset_of(&interval, "seq0:+:999").is_none());
        assert!(offset_of(&interval, "seq0:+:2001").is_none());
    }

    #[test]
    fn negative_strand_offset() {
        let interval = parse_interval("seq0:-:2000-1000");

        // Mismatched contigs means the interval does not contain the coordinate.
        assert!(offset_of(&interval, "seq1:-:1000").is_none());

        // Mismatched strands means the interval does not contain the coordinate.
        assert!(offset_of(&interval, "seq0:+:1000").is_none());

        // Contained within.
        assert_eq!(offset_of(&interval, "seq0:-:2000"), Some(0));
        assert_eq!(offset_of(&interval, "seq0:-:1000"), Some(1000));

        // Just outside of range.
        assert!(offset_of(&interval, "seq0:-:999").is_none());
        assert!(offset_of(&interval, "seq0:-:2001").is_none());
    }

    #[test]
    fn len() {
        // The number of entities does not depend on the strand.
        assert_eq!(parse_interval("seq0:+:0-1000").count_entities(), 1000);
        assert_eq!(parse_interval("seq0:-:1000-0").count_entities(), 1000);
    }

    #[test]
    fn parse() {
        let value = format!("seq0:+:0-{}", Number::MAX);
        let interval = parse_interval(&value);
        assert_eq!(interval.contig().as_str(), "seq0");
        assert_eq!(interval.strand(), Strand::Positive);
        assert_eq!(interval.start().position().get(), 0);
        assert_eq!(interval.end().position().get(), Number::MAX);

        let value = format!("seq0:-:{}-0", Number::MAX);
        let interval = parse_interval(&value);
        assert_eq!(interval.contig().as_str(), "seq0");
        assert_eq!(interval.strand(), Strand::Negative);
        assert_eq!(interval.start().position().get(), Number::MAX);
        assert_eq!(interval.end().position().get(), 0);
    }

    #[test]
    fn parse_error() {
        // Inputs without exactly three `:`-delimited fields, or without
        // exactly two `-`-delimited positions, are format errors that echo
        // the input back.
        let malformed = ["1", "1-1000", "seq0:", "seq0:0-", "seq0:+", "seq0:+:0"];

        for value in malformed.iter() {
            assert_eq!(
                parse_failure(value),
                Error::Parse(ParseError::Format {
                    value: String::from(*value)
                })
            );
        }

        // Three fields are present, but the second is not a strand.
        assert_eq!(
            parse_failure("seq0:0-10000:"),
            Error::Strand(StrandError::Parse(StrandParseError::Invalid {
                value: String::from("0-10000")
            }))
        );

        // The structure is right, but the end position is not a number.
        for value in ["seq0:+:0-", "seq0:+:0-$"].iter() {
            assert!(matches!(
                parse_failure(value),
                Error::Position(PositionError::Parse(PositionParseError::Int { .. }))
            ));
        }
    }

    #[test]
    fn to_string() {
        // Positive-stranded interval
        let start = parse_coordinate("seq0:+:0");
        let end = parse_coordinate("seq0:+:10");
        let interval = Interval::try_new(start, end).unwrap();

        assert_eq!(interval.to_string(), "seq0:+:0-10");

        // Negative-stranded interval
        let start = parse_coordinate("seq0:-:10");
        let end = parse_coordinate("seq0:-:0");
        let interval = Interval::try_new(start, end).unwrap();

        assert_eq!(interval.to_string(), "seq0:-:10-0");
    }
}