omics_coordinate/interval.rs
1//! Intervals.
2
3use std::cmp::max;
4use std::cmp::min;
5
6use thiserror::Error;
7
8use crate::Contig;
9use crate::Position;
10use crate::Strand;
11use crate::System;
12use crate::coordinate;
13use crate::coordinate::Coordinate;
14use crate::position;
15use crate::position::Number;
16use crate::strand;
17use crate::system::Base;
18
19pub mod base;
20pub mod interbase;
21
22////////////////////////////////////////////////////////////////////////////////////////
23// Errors
24////////////////////////////////////////////////////////////////////////////////////////
25
/// An error that occurs during clamping.
///
/// Clamping requires the interval being clamped and the interval doing the
/// clamping to share both a contig and a strand. Each variant reports one of
/// those requirements being violated and carries the offending value from
/// each of the two intervals.
#[derive(Error, Debug, PartialEq, Eq)]
pub enum ClampError {
    /// A mismatched contig error.
    ///
    /// This error occurs when one attempts to clamp an interval with another
    /// interval that is not located on the same contig.
    #[error("mismatched contigs: `{original}` and `{operand}`")]
    MismatchedContigs {
        /// The contig of the interval being clamped.
        original: Contig,

        /// The contig of the interval doing the clamping.
        operand: Contig,
    },

    /// A mismatched strand error.
    ///
    /// This error occurs when one attempts to clamp an interval with another
    /// interval that is not located on the same strand.
    #[error("mismatched strand: `{original}` and `{operand}`")]
    MismatchedStrand {
        /// The strand of the interval being clamped.
        original: Strand,

        /// The strand of the interval doing the clamping.
        operand: Strand,
    },
}
55
/// A specialized [`Result`](std::result::Result) whose error type is a
/// [`ClampError`].
pub type ClampResult<T> = std::result::Result<T, ClampError>;
58
/// An error related to the creation of a nonsensical interval.
///
/// Each variant corresponds to one of the invariants that must hold between
/// the start and end coordinates of an interval: matching contigs, matching
/// strands, and a start that does not come after the end.
#[derive(Error, Debug, PartialEq, Eq)]
pub enum NonsensicalError {
    /// A mismatched contig error.
    ///
    /// This error occurs when one attempts to create an interval from a start
    /// coordinate and an end coordinate that are not located on the same
    /// contig.
    #[error("mismatched contigs for coordinates: `{start}` and `{end}`")]
    MismatchedContigs {
        /// The contig of the start coordinate.
        start: Contig,

        /// The contig of the end coordinate.
        end: Contig,
    },

    /// A mismatched strand error.
    ///
    /// This error occurs when one attempts to create an interval from a start
    /// coordinate and an end coordinate that are not located on the same
    /// strand.
    #[error("mismatched strands for coordinates: `{start}` and `{end}`")]
    MismatchedStrands {
        /// The strand of the start coordinate.
        start: Strand,

        /// The strand of the end coordinate.
        end: Strand,
    },

    /// A negative sized interval.
    ///
    /// This error occurs when the start of the interval comes _after_ the end
    /// of the interval.
    ///
    /// On positive stranded intervals, this is when the start position is
    /// _greater than_ the end position. On negative stranded intervals, this is
    /// when the start position is _less than_ the end position.
    #[error("negatively sized interval: start is `{start}`, end is `{end}`, strand is `{strand}`")]
    NegativelySized {
        /// The start position.
        start: Number,
        /// The end position.
        end: Number,
        /// The strand.
        strand: Strand,
    },
}
106
/// A specialized [`Result`](std::result::Result) whose error type is a
/// [`NonsensicalError`].
pub type NonsensicalResult<T> = std::result::Result<T, NonsensicalError>;
109
/// An error related to parsing an interval.
#[derive(Error, Debug, PartialEq, Eq)]
pub enum ParseError {
    /// An invalid format was encountered.
    ///
    /// The rejected input is kept so that it can be echoed back in the error
    /// message.
    #[error("invalid format: {value}")]
    Format {
        /// The value that was passed.
        value: String,
    },
}
120
/// A specialized [`Result`](std::result::Result) whose error type is a
/// [`ParseError`].
pub type ParseResult<T> = std::result::Result<T, ParseError>;
123
/// An error related to an interval.
///
/// This is the umbrella error for the module. Every variant that wraps
/// another error type is marked `#[from]`, so the wrapped error converts into
/// this type automatically (for example, when propagated with `?`).
#[derive(Error, Debug, PartialEq, Eq)]
pub enum Error {
    /// A clamping error.
    #[error("clamp error: {0}")]
    Clamp(#[from] ClampError),

    /// A coordinate error.
    #[error("coordinate error: {0}")]
    Coordinate(#[from] coordinate::Error),

    /// A nonsensical interval.
    #[error("nonsensical interval: {0}")]
    Nonsensical(#[from] NonsensicalError),

    /// One or more of the coordinates were out of bounds.
    ///
    /// This variant carries no payload, so it does not identify which
    /// coordinate was out of bounds.
    #[error("one or more of the coordinates were out of bounds")]
    OutOfBounds,

    /// A parse error.
    #[error("parse error: {0}")]
    Parse(#[from] ParseError),

    /// A position error.
    #[error("position error: {0}")]
    Position(#[from] position::Error),

    /// A strand error.
    #[error("strand error: {0}")]
    Strand(#[from] strand::Error),
}
155
/// A [`Result`](std::result::Result) with an [`Error`](enum@Error).
///
/// Within this module the alias takes the place of the prelude `Result`,
/// which is why the standard type is spelled out in full wherever it is
/// needed.
pub type Result<T> = std::result::Result<T, Error>;
158
159////////////////////////////////////////////////////////////////////////////////////////
// The `Interval` trait
161////////////////////////////////////////////////////////////////////////////////////////
162
/// Traits related to an interval.
pub mod r#trait {
    use super::*;
    use crate::system::Base;

    /// Requirements to be an interval.
    ///
    /// The inherent `contains_entity()` and `count_entities()` methods on the
    /// interval type forward to the implementation of this trait for the
    /// coordinate system `S`.
    // NOTE(review): no `len` method is declared on this trait in the code
    // visible here, so this lint allowance may be stale — confirm before
    // removing it.
    #[allow(clippy::len_without_is_empty)]
    pub trait Interval<S: System> {
        /// Returns whether or not the entity at the in-base coordinate is
        /// contained within this interval.
        fn contains_entity(&self, coordinate: &Coordinate<Base>) -> bool;

        /// Gets the number of members contained within the interval.
        fn count_entities(&self) -> Number;
    }
}
179
/// An interval.
///
/// An interval is a pair of coordinates within the coordinate system `S`.
/// Both fields are private; [`try_new()`](Interval::try_new) only builds an
/// interval when the two coordinates share a contig and a strand and the
/// start does not come after the end with respect to that strand.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Interval<S: System> {
    /// The start coordinate.
    start: Coordinate<S>,

    /// The end coordinate.
    end: Coordinate<S>,
}
189
190impl<S: System> Interval<S>
191where
192 Interval<S>: r#trait::Interval<S>,
193 Position<S>: position::r#trait::Position<S>,
194{
195 /// Creates a new interval if the following invariants are upheld.
196 ///
197 /// * The contigs of the two coordinates must match.
198 /// * If this does not hold, a [`NonsensicalError::MismatchedContigs`]
199 /// will be returned.
200 /// * The strands of the two coordinates must match.
201 /// * If this does not hold, a [`NonsensicalError::MismatchedStrands`]
202 /// will be returned.
203 /// * The start must come _before or be equal to_ the end in that (a) on
204 /// positive strand, `start <= end`, or, (b) on the negative strand, `end
205 /// <= start`. This ensures that the interval is always oriented from
206 /// start to end of the molecule.
207 /// * If this does not hold, a [`NonsensicalError::NegativelySized`] will
208 /// be returned.
209 ///
210 /// # Examples
211 ///
212 /// ```
213 /// use omics_coordinate::Coordinate;
214 /// use omics_coordinate::Interval;
215 /// use omics_coordinate::system::Base;
216 /// use omics_coordinate::system::Interbase;
217 ///
218 /// //===========//
219 /// // Interbase //
220 /// //===========//
221 ///
222 /// // Positive strand.
223 ///
224 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
225 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
226 /// let interval = Interval::try_new(start, end)?;
227 ///
228 /// // Negative strand.
229 ///
230 /// let start = Coordinate::<Interbase>::try_new("seq0", "-", 20)?;
231 /// let end = Coordinate::<Interbase>::try_new("seq0", "-", 10)?;
232 /// let interval = Interval::try_new(start, end)?;
233 ///
234 /// //======//
235 /// // Base //
236 /// //======//
237 ///
238 /// // Positive strand.
239 ///
240 /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
241 /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
242 /// let interval = Interval::try_new(start, end)?;
243 ///
244 /// // Negative strand.
245 ///
246 /// let start = Coordinate::<Base>::try_new("seq0", "-", 20)?;
247 /// let end = Coordinate::<Base>::try_new("seq0", "-", 10)?;
248 /// let interval = Interval::try_new(start, end)?;
249 ///
250 /// # Ok::<(), Box<dyn std::error::Error>>(())
251 /// ```
252 pub fn try_new(start: Coordinate<S>, end: Coordinate<S>) -> Result<super::Interval<S>> {
253 if start.contig() != end.contig() {
254 return Err(Error::Nonsensical(NonsensicalError::MismatchedContigs {
255 start: start.contig().clone(),
256 end: end.contig().clone(),
257 }));
258 }
259
260 if start.strand() != end.strand() {
261 return Err(Error::Nonsensical(NonsensicalError::MismatchedStrands {
262 start: start.strand(),
263 end: end.strand(),
264 }));
265 }
266
267 match start.strand() {
268 Strand::Positive => {
269 if start.position() > end.position() {
270 return Err(Error::Nonsensical(NonsensicalError::NegativelySized {
271 start: start.position().get(),
272 end: end.position().get(),
273 strand: start.strand(),
274 }));
275 }
276 }
277 Strand::Negative => {
278 if end.position() > start.position() {
279 return Err(Error::Nonsensical(NonsensicalError::NegativelySized {
280 start: start.position().get(),
281 end: end.position().get(),
282 strand: start.strand(),
283 }));
284 }
285 }
286 }
287
288 Ok(Interval { start, end })
289 }
290
291 /// Gets a reference to the start coordinate.
292 ///
293 /// # Examples
294 ///
295 /// ```
296 /// use omics_coordinate::Coordinate;
297 /// use omics_coordinate::Interval;
298 /// use omics_coordinate::system::Base;
299 /// use omics_coordinate::system::Interbase;
300 ///
301 /// //===========//
302 /// // Interbase //
303 /// //===========//
304 ///
305 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
306 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
307 /// let interval = Interval::try_new(start.clone(), end)?;
308 ///
309 /// assert_eq!(interval.start(), &start);
310 ///
311 /// //======//
312 /// // Base //
313 /// //======//
314 ///
315 /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
316 /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
317 /// let interval = Interval::try_new(start.clone(), end)?;
318 ///
319 /// assert_eq!(interval.start(), &start);
320 ///
321 /// # Ok::<(), Box<dyn std::error::Error>>(())
322 /// ```
323 pub fn start(&self) -> &Coordinate<S> {
324 &self.start
325 }
326
327 /// Consumes `self` and returns the start coordinate.
328 ///
329 /// # Examples
330 ///
331 /// ```
332 /// use omics_coordinate::Coordinate;
333 /// use omics_coordinate::Interval;
334 /// use omics_coordinate::system::Base;
335 /// use omics_coordinate::system::Interbase;
336 ///
337 /// //===========//
338 /// // Interbase //
339 /// //===========//
340 ///
341 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
342 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
343 /// let interval = Interval::try_new(start.clone(), end)?;
344 ///
345 /// assert_eq!(interval.into_start(), start);
346 ///
347 /// //======//
348 /// // Base //
349 /// //======//
350 ///
351 /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
352 /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
353 /// let interval = Interval::try_new(start.clone(), end)?;
354 ///
355 /// assert_eq!(interval.into_start(), start);
356 ///
357 /// # Ok::<(), Box<dyn std::error::Error>>(())
358 /// ```
359 pub fn into_start(self) -> Coordinate<S> {
360 self.start
361 }
362
363 /// Gets a reference to the end coordinate.
364 ///
365 /// # Examples
366 ///
367 /// ```
368 /// use omics_coordinate::Coordinate;
369 /// use omics_coordinate::Interval;
370 /// use omics_coordinate::system::Base;
371 /// use omics_coordinate::system::Interbase;
372 ///
373 /// //===========//
374 /// // Interbase //
375 /// //===========//
376 ///
377 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
378 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
379 /// let interval = Interval::try_new(start, end.clone())?;
380 ///
381 /// assert_eq!(interval.end(), &end);
382 ///
383 /// //======//
384 /// // Base //
385 /// //======//
386 ///
387 /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
388 /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
389 /// let interval = Interval::try_new(start, end.clone())?;
390 ///
391 /// assert_eq!(interval.end(), &end);
392 ///
393 /// # Ok::<(), Box<dyn std::error::Error>>(())
394 /// ```
395 pub fn end(&self) -> &Coordinate<S> {
396 &self.end
397 }
398
399 /// Consumes `self` and returns the end coordinate.
400 ///
401 /// # Examples
402 ///
403 /// ```
404 /// use omics_coordinate::Coordinate;
405 /// use omics_coordinate::Interval;
406 /// use omics_coordinate::system::Base;
407 /// use omics_coordinate::system::Interbase;
408 ///
409 /// //===========//
410 /// // Interbase //
411 /// //===========//
412 ///
413 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
414 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
415 /// let interval = Interval::try_new(start, end.clone())?;
416 ///
417 /// assert_eq!(interval.into_end(), end);
418 ///
419 /// //======//
420 /// // Base //
421 /// //======//
422 ///
423 /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
424 /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
425 /// let interval = Interval::try_new(start, end.clone())?;
426 ///
427 /// assert_eq!(interval.into_end(), end);
428 ///
429 /// # Ok::<(), Box<dyn std::error::Error>>(())
430 /// ```
431 pub fn into_end(self) -> Coordinate<S> {
432 self.end
433 }
434
435 /// Consumes `self` and returns the start and end coordinates.
436 ///
437 /// # Examples
438 ///
439 /// ```
440 /// use omics_coordinate::Coordinate;
441 /// use omics_coordinate::Interval;
442 /// use omics_coordinate::system::Base;
443 /// use omics_coordinate::system::Interbase;
444 ///
445 /// //===========//
446 /// // Interbase //
447 /// //===========//
448 ///
449 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
450 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
451 /// let interval = Interval::try_new(start.clone(), end.clone())?;
452 /// let parts = interval.into_coordinates();
453 ///
454 /// assert_eq!(parts.0, start);
455 /// assert_eq!(parts.1, end);
456 ///
457 /// //======//
458 /// // Base //
459 /// //======//
460 ///
461 /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
462 /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
463 /// let interval = Interval::try_new(start.clone(), end.clone())?;
464 /// let parts = interval.into_coordinates();
465 ///
466 /// assert_eq!(parts.0, start);
467 /// assert_eq!(parts.1, end);
468 ///
469 /// # Ok::<(), Box<dyn std::error::Error>>(())
470 /// ```
471 pub fn into_coordinates(self) -> (Coordinate<S>, Coordinate<S>) {
472 (self.start, self.end)
473 }
474
475 /// Returns a reference to the contig.
476 ///
477 /// # Examples
478 ///
479 /// ```
480 /// use omics_coordinate::Coordinate;
481 /// use omics_coordinate::Interval;
482 /// use omics_coordinate::system::Base;
483 /// use omics_coordinate::system::Interbase;
484 ///
485 /// //===========//
486 /// // Interbase //
487 /// //===========//
488 ///
489 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
490 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
491 /// let interval = Interval::try_new(start, end)?;
492 ///
493 /// assert_eq!(interval.contig().as_str(), "seq0");
494 ///
495 /// //======//
496 /// // Base //
497 /// //======//
498 ///
499 /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
500 /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
501 /// let interval = Interval::try_new(start, end)?;
502 ///
503 /// assert_eq!(interval.contig().as_str(), "seq0");
504 ///
505 /// # Ok::<(), Box<dyn std::error::Error>>(())
506 /// ```
507 pub fn contig(&self) -> &Contig {
508 self.start().contig()
509 }
510
511 /// Returns the strand.
512 ///
513 /// # Examples
514 ///
515 /// ```
516 /// use omics_coordinate::Coordinate;
517 /// use omics_coordinate::Interval;
518 /// use omics_coordinate::Strand;
519 /// use omics_coordinate::system::Base;
520 /// use omics_coordinate::system::Interbase;
521 ///
522 /// //===========//
523 /// // Interbase //
524 /// //===========//
525 ///
526 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
527 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
528 /// let interval = Interval::try_new(start, end)?;
529 ///
530 /// assert_eq!(interval.strand(), Strand::Positive);
531 ///
532 /// //======//
533 /// // Base //
534 /// //======//
535 ///
536 /// let start = Coordinate::<Base>::try_new("seq0", "-", 20)?;
537 /// let end = Coordinate::<Base>::try_new("seq0", "-", 10)?;
538 /// let interval = Interval::try_new(start, end)?;
539 ///
540 /// assert_eq!(interval.strand(), Strand::Negative);
541 ///
542 /// # Ok::<(), Box<dyn std::error::Error>>(())
543 /// ```
544 pub fn strand(&self) -> Strand {
545 self.start().strand()
546 }
547
548 /// Returns whether or not a coordinate is contained within this interval.
549 /// Notably, when checked whether coordinates are included in the interval,
550 /// both the start and end positions are considered inclusive.
551 ///
552 /// # Caution
553 ///
/// **This is not the method you want to use when checking if a nucleotide
/// or amino acid at a particular position is included in the interval.** This
/// checks the coordinates themselves and, in so doing, considers both the
/// start and the end positions of the interval to be inclusive.
558 ///
559 /// This method checks containment using coordinates in the interval's
560 /// native coordinate system (the generic type `S`). If you'd like to
561 /// check whether a particular nucleotide, amino acid, or other entity
562 /// is contained within the interval (using in-base coordinates), use
563 /// the [`contains_entity()`](Interval::contains_entity) method.
564 ///
565 /// # Examples
566 ///
567 /// ```
568 /// use omics_coordinate::Coordinate;
569 /// use omics_coordinate::Interval;
570 /// use omics_coordinate::Strand;
571 /// use omics_coordinate::system::Base;
572 /// use omics_coordinate::system::Interbase;
573 ///
574 /// //===========//
575 /// // Interbase //
576 /// //===========//
577 ///
578 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 0)?;
579 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
580 /// let interval = Interval::try_new(start, end)?;
581 ///
582 /// // Coordinates on the same contig, strand, and within the interval's range
583 /// // are contained within the interval.
584 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 0)?));
585 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 5)?));
586 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 10)?));
587 ///
588 /// // Coordinates on different contigs, strands, or outside the range are
589 /// // not contained within the interval.
590 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq1", "+", 5)?));
591 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "-", 5)?));
592 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 11)?));
593 ///
594 /// //======//
595 /// // Base //
596 /// //======//
597 ///
/// let start = Coordinate::<Base>::try_new("seq0", "+", 1)?;
/// let end = Coordinate::<Base>::try_new("seq0", "+", 10)?;
600 /// let interval = Interval::try_new(start, end)?;
601 ///
602 /// // Coordinates on the same contig, strand, and within the interval's range
603 /// // are contained within the interval.
604 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 1)?));
605 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 5)?));
606 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 10)?));
607 ///
608 /// // Coordinates on different contigs, strands, or outside the range are
609 /// // not contained within the interval.
610 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq1", "+", 5)?));
611 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "-", 5)?));
612 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 11)?));
613 ///
614 /// # Ok::<(), Box<dyn std::error::Error>>(())
615 /// ```
616 pub fn contains_coordinate(&self, coordinate: &crate::Coordinate<S>) -> bool {
617 if self.contig() != coordinate.contig() {
618 return false;
619 }
620
621 if self.strand() != coordinate.strand() {
622 return false;
623 }
624
625 match self.strand() {
626 Strand::Positive => {
627 self.start().position().get() <= coordinate.position().get()
628 && self.end().position().get() >= coordinate.position().get()
629 }
630 Strand::Negative => {
631 self.start().position().get() >= coordinate.position().get()
632 && self.end().position().get() <= coordinate.position().get()
633 }
634 }
635 }
636
637 /// Returns whether or not the entity at the in-base coordinate is
638 /// contained within this interval.
639 ///
640 /// This method always works with in-base coordinates (which directly point
641 /// to entities like nucleotides or amino acids), regardless of the
642 /// interval's coordinate system. Use
643 /// [`contains_coordinate()`](Self::contains_coordinate) if you need to
644 /// check containment using the interval's native coordinate system.
645 ///
646 /// # Examples
647 ///
648 /// ```
649 /// use omics_coordinate::Coordinate;
650 /// use omics_coordinate::Interval;
651 /// use omics_coordinate::Strand;
652 /// use omics_coordinate::system::Base;
653 /// use omics_coordinate::system::Interbase;
654 ///
655 /// //===========//
656 /// // Interbase //
657 /// //===========//
658 ///
659 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 0)?;
660 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
661 /// let interval = Interval::try_new(start, end)?;
662 ///
663 /// // Coordinates on the same contig, strand, and within the interval's range
664 /// // are contained within the interval.
665 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 0)?));
666 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 5)?));
667 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 10)?));
668 ///
669 /// // Coordinates on different contigs, strands, or outside the range are
670 /// // not contained within the interval.
671 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq1", "+", 5)?));
672 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "-", 5)?));
673 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 11)?));
674 ///
675 /// //======//
676 /// // Base //
677 /// //======//
678 ///
679 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 1)?;
680 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
681 /// let interval = Interval::try_new(start, end)?;
682 ///
683 /// // Coordinates on the same contig, strand, and within the interval's range
684 /// // are contained within the interval.
685 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 1)?));
686 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 5)?));
687 /// assert!(interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 10)?));
688 ///
689 /// // Coordinates on different contigs, strands, or outside the range are
690 /// // not contained within the interval.
691 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq1", "+", 5)?));
692 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "-", 5)?));
693 /// assert!(!interval.contains_coordinate(&Coordinate::try_new("seq0", "+", 11)?));
694 ///
695 /// # Ok::<(), Box<dyn std::error::Error>>(())
696 /// ```
697 pub fn contains_entity(&self, coordinate: &Coordinate<Base>) -> bool {
698 <Self as r#trait::Interval<S>>::contains_entity(self, coordinate)
699 }
700
701 /// Counts the number of entities in the interval.
702 ///
703 /// # Examples
704 ///
705 /// ```
706 /// use omics_coordinate::Coordinate;
707 /// use omics_coordinate::Interval;
708 /// use omics_coordinate::system::Base;
709 /// use omics_coordinate::system::Interbase;
710 ///
711 /// //===========//
712 /// // Interbase //
713 /// //===========//
714 ///
715 /// // Positive strand.
716 ///
717 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
718 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
719 /// let interval = Interval::try_new(start, end)?;
720 ///
721 /// assert_eq!(interval.count_entities(), 10);
722 ///
723 /// // Negative strand.
724 ///
725 /// let start = Coordinate::<Interbase>::try_new("seq0", "-", 20)?;
726 /// let end = Coordinate::<Interbase>::try_new("seq0", "-", 10)?;
727 /// let interval = Interval::try_new(start, end)?;
728 ///
729 /// assert_eq!(interval.count_entities(), 10);
730 ///
731 /// //======//
732 /// // Base //
733 /// //======//
734 ///
735 /// // Positive strand.
736 ///
737 /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
738 /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
739 /// let interval = Interval::try_new(start, end)?;
740 ///
741 /// assert_eq!(interval.count_entities(), 11);
742 ///
743 /// // Negative strand.
744 ///
745 /// let start = Coordinate::<Base>::try_new("seq0", "-", 20)?;
746 /// let end = Coordinate::<Base>::try_new("seq0", "-", 10)?;
747 /// let interval = Interval::try_new(start, end)?;
748 ///
749 /// assert_eq!(interval.count_entities(), 11);
750 ///
751 /// # Ok::<(), Box<dyn std::error::Error>>(())
752 /// ```
753 pub fn count_entities(&self) -> Number {
754 <Self as r#trait::Interval<S>>::count_entities(self)
755 }
756
757 /// Consumes `self` and clamps an interval by another interval.
758 ///
759 /// Clamping is an operation whereby the ends of an interval are restricted
760 /// to the range of the argument passed in with a tendency to restrict
761 /// towards the middle of the interval.
762 ///
763 /// # Summary
764 ///
765 /// * If the interval being operated on is completely contained within the
766 /// argument interval, the interval being operated on is returned.
767 ///
768 /// ```text
769 /// ╔═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════ →
770 /// 10 11 12 13 14 15 16 17 18 19 20 |
771 /// ●───────────────────────● [13, 17] | Original Interval
772 /// ●───────────────────────────────────────────────● [11, 19] | Argument Interval
773 /// ┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄
774 /// ●───────────────────────● [13, 17] | Resulting Interval
775 ///
776 ///
777 /// Here, no modifications were made to the original interval, as neither
778 /// the start nor the end of the interval would be restricted by the
779 /// argument interval.
780 /// ```
781 ///
782 /// * If the argument interval is completely within the interval being
783 /// operated on, the argument interval will clamp both sides of the
784 /// original interval, and the argument interval will be returned.
785 /// ```text
786 /// ╔═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════ →
787 /// 10 11 12 13 14 15 16 17 18 19 20 |
788 /// ●───────────────────────────────────────────────● [11, 19] | Original Interval
789 /// ●───────────────────────● [13, 17] | Argument Interval
790 /// ┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄
791 /// ●───────────────────────● [13, 17] | Resulting Interval
792 ///
793 ///
794 /// Here, both the start and the end position of the original interval were
795 /// restricted by the start and end of the argument interval respectively.
796 /// ```
797 ///
798 /// * If the argument interval would restrict the length of one side of the
799 /// subject interval on either end, that end is restricted to the argument
800 /// interval's value, whereas the non-restricted end is the original
801 /// interval's value.
802 ///
803 /// ```text
804 /// ╔═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════ →
805 /// 10 11 12 13 14 15 16 17 18 19 20 |
806 /// ●───────────────────────────────────● [11, 17] | Original Interval
807 /// ●───────────────────────● [13, 17] | Argument Interval
808 /// ┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄
809 /// ●───────────────────────● [13, 17] | Resulting Interval
810 ///
811 ///
/// Here, the start of the original interval is clamped by the argument
/// interval's start position. However, the end position of the original
/// interval is not restricted by the argument interval's end position,
/// so it remains the same. This results in the leading portion of the
/// interval being trimmed away.
817 ///
818 ///
819 /// ╔═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════╦═════ →
820 /// 10 11 12 13 14 15 16 17 18 19 20 |
821 /// ●───────────────────────────────────● [13, 19] | Original Interval
822 /// ●───────────────────────● [13, 17] | Argument Interval
823 /// ┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄
824 /// ●───────────────────────● [13, 17] | Resulting Interval
825 ///
826 ///
/// Here, the start position of the original interval would not be
/// restricted by the argument interval's start position, so it remains
/// the same. However, the end position is clamped by the end position
/// of the argument interval, so the resulting end position is that of the
/// argument interval's end position. This results in the trailing portion
/// of the interval being trimmed away.
833 /// ```
834 ///
835 /// # Examples
836 ///
837 /// ```
838 /// use omics_coordinate::Coordinate;
839 /// use omics_coordinate::Interval;
840 /// use omics_coordinate::system::Base;
841 /// use omics_coordinate::system::Interbase;
842 ///
843 /// //===========//
844 /// // Interbase //
845 /// //===========//
846 ///
847 /// let interval = "seq0:+:10-20".parse::<Interval<Interbase>>()?;
848 /// let clamped = interval.clamp("seq0:+:5-15".parse::<Interval<Interbase>>()?)?;
849 /// assert_eq!(clamped, "seq0:+:10-15".parse::<Interval<Interbase>>()?);
850 ///
851 /// //======//
852 /// // Base //
853 /// //======//
854 ///
855 /// let interval = "seq0:-:20-10".parse::<Interval<Base>>()?;
856 /// let clamped = interval.clamp("seq0:-:25-15".parse::<Interval<Base>>()?)?;
857 /// assert_eq!(clamped, "seq0:-:20-15".parse::<Interval<Base>>()?);
858 ///
/// # Ok::<(), Box<dyn std::error::Error>>(())
860 /// ```
861 #[must_use = "this method returns a new interval"]
862 pub fn clamp(self, interval: Interval<S>) -> Result<Interval<S>> {
863 let (start, end) = self.into_coordinates();
864 let (operand_start, operand_end) = interval.into_coordinates();
865
866 let (start_contig, start_strand, start) = start.into_parts();
867 let (end_contig, end_strand, end) = end.into_parts();
868
869 let (operand_contig, operand_strand, operand_start) = operand_start.into_parts();
870 let (_, _, operand_end) = operand_end.into_parts();
871
872 if start_contig != operand_contig {
873 return Err(Error::Clamp(ClampError::MismatchedContigs {
874 original: start_contig,
875 operand: operand_contig,
876 }));
877 }
878
879 if start_strand != operand_strand {
880 return Err(Error::Clamp(ClampError::MismatchedStrand {
881 original: start_strand,
882 operand: operand_strand,
883 }));
884 }
885
886 let (new_start, new_end) = match start_strand {
887 Strand::Positive => (max(start, operand_start), min(end, operand_end)),
888 Strand::Negative => (min(start, operand_start), max(end, operand_end)),
889 };
890
891 let start = Coordinate::<S>::new(start_contig, start_strand, new_start);
892 let end = Coordinate::<S>::new(end_contig, end_strand, new_end);
893
894 // SAFETY: both the start _and_ the end positions were originally on
895 // intervals that were valid. Since we are not breaking any rules that
896 // would make the intervals invalid in this method, this should always
897 // unwrap.
898 Ok(Self::try_new(start, end).unwrap())
899 }
900
901 /// Gets the offset of a coordinate from the start of the interval.
902 ///
903 /// If the coordinate is not contained within the interval, `None` is
904 /// returned.
905 ///
906 /// # Examples
907 ///
908 /// ```
909 /// use omics_coordinate::Coordinate;
910 /// use omics_coordinate::Interval;
911 /// use omics_coordinate::system::Base;
912 /// use omics_coordinate::system::Interbase;
913 ///
914 /// //===========//
915 /// // Interbase //
916 /// //===========//
917 ///
918 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
919 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
920 /// let interval = Interval::try_new(start, end)?;
921 ///
922 /// let query = Coordinate::<Interbase>::try_new("seq0", "+", 15)?;
923 /// assert_eq!(interval.coordinate_offset(&query).unwrap(), 5);
924 ///
925 /// let query = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
926 /// assert_eq!(interval.coordinate_offset(&query).unwrap(), 10);
927 ///
928 /// let query = Coordinate::<Interbase>::try_new("seq0", "+", 21)?;
929 /// assert!(interval.coordinate_offset(&query).is_none());
930 ///
931 /// //======//
932 /// // Base //
933 /// //======//
934 ///
935 /// let start = Coordinate::<Base>::try_new("seq0", "-", 20)?;
936 /// let end = Coordinate::<Base>::try_new("seq0", "-", 10)?;
937 /// let interval = Interval::try_new(start, end)?;
938 ///
939 /// let query = Coordinate::<Base>::try_new("seq0", "-", 15)?;
940 /// assert_eq!(interval.coordinate_offset(&query).unwrap(), 5);
941 ///
942 /// let query = Coordinate::<Base>::try_new("seq0", "-", 10)?;
943 /// assert_eq!(interval.coordinate_offset(&query).unwrap(), 10);
944 ///
945 /// let query = Coordinate::<Base>::try_new("seq0", "-", 9)?;
946 /// assert!(interval.coordinate_offset(&query).is_none());
947 ///
948 /// Ok::<(), Box<dyn std::error::Error>>(())
949 /// ```
950 pub fn coordinate_offset(&self, coordinate: &Coordinate<S>) -> Option<Number> {
951 if !self.contains_coordinate(coordinate) {
952 return None;
953 }
954
955 Some(
956 coordinate
957 .position()
958 .distance_unchecked(self.start().position()),
959 )
960 }
961
962 /// Returns the coordinate at the offset within the interval.
963 ///
964 /// This method only returns the coordinate if the coordinate falls within
965 /// the interval.
966 ///
967 /// # Examples
968 ///
969 /// ```
970 /// use omics_coordinate::Coordinate;
971 /// use omics_coordinate::Interval;
972 /// use omics_coordinate::system::Base;
973 /// use omics_coordinate::system::Interbase;
974 ///
975 /// //===========//
976 /// // Interbase //
977 /// //===========//
978 ///
979 /// // Positive strand.
980 ///
981 /// let interval = "seq0:+:0-1000".parse::<Interval<Interbase>>()?;
982 ///
983 /// let expected = "seq0:+:5".parse::<Coordinate<Interbase>>()?;
984 /// assert_eq!(interval.coordinate_at_offset(5).unwrap(), expected);
985 ///
986 /// let expected = "seq0:+:1000".parse::<Coordinate<Interbase>>()?;
987 /// assert_eq!(interval.coordinate_at_offset(1000).unwrap(), expected);
988 ///
989 /// assert!(interval.coordinate_at_offset(1001).is_none());
990 ///
991 /// // Negative strand.
992 ///
993 /// let interval = "seq0:-:1000-0".parse::<Interval<Interbase>>()?;
994 ///
995 /// let expected = "seq0:-:995".parse::<Coordinate<Interbase>>()?;
996 /// assert_eq!(interval.coordinate_at_offset(5).unwrap(), expected);
997 ///
998 /// let expected = "seq0:-:0".parse::<Coordinate<Interbase>>()?;
999 /// assert_eq!(interval.coordinate_at_offset(1000).unwrap(), expected);
1000 ///
1001 /// assert_eq!(interval.coordinate_at_offset(1001), None);
1002 ///
1003 /// //======//
1004 /// // Base //
1005 /// //======//
1006 ///
1007 /// // Positive strand.
1008 ///
1009 /// let interval = "seq0:+:1-1000".parse::<Interval<Base>>()?;
1010 ///
1011 /// let expected = "seq0:+:6".parse::<Coordinate<Base>>()?;
1012 /// assert_eq!(interval.coordinate_at_offset(5).unwrap(), expected);
1013 ///
1014 /// let expected = "seq0:+:1000".parse::<Coordinate<Base>>()?;
1015 /// assert_eq!(interval.coordinate_at_offset(999).unwrap(), expected);
1016 ///
1017 /// assert!(interval.coordinate_at_offset(1000).is_none());
1018 ///
1019 /// // Negative strand.
1020 ///
1021 /// let interval = "seq0:-:1000-1".parse::<Interval<Base>>()?;
1022 ///
1023 /// let expected = "seq0:-:995".parse::<Coordinate<Base>>()?;
1024 /// assert_eq!(interval.coordinate_at_offset(5).unwrap(), expected);
1025 ///
1026 /// let expected = "seq0:-:1".parse::<Coordinate<Base>>()?;
1027 /// assert_eq!(interval.coordinate_at_offset(999).unwrap(), expected);
1028 ///
1029 /// assert_eq!(interval.coordinate_at_offset(1000), None);
1030 ///
1031 /// Ok::<(), Box<dyn std::error::Error>>(())
1032 /// ```
1033 pub fn coordinate_at_offset(&self, offset: Number) -> Option<Coordinate<S>> {
1034 let coordinate = self.start().clone().into_move_forward(offset)?;
1035
1036 match self.contains_coordinate(&coordinate) {
1037 true => Some(coordinate),
1038 false => None,
1039 }
1040 }
1041
1042 /// Reverse complements the interval, meaning that:
1043 ///
1044 /// * the start and end positions are swapped, and
1045 /// * the strand is swapped.
1046 ///
1047 /// # Examples
1048 ///
1049 /// ```
1050 /// use omics_coordinate::Coordinate;
1051 /// use omics_coordinate::Interval;
1052 /// use omics_coordinate::system::Base;
1053 /// use omics_coordinate::system::Interbase;
1054 ///
1055 /// //===========//
1056 /// // Interbase //
1057 /// //===========//
1058 ///
1059 /// let start = Coordinate::<Interbase>::try_new("seq0", "+", 10)?;
1060 /// let end = Coordinate::<Interbase>::try_new("seq0", "+", 20)?;
1061 /// let original = Interval::try_new(start, end)?;
1062 ///
1063 /// let complemented = original.clone().reverse_complement();
1064 /// assert_eq!(complemented, "seq0:-:20-10".parse::<Interval<Interbase>>()?);
1065 ///
1066 /// let recomplemented = complemented.reverse_complement();
1067 /// assert_eq!(recomplemented, original);
1068 ///
1069 /// //======//
1070 /// // Base //
1071 /// //======//
1072 ///
1073 /// let start = Coordinate::<Base>::try_new("seq0", "+", 10)?;
1074 /// let end = Coordinate::<Base>::try_new("seq0", "+", 20)?;
1075 /// let original = Interval::try_new(start, end)?;
1076 ///
1077 /// let complemented = original.clone().reverse_complement();
1078 /// assert_eq!(complemented, "seq0:-:20-10".parse::<Interval<Base>>()?);
1079 ///
1080 /// let recomplemented = complemented.reverse_complement();
1081 /// assert_eq!(recomplemented, original);
1082 ///
1083 /// Ok::<(), Box<dyn std::error::Error>>(())
1084 /// ```
1085 #[must_use = "this method returns a new interval"]
1086 pub fn reverse_complement(self) -> super::Interval<S> {
1087 let (start, end) = self.into_coordinates();
1088 // SAFETY: because (a) intervals are inclusive of both of their start
1089 // and end coordinates, (b) all positions can be represented on the
1090 // opposite strand, and (c) swapping the start and end while also
1091 // swapping strand will always create the correct directionality, this will
1092 // always unwrap.
1093 Interval::try_new(end.swap_strand(), start.swap_strand()).unwrap()
1094 }
1095}
1096
1097////////////////////////////////////////////////////////////////////////////////////////
1098// Trait implementations
1099////////////////////////////////////////////////////////////////////////////////////////
1100
1101impl<S: System> std::fmt::Display for Interval<S>
1102where
1103 Interval<S>: r#trait::Interval<S>,
1104 Position<S>: position::r#trait::Position<S>,
1105{
1106 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1107 write!(
1108 f,
1109 "{}:{}:{}-{}",
1110 self.contig(),
1111 self.strand(),
1112 self.start().position(),
1113 self.end().position(),
1114 )
1115 }
1116}
1117
1118impl<S: System> std::str::FromStr for Interval<S>
1119where
1120 Interval<S>: r#trait::Interval<S>,
1121 Position<S>: position::r#trait::Position<S>,
1122{
1123 type Err = Error;
1124
1125 fn from_str(s: &str) -> Result<Self> {
1126 let parts = s.split(':').collect::<Vec<_>>();
1127
1128 if parts.len() != 3 {
1129 return Err(Error::Parse(ParseError::Format {
1130 value: s.to_string(),
1131 }));
1132 }
1133
1134 let mut parts = parts.iter();
1135
1136 // SAFETY: we checked that there are three parts above. Given that we
1137 // haven't pulled anything from the iterator, we can always safely
1138 // unwrap this.
1139 let contig = parts.next().unwrap().parse::<Contig>().map_err(|_| {
1140 Error::Parse(ParseError::Format {
1141 value: s.to_string(),
1142 })
1143 })?;
1144
1145 // SAFETY: we checked that there are three parts above. Given that we
1146 // have only pulled one item from the iterator, we can always safely
1147 // unwrap this.
1148 let strand = parts
1149 .next()
1150 .unwrap()
1151 .parse::<Strand>()
1152 .map_err(Error::Strand)?;
1153
1154 // SAFETY: we checked that there are three parts above. Given that we
1155 // have only pulled two items from the iterator, we can always safely
1156 // unwrap this.
1157 let positions = parts.next().unwrap().split('-').collect::<Vec<_>>();
1158
1159 if positions.len() != 2 {
1160 return Err(Error::Parse(ParseError::Format {
1161 value: s.to_string(),
1162 }));
1163 }
1164
1165 // SAFETY: we just ensured that two parts exist, so the direct
1166 // indexing of the slice for both index zero and one will never
1167 // fail.
1168 let start = positions[0]
1169 .parse::<Position<S>>()
1170 .map_err(Error::Position)?;
1171 let end = positions[1]
1172 .parse::<Position<S>>()
1173 .map_err(Error::Position)?;
1174
1175 Interval::try_new(
1176 Coordinate::new(contig.clone(), strand, start),
1177 Coordinate::new(contig, strand, end),
1178 )
1179 }
1180}
1181
1182#[cfg(test)]
1183mod tests {
1184 use super::*;
1185 use crate::position::Error as PositionError;
1186 use crate::position::Number;
1187 use crate::position::ParseError as PositionParseError;
1188 use crate::strand::Error as StrandError;
1189 use crate::strand::ParseError as StrandParseError;
1190 use crate::system::Interbase;
1191
/// A well-formed interbase interval can be created and counted.
#[test]
fn valid() {
    let interval = Interval::try_new(
        "seq0:+:0".parse::<Coordinate<Interbase>>().unwrap(),
        "seq0:+:9".parse::<Coordinate<Interbase>>().unwrap(),
    )
    .unwrap();

    assert_eq!(interval.count_entities(), 9);
}
1200
/// Endpoints on different contigs are rejected.
#[test]
fn nonsensical_mismatched_contigs() {
    let on_seq0 = "seq0:+:0".parse::<Coordinate<Interbase>>().unwrap();
    let on_seq1 = "seq1:+:10".parse::<Coordinate<Interbase>>().unwrap();

    let error = Interval::try_new(on_seq0, on_seq1).unwrap_err();

    let expected = Error::Nonsensical(NonsensicalError::MismatchedContigs {
        start: Contig::new_unchecked("seq0"),
        end: Contig::new_unchecked("seq1"),
    });
    assert_eq!(error, expected);

    assert_eq!(
        error.to_string(),
        "nonsensical interval: mismatched contigs for coordinates: `seq0` and `seq1`"
    );
}
1220
/// Endpoints on different strands are rejected.
#[test]
fn nonsensical_mismatched_strands() {
    let forward = "seq0:+:0".parse::<Coordinate<Interbase>>().unwrap();
    let reverse = "seq0:-:10".parse::<Coordinate<Interbase>>().unwrap();

    let error = Interval::try_new(forward, reverse).unwrap_err();

    let expected = Error::Nonsensical(NonsensicalError::MismatchedStrands {
        start: Strand::Positive,
        end: Strand::Negative,
    });
    assert_eq!(error, expected);

    assert_eq!(
        error.to_string(),
        "nonsensical interval: mismatched strands for coordinates: `+` and `-`"
    );
}
1240
/// Endpoints that run against the direction of the strand are rejected.
#[test]
fn nonsensical_start_greater_than_end() {
    // Attempts to build an interval from the two coordinates and hands back
    // the resulting error.
    let failure = |start: &str, end: &str| {
        Interval::try_new(
            start.parse::<Coordinate<Interbase>>().unwrap(),
            end.parse::<Coordinate<Interbase>>().unwrap(),
        )
        .unwrap_err()
    };

    //===================//
    // Positive stranded //
    //===================//

    let error = failure("seq0:+:10", "seq0:+:0");

    assert_eq!(
        error,
        Error::Nonsensical(NonsensicalError::NegativelySized {
            start: 10,
            end: 0,
            strand: Strand::Positive
        })
    );

    assert_eq!(
        error.to_string(),
        "nonsensical interval: negatively sized interval: start is `10`, end is `0`, strand \
         is `+`"
    );

    //===================//
    // Negative stranded //
    //===================//

    let error = failure("seq0:-:0", "seq0:-:10");

    assert_eq!(
        error,
        Error::Nonsensical(NonsensicalError::NegativelySized {
            start: 0,
            end: 10,
            strand: Strand::Negative
        })
    );

    assert_eq!(
        error.to_string(),
        "nonsensical interval: negatively sized interval: start is `0`, end is `10`, strand \
         is `-`"
    );
}
1291
/// An interbase interval may start and end at the same position.
#[test]
fn zero_sized() {
    let coordinate = |value: &str| value.parse::<Coordinate<Interbase>>().unwrap();

    let start = coordinate("seq0:+:10");
    let end = coordinate("seq0:+:10");

    let interval = Interval::try_new(start.clone(), end.clone()).unwrap();

    let width = interval.end().position().get() - interval.start().position().get();
    assert!(width == 0);

    // The shared endpoint is contained…
    assert!(interval.contains_coordinate(&start));
    assert!(interval.contains_coordinate(&end));

    // …but its immediate neighbors are not.
    for neighbor in ["seq0:+:9", "seq0:+:11"] {
        assert!(!interval.contains_coordinate(&coordinate(neighbor)));
    }
}
1308
/// Clamping on the positive strand.
#[test]
fn positive_strand_clamp() {
    let parse = |value: &str| value.parse::<Interval<Interbase>>().unwrap();
    let interval = parse("seq0:+:1000-2000");

    // An operand on a different contig is rejected.
    assert_eq!(
        interval.clone().clamp(parse("seq1:+:0-3000")),
        Err(Error::Clamp(ClampError::MismatchedContigs {
            original: Contig::new_unchecked("seq0"),
            operand: Contig::new_unchecked("seq1")
        }))
    );

    // An operand on a different strand is rejected.
    assert_eq!(
        interval.clone().clamp(parse("seq0:-:3000-0")),
        Err(Error::Clamp(ClampError::MismatchedStrand {
            original: Strand::Positive,
            operand: Strand::Negative
        }))
    );

    // Each pair is an operand and the interval expected after clamping.
    let cases = [
        ("seq0:+:0-3000", "seq0:+:1000-2000"),
        ("seq0:+:1250-3000", "seq0:+:1250-2000"),
        ("seq0:+:0-1750", "seq0:+:1000-1750"),
        ("seq0:+:1250-1750", "seq0:+:1250-1750"),
    ];

    for (operand, expected) in cases {
        assert_eq!(
            interval.clone().clamp(parse(operand)).unwrap(),
            parse(expected)
        );
    }
}
1365
/// Clamping on the negative strand.
#[test]
fn negative_strand_clamp() {
    let parse = |value: &str| value.parse::<Interval<Interbase>>().unwrap();
    let interval = parse("seq0:-:2000-1000");

    // An operand on a different contig is rejected.
    assert_eq!(
        interval.clone().clamp(parse("seq1:-:3000-0")),
        Err(Error::Clamp(ClampError::MismatchedContigs {
            original: Contig::new_unchecked("seq0"),
            operand: Contig::new_unchecked("seq1")
        }))
    );

    // An operand on a different strand is rejected.
    assert_eq!(
        interval.clone().clamp(parse("seq0:+:0-3000")),
        Err(Error::Clamp(ClampError::MismatchedStrand {
            original: Strand::Negative,
            operand: Strand::Positive
        }))
    );

    // Each pair is an operand and the interval expected after clamping.
    let cases = [
        ("seq0:-:3000-0", "seq0:-:2000-1000"),
        ("seq0:-:3000-1250", "seq0:-:2000-1250"),
        ("seq0:-:1750-0", "seq0:-:1750-1000"),
        ("seq0:-:1750-1250", "seq0:-:1750-1250"),
    ];

    for (operand, expected) in cases {
        assert_eq!(
            interval.clone().clamp(parse(operand)).unwrap(),
            parse(expected)
        );
    }
}
1422
/// Coordinate offsets within a positive-stranded interval.
#[test]
fn positive_strand_offset() {
    let interval = "seq0:+:1000-2000".parse::<Interval<Interbase>>().unwrap();

    // Parses the coordinate and looks up its offset within the interval.
    let offset_of = |value: &str| {
        let coordinate = value.parse::<Coordinate<Interbase>>().unwrap();
        interval.coordinate_offset(&coordinate)
    };

    // A coordinate on a different contig is not contained.
    assert!(offset_of("seq1:+:1000").is_none());

    // A coordinate on a different strand is not contained.
    assert!(offset_of("seq0:-:1000").is_none());

    // Both endpoints are contained.
    assert_eq!(offset_of("seq0:+:1000").unwrap(), 0);
    assert_eq!(offset_of("seq0:+:2000").unwrap(), 1000);

    // One position past either endpoint is not contained.
    assert!(offset_of("seq0:+:999").is_none());
    assert!(offset_of("seq0:+:2001").is_none());
}
1449
/// Coordinate offsets within a negative-stranded interval.
#[test]
fn negative_strand_offset() {
    let interval = "seq0:-:2000-1000".parse::<Interval<Interbase>>().unwrap();

    // Parses the coordinate and looks up its offset within the interval.
    let offset_of = |value: &str| {
        let coordinate = value.parse::<Coordinate<Interbase>>().unwrap();
        interval.coordinate_offset(&coordinate)
    };

    // A coordinate on a different contig is not contained.
    assert!(offset_of("seq1:-:1000").is_none());

    // A coordinate on a different strand is not contained.
    assert!(offset_of("seq0:+:1000").is_none());

    // Both endpoints are contained.
    assert_eq!(offset_of("seq0:-:2000").unwrap(), 0);
    assert_eq!(offset_of("seq0:-:1000").unwrap(), 1000);

    // One position past either endpoint is not contained.
    assert!(offset_of("seq0:-:999").is_none());
    assert!(offset_of("seq0:-:2001").is_none());
}
1476
/// The number of entities in an interval does not depend on the strand it is
/// located on.
#[test]
fn len() {
    // Positive strand.
    assert_eq!(
        "seq0:+:0-1000"
            .parse::<Interval<Interbase>>()
            .unwrap()
            .count_entities(),
        1000
    );

    // Negative strand.
    assert_eq!(
        "seq0:-:1000-0"
            .parse::<Interval<Interbase>>()
            .unwrap()
            .count_entities(),
        1000
    );
}
1518
/// Intervals spanning the full numeric range parse on both strands.
#[test]
fn parse() {
    // Parses the value and checks each component of the resulting interval.
    let check = |value: String, strand: Strand, start: Number, end: Number| {
        let interval = value.parse::<Interval<Interbase>>().unwrap();

        assert_eq!(interval.contig().as_str(), "seq0");
        assert_eq!(interval.strand(), strand);
        assert_eq!(interval.start().position().get(), start);
        assert_eq!(interval.end().position().get(), end);
    };

    check(
        format!("seq0:+:0-{}", Number::MAX),
        Strand::Positive,
        0,
        Number::MAX,
    );

    check(
        format!("seq0:-:{}-0", Number::MAX),
        Strand::Negative,
        Number::MAX,
        0,
    );
}
1535
/// Malformed inputs are rejected with the appropriate error.
#[test]
fn parse_error() {
    // Parses the value, which is expected to fail, and returns the error.
    let failure = |value: &str| value.parse::<Interval<Interbase>>().unwrap_err();

    // The format error that carries the offending value.
    let format_error = |value: &str| {
        Error::Parse(ParseError::Format {
            value: String::from(value),
        })
    };

    // The wrong number of colon-delimited fields.
    for value in ["1", "1-1000", "seq0:", "seq0:0-"] {
        assert_eq!(failure(value), format_error(value));
    }

    // Three fields, but the second field is not a strand.
    assert_eq!(
        failure("seq0:0-10000:"),
        Error::Strand(StrandError::Parse(StrandParseError::Invalid {
            value: String::from("0-10000")
        }))
    );

    // A missing or incomplete positions field.
    for value in ["seq0:+", "seq0:+:0"] {
        assert_eq!(failure(value), format_error(value));
    }

    // An end position that is not a valid integer.
    for value in ["seq0:+:0-", "seq0:+:0-$"] {
        assert!(matches!(
            failure(value),
            Error::Position(PositionError::Parse(PositionParseError::Int { .. }))
        ));
    }
}
1606
/// Intervals on either strand are displayed as expected.
#[test]
fn to_string() {
    // Builds an interval from the two coordinates and renders it.
    let render = |start: &str, end: &str| {
        Interval::try_new(
            start.parse::<Coordinate<Interbase>>().unwrap(),
            end.parse::<Coordinate<Interbase>>().unwrap(),
        )
        .unwrap()
        .to_string()
    };

    // Positive-stranded interval
    assert_eq!(render("seq0:+:0", "seq0:+:10"), "seq0:+:0-10");

    // Negative-stranded interval
    assert_eq!(render("seq0:-:10", "seq0:-:0"), "seq0:-:10-0");
}
1623}