bio_types/annot/
pos.rs

1// Copyright 2017 Nicholas Ingolia
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6//! Positions on a named sequence, e.g., 683,946 on chromosome IV.
7
8use std::convert::Into;
9use std::fmt::{self, Display, Formatter};
10use std::ops::AddAssign;
11use std::ops::Neg;
12use std::ops::SubAssign;
13use std::str::FromStr;
14
15use regex::Regex;
16
17use crate::annot::contig::Contig;
18use crate::annot::loc::Loc;
19use crate::annot::*;
20use crate::strand::*;
21
/// Position on a particular, named sequence (e.g. a chromosome).
///
/// Parameterized over the type of the reference sequence identifier
/// (`R`) and over the strandedness of the position (`S`).
///
/// The display format for a `Pos` is _chr:pos(+/-)_. A stranded
/// position must have a _(+)_ or a _(-)_, while an unstranded
/// position does not.
///
/// ```
/// # use bio_types::annot::ParseAnnotError;
/// # fn try_main() -> Result<(), Box<ParseAnnotError>> {
/// use bio_types::annot::pos::Pos;
/// use bio_types::strand::ReqStrand;
/// let start = Pos::new("chrIV".to_owned(), 683946, ReqStrand::Reverse);
/// let start_str = start.to_string();
/// assert_eq!(start_str, "chrIV:683946(-)");
/// let start_str_pos = start_str.parse()?;
/// assert_eq!(start, start_str_pos);
/// # Ok(())
/// # }
/// # fn main() { try_main().unwrap(); }
/// ```
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct Pos<R, S> {
    /// Identifier of the reference sequence this position lies on.
    refid: R,
    /// Coordinate on the reference sequence; stored as a signed integer.
    pos: isize,
    /// Strand information attached to the position.
    strand: S,
}
51
52impl<R, S> Pos<R, S> {
53    /// Construct a new sequence position
54    ///
55    /// ```
56    /// use std::rc::Rc;
57    /// use bio_types::annot::pos::Pos;
58    /// use bio_types::strand::ReqStrand;
59    /// let chr = Rc::new("chrIV".to_owned());
60    /// let start = Pos::new(chr, 683946, ReqStrand::Reverse);
61    /// ```
62    pub fn new(refid: R, pos: isize, strand: S) -> Self {
63        Pos { refid, pos, strand }
64    }
65
66    /// Position on the reference sequence (0-based).
67    pub fn pos(&self) -> isize {
68        self.pos
69    }
70
71    /// Convert into a stranded sequence position on the specified strand
72    pub fn into_stranded(self, strand: ReqStrand) -> Pos<R, ReqStrand> {
73        Pos {
74            refid: self.refid,
75            pos: self.pos,
76            strand,
77        }
78    }
79}
80
81impl<R, T> AddAssign<T> for Pos<R, ReqStrand>
82where
83    isize: AddAssign<T>,
84    isize: SubAssign<T>,
85{
86    /// Slide the reference position by an offset on the strand of the
87    /// annotation.
88    ///
89    /// # Arguments
90    ///
91    /// * `dist` specifies the offset for sliding the position. A
92    /// positive `dist` will numerically increase the position for
93    /// forward-strand features and decrease it for reverse-strand
94    /// features.
95    ///
96    /// ```
97    /// use bio_types::annot::pos::Pos;
98    /// use bio_types::strand::ReqStrand;
99    /// let mut start = Pos::new("chrIV".to_owned(), 683946, ReqStrand::Reverse);
100    /// assert_eq!(start.to_string(), "chrIV:683946(-)");
101    /// start += 100;
102    /// assert_eq!(start.to_string(), "chrIV:683846(-)");
103    /// ```
104    fn add_assign(&mut self, dist: T) {
105        match self.strand {
106            ReqStrand::Forward => self.pos += dist,
107            ReqStrand::Reverse => self.pos -= dist,
108        }
109    }
110}
111
112impl<R, T> SubAssign<T> for Pos<R, ReqStrand>
113where
114    isize: AddAssign<T>,
115    isize: SubAssign<T>,
116{
117    /// Slide the reference position by an offset on the strand of the
118    /// annotation.
119    ///
120    /// # Arguments
121    ///
122    /// * `dist` specifies the offset for sliding the position. A
123    /// positive `dist` will numerically decrease the position for
124    /// forward-strand features and increase it for reverse-strand
125    /// features.
126    ///
127    /// ```
128    /// use bio_types::annot::pos::Pos;
129    /// use bio_types::strand::ReqStrand;
130    /// let mut start = Pos::new("chrIV".to_owned(), 683946, ReqStrand::Reverse);
131    /// assert_eq!(start.to_string(), "chrIV:683946(-)");
132    /// start -= 100;
133    /// assert_eq!(start.to_string(), "chrIV:684046(-)");
134    /// ```
135    fn sub_assign(&mut self, dist: T) {
136        match self.strand {
137            ReqStrand::Forward => self.pos -= dist,
138            ReqStrand::Reverse => self.pos += dist,
139        }
140    }
141}
142
143impl<R, S> Loc for Pos<R, S> {
144    type RefID = R;
145    type Strand = S;
146    fn refid(&self) -> &R {
147        &self.refid
148    }
149    fn start(&self) -> isize {
150        self.pos
151    }
152    fn length(&self) -> usize {
153        1
154    }
155    fn strand(&self) -> S
156    where
157        S: Copy,
158    {
159        self.strand
160    }
161
162    fn pos_into<T>(&self, pos: &Pos<Self::RefID, T>) -> Option<Pos<(), T>>
163    where
164        Self::RefID: Eq,
165        Self::Strand: Into<ReqStrand> + Copy,
166        T: Neg<Output = T> + Copy,
167    {
168        if (self.refid != pos.refid) || (self.pos != pos.pos) {
169            None
170        } else {
171            Some(Pos::new(
172                (),
173                0,
174                self.strand().into().on_strand(pos.strand()),
175            ))
176        }
177    }
178
179    fn pos_outof<Q, T>(&self, pos: &Pos<Q, T>) -> Option<Pos<Self::RefID, T>>
180    where
181        Self::RefID: Clone,
182        Self::Strand: Into<ReqStrand> + Copy,
183        T: Neg<Output = T> + Copy,
184    {
185        if pos.pos == 0 {
186            Some(Pos::new(
187                self.refid.clone(),
188                self.pos,
189                self.strand().into().on_strand(pos.strand()),
190            ))
191        } else {
192            None
193        }
194    }
195
196    fn contig_intersection<T>(&self, contig: &Contig<Self::RefID, T>) -> Option<Self>
197    where
198        Self::RefID: PartialEq + Clone,
199        Self::Strand: Copy,
200    {
201        if self.refid() != contig.refid() {
202            return None;
203        }
204
205        if (self.pos >= contig.start()) && (self.pos < (contig.start() + contig.length() as isize))
206        {
207            Some(self.clone())
208        } else {
209            None
210        }
211    }
212}
213
214impl<R, S> Same for Pos<R, S>
215where
216    R: Eq,
217    S: Same,
218{
219    /// Indicate when two positions are the "same" -- when positions
220    /// have unknown/unspecified strands they can be the "same" but
221    /// not equal.
222    fn same(&self, p: &Self) -> bool {
223        self.pos == p.pos && self.refid == p.refid && self.strand.same(&p.strand)
224    }
225}
226
227impl<R, S> Display for Pos<R, S>
228where
229    R: Display,
230    S: Display + Clone + Into<Strand>,
231{
232    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
233        let strand: Strand = self.strand.clone().into();
234        if strand.is_unknown() {
235            write!(f, "{}:{}", self.refid, self.pos)
236        } else {
237            write!(f, "{}:{}({})", self.refid, self.pos, strand)
238        }
239    }
240}
241
242impl<R, S> FromStr for Pos<R, S>
243where
244    R: From<String>,
245    S: FromStr<Err = StrandError>,
246{
247    type Err = ParseAnnotError;
248
249    fn from_str(s: &str) -> Result<Self, Self::Err> {
250        lazy_static! {
251            static ref POS_RE: Regex = Regex::new(r"^(.*):(\d+)(\([+-]\))?$").unwrap();
252        }
253
254        let cap = POS_RE.captures(s).ok_or(ParseAnnotError::BadAnnot)?;
255
256        let strand = cap
257            .get(3)
258            .map_or("", |m| m.as_str())
259            .parse::<S>()
260            .map_err(ParseAnnotError::ParseStrand)?;
261
262        Ok(Pos::new(
263            R::from(cap[1].to_owned()),
264            cap[2].parse::<isize>().map_err(ParseAnnotError::ParseInt)?,
265            strand,
266        ))
267    }
268}
269
270impl<R> From<Pos<R, ReqStrand>> for Pos<R, Strand> {
271    fn from(x: Pos<R, ReqStrand>) -> Self {
272        Pos {
273            refid: x.refid,
274            pos: x.pos,
275            strand: match x.strand {
276                ReqStrand::Forward => Strand::Forward,
277                ReqStrand::Reverse => Strand::Reverse,
278            },
279        }
280    }
281}
282
283impl<R> From<Pos<R, NoStrand>> for Pos<R, Strand> {
284    fn from(x: Pos<R, NoStrand>) -> Self {
285        Pos {
286            refid: x.refid,
287            pos: x.pos,
288            strand: Strand::Unknown,
289        }
290    }
291}
292
293impl<R> From<Pos<R, Strand>> for Pos<R, NoStrand> {
294    fn from(x: Pos<R, Strand>) -> Self {
295        Pos {
296            refid: x.refid,
297            pos: x.pos,
298            strand: NoStrand::Unknown,
299        }
300    }
301}
302
303impl<R> From<Pos<R, ReqStrand>> for Pos<R, NoStrand> {
304    fn from(x: Pos<R, ReqStrand>) -> Self {
305        Pos {
306            refid: x.refid,
307            pos: x.pos,
308            strand: NoStrand::Unknown,
309        }
310    }
311}
312
/// Default stranded sequence position on a reference sequence named
/// by a `String`; the strand is a `ReqStrand`.
pub type SeqPosStranded = Pos<String, ReqStrand>;
316
/// Default unstranded sequence position on a reference sequence named
/// by a `String`; the strand is a `NoStrand`.
pub type SeqPosUnstranded = Pos<String, NoStrand>;
320
#[cfg(test)]
mod tests {
    use super::*;

    /// Check that `pos` displays as `text` and that parsing `text`
    /// gives back a position that is the "same" as `pos`.
    fn assert_round_trip<S>(pos: Pos<String, S>, text: &str)
    where
        S: Display + Clone + Into<Strand> + FromStr<Err = StrandError> + Same,
    {
        assert_eq!(pos.to_string(), text);
        let parsed = text.parse::<Pos<String, S>>().unwrap();
        assert!(pos.same(&parsed));
    }

    // Accessors hand back exactly what was given to `Pos::new`.
    #[test]
    fn pos_accessors() {
        let cases = vec![
            ("chrIV", 683946, Strand::Unknown),
            ("chrIV", 683946, Strand::Reverse),
            ("chrXV", 493433, Strand::Forward),
        ];

        for (chr, coord, strand) in cases {
            let start = Pos::new(chr.to_owned(), coord, strand);
            assert_eq!(start.refid(), chr);
            assert_eq!(start.pos(), coord);
            assert!(start.strand().same(&strand));
        }
    }

    // Stranded -> unstranded -> stranded conversions keep the location.
    #[test]
    fn strand_conversion() {
        let cases = vec![
            ("chrIV:683946(-)", "chrIV:683946", ReqStrand::Reverse),
            ("chrXV:493433(+)", "chrXV:493433", ReqStrand::Forward),
        ];

        for (stranded, unstranded, req) in cases {
            let start: Pos<String, Strand> = stranded.parse().unwrap();

            let start_un = Pos::<String, NoStrand>::from(start);
            let expected_un: Pos<String, NoStrand> = unstranded.parse().unwrap();
            assert!(start_un.same(&expected_un));

            let start_re = start_un.into_stranded(req);
            let expected_re: Pos<String, ReqStrand> = stranded.parse().unwrap();
            assert!(start_re.same(&expected_re));
        }
    }

    // Display output and parsing agree for every strand type.
    #[test]
    fn string_representation() {
        assert_round_trip(
            Pos::new("chrIV".to_owned(), 683946, NoStrand::Unknown),
            "chrIV:683946",
        );

        assert_round_trip(
            Pos::new("chrIV".to_owned(), 683946, Strand::Unknown),
            "chrIV:683946",
        );
        assert_round_trip(
            Pos::new("chrIV".to_owned(), 683946, Strand::Reverse),
            "chrIV:683946(-)",
        );
        assert_round_trip(
            Pos::new("chrXV".to_owned(), 493433, Strand::Forward),
            "chrXV:493433(+)",
        );

        assert_round_trip(
            Pos::new("chrIV".to_owned(), 683946, ReqStrand::Reverse),
            "chrIV:683946(-)",
        );
        assert_round_trip(
            Pos::new("chrXV".to_owned(), 493433, ReqStrand::Forward),
            "chrXV:493433(+)",
        );
    }

    // `contig_intersection` depends on reference id and range only,
    // not on the strand of either the position or the contig.
    #[test]
    fn loc_impl() {
        let contig = |chr: &str, start, length, strand: ReqStrand| {
            Contig::new(chr.to_owned(), start, length, strand)
        };

        let start = Pos::new("chrIV".to_owned(), 683946, ReqStrand::Forward);

        // Contig ends before the position, lies on another
        // chromosome, or begins after the position.
        let misses = vec![
            ("chrIV", 683900, 40),
            ("chrV", 683900, 100),
            ("chrIV", 683950, 40),
        ];
        for (chr, from, len) in misses {
            assert_eq!(
                None,
                start.contig_intersection(&contig(chr, from, len, ReqStrand::Forward))
            );
        }

        let rstart = Pos::new("chrIV".to_owned(), 683946, ReqStrand::Reverse);

        for pos in [start, rstart].iter() {
            for strand in vec![ReqStrand::Forward, ReqStrand::Reverse] {
                assert_eq!(
                    Some(pos.clone()),
                    pos.contig_intersection(&contig("chrIV", 683900, 100, strand))
                );
            }
        }
    }
}
457// chrXV:493433..494470